git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/commitdiff
Merge tag 'wireless-drivers-next-for-davem-2016-12-01' of git://git.kernel.org/pub...
author David S. Miller <davem@davemloft.net>
Fri, 2 Dec 2016 18:58:10 +0000 (13:58 -0500)
committer David S. Miller <davem@davemloft.net>
Fri, 2 Dec 2016 18:58:10 +0000 (13:58 -0500)
Kalle Valo says:

====================
wireless-drivers-next patches for 4.10

Major changes:

rsi

* filter rx frames
* configure tx power
* make it possible to select antenna
* support 802.11d

brcmfmac

* cleanup of scheduled scan code
* support for bcm43341 chipset with different chip id
* support rev6 of PCIe device interface

ath10k

* add spectral scan support for QCA6174 and QCA9377 families
* show used tx bitrate with 10.4 firmware

wil6210

* add power save mode support
* add abort scan functionality
* add support for setting the retry limit for short frames

bcma

* add Dell Inspiron 3148
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
3286 files changed:
.mailmap
CREDITS
Documentation/00-INDEX
Documentation/80211/cfg80211.rst [new file with mode: 0644]
Documentation/80211/conf.py [new file with mode: 0644]
Documentation/80211/index.rst [new file with mode: 0644]
Documentation/80211/introduction.rst [new file with mode: 0644]
Documentation/80211/mac80211-advanced.rst [new file with mode: 0644]
Documentation/80211/mac80211.rst [new file with mode: 0644]
Documentation/ABI/testing/sysfs-class-cxl
Documentation/ABI/testing/sysfs-devices-system-ibm-rtl
Documentation/DocBook/80211.tmpl [deleted file]
Documentation/DocBook/Makefile
Documentation/Makefile
Documentation/accounting/.gitignore [deleted file]
Documentation/accounting/Makefile [deleted file]
Documentation/accounting/delay-accounting.txt
Documentation/accounting/getdelays.c [deleted file]
Documentation/arm/00-INDEX
Documentation/auxdisplay/.gitignore [deleted file]
Documentation/auxdisplay/Makefile [deleted file]
Documentation/auxdisplay/cfag12864b
Documentation/auxdisplay/cfag12864b-example.c [deleted file]
Documentation/blackfin/00-INDEX
Documentation/blackfin/Makefile [deleted file]
Documentation/blackfin/gptimers-example.c [deleted file]
Documentation/device-mapper/dm-raid.txt
Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt [new file with mode: 0644]
Documentation/devicetree/bindings/clock/uniphier-clock.txt
Documentation/devicetree/bindings/i2c/i2c.txt
Documentation/devicetree/bindings/i2c/trivial-devices.txt
Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt
Documentation/devicetree/bindings/input/touchscreen/melfas_mip4.txt [new file with mode: 0644]
Documentation/devicetree/bindings/ipmi.txt [deleted file]
Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt [new file with mode: 0644]
Documentation/devicetree/bindings/ipmi/ipmi-smic.txt [new file with mode: 0644]
Documentation/devicetree/bindings/mips/brcm/soc.txt
Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
Documentation/devicetree/bindings/net/brcm,amac.txt
Documentation/devicetree/bindings/net/dsa/marvell.txt
Documentation/devicetree/bindings/net/hisilicon-hns-dsaf.txt
Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
Documentation/devicetree/bindings/net/marvell-orion-net.txt
Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt
Documentation/devicetree/bindings/net/oxnas-dwmac.txt [new file with mode: 0644]
Documentation/devicetree/bindings/net/phy.txt
Documentation/devicetree/bindings/pci/rockchip-pcie.txt
Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt
Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.txt
Documentation/devicetree/bindings/reset/uniphier-reset.txt
Documentation/devicetree/bindings/rtc/dallas,ds1390.txt
Documentation/devicetree/bindings/rtc/epson,rx8900.txt [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/rtc-omap.txt
Documentation/devicetree/bindings/serial/cdns,uart.txt
Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt
Documentation/devicetree/bindings/timer/jcore,pit.txt [new file with mode: 0644]
Documentation/devicetree/bindings/usb/dwc2.txt
Documentation/devicetree/bindings/vendor-prefixes.txt
Documentation/driver-model/devres.txt
Documentation/features/perf/kprobes-event/arch-support.txt
Documentation/filesystems/.gitignore [deleted file]
Documentation/filesystems/00-INDEX
Documentation/filesystems/Locking
Documentation/filesystems/Makefile [deleted file]
Documentation/filesystems/dnotify_test.c [deleted file]
Documentation/filesystems/proc.txt
Documentation/filesystems/vfs.txt
Documentation/gpio/board.txt
Documentation/i2c/i2c-topology
Documentation/ia64/.gitignore [deleted file]
Documentation/ia64/Makefile [deleted file]
Documentation/ia64/aliasing-test.c [deleted file]
Documentation/index.rst
Documentation/input/alps.txt
Documentation/kbuild/makefiles.txt
Documentation/kernel-parameters.txt
Documentation/kselftest.txt
Documentation/laptops/.gitignore [deleted file]
Documentation/laptops/00-INDEX
Documentation/laptops/Makefile [deleted file]
Documentation/laptops/dslm.c [deleted file]
Documentation/laptops/laptop-mode.txt
Documentation/mic/Makefile [deleted file]
Documentation/mic/mpssd/.gitignore [deleted file]
Documentation/mic/mpssd/Makefile [deleted file]
Documentation/mic/mpssd/micctrl [deleted file]
Documentation/mic/mpssd/mpss [deleted file]
Documentation/mic/mpssd/mpssd.c [deleted file]
Documentation/mic/mpssd/mpssd.h [deleted file]
Documentation/mic/mpssd/sysfs.c [deleted file]
Documentation/misc-devices/Makefile [deleted file]
Documentation/misc-devices/mei/.gitignore [deleted file]
Documentation/misc-devices/mei/Makefile [deleted file]
Documentation/misc-devices/mei/TODO [deleted file]
Documentation/misc-devices/mei/mei-amt-version.c [deleted file]
Documentation/networking/00-INDEX
Documentation/networking/Makefile [deleted file]
Documentation/networking/batman-adv.txt
Documentation/networking/dsa/dsa.txt
Documentation/networking/ip-sysctl.txt
Documentation/networking/mac80211_hwsim/README
Documentation/networking/netdev-FAQ.txt
Documentation/networking/nf_conntrack-sysctl.txt
Documentation/networking/phy.txt
Documentation/networking/seg6-sysctl.txt [new file with mode: 0644]
Documentation/networking/timestamping.txt
Documentation/networking/timestamping/.gitignore [deleted file]
Documentation/networking/timestamping/Makefile [deleted file]
Documentation/networking/timestamping/hwtstamp_config.c [deleted file]
Documentation/networking/timestamping/timestamping.c [deleted file]
Documentation/networking/timestamping/txtimestamp.c [deleted file]
Documentation/pcmcia/.gitignore [deleted file]
Documentation/pcmcia/Makefile [deleted file]
Documentation/pcmcia/crc32hash.c [deleted file]
Documentation/pcmcia/devicetable.txt
Documentation/prctl/.gitignore [deleted file]
Documentation/prctl/Makefile [deleted file]
Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c [deleted file]
Documentation/prctl/disable-tsc-on-off-stress-test.c [deleted file]
Documentation/prctl/disable-tsc-test.c [deleted file]
Documentation/ptp/.gitignore [deleted file]
Documentation/ptp/Makefile [deleted file]
Documentation/ptp/testptp.c [deleted file]
Documentation/ptp/testptp.mk [deleted file]
Documentation/scsi/g_NCR5380.txt
Documentation/spi/00-INDEX
Documentation/timers/.gitignore [deleted file]
Documentation/timers/00-INDEX
Documentation/timers/Makefile [deleted file]
Documentation/timers/hpet.txt
Documentation/timers/hpet_example.c [deleted file]
Documentation/vDSO/.gitignore [deleted file]
Documentation/vDSO/Makefile [deleted file]
Documentation/vDSO/parse_vdso.c [deleted file]
Documentation/vDSO/vdso_standalone_test_x86.c [deleted file]
Documentation/vDSO/vdso_test.c [deleted file]
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/locking.txt
Documentation/watchdog/Makefile [deleted file]
Documentation/watchdog/src/.gitignore [deleted file]
Documentation/watchdog/src/Makefile [deleted file]
Documentation/watchdog/src/watchdog-simple.c [deleted file]
Documentation/watchdog/src/watchdog-test.c [deleted file]
Documentation/watchdog/watchdog-api.txt
Documentation/watchdog/wdt.txt
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/include/asm/Kbuild
arch/alpha/include/asm/uaccess.h
arch/alpha/include/uapi/asm/socket.h
arch/alpha/kernel/Makefile
arch/alpha/kernel/alpha_ksyms.c [deleted file]
arch/alpha/kernel/machvec_impl.h
arch/alpha/kernel/ptrace.c
arch/alpha/kernel/setup.c
arch/alpha/lib/callback_srm.S
arch/alpha/lib/checksum.c
arch/alpha/lib/clear_page.S
arch/alpha/lib/clear_user.S
arch/alpha/lib/copy_page.S
arch/alpha/lib/copy_user.S
arch/alpha/lib/csum_ipv6_magic.S
arch/alpha/lib/csum_partial_copy.c
arch/alpha/lib/dec_and_lock.c
arch/alpha/lib/divide.S
arch/alpha/lib/ev6-clear_page.S
arch/alpha/lib/ev6-clear_user.S
arch/alpha/lib/ev6-copy_page.S
arch/alpha/lib/ev6-copy_user.S
arch/alpha/lib/ev6-csum_ipv6_magic.S
arch/alpha/lib/ev6-divide.S
arch/alpha/lib/ev6-memchr.S
arch/alpha/lib/ev6-memcpy.S
arch/alpha/lib/ev6-memset.S
arch/alpha/lib/ev67-strcat.S
arch/alpha/lib/ev67-strchr.S
arch/alpha/lib/ev67-strlen.S
arch/alpha/lib/ev67-strncat.S
arch/alpha/lib/ev67-strrchr.S
arch/alpha/lib/fpreg.c
arch/alpha/lib/memchr.S
arch/alpha/lib/memcpy.c
arch/alpha/lib/memmove.S
arch/alpha/lib/memset.S
arch/alpha/lib/strcat.S
arch/alpha/lib/strchr.S
arch/alpha/lib/strcpy.S
arch/alpha/lib/strlen.S
arch/alpha/lib/strncat.S
arch/alpha/lib/strncpy.S
arch/alpha/lib/strrchr.S
arch/arc/Kconfig
arch/arc/Makefile
arch/arc/boot/Makefile
arch/arc/boot/dts/axc001.dtsi
arch/arc/boot/dts/nsim_700.dts
arch/arc/boot/dts/nsimosci.dts
arch/arc/configs/nsim_700_defconfig
arch/arc/configs/nsim_hs_defconfig
arch/arc/configs/nsim_hs_smp_defconfig
arch/arc/configs/nsimosci_defconfig
arch/arc/configs/nsimosci_hs_defconfig
arch/arc/configs/nsimosci_hs_smp_defconfig
arch/arc/include/asm/arcregs.h
arch/arc/include/asm/cache.h
arch/arc/include/asm/elf.h
arch/arc/include/asm/mcip.h
arch/arc/include/asm/module.h
arch/arc/include/asm/setup.h
arch/arc/include/asm/smp.h
arch/arc/include/asm/syscalls.h
arch/arc/include/uapi/asm/unistd.h
arch/arc/kernel/devtree.c
arch/arc/kernel/mcip.c
arch/arc/kernel/module.c
arch/arc/kernel/process.c
arch/arc/kernel/setup.c
arch/arc/kernel/signal.c
arch/arc/kernel/smp.c
arch/arc/kernel/time.c
arch/arc/kernel/troubleshoot.c
arch/arc/mm/cache.c
arch/arc/mm/dma.c
arch/arc/mm/tlb.c
arch/arc/mm/tlbex.S
arch/arc/plat-eznps/smp.c
arch/arm/boot/dts/imx53-qsb.dts
arch/arm/boot/dts/logicpd-som-lv.dtsi
arch/arm/boot/dts/logicpd-torpedo-som.dtsi
arch/arm/boot/dts/omap5-board-common.dtsi
arch/arm/boot/dts/ste-snowball.dts
arch/arm/boot/dts/stih410-b2260.dts
arch/arm/boot/dts/sun8i-a23-a33.dtsi
arch/arm/boot/dts/uniphier-pro5.dtsi
arch/arm/boot/dts/uniphier-pxs2.dtsi
arch/arm/boot/dts/vf500.dtsi
arch/arm/configs/multi_v7_defconfig
arch/arm/include/asm/kvm_asm.h
arch/arm/include/asm/kvm_host.h
arch/arm/include/asm/kvm_hyp.h
arch/arm/include/asm/uaccess.h
arch/arm/include/asm/unistd.h
arch/arm/include/uapi/asm/unistd.h
arch/arm/kernel/calls.S
arch/arm/kernel/traps.c
arch/arm/kernel/vmlinux-xip.lds.S
arch/arm/kvm/arm.c
arch/arm/kvm/hyp/tlb.c
arch/arm/lib/backtrace.S
arch/arm/lib/copy_from_user.S
arch/arm/mach-imx/gpc.c
arch/arm/mach-imx/mach-imx6q.c
arch/arm/mach-mvebu/Kconfig
arch/arm/mach-omap2/Kconfig
arch/arm/mach-omap2/id.c
arch/arm/mach-omap2/prm3xxx.c
arch/arm/mach-omap2/voltage.c
arch/arm/mach-uniphier/Kconfig
arch/arm/mm/abort-lv4t.S
arch/arm/mm/dma-mapping.c
arch/arm/mm/proc-v7m.S
arch/arm64/Kconfig
arch/arm64/Kconfig.platforms
arch/arm64/Makefile
arch/arm64/boot/dts/broadcom/ns2-svk.dts
arch/arm64/boot/dts/broadcom/ns2.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
arch/arm64/boot/dts/marvell/armada-3720-db.dts
arch/arm64/boot/dts/marvell/armada-37xx.dtsi
arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi
arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi
arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts
arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts
arch/arm64/boot/dts/rockchip/rk3399.dtsi
arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi
arch/arm64/include/asm/alternative.h
arch/arm64/include/asm/cpucaps.h [new file with mode: 0644]
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/exec.h
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/lse.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/module.h
arch/arm64/include/asm/percpu.h
arch/arm64/include/asm/perf_event.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/uaccess.h
arch/arm64/kernel/armv8_deprecated.c
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/head.S
arch/arm64/kernel/perf_event.c
arch/arm64/kernel/process.c
arch/arm64/kernel/sleep.S
arch/arm64/kernel/smp.c
arch/arm64/kernel/suspend.c
arch/arm64/kernel/traps.c
arch/arm64/kvm/hyp/tlb.c
arch/arm64/kvm/sys_regs.c
arch/arm64/lib/copy_from_user.S
arch/arm64/mm/fault.c
arch/arm64/mm/init.c
arch/arm64/mm/numa.c
arch/blackfin/include/asm/uaccess.h
arch/blackfin/kernel/ptrace.c
arch/cris/arch-v32/drivers/cryptocop.c
arch/cris/arch-v32/kernel/ptrace.c
arch/frv/include/uapi/asm/socket.h
arch/h8300/include/asm/thread_info.h
arch/h8300/kernel/signal.c
arch/ia64/hp/sim/boot/Makefile
arch/ia64/include/asm/export.h [new file with mode: 0644]
arch/ia64/include/asm/libata-portmap.h
arch/ia64/include/uapi/asm/socket.h
arch/ia64/kernel/entry.S
arch/ia64/kernel/err_inject.c
arch/ia64/kernel/esi_stub.S
arch/ia64/kernel/head.S
arch/ia64/kernel/ia64_ksyms.c
arch/ia64/kernel/ivt.S
arch/ia64/kernel/pal.S
arch/ia64/kernel/ptrace.c
arch/ia64/kernel/setup.c
arch/ia64/lib/Makefile
arch/ia64/lib/clear_page.S
arch/ia64/lib/clear_user.S
arch/ia64/lib/copy_page.S
arch/ia64/lib/copy_page_mck.S
arch/ia64/lib/copy_user.S
arch/ia64/lib/flush.S
arch/ia64/lib/idiv32.S
arch/ia64/lib/idiv64.S
arch/ia64/lib/ip_fast_csum.S
arch/ia64/lib/memcpy.S
arch/ia64/lib/memcpy_mck.S
arch/ia64/lib/memset.S
arch/ia64/lib/strlen.S
arch/ia64/lib/strlen_user.S
arch/ia64/lib/strncpy_from_user.S
arch/ia64/lib/strnlen_user.S
arch/ia64/lib/xor.S
arch/m32r/include/uapi/asm/socket.h
arch/m32r/kernel/ptrace.c
arch/m68k/include/asm/export.h [new file with mode: 0644]
arch/m68k/kernel/Makefile
arch/m68k/kernel/m68k_ksyms.c [deleted file]
arch/m68k/lib/ashldi3.c
arch/m68k/lib/ashrdi3.c
arch/m68k/lib/divsi3.S
arch/m68k/lib/lshrdi3.c
arch/m68k/lib/modsi3.S
arch/m68k/lib/muldi3.c
arch/m68k/lib/mulsi3.S
arch/m68k/lib/udivsi3.S
arch/m68k/lib/umodsi3.S
arch/metag/include/asm/atomic.h
arch/mips/Kbuild.platforms
arch/mips/Kconfig
arch/mips/Makefile
arch/mips/alchemy/common/setup.c
arch/mips/bcm47xx/serial.c
arch/mips/bcm63xx/clk.c
arch/mips/bmips/Kconfig
arch/mips/bmips/setup.c
arch/mips/boot/Makefile
arch/mips/boot/dts/brcm/Makefile
arch/mips/boot/dts/brcm/bcm3368-netgear-cvg834g.dts [new file with mode: 0644]
arch/mips/boot/dts/brcm/bcm3368.dtsi [new file with mode: 0644]
arch/mips/boot/dts/brcm/bcm63268-comtrend-vr-3032u.dts [new file with mode: 0644]
arch/mips/boot/dts/brcm/bcm63268.dtsi [new file with mode: 0644]
arch/mips/boot/dts/brcm/bcm6358-neufbox4-sercomm.dts [new file with mode: 0644]
arch/mips/boot/dts/brcm/bcm6362-neufbox6-sercomm.dts [new file with mode: 0644]
arch/mips/boot/dts/brcm/bcm6362.dtsi [new file with mode: 0644]
arch/mips/boot/dts/brcm/bcm7125.dtsi
arch/mips/boot/dts/brcm/bcm7346.dtsi
arch/mips/boot/dts/brcm/bcm7358.dtsi
arch/mips/boot/dts/brcm/bcm7360.dtsi
arch/mips/boot/dts/brcm/bcm7362.dtsi
arch/mips/boot/dts/brcm/bcm7420.dtsi
arch/mips/boot/dts/brcm/bcm7425.dtsi
arch/mips/boot/dts/brcm/bcm7435.dtsi
arch/mips/boot/dts/brcm/bcm96358nb4ser.dts [deleted file]
arch/mips/boot/dts/brcm/bcm97125cbmb.dts
arch/mips/boot/dts/brcm/bcm97346dbsmb.dts
arch/mips/boot/dts/brcm/bcm97358svmb.dts
arch/mips/boot/dts/brcm/bcm97360svmb.dts
arch/mips/boot/dts/brcm/bcm97362svmb.dts
arch/mips/boot/dts/brcm/bcm97420c.dts
arch/mips/boot/dts/brcm/bcm97425svmb.dts
arch/mips/boot/dts/brcm/bcm97435svmb.dts
arch/mips/boot/dts/brcm/bcm97xxx-nand-cs1-bch24.dtsi [new file with mode: 0644]
arch/mips/boot/dts/brcm/bcm97xxx-nand-cs1-bch4.dtsi [new file with mode: 0644]
arch/mips/boot/dts/cavium-octeon/dlink_dsr-1000n.dts
arch/mips/boot/dts/cavium-octeon/dlink_dsr-500n-1000n.dtsi [new file with mode: 0644]
arch/mips/boot/dts/cavium-octeon/dlink_dsr-500n.dts [new file with mode: 0644]
arch/mips/boot/dts/mti/Makefile
arch/mips/boot/dts/mti/malta.dts
arch/mips/boot/dts/mti/sead3.dts
arch/mips/cavium-octeon/executive/cvmx-helper-board.c
arch/mips/cavium-octeon/executive/cvmx-helper-rgmii.c
arch/mips/cavium-octeon/executive/cvmx-helper-sgmii.c
arch/mips/cavium-octeon/executive/cvmx-helper-xaui.c
arch/mips/cavium-octeon/executive/cvmx-helper.c
arch/mips/cavium-octeon/setup.c
arch/mips/configs/generic/32r1.config [new file with mode: 0644]
arch/mips/configs/generic/32r2.config [new file with mode: 0644]
arch/mips/configs/generic/32r6.config [new file with mode: 0644]
arch/mips/configs/generic/64r1.config [new file with mode: 0644]
arch/mips/configs/generic/64r2.config [new file with mode: 0644]
arch/mips/configs/generic/64r6.config [new file with mode: 0644]
arch/mips/configs/generic/board-sead-3.config [new file with mode: 0644]
arch/mips/configs/generic/eb.config [new file with mode: 0644]
arch/mips/configs/generic/el.config [new file with mode: 0644]
arch/mips/configs/generic/micro32r2.config [new file with mode: 0644]
arch/mips/configs/generic_defconfig [new file with mode: 0644]
arch/mips/configs/loongson1c_defconfig [new file with mode: 0644]
arch/mips/configs/malta_defconfig
arch/mips/configs/malta_kvm_defconfig
arch/mips/configs/malta_kvm_guest_defconfig
arch/mips/configs/malta_qemu_32r6_defconfig
arch/mips/configs/maltaaprp_defconfig
arch/mips/configs/maltasmvp_defconfig
arch/mips/configs/maltasmvp_eva_defconfig
arch/mips/configs/maltaup_defconfig
arch/mips/configs/maltaup_xpa_defconfig
arch/mips/configs/pistachio_defconfig
arch/mips/configs/sead3_defconfig [deleted file]
arch/mips/configs/sead3micro_defconfig [deleted file]
arch/mips/generic/Kconfig [new file with mode: 0644]
arch/mips/generic/Makefile [new file with mode: 0644]
arch/mips/generic/Platform [new file with mode: 0644]
arch/mips/generic/board-sead3.c [new file with mode: 0644]
arch/mips/generic/init.c [new file with mode: 0644]
arch/mips/generic/irq.c [new file with mode: 0644]
arch/mips/generic/proc.c [new file with mode: 0644]
arch/mips/generic/vmlinux.its.S [new file with mode: 0644]
arch/mips/include/asm/addrspace.h
arch/mips/include/asm/barrier.h
arch/mips/include/asm/cacheflush.h
arch/mips/include/asm/cpu-type.h
arch/mips/include/asm/cpu.h
arch/mips/include/asm/device.h
arch/mips/include/asm/dma-coherence.h
arch/mips/include/asm/dma-mapping.h
arch/mips/include/asm/fpu_emulator.h
arch/mips/include/asm/i8259.h
arch/mips/include/asm/kvm_host.h
arch/mips/include/asm/mach-generic/dma-coherence.h
arch/mips/include/asm/mach-generic/floppy.h
arch/mips/include/asm/mach-generic/spaces.h
arch/mips/include/asm/mach-ip27/spaces.h
arch/mips/include/asm/mach-loongson32/irq.h
arch/mips/include/asm/mach-loongson32/loongson1.h
arch/mips/include/asm/mach-loongson32/platform.h
arch/mips/include/asm/mach-loongson32/regs-clk.h
arch/mips/include/asm/mach-loongson32/regs-mux.h
arch/mips/include/asm/mach-sead3/cpu-feature-overrides.h [deleted file]
arch/mips/include/asm/mach-sead3/irq.h [deleted file]
arch/mips/include/asm/mach-sead3/kernel-entry-init.h [deleted file]
arch/mips/include/asm/mach-sead3/war.h [deleted file]
arch/mips/include/asm/machine.h [new file with mode: 0644]
arch/mips/include/asm/mips-boards/sead3int.h [deleted file]
arch/mips/include/asm/mips-cm.h
arch/mips/include/asm/octeon/cvmx-helper-board.h
arch/mips/include/asm/octeon/cvmx-mdio.h [deleted file]
arch/mips/include/asm/pci.h
arch/mips/include/asm/pgalloc.h
arch/mips/include/asm/pm-cps.h
arch/mips/include/asm/ptrace.h
arch/mips/include/asm/smp.h
arch/mips/include/asm/switch_to.h
arch/mips/include/asm/uaccess.h
arch/mips/include/uapi/asm/socket.h
arch/mips/include/uapi/asm/unistd.h
arch/mips/kernel/binfmt_elfn32.c
arch/mips/kernel/binfmt_elfo32.c
arch/mips/kernel/branch.c
arch/mips/kernel/kprobes.c
arch/mips/kernel/linux32.c
arch/mips/kernel/mips-cpc.c
arch/mips/kernel/mips-r2-to-r6-emul.c
arch/mips/kernel/module.c
arch/mips/kernel/pm-cps.c
arch/mips/kernel/probes-common.h [new file with mode: 0644]
arch/mips/kernel/proc.c
arch/mips/kernel/ptrace.c
arch/mips/kernel/ptrace32.c
arch/mips/kernel/r2300_fpu.S
arch/mips/kernel/r6000_fpu.S
arch/mips/kernel/relocate.c
arch/mips/kernel/scall32-o32.S
arch/mips/kernel/scall64-64.S
arch/mips/kernel/scall64-n32.S
arch/mips/kernel/scall64-o32.S
arch/mips/kernel/setup.c
arch/mips/kernel/smp-gic.c [deleted file]
arch/mips/kernel/smp-mt.c
arch/mips/kernel/smp.c
arch/mips/kernel/traps.c
arch/mips/kernel/uprobes.c
arch/mips/kvm/commpage.c
arch/mips/kvm/dyntrans.c
arch/mips/kvm/emulate.c
arch/mips/kvm/interrupt.c
arch/mips/kvm/mips.c
arch/mips/kvm/mmu.c
arch/mips/kvm/trap_emul.c
arch/mips/lantiq/xway/vmmc.c
arch/mips/lantiq/xway/xrx200_phy_fw.c
arch/mips/lib/ashldi3.c
arch/mips/lib/ashrdi3.c
arch/mips/lib/bswapdi.c
arch/mips/lib/bswapsi.c
arch/mips/lib/cmpdi2.c
arch/mips/lib/delay.c
arch/mips/lib/dump_tlb.c
arch/mips/lib/iomap-pci.c
arch/mips/lib/iomap.c
arch/mips/lib/lshrdi3.c
arch/mips/lib/r3k_dump_tlb.c
arch/mips/lib/ucmpdi2.c
arch/mips/loongson32/Kconfig
arch/mips/loongson32/Makefile
arch/mips/loongson32/Platform
arch/mips/loongson32/common/irq.c
arch/mips/loongson32/common/platform.c
arch/mips/loongson32/common/setup.c
arch/mips/loongson32/ls1c/Makefile [new file with mode: 0644]
arch/mips/loongson32/ls1c/board.c [new file with mode: 0644]
arch/mips/mm/c-octeon.c
arch/mips/mm/c-r3k.c
arch/mips/mm/c-r4k.c
arch/mips/mm/c-tx39.c
arch/mips/mm/cache.c
arch/mips/mm/dma-default.c
arch/mips/mm/extable.c
arch/mips/mm/fault.c
arch/mips/mm/gup.c
arch/mips/mm/highmem.c
arch/mips/mm/init.c
arch/mips/mm/ioremap.c
arch/mips/mm/mmap.c
arch/mips/mm/page.c
arch/mips/mm/tlb-r4k.c
arch/mips/mti-malta/malta-dt.c
arch/mips/mti-malta/malta-dtshim.c
arch/mips/mti-malta/malta-init.c
arch/mips/mti-malta/malta-int.c
arch/mips/mti-malta/malta-platform.c
arch/mips/mti-malta/malta-reset.c
arch/mips/mti-malta/malta-setup.c
arch/mips/mti-sead3/Makefile [deleted file]
arch/mips/mti-sead3/Platform [deleted file]
arch/mips/mti-sead3/sead3-console.c [deleted file]
arch/mips/mti-sead3/sead3-display.c [deleted file]
arch/mips/mti-sead3/sead3-init.c [deleted file]
arch/mips/mti-sead3/sead3-int.c [deleted file]
arch/mips/mti-sead3/sead3-lcd.c [deleted file]
arch/mips/mti-sead3/sead3-platform.c [deleted file]
arch/mips/mti-sead3/sead3-reset.c [deleted file]
arch/mips/mti-sead3/sead3-setup.c [deleted file]
arch/mips/mti-sead3/sead3-time.c [deleted file]
arch/mips/pci/Makefile
arch/mips/pci/pci-alchemy.c
arch/mips/pci/pci-ar71xx.c
arch/mips/pci/pci-ar724x.c
arch/mips/pci/pci-generic.c [new file with mode: 0644]
arch/mips/pci/pci-lantiq.c
arch/mips/pci/pci-legacy.c [new file with mode: 0644]
arch/mips/pci/pci-mt7620.c
arch/mips/pci/pci-octeon.c
arch/mips/pci/pci-rt2880.c
arch/mips/pci/pci-rt3883.c
arch/mips/pci/pci.c
arch/mips/pci/pcie-octeon.c
arch/mips/pnx833x/common/platform.c
arch/mips/ralink/timer.c
arch/mips/txx9/Kconfig
arch/mips/txx9/generic/pci.c
arch/mips/txx9/generic/setup.c
arch/mips/txx9/generic/setup_tx3927.c
arch/mips/txx9/generic/setup_tx4927.c
arch/mips/txx9/generic/setup_tx4938.c
arch/mips/txx9/jmr3927/setup.c
arch/mips/txx9/rbtx4927/setup.c
arch/mips/txx9/rbtx4938/setup.c
arch/mips/vdso/Makefile
arch/mn10300/include/uapi/asm/socket.h
arch/nios2/kernel/time.c
arch/openrisc/include/asm/cache.h
arch/parisc/Kconfig
arch/parisc/include/uapi/asm/socket.h
arch/parisc/include/uapi/asm/unistd.h
arch/parisc/kernel/cache.c
arch/parisc/kernel/drivers.c
arch/parisc/kernel/inventory.c
arch/parisc/kernel/pacache.S
arch/parisc/kernel/pci-dma.c
arch/parisc/kernel/setup.c
arch/parisc/kernel/syscall.S
arch/parisc/kernel/time.c
arch/powerpc/Makefile
arch/powerpc/boot/main.c
arch/powerpc/boot/opal-calls.S
arch/powerpc/boot/opal.c
arch/powerpc/boot/ops.h
arch/powerpc/configs/dpaa.config [new file with mode: 0644]
arch/powerpc/include/asm/Kbuild
arch/powerpc/include/asm/asm-prototypes.h
arch/powerpc/include/asm/checksum.h
arch/powerpc/include/asm/cpuidle.h
arch/powerpc/include/asm/cputable.h
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/hw_irq.h
arch/powerpc/include/asm/libata-portmap.h
arch/powerpc/include/asm/mmu.h
arch/powerpc/include/asm/ppc-opcode.h
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/reg_8xx.h
arch/powerpc/include/asm/tlb.h
arch/powerpc/include/asm/unistd.h
arch/powerpc/include/uapi/asm/socket.h
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/cpu_setup_power.S
arch/powerpc/kernel/cputable.c
arch/powerpc/kernel/entry_32.S
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/epapr_hcalls.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/fpu.S
arch/powerpc/kernel/head_32.S
arch/powerpc/kernel/head_40x.S
arch/powerpc/kernel/head_44x.S
arch/powerpc/kernel/head_64.S
arch/powerpc/kernel/head_8xx.S
arch/powerpc/kernel/head_fsl_booke.S
arch/powerpc/kernel/hw_breakpoint.c
arch/powerpc/kernel/idle_book3s.S
arch/powerpc/kernel/misc.S
arch/powerpc/kernel/misc_32.S
arch/powerpc/kernel/misc_64.S
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/pci_32.c
arch/powerpc/kernel/ppc_ksyms.c [deleted file]
arch/powerpc/kernel/ppc_ksyms_32.c [deleted file]
arch/powerpc/kernel/process.c
arch/powerpc/kernel/ptrace32.c
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/setup_32.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/time.c
arch/powerpc/kernel/traps.c
arch/powerpc/kernel/vector.S
arch/powerpc/kvm/book3s_hv_rm_xics.c
arch/powerpc/lib/Makefile
arch/powerpc/lib/checksum_32.S
arch/powerpc/lib/checksum_64.S
arch/powerpc/lib/copy_32.S
arch/powerpc/lib/copypage_64.S
arch/powerpc/lib/copyuser_64.S
arch/powerpc/lib/hweight_64.S
arch/powerpc/lib/mem_64.S
arch/powerpc/lib/memcmp_64.S
arch/powerpc/lib/memcpy_64.S
arch/powerpc/lib/ppc_ksyms.c [deleted file]
arch/powerpc/lib/string.S
arch/powerpc/lib/string_64.S
arch/powerpc/mm/copro_fault.c
arch/powerpc/mm/hash_low_32.S
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/mm/numa.c
arch/powerpc/mm/pgtable-radix.c
arch/powerpc/mm/tlb-radix.c
arch/powerpc/platforms/82xx/Kconfig
arch/powerpc/platforms/82xx/ep8248e.c
arch/powerpc/platforms/83xx/asp834x.c
arch/powerpc/platforms/83xx/km83xx.c
arch/powerpc/platforms/83xx/misc.c
arch/powerpc/platforms/83xx/mpc830x_rdb.c
arch/powerpc/platforms/83xx/mpc831x_rdb.c
arch/powerpc/platforms/83xx/mpc832x_mds.c
arch/powerpc/platforms/83xx/mpc832x_rdb.c
arch/powerpc/platforms/83xx/mpc834x_itx.c
arch/powerpc/platforms/83xx/mpc834x_mds.c
arch/powerpc/platforms/83xx/mpc836x_mds.c
arch/powerpc/platforms/83xx/mpc836x_rdk.c
arch/powerpc/platforms/83xx/mpc837x_mds.c
arch/powerpc/platforms/83xx/mpc837x_rdb.c
arch/powerpc/platforms/83xx/mpc83xx.h
arch/powerpc/platforms/83xx/sbc834x.c
arch/powerpc/platforms/85xx/Kconfig
arch/powerpc/platforms/85xx/bsc913x_qds.c
arch/powerpc/platforms/85xx/bsc913x_rdb.c
arch/powerpc/platforms/85xx/c293pcie.c
arch/powerpc/platforms/85xx/corenet_generic.c
arch/powerpc/platforms/85xx/ge_imp3a.c
arch/powerpc/platforms/85xx/mpc8536_ds.c
arch/powerpc/platforms/85xx/mpc85xx_ads.c
arch/powerpc/platforms/85xx/mpc85xx_cds.c
arch/powerpc/platforms/85xx/mpc85xx_ds.c
arch/powerpc/platforms/85xx/mpc85xx_mds.c
arch/powerpc/platforms/85xx/mpc85xx_rdb.c
arch/powerpc/platforms/85xx/mvme2500.c
arch/powerpc/platforms/85xx/p1010rdb.c
arch/powerpc/platforms/85xx/p1022_ds.c
arch/powerpc/platforms/85xx/p1022_rdk.c
arch/powerpc/platforms/85xx/p1023_rdb.c
arch/powerpc/platforms/85xx/ppa8548.c
arch/powerpc/platforms/85xx/qemu_e500.c
arch/powerpc/platforms/85xx/sbc8548.c
arch/powerpc/platforms/85xx/sgy_cts1000.c
arch/powerpc/platforms/85xx/socrates.c
arch/powerpc/platforms/85xx/stx_gp3.c
arch/powerpc/platforms/85xx/tqm85xx.c
arch/powerpc/platforms/85xx/twr_p102x.c
arch/powerpc/platforms/85xx/xes_mpc85xx.c
arch/powerpc/platforms/86xx/gef_ppc9a.c
arch/powerpc/platforms/86xx/gef_sbc310.c
arch/powerpc/platforms/86xx/gef_sbc610.c
arch/powerpc/platforms/86xx/mpc8610_hpcd.c
arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
arch/powerpc/platforms/86xx/mvme7100.c
arch/powerpc/platforms/86xx/sbc8641d.c
arch/powerpc/platforms/pseries/lpar.c
arch/powerpc/relocs_check.sh
arch/powerpc/sysdev/cpm1.c
arch/powerpc/sysdev/cpm2.c
arch/powerpc/sysdev/cpm_common.c
arch/powerpc/sysdev/dcr-low.S
arch/powerpc/sysdev/fsl_pci.c
arch/powerpc/sysdev/fsl_soc.c
arch/powerpc/sysdev/fsl_soc.h
arch/powerpc/sysdev/mpic.c
arch/s390/hypfs/hypfs_diag.c
arch/s390/include/asm/Kbuild
arch/s390/include/asm/ftrace.h
arch/s390/include/asm/processor.h
arch/s390/include/asm/unistd.h
arch/s390/include/uapi/asm/socket.h
arch/s390/kernel/Makefile
arch/s390/kernel/dis.c
arch/s390/kernel/dumpstack.c
arch/s390/kernel/entry.S
arch/s390/kernel/mcount.S
arch/s390/kernel/perf_event.c
arch/s390/kernel/s390_ksyms.c [deleted file]
arch/s390/kernel/stacktrace.c
arch/s390/kernel/vmlinux.lds.S
arch/s390/kvm/intercept.c
arch/s390/kvm/sthyi.c
arch/s390/lib/mem.S
arch/s390/mm/gup.c
arch/s390/mm/hugetlbpage.c
arch/s390/mm/init.c
arch/s390/oprofile/init.c
arch/s390/pci/pci_dma.c
arch/score/kernel/ptrace.c
arch/score/kernel/traps.c
arch/sh/Makefile
arch/sh/boards/Kconfig
arch/sh/configs/j2_defconfig
arch/sh/mm/gup.c
arch/sparc/Kconfig
arch/sparc/include/asm/Kbuild
arch/sparc/include/asm/cpudata_64.h
arch/sparc/include/asm/hypervisor.h
arch/sparc/include/asm/iommu_64.h
arch/sparc/include/asm/spinlock_32.h
arch/sparc/include/asm/spinlock_64.h
arch/sparc/include/asm/string.h
arch/sparc/include/asm/string_32.h
arch/sparc/include/asm/string_64.h
arch/sparc/include/asm/topology_64.h
arch/sparc/include/asm/uaccess_64.h
arch/sparc/include/uapi/asm/socket.h
arch/sparc/kernel/Makefile
arch/sparc/kernel/entry.S
arch/sparc/kernel/head_32.S
arch/sparc/kernel/head_64.S
arch/sparc/kernel/helpers.S
arch/sparc/kernel/hvapi.c
arch/sparc/kernel/hvcalls.S
arch/sparc/kernel/iommu.c
arch/sparc/kernel/iommu_common.h
arch/sparc/kernel/jump_label.c
arch/sparc/kernel/mdesc.c
arch/sparc/kernel/pci_sun4v.c
arch/sparc/kernel/pci_sun4v.h
arch/sparc/kernel/pci_sun4v_asm.S
arch/sparc/kernel/ptrace_64.c
arch/sparc/kernel/signal_32.c
arch/sparc/kernel/smp_64.c
arch/sparc/kernel/sparc_ksyms.c [new file with mode: 0644]
arch/sparc/kernel/sparc_ksyms_32.c [deleted file]
arch/sparc/kernel/sparc_ksyms_64.c [deleted file]
arch/sparc/lib/GENcopy_from_user.S
arch/sparc/lib/GENcopy_to_user.S
arch/sparc/lib/GENmemcpy.S
arch/sparc/lib/Makefile
arch/sparc/lib/NG2copy_from_user.S
arch/sparc/lib/NG2copy_to_user.S
arch/sparc/lib/NG2memcpy.S
arch/sparc/lib/NG4copy_from_user.S
arch/sparc/lib/NG4copy_to_user.S
arch/sparc/lib/NG4memcpy.S
arch/sparc/lib/NGcopy_from_user.S
arch/sparc/lib/NGcopy_to_user.S
arch/sparc/lib/NGmemcpy.S
arch/sparc/lib/U1copy_from_user.S
arch/sparc/lib/U1copy_to_user.S
arch/sparc/lib/U1memcpy.S
arch/sparc/lib/U3copy_from_user.S
arch/sparc/lib/U3copy_to_user.S
arch/sparc/lib/U3memcpy.S
arch/sparc/lib/VISsave.S
arch/sparc/lib/ashldi3.S
arch/sparc/lib/ashrdi3.S
arch/sparc/lib/atomic_64.S
arch/sparc/lib/bitops.S
arch/sparc/lib/blockops.S
arch/sparc/lib/bzero.S
arch/sparc/lib/checksum_32.S
arch/sparc/lib/checksum_64.S
arch/sparc/lib/clear_page.S
arch/sparc/lib/copy_in_user.S
arch/sparc/lib/copy_page.S
arch/sparc/lib/copy_user.S
arch/sparc/lib/csum_copy.S
arch/sparc/lib/divdi3.S
arch/sparc/lib/ffs.S
arch/sparc/lib/hweight.S
arch/sparc/lib/ipcsum.S
arch/sparc/lib/ksyms.c [deleted file]
arch/sparc/lib/locks.S
arch/sparc/lib/lshrdi3.S
arch/sparc/lib/mcount.S
arch/sparc/lib/memcmp.S
arch/sparc/lib/memcpy.S
arch/sparc/lib/memmove.S
arch/sparc/lib/memscan_32.S
arch/sparc/lib/memscan_64.S
arch/sparc/lib/memset.S
arch/sparc/lib/muldi3.S
arch/sparc/lib/strlen.S
arch/sparc/lib/strncmp_32.S
arch/sparc/lib/strncmp_64.S
arch/sparc/lib/user_fixup.c [deleted file]
arch/sparc/lib/xor.S
arch/sparc/mm/gup.c
arch/sparc/mm/init_64.c
arch/sparc/mm/tsb.c
arch/sparc/mm/ultra.S
arch/tile/include/asm/cache.h
arch/tile/kernel/time.c
arch/x86/boot/compressed/Makefile
arch/x86/boot/cpu.c
arch/x86/crypto/aesni-intel_glue.c
arch/x86/entry/Makefile
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/entry/syscalls/syscall_64.tbl
arch/x86/entry/thunk_32.S
arch/x86/entry/thunk_64.S
arch/x86/events/amd/core.c
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/ds.c
arch/x86/events/intel/lbr.c
arch/x86/events/intel/rapl.c
arch/x86/events/intel/uncore.c
arch/x86/events/intel/uncore_snb.c
arch/x86/events/perf_event.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/export.h [new file with mode: 0644]
arch/x86/include/asm/intel-family.h
arch/x86/include/asm/intel-mid.h
arch/x86/include/asm/io.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/percpu.h
arch/x86/include/asm/rwsem.h
arch/x86/include/asm/thread_info.h
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/apm_32.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/cpu/scattered.c
arch/x86/kernel/cpu/vmware.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/e820.c
arch/x86/kernel/fpu/core.c
arch/x86/kernel/fpu/xstate.c
arch/x86/kernel/head_32.S
arch/x86/kernel/head_64.S
arch/x86/kernel/i386_ksyms_32.c [deleted file]
arch/x86/kernel/kprobes/core.c
arch/x86/kernel/mcount_64.S
arch/x86/kernel/quirks.c
arch/x86/kernel/setup.c
arch/x86/kernel/signal_compat.c
arch/x86/kernel/smp.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/step.c
arch/x86/kernel/sysfb_simplefb.c
arch/x86/kernel/unwind_guess.c
arch/x86/kernel/x8664_ksyms_64.c [deleted file]
arch/x86/kvm/emulate.c
arch/x86/kvm/ioapic.c
arch/x86/kvm/ioapic.h
arch/x86/kvm/irq_comm.c
arch/x86/kvm/lapic.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lib/checksum_32.S
arch/x86/lib/clear_page_64.S
arch/x86/lib/cmpxchg8b_emu.S
arch/x86/lib/copy_page_64.S
arch/x86/lib/copy_user_64.S
arch/x86/lib/csum-partial_64.c
arch/x86/lib/getuser.S
arch/x86/lib/hweight.S
arch/x86/lib/memcpy_64.S
arch/x86/lib/memmove_64.S
arch/x86/lib/memset_64.S
arch/x86/lib/putuser.S
arch/x86/lib/strstr_32.c
arch/x86/mm/extable.c
arch/x86/mm/gup.c
arch/x86/mm/kaslr.c
arch/x86/mm/mpx.c
arch/x86/mm/pat.c
arch/x86/platform/efi/efi.c
arch/x86/platform/efi/efi_64.c
arch/x86/platform/intel-mid/device_libs/Makefile
arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c [new file with mode: 0644]
arch/x86/platform/intel-mid/device_libs/platform_wdt.c [deleted file]
arch/x86/platform/intel-mid/pwr.c
arch/x86/platform/uv/bios_uv.c
arch/x86/purgatory/Makefile
arch/x86/um/Makefile
arch/x86/um/checksum_32.S
arch/x86/um/ksyms.c [deleted file]
arch/x86/um/ptrace_32.c
arch/x86/um/ptrace_64.c
arch/x86/xen/enlighten.c
arch/xtensa/include/uapi/asm/socket.h
arch/xtensa/include/uapi/asm/unistd.h
arch/xtensa/kernel/time.c
arch/xtensa/kernel/traps.c
block/badblocks.c
block/blk-cgroup.c
block/blk-flush.c
block/blk-mq.c
block/blk-softirq.c
crypto/algif_aead.c
crypto/algif_hash.c
crypto/algif_skcipher.c
crypto/asymmetric_keys/x509_cert_parser.c
crypto/scatterwalk.c
drivers/Makefile
drivers/acpi/acpi_apd.c
drivers/acpi/acpi_lpss.c
drivers/acpi/acpi_pad.c
drivers/acpi/acpi_platform.c
drivers/acpi/acpica/dsinit.c
drivers/acpi/acpica/dsmethod.c
drivers/acpi/acpica/dswload2.c
drivers/acpi/acpica/evrgnini.c
drivers/acpi/acpica/nsload.c
drivers/acpi/acpica/tbfadt.c
drivers/acpi/apei/ghes.c
drivers/acpi/dptf/int340x_thermal.c
drivers/acpi/ec.c
drivers/acpi/event.c
drivers/acpi/fan.c
drivers/acpi/osl.c
drivers/acpi/pci_link.c
drivers/acpi/property.c
drivers/acpi/scan.c
drivers/acpi/sleep.c
drivers/android/binder.c
drivers/ata/ahci.c
drivers/ata/ahci.h
drivers/ata/ahci_qoriq.c
drivers/ata/ahci_st.c
drivers/ata/libahci.c
drivers/ata/libata-scsi.c
drivers/ata/pata_at91.c
drivers/ata/pata_octeon_cf.c
drivers/ata/sata_mv.c
drivers/atm/solos-pci.c
drivers/auxdisplay/Kconfig
drivers/auxdisplay/Makefile
drivers/auxdisplay/img-ascii-lcd.c [new file with mode: 0644]
drivers/base/Kconfig
drivers/base/dd.c
drivers/base/devres.c
drivers/base/power/main.c
drivers/block/DAC960.c
drivers/block/aoe/aoecmd.c
drivers/block/drbd/drbd_main.c
drivers/block/nbd.c
drivers/block/rbd.c
drivers/block/virtio_blk.c
drivers/bluetooth/btwilink.c
drivers/bluetooth/hci_bcm.c
drivers/bus/Kconfig
drivers/char/hw_random/core.c
drivers/char/ipmi/Kconfig
drivers/char/ipmi/Makefile
drivers/char/ipmi/bt-bmc.c [new file with mode: 0644]
drivers/char/ipmi/ipmi_msghandler.c
drivers/char/ppdev.c
drivers/char/random.c
drivers/char/tpm/tpm-interface.c
drivers/char/virtio_console.c
drivers/clk/at91/clk-programmable.c
drivers/clk/bcm/clk-bcm2835.c
drivers/clk/berlin/bg2.c
drivers/clk/berlin/bg2q.c
drivers/clk/clk-efm32gg.c
drivers/clk/clk-max77686.c
drivers/clk/clk-qoriq.c
drivers/clk/clk-xgene.c
drivers/clk/hisilicon/clk-hi6220.c
drivers/clk/imx/clk-pllv3.c
drivers/clk/mediatek/Kconfig
drivers/clk/mmp/clk-of-mmp2.c
drivers/clk/mmp/clk-of-pxa168.c
drivers/clk/mmp/clk-of-pxa910.c
drivers/clk/mvebu/armada-37xx-periph.c
drivers/clk/rockchip/clk-ddr.c
drivers/clk/samsung/clk-exynos-audss.c
drivers/clk/samsung/clk-exynos-clkout.c
drivers/clk/sunxi-ng/ccu-sun6i-a31.c
drivers/clk/sunxi/clk-sunxi.c
drivers/clk/uniphier/clk-uniphier-core.c
drivers/clk/uniphier/clk-uniphier-mio.c
drivers/clk/uniphier/clk-uniphier-mux.c
drivers/clk/uniphier/clk-uniphier.h
drivers/clocksource/Kconfig
drivers/clocksource/Makefile
drivers/clocksource/jcore-pit.c [new file with mode: 0644]
drivers/clocksource/timer-sun5i.c
drivers/cpufreq/cppc_cpufreq.c
drivers/cpufreq/cpufreq_conservative.c
drivers/cpufreq/intel_pstate.c
drivers/cpuidle/Kconfig.mips
drivers/cpuidle/cpuidle-cps.c
drivers/crypto/caam/caamalg.c
drivers/crypto/chelsio/chcr_algo.c
drivers/crypto/chelsio/chcr_core.c
drivers/dax/Kconfig
drivers/dax/dax.c
drivers/dax/pmem.c
drivers/devfreq/devfreq.c
drivers/devfreq/event/Kconfig
drivers/devfreq/event/exynos-nocp.c
drivers/dma/Kconfig
drivers/dma/cppi41.c
drivers/dma/edma.c
drivers/dma/sun6i-dma.c
drivers/extcon/extcon-qcom-spmi-misc.c
drivers/firewire/net.c
drivers/firewire/nosy.c
drivers/firmware/efi/libstub/Makefile
drivers/gpio/Kconfig
drivers/gpio/Makefile
drivers/gpio/gpio-ath79.c
drivers/gpio/gpio-mpc8xxx.c
drivers/gpio/gpio-mvebu.c
drivers/gpio/gpio-mxs.c
drivers/gpio/gpio-pca953x.c
drivers/gpio/gpio-stmpe.c
drivers/gpio/gpio-tc3589x.c
drivers/gpio/gpio-ts4800.c
drivers/gpio/gpiolib-acpi.c
drivers/gpio/gpiolib-of.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/ci_dpm.c
drivers/gpu/drm/amd/amdgpu/cz_dpm.c
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/amdgpu/kv_dpm.c
drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
drivers/gpu/drm/amd/amdgpu/si_dpm.c
drivers/gpu/drm/amd/amdgpu/tonga_ih.c
drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/amd/include/amd_shared.h
drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/smu7_thermal.c
drivers/gpu/drm/amd/powerplay/smumgr/iceland_smc.c
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
drivers/gpu/drm/amd/scheduler/sched_fence.c
drivers/gpu/drm/arc/arcpgu_hdmi.c
drivers/gpu/drm/arm/hdlcd_crtc.c
drivers/gpu/drm/armada/armada_crtc.c
drivers/gpu/drm/ast/ast_ttm.c
drivers/gpu/drm/cirrus/cirrus_ttm.c
drivers/gpu/drm/drm_atomic.c
drivers/gpu/drm/drm_atomic_helper.c
drivers/gpu/drm/drm_dp_mst_topology.c
drivers/gpu/drm/drm_fb_helper.c
drivers/gpu/drm/drm_info.c
drivers/gpu/drm/etnaviv/etnaviv_buffer.c
drivers/gpu/drm/etnaviv/etnaviv_gem.c
drivers/gpu/drm/etnaviv/etnaviv_mmu.c
drivers/gpu/drm/exynos/exynos_drm_drv.c
drivers/gpu/drm/exynos/exynos_drm_drv.h
drivers/gpu/drm/exynos/exynos_drm_fb.c
drivers/gpu/drm/exynos/exynos_drm_g2d.c
drivers/gpu/drm/exynos/exynos_hdmi.c
drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_gem_fence.c
drivers/gpu/drm/i915/i915_gem_userptr.c
drivers/gpu/drm/i915/i915_pci.c
drivers/gpu/drm/i915/intel_bios.c
drivers/gpu/drm/i915/intel_device_info.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_fbc.c
drivers/gpu/drm/i915/intel_hdmi.c
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_runtime_pm.c
drivers/gpu/drm/i915/intel_sprite.c
drivers/gpu/drm/i915/intel_vbt_defs.h
drivers/gpu/drm/imx/imx-drm-core.c
drivers/gpu/drm/imx/ipuv3-crtc.c
drivers/gpu/drm/imx/ipuv3-plane.c
drivers/gpu/drm/mediatek/mtk_disp_ovl.c
drivers/gpu/drm/mediatek/mtk_dpi.c
drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
drivers/gpu/drm/mediatek/mtk_dsi.c
drivers/gpu/drm/mediatek/mtk_hdmi.c
drivers/gpu/drm/mediatek/mtk_mt8173_hdmi_phy.c
drivers/gpu/drm/mgag200/mgag200_ttm.c
drivers/gpu/drm/msm/dsi/dsi_host.c
drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm.c
drivers/gpu/drm/msm/dsi/pll/dsi_pll_28nm_8960.c
drivers/gpu/drm/msm/hdmi/hdmi_phy_8996.c
drivers/gpu/drm/msm/hdmi/hdmi_pll_8960.c
drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
drivers/gpu/drm/msm/msm_drv.c
drivers/gpu/drm/msm/msm_gem_shrinker.c
drivers/gpu/drm/nouveau/nouveau_acpi.c
drivers/gpu/drm/nouveau/nouveau_ttm.c
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/r600_dpm.c
drivers/gpu/drm/radeon/radeon_atpx_handler.c
drivers/gpu/drm/radeon/radeon_connectors.c
drivers/gpu/drm/radeon/radeon_device.c
drivers/gpu/drm/radeon/radeon_display.c
drivers/gpu/drm/radeon/radeon_dp_auxch.c
drivers/gpu/drm/radeon/radeon_drv.c
drivers/gpu/drm/radeon/radeon_i2c.c
drivers/gpu/drm/radeon/radeon_object.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/radeon/si.c
drivers/gpu/drm/radeon/si_dpm.c
drivers/gpu/drm/radeon/sid.h
drivers/gpu/drm/rcar-du/rcar_du_kms.c
drivers/gpu/drm/sti/sti_drv.c
drivers/gpu/drm/sun4i/sun4i_drv.c
drivers/gpu/drm/sun4i/sun4i_rgb.c
drivers/gpu/drm/udl/udl_main.c
drivers/gpu/drm/via/via_dmablit.c
drivers/gpu/drm/virtio/virtgpu_display.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
drivers/gpu/ipu-v3/ipu-image-convert.c
drivers/hid/hid-cp2112.c
drivers/hid/hid-dr.c
drivers/hid/hid-ids.h
drivers/hid/hid-led.c
drivers/hid/hid-lg.c
drivers/hid/hid-magicmouse.c
drivers/hid/hid-rmi.c
drivers/hid/hid-sensor-custom.c
drivers/hid/hid-sensor-hub.c
drivers/hid/intel-ish-hid/ipc/ipc.c
drivers/hid/intel-ish-hid/ipc/pci-ish.c
drivers/hid/usbhid/hid-quirks.c
drivers/hv/hv_util.c
drivers/hv/vmbus_drv.c
drivers/hwmon/adm9240.c
drivers/hwmon/hwmon.c
drivers/hwmon/max31790.c
drivers/i2c/Kconfig
drivers/i2c/busses/Kconfig
drivers/i2c/busses/i2c-designware-core.c
drivers/i2c/busses/i2c-digicolor.c
drivers/i2c/busses/i2c-i801.c
drivers/i2c/busses/i2c-imx.c
drivers/i2c/busses/i2c-jz4780.c
drivers/i2c/busses/i2c-rk3x.c
drivers/i2c/busses/i2c-xgene-slimpro.c
drivers/i2c/busses/i2c-xlp9xx.c
drivers/i2c/busses/i2c-xlr.c
drivers/i2c/i2c-core.c
drivers/i2c/muxes/Kconfig
drivers/i2c/muxes/i2c-demux-pinctrl.c
drivers/i2c/muxes/i2c-mux-pca954x.c
drivers/iio/accel/st_accel_core.c
drivers/iio/adc/Kconfig
drivers/iio/chemical/atlas-ph-sensor.c
drivers/iio/common/hid-sensors/hid-sensor-attributes.c
drivers/iio/common/st_sensors/st_sensors_core.c
drivers/iio/orientation/hid-sensor-rotation.c
drivers/iio/temperature/maxim_thermocouple.c
drivers/infiniband/Kconfig
drivers/infiniband/core/addr.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/umem.c
drivers/infiniband/core/umem_odp.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/hw/Makefile
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/hfi1/affinity.c
drivers/infiniband/hw/hfi1/affinity.h
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/hfi1/chip.h
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/file_ops.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/pcie.c
drivers/infiniband/hw/hfi1/pio.c
drivers/infiniband/hw/hfi1/rc.c
drivers/infiniband/hw/hfi1/sdma.c
drivers/infiniband/hw/hfi1/sysfs.c
drivers/infiniband/hw/hfi1/trace_rx.h
drivers/infiniband/hw/hfi1/user_sdma.c
drivers/infiniband/hw/hns/hns_roce_cq.c
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_eq.c
drivers/infiniband/hw/hns/hns_roce_eq.h
drivers/infiniband/hw/hns/hns_roce_hem.c
drivers/infiniband/hw/hns/hns_roce_hem.h
drivers/infiniband/hw/hns/hns_roce_hw_v1.c
drivers/infiniband/hw/hns/hns_roce_hw_v1.h
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/hns/hns_roce_mr.c
drivers/infiniband/hw/hns/hns_roce_pd.c
drivers/infiniband/hw/hns/hns_roce_qp.c
drivers/infiniband/hw/mlx4/ah.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mthca/mthca_memfree.c
drivers/infiniband/hw/qedr/Kconfig [new file with mode: 0644]
drivers/infiniband/hw/qedr/Makefile [new file with mode: 0644]
drivers/infiniband/hw/qedr/main.c [new file with mode: 0644]
drivers/infiniband/hw/qedr/qedr.h [new file with mode: 0644]
drivers/infiniband/hw/qedr/qedr_cm.c [new file with mode: 0644]
drivers/infiniband/hw/qedr/qedr_cm.h [new file with mode: 0644]
drivers/infiniband/hw/qedr/qedr_hsi.h [new file with mode: 0644]
drivers/infiniband/hw/qedr/qedr_hsi_rdma.h [new file with mode: 0644]
drivers/infiniband/hw/qedr/verbs.c [new file with mode: 0644]
drivers/infiniband/hw/qedr/verbs.h [new file with mode: 0644]
drivers/infiniband/hw/qib/qib_user_pages.c
drivers/infiniband/hw/usnic/usnic_uiom.c
drivers/infiniband/sw/rdmavt/dma.c
drivers/infiniband/sw/rxe/rxe_net.c
drivers/infiniband/sw/rxe/rxe_qp.c
drivers/infiniband/sw/rxe/rxe_queue.c
drivers/infiniband/sw/rxe/rxe_queue.h
drivers/infiniband/sw/rxe/rxe_req.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/input/mouse/alps.c
drivers/input/mouse/alps.h
drivers/input/mouse/elantech.c
drivers/input/mouse/focaltech.c
drivers/input/rmi4/rmi_i2c.c
drivers/input/rmi4/rmi_spi.c
drivers/input/serio/i8042-io.h
drivers/input/serio/i8042-ip22io.h
drivers/input/serio/i8042-ppcio.h
drivers/input/serio/i8042-sparcio.h
drivers/input/serio/i8042-unicore32io.h
drivers/input/serio/i8042-x86ia64io.h
drivers/input/serio/i8042.c
drivers/input/touchscreen/melfas_mip4.c
drivers/iommu/arm-smmu-v3.c
drivers/iommu/arm-smmu.c
drivers/iommu/intel-iommu.c
drivers/ipack/ipack.c
drivers/irqchip/Kconfig
drivers/irqchip/irq-eznps.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-gic.c
drivers/irqchip/irq-i8259.c
drivers/irqchip/irq-jcore-aic.c
drivers/mailbox/pcc.c
drivers/md/dm-raid.c
drivers/md/dm-raid1.c
drivers/md/dm-rq.c
drivers/md/dm-table.c
drivers/md/dm.c
drivers/md/md.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5-cache.c
drivers/media/dvb-frontends/Kconfig
drivers/media/dvb-frontends/Makefile
drivers/media/dvb-frontends/gp8psk-fe.c [new file with mode: 0644]
drivers/media/dvb-frontends/gp8psk-fe.h [new file with mode: 0644]
drivers/media/i2c/ir-kbd-i2c.c
drivers/media/pci/ivtv/ivtv-udma.c
drivers/media/pci/ivtv/ivtv-yuv.c
drivers/media/platform/omap/omap_vout.c
drivers/media/tuners/tuner-xc2028.c
drivers/media/usb/b2c2/flexcop-usb.c
drivers/media/usb/b2c2/flexcop-usb.h
drivers/media/usb/cpia2/cpia2_usb.c
drivers/media/usb/dvb-usb/Makefile
drivers/media/usb/dvb-usb/af9005.c
drivers/media/usb/dvb-usb/cinergyT2-core.c
drivers/media/usb/dvb-usb/cinergyT2-fe.c
drivers/media/usb/dvb-usb/cxusb.c
drivers/media/usb/dvb-usb/cxusb.h
drivers/media/usb/dvb-usb/dib0700_core.c
drivers/media/usb/dvb-usb/dib0700_devices.c
drivers/media/usb/dvb-usb/dibusb-common.c
drivers/media/usb/dvb-usb/dibusb.h
drivers/media/usb/dvb-usb/digitv.c
drivers/media/usb/dvb-usb/digitv.h
drivers/media/usb/dvb-usb/dtt200u-fe.c
drivers/media/usb/dvb-usb/dtt200u.c
drivers/media/usb/dvb-usb/dtv5100.c
drivers/media/usb/dvb-usb/dvb-usb-init.c
drivers/media/usb/dvb-usb/dvb-usb.h
drivers/media/usb/dvb-usb/dw2102.c
drivers/media/usb/dvb-usb/gp8psk-fe.c [deleted file]
drivers/media/usb/dvb-usb/gp8psk.c
drivers/media/usb/dvb-usb/gp8psk.h
drivers/media/usb/dvb-usb/nova-t-usb2.c
drivers/media/usb/dvb-usb/pctv452e.c
drivers/media/usb/dvb-usb/technisat-usb2.c
drivers/media/usb/s2255/s2255drv.c
drivers/media/usb/stkwebcam/stk-webcam.c
drivers/media/v4l2-core/Kconfig
drivers/media/v4l2-core/videobuf-dma-sg.c
drivers/media/v4l2-core/videobuf2-memops.c
drivers/memstick/host/rtsx_usb_ms.c
drivers/mfd/intel-lpss-pci.c
drivers/mfd/intel-lpss.c
drivers/mfd/intel_soc_pmic_bxtwc.c
drivers/mfd/mfd-core.c
drivers/mfd/stmpe.c
drivers/mfd/syscon.c
drivers/mfd/wm8994-core.c
drivers/misc/cxl/api.c
drivers/misc/cxl/context.c
drivers/misc/cxl/cxl.h
drivers/misc/cxl/file.c
drivers/misc/cxl/guest.c
drivers/misc/cxl/main.c
drivers/misc/cxl/pci.c
drivers/misc/cxl/sysfs.c
drivers/misc/genwqe/card_utils.c
drivers/misc/mei/bus-fixup.c
drivers/misc/mei/hw-txe.c
drivers/misc/mic/scif/scif_rma.c
drivers/misc/sgi-gru/grufault.c
drivers/misc/sgi-gru/grumain.c
drivers/misc/vmw_vmci/vmci_doorbell.c
drivers/misc/vmw_vmci/vmci_driver.c
drivers/mmc/card/block.c
drivers/mmc/card/mmc_test.c
drivers/mmc/card/queue.h
drivers/mmc/core/mmc.c
drivers/mmc/host/dw_mmc-pltfm.c
drivers/mmc/host/dw_mmc.c
drivers/mmc/host/mxs-mmc.c
drivers/mmc/host/rtsx_usb_sdmmc.c
drivers/mmc/host/sdhci-esdhc-imx.c
drivers/mmc/host/sdhci-msm.c
drivers/mmc/host/sdhci-of-arasan.c
drivers/mmc/host/sdhci-of-esdhc.c
drivers/mmc/host/sdhci-pci-core.c
drivers/mmc/host/sdhci-pci.h
drivers/mmc/host/sdhci-pxav3.c
drivers/mmc/host/sdhci.c
drivers/mmc/host/sdhci.h
drivers/mtd/nand/gpmi-nand/gpmi-lib.c
drivers/mtd/nand/mtk_ecc.c
drivers/mtd/nand/nand_base.c
drivers/mtd/ubi/eba.c
drivers/mtd/ubi/fastmap.c
drivers/net/bonding/bond_main.c
drivers/net/can/sja1000/plx_pci.c
drivers/net/cris/eth_v10.c
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_mmap.c
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/mv88e6xxx/Kconfig
drivers/net/dsa/mv88e6xxx/Makefile
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/mv88e6xxx/global1.c
drivers/net/dsa/mv88e6xxx/global1.h
drivers/net/dsa/mv88e6xxx/global2.c
drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
drivers/net/dsa/mv88e6xxx/port.c [new file with mode: 0644]
drivers/net/dsa/mv88e6xxx/port.h [new file with mode: 0644]
drivers/net/ethernet/3com/3c509.c
drivers/net/ethernet/3com/3c59x.c
drivers/net/ethernet/3com/typhoon.c
drivers/net/ethernet/Kconfig
drivers/net/ethernet/Makefile
drivers/net/ethernet/adaptec/starfire.c
drivers/net/ethernet/aeroflex/greth.c
drivers/net/ethernet/allwinner/sun4i-emac.c
drivers/net/ethernet/alteon/acenic.c
drivers/net/ethernet/altera/altera_tse.h
drivers/net/ethernet/altera/altera_tse_main.c
drivers/net/ethernet/amd/Kconfig
drivers/net/ethernet/amd/amd8111e.c
drivers/net/ethernet/amd/atarilance.c
drivers/net/ethernet/amd/pcnet32.c
drivers/net/ethernet/amd/xgbe/Makefile
drivers/net/ethernet/amd/xgbe/xgbe-common.h
drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
drivers/net/ethernet/amd/xgbe/xgbe-dev.c
drivers/net/ethernet/amd/xgbe/xgbe-drv.c
drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
drivers/net/ethernet/amd/xgbe/xgbe-i2c.c [new file with mode: 0644]
drivers/net/ethernet/amd/xgbe/xgbe-main.c
drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
drivers/net/ethernet/amd/xgbe/xgbe-pci.c [new file with mode: 0644]
drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c [new file with mode: 0644]
drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c [new file with mode: 0644]
drivers/net/ethernet/amd/xgbe/xgbe-platform.c [new file with mode: 0644]
drivers/net/ethernet/amd/xgbe/xgbe.h
drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
drivers/net/ethernet/apm/xgene/xgene_enet_main.c
drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c
drivers/net/ethernet/arc/Kconfig
drivers/net/ethernet/arc/emac_main.c
drivers/net/ethernet/atheros/alx/alx.h
drivers/net/ethernet/atheros/alx/ethtool.c
drivers/net/ethernet/atheros/alx/main.c
drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
drivers/net/ethernet/atheros/atlx/atl1.c
drivers/net/ethernet/atheros/atlx/atl2.c
drivers/net/ethernet/aurora/nb8800.c
drivers/net/ethernet/broadcom/bcm63xx_enet.c
drivers/net/ethernet/broadcom/bgmac-bcma.c
drivers/net/ethernet/broadcom/bgmac-platform.c
drivers/net/ethernet/broadcom/bgmac.c
drivers/net/ethernet/broadcom/bgmac.h
drivers/net/ethernet/broadcom/bnx2.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/broadcom/sb1250-mac.c
drivers/net/ethernet/brocade/bna/bnad.c
drivers/net/ethernet/brocade/bna/bnad_ethtool.c
drivers/net/ethernet/cadence/macb.c
drivers/net/ethernet/cadence/macb.h
drivers/net/ethernet/cavium/Kconfig
drivers/net/ethernet/cavium/liquidio/Makefile
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h
drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c [new file with mode: 0644]
drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h [new file with mode: 0644]
drivers/net/ethernet/cavium/liquidio/cn23xx_vf_regs.h [new file with mode: 0644]
drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h
drivers/net/ethernet/cavium/liquidio/cn68xx_device.c
drivers/net/ethernet/cavium/liquidio/cn68xx_device.h
drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h
drivers/net/ethernet/cavium/liquidio/lio_core.c
drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
drivers/net/ethernet/cavium/liquidio/lio_main.c
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c [new file with mode: 0644]
drivers/net/ethernet/cavium/liquidio/liquidio_common.h
drivers/net/ethernet/cavium/liquidio/liquidio_image.h
drivers/net/ethernet/cavium/liquidio/octeon_config.h
drivers/net/ethernet/cavium/liquidio/octeon_console.c
drivers/net/ethernet/cavium/liquidio/octeon_device.c
drivers/net/ethernet/cavium/liquidio/octeon_device.h
drivers/net/ethernet/cavium/liquidio/octeon_droq.c
drivers/net/ethernet/cavium/liquidio/octeon_droq.h
drivers/net/ethernet/cavium/liquidio/octeon_iq.h
drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c [new file with mode: 0644]
drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h [new file with mode: 0644]
drivers/net/ethernet/cavium/liquidio/octeon_main.h
drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.h
drivers/net/ethernet/cavium/liquidio/octeon_network.h
drivers/net/ethernet/cavium/liquidio/octeon_nic.c
drivers/net/ethernet/cavium/liquidio/octeon_nic.h
drivers/net/ethernet/cavium/liquidio/request_manager.c
drivers/net/ethernet/cavium/liquidio/response_manager.c
drivers/net/ethernet/cavium/liquidio/response_manager.h
drivers/net/ethernet/cavium/thunder/nic.h
drivers/net/ethernet/cavium/thunder/nic_main.c
drivers/net/ethernet/cavium/thunder/nic_reg.h
drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
drivers/net/ethernet/cavium/thunder/nicvf_main.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.c
drivers/net/ethernet/cavium/thunder/nicvf_queues.h
drivers/net/ethernet/cavium/thunder/q_struct.h
drivers/net/ethernet/cavium/thunder/thunder_bgx.c
drivers/net/ethernet/cavium/thunder/thunder_bgx.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
drivers/net/ethernet/chelsio/cxgb4/sched.c
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
drivers/net/ethernet/cisco/enic/enic.h
drivers/net/ethernet/cisco/enic/enic_main.c
drivers/net/ethernet/cisco/enic/vnic_rq.c
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/ethoc.c
drivers/net/ethernet/ezchip/nps_enet.c
drivers/net/ethernet/freescale/Kconfig
drivers/net/ethernet/freescale/Makefile
drivers/net/ethernet/freescale/dpaa/Kconfig [new file with mode: 0644]
drivers/net/ethernet/freescale/dpaa/Makefile [new file with mode: 0644]
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c [new file with mode: 0644]
drivers/net/ethernet/freescale/dpaa/dpaa_eth.h [new file with mode: 0644]
drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c [new file with mode: 0644]
drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h [new file with mode: 0644]
drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c [new file with mode: 0644]
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/fman/fman_tgec.c
drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
drivers/net/ethernet/freescale/gianfar_ptp.c
drivers/net/ethernet/freescale/ucc_geth_ethtool.c
drivers/net/ethernet/hisilicon/hns/hnae.c
drivers/net/ethernet/hisilicon/hns/hnae.h
drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h
drivers/net/ethernet/hisilicon/hns/hns_enet.c
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
drivers/net/ethernet/hisilicon/hns_mdio.c
drivers/net/ethernet/ibm/ehea/ehea_main.c
drivers/net/ethernet/ibm/ibmveth.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/ibm/ibmvnic.h
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_adminq.c
drivers/net/ethernet/intel/i40e/i40e_client.c
drivers/net/ethernet/intel/i40e/i40e_client.h
drivers/net/ethernet/intel/i40e/i40e_common.c
drivers/net/ethernet/intel/i40e/i40e_debugfs.c
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_fcoe.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_nvm.c
drivers/net/ethernet/intel/i40e/i40e_ptp.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_txrx.h
drivers/net/ethernet/intel/i40e/i40e_type.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/i40evf/i40e_adminq.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.c
drivers/net/ethernet/intel/i40evf/i40e_txrx.h
drivers/net/ethernet/intel/i40evf/i40e_type.h
drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
drivers/net/ethernet/intel/i40evf/i40evf.h
drivers/net/ethernet/intel/i40evf/i40evf_main.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/intel/ixgbe/ixgbe.h
drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
drivers/net/ethernet/lantiq_etop.c
drivers/net/ethernet/marvell/Kconfig
drivers/net/ethernet/marvell/mv643xx_eth.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/mvpp2.c
drivers/net/ethernet/marvell/pxa168_eth.c
drivers/net/ethernet/marvell/sky2.c
drivers/net/ethernet/mellanox/mlx4/cmd.c
drivers/net/ethernet/mellanox/mlx4/en_clock.c
drivers/net/ethernet/mellanox/mlx4/en_cq.c
drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
drivers/net/ethernet/mellanox/mlx4/en_main.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_port.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/en_selftest.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/eq.c
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/mlx4.h
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
drivers/net/ethernet/mellanox/mlx4/port.c
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
drivers/net/ethernet/mellanox/mlx5/core/Makefile
drivers/net/ethernet/mellanox/mlx5/core/alloc.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
drivers/net/ethernet/mellanox/mlx5/core/en_common.c
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eq.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
drivers/net/ethernet/mellanox/mlx5/core/health.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
drivers/net/ethernet/mellanox/mlx5/core/port.c
drivers/net/ethernet/mellanox/mlx5/core/rl.c
drivers/net/ethernet/mellanox/mlx5/core/vport.c
drivers/net/ethernet/mellanox/mlx5/core/wq.c
drivers/net/ethernet/mellanox/mlx5/core/wq.h
drivers/net/ethernet/mellanox/mlxsw/Kconfig
drivers/net/ethernet/mellanox/mlxsw/Makefile
drivers/net/ethernet/mellanox/mlxsw/core.c
drivers/net/ethernet/mellanox/mlxsw/core.h
drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
drivers/net/ethernet/mellanox/mlxsw/core_thermal.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/i2c.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/i2c.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/ib.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/minimal.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/pci.c
drivers/net/ethernet/mellanox/mlxsw/pci.h
drivers/net/ethernet/mellanox/mlxsw/pci_hw.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/port.h
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/resources.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/mellanox/mlxsw/switchib.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/switchx2.c
drivers/net/ethernet/mellanox/mlxsw/trap.h
drivers/net/ethernet/netronome/nfp/nfp_bpf.h
drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
drivers/net/ethernet/netronome/nfp/nfp_net.h
drivers/net/ethernet/netronome/nfp/nfp_net_common.c
drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
drivers/net/ethernet/qlogic/Kconfig
drivers/net/ethernet/qlogic/qed/Makefile
drivers/net/ethernet/qlogic/qed/qed.h
drivers/net/ethernet/qlogic/qed/qed_cxt.c
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/ethernet/qlogic/qed/qed_debug.c
drivers/net/ethernet/qlogic/qed/qed_dev.c
drivers/net/ethernet/qlogic/qed/qed_hsi.h
drivers/net/ethernet/qlogic/qed/qed_int.c
drivers/net/ethernet/qlogic/qed/qed_iscsi.c [new file with mode: 0644]
drivers/net/ethernet/qlogic/qed/qed_iscsi.h [new file with mode: 0644]
drivers/net/ethernet/qlogic/qed/qed_l2.c
drivers/net/ethernet/qlogic/qed/qed_l2.h
drivers/net/ethernet/qlogic/qed/qed_ll2.c
drivers/net/ethernet/qlogic/qed/qed_ll2.h
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/qlogic/qed/qed_mcp.c
drivers/net/ethernet/qlogic/qed/qed_mcp.h
drivers/net/ethernet/qlogic/qed/qed_ooo.c [new file with mode: 0644]
drivers/net/ethernet/qlogic/qed/qed_ooo.h [new file with mode: 0644]
drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
drivers/net/ethernet/qlogic/qed/qed_roce.c
drivers/net/ethernet/qlogic/qed/qed_roce.h
drivers/net/ethernet/qlogic/qed/qed_selftest.c
drivers/net/ethernet/qlogic/qed/qed_selftest.h
drivers/net/ethernet/qlogic/qed/qed_sp.h
drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
drivers/net/ethernet/qlogic/qed/qed_spq.c
drivers/net/ethernet/qlogic/qed/qed_sriov.c
drivers/net/ethernet/qlogic/qed/qed_sriov.h
drivers/net/ethernet/qlogic/qed/qed_vf.c
drivers/net/ethernet/qlogic/qed/qed_vf.h
drivers/net/ethernet/qlogic/qede/Makefile
drivers/net/ethernet/qlogic/qede/qede.h
drivers/net/ethernet/qlogic/qede/qede_ethtool.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qualcomm/emac/emac-mac.c
drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
drivers/net/ethernet/qualcomm/emac/emac.c
drivers/net/ethernet/realtek/r8169.c
drivers/net/ethernet/rocker/rocker_main.c
drivers/net/ethernet/rocker/rocker_ofdpa.c
drivers/net/ethernet/sfc/Kconfig
drivers/net/ethernet/sfc/Makefile
drivers/net/ethernet/sfc/ef10.c
drivers/net/ethernet/sfc/ef10_regs.h
drivers/net/ethernet/sfc/efx.c
drivers/net/ethernet/sfc/enum.h
drivers/net/ethernet/sfc/ethtool.c
drivers/net/ethernet/sfc/falcon.c [deleted file]
drivers/net/ethernet/sfc/falcon/Kconfig [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/Makefile [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/bitfield.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/efx.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/efx.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/enum.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/ethtool.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/falcon.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/falcon_boards.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/farch.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/farch_regs.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/filter.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/io.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/mdio_10g.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/mdio_10g.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/mtd.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/net_driver.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/nic.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/nic.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/phy.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/qt202x_phy.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/rx.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/selftest.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/selftest.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/tenxpress.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/tx.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/tx.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/txc43128_phy.c [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon/workarounds.h [new file with mode: 0644]
drivers/net/ethernet/sfc/falcon_boards.c [deleted file]
drivers/net/ethernet/sfc/farch.c
drivers/net/ethernet/sfc/mcdi.c
drivers/net/ethernet/sfc/mcdi_pcol.h
drivers/net/ethernet/sfc/mcdi_port.c
drivers/net/ethernet/sfc/mdio_10g.c [deleted file]
drivers/net/ethernet/sfc/mdio_10g.h [deleted file]
drivers/net/ethernet/sfc/net_driver.h
drivers/net/ethernet/sfc/nic.h
drivers/net/ethernet/sfc/phy.h [deleted file]
drivers/net/ethernet/sfc/qt202x_phy.c [deleted file]
drivers/net/ethernet/sfc/rx.c
drivers/net/ethernet/sfc/siena.c
drivers/net/ethernet/sfc/tenxpress.c [deleted file]
drivers/net/ethernet/sfc/tx.c
drivers/net/ethernet/sfc/tx.h [new file with mode: 0644]
drivers/net/ethernet/sfc/tx_tso.c [new file with mode: 0644]
drivers/net/ethernet/sfc/txc43128_phy.c [deleted file]
drivers/net/ethernet/sfc/workarounds.h
drivers/net/ethernet/smsc/smsc911x.c
drivers/net/ethernet/smsc/smsc9420.c
drivers/net/ethernet/stmicro/stmmac/Kconfig
drivers/net/ethernet/stmicro/stmmac/Makefile
drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
drivers/net/ethernet/stmicro/stmmac/chain_mode.c
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/descs.h
drivers/net/ethernet/stmicro/stmmac/descs_com.h
drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c [new file with mode: 0644]
drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
drivers/net/ethernet/stmicro/stmmac/dwmac4.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h
drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
drivers/net/ethernet/stmicro/stmmac/enh_desc.c
drivers/net/ethernet/stmicro/stmmac/norm_desc.c
drivers/net/ethernet/stmicro/stmmac/ring_mode.c
drivers/net/ethernet/stmicro/stmmac/stmmac.h
drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h
drivers/net/ethernet/sun/sunbmac.c
drivers/net/ethernet/sun/sunbmac.h
drivers/net/ethernet/sun/sunqe.c
drivers/net/ethernet/sun/sunqe.h
drivers/net/ethernet/sun/sunvnet_common.c
drivers/net/ethernet/synopsys/dwc_eth_qos.c
drivers/net/ethernet/ti/cpsw-phy-sel.c
drivers/net/ethernet/ti/cpsw.c
drivers/net/ethernet/ti/davinci_cpdma.c
drivers/net/ethernet/ti/davinci_cpdma.h
drivers/net/ethernet/ti/davinci_emac.c
drivers/net/ethernet/ti/netcp_core.c
drivers/net/ethernet/toshiba/ps3_gelic_wireless.c
drivers/net/ethernet/xilinx/ll_temac_main.c
drivers/net/ethernet/xscale/ixp4xx_eth.c
drivers/net/geneve.c
drivers/net/gtp.c
drivers/net/hyperv/netvsc.c
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/rndis_filter.c
drivers/net/ieee802154/adf7242.c
drivers/net/ieee802154/atusb.c
drivers/net/ipvlan/ipvlan.h
drivers/net/ipvlan/ipvlan_main.c
drivers/net/macsec.c
drivers/net/macvlan.c
drivers/net/macvtap.c
drivers/net/mii.c
drivers/net/phy/Kconfig
drivers/net/phy/Makefile
drivers/net/phy/at803x.c
drivers/net/phy/bcm-cygnus.c
drivers/net/phy/bcm-phy-lib.c
drivers/net/phy/bcm-phy-lib.h
drivers/net/phy/bcm7xxx.c
drivers/net/phy/broadcom.c
drivers/net/phy/dp83640.c
drivers/net/phy/dp83848.c
drivers/net/phy/fixed_phy.c
drivers/net/phy/marvell.c
drivers/net/phy/mdio-mux-mmioreg.c
drivers/net/phy/mdio_bus.c
drivers/net/phy/mdio_device.c
drivers/net/phy/meson-gxl.c [new file with mode: 0644]
drivers/net/phy/micrel.c
drivers/net/phy/microchip.c
drivers/net/phy/mscc.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/phy_led_triggers.c
drivers/net/phy/vitesse.c
drivers/net/ppp/ppp_generic.c
drivers/net/ppp/pppoe.c
drivers/net/team/team.c
drivers/net/tun.c
drivers/net/usb/asix_common.c
drivers/net/usb/ax88172a.c
drivers/net/usb/ax88179_178a.c
drivers/net/usb/kalmia.c
drivers/net/usb/lan78xx.c
drivers/net/usb/r8152.c
drivers/net/virtio_net.c
drivers/net/vmxnet3/vmxnet3_drv.c
drivers/net/vrf.c
drivers/net/vxlan.c
drivers/net/wan/Kconfig
drivers/net/wan/slic_ds26522.c
drivers/net/wireless/Kconfig
drivers/net/wireless/ath/ath10k/core.h
drivers/net/wireless/ath/ath10k/debug.c
drivers/net/wireless/ath/ath10k/mac.c
drivers/net/wireless/ath/ath6kl/sdio.c
drivers/net/wireless/ath/ath9k/ar9003_calib.c
drivers/net/wireless/ath/ath9k/hw.h
drivers/net/wireless/ath/ath9k/init.c
drivers/net/wireless/broadcom/b43/main.c
drivers/net/wireless/broadcom/b43legacy/main.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
drivers/net/wireless/intel/iwlwifi/mvm/d3.c
drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
drivers/net/wireless/intel/iwlwifi/mvm/ops.c
drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
drivers/net/wireless/intel/iwlwifi/mvm/scan.c
drivers/net/wireless/intel/iwlwifi/pcie/drv.c
drivers/net/wireless/intel/iwlwifi/pcie/tx.c
drivers/net/wireless/mac80211_hwsim.c
drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
drivers/net/wireless/ralink/rt2x00/rt2x00dev.c
drivers/net/wireless/realtek/rtlwifi/core.c
drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c
drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c
drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
drivers/net/wireless/realtek/rtlwifi/wifi.h
drivers/net/wireless/ti/wlcore/main.c
drivers/net/wireless/ti/wlcore/sdio.c
drivers/net/xen-netback/xenbus.c
drivers/net/xen-netfront.c
drivers/nfc/mei_phy.c
drivers/ntb/hw/intel/ntb_hw_intel.c
drivers/ntb/ntb_transport.c
drivers/ntb/test/ntb_perf.c
drivers/ntb/test/ntb_pingpong.c
drivers/nvdimm/Kconfig
drivers/nvdimm/namespace_devs.c
drivers/nvdimm/pmem.c
drivers/nvme/host/core.c
drivers/nvme/host/lightnvm.c
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/scsi.c
drivers/nvme/target/admin-cmd.c
drivers/nvme/target/core.c
drivers/nvme/target/discovery.c
drivers/nvme/target/rdma.c
drivers/of/base.c
drivers/of/of_mdio.c
drivers/of/platform.c
drivers/pci/host/pci-layerscape.c
drivers/pci/host/pcie-designware-plat.c
drivers/pci/host/pcie-designware.c
drivers/pci/host/pcie-qcom.c
drivers/pci/host/pcie-rockchip.c
drivers/pci/msi.c
drivers/pci/pci-mid.c
drivers/pci/setup-res.c
drivers/pcmcia/soc_common.c
drivers/perf/xgene_pmu.c
drivers/phy/phy-da8xx-usb.c
drivers/phy/phy-rockchip-pcie.c
drivers/phy/phy-sun4i-usb.c
drivers/phy/phy-twl4030-usb.c
drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c
drivers/pinctrl/aspeed/pinctrl-aspeed.c
drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
drivers/pinctrl/bcm/pinctrl-nsp-gpio.c
drivers/pinctrl/freescale/pinctrl-imx.c
drivers/pinctrl/intel/pinctrl-baytrail.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/pinctrl/intel/pinctrl-intel.c
drivers/pinctrl/pinctrl-st.c
drivers/pinctrl/stm32/pinctrl-stm32.c
drivers/platform/goldfish/goldfish_pipe.c
drivers/platform/x86/Kconfig
drivers/platform/x86/ideapad-laptop.c
drivers/platform/x86/intel-hid.c
drivers/platform/x86/intel-vbtn.c
drivers/platform/x86/toshiba-wmi.c
drivers/ptp/ptp_clock.c
drivers/ptp/ptp_sysfs.c
drivers/rapidio/devices/rio_mport_cdev.c
drivers/regulator/core.c
drivers/reset/reset-uniphier.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/rtc-ac100.c
drivers/rtc/rtc-asm9260.c
drivers/rtc/rtc-at32ap700x.c
drivers/rtc/rtc-bq32k.c
drivers/rtc/rtc-cmos.c
drivers/rtc/rtc-coh901331.c
drivers/rtc/rtc-davinci.c
drivers/rtc/rtc-digicolor.c
drivers/rtc/rtc-ds1302.c
drivers/rtc/rtc-ds1307.c
drivers/rtc/rtc-ds1347.c
drivers/rtc/rtc-gemini.c
drivers/rtc/rtc-isl12057.c [deleted file]
drivers/rtc/rtc-jz4740.c
drivers/rtc/rtc-mcp795.c
drivers/rtc/rtc-mt6397.c
drivers/rtc/rtc-nuc900.c
drivers/rtc/rtc-omap.c
drivers/rtc/rtc-palmas.c
drivers/rtc/rtc-pcf2123.c
drivers/rtc/rtc-pcf50633.c
drivers/rtc/rtc-pic32.c
drivers/rtc/rtc-rv8803.c
drivers/rtc/rtc-rx6110.c
drivers/rtc/rtc-rx8025.c
drivers/rtc/rtc-spear.c
drivers/rtc/rtc-stmp3xxx.c
drivers/rtc/rtc-sysfs.c
drivers/rtc/rtc-tegra.c
drivers/rtc/rtc-twl.c
drivers/s390/block/dasd_eckd.c
drivers/s390/cio/chp.c
drivers/s390/scsi/zfcp_dbf.c
drivers/scsi/NCR5380.c
drivers/scsi/arcmsr/arcmsr_hba.c
drivers/scsi/be2iscsi/be_main.c
drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
drivers/scsi/cxgbi/libcxgbi.c
drivers/scsi/device_handler/scsi_dh_alua.c
drivers/scsi/g_NCR5380.c
drivers/scsi/g_NCR5380.h
drivers/scsi/ipr.c
drivers/scsi/libiscsi.c
drivers/scsi/megaraid/megaraid_sas.h
drivers/scsi/megaraid/megaraid_sas_base.c
drivers/scsi/mpt3sas/mpt3sas_scsih.c
drivers/scsi/pmcraid.c
drivers/scsi/qla2xxx/qla_os.c
drivers/scsi/scsi_debug.c
drivers/scsi/scsi_dh.c
drivers/scsi/scsi_scan.c
drivers/scsi/st.c
drivers/scsi/ufs/Kconfig
drivers/scsi/ufs/ufs_quirks.h
drivers/scsi/ufs/ufshcd.c
drivers/scsi/vmw_pvscsi.c
drivers/scsi/vmw_pvscsi.h
drivers/soc/Kconfig
drivers/soc/fsl/Makefile
drivers/soc/fsl/qbman/Kconfig [new file with mode: 0644]
drivers/soc/fsl/qbman/Makefile [new file with mode: 0644]
drivers/soc/fsl/qbman/bman.c [new file with mode: 0644]
drivers/soc/fsl/qbman/bman_ccsr.c [new file with mode: 0644]
drivers/soc/fsl/qbman/bman_portal.c [new file with mode: 0644]
drivers/soc/fsl/qbman/bman_priv.h [new file with mode: 0644]
drivers/soc/fsl/qbman/bman_test.c [new file with mode: 0644]
drivers/soc/fsl/qbman/bman_test.h [new file with mode: 0644]
drivers/soc/fsl/qbman/bman_test_api.c [new file with mode: 0644]
drivers/soc/fsl/qbman/dpaa_sys.h [new file with mode: 0644]
drivers/soc/fsl/qbman/qman.c [new file with mode: 0644]
drivers/soc/fsl/qbman/qman_ccsr.c [new file with mode: 0644]
drivers/soc/fsl/qbman/qman_portal.c [new file with mode: 0644]
drivers/soc/fsl/qbman/qman_priv.h [new file with mode: 0644]
drivers/soc/fsl/qbman/qman_test.c [new file with mode: 0644]
drivers/soc/fsl/qbman/qman_test.h [new file with mode: 0644]
drivers/soc/fsl/qbman/qman_test_api.c [new file with mode: 0644]
drivers/soc/fsl/qbman/qman_test_stash.c [new file with mode: 0644]
drivers/soc/fsl/qe/gpio.c
drivers/soc/fsl/qe/qe.c
drivers/soc/fsl/qe/qe_common.c
drivers/soc/fsl/qe/qe_tdm.c
drivers/spi/spi-fsl-dspi.c
drivers/spi/spi-fsl-espi.c
drivers/spi/spi.c
drivers/staging/android/ion/ion.c
drivers/staging/android/ion/ion_of.c
drivers/staging/comedi/drivers/ni_tio.c
drivers/staging/greybus/arche-platform.c
drivers/staging/greybus/es2.c
drivers/staging/greybus/gpio.c
drivers/staging/greybus/module.c
drivers/staging/greybus/uart.c
drivers/staging/iio/accel/sca3000_core.c
drivers/staging/iio/impedance-analyzer/ad5933.c
drivers/staging/lustre/lustre/llite/lproc_llite.c
drivers/staging/media/bcm2048/radio-bcm2048.c
drivers/staging/nvec/nvec_ps2.c
drivers/staging/sm750fb/ddk750_reg.h
drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c
drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
drivers/staging/wilc1000/host_interface.c
drivers/target/iscsi/cxgbit/cxgbit_main.c
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target_login.c
drivers/target/target_core_transport.c
drivers/target/target_core_user.c
drivers/target/target_core_xcopy.c
drivers/target/tcm_fc/tfc_cmd.c
drivers/target/tcm_fc/tfc_sess.c
drivers/thermal/intel_pch_thermal.c
drivers/thermal/intel_powerclamp.c
drivers/thermal/thermal_core.c
drivers/tty/serial/8250/8250_lpss.c
drivers/tty/serial/8250/8250_port.c
drivers/tty/serial/8250/8250_uniphier.c
drivers/tty/serial/Kconfig
drivers/tty/serial/atmel_serial.c
drivers/tty/serial/fsl_lpuart.c
drivers/tty/serial/pch_uart.c
drivers/tty/serial/sc16is7xx.c
drivers/tty/serial/serial_core.c
drivers/tty/serial/stm32-usart.h
drivers/tty/serial/xilinx_uartps.c
drivers/tty/vt/vt.c
drivers/usb/chipidea/core.c
drivers/usb/chipidea/host.c
drivers/usb/chipidea/udc.c
drivers/usb/class/cdc-acm.c
drivers/usb/dwc2/core.c
drivers/usb/dwc2/core.h
drivers/usb/dwc2/gadget.c
drivers/usb/dwc3/core.c
drivers/usb/dwc3/dwc3-st.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/function/f_fs.c
drivers/usb/gadget/function/u_ether.c
drivers/usb/gadget/udc/atmel_usba_udc.c
drivers/usb/host/ehci-hcd.c
drivers/usb/host/ehci-platform.c
drivers/usb/host/ehci-sead3.c [deleted file]
drivers/usb/host/ohci-at91.c
drivers/usb/host/ohci-hcd.c
drivers/usb/host/pci-quirks.c
drivers/usb/host/xhci-hub.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci.h
drivers/usb/musb/da8xx.c
drivers/usb/musb/musb_core.c
drivers/usb/musb/musb_core.h
drivers/usb/musb/musb_dsps.c
drivers/usb/musb/musb_gadget.c
drivers/usb/musb/omap2430.c
drivers/usb/musb/tusb6010.c
drivers/usb/renesas_usbhs/rcar3.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/usb-serial.c
drivers/usb/storage/transport.c
drivers/usb/wusbcore/crypto.c
drivers/uwb/lc-rc.c
drivers/uwb/pal.c
drivers/vfio/pci/vfio_pci.c
drivers/vfio/pci/vfio_pci_intrs.c
drivers/video/fbdev/Kconfig
drivers/video/fbdev/amba-clcd-versatile.c
drivers/video/fbdev/cobalt_lcdfb.c
drivers/video/fbdev/pvr2fb.c
drivers/virt/fsl_hypervisor.c
drivers/virtio/config.c [deleted file]
drivers/virtio/virtio_balloon.c
drivers/virtio/virtio_pci_legacy.c
drivers/virtio/virtio_ring.c
drivers/vme/vme.c
drivers/watchdog/Kconfig
drivers/watchdog/wdat_wdt.c
drivers/xen/manage.c
drivers/xen/xenbus/xenbus_dev_frontend.c
drivers/xen/xenbus/xenbus_probe_frontend.c
fs/afs/cmservice.c
fs/afs/fsclient.c
fs/afs/internal.h
fs/afs/rxrpc.c
fs/aio.c
fs/befs/befs.h
fs/befs/btree.c
fs/befs/datastream.c
fs/befs/debug.c
fs/befs/io.c
fs/befs/io.h
fs/befs/linuxvfs.c
fs/befs/super.c
fs/btrfs/compression.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/free-space-tree.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/relocation.c
fs/btrfs/send.c
fs/btrfs/tests/extent-io-tests.c
fs/btrfs/tests/free-space-tree-tests.c
fs/btrfs/tree-log.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/super.c
fs/ceph/xattr.c
fs/cifs/cifs_debug.c
fs/cifs/cifs_fs_sb.h
fs/cifs/cifs_ioctl.h
fs/cifs/cifsacl.c
fs/cifs/cifsfs.c
fs/cifs/cifsglob.h
fs/cifs/cifsproto.h
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/file.c
fs/cifs/ioctl.c
fs/cifs/misc.c
fs/cifs/readdir.c
fs/cifs/smb2inode.c
fs/cifs/smb2misc.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2pdu.h
fs/cifs/xattr.c
fs/coredump.c
fs/crypto/crypto.c
fs/crypto/fname.c
fs/crypto/keyinfo.c
fs/crypto/policy.c
fs/dlm/netlink.c
fs/exec.c
fs/exofs/dir.c
fs/ext2/inode.c
fs/ext4/block_validity.c
fs/ext4/ext4.h
fs/ext4/mballoc.h
fs/ext4/namei.c
fs/ext4/super.c
fs/ext4/sysfs.c
fs/ext4/xattr.c
fs/f2fs/gc.c
fs/fuse/dir.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/iomap.c
fs/isofs/inode.c
fs/jbd2/transaction.c
fs/kernfs/dir.c
fs/kernfs/file.c
fs/lockd/netns.h
fs/lockd/svc.c
fs/locks.c
fs/namei.c
fs/namespace.c
fs/nfs/blocklayout/blocklayout.c
fs/nfs/callback.c
fs/nfs/client.c
fs/nfs/inode.c
fs/nfs/namespace.c
fs/nfs/netns.h
fs/nfs/nfs4_fs.h
fs/nfs/nfs4proc.c
fs/nfs/nfs4session.c
fs/nfs/nfs4state.c
fs/nfs/pnfs.c
fs/nfs_common/grace.c
fs/nfsd/netns.h
fs/nfsd/nfs4state.c
fs/nfsd/nfsctl.c
fs/nsfs.c
fs/ntfs/dir.c
fs/ocfs2/dir.c
fs/orangefs/dcache.c
fs/orangefs/file.c
fs/orangefs/namei.c
fs/orangefs/orangefs-debugfs.c
fs/orangefs/orangefs-kernel.h
fs/orangefs/orangefs-mod.c
fs/overlayfs/copy_up.c
fs/overlayfs/dir.c
fs/overlayfs/inode.c
fs/overlayfs/super.c
fs/proc/array.c
fs/proc/base.c
fs/proc/task_mmu.c
fs/proc/task_nommu.c
fs/quota/netlink.c
fs/read_write.c
fs/splice.c
fs/super.c
fs/sysfs/dir.c
fs/ubifs/dir.c
fs/ubifs/xattr.c
fs/xattr.c
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap.h
fs/xfs/libxfs/xfs_btree.c
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_dquot_buf.c
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/libxfs/xfs_inode_buf.h
fs/xfs/xfs_file.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_reflink.c
fs/xfs/xfs_reflink.h
fs/xfs/xfs_sysfs.c
fs/xfs/xfs_trace.h
include/acpi/actbl.h
include/acpi/pcc.h
include/acpi/platform/aclinux.h
include/asm-generic/export.h [new file with mode: 0644]
include/asm-generic/libata-portmap.h [deleted file]
include/asm-generic/percpu.h
include/asm-generic/sections.h
include/asm-generic/vmlinux.lds.h
include/drm/drm_plane.h
include/dt-bindings/net/mdio.h [new file with mode: 0644]
include/linux/acpi.h
include/linux/ata.h
include/linux/blk-cgroup.h
include/linux/bpf-cgroup.h [new file with mode: 0644]
include/linux/bpf.h
include/linux/bpf_verifier.h
include/linux/brcmphy.h
include/linux/ceph/osd_client.h
include/linux/cgroup-defs.h
include/linux/cgroup.h
include/linux/clk-provider.h
include/linux/compiler-gcc.h
include/linux/compiler.h
include/linux/console.h
include/linux/cpufreq.h
include/linux/cpuhotplug.h
include/linux/debugfs.h
include/linux/device.h
include/linux/drbd_genl.h
include/linux/export.h
include/linux/fdtable.h
include/linux/filter.h
include/linux/frontswap.h
include/linux/fs.h
include/linux/genhd.h
include/linux/genl_magic_func.h
include/linux/huge_mm.h
include/linux/hyperv.h
include/linux/ieee80211.h
include/linux/if_arp.h
include/linux/if_vlan.h
include/linux/init.h
include/linux/io.h
include/linux/iomap.h
include/linux/ipv6.h
include/linux/irqchip/arm-gic-v3.h
include/linux/kasan.h
include/linux/kconfig.h
include/linux/kernfs.h
include/linux/libata.h
include/linux/mbus.h
include/linux/mii.h
include/linux/mlx4/device.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/fs.h
include/linux/mlx5/mlx5_ifc.h
include/linux/mlx5/port.h
include/linux/mlx5/srq.h
include/linux/mlx5/vport.h
include/linux/mm.h
include/linux/mmzone.h
include/linux/mtd/nand.h
include/linux/netdevice.h
include/linux/netfilter.h
include/linux/netfilter/ipset/ip_set.h
include/linux/netfilter/ipset/ip_set_bitmap.h
include/linux/netfilter/ipset/ip_set_comment.h
include/linux/netfilter/ipset/ip_set_counter.h [new file with mode: 0644]
include/linux/netfilter/ipset/ip_set_skbinfo.h [new file with mode: 0644]
include/linux/netfilter/ipset/ip_set_timeout.h
include/linux/netfilter/x_tables.h
include/linux/netfilter_ingress.h
include/linux/netpoll.h
include/linux/nvme.h
include/linux/perf_event.h
include/linux/phy.h
include/linux/phy/phy.h
include/linux/pim.h
include/linux/pkeys.h
include/linux/proc_fs.h
include/linux/ptp_clock_kernel.h
include/linux/qed/qed_chain.h
include/linux/qed/qed_eth_if.h
include/linux/qed/qed_if.h
include/linux/qed/qed_iscsi_if.h [new file with mode: 0644]
include/linux/qed/qede_roce.h
include/linux/random.h
include/linux/regmap.h
include/linux/sched.h
include/linux/seg6.h [new file with mode: 0644]
include/linux/seg6_genl.h [new file with mode: 0644]
include/linux/seg6_hmac.h [new file with mode: 0644]
include/linux/seg6_iptunnel.h [new file with mode: 0644]
include/linux/skbuff.h
include/linux/stmmac.h
include/linux/sunrpc/svc_xprt.h
include/linux/syscalls.h
include/linux/tcp.h
include/linux/thread_info.h
include/linux/virtio_net.h
include/net/act_api.h
include/net/addrconf.h
include/net/bluetooth/hci_core.h
include/net/bonding.h
include/net/busy_poll.h
include/net/cfg80211.h
include/net/devlink.h
include/net/dst_metadata.h
include/net/fib_rules.h
include/net/flow.h
include/net/flow_dissector.h
include/net/genetlink.h
include/net/gro_cells.h
include/net/ieee80211_radiotap.h
include/net/if_inet6.h
include/net/inet_connection_sock.h
include/net/inet_sock.h
include/net/ip.h
include/net/ip6_fib.h
include/net/ip6_route.h
include/net/ip6_tunnel.h
include/net/ip_fib.h
include/net/ip_tunnels.h
include/net/ipv6.h
include/net/lwtunnel.h
include/net/mac80211.h
include/net/net_namespace.h
include/net/netfilter/nf_conntrack_l4proto.h
include/net/netfilter/nf_conntrack_labels.h
include/net/netfilter/nf_conntrack_synproxy.h
include/net/netfilter/nf_log.h
include/net/netfilter/nf_queue.h
include/net/netfilter/nf_socket.h [new file with mode: 0644]
include/net/netfilter/nf_tables.h
include/net/netfilter/nf_tables_core.h
include/net/netfilter/nft_fib.h [new file with mode: 0644]
include/net/netlink.h
include/net/netns/conntrack.h
include/net/netns/generic.h
include/net/netns/ipv6.h
include/net/pkt_cls.h
include/net/pkt_sched.h
include/net/route.h
include/net/sctp/sctp.h
include/net/sctp/structs.h
include/net/secure_seq.h
include/net/seg6.h [new file with mode: 0644]
include/net/seg6_hmac.h [new file with mode: 0644]
include/net/sock.h
include/net/tc_act/tc_skbedit.h
include/net/tc_act/tc_tunnel_key.h
include/net/tcp.h
include/net/udp.h
include/net/udplite.h
include/net/vxlan.h
include/soc/fsl/bman.h [new file with mode: 0644]
include/soc/fsl/qman.h [new file with mode: 0644]
include/target/target_core_base.h
include/trace/events/cgroup.h [new file with mode: 0644]
include/trace/events/mdio.h [new file with mode: 0644]
include/uapi/asm-generic/socket.h
include/uapi/asm-generic/unistd.h
include/uapi/linux/Kbuild
include/uapi/linux/atm_zatm.h
include/uapi/linux/bpf.h
include/uapi/linux/bpqether.h
include/uapi/linux/bt-bmc.h [new file with mode: 0644]
include/uapi/linux/btrfs.h
include/uapi/linux/devlink.h
include/uapi/linux/ethtool.h
include/uapi/linux/fib_rules.h
include/uapi/linux/genetlink.h
include/uapi/linux/if_link.h
include/uapi/linux/in.h
include/uapi/linux/in6.h
include/uapi/linux/ipv6.h
include/uapi/linux/kvm.h
include/uapi/linux/l2tp.h
include/uapi/linux/lwtunnel.h
include/uapi/linux/net_tstamp.h
include/uapi/linux/netfilter.h
include/uapi/linux/netfilter/nf_tables.h
include/uapi/linux/nl80211.h
include/uapi/linux/openvswitch.h
include/uapi/linux/pci_regs.h
include/uapi/linux/pkt_cls.h
include/uapi/linux/rtnetlink.h
include/uapi/linux/seg6.h [new file with mode: 0644]
include/uapi/linux/seg6_genl.h [new file with mode: 0644]
include/uapi/linux/seg6_hmac.h [new file with mode: 0644]
include/uapi/linux/seg6_iptunnel.h [new file with mode: 0644]
include/uapi/linux/sockios.h
include/uapi/linux/tc_act/tc_skbedit.h
include/uapi/linux/tc_act/tc_tunnel_key.h
include/uapi/linux/tcp.h
include/uapi/rdma/qedr-abi.h [new file with mode: 0644]
include/uapi/sound/asoc.h
init/Kconfig
init/Makefile
init/do_mounts_rd.c
init/main.c
ipc/msgutil.c
kernel/audit.c
kernel/bpf/Makefile
kernel/bpf/bpf_lru_list.c [new file with mode: 0644]
kernel/bpf/bpf_lru_list.h [new file with mode: 0644]
kernel/bpf/cgroup.c [new file with mode: 0644]
kernel/bpf/hashtab.c
kernel/bpf/inode.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/cgroup.c
kernel/cpu.c
kernel/cpuset.c
kernel/events/core.c
kernel/events/uprobes.c
kernel/exit.c
kernel/fork.c
kernel/irq/manage.c
kernel/kcov.c
kernel/locking/lockdep_internals.h
kernel/power/suspend.c
kernel/power/suspend_test.c
kernel/printk/printk.c
kernel/ptrace.c
kernel/rcu/tiny.c
kernel/rcu/tree.c
kernel/sched/auto_group.c
kernel/sched/core.c
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/wait.c
kernel/seccomp.c
kernel/softirq.c
kernel/taskstats.c
kernel/time/alarmtimer.c
kernel/time/timer.c
kernel/trace/ftrace.c
lib/Kconfig.debug
lib/genalloc.c
lib/iov_iter.c
lib/irq_poll.c
lib/mpi/mpi-pow.c
lib/nlattr.c
lib/percpu-refcount.c
lib/random32.c
lib/stackdepot.c
lib/test_bpf.c
mm/Kconfig
mm/cma.c
mm/filemap.c
mm/frame_vector.c
mm/gup.c
mm/huge_memory.c
mm/hugetlb.c
mm/kasan/kasan.c
mm/kmemleak.c
mm/list_lru.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/mprotect.c
mm/mremap.c
mm/nommu.c
mm/page_alloc.c
mm/percpu.c
mm/process_vm_access.c
mm/shmem.c
mm/slab.c
mm/slab.h
mm/slab_common.c
mm/swapfile.c
mm/util.c
mm/vmscan.c
net/8021q/vlan.c
net/8021q/vlan.h
net/Kconfig
net/batman-adv/Kconfig
net/batman-adv/bat_iv_ogm.c
net/batman-adv/bat_v.c
net/batman-adv/bat_v_elp.c
net/batman-adv/bat_v_ogm.c
net/batman-adv/debugfs.c
net/batman-adv/distributed-arp-table.c
net/batman-adv/fragmentation.c
net/batman-adv/fragmentation.h
net/batman-adv/gateway_client.c
net/batman-adv/hard-interface.c
net/batman-adv/hard-interface.h
net/batman-adv/hash.h
net/batman-adv/icmp_socket.c
net/batman-adv/log.c
net/batman-adv/log.h
net/batman-adv/main.c
net/batman-adv/main.h
net/batman-adv/multicast.c
net/batman-adv/multicast.h
net/batman-adv/netlink.c
net/batman-adv/network-coding.c
net/batman-adv/originator.c
net/batman-adv/packet.h
net/batman-adv/routing.c
net/batman-adv/send.c
net/batman-adv/send.h
net/batman-adv/soft-interface.c
net/batman-adv/sysfs.c
net/batman-adv/tp_meter.c
net/batman-adv/translation-table.c
net/batman-adv/tvlv.c
net/batman-adv/types.h
net/bluetooth/6lowpan.c
net/bluetooth/hci_conn.c
net/bluetooth/hci_request.c
net/bluetooth/hci_request.h
net/bluetooth/l2cap_core.c
net/bluetooth/mgmt.c
net/bluetooth/rfcomm/tty.c
net/bluetooth/sco.c
net/bridge/br_multicast.c
net/bridge/br_netfilter_hooks.c
net/bridge/br_netlink.c
net/bridge/br_private.h
net/bridge/br_sysfs_br.c
net/bridge/netfilter/Kconfig
net/bridge/netfilter/ebt_arpreply.c
net/bridge/netfilter/ebt_log.c
net/bridge/netfilter/ebt_nflog.c
net/bridge/netfilter/ebt_redirect.c
net/bridge/netfilter/ebtable_broute.c
net/bridge/netfilter/ebtables.c
net/bridge/netfilter/nf_log_bridge.c
net/bridge/netfilter/nft_meta_bridge.c
net/bridge/netfilter/nft_reject_bridge.c
net/caif/caif_dev.c
net/caif/cfcnfg.c
net/can/bcm.c
net/ceph/ceph_fs.c
net/ceph/osd_client.c
net/ceph/pagevec.c
net/core/Makefile
net/core/datagram.c
net/core/dev.c
net/core/devlink.c
net/core/drop_monitor.c
net/core/ethtool.c
net/core/fib_rules.c
net/core/filter.c
net/core/flow_dissector.c
net/core/lwt_bpf.c [new file with mode: 0644]
net/core/lwtunnel.c
net/core/neighbour.c
net/core/net-sysfs.c
net/core/net_namespace.c
net/core/netpoll.c
net/core/pktgen.c
net/core/rtnetlink.c
net/core/secure_seq.c
net/core/skbuff.c
net/core/sock.c
net/core/sock_reuseport.c
net/core/stream.c
net/dccp/ipv4.c
net/dccp/ipv6.c
net/dccp/proto.c
net/decnet/af_decnet.c
net/ethernet/eth.c
net/hsr/hsr_forward.c
net/hsr/hsr_netlink.c
net/ieee802154/netlink.c
net/ieee802154/nl802154.c
net/ipv4/af_inet.c
net/ipv4/fib_frontend.c
net/ipv4/fib_trie.c
net/ipv4/fou.c
net/ipv4/gre_offload.c
net/ipv4/icmp.c
net/ipv4/igmp.c
net/ipv4/inet_connection_sock.c
net/ipv4/inet_diag.c
net/ipv4/inet_hashtables.c
net/ipv4/ip_forward.c
net/ipv4/ip_gre.c
net/ipv4/ip_output.c
net/ipv4/ip_sockglue.c
net/ipv4/ip_tunnel.c
net/ipv4/ip_tunnel_core.c
net/ipv4/ip_vti.c
net/ipv4/ipip.c
net/ipv4/ipmr.c
net/ipv4/netfilter/Kconfig
net/ipv4/netfilter/Makefile
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/netfilter/ipt_CLUSTERIP.c
net/ipv4/netfilter/ipt_MASQUERADE.c
net/ipv4/netfilter/ipt_REJECT.c
net/ipv4/netfilter/ipt_SYNPROXY.c
net/ipv4/netfilter/ipt_rpfilter.c
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
net/ipv4/netfilter/nf_socket_ipv4.c [new file with mode: 0644]
net/ipv4/netfilter/nft_dup_ipv4.c
net/ipv4/netfilter/nft_fib_ipv4.c [new file with mode: 0644]
net/ipv4/netfilter/nft_masq_ipv4.c
net/ipv4/netfilter/nft_redir_ipv4.c
net/ipv4/netfilter/nft_reject_ipv4.c
net/ipv4/ping.c
net/ipv4/raw.c
net/ipv4/raw_diag.c
net/ipv4/route.c
net/ipv4/syncookies.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_bbr.c
net/ipv4/tcp_cong.c
net/ipv4/tcp_dctcp.c
net/ipv4/tcp_highspeed.c
net/ipv4/tcp_hybla.c
net/ipv4/tcp_illinois.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_lp.c
net/ipv4/tcp_metrics.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv4/tcp_scalable.c
net/ipv4/tcp_vegas.c
net/ipv4/tcp_veno.c
net/ipv4/tcp_westwood.c
net/ipv4/tcp_yeah.c
net/ipv4/udp.c
net/ipv4/udp_impl.h
net/ipv4/udp_offload.c
net/ipv4/udplite.c
net/ipv6/Kconfig
net/ipv6/Makefile
net/ipv6/addrconf.c
net/ipv6/af_inet6.c
net/ipv6/ah6.c
net/ipv6/datagram.c
net/ipv6/esp6.c
net/ipv6/exthdrs.c
net/ipv6/icmp.c
net/ipv6/ila/ila_xlat.c
net/ipv6/inet6_connection_sock.c
net/ipv6/inet6_hashtables.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_offload.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ip6_udp_tunnel.c
net/ipv6/ip6_vti.c
net/ipv6/ip6mr.c
net/ipv6/ipcomp6.c
net/ipv6/ipv6_sockglue.c
net/ipv6/mcast.c
net/ipv6/netfilter.c
net/ipv6/netfilter/Kconfig
net/ipv6/netfilter/Makefile
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/ip6t_MASQUERADE.c
net/ipv6/netfilter/ip6t_REJECT.c
net/ipv6/netfilter/ip6t_SYNPROXY.c
net/ipv6/netfilter/ip6t_rpfilter.c
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
net/ipv6/netfilter/nf_socket_ipv6.c [new file with mode: 0644]
net/ipv6/netfilter/nft_dup_ipv6.c
net/ipv6/netfilter/nft_fib_ipv6.c [new file with mode: 0644]
net/ipv6/netfilter/nft_masq_ipv6.c
net/ipv6/netfilter/nft_redir_ipv6.c
net/ipv6/netfilter/nft_reject_ipv6.c
net/ipv6/ping.c
net/ipv6/raw.c
net/ipv6/reassembly.c
net/ipv6/route.c
net/ipv6/seg6.c [new file with mode: 0644]
net/ipv6/seg6_hmac.c [new file with mode: 0644]
net/ipv6/seg6_iptunnel.c [new file with mode: 0644]
net/ipv6/sit.c
net/ipv6/syncookies.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/ipv6/udp_impl.h
net/ipv6/udplite.c
net/ipv6/xfrm6_tunnel.c
net/irda/irnetlink.c
net/key/af_key.c
net/l2tp/l2tp_core.c
net/l2tp/l2tp_ip.c
net/l2tp/l2tp_ip6.c
net/l2tp/l2tp_netlink.c
net/l2tp/l2tp_ppp.c
net/llc/af_llc.c
net/mac80211/Makefile
net/mac80211/aes_ccm.c
net/mac80211/aes_ccm.h
net/mac80211/aes_cmac.c
net/mac80211/aes_cmac.h
net/mac80211/aes_gcm.c
net/mac80211/aes_gcm.h
net/mac80211/aes_gmac.c
net/mac80211/aes_gmac.h
net/mac80211/agg-rx.c
net/mac80211/cfg.c
net/mac80211/debugfs.c
net/mac80211/debugfs_netdev.c
net/mac80211/debugfs_sta.c
net/mac80211/fils_aead.c [new file with mode: 0644]
net/mac80211/fils_aead.h [new file with mode: 0644]
net/mac80211/ieee80211_i.h
net/mac80211/iface.c
net/mac80211/main.c
net/mac80211/mlme.c
net/mac80211/offchannel.c
net/mac80211/rx.c
net/mac80211/sta_info.c
net/mac80211/sta_info.h
net/mac80211/tx.c
net/mac80211/util.c
net/mac80211/vht.c
net/mac80211/wme.c
net/mac80211/wpa.c
net/ncsi/internal.h
net/ncsi/ncsi-aen.c
net/ncsi/ncsi-manage.c
net/netfilter/Kconfig
net/netfilter/Makefile
net/netfilter/core.c
net/netfilter/ipset/Kconfig
net/netfilter/ipset/Makefile
net/netfilter/ipset/ip_set_bitmap_gen.h
net/netfilter/ipset/ip_set_core.c
net/netfilter/ipset/ip_set_hash_gen.h
net/netfilter/ipset/ip_set_hash_ip.c
net/netfilter/ipset/ip_set_hash_ipmac.c [new file with mode: 0644]
net/netfilter/ipset/ip_set_hash_ipmark.c
net/netfilter/ipset/ip_set_hash_ipport.c
net/netfilter/ipset/ip_set_hash_ipportip.c
net/netfilter/ipset/ip_set_hash_ipportnet.c
net/netfilter/ipset/ip_set_hash_net.c
net/netfilter/ipset/ip_set_hash_netiface.c
net/netfilter/ipset/ip_set_hash_netnet.c
net/netfilter/ipset/ip_set_hash_netport.c
net/netfilter/ipset/ip_set_hash_netportnet.c
net/netfilter/ipset/ip_set_list_set.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_proto.c
net/netfilter/nf_conntrack_proto_dccp.c
net/netfilter/nf_conntrack_proto_gre.c
net/netfilter/nf_conntrack_proto_sctp.c
net/netfilter/nf_conntrack_proto_udplite.c
net/netfilter/nf_conntrack_sip.c
net/netfilter/nf_dup_netdev.c
net/netfilter/nf_internals.h
net/netfilter/nf_log_common.c
net/netfilter/nf_log_netdev.c [new file with mode: 0644]
net/netfilter/nf_queue.c
net/netfilter/nf_synproxy_core.c
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_core.c
net/netfilter/nf_tables_trace.c
net/netfilter/nfnetlink_log.c
net/netfilter/nfnetlink_queue.c
net/netfilter/nft_bitwise.c
net/netfilter/nft_byteorder.c
net/netfilter/nft_cmp.c
net/netfilter/nft_ct.c
net/netfilter/nft_dynset.c
net/netfilter/nft_exthdr.c
net/netfilter/nft_fib.c [new file with mode: 0644]
net/netfilter/nft_fib_inet.c [new file with mode: 0644]
net/netfilter/nft_hash.c
net/netfilter/nft_immediate.c
net/netfilter/nft_log.c
net/netfilter/nft_lookup.c
net/netfilter/nft_meta.c
net/netfilter/nft_numgen.c
net/netfilter/nft_payload.c
net/netfilter/nft_queue.c
net/netfilter/nft_range.c
net/netfilter/nft_reject_inet.c
net/netfilter/nft_rt.c [new file with mode: 0644]
net/netfilter/nft_set_hash.c
net/netfilter/nft_set_rbtree.c
net/netfilter/x_tables.c
net/netfilter/xt_AUDIT.c
net/netfilter/xt_LOG.c
net/netfilter/xt_NETMAP.c
net/netfilter/xt_NFLOG.c
net/netfilter/xt_NFQUEUE.c
net/netfilter/xt_REDIRECT.c
net/netfilter/xt_TCPMSS.c
net/netfilter/xt_TEE.c
net/netfilter/xt_TPROXY.c
net/netfilter/xt_addrtype.c
net/netfilter/xt_cluster.c
net/netfilter/xt_connlimit.c
net/netfilter/xt_connmark.c
net/netfilter/xt_conntrack.c
net/netfilter/xt_devgroup.c
net/netfilter/xt_dscp.c
net/netfilter/xt_hashlimit.c
net/netfilter/xt_ipcomp.c
net/netfilter/xt_ipvs.c
net/netfilter/xt_multiport.c
net/netfilter/xt_nfacct.c
net/netfilter/xt_osf.c
net/netfilter/xt_owner.c
net/netfilter/xt_pkttype.c
net/netfilter/xt_policy.c
net/netfilter/xt_recent.c
net/netfilter/xt_set.c
net/netfilter/xt_socket.c
net/netlabel/netlabel_calipso.c
net/netlabel/netlabel_cipso_v4.c
net/netlabel/netlabel_mgmt.c
net/netlabel/netlabel_unlabeled.c
net/netlink/diag.c
net/netlink/genetlink.c
net/nfc/netlink.c
net/openvswitch/actions.c
net/openvswitch/conntrack.c
net/openvswitch/datapath.c
net/openvswitch/datapath.h
net/openvswitch/flow.c
net/openvswitch/flow.h
net/openvswitch/flow_netlink.c
net/openvswitch/vport-netdev.c
net/openvswitch/vport.c
net/openvswitch/vport.h
net/packet/af_packet.c
net/phonet/pep.c
net/phonet/pn_dev.c
net/rds/Makefile
net/rds/af_rds.c
net/rds/connection.c
net/rds/message.c
net/rds/rds.h
net/rds/recv.c
net/rds/send.c
net/rds/tcp.c
net/rds/tcp_connect.c
net/rds/tcp_listen.c
net/rds/tcp_send.c
net/rxrpc/call_object.c
net/rxrpc/input.c
net/rxrpc/peer_object.c
net/sched/act_api.c
net/sched/act_bpf.c
net/sched/act_connmark.c
net/sched/act_csum.c
net/sched/act_gact.c
net/sched/act_ife.c
net/sched/act_ipt.c
net/sched/act_mirred.c
net/sched/act_nat.c
net/sched/act_pedit.c
net/sched/act_police.c
net/sched/act_simple.c
net/sched/act_skbedit.c
net/sched/act_skbmod.c
net/sched/act_tunnel_key.c
net/sched/act_vlan.c
net/sched/cls_api.c
net/sched/cls_bpf.c
net/sched/cls_flower.c
net/sched/em_ipset.c
net/sched/sch_api.c
net/sched/sch_fq.c
net/sctp/associola.c
net/sctp/input.c
net/sctp/ipv6.c
net/sctp/output.c
net/sctp/sm_statefuns.c
net/sctp/socket.c
net/socket.c
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/auth_gss/gss_krb5_crypto.c
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/clnt.c
net/sunrpc/netns.h
net/sunrpc/sunrpc_syms.c
net/sunrpc/svc_xprt.c
net/sunrpc/svcsock.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/sunrpc/xprtsock.c
net/switchdev/switchdev.c
net/tipc/bcast.c
net/tipc/bcast.h
net/tipc/core.c
net/tipc/core.h
net/tipc/link.c
net/tipc/monitor.c
net/tipc/msg.h
net/tipc/name_distr.c
net/tipc/netlink.c
net/tipc/netlink_compat.c
net/tipc/node.c
net/tipc/socket.c
net/unix/af_unix.c
net/vmw_vsock/virtio_transport.c
net/vmw_vsock/virtio_transport_common.c
net/wimax/stack.c
net/wireless/core.c
net/wireless/core.h
net/wireless/mesh.c
net/wireless/mlme.c
net/wireless/nl80211.c
net/wireless/rdev-ops.h
net/wireless/scan.c
net/wireless/sme.c
net/wireless/sysfs.c
net/wireless/trace.h
net/wireless/util.c
net/xfrm/xfrm_state.c
samples/Kconfig
samples/Makefile
samples/auxdisplay/.gitignore [new file with mode: 0644]
samples/auxdisplay/Makefile [new file with mode: 0644]
samples/auxdisplay/cfag12864b-example.c [new file with mode: 0644]
samples/blackfin/Makefile [new file with mode: 0644]
samples/blackfin/gptimers-example.c [new file with mode: 0644]
samples/bpf/Makefile
samples/bpf/bpf_helpers.h
samples/bpf/bpf_load.c
samples/bpf/bpf_load.h
samples/bpf/libbpf.c
samples/bpf/libbpf.h
samples/bpf/lwt_len_hist.sh [new file with mode: 0644]
samples/bpf/lwt_len_hist_kern.c [new file with mode: 0644]
samples/bpf/lwt_len_hist_user.c [new file with mode: 0644]
samples/bpf/map_perf_test_kern.c
samples/bpf/map_perf_test_user.c
samples/bpf/parse_ldabs.c
samples/bpf/parse_simple.c
samples/bpf/parse_varlen.c
samples/bpf/sock_flags_kern.c [new file with mode: 0644]
samples/bpf/sockex2_kern.c
samples/bpf/tc_l2_redirect.sh [new file with mode: 0755]
samples/bpf/tc_l2_redirect_kern.c [new file with mode: 0644]
samples/bpf/tc_l2_redirect_user.c [new file with mode: 0644]
samples/bpf/tcbpf1_kern.c
samples/bpf/tcbpf2_kern.c
samples/bpf/test_cgrp2_attach.c [new file with mode: 0644]
samples/bpf/test_cgrp2_sock.c [new file with mode: 0644]
samples/bpf/test_cgrp2_sock.sh [new file with mode: 0755]
samples/bpf/test_cgrp2_sock2.c [new file with mode: 0644]
samples/bpf/test_cgrp2_sock2.sh [new file with mode: 0755]
samples/bpf/test_cgrp2_tc_kern.c
samples/bpf/test_lru_dist.c [new file with mode: 0644]
samples/bpf/test_lwt_bpf.c [new file with mode: 0644]
samples/bpf/test_lwt_bpf.sh [new file with mode: 0644]
samples/bpf/tracex2_user.c
samples/bpf/tracex3_user.c
samples/bpf/xdp1_user.c
samples/mei/.gitignore [new file with mode: 0644]
samples/mei/Makefile [new file with mode: 0644]
samples/mei/TODO [new file with mode: 0644]
samples/mei/mei-amt-version.c [new file with mode: 0644]
samples/mic/mpssd/.gitignore [new file with mode: 0644]
samples/mic/mpssd/Makefile [new file with mode: 0644]
samples/mic/mpssd/micctrl [new file with mode: 0755]
samples/mic/mpssd/mpss [new file with mode: 0755]
samples/mic/mpssd/mpssd.c [new file with mode: 0644]
samples/mic/mpssd/mpssd.h [new file with mode: 0644]
samples/mic/mpssd/sysfs.c [new file with mode: 0644]
samples/timers/.gitignore [new file with mode: 0644]
samples/timers/Makefile [new file with mode: 0644]
samples/timers/hpet_example.c [new file with mode: 0644]
samples/watchdog/.gitignore [new file with mode: 0644]
samples/watchdog/Makefile [new file with mode: 0644]
samples/watchdog/watchdog-simple.c [new file with mode: 0644]
scripts/Makefile.build
scripts/Makefile.extrawarn
scripts/Makefile.gcc-plugins
scripts/Makefile.modpost
scripts/Makefile.ubsan
scripts/basic/fixdep.c
scripts/bloat-o-meter
scripts/coccicheck
scripts/coccinelle/api/memdup_user.cocci
scripts/coccinelle/api/pm_runtime.cocci
scripts/coccinelle/misc/cond_no_effect.cocci [new file with mode: 0644]
scripts/gcc-plugins/cyc_complexity_plugin.c
scripts/gcc-plugins/gcc-common.h
scripts/gcc-plugins/latent_entropy_plugin.c [new file with mode: 0644]
scripts/gcc-plugins/sancov_plugin.c
scripts/gcc-x86_64-has-stack-protector.sh
scripts/gen_initramfs_list.sh
scripts/genksyms/lex.l
scripts/genksyms/lex.lex.c_shipped
scripts/link-vmlinux.sh
security/apparmor/domain.c
security/keys/Kconfig
security/keys/big_key.c
security/keys/proc.c
security/selinux/hooks.c
security/tomoyo/domain.c
sound/core/info.c
sound/core/seq/seq_compat.c
sound/core/seq/seq_timer.c
sound/pci/asihpi/hpioctl.c
sound/pci/hda/dell_wmi_helper.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_realtek.c
sound/pci/hda/thinkpad_helper.c
sound/soc/codecs/cs4270.c
sound/soc/codecs/da7219.c
sound/soc/codecs/hdmi-codec.c
sound/soc/codecs/rt298.c
sound/soc/codecs/rt5663.c
sound/soc/codecs/sti-sas.c
sound/soc/codecs/tas571x.c
sound/soc/intel/Kconfig
sound/soc/intel/atom/sst/sst_acpi.c
sound/soc/intel/boards/bxt_da7219_max98357a.c
sound/soc/intel/skylake/skl.c
sound/soc/pxa/Kconfig
sound/soc/qcom/lpass-cpu.c
sound/soc/qcom/lpass-platform.c
sound/soc/qcom/lpass.h
sound/soc/samsung/ac97.c
sound/soc/samsung/i2s.c
sound/soc/samsung/pcm.c
sound/soc/samsung/s3c2412-i2s.c
sound/soc/samsung/s3c24xx-i2s.c
sound/soc/samsung/spdif.c
sound/soc/sti/uniperif_player.c
sound/soc/sunxi/sun4i-codec.c
sound/usb/card.c
sound/usb/line6/driver.c
sound/usb/line6/podhd.c
sound/usb/quirks-table.h
tools/accounting/.gitignore [new file with mode: 0644]
tools/accounting/Makefile [new file with mode: 0644]
tools/accounting/getdelays.c [new file with mode: 0644]
tools/arch/x86/include/asm/cpufeatures.h
tools/laptop/dslm/.gitignore [new file with mode: 0644]
tools/laptop/dslm/Makefile [new file with mode: 0644]
tools/laptop/dslm/dslm.c [new file with mode: 0644]
tools/objtool/arch/x86/decode.c
tools/objtool/builtin-check.c
tools/pcmcia/.gitignore [new file with mode: 0644]
tools/pcmcia/Makefile [new file with mode: 0644]
tools/pcmcia/crc32hash.c [new file with mode: 0644]
tools/perf/jvmti/Makefile
tools/perf/ui/browsers/hists.c
tools/perf/util/header.c
tools/perf/util/hist.c
tools/perf/util/parse-events.l
tools/power/acpi/Makefile.config
tools/power/acpi/Makefile.rules
tools/power/acpi/tools/acpidbg/Makefile
tools/power/acpi/tools/acpidbg/acpidbg.c
tools/power/acpi/tools/acpidump/Makefile
tools/power/cpupower/utils/cpufreq-set.c
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/bpf_util.h [new file with mode: 0644]
tools/testing/selftests/bpf/test_lru_map.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_maps.c
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/filesystems/.gitignore [new file with mode: 0644]
tools/testing/selftests/filesystems/Makefile [new file with mode: 0644]
tools/testing/selftests/filesystems/dnotify_test.c [new file with mode: 0644]
tools/testing/selftests/futex/functional/run.sh
tools/testing/selftests/futex/run.sh
tools/testing/selftests/ia64/.gitignore [new file with mode: 0644]
tools/testing/selftests/ia64/Makefile [new file with mode: 0644]
tools/testing/selftests/ia64/aliasing-test.c [new file with mode: 0644]
tools/testing/selftests/networking/timestamping/.gitignore [new file with mode: 0644]
tools/testing/selftests/networking/timestamping/Makefile [new file with mode: 0644]
tools/testing/selftests/networking/timestamping/hwtstamp_config.c [new file with mode: 0644]
tools/testing/selftests/networking/timestamping/timestamping.c [new file with mode: 0644]
tools/testing/selftests/networking/timestamping/txtimestamp.c [new file with mode: 0644]
tools/testing/selftests/powerpc/copyloops/asm/export.h [new file with mode: 0644]
tools/testing/selftests/powerpc/math/.gitignore
tools/testing/selftests/powerpc/signal/.gitignore [new file with mode: 0644]
tools/testing/selftests/powerpc/stringloops/asm/export.h [new file with mode: 0644]
tools/testing/selftests/powerpc/tm/.gitignore
tools/testing/selftests/prctl/.gitignore [new file with mode: 0644]
tools/testing/selftests/prctl/Makefile [new file with mode: 0644]
tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c [new file with mode: 0644]
tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c [new file with mode: 0644]
tools/testing/selftests/prctl/disable-tsc-test.c [new file with mode: 0644]
tools/testing/selftests/ptp/.gitignore [new file with mode: 0644]
tools/testing/selftests/ptp/Makefile [new file with mode: 0644]
tools/testing/selftests/ptp/testptp.c [new file with mode: 0644]
tools/testing/selftests/ptp/testptp.mk [new file with mode: 0644]
tools/testing/selftests/timers/posix_timers.c
tools/testing/selftests/vDSO/.gitignore [new file with mode: 0644]
tools/testing/selftests/vDSO/Makefile [new file with mode: 0644]
tools/testing/selftests/vDSO/parse_vdso.c [new file with mode: 0644]
tools/testing/selftests/vDSO/vdso_standalone_test_x86.c [new file with mode: 0644]
tools/testing/selftests/vDSO/vdso_test.c [new file with mode: 0644]
tools/testing/selftests/watchdog/.gitignore [new file with mode: 0644]
tools/testing/selftests/watchdog/Makefile [new file with mode: 0644]
tools/testing/selftests/watchdog/watchdog-test.c [new file with mode: 0644]
tools/testing/selftests/zram/README
tools/virtio/ringtest/Makefile
tools/virtio/ringtest/main.c
tools/virtio/ringtest/main.h
tools/virtio/ringtest/noring.c
tools/virtio/ringtest/ptr_ring.c
tools/virtio/ringtest/ring.c
tools/virtio/ringtest/virtio_ring_0_9.c
virt/kvm/arm/pmu.c
virt/kvm/arm/vgic/vgic-mmio.c
virt/kvm/arm/vgic/vgic-mmio.h
virt/kvm/arm/vgic/vgic.c
virt/kvm/async_pf.c
virt/kvm/eventfd.c
virt/kvm/kvm_main.c

index 2408e56e241ba199593f56d96d8e43a5a407a103..02d261407683dcfa483cf15247b2cf31cff0432a 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -127,6 +127,7 @@ Peter Oruba <peter@oruba.de>
 Peter Oruba <peter.oruba@amd.com>
 Pratyush Anand <pratyush.anand@gmail.com> <pratyush.anand@st.com>
 Praveen BP <praveenbp@ti.com>
+Qais Yousef <qsyousef@gmail.com> <qais.yousef@imgtec.com>
 Rajesh Shah <rajesh.shah@intel.com>
 Ralf Baechle <ralf@linux-mips.org>
 Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
diff --git a/CREDITS b/CREDITS
index 513aaa3546bff3fa1d95c21bf4c9d9b2da2daa4f..837367624e4598e1b936c25b9c2095f5d43e33eb 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -1864,10 +1864,11 @@ S: The Netherlands
 
 N: Martin Kepplinger
 E: martink@posteo.de
-E: martin.kepplinger@theobroma-systems.com
+E: martin.kepplinger@ginzinger.com
 W: http://www.martinkepplinger.com
 D: mma8452 accelerators iio driver
-D: Kernel cleanups
+D: pegasus_notetaker input driver
+D: Kernel fixes and cleanups
 S: Garnisonstraße 26
 S: 4020 Linz
 S: Austria
index cb9a6c6fa83b2288792d10e046cfd56c0ba668b5..3acc4f1a6f8420552614eed779d414dd9a926520 100644 (file)
@@ -46,7 +46,8 @@ IRQ.txt
 Intel-IOMMU.txt
        - basic info on the Intel IOMMU virtualization support.
 Makefile
-       - some files in Documentation dir are actually sample code to build
+       - This file does nothing. Removing it breaks make htmldocs and
+         make distclean.
 ManagementStyle
        - how to (attempt to) manage kernel hackers.
 RCU/
diff --git a/Documentation/80211/cfg80211.rst b/Documentation/80211/cfg80211.rst
new file mode 100644 (file)
index 0000000..b1e149e
--- /dev/null
@@ -0,0 +1,345 @@
+==================
+cfg80211 subsystem
+==================
+
+Device registration
+===================
+
+.. kernel-doc:: include/net/cfg80211.h
+   :doc: Device registration
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_channel_flags
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_channel
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_rate_flags
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_rate
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_sta_ht_cap
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_supported_band
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_signal_type
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_params_flags
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_flags
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wireless_dev
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_new
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_register
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_unregister
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_free
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_name
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_dev
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_priv
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: priv_to_wiphy
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: set_wiphy_dev
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wdev_priv
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_iface_limit
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_iface_combination
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_check_combinations
+
+Actions and configuration
+=========================
+
+.. kernel-doc:: include/net/cfg80211.h
+   :doc: Actions and configuration
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_ops
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: vif_params
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: key_params
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: survey_info_flags
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: survey_info
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_beacon_data
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_ap_settings
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: station_parameters
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: rate_info_flags
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: rate_info
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: station_info
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: monitor_flags
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: mpath_info_flags
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: mpath_info
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: bss_parameters
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_txq_params
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_crypto_settings
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_auth_request
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_assoc_request
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_deauth_request
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_disassoc_request
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_ibss_params
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_connect_params
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_pmksa
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_rx_mlme_mgmt
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_auth_timeout
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_rx_assoc_resp
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_assoc_timeout
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_tx_mlme_mgmt
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_ibss_joined
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_connect_result
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_connect_bss
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_connect_timeout
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_roamed
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_disconnected
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_ready_on_channel
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_remain_on_channel_expired
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_new_sta
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_rx_mgmt
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_mgmt_tx_status
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_cqm_rssi_notify
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_cqm_pktloss_notify
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_michael_mic_failure
+
+Scanning and BSS list handling
+==============================
+
+.. kernel-doc:: include/net/cfg80211.h
+   :doc: Scanning and BSS list handling
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_ssid
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_scan_request
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_scan_done
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_bss
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_inform_bss
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_inform_bss_frame_data
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_inform_bss_data
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_unlink_bss
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_find_ie
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_bss_get_ie
+
+Utility functions
+=================
+
+.. kernel-doc:: include/net/cfg80211.h
+   :doc: Utility functions
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_channel_to_frequency
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_frequency_to_channel
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_get_channel
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_get_response_rate
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_hdrlen
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_get_hdrlen_from_skb
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_radiotap_iterator
+
+Data path helpers
+=================
+
+.. kernel-doc:: include/net/cfg80211.h
+   :doc: Data path helpers
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_data_to_8023
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_data_from_8023
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: ieee80211_amsdu_to_8023s
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_classify8021d
+
+Regulatory enforcement infrastructure
+=====================================
+
+.. kernel-doc:: include/net/cfg80211.h
+   :doc: Regulatory enforcement infrastructure
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: regulatory_hint
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_apply_custom_regulatory
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: freq_reg_info
+
+RFkill integration
+==================
+
+.. kernel-doc:: include/net/cfg80211.h
+   :doc: RFkill integration
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_rfkill_set_hw_state
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_rfkill_start_polling
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: wiphy_rfkill_stop_polling
+
+Test mode
+=========
+
+.. kernel-doc:: include/net/cfg80211.h
+   :doc: Test mode
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_testmode_alloc_reply_skb
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_testmode_reply
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_testmode_alloc_event_skb
+
+.. kernel-doc:: include/net/cfg80211.h
+   :functions: cfg80211_testmode_event
diff --git a/Documentation/80211/conf.py b/Documentation/80211/conf.py
new file mode 100644 (file)
index 0000000..20c7c27
--- /dev/null
@@ -0,0 +1,5 @@
+# -*- coding: utf-8; mode: python -*-
+
+project = "Linux 802.11 Driver Developer's Guide"
+
+tags.add("subproject")
diff --git a/Documentation/80211/index.rst b/Documentation/80211/index.rst
new file mode 100644 (file)
index 0000000..90bba47
--- /dev/null
@@ -0,0 +1,17 @@
+=====================================
+Linux 802.11 Driver Developer's Guide
+=====================================
+
+.. toctree::
+
+   introduction
+   cfg80211
+   mac80211
+   mac80211-advanced
+
+.. only::  subproject
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/80211/introduction.rst b/Documentation/80211/introduction.rst
new file mode 100644 (file)
index 0000000..4938fa8
--- /dev/null
@@ -0,0 +1,17 @@
+============
+Introduction
+============
+
+Explaining wireless 802.11 networking in the Linux kernel
+
+Copyright 2007-2009 Johannes Berg
+
+These books attempt to give a description of the various subsystems
+that play a role in 802.11 wireless networking in Linux. Since these
+books are for kernel developers they attempt to document the
+structures and functions used in the kernel as well as giving a
+higher-level overview.
+
+The reader is expected to be familiar with the 802.11 standard as
+published by the IEEE in 802.11-2007 (or possibly later versions).
+References to this standard will be given as "802.11-2007 8.1.5".
diff --git a/Documentation/80211/mac80211-advanced.rst b/Documentation/80211/mac80211-advanced.rst
new file mode 100644 (file)
index 0000000..70a89b2
--- /dev/null
@@ -0,0 +1,295 @@
+=============================
+mac80211 subsystem (advanced)
+=============================
+
+Information contained within this part of the book is of interest only
+for advanced interaction of mac80211 with drivers to exploit more
+hardware capabilities and improve performance.
+
+LED support
+===========
+
+Mac80211 supports various ways of blinking LEDs. Wherever possible,
+device LEDs should be exposed as LED class devices and hooked up to the
+appropriate trigger, which will then be triggered appropriately by
+mac80211.
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_get_tx_led_name
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_get_rx_led_name
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_get_assoc_led_name
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_get_radio_led_name
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_tpt_blink
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_tpt_led_trigger_flags
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_create_tpt_led_trigger
+
+Hardware crypto acceleration
+============================
+
+.. kernel-doc:: include/net/mac80211.h
+   :doc: Hardware crypto acceleration
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: set_key_cmd
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_key_conf
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_key_flags
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_get_tkip_p1k
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_get_tkip_p1k_iv
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_get_tkip_p2k
+
+Powersave support
+=================
+
+.. kernel-doc:: include/net/mac80211.h
+   :doc: Powersave support
+
+Beacon filter support
+=====================
+
+.. kernel-doc:: include/net/mac80211.h
+   :doc: Beacon filter support
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_beacon_loss
+
+Multiple queues and QoS support
+===============================
+
+TBD
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_tx_queue_params
+
+Access point mode support
+=========================
+
+TBD
+
+Some parts of the if_conf should be discussed here instead
+
+Insert notes about VLAN interfaces with hw crypto here or in the hw
+crypto chapter.
+
+support for powersaving clients
+-------------------------------
+
+.. kernel-doc:: include/net/mac80211.h
+   :doc: AP support for powersaving clients
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_get_buffered_bc
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_beacon_get
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_sta_eosp
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_frame_release_type
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_sta_ps_transition
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_sta_ps_transition_ni
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_sta_set_buffered
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_sta_block_awake
+
+Supporting multiple virtual interfaces
+======================================
+
+TBD
+
+Note: WDS with identical MAC address should almost always be OK
+
+Insert notes about having multiple virtual interfaces with different MAC
+addresses here, note which configurations are supported by mac80211, add
+notes about supporting hw crypto with it.
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_iterate_active_interfaces
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_iterate_active_interfaces_atomic
+
+Station handling
+================
+
+TODO
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_sta
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: sta_notify_cmd
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_find_sta
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_find_sta_by_ifaddr
+
+Hardware scan offload
+=====================
+
+TBD
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_scan_completed
+
+Aggregation
+===========
+
+TX A-MPDU aggregation
+---------------------
+
+.. kernel-doc:: net/mac80211/agg-tx.c
+   :doc: TX A-MPDU aggregation
+
+.. WARNING: DOCPROC directive not supported: !Cnet/mac80211/agg-tx.c
+
+RX A-MPDU aggregation
+---------------------
+
+.. kernel-doc:: net/mac80211/agg-rx.c
+   :doc: RX A-MPDU aggregation
+
+.. WARNING: DOCPROC directive not supported: !Cnet/mac80211/agg-rx.c
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_ampdu_mlme_action
+
+Spatial Multiplexing Powersave (SMPS)
+=====================================
+
+.. kernel-doc:: include/net/mac80211.h
+   :doc: Spatial multiplexing power save
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_request_smps
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_smps_mode
+
+TBD
+
+This part of the book describes the rate control algorithm interface and
+how it relates to mac80211 and drivers.
+
+Rate Control API
+================
+
+TBD
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_start_tx_ba_session
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_start_tx_ba_cb_irqsafe
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_stop_tx_ba_session
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_stop_tx_ba_cb_irqsafe
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_rate_control_changed
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_tx_rate_control
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: rate_control_send_low
+
+TBD
+
+This part of the book describes mac80211 internals.
+
+Key handling
+============
+
+Key handling basics
+-------------------
+
+.. kernel-doc:: net/mac80211/key.c
+   :doc: Key handling basics
+
+MORE TBD
+--------
+
+TBD
+
+Receive processing
+==================
+
+TBD
+
+Transmit processing
+===================
+
+TBD
+
+Station info handling
+=====================
+
+Programming information
+-----------------------
+
+.. kernel-doc:: net/mac80211/sta_info.h
+   :functions: sta_info
+
+.. kernel-doc:: net/mac80211/sta_info.h
+   :functions: ieee80211_sta_info_flags
+
+STA information lifetime rules
+------------------------------
+
+.. kernel-doc:: net/mac80211/sta_info.c
+   :doc: STA information lifetime rules
+
+Aggregation
+===========
+
+.. kernel-doc:: net/mac80211/sta_info.h
+   :functions: sta_ampdu_mlme
+
+.. kernel-doc:: net/mac80211/sta_info.h
+   :functions: tid_ampdu_tx
+
+.. kernel-doc:: net/mac80211/sta_info.h
+   :functions: tid_ampdu_rx
+
+Synchronisation
+===============
+
+TBD
+
+Locking, lots of RCU
diff --git a/Documentation/80211/mac80211.rst b/Documentation/80211/mac80211.rst
new file mode 100644 (file)
index 0000000..85a8335
--- /dev/null
@@ -0,0 +1,216 @@
+===========================
+mac80211 subsystem (basics)
+===========================
+
+You should read and understand the information contained within this
+part of the book while implementing a mac80211 driver. In some chapters,
+advanced usage is noted; those parts may be skipped if not needed.
+
+This part of the book only covers station and monitor mode
+functionality; additional information required to implement the other
+modes is covered in the second part of the book.
+
+Basic hardware handling
+=======================
+
+TBD
+
+This chapter shall contain information on getting a hw struct allocated
+and registered with mac80211.
+
+Since it is required to allocate rates/modes before registering a hw
+struct, this chapter shall also contain information on setting up the
+rate/mode structs.
+
+Additionally, some discussion about the callbacks and the general
+programming model should be in here, including the definition of
+ieee80211_ops which will be referred to a lot.
+
+Finally, a discussion of hardware capabilities should be done with
+references to other parts of the book.
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_hw
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_hw_flags
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: SET_IEEE80211_DEV
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: SET_IEEE80211_PERM_ADDR
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_ops
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_alloc_hw
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_register_hw
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_unregister_hw
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_free_hw
+
+PHY configuration
+=================
+
+TBD
+
+This chapter should describe PHY handling including start/stop callbacks
+and the various structures used.
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_conf
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_conf_flags
+
+Virtual interfaces
+==================
+
+TBD
+
+This chapter should describe virtual interface basics that are relevant
+to the driver (VLANs, MGMT etc are not.) It should explain the use of
+the add_iface/remove_iface callbacks as well as the interface
+configuration callbacks.
+
+Things related to AP mode should be discussed there.
+
+Things related to supporting multiple interfaces should be in the
+appropriate chapter, a BIG FAT note should be here about this though and
+the recommendation to allow only a single interface in STA mode at
+first!
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_vif
+
+Receive and transmit processing
+===============================
+
+what should be here
+-------------------
+
+TBD
+
+This should describe the receive and transmit paths in mac80211/the
+drivers as well as transmit status handling.
+
+Frame format
+------------
+
+.. kernel-doc:: include/net/mac80211.h
+   :doc: Frame format
+
+Packet alignment
+----------------
+
+.. kernel-doc:: net/mac80211/rx.c
+   :doc: Packet alignment
+
+Calling into mac80211 from interrupts
+-------------------------------------
+
+.. kernel-doc:: include/net/mac80211.h
+   :doc: Calling mac80211 from interrupts
+
+functions/definitions
+---------------------
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_rx_status
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: mac80211_rx_flags
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: mac80211_tx_info_flags
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: mac80211_tx_control_flags
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: mac80211_rate_control_flags
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_tx_rate
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_tx_info
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_tx_info_clear_status
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_rx
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_rx_ni
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_rx_irqsafe
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_tx_status
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_tx_status_ni
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_tx_status_irqsafe
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_rts_get
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_rts_duration
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_ctstoself_get
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_ctstoself_duration
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_generic_frame_duration
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_wake_queue
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_stop_queue
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_wake_queues
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_stop_queues
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_queue_stopped
+
+Frame filtering
+===============
+
+.. kernel-doc:: include/net/mac80211.h
+   :doc: Frame filtering
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_filter_flags
+
+The mac80211 workqueue
+======================
+
+.. kernel-doc:: include/net/mac80211.h
+   :doc: mac80211 workqueue
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_queue_work
+
+.. kernel-doc:: include/net/mac80211.h
+   :functions: ieee80211_queue_delayed_work
index 4ba0a2a61926251edf33e5f94a4eff45028d44a8..640f65e79ef1c00c94508b6b9f9fe8b63a1305a6 100644 (file)
@@ -220,8 +220,11 @@ What:           /sys/class/cxl/<card>/reset
 Date:           October 2014
 Contact:        linuxppc-dev@lists.ozlabs.org
 Description:    write only
-                Writing 1 will issue a PERST to card which may cause the card
-                to reload the FPGA depending on load_image_on_perst.
+                Writing 1 will issue a PERST to card provided there are no
+                contexts active on any one of the card AFUs. This may cause
+                the card to reload the FPGA depending on load_image_on_perst.
+                Writing -1 will do a force PERST irrespective of any active
+                contexts on the card AFUs.
 Users:         https://github.com/ibm-capi/libcxl
 
 What:          /sys/class/cxl/<card>/perst_reloads_same_image (not in a guest)
index b82deeaec314b8ff711b122282396340496946c8..470def06ab0a42e40f860d1fe54f8e0d489ec4c3 100644 (file)
@@ -1,4 +1,4 @@
-What:           state
+What:           /sys/devices/system/ibm_rtl/state
 Date:           Sep 2010
 KernelVersion:  2.6.37
 Contact:        Vernon Mauery <vernux@us.ibm.com>
@@ -10,7 +10,7 @@ Description:    The state file allows a means by which to change in and
 Users:          The ibm-prtm userspace daemon uses this interface.
 
 
-What:           version
+What:           /sys/devices/system/ibm_rtl/version
 Date:           Sep 2010
 KernelVersion:  2.6.37
 Contact:        Vernon Mauery <vernux@us.ibm.com>
diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl
deleted file mode 100644 (file)
index 800fe7a..0000000
+++ /dev/null
@@ -1,584 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE set PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
-       "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
-<set>
-  <setinfo>
-    <title>The 802.11 subsystems &ndash; for kernel developers</title>
-    <subtitle>
-      Explaining wireless 802.11 networking in the Linux kernel
-    </subtitle>
-
-    <copyright>
-      <year>2007-2009</year>
-      <holder>Johannes Berg</holder>
-    </copyright>
-
-    <authorgroup>
-      <author>
-        <firstname>Johannes</firstname>
-        <surname>Berg</surname>
-        <affiliation>
-          <address><email>johannes@sipsolutions.net</email></address>
-        </affiliation>
-      </author>
-    </authorgroup>
-
-    <legalnotice>
-      <para>
-        This documentation is free software; you can redistribute
-        it and/or modify it under the terms of the GNU General Public
-        License version 2 as published by the Free Software Foundation.
-      </para>
-      <para>
-        This documentation is distributed in the hope that it will be
-        useful, but WITHOUT ANY WARRANTY; without even the implied
-        warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-        See the GNU General Public License for more details.
-      </para>
-      <para>
-        You should have received a copy of the GNU General Public
-        License along with this documentation; if not, write to the Free
-        Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
-        MA 02111-1307 USA
-      </para>
-      <para>
-        For more details see the file COPYING in the source
-        distribution of Linux.
-      </para>
-    </legalnotice>
-
-    <abstract>
-      <para>
-        These books attempt to give a description of the
-        various subsystems that play a role in 802.11 wireless
-        networking in Linux. Since these books are for kernel
-        developers they attempts to document the structures
-        and functions used in the kernel as well as giving a
-        higher-level overview.
-      </para>
-      <para>
-       The reader is expected to be familiar with the 802.11
-       standard as published by the IEEE in 802.11-2007 (or
-       possibly later versions). References to this standard
-       will be given as "802.11-2007 8.1.5".
-      </para>
-    </abstract>
-  </setinfo>
-  <book id="cfg80211-developers-guide">
-    <bookinfo>
-      <title>The cfg80211 subsystem</title>
-
-      <abstract>
-!Pinclude/net/cfg80211.h Introduction
-      </abstract>
-    </bookinfo>
-      <chapter>
-      <title>Device registration</title>
-!Pinclude/net/cfg80211.h Device registration
-!Finclude/net/cfg80211.h ieee80211_channel_flags
-!Finclude/net/cfg80211.h ieee80211_channel
-!Finclude/net/cfg80211.h ieee80211_rate_flags
-!Finclude/net/cfg80211.h ieee80211_rate
-!Finclude/net/cfg80211.h ieee80211_sta_ht_cap
-!Finclude/net/cfg80211.h ieee80211_supported_band
-!Finclude/net/cfg80211.h cfg80211_signal_type
-!Finclude/net/cfg80211.h wiphy_params_flags
-!Finclude/net/cfg80211.h wiphy_flags
-!Finclude/net/cfg80211.h wiphy
-!Finclude/net/cfg80211.h wireless_dev
-!Finclude/net/cfg80211.h wiphy_new
-!Finclude/net/cfg80211.h wiphy_register
-!Finclude/net/cfg80211.h wiphy_unregister
-!Finclude/net/cfg80211.h wiphy_free
-
-!Finclude/net/cfg80211.h wiphy_name
-!Finclude/net/cfg80211.h wiphy_dev
-!Finclude/net/cfg80211.h wiphy_priv
-!Finclude/net/cfg80211.h priv_to_wiphy
-!Finclude/net/cfg80211.h set_wiphy_dev
-!Finclude/net/cfg80211.h wdev_priv
-!Finclude/net/cfg80211.h ieee80211_iface_limit
-!Finclude/net/cfg80211.h ieee80211_iface_combination
-!Finclude/net/cfg80211.h cfg80211_check_combinations
-      </chapter>
-      <chapter>
-      <title>Actions and configuration</title>
-!Pinclude/net/cfg80211.h Actions and configuration
-!Finclude/net/cfg80211.h cfg80211_ops
-!Finclude/net/cfg80211.h vif_params
-!Finclude/net/cfg80211.h key_params
-!Finclude/net/cfg80211.h survey_info_flags
-!Finclude/net/cfg80211.h survey_info
-!Finclude/net/cfg80211.h cfg80211_beacon_data
-!Finclude/net/cfg80211.h cfg80211_ap_settings
-!Finclude/net/cfg80211.h station_parameters
-!Finclude/net/cfg80211.h rate_info_flags
-!Finclude/net/cfg80211.h rate_info
-!Finclude/net/cfg80211.h station_info
-!Finclude/net/cfg80211.h monitor_flags
-!Finclude/net/cfg80211.h mpath_info_flags
-!Finclude/net/cfg80211.h mpath_info
-!Finclude/net/cfg80211.h bss_parameters
-!Finclude/net/cfg80211.h ieee80211_txq_params
-!Finclude/net/cfg80211.h cfg80211_crypto_settings
-!Finclude/net/cfg80211.h cfg80211_auth_request
-!Finclude/net/cfg80211.h cfg80211_assoc_request
-!Finclude/net/cfg80211.h cfg80211_deauth_request
-!Finclude/net/cfg80211.h cfg80211_disassoc_request
-!Finclude/net/cfg80211.h cfg80211_ibss_params
-!Finclude/net/cfg80211.h cfg80211_connect_params
-!Finclude/net/cfg80211.h cfg80211_pmksa
-!Finclude/net/cfg80211.h cfg80211_rx_mlme_mgmt
-!Finclude/net/cfg80211.h cfg80211_auth_timeout
-!Finclude/net/cfg80211.h cfg80211_rx_assoc_resp
-!Finclude/net/cfg80211.h cfg80211_assoc_timeout
-!Finclude/net/cfg80211.h cfg80211_tx_mlme_mgmt
-!Finclude/net/cfg80211.h cfg80211_ibss_joined
-!Finclude/net/cfg80211.h cfg80211_connect_result
-!Finclude/net/cfg80211.h cfg80211_connect_bss
-!Finclude/net/cfg80211.h cfg80211_connect_timeout
-!Finclude/net/cfg80211.h cfg80211_roamed
-!Finclude/net/cfg80211.h cfg80211_disconnected
-!Finclude/net/cfg80211.h cfg80211_ready_on_channel
-!Finclude/net/cfg80211.h cfg80211_remain_on_channel_expired
-!Finclude/net/cfg80211.h cfg80211_new_sta
-!Finclude/net/cfg80211.h cfg80211_rx_mgmt
-!Finclude/net/cfg80211.h cfg80211_mgmt_tx_status
-!Finclude/net/cfg80211.h cfg80211_cqm_rssi_notify
-!Finclude/net/cfg80211.h cfg80211_cqm_pktloss_notify
-!Finclude/net/cfg80211.h cfg80211_michael_mic_failure
-      </chapter>
-      <chapter>
-      <title>Scanning and BSS list handling</title>
-!Pinclude/net/cfg80211.h Scanning and BSS list handling
-!Finclude/net/cfg80211.h cfg80211_ssid
-!Finclude/net/cfg80211.h cfg80211_scan_request
-!Finclude/net/cfg80211.h cfg80211_scan_done
-!Finclude/net/cfg80211.h cfg80211_bss
-!Finclude/net/cfg80211.h cfg80211_inform_bss
-!Finclude/net/cfg80211.h cfg80211_inform_bss_frame_data
-!Finclude/net/cfg80211.h cfg80211_inform_bss_data
-!Finclude/net/cfg80211.h cfg80211_unlink_bss
-!Finclude/net/cfg80211.h cfg80211_find_ie
-!Finclude/net/cfg80211.h ieee80211_bss_get_ie
-      </chapter>
-      <chapter>
-      <title>Utility functions</title>
-!Pinclude/net/cfg80211.h Utility functions
-!Finclude/net/cfg80211.h ieee80211_channel_to_frequency
-!Finclude/net/cfg80211.h ieee80211_frequency_to_channel
-!Finclude/net/cfg80211.h ieee80211_get_channel
-!Finclude/net/cfg80211.h ieee80211_get_response_rate
-!Finclude/net/cfg80211.h ieee80211_hdrlen
-!Finclude/net/cfg80211.h ieee80211_get_hdrlen_from_skb
-!Finclude/net/cfg80211.h ieee80211_radiotap_iterator
-      </chapter>
-      <chapter>
-      <title>Data path helpers</title>
-!Pinclude/net/cfg80211.h Data path helpers
-!Finclude/net/cfg80211.h ieee80211_data_to_8023
-!Finclude/net/cfg80211.h ieee80211_data_from_8023
-!Finclude/net/cfg80211.h ieee80211_amsdu_to_8023s
-!Finclude/net/cfg80211.h cfg80211_classify8021d
-      </chapter>
-      <chapter>
-      <title>Regulatory enforcement infrastructure</title>
-!Pinclude/net/cfg80211.h Regulatory enforcement infrastructure
-!Finclude/net/cfg80211.h regulatory_hint
-!Finclude/net/cfg80211.h wiphy_apply_custom_regulatory
-!Finclude/net/cfg80211.h freq_reg_info
-      </chapter>
-      <chapter>
-      <title>RFkill integration</title>
-!Pinclude/net/cfg80211.h RFkill integration
-!Finclude/net/cfg80211.h wiphy_rfkill_set_hw_state
-!Finclude/net/cfg80211.h wiphy_rfkill_start_polling
-!Finclude/net/cfg80211.h wiphy_rfkill_stop_polling
-      </chapter>
-      <chapter>
-      <title>Test mode</title>
-!Pinclude/net/cfg80211.h Test mode
-!Finclude/net/cfg80211.h cfg80211_testmode_alloc_reply_skb
-!Finclude/net/cfg80211.h cfg80211_testmode_reply
-!Finclude/net/cfg80211.h cfg80211_testmode_alloc_event_skb
-!Finclude/net/cfg80211.h cfg80211_testmode_event
-      </chapter>
-  </book>
-  <book id="mac80211-developers-guide">
-    <bookinfo>
-      <title>The mac80211 subsystem</title>
-      <abstract>
-!Pinclude/net/mac80211.h Introduction
-!Pinclude/net/mac80211.h Warning
-      </abstract>
-    </bookinfo>
-
-    <toc></toc>
-
-  <!--
-  Generally, this document shall be ordered by increasing complexity.
-  It is important to note that readers should be able to read only
-  the first few sections to get a working driver and only advanced
-  usage should require reading the full document.
-  -->
-
-    <part>
-      <title>The basic mac80211 driver interface</title>
-      <partintro>
-        <para>
-          You should read and understand the information contained
-          within this part of the book while implementing a driver.
-          In some chapters, advanced usage is noted, that may be
-          skipped at first.
-        </para>
-        <para>
-          This part of the book only covers station and monitor mode
-          functionality, additional information required to implement
-          the other modes is covered in the second part of the book.
-        </para>
-      </partintro>
-
-      <chapter id="basics">
-        <title>Basic hardware handling</title>
-        <para>TBD</para>
-        <para>
-          This chapter shall contain information on getting a hw
-          struct allocated and registered with mac80211.
-        </para>
-        <para>
-          Since it is required to allocate rates/modes before registering
-          a hw struct, this chapter shall also contain information on setting
-          up the rate/mode structs.
-        </para>
-        <para>
-          Additionally, some discussion about the callbacks and
-          the general programming model should be in here, including
-          the definition of ieee80211_ops which will be referred to
-          a lot.
-        </para>
-        <para>
-          Finally, a discussion of hardware capabilities should be done
-          with references to other parts of the book.
-        </para>
-  <!-- intentionally multiple !F lines to get proper order -->
-!Finclude/net/mac80211.h ieee80211_hw
-!Finclude/net/mac80211.h ieee80211_hw_flags
-!Finclude/net/mac80211.h SET_IEEE80211_DEV
-!Finclude/net/mac80211.h SET_IEEE80211_PERM_ADDR
-!Finclude/net/mac80211.h ieee80211_ops
-!Finclude/net/mac80211.h ieee80211_alloc_hw
-!Finclude/net/mac80211.h ieee80211_register_hw
-!Finclude/net/mac80211.h ieee80211_unregister_hw
-!Finclude/net/mac80211.h ieee80211_free_hw
-      </chapter>
-
-      <chapter id="phy-handling">
-        <title>PHY configuration</title>
-        <para>TBD</para>
-        <para>
-          This chapter should describe PHY handling including
-          start/stop callbacks and the various structures used.
-        </para>
-!Finclude/net/mac80211.h ieee80211_conf
-!Finclude/net/mac80211.h ieee80211_conf_flags
-      </chapter>
-
-      <chapter id="iface-handling">
-        <title>Virtual interfaces</title>
-        <para>TBD</para>
-        <para>
-          This chapter should describe virtual interface basics
-          that are relevant to the driver (VLANs, MGMT etc are not.)
-          It should explain the use of the add_iface/remove_iface
-          callbacks as well as the interface configuration callbacks.
-        </para>
-        <para>Things related to AP mode should be discussed there.</para>
-        <para>
-          Things related to supporting multiple interfaces should be
-          in the appropriate chapter, a BIG FAT note should be here about
-          this though and the recommendation to allow only a single
-          interface in STA mode at first!
-        </para>
-!Finclude/net/mac80211.h ieee80211_vif
-      </chapter>
-
-      <chapter id="rx-tx">
-        <title>Receive and transmit processing</title>
-        <sect1>
-          <title>what should be here</title>
-          <para>TBD</para>
-          <para>
-            This should describe the receive and transmit
-            paths in mac80211/the drivers as well as
-            transmit status handling.
-          </para>
-        </sect1>
-        <sect1>
-          <title>Frame format</title>
-!Pinclude/net/mac80211.h Frame format
-        </sect1>
-        <sect1>
-          <title>Packet alignment</title>
-!Pnet/mac80211/rx.c Packet alignment
-        </sect1>
-        <sect1>
-          <title>Calling into mac80211 from interrupts</title>
-!Pinclude/net/mac80211.h Calling mac80211 from interrupts
-        </sect1>
-        <sect1>
-          <title>functions/definitions</title>
-!Finclude/net/mac80211.h ieee80211_rx_status
-!Finclude/net/mac80211.h mac80211_rx_flags
-!Finclude/net/mac80211.h mac80211_tx_info_flags
-!Finclude/net/mac80211.h mac80211_tx_control_flags
-!Finclude/net/mac80211.h mac80211_rate_control_flags
-!Finclude/net/mac80211.h ieee80211_tx_rate
-!Finclude/net/mac80211.h ieee80211_tx_info
-!Finclude/net/mac80211.h ieee80211_tx_info_clear_status
-!Finclude/net/mac80211.h ieee80211_rx
-!Finclude/net/mac80211.h ieee80211_rx_ni
-!Finclude/net/mac80211.h ieee80211_rx_irqsafe
-!Finclude/net/mac80211.h ieee80211_tx_status
-!Finclude/net/mac80211.h ieee80211_tx_status_ni
-!Finclude/net/mac80211.h ieee80211_tx_status_irqsafe
-!Finclude/net/mac80211.h ieee80211_rts_get
-!Finclude/net/mac80211.h ieee80211_rts_duration
-!Finclude/net/mac80211.h ieee80211_ctstoself_get
-!Finclude/net/mac80211.h ieee80211_ctstoself_duration
-!Finclude/net/mac80211.h ieee80211_generic_frame_duration
-!Finclude/net/mac80211.h ieee80211_wake_queue
-!Finclude/net/mac80211.h ieee80211_stop_queue
-!Finclude/net/mac80211.h ieee80211_wake_queues
-!Finclude/net/mac80211.h ieee80211_stop_queues
-!Finclude/net/mac80211.h ieee80211_queue_stopped
-        </sect1>
-      </chapter>
-
-      <chapter id="filters">
-        <title>Frame filtering</title>
-!Pinclude/net/mac80211.h Frame filtering
-!Finclude/net/mac80211.h ieee80211_filter_flags
-      </chapter>
-
-      <chapter id="workqueue">
-        <title>The mac80211 workqueue</title>
-!Pinclude/net/mac80211.h mac80211 workqueue
-!Finclude/net/mac80211.h ieee80211_queue_work
-!Finclude/net/mac80211.h ieee80211_queue_delayed_work
-      </chapter>
-    </part>
-
-    <part id="advanced">
-      <title>Advanced driver interface</title>
-      <partintro>
-        <para>
-         Information contained within this part of the book is
-         of interest only for advanced interaction of mac80211
-         with drivers to exploit more hardware capabilities and
-         improve performance.
-        </para>
-      </partintro>
-
-      <chapter id="led-support">
-        <title>LED support</title>
-        <para>
-         Mac80211 supports various ways of blinking LEDs. Wherever possible,
-         device LEDs should be exposed as LED class devices and hooked up to
-         the appropriate trigger, which will then be triggered appropriately
-         by mac80211.
-        </para>
-!Finclude/net/mac80211.h ieee80211_get_tx_led_name
-!Finclude/net/mac80211.h ieee80211_get_rx_led_name
-!Finclude/net/mac80211.h ieee80211_get_assoc_led_name
-!Finclude/net/mac80211.h ieee80211_get_radio_led_name
-!Finclude/net/mac80211.h ieee80211_tpt_blink
-!Finclude/net/mac80211.h ieee80211_tpt_led_trigger_flags
-!Finclude/net/mac80211.h ieee80211_create_tpt_led_trigger
-      </chapter>
-
-      <chapter id="hardware-crypto-offload">
-        <title>Hardware crypto acceleration</title>
-!Pinclude/net/mac80211.h Hardware crypto acceleration
-  <!-- intentionally multiple !F lines to get proper order -->
-!Finclude/net/mac80211.h set_key_cmd
-!Finclude/net/mac80211.h ieee80211_key_conf
-!Finclude/net/mac80211.h ieee80211_key_flags
-!Finclude/net/mac80211.h ieee80211_get_tkip_p1k
-!Finclude/net/mac80211.h ieee80211_get_tkip_p1k_iv
-!Finclude/net/mac80211.h ieee80211_get_tkip_p2k
-      </chapter>
-
-      <chapter id="powersave">
-        <title>Powersave support</title>
-!Pinclude/net/mac80211.h Powersave support
-      </chapter>
-
-      <chapter id="beacon-filter">
-        <title>Beacon filter support</title>
-!Pinclude/net/mac80211.h Beacon filter support
-!Finclude/net/mac80211.h ieee80211_beacon_loss
-      </chapter>
-
-      <chapter id="qos">
-        <title>Multiple queues and QoS support</title>
-        <para>TBD</para>
-!Finclude/net/mac80211.h ieee80211_tx_queue_params
-      </chapter>
-
-      <chapter id="AP">
-        <title>Access point mode support</title>
-        <para>TBD</para>
-        <para>Some parts of the if_conf should be discussed here instead</para>
-        <para>
-          Insert notes about VLAN interfaces with hw crypto here or
-          in the hw crypto chapter.
-        </para>
-      <section id="ps-client">
-        <title>support for powersaving clients</title>
-!Pinclude/net/mac80211.h AP support for powersaving clients
-!Finclude/net/mac80211.h ieee80211_get_buffered_bc
-!Finclude/net/mac80211.h ieee80211_beacon_get
-!Finclude/net/mac80211.h ieee80211_sta_eosp
-!Finclude/net/mac80211.h ieee80211_frame_release_type
-!Finclude/net/mac80211.h ieee80211_sta_ps_transition
-!Finclude/net/mac80211.h ieee80211_sta_ps_transition_ni
-!Finclude/net/mac80211.h ieee80211_sta_set_buffered
-!Finclude/net/mac80211.h ieee80211_sta_block_awake
-      </section>
-      </chapter>
-
-      <chapter id="multi-iface">
-        <title>Supporting multiple virtual interfaces</title>
-        <para>TBD</para>
-        <para>
-          Note: WDS with identical MAC address should almost always be OK
-        </para>
-        <para>
-          Insert notes about having multiple virtual interfaces with
-          different MAC addresses here, note which configurations are
-          supported by mac80211, add notes about supporting hw crypto
-          with it.
-        </para>
-!Finclude/net/mac80211.h ieee80211_iterate_active_interfaces
-!Finclude/net/mac80211.h ieee80211_iterate_active_interfaces_atomic
-      </chapter>
-
-      <chapter id="station-handling">
-        <title>Station handling</title>
-        <para>TODO</para>
-!Finclude/net/mac80211.h ieee80211_sta
-!Finclude/net/mac80211.h sta_notify_cmd
-!Finclude/net/mac80211.h ieee80211_find_sta
-!Finclude/net/mac80211.h ieee80211_find_sta_by_ifaddr
-      </chapter>
-
-      <chapter id="hardware-scan-offload">
-        <title>Hardware scan offload</title>
-        <para>TBD</para>
-!Finclude/net/mac80211.h ieee80211_scan_completed
-      </chapter>
-
-      <chapter id="aggregation">
-        <title>Aggregation</title>
-        <sect1>
-          <title>TX A-MPDU aggregation</title>
-!Pnet/mac80211/agg-tx.c TX A-MPDU aggregation
-!Cnet/mac80211/agg-tx.c
-        </sect1>
-        <sect1>
-          <title>RX A-MPDU aggregation</title>
-!Pnet/mac80211/agg-rx.c RX A-MPDU aggregation
-!Cnet/mac80211/agg-rx.c
-!Finclude/net/mac80211.h ieee80211_ampdu_mlme_action
-        </sect1>
-      </chapter>
-
-      <chapter id="smps">
-        <title>Spatial Multiplexing Powersave (SMPS)</title>
-!Pinclude/net/mac80211.h Spatial multiplexing power save
-!Finclude/net/mac80211.h ieee80211_request_smps
-!Finclude/net/mac80211.h ieee80211_smps_mode
-      </chapter>
-    </part>
-
-    <part id="rate-control">
-      <title>Rate control interface</title>
-      <partintro>
-        <para>TBD</para>
-        <para>
-         This part of the book describes the rate control algorithm
-         interface and how it relates to mac80211 and drivers.
-        </para>
-      </partintro>
-      <chapter id="ratecontrol-api">
-        <title>Rate Control API</title>
-        <para>TBD</para>
-!Finclude/net/mac80211.h ieee80211_start_tx_ba_session
-!Finclude/net/mac80211.h ieee80211_start_tx_ba_cb_irqsafe
-!Finclude/net/mac80211.h ieee80211_stop_tx_ba_session
-!Finclude/net/mac80211.h ieee80211_stop_tx_ba_cb_irqsafe
-!Finclude/net/mac80211.h ieee80211_rate_control_changed
-!Finclude/net/mac80211.h ieee80211_tx_rate_control
-!Finclude/net/mac80211.h rate_control_send_low
-      </chapter>
-    </part>
-
-    <part id="internal">
-      <title>Internals</title>
-      <partintro>
-        <para>TBD</para>
-        <para>
-         This part of the book describes mac80211 internals.
-        </para>
-      </partintro>
-
-      <chapter id="key-handling">
-        <title>Key handling</title>
-        <sect1>
-          <title>Key handling basics</title>
-!Pnet/mac80211/key.c Key handling basics
-        </sect1>
-        <sect1>
-          <title>MORE TBD</title>
-          <para>TBD</para>
-        </sect1>
-      </chapter>
-
-      <chapter id="rx-processing">
-        <title>Receive processing</title>
-        <para>TBD</para>
-      </chapter>
-
-      <chapter id="tx-processing">
-        <title>Transmit processing</title>
-        <para>TBD</para>
-      </chapter>
-
-      <chapter id="sta-info">
-        <title>Station info handling</title>
-        <sect1>
-          <title>Programming information</title>
-!Fnet/mac80211/sta_info.h sta_info
-!Fnet/mac80211/sta_info.h ieee80211_sta_info_flags
-        </sect1>
-        <sect1>
-          <title>STA information lifetime rules</title>
-!Pnet/mac80211/sta_info.c STA information lifetime rules
-        </sect1>
-      </chapter>
-
-      <chapter id="aggregation-internals">
-        <title>Aggregation</title>
-!Fnet/mac80211/sta_info.h sta_ampdu_mlme
-!Fnet/mac80211/sta_info.h tid_ampdu_tx
-!Fnet/mac80211/sta_info.h tid_ampdu_rx
-      </chapter>
-
-      <chapter id="synchronisation">
-        <title>Synchronisation</title>
-        <para>TBD</para>
-        <para>Locking, lots of RCU</para>
-      </chapter>
-    </part>
-  </book>
-</set>
index 736f5916daea1d8573ad53543dd16c9d20f4809f..fdf8232d0eeb28ffd361320a926674d461995e16 100644 (file)
@@ -12,7 +12,7 @@ DOCBOOKS := z8530book.xml  \
            kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
            gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
            genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
-           80211.xml debugobjects.xml sh.xml regulator.xml \
+           debugobjects.xml sh.xml regulator.xml \
            alsa-driver-api.xml writing-an-alsa-driver.xml \
            tracepoint.xml w1.xml \
            writing_musb_glue_layer.xml crypto-API.xml iio.xml
index de955e151af8ee4adc33acbaa22f56b62697ac33..c2a469112c37bbc95d4a58e671eab5344b4aa428 100644 (file)
@@ -1,3 +1 @@
-subdir-y := accounting auxdisplay blackfin \
-       filesystems filesystems ia64 laptops mic misc-devices \
-       networking pcmcia prctl ptp timers vDSO watchdog
+subdir-y :=
diff --git a/Documentation/accounting/.gitignore b/Documentation/accounting/.gitignore
deleted file mode 100644 (file)
index 8648520..0000000
+++ /dev/null
@@ -1 +0,0 @@
-getdelays
diff --git a/Documentation/accounting/Makefile b/Documentation/accounting/Makefile
deleted file mode 100644 (file)
index 7e232cb..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-# List of programs to build
-hostprogs-y := getdelays
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-
-HOSTCFLAGS_getdelays.o += -I$(objtree)/usr/include
index 8a12f0730c94da018615aebc8e657daf65eadd80..042ea59b5853bdbb328d12591fc124c03f0ebcdd 100644 (file)
@@ -54,9 +54,9 @@ are sent to userspace without requiring a command. If it is the last exiting
 task of a thread group, the per-tgid statistics are also sent. More details
 are given in the taskstats interface description.
 
-The getdelays.c userspace utility in this directory allows simple commands to
-be run and the corresponding delay statistics to be displayed. It also serves
-as an example of using the taskstats interface.
+The getdelays.c userspace utility in tools/accounting directory allows simple
+commands to be run and the corresponding delay statistics to be displayed. It
+also serves as an example of using the taskstats interface.
 
 Usage
 -----
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
deleted file mode 100644 (file)
index b5ca536..0000000
+++ /dev/null
@@ -1,550 +0,0 @@
-/* getdelays.c
- *
- * Utility to get per-pid and per-tgid delay accounting statistics
- * Also illustrates usage of the taskstats interface
- *
- * Copyright (C) Shailabh Nagar, IBM Corp. 2005
- * Copyright (C) Balbir Singh, IBM Corp. 2006
- * Copyright (c) Jay Lan, SGI. 2006
- *
- * Compile with
- *     gcc -I/usr/src/linux/include getdelays.c -o getdelays
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <unistd.h>
-#include <poll.h>
-#include <string.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include <signal.h>
-
-#include <linux/genetlink.h>
-#include <linux/taskstats.h>
-#include <linux/cgroupstats.h>
-
-/*
- * Generic macros for dealing with netlink sockets. Might be duplicated
- * elsewhere. It is recommended that commercial grade applications use
- * libnl or libnetlink and use the interfaces provided by the library
- */
-#define GENLMSG_DATA(glh)      ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
-#define GENLMSG_PAYLOAD(glh)   (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
-#define NLA_DATA(na)           ((void *)((char*)(na) + NLA_HDRLEN))
-#define NLA_PAYLOAD(len)       (len - NLA_HDRLEN)
-
-#define err(code, fmt, arg...)                 \
-       do {                                    \
-               fprintf(stderr, fmt, ##arg);    \
-               exit(code);                     \
-       } while (0)
-
-int done;
-int rcvbufsz;
-char name[100];
-int dbg;
-int print_delays;
-int print_io_accounting;
-int print_task_context_switch_counts;
-
-#define PRINTF(fmt, arg...) {                  \
-           if (dbg) {                          \
-               printf(fmt, ##arg);             \
-           }                                   \
-       }
-
-/* Maximum size of response requested or message sent */
-#define MAX_MSG_SIZE   1024
-/* Maximum number of cpus expected to be specified in a cpumask */
-#define MAX_CPUS       32
-
-struct msgtemplate {
-       struct nlmsghdr n;
-       struct genlmsghdr g;
-       char buf[MAX_MSG_SIZE];
-};
-
-char cpumask[100+6*MAX_CPUS];
-
-static void usage(void)
-{
-       fprintf(stderr, "getdelays [-dilv] [-w logfile] [-r bufsize] "
-                       "[-m cpumask] [-t tgid] [-p pid]\n");
-       fprintf(stderr, "  -d: print delayacct stats\n");
-       fprintf(stderr, "  -i: print IO accounting (works only with -p)\n");
-       fprintf(stderr, "  -l: listen forever\n");
-       fprintf(stderr, "  -v: debug on\n");
-       fprintf(stderr, "  -C: container path\n");
-}
-
-/*
- * Create a raw netlink socket and bind
- */
-static int create_nl_socket(int protocol)
-{
-       int fd;
-       struct sockaddr_nl local;
-
-       fd = socket(AF_NETLINK, SOCK_RAW, protocol);
-       if (fd < 0)
-               return -1;
-
-       if (rcvbufsz)
-               if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
-                               &rcvbufsz, sizeof(rcvbufsz)) < 0) {
-                       fprintf(stderr, "Unable to set socket rcv buf size to %d\n",
-                               rcvbufsz);
-                       goto error;
-               }
-
-       memset(&local, 0, sizeof(local));
-       local.nl_family = AF_NETLINK;
-
-       if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
-               goto error;
-
-       return fd;
-error:
-       close(fd);
-       return -1;
-}
-
-
-static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
-            __u8 genl_cmd, __u16 nla_type,
-            void *nla_data, int nla_len)
-{
-       struct nlattr *na;
-       struct sockaddr_nl nladdr;
-       int r, buflen;
-       char *buf;
-
-       struct msgtemplate msg;
-
-       msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
-       msg.n.nlmsg_type = nlmsg_type;
-       msg.n.nlmsg_flags = NLM_F_REQUEST;
-       msg.n.nlmsg_seq = 0;
-       msg.n.nlmsg_pid = nlmsg_pid;
-       msg.g.cmd = genl_cmd;
-       msg.g.version = 0x1;
-       na = (struct nlattr *) GENLMSG_DATA(&msg);
-       na->nla_type = nla_type;
-       na->nla_len = nla_len + 1 + NLA_HDRLEN;
-       memcpy(NLA_DATA(na), nla_data, nla_len);
-       msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
-
-       buf = (char *) &msg;
-       buflen = msg.n.nlmsg_len ;
-       memset(&nladdr, 0, sizeof(nladdr));
-       nladdr.nl_family = AF_NETLINK;
-       while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
-                          sizeof(nladdr))) < buflen) {
-               if (r > 0) {
-                       buf += r;
-                       buflen -= r;
-               } else if (errno != EAGAIN)
-                       return -1;
-       }
-       return 0;
-}
-
-
-/*
- * Probe the controller in genetlink to find the family id
- * for the TASKSTATS family
- */
-static int get_family_id(int sd)
-{
-       struct {
-               struct nlmsghdr n;
-               struct genlmsghdr g;
-               char buf[256];
-       } ans;
-
-       int id = 0, rc;
-       struct nlattr *na;
-       int rep_len;
-
-       strcpy(name, TASKSTATS_GENL_NAME);
-       rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
-                       CTRL_ATTR_FAMILY_NAME, (void *)name,
-                       strlen(TASKSTATS_GENL_NAME)+1);
-       if (rc < 0)
-               return 0;       /* sendto() failure? */
-
-       rep_len = recv(sd, &ans, sizeof(ans), 0);
-       if (ans.n.nlmsg_type == NLMSG_ERROR ||
-           (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
-               return 0;
-
-       na = (struct nlattr *) GENLMSG_DATA(&ans);
-       na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
-       if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
-               id = *(__u16 *) NLA_DATA(na);
-       }
-       return id;
-}
-
-#define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
-
-static void print_delayacct(struct taskstats *t)
-{
-       printf("\n\nCPU   %15s%15s%15s%15s%15s\n"
-              "      %15llu%15llu%15llu%15llu%15.3fms\n"
-              "IO    %15s%15s%15s\n"
-              "      %15llu%15llu%15llums\n"
-              "SWAP  %15s%15s%15s\n"
-              "      %15llu%15llu%15llums\n"
-              "RECLAIM  %12s%15s%15s\n"
-              "      %15llu%15llu%15llums\n",
-              "count", "real total", "virtual total",
-              "delay total", "delay average",
-              (unsigned long long)t->cpu_count,
-              (unsigned long long)t->cpu_run_real_total,
-              (unsigned long long)t->cpu_run_virtual_total,
-              (unsigned long long)t->cpu_delay_total,
-              average_ms((double)t->cpu_delay_total, t->cpu_count),
-              "count", "delay total", "delay average",
-              (unsigned long long)t->blkio_count,
-              (unsigned long long)t->blkio_delay_total,
-              average_ms(t->blkio_delay_total, t->blkio_count),
-              "count", "delay total", "delay average",
-              (unsigned long long)t->swapin_count,
-              (unsigned long long)t->swapin_delay_total,
-              average_ms(t->swapin_delay_total, t->swapin_count),
-              "count", "delay total", "delay average",
-              (unsigned long long)t->freepages_count,
-              (unsigned long long)t->freepages_delay_total,
-              average_ms(t->freepages_delay_total, t->freepages_count));
-}
-
-static void task_context_switch_counts(struct taskstats *t)
-{
-       printf("\n\nTask   %15s%15s\n"
-              "       %15llu%15llu\n",
-              "voluntary", "nonvoluntary",
-              (unsigned long long)t->nvcsw, (unsigned long long)t->nivcsw);
-}
-
-static void print_cgroupstats(struct cgroupstats *c)
-{
-       printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, "
-               "uninterruptible %llu\n", (unsigned long long)c->nr_sleeping,
-               (unsigned long long)c->nr_io_wait,
-               (unsigned long long)c->nr_running,
-               (unsigned long long)c->nr_stopped,
-               (unsigned long long)c->nr_uninterruptible);
-}
-
-
-static void print_ioacct(struct taskstats *t)
-{
-       printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
-               t->ac_comm,
-               (unsigned long long)t->read_bytes,
-               (unsigned long long)t->write_bytes,
-               (unsigned long long)t->cancelled_write_bytes);
-}
-
-int main(int argc, char *argv[])
-{
-       int c, rc, rep_len, aggr_len, len2;
-       int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC;
-       __u16 id;
-       __u32 mypid;
-
-       struct nlattr *na;
-       int nl_sd = -1;
-       int len = 0;
-       pid_t tid = 0;
-       pid_t rtid = 0;
-
-       int fd = 0;
-       int count = 0;
-       int write_file = 0;
-       int maskset = 0;
-       char *logfile = NULL;
-       int loop = 0;
-       int containerset = 0;
-       char *containerpath = NULL;
-       int cfd = 0;
-       int forking = 0;
-       sigset_t sigset;
-
-       struct msgtemplate msg;
-
-       while (!forking) {
-               c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:c:");
-               if (c < 0)
-                       break;
-
-               switch (c) {
-               case 'd':
-                       printf("print delayacct stats ON\n");
-                       print_delays = 1;
-                       break;
-               case 'i':
-                       printf("printing IO accounting\n");
-                       print_io_accounting = 1;
-                       break;
-               case 'q':
-                       printf("printing task/process context switch rates\n");
-                       print_task_context_switch_counts = 1;
-                       break;
-               case 'C':
-                       containerset = 1;
-                       containerpath = optarg;
-                       break;
-               case 'w':
-                       logfile = strdup(optarg);
-                       printf("write to file %s\n", logfile);
-                       write_file = 1;
-                       break;
-               case 'r':
-                       rcvbufsz = atoi(optarg);
-                       printf("receive buf size %d\n", rcvbufsz);
-                       if (rcvbufsz < 0)
-                               err(1, "Invalid rcv buf size\n");
-                       break;
-               case 'm':
-                       strncpy(cpumask, optarg, sizeof(cpumask));
-                       cpumask[sizeof(cpumask) - 1] = '\0';
-                       maskset = 1;
-                       printf("cpumask %s maskset %d\n", cpumask, maskset);
-                       break;
-               case 't':
-                       tid = atoi(optarg);
-                       if (!tid)
-                               err(1, "Invalid tgid\n");
-                       cmd_type = TASKSTATS_CMD_ATTR_TGID;
-                       break;
-               case 'p':
-                       tid = atoi(optarg);
-                       if (!tid)
-                               err(1, "Invalid pid\n");
-                       cmd_type = TASKSTATS_CMD_ATTR_PID;
-                       break;
-               case 'c':
-
-                       /* Block SIGCHLD for sigwait() later */
-                       if (sigemptyset(&sigset) == -1)
-                               err(1, "Failed to empty sigset");
-                       if (sigaddset(&sigset, SIGCHLD))
-                               err(1, "Failed to set sigchld in sigset");
-                       sigprocmask(SIG_BLOCK, &sigset, NULL);
-
-                       /* fork/exec a child */
-                       tid = fork();
-                       if (tid < 0)
-                               err(1, "Fork failed\n");
-                       if (tid == 0)
-                               if (execvp(argv[optind - 1],
-                                   &argv[optind - 1]) < 0)
-                                       exit(-1);
-
-                       /* Set the command type and avoid further processing */
-                       cmd_type = TASKSTATS_CMD_ATTR_PID;
-                       forking = 1;
-                       break;
-               case 'v':
-                       printf("debug on\n");
-                       dbg = 1;
-                       break;
-               case 'l':
-                       printf("listen forever\n");
-                       loop = 1;
-                       break;
-               default:
-                       usage();
-                       exit(-1);
-               }
-       }
-
-       if (write_file) {
-               fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
-                         S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
-               if (fd == -1) {
-                       perror("Cannot open output file\n");
-                       exit(1);
-               }
-       }
-
-       nl_sd = create_nl_socket(NETLINK_GENERIC);
-       if (nl_sd < 0)
-               err(1, "error creating Netlink socket\n");
-
-
-       mypid = getpid();
-       id = get_family_id(nl_sd);
-       if (!id) {
-               fprintf(stderr, "Error getting family id, errno %d\n", errno);
-               goto err;
-       }
-       PRINTF("family id %d\n", id);
-
-       if (maskset) {
-               rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
-                             TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
-                             &cpumask, strlen(cpumask) + 1);
-               PRINTF("Sent register cpumask, retval %d\n", rc);
-               if (rc < 0) {
-                       fprintf(stderr, "error sending register cpumask\n");
-                       goto err;
-               }
-       }
-
-       if (tid && containerset) {
-               fprintf(stderr, "Select either -t or -C, not both\n");
-               goto err;
-       }
-
-       /*
-        * If we forked a child, wait for it to exit. Cannot use waitpid()
-        * as all the delicious data would be reaped as part of the wait
-        */
-       if (tid && forking) {
-               int sig_received;
-               sigwait(&sigset, &sig_received);
-       }
-
-       if (tid) {
-               rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
-                             cmd_type, &tid, sizeof(__u32));
-               PRINTF("Sent pid/tgid, retval %d\n", rc);
-               if (rc < 0) {
-                       fprintf(stderr, "error sending tid/tgid cmd\n");
-                       goto done;
-               }
-       }
-
-       if (containerset) {
-               cfd = open(containerpath, O_RDONLY);
-               if (cfd < 0) {
-                       perror("error opening container file");
-                       goto err;
-               }
-               rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET,
-                             CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32));
-               if (rc < 0) {
-                       perror("error sending cgroupstats command");
-                       goto err;
-               }
-       }
-       if (!maskset && !tid && !containerset) {
-               usage();
-               goto err;
-       }
-
-       do {
-               rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
-               PRINTF("received %d bytes\n", rep_len);
-
-               if (rep_len < 0) {
-                       fprintf(stderr, "nonfatal reply error: errno %d\n",
-                               errno);
-                       continue;
-               }
-               if (msg.n.nlmsg_type == NLMSG_ERROR ||
-                   !NLMSG_OK((&msg.n), rep_len)) {
-                       struct nlmsgerr *err = NLMSG_DATA(&msg);
-                       fprintf(stderr, "fatal reply error,  errno %d\n",
-                               err->error);
-                       goto done;
-               }
-
-               PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n",
-                      sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
-
-
-               rep_len = GENLMSG_PAYLOAD(&msg.n);
-
-               na = (struct nlattr *) GENLMSG_DATA(&msg);
-               len = 0;
-               while (len < rep_len) {
-                       len += NLA_ALIGN(na->nla_len);
-                       switch (na->nla_type) {
-                       case TASKSTATS_TYPE_AGGR_TGID:
-                               /* Fall through */
-                       case TASKSTATS_TYPE_AGGR_PID:
-                               aggr_len = NLA_PAYLOAD(na->nla_len);
-                               len2 = 0;
-                               /* For nested attributes, na follows */
-                               na = (struct nlattr *) NLA_DATA(na);
-                               done = 0;
-                               while (len2 < aggr_len) {
-                                       switch (na->nla_type) {
-                                       case TASKSTATS_TYPE_PID:
-                                               rtid = *(int *) NLA_DATA(na);
-                                               if (print_delays)
-                                                       printf("PID\t%d\n", rtid);
-                                               break;
-                                       case TASKSTATS_TYPE_TGID:
-                                               rtid = *(int *) NLA_DATA(na);
-                                               if (print_delays)
-                                                       printf("TGID\t%d\n", rtid);
-                                               break;
-                                       case TASKSTATS_TYPE_STATS:
-                                               count++;
-                                               if (print_delays)
-                                                       print_delayacct((struct taskstats *) NLA_DATA(na));
-                                               if (print_io_accounting)
-                                                       print_ioacct((struct taskstats *) NLA_DATA(na));
-                                               if (print_task_context_switch_counts)
-                                                       task_context_switch_counts((struct taskstats *) NLA_DATA(na));
-                                               if (fd) {
-                                                       if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
-                                                               err(1,"write error\n");
-                                                       }
-                                               }
-                                               if (!loop)
-                                                       goto done;
-                                               break;
-                                       case TASKSTATS_TYPE_NULL:
-                                               break;
-                                       default:
-                                               fprintf(stderr, "Unknown nested"
-                                                       " nla_type %d\n",
-                                                       na->nla_type);
-                                               break;
-                                       }
-                                       len2 += NLA_ALIGN(na->nla_len);
-                                       na = (struct nlattr *)((char *)na +
-                                                              NLA_ALIGN(na->nla_len));
-                               }
-                               break;
-
-                       case CGROUPSTATS_TYPE_CGROUP_STATS:
-                               print_cgroupstats(NLA_DATA(na));
-                               break;
-                       default:
-                               fprintf(stderr, "Unknown nla_type %d\n",
-                                       na->nla_type);
-                       case TASKSTATS_TYPE_NULL:
-                               break;
-                       }
-                       na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
-               }
-       } while (loop);
-done:
-       if (maskset) {
-               rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
-                             TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
-                             &cpumask, strlen(cpumask) + 1);
-               printf("Sent deregister mask, retval %d\n", rc);
-               if (rc < 0)
-                       err(rc, "error sending deregister cpumask\n");
-       }
-err:
-       close(nl_sd);
-       if (fd)
-               close(fd);
-       if (cfd)
-               close(cfd);
-       return 0;
-}
index dea011c8d7c718a8ff7a37dea7d35a81d0f67271..b6e69fd371c4f82c227197f937c8fff9876cec3a 100644 (file)
@@ -8,8 +8,6 @@ Interrupts
        - ARM Interrupt subsystem documentation
 IXP4xx
        - Intel IXP4xx Network processor.
-Makefile
-       - Build sourcefiles as part of the Documentation-build for arm
 Netwinder
        - Netwinder specific documentation
 Porting
diff --git a/Documentation/auxdisplay/.gitignore b/Documentation/auxdisplay/.gitignore
deleted file mode 100644 (file)
index 7af2228..0000000
+++ /dev/null
@@ -1 +0,0 @@
-cfag12864b-example
diff --git a/Documentation/auxdisplay/Makefile b/Documentation/auxdisplay/Makefile
deleted file mode 100644 (file)
index ada4dac..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-# List of programs to build
-hostprogs-y := cfag12864b-example
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-
-HOSTCFLAGS_cfag12864b-example.o += -I$(objtree)/usr/include
index eb7be393a51061c6e1cd844dc884edc1a276c53e..12fd51b8de757cac99cf07e330059cf3fb88243f 100644 (file)
@@ -101,5 +101,5 @@ Although the LCD won't get updated until the next refresh time arrives.
 Also, you can mmap the framebuffer: open & mmap, munmap & close...
 which is the best option for most uses.
 
-Check Documentation/auxdisplay/cfag12864b-example.c
+Check samples/auxdisplay/cfag12864b-example.c
 for a real working userspace complete program with usage examples.
diff --git a/Documentation/auxdisplay/cfag12864b-example.c b/Documentation/auxdisplay/cfag12864b-example.c
deleted file mode 100644 (file)
index e7823ff..0000000
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- *    Filename: cfag12864b-example.c
- *     Version: 0.1.0
- * Description: cfag12864b LCD userspace example program
- *     License: GPLv2
- *
- *      Author: Copyright (C) Miguel Ojeda Sandonis
- *        Date: 2006-10-31
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-/*
- * ------------------------
- * start of cfag12864b code
- * ------------------------
- */
-
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-#define CFAG12864B_WIDTH               (128)
-#define CFAG12864B_HEIGHT              (64)
-#define CFAG12864B_SIZE                        (128 * 64 / 8)
-#define CFAG12864B_BPB                 (8)
-#define CFAG12864B_ADDRESS(x, y)       ((y) * CFAG12864B_WIDTH / \
-                                       CFAG12864B_BPB + (x) / CFAG12864B_BPB)
-#define CFAG12864B_BIT(n)              (((unsigned char) 1) << (n))
-
-#undef CFAG12864B_DOCHECK
-#ifdef CFAG12864B_DOCHECK
-       #define CFAG12864B_CHECK(x, y)          ((x) < CFAG12864B_WIDTH && \
-                                               (y) < CFAG12864B_HEIGHT)
-#else
-       #define CFAG12864B_CHECK(x, y)          (1)
-#endif
-
-int cfag12864b_fd;
-unsigned char * cfag12864b_mem;
-unsigned char cfag12864b_buffer[CFAG12864B_SIZE];
-
-/*
- * init a cfag12864b framebuffer device
- *
- * No error:       return = 0
- * Unable to open: return = -1
- * Unable to mmap: return = -2
- */
-static int cfag12864b_init(char *path)
-{
-       cfag12864b_fd = open(path, O_RDWR);
-       if (cfag12864b_fd == -1)
-               return -1;
-
-       cfag12864b_mem = mmap(0, CFAG12864B_SIZE, PROT_READ | PROT_WRITE,
-               MAP_SHARED, cfag12864b_fd, 0);
-       if (cfag12864b_mem == MAP_FAILED) {
-               close(cfag12864b_fd);
-               return -2;
-       }
-
-       return 0;
-}
-
-/*
- * exit a cfag12864b framebuffer device
- */
-static void cfag12864b_exit(void)
-{
-       munmap(cfag12864b_mem, CFAG12864B_SIZE);
-       close(cfag12864b_fd);
-}
-
-/*
- * set (x, y) pixel
- */
-static void cfag12864b_set(unsigned char x, unsigned char y)
-{
-       if (CFAG12864B_CHECK(x, y))
-               cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] |=
-                       CFAG12864B_BIT(x % CFAG12864B_BPB);
-}
-
-/*
- * unset (x, y) pixel
- */
-static void cfag12864b_unset(unsigned char x, unsigned char y)
-{
-       if (CFAG12864B_CHECK(x, y))
-               cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] &=
-                       ~CFAG12864B_BIT(x % CFAG12864B_BPB);
-}
-
-/*
- * is set (x, y) pixel?
- *
- * Pixel off: return = 0
- * Pixel on:  return = 1
- */
-static unsigned char cfag12864b_isset(unsigned char x, unsigned char y)
-{
-       if (CFAG12864B_CHECK(x, y))
-               if (cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] &
-                       CFAG12864B_BIT(x % CFAG12864B_BPB))
-                       return 1;
-
-       return 0;
-}
-
-/*
- * not (x, y) pixel
- */
-static void cfag12864b_not(unsigned char x, unsigned char y)
-{
-       if (cfag12864b_isset(x, y))
-               cfag12864b_unset(x, y);
-       else
-               cfag12864b_set(x, y);
-}
-
-/*
- * fill (set all pixels)
- */
-static void cfag12864b_fill(void)
-{
-       unsigned short i;
-
-       for (i = 0; i < CFAG12864B_SIZE; i++)
-               cfag12864b_buffer[i] = 0xFF;
-}
-
-/*
- * clear (unset all pixels)
- */
-static void cfag12864b_clear(void)
-{
-       unsigned short i;
-
-       for (i = 0; i < CFAG12864B_SIZE; i++)
-               cfag12864b_buffer[i] = 0;
-}
-
-/*
- * format a [128*64] matrix
- *
- * Pixel off: src[i] = 0
- * Pixel on:  src[i] > 0
- */
-static void cfag12864b_format(unsigned char * matrix)
-{
-       unsigned char i, j, n;
-
-       for (i = 0; i < CFAG12864B_HEIGHT; i++)
-       for (j = 0; j < CFAG12864B_WIDTH / CFAG12864B_BPB; j++) {
-               cfag12864b_buffer[i * CFAG12864B_WIDTH / CFAG12864B_BPB +
-                       j] = 0;
-               for (n = 0; n < CFAG12864B_BPB; n++)
-                       if (matrix[i * CFAG12864B_WIDTH +
-                               j * CFAG12864B_BPB + n])
-                               cfag12864b_buffer[i * CFAG12864B_WIDTH /
-                                       CFAG12864B_BPB + j] |=
-                                       CFAG12864B_BIT(n);
-       }
-}
-
-/*
- * blit buffer to lcd
- */
-static void cfag12864b_blit(void)
-{
-       memcpy(cfag12864b_mem, cfag12864b_buffer, CFAG12864B_SIZE);
-}
-
-/*
- * ----------------------
- * end of cfag12864b code
- * ----------------------
- */
-
-#include <stdio.h>
-
-#define EXAMPLES       6
-
-static void example(unsigned char n)
-{
-       unsigned short i, j;
-       unsigned char matrix[CFAG12864B_WIDTH * CFAG12864B_HEIGHT];
-
-       if (n > EXAMPLES)
-               return;
-
-       printf("Example %i/%i - ", n, EXAMPLES);
-
-       switch (n) {
-       case 1:
-               printf("Draw points setting bits");
-               cfag12864b_clear();
-               for (i = 0; i < CFAG12864B_WIDTH; i += 2)
-                       for (j = 0; j < CFAG12864B_HEIGHT; j += 2)
-                               cfag12864b_set(i, j);
-               break;
-
-       case 2:
-               printf("Clear the LCD");
-               cfag12864b_clear();
-               break;
-
-       case 3:
-               printf("Draw rows formatting a [128*64] matrix");
-               memset(matrix, 0, CFAG12864B_WIDTH * CFAG12864B_HEIGHT);
-               for (i = 0; i < CFAG12864B_WIDTH; i++)
-                       for (j = 0; j < CFAG12864B_HEIGHT; j += 2)
-                               matrix[j * CFAG12864B_WIDTH + i] = 1;
-               cfag12864b_format(matrix);
-               break;
-
-       case 4:
-               printf("Fill the lcd");
-               cfag12864b_fill();
-               break;
-
-       case 5:
-               printf("Draw columns unsetting bits");
-               for (i = 0; i < CFAG12864B_WIDTH; i += 2)
-                       for (j = 0; j < CFAG12864B_HEIGHT; j++)
-                               cfag12864b_unset(i, j);
-               break;
-
-       case 6:
-               printf("Do negative not-ing all bits");
-               for (i = 0; i < CFAG12864B_WIDTH; i++)
-                       for (j = 0; j < CFAG12864B_HEIGHT; j ++)
-                               cfag12864b_not(i, j);
-               break;
-       }
-
-       puts(" - [Press Enter]");
-}
-
-int main(int argc, char *argv[])
-{
-       unsigned char n;
-
-       if (argc != 2) {
-               printf(
-                       "Sintax:  %s fbdev\n"
-                       "Usually: /dev/fb0, /dev/fb1...\n", argv[0]);
-               return -1;
-       }
-
-       if (cfag12864b_init(argv[1])) {
-               printf("Can't init %s fbdev\n", argv[1]);
-               return -2;
-       }
-
-       for (n = 1; n <= EXAMPLES; n++) {
-               example(n);
-               cfag12864b_blit();
-               while (getchar() != '\n');
-       }
-
-       cfag12864b_exit();
-
-       return 0;
-}
index c54fcdd4ae9f68ce6ee439722c0bfac494651406..265a1effebde97cca268e53bb6d49b899dcbe7e0 100644 (file)
@@ -1,10 +1,6 @@
 00-INDEX
        - This file
-Makefile
-       - Makefile for gptimers example file.
 bfin-gpio-notes.txt
        - Notes in developing/using bfin-gpio driver.
 bfin-spi-notes.txt
        - Notes for using bfin spi bus driver.
-gptimers-example.c
-       - gptimers example
diff --git a/Documentation/blackfin/Makefile b/Documentation/blackfin/Makefile
deleted file mode 100644 (file)
index 6782c58..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-ifneq ($(CONFIG_BLACKFIN),)
-ifneq ($(CONFIG_BFIN_GPTIMERS),)
-obj-m := gptimers-example.o
-endif
-endif
diff --git a/Documentation/blackfin/gptimers-example.c b/Documentation/blackfin/gptimers-example.c
deleted file mode 100644 (file)
index 283eba9..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Simple gptimers example
- *     http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:drivers:gptimers
- *
- * Copyright 2007-2009 Analog Devices Inc.
- *
- * Licensed under the GPL-2 or later.
- */
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-
-#include <asm/gptimers.h>
-#include <asm/portmux.h>
-
-/* ... random driver includes ... */
-
-#define DRIVER_NAME "gptimer_example"
-
-#ifdef IRQ_TIMER5
-#define SAMPLE_IRQ_TIMER IRQ_TIMER5
-#else
-#define SAMPLE_IRQ_TIMER IRQ_TIMER2
-#endif
-
-struct gptimer_data {
-       uint32_t period, width;
-};
-static struct gptimer_data data;
-
-/* ... random driver state ... */
-
-static irqreturn_t gptimer_example_irq(int irq, void *dev_id)
-{
-       struct gptimer_data *data = dev_id;
-
-       /* make sure it was our timer which caused the interrupt */
-       if (!get_gptimer_intr(TIMER5_id))
-               return IRQ_NONE;
-
-       /* read the width/period values that were captured for the waveform */
-       data->width = get_gptimer_pwidth(TIMER5_id);
-       data->period = get_gptimer_period(TIMER5_id);
-
-       /* acknowledge the interrupt */
-       clear_gptimer_intr(TIMER5_id);
-
-       /* tell the upper layers we took care of things */
-       return IRQ_HANDLED;
-}
-
-/* ... random driver code ... */
-
-static int __init gptimer_example_init(void)
-{
-       int ret;
-
-       /* grab the peripheral pins */
-       ret = peripheral_request(P_TMR5, DRIVER_NAME);
-       if (ret) {
-               printk(KERN_NOTICE DRIVER_NAME ": peripheral request failed\n");
-               return ret;
-       }
-
-       /* grab the IRQ for the timer */
-       ret = request_irq(SAMPLE_IRQ_TIMER, gptimer_example_irq,
-                       IRQF_SHARED, DRIVER_NAME, &data);
-       if (ret) {
-               printk(KERN_NOTICE DRIVER_NAME ": IRQ request failed\n");
-               peripheral_free(P_TMR5);
-               return ret;
-       }
-
-       /* setup the timer and enable it */
-       set_gptimer_config(TIMER5_id,
-                       WDTH_CAP | PULSE_HI | PERIOD_CNT | IRQ_ENA);
-       enable_gptimers(TIMER5bit);
-
-       return 0;
-}
-module_init(gptimer_example_init);
-
-static void __exit gptimer_example_exit(void)
-{
-       disable_gptimers(TIMER5bit);
-       free_irq(SAMPLE_IRQ_TIMER, &data);
-       peripheral_free(P_TMR5);
-}
-module_exit(gptimer_example_exit);
-
-MODULE_LICENSE("BSD");
index e5b6497116f41be1a6e91e9d59b77a0991eabfa1..c75b64a85859fbca05571093c02799a25f994e1a 100644 (file)
@@ -309,3 +309,4 @@ Version History
        with a reshape in progress.
 1.9.0   Add support for RAID level takeover/reshape/region size
        and set size reduction.
+1.9.1   Fix activation of existing RAID 4/10 mapped devices
diff --git a/Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt b/Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt
new file mode 100644 (file)
index 0000000..b69bb68
--- /dev/null
@@ -0,0 +1,17 @@
+Binding for ASCII LCD displays on Imagination Technologies boards
+
+Required properties:
+- compatible : should be one of:
+    "img,boston-lcd"
+    "mti,malta-lcd"
+    "mti,sead3-lcd"
+
+Required properties for "img,boston-lcd":
+- reg : memory region locating the device registers
+
+Required properties for "mti,malta-lcd" or "mti,sead3-lcd":
+- regmap: phandle of the system controller containing the LCD registers
+- offset: offset in bytes to the LCD registers within the system controller
+
+The layout of the registers & properties of the display are determined
+from the compatible string, making this binding somewhat trivial.
index c7179d3b5c33e11d0f8b1af713cc7625b96ecfd5..812163060fa3e4cb4e4f39fe22c9fbd2aeb41d9f 100644 (file)
@@ -24,7 +24,7 @@ Example:
                reg = <0x61840000 0x4000>;
 
                clock {
-                       compatible = "socionext,uniphier-ld20-clock";
+                       compatible = "socionext,uniphier-ld11-clock";
                        #clock-cells = <1>;
                };
 
@@ -43,8 +43,8 @@ Provided clocks:
 21: USB3 ch1 PHY1
 
 
-Media I/O (MIO) clock
----------------------
+Media I/O (MIO) clock, SD clock
+-------------------------------
 
 Required properties:
 - compatible: should be one of the following:
@@ -52,10 +52,10 @@ Required properties:
     "socionext,uniphier-ld4-mio-clock"  - for LD4 SoC.
     "socionext,uniphier-pro4-mio-clock" - for Pro4 SoC.
     "socionext,uniphier-sld8-mio-clock" - for sLD8 SoC.
-    "socionext,uniphier-pro5-mio-clock" - for Pro5 SoC.
-    "socionext,uniphier-pxs2-mio-clock" - for PXs2/LD6b SoC.
+    "socionext,uniphier-pro5-sd-clock"  - for Pro5 SoC.
+    "socionext,uniphier-pxs2-sd-clock"  - for PXs2/LD6b SoC.
     "socionext,uniphier-ld11-mio-clock" - for LD11 SoC.
-    "socionext,uniphier-ld20-mio-clock" - for LD20 SoC.
+    "socionext,uniphier-ld20-sd-clock"  - for LD20 SoC.
 - #clock-cells: should be 1.
 
 Example:
@@ -66,7 +66,7 @@ Example:
                reg = <0x59810000 0x800>;
 
                clock {
-                       compatible = "socionext,uniphier-ld20-mio-clock";
+                       compatible = "socionext,uniphier-ld11-mio-clock";
                        #clock-cells = <1>;
                };
 
@@ -112,7 +112,7 @@ Example:
                reg = <0x59820000 0x200>;
 
                clock {
-                       compatible = "socionext,uniphier-ld20-peri-clock";
+                       compatible = "socionext,uniphier-ld11-peri-clock";
                        #clock-cells = <1>;
                };
 
index f31b2ad1552bd425c232c05b8ae3ed431b030260..5fa691e6f6388320acd4199995ef0072e9e70faa 100644 (file)
@@ -32,6 +32,14 @@ wants to support one of the below features, it should adapt the bindings below.
 - clock-frequency
        frequency of bus clock in Hz.
 
+- i2c-bus
+       For I2C adapters that have child nodes that are a mixture of both I2C
+       devices and non-I2C devices, the 'i2c-bus' subnode can be used for
+       populating I2C devices. If the 'i2c-bus' subnode is present, only
+       subnodes of this will be considered as I2C slaves. The properties,
+       '#address-cells' and '#size-cells' must be defined under this subnode
+       if present.
+
 - i2c-scl-falling-time-ns
        Number of nanoseconds the SCL signal takes to fall; t(f) in the I2C
        specification.
index 1416c6a0d2cd8c198944f5c4b2718e27263be44f..fbbad6446741e53fc9a4e65ec74ffd793e2fc44d 100644 (file)
@@ -51,7 +51,6 @@ fsl,sgtl5000          SGTL5000: Ultra Low-Power Audio Codec
 gmt,g751               G751: Digital Temperature Sensor and Thermal Watchdog with Two-Wire Interface
 infineon,slb9635tt     Infineon SLB9635 (Soft-) I2C TPM (old protocol, max 100khz)
 infineon,slb9645tt     Infineon SLB9645 I2C TPM (new protocol, max 400khz)
-isil,isl12057          Intersil ISL12057 I2C RTC Chip
 isil,isl29028          Intersil ISL29028 Ambient Light and Proximity Sensor
 maxim,ds1050           5 Bit Programmable, Pulse-Width Modulator
 maxim,max1237          Low-Power, 4-/12-Channel, 2-Wire Serial, 12-Bit ADCs
index f97993be2dcbdc82dee8866e3510b18f6e121d54..d3b273e4336a72623263d9e62e50a9c39f45c178 100644 (file)
@@ -14,6 +14,7 @@ length of memory mapped region.
 representing a ethernet device.
 - dsaf-handle: phandle, specifies a reference to a node
 representing a dsaf device.
+- node-guid: a number that uniquely identifies a device or component
 - #address-cells: must be 2
 - #size-cells: must be 2
 Optional properties:
@@ -32,6 +33,7 @@ Example:
                        dma-coherent;
                        eth-handle = <&eth2 &eth3 &eth4 &eth5 &eth6 &eth7>;
                        dsaf-handle = <&soc0_dsa>;
+                       node-guid = [00 9A CD 00 00 01 02 03];
                        #address-cells = <2>;
                        #size-cells = <2>;
                        interrupt-parent = <&mbigen_dsa>;
diff --git a/Documentation/devicetree/bindings/input/touchscreen/melfas_mip4.txt b/Documentation/devicetree/bindings/input/touchscreen/melfas_mip4.txt
new file mode 100644 (file)
index 0000000..7b8944c
--- /dev/null
@@ -0,0 +1,21 @@
+* MELFAS MIP4 Touchscreen
+
+Required properties:
+- compatible: must be "melfas,mip4_ts"
+- reg: I2C slave address of the chip (0x48 or 0x34)
+- interrupt-parent: interrupt controller to which the chip is connected
+- interrupts: interrupt to which the chip is connected
+
+Optional properties:
+- ce-gpios: GPIO connected to the CE (chip enable) pin of the chip
+
+Example:
+       i2c@00000000 {
+               touchscreen: melfas_mip4@48 {
+                       compatible = "melfas,mip4_ts";
+                       reg = <0x48>;
+                       interrupt-parent = <&gpio>;
+                       interrupts = <0 IRQ_TYPE_EDGE_FALLING>;
+                       ce-gpios = <&gpio 0 GPIO_ACTIVE_HIGH>;
+               };
+       };
diff --git a/Documentation/devicetree/bindings/ipmi.txt b/Documentation/devicetree/bindings/ipmi.txt
deleted file mode 100644 (file)
index d5f1a87..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-IPMI device
-
-Required properties:
-- compatible: should be one of ipmi-kcs, ipmi-smic, or ipmi-bt
-- device_type: should be ipmi
-- reg: Address and length of the register set for the device
-
-Optional properties:
-- interrupts: The interrupt for the device.  Without this the interface
-       is polled.
-- reg-size - The size of the register.  Defaults to 1
-- reg-spacing - The number of bytes between register starts.  Defaults to 1
-- reg-shift - The amount to shift the registers to the right to get the data
-       into bit zero.
-
-Example:
-
-smic@fff3a000 {
-       compatible = "ipmi-smic";
-       device_type = "ipmi";
-       reg = <0xfff3a000 0x1000>;
-       interrupts = <0 24 4>;
-       reg-size = <4>;
-       reg-spacing = <4>;
-};
diff --git a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-ibt-bmc.txt
new file mode 100644 (file)
index 0000000..6f28969
--- /dev/null
@@ -0,0 +1,23 @@
+* Aspeed BT (Block Transfer) IPMI interface
+
+The Aspeed SOCs (AST2400 and AST2500) are commonly used as BMCs
+(BaseBoard Management Controllers) and the BT interface can be used to
+perform in-band IPMI communication with their host.
+
+Required properties:
+
+- compatible : should be "aspeed,ast2400-ibt-bmc"
+- reg: physical address and size of the registers
+
+Optional properties:
+
+- interrupts: interrupt generated by the BT interface. Without an
+  interrupt, the driver will operate in poll mode.
+
+Example:
+
+       ibt@1e789140 {
+               compatible = "aspeed,ast2400-ibt-bmc";
+               reg = <0x1e789140 0x18>;
+               interrupts = <8>;
+       };
diff --git a/Documentation/devicetree/bindings/ipmi/ipmi-smic.txt b/Documentation/devicetree/bindings/ipmi/ipmi-smic.txt
new file mode 100644 (file)
index 0000000..d5f1a87
--- /dev/null
@@ -0,0 +1,25 @@
+IPMI device
+
+Required properties:
+- compatible: should be one of ipmi-kcs, ipmi-smic, or ipmi-bt
+- device_type: should be ipmi
+- reg: Address and length of the register set for the device
+
+Optional properties:
+- interrupts: The interrupt for the device.  Without this the interface
+       is polled.
+- reg-size - The size of the register.  Defaults to 1
+- reg-spacing - The number of bytes between register starts.  Defaults to 1
+- reg-shift - The amount to shift the registers to the right to get the data
+       into bit zero.
+
+Example:
+
+smic@fff3a000 {
+       compatible = "ipmi-smic";
+       device_type = "ipmi";
+       reg = <0xfff3a000 0x1000>;
+       interrupts = <0 24 4>;
+       reg-size = <4>;
+       reg-spacing = <4>;
+};
index 4a7e030e4f9bae7880650d9fbb3afb79673a4f77..e4e1cd91fb1f2f9701d6df46da1b3461aaaa3a3d 100644 (file)
@@ -2,9 +2,9 @@
 
 Required properties:
 
-- compatible: "brcm,bcm3384", "brcm,bcm33843"
+- compatible: "brcm,bcm3368", "brcm,bcm3384", "brcm,bcm33843"
               "brcm,bcm3384-viper", "brcm,bcm33843-viper"
-              "brcm,bcm6328", "brcm,bcm6358", "brcm,bcm6368",
+              "brcm,bcm6328", "brcm,bcm6358", "brcm,bcm6362", "brcm,bcm6368",
               "brcm,bcm63168", "brcm,bcm63268",
               "brcm,bcm7125", "brcm,bcm7346", "brcm,bcm7358", "brcm,bcm7360",
               "brcm,bcm7362", "brcm,bcm7420", "brcm,bcm7425"
index 4e00e859e885a0ce8ae59ae234b2637f2e894e70..bfa461aaac99b3e3033727572009c308efc3b6bf 100644 (file)
@@ -43,6 +43,9 @@ Optional properties:
   reset signal present internally in some host controller IC designs.
   See Documentation/devicetree/bindings/reset/reset.txt for details.
 
+* reset-names: name of the reset line listed in "resets". Must be "reset".
+       (Used together with the "resets" property.)
+
 * clocks: from common clock binding: handle to biu and ciu clocks for the
   bus interface unit clock and the card interface unit clock.
 
@@ -103,6 +106,8 @@ board specific portions as listed below.
                interrupts = <0 75 0>;
                #address-cells = <1>;
                #size-cells = <0>;
+               resets = <&rst 20>;
+               reset-names = "reset";
        };
 
 [board specific internal DMA resources]
index ba5ecc1041a5e74c9bffd3672fae4c7e3ee789cc..2fefa1a44afd4472f75a8fd74b73d7ae6c9083b9 100644 (file)
@@ -2,11 +2,17 @@ Broadcom AMAC Ethernet Controller Device Tree Bindings
 -------------------------------------------------------------
 
 Required properties:
- - compatible: "brcm,amac" or "brcm,nsp-amac"
- - reg:                Address and length of the GMAC registers,
-               Address and length of the GMAC IDM registers
- - reg-names:  Names of the registers.  Must have both "amac_base" and
-               "idm_base"
+ - compatible: "brcm,amac"
+               "brcm,nsp-amac"
+               "brcm,ns2-amac"
+ - reg:                Address and length of the register set for the device. It
+               contains the information of registers in the same order as
+               described by reg-names
+ - reg-names:  Names of the registers.
+               "amac_base":    Address and length of the GMAC registers
+               "idm_base":     Address and length of the GMAC IDM registers
+               "nicpm_base":   Address and length of the NIC Port Manager
+                               registers (required for Northstar2)
  - interrupts: Interrupt number
 
 Optional properties:
index 32025eb4b31bc80020533b88d038378b30ae342f..b3dd6b40e0de29a71cdd96627cca55a09215ea9d 100644 (file)
@@ -14,7 +14,8 @@ The properties described here are those specific to Marvell devices.
 Additional required and optional properties can be found in dsa.txt.
 
 Required properties:
-- compatible           : Should be one of "marvell,mv88e6085",
+- compatible          : Should be one of "marvell,mv88e6085" or
+                        "marvell,mv88e6190"
 - reg                  : Address on the MII bus for the switch.
 
 Optional properties:
index d4b7f2e4998403113158027d99fbaef8d430dcc6..abfbeecbcf3927f63a46dc7c578562e2c199dd4b 100644 (file)
@@ -45,6 +45,12 @@ Required properties:
   depends on the hardware user manual.
 - port-mode-offset: is offset of port mode field for each port in dsaf. Its
   value depends on the hardware user manual.
+- mc-mac-mask: mask applied to the multicast address; each bit selects how the
+  corresponding bit of the MAC address is matched:
+  1 means the bit must match exactly; the TCAM checks this bit of the
+    MAC address.
+  0 means the bit is ignored (fuzzy match); the TCAM does not care about this
+    bit of the MAC address.
 
 [1] Documentation/devicetree/bindings/net/phy.txt
 
@@ -74,10 +80,12 @@ dsaf0: dsa@c7000000 {
                reg = 0;
                phy-handle = <&phy0>;
                serdes-syscon = <&serdes>;
+               mc-mac-mask = [ff f0 00 00 00 00];
        };
 
        port@1 {
                 reg = 1;
                 serdes-syscon = <&serdes>;
+               mc-mac-mask = [ff f0 00 00 00 00];
         };
 };
index 73be8970815eef93c17d80b68422d82ac055e068..7aa840c8768d2f699f99d3dcbd0e8983214f9419 100644 (file)
@@ -1,7 +1,10 @@
-* Marvell Armada 370 / Armada XP Ethernet Controller (NETA)
+* Marvell Armada 370 / Armada XP / Armada 3700 Ethernet Controller (NETA)
 
 Required properties:
-- compatible: "marvell,armada-370-neta" or "marvell,armada-xp-neta".
+- compatible: should be one of the following:
+       "marvell,armada-370-neta"
+       "marvell,armada-xp-neta"
+       "marvell,armada-3700-neta"
 - reg: address and length of the register set for the device.
 - interrupts: interrupt for the device
 - phy: See ethernet.txt file in the same directory.
index bce52b2ec55ece41a14b997772956e077a7259e6..6fd988c84c4f9f4d7eb7df2f2c52283c27d164d1 100644 (file)
@@ -49,6 +49,7 @@ Optional port properties:
 and
 
  - phy-handle: See ethernet.txt file in the same directory.
+ - phy-mode: See ethernet.txt file in the same directory.
 
 or
 
index 8516929c7251877615b0081ae6d3c971b8a0c920..065e8bdb957d9d577e987dca04ce49c84afb2935 100644 (file)
@@ -3,7 +3,7 @@ Properties for an MDIO bus multiplexer controlled by a memory-mapped device
 This is a special case of a MDIO bus multiplexer.  A memory-mapped device,
 like an FPGA, is used to control which child bus is connected.  The mdio-mux
 node must be a child of the memory-mapped device.  The driver currently only
-supports devices with eight-bit registers.
+supports devices with 8, 16 or 32-bit registers.
 
 Required properties in addition to the generic multiplexer properties:
 
@@ -11,7 +11,7 @@ Required properties in addition to the generic multiplexer properties:
 
 - reg : integer, contains the offset of the register that controls the bus
        multiplexer.  The size field in the 'reg' property is the size of
-       register, and must therefore be 1.
+       register, and must therefore be 1, 2, or 4.
 
 - mux-mask : integer, contains an eight-bit mask that specifies which
        bits in the register control the actual bus multiplexer.  The
diff --git a/Documentation/devicetree/bindings/net/oxnas-dwmac.txt b/Documentation/devicetree/bindings/net/oxnas-dwmac.txt
new file mode 100644 (file)
index 0000000..df0534e
--- /dev/null
@@ -0,0 +1,39 @@
+* Oxford Semiconductor OXNAS DWMAC Ethernet controller
+
+The device inherits all the properties of the dwmac/stmmac devices
+described in the file stmmac.txt in the current directory with the
+following changes.
+
+Required properties on all platforms:
+
+- compatible:  For the OX820 SoC, it should be :
+               - "oxsemi,ox820-dwmac" to select glue
+               - "snps,dwmac-3.512" to select IP version.
+
+- clocks: Should contain phandles to the following clocks
+- clock-names: Should contain the following:
+               - "stmmaceth" for the host clock - see stmmac.txt
+               - "gmac" for the peripheral gate clock
+
+- oxsemi,sys-ctrl: a phandle to the system controller syscon node
+
+Example :
+
+etha: ethernet@40400000 {
+       compatible = "oxsemi,ox820-dwmac", "snps,dwmac-3.512";
+       reg = <0x40400000 0x2000>;
+       interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>,
+                    <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+       interrupt-names = "macirq", "eth_wake_irq";
+       mac-address = [000000000000]; /* Filled in by U-Boot */
+       phy-mode = "rgmii";
+
+       clocks = <&stdclk CLK_820_ETHA>, <&gmacclk>;
+       clock-names = "gmac", "stmmaceth";
+       resets = <&reset RESET_MAC>;
+
+       /* Regmap for sys registers */
+       oxsemi,sys-ctrl = <&sys>;
+
+       status = "disabled";
+};
index bc1c3c8bf8fa37fa7e08dcabc65f1752a8a79749..54749b60a4666adb70514674761fd1031320376b 100644 (file)
@@ -35,6 +35,12 @@ Optional Properties:
 - broken-turn-around: If set, indicates the PHY device does not correctly
   release the turn around line low at the end of a MDIO transaction.
 
+- enet-phy-lane-swap: If set, indicates the PHY will swap the TX/RX lanes to
+  compensate for the board being designed with the lanes swapped.
+
+- eee-broken-modes: Bits to clear in the MDIO_AN_EEE_ADV register to
+  disable EEE broken modes.
+
 Example:
 
 ethernet-phy@0 {
index ba67b39939c10b15f7e1a99291c935a1c2c81bf8..71aeda1ca05598d74e8db3f429f212d0095f702a 100644 (file)
@@ -26,13 +26,16 @@ Required properties:
        - "sys"
        - "legacy"
        - "client"
-- resets: Must contain five entries for each entry in reset-names.
+- resets: Must contain seven entries for each entry in reset-names.
           See ../reset/reset.txt for details.
 - reset-names: Must include the following names
        - "core"
        - "mgmt"
        - "mgmt-sticky"
        - "pipe"
+       - "pm"
+       - "aclk"
+       - "pclk"
 - pinctrl-names : The pin control state names
 - pinctrl-0: The "default" pinctrl state
 - #interrupt-cells: specifies the number of cells needed to encode an
@@ -86,8 +89,10 @@ pcie0: pcie@f8000000 {
        reg = <0x0 0xf8000000 0x0 0x2000000>, <0x0 0xfd000000 0x0 0x1000000>;
        reg-names = "axi-base", "apb-base";
        resets = <&cru SRST_PCIE_CORE>, <&cru SRST_PCIE_MGMT>,
-                <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE>;
-       reset-names = "core", "mgmt", "mgmt-sticky", "pipe";
+                <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE> ,
+                <&cru SRST_PCIE_PM>, <&cru SRST_P_PCIE>, <&cru SRST_A_PCIE>;
+       reset-names = "core", "mgmt", "mgmt-sticky", "pipe",
+                     "pm", "pclk", "aclk";
        phys = <&pcie_phy>;
        phy-names = "pcie-phy";
        pinctrl-names = "default";
index 5e60ad18f147c2399b418026968068a9ccc7cf0d..2ad18c4ea55c5f022f0181c78896d5a8e33d4e74 100644 (file)
@@ -43,7 +43,9 @@ aspeed,ast2500-pinctrl, aspeed,g5-pinctrl:
 
 GPID0 GPID2 GPIE0 I2C10 I2C11 I2C12 I2C13 I2C14 I2C3 I2C4 I2C5 I2C6 I2C7 I2C8
 I2C9 MAC1LINK MDIO1 MDIO2 OSCCLK PEWAKE PWM0 PWM1 PWM2 PWM3 PWM4 PWM5 PWM6 PWM7
-RGMII1 RGMII2 RMII1 RMII2 SD1 SPI1 TIMER4 TIMER5 TIMER6 TIMER7 TIMER8
+RGMII1 RGMII2 RMII1 RMII2 SD1 SPI1 SPI1DEBUG SPI1PASSTHRU TIMER4 TIMER5 TIMER6
+TIMER7 TIMER8 VGABIOSROM
+
 
 Examples:
 
index f9753c416974d553cce347f2fbf3c82bc56d9afc..b24583aa34c3bf45363e3eee4ceac292d713b67d 100644 (file)
@@ -14,11 +14,6 @@ Required properies:
  - #size-cells : The value of this property must be 1
  - ranges      : defines mapping between pin controller node (parent) to
    gpio-bank node (children).
- - interrupt-parent: phandle of the interrupt parent to which the external
-   GPIO interrupts are forwarded to.
- - st,syscfg: Should be phandle/offset pair. The phandle to the syscon node
-   which includes IRQ mux selection register, and the offset of the IRQ mux
-   selection register.
  - pins-are-numbered: Specify the subnodes are using numbered pinmux to
    specify pins.
 
@@ -37,6 +32,11 @@ Required properties:
 
 Optional properties:
  - reset:        : Reference to the reset controller
+ - interrupt-parent: phandle of the interrupt parent to which the external
+   GPIO interrupts are forwarded to.
+ - st,syscfg: Should be phandle/offset pair. The phandle to the syscon node
+   which includes IRQ mux selection register, and the offset of the IRQ mux
+   selection register.
 
 Example:
 #include <dt-bindings/pinctrl/stm32f429-pinfunc.h>
index e6bbfccd56c326214d4d7cdaf78c664f34f863e5..5020524cddebf70ddaedeeb6b61bd47bdcb0a4a1 100644 (file)
@@ -6,25 +6,25 @@ System reset
 
 Required properties:
 - compatible: should be one of the following:
-    "socionext,uniphier-sld3-reset" - for PH1-sLD3 SoC.
-    "socionext,uniphier-ld4-reset"  - for PH1-LD4 SoC.
-    "socionext,uniphier-pro4-reset" - for PH1-Pro4 SoC.
-    "socionext,uniphier-sld8-reset" - for PH1-sLD8 SoC.
-    "socionext,uniphier-pro5-reset" - for PH1-Pro5 SoC.
-    "socionext,uniphier-pxs2-reset" - for ProXstream2/PH1-LD6b SoC.
-    "socionext,uniphier-ld11-reset" - for PH1-LD11 SoC.
-    "socionext,uniphier-ld20-reset" - for PH1-LD20 SoC.
+    "socionext,uniphier-sld3-reset" - for sLD3 SoC.
+    "socionext,uniphier-ld4-reset"  - for LD4 SoC.
+    "socionext,uniphier-pro4-reset" - for Pro4 SoC.
+    "socionext,uniphier-sld8-reset" - for sLD8 SoC.
+    "socionext,uniphier-pro5-reset" - for Pro5 SoC.
+    "socionext,uniphier-pxs2-reset" - for PXs2/LD6b SoC.
+    "socionext,uniphier-ld11-reset" - for LD11 SoC.
+    "socionext,uniphier-ld20-reset" - for LD20 SoC.
 - #reset-cells: should be 1.
 
 Example:
 
        sysctrl@61840000 {
-               compatible = "socionext,uniphier-ld20-sysctrl",
+               compatible = "socionext,uniphier-ld11-sysctrl",
                             "simple-mfd", "syscon";
                reg = <0x61840000 0x4000>;
 
                reset {
-                       compatible = "socionext,uniphier-ld20-reset";
+                       compatible = "socionext,uniphier-ld11-reset";
                        #reset-cells = <1>;
                };
 
@@ -32,30 +32,30 @@ Example:
        };
 
 
-Media I/O (MIO) reset
----------------------
+Media I/O (MIO) reset, SD reset
+-------------------------------
 
 Required properties:
 - compatible: should be one of the following:
-    "socionext,uniphier-sld3-mio-reset" - for PH1-sLD3 SoC.
-    "socionext,uniphier-ld4-mio-reset"  - for PH1-LD4 SoC.
-    "socionext,uniphier-pro4-mio-reset" - for PH1-Pro4 SoC.
-    "socionext,uniphier-sld8-mio-reset" - for PH1-sLD8 SoC.
-    "socionext,uniphier-pro5-mio-reset" - for PH1-Pro5 SoC.
-    "socionext,uniphier-pxs2-mio-reset" - for ProXstream2/PH1-LD6b SoC.
-    "socionext,uniphier-ld11-mio-reset" - for PH1-LD11 SoC.
-    "socionext,uniphier-ld20-mio-reset" - for PH1-LD20 SoC.
+    "socionext,uniphier-sld3-mio-reset" - for sLD3 SoC.
+    "socionext,uniphier-ld4-mio-reset"  - for LD4 SoC.
+    "socionext,uniphier-pro4-mio-reset" - for Pro4 SoC.
+    "socionext,uniphier-sld8-mio-reset" - for sLD8 SoC.
+    "socionext,uniphier-pro5-sd-reset"  - for Pro5 SoC.
+    "socionext,uniphier-pxs2-sd-reset"  - for PXs2/LD6b SoC.
+    "socionext,uniphier-ld11-mio-reset" - for LD11 SoC.
+    "socionext,uniphier-ld20-sd-reset"  - for LD20 SoC.
 - #reset-cells: should be 1.
 
 Example:
 
        mioctrl@59810000 {
-               compatible = "socionext,uniphier-ld20-mioctrl",
+               compatible = "socionext,uniphier-ld11-mioctrl",
                             "simple-mfd", "syscon";
                reg = <0x59810000 0x800>;
 
                reset {
-                       compatible = "socionext,uniphier-ld20-mio-reset";
+                       compatible = "socionext,uniphier-ld11-mio-reset";
                        #reset-cells = <1>;
                };
 
@@ -68,24 +68,24 @@ Peripheral reset
 
 Required properties:
 - compatible: should be one of the following:
-    "socionext,uniphier-ld4-peri-reset"  - for PH1-LD4 SoC.
-    "socionext,uniphier-pro4-peri-reset" - for PH1-Pro4 SoC.
-    "socionext,uniphier-sld8-peri-reset" - for PH1-sLD8 SoC.
-    "socionext,uniphier-pro5-peri-reset" - for PH1-Pro5 SoC.
-    "socionext,uniphier-pxs2-peri-reset" - for ProXstream2/PH1-LD6b SoC.
-    "socionext,uniphier-ld11-peri-reset" - for PH1-LD11 SoC.
-    "socionext,uniphier-ld20-peri-reset" - for PH1-LD20 SoC.
+    "socionext,uniphier-ld4-peri-reset"  - for LD4 SoC.
+    "socionext,uniphier-pro4-peri-reset" - for Pro4 SoC.
+    "socionext,uniphier-sld8-peri-reset" - for sLD8 SoC.
+    "socionext,uniphier-pro5-peri-reset" - for Pro5 SoC.
+    "socionext,uniphier-pxs2-peri-reset" - for PXs2/LD6b SoC.
+    "socionext,uniphier-ld11-peri-reset" - for LD11 SoC.
+    "socionext,uniphier-ld20-peri-reset" - for LD20 SoC.
 - #reset-cells: should be 1.
 
 Example:
 
        perictrl@59820000 {
-               compatible = "socionext,uniphier-ld20-perictrl",
+               compatible = "socionext,uniphier-ld11-perictrl",
                             "simple-mfd", "syscon";
                reg = <0x59820000 0x200>;
 
                reset {
-                       compatible = "socionext,uniphier-ld20-peri-reset";
+                       compatible = "socionext,uniphier-ld11-peri-reset";
                        #reset-cells = <1>;
                };
 
index 8e76f26487966b40d3041b948624b91bfacfe883..9882b819f173f3277e55b73cbbcffa4b89f4e1cc 100644 (file)
@@ -11,7 +11,7 @@ Optional properties:
 - trickle-diode-disable : Do not use internal trickle charger diode
        Should be given if internal trickle charger diode should be disabled
 Example:
-       ds1390: rtc@68 {
+       ds1390: rtc@0 {
                compatible = "dallas,ds1390";
                trickle-resistor-ohms = <250>;
                reg = <0>;
diff --git a/Documentation/devicetree/bindings/rtc/epson,rx8900.txt b/Documentation/devicetree/bindings/rtc/epson,rx8900.txt
new file mode 100644 (file)
index 0000000..3f61e51
--- /dev/null
@@ -0,0 +1,22 @@
+Real Time Clock driver for:
+  - Epson RX8900
+  - Micro Crystal rv8803
+
+Required properties:
+- compatible: should be: "microcrystal,rv8803" or "epson,rx8900"
+- reg : the I2C address of the device for I2C
+
+Optional properties:
+- epson,vdet-disable : boolean, if present will disable voltage detector.
+  Should be set if no backup battery is used.
+- trickle-diode-disable : boolean, if present will disable internal trickle
+  charger diode
+
+Example:
+
+       rtc: rtc@32 {
+               compatible = "epson,rx8900";
+               reg = <0x32>;
+               epson,vdet-disable;
+               trickle-diode-disable;
+       };
index bf7d11ae9bea68f107936211d8256203f89a6157..bee41f97044e11606bc451c8fa29b0b49c2b202a 100644 (file)
@@ -18,6 +18,18 @@ Optional properties:
   through pmic_power_en
 - clocks: Any internal or external clocks feeding in to rtc
 - clock-names: Corresponding names of the clocks
+- pinctrl-0: a phandle pointing to the pin settings for the device
+- pinctrl-names: should be "default"
+
+Optional subnodes:
+- generic pinctrl node
+
+Required pinctrl subnodes properties:
+- pins - Names of ext_wakeup pins to configure
+
+Optional pinctrl subnodes properties:
+- input-enable - Enables ext_wakeup
+- ti,active-high - Set input active high (by default active low)
 
 Example:
 
@@ -30,4 +42,13 @@ rtc@1c23000 {
        system-power-controller;
        clocks = <&clk_32k_rtc>, <&clk_32768_ck>;
        clock-names = "ext-clk", "int-clk";
+
+       pinctrl-0 = <&ext_wakeup>;
+       pinctrl-names = "default";
+
+       ext_wakeup: ext-wakeup {
+               pins = "ext_wakeup0";
+               input-enable;
+               ti,active-high;
+       };
 };
index a3eb154c32caf9f273c8801811565722a3201e38..227bb770b0276af8cb716bd89d88e8c055c168f8 100644 (file)
@@ -1,7 +1,9 @@
 Binding for Cadence UART Controller
 
 Required properties:
-- compatible : should be "cdns,uart-r1p8", or "xlnx,xuartps"
+- compatible :
+  Use "xlnx,xuartps","cdns,uart-r1p8" for Zynq-7xxx SoC.
+  Use "xlnx,zynqmp-uart","cdns,uart-r1p12" for Zynq Ultrascale+ MPSoC.
 - reg: Should contain UART controller registers location and length.
 - interrupts: Should contain UART controller interrupts.
 - clocks: Must contain phandles to the UART clocks
index 1e4000d83aee06828c974000e5122567b8fda631..8d27d1a603e7bf755c8451186cdb507dd1d50a58 100644 (file)
@@ -9,6 +9,14 @@ Required properties:
     - "renesas,scifb-r8a73a4" for R8A73A4 (R-Mobile APE6) SCIFB compatible UART.
     - "renesas,scifa-r8a7740" for R8A7740 (R-Mobile A1) SCIFA compatible UART.
     - "renesas,scifb-r8a7740" for R8A7740 (R-Mobile A1) SCIFB compatible UART.
+    - "renesas,scif-r8a7743" for R8A7743 (RZ/G1M) SCIF compatible UART.
+    - "renesas,scifa-r8a7743" for R8A7743 (RZ/G1M) SCIFA compatible UART.
+    - "renesas,scifb-r8a7743" for R8A7743 (RZ/G1M) SCIFB compatible UART.
+    - "renesas,hscif-r8a7743" for R8A7743 (RZ/G1M) HSCIF compatible UART.
+    - "renesas,scif-r8a7745" for R8A7745 (RZ/G1E) SCIF compatible UART.
+    - "renesas,scifa-r8a7745" for R8A7745 (RZ/G1E) SCIFA compatible UART.
+    - "renesas,scifb-r8a7745" for R8A7745 (RZ/G1E) SCIFB compatible UART.
+    - "renesas,hscif-r8a7745" for R8A7745 (RZ/G1E) HSCIF compatible UART.
     - "renesas,scif-r8a7778" for R8A7778 (R-Car M1) SCIF compatible UART.
     - "renesas,scif-r8a7779" for R8A7779 (R-Car H1) SCIF compatible UART.
     - "renesas,scif-r8a7790" for R8A7790 (R-Car H2) SCIF compatible UART.
index fd40c852d7c7e18c12e5fabc49f0b141614267ac..462b04e8209f4d8de453005b52967ed16a63224b 100644 (file)
@@ -12,7 +12,7 @@ Required properties:
 
 Optional properties:
 - ti,dmic: phandle for the OMAP dmic node if the machine have it connected
-- ti,jack_detection: Need to be present if the board capable to detect jack
+- ti,jack-detection: Needs to be present if the board is capable of detecting jack
   insertion, removal.
 
 Available audio endpoints for the audio-routing table:
diff --git a/Documentation/devicetree/bindings/timer/jcore,pit.txt b/Documentation/devicetree/bindings/timer/jcore,pit.txt
new file mode 100644 (file)
index 0000000..af5dd35
--- /dev/null
@@ -0,0 +1,24 @@
+J-Core Programmable Interval Timer and Clocksource
+
+Required properties:
+
+- compatible: Must be "jcore,pit".
+
+- reg: Memory region(s) for timer/clocksource registers. For SMP,
+  there should be one region per cpu, indexed by the sequential,
+  zero-based hardware cpu number.
+
+- interrupts: An interrupt to assign for the timer. The actual pit
+  core is integrated with the aic and allows the timer interrupt
+  assignment to be programmed by software, but this property is
+  required in order to reserve an interrupt number that doesn't
+  conflict with other devices.
+
+
+Example:
+
+timer@200 {
+       compatible = "jcore,pit";
+       reg = < 0x200 0x30 0x500 0x30 >;
+       interrupts = < 0x48 >;
+};
index 455f2c310a1b90ce94a087e1983a7e8d5ee869ab..2c30a5479069b98ef22467fd91a0f090c87f1d91 100644 (file)
@@ -28,10 +28,7 @@ Refer to phy/phy-bindings.txt for generic phy consumer properties
 - g-use-dma: enable dma usage in gadget driver.
 - g-rx-fifo-size: size of rx fifo size in gadget mode.
 - g-np-tx-fifo-size: size of non-periodic tx fifo size in gadget mode.
-
-Deprecated properties:
-- g-tx-fifo-size: size of periodic tx fifo per endpoint (except ep0)
-  in gadget mode.
+- g-tx-fifo-size: size of periodic tx fifo per endpoint (except ep0) in gadget mode.
 
 Example:
 
index 24c6f658bce147162d92f2009c1a4d319c490be3..f0a48ea78659c933839554ca879babb1b621b264 100644 (file)
@@ -163,9 +163,11 @@ maxim      Maxim Integrated Products
 meas   Measurement Specialties
 mediatek       MediaTek Inc.
 melexis        Melexis N.V.
+melfas MELFAS Inc.
 merrii Merrii Technology Co., Ltd.
 micrel Micrel Inc.
 microchip      Microchip Technology Inc.
+microcrystal   Micro Crystal AG
 micron Micron Technology Inc.
 minix  MINIX Technology Ltd.
 mitsubishi     Mitsubishi Electric Corporation
index 167070895498893a5585098cfb73d65a666a11e0..ca9d1eb46bc00e38f1f1250775ce43cac241e9a7 100644 (file)
@@ -332,6 +332,10 @@ MEM
 MFD
  devm_mfd_add_devices()
 
+PER-CPU MEM
+  devm_alloc_percpu()
+  devm_free_percpu()
+
 PCI
   pcim_enable_device() : after success, all PCI ops become managed
   pcim_pin_device()    : keep PCI device enabled after release
index 9855ad044386891d017469fd39cf85be13bc70f2..4660bf222db17099e221fc37f2d002ab7b45fd8b 100644 (file)
@@ -22,7 +22,7 @@
     |        m68k: | TODO |
     |       metag: | TODO |
     |  microblaze: | TODO |
-    |        mips: | TODO |
+    |        mips: |  ok  |
     |     mn10300: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
diff --git a/Documentation/filesystems/.gitignore b/Documentation/filesystems/.gitignore
deleted file mode 100644 (file)
index 31d6e42..0000000
+++ /dev/null
@@ -1 +0,0 @@
-dnotify_test
index 9922939e7d99e3dfbcd51d2dad611f745a9a4f94..f66e748fc5e4eb097c10f4ada57c19fb43ccc0e5 100644 (file)
@@ -2,8 +2,6 @@
        - this file (info on some of the filesystems supported by linux).
 Locking
        - info on locking rules as they pertain to Linux VFS.
-Makefile
-       - Makefile for building the filsystems-part of DocBook.
 9p.txt
        - 9p (v9fs) is an implementation of the Plan 9 remote fs protocol.
 adfs.txt
index 14cdc101d165d94bb6114763989322ac1958848c..1b5f15653b1bb82ca0fc7801fe01fe8c4c75047a 100644 (file)
@@ -447,7 +447,6 @@ prototypes:
        int (*flush) (struct file *);
        int (*release) (struct inode *, struct file *);
        int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
-       int (*aio_fsync) (struct kiocb *, int datasync);
        int (*fasync) (int, struct file *, int);
        int (*lock) (struct file *, int, struct file_lock *);
        ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
diff --git a/Documentation/filesystems/Makefile b/Documentation/filesystems/Makefile
deleted file mode 100644 (file)
index 883010c..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-# List of programs to build
-hostprogs-y := dnotify_test
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
diff --git a/Documentation/filesystems/dnotify_test.c b/Documentation/filesystems/dnotify_test.c
deleted file mode 100644 (file)
index 8b37b4a..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-#define _GNU_SOURCE    /* needed to get the defines */
-#include <fcntl.h>     /* in glibc 2.2 this has the needed
-                                  values defined */
-#include <signal.h>
-#include <stdio.h>
-#include <unistd.h>
-
-static volatile int event_fd;
-
-static void handler(int sig, siginfo_t *si, void *data)
-{
-       event_fd = si->si_fd;
-}
-
-int main(void)
-{
-       struct sigaction act;
-       int fd;
-
-       act.sa_sigaction = handler;
-       sigemptyset(&act.sa_mask);
-       act.sa_flags = SA_SIGINFO;
-       sigaction(SIGRTMIN + 1, &act, NULL);
-
-       fd = open(".", O_RDONLY);
-       fcntl(fd, F_SETSIG, SIGRTMIN + 1);
-       fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT);
-       /* we will now be notified if any of the files
-          in "." is modified or new files are created */
-       while (1) {
-               pause();
-               printf("Got event on fd=%d\n", event_fd);
-       }
-}
index 219ffd41a9117d1f598f97ba2745e22a371dfc29..74329fd0add2237a848a72fb71a46d5e4e98cee4 100644 (file)
@@ -395,32 +395,6 @@ is not associated with a file:
 
  or if empty, the mapping is anonymous.
 
-The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint
-of the individual tasks of a process. In this file you will see a mapping marked
-as [stack] if that task sees it as a stack. Hence, for the example above, the
-task-level map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
-
-08048000-08049000 r-xp 00000000 03:00 8312       /opt/test
-08049000-0804a000 rw-p 00001000 03:00 8312       /opt/test
-0804a000-0806b000 rw-p 00000000 00:00 0          [heap]
-a7cb1000-a7cb2000 ---p 00000000 00:00 0
-a7cb2000-a7eb2000 rw-p 00000000 00:00 0
-a7eb2000-a7eb3000 ---p 00000000 00:00 0
-a7eb3000-a7ed5000 rw-p 00000000 00:00 0          [stack]
-a7ed5000-a8008000 r-xp 00000000 03:00 4222       /lib/libc.so.6
-a8008000-a800a000 r--p 00133000 03:00 4222       /lib/libc.so.6
-a800a000-a800b000 rw-p 00135000 03:00 4222       /lib/libc.so.6
-a800b000-a800e000 rw-p 00000000 00:00 0
-a800e000-a8022000 r-xp 00000000 03:00 14462      /lib/libpthread.so.0
-a8022000-a8023000 r--p 00013000 03:00 14462      /lib/libpthread.so.0
-a8023000-a8024000 rw-p 00014000 03:00 14462      /lib/libpthread.so.0
-a8024000-a8027000 rw-p 00000000 00:00 0
-a8027000-a8043000 r-xp 00000000 03:00 8317       /lib/ld-linux.so.2
-a8043000-a8044000 r--p 0001b000 03:00 8317       /lib/ld-linux.so.2
-a8044000-a8045000 rw-p 0001c000 03:00 8317       /lib/ld-linux.so.2
-aff35000-aff4a000 rw-p 00000000 00:00 0
-ffffe000-fffff000 r-xp 00000000 00:00 0          [vdso]
-
 The /proc/PID/smaps is an extension based on maps, showing the memory
 consumption for each of the process's mappings. For each of mappings there
 is a series of lines such as the following:
index d619c8d71966e255474b3bce54f2b277dd1b337d..b5039a00caafae44660514da3829994436a35e84 100644 (file)
@@ -828,7 +828,6 @@ struct file_operations {
        int (*flush) (struct file *, fl_owner_t id);
        int (*release) (struct inode *, struct file *);
        int (*fsync) (struct file *, loff_t, loff_t, int datasync);
-       int (*aio_fsync) (struct kiocb *, int datasync);
        int (*fasync) (int, struct file *, int);
        int (*lock) (struct file *, int, struct file_lock *);
        ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
index 40884c4fe40c5f85c4f64e096b7469141652a8cd..a0f61898d493b720fbc014554b628f5b6e25a93f 100644 (file)
@@ -6,7 +6,7 @@ Note that it only applies to the new descriptor-based interface. For a
 description of the deprecated integer-based GPIO interface please refer to
 gpio-legacy.txt (actually, there is no real mapping possible with the old
 interface; you just fetch an integer from somewhere and request the
-corresponding GPIO.
+corresponding GPIO).
 
 All platforms can enable the GPIO library, but if the platform strictly
 requires GPIO functionality to be present, it needs to select GPIOLIB from its
@@ -162,6 +162,9 @@ The driver controlling "foo.0" will then be able to obtain its GPIOs as follows:
 
 Since the "led" GPIOs are mapped as active-high, this example will switch their
 signals to 1, i.e. enabling the LEDs. And for the "power" GPIO, which is mapped
-as active-low, its actual signal will be 0 after this code. Contrary to the legacy
-integer GPIO interface, the active-low property is handled during mapping and is
-thus transparent to GPIO consumers.
+as active-low, its actual signal will be 0 after this code. Contrary to the
+legacy integer GPIO interface, the active-low property is handled during
+mapping and is thus transparent to GPIO consumers.
+
+A set of functions such as gpiod_set_value() is available to work with
+the new descriptor-oriented interface.
index e0aefeece551b52b5d1208a80123ad7c6668f47c..1a014fede0b72b442dbc04a93a184e1bdf5fe324 100644 (file)
@@ -326,7 +326,7 @@ Two parent-locked sibling muxes
 
 This is a good topology.
 
-                                   .--------.
+                                    .--------.
                    .----------.  .--| dev D1 |
                    |  parent- |--'  '--------'
                 .--|  locked  |     .--------.
@@ -350,7 +350,7 @@ Mux-locked and parent-locked sibling muxes
 
 This is a good topology.
 
-                                   .--------.
+                                    .--------.
                    .----------.  .--| dev D1 |
                    |   mux-   |--'  '--------'
                 .--|  locked  |     .--------.
diff --git a/Documentation/ia64/.gitignore b/Documentation/ia64/.gitignore
deleted file mode 100644 (file)
index ab806ed..0000000
+++ /dev/null
@@ -1 +0,0 @@
-aliasing-test
diff --git a/Documentation/ia64/Makefile b/Documentation/ia64/Makefile
deleted file mode 100644 (file)
index d493163..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-# List of programs to build
-hostprogs-y := aliasing-test
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
diff --git a/Documentation/ia64/aliasing-test.c b/Documentation/ia64/aliasing-test.c
deleted file mode 100644 (file)
index 62a190d..0000000
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Exercise /dev/mem mmap cases that have been troublesome in the past
- *
- * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
- *     Bjorn Helgaas <bjorn.helgaas@hp.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <dirent.h>
-#include <fcntl.h>
-#include <fnmatch.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <linux/pci.h>
-
-int sum;
-
-static int map_mem(char *path, off_t offset, size_t length, int touch)
-{
-       int fd, rc;
-       void *addr;
-       int *c;
-
-       fd = open(path, O_RDWR);
-       if (fd == -1) {
-               perror(path);
-               return -1;
-       }
-
-       if (fnmatch("/proc/bus/pci/*", path, 0) == 0) {
-               rc = ioctl(fd, PCIIOC_MMAP_IS_MEM);
-               if (rc == -1)
-                       perror("PCIIOC_MMAP_IS_MEM ioctl");
-       }
-
-       addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset);
-       if (addr == MAP_FAILED)
-               return 1;
-
-       if (touch) {
-               c = (int *) addr;
-               while (c < (int *) (addr + length))
-                       sum += *c++;
-       }
-
-       rc = munmap(addr, length);
-       if (rc == -1) {
-               perror("munmap");
-               return -1;
-       }
-
-       close(fd);
-       return 0;
-}
-
-static int scan_tree(char *path, char *file, off_t offset, size_t length, int touch)
-{
-       struct dirent **namelist;
-       char *name, *path2;
-       int i, n, r, rc = 0, result = 0;
-       struct stat buf;
-
-       n = scandir(path, &namelist, 0, alphasort);
-       if (n < 0) {
-               perror("scandir");
-               return -1;
-       }
-
-       for (i = 0; i < n; i++) {
-               name = namelist[i]->d_name;
-
-               if (fnmatch(".", name, 0) == 0)
-                       goto skip;
-               if (fnmatch("..", name, 0) == 0)
-                       goto skip;
-
-               path2 = malloc(strlen(path) + strlen(name) + 3);
-               strcpy(path2, path);
-               strcat(path2, "/");
-               strcat(path2, name);
-
-               if (fnmatch(file, name, 0) == 0) {
-                       rc = map_mem(path2, offset, length, touch);
-                       if (rc == 0)
-                               fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable");
-                       else if (rc > 0)
-                               fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length);
-                       else {
-                               fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length);
-                               return rc;
-                       }
-               } else {
-                       r = lstat(path2, &buf);
-                       if (r == 0 && S_ISDIR(buf.st_mode)) {
-                               rc = scan_tree(path2, file, offset, length, touch);
-                               if (rc < 0)
-                                       return rc;
-                       }
-               }
-
-               result |= rc;
-               free(path2);
-
-skip:
-               free(namelist[i]);
-       }
-       free(namelist);
-       return result;
-}
-
-char buf[1024];
-
-static int read_rom(char *path)
-{
-       int fd, rc;
-       size_t size = 0;
-
-       fd = open(path, O_RDWR);
-       if (fd == -1) {
-               perror(path);
-               return -1;
-       }
-
-       rc = write(fd, "1", 2);
-       if (rc <= 0) {
-               close(fd);
-               perror("write");
-               return -1;
-       }
-
-       do {
-               rc = read(fd, buf, sizeof(buf));
-               if (rc > 0)
-                       size += rc;
-       } while (rc > 0);
-
-       close(fd);
-       return size;
-}
-
-static int scan_rom(char *path, char *file)
-{
-       struct dirent **namelist;
-       char *name, *path2;
-       int i, n, r, rc = 0, result = 0;
-       struct stat buf;
-
-       n = scandir(path, &namelist, 0, alphasort);
-       if (n < 0) {
-               perror("scandir");
-               return -1;
-       }
-
-       for (i = 0; i < n; i++) {
-               name = namelist[i]->d_name;
-
-               if (fnmatch(".", name, 0) == 0)
-                       goto skip;
-               if (fnmatch("..", name, 0) == 0)
-                       goto skip;
-
-               path2 = malloc(strlen(path) + strlen(name) + 3);
-               strcpy(path2, path);
-               strcat(path2, "/");
-               strcat(path2, name);
-
-               if (fnmatch(file, name, 0) == 0) {
-                       rc = read_rom(path2);
-
-                       /*
-                        * It's OK if the ROM is unreadable.  Maybe there
-                        * is no ROM, or some other error occurred.  The
-                        * important thing is that no MCA happened.
-                        */
-                       if (rc > 0)
-                               fprintf(stderr, "PASS: %s read %d bytes\n", path2, rc);
-                       else {
-                               fprintf(stderr, "PASS: %s not readable\n", path2);
-                               return rc;
-                       }
-               } else {
-                       r = lstat(path2, &buf);
-                       if (r == 0 && S_ISDIR(buf.st_mode)) {
-                               rc = scan_rom(path2, file);
-                               if (rc < 0)
-                                       return rc;
-                       }
-               }
-
-               result |= rc;
-               free(path2);
-
-skip:
-               free(namelist[i]);
-       }
-       free(namelist);
-       return result;
-}
-
-int main(void)
-{
-       int rc;
-
-       if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0)
-               fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n");
-       else
-               fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n");
-
-       /*
-        * It's not safe to blindly read the VGA frame buffer.  If you know
-        * how to poke the card the right way, it should respond, but it's
-        * not safe in general.  Many machines, e.g., Intel chipsets, cover
-        * up a non-responding card by just returning -1, but others will
-        * report the failure as a machine check.
-        */
-       if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0)
-               fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n");
-       else
-               fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n");
-
-       if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0)
-               fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n");
-       else
-               fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n");
-
-       /*
-        * Often you can map all the individual pieces above (0-0xA0000,
-        * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole
-        * thing at once.  This is because the individual pieces use different
-        * attributes, and there's no single attribute supported over the
-        * whole region.
-        */
-       rc = map_mem("/dev/mem", 0, 1024*1024, 0);
-       if (rc == 0)
-               fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n");
-       else if (rc > 0)
-               fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n");
-       else
-               fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n");
-
-       scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1);
-       scan_tree("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0);
-       scan_tree("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1);
-       scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0);
-
-       scan_rom("/sys/devices", "rom");
-
-       scan_tree("/proc/bus/pci", "??.?", 0, 0xA0000, 1);
-       scan_tree("/proc/bus/pci", "??.?", 0xA0000, 0x20000, 0);
-       scan_tree("/proc/bus/pci", "??.?", 0xC0000, 0x40000, 1);
-       scan_tree("/proc/bus/pci", "??.?", 0, 1024*1024, 0);
-
-       return rc;
-}
index d9ccb94fca951d9ac71a4882cde836750e399aae..c53d089455a427d059aacd28c67e32093b590ce5 100644 (file)
@@ -17,6 +17,7 @@ Contents:
    driver-api/index
    media/index
    gpu/index
+   80211/index
 
 Indices and tables
 ==================
index 1fec1135791d98c987105872c63b5e96589633d3..8d1341ccde6498bebc534bcd7c1ef4560560a96f 100644 (file)
@@ -319,3 +319,60 @@ For touchpad packet, the format is:
                otherwise byte 0 bit 4 must be set and byte 0/4/5 are
                in NEW fmt
  F:         Number of fingers - 3, 0 means 3 fingers, 1 means 4 ...
+
+
+ALPS Absolute Mode - Protocol Version 8
+---------------------------------------
+
+Spoken by SS4 (73 03 14) and SS5 (73 03 28) hardware.
+
+The packet type is given by the APD field, bits 4-5 of byte 3.
+
+Touchpad packet (APD = 0x2):
+
+           b7   b6   b5   b4   b3   b2   b1   b0
+ byte 0:  SWM  SWR  SWL    1    1    0    0   X7
+ byte 1:    0   X6   X5   X4   X3   X2   X1   X0
+ byte 2:    0   Y6   Y5   Y4   Y3   Y2   Y1   Y0
+ byte 3:    0  T&P    1    0    1    0    0   Y7
+ byte 4:    0   Z6   Z5   Z4   Z3   Z2   Z1   Z0
+ byte 5:    0    0    0    0    0    0    0    0
+
+SWM, SWR, SWL: Middle, Right, and Left button states
+
+Touchpad 1 Finger packet (APD = 0x0):
+
+           b7   b6   b5   b4   b3   b2   b1   b0
+ byte 0:  SWM  SWR  SWL    1    1   X2   X1   X0
+ byte 1:   X9   X8   X7    1   X6   X5   X4   X3
+ byte 2:    0  X11  X10  LFB   Y3   Y2   Y1   Y0
+ byte 3:   Y5   Y4    0    0    1 TAPF2 TAPF1 TAPF0
+ byte 4:  Zv7  Y11  Y10    1   Y9   Y8   Y7   Y6
+ byte 5:  Zv6  Zv5  Zv4    0  Zv3  Zv2  Zv1  Zv0
+
+TAPF: ???
+LFB:  ???
+
+Touchpad 2 Finger packet (APD = 0x1):
+
+           b7   b6   b5   b4   b3   b2   b1   b0
+ byte 0:  SWM  SWR  SWL    1    1  AX6  AX5  AX4
+ byte 1: AX11 AX10  AX9  AX8  AX7  AZ1  AY4  AZ0
+ byte 2: AY11 AY10  AY9  CONT AY8  AY7  AY6  AY5
+ byte 3:    0    0    0    1    1  BX6  BX5  BX4
+ byte 4: BX11 BX10  BX9  BX8  BX7  BZ1  BY4  BZ0
+ byte 5: BY11 BY10  BY9    0  BY8  BY7  BY6  BY5
+
+CONT: A 3-or-4 Finger packet is to follow
+
+Touchpad 3-or-4 Finger packet (APD = 0x3):
+
+           b7   b6   b5   b4   b3   b2   b1   b0
+ byte 0:  SWM  SWR  SWL    1    1  AX6  AX5  AX4
+ byte 1: AX11 AX10  AX9  AX8  AX7  AZ1  AY4  AZ0
+ byte 2: AY11 AY10  AY9  OVF  AY8  AY7  AY6  AY5
+ byte 3:    0    0    1    1    1  BX6  BX5  BX4
+ byte 4: BX11 BX10  BX9  BX8  BX7  BZ1  BY4  BZ0
+ byte 5: BY11 BY10  BY9    0  BY8  BY7  BY6  BY5
+
+OVF: 5th finger detected
index 385a5ef41c17b9d0023e39d5f0683b6b76199373..9b9c4797fc55653dec668d82ae8c43ccdab58e4b 100644 (file)
@@ -41,6 +41,7 @@ This document describes the Linux kernel Makefiles.
           --- 6.8 Custom kbuild commands
           --- 6.9 Preprocessing linker scripts
           --- 6.10 Generic header files
+          --- 6.11 Post-link pass
 
        === 7 Kbuild syntax for exported headers
                --- 7.1 header-y
@@ -1237,6 +1238,21 @@ When kbuild executes, the following steps are followed (roughly):
        to list the file in the Kbuild file.
        See "7.4 generic-y" for further info on syntax etc.
 
+--- 6.11 Post-link pass
+
+       If the file arch/xxx/Makefile.postlink exists, this makefile
+       will be invoked on post-link objects (vmlinux and modules.ko)
+       so that architectures can run post-link passes on them. It must
+       also handle the clean target.
+
+       This pass runs after kallsyms generation. If the architecture
+       needs to modify symbol locations, rather than manipulate the
+       kallsyms, it may be easier to add another postlink target for
+       .tmp_vmlinux? targets to be called from link-vmlinux.sh.
+
+       For example, powerpc uses this to check relocation sanity of
+       the linked vmlinux file.
+
 === 7 Kbuild syntax for exported headers
 
 The kernel includes a set of headers that is exported to userspace.
index 58f3c10417593eca0ec09c7ae1686d44b61b105b..37babf91f2cb6de20e0b1a66843d1636d65c71fb 100644 (file)
@@ -1511,7 +1511,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
        i8042.nopnp     [HW] Don't use ACPIPnP / PnPBIOS to discover KBD/AUX
                             controllers
        i8042.notimeout [HW] Ignore timeout condition signalled by controller
-       i8042.reset     [HW] Reset the controller during init and cleanup
+       i8042.reset     [HW] Reset the controller during init, cleanup and
+                            suspend-to-ram transitions, only during s2r
+                            transitions, or never reset
+                       Format: { 1 | Y | y | 0 | N | n }
+                       1, Y, y: always reset controller
+                       0, N, n: don't ever reset controller
+                       Default: only on s2r transitions on x86; most other
+                       architectures force reset to be always executed
        i8042.unlock    [HW] Unlock (ignore) the keylock
        i8042.kbdreset  [HW] Reset device connected to KBD port
 
index 979eacae243d1c3acb63c7e65b6d35e6cb1aba67..54bee77fa728576eebeebe71f3896b097bfaa6b7 100644 (file)
@@ -1,8 +1,9 @@
 Linux Kernel Selftests
 
 The kernel contains a set of "self tests" under the tools/testing/selftests/
-directory. These are intended to be small unit tests to exercise individual
-code paths in the kernel.
+directory. These are intended to be small tests to exercise individual code
+paths in the kernel. Tests are intended to be run after building, installing
+and booting a kernel.
 
 On some systems, hot-plug tests could hang forever waiting for cpu and
 memory to be ready to be offlined. A special hot-plug target is created
diff --git a/Documentation/laptops/.gitignore b/Documentation/laptops/.gitignore
deleted file mode 100644 (file)
index 9fc984e..0000000
+++ /dev/null
@@ -1 +0,0 @@
-dslm
index 7c0ac2a26b9e7f26add74355d988b0a645413323..86169dc766f7b1e8c4af53cee2ff72d0d8c055a9 100644 (file)
@@ -1,13 +1,9 @@
 00-INDEX
        - This file
-Makefile
-       - Makefile for building dslm example program.
 asus-laptop.txt
        - information on the Asus Laptop Extras driver.
 disk-shock-protection.txt
        - information on hard disk shock protection.
-dslm.c
-       - Simple Disk Sleep Monitor program
 laptop-mode.txt
        - how to conserve battery power using laptop-mode.
 sony-laptop.txt
diff --git a/Documentation/laptops/Makefile b/Documentation/laptops/Makefile
deleted file mode 100644 (file)
index 0abe44f..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-# List of programs to build
-hostprogs-y := dslm
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
diff --git a/Documentation/laptops/dslm.c b/Documentation/laptops/dslm.c
deleted file mode 100644 (file)
index d5dd2d4..0000000
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * dslm.c
- * Simple Disk Sleep Monitor
- *  by Bartek Kania
- * Licensed under the GPL
- */
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <time.h>
-#include <string.h>
-#include <signal.h>
-#include <sys/ioctl.h>
-#include <linux/hdreg.h>
-
-#ifdef DEBUG
-#define D(x) x
-#else
-#define D(x)
-#endif
-
-int endit = 0;
-
-/* Check if the disk is in powersave-mode
- * Most of the code is stolen from hdparm.
- * 1 = active, 0 = standby/sleep, -1 = unknown */
-static int check_powermode(int fd)
-{
-    unsigned char args[4] = {WIN_CHECKPOWERMODE1,0,0,0};
-    int state;
-
-    if (ioctl(fd, HDIO_DRIVE_CMD, &args)
-       && (args[0] = WIN_CHECKPOWERMODE2) /* try again with 0x98 */
-       && ioctl(fd, HDIO_DRIVE_CMD, &args)) {
-       if (errno != EIO || args[0] != 0 || args[1] != 0) {
-           state = -1; /* "unknown"; */
-       } else
-           state = 0; /* "sleeping"; */
-    } else {
-       state = (args[2] == 255) ? 1 : 0;
-    }
-    D(printf(" drive state is:  %d\n", state));
-
-    return state;
-}
-
-static char *state_name(int i)
-{
-    if (i == -1) return "unknown";
-    if (i == 0) return "sleeping";
-    if (i == 1) return "active";
-
-    return "internal error";
-}
-
-static char *myctime(time_t time)
-{
-    char *ts = ctime(&time);
-    ts[strlen(ts) - 1] = 0;
-
-    return ts;
-}
-
-static void measure(int fd)
-{
-    time_t start_time;
-    int last_state;
-    time_t last_time;
-    int curr_state;
-    time_t curr_time = 0;
-    time_t time_diff;
-    time_t active_time = 0;
-    time_t sleep_time = 0;
-    time_t unknown_time = 0;
-    time_t total_time = 0;
-    int changes = 0;
-    float tmp;
-
-    printf("Starting measurements\n");
-
-    last_state = check_powermode(fd);
-    start_time = last_time = time(0);
-    printf("  System is in state %s\n\n", state_name(last_state));
-
-    while(!endit) {
-       sleep(1);
-       curr_state = check_powermode(fd);
-
-       if (curr_state != last_state || endit) {
-           changes++;
-           curr_time = time(0);
-           time_diff = curr_time - last_time;
-
-           if (last_state == 1) active_time += time_diff;
-           else if (last_state == 0) sleep_time += time_diff;
-           else unknown_time += time_diff;
-
-           last_state = curr_state;
-           last_time = curr_time;
-
-           printf("%s: State-change to %s\n", myctime(curr_time),
-                  state_name(curr_state));
-       }
-    }
-    changes--; /* Compensate for SIGINT */
-
-    total_time = time(0) - start_time;
-    printf("\nTotal running time:  %lus\n", curr_time - start_time);
-    printf(" State changed %d times\n", changes);
-
-    tmp = (float)sleep_time / (float)total_time * 100;
-    printf(" Time in sleep state:   %lus (%.2f%%)\n", sleep_time, tmp);
-    tmp = (float)active_time / (float)total_time * 100;
-    printf(" Time in active state:  %lus (%.2f%%)\n", active_time, tmp);
-    tmp = (float)unknown_time / (float)total_time * 100;
-    printf(" Time in unknown state: %lus (%.2f%%)\n", unknown_time, tmp);
-}
-
-static void ender(int s)
-{
-    endit = 1;
-}
-
-static void usage(void)
-{
-    puts("usage: dslm [-w <time>] <disk>");
-    exit(0);
-}
-
-int main(int argc, char **argv)
-{
-    int fd;
-    char *disk = 0;
-    int settle_time = 60;
-
-    /* Parse the simple command-line */
-    if (argc == 2)
-       disk = argv[1];
-    else if (argc == 4) {
-       settle_time = atoi(argv[2]);
-       disk = argv[3];
-    } else
-       usage();
-
-    if (!(fd = open(disk, O_RDONLY|O_NONBLOCK))) {
-       printf("Can't open %s, because: %s\n", disk, strerror(errno));
-       exit(-1);
-    }
-
-    if (settle_time) {
-       printf("Waiting %d seconds for the system to settle down to "
-              "'normal'\n", settle_time);
-       sleep(settle_time);
-    } else
-       puts("Not waiting for system to settle down");
-
-    signal(SIGINT, ender);
-
-    measure(fd);
-
-    close(fd);
-
-    return 0;
-}
index 4ebbfc3f1c6ea803b1cb7b5b71c1e98c6d8e3f17..19276f5d195cb75f5cef6a4a554f17e972c5cd55 100644 (file)
@@ -779,4 +779,4 @@ Monitoring tool
 ---------------
 
 Bartek Kania submitted this, it can be used to measure how much time your disk
-spends spun up/down.  See Documentation/laptops/dslm.c
+spends spun up/down.  See tools/laptop/dslm/dslm.c
diff --git a/Documentation/mic/Makefile b/Documentation/mic/Makefile
deleted file mode 100644 (file)
index a191d45..0000000
+++ /dev/null
@@ -1 +0,0 @@
-subdir-y := mpssd
diff --git a/Documentation/mic/mpssd/.gitignore b/Documentation/mic/mpssd/.gitignore
deleted file mode 100644 (file)
index 8b7c72f..0000000
+++ /dev/null
@@ -1 +0,0 @@
-mpssd
diff --git a/Documentation/mic/mpssd/Makefile b/Documentation/mic/mpssd/Makefile
deleted file mode 100644 (file)
index 06871b0..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-ifndef CROSS_COMPILE
-# List of programs to build
-hostprogs-$(CONFIG_X86_64) := mpssd
-
-mpssd-objs := mpssd.o sysfs.o
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-
-HOSTCFLAGS += -I$(objtree)/usr/include -I$(srctree)/tools/include
-
-ifdef DEBUG
-HOSTCFLAGS += -DDEBUG=$(DEBUG)
-endif
-
-HOSTLOADLIBES_mpssd := -lpthread
-
-install:
-       install mpssd /usr/sbin/mpssd
-       install micctrl /usr/sbin/micctrl
-endif
diff --git a/Documentation/mic/mpssd/micctrl b/Documentation/mic/mpssd/micctrl
deleted file mode 100755 (executable)
index 8f2629b..0000000
+++ /dev/null
@@ -1,173 +0,0 @@
-#!/bin/bash
-# Intel MIC Platform Software Stack (MPSS)
-#
-# Copyright(c) 2013 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License, version 2, as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Intel MIC User Space Tools.
-#
-# micctrl - Controls MIC boot/start/stop.
-#
-# chkconfig: 2345 95 05
-# description: start MPSS stack processing.
-#
-### BEGIN INIT INFO
-# Provides: micctrl
-### END INIT INFO
-
-# Source function library.
-. /etc/init.d/functions
-
-sysfs="/sys/class/mic"
-
-_status()
-{
-       f=$sysfs/$1
-       echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`"
-}
-
-status()
-{
-       if [ "`echo $1 | head -c3`" == "mic" ]; then
-               _status $1
-               return $?
-       fi
-       for f in $sysfs/*
-       do
-               _status `basename $f`
-               RETVAL=$?
-               [ $RETVAL -ne 0 ] && return $RETVAL
-       done
-       return 0
-}
-
-_reset()
-{
-       f=$sysfs/$1
-       echo reset > $f/state
-}
-
-reset()
-{
-       if [ "`echo $1 | head -c3`" == "mic" ]; then
-               _reset $1
-               return $?
-       fi
-       for f in $sysfs/*
-       do
-               _reset `basename $f`
-               RETVAL=$?
-               [ $RETVAL -ne 0 ] && return $RETVAL
-       done
-       return 0
-}
-
-_boot()
-{
-       f=$sysfs/$1
-       echo "linux" > $f/bootmode
-       echo "mic/uos.img" > $f/firmware
-       echo "mic/$1.image" > $f/ramdisk
-       echo "boot" > $f/state
-}
-
-boot()
-{
-       if [ "`echo $1 | head -c3`" == "mic" ]; then
-               _boot $1
-               return $?
-       fi
-       for f in $sysfs/*
-       do
-               _boot `basename $f`
-               RETVAL=$?
-               [ $RETVAL -ne 0 ] && return $RETVAL
-       done
-       return 0
-}
-
-_shutdown()
-{
-       f=$sysfs/$1
-       echo shutdown > $f/state
-}
-
-shutdown()
-{
-       if [ "`echo $1 | head -c3`" == "mic" ]; then
-               _shutdown $1
-               return $?
-       fi
-       for f in $sysfs/*
-       do
-               _shutdown `basename $f`
-               RETVAL=$?
-               [ $RETVAL -ne 0 ] && return $RETVAL
-       done
-       return 0
-}
-
-_wait()
-{
-       f=$sysfs/$1
-       while [ "`cat $f/state`" != "offline" -a "`cat $f/state`" != "online" ]
-       do
-               sleep 1
-               echo -e "Waiting for $1 to go offline"
-       done
-}
-
-wait()
-{
-       if [ "`echo $1 | head -c3`" == "mic" ]; then
-               _wait $1
-               return $?
-       fi
-       # Wait for the cards to go offline
-       for f in $sysfs/*
-       do
-               _wait `basename $f`
-               RETVAL=$?
-               [ $RETVAL -ne 0 ] && return $RETVAL
-       done
-       return 0
-}
-
-if [ ! -d "$sysfs" ]; then
-       echo -e $"Module unloaded "
-       exit 3
-fi
-
-case $1 in
-       -s)
-               status $2
-               ;;
-       -r)
-               reset $2
-               ;;
-       -b)
-               boot $2
-               ;;
-       -S)
-               shutdown $2
-               ;;
-       -w)
-               wait $2
-               ;;
-       *)
-               echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}"
-               exit 2
-esac
-
-exit $?
diff --git a/Documentation/mic/mpssd/mpss b/Documentation/mic/mpssd/mpss
deleted file mode 100755 (executable)
index 5fcf9fa..0000000
+++ /dev/null
@@ -1,200 +0,0 @@
-#!/bin/bash
-# Intel MIC Platform Software Stack (MPSS)
-#
-# Copyright(c) 2013 Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License, version 2, as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# The full GNU General Public License is included in this distribution in
-# the file called "COPYING".
-#
-# Intel MIC User Space Tools.
-#
-# mpss Start mpssd.
-#
-# chkconfig: 2345 95 05
-# description: start MPSS stack processing.
-#
-### BEGIN INIT INFO
-# Provides: mpss
-# Required-Start:
-# Required-Stop:
-# Short-Description: MPSS stack control
-# Description: MPSS stack control
-### END INIT INFO
-
-# Source function library.
-. /etc/init.d/functions
-
-exec=/usr/sbin/mpssd
-sysfs="/sys/class/mic"
-mic_modules="mic_host mic_x100_dma scif vop"
-
-start()
-{
-       [ -x $exec ] || exit 5
-
-       if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then
-               echo -e $"MPSSD already running! "
-               success
-               echo
-               return 0
-       fi
-
-       echo -e $"Starting MPSS Stack"
-       echo -e $"Loading MIC drivers:" $mic_modules
-
-       modprobe -a $mic_modules
-       RETVAL=$?
-       if [ $RETVAL -ne 0 ]; then
-               failure
-               echo
-               return $RETVAL
-       fi
-
-       # Start the daemon
-       echo -n $"Starting MPSSD "
-       $exec
-       RETVAL=$?
-       if [ $RETVAL -ne 0 ]; then
-               failure
-               echo
-               return $RETVAL
-       fi
-       success
-       echo
-
-       sleep 5
-
-       # Boot the cards
-       micctrl -b
-
-       # Wait till ping works
-       for f in $sysfs/*
-       do
-               count=100
-               ipaddr=`cat $f/cmdline`
-               ipaddr=${ipaddr#*address,}
-               ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1`
-               while [ $count -ge 0 ]
-               do
-                       echo -e "Pinging "`basename $f`" "
-                       ping -c 1 $ipaddr &> /dev/null
-                       RETVAL=$?
-                       if [ $RETVAL -eq 0 ]; then
-                               success
-                               break
-                       fi
-                       sleep 1
-                       count=`expr $count - 1`
-               done
-               [ $RETVAL -ne 0 ] && failure || success
-               echo
-       done
-       return $RETVAL
-}
-
-stop()
-{
-       echo -e $"Shutting down MPSS Stack: "
-
-       # Bail out if module is unloaded
-       if [ ! -d "$sysfs" ]; then
-               echo -n $"Module unloaded "
-               success
-               echo
-               return 0
-       fi
-
-       # Shut down the cards.
-       micctrl -S
-
-       # Wait for the cards to go offline
-       for f in $sysfs/*
-       do
-               while [ "`cat $f/state`" != "ready" ]
-               do
-                       sleep 1
-                       echo -e "Waiting for "`basename $f`" to become ready"
-               done
-       done
-
-       # Display the status of the cards
-       micctrl -s
-
-       # Kill MPSSD now
-       echo -n $"Killing MPSSD"
-       killall -9 mpssd 2>/dev/null
-       RETVAL=$?
-       [ $RETVAL -ne 0 ] && failure || success
-       echo
-       return $RETVAL
-}
-
-restart()
-{
-       stop
-       sleep 5
-       start
-}
-
-status()
-{
-       micctrl -s
-       if [ "`ps -e | awk '{print $4}' | grep mpssd | head -n 1`" = "mpssd" ]; then
-               echo "mpssd is running"
-       else
-               echo "mpssd is stopped"
-       fi
-       return 0
-}
-
-unload()
-{
-       if [ ! -d "$sysfs" ]; then
-               echo -n $"No MIC_HOST Module: "
-               success
-               echo
-               return
-       fi
-
-       stop
-
-       sleep 5
-       echo -n $"Removing MIC drivers:" $mic_modules
-       modprobe -r $mic_modules
-       RETVAL=$?
-       [ $RETVAL -ne 0 ] && failure || success
-       echo
-       return $RETVAL
-}
-
-case $1 in
-       start)
-               start
-               ;;
-       stop)
-               stop
-               ;;
-       restart)
-               restart
-               ;;
-       status)
-               status
-               ;;
-       unload)
-               unload
-               ;;
-       *)
-               echo $"Usage: $0 {start|stop|restart|status|unload}"
-               exit 2
-esac
-
-exit $?
diff --git a/Documentation/mic/mpssd/mpssd.c b/Documentation/mic/mpssd/mpssd.c
deleted file mode 100644 (file)
index 49db1de..0000000
+++ /dev/null
@@ -1,1826 +0,0 @@
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Intel MIC User Space Tools.
- */
-
-#define _GNU_SOURCE
-
-#include <stdlib.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <assert.h>
-#include <unistd.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <poll.h>
-#include <features.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <linux/virtio_ring.h>
-#include <linux/virtio_net.h>
-#include <linux/virtio_console.h>
-#include <linux/virtio_blk.h>
-#include <linux/version.h>
-#include "mpssd.h"
-#include <linux/mic_ioctl.h>
-#include <linux/mic_common.h>
-#include <tools/endian.h>
-
-static void *init_mic(void *arg);
-
-static FILE *logfp;
-static struct mic_info mic_list;
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-
-#define min_t(type, x, y) ({                           \
-               type __min1 = (x);                      \
-               type __min2 = (y);                      \
-               __min1 < __min2 ? __min1 : __min2; })
-
-/* align addr on a size boundary - adjust address up/down if needed */
-#define _ALIGN_DOWN(addr, size)  ((addr)&(~((size)-1)))
-#define _ALIGN_UP(addr, size)    _ALIGN_DOWN(addr + size - 1, size)
-
-/* align addr on a size boundary - adjust address up if needed */
-#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)
-
-/* to align the pointer to the (next) page boundary */
-#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)
-
-#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
-
-#define GSO_ENABLED            1
-#define MAX_GSO_SIZE           (64 * 1024)
-#define ETH_H_LEN              14
-#define MAX_NET_PKT_SIZE       (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
-#define MIC_DEVICE_PAGE_END    0x1000
-
-#ifndef VIRTIO_NET_HDR_F_DATA_VALID
-#define VIRTIO_NET_HDR_F_DATA_VALID    2       /* Csum is valid */
-#endif
-
-static struct {
-       struct mic_device_desc dd;
-       struct mic_vqconfig vqconfig[2];
-       __u32 host_features, guest_acknowledgements;
-       struct virtio_console_config cons_config;
-} virtcons_dev_page = {
-       .dd = {
-               .type = VIRTIO_ID_CONSOLE,
-               .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
-               .feature_len = sizeof(virtcons_dev_page.host_features),
-               .config_len = sizeof(virtcons_dev_page.cons_config),
-       },
-       .vqconfig[0] = {
-               .num = htole16(MIC_VRING_ENTRIES),
-       },
-       .vqconfig[1] = {
-               .num = htole16(MIC_VRING_ENTRIES),
-       },
-};
-
-static struct {
-       struct mic_device_desc dd;
-       struct mic_vqconfig vqconfig[2];
-       __u32 host_features, guest_acknowledgements;
-       struct virtio_net_config net_config;
-} virtnet_dev_page = {
-       .dd = {
-               .type = VIRTIO_ID_NET,
-               .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
-               .feature_len = sizeof(virtnet_dev_page.host_features),
-               .config_len = sizeof(virtnet_dev_page.net_config),
-       },
-       .vqconfig[0] = {
-               .num = htole16(MIC_VRING_ENTRIES),
-       },
-       .vqconfig[1] = {
-               .num = htole16(MIC_VRING_ENTRIES),
-       },
-#if GSO_ENABLED
-       .host_features = htole32(
-               1 << VIRTIO_NET_F_CSUM |
-               1 << VIRTIO_NET_F_GSO |
-               1 << VIRTIO_NET_F_GUEST_TSO4 |
-               1 << VIRTIO_NET_F_GUEST_TSO6 |
-               1 << VIRTIO_NET_F_GUEST_ECN),
-#else
-               .host_features = 0,
-#endif
-};
-
-static const char *mic_config_dir = "/etc/mpss";
-static const char *virtblk_backend = "VIRTBLK_BACKEND";
-static struct {
-       struct mic_device_desc dd;
-       struct mic_vqconfig vqconfig[1];
-       __u32 host_features, guest_acknowledgements;
-       struct virtio_blk_config blk_config;
-} virtblk_dev_page = {
-       .dd = {
-               .type = VIRTIO_ID_BLOCK,
-               .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
-               .feature_len = sizeof(virtblk_dev_page.host_features),
-               .config_len = sizeof(virtblk_dev_page.blk_config),
-       },
-       .vqconfig[0] = {
-               .num = htole16(MIC_VRING_ENTRIES),
-       },
-       .host_features =
-               htole32(1<<VIRTIO_BLK_F_SEG_MAX),
-       .blk_config = {
-               .seg_max = htole32(MIC_VRING_ENTRIES - 2),
-               .capacity = htole64(0),
-        }
-};
-
-static char *myname;
-
-static int
-tap_configure(struct mic_info *mic, char *dev)
-{
-       pid_t pid;
-       char *ifargv[7];
-       char ipaddr[IFNAMSIZ];
-       int ret = 0;
-
-       pid = fork();
-       if (pid == 0) {
-               ifargv[0] = "ip";
-               ifargv[1] = "link";
-               ifargv[2] = "set";
-               ifargv[3] = dev;
-               ifargv[4] = "up";
-               ifargv[5] = NULL;
-               mpsslog("Configuring %s\n", dev);
-               ret = execvp("ip", ifargv);
-               if (ret < 0) {
-                       mpsslog("%s execvp failed errno %s\n",
-                               mic->name, strerror(errno));
-                       return ret;
-               }
-       }
-       if (pid < 0) {
-               mpsslog("%s fork failed errno %s\n",
-                       mic->name, strerror(errno));
-               return ret;
-       }
-
-       ret = waitpid(pid, NULL, 0);
-       if (ret < 0) {
-               mpsslog("%s waitpid failed errno %s\n",
-                       mic->name, strerror(errno));
-               return ret;
-       }
-
-       snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id + 1);
-
-       pid = fork();
-       if (pid == 0) {
-               ifargv[0] = "ip";
-               ifargv[1] = "addr";
-               ifargv[2] = "add";
-               ifargv[3] = ipaddr;
-               ifargv[4] = "dev";
-               ifargv[5] = dev;
-               ifargv[6] = NULL;
-               mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
-               ret = execvp("ip", ifargv);
-               if (ret < 0) {
-                       mpsslog("%s execvp failed errno %s\n",
-                               mic->name, strerror(errno));
-                       return ret;
-               }
-       }
-       if (pid < 0) {
-               mpsslog("%s fork failed errno %s\n",
-                       mic->name, strerror(errno));
-               return ret;
-       }
-
-       ret = waitpid(pid, NULL, 0);
-       if (ret < 0) {
-               mpsslog("%s waitpid failed errno %s\n",
-                       mic->name, strerror(errno));
-               return ret;
-       }
-       mpsslog("MIC name %s %s %d DONE!\n",
-               mic->name, __func__, __LINE__);
-       return 0;
-}
-
-static int tun_alloc(struct mic_info *mic, char *dev)
-{
-       struct ifreq ifr;
-       int fd, err;
-#if GSO_ENABLED
-       unsigned offload;
-#endif
-       fd = open("/dev/net/tun", O_RDWR);
-       if (fd < 0) {
-               mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
-               goto done;
-       }
-
-       memset(&ifr, 0, sizeof(ifr));
-
-       ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
-       if (*dev)
-               strncpy(ifr.ifr_name, dev, IFNAMSIZ);
-
-       err = ioctl(fd, TUNSETIFF, (void *)&ifr);
-       if (err < 0) {
-               mpsslog("%s %s %d TUNSETIFF failed %s\n",
-                       mic->name, __func__, __LINE__, strerror(errno));
-               close(fd);
-               return err;
-       }
-#if GSO_ENABLED
-       offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
-
-       err = ioctl(fd, TUNSETOFFLOAD, offload);
-       if (err < 0) {
-               mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
-                       mic->name, __func__, __LINE__, strerror(errno));
-               close(fd);
-               return err;
-       }
-#endif
-       strcpy(dev, ifr.ifr_name);
-       mpsslog("Created TAP %s\n", dev);
-done:
-       return fd;
-}
-
-#define NET_FD_VIRTIO_NET 0
-#define NET_FD_TUN 1
-#define MAX_NET_FD 2
-
-static void set_dp(struct mic_info *mic, int type, void *dp)
-{
-       switch (type) {
-       case VIRTIO_ID_CONSOLE:
-               mic->mic_console.console_dp = dp;
-               return;
-       case VIRTIO_ID_NET:
-               mic->mic_net.net_dp = dp;
-               return;
-       case VIRTIO_ID_BLOCK:
-               mic->mic_virtblk.block_dp = dp;
-               return;
-       }
-       mpsslog("%s %s %d not found\n", mic->name, __func__, type);
-       assert(0);
-}
-
-static void *get_dp(struct mic_info *mic, int type)
-{
-       switch (type) {
-       case VIRTIO_ID_CONSOLE:
-               return mic->mic_console.console_dp;
-       case VIRTIO_ID_NET:
-               return mic->mic_net.net_dp;
-       case VIRTIO_ID_BLOCK:
-               return mic->mic_virtblk.block_dp;
-       }
-       mpsslog("%s %s %d not found\n", mic->name, __func__, type);
-       assert(0);
-       return NULL;
-}
-
-static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
-{
-       struct mic_device_desc *d;
-       int i;
-       void *dp = get_dp(mic, type);
-
-       for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
-               i += mic_total_desc_size(d)) {
-               d = dp + i;
-
-               /* End of list */
-               if (d->type == 0)
-                       break;
-
-               if (d->type == -1)
-                       continue;
-
-               mpsslog("%s %s d-> type %d d %p\n",
-                       mic->name, __func__, d->type, d);
-
-               if (d->type == (__u8)type)
-                       return d;
-       }
-       mpsslog("%s %s %d not found\n", mic->name, __func__, type);
-       return NULL;
-}
-
-/* See comments in vhost.c for explanation of next_desc() */
-static unsigned next_desc(struct vring_desc *desc)
-{
-       unsigned int next;
-
-       if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
-               return -1U;
-       next = le16toh(desc->next);
-       return next;
-}
-
-/* Sum up all the IOVEC length */
-static ssize_t
-sum_iovec_len(struct mic_copy_desc *copy)
-{
-       ssize_t sum = 0;
-       unsigned int i;
-
-       for (i = 0; i < copy->iovcnt; i++)
-               sum += copy->iov[i].iov_len;
-       return sum;
-}
-
-static inline void verify_out_len(struct mic_info *mic,
-       struct mic_copy_desc *copy)
-{
-       if (copy->out_len != sum_iovec_len(copy)) {
-               mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
-                       mic->name, __func__, __LINE__,
-                       copy->out_len, sum_iovec_len(copy));
-               assert(copy->out_len == sum_iovec_len(copy));
-       }
-}
-
-/* Display an iovec */
-static void
-disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
-          const char *s, int line)
-{
-       unsigned int i;
-
-       for (i = 0; i < copy->iovcnt; i++)
-               mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
-                       mic->name, s, line, i,
-                       copy->iov[i].iov_base, copy->iov[i].iov_len);
-}
-
-static inline __u16 read_avail_idx(struct mic_vring *vr)
-{
-       return ACCESS_ONCE(vr->info->avail_idx);
-}
-
-static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
-                               struct mic_copy_desc *copy, ssize_t len)
-{
-       copy->vr_idx = tx ? 0 : 1;
-       copy->update_used = true;
-       if (type == VIRTIO_ID_NET)
-               copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
-       else
-               copy->iov[0].iov_len = len;
-}
-
-/* Central API which triggers the copies */
-static int
-mic_virtio_copy(struct mic_info *mic, int fd,
-               struct mic_vring *vr, struct mic_copy_desc *copy)
-{
-       int ret;
-
-       ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
-       if (ret) {
-               mpsslog("%s %s %d errno %s ret %d\n",
-                       mic->name, __func__, __LINE__,
-                       strerror(errno), ret);
-       }
-       return ret;
-}
-
-static inline unsigned _vring_size(unsigned int num, unsigned long align)
-{
-       return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (3 + num)
-                               + align - 1) & ~(align - 1))
-               + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num;
-}
-
-/*
- * This initialization routine requires at least one
- * vring i.e. vr0. vr1 is optional.
- */
-static void *
-init_vr(struct mic_info *mic, int fd, int type,
-       struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
-{
-       int vr_size;
-       char *va;
-
-       vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
-                                        MIC_VIRTIO_RING_ALIGN) +
-                            sizeof(struct _mic_vring_info));
-       va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
-               PROT_READ, MAP_SHARED, fd, 0);
-       if (MAP_FAILED == va) {
-               mpsslog("%s %s %d mmap failed errno %s\n",
-                       mic->name, __func__, __LINE__,
-                       strerror(errno));
-               goto done;
-       }
-       set_dp(mic, type, va);
-       vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
-       vr0->info = vr0->va +
-               _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
-       vring_init(&vr0->vr,
-                  MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
-       mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
-               __func__, mic->name, vr0->va, vr0->info, vr_size,
-               _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
-       mpsslog("magic 0x%x expected 0x%x\n",
-               le32toh(vr0->info->magic), MIC_MAGIC + type);
-       assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
-       if (vr1) {
-               vr1->va = (struct mic_vring *)
-                       &va[MIC_DEVICE_PAGE_END + vr_size];
-               vr1->info = vr1->va + _vring_size(MIC_VRING_ENTRIES,
-                       MIC_VIRTIO_RING_ALIGN);
-               vring_init(&vr1->vr,
-                          MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
-               mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
-                       __func__, mic->name, vr1->va, vr1->info, vr_size,
-                       _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
-               mpsslog("magic 0x%x expected 0x%x\n",
-                       le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
-               assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
-       }
-done:
-       return va;
-}
-
-static int
-wait_for_card_driver(struct mic_info *mic, int fd, int type)
-{
-       struct pollfd pollfd;
-       int err;
-       struct mic_device_desc *desc = get_device_desc(mic, type);
-       __u8 prev_status;
-
-       if (!desc)
-               return -ENODEV;
-       prev_status = desc->status;
-       pollfd.fd = fd;
-       mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
-               mic->name, __func__, type, desc->status);
-
-       while (1) {
-               pollfd.events = POLLIN;
-               pollfd.revents = 0;
-               err = poll(&pollfd, 1, -1);
-               if (err < 0) {
-                       mpsslog("%s %s poll failed %s\n",
-                               mic->name, __func__, strerror(errno));
-                       continue;
-               }
-
-               if (pollfd.revents) {
-                       if (desc->status != prev_status) {
-                               mpsslog("%s %s Waiting... desc-> type %d "
-                                       "status 0x%x\n",
-                                       mic->name, __func__, type,
-                                       desc->status);
-                               prev_status = desc->status;
-                       }
-                       if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
-                               mpsslog("%s %s poll.revents %d\n",
-                                       mic->name, __func__, pollfd.revents);
-                               mpsslog("%s %s desc-> type %d status 0x%x\n",
-                                       mic->name, __func__, type,
-                                       desc->status);
-                               break;
-                       }
-               }
-       }
-       return 0;
-}
-
-/* Spin till we have some descriptors */
-static void
-spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
-{
-       __u16 avail_idx = read_avail_idx(vr);
-
-       while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
-#ifdef DEBUG
-               mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
-                       mic->name, __func__,
-                       le16toh(vr->vr.avail->idx), vr->info->avail_idx);
-#endif
-               sched_yield();
-       }
-}
-
-static void *
-virtio_net(void *arg)
-{
-       static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
-       static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
-       struct iovec vnet_iov[2][2] = {
-               { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
-                 { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
-               { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
-                 { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
-       };
-       struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
-       struct mic_info *mic = (struct mic_info *)arg;
-       char if_name[IFNAMSIZ];
-       struct pollfd net_poll[MAX_NET_FD];
-       struct mic_vring tx_vr, rx_vr;
-       struct mic_copy_desc copy;
-       struct mic_device_desc *desc;
-       int err;
-
-       snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
-       mic->mic_net.tap_fd = tun_alloc(mic, if_name);
-       if (mic->mic_net.tap_fd < 0)
-               goto done;
-
-       if (tap_configure(mic, if_name))
-               goto done;
-       mpsslog("MIC name %s id %d\n", mic->name, mic->id);
-
-       net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
-       net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
-       net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
-       net_poll[NET_FD_TUN].events = POLLIN;
-
-       if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
-                                 VIRTIO_ID_NET, &tx_vr, &rx_vr,
-               virtnet_dev_page.dd.num_vq)) {
-               mpsslog("%s init_vr failed %s\n",
-                       mic->name, strerror(errno));
-               goto done;
-       }
-
-       copy.iovcnt = 2;
-       desc = get_device_desc(mic, VIRTIO_ID_NET);
-
-       while (1) {
-               ssize_t len;
-
-               net_poll[NET_FD_VIRTIO_NET].revents = 0;
-               net_poll[NET_FD_TUN].revents = 0;
-
-               /* Start polling for data from tap and virtio net */
-               err = poll(net_poll, 2, -1);
-               if (err < 0) {
-                       mpsslog("%s poll failed %s\n",
-                               __func__, strerror(errno));
-                       continue;
-               }
-               if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
-                       err = wait_for_card_driver(mic,
-                                                  mic->mic_net.virtio_net_fd,
-                                                  VIRTIO_ID_NET);
-                       if (err) {
-                               mpsslog("%s %s %d Exiting...\n",
-                                       mic->name, __func__, __LINE__);
-                               break;
-                       }
-               }
-               /*
-                * Check if there is data to be read from TUN and write to
-                * virtio net fd if there is.
-                */
-               if (net_poll[NET_FD_TUN].revents & POLLIN) {
-                       copy.iov = iov0;
-                       len = readv(net_poll[NET_FD_TUN].fd,
-                               copy.iov, copy.iovcnt);
-                       if (len > 0) {
-                               struct virtio_net_hdr *hdr
-                                       = (struct virtio_net_hdr *)vnet_hdr[0];
-
-                               /* Disable checksums on the card since we are on
-                                  a reliable PCIe link */
-                               hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
-#ifdef DEBUG
-                               mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
-                                       __func__, __LINE__, hdr->flags);
-                               mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
-                                       copy.out_len, hdr->gso_type);
-#endif
-#ifdef DEBUG
-                               disp_iovec(mic, copy, __func__, __LINE__);
-                               mpsslog("%s %s %d read from tap 0x%lx\n",
-                                       mic->name, __func__, __LINE__,
-                                       len);
-#endif
-                               spin_for_descriptors(mic, &tx_vr);
-                               txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
-                                            len);
-
-                               err = mic_virtio_copy(mic,
-                                       mic->mic_net.virtio_net_fd, &tx_vr,
-                                       &copy);
-                               if (err < 0) {
-                                       mpsslog("%s %s %d mic_virtio_copy %s\n",
-                                               mic->name, __func__, __LINE__,
-                                               strerror(errno));
-                               }
-                               if (!err)
-                                       verify_out_len(mic, &copy);
-#ifdef DEBUG
-                               disp_iovec(mic, copy, __func__, __LINE__);
-                               mpsslog("%s %s %d wrote to net 0x%lx\n",
-                                       mic->name, __func__, __LINE__,
-                                       sum_iovec_len(&copy));
-#endif
-                               /* Reinitialize IOV for next run */
-                               iov0[1].iov_len = MAX_NET_PKT_SIZE;
-                       } else if (len < 0) {
-                               disp_iovec(mic, &copy, __func__, __LINE__);
-                               mpsslog("%s %s %d read failed %s ", mic->name,
-                                       __func__, __LINE__, strerror(errno));
-                               mpsslog("cnt %d sum %zd\n",
-                                       copy.iovcnt, sum_iovec_len(&copy));
-                       }
-               }
-
-               /*
-                * Check if there is data to be read from virtio net and
-                * write to TUN if there is.
-                */
-               if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
-                       while (rx_vr.info->avail_idx !=
-                               le16toh(rx_vr.vr.avail->idx)) {
-                               copy.iov = iov1;
-                               txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
-                                            MAX_NET_PKT_SIZE
-                                       + sizeof(struct virtio_net_hdr));
-
-                               err = mic_virtio_copy(mic,
-                                       mic->mic_net.virtio_net_fd, &rx_vr,
-                                       &copy);
-                               if (!err) {
-#ifdef DEBUG
-                                       struct virtio_net_hdr *hdr
-                                               = (struct virtio_net_hdr *)
-                                                       vnet_hdr[1];
-
-                                       mpsslog("%s %s %d hdr->flags 0x%x, ",
-                                               mic->name, __func__, __LINE__,
-                                               hdr->flags);
-                                       mpsslog("out_len %d gso_type 0x%x\n",
-                                               copy.out_len,
-                                               hdr->gso_type);
-#endif
-                                       /* Set the correct output iov_len */
-                                       iov1[1].iov_len = copy.out_len -
-                                               sizeof(struct virtio_net_hdr);
-                                       verify_out_len(mic, &copy);
-#ifdef DEBUG
-                                       disp_iovec(mic, copy, __func__,
-                                                  __LINE__);
-                                       mpsslog("%s %s %d ",
-                                               mic->name, __func__, __LINE__);
-                                       mpsslog("read from net 0x%lx\n",
-                                               sum_iovec_len(copy));
-#endif
-                                       len = writev(net_poll[NET_FD_TUN].fd,
-                                               copy.iov, copy.iovcnt);
-                                       if (len != sum_iovec_len(&copy)) {
-                                               mpsslog("Tun write failed %s ",
-                                                       strerror(errno));
-                                               mpsslog("len 0x%zx ", len);
-                                               mpsslog("read_len 0x%zx\n",
-                                                       sum_iovec_len(&copy));
-                                       } else {
-#ifdef DEBUG
-                                               disp_iovec(mic, &copy, __func__,
-                                                          __LINE__);
-                                               mpsslog("%s %s %d ",
-                                                       mic->name, __func__,
-                                                       __LINE__);
-                                               mpsslog("wrote to tap 0x%lx\n",
-                                                       len);
-#endif
-                                       }
-                               } else {
-                                       mpsslog("%s %s %d mic_virtio_copy %s\n",
-                                               mic->name, __func__, __LINE__,
-                                               strerror(errno));
-                                       break;
-                               }
-                       }
-               }
-               if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
-                       mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
-       }
-done:
-       pthread_exit(NULL);
-}
-
-/* virtio_console */
-#define VIRTIO_CONSOLE_FD 0
-#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
-#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
-#define MAX_BUFFER_SIZE PAGE_SIZE
-
-static void *
-virtio_console(void *arg)
-{
-       static __u8 vcons_buf[2][PAGE_SIZE];
-       struct iovec vcons_iov[2] = {
-               { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
-               { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
-       };
-       struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
-       struct mic_info *mic = (struct mic_info *)arg;
-       int err;
-       struct pollfd console_poll[MAX_CONSOLE_FD];
-       int pty_fd;
-       char *pts_name;
-       ssize_t len;
-       struct mic_vring tx_vr, rx_vr;
-       struct mic_copy_desc copy;
-       struct mic_device_desc *desc;
-
-       pty_fd = posix_openpt(O_RDWR);
-       if (pty_fd < 0) {
-               mpsslog("can't open a pseudoterminal master device: %s\n",
-                       strerror(errno));
-               goto _return;
-       }
-       pts_name = ptsname(pty_fd);
-       if (pts_name == NULL) {
-               mpsslog("can't get pts name\n");
-               goto _close_pty;
-       }
-       printf("%s console message goes to %s\n", mic->name, pts_name);
-       mpsslog("%s console message goes to %s\n", mic->name, pts_name);
-       err = grantpt(pty_fd);
-       if (err < 0) {
-               mpsslog("can't grant access: %s %s\n",
-                       pts_name, strerror(errno));
-               goto _close_pty;
-       }
-       err = unlockpt(pty_fd);
-       if (err < 0) {
-               mpsslog("can't unlock a pseudoterminal: %s %s\n",
-                       pts_name, strerror(errno));
-               goto _close_pty;
-       }
-       console_poll[MONITOR_FD].fd = pty_fd;
-       console_poll[MONITOR_FD].events = POLLIN;
-
-       console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
-       console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
-
-       if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
-                                 VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
-               virtcons_dev_page.dd.num_vq)) {
-               mpsslog("%s init_vr failed %s\n",
-                       mic->name, strerror(errno));
-               goto _close_pty;
-       }
-
-       copy.iovcnt = 1;
-       desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
-
-       for (;;) {
-               console_poll[MONITOR_FD].revents = 0;
-               console_poll[VIRTIO_CONSOLE_FD].revents = 0;
-               err = poll(console_poll, MAX_CONSOLE_FD, -1);
-               if (err < 0) {
-                       mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
-                               strerror(errno));
-                       continue;
-               }
-               if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
-                       err = wait_for_card_driver(mic,
-                                       mic->mic_console.virtio_console_fd,
-                                       VIRTIO_ID_CONSOLE);
-                       if (err) {
-                               mpsslog("%s %s %d Exiting...\n",
-                                       mic->name, __func__, __LINE__);
-                               break;
-                       }
-               }
-
-               if (console_poll[MONITOR_FD].revents & POLLIN) {
-                       copy.iov = iov0;
-                       len = readv(pty_fd, copy.iov, copy.iovcnt);
-                       if (len > 0) {
-#ifdef DEBUG
-                               disp_iovec(mic, copy, __func__, __LINE__);
-                               mpsslog("%s %s %d read from tap 0x%lx\n",
-                                       mic->name, __func__, __LINE__,
-                                       len);
-#endif
-                               spin_for_descriptors(mic, &tx_vr);
-                               txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
-                                            &copy, len);
-
-                               err = mic_virtio_copy(mic,
-                                       mic->mic_console.virtio_console_fd,
-                                       &tx_vr, &copy);
-                               if (err < 0) {
-                                       mpsslog("%s %s %d mic_virtio_copy %s\n",
-                                               mic->name, __func__, __LINE__,
-                                               strerror(errno));
-                               }
-                               if (!err)
-                                       verify_out_len(mic, &copy);
-#ifdef DEBUG
-                               disp_iovec(mic, copy, __func__, __LINE__);
-                               mpsslog("%s %s %d wrote to net 0x%lx\n",
-                                       mic->name, __func__, __LINE__,
-                                       sum_iovec_len(copy));
-#endif
-                               /* Reinitialize IOV for next run */
-                               iov0->iov_len = PAGE_SIZE;
-                       } else if (len < 0) {
-                               disp_iovec(mic, &copy, __func__, __LINE__);
-                               mpsslog("%s %s %d read failed %s ",
-                                       mic->name, __func__, __LINE__,
-                                       strerror(errno));
-                               mpsslog("cnt %d sum %zd\n",
-                                       copy.iovcnt, sum_iovec_len(&copy));
-                       }
-               }
-
-               if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
-                       while (rx_vr.info->avail_idx !=
-                               le16toh(rx_vr.vr.avail->idx)) {
-                               copy.iov = iov1;
-                               txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
-                                            &copy, PAGE_SIZE);
-
-                               err = mic_virtio_copy(mic,
-                                       mic->mic_console.virtio_console_fd,
-                                       &rx_vr, &copy);
-                               if (!err) {
-                                       /* Set the correct output iov_len */
-                                       iov1->iov_len = copy.out_len;
-                                       verify_out_len(mic, &copy);
-#ifdef DEBUG
-                                       disp_iovec(mic, copy, __func__,
-                                                  __LINE__);
-                                       mpsslog("%s %s %d ",
-                                               mic->name, __func__, __LINE__);
-                                       mpsslog("read from net 0x%lx\n",
-                                               sum_iovec_len(copy));
-#endif
-                                       len = writev(pty_fd,
-                                               copy.iov, copy.iovcnt);
-                                       if (len != sum_iovec_len(&copy)) {
-                                               mpsslog("Tun write failed %s ",
-                                                       strerror(errno));
-                                               mpsslog("len 0x%zx ", len);
-                                               mpsslog("read_len 0x%zx\n",
-                                                       sum_iovec_len(&copy));
-                                       } else {
-#ifdef DEBUG
-                                               disp_iovec(mic, copy, __func__,
-                                                          __LINE__);
-                                               mpsslog("%s %s %d ",
-                                                       mic->name, __func__,
-                                                       __LINE__);
-                                               mpsslog("wrote to tap 0x%lx\n",
-                                                       len);
-#endif
-                                       }
-                               } else {
-                                       mpsslog("%s %s %d mic_virtio_copy %s\n",
-                                               mic->name, __func__, __LINE__,
-                                               strerror(errno));
-                                       break;
-                               }
-                       }
-               }
-               if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
-                       mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
-       }
-_close_pty:
-       close(pty_fd);
-_return:
-       pthread_exit(NULL);
-}
-
-static void
-add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
-{
-       char path[PATH_MAX];
-       int fd, err;
-
-       snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id);
-       fd = open(path, O_RDWR);
-       if (fd < 0) {
-               mpsslog("Could not open %s %s\n", path, strerror(errno));
-               return;
-       }
-
-       err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
-       if (err < 0) {
-               mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
-               close(fd);
-               return;
-       }
-       switch (dd->type) {
-       case VIRTIO_ID_NET:
-               mic->mic_net.virtio_net_fd = fd;
-               mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
-               break;
-       case VIRTIO_ID_CONSOLE:
-               mic->mic_console.virtio_console_fd = fd;
-               mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
-               break;
-       case VIRTIO_ID_BLOCK:
-               mic->mic_virtblk.virtio_block_fd = fd;
-               mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
-               break;
-       }
-}
-
-static bool
-set_backend_file(struct mic_info *mic)
-{
-       FILE *config;
-       char buff[PATH_MAX], *line, *evv, *p;
-
-       snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
-       config = fopen(buff, "r");
-       if (config == NULL)
-               return false;
-       do {  /* look for "virtblk_backend=XXXX" */
-               line = fgets(buff, PATH_MAX, config);
-               if (line == NULL)
-                       break;
-               if (*line == '#')
-                       continue;
-               p = strchr(line, '\n');
-               if (p)
-                       *p = '\0';
-       } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
-       fclose(config);
-       if (line == NULL)
-               return false;
-       evv = strchr(line, '=');
-       if (evv == NULL)
-               return false;
-       mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
-       if (mic->mic_virtblk.backend_file == NULL) {
-               mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
-               return false;
-       }
-       strcpy(mic->mic_virtblk.backend_file, evv + 1);
-       return true;
-}
-
-#define SECTOR_SIZE 512
-static bool
-set_backend_size(struct mic_info *mic)
-{
-       mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
-               SEEK_END);
-       if (mic->mic_virtblk.backend_size < 0) {
-               mpsslog("%s: can't seek: %s\n",
-                       mic->name, mic->mic_virtblk.backend_file);
-               return false;
-       }
-       virtblk_dev_page.blk_config.capacity =
-               mic->mic_virtblk.backend_size / SECTOR_SIZE;
-       if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
-               virtblk_dev_page.blk_config.capacity++;
-
-       virtblk_dev_page.blk_config.capacity =
-               htole64(virtblk_dev_page.blk_config.capacity);
-
-       return true;
-}
-
-static bool
-open_backend(struct mic_info *mic)
-{
-       if (!set_backend_file(mic))
-               goto _error_exit;
-       mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
-       if (mic->mic_virtblk.backend < 0) {
-               mpsslog("%s: can't open: %s\n", mic->name,
-                       mic->mic_virtblk.backend_file);
-               goto _error_free;
-       }
-       if (!set_backend_size(mic))
-               goto _error_close;
-       mic->mic_virtblk.backend_addr = mmap(NULL,
-               mic->mic_virtblk.backend_size,
-               PROT_READ|PROT_WRITE, MAP_SHARED,
-               mic->mic_virtblk.backend, 0L);
-       if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
-               mpsslog("%s: can't map: %s %s\n",
-                       mic->name, mic->mic_virtblk.backend_file,
-                       strerror(errno));
-               goto _error_close;
-       }
-       return true;
-
- _error_close:
-       close(mic->mic_virtblk.backend);
- _error_free:
-       free(mic->mic_virtblk.backend_file);
- _error_exit:
-       return false;
-}
-
-static void
-close_backend(struct mic_info *mic)
-{
-       munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
-       close(mic->mic_virtblk.backend);
-       free(mic->mic_virtblk.backend_file);
-}
-
-static bool
-start_virtblk(struct mic_info *mic, struct mic_vring *vring)
-{
-       if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
-               mpsslog("%s: blk_config is not 8 byte aligned.\n",
-                       mic->name);
-               return false;
-       }
-       add_virtio_device(mic, &virtblk_dev_page.dd);
-       if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
-                                 VIRTIO_ID_BLOCK, vring, NULL,
-                                 virtblk_dev_page.dd.num_vq)) {
-               mpsslog("%s init_vr failed %s\n",
-                       mic->name, strerror(errno));
-               return false;
-       }
-       return true;
-}
-
-static void
-stop_virtblk(struct mic_info *mic)
-{
-       int vr_size, ret;
-
-       vr_size = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
-                                        MIC_VIRTIO_RING_ALIGN) +
-                            sizeof(struct _mic_vring_info));
-       ret = munmap(mic->mic_virtblk.block_dp,
-               MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
-       if (ret < 0)
-               mpsslog("%s munmap errno %d\n", mic->name, errno);
-       close(mic->mic_virtblk.virtio_block_fd);
-}
-
-static __u8
-header_error_check(struct vring_desc *desc)
-{
-       if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
-               mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
-                       __func__, __LINE__);
-               return -EIO;
-       }
-       if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
-               mpsslog("%s() %d: alone\n",
-                       __func__, __LINE__);
-               return -EIO;
-       }
-       if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
-               mpsslog("%s() %d: not read\n",
-                       __func__, __LINE__);
-               return -EIO;
-       }
-       return 0;
-}
-
-static int
-read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
-{
-       struct iovec iovec;
-       struct mic_copy_desc copy;
-
-       iovec.iov_len = sizeof(*hdr);
-       iovec.iov_base = hdr;
-       copy.iov = &iovec;
-       copy.iovcnt = 1;
-       copy.vr_idx = 0;  /* only one vring on virtio_block */
-       copy.update_used = false;  /* do not update used index */
-       return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
-}
-
-static int
-transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
-{
-       struct mic_copy_desc copy;
-
-       copy.iov = iovec;
-       copy.iovcnt = iovcnt;
-       copy.vr_idx = 0;  /* only one vring on virtio_block */
-       copy.update_used = false;  /* do not update used index */
-       return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
-}
-
-static __u8
-status_error_check(struct vring_desc *desc)
-{
-       if (le32toh(desc->len) != sizeof(__u8)) {
-               mpsslog("%s() %d: length is not sizeof(status)\n",
-                       __func__, __LINE__);
-               return -EIO;
-       }
-       return 0;
-}
-
-static int
-write_status(int fd, __u8 *status)
-{
-       struct iovec iovec;
-       struct mic_copy_desc copy;
-
-       iovec.iov_base = status;
-       iovec.iov_len = sizeof(*status);
-       copy.iov = &iovec;
-       copy.iovcnt = 1;
-       copy.vr_idx = 0;  /* only one vring on virtio_block */
-       copy.update_used = true; /* Update used index */
-       return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
-}
-
-#ifndef VIRTIO_BLK_T_GET_ID
-#define VIRTIO_BLK_T_GET_ID    8
-#endif
-
-static void *
-virtio_block(void *arg)
-{
-       struct mic_info *mic = (struct mic_info *)arg;
-       int ret;
-       struct pollfd block_poll;
-       struct mic_vring vring;
-       __u16 avail_idx;
-       __u32 desc_idx;
-       struct vring_desc *desc;
-       struct iovec *iovec, *piov;
-       __u8 status;
-       __u32 buffer_desc_idx;
-       struct virtio_blk_outhdr hdr;
-       void *fos;
-
-       for (;;) {  /* forever */
-               if (!open_backend(mic)) { /* No virtblk */
-                       for (mic->mic_virtblk.signaled = 0;
-                               !mic->mic_virtblk.signaled;)
-                               sleep(1);
-                       continue;
-               }
-
-               /* backend file is specified. */
-               if (!start_virtblk(mic, &vring))
-                       goto _close_backend;
-               iovec = malloc(sizeof(*iovec) *
-                       le32toh(virtblk_dev_page.blk_config.seg_max));
-               if (!iovec) {
-                       mpsslog("%s: can't alloc iovec: %s\n",
-                               mic->name, strerror(ENOMEM));
-                       goto _stop_virtblk;
-               }
-
-               block_poll.fd = mic->mic_virtblk.virtio_block_fd;
-               block_poll.events = POLLIN;
-               for (mic->mic_virtblk.signaled = 0;
-                    !mic->mic_virtblk.signaled;) {
-                       block_poll.revents = 0;
-                                       /* timeout in 1 sec to see signaled */
-                       ret = poll(&block_poll, 1, 1000);
-                       if (ret < 0) {
-                               mpsslog("%s %d: poll failed: %s\n",
-                                       __func__, __LINE__,
-                                       strerror(errno));
-                               continue;
-                       }
-
-                       if (!(block_poll.revents & POLLIN)) {
-#ifdef DEBUG
-                               mpsslog("%s %d: block_poll.revents=0x%x\n",
-                                       __func__, __LINE__, block_poll.revents);
-#endif
-                               continue;
-                       }
-
-                       /* POLLIN */
-                       while (vring.info->avail_idx !=
-                               le16toh(vring.vr.avail->idx)) {
-                               /* read header element */
-                               avail_idx =
-                                       vring.info->avail_idx &
-                                       (vring.vr.num - 1);
-                               desc_idx = le16toh(
-                                       vring.vr.avail->ring[avail_idx]);
-                               desc = &vring.vr.desc[desc_idx];
-#ifdef DEBUG
-                               mpsslog("%s() %d: avail_idx=%d ",
-                                       __func__, __LINE__,
-                                       vring.info->avail_idx);
-                               mpsslog("vring.vr.num=%d desc=%p\n",
-                                       vring.vr.num, desc);
-#endif
-                               status = header_error_check(desc);
-                               ret = read_header(
-                                       mic->mic_virtblk.virtio_block_fd,
-                                       &hdr, desc_idx);
-                               if (ret < 0) {
-                                       mpsslog("%s() %d %s: ret=%d %s\n",
-                                               __func__, __LINE__,
-                                               mic->name, ret,
-                                               strerror(errno));
-                                       break;
-                               }
-                               /* buffer element */
-                               piov = iovec;
-                               status = 0;
-                               fos = mic->mic_virtblk.backend_addr +
-                                       (hdr.sector * SECTOR_SIZE);
-                               buffer_desc_idx = next_desc(desc);
-                               desc_idx = buffer_desc_idx;
-                               for (desc = &vring.vr.desc[buffer_desc_idx];
-                                    desc->flags & VRING_DESC_F_NEXT;
-                                    desc_idx = next_desc(desc),
-                                            desc = &vring.vr.desc[desc_idx]) {
-                                       piov->iov_len = desc->len;
-                                       piov->iov_base = fos;
-                                       piov++;
-                                       fos += desc->len;
-                               }
-                               /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
-                               if (hdr.type & ~(VIRTIO_BLK_T_OUT |
-                                       VIRTIO_BLK_T_GET_ID)) {
-                                       /*
-                                         VIRTIO_BLK_T_IN - does not do
-                                         anything. Probably for documenting.
-                                         VIRTIO_BLK_T_SCSI_CMD - for
-                                         virtio_scsi.
-                                         VIRTIO_BLK_T_FLUSH - turned off in
-                                         config space.
-                                         VIRTIO_BLK_T_BARRIER - defined but not
-                                         used in anywhere.
-                                       */
-                                       mpsslog("%s() %d: type %x ",
-                                               __func__, __LINE__,
-                                               hdr.type);
-                                       mpsslog("is not supported\n");
-                                       status = -ENOTSUP;
-
-                               } else {
-                                       ret = transfer_blocks(
-                                       mic->mic_virtblk.virtio_block_fd,
-                                               iovec,
-                                               piov - iovec);
-                                       if (ret < 0 &&
-                                           status != 0)
-                                               status = ret;
-                               }
-                               /* write status and update used pointer */
-                               if (status != 0)
-                                       status = status_error_check(desc);
-                               ret = write_status(
-                                       mic->mic_virtblk.virtio_block_fd,
-                                       &status);
-#ifdef DEBUG
-                               mpsslog("%s() %d: write status=%d on desc=%p\n",
-                                       __func__, __LINE__,
-                                       status, desc);
-#endif
-                       }
-               }
-               free(iovec);
-_stop_virtblk:
-               stop_virtblk(mic);
-_close_backend:
-               close_backend(mic);
-       }  /* forever */
-
-       pthread_exit(NULL);
-}
-
-static void
-reset(struct mic_info *mic)
-{
-#define RESET_TIMEOUT 120
-       int i = RESET_TIMEOUT;
-       setsysfs(mic->name, "state", "reset");
-       while (i) {
-               char *state;
-               state = readsysfs(mic->name, "state");
-               if (!state)
-                       goto retry;
-               mpsslog("%s: %s %d state %s\n",
-                       mic->name, __func__, __LINE__, state);
-
-               if (!strcmp(state, "ready")) {
-                       free(state);
-                       break;
-               }
-               free(state);
-retry:
-               sleep(1);
-               i--;
-       }
-}
-
-static int
-get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
-{
-       if (!strcmp(shutdown_status, "nop"))
-               return MIC_NOP;
-       if (!strcmp(shutdown_status, "crashed"))
-               return MIC_CRASHED;
-       if (!strcmp(shutdown_status, "halted"))
-               return MIC_HALTED;
-       if (!strcmp(shutdown_status, "poweroff"))
-               return MIC_POWER_OFF;
-       if (!strcmp(shutdown_status, "restart"))
-               return MIC_RESTART;
-       mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
-       /* Invalid state */
-       assert(0);
-};
-
-static int get_mic_state(struct mic_info *mic)
-{
-       char *state = NULL;
-       enum mic_states mic_state;
-
-       while (!state) {
-               state = readsysfs(mic->name, "state");
-               sleep(1);
-       }
-       mpsslog("%s: %s %d state %s\n",
-               mic->name, __func__, __LINE__, state);
-
-       if (!strcmp(state, "ready")) {
-               mic_state = MIC_READY;
-       } else if (!strcmp(state, "booting")) {
-               mic_state = MIC_BOOTING;
-       } else if (!strcmp(state, "online")) {
-               mic_state = MIC_ONLINE;
-       } else if (!strcmp(state, "shutting_down")) {
-               mic_state = MIC_SHUTTING_DOWN;
-       } else if (!strcmp(state, "reset_failed")) {
-               mic_state = MIC_RESET_FAILED;
-       } else if (!strcmp(state, "resetting")) {
-               mic_state = MIC_RESETTING;
-       } else {
-               mpsslog("%s: BUG invalid state %s\n", mic->name, state);
-               assert(0);
-       }
-
-       free(state);
-       return mic_state;
-};
-
-static void mic_handle_shutdown(struct mic_info *mic)
-{
-#define SHUTDOWN_TIMEOUT 60
-       int i = SHUTDOWN_TIMEOUT;
-       char *shutdown_status;
-       while (i) {
-               shutdown_status = readsysfs(mic->name, "shutdown_status");
-               if (!shutdown_status) {
-                       sleep(1);
-                       continue;
-               }
-               mpsslog("%s: %s %d shutdown_status %s\n",
-                       mic->name, __func__, __LINE__, shutdown_status);
-               switch (get_mic_shutdown_status(mic, shutdown_status)) {
-               case MIC_RESTART:
-                       mic->restart = 1;
-               case MIC_HALTED:
-               case MIC_POWER_OFF:
-               case MIC_CRASHED:
-                       free(shutdown_status);
-                       goto reset;
-               default:
-                       break;
-               }
-               free(shutdown_status);
-               sleep(1);
-               i--;
-       }
-reset:
-       if (!i)
-               mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
-                       mic->name, __func__, __LINE__, shutdown_status);
-       reset(mic);
-}
-
-static int open_state_fd(struct mic_info *mic)
-{
-       char pathname[PATH_MAX];
-       int fd;
-
-       snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
-                MICSYSFSDIR, mic->name, "state");
-
-       fd = open(pathname, O_RDONLY);
-       if (fd < 0)
-               mpsslog("%s: opening file %s failed %s\n",
-                       mic->name, pathname, strerror(errno));
-       return fd;
-}
-
-static int block_till_state_change(int fd, struct mic_info *mic)
-{
-       struct pollfd ufds[1];
-       char value[PAGE_SIZE];
-       int ret;
-
-       ufds[0].fd = fd;
-       ufds[0].events = POLLERR | POLLPRI;
-       ret = poll(ufds, 1, -1);
-       if (ret < 0) {
-               mpsslog("%s: %s %d poll failed %s\n",
-                       mic->name, __func__, __LINE__, strerror(errno));
-               return ret;
-       }
-
-       ret = lseek(fd, 0, SEEK_SET);
-       if (ret < 0) {
-               mpsslog("%s: %s %d Failed to seek to 0: %s\n",
-                       mic->name, __func__, __LINE__, strerror(errno));
-               return ret;
-       }
-
-       ret = read(fd, value, sizeof(value));
-       if (ret < 0) {
-               mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
-                       mic->name, __func__, __LINE__, strerror(errno));
-               return ret;
-       }
-
-       return 0;
-}
-
-static void *
-mic_config(void *arg)
-{
-       struct mic_info *mic = (struct mic_info *)arg;
-       int fd, ret, stat = 0;
-
-       fd = open_state_fd(mic);
-       if (fd < 0) {
-               mpsslog("%s: %s %d open state fd failed %s\n",
-                       mic->name, __func__, __LINE__, strerror(errno));
-               goto exit;
-       }
-
-       do {
-               ret = block_till_state_change(fd, mic);
-               if (ret < 0) {
-                       mpsslog("%s: %s %d block_till_state_change error %s\n",
-                               mic->name, __func__, __LINE__, strerror(errno));
-                       goto close_exit;
-               }
-
-               switch (get_mic_state(mic)) {
-               case MIC_SHUTTING_DOWN:
-                       mic_handle_shutdown(mic);
-                       break;
-               case MIC_READY:
-               case MIC_RESET_FAILED:
-                       ret = kill(mic->pid, SIGTERM);
-                       mpsslog("%s: %s %d kill pid %d ret %d\n",
-                               mic->name, __func__, __LINE__,
-                               mic->pid, ret);
-                       if (!ret) {
-                               ret = waitpid(mic->pid, &stat,
-                                             WIFSIGNALED(stat));
-                               mpsslog("%s: %s %d waitpid ret %d pid %d\n",
-                                       mic->name, __func__, __LINE__,
-                                       ret, mic->pid);
-                       }
-                       if (mic->boot_on_resume) {
-                               setsysfs(mic->name, "state", "boot");
-                               mic->boot_on_resume = 0;
-                       }
-                       goto close_exit;
-               default:
-                       break;
-               }
-       } while (1);
-
-close_exit:
-       close(fd);
-exit:
-       init_mic(mic);
-       pthread_exit(NULL);
-}
-
-static void
-set_cmdline(struct mic_info *mic)
-{
-       char buffer[PATH_MAX];
-       int len;
-
-       len = snprintf(buffer, PATH_MAX,
-               "clocksource=tsc highres=off nohz=off ");
-       len += snprintf(buffer + len, PATH_MAX - len,
-               "cpufreq_on;corec6_off;pc3_off;pc6_off ");
-       len += snprintf(buffer + len, PATH_MAX - len,
-               "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
-               mic->id + 1);
-
-       setsysfs(mic->name, "cmdline", buffer);
-       mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
-       snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id + 1);
-       mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
-}
-
-static void
-set_log_buf_info(struct mic_info *mic)
-{
-       int fd;
-       off_t len;
-       char system_map[] = "/lib/firmware/mic/System.map";
-       char *map, *temp, log_buf[17] = {'\0'};
-
-       fd = open(system_map, O_RDONLY);
-       if (fd < 0) {
-               mpsslog("%s: Opening System.map failed: %d\n",
-                       mic->name, errno);
-               return;
-       }
-       len = lseek(fd, 0, SEEK_END);
-       if (len < 0) {
-               mpsslog("%s: Reading System.map size failed: %d\n",
-                       mic->name, errno);
-               close(fd);
-               return;
-       }
-       map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
-       if (map == MAP_FAILED) {
-               mpsslog("%s: mmap of System.map failed: %d\n",
-                       mic->name, errno);
-               close(fd);
-               return;
-       }
-       temp = strstr(map, "__log_buf");
-       if (!temp) {
-               mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
-               munmap(map, len);
-               close(fd);
-               return;
-       }
-       strncpy(log_buf, temp - 19, 16);
-       setsysfs(mic->name, "log_buf_addr", log_buf);
-       mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
-       temp = strstr(map, "log_buf_len");
-       if (!temp) {
-               mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
-               munmap(map, len);
-               close(fd);
-               return;
-       }
-       strncpy(log_buf, temp - 19, 16);
-       setsysfs(mic->name, "log_buf_len", log_buf);
-       mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
-       munmap(map, len);
-       close(fd);
-}
-
-static void
-change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
-{
-       struct mic_info *mic;
-
-       for (mic = mic_list.next; mic != NULL; mic = mic->next)
-               mic->mic_virtblk.signaled = 1/* true */;
-}
-
-static void
-set_mic_boot_params(struct mic_info *mic)
-{
-       set_log_buf_info(mic);
-       set_cmdline(mic);
-}
-
-static void *
-init_mic(void *arg)
-{
-       struct mic_info *mic = (struct mic_info *)arg;
-       struct sigaction ignore = {
-               .sa_flags = 0,
-               .sa_handler = SIG_IGN
-       };
-       struct sigaction act = {
-               .sa_flags = SA_SIGINFO,
-               .sa_sigaction = change_virtblk_backend,
-       };
-       char buffer[PATH_MAX];
-       int err, fd;
-
-       /*
-        * Currently, one virtio block device is supported for each MIC card
-        * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
-        * The signal informs the virtio block backend about a change in the
-        * configuration file which specifies the virtio backend file name on
-        * the host. Virtio block backend then re-reads the configuration file
-        * and switches to the new block device. This signalling mechanism may
-        * not be required once multiple virtio block devices are supported by
-        * the MIC daemon.
-        */
-       sigaction(SIGUSR1, &ignore, NULL);
-retry:
-       fd = open_state_fd(mic);
-       if (fd < 0) {
-               mpsslog("%s: %s %d open state fd failed %s\n",
-                       mic->name, __func__, __LINE__, strerror(errno));
-               sleep(2);
-               goto retry;
-       }
-
-       if (mic->restart) {
-               snprintf(buffer, PATH_MAX, "boot");
-               setsysfs(mic->name, "state", buffer);
-               mpsslog("%s restarting mic %d\n",
-                       mic->name, mic->restart);
-               mic->restart = 0;
-       }
-
-       while (1) {
-               while (block_till_state_change(fd, mic)) {
-                       mpsslog("%s: %s %d block_till_state_change error %s\n",
-                               mic->name, __func__, __LINE__, strerror(errno));
-                       sleep(2);
-                       continue;
-               }
-
-               if (get_mic_state(mic) == MIC_BOOTING)
-                       break;
-       }
-
-       mic->pid = fork();
-       switch (mic->pid) {
-       case 0:
-               add_virtio_device(mic, &virtcons_dev_page.dd);
-               add_virtio_device(mic, &virtnet_dev_page.dd);
-               err = pthread_create(&mic->mic_console.console_thread, NULL,
-                       virtio_console, mic);
-               if (err)
-                       mpsslog("%s virtcons pthread_create failed %s\n",
-                               mic->name, strerror(err));
-               err = pthread_create(&mic->mic_net.net_thread, NULL,
-                       virtio_net, mic);
-               if (err)
-                       mpsslog("%s virtnet pthread_create failed %s\n",
-                               mic->name, strerror(err));
-               err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
-                       virtio_block, mic);
-               if (err)
-                       mpsslog("%s virtblk pthread_create failed %s\n",
-                               mic->name, strerror(err));
-               sigemptyset(&act.sa_mask);
-               err = sigaction(SIGUSR1, &act, NULL);
-               if (err)
-                       mpsslog("%s sigaction SIGUSR1 failed %s\n",
-                               mic->name, strerror(errno));
-               while (1)
-                       sleep(60);
-       case -1:
-               mpsslog("fork failed MIC name %s id %d errno %d\n",
-                       mic->name, mic->id, errno);
-               break;
-       default:
-               err = pthread_create(&mic->config_thread, NULL,
-                                    mic_config, mic);
-               if (err)
-                       mpsslog("%s mic_config pthread_create failed %s\n",
-                               mic->name, strerror(err));
-       }
-
-       return NULL;
-}
-
-static void
-start_daemon(void)
-{
-       struct mic_info *mic;
-       int err;
-
-       for (mic = mic_list.next; mic; mic = mic->next) {
-               set_mic_boot_params(mic);
-               err = pthread_create(&mic->init_thread, NULL, init_mic, mic);
-               if (err)
-                       mpsslog("%s init_mic pthread_create failed %s\n",
-                               mic->name, strerror(err));
-       }
-
-       while (1)
-               sleep(60);
-}
-
-static int
-init_mic_list(void)
-{
-       struct mic_info *mic = &mic_list;
-       struct dirent *file;
-       DIR *dp;
-       int cnt = 0;
-
-       dp = opendir(MICSYSFSDIR);
-       if (!dp)
-               return 0;
-
-       while ((file = readdir(dp)) != NULL) {
-               if (!strncmp(file->d_name, "mic", 3)) {
-                       mic->next = calloc(1, sizeof(struct mic_info));
-                       if (mic->next) {
-                               mic = mic->next;
-                               mic->id = atoi(&file->d_name[3]);
-                               mic->name = malloc(strlen(file->d_name) + 16);
-                               if (mic->name)
-                                       strcpy(mic->name, file->d_name);
-                               mpsslog("MIC name %s id %d\n", mic->name,
-                                       mic->id);
-                               cnt++;
-                       }
-               }
-       }
-
-       closedir(dp);
-       return cnt;
-}
-
-void
-mpsslog(char *format, ...)
-{
-       va_list args;
-       char buffer[4096];
-       char ts[52], *ts1;
-       time_t t;
-
-       if (logfp == NULL)
-               return;
-
-       va_start(args, format);
-       vsprintf(buffer, format, args);
-       va_end(args);
-
-       time(&t);
-       ts1 = ctime_r(&t, ts);
-       ts1[strlen(ts1) - 1] = '\0';
-       fprintf(logfp, "%s: %s", ts1, buffer);
-
-       fflush(logfp);
-}
-
-int
-main(int argc, char *argv[])
-{
-       int cnt;
-       pid_t pid;
-
-       myname = argv[0];
-
-       logfp = fopen(LOGFILE_NAME, "a+");
-       if (!logfp) {
-               fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
-               exit(1);
-       }
-       pid = fork();
-       switch (pid) {
-       case 0:
-               break;
-       case -1:
-               exit(2);
-       default:
-               exit(0);
-       }
-
-       mpsslog("MIC Daemon start\n");
-
-       cnt = init_mic_list();
-       if (cnt == 0) {
-               mpsslog("MIC module not loaded\n");
-               exit(3);
-       }
-       mpsslog("MIC found %d devices\n", cnt);
-
-       start_daemon();
-
-       exit(0);
-}
diff --git a/Documentation/mic/mpssd/mpssd.h b/Documentation/mic/mpssd/mpssd.h
deleted file mode 100644 (file)
index 8bd6494..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Intel MIC User Space Tools.
- */
-#ifndef _MPSSD_H_
-#define _MPSSD_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <dirent.h>
-#include <libgen.h>
-#include <pthread.h>
-#include <stdarg.h>
-#include <time.h>
-#include <errno.h>
-#include <sys/dir.h>
-#include <sys/ioctl.h>
-#include <sys/poll.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/mman.h>
-#include <sys/utsname.h>
-#include <sys/wait.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <pthread.h>
-#include <signal.h>
-#include <limits.h>
-#include <syslog.h>
-#include <getopt.h>
-#include <net/if.h>
-#include <linux/if_tun.h>
-#include <linux/if_tun.h>
-#include <linux/virtio_ids.h>
-
-#define MICSYSFSDIR "/sys/class/mic"
-#define LOGFILE_NAME "/var/log/mpssd"
-#define PAGE_SIZE 4096
-
-struct mic_console_info {
-       pthread_t       console_thread;
-       int             virtio_console_fd;
-       void            *console_dp;
-};
-
-struct mic_net_info {
-       pthread_t       net_thread;
-       int             virtio_net_fd;
-       int             tap_fd;
-       void            *net_dp;
-};
-
-struct mic_virtblk_info {
-       pthread_t       block_thread;
-       int             virtio_block_fd;
-       void            *block_dp;
-       volatile sig_atomic_t   signaled;
-       char            *backend_file;
-       int             backend;
-       void            *backend_addr;
-       long            backend_size;
-};
-
-struct mic_info {
-       int             id;
-       char            *name;
-       pthread_t       config_thread;
-       pthread_t       init_thread;
-       pid_t           pid;
-       struct mic_console_info mic_console;
-       struct mic_net_info     mic_net;
-       struct mic_virtblk_info mic_virtblk;
-       int             restart;
-       int             boot_on_resume;
-       struct mic_info *next;
-};
-
-__attribute__((format(printf, 1, 2)))
-void mpsslog(char *format, ...);
-char *readsysfs(char *dir, char *entry);
-int setsysfs(char *dir, char *entry, char *value);
-#endif
diff --git a/Documentation/mic/mpssd/sysfs.c b/Documentation/mic/mpssd/sysfs.c
deleted file mode 100644 (file)
index 8dd3269..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Intel MIC Platform Software Stack (MPSS)
- *
- * Copyright(c) 2013 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * The full GNU General Public License is included in this distribution in
- * the file called "COPYING".
- *
- * Intel MIC User Space Tools.
- */
-
-#include "mpssd.h"
-
-#define PAGE_SIZE 4096
-
-char *
-readsysfs(char *dir, char *entry)
-{
-       char filename[PATH_MAX];
-       char value[PAGE_SIZE];
-       char *string = NULL;
-       int fd;
-       int len;
-
-       if (dir == NULL)
-               snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
-       else
-               snprintf(filename, PATH_MAX,
-                        "%s/%s/%s", MICSYSFSDIR, dir, entry);
-
-       fd = open(filename, O_RDONLY);
-       if (fd < 0) {
-               mpsslog("Failed to open sysfs entry '%s': %s\n",
-                       filename, strerror(errno));
-               return NULL;
-       }
-
-       len = read(fd, value, sizeof(value));
-       if (len < 0) {
-               mpsslog("Failed to read sysfs entry '%s': %s\n",
-                       filename, strerror(errno));
-               goto readsys_ret;
-       }
-       if (len == 0)
-               goto readsys_ret;
-
-       value[len - 1] = '\0';
-
-       string = malloc(strlen(value) + 1);
-       if (string)
-               strcpy(string, value);
-
-readsys_ret:
-       close(fd);
-       return string;
-}
-
-int
-setsysfs(char *dir, char *entry, char *value)
-{
-       char filename[PATH_MAX];
-       char *oldvalue;
-       int fd, ret = 0;
-
-       if (dir == NULL)
-               snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
-       else
-               snprintf(filename, PATH_MAX, "%s/%s/%s",
-                        MICSYSFSDIR, dir, entry);
-
-       oldvalue = readsysfs(dir, entry);
-
-       fd = open(filename, O_RDWR);
-       if (fd < 0) {
-               ret = errno;
-               mpsslog("Failed to open sysfs entry '%s': %s\n",
-                       filename, strerror(errno));
-               goto done;
-       }
-
-       if (!oldvalue || strcmp(value, oldvalue)) {
-               if (write(fd, value, strlen(value)) < 0) {
-                       ret = errno;
-                       mpsslog("Failed to write new sysfs entry '%s': %s\n",
-                               filename, strerror(errno));
-               }
-       }
-       close(fd);
-done:
-       if (oldvalue)
-               free(oldvalue);
-       return ret;
-}
diff --git a/Documentation/misc-devices/Makefile b/Documentation/misc-devices/Makefile
deleted file mode 100644 (file)
index e2b7aa4..0000000
+++ /dev/null
@@ -1 +0,0 @@
-subdir-y := mei
diff --git a/Documentation/misc-devices/mei/.gitignore b/Documentation/misc-devices/mei/.gitignore
deleted file mode 100644 (file)
index f356b81..0000000
+++ /dev/null
@@ -1 +0,0 @@
-mei-amt-version
diff --git a/Documentation/misc-devices/mei/Makefile b/Documentation/misc-devices/mei/Makefile
deleted file mode 100644 (file)
index d758047..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-# List of programs to build
-hostprogs-y := mei-amt-version
-HOSTCFLAGS_mei-amt-version.o += -I$(objtree)/usr/include
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
diff --git a/Documentation/misc-devices/mei/TODO b/Documentation/misc-devices/mei/TODO
deleted file mode 100644 (file)
index 6b3625d..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-TODO:
-       - Cleanup and split the timer function
diff --git a/Documentation/misc-devices/mei/mei-amt-version.c b/Documentation/misc-devices/mei/mei-amt-version.c
deleted file mode 100644 (file)
index 57d0d87..0000000
+++ /dev/null
@@ -1,479 +0,0 @@
-/******************************************************************************
- * Intel Management Engine Interface (Intel MEI) Linux driver
- * Intel MEI Interface Header
- *
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2012 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
- * USA
- *
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- *     Intel Corporation.
- *     linux-mei@linux.intel.com
- *     http://www.intel.com
- *
- * BSD LICENSE
- *
- * Copyright(c) 2003 - 2012 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *  * Neither the name Intel Corporation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *****************************************************************************/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-#include <errno.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <bits/wordsize.h>
-#include <linux/mei.h>
-
-/*****************************************************************************
- * Intel Management Engine Interface
- *****************************************************************************/
-
-#define mei_msg(_me, fmt, ARGS...) do {         \
-       if (_me->verbose)                       \
-               fprintf(stderr, fmt, ##ARGS);   \
-} while (0)
-
-#define mei_err(_me, fmt, ARGS...) do {         \
-       fprintf(stderr, "Error: " fmt, ##ARGS); \
-} while (0)
-
-struct mei {
-       uuid_le guid;
-       bool initialized;
-       bool verbose;
-       unsigned int buf_size;
-       unsigned char prot_ver;
-       int fd;
-};
-
-static void mei_deinit(struct mei *cl)
-{
-       if (cl->fd != -1)
-               close(cl->fd);
-       cl->fd = -1;
-       cl->buf_size = 0;
-       cl->prot_ver = 0;
-       cl->initialized = false;
-}
-
-static bool mei_init(struct mei *me, const uuid_le *guid,
-               unsigned char req_protocol_version, bool verbose)
-{
-       int result;
-       struct mei_client *cl;
-       struct mei_connect_client_data data;
-
-       me->verbose = verbose;
-
-       me->fd = open("/dev/mei", O_RDWR);
-       if (me->fd == -1) {
-               mei_err(me, "Cannot establish a handle to the Intel MEI driver\n");
-               goto err;
-       }
-       memcpy(&me->guid, guid, sizeof(*guid));
-       memset(&data, 0, sizeof(data));
-       me->initialized = true;
-
-       memcpy(&data.in_client_uuid, &me->guid, sizeof(me->guid));
-       result = ioctl(me->fd, IOCTL_MEI_CONNECT_CLIENT, &data);
-       if (result) {
-               mei_err(me, "IOCTL_MEI_CONNECT_CLIENT receive message. err=%d\n", result);
-               goto err;
-       }
-       cl = &data.out_client_properties;
-       mei_msg(me, "max_message_length %d\n", cl->max_msg_length);
-       mei_msg(me, "protocol_version %d\n", cl->protocol_version);
-
-       if ((req_protocol_version > 0) &&
-            (cl->protocol_version != req_protocol_version)) {
-               mei_err(me, "Intel MEI protocol version not supported\n");
-               goto err;
-       }
-
-       me->buf_size = cl->max_msg_length;
-       me->prot_ver = cl->protocol_version;
-
-       return true;
-err:
-       mei_deinit(me);
-       return false;
-}
-
-static ssize_t mei_recv_msg(struct mei *me, unsigned char *buffer,
-                       ssize_t len, unsigned long timeout)
-{
-       ssize_t rc;
-
-       mei_msg(me, "call read length = %zd\n", len);
-
-       rc = read(me->fd, buffer, len);
-       if (rc < 0) {
-               mei_err(me, "read failed with status %zd %s\n",
-                               rc, strerror(errno));
-               mei_deinit(me);
-       } else {
-               mei_msg(me, "read succeeded with result %zd\n", rc);
-       }
-       return rc;
-}
-
-static ssize_t mei_send_msg(struct mei *me, const unsigned char *buffer,
-                       ssize_t len, unsigned long timeout)
-{
-       struct timeval tv;
-       ssize_t written;
-       ssize_t rc;
-       fd_set set;
-
-       tv.tv_sec = timeout / 1000;
-       tv.tv_usec = (timeout % 1000) * 1000000;
-
-       mei_msg(me, "call write length = %zd\n", len);
-
-       written = write(me->fd, buffer, len);
-       if (written < 0) {
-               rc = -errno;
-               mei_err(me, "write failed with status %zd %s\n",
-                       written, strerror(errno));
-               goto out;
-       }
-
-       FD_ZERO(&set);
-       FD_SET(me->fd, &set);
-       rc = select(me->fd + 1 , &set, NULL, NULL, &tv);
-       if (rc > 0 && FD_ISSET(me->fd, &set)) {
-               mei_msg(me, "write success\n");
-       } else if (rc == 0) {
-               mei_err(me, "write failed on timeout with status\n");
-               goto out;
-       } else { /* rc < 0 */
-               mei_err(me, "write failed on select with status %zd\n", rc);
-               goto out;
-       }
-
-       rc = written;
-out:
-       if (rc < 0)
-               mei_deinit(me);
-
-       return rc;
-}
-
-/***************************************************************************
- * Intel Advanced Management Technology ME Client
- ***************************************************************************/
-
-#define AMT_MAJOR_VERSION 1
-#define AMT_MINOR_VERSION 1
-
-#define AMT_STATUS_SUCCESS                0x0
-#define AMT_STATUS_INTERNAL_ERROR         0x1
-#define AMT_STATUS_NOT_READY              0x2
-#define AMT_STATUS_INVALID_AMT_MODE       0x3
-#define AMT_STATUS_INVALID_MESSAGE_LENGTH 0x4
-
-#define AMT_STATUS_HOST_IF_EMPTY_RESPONSE  0x4000
-#define AMT_STATUS_SDK_RESOURCES      0x1004
-
-
-#define AMT_BIOS_VERSION_LEN   65
-#define AMT_VERSIONS_NUMBER    50
-#define AMT_UNICODE_STRING_LEN 20
-
-struct amt_unicode_string {
-       uint16_t length;
-       char string[AMT_UNICODE_STRING_LEN];
-} __attribute__((packed));
-
-struct amt_version_type {
-       struct amt_unicode_string description;
-       struct amt_unicode_string version;
-} __attribute__((packed));
-
-struct amt_version {
-       uint8_t major;
-       uint8_t minor;
-} __attribute__((packed));
-
-struct amt_code_versions {
-       uint8_t bios[AMT_BIOS_VERSION_LEN];
-       uint32_t count;
-       struct amt_version_type versions[AMT_VERSIONS_NUMBER];
-} __attribute__((packed));
-
-/***************************************************************************
- * Intel Advanced Management Technology Host Interface
- ***************************************************************************/
-
-struct amt_host_if_msg_header {
-       struct amt_version version;
-       uint16_t _reserved;
-       uint32_t command;
-       uint32_t length;
-} __attribute__((packed));
-
-struct amt_host_if_resp_header {
-       struct amt_host_if_msg_header header;
-       uint32_t status;
-       unsigned char data[0];
-} __attribute__((packed));
-
-const uuid_le MEI_IAMTHIF = UUID_LE(0x12f80028, 0xb4b7, 0x4b2d,  \
-                               0xac, 0xa8, 0x46, 0xe0, 0xff, 0x65, 0x81, 0x4c);
-
-#define AMT_HOST_IF_CODE_VERSIONS_REQUEST  0x0400001A
-#define AMT_HOST_IF_CODE_VERSIONS_RESPONSE 0x0480001A
-
-const struct amt_host_if_msg_header CODE_VERSION_REQ = {
-       .version = {AMT_MAJOR_VERSION, AMT_MINOR_VERSION},
-       ._reserved = 0,
-       .command = AMT_HOST_IF_CODE_VERSIONS_REQUEST,
-       .length = 0
-};
-
-
-struct amt_host_if {
-       struct mei mei_cl;
-       unsigned long send_timeout;
-       bool initialized;
-};
-
-
-static bool amt_host_if_init(struct amt_host_if *acmd,
-                     unsigned long send_timeout, bool verbose)
-{
-       acmd->send_timeout = (send_timeout) ? send_timeout : 20000;
-       acmd->initialized = mei_init(&acmd->mei_cl, &MEI_IAMTHIF, 0, verbose);
-       return acmd->initialized;
-}
-
-static void amt_host_if_deinit(struct amt_host_if *acmd)
-{
-       mei_deinit(&acmd->mei_cl);
-       acmd->initialized = false;
-}
-
-static uint32_t amt_verify_code_versions(const struct amt_host_if_resp_header *resp)
-{
-       uint32_t status = AMT_STATUS_SUCCESS;
-       struct amt_code_versions *code_ver;
-       size_t code_ver_len;
-       uint32_t ver_type_cnt;
-       uint32_t len;
-       uint32_t i;
-
-       code_ver = (struct amt_code_versions *)resp->data;
-       /* length - sizeof(status) */
-       code_ver_len = resp->header.length - sizeof(uint32_t);
-       ver_type_cnt = code_ver_len -
-                       sizeof(code_ver->bios) -
-                       sizeof(code_ver->count);
-       if (code_ver->count != ver_type_cnt / sizeof(struct amt_version_type)) {
-               status = AMT_STATUS_INTERNAL_ERROR;
-               goto out;
-       }
-
-       for (i = 0; i < code_ver->count; i++) {
-               len = code_ver->versions[i].description.length;
-
-               if (len > AMT_UNICODE_STRING_LEN) {
-                       status = AMT_STATUS_INTERNAL_ERROR;
-                       goto out;
-               }
-
-               len = code_ver->versions[i].version.length;
-               if (code_ver->versions[i].version.string[len] != '\0' ||
-                   len != strlen(code_ver->versions[i].version.string)) {
-                       status = AMT_STATUS_INTERNAL_ERROR;
-                       goto out;
-               }
-       }
-out:
-       return status;
-}
-
-static uint32_t amt_verify_response_header(uint32_t command,
-                               const struct amt_host_if_msg_header *resp_hdr,
-                               uint32_t response_size)
-{
-       if (response_size < sizeof(struct amt_host_if_resp_header)) {
-               return AMT_STATUS_INTERNAL_ERROR;
-       } else if (response_size != (resp_hdr->length +
-                               sizeof(struct amt_host_if_msg_header))) {
-               return AMT_STATUS_INTERNAL_ERROR;
-       } else if (resp_hdr->command != command) {
-               return AMT_STATUS_INTERNAL_ERROR;
-       } else if (resp_hdr->_reserved != 0) {
-               return AMT_STATUS_INTERNAL_ERROR;
-       } else if (resp_hdr->version.major != AMT_MAJOR_VERSION ||
-                  resp_hdr->version.minor < AMT_MINOR_VERSION) {
-               return AMT_STATUS_INTERNAL_ERROR;
-       }
-       return AMT_STATUS_SUCCESS;
-}
-
-static uint32_t amt_host_if_call(struct amt_host_if *acmd,
-                       const unsigned char *command, ssize_t command_sz,
-                       uint8_t **read_buf, uint32_t rcmd,
-                       unsigned int expected_sz)
-{
-       uint32_t in_buf_sz;
-       uint32_t out_buf_sz;
-       ssize_t written;
-       uint32_t status;
-       struct amt_host_if_resp_header *msg_hdr;
-
-       in_buf_sz = acmd->mei_cl.buf_size;
-       *read_buf = (uint8_t *)malloc(sizeof(uint8_t) * in_buf_sz);
-       if (*read_buf == NULL)
-               return AMT_STATUS_SDK_RESOURCES;
-       memset(*read_buf, 0, in_buf_sz);
-       msg_hdr = (struct amt_host_if_resp_header *)*read_buf;
-
-       written = mei_send_msg(&acmd->mei_cl,
-                               command, command_sz, acmd->send_timeout);
-       if (written != command_sz)
-               return AMT_STATUS_INTERNAL_ERROR;
-
-       out_buf_sz = mei_recv_msg(&acmd->mei_cl, *read_buf, in_buf_sz, 2000);
-       if (out_buf_sz <= 0)
-               return AMT_STATUS_HOST_IF_EMPTY_RESPONSE;
-
-       status = msg_hdr->status;
-       if (status != AMT_STATUS_SUCCESS)
-               return status;
-
-       status = amt_verify_response_header(rcmd,
-                               &msg_hdr->header, out_buf_sz);
-       if (status != AMT_STATUS_SUCCESS)
-               return status;
-
-       if (expected_sz && expected_sz != out_buf_sz)
-               return AMT_STATUS_INTERNAL_ERROR;
-
-       return AMT_STATUS_SUCCESS;
-}
-
-
-static uint32_t amt_get_code_versions(struct amt_host_if *cmd,
-                              struct amt_code_versions *versions)
-{
-       struct amt_host_if_resp_header *response = NULL;
-       uint32_t status;
-
-       status = amt_host_if_call(cmd,
-                       (const unsigned char *)&CODE_VERSION_REQ,
-                       sizeof(CODE_VERSION_REQ),
-                       (uint8_t **)&response,
-                       AMT_HOST_IF_CODE_VERSIONS_RESPONSE, 0);
-
-       if (status != AMT_STATUS_SUCCESS)
-               goto out;
-
-       status = amt_verify_code_versions(response);
-       if (status != AMT_STATUS_SUCCESS)
-               goto out;
-
-       memcpy(versions, response->data, sizeof(struct amt_code_versions));
-out:
-       if (response != NULL)
-               free(response);
-
-       return status;
-}
-
-/************************** end of amt_host_if_command ***********************/
-int main(int argc, char **argv)
-{
-       struct amt_code_versions ver;
-       struct amt_host_if acmd;
-       unsigned int i;
-       uint32_t status;
-       int ret;
-       bool verbose;
-
-       verbose = (argc > 1 && strcmp(argv[1], "-v") == 0);
-
-       if (!amt_host_if_init(&acmd, 5000, verbose)) {
-               ret = 1;
-               goto out;
-       }
-
-       status = amt_get_code_versions(&acmd, &ver);
-
-       amt_host_if_deinit(&acmd);
-
-       switch (status) {
-       case AMT_STATUS_HOST_IF_EMPTY_RESPONSE:
-               printf("Intel AMT: DISABLED\n");
-               ret = 0;
-               break;
-       case AMT_STATUS_SUCCESS:
-               printf("Intel AMT: ENABLED\n");
-               for (i = 0; i < ver.count; i++) {
-                       printf("%s:\t%s\n", ver.versions[i].description.string,
-                               ver.versions[i].version.string);
-               }
-               ret = 0;
-               break;
-       default:
-               printf("An error has occurred\n");
-               ret = 1;
-               break;
-       }
-
-out:
-       return ret;
-}
index a7697783ac4c5ddba978e6438c01ca30a0c358c6..c6beb5f1637f9606475bb22746944333e5345b92 100644 (file)
@@ -10,8 +10,6 @@ LICENSE.qlge
        - GPLv2 for QLogic Linux qlge NIC Driver
 LICENSE.qlcnic
        - GPLv2 for QLogic Linux qlcnic NIC Driver
-Makefile
-       - Makefile for docsrc.
 PLIP.txt
        - PLIP: The Parallel Line Internet Protocol device driver
 README.ipw2100
diff --git a/Documentation/networking/Makefile b/Documentation/networking/Makefile
deleted file mode 100644 (file)
index 4c5d7c4..0000000
+++ /dev/null
@@ -1 +0,0 @@
-subdir-y := timestamping
index 8a8d3d96f6c6b0fb62e2a4651a6528c6ed321113..ccf94677b240973f35c4cd9e948c96b92163037e 100644 (file)
@@ -32,7 +32,7 @@ compatible interfaces. Once found, it will create  subfolders  in
 the /sys directories of each supported interface, e.g.
 
 # ls /sys/class/net/eth0/batman_adv/
-# iface_status  mesh_iface
+# elp_interval  iface_status  mesh_iface  throughput_override
 
 If an interface does not have the "batman_adv" subfolder it prob-
 ably is not supported. Not supported  interfaces  are:  loopback,
@@ -71,17 +71,19 @@ All  mesh  wide  settings  can be found in batman's own interface
 folder:
 
 # ls /sys/class/net/bat0/mesh/
-#aggregated_ogms        distributed_arp_table  gw_sel_class    orig_interval
-#ap_isolation           fragmentation          hop_penalty     routing_algo
-#bonding                gw_bandwidth           isolation_mark  vlan0
-#bridge_loop_avoidance  gw_mode                log_level
+# aggregated_ogms        fragmentation  isolation_mark  routing_algo
+# ap_isolation           gw_bandwidth   log_level       vlan0
+# bonding                gw_mode        multicast_mode
+# bridge_loop_avoidance  gw_sel_class   network_coding
+# distributed_arp_table  hop_penalty    orig_interval
 
 There is a special folder for debugging information:
 
 # ls /sys/kernel/debug/batman_adv/bat0/
-# bla_backbone_table  log                 transtable_global
-# bla_claim_table     originators         transtable_local
-# gateways            socket
+# bla_backbone_table  log          neighbors          transtable_local
+# bla_claim_table     mcast_flags  originators
+# dat_cache           nc           socket
+# gateways            nc_nodes     transtable_global
 
 Some of the files contain all sort of status information  regard-
 ing  the  mesh  network.  For  example, you can view the table of
@@ -159,13 +161,16 @@ file in debugfs
 The additional debug output is by default disabled. It can be en-
 abled  during run time. Following log_levels are defined:
 
-0 - All  debug  output  disabled
-1 - Enable messages related to routing / flooding / broadcasting
-2 - Enable messages related to route added / changed / deleted
-4 - Enable messages related to translation table operations
-8 - Enable messages related to bridge loop avoidance
-16 - Enable messaged related to DAT, ARP snooping and parsing
-31 - Enable all messages
+  0 - All  debug  output  disabled
+  1 - Enable messages related to routing / flooding / broadcasting
+  2 - Enable messages related to route added / changed / deleted
+  4 - Enable messages related to translation table operations
+  8 - Enable messages related to bridge loop avoidance
+ 16 - Enable messages related to DAT, ARP snooping and parsing
+ 32 - Enable messages related to network coding
+ 64 - Enable messages related to multicast
+128 - Enable messages related to throughput meter
+255 - Enable all messages
 
 The debug output can be changed at runtime  using  the  file
 /sys/class/net/bat0/mesh/log_level. e.g.
index 6d6c07cf1a9aed11628ac9824c65b84332a30896..63912ef346069b228b984c2f2d1a70f0c9c6ffc0 100644 (file)
@@ -67,13 +67,14 @@ Note that DSA does not currently create network interfaces for the "cpu" and
 Switch tagging protocols
 ------------------------
 
-DSA currently supports 4 different tagging protocols, and a tag-less mode as
+DSA currently supports 5 different tagging protocols, and a tag-less mode as
 well. The different protocols are implemented in:
 
 net/dsa/tag_trailer.c: Marvell's 4 trailer tag mode (legacy)
 net/dsa/tag_dsa.c: Marvell's original DSA tag
 net/dsa/tag_edsa.c: Marvell's enhanced DSA tag
 net/dsa/tag_brcm.c: Broadcom's 4 bytes tag
+net/dsa/tag_qca.c: Qualcomm's 2 bytes tag
 
 The exact format of the tag protocol is vendor specific, but in general, they
 all contain something which:
index 3db8c67d2c8db9bd952c551010805b1f92d8f777..5ca567fa6b8cb1570d73c26f79ea370fdd4ca97f 100644 (file)
@@ -610,8 +610,13 @@ tcp_syn_retries - INTEGER
        with the current initial RTO of 1second. With this the final timeout
        for an active TCP connection attempt will happen after 127seconds.
 
-tcp_timestamps - BOOLEAN
-       Enable timestamps as defined in RFC1323.
+tcp_timestamps - INTEGER
+       Enable timestamps as defined in RFC1323.
+       0: Disabled.
+       1: Enable timestamps as defined in RFC1323 and use random offset for
+       each connection rather than only using the current time.
+       2: Like 1, but without random offsets.
+       Default: 1
 
 tcp_min_tso_segs - INTEGER
        Minimal number of segments per TSO frame.
@@ -967,6 +972,21 @@ igmp_qrv - INTEGER
        Default: 2 (as specified by RFC2236 8.1)
        Minimum: 1 (as specified by RFC6636 4.5)
 
+force_igmp_version - INTEGER
+       0 - (default) No enforcement of an IGMP version, IGMPv1/v2 fallback
+           allowed. Will switch back to IGMPv3 mode once all IGMPv1/v2
+           Querier Present timers have expired.
+       1 - Enforce use of IGMP version 1. Will also reply with an IGMPv1
+           report if an IGMPv2/v3 query is received.
+       2 - Enforce use of IGMP version 2. Will fall back to IGMPv1 if an IGMPv1
+           query is received. Will reply with a report to an IGMPv3 query.
+       3 - Enforce use of IGMP version 3. Behaves the same as the default 0.
+
+       Note: this is not the same as force_mld_version, because the IGMPv3
+       RFC3376 Security Considerations do not clearly state that messages of
+       other versions may be ignored completely, as MLDv2 RFC3810 does. So
+       keeping this value at the default 0 is recommended.
+
 conf/interface/*  changes special settings per interface (where
 "interface" is the name of your network interface)
 
index 24ac91d56698d626fb266556c5b0c3b9fd212fa9..3566a725d19c7dd704b929206f2a0aad81a10dfa 100644 (file)
@@ -60,7 +60,7 @@ modprobe mac80211_hwsim
 hostapd hostapd.conf
 
 # Run wpa_supplicant (station) for wlan1
-wpa_supplicant -Dwext -iwlan1 -c wpa_supplicant.conf
+wpa_supplicant -Dnl80211 -iwlan1 -c wpa_supplicant.conf
 
 
 More test cases are available in hostap.git:
index 0fe1c6e0dbcd58fccdcc953477da11e2d6598358..a20b2fae942b29e21fba5935f47777b391ecd65f 100644 (file)
@@ -29,8 +29,8 @@ A: There are always two trees (git repositories) in play.  Both are driven
    Linus, and net-next is where the new code goes for the future release.
    You can find the trees here:
 
-       http://git.kernel.org/?p=linux/kernel/git/davem/net.git
-       http://git.kernel.org/?p=linux/kernel/git/davem/net-next.git
+        https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
+        https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
 
 Q: How often do changes from these trees make it to the mainline Linus tree?
 
@@ -76,7 +76,7 @@ Q: So where are we now in this cycle?
 
 A: Load the mainline (Linus) page here:
 
-       http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git
+       https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
 
    and note the top of the "tags" section.  If it is rc1, it is early
    in the dev cycle.  If it was tagged rc7 a week ago, then a release
@@ -123,7 +123,7 @@ A: Normally Greg Kroah-Hartman collects stable commits himself, but
 
    It contains the patches which Dave has selected, but not yet handed
    off to Greg.  If Greg already has the patch, then it will be here:
-       http://git.kernel.org/cgit/linux/kernel/git/stable/stable-queue.git
+       https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git
 
    A quick way to find whether the patch is in this stable-queue is
    to simply clone the repo, and then git grep the mainline commit ID, e.g.
index 4fb51d32fccc2acd4ebaa1347dcf51fb49cab90d..399e4e866a9c4df6104d8a048b5d2546db0da0f4 100644 (file)
@@ -33,24 +33,6 @@ nf_conntrack_events - BOOLEAN
        If this option is enabled, the connection tracking code will
        provide userspace with connection tracking events via ctnetlink.
 
-nf_conntrack_events_retry_timeout - INTEGER (seconds)
-       default 15
-
-       This option is only relevant when "reliable connection tracking
-       events" are used.  Normally, ctnetlink is "lossy", that is,
-       events are normally dropped when userspace listeners can't keep up.
-
-       Userspace can request "reliable event mode".  When this mode is
-       active, the conntrack will only be destroyed after the event was
-       delivered.  If event delivery fails, the kernel periodically
-       re-tries to send the event to userspace.
-
-       This is the maximum interval the kernel should use when re-trying
-       to deliver the destroy event.
-
-       A higher number means there will be fewer delivery retries and it
-       will take longer for a backlog to be processed.
-
 nf_conntrack_expect_max - INTEGER
        Maximum size of expectation table.  Default value is
        nf_conntrack_buckets / 256. Minimum is 1.
index 7ab9404a8412088cc9bc9446ea833779b9284697..e017d933d53082b8724bdf24918aede93b02c283 100644 (file)
@@ -65,6 +65,83 @@ The MDIO bus
  drivers/net/ethernet/freescale/fsl_pq_mdio.c and an associated DTS file
  for one of the users. (e.g. "git grep fsl,.*-mdio arch/powerpc/boot/dts/")
 
+(RG)MII/electrical interface considerations
+
+ The Reduced Gigabit Medium Independent Interface (RGMII) is a 12-pin
+ electrical signal interface using a synchronous 125MHz clock signal and several
+ data lines. Due to this design decision, a 1.5ns to 2ns delay must be added
+ between the clock line (RXC or TXC) and the data lines to let the PHY (clock
+ sink) have enough setup and hold times to sample the data lines correctly. The
+ PHY library offers different types of PHY_INTERFACE_MODE_RGMII* values to let
+ the PHY driver and optionally the MAC driver, implement the required delay. The
+ values of phy_interface_t must be understood from the perspective of the PHY
+ device itself, leading to the following:
+
+ * PHY_INTERFACE_MODE_RGMII: the PHY is not responsible for inserting any
+   internal delay by itself, it assumes that either the Ethernet MAC (if
+   capable) or the PCB traces insert the correct 1.5-2ns delay
+
+ * PHY_INTERFACE_MODE_RGMII_TXID: the PHY should insert an internal delay
+   for the transmit data lines (TXD[3:0]) processed by the PHY device
+
+ * PHY_INTERFACE_MODE_RGMII_RXID: the PHY should insert an internal delay
+   for the receive data lines (RXD[3:0]) processed by the PHY device
+
+ * PHY_INTERFACE_MODE_RGMII_ID: the PHY should insert internal delays for
+   both transmit AND receive data lines from/to the PHY device
+
+ Whenever possible, use the PHY side RGMII delay for these reasons:
+
+ * PHY devices may offer sub-nanosecond granularity in how they allow a
+   receiver/transmitter side delay (e.g: 0.5, 1.0, 1.5ns) to be specified. Such
+   precision may be required to account for differences in PCB trace lengths
+
+ * PHY devices are typically qualified for a large range of applications
+   (industrial, medical, automotive...), and they provide a constant and
+   reliable delay across temperature/pressure/voltage ranges
+
+ * PHY device drivers in PHYLIB being reusable by nature, being able to
+   configure correctly a specified delay enables more designs with similar delay
+   requirements to operate correctly
+
+ For cases where the PHY is not capable of providing this delay, but the
+ Ethernet MAC driver is capable of doing so, the correct phy_interface_t value
+ should be PHY_INTERFACE_MODE_RGMII, and the Ethernet MAC driver should be
+ configured correctly in order to provide the required transmit and/or receive
+ side delay from the perspective of the PHY device. Conversely, if the Ethernet
+ MAC driver looks at the phy_interface_t value, for any other mode but
+ PHY_INTERFACE_MODE_RGMII, it should make sure that the MAC-level delays are
+ disabled.
+
+ In case neither the Ethernet MAC, nor the PHY are capable of providing the
+ required delays, as defined per the RGMII standard, several options may be
+ available:
+
+ * Some SoCs may offer a pin pad/mux/controller capable of configuring a given
+   set of pins' strength, delays, and voltage; and it may be a suitable
+   option to insert the expected 2ns RGMII delay.
+
+ * Modifying the PCB design to include a fixed delay (e.g: using a specifically
+   designed serpentine), which may not require software configuration at all.
+
+Common problems with RGMII delay mismatch
+
+ When there is a RGMII delay mismatch between the Ethernet MAC and the PHY, this
+ will most likely result in the clock and data line signals being unstable when
+ the PHY or MAC takes a snapshot of these signals to translate them into logical
+ 1 or 0 states and reconstruct the data being transmitted/received. Typical
+ symptoms include:
+
+ * Transmission/reception partially works, and there is frequent or occasional
+   packet loss observed
+
+ * Ethernet MAC may report some or all packets ingressing with a FCS/CRC error,
+   or just discard them all
+
+ * Switching to lower speeds such as 10/100Mbits/sec makes the problem go away
+   (since there is enough setup/hold time in that case)
+
+
 Connecting to a PHY
 
  Sometime during startup, the network driver needs to establish a connection
@@ -127,8 +204,9 @@ Letting the PHY Abstraction Layer do Everything
  values pruned from them which don't make sense for your controller (a 10/100
  controller may be connected to a gigabit capable PHY, so you would need to
  mask off SUPPORTED_1000baseT*).  See include/linux/ethtool.h for definitions
- for these bitfields. Note that you should not SET any bits, or the PHY may
- get put into an unsupported state.
+ for these bitfields. Note that you should not SET any bits, except the
+ SUPPORTED_Pause and SUPPORTED_AsymPause bits (see below), or the PHY may get
+ put into an unsupported state.
 
  Lastly, once the controller is ready to handle network traffic, you call
  phy_start(phydev).  This tells the PAL that you are ready, and configures the
@@ -139,6 +217,19 @@ Letting the PHY Abstraction Layer do Everything
  When you want to disconnect from the network (even if just briefly), you call
  phy_stop(phydev).
 
+Pause frames / flow control
+
+ The PHY does not participate directly in flow control/pause frames except by
+ making sure that the SUPPORTED_Pause and SUPPORTED_AsymPause bits are set in
+ MII_ADVERTISE to indicate towards the link partner that the Ethernet MAC
+ controller supports such a thing. Since flow control/pause frames generation
+ involves the Ethernet MAC driver, it is recommended that this driver takes care
+ of properly indicating advertisement and support for such features by setting
+ the SUPPORTED_Pause and SUPPORTED_AsymPause bits accordingly. This can be done
+ either before or after phy_connect() and/or as a result of implementing the
+ ethtool::set_pauseparam feature.
+
+
 Keeping Close Tabs on the PAL
 
  It is possible that the PAL's built-in state machine needs a little help to
@@ -251,39 +342,8 @@ Writing a PHY driver
  PHY_BASIC_FEATURES, but you can look in include/mii.h for other
  features.
 
- Each driver consists of a number of function pointers:
-
-   soft_reset: perform a PHY software reset
-   config_init: configures PHY into a sane state after a reset.
-     For instance, a Davicom PHY requires descrambling disabled.
-   probe: Allocate phy->priv, optionally refuse to bind.
-   PHY may not have been reset or had fixups run yet.
-   suspend/resume: power management
-   config_aneg: Changes the speed/duplex/negotiation settings
-   aneg_done: Determines the auto-negotiation result
-   read_status: Reads the current speed/duplex/negotiation settings
-   ack_interrupt: Clear a pending interrupt
-   did_interrupt: Checks if the PHY generated an interrupt
-   config_intr: Enable or disable interrupts
-   remove: Does any driver take-down
-   ts_info: Queries about the HW timestamping status
-   match_phy_device: used for Clause 45 capable PHYs to match devices
-   in package and ensure they are compatible
-   hwtstamp: Set the PHY HW timestamping configuration
-   rxtstamp: Requests a receive timestamp at the PHY level for a 'skb'
-   txtsamp: Requests a transmit timestamp at the PHY level for a 'skb'
-   set_wol: Enable Wake-on-LAN at the PHY level
-   get_wol: Get the Wake-on-LAN status at the PHY level
-   link_change_notify: called to inform the core is about to change the
-   link state, can be used to work around bogus PHY between state changes
-   read_mmd_indirect: Read PHY MMD indirect register
-   write_mmd_indirect: Write PHY MMD indirect register
-   module_info: Get the size and type of an EEPROM contained in an plug-in
-   module
-   module_eeprom: Get EEPROM information of a plug-in module
-   get_sset_count: Get number of strings sets that get_strings will count
-   get_strings: Get strings from requested objects (statistics)
-   get_stats: Get the extended statistics from the PHY device
+ Each driver consists of a number of function pointers, documented
+ in include/linux/phy.h under the phy_driver structure.
 
  Of these, only config_aneg and read_status are required to be
  assigned by the driver code.  The rest are optional.  Also, it is
@@ -347,3 +407,13 @@ Board Fixups
  The stubs set one of the two matching criteria, and set the other one to
  match anything.
 
+Standards
+
+ IEEE Standard 802.3: CSMA/CD Access Method and Physical Layer Specifications, Section Two:
+ http://standards.ieee.org/getieee802/download/802.3-2008_section2.pdf
+
+ RGMII v1.3:
+ http://web.archive.org/web/20160303212629/http://www.hp.com/rnd/pdfs/RGMIIv1_3.pdf
+
+ RGMII v2.0:
+ http://web.archive.org/web/20160303171328/http://www.hp.com/rnd/pdfs/RGMIIv2_0_final_hp.pdf
diff --git a/Documentation/networking/seg6-sysctl.txt b/Documentation/networking/seg6-sysctl.txt
new file mode 100644 (file)
index 0000000..bdbde23
--- /dev/null
@@ -0,0 +1,18 @@
+/proc/sys/net/conf/<iface>/seg6_* variables:
+
+seg6_enabled - BOOL
+       Accept or drop SR-enabled IPv6 packets on this interface.
+
+       Relevant packets are those with SRH present and DA = local.
+
+       0 - disabled (default)
+       not 0 - enabled
+
+seg6_require_hmac - INTEGER
+       Define HMAC policy for ingress SR-enabled packets on this interface.
+
+       -1 - Ignore HMAC field
+       0 - Accept SR packets without HMAC, validate SR packets with HMAC
+       1 - Drop SR packets without HMAC, validate SR packets with HMAC
+
+       Default is 0.
index 671cccf0dcd2677eaa04789f25de11700c6fd65b..96f50694a74837735a60a5b83d3b83a58ae72c23 100644 (file)
@@ -182,6 +182,16 @@ SOF_TIMESTAMPING_OPT_TSONLY:
   the timestamp even if sysctl net.core.tstamp_allow_data is 0.
   This option disables SOF_TIMESTAMPING_OPT_CMSG.
 
+SOF_TIMESTAMPING_OPT_STATS:
+
+  Optional stats that are obtained along with the transmit timestamps.
+  It must be used together with SOF_TIMESTAMPING_OPT_TSONLY. When the
+  transmit timestamp is available, the stats are available in a
+  separate control message of type SCM_TIMESTAMPING_OPT_STATS, as a
+  list of TLVs (struct nlattr) of types. These stats allow the
+  application to associate various transport layer stats with
+  the transmit timestamps, such as how long a certain block of
+  data was limited by peer's receiver window.
 
 New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
 disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
diff --git a/Documentation/networking/timestamping/.gitignore b/Documentation/networking/timestamping/.gitignore
deleted file mode 100644 (file)
index 9e69e98..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-timestamping
-txtimestamp
-hwtstamp_config
diff --git a/Documentation/networking/timestamping/Makefile b/Documentation/networking/timestamping/Makefile
deleted file mode 100644 (file)
index 8c20dfa..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-# To compile, from the source root
-#
-#    make headers_install
-#    make M=documentation
-
-# List of programs to build
-hostprogs-y := hwtstamp_config timestamping txtimestamp
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-
-HOSTCFLAGS_timestamping.o += -I$(objtree)/usr/include
-HOSTCFLAGS_txtimestamp.o += -I$(objtree)/usr/include
-HOSTCFLAGS_hwtstamp_config.o += -I$(objtree)/usr/include
diff --git a/Documentation/networking/timestamping/hwtstamp_config.c b/Documentation/networking/timestamping/hwtstamp_config.c
deleted file mode 100644 (file)
index e8b685a..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-/* Test program for SIOC{G,S}HWTSTAMP
- * Copyright 2013 Solarflare Communications
- * Author: Ben Hutchings
- */
-
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-
-#include <linux/if.h>
-#include <linux/net_tstamp.h>
-#include <linux/sockios.h>
-
-static int
-lookup_value(const char **names, int size, const char *name)
-{
-       int value;
-
-       for (value = 0; value < size; value++)
-               if (names[value] && strcasecmp(names[value], name) == 0)
-                       return value;
-
-       return -1;
-}
-
-static const char *
-lookup_name(const char **names, int size, int value)
-{
-       return (value >= 0 && value < size) ? names[value] : NULL;
-}
-
-static void list_names(FILE *f, const char **names, int size)
-{
-       int value;
-
-       for (value = 0; value < size; value++)
-               if (names[value])
-                       fprintf(f, "    %s\n", names[value]);
-}
-
-static const char *tx_types[] = {
-#define TX_TYPE(name) [HWTSTAMP_TX_ ## name] = #name
-       TX_TYPE(OFF),
-       TX_TYPE(ON),
-       TX_TYPE(ONESTEP_SYNC)
-#undef TX_TYPE
-};
-#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0])))
-
-static const char *rx_filters[] = {
-#define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name
-       RX_FILTER(NONE),
-       RX_FILTER(ALL),
-       RX_FILTER(SOME),
-       RX_FILTER(PTP_V1_L4_EVENT),
-       RX_FILTER(PTP_V1_L4_SYNC),
-       RX_FILTER(PTP_V1_L4_DELAY_REQ),
-       RX_FILTER(PTP_V2_L4_EVENT),
-       RX_FILTER(PTP_V2_L4_SYNC),
-       RX_FILTER(PTP_V2_L4_DELAY_REQ),
-       RX_FILTER(PTP_V2_L2_EVENT),
-       RX_FILTER(PTP_V2_L2_SYNC),
-       RX_FILTER(PTP_V2_L2_DELAY_REQ),
-       RX_FILTER(PTP_V2_EVENT),
-       RX_FILTER(PTP_V2_SYNC),
-       RX_FILTER(PTP_V2_DELAY_REQ),
-#undef RX_FILTER
-};
-#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0])))
-
-static void usage(void)
-{
-       fputs("Usage: hwtstamp_config if_name [tx_type rx_filter]\n"
-             "tx_type is any of (case-insensitive):\n",
-             stderr);
-       list_names(stderr, tx_types, N_TX_TYPES);
-       fputs("rx_filter is any of (case-insensitive):\n", stderr);
-       list_names(stderr, rx_filters, N_RX_FILTERS);
-}
-
-int main(int argc, char **argv)
-{
-       struct ifreq ifr;
-       struct hwtstamp_config config;
-       const char *name;
-       int sock;
-
-       if ((argc != 2 && argc != 4) || (strlen(argv[1]) >= IFNAMSIZ)) {
-               usage();
-               return 2;
-       }
-
-       if (argc == 4) {
-               config.flags = 0;
-               config.tx_type = lookup_value(tx_types, N_TX_TYPES, argv[2]);
-               config.rx_filter = lookup_value(rx_filters, N_RX_FILTERS, argv[3]);
-               if (config.tx_type < 0 || config.rx_filter < 0) {
-                       usage();
-                       return 2;
-               }
-       }
-
-       sock = socket(AF_INET, SOCK_DGRAM, 0);
-       if (sock < 0) {
-               perror("socket");
-               return 1;
-       }
-
-       strcpy(ifr.ifr_name, argv[1]);
-       ifr.ifr_data = (caddr_t)&config;
-
-       if (ioctl(sock, (argc == 2) ? SIOCGHWTSTAMP : SIOCSHWTSTAMP, &ifr)) {
-               perror("ioctl");
-               return 1;
-       }
-
-       printf("flags = %#x\n", config.flags);
-       name = lookup_name(tx_types, N_TX_TYPES, config.tx_type);
-       if (name)
-               printf("tx_type = %s\n", name);
-       else
-               printf("tx_type = %d\n", config.tx_type);
-       name = lookup_name(rx_filters, N_RX_FILTERS, config.rx_filter);
-       if (name)
-               printf("rx_filter = %s\n", name);
-       else
-               printf("rx_filter = %d\n", config.rx_filter);
-
-       return 0;
-}
diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c
deleted file mode 100644 (file)
index 5cdfd74..0000000
+++ /dev/null
@@ -1,528 +0,0 @@
-/*
- * This program demonstrates how the various time stamping features in
- * the Linux kernel work. It emulates the behavior of a PTP
- * implementation in stand-alone master mode by sending PTPv1 Sync
- * multicasts once every second. It looks for similar packets, but
- * beyond that doesn't actually implement PTP.
- *
- * Outgoing packets are time stamped with SO_TIMESTAMPING with or
- * without hardware support.
- *
- * Incoming packets are time stamped with SO_TIMESTAMPING with or
- * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and
- * SO_TIMESTAMP[NS].
- *
- * Copyright (C) 2009 Intel Corporation.
- * Author: Patrick Ohly <patrick.ohly@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-
-#include <sys/time.h>
-#include <sys/socket.h>
-#include <sys/select.h>
-#include <sys/ioctl.h>
-#include <arpa/inet.h>
-#include <net/if.h>
-
-#include <asm/types.h>
-#include <linux/net_tstamp.h>
-#include <linux/errqueue.h>
-
-#ifndef SO_TIMESTAMPING
-# define SO_TIMESTAMPING         37
-# define SCM_TIMESTAMPING        SO_TIMESTAMPING
-#endif
-
-#ifndef SO_TIMESTAMPNS
-# define SO_TIMESTAMPNS 35
-#endif
-
-#ifndef SIOCGSTAMPNS
-# define SIOCGSTAMPNS 0x8907
-#endif
-
-#ifndef SIOCSHWTSTAMP
-# define SIOCSHWTSTAMP 0x89b0
-#endif
-
-static void usage(const char *error)
-{
-       if (error)
-               printf("invalid option: %s\n", error);
-       printf("timestamping interface option*\n\n"
-              "Options:\n"
-              "  IP_MULTICAST_LOOP - looping outgoing multicasts\n"
-              "  SO_TIMESTAMP - normal software time stamping, ms resolution\n"
-              "  SO_TIMESTAMPNS - more accurate software time stamping\n"
-              "  SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n"
-              "  SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n"
-              "  SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n"
-              "  SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n"
-              "  SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
-              "  SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
-              "  SIOCGSTAMP - check last socket time stamp\n"
-              "  SIOCGSTAMPNS - more accurate socket time stamp\n");
-       exit(1);
-}
-
-static void bail(const char *error)
-{
-       printf("%s: %s\n", error, strerror(errno));
-       exit(1);
-}
-
-static const unsigned char sync[] = {
-       0x00, 0x01, 0x00, 0x01,
-       0x5f, 0x44, 0x46, 0x4c,
-       0x54, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00,
-       0x01, 0x01,
-
-       /* fake uuid */
-       0x00, 0x01,
-       0x02, 0x03, 0x04, 0x05,
-
-       0x00, 0x01, 0x00, 0x37,
-       0x00, 0x00, 0x00, 0x08,
-       0x00, 0x00, 0x00, 0x00,
-       0x49, 0x05, 0xcd, 0x01,
-       0x29, 0xb1, 0x8d, 0xb0,
-       0x00, 0x00, 0x00, 0x00,
-       0x00, 0x01,
-
-       /* fake uuid */
-       0x00, 0x01,
-       0x02, 0x03, 0x04, 0x05,
-
-       0x00, 0x00, 0x00, 0x37,
-       0x00, 0x00, 0x00, 0x04,
-       0x44, 0x46, 0x4c, 0x54,
-       0x00, 0x00, 0xf0, 0x60,
-       0x00, 0x01, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x01,
-       0x00, 0x00, 0xf0, 0x60,
-       0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x04,
-       0x44, 0x46, 0x4c, 0x54,
-       0x00, 0x01,
-
-       /* fake uuid */
-       0x00, 0x01,
-       0x02, 0x03, 0x04, 0x05,
-
-       0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00,
-       0x00, 0x00, 0x00, 0x00
-};
-
-static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
-{
-       struct timeval now;
-       int res;
-
-       res = sendto(sock, sync, sizeof(sync), 0,
-               addr, addr_len);
-       gettimeofday(&now, 0);
-       if (res < 0)
-               printf("%s: %s\n", "send", strerror(errno));
-       else
-               printf("%ld.%06ld: sent %d bytes\n",
-                      (long)now.tv_sec, (long)now.tv_usec,
-                      res);
-}
-
-static void printpacket(struct msghdr *msg, int res,
-                       char *data,
-                       int sock, int recvmsg_flags,
-                       int siocgstamp, int siocgstampns)
-{
-       struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
-       struct cmsghdr *cmsg;
-       struct timeval tv;
-       struct timespec ts;
-       struct timeval now;
-
-       gettimeofday(&now, 0);
-
-       printf("%ld.%06ld: received %s data, %d bytes from %s, %zu bytes control messages\n",
-              (long)now.tv_sec, (long)now.tv_usec,
-              (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
-              res,
-              inet_ntoa(from_addr->sin_addr),
-              msg->msg_controllen);
-       for (cmsg = CMSG_FIRSTHDR(msg);
-            cmsg;
-            cmsg = CMSG_NXTHDR(msg, cmsg)) {
-               printf("   cmsg len %zu: ", cmsg->cmsg_len);
-               switch (cmsg->cmsg_level) {
-               case SOL_SOCKET:
-                       printf("SOL_SOCKET ");
-                       switch (cmsg->cmsg_type) {
-                       case SO_TIMESTAMP: {
-                               struct timeval *stamp =
-                                       (struct timeval *)CMSG_DATA(cmsg);
-                               printf("SO_TIMESTAMP %ld.%06ld",
-                                      (long)stamp->tv_sec,
-                                      (long)stamp->tv_usec);
-                               break;
-                       }
-                       case SO_TIMESTAMPNS: {
-                               struct timespec *stamp =
-                                       (struct timespec *)CMSG_DATA(cmsg);
-                               printf("SO_TIMESTAMPNS %ld.%09ld",
-                                      (long)stamp->tv_sec,
-                                      (long)stamp->tv_nsec);
-                               break;
-                       }
-                       case SO_TIMESTAMPING: {
-                               struct timespec *stamp =
-                                       (struct timespec *)CMSG_DATA(cmsg);
-                               printf("SO_TIMESTAMPING ");
-                               printf("SW %ld.%09ld ",
-                                      (long)stamp->tv_sec,
-                                      (long)stamp->tv_nsec);
-                               stamp++;
-                               /* skip deprecated HW transformed */
-                               stamp++;
-                               printf("HW raw %ld.%09ld",
-                                      (long)stamp->tv_sec,
-                                      (long)stamp->tv_nsec);
-                               break;
-                       }
-                       default:
-                               printf("type %d", cmsg->cmsg_type);
-                               break;
-                       }
-                       break;
-               case IPPROTO_IP:
-                       printf("IPPROTO_IP ");
-                       switch (cmsg->cmsg_type) {
-                       case IP_RECVERR: {
-                               struct sock_extended_err *err =
-                                       (struct sock_extended_err *)CMSG_DATA(cmsg);
-                               printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s",
-                                       strerror(err->ee_errno),
-                                       err->ee_origin,
-#ifdef SO_EE_ORIGIN_TIMESTAMPING
-                                       err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ?
-                                       "bounced packet" : "unexpected origin"
-#else
-                                       "probably SO_EE_ORIGIN_TIMESTAMPING"
-#endif
-                                       );
-                               if (res < sizeof(sync))
-                                       printf(" => truncated data?!");
-                               else if (!memcmp(sync, data + res - sizeof(sync),
-                                                       sizeof(sync)))
-                                       printf(" => GOT OUR DATA BACK (HURRAY!)");
-                               break;
-                       }
-                       case IP_PKTINFO: {
-                               struct in_pktinfo *pktinfo =
-                                       (struct in_pktinfo *)CMSG_DATA(cmsg);
-                               printf("IP_PKTINFO interface index %u",
-                                       pktinfo->ipi_ifindex);
-                               break;
-                       }
-                       default:
-                               printf("type %d", cmsg->cmsg_type);
-                               break;
-                       }
-                       break;
-               default:
-                       printf("level %d type %d",
-                               cmsg->cmsg_level,
-                               cmsg->cmsg_type);
-                       break;
-               }
-               printf("\n");
-       }
-
-       if (siocgstamp) {
-               if (ioctl(sock, SIOCGSTAMP, &tv))
-                       printf("   %s: %s\n", "SIOCGSTAMP", strerror(errno));
-               else
-                       printf("SIOCGSTAMP %ld.%06ld\n",
-                              (long)tv.tv_sec,
-                              (long)tv.tv_usec);
-       }
-       if (siocgstampns) {
-               if (ioctl(sock, SIOCGSTAMPNS, &ts))
-                       printf("   %s: %s\n", "SIOCGSTAMPNS", strerror(errno));
-               else
-                       printf("SIOCGSTAMPNS %ld.%09ld\n",
-                              (long)ts.tv_sec,
-                              (long)ts.tv_nsec);
-       }
-}
-
-static void recvpacket(int sock, int recvmsg_flags,
-                      int siocgstamp, int siocgstampns)
-{
-       char data[256];
-       struct msghdr msg;
-       struct iovec entry;
-       struct sockaddr_in from_addr;
-       struct {
-               struct cmsghdr cm;
-               char control[512];
-       } control;
-       int res;
-
-       memset(&msg, 0, sizeof(msg));
-       msg.msg_iov = &entry;
-       msg.msg_iovlen = 1;
-       entry.iov_base = data;
-       entry.iov_len = sizeof(data);
-       msg.msg_name = (caddr_t)&from_addr;
-       msg.msg_namelen = sizeof(from_addr);
-       msg.msg_control = &control;
-       msg.msg_controllen = sizeof(control);
-
-       res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT);
-       if (res < 0) {
-               printf("%s %s: %s\n",
-                      "recvmsg",
-                      (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
-                      strerror(errno));
-       } else {
-               printpacket(&msg, res, data,
-                           sock, recvmsg_flags,
-                           siocgstamp, siocgstampns);
-       }
-}
-
-int main(int argc, char **argv)
-{
-       int so_timestamping_flags = 0;
-       int so_timestamp = 0;
-       int so_timestampns = 0;
-       int siocgstamp = 0;
-       int siocgstampns = 0;
-       int ip_multicast_loop = 0;
-       char *interface;
-       int i;
-       int enabled = 1;
-       int sock;
-       struct ifreq device;
-       struct ifreq hwtstamp;
-       struct hwtstamp_config hwconfig, hwconfig_requested;
-       struct sockaddr_in addr;
-       struct ip_mreq imr;
-       struct in_addr iaddr;
-       int val;
-       socklen_t len;
-       struct timeval next;
-
-       if (argc < 2)
-               usage(0);
-       interface = argv[1];
-
-       for (i = 2; i < argc; i++) {
-               if (!strcasecmp(argv[i], "SO_TIMESTAMP"))
-                       so_timestamp = 1;
-               else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS"))
-                       so_timestampns = 1;
-               else if (!strcasecmp(argv[i], "SIOCGSTAMP"))
-                       siocgstamp = 1;
-               else if (!strcasecmp(argv[i], "SIOCGSTAMPNS"))
-                       siocgstampns = 1;
-               else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
-                       ip_multicast_loop = 1;
-               else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
-                       so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
-               else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
-                       so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
-               else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE"))
-                       so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE;
-               else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE"))
-                       so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
-               else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE"))
-                       so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE;
-               else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE"))
-                       so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
-               else
-                       usage(argv[i]);
-       }
-
-       sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
-       if (sock < 0)
-               bail("socket");
-
-       memset(&device, 0, sizeof(device));
-       strncpy(device.ifr_name, interface, sizeof(device.ifr_name));
-       if (ioctl(sock, SIOCGIFADDR, &device) < 0)
-               bail("getting interface IP address");
-
-       memset(&hwtstamp, 0, sizeof(hwtstamp));
-       strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name));
-       hwtstamp.ifr_data = (void *)&hwconfig;
-       memset(&hwconfig, 0, sizeof(hwconfig));
-       hwconfig.tx_type =
-               (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
-               HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
-       hwconfig.rx_filter =
-               (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
-               HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
-       hwconfig_requested = hwconfig;
-       if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
-               if ((errno == EINVAL || errno == ENOTSUP) &&
-                   hwconfig_requested.tx_type == HWTSTAMP_TX_OFF &&
-                   hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE)
-                       printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n");
-               else
-                       bail("SIOCSHWTSTAMP");
-       }
-       printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n",
-              hwconfig_requested.tx_type, hwconfig.tx_type,
-              hwconfig_requested.rx_filter, hwconfig.rx_filter);
-
-       /* bind to PTP port */
-       addr.sin_family = AF_INET;
-       addr.sin_addr.s_addr = htonl(INADDR_ANY);
-       addr.sin_port = htons(319 /* PTP event port */);
-       if (bind(sock,
-                (struct sockaddr *)&addr,
-                sizeof(struct sockaddr_in)) < 0)
-               bail("bind");
-
-       /* set multicast group for outgoing packets */
-       inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */
-       addr.sin_addr = iaddr;
-       imr.imr_multiaddr.s_addr = iaddr.s_addr;
-       imr.imr_interface.s_addr =
-               ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr;
-       if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF,
-                      &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0)
-               bail("set multicast");
-
-       /* join multicast group, loop our own packet */
-       if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP,
-                      &imr, sizeof(struct ip_mreq)) < 0)
-               bail("join multicast group");
-
-       if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP,
-                      &ip_multicast_loop, sizeof(enabled)) < 0) {
-               bail("loop multicast");
-       }
-
-       /* set socket options for time stamping */
-       if (so_timestamp &&
-               setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP,
-                          &enabled, sizeof(enabled)) < 0)
-               bail("setsockopt SO_TIMESTAMP");
-
-       if (so_timestampns &&
-               setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS,
-                          &enabled, sizeof(enabled)) < 0)
-               bail("setsockopt SO_TIMESTAMPNS");
-
-       if (so_timestamping_flags &&
-               setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
-                          &so_timestamping_flags,
-                          sizeof(so_timestamping_flags)) < 0)
-               bail("setsockopt SO_TIMESTAMPING");
-
-       /* request IP_PKTINFO for debugging purposes */
-       if (setsockopt(sock, SOL_IP, IP_PKTINFO,
-                      &enabled, sizeof(enabled)) < 0)
-               printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno));
-
-       /* verify socket options */
-       len = sizeof(val);
-       if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0)
-               printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno));
-       else
-               printf("SO_TIMESTAMP %d\n", val);
-
-       if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0)
-               printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS",
-                      strerror(errno));
-       else
-               printf("SO_TIMESTAMPNS %d\n", val);
-
-       if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) {
-               printf("%s: %s\n", "getsockopt SO_TIMESTAMPING",
-                      strerror(errno));
-       } else {
-               printf("SO_TIMESTAMPING %d\n", val);
-               if (val != so_timestamping_flags)
-                       printf("   not the expected value %d\n",
-                              so_timestamping_flags);
-       }
-
-       /* send packets forever every five seconds */
-       gettimeofday(&next, 0);
-       next.tv_sec = (next.tv_sec + 1) / 5 * 5;
-       next.tv_usec = 0;
-       while (1) {
-               struct timeval now;
-               struct timeval delta;
-               long delta_us;
-               int res;
-               fd_set readfs, errorfs;
-
-               gettimeofday(&now, 0);
-               delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 +
-                       (long)(next.tv_usec - now.tv_usec);
-               if (delta_us > 0) {
-                       /* continue waiting for timeout or data */
-                       delta.tv_sec = delta_us / 1000000;
-                       delta.tv_usec = delta_us % 1000000;
-
-                       FD_ZERO(&readfs);
-                       FD_ZERO(&errorfs);
-                       FD_SET(sock, &readfs);
-                       FD_SET(sock, &errorfs);
-                       printf("%ld.%06ld: select %ldus\n",
-                              (long)now.tv_sec, (long)now.tv_usec,
-                              delta_us);
-                       res = select(sock + 1, &readfs, 0, &errorfs, &delta);
-                       gettimeofday(&now, 0);
-                       printf("%ld.%06ld: select returned: %d, %s\n",
-                              (long)now.tv_sec, (long)now.tv_usec,
-                              res,
-                              res < 0 ? strerror(errno) : "success");
-                       if (res > 0) {
-                               if (FD_ISSET(sock, &readfs))
-                                       printf("ready for reading\n");
-                               if (FD_ISSET(sock, &errorfs))
-                                       printf("has error\n");
-                               recvpacket(sock, 0,
-                                          siocgstamp,
-                                          siocgstampns);
-                               recvpacket(sock, MSG_ERRQUEUE,
-                                          siocgstamp,
-                                          siocgstampns);
-                       }
-               } else {
-                       /* write one packet */
-                       sendpacket(sock,
-                                  (struct sockaddr *)&addr,
-                                  sizeof(addr));
-                       next.tv_sec += 5;
-                       continue;
-               }
-       }
-
-       return 0;
-}
diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c
deleted file mode 100644 (file)
index 5df0704..0000000
+++ /dev/null
@@ -1,549 +0,0 @@
-/*
- * Copyright 2014 Google Inc.
- * Author: willemb@google.com (Willem de Bruijn)
- *
- * Test software tx timestamping, including
- *
- * - SCHED, SND and ACK timestamps
- * - RAW, UDP and TCP
- * - IPv4 and IPv6
- * - various packet sizes (to test GSO and TSO)
- *
- * Consult the command line arguments for help on running
- * the various testcases.
- *
- * This test requires a dummy TCP server.
- * A simple `nc6 [-u] -l -p $DESTPORT` will do
- *
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#define _GNU_SOURCE
-
-#include <arpa/inet.h>
-#include <asm/types.h>
-#include <error.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <linux/errqueue.h>
-#include <linux/if_ether.h>
-#include <linux/net_tstamp.h>
-#include <netdb.h>
-#include <net/if.h>
-#include <netinet/in.h>
-#include <netinet/ip.h>
-#include <netinet/udp.h>
-#include <netinet/tcp.h>
-#include <netpacket/packet.h>
-#include <poll.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/select.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <time.h>
-#include <unistd.h>
-
-/* command line parameters */
-static int cfg_proto = SOCK_STREAM;
-static int cfg_ipproto = IPPROTO_TCP;
-static int cfg_num_pkts = 4;
-static int do_ipv4 = 1;
-static int do_ipv6 = 1;
-static int cfg_payload_len = 10;
-static bool cfg_show_payload;
-static bool cfg_do_pktinfo;
-static bool cfg_loop_nodata;
-static uint16_t dest_port = 9000;
-
-static struct sockaddr_in daddr;
-static struct sockaddr_in6 daddr6;
-static struct timespec ts_prev;
-
-static void __print_timestamp(const char *name, struct timespec *cur,
-                             uint32_t key, int payload_len)
-{
-       if (!(cur->tv_sec | cur->tv_nsec))
-               return;
-
-       fprintf(stderr, "  %s: %lu s %lu us (seq=%u, len=%u)",
-                       name, cur->tv_sec, cur->tv_nsec / 1000,
-                       key, payload_len);
-
-       if ((ts_prev.tv_sec | ts_prev.tv_nsec)) {
-               int64_t cur_ms, prev_ms;
-
-               cur_ms = (long) cur->tv_sec * 1000 * 1000;
-               cur_ms += cur->tv_nsec / 1000;
-
-               prev_ms = (long) ts_prev.tv_sec * 1000 * 1000;
-               prev_ms += ts_prev.tv_nsec / 1000;
-
-               fprintf(stderr, "  (%+" PRId64 " us)", cur_ms - prev_ms);
-       }
-
-       ts_prev = *cur;
-       fprintf(stderr, "\n");
-}
-
-static void print_timestamp_usr(void)
-{
-       struct timespec ts;
-       struct timeval tv;      /* avoid dependency on -lrt */
-
-       gettimeofday(&tv, NULL);
-       ts.tv_sec = tv.tv_sec;
-       ts.tv_nsec = tv.tv_usec * 1000;
-
-       __print_timestamp("  USR", &ts, 0, 0);
-}
-
-static void print_timestamp(struct scm_timestamping *tss, int tstype,
-                           int tskey, int payload_len)
-{
-       const char *tsname;
-
-       switch (tstype) {
-       case SCM_TSTAMP_SCHED:
-               tsname = "  ENQ";
-               break;
-       case SCM_TSTAMP_SND:
-               tsname = "  SND";
-               break;
-       case SCM_TSTAMP_ACK:
-               tsname = "  ACK";
-               break;
-       default:
-               error(1, 0, "unknown timestamp type: %u",
-               tstype);
-       }
-       __print_timestamp(tsname, &tss->ts[0], tskey, payload_len);
-}
-
-/* TODO: convert to check_and_print payload once API is stable */
-static void print_payload(char *data, int len)
-{
-       int i;
-
-       if (!len)
-               return;
-
-       if (len > 70)
-               len = 70;
-
-       fprintf(stderr, "payload: ");
-       for (i = 0; i < len; i++)
-               fprintf(stderr, "%02hhx ", data[i]);
-       fprintf(stderr, "\n");
-}
-
-static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr)
-{
-       char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN];
-
-       fprintf(stderr, "         pktinfo: ifindex=%u src=%s dst=%s\n",
-               ifindex,
-               saddr ? inet_ntop(family, saddr, sa, sizeof(sa)) : "unknown",
-               daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown");
-}
-
-static void __poll(int fd)
-{
-       struct pollfd pollfd;
-       int ret;
-
-       memset(&pollfd, 0, sizeof(pollfd));
-       pollfd.fd = fd;
-       ret = poll(&pollfd, 1, 100);
-       if (ret != 1)
-               error(1, errno, "poll");
-}
-
-static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
-{
-       struct sock_extended_err *serr = NULL;
-       struct scm_timestamping *tss = NULL;
-       struct cmsghdr *cm;
-       int batch = 0;
-
-       for (cm = CMSG_FIRSTHDR(msg);
-            cm && cm->cmsg_len;
-            cm = CMSG_NXTHDR(msg, cm)) {
-               if (cm->cmsg_level == SOL_SOCKET &&
-                   cm->cmsg_type == SCM_TIMESTAMPING) {
-                       tss = (void *) CMSG_DATA(cm);
-               } else if ((cm->cmsg_level == SOL_IP &&
-                           cm->cmsg_type == IP_RECVERR) ||
-                          (cm->cmsg_level == SOL_IPV6 &&
-                           cm->cmsg_type == IPV6_RECVERR)) {
-                       serr = (void *) CMSG_DATA(cm);
-                       if (serr->ee_errno != ENOMSG ||
-                           serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
-                               fprintf(stderr, "unknown ip error %d %d\n",
-                                               serr->ee_errno,
-                                               serr->ee_origin);
-                               serr = NULL;
-                       }
-               } else if (cm->cmsg_level == SOL_IP &&
-                          cm->cmsg_type == IP_PKTINFO) {
-                       struct in_pktinfo *info = (void *) CMSG_DATA(cm);
-                       print_pktinfo(AF_INET, info->ipi_ifindex,
-                                     &info->ipi_spec_dst, &info->ipi_addr);
-               } else if (cm->cmsg_level == SOL_IPV6 &&
-                          cm->cmsg_type == IPV6_PKTINFO) {
-                       struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm);
-                       print_pktinfo(AF_INET6, info6->ipi6_ifindex,
-                                     NULL, &info6->ipi6_addr);
-               } else
-                       fprintf(stderr, "unknown cmsg %d,%d\n",
-                                       cm->cmsg_level, cm->cmsg_type);
-
-               if (serr && tss) {
-                       print_timestamp(tss, serr->ee_info, serr->ee_data,
-                                       payload_len);
-                       serr = NULL;
-                       tss = NULL;
-                       batch++;
-               }
-       }
-
-       if (batch > 1)
-               fprintf(stderr, "batched %d timestamps\n", batch);
-}
-
-static int recv_errmsg(int fd)
-{
-       static char ctrl[1024 /* overprovision*/];
-       static struct msghdr msg;
-       struct iovec entry;
-       static char *data;
-       int ret = 0;
-
-       data = malloc(cfg_payload_len);
-       if (!data)
-               error(1, 0, "malloc");
-
-       memset(&msg, 0, sizeof(msg));
-       memset(&entry, 0, sizeof(entry));
-       memset(ctrl, 0, sizeof(ctrl));
-
-       entry.iov_base = data;
-       entry.iov_len = cfg_payload_len;
-       msg.msg_iov = &entry;
-       msg.msg_iovlen = 1;
-       msg.msg_name = NULL;
-       msg.msg_namelen = 0;
-       msg.msg_control = ctrl;
-       msg.msg_controllen = sizeof(ctrl);
-
-       ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
-       if (ret == -1 && errno != EAGAIN)
-               error(1, errno, "recvmsg");
-
-       if (ret >= 0) {
-               __recv_errmsg_cmsg(&msg, ret);
-               if (cfg_show_payload)
-                       print_payload(data, cfg_payload_len);
-       }
-
-       free(data);
-       return ret == -1;
-}
-
-static void do_test(int family, unsigned int opt)
-{
-       char *buf;
-       int fd, i, val = 1, total_len;
-
-       if (family == AF_INET6 && cfg_proto != SOCK_STREAM) {
-               /* due to lack of checksum generation code */
-               fprintf(stderr, "test: skipping datagram over IPv6\n");
-               return;
-       }
-
-       total_len = cfg_payload_len;
-       if (cfg_proto == SOCK_RAW) {
-               total_len += sizeof(struct udphdr);
-               if (cfg_ipproto == IPPROTO_RAW)
-                       total_len += sizeof(struct iphdr);
-       }
-
-       buf = malloc(total_len);
-       if (!buf)
-               error(1, 0, "malloc");
-
-       fd = socket(family, cfg_proto, cfg_ipproto);
-       if (fd < 0)
-               error(1, errno, "socket");
-
-       if (cfg_proto == SOCK_STREAM) {
-               if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
-                              (char*) &val, sizeof(val)))
-                       error(1, 0, "setsockopt no nagle");
-
-               if (family == PF_INET) {
-                       if (connect(fd, (void *) &daddr, sizeof(daddr)))
-                               error(1, errno, "connect ipv4");
-               } else {
-                       if (connect(fd, (void *) &daddr6, sizeof(daddr6)))
-                               error(1, errno, "connect ipv6");
-               }
-       }
-
-       if (cfg_do_pktinfo) {
-               if (family == AF_INET6) {
-                       if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO,
-                                      &val, sizeof(val)))
-                               error(1, errno, "setsockopt pktinfo ipv6");
-               } else {
-                       if (setsockopt(fd, SOL_IP, IP_PKTINFO,
-                                      &val, sizeof(val)))
-                               error(1, errno, "setsockopt pktinfo ipv4");
-               }
-       }
-
-       opt |= SOF_TIMESTAMPING_SOFTWARE |
-              SOF_TIMESTAMPING_OPT_CMSG |
-              SOF_TIMESTAMPING_OPT_ID;
-       if (cfg_loop_nodata)
-               opt |= SOF_TIMESTAMPING_OPT_TSONLY;
-
-       if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
-                      (char *) &opt, sizeof(opt)))
-               error(1, 0, "setsockopt timestamping");
-
-       for (i = 0; i < cfg_num_pkts; i++) {
-               memset(&ts_prev, 0, sizeof(ts_prev));
-               memset(buf, 'a' + i, total_len);
-
-               if (cfg_proto == SOCK_RAW) {
-                       struct udphdr *udph;
-                       int off = 0;
-
-                       if (cfg_ipproto == IPPROTO_RAW) {
-                               struct iphdr *iph = (void *) buf;
-
-                               memset(iph, 0, sizeof(*iph));
-                               iph->ihl      = 5;
-                               iph->version  = 4;
-                               iph->ttl      = 2;
-                               iph->daddr    = daddr.sin_addr.s_addr;
-                               iph->protocol = IPPROTO_UDP;
-                               /* kernel writes saddr, csum, len */
-
-                               off = sizeof(*iph);
-                       }
-
-                       udph = (void *) buf + off;
-                       udph->source = ntohs(9000);     /* random spoof */
-                       udph->dest   = ntohs(dest_port);
-                       udph->len    = ntohs(sizeof(*udph) + cfg_payload_len);
-                       udph->check  = 0;       /* not allowed for IPv6 */
-               }
-
-               print_timestamp_usr();
-               if (cfg_proto != SOCK_STREAM) {
-                       if (family == PF_INET)
-                               val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr));
-                       else
-                               val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6));
-               } else {
-                       val = send(fd, buf, cfg_payload_len, 0);
-               }
-               if (val != total_len)
-                       error(1, errno, "send");
-
-               /* wait for all errors to be queued, else ACKs arrive OOO */
-               usleep(50 * 1000);
-
-               __poll(fd);
-
-               while (!recv_errmsg(fd)) {}
-       }
-
-       if (close(fd))
-               error(1, errno, "close");
-
-       free(buf);
-       usleep(400 * 1000);
-}
-
-static void __attribute__((noreturn)) usage(const char *filepath)
-{
-       fprintf(stderr, "\nUsage: %s [options] hostname\n"
-                       "\nwhere options are:\n"
-                       "  -4:   only IPv4\n"
-                       "  -6:   only IPv6\n"
-                       "  -h:   show this message\n"
-                       "  -I:   request PKTINFO\n"
-                       "  -l N: send N bytes at a time\n"
-                       "  -n:   set no-payload option\n"
-                       "  -r:   use raw\n"
-                       "  -R:   use raw (IP_HDRINCL)\n"
-                       "  -p N: connect to port N\n"
-                       "  -u:   use udp\n"
-                       "  -x:   show payload (up to 70 bytes)\n",
-                       filepath);
-       exit(1);
-}
-
-static void parse_opt(int argc, char **argv)
-{
-       int proto_count = 0;
-       char c;
-
-       while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) {
-               switch (c) {
-               case '4':
-                       do_ipv6 = 0;
-                       break;
-               case '6':
-                       do_ipv4 = 0;
-                       break;
-               case 'I':
-                       cfg_do_pktinfo = true;
-                       break;
-               case 'n':
-                       cfg_loop_nodata = true;
-                       break;
-               case 'r':
-                       proto_count++;
-                       cfg_proto = SOCK_RAW;
-                       cfg_ipproto = IPPROTO_UDP;
-                       break;
-               case 'R':
-                       proto_count++;
-                       cfg_proto = SOCK_RAW;
-                       cfg_ipproto = IPPROTO_RAW;
-                       break;
-               case 'u':
-                       proto_count++;
-                       cfg_proto = SOCK_DGRAM;
-                       cfg_ipproto = IPPROTO_UDP;
-                       break;
-               case 'l':
-                       cfg_payload_len = strtoul(optarg, NULL, 10);
-                       break;
-               case 'p':
-                       dest_port = strtoul(optarg, NULL, 10);
-                       break;
-               case 'x':
-                       cfg_show_payload = true;
-                       break;
-               case 'h':
-               default:
-                       usage(argv[0]);
-               }
-       }
-
-       if (!cfg_payload_len)
-               error(1, 0, "payload may not be nonzero");
-       if (cfg_proto != SOCK_STREAM && cfg_payload_len > 1472)
-               error(1, 0, "udp packet might exceed expected MTU");
-       if (!do_ipv4 && !do_ipv6)
-               error(1, 0, "pass -4 or -6, not both");
-       if (proto_count > 1)
-               error(1, 0, "pass -r, -R or -u, not multiple");
-
-       if (optind != argc - 1)
-               error(1, 0, "missing required hostname argument");
-}
-
-static void resolve_hostname(const char *hostname)
-{
-       struct addrinfo *addrs, *cur;
-       int have_ipv4 = 0, have_ipv6 = 0;
-
-       if (getaddrinfo(hostname, NULL, NULL, &addrs))
-               error(1, errno, "getaddrinfo");
-
-       cur = addrs;
-       while (cur && !have_ipv4 && !have_ipv6) {
-               if (!have_ipv4 && cur->ai_family == AF_INET) {
-                       memcpy(&daddr, cur->ai_addr, sizeof(daddr));
-                       daddr.sin_port = htons(dest_port);
-                       have_ipv4 = 1;
-               }
-               else if (!have_ipv6 && cur->ai_family == AF_INET6) {
-                       memcpy(&daddr6, cur->ai_addr, sizeof(daddr6));
-                       daddr6.sin6_port = htons(dest_port);
-                       have_ipv6 = 1;
-               }
-               cur = cur->ai_next;
-       }
-       if (addrs)
-               freeaddrinfo(addrs);
-
-       do_ipv4 &= have_ipv4;
-       do_ipv6 &= have_ipv6;
-}
-
-static void do_main(int family)
-{
-       fprintf(stderr, "family:       %s\n",
-                       family == PF_INET ? "INET" : "INET6");
-
-       fprintf(stderr, "test SND\n");
-       do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE);
-
-       fprintf(stderr, "test ENQ\n");
-       do_test(family, SOF_TIMESTAMPING_TX_SCHED);
-
-       fprintf(stderr, "test ENQ + SND\n");
-       do_test(family, SOF_TIMESTAMPING_TX_SCHED |
-                       SOF_TIMESTAMPING_TX_SOFTWARE);
-
-       if (cfg_proto == SOCK_STREAM) {
-               fprintf(stderr, "\ntest ACK\n");
-               do_test(family, SOF_TIMESTAMPING_TX_ACK);
-
-               fprintf(stderr, "\ntest SND + ACK\n");
-               do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE |
-                               SOF_TIMESTAMPING_TX_ACK);
-
-               fprintf(stderr, "\ntest ENQ + SND + ACK\n");
-               do_test(family, SOF_TIMESTAMPING_TX_SCHED |
-                               SOF_TIMESTAMPING_TX_SOFTWARE |
-                               SOF_TIMESTAMPING_TX_ACK);
-       }
-}
-
-const char *sock_names[] = { NULL, "TCP", "UDP", "RAW" };
-
-int main(int argc, char **argv)
-{
-       if (argc == 1)
-               usage(argv[0]);
-
-       parse_opt(argc, argv);
-       resolve_hostname(argv[argc - 1]);
-
-       fprintf(stderr, "protocol:     %s\n", sock_names[cfg_proto]);
-       fprintf(stderr, "payload:      %u\n", cfg_payload_len);
-       fprintf(stderr, "server port:  %u\n", dest_port);
-       fprintf(stderr, "\n");
-
-       if (do_ipv4)
-               do_main(PF_INET);
-       if (do_ipv6)
-               do_main(PF_INET6);
-
-       return 0;
-}
diff --git a/Documentation/pcmcia/.gitignore b/Documentation/pcmcia/.gitignore
deleted file mode 100644 (file)
index 53d0813..0000000
+++ /dev/null
@@ -1 +0,0 @@
-crc32hash
diff --git a/Documentation/pcmcia/Makefile b/Documentation/pcmcia/Makefile
deleted file mode 100644 (file)
index 47a8fa1..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-# List of programs to build
-hostprogs-y := crc32hash
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-
-HOSTCFLAGS_crc32hash.o += -I$(objtree)/usr/include
diff --git a/Documentation/pcmcia/crc32hash.c b/Documentation/pcmcia/crc32hash.c
deleted file mode 100644 (file)
index 44f8bee..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-/* crc32hash.c - derived from linux/lib/crc32.c, GNU GPL v2 */
-/* Usage example:
-$ ./crc32hash "Dual Speed"
-*/
-
-#include <string.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <stdlib.h>
-
-static unsigned int crc32(unsigned char const *p, unsigned int len)
-{
-       int i;
-       unsigned int crc = 0;
-       while (len--) {
-               crc ^= *p++;
-               for (i = 0; i < 8; i++)
-                       crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
-       }
-       return crc;
-}
-
-int main(int argc, char **argv) {
-       unsigned int result;
-       if (argc != 2) {
-               printf("no string passed as argument\n");
-               return -1;
-       }
-       result = crc32((unsigned char const *)argv[1], strlen(argv[1]));
-       printf("0x%x\n", result);
-       return 0;
-}
index 199afd100cf27026bf1d918caaeb4d10636262c0..5f3e00ab54c42373882d488765bd1583617041d7 100644 (file)
@@ -27,7 +27,7 @@ pcmcia:m0149cC1ABf06pfn00fn00pa725B842DpbF1EFEE84pc0877B627pd00000000
 The hex value after "pa" is the hash of product ID string 1, after "pb" for
 string 2 and so on.
 
-Alternatively, you can use crc32hash (see Documentation/pcmcia/crc32hash.c)
+Alternatively, you can use crc32hash (see tools/pcmcia/crc32hash.c)
 to determine the crc32 hash.  Simply pass the string you want to evaluate
 as argument to this program, e.g.:
-$ ./crc32hash "Dual Speed"
+$ tools/pcmcia/crc32hash "Dual Speed"
diff --git a/Documentation/prctl/.gitignore b/Documentation/prctl/.gitignore
deleted file mode 100644 (file)
index 0b5c274..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-disable-tsc-ctxt-sw-stress-test
-disable-tsc-on-off-stress-test
-disable-tsc-test
diff --git a/Documentation/prctl/Makefile b/Documentation/prctl/Makefile
deleted file mode 100644 (file)
index 44de308..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-ifndef CROSS_COMPILE
-# List of programs to build
-hostprogs-$(CONFIG_X86) := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test disable-tsc-test
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-
-HOSTCFLAGS_disable-tsc-ctxt-sw-stress-test.o += -I$(objtree)/usr/include
-HOSTCFLAGS_disable-tsc-on-off-stress-test.o += -I$(objtree)/usr/include
-HOSTCFLAGS_disable-tsc-test.o += -I$(objtree)/usr/include
-endif
diff --git a/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c
deleted file mode 100644 (file)
index f7499d1..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
- *
- * Tests if the control register is updated correctly
- * at context switches
- *
- * Warning: this test will cause a very high load for a few seconds
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
-#include <inttypes.h>
-#include <wait.h>
-
-
-#include <sys/prctl.h>
-#include <linux/prctl.h>
-
-/* Get/set the process' ability to use the timestamp counter instruction */
-#ifndef PR_GET_TSC
-#define PR_GET_TSC 25
-#define PR_SET_TSC 26
-# define PR_TSC_ENABLE         1   /* allow the use of the timestamp counter */
-# define PR_TSC_SIGSEGV                2   /* throw a SIGSEGV instead of reading the TSC */
-#endif
-
-static uint64_t rdtsc(void)
-{
-uint32_t lo, hi;
-/* We cannot use "=A", since this would use %rax on x86_64 */
-__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
-return (uint64_t)hi << 32 | lo;
-}
-
-static void sigsegv_expect(int sig)
-{
-       /* */
-}
-
-static void segvtask(void)
-{
-       if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
-       {
-               perror("prctl");
-               exit(0);
-       }
-       signal(SIGSEGV, sigsegv_expect);
-       alarm(10);
-       rdtsc();
-       fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
-       exit(0);
-}
-
-
-static void sigsegv_fail(int sig)
-{
-       fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
-       exit(0);
-}
-
-static void rdtsctask(void)
-{
-       if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
-       {
-               perror("prctl");
-               exit(0);
-       }
-       signal(SIGSEGV, sigsegv_fail);
-       alarm(10);
-       for(;;) rdtsc();
-}
-
-
-int main(void)
-{
-       int n_tasks = 100, i;
-
-       fprintf(stderr, "[No further output means we're allright]\n");
-
-       for (i=0; i<n_tasks; i++)
-               if (fork() == 0)
-               {
-                       if (i & 1)
-                               segvtask();
-                       else
-                               rdtsctask();
-               }
-
-       for (i=0; i<n_tasks; i++)
-               wait(NULL);
-
-       exit(0);
-}
-
diff --git a/Documentation/prctl/disable-tsc-on-off-stress-test.c b/Documentation/prctl/disable-tsc-on-off-stress-test.c
deleted file mode 100644 (file)
index a06f027..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
- *
- * Tests if the control register is updated correctly
- * when set with prctl()
- *
- * Warning: this test will cause a very high load for a few seconds
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
-#include <inttypes.h>
-#include <wait.h>
-
-
-#include <sys/prctl.h>
-#include <linux/prctl.h>
-
-/* Get/set the process' ability to use the timestamp counter instruction */
-#ifndef PR_GET_TSC
-#define PR_GET_TSC 25
-#define PR_SET_TSC 26
-# define PR_TSC_ENABLE         1   /* allow the use of the timestamp counter */
-# define PR_TSC_SIGSEGV                2   /* throw a SIGSEGV instead of reading the TSC */
-#endif
-
-/* snippet from wikipedia :-) */
-
-static uint64_t rdtsc(void)
-{
-uint32_t lo, hi;
-/* We cannot use "=A", since this would use %rax on x86_64 */
-__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
-return (uint64_t)hi << 32 | lo;
-}
-
-int should_segv = 0;
-
-static void sigsegv_cb(int sig)
-{
-       if (!should_segv)
-       {
-               fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
-               exit(0);
-       }
-       if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
-       {
-               perror("prctl");
-               exit(0);
-       }
-       should_segv = 0;
-
-       rdtsc();
-}
-
-static void task(void)
-{
-       signal(SIGSEGV, sigsegv_cb);
-       alarm(10);
-       for(;;)
-       {
-               rdtsc();
-               if (should_segv)
-               {
-                       fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
-                       exit(0);
-               }
-               if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
-               {
-                       perror("prctl");
-                       exit(0);
-               }
-               should_segv = 1;
-       }
-}
-
-
-int main(void)
-{
-       int n_tasks = 100, i;
-
-       fprintf(stderr, "[No further output means we're allright]\n");
-
-       for (i=0; i<n_tasks; i++)
-               if (fork() == 0)
-                       task();
-
-       for (i=0; i<n_tasks; i++)
-               wait(NULL);
-
-       exit(0);
-}
-
diff --git a/Documentation/prctl/disable-tsc-test.c b/Documentation/prctl/disable-tsc-test.c
deleted file mode 100644 (file)
index 8d494f7..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
- *
- * Basic test to test behaviour of PR_GET_TSC and PR_SET_TSC
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <signal.h>
-#include <inttypes.h>
-
-
-#include <sys/prctl.h>
-#include <linux/prctl.h>
-
-/* Get/set the process' ability to use the timestamp counter instruction */
-#ifndef PR_GET_TSC
-#define PR_GET_TSC 25
-#define PR_SET_TSC 26
-# define PR_TSC_ENABLE         1   /* allow the use of the timestamp counter */
-# define PR_TSC_SIGSEGV                2   /* throw a SIGSEGV instead of reading the TSC */
-#endif
-
-const char *tsc_names[] =
-{
-       [0] = "[not set]",
-       [PR_TSC_ENABLE] = "PR_TSC_ENABLE",
-       [PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV",
-};
-
-static uint64_t rdtsc(void)
-{
-uint32_t lo, hi;
-/* We cannot use "=A", since this would use %rax on x86_64 */
-__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
-return (uint64_t)hi << 32 | lo;
-}
-
-static void sigsegv_cb(int sig)
-{
-       int tsc_val = 0;
-
-       printf("[ SIG_SEGV ]\n");
-       printf("prctl(PR_GET_TSC, &tsc_val); ");
-       fflush(stdout);
-
-       if ( prctl(PR_GET_TSC, &tsc_val) == -1)
-               perror("prctl");
-
-       printf("tsc_val == %s\n", tsc_names[tsc_val]);
-       printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
-       fflush(stdout);
-       if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
-               perror("prctl");
-
-       printf("rdtsc() == ");
-}
-
-int main(void)
-{
-       int tsc_val = 0;
-
-       signal(SIGSEGV, sigsegv_cb);
-
-       printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
-       printf("prctl(PR_GET_TSC, &tsc_val); ");
-       fflush(stdout);
-
-       if ( prctl(PR_GET_TSC, &tsc_val) == -1)
-               perror("prctl");
-
-       printf("tsc_val == %s\n", tsc_names[tsc_val]);
-       printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
-       printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
-       fflush(stdout);
-
-       if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
-               perror("prctl");
-
-       printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
-       printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n");
-       fflush(stdout);
-
-       if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1)
-               perror("prctl");
-
-       printf("rdtsc() == ");
-       fflush(stdout);
-       printf("%llu\n", (unsigned long long)rdtsc());
-       fflush(stdout);
-
-       exit(EXIT_SUCCESS);
-}
-
diff --git a/Documentation/ptp/.gitignore b/Documentation/ptp/.gitignore
deleted file mode 100644 (file)
index f562e49..0000000
+++ /dev/null
@@ -1 +0,0 @@
-testptp
diff --git a/Documentation/ptp/Makefile b/Documentation/ptp/Makefile
deleted file mode 100644 (file)
index 293d6c0..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-# List of programs to build
-hostprogs-y := testptp
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-
-HOSTCFLAGS_testptp.o += -I$(objtree)/usr/include
-HOSTLOADLIBES_testptp := -lrt
diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c
deleted file mode 100644 (file)
index 5d2eae1..0000000
+++ /dev/null
@@ -1,523 +0,0 @@
-/*
- * PTP 1588 clock support - User space test program
- *
- * Copyright (C) 2010 OMICRON electronics GmbH
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-#define _GNU_SOURCE
-#define __SANE_USERSPACE_TYPES__        /* For PPC64, to get LL64 types */
-#include <errno.h>
-#include <fcntl.h>
-#include <inttypes.h>
-#include <math.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/timex.h>
-#include <sys/types.h>
-#include <time.h>
-#include <unistd.h>
-
-#include <linux/ptp_clock.h>
-
-#define DEVICE "/dev/ptp0"
-
-#ifndef ADJ_SETOFFSET
-#define ADJ_SETOFFSET 0x0100
-#endif
-
-#ifndef CLOCK_INVALID
-#define CLOCK_INVALID -1
-#endif
-
-/* clock_adjtime is not available in GLIBC < 2.14 */
-#if !__GLIBC_PREREQ(2, 14)
-#include <sys/syscall.h>
-static int clock_adjtime(clockid_t id, struct timex *tx)
-{
-       return syscall(__NR_clock_adjtime, id, tx);
-}
-#endif
-
-static clockid_t get_clockid(int fd)
-{
-#define CLOCKFD 3
-#define FD_TO_CLOCKID(fd)      ((~(clockid_t) (fd) << 3) | CLOCKFD)
-
-       return FD_TO_CLOCKID(fd);
-}
-
-static void handle_alarm(int s)
-{
-       printf("received signal %d\n", s);
-}
-
-static int install_handler(int signum, void (*handler)(int))
-{
-       struct sigaction action;
-       sigset_t mask;
-
-       /* Unblock the signal. */
-       sigemptyset(&mask);
-       sigaddset(&mask, signum);
-       sigprocmask(SIG_UNBLOCK, &mask, NULL);
-
-       /* Install the signal handler. */
-       action.sa_handler = handler;
-       action.sa_flags = 0;
-       sigemptyset(&action.sa_mask);
-       sigaction(signum, &action, NULL);
-
-       return 0;
-}
-
-static long ppb_to_scaled_ppm(int ppb)
-{
-       /*
-        * The 'freq' field in the 'struct timex' is in parts per
-        * million, but with a 16 bit binary fractional field.
-        * Instead of calculating either one of
-        *
-        *    scaled_ppm = (ppb / 1000) << 16  [1]
-        *    scaled_ppm = (ppb << 16) / 1000  [2]
-        *
-        * we simply use double precision math, in order to avoid the
-        * truncation in [1] and the possible overflow in [2].
-        */
-       return (long) (ppb * 65.536);
-}
-
-static int64_t pctns(struct ptp_clock_time *t)
-{
-       return t->sec * 1000000000LL + t->nsec;
-}
-
-static void usage(char *progname)
-{
-       fprintf(stderr,
-               "usage: %s [options]\n"
-               " -a val     request a one-shot alarm after 'val' seconds\n"
-               " -A val     request a periodic alarm every 'val' seconds\n"
-               " -c         query the ptp clock's capabilities\n"
-               " -d name    device to open\n"
-               " -e val     read 'val' external time stamp events\n"
-               " -f val     adjust the ptp clock frequency by 'val' ppb\n"
-               " -g         get the ptp clock time\n"
-               " -h         prints this message\n"
-               " -i val     index for event/trigger\n"
-               " -k val     measure the time offset between system and phc clock\n"
-               "            for 'val' times (Maximum 25)\n"
-               " -l         list the current pin configuration\n"
-               " -L pin,val configure pin index 'pin' with function 'val'\n"
-               "            the channel index is taken from the '-i' option\n"
-               "            'val' specifies the auxiliary function:\n"
-               "            0 - none\n"
-               "            1 - external time stamp\n"
-               "            2 - periodic output\n"
-               " -p val     enable output with a period of 'val' nanoseconds\n"
-               " -P val     enable or disable (val=1|0) the system clock PPS\n"
-               " -s         set the ptp clock time from the system time\n"
-               " -S         set the system time from the ptp clock time\n"
-               " -t val     shift the ptp clock time by 'val' seconds\n"
-               " -T val     set the ptp clock time to 'val' seconds\n",
-               progname);
-}
-
-int main(int argc, char *argv[])
-{
-       struct ptp_clock_caps caps;
-       struct ptp_extts_event event;
-       struct ptp_extts_request extts_request;
-       struct ptp_perout_request perout_request;
-       struct ptp_pin_desc desc;
-       struct timespec ts;
-       struct timex tx;
-
-       static timer_t timerid;
-       struct itimerspec timeout;
-       struct sigevent sigevent;
-
-       struct ptp_clock_time *pct;
-       struct ptp_sys_offset *sysoff;
-
-
-       char *progname;
-       unsigned int i;
-       int c, cnt, fd;
-
-       char *device = DEVICE;
-       clockid_t clkid;
-       int adjfreq = 0x7fffffff;
-       int adjtime = 0;
-       int capabilities = 0;
-       int extts = 0;
-       int gettime = 0;
-       int index = 0;
-       int list_pins = 0;
-       int oneshot = 0;
-       int pct_offset = 0;
-       int n_samples = 0;
-       int periodic = 0;
-       int perout = -1;
-       int pin_index = -1, pin_func;
-       int pps = -1;
-       int seconds = 0;
-       int settime = 0;
-
-       int64_t t1, t2, tp;
-       int64_t interval, offset;
-
-       progname = strrchr(argv[0], '/');
-       progname = progname ? 1+progname : argv[0];
-       while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) {
-               switch (c) {
-               case 'a':
-                       oneshot = atoi(optarg);
-                       break;
-               case 'A':
-                       periodic = atoi(optarg);
-                       break;
-               case 'c':
-                       capabilities = 1;
-                       break;
-               case 'd':
-                       device = optarg;
-                       break;
-               case 'e':
-                       extts = atoi(optarg);
-                       break;
-               case 'f':
-                       adjfreq = atoi(optarg);
-                       break;
-               case 'g':
-                       gettime = 1;
-                       break;
-               case 'i':
-                       index = atoi(optarg);
-                       break;
-               case 'k':
-                       pct_offset = 1;
-                       n_samples = atoi(optarg);
-                       break;
-               case 'l':
-                       list_pins = 1;
-                       break;
-               case 'L':
-                       cnt = sscanf(optarg, "%d,%d", &pin_index, &pin_func);
-                       if (cnt != 2) {
-                               usage(progname);
-                               return -1;
-                       }
-                       break;
-               case 'p':
-                       perout = atoi(optarg);
-                       break;
-               case 'P':
-                       pps = atoi(optarg);
-                       break;
-               case 's':
-                       settime = 1;
-                       break;
-               case 'S':
-                       settime = 2;
-                       break;
-               case 't':
-                       adjtime = atoi(optarg);
-                       break;
-               case 'T':
-                       settime = 3;
-                       seconds = atoi(optarg);
-                       break;
-               case 'h':
-                       usage(progname);
-                       return 0;
-               case '?':
-               default:
-                       usage(progname);
-                       return -1;
-               }
-       }
-
-       fd = open(device, O_RDWR);
-       if (fd < 0) {
-               fprintf(stderr, "opening %s: %s\n", device, strerror(errno));
-               return -1;
-       }
-
-       clkid = get_clockid(fd);
-       if (CLOCK_INVALID == clkid) {
-               fprintf(stderr, "failed to read clock id\n");
-               return -1;
-       }
-
-       if (capabilities) {
-               if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
-                       perror("PTP_CLOCK_GETCAPS");
-               } else {
-                       printf("capabilities:\n"
-                              "  %d maximum frequency adjustment (ppb)\n"
-                              "  %d programmable alarms\n"
-                              "  %d external time stamp channels\n"
-                              "  %d programmable periodic signals\n"
-                              "  %d pulse per second\n"
-                              "  %d programmable pins\n"
-                              "  %d cross timestamping\n",
-                              caps.max_adj,
-                              caps.n_alarm,
-                              caps.n_ext_ts,
-                              caps.n_per_out,
-                              caps.pps,
-                              caps.n_pins,
-                              caps.cross_timestamping);
-               }
-       }
-
-       if (0x7fffffff != adjfreq) {
-               memset(&tx, 0, sizeof(tx));
-               tx.modes = ADJ_FREQUENCY;
-               tx.freq = ppb_to_scaled_ppm(adjfreq);
-               if (clock_adjtime(clkid, &tx)) {
-                       perror("clock_adjtime");
-               } else {
-                       puts("frequency adjustment okay");
-               }
-       }
-
-       if (adjtime) {
-               memset(&tx, 0, sizeof(tx));
-               tx.modes = ADJ_SETOFFSET;
-               tx.time.tv_sec = adjtime;
-               tx.time.tv_usec = 0;
-               if (clock_adjtime(clkid, &tx) < 0) {
-                       perror("clock_adjtime");
-               } else {
-                       puts("time shift okay");
-               }
-       }
-
-       if (gettime) {
-               if (clock_gettime(clkid, &ts)) {
-                       perror("clock_gettime");
-               } else {
-                       printf("clock time: %ld.%09ld or %s",
-                              ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec));
-               }
-       }
-
-       if (settime == 1) {
-               clock_gettime(CLOCK_REALTIME, &ts);
-               if (clock_settime(clkid, &ts)) {
-                       perror("clock_settime");
-               } else {
-                       puts("set time okay");
-               }
-       }
-
-       if (settime == 2) {
-               clock_gettime(clkid, &ts);
-               if (clock_settime(CLOCK_REALTIME, &ts)) {
-                       perror("clock_settime");
-               } else {
-                       puts("set time okay");
-               }
-       }
-
-       if (settime == 3) {
-               ts.tv_sec = seconds;
-               ts.tv_nsec = 0;
-               if (clock_settime(clkid, &ts)) {
-                       perror("clock_settime");
-               } else {
-                       puts("set time okay");
-               }
-       }
-
-       if (extts) {
-               memset(&extts_request, 0, sizeof(extts_request));
-               extts_request.index = index;
-               extts_request.flags = PTP_ENABLE_FEATURE;
-               if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
-                       perror("PTP_EXTTS_REQUEST");
-                       extts = 0;
-               } else {
-                       puts("external time stamp request okay");
-               }
-               for (; extts; extts--) {
-                       cnt = read(fd, &event, sizeof(event));
-                       if (cnt != sizeof(event)) {
-                               perror("read");
-                               break;
-                       }
-                       printf("event index %u at %lld.%09u\n", event.index,
-                              event.t.sec, event.t.nsec);
-                       fflush(stdout);
-               }
-               /* Disable the feature again. */
-               extts_request.flags = 0;
-               if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
-                       perror("PTP_EXTTS_REQUEST");
-               }
-       }
-
-       if (list_pins) {
-               int n_pins = 0;
-               if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
-                       perror("PTP_CLOCK_GETCAPS");
-               } else {
-                       n_pins = caps.n_pins;
-               }
-               for (i = 0; i < n_pins; i++) {
-                       desc.index = i;
-                       if (ioctl(fd, PTP_PIN_GETFUNC, &desc)) {
-                               perror("PTP_PIN_GETFUNC");
-                               break;
-                       }
-                       printf("name %s index %u func %u chan %u\n",
-                              desc.name, desc.index, desc.func, desc.chan);
-               }
-       }
-
-       if (oneshot) {
-               install_handler(SIGALRM, handle_alarm);
-               /* Create a timer. */
-               sigevent.sigev_notify = SIGEV_SIGNAL;
-               sigevent.sigev_signo = SIGALRM;
-               if (timer_create(clkid, &sigevent, &timerid)) {
-                       perror("timer_create");
-                       return -1;
-               }
-               /* Start the timer. */
-               memset(&timeout, 0, sizeof(timeout));
-               timeout.it_value.tv_sec = oneshot;
-               if (timer_settime(timerid, 0, &timeout, NULL)) {
-                       perror("timer_settime");
-                       return -1;
-               }
-               pause();
-               timer_delete(timerid);
-       }
-
-       if (periodic) {
-               install_handler(SIGALRM, handle_alarm);
-               /* Create a timer. */
-               sigevent.sigev_notify = SIGEV_SIGNAL;
-               sigevent.sigev_signo = SIGALRM;
-               if (timer_create(clkid, &sigevent, &timerid)) {
-                       perror("timer_create");
-                       return -1;
-               }
-               /* Start the timer. */
-               memset(&timeout, 0, sizeof(timeout));
-               timeout.it_interval.tv_sec = periodic;
-               timeout.it_value.tv_sec = periodic;
-               if (timer_settime(timerid, 0, &timeout, NULL)) {
-                       perror("timer_settime");
-                       return -1;
-               }
-               while (1) {
-                       pause();
-               }
-               timer_delete(timerid);
-       }
-
-       if (perout >= 0) {
-               if (clock_gettime(clkid, &ts)) {
-                       perror("clock_gettime");
-                       return -1;
-               }
-               memset(&perout_request, 0, sizeof(perout_request));
-               perout_request.index = index;
-               perout_request.start.sec = ts.tv_sec + 2;
-               perout_request.start.nsec = 0;
-               perout_request.period.sec = 0;
-               perout_request.period.nsec = perout;
-               if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
-                       perror("PTP_PEROUT_REQUEST");
-               } else {
-                       puts("periodic output request okay");
-               }
-       }
-
-       if (pin_index >= 0) {
-               memset(&desc, 0, sizeof(desc));
-               desc.index = pin_index;
-               desc.func = pin_func;
-               desc.chan = index;
-               if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) {
-                       perror("PTP_PIN_SETFUNC");
-               } else {
-                       puts("set pin function okay");
-               }
-       }
-
-       if (pps != -1) {
-               int enable = pps ? 1 : 0;
-               if (ioctl(fd, PTP_ENABLE_PPS, enable)) {
-                       perror("PTP_ENABLE_PPS");
-               } else {
-                       puts("pps for system time request okay");
-               }
-       }
-
-       if (pct_offset) {
-               if (n_samples <= 0 || n_samples > 25) {
-                       puts("n_samples should be between 1 and 25");
-                       usage(progname);
-                       return -1;
-               }
-
-               sysoff = calloc(1, sizeof(*sysoff));
-               if (!sysoff) {
-                       perror("calloc");
-                       return -1;
-               }
-               sysoff->n_samples = n_samples;
-
-               if (ioctl(fd, PTP_SYS_OFFSET, sysoff))
-                       perror("PTP_SYS_OFFSET");
-               else
-                       puts("system and phc clock time offset request okay");
-
-               pct = &sysoff->ts[0];
-               for (i = 0; i < sysoff->n_samples; i++) {
-                       t1 = pctns(pct+2*i);
-                       tp = pctns(pct+2*i+1);
-                       t2 = pctns(pct+2*i+2);
-                       interval = t2 - t1;
-                       offset = (t2 + t1) / 2 - tp;
-
-                       printf("system time: %lld.%u\n",
-                               (pct+2*i)->sec, (pct+2*i)->nsec);
-                       printf("phc    time: %lld.%u\n",
-                               (pct+2*i+1)->sec, (pct+2*i+1)->nsec);
-                       printf("system time: %lld.%u\n",
-                               (pct+2*i+2)->sec, (pct+2*i+2)->nsec);
-                       printf("system/phc clock time offset is %" PRId64 " ns\n"
-                              "system     clock time delay  is %" PRId64 " ns\n",
-                               offset, interval);
-               }
-
-               free(sysoff);
-       }
-
-       close(fd);
-       return 0;
-}
diff --git a/Documentation/ptp/testptp.mk b/Documentation/ptp/testptp.mk
deleted file mode 100644 (file)
index 4ef2d97..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-# PTP 1588 clock support - User space test program
-#
-# Copyright (C) 2010 OMICRON electronics GmbH
-#
-#  This program is free software; you can redistribute it and/or modify
-#  it under the terms of the GNU General Public License as published by
-#  the Free Software Foundation; either version 2 of the License, or
-#  (at your option) any later version.
-#
-#  This program is distributed in the hope that it will be useful,
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#  GNU General Public License for more details.
-#
-#  You should have received a copy of the GNU General Public License
-#  along with this program; if not, write to the Free Software
-#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-CC        = $(CROSS_COMPILE)gcc
-INC       = -I$(KBUILD_OUTPUT)/usr/include
-CFLAGS    = -Wall $(INC)
-LDLIBS    = -lrt
-PROGS     = testptp
-
-all: $(PROGS)
-
-testptp: testptp.o
-
-clean:
-       rm -f testptp.o
-
-distclean: clean
-       rm -f $(PROGS)
index fd880150aeead89920e75758f7194a1ae48eee2d..e2c187947e588d6146c464852e07735966f95145 100644 (file)
@@ -21,16 +21,6 @@ NCR53c400 card, the Trantor T130B in its default configuration:
 The NCR53c400 does not support DMA but it does have Pseudo-DMA which is
 supported by the driver.
 
-If the default configuration does not work for you, you can use the kernel
-command lines (eg using the lilo append command):
-       ncr5380=addr,irq
-       ncr53c400=addr,irq
-       ncr53c400a=addr,irq
-       dtc3181e=addr,irq
-
-The driver does not probe for any addresses or ports other than those in
-the OVERRIDE or given to the kernel as above.
-
 This driver provides some information on what it has detected in
 /proc/scsi/g_NCR5380/x where x is the scsi card number as detected at boot
 time. More info to come in the future.
@@ -38,6 +28,16 @@ time. More info to come in the future.
 This driver works as a module.
 When included as a module, parameters can be passed on the insmod/modprobe
 command line:
+  irq=xx[,...] the interrupt(s)
+  base=xx[,...]        the port or base address(es) (for port or memory mapped, resp.)
+  card=xx[,...]        card type(s):
+               0 = NCR5380,
+               1 = NCR53C400,
+               2 = NCR53C400A,
+               3 = Domex Technology Corp 3181E (DTC3181E)
+               4 = Hewlett Packard C2502
+
+These old-style parameters can support only one card:
   ncr_irq=xx   the interrupt
   ncr_addr=xx  the port or base address (for port or memory
                mapped, resp.)
@@ -46,11 +46,19 @@ command line:
   ncr_53c400a=1 to set up for a NCR53C400A board
   dtc_3181e=1  to set up for a Domex Technology Corp 3181E board
   hp_c2502=1   to set up for a Hewlett Packard C2502 board
+
 e.g.
-modprobe g_NCR5380 ncr_irq=5 ncr_addr=0x350 ncr_5380=1
+OLD: modprobe g_NCR5380 ncr_irq=5 ncr_addr=0x350 ncr_5380=1
+NEW: modprobe g_NCR5380 irq=5 base=0x350 card=0
   for a port mapped NCR5380 board or
-modprobe g_NCR5380 ncr_irq=255 ncr_addr=0xc8000 ncr_53c400=1
-  for a memory mapped NCR53C400 board with interrupts disabled.
+
+OLD: modprobe g_NCR5380 ncr_irq=255 ncr_addr=0xc8000 ncr_53c400=1
+NEW: modprobe g_NCR5380 irq=255 base=0xc8000 card=1
+  for a memory mapped NCR53C400 board with interrupts disabled or
+
+NEW: modprobe g_NCR5380 irq=0,7 base=0x240,0x300 card=3,4
+  for two cards: DTC3181 (in non-PnP mode) at 0x240 with no IRQ
+             and HP C2502 at 0x300 with IRQ 7
 
 (255 should be specified for no or DMA interrupt, 254 to autoprobe for an 
      IRQ line if overridden on the command line.)
index 4644bf0d9832ee47d96a9f4ec923cb31a6208f85..8e4bb17d70ebbf1d01d5960520c7878c22cbef0b 100644 (file)
@@ -1,7 +1,5 @@
 00-INDEX
        - this file.
-Makefile
-       - Makefile for the example sourcefiles.
 butterfly
        - AVR Butterfly SPI driver overview and pin configuration.
 ep93xx_spi
diff --git a/Documentation/timers/.gitignore b/Documentation/timers/.gitignore
deleted file mode 100644 (file)
index c5c45d7..0000000
+++ /dev/null
@@ -1 +0,0 @@
-hpet_example
index ee212a27772f64882b6280f7ef1065a332e5222d..3be05fe0f1f9e57d983042b5b2e4e27811bb5590 100644 (file)
@@ -4,12 +4,8 @@ highres.txt
        - High resolution timers and dynamic ticks design notes
 hpet.txt
        - High Precision Event Timer Driver for Linux
-hpet_example.c
-       - sample hpet timer test program
 hrtimers.txt
        - subsystem for high-resolution kernel timers
-Makefile
-       - Build and link hpet_example
 NO_HZ.txt
        - Summary of the different methods for the scheduler clock-interrupts management.
 timekeeping.txt
diff --git a/Documentation/timers/Makefile b/Documentation/timers/Makefile
deleted file mode 100644 (file)
index 6c09ee6..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-# List of programs to build
-hostprogs-$(CONFIG_X86) := hpet_example
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
index a484d2c109d7ff0689d8997c04ce477ca6e79d1c..895345ec513b5cfe90ac3a2d7c95d355ecce121a 100644 (file)
@@ -25,4 +25,4 @@ arch/x86/kernel/hpet.c.
 
 The driver provides a userspace API which resembles the API found in the
 RTC driver framework.  An example user space program is provided in
-file:Documentation/timers/hpet_example.c
+file:samples/timers/hpet_example.c
diff --git a/Documentation/timers/hpet_example.c b/Documentation/timers/hpet_example.c
deleted file mode 100644 (file)
index 3ab4993..0000000
+++ /dev/null
@@ -1,294 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <string.h>
-#include <memory.h>
-#include <malloc.h>
-#include <time.h>
-#include <ctype.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <signal.h>
-#include <errno.h>
-#include <sys/time.h>
-#include <linux/hpet.h>
-
-
-extern void hpet_open_close(int, const char **);
-extern void hpet_info(int, const char **);
-extern void hpet_poll(int, const char **);
-extern void hpet_fasync(int, const char **);
-extern void hpet_read(int, const char **);
-
-#include <sys/poll.h>
-#include <sys/ioctl.h>
-
-struct hpet_command {
-       char            *command;
-       void            (*func)(int argc, const char ** argv);
-} hpet_command[] = {
-       {
-               "open-close",
-               hpet_open_close
-       },
-       {
-               "info",
-               hpet_info
-       },
-       {
-               "poll",
-               hpet_poll
-       },
-       {
-               "fasync",
-               hpet_fasync
-       },
-};
-
-int
-main(int argc, const char ** argv)
-{
-       unsigned int    i;
-
-       argc--;
-       argv++;
-
-       if (!argc) {
-               fprintf(stderr, "-hpet: requires command\n");
-               return -1;
-       }
-
-
-       for (i = 0; i < (sizeof (hpet_command) / sizeof (hpet_command[0])); i++)
-               if (!strcmp(argv[0], hpet_command[i].command)) {
-                       argc--;
-                       argv++;
-                       fprintf(stderr, "-hpet: executing %s\n",
-                               hpet_command[i].command);
-                       hpet_command[i].func(argc, argv);
-                       return 0;
-               }
-
-       fprintf(stderr, "do_hpet: command %s not implemented\n", argv[0]);
-
-       return -1;
-}
-
-void
-hpet_open_close(int argc, const char **argv)
-{
-       int     fd;
-
-       if (argc != 1) {
-               fprintf(stderr, "hpet_open_close: device-name\n");
-               return;
-       }
-
-       fd = open(argv[0], O_RDONLY);
-       if (fd < 0)
-               fprintf(stderr, "hpet_open_close: open failed\n");
-       else
-               close(fd);
-
-       return;
-}
-
-void
-hpet_info(int argc, const char **argv)
-{
-       struct hpet_info        info;
-       int                     fd;
-
-       if (argc != 1) {
-               fprintf(stderr, "hpet_info: device-name\n");
-               return;
-       }
-
-       fd = open(argv[0], O_RDONLY);
-       if (fd < 0) {
-               fprintf(stderr, "hpet_info: open of %s failed\n", argv[0]);
-               return;
-       }
-
-       if (ioctl(fd, HPET_INFO, &info) < 0) {
-               fprintf(stderr, "hpet_info: failed to get info\n");
-               goto out;
-       }
-
-       fprintf(stderr, "hpet_info: hi_irqfreq 0x%lx hi_flags 0x%lx ",
-               info.hi_ireqfreq, info.hi_flags);
-       fprintf(stderr, "hi_hpet %d hi_timer %d\n",
-               info.hi_hpet, info.hi_timer);
-
-out:
-       close(fd);
-       return;
-}
-
-void
-hpet_poll(int argc, const char **argv)
-{
-       unsigned long           freq;
-       int                     iterations, i, fd;
-       struct pollfd           pfd;
-       struct hpet_info        info;
-       struct timeval          stv, etv;
-       struct timezone         tz;
-       long                    usec;
-
-       if (argc != 3) {
-               fprintf(stderr, "hpet_poll: device-name freq iterations\n");
-               return;
-       }
-
-       freq = atoi(argv[1]);
-       iterations = atoi(argv[2]);
-
-       fd = open(argv[0], O_RDONLY);
-
-       if (fd < 0) {
-               fprintf(stderr, "hpet_poll: open of %s failed\n", argv[0]);
-               return;
-       }
-
-       if (ioctl(fd, HPET_IRQFREQ, freq) < 0) {
-               fprintf(stderr, "hpet_poll: HPET_IRQFREQ failed\n");
-               goto out;
-       }
-
-       if (ioctl(fd, HPET_INFO, &info) < 0) {
-               fprintf(stderr, "hpet_poll: failed to get info\n");
-               goto out;
-       }
-
-       fprintf(stderr, "hpet_poll: info.hi_flags 0x%lx\n", info.hi_flags);
-
-       if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) {
-               fprintf(stderr, "hpet_poll: HPET_EPI failed\n");
-               goto out;
-       }
-
-       if (ioctl(fd, HPET_IE_ON, 0) < 0) {
-               fprintf(stderr, "hpet_poll, HPET_IE_ON failed\n");
-               goto out;
-       }
-
-       pfd.fd = fd;
-       pfd.events = POLLIN;
-
-       for (i = 0; i < iterations; i++) {
-               pfd.revents = 0;
-               gettimeofday(&stv, &tz);
-               if (poll(&pfd, 1, -1) < 0)
-                       fprintf(stderr, "hpet_poll: poll failed\n");
-               else {
-                       long    data;
-
-                       gettimeofday(&etv, &tz);
-                       usec = stv.tv_sec * 1000000 + stv.tv_usec;
-                       usec = (etv.tv_sec * 1000000 + etv.tv_usec) - usec;
-
-                       fprintf(stderr,
-                               "hpet_poll: expired time = 0x%lx\n", usec);
-
-                       fprintf(stderr, "hpet_poll: revents = 0x%x\n",
-                               pfd.revents);
-
-                       if (read(fd, &data, sizeof(data)) != sizeof(data)) {
-                               fprintf(stderr, "hpet_poll: read failed\n");
-                       }
-                       else
-                               fprintf(stderr, "hpet_poll: data 0x%lx\n",
-                                       data);
-               }
-       }
-
-out:
-       close(fd);
-       return;
-}
-
-static int hpet_sigio_count;
-
-static void
-hpet_sigio(int val)
-{
-       fprintf(stderr, "hpet_sigio: called\n");
-       hpet_sigio_count++;
-}
-
-void
-hpet_fasync(int argc, const char **argv)
-{
-       unsigned long           freq;
-       int                     iterations, i, fd, value;
-       sig_t                   oldsig;
-       struct hpet_info        info;
-
-       hpet_sigio_count = 0;
-       fd = -1;
-
-       if ((oldsig = signal(SIGIO, hpet_sigio)) == SIG_ERR) {
-               fprintf(stderr, "hpet_fasync: failed to set signal handler\n");
-               return;
-       }
-
-       if (argc != 3) {
-               fprintf(stderr, "hpet_fasync: device-name freq iterations\n");
-               goto out;
-       }
-
-       fd = open(argv[0], O_RDONLY);
-
-       if (fd < 0) {
-               fprintf(stderr, "hpet_fasync: failed to open %s\n", argv[0]);
-               return;
-       }
-
-
-       if ((fcntl(fd, F_SETOWN, getpid()) == 1) ||
-               ((value = fcntl(fd, F_GETFL)) == 1) ||
-               (fcntl(fd, F_SETFL, value | O_ASYNC) == 1)) {
-               fprintf(stderr, "hpet_fasync: fcntl failed\n");
-               goto out;
-       }
-
-       freq = atoi(argv[1]);
-       iterations = atoi(argv[2]);
-
-       if (ioctl(fd, HPET_IRQFREQ, freq) < 0) {
-               fprintf(stderr, "hpet_fasync: HPET_IRQFREQ failed\n");
-               goto out;
-       }
-
-       if (ioctl(fd, HPET_INFO, &info) < 0) {
-               fprintf(stderr, "hpet_fasync: failed to get info\n");
-               goto out;
-       }
-
-       fprintf(stderr, "hpet_fasync: info.hi_flags 0x%lx\n", info.hi_flags);
-
-       if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) {
-               fprintf(stderr, "hpet_fasync: HPET_EPI failed\n");
-               goto out;
-       }
-
-       if (ioctl(fd, HPET_IE_ON, 0) < 0) {
-               fprintf(stderr, "hpet_fasync, HPET_IE_ON failed\n");
-               goto out;
-       }
-
-       for (i = 0; i < iterations; i++) {
-               (void) pause();
-               fprintf(stderr, "hpet_fasync: count = %d\n", hpet_sigio_count);
-       }
-
-out:
-       signal(SIGIO, oldsig);
-
-       if (fd >= 0)
-               close(fd);
-
-       return;
-}
diff --git a/Documentation/vDSO/.gitignore b/Documentation/vDSO/.gitignore
deleted file mode 100644 (file)
index 133bf9e..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-vdso_test
-vdso_standalone_test_x86
diff --git a/Documentation/vDSO/Makefile b/Documentation/vDSO/Makefile
deleted file mode 100644 (file)
index b12e987..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-ifndef CROSS_COMPILE
-# vdso_test won't build for glibc < 2.16, so disable it
-# hostprogs-y := vdso_test
-hostprogs-$(CONFIG_X86) := vdso_standalone_test_x86
-vdso_standalone_test_x86-objs := vdso_standalone_test_x86.o parse_vdso.o
-vdso_test-objs := parse_vdso.o vdso_test.o
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
-
-HOSTCFLAGS := -I$(objtree)/usr/include -std=gnu99
-HOSTCFLAGS_vdso_standalone_test_x86.o := -fno-asynchronous-unwind-tables -fno-stack-protector
-HOSTLOADLIBES_vdso_standalone_test_x86 := -nostdlib
-ifeq ($(CONFIG_X86_32),y)
-HOSTLOADLIBES_vdso_standalone_test_x86 += -lgcc_s
-endif
-endif
diff --git a/Documentation/vDSO/parse_vdso.c b/Documentation/vDSO/parse_vdso.c
deleted file mode 100644 (file)
index 1dbb4b8..0000000
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * parse_vdso.c: Linux reference vDSO parser
- * Written by Andrew Lutomirski, 2011-2014.
- *
- * This code is meant to be linked in to various programs that run on Linux.
- * As such, it is available with as few restrictions as possible.  This file
- * is licensed under the Creative Commons Zero License, version 1.0,
- * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode
- *
- * The vDSO is a regular ELF DSO that the kernel maps into user space when
- * it starts a program.  It works equally well in statically and dynamically
- * linked binaries.
- *
- * This code is tested on x86.  In principle it should work on any
- * architecture that has a vDSO.
- */
-
-#include <stdbool.h>
-#include <stdint.h>
-#include <string.h>
-#include <limits.h>
-#include <elf.h>
-
-/*
- * To use this vDSO parser, first call one of the vdso_init_* functions.
- * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
- * to vdso_init_from_sysinfo_ehdr.  Otherwise pass auxv to vdso_init_from_auxv.
- * Then call vdso_sym for each symbol you want.  For example, to look up
- * gettimeofday on x86_64, use:
- *
- *     <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
- * or
- *     <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
- *
- * vdso_sym will return 0 if the symbol doesn't exist or if the init function
- * failed or was not called.  vdso_sym is a little slow, so its return value
- * should be cached.
- *
- * vdso_sym is threadsafe; the init functions are not.
- *
- * These are the prototypes:
- */
-extern void vdso_init_from_auxv(void *auxv);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void *vdso_sym(const char *version, const char *name);
-
-
-/* And here's the code. */
-#ifndef ELF_BITS
-# if ULONG_MAX > 0xffffffffUL
-#  define ELF_BITS 64
-# else
-#  define ELF_BITS 32
-# endif
-#endif
-
-#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
-#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
-#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
-
-static struct vdso_info
-{
-       bool valid;
-
-       /* Load information */
-       uintptr_t load_addr;
-       uintptr_t load_offset;  /* load_addr - recorded vaddr */
-
-       /* Symbol table */
-       ELF(Sym) *symtab;
-       const char *symstrings;
-       ELF(Word) *bucket, *chain;
-       ELF(Word) nbucket, nchain;
-
-       /* Version table */
-       ELF(Versym) *versym;
-       ELF(Verdef) *verdef;
-} vdso_info;
-
-/* Straight from the ELF specification. */
-static unsigned long elf_hash(const unsigned char *name)
-{
-       unsigned long h = 0, g;
-       while (*name)
-       {
-               h = (h << 4) + *name++;
-               if (g = h & 0xf0000000)
-                       h ^= g >> 24;
-               h &= ~g;
-       }
-       return h;
-}
-
-void vdso_init_from_sysinfo_ehdr(uintptr_t base)
-{
-       size_t i;
-       bool found_vaddr = false;
-
-       vdso_info.valid = false;
-
-       vdso_info.load_addr = base;
-
-       ELF(Ehdr) *hdr = (ELF(Ehdr)*)base;
-       if (hdr->e_ident[EI_CLASS] !=
-           (ELF_BITS == 32 ? ELFCLASS32 : ELFCLASS64)) {
-               return;  /* Wrong ELF class -- check ELF_BITS */
-       }
-
-       ELF(Phdr) *pt = (ELF(Phdr)*)(vdso_info.load_addr + hdr->e_phoff);
-       ELF(Dyn) *dyn = 0;
-
-       /*
-        * We need two things from the segment table: the load offset
-        * and the dynamic table.
-        */
-       for (i = 0; i < hdr->e_phnum; i++)
-       {
-               if (pt[i].p_type == PT_LOAD && !found_vaddr) {
-                       found_vaddr = true;
-                       vdso_info.load_offset = base
-                               + (uintptr_t)pt[i].p_offset
-                               - (uintptr_t)pt[i].p_vaddr;
-               } else if (pt[i].p_type == PT_DYNAMIC) {
-                       dyn = (ELF(Dyn)*)(base + pt[i].p_offset);
-               }
-       }
-
-       if (!found_vaddr || !dyn)
-               return;  /* Failed */
-
-       /*
-        * Fish out the useful bits of the dynamic table.
-        */
-       ELF(Word) *hash = 0;
-       vdso_info.symstrings = 0;
-       vdso_info.symtab = 0;
-       vdso_info.versym = 0;
-       vdso_info.verdef = 0;
-       for (i = 0; dyn[i].d_tag != DT_NULL; i++) {
-               switch (dyn[i].d_tag) {
-               case DT_STRTAB:
-                       vdso_info.symstrings = (const char *)
-                               ((uintptr_t)dyn[i].d_un.d_ptr
-                                + vdso_info.load_offset);
-                       break;
-               case DT_SYMTAB:
-                       vdso_info.symtab = (ELF(Sym) *)
-                               ((uintptr_t)dyn[i].d_un.d_ptr
-                                + vdso_info.load_offset);
-                       break;
-               case DT_HASH:
-                       hash = (ELF(Word) *)
-                               ((uintptr_t)dyn[i].d_un.d_ptr
-                                + vdso_info.load_offset);
-                       break;
-               case DT_VERSYM:
-                       vdso_info.versym = (ELF(Versym) *)
-                               ((uintptr_t)dyn[i].d_un.d_ptr
-                                + vdso_info.load_offset);
-                       break;
-               case DT_VERDEF:
-                       vdso_info.verdef = (ELF(Verdef) *)
-                               ((uintptr_t)dyn[i].d_un.d_ptr
-                                + vdso_info.load_offset);
-                       break;
-               }
-       }
-       if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
-               return;  /* Failed */
-
-       if (!vdso_info.verdef)
-               vdso_info.versym = 0;
-
-       /* Parse the hash table header. */
-       vdso_info.nbucket = hash[0];
-       vdso_info.nchain = hash[1];
-       vdso_info.bucket = &hash[2];
-       vdso_info.chain = &hash[vdso_info.nbucket + 2];
-
-       /* That's all we need. */
-       vdso_info.valid = true;
-}
-
-static bool vdso_match_version(ELF(Versym) ver,
-                              const char *name, ELF(Word) hash)
-{
-       /*
-        * This is a helper function to check if the version indexed by
-        * ver matches name (which hashes to hash).
-        *
-        * The version definition table is a mess, and I don't know how
-        * to do this in better than linear time without allocating memory
-        * to build an index.  I also don't know why the table has
-        * variable size entries in the first place.
-        *
-        * For added fun, I can't find a comprehensible specification of how
-        * to parse all the weird flags in the table.
-        *
-        * So I just parse the whole table every time.
-        */
-
-       /* First step: find the version definition */
-       ver &= 0x7fff;  /* Apparently bit 15 means "hidden" */
-       ELF(Verdef) *def = vdso_info.verdef;
-       while(true) {
-               if ((def->vd_flags & VER_FLG_BASE) == 0
-                   && (def->vd_ndx & 0x7fff) == ver)
-                       break;
-
-               if (def->vd_next == 0)
-                       return false;  /* No definition. */
-
-               def = (ELF(Verdef) *)((char *)def + def->vd_next);
-       }
-
-       /* Now figure out whether it matches. */
-       ELF(Verdaux) *aux = (ELF(Verdaux)*)((char *)def + def->vd_aux);
-       return def->vd_hash == hash
-               && !strcmp(name, vdso_info.symstrings + aux->vda_name);
-}
-
-void *vdso_sym(const char *version, const char *name)
-{
-       unsigned long ver_hash;
-       if (!vdso_info.valid)
-               return 0;
-
-       ver_hash = elf_hash(version);
-       ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
-
-       for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
-               ELF(Sym) *sym = &vdso_info.symtab[chain];
-
-               /* Check for a defined global or weak function w/ right name. */
-               if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
-                       continue;
-               if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
-                   ELF64_ST_BIND(sym->st_info) != STB_WEAK)
-                       continue;
-               if (sym->st_shndx == SHN_UNDEF)
-                       continue;
-               if (strcmp(name, vdso_info.symstrings + sym->st_name))
-                       continue;
-
-               /* Check symbol version. */
-               if (vdso_info.versym
-                   && !vdso_match_version(vdso_info.versym[chain],
-                                          version, ver_hash))
-                       continue;
-
-               return (void *)(vdso_info.load_offset + sym->st_value);
-       }
-
-       return 0;
-}
-
-void vdso_init_from_auxv(void *auxv)
-{
-       ELF(auxv_t) *elf_auxv = auxv;
-       for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
-       {
-               if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
-                       vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val);
-                       return;
-               }
-       }
-
-       vdso_info.valid = false;
-}
diff --git a/Documentation/vDSO/vdso_standalone_test_x86.c b/Documentation/vDSO/vdso_standalone_test_x86.c
deleted file mode 100644 (file)
index 93b0ebf..0000000
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * vdso_test.c: Sample code to test parse_vdso.c on x86
- * Copyright (c) 2011-2014 Andy Lutomirski
- * Subject to the GNU General Public License, version 2
- *
- * You can amuse yourself by compiling with:
- * gcc -std=gnu99 -nostdlib
- *     -Os -fno-asynchronous-unwind-tables -flto -lgcc_s
- *      vdso_standalone_test_x86.c parse_vdso.c
- * to generate a small binary.  On x86_64, you can omit -lgcc_s
- * if you want the binary to be completely standalone.
- */
-
-#include <sys/syscall.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <stdint.h>
-
-extern void *vdso_sym(const char *version, const char *name);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void vdso_init_from_auxv(void *auxv);
-
-/* We need a libc functions... */
-int strcmp(const char *a, const char *b)
-{
-       /* This implementation is buggy: it never returns -1. */
-       while (*a || *b) {
-               if (*a != *b)
-                       return 1;
-               if (*a == 0 || *b == 0)
-                       return 1;
-               a++;
-               b++;
-       }
-
-       return 0;
-}
-
-/* ...and two syscalls.  This is x86-specific. */
-static inline long x86_syscall3(long nr, long a0, long a1, long a2)
-{
-       long ret;
-#ifdef __x86_64__
-       asm volatile ("syscall" : "=a" (ret) : "a" (nr),
-                     "D" (a0), "S" (a1), "d" (a2) :
-                     "cc", "memory", "rcx",
-                     "r8", "r9", "r10", "r11" );
-#else
-       asm volatile ("int $0x80" : "=a" (ret) : "a" (nr),
-                     "b" (a0), "c" (a1), "d" (a2) :
-                     "cc", "memory" );
-#endif
-       return ret;
-}
-
-static inline long linux_write(int fd, const void *data, size_t len)
-{
-       return x86_syscall3(__NR_write, fd, (long)data, (long)len);
-}
-
-static inline void linux_exit(int code)
-{
-       x86_syscall3(__NR_exit, code, 0, 0);
-}
-
-void to_base10(char *lastdig, time_t n)
-{
-       while (n) {
-               *lastdig = (n % 10) + '0';
-               n /= 10;
-               lastdig--;
-       }
-}
-
-__attribute__((externally_visible)) void c_main(void **stack)
-{
-       /* Parse the stack */
-       long argc = (long)*stack;
-       stack += argc + 2;
-
-       /* Now we're pointing at the environment.  Skip it. */
-       while(*stack)
-               stack++;
-       stack++;
-
-       /* Now we're pointing at auxv.  Initialize the vDSO parser. */
-       vdso_init_from_auxv((void *)stack);
-
-       /* Find gettimeofday. */
-       typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
-       gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
-
-       if (!gtod)
-               linux_exit(1);
-
-       struct timeval tv;
-       long ret = gtod(&tv, 0);
-
-       if (ret == 0) {
-               char buf[] = "The time is                     .000000\n";
-               to_base10(buf + 31, tv.tv_sec);
-               to_base10(buf + 38, tv.tv_usec);
-               linux_write(1, buf, sizeof(buf) - 1);
-       } else {
-               linux_exit(ret);
-       }
-
-       linux_exit(0);
-}
-
-/*
- * This is the real entry point.  It passes the initial stack into
- * the C entry point.
- */
-asm (
-       ".text\n"
-       ".global _start\n"
-       ".type _start,@function\n"
-       "_start:\n\t"
-#ifdef __x86_64__
-       "mov %rsp,%rdi\n\t"
-       "jmp c_main"
-#else
-       "push %esp\n\t"
-       "call c_main\n\t"
-       "int $3"
-#endif
-       );
diff --git a/Documentation/vDSO/vdso_test.c b/Documentation/vDSO/vdso_test.c
deleted file mode 100644 (file)
index 8daeb7d..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * vdso_test.c: Sample code to test parse_vdso.c
- * Copyright (c) 2014 Andy Lutomirski
- * Subject to the GNU General Public License, version 2
- *
- * Compile with:
- * gcc -std=gnu99 vdso_test.c parse_vdso.c
- *
- * Tested on x86, 32-bit and 64-bit.  It may work on other architectures, too.
- */
-
-#include <stdint.h>
-#include <elf.h>
-#include <stdio.h>
-#include <sys/auxv.h>
-#include <sys/time.h>
-
-extern void *vdso_sym(const char *version, const char *name);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void vdso_init_from_auxv(void *auxv);
-
-int main(int argc, char **argv)
-{
-       unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
-       if (!sysinfo_ehdr) {
-               printf("AT_SYSINFO_EHDR is not present!\n");
-               return 0;
-       }
-
-       vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
-
-       /* Find gettimeofday. */
-       typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
-       gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
-
-       if (!gtod) {
-               printf("Could not find __vdso_gettimeofday\n");
-               return 1;
-       }
-
-       struct timeval tv;
-       long ret = gtod(&tv, 0);
-
-       if (ret == 0) {
-               printf("The time is %lld.%06lld\n",
-                      (long long)tv.tv_sec, (long long)tv.tv_usec);
-       } else {
-               printf("__vdso_gettimeofday failed\n");
-       }
-
-       return 0;
-}
index 739db9ab16b2c973b8a348dcbe657a0c9004e227..6bbceb9a3a19d5ce30734493e3c8785a04c1b00d 100644 (file)
@@ -777,6 +777,17 @@ Gets the current timestamp of kvmclock as seen by the current guest. In
 conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios
 such as migration.
 
+When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
+set of bits that KVM can return in struct kvm_clock_data's flag member.
+
+The only flag defined now is KVM_CLOCK_TSC_STABLE.  If set, the returned
+value is the exact kvmclock value seen by all VCPUs at the instant
+when KVM_GET_CLOCK was called.  If clear, the returned value is simply
+CLOCK_MONOTONIC plus a constant offset; the offset can be modified
+with KVM_SET_CLOCK.  KVM will try to make all VCPUs follow this clock,
+but the exact value read by each VCPU could differ, because the host
+TSC is not stable.
+
 struct kvm_clock_data {
        __u64 clock;  /* kvmclock current value */
        __u32 flags;
index f2491a8c68b4a6f20c8a2903c21fe7286c7e9e48..e5dd9f4d61008ad6431e067b900608788e573020 100644 (file)
@@ -4,7 +4,17 @@ KVM Lock Overview
 1. Acquisition Orders
 ---------------------
 
-(to be written)
+The acquisition orders for mutexes are as follows:
+
+- kvm->lock is taken outside vcpu->mutex
+
+- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
+
+- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
+  them together is quite rare.
+
+For spinlocks, kvm_lock is taken outside kvm->mmu_lock.  Everything
+else is a leaf: no other lock is taken inside the critical sections.
 
 2: Exception
 ------------
diff --git a/Documentation/watchdog/Makefile b/Documentation/watchdog/Makefile
deleted file mode 100644 (file)
index 6018f45..0000000
+++ /dev/null
@@ -1 +0,0 @@
-subdir-y := src
diff --git a/Documentation/watchdog/src/.gitignore b/Documentation/watchdog/src/.gitignore
deleted file mode 100644 (file)
index ac90997..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-watchdog-simple
-watchdog-test
diff --git a/Documentation/watchdog/src/Makefile b/Documentation/watchdog/src/Makefile
deleted file mode 100644 (file)
index 4a892c3..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-# List of programs to build
-hostprogs-y := watchdog-simple watchdog-test
-
-# Tell kbuild to always build the programs
-always := $(hostprogs-y)
diff --git a/Documentation/watchdog/src/watchdog-simple.c b/Documentation/watchdog/src/watchdog-simple.c
deleted file mode 100644 (file)
index ba45803..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-int main(void)
-{
-       int fd = open("/dev/watchdog", O_WRONLY);
-       int ret = 0;
-       if (fd == -1) {
-               perror("watchdog");
-               exit(EXIT_FAILURE);
-       }
-       while (1) {
-               ret = write(fd, "\0", 1);
-               if (ret != 1) {
-                       ret = -1;
-                       break;
-               }
-               sleep(10);
-       }
-       close(fd);
-       return ret;
-}
diff --git a/Documentation/watchdog/src/watchdog-test.c b/Documentation/watchdog/src/watchdog-test.c
deleted file mode 100644 (file)
index 6983d05..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Watchdog Driver Test Program
- */
-
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <signal.h>
-#include <sys/ioctl.h>
-#include <linux/types.h>
-#include <linux/watchdog.h>
-
-int fd;
-const char v = 'V';
-
-/*
- * This function simply sends an IOCTL to the driver, which in turn ticks
- * the PC Watchdog card to reset its internal timer so it doesn't trigger
- * a computer reset.
- */
-static void keep_alive(void)
-{
-    int dummy;
-
-    printf(".");
-    ioctl(fd, WDIOC_KEEPALIVE, &dummy);
-}
-
-/*
- * The main program.  Run the program with "-d" to disable the card,
- * or "-e" to enable the card.
- */
-
-static void term(int sig)
-{
-    int ret = write(fd, &v, 1);
-
-    close(fd);
-    if (ret < 0)
-       printf("\nStopping watchdog ticks failed (%d)...\n", errno);
-    else
-       printf("\nStopping watchdog ticks...\n");
-    exit(0);
-}
-
-int main(int argc, char *argv[])
-{
-    int flags;
-    unsigned int ping_rate = 1;
-    int ret;
-
-    setbuf(stdout, NULL);
-
-    fd = open("/dev/watchdog", O_WRONLY);
-
-    if (fd == -1) {
-       printf("Watchdog device not enabled.\n");
-       exit(-1);
-    }
-
-    if (argc > 1) {
-       if (!strncasecmp(argv[1], "-d", 2)) {
-           flags = WDIOS_DISABLECARD;
-           ioctl(fd, WDIOC_SETOPTIONS, &flags);
-           printf("Watchdog card disabled.\n");
-           goto end;
-       } else if (!strncasecmp(argv[1], "-e", 2)) {
-           flags = WDIOS_ENABLECARD;
-           ioctl(fd, WDIOC_SETOPTIONS, &flags);
-           printf("Watchdog card enabled.\n");
-           goto end;
-       } else if (!strncasecmp(argv[1], "-t", 2) && argv[2]) {
-           flags = atoi(argv[2]);
-           ioctl(fd, WDIOC_SETTIMEOUT, &flags);
-           printf("Watchdog timeout set to %u seconds.\n", flags);
-           goto end;
-       } else if (!strncasecmp(argv[1], "-p", 2) && argv[2]) {
-           ping_rate = strtoul(argv[2], NULL, 0);
-           printf("Watchdog ping rate set to %u seconds.\n", ping_rate);
-       } else {
-           printf("-d to disable, -e to enable, -t <n> to set " \
-               "the timeout,\n-p <n> to set the ping rate, and \n");
-           printf("run by itself to tick the card.\n");
-           goto end;
-       }
-    }
-
-    printf("Watchdog Ticking Away!\n");
-
-    signal(SIGINT, term);
-
-    while(1) {
-       keep_alive();
-       sleep(ping_rate);
-    }
-end:
-    ret = write(fd, &v, 1);
-    if (ret < 0)
-       printf("Stopping watchdog ticks failed (%d)...\n", errno);
-    close(fd);
-    return 0;
-}
index b3a701f48118976a285259b0db73fc9d1a4948fa..0e62ba33b7fbb9507ac415689cae2700a2838b30 100644 (file)
@@ -37,7 +37,7 @@ activates as soon as /dev/watchdog is opened and will reboot unless
 the watchdog is pinged within a certain time, this time is called the
 timeout or margin.  The simplest way to ping the watchdog is to write
 some data to the device.  So a very simple watchdog daemon would look
-like this source file:  see Documentation/watchdog/src/watchdog-simple.c
+like this source file:  see samples/watchdog/watchdog-simple.c
 
 A more advanced driver could for example check that a HTTP server is
 still responding before doing the write call to ping the watchdog.
index 061c2e35384f5eb6354bba48e2da565f18040731..ed2f0b860869b1c5296fe63da31cae90f42e38a2 100644 (file)
@@ -47,4 +47,4 @@ The external event interfaces on the WDT boards are not currently supported.
 Minor numbers are however allocated for it.
 
 
-Example Watchdog Driver:  see Documentation/watchdog/src/watchdog-simple.c
+Example Watchdog Driver:  see samples/watchdog/watchdog-simple.c
index f18b5467e37fecbba7bccc00c35540b0b3002b6b..3348d0ed0a3146bea0fe2a1d1cc1e9a25f9208eb 100644 (file)
@@ -77,6 +77,7 @@ Descriptions of section entries:
        Q: Patchwork web based patch tracking system site
        T: SCM tree type and location.
           Type is one of: git, hg, quilt, stgit, topgit
+       B: Bug tracking system location.
        S: Status, one of the following:
           Supported:   Someone is actually paid to look after this.
           Maintained:  Someone actually looks after it.
@@ -281,6 +282,7 @@ L:  linux-acpi@vger.kernel.org
 W:     https://01.org/linux-acpi
 Q:     https://patchwork.kernel.org/project/linux-acpi/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+B:     https://bugzilla.kernel.org
 S:     Supported
 F:     drivers/acpi/
 F:     drivers/pnp/pnpacpi/
@@ -304,6 +306,8 @@ W:  https://acpica.org/
 W:     https://github.com/acpica/acpica/
 Q:     https://patchwork.kernel.org/project/linux-acpi/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+B:     https://bugzilla.kernel.org
+B:     https://bugs.acpica.org
 S:     Supported
 F:     drivers/acpi/acpica/
 F:     include/acpi/
@@ -313,13 +317,23 @@ ACPI FAN DRIVER
 M:     Zhang Rui <rui.zhang@intel.com>
 L:     linux-acpi@vger.kernel.org
 W:     https://01.org/linux-acpi
+B:     https://bugzilla.kernel.org
 S:     Supported
 F:     drivers/acpi/fan.c
 
+ACPI FOR ARM64 (ACPI/arm64)
+M:     Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+M:     Hanjun Guo <hanjun.guo@linaro.org>
+M:     Sudeep Holla <sudeep.holla@arm.com>
+L:     linux-acpi@vger.kernel.org
+S:     Maintained
+F:     drivers/acpi/arm64
+
 ACPI THERMAL DRIVER
 M:     Zhang Rui <rui.zhang@intel.com>
 L:     linux-acpi@vger.kernel.org
 W:     https://01.org/linux-acpi
+B:     https://bugzilla.kernel.org
 S:     Supported
 F:     drivers/acpi/*thermal*
 
@@ -327,6 +341,7 @@ ACPI VIDEO DRIVER
 M:     Zhang Rui <rui.zhang@intel.com>
 L:     linux-acpi@vger.kernel.org
 W:     https://01.org/linux-acpi
+B:     https://bugzilla.kernel.org
 S:     Supported
 F:     drivers/acpi/acpi_video.c
 
@@ -1434,6 +1449,7 @@ F:        drivers/cpufreq/mvebu-cpufreq.c
 F:     arch/arm/configs/mvebu_*_defconfig
 
 ARM/Marvell Berlin SoC support
+M:     Jisheng Zhang <jszhang@marvell.com>
 M:     Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
@@ -2545,15 +2561,18 @@ S:      Supported
 F:     drivers/net/ethernet/broadcom/genet/
 
 BROADCOM BNX2 GIGABIT ETHERNET DRIVER
-M:     Sony Chacko <sony.chacko@qlogic.com>
-M:     Dept-HSGLinuxNICDev@qlogic.com
+M:     Rasesh Mody <rasesh.mody@cavium.com>
+M:     Harish Patil <harish.patil@cavium.com>
+M:     Dept-GELinuxNICDev@cavium.com
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/broadcom/bnx2.*
 F:     drivers/net/ethernet/broadcom/bnx2_*
 
 BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
-M:     Ariel Elior <ariel.elior@qlogic.com>
+M:     Yuval Mintz <Yuval.Mintz@cavium.com>
+M:     Ariel Elior <ariel.elior@cavium.com>
+M:     everest-linux-l2@cavium.com
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/broadcom/bnx2x/
@@ -2760,7 +2779,9 @@ S:        Supported
 F:     drivers/scsi/bfa/
 
 BROCADE BNA 10 GIGABIT ETHERNET DRIVER
-M:     Rasesh Mody <rasesh.mody@qlogic.com>
+M:     Rasesh Mody <rasesh.mody@cavium.com>
+M:     Sudarsana Kalluru <sudarsana.kalluru@cavium.com>
+M:     Dept-GELinuxNICDev@cavium.com
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/brocade/bna/
@@ -4614,8 +4635,9 @@ F:        sound/usb/misc/ua101.c
 
 EXTENSIBLE FIRMWARE INTERFACE (EFI)
 M:     Matt Fleming <matt@codeblueprint.co.uk>
+M:     Ard Biesheuvel <ard.biesheuvel@linaro.org>
 L:     linux-efi@vger.kernel.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
 S:     Maintained
 F:     Documentation/efi-stub.txt
 F:     arch/ia64/kernel/efi.c
@@ -5280,6 +5302,12 @@ M:       Joe Perches <joe@perches.com>
 S:     Maintained
 F:     scripts/get_maintainer.pl
 
+GENWQE (IBM Generic Workqueue Card)
+M:     Frank Haverkamp <haver@linux.vnet.ibm.com>
+M:     Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
+S:     Supported
+F:     drivers/misc/genwqe/
+
 GFS2 FILE SYSTEM
 M:     Steven Whitehouse <swhiteho@redhat.com>
 M:     Bob Peterson <rpeterso@redhat.com>
@@ -5644,6 +5672,7 @@ HIBERNATION (aka Software Suspend, aka swsusp)
 M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
 M:     Pavel Machek <pavel@ucw.cz>
 L:     linux-pm@vger.kernel.org
+B:     https://bugzilla.kernel.org
 S:     Supported
 F:     arch/x86/power/
 F:     drivers/base/power/
@@ -6125,6 +6154,12 @@ M:       Stanislaw Gruszka <stf_xl@wp.pl>
 S:     Maintained
 F:     drivers/usb/atm/ueagle-atm.c
 
+IMGTEC ASCII LCD DRIVER
+M:     Paul Burton <paul.burton@imgtec.com>
+S:     Maintained
+F:     Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt
+F:     drivers/auxdisplay/img-ascii-lcd.c
+
 INA209 HARDWARE MONITOR DRIVER
 M:     Guenter Roeck <linux@roeck-us.net>
 L:     linux-hwmon@vger.kernel.org
@@ -6436,6 +6471,7 @@ F:        include/linux/mei_cl_bus.h
 F:     drivers/misc/mei/*
 F:     drivers/watchdog/mei_wdt.c
 F:     Documentation/misc-devices/mei/*
+F:     samples/mei/*
 
 INTEL MIC DRIVERS (mic)
 M:     Sudeep Dutt <sudeep.dutt@intel.com>
@@ -6622,10 +6658,10 @@ S:      Maintained
 F:     drivers/firmware/iscsi_ibft*
 
 ISCSI
-M:     Mike Christie <michaelc@cs.wisc.edu>
+M:     Lee Duncan <lduncan@suse.com>
+M:     Chris Leech <cleech@redhat.com>
 L:     open-iscsi@googlegroups.com
-W:     www.open-iscsi.org
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mnc/linux-2.6-iscsi.git
+W:     www.open-iscsi.com
 S:     Maintained
 F:     drivers/scsi/*iscsi*
 F:     include/scsi/*iscsi*
@@ -7058,6 +7094,7 @@ F:        drivers/scsi/53c700*
 LED SUBSYSTEM
 M:     Richard Purdie <rpurdie@rpsys.net>
 M:     Jacek Anaszewski <j.anaszewski@samsung.com>
+M:     Pavel Machek <pavel@ucw.cz>
 L:     linux-leds@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git
 S:     Maintained
@@ -7203,17 +7240,11 @@ F:      drivers/lightnvm/
 F:     include/linux/lightnvm.h
 F:     include/uapi/linux/lightnvm.h
 
-LINUX FOR IBM pSERIES (RS/6000)
-M:     Paul Mackerras <paulus@au.ibm.com>
-W:     http://www.ibm.com/linux/ltc/projects/ppc
-S:     Supported
-F:     arch/powerpc/boot/rs6000.h
-
 LINUX FOR POWERPC (32-BIT AND 64-BIT)
 M:     Benjamin Herrenschmidt <benh@kernel.crashing.org>
 M:     Paul Mackerras <paulus@samba.org>
 M:     Michael Ellerman <mpe@ellerman.id.au>
-W:     http://www.penguinppc.org/
+W:     https://github.com/linuxppc/linux/wiki
 L:     linuxppc-dev@lists.ozlabs.org
 Q:     http://patchwork.ozlabs.org/project/linuxppc-dev/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git
@@ -7228,6 +7259,7 @@ F:        drivers/net/ethernet/ibm/ibmvnic.*
 F:     drivers/pci/hotplug/pnv_php.c
 F:     drivers/pci/hotplug/rpa*
 F:     drivers/scsi/ibmvscsi/
+F:     tools/testing/selftests/powerpc
 N:     opal
 N:     /pmac
 N:     powermac
@@ -7284,9 +7316,8 @@ F:        arch/powerpc/platforms/83xx/
 F:     arch/powerpc/platforms/85xx/
 
 LINUX FOR POWERPC PA SEMI PWRFICIENT
-M:     Olof Johansson <olof@lixom.net>
 L:     linuxppc-dev@lists.ozlabs.org
-S:     Maintained
+S:     Orphan
 F:     arch/powerpc/platforms/pasemi/
 F:     drivers/*/*pasemi*
 F:     drivers/*/*/*pasemi*
@@ -7536,8 +7567,10 @@ S:       Maintained
 MARVELL 88E6XXX ETHERNET SWITCH FABRIC DRIVER
 M:     Andrew Lunn <andrew@lunn.ch>
 M:     Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+L:     netdev@vger.kernel.org
 S:     Maintained
 F:     drivers/net/dsa/mv88e6xxx/
+F:     Documentation/devicetree/bindings/net/dsa/marvell.txt
 
 MARVELL ARMADA DRM SUPPORT
 M:     Russell King <rmk+kernel@armlinux.org.uk>
@@ -7829,6 +7862,13 @@ F:       Documentation/scsi/megaraid.txt
 F:     drivers/scsi/megaraid.*
 F:     drivers/scsi/megaraid/
 
+MELFAS MIP4 TOUCHSCREEN DRIVER
+M:     Sangwon Jee <jeesw@melfas.com>
+W:     http://www.melfas.com
+S:     Supported
+F:     drivers/input/touchscreen/melfas_mip4.c
+F:     Documentation/devicetree/bindings/input/touchscreen/melfas_mip4.txt
+
 MELLANOX ETHERNET DRIVER (mlx4_en)
 M:     Tariq Toukan <tariqt@mellanox.com>
 L:     netdev@vger.kernel.org
@@ -7898,6 +7938,10 @@ F:       mm/
 MEMORY TECHNOLOGY DEVICES (MTD)
 M:     David Woodhouse <dwmw2@infradead.org>
 M:     Brian Norris <computersforpeace@gmail.com>
+M:     Boris Brezillon <boris.brezillon@free-electrons.com>
+M:     Marek Vasut <marek.vasut@gmail.com>
+M:     Richard Weinberger <richard@nod.at>
+M:     Cyrille Pitchen <cyrille.pitchen@atmel.com>
 L:     linux-mtd@lists.infradead.org
 W:     http://www.linux-mtd.infradead.org/
 Q:     http://patchwork.ozlabs.org/project/linux-mtd/list/
@@ -8026,6 +8070,7 @@ F:        drivers/infiniband/hw/mlx4/
 F:     include/linux/mlx4/
 
 MELLANOX MLX5 core VPI driver
+M:     Saeed Mahameed <saeedm@mellanox.com>
 M:     Matan Barak <matanb@mellanox.com>
 M:     Leon Romanovsky <leonro@mellanox.com>
 L:     netdev@vger.kernel.org
@@ -8085,6 +8130,7 @@ S:        Maintained
 F:     drivers/media/dvb-frontends/mn88473*
 
 MODULE SUPPORT
+M:     Jessica Yu <jeyu@redhat.com>
 M:     Rusty Russell <rusty@rustcorp.com.au>
 S:     Maintained
 F:     include/linux/module.h
@@ -8198,7 +8244,7 @@ F:        include/linux/mfd/
 MULTIMEDIA CARD (MMC), SECURE DIGITAL (SD) AND SDIO SUBSYSTEM
 M:     Ulf Hansson <ulf.hansson@linaro.org>
 L:     linux-mmc@vger.kernel.org
-T:     git git://git.linaro.org/people/ulf.hansson/mmc.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc.git
 S:     Maintained
 F:     Documentation/devicetree/bindings/mmc/
 F:     drivers/mmc/
@@ -8493,11 +8539,10 @@ F:      Documentation/devicetree/bindings/net/wireless/
 F:     drivers/net/wireless/
 
 NETXEN (1/10) GbE SUPPORT
-M:     Manish Chopra <manish.chopra@qlogic.com>
-M:     Sony Chacko <sony.chacko@qlogic.com>
-M:     Rajesh Borundia <rajesh.borundia@qlogic.com>
+M:     Manish Chopra <manish.chopra@cavium.com>
+M:     Rahul Verma <rahul.verma@cavium.com>
+M:     Dept-GELinuxNICDev@cavium.com
 L:     netdev@vger.kernel.org
-W:     http://www.qlogic.com
 S:     Supported
 F:     drivers/net/ethernet/qlogic/netxen/
 
@@ -9020,15 +9065,13 @@ S:      Maintained
 F:     drivers/net/wireless/intersil/p54/
 
 PA SEMI ETHERNET DRIVER
-M:     Olof Johansson <olof@lixom.net>
 L:     netdev@vger.kernel.org
-S:     Maintained
+S:     Orphan
 F:     drivers/net/ethernet/pasemi/*
 
 PA SEMI SMBUS DRIVER
-M:     Olof Johansson <olof@lixom.net>
 L:     linux-i2c@vger.kernel.org
-S:     Maintained
+S:     Orphan
 F:     drivers/i2c/busses/i2c-pasemi.c
 
 PADATA PARALLEL EXECUTION MECHANISM
@@ -9286,7 +9329,7 @@ S:        Maintained
 F:     drivers/pci/host/*designware*
 
 PCI DRIVER FOR SYNOPSYS PROTOTYPING DEVICE
-M:     Joao Pinto <jpinto@synopsys.com>
+M:     Jose Abreu <Jose.Abreu@synopsys.com>
 L:     linux-pci@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/pci/designware-pcie.txt
@@ -9305,7 +9348,7 @@ PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD)
 M:     Keith Busch <keith.busch@intel.com>
 L:     linux-pci@vger.kernel.org
 S:     Supported
-F:     arch/x86/pci/vmd.c
+F:     drivers/pci/host/vmd.c
 
 PCIE DRIVER FOR ST SPEAR13XX
 M:     Pratyush Anand <pratyush.anand@gmail.com>
@@ -9377,6 +9420,7 @@ W:        http://lists.infradead.org/mailman/listinfo/linux-pcmcia
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/brodo/pcmcia.git
 S:     Maintained
 F:     Documentation/pcmcia/
+F:     tools/pcmcia/
 F:     drivers/pcmcia/
 F:     include/pcmcia/
 
@@ -9591,6 +9635,7 @@ POWER MANAGEMENT CORE
 M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
 L:     linux-pm@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+B:     https://bugzilla.kernel.org
 S:     Supported
 F:     drivers/base/power/
 F:     include/linux/pm.h
@@ -9874,33 +9919,32 @@ F:      Documentation/scsi/LICENSE.qla4xxx
 F:     drivers/scsi/qla4xxx/
 
 QLOGIC QLA3XXX NETWORK DRIVER
-M:     Jitendra Kalsaria <jitendra.kalsaria@qlogic.com>
-M:     Ron Mercer <ron.mercer@qlogic.com>
-M:     linux-driver@qlogic.com
+M:     Dept-GELinuxNICDev@cavium.com
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     Documentation/networking/LICENSE.qla3xxx
 F:     drivers/net/ethernet/qlogic/qla3xxx.*
 
 QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER
-M:     Dept-GELinuxNICDev@qlogic.com
+M:     Harish Patil <harish.patil@cavium.com>
+M:     Manish Chopra <manish.chopra@cavium.com>
+M:     Dept-GELinuxNICDev@cavium.com
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/qlogic/qlcnic/
 
 QLOGIC QLGE 10Gb ETHERNET DRIVER
-M:     Harish Patil <harish.patil@qlogic.com>
-M:     Sudarsana Kalluru <sudarsana.kalluru@qlogic.com>
-M:     Dept-GELinuxNICDev@qlogic.com
-M:     linux-driver@qlogic.com
+M:     Harish Patil <harish.patil@cavium.com>
+M:     Manish Chopra <manish.chopra@cavium.com>
+M:     Dept-GELinuxNICDev@cavium.com
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/qlogic/qlge/
 
 QLOGIC QL4xxx ETHERNET DRIVER
-M:     Yuval Mintz <Yuval.Mintz@qlogic.com>
-M:     Ariel Elior <Ariel.Elior@qlogic.com>
-M:     everest-linux-l2@qlogic.com
+M:     Yuval Mintz <Yuval.Mintz@cavium.com>
+M:     Ariel Elior <Ariel.Elior@cavium.com>
+M:     everest-linux-l2@cavium.com
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/qlogic/qed/
@@ -11378,6 +11422,17 @@ W:     http://www.st.com/spear
 S:     Maintained
 F:     drivers/clk/spear/
 
+SPI NOR SUBSYSTEM
+M:     Cyrille Pitchen <cyrille.pitchen@atmel.com>
+M:     Marek Vasut <marek.vasut@gmail.com>
+L:     linux-mtd@lists.infradead.org
+W:     http://www.linux-mtd.infradead.org/
+Q:     http://patchwork.ozlabs.org/project/linux-mtd/list/
+T:     git git://github.com/spi-nor/linux.git
+S:     Maintained
+F:     drivers/mtd/spi-nor/
+F:     include/linux/mtd/spi-nor.h
+
 SPI SUBSYSTEM
 M:     Mark Brown <broonie@kernel.org>
 L:     linux-spi@vger.kernel.org
@@ -11570,6 +11625,7 @@ M:      "Rafael J. Wysocki" <rjw@rjwysocki.net>
 M:     Len Brown <len.brown@intel.com>
 M:     Pavel Machek <pavel@ucw.cz>
 L:     linux-pm@vger.kernel.org
+B:     https://bugzilla.kernel.org
 S:     Supported
 F:     Documentation/power/
 F:     arch/x86/kernel/acpi/
@@ -12757,6 +12813,7 @@ F:      include/uapi/linux/virtio_console.h
 
 VIRTIO CORE, NET AND BLOCK DRIVERS
 M:     "Michael S. Tsirkin" <mst@redhat.com>
+M:     Jason Wang <jasowang@redhat.com>
 L:     virtualization@lists.linux-foundation.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/virtio/
@@ -12787,6 +12844,7 @@ F:      include/uapi/linux/virtio_gpu.h
 
 VIRTIO HOST (VHOST)
 M:     "Michael S. Tsirkin" <mst@redhat.com>
+M:     Jason Wang <jasowang@redhat.com>
 L:     kvm@vger.kernel.org
 L:     virtualization@lists.linux-foundation.org
 L:     netdev@vger.kernel.org
index 27f97b53e6ebd4e60407d38e13be42a0d2fe7127..0ede48ba5aaf4e52d6281fa35c82d9cca977b975 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
-PATCHLEVEL = 8
+PATCHLEVEL = 9
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc6
 NAME = Psychotic Stoned Sheep
 
 # *DOCUMENTATION*
@@ -370,7 +370,7 @@ LDFLAGS_MODULE  =
 CFLAGS_KERNEL  =
 AFLAGS_KERNEL  =
 LDFLAGS_vmlinux =
-CFLAGS_GCOV    = -fprofile-arcs -ftest-coverage -fno-tree-loop-im
+CFLAGS_GCOV    = -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized
 CFLAGS_KCOV    := $(call cc-option,-fsanitize-coverage=trace-pc,)
 
 
@@ -399,11 +399,12 @@ KBUILD_CFLAGS   := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
                   -fno-strict-aliasing -fno-common \
                   -Werror-implicit-function-declaration \
                   -Wno-format-security \
-                  -std=gnu89
+                  -std=gnu89 $(call cc-option,-fno-PIE)
+
 
 KBUILD_AFLAGS_KERNEL :=
 KBUILD_CFLAGS_KERNEL :=
-KBUILD_AFLAGS   := -D__ASSEMBLY__
+KBUILD_AFLAGS   := -D__ASSEMBLY__ $(call cc-option,-fno-PIE)
 KBUILD_AFLAGS_MODULE  := -DMODULE
 KBUILD_CFLAGS_MODULE  := -DMODULE
 KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
@@ -620,19 +621,26 @@ ARCH_CFLAGS :=
 include arch/$(SRCARCH)/Makefile
 
 KBUILD_CFLAGS  += $(call cc-option,-fno-delete-null-pointer-checks,)
-KBUILD_CFLAGS  += $(call cc-disable-warning,maybe-uninitialized,)
 KBUILD_CFLAGS  += $(call cc-disable-warning,frame-address,)
 
+ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+KBUILD_CFLAGS  += $(call cc-option,-ffunction-sections,)
+KBUILD_CFLAGS  += $(call cc-option,-fdata-sections,)
+endif
+
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-KBUILD_CFLAGS  += -Os
+KBUILD_CFLAGS  += -Os $(call cc-disable-warning,maybe-uninitialized,)
 else
 ifdef CONFIG_PROFILE_ALL_BRANCHES
-KBUILD_CFLAGS  += -O2
+KBUILD_CFLAGS  += -O2 $(call cc-disable-warning,maybe-uninitialized,)
 else
 KBUILD_CFLAGS   += -O2
 endif
 endif
 
+KBUILD_CFLAGS += $(call cc-ifversion, -lt, 0409, \
+                       $(call cc-disable-warning,maybe-uninitialized,))
+
 # Tell gcc to never replace conditional load with a non-conditional one
 KBUILD_CFLAGS  += $(call cc-option,--param=allow-store-data-races=0)
 
@@ -803,6 +811,10 @@ LDFLAGS_BUILD_ID = $(patsubst -Wl$(comma)%,%,\
 KBUILD_LDFLAGS_MODULE += $(LDFLAGS_BUILD_ID)
 LDFLAGS_vmlinux += $(LDFLAGS_BUILD_ID)
 
+ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+LDFLAGS_vmlinux        += $(call ld-option, --gc-sections,)
+endif
+
 ifeq ($(CONFIG_STRIP_ASM_SYMS),y)
 LDFLAGS_vmlinux        += $(call ld-option, -X,)
 endif
@@ -927,9 +939,6 @@ vmlinux_prereq: $(vmlinux-deps) FORCE
 ifdef CONFIG_HEADERS_CHECK
        $(Q)$(MAKE) -f $(srctree)/Makefile headers_check
 endif
-ifdef CONFIG_BUILD_DOCSRC
-       $(Q)$(MAKE) $(build)=Documentation
-endif
 ifdef CONFIG_GDB_SCRIPTS
        $(Q)ln -fsn `cd $(srctree) && /bin/pwd`/scripts/gdb/vmlinux-gdb.py
 endif
@@ -942,9 +951,12 @@ endif
 include/generated/autoksyms.h: FORCE
        $(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh true
 
-# Final link of vmlinux
-      cmd_link-vmlinux = $(CONFIG_SHELL) $< $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux)
-quiet_cmd_link-vmlinux = LINK    $@
+ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
+
+# Final link of vmlinux with optional arch pass after final link
+    cmd_link-vmlinux =                                                 \
+       $(CONFIG_SHELL) $< $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) ;       \
+       $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
 
 vmlinux: scripts/link-vmlinux.sh vmlinux_prereq $(vmlinux-deps) FORCE
        +$(call if_changed,link-vmlinux)
@@ -1271,6 +1283,7 @@ $(clean-dirs):
 
 vmlinuxclean:
        $(Q)$(CONFIG_SHELL) $(srctree)/scripts/link-vmlinux.sh clean
+       $(Q)$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) clean)
 
 clean: archclean vmlinuxclean
 
index 180ea33164dc45821a78c08043dc95119c1bdb7e..659bdd079277ebdbec01f8a5c9dd38af1a19c888 100644 (file)
@@ -383,6 +383,24 @@ config GCC_PLUGIN_SANCOV
          gcc-4.5 on). It is based on the commit "Add fuzzing coverage support"
          by Dmitry Vyukov <dvyukov@google.com>.
 
+config GCC_PLUGIN_LATENT_ENTROPY
+       bool "Generate some entropy during boot and runtime"
+       depends on GCC_PLUGINS
+       help
+         By saying Y here the kernel will instrument some kernel code to
+         extract some entropy from both original and artificially created
+         program state.  This will help especially embedded systems where
+         there is little 'natural' source of entropy normally.  The cost
+         is some slowdown of the boot process (about 0.5%) and fork and
+         irq processing.
+
+         Note that entropy extracted this way is not cryptographically
+         secure!
+
+         This plugin was ported from grsecurity/PaX. More information at:
+          * https://grsecurity.net/
+          * https://pax.grsecurity.net/
+
 config HAVE_CC_STACKPROTECTOR
        bool
        help
@@ -450,6 +468,27 @@ config CC_STACKPROTECTOR_STRONG
 
 endchoice
 
+config THIN_ARCHIVES
+       bool
+       help
+         Select this if the architecture wants to use thin archives
+         instead of ld -r to create the built-in.o files.
+
+config LD_DEAD_CODE_DATA_ELIMINATION
+       bool
+       help
+         Select this if the architecture wants to do dead code and
+         data elimination with the linker by compiling with
+         -ffunction-sections -fdata-sections and linking with
+         --gc-sections.
+
+         This requires that the arch annotates or otherwise protects
+         its external entry points from being discarded. Linker scripts
+         must also merge .text.*, .data.*, and .bss.* correctly into
+         output sections. Care must be taken not to pull in unrelated
+         sections (e.g., '.text.init'). Typically '.' in section names
+         is used to distinguish them from label names / C identifiers.
+
 config HAVE_ARCH_WITHIN_STACK_FRAMES
        bool
        help
index ffd9cf5ec8c407c4686c519de89b3e827f1656a2..bf8475ce85ee2ab37a5e69faf86f7acd22ac59dc 100644 (file)
@@ -3,6 +3,7 @@
 generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
+generic-y += export.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
index 466e42e96bfaf29d46567e45268dc4844ac81ac9..94f587535deed9b48ddfd8c6e343077533aaea34 100644 (file)
@@ -396,11 +396,12 @@ copy_to_user(void __user *to, const void *from, long n)
 extern inline long
 copy_from_user(void *to, const void __user *from, long n)
 {
+       long res = n;
        if (likely(__access_ok((unsigned long)from, n, get_fs())))
-               n = __copy_tofrom_user_nocheck(to, (__force void *)from, n);
-       else
-               memset(to, 0, n);
-       return n;
+               res = __copy_from_user_inatomic(to, from, n);
+       if (unlikely(res))
+               memset(to + (n - res), 0, res);
+       return res;
 }
 
 extern void __do_clear_user(void);
index 9e46d6e656d978cd203abe4202f8b1ee353bea90..afc901b7a6f6e68c819aec1ab9199806f24fc1c1 100644 (file)
@@ -97,4 +97,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _UAPI_ASM_SOCKET_H */
index 3ecac0106c8a1aa1e2fb85b5b868cf9e73eb1091..8ce13d7a2ad389cb26d356b2954e7034326203ac 100644 (file)
@@ -8,7 +8,7 @@ ccflags-y       := -Wno-sign-compare
 
 obj-y    := entry.o traps.o process.o osf_sys.o irq.o \
            irq_alpha.o signal.o setup.o ptrace.o time.o \
-           alpha_ksyms.o systbls.o err_common.o io.o
+           systbls.o err_common.o io.o
 
 obj-$(CONFIG_VGA_HOSE) += console.o
 obj-$(CONFIG_SMP)      += smp.o
diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c
deleted file mode 100644 (file)
index f4c7ab6..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * linux/arch/alpha/kernel/alpha_ksyms.c
- *
- * Export the alpha-specific functions that are needed for loadable
- * modules.
- */
-
-#include <linux/module.h>
-#include <asm/console.h>
-#include <asm/uaccess.h>
-#include <asm/checksum.h>
-#include <asm/fpu.h>
-#include <asm/machvec.h>
-
-#include <linux/syscalls.h>
-
-/* these are C runtime functions with special calling conventions: */
-extern void __divl (void);
-extern void __reml (void);
-extern void __divq (void);
-extern void __remq (void);
-extern void __divlu (void);
-extern void __remlu (void);
-extern void __divqu (void);
-extern void __remqu (void);
-
-EXPORT_SYMBOL(alpha_mv);
-EXPORT_SYMBOL(callback_getenv);
-EXPORT_SYMBOL(callback_setenv);
-EXPORT_SYMBOL(callback_save_env);
-
-/* platform dependent support */
-EXPORT_SYMBOL(strcat);
-EXPORT_SYMBOL(strcpy);
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strncpy);
-EXPORT_SYMBOL(strncat);
-EXPORT_SYMBOL(strchr);
-EXPORT_SYMBOL(strrchr);
-EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(__memcpy);
-EXPORT_SYMBOL(__memset);
-EXPORT_SYMBOL(___memset);
-EXPORT_SYMBOL(__memsetw);
-EXPORT_SYMBOL(__constant_c_memset);
-EXPORT_SYMBOL(copy_page);
-EXPORT_SYMBOL(clear_page);
-
-EXPORT_SYMBOL(alpha_read_fp_reg);
-EXPORT_SYMBOL(alpha_read_fp_reg_s);
-EXPORT_SYMBOL(alpha_write_fp_reg);
-EXPORT_SYMBOL(alpha_write_fp_reg_s);
-
-/* Networking helper routines. */
-EXPORT_SYMBOL(csum_tcpudp_magic);
-EXPORT_SYMBOL(ip_compute_csum);
-EXPORT_SYMBOL(ip_fast_csum);
-EXPORT_SYMBOL(csum_partial_copy_nocheck);
-EXPORT_SYMBOL(csum_partial_copy_from_user);
-EXPORT_SYMBOL(csum_ipv6_magic);
-
-#ifdef CONFIG_MATHEMU_MODULE
-extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long);
-extern long (*alpha_fp_emul) (unsigned long pc);
-EXPORT_SYMBOL(alpha_fp_emul_imprecise);
-EXPORT_SYMBOL(alpha_fp_emul);
-#endif
-
-/*
- * The following are specially called from the uaccess assembly stubs.
- */
-EXPORT_SYMBOL(__copy_user);
-EXPORT_SYMBOL(__do_clear_user);
-
-/* 
- * SMP-specific symbols.
- */
-
-#ifdef CONFIG_SMP
-EXPORT_SYMBOL(_atomic_dec_and_lock);
-#endif /* CONFIG_SMP */
-
-/*
- * The following are special because they're not called
- * explicitly (the C compiler or assembler generates them in
- * response to division operations).  Fortunately, their
- * interface isn't gonna change any time soon now, so it's OK
- * to leave it out of version control.
- */
-# undef memcpy
-# undef memset
-EXPORT_SYMBOL(__divl);
-EXPORT_SYMBOL(__divlu);
-EXPORT_SYMBOL(__divq);
-EXPORT_SYMBOL(__divqu);
-EXPORT_SYMBOL(__reml);
-EXPORT_SYMBOL(__remlu);
-EXPORT_SYMBOL(__remq);
-EXPORT_SYMBOL(__remqu);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memchr);
index d3398f6ab74c2d0b80c538a1181d03f47fbed501..b7d69604b6d2e522b8aabc0e904c0ab78b0e637a 100644 (file)
    else beforehand.  Fine.  We'll do it ourselves.  */
 #if 0
 #define ALIAS_MV(system) \
-  struct alpha_machine_vector alpha_mv __attribute__((alias(#system "_mv")));
+  struct alpha_machine_vector alpha_mv __attribute__((alias(#system "_mv"))); \
+  EXPORT_SYMBOL(alpha_mv);
 #else
 #define ALIAS_MV(system) \
-  asm(".global alpha_mv\nalpha_mv = " #system "_mv");
+  asm(".global alpha_mv\nalpha_mv = " #system "_mv"); \
+  EXPORT_SYMBOL(alpha_mv);
 #endif
 #endif /* GENERIC */
index d9ee81769899fb8652046857d2fa3eaabd1bacc1..940dfb4065910822d42f3d11c02ba2305f25b02a 100644 (file)
@@ -157,14 +157,16 @@ put_reg(struct task_struct *task, unsigned long regno, unsigned long data)
 static inline int
 read_int(struct task_struct *task, unsigned long addr, int * data)
 {
-       int copied = access_process_vm(task, addr, data, sizeof(int), 0);
+       int copied = access_process_vm(task, addr, data, sizeof(int),
+                       FOLL_FORCE);
        return (copied == sizeof(int)) ? 0 : -EIO;
 }
 
 static inline int
 write_int(struct task_struct *task, unsigned long addr, int data)
 {
-       int copied = access_process_vm(task, addr, &data, sizeof(int), 1);
+       int copied = access_process_vm(task, addr, &data, sizeof(int),
+                       FOLL_FORCE | FOLL_WRITE);
        return (copied == sizeof(int)) ? 0 : -EIO;
 }
 
@@ -281,7 +283,8 @@ long arch_ptrace(struct task_struct *child, long request,
        /* When I and D space are separate, these will need to be fixed.  */
        case PTRACE_PEEKTEXT: /* read word at location addr. */
        case PTRACE_PEEKDATA:
-               copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+               copied = access_process_vm(child, addr, &tmp, sizeof(tmp),
+                               FOLL_FORCE);
                ret = -EIO;
                if (copied != sizeof(tmp))
                        break;
index b20af76f12c1dbf548a27210fdbb196a2c77496d..4811e54069fcfbb733185d4004e24efa814b9165 100644 (file)
@@ -115,6 +115,7 @@ unsigned long alpha_agpgart_size = DEFAULT_AGP_APER_SIZE;
 
 #ifdef CONFIG_ALPHA_GENERIC
 struct alpha_machine_vector alpha_mv;
+EXPORT_SYMBOL(alpha_mv);
 #endif
 
 #ifndef alpha_using_srm
index 8804bec2c6448e15e5f34be99b25e9bef8518a15..6093addc931a5bf3dec4d7c13f6cf84c88417e39 100644 (file)
@@ -3,6 +3,7 @@
  */
 
 #include <asm/console.h>
+#include <asm/export.h>
 
 .text
 #define HWRPB_CRB_OFFSET 0xc0
@@ -92,6 +93,10 @@ CALLBACK(reset_env, CCB_RESET_ENV, 4)
 CALLBACK(save_env, CCB_SAVE_ENV, 1)
 CALLBACK(pswitch, CCB_PSWITCH, 3)
 CALLBACK(bios_emul, CCB_BIOS_EMUL, 5)
+
+EXPORT_SYMBOL(callback_getenv)
+EXPORT_SYMBOL(callback_setenv)
+EXPORT_SYMBOL(callback_save_env)
        
 .data
 __alpha_using_srm:             # For use by bootpheader
index 377f9e34eb9709631e50aa20ce87417ebdeac3a8..b57f8007db14f78565200134f6f519165d3970ea 100644 (file)
@@ -48,6 +48,7 @@ __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
                (__force u64)saddr + (__force u64)daddr +
                (__force u64)sum + ((len + proto) << 8));
 }
+EXPORT_SYMBOL(csum_tcpudp_magic);
 
 __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
                          __u32 len, __u8 proto, __wsum sum)
@@ -144,6 +145,7 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 {
        return (__force __sum16)~do_csum(iph,ihl*4);
 }
+EXPORT_SYMBOL(ip_fast_csum);
 
 /*
  * computes the checksum of a memory block at buff, length len,
@@ -178,3 +180,4 @@ __sum16 ip_compute_csum(const void *buff, int len)
 {
        return (__force __sum16)~from64to16(do_csum(buff,len));
 }
+EXPORT_SYMBOL(ip_compute_csum);
index a221ae266e29098ab55cd72fb42ede2ff11588bf..263d7393c0e7a83fd15e570bb171bc726b55afbc 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Zero an entire page.
  */
-
+#include <asm/export.h>
        .text
        .align 4
        .global clear_page
@@ -37,3 +37,4 @@ clear_page:
        nop
 
        .end clear_page
+       EXPORT_SYMBOL(clear_page)
index 8860316c195769ebdf907d590aee0c19555ac956..bf5b931866ba1ed069758cfa95f01f35e18b6b02 100644 (file)
@@ -24,6 +24,7 @@
  * Clobbers:
  *     $1,$2,$3,$4,$5,$6
  */
+#include <asm/export.h>
 
 /* Allow an exception for an insn; exit if we get one.  */
 #define EX(x,y...)                     \
@@ -111,3 +112,4 @@ $exception:
        ret     $31, ($28), 1   # .. e1 :
 
        .end __do_clear_user
+       EXPORT_SYMBOL(__do_clear_user)
index 9f3b97459cc64fb5eb70ec7b15ded1a1e97a0245..2ee0bd0508c5943aa8eb64e074b6f5bb86de3920 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copy an entire page.
  */
-
+#include <asm/export.h>
        .text
        .align 4
        .global copy_page
@@ -47,3 +47,4 @@ copy_page:
        nop
 
        .end copy_page
+       EXPORT_SYMBOL(copy_page)
index 6f3fab9eb434444bf58cd4ed528b1392c3c7cf53..509f62b6531102b539e61f11519ac4d49da02fe9 100644 (file)
@@ -26,6 +26,8 @@
  *     $1,$2,$3,$4,$5,$6,$7
  */
 
+#include <asm/export.h>
+
 /* Allow an exception for an insn; exit if we get one.  */
 #define EXI(x,y...)                    \
        99: x,##y;                      \
@@ -124,22 +126,9 @@ $65:
        bis $31,$31,$0
 $41:
 $35:
-$exitout:
-       ret $31,($28),1
-
 $exitin:
-       /* A stupid byte-by-byte zeroing of the rest of the output
-          buffer.  This cures security holes by never leaving 
-          random kernel data around to be copied elsewhere.  */
-
-       mov $0,$1
-$101:
-       EXO ( ldq_u $2,0($6) )
-       subq $1,1,$1
-       mskbl $2,$6,$2
-       EXO ( stq_u $2,0($6) )
-       addq $6,1,$6
-       bgt $1,$101
+$exitout:
        ret $31,($28),1
 
        .end __copy_user
+EXPORT_SYMBOL(__copy_user)
index 2c2acb96deb682de173c0a0b3983f92977d3ef8f..e74b4544b0cce0962dd9715f063224a89ed066d4 100644 (file)
@@ -12,6 +12,7 @@
  * added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
  */
 
+#include <asm/export.h>
        .globl csum_ipv6_magic
        .align 4
        .ent csum_ipv6_magic
@@ -113,3 +114,4 @@ csum_ipv6_magic:
        ret                     # .. e1 :
 
        .end csum_ipv6_magic
+       EXPORT_SYMBOL(csum_ipv6_magic)
index 5675dca8dbb1412e0b2fcea1c748ec9d757a3e8c..b4ff3b683bcd57802c8c422708c3c712098c4cc7 100644 (file)
@@ -374,6 +374,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len,
        }
        return (__force __wsum)checksum;
 }
+EXPORT_SYMBOL(csum_partial_copy_from_user);
 
 __wsum
 csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
@@ -386,3 +387,4 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
        set_fs(oldfs);
        return checksum;
 }
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
index f9f5fe830e9f9c8912daaf830c4a4b54454ea43c..4221b40167eed5b85284e6d9bc831cdc31094666 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
+#include <linux/export.h>
 
   asm (".text                                  \n\
        .global _atomic_dec_and_lock            \n\
@@ -39,3 +40,4 @@ static int __used atomic_dec_and_lock_1(atomic_t *atomic, spinlock_t *lock)
        spin_unlock(lock);
        return 0;
 }
+EXPORT_SYMBOL(_atomic_dec_and_lock);
index 2d1a0484a99e009e3e198f47f77b06194b50006d..1e33bd1276213493b44c37acb877de57870cb445 100644 (file)
@@ -45,6 +45,7 @@
  *     $28 - compare status
  */
 
+#include <asm/export.h>
 #define halt .long 0
 
 /*
@@ -151,6 +152,7 @@ ufunction:
        addq    $30,STACK,$30
        ret     $31,($23),1
        .end    ufunction
+EXPORT_SYMBOL(ufunction)
 
 /*
  * Uhh.. Ugly signed division. I'd rather not have it at all, but
@@ -193,3 +195,4 @@ sfunction:
        addq    $30,STACK,$30
        ret     $31,($23),1
        .end    sfunction
+EXPORT_SYMBOL(sfunction)
index adf4f7be0e2b6d6792e420e3b40af21ff8f449ca..abe99e69a1945a2bac7ce4062bd43a7503c52907 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Zero an entire page.
  */
-
+#include <asm/export.h>
         .text
         .align 4
         .global clear_page
@@ -52,3 +52,4 @@ clear_page:
        nop
 
        .end clear_page
+       EXPORT_SYMBOL(clear_page)
index 4f42a16b7f53d18cfc08f076d540011ad6ca5215..05bef6b505984465699627db5aeec012714893f4 100644 (file)
@@ -43,6 +43,7 @@
  *     want to leave a hole (and we also want to avoid repeating lots of work)
  */
 
+#include <asm/export.h>
 /* Allow an exception for an insn; exit if we get one.  */
 #define EX(x,y...)                     \
        99: x,##y;                      \
@@ -222,4 +223,4 @@ $exception:                 # Destination for exception recovery(?)
        nop                     # .. E  .. ..   :
        ret     $31, ($28), 1   # L0 .. .. ..   : L U L U
        .end __do_clear_user
-
+       EXPORT_SYMBOL(__do_clear_user)
index b789db19275443d092494ea80a41be772dedb96a..77935061bddbb652c746938de8e8875239330cab 100644 (file)
@@ -56,7 +56,7 @@
    destination pages are in the dcache, but it is my guess that this is
    less important than the dcache miss case.  */
 
-
+#include <asm/export.h>
        .text
        .align 4
        .global copy_page
@@ -201,3 +201,4 @@ copy_page:
        nop
 
        .end copy_page
+       EXPORT_SYMBOL(copy_page)
index db42ffe9c350b6c9854470b028f6b515b028c0f6..be720b518af9e6500ce7545887a6bf6a9cb715d3 100644 (file)
@@ -37,6 +37,7 @@
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  */
 
+#include <asm/export.h>
 /* Allow an exception for an insn; exit if we get one.  */
 #define EXI(x,y...)                    \
        99: x,##y;                      \
@@ -227,33 +228,12 @@ $dirtyentry:
        bgt $0,$onebyteloop     # U  .. .. ..   : U L U L
 
 $zerolength:
+$exitin:
 $exitout:                      # Destination for exception recovery(?)
        nop                     # .. .. .. E
        nop                     # .. .. E  ..
        nop                     # .. E  .. ..
        ret $31,($28),1         # L0 .. .. ..   : L U L U
 
-$exitin:
-
-       /* A stupid byte-by-byte zeroing of the rest of the output
-          buffer.  This cures security holes by never leaving 
-          random kernel data around to be copied elsewhere.  */
-
-       nop
-       nop
-       nop
-       mov     $0,$1
-
-$101:
-       EXO ( stb $31,0($6) )   # L
-       subq $1,1,$1            # E
-       addq $6,1,$6            # E
-       bgt $1,$101             # U
-
-       nop
-       nop
-       nop
-       ret $31,($28),1         # L0
-
        .end __copy_user
-
+       EXPORT_SYMBOL(__copy_user)
index fc0bc399f872db671313f07e78ac8d20f5ff6637..de62627ac4fe1dce24f68cf94df29afb9099a35d 100644 (file)
@@ -52,6 +52,7 @@
  * may cause additional delay in rare cases (load-load replay traps).
  */
 
+#include <asm/export.h>
        .globl csum_ipv6_magic
        .align 4
        .ent csum_ipv6_magic
@@ -148,3 +149,4 @@ csum_ipv6_magic:
        ret                     # L0 : L U L U
 
        .end csum_ipv6_magic
+       EXPORT_SYMBOL(csum_ipv6_magic)
index 2a82b9be93fa290fdb6e859019db24839a55af18..d18dc0e96e3d7987f0d2b977406a36ef4889b8d7 100644 (file)
@@ -55,6 +55,7 @@
  * Try not to change the actual algorithm if possible for consistency.
  */
 
+#include <asm/export.h>
 #define halt .long 0
 
 /*
@@ -205,6 +206,7 @@ ufunction:
        addq    $30,STACK,$30           # E :
        ret     $31,($23),1             # L0 : L U U L
        .end    ufunction
+EXPORT_SYMBOL(ufunction)
 
 /*
  * Uhh.. Ugly signed division. I'd rather not have it at all, but
@@ -257,3 +259,4 @@ sfunction:
        addq    $30,STACK,$30           # E :
        ret     $31,($23),1             # L0 : L U U L
        .end    sfunction
+EXPORT_SYMBOL(sfunction)
index 1a5f71b9d8b10286f324b2e13e9be79ddb1d4abb..419adc53ccb4e599217aedd3bdb9a6af43bab400 100644 (file)
@@ -27,7 +27,7 @@
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  * Try not to change the actual algorithm if possible for consistency.
  */
-
+#include <asm/export.h>
         .set noreorder
         .set noat
 
@@ -189,3 +189,4 @@ $not_found:
        ret                     # L0 :
 
         .end memchr
+       EXPORT_SYMBOL(memchr)
index 52b37b0f2af5152177cd4a58fa815491070a4afc..b19798b2efc09a30fce801eda1587e817fcb434b 100644 (file)
@@ -19,7 +19,7 @@
  * Temp usage notes:
  *     $1,$2,          - scratch
  */
-
+#include <asm/export.h>
        .set noreorder
        .set noat
 
@@ -242,6 +242,7 @@ $nomoredata:
        nop                             # E :
 
        .end memcpy
+       EXPORT_SYMBOL(memcpy)
 
 /* For backwards module compatibility.  */
 __memcpy = memcpy
index 356bb2fdd70567721023b8e0a3fc1d59f2f5d981..fed21c6893e8e7c295fcaeac56d0b70bb6051133 100644 (file)
@@ -26,7 +26,7 @@
  * as fixes will need to be made in multiple places.  The performance gain
  * is worth it.
  */
-
+#include <asm/export.h>
        .set noat
        .set noreorder
 .text
@@ -229,6 +229,7 @@ end_b:
        nop
        ret $31,($26),1         # L0 :
        .end ___memset
+       EXPORT_SYMBOL(___memset)
 
        /*
         * This is the original body of code, prior to replication and
@@ -406,6 +407,7 @@ end:
        nop
        ret $31,($26),1         # L0 :
        .end __constant_c_memset
+       EXPORT_SYMBOL(__constant_c_memset)
 
        /*
         * This is a replicant of the __constant_c_memset code, rescheduled
@@ -594,6 +596,9 @@ end_w:
        ret $31,($26),1         # L0 :
 
        .end __memsetw
+       EXPORT_SYMBOL(__memsetw)
 
 memset = ___memset
 __memset = ___memset
+       EXPORT_SYMBOL(memset)
+       EXPORT_SYMBOL(__memset)
index c426fe3ed72f4e193a6f7d8b7ec78831c64af203..b69f60419be1bc940c81dac67fdd78b1932c8fb9 100644 (file)
@@ -19,7 +19,7 @@
  * string once.
  */
 
-
+#include <asm/export.h>
        .text
 
        .align 4
@@ -52,3 +52,4 @@ $found:       cttz    $2, $3          # U0 :
        br      __stxcpy        # L0 :
 
        .end strcat
+       EXPORT_SYMBOL(strcat)
index fbb7b4ffade9e596272c1c4ede40689d7bf73a92..ea8f2f35db9cef2487524b93038885ed9b71ce94 100644 (file)
@@ -15,7 +15,7 @@
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  * Try not to change the actual algorithm if possible for consistency.
  */
-
+#include <asm/export.h>
 #include <asm/regdef.h>
 
        .set noreorder
@@ -86,3 +86,4 @@ $found:       negq    t0, t1          # E : clear all but least set bit
        ret                     # L0 :
 
        .end strchr
+       EXPORT_SYMBOL(strchr)
index 503928072523e745b3b62934e52301b1fee2d327..736fd41884a8c11ba44b5b057f23c46e4908c521 100644 (file)
@@ -17,7 +17,7 @@
  *     U       - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  */
-
+#include <asm/export.h>
        .set noreorder
        .set noat
 
@@ -47,3 +47,4 @@ $found:
        ret     $31, ($26)      # L0 :
 
        .end    strlen
+       EXPORT_SYMBOL(strlen)
index 4ae716cd2bfbf8e8f6c8b194531efac50f88ec7c..cd35cbade73ae8f46fcf7efff8763646e92efbf4 100644 (file)
@@ -20,7 +20,7 @@
  * Try not to change the actual algorithm if possible for consistency.
  */
 
-
+#include <asm/export.h>
        .text
 
        .align 4
@@ -92,3 +92,4 @@ $zerocount:
        ret                     # L0 :
 
        .end strncat
+       EXPORT_SYMBOL(strncat)
index dd0d8c6b9f59ffdf92d8a0bfe40aa3fdd0bbe29d..747455f0328cf35bfaeac86ac56101d70e841016 100644 (file)
@@ -18,7 +18,7 @@
  *     L       - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
  */
 
-
+#include <asm/export.h>
 #include <asm/regdef.h>
 
        .set noreorder
@@ -107,3 +107,4 @@ $eos:
        nop
 
        .end strrchr
+       EXPORT_SYMBOL(strrchr)
index 05017ba34c3cc40e22a97b4de62bdc0f363901de..4aa6dbfa14eecce36fcafcf800e79c4f0a93ee84 100644 (file)
@@ -4,6 +4,9 @@
  * (C) Copyright 1998 Linus Torvalds
  */
 
+#include <linux/compiler.h>
+#include <linux/export.h>
+
 #if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67)
 #define STT(reg,val)  asm volatile ("ftoit $f"#reg",%0" : "=r"(val));
 #else
@@ -52,6 +55,7 @@ alpha_read_fp_reg (unsigned long reg)
        }
        return val;
 }
+EXPORT_SYMBOL(alpha_read_fp_reg);
 
 #if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67)
 #define LDT(reg,val)  asm volatile ("itoft %0,$f"#reg : : "r"(val));
@@ -97,6 +101,7 @@ alpha_write_fp_reg (unsigned long reg, unsigned long val)
              case 31: LDT(31, val); break;
        }
 }
+EXPORT_SYMBOL(alpha_write_fp_reg);
 
 #if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67)
 #define STS(reg,val)  asm volatile ("ftois $f"#reg",%0" : "=r"(val));
@@ -146,6 +151,7 @@ alpha_read_fp_reg_s (unsigned long reg)
        }
        return val;
 }
+EXPORT_SYMBOL(alpha_read_fp_reg_s);
 
 #if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67)
 #define LDS(reg,val)  asm volatile ("itofs %0,$f"#reg : : "r"(val));
@@ -191,3 +197,4 @@ alpha_write_fp_reg_s (unsigned long reg, unsigned long val)
              case 31: LDS(31, val); break;
        }
 }
+EXPORT_SYMBOL(alpha_write_fp_reg_s);
index 14427eeb555e6d8f80bc863e6e5f50c6c7024a87..c13d3eca2e0592736dd17112ff836dcac5c0b7cb 100644 (file)
@@ -31,7 +31,7 @@ For correctness consider that:
       - only minimum number of quadwords may be accessed
       - the third argument is an unsigned long
 */
-
+#include <asm/export.h>
         .set noreorder
         .set noat
 
@@ -162,3 +162,4 @@ $not_found:
        ret                     # .. e1 :
 
         .end memchr
+       EXPORT_SYMBOL(memchr)
index 64083fc732389419aa55e499532a278af4f8adab..57d9291ad172c6a2660104a6af6d4392efac1efb 100644 (file)
@@ -16,6 +16,7 @@
  */
 
 #include <linux/types.h>
+#include <linux/export.h>
 
 /*
  * This should be done in one go with ldq_u*2/mask/stq_u. Do it
@@ -158,6 +159,4 @@ void * memcpy(void * dest, const void *src, size_t n)
        __memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
        return dest;
 }
-
-/* For backward modules compatibility, define __memcpy.  */
-asm("__memcpy = memcpy; .globl __memcpy");
+EXPORT_SYMBOL(memcpy);
index eb3b6e02242f4b91cffe943b105a04f22f1df249..6872c85cb5e54d35125ebb8bc61b47fa26f33785 100644 (file)
@@ -6,7 +6,7 @@
  * This is hand-massaged output from the original memcpy.c.  We defer to
  * memcpy whenever possible; the backwards copy loops are not unrolled.
  */
-        
+#include <asm/export.h>        
        .set noat
        .set noreorder
        .text
@@ -179,3 +179,4 @@ $egress:
        nop
 
        .end memmove
+       EXPORT_SYMBOL(memmove)
index 76ccc6d1f364d67ca8c03c859171f113da5e23ce..89a26f5e89de3db904cbfabb1ff735464d6ca3cb 100644 (file)
@@ -13,7 +13,7 @@
  * The scheduling comments are according to the EV5 documentation (and done by 
  * hand, so they might well be incorrect, please do tell me about it..)
  */
-
+#include <asm/export.h>
        .set noat
        .set noreorder
 .text
@@ -106,6 +106,8 @@ within_one_quad:
 end:
        ret $31,($26),1         /* E1 */
        .end ___memset
+EXPORT_SYMBOL(___memset)
+EXPORT_SYMBOL(__constant_c_memset)
 
        .align 5
        .ent __memsetw
@@ -122,6 +124,9 @@ __memsetw:
        br __constant_c_memset  /* .. E1 */
 
        .end __memsetw
+EXPORT_SYMBOL(__memsetw)
 
 memset = ___memset
 __memset = ___memset
+       EXPORT_SYMBOL(memset)
+       EXPORT_SYMBOL(__memset)
index 393f50384878fd0ed7dc1ef786bb5c1bed612678..249837b03d4b9958d9131ca1cd2a9ab8c02f75a7 100644 (file)
@@ -4,6 +4,7 @@
  *
  * Append a null-terminated string from SRC to DST.
  */
+#include <asm/export.h>
 
        .text
 
@@ -50,3 +51,4 @@ $found:       negq    $2, $3          # clear all but least set bit
        br      __stxcpy
 
        .end strcat
+EXPORT_SYMBOL(strcat);
index 011a175e8329234567c0665ec90de60ed35db88c..7412a173ea393617f1b65022b06bc8cbfe5b627e 100644 (file)
@@ -5,7 +5,7 @@
  * Return the address of a given character within a null-terminated
  * string, or null if it is not found.
  */
-
+#include <asm/export.h>
 #include <asm/regdef.h>
 
        .set noreorder
@@ -68,3 +68,4 @@ $retnull:
        ret                     # .. e1 :
 
        .end strchr
+       EXPORT_SYMBOL(strchr)
index e0728e4ad21fda604b166c9435637bd9c5af11fe..98deae1e4d085d5dfb54210aeeb854b6328af6a5 100644 (file)
@@ -5,7 +5,7 @@
  * Copy a null-terminated string from SRC to DST.  Return a pointer
  * to the null-terminator in the source.
  */
-
+#include <asm/export.h>
        .text
 
        .align 3
@@ -21,3 +21,4 @@ strcpy:
        br      __stxcpy        # do the copy
 
        .end strcpy
+       EXPORT_SYMBOL(strcpy)
index fe63353de152d3bf1d5a24b18b704bde21c161f2..79c416f71bacd5704a8d9f6a2841b9cb9566606b 100644 (file)
@@ -11,7 +11,7 @@
  *       do this instead of the 9 instructions that
  *       binary search needs).
  */
-
+#include <asm/export.h>
        .set noreorder
        .set noat
 
@@ -55,3 +55,4 @@ done: subq    $0, $16, $0
        ret     $31, ($26)
 
        .end    strlen
+       EXPORT_SYMBOL(strlen)
index a8278163c97204d3d5d2e0eb73c956c327bdfbd6..6c29ea60869ae4c3a3d3c4bff10839b1a764e67d 100644 (file)
@@ -9,7 +9,7 @@
  * past count, whereas libc may write to count+1.  This follows the generic
  * implementation in lib/string.c and is, IMHO, more sensible.
  */
-
+#include <asm/export.h>
        .text
 
        .align 3
@@ -82,3 +82,4 @@ $zerocount:
        ret
 
        .end strncat
+       EXPORT_SYMBOL(strncat)
index a46f7f3ad8c731961dd3f24502955ab5953477ca..e102cf1567ddd21314efd9d1fd1f8725cb538410 100644 (file)
@@ -10,7 +10,7 @@
  * version has cropped that bit o' nastiness as well as assuming that
  * __stxncpy is in range of a branch.
  */
-
+#include <asm/export.h>
        .set noat
        .set noreorder
 
@@ -79,3 +79,4 @@ $zerolen:
        ret
 
        .end    strncpy
+       EXPORT_SYMBOL(strncpy)
index 1970dc07cfd1248147bc17da538196fcb9cd9e84..4bc6cb4b9812e1e42a2ac2db7df182cd9810dafb 100644 (file)
@@ -5,7 +5,7 @@
  * Return the address of the last occurrence of a given character
  * within a null-terminated string, or null if it is not found.
  */
-
+#include <asm/export.h>
 #include <asm/regdef.h>
 
        .set noreorder
@@ -85,3 +85,4 @@ $retnull:
        ret                     # .. e1 :
 
        .end strrchr
+       EXPORT_SYMBOL(strrchr)
index ecd12379e2cdb55bf29626ce0590742875574f3a..bd204bfa29edd52f8e4d43c989ab07024e66ab98 100644 (file)
@@ -41,6 +41,8 @@ config ARC
        select PERF_USE_VMALLOC
        select HAVE_DEBUG_STACKOVERFLOW
        select HAVE_GENERIC_DMA_COHERENT
+       select HAVE_KERNEL_GZIP
+       select HAVE_KERNEL_LZMA
 
 config MIGHT_HAVE_PCI
        bool
@@ -186,14 +188,6 @@ if SMP
 config ARC_HAS_COH_CACHES
        def_bool n
 
-config ARC_MCIP
-       bool "ARConnect Multicore IP (MCIP) Support "
-       depends on ISA_ARCV2
-       help
-         This IP block enables SMP in ARC-HS38 cores.
-         It provides for cross-core interrupts, multi-core debug
-         hardware semaphores, shared memory,....
-
 config NR_CPUS
        int "Maximum number of CPUs (2-4096)"
        range 2 4096
@@ -211,6 +205,15 @@ config ARC_SMP_HALT_ON_RESET
 
 endif  #SMP
 
+config ARC_MCIP
+       bool "ARConnect Multicore IP (MCIP) Support "
+       depends on ISA_ARCV2
+       default y if SMP
+       help
+         This IP block enables SMP in ARC-HS38 cores.
+         It provides for cross-core interrupts, multi-core debug
+         hardware semaphores, shared memory,....
+
 menuconfig ARC_CACHE
        bool "Enable Cache Support"
        default y
@@ -537,14 +540,6 @@ config ARC_DBG_TLB_PARANOIA
        bool "Paranoia Checks in Low Level TLB Handlers"
        default n
 
-config ARC_DBG_TLB_MISS_COUNT
-       bool "Profile TLB Misses"
-       default n
-       select DEBUG_FS
-       help
-         Counts number of I and D TLB Misses and exports them via Debugfs
-         The counters can be cleared via Debugfs as well
-
 endif
 
 config ARC_UBOOT_SUPPORT
index aa82d13d4213855d299e864b54b64d444a126d73..19cce226d1a830793b54b98fba6f56c6a5a6a88d 100644 (file)
@@ -71,7 +71,9 @@ cflags-$(CONFIG_ARC_DW2_UNWIND)               += -fasynchronous-unwind-tables $(cfi)
 ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
 # Generic build system uses -O2, we want -O3
 # Note: No need to add to cflags-y as that happens anyways
-ARCH_CFLAGS += -O3
+#
+# Disable the false maybe-uninitialized warnings gcc spits out at -O3
+ARCH_CFLAGS += -O3 $(call cc-disable-warning,maybe-uninitialized,)
 endif
 
 # small data is default for elf32 tool-chain. If not usable, disable it
index e597cb34c16a832e4d219fd95a688126427b34b3..f94cf151e06ab2e142bbf2b867ae8a2de0e255af 100644 (file)
@@ -14,9 +14,15 @@ UIMAGE_ENTRYADDR   = $(LINUX_START_TEXT)
 
 suffix-y := bin
 suffix-$(CONFIG_KERNEL_GZIP)   := gz
+suffix-$(CONFIG_KERNEL_LZMA)   := lzma
 
-targets += uImage uImage.bin uImage.gz
-extra-y += vmlinux.bin vmlinux.bin.gz
+targets += uImage
+targets += uImage.bin
+targets += uImage.gz
+targets += uImage.lzma
+extra-y += vmlinux.bin
+extra-y += vmlinux.bin.gz
+extra-y += vmlinux.bin.lzma
 
 $(obj)/vmlinux.bin: vmlinux FORCE
        $(call if_changed,objcopy)
@@ -24,12 +30,18 @@ $(obj)/vmlinux.bin: vmlinux FORCE
 $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
        $(call if_changed,gzip)
 
+$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,lzma)
+
 $(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE
        $(call if_changed,uimage,none)
 
 $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE
        $(call if_changed,uimage,gzip)
 
+$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE
+       $(call if_changed,uimage,lzma)
+
 $(obj)/uImage: $(obj)/uImage.$(suffix-y)
        @ln -sf $(notdir $<) $@
        @echo '  Image $@ is ready'
index 6ae2c476ad825aee57fadb1e557b95a0bb082647..53ce226f77a59857615f8fc53fca3ba1c4f09749 100644 (file)
@@ -71,7 +71,7 @@
                        reg-io-width = <4>;
                };
 
-               arcpmu0: pmu {
+               arcpct0: pct {
                        compatible = "snps,arc700-pct";
                };
        };
index ce0ccd20b5bfc821b1e2c96d043b6a6e85c7ada8..5ee96b067c085ce1f061e0aac02732d63d4ecf7e 100644 (file)
@@ -69,7 +69,7 @@
                        };
                };
 
-               arcpmu0: pmu {
+               arcpct0: pct {
                        compatible = "snps,arc700-pct";
                };
        };
index bcf603142a33c08d6a0dbc7a50f47b7c84a20e1a..3c391ba565ed080cfad8b66f4c3395975eec90da 100644 (file)
@@ -83,5 +83,9 @@
                        reg = <0xf0003000 0x44>;
                        interrupts = <7>;
                };
+
+               arcpct0: pct {
+                       compatible = "snps,arc700-pct";
+               };
        };
 };
index 7314f538847bd13cf75ee94689b2612c19045e1a..b0066a749d4c49d8a23e3bd33b4a52789789c913 100644 (file)
@@ -14,6 +14,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
index 65ab9fbf83f25ab89fd6cf74e7fefb66226b2bf6..ebe9ebb92933302af79f98dae000e8567b86ec6b 100644 (file)
@@ -14,6 +14,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
index 3b3990cddbe10bc21eeb485521d08a64c61ca457..4bde43278be6757c5c739bfd58a9f030a80c7568 100644 (file)
@@ -12,6 +12,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
index 98cf20933bbb3232da17fe94fdfe0ad3ddf38379..f6fb3d26557eb7c63b2f77263acfed80f0b45387 100644 (file)
@@ -14,6 +14,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
index ddf8b96d494e90f4a776cdbea8276c54f0babc13..b9f0fe00044b6c44d62a81a91064583cd9badee3 100644 (file)
@@ -14,6 +14,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
index ceb90745326e52d85f3ca0a9092962c23b740910..6da71ba253a932275c133e55fe68fd19c7ec4379 100644 (file)
@@ -10,6 +10,7 @@ CONFIG_IKCONFIG_PROC=y
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
@@ -34,7 +35,6 @@ CONFIG_INET=y
 # CONFIG_INET_XFRM_MODE_TRANSPORT is not set
 # CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
@@ -72,7 +72,6 @@ CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HWMON is not set
 CONFIG_DRM=y
 CONFIG_DRM_ARCPGU=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
index db25c65155cb80a284ab3078f60797f2b25735dc..1bd24ec3e350243de4abd0a45cee201027ccf52a 100644 (file)
 #define STATUS_AE_BIT          5       /* Exception active */
 #define STATUS_DE_BIT          6       /* PC is in delay slot */
 #define STATUS_U_BIT           7       /* User/Kernel mode */
+#define STATUS_Z_BIT            11
 #define STATUS_L_BIT           12      /* Loop inhibit */
 
 /* These masks correspond to the status word(STATUS_32) bits */
 #define STATUS_AE_MASK         (1<<STATUS_AE_BIT)
 #define STATUS_DE_MASK         (1<<STATUS_DE_BIT)
 #define STATUS_U_MASK          (1<<STATUS_U_BIT)
+#define STATUS_Z_MASK          (1<<STATUS_Z_BIT)
 #define STATUS_L_MASK          (1<<STATUS_L_BIT)
 
 /*
@@ -349,10 +351,11 @@ struct cpuinfo_arc {
        struct cpuinfo_arc_bpu bpu;
        struct bcr_identity core;
        struct bcr_isa isa;
+       const char *details, *name;
        unsigned int vec_base;
        struct cpuinfo_arc_ccm iccm, dccm;
        struct {
-               unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3,
+               unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
                             fpu_sp:1, fpu_dp:1, pad2:6,
                             debug:1, ap:1, smart:1, rtt:1, pad3:4,
                             timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
index fb781e34f322fdd5aec20e9444bb395f4253c3a9..b3410ff6a62dbcc589ffa411f326d6954c8ba80c 100644 (file)
@@ -53,7 +53,7 @@ extern void arc_cache_init(void);
 extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
 extern void read_decode_cache_bcr(void);
 
-extern int ioc_exists;
+extern int ioc_enable;
 extern unsigned long perip_base, perip_end;
 
 #endif /* !__ASSEMBLY__ */
index 7096f97a14340f5d54766e21245c7ae779da0e72..aa2d6da9d187be21b1f38ac116f40941bb5dc39d 100644 (file)
@@ -54,7 +54,7 @@ extern int elf_check_arch(const struct elf32_hdr *);
  * the loader.  We need to make sure that it is out of the way of the program
  * that it will "exec", and that there is sufficient room for the brk.
  */
-#define ELF_ET_DYN_BASE                (2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE                (2UL * TASK_SIZE / 3)
 
 /*
  * When the program starts, a1 contains a pointer to a function to be
index 847e3bbe387fc92f9b4433bf7e08fc0b11e3ec70..c8fbe4114badd972a18b37de313f91ab7cffd482 100644 (file)
@@ -55,6 +55,22 @@ struct mcip_cmd {
 #define IDU_M_DISTRI_DEST              0x2
 };
 
+struct mcip_bcr {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+               unsigned int pad3:8,
+                            idu:1, llm:1, num_cores:6,
+                            iocoh:1,  gfrc:1, dbg:1, pad2:1,
+                            msg:1, sem:1, ipi:1, pad:1,
+                            ver:8;
+#else
+               unsigned int ver:8,
+                            pad:1, ipi:1, sem:1, msg:1,
+                            pad2:1, dbg:1, gfrc:1, iocoh:1,
+                            num_cores:6, llm:1, idu:1,
+                            pad3:8;
+#endif
+};
+
 /*
  * MCIP programming model
  *
index 518222bb3f8ef4c551b88e93749bbc2efa6da1a8..6e91d8b339c3616b59d7b389353c477acba8a418 100644 (file)
@@ -18,6 +18,7 @@
 struct mod_arch_specific {
        void *unw_info;
        int unw_sec_idx;
+       const char *secstr;
 };
 #endif
 
index 48b37c693db39d6dd2e73df0671edbb01e3be3fa..cb954cdab07087bc6b49e72c8d117c77b905595c 100644 (file)
@@ -27,11 +27,6 @@ struct id_to_str {
        const char *str;
 };
 
-struct cpuinfo_data {
-       struct id_to_str info;
-       int up_range;
-};
-
 extern int root_mountflags, end_mem;
 
 void setup_processor(void);
@@ -43,5 +38,6 @@ void __init setup_arch_memory(void);
 #define IS_USED_RUN(v)         ((v) ? "" : "(not used) ")
 #define IS_USED_CFG(cfg)       IS_USED_RUN(IS_ENABLED(cfg))
 #define IS_AVAIL2(v, s, cfg)   IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg))
+#define IS_AVAIL3(v, v2, s)    IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
 
 #endif /* __ASMARC_SETUP_H */
index 89fdd1b0a76ebe672094daa10134204cb96cf925..0861007d9ef33b9dbb19a6d2123c3b5d717d3733 100644 (file)
@@ -37,9 +37,9 @@ extern const char *arc_platform_smp_cpuinfo(void);
  * API expected BY platform smp code (FROM arch smp code)
  *
  * smp_ipi_irq_setup:
- *     Takes @cpu and @irq to which the arch-common ISR is hooked up
+ *     Takes @cpu and @hwirq to which the arch-common ISR is hooked up
  */
-extern int smp_ipi_irq_setup(int cpu, int irq);
+extern int smp_ipi_irq_setup(int cpu, irq_hw_number_t hwirq);
 
 /*
  * struct plat_smp_ops - SMP callbacks provided by platform to ARC SMP
index e56f9fcc558133277ca03d93461a56da5f02a4b9..772b67ca56e7bacb307fe5f5a944b1318b188b97 100644 (file)
@@ -17,6 +17,7 @@ int sys_clone_wrapper(int, int, int, int, int);
 int sys_cacheflush(uint32_t, uint32_t uint32_t);
 int sys_arc_settls(void *);
 int sys_arc_gettls(void);
+int sys_arc_usr_cmpxchg(int *, int, int);
 
 #include <asm-generic/syscalls.h>
 
index 41fa2ec9e02c7721717e5c513bc9703ebed5bed4..9a34136d84b2c77b45ee3b3b7a739f2d994151d4 100644 (file)
 
 #define NR_syscalls    __NR_syscalls
 
+/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */
+#define __NR_sysfs             (__NR_arch_specific_syscall + 3)
+
 /* ARC specific syscall */
 #define __NR_cacheflush                (__NR_arch_specific_syscall + 0)
 #define __NR_arc_settls                (__NR_arch_specific_syscall + 1)
 #define __NR_arc_gettls                (__NR_arch_specific_syscall + 2)
+#define __NR_arc_usr_cmpxchg   (__NR_arch_specific_syscall + 4)
 
 __SYSCALL(__NR_cacheflush, sys_cacheflush)
 __SYSCALL(__NR_arc_settls, sys_arc_settls)
 __SYSCALL(__NR_arc_gettls, sys_arc_gettls)
-
-
-/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */
-#define __NR_sysfs             (__NR_arch_specific_syscall + 3)
+__SYSCALL(__NR_arc_usr_cmpxchg, sys_arc_usr_cmpxchg)
 __SYSCALL(__NR_sysfs, sys_sysfs)
 
 #undef __SYSCALL
index f1e07c2344f84cbd6fb351ad65e9a542fc8f1b4e..3b67f538f1425699219fb2cd481be592ba047731 100644 (file)
@@ -31,6 +31,8 @@ static void __init arc_set_early_base_baud(unsigned long dt_root)
                arc_base_baud = 166666666;      /* Fixed 166.6MHz clk (TB10x) */
        else if (of_flat_dt_is_compatible(dt_root, "snps,arc-sdp"))
                arc_base_baud = 33333333;       /* Fixed 33MHz clk (AXS10x) */
+       else if (of_flat_dt_is_compatible(dt_root, "ezchip,arc-nps"))
+               arc_base_baud = 800000000;      /* Fixed 800MHz clk (NPS) */
        else
                arc_base_baud = 50000000;       /* Fixed default 50MHz */
 }
index 72f9179b1a24663b582b73d0bdc1972ec46befa9..f39142acc89e032627ef88431ac4775e71949ed2 100644 (file)
 #include <asm/mcip.h>
 #include <asm/setup.h>
 
-static char smp_cpuinfo_buf[128];
-static int idu_detected;
-
 static DEFINE_RAW_SPINLOCK(mcip_lock);
 
+#ifdef CONFIG_SMP
+
+static char smp_cpuinfo_buf[128];
+
 static void mcip_setup_per_cpu(int cpu)
 {
        smp_ipi_irq_setup(cpu, IPI_IRQ);
@@ -86,21 +87,7 @@ static void mcip_ipi_clear(int irq)
 
 static void mcip_probe_n_setup(void)
 {
-       struct mcip_bcr {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-               unsigned int pad3:8,
-                            idu:1, llm:1, num_cores:6,
-                            iocoh:1,  gfrc:1, dbg:1, pad2:1,
-                            msg:1, sem:1, ipi:1, pad:1,
-                            ver:8;
-#else
-               unsigned int ver:8,
-                            pad:1, ipi:1, sem:1, msg:1,
-                            pad2:1, dbg:1, gfrc:1, iocoh:1,
-                            num_cores:6, llm:1, idu:1,
-                            pad3:8;
-#endif
-       } mp;
+       struct mcip_bcr mp;
 
        READ_BCR(ARC_REG_MCIP_BCR, mp);
 
@@ -114,7 +101,6 @@ static void mcip_probe_n_setup(void)
                IS_AVAIL1(mp.gfrc, "GFRC"));
 
        cpuinfo_arc700[0].extn.gfrc = mp.gfrc;
-       idu_detected = mp.idu;
 
        if (mp.dbg) {
                __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf);
@@ -130,6 +116,8 @@ struct plat_smp_ops plat_smp_ops = {
        .ipi_clear      = mcip_ipi_clear,
 };
 
+#endif
+
 /***************************************************************************
  * ARCv2 Interrupt Distribution Unit (IDU)
  *
@@ -193,6 +181,8 @@ idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,
 {
        unsigned long flags;
        cpumask_t online;
+       unsigned int destination_bits;
+       unsigned int distribution_mode;
 
        /* errout if no online cpu per @cpumask */
        if (!cpumask_and(&online, cpumask, cpu_online_mask))
@@ -200,8 +190,15 @@ idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,
 
        raw_spin_lock_irqsave(&mcip_lock, flags);
 
-       idu_set_dest(data->hwirq, cpumask_bits(&online)[0]);
-       idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, IDU_M_DISTRI_RR);
+       destination_bits = cpumask_bits(&online)[0];
+       idu_set_dest(data->hwirq, destination_bits);
+
+       if (ffs(destination_bits) == fls(destination_bits))
+               distribution_mode = IDU_M_DISTRI_DEST;
+       else
+               distribution_mode = IDU_M_DISTRI_RR;
+
+       idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, distribution_mode);
 
        raw_spin_unlock_irqrestore(&mcip_lock, flags);
 
@@ -219,16 +216,15 @@ static struct irq_chip idu_irq_chip = {
 
 };
 
-static int idu_first_irq;
+static irq_hw_number_t idu_first_hwirq;
 
 static void idu_cascade_isr(struct irq_desc *desc)
 {
-       struct irq_domain *domain = irq_desc_get_handler_data(desc);
-       unsigned int core_irq = irq_desc_get_irq(desc);
-       unsigned int idu_irq;
+       struct irq_domain *idu_domain = irq_desc_get_handler_data(desc);
+       irq_hw_number_t core_hwirq = irqd_to_hwirq(irq_desc_get_irq_data(desc));
+       irq_hw_number_t idu_hwirq = core_hwirq - idu_first_hwirq;
 
-       idu_irq = core_irq - idu_first_irq;
-       generic_handle_irq(irq_find_mapping(domain, idu_irq));
+       generic_handle_irq(irq_find_mapping(idu_domain, idu_hwirq));
 }
 
 static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hwirq)
@@ -294,9 +290,12 @@ idu_of_init(struct device_node *intc, struct device_node *parent)
        struct irq_domain *domain;
        /* Read IDU BCR to confirm nr_irqs */
        int nr_irqs = of_irq_count(intc);
-       int i, irq;
+       int i, virq;
+       struct mcip_bcr mp;
+
+       READ_BCR(ARC_REG_MCIP_BCR, mp);
 
-       if (!idu_detected)
+       if (!mp.idu)
                panic("IDU not detected, but DeviceTree using it");
 
        pr_info("MCIP: IDU referenced from Devicetree %d irqs\n", nr_irqs);
@@ -312,11 +311,11 @@ idu_of_init(struct device_node *intc, struct device_node *parent)
                 * however we need it to get the parent virq and set IDU handler
                 * as first level isr
                 */
-               irq = irq_of_parse_and_map(intc, i);
+               virq = irq_of_parse_and_map(intc, i);
                if (!i)
-                       idu_first_irq = irq;
+                       idu_first_hwirq = irqd_to_hwirq(irq_get_irq_data(virq));
 
-               irq_set_chained_handler_and_data(irq, idu_cascade_isr, domain);
+               irq_set_chained_handler_and_data(virq, idu_cascade_isr, domain);
        }
 
        __mcip_cmd(CMD_IDU_ENABLE, 0);
index 9a2849756022c01c2b3c6da9b7b5a0e371ed2e20..42e964db29677877438f8b9bc8e6225cc5f64174 100644 (file)
@@ -30,17 +30,9 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
                              char *secstr, struct module *mod)
 {
 #ifdef CONFIG_ARC_DW2_UNWIND
-       int i;
-
        mod->arch.unw_sec_idx = 0;
        mod->arch.unw_info = NULL;
-
-       for (i = 1; i < hdr->e_shnum; i++) {
-               if (strcmp(secstr+sechdrs[i].sh_name, ".eh_frame") == 0) {
-                       mod->arch.unw_sec_idx = i;
-                       break;
-               }
-       }
+       mod->arch.secstr = secstr;
 #endif
        return 0;
 }
@@ -59,29 +51,33 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
                       unsigned int relsec,     /* sec index for relo sec */
                       struct module *module)
 {
-       int i, n;
+       int i, n, relo_type;
        Elf32_Rela *rel_entry = (void *)sechdrs[relsec].sh_addr;
        Elf32_Sym *sym_entry, *sym_sec;
-       Elf32_Addr relocation;
-       Elf32_Addr location;
-       Elf32_Addr sec_to_patch;
-       int relo_type;
-
-       sec_to_patch = sechdrs[sechdrs[relsec].sh_info].sh_addr;
+       Elf32_Addr relocation, location, tgt_addr;
+       unsigned int tgtsec;
+
+       /*
+        * @relsec has relocations e.g. .rela.init.text
+        * @tgtsec is section to patch e.g. .init.text
+        */
+       tgtsec = sechdrs[relsec].sh_info;
+       tgt_addr = sechdrs[tgtsec].sh_addr;
        sym_sec = (Elf32_Sym *) sechdrs[symindex].sh_addr;
        n = sechdrs[relsec].sh_size / sizeof(*rel_entry);
 
-       pr_debug("\n========== Module Sym reloc ===========================\n");
-       pr_debug("Section to fixup %x\n", sec_to_patch);
+       pr_debug("\nSection to fixup %s @%x\n",
+                module->arch.secstr + sechdrs[tgtsec].sh_name, tgt_addr);
        pr_debug("=========================================================\n");
-       pr_debug("rela->r_off | rela->addend | sym->st_value | ADDR | VALUE\n");
+       pr_debug("r_off\tr_add\tst_value ADDRESS  VALUE\n");
        pr_debug("=========================================================\n");
 
        /* Loop thru entries in relocation section */
        for (i = 0; i < n; i++) {
+               const char *s;
 
                /* This is where to make the change */
-               location = sec_to_patch + rel_entry[i].r_offset;
+               location = tgt_addr + rel_entry[i].r_offset;
 
                /* This is the symbol it is referring to.  Note that all
                   undefined symbols have been resolved.  */
@@ -89,10 +85,15 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
 
                relocation = sym_entry->st_value + rel_entry[i].r_addend;
 
-               pr_debug("\t%x\t\t%x\t\t%x  %x %x [%s]\n",
-                       rel_entry[i].r_offset, rel_entry[i].r_addend,
-                       sym_entry->st_value, location, relocation,
-                       strtab + sym_entry->st_name);
+               if (sym_entry->st_name == 0 && ELF_ST_TYPE (sym_entry->st_info) == STT_SECTION) {
+                       s = module->arch.secstr + sechdrs[sym_entry->st_shndx].sh_name;
+               } else {
+                       s = strtab + sym_entry->st_name;
+               }
+
+               pr_debug("   %x\t%x\t%x %x %x [%s]\n",
+                        rel_entry[i].r_offset, rel_entry[i].r_addend,
+                        sym_entry->st_value, location, relocation, s);
 
                /* This assumes modules are built with -mlong-calls
                 * so any branches/jumps are absolute 32 bit jmps
@@ -111,6 +112,10 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
                        goto relo_err;
 
        }
+
+       if (strcmp(module->arch.secstr+sechdrs[tgtsec].sh_name, ".eh_frame") == 0)
+               module->arch.unw_sec_idx = tgtsec;
+
        return 0;
 
 relo_err:
index be1972bd2729e7a41013d521e9349cd4ac028499..a41a79a4f4feaca96306577077bd4745d6cd8537 100644 (file)
@@ -41,6 +41,41 @@ SYSCALL_DEFINE0(arc_gettls)
        return task_thread_info(current)->thr_ptr;
 }
 
+SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
+{
+       struct pt_regs *regs = current_pt_regs();
+       int uval = -EFAULT;
+
+       /*
+        * This is only for old cores lacking LLOCK/SCOND, which by definition
+        * can't possibly be SMP. Thus doesn't need to be SMP safe.
+        * And this also helps reduce the overhead for serializing in
+        * the UP case
+        */
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP));
+
+       /* Z indicates to userspace if operation succeeded */
+       regs->status32 &= ~STATUS_Z_MASK;
+
+       if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+               return -EFAULT;
+
+       preempt_disable();
+
+       if (__get_user(uval, uaddr))
+               goto done;
+
+       if (uval == expected) {
+               if (!__put_user(new, uaddr))
+                       regs->status32 |= STATUS_Z_MASK;
+       }
+
+done:
+       preempt_enable();
+
+       return uval;
+}
+
 void arch_cpu_idle(void)
 {
        /* sleep, but enable all interrupts before committing */
index 3df7f9c72f4271478e1ca26cd90f1d59da475129..0385df77a69738f06a45d45553b1c0d6e0980e30 100644 (file)
@@ -40,6 +40,29 @@ struct task_struct *_current_task[NR_CPUS];  /* For stack switching */
 
 struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
 
+static const struct id_to_str arc_cpu_rel[] = {
+#ifdef CONFIG_ISA_ARCOMPACT
+       { 0x34, "R4.10"},
+       { 0x35, "R4.11"},
+#else
+       { 0x51, "R2.0" },
+       { 0x52, "R2.1" },
+       { 0x53, "R3.0" },
+#endif
+       { 0x00, NULL   }
+};
+
+static const struct id_to_str arc_cpu_nm[] = {
+#ifdef CONFIG_ISA_ARCOMPACT
+       { 0x20, "ARC 600"   },
+       { 0x30, "ARC 770"   },  /* 750 identified separately */
+#else
+       { 0x40, "ARC EM"  },
+       { 0x50, "ARC HS38"  },
+#endif
+       { 0x00, "Unknown"   }
+};
+
 static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu)
 {
        if (is_isa_arcompact()) {
@@ -92,11 +115,26 @@ static void read_arc_build_cfg_regs(void)
        struct bcr_timer timer;
        struct bcr_generic bcr;
        struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+       const struct id_to_str *tbl;
+
        FIX_PTR(cpu);
 
        READ_BCR(AUX_IDENTITY, cpu->core);
        READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa);
 
+       for (tbl = &arc_cpu_rel[0]; tbl->id != 0; tbl++) {
+               if (cpu->core.family == tbl->id) {
+                       cpu->details = tbl->str;
+                       break;
+               }
+       }
+
+       for (tbl = &arc_cpu_nm[0]; tbl->id != 0; tbl++) {
+               if ((cpu->core.family & 0xF0) == tbl->id)
+                       break;
+       }
+       cpu->name = tbl->str;
+
        READ_BCR(ARC_REG_TIMERS_BCR, timer);
        cpu->extn.timer0 = timer.t0;
        cpu->extn.timer1 = timer.t1;
@@ -111,6 +149,9 @@ static void read_arc_build_cfg_regs(void)
        cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0;        /* 1,3 */
        cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0;
        cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */
+       cpu->extn.swape = (cpu->core.family >= 0x34) ? 1 :
+                               IS_ENABLED(CONFIG_ARC_HAS_SWAPE);
+
        READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem);
 
        /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */
@@ -160,64 +201,38 @@ static void read_arc_build_cfg_regs(void)
        cpu->extn.rtt = bcr.ver ? 1 : 0;
 
        cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt;
-}
 
-static const struct cpuinfo_data arc_cpu_tbl[] = {
-#ifdef CONFIG_ISA_ARCOMPACT
-       { {0x20, "ARC 600"      }, 0x2F},
-       { {0x30, "ARC 700"      }, 0x33},
-       { {0x34, "ARC 700 R4.10"}, 0x34},
-       { {0x35, "ARC 700 R4.11"}, 0x35},
-#else
-       { {0x50, "ARC HS38 R2.0"}, 0x51},
-       { {0x52, "ARC HS38 R2.1"}, 0x52},
-       { {0x53, "ARC HS38 R3.0"}, 0x53},
-#endif
-       { {0x00, NULL           } }
-};
+       /* some hacks for lack of feature BCR info in old ARC700 cores */
+       if (is_isa_arcompact()) {
+               if (!cpu->isa.ver)      /* ISA BCR absent, use Kconfig info */
+                       cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
+               else
+                       cpu->isa.atomic = cpu->isa.atomic1;
 
+               cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+
+                /* there's no direct way to distinguish 750 vs. 770 */
+               if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3))
+                       cpu->name = "ARC750";
+       }
+}
 
 static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 {
        struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
        struct bcr_identity *core = &cpu->core;
-       const struct cpuinfo_data *tbl;
-       char *isa_nm;
-       int i, be, atomic;
-       int n = 0;
+       int i, n = 0;
 
        FIX_PTR(cpu);
 
-       if (is_isa_arcompact()) {
-               isa_nm = "ARCompact";
-               be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
-
-               atomic = cpu->isa.atomic1;
-               if (!cpu->isa.ver)      /* ISA BCR absent, use Kconfig info */
-                       atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
-       } else {
-               isa_nm = "ARCv2";
-               be = cpu->isa.be;
-               atomic = cpu->isa.atomic;
-       }
-
        n += scnprintf(buf + n, len - n,
                       "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
                       core->family, core->cpu_id, core->chip_id);
 
-       for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) {
-               if ((core->family >= tbl->info.id) &&
-                   (core->family <= tbl->up_range)) {
-                       n += scnprintf(buf + n, len - n,
-                                      "processor [%d]\t: %s (%s ISA) %s\n",
-                                      cpu_id, tbl->info.str, isa_nm,
-                                      IS_AVAIL1(be, "[Big-Endian]"));
-                       break;
-               }
-       }
-
-       if (tbl->info.id == 0)
-               n += scnprintf(buf + n, len - n, "UNKNOWN ARC Processor\n");
+       n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s\n",
+                      cpu_id, cpu->name, cpu->details,
+                      is_isa_arcompact() ? "ARCompact" : "ARCv2",
+                      IS_AVAIL1(cpu->isa.be, "[Big-Endian]"));
 
        n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ",
                       IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
@@ -226,7 +241,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
                                 CONFIG_ARC_HAS_RTC));
 
        n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s",
-                          IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+                          IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
                           IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
                           IS_AVAIL1(cpu->isa.unalign, "unalign (not used)"));
 
@@ -253,7 +268,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
                       IS_AVAIL1(cpu->extn.swap, "swap "),
                       IS_AVAIL1(cpu->extn.minmax, "minmax "),
                       IS_AVAIL1(cpu->extn.crc, "crc "),
-                      IS_AVAIL2(1, "swape", CONFIG_ARC_HAS_SWAPE));
+                      IS_AVAIL2(cpu->extn.swape, "swape", CONFIG_ARC_HAS_SWAPE));
 
        if (cpu->bpu.ver)
                n += scnprintf(buf + n, len - n,
@@ -272,9 +287,7 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 
        FIX_PTR(cpu);
 
-       n += scnprintf(buf + n, len - n,
-                      "Vector Table\t: %#x\nPeripherals\t: %#lx:%#lx\n",
-                      cpu->vec_base, perip_base, perip_end);
+       n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base);
 
        if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
                n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
@@ -507,7 +520,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
         * way to pass it w/o having to kmalloc/free a 2 byte string.
         * Encode cpu-id as 0xFFcccc, which is decoded by show routine.
         */
-       return *pos < num_possible_cpus() ? cpu_to_ptr(*pos) : NULL;
+       return *pos < nr_cpu_ids ? cpu_to_ptr(*pos) : NULL;
 }
 
 static void *c_next(struct seq_file *m, void *v, loff_t *pos)
index 6cb3736b6b83613a95180cd3e9cb5ba12a9b2f7f..d347bbc086fed124627bccaebec9089652a6b7b8 100644 (file)
@@ -107,13 +107,13 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
        struct user_regs_struct uregs;
 
        err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
-       if (!err)
-               set_current_blocked(&set);
-
        err |= __copy_from_user(&uregs.scratch,
                                &(sf->uc.uc_mcontext.regs.scratch),
                                sizeof(sf->uc.uc_mcontext.regs.scratch));
+       if (err)
+               return err;
 
+       set_current_blocked(&set);
        regs->bta       = uregs.scratch.bta;
        regs->lp_start  = uregs.scratch.lp_start;
        regs->lp_end    = uregs.scratch.lp_end;
@@ -138,7 +138,7 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
        regs->r0        = uregs.scratch.r0;
        regs->sp        = uregs.scratch.sp;
 
-       return err;
+       return 0;
 }
 
 static inline int is_do_ss_needed(unsigned int magic)
index f183cc648851e53d0db2736925cdf466d900946a..88674d972c9d056f33f87205aa77049c11006129 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/atomic.h>
 #include <linux/cpumask.h>
 #include <linux/reboot.h>
+#include <linux/irqdomain.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
 #include <asm/mach_desc.h>
@@ -67,11 +68,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
        int i;
 
        /*
-        * Initialise the present map, which describes the set of CPUs
-        * actually populated at the present time.
+        * if platform didn't set the present map already, do it now
+        * boot cpu is set to present already by init/main.c
         */
-       for (i = 0; i < max_cpus; i++)
-               set_cpu_present(i, true);
+       if (num_present_cpus() <= 1) {
+               for (i = 0; i < max_cpus; i++)
+                       set_cpu_present(i, true);
+       }
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
@@ -351,20 +354,24 @@ irqreturn_t do_IPI(int irq, void *dev_id)
  */
 static DEFINE_PER_CPU(int, ipi_dev);
 
-int smp_ipi_irq_setup(int cpu, int irq)
+int smp_ipi_irq_setup(int cpu, irq_hw_number_t hwirq)
 {
        int *dev = per_cpu_ptr(&ipi_dev, cpu);
+       unsigned int virq = irq_find_mapping(NULL, hwirq);
+
+       if (!virq)
+               panic("Cannot find virq for root domain and hwirq=%lu", hwirq);
 
        /* Boot cpu calls request, all call enable */
        if (!cpu) {
                int rc;
 
-               rc = request_percpu_irq(irq, do_IPI, "IPI Interrupt", dev);
+               rc = request_percpu_irq(virq, do_IPI, "IPI Interrupt", dev);
                if (rc)
-                       panic("Percpu IRQ request failed for %d\n", irq);
+                       panic("Percpu IRQ request failed for %u\n", virq);
        }
 
-       enable_percpu_irq(irq, 0);
+       enable_percpu_irq(virq, 0);
 
        return 0;
 }
index f927b8dc6eddf614aecbd03138730badf4f3156c..c10390d1ddb6b32abe8d622870350f7be30c57df 100644 (file)
@@ -152,14 +152,17 @@ static cycle_t arc_read_rtc(struct clocksource *cs)
                cycle_t  full;
        } stamp;
 
-
-       __asm__ __volatile(
-       "1:                                             \n"
-       "       lr              %0, [AUX_RTC_LOW]       \n"
-       "       lr              %1, [AUX_RTC_HIGH]      \n"
-       "       lr              %2, [AUX_RTC_CTRL]      \n"
-       "       bbit0.nt        %2, 31, 1b              \n"
-       : "=r" (stamp.low), "=r" (stamp.high), "=r" (status));
+       /*
+        * hardware has an internal state machine which tracks readout of
+        * low/high and updates the CTRL.status if
+        *  - interrupt/exception taken between the two reads
+        *  - high increments after low has been read
+        */
+       do {
+               stamp.low = read_aux_reg(AUX_RTC_LOW);
+               stamp.high = read_aux_reg(AUX_RTC_HIGH);
+               status = read_aux_reg(AUX_RTC_CTRL);
+       } while (!(status & _BITUL(31)));
 
        return stamp.full;
 }
index 934150e7ac4895ef9e4523fd1ffabbdacce2dd74..82f9bc819f4a2d40f9849d88cc4631fa8f07d7a8 100644 (file)
@@ -237,113 +237,3 @@ void show_kernel_fault_diag(const char *str, struct pt_regs *regs,
        if (!user_mode(regs))
                show_stacktrace(current, regs);
 }
-
-#ifdef CONFIG_DEBUG_FS
-
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/mount.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/namei.h>
-#include <linux/debugfs.h>
-
-static struct dentry *test_dentry;
-static struct dentry *test_dir;
-static struct dentry *test_u32_dentry;
-
-static u32 clr_on_read = 1;
-
-#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
-u32 numitlb, numdtlb, num_pte_not_present;
-
-static int fill_display_data(char *kbuf)
-{
-       size_t num = 0;
-       num += sprintf(kbuf + num, "I-TLB Miss %x\n", numitlb);
-       num += sprintf(kbuf + num, "D-TLB Miss %x\n", numdtlb);
-       num += sprintf(kbuf + num, "PTE not present %x\n", num_pte_not_present);
-
-       if (clr_on_read)
-               numitlb = numdtlb = num_pte_not_present = 0;
-
-       return num;
-}
-
-static int tlb_stats_open(struct inode *inode, struct file *file)
-{
-       file->private_data = (void *)__get_free_page(GFP_KERNEL);
-       return 0;
-}
-
-/* called on user read(): display the counters */
-static ssize_t tlb_stats_output(struct file *file,     /* file descriptor */
-                               char __user *user_buf,  /* user buffer */
-                               size_t len,             /* length of buffer */
-                               loff_t *offset)         /* offset in the file */
-{
-       size_t num;
-       char *kbuf = (char *)file->private_data;
-
-       /* All of the data can he shoved in one iteration */
-       if (*offset != 0)
-               return 0;
-
-       num = fill_display_data(kbuf);
-
-       /* simple_read_from_buffer() is helper for copy to user space
-          It copies up to @2 (num) bytes from kernel buffer @4 (kbuf) at offset
-          @3 (offset) into the user space address starting at @1 (user_buf).
-          @5 (len) is max size of user buffer
-        */
-       return simple_read_from_buffer(user_buf, num, offset, kbuf, len);
-}
-
-/* called on user write : clears the counters */
-static ssize_t tlb_stats_clear(struct file *file, const char __user *user_buf,
-                              size_t length, loff_t *offset)
-{
-       numitlb = numdtlb = num_pte_not_present = 0;
-       return length;
-}
-
-static int tlb_stats_close(struct inode *inode, struct file *file)
-{
-       free_page((unsigned long)(file->private_data));
-       return 0;
-}
-
-static const struct file_operations tlb_stats_file_ops = {
-       .read = tlb_stats_output,
-       .write = tlb_stats_clear,
-       .open = tlb_stats_open,
-       .release = tlb_stats_close
-};
-#endif
-
-static int __init arc_debugfs_init(void)
-{
-       test_dir = debugfs_create_dir("arc", NULL);
-
-#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
-       test_dentry = debugfs_create_file("tlb_stats", 0444, test_dir, NULL,
-                                         &tlb_stats_file_ops);
-#endif
-
-       test_u32_dentry =
-           debugfs_create_u32("clr_on_read", 0444, test_dir, &clr_on_read);
-
-       return 0;
-}
-
-module_init(arc_debugfs_init);
-
-static void __exit arc_debugfs_exit(void)
-{
-       debugfs_remove(test_u32_dentry);
-       debugfs_remove(test_dentry);
-       debugfs_remove(test_dir);
-}
-module_exit(arc_debugfs_exit);
-
-#endif
index 97dddbefb86a93fa2f1e05275b6f25db3f283644..2b96cfc3be751a6d56fd13a9531c40e4f8debd8b 100644 (file)
@@ -22,8 +22,8 @@
 #include <asm/setup.h>
 
 static int l2_line_sz;
-int ioc_exists;
-volatile int slc_enable = 1, ioc_enable = 1;
+static int ioc_exists;
+int slc_enable = 1, ioc_enable = 1;
 unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
 unsigned long perip_end = 0xFFFFFFFF; /* legacy value */
 
@@ -53,18 +53,15 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
        PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
        PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
 
-       if (!is_isa_arcv2())
-                return buf;
-
        p = &cpuinfo_arc700[c].slc;
        if (p->ver)
                n += scnprintf(buf + n, len - n,
                               "SLC\t\t: %uK, %uB Line%s\n",
                               p->sz_k, p->line_len, IS_USED_RUN(slc_enable));
 
-       if (ioc_exists)
-               n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n",
-                               IS_DISABLED_RUN(ioc_enable));
+       n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
+                      perip_base,
+                      IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency "));
 
        return buf;
 }
@@ -113,8 +110,10 @@ static void read_decode_cache_bcr_arcv2(int cpu)
        }
 
        READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
-       if (cbcr.c && ioc_enable)
+       if (cbcr.c)
                ioc_exists = 1;
+       else
+               ioc_enable = 0;
 
        /* HS 2.0 didn't have AUX_VOL */
        if (cpuinfo_arc700[cpu].core.family > 0x51) {
@@ -1002,7 +1001,7 @@ void arc_cache_init(void)
                        read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_DISABLE);
        }
 
-       if (is_isa_arcv2() && ioc_exists) {
+       if (is_isa_arcv2() && ioc_enable) {
                /* IO coherency base - 0x8z */
                write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000);
                /* IO coherency aperture size - 512Mb: 0x8z-0xAz */
index 20afc65e22dc780c69dea280acfc6907a1680e9f..cd8aad8226dd5c151989e1233603d9cb42781bf8 100644 (file)
@@ -45,7 +45,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size,
         *   -For coherent data, Read/Write to buffers terminate early in cache
         *   (vs. always going to memory - thus are faster)
         */
-       if ((is_isa_arcv2() && ioc_exists) ||
+       if ((is_isa_arcv2() && ioc_enable) ||
            (attrs & DMA_ATTR_NON_CONSISTENT))
                need_coh = 0;
 
@@ -97,7 +97,7 @@ static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
        int is_non_coh = 1;
 
        is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
-                       (is_isa_arcv2() && ioc_exists);
+                       (is_isa_arcv2() && ioc_enable);
 
        if (PageHighMem(page) || !is_non_coh)
                iounmap((void __force __iomem *)vaddr);
@@ -105,6 +105,31 @@ static void arc_dma_free(struct device *dev, size_t size, void *vaddr,
        __free_pages(page, get_order(size));
 }
 
+static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+                       void *cpu_addr, dma_addr_t dma_addr, size_t size,
+                       unsigned long attrs)
+{
+       unsigned long user_count = vma_pages(vma);
+       unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       unsigned long pfn = __phys_to_pfn(plat_dma_to_phys(dev, dma_addr));
+       unsigned long off = vma->vm_pgoff;
+       int ret = -ENXIO;
+
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+       if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+               return ret;
+
+       if (off < count && user_count <= (count - off)) {
+               ret = remap_pfn_range(vma, vma->vm_start,
+                                     pfn + off,
+                                     user_count << PAGE_SHIFT,
+                                     vma->vm_page_prot);
+       }
+
+       return ret;
+}
+
 /*
  * streaming DMA Mapping API...
  * CPU accesses page via normal paddr, thus needs to explicitly made
@@ -193,6 +218,7 @@ static int arc_dma_supported(struct device *dev, u64 dma_mask)
 struct dma_map_ops arc_dma_ops = {
        .alloc                  = arc_dma_alloc,
        .free                   = arc_dma_free,
+       .mmap                   = arc_dma_mmap,
        .map_page               = arc_dma_map_page,
        .map_sg                 = arc_dma_map_sg,
        .sync_single_for_device = arc_dma_sync_single_for_device,
index ec868a9081a1103790e594063d1544c0766be3fb..bdb295e09160b2037c9dd90058963800cbe78d08 100644 (file)
@@ -793,16 +793,16 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
        char super_pg[64] = "";
 
        if (p_mmu->s_pg_sz_m)
-               scnprintf(super_pg, 64, "%dM Super Page%s, ",
+               scnprintf(super_pg, 64, "%dM Super Page %s",
                          p_mmu->s_pg_sz_m,
                          IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
 
        n += scnprintf(buf + n, len - n,
-                     "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n",
+                     "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
                       p_mmu->ver, p_mmu->pg_sz_k, super_pg,
                       p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
                       p_mmu->u_dtlb, p_mmu->u_itlb,
-                      IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40));
+                      IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40));
 
        return buf;
 }
index f1967eeb32e757bb906580fecfce84a309df9983..b30e4e36bb00dd3c5feaa685fe08dd0629404119 100644 (file)
@@ -237,15 +237,6 @@ ex_saved_reg1:
 
 2:
 
-#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
-       and.f 0, r0, _PAGE_PRESENT
-       bz   1f
-       ld   r3, [num_pte_not_present]
-       add  r3, r3, 1
-       st   r3, [num_pte_not_present]
-1:
-#endif
-
 .endm
 
 ;-----------------------------------------------------------------
@@ -309,12 +300,6 @@ ENTRY(EV_TLBMissI)
 
        TLBMISS_FREEUP_REGS
 
-#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
-       ld  r0, [@numitlb]
-       add r0, r0, 1
-       st  r0, [@numitlb]
-#endif
-
        ;----------------------------------------------------------------
        ; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA
        LOAD_FAULT_PTE
@@ -349,12 +334,6 @@ ENTRY(EV_TLBMissD)
 
        TLBMISS_FREEUP_REGS
 
-#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
-       ld  r0, [@numdtlb]
-       add r0, r0, 1
-       st  r0, [@numdtlb]
-#endif
-
        ;----------------------------------------------------------------
        ; Get the PTE corresponding to V-addr accessed
        ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA
index 5e901f86e4bd068af0b665bd3d1a3ee3ea6c773d..56a4c8522f111cc5221e2ead6b6574100cd3f685 100644 (file)
@@ -140,16 +140,10 @@ static void eznps_init_per_cpu(int cpu)
        mtm_enable_core(cpu);
 }
 
-static void eznps_ipi_clear(int irq)
-{
-       write_aux_reg(CTOP_AUX_IACK, 1 << irq);
-}
-
 struct plat_smp_ops plat_smp_ops = {
        .info           = smp_cpuinfo_buf,
        .init_early_smp = eznps_init_cpumasks,
        .cpu_kick       = eznps_smp_wakeup_cpu,
        .ipi_send       = eznps_ipi_send,
        .init_per_cpu   = eznps_init_per_cpu,
-       .ipi_clear      = eznps_ipi_clear,
 };
index dec4b073ceb138e93a545815f0dce636cf7f6092..379939699164aa6dbd1c90b422496c1ab72edff3 100644 (file)
@@ -64,8 +64,8 @@
                        };
 
                        ldo3_reg: ldo3 {
-                               regulator-min-microvolt = <600000>;
-                               regulator-max-microvolt = <1800000>;
+                               regulator-min-microvolt = <1725000>;
+                               regulator-max-microvolt = <3300000>;
                                regulator-always-on;
                        };
 
@@ -76,8 +76,8 @@
                        };
 
                        ldo5_reg: ldo5 {
-                               regulator-min-microvolt = <1725000>;
-                               regulator-max-microvolt = <3300000>;
+                               regulator-min-microvolt = <1200000>;
+                               regulator-max-microvolt = <3600000>;
                                regulator-always-on;
                        };
 
                        };
 
                        ldo9_reg: ldo9 {
-                               regulator-min-microvolt = <1200000>;
+                               regulator-min-microvolt = <1250000>;
                                regulator-max-microvolt = <3600000>;
                                regulator-always-on;
                        };
 
                        ldo10_reg: ldo10 {
-                               regulator-min-microvolt = <1250000>;
-                               regulator-max-microvolt = <3650000>;
+                               regulator-min-microvolt = <1200000>;
+                               regulator-max-microvolt = <3600000>;
                                regulator-always-on;
                        };
                };
index 0ff1c2de95bfc1a172cd2c7a98e5baf8d93260d1..26cce4d18405d5c993377ed7a246d7c80b43dcea 100644 (file)
                };
        };
 
+       memory@80000000 {
+               device_type = "memory";
+               reg = <0x80000000 0>;
+       };
+
        wl12xx_vmmc: wl12xx_vmmc {
                compatible = "regulator-fixed";
                regulator-name = "vwl1271";
index 731ec37aed5b505b31e95e35319c3e8213e1118a..8f9a69ca818cecb759e71c1b6b97e4073c3e22e4 100644 (file)
@@ -13,9 +13,9 @@
                };
        };
 
-       memory@0 {
+       memory@80000000 {
                device_type = "memory";
-               reg = <0 0>;
+               reg = <0x80000000 0>;
        };
 
        leds {
index 6365635fea5c8dd5e65ac70459a1a6427631fd9a..4caadb25324977e67c40d4ebde2929cd6782cbf8 100644 (file)
                compatible = "ti,abe-twl6040";
                ti,model = "omap5-uevm";
 
+               ti,jack-detection;
                ti,mclk-freq = <19200000>;
 
                ti,mcpdm = <&mcpdm>;
                        ti,backup-battery-charge-high-current;
                };
 
-               gpadc {
+               gpadc: gpadc {
                        compatible = "ti,palmas-gpadc";
                        interrupts = <18 0
                                      16 0
                                smps6_reg: smps6 {
                                        /* VDD_DDR3 - over VDD_SMPS6 */
                                        regulator-name = "smps6";
-                                       regulator-min-microvolt = <1200000>;
-                                       regulator-max-microvolt = <1200000>;
+                                       regulator-min-microvolt = <1350000>;
+                                       regulator-max-microvolt = <1350000>;
                                        regulator-always-on;
                                        regulator-boot-on;
                                };
index b3df1c60d4657e59255f39ff24cccc87a895ca6d..386eee6de2320aa60365095d74378ce11a865f0d 100644 (file)
                        arm,primecell-periphid = <0x10480180>;
                        max-frequency = <100000000>;
                        bus-width = <4>;
+                       cap-sd-highspeed;
                        cap-mmc-highspeed;
+                       sd-uhs-sdr12;
+                       sd-uhs-sdr25;
+                       /* All direction control is used */
+                       st,sig-dir-cmd;
+                       st,sig-dir-dat0;
+                       st,sig-dir-dat2;
+                       st,sig-dir-dat31;
+                       st,sig-pin-fbclk;
+                       full-pwr-cycle;
                        vmmc-supply = <&ab8500_ldo_aux3_reg>;
                        vqmmc-supply = <&vmmci>;
                        pinctrl-names = "default", "sleep";
                        pinctrl-0 = <&sdi0_default_mode>;
                        pinctrl-1 = <&sdi0_sleep_mode>;
 
-                       cd-gpios  = <&gpio6 26 GPIO_ACTIVE_LOW>; // 218
+                       /* GPIO218 MMC_CD */
+                       cd-gpios  = <&gpio6 26 GPIO_ACTIVE_LOW>;
 
                        status = "okay";
                };
                                        /* VMMCI level-shifter enable */
                                        snowball_cfg3 {
                                                pins = "GPIO217_AH12";
-                                               ste,config = <&gpio_out_lo>;
+                                               ste,config = <&gpio_out_hi>;
                                        };
                                        /* VMMCI level-shifter voltage select */
                                        snowball_cfg4 {
index ef2ff2f518f619a91377238f6d6e28a0baa83aab..7fb507fcba7eed404de64af7d5669de2274e2b99 100644 (file)
@@ -74,7 +74,7 @@
                /* Low speed expansion connector */
                spi0: spi@9844000 {
                        label = "LS-SPI0";
-                       cs-gpio = <&pio30 3 0>;
+                       cs-gpios = <&pio30 3 0>;
                        status = "okay";
                };
 
index 48fc24f36fcb268b7c0ba30348e0f0cc17826a9c..300a1bd5a6ecfd42a8968c7b76d2296fbccc1a2f 100644 (file)
                        uart1_pins_a: uart1@0 {
                                allwinner,pins = "PG6", "PG7";
                                allwinner,function = "uart1";
+                               allwinner,drive = <SUN4I_PINCTRL_10_MA>;
+                               allwinner,pull = <SUN4I_PINCTRL_NO_PULL>;
                        };
 
                        uart1_pins_cts_rts_a: uart1-cts-rts@0 {
                                allwinner,pins = "PG8", "PG9";
                                allwinner,function = "uart1";
+                               allwinner,drive = <SUN4I_PINCTRL_10_MA>;
+                               allwinner,pull = <SUN4I_PINCTRL_NO_PULL>;
                        };
 
                        mmc0_pins_a: mmc0@0 {
index 2c49c3614bda53ddbea05c8157009a932623253e..5357ea9c14b1ed59944f137b228ba3c4996910c7 100644 (file)
 };
 
 &mio_clk {
-       compatible = "socionext,uniphier-pro5-mio-clock";
+       compatible = "socionext,uniphier-pro5-sd-clock";
 };
 
 &mio_rst {
-       compatible = "socionext,uniphier-pro5-mio-reset";
+       compatible = "socionext,uniphier-pro5-sd-reset";
 };
 
 &peri_clk {
index 8789cd518933dbaf1564e0e333867fdc4f73d6e4..950f07ba03371ef102289238a48c008a40bb8b86 100644 (file)
 };
 
 &mio_clk {
-       compatible = "socionext,uniphier-pxs2-mio-clock";
+       compatible = "socionext,uniphier-pxs2-sd-clock";
 };
 
 &mio_rst {
-       compatible = "socionext,uniphier-pxs2-mio-reset";
+       compatible = "socionext,uniphier-pxs2-sd-reset";
 };
 
 &peri_clk {
index a3824e61bd72c7cb57ed643519661efe02e0c1fe..d7fdb2a7d97b696458a0ccc892d727fcc2d0f236 100644 (file)
@@ -70,7 +70,7 @@
                        global_timer: timer@40002200 {
                                compatible = "arm,cortex-a9-global-timer";
                                reg = <0x40002200 0x20>;
-                               interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
                                interrupt-parent = <&intc>;
                                clocks = <&clks VF610_CLK_PLATFORM_BUS>;
                        };
index 437d0740dec604a24dbbcccf8bdbbbb4cc411f85..11f37ed1dbfffbdcc9475bc59ce1743a30b2615a 100644 (file)
@@ -850,6 +850,7 @@ CONFIG_PWM_SUN4I=y
 CONFIG_PWM_TEGRA=y
 CONFIG_PWM_VT8500=y
 CONFIG_PHY_HIX5HD2_SATA=y
+CONFIG_E1000E=y
 CONFIG_PWM_STI=y
 CONFIG_PWM_BCM2835=y
 CONFIG_PWM_BRCMSTB=m
index d7ea6bcb29bf68489323f1c777e057de0b81e364..8ef05381984b1b6ba977035c82607423b37835c0 100644 (file)
@@ -66,6 +66,7 @@ extern char __kvm_hyp_vector[];
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
index 2d19e02d03fd69e75f327659f582d8abbdf909e9..d5423ab15ed5be1c705817e13d4a7d7fe35b465b 100644 (file)
@@ -57,6 +57,9 @@ struct kvm_arch {
        /* VTTBR value associated with below pgd and vmid */
        u64    vttbr;
 
+       /* The last vcpu id that ran on each physical CPU */
+       int __percpu *last_vcpu_ran;
+
        /* Timer */
        struct arch_timer_kvm   timer;
 
index 343135ede5fa01d848ec609f0eafc08f610f5aab..58508900c4bb264be2a874299b829b31a9cb5601 100644 (file)
@@ -71,6 +71,7 @@
 #define ICIALLUIS      __ACCESS_CP15(c7, 0, c1, 0)
 #define ATS1CPR                __ACCESS_CP15(c7, 0, c8, 0)
 #define TLBIALLIS      __ACCESS_CP15(c8, 0, c3, 0)
+#define TLBIALL                __ACCESS_CP15(c8, 0, c7, 0)
 #define TLBIALLNSNHIS  __ACCESS_CP15(c8, 4, c3, 4)
 #define PRRR           __ACCESS_CP15(c10, 0, c2, 0)
 #define NMRR           __ACCESS_CP15(c10, 0, c2, 1)
index a93c0f99acf7767c680158cf96acef87d1f0da51..1f59ea051bab814132074b09f55d3a57c800a471 100644 (file)
@@ -533,11 +533,12 @@ __clear_user(void __user *addr, unsigned long n)
 
 static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-       if (access_ok(VERIFY_READ, from, n))
-               n = __copy_from_user(to, from, n);
-       else /* security hole - plug it */
-               memset(to, 0, n);
-       return n;
+       unsigned long res = n;
+       if (likely(access_ok(VERIFY_READ, from, n)))
+               res = __copy_from_user(to, from, n);
+       if (unlikely(res))
+               memset(to + (n - res), 0, res);
+       return res;
 }
 
 static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
index 194b6992338920680c14c46326999791e33defa6..ada0d29a660f2fad8bdcf9dbef82affd62e7bf24 100644 (file)
@@ -19,7 +19,7 @@
  * This may need to be greater than __NR_last_syscall+1 in order to
  * account for the padding in the syscall table
  */
-#define __NR_syscalls  (396)
+#define __NR_syscalls  (400)
 
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_GETHOSTNAME
index 2cb9dc770e1d41e8867f949e1ef13e028568a3d3..314100a06ccb6c65161a34aae415c6ed89b060e5 100644 (file)
 #define __NR_copy_file_range           (__NR_SYSCALL_BASE+391)
 #define __NR_preadv2                   (__NR_SYSCALL_BASE+392)
 #define __NR_pwritev2                  (__NR_SYSCALL_BASE+393)
+#define __NR_pkey_mprotect             (__NR_SYSCALL_BASE+394)
+#define __NR_pkey_alloc                        (__NR_SYSCALL_BASE+395)
+#define __NR_pkey_free                 (__NR_SYSCALL_BASE+396)
 
 /*
  * The following SWIs are ARM private.
index 703fa0f3cd8f812907b47ac7c84646ff3e3aff94..08030b18f10a3b73c2bd46ffbfe72ad4b0ffde24 100644 (file)
                CALL(sys_copy_file_range)
                CALL(sys_preadv2)
                CALL(sys_pwritev2)
+               CALL(sys_pkey_mprotect)
+/* 395 */      CALL(sys_pkey_alloc)
+               CALL(sys_pkey_free)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
index bc698383e82253a47427885359e07e22daa24179..9688ec0c6ef43f621d029c680fcd7876d120a53c 100644 (file)
@@ -74,6 +74,26 @@ void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long
                dump_mem("", "Exception stack", frame + 4, frame + 4 + sizeof(struct pt_regs));
 }
 
+void dump_backtrace_stm(u32 *stack, u32 instruction) /* print one " rN:value" field per bit set in bits 10..0 of instruction */
+{
+       char str[80], *p; /* a line holds at most six fields of up to 13 chars each, plus the NUL */
+       unsigned int x; /* number of fields formatted into the current line */
+       int reg;
+
+       for (reg = 10, x = 0, p = str; reg >= 0; reg--) { /* highest register number first */
+               if (instruction & BIT(reg)) {
+                       p += sprintf(p, " r%d:%08x", reg, *stack--); /* values are read at descending addresses from stack */
+                       if (++x == 6) { /* line is full: print it and start over */
+                               x = 0;
+                               p = str;
+                               printk("%s\n", str);
+                       }
+               }
+       }
+       if (p != str) /* a partly filled line is still pending */
+               printk("%s\n", str);
+}
+
 #ifndef CONFIG_ARM_UNWIND
 /*
  * Stack pointers should always be within the kernels view of
index 7fa487ef7e2f67fb3e1ac7fa8fd3edb58f2145fc..37b2a11af34592b5f60f0db77ce014588f9327f4 100644 (file)
@@ -3,6 +3,9 @@
  * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
  */
 
+/* No __ro_after_init data in the .rodata section - which will always be ro */
+#define RO_AFTER_INIT_DATA
+
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/cache.h>
 #include <asm/thread_info.h>
@@ -223,6 +226,8 @@ SECTIONS
                . = ALIGN(PAGE_SIZE);
                __init_end = .;
 
+               *(.data..ro_after_init)
+
                NOSAVE_DATA
                CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
                READ_MOSTLY_DATA(L1_CACHE_BYTES)
index 03e9273f18765b039179dd87c0931d4d30b567c4..19b5f5c1c0ff3ef8fa68300f5ec87fe5564c916e 100644 (file)
@@ -114,11 +114,18 @@ void kvm_arch_check_processor_compat(void *rtn)
  */
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
-       int ret = 0;
+       int ret, cpu;
 
        if (type)
                return -EINVAL;
 
+       kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
+       if (!kvm->arch.last_vcpu_ran)
+               return -ENOMEM;
+
+       for_each_possible_cpu(cpu)
+               *per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
+
        ret = kvm_alloc_stage2_pgd(kvm);
        if (ret)
                goto out_fail_alloc;
@@ -141,6 +148,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 out_free_stage2_pgd:
        kvm_free_stage2_pgd(kvm);
 out_fail_alloc:
+       free_percpu(kvm->arch.last_vcpu_ran);
+       kvm->arch.last_vcpu_ran = NULL;
        return ret;
 }
 
@@ -168,6 +177,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 {
        int i;
 
+       free_percpu(kvm->arch.last_vcpu_ran);
+       kvm->arch.last_vcpu_ran = NULL;
+
        for (i = 0; i < KVM_MAX_VCPUS; ++i) {
                if (kvm->vcpus[i]) {
                        kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -312,6 +324,19 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+       int *last_ran;
+
+       last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
+
+       /*
+        * We might get preempted before the vCPU actually runs, but
+        * over-invalidation doesn't affect correctness.
+        */
+       if (*last_ran != vcpu->vcpu_id) {
+               kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
+               *last_ran = vcpu->vcpu_id;
+       }
+
        vcpu->cpu = cpu;
        vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
 
@@ -1312,6 +1337,13 @@ static int init_hyp_mode(void)
                goto out_err;
        }
 
+       err = create_hyp_mappings(kvm_ksym_ref(__bss_start),
+                                 kvm_ksym_ref(__bss_stop), PAGE_HYP_RO);
+       if (err) {
+               kvm_err("Cannot map bss section\n");
+               goto out_err;
+       }
+
        /*
         * Map the Hyp stack pages
         */
index 729652854f9098d677bd59871452c7b8c1ea240b..6d810af2d9fd7c630603ee5bfa8108c42a9992d8 100644 (file)
@@ -55,6 +55,21 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
        __kvm_tlb_flush_vmid(kvm);
 }
 
+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+{
+       struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); /* both vcpu and vcpu->kvm go through kern_hyp_va() before being dereferenced */
+
+       /* Switch to requested VMID */
+       write_sysreg(kvm->arch.vttbr, VTTBR);
+       isb();
+
+       write_sysreg(0, TLBIALL); /* NOTE(review): TLBIALL rather than an *IS form; presumably affects only the executing CPU - confirm against the ARM ARM */
+       dsb(nsh);
+       isb();
+
+       write_sysreg(0, VTTBR); /* VTTBR is set back to 0 only after the barriers above */
+}
+
 void __hyp_text __kvm_flush_vm_context(void)
 {
        write_sysreg(0, TLBIALLNSNHIS);
index fab5a50503aedab7b4d875ff6acc1879ad3eeaa0..7d7952e5a3b1563e4245d91993ed11d664ddb3b3 100644 (file)
@@ -10,6 +10,7 @@
  * 27/03/03 Ian Molton Clean up CONFIG_CPU
  *
  */
+#include <linux/kern_levels.h>
 #include <linux/linkage.h>
 #include <asm/assembler.h>
                .text
@@ -83,13 +84,13 @@ for_each_frame:     tst     frame, mask             @ Check for address exceptions
                teq     r3, r1, lsr #11
                ldreq   r0, [frame, #-8]        @ get sp
                subeq   r0, r0, #4              @ point at the last arg
-               bleq    .Ldumpstm               @ dump saved registers
+               bleq    dump_backtrace_stm      @ dump saved registers
 
 1004:          ldr     r1, [sv_pc, #0]         @ if stmfd sp!, {..., fp, ip, lr, pc}
                ldr     r3, .Ldsi               @ instruction exists,
                teq     r3, r1, lsr #11
                subeq   r0, frame, #16
-               bleq    .Ldumpstm               @ dump saved registers
+               bleq    dump_backtrace_stm      @ dump saved registers
 
                teq     sv_fp, #0               @ zero saved fp means
                beq     no_frame                @ no further frames
@@ -112,38 +113,6 @@ ENDPROC(c_backtrace)
                .long   1004b, 1006b
                .popsection
 
-#define instr r4
-#define reg   r5
-#define stack r6
-
-.Ldumpstm:     stmfd   sp!, {instr, reg, stack, r7, lr}
-               mov     stack, r0
-               mov     instr, r1
-               mov     reg, #10
-               mov     r7, #0
-1:             mov     r3, #1
- ARM(          tst     instr, r3, lsl reg      )
- THUMB(                lsl     r3, reg                 )
- THUMB(                tst     instr, r3               )
-               beq     2f
-               add     r7, r7, #1
-               teq     r7, #6
-               moveq   r7, #0
-               adr     r3, .Lcr
-               addne   r3, r3, #1              @ skip newline
-               ldr     r2, [stack], #-4
-               mov     r1, reg
-               adr     r0, .Lfp
-               bl      printk
-2:             subs    reg, reg, #1
-               bpl     1b
-               teq     r7, #0
-               adrne   r0, .Lcr
-               blne    printk
-               ldmfd   sp!, {instr, reg, stack, r7, pc}
-
-.Lfp:          .asciz  " r%d:%08x%s"
-.Lcr:          .asciz  "\n"
 .Lbad:         .asciz  "Backtrace aborted due to bad frame pointer <%p>\n"
                .align
 .Ldsi:         .word   0xe92dd800 >> 11        @ stmfd sp!, {... fp, ip, lr, pc}
index 1512bebfbf1b18ad317648891385a24e93d1f35f..7a4b060490012dd29f8a6d9fb8e24dfa58896bd1 100644 (file)
@@ -98,12 +98,9 @@ ENDPROC(arm_copy_from_user)
        .pushsection .fixup,"ax"
        .align 0
        copy_abort_preamble
-       ldmfd   sp!, {r1, r2}
-       sub     r3, r0, r1
-       rsb     r1, r3, r2
-       str     r1, [sp]
-       bl      __memzero
-       ldr     r0, [sp], #4
+       ldmfd   sp!, {r1, r2, r3}
+       sub     r0, r0, r1
+       rsb     r0, r0, r2
        copy_abort_end
        .popsection
 
index 0df062d8b2c942f84a31a923e0a4f221c6c9366d..b54db47f6f322d358f7742ecc7e17b23e0c2b667 100644 (file)
@@ -408,7 +408,7 @@ static struct genpd_onecell_data imx_gpc_onecell_data = {
 static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg)
 {
        struct clk *clk;
-       int i;
+       int i, ret;
 
        imx6q_pu_domain.reg = pu_reg;
 
@@ -430,13 +430,22 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg)
        if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS))
                return 0;
 
-       pm_genpd_init(&imx6q_pu_domain.base, NULL, false);
-       return of_genpd_add_provider_onecell(dev->of_node,
+       for (i = 0; i < ARRAY_SIZE(imx_gpc_domains); i++)
+               pm_genpd_init(imx_gpc_domains[i], NULL, false);
+
+       ret =  of_genpd_add_provider_onecell(dev->of_node,
                                             &imx_gpc_onecell_data);
+       if (ret)
+               goto power_off;
+
+       return 0;
 
+power_off:
+       imx6q_pm_pu_power_off(&imx6q_pu_domain.base);
 clk_err:
        while (i--)
                clk_put(imx6q_pu_domain.clk[i]);
+       imx6q_pu_domain.reg = NULL;
        return -EINVAL;
 }
 
index 97fd25105e2c0d1e0e1bb5a0a14471e4be0842f6..45801b27ee5ced633fae6a7c6ca238cf203f0056 100644 (file)
@@ -173,7 +173,7 @@ static void __init imx6q_enet_phy_init(void)
                                ksz9021rn_phy_fixup);
                phy_register_fixup_for_uid(PHY_ID_KSZ9031, MICREL_PHY_ID_MASK,
                                ksz9031rn_phy_fixup);
-               phy_register_fixup_for_uid(PHY_ID_AR8031, 0xffffffff,
+               phy_register_fixup_for_uid(PHY_ID_AR8031, 0xffffffef,
                                ar8031_phy_fixup);
                phy_register_fixup_for_uid(PHY_ID_AR8035, 0xffffffef,
                                ar8035_phy_fixup);
index f9b6bd306cfea8fc29d48485e47b60c701b21df3..541647f5719255cfd755a22b0435f97426e92612 100644 (file)
@@ -23,6 +23,7 @@ config MACH_MVEBU_V7
        select CACHE_L2X0
        select ARM_CPU_SUSPEND
        select MACH_MVEBU_ANY
+       select MVEBU_CLK_COREDIV
 
 config MACH_ARMADA_370
        bool "Marvell Armada 370 boards"
@@ -32,7 +33,6 @@ config MACH_ARMADA_370
        select CPU_PJ4B
        select MACH_MVEBU_V7
        select PINCTRL_ARMADA_370
-       select MVEBU_CLK_COREDIV
        help
          Say 'Y' here if you want your kernel to support boards based
          on the Marvell Armada 370 SoC with device tree.
@@ -50,7 +50,6 @@ config MACH_ARMADA_375
        select HAVE_SMP
        select MACH_MVEBU_V7
        select PINCTRL_ARMADA_375
-       select MVEBU_CLK_COREDIV
        help
          Say 'Y' here if you want your kernel to support boards based
          on the Marvell Armada 375 SoC with device tree.
@@ -68,7 +67,6 @@ config MACH_ARMADA_38X
        select HAVE_SMP
        select MACH_MVEBU_V7
        select PINCTRL_ARMADA_38X
-       select MVEBU_CLK_COREDIV
        help
          Say 'Y' here if you want your kernel to support boards based
          on the Marvell Armada 380/385 SoC with device tree.
index a9afeebd59f222c0db294858d50f2c2f8a3889e2..0465338183c706721d94d356683fad3fa17a4435 100644 (file)
@@ -71,6 +71,7 @@ config SOC_AM43XX
        select HAVE_ARM_TWD
        select ARM_ERRATA_754322
        select ARM_ERRATA_775420
+       select OMAP_INTERCONNECT
 
 config SOC_DRA7XX
        bool "TI DRA7XX"
index 2abd53ae3e7a13f5801b7680ebd65464f2bede5c..cc6d9fa609242e2cb4660db80de5092b95ddd636 100644 (file)
@@ -205,11 +205,15 @@ void __init omap2xxx_check_revision(void)
 
 #define OMAP3_SHOW_FEATURE(feat)               \
        if (omap3_has_ ##feat())                \
-               printk(#feat" ");
+               n += scnprintf(buf + n, sizeof(buf) - n, #feat " ");
 
 static void __init omap3_cpuinfo(void)
 {
        const char *cpu_name;
+       char buf[64];
+       int n = 0;
+
+       memset(buf, 0, sizeof(buf));
 
        /*
         * OMAP3430 and OMAP3530 are assumed to be same.
@@ -241,10 +245,10 @@ static void __init omap3_cpuinfo(void)
                cpu_name = "OMAP3503";
        }
 
-       sprintf(soc_name, "%s", cpu_name);
+       scnprintf(soc_name, sizeof(soc_name), "%s", cpu_name);
 
        /* Print verbose information */
-       pr_info("%s %s (", soc_name, soc_rev);
+       n += scnprintf(buf, sizeof(buf) - n, "%s %s (", soc_name, soc_rev);
 
        OMAP3_SHOW_FEATURE(l2cache);
        OMAP3_SHOW_FEATURE(iva);
@@ -252,8 +256,10 @@ static void __init omap3_cpuinfo(void)
        OMAP3_SHOW_FEATURE(neon);
        OMAP3_SHOW_FEATURE(isp);
        OMAP3_SHOW_FEATURE(192mhz_clk);
-
-       printk(")\n");
+       if (*(buf + n - 1) == ' ')
+               n--;
+       n += scnprintf(buf + n, sizeof(buf) - n, ")\n");
+       pr_info("%s", buf);
 }
 
 #define OMAP3_CHECK_FEATURE(status,feat)                               \
index 62680aad212666af7f07131546ff1e9bc01c2572..718981bb80cdf594d77c6b0d5d5067c806729862 100644 (file)
@@ -319,6 +319,9 @@ void __init omap3_prm_init_pm(bool has_uart4, bool has_iva)
        if (has_uart4) {
                en_uart4_mask = OMAP3630_EN_UART4_MASK;
                grpsel_uart4_mask = OMAP3630_GRPSEL_UART4_MASK;
+       } else {
+               en_uart4_mask = 0;
+               grpsel_uart4_mask = 0;
        }
 
        /* Enable wakeups in PER */
index cba8cada8c81a07a9cd5ee7ebc12108aa41597a5..cd15dbd62671690388b841199d2c18f52811504f 100644 (file)
@@ -87,6 +87,12 @@ int voltdm_scale(struct voltagedomain *voltdm,
                return -ENODATA;
        }
 
+       if (!voltdm->volt_data) {
+               pr_err("%s: No voltage data defined for vdd_%s\n",
+                       __func__, voltdm->name);
+               return -ENODATA;
+       }
+
        /* Adjust voltage to the exact voltage from the OPP table */
        for (i = 0; voltdm->volt_data[i].volt_nominal != 0; i++) {
                if (voltdm->volt_data[i].volt_nominal >= target_volt) {
index 82dddee3a469be64a585b0d3c69003aec87ff543..3930fbba30b4b3ccedd2039c552c5956cdd8d84a 100644 (file)
@@ -1,6 +1,7 @@
 config ARCH_UNIPHIER
        bool "Socionext UniPhier SoCs"
        depends on ARCH_MULTI_V7
+       select ARCH_HAS_RESET_CONTROLLER
        select ARM_AMBA
        select ARM_GLOBAL_TIMER
        select ARM_GIC
index 6d8e8e3365d17321f03b37fa67ab04a65b29f4ca..4cdfab31a0b612d11f1ad88c7985e007620fdca1 100644 (file)
@@ -7,7 +7,7 @@
  *        : r4 = aborted context pc
  *        : r5 = aborted context psr
  *
- * Returns : r4-r5, r10-r11, r13 preserved
+ * Returns : r4-r5, r9-r11, r13 preserved
  *
  * Purpose : obtain information about current aborted instruction.
  * Note: we read user space.  This means we might cause a data
@@ -48,7 +48,10 @@ ENTRY(v4t_late_abort)
 /* c */        b       do_DataAbort                    @ ldc   rd, [rn], #m    @ Same as ldr   rd, [rn], #m
 /* d */        b       do_DataAbort                    @ ldc   rd, [rn, #m]
 /* e */        b       .data_unknown
-/* f */
+/* f */        b       .data_unknown
+
+.data_unknown_r9:
+       ldr     r9, [sp], #4
 .data_unknown: @ Part of jumptable
        mov     r0, r4
        mov     r1, r8
@@ -57,6 +60,7 @@ ENTRY(v4t_late_abort)
 .data_arm_ldmstm:
        tst     r8, #1 << 21                    @ check writeback bit
        beq     do_DataAbort                    @ no writeback -> no fixup
+       str     r9, [sp, #-4]!
        mov     r7, #0x11
        orr     r7, r7, #0x1100
        and     r6, r8, r7
@@ -75,12 +79,14 @@ ENTRY(v4t_late_abort)
        subne   r7, r7, r6, lsl #2              @ Undo increment
        addeq   r7, r7, r6, lsl #2              @ Undo decrement
        str     r7, [r2, r9, lsr #14]           @ Put register 'Rn'
+       ldr     r9, [sp], #4
        b       do_DataAbort
 
 .data_arm_lateldrhpre:
        tst     r8, #1 << 21                    @ Check writeback bit
        beq     do_DataAbort                    @ No writeback -> no fixup
 .data_arm_lateldrhpost:
+       str     r9, [sp, #-4]!
        and     r9, r8, #0x00f                  @ get Rm / low nibble of immediate value
        tst     r8, #1 << 22                    @ if (immediate offset)
        andne   r6, r8, #0xf00                  @ { immediate high nibble
@@ -93,6 +99,7 @@ ENTRY(v4t_late_abort)
        subne   r7, r7, r6                      @ Undo incrmenet
        addeq   r7, r7, r6                      @ Undo decrement
        str     r7, [r2, r9, lsr #14]           @ Put register 'Rn'
+       ldr     r9, [sp], #4
        b       do_DataAbort
 
 .data_arm_lateldrpreconst:
@@ -101,12 +108,14 @@ ENTRY(v4t_late_abort)
 .data_arm_lateldrpostconst:
        movs    r6, r8, lsl #20                 @ Get offset
        beq     do_DataAbort                    @ zero -> no fixup
+       str     r9, [sp, #-4]!
        and     r9, r8, #15 << 16               @ Extract 'n' from instruction
        ldr     r7, [r2, r9, lsr #14]           @ Get register 'Rn'
        tst     r8, #1 << 23                    @ Check U bit
        subne   r7, r7, r6, lsr #20             @ Undo increment
        addeq   r7, r7, r6, lsr #20             @ Undo decrement
        str     r7, [r2, r9, lsr #14]           @ Put register 'Rn'
+       ldr     r9, [sp], #4
        b       do_DataAbort
 
 .data_arm_lateldrprereg:
@@ -115,6 +124,7 @@ ENTRY(v4t_late_abort)
 .data_arm_lateldrpostreg:
        and     r7, r8, #15                     @ Extract 'm' from instruction
        ldr     r6, [r2, r7, lsl #2]            @ Get register 'Rm'
+       str     r9, [sp, #-4]!
        mov     r9, r8, lsr #7                  @ get shift count
        ands    r9, r9, #31
        and     r7, r8, #0x70                   @ get shift type
@@ -126,33 +136,33 @@ ENTRY(v4t_late_abort)
        b       .data_arm_apply_r6_and_rn
        b       .data_arm_apply_r6_and_rn       @ 1: LSL #0
        nop
-       b       .data_unknown                   @ 2: MUL?
+       b       .data_unknown_r9                @ 2: MUL?
        nop
-       b       .data_unknown                   @ 3: MUL?
+       b       .data_unknown_r9                @ 3: MUL?
        nop
        mov     r6, r6, lsr r9                  @ 4: LSR #!0
        b       .data_arm_apply_r6_and_rn
        mov     r6, r6, lsr #32                 @ 5: LSR #32
        b       .data_arm_apply_r6_and_rn
-       b       .data_unknown                   @ 6: MUL?
+       b       .data_unknown_r9                @ 6: MUL?
        nop
-       b       .data_unknown                   @ 7: MUL?
+       b       .data_unknown_r9                @ 7: MUL?
        nop
        mov     r6, r6, asr r9                  @ 8: ASR #!0
        b       .data_arm_apply_r6_and_rn
        mov     r6, r6, asr #32                 @ 9: ASR #32
        b       .data_arm_apply_r6_and_rn
-       b       .data_unknown                   @ A: MUL?
+       b       .data_unknown_r9                @ A: MUL?
        nop
-       b       .data_unknown                   @ B: MUL?
+       b       .data_unknown_r9                @ B: MUL?
        nop
        mov     r6, r6, ror r9                  @ C: ROR #!0
        b       .data_arm_apply_r6_and_rn
        mov     r6, r6, rrx                     @ D: RRX
        b       .data_arm_apply_r6_and_rn
-       b       .data_unknown                   @ E: MUL?
+       b       .data_unknown_r9                @ E: MUL?
        nop
-       b       .data_unknown                   @ F: MUL?
+       b       .data_unknown_r9                @ F: MUL?
 
 .data_thumb_abort:
        ldrh    r8, [r4]                        @ read instruction
@@ -190,6 +200,7 @@ ENTRY(v4t_late_abort)
 .data_thumb_pushpop:
        tst     r8, #1 << 10
        beq     .data_unknown
+       str     r9, [sp, #-4]!
        and     r6, r8, #0x55                   @ hweight8(r8) + R bit
        and     r9, r8, #0xaa
        add     r6, r6, r9, lsr #1
@@ -204,9 +215,11 @@ ENTRY(v4t_late_abort)
        addeq   r7, r7, r6, lsl #2              @ increment SP if PUSH
        subne   r7, r7, r6, lsl #2              @ decrement SP if POP
        str     r7, [r2, #13 << 2]
+       ldr     r9, [sp], #4
        b       do_DataAbort
 
 .data_thumb_ldmstm:
+       str     r9, [sp, #-4]!
        and     r6, r8, #0x55                   @ hweight8(r8)
        and     r9, r8, #0xaa
        add     r6, r6, r9, lsr #1
@@ -219,4 +232,5 @@ ENTRY(v4t_late_abort)
        and     r6, r6, #15                     @ number of regs to transfer
        sub     r7, r7, r6, lsl #2              @ always decrement
        str     r7, [r2, r9, lsr #6]
+       ldr     r9, [sp], #4
        b       do_DataAbort
index ab4f74536057538ac5d8cc06930b8a04f103bcad..ab7710002ba60e99287beb41e689e3ae4d148d6e 100644 (file)
@@ -1167,7 +1167,7 @@ static int __init dma_debug_do_init(void)
        dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
        return 0;
 }
-fs_initcall(dma_debug_do_init);
+core_initcall(dma_debug_do_init);
 
 #ifdef CONFIG_ARM_DMA_USE_IOMMU
 
index f6d333f09bfe338a312fed4a246a82814d1311b7..8dea61640cc1a5e09332ad7ebf777773c9b38cc9 100644 (file)
@@ -96,7 +96,7 @@ ENTRY(cpu_cm7_proc_fin)
        ret     lr
 ENDPROC(cpu_cm7_proc_fin)
 
-       .section ".text.init", #alloc, #execinstr
+       .section ".init.text", #alloc, #execinstr
 
 __v7m_cm7_setup:
        mov     r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP)
index 30398dbc940a2218a2e9d792cb10b463a2d194c0..969ef880d234e3b340713948eb2e9aec8ba0b3a3 100644 (file)
@@ -915,7 +915,7 @@ config RANDOMIZE_BASE
 
 config RANDOMIZE_MODULE_REGION_FULL
        bool "Randomize the module region independently from the core kernel"
-       depends on RANDOMIZE_BASE
+       depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE
        default y
        help
          Randomizes the location of the module region without considering the
index cfbdf02ef5667683e603f3669308a81c7212c6b2..101794f5ce1008b7ff007fbfc7fa23d9e63bae67 100644 (file)
@@ -190,6 +190,7 @@ config ARCH_THUNDER
 
 config ARCH_UNIPHIER
        bool "Socionext UniPhier SoC Family"
+       select ARCH_HAS_RESET_CONTROLLER
        select PINCTRL
        help
          This enables support for Socionext UniPhier SoC family.
index ab51aed6b6c18eb362f8dc65621f932bed0dc7b8..3635b8662724569d3338ebb620d603c644fe38b7 100644 (file)
@@ -15,7 +15,7 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
 GZFLAGS                :=-9
 
 ifneq ($(CONFIG_RELOCATABLE),)
-LDFLAGS_vmlinux                += -pie -Bsymbolic
+LDFLAGS_vmlinux                += -pie -shared -Bsymbolic
 endif
 
 ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
index 2d7872a36b91232c1007358dfdea212052dac0b1..c4d544244b19734d9341e0555c0607234c9b81f8 100644 (file)
        };
 };
 
+&enet {
+       status = "ok";
+};
+
 &pci_phy0 {
        status = "ok";
 };
                nand-ecc-mode = "hw";
                nand-ecc-strength = <8>;
                nand-ecc-step-size = <512>;
+               nand-bus-width = <16>;
+               brcm,nand-oob-sector-size = <16>;
                #address-cells = <1>;
                #size-cells = <1>;
        };
 &mdio_mux_iproc {
        mdio@10 {
                gphy0: eth-phy@10 {
+                       enet-phy-lane-swap;
                        reg = <0x10>;
                };
        };
index d95dc408629ad5ab93a37fb01c1a5d2182b2aedf..773ed593da4d976b7d65374e71a0eab1f823a025 100644 (file)
 
                #include "ns2-clock.dtsi"
 
+               enet: ethernet@61000000 {
+                       compatible = "brcm,ns2-amac";
+                       reg = <0x61000000 0x1000>,
+                             <0x61090000 0x1000>,
+                             <0x61030000 0x100>;
+                       reg-names = "amac_base", "idm_base", "nicpm_base";
+                       interrupts = <GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>;
+                       phy-handle = <&gphy0>;
+                       phy-mode = "rgmii";
+                       status = "disabled";
+               };
+
                dma0: dma@61360000 {
                        compatible = "arm,pl330", "arm,primecell";
                        reg = <0x61360000 0x1000>;
index 58635f7f4668abbc8ebfd52ba96abafe63d817db..97d331ec250013ba7c1d0d7b1b9de36881fe5ee4 100644 (file)
                             <1 14 0xf08>, /* Physical Non-Secure PPI */
                             <1 11 0xf08>, /* Virtual PPI */
                             <1 10 0xf08>; /* Hypervisor PPI */
+               fsl,erratum-a008585;
        };
 
        pmu {
                };
 
                sata: sata@3200000 {
-                       compatible = "fsl,ls1043a-ahci", "fsl,ls1021a-ahci";
+                       compatible = "fsl,ls1043a-ahci";
                        reg = <0x0 0x3200000 0x0 0x10000>;
                        interrupts = <0 69 0x4>;
                        clocks = <&clockgen 4 0>;
+                       dma-coherent;
                };
 
                msi1: msi-controller1@1571000 {
index d1059765dfee4ac41831ff5faf1f2d71df9fc241..7f0dc13b4087f5a346fcb60e7ab40080d74c5500 100644 (file)
                             <1 14 4>, /* Physical Non-Secure PPI, active-low */
                             <1 11 4>, /* Virtual PPI, active-low */
                             <1 10 4>; /* Hypervisor PPI, active-low */
+               fsl,erratum-a008585;
        };
 
        pmu {
                        reg = <0x0 0x3200000 0x0 0x10000>;
                        interrupts = <0 133 0x4>; /* Level high type */
                        clocks = <&clockgen 4 3>;
+                       dma-coherent;
                };
 
                sata1: sata@3210000 {
                        reg = <0x0 0x3210000 0x0 0x10000>;
                        interrupts = <0 136 0x4>; /* Level high type */
                        clocks = <&clockgen 4 3>;
+                       dma-coherent;
                };
 
                usb0: usb3@3100000 {
index 1372e9a6aaa457d4c687fcfbd9e05c17adcc9c1f..a59d36cd6caf05f95ce90cbc539aa7e455bc6372 100644 (file)
 &pcie0 {
        status = "okay";
 };
+
+&mdio {
+       status = "okay";
+       phy0: ethernet-phy@0 {
+               reg = <0>;
+       };
+
+       phy1: ethernet-phy@1 {
+               reg = <1>;
+       };
+};
+
+&eth0 {
+       phy-mode = "rgmii-id";
+       phy = <&phy0>;
+       status = "okay";
+};
+
+&eth1 {
+       phy-mode = "sgmii";
+       phy = <&phy1>;
+       status = "okay";
+};
index c4762538ec0100dadfc8876f4f749d3b7bdc3a34..3b8eb45bdc7667b71fa8405397f977ea0f4a9b67 100644 (file)
                                status = "disabled";
                        };
 
-                       nb_perih_clk: nb-periph-clk@13000{
+                       nb_periph_clk: nb-periph-clk@13000 {
                                compatible = "marvell,armada-3700-periph-clock-nb";
                                reg = <0x13000 0x100>;
                                clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>,
                                #clock-cells = <1>;
                        };
 
-                       sb_perih_clk: sb-periph-clk@18000{
+                       sb_periph_clk: sb-periph-clk@18000 {
                                compatible = "marvell,armada-3700-periph-clock-sb";
                                reg = <0x18000 0x100>;
                                clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>,
                                };
                        };
 
+                       eth0: ethernet@30000 {
+                                  compatible = "marvell,armada-3700-neta";
+                                  reg = <0x30000 0x4000>;
+                                  interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
+                                  clocks = <&sb_periph_clk 8>;
+                                  status = "disabled";
+                       };
+
+                       mdio: mdio@32004 {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               compatible = "marvell,orion-mdio";
+                               reg = <0x32004 0x4>;
+                       };
+
+                       eth1: ethernet@40000 {
+                               compatible = "marvell,armada-3700-neta";
+                               reg = <0x40000 0x4000>;
+                               interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
+                               clocks = <&sb_periph_clk 7>;
+                               status = "disabled";
+                       };
+
                        usb3: usb@58000 {
                                compatible = "marvell,armada3700-xhci",
                                "generic-xhci";
index e5e3ed678b6f1d37ecb71ff27d4135fb483dbd8f..602e2c2e9a4dd13a4d892dffa3dcd141b71d89bc 100644 (file)
                                #address-cells = <0x1>;
                                #size-cells = <0x0>;
                                cell-index = <1>;
-                               clocks = <&cpm_syscon0 0 3>;
+                               clocks = <&cpm_syscon0 1 21>;
                                status = "disabled";
                        };
 
index 842fb333285c97e558ebc4f15556dfab327b73e7..6bf9e241179b7dc518f81b6d840b36c8b97b2a82 100644 (file)
                                reg = <0x700600 0x50>;
                                #address-cells = <0x1>;
                                #size-cells = <0x0>;
-                               cell-index = <1>;
-                               clocks = <&cps_syscon0 0 3>;
+                               cell-index = <3>;
+                               clocks = <&cps_syscon0 1 21>;
                                status = "disabled";
                        };
 
                                reg = <0x700680 0x50>;
                                #address-cells = <1>;
                                #size-cells = <0>;
-                               cell-index = <2>;
+                               cell-index = <4>;
                                clocks = <&cps_syscon0 1 21>;
                                status = "disabled";
                        };
index 46cdddfcea6c43ad30518d359babe7e094b943ac..e5eeca2c24565a76b10c64183e5d143ccbed810a 100644 (file)
        cap-mmc-highspeed;
        clock-frequency = <150000000>;
        disable-wp;
-       keep-power-in-suspend;
        non-removable;
        num-slots = <1>;
        vmmc-supply = <&vcc_io>;
                        };
 
                        vcc_sd: SWITCH_REG1 {
-                               regulator-always-on;
-                               regulator-boot-on;
                                regulator-name = "vcc_sd";
                        };
 
index 5797933ef80e68454fca1eb5c8ed03dec566f434..ea0a8eceefd467968ce9f30c9e875481e4126345 100644 (file)
                gpio = <&gpio3 11 GPIO_ACTIVE_LOW>;
                regulator-min-microvolt = <1800000>;
                regulator-max-microvolt = <3300000>;
-               regulator-always-on;
-               regulator-boot-on;
                vin-supply = <&vcc_io>;
        };
 
        bus-width = <8>;
        cap-mmc-highspeed;
        disable-wp;
-       keep-power-in-suspend;
        mmc-pwrseq = <&emmc_pwrseq>;
        mmc-hs200-1_2v;
        mmc-hs200-1_8v;
        clock-freq-min-max = <400000 50000000>;
        cap-sd-highspeed;
        card-detect-delay = <200>;
-       keep-power-in-suspend;
        num-slots = <1>;
        pinctrl-names = "default";
        pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>;
index b65c193dc64effabd9d50f00a2ea176dd083dabe..7afbfb0f96a3cc6459b709b623c916650d12bc49 100644 (file)
                ranges = <0x83000000 0x0 0xfa000000 0x0 0xfa000000 0x0 0x600000
                          0x81000000 0x0 0xfa600000 0x0 0xfa600000 0x0 0x100000>;
                resets = <&cru SRST_PCIE_CORE>, <&cru SRST_PCIE_MGMT>,
-                        <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE>;
-               reset-names = "core", "mgmt", "mgmt-sticky", "pipe";
+                        <&cru SRST_PCIE_MGMT_STICKY>, <&cru SRST_PCIE_PIPE>,
+                        <&cru SRST_PCIE_PM>, <&cru SRST_P_PCIE>,
+                        <&cru SRST_A_PCIE>;
+               reset-names = "core", "mgmt", "mgmt-sticky", "pipe",
+                             "pm", "pclk", "aclk";
                status = "disabled";
 
                pcie0_intc: interrupt-controller {
index 08fd7cf7769cfd075b324c212c7aacddd67f48a1..56a1b2e92cf32e804c9a71abbf8308e8c8168526 100644 (file)
                        reg = <0x59801000 0x400>;
                };
 
-               mioctrl@59810000 {
-                       compatible = "socionext,uniphier-mioctrl",
+               sdctrl@59810000 {
+                       compatible = "socionext,uniphier-ld20-sdctrl",
                                     "simple-mfd", "syscon";
                        reg = <0x59810000 0x800>;
 
-                       mio_clk: clock {
-                               compatible = "socionext,uniphier-ld20-mio-clock";
+                       sd_clk: clock {
+                               compatible = "socionext,uniphier-ld20-sd-clock";
                                #clock-cells = <1>;
                        };
 
-                       mio_rst: reset {
-                               compatible = "socionext,uniphier-ld20-mio-reset";
+                       sd_rst: reset {
+                               compatible = "socionext,uniphier-ld20-sd-reset";
                                #reset-cells = <1>;
                        };
                };
index 39feb85a6931093b064fa548b1778808186d0924..6e1cb8c5af4d6465e81fbdce2e2bd61920cb41e9 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef __ASM_ALTERNATIVE_H
 #define __ASM_ALTERNATIVE_H
 
-#include <asm/cpufeature.h>
+#include <asm/cpucaps.h>
 #include <asm/insn.h>
 
 #ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
new file mode 100644 (file)
index 0000000..87b4465
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * arch/arm64/include/asm/cpucaps.h
+ *
+ * Copyright (C) 2016 ARM Ltd.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_CPUCAPS_H
+#define __ASM_CPUCAPS_H
+
+#define ARM64_WORKAROUND_CLEAN_CACHE           0
+#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE   1
+#define ARM64_WORKAROUND_845719                        2
+#define ARM64_HAS_SYSREG_GIC_CPUIF             3
+#define ARM64_HAS_PAN                          4
+#define ARM64_HAS_LSE_ATOMICS                  5
+#define ARM64_WORKAROUND_CAVIUM_23154          6
+#define ARM64_WORKAROUND_834220                        7
+#define ARM64_HAS_NO_HW_PREFETCH               8
+#define ARM64_HAS_UAO                          9
+#define ARM64_ALT_PAN_NOT_UAO                  10
+#define ARM64_HAS_VIRT_HOST_EXTN               11
+#define ARM64_WORKAROUND_CAVIUM_27456          12
+#define ARM64_HAS_32BIT_EL0                    13
+#define ARM64_HYP_OFFSET_LOW                   14
+#define ARM64_MISMATCHED_CACHE_LINE_SIZE       15
+
+#define ARM64_NCAPS                            16
+
+#endif /* __ASM_CPUCAPS_H */
index 758d74fedfad9bafe86835a56272deebf705e591..0bc0b1de90c452b369c8562e252d841bb9590a90 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/jump_label.h>
 
+#include <asm/cpucaps.h>
 #include <asm/hwcap.h>
 #include <asm/sysreg.h>
 
 #define MAX_CPU_FEATURES       (8 * sizeof(elf_hwcap))
 #define cpu_feature(x)         ilog2(HWCAP_ ## x)
 
-#define ARM64_WORKAROUND_CLEAN_CACHE           0
-#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE   1
-#define ARM64_WORKAROUND_845719                        2
-#define ARM64_HAS_SYSREG_GIC_CPUIF             3
-#define ARM64_HAS_PAN                          4
-#define ARM64_HAS_LSE_ATOMICS                  5
-#define ARM64_WORKAROUND_CAVIUM_23154          6
-#define ARM64_WORKAROUND_834220                        7
-#define ARM64_HAS_NO_HW_PREFETCH               8
-#define ARM64_HAS_UAO                          9
-#define ARM64_ALT_PAN_NOT_UAO                  10
-#define ARM64_HAS_VIRT_HOST_EXTN               11
-#define ARM64_WORKAROUND_CAVIUM_27456          12
-#define ARM64_HAS_32BIT_EL0                    13
-#define ARM64_HYP_OFFSET_LOW                   14
-#define ARM64_MISMATCHED_CACHE_LINE_SIZE       15
-
-#define ARM64_NCAPS                            16
-
 #ifndef __ASSEMBLY__
 
 #include <linux/kernel.h>
@@ -94,7 +76,7 @@ struct arm64_cpu_capabilities {
        u16 capability;
        int def_scope;                  /* default scope */
        bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope);
-       void (*enable)(void *);         /* Called on all active CPUs */
+       int (*enable)(void *);          /* Called on all active CPUs */
        union {
                struct {        /* To be used for erratum handling only */
                        u32 midr_model;
index db0563c23482d52175bf6c22f733410797122e36..f7865dd9d86854760e69ea71d30452625d6713ec 100644 (file)
@@ -18,6 +18,9 @@
 #ifndef __ASM_EXEC_H
 #define __ASM_EXEC_H
 
+#include <linux/sched.h>
+
 extern unsigned long arch_align_stack(unsigned long sp);
+void uao_thread_switch(struct task_struct *next);
 
 #endif /* __ASM_EXEC_H */
index 18f746551bf632cc88a3a406359463e9d49340eb..ec3553eb9349093a9c7675f7fbe4de38dba396c7 100644 (file)
@@ -54,6 +54,7 @@ extern char __kvm_hyp_vector[];
 extern void __kvm_flush_vm_context(void);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
+extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
index fd9d5fd788f5f1df75febd7849bc89bc15eaf144..f5ea0ba70f077479ea9b2f4b1cb2fd077e9e20e3 100644 (file)
@@ -178,11 +178,6 @@ static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
        return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
 }
 
-static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
-{
-       return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR);
-}
-
 static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
 {
        return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE);
@@ -203,6 +198,12 @@ static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu)
        return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW);
 }
 
+static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu)
+{
+       return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) ||
+               kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */
+}
+
 static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu)
 {
        return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM);
index bd94e67667599dc1ce499ab30de2cca91f4e97be..e5050388e062209868bac64cab1740ece15b3e13 100644 (file)
@@ -62,6 +62,9 @@ struct kvm_arch {
        /* VTTBR value associated with above pgd and vmid */
        u64    vttbr;
 
+       /* The last vcpu id that ran on each physical CPU */
+       int __percpu *last_vcpu_ran;
+
        /* The maximum number of vCPUs depends on the used GIC model */
        int max_vcpus;
 
index a79b969c26fca7dcd73e06c4bfc36c6519cb646b..6f72fe8b0e3ee477a076fa5f9ef02d13c3d8eef3 100644 (file)
@@ -128,7 +128,7 @@ static inline unsigned long __kern_hyp_va(unsigned long v)
        return v;
 }
 
-#define kern_hyp_va(v)         (typeof(v))(__kern_hyp_va((unsigned long)(v)))
+#define kern_hyp_va(v)         ((typeof(v))(__kern_hyp_va((unsigned long)(v))))
 
 /*
  * We currently only support a 40bit IPA.
index 23acc00be32d019a9f0f71b75153b5b32996b083..fc756e22c84cd718278d4f0ba8ebc6a32ff4739e 100644 (file)
@@ -5,7 +5,6 @@
 
 #include <linux/stringify.h>
 #include <asm/alternative.h>
-#include <asm/cpufeature.h>
 
 #ifdef __ASSEMBLER__
 
index ba62df8c6e3540f8db0dcd86c26e4a9dbafb2a05..b71086d251954f7b72837899346525322dc5d724 100644 (file)
@@ -217,7 +217,7 @@ static inline void *phys_to_virt(phys_addr_t x)
 #define _virt_addr_valid(kaddr)        pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 #else
 #define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page))
-#define __page_to_voff(kaddr)  (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
+#define __page_to_voff(page)   (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
 
 #define page_to_virt(page)     ((void *)((__page_to_voff(page)) | PAGE_OFFSET))
 #define virt_to_page(vaddr)    ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START))
index e12af6754634b3d2aa031ae23ce25228dc766cfb..06ff7fd9e81feab27bb67f1a4af971ddc0ebf4cc 100644 (file)
@@ -17,6 +17,7 @@
 #define __ASM_MODULE_H
 
 #include <asm-generic/module.h>
+#include <asm/memory.h>
 
 #define MODULE_ARCH_VERMAGIC   "aarch64"
 
@@ -32,6 +33,10 @@ u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
                          Elf64_Sym *sym);
 
 #ifdef CONFIG_RANDOMIZE_BASE
+#ifdef CONFIG_MODVERSIONS
+#define ARCH_RELOCATES_KCRCTAB
+#define reloc_start            (kimage_vaddr - KIMAGE_VADDR)
+#endif
 extern u64 module_alloc_base;
 #else
 #define module_alloc_base      ((u64)_etext - MODULES_VSIZE)
index 2fee2f59288c94d70814771ed06fced11ede369d..5394c8405e6604bf612fd0c639c1f15a30d0f7d9 100644 (file)
@@ -44,48 +44,44 @@ static inline unsigned long __percpu_##op(void *ptr,                        \
                                                                        \
        switch (size) {                                                 \
        case 1:                                                         \
-               do {                                                    \
-                       asm ("//__per_cpu_" #op "_1\n"                  \
-                       "ldxrb    %w[ret], %[ptr]\n"                    \
+               asm ("//__per_cpu_" #op "_1\n"                          \
+               "1:     ldxrb     %w[ret], %[ptr]\n"                    \
                        #asm_op " %w[ret], %w[ret], %w[val]\n"          \
-                       "stxrb    %w[loop], %w[ret], %[ptr]\n"          \
-                       : [loop] "=&r" (loop), [ret] "=&r" (ret),       \
-                         [ptr] "+Q"(*(u8 *)ptr)                        \
-                       : [val] "Ir" (val));                            \
-               } while (loop);                                         \
+               "       stxrb     %w[loop], %w[ret], %[ptr]\n"          \
+               "       cbnz      %w[loop], 1b"                         \
+               : [loop] "=&r" (loop), [ret] "=&r" (ret),               \
+                 [ptr] "+Q"(*(u8 *)ptr)                                \
+               : [val] "Ir" (val));                                    \
                break;                                                  \
        case 2:                                                         \
-               do {                                                    \
-                       asm ("//__per_cpu_" #op "_2\n"                  \
-                       "ldxrh    %w[ret], %[ptr]\n"                    \
+               asm ("//__per_cpu_" #op "_2\n"                          \
+               "1:     ldxrh     %w[ret], %[ptr]\n"                    \
                        #asm_op " %w[ret], %w[ret], %w[val]\n"          \
-                       "stxrh    %w[loop], %w[ret], %[ptr]\n"          \
-                       : [loop] "=&r" (loop), [ret] "=&r" (ret),       \
-                         [ptr]  "+Q"(*(u16 *)ptr)                      \
-                       : [val] "Ir" (val));                            \
-               } while (loop);                                         \
+               "       stxrh     %w[loop], %w[ret], %[ptr]\n"          \
+               "       cbnz      %w[loop], 1b"                         \
+               : [loop] "=&r" (loop), [ret] "=&r" (ret),               \
+                 [ptr]  "+Q"(*(u16 *)ptr)                              \
+               : [val] "Ir" (val));                                    \
                break;                                                  \
        case 4:                                                         \
-               do {                                                    \
-                       asm ("//__per_cpu_" #op "_4\n"                  \
-                       "ldxr     %w[ret], %[ptr]\n"                    \
+               asm ("//__per_cpu_" #op "_4\n"                          \
+               "1:     ldxr      %w[ret], %[ptr]\n"                    \
                        #asm_op " %w[ret], %w[ret], %w[val]\n"          \
-                       "stxr     %w[loop], %w[ret], %[ptr]\n"          \
-                       : [loop] "=&r" (loop), [ret] "=&r" (ret),       \
-                         [ptr] "+Q"(*(u32 *)ptr)                       \
-                       : [val] "Ir" (val));                            \
-               } while (loop);                                         \
+               "       stxr      %w[loop], %w[ret], %[ptr]\n"          \
+               "       cbnz      %w[loop], 1b"                         \
+               : [loop] "=&r" (loop), [ret] "=&r" (ret),               \
+                 [ptr] "+Q"(*(u32 *)ptr)                               \
+               : [val] "Ir" (val));                                    \
                break;                                                  \
        case 8:                                                         \
-               do {                                                    \
-                       asm ("//__per_cpu_" #op "_8\n"                  \
-                       "ldxr     %[ret], %[ptr]\n"                     \
+               asm ("//__per_cpu_" #op "_8\n"                          \
+               "1:     ldxr      %[ret], %[ptr]\n"                     \
                        #asm_op " %[ret], %[ret], %[val]\n"             \
-                       "stxr     %w[loop], %[ret], %[ptr]\n"           \
-                       : [loop] "=&r" (loop), [ret] "=&r" (ret),       \
-                         [ptr] "+Q"(*(u64 *)ptr)                       \
-                       : [val] "Ir" (val));                            \
-               } while (loop);                                         \
+               "       stxr      %w[loop], %[ret], %[ptr]\n"           \
+               "       cbnz      %w[loop], 1b"                         \
+               : [loop] "=&r" (loop), [ret] "=&r" (ret),               \
+                 [ptr] "+Q"(*(u64 *)ptr)                               \
+               : [val] "Ir" (val));                                    \
                break;                                                  \
        default:                                                        \
                BUILD_BUG();                                            \
@@ -150,44 +146,40 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
 
        switch (size) {
        case 1:
-               do {
-                       asm ("//__percpu_xchg_1\n"
-                       "ldxrb %w[ret], %[ptr]\n"
-                       "stxrb %w[loop], %w[val], %[ptr]\n"
-                       : [loop] "=&r"(loop), [ret] "=&r"(ret),
-                         [ptr] "+Q"(*(u8 *)ptr)
-                       : [val] "r" (val));
-               } while (loop);
+               asm ("//__percpu_xchg_1\n"
+               "1:     ldxrb   %w[ret], %[ptr]\n"
+               "       stxrb   %w[loop], %w[val], %[ptr]\n"
+               "       cbnz    %w[loop], 1b"
+               : [loop] "=&r"(loop), [ret] "=&r"(ret),
+                 [ptr] "+Q"(*(u8 *)ptr)
+               : [val] "r" (val));
                break;
        case 2:
-               do {
-                       asm ("//__percpu_xchg_2\n"
-                       "ldxrh %w[ret], %[ptr]\n"
-                       "stxrh %w[loop], %w[val], %[ptr]\n"
-                       : [loop] "=&r"(loop), [ret] "=&r"(ret),
-                         [ptr] "+Q"(*(u16 *)ptr)
-                       : [val] "r" (val));
-               } while (loop);
+               asm ("//__percpu_xchg_2\n"
+               "1:     ldxrh   %w[ret], %[ptr]\n"
+               "       stxrh   %w[loop], %w[val], %[ptr]\n"
+               "       cbnz    %w[loop], 1b"
+               : [loop] "=&r"(loop), [ret] "=&r"(ret),
+                 [ptr] "+Q"(*(u16 *)ptr)
+               : [val] "r" (val));
                break;
        case 4:
-               do {
-                       asm ("//__percpu_xchg_4\n"
-                       "ldxr %w[ret], %[ptr]\n"
-                       "stxr %w[loop], %w[val], %[ptr]\n"
-                       : [loop] "=&r"(loop), [ret] "=&r"(ret),
-                         [ptr] "+Q"(*(u32 *)ptr)
-                       : [val] "r" (val));
-               } while (loop);
+               asm ("//__percpu_xchg_4\n"
+               "1:     ldxr    %w[ret], %[ptr]\n"
+               "       stxr    %w[loop], %w[val], %[ptr]\n"
+               "       cbnz    %w[loop], 1b"
+               : [loop] "=&r"(loop), [ret] "=&r"(ret),
+                 [ptr] "+Q"(*(u32 *)ptr)
+               : [val] "r" (val));
                break;
        case 8:
-               do {
-                       asm ("//__percpu_xchg_8\n"
-                       "ldxr %[ret], %[ptr]\n"
-                       "stxr %w[loop], %[val], %[ptr]\n"
-                       : [loop] "=&r"(loop), [ret] "=&r"(ret),
-                         [ptr] "+Q"(*(u64 *)ptr)
-                       : [val] "r" (val));
-               } while (loop);
+               asm ("//__percpu_xchg_8\n"
+               "1:     ldxr    %[ret], %[ptr]\n"
+               "       stxr    %w[loop], %[val], %[ptr]\n"
+               "       cbnz    %w[loop], 1b"
+               : [loop] "=&r"(loop), [ret] "=&r"(ret),
+                 [ptr] "+Q"(*(u64 *)ptr)
+               : [val] "r" (val));
                break;
        default:
                BUILD_BUG();
index 2065f46fa7407deb4d43e7dc8783ca8667759500..38b6a2b49d6895dbd7904a27792520514d445044 100644 (file)
 #define        ARMV8_PMU_EVTYPE_MASK   0xc800ffff      /* Mask for writable bits */
 #define        ARMV8_PMU_EVTYPE_EVENT  0xffff          /* Mask for EVENT bits */
 
-#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0       /* Software increment event */
+/*
+ * PMUv3 event types: required events
+ */
+#define ARMV8_PMUV3_PERFCTR_SW_INCR                            0x00
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL                   0x03
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE                          0x04
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED                                0x10
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES                         0x11
+#define ARMV8_PMUV3_PERFCTR_BR_PRED                            0x12
 
 /*
  * Event filters for PMUv3
index df2e53d3a96959430568e8c23509f28cbf8c9142..60e34824e18c96b06c02930961c8ce51b82a90e2 100644 (file)
@@ -188,8 +188,8 @@ static inline void spin_lock_prefetch(const void *ptr)
 
 #endif
 
-void cpu_enable_pan(void *__unused);
-void cpu_enable_uao(void *__unused);
-void cpu_enable_cache_maint_trap(void *__unused);
+int cpu_enable_pan(void *__unused);
+int cpu_enable_uao(void *__unused);
+int cpu_enable_cache_maint_trap(void *__unused);
 
 #endif /* __ASM_PROCESSOR_H */
index e8d46e8e60791a8e3a3b785563e8949b8c0007ec..6c80b3699cb8a18c076634f710e031a5b3505647 100644 (file)
@@ -286,7 +286,7 @@ asm(
 
 #define write_sysreg_s(v, r) do {                                      \
        u64 __val = (u64)v;                                             \
-       asm volatile("msr_s " __stringify(r) ", %0" : : "rZ" (__val));  \
+       asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \
 } while (0)
 
 static inline void config_sctlr_el1(u32 clear, u32 set)
index c47257c91b77e3d6516000c0c8bec5705b97b6dc..55d0adbf65098a78241d45038d1a57f642a7992e 100644 (file)
@@ -21,6 +21,7 @@
 /*
  * User space memory access functions
  */
+#include <linux/bitops.h>
 #include <linux/kasan-checks.h>
 #include <linux/string.h>
 #include <linux/thread_info.h>
@@ -102,6 +103,13 @@ static inline void set_fs(mm_segment_t fs)
        flag;                                                           \
 })
 
+/*
+ * When dealing with data aborts or instruction traps we may end up with
+ * a tagged userland pointer. Clear the tag to get a sane pointer to pass
+ * on to access_ok(), for instance.
+ */
+#define untagged_addr(addr)            sign_extend64(addr, 55)
+
 #define access_ok(type, addr, size)    __range_ok(addr, size)
 #define user_addr_max                  get_fs
 
@@ -278,14 +286,16 @@ static inline unsigned long __must_check __copy_to_user(void __user *to, const v
 
 static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n)
 {
+       unsigned long res = n;
        kasan_check_write(to, n);
 
        if (access_ok(VERIFY_READ, from, n)) {
                check_object_size(to, n, false);
-               n = __arch_copy_from_user(to, from, n);
-       } else /* security hole - plug it */
-               memset(to, 0, n);
-       return n;
+               res = __arch_copy_from_user(to, from, n);
+       }
+       if (unlikely(res))
+               memset(to + (n - res), 0, res);
+       return res;
 }
 
 static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n)
index 42ffdb54e162d64164ab9f515d1ce21a379fb3d7..b0988bb1bf648e0e322d1112880fedab706c6f32 100644 (file)
@@ -280,35 +280,43 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
 /*
  * Error-checking SWP macros implemented using ldxr{b}/stxr{b}
  */
-#define __user_swpX_asm(data, addr, res, temp, B)              \
+
+/* Arbitrary constant to ensure forward-progress of the LL/SC loop */
+#define __SWP_LL_SC_LOOPS      4
+
+#define __user_swpX_asm(data, addr, res, temp, temp2, B)       \
        __asm__ __volatile__(                                   \
+       "       mov             %w3, %w7\n"                     \
        ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,    \
                    CONFIG_ARM64_PAN)                           \
-       "0:     ldxr"B"         %w2, [%3]\n"                    \
-       "1:     stxr"B"         %w0, %w1, [%3]\n"               \
+       "0:     ldxr"B"         %w2, [%4]\n"                    \
+       "1:     stxr"B"         %w0, %w1, [%4]\n"               \
        "       cbz             %w0, 2f\n"                      \
-       "       mov             %w0, %w4\n"                     \
+       "       sub             %w3, %w3, #1\n"                 \
+       "       cbnz            %w3, 0b\n"                      \
+       "       mov             %w0, %w5\n"                     \
        "       b               3f\n"                           \
        "2:\n"                                                  \
        "       mov             %w1, %w2\n"                     \
        "3:\n"                                                  \
        "       .pushsection     .fixup,\"ax\"\n"               \
        "       .align          2\n"                            \
-       "4:     mov             %w0, %w5\n"                     \
+       "4:     mov             %w0, %w6\n"                     \
        "       b               3b\n"                           \
        "       .popsection"                                    \
        _ASM_EXTABLE(0b, 4b)                                    \
        _ASM_EXTABLE(1b, 4b)                                    \
        ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,    \
                CONFIG_ARM64_PAN)                               \
-       : "=&r" (res), "+r" (data), "=&r" (temp)                \
-       : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT)              \
+       : "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \
+       : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT),             \
+         "i" (__SWP_LL_SC_LOOPS)                               \
        : "memory")
 
-#define __user_swp_asm(data, addr, res, temp) \
-       __user_swpX_asm(data, addr, res, temp, "")
-#define __user_swpb_asm(data, addr, res, temp) \
-       __user_swpX_asm(data, addr, res, temp, "b")
+#define __user_swp_asm(data, addr, res, temp, temp2) \
+       __user_swpX_asm(data, addr, res, temp, temp2, "")
+#define __user_swpb_asm(data, addr, res, temp, temp2) \
+       __user_swpX_asm(data, addr, res, temp, temp2, "b")
 
 /*
  * Bit 22 of the instruction encoding distinguishes between
@@ -328,12 +336,12 @@ static int emulate_swpX(unsigned int address, unsigned int *data,
        }
 
        while (1) {
-               unsigned long temp;
+               unsigned long temp, temp2;
 
                if (type == TYPE_SWPB)
-                       __user_swpb_asm(*data, address, res, temp);
+                       __user_swpb_asm(*data, address, res, temp, temp2);
                else
-                       __user_swp_asm(*data, address, res, temp);
+                       __user_swp_asm(*data, address, res, temp, temp2);
 
                if (likely(res != -EAGAIN) || signal_pending(current))
                        break;
index 0150394f4cabf2f34b27c88ee6575b0a9b7b4489..b75e917aac464290b523e1b3cc8cd7822364eeb7 100644 (file)
@@ -39,10 +39,11 @@ has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry,
                (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask);
 }
 
-static void cpu_enable_trap_ctr_access(void *__unused)
+static int cpu_enable_trap_ctr_access(void *__unused)
 {
        /* Clear SCTLR_EL1.UCT */
        config_sctlr_el1(SCTLR_EL1_UCT, 0);
+       return 0;
 }
 
 #define MIDR_RANGE(model, min, max) \
index d577f263cc4aa46057e3b1ac210e23038e7d0e01..c02504ea304b701e1cc077388380079ac60f37d2 100644 (file)
@@ -19,7 +19,9 @@
 #define pr_fmt(fmt) "CPU features: " fmt
 
 #include <linux/bsearch.h>
+#include <linux/cpumask.h>
 #include <linux/sort.h>
+#include <linux/stop_machine.h>
 #include <linux/types.h>
 #include <asm/cpu.h>
 #include <asm/cpufeature.h>
@@ -941,7 +943,13 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
        for (; caps->matches; caps++)
                if (caps->enable && cpus_have_cap(caps->capability))
-                       on_each_cpu(caps->enable, NULL, true);
+                       /*
+                        * Use stop_machine() as it schedules the work allowing
+                        * us to modify PSTATE, instead of on_each_cpu() which
+                        * uses an IPI, giving us a PSTATE that disappears when
+                        * we return.
+                        */
+                       stop_machine(caps->enable, NULL, cpu_online_mask);
 }
 
 /*
index 427f6d3f084c30aeb35908155aae3af9aacadf2e..332e33193ccf1575727dfc8883644a5cfabd0e08 100644 (file)
@@ -586,8 +586,9 @@ CPU_LE(     movk    x0, #0x30d0, lsl #16    )       // Clear EE and E0E on LE systems
        b.lt    4f                              // Skip if no PMU present
        mrs     x0, pmcr_el0                    // Disable debug access traps
        ubfx    x0, x0, #11, #5                 // to EL2 and allow access to
-       msr     mdcr_el2, x0                    // all PMU counters from EL1
 4:
+       csel    x0, xzr, x0, lt                 // all PMU counters from EL1
+       msr     mdcr_el2, x0                    // (if they exist)
 
        /* Stage-2 translation */
        msr     vttbr_el2, xzr
index a9310a69fffd4d453b760a15d16cc8e9cd1a90e8..57ae9d9ed9bb666e5bd20698f9dce2d1f25d731f 100644 (file)
 
 /*
  * ARMv8 PMUv3 Performance Events handling code.
- * Common event types.
+ * Common event types (some are defined in asm/perf_event.h).
  */
 
-/* Required events. */
-#define ARMV8_PMUV3_PERFCTR_SW_INCR                            0x00
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL                   0x03
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE                          0x04
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED                                0x10
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES                         0x11
-#define ARMV8_PMUV3_PERFCTR_BR_PRED                            0x12
-
 /* At least one of the following is required. */
 #define ARMV8_PMUV3_PERFCTR_INST_RETIRED                       0x08
 #define ARMV8_PMUV3_PERFCTR_INST_SPEC                          0x1B
index 27b2f1387df40b61b4aa059be5650d329964da6b..01753cd7d3f01d3551568d64612bd5a14843ac18 100644 (file)
@@ -49,6 +49,7 @@
 #include <asm/alternative.h>
 #include <asm/compat.h>
 #include <asm/cacheflush.h>
+#include <asm/exec.h>
 #include <asm/fpsimd.h>
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
@@ -186,10 +187,19 @@ void __show_regs(struct pt_regs *regs)
        printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n",
               regs->pc, lr, regs->pstate);
        printk("sp : %016llx\n", sp);
-       for (i = top_reg; i >= 0; i--) {
+
+       i = top_reg;
+
+       while (i >= 0) {
                printk("x%-2d: %016llx ", i, regs->regs[i]);
-               if (i % 2 == 0)
-                       printk("\n");
+               i--;
+
+               if (i % 2 == 0) {
+                       pr_cont("x%-2d: %016llx ", i, regs->regs[i]);
+                       i--;
+               }
+
+               pr_cont("\n");
        }
        printk("\n");
 }
@@ -301,7 +311,7 @@ static void tls_thread_switch(struct task_struct *next)
 }
 
 /* Restore the UAO state depending on next's addr_limit */
-static void uao_thread_switch(struct task_struct *next)
+void uao_thread_switch(struct task_struct *next)
 {
        if (IS_ENABLED(CONFIG_ARM64_UAO)) {
                if (task_thread_info(next)->addr_limit == KERNEL_DS)
index b8799e7c79de51dac71c5f7485709177b7cd3b5d..1bec41b5fda3917b2ed7583663a239f25c6126bf 100644 (file)
@@ -135,7 +135,7 @@ ENTRY(_cpu_resume)
 
 #ifdef CONFIG_KASAN
        mov     x0, sp
-       bl      kasan_unpoison_remaining_stack
+       bl      kasan_unpoison_task_stack_below
 #endif
 
        ldp     x19, x20, [x29, #16]
index d3f151cfd4a1f800a58511ca0a8d10d37001b601..8507703dabe4a4cb521527456f14ba26855d5b43 100644 (file)
@@ -544,6 +544,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
                        return;
                }
                bootcpu_valid = true;
+               early_map_cpu_to_node(0, acpi_numa_get_nid(0, hwid));
                return;
        }
 
index ad734142070dcd143f3a7559ff50a044e93e95ac..bb0cd787a9d31dc4762d3a98257b3e11d8afe6f0 100644 (file)
@@ -1,8 +1,11 @@
 #include <linux/ftrace.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
+#include <asm/alternative.h>
 #include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
 #include <asm/debug-monitors.h>
+#include <asm/exec.h>
 #include <asm/pgtable.h>
 #include <asm/memory.h>
 #include <asm/mmu_context.h>
@@ -49,6 +52,14 @@ void notrace __cpu_suspend_exit(void)
         */
        set_my_cpu_offset(per_cpu_offset(cpu));
 
+       /*
+        * PSTATE was not saved over suspend/resume, re-enable any detected
+        * features that might not have been set correctly.
+        */
+       asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,
+                       CONFIG_ARM64_PAN));
+       uao_thread_switch(current);
+
        /*
         * Restore HW breakpoint registers to sane values
         * before debug exceptions are possibly reenabled
index 5ff020f8fb7f65cdec1d88213f0f185ef157ef33..c9986b3e0a96f9ddd0d79ad52e0d5de7841172ec 100644 (file)
@@ -428,24 +428,28 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
        force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
 }
 
-void cpu_enable_cache_maint_trap(void *__unused)
+int cpu_enable_cache_maint_trap(void *__unused)
 {
        config_sctlr_el1(SCTLR_EL1_UCI, 0);
+       return 0;
 }
 
 #define __user_cache_maint(insn, address, res)                 \
-       asm volatile (                                          \
-               "1:     " insn ", %1\n"                         \
-               "       mov     %w0, #0\n"                      \
-               "2:\n"                                          \
-               "       .pushsection .fixup,\"ax\"\n"           \
-               "       .align  2\n"                            \
-               "3:     mov     %w0, %w2\n"                     \
-               "       b       2b\n"                           \
-               "       .popsection\n"                          \
-               _ASM_EXTABLE(1b, 3b)                            \
-               : "=r" (res)                                    \
-               : "r" (address), "i" (-EFAULT) )
+       if (untagged_addr(address) >= user_addr_max())          \
+               res = -EFAULT;                                  \
+       else                                                    \
+               asm volatile (                                  \
+                       "1:     " insn ", %1\n"                 \
+                       "       mov     %w0, #0\n"              \
+                       "2:\n"                                  \
+                       "       .pushsection .fixup,\"ax\"\n"   \
+                       "       .align  2\n"                    \
+                       "3:     mov     %w0, %w2\n"             \
+                       "       b       2b\n"                   \
+                       "       .popsection\n"                  \
+                       _ASM_EXTABLE(1b, 3b)                    \
+                       : "=r" (res)                            \
+                       : "r" (address), "i" (-EFAULT) )
 
 static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 {
index 9cc0ea784ae60a0a5086f1c5f6d129cd7d360f13..88e2f2b938f070c7570a8d76ae9ca348b1fd71e9 100644 (file)
@@ -64,6 +64,21 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
        write_sysreg(0, vttbr_el2);
 }
 
+void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
+{
+       struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
+
+       /* Switch to requested VMID */
+       write_sysreg(kvm->arch.vttbr, vttbr_el2);
+       isb();
+
+       asm volatile("tlbi vmalle1" : : );
+       dsb(nsh);
+       isb();
+
+       write_sysreg(0, vttbr_el2);
+}
+
 void __hyp_text __kvm_flush_vm_context(void)
 {
        dsb(ishst);
index f302fdb3a030b452286ed5e50e4e2ecb9c0e54fc..87e7e6608cd8a31e6913be8134b90e443df314cb 100644 (file)
@@ -597,8 +597,14 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
 
                        idx = ARMV8_PMU_CYCLE_IDX;
                } else {
-                       BUG();
+                       return false;
                }
+       } else if (r->CRn == 0 && r->CRm == 9) {
+               /* PMCCNTR */
+               if (pmu_access_event_counter_el0_disabled(vcpu))
+                       return false;
+
+               idx = ARMV8_PMU_CYCLE_IDX;
        } else if (r->CRn == 14 && (r->CRm & 12) == 8) {
                /* PMEVCNTRn_EL0 */
                if (pmu_access_event_counter_el0_disabled(vcpu))
@@ -606,7 +612,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
 
                idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
        } else {
-               BUG();
+               return false;
        }
 
        if (!pmu_counter_idx_valid(vcpu, idx))
index 0b90497d4424c59d0a9ce2dcf5642012f452d3c8..4fd67ea03bb054d44e579426def0051429579920 100644 (file)
@@ -79,11 +79,6 @@ ENDPROC(__arch_copy_from_user)
 
        .section .fixup,"ax"
        .align  2
-9998:
-       sub     x0, end, dst
-9999:
-       strb    wzr, [dst], #1                  // zero remaining buffer space
-       cmp     dst, end
-       b.lo    9999b
+9998:  sub     x0, end, dst                    // bytes not copied
        ret
        .previous
index 53d9159662fe4c2cc12c810539782b5416f58bfb..0f87883748153bb3ab7cfedf090faf20e614393c 100644 (file)
@@ -29,7 +29,9 @@
 #include <linux/sched.h>
 #include <linux/highmem.h>
 #include <linux/perf_event.h>
+#include <linux/preempt.h>
 
+#include <asm/bug.h>
 #include <asm/cpufeature.h>
 #include <asm/exception.h>
 #include <asm/debug-monitors.h>
@@ -670,9 +672,17 @@ asmlinkage int __exception do_debug_exception(unsigned long addr,
 NOKPROBE_SYMBOL(do_debug_exception);
 
 #ifdef CONFIG_ARM64_PAN
-void cpu_enable_pan(void *__unused)
+int cpu_enable_pan(void *__unused)
 {
+       /*
+        * We modify PSTATE. This won't work from irq context as the PSTATE
+        * is discarded once we return from the exception.
+        */
+       WARN_ON_ONCE(in_interrupt());
+
        config_sctlr_el1(SCTLR_EL1_SPAN, 0);
+       asm(SET_PSTATE_PAN(1));
+       return 0;
 }
 #endif /* CONFIG_ARM64_PAN */
 
@@ -683,8 +693,9 @@ void cpu_enable_pan(void *__unused)
  * We need to enable the feature at runtime (instead of adding it to
  * PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
  */
-void cpu_enable_uao(void *__unused)
+int cpu_enable_uao(void *__unused)
 {
        asm(SET_PSTATE_UAO(1));
+       return 0;
 }
 #endif /* CONFIG_ARM64_UAO */
index 21c489bdeb4ee03d0a126070452c92b1aca6a1b6..212c4d1e2f26df7f291270fb461abac912ce7161 100644 (file)
@@ -421,35 +421,35 @@ void __init mem_init(void)
 
        pr_notice("Virtual kernel memory layout:\n");
 #ifdef CONFIG_KASAN
-       pr_cont("    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n",
+       pr_notice("    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n",
                MLG(KASAN_SHADOW_START, KASAN_SHADOW_END));
 #endif
-       pr_cont("    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n",
+       pr_notice("    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n",
                MLM(MODULES_VADDR, MODULES_END));
-       pr_cont("    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n",
+       pr_notice("    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n",
                MLG(VMALLOC_START, VMALLOC_END));
-       pr_cont("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+       pr_notice("      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n",
                MLK_ROUNDUP(_text, _etext));
-       pr_cont("    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+       pr_notice("    .rodata : 0x%p" " - 0x%p" "   (%6ld KB)\n",
                MLK_ROUNDUP(__start_rodata, __init_begin));
-       pr_cont("      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+       pr_notice("      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n",
                MLK_ROUNDUP(__init_begin, __init_end));
-       pr_cont("      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+       pr_notice("      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
                MLK_ROUNDUP(_sdata, _edata));
-       pr_cont("       .bss : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+       pr_notice("       .bss : 0x%p" " - 0x%p" "   (%6ld KB)\n",
                MLK_ROUNDUP(__bss_start, __bss_stop));
-       pr_cont("    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n",
+       pr_notice("    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n",
                MLK(FIXADDR_START, FIXADDR_TOP));
-       pr_cont("    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n",
+       pr_notice("    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n",
                MLM(PCI_IO_START, PCI_IO_END));
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-       pr_cont("    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n",
+       pr_notice("    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n",
                MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE));
-       pr_cont("              0x%16lx - 0x%16lx   (%6ld MB actual)\n",
+       pr_notice("              0x%16lx - 0x%16lx   (%6ld MB actual)\n",
                MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
                    (unsigned long)virt_to_page(high_memory)));
 #endif
-       pr_cont("    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
+       pr_notice("    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
                MLM(__phys_to_virt(memblock_start_of_DRAM()),
                    (unsigned long)high_memory));
 
index 778a985c8a70761d8bf3337c073b76a09e50d889..4b32168cf91a0e3b99e1d899960d47fddf38ba06 100644 (file)
@@ -147,7 +147,7 @@ static int __init early_cpu_to_node(int cpu)
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
-       return node_distance(from, to);
+       return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
 }
 
 static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
@@ -223,8 +223,11 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
        void *nd;
        int tnid;
 
-       pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
-               nid, start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
+       if (start_pfn < end_pfn)
+               pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid,
+                       start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
+       else
+               pr_info("Initmem setup node %d [<memory-less node>]\n", nid);
 
        nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
        nd = __va(nd_pa);
index 0a2a70096d8b2dc7345b6e1ca85c68b3cdd39b9e..0eff88aa6d6ae6626ad411876aeca9ee41887ca4 100644 (file)
@@ -163,18 +163,29 @@ static inline int bad_user_access_length(void)
                : "a" (__ptr(ptr)));            \
 })
 
-#define __copy_from_user(to, from, n) copy_from_user(to, from, n)
-#define __copy_to_user(to, from, n) copy_to_user(to, from, n)
 #define __copy_to_user_inatomic __copy_to_user
 #define __copy_from_user_inatomic __copy_from_user
 
+static inline unsigned long __must_check
+__copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+       memcpy(to, (const void __force *)from, n);
+       return 0;
+}
+
+static inline unsigned long __must_check
+__copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+       memcpy((void __force *)to, from, n);
+       SSYNC();
+       return 0;
+}
+
 static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-       if (likely(access_ok(VERIFY_READ, from, n))) {
-               memcpy(to, (const void __force *)from, n);
-               return 0;
-       }
+       if (likely(access_ok(VERIFY_READ, from, n)))
+               return __copy_from_user(to, from, n);
        memset(to, 0, n);
        return n;
 }
@@ -182,12 +193,9 @@ copy_from_user(void *to, const void __user *from, unsigned long n)
 static inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-       if (access_ok(VERIFY_WRITE, to, n))
-               memcpy((void __force *)to, from, n);
-       else
-               return n;
-       SSYNC();
-       return 0;
+       if (likely(access_ok(VERIFY_WRITE, to, n)))
+               return __copy_to_user(to, from, n);
+       return n;
 }
 
 /*
index 8b8fe671b1a6dbf347dd4d31a7f272ef87154bcb..8d79286ee4e878044b02c963f5fb3c6deaaafb19 100644 (file)
@@ -271,7 +271,7 @@ long arch_ptrace(struct task_struct *child, long request,
                        case BFIN_MEM_ACCESS_CORE:
                        case BFIN_MEM_ACCESS_CORE_ONLY:
                                copied = access_process_vm(child, addr, &tmp,
-                                                          to_copy, 0);
+                                                          to_copy, FOLL_FORCE);
                                if (copied)
                                        break;
 
@@ -324,7 +324,8 @@ long arch_ptrace(struct task_struct *child, long request,
                        case BFIN_MEM_ACCESS_CORE:
                        case BFIN_MEM_ACCESS_CORE_ONLY:
                                copied = access_process_vm(child, addr, &data,
-                                                          to_copy, 1);
+                                                          to_copy,
+                                                          FOLL_FORCE | FOLL_WRITE);
                                break;
                        case BFIN_MEM_ACCESS_DMA:
                                if (safe_dma_memcpy(paddr, &data, to_copy))
index b5698c876fccd91c3960439753d24510ce008984..0068fd411a8473707efb4a1718e33edee0fdbb50 100644 (file)
@@ -2722,7 +2722,6 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig
        err = get_user_pages((unsigned long int)(oper.indata + prev_ix),
                             noinpages,
                             0,  /* read access only for in data */
-                            0, /* no force */
                             inpages,
                             NULL);
 
@@ -2736,8 +2735,7 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig
        if (oper.do_cipher){
                err = get_user_pages((unsigned long int)oper.cipher_outdata,
                                     nooutpages,
-                                    1, /* write access for out data */
-                                    0, /* no force */
+                                    FOLL_WRITE, /* write access for out data */
                                     outpages,
                                     NULL);
                up_read(&current->mm->mmap_sem);
@@ -3151,7 +3149,7 @@ static void print_dma_descriptors(struct cryptocop_int_operation *iop)
        printk("print_dma_descriptors start\n");
 
        printk("iop:\n");
-       printk("\tsid: 0x%lld\n", iop->sid);
+       printk("\tsid: 0x%llx\n", iop->sid);
 
        printk("\tcdesc_out: 0x%p\n", iop->cdesc_out);
        printk("\tcdesc_in: 0x%p\n", iop->cdesc_in);
index f085229cf870bc306b95df2dee4eadd9ebdd4b8b..f0df654ac6fc5ca53ffd6d6951817e92f492e0be 100644 (file)
@@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *child, long request,
                                /* The trampoline page is globally mapped, no page table to traverse.*/
                                tmp = *(unsigned long*)addr;
                        } else {
-                               copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+                               copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE);
 
                                if (copied != sizeof(tmp))
                                        break;
@@ -279,7 +279,7 @@ static int insn_size(struct task_struct *child, unsigned long pc)
   int opsize = 0;
 
   /* Read the opcode at pc (do what PTRACE_PEEKTEXT would do). */
-  copied = access_process_vm(child, pc, &opcode, sizeof(opcode), 0);
+  copied = access_process_vm(child, pc, &opcode, sizeof(opcode), FOLL_FORCE);
   if (copied != sizeof(opcode))
     return 0;
 
index afbc98f02d278d097d31add71104f5e4fe4040ea..81e03530ed39ee7e3b25b7442361f64aa883c179 100644 (file)
@@ -90,5 +90,7 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _ASM_SOCKET_H */
 
index b408fe660cf8ceab685de783220a2db991b4dfc3..3cef06875f5ca32930c06b07db027e86f362ce94 100644 (file)
@@ -31,7 +31,6 @@ struct thread_info {
        int                cpu;                 /* cpu we're on */
        int                preempt_count;       /* 0 => preemptable, <0 => BUG */
        mm_segment_t            addr_limit;
-       struct restart_block restart_block;
 };
 
 /*
@@ -44,9 +43,6 @@ struct thread_info {
        .cpu =          0,                      \
        .preempt_count = INIT_PREEMPT_COUNT,    \
        .addr_limit     = KERNEL_DS,            \
-       .restart_block  = {                     \
-               .fn = do_no_restart_syscall,    \
-       },                                      \
 }
 
 #define init_thread_info       (init_thread_union.thread_info)
index ad1f81f574e5785ee42e26219173595c1d8a66b6..7138303cbbf25dfd7fb03c6fe177199e441617ab 100644 (file)
@@ -79,7 +79,7 @@ restore_sigcontext(struct sigcontext *usc, int *pd0)
        unsigned int er0;
 
        /* Always make any pending restarted system calls return -EINTR */
-       current_thread_info()->restart_block.fn = do_no_restart_syscall;
+       current->restart_block.fn = do_no_restart_syscall;
 
        /* restore passed registers */
 #define COPY(r)  do { err |= get_user(regs->r, &usc->sc_##r); } while (0)
index 2e805e0cc56059f51074ec738010a81d2232605f..df6e9968c84541c0dafdd3306b452fa762e3185f 100644 (file)
@@ -33,5 +33,5 @@ $(obj)/vmlinux.bin: vmlinux FORCE
 LDFLAGS_bootloader = -static -T
 
 $(obj)/bootloader: $(src)/bootloader.lds $(obj)/bootloader.o $(obj)/boot_head.o $(obj)/fw-emu.o \
-                   lib/lib.a arch/ia64/lib/built-in.o arch/ia64/lib/lib.a FORCE
+                   lib/lib.a arch/ia64/lib/lib.a FORCE
        $(call if_changed,ld)
diff --git a/arch/ia64/include/asm/export.h b/arch/ia64/include/asm/export.h
new file mode 100644 (file)
index 0000000..ad18c65
--- /dev/null
@@ -0,0 +1,3 @@
+/* EXPORT_DATA_SYMBOL != EXPORT_SYMBOL here */
+#define KSYM_FUNC(name) @fptr(name)
+#include <asm-generic/export.h>
index 0e00c9a9f4100b48e0302c13c43c90962c38d631..7a1f8310596bede26f78d60a2bee0a4ac97a9242 100644 (file)
@@ -1,12 +1,8 @@
 #ifndef __ASM_IA64_LIBATA_PORTMAP_H
 #define __ASM_IA64_LIBATA_PORTMAP_H
 
-#define ATA_PRIMARY_CMD                0x1F0
-#define ATA_PRIMARY_CTL                0x3F6
 #define ATA_PRIMARY_IRQ(dev)   isa_irq_to_vector(14)
 
-#define ATA_SECONDARY_CMD      0x170
-#define ATA_SECONDARY_CTL      0x376
 #define ATA_SECONDARY_IRQ(dev) isa_irq_to_vector(15)
 
 #endif
index 0018fad9039f4bd9435d2b6a5976093e56d38db7..57feb0c1f7d707dd51ce20ffba0a418f5b5687ff 100644 (file)
@@ -99,4 +99,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _ASM_IA64_SOCKET_H */
index cfaa7b25084c5384e849af309b0245a652b34e67..6f27a663177c4338299d98d33145a0c60546bf7a 100644 (file)
@@ -48,6 +48,7 @@
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
+#include <asm/export.h>
 
 #include "minstate.h"
 
@@ -1345,12 +1346,14 @@ GLOBAL_ENTRY(unw_init_running)
        mov rp=loc0
        br.ret.sptk.many rp
 END(unw_init_running)
+EXPORT_SYMBOL(unw_init_running)
 
 #ifdef CONFIG_FUNCTION_TRACER
 #ifdef CONFIG_DYNAMIC_FTRACE
 GLOBAL_ENTRY(_mcount)
        br ftrace_stub
 END(_mcount)
+EXPORT_SYMBOL(_mcount)
 
 .here:
        br.ret.sptk.many b0
index 09f845793d12c1147bc767885673545a3bef88f3..5ed0ea92c5bfac3935b3b799607dca9b3b6086a9 100644 (file)
@@ -142,7 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
        u64 virt_addr=simple_strtoull(buf, NULL, 16);
        int ret;
 
-       ret = get_user_pages(virt_addr, 1, VM_READ, 0, NULL, NULL);
+       ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL);
        if (ret<=0) {
 #ifdef ERR_INJ_DEBUG
                printk("Virtual address %lx is not existing.\n",virt_addr);
index 6b3d6c1f99b6db32c2208401ab4ae81c51acfe59..2c369bf77c4bc92df695f1d7cb34584355534499 100644 (file)
@@ -35,6 +35,7 @@
 
 #include <asm/processor.h>
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 /*
  * Inputs:
@@ -94,3 +95,4 @@ GLOBAL_ENTRY(esi_call_phys)
        mov gp=loc2
        br.ret.sptk.many rp
 END(esi_call_phys)
+EXPORT_SYMBOL_GPL(esi_call_phys)
index bb748c5964433165efab01ba39a33669fe76069a..c9b5e942f67156f5b6b7cf4917658fbf85f1544f 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/mca_asm.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
+#include <asm/export.h>
 
 #ifdef CONFIG_HOTPLUG_CPU
 #define SAL_PSR_BITS_TO_SET                            \
@@ -168,6 +169,7 @@ RestRR:                                                                                     \
        __PAGE_ALIGNED_DATA
 
        .global empty_zero_page
+EXPORT_DATA_SYMBOL_GPL(empty_zero_page)
 empty_zero_page:
        .skip PAGE_SIZE
 
index 09673104953828129add010722331770c426bf42..d111248af7191ad5480b554e370effb015e54245 100644 (file)
 /*
  * Architecture-specific kernel symbols
- *
- * Don't put any exports here unless it's defined in an assembler file.
- * All other exports should be put directly after the definition.
  */
 
-#include <linux/module.h>
-
-#include <linux/string.h>
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(strlen);
-
-#include <asm/pgtable.h>
-EXPORT_SYMBOL_GPL(empty_zero_page);
-
-#include <asm/checksum.h>
-EXPORT_SYMBOL(ip_fast_csum);           /* hand-coded assembly */
-EXPORT_SYMBOL(csum_ipv6_magic);
-
-#include <asm/page.h>
-EXPORT_SYMBOL(clear_page);
-EXPORT_SYMBOL(copy_page);
-
 #ifdef CONFIG_VIRTUAL_MEM_MAP
+#include <linux/compiler.h>
+#include <linux/export.h>
 #include <linux/bootmem.h>
 EXPORT_SYMBOL(min_low_pfn);    /* defined by bootmem.c, but not exported by generic code */
 EXPORT_SYMBOL(max_low_pfn);    /* defined by bootmem.c, but not exported by generic code */
 #endif
-
-#include <asm/processor.h>
-EXPORT_SYMBOL(ia64_cpu_info);
-#ifdef CONFIG_SMP
-EXPORT_SYMBOL(local_per_cpu_offset);
-#endif
-
-#include <asm/uaccess.h>
-EXPORT_SYMBOL(__copy_user);
-EXPORT_SYMBOL(__do_clear_user);
-EXPORT_SYMBOL(__strlen_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(__strnlen_user);
-
-/* from arch/ia64/lib */
-extern void __divsi3(void);
-extern void __udivsi3(void);
-extern void __modsi3(void);
-extern void __umodsi3(void);
-extern void __divdi3(void);
-extern void __udivdi3(void);
-extern void __moddi3(void);
-extern void __umoddi3(void);
-
-EXPORT_SYMBOL(__divsi3);
-EXPORT_SYMBOL(__udivsi3);
-EXPORT_SYMBOL(__modsi3);
-EXPORT_SYMBOL(__umodsi3);
-EXPORT_SYMBOL(__divdi3);
-EXPORT_SYMBOL(__udivdi3);
-EXPORT_SYMBOL(__moddi3);
-EXPORT_SYMBOL(__umoddi3);
-
-#if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE)
-extern void xor_ia64_2(void);
-extern void xor_ia64_3(void);
-extern void xor_ia64_4(void);
-extern void xor_ia64_5(void);
-
-EXPORT_SYMBOL(xor_ia64_2);
-EXPORT_SYMBOL(xor_ia64_3);
-EXPORT_SYMBOL(xor_ia64_4);
-EXPORT_SYMBOL(xor_ia64_5);
-#endif
-
-#include <asm/pal.h>
-EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
-EXPORT_SYMBOL(ia64_pal_call_phys_static);
-EXPORT_SYMBOL(ia64_pal_call_stacked);
-EXPORT_SYMBOL(ia64_pal_call_static);
-EXPORT_SYMBOL(ia64_load_scratch_fpregs);
-EXPORT_SYMBOL(ia64_save_scratch_fpregs);
-
-#include <asm/unwind.h>
-EXPORT_SYMBOL(unw_init_running);
-
-#if defined(CONFIG_IA64_ESI) || defined(CONFIG_IA64_ESI_MODULE)
-extern void esi_call_phys (void);
-EXPORT_SYMBOL_GPL(esi_call_phys);
-#endif
-extern char ia64_ivt[];
-EXPORT_SYMBOL(ia64_ivt);
-
-#include <asm/ftrace.h>
-#ifdef CONFIG_FUNCTION_TRACER
-/* mcount is defined in assembly */
-EXPORT_SYMBOL(_mcount);
-#endif
-
-#include <asm/cacheflush.h>
-EXPORT_SYMBOL_GPL(flush_icache_range);
index b1c3cfc93e715b54f485521ed25b36ca8af46b50..44a103a5de2b355278e086abc02b834ea959c743 100644 (file)
@@ -57,6 +57,7 @@
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 #include <asm/errno.h>
+#include <asm/export.h>
 
 #if 0
 # define PSR_DEFAULT_BITS      psr.ac
@@ -85,6 +86,7 @@
 
        .align 32768    // align on 32KB boundary
        .global ia64_ivt
+       EXPORT_DATA_SYMBOL(ia64_ivt)
 ia64_ivt:
 /////////////////////////////////////////////////////////////////////////////////////////
 // 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
index 0b533441c3c9b2fc434315c23f45dc0924e64b4c..94fb2e3954983b899393beb7636f6341c858fad9 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <asm/asmmacro.h>
 #include <asm/processor.h>
+#include <asm/export.h>
 
        .data
 pal_entry_point:
@@ -87,6 +88,7 @@ GLOBAL_ENTRY(ia64_pal_call_static)
        srlz.d                          // seralize restoration of psr.l
        br.ret.sptk.many b0
 END(ia64_pal_call_static)
+EXPORT_SYMBOL(ia64_pal_call_static)
 
 /*
  * Make a PAL call using the stacked registers calling convention.
@@ -122,6 +124,7 @@ GLOBAL_ENTRY(ia64_pal_call_stacked)
        srlz.d                          // serialize restoration of psr.l
        br.ret.sptk.many b0
 END(ia64_pal_call_stacked)
+EXPORT_SYMBOL(ia64_pal_call_stacked)
 
 /*
  * Make a physical mode PAL call using the static registers calling convention.
@@ -193,6 +196,7 @@ GLOBAL_ENTRY(ia64_pal_call_phys_static)
        srlz.d                          // seralize restoration of psr.l
        br.ret.sptk.many b0
 END(ia64_pal_call_phys_static)
+EXPORT_SYMBOL(ia64_pal_call_phys_static)
 
 /*
  * Make a PAL call using the stacked registers in physical mode.
@@ -250,6 +254,7 @@ GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
        srlz.d                          // seralize restoration of psr.l
        br.ret.sptk.many b0
 END(ia64_pal_call_phys_stacked)
+EXPORT_SYMBOL(ia64_pal_call_phys_stacked)
 
 /*
  * Save scratch fp scratch regs which aren't saved in pt_regs already
@@ -275,6 +280,7 @@ GLOBAL_ENTRY(ia64_save_scratch_fpregs)
        stf.spill [r2]  = f15,32
        br.ret.sptk.many rp
 END(ia64_save_scratch_fpregs)
+EXPORT_SYMBOL(ia64_save_scratch_fpregs)
 
 /*
  * Load scratch fp scratch regs (fp10-fp15)
@@ -296,3 +302,4 @@ GLOBAL_ENTRY(ia64_load_scratch_fpregs)
        ldf.fill  f15 = [r2],32
        br.ret.sptk.many rp
 END(ia64_load_scratch_fpregs)
+EXPORT_SYMBOL(ia64_load_scratch_fpregs)
index 6f54d511cc509a03ac079871b6979f03e6b53bc8..31aa8c0f68e14a284e0f2b088afe4c29de7daee5 100644 (file)
@@ -453,7 +453,7 @@ ia64_peek (struct task_struct *child, struct switch_stack *child_stack,
                        return 0;
                }
        }
-       copied = access_process_vm(child, addr, &ret, sizeof(ret), 0);
+       copied = access_process_vm(child, addr, &ret, sizeof(ret), FOLL_FORCE);
        if (copied != sizeof(ret))
                return -EIO;
        *val = ret;
@@ -489,7 +489,8 @@ ia64_poke (struct task_struct *child, struct switch_stack *child_stack,
                                *ia64_rse_skip_regs(krbs, regnum) = val;
                        }
                }
-       } else if (access_process_vm(child, addr, &val, sizeof(val), 1)
+       } else if (access_process_vm(child, addr, &val, sizeof(val),
+                               FOLL_FORCE | FOLL_WRITE)
                   != sizeof(val))
                return -EIO;
        return 0;
@@ -543,7 +544,8 @@ ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw,
                ret = ia64_peek(child, sw, user_rbs_end, addr, &val);
                if (ret < 0)
                        return ret;
-               if (access_process_vm(child, addr, &val, sizeof(val), 1)
+               if (access_process_vm(child, addr, &val, sizeof(val),
+                               FOLL_FORCE | FOLL_WRITE)
                    != sizeof(val))
                        return -EIO;
        }
@@ -559,7 +561,8 @@ ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw,
 
        /* now copy word for word from user rbs to kernel rbs: */
        for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) {
-               if (access_process_vm(child, addr, &val, sizeof(val), 0)
+               if (access_process_vm(child, addr, &val, sizeof(val),
+                               FOLL_FORCE)
                                != sizeof(val))
                        return -EIO;
 
@@ -1156,7 +1159,8 @@ arch_ptrace (struct task_struct *child, long request,
        case PTRACE_PEEKTEXT:
        case PTRACE_PEEKDATA:
                /* read word at location addr */
-               if (access_process_vm(child, addr, &data, sizeof(data), 0)
+               if (access_process_vm(child, addr, &data, sizeof(data),
+                               FOLL_FORCE)
                    != sizeof(data))
                        return -EIO;
                /* ensure return value is not mistaken for error code */
index afddb3e80a2999bd26dbd309b440a41758345770..7ec7acc844c2e7aa36a1b3ed6acf5c51f5d18f99 100644 (file)
@@ -71,7 +71,11 @@ EXPORT_SYMBOL(__per_cpu_offset);
 #endif
 
 DEFINE_PER_CPU(struct cpuinfo_ia64, ia64_cpu_info);
+EXPORT_SYMBOL(ia64_cpu_info);
 DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(local_per_cpu_offset);
+#endif
 unsigned long ia64_cycles_per_usec;
 struct ia64_boot_param *ia64_boot_param;
 struct screen_info screen_info;
index 98771e2a78afdb9040c5c05cdcc8b3cfde9fc44b..1f3d3877618fdc934ab20f07695476206fe35e00 100644 (file)
@@ -2,17 +2,15 @@
 # Makefile for ia64-specific library routines..
 #
 
-obj-y := io.o
-
-lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o                 \
+lib-y := io.o __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o            \
        __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o                   \
        checksum.o clear_page.o csum_partial_copy.o                     \
        clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o   \
        flush.o ip_fast_csum.o do_csum.o                                \
        memset.o strlen.o xor.o
 
-obj-$(CONFIG_ITANIUM)  += copy_page.o copy_user.o memcpy.o
-obj-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
+lib-$(CONFIG_ITANIUM)  += copy_page.o copy_user.o memcpy.o
+lib-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
 lib-$(CONFIG_PERFMON)  += carta_random.o
 
 AFLAGS___divdi3.o      =
index 2d814e7ed191c60c50ba85de3b8382a6bfc4abc0..3cf5b76e587ff18d4d995b02d31a0ea458d04eaa 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <asm/asmmacro.h>
 #include <asm/page.h>
+#include <asm/export.h>
 
 #ifdef CONFIG_ITANIUM
 # define L3_LINE_SIZE  64      // Itanium L3 line size
@@ -74,3 +75,4 @@ GLOBAL_ENTRY(clear_page)
        mov ar.lc = saved_lc            // restore lc
        br.ret.sptk.many rp
 END(clear_page)
+EXPORT_SYMBOL(clear_page)
index eecd8577b2099c759a21ad486f24d61c831d951f..7b40731ee5d86e13dbac5d49e363912a401eeab6 100644 (file)
@@ -12,6 +12,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 //
 // arguments
@@ -207,3 +208,4 @@ GLOBAL_ENTRY(__do_clear_user)
        mov ar.lc=saved_lc
        br.ret.sptk.many rp
 END(__do_clear_user)
+EXPORT_SYMBOL(__do_clear_user)
index 127d1d050d788212ddf27348304befd7ac7dc4bd..cbdb9e323ffbd5be111a8b7b654633ad264f7112 100644 (file)
@@ -16,6 +16,7 @@
  */
 #include <asm/asmmacro.h>
 #include <asm/page.h>
+#include <asm/export.h>
 
 #define PIPE_DEPTH     3
 #define EPI            p[PIPE_DEPTH-1]
@@ -96,3 +97,4 @@ GLOBAL_ENTRY(copy_page)
        mov ar.lc=saved_lc
        br.ret.sptk.many rp
 END(copy_page)
+EXPORT_SYMBOL(copy_page)
index 3c45d60a81b44789563a9a733fd82ddc15349613..c13f69036876c8ad0bcd191c0afcc76329b13041 100644 (file)
@@ -61,6 +61,7 @@
  */
 #include <asm/asmmacro.h>
 #include <asm/page.h>
+#include <asm/export.h>
 
 #define PREFETCH_DIST  8               // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st)
 
@@ -183,3 +184,4 @@ GLOBAL_ENTRY(copy_page)
        mov pr = saved_pr, -1
        br.ret.sptk.many rp
 END(copy_page)
+EXPORT_SYMBOL(copy_page)
index c952bdc6a09399ee7f511c94212746adf13bd182..66facd52e8d007581c4df4b361532fd9bb28bf80 100644 (file)
@@ -30,6 +30,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 //
 // Tuneable parameters
@@ -608,3 +609,4 @@ GLOBAL_ENTRY(__copy_user)
        mov ar.pfs=saved_pfs
        br.ret.sptk.many rp
 END(__copy_user)
+EXPORT_SYMBOL(__copy_user)
index 1d8c88860063d303c7efa807078326744b55e118..9a5a2f9fad132ef8c59c3aea2e4d0810f06efbc8 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 
        /*
@@ -60,6 +61,7 @@ GLOBAL_ENTRY(flush_icache_range)
        mov     ar.lc=r3                // restore ar.lc
        br.ret.sptk.many rp
 END(flush_icache_range)
+EXPORT_SYMBOL_GPL(flush_icache_range)
 
        /*
         * clflush_cache_range(start,size)
index c91b5b0129ff929e072c80a06e146c529ce18192..715aed79a9ce34e64a66839405c1a2307383cd67 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 #ifdef MODULO
 # define OP    mod
@@ -81,3 +82,4 @@ GLOBAL_ENTRY(NAME)
        getf.sig r8 = f6                // transfer result to result register
        br.ret.sptk.many rp
 END(NAME)
+EXPORT_SYMBOL(NAME)
index 627573c4ceb1972c248eeb6ec881f85f1b79ded2..25840f6977532ab6b2d26aa53976f10bf685287d 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 #ifdef MODULO
 # define OP    mod
@@ -78,3 +79,4 @@ GLOBAL_ENTRY(NAME)
        getf.sig r8 = f11               // transfer result to result register
        br.ret.sptk.many rp
 END(NAME)
+EXPORT_SYMBOL(NAME)
index 620d9dc5220f377c9cd2da899a81795054a27ad0..648e0d4a48390a455caca3da555074af78d33351 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 /*
  * Since we know that most likely this function is called with buf aligned
@@ -92,6 +93,7 @@ GLOBAL_ENTRY(ip_fast_csum)
        mov     b0=r34
        br.ret.sptk.many b0
 END(ip_fast_csum)
+EXPORT_SYMBOL(ip_fast_csum)
 
 GLOBAL_ENTRY(csum_ipv6_magic)
        ld4     r20=[in0],4
@@ -142,3 +144,4 @@ GLOBAL_ENTRY(csum_ipv6_magic)
        andcm   r8=r9,r8
        br.ret.sptk.many b0
 END(csum_ipv6_magic)
+EXPORT_SYMBOL(csum_ipv6_magic)
index 448908d80b6943958d234cc93d07eb5d7f2b49ea..ba172fd6acf4e8571a4fe873b3bf3942a8006769 100644 (file)
@@ -14,6 +14,7 @@
  *     David Mosberger-Tang <davidm@hpl.hp.com>
  */
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 GLOBAL_ENTRY(memcpy)
 
@@ -299,3 +300,4 @@ GLOBAL_ENTRY(memcpy)
        COPY(56, 0)
 
 END(memcpy)
+EXPORT_SYMBOL(memcpy)
index ab0f8763972954117b0ac6a7dc3ac120d6e042c5..b264b6a7967b425b7add3311b542cbb64ca0b41b 100644 (file)
@@ -15,6 +15,7 @@
  */
 #include <asm/asmmacro.h>
 #include <asm/page.h>
+#include <asm/export.h>
 
 #define EK(y...) EX(y)
 
@@ -78,6 +79,7 @@ GLOBAL_ENTRY(memcpy)
        br.cond.sptk .common_code
        ;;
 END(memcpy)
+EXPORT_SYMBOL(memcpy)
 GLOBAL_ENTRY(__copy_user)
        .prologue
 // check dest alignment
@@ -664,3 +666,4 @@ EK(.ex_handler,  (p17)      st8     [dst1]=r39,8);                                          \
 
 /* end of McKinley specific optimization */
 END(__copy_user)
+EXPORT_SYMBOL(__copy_user)
index f26c16aefb1cbfdec5cd2fa5e466ae5426c627ca..87b974704075f1f5d5570260c09e4fddf74b0e7d 100644 (file)
@@ -18,6 +18,7 @@
    to get peak speed when value = 0.  */
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 #undef ret
 
 #define dest           in0
@@ -360,3 +361,4 @@ GLOBAL_ENTRY(memset)
        br.ret.sptk.many rp
 }
 END(memset)
+EXPORT_SYMBOL(memset)
index e0cdac0a85b873a1c714c4d4f0c8ca5c40aca015..1a6e17c657b4219e902fd1d2dc5472deebdf75de 100644 (file)
@@ -17,6 +17,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 //
 //
@@ -190,3 +191,4 @@ GLOBAL_ENTRY(strlen)
        mov ar.pfs=saved_pfs    // because of ar.ec, restore no matter what
        br.ret.sptk.many rp     // end of successful recovery code
 END(strlen)
+EXPORT_SYMBOL(strlen)
index c71eded4285efb49e5994ef51b163192b15327fd..9d257684e733461c4ac4f5d2b56638f2cab48791 100644 (file)
@@ -16,6 +16,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 //
 // int strlen_user(char *)
@@ -196,3 +197,4 @@ GLOBAL_ENTRY(__strlen_user)
        mov ar.pfs=saved_pfs    // because of ar.ec, restore no matter what
        br.ret.sptk.many rp
 END(__strlen_user)
+EXPORT_SYMBOL(__strlen_user)
index a504381f31ebe7222dc115c9ad4f3ab5c37241b8..ca9ccf280e2e5c634938cad13ea759c7fae8c7d2 100644 (file)
@@ -17,6 +17,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 GLOBAL_ENTRY(__strncpy_from_user)
        alloc r2=ar.pfs,3,0,0,0
@@ -42,3 +43,4 @@ GLOBAL_ENTRY(__strncpy_from_user)
 [.Lexit:]
        br.ret.sptk.many rp
 END(__strncpy_from_user)
+EXPORT_SYMBOL(__strncpy_from_user)
index d09066b1e49d31dd242561aa54f632f5dda20a6c..80a5dfd1d402b9d28581315f5a59424fc149d7ce 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 GLOBAL_ENTRY(__strnlen_user)
        .prologue
@@ -43,3 +44,4 @@ GLOBAL_ENTRY(__strnlen_user)
        mov ar.lc=r16                   // restore ar.lc
        br.ret.sptk.many rp
 END(__strnlen_user)
+EXPORT_SYMBOL(__strnlen_user)
index 54e3f7eab8e9aecfa95c5df8378b7d7c22f455ab..c83f1c410691b7c2c9b47f563bf6fb5741633c45 100644 (file)
@@ -14,6 +14,7 @@
  */
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 GLOBAL_ENTRY(xor_ia64_2)
        .prologue
@@ -51,6 +52,7 @@ GLOBAL_ENTRY(xor_ia64_2)
        mov pr = r29, -1
        br.ret.sptk.few rp
 END(xor_ia64_2)
+EXPORT_SYMBOL(xor_ia64_2)
 
 GLOBAL_ENTRY(xor_ia64_3)
        .prologue
@@ -91,6 +93,7 @@ GLOBAL_ENTRY(xor_ia64_3)
        mov pr = r29, -1
        br.ret.sptk.few rp
 END(xor_ia64_3)
+EXPORT_SYMBOL(xor_ia64_3)
 
 GLOBAL_ENTRY(xor_ia64_4)
        .prologue
@@ -134,6 +137,7 @@ GLOBAL_ENTRY(xor_ia64_4)
        mov pr = r29, -1
        br.ret.sptk.few rp
 END(xor_ia64_4)
+EXPORT_SYMBOL(xor_ia64_4)
 
 GLOBAL_ENTRY(xor_ia64_5)
        .prologue
@@ -182,3 +186,4 @@ GLOBAL_ENTRY(xor_ia64_5)
        mov pr = r29, -1
        br.ret.sptk.few rp
 END(xor_ia64_5)
+EXPORT_SYMBOL(xor_ia64_5)
index 5fe42fc7b6c5dd29e15edbcfbb8ea4cb370ea25c..5853f8e92c20cda02450346d839b3f0b466359ee 100644 (file)
@@ -90,4 +90,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _ASM_M32R_SOCKET_H */
index 51f5e9aa49016fdce8112eb72083c0b167f21df8..c145605a981ff4fbc441ffe4512a6167147f6bec 100644 (file)
@@ -493,7 +493,8 @@ unregister_all_debug_traps(struct task_struct *child)
        int i;
 
        for (i = 0; i < p->nr_trap; i++)
-               access_process_vm(child, p->addr[i], &p->insn[i], sizeof(p->insn[i]), 1);
+               access_process_vm(child, p->addr[i], &p->insn[i], sizeof(p->insn[i]),
+                               FOLL_FORCE | FOLL_WRITE);
        p->nr_trap = 0;
 }
 
@@ -537,7 +538,8 @@ embed_debug_trap(struct task_struct *child, unsigned long next_pc)
        unsigned long next_insn, code;
        unsigned long addr = next_pc & ~3;
 
-       if (access_process_vm(child, addr, &next_insn, sizeof(next_insn), 0)
+       if (access_process_vm(child, addr, &next_insn, sizeof(next_insn),
+                       FOLL_FORCE)
            != sizeof(next_insn)) {
                return -1; /* error */
        }
@@ -546,7 +548,8 @@ embed_debug_trap(struct task_struct *child, unsigned long next_pc)
        if (register_debug_trap(child, next_pc, next_insn, &code)) {
                return -1; /* error */
        }
-       if (access_process_vm(child, addr, &code, sizeof(code), 1)
+       if (access_process_vm(child, addr, &code, sizeof(code),
+                       FOLL_FORCE | FOLL_WRITE)
            != sizeof(code)) {
                return -1; /* error */
        }
@@ -562,7 +565,8 @@ withdraw_debug_trap(struct pt_regs *regs)
        addr = (regs->bpc - 2) & ~3;
        regs->bpc -= 2;
        if (unregister_debug_trap(current, addr, &code)) {
-           access_process_vm(current, addr, &code, sizeof(code), 1);
+           access_process_vm(current, addr, &code, sizeof(code),
+                   FOLL_FORCE | FOLL_WRITE);
            invalidate_cache();
        }
 }
@@ -589,7 +593,8 @@ void user_enable_single_step(struct task_struct *child)
        /* Compute next pc.  */
        pc = get_stack_long(child, PT_BPC);
 
-       if (access_process_vm(child, pc&~3, &insn, sizeof(insn), 0)
+       if (access_process_vm(child, pc&~3, &insn, sizeof(insn),
+                       FOLL_FORCE)
            != sizeof(insn))
                return;
 
diff --git a/arch/m68k/include/asm/export.h b/arch/m68k/include/asm/export.h
new file mode 100644 (file)
index 0000000..0af20f4
--- /dev/null
@@ -0,0 +1,3 @@
+#define KSYM_ALIGN 2
+#define KCRC_ALIGN 2
+#include <asm-generic/export.h>
index 8a1c4d3f91c8a78bf7694a2579d20db92ed7eba8..74c898ced8cc0481e3fa70dc0b7e6d8b86d1994f 100644 (file)
@@ -13,7 +13,7 @@ extra-$(CONFIG_SUN3X) := head.o
 extra-$(CONFIG_SUN3)   := sun3-head.o
 extra-y                        += vmlinux.lds
 
-obj-y  := entry.o irq.o m68k_ksyms.o module.o process.o ptrace.o
+obj-y  := entry.o irq.o module.o process.o ptrace.o
 obj-y  += setup.o signal.o sys_m68k.o syscalltable.o time.o traps.o
 
 obj-$(CONFIG_MMU_MOTOROLA) += ints.o vectors.o
diff --git a/arch/m68k/kernel/m68k_ksyms.c b/arch/m68k/kernel/m68k_ksyms.c
deleted file mode 100644 (file)
index 774c1bd..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-#include <linux/module.h>
-
-asmlinkage long long __ashldi3 (long long, int);
-asmlinkage long long __ashrdi3 (long long, int);
-asmlinkage long long __lshrdi3 (long long, int);
-asmlinkage long long __muldi3 (long long, long long);
-
-/* The following are special because they're not called
-   explicitly (the C compiler generates them).  Fortunately,
-   their interface isn't gonna change any time soon now, so
-   it's OK to leave it out of version control.  */
-EXPORT_SYMBOL(__ashldi3);
-EXPORT_SYMBOL(__ashrdi3);
-EXPORT_SYMBOL(__lshrdi3);
-EXPORT_SYMBOL(__muldi3);
-
-#if defined(CONFIG_CPU_HAS_NO_MULDIV64)
-/*
- * Simpler 68k and ColdFire parts also need a few other gcc functions.
- */
-extern long long __divsi3(long long, long long);
-extern long long __modsi3(long long, long long);
-extern long long __mulsi3(long long, long long);
-extern long long __udivsi3(long long, long long);
-extern long long __umodsi3(long long, long long);
-
-EXPORT_SYMBOL(__divsi3);
-EXPORT_SYMBOL(__modsi3);
-EXPORT_SYMBOL(__mulsi3);
-EXPORT_SYMBOL(__udivsi3);
-EXPORT_SYMBOL(__umodsi3);
-#endif
index 37234c2df47f6e8ed6edd4eb5d60fc29178f8ef7..8dffd36ec4f2411f867629111844c907ac47f7bf 100644 (file)
@@ -13,6 +13,9 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details. */
 
+#include <linux/compiler.h>
+#include <linux/export.h>
+
 #define BITS_PER_UNIT 8
 
 typedef                 int SItype     __attribute__ ((mode (SI)));
@@ -55,3 +58,4 @@ __ashldi3 (DItype u, word_type b)
 
   return w.ll;
 }
+EXPORT_SYMBOL(__ashldi3);
index 1d59345f36c631550767bc8f8c77c9e6a69afbc0..e6565a3ee2c37065949cd92eed8deeb78c709f33 100644 (file)
@@ -13,6 +13,9 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details. */
 
+#include <linux/compiler.h>
+#include <linux/export.h>
+
 #define BITS_PER_UNIT 8
 
 typedef                 int SItype     __attribute__ ((mode (SI)));
@@ -56,3 +59,4 @@ __ashrdi3 (DItype u, word_type b)
 
   return w.ll;
 }
+EXPORT_SYMBOL(__ashrdi3);
index 2c0ec85ac661547382c2206b613e86ba9d3a63be..3a2143f51631a0e6a819388f53ee5a910a676917 100644 (file)
@@ -33,6 +33,8 @@ General Public License for more details. */
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
 
+#include <asm/export.h>
+
 /* These are predefined by new versions of GNU cpp.  */
 
 #ifndef __USER_LABEL_PREFIX__
@@ -118,3 +120,4 @@ L2: movel   d1, sp@-
 L3:    movel   sp@+, d2
        rts
 
+       EXPORT_SYMBOL(__divsi3)
index 49e1ec8f2cc27a9f9880bfe78cef279afcdf3ab1..039779737c7d28bfb1d8caf29995fa440c3f2a2a 100644 (file)
@@ -13,6 +13,9 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details. */
 
+#include <linux/compiler.h>
+#include <linux/export.h>
+
 #define BITS_PER_UNIT 8
 
 typedef                 int SItype     __attribute__ ((mode (SI)));
@@ -55,3 +58,4 @@ __lshrdi3 (DItype u, word_type b)
 
   return w.ll;
 }
+EXPORT_SYMBOL(__lshrdi3);
index 1d9e0efdf31d201f67ae56496a6fae6496ef8868..1c967649a4e0e07c47331c8bb228d97602756ac9 100644 (file)
@@ -33,6 +33,8 @@ General Public License for more details. */
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
 
+#include <asm/export.h>
+
 /* These are predefined by new versions of GNU cpp.  */
 
 #ifndef __USER_LABEL_PREFIX__
@@ -106,3 +108,4 @@ SYM (__modsi3):
        movel   d1, d0
        rts
 
+       EXPORT_SYMBOL(__modsi3)
index 9006d15b87218d95d77effd305bb49b4968e4914..6459af5b2af0a9c312c8c04d1dfc2009e24c30a6 100644 (file)
@@ -14,6 +14,9 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details. */
 
+#include <linux/compiler.h>
+#include <linux/export.h>
+
 #ifdef CONFIG_CPU_HAS_NO_MULDIV64
 
 #define SI_TYPE_SIZE 32
@@ -90,3 +93,4 @@ __muldi3 (DItype u, DItype v)
 
   return w.ll;
 }
+EXPORT_SYMBOL(__muldi3);
index c39ad4e738e9a6522fcef640020502f727d46a52..855675e69a8a2bb50e1c45e728ff2a218732a893 100644 (file)
@@ -32,7 +32,7 @@ General Public License for more details. */
    Some of this code comes from MINIX, via the folks at ericsson.
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
-
+#include <asm/export.h>
 /* These are predefined by new versions of GNU cpp.  */
 
 #ifndef __USER_LABEL_PREFIX__
@@ -102,4 +102,4 @@ SYM (__mulsi3):
        addl    d1, d0
 
        rts
-
+       EXPORT_SYMBOL(__mulsi3)
index 35a5446572a5ee3bae78e725ae8c39619332b7a0..78440ae513bf318e34a6463494c9c7dcfea48bf7 100644 (file)
@@ -32,7 +32,7 @@ General Public License for more details. */
    Some of this code comes from MINIX, via the folks at ericsson.
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
-
+#include <asm/export.h>
 /* These are predefined by new versions of GNU cpp.  */
 
 #ifndef __USER_LABEL_PREFIX__
@@ -154,4 +154,4 @@ L2: subql   IMM (1),d4
        unlk    a6              | and return
        rts
 #endif /* __mcf5200__ || __mcoldfire__ */
-
+       EXPORT_SYMBOL(__udivsi3)
index 099da514a8fd80daa85d3ba2644e3f2792175b5b..b6fd11f58948debdce515dd7114ca256540fb776 100644 (file)
@@ -32,7 +32,7 @@ General Public License for more details. */
    Some of this code comes from MINIX, via the folks at ericsson.
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
-
+#include <asm/export.h>
 /* These are predefined by new versions of GNU cpp.  */
 
 #ifndef __USER_LABEL_PREFIX__
@@ -105,4 +105,4 @@ SYM (__umodsi3):
        subl    d0, d1          /* d1 = a - (a/b)*b */
        movel   d1, d0
        rts
-
+       EXPORT_SYMBOL(__umodsi3)
index 470e365f04ea4ee3f4503c060e8dacbd1e9dd0da..8ff0a70865f65cc9f9f71d9b9f126dd7fb820793 100644 (file)
 #define atomic_dec(v) atomic_sub(1, (v))
 
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+#define atomic_dec_if_positive(v)       atomic_sub_if_positive(1, v)
 
 #endif
 
-#define atomic_dec_if_positive(v)       atomic_sub_if_positive(1, v)
-
 #include <asm-generic/atomic64.h>
 
 #endif /* __ASM_METAG_ATOMIC_H */
index c5cd63a4b6d53b399e91e8ebad7358a3f45fce8c..f5f1bdb292de0e9445e4caad4653927b2ef1f576 100644 (file)
@@ -11,6 +11,7 @@ platforms += cavium-octeon
 platforms += cobalt
 platforms += dec
 platforms += emma
+platforms += generic
 platforms += jazz
 platforms += jz4740
 platforms += lantiq
@@ -18,7 +19,6 @@ platforms += lasat
 platforms += loongson32
 platforms += loongson64
 platforms += mti-malta
-platforms += mti-sead3
 platforms += netlogic
 platforms += paravirt
 platforms += pic32
index 1a322c807f220e44c735650ced5142c8007366d9..b3c5bde43d34f85afc50c8eb955745b88836386c 100644 (file)
@@ -65,6 +65,7 @@ config MIPS
        select HANDLE_DOMAIN_IRQ
        select HAVE_EXIT_THREAD
        select HAVE_REGS_AND_STACK_ACCESS_API
+       select HAVE_ARCH_HARDENED_USERCOPY
 
 menu "Machine selection"
 
@@ -72,6 +73,57 @@ choice
        prompt "System type"
        default SGI_IP22
 
+config MIPS_GENERIC
+       bool "Generic board-agnostic MIPS kernel"
+       select BOOT_RAW
+       select BUILTIN_DTB
+       select CEVT_R4K
+       select CLKSRC_MIPS_GIC
+       select COMMON_CLK
+       select CPU_MIPSR2_IRQ_VI
+       select CPU_MIPSR2_IRQ_EI
+       select CSRC_R4K
+       select DMA_PERDEV_COHERENT
+       select HW_HAS_PCI
+       select IRQ_MIPS_CPU
+       select LIBFDT
+       select MIPS_CPU_SCACHE
+       select MIPS_GIC
+       select MIPS_L1_CACHE_SHIFT_7
+       select NO_EXCEPT_FILL
+       select PCI_DRIVERS_GENERIC
+       select PINCTRL
+       select SMP_UP if SMP
+       select SYS_HAS_CPU_MIPS32_R1
+       select SYS_HAS_CPU_MIPS32_R2
+       select SYS_HAS_CPU_MIPS32_R6
+       select SYS_HAS_CPU_MIPS64_R1
+       select SYS_HAS_CPU_MIPS64_R2
+       select SYS_HAS_CPU_MIPS64_R6
+       select SYS_SUPPORTS_32BIT_KERNEL
+       select SYS_SUPPORTS_64BIT_KERNEL
+       select SYS_SUPPORTS_BIG_ENDIAN
+       select SYS_SUPPORTS_HIGHMEM
+       select SYS_SUPPORTS_LITTLE_ENDIAN
+       select SYS_SUPPORTS_MICROMIPS
+       select SYS_SUPPORTS_MIPS_CPS
+       select SYS_SUPPORTS_MIPS16
+       select SYS_SUPPORTS_MULTITHREADING
+       select SYS_SUPPORTS_RELOCATABLE
+       select SYS_SUPPORTS_SMARTMIPS
+       select USB_EHCI_BIG_ENDIAN_DESC if BIG_ENDIAN
+       select USB_EHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN
+       select USB_OHCI_BIG_ENDIAN_DESC if BIG_ENDIAN
+       select USB_OHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN
+       select USB_UHCI_BIG_ENDIAN_DESC if BIG_ENDIAN
+       select USB_UHCI_BIG_ENDIAN_MMIO if BIG_ENDIAN
+       select USE_OF
+       help
+         Select this to build a kernel which aims to support multiple boards,
+         generally using a flattened device tree passed from the bootloader
+         using the boot protocol defined in the UHI (Unified Hosting
+         Interface) specification.
+
 config MIPS_ALCHEMY
        bool "Alchemy processor based machines"
        select ARCH_PHYS_ADDR_T_64BIT
@@ -478,6 +530,7 @@ config MIPS_MALTA
        select SYS_SUPPORTS_ZBOOT
        select SYS_SUPPORTS_RELOCATABLE
        select USE_OF
+       select LIBFDT
        select ZONE_DMA32 if 64BIT
        select BUILTIN_DTB
        select LIBFDT
@@ -493,42 +546,6 @@ config MACH_PIC32
          Microchip PIC32 is a family of general-purpose 32 bit MIPS core
          microcontrollers.
 
-config MIPS_SEAD3
-       bool "MIPS SEAD3 board"
-       select BOOT_ELF32
-       select BOOT_RAW
-       select BUILTIN_DTB
-       select CEVT_R4K
-       select CSRC_R4K
-       select CLKSRC_MIPS_GIC
-       select COMMON_CLK
-       select CPU_MIPSR2_IRQ_VI
-       select CPU_MIPSR2_IRQ_EI
-       select DMA_NONCOHERENT
-       select IRQ_MIPS_CPU
-       select MIPS_GIC
-       select LIBFDT
-       select MIPS_MSC
-       select SYS_HAS_CPU_MIPS32_R1
-       select SYS_HAS_CPU_MIPS32_R2
-       select SYS_HAS_CPU_MIPS32_R6
-       select SYS_HAS_CPU_MIPS64_R1
-       select SYS_HAS_EARLY_PRINTK
-       select SYS_SUPPORTS_32BIT_KERNEL
-       select SYS_SUPPORTS_64BIT_KERNEL
-       select SYS_SUPPORTS_BIG_ENDIAN
-       select SYS_SUPPORTS_LITTLE_ENDIAN
-       select SYS_SUPPORTS_SMARTMIPS
-       select SYS_SUPPORTS_MICROMIPS
-       select SYS_SUPPORTS_MIPS16
-       select SYS_SUPPORTS_RELOCATABLE
-       select USB_EHCI_BIG_ENDIAN_DESC
-       select USB_EHCI_BIG_ENDIAN_MMIO
-       select USE_OF
-       help
-         This enables support for the MIPS Technologies SEAD3 evaluation
-         board.
-
 config NEC_MARKEINS
        bool "NEC EMMA2RH Mark-eins board"
        select SOC_EMMA2RH
@@ -988,6 +1005,7 @@ source "arch/mips/ath79/Kconfig"
 source "arch/mips/bcm47xx/Kconfig"
 source "arch/mips/bcm63xx/Kconfig"
 source "arch/mips/bmips/Kconfig"
+source "arch/mips/generic/Kconfig"
 source "arch/mips/jazz/Kconfig"
 source "arch/mips/jz4740/Kconfig"
 source "arch/mips/lantiq/Kconfig"
@@ -1098,6 +1116,10 @@ config DMA_MAYBE_COHERENT
        select DMA_NONCOHERENT
        bool
 
+config DMA_PERDEV_COHERENT
+       bool
+       select DMA_MAYBE_COHERENT
+
 config DMA_COHERENT
        bool
 
@@ -1401,6 +1423,16 @@ config CPU_LOONGSON1B
          The Loongson 1B is a 32-bit SoC, which implements the MIPS32
          release 2 instruction set.
 
+config CPU_LOONGSON1C
+       bool "Loongson 1C"
+       depends on SYS_HAS_CPU_LOONGSON1C
+       select CPU_LOONGSON1
+       select ARCH_WANT_OPTIONAL_GPIOLIB
+       select LEDS_GPIO_REGISTER
+       help
+         The Loongson 1C is a 32-bit SoC, which implements the MIPS32
+         release 2 instruction set.
+
 config CPU_MIPS32_R1
        bool "MIPS32 Release 1"
        depends on SYS_HAS_CPU_MIPS32_R1
@@ -1850,6 +1882,9 @@ config SYS_HAS_CPU_LOONGSON2F
 config SYS_HAS_CPU_LOONGSON1B
        bool
 
+config SYS_HAS_CPU_LOONGSON1C
+       bool
+
 config SYS_HAS_CPU_MIPS32_R1
        bool
 
@@ -2906,7 +2941,7 @@ endchoice
 choice
        prompt "Kernel command line type" if !CMDLINE_OVERRIDE
        default MIPS_CMDLINE_FROM_DTB if USE_OF && !ATH79 && !MACH_INGENIC && \
-                                        !MIPS_MALTA && !MIPS_SEAD3 && \
+                                        !MIPS_MALTA && \
                                         !CAVIUM_OCTEON_SOC
        default MIPS_CMDLINE_FROM_BOOTLOADER
 
@@ -2960,7 +2995,6 @@ config PCI
        bool "Support for PCI controller"
        depends on HW_HAS_PCI
        select PCI_DOMAINS
-       select NO_GENERIC_PCI_IOPORT_MAP
        help
          Find out whether you have a PCI motherboard. PCI is the name of a
          bus system, i.e. the way the CPU talks to the other stuff inside
@@ -2981,6 +3015,17 @@ config HT_PCI
 config PCI_DOMAINS
        bool
 
+config PCI_DOMAINS_GENERIC
+       bool
+
+config PCI_DRIVERS_GENERIC
+       select PCI_DOMAINS_GENERIC if PCI_DOMAINS
+       bool
+
+config PCI_DRIVERS_LEGACY
+       def_bool !PCI_DRIVERS_GENERIC
+       select NO_GENERIC_PCI_IOPORT_MAP
+
 source "drivers/pci/Kconfig"
 
 #
index 598ab2930fce67bb373827d7bbb09be35880a75e..1a6bac7b076f31934d397f22c5ccac600e36b4b4 100644 (file)
@@ -262,7 +262,14 @@ KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y)
 KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)
 
 bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y) \
-                 VMLINUX_ENTRY_ADDRESS=$(entry-y)
+                 VMLINUX_ENTRY_ADDRESS=$(entry-y) \
+                 PLATFORM="$(platform-y)"
+ifdef CONFIG_32BIT
+bootvars-y     += ADDR_BITS=32
+endif
+ifdef CONFIG_64BIT
+bootvars-y     += ADDR_BITS=64
+endif
 
 LDFLAGS                        += -m $(ld-emul)
 
@@ -302,6 +309,11 @@ boot-y                     += uImage.gz
 boot-y                 += uImage.lzma
 boot-y                 += uImage.lzo
 endif
+boot-y                 += vmlinux.itb
+boot-y                 += vmlinux.gz.itb
+boot-y                 += vmlinux.bz2.itb
+boot-y                 += vmlinux.lzma.itb
+boot-y                 += vmlinux.lzo.itb
 
 # compressed boot image targets (arch/mips/boot/compressed/)
 bootz-y                        := vmlinuz
@@ -425,4 +437,67 @@ define archhelp
        echo '  dtbs_install         - Install dtbs to $(INSTALL_DTBS_PATH)'
        echo
        echo '  These will be default as appropriate for a configured platform.'
+       echo
+       echo '  If you are targeting a system supported by generic kernels you may'
+       echo '  configure the kernel for a given architecture target like so:'
+       echo
+       echo '  {micro32,32,64}{r1,r2,r6}{el,}_defconfig <BOARDS="list of boards">'
+       echo
+       echo '  Otherwise, the following default configurations are available:'
 endef
+
+generic_config_dir = $(srctree)/arch/$(ARCH)/configs/generic
+generic_defconfigs :=
+
+#
+# If the user generates a generic kernel configuration without specifying a
+# list of boards to include the config fragments for, default to including all
+# available board config fragments.
+#
+ifeq ($(BOARDS),)
+BOARDS = $(patsubst board-%.config,%,$(notdir $(wildcard $(generic_config_dir)/board-*.config)))
+endif
+
+#
+# Generic kernel configurations which merge generic_defconfig with the
+# appropriate config fragments from arch/mips/configs/generic/, resulting in
+# the ability to easily configure the kernel for a given architecture,
+# endianness & set of boards without duplicating the needed configuration in
+# hundreds of defconfig files.
+#
+define gen_generic_defconfigs
+$(foreach bits,$(1),$(foreach rev,$(2),$(foreach endian,$(3),
+target := $(bits)$(rev)$(filter el,$(endian))_defconfig
+generic_defconfigs += $$(target)
+$$(target): $(generic_config_dir)/$(bits)$(rev).config
+$$(target): $(generic_config_dir)/$(endian).config
+)))
+endef
+
+$(eval $(call gen_generic_defconfigs,32 64,r1 r2 r6,eb el))
+$(eval $(call gen_generic_defconfigs,micro32,r2,eb el))
+
+.PHONY: $(generic_defconfigs)
+$(generic_defconfigs):
+       $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \
+               -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/generic_defconfig $^ \
+               $(foreach board,$(BOARDS),$(generic_config_dir)/board-$(board).config)
+       $(Q)$(MAKE) olddefconfig
+
+#
+# Prevent generic merge_config rules attempting to merge single fragments
+#
+$(generic_config_dir)/%.config: ;
+
+#
+# Legacy defconfig compatibility - these targets used to be real defconfigs but
+# now that the boards have been converted to use the generic kernel they are
+# wrappers around the generic rules above.
+#
+.PHONY: sead3_defconfig
+sead3_defconfig:
+       $(Q)$(MAKE) 32r2el_defconfig BOARDS=sead-3
+
+.PHONY: sead3micro_defconfig
+sead3micro_defconfig:
+       $(Q)$(MAKE) micro32r2el_defconfig BOARDS=sead-3
index 2902138b3e0f56f639896c571e5bc87172f74d2f..7faaa6d593a74c119cfe6523e4c3e29009b8116a 100644 (file)
@@ -48,17 +48,17 @@ void __init plat_mem_setup(void)
                clear_c0_config(1 << 19); /* Clear Config[OD] */
 
        hw_coherentio = 0;
-       coherentio = 1;
+       coherentio = IO_COHERENCE_ENABLED;
        switch (alchemy_get_cputype()) {
        case ALCHEMY_CPU_AU1000:
        case ALCHEMY_CPU_AU1500:
        case ALCHEMY_CPU_AU1100:
-               coherentio = 0;
+               coherentio = IO_COHERENCE_DISABLED;
                break;
        case ALCHEMY_CPU_AU1200:
                /* Au1200 AB USB does not support coherent memory */
                if (0 == (read_c0_prid() & PRID_REV_MASK))
-                       coherentio = 0;
+                       coherentio = IO_COHERENCE_DISABLED;
                break;
        }
 
index df761d38f7fc989bd0ec51df191661a35278e64d..e3c9872a4aa5d317ba3cdcf060a52586aeea5203 100644 (file)
@@ -1,4 +1,7 @@
 /*
+ * 8250 UART probe driver for the BCM47XX platforms
+ * Author: Aurelien Jarno
+ *
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
@@ -6,7 +9,6 @@
  * Copyright (C) 2007 Aurelien Jarno <aurelien@aurel32.net>
  */
 
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/serial_8250.h>
@@ -88,9 +90,4 @@ static int __init uart8250_init(void)
        }
        return -EINVAL;
 }
-
-module_init(uart8250_init);
-
-MODULE_AUTHOR("Aurelien Jarno <aurelien@aurel32.net>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("8250 UART probe driver for the BCM47XX platforms");
+device_initcall(uart8250_init);
index 637565284732d396354b5a1eb056bb74e2703380..b49fc9cb9cad2de2c3768ba93e54da41177ff0a7 100644 (file)
@@ -326,6 +326,9 @@ EXPORT_SYMBOL(clk_enable);
 
 void clk_disable(struct clk *clk)
 {
+       if (!clk)
+               return;
+
        mutex_lock(&clocks_mutex);
        clk_disable_unlocked(clk);
        mutex_unlock(&clocks_mutex);
index 264328d528c72e40ad12c6b8bbff2bbb9ccda742..2d60f25403de1128833a2b525061acf97c46cfbe 100644 (file)
@@ -21,10 +21,6 @@ config DT_BCM93384WVG_VIPER
        bool "BCM93384WVG Viper CPU (EXPERIMENTAL)"
        select BUILTIN_DTB
 
-config DT_BCM96358NB4SER
-       bool "BCM96358NB4SER"
-       select BUILTIN_DTB
-
 config DT_BCM96368MVWG
        bool "BCM96368MVWG"
        select BUILTIN_DTB
@@ -65,6 +61,22 @@ config DT_BCM97435SVMB
        bool "BCM97435SVMB"
        select BUILTIN_DTB
 
+config DT_COMTREND_VR3032U
+       bool "Comtrend VR-3032u"
+       select BUILTIN_DTB
+
+config DT_NETGEAR_CVG834G
+       bool "NETGEAR CVG834G"
+       select BUILTIN_DTB
+
+config DT_SFR_NEUFBOX4_SERCOMM
+       bool "SFR Neufbox 4 (Sercomm)"
+       select BUILTIN_DTB
+
+config DT_SFR_NEUFBOX6_SERCOMM
+       bool "SFR Neufbox 6 (Sercomm)"
+       select BUILTIN_DTB
+
 endchoice
 
 endif
index 6776042679dd263a9c862e79942946afaff9a37e..3b6f687f177cdf5b3826e10978a1bd465ed2a96e 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
+#include <linux/libfdt.h>
 #include <linux/smp.h>
 #include <asm/addrspace.h>
 #include <asm/bmips.h>
@@ -98,7 +99,7 @@ static void bcm6328_quirks(void)
 static void bcm6358_quirks(void)
 {
        /*
-        * BCM6358 needs special handling for its shared TLB, so
+        * BCM3368/BCM6358 need special handling for their shared TLB, so
         * disable SMP for now
         */
        bmips_smp_enabled = 0;
@@ -110,10 +111,12 @@ static void bcm6368_quirks(void)
 }
 
 static const struct bmips_quirk bmips_quirk_list[] = {
+       { "brcm,bcm3368",               &bcm6358_quirks                 },
        { "brcm,bcm3384-viper",         &bcm3384_viper_quirks           },
        { "brcm,bcm33843-viper",        &bcm3384_viper_quirks           },
        { "brcm,bcm6328",               &bcm6328_quirks                 },
        { "brcm,bcm6358",               &bcm6358_quirks                 },
+       { "brcm,bcm6362",               &bcm6368_quirks                 },
        { "brcm,bcm6368",               &bcm6368_quirks                 },
        { "brcm,bcm63168",              &bcm6368_quirks                 },
        { "brcm,bcm63268",              &bcm6368_quirks                 },
@@ -150,6 +153,8 @@ void __init plat_time_init(void)
        mips_hpt_frequency = freq;
 }
 
+extern const char __appended_dtb;
+
 void __init plat_mem_setup(void)
 {
        void *dtb;
@@ -159,6 +164,11 @@ void __init plat_mem_setup(void)
        ioport_resource.start = 0;
        ioport_resource.end = ~0;
 
+#ifdef CONFIG_MIPS_ELF_APPENDED_DTB
+       if (!fdt_check_header(&__appended_dtb))
+               dtb = (void *)&__appended_dtb;
+       else
+#endif
        /* intended to somewhat resemble ARM; see Documentation/arm/Booting */
        if (fw_arg0 == 0 && fw_arg1 == 0xffffffff)
                dtb = phys_to_virt(fw_arg2);
index acb1988f354edc58072399a076656c0f2ffd149e..2728a9a9c7c5bc4f822ab6788e1f889cb39c0a51 100644 (file)
@@ -100,3 +100,69 @@ $(obj)/uImage.lzo: $(obj)/vmlinux.bin.lzo FORCE
 $(obj)/uImage: $(obj)/uImage.$(suffix-y)
        @ln -sf $(notdir $<) $@
        @echo '  Image $@ is ready'
+
+#
+# Flattened Image Tree (.itb) images
+#
+
+targets += vmlinux.itb
+targets += vmlinux.gz.itb
+targets += vmlinux.bz2.itb
+targets += vmlinux.lzma.itb
+targets += vmlinux.lzo.itb
+
+ifeq ($(ADDR_BITS),32)
+       itb_addr_cells = 1
+endif
+ifeq ($(ADDR_BITS),64)
+       itb_addr_cells = 2
+endif
+
+quiet_cmd_cpp_its_S = ITS     $@
+      cmd_cpp_its_S = $(CPP) $(cpp_flags) -P -C -o $@ $< \
+                       -DKERNEL_NAME="\"Linux $(KERNELRELEASE)\"" \
+                       -DVMLINUX_BINARY="\"$(3)\"" \
+                       -DVMLINUX_COMPRESSION="\"$(2)\"" \
+                       -DVMLINUX_LOAD_ADDRESS=$(VMLINUX_LOAD_ADDRESS) \
+                       -DVMLINUX_ENTRY_ADDRESS=$(VMLINUX_ENTRY_ADDRESS) \
+                       -DADDR_BITS=$(ADDR_BITS) \
+                       -DADDR_CELLS=$(itb_addr_cells)
+
+$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+       $(call if_changed_dep,cpp_its_S,none,vmlinux.bin)
+
+$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+       $(call if_changed_dep,cpp_its_S,gzip,vmlinux.bin.gz)
+
+$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+       $(call if_changed_dep,cpp_its_S,bzip2,vmlinux.bin.bz2)
+
+$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+       $(call if_changed_dep,cpp_its_S,lzma,vmlinux.bin.lzma)
+
+$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
+       $(call if_changed_dep,cpp_its_S,lzo,vmlinux.bin.lzo)
+
+quiet_cmd_itb-image = ITB     $@
+      cmd_itb-image = \
+               env PATH="$(objtree)/scripts/dtc:$(PATH)" \
+               $(CONFIG_SHELL) $(MKIMAGE) \
+               -D "-I dts -O dtb -p 500 \
+                       --include $(objtree)/arch/mips \
+                       --warning no-unit_address_vs_reg" \
+               -f $(2) $@
+
+$(obj)/vmlinux.itb: $(obj)/vmlinux.its $(obj)/vmlinux.bin FORCE
+       $(call if_changed,itb-image,$<)
+
+$(obj)/vmlinux.gz.itb: $(obj)/vmlinux.gz.its $(obj)/vmlinux.bin.gz FORCE
+       $(call if_changed,itb-image,$<)
+
+$(obj)/vmlinux.bz2.itb: $(obj)/vmlinux.bz2.its $(obj)/vmlinux.bin.bz2 FORCE
+       $(call if_changed,itb-image,$<)
+
+$(obj)/vmlinux.lzma.itb: $(obj)/vmlinux.lzma.its $(obj)/vmlinux.bin.lzma FORCE
+       $(call if_changed,itb-image,$<)
+
+$(obj)/vmlinux.lzo.itb: $(obj)/vmlinux.lzo.its $(obj)/vmlinux.bin.lzo FORCE
+       $(call if_changed,itb-image,$<)
index fda9d387cc08640067ef93650aa01e78d4536007..d61bc2aebf69b423ba65fe1d02284fb2feed0814 100644 (file)
@@ -1,6 +1,5 @@
 dtb-$(CONFIG_DT_BCM93384WVG)           += bcm93384wvg.dtb
 dtb-$(CONFIG_DT_BCM93384WVG_VIPER)     += bcm93384wvg_viper.dtb
-dtb-$(CONFIG_DT_BCM96358NB4SER)                += bcm96358nb4ser.dtb
 dtb-$(CONFIG_DT_BCM96368MVWG)          += bcm96368mvwg.dtb
 dtb-$(CONFIG_DT_BCM9EJTAGPRB)          += bcm9ejtagprb.dtb
 dtb-$(CONFIG_DT_BCM97125CBMB)          += bcm97125cbmb.dtb
@@ -11,20 +10,29 @@ dtb-$(CONFIG_DT_BCM97362SVMB)               += bcm97362svmb.dtb
 dtb-$(CONFIG_DT_BCM97420C)             += bcm97420c.dtb
 dtb-$(CONFIG_DT_BCM97425SVMB)          += bcm97425svmb.dtb
 dtb-$(CONFIG_DT_BCM97435SVMB)          += bcm97435svmb.dtb
+dtb-$(CONFIG_DT_COMTREND_VR3032U)      += bcm63268-comtrend-vr-3032u.dtb
+dtb-$(CONFIG_DT_NETGEAR_CVG834G)       += bcm3368-netgear-cvg834g.dtb
+dtb-$(CONFIG_DT_SFR_NEUFBOX4_SERCOMM)  += bcm6358-neufbox4-sercomm.dtb
+dtb-$(CONFIG_DT_SFR_NEUFBOX6_SERCOMM)  += bcm6362-neufbox6-sercomm.dtb
 
-dtb-$(CONFIG_DT_NONE)                  += \
-                                               bcm93384wvg.dtb         \
-                                               bcm93384wvg_viper.dtb   \
-                                               bcm96358nb4ser.dtb      \
-                                               bcm96368mvwg.dtb        \
-                                               bcm9ejtagprb.dtb        \
-                                               bcm97125cbmb.dtb        \
-                                               bcm97346dbsmb.dtb       \
-                                               bcm97358svmb.dtb        \
-                                               bcm97360svmb.dtb        \
-                                               bcm97362svmb.dtb        \
-                                               bcm97420c.dtb           \
-                                               bcm97425svmb.dtb
+dtb-$(CONFIG_DT_NONE) += \
+       bcm3368-netgear-cvg834g.dtb \
+       bcm6358-neufbox4-sercomm.dtb \
+       bcm6362-neufbox6-sercomm.dtb \
+       bcm63268-comtrend-vr-3032u.dtb \
+       bcm93384wvg.dtb \
+       bcm93384wvg_viper.dtb \
+       bcm96358nb4ser.dtb \
+       bcm96368mvwg.dtb \
+       bcm9ejtagprb.dtb \
+       bcm97125cbmb.dtb \
+       bcm97346dbsmb.dtb \
+       bcm97358svmb.dtb \
+       bcm97360svmb.dtb \
+       bcm97362svmb.dtb \
+       bcm97420c.dtb \
+       bcm97425svmb.dtb \
+       bcm97435svmb.dtb
 
 obj-y                          += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
 
diff --git a/arch/mips/boot/dts/brcm/bcm3368-netgear-cvg834g.dts b/arch/mips/boot/dts/brcm/bcm3368-netgear-cvg834g.dts
new file mode 100644 (file)
index 0000000..2f2e80f
--- /dev/null
@@ -0,0 +1,22 @@
+/dts-v1/;
+
+/include/ "bcm3368.dtsi"
+
+/ { /* Board-level root node for the NETGEAR CVG834G; SoC nodes come from bcm3368.dtsi */
+       compatible = "netgear,cvg834g", "brcm,bcm3368"; /* board string first, then SoC fallback */
+       model = "NETGEAR CVG834G";
+
+       memory@0 {
+               device_type = "memory";
+               reg = <0x00000000 0x02000000>; /* <base size>: base 0x0, size 0x02000000 (32 MiB) */
+       };
+
+       chosen {
+               bootargs = "console=ttyS0,115200";
+               stdout-path = &uart0; /* uart0 is declared in bcm3368.dtsi and enabled below */
+       };
+};
+
+&uart0 { /* override of the uart0 node from bcm3368.dtsi, where it defaults to "disabled" */
+       status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm3368.dtsi b/arch/mips/boot/dts/brcm/bcm3368.dtsi
new file mode 100644 (file)
index 0000000..bee855c
--- /dev/null
@@ -0,0 +1,101 @@
+/ { /* SoC-level description for BCM3368; board .dts files include this and override nodes by label */
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "brcm,bcm3368";
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               mips-hpt-frequency = <150000000>; /* 150,000,000 Hz */
+
+               cpu@0 {
+                       compatible = "brcm,bmips4350";
+                       device_type = "cpu";
+                       reg = <0>;
+               };
+
+               cpu@1 {
+                       compatible = "brcm,bmips4350";
+                       device_type = "cpu";
+                       reg = <1>;
+               };
+       };
+
+       clocks {
+               periph_clk: periph-clk { /* fixed clock referenced by uart0 and uart1 */
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <50000000>; /* 50,000,000 Hz */
+               };
+       };
+
+       aliases {
+               serial0 = &uart0;
+               serial1 = &uart1;
+       };
+
+       cpu_intc: interrupt-controller {
+               #address-cells = <0>;
+               compatible = "mti,cpu-interrupt-controller";
+
+               interrupt-controller;
+               #interrupt-cells = <1>;
+       };
+
+       ubus {
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               compatible = "simple-bus";
+               ranges; /* empty ranges: child addresses map 1:1 onto the parent address space */
+
+               periph_cntl: syscon@fff8c000 {
+                       compatible = "syscon";
+                       reg = <0xfff8c000 0xc>;
+                       native-endian;
+               };
+
+               reboot: syscon-reboot@fff8c008 {
+                       compatible = "syscon-reboot";
+                       regmap = <&periph_cntl>;
+                       offset = <0x8>; /* offset into the periph_cntl regmap, i.e. 0xfff8c008 */
+                       mask = <0x1>;
+               };
+
+               periph_intc: interrupt-controller@fff8c00c {
+                       compatible = "brcm,bcm6345-l1-intc";
+                       reg = <0xfff8c00c 0x8>;
+
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+
+                       interrupt-parent = <&cpu_intc>; /* cascaded from the CPU interrupt controller */
+                       interrupts = <2>;
+               };
+
+               uart0: serial@fff8c100 {
+                       compatible = "brcm,bcm6345-uart";
+                       reg = <0xfff8c100 0x18>;
+
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <2>;
+
+                       clocks = <&periph_clk>;
+
+                       status = "disabled"; /* boards opt in by overriding &uart0 */
+               };
+
+               uart1: serial@fff8c120 {
+                       compatible = "brcm,bcm6345-uart";
+                       reg = <0xfff8c120 0x18>;
+
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <3>;
+
+                       clocks = <&periph_clk>;
+
+                       status = "disabled"; /* boards opt in by overriding &uart1 */
+               };
+       };
+};
diff --git a/arch/mips/boot/dts/brcm/bcm63268-comtrend-vr-3032u.dts b/arch/mips/boot/dts/brcm/bcm63268-comtrend-vr-3032u.dts
new file mode 100644 (file)
index 0000000..430d35c
--- /dev/null
@@ -0,0 +1,108 @@
+/dts-v1/;
+
+/include/ "bcm63268.dtsi"
+
+/ { /* Board-level root node for the Comtrend VR-3032u; SoC nodes come from bcm63268.dtsi */
+       compatible = "comtrend,vr-3032u", "brcm,bcm63268"; /* board string first, then SoC fallback */
+       model = "Comtrend VR-3032u";
+
+       memory@0 {
+               device_type = "memory";
+               reg = <0x00000000 0x04000000>; /* <base size>: base 0x0, size 0x04000000 (64 MiB) */
+       };
+
+       chosen {
+               bootargs = "console=ttyS0,115200";
+               stdout-path = &uart0; /* uart0 is declared in bcm63268.dtsi and enabled below */
+       };
+};
+
+&leds0 { /* enable the leds0 controller from bcm63268.dtsi and describe this board's LEDs */
+       status = "okay"; /* spec-defined spelling; matches &uart0 in this file */
+       brcm,serial-leds;
+       brcm,serial-dat-low;
+       brcm,serial-shift-inv;
+
+       led@0 {
+               reg = <0>;
+               brcm,hardware-controlled;
+               brcm,link-signal-sources = <0>;
+               /* GPHY0 Speed 0 */
+       };
+       led@1 {
+               reg = <1>;
+               brcm,hardware-controlled;
+               brcm,link-signal-sources = <1>;
+               /* GPHY0 Speed 1 */
+       };
+       led@2 {
+               reg = <2>;
+               active-low;
+               label = "vr-3032u:red:inet";
+       };
+       led@3 {
+               reg = <3>;
+               active-low;
+               label = "vr-3032u:green:dsl";
+       };
+       led@4 {
+               reg = <4>;
+               active-low;
+               label = "vr-3032u:green:usb";
+       };
+       led@7 {
+               reg = <7>;
+               active-low;
+               label = "vr-3032u:green:wps";
+       };
+       led@8 {
+               reg = <8>;
+               active-low;
+               label = "vr-3032u:green:inet";
+       };
+       led@9 {
+               reg = <9>;
+               brcm,hardware-controlled;
+               /* EPHY0 Activity */
+       };
+       led@10 {
+               reg = <10>;
+               brcm,hardware-controlled;
+               /* EPHY1 Activity */
+       };
+       led@11 {
+               reg = <11>;
+               brcm,hardware-controlled;
+               /* EPHY2 Activity */
+       };
+       led@12 {
+               reg = <12>;
+               brcm,hardware-controlled;
+               /* GPHY0 Activity */
+       };
+       led@13 {
+               reg = <13>;
+               brcm,hardware-controlled;
+               /* EPHY0 Speed */
+       };
+       led@14 {
+               reg = <14>;
+               brcm,hardware-controlled;
+               /* EPHY1 Speed */
+       };
+       led@15 {
+               reg = <15>;
+               brcm,hardware-controlled;
+               /* EPHY2 Speed */
+       };
+       led@20 {
+               reg = <20>;
+               active-low;
+               label = "vr-3032u:green:power";
+               default-state = "on"; /* power LED is requested lit by default */
+       };
+};
+
+&uart0 { /* override of the uart0 node from bcm63268.dtsi, where it defaults to "disabled" */
+       status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm63268.dtsi b/arch/mips/boot/dts/brcm/bcm63268.dtsi
new file mode 100644 (file)
index 0000000..7e6bf2c
--- /dev/null
@@ -0,0 +1,134 @@
+/ { /* SoC-level description for BCM63268; board .dts files include this and override nodes by label */
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "brcm,bcm63268";
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               mips-hpt-frequency = <200000000>; /* 200,000,000 Hz */
+
+               cpu@0 {
+                       compatible = "brcm,bmips4350";
+                       device_type = "cpu";
+                       reg = <0>;
+               };
+
+               cpu@1 {
+                       compatible = "brcm,bmips4350";
+                       device_type = "cpu";
+                       reg = <1>;
+               };
+       };
+
+       clocks {
+               periph_clk: periph-clk { /* fixed clock referenced by uart0 and uart1 */
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <50000000>; /* 50,000,000 Hz */
+               };
+       };
+
+       aliases {
+               serial0 = &uart0;
+               serial1 = &uart1;
+       };
+
+       cpu_intc: interrupt-controller {
+               #address-cells = <0>;
+               compatible = "mti,cpu-interrupt-controller";
+
+               interrupt-controller;
+               #interrupt-cells = <1>;
+       };
+
+       ubus {
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               compatible = "simple-bus";
+               ranges; /* empty ranges: child addresses map 1:1 onto the parent address space */
+
+               periph_cntl: syscon@10000000 {
+                       compatible = "syscon";
+                       reg = <0x10000000 0x14>;
+                       native-endian;
+               };
+
+               reboot: syscon-reboot@10000008 {
+                       compatible = "syscon-reboot";
+                       regmap = <&periph_cntl>;
+                       offset = <0x8>; /* offset into the periph_cntl regmap, i.e. 0x10000008 */
+                       mask = <0x1>;
+               };
+
+               periph_intc: interrupt-controller@10000020 {
+                       compatible = "brcm,bcm6345-l1-intc";
+                       reg = <0x10000020 0x20>, /* two register blocks, paired with the two parent interrupts below */
+                             <0x10000040 0x20>;
+
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+
+                       interrupt-parent = <&cpu_intc>; /* cascaded from the CPU interrupt controller */
+                       interrupts = <2>, <3>;
+               };
+
+               uart0: serial@10000180 {
+                       compatible = "brcm,bcm6345-uart";
+                       reg = <0x10000180 0x18>;
+
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <5>;
+
+                       clocks = <&periph_clk>;
+
+                       status = "disabled"; /* boards opt in by overriding &uart0 */
+               };
+
+               uart1: serial@100001a0 {
+                       compatible = "brcm,bcm6345-uart";
+                       reg = <0x100001a0 0x18>;
+
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <34>;
+
+                       clocks = <&periph_clk>;
+
+                       status = "disabled"; /* boards opt in by overriding &uart1 */
+               };
+
+               leds0: led-controller@10001900 {
+                       #address-cells = <1>; /* child led@N nodes are addressed by a single cell, no size */
+                       #size-cells = <0>;
+                       compatible = "brcm,bcm6328-leds";
+                       reg = <0x10001900 0x24>;
+
+                       status = "disabled"; /* boards opt in by overriding &leds0 and adding led@N children */
+               };
+
+               ehci: usb@10002500 {
+                       compatible = "brcm,bcm63268-ehci", "generic-ehci";
+                       reg = <0x10002500 0x100>;
+                       big-endian;
+
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <10>;
+
+                       status = "disabled";
+               };
+
+               ohci: usb@10002600 {
+                       compatible = "brcm,bcm63268-ohci", "generic-ohci";
+                       reg = <0x10002600 0x100>;
+                       big-endian;
+                       no-big-frame-no;
+
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <9>;
+
+                       status = "disabled";
+               };
+       };
+};
diff --git a/arch/mips/boot/dts/brcm/bcm6358-neufbox4-sercomm.dts b/arch/mips/boot/dts/brcm/bcm6358-neufbox4-sercomm.dts
new file mode 100644 (file)
index 0000000..702eae2
--- /dev/null
@@ -0,0 +1,47 @@
+/dts-v1/;
+
+/include/ "bcm6358.dtsi"
+
+/ { /* Board-level root node for the SFR Neufbox 4 (Sercomm); SoC nodes come from bcm6358.dtsi */
+       compatible = "sfr,nb4-ser", "brcm,bcm6358"; /* board string first, then SoC fallback */
+       model = "SFR Neufbox 4 (Sercomm)";
+
+       memory@0 {
+               device_type = "memory";
+               reg = <0x00000000 0x02000000>; /* presumably <base size> = 32 MiB at 0x0; cell counts live in bcm6358.dtsi - TODO confirm */
+       };
+
+       chosen {
+               bootargs = "console=ttyS0,115200";
+               stdout-path = &uart0; /* uart0 label is expected to come from bcm6358.dtsi; enabled below */
+       };
+};
+
+&leds0 { /* enable the SoC LED controller referenced by label and describe this board's LEDs */
+       status = "okay"; /* spec-defined spelling; matches &uart0 in this file */
+
+       led@0 {
+               reg = <0>;
+               active-low;
+               label = "nb4-ser:white:alarm";
+       };
+       led@2 {
+               reg = <2>;
+               active-low;
+               label = "nb4-ser:white:tv";
+       };
+       led@3 {
+               reg = <3>;
+               active-low;
+               label = "nb4-ser:white:tel";
+       };
+       led@4 {
+               reg = <4>;
+               active-low;
+               label = "nb4-ser:white:adsl";
+       };
+};
+
+&uart0 { /* override of the uart0 label, presumably declared in bcm6358.dtsi - TODO confirm */
+       status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm6362-neufbox6-sercomm.dts b/arch/mips/boot/dts/brcm/bcm6362-neufbox6-sercomm.dts
new file mode 100644 (file)
index 0000000..480f2a5
--- /dev/null
@@ -0,0 +1,22 @@
+/dts-v1/;
+
+/include/ "bcm6362.dtsi"
+
+/ { /* Board-level root node for the SFR NeufBox 6 (Sercomm); SoC nodes come from bcm6362.dtsi */
+       compatible = "sfr,nb6-ser", "brcm,bcm6362"; /* board string first, then SoC fallback */
+       model = "SFR NeufBox 6 (Sercomm)";
+
+       memory@0 {
+               device_type = "memory";
+               reg = <0x00000000 0x08000000>; /* <base size>: base 0x0, size 0x08000000 (128 MiB) */
+       };
+
+       chosen {
+               bootargs = "console=ttyS0,115200";
+               stdout-path = &uart0; /* uart0 is declared in bcm6362.dtsi and enabled below */
+       };
+};
+
+&uart0 { /* override of the uart0 node from bcm6362.dtsi, where it defaults to "disabled" */
+       status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm6362.dtsi b/arch/mips/boot/dts/brcm/bcm6362.dtsi
new file mode 100644 (file)
index 0000000..c507da5
--- /dev/null
@@ -0,0 +1,134 @@
+/ { /* SoC-level description for BCM6362; board .dts files include this and override nodes by label */
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "brcm,bcm6362";
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               mips-hpt-frequency = <200000000>; /* 200,000,000 Hz */
+
+               cpu@0 {
+                       compatible = "brcm,bmips4350";
+                       device_type = "cpu";
+                       reg = <0>;
+               };
+
+               cpu@1 {
+                       compatible = "brcm,bmips4350";
+                       device_type = "cpu";
+                       reg = <1>;
+               };
+       };
+
+       clocks {
+               periph_clk: periph-clk { /* fixed clock referenced by uart0 and uart1 */
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <50000000>; /* 50,000,000 Hz */
+               };
+       };
+
+       aliases {
+               serial0 = &uart0;
+               serial1 = &uart1;
+       };
+
+       cpu_intc: interrupt-controller {
+               #address-cells = <0>;
+               compatible = "mti,cpu-interrupt-controller";
+
+               interrupt-controller;
+               #interrupt-cells = <1>;
+       };
+
+       ubus {
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               compatible = "simple-bus";
+               ranges; /* empty ranges: child addresses map 1:1 onto the parent address space */
+
+               periph_cntl: syscon@10000000 {
+                       compatible = "syscon";
+                       reg = <0x10000000 0x14>;
+                       native-endian;
+               };
+
+               reboot: syscon-reboot@10000008 {
+                       compatible = "syscon-reboot";
+                       regmap = <&periph_cntl>;
+                       offset = <0x8>; /* offset into the periph_cntl regmap, i.e. 0x10000008 */
+                       mask = <0x1>;
+               };
+
+               periph_intc: interrupt-controller@10000020 {
+                       compatible = "brcm,bcm6345-l1-intc";
+                       reg = <0x10000020 0x10>, /* two register blocks, paired with the two parent interrupts below */
+                             <0x10000030 0x10>;
+
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+
+                       interrupt-parent = <&cpu_intc>; /* cascaded from the CPU interrupt controller */
+                       interrupts = <2>, <3>;
+               };
+
+               uart0: serial@10000100 {
+                       compatible = "brcm,bcm6345-uart";
+                       reg = <0x10000100 0x18>;
+
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <3>;
+
+                       clocks = <&periph_clk>;
+
+                       status = "disabled"; /* boards opt in by overriding &uart0 */
+               };
+
+               uart1: serial@10000120 {
+                       compatible = "brcm,bcm6345-uart";
+                       reg = <0x10000120 0x18>;
+
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <4>;
+
+                       clocks = <&periph_clk>;
+
+                       status = "disabled"; /* boards opt in by overriding &uart1 */
+               };
+
+               leds0: led-controller@10001900 {
+                       #address-cells = <1>; /* child led@N nodes are addressed by a single cell, no size */
+                       #size-cells = <0>;
+                       compatible = "brcm,bcm6328-leds";
+                       reg = <0x10001900 0x24>;
+
+                       status = "disabled"; /* boards opt in by overriding &leds0 and adding led@N children */
+               };
+
+               ehci: usb@10002500 {
+                       compatible = "brcm,bcm6362-ehci", "generic-ehci";
+                       reg = <0x10002500 0x100>;
+                       big-endian;
+
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <10>;
+
+                       status = "disabled";
+               };
+
+               ohci: usb@10002600 {
+                       compatible = "brcm,bcm6362-ohci", "generic-ohci";
+                       reg = <0x10002600 0x100>;
+                       big-endian;
+                       no-big-frame-no;
+
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <9>;
+
+                       status = "disabled";
+               };
+       };
+};
index 550e1d9e3ee039eb06f14816bde573a03268ef4c..bbd00f65ce397a7a83b762f7dc5da316efa427fb 100644 (file)
@@ -26,7 +26,7 @@
                uart0 = &uart0;
        };
 
-       cpu_intc: cpu_intc {
+       cpu_intc: interrupt-controller {
                #address-cells = <0>;
                compatible = "mti,cpu-interrupt-controller";
 
                        #clock-cells = <0>;
                        clock-frequency = <81000000>;
                };
+
+               upg_clk: upg_clk {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <27000000>;
+               };
        };
 
        rdb {
@@ -49,7 +55,7 @@
                compatible = "simple-bus";
                ranges = <0 0x10000000 0x01000000>;
 
-               periph_intc: periph_intc@441400 {
+               periph_intc: interrupt-controller@441400 {
                        compatible = "brcm,bcm7038-l1-intc";
                        reg = <0x441400 0x30>, <0x441600 0x30>;
 
@@ -60,7 +66,7 @@
                        interrupts = <2>, <3>;
                };
 
-               sun_l2_intc: sun_l2_intc@401800 {
+               sun_l2_intc: interrupt-controller@401800 {
                        compatible = "brcm,l2-intc";
                        reg = <0x401800 0x30>;
                        interrupt-controller;
@@ -81,7 +87,7 @@
                                                     "avd_0", "jtag_0";
                };
 
-               upg_irq0_intc: upg_irq0_intc@406780 {
+               upg_irq0_intc: interrupt-controller@406780 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x406780 0x8>;
 
                      status = "disabled";
                };
 
+               pwma: pwm@406580 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406580 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               upg_gio: gpio@406700 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x406700 0x80>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_irq0_intc>;
+                       interrupts = <6>;
+                       brcm,gpio-bank-widths = <32 32 32 18>;
+               };
+
                ehci0: usb@488300 {
                        compatible = "brcm,bcm7125-ehci", "generic-ehci";
                        reg = <0x488300 0x100>;
index ec959061d52e30ef96c71fd78bd07f72b63c124e..4bbcc95f1c15d6dee9f2124d4318d60fba246b99 100644 (file)
@@ -26,7 +26,7 @@
                uart0 = &uart0;
        };
 
-       cpu_intc: cpu_intc {
+       cpu_intc: interrupt-controller {
                #address-cells = <0>;
                compatible = "mti,cpu-interrupt-controller";
 
                        #clock-cells = <0>;
                        clock-frequency = <81000000>;
                };
+
+               upg_clk: upg_clk {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <27000000>;
+               };
        };
 
        rdb {
@@ -49,7 +55,7 @@
                compatible = "simple-bus";
                ranges = <0 0x10000000 0x01000000>;
 
-               periph_intc: periph_intc@411400 {
+               periph_intc: interrupt-controller@411400 {
                        compatible = "brcm,bcm7038-l1-intc";
                        reg = <0x411400 0x30>, <0x411600 0x30>;
 
@@ -60,7 +66,7 @@
                        interrupts = <2>, <3>;
                };
 
-               sun_l2_intc: sun_l2_intc@403000 {
+               sun_l2_intc: interrupt-controller@403000 {
                        compatible = "brcm,l2-intc";
                        reg = <0x403000 0x30>;
                        interrupt-controller;
@@ -81,7 +87,7 @@
                                                     "jtag_0", "svd_0";
                };
 
-               upg_irq0_intc: upg_irq0_intc@406780 {
+               upg_irq0_intc: interrupt-controller@406780 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x406780 0x8>;
 
                        interrupt-names = "upg_main", "upg_bsc";
                };
 
-               upg_aon_irq0_intc: upg_aon_irq0_intc@408b80 {
+               upg_aon_irq0_intc: interrupt-controller@408b80 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x408b80 0x8>;
 
                      status = "disabled";
                };
 
+               pwma: pwm@406580 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406580 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               pwmb: pwm@406800 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406800 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               aon_pm_l2_intc: interrupt-controller@408440 {
+                       compatible = "brcm,l2-intc";
+                       reg = <0x408440 0x30>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <53>;
+                       brcm,irq-can-wake;
+               };
+
+               upg_gio: gpio@406700 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x406700 0x60>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_irq0_intc>;
+                       interrupts = <6>;
+                       brcm,gpio-bank-widths = <32 32 16>;
+               };
+
+               upg_gio_aon: gpio@408c00 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x408c00 0x60>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_aon_irq0_intc>;
+                       interrupts = <6>;
+                       interrupts-extended = <&upg_aon_irq0_intc 6>,
+                                             <&aon_pm_l2_intc 5>;
+                       wakeup-source;
+                       brcm,gpio-bank-widths = <27 32 2>;
+               };
+
                enet0: ethernet@430000 {
                        phy-mode = "internal";
                        phy-handle = <&phy1>;
                        status = "disabled";
                };
 
+               hif_l2_intc: interrupt-controller@411000 {
+                       compatible = "brcm,l2-intc";
+                       reg = <0x411000 0x30>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <30>;
+               };
+
+               nand: nand@412800 {
+                       compatible = "brcm,brcmnand-v5.0", "brcm,brcmnand";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       reg-names = "nand";
+                       reg = <0x412800 0x400>;
+                       interrupt-parent = <&hif_l2_intc>;
+                       interrupts = <24>;
+                       status = "disabled";
+               };
+
                sata: sata@181000 {
                        compatible = "brcm,bcm7425-ahci", "brcm,sata3-ahci";
                        reg-names = "ahci", "top-ctrl";
                                #phy-cells = <0>;
                        };
                };
+
+               sdhci0: sdhci@413500 {
+                       compatible = "brcm,bcm7425-sdhci";
+                       reg = <0x413500 0x100>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <85>;
+                       status = "disabled";
+               };
        };
 };
index ca57fb5eb1222e4e42066e4b99d5239a85121791..3e42535c8d290907705172bcdb86387983564c34 100644 (file)
@@ -20,7 +20,7 @@
                uart0 = &uart0;
        };
 
-       cpu_intc: cpu_intc {
+       cpu_intc: interrupt-controller {
                #address-cells = <0>;
                compatible = "mti,cpu-interrupt-controller";
 
                        #clock-cells = <0>;
                        clock-frequency = <81000000>;
                };
+
+               upg_clk: upg_clk {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <27000000>;
+               };
        };
 
        rdb {
@@ -43,7 +49,7 @@
                compatible = "simple-bus";
                ranges = <0 0x10000000 0x01000000>;
 
-               periph_intc: periph_intc@411400 {
+               periph_intc: interrupt-controller@411400 {
                        compatible = "brcm,bcm7038-l1-intc";
                        reg = <0x411400 0x30>;
 
@@ -54,7 +60,7 @@
                        interrupts = <2>;
                };
 
-               sun_l2_intc: sun_l2_intc@403000 {
+               sun_l2_intc: interrupt-controller@403000 {
                        compatible = "brcm,l2-intc";
                        reg = <0x403000 0x30>;
                        interrupt-controller;
@@ -75,7 +81,7 @@
                                                     "avd_0", "jtag_0";
                };
 
-               upg_irq0_intc: upg_irq0_intc@406600 {
+               upg_irq0_intc: interrupt-controller@406600 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x406600 0x8>;
 
@@ -90,7 +96,7 @@
                        interrupt-names = "upg_main", "upg_bsc";
                };
 
-               upg_aon_irq0_intc: upg_aon_irq0_intc@408b80 {
+               upg_aon_irq0_intc: interrupt-controller@408b80 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x408b80 0x8>;
 
                      status = "disabled";
                };
 
+               pwma: pwm@406400 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406400 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               pwmb: pwm@406700 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406700 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               aon_pm_l2_intc: interrupt-controller@408240 {
+                       compatible = "brcm,l2-intc";
+                       reg = <0x408240 0x30>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <50>;
+                       brcm,irq-can-wake;
+               };
+
+               upg_gio: gpio@406500 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x406500 0xa0>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_irq0_intc>;
+                       interrupts = <6>;
+                       brcm,gpio-bank-widths = <32 32 32 29 4>;
+               };
+
+               upg_gio_aon: gpio@408c00 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x408c00 0x60>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_aon_irq0_intc>;
+                       interrupts = <6>;
+                       interrupts-extended = <&upg_aon_irq0_intc 6>,
+                                             <&aon_pm_l2_intc 5>;
+                       wakeup-source;
+                       brcm,gpio-bank-widths = <21 32 2>;
+               };
+
                enet0: ethernet@430000 {
                        phy-mode = "internal";
                        phy-handle = <&phy1>;
                        interrupts = <66>;
                        status = "disabled";
                };
+
+               hif_l2_intc: interrupt-controller@411000 {
+                       compatible = "brcm,l2-intc";
+                       reg = <0x411000 0x30>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <30>;
+               };
+
+               nand: nand@412800 {
+                       compatible = "brcm,brcmnand-v5.0", "brcm,brcmnand";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       reg-names = "nand";
+                       reg = <0x412800 0x400>;
+                       interrupt-parent = <&hif_l2_intc>;
+                       interrupts = <24>;
+                       status = "disabled";
+               };
        };
 };
index 1c0c3d438c7ac42b6df46523077c8804bdaf99b8..112a5571c5961c2b5939a81881967121515bf802 100644 (file)
@@ -20,7 +20,7 @@
                uart0 = &uart0;
        };
 
-       cpu_intc: cpu_intc {
+       cpu_intc: interrupt-controller {
                #address-cells = <0>;
                compatible = "mti,cpu-interrupt-controller";
 
                        #clock-cells = <0>;
                        clock-frequency = <81000000>;
                };
+
+               upg_clk: upg_clk {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <27000000>;
+               };
        };
 
        rdb {
@@ -43,7 +49,7 @@
                compatible = "simple-bus";
                ranges = <0 0x10000000 0x01000000>;
 
-               periph_intc: periph_intc@411400 {
+               periph_intc: interrupt-controller@411400 {
                        compatible = "brcm,bcm7038-l1-intc";
                        reg = <0x411400 0x30>;
 
@@ -54,7 +60,7 @@
                        interrupts = <2>;
                };
 
-               sun_l2_intc: sun_l2_intc@403000 {
+               sun_l2_intc: interrupt-controller@403000 {
                        compatible = "brcm,l2-intc";
                        reg = <0x403000 0x30>;
                        interrupt-controller;
@@ -75,7 +81,7 @@
                                                     "avd_0", "jtag_0";
                };
 
-               upg_irq0_intc: upg_irq0_intc@406600 {
+               upg_irq0_intc: interrupt-controller@406600 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x406600 0x8>;
 
@@ -90,7 +96,7 @@
                        interrupt-names = "upg_main", "upg_bsc";
                };
 
-               upg_aon_irq0_intc: upg_aon_irq0_intc@408b80 {
+               upg_aon_irq0_intc: interrupt-controller@408b80 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x408b80 0x8>;
 
                      status = "disabled";
                };
 
+               pwma: pwm@406400 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406400 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               aon_pm_l2_intc: interrupt-controller@408440 {
+                       compatible = "brcm,l2-intc";
+                       reg = <0x408440 0x30>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <50>;
+                       brcm,irq-can-wake;
+               };
+
+               upg_gio: gpio@406500 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x406500 0xa0>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_irq0_intc>;
+                       interrupts = <6>;
+                       brcm,gpio-bank-widths = <32 32 32 29 4>;
+               };
+
+               upg_gio_aon: gpio@408c00 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x408c00 0x60>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_aon_irq0_intc>;
+                       interrupts = <6>;
+                       interrupts-extended = <&upg_aon_irq0_intc 6>,
+                                             <&aon_pm_l2_intc 5>;
+                       wakeup-source;
+                       brcm,gpio-bank-widths = <21 32 2>;
+               };
+
                enet0: ethernet@430000 {
                        phy-mode = "internal";
                        phy-handle = <&phy1>;
                        status = "disabled";
                };
 
+               hif_l2_intc: interrupt-controller@411000 {
+                       compatible = "brcm,l2-intc";
+                       reg = <0x411000 0x30>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <30>;
+               };
+
+               nand: nand@412800 {
+                       compatible = "brcm,brcmnand-v5.0", "brcm,brcmnand";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       reg-names = "nand";
+                       reg = <0x412800 0x400>;
+                       interrupt-parent = <&hif_l2_intc>;
+                       interrupts = <24>;
+                       status = "disabled";
+               };
+
                sata: sata@181000 {
                        compatible = "brcm,bcm7425-ahci", "brcm,sata3-ahci";
                        reg-names = "ahci", "top-ctrl";
                                #phy-cells = <0>;
                        };
                };
+
+               sdhci0: sdhci@410000 {
+                       compatible = "brcm,bcm7425-sdhci";
+                       reg = <0x410000 0x100>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <82>;
+                       status = "disabled";
+               };
        };
 };
index 6b4713add4b8a9e176d7556fe2a27360cb7f745d..34abfb0b07e79406e23bd2bd63396aea282992a5 100644 (file)
@@ -26,7 +26,7 @@
                uart0 = &uart0;
        };
 
-       cpu_intc: cpu_intc {
+       cpu_intc: interrupt-controller {
                #address-cells = <0>;
                compatible = "mti,cpu-interrupt-controller";
 
                        #clock-cells = <0>;
                        clock-frequency = <81000000>;
                };
+
+               upg_clk: upg_clk {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <27000000>;
+               };
        };
 
        rdb {
@@ -49,7 +55,7 @@
                compatible = "simple-bus";
                ranges = <0 0x10000000 0x01000000>;
 
-               periph_intc: periph_intc@411400 {
+               periph_intc: interrupt-controller@411400 {
                        compatible = "brcm,bcm7038-l1-intc";
                        reg = <0x411400 0x30>, <0x411600 0x30>;
 
@@ -60,7 +66,7 @@
                        interrupts = <2>, <3>;
                };
 
-               sun_l2_intc: sun_l2_intc@403000 {
+               sun_l2_intc: interrupt-controller@403000 {
                        compatible = "brcm,l2-intc";
                        reg = <0x403000 0x30>;
                        interrupt-controller;
@@ -81,7 +87,7 @@
                                                     "avd_0", "jtag_0";
                };
 
-               upg_irq0_intc: upg_irq0_intc@406600 {
+               upg_irq0_intc: interrupt-controller@406600 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x406600 0x8>;
 
                        interrupt-names = "upg_main", "upg_bsc";
                };
 
-               upg_aon_irq0_intc: upg_aon_irq0_intc@408b80 {
+               upg_aon_irq0_intc: interrupt-controller@408b80 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x408b80 0x8>;
 
                      status = "disabled";
                };
 
+               pwma: pwm@406400 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406400 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               aon_pm_l2_intc: interrupt-controller@408440 {
+                       compatible = "brcm,l2-intc";
+                       reg = <0x408440 0x30>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <50>;
+                       brcm,irq-can-wake;
+               };
+
+               upg_gio: gpio@406500 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x406500 0xa0>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_irq0_intc>;
+                       interrupts = <6>;
+                       brcm,gpio-bank-widths = <32 32 32 29 4>;
+               };
+
+               upg_gio_aon: gpio@408c00 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x408c00 0x60>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_aon_irq0_intc>;
+                       interrupts = <6>;
+                       interrupts-extended = <&upg_aon_irq0_intc 6>,
+                                             <&aon_pm_l2_intc 5>;
+                       wakeup-source;
+                       brcm,gpio-bank-widths = <21 32 2>;
+               };
+
                enet0: ethernet@430000 {
                        phy-mode = "internal";
                        phy-handle = <&phy1>;
                        status = "disabled";
                };
 
+               hif_l2_intc: interrupt-controller@411000 {
+                       compatible = "brcm,l2-intc";
+                       reg = <0x411000 0x30>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <30>;
+               };
+
+               nand: nand@412800 {
+                       compatible = "brcm,brcmnand-v5.0", "brcm,brcmnand";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       reg-names = "nand";
+                       reg = <0x412800 0x400>;
+                       interrupt-parent = <&hif_l2_intc>;
+                       interrupts = <24>;
+                       status = "disabled";
+               };
+
                sata: sata@181000 {
                        compatible = "brcm,bcm7425-ahci", "brcm,sata3-ahci";
                        reg-names = "ahci", "top-ctrl";
                                #phy-cells = <0>;
                        };
                };
+
+               sdhci0: sdhci@410000 {
+                       compatible = "brcm,bcm7425-sdhci";
+                       reg = <0x410000 0x100>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <82>;
+                       status = "disabled";
+               };
        };
 };
index 0586bf662571e633609517c573e147dddc584c6f..b143723c674e8d4b15f50940175d4e87f5b0cc47 100644 (file)
@@ -26,7 +26,7 @@
                uart0 = &uart0;
        };
 
-       cpu_intc: cpu_intc {
+       cpu_intc: interrupt-controller {
                #address-cells = <0>;
                compatible = "mti,cpu-interrupt-controller";
 
                        #clock-cells = <0>;
                        clock-frequency = <81000000>;
                };
+
+               upg_clk: upg_clk {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <27000000>;
+               };
        };
 
        rdb {
@@ -49,7 +55,7 @@
                compatible = "simple-bus";
                ranges = <0 0x10000000 0x01000000>;
 
-               periph_intc: periph_intc@441400 {
+               periph_intc: interrupt-controller@441400 {
                        compatible = "brcm,bcm7038-l1-intc";
                        reg = <0x441400 0x30>, <0x441600 0x30>;
 
@@ -60,7 +66,7 @@
                        interrupts = <2>, <3>;
                };
 
-               sun_l2_intc: sun_l2_intc@401800 {
+               sun_l2_intc: interrupt-controller@401800 {
                        compatible = "brcm,l2-intc";
                        reg = <0x401800 0x30>;
                        interrupt-controller;
@@ -82,7 +88,7 @@
                                                     "jtag_0";
                };
 
-               upg_irq0_intc: upg_irq0_intc@406780 {
+               upg_irq0_intc: interrupt-controller@406780 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x406780 0x8>;
 
                      status = "disabled";
                };
 
+               pwma: pwm@406580 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406580 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               pwmb: pwm@406880 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406880 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               upg_gio: gpio@406700 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x406700 0x80>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_irq0_intc>;
+                       interrupts = <6>;
+                       brcm,gpio-bank-widths = <32 32 32 27>;
+               };
+
                enet0: ethernet@468000 {
                        phy-mode = "internal";
                        phy-handle = <&phy1>;
index c1c15edaf829ba231eeac9afa77c42876e530593..2488d2f61f6017a26f0d1d9198421e5ee6ae35a4 100644 (file)
@@ -26,7 +26,7 @@
                uart0 = &uart0;
        };
 
-       cpu_intc: cpu_intc {
+       cpu_intc: interrupt-controller {
                #address-cells = <0>;
                compatible = "mti,cpu-interrupt-controller";
 
                        #clock-cells = <0>;
                        clock-frequency = <81000000>;
                };
+
+               upg_clk: upg_clk {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <27000000>;
+               };
        };
 
        rdb {
@@ -49,7 +55,7 @@
                compatible = "simple-bus";
                ranges = <0 0x10000000 0x01000000>;
 
-               periph_intc: periph_intc@41a400 {
+               periph_intc: interrupt-controller@41a400 {
                        compatible = "brcm,bcm7038-l1-intc";
                        reg = <0x41a400 0x30>, <0x41a600 0x30>;
 
@@ -60,7 +66,7 @@
                        interrupts = <2>, <3>;
                };
 
-               sun_l2_intc: sun_l2_intc@403000 {
+               sun_l2_intc: interrupt-controller@403000 {
                        compatible = "brcm,l2-intc";
                        reg = <0x403000 0x30>;
                        interrupt-controller;
@@ -83,7 +89,7 @@
                                                     "vice_0";
                };
 
-               upg_irq0_intc: upg_irq0_intc@406780 {
+               upg_irq0_intc: interrupt-controller@406780 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x406780 0x8>;
 
                        interrupt-names = "upg_main", "upg_bsc";
                };
 
-               upg_aon_irq0_intc: upg_aon_irq0_intc@409480 {
+               upg_aon_irq0_intc: interrupt-controller@409480 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x409480 0x8>;
 
                      status = "disabled";
                };
 
+               pwma: pwm@406580 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406580 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               pwmb: pwm@406800 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406800 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               aon_pm_l2_intc: interrupt-controller@408440 {
+                       compatible = "brcm,l2-intc";
+                       reg = <0x408440 0x30>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <49>;
+                       brcm,irq-can-wake;
+               };
+
+               upg_gio: gpio@406700 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x406700 0x80>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_irq0_intc>;
+                       interrupts = <6>;
+                       brcm,gpio-bank-widths = <32 32 32 21>;
+               };
+
+               upg_gio_aon: gpio@4094c0 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x4094c0 0x40>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_aon_irq0_intc>;
+                       interrupts = <6>;
+                       interrupts-extended = <&upg_aon_irq0_intc 6>,
+                                             <&aon_pm_l2_intc 5>;
+                       wakeup-source;
+                       brcm,gpio-bank-widths = <18 4>;
+               };
+
                enet0: ethernet@b80000 {
                        phy-mode = "internal";
                        phy-handle = <&phy1>;
                        status = "disabled";
                };
 
+               hif_l2_intc: interrupt-controller@41a000 {
+                       compatible = "brcm,l2-intc";
+                       reg = <0x41a000 0x30>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <24>;
+               };
+
+               nand: nand@41b800 {
+                       compatible = "brcm,brcmnand-v5.0", "brcm,brcmnand";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       reg-names = "nand";
+                       reg = <0x41b800 0x400>;
+                       interrupt-parent = <&hif_l2_intc>;
+                       interrupts = <24>;
+                       status = "disabled";
+               };
+
                sata: sata@181000 {
                        compatible = "brcm,bcm7425-ahci", "brcm,sata3-ahci";
                        reg-names = "ahci", "top-ctrl";
                                #phy-cells = <0>;
                        };
                };
+
+               sdhci0: sdhci@419000 {
+                       compatible = "brcm,bcm7425-sdhci";
+                       reg = <0x419000 0x100>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <43>;
+                       sd-uhs-sdr50;
+                       mmc-hs200-1_8v;
+                       status = "disabled";
+               };
+
+               sdhci1: sdhci@419200 {
+                       compatible = "brcm,bcm7425-sdhci";
+                       reg = <0x419200 0x100>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <44>;
+                       sd-uhs-sdr50;
+                       mmc-hs200-1_8v;
+                       status = "disabled";
+               };
        };
 };
index a874d3a0e2ee637402e3feef5c5a45a6fca1732f..19fa259b968b3fc7b1ab476a4ed27125b6af8862 100644 (file)
@@ -38,7 +38,7 @@
                uart0 = &uart0;
        };
 
-       cpu_intc: cpu_intc {
+       cpu_intc: interrupt-controller {
                #address-cells = <0>;
                compatible = "mti,cpu-interrupt-controller";
 
                        #clock-cells = <0>;
                        clock-frequency = <81000000>;
                };
+
+               upg_clk: upg_clk {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <27000000>;
+               };
        };
 
        rdb {
@@ -61,7 +67,7 @@
                compatible = "simple-bus";
                ranges = <0 0x10000000 0x01000000>;
 
-               periph_intc: periph_intc@41b500 {
+               periph_intc: interrupt-controller@41b500 {
                        compatible = "brcm,bcm7038-l1-intc";
                        reg = <0x41b500 0x40>, <0x41b600 0x40>,
                                <0x41b700 0x40>, <0x41b800 0x40>;
@@ -73,7 +79,7 @@
                        interrupts = <2>, <3>, <2>, <3>;
                };
 
-               sun_l2_intc: sun_l2_intc@403000 {
+               sun_l2_intc: interrupt-controller@403000 {
                        compatible = "brcm,l2-intc";
                        reg = <0x403000 0x30>;
                        interrupt-controller;
                                                     "scpu";
                };
 
-               upg_irq0_intc: upg_irq0_intc@406780 {
+               upg_irq0_intc: interrupt-controller@406780 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x406780 0x8>;
 
                        interrupt-names = "upg_main", "upg_bsc";
                };
 
-               upg_aon_irq0_intc: upg_aon_irq0_intc@409480 {
+               upg_aon_irq0_intc: interrupt-controller@409480 {
                        compatible = "brcm,bcm7120-l2-intc";
                        reg = <0x409480 0x8>;
 
                      status = "disabled";
                };
 
+               pwma: pwm@406580 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406580 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               pwmb: pwm@406800 {
+                       compatible = "brcm,bcm7038-pwm";
+                       reg = <0x406800 0x28>;
+                       #pwm-cells = <2>;
+                       clocks = <&upg_clk>;
+                       status = "disabled";
+               };
+
+               aon_pm_l2_intc: interrupt-controller@408440 {
+                       compatible = "brcm,l2-intc";
+                       reg = <0x408440 0x30>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <54>;
+                       brcm,irq-can-wake;
+               };
+
+               upg_gio: gpio@406700 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x406700 0x80>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_irq0_intc>;
+                       interrupts = <6>;
+                       brcm,gpio-bank-widths = <32 32 32 21>;
+               };
+
+               upg_gio_aon: gpio@4094c0 {
+                       compatible = "brcm,brcmstb-gpio";
+                       reg = <0x4094c0 0x40>;
+                       #gpio-cells = <2>;
+                       #interrupt-cells = <2>;
+                       gpio-controller;
+                       interrupt-controller;
+                       interrupt-parent = <&upg_aon_irq0_intc>;
+                       interrupts = <6>;
+                       interrupts-extended = <&upg_aon_irq0_intc 6>,
+                                             <&aon_pm_l2_intc 5>;
+                       wakeup-source;
+                       brcm,gpio-bank-widths = <18 4>;
+               };
+
                enet0: ethernet@b80000 {
                        phy-mode = "internal";
                        phy-handle = <&phy1>;
                        status = "disabled";
                };
 
+               hif_l2_intc: interrupt-controller@41b000 {
+                       compatible = "brcm,l2-intc";
+                       reg = <0x41b000 0x30>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <24>;
+               };
+
+               nand: nand@41c800 {
+                       compatible = "brcm,brcmnand-v6.2", "brcm,brcmnand";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       reg-names = "nand", "flash-dma";
+                       reg = <0x41c800 0x600>, <0x41d000 0x100>;
+                       interrupt-parent = <&hif_l2_intc>;
+                       interrupts = <24>, <4>;
+                       status = "disabled";
+               };
+
                sata: sata@181000 {
                        compatible = "brcm,bcm7425-ahci", "brcm,sata3-ahci";
                        reg-names = "ahci", "top-ctrl";
                                #phy-cells = <0>;
                        };
                };
+
+               sdhci0: sdhci@41a000 {
+                       compatible = "brcm,bcm7425-sdhci";
+                       reg = <0x41a000 0x100>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <47>;
+                       sd-uhs-sdr50;
+                       mmc-hs200-1_8v;
+                       status = "disabled";
+               };
+
+               sdhci1: sdhci@41a200 {
+                       compatible = "brcm,bcm7425-sdhci";
+                       reg = <0x41a200 0x100>;
+                       interrupt-parent = <&periph_intc>;
+                       interrupts = <48>;
+                       sd-uhs-sdr50;
+                       mmc-hs200-1_8v;
+                       status = "disabled";
+               };
        };
 };
diff --git a/arch/mips/boot/dts/brcm/bcm96358nb4ser.dts b/arch/mips/boot/dts/brcm/bcm96358nb4ser.dts
deleted file mode 100644 (file)
index f412117..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-/dts-v1/;
-
-/include/ "bcm6358.dtsi"
-
-/ {
-       compatible = "sfr,nb4-ser", "brcm,bcm6358";
-       model = "SFR Neufbox 4 (Sercomm)";
-
-       memory@0 {
-               device_type = "memory";
-               reg = <0x00000000 0x02000000>;
-       };
-
-       chosen {
-               stdout-path = &uart0;
-       };
-};
-
-&leds0 {
-       status = "ok";
-
-       led@0 {
-               reg = <0>;
-               active-low;
-               label = "nb4-ser:white:alarm";
-       };
-       led@2 {
-               reg = <2>;
-               active-low;
-               label = "nb4-ser:white:tv";
-       };
-       led@3 {
-               reg = <3>;
-               active-low;
-               label = "nb4-ser:white:tel";
-       };
-       led@4 {
-               reg = <4>;
-               active-low;
-               label = "nb4-ser:white:adsl";
-       };
-};
-
-&uart0 {
-       status = "okay";
-};
index f2449d147c6da9177010d869f90ac0d1ead9e468..5c24eacd72ddce0a5b354275664f528896af6e83 100644 (file)
        status = "okay";
 };
 
+&pwma {
+       status = "okay";
+};
+
 /* FIXME: USB is wonky; disable it for now */
 &ehci0 {
        status = "disabled";
index d3d28816a0270716d9634b553bcc30962584ef1a..e67eaf30de3d131ac4432ed0f8a7097a6a8d89a6 100644 (file)
@@ -1,6 +1,7 @@
 /dts-v1/;
 
 /include/ "bcm7346.dtsi"
+/include/ "bcm97xxx-nand-cs1-bch24.dtsi"
 
 / {
        compatible = "brcm,bcm97346dbsmb", "brcm,bcm7346";
        status = "okay";
 };
 
+&pwma {
+       status = "okay";
+};
+
+&pwmb {
+       status = "okay";
+};
+
 &enet0 {
        status = "okay";
 };
        status = "okay";
 };
 
+&nand {
+       status = "okay";
+};
+
 &sata {
        status = "okay";
 };
 &sata_phy {
        status = "okay";
 };
+
+&sdhci0 {
+       status = "okay";
+};
index 02ce6b429dc47b104633491cee178499e677f1f4..ee4607fae47accb047197ded65b43e09aea7addb 100644 (file)
@@ -1,6 +1,7 @@
 /dts-v1/;
 
 /include/ "bcm7358.dtsi"
+/include/ "bcm97xxx-nand-cs1-bch4.dtsi"
 
 / {
        compatible = "brcm,bcm97358svmb", "brcm,bcm7358";
        status = "okay";
 };
 
+&pwma {
+       status = "okay";
+};
+
+&pwmb {
+       status = "okay";
+};
+
 &enet0 {
        status = "okay";
 };
@@ -56,3 +65,7 @@
 &ohci0 {
        status = "okay";
 };
+
+&nand {
+       status = "okay";
+};
index 73124be9548aeff58f1fd507883c3f2637808a55..bed821b030139599e7fddb0f0de2c38d77fffb7d 100644 (file)
        status = "okay";
 };
 
+&pwma {
+       status = "okay";
+};
+
 &enet0 {
        status = "okay";
 };
@@ -64,3 +68,7 @@
 &sata_phy {
        status = "okay";
 };
+
+&sdhci0 {
+       status = "okay";
+};
index 3cfcaebe7f79db34e8c340ba71a1238f327b78c3..68fd823868e07a3f580ca97960a93f78d36afaae 100644 (file)
@@ -1,6 +1,7 @@
 /dts-v1/;
 
 /include/ "bcm7362.dtsi"
+/include/ "bcm97xxx-nand-cs1-bch4.dtsi"
 
 / {
        compatible = "brcm,bcm97362svmb", "brcm,bcm7362";
        status = "okay";
 };
 
+&pwma {
+       status = "okay";
+};
+
 &enet0 {
        status = "okay";
 };
        status = "okay";
 };
 
+&nand {
+       status = "okay";
+};
+
 &sata {
        status = "okay";
 };
@@ -60,3 +69,7 @@
 &sata_phy {
        status = "okay";
 };
+
+&sdhci0 {
+       status = "okay";
+};
index 600d57abee05c1bb8e8f22c57d67f1fb7a509a8c..e66271af055e74fd19c41f8390019c19c2fd08b9 100644 (file)
        status = "okay";
 };
 
+&pwma {
+       status = "okay";
+};
+
+&pwmb {
+       status = "okay";
+};
+
 /* FIXME: MAC driver comes up but cannot attach to PHY */
 &enet0 {
        status = "disabled";
index 119c714805cbc8257f1addc89a9b17289ffa73ec..f95ba1bf3e5806d0a4b4467621b16eb2490cc2ed 100644 (file)
@@ -1,6 +1,7 @@
 /dts-v1/;
 
 /include/ "bcm7425.dtsi"
+/include/ "bcm97xxx-nand-cs1-bch24.dtsi"
 
 / {
        compatible = "brcm,bcm97425svmb", "brcm,bcm7425";
        status = "okay";
 };
 
+&pwma {
+       status = "okay";
+};
+
+&pwmb {
+       status = "okay";
+};
+
 &enet0 {
        status = "okay";
 };
 &ohci3 {
        status = "okay";
 };
+
+&nand {
+       status = "okay";
+};
+
+&sdhci0 {
+       status = "okay";
+};
+
+&sdhci1 {
+       status = "okay";
+};
index 43e3ba27f07ba2ddc6935a7f80ba4a2b238f67ca..fb37b7111bf4f39bbcac24af96a9f0fdf4dd9a02 100644 (file)
@@ -1,6 +1,7 @@
 /dts-v1/;
 
 /include/ "bcm7435.dtsi"
+/include/ "bcm97xxx-nand-cs1-bch24.dtsi"
 
 / {
        compatible = "brcm,bcm97435svmb", "brcm,bcm7435";
        status = "okay";
 };
 
+&pwma {
+       status = "okay";
+};
+
+&pwmb {
+       status = "okay";
+};
+
 &enet0 {
        status = "okay";
 };
        status = "okay";
 };
 
+&nand {
+       status = "okay";
+};
+
 &sata {
        status = "okay";
 };
 &sata_phy {
        status = "okay";
 };
+
+&sdhci0 {
+       status = "okay";
+};
+
+&sdhci1 {
+       status = "okay";
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97xxx-nand-cs1-bch24.dtsi b/arch/mips/boot/dts/brcm/bcm97xxx-nand-cs1-bch24.dtsi
new file mode 100644 (file)
index 0000000..3c24f97
--- /dev/null
@@ -0,0 +1,25 @@
+&nand { /* adds a chip-select 1 child (nandcs@1) to the node labelled "nand" */
+       nandcs@1 {
+               compatible = "brcm,nandcs";
+               reg = <1>; /* matches the @1 unit address */
+               nand-on-flash-bbt;
+
+               nand-ecc-strength = <24>; /* strength 24 per 1024-byte step below: the "bch24" of the file name */
+               nand-ecc-step-size = <1024>;
+               brcm,nand-oob-sector-size = <27>;
+
+               partitions {
+                       compatible = "fixed-partitions";
+                       #address-cells = <1>; /* child reg is <offset size>, one cell each */
+                       #size-cells = <1>;
+
+                       flash1.rootfs@0 {
+                               reg = <0x0 0x10000000>; /* offset 0, size 256 MiB */
+                       };
+
+                       flash1.kernel@10000000 {
+                               reg = <0x10000000 0x400000>; /* starts where rootfs ends, size 4 MiB */
+                       };
+               };
+       };
+};
diff --git a/arch/mips/boot/dts/brcm/bcm97xxx-nand-cs1-bch4.dtsi b/arch/mips/boot/dts/brcm/bcm97xxx-nand-cs1-bch4.dtsi
new file mode 100644 (file)
index 0000000..cb53181
--- /dev/null
@@ -0,0 +1,25 @@
+&nand { /* adds a chip-select 1 child (nandcs@1) to the node labelled "nand" */
+       nandcs@1 {
+               compatible = "brcm,nandcs";
+               reg = <1>; /* matches the @1 unit address */
+               nand-on-flash-bbt;
+
+               nand-ecc-strength = <4>; /* strength 4 per 512-byte step below: the "bch4" of the file name */
+               nand-ecc-step-size = <512>;
+               brcm,nand-oob-sector-size = <16>;
+
+               partitions {
+                       compatible = "fixed-partitions";
+                       #address-cells = <1>; /* child reg is <offset size>, one cell each */
+                       #size-cells = <1>;
+
+                       flash1.rootfs@0 {
+                               reg = <0x0 0x10000000>; /* offset 0, size 256 MiB */
+                       };
+
+                       flash1.kernel@10000000 {
+                               reg = <0x10000000 0x400000>; /* starts where rootfs ends, size 4 MiB */
+                       };
+               };
+       };
+};
index b134798a0fd7252798620add0cd15c705cbea37d..cfa29156eb69df0775af66316f0ffcdfa4e7513d 100644 (file)
@@ -8,55 +8,16 @@
  * published by the Free Software Foundation.
  */
 
-/include/ "octeon_3xxx.dtsi"
+/include/ "dlink_dsr-500n-1000n.dtsi"
 #include <dt-bindings/gpio/gpio.h>
 
 / {
        model = "dlink,dsr-1000n";
 
        soc@0 {
-               smi0: mdio@1180000001800 {
-                       phy8: ethernet-phy@8 {
-                               reg = <8>;
-                               compatible = "ethernet-phy-ieee802.3-c22";
-                       };
-               };
-
-               pip: pip@11800a0000000 {
-                       interface@0 {
-                               ethernet@0 {
-                                       fixed-link {
-                                               speed = <1000>;
-                                               full-duplex;
-                                       };
-                               };
-                               ethernet@1 {
-                                       fixed-link {
-                                               speed = <1000>;
-                                               full-duplex;
-                                       };
-                               };
-                               ethernet@2 {
-                                       phy-handle = <&phy8>;
-                               };
-                       };
-               };
-
-               twsi0: i2c@1180000001000 {
-                       rtc@68 {
-                               compatible = "dallas,ds1337";
-                               reg = <0x68>;
-                       };
-               };
-
                uart0: serial@1180000000800 {
                        clock-frequency = <500000000>;
                };
-
-               usbn: usbn@1180068000000 {
-                       refclk-frequency = <12000000>;
-                       refclk-type = "crystal";
-               };
        };
 
        leds {
@@ -87,8 +48,4 @@
                        gpios = <&gpio 18 GPIO_ACTIVE_LOW>;
                };
        };
-
-       aliases {
-               pip = &pip;
-       };
 };
diff --git a/arch/mips/boot/dts/cavium-octeon/dlink_dsr-500n-1000n.dtsi b/arch/mips/boot/dts/cavium-octeon/dlink_dsr-500n-1000n.dtsi
new file mode 100644 (file)
index 0000000..246b598
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Device tree source for D-Link DSR-500N/1000N (common parts).
+ *
+ * Written by: Aaro Koskinen <aaro.koskinen@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/include/ "octeon_3xxx.dtsi"
+
+/ { /* nodes common to the DSR-500N and DSR-1000N board files, layered on octeon_3xxx.dtsi */
+       soc@0 {
+               smi0: mdio@1180000001800 {
+                       phy8: ethernet-phy@8 {
+                               reg = <8>; /* matches the @8 unit address */
+                               compatible = "ethernet-phy-ieee802.3-c22";
+                       };
+               };
+
+               pip: pip@11800a0000000 {
+                       interface@0 {
+                               ethernet@0 { /* no phy-handle: link parameters are fixed below */
+                                       fixed-link {
+                                               speed = <1000>;
+                                               full-duplex;
+                                       };
+                               };
+                               ethernet@1 { /* same fixed-link settings as ethernet@0 */
+                                       fixed-link {
+                                               speed = <1000>;
+                                               full-duplex;
+                                       };
+                               };
+                               ethernet@2 {
+                                       phy-handle = <&phy8>; /* the PHY declared under smi0 above */
+                               };
+                       };
+               };
+
+               twsi0: i2c@1180000001000 {
+                       rtc@68 {
+                               compatible = "dallas,ds1337";
+                               reg = <0x68>; /* matches the @68 unit address */
+                       };
+               };
+
+               usbn: usbn@1180068000000 {
+                       refclk-frequency = <12000000>;
+                       refclk-type = "crystal";
+               };
+       };
+
+       aliases {
+               pip = &pip; /* alias to the node labelled "pip" above */
+       };
+};
diff --git a/arch/mips/boot/dts/cavium-octeon/dlink_dsr-500n.dts b/arch/mips/boot/dts/cavium-octeon/dlink_dsr-500n.dts
new file mode 100644 (file)
index 0000000..78886e1
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Device tree source for D-Link DSR-500N.
+ *
+ * Written by: Aaro Koskinen <aaro.koskinen@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/include/ "dlink_dsr-500n-1000n.dtsi"
+#include <dt-bindings/gpio/gpio.h>
+
+/ { /* DSR-500N specifics on top of dlink_dsr-500n-1000n.dtsi */
+       model = "dlink,dsr-500n";
+       compatible = "dlink,dsr-500n", "cavium,octeon-3860";
+
+       soc@0 {
+               uart0: serial@1180000000800 {
+                       clock-frequency = <300000000>; /* the DSR-1000N board file uses 500000000 for this node */
+               };
+       };
+
+       leds {
+               compatible = "gpio-leds";
+
+               usb {
+                       gpios = <&gpio 9 GPIO_ACTIVE_LOW>; /* GPIO line 9, active low */
+               };
+
+               wps {
+                       gpios = <&gpio 11 GPIO_ACTIVE_LOW>; /* GPIO line 11, active low */
+               };
+
+               wireless {
+                       label = "2.4g";
+                       gpios = <&gpio 18 GPIO_ACTIVE_LOW>; /* GPIO line 18, active low */
+               };
+       };
+};
index 144d776cc9f2aca50e0608a4716ff34e178af652..fcabd69b703012c5821481ebe6f58bbc024c8fc5 100644 (file)
@@ -1,5 +1,5 @@
 dtb-$(CONFIG_MIPS_MALTA)       += malta.dtb
-dtb-$(CONFIG_MIPS_SEAD3)       += sead3.dtb
+dtb-$(CONFIG_LEGACY_BOARD_SEAD3)       += sead3.dtb
 
 obj-y                          += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
 
index b18c46637d21b4ae7b85636d98a3353332022bbc..ffe3a1508e72c1c389749be5861c815e918b5019 100644 (file)
@@ -1,5 +1,8 @@
 /dts-v1/;
 
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/interrupt-controller/mips-gic.h>
+
 /memreserve/ 0x00000000 0x00001000;    /* YAMON exception vectors */
 /memreserve/ 0x00001000 0x000ef000;    /* YAMON */
 /memreserve/ 0x000f0000 0x00010000;    /* PIIX4 ISA memory */
        #address-cells = <1>;
        #size-cells = <1>;
        compatible = "mti,malta";
+
+       cpu_intc: interrupt-controller { /* referenced as interrupt-parent by the gic node */
+               compatible = "mti,cpu-interrupt-controller";
+
+               interrupt-controller;
+               #interrupt-cells = <1>; /* one-cell interrupt specifiers */
+       };
+
+       gic: interrupt-controller@1bdc0000 {
+               compatible = "mti,gic";
+               reg = <0x1bdc0000 0x20000>; /* 0x20000-byte register window */
+
+               interrupt-controller;
+               #interrupt-cells = <3>; /* specifiers look like <GIC_LOCAL|GIC_SHARED number IRQ_TYPE_*> */
+
+               /*
+                * Declare the interrupt-parent even though the mti,gic
+                * binding doesn't require it, such that the kernel can
+                * figure out that cpu_intc is the root interrupt
+                * controller & should be probed first.
+                */
+               interrupt-parent = <&cpu_intc>;
+
+               timer {
+                       compatible = "mti,gic-timer";
+                       interrupts = <GIC_LOCAL 1 IRQ_TYPE_NONE>; /* GIC local interrupt 1 */
+               };
+       };
+
+       i8259: interrupt-controller@20 { /* NOTE(review): unit address @20 but no reg property in this node - confirm intended */
+               compatible = "intel,i8259";
+
+               interrupt-controller;
+               #interrupt-cells = <1>; /* one-cell specifiers, e.g. the rtc's <8> under isa */
+
+               interrupt-parent = <&gic>;
+               interrupts = <GIC_SHARED 3 IRQ_TYPE_LEVEL_HIGH>; /* cascades into GIC shared interrupt 3, level-high */
+       };
+
+       flash@1e000000 { /* CFI flash; the three partitions below tile the whole 0x400000-byte window */
+               compatible = "intel,dt28f160", "cfi-flash";
+               reg = <0x1e000000 0x400000>; /* 4 MiB */
+               bank-width = <4>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+
+               partitions {
+                       compatible = "fixed-partitions";
+                       #address-cells = <1>; /* child reg is <offset size>, one cell each */
+                       #size-cells = <1>;
+
+                       yamon@0 {
+                               label = "YAMON";
+                               reg = <0x0 0x100000>; /* first 1 MiB */
+                               read-only;
+                       };
+
+                       user-fs@100000 {
+                               label = "User FS";
+                               reg = <0x100000 0x2e0000>; /* ends at 0x3e0000 */
+                       };
+
+                       board-config@3e0000 {
+                               label = "Board Config";
+                               reg = <0x3e0000 0x20000>; /* last 128 KiB */
+                               read-only;
+                       };
+               };
+       };
+
+       fpga_regs: system-controller@1f000000 { /* syscon register block; also the regmap for the reboot child */
+               compatible = "mti,malta-fpga", "syscon", "simple-mfd";
+               reg = <0x1f000000 0x1000>;
+               native-endian;
+
+               reboot {
+                       compatible = "syscon-reboot";
+                       regmap = <&fpga_regs>; /* points back at the enclosing node */
+                       offset = <0x500>; /* register offset within fpga_regs */
+                       mask = <0x42>; /* value used by syscon-reboot at that offset - see the syscon-reboot binding */
+               };
+       };
+
+       isa {
+               compatible = "isa";
+               #address-cells = <2>; /* child addresses are two cells: <space offset> */
+               #size-cells = <1>;
+               ranges = <1 0 0 0x1000>; /* child <1 0x0> maps to parent 0x0, length 0x1000 */
+
+               rtc@70 {
+                       compatible = "motorola,mc146818";
+                       reg = <1 0x70 0x8>; /* 8 bytes at offset 0x70 in the space selected by first cell 1 */
+
+                       interrupt-parent = <&i8259>;
+                       interrupts = <8>; /* one-cell specifier, as declared by the i8259 node */
+               };
+       };
 };
index e4b317d414f112576bbd04012381ab6804dabf86..b112879a5d9d30769c11568ca1c84986d83a9180 100644 (file)
@@ -4,10 +4,23 @@
 /memreserve/ 0x00001000 0x000ef000;    // ROM data
 /memreserve/ 0x000f0000 0x004cc000;    // reserved
 
+#include <dt-bindings/interrupt-controller/mips-gic.h>
+
 / {
        #address-cells = <1>;
        #size-cells = <1>;
        compatible = "mti,sead-3";
+       model = "MIPS SEAD-3";
+       interrupt-parent = <&gic>;
+
+       chosen {
+               stdout-path = "uart1:115200";
+       };
+
+       aliases {
+               uart0 = &uart0;
+               uart1 = &uart1;
+       };
 
        cpus {
                cpu@0 {
                device_type = "memory";
                reg = <0x0 0x08000000>;
        };
+
+       cpu_intc: interrupt-controller { /* referenced as interrupt-parent by the gic node */
+               compatible = "mti,cpu-interrupt-controller";
+
+               interrupt-controller;
+               #interrupt-cells = <1>; /* one-cell interrupt specifiers */
+       };
+
+       gic: interrupt-controller@1b1c0000 {
+               compatible = "mti,gic";
+               reg = <0x1b1c0000 0x20000>; /* 0x20000-byte register window */
+
+               interrupt-controller;
+               #interrupt-cells = <3>; /* specifiers look like <GIC_LOCAL number IRQ_TYPE_*>, as in the timer below */
+
+               /*
+                * Declare the interrupt-parent even though the mti,gic
+                * binding doesn't require it, such that the kernel can
+                * figure out that cpu_intc is the root interrupt
+                * controller & should be probed first.
+                */
+               interrupt-parent = <&cpu_intc>;
+
+               timer {
+                       compatible = "mti,gic-timer";
+                       interrupts = <GIC_LOCAL 1 IRQ_TYPE_NONE>; /* GIC local interrupt 1 */
+               };
+       };
+
+       ehci@1b200000 {
+               compatible = "generic-ehci";
+               reg = <0x1b200000 0x1000>;
+
+               interrupts = <0>; /* GIC 0 or CPU 6. NOTE(review): one cell, although the root interrupt-parent (gic) declares #interrupt-cells = <3> - presumably rewritten by board setup code; confirm */
+
+               has-transaction-translator;
+       };
+
+       flash@1c000000 { /* CFI flash; the two partitions below tile the whole 0x2000000-byte window */
+               compatible = "intel,28f128j3", "cfi-flash";
+               reg = <0x1c000000 0x2000000>; /* 32 MiB */
+               #address-cells = <1>;
+               #size-cells = <1>;
+               bank-width = <4>;
+
+               partitions {
+                       compatible = "fixed-partitions";
+                       #address-cells = <1>; /* child reg is <offset size>, one cell each */
+                       #size-cells = <1>;
+
+                       user-fs@0 {
+                               label = "User FS";
+                               reg = <0x0 0x1fc0000>; /* everything up to the board-config partition */
+                       };
+
+                       board-config@1fc0000 { /* unit address must equal the first reg cell (was @3e0000, the Malta offset) */
+                               label = "Board Config";
+                               reg = <0x1fc0000 0x40000>; /* last 256 KiB */
+                       };
+               };
+       };
+
+       fpga_regs: system-controller@1f000000 { /* syscon register block; also the regmap for the reboot child */
+               compatible = "mti,sead3-fpga", "syscon", "simple-mfd";
+               reg = <0x1f000000 0x200>;
+
+               reboot {
+                       compatible = "syscon-reboot";
+                       regmap = <&fpga_regs>; /* points back at the enclosing node */
+                       offset = <0x50>; /* register offset within fpga_regs */
+                       mask = <0x4d>; /* value used by syscon-reboot at that offset - see the syscon-reboot binding */
+               };
+
+               poweroff {
+                       compatible = "restart-poweroff";
+               };
+       };
+
+       system-controller@1f000200 { /* CPLD register block: 16 single-bit LEDs plus the LCD */
+               compatible = "mti,sead3-cpld", "syscon", "simple-mfd";
+               reg = <0x1f000200 0x300>;
+
+               led@10.0 { /* pled0..pled7: one node per bit (mask 0x1..0x80) of the register at offset 0x10 */
+                       compatible = "register-bit-led";
+                       offset = <0x10>;
+                       mask = <0x1>;
+                       label = "pled0";
+               };
+               led@10.1 {
+                       compatible = "register-bit-led";
+                       offset = <0x10>;
+                       mask = <0x2>;
+                       label = "pled1";
+               };
+               led@10.2 {
+                       compatible = "register-bit-led";
+                       offset = <0x10>;
+                       mask = <0x4>;
+                       label = "pled2";
+               };
+               led@10.3 {
+                       compatible = "register-bit-led";
+                       offset = <0x10>;
+                       mask = <0x8>;
+                       label = "pled3";
+               };
+               led@10.4 {
+                       compatible = "register-bit-led";
+                       offset = <0x10>;
+                       mask = <0x10>;
+                       label = "pled4";
+               };
+               led@10.5 {
+                       compatible = "register-bit-led";
+                       offset = <0x10>;
+                       mask = <0x20>;
+                       label = "pled5";
+               };
+               led@10.6 {
+                       compatible = "register-bit-led";
+                       offset = <0x10>;
+                       mask = <0x40>;
+                       label = "pled6";
+               };
+               led@10.7 {
+                       compatible = "register-bit-led";
+                       offset = <0x10>;
+                       mask = <0x80>;
+                       label = "pled7";
+               };
+
+               led@18.0 { /* fled0..fled7: same scheme on the register at offset 0x18 */
+                       compatible = "register-bit-led";
+                       offset = <0x18>;
+                       mask = <0x1>;
+                       label = "fled0";
+               };
+               led@18.1 {
+                       compatible = "register-bit-led";
+                       offset = <0x18>;
+                       mask = <0x2>;
+                       label = "fled1";
+               };
+               led@18.2 {
+                       compatible = "register-bit-led";
+                       offset = <0x18>;
+                       mask = <0x4>;
+                       label = "fled2";
+               };
+               led@18.3 {
+                       compatible = "register-bit-led";
+                       offset = <0x18>;
+                       mask = <0x8>;
+                       label = "fled3";
+               };
+               led@18.4 {
+                       compatible = "register-bit-led";
+                       offset = <0x18>;
+                       mask = <0x10>;
+                       label = "fled4";
+               };
+               led@18.5 {
+                       compatible = "register-bit-led";
+                       offset = <0x18>;
+                       mask = <0x20>;
+                       label = "fled5";
+               };
+               led@18.6 {
+                       compatible = "register-bit-led";
+                       offset = <0x18>;
+                       mask = <0x40>;
+                       label = "fled6";
+               };
+               led@18.7 {
+                       compatible = "register-bit-led";
+                       offset = <0x18>;
+                       mask = <0x80>;
+                       label = "fled7";
+               };
+
+               lcd@200 {
+                       compatible = "mti,sead3-lcd";
+                       offset = <0x200>; /* matches the @200 unit address */
+               };
+       };
+
+       /* UART connected to FTDI & miniUSB socket */
+       uart0: uart@1f000900 { /* exposed as alias "uart0" in this file */
+               compatible = "ns16550a";
+               reg = <0x1f000900 0x20>;
+               reg-io-width = <4>;
+               reg-shift = <2>;
+
+               clock-frequency = <14745600>;
+
+               interrupts = <3>; /* GIC 3 or CPU 4. NOTE(review): one cell, although the root interrupt-parent (gic) declares #interrupt-cells = <3> - presumably rewritten by board setup code; confirm */
+
+               no-loopback-test;
+       };
+
+       /* UART connected to RS232 socket */
+       uart1: uart@1f000800 { /* the chosen stdout-path in this file ("uart1:115200") */
+               compatible = "ns16550a";
+               reg = <0x1f000800 0x20>;
+               reg-io-width = <4>;
+               reg-shift = <2>;
+
+               clock-frequency = <14745600>;
+
+               interrupts = <2>; /* GIC 2 or CPU 4. NOTE(review): one cell, although the root interrupt-parent (gic) declares #interrupt-cells = <3> - presumably rewritten by board setup code; confirm */
+
+               no-loopback-test;
+       };
+
+       eth@1f010000 {
+               compatible = "smsc,lan9115";
+               reg = <0x1f010000 0x10000>;
+               reg-io-width = <4>;
+
+               interrupts = <0>; /* GIC 0 or CPU 6 (same specifier as ehci@1b200000). NOTE(review): one cell, although the root interrupt-parent (gic) declares #interrupt-cells = <3> - presumably rewritten by board setup code; confirm */
+
+               phy-mode = "mii";
+               smsc,irq-push-pull;
+               smsc,save-mac-address;
+       };
 };
index ff49fc04500c7e993aef5cd618e8bfd1e7ef8e39..ab8362e04461ef2fd95e54aa7778b56c4dfe8dfb 100644 (file)
@@ -36,8 +36,6 @@
 
 #include <asm/octeon/cvmx-config.h>
 
-#include <asm/octeon/cvmx-mdio.h>
-
 #include <asm/octeon/cvmx-helper.h>
 #include <asm/octeon/cvmx-helper-util.h>
 #include <asm/octeon/cvmx-helper-board.h>
 #include <asm/octeon/cvmx-gmxx-defs.h>
 #include <asm/octeon/cvmx-asxx-defs.h>
 
-/**
- * cvmx_override_board_link_get(int ipd_port) is a function
- * pointer. It is meant to allow customization of the process of
- * talking to a PHY to determine link speed. It is called every
- * time a PHY must be polled for link status. Users should set
- * this pointer to a function before calling any cvmx-helper
- * operations.
- */
-cvmx_helper_link_info_t(*cvmx_override_board_link_get) (int ipd_port) =
-    NULL;
-
 /**
  * Return the MII PHY address associated with the given IPD
  * port. A result of -1 means there isn't a MII capable PHY
@@ -222,12 +209,6 @@ int cvmx_helper_board_get_mii_address(int ipd_port)
 cvmx_helper_link_info_t __cvmx_helper_board_link_get(int ipd_port)
 {
        cvmx_helper_link_info_t result;
-       int phy_addr;
-       int is_broadcom_phy = 0;
-
-       /* Give the user a chance to override the processing of this function */
-       if (cvmx_override_board_link_get)
-               return cvmx_override_board_link_get(ipd_port);
 
        /* Unless we fix it later, all links are defaulted to down */
        result.u64 = 0;
@@ -263,8 +244,7 @@ cvmx_helper_link_info_t __cvmx_helper_board_link_get(int ipd_port)
                        result.s.full_duplex = 1;
                        result.s.speed = 1000;
                        return result;
-               } else          /* The other port uses a broadcom PHY */
-                       is_broadcom_phy = 1;
+               }
                break;
        case CVMX_BOARD_TYPE_BBGW_REF:
                /* Port 1 on these boards is always Gigabit */
@@ -282,108 +262,7 @@ cvmx_helper_link_info_t __cvmx_helper_board_link_get(int ipd_port)
                break;
        }
 
-       phy_addr = cvmx_helper_board_get_mii_address(ipd_port);
-       if (phy_addr != -1) {
-               if (is_broadcom_phy) {
-                       /*
-                        * Below we are going to read SMI/MDIO
-                        * register 0x19 which works on Broadcom
-                        * parts
-                        */
-                       int phy_status =
-                           cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff,
-                                          0x19);
-                       switch ((phy_status >> 8) & 0x7) {
-                       case 0:
-                               result.u64 = 0;
-                               break;
-                       case 1:
-                               result.s.link_up = 1;
-                               result.s.full_duplex = 0;
-                               result.s.speed = 10;
-                               break;
-                       case 2:
-                               result.s.link_up = 1;
-                               result.s.full_duplex = 1;
-                               result.s.speed = 10;
-                               break;
-                       case 3:
-                               result.s.link_up = 1;
-                               result.s.full_duplex = 0;
-                               result.s.speed = 100;
-                               break;
-                       case 4:
-                               result.s.link_up = 1;
-                               result.s.full_duplex = 1;
-                               result.s.speed = 100;
-                               break;
-                       case 5:
-                               result.s.link_up = 1;
-                               result.s.full_duplex = 1;
-                               result.s.speed = 100;
-                               break;
-                       case 6:
-                               result.s.link_up = 1;
-                               result.s.full_duplex = 0;
-                               result.s.speed = 1000;
-                               break;
-                       case 7:
-                               result.s.link_up = 1;
-                               result.s.full_duplex = 1;
-                               result.s.speed = 1000;
-                               break;
-                       }
-               } else {
-                       /*
-                        * This code assumes we are using a Marvell
-                        * Gigabit PHY. All the speed information can
-                        * be read from register 17 in one
-                        * go. Somebody using a different PHY will
-                        * need to handle it above in the board
-                        * specific area.
-                        */
-                       int phy_status =
-                           cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff, 17);
-
-                       /*
-                        * If the resolve bit 11 isn't set, see if
-                        * autoneg is turned off (bit 12, reg 0). The
-                        * resolve bit doesn't get set properly when
-                        * autoneg is off, so force it.
-                        */
-                       if ((phy_status & (1 << 11)) == 0) {
-                               int auto_status =
-                                   cvmx_mdio_read(phy_addr >> 8,
-                                                  phy_addr & 0xff, 0);
-                               if ((auto_status & (1 << 12)) == 0)
-                                       phy_status |= 1 << 11;
-                       }
-
-                       /*
-                        * Only return a link if the PHY has finished
-                        * auto negotiation and set the resolved bit
-                        * (bit 11)
-                        */
-                       if (phy_status & (1 << 11)) {
-                               result.s.link_up = 1;
-                               result.s.full_duplex = ((phy_status >> 13) & 1);
-                               switch ((phy_status >> 14) & 3) {
-                               case 0: /* 10 Mbps */
-                                       result.s.speed = 10;
-                                       break;
-                               case 1: /* 100 Mbps */
-                                       result.s.speed = 100;
-                                       break;
-                               case 2: /* 1 Gbps */
-                                       result.s.speed = 1000;
-                                       break;
-                               case 3: /* Illegal */
-                                       result.u64 = 0;
-                                       break;
-                               }
-                       }
-               }
-       } else if (OCTEON_IS_MODEL(OCTEON_CN3XXX)
+       if (OCTEON_IS_MODEL(OCTEON_CN3XXX)
                   || OCTEON_IS_MODEL(OCTEON_CN58XX)
                   || OCTEON_IS_MODEL(OCTEON_CN50XX)) {
                /*
@@ -432,176 +311,6 @@ cvmx_helper_link_info_t __cvmx_helper_board_link_get(int ipd_port)
        return result;
 }
 
-/**
- * This function as a board specific method of changing the PHY
- * speed, duplex, and auto-negotiation. This programs the PHY and
- * not Octeon. This can be used to force Octeon's links to
- * specific settings.
- *
- * @phy_addr:  The address of the PHY to program
- * @enable_autoneg:
- *                 Non zero if you want to enable auto-negotiation.
- * @link_info: Link speed to program. If the speed is zero and auto-negotiation
- *                 is enabled, all possible negotiation speeds are advertised.
- *
- * Returns Zero on success, negative on failure
- */
-int cvmx_helper_board_link_set_phy(int phy_addr,
-                                  cvmx_helper_board_set_phy_link_flags_types_t
-                                  link_flags,
-                                  cvmx_helper_link_info_t link_info)
-{
-
-       /* Set the flow control settings based on link_flags */
-       if ((link_flags & set_phy_link_flags_flow_control_mask) !=
-           set_phy_link_flags_flow_control_dont_touch) {
-               cvmx_mdio_phy_reg_autoneg_adver_t reg_autoneg_adver;
-               reg_autoneg_adver.u16 =
-                   cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff,
-                                  CVMX_MDIO_PHY_REG_AUTONEG_ADVER);
-               reg_autoneg_adver.s.asymmetric_pause =
-                   (link_flags & set_phy_link_flags_flow_control_mask) ==
-                   set_phy_link_flags_flow_control_enable;
-               reg_autoneg_adver.s.pause =
-                   (link_flags & set_phy_link_flags_flow_control_mask) ==
-                   set_phy_link_flags_flow_control_enable;
-               cvmx_mdio_write(phy_addr >> 8, phy_addr & 0xff,
-                               CVMX_MDIO_PHY_REG_AUTONEG_ADVER,
-                               reg_autoneg_adver.u16);
-       }
-
-       /* If speed isn't set and autoneg is on advertise all supported modes */
-       if ((link_flags & set_phy_link_flags_autoneg)
-           && (link_info.s.speed == 0)) {
-               cvmx_mdio_phy_reg_control_t reg_control;
-               cvmx_mdio_phy_reg_status_t reg_status;
-               cvmx_mdio_phy_reg_autoneg_adver_t reg_autoneg_adver;
-               cvmx_mdio_phy_reg_extended_status_t reg_extended_status;
-               cvmx_mdio_phy_reg_control_1000_t reg_control_1000;
-
-               reg_status.u16 =
-                   cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff,
-                                  CVMX_MDIO_PHY_REG_STATUS);
-               reg_autoneg_adver.u16 =
-                   cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff,
-                                  CVMX_MDIO_PHY_REG_AUTONEG_ADVER);
-               reg_autoneg_adver.s.advert_100base_t4 =
-                   reg_status.s.capable_100base_t4;
-               reg_autoneg_adver.s.advert_10base_tx_full =
-                   reg_status.s.capable_10_full;
-               reg_autoneg_adver.s.advert_10base_tx_half =
-                   reg_status.s.capable_10_half;
-               reg_autoneg_adver.s.advert_100base_tx_full =
-                   reg_status.s.capable_100base_x_full;
-               reg_autoneg_adver.s.advert_100base_tx_half =
-                   reg_status.s.capable_100base_x_half;
-               cvmx_mdio_write(phy_addr >> 8, phy_addr & 0xff,
-                               CVMX_MDIO_PHY_REG_AUTONEG_ADVER,
-                               reg_autoneg_adver.u16);
-               if (reg_status.s.capable_extended_status) {
-                       reg_extended_status.u16 =
-                           cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff,
-                                          CVMX_MDIO_PHY_REG_EXTENDED_STATUS);
-                       reg_control_1000.u16 =
-                           cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff,
-                                          CVMX_MDIO_PHY_REG_CONTROL_1000);
-                       reg_control_1000.s.advert_1000base_t_full =
-                           reg_extended_status.s.capable_1000base_t_full;
-                       reg_control_1000.s.advert_1000base_t_half =
-                           reg_extended_status.s.capable_1000base_t_half;
-                       cvmx_mdio_write(phy_addr >> 8, phy_addr & 0xff,
-                                       CVMX_MDIO_PHY_REG_CONTROL_1000,
-                                       reg_control_1000.u16);
-               }
-               reg_control.u16 =
-                   cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff,
-                                  CVMX_MDIO_PHY_REG_CONTROL);
-               reg_control.s.autoneg_enable = 1;
-               reg_control.s.restart_autoneg = 1;
-               cvmx_mdio_write(phy_addr >> 8, phy_addr & 0xff,
-                               CVMX_MDIO_PHY_REG_CONTROL, reg_control.u16);
-       } else if ((link_flags & set_phy_link_flags_autoneg)) {
-               cvmx_mdio_phy_reg_control_t reg_control;
-               cvmx_mdio_phy_reg_status_t reg_status;
-               cvmx_mdio_phy_reg_autoneg_adver_t reg_autoneg_adver;
-               cvmx_mdio_phy_reg_control_1000_t reg_control_1000;
-
-               reg_status.u16 =
-                   cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff,
-                                  CVMX_MDIO_PHY_REG_STATUS);
-               reg_autoneg_adver.u16 =
-                   cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff,
-                                  CVMX_MDIO_PHY_REG_AUTONEG_ADVER);
-               reg_autoneg_adver.s.advert_100base_t4 = 0;
-               reg_autoneg_adver.s.advert_10base_tx_full = 0;
-               reg_autoneg_adver.s.advert_10base_tx_half = 0;
-               reg_autoneg_adver.s.advert_100base_tx_full = 0;
-               reg_autoneg_adver.s.advert_100base_tx_half = 0;
-               if (reg_status.s.capable_extended_status) {
-                       reg_control_1000.u16 =
-                           cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff,
-                                          CVMX_MDIO_PHY_REG_CONTROL_1000);
-                       reg_control_1000.s.advert_1000base_t_full = 0;
-                       reg_control_1000.s.advert_1000base_t_half = 0;
-               }
-               switch (link_info.s.speed) {
-               case 10:
-                       reg_autoneg_adver.s.advert_10base_tx_full =
-                           link_info.s.full_duplex;
-                       reg_autoneg_adver.s.advert_10base_tx_half =
-                           !link_info.s.full_duplex;
-                       break;
-               case 100:
-                       reg_autoneg_adver.s.advert_100base_tx_full =
-                           link_info.s.full_duplex;
-                       reg_autoneg_adver.s.advert_100base_tx_half =
-                           !link_info.s.full_duplex;
-                       break;
-               case 1000:
-                       reg_control_1000.s.advert_1000base_t_full =
-                           link_info.s.full_duplex;
-                       reg_control_1000.s.advert_1000base_t_half =
-                           !link_info.s.full_duplex;
-                       break;
-               }
-               cvmx_mdio_write(phy_addr >> 8, phy_addr & 0xff,
-                               CVMX_MDIO_PHY_REG_AUTONEG_ADVER,
-                               reg_autoneg_adver.u16);
-               if (reg_status.s.capable_extended_status)
-                       cvmx_mdio_write(phy_addr >> 8, phy_addr & 0xff,
-                                       CVMX_MDIO_PHY_REG_CONTROL_1000,
-                                       reg_control_1000.u16);
-               reg_control.u16 =
-                   cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff,
-                                  CVMX_MDIO_PHY_REG_CONTROL);
-               reg_control.s.autoneg_enable = 1;
-               reg_control.s.restart_autoneg = 1;
-               cvmx_mdio_write(phy_addr >> 8, phy_addr & 0xff,
-                               CVMX_MDIO_PHY_REG_CONTROL, reg_control.u16);
-       } else {
-               cvmx_mdio_phy_reg_control_t reg_control;
-               reg_control.u16 =
-                   cvmx_mdio_read(phy_addr >> 8, phy_addr & 0xff,
-                                  CVMX_MDIO_PHY_REG_CONTROL);
-               reg_control.s.autoneg_enable = 0;
-               reg_control.s.restart_autoneg = 1;
-               reg_control.s.duplex = link_info.s.full_duplex;
-               if (link_info.s.speed == 1000) {
-                       reg_control.s.speed_msb = 1;
-                       reg_control.s.speed_lsb = 0;
-               } else if (link_info.s.speed == 100) {
-                       reg_control.s.speed_msb = 0;
-                       reg_control.s.speed_lsb = 1;
-               } else if (link_info.s.speed == 10) {
-                       reg_control.s.speed_msb = 0;
-                       reg_control.s.speed_lsb = 0;
-               }
-               cvmx_mdio_write(phy_addr >> 8, phy_addr & 0xff,
-                               CVMX_MDIO_PHY_REG_CONTROL, reg_control.u16);
-       }
-       return 0;
-}
-
 /**
  * This function is called by cvmx_helper_interface_probe() after it
  * determines the number of ports Octeon can support on a specific
@@ -675,48 +384,6 @@ int __cvmx_helper_board_hardware_enable(int interface)
                        cvmx_write_csr(CVMX_ASXX_RX_CLK_SETX(0, interface),
                                       0xc);
                }
-       } else if (cvmx_sysinfo_get()->board_type ==
-                  CVMX_BOARD_TYPE_CN3010_EVB_HS5) {
-               /*
-                * Broadcom PHYs require differnet ASX
-                * clocks. Unfortunately many boards don't define a
-                * new board Id and simply mangle the
-                * CN3010_EVB_HS5
-                */
-               if (interface == 0) {
-                       /*
-                        * Some boards use a hacked up bootloader that
-                        * identifies them as CN3010_EVB_HS5
-                        * evaluation boards.  This leads to all kinds
-                        * of configuration problems.  Detect one
-                        * case, and print warning, while trying to do
-                        * the right thing.
-                        */
-                       int phy_addr = cvmx_helper_board_get_mii_address(0);
-                       if (phy_addr != -1) {
-                               int phy_identifier =
-                                   cvmx_mdio_read(phy_addr >> 8,
-                                                  phy_addr & 0xff, 0x2);
-                               /* Is it a Broadcom PHY? */
-                               if (phy_identifier == 0x0143) {
-                                       cvmx_dprintf("\n");
-                                       cvmx_dprintf("ERROR:\n");
-                                       cvmx_dprintf
-                                           ("ERROR: Board type is CVMX_BOARD_TYPE_CN3010_EVB_HS5, but Broadcom PHY found.\n");
-                                       cvmx_dprintf
-                                           ("ERROR: The board type is mis-configured, and software malfunctions are likely.\n");
-                                       cvmx_dprintf
-                                           ("ERROR: All boards require a unique board type to identify them.\n");
-                                       cvmx_dprintf("ERROR:\n");
-                                       cvmx_dprintf("\n");
-                                       cvmx_wait(1000000000);
-                                       cvmx_write_csr(CVMX_ASXX_RX_CLK_SETX
-                                                      (0, interface), 5);
-                                       cvmx_write_csr(CVMX_ASXX_TX_CLK_SETX
-                                                      (0, interface), 5);
-                               }
-                       }
-               }
        } else if (cvmx_sysinfo_get()->board_type ==
                        CVMX_BOARD_TYPE_UBNT_E100) {
                cvmx_write_csr(CVMX_ASXX_RX_CLK_SETX(0, interface), 0);
index f59c88ee9b31cb667b45fdaac54f931244c9d2cb..671ab1db272765ac50ad9aab1bb85a3ec548a8ef 100644 (file)
@@ -33,8 +33,6 @@
 
 #include <asm/octeon/cvmx-config.h>
 
-
-#include <asm/octeon/cvmx-mdio.h>
 #include <asm/octeon/cvmx-pko.h>
 #include <asm/octeon/cvmx-helper.h>
 #include <asm/octeon/cvmx-helper-board.h>
@@ -243,8 +241,7 @@ int __cvmx_helper_rgmii_enable(int interface)
        /* enable the ports now */
        for (port = 0; port < num_ports; port++) {
                union cvmx_gmxx_prtx_cfg gmx_cfg;
-               cvmx_helper_link_autoconf(cvmx_helper_get_ipd_port
-                                         (interface, port));
+
                gmx_cfg.u64 =
                    cvmx_read_csr(CVMX_GMXX_PRTX_CFG(port, interface));
                gmx_cfg.s.en = 1;
index 6f9609e63a65af18bc36c69072a71b631e408216..54375340afe8ba4e3795fb41c7f7af749f0126d5 100644 (file)
@@ -34,7 +34,6 @@
 
 #include <asm/octeon/cvmx-config.h>
 
-#include <asm/octeon/cvmx-mdio.h>
 #include <asm/octeon/cvmx-helper.h>
 #include <asm/octeon/cvmx-helper-board.h>
 
index a56ee590de1f36b50a4f2f7bc962f01e224bc141..d347fe13b66646121649af57a4fa8854319dd2f3 100644 (file)
@@ -234,8 +234,6 @@ int __cvmx_helper_xaui_enable(int interface)
        cvmx_write_csr(CVMX_GMXX_TX_INT_EN(interface), gmx_tx_int_en.u64);
        cvmx_write_csr(CVMX_PCSXX_INT_EN_REG(interface), pcsx_int_en_reg.u64);
 
-       cvmx_helper_link_autoconf(cvmx_helper_get_ipd_port(interface, 0));
-
        /* (8) Enable packet reception */
        xauiMiscCtl.s.gmxeno = 0;
        cvmx_write_csr(CVMX_PCSXX_MISC_CTL_REG(interface), xauiMiscCtl.u64);
index ff26d0217b878feb1aca373c588337af96af8431..6456af6424719a5cc1adabaea0fc1ccd210c29df 100644 (file)
@@ -841,7 +841,6 @@ int __cvmx_helper_errata_fix_ipd_ptr_alignment(void)
        int retry_cnt;
        int retry_loop_cnt;
        int i;
-       cvmx_helper_link_info_t link_info;
 
        /* Save values for restore at end */
        uint64_t prtx_cfg =
@@ -1002,15 +1001,6 @@ fix_ipd_exit:
                       (INDEX(FIX_IPD_OUTPORT), INTERFACE(FIX_IPD_OUTPORT)),
                       frame_max);
        cvmx_write_csr(CVMX_ASXX_PRT_LOOP(INTERFACE(FIX_IPD_OUTPORT)), 0);
-       /* Set link to down so autonegotiation will set it up again */
-       link_info.u64 = 0;
-       cvmx_helper_link_set(FIX_IPD_OUTPORT, link_info);
-
-       /*
-        * Bring the link back up as autonegotiation is not done in
-        * user applications.
-        */
-       cvmx_helper_link_autoconf(FIX_IPD_OUTPORT);
 
        CVMX_SYNC;
        if (num_segs)
index 5537f95b28c9be169ffd42a42094eaeae68ce064..9a2db1c013d92e548fd04ef0a008ec442942350c 100644 (file)
@@ -65,7 +65,8 @@ EXPORT_SYMBOL(octeon_should_swizzle_table);
 extern void pci_console_init(const char *arg);
 #endif
 
-static unsigned long long MAX_MEMORY = 512ull << 20;
+static unsigned long long max_memory = ULLONG_MAX;
+static unsigned long long reserve_low_mem;
 
 DEFINE_SEMAPHORE(octeon_bootbus_sem);
 EXPORT_SYMBOL(octeon_bootbus_sem);
@@ -75,7 +76,6 @@ struct octeon_boot_descriptor *octeon_boot_desc_ptr;
 struct cvmx_bootinfo *octeon_bootinfo;
 EXPORT_SYMBOL(octeon_bootinfo);
 
-static unsigned long long RESERVE_LOW_MEM = 0ull;
 #ifdef CONFIG_KEXEC
 #ifdef CONFIG_SMP
 /*
@@ -125,18 +125,18 @@ static void kexec_bootmem_init(uint64_t mem_size, uint32_t low_reserved_bytes)
        bootmem_desc->major_version = CVMX_BOOTMEM_DESC_MAJ_VER;
        bootmem_desc->minor_version = CVMX_BOOTMEM_DESC_MIN_VER;
 
-       addr = (OCTEON_DDR0_BASE + RESERVE_LOW_MEM + low_reserved_bytes);
+       addr = (OCTEON_DDR0_BASE + reserve_low_mem + low_reserved_bytes);
        bootmem_desc->head_addr = 0;
 
        if (mem_size <= OCTEON_DDR0_SIZE) {
                __cvmx_bootmem_phy_free(addr,
-                               mem_size - RESERVE_LOW_MEM -
+                               mem_size - reserve_low_mem -
                                low_reserved_bytes, 0);
                return;
        }
 
        __cvmx_bootmem_phy_free(addr,
-                       OCTEON_DDR0_SIZE - RESERVE_LOW_MEM -
+                       OCTEON_DDR0_SIZE - reserve_low_mem -
                        low_reserved_bytes, 0);
 
        mem_size -= OCTEON_DDR0_SIZE;
@@ -857,15 +857,15 @@ void __init prom_init(void)
 
        /* Default to 64MB in the simulator to speed things up */
        if (octeon_is_simulation())
-               MAX_MEMORY = 64ull << 20;
+               max_memory = 64ull << 20;
 
        arg = strstr(arcs_cmdline, "mem=");
        if (arg) {
-               MAX_MEMORY = memparse(arg + 4, &p);
-               if (MAX_MEMORY == 0)
-                       MAX_MEMORY = 32ull << 30;
+               max_memory = memparse(arg + 4, &p);
+               if (max_memory == 0)
+                       max_memory = 32ull << 30;
                if (*p == '@')
-                       RESERVE_LOW_MEM = memparse(p + 1, &p);
+                       reserve_low_mem = memparse(p + 1, &p);
        }
 
        arcs_cmdline[0] = 0;
@@ -875,11 +875,11 @@ void __init prom_init(void)
                        cvmx_phys_to_ptr(octeon_boot_desc_ptr->argv[i]);
                if ((strncmp(arg, "MEM=", 4) == 0) ||
                    (strncmp(arg, "mem=", 4) == 0)) {
-                       MAX_MEMORY = memparse(arg + 4, &p);
-                       if (MAX_MEMORY == 0)
-                               MAX_MEMORY = 32ull << 30;
+                       max_memory = memparse(arg + 4, &p);
+                       if (max_memory == 0)
+                               max_memory = 32ull << 30;
                        if (*p == '@')
-                               RESERVE_LOW_MEM = memparse(p + 1, &p);
+                               reserve_low_mem = memparse(p + 1, &p);
 #ifdef CONFIG_KEXEC
                } else if (strncmp(arg, "crashkernel=", 12) == 0) {
                        crashk_size = memparse(arg+12, &p);
@@ -971,13 +971,13 @@ void __init plat_mem_setup(void)
         * to consistently work.
         */
        mem_alloc_size = 4 << 20;
-       if (mem_alloc_size > MAX_MEMORY)
-               mem_alloc_size = MAX_MEMORY;
+       if (mem_alloc_size > max_memory)
+               mem_alloc_size = max_memory;
 
 /* Crashkernel ignores bootmem list. It relies on mem=X@Y option */
 #ifdef CONFIG_CRASH_DUMP
-       add_memory_region(RESERVE_LOW_MEM, MAX_MEMORY, BOOT_MEM_RAM);
-       total += MAX_MEMORY;
+       add_memory_region(reserve_low_mem, max_memory, BOOT_MEM_RAM);
+       total += max_memory;
 #else
 #ifdef CONFIG_KEXEC
        if (crashk_size > 0) {
@@ -992,7 +992,7 @@ void __init plat_mem_setup(void)
         */
        cvmx_bootmem_lock();
        while ((boot_mem_map.nr_map < BOOT_MEM_MAP_MAX)
-               && (total < MAX_MEMORY)) {
+               && (total < max_memory)) {
                memory = cvmx_bootmem_phy_alloc(mem_alloc_size,
                                                __pa_symbol(&_end), -1,
                                                0x100000,
diff --git a/arch/mips/configs/generic/32r1.config b/arch/mips/configs/generic/32r1.config
new file mode 100644 (file)
index 0000000..a11cd87
--- /dev/null
@@ -0,0 +1,2 @@
+CONFIG_CPU_MIPS32_R1=y
+CONFIG_HIGHMEM=y
diff --git a/arch/mips/configs/generic/32r2.config b/arch/mips/configs/generic/32r2.config
new file mode 100644 (file)
index 0000000..9570672
--- /dev/null
@@ -0,0 +1,3 @@
+CONFIG_CPU_MIPS32_R2=y
+CONFIG_MIPS_O32_FP64_SUPPORT=y
+CONFIG_HIGHMEM=y
diff --git a/arch/mips/configs/generic/32r6.config b/arch/mips/configs/generic/32r6.config
new file mode 100644 (file)
index 0000000..ca606e7
--- /dev/null
@@ -0,0 +1,2 @@
+CONFIG_CPU_MIPS32_R6=y
+CONFIG_HIGHMEM=y
diff --git a/arch/mips/configs/generic/64r1.config b/arch/mips/configs/generic/64r1.config
new file mode 100644 (file)
index 0000000..7c1ea7e
--- /dev/null
@@ -0,0 +1,4 @@
+CONFIG_CPU_MIPS64_R1=y
+CONFIG_64BIT=y
+CONFIG_MIPS32_O32=y
+CONFIG_MIPS32_N32=y
diff --git a/arch/mips/configs/generic/64r2.config b/arch/mips/configs/generic/64r2.config
new file mode 100644 (file)
index 0000000..b4d31ae
--- /dev/null
@@ -0,0 +1,5 @@
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_MIPS_O32_FP64_SUPPORT=y
+CONFIG_64BIT=y
+CONFIG_MIPS32_O32=y
+CONFIG_MIPS32_N32=y
diff --git a/arch/mips/configs/generic/64r6.config b/arch/mips/configs/generic/64r6.config
new file mode 100644 (file)
index 0000000..7cac033
--- /dev/null
@@ -0,0 +1,4 @@
+CONFIG_CPU_MIPS64_R6=y
+CONFIG_64BIT=y
+CONFIG_MIPS32_O32=y
+CONFIG_MIPS32_N32=y
diff --git a/arch/mips/configs/generic/board-sead-3.config b/arch/mips/configs/generic/board-sead-3.config
new file mode 100644 (file)
index 0000000..3b5e1ac
--- /dev/null
@@ -0,0 +1,32 @@
+CONFIG_LEGACY_BOARD_SEAD3=y
+
+CONFIG_AUXDISPLAY=y
+CONFIG_IMG_ASCII_LCD=y
+
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_SYSCON=y
+
+CONFIG_MMC=y
+CONFIG_MMC_SPI=y
+
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_OF_PARTS=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_GLUEBI=y
+
+CONFIG_NETDEVICES=y
+CONFIG_SMSC911X=y
+CONFIG_SMSC_PHY=y
+
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
diff --git a/arch/mips/configs/generic/eb.config b/arch/mips/configs/generic/eb.config
new file mode 100644 (file)
index 0000000..c5cdc99
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_CPU_BIG_ENDIAN=y
diff --git a/arch/mips/configs/generic/el.config b/arch/mips/configs/generic/el.config
new file mode 100644 (file)
index 0000000..ee43fdb
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_CPU_LITTLE_ENDIAN=y
diff --git a/arch/mips/configs/generic/micro32r2.config b/arch/mips/configs/generic/micro32r2.config
new file mode 100644 (file)
index 0000000..b701fe7
--- /dev/null
@@ -0,0 +1,4 @@
+CONFIG_CPU_MIPS32_R2=y
+CONFIG_CPU_MICROMIPS=y
+CONFIG_MIPS_O32_FP64_SUPPORT=y
+CONFIG_HIGHMEM=y
diff --git a/arch/mips/configs/generic_defconfig b/arch/mips/configs/generic_defconfig
new file mode 100644 (file)
index 0000000..c95d94c
--- /dev/null
@@ -0,0 +1,96 @@
+CONFIG_MIPS_GENERIC=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_CPS=y
+CONFIG_CPU_HAS_MSA=y
+CONFIG_HIGHMEM=y
+CONFIG_NR_CPUS=2
+CONFIG_MIPS_O32_FP64_SUPPORT=y
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_SWAP=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_NAMESPACES=y
+CONFIG_USER_NS=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_USERFAULTFD=y
+CONFIG_EMBEDDED=y
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_CC_STACKPROTECTOR_REGULAR=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_TRIM_UNUSED_KSYMS=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_NETFILTER=y
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_SCSI=y
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_HW_RANDOM=y
+# CONFIG_HWMON is not set
+CONFIG_MFD_SYSCON=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MIPS_PLATFORM_DEVICES is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_ENCRYPTION=y
+CONFIG_FANOTIFY=y
+CONFIG_FUSE_FS=y
+CONFIG_CUSE=y
+CONFIG_OVERLAY_FS=y
+CONFIG_MSDOS_FS=y
+CONFIG_VFAT_FS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_REDUCED=y
+CONFIG_DEBUG_FS=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_FTRACE is not set
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="earlycon"
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_POWERPC is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
diff --git a/arch/mips/configs/loongson1c_defconfig b/arch/mips/configs/loongson1c_defconfig
new file mode 100644 (file)
index 0000000..2304d41
--- /dev/null
@@ -0,0 +1,126 @@
+CONFIG_MACH_LOONGSON32=y
+CONFIG_LOONGSON1_LS1C=y
+CONFIG_PREEMPT=y
+# CONFIG_SECCOMP is not set
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_KERNEL_XZ=y
+CONFIG_SYSVIPC=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_NAMESPACES=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_SUSPEND is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_DIAG is not set
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_LOONGSON1=y
+CONFIG_MTD_UBI=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_SCSI=m
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=m
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+CONFIG_STMMAC_ETH=y
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+CONFIG_VT_HW_CONSOLE_BINDING=y
+CONFIG_LEGACY_PTY_COUNT=8
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_LOONGSON1=y
+# CONFIG_HWMON is not set
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_HID_GENERIC=m
+CONFIG_USB_HID=m
+CONFIG_USB=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_EHCI_HCD=y
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_HCD_PLATFORM=y
+CONFIG_USB_STORAGE=m
+CONFIG_USB_SERIAL=m
+CONFIG_USB_SERIAL_PL2303=m
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_LOONGSON1=y
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+# CONFIG_DNOTIFY is not set
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_UBIFS_FS=y
+CONFIG_UBIFS_FS_ADVANCED_COMPR=y
+CONFIG_UBIFS_ATIME_SUPPORT=y
+CONFIG_NFS_FS=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_DYNAMIC_DEBUG=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
+# CONFIG_FTRACE is not set
+# CONFIG_EARLY_PRINTK is not set
+# CONFIG_CRYPTO_ECHAINIV is not set
+# CONFIG_CRYPTO_HW is not set
index 5afb4840aec75c3d41fb7a4fd08b8322d0437904..58d43f3c348d00fec15c975afbf2aa0693879385 100644 (file)
@@ -230,7 +230,7 @@ CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_CFI_STAA=y
-CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_OF=y
 CONFIG_MTD_UBI=m
 CONFIG_MTD_UBI_GLUEBI=m
 CONFIG_BLK_DEV_FD=m
@@ -318,6 +318,8 @@ CONFIG_LIBERTAS=m
 # CONFIG_SERIO_I8042 is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FB_CIRRUS=y
index 98f13879bb8fda832e3e73355f96af7defdcbc23..c8f7e2835840ddb5737008004be064d289c6182a 100644 (file)
@@ -235,7 +235,7 @@ CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_CFI_STAA=y
-CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_OF=y
 CONFIG_MTD_UBI=m
 CONFIG_MTD_UBI_GLUEBI=m
 CONFIG_BLK_DEV_FD=m
@@ -331,6 +331,8 @@ CONFIG_LIBERTAS=m
 # CONFIG_SERIO_I8042 is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FB_CIRRUS=y
index 3b5d5913f548cddaf65f457da86ae245dee9f06d..d2f54e55356c8cfb427b7f6ca896110f16b205b9 100644 (file)
@@ -234,7 +234,7 @@ CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_CFI_STAA=y
-CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_OF=y
 CONFIG_MTD_UBI=m
 CONFIG_MTD_UBI_GLUEBI=m
 CONFIG_BLK_DEV_FD=m
@@ -331,6 +331,8 @@ CONFIG_LIBERTAS=m
 # CONFIG_SERIO_I8042 is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FB_CIRRUS=y
index 65f140e1e872a87c8aeb1728c71b710889e25010..cbf37dd0c4908a386c3257a0b084eae95435dc17 100644 (file)
@@ -132,6 +132,8 @@ CONFIG_LEGACY_PTY_COUNT=4
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_HW_RANDOM=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
index 799c4338fd5e0580d1810e44a9e8a85361475705..35f6ba260df8fe215c058577377c2a96fcfbfc6b 100644 (file)
@@ -132,6 +132,8 @@ CONFIG_LEGACY_PTY_COUNT=16
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_HW_RANDOM=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
 CONFIG_VIDEO_OUTPUT_CONTROL=m
 CONFIG_FB=y
index ac0eb4daf1010b86cff9c2759fa7de046ce39996..900f14543eeb9856d75468b432161c53c0c88de2 100644 (file)
@@ -134,6 +134,8 @@ CONFIG_LEGACY_PTY_COUNT=4
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_HW_RANDOM=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FIRMWARE_EDID=y
index 31846000530fb158bec7131def3dd40249394d5f..8e2738b5e180a7a5ea9873f52b86d65312ce5f59 100644 (file)
@@ -137,6 +137,8 @@ CONFIG_LEGACY_PTY_COUNT=4
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_HW_RANDOM=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
 CONFIG_VIDEO_OUTPUT_CONTROL=m
 CONFIG_FB=y
index a79107da0675b881cbc704863692428f2555f6a0..6dc4e309a6918c69bd06277c7192e065c6e5c5ce 100644 (file)
@@ -131,6 +131,8 @@ CONFIG_LEGACY_PTY_COUNT=16
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_HW_RANDOM=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
 CONFIG_VIDEO_OUTPUT_CONTROL=m
 CONFIG_FB=y
index 73221573275166e1ae7d044b1bad8dfa1b1f6d65..3d0d9cb9673f80d976033e969584308d92d3f446 100644 (file)
@@ -231,7 +231,7 @@ CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_CFI_STAA=y
-CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_OF=y
 CONFIG_MTD_UBI=m
 CONFIG_MTD_UBI_GLUEBI=m
 CONFIG_BLK_DEV_FD=m
@@ -326,6 +326,8 @@ CONFIG_LIBERTAS=m
 # CONFIG_SERIO_I8042 is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
 CONFIG_FB=y
 CONFIG_FB_CIRRUS=y
index 8b7429127a1d18c192677e7b04d7d9c68e6e9875..7d32fbbca96269dd37d22d672ee2a6a6783dfc32 100644 (file)
@@ -29,7 +29,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_EMBEDDED=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
-CONFIG_CC_STACKPROTECTOR_STRONG=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
@@ -264,7 +263,6 @@ CONFIG_DMADEVICES=y
 CONFIG_IMG_MDC_DMA=y
 CONFIG_STAGING=y
 CONFIG_ASHMEM=y
-# CONFIG_ANDROID_TIMED_OUTPUT is not set
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_MEMORY=y
 CONFIG_IIO=y
diff --git a/arch/mips/configs/sead3_defconfig b/arch/mips/configs/sead3_defconfig
deleted file mode 100644 (file)
index dae9354..0000000
+++ /dev/null
@@ -1,121 +0,0 @@
-CONFIG_MIPS_SEAD3=y
-CONFIG_CPU_LITTLE_ENDIAN=y
-CONFIG_CPU_MIPS32_R2=y
-CONFIG_HZ_100=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=15
-CONFIG_EMBEDDED=y
-CONFIG_SLAB=y
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
-CONFIG_MODULES=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
-CONFIG_MTD=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_UBI=y
-CONFIG_MTD_UBI_GLUEBI=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_SCSI=y
-# CONFIG_SCSI_PROC_FS is not set
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_SG=y
-# CONFIG_SCSI_LOWLEVEL is not set
-CONFIG_NETDEVICES=y
-CONFIG_SMSC911X=y
-# CONFIG_NET_VENDOR_WIZNET is not set
-CONFIG_MARVELL_PHY=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_QSEMI_PHY=y
-CONFIG_LXT_PHY=y
-CONFIG_CICADA_PHY=y
-CONFIG_VITESSE_PHY=y
-CONFIG_SMSC_PHY=y
-CONFIG_BROADCOM_PHY=y
-CONFIG_ICPLUS_PHY=y
-# CONFIG_WLAN is not set
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_CONSOLE_TRANSLATIONS is not set
-CONFIG_VT_HW_CONSOLE_BINDING=y
-CONFIG_LEGACY_PTY_COUNT=32
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
-# CONFIG_I2C_COMPAT is not set
-CONFIG_I2C_CHARDEV=y
-# CONFIG_I2C_HELPER_AUTO is not set
-CONFIG_SPI=y
-CONFIG_SENSORS_ADT7475=y
-CONFIG_BACKLIGHT_LCD_SUPPORT=y
-CONFIG_LCD_CLASS_DEVICE=y
-CONFIG_BACKLIGHT_CLASS_DEVICE=y
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_USB=y
-CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_ROOT_HUB_TT=y
-CONFIG_USB_STORAGE=y
-CONFIG_MMC=y
-CONFIG_MMC_DEBUG=y
-CONFIG_MMC_SPI=y
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=y
-CONFIG_LEDS_TRIGGERS=y
-CONFIG_LEDS_TRIGGER_HEARTBEAT=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_M41T80=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_XFS_FS=y
-CONFIG_XFS_QUOTA=y
-CONFIG_XFS_POSIX_ACL=y
-CONFIG_QUOTA=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
-CONFIG_NFS_FS=y
-CONFIG_ROOT_NFS=y
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_ISO8859_15=y
-CONFIG_NLS_UTF8=y
-# CONFIG_FTRACE is not set
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_ARC4=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/mips/configs/sead3micro_defconfig b/arch/mips/configs/sead3micro_defconfig
deleted file mode 100644 (file)
index cd91a77..0000000
+++ /dev/null
@@ -1,122 +0,0 @@
-CONFIG_MIPS_SEAD3=y
-CONFIG_CPU_LITTLE_ENDIAN=y
-CONFIG_CPU_MIPS32_R2=y
-CONFIG_CPU_MICROMIPS=y
-CONFIG_HZ_100=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=15
-CONFIG_EMBEDDED=y
-CONFIG_SLAB=y
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
-CONFIG_MODULES=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_INET_DIAG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
-CONFIG_MTD=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_UBI=y
-CONFIG_MTD_UBI_GLUEBI=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_SCSI=y
-# CONFIG_SCSI_PROC_FS is not set
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_SG=y
-# CONFIG_SCSI_LOWLEVEL is not set
-CONFIG_NETDEVICES=y
-CONFIG_SMSC911X=y
-# CONFIG_NET_VENDOR_WIZNET is not set
-CONFIG_MARVELL_PHY=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_QSEMI_PHY=y
-CONFIG_LXT_PHY=y
-CONFIG_CICADA_PHY=y
-CONFIG_VITESSE_PHY=y
-CONFIG_SMSC_PHY=y
-CONFIG_BROADCOM_PHY=y
-CONFIG_ICPLUS_PHY=y
-# CONFIG_WLAN is not set
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_CONSOLE_TRANSLATIONS is not set
-CONFIG_VT_HW_CONSOLE_BINDING=y
-CONFIG_LEGACY_PTY_COUNT=32
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
-# CONFIG_I2C_COMPAT is not set
-CONFIG_I2C_CHARDEV=y
-# CONFIG_I2C_HELPER_AUTO is not set
-CONFIG_SPI=y
-CONFIG_SENSORS_ADT7475=y
-CONFIG_BACKLIGHT_LCD_SUPPORT=y
-CONFIG_LCD_CLASS_DEVICE=y
-CONFIG_BACKLIGHT_CLASS_DEVICE=y
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_USB=y
-CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_ROOT_HUB_TT=y
-CONFIG_USB_STORAGE=y
-CONFIG_MMC=y
-CONFIG_MMC_DEBUG=y
-CONFIG_MMC_SPI=y
-CONFIG_NEW_LEDS=y
-CONFIG_LEDS_CLASS=y
-CONFIG_LEDS_TRIGGERS=y
-CONFIG_LEDS_TRIGGER_HEARTBEAT=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_M41T80=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_XFS_FS=y
-CONFIG_XFS_QUOTA=y
-CONFIG_XFS_POSIX_ACL=y
-CONFIG_QUOTA=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
-CONFIG_NFS_FS=y
-CONFIG_ROOT_NFS=y
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_ISO8859_15=y
-CONFIG_NLS_UTF8=y
-# CONFIG_FTRACE is not set
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_ARC4=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/mips/generic/Kconfig b/arch/mips/generic/Kconfig
new file mode 100644 (file)
index 0000000..a606b3f
--- /dev/null
@@ -0,0 +1,19 @@
+if MIPS_GENERIC
+
+config LEGACY_BOARDS
+       bool
+       help
+         Select this from your board if the board must use a legacy, non-UHI,
+         boot protocol. This will cause the kernel to scan through the list of
+         supported machines calling their detect functions in turn if the
+         kernel is booted without being provided with an FDT via the UHI
+         boot protocol.
+
+config LEGACY_BOARD_SEAD3
+       bool "Support MIPS SEAD-3 boards"
+       select LEGACY_BOARDS
+       help
+         Enable this to include support for booting on MIPS SEAD-3 FPGA-based
+         development boards, which boot using a legacy boot protocol.
+
+endif
diff --git a/arch/mips/generic/Makefile b/arch/mips/generic/Makefile
new file mode 100644 (file)
index 0000000..7c66494
--- /dev/null
@@ -0,0 +1,15 @@
+#
+# Copyright (C) 2016 Imagination Technologies
+# Author: Paul Burton <paul.burton@imgtec.com>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation;  either version 2 of the  License, or (at your
+# option) any later version.
+#
+
+obj-y += init.o
+obj-y += irq.o
+obj-y += proc.o
+
+obj-$(CONFIG_LEGACY_BOARD_SEAD3)       += board-sead3.o
diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform
new file mode 100644 (file)
index 0000000..9a30d69
--- /dev/null
@@ -0,0 +1,14 @@
+#
+# Copyright (C) 2016 Imagination Technologies
+# Author: Paul Burton <paul.burton@imgtec.com>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation;  either version 2 of the  License, or (at your
+# option) any later version.
+#
+
+platform-$(CONFIG_MIPS_GENERIC)        += generic/
+cflags-$(CONFIG_MIPS_GENERIC)  += -I$(srctree)/arch/mips/include/asm/mach-generic
+load-$(CONFIG_MIPS_GENERIC)    += 0xffffffff80100000
+all-$(CONFIG_MIPS_GENERIC)     := vmlinux.gz.itb
diff --git a/arch/mips/generic/board-sead3.c b/arch/mips/generic/board-sead3.c
new file mode 100644 (file)
index 0000000..f4ae058
--- /dev/null
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#define pr_fmt(fmt) "sead3: " fmt
+
+#include <linux/errno.h>
+#include <linux/libfdt.h>
+#include <linux/printk.h>
+
+#include <asm/fw/fw.h>
+#include <asm/io.h>
+#include <asm/machine.h>
+
+#define SEAD_CONFIG                    CKSEG1ADDR(0x1b100110)
+#define SEAD_CONFIG_GIC_PRESENT                BIT(1)
+
+#define MIPS_REVISION                  CKSEG1ADDR(0x1fc00010)
+#define MIPS_REVISION_MACHINE          (0xf << 4)
+#define MIPS_REVISION_MACHINE_SEAD3    (0x4 << 4)
+
+/*
+ * Legacy-boot detection hook: report whether the machine field of the
+ * MIPS_REVISION register identifies a SEAD-3 board.
+ */
+static __init bool sead3_detect(void)
+{
+       const uint32_t machine = __raw_readl((void *)MIPS_REVISION) &
+                                MIPS_REVISION_MACHINE;
+
+       return machine == MIPS_REVISION_MACHINE_SEAD3;
+}
+
+/*
+ * Store the firmware-provided command line (fw_getcmdline()) in the
+ * "bootargs" property of the chosen node, creating that node when neither
+ * /chosen nor /chosen@0 exists.
+ *
+ * Returns 0 on success, otherwise the libfdt error code.
+ */
+static __init int append_cmdline(void *fdt)
+{
+       int node, ret;
+
+       /* try both spellings of the chosen node before creating one */
+       node = fdt_path_offset(fdt, "/chosen");
+       if (node == -FDT_ERR_NOTFOUND)
+               node = fdt_path_offset(fdt, "/chosen@0");
+       if (node == -FDT_ERR_NOTFOUND)
+               node = fdt_add_subnode(fdt, 0, "chosen");
+
+       if (node < 0) {
+               pr_err("Unable to find or add DT chosen node: %d\n", node);
+               return node;
+       }
+
+       ret = fdt_setprop_string(fdt, node, "bootargs", fw_getcmdline());
+       if (ret)
+               pr_err("Unable to set bootargs property: %d\n", ret);
+
+       return ret;
+}
+
+/*
+ * Describe system RAM in the memory node of the device tree.
+ *
+ * The physical size comes from the bootloader's "memsize" environment
+ * variable (32MB if it is absent); a "memsize=" argument on the kernel
+ * command line may override how much of it is used. The node's "reg"
+ * property receives the physical size and "linux,usable-memory" the usable
+ * size, both as a { base 0, size } pair of 32-bit big-endian cells.
+ *
+ * Returns 0 on success, -EINVAL if the environment variable cannot be
+ * parsed, otherwise the libfdt error code.
+ */
+static __init int append_memory(void *fdt)
+{
+       unsigned long phys_memsize, memsize;
+       __be32 mem_array[2];
+       int err, mem_off;
+       char *var;
+
+       /* find memory size from the bootloader environment */
+       var = fw_getenv("memsize");
+       if (var) {
+               /* base 0: kstrtoul() picks the radix from the string's prefix */
+               err = kstrtoul(var, 0, &phys_memsize);
+               if (err) {
+                       pr_err("Failed to read memsize env variable '%s'\n",
+                              var);
+                       return -EINVAL;
+               }
+       } else {
+               pr_warn("The bootloader didn't provide memsize: defaulting to 32MB\n");
+               phys_memsize = 32 << 20;
+       }
+
+       /* default to using all available RAM */
+       memsize = phys_memsize;
+
+       /* allow the user to override the usable memory */
+       var = strstr(arcs_cmdline, "memsize=");
+       if (var)
+               memsize = memparse(var + strlen("memsize="), NULL);
+
+       /* if the user says there's more RAM than we thought, believe them */
+       phys_memsize = max_t(unsigned long, phys_memsize, memsize);
+
+       /* find or add a memory node */
+       mem_off = fdt_path_offset(fdt, "/memory");
+       if (mem_off == -FDT_ERR_NOTFOUND)
+               mem_off = fdt_add_subnode(fdt, 0, "memory");
+       if (mem_off < 0) {
+               pr_err("Unable to find or add memory DT node: %d\n", mem_off);
+               return mem_off;
+       }
+
+       err = fdt_setprop_string(fdt, mem_off, "device_type", "memory");
+       if (err) {
+               pr_err("Unable to set memory node device_type: %d\n", err);
+               return err;
+       }
+
+       /* "reg": all physical RAM, starting at address 0 */
+       mem_array[0] = 0;
+       mem_array[1] = cpu_to_be32(phys_memsize);
+       err = fdt_setprop(fdt, mem_off, "reg", mem_array, sizeof(mem_array));
+       if (err) {
+               pr_err("Unable to set memory regs property: %d\n", err);
+               return err;
+       }
+
+       /* "linux,usable-memory": the possibly smaller, user-limited size */
+       mem_array[0] = 0;
+       mem_array[1] = cpu_to_be32(memsize);
+       err = fdt_setprop(fdt, mem_off, "linux,usable-memory",
+                         mem_array, sizeof(mem_array));
+       if (err) {
+               pr_err("Unable to set linux,usable-memory property: %d\n", err);
+               return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Adapt the device tree for boards without a GIC.
+ *
+ * When the SEAD_CONFIG register reports no GIC, the "mti,gic" node is
+ * replaced with NOPs, the root "interrupt-parent" is pointed at the
+ * "mti,cpu-interrupt-controller" node, and the "interrupts" properties of
+ * every ns16550a UART, the smsc,lan9115 ethernet node and the generic-ehci
+ * node are rewritten to the fixed CPU interrupt numbers declared below.
+ *
+ * Returns 0 on success (including when a GIC is present and nothing is
+ * changed), -EINVAL if the CPU interrupt controller has no phandle,
+ * otherwise the libfdt error code.
+ */
+static __init int remove_gic(void *fdt)
+{
+       /* interrupt numbers written into each device's "interrupts" property */
+       const unsigned int cpu_ehci_int = 2;
+       const unsigned int cpu_uart_int = 4;
+       const unsigned int cpu_eth_int = 6;
+       int gic_off, cpu_off, uart_off, eth_off, ehci_off, err;
+       uint32_t cfg, cpu_phandle;
+
+       /* leave the GIC node intact if a GIC is present */
+       cfg = __raw_readl((uint32_t *)SEAD_CONFIG);
+       if (cfg & SEAD_CONFIG_GIC_PRESENT)
+               return 0;
+
+       gic_off = fdt_node_offset_by_compatible(fdt, -1, "mti,gic");
+       if (gic_off < 0) {
+               pr_err("unable to find DT GIC node: %d\n", gic_off);
+               return gic_off;
+       }
+
+       err = fdt_nop_node(fdt, gic_off);
+       if (err) {
+               pr_err("unable to nop GIC node\n");
+               return err;
+       }
+
+       cpu_off = fdt_node_offset_by_compatible(fdt, -1,
+                       "mti,cpu-interrupt-controller");
+       if (cpu_off < 0) {
+               pr_err("unable to find CPU intc node: %d\n", cpu_off);
+               return cpu_off;
+       }
+
+       cpu_phandle = fdt_get_phandle(fdt, cpu_off);
+       if (!cpu_phandle) {
+               pr_err("unable to get CPU intc phandle\n");
+               return -EINVAL;
+       }
+
+       /* make the CPU interrupt controller the root interrupt parent */
+       err = fdt_setprop_u32(fdt, 0, "interrupt-parent", cpu_phandle);
+       if (err) {
+               pr_err("unable to set root interrupt-parent: %d\n", err);
+               return err;
+       }
+
+       /* every ns16550a UART gets the same CPU interrupt number */
+       uart_off = fdt_node_offset_by_compatible(fdt, -1, "ns16550a");
+       while (uart_off >= 0) {
+               err = fdt_setprop_u32(fdt, uart_off, "interrupts",
+                                     cpu_uart_int);
+               if (err) {
+                       pr_err("unable to set UART interrupts property: %d\n",
+                              err);
+                       return err;
+               }
+
+               uart_off = fdt_node_offset_by_compatible(fdt, uart_off,
+                                                        "ns16550a");
+       }
+       /* the search is expected to end with NOTFOUND; anything else is an error */
+       if (uart_off != -FDT_ERR_NOTFOUND) {
+               pr_err("error searching for UART DT node: %d\n", uart_off);
+               return uart_off;
+       }
+
+       eth_off = fdt_node_offset_by_compatible(fdt, -1, "smsc,lan9115");
+       if (eth_off < 0) {
+               pr_err("unable to find ethernet DT node: %d\n", eth_off);
+               return eth_off;
+       }
+
+       err = fdt_setprop_u32(fdt, eth_off, "interrupts", cpu_eth_int);
+       if (err) {
+               pr_err("unable to set ethernet interrupts property: %d\n", err);
+               return err;
+       }
+
+       ehci_off = fdt_node_offset_by_compatible(fdt, -1, "generic-ehci");
+       if (ehci_off < 0) {
+               pr_err("unable to find EHCI DT node: %d\n", ehci_off);
+               return ehci_off;
+       }
+
+       err = fdt_setprop_u32(fdt, ehci_off, "interrupts", cpu_ehci_int);
+       if (err) {
+               pr_err("unable to set EHCI interrupts property: %d\n", err);
+               return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Point the chosen node's "stdout-path" at the UART selected by the
+ * bootloader environment, with matching line settings.
+ *
+ * The UART index comes from the "yamontty" environment variable ("tty0" or
+ * "tty1"; anything else falls back to 0) and the line settings from
+ * "modetty<N>", parsed as <baud>,<parity>,<bits>,<flow>. Missing or invalid
+ * fields fall back to 38400 baud, no parity and 8 bits; hardware flow
+ * control is enabled only when the flow field is exactly "hw".
+ *
+ * Returns 0 on success, otherwise the libfdt error code.
+ */
+static __init int serial_config(void *fdt)
+{
+       const char *yamontty, *mode_var;
+       char mode_var_name[9], path[18], parity;
+       /*
+        * NOTE: despite its name, stop_bits holds the bits field of the
+        * <baud><parity><bits> option string; only 7 and 8 are accepted.
+        */
+       unsigned int uart, baud, stop_bits;
+       bool hw_flow;
+       int chosen_off, err;
+
+       yamontty = fw_getenv("yamontty");
+       if (!yamontty || !strcmp(yamontty, "tty0")) {
+               uart = 0;
+       } else if (!strcmp(yamontty, "tty1")) {
+               uart = 1;
+       } else {
+               pr_warn("yamontty environment variable '%s' invalid\n",
+                       yamontty);
+               uart = 0;
+       }
+
+       baud = stop_bits = 0;
+       parity = 0;
+       hw_flow = false;
+
+       snprintf(mode_var_name, sizeof(mode_var_name), "modetty%u", uart);
+       mode_var = fw_getenv(mode_var_name);
+       if (mode_var) {
+               while (mode_var[0] >= '0' && mode_var[0] <= '9') {
+                       baud *= 10;
+                       baud += mode_var[0] - '0';
+                       mode_var++;
+               }
+               if (mode_var[0] == ',')
+                       mode_var++;
+               /*
+                * Each single-character field must be consumed once read,
+                * otherwise the separator checks below never match and the
+                * bits and flow-control fields are silently ignored.
+                */
+               if (mode_var[0])
+                       parity = *mode_var++;
+               if (mode_var[0] == ',')
+                       mode_var++;
+               if (mode_var[0])
+                       stop_bits = *mode_var++ - '0';
+               if (mode_var[0] == ',')
+                       mode_var++;
+               if (!strcmp(mode_var, "hw"))
+                       hw_flow = true;
+       }
+
+       /* replace anything missing or out of range with the defaults */
+       if (!baud)
+               baud = 38400;
+
+       if (parity != 'e' && parity != 'n' && parity != 'o')
+               parity = 'n';
+
+       if (stop_bits != 7 && stop_bits != 8)
+               stop_bits = 8;
+
+       /* warn if the formatted path did not fit and was truncated */
+       WARN_ON(snprintf(path, sizeof(path), "uart%u:%u%c%u%s",
+                        uart, baud, parity, stop_bits,
+                        hw_flow ? "r" : "") >= sizeof(path));
+
+       /* find or add chosen node */
+       chosen_off = fdt_path_offset(fdt, "/chosen");
+       if (chosen_off == -FDT_ERR_NOTFOUND)
+               chosen_off = fdt_path_offset(fdt, "/chosen@0");
+       if (chosen_off == -FDT_ERR_NOTFOUND)
+               chosen_off = fdt_add_subnode(fdt, 0, "chosen");
+       if (chosen_off < 0) {
+               pr_err("Unable to find or add DT chosen node: %d\n",
+                      chosen_off);
+               return chosen_off;
+       }
+
+       err = fdt_setprop_string(fdt, chosen_off, "stdout-path", path);
+       if (err) {
+               pr_err("Unable to set stdout-path property: %d\n", err);
+               return err;
+       }
+
+       return 0;
+}
+
+/*
+ * fixup_fdt hook for SEAD-3: copy the supplied FDT into a writable 16KB
+ * init-data buffer and patch it with the firmware command line, memory
+ * layout, GIC-less interrupt routing and serial console settings, then
+ * pack it.
+ *
+ * Any failure is fatal (panic/BUG). Returns a pointer to the patched copy;
+ * match_data is unused.
+ */
+static __init const void *sead3_fixup_fdt(const void *fdt,
+                                         const void *match_data)
+{
+       /* working copy: the incoming FDT is const and needs room to grow */
+       static unsigned char fdt_buf[16 << 10] __initdata;
+       int err;
+
+       if (fdt_check_header(fdt))
+               panic("Corrupt DT");
+
+       /* if this isn't SEAD3, something went wrong */
+       BUG_ON(fdt_node_check_compatible(fdt, 0, "mti,sead-3"));
+
+       /* runs before append_cmdline() calls fw_getcmdline() */
+       fw_init_cmdline();
+
+       err = fdt_open_into(fdt, fdt_buf, sizeof(fdt_buf));
+       if (err)
+               panic("Unable to open FDT: %d", err);
+
+       err = append_cmdline(fdt_buf);
+       if (err)
+               panic("Unable to patch FDT: %d", err);
+
+       err = append_memory(fdt_buf);
+       if (err)
+               panic("Unable to patch FDT: %d", err);
+
+       err = remove_gic(fdt_buf);
+       if (err)
+               panic("Unable to patch FDT: %d", err);
+
+       err = serial_config(fdt_buf);
+       if (err)
+               panic("Unable to patch FDT: %d", err);
+
+       err = fdt_pack(fdt_buf);
+       if (err)
+               panic("Unable to pack FDT: %d\n", err);
+
+       return fdt_buf;
+}
+
+/*
+ * Measure the CP0 count frequency against the board's sampling clock.
+ *
+ * With local interrupts disabled, wait for an edge of bit 1 of the status
+ * register at 0xbf000410, zero the CP0 count register, then count 100
+ * further edges of that bit. The count register value read afterwards is
+ * returned as the frequency.
+ */
+static __init unsigned int sead3_measure_hpt_freq(void)
+{
+       void __iomem *status_reg = (void __iomem *)0xbf000410;
+       unsigned int freq, orig, tick = 0;
+       unsigned long flags;
+
+       /* keep interrupt handling from disturbing the timed polling loop */
+       local_irq_save(flags);
+
+       orig = readl(status_reg) & 0x2;               /* get original sample */
+       /* wait for transition */
+       while ((readl(status_reg) & 0x2) == orig)
+               ;
+       orig = orig ^ 0x2;                            /* flip the bit */
+
+       /* start counting from an edge of the sampling clock */
+       write_c0_count(0);
+
+       /* wait 1 second (the sampling clock transitions every 10ms) */
+       while (tick < 100) {
+               /* wait for transition */
+               while ((readl(status_reg) & 0x2) == orig)
+                       ;
+               orig = orig ^ 0x2;                            /* flip the bit */
+               tick++;
+       }
+
+       /* counter ticks accumulated over the measured interval */
+       freq = read_c0_count();
+
+       local_irq_restore(flags);
+
+       return freq;
+}
+
+/* NOTE(review): presumably the start of the built-in SEAD-3 DTB - confirm */
+extern char __dtb_sead3_begin[];
+
+/*
+ * SEAD-3 machine descriptor: the FDT to use, together with the detection,
+ * FDT fixup and counter-frequency measurement hooks defined in this file.
+ */
+MIPS_MACHINE(sead3) = {
+       .fdt = __dtb_sead3_begin,
+       .detect = sead3_detect,
+       .fixup_fdt = sead3_fixup_fdt,
+       .measure_hpt_freq = sead3_measure_hpt_freq,
+};
diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c
new file mode 100644 (file)
index 0000000..d493ccb
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/irqchip.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+
+#include <asm/fw/fw.h>
+#include <asm/irq_cpu.h>
+#include <asm/machine.h>
+#include <asm/mips-cpc.h>
+#include <asm/prom.h>
+#include <asm/smp-ops.h>
+#include <asm/time.h>
+
+static __initdata const void *fdt;
+static __initdata const struct mips_machine *mach;
+static __initdata const void *mach_match_data;
+
+/*
+ * Locate the device tree via plat_get_fdt(); the kernel cannot continue
+ * without one, so a missing FDT is treated as a bug.
+ */
+void __init prom_init(void)
+{
+       plat_get_fdt();
+       BUG_ON(!fdt);
+}
+
+/*
+ * Return the flattened device tree for this boot, selecting it on the
+ * first call and caching it in the file-scope 'fdt' pointer.
+ *
+ * With a UHI boot (fw_arg0 == -2 and a valid FDT header at fw_arg1) the
+ * firmware-supplied FDT is used and a matching machine, if any, is
+ * recorded in 'mach'/'mach_match_data'. Otherwise, when
+ * CONFIG_LEGACY_BOARDS is enabled, the machines' detect() hooks are
+ * consulted and the detected machine's own FDT is used. If neither path
+ * applies, NULL is returned.
+ */
+void __init *plat_get_fdt(void)
+{
+       const struct mips_machine *check_mach;
+       const struct of_device_id *match;
+
+       if (fdt)
+               /* Already set up */
+               return (void *)fdt;
+
+       if ((fw_arg0 == -2) && !fdt_check_header((void *)fw_arg1)) {
+               /*
+                * We booted using the UHI boot protocol, so we have been
+                * provided with the appropriate device tree for the board.
+                * Make use of it & search for any machine struct based upon
+                * the root compatible string.
+                */
+               fdt = (void *)fw_arg1;
+
+               /* the first compatible machine wins; having none is allowed */
+               for_each_mips_machine(check_mach) {
+                       match = mips_machine_is_compatible(check_mach, fdt);
+                       if (match) {
+                               mach = check_mach;
+                               mach_match_data = match->data;
+                               break;
+                       }
+               }
+       } else if (IS_ENABLED(CONFIG_LEGACY_BOARDS)) {
+               /*
+                * We weren't booted using the UHI boot protocol, but do
+                * support some number of boards with legacy boot protocols.
+                * Attempt to find the right one.
+                */
+               /* no break: the last machine whose detect() succeeds is kept */
+               for_each_mips_machine(check_mach) {
+                       if (!check_mach->detect)
+                               continue;
+
+                       if (!check_mach->detect())
+                               continue;
+
+                       mach = check_mach;
+               }
+
+               /*
+                * If we don't recognise the machine then we can't continue, so
+                * die here.
+                */
+               BUG_ON(!mach);
+
+               /* Retrieve the machine's FDT */
+               fdt = mach->fdt;
+       }
+       return (void *)fdt;
+}
+
+/*
+ * Give the detected machine, if any, the chance to patch the FDT, then
+ * copy boot_command_line into arcs_cmdline and pass the resulting tree to
+ * __dt_setup_arch().
+ */
+void __init plat_mem_setup(void)
+{
+       /* the fixup hook may return a different (patched) FDT pointer */
+       if (mach && mach->fixup_fdt)
+               fdt = mach->fixup_fdt(fdt, mach_match_data);
+
+       strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+       __dt_setup_arch((void *)fdt);
+}
+
+/*
+ * Unflatten the device tree, probe the CPC and register SMP operations,
+ * preferring the CPS implementation.
+ */
+void __init device_tree_init(void)
+{
+       unflatten_and_copy_device_tree();
+       mips_cpc_probe();
+
+       /* fall back to the UP SMP ops when the CPS ops cannot be registered */
+       if (register_cps_smp_ops())
+               register_up_smp_ops();
+}
+
+/*
+ * Initialise clocks and determine mips_hpt_frequency.
+ *
+ * The frequency is 0 when the CPU has no counter, is measured by the
+ * machine's measure_hpt_freq() hook when one exists, and is otherwise
+ * derived from the rate of CPU 0's first clock in the device tree (halved
+ * for every CPU type except the 20Kc and 25Kf). If the CPU node or its
+ * clock cannot be obtained the function logs an error and returns early,
+ * without probing clocksources.
+ */
+void __init plat_time_init(void)
+{
+       struct device_node *np;
+       struct clk *clk;
+
+       of_clk_init(NULL);
+
+       if (!cpu_has_counter) {
+               mips_hpt_frequency = 0;
+       } else if (mach && mach->measure_hpt_freq) {
+               mips_hpt_frequency = mach->measure_hpt_freq();
+       } else {
+               np = of_get_cpu_node(0, NULL);
+               if (!np) {
+                       pr_err("Failed to get CPU node\n");
+                       return;
+               }
+
+               clk = of_clk_get(np, 0);
+               /* release the reference of_get_cpu_node() took on the node */
+               of_node_put(np);
+               if (IS_ERR(clk)) {
+                       pr_err("Failed to get CPU clock: %ld\n", PTR_ERR(clk));
+                       return;
+               }
+
+               mips_hpt_frequency = clk_get_rate(clk);
+               clk_put(clk);
+
+               switch (boot_cpu_type()) {
+               case CPU_20KC:
+               case CPU_25KF:
+                       /* The counter runs at the CPU clock rate */
+                       break;
+               default:
+                       /* The counter runs at half the CPU clock rate */
+                       mips_hpt_frequency /= 2;
+                       break;
+               }
+       }
+
+       clocksource_probe();
+}
+
+/*
+ * Set up interrupt controllers. The CPU interrupt controller is
+ * initialised directly only when the CPU does not use VEIC and the device
+ * tree has no "mti,cpu-interrupt-controller" node; irqchip_init() is then
+ * called in every case.
+ */
+void __init arch_init_irq(void)
+{
+       struct device_node *intc_node;
+
+       intc_node = of_find_compatible_node(NULL, NULL,
+                                           "mti,cpu-interrupt-controller");
+       if (!cpu_has_veic && !intc_node)
+               mips_cpu_irq_init();
+
+       /* only presence was needed; drop the reference (NULL is accepted) */
+       of_node_put(intc_node);
+
+       irqchip_init();
+}
+
+/*
+ * arch_initcall: create platform devices from the device tree using the
+ * default bus match table. A missing device tree or a population failure
+ * is fatal. Always returns 0.
+ */
+static int __init publish_devices(void)
+{
+       if (!of_have_populated_dt())
+               panic("Device-tree not present");
+
+       if (of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL))
+               panic("Failed to populate DT");
+
+       return 0;
+}
+arch_initcall(publish_devices);
+
+/* Intentionally empty: this platform releases no firmware memory here. */
+void __init prom_free_prom_memory(void)
+{
+}
diff --git a/arch/mips/generic/irq.c b/arch/mips/generic/irq.c
new file mode 100644 (file)
index 0000000..14064bd
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/irqchip/mips-gic.h>
+#include <linux/types.h>
+
+#include <asm/irq.h>
+
+/*
+ * Return the IRQ number of the CP0 FDC interrupt: the GIC's mapping when a
+ * GIC is present, otherwise the CPU IRQ derived from cp0_fdc_irq, or -1
+ * when cp0_fdc_irq is negative. VEIC mode is not supported and panics.
+ */
+int get_c0_fdc_int(void)
+{
+       if (cpu_has_veic)
+               panic("Unimplemented!");
+
+       if (gic_present)
+               return gic_get_c0_fdc_int();
+
+       if (cp0_fdc_irq < 0)
+               return -1;
+
+       return MIPS_CPU_IRQ_BASE + cp0_fdc_irq;
+}
+
+/*
+ * Return the IRQ number of the CP0 performance counter interrupt: the
+ * GIC's mapping when a GIC is present, otherwise the CPU IRQ derived from
+ * cp0_perfcount_irq, or -1 when cp0_perfcount_irq is negative. VEIC mode
+ * is not supported and panics.
+ */
+int get_c0_perfcount_int(void)
+{
+       if (cpu_has_veic)
+               panic("Unimplemented!");
+
+       if (gic_present)
+               return gic_get_c0_perfcount_int();
+
+       if (cp0_perfcount_irq < 0)
+               return -1;
+
+       return MIPS_CPU_IRQ_BASE + cp0_perfcount_irq;
+}
+
+/*
+ * Return the IRQ number of the CP0 compare (timer) interrupt: the GIC's
+ * mapping when a GIC is present, otherwise the CPU IRQ derived from
+ * cp0_compare_irq. VEIC mode is not supported and panics.
+ */
+unsigned int get_c0_compare_int(void)
+{
+       int timer_irq;
+
+       if (cpu_has_veic)
+               panic("Unimplemented!");
+
+       timer_irq = gic_present ? gic_get_c0_compare_int()
+                               : MIPS_CPU_IRQ_BASE + cp0_compare_irq;
+
+       return timer_irq;
+}
diff --git a/arch/mips/generic/proc.c b/arch/mips/generic/proc.c
new file mode 100644 (file)
index 0000000..42b3325
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/of.h>
+
+#include <asm/bootinfo.h>
+
+/*
+ * Return a name for the system: the root node's "model" property if it
+ * can be read, else the first entry of the root "compatible" property,
+ * else the literal "Unknown".
+ */
+const char *get_system_type(void)
+{
+       const char *name;
+
+       if (!of_property_read_string(of_root, "model", &name))
+               return name;
+
+       if (!of_property_read_string_index(of_root, "compatible", 0, &name))
+               return name;
+
+       return "Unknown";
+}
diff --git a/arch/mips/generic/vmlinux.its.S b/arch/mips/generic/vmlinux.its.S
new file mode 100644 (file)
index 0000000..f67fbf1
--- /dev/null
@@ -0,0 +1,31 @@
+/dts-v1/;
+
+/ {
+       description = KERNEL_NAME;
+       #address-cells = <ADDR_CELLS>;
+
+       images {
+               kernel@0 {
+                       description = KERNEL_NAME;
+                       data = /incbin/(VMLINUX_BINARY);
+                       type = "kernel";
+                       arch = "mips";
+                       os = "linux";
+                       compression = VMLINUX_COMPRESSION;
+                       load = /bits/ ADDR_BITS <VMLINUX_LOAD_ADDRESS>;
+                       entry = /bits/ ADDR_BITS <VMLINUX_ENTRY_ADDRESS>;
+                       hash@0 {
+                               algo = "sha1";
+                       };
+               };
+       };
+
+       configurations {
+               default = "conf@default";
+
+               conf@default {
+                       description = "Generic Linux kernel";
+                       kernel = "kernel@0";
+               };
+       };
+};
index c5b04e752e9762f4920d3b305356d9778216ee65..4856adc8906ef3a7c337fe1a14c7e08af426e29e 100644 (file)
 #define PHYS_TO_XKSEG_UNCACHED(p)      PHYS_TO_XKPHYS(K_CALG_UNCACHED, (p))
 #define PHYS_TO_XKSEG_CACHED(p)                PHYS_TO_XKPHYS(K_CALG_COH_SHAREABLE, (p))
 #define XKPHYS_TO_PHYS(p)              ((p) & TO_PHYS_MASK)
-#define PHYS_TO_XKPHYS(cm, a)          (_CONST64_(0x8000000000000000) | \
-                                        (_CONST64_(cm) << 59) | (a))
+#define PHYS_TO_XKPHYS(cm, a)          (XKPHYS | (_ACAST64_(cm) << 59) | (a))
 
 /*
  * The ultimate limited of the 64-bit MIPS architecture:  2 bits for selecting
index d296633d890e56c892059871499e989c85a93203..a5eb1bb199a7fdf76087bcea1d63b962e2555606 100644 (file)
 
 #include <asm/addrspace.h>
 
+/*
+ * Sync types defined by the MIPS architecture (document MD00087 table 6.5)
+ * These values are used with the sync instruction to perform memory barriers.
+ * Types of ordering guarantees available through the SYNC instruction:
+ * - Completion Barriers
+ * - Ordering Barriers
+ * As compared to the completion barrier, the ordering barrier is a
+ * lighter-weight operation as it does not require the specified instructions
+ * before the SYNC to be already completed. Instead it only requires that those
+ * specified instructions which are subsequent to the SYNC in the instruction
+ * stream are never re-ordered for processing ahead of the specified
+ * instructions which are before the SYNC in the instruction stream.
+ * This potentially reduces how many cycles the barrier instruction must stall
+ * before it completes.
+ * Implementations that do not use any of the non-zero values of stype to define
+ * different barriers, such as ordering barriers, must make those stype values
+ * act the same as stype zero.
+ */
+
+/*
+ * Completion barriers:
+ * - Every synchronizable specified memory instruction (loads or stores or both)
+ *   that occurs in the instruction stream before the SYNC instruction must be
+ *   already globally performed before any synchronizable specified memory
+ *   instructions that occur after the SYNC are allowed to be performed, with
+ *   respect to any other processor or coherent I/O module.
+ *
+ * - The barrier does not guarantee the order in which instruction fetches are
+ *   performed.
+ *
+ * - A stype value of zero will always be defined such that it performs the most
+ *   complete set of synchronization operations that are defined. This means
+ *   stype zero always does a completion barrier that affects both loads and
+ *   stores preceding the SYNC instruction and both loads and stores that are
+ *   subsequent to the SYNC instruction. Non-zero values of stype may be defined
+ *   by the architecture or specific implementations to perform synchronization
+ *   behaviors that are less complete than that of stype zero. If an
+ *   implementation does not use one of these non-zero values to define a
+ *   different synchronization behavior, then that non-zero value of stype must
+ *   act the same as stype zero completion barrier. This allows software written
+ *   for an implementation with a lighter-weight barrier to work on another
+ *   implementation which only implements the stype zero completion barrier.
+ *
+ * - A completion barrier is required, potentially in conjunction with SSNOP (in
+ *   Release 1 of the Architecture) or EHB (in Release 2 of the Architecture),
+ *   to guarantee that memory reference results are visible across operating
+ *   mode changes. For example, a completion barrier is required on some
+ *   implementations on entry to and exit from Debug Mode to guarantee that
+ *   memory effects are handled correctly.
+ */
+
+/*
+ * stype 0 - A completion barrier that affects preceding loads and stores and
+ * subsequent loads and stores.
+ * Older instructions which must reach the load/store ordering point before the
+ * SYNC instruction completes: Loads, Stores
+ * Younger instructions which must reach the load/store ordering point only
+ * after the SYNC instruction completes: Loads, Stores
+ * Older instructions which must be globally performed when the SYNC instruction
+ * completes: Loads, Stores
+ */
+#define STYPE_SYNC 0x0
+
+/*
+ * Ordering barriers:
+ * - Every synchronizable specified memory instruction (loads or stores or both)
+ *   that occurs in the instruction stream before the SYNC instruction must
+ *   reach a stage in the load/store datapath after which no instruction
+ *   re-ordering is possible before any synchronizable specified memory
+ *   instruction which occurs after the SYNC instruction in the instruction
+ *   stream reaches the same stage in the load/store datapath.
+ *
+ * - If any memory instruction before the SYNC instruction in program order,
+ *   generates a memory request to the external memory and any memory
+ *   instruction after the SYNC instruction in program order also generates a
+ *   memory request to external memory, the memory request belonging to the
+ *   older instruction must be globally performed before the time the memory
+ *   request belonging to the younger instruction is globally performed.
+ *
+ * - The barrier does not guarantee the order in which instruction fetches are
+ *   performed.
+ */
+
+/*
+ * stype 0x10 - An ordering barrier that affects preceding loads and stores and
+ * subsequent loads and stores.
+ * Older instructions which must reach the load/store ordering point before the
+ * SYNC instruction completes: Loads, Stores
+ * Younger instructions which must reach the load/store ordering point only
+ * after the SYNC instruction completes: Loads, Stores
+ * Older instructions which must be globally performed when the SYNC instruction
+ * completes: N/A
+ */
+#define STYPE_SYNC_MB 0x10
+
+
 #ifdef CONFIG_CPU_HAS_SYNC
 #define __sync()                               \
        __asm__ __volatile__(                   \
index 34ed22ec6c33e7917386b31007f6f14e597a1e10..4812d1fed0c2ccf7390830c719daad0546a2b0b3 100644 (file)
@@ -28,6 +28,7 @@
  *  - flush_cache_sigtramp() flush signal trampoline
  *  - flush_icache_all() flush the entire instruction cache
  *  - flush_data_cache_page() flushes a page from the data cache
+ *  - __flush_icache_user_range(start, end) flushes range of user instructions
  */
 
  /*
@@ -80,6 +81,10 @@ static inline void flush_icache_page(struct vm_area_struct *vma,
 
 extern void (*flush_icache_range)(unsigned long start, unsigned long end);
 extern void (*local_flush_icache_range)(unsigned long start, unsigned long end);
+extern void (*__flush_icache_user_range)(unsigned long start,
+                                        unsigned long end);
+extern void (*__local_flush_icache_user_range)(unsigned long start,
+                                              unsigned long end);
 
 extern void (*__flush_cache_vmap)(void);
 
index fbe1881f28fca71d6a4f5e6306357e3c14818e98..bdd6dc18e65c618dc65bd8a487895386c8085eea 100644 (file)
@@ -24,7 +24,8 @@ static inline int __pure __get_cpu_type(const int cpu_type)
        case CPU_LOONGSON3:
 #endif
 
-#ifdef CONFIG_SYS_HAS_CPU_LOONGSON1B
+#if defined(CONFIG_SYS_HAS_CPU_LOONGSON1B) || \
+    defined(CONFIG_SYS_HAS_CPU_LOONGSON1C)
        case CPU_LOONGSON1:
 #endif
 
index f672df8b26d0126c7dd9ffd783fad8102fd1d24c..9a8372484edc0f3dd48daf5e06a532473ff911db 100644 (file)
 #define PRID_REV_VR4130                0x0080
 #define PRID_REV_34K_V1_0_2    0x0022
 #define PRID_REV_LOONGSON1B    0x0020
+#define PRID_REV_LOONGSON1C    0x0020  /* Same as Loongson-1B */
 #define PRID_REV_LOONGSON2E    0x0002
 #define PRID_REV_LOONGSON2F    0x0003
 #define PRID_REV_LOONGSON3A_R1 0x0005
index c94fafba9e62fe238c5554e6910fb08820e83ed3..21c2082a0dfbb3b1dec84125f9216d54accd4edf 100644 (file)
@@ -11,6 +11,11 @@ struct dma_map_ops;
 struct dev_archdata {
        /* DMA operations on that device */
        struct dma_map_ops *dma_ops;
+
+#ifdef CONFIG_DMA_PERDEV_COHERENT
+       /* Non-zero if DMA is coherent with CPU caches */
+       bool dma_coherent;
+#endif
 };
 
 struct pdev_archdata {
index bc5e85d579e607a61420a7c0cd7a1c6b716d8f4c..72d0eab02afcbbfc33bd6ee653389a30887c1f3a 100644 (file)
@@ -9,14 +9,22 @@
 #ifndef __ASM_DMA_COHERENCE_H
 #define __ASM_DMA_COHERENCE_H
 
-#ifdef CONFIG_DMA_MAYBE_COHERENT
-extern int coherentio;
+enum coherent_io_user_state {
+       IO_COHERENCE_DEFAULT,
+       IO_COHERENCE_ENABLED,
+       IO_COHERENCE_DISABLED,
+};
+
+#if defined(CONFIG_DMA_PERDEV_COHERENT)
+/* Don't provide (hw_)coherentio to avoid misuse */
+#elif defined(CONFIG_DMA_MAYBE_COHERENT)
+extern enum coherent_io_user_state coherentio;
 extern int hw_coherentio;
 #else
 #ifdef CONFIG_DMA_COHERENT
-#define coherentio     1
+#define coherentio     IO_COHERENCE_ENABLED
 #else
-#define coherentio     0
+#define coherentio     IO_COHERENCE_DISABLED
 #endif
 #define hw_coherentio  0
 #endif /* CONFIG_DMA_MAYBE_COHERENT */
index 12fa79e2f1b4fc7fe7c66f1f93d344bb7f1d67fb..7aa71b9b0258f1fc349fbc2cc78b1b928ac730f8 100644 (file)
@@ -32,4 +32,14 @@ static inline void dma_mark_clean(void *addr, size_t size) {}
 extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
               enum dma_data_direction direction);
 
+#define arch_setup_dma_ops arch_setup_dma_ops
+static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base,
+                                     u64 size, const struct iommu_ops *iommu,
+                                     bool coherent)
+{
+#ifdef CONFIG_DMA_PERDEV_COHERENT
+       dev->archdata.dma_coherent = coherent;
+#endif
+}
+
 #endif /* _ASM_DMA_MAPPING_H */
index 355dc25172e7a91b7ccf3c959a4a8fbd05dbf390..c05369e0b8d60352dc1833880b026e15ac09ba17 100644 (file)
@@ -63,6 +63,8 @@ do {                                                                  \
 extern int fpu_emulator_cop1Handler(struct pt_regs *xcp,
                                    struct mips_fpu_struct *ctx, int has_fpu,
                                    void *__user *fault_addr);
+void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr,
+                    struct task_struct *tsk);
 int process_fpemu_return(int sig, void __user *fault_addr,
                         unsigned long fcr31);
 int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
@@ -81,4 +83,15 @@ static inline void fpu_emulator_init_fpu(void)
                set_fpr64(&t->thread.fpu.fpr[i], 0, SIGNALLING_NAN);
 }
 
+/*
+ * Mask the FCSR Cause bits according to the Enable bits, observing
+ * that Unimplemented is always enabled.
+ */
+static inline unsigned long mask_fcr31_x(unsigned long fcr31)
+{
+       return fcr31 & (FPU_CSR_UNI_X |
+                       ((fcr31 & FPU_CSR_ALL_E) <<
+                        (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E))));
+}
+
 #endif /* _ASM_FPU_EMULATOR_H */
index a7fbcd6ed13c4fb41d19609102ea5813feba17c2..32229c77906a1efb8b151bd3f491a452ef2c24d7 100644 (file)
 
 extern raw_spinlock_t i8259A_lock;
 
-extern int i8259A_irq_pending(unsigned int irq);
 extern void make_8259A_irq(unsigned int irq);
 
 extern void init_i8259_irqs(void);
 extern int i8259_of_init(struct device_node *node, struct device_node *parent);
 
+/**
+ * i8259_set_poll() - Override the i8259 polling function
+ * @poll: pointer to platform-specific polling function
+ *
+ * Call this to override the generic i8259 polling function, which directly
+ * accesses i8259 registers, with a platform specific one which may be faster
+ * in cases where hardware provides a more optimal means of polling for an
+ * interrupt.
+ */
+extern void i8259_set_poll(int (*poll)(void));
+
 /*
  * Do the traditional i8259 interrupt polling thing.  This is for the few
  * cases where no better interrupt acknowledge method is available and we
index 07f58cfc1ab98b2724d01630130527c0546b1126..bebec370324f4401e298bf1bbaa292666972e31c 100644 (file)
@@ -293,7 +293,10 @@ struct kvm_vcpu_arch {
        /* Host KSEG0 address of the EI/DI offset */
        void *kseg0_commpage;
 
-       u32 io_gpr;             /* GPR used as IO source/target */
+       /* Resume PC after MMIO completion */
+       unsigned long io_pc;
+       /* GPR used as IO source/target */
+       u32 io_gpr;
 
        struct hrtimer comparecount_timer;
        /* Count timer control KVM register */
@@ -315,8 +318,6 @@ struct kvm_vcpu_arch {
        /* Bitmask of pending exceptions to be cleared */
        unsigned long pending_exceptions_clr;
 
-       u32 pending_load_cause;
-
        /* Save/Restore the entryhi register when are are preempted/scheduled back in */
        unsigned long preempt_entryhi;
 
index 0f8a354fd4686dc49cbd6718ef565e9b73786386..61addb1677e950c75936cce24905f850e23e7a04 100644 (file)
@@ -49,7 +49,19 @@ static inline int plat_dma_supported(struct device *dev, u64 mask)
 
 static inline int plat_device_is_coherent(struct device *dev)
 {
-       return coherentio;
+#ifdef CONFIG_DMA_PERDEV_COHERENT
+       return dev->archdata.dma_coherent;
+#else
+       switch (coherentio) {
+       default:
+       case IO_COHERENCE_DEFAULT:
+               return hw_coherentio;
+       case IO_COHERENCE_ENABLED:
+               return 1;
+       case IO_COHERENCE_DISABLED:
+               return 0;
+       }
+#endif
 }
 
 #ifndef plat_post_dma_flush
index e2561d99a3feaf12d9d54d948cf2403fc18a8d5c..9ec2f6a5200b6f7ce5bff8d2708b1e7849972d3a 100644 (file)
@@ -115,11 +115,7 @@ static inline unsigned long fd_getfdaddr1(void)
 
 static inline unsigned long fd_dma_mem_alloc(unsigned long size)
 {
-       unsigned long mem;
-
-       mem = __get_dma_pages(GFP_KERNEL, get_order(size));
-
-       return mem;
+       return __get_dma_pages(GFP_KERNEL, get_order(size));
 }
 
 static inline void fd_dma_mem_free(unsigned long addr, unsigned long size)
index afc96ecb90042358685be12d3b2e0f8ab21964ce..952b0fdfda0e637849315f534043137f10972414 100644 (file)
@@ -12,6 +12,8 @@
 
 #include <linux/const.h>
 
+#include <asm/mipsregs.h>
+
 /*
  * This gives the physical RAM offset.
  */
 #ifdef CONFIG_64BIT
 
 #ifndef CAC_BASE
-#ifdef CONFIG_DMA_NONCOHERENT
-#define CAC_BASE               _AC(0x9800000000000000, UL)
-#else
-#define CAC_BASE               _AC(0xa800000000000000, UL)
-#endif
+#define CAC_BASE       PHYS_TO_XKPHYS(read_c0_config() & CONF_CM_CMASK, 0)
 #endif
 
 #ifndef IO_BASE
index b18802a0b17e94dca9a85cc6a8a3c761dfb9f60e..4775a1136a5b45eea2359b74bfe530320e95d9df 100644 (file)
@@ -19,6 +19,7 @@
 #define IO_BASE                        0x9200000000000000
 #define MSPEC_BASE             0x9400000000000000
 #define UNCAC_BASE             0x9600000000000000
+#define CAC_BASE               0xa800000000000000
 
 #define TO_MSPEC(x)            (MSPEC_BASE | ((x) & TO_PHYS_MASK))
 #define TO_HSPEC(x)            (HSPEC_BASE | ((x) & TO_PHYS_MASK))
index c1c744197de498e4c38be02b18f4732f789ae4ea..8c01b304b7ec89464088ed55b36df1dca4f1ed71 100644 (file)
 #define LS1X_IRQ(n, x)                 (LS1X_IRQ_BASE + (n << 5) + (x))
 
 #define LS1X_UART0_IRQ                 LS1X_IRQ(0, 2)
+#if defined(CONFIG_LOONGSON1_LS1B)
 #define LS1X_UART1_IRQ                 LS1X_IRQ(0, 3)
 #define LS1X_UART2_IRQ                 LS1X_IRQ(0, 4)
 #define LS1X_UART3_IRQ                 LS1X_IRQ(0, 5)
+#elif defined(CONFIG_LOONGSON1_LS1C)
+#define LS1X_UART1_IRQ                 LS1X_IRQ(0, 4)
+#define LS1X_UART2_IRQ                 LS1X_IRQ(0, 5)
+#endif
 #define LS1X_CAN0_IRQ                  LS1X_IRQ(0, 6)
 #define LS1X_CAN1_IRQ                  LS1X_IRQ(0, 7)
 #define LS1X_SPI0_IRQ                  LS1X_IRQ(0, 8)
@@ -47,6 +52,9 @@
 #define LS1X_DMA0_IRQ                  LS1X_IRQ(0, 13)
 #define LS1X_DMA1_IRQ                  LS1X_IRQ(0, 14)
 #define LS1X_DMA2_IRQ                  LS1X_IRQ(0, 15)
+#if defined(CONFIG_LOONGSON1_LS1C)
+#define LS1X_NAND_IRQ                  LS1X_IRQ(0, 16)
+#endif
 #define LS1X_PWM0_IRQ                  LS1X_IRQ(0, 17)
 #define LS1X_PWM1_IRQ                  LS1X_IRQ(0, 18)
 #define LS1X_PWM2_IRQ                  LS1X_IRQ(0, 19)
 #define LS1X_RTC_INT0_IRQ              LS1X_IRQ(0, 21)
 #define LS1X_RTC_INT1_IRQ              LS1X_IRQ(0, 22)
 #define LS1X_RTC_INT2_IRQ              LS1X_IRQ(0, 23)
+#if defined(CONFIG_LOONGSON1_LS1B)
 #define LS1X_TOY_INT0_IRQ              LS1X_IRQ(0, 24)
 #define LS1X_TOY_INT1_IRQ              LS1X_IRQ(0, 25)
 #define LS1X_TOY_INT2_IRQ              LS1X_IRQ(0, 26)
 #define LS1X_RTC_TICK_IRQ              LS1X_IRQ(0, 27)
 #define LS1X_TOY_TICK_IRQ              LS1X_IRQ(0, 28)
+#define LS1X_UART4_IRQ                 LS1X_IRQ(0, 29)
+#define LS1X_UART5_IRQ                 LS1X_IRQ(0, 30)
+#elif defined(CONFIG_LOONGSON1_LS1C)
+#define LS1X_UART3_IRQ                 LS1X_IRQ(0, 29)
+#define LS1X_ADC_IRQ                   LS1X_IRQ(0, 30)
+#define LS1X_SDIO_IRQ                  LS1X_IRQ(0, 31)
+#endif
 
 #define LS1X_EHCI_IRQ                  LS1X_IRQ(1, 0)
 #define LS1X_OHCI_IRQ                  LS1X_IRQ(1, 1)
+#if defined(CONFIG_LOONGSON1_LS1B)
 #define LS1X_GMAC0_IRQ                 LS1X_IRQ(1, 2)
 #define LS1X_GMAC1_IRQ                 LS1X_IRQ(1, 3)
+#elif defined(CONFIG_LOONGSON1_LS1C)
+#define LS1X_OTG_IRQ                   LS1X_IRQ(1, 2)
+#define LS1X_GMAC0_IRQ                 LS1X_IRQ(1, 3)
+#define LS1X_CAM_IRQ                   LS1X_IRQ(1, 4)
+#define LS1X_UART4_IRQ                 LS1X_IRQ(1, 5)
+#define LS1X_UART5_IRQ                 LS1X_IRQ(1, 6)
+#define LS1X_UART6_IRQ                 LS1X_IRQ(1, 7)
+#define LS1X_UART7_IRQ                 LS1X_IRQ(1, 8)
+#define LS1X_UART8_IRQ                 LS1X_IRQ(1, 9)
+#define LS1X_UART9_IRQ                 LS1X_IRQ(1, 13)
+#define LS1X_UART10_IRQ                        LS1X_IRQ(1, 14)
+#define LS1X_UART11_IRQ                        LS1X_IRQ(1, 15)
+#define LS1X_I2C0_IRQ                  LS1X_IRQ(1, 17)
+#define LS1X_I2C1_IRQ                  LS1X_IRQ(1, 18)
+#define LS1X_I2C2_IRQ                  LS1X_IRQ(1, 19)
+#endif
 
-#define LS1X_IRQS              (LS1X_IRQ(4, 31) + 1 - LS1X_IRQ_BASE)
+#if defined(CONFIG_LOONGSON1_LS1B)
+#define INTN   4
+#elif defined(CONFIG_LOONGSON1_LS1C)
+#define INTN   5
+#endif
+
+#define LS1X_IRQS              (LS1X_IRQ(INTN, 31) + 1 - LS1X_IRQ_BASE)
 
 #define NR_IRQS                        (MIPS_CPU_IRQS + LS1X_IRQS)
 
index 978f6df8970a38946ddd5afcdb21d0ba5f3f9312..3584c40caf796d1995d3b389b3a4aab2fd612a18 100644 (file)
 #ifndef __ASM_MACH_LOONGSON32_LOONGSON1_H
 #define __ASM_MACH_LOONGSON32_LOONGSON1_H
 
+#if defined(CONFIG_LOONGSON1_LS1B)
 #define DEFAULT_MEMSIZE                        256     /* If no memsize provided */
+#elif defined(CONFIG_LOONGSON1_LS1C)
+#define DEFAULT_MEMSIZE                        32
+#endif
 
 /* Loongson 1 Register Bases */
 #define LS1X_MUX_BASE                  0x1fd00420
@@ -20,6 +24,7 @@
 #define LS1X_GPIO0_BASE                        0x1fd010c0
 #define LS1X_GPIO1_BASE                        0x1fd010c4
 #define LS1X_DMAC_BASE                 0x1fd01160
+#define LS1X_CBUS_BASE                 0x1fd011c0
 #define LS1X_EHCI_BASE                 0x1fe00000
 #define LS1X_OHCI_BASE                 0x1fe08000
 #define LS1X_GMAC0_BASE                        0x1fe10000
index 672531aa9bef0a20488f578682e27ae81d113d61..7adc313649395265c168178528d928d16b7483ac 100644 (file)
@@ -30,5 +30,6 @@ void __init ls1x_clk_init(void);
 void __init ls1x_dma_set_platdata(struct plat_ls1x_dma *pdata);
 void __init ls1x_nand_set_platdata(struct plat_ls1x_nand *pdata);
 void __init ls1x_serial_set_uartclk(struct platform_device *pdev);
+void __init ls1x_rtc_set_extclk(struct platform_device *pdev);
 
 #endif /* __ASM_MACH_LOONGSON32_PLATFORM_H */
index 4d56fc38f0c47be5604a6688754528d43c803240..e5e8f118f34b209f2d7d9e389f710e24166cbdac 100644 (file)
@@ -18,6 +18,7 @@
 #define LS1X_CLK_PLL_FREQ              LS1X_CLK_REG(0x0)
 #define LS1X_CLK_PLL_DIV               LS1X_CLK_REG(0x4)
 
+#if defined(CONFIG_LOONGSON1_LS1B)
 /* Clock PLL Divisor Register Bits */
 #define DIV_DC_EN                      BIT(31)
 #define DIV_DC_RST                     BIT(30)
 #define BYPASS_DDR_WIDTH               1
 #define BYPASS_CPU_WIDTH               1
 
+#elif defined(CONFIG_LOONGSON1_LS1C)
+/* PLL/SDRAM Frequency configuration register Bits */
+#define PLL_VALID                      BIT(31)
+#define FRAC_N                         GENMASK(23, 16)
+#define RST_TIME                       GENMASK(3, 2)
+#define SDRAM_DIV                      GENMASK(1, 0)
+
+/* CPU/CAMERA/DC Frequency configuration register Bits */
+#define DIV_DC_EN                      BIT(31)
+#define DIV_DC                         GENMASK(30, 24)
+#define DIV_CAM_EN                     BIT(23)
+#define DIV_CAM                                GENMASK(22, 16)
+#define DIV_CPU_EN                     BIT(15)
+#define DIV_CPU                                GENMASK(14, 8)
+#define DIV_DC_SEL_EN                  BIT(5)
+#define DIV_DC_SEL                     BIT(4)
+#define DIV_CAM_SEL_EN                 BIT(3)
+#define DIV_CAM_SEL                    BIT(2)
+#define DIV_CPU_SEL_EN                 BIT(1)
+#define DIV_CPU_SEL                    BIT(0)
+
+#define DIV_DC_SHIFT                   24
+#define DIV_CAM_SHIFT                  16
+#define DIV_CPU_SHIFT                  8
+#define DIV_DDR_SHIFT                  0
+
+#define DIV_DC_WIDTH                   7
+#define DIV_CAM_WIDTH                  7
+#define DIV_CPU_WIDTH                  7
+#define DIV_DDR_WIDTH                  2
+
+#endif
+
 #endif /* __ASM_MACH_LOONGSON32_REGS_CLK_H */
index 7c394f93cb9e381adb5b1a813cc1745383e80532..4a0bdeb0eb9b94ae810dd76593a1988224d6cf23 100644 (file)
@@ -18,6 +18,7 @@
 #define LS1X_MUX_CTRL0                 LS1X_MUX_REG(0x0)
 #define LS1X_MUX_CTRL1                 LS1X_MUX_REG(0x4)
 
+#if defined(CONFIG_LOONGSON1_LS1B)
 /* MUX CTRL0 Register Bits */
 #define UART0_USE_PWM23                        BIT(28)
 #define UART0_USE_PWM01                        BIT(27)
 #define GMAC1_USE_PWM23                        BIT(1)
 #define GMAC0_USE_PWM01                        BIT(0)
 
+#elif defined(CONFIG_LOONGSON1_LS1C)
+
+/* SHUT_CTRL Register Bits */
+#define UART_SPLIT                     GENMASK(31, 30)
+#define OUTPUT_CLK                     GENMASK(29, 26)
+#define ADC_SHUT                       BIT(25)
+#define SDIO_SHUT                      BIT(24)
+#define DMA2_SHUT                      BIT(23)
+#define DMA1_SHUT                      BIT(22)
+#define DMA0_SHUT                      BIT(21)
+#define SPI1_SHUT                      BIT(20)
+#define SPI0_SHUT                      BIT(19)
+#define I2C2_SHUT                      BIT(18)
+#define I2C1_SHUT                      BIT(17)
+#define I2C0_SHUT                      BIT(16)
+#define AC97_SHUT                      BIT(15)
+#define I2S_SHUT                       BIT(14)
+#define UART3_SHUT                     BIT(13)
+#define UART2_SHUT                     BIT(12)
+#define UART1_SHUT                     BIT(11)
+#define UART0_SHUT                     BIT(10)
+#define CAN1_SHUT                      BIT(9)
+#define CAN0_SHUT                      BIT(8)
+#define ECC_SHUT                       BIT(7)
+#define GMAC_SHUT                      BIT(6)
+#define USBHOST_SHUT                   BIT(5)
+#define USBOTG_SHUT                    BIT(4)
+#define SDRAM_SHUT                     BIT(3)
+#define SRAM_SHUT                      BIT(2)
+#define CAM_SHUT                       BIT(1)
+#define LCD_SHUT                       BIT(0)
+
+#define UART_SPLIT_SHIFT                        30
+#define OUTPUT_CLK_SHIFT                        26
+
+/* MISC_CTRL Register Bits */
+#define USBHOST_RSTN                   BIT(31)
+#define PHY_INTF_SELI                  GENMASK(30, 28)
+#define AC97_EN                                BIT(25)
+#define SDIO_DMA_EN                    GENMASK(24, 23)
+#define ADC_DMA_EN                     BIT(22)
+#define SDIO_USE_SPI1                  BIT(17)
+#define SDIO_USE_SPI0                  BIT(16)
+#define SRAM_CTRL                      GENMASK(15, 0)
+
+#define PHY_INTF_SELI_SHIFT                     28
+#define SDIO_DMA_EN_SHIFT                       23
+#define SRAM_CTRL_SHIFT                                0
+
+#define LS1X_CBUS_REG(n, x) \
+               ((void __iomem *)KSEG1ADDR(LS1X_CBUS_BASE + (n * 0x04) + (x)))
+
+#define LS1X_CBUS_FIRST(n)             LS1X_CBUS_REG(n, 0x00)
+#define LS1X_CBUS_SECOND(n)            LS1X_CBUS_REG(n, 0x10)
+#define LS1X_CBUS_THIRD(n)             LS1X_CBUS_REG(n, 0x20)
+#define LS1X_CBUS_FOURTHT(n)           LS1X_CBUS_REG(n, 0x30)
+#define LS1X_CBUS_FIFTHT(n)            LS1X_CBUS_REG(n, 0x40)
+
+#endif
+
 #endif /* __ASM_MACH_LOONGSON32_REGS_MUX_H */
diff --git a/arch/mips/include/asm/mach-sead3/cpu-feature-overrides.h b/arch/mips/include/asm/mach-sead3/cpu-feature-overrides.h
deleted file mode 100644 (file)
index bfbd703..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2003, 2004 Chris Dearman
- * Copyright (C) 2005 Ralf Baechle (ralf@linux-mips.org)
- */
-#ifndef __ASM_MACH_MIPS_CPU_FEATURE_OVERRIDES_H
-#define __ASM_MACH_MIPS_CPU_FEATURE_OVERRIDES_H
-
-
-/*
- * CPU feature overrides for MIPS boards
- */
-#ifdef CONFIG_CPU_MIPS32
-#define cpu_has_tlb            1
-#define cpu_has_4kex           1
-#define cpu_has_4k_cache       1
-/* #define cpu_has_fpu         ? */
-/* #define cpu_has_32fpr       ? */
-#define cpu_has_counter                1
-/* #define cpu_has_watch       ? */
-#define cpu_has_divec          1
-#define cpu_has_vce            0
-/* #define cpu_has_cache_cdex_p ? */
-/* #define cpu_has_cache_cdex_s ? */
-/* #define cpu_has_prefetch    ? */
-#define cpu_has_mcheck         1
-/* #define cpu_has_ejtag       ? */
-#ifdef CONFIG_CPU_MICROMIPS
-#define cpu_has_llsc           0
-#else
-#define cpu_has_llsc           1
-#endif
-/* #define cpu_has_vtag_icache ? */
-/* #define cpu_has_dc_aliases  ? */
-/* #define cpu_has_ic_fills_f_dc ? */
-#define cpu_has_nofpuex                0
-/* #define cpu_has_64bits      ? */
-/* #define cpu_has_64bit_zero_reg ? */
-/* #define cpu_has_inclusive_pcaches ? */
-#define cpu_icache_snoops_remote_store 1
-#endif
-
-#ifdef CONFIG_CPU_MIPS64
-#define cpu_has_tlb            1
-#define cpu_has_4kex           1
-#define cpu_has_4k_cache       1
-/* #define cpu_has_fpu         ? */
-/* #define cpu_has_32fpr       ? */
-#define cpu_has_counter                1
-/* #define cpu_has_watch       ? */
-#define cpu_has_divec          1
-#define cpu_has_vce            0
-/* #define cpu_has_cache_cdex_p ? */
-/* #define cpu_has_cache_cdex_s ? */
-/* #define cpu_has_prefetch    ? */
-#define cpu_has_mcheck         1
-/* #define cpu_has_ejtag       ? */
-#define cpu_has_llsc           1
-/* #define cpu_has_vtag_icache ? */
-/* #define cpu_has_dc_aliases  ? */
-/* #define cpu_has_ic_fills_f_dc ? */
-#define cpu_has_nofpuex                0
-/* #define cpu_has_64bits      ? */
-/* #define cpu_has_64bit_zero_reg ? */
-/* #define cpu_has_inclusive_pcaches ? */
-#define cpu_icache_snoops_remote_store 1
-#endif
-
-#endif /* __ASM_MACH_MIPS_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-sead3/irq.h b/arch/mips/include/asm/mach-sead3/irq.h
deleted file mode 100644 (file)
index 5d154cf..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef __ASM_MACH_MIPS_IRQ_H
-#define __ASM_MACH_MIPS_IRQ_H
-
-#define NR_IRQS 256
-
-
-#include_next <irq.h>
-
-#endif /* __ASM_MACH_MIPS_IRQ_H */
diff --git a/arch/mips/include/asm/mach-sead3/kernel-entry-init.h b/arch/mips/include/asm/mach-sead3/kernel-entry-init.h
deleted file mode 100644 (file)
index 6cccd4d..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Chris Dearman (chris@mips.com)
- * Copyright (C) 2007 Mips Technologies, Inc.
- */
-#ifndef __ASM_MACH_MIPS_KERNEL_ENTRY_INIT_H
-#define __ASM_MACH_MIPS_KERNEL_ENTRY_INIT_H
-
-       .macro  kernel_entry_setup
-       .endm
-
-/*
- * Do SMP slave processor setup necessary before we can safely execute C code.
- */
-       .macro  smp_slave_setup
-       .endm
-
-#endif /* __ASM_MACH_MIPS_KERNEL_ENTRY_INIT_H */
diff --git a/arch/mips/include/asm/mach-sead3/war.h b/arch/mips/include/asm/mach-sead3/war.h
deleted file mode 100644 (file)
index d068fc4..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
- */
-#ifndef __ASM_MIPS_MACH_MIPS_WAR_H
-#define __ASM_MIPS_MACH_MIPS_WAR_H
-
-#define R4600_V1_INDEX_ICACHEOP_WAR    0
-#define R4600_V1_HIT_CACHEOP_WAR       0
-#define R4600_V2_HIT_CACHEOP_WAR       0
-#define R5432_CP0_INTERRUPT_WAR                0
-#define BCM1250_M3_WAR                 0
-#define SIBYTE_1956_WAR                        0
-#define MIPS4K_ICACHE_REFILL_WAR       1
-#define MIPS_CACHE_SYNC_WAR            1
-#define TX49XX_ICACHE_INDEX_INV_WAR    0
-#define ICACHE_REFILLS_WORKAROUND_WAR  1
-#define R10000_LLSC_WAR                        0
-#define MIPS34K_MISSED_ITLB_WAR                0
-
-#endif /* __ASM_MIPS_MACH_MIPS_WAR_H */
diff --git a/arch/mips/include/asm/machine.h b/arch/mips/include/asm/machine.h
new file mode 100644 (file)
index 0000000..6b444cd
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __MIPS_ASM_MACHINE_H__
+#define __MIPS_ASM_MACHINE_H__
+
+#include <linux/libfdt.h>
+#include <linux/of.h>
+
+struct mips_machine {
+       const struct of_device_id *matches;
+       const void *fdt;
+       bool (*detect)(void);
+       const void *(*fixup_fdt)(const void *fdt, const void *match_data);
+       unsigned int (*measure_hpt_freq)(void);
+};
+
+extern long __mips_machines_start;
+extern long __mips_machines_end;
+
+#define MIPS_MACHINE(name)                                             \
+       static const struct mips_machine __mips_mach_##name             \
+               __used __section(.mips.machines.init)
+
+#define for_each_mips_machine(mach)                                    \
+       for ((mach) = (struct mips_machine *)&__mips_machines_start;    \
+            (mach) < (struct mips_machine *)&__mips_machines_end;      \
+            (mach)++)
+
+/**
+ * mips_machine_is_compatible() - check if a machine is compatible with an FDT
+ * @mach: the machine struct to check
+ * @fdt: the FDT to check for compatibility with
+ *
+ * Check whether the given machine @mach is compatible with the given flattened
+ * device tree @fdt, based upon the compatibility property of the root node.
+ *
+ * Return: the device id matched if any, else NULL
+ */
+static inline const struct of_device_id *
+mips_machine_is_compatible(const struct mips_machine *mach, const void *fdt)
+{
+       const struct of_device_id *match;
+
+       if (!mach->matches)
+               return NULL;
+
+       for (match = mach->matches; match->compatible; match++) {
+               if (fdt_node_check_compatible(fdt, 0, match->compatible) == 0)
+                       return match;
+       }
+
+       return NULL;
+}
+
+#endif /* __MIPS_ASM_MACHINE_H__ */
diff --git a/arch/mips/include/asm/mips-boards/sead3int.h b/arch/mips/include/asm/mips-boards/sead3int.h
deleted file mode 100644 (file)
index 8932c7d..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2000,2012 MIPS Technologies, Inc.  All rights reserved.
- *     Douglas Leung <douglas@mips.com>
- *     Steven J. Hill <sjhill@mips.com>
- */
-#ifndef _MIPS_SEAD3INT_H
-#define _MIPS_SEAD3INT_H
-
-#include <linux/irqchip/mips-gic.h>
-
-/* SEAD-3 GIC address space definitions. */
-#define GIC_BASE_ADDR          0x1b1c0000
-#define GIC_ADDRSPACE_SZ       (128 * 1024)
-
-/* CPU interrupt offsets */
-#define CPU_INT_GIC            2
-#define CPU_INT_EHCI           2
-#define CPU_INT_UART0          4
-#define CPU_INT_UART1          4
-#define CPU_INT_NET            6
-
-/* GIC interrupt offsets */
-#define GIC_INT_NET            GIC_SHARED_TO_HWIRQ(0)
-#define GIC_INT_UART1          GIC_SHARED_TO_HWIRQ(2)
-#define GIC_INT_UART0          GIC_SHARED_TO_HWIRQ(3)
-#define GIC_INT_EHCI           GIC_SHARED_TO_HWIRQ(5)
-
-#endif /* !(_MIPS_SEAD3INT_H) */
index 4fafeefe65c2a076a6d5683a498e33e18076bc11..2e4180797b21828c3bc93a76d214b51f94f1392e 100644 (file)
@@ -359,6 +359,7 @@ BUILD_CM_Cx_R_(tcid_8_priority,     0x80)
 /* GCR_Cx_COHERENCE register fields */
 #define CM_GCR_Cx_COHERENCE_COHDOMAINEN_SHF    0
 #define CM_GCR_Cx_COHERENCE_COHDOMAINEN_MSK    (_ULCAST_(0xff) << 0)
+#define CM3_GCR_Cx_COHERENCE_COHEN_MSK         (_ULCAST_(0x1) << 0)
 
 /* GCR_Cx_CONFIG register fields */
 #define CM_GCR_Cx_CONFIG_IOCUTYPE_SHF          10
index cda93aee712c6d894de9c26ec821c747103c1e5d..b4d19c21b62cfb53814917e78ffebb27d1343507 100644 (file)
@@ -57,16 +57,6 @@ typedef enum {
  */
 #define CVMX_HELPER_BOARD_MGMT_IPD_PORT            -10
 
-/**
- * cvmx_override_board_link_get(int ipd_port) is a function
- * pointer. It is meant to allow customization of the process of
- * talking to a PHY to determine link speed. It is called every
- * time a PHY must be polled for link status. Users should set
- * this pointer to a function before calling any cvmx-helper
- * operations.
- */
-extern cvmx_helper_link_info_t(*cvmx_override_board_link_get) (int ipd_port);
-
 /**
  * Return the MII PHY address associated with the given IPD
  * port. A result of -1 means there isn't a MII capable PHY
@@ -85,26 +75,6 @@ extern cvmx_helper_link_info_t(*cvmx_override_board_link_get) (int ipd_port);
  */
 extern int cvmx_helper_board_get_mii_address(int ipd_port);
 
-/**
- * This function as a board specific method of changing the PHY
- * speed, duplex, and autonegotiation. This programs the PHY and
- * not Octeon. This can be used to force Octeon's links to
- * specific settings.
- *
- * @phy_addr:  The address of the PHY to program
- * @link_flags:
- *                 Flags to control autonegotiation.  Bit 0 is autonegotiation
- *                 enable/disable to maintain backward compatibility.
- * @link_info: Link speed to program. If the speed is zero and autonegotiation
- *                 is enabled, all possible negotiation speeds are advertised.
- *
- * Returns Zero on success, negative on failure
- */
-int cvmx_helper_board_link_set_phy(int phy_addr,
-                                  cvmx_helper_board_set_phy_link_flags_types_t
-                                  link_flags,
-                                  cvmx_helper_link_info_t link_info);
-
 /**
  * This function is the board specific method of determining an
  * ethernet ports link speed. Most Octeon boards have Marvell PHYs
diff --git a/arch/mips/include/asm/octeon/cvmx-mdio.h b/arch/mips/include/asm/octeon/cvmx-mdio.h
deleted file mode 100644 (file)
index 9f6a4f3..0000000
+++ /dev/null
@@ -1,506 +0,0 @@
-/***********************license start***************
- * Author: Cavium Networks
- *
- * Contact: support@caviumnetworks.com
- * This file is part of the OCTEON SDK
- *
- * Copyright (c) 2003-2008 Cavium Networks
- *
- * This file is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, Version 2, as
- * published by the Free Software Foundation.
- *
- * This file is distributed in the hope that it will be useful, but
- * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this file; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- * or visit http://www.gnu.org/licenses/.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium Networks for more information
- ***********************license end**************************************/
-
-/*
- *
- * Interface to the SMI/MDIO hardware, including support for both IEEE 802.3
- * clause 22 and clause 45 operations.
- *
- */
-
-#ifndef __CVMX_MIO_H__
-#define __CVMX_MIO_H__
-
-#include <asm/octeon/cvmx-smix-defs.h>
-
-/**
- * PHY register 0 from the 802.3 spec
- */
-#define CVMX_MDIO_PHY_REG_CONTROL 0
-typedef union {
-       uint16_t u16;
-       struct {
-               uint16_t reset:1;
-               uint16_t loopback:1;
-               uint16_t speed_lsb:1;
-               uint16_t autoneg_enable:1;
-               uint16_t power_down:1;
-               uint16_t isolate:1;
-               uint16_t restart_autoneg:1;
-               uint16_t duplex:1;
-               uint16_t collision_test:1;
-               uint16_t speed_msb:1;
-               uint16_t unidirectional_enable:1;
-               uint16_t reserved_0_4:5;
-       } s;
-} cvmx_mdio_phy_reg_control_t;
-
-/**
- * PHY register 1 from the 802.3 spec
- */
-#define CVMX_MDIO_PHY_REG_STATUS 1
-typedef union {
-       uint16_t u16;
-       struct {
-               uint16_t capable_100base_t4:1;
-               uint16_t capable_100base_x_full:1;
-               uint16_t capable_100base_x_half:1;
-               uint16_t capable_10_full:1;
-               uint16_t capable_10_half:1;
-               uint16_t capable_100base_t2_full:1;
-               uint16_t capable_100base_t2_half:1;
-               uint16_t capable_extended_status:1;
-               uint16_t capable_unidirectional:1;
-               uint16_t capable_mf_preamble_suppression:1;
-               uint16_t autoneg_complete:1;
-               uint16_t remote_fault:1;
-               uint16_t capable_autoneg:1;
-               uint16_t link_status:1;
-               uint16_t jabber_detect:1;
-               uint16_t capable_extended_registers:1;
-
-       } s;
-} cvmx_mdio_phy_reg_status_t;
-
-/**
- * PHY register 2 from the 802.3 spec
- */
-#define CVMX_MDIO_PHY_REG_ID1 2
-typedef union {
-       uint16_t u16;
-       struct {
-               uint16_t oui_bits_3_18;
-       } s;
-} cvmx_mdio_phy_reg_id1_t;
-
-/**
- * PHY register 3 from the 802.3 spec
- */
-#define CVMX_MDIO_PHY_REG_ID2 3
-typedef union {
-       uint16_t u16;
-       struct {
-               uint16_t oui_bits_19_24:6;
-               uint16_t model:6;
-               uint16_t revision:4;
-       } s;
-} cvmx_mdio_phy_reg_id2_t;
-
-/**
- * PHY register 4 from the 802.3 spec
- */
-#define CVMX_MDIO_PHY_REG_AUTONEG_ADVER 4
-typedef union {
-       uint16_t u16;
-       struct {
-               uint16_t next_page:1;
-               uint16_t reserved_14:1;
-               uint16_t remote_fault:1;
-               uint16_t reserved_12:1;
-               uint16_t asymmetric_pause:1;
-               uint16_t pause:1;
-               uint16_t advert_100base_t4:1;
-               uint16_t advert_100base_tx_full:1;
-               uint16_t advert_100base_tx_half:1;
-               uint16_t advert_10base_tx_full:1;
-               uint16_t advert_10base_tx_half:1;
-               uint16_t selector:5;
-       } s;
-} cvmx_mdio_phy_reg_autoneg_adver_t;
-
-/**
- * PHY register 5 from the 802.3 spec
- */
-#define CVMX_MDIO_PHY_REG_LINK_PARTNER_ABILITY 5
-typedef union {
-       uint16_t u16;
-       struct {
-               uint16_t next_page:1;
-               uint16_t ack:1;
-               uint16_t remote_fault:1;
-               uint16_t reserved_12:1;
-               uint16_t asymmetric_pause:1;
-               uint16_t pause:1;
-               uint16_t advert_100base_t4:1;
-               uint16_t advert_100base_tx_full:1;
-               uint16_t advert_100base_tx_half:1;
-               uint16_t advert_10base_tx_full:1;
-               uint16_t advert_10base_tx_half:1;
-               uint16_t selector:5;
-       } s;
-} cvmx_mdio_phy_reg_link_partner_ability_t;
-
-/**
- * PHY register 6 from the 802.3 spec
- */
-#define CVMX_MDIO_PHY_REG_AUTONEG_EXPANSION 6
-typedef union {
-       uint16_t u16;
-       struct {
-               uint16_t reserved_5_15:11;
-               uint16_t parallel_detection_fault:1;
-               uint16_t link_partner_next_page_capable:1;
-               uint16_t local_next_page_capable:1;
-               uint16_t page_received:1;
-               uint16_t link_partner_autoneg_capable:1;
-
-       } s;
-} cvmx_mdio_phy_reg_autoneg_expansion_t;
-
-/**
- * PHY register 9 from the 802.3 spec
- */
-#define CVMX_MDIO_PHY_REG_CONTROL_1000 9
-typedef union {
-       uint16_t u16;
-       struct {
-               uint16_t test_mode:3;
-               uint16_t manual_master_slave:1;
-               uint16_t master:1;
-               uint16_t port_type:1;
-               uint16_t advert_1000base_t_full:1;
-               uint16_t advert_1000base_t_half:1;
-               uint16_t reserved_0_7:8;
-       } s;
-} cvmx_mdio_phy_reg_control_1000_t;
-
-/**
- * PHY register 10 from the 802.3 spec
- */
-#define CVMX_MDIO_PHY_REG_STATUS_1000 10
-typedef union {
-       uint16_t u16;
-       struct {
-               uint16_t master_slave_fault:1;
-               uint16_t is_master:1;
-               uint16_t local_receiver_ok:1;
-               uint16_t remote_receiver_ok:1;
-               uint16_t remote_capable_1000base_t_full:1;
-               uint16_t remote_capable_1000base_t_half:1;
-               uint16_t reserved_8_9:2;
-               uint16_t idle_error_count:8;
-       } s;
-} cvmx_mdio_phy_reg_status_1000_t;
-
-/**
- * PHY register 15 from the 802.3 spec
- */
-#define CVMX_MDIO_PHY_REG_EXTENDED_STATUS 15
-typedef union {
-       uint16_t u16;
-       struct {
-               uint16_t capable_1000base_x_full:1;
-               uint16_t capable_1000base_x_half:1;
-               uint16_t capable_1000base_t_full:1;
-               uint16_t capable_1000base_t_half:1;
-               uint16_t reserved_0_11:12;
-       } s;
-} cvmx_mdio_phy_reg_extended_status_t;
-
-/**
- * PHY register 13 from the 802.3 spec
- */
-#define CVMX_MDIO_PHY_REG_MMD_CONTROL 13
-typedef union {
-       uint16_t u16;
-       struct {
-               uint16_t function:2;
-               uint16_t reserved_5_13:9;
-               uint16_t devad:5;
-       } s;
-} cvmx_mdio_phy_reg_mmd_control_t;
-
-/**
- * PHY register 14 from the 802.3 spec
- */
-#define CVMX_MDIO_PHY_REG_MMD_ADDRESS_DATA 14
-typedef union {
-       uint16_t u16;
-       struct {
-               uint16_t address_data:16;
-       } s;
-} cvmx_mdio_phy_reg_mmd_address_data_t;
-
-/* Operating request encodings. */
-#define MDIO_CLAUSE_22_WRITE   0
-#define MDIO_CLAUSE_22_READ    1
-
-#define MDIO_CLAUSE_45_ADDRESS 0
-#define MDIO_CLAUSE_45_WRITE   1
-#define MDIO_CLAUSE_45_READ_INC 2
-#define MDIO_CLAUSE_45_READ    3
-
-/* MMD identifiers, mostly for accessing devices within XENPAK modules. */
-#define CVMX_MMD_DEVICE_PMA_PMD             1
-#define CVMX_MMD_DEVICE_WIS         2
-#define CVMX_MMD_DEVICE_PCS         3
-#define CVMX_MMD_DEVICE_PHY_XS      4
-#define CVMX_MMD_DEVICE_DTS_XS      5
-#define CVMX_MMD_DEVICE_TC          6
-#define CVMX_MMD_DEVICE_CL22_EXT     29
-#define CVMX_MMD_DEVICE_VENDOR_1     30
-#define CVMX_MMD_DEVICE_VENDOR_2     31
-
-/* Helper function to put MDIO interface into clause 45 mode */
-static inline void __cvmx_mdio_set_clause45_mode(int bus_id)
-{
-       union cvmx_smix_clk smi_clk;
-       /* Put bus into clause 45 mode */
-       smi_clk.u64 = cvmx_read_csr(CVMX_SMIX_CLK(bus_id));
-       smi_clk.s.mode = 1;
-       smi_clk.s.preamble = 1;
-       cvmx_write_csr(CVMX_SMIX_CLK(bus_id), smi_clk.u64);
-}
-
-/* Helper function to put MDIO interface into clause 22 mode */
-static inline void __cvmx_mdio_set_clause22_mode(int bus_id)
-{
-       union cvmx_smix_clk smi_clk;
-       /* Put bus into clause 22 mode */
-       smi_clk.u64 = cvmx_read_csr(CVMX_SMIX_CLK(bus_id));
-       smi_clk.s.mode = 0;
-       cvmx_write_csr(CVMX_SMIX_CLK(bus_id), smi_clk.u64);
-}
-
-/**
- * Perform an MII read. This function is used to read PHY
- * registers controlling auto negotiation.
- *
- * @bus_id:   MDIO bus number. Zero on most chips, but some chips (ex CN56XX)
- *                support multiple busses.
- * @phy_id:   The MII phy id
- * @location: Register location to read
- *
- * Returns Result from the read or -1 on failure
- */
-static inline int cvmx_mdio_read(int bus_id, int phy_id, int location)
-{
-       union cvmx_smix_cmd smi_cmd;
-       union cvmx_smix_rd_dat smi_rd;
-       int timeout = 1000;
-
-       if (octeon_has_feature(OCTEON_FEATURE_MDIO_CLAUSE_45))
-               __cvmx_mdio_set_clause22_mode(bus_id);
-
-       smi_cmd.u64 = 0;
-       smi_cmd.s.phy_op = MDIO_CLAUSE_22_READ;
-       smi_cmd.s.phy_adr = phy_id;
-       smi_cmd.s.reg_adr = location;
-       cvmx_write_csr(CVMX_SMIX_CMD(bus_id), smi_cmd.u64);
-
-       do {
-               cvmx_wait(1000);
-               smi_rd.u64 = cvmx_read_csr(CVMX_SMIX_RD_DAT(bus_id));
-       } while (smi_rd.s.pending && timeout--);
-
-       if (smi_rd.s.val)
-               return smi_rd.s.dat;
-       else
-               return -1;
-}
-
-/**
- * Perform an MII write. This function is used to write PHY
- * registers controlling auto negotiation.
- *
- * @bus_id:   MDIO bus number. Zero on most chips, but some chips (ex CN56XX)
- *                support multiple busses.
- * @phy_id:   The MII phy id
- * @location: Register location to write
- * @val:      Value to write
- *
- * Returns -1 on error
- *        0 on success
- */
-static inline int cvmx_mdio_write(int bus_id, int phy_id, int location, int val)
-{
-       union cvmx_smix_cmd smi_cmd;
-       union cvmx_smix_wr_dat smi_wr;
-       int timeout = 1000;
-
-       if (octeon_has_feature(OCTEON_FEATURE_MDIO_CLAUSE_45))
-               __cvmx_mdio_set_clause22_mode(bus_id);
-
-       smi_wr.u64 = 0;
-       smi_wr.s.dat = val;
-       cvmx_write_csr(CVMX_SMIX_WR_DAT(bus_id), smi_wr.u64);
-
-       smi_cmd.u64 = 0;
-       smi_cmd.s.phy_op = MDIO_CLAUSE_22_WRITE;
-       smi_cmd.s.phy_adr = phy_id;
-       smi_cmd.s.reg_adr = location;
-       cvmx_write_csr(CVMX_SMIX_CMD(bus_id), smi_cmd.u64);
-
-       do {
-               cvmx_wait(1000);
-               smi_wr.u64 = cvmx_read_csr(CVMX_SMIX_WR_DAT(bus_id));
-       } while (smi_wr.s.pending && --timeout);
-       if (timeout <= 0)
-               return -1;
-
-       return 0;
-}
-
-/**
- * Perform an IEEE 802.3 clause 45 MII read. This function is used to
- * read PHY registers controlling auto negotiation.
- *
- * @bus_id:   MDIO bus number. Zero on most chips, but some chips (ex CN56XX)
- *                support multiple busses.
- * @phy_id:   The MII phy id
- * @device:   MDIO Managable Device (MMD) id
- * @location: Register location to read
- *
- * Returns Result from the read or -1 on failure
- */
-
-static inline int cvmx_mdio_45_read(int bus_id, int phy_id, int device,
-                                   int location)
-{
-       union cvmx_smix_cmd smi_cmd;
-       union cvmx_smix_rd_dat smi_rd;
-       union cvmx_smix_wr_dat smi_wr;
-       int timeout = 1000;
-
-       if (!octeon_has_feature(OCTEON_FEATURE_MDIO_CLAUSE_45))
-               return -1;
-
-       __cvmx_mdio_set_clause45_mode(bus_id);
-
-       smi_wr.u64 = 0;
-       smi_wr.s.dat = location;
-       cvmx_write_csr(CVMX_SMIX_WR_DAT(bus_id), smi_wr.u64);
-
-       smi_cmd.u64 = 0;
-       smi_cmd.s.phy_op = MDIO_CLAUSE_45_ADDRESS;
-       smi_cmd.s.phy_adr = phy_id;
-       smi_cmd.s.reg_adr = device;
-       cvmx_write_csr(CVMX_SMIX_CMD(bus_id), smi_cmd.u64);
-
-       do {
-               cvmx_wait(1000);
-               smi_wr.u64 = cvmx_read_csr(CVMX_SMIX_WR_DAT(bus_id));
-       } while (smi_wr.s.pending && --timeout);
-       if (timeout <= 0) {
-               cvmx_dprintf("cvmx_mdio_45_read: bus_id %d phy_id %2d "
-                            "device %2d register %2d   TIME OUT(address)\n",
-                    bus_id, phy_id, device, location);
-               return -1;
-       }
-
-       smi_cmd.u64 = 0;
-       smi_cmd.s.phy_op = MDIO_CLAUSE_45_READ;
-       smi_cmd.s.phy_adr = phy_id;
-       smi_cmd.s.reg_adr = device;
-       cvmx_write_csr(CVMX_SMIX_CMD(bus_id), smi_cmd.u64);
-
-       do {
-               cvmx_wait(1000);
-               smi_rd.u64 = cvmx_read_csr(CVMX_SMIX_RD_DAT(bus_id));
-       } while (smi_rd.s.pending && --timeout);
-
-       if (timeout <= 0) {
-               cvmx_dprintf("cvmx_mdio_45_read: bus_id %d phy_id %2d "
-                            "device %2d register %2d   TIME OUT(data)\n",
-                    bus_id, phy_id, device, location);
-               return -1;
-       }
-
-       if (smi_rd.s.val)
-               return smi_rd.s.dat;
-       else {
-               cvmx_dprintf("cvmx_mdio_45_read: bus_id %d phy_id %2d "
-                            "device %2d register %2d   INVALID READ\n",
-                    bus_id, phy_id, device, location);
-               return -1;
-       }
-}
-
-/**
- * Perform an IEEE 802.3 clause 45 MII write. This function is used to
- * write PHY registers controlling auto negotiation.
- *
- * @bus_id:   MDIO bus number. Zero on most chips, but some chips (ex CN56XX)
- *                support multiple busses.
- * @phy_id:   The MII phy id
- * @device:   MDIO Managable Device (MMD) id
- * @location: Register location to write
- * @val:      Value to write
- *
- * Returns -1 on error
- *        0 on success
- */
-static inline int cvmx_mdio_45_write(int bus_id, int phy_id, int device,
-                                    int location, int val)
-{
-       union cvmx_smix_cmd smi_cmd;
-       union cvmx_smix_wr_dat smi_wr;
-       int timeout = 1000;
-
-       if (!octeon_has_feature(OCTEON_FEATURE_MDIO_CLAUSE_45))
-               return -1;
-
-       __cvmx_mdio_set_clause45_mode(bus_id);
-
-       smi_wr.u64 = 0;
-       smi_wr.s.dat = location;
-       cvmx_write_csr(CVMX_SMIX_WR_DAT(bus_id), smi_wr.u64);
-
-       smi_cmd.u64 = 0;
-       smi_cmd.s.phy_op = MDIO_CLAUSE_45_ADDRESS;
-       smi_cmd.s.phy_adr = phy_id;
-       smi_cmd.s.reg_adr = device;
-       cvmx_write_csr(CVMX_SMIX_CMD(bus_id), smi_cmd.u64);
-
-       do {
-               cvmx_wait(1000);
-               smi_wr.u64 = cvmx_read_csr(CVMX_SMIX_WR_DAT(bus_id));
-       } while (smi_wr.s.pending && --timeout);
-       if (timeout <= 0)
-               return -1;
-
-       smi_wr.u64 = 0;
-       smi_wr.s.dat = val;
-       cvmx_write_csr(CVMX_SMIX_WR_DAT(bus_id), smi_wr.u64);
-
-       smi_cmd.u64 = 0;
-       smi_cmd.s.phy_op = MDIO_CLAUSE_45_WRITE;
-       smi_cmd.s.phy_adr = phy_id;
-       smi_cmd.s.reg_adr = device;
-       cvmx_write_csr(CVMX_SMIX_CMD(bus_id), smi_cmd.u64);
-
-       do {
-               cvmx_wait(1000);
-               smi_wr.u64 = cvmx_read_csr(CVMX_SMIX_WR_DAT(bus_id));
-       } while (smi_wr.s.pending && --timeout);
-       if (timeout <= 0)
-               return -1;
-
-       return 0;
-}
-
-#endif
index 9b63cd41213de1290e7a06c31791a2eae74d7487..30d1129d86248444fc7065fe99bd26c8f5111d0e 100644 (file)
  */
 
 #include <linux/ioport.h>
+#include <linux/list.h>
 #include <linux/of.h>
 
+#ifdef CONFIG_PCI_DRIVERS_LEGACY
+
 /*
  * Each pci channel is a top-level PCI bus seem by CPU.         A machine  with
  * multiple PCI channels may have multiple PCI host controllers or a
  * single controller supporting multiple channels.
  */
 struct pci_controller {
-       struct pci_controller *next;
+       struct list_head list;
        struct pci_bus *bus;
        struct device_node *of_node;
 
@@ -38,10 +41,12 @@ struct pci_controller {
        struct resource *busn_resource;
        unsigned long busn_offset;
 
+#ifndef CONFIG_PCI_DOMAINS_GENERIC
        unsigned int index;
        /* For compatibility with current (as of July 2003) pciutils
           and XFree86. Eventually will be removed. */
        unsigned int need_domain_info;
+#endif
 
        /* Optional access methods for reading/writing the bus number
           of the PCI controller */
@@ -59,12 +64,43 @@ extern void register_pci_controller(struct pci_controller *hose);
  */
 extern int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin);
 
+/* Do platform specific device initialization at pci_enable_device() time */
+extern int pcibios_plat_dev_init(struct pci_dev *dev);
+
+extern char * (*pcibios_plat_setup)(char *str);
+
+#ifdef CONFIG_OF
+/* this function parses memory ranges from a device node */
+extern void pci_load_of_ranges(struct pci_controller *hose,
+                              struct device_node *node);
+#else
+static inline void pci_load_of_ranges(struct pci_controller *hose,
+                                     struct device_node *node) {}
+#endif
+
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+static inline void set_pci_need_domain_info(struct pci_controller *hose,
+                                           int need_domain_info)
+{
+       /* nothing to do */
+}
+#elif defined(CONFIG_PCI_DOMAINS)
+static inline void set_pci_need_domain_info(struct pci_controller *hose,
+                                           int need_domain_info)
+{
+       hose->need_domain_info = need_domain_info;
+}
+#endif /* CONFIG_PCI_DOMAINS */
+
+#endif
 
 /* Can be used to override the logic in pci_scan_bus for skipping
    already-configured bus numbers - to be used for buggy BIOSes
    or architectures with incomplete PCI setup by the loader */
-
-extern unsigned int pcibios_assign_all_busses(void);
+static inline unsigned int pcibios_assign_all_busses(void)
+{
+       return 1;
+}
 
 extern unsigned long PCIBIOS_MIN_IO;
 extern unsigned long PCIBIOS_MIN_MEM;
@@ -100,7 +136,12 @@ struct pci_dev;
  */
 #define PCI_DMA_BUS_IS_PHYS     (1)
 
-#ifdef CONFIG_PCI_DOMAINS
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+       return pci_domain_nr(bus);
+}
+#elif defined(CONFIG_PCI_DOMAINS)
 #define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index
 
 static inline int pci_proc_domain(struct pci_bus *bus)
@@ -121,15 +162,4 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
        return channel ? 15 : 14;
 }
 
-extern char * (*pcibios_plat_setup)(char *str);
-
-#ifdef CONFIG_OF
-/* this function parses memory ranges from a device node */
-extern void pci_load_of_ranges(struct pci_controller *hose,
-                              struct device_node *node);
-#else
-static inline void pci_load_of_ranges(struct pci_controller *hose,
-                                     struct device_node *node) {}
-#endif
-
 #endif /* _ASM_PCI_H */
index 93c079a1cfc8e1f13366590e939c47eade8db789..a03e86969f78a86a9989897ad95cfad1b7798d73 100644 (file)
@@ -67,11 +67,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
        unsigned long address)
 {
-       pte_t *pte;
-
-       pte = (pte_t *) __get_free_pages(GFP_KERNEL|__GFP_ZERO, PTE_ORDER);
-
-       return pte;
+       return (pte_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PTE_ORDER);
 }
 
 static inline struct page *pte_alloc_one(struct mm_struct *mm,
index 625eda53d57166084c611e5286aa0141704a984b..89d58d80b77bb18da88124d03963381922de58d8 100644 (file)
 
 /*
  * The CM & CPC can only handle coherence & power control on a per-core basis,
- * thus in an MT system the VPEs within each core are coupled and can only
+ * thus in an MT system the VP(E)s within each core are coupled and can only
  * enter or exit states requiring CM or CPC assistance in unison.
  */
-#ifdef CONFIG_MIPS_MT
+#if defined(CONFIG_CPU_MIPSR6)
+# define coupled_coherence cpu_has_vp
+#elif defined(CONFIG_MIPS_MT)
 # define coupled_coherence cpu_has_mipsmt
 #else
 # define coupled_coherence 0
index f6fc6aac54963fcebcf7a55e094ec32689923ea7..b6578611dddbfc37e9a7bea36886affe93d87411 100644 (file)
@@ -152,7 +152,7 @@ static inline int is_syscall_success(struct pt_regs *regs)
 
 static inline long regs_return_value(struct pt_regs *regs)
 {
-       if (is_syscall_success(regs))
+       if (is_syscall_success(regs) || !user_mode(regs))
                return regs->regs[2];
        else
                return -regs->regs[2];
index 8bc6c70a40302485834387ea33aea356c8ccd5dd..060f23ff181718fb6b9dec3f3a206f1013452956 100644 (file)
@@ -85,6 +85,20 @@ static inline void __cpu_die(unsigned int cpu)
 extern void play_dead(void);
 #endif
 
+/*
+ * This function will set up the necessary IPIs for Linux to communicate
+ * with the CPUs in mask.
+ * Return 0 on success.
+ */
+int mips_smp_ipi_allocate(const struct cpumask *mask);
+
+/*
+ * This function will free up IPIs allocated with mips_smp_ipi_allocate to the
+ * CPUs in mask, which must be a subset of the IPIs that have been configured.
+ * Return 0 on success.
+ */
+int mips_smp_ipi_free(const struct cpumask *mask);
+
 static inline void arch_send_call_function_single_ipi(int cpu)
 {
        extern struct plat_smp_ops *mp_ops;     /* private */
index ebb5c0f2f90daef7b2bcca348fb90df2b7530037..c0ae27971e3108093fdc3952969d15bb3bfd5f30 100644 (file)
@@ -75,6 +75,22 @@ do { if (cpu_has_rw_llb) {                                           \
        }                                                               \
 } while (0)
 
+/*
+ * Check FCSR for any unmasked exceptions pending set with `ptrace',
+ * clear them and send a signal.
+ */
+#define __sanitize_fcr31(next)                                         \
+do {                                                                   \
+       unsigned long fcr31 = mask_fcr31_x(next->thread.fpu.fcr31);     \
+       void __user *pc;                                                \
+                                                                       \
+       if (unlikely(fcr31)) {                                          \
+               pc = (void __user *)task_pt_regs(next)->cp0_epc;        \
+               next->thread.fpu.fcr31 &= ~fcr31;                       \
+               force_fcr31_sig(fcr31, pc, next);                       \
+       }                                                               \
+} while (0)
+
 /*
  * For newly created kernel threads switch_to() will return to
  * ret_from_kernel_thread, newly created user threads to ret_from_fork.
@@ -85,6 +101,8 @@ do { if (cpu_has_rw_llb) {                                           \
 do {                                                                   \
        __mips_mt_fpaff_switch_to(prev);                                \
        lose_fpu_inatomic(1, prev);                                     \
+       if (tsk_used_math(next))                                        \
+               __sanitize_fcr31(next);                                 \
        if (cpu_has_dsp) {                                              \
                __save_dsp(prev);                                       \
                __restore_dsp(next);                                    \
index 4daf839cd8a8efa38bb5bd35cb0f0a842bffd9e3..89fa5c0b1579cf63ecdaf7bdf097214183a979a5 100644 (file)
@@ -859,7 +859,10 @@ extern size_t __copy_user(void *__to, const void *__from, size_t __n);
        __cu_to = (to);                                                 \
        __cu_from = (from);                                             \
        __cu_len = (n);                                                 \
+                                                                       \
+       check_object_size(__cu_from, __cu_len, true);                   \
        might_fault();                                                  \
+                                                                       \
        if (eva_kernel_access())                                        \
                __cu_len = __invoke_copy_to_kernel(__cu_to, __cu_from,  \
                                                   __cu_len);           \
@@ -880,6 +883,9 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n);
        __cu_to = (to);                                                 \
        __cu_from = (from);                                             \
        __cu_len = (n);                                                 \
+                                                                       \
+       check_object_size(__cu_from, __cu_len, true);                   \
+                                                                       \
        if (eva_kernel_access())                                        \
                __cu_len = __invoke_copy_to_kernel(__cu_to, __cu_from,  \
                                                   __cu_len);           \
@@ -898,6 +904,9 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n);
        __cu_to = (to);                                                 \
        __cu_from = (from);                                             \
        __cu_len = (n);                                                 \
+                                                                       \
+       check_object_size(__cu_to, __cu_len, false);                    \
+                                                                       \
        if (eva_kernel_access())                                        \
                __cu_len = __invoke_copy_from_kernel_inatomic(__cu_to,  \
                                                              __cu_from,\
@@ -932,6 +941,9 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n);
        __cu_to = (to);                                                 \
        __cu_from = (from);                                             \
        __cu_len = (n);                                                 \
+                                                                       \
+       check_object_size(__cu_from, __cu_len, true);                   \
+                                                                       \
        if (eva_kernel_access()) {                                      \
                __cu_len = __invoke_copy_to_kernel(__cu_to,             \
                                                   __cu_from,           \
@@ -1124,6 +1136,9 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n);
        __cu_to = (to);                                                 \
        __cu_from = (from);                                             \
        __cu_len = (n);                                                 \
+                                                                       \
+       check_object_size(__cu_to, __cu_len, false);                    \
+                                                                       \
        if (eva_kernel_access()) {                                      \
                __cu_len = __invoke_copy_from_kernel(__cu_to,           \
                                                     __cu_from,         \
@@ -1162,6 +1177,9 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n);
        __cu_to = (to);                                                 \
        __cu_from = (from);                                             \
        __cu_len = (n);                                                 \
+                                                                       \
+       check_object_size(__cu_to, __cu_len, false);                    \
+                                                                       \
        if (eva_kernel_access()) {                                      \
                __cu_len = __invoke_copy_from_kernel(__cu_to,           \
                                                     __cu_from,         \
index 2027240aafbb8432f0cc67148b70ae57f48ed2a6..566ecdcb5b4bcb2cd4d5888a1ce787b8fcbd0b97 100644 (file)
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _UAPI_ASM_SOCKET_H */
index 24ad815c7f38d463f45e90e38417649c91a08f68..3e940dbe02629ad6916f1870d2bf5a57086e89b8 100644 (file)
 #define __NR_copy_file_range           (__NR_Linux + 360)
 #define __NR_preadv2                   (__NR_Linux + 361)
 #define __NR_pwritev2                  (__NR_Linux + 362)
+#define __NR_pkey_mprotect             (__NR_Linux + 363)
+#define __NR_pkey_alloc                        (__NR_Linux + 364)
+#define __NR_pkey_free                 (__NR_Linux + 365)
+
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls            362
+#define __NR_Linux_syscalls            365
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux                 4000
-#define __NR_O32_Linux_syscalls                362
+#define __NR_O32_Linux_syscalls                365
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
 #define __NR_copy_file_range           (__NR_Linux + 320)
 #define __NR_preadv2                   (__NR_Linux + 321)
 #define __NR_pwritev2                  (__NR_Linux + 322)
+#define __NR_pkey_mprotect             (__NR_Linux + 323)
+#define __NR_pkey_alloc                        (__NR_Linux + 324)
+#define __NR_pkey_free                 (__NR_Linux + 325)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls            322
+#define __NR_Linux_syscalls            325
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux                  5000
-#define __NR_64_Linux_syscalls         322
+#define __NR_64_Linux_syscalls         325
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
 #define __NR_copy_file_range           (__NR_Linux + 324)
 #define __NR_preadv2                   (__NR_Linux + 325)
 #define __NR_pwritev2                  (__NR_Linux + 326)
+#define __NR_pkey_mprotect             (__NR_Linux + 327)
+#define __NR_pkey_alloc                        (__NR_Linux + 328)
+#define __NR_pkey_free                 (__NR_Linux + 329)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls            326
+#define __NR_Linux_syscalls            329
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux                 6000
-#define __NR_N32_Linux_syscalls                326
+#define __NR_N32_Linux_syscalls                329
 
 #endif /* _UAPI_ASM_UNISTD_H */
index 58ad63d7eb42413ee1259582fd6b666f699faa99..9c7f3e136d50da8874df0af6749aa2dfe358d61c 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Support for n32 Linux/MIPS ELF binaries.
+ * Author: Ralf Baechle (ralf@linux-mips.org)
  *
  * Copyright (C) 1999, 2001 Ralf Baechle
  * Copyright (C) 1999, 2001 Silicon Graphics, Inc.
@@ -37,7 +38,6 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
 #define ELF_ET_DYN_BASE                (TASK32_SIZE / 3 * 2)
 
 #include <asm/processor.h>
-#include <linux/module.h>
 #include <linux/elfcore.h>
 #include <linux/compat.h>
 #include <linux/math64.h>
@@ -96,12 +96,6 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
 
 #define ELF_CORE_EFLAGS EF_MIPS_ABI2
 
-MODULE_DESCRIPTION("Binary format loader for compatibility with n32 Linux/MIPS binaries");
-MODULE_AUTHOR("Ralf Baechle (ralf@linux-mips.org)");
-
-#undef MODULE_DESCRIPTION
-#undef MODULE_AUTHOR
-
 #undef TASK_SIZE
 #define TASK_SIZE TASK_SIZE32
 
index 49fb881481f7b6940dfb353dedeb412d2be584bb..1ab34322dd977cdc898475573f26dfb19b326c59 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Support for o32 Linux/MIPS ELF binaries.
+ * Author: Ralf Baechle (ralf@linux-mips.org)
  *
  * Copyright (C) 1999, 2001 Ralf Baechle
  * Copyright (C) 1999, 2001 Silicon Graphics, Inc.
@@ -42,7 +43,6 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
 
 #include <asm/processor.h>
 
-#include <linux/module.h>
 #include <linux/elfcore.h>
 #include <linux/compat.h>
 #include <linux/math64.h>
@@ -99,12 +99,6 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
        value->tv_usec = rem / NSEC_PER_USEC;
 }
 
-MODULE_DESCRIPTION("Binary format loader for compatibility with o32 Linux/MIPS binaries");
-MODULE_AUTHOR("Ralf Baechle (ralf@linux-mips.org)");
-
-#undef MODULE_DESCRIPTION
-#undef MODULE_AUTHOR
-
 #undef TASK_SIZE
 #define TASK_SIZE TASK_SIZE32
 
index 46c227fc98f5af75bbdd389cbe8eb75b33815470..12c718181e5e3ee5e5cc51cca92bc16f408fbe0a 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <asm/branch.h>
 #include <asm/cpu.h>
 #include <asm/cpu-features.h>
@@ -866,3 +866,37 @@ unaligned:
        force_sig(SIGBUS, current);
        return -EFAULT;
 }
+
+#if (defined CONFIG_KPROBES) || (defined CONFIG_UPROBES)
+
+int __insn_is_compact_branch(union mips_instruction insn)
+{
+       if (!cpu_has_mips_r6)
+               return 0;
+
+       switch (insn.i_format.opcode) {
+       case blezl_op:
+       case bgtzl_op:
+       case blez_op:
+       case bgtz_op:
+               /*
+                * blez[l] and bgtz[l] opcodes with non-zero rt
+                * are MIPS R6 compact branches
+                */
+               if (insn.i_format.rt)
+                       return 1;
+               break;
+       case bc6_op:
+       case balc6_op:
+       case pop10_op:
+       case pop30_op:
+       case pop66_op:
+       case pop76_op:
+               return 1;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(__insn_is_compact_branch);
+
+#endif  /* CONFIG_KPROBES || CONFIG_UPROBES */
index 212f46f2014e06b36aed708b26215457be72ece7..f5c8bce70db29cb59430cdf0bd46d4fccb80aee8 100644 (file)
@@ -32,7 +32,8 @@
 #include <asm/ptrace.h>
 #include <asm/branch.h>
 #include <asm/break.h>
-#include <asm/inst.h>
+
+#include "probes-common.h"
 
 static const union mips_instruction breakpoint_insn = {
        .b_format = {
@@ -55,63 +56,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 static int __kprobes insn_has_delayslot(union mips_instruction insn)
 {
-       switch (insn.i_format.opcode) {
-
-               /*
-                * This group contains:
-                * jr and jalr are in r_format format.
-                */
-       case spec_op:
-               switch (insn.r_format.func) {
-               case jr_op:
-               case jalr_op:
-                       break;
-               default:
-                       goto insn_ok;
-               }
-
-               /*
-                * This group contains:
-                * bltz_op, bgez_op, bltzl_op, bgezl_op,
-                * bltzal_op, bgezal_op, bltzall_op, bgezall_op.
-                */
-       case bcond_op:
-
-               /*
-                * These are unconditional and in j_format.
-                */
-       case jal_op:
-       case j_op:
-
-               /*
-                * These are conditional and in i_format.
-                */
-       case beq_op:
-       case beql_op:
-       case bne_op:
-       case bnel_op:
-       case blez_op:
-       case blezl_op:
-       case bgtz_op:
-       case bgtzl_op:
-
-               /*
-                * These are the FPA/cp1 branch instructions.
-                */
-       case cop1_op:
-
-#ifdef CONFIG_CPU_CAVIUM_OCTEON
-       case lwc2_op: /* This is bbit0 on Octeon */
-       case ldc2_op: /* This is bbit032 on Octeon */
-       case swc2_op: /* This is bbit1 on Octeon */
-       case sdc2_op: /* This is bbit132 on Octeon */
-#endif
-               return 1;
-       default:
-               break;
-       }
-insn_ok:
-       return 0;
+       return __insn_has_delay_slot(insn);
 }
 
 /*
@@ -161,6 +106,12 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
                goto out;
        }
 
+       if (__insn_is_compact_branch(insn)) {
+               pr_notice("Kprobes for compact branches are not supported\n");
+               ret = -EINVAL;
+               goto out;
+       }
+
        /* insn: must be on special executable page on mips. */
        p->ainsn.insn = get_insn_slot();
        if (!p->ainsn.insn) {
index 0b29646bcee770533e4a0d25cf1a50b4cc167b7b..50fb62544df71a62d4457d998de415193c0ad90e 100644 (file)
@@ -26,7 +26,6 @@
 #include <linux/utsname.h>
 #include <linux/personality.h>
 #include <linux/dnotify.h>
-#include <linux/module.h>
 #include <linux/binfmts.h>
 #include <linux/security.h>
 #include <linux/compat.h>
index 566b8d2c092c31de6293f245d81b12a899d40622..a4964c334cab66eba2e41e34c25db281a1729555 100644 (file)
@@ -21,6 +21,11 @@ static DEFINE_PER_CPU_ALIGNED(spinlock_t, cpc_core_lock);
 
 static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);
 
+phys_addr_t __weak mips_cpc_default_phys_base(void)
+{
+       return 0;
+}
+
 /**
  * mips_cpc_phys_base - retrieve the physical base address of the CPC
  *
@@ -43,8 +48,12 @@ static phys_addr_t mips_cpc_phys_base(void)
        if (cpc_base & CM_GCR_CPC_BASE_CPCEN_MSK)
                return cpc_base & CM_GCR_CPC_BASE_CPCBASE_MSK;
 
-       /* Otherwise, give it the default address & enable it */
+       /* Otherwise, use the default address */
        cpc_base = mips_cpc_default_phys_base();
+       if (!cpc_base)
+               return cpc_base;
+
+       /* Enable the CPC, mapped at the default address */
        write_gcr_cpc_base(cpc_base | CM_GCR_CPC_BASE_CPCEN_MSK);
        return cpc_base;
 }
@@ -52,7 +61,7 @@ static phys_addr_t mips_cpc_phys_base(void)
 int mips_cpc_probe(void)
 {
        phys_addr_t addr;
-       unsigned cpu;
+       unsigned int cpu;
 
        for_each_possible_cpu(cpu)
                spin_lock_init(&per_cpu(cpc_core_lock, cpu));
@@ -70,7 +79,12 @@ int mips_cpc_probe(void)
 
 void mips_cpc_lock_other(unsigned int core)
 {
-       unsigned curr_core;
+       unsigned int curr_core;
+
+       if (mips_cm_revision() >= CM_REV_CM3)
+               /* Systems with CM >= 3 lock the CPC via mips_cm_lock_other */
+               return;
+
        preempt_disable();
        curr_core = current_cpu_data.core;
        spin_lock_irqsave(&per_cpu(cpc_core_lock, curr_core),
@@ -86,7 +100,13 @@ void mips_cpc_lock_other(unsigned int core)
 
 void mips_cpc_unlock_other(void)
 {
-       unsigned curr_core = current_cpu_data.core;
+       unsigned int curr_core;
+
+       if (mips_cm_revision() >= CM_REV_CM3)
+               /* Systems with CM >= 3 lock the CPC via mips_cm_lock_other */
+               return;
+
+       curr_core = current_cpu_data.core;
        spin_unlock_irqrestore(&per_cpu(cpc_core_lock, curr_core),
                               per_cpu(cpc_core_lock_flags, curr_core));
        preempt_enable();
index 0a7e10b5f9e39eb312e0e48b2eab1966df6854f3..bd09853aecdfa1e567717cc35a9d19e786468391 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/debugfs.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/ptrace.h>
 #include <linux/seq_file.h>
 
@@ -900,7 +899,7 @@ static inline int mipsr2_find_op_func(struct pt_regs *regs, u32 inst,
  * mipsr2_decoder: Decode and emulate a MIPS R2 instruction
  * @regs: Process register set
  * @inst: Instruction to decode and emulate
- * @fcr31: Floating Point Control and Status Register returned
+ * @fcr31: Floating Point Control and Status Register Cause bits returned
  */
 int mipsr2_decoder(struct pt_regs *regs, u32 inst, unsigned long *fcr31)
 {
@@ -1173,13 +1172,13 @@ fpu_emul:
 
                err = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 0,
                                               &fault_addr);
-               *fcr31 = current->thread.fpu.fcr31;
 
                /*
-                * We can't allow the emulated instruction to leave any of
-                * the cause bits set in $fcr31.
+                * We can't allow the emulated instruction to leave any
+                * enabled Cause bits set in $fcr31.
                 */
-               current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+               *fcr31 = res = mask_fcr31_x(current->thread.fpu.fcr31);
+               current->thread.fpu.fcr31 &= ~res;
 
                /*
                 * this is a tricky issue - lose_fpu() uses LL/SC atomics
index 79850e376ef6387cb3956b9af78169fb12694bde..94627a3a6a0d975b6451508fec88738c7c1fcea5 100644 (file)
@@ -20,6 +20,7 @@
 
 #undef DEBUG
 
+#include <linux/extable.h>
 #include <linux/moduleloader.h>
 #include <linux/elf.h>
 #include <linux/mm.h>
index 5b31a9405ebc69cc7de7607a4ee3c0192e267586..7cf653e214237f75b22200c94f0e47c696be14ae 100644 (file)
@@ -8,6 +8,7 @@
  * option) any later version.
  */
 
+#include <linux/cpuhotplug.h>
 #include <linux/init.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
@@ -70,13 +71,8 @@ static DEFINE_PER_CPU_ALIGNED(atomic_t, pm_barrier);
 DEFINE_PER_CPU_ALIGNED(struct mips_static_suspend_state, cps_cpu_state);
 
 /* A somewhat arbitrary number of labels & relocs for uasm */
-static struct uasm_label labels[32] __initdata;
-static struct uasm_reloc relocs[32] __initdata;
-
-/* CPU dependant sync types */
-static unsigned stype_intervention;
-static unsigned stype_memory;
-static unsigned stype_ordering;
+static struct uasm_label labels[32];
+static struct uasm_reloc relocs[32];
 
 enum mips_reg {
        zero, at, v0, v1, a0, a1, a2, a3,
@@ -134,7 +130,7 @@ int cps_pm_enter_state(enum cps_pm_state state)
                return -EINVAL;
 
        /* Calculate which coupled CPUs (VPEs) are online */
-#ifdef CONFIG_MIPS_MT
+#if defined(CONFIG_MIPS_MT) || defined(CONFIG_CPU_MIPSR6)
        if (cpu_online(cpu)) {
                cpumask_and(coupled_mask, cpu_online_mask,
                            &cpu_sibling_map[cpu]);
@@ -198,10 +194,10 @@ int cps_pm_enter_state(enum cps_pm_state state)
        return 0;
 }
 
-static void __init cps_gen_cache_routine(u32 **pp, struct uasm_label **pl,
-                                        struct uasm_reloc **pr,
-                                        const struct cache_desc *cache,
-                                        unsigned op, int lbl)
+static void cps_gen_cache_routine(u32 **pp, struct uasm_label **pl,
+                                 struct uasm_reloc **pr,
+                                 const struct cache_desc *cache,
+                                 unsigned op, int lbl)
 {
        unsigned cache_size = cache->ways << cache->waybit;
        unsigned i;
@@ -242,10 +238,10 @@ static void __init cps_gen_cache_routine(u32 **pp, struct uasm_label **pl,
        uasm_i_nop(pp);
 }
 
-static int __init cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl,
-                                   struct uasm_reloc **pr,
-                                   const struct cpuinfo_mips *cpu_info,
-                                   int lbl)
+static int cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl,
+                            struct uasm_reloc **pr,
+                            const struct cpuinfo_mips *cpu_info,
+                            int lbl)
 {
        unsigned i, fsb_size = 8;
        unsigned num_loads = (fsb_size * 3) / 2;
@@ -272,14 +268,9 @@ static int __init cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl,
                /* On older ones it's unavailable */
                return -1;
 
-       /* CPUs which do not require the workaround */
-       case CPU_P5600:
-       case CPU_I6400:
-               return 0;
-
        default:
-               WARN_ONCE(1, "pm-cps: FSB flush unsupported for this CPU\n");
-               return -1;
+               /* Assume that the CPU does not need this workaround */
+               return 0;
        }
 
        /*
@@ -320,8 +311,8 @@ static int __init cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl,
                             i * line_size * line_stride, t0);
        }
 
-       /* Completion barrier */
-       uasm_i_sync(pp, stype_memory);
+       /* Barrier ensuring previous cache invalidates are complete */
+       uasm_i_sync(pp, STYPE_SYNC);
        uasm_i_ehb(pp);
 
        /* Check whether the pipeline stalled due to the FSB being full */
@@ -340,9 +331,9 @@ static int __init cps_gen_flush_fsb(u32 **pp, struct uasm_label **pl,
        return 0;
 }
 
-static void __init cps_gen_set_top_bit(u32 **pp, struct uasm_label **pl,
-                                      struct uasm_reloc **pr,
-                                      unsigned r_addr, int lbl)
+static void cps_gen_set_top_bit(u32 **pp, struct uasm_label **pl,
+                               struct uasm_reloc **pr,
+                               unsigned r_addr, int lbl)
 {
        uasm_i_lui(pp, t0, uasm_rel_hi(0x80000000));
        uasm_build_label(pl, *pp, lbl);
@@ -353,7 +344,7 @@ static void __init cps_gen_set_top_bit(u32 **pp, struct uasm_label **pl,
        uasm_i_nop(pp);
 }
 
-static void * __init cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
+static void *cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
 {
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
@@ -411,7 +402,7 @@ static void * __init cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
 
        if (coupled_coherence) {
                /* Increment ready_count */
-               uasm_i_sync(&p, stype_ordering);
+               uasm_i_sync(&p, STYPE_SYNC_MB);
                uasm_build_label(&l, p, lbl_incready);
                uasm_i_ll(&p, t1, 0, r_nc_count);
                uasm_i_addiu(&p, t2, t1, 1);
@@ -419,8 +410,8 @@ static void * __init cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                uasm_il_beqz(&p, &r, t2, lbl_incready);
                uasm_i_addiu(&p, t1, t1, 1);
 
-               /* Ordering barrier */
-               uasm_i_sync(&p, stype_ordering);
+               /* Barrier ensuring all CPUs see the updated r_nc_count value */
+               uasm_i_sync(&p, STYPE_SYNC_MB);
 
                /*
                 * If this is the last VPE to become ready for non-coherence
@@ -441,7 +432,8 @@ static void * __init cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                        uasm_i_lw(&p, t0, 0, r_nc_count);
                        uasm_il_bltz(&p, &r, t0, lbl_secondary_cont);
                        uasm_i_ehb(&p);
-                       uasm_i_yield(&p, zero, t1);
+                       if (cpu_has_mipsmt)
+                               uasm_i_yield(&p, zero, t1);
                        uasm_il_b(&p, &r, lbl_poll_cont);
                        uasm_i_nop(&p);
                } else {
@@ -449,8 +441,21 @@ static void * __init cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                         * The core will lose power & this VPE will not continue
                         * so it can simply halt here.
                         */
-                       uasm_i_addiu(&p, t0, zero, TCHALT_H);
-                       uasm_i_mtc0(&p, t0, 2, 4);
+                       if (cpu_has_mipsmt) {
+                               /* Halt the VPE via C0 tchalt register */
+                               uasm_i_addiu(&p, t0, zero, TCHALT_H);
+                               uasm_i_mtc0(&p, t0, 2, 4);
+                       } else if (cpu_has_vp) {
+                               /* Halt the VP via the CPC VP_STOP register */
+                               unsigned int vpe_id;
+
+                               vpe_id = cpu_vpe_id(&cpu_data[cpu]);
+                               uasm_i_addiu(&p, t0, zero, 1 << vpe_id);
+                               UASM_i_LA(&p, t1, (long)addr_cpc_cl_vp_stop());
+                               uasm_i_sw(&p, t0, 0, t1);
+                       } else {
+                               BUG();
+                       }
                        uasm_build_label(&l, p, lbl_secondary_hang);
                        uasm_il_b(&p, &r, lbl_secondary_hang);
                        uasm_i_nop(&p);
@@ -472,22 +477,24 @@ static void * __init cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
        cps_gen_cache_routine(&p, &l, &r, &cpu_data[cpu].dcache,
                              Index_Writeback_Inv_D, lbl_flushdcache);
 
-       /* Completion barrier */
-       uasm_i_sync(&p, stype_memory);
+       /* Barrier ensuring previous cache invalidates are complete */
+       uasm_i_sync(&p, STYPE_SYNC);
        uasm_i_ehb(&p);
 
-       /*
-        * Disable all but self interventions. The load from COHCTL is defined
-        * by the interAptiv & proAptiv SUMs as ensuring that the operation
-        * resulting from the preceding store is complete.
-        */
-       uasm_i_addiu(&p, t0, zero, 1 << cpu_data[cpu].core);
-       uasm_i_sw(&p, t0, 0, r_pcohctl);
-       uasm_i_lw(&p, t0, 0, r_pcohctl);
-
-       /* Sync to ensure previous interventions are complete */
-       uasm_i_sync(&p, stype_intervention);
-       uasm_i_ehb(&p);
+       if (mips_cm_revision() < CM_REV_CM3) {
+               /*
+                * Disable all but self interventions. The load from COHCTL is
+                * defined by the interAptiv & proAptiv SUMs as ensuring that
+                * the operation resulting from the preceding store is complete.
+                */
+               uasm_i_addiu(&p, t0, zero, 1 << cpu_data[cpu].core);
+               uasm_i_sw(&p, t0, 0, r_pcohctl);
+               uasm_i_lw(&p, t0, 0, r_pcohctl);
+
+               /* Barrier to ensure write to coherence control is complete */
+               uasm_i_sync(&p, STYPE_SYNC);
+               uasm_i_ehb(&p);
+       }
 
        /* Disable coherence */
        uasm_i_sw(&p, zero, 0, r_pcohctl);
@@ -531,8 +538,8 @@ static void * __init cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                        goto gen_done;
                }
 
-               /* Completion barrier */
-               uasm_i_sync(&p, stype_memory);
+               /* Barrier to ensure write to CPC command is complete */
+               uasm_i_sync(&p, STYPE_SYNC);
                uasm_i_ehb(&p);
        }
 
@@ -562,26 +569,29 @@ static void * __init cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
         * will run this. The first will actually re-enable coherence & the
         * rest will just be performing a rather unusual nop.
         */
-       uasm_i_addiu(&p, t0, zero, CM_GCR_Cx_COHERENCE_COHDOMAINEN_MSK);
+       uasm_i_addiu(&p, t0, zero, mips_cm_revision() < CM_REV_CM3
+                               ? CM_GCR_Cx_COHERENCE_COHDOMAINEN_MSK
+                               : CM3_GCR_Cx_COHERENCE_COHEN_MSK);
+
        uasm_i_sw(&p, t0, 0, r_pcohctl);
        uasm_i_lw(&p, t0, 0, r_pcohctl);
 
-       /* Completion barrier */
-       uasm_i_sync(&p, stype_memory);
+       /* Barrier to ensure write to coherence control is complete */
+       uasm_i_sync(&p, STYPE_SYNC);
        uasm_i_ehb(&p);
 
        if (coupled_coherence && (state == CPS_PM_NC_WAIT)) {
                /* Decrement ready_count */
                uasm_build_label(&l, p, lbl_decready);
-               uasm_i_sync(&p, stype_ordering);
+               uasm_i_sync(&p, STYPE_SYNC_MB);
                uasm_i_ll(&p, t1, 0, r_nc_count);
                uasm_i_addiu(&p, t2, t1, -1);
                uasm_i_sc(&p, t2, 0, r_nc_count);
                uasm_il_beqz(&p, &r, t2, lbl_decready);
                uasm_i_andi(&p, v0, t1, (1 << fls(smp_num_siblings)) - 1);
 
-               /* Ordering barrier */
-               uasm_i_sync(&p, stype_ordering);
+               /* Barrier ensuring all CPUs see the updated r_nc_count value */
+               uasm_i_sync(&p, STYPE_SYNC_MB);
        }
 
        if (coupled_coherence && (state == CPS_PM_CLOCK_GATED)) {
@@ -602,8 +612,8 @@ static void * __init cps_gen_entry_code(unsigned cpu, enum cps_pm_state state)
                 */
                uasm_build_label(&l, p, lbl_secondary_cont);
 
-               /* Ordering barrier */
-               uasm_i_sync(&p, stype_ordering);
+               /* Barrier ensuring all CPUs see the updated r_nc_count value */
+               uasm_i_sync(&p, STYPE_SYNC_MB);
        }
 
        /* The core is coherent, time to return to C code */
@@ -628,7 +638,7 @@ out_err:
        return NULL;
 }
 
-static int __init cps_gen_core_entries(unsigned cpu)
+static int cps_pm_online_cpu(unsigned int cpu)
 {
        enum cps_pm_state state;
        unsigned core = cpu_data[cpu].core;
@@ -670,29 +680,10 @@ static int __init cps_gen_core_entries(unsigned cpu)
 
 static int __init cps_pm_init(void)
 {
-       unsigned cpu;
-       int err;
-
-       /* Detect appropriate sync types for the system */
-       switch (current_cpu_data.cputype) {
-       case CPU_INTERAPTIV:
-       case CPU_PROAPTIV:
-       case CPU_M5150:
-       case CPU_P5600:
-       case CPU_I6400:
-               stype_intervention = 0x2;
-               stype_memory = 0x3;
-               stype_ordering = 0x10;
-               break;
-
-       default:
-               pr_warn("Power management is using heavyweight sync 0\n");
-       }
-
        /* A CM is required for all non-coherent states */
        if (!mips_cm_present()) {
                pr_warn("pm-cps: no CM, non-coherent states unavailable\n");
-               goto out;
+               return 0;
        }
 
        /*
@@ -722,12 +713,7 @@ static int __init cps_pm_init(void)
                pr_warn("pm-cps: no CPC, clock & power gating unavailable\n");
        }
 
-       for_each_present_cpu(cpu) {
-               err = cps_gen_core_entries(cpu);
-               if (err)
-                       return err;
-       }
-out:
-       return 0;
+       return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_PM_CPS_CPU_ONLINE",
+                                cps_pm_online_cpu, NULL);
 }
 arch_initcall(cps_pm_init);
diff --git a/arch/mips/kernel/probes-common.h b/arch/mips/kernel/probes-common.h
new file mode 100644 (file)
index 0000000..dd08e41
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ * Author: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __PROBES_COMMON_H
+#define __PROBES_COMMON_H
+
+#include <asm/inst.h>
+
+int __insn_is_compact_branch(union mips_instruction insn);
+
+static inline int __insn_has_delay_slot(const union mips_instruction insn)
+{
+       switch (insn.i_format.opcode) {
+       /*
+        * jr and jalr are encoded in r_format.
+        */
+       case spec_op:
+               switch (insn.r_format.func) {
+               case jalr_op:
+               case jr_op:
+                       return 1;
+               }
+               break;
+
+       /*
+        * This group is selected by the rt field and contains:
+        * bltz_op, bgez_op, bltzl_op, bgezl_op, bltzal_op,
+        * bgezal_op, bltzall_op, bgezall_op, bposge32_op.
+        */
+       case bcond_op:
+               switch (insn.i_format.rt) {
+               case bltz_op:
+               case bltzl_op:
+               case bgez_op:
+               case bgezl_op:
+               case bltzal_op:
+               case bltzall_op:
+               case bgezal_op:
+               case bgezall_op:
+               case bposge32_op:
+                       return 1;
+               }
+               break;
+
+       /*
+        * Unconditional jumps (j_format) and conditional branches (i_format).
+        */
+       case jal_op:
+       case j_op:
+       case beq_op:
+       case beql_op:
+       case bne_op:
+       case bnel_op:
+       case blez_op: /* not really i_format */
+       case blezl_op:
+       case bgtz_op:
+       case bgtzl_op:
+               return 1;
+
+       /*
+        * And now the FPA/cp1 branch instructions (plus Octeon bbit branches).
+        */
+       case cop1_op:
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+       case lwc2_op: /* This is bbit0 on Octeon */
+       case ldc2_op: /* This is bbit032 on Octeon */
+       case swc2_op: /* This is bbit1 on Octeon */
+       case sdc2_op: /* This is bbit132 on Octeon */
+#endif
+               return 1;
+       }
+
+       return 0;
+}
+
+#endif  /* __PROBES_COMMON_H */
index 97dc01b03631196252c118f592e58e7b41bca428..4eff2aed736019d6f071b00487d46bfdc76a7308 100644 (file)
@@ -135,6 +135,13 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        seq_printf(m, "package\t\t\t: %d\n", cpu_data[n].package);
        seq_printf(m, "core\t\t\t: %d\n", cpu_data[n].core);
 
+#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_CPU_MIPSR6)
+       if (cpu_has_mipsmt)
+               seq_printf(m, "VPE\t\t\t: %d\n", cpu_data[n].vpe_id);
+       else if (cpu_has_vp)
+               seq_printf(m, "VP\t\t\t: %d\n", cpu_data[n].vpe_id);
+#endif
+
        sprintf(fmt, "VCE%%c exceptions\t\t: %s\n",
                      cpu_has_vce ? "%u" : "not available");
        seq_printf(m, fmt, 'D', vced_count);
index 6103b24d1bfcb781ae436ceb2a7c94ec7801df34..a92994d60e91e4b768ac6570012ad64c1c86885d 100644 (file)
@@ -79,16 +79,15 @@ void ptrace_disable(struct task_struct *child)
 }
 
 /*
- * Poke at FCSR according to its mask.  Don't set the cause bits as
- * this is currently not handled correctly in FP context restoration
- * and will cause an oops if a corresponding enable bit is set.
+ * Poke at FCSR according to its mask.  Set the Cause bits even
+ * if a corresponding Enable bit is set.  This will be noticed at
+ * the time the thread is switched to and SIGFPE thrown accordingly.
  */
 static void ptrace_setfcr31(struct task_struct *child, u32 value)
 {
        u32 fcr31;
        u32 mask;
 
-       value &= ~FPU_CSR_ALL_X;
        fcr31 = child->thread.fpu.fcr31;
        mask = boot_cpu_data.fpu_msk31;
        child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask);
@@ -817,6 +816,7 @@ long arch_ptrace(struct task_struct *child, long request,
                        break;
 #endif
                case FPC_CSR:
+                       init_fp_ctx(child);
                        ptrace_setfcr31(child, data);
                        break;
                case DSP_BASE ... DSP_BASE + 5: {
index 283b5a1967d1461298bf065b2073d62c6296dc1d..7e71a4e0281ba9cc3c2190dd9a06d958754155fb 100644 (file)
@@ -70,7 +70,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                        break;
 
                copied = access_process_vm(child, (u64)addrOthers, &tmp,
-                               sizeof(tmp), 0);
+                               sizeof(tmp), FOLL_FORCE);
                if (copied != sizeof(tmp))
                        break;
                ret = put_user(tmp, (u32 __user *) (unsigned long) data);
@@ -179,7 +179,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                        break;
                ret = 0;
                if (access_process_vm(child, (u64)addrOthers, &data,
-                                       sizeof(data), 1) == sizeof(data))
+                                       sizeof(data),
+                                       FOLL_FORCE | FOLL_WRITE) == sizeof(data))
                        break;
                ret = -EIO;
                break;
index b4ac6374a38f28e182b2558934ff979b88b33bb2..918f2f6d3861a87dccc85f5256bf9b89cc1d56d3 100644 (file)
 #include <asm/regdef.h>
 
 #define EX(a,b)                                                        \
+9:     a,##b;                                                  \
+       .section __ex_table,"a";                                \
+       PTR     9b,fault;                                       \
+       .previous
+
+#define EX2(a,b)                                               \
 9:     a,##b;                                                  \
        .section __ex_table,"a";                                \
        PTR     9b,bad_stack;                                   \
+       PTR     9b+4,bad_stack;                                 \
        .previous
 
        .set    noreorder
        .set    mips1
-       /* Save floating point context */
+
+/**
+ * _save_fp_context() - save FP context from the FPU
+ * @a0 - pointer to fpregs field of sigcontext
+ * @a1 - pointer to fpc_csr field of sigcontext
+ *
+ * Save FP context, including the 32 FP data registers and the FP
+ * control & status register, from the FPU to signal context.
+ */
 LEAF(_save_fp_context)
        .set    push
        SET_HARDFLOAT
        li      v0, 0                                   # assume success
-       cfc1    t1,fcr31
-       EX(swc1 $f0,(SC_FPREGS+0)(a0))
-       EX(swc1 $f1,(SC_FPREGS+8)(a0))
-       EX(swc1 $f2,(SC_FPREGS+16)(a0))
-       EX(swc1 $f3,(SC_FPREGS+24)(a0))
-       EX(swc1 $f4,(SC_FPREGS+32)(a0))
-       EX(swc1 $f5,(SC_FPREGS+40)(a0))
-       EX(swc1 $f6,(SC_FPREGS+48)(a0))
-       EX(swc1 $f7,(SC_FPREGS+56)(a0))
-       EX(swc1 $f8,(SC_FPREGS+64)(a0))
-       EX(swc1 $f9,(SC_FPREGS+72)(a0))
-       EX(swc1 $f10,(SC_FPREGS+80)(a0))
-       EX(swc1 $f11,(SC_FPREGS+88)(a0))
-       EX(swc1 $f12,(SC_FPREGS+96)(a0))
-       EX(swc1 $f13,(SC_FPREGS+104)(a0))
-       EX(swc1 $f14,(SC_FPREGS+112)(a0))
-       EX(swc1 $f15,(SC_FPREGS+120)(a0))
-       EX(swc1 $f16,(SC_FPREGS+128)(a0))
-       EX(swc1 $f17,(SC_FPREGS+136)(a0))
-       EX(swc1 $f18,(SC_FPREGS+144)(a0))
-       EX(swc1 $f19,(SC_FPREGS+152)(a0))
-       EX(swc1 $f20,(SC_FPREGS+160)(a0))
-       EX(swc1 $f21,(SC_FPREGS+168)(a0))
-       EX(swc1 $f22,(SC_FPREGS+176)(a0))
-       EX(swc1 $f23,(SC_FPREGS+184)(a0))
-       EX(swc1 $f24,(SC_FPREGS+192)(a0))
-       EX(swc1 $f25,(SC_FPREGS+200)(a0))
-       EX(swc1 $f26,(SC_FPREGS+208)(a0))
-       EX(swc1 $f27,(SC_FPREGS+216)(a0))
-       EX(swc1 $f28,(SC_FPREGS+224)(a0))
-       EX(swc1 $f29,(SC_FPREGS+232)(a0))
-       EX(swc1 $f30,(SC_FPREGS+240)(a0))
-       EX(swc1 $f31,(SC_FPREGS+248)(a0))
-       EX(sw   t1,(SC_FPC_CSR)(a0))
-       cfc1    t0,$0                           # implementation/version
+       cfc1    t1, fcr31
+       EX2(s.d $f0, 0(a0))
+       EX2(s.d $f2, 16(a0))
+       EX2(s.d $f4, 32(a0))
+       EX2(s.d $f6, 48(a0))
+       EX2(s.d $f8, 64(a0))
+       EX2(s.d $f10, 80(a0))
+       EX2(s.d $f12, 96(a0))
+       EX2(s.d $f14, 112(a0))
+       EX2(s.d $f16, 128(a0))
+       EX2(s.d $f18, 144(a0))
+       EX2(s.d $f20, 160(a0))
+       EX2(s.d $f22, 176(a0))
+       EX2(s.d $f24, 192(a0))
+       EX2(s.d $f26, 208(a0))
+       EX2(s.d $f28, 224(a0))
+       EX2(s.d $f30, 240(a0))
        jr      ra
+        EX(sw  t1, (a1))
        .set    pop
-       .set    nomacro
-        EX(sw  t0,(SC_FPC_EIR)(a0))
-       .set    macro
        END(_save_fp_context)
 
-/*
- * Restore FPU state:
- *  - fp gp registers
- *  - cp1 status/control register
+/**
+ * _restore_fp_context() - restore FP context to the FPU
+ * @a0 - pointer to fpregs field of sigcontext
+ * @a1 - pointer to fpc_csr field of sigcontext
  *
- * We base the decision which registers to restore from the signal stack
- * frame on the current content of c0_status, not on the content of the
- * stack frame which might have been changed by the user.
+ * Restore FP context, including the 32 FP data registers and the FP
+ * control & status register, from signal context to the FPU.
  */
 LEAF(_restore_fp_context)
        .set    push
        SET_HARDFLOAT
        li      v0, 0                                   # assume success
-       EX(lw t0,(SC_FPC_CSR)(a0))
-       EX(lwc1 $f0,(SC_FPREGS+0)(a0))
-       EX(lwc1 $f1,(SC_FPREGS+8)(a0))
-       EX(lwc1 $f2,(SC_FPREGS+16)(a0))
-       EX(lwc1 $f3,(SC_FPREGS+24)(a0))
-       EX(lwc1 $f4,(SC_FPREGS+32)(a0))
-       EX(lwc1 $f5,(SC_FPREGS+40)(a0))
-       EX(lwc1 $f6,(SC_FPREGS+48)(a0))
-       EX(lwc1 $f7,(SC_FPREGS+56)(a0))
-       EX(lwc1 $f8,(SC_FPREGS+64)(a0))
-       EX(lwc1 $f9,(SC_FPREGS+72)(a0))
-       EX(lwc1 $f10,(SC_FPREGS+80)(a0))
-       EX(lwc1 $f11,(SC_FPREGS+88)(a0))
-       EX(lwc1 $f12,(SC_FPREGS+96)(a0))
-       EX(lwc1 $f13,(SC_FPREGS+104)(a0))
-       EX(lwc1 $f14,(SC_FPREGS+112)(a0))
-       EX(lwc1 $f15,(SC_FPREGS+120)(a0))
-       EX(lwc1 $f16,(SC_FPREGS+128)(a0))
-       EX(lwc1 $f17,(SC_FPREGS+136)(a0))
-       EX(lwc1 $f18,(SC_FPREGS+144)(a0))
-       EX(lwc1 $f19,(SC_FPREGS+152)(a0))
-       EX(lwc1 $f20,(SC_FPREGS+160)(a0))
-       EX(lwc1 $f21,(SC_FPREGS+168)(a0))
-       EX(lwc1 $f22,(SC_FPREGS+176)(a0))
-       EX(lwc1 $f23,(SC_FPREGS+184)(a0))
-       EX(lwc1 $f24,(SC_FPREGS+192)(a0))
-       EX(lwc1 $f25,(SC_FPREGS+200)(a0))
-       EX(lwc1 $f26,(SC_FPREGS+208)(a0))
-       EX(lwc1 $f27,(SC_FPREGS+216)(a0))
-       EX(lwc1 $f28,(SC_FPREGS+224)(a0))
-       EX(lwc1 $f29,(SC_FPREGS+232)(a0))
-       EX(lwc1 $f30,(SC_FPREGS+240)(a0))
-       EX(lwc1 $f31,(SC_FPREGS+248)(a0))
+       EX(lw t0, (a1))
+       EX2(l.d $f0, 0(a0))
+       EX2(l.d $f2, 16(a0))
+       EX2(l.d $f4, 32(a0))
+       EX2(l.d $f6, 48(a0))
+       EX2(l.d $f8, 64(a0))
+       EX2(l.d $f10, 80(a0))
+       EX2(l.d $f12, 96(a0))
+       EX2(l.d $f14, 112(a0))
+       EX2(l.d $f16, 128(a0))
+       EX2(l.d $f18, 144(a0))
+       EX2(l.d $f20, 160(a0))
+       EX2(l.d $f22, 176(a0))
+       EX2(l.d $f24, 192(a0))
+       EX2(l.d $f26, 208(a0))
+       EX2(l.d $f28, 224(a0))
+       EX2(l.d $f30, 240(a0))
        jr      ra
-        ctc1   t0,fcr31
+        ctc1   t0, fcr31
        .set    pop
        END(_restore_fp_context)
        .set    reorder
index 47077380c15c43aca685a3aacbea837f8cc42a65..9cc7bfab3419a80e02ae344b6bd154e154370864 100644 (file)
        .set    push
        SET_HARDFLOAT
 
-       /* Save floating point context */
+/**
+ * _save_fp_context() - save FP context from the FPU
+ * @a0 - pointer to fpregs field of sigcontext
+ * @a1 - pointer to fpc_csr field of sigcontext
+ *
+ * Save FP context, including the 32 FP data registers and the FP
+ * control & status register, from the FPU to signal context.
+ */
        LEAF(_save_fp_context)
        mfc0    t0,CP0_STATUS
        sll     t0,t0,2
 
        cfc1    t1,fcr31
        /* Store the 16 double precision registers */
-       sdc1    $f0,(SC_FPREGS+0)(a0)
-       sdc1    $f2,(SC_FPREGS+16)(a0)
-       sdc1    $f4,(SC_FPREGS+32)(a0)
-       sdc1    $f6,(SC_FPREGS+48)(a0)
-       sdc1    $f8,(SC_FPREGS+64)(a0)
-       sdc1    $f10,(SC_FPREGS+80)(a0)
-       sdc1    $f12,(SC_FPREGS+96)(a0)
-       sdc1    $f14,(SC_FPREGS+112)(a0)
-       sdc1    $f16,(SC_FPREGS+128)(a0)
-       sdc1    $f18,(SC_FPREGS+144)(a0)
-       sdc1    $f20,(SC_FPREGS+160)(a0)
-       sdc1    $f22,(SC_FPREGS+176)(a0)
-       sdc1    $f24,(SC_FPREGS+192)(a0)
-       sdc1    $f26,(SC_FPREGS+208)(a0)
-       sdc1    $f28,(SC_FPREGS+224)(a0)
-       sdc1    $f30,(SC_FPREGS+240)(a0)
+       sdc1    $f0,0(a0)
+       sdc1    $f2,16(a0)
+       sdc1    $f4,32(a0)
+       sdc1    $f6,48(a0)
+       sdc1    $f8,64(a0)
+       sdc1    $f10,80(a0)
+       sdc1    $f12,96(a0)
+       sdc1    $f14,112(a0)
+       sdc1    $f16,128(a0)
+       sdc1    $f18,144(a0)
+       sdc1    $f20,160(a0)
+       sdc1    $f22,176(a0)
+       sdc1    $f24,192(a0)
+       sdc1    $f26,208(a0)
+       sdc1    $f28,224(a0)
+       sdc1    $f30,240(a0)
        jr      ra
-        sw     t0,SC_FPC_CSR(a0)
+        sw     t0,(a1)
 1:     jr      ra
         nop
        END(_save_fp_context)
 
-/* Restore FPU state:
- *  - fp gp registers
- *  - cp1 status/control register
+/**
+ * _restore_fp_context() - restore FP context to the FPU
+ * @a0 - pointer to fpregs field of sigcontext
+ * @a1 - pointer to fpc_csr field of sigcontext
  *
- * We base the decision which registers to restore from the signal stack
- * frame on the current content of c0_status, not on the content of the
- * stack frame which might have been changed by the user.
+ * Restore FP context, including the 32 FP data registers and the FP
+ * control & status register, from signal context to the FPU.
  */
        LEAF(_restore_fp_context)
        mfc0    t0,CP0_STATUS
        sll     t0,t0,2
 
        bgez    t0,1f
-        lw     t0,SC_FPC_CSR(a0)
+        lw     t0,(a1)
        /* Restore the 16 double precision registers */
-       ldc1    $f0,(SC_FPREGS+0)(a0)
-       ldc1    $f2,(SC_FPREGS+16)(a0)
-       ldc1    $f4,(SC_FPREGS+32)(a0)
-       ldc1    $f6,(SC_FPREGS+48)(a0)
-       ldc1    $f8,(SC_FPREGS+64)(a0)
-       ldc1    $f10,(SC_FPREGS+80)(a0)
-       ldc1    $f12,(SC_FPREGS+96)(a0)
-       ldc1    $f14,(SC_FPREGS+112)(a0)
-       ldc1    $f16,(SC_FPREGS+128)(a0)
-       ldc1    $f18,(SC_FPREGS+144)(a0)
-       ldc1    $f20,(SC_FPREGS+160)(a0)
-       ldc1    $f22,(SC_FPREGS+176)(a0)
-       ldc1    $f24,(SC_FPREGS+192)(a0)
-       ldc1    $f26,(SC_FPREGS+208)(a0)
-       ldc1    $f28,(SC_FPREGS+224)(a0)
-       ldc1    $f30,(SC_FPREGS+240)(a0)
+       ldc1    $f0,0(a0)
+       ldc1    $f2,16(a0)
+       ldc1    $f4,32(a0)
+       ldc1    $f6,48(a0)
+       ldc1    $f8,64(a0)
+       ldc1    $f10,80(a0)
+       ldc1    $f12,96(a0)
+       ldc1    $f14,112(a0)
+       ldc1    $f16,128(a0)
+       ldc1    $f18,144(a0)
+       ldc1    $f20,160(a0)
+       ldc1    $f22,176(a0)
+       ldc1    $f24,192(a0)
+       ldc1    $f26,208(a0)
+       ldc1    $f28,224(a0)
+       ldc1    $f30,240(a0)
        jr      ra
         ctc1   t0,fcr31
 1:     jr      ra
index ca1cc30c0891f7a94a6cb0f0e9bf4941fc817fd2..1958910b75c07aa18926df5d50f93987dbe58406 100644 (file)
@@ -200,7 +200,7 @@ static inline __init unsigned long get_random_boot(void)
 
 #if defined(CONFIG_USE_OF)
        /* Get any additional entropy passed in device tree */
-       {
+       if (initial_boot_params) {
                int node, len;
                u64 *prop;
 
index c8e43e0c4066b599fd24b1135eebac0129252d17..c29d397eee86cf48a05d7945efe3cdf4a38a7273 100644 (file)
@@ -597,3 +597,6 @@ EXPORT(sys_call_table)
        PTR     sys_copy_file_range             /* 4360 */
        PTR     sys_preadv2
        PTR     sys_pwritev2
+       PTR     sys_pkey_mprotect
+       PTR     sys_pkey_alloc
+       PTR     sys_pkey_free                   /* 4365 */
index e6ede125059fe87253edb0bd5a5d9e46c8ef0fff..0687f96ee912698285a92abde87a7376897f076b 100644 (file)
@@ -435,4 +435,7 @@ EXPORT(sys_call_table)
        PTR     sys_copy_file_range             /* 5320 */
        PTR     sys_preadv2
        PTR     sys_pwritev2
+       PTR     sys_pkey_mprotect
+       PTR     sys_pkey_alloc
+       PTR     sys_pkey_free                   /* 5325 */
        .size   sys_call_table,.-sys_call_table
index 51d3988933f831bfe8dd7be7282ecd3174b22a89..0331ba39a065b8530818093d7b707921242a3672 100644 (file)
@@ -430,4 +430,7 @@ EXPORT(sysn32_call_table)
        PTR     sys_copy_file_range
        PTR     compat_sys_preadv2              /* 6325 */
        PTR     compat_sys_pwritev2
+       PTR     sys_pkey_mprotect
+       PTR     sys_pkey_alloc
+       PTR     sys_pkey_free
        .size   sysn32_call_table,.-sysn32_call_table
index 6efa7136748f6d72f1f5ea47e43e28eae87e59ea..5a47042dd25f7ae7f93cee8a596f311bf17a9382 100644 (file)
@@ -585,4 +585,7 @@ EXPORT(sys32_call_table)
        PTR     sys_copy_file_range             /* 4360 */
        PTR     compat_sys_preadv2
        PTR     compat_sys_pwritev2
+       PTR     sys_pkey_mprotect
+       PTR     sys_pkey_alloc
+       PTR     sys_pkey_free                   /* 4365 */
        .size   sys32_call_table,.-sys32_call_table
index 0d57909d90261d69790156614394bce5325c5bc0..f66e5ce505b23db0e666adb727653df71104c5d1 100644 (file)
@@ -368,6 +368,19 @@ static void __init bootmem_init(void)
                end = PFN_DOWN(boot_mem_map.map[i].addr
                                + boot_mem_map.map[i].size);
 
+#ifndef CONFIG_HIGHMEM
+               /*
+                * Skip highmem here so we get an accurate max_low_pfn if low
+                * memory stops short of high memory.
+                * If the region overlaps HIGHMEM_START, end is clipped so
+                * max_pfn excludes the highmem portion.
+                */
+               if (start >= PFN_DOWN(HIGHMEM_START))
+                       continue;
+               if (end > PFN_DOWN(HIGHMEM_START))
+                       end = PFN_DOWN(HIGHMEM_START);
+#endif
+
                if (end > max_low_pfn)
                        max_low_pfn = end;
                if (start < min_low_pfn)
diff --git a/arch/mips/kernel/smp-gic.c b/arch/mips/kernel/smp-gic.c
deleted file mode 100644 (file)
index 9b63829..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (C) 2013 Imagination Technologies
- * Author: Paul Burton <paul.burton@imgtec.com>
- *
- * Based on smp-cmp.c:
- *  Copyright (C) 2007 MIPS Technologies, Inc.
- *  Author: Chris Dearman (chris@mips.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/irqchip/mips-gic.h>
-#include <linux/printk.h>
-
-#include <asm/mips-cpc.h>
-#include <asm/smp-ops.h>
-
-void gic_send_ipi_single(int cpu, unsigned int action)
-{
-       unsigned long flags;
-       unsigned int intr;
-       unsigned int core = cpu_data[cpu].core;
-
-       pr_debug("CPU%d: %s cpu %d action %u status %08x\n",
-                smp_processor_id(), __func__, cpu, action, read_c0_status());
-
-       local_irq_save(flags);
-
-       switch (action) {
-       case SMP_CALL_FUNCTION:
-               intr = plat_ipi_call_int_xlate(cpu);
-               break;
-
-       case SMP_RESCHEDULE_YOURSELF:
-               intr = plat_ipi_resched_int_xlate(cpu);
-               break;
-
-       default:
-               BUG();
-       }
-
-       gic_send_ipi(intr);
-
-       if (mips_cpc_present() && (core != current_cpu_data.core)) {
-               while (!cpumask_test_cpu(cpu, &cpu_coherent_mask)) {
-                       mips_cm_lock_other(core, 0);
-                       mips_cpc_lock_other(core);
-                       write_cpc_co_cmd(CPC_Cx_CMD_PWRUP);
-                       mips_cpc_unlock_other();
-                       mips_cm_unlock_other();
-               }
-       }
-
-       local_irq_restore(flags);
-}
-
-void gic_send_ipi_mask(const struct cpumask *mask, unsigned int action)
-{
-       unsigned int i;
-
-       for_each_cpu(i, mask)
-               gic_send_ipi_single(i, action);
-}
index 4f9570a57e8d8a354f48502f92d2d82b5907e3d3..e077ea3e11fb36ee2d5f85f7e8415c97eeead1d9 100644 (file)
@@ -289,26 +289,3 @@ struct plat_smp_ops vsmp_smp_ops = {
        .prepare_cpus           = vsmp_prepare_cpus,
 };
 
-#ifdef CONFIG_PROC_FS
-static int proc_cpuinfo_chain_call(struct notifier_block *nfb,
-       unsigned long action_unused, void *data)
-{
-       struct proc_cpuinfo_notifier_args *pcn = data;
-       struct seq_file *m = pcn->m;
-       unsigned long n = pcn->n;
-
-       if (!cpu_has_mipsmt)
-               return NOTIFY_OK;
-
-       seq_printf(m, "VPE\t\t\t: %d\n", cpu_data[n].vpe_id);
-
-       return NOTIFY_OK;
-}
-
-static int __init proc_cpuinfo_notifier_init(void)
-{
-       return proc_cpuinfo_notifier(proc_cpuinfo_chain_call, 0);
-}
-
-subsys_initcall(proc_cpuinfo_notifier_init);
-#endif
index b0baf48951faabffac7a53fde37dc79748dea0ac..7ebb1918e2ac8abb5a2f8bff3b7d0f51cd126ee8 100644 (file)
@@ -25,7 +25,7 @@
 #include <linux/smp.h>
 #include <linux/spinlock.h>
 #include <linux/threads.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/time.h>
 #include <linux/timex.h>
 #include <linux/sched.h>
@@ -192,9 +192,11 @@ void mips_smp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
                                continue;
 
                        while (!cpumask_test_cpu(cpu, &cpu_coherent_mask)) {
+                               mips_cm_lock_other(core, 0);
                                mips_cpc_lock_other(core);
                                write_cpc_co_cmd(CPC_Cx_CMD_PWRUP);
                                mips_cpc_unlock_other();
+                               mips_cm_unlock_other();
                        }
                }
        }
@@ -229,7 +231,7 @@ static struct irqaction irq_call = {
        .name           = "IPI call"
 };
 
-static __init void smp_ipi_init_one(unsigned int virq,
+static void smp_ipi_init_one(unsigned int virq,
                                    struct irqaction *action)
 {
        int ret;
@@ -239,9 +241,11 @@ static __init void smp_ipi_init_one(unsigned int virq,
        BUG_ON(ret);
 }
 
-static int __init mips_smp_ipi_init(void)
+static unsigned int call_virq, sched_virq;
+
+int mips_smp_ipi_allocate(const struct cpumask *mask)
 {
-       unsigned int call_virq, sched_virq;
+       int virq;
        struct irq_domain *ipidomain;
        struct device_node *node;
 
@@ -268,16 +272,20 @@ static int __init mips_smp_ipi_init(void)
        if (!ipidomain)
                return 0;
 
-       call_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
-       BUG_ON(!call_virq);
+       virq = irq_reserve_ipi(ipidomain, mask);
+       BUG_ON(!virq);
+       if (!call_virq)
+               call_virq = virq;
 
-       sched_virq = irq_reserve_ipi(ipidomain, cpu_possible_mask);
-       BUG_ON(!sched_virq);
+       virq = irq_reserve_ipi(ipidomain, mask);
+       BUG_ON(!virq);
+       if (!sched_virq)
+               sched_virq = virq;
 
        if (irq_domain_is_ipi_per_cpu(ipidomain)) {
                int cpu;
 
-               for_each_cpu(cpu, cpu_possible_mask) {
+               for_each_cpu(cpu, mask) {
                        smp_ipi_init_one(call_virq + cpu, &irq_call);
                        smp_ipi_init_one(sched_virq + cpu, &irq_resched);
                }
@@ -286,6 +294,45 @@ static int __init mips_smp_ipi_init(void)
                smp_ipi_init_one(sched_virq, &irq_resched);
        }
 
+       return 0;
+}
+
+int mips_smp_ipi_free(const struct cpumask *mask)
+{
+       struct irq_domain *ipidomain;
+       struct device_node *node;
+
+       node = of_irq_find_parent(of_root);
+       ipidomain = irq_find_matching_host(node, DOMAIN_BUS_IPI);
+
+       /*
+        * Some platforms have half DT setup. So if we found irq node but
+        * didn't find an ipidomain, try to search for one that is not in the
+        * DT.
+        */
+       if (node && !ipidomain)
+               ipidomain = irq_find_matching_host(NULL, DOMAIN_BUS_IPI);
+
+       BUG_ON(!ipidomain);
+
+       if (irq_domain_is_ipi_per_cpu(ipidomain)) {
+               int cpu;
+
+               for_each_cpu(cpu, mask) {
+                       remove_irq(call_virq + cpu, &irq_call);
+                       remove_irq(sched_virq + cpu, &irq_resched);
+               }
+       }
+       irq_destroy_ipi(call_virq, mask);
+       irq_destroy_ipi(sched_virq, mask);
+       return 0;
+}
+
+
+static int __init mips_smp_ipi_init(void)
+{
+       mips_smp_ipi_allocate(cpu_possible_mask);
+
        call_desc = irq_to_desc(call_virq);
        sched_desc = irq_to_desc(sched_virq);
 
index 3de85be2486a44d5fbaedb64a556b3e9bf879210..3905003dfe2b918faa7acd39f350710c9ed6edd0 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
@@ -48,6 +49,7 @@
 #include <asm/fpu.h>
 #include <asm/fpu_emulator.h>
 #include <asm/idle.h>
+#include <asm/mips-cm.h>
 #include <asm/mips-r2-to-r6-emul.h>
 #include <asm/mipsregs.h>
 #include <asm/mipsmtregs.h>
@@ -154,7 +156,7 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs)
                print_ip_sym(pc);
                pc = unwind_stack(task, &sp, pc, &ra);
        } while (pc);
-       printk("\n");
+       pr_cont("\n");
 }
 
 /*
@@ -172,22 +174,24 @@ static void show_stacktrace(struct task_struct *task,
        printk("Stack :");
        i = 0;
        while ((unsigned long) sp & (PAGE_SIZE - 1)) {
-               if (i && ((i % (64 / field)) == 0))
-                       printk("\n       ");
+               if (i && ((i % (64 / field)) == 0)) {
+                       pr_cont("\n");
+                       printk("       ");
+               }
                if (i > 39) {
-                       printk(" ...");
+                       pr_cont(" ...");
                        break;
                }
 
                if (__get_user(stackdata, sp++)) {
-                       printk(" (Bad stack address)");
+                       pr_cont(" (Bad stack address)");
                        break;
                }
 
-               printk(" %0*lx", field, stackdata);
+               pr_cont(" %0*lx", field, stackdata);
                i++;
        }
-       printk("\n");
+       pr_cont("\n");
        show_backtrace(task, regs);
 }
 
@@ -227,18 +231,19 @@ static void show_code(unsigned int __user *pc)
        long i;
        unsigned short __user *pc16 = NULL;
 
-       printk("\nCode:");
+       printk("Code:");
 
        if ((unsigned long)pc & 1)
                pc16 = (unsigned short __user *)((unsigned long)pc & ~1);
        for(i = -3 ; i < 6 ; i++) {
                unsigned int insn;
                if (pc16 ? __get_user(insn, pc16 + i) : __get_user(insn, pc + i)) {
-                       printk(" (Bad address in epc)\n");
+                       pr_cont(" (Bad address in epc)\n");
                        break;
                }
-               printk("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>'));
+               pr_cont("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>'));
        }
+       pr_cont("\n");
 }
 
 static void __show_regs(const struct pt_regs *regs)
@@ -257,15 +262,15 @@ static void __show_regs(const struct pt_regs *regs)
                if ((i % 4) == 0)
                        printk("$%2d   :", i);
                if (i == 0)
-                       printk(" %0*lx", field, 0UL);
+                       pr_cont(" %0*lx", field, 0UL);
                else if (i == 26 || i == 27)
-                       printk(" %*s", field, "");
+                       pr_cont(" %*s", field, "");
                else
-                       printk(" %0*lx", field, regs->regs[i]);
+                       pr_cont(" %0*lx", field, regs->regs[i]);
 
                i++;
                if ((i % 4) == 0)
-                       printk("\n");
+                       pr_cont("\n");
        }
 
 #ifdef CONFIG_CPU_HAS_SMARTMIPS
@@ -286,46 +291,46 @@ static void __show_regs(const struct pt_regs *regs)
 
        if (cpu_has_3kex) {
                if (regs->cp0_status & ST0_KUO)
-                       printk("KUo ");
+                       pr_cont("KUo ");
                if (regs->cp0_status & ST0_IEO)
-                       printk("IEo ");
+                       pr_cont("IEo ");
                if (regs->cp0_status & ST0_KUP)
-                       printk("KUp ");
+                       pr_cont("KUp ");
                if (regs->cp0_status & ST0_IEP)
-                       printk("IEp ");
+                       pr_cont("IEp ");
                if (regs->cp0_status & ST0_KUC)
-                       printk("KUc ");
+                       pr_cont("KUc ");
                if (regs->cp0_status & ST0_IEC)
-                       printk("IEc ");
+                       pr_cont("IEc ");
        } else if (cpu_has_4kex) {
                if (regs->cp0_status & ST0_KX)
-                       printk("KX ");
+                       pr_cont("KX ");
                if (regs->cp0_status & ST0_SX)
-                       printk("SX ");
+                       pr_cont("SX ");
                if (regs->cp0_status & ST0_UX)
-                       printk("UX ");
+                       pr_cont("UX ");
                switch (regs->cp0_status & ST0_KSU) {
                case KSU_USER:
-                       printk("USER ");
+                       pr_cont("USER ");
                        break;
                case KSU_SUPERVISOR:
-                       printk("SUPERVISOR ");
+                       pr_cont("SUPERVISOR ");
                        break;
                case KSU_KERNEL:
-                       printk("KERNEL ");
+                       pr_cont("KERNEL ");
                        break;
                default:
-                       printk("BAD_MODE ");
+                       pr_cont("BAD_MODE ");
                        break;
                }
                if (regs->cp0_status & ST0_ERL)
-                       printk("ERL ");
+                       pr_cont("ERL ");
                if (regs->cp0_status & ST0_EXL)
-                       printk("EXL ");
+                       pr_cont("EXL ");
                if (regs->cp0_status & ST0_IE)
-                       printk("IE ");
+                       pr_cont("IE ");
        }
-       printk("\n");
+       pr_cont("\n");
 
        exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE;
        printk("Cause : %08x (ExcCode %02x)\n", cause, exccode);
@@ -444,6 +449,8 @@ asmlinkage void do_be(struct pt_regs *regs)
 
        if (board_be_handler)
                action = board_be_handler(regs, fixup != NULL);
+       else
+               mips_cm_error_report();
 
        switch (action) {
        case MIPS_BE_DISCARD:
@@ -701,6 +708,32 @@ asmlinkage void do_ov(struct pt_regs *regs)
        exception_exit(prev_state);
 }
 
+/*
+ * Send SIGFPE according to FCSR Cause bits, which must have already
+ * been masked against Enable bits.  This is important as Inexact can
+ * happen together with Overflow or Underflow, and `ptrace' can set
+ * any bits.
+ */
+void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr,
+                    struct task_struct *tsk)
+{
+       struct siginfo si = { .si_addr = fault_addr, .si_signo = SIGFPE };
+
+       if (fcr31 & FPU_CSR_INV_X)
+               si.si_code = FPE_FLTINV;
+       else if (fcr31 & FPU_CSR_DIV_X)
+               si.si_code = FPE_FLTDIV;
+       else if (fcr31 & FPU_CSR_OVF_X)
+               si.si_code = FPE_FLTOVF;
+       else if (fcr31 & FPU_CSR_UDF_X)
+               si.si_code = FPE_FLTUND;
+       else if (fcr31 & FPU_CSR_INE_X)
+               si.si_code = FPE_FLTRES;
+       else
+               si.si_code = __SI_FAULT;
+       force_sig_info(SIGFPE, &si, tsk);
+}
+
 int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
 {
        struct siginfo si = { 0 };
@@ -711,27 +744,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31)
                return 0;
 
        case SIGFPE:
-               si.si_addr = fault_addr;
-               si.si_signo = sig;
-               /*
-                * Inexact can happen together with Overflow or Underflow.
-                * Respect the mask to deliver the correct exception.
-                */
-               fcr31 &= (fcr31 & FPU_CSR_ALL_E) <<
-                        (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E));
-               if (fcr31 & FPU_CSR_INV_X)
-                       si.si_code = FPE_FLTINV;
-               else if (fcr31 & FPU_CSR_DIV_X)
-                       si.si_code = FPE_FLTDIV;
-               else if (fcr31 & FPU_CSR_OVF_X)
-                       si.si_code = FPE_FLTOVF;
-               else if (fcr31 & FPU_CSR_UDF_X)
-                       si.si_code = FPE_FLTUND;
-               else if (fcr31 & FPU_CSR_INE_X)
-                       si.si_code = FPE_FLTRES;
-               else
-                       si.si_code = __SI_FAULT;
-               force_sig_info(sig, &si, current);
+               force_fcr31_sig(fcr31, fault_addr, current);
                return 1;
 
        case SIGBUS:
@@ -795,13 +808,13 @@ static int simulate_fp(struct pt_regs *regs, unsigned int opcode,
        /* Run the emulator */
        sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
                                       &fault_addr);
-       fcr31 = current->thread.fpu.fcr31;
 
        /*
-        * We can't allow the emulated instruction to leave any of
-        * the cause bits set in $fcr31.
+        * We can't allow the emulated instruction to leave any
+        * enabled Cause bits set in $fcr31.
         */
-       current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+       fcr31 = mask_fcr31_x(current->thread.fpu.fcr31);
+       current->thread.fpu.fcr31 &= ~fcr31;
 
        /* Restore the hardware register state */
        own_fpu(1);
@@ -827,7 +840,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
                goto out;
 
        /* Clear FCSR.Cause before enabling interrupts */
-       write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X);
+       write_32bit_cp1_register(CP1_STATUS, fcr31 & ~mask_fcr31_x(fcr31));
        local_irq_enable();
 
        die_if_kernel("FP exception in kernel code", regs);
@@ -849,13 +862,13 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
                /* Run the emulator */
                sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
                                               &fault_addr);
-               fcr31 = current->thread.fpu.fcr31;
 
                /*
-                * We can't allow the emulated instruction to leave any of
-                * the cause bits set in $fcr31.
+                * We can't allow the emulated instruction to leave any
+                * enabled Cause bits set in $fcr31.
                 */
-               current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+               fcr31 = mask_fcr31_x(current->thread.fpu.fcr31);
+               current->thread.fpu.fcr31 &= ~fcr31;
 
                /* Restore the hardware register state */
                own_fpu(1);     /* Using the FPU again.  */
@@ -1420,13 +1433,13 @@ asmlinkage void do_cpu(struct pt_regs *regs)
 
                sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 0,
                                               &fault_addr);
-               fcr31 = current->thread.fpu.fcr31;
 
                /*
                 * We can't allow the emulated instruction to leave
-                * any of the cause bits set in $fcr31.
+                * any enabled Cause bits set in $fcr31.
                 */
-               current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+               fcr31 = mask_fcr31_x(current->thread.fpu.fcr31);
+               current->thread.fpu.fcr31 &= ~fcr31;
 
                /* Send a signal if required.  */
                if (!process_fpemu_return(sig, fault_addr, fcr31) && !err)
@@ -2091,6 +2104,14 @@ static void configure_exception_vector(void)
 {
        if (cpu_has_veic || cpu_has_vint) {
                unsigned long sr = set_c0_status(ST0_BEV);
+               /* If available, use WG to set top bits of EBASE */
+               if (cpu_has_ebase_wg) {
+#ifdef CONFIG_64BIT
+                       write_c0_ebase_64(ebase | MIPS_EBASE_WG);
+#else
+                       write_c0_ebase(ebase | MIPS_EBASE_WG);
+#endif
+               }
                write_c0_ebase(ebase);
                write_c0_status(sr);
                /* Setting vector spacing enables EI/VI mode  */
@@ -2127,8 +2148,17 @@ void per_cpu_trap_init(bool is_boot_cpu)
                 * We shouldn't trust a secondary core has a sane EBASE register
                 * so use the one calculated by the boot CPU.
                 */
-               if (!is_boot_cpu)
+               if (!is_boot_cpu) {
+                       /* If available, use WG to set top bits of EBASE */
+                       if (cpu_has_ebase_wg) {
+#ifdef CONFIG_64BIT
+                               write_c0_ebase_64(ebase | MIPS_EBASE_WG);
+#else
+                               write_c0_ebase(ebase | MIPS_EBASE_WG);
+#endif
+                       }
                        write_c0_ebase(ebase);
+               }
 
                cp0_compare_irq_shift = CAUSEB_TI - CAUSEB_IP;
                cp0_compare_irq = (read_c0_intctl() >> INTCTLB_IPTI) & 7;
@@ -2209,13 +2239,39 @@ void __init trap_init(void)
 
        if (cpu_has_veic || cpu_has_vint) {
                unsigned long size = 0x200 + VECTORSPACING*64;
+               phys_addr_t ebase_pa;
+
                ebase = (unsigned long)
                        __alloc_bootmem(size, 1 << fls(size), 0);
+
+               /*
+                * Try to ensure ebase resides in KSeg0 if possible.
+                *
+                * It shouldn't generally be in XKPhys on MIPS64 to avoid
+                * hitting a poorly defined exception base for Cache Errors.
+                * The allocation is likely to be in the low 512MB of physical,
+                * in which case we should be able to convert to KSeg0.
+                *
+                * EVA is special though as it allows segments to be rearranged
+                * and to become uncached during cache error handling.
+                */
+               ebase_pa = __pa(ebase);
+               if (!IS_ENABLED(CONFIG_EVA) && !WARN_ON(ebase_pa >= 0x20000000))
+                       ebase = CKSEG0ADDR(ebase_pa);
        } else {
                ebase = CAC_BASE;
 
-               if (cpu_has_mips_r2_r6)
-                       ebase += (read_c0_ebase() & 0x3ffff000);
+               if (cpu_has_mips_r2_r6) {
+                       if (cpu_has_ebase_wg) {
+#ifdef CONFIG_64BIT
+                               ebase = (read_c0_ebase_64() & ~0xfff);
+#else
+                               ebase = (read_c0_ebase() & ~0xfff);
+#endif
+                       } else {
+                               ebase += (read_c0_ebase() & 0x3ffff000);
+                       }
+               }
        }
 
        if (cpu_has_mmips) {
index 4c7c1558944a2a5382c4c8076866607c0064d392..dbb917403131441369c0ecf1c2a8324644958e85 100644 (file)
@@ -8,71 +8,12 @@
 #include <asm/branch.h>
 #include <asm/cpu-features.h>
 #include <asm/ptrace.h>
-#include <asm/inst.h>
+
+#include "probes-common.h"
 
 static inline int insn_has_delay_slot(const union mips_instruction insn)
 {
-       switch (insn.i_format.opcode) {
-       /*
-        * jr and jalr are in r_format format.
-        */
-       case spec_op:
-               switch (insn.r_format.func) {
-               case jalr_op:
-               case jr_op:
-                       return 1;
-               }
-               break;
-
-       /*
-        * This group contains:
-        * bltz_op, bgez_op, bltzl_op, bgezl_op,
-        * bltzal_op, bgezal_op, bltzall_op, bgezall_op.
-        */
-       case bcond_op:
-               switch (insn.i_format.rt) {
-               case bltz_op:
-               case bltzl_op:
-               case bgez_op:
-               case bgezl_op:
-               case bltzal_op:
-               case bltzall_op:
-               case bgezal_op:
-               case bgezall_op:
-               case bposge32_op:
-                       return 1;
-               }
-               break;
-
-       /*
-        * These are unconditional and in j_format.
-        */
-       case jal_op:
-       case j_op:
-       case beq_op:
-       case beql_op:
-       case bne_op:
-       case bnel_op:
-       case blez_op: /* not really i_format */
-       case blezl_op:
-       case bgtz_op:
-       case bgtzl_op:
-               return 1;
-
-       /*
-        * And now the FPA/cp1 branch instructions.
-        */
-       case cop1_op:
-#ifdef CONFIG_CPU_CAVIUM_OCTEON
-       case lwc2_op: /* This is bbit0 on Octeon */
-       case ldc2_op: /* This is bbit032 on Octeon */
-       case swc2_op: /* This is bbit1 on Octeon */
-       case sdc2_op: /* This is bbit132 on Octeon */
-#endif
-               return 1;
-       }
-
-       return 0;
+       return __insn_has_delay_slot(insn);
 }
 
 /**
@@ -95,6 +36,12 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *aup,
                return -EINVAL;
 
        inst.word = aup->insn[0];
+
+       if (__insn_is_compact_branch(inst)) {
+               pr_notice("Uprobes for compact branches are not supported\n");
+               return -EINVAL;
+       }
+
        aup->ixol[0] = aup->insn[insn_has_delay_slot(inst)];
        aup->ixol[1] = UPROBE_BRK_UPROBE_XOL;           /* NOP  */
 
@@ -282,19 +229,14 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm,
 void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
                                  void *src, unsigned long len)
 {
-       void *kaddr;
+       unsigned long kaddr, kstart;
 
        /* Initialize the slot */
-       kaddr = kmap_atomic(page);
-       memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len);
-       kunmap_atomic(kaddr);
-
-       /*
-        * The MIPS version of flush_icache_range will operate safely on
-        * user space addresses and more importantly, it doesn't require a
-        * VMA argument.
-        */
-       flush_icache_range(vaddr, vaddr + len);
+       kaddr = (unsigned long)kmap_atomic(page);
+       kstart = kaddr + (vaddr & ~PAGE_MASK);
+       memcpy((void *)kstart, src, len);
+       flush_icache_range(kstart, kstart + len);
+       kunmap_atomic((void *)kaddr);
 }
 
 /**
index a36b77e1705c5839663585d1da910338ae23278c..f43629979a0e59959db7f1732233ad2db538ca5d 100644 (file)
@@ -12,7 +12,6 @@
 
 #include <linux/errno.h>
 #include <linux/err.h>
-#include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
 #include <linux/bootmem.h>
index d280894915ed0dd780052f6a99c58a2172f5fcd2..010cef2406880e8742a389575d9c54bb1c004da7 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/err.h>
 #include <linux/highmem.h>
 #include <linux/kvm_host.h>
-#include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
 #include <linux/bootmem.h>
@@ -45,8 +44,8 @@ static int kvm_mips_trans_replace(struct kvm_vcpu *vcpu, u32 *opc,
        } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) {
                local_irq_save(flags);
                memcpy((void *)opc, (void *)&replace, sizeof(u32));
-               local_flush_icache_range((unsigned long)opc,
-                                        (unsigned long)opc + 32);
+               __local_flush_icache_user_range((unsigned long)opc,
+                                               (unsigned long)opc + 32);
                local_irq_restore(flags);
        } else {
                kvm_err("%s: Invalid address: %p\n", __func__, opc);
index 4db4c03708590f3030bdf84d87264ad2a79f6bc4..aa0937423e287b06e007b2251977ff10e23c63b2 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/err.h>
 #include <linux/ktime.h>
 #include <linux/kvm_host.h>
-#include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
 #include <linux/bootmem.h>
@@ -791,15 +790,15 @@ enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu)
        struct mips_coproc *cop0 = vcpu->arch.cop0;
        enum emulation_result er = EMULATE_DONE;
 
-       if (kvm_read_c0_guest_status(cop0) & ST0_EXL) {
+       if (kvm_read_c0_guest_status(cop0) & ST0_ERL) {
+               kvm_clear_c0_guest_status(cop0, ST0_ERL);
+               vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0);
+       } else if (kvm_read_c0_guest_status(cop0) & ST0_EXL) {
                kvm_debug("[%#lx] ERET to %#lx\n", vcpu->arch.pc,
                          kvm_read_c0_guest_epc(cop0));
                kvm_clear_c0_guest_status(cop0, ST0_EXL);
                vcpu->arch.pc = kvm_read_c0_guest_epc(cop0);
 
-       } else if (kvm_read_c0_guest_status(cop0) & ST0_ERL) {
-               kvm_clear_c0_guest_status(cop0, ST0_ERL);
-               vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0);
        } else {
                kvm_err("[%#lx] ERET when MIPS_SR_EXL|MIPS_SR_ERL == 0\n",
                        vcpu->arch.pc);
@@ -1529,13 +1528,25 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
                                            struct kvm_vcpu *vcpu)
 {
        enum emulation_result er = EMULATE_DO_MMIO;
+       unsigned long curr_pc;
        u32 op, rt;
        u32 bytes;
 
        rt = inst.i_format.rt;
        op = inst.i_format.opcode;
 
-       vcpu->arch.pending_load_cause = cause;
+       /*
+        * Find the resume PC now while we have safe and easy access to the
+        * prior branch instruction, and save it for
+        * kvm_mips_complete_mmio_load() to restore later.
+        */
+       curr_pc = vcpu->arch.pc;
+       er = update_pc(vcpu, cause);
+       if (er == EMULATE_FAIL)
+               return er;
+       vcpu->arch.io_pc = vcpu->arch.pc;
+       vcpu->arch.pc = curr_pc;
+
        vcpu->arch.io_gpr = rt;
 
        switch (op) {
@@ -2495,9 +2506,8 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
                goto done;
        }
 
-       er = update_pc(vcpu, vcpu->arch.pending_load_cause);
-       if (er == EMULATE_FAIL)
-               return er;
+       /* Restore saved resume PC */
+       vcpu->arch.pc = vcpu->arch.io_pc;
 
        switch (run->mmio.len) {
        case 4:
@@ -2519,11 +2529,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
                break;
        }
 
-       if (vcpu->arch.pending_load_cause & CAUSEF_BD)
-               kvm_debug("[%#lx] Completing %d byte BD Load to gpr %d (0x%08lx) type %d\n",
-                         vcpu->arch.pc, run->mmio.len, vcpu->arch.io_gpr, *gpr,
-                         vcpu->mmio_needed);
-
 done:
        return er;
 }
index ad28dac6b7e9557346813a02f4699cbcdd5b6d74..e88403b3dcdd5d0bf3ecf9d35555bbcb345e50ed 100644 (file)
@@ -11,7 +11,6 @@
 
 #include <linux/errno.h>
 #include <linux/err.h>
-#include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
 #include <linux/bootmem.h>
index ce961495b5e123f374a4d129387daffa20974373..06a60b19acfb53c2788ba6b9c79de983bb6fe43e 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/err.h>
 #include <linux/kdebug.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
 #include <linux/bootmem.h>
@@ -425,7 +426,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 static void kvm_mips_check_asids(struct kvm_vcpu *vcpu)
 {
        struct mips_coproc *cop0 = vcpu->arch.cop0;
-       int cpu = smp_processor_id();
+       int i, cpu = smp_processor_id();
        unsigned int gasid;
 
        /*
@@ -441,6 +442,9 @@ static void kvm_mips_check_asids(struct kvm_vcpu *vcpu)
                                                vcpu);
                        vcpu->arch.guest_user_asid[cpu] =
                                vcpu->arch.guest_user_mm.context.asid[cpu];
+                       for_each_possible_cpu(i)
+                               if (i != cpu)   /* every CPU except this one */
+                                       vcpu->arch.guest_user_asid[i] = 0;
                        vcpu->arch.last_user_gasid = gasid;
                }
        }
index 03883ba806e252d451f5df348c5414b3b31971cb..3b677c851be0794861d06a2d7b875b7fddfedff7 100644 (file)
@@ -260,13 +260,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        if ((vcpu->arch.guest_user_asid[cpu] ^ asid_cache(cpu)) &
                                                asid_version_mask(cpu)) {
-               u32 gasid = kvm_read_c0_guest_entryhi(vcpu->arch.cop0) &
-                               KVM_ENTRYHI_ASID;
-
                kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu);
                vcpu->arch.guest_user_asid[cpu] =
                    vcpu->arch.guest_user_mm.context.asid[cpu];
-               vcpu->arch.last_user_gasid = gasid;
                newasid++;
 
                kvm_debug("[%d]: cpu_context: %#lx\n", cpu,
index 3a5484f9aa5078a3c5a67b35c2e56b8cbd09c35b..3b20441f2bebfb4516f580e6301f3e269e91465f 100644 (file)
@@ -11,7 +11,6 @@
 
 #include <linux/errno.h>
 #include <linux/err.h>
-#include <linux/module.h>
 #include <linux/vmalloc.h>
 
 #include <linux/kvm_host.h>
index 4625495f9230aa97dbdf2d51b8eb9157d1debcbc..577ec81b557dcfa2d4805ed39cbcaffb1f7052d1 100644 (file)
@@ -6,7 +6,7 @@
  *  Copyright (C) 2012 John Crispin <john@phrozen.org>
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/of_platform.h>
 #include <linux/of_gpio.h>
 #include <linux/dma-mapping.h>
@@ -55,7 +55,6 @@ static const struct of_device_id vmmc_match[] = {
        { .compatible = "lantiq,vmmc-xway" },
        {},
 };
-MODULE_DEVICE_TABLE(of, vmmc_match);
 
 static struct platform_driver vmmc_driver = {
        .probe = vmmc_probe,
@@ -64,5 +63,4 @@ static struct platform_driver vmmc_driver = {
                .of_match_table = vmmc_match,
        },
 };
-
-module_platform_driver(vmmc_driver);
+builtin_platform_driver(vmmc_driver);
index 71e518c1e7e7500782420ee2c68279d178dc9694..f0a0f2d431b2335fc9f851a41766ec089ea7073d 100644 (file)
@@ -1,4 +1,7 @@
 /*
+ * Lantiq XRX200 PHY Firmware Loader
+ * Author: John Crispin
+ *
  *  This program is free software; you can redistribute it and/or modify it
  *  under the terms of the GNU General Public License version 2 as published
  *  by the Free Software Foundation.
@@ -8,7 +11,6 @@
 
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
-#include <linux/module.h>
 #include <linux/firmware.h>
 #include <linux/of_platform.h>
 
@@ -100,7 +102,6 @@ static const struct of_device_id xway_phy_match[] = {
        { .compatible = "lantiq,phy-xrx200" },
        {},
 };
-MODULE_DEVICE_TABLE(of, xway_phy_match);
 
 static struct platform_driver xway_phy_driver = {
        .probe = xway_phy_fw_probe,
@@ -109,9 +110,4 @@ static struct platform_driver xway_phy_driver = {
                .of_match_table = xway_phy_match,
        },
 };
-
-module_platform_driver(xway_phy_driver);
-
-MODULE_AUTHOR("John Crispin <john@phrozen.org>");
-MODULE_DESCRIPTION("Lantiq XRX200 PHY Firmware Loader");
-MODULE_LICENSE("GPL");
+builtin_platform_driver(xway_phy_driver);
index 927dc94a030f3942dd72723ec080486654c14846..c3e22053d13eb2172374c90c2a8eb75cb0299784 100644 (file)
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
 
 #include "libgcc.h"
 
index 9fdf1a598428a5804937be3515c42cabee401e00..17456024873d20918cc151c16e4d75173b6869ee 100644 (file)
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
 
 #include "libgcc.h"
 
index e3e77aa52c957d7971976de7f7a6b5839daee989..a8114148f82a4f4c300c064013b0068f3273191f 100644 (file)
@@ -1,4 +1,5 @@
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/compiler.h>
 
 unsigned long long notrace __bswapdi2(unsigned long long u)
 {
index 530a8afe6fda20e345dff0d1e05ff8035cc0950a..106fd978317d3b35740e62f4f0f00979011b050e 100644 (file)
@@ -1,4 +1,5 @@
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/compiler.h>
 
 unsigned int notrace __bswapsi2(unsigned int u)
 {
index 06857da96993c2474879d4f2f3bed382fbf141a1..9d849d8743c953d5f1ad9976af7314bd85c31072 100644 (file)
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
 
 #include "libgcc.h"
 
index 21d27c6819a2fd813e56bde24924950abdcd9004..2307a3cb2714fca0819cfb7727eca41650f51fbf 100644 (file)
@@ -8,7 +8,7 @@
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  * Copyright (C) 2007, 2014 Maciej W. Rozycki
  */
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/param.h>
 #include <linux/smp.h>
 #include <linux/stringify.h>
index 0f80b936e75ec92015d1ebdfb815dfaed14a9909..6eb50a7137db05c9dc18b38bf33b317192aa058a 100644 (file)
@@ -135,42 +135,42 @@ static void dump_tlb(int first, int last)
                c0 = (entrylo0 & ENTRYLO_C) >> ENTRYLO_C_SHIFT;
                c1 = (entrylo1 & ENTRYLO_C) >> ENTRYLO_C_SHIFT;
 
-               printk("va=%0*lx asid=%0*lx",
-                      vwidth, (entryhi & ~0x1fffUL),
-                      asidwidth, entryhi & asidmask);
+               pr_cont("va=%0*lx asid=%0*lx",
+                       vwidth, (entryhi & ~0x1fffUL),
+                       asidwidth, entryhi & asidmask);
                if (cpu_has_guestid)
-                       printk(" gid=%02lx",
-                              (guestctl1 & MIPS_GCTL1_RID)
+                       pr_cont(" gid=%02lx",
+                               (guestctl1 & MIPS_GCTL1_RID)
                                        >> MIPS_GCTL1_RID_SHIFT);
                /* RI/XI are in awkward places, so mask them off separately */
                pa = entrylo0 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI);
                if (xpa)
                        pa |= (unsigned long long)readx_c0_entrylo0() << 30;
                pa = (pa << 6) & PAGE_MASK;
-               printk("\n\t[");
+               pr_cont("\n\t[");
                if (cpu_has_rixi)
-                       printk("ri=%d xi=%d ",
-                              (entrylo0 & MIPS_ENTRYLO_RI) ? 1 : 0,
-                              (entrylo0 & MIPS_ENTRYLO_XI) ? 1 : 0);
-               printk("pa=%0*llx c=%d d=%d v=%d g=%d] [",
-                      pwidth, pa, c0,
-                      (entrylo0 & ENTRYLO_D) ? 1 : 0,
-                      (entrylo0 & ENTRYLO_V) ? 1 : 0,
-                      (entrylo0 & ENTRYLO_G) ? 1 : 0);
+                       pr_cont("ri=%d xi=%d ",
+                               (entrylo0 & MIPS_ENTRYLO_RI) ? 1 : 0,
+                               (entrylo0 & MIPS_ENTRYLO_XI) ? 1 : 0);
+               pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d] [",
+                       pwidth, pa, c0,
+                       (entrylo0 & ENTRYLO_D) ? 1 : 0,
+                       (entrylo0 & ENTRYLO_V) ? 1 : 0,
+                       (entrylo0 & ENTRYLO_G) ? 1 : 0);
                /* RI/XI are in awkward places, so mask them off separately */
                pa = entrylo1 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI);
                if (xpa)
                        pa |= (unsigned long long)readx_c0_entrylo1() << 30;
                pa = (pa << 6) & PAGE_MASK;
                if (cpu_has_rixi)
-                       printk("ri=%d xi=%d ",
-                              (entrylo1 & MIPS_ENTRYLO_RI) ? 1 : 0,
-                              (entrylo1 & MIPS_ENTRYLO_XI) ? 1 : 0);
-               printk("pa=%0*llx c=%d d=%d v=%d g=%d]\n",
-                      pwidth, pa, c1,
-                      (entrylo1 & ENTRYLO_D) ? 1 : 0,
-                      (entrylo1 & ENTRYLO_V) ? 1 : 0,
-                      (entrylo1 & ENTRYLO_G) ? 1 : 0);
+                       pr_cont("ri=%d xi=%d ",
+                               (entrylo1 & MIPS_ENTRYLO_RI) ? 1 : 0,
+                               (entrylo1 & MIPS_ENTRYLO_XI) ? 1 : 0);
+               pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d]\n",
+                       pwidth, pa, c1,
+                       (entrylo1 & ENTRYLO_D) ? 1 : 0,
+                       (entrylo1 & ENTRYLO_V) ? 1 : 0,
+                       (entrylo1 & ENTRYLO_G) ? 1 : 0);
        }
        printk("\n");
 
index fd35daa45314a370b89f521e3ec401d39e5dfdc9..8ed3f25a9047d06b80cfeffa495cb2f18984a306 100644 (file)
@@ -7,9 +7,11 @@
  *     written by Ralf Baechle <ralf@linux-mips.org>
  */
 #include <linux/pci.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <asm/io.h>
 
+#ifdef CONFIG_PCI_DRIVERS_LEGACY
+
 void __iomem *__pci_ioport_map(struct pci_dev *dev,
                               unsigned long port, unsigned int nr)
 {
@@ -40,6 +42,8 @@ void __iomem *__pci_ioport_map(struct pci_dev *dev,
        return (void __iomem *) (ctrl->io_map_base + port);
 }
 
+#endif /* CONFIG_PCI_DRIVERS_LEGACY */
+
 void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
 {
        iounmap(addr);
index 8e7e378ce51c323a34a8e9d0aa28a49b95ba90cd..9daa92428e23b39b33c6b2c3a63670c11755e5f2 100644 (file)
@@ -6,7 +6,7 @@
  * (C) Copyright 2007 MIPS Technologies, Inc.
  *     written by Ralf Baechle <ralf@linux-mips.org>
  */
-#include <linux/module.h>
+#include <linux/export.h>
 #include <asm/io.h>
 
 /*
index 364547449c6532ffb7e322e736e484cd5b2fa40f..221167c1be551aa1b950f038a070e7f92b78a84e 100644 (file)
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
 
 #include "libgcc.h"
 
index 744f4a7bc49dfa5eabbde9b78228e0d2065799d4..85b4086e553e8734cf4b286a2a65d5159ed4f5d9 100644 (file)
@@ -53,15 +53,15 @@ static void dump_tlb(int first, int last)
                         */
                        printk("Index: %2d ", i);
 
-                       printk("va=%08lx asid=%08lx"
-                              "  [pa=%06lx n=%d d=%d v=%d g=%d]",
-                              entryhi & PAGE_MASK,
-                              entryhi & asid_mask,
-                              entrylo0 & PAGE_MASK,
-                              (entrylo0 & R3K_ENTRYLO_N) ? 1 : 0,
-                              (entrylo0 & R3K_ENTRYLO_D) ? 1 : 0,
-                              (entrylo0 & R3K_ENTRYLO_V) ? 1 : 0,
-                              (entrylo0 & R3K_ENTRYLO_G) ? 1 : 0);
+                       pr_cont("va=%08lx asid=%08lx"
+                               "  [pa=%06lx n=%d d=%d v=%d g=%d]",
+                               entryhi & PAGE_MASK,
+                               entryhi & asid_mask,
+                               entrylo0 & PAGE_MASK,
+                               (entrylo0 & R3K_ENTRYLO_N) ? 1 : 0,
+                               (entrylo0 & R3K_ENTRYLO_D) ? 1 : 0,
+                               (entrylo0 & R3K_ENTRYLO_V) ? 1 : 0,
+                               (entrylo0 & R3K_ENTRYLO_G) ? 1 : 0);
                }
        }
        printk("\n");
index bd599f58234c9558089acb28f37ac677bd6818dc..08067fa538f2dd070450821def7cb976460aba56 100644 (file)
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/export.h>
 
 #include "libgcc.h"
 
index 7704f20529d63589fce06b73e9fdf1a819f28be8..3c0c2f2096cd8d1862e5807460d5deb39e3ab3fb 100644 (file)
@@ -19,6 +19,21 @@ config LOONGSON1_LS1B
        select USE_GENERIC_EARLY_PRINTK_8250
        select COMMON_CLK
 
+config LOONGSON1_LS1C
+       bool "Loongson LS1C board"
+       select CEVT_R4K if !MIPS_EXTERNAL_TIMER
+       select CSRC_R4K if !MIPS_EXTERNAL_TIMER
+       select SYS_HAS_CPU_LOONGSON1C
+       select DMA_NONCOHERENT
+       select BOOT_ELF32
+       select IRQ_MIPS_CPU
+       select SYS_SUPPORTS_32BIT_KERNEL
+       select SYS_SUPPORTS_LITTLE_ENDIAN
+       select SYS_SUPPORTS_HIGHMEM
+       select SYS_SUPPORTS_MIPS16
+       select SYS_HAS_EARLY_PRINTK
+       select USE_GENERIC_EARLY_PRINTK_8250
+       select COMMON_CLK
 endchoice
 
 menuconfig CEVT_CSRC_LS1X
index 5f4bd6e071ca06410db2e09f5cc400b3ab769d43..1ab2c5bbc06606d9e7c984fd8410647c1698e999 100644 (file)
@@ -9,3 +9,9 @@ obj-$(CONFIG_MACH_LOONGSON32) += common/
 #
 
 obj-$(CONFIG_LOONGSON1_LS1B)  += ls1b/
+
+#
+# Loongson LS1C board
+#
+
+obj-$(CONFIG_LOONGSON1_LS1C)  += ls1c/
index ebb6dc290f0ab5591da3076dfc387a660c786c4b..ffe01c6d0037db032e84344d9eb4682c9d081fb4 100644 (file)
@@ -5,3 +5,4 @@ cflags-$(CONFIG_CPU_LOONGSON1)  += \
 platform-$(CONFIG_MACH_LOONGSON32)     += loongson32/
 cflags-$(CONFIG_MACH_LOONGSON32)       += -I$(srctree)/arch/mips/include/asm/mach-loongson32
 load-$(CONFIG_LOONGSON1_LS1B)          += 0xffffffff80100000
+load-$(CONFIG_LOONGSON1_LS1C)          += 0xffffffff80100000
index 455a7704a90f98a73820e6fa691e991faa2baa60..635a4abe1f4821d990533d0f53508b871a62ff28 100644 (file)
@@ -62,12 +62,58 @@ static void ls1x_irq_unmask(struct irq_data *d)
                        | (1 << bit), LS1X_INTC_INTIEN(n));
 }
 
+static int ls1x_irq_settype(struct irq_data *d, unsigned int type)
+{
+       unsigned int bit = (d->irq - LS1X_IRQ_BASE) & 0x1f;
+       unsigned int n = (d->irq - LS1X_IRQ_BASE) >> 5;
+
+       switch (type) {
+       case IRQ_TYPE_LEVEL_HIGH:
+               __raw_writel(__raw_readl(LS1X_INTC_INTPOL(n))
+                       | (1 << bit), LS1X_INTC_INTPOL(n));
+               __raw_writel(__raw_readl(LS1X_INTC_INTEDGE(n))
+                       & ~(1 << bit), LS1X_INTC_INTEDGE(n));
+               break;
+       case IRQ_TYPE_LEVEL_LOW:
+               __raw_writel(__raw_readl(LS1X_INTC_INTPOL(n))
+                       & ~(1 << bit), LS1X_INTC_INTPOL(n));
+               __raw_writel(__raw_readl(LS1X_INTC_INTEDGE(n))
+                       & ~(1 << bit), LS1X_INTC_INTEDGE(n));
+               break;
+       case IRQ_TYPE_EDGE_RISING:
+               __raw_writel(__raw_readl(LS1X_INTC_INTPOL(n))
+                       | (1 << bit), LS1X_INTC_INTPOL(n));
+               __raw_writel(__raw_readl(LS1X_INTC_INTEDGE(n))
+                       | (1 << bit), LS1X_INTC_INTEDGE(n));
+               break;
+       case IRQ_TYPE_EDGE_FALLING:
+               __raw_writel(__raw_readl(LS1X_INTC_INTPOL(n))
+                       & ~(1 << bit), LS1X_INTC_INTPOL(n));
+               __raw_writel(__raw_readl(LS1X_INTC_INTEDGE(n))
+                       | (1 << bit), LS1X_INTC_INTEDGE(n));
+               break;
+       case IRQ_TYPE_EDGE_BOTH:
+               __raw_writel(__raw_readl(LS1X_INTC_INTPOL(n))
+                       & ~(1 << bit), LS1X_INTC_INTPOL(n));
+               __raw_writel(__raw_readl(LS1X_INTC_INTEDGE(n))
+                       | (1 << bit), LS1X_INTC_INTEDGE(n));
+               break;
+       case IRQ_TYPE_NONE:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static struct irq_chip ls1x_irq_chip = {
        .name           = "LS1X-INTC",
        .irq_ack        = ls1x_irq_ack,
        .irq_mask       = ls1x_irq_mask,
        .irq_mask_ack   = ls1x_irq_mask_ack,
        .irq_unmask     = ls1x_irq_unmask,
+       .irq_set_type   = ls1x_irq_settype,
 };
 
 static void ls1x_irq_dispatch(int n)
@@ -107,7 +153,7 @@ asmlinkage void plat_irq_dispatch(void)
 
 }
 
-struct irqaction cascade_irqaction = {
+static struct irqaction cascade_irqaction = {
        .handler = no_action,
        .name = "cascade",
        .flags = IRQF_NO_THREAD,
@@ -120,7 +166,7 @@ static void __init ls1x_irq_init(int base)
        /* Disable interrupts and clear pending,
         * setup all IRQs as high level triggered
         */
-       for (n = 0; n < 4; n++) {
+       for (n = 0; n < INTN; n++) {
                __raw_writel(0x0, LS1X_INTC_INTIEN(n));
                __raw_writel(0xffffffff, LS1X_INTC_INTCLR(n));
                __raw_writel(0xffffffff, LS1X_INTC_INTPOL(n));
@@ -129,7 +175,7 @@ static void __init ls1x_irq_init(int base)
        }
 
 
-       for (n = base; n < LS1X_IRQS; n++) {
+       for (n = base; n < NR_IRQS; n++) {
                irq_set_chip_and_handler(n, &ls1x_irq_chip,
                                         handle_level_irq);
        }
@@ -138,6 +184,9 @@ static void __init ls1x_irq_init(int base)
        setup_irq(INT1_IRQ, &cascade_irqaction);
        setup_irq(INT2_IRQ, &cascade_irqaction);
        setup_irq(INT3_IRQ, &cascade_irqaction);
+#if defined(CONFIG_LOONGSON1_LS1C)
+       setup_irq(INT4_IRQ, &cascade_irqaction);
+#endif
 }
 
 void __init arch_init_irq(void)
index f2c714d8fb60c7654274cd1e1ee604954a86e6c4..beff0852c6a479ef47911ea69e489c68999256fd 100644 (file)
 #include <linux/stmmac.h>
 #include <linux/usb/ehci_pdriver.h>
 
+#include <platform.h>
 #include <loongson1.h>
 #include <cpufreq.h>
 #include <dma.h>
 #include <nand.h>
 
+#define LS1X_RTC_CTRL  ((void __iomem *)KSEG1ADDR(LS1X_RTC_BASE + 0x40))
+#define RTC_EXTCLK_OK  (BIT(5) | BIT(8))
+#define RTC_EXTCLK_EN  BIT(8)
+
 /* 8250/16550 compatible UART */
 #define LS1X_UART(_id)                                         \
        {                                                       \
@@ -65,6 +70,15 @@ void __init ls1x_serial_set_uartclk(struct platform_device *pdev)
                p->uartclk = clk_get_rate(clk);
 }
 
+void __init ls1x_rtc_set_extclk(struct platform_device *pdev)
+{
+       u32 val;
+
+       val = __raw_readl(LS1X_RTC_CTRL);
+       if (!(val & RTC_EXTCLK_OK))
+               __raw_writel(val | RTC_EXTCLK_EN, LS1X_RTC_CTRL);
+}
+
 /* CPUFreq */
 static struct plat_ls1x_cpufreq ls1x_cpufreq_pdata = {
        .clk_name       = "cpu_clk",
@@ -132,6 +146,7 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv)
 
        val = __raw_readl(LS1X_MUX_CTRL1);
 
+#if defined(CONFIG_LOONGSON1_LS1B)
        plat_dat = dev_get_platdata(&pdev->dev);
        if (plat_dat->bus_id) {
                __raw_writel(__raw_readl(LS1X_MUX_CTRL0) | GMAC1_USE_UART1 |
@@ -165,6 +180,17 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv)
                val &= ~GMAC0_SHUT;
        }
        __raw_writel(val, LS1X_MUX_CTRL1);
+#elif defined(CONFIG_LOONGSON1_LS1C)
+       plat_dat = dev_get_platdata(&pdev->dev);
+
+       val &= ~PHY_INTF_SELI;
+       if (plat_dat->interface == PHY_INTERFACE_MODE_RMII)
+               val |= 0x4 << PHY_INTF_SELI_SHIFT;
+       __raw_writel(val, LS1X_MUX_CTRL1);
+
+       val = __raw_readl(LS1X_MUX_CTRL0);
+       __raw_writel(val & (~GMAC_SHUT), LS1X_MUX_CTRL0);
+#endif
 
        return 0;
 }
@@ -172,7 +198,11 @@ int ls1x_eth_mux_init(struct platform_device *pdev, void *priv)
 static struct plat_stmmacenet_data ls1x_eth0_pdata = {
        .bus_id         = 0,
        .phy_addr       = -1,
+#if defined(CONFIG_LOONGSON1_LS1B)
        .interface      = PHY_INTERFACE_MODE_MII,
+#elif defined(CONFIG_LOONGSON1_LS1C)
+       .interface      = PHY_INTERFACE_MODE_RMII,
+#endif
        .mdio_bus_data  = &ls1x_mdio_bus_data,
        .dma_cfg        = &ls1x_eth_dma_cfg,
        .has_gmac       = 1,
@@ -203,6 +233,7 @@ struct platform_device ls1x_eth0_pdev = {
        },
 };
 
+#ifdef CONFIG_LOONGSON1_LS1B
 static struct plat_stmmacenet_data ls1x_eth1_pdata = {
        .bus_id         = 1,
        .phy_addr       = -1,
@@ -236,6 +267,7 @@ struct platform_device ls1x_eth1_pdev = {
                .platform_data = &ls1x_eth1_pdata,
        },
 };
+#endif /* CONFIG_LOONGSON1_LS1B */
 
 /* GPIO */
 static struct resource ls1x_gpio0_resources[] = {
index 62f41afee241e7a4df305725bac9659c0a796bea..1640744288ee020df7200b5f01165749075bfe67 100644 (file)
@@ -22,7 +22,11 @@ const char *get_system_type(void)
 
        switch (processor_id & PRID_REV_MASK) {
        case PRID_REV_LOONGSON1B:
+#if defined(CONFIG_LOONGSON1_LS1B)
                return "LOONGSON LS1B";
+#elif defined(CONFIG_LOONGSON1_LS1C)
+               return "LOONGSON LS1C";
+#endif
        default:
                return "LOONGSON (unknown)";
        }
diff --git a/arch/mips/loongson32/ls1c/Makefile b/arch/mips/loongson32/ls1c/Makefile
new file mode 100644 (file)
index 0000000..a92c6cd
--- /dev/null
@@ -0,0 +1,5 @@
+#
+# Makefile for loongson1C based machines.
+#
+
+obj-y += board.o
diff --git a/arch/mips/loongson32/ls1c/board.c b/arch/mips/loongson32/ls1c/board.c
new file mode 100644 (file)
index 0000000..a96bed5
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2016 Yang Ling <gnaygnil@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under  the terms of the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <platform.h>
+
+static struct platform_device *ls1c_platform_devices[] __initdata = {
+       &ls1x_uart_pdev,
+       &ls1x_eth0_pdev,
+       &ls1x_rtc_pdev,
+};
+
+static int __init ls1c_platform_init(void)
+{
+       ls1x_serial_set_uartclk(&ls1x_uart_pdev);
+       ls1x_rtc_set_extclk(&ls1x_rtc_pdev);
+
+       return platform_add_devices(ls1c_platform_devices,
+                                  ARRAY_SIZE(ls1c_platform_devices));
+}
+
+arch_initcall(ls1c_platform_init);
index 05b1d7cf9514c71422c587ddd4955a2a4992623e..0e45b061e514153a397b9d953986a6cc077f721e 100644 (file)
@@ -294,6 +294,8 @@ void octeon_cache_init(void)
        flush_data_cache_page           = octeon_flush_data_cache_page;
        flush_icache_range              = octeon_flush_icache_range;
        local_flush_icache_range        = local_octeon_flush_icache_range;
+       __flush_icache_user_range       = octeon_flush_icache_range;
+       __local_flush_icache_user_range = local_octeon_flush_icache_range;
 
        __flush_kernel_vmap_range       = octeon_flush_kernel_vmap_range;
 
index 135ec313c1f6594b31fbda33a7f8fcfc05735e95..21e4e662c1fa6c29597a37ab837643d68141a88b 100644 (file)
@@ -325,6 +325,8 @@ void r3k_cache_init(void)
        flush_cache_page = r3k_flush_cache_page;
        flush_icache_range = r3k_flush_icache_range;
        local_flush_icache_range = r3k_flush_icache_range;
+       __flush_icache_user_range = r3k_flush_icache_range;
+       __local_flush_icache_user_range = r3k_flush_icache_range;
 
        __flush_kernel_vmap_range = r3k_flush_kernel_vmap_range;
 
index fa7d8d3790bfc960bc7d4b358e9fb1a5120c04e2..88cfaf81c958733397a08ccb79d8c4c021f90580 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/bitops.h>
 
 #include <asm/bcache.h>
@@ -722,11 +722,13 @@ struct flush_icache_range_args {
        unsigned long start;
        unsigned long end;
        unsigned int type;
+       bool user;
 };
 
 static inline void __local_r4k_flush_icache_range(unsigned long start,
                                                  unsigned long end,
-                                                 unsigned int type)
+                                                 unsigned int type,
+                                                 bool user)
 {
        if (!cpu_has_ic_fills_f_dc) {
                if (type == R4K_INDEX ||
@@ -734,7 +736,10 @@ static inline void __local_r4k_flush_icache_range(unsigned long start,
                        r4k_blast_dcache();
                } else {
                        R4600_HIT_CACHEOP_WAR_IMPL;
-                       protected_blast_dcache_range(start, end);
+                       if (user)
+                               protected_blast_dcache_range(start, end);
+                       else
+                               blast_dcache_range(start, end);
                }
        }
 
@@ -748,27 +753,25 @@ static inline void __local_r4k_flush_icache_range(unsigned long start,
                        break;
 
                default:
-                       protected_blast_icache_range(start, end);
+                       if (user)
+                               protected_blast_icache_range(start, end);
+                       else
+                               blast_icache_range(start, end);
                        break;
                }
        }
-#ifdef CONFIG_EVA
-       /*
-        * Due to all possible segment mappings, there might cache aliases
-        * caused by the bootloader being in non-EVA mode, and the CPU switching
-        * to EVA during early kernel init. It's best to flush the scache
-        * to avoid having secondary cores fetching stale data and lead to
-        * kernel crashes.
-        */
-       bc_wback_inv(start, (end - start));
-       __sync();
-#endif
 }
 
 static inline void local_r4k_flush_icache_range(unsigned long start,
                                                unsigned long end)
 {
-       __local_r4k_flush_icache_range(start, end, R4K_HIT | R4K_INDEX);
+       __local_r4k_flush_icache_range(start, end, R4K_HIT | R4K_INDEX, false);
+}
+
+static inline void local_r4k_flush_icache_user_range(unsigned long start,
+                                                    unsigned long end)
+{
+       __local_r4k_flush_icache_range(start, end, R4K_HIT | R4K_INDEX, true);
 }
 
 static inline void local_r4k_flush_icache_range_ipi(void *args)
@@ -777,11 +780,13 @@ static inline void local_r4k_flush_icache_range_ipi(void *args)
        unsigned long start = fir_args->start;
        unsigned long end = fir_args->end;
        unsigned int type = fir_args->type;
+       bool user = fir_args->user;
 
-       __local_r4k_flush_icache_range(start, end, type);
+       __local_r4k_flush_icache_range(start, end, type, user);
 }
 
-static void r4k_flush_icache_range(unsigned long start, unsigned long end)
+static void __r4k_flush_icache_range(unsigned long start, unsigned long end,
+                                    bool user)
 {
        struct flush_icache_range_args args;
        unsigned long size, cache_size;
@@ -789,6 +794,7 @@ static void r4k_flush_icache_range(unsigned long start, unsigned long end)
        args.start = start;
        args.end = end;
        args.type = R4K_HIT | R4K_INDEX;
+       args.user = user;
 
        /*
         * Indexed cache ops require an SMP call.
@@ -814,6 +820,16 @@ static void r4k_flush_icache_range(unsigned long start, unsigned long end)
        instruction_hazard();
 }
 
+static void r4k_flush_icache_range(unsigned long start, unsigned long end)
+{
+       return __r4k_flush_icache_range(start, end, false);
+}
+
+static void r4k_flush_icache_user_range(unsigned long start, unsigned long end)
+{
+       return __r4k_flush_icache_range(start, end, true);
+}
+
 #if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT)
 
 static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
@@ -1915,9 +1931,16 @@ void r4k_cache_init(void)
        flush_data_cache_page   = r4k_flush_data_cache_page;
        flush_icache_range      = r4k_flush_icache_range;
        local_flush_icache_range        = local_r4k_flush_icache_range;
+       __flush_icache_user_range       = r4k_flush_icache_user_range;
+       __local_flush_icache_user_range = local_r4k_flush_icache_user_range;
 
 #if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_DMA_MAYBE_COHERENT)
-       if (coherentio) {
+# if defined(CONFIG_DMA_PERDEV_COHERENT)
+       if (0) {
+# else
+       if ((coherentio == IO_COHERENCE_ENABLED) ||
+           ((coherentio == IO_COHERENCE_DEFAULT) && hw_coherentio)) {
+# endif
                _dma_cache_wback_inv    = (void *)cache_noop;
                _dma_cache_wback        = (void *)cache_noop;
                _dma_cache_inv          = (void *)cache_noop;
index 596e18458e041cf74d2cdf2974fcc701144591c0..5c282583edf16a5c95a638707199e80be3cd734b 100644 (file)
@@ -411,6 +411,9 @@ void tx39_cache_init(void)
                break;
        }
 
+       __flush_icache_user_range = flush_icache_range;
+       __local_flush_icache_user_range = local_flush_icache_range;
+
        current_cpu_data.icache.waysize = icache_size / current_cpu_data.icache.ways;
        current_cpu_data.dcache.waysize = dcache_size / current_cpu_data.dcache.ways;
 
index bf04c6c479a4e6ec3f99a90a8d14c093c63d5b6f..6db3413472023dbf586c433c2808b9c62ca67a8f 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/fcntl.h>
 #include <linux/kernel.h>
 #include <linux/linkage.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/syscalls.h>
 #include <linux/mm.h>
@@ -33,6 +33,10 @@ void (*flush_icache_range)(unsigned long start, unsigned long end);
 EXPORT_SYMBOL_GPL(flush_icache_range);
 void (*local_flush_icache_range)(unsigned long start, unsigned long end);
 EXPORT_SYMBOL_GPL(local_flush_icache_range);
+void (*__flush_icache_user_range)(unsigned long start, unsigned long end);
+EXPORT_SYMBOL_GPL(__flush_icache_user_range);
+void (*__local_flush_icache_user_range)(unsigned long start, unsigned long end);
+EXPORT_SYMBOL_GPL(__local_flush_icache_user_range);
 
 void (*__flush_cache_vmap)(void);
 void (*__flush_cache_vunmap)(void);
@@ -74,7 +78,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes,
        if (!access_ok(VERIFY_WRITE, (void __user *) addr, bytes))
                return -EFAULT;
 
-       flush_icache_range(addr, addr + bytes);
+       __flush_icache_user_range(addr, addr + bytes);
 
        return 0;
 }
index b2eadd6fa9a1ed06167231eb9ab3632203b52d65..46d5696c4f276a7cdd729057fb4ee7044146d9e3 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/types.h>
 #include <linux/dma-mapping.h>
 #include <linux/mm.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/scatterlist.h>
 #include <linux/string.h>
 #include <linux/gfp.h>
 
 #include <dma-coherence.h>
 
-#ifdef CONFIG_DMA_MAYBE_COHERENT
-int coherentio = 0;    /* User defined DMA coherency from command line. */
+#if defined(CONFIG_DMA_MAYBE_COHERENT) && !defined(CONFIG_DMA_PERDEV_COHERENT)
+/* User defined DMA coherency from command line. */
+enum coherent_io_user_state coherentio = IO_COHERENCE_DEFAULT;
 EXPORT_SYMBOL_GPL(coherentio);
 int hw_coherentio = 0; /* Actual hardware supported DMA coherency setting. */
 
 static int __init setcoherentio(char *str)
 {
-       coherentio = 1;
+       coherentio = IO_COHERENCE_ENABLED;
        pr_info("Hardware DMA cache coherency (command line)\n");
        return 0;
 }
@@ -39,7 +40,7 @@ early_param("coherentio", setcoherentio);
 
 static int __init setnocoherentio(char *str)
 {
-       coherentio = 0;
+       coherentio = IO_COHERENCE_DISABLED;
        pr_info("Software DMA cache coherency (command line)\n");
        return 0;
 }
@@ -160,8 +161,7 @@ static void *mips_dma_alloc_coherent(struct device *dev, size_t size,
        *dma_handle = plat_map_dma_mem(dev, ret, size);
        if (!plat_device_is_coherent(dev)) {
                dma_cache_wback_inv((unsigned long) ret, size);
-               if (!hw_coherentio)
-                       ret = UNCAC_ADDR(ret);
+               ret = UNCAC_ADDR(ret);
        }
 
        return ret;
@@ -189,7 +189,7 @@ static void mips_dma_free_coherent(struct device *dev, size_t size, void *vaddr,
 
        plat_unmap_dma_mem(dev, dma_handle, size, DMA_BIDIRECTIONAL);
 
-       if (!plat_device_is_coherent(dev) && !hw_coherentio)
+       if (!plat_device_is_coherent(dev))
                addr = CAC_ADDR(addr);
 
        page = virt_to_page((void *) addr);
@@ -209,7 +209,7 @@ static int mips_dma_mmap(struct device *dev, struct vm_area_struct *vma,
        unsigned long pfn;
        int ret = -ENXIO;
 
-       if (!plat_device_is_coherent(dev) && !hw_coherentio)
+       if (!plat_device_is_coherent(dev))
                addr = CAC_ADDR(addr);
 
        pfn = page_to_pfn(virt_to_page((void *)addr));
index 9d25d2ba4b9ea8b0157bea763fa314a6556227f6..e474fa2efed49fbe30467fc38c268717905684ac 100644 (file)
@@ -5,7 +5,7 @@
  *
  * Copyright (C) 1997, 99, 2001 - 2004 Ralf Baechle <ralf@linux-mips.org>
  */
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/spinlock.h>
 #include <asm/branch.h>
 #include <asm/uaccess.h>
index 9560ad73112093a2b3c1573b4c54e5993e97328c..d56a855828c2bbc1235873ab7fd03c3836fb7b4a 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/module.h>
 #include <linux/kprobes.h>
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
index 42d124fb6474477c896e4618713e5b6d65d6e8f3..d8c3c159289a2953b8ae495a1da34d27d095b161 100644 (file)
@@ -287,7 +287,7 @@ slow_irqon:
        pages += nr;
 
        ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
-                                     write, 0, pages);
+                                     pages, write ? FOLL_WRITE : 0);
 
        /* Have to be a bit careful with return values */
        if (nr > 0) {
index d7258a103439d8316b527d64569955339489fe4c..f13f51003bd83c7db13c8e7f298b85c14e4a962f 100644 (file)
@@ -1,5 +1,6 @@
 #include <linux/compiler.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
index 72f7478ee068408d9398813c9f75e21b98c80a3b..3a6edecc3f385e4bd897750aa66488623f60411e 100644 (file)
@@ -10,7 +10,7 @@
  */
 #include <linux/bug.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
index 8d5008cbdc0f0a580d1fbc2c65f6f8741cbdc5eb..1f189627440f235b0876966b21562e55bc665323 100644 (file)
@@ -6,7 +6,7 @@
  * (C) Copyright 1995 1996 Linus Torvalds
  * (C) Copyright 2001, 2002 Ralf Baechle
  */
-#include <linux/module.h>
+#include <linux/export.h>
 #include <asm/addrspace.h>
 #include <asm/byteorder.h>
 #include <linux/sched.h>
index 353037699512ca5515b11ce8fb2c808eb6386c78..d08ea3ff0f53345e7501dd168f32c2177976f6ee 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/personality.h>
 #include <linux/random.h>
 #include <linux/sched.h>
index c41953ca6605ca347db0bbe71689fb791ef51410..6f804f5960abeff77cdbcd2cecf48a1acf989e7c 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
-#include <linux/module.h>
 #include <linux/proc_fs.h>
 
 #include <asm/bugs.h>
index e8b335c162958030adb7a201f6a92a8b05018153..bba9c1484b41e1bc8c124b3a2c50e53eb27e032c 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
-#include <linux/module.h>
+#include <linux/export.h>
 
 #include <asm/cpu.h>
 #include <asm/cpu-type.h>
@@ -67,8 +67,11 @@ void local_flush_tlb_all(void)
 
        entry = read_c0_wired();
 
-       /* Blast 'em all away. */
-       if (cpu_has_tlbinv) {
+       /*
+        * Blast 'em all away.
+        * If there are any wired entries, fall back to iterating
+        */
+       if (cpu_has_tlbinv && !entry) {
                if (current_cpu_data.tlbsizevtlb) {
                        write_c0_index(0);
                        mtc0_tlbw_hazard();
index 47a22889285f33db32b83c542e643b742133681e..4822943100f303c0fe08c7d7a3514ea7e0a8b927 100644 (file)
@@ -17,18 +17,3 @@ void __init device_tree_init(void)
 {
        unflatten_and_copy_device_tree();
 }
-
-static const struct of_device_id bus_ids[] __initconst = {
-       { .compatible = "simple-bus", },
-       { .compatible = "isa", },
-       {},
-};
-
-static int __init publish_devices(void)
-{
-       if (!of_have_populated_dt())
-               return 0;
-
-       return of_platform_bus_probe(NULL, bus_ids, NULL);
-}
-device_initcall(publish_devices);
index 151f4882ec8ac62d6cd8993a38d353d352bdd77e..c398582c316fcc9a8b01f567052ac4bb2ef18c26 100644 (file)
 #include <linux/libfdt.h>
 #include <linux/of_fdt.h>
 #include <linux/sizes.h>
+#include <asm/addrspace.h>
 #include <asm/bootinfo.h>
 #include <asm/fw/fw.h>
+#include <asm/mips-boards/generic.h>
+#include <asm/mips-boards/malta.h>
+#include <asm/mips-cm.h>
 #include <asm/page.h>
 
+#define ROCIT_REG_BASE                 0x1f403000
+#define ROCIT_CONFIG_GEN1              (ROCIT_REG_BASE + 0x04)
+#define  ROCIT_CONFIG_GEN1_MEMMAP_SHIFT        8
+#define  ROCIT_CONFIG_GEN1_MEMMAP_MASK (0xf << 8)
+
 static unsigned char fdt_buf[16 << 10] __initdata;
 
 /* determined physical memory size, not overridden by command line args         */
 extern unsigned long physical_memsize;
 
-#define MAX_MEM_ARRAY_ENTRIES 1
+enum mem_map {
+       MEM_MAP_V1 = 0,
+       MEM_MAP_V2,
+};
+
+#define MAX_MEM_ARRAY_ENTRIES 2
 
-static unsigned __init gen_fdt_mem_array(__be32 *mem_array, unsigned long size)
+static __init int malta_scon(void)
+{
+       int scon = MIPS_REVISION_SCONID;
+
+       if (scon != MIPS_REVISION_SCON_OTHER)
+               return scon;
+
+       switch (MIPS_REVISION_CORID) {
+       case MIPS_REVISION_CORID_QED_RM5261:
+       case MIPS_REVISION_CORID_CORE_LV:
+       case MIPS_REVISION_CORID_CORE_FPGA:
+       case MIPS_REVISION_CORID_CORE_FPGAR2:
+               return MIPS_REVISION_SCON_GT64120;
+
+       case MIPS_REVISION_CORID_CORE_EMUL_BON:
+       case MIPS_REVISION_CORID_BONITO64:
+       case MIPS_REVISION_CORID_CORE_20K:
+               return MIPS_REVISION_SCON_BONITO;
+
+       case MIPS_REVISION_CORID_CORE_MSC:
+       case MIPS_REVISION_CORID_CORE_FPGA2:
+       case MIPS_REVISION_CORID_CORE_24K:
+               return MIPS_REVISION_SCON_SOCIT;
+
+       case MIPS_REVISION_CORID_CORE_FPGA3:
+       case MIPS_REVISION_CORID_CORE_FPGA4:
+       case MIPS_REVISION_CORID_CORE_FPGA5:
+       case MIPS_REVISION_CORID_CORE_EMUL_MSC:
+       default:
+               return MIPS_REVISION_SCON_ROCIT;
+       }
+}
+
+static unsigned __init gen_fdt_mem_array(__be32 *mem_array, unsigned long size,
+                                        enum mem_map map)
 {
        unsigned long size_preio;
        unsigned entries;
@@ -39,11 +87,47 @@ static unsigned __init gen_fdt_mem_array(__be32 *mem_array, unsigned long size)
                 * DDR but limits it to 2GB.
                 */
                mem_array[1] = cpu_to_be32(size);
+               goto done;
+       }
+
+       size_preio = min_t(unsigned long, size, SZ_256M);
+       mem_array[1] = cpu_to_be32(size_preio);
+       size -= size_preio;
+       if (!size)
+               goto done;
+
+       if (map == MEM_MAP_V2) {
+               /*
+                * We have a flat 32 bit physical memory map with DDR filling
+                * all 4GB of the memory map, apart from the I/O region which
+                * obscures 256MB from 0x10000000-0x1fffffff.
+                *
+                * Therefore we discard the 256MB behind the I/O region.
+                */
+               if (size <= SZ_256M)
+                       goto done;
+               size -= SZ_256M;
+
+               /* Make use of the memory following the I/O region */
+               entries++;
+               mem_array[2] = cpu_to_be32(PHYS_OFFSET + SZ_512M);
+               mem_array[3] = cpu_to_be32(size);
        } else {
-               size_preio = min_t(unsigned long, size, SZ_256M);
-               mem_array[1] = cpu_to_be32(size_preio);
+               /*
+                * We have a 32 bit physical memory map with a 2GB DDR region
+                * aliased in the upper & lower halves of it. The I/O region
+                * obscures 256MB from 0x10000000-0x1fffffff in the low alias
+                * but the DDR it obscures is accessible via the high alias.
+                *
+                * Simply access everything beyond the lowest 256MB of DDR using
+                * the high alias.
+                */
+               entries++;
+               mem_array[2] = cpu_to_be32(PHYS_OFFSET + SZ_2G + SZ_256M);
+               mem_array[3] = cpu_to_be32(size);
        }
 
+done:
        BUG_ON(entries > MAX_MEM_ARRAY_ENTRIES);
        return entries;
 }
@@ -54,6 +138,8 @@ static void __init append_memory(void *fdt, int root_off)
        unsigned long memsize;
        unsigned mem_entries;
        int i, err, mem_off;
+       enum mem_map mem_map;
+       u32 config;
        char *var, param_name[10], *var_names[] = {
                "ememsize", "memsize",
        };
@@ -106,6 +192,20 @@ static void __init append_memory(void *fdt, int root_off)
        /* if the user says there's more RAM than we thought, believe them */
        physical_memsize = max_t(unsigned long, physical_memsize, memsize);
 
+       /* detect the memory map in use */
+       if (malta_scon() == MIPS_REVISION_SCON_ROCIT) {
+               /* ROCit has a register indicating the memory map in use */
+               config = readl((void __iomem *)CKSEG1ADDR(ROCIT_CONFIG_GEN1));
+               mem_map = config & ROCIT_CONFIG_GEN1_MEMMAP_MASK;
+               mem_map >>= ROCIT_CONFIG_GEN1_MEMMAP_SHIFT;
+       } else {
+               /* if not using ROCit, presume the v1 memory map */
+               mem_map = MEM_MAP_V1;
+       }
+       if (mem_map > MEM_MAP_V2)
+               panic("Unsupported physical memory map v%u detected",
+                     (unsigned int)mem_map);
+
        /* append memory to the DT */
        mem_off = fdt_add_subnode(fdt, root_off, "memory");
        if (mem_off < 0)
@@ -115,19 +215,93 @@ static void __init append_memory(void *fdt, int root_off)
        if (err)
                panic("Unable to set memory node device_type: %d", err);
 
-       mem_entries = gen_fdt_mem_array(mem_array, physical_memsize);
+       mem_entries = gen_fdt_mem_array(mem_array, physical_memsize, mem_map);
        err = fdt_setprop(fdt, mem_off, "reg", mem_array,
                          mem_entries * 2 * sizeof(mem_array[0]));
        if (err)
                panic("Unable to set memory regs property: %d", err);
 
-       mem_entries = gen_fdt_mem_array(mem_array, memsize);
+       mem_entries = gen_fdt_mem_array(mem_array, memsize, mem_map);
        err = fdt_setprop(fdt, mem_off, "linux,usable-memory", mem_array,
                          mem_entries * 2 * sizeof(mem_array[0]));
        if (err)
                panic("Unable to set linux,usable-memory property: %d", err);
 }
 
+static void __init remove_gic(void *fdt)
+{
+       int err, gic_off, i8259_off, cpu_off;
+       void __iomem *biu_base;
+       uint32_t cpu_phandle, sc_cfg;
+
+       /* if we have a CM which reports a GIC is present, leave the DT alone */
+       err = mips_cm_probe();
+       if (!err && (read_gcr_gic_status() & CM_GCR_GIC_STATUS_GICEX_MSK))
+               return;
+
+       if (malta_scon() == MIPS_REVISION_SCON_ROCIT) {
+               /*
+                * On systems using the RocIT system controller a GIC may be
+                * present without a CM. Detect whether that is the case.
+                */
+               biu_base = ioremap_nocache(MSC01_BIU_REG_BASE,
+                               MSC01_BIU_ADDRSPACE_SZ);
+               sc_cfg = __raw_readl(biu_base + MSC01_SC_CFG_OFS);
+               if (sc_cfg & MSC01_SC_CFG_GICPRES_MSK) {
+                       /* enable the GIC at the system controller level */
+                       sc_cfg |= BIT(MSC01_SC_CFG_GICENA_SHF);
+                       __raw_writel(sc_cfg, biu_base + MSC01_SC_CFG_OFS);
+                       return;
+               }
+       }
+
+       gic_off = fdt_node_offset_by_compatible(fdt, -1, "mti,gic");
+       if (gic_off < 0) {
+               pr_warn("malta-dtshim: unable to find DT GIC node: %d\n",
+                       gic_off);
+               return;
+       }
+
+       err = fdt_nop_node(fdt, gic_off);
+       if (err)
+               pr_warn("malta-dtshim: unable to nop GIC node\n");
+
+       i8259_off = fdt_node_offset_by_compatible(fdt, -1, "intel,i8259");
+       if (i8259_off < 0) {
+               pr_warn("malta-dtshim: unable to find DT i8259 node: %d\n",
+                       i8259_off);
+               return;
+       }
+
+       cpu_off = fdt_node_offset_by_compatible(fdt, -1,
+                       "mti,cpu-interrupt-controller");
+       if (cpu_off < 0) {
+               pr_warn("malta-dtshim: unable to find CPU intc node: %d\n",
+                       cpu_off);
+               return;
+       }
+
+       cpu_phandle = fdt_get_phandle(fdt, cpu_off);
+       if (!cpu_phandle) {
+               pr_warn("malta-dtshim: unable to get CPU intc phandle\n");
+               return;
+       }
+
+       err = fdt_setprop_u32(fdt, i8259_off, "interrupt-parent", cpu_phandle);
+       if (err) {
+               pr_warn("malta-dtshim: unable to set i8259 interrupt-parent: %d\n",
+                       err);
+               return;
+       }
+
+       err = fdt_setprop_u32(fdt, i8259_off, "interrupts", 2);
+       if (err) {
+               pr_warn("malta-dtshim: unable to set i8259 interrupts: %d\n",
+                       err);
+               return;
+       }
+}
+
 void __init *malta_dt_shim(void *fdt)
 {
        int root_off, len, err;
@@ -153,6 +327,7 @@ void __init *malta_dt_shim(void *fdt)
                return fdt;
 
        append_memory(fdt_buf, root_off);
+       remove_gic(fdt_buf);
 
        err = fdt_pack(fdt_buf);
        if (err)
index dc2c5214809d38703a7048babe7a16f83d77f28a..0f3b881a3190fdcb993ea01234a2c304b1511f18 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
+#include <linux/pci_regs.h>
 #include <linux/serial_core.h>
 
 #include <asm/cacheflush.h>
@@ -242,23 +243,19 @@ mips_pci_controller:
                          MSC01_PCI_SWAP_BYTESWAP << MSC01_PCI_SWAP_MEM_SHF |
                          MSC01_PCI_SWAP_BYTESWAP << MSC01_PCI_SWAP_BAR0_SHF);
 #endif
-#ifndef CONFIG_EVA
-               /* Fix up target memory mapping.  */
-               MSC_READ(MSC01_PCI_BAR0, mask);
-               MSC_WRITE(MSC01_PCI_P2SCMSKL, mask & MSC01_PCI_BAR0_SIZE_MSK);
-#else
+
                /*
                 * Setup the Malta max (2GB) memory for PCI DMA in host bridge
-                * in transparent addressing mode, starting from 0x80000000.
+                * in transparent addressing mode.
                 */
-               mask = PHYS_OFFSET | (1<<3);
+               mask = PHYS_OFFSET | PCI_BASE_ADDRESS_MEM_PREFETCH;
                MSC_WRITE(MSC01_PCI_BAR0, mask);
-
-               mask = PHYS_OFFSET;
                MSC_WRITE(MSC01_PCI_HEAD4, mask);
+
+               mask &= MSC01_PCI_BAR0_SIZE_MSK;
                MSC_WRITE(MSC01_PCI_P2SCMSKL, mask);
                MSC_WRITE(MSC01_PCI_P2SCMAPL, mask);
-#endif
+
                /* Don't handle target retries indefinitely.  */
                if ((data & MSC01_PCI_CFG_MAXRTRY_MSK) ==
                    MSC01_PCI_CFG_MAXRTRY_MSK)
index c6a6c7afddab41f00475bce99e53f861dc10db58..cb675ec6f283ee9d08071e9b112845bddd5c1594 100644 (file)
  */
 #include <linux/init.h>
 #include <linux/irq.h>
+#include <linux/irqchip.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irqchip/mips-gic.h>
+#include <linux/of_irq.h>
 #include <linux/kernel_stat.h>
 #include <linux/kernel.h>
 #include <linux/random.h>
 #include <asm/setup.h>
 #include <asm/rtlx.h>
 
-static void __iomem *_msc01_biu_base;
-
-static DEFINE_RAW_SPINLOCK(mips_irq_lock);
-
 static inline int mips_pcibios_iack(void)
 {
        int irq;
@@ -85,49 +83,6 @@ static inline int mips_pcibios_iack(void)
        return irq;
 }
 
-static inline int get_int(void)
-{
-       unsigned long flags;
-       int irq;
-       raw_spin_lock_irqsave(&mips_irq_lock, flags);
-
-       irq = mips_pcibios_iack();
-
-       /*
-        * The only way we can decide if an interrupt is spurious
-        * is by checking the 8259 registers.  This needs a spinlock
-        * on an SMP system,  so leave it up to the generic code...
-        */
-
-       raw_spin_unlock_irqrestore(&mips_irq_lock, flags);
-
-       return irq;
-}
-
-static void malta_hw0_irqdispatch(void)
-{
-       int irq;
-
-       irq = get_int();
-       if (irq < 0) {
-               /* interrupt has already been cleared */
-               return;
-       }
-
-       do_IRQ(MALTA_INT_BASE + irq);
-
-#ifdef CONFIG_MIPS_VPE_APSP_API_MT
-       if (aprp_hook)
-               aprp_hook();
-#endif
-}
-
-static irqreturn_t i8259_handler(int irq, void *dev_id)
-{
-       malta_hw0_irqdispatch();
-       return IRQ_HANDLED;
-}
-
 static void corehi_irqdispatch(void)
 {
        unsigned int intedge, intsteer, pcicmd, pcibadaddr;
@@ -240,12 +195,6 @@ static struct irqaction irq_call = {
 };
 #endif /* CONFIG_MIPS_MT_SMP */
 
-static struct irqaction i8259irq = {
-       .handler = i8259_handler,
-       .name = "XT-PIC cascade",
-       .flags = IRQF_NO_THREAD,
-};
-
 static struct irqaction corehi_irqaction = {
        .handler = corehi_handler,
        .name = "CoreHi",
@@ -281,28 +230,10 @@ void __init arch_init_ipiirq(int irq, struct irqaction *action)
 
 void __init arch_init_irq(void)
 {
-       int corehi_irq, i8259_irq;
-
-       init_i8259_irqs();
+       int corehi_irq;
 
-       if (!cpu_has_veic)
-               mips_cpu_irq_init();
-
-       if (mips_cm_present()) {
-               write_gcr_gic_base(GIC_BASE_ADDR | CM_GCR_GIC_BASE_GICEN_MSK);
-               gic_present = 1;
-       } else {
-               if (mips_revision_sconid == MIPS_REVISION_SCON_ROCIT) {
-                       _msc01_biu_base = ioremap_nocache(MSC01_BIU_REG_BASE,
-                                               MSC01_BIU_ADDRSPACE_SZ);
-                       gic_present =
-                         (__raw_readl(_msc01_biu_base + MSC01_SC_CFG_OFS) &
-                          MSC01_SC_CFG_GICPRES_MSK) >>
-                         MSC01_SC_CFG_GICPRES_SHF;
-               }
-       }
-       if (gic_present)
-               pr_debug("GIC present\n");
+       i8259_set_poll(mips_pcibios_iack);
+       irqchip_init();
 
        switch (mips_revision_sconid) {
        case MIPS_REVISION_SCON_SOCIT:
@@ -330,18 +261,6 @@ void __init arch_init_irq(void)
        }
 
        if (gic_present) {
-               int i;
-
-               gic_init(GIC_BASE_ADDR, GIC_ADDRSPACE_SZ, MIPSCPU_INT_GIC,
-                        MIPS_GIC_IRQ_BASE);
-               if (!mips_cm_present()) {
-                       /* Enable the GIC */
-                       i = __raw_readl(_msc01_biu_base + MSC01_SC_CFG_OFS);
-                       __raw_writel(i | (0x1 << MSC01_SC_CFG_GICENA_SHF),
-                                _msc01_biu_base + MSC01_SC_CFG_OFS);
-                       pr_debug("GIC Enabled\n");
-               }
-               i8259_irq = MIPS_GIC_IRQ_BASE + GIC_INT_I8259A;
                corehi_irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_COREHI;
        } else {
 #if defined(CONFIG_MIPS_MT_SMP)
@@ -361,33 +280,13 @@ void __init arch_init_irq(void)
                arch_init_ipiirq(cpu_ipi_call_irq, &irq_call);
 #endif
                if (cpu_has_veic) {
-                       set_vi_handler(MSC01E_INT_I8259A,
-                                      malta_hw0_irqdispatch);
                        set_vi_handler(MSC01E_INT_COREHI,
                                       corehi_irqdispatch);
-                       i8259_irq = MSC01E_INT_BASE + MSC01E_INT_I8259A;
                        corehi_irq = MSC01E_INT_BASE + MSC01E_INT_COREHI;
                } else {
-                       i8259_irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_I8259A;
                        corehi_irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_COREHI;
                }
        }
 
-       setup_irq(i8259_irq, &i8259irq);
        setup_irq(corehi_irq, &corehi_irqaction);
 }
-
-void malta_be_init(void)
-{
-       /* Could change CM error mask register. */
-}
-
-int malta_be_handler(struct pt_regs *regs, int is_fixup)
-{
-       /* This duplicates the handling in do_be which seems wrong */
-       int retval = is_fixup ? MIPS_BE_FIXUP : MIPS_BE_FATAL;
-
-       mips_cm_error_report();
-
-       return retval;
-}
index e1dd1c1d3fdeed9f5214dc18ec4cb7f2c5593279..516e1233d771cb3cd87cde8e73ef6f4d296e861b 100644 (file)
  */
 #include <linux/init.h>
 #include <linux/serial_8250.h>
-#include <linux/mc146818rtc.h>
 #include <linux/module.h>
 #include <linux/irq.h>
-#include <linux/mtd/partitions.h>
-#include <linux/mtd/physmap.h>
 #include <linux/platform_device.h>
 #include <asm/mips-boards/maltaint.h>
-#include <mtd/mtd-abi.h>
 
 #define SMC_PORT(base, int)                                            \
 {                                                                      \
@@ -68,80 +64,13 @@ static struct platform_device malta_uart8250_device = {
        },
 };
 
-struct resource malta_rtc_resources[] = {
-       {
-               .start  = RTC_PORT(0),
-               .end    = RTC_PORT(7),
-               .flags  = IORESOURCE_IO,
-       }, {
-               .start  = RTC_IRQ,
-               .end    = RTC_IRQ,
-               .flags  = IORESOURCE_IRQ,
-       }
-};
-
-static struct platform_device malta_rtc_device = {
-       .name           = "rtc_cmos",
-       .id             = -1,
-       .resource       = malta_rtc_resources,
-       .num_resources  = ARRAY_SIZE(malta_rtc_resources),
-};
-
-static struct mtd_partition malta_mtd_partitions[] = {
-       {
-               .name =         "YAMON",
-               .offset =       0x0,
-               .size =         0x100000,
-               .mask_flags =   MTD_WRITEABLE
-       }, {
-               .name =         "User FS",
-               .offset =       0x100000,
-               .size =         0x2e0000
-       }, {
-               .name =         "Board Config",
-               .offset =       0x3e0000,
-               .size =         0x020000,
-               .mask_flags =   MTD_WRITEABLE
-       }
-};
-
-static struct physmap_flash_data malta_flash_data = {
-       .width          = 4,
-       .nr_parts       = ARRAY_SIZE(malta_mtd_partitions),
-       .parts          = malta_mtd_partitions
-};
-
-static struct resource malta_flash_resource = {
-       .start          = 0x1e000000,
-       .end            = 0x1e3fffff,
-       .flags          = IORESOURCE_MEM
-};
-
-static struct platform_device malta_flash_device = {
-       .name           = "physmap-flash",
-       .id             = 0,
-       .dev            = {
-               .platform_data  = &malta_flash_data,
-       },
-       .num_resources  = 1,
-       .resource       = &malta_flash_resource,
-};
-
 static struct platform_device *malta_devices[] __initdata = {
        &malta_uart8250_device,
-       &malta_rtc_device,
-       &malta_flash_device,
 };
 
 static int __init malta_add_devices(void)
 {
-       int err;
-
-       err = platform_add_devices(malta_devices, ARRAY_SIZE(malta_devices));
-       if (err)
-               return err;
-
-       return 0;
+       return platform_add_devices(malta_devices, ARRAY_SIZE(malta_devices));
 }
 
 device_initcall(malta_add_devices);
index 2fd2cc2c5034fbf1ddd59ffec163fab76187aa4a..dd6f62ad4417225cf92b63bd6ff7a886c5f01c97 100644 (file)
@@ -8,38 +8,21 @@
  */
 #include <linux/io.h>
 #include <linux/pm.h>
+#include <linux/reboot.h>
 
 #include <asm/reboot.h>
 #include <asm/mach-malta/malta-pm.h>
 
-#define SOFTRES_REG    0x1f000500
-#define GORESET                0x42
-
-static void mips_machine_restart(char *command)
-{
-       unsigned int __iomem *softres_reg =
-               ioremap(SOFTRES_REG, sizeof(unsigned int));
-
-       __raw_writel(GORESET, softres_reg);
-}
-
-static void mips_machine_halt(void)
-{
-       while (true);
-}
-
 static void mips_machine_power_off(void)
 {
        mips_pm_suspend(PIIX4_FUNC3IO_PMCNTRL_SUS_TYP_SOFF);
 
        pr_info("Failed to power down, resetting\n");
-       mips_machine_restart(NULL);
+       machine_restart(NULL);
 }
 
 static int __init mips_reboot_setup(void)
 {
-       _machine_restart = mips_machine_restart;
-       _machine_halt = mips_machine_halt;
        pm_power_off = mips_machine_power_off;
 
        return 0;
index 7e7364b0501edc33f1746962dc9b3796000d3c69..a01d5debfcaf5578a30b6114721db82a1c392e19 100644 (file)
@@ -42,9 +42,6 @@
 #define ROCIT_CONFIG_GEN0              0x1f403000
 #define  ROCIT_CONFIG_GEN0_PCI_IOCU    BIT(7)
 
-extern void malta_be_init(void);
-extern int malta_be_handler(struct pt_regs *regs, int is_fixup);
-
 static struct resource standard_io_resources[] = {
        {
                .name = "dma1",
@@ -154,12 +151,12 @@ static void __init plat_setup_iocoherency(void)
         * coherency instead.
         */
        if (plat_enable_iocoherency()) {
-               if (coherentio == 0)
+               if (coherentio == IO_COHERENCE_DISABLED)
                        pr_info("Hardware DMA cache coherency disabled\n");
                else
                        pr_info("Hardware DMA cache coherency enabled\n");
        } else {
-               if (coherentio == 1)
+               if (coherentio == IO_COHERENCE_ENABLED)
                        pr_info("Hardware DMA cache coherency unsupported, but enabled from command line!\n");
                else
                        pr_info("Software DMA cache coherency enabled\n");
@@ -301,7 +298,4 @@ void __init plat_mem_setup(void)
 #if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
        screen_info_setup();
 #endif
-
-       board_be_init = malta_be_init;
-       board_be_handler = malta_be_handler;
 }
diff --git a/arch/mips/mti-sead3/Makefile b/arch/mips/mti-sead3/Makefile
deleted file mode 100644 (file)
index 7a584e0..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-#
-# Carsten Langgaard, carstenl@mips.com
-# Copyright (C) 1999,2000 MIPS Technologies, Inc.  All rights reserved.
-#
-# Copyright (C) 2008 Wind River Systems, Inc.
-#   written by Ralf Baechle <ralf@linux-mips.org>
-#
-# Copyright (C) 2012 MIPS Technoligies, Inc.  All rights reserved.
-# Steven J. Hill <sjhill@mips.com>
-#
-obj-y                          := sead3-lcd.o sead3-display.o sead3-init.o \
-                                  sead3-int.o sead3-platform.o sead3-reset.o \
-                                  sead3-setup.o sead3-time.o
-
-obj-$(CONFIG_EARLY_PRINTK)     += sead3-console.o
diff --git a/arch/mips/mti-sead3/Platform b/arch/mips/mti-sead3/Platform
deleted file mode 100644 (file)
index 3870924..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# MIPS SEAD-3 board
-#
-platform-$(CONFIG_MIPS_SEAD3)  += mti-sead3/
-cflags-$(CONFIG_MIPS_SEAD3)    += -I$(srctree)/arch/mips/include/asm/mach-sead3
-load-$(CONFIG_MIPS_SEAD3)      += 0xffffffff80100000
-all-$(CONFIG_MIPS_SEAD3)       := $(COMPRESSION_FNAME).srec
diff --git a/arch/mips/mti-sead3/sead3-console.c b/arch/mips/mti-sead3/sead3-console.c
deleted file mode 100644 (file)
index 031f47d..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/init.h>
-#include <linux/console.h>
-#include <linux/serial_reg.h>
-#include <linux/io.h>
-
-#define SEAD_UART1_REGS_BASE   0xbf000800   /* ttyS1 = DB9 port */
-#define SEAD_UART0_REGS_BASE   0xbf000900   /* ttyS0 = USB port   */
-#define PORT(base_addr, offset) ((unsigned int __iomem *)(base_addr+(offset)*4))
-
-static char console_port = 1;
-
-static inline unsigned int serial_in(int offset, unsigned int base_addr)
-{
-       return __raw_readl(PORT(base_addr, offset)) & 0xff;
-}
-
-static inline void serial_out(int offset, int value, unsigned int base_addr)
-{
-       __raw_writel(value, PORT(base_addr, offset));
-}
-
-void __init fw_init_early_console(char port)
-{
-       console_port = port;
-}
-
-int prom_putchar(char c)
-{
-       unsigned int base_addr;
-
-       base_addr = console_port ? SEAD_UART1_REGS_BASE : SEAD_UART0_REGS_BASE;
-
-       while ((serial_in(UART_LSR, base_addr) & UART_LSR_THRE) == 0)
-               ;
-
-       serial_out(UART_TX, c, base_addr);
-
-       return 1;
-}
diff --git a/arch/mips/mti-sead3/sead3-display.c b/arch/mips/mti-sead3/sead3-display.c
deleted file mode 100644 (file)
index 9487599..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/timer.h>
-#include <linux/io.h>
-#include <asm/mips-boards/generic.h>
-
-static unsigned int display_count;
-static unsigned int max_display_count;
-
-#define LCD_DISPLAY_POS_BASE           0x1f000400
-#define DISPLAY_LCDINSTRUCTION         (0*2)
-#define DISPLAY_LCDDATA                        (1*2)
-#define DISPLAY_CPLDSTATUS             (2*2)
-#define DISPLAY_CPLDDATA               (3*2)
-#define LCD_SETDDRAM                   0x80
-#define LCD_IR_BF                      0x80
-
-const char display_string[] = "                      LINUX ON SEAD3               ";
-
-static void scroll_display_message(unsigned long data);
-static DEFINE_TIMER(mips_scroll_timer, scroll_display_message, HZ, 0);
-
-static void lcd_wait(unsigned int __iomem *display)
-{
-       /* Wait for CPLD state machine to become idle. */
-       do { } while (__raw_readl(display + DISPLAY_CPLDSTATUS) & 1);
-
-       do {
-               __raw_readl(display + DISPLAY_LCDINSTRUCTION);
-
-               /* Wait for CPLD state machine to become idle. */
-               do { } while (__raw_readl(display + DISPLAY_CPLDSTATUS) & 1);
-       } while (__raw_readl(display + DISPLAY_CPLDDATA) & LCD_IR_BF);
-}
-
-void mips_display_message(const char *str)
-{
-       static unsigned int __iomem *display;
-       char ch;
-       int i;
-
-       if (unlikely(display == NULL))
-               display = ioremap_nocache(LCD_DISPLAY_POS_BASE,
-                       (8 * sizeof(int)));
-
-       for (i = 0; i < 16; i++) {
-               if (*str)
-                       ch = *str++;
-               else
-                       ch = ' ';
-               lcd_wait(display);
-               __raw_writel((LCD_SETDDRAM | i),
-                       (display + DISPLAY_LCDINSTRUCTION));
-               lcd_wait(display);
-               __raw_writel(ch, display + DISPLAY_LCDDATA);
-       }
-}
-
-static void scroll_display_message(unsigned long data)
-{
-       mips_display_message(&display_string[display_count++]);
-       if (display_count == max_display_count)
-               display_count = 0;
-       mod_timer(&mips_scroll_timer, jiffies + HZ);
-}
-
-void mips_scroll_message(void)
-{
-       del_timer_sync(&mips_scroll_timer);
-       max_display_count = strlen(display_string) + 1 - 16;
-       mod_timer(&mips_scroll_timer, jiffies + 1);
-}
diff --git a/arch/mips/mti-sead3/sead3-init.c b/arch/mips/mti-sead3/sead3-init.c
deleted file mode 100644 (file)
index 3572ea3..0000000
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/init.h>
-#include <linux/io.h>
-
-#include <asm/bootinfo.h>
-#include <asm/cacheflush.h>
-#include <asm/traps.h>
-#include <asm/mips-boards/generic.h>
-#include <asm/fw/fw.h>
-
-extern char except_vec_nmi;
-extern char except_vec_ejtag_debug;
-
-#ifdef CONFIG_SERIAL_8250_CONSOLE
-static void __init console_config(void)
-{
-       char console_string[40];
-       int baud = 0;
-       char parity = '\0', bits = '\0', flow = '\0';
-       char *s;
-
-       if ((strstr(fw_getcmdline(), "console=")) == NULL) {
-               s = fw_getenv("modetty0");
-               if (s) {
-                       while (*s >= '0' && *s <= '9')
-                               baud = baud*10 + *s++ - '0';
-                       if (*s == ',')
-                               s++;
-                       if (*s)
-                               parity = *s++;
-                       if (*s == ',')
-                               s++;
-                       if (*s)
-                               bits = *s++;
-                       if (*s == ',')
-                               s++;
-                       if (*s == 'h')
-                               flow = 'r';
-               }
-               if (baud == 0)
-                       baud = 38400;
-               if (parity != 'n' && parity != 'o' && parity != 'e')
-                       parity = 'n';
-               if (bits != '7' && bits != '8')
-                       bits = '8';
-               if (flow == '\0')
-                       flow = 'r';
-               sprintf(console_string, " console=ttyS0,%d%c%c%c", baud,
-                       parity, bits, flow);
-               strcat(fw_getcmdline(), console_string);
-       }
-}
-#endif
-
-static void __init mips_nmi_setup(void)
-{
-       void *base;
-
-       base = cpu_has_veic ?
-               (void *)(CAC_BASE + 0xa80) :
-               (void *)(CAC_BASE + 0x380);
-#ifdef CONFIG_CPU_MICROMIPS
-       /*
-        * Decrement the exception vector address by one for microMIPS.
-        */
-       memcpy(base, (&except_vec_nmi - 1), 0x80);
-
-       /*
-        * This is a hack. We do not know if the boot loader was built with
-        * microMIPS instructions or not. If it was not, the NMI exception
-        * code at 0x80000a80 will be taken in MIPS32 mode. The hand coded
-        * assembly below forces us into microMIPS mode if we are a pure
-        * microMIPS kernel. The assembly instructions are:
-        *
-        *  3C1A8000   lui       k0,0x8000
-        *  375A0381   ori       k0,k0,0x381
-        *  03400008   jr        k0
-        *  00000000   nop
-        *
-        * The mode switch occurs by jumping to the unaligned exception
-        * vector address at 0x80000381 which would have been 0x80000380
-        * in MIPS32 mode. The jump to the unaligned address transitions
-        * us into microMIPS mode.
-        */
-       if (!cpu_has_veic) {
-               void *base2 = (void *)(CAC_BASE + 0xa80);
-               *((unsigned int *)base2) = 0x3c1a8000;
-               *((unsigned int *)base2 + 1) = 0x375a0381;
-               *((unsigned int *)base2 + 2) = 0x03400008;
-               *((unsigned int *)base2 + 3) = 0x00000000;
-               flush_icache_range((unsigned long)base2,
-                       (unsigned long)base2 + 0x10);
-       }
-#else
-       memcpy(base, &except_vec_nmi, 0x80);
-#endif
-       flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
-}
-
-static void __init mips_ejtag_setup(void)
-{
-       void *base;
-
-       base = cpu_has_veic ?
-               (void *)(CAC_BASE + 0xa00) :
-               (void *)(CAC_BASE + 0x300);
-#ifdef CONFIG_CPU_MICROMIPS
-       /* Deja vu... */
-       memcpy(base, (&except_vec_ejtag_debug - 1), 0x80);
-       if (!cpu_has_veic) {
-               void *base2 = (void *)(CAC_BASE + 0xa00);
-               *((unsigned int *)base2) = 0x3c1a8000;
-               *((unsigned int *)base2 + 1) = 0x375a0301;
-               *((unsigned int *)base2 + 2) = 0x03400008;
-               *((unsigned int *)base2 + 3) = 0x00000000;
-               flush_icache_range((unsigned long)base2,
-                       (unsigned long)base2 + 0x10);
-       }
-#else
-       memcpy(base, &except_vec_ejtag_debug, 0x80);
-#endif
-       flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
-}
-
-void __init prom_init(void)
-{
-       board_nmi_handler_setup = mips_nmi_setup;
-       board_ejtag_handler_setup = mips_ejtag_setup;
-
-       fw_init_cmdline();
-#ifdef CONFIG_EARLY_PRINTK
-       if ((strstr(fw_getcmdline(), "console=ttyS0")) != NULL)
-               fw_init_early_console(0);
-       else if ((strstr(fw_getcmdline(), "console=ttyS1")) != NULL)
-               fw_init_early_console(1);
-#endif
-#ifdef CONFIG_SERIAL_8250_CONSOLE
-       if ((strstr(fw_getcmdline(), "console=")) == NULL)
-               strcat(fw_getcmdline(), " console=ttyS0,38400n8r");
-       console_config();
-#endif
-}
-
-void __init prom_free_prom_memory(void)
-{
-}
diff --git a/arch/mips/mti-sead3/sead3-int.c b/arch/mips/mti-sead3/sead3-int.c
deleted file mode 100644 (file)
index e31e17f..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/irqchip/mips-gic.h>
-#include <linux/io.h>
-
-#include <asm/irq_cpu.h>
-#include <asm/setup.h>
-
-#include <asm/mips-boards/sead3int.h>
-
-#define SEAD_CONFIG_GIC_PRESENT_SHF    1
-#define SEAD_CONFIG_GIC_PRESENT_MSK    (1 << SEAD_CONFIG_GIC_PRESENT_SHF)
-#define SEAD_CONFIG_BASE               0x1b100110
-#define SEAD_CONFIG_SIZE               4
-
-static void __iomem *sead3_config_reg;
-
-void __init arch_init_irq(void)
-{
-       if (!cpu_has_veic)
-               mips_cpu_irq_init();
-
-       sead3_config_reg = ioremap_nocache(SEAD_CONFIG_BASE, SEAD_CONFIG_SIZE);
-       gic_present = (__raw_readl(sead3_config_reg) &
-                      SEAD_CONFIG_GIC_PRESENT_MSK) >>
-               SEAD_CONFIG_GIC_PRESENT_SHF;
-       pr_info("GIC: %spresent\n", (gic_present) ? "" : "not ");
-       pr_info("EIC: %s\n",
-               (current_cpu_data.options & MIPS_CPU_VEIC) ?  "on" : "off");
-
-       if (gic_present)
-               gic_init(GIC_BASE_ADDR, GIC_ADDRSPACE_SZ, CPU_INT_GIC,
-                        MIPS_GIC_IRQ_BASE);
-}
-
diff --git a/arch/mips/mti-sead3/sead3-lcd.c b/arch/mips/mti-sead3/sead3-lcd.c
deleted file mode 100644 (file)
index 10b10ed..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/init.h>
-#include <linux/platform_device.h>
-
-static struct resource __initdata sead3_lcd_resource = {
-               .start  = 0x1f000400,
-               .end    = 0x1f00041f,
-               .flags  = IORESOURCE_MEM,
-};
-
-static __init int sead3_lcd_add(void)
-{
-       struct platform_device *pdev;
-       int retval;
-
-       /* SEAD-3 and Cobalt platforms use same display type. */
-       pdev = platform_device_alloc("cobalt-lcd", -1);
-       if (!pdev)
-               return -ENOMEM;
-
-       retval = platform_device_add_resources(pdev, &sead3_lcd_resource, 1);
-       if (retval)
-               goto err_free_device;
-
-       retval = platform_device_add(pdev);
-       if (retval)
-               goto err_free_device;
-
-       return 0;
-
-err_free_device:
-       platform_device_put(pdev);
-
-       return retval;
-}
-
-device_initcall(sead3_lcd_add);
diff --git a/arch/mips/mti-sead3/sead3-platform.c b/arch/mips/mti-sead3/sead3-platform.c
deleted file mode 100644 (file)
index 73b73ef..0000000
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/dma-mapping.h>
-#include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/irqchip/mips-gic.h>
-#include <linux/leds.h>
-#include <linux/mtd/physmap.h>
-#include <linux/platform_device.h>
-#include <linux/serial_8250.h>
-#include <linux/smsc911x.h>
-
-#include <asm/mips-boards/sead3int.h>
-
-#define UART(base)                                                     \
-{                                                                      \
-       .mapbase        = base,                                         \
-       .irq            = -1,                                           \
-       .uartclk        = 14745600,                                     \
-       .iotype         = UPIO_MEM32,                                   \
-       .flags          = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_IOREMAP, \
-       .regshift       = 2,                                            \
-}
-
-static struct plat_serial8250_port uart8250_data[] = {
-       UART(0x1f000900),   /* ttyS0 = USB   */
-       UART(0x1f000800),   /* ttyS1 = RS232 */
-       { },
-};
-
-static struct platform_device uart8250_device = {
-       .name                   = "serial8250",
-       .id                     = PLAT8250_DEV_PLATFORM2,
-       .dev                    = {
-               .platform_data  = uart8250_data,
-       },
-};
-
-static struct smsc911x_platform_config sead3_smsc911x_data = {
-       .irq_polarity   = SMSC911X_IRQ_POLARITY_ACTIVE_LOW,
-       .irq_type       = SMSC911X_IRQ_TYPE_PUSH_PULL,
-       .flags          = SMSC911X_USE_32BIT | SMSC911X_SAVE_MAC_ADDRESS,
-       .phy_interface  = PHY_INTERFACE_MODE_MII,
-};
-
-static struct resource sead3_net_resources[] = {
-       {
-               .start                  = 0x1f010000,
-               .end                    = 0x1f01ffff,
-               .flags                  = IORESOURCE_MEM
-       }, {
-               .flags                  = IORESOURCE_IRQ
-       }
-};
-
-static struct platform_device sead3_net_device = {
-       .name                   = "smsc911x",
-       .id                     = 0,
-       .dev                    = {
-               .platform_data  = &sead3_smsc911x_data,
-       },
-       .num_resources          = ARRAY_SIZE(sead3_net_resources),
-       .resource               = sead3_net_resources
-};
-
-static struct mtd_partition sead3_mtd_partitions[] = {
-       {
-               .name =         "User FS",
-               .offset =       0x00000000,
-               .size =         0x01fc0000,
-       }, {
-               .name =         "Board Config",
-               .offset =       0x01fc0000,
-               .size =         0x00040000,
-               .mask_flags =   MTD_WRITEABLE
-       },
-};
-
-static struct physmap_flash_data sead3_flash_data = {
-       .width          = 4,
-       .nr_parts       = ARRAY_SIZE(sead3_mtd_partitions),
-       .parts          = sead3_mtd_partitions
-};
-
-static struct resource sead3_flash_resource = {
-       .start          = 0x1c000000,
-       .end            = 0x1dffffff,
-       .flags          = IORESOURCE_MEM
-};
-
-static struct platform_device sead3_flash = {
-       .name           = "physmap-flash",
-       .id             = 0,
-       .dev            = {
-               .platform_data  = &sead3_flash_data,
-       },
-       .num_resources  = 1,
-       .resource       = &sead3_flash_resource,
-};
-
-#define LEDFLAGS(bits, shift)          \
-       ((bits << 8) | (shift << 8))
-
-#define LEDBITS(id, shift, bits)       \
-       .name = id #shift,              \
-       .flags = LEDFLAGS(bits, shift)
-
-static struct led_info led_data_info[] = {
-       { LEDBITS("bit", 0, 1) },
-       { LEDBITS("bit", 1, 1) },
-       { LEDBITS("bit", 2, 1) },
-       { LEDBITS("bit", 3, 1) },
-       { LEDBITS("bit", 4, 1) },
-       { LEDBITS("bit", 5, 1) },
-       { LEDBITS("bit", 6, 1) },
-       { LEDBITS("bit", 7, 1) },
-       { LEDBITS("all", 0, 8) },
-};
-
-static struct led_platform_data led_data = {
-       .num_leds       = ARRAY_SIZE(led_data_info),
-       .leds           = led_data_info
-};
-
-static struct resource pled_resources[] = {
-       {
-               .start  = 0x1f000210,
-               .end    = 0x1f000217,
-               .flags  = IORESOURCE_MEM
-       }
-};
-
-static struct platform_device pled_device = {
-       .name                   = "sead3::pled",
-       .id                     = 0,
-       .dev                    = {
-               .platform_data  = &led_data,
-       },
-       .num_resources          = ARRAY_SIZE(pled_resources),
-       .resource               = pled_resources
-};
-
-
-static struct resource fled_resources[] = {
-       {
-               .start                  = 0x1f000218,
-               .end                    = 0x1f00021f,
-               .flags                  = IORESOURCE_MEM
-       }
-};
-
-static struct platform_device fled_device = {
-       .name                   = "sead3::fled",
-       .id                     = 0,
-       .dev                    = {
-               .platform_data  = &led_data,
-       },
-       .num_resources          = ARRAY_SIZE(fled_resources),
-       .resource               = fled_resources
-};
-
-static struct platform_device sead3_led_device = {
-        .name   = "sead3-led",
-        .id     = -1,
-};
-
-static struct resource ehci_resources[] = {
-       {
-               .start                  = 0x1b200000,
-               .end                    = 0x1b200fff,
-               .flags                  = IORESOURCE_MEM
-       }, {
-               .flags                  = IORESOURCE_IRQ
-       }
-};
-
-static u64 sead3_usbdev_dma_mask = DMA_BIT_MASK(32);
-
-static struct platform_device ehci_device = {
-       .name           = "sead3-ehci",
-       .id             = 0,
-       .dev            = {
-               .dma_mask               = &sead3_usbdev_dma_mask,
-               .coherent_dma_mask      = DMA_BIT_MASK(32)
-       },
-       .num_resources  = ARRAY_SIZE(ehci_resources),
-       .resource       = ehci_resources
-};
-
-static struct platform_device *sead3_platform_devices[] __initdata = {
-       &uart8250_device,
-       &sead3_flash,
-       &pled_device,
-       &fled_device,
-       &sead3_led_device,
-       &ehci_device,
-       &sead3_net_device,
-};
-
-static int __init sead3_platforms_device_init(void)
-{
-       if (gic_present) {
-               uart8250_data[0].irq = MIPS_GIC_IRQ_BASE + GIC_INT_UART0;
-               uart8250_data[1].irq = MIPS_GIC_IRQ_BASE + GIC_INT_UART1;
-               ehci_resources[1].start = MIPS_GIC_IRQ_BASE + GIC_INT_EHCI;
-               sead3_net_resources[1].start = MIPS_GIC_IRQ_BASE + GIC_INT_NET;
-       } else {
-               uart8250_data[0].irq = MIPS_CPU_IRQ_BASE + CPU_INT_UART0;
-               uart8250_data[1].irq = MIPS_CPU_IRQ_BASE + CPU_INT_UART1;
-               ehci_resources[1].start = MIPS_CPU_IRQ_BASE + CPU_INT_EHCI;
-               sead3_net_resources[1].start = MIPS_CPU_IRQ_BASE + CPU_INT_NET;
-       }
-
-       return platform_add_devices(sead3_platform_devices,
-                                   ARRAY_SIZE(sead3_platform_devices));
-}
-
-device_initcall(sead3_platforms_device_init);
diff --git a/arch/mips/mti-sead3/sead3-reset.c b/arch/mips/mti-sead3/sead3-reset.c
deleted file mode 100644 (file)
index e6fb244..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/io.h>
-#include <linux/pm.h>
-
-#include <asm/reboot.h>
-
-#define SOFTRES_REG    0x1f000050
-#define GORESET                0x4d
-
-static void mips_machine_restart(char *command)
-{
-       unsigned int __iomem *softres_reg =
-               ioremap(SOFTRES_REG, sizeof(unsigned int));
-
-       __raw_writel(GORESET, softres_reg);
-}
-
-static void mips_machine_halt(void)
-{
-       unsigned int __iomem *softres_reg =
-               ioremap(SOFTRES_REG, sizeof(unsigned int));
-
-       __raw_writel(GORESET, softres_reg);
-}
-
-static int __init mips_reboot_setup(void)
-{
-       _machine_restart = mips_machine_restart;
-       _machine_halt = mips_machine_halt;
-       pm_power_off = mips_machine_halt;
-
-       return 0;
-}
-arch_initcall(mips_reboot_setup);
diff --git a/arch/mips/mti-sead3/sead3-setup.c b/arch/mips/mti-sead3/sead3-setup.c
deleted file mode 100644 (file)
index edfcaf0..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- * Copyright (C) 2013 Imagination Technologies Ltd.
- */
-#include <linux/init.h>
-#include <linux/libfdt.h>
-#include <linux/of_fdt.h>
-
-#include <asm/prom.h>
-#include <asm/fw/fw.h>
-
-#include <asm/mips-boards/generic.h>
-
-const char *get_system_type(void)
-{
-       return "MIPS SEAD3";
-}
-
-static uint32_t get_memsize_from_cmdline(void)
-{
-       int memsize = 0;
-       char *p = arcs_cmdline;
-       char *s = "memsize=";
-
-       p = strstr(p, s);
-       if (p) {
-               p += strlen(s);
-               memsize = memparse(p, NULL);
-       }
-
-       return memsize;
-}
-
-static uint32_t get_memsize_from_env(void)
-{
-       int memsize = 0;
-       char *p;
-
-       p = fw_getenv("memsize");
-       if (p)
-               memsize = memparse(p, NULL);
-
-       return memsize;
-}
-
-static uint32_t get_memsize(void)
-{
-       uint32_t memsize;
-
-       memsize = get_memsize_from_cmdline();
-       if (memsize)
-               return memsize;
-
-       return get_memsize_from_env();
-}
-
-static void __init parse_memsize_param(void)
-{
-       int offset;
-       const uint64_t *prop_value;
-       int prop_len;
-       uint32_t memsize = get_memsize();
-
-       if (!memsize)
-               return;
-
-       offset = fdt_path_offset(__dtb_start, "/memory");
-       if (offset > 0) {
-               uint64_t new_value;
-               /*
-                * reg contains 2 32-bits BE values, offset and size. We just
-                * want to replace the size value without affecting the offset
-                */
-               prop_value = fdt_getprop(__dtb_start, offset, "reg", &prop_len);
-               new_value = be64_to_cpu(*prop_value);
-               new_value =  (new_value & ~0xffffffffllu) | memsize;
-               fdt_setprop_inplace_u64(__dtb_start, offset, "reg", new_value);
-       }
-}
-
-void __init *plat_get_fdt(void)
-{
-       return (void *)__dtb_start;
-}
-
-void __init plat_mem_setup(void)
-{
-       /* allow command line/bootloader env to override memory size in DT */
-       parse_memsize_param();
-
-       /*
-        * Load the builtin devicetree. This causes the chosen node to be
-        * parsed resulting in our memory appearing
-        */
-       __dt_setup_arch(__dtb_start);
-}
-
-void __init device_tree_init(void)
-{
-       if (!initial_boot_params)
-               return;
-
-       unflatten_and_copy_device_tree();
-}
diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c
deleted file mode 100644 (file)
index a120b7a..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
- */
-#include <linux/init.h>
-#include <linux/irqchip/mips-gic.h>
-
-#include <asm/cpu.h>
-#include <asm/setup.h>
-#include <asm/time.h>
-#include <asm/irq.h>
-#include <asm/mips-boards/generic.h>
-
-static void __iomem *status_reg = (void __iomem *)0xbf000410;
-
-/*
- * Estimate CPU frequency.  Sets mips_hpt_frequency as a side-effect.
- */
-static unsigned int __init estimate_cpu_frequency(void)
-{
-       unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK);
-       unsigned int tick = 0;
-       unsigned int freq;
-       unsigned int orig;
-       unsigned long flags;
-
-       local_irq_save(flags);
-
-       orig = readl(status_reg) & 0x2;               /* get original sample */
-       /* wait for transition */
-       while ((readl(status_reg) & 0x2) == orig)
-               ;
-       orig = orig ^ 0x2;                            /* flip the bit */
-
-       write_c0_count(0);
-
-       /* wait 1 second (the sampling clock transitions every 10ms) */
-       while (tick < 100) {
-               /* wait for transition */
-               while ((readl(status_reg) & 0x2) == orig)
-                       ;
-               orig = orig ^ 0x2;                            /* flip the bit */
-               tick++;
-       }
-
-       freq = read_c0_count();
-
-       local_irq_restore(flags);
-
-       mips_hpt_frequency = freq;
-
-       /* Adjust for processor */
-       if ((prid != (PRID_COMP_MIPS | PRID_IMP_20KC)) &&
-               (prid != (PRID_COMP_MIPS | PRID_IMP_25KF)))
-               freq *= 2;
-
-       freq += 5000;        /* rounding */
-       freq -= freq%10000;
-
-       return freq ;
-}
-
-void read_persistent_clock(struct timespec *ts)
-{
-       ts->tv_sec = 0;
-       ts->tv_nsec = 0;
-}
-
-int get_c0_perfcount_int(void)
-{
-       if (gic_present)
-               return gic_get_c0_perfcount_int();
-       if (cp0_perfcount_irq >= 0)
-               return MIPS_CPU_IRQ_BASE + cp0_perfcount_irq;
-       return -1;
-}
-EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
-
-unsigned int get_c0_compare_int(void)
-{
-       if (gic_present)
-               return gic_get_c0_compare_int();
-       return MIPS_CPU_IRQ_BASE + cp0_compare_irq;
-}
-
-void __init plat_time_init(void)
-{
-       unsigned int est_freq;
-
-       est_freq = estimate_cpu_frequency();
-
-       pr_debug("CPU frequency %d.%02d MHz\n", (est_freq / 1000000),
-               (est_freq % 1000000) * 100 / 1000000);
-
-       mips_scroll_message();
-}
index 139ad1d7ab5e3ce9dfa8aba6b507fe04e8b8d8b9..4b821481dd4432b11103ac30fb24fa2fece707ec 100644 (file)
@@ -3,6 +3,8 @@
 #
 
 obj-y                          += pci.o
+obj-$(CONFIG_PCI_DRIVERS_LEGACY)+= pci-legacy.o
+obj-$(CONFIG_PCI_DRIVERS_GENERIC)+= pci-generic.o
 
 #
 # PCI bus host bridge specific code
index c8994c156e2ddad2c0bde912c553f66015c8d757..e99ca7702d8ad81660ab011ae4c990c0fb954f96 100644 (file)
@@ -429,7 +429,8 @@ static int alchemy_pci_probe(struct platform_device *pdev)
 
        /* Au1500 revisions older than AD have borked coherent PCI */
        if ((alchemy_get_cputype() == ALCHEMY_CPU_AU1500) &&
-           (read_c0_prid() < 0x01030202) && !coherentio) {
+           (read_c0_prid() < 0x01030202) &&
+           (coherentio == IO_COHERENCE_DISABLED)) {
                val = __raw_readl(ctx->regs + PCI_REG_CONFIG);
                val |= PCI_CONFIG_NC;
                __raw_writel(val, ctx->regs + PCI_REG_CONFIG);
index 7db963deec737747de9f2e7dedcdf049a93a6115..bdf87b43633fe51694218eccc7e3e767eb3ba99d 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/pci.h>
 #include <linux/pci_regs.h>
 #include <linux/interrupt.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/platform_device.h>
 
 #include <asm/mach-ath79/ar71xx_regs.h>
index 2013dad700dfa9c38c7618067c8fb78fbd0f7350..1e23c8d587bdef78db97ee553439d1f1eb3fe3ba 100644 (file)
@@ -11,7 +11,7 @@
 
 #include <linux/irq.h>
 #include <linux/pci.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/platform_device.h>
 #include <asm/mach-ath79/ath79.h>
 #include <asm/mach-ath79/ar71xx_regs.h>
diff --git a/arch/mips/pci/pci-generic.c b/arch/mips/pci/pci-generic.c
new file mode 100644 (file)
index 0000000..dce304d
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * pcibios_align_resource taken from arch/arm/kernel/bios32.c.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/pci.h>
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might have be mirrored at 0x0100-0x03ff..
+ */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+                               resource_size_t size, resource_size_t align)
+{
+       struct pci_dev *dev = data;
+       resource_size_t start = res->start;
+       struct pci_host_bridge *host_bridge;
+
+       if (res->flags & IORESOURCE_IO && start & 0x300)
+               start = (start + 0x3ff) & ~0x3ff;
+
+       start = (start + align - 1) & ~(align - 1);
+
+       host_bridge = pci_find_host_bridge(dev->bus);
+
+       if (host_bridge->align_resource)
+               return host_bridge->align_resource(dev, res,
+                               start, size, align);
+
+       return start;
+}
+
+void pcibios_fixup_bus(struct pci_bus *bus)
+{
+       pci_read_bridge_bases(bus);
+}
index b9deab17ccf246130760565cc1b387fdea5997ca..f18f887f481d8ada98e62b00dd600069929db253 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/delay.h>
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
-#include <linux/module.h>
 #include <linux/clk.h>
 #include <linux/of_platform.h>
 #include <linux/of_gpio.h>
@@ -234,7 +233,6 @@ static const struct of_device_id ltq_pci_match[] = {
        { .compatible = "lantiq,pci-xway" },
        {},
 };
-MODULE_DEVICE_TABLE(of, ltq_pci_match);
 
 static struct platform_driver ltq_pci_driver = {
        .probe = ltq_pci_probe,
diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c
new file mode 100644 (file)
index 0000000..014649b
--- /dev/null
@@ -0,0 +1,302 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under  the terms of the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * Copyright (C) 2003, 04, 11 Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 2011 Wind River Systems,
+ *   written by Ralf Baechle (ralf@linux-mips.org)
+ */
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/of_address.h>
+
+#include <asm/cpu-info.h>
+
+/*
+ * If PCI_PROBE_ONLY in pci_flags is set, we don't change any PCI resource
+ * assignments.
+ */
+
+/*
+ * The PCI controller list.
+ */
+static LIST_HEAD(controllers);
+
+static int pci_initialized;
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might have be mirrored at 0x0100-0x03ff..
+ */
+resource_size_t
+pcibios_align_resource(void *data, const struct resource *res,
+                      resource_size_t size, resource_size_t align)
+{
+       struct pci_dev *dev = data;
+       struct pci_controller *hose = dev->sysdata;
+       resource_size_t start = res->start;
+
+       if (res->flags & IORESOURCE_IO) {
+               /* Make sure we start at our min on all hoses */
+               if (start < PCIBIOS_MIN_IO + hose->io_resource->start)
+                       start = PCIBIOS_MIN_IO + hose->io_resource->start;
+
+               /*
+                * Put everything into 0x00-0xff region modulo 0x400
+                */
+               if (start & 0x300)
+                       start = (start + 0x3ff) & ~0x3ff;
+       } else if (res->flags & IORESOURCE_MEM) {
+               /* Make sure we start at our min on all hoses */
+               if (start < PCIBIOS_MIN_MEM + hose->mem_resource->start)
+                       start = PCIBIOS_MIN_MEM + hose->mem_resource->start;
+       }
+
+       return start;
+}
+
+static void pcibios_scanbus(struct pci_controller *hose)
+{
+       static int next_busno;
+       static int need_domain_info;
+       LIST_HEAD(resources);
+       struct pci_bus *bus;
+
+       if (hose->get_busno && pci_has_flag(PCI_PROBE_ONLY))
+               next_busno = (*hose->get_busno)();
+
+       pci_add_resource_offset(&resources,
+                               hose->mem_resource, hose->mem_offset);
+       pci_add_resource_offset(&resources,
+                               hose->io_resource, hose->io_offset);
+       pci_add_resource_offset(&resources,
+                               hose->busn_resource, hose->busn_offset);
+       bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose,
+                               &resources);
+       hose->bus = bus;
+
+       need_domain_info = need_domain_info || pci_domain_nr(bus);
+       set_pci_need_domain_info(hose, need_domain_info);
+
+       if (!bus) {
+               pci_free_resource_list(&resources);
+               return;
+       }
+
+       next_busno = bus->busn_res.end + 1;
+       /* Don't allow 8-bit bus number overflow inside the hose -
+          reserve some space for bridges. */
+       if (next_busno > 224) {
+               next_busno = 0;
+               need_domain_info = 1;
+       }
+
+       /*
+        * We insert PCI resources into the iomem_resource and
+        * ioport_resource trees in either pci_bus_claim_resources()
+        * or pci_bus_assign_resources().
+        */
+       if (pci_has_flag(PCI_PROBE_ONLY)) {
+               pci_bus_claim_resources(bus);
+       } else {
+               pci_bus_size_bridges(bus);
+               pci_bus_assign_resources(bus);
+       }
+       pci_bus_add_devices(bus);
+}
+
+#ifdef CONFIG_OF
+void pci_load_of_ranges(struct pci_controller *hose, struct device_node *node)
+{
+       struct of_pci_range range;
+       struct of_pci_range_parser parser;
+
+       pr_info("PCI host bridge %s ranges:\n", node->full_name);
+       hose->of_node = node;
+
+       if (of_pci_range_parser_init(&parser, node))
+               return;
+
+       for_each_of_pci_range(&parser, &range) {
+               struct resource *res = NULL;
+
+               switch (range.flags & IORESOURCE_TYPE_BITS) {
+               case IORESOURCE_IO:
+                       pr_info("  IO 0x%016llx..0x%016llx\n",
+                               range.cpu_addr,
+                               range.cpu_addr + range.size - 1);
+                       hose->io_map_base =
+                               (unsigned long)ioremap(range.cpu_addr,
+                                                      range.size);
+                       res = hose->io_resource;
+                       break;
+               case IORESOURCE_MEM:
+                       pr_info(" MEM 0x%016llx..0x%016llx\n",
+                               range.cpu_addr,
+                               range.cpu_addr + range.size - 1);
+                       res = hose->mem_resource;
+                       break;
+               }
+               if (res != NULL)
+                       of_pci_range_to_resource(&range, node, res);
+       }
+}
+
+struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
+{
+       struct pci_controller *hose = bus->sysdata;
+
+       return of_node_get(hose->of_node);
+}
+#endif
+
+static DEFINE_MUTEX(pci_scan_mutex);
+
+void register_pci_controller(struct pci_controller *hose)
+{
+       struct resource *parent;
+
+       parent = hose->mem_resource->parent;
+       if (!parent)
+               parent = &iomem_resource;
+
+       if (request_resource(parent, hose->mem_resource) < 0)
+               goto out;
+
+       parent = hose->io_resource->parent;
+       if (!parent)
+               parent = &ioport_resource;
+
+       if (request_resource(parent, hose->io_resource) < 0) {
+               release_resource(hose->mem_resource);
+               goto out;
+       }
+
+       INIT_LIST_HEAD(&hose->list);
+       list_add(&hose->list, &controllers);
+
+       /*
+        * Do not panic here but later - this might happen before console init.
+        */
+       if (!hose->io_map_base) {
+               printk(KERN_WARNING
+                      "registering PCI controller with io_map_base unset\n");
+       }
+
+       /*
+        * Scan the bus if it is register after the PCI subsystem
+        * initialization.
+        */
+       if (pci_initialized) {
+               mutex_lock(&pci_scan_mutex);
+               pcibios_scanbus(hose);
+               mutex_unlock(&pci_scan_mutex);
+       }
+
+       return;
+
+out:
+       printk(KERN_WARNING
+              "Skipping PCI bus scan due to resource conflict\n");
+}
+
+static int __init pcibios_init(void)
+{
+       struct pci_controller *hose;
+
+       /* Scan all of the recorded PCI controllers.  */
+       list_for_each_entry(hose, &controllers, list)
+               pcibios_scanbus(hose);
+
+       pci_fixup_irqs(pci_common_swizzle, pcibios_map_irq);
+
+       pci_initialized = 1;
+
+       return 0;
+}
+
+subsys_initcall(pcibios_init);
+
+static int pcibios_enable_resources(struct pci_dev *dev, int mask)
+{
+       u16 cmd, old_cmd;
+       int idx;
+       struct resource *r;
+
+       pci_read_config_word(dev, PCI_COMMAND, &cmd);
+       old_cmd = cmd;
+       for (idx=0; idx < PCI_NUM_RESOURCES; idx++) {
+               /* Only set up the requested stuff */
+               if (!(mask & (1<<idx)))
+                       continue;
+
+               r = &dev->resource[idx];
+               if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
+                       continue;
+               if ((idx == PCI_ROM_RESOURCE) &&
+                               (!(r->flags & IORESOURCE_ROM_ENABLE)))
+                       continue;
+               if (!r->start && r->end) {
+                       printk(KERN_ERR "PCI: Device %s not available "
+                              "because of resource collisions\n",
+                              pci_name(dev));
+                       return -EINVAL;
+               }
+               if (r->flags & IORESOURCE_IO)
+                       cmd |= PCI_COMMAND_IO;
+               if (r->flags & IORESOURCE_MEM)
+                       cmd |= PCI_COMMAND_MEMORY;
+       }
+       if (cmd != old_cmd) {
+               printk("PCI: Enabling device %s (%04x -> %04x)\n",
+                      pci_name(dev), old_cmd, cmd);
+               pci_write_config_word(dev, PCI_COMMAND, cmd);
+       }
+       return 0;
+}
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+       int err;
+
+       if ((err = pcibios_enable_resources(dev, mask)) < 0)
+               return err;
+
+       return pcibios_plat_dev_init(dev);
+}
+
+void pcibios_fixup_bus(struct pci_bus *bus)
+{
+       struct pci_dev *dev = bus->self;
+
+       if (pci_has_flag(PCI_PROBE_ONLY) && dev &&
+           (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+               pci_read_bridge_bases(bus);
+       }
+}
+
+char * (*pcibios_plat_setup)(char *str) __initdata;
+
+char *__init pcibios_setup(char *str)
+{
+       if (pcibios_plat_setup)
+               return pcibios_plat_setup(str);
+       return str;
+}
index 6ce81620169956564ab96a0cda2b22e3cf756e27..628c5132b3d8b254ad0c590ef9a606af05412972 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_pci.h>
@@ -407,13 +406,11 @@ static const struct of_device_id mt7620_pci_ids[] = {
        { .compatible = "mediatek,mt7620-pci" },
        {},
 };
-MODULE_DEVICE_TABLE(of, mt7620_pci_ids);
 
 static struct platform_driver mt7620_pci_driver = {
        .probe = mt7620_pci_probe,
        .driver = {
                .name = "mt7620-pci",
-               .owner = THIS_MODULE,
                .of_match_table = of_match_ptr(mt7620_pci_ids),
        },
 };
index c258cd406fbbe39b2e657b60e5e43545526d9a4e..308d051fc45cd5d25e96c4157abbfeabd32cc5c0 100644 (file)
@@ -204,6 +204,8 @@ const char *octeon_get_pci_interrupts(void)
         * Interrupt Number (INTA# = 0, INTB# = 1, INTC# = 2, and
         * INTD# = 3)
         */
+       if (of_machine_is_compatible("dlink,dsr-500n"))
+               return "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC";
        switch (octeon_bootinfo->board_type) {
        case CVMX_BOARD_TYPE_NAO38:
                /* This is really the NAC38 */
index f2a1050168d9592c0a0940b9f996f4c1141285cc..d6360fe73d058c5733274fb1b163c393a3f0e14c 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/pci.h>
 #include <linux/io.h>
 #include <linux/init.h>
-#include <linux/module.h>
 #include <linux/of_platform.h>
 #include <linux/of_irq.h>
 #include <linux/of_pci.h>
@@ -260,7 +259,6 @@ static const struct of_device_id rt288x_pci_match[] = {
        { .compatible = "ralink,rt288x-pci" },
        {},
 };
-MODULE_DEVICE_TABLE(of, rt288x_pci_match);
 
 static struct platform_driver rt288x_pci_driver = {
        .probe = rt288x_pci_probe,
index 53a42b07008b99f2043ac1da444b9732c4af8c2b..3520e9b414e7b91dfedc59abeb37d757579e475a 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_pci.h>
@@ -580,7 +579,6 @@ static const struct of_device_id rt3883_pci_ids[] = {
        { .compatible = "ralink,rt3883-pci" },
        {},
 };
-MODULE_DEVICE_TABLE(of, rt3883_pci_ids);
 
 static struct platform_driver rt3883_pci_driver = {
        .probe = rt3883_pci_probe,
index b4c02f29663e180aeb5635aba2f4d9eabd24980c..f6325fa657fb6538dbbc512d53254cf611b60aa1 100644 (file)
 
 #include <asm/cpu-info.h>
 
-/*
- * If PCI_PROBE_ONLY in pci_flags is set, we don't change any PCI resource
- * assignments.
- */
-
-/*
- * The PCI controller list.
- */
-
-static struct pci_controller *hose_head, **hose_tail = &hose_head;
-
 unsigned long PCIBIOS_MIN_IO;
-unsigned long PCIBIOS_MIN_MEM;
-
-static int pci_initialized;
-
-/*
- * We need to avoid collisions with `mirrored' VGA ports
- * and other strange ISA hardware, so we always want the
- * addresses to be allocated in the 0x000-0x0ff region
- * modulo 0x400.
- *
- * Why? Because some silly external IO cards only decode
- * the low 10 bits of the IO address. The 0x00-0xff region
- * is reserved for motherboard devices that decode all 16
- * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
- * but we want to try to avoid allocating at 0x2900-0x2bff
- * which might have be mirrored at 0x0100-0x03ff..
- */
-resource_size_t
-pcibios_align_resource(void *data, const struct resource *res,
-                      resource_size_t size, resource_size_t align)
-{
-       struct pci_dev *dev = data;
-       struct pci_controller *hose = dev->sysdata;
-       resource_size_t start = res->start;
-
-       if (res->flags & IORESOURCE_IO) {
-               /* Make sure we start at our min on all hoses */
-               if (start < PCIBIOS_MIN_IO + hose->io_resource->start)
-                       start = PCIBIOS_MIN_IO + hose->io_resource->start;
-
-               /*
-                * Put everything into 0x00-0xff region modulo 0x400
-                */
-               if (start & 0x300)
-                       start = (start + 0x3ff) & ~0x3ff;
-       } else if (res->flags & IORESOURCE_MEM) {
-               /* Make sure we start at our min on all hoses */
-               if (start < PCIBIOS_MIN_MEM + hose->mem_resource->start)
-                       start = PCIBIOS_MIN_MEM + hose->mem_resource->start;
-       }
-
-       return start;
-}
-
-static void pcibios_scanbus(struct pci_controller *hose)
-{
-       static int next_busno;
-       static int need_domain_info;
-       LIST_HEAD(resources);
-       struct pci_bus *bus;
-
-       if (hose->get_busno && pci_has_flag(PCI_PROBE_ONLY))
-               next_busno = (*hose->get_busno)();
-
-       pci_add_resource_offset(&resources,
-                               hose->mem_resource, hose->mem_offset);
-       pci_add_resource_offset(&resources,
-                               hose->io_resource, hose->io_offset);
-       pci_add_resource_offset(&resources,
-                               hose->busn_resource, hose->busn_offset);
-       bus = pci_scan_root_bus(NULL, next_busno, hose->pci_ops, hose,
-                               &resources);
-       hose->bus = bus;
-
-       need_domain_info = need_domain_info || hose->index;
-       hose->need_domain_info = need_domain_info;
-
-       if (!bus) {
-               pci_free_resource_list(&resources);
-               return;
-       }
-
-       next_busno = bus->busn_res.end + 1;
-       /* Don't allow 8-bit bus number overflow inside the hose -
-          reserve some space for bridges. */
-       if (next_busno > 224) {
-               next_busno = 0;
-               need_domain_info = 1;
-       }
-
-       /*
-        * We insert PCI resources into the iomem_resource and
-        * ioport_resource trees in either pci_bus_claim_resources()
-        * or pci_bus_assign_resources().
-        */
-       if (pci_has_flag(PCI_PROBE_ONLY)) {
-               pci_bus_claim_resources(bus);
-       } else {
-               pci_bus_size_bridges(bus);
-               pci_bus_assign_resources(bus);
-       }
-       pci_bus_add_devices(bus);
-}
-
-#ifdef CONFIG_OF
-void pci_load_of_ranges(struct pci_controller *hose, struct device_node *node)
-{
-       struct of_pci_range range;
-       struct of_pci_range_parser parser;
-
-       pr_info("PCI host bridge %s ranges:\n", node->full_name);
-       hose->of_node = node;
-
-       if (of_pci_range_parser_init(&parser, node))
-               return;
-
-       for_each_of_pci_range(&parser, &range) {
-               struct resource *res = NULL;
-
-               switch (range.flags & IORESOURCE_TYPE_BITS) {
-               case IORESOURCE_IO:
-                       pr_info("  IO 0x%016llx..0x%016llx\n",
-                               range.cpu_addr,
-                               range.cpu_addr + range.size - 1);
-                       hose->io_map_base =
-                               (unsigned long)ioremap(range.cpu_addr,
-                                                      range.size);
-                       res = hose->io_resource;
-                       break;
-               case IORESOURCE_MEM:
-                       pr_info(" MEM 0x%016llx..0x%016llx\n",
-                               range.cpu_addr,
-                               range.cpu_addr + range.size - 1);
-                       res = hose->mem_resource;
-                       break;
-               }
-               if (res != NULL)
-                       of_pci_range_to_resource(&range, node, res);
-       }
-}
-
-struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
-{
-       struct pci_controller *hose = bus->sysdata;
-
-       return of_node_get(hose->of_node);
-}
-#endif
-
-static DEFINE_MUTEX(pci_scan_mutex);
-
-void register_pci_controller(struct pci_controller *hose)
-{
-       struct resource *parent;
-
-       parent = hose->mem_resource->parent;
-       if (!parent)
-               parent = &iomem_resource;
-
-       if (request_resource(parent, hose->mem_resource) < 0)
-               goto out;
-
-       parent = hose->io_resource->parent;
-       if (!parent)
-               parent = &ioport_resource;
-
-       if (request_resource(parent, hose->io_resource) < 0) {
-               release_resource(hose->mem_resource);
-               goto out;
-       }
-
-       *hose_tail = hose;
-       hose_tail = &hose->next;
-
-       /*
-        * Do not panic here but later - this might happen before console init.
-        */
-       if (!hose->io_map_base) {
-               printk(KERN_WARNING
-                      "registering PCI controller with io_map_base unset\n");
-       }
-
-       /*
-        * Scan the bus if it is register after the PCI subsystem
-        * initialization.
-        */
-       if (pci_initialized) {
-               mutex_lock(&pci_scan_mutex);
-               pcibios_scanbus(hose);
-               mutex_unlock(&pci_scan_mutex);
-       }
-
-       return;
+EXPORT_SYMBOL(PCIBIOS_MIN_IO);
 
-out:
-       printk(KERN_WARNING
-              "Skipping PCI bus scan due to resource conflict\n");
-}
+unsigned long PCIBIOS_MIN_MEM;
+EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
 
-static void __init pcibios_set_cache_line_size(void)
+static int __init pcibios_set_cache_line_size(void)
 {
        struct cpuinfo_mips *c = &current_cpu_data;
        unsigned int lsize;
@@ -239,92 +44,9 @@ static void __init pcibios_set_cache_line_size(void)
        pci_dfl_cache_line_size = lsize >> 2;
 
        pr_debug("PCI: pci_cache_line_size set to %d bytes\n", lsize);
-}
-
-static int __init pcibios_init(void)
-{
-       struct pci_controller *hose;
-
-       pcibios_set_cache_line_size();
-
-       /* Scan all of the recorded PCI controllers.  */
-       for (hose = hose_head; hose; hose = hose->next)
-               pcibios_scanbus(hose);
-
-       pci_fixup_irqs(pci_common_swizzle, pcibios_map_irq);
-
-       pci_initialized = 1;
-
-       return 0;
-}
-
-subsys_initcall(pcibios_init);
-
-static int pcibios_enable_resources(struct pci_dev *dev, int mask)
-{
-       u16 cmd, old_cmd;
-       int idx;
-       struct resource *r;
-
-       pci_read_config_word(dev, PCI_COMMAND, &cmd);
-       old_cmd = cmd;
-       for (idx=0; idx < PCI_NUM_RESOURCES; idx++) {
-               /* Only set up the requested stuff */
-               if (!(mask & (1<<idx)))
-                       continue;
-
-               r = &dev->resource[idx];
-               if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM)))
-                       continue;
-               if ((idx == PCI_ROM_RESOURCE) &&
-                               (!(r->flags & IORESOURCE_ROM_ENABLE)))
-                       continue;
-               if (!r->start && r->end) {
-                       printk(KERN_ERR "PCI: Device %s not available "
-                              "because of resource collisions\n",
-                              pci_name(dev));
-                       return -EINVAL;
-               }
-               if (r->flags & IORESOURCE_IO)
-                       cmd |= PCI_COMMAND_IO;
-               if (r->flags & IORESOURCE_MEM)
-                       cmd |= PCI_COMMAND_MEMORY;
-       }
-       if (cmd != old_cmd) {
-               printk("PCI: Enabling device %s (%04x -> %04x)\n",
-                      pci_name(dev), old_cmd, cmd);
-               pci_write_config_word(dev, PCI_COMMAND, cmd);
-       }
        return 0;
 }
-
-unsigned int pcibios_assign_all_busses(void)
-{
-       return 1;
-}
-
-int pcibios_enable_device(struct pci_dev *dev, int mask)
-{
-       int err;
-
-       if ((err = pcibios_enable_resources(dev, mask)) < 0)
-               return err;
-
-       return pcibios_plat_dev_init(dev);
-}
-
-void pcibios_fixup_bus(struct pci_bus *bus)
-{
-       struct pci_dev *dev = bus->self;
-
-       if (pci_has_flag(PCI_PROBE_ONLY) && dev &&
-           (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
-               pci_read_bridge_bases(bus);
-       }
-}
-
-EXPORT_SYMBOL(PCIBIOS_MIN_IO);
-EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
+arch_initcall(pcibios_set_cache_line_size);
 
 void pci_resource_to_user(const struct pci_dev *dev, int bar,
                          const struct resource *rsrc, resource_size_t *start,
@@ -359,12 +81,3 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
        return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
                vma->vm_end - vma->vm_start, vma->vm_page_prot);
 }
-
-char * (*pcibios_plat_setup)(char *str) __initdata;
-
-char *__init pcibios_setup(char *str)
-{
-       if (pcibios_plat_setup)
-               return pcibios_plat_setup(str);
-       return str;
-}
index 99f3db4f0a9b1b060476e6d5873fd45dd15a649b..9f672ceb089b9086b1fadcd0a3a487d2307beb69 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/time.h>
 #include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
 
 #include <asm/octeon/octeon.h>
 #include <asm/octeon/cvmx-npei-defs.h>
index 3cd357737a267c38dc20b85724b723f37f26bd01..7cf4eb50fc7211f93219dbe23cb47581cac32f3a 100644 (file)
@@ -232,12 +232,8 @@ static struct platform_device *pnx833x_platform_devices[] __initdata = {
 
 static int __init pnx833x_platform_init(void)
 {
-       int res;
-
-       res = platform_add_devices(pnx833x_platform_devices,
-                                  ARRAY_SIZE(pnx833x_platform_devices));
-
-       return res;
+       return platform_add_devices(pnx833x_platform_devices,
+                                   ARRAY_SIZE(pnx833x_platform_devices));
 }
 
 arch_initcall(pnx833x_platform_init);
index b0343ff336c5fe6dd173af86cfb341129144a725..8077ff39bdeabb602680342f0d8e436b5f9bf4cc 100644 (file)
@@ -1,4 +1,7 @@
 /*
+ * Ralink RT2880 timer
+ * Author: John Crispin
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
  * by the Free Software Foundation.
@@ -6,7 +9,6 @@
  * Copyright (C) 2013 John Crispin <john@phrozen.org>
 */
 
-#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/timer.h>
@@ -152,33 +154,17 @@ static int rt_timer_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int rt_timer_remove(struct platform_device *pdev)
-{
-       struct rt_timer *rt = platform_get_drvdata(pdev);
-
-       rt_timer_disable(rt);
-       rt_timer_free(rt);
-
-       return 0;
-}
-
 static const struct of_device_id rt_timer_match[] = {
        { .compatible = "ralink,rt2880-timer" },
        {},
 };
-MODULE_DEVICE_TABLE(of, rt_timer_match);
 
 static struct platform_driver rt_timer_driver = {
        .probe = rt_timer_probe,
-       .remove = rt_timer_remove,
        .driver = {
-               .name           = "rt-timer",
-               .of_match_table = rt_timer_match
+               .name                   = "rt-timer",
+               .of_match_table         = rt_timer_match,
+               .suppress_bind_attrs    = true,
        },
 };
-
-module_platform_driver(rt_timer_driver);
-
-MODULE_DESCRIPTION("Ralink RT2880 timer");
-MODULE_AUTHOR("John Crispin <john@phrozen.org");
-MODULE_LICENSE("GPL");
+builtin_platform_driver(rt_timer_driver);
index 8c337d60f790db9f0ae31c8a64872acfe734cb9b..42923478d45ca363b2c8eca403138dfa4aedcc61 100644 (file)
@@ -20,7 +20,7 @@ config MACH_TXX9
        select SYS_SUPPORTS_32BIT_KERNEL
        select SYS_SUPPORTS_LITTLE_ENDIAN
        select SYS_SUPPORTS_BIG_ENDIAN
-       select HAVE_CLK
+       select COMMON_CLK
 
 config TOSHIBA_JMR3927
        bool "Toshiba JMR-TX3927 board"
index 1f6bc9a3036c0976b54c4b1f85abd27bd7d6b598..285d84e5c7b92cf52785a6aa21393c67a6301c68 100644 (file)
@@ -29,12 +29,8 @@ static int __init
 early_read_config_word(struct pci_controller *hose,
                       int top_bus, int bus, int devfn, int offset, u16 *value)
 {
-       struct pci_dev fake_dev;
        struct pci_bus fake_bus;
 
-       fake_dev.bus = &fake_bus;
-       fake_dev.sysdata = hose;
-       fake_dev.devfn = devfn;
        fake_bus.number = bus;
        fake_bus.sysdata = hose;
        fake_bus.ops = hose->pci_ops;
@@ -45,7 +41,7 @@ early_read_config_word(struct pci_controller *hose,
        else
                fake_bus.parent = NULL;
 
-       return pci_read_config_word(&fake_dev, offset, value);
+       return pci_bus_read_config_word(&fake_bus, devfn, offset, value);
 }
 
 int __init txx9_pci66_check(struct pci_controller *hose, int top_bus,
index ada92db92f87d91a68a32d8747313eaf20bed021..a1d98b5c8fd6757683d1e9c23196660676006a54 100644 (file)
@@ -15,7 +15,8 @@
 #include <linux/interrupt.h>
 #include <linux/string.h>
 #include <linux/module.h>
-#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
 #include <linux/err.h>
 #include <linux/gpio/driver.h>
 #include <linux/platform_device.h>
@@ -83,40 +84,6 @@ int txx9_ccfg_toeon __initdata;
 int txx9_ccfg_toeon __initdata = 1;
 #endif
 
-/* Minimum CLK support */
-
-struct clk *clk_get(struct device *dev, const char *id)
-{
-       if (!strcmp(id, "spi-baseclk"))
-               return (struct clk *)((unsigned long)txx9_gbus_clock / 2 / 2);
-       if (!strcmp(id, "imbus_clk"))
-               return (struct clk *)((unsigned long)txx9_gbus_clock / 2);
-       return ERR_PTR(-ENOENT);
-}
-EXPORT_SYMBOL(clk_get);
-
-int clk_enable(struct clk *clk)
-{
-       return 0;
-}
-EXPORT_SYMBOL(clk_enable);
-
-void clk_disable(struct clk *clk)
-{
-}
-EXPORT_SYMBOL(clk_disable);
-
-unsigned long clk_get_rate(struct clk *clk)
-{
-       return (unsigned long)clk;
-}
-EXPORT_SYMBOL(clk_get_rate);
-
-void clk_put(struct clk *clk)
-{
-}
-EXPORT_SYMBOL(clk_put);
-
 #define BOARD_VEC(board)       extern struct txx9_board_vec board;
 #include <asm/txx9/boards.h>
 #undef BOARD_VEC
@@ -560,8 +527,41 @@ void __init plat_time_init(void)
        txx9_board_vec->time_init();
 }
 
+static void txx9_clk_init(void)
+{
+       struct clk_hw *hw;
+       int error;
+
+       hw = clk_hw_register_fixed_rate(NULL, "gbus", NULL, 0, txx9_gbus_clock);
+       if (IS_ERR(hw)) {
+               error = PTR_ERR(hw);
+               goto fail;
+       }
+
+       hw = clk_hw_register_fixed_factor(NULL, "imbus", "gbus", 0, 1, 2);
+       error = clk_hw_register_clkdev(hw, "imbus_clk", NULL);
+       if (error)
+               goto fail;
+
+#ifdef CONFIG_CPU_TX49XX
+       if (TX4938_REV_PCODE() == 0x4938) {
+               hw = clk_hw_register_fixed_factor(NULL, "spi", "gbus", 0, 1, 4);
+               error = clk_hw_register_clkdev(hw, "spi-baseclk", NULL);
+               if (error)
+                       goto fail;
+       }
+#endif
+
+       return;
+
+fail:
+       pr_err("Failed to register clocks: %d\n", error);
+}
+
 static int __init _txx9_arch_init(void)
 {
+       txx9_clk_init();
+
        if (txx9_board_vec->arch_init)
                txx9_board_vec->arch_init();
        return 0;
index 110e05c3eb8fb638c1b75dbeb68db81d96a26e50..d3b83a92cf26c7075939bd23ae0aeb2e083921d2 100644 (file)
@@ -92,7 +92,6 @@ void __init tx3927_setup(void)
        /* PIO */
        __raw_writel(0, &tx3927_pioptr->maskcpu);
        __raw_writel(0, &tx3927_pioptr->maskext);
-       txx9_gpio_init(TX3927_PIO_REG, 0, 16);
 
        conf = read_c0_conf();
        if (conf & TX39_CONF_DCE) {
index a4664cb6c1e183712c2f60a430e082f231b27ddc..8d8011570b1dbe990567a848d52bf513729e18c6 100644 (file)
@@ -215,7 +215,6 @@ void __init tx4927_setup(void)
                txx9_tmr_init(TX4927_TMR_REG(i) & 0xfffffffffULL);
 
        /* PIO */
-       txx9_gpio_init(TX4927_PIO_REG & 0xfffffffffULL, 0, TX4927_NUM_PIO);
        __raw_writel(0, &tx4927_pioptr->maskcpu);
        __raw_writel(0, &tx4927_pioptr->maskext);
 
index 58cdb2aba5e1ba72cac3235974919eb26ffad254..ba265bf1fd06703645cb4a353c3100013741a324 100644 (file)
@@ -241,7 +241,6 @@ void __init tx4938_setup(void)
                txx9_tmr_init(TX4938_TMR_REG(i) & 0xfffffffffULL);
 
        /* PIO */
-       txx9_gpio_init(TX4938_PIO_REG & 0xfffffffffULL, 0, TX4938_NUM_PIO);
        __raw_writel(0, &tx4938_pioptr->maskcpu);
        __raw_writel(0, &tx4938_pioptr->maskext);
 
index 3206f76f300b727a91d5566c92bbc64cddd82be0..a455166dc6d44fe7418f0b66823e49ce188e690b 100644 (file)
@@ -142,8 +142,6 @@ static void __init jmr3927_board_init(void)
 
        /* PIO[15:12] connected to LEDs */
        __raw_writel(0x0000f000, &tx3927_pioptr->dir);
-       gpio_request(11, "dipsw1");
-       gpio_request(10, "dipsw2");
 
        jmr3927_pci_setup();
 
@@ -204,6 +202,14 @@ static void __init jmr3927_device_init(void)
        txx9_iocled_init(iocled_base, -1, 8, 1, "green", NULL);
 }
 
+static void __init jmr3927_arch_init(void)
+{
+       txx9_gpio_init(TX3927_PIO_REG, 0, 16);
+
+       gpio_request(11, "dipsw1");
+       gpio_request(10, "dipsw2");
+}
+
 struct txx9_board_vec jmr3927_vec __initdata = {
        .system = "Toshiba JMR_TX3927",
        .prom_init = jmr3927_prom_init,
@@ -211,6 +217,7 @@ struct txx9_board_vec jmr3927_vec __initdata = {
        .irq_setup = jmr3927_irq_setup,
        .time_init = jmr3927_time_init,
        .device_init = jmr3927_device_init,
+       .arch_init = jmr3927_arch_init,
 #ifdef CONFIG_PCI
        .pci_map_irq = jmr3927_pci_map_irq,
 #endif
index 3c516ef625e57d2da5aee577917dd1e5d1ee318b..f5b367e20dff1cfa1f0a1b3f3a1e86f04f12dc6e 100644 (file)
@@ -52,6 +52,7 @@
 #include <linux/leds.h>
 #include <asm/io.h>
 #include <asm/reboot.h>
+#include <asm/txx9pio.h>
 #include <asm/txx9/generic.h>
 #include <asm/txx9/pci.h>
 #include <asm/txx9/rbtx4927.h>
@@ -151,20 +152,37 @@ static void __init tx4937_pci_setup(void)
        }
        tx4938_setup_pcierr_irq();
 }
+#else
+static inline void tx4927_pci_setup(void) {}
+static inline void tx4937_pci_setup(void) {}
+#endif /* CONFIG_PCI */
+
+static void __init rbtx4927_gpio_init(void)
+{
+       /* TX4927-SIO DTR on (PIO[15]) */
+       gpio_request(15, "sio-dtr");
+       gpio_direction_output(15, 1);
+
+       tx4927_sio_init(0, 0);
+}
 
 static void __init rbtx4927_arch_init(void)
 {
+       txx9_gpio_init(TX4927_PIO_REG & 0xfffffffffULL, 0, TX4927_NUM_PIO);
+
+       rbtx4927_gpio_init();
+
        tx4927_pci_setup();
 }
 
 static void __init rbtx4937_arch_init(void)
 {
+       txx9_gpio_init(TX4938_PIO_REG & 0xfffffffffULL, 0, TX4938_NUM_PIO);
+
+       rbtx4927_gpio_init();
+
        tx4937_pci_setup();
 }
-#else
-#define rbtx4927_arch_init NULL
-#define rbtx4937_arch_init NULL
-#endif /* CONFIG_PCI */
 
 static void toshiba_rbtx4927_restart(char *command)
 {
@@ -205,12 +223,6 @@ static void __init rbtx4927_mem_setup(void)
 #else
        set_io_port_base(KSEG1 + RBTX4927_ISA_IO_OFFSET);
 #endif
-
-       /* TX4927-SIO DTR on (PIO[15]) */
-       gpio_request(15, "sio-dtr");
-       gpio_direction_output(15, 1);
-
-       tx4927_sio_init(0, 0);
 }
 
 static void __init rbtx4927_clock_init(void)
index 54de66837103c702e341951b4aeb346e8bffc0f0..07939ed6b22fdac50316fbbfe702b8909effc6a0 100644 (file)
@@ -336,6 +336,7 @@ static void __init rbtx4938_mtd_init(void)
 
 static void __init rbtx4938_arch_init(void)
 {
+       txx9_gpio_init(TX4938_PIO_REG & 0xfffffffffULL, 0, TX4938_NUM_PIO);
        gpiochip_add_data(&rbtx4938_spi_gpio_chip, NULL);
        rbtx4938_pci_setup();
        rbtx4938_spi_init();
index 3b4538ec0102d5d28b15bc6421db11663fb377ff..c3dc12a8b7d9ddc79f8acf126cdd83bbb91dfee2 100644 (file)
@@ -13,8 +13,6 @@ cflags-vdso := $(ccflags-vdso) \
        -DDISABLE_BRANCH_PROFILING \
        $(call cc-option, -fno-stack-protector)
 aflags-vdso := $(ccflags-vdso) \
-       $(filter -I%,$(KBUILD_CFLAGS)) \
-       $(filter -E%,$(KBUILD_CFLAGS)) \
        -D__ASSEMBLY__ -Wa,-gdwarf-2
 
 #
@@ -82,7 +80,7 @@ obj-vdso := $(obj-vdso-y:%.o=$(obj)/%.o)
 $(obj-vdso): KBUILD_CFLAGS := $(cflags-vdso) $(native-abi)
 $(obj-vdso): KBUILD_AFLAGS := $(aflags-vdso) $(native-abi)
 
-$(obj)/vdso.lds: KBUILD_CPPFLAGS := $(native-abi)
+$(obj)/vdso.lds: KBUILD_CPPFLAGS := $(ccflags-vdso) $(native-abi)
 
 $(obj)/vdso.so.dbg.raw: $(obj)/vdso.lds $(obj-vdso) FORCE
        $(call if_changed,vdsold)
index 5129f23a9ee1008fc4b7203d2af689b0bc915a46..0e12527c4b0e6de154efaa91fe197eb995a1535c 100644 (file)
@@ -90,4 +90,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _ASM_SOCKET_H */
index d9563ddb337eab4e44d052ebd285206b09788f13..746bf5caaffc7989884813b16bc3fd3fce85d0ef 100644 (file)
@@ -324,6 +324,7 @@ static int __init nios2_time_init(struct device_node *timer)
                ret = nios2_clocksource_init(timer);
                break;
        default:
+               ret = 0;
                break;
        }
 
index 4ce7a01a252dc6a2c191917e2516a5ffe27aa1d1..5f55da9cbfd5ce8ff72d9baea51671bcedd44732 100644 (file)
@@ -23,6 +23,8 @@
  * they shouldn't be hard-coded!
  */
 
+#define __ro_after_init __read_mostly
+
 #define L1_CACHE_BYTES 16
 #define L1_CACHE_SHIFT 4
 
index 71c4a3aa3752cf485813b011cc4489ad14b0b1b5..a14b865870131a052c995fed98cc9e3ed7a3cd2d 100644 (file)
@@ -34,7 +34,9 @@ config PARISC
        select HAVE_ARCH_HASH
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
-       select HAVE_UNSTABLE_SCHED_CLOCK if (SMP || !64BIT)
+       select GENERIC_SCHED_CLOCK
+       select HAVE_UNSTABLE_SCHED_CLOCK if SMP
+       select GENERIC_CLOCKEVENTS
        select ARCH_NO_COHERENT_DMA_MMAP
        select CPU_NO_EFFICIENT_FFS
 
index 9c935d717df94c998dce3dc66ad8eefd1b71d066..7a109b73ddf7e814f9c29ffa1ec9b99977ead5d1 100644 (file)
@@ -89,4 +89,6 @@
 
 #define SO_CNX_ADVICE          0x402E
 
+#define SCM_TIMESTAMPING_OPT_STATS     0x402F
+
 #endif /* _UAPI_ASM_SOCKET_H */
index a9b9407f38f7c63a3ad9f42b0dcd7d11d1fb8d92..6b0741e7a7ed3ee4060d619a8999b50dab12dac3 100644 (file)
 
 #define __IGNORE_select                /* newselect */
 #define __IGNORE_fadvise64     /* fadvise64_64 */
-
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free
 
 #define LINUX_GATEWAY_ADDR      0x100
 
index 629eb464d5bacd0d299d9eaad26ef871aed2f573..c263301648f34a049abea878974324022b70b086 100644 (file)
@@ -369,6 +369,7 @@ void __init parisc_setup_cache_timing(void)
 {
        unsigned long rangetime, alltime;
        unsigned long size, start;
+       unsigned long threshold;
 
        alltime = mfctl(16);
        flush_data_cache();
@@ -382,17 +383,12 @@ void __init parisc_setup_cache_timing(void)
        printk(KERN_DEBUG "Whole cache flush %lu cycles, flushing %lu bytes %lu cycles\n",
                alltime, size, rangetime);
 
-       /* Racy, but if we see an intermediate value, it's ok too... */
-       parisc_cache_flush_threshold = size * alltime / rangetime;
-
-       parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold);
-       if (!parisc_cache_flush_threshold)
-               parisc_cache_flush_threshold = FLUSH_THRESHOLD;
-
-       if (parisc_cache_flush_threshold > cache_info.dc_size)
-               parisc_cache_flush_threshold = cache_info.dc_size;
-
-       printk(KERN_INFO "Setting cache flush threshold to %lu kB\n",
+       threshold = L1_CACHE_ALIGN(size * alltime / rangetime);
+       if (threshold > cache_info.dc_size)
+               threshold = cache_info.dc_size;
+       if (threshold)
+               parisc_cache_flush_threshold = threshold;
+       printk(KERN_INFO "Cache flush threshold set to %lu KiB\n",
                parisc_cache_flush_threshold/1024);
 
        /* calculate TLB flush threshold */
@@ -401,7 +397,7 @@ void __init parisc_setup_cache_timing(void)
        flush_tlb_all();
        alltime = mfctl(16) - alltime;
 
-       size = PAGE_SIZE;
+       size = 0;
        start = (unsigned long) _text;
        rangetime = mfctl(16);
        while (start < (unsigned long) _end) {
@@ -414,13 +410,10 @@ void __init parisc_setup_cache_timing(void)
        printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n",
                alltime, size, rangetime);
 
-       parisc_tlb_flush_threshold = size * alltime / rangetime;
-       parisc_tlb_flush_threshold *= num_online_cpus();
-       parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold);
-       if (!parisc_tlb_flush_threshold)
-               parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD;
-
-       printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n",
+       threshold = PAGE_ALIGN(num_online_cpus() * size * alltime / rangetime);
+       if (threshold)
+               parisc_tlb_flush_threshold = threshold;
+       printk(KERN_INFO "TLB flush threshold set to %lu KiB\n",
                parisc_tlb_flush_threshold/1024);
 }
 
index f8150669b8c6f4e4de827b6b3d2f8dbd586c6593..700e2d2da0969cdfeb872071fe16b5f9c32e82cf 100644 (file)
@@ -873,11 +873,11 @@ static void print_parisc_device(struct parisc_device *dev)
 
        if (dev->num_addrs) {
                int k;
-               printk(", additional addresses: ");
+               pr_cont(", additional addresses: ");
                for (k = 0; k < dev->num_addrs; k++)
-                       printk("0x%lx ", dev->addr[k]);
+                       pr_cont("0x%lx ", dev->addr[k]);
        }
-       printk("\n");
+       pr_cont("\n");
 }
 
 /**
index 545f9d2fe71107edc181170dcac473d9dcec0388..c05d1876d27c4975453194686976c6cb0147531d 100644 (file)
@@ -58,7 +58,7 @@ void __init setup_pdc(void)
        status = pdc_system_map_find_mods(&module_result, &module_path, 0);
        if (status == PDC_OK) {
                pdc_type = PDC_TYPE_SYSTEM_MAP;
-               printk("System Map.\n");
+               pr_cont("System Map.\n");
                return;
        }
 
@@ -77,7 +77,7 @@ void __init setup_pdc(void)
        status = pdc_pat_cell_get_number(&cell_info);
        if (status == PDC_OK) {
                pdc_type = PDC_TYPE_PAT;
-               printk("64 bit PAT.\n");
+               pr_cont("64 bit PAT.\n");
                return;
        }
 #endif
@@ -97,12 +97,12 @@ void __init setup_pdc(void)
        case 0xC:               /* 715/64, at least */
 
                pdc_type = PDC_TYPE_SNAKE;
-               printk("Snake.\n");
+               pr_cont("Snake.\n");
                return;
 
        default:                /* Everything else */
 
-               printk("Unsupported.\n");
+               pr_cont("Unsupported.\n");
                panic("If this is a 64-bit machine, please try a 64-bit kernel.\n");
        }
 }
index 985e06da37f5163fd6ea566024c9f6e82d9db607..1b39a2acaadf43f16df3a54f3a797b029a3e06ff 100644 (file)
@@ -96,7 +96,7 @@ fitmanyloop:                                  /* Loop if LOOP >= 2 */
 
 fitmanymiddle:                                 /* Loop if LOOP >= 2 */
        addib,COND(>)           -1, %r31, fitmanymiddle /* Adjusted inner loop decr */
-       pitlbe          0(%sr1, %r28)
+       pitlbe          %r0(%sr1, %r28)
        pitlbe,m        %arg1(%sr1, %r28)       /* Last pitlbe and addr adjust */
        addib,COND(>)           -1, %r29, fitmanymiddle /* Middle loop decr */
        copy            %arg3, %r31             /* Re-init inner loop count */
@@ -139,7 +139,7 @@ fdtmanyloop:                                        /* Loop if LOOP >= 2 */
 
 fdtmanymiddle:                                 /* Loop if LOOP >= 2 */
        addib,COND(>)           -1, %r31, fdtmanymiddle /* Adjusted inner loop decr */
-       pdtlbe          0(%sr1, %r28)
+       pdtlbe          %r0(%sr1, %r28)
        pdtlbe,m        %arg1(%sr1, %r28)       /* Last pdtlbe and addr adjust */
        addib,COND(>)           -1, %r29, fdtmanymiddle /* Middle loop decr */
        copy            %arg3, %r31             /* Re-init inner loop count */
@@ -626,12 +626,12 @@ ENTRY_CFI(copy_user_page_asm)
        /* Purge any old translations */
 
 #ifdef CONFIG_PA20
-       pdtlb,l         0(%r28)
-       pdtlb,l         0(%r29)
+       pdtlb,l         %r0(%r28)
+       pdtlb,l         %r0(%r29)
 #else
        tlb_lock        %r20,%r21,%r22
-       pdtlb           0(%r28)
-       pdtlb           0(%r29)
+       pdtlb           %r0(%r28)
+       pdtlb           %r0(%r29)
        tlb_unlock      %r20,%r21,%r22
 #endif
 
@@ -774,10 +774,10 @@ ENTRY_CFI(clear_user_page_asm)
        /* Purge any old translation */
 
 #ifdef CONFIG_PA20
-       pdtlb,l         0(%r28)
+       pdtlb,l         %r0(%r28)
 #else
        tlb_lock        %r20,%r21,%r22
-       pdtlb           0(%r28)
+       pdtlb           %r0(%r28)
        tlb_unlock      %r20,%r21,%r22
 #endif
 
@@ -858,10 +858,10 @@ ENTRY_CFI(flush_dcache_page_asm)
        /* Purge any old translation */
 
 #ifdef CONFIG_PA20
-       pdtlb,l         0(%r28)
+       pdtlb,l         %r0(%r28)
 #else
        tlb_lock        %r20,%r21,%r22
-       pdtlb           0(%r28)
+       pdtlb           %r0(%r28)
        tlb_unlock      %r20,%r21,%r22
 #endif
 
@@ -898,10 +898,10 @@ ENTRY_CFI(flush_dcache_page_asm)
        sync
 
 #ifdef CONFIG_PA20
-       pdtlb,l         0(%r25)
+       pdtlb,l         %r0(%r25)
 #else
        tlb_lock        %r20,%r21,%r22
-       pdtlb           0(%r25)
+       pdtlb           %r0(%r25)
        tlb_unlock      %r20,%r21,%r22
 #endif
 
@@ -931,13 +931,18 @@ ENTRY_CFI(flush_icache_page_asm)
        depwi           0, 31,PAGE_SHIFT, %r28  /* Clear any offset bits */
 #endif
 
-       /* Purge any old translation */
+       /* Purge any old translation.  Note that the FIC instruction
+        * may use either the instruction or data TLB.  Given that we
+        * have a flat address space, it's not clear which TLB will be
+        * used.  So, we purge both entries.  */
 
 #ifdef CONFIG_PA20
+       pdtlb,l         %r0(%r28)
        pitlb,l         %r0(%sr4,%r28)
 #else
        tlb_lock        %r20,%r21,%r22
-       pitlb           (%sr4,%r28)
+       pdtlb           %r0(%r28)
+       pitlb           %r0(%sr4,%r28)
        tlb_unlock      %r20,%r21,%r22
 #endif
 
@@ -976,10 +981,12 @@ ENTRY_CFI(flush_icache_page_asm)
        sync
 
 #ifdef CONFIG_PA20
+       pdtlb,l         %r0(%r28)
        pitlb,l         %r0(%sr4,%r25)
 #else
        tlb_lock        %r20,%r21,%r22
-       pitlb           (%sr4,%r25)
+       pdtlb           %r0(%r28)
+       pitlb           %r0(%sr4,%r25)
        tlb_unlock      %r20,%r21,%r22
 #endif
 
index 02d9ed0f3949f29b0b8db1761e6fb10a43c49414..494ff6e8c88a471288b89aa15737ebae56a6f018 100644 (file)
@@ -95,8 +95,8 @@ static inline int map_pte_uncached(pte_t * pte,
 
                if (!pte_none(*pte))
                        printk(KERN_ERR "map_pte_uncached: page already exists\n");
-               set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC));
                purge_tlb_start(flags);
+               set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC));
                pdtlb_kernel(orig_vaddr);
                purge_tlb_end(flags);
                vaddr += PAGE_SIZE;
index 81d6f639194478fa96b33a7f30ba4d612c9cfa8f..2e66a887788e8781bf76b13cbd3fed02e6979259 100644 (file)
@@ -334,6 +334,10 @@ static int __init parisc_init(void)
        /* tell PDC we're Linux. Nevermind failure. */
        pdc_stable_write(0x40, &osid, sizeof(osid));
        
+       /* start with known state */
+       flush_cache_all_local();
+       flush_tlb_all_local(NULL);
+
        processor_init();
 #ifdef CONFIG_SMP
        pr_info("CPU(s): %d out of %d %s at %d.%06d MHz online\n",
index d03422e5f188368f8df5283cedfd4e32845e64df..23de307c3052aa9ecac21fd6c294657fb53de447 100644 (file)
@@ -100,14 +100,12 @@ set_thread_pointer:
        .endr
 
 /* This address must remain fixed at 0x100 for glibc's syscalls to work */
-       .align 256
+       .align LINUX_GATEWAY_ADDR
 linux_gateway_entry:
        gate    .+8, %r0                        /* become privileged */
        mtsp    %r0,%sr4                        /* get kernel space into sr4 */
        mtsp    %r0,%sr5                        /* get kernel space into sr5 */
        mtsp    %r0,%sr6                        /* get kernel space into sr6 */
-       mfsp    %sr7,%r1                        /* save user sr7 */
-       mtsp    %r1,%sr3                        /* and store it in sr3 */
 
 #ifdef CONFIG_64BIT
        /* for now we can *always* set the W bit on entry to the syscall
@@ -133,6 +131,14 @@ linux_gateway_entry:
        depdi   0, 31, 32, %r21
 1:     
 #endif
+
+       /* We use an rsm/ssm pair to prevent sr3 from being clobbered
+        * by external interrupts.
+        */
+       mfsp    %sr7,%r1                        /* save user sr7 */
+       rsm     PSW_SM_I, %r0                   /* disable interrupts */
+       mtsp    %r1,%sr3                        /* and store it in sr3 */
+
        mfctl   %cr30,%r1
        xor     %r1,%r30,%r30                   /* ye olde xor trick */
        xor     %r1,%r30,%r1
@@ -147,6 +153,7 @@ linux_gateway_entry:
         */
 
        mtsp    %r0,%sr7                        /* get kernel space into sr7 */
+       ssm     PSW_SM_I, %r0                   /* enable interrupts */
        STREGM  %r1,FRAME_SIZE(%r30)            /* save r1 (usp) here for now */
        mfctl   %cr30,%r1                       /* get task ptr in %r1 */
        LDREG   TI_TASK(%r1),%r1
@@ -474,11 +481,6 @@ lws_start:
        comiclr,>>      __NR_lws_entries, %r20, %r0
        b,n     lws_exit_nosys
 
-       /* WARNING: Trashing sr2 and sr3 */
-       mfsp    %sr7,%r1                        /* get userspace into sr3 */
-       mtsp    %r1,%sr3
-       mtsp    %r0,%sr2                        /* get kernel space into sr2 */
-
        /* Load table start */
        ldil    L%lws_table, %r1
        ldo     R%lws_table(%r1), %r28  /* Scratch use of r28 */
@@ -627,9 +629,9 @@ cas_action:
        stw     %r1, 4(%sr2,%r20)
 #endif
        /* The load and store could fail */
-1:     ldw,ma  0(%sr3,%r26), %r28
+1:     ldw,ma  0(%r26), %r28
        sub,<>  %r28, %r25, %r0
-2:     stw,ma  %r24, 0(%sr3,%r26)
+2:     stw,ma  %r24, 0(%r26)
        /* Free lock */
        stw,ma  %r20, 0(%sr2,%r20)
 #if ENABLE_LWS_DEBUG
@@ -706,9 +708,9 @@ lws_compare_and_swap_2:
        nop
 
        /* 8bit load */
-4:     ldb     0(%sr3,%r25), %r25
+4:     ldb     0(%r25), %r25
        b       cas2_lock_start
-5:     ldb     0(%sr3,%r24), %r24
+5:     ldb     0(%r24), %r24
        nop
        nop
        nop
@@ -716,9 +718,9 @@ lws_compare_and_swap_2:
        nop
 
        /* 16bit load */
-6:     ldh     0(%sr3,%r25), %r25
+6:     ldh     0(%r25), %r25
        b       cas2_lock_start
-7:     ldh     0(%sr3,%r24), %r24
+7:     ldh     0(%r24), %r24
        nop
        nop
        nop
@@ -726,9 +728,9 @@ lws_compare_and_swap_2:
        nop
 
        /* 32bit load */
-8:     ldw     0(%sr3,%r25), %r25
+8:     ldw     0(%r25), %r25
        b       cas2_lock_start
-9:     ldw     0(%sr3,%r24), %r24
+9:     ldw     0(%r24), %r24
        nop
        nop
        nop
@@ -737,14 +739,14 @@ lws_compare_and_swap_2:
 
        /* 64bit load */
 #ifdef CONFIG_64BIT
-10:    ldd     0(%sr3,%r25), %r25
-11:    ldd     0(%sr3,%r24), %r24
+10:    ldd     0(%r25), %r25
+11:    ldd     0(%r24), %r24
 #else
        /* Load new value into r22/r23 - high/low */
-10:    ldw     0(%sr3,%r25), %r22
-11:    ldw     4(%sr3,%r25), %r23
+10:    ldw     0(%r25), %r22
+11:    ldw     4(%r25), %r23
        /* Load new value into fr4 for atomic store later */
-12:    flddx   0(%sr3,%r24), %fr4
+12:    flddx   0(%r24), %fr4
 #endif
 
 cas2_lock_start:
@@ -794,30 +796,30 @@ cas2_action:
        ldo     1(%r0),%r28
 
        /* 8bit CAS */
-13:    ldb,ma  0(%sr3,%r26), %r29
+13:    ldb,ma  0(%r26), %r29
        sub,=   %r29, %r25, %r0
        b,n     cas2_end
-14:    stb,ma  %r24, 0(%sr3,%r26)
+14:    stb,ma  %r24, 0(%r26)
        b       cas2_end
        copy    %r0, %r28
        nop
        nop
 
        /* 16bit CAS */
-15:    ldh,ma  0(%sr3,%r26), %r29
+15:    ldh,ma  0(%r26), %r29
        sub,=   %r29, %r25, %r0
        b,n     cas2_end
-16:    sth,ma  %r24, 0(%sr3,%r26)
+16:    sth,ma  %r24, 0(%r26)
        b       cas2_end
        copy    %r0, %r28
        nop
        nop
 
        /* 32bit CAS */
-17:    ldw,ma  0(%sr3,%r26), %r29
+17:    ldw,ma  0(%r26), %r29
        sub,=   %r29, %r25, %r0
        b,n     cas2_end
-18:    stw,ma  %r24, 0(%sr3,%r26)
+18:    stw,ma  %r24, 0(%r26)
        b       cas2_end
        copy    %r0, %r28
        nop
@@ -825,22 +827,22 @@ cas2_action:
 
        /* 64bit CAS */
 #ifdef CONFIG_64BIT
-19:    ldd,ma  0(%sr3,%r26), %r29
+19:    ldd,ma  0(%r26), %r29
        sub,*=  %r29, %r25, %r0
        b,n     cas2_end
-20:    std,ma  %r24, 0(%sr3,%r26)
+20:    std,ma  %r24, 0(%r26)
        copy    %r0, %r28
 #else
        /* Compare first word */
-19:    ldw,ma  0(%sr3,%r26), %r29
+19:    ldw,ma  0(%r26), %r29
        sub,=   %r29, %r22, %r0
        b,n     cas2_end
        /* Compare second word */
-20:    ldw,ma  4(%sr3,%r26), %r29
+20:    ldw,ma  4(%r26), %r29
        sub,=   %r29, %r23, %r0
        b,n     cas2_end
        /* Perform the store */
-21:    fstdx   %fr4, 0(%sr3,%r26)
+21:    fstdx   %fr4, 0(%r26)
        copy    %r0, %r28
 #endif
 
index 9b63b876a13a4b6ad422f7ae69fc49fed70eb1be..325f30d82b6434368425d652402fabf66fd4f8ee 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/rtc.h>
 #include <linux/sched.h>
+#include <linux/sched_clock.h>
 #include <linux/kernel.h>
 #include <linux/param.h>
 #include <linux/string.h>
 
 static unsigned long clocktick __read_mostly;  /* timer cycles per tick */
 
-#ifndef CONFIG_64BIT
-/*
- * The processor-internal cycle counter (Control Register 16) is used as time
- * source for the sched_clock() function.  This register is 64bit wide on a
- * 64-bit kernel and 32bit on a 32-bit kernel. Since sched_clock() always
- * requires a 64bit counter we emulate on the 32-bit kernel the higher 32bits
- * with a per-cpu variable which we increase every time the counter
- * wraps-around (which happens every ~4 secounds).
- */
-static DEFINE_PER_CPU(unsigned long, cr16_high_32_bits);
-#endif
-
 /*
  * We keep time on PA-RISC Linux by using the Interval Timer which is
  * a pair of registers; one is read-only and one is write-only; both
@@ -121,12 +110,6 @@ irqreturn_t __irq_entry timer_interrupt(int irq, void *dev_id)
         */
        mtctl(next_tick, 16);
 
-#if !defined(CONFIG_64BIT)
-       /* check for overflow on a 32bit kernel (every ~4 seconds). */
-       if (unlikely(next_tick < now))
-               this_cpu_inc(cr16_high_32_bits);
-#endif
-
        /* Skip one clocktick on purpose if we missed next_tick.
         * The new CR16 must be "later" than current CR16 otherwise
         * itimer would not fire until CR16 wrapped - e.g 4 seconds
@@ -208,7 +191,7 @@ EXPORT_SYMBOL(profile_pc);
 
 /* clock source code */
 
-static cycle_t read_cr16(struct clocksource *cs)
+static cycle_t notrace read_cr16(struct clocksource *cs)
 {
        return get_cycles();
 }
@@ -287,26 +270,9 @@ void read_persistent_clock(struct timespec *ts)
 }
 
 
-/*
- * sched_clock() framework
- */
-
-static u32 cyc2ns_mul __read_mostly;
-static u32 cyc2ns_shift __read_mostly;
-
-u64 sched_clock(void)
+static u64 notrace read_cr16_sched_clock(void)
 {
-       u64 now;
-
-       /* Get current cycle counter (Control Register 16). */
-#ifdef CONFIG_64BIT
-       now = mfctl(16);
-#else
-       now = mfctl(16) + (((u64) this_cpu_read(cr16_high_32_bits)) << 32);
-#endif
-
-       /* return the value in ns (cycles_2_ns) */
-       return mul_u64_u32_shr(now, cyc2ns_mul, cyc2ns_shift);
+       return get_cycles();
 }
 
 
@@ -316,17 +282,16 @@ u64 sched_clock(void)
 
 void __init time_init(void)
 {
-       unsigned long current_cr16_khz;
+       unsigned long cr16_hz;
 
-       current_cr16_khz = PAGE0->mem_10msec/10;  /* kHz */
        clocktick = (100 * PAGE0->mem_10msec) / HZ;
-
-       /* calculate mult/shift values for cr16 */
-       clocks_calc_mult_shift(&cyc2ns_mul, &cyc2ns_shift, current_cr16_khz,
-                               NSEC_PER_MSEC, 0);
-
        start_cpu_itimer();     /* get CPU 0 started */
 
+       cr16_hz = 100 * PAGE0->mem_10msec;  /* Hz */
+
        /* register at clocksource framework */
-       clocksource_register_khz(&clocksource_cr16, current_cr16_khz);
+       clocksource_register_hz(&clocksource_cr16, cr16_hz);
+
+       /* register as sched_clock source */
+       sched_clock_register(read_cr16_sched_clock, BITS_PER_LONG, cr16_hz);
 }
index 50d020ac0f487ef1427f02644748fe35a653960a..617dece6792427847458203c229505a054b5331e 100644 (file)
@@ -318,12 +318,12 @@ mpc85xx_smp_defconfig:
 PHONY += corenet32_smp_defconfig
 corenet32_smp_defconfig:
        $(call merge_into_defconfig,corenet_basic_defconfig,\
-               85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw)
+               85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw dpaa)
 
 PHONY += corenet64_smp_defconfig
 corenet64_smp_defconfig:
        $(call merge_into_defconfig,corenet_basic_defconfig,\
-               85xx-64bit 85xx-smp altivec 85xx-hw fsl-emb-nonhw)
+               85xx-64bit 85xx-smp altivec 85xx-hw fsl-emb-nonhw dpaa)
 
 PHONY += mpc86xx_defconfig
 mpc86xx_defconfig:
index f7a184b6c35b4ad00414720316cfbbf1d50fc704..78aaf4ffd7ab07156990a4d5a68118dec6944213 100644 (file)
@@ -32,9 +32,16 @@ static struct addr_range prep_kernel(void)
        void *addr = 0;
        struct elf_info ei;
        long len;
+       int uncompressed_image = 0;
 
-       partial_decompress(vmlinuz_addr, vmlinuz_size,
+       len = partial_decompress(vmlinuz_addr, vmlinuz_size,
                elfheader, sizeof(elfheader), 0);
+       /* assume uncompressed data if -1 is returned */
+       if (len == -1) {
+               uncompressed_image = 1;
+               memcpy(elfheader, vmlinuz_addr, sizeof(elfheader));
+               printf("No valid compressed data found, assume uncompressed data\n\r");
+       }
 
        if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei))
                fatal("Error: not a valid PPC32 or PPC64 ELF file!\n\r");
@@ -67,6 +74,13 @@ static struct addr_range prep_kernel(void)
                                        "device tree\n\r");
        }
 
+       if (uncompressed_image) {
+               memcpy(addr, vmlinuz_addr + ei.elfoffset, ei.loadsize);
+               printf("0x%lx bytes of uncompressed data copied\n\r",
+                      ei.loadsize);
+               goto out;
+       }
+
        /* Finally, decompress the kernel */
        printf("Decompressing (0x%p <- 0x%p:0x%p)...\n\r", addr,
               vmlinuz_addr, vmlinuz_addr+vmlinuz_size);
@@ -82,7 +96,7 @@ static struct addr_range prep_kernel(void)
                         len, ei.loadsize);
 
        printf("Done! Decompressed 0x%lx bytes\n\r", len);
-
+out:
        flush_cache(addr, ei.loadsize);
 
        return (struct addr_range){addr, ei.memsize};
@@ -218,8 +232,12 @@ void start(void)
                console_ops.close();
 
        kentry = (kernel_entry_t) vmlinux.addr;
-       if (ft_addr)
-               kentry(ft_addr, 0, NULL);
+       if (ft_addr) {
+               if(platform_ops.kentry)
+                       platform_ops.kentry(ft_addr, vmlinux.addr);
+               else
+                       kentry(ft_addr, 0, NULL);
+       }
        else
                kentry((unsigned long)initrd.addr, initrd.size,
                       loader_info.promptr);
index ff2f1b97bc5323f78bbfdd038fccebdde45a4432..2a99fc9a3ccf368d5176d799133e652ffb70ca87 100644 (file)
 
        .text
 
+       .globl opal_kentry
+opal_kentry:
+       /* r3 is the fdt ptr */
+       mtctr r4
+       li      r4, 0
+       li      r5, 0
+       li      r6, 0
+       li      r7, 0
+       ld      r11,opal@got(r2)
+       ld      r8,0(r11)
+       ld      r9,8(r11)
+       bctr
+
 #define OPAL_CALL(name, token)                         \
        .globl name;                                    \
 name:                                                  \
index 1f37e1c1d6d88b9cd0a88795a12e8651389dc483..d7b4fd47eb44ff6b9bbef81da9f49f334efa8eef 100644 (file)
@@ -23,14 +23,25 @@ struct opal {
 
 static u32 opal_con_id;
 
+/* see opal-wrappers.S */
 int64_t opal_console_write(int64_t term_number, u64 *length, const u8 *buffer);
 int64_t opal_console_read(int64_t term_number, uint64_t *length, u8 *buffer);
 int64_t opal_console_write_buffer_space(uint64_t term_number, uint64_t *length);
 int64_t opal_console_flush(uint64_t term_number);
 int64_t opal_poll_events(uint64_t *outstanding_event_mask);
 
+void opal_kentry(unsigned long fdt_addr, void *vmlinux_addr);
+
 static int opal_con_open(void)
 {
+       /*
+        * When OPAL loads the boot kernel, it stashes the OPAL base and entry
+        * addresses in r8 and r9 so the kernel can use the OPAL console
+        * before unflattening the devicetree. The wrapper will probably trash
+        * r8 and r9 while it executes, so this kentry hook restores them
+        * before entering the decompressed kernel.
+        */
+       platform_ops.kentry = opal_kentry;
        return 0;
 }
 
index 309d1b127e966f8161d7940eaf30e2f68b2b47e6..fad1862f4b2d3904360aa68431177dbc79621f99 100644 (file)
@@ -30,6 +30,7 @@ struct platform_ops {
        void *  (*realloc)(void *ptr, unsigned long size);
        void    (*exit)(void);
        void *  (*vmlinux_alloc)(unsigned long size);
+       void    (*kentry)(unsigned long fdt_addr, void *vmlinux_addr);
 };
 extern struct platform_ops platform_ops;
 
diff --git a/arch/powerpc/configs/dpaa.config b/arch/powerpc/configs/dpaa.config
new file mode 100644 (file)
index 0000000..2fe76f5
--- /dev/null
@@ -0,0 +1,4 @@
+CONFIG_FSL_DPAA=y
+CONFIG_FSL_PAMU=y
+CONFIG_FSL_FMAN=y
+CONFIG_FSL_DPAA_ETH=y
index ab9f4e0ed4cfcfd48a8d232fe20d0482739a22c5..5c4fbc80dc6ce683d278ce13174b770f337bcaa0 100644 (file)
@@ -1,5 +1,6 @@
 generic-y += clkdev.h
 generic-y += div64.h
+generic-y += export.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += local64.h
index d1492736d85223d54913eef6c2014d915cdb7f78..e0baba1535e6b08ab9350d64c51507ebfbf1630a 100644 (file)
 
 #include <linux/threads.h>
 #include <linux/kprobes.h>
+#include <asm/cacheflush.h>
+#include <asm/checksum.h>
+#include <asm/uaccess.h>
+#include <asm/epapr_hcalls.h>
 
 #include <uapi/asm/ucontext.h>
 
@@ -109,4 +113,12 @@ void early_setup_secondary(void);
 /* time */
 void accumulate_stolen_time(void);
 
+/* misc runtime */
+extern u64 __bswapdi2(u64);
+extern s64 __lshrdi3(s64, int);
+extern s64 __ashldi3(s64, int);
+extern s64 __ashrdi3(s64, int);
+extern int __cmpdi2(s64, s64);
+extern int __ucmpdi2(u64, u64);
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
index ee655ed1ff1bc7eae352be8887936e168f7ab58f..1e8fceb308a518950918e1ea7d9102eb313f66ee 100644 (file)
@@ -53,10 +53,8 @@ static inline __sum16 csum_fold(__wsum sum)
        return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
 }
 
-static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
-                                     unsigned short len,
-                                     unsigned short proto,
-                                     __wsum sum)
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
+                                       __u8 proto, __wsum sum)
 {
 #ifdef __powerpc64__
        unsigned long s = (__force u32)sum;
@@ -83,10 +81,8 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
  * computes the checksum of the TCP/UDP pseudo-header
  * returns a 16-bit checksum, already complemented
  */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
-                                       unsigned short len,
-                                       unsigned short proto,
-                                       __wsum sum)
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+                                       __u8 proto, __wsum sum)
 {
        return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
 }
index 01b8a13f022467be64ccd46f248344bdf96e9a41..3919332965af04bf98f2b77b7f4ec722d5acf8da 100644 (file)
@@ -26,7 +26,7 @@ extern u64 pnv_first_deep_stop_state;
        std     r0,0(r1);                                       \
        ptesync;                                                \
        ld      r0,0(r1);                                       \
-1:     cmp     cr0,r0,r0;                                      \
+1:     cmpd    cr0,r0,r0;                                      \
        bne     1b;                                             \
        IDLE_INST;                                              \
        b       .
index f752e6f7cfbe2656ab0ded7f6619b22339e973ae..ab68d0ee7725861d827d34c4f05ed08001e13274 100644 (file)
@@ -43,6 +43,7 @@ extern int machine_check_e500mc(struct pt_regs *regs);
 extern int machine_check_e500(struct pt_regs *regs);
 extern int machine_check_e200(struct pt_regs *regs);
 extern int machine_check_47x(struct pt_regs *regs);
+int machine_check_8xx(struct pt_regs *regs);
 
 extern void cpu_down_flush_e500v2(void);
 extern void cpu_down_flush_e500mc(void);
index 2e4e7d878c8eeda322d701cb3f407d67ecff0a58..9a3eee66129766d84d8bd5063fd8ecd142e8fc21 100644 (file)
  */
 #define LOAD_HANDLER(reg, label)                                       \
        ld      reg,PACAKBASE(r13);     /* get high part of &label */   \
-       ori     reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l;
+       ori     reg,reg,FIXED_SYMBOL_ABS_ADDR(label);
+
+#define __LOAD_HANDLER(reg, label)                                     \
+       ld      reg,PACAKBASE(r13);                                     \
+       ori     reg,reg,(ABS_ADDR(label))@l;
 
 /* Exception register prefixes */
 #define EXC_HV H
@@ -154,14 +158,17 @@ BEGIN_FTR_SECTION_NESTED(943)                                             \
        std     ra,offset(r13);                                         \
 END_FTR_SECTION_NESTED(ftr,ftr,943)
 
-#define EXCEPTION_PROLOG_0(area)                                       \
-       GET_PACA(r13);                                                  \
+#define EXCEPTION_PROLOG_0_PACA(area)                                  \
        std     r9,area+EX_R9(r13);     /* save r9 */                   \
        OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR);                     \
        HMT_MEDIUM;                                                     \
        std     r10,area+EX_R10(r13);   /* save r10 - r12 */            \
        OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
 
+#define EXCEPTION_PROLOG_0(area)                                       \
+       GET_PACA(r13);                                                  \
+       EXCEPTION_PROLOG_0_PACA(area)
+
 #define __EXCEPTION_PROLOG_1(area, extra, vec)                         \
        OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR);         \
        OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR);          \
@@ -192,6 +199,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
        EXCEPTION_PROLOG_1(area, extra, vec);                           \
        EXCEPTION_PROLOG_PSERIES_1(label, h);
 
+/* Have the PACA in r13 already */
+#define EXCEPTION_PROLOG_PSERIES_PACA(area, label, h, extra, vec)      \
+       EXCEPTION_PROLOG_0_PACA(area);                                  \
+       EXCEPTION_PROLOG_1(area, extra, vec);                           \
+       EXCEPTION_PROLOG_PSERIES_1(label, h);
+
 #define __KVMTEST(h, n)                                                        \
        lbz     r10,HSTATE_IN_GUEST(r13);                               \
        cmpwi   r10,0;                                                  \
@@ -208,6 +221,18 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define kvmppc_interrupt kvmppc_interrupt_pr
 #endif
 
+#ifdef CONFIG_RELOCATABLE
+#define BRANCH_TO_COMMON(reg, label)                                   \
+       __LOAD_HANDLER(reg, label);                                     \
+       mtctr   reg;                                                    \
+       bctr
+
+#else
+#define BRANCH_TO_COMMON(reg, label)                                   \
+       b       label
+
+#endif
+
 #define __KVM_HANDLER_PROLOG(area, n)                                  \
        BEGIN_FTR_SECTION_NESTED(947)                                   \
        ld      r10,area+EX_CFAR(r13);                                  \
index c7d82ff62a3346eb6b807d0bbbd06a5c85a760e4..eba60416536ec0955ff4a4131b4d788db2f25f9c 100644 (file)
@@ -155,6 +155,8 @@ static inline unsigned long arch_local_irq_save(void)
        unsigned long flags = arch_local_save_flags();
 #ifdef CONFIG_BOOKE
        asm volatile("wrteei 0" : : : "memory");
+#elif defined(CONFIG_PPC_8xx)
+       wrtspr(SPRN_EID);
 #else
        SET_MSR_EE(flags & ~MSR_EE);
 #endif
@@ -165,6 +167,8 @@ static inline void arch_local_irq_disable(void)
 {
 #ifdef CONFIG_BOOKE
        asm volatile("wrteei 0" : : : "memory");
+#elif defined(CONFIG_PPC_8xx)
+       wrtspr(SPRN_EID);
 #else
        arch_local_irq_save();
 #endif
@@ -174,6 +178,8 @@ static inline void arch_local_irq_enable(void)
 {
 #ifdef CONFIG_BOOKE
        asm volatile("wrteei 1" : : : "memory");
+#elif defined(CONFIG_PPC_8xx)
+       wrtspr(SPRN_EIE);
 #else
        unsigned long msr = mfmsr();
        SET_MSR_EE(msr | MSR_EE);
index 4d8518049f4df569a24a10437afd3aff1c2df873..4396db57b8be19bbe12377c6c6babc6a99c6b6e8 100644 (file)
@@ -1,12 +1,8 @@
 #ifndef __ASM_POWERPC_LIBATA_PORTMAP_H
 #define __ASM_POWERPC_LIBATA_PORTMAP_H
 
-#define ATA_PRIMARY_CMD        0x1F0
-#define ATA_PRIMARY_CTL        0x3F6
 #define ATA_PRIMARY_IRQ(dev)   pci_get_legacy_ide_irq(dev, 0)
 
-#define ATA_SECONDARY_CMD      0x170
-#define ATA_SECONDARY_CTL      0x376
 #define ATA_SECONDARY_IRQ(dev) pci_get_legacy_ide_irq(dev, 1)
 
 #endif
index e88368354e499caa901020303d4d969d120ab60b..e311c25751a4111d20f8bef1165ff52934a18576 100644 (file)
  * Individual features below.
  */
 
+/*
+ * Kernel read only support.
+ * We added the ppp value 0b110 in ISA 2.04.
+ */
+#define MMU_FTR_KERNEL_RO              ASM_CONST(0x00004000)
+
 /*
  * We need to clear the top 16 bits of va (from the remaining 64 bits) in
  * tlbie* instructions
 #define MMU_FTRS_POWER4                MMU_FTRS_DEFAULT_HPTE_ARCH_V2
 #define MMU_FTRS_PPC970                MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA
 #define MMU_FTRS_POWER5                MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER6                MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER7                MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER8                MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER9                MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER6                MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
+#define MMU_FTRS_POWER7                MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
+#define MMU_FTRS_POWER8                MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
+#define MMU_FTRS_POWER9                MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
 #define MMU_FTRS_CELL          MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
                                MMU_FTR_CI_LARGE_PAGE
 #define MMU_FTRS_PA6T          MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
index 54ff8ce7fa96b7fec908fd9c4a86dc94f024447c..c56ea8c84abb1771ff65f66ba91ffff02bff5fae 100644 (file)
 #define PPC_INST_LWSYNC                        0x7c2004ac
 #define PPC_INST_SYNC                  0x7c0004ac
 #define PPC_INST_SYNC_MASK             0xfc0007fe
+#define PPC_INST_ISYNC                 0x4c00012c
 #define PPC_INST_LXVD2X                        0x7c000698
 #define PPC_INST_MCRXR                 0x7c000400
 #define PPC_INST_MCRXR_MASK            0xfc0007fe
 
 #define PPC_SLBIA(IH)  stringify_in_c(.long PPC_INST_SLBIA | \
                                       ((IH & 0x7) << 21))
+#define PPC_INVALIDATE_ERAT    PPC_SLBIA(7)
 
 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
index 2a620789954bbe3a21a93b8e9db7a031cb3077a3..9e1499f98deff5299e7a666313749c8b4ebad0b1 100644 (file)
 #define     LPCR_PECE0         ASM_CONST(0x0000000000004000)   /* ext. exceptions can cause exit */
 #define     LPCR_PECE1         ASM_CONST(0x0000000000002000)   /* decrementer can cause exit */
 #define     LPCR_PECE2         ASM_CONST(0x0000000000001000)   /* machine check etc can cause exit */
+#define     LPCR_PECE_HVEE     ASM_CONST(0x0000400000000000)   /* P9 Wakeup on HV interrupts */
 #define   LPCR_MER             ASM_CONST(0x0000000000000800)   /* Mediated External Exception */
 #define   LPCR_MER_SH          11
 #define   LPCR_TC              ASM_CONST(0x0000000000000200)   /* Translation control */
@@ -1250,6 +1251,8 @@ static inline void mtmsr_isync(unsigned long val)
                                     : "r" ((unsigned long)(v)) \
                                     : "memory")
 #endif
+#define wrtspr(rn)     asm volatile("mtspr " __stringify(rn) ",0" : \
+                                    : : "memory")
 
 extern unsigned long msr_check_and_set(unsigned long bits);
 extern bool strict_msr_control;
index 94d01f81e66877646faba091187ae981bfa0a457..0197e12f7d482512c2d7be8f8ee76c1c3935df73 100644 (file)
 #define SPRN_MD_RAM0   825
 #define SPRN_MD_RAM1   826
 
+/* Special MSR manipulation registers */
+#define SPRN_EIE       80      /* External interrupt enable (EE=1, RI=1) */
+#define SPRN_EID       81      /* External interrupt disable (EE=0, RI=1) */
+
 /* Commands.  Only the first few are available to the instruction cache.
 */
 #define        IDC_ENABLE      0x02000000      /* Cache enable */
index f6f68f73e8581147772bad3100f74ed5950987bd..99e1397b71dac78dae0cc2b98eefd40cf90947ec 100644 (file)
@@ -52,11 +52,23 @@ static inline int mm_is_core_local(struct mm_struct *mm)
        return cpumask_subset(mm_cpumask(mm),
                              topology_sibling_cpumask(smp_processor_id()));
 }
+
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+       return cpumask_equal(mm_cpumask(mm),
+                             cpumask_of(smp_processor_id()));
+}
+
 #else
 static inline int mm_is_core_local(struct mm_struct *mm)
 {
        return 1;
 }
+
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+       return 1;
+}
 #endif
 
 #endif /* __KERNEL__ */
index cf12c580f6b286b957b0280d8174cc3d8c203d2f..e8cdfec8d5125c531c45b7ffd250955ad68021af 100644 (file)
 
 #define __NR__exit __NR_exit
 
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free
+
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
index 1672e3398270bf50a740895c6ad534689eb84cf5..44583a52f882540986928cc48a63971251226a0f 100644 (file)
@@ -97,4 +97,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _ASM_POWERPC_SOCKET_H */
index aded29ad2e8f4873e06a01c1e580e47bbc181fcf..1925341dbb9c9df7ceb05975cf95c0ff8a3339a5 100644 (file)
@@ -14,6 +14,11 @@ CFLAGS_prom_init.o      += -fPIC
 CFLAGS_btext.o         += -fPIC
 endif
 
+CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace early boot code
 CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
@@ -90,10 +95,6 @@ obj-$(CONFIG_RELOCATABLE)    += reloc_$(BITS).o
 obj-$(CONFIG_PPC32)            += entry_32.o setup_32.o
 obj-$(CONFIG_PPC64)            += dma-iommu.o iommu.o
 obj-$(CONFIG_KGDB)             += kgdb.o
-obj-$(CONFIG_MODULES)          += ppc_ksyms.o
-ifeq ($(CONFIG_PPC32),y)
-obj-$(CONFIG_MODULES)          += ppc_ksyms_32.o
-endif
 obj-$(CONFIG_BOOTX_TEXT)       += btext.o
 obj-$(CONFIG_SMP)              += smp.o
 obj-$(CONFIG_KPROBES)          += kprobes.o
index 52ff3f025437947484d7567141b5d3802cc887c2..37c027ca83b2b172a07a9ed01b23ec3e34baffff 100644 (file)
@@ -98,8 +98,8 @@ _GLOBAL(__setup_cpu_power9)
        li      r0,0
        mtspr   SPRN_LPID,r0
        mfspr   r3,SPRN_LPCR
-       ori     r3, r3, LPCR_PECEDH
-       ori     r3, r3, LPCR_HVICE
+       LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
+       or      r3, r3, r4
        bl      __init_LPCR
        bl      __init_HFSCR
        bl      __init_tlb_power9
@@ -118,8 +118,8 @@ _GLOBAL(__restore_cpu_power9)
        li      r0,0
        mtspr   SPRN_LPID,r0
        mfspr   r3,SPRN_LPCR
-       ori     r3, r3, LPCR_PECEDH
-       ori     r3, r3, LPCR_HVICE
+       LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
+       or      r3, r3, r4
        bl      __init_LPCR
        bl      __init_HFSCR
        bl      __init_tlb_power9
index 6c4646ac9234dafb186ca8a5360a3530b1e2940b..6a82ef039c509746f5afa66ebc286fa5da429ed3 100644 (file)
@@ -1248,6 +1248,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .mmu_features           = MMU_FTR_TYPE_8xx,
                .icache_bsize           = 16,
                .dcache_bsize           = 16,
+               .machine_check          = machine_check_8xx,
                .platform               = "ppc823",
        },
 #endif /* CONFIG_8xx */
index 83428a283fa075fcac6ec48676c44d2078427a6f..3841d749a430069f4d4f2705c4199c08609b3757 100644 (file)
@@ -33,6 +33,7 @@
 #include <asm/unistd.h>
 #include <asm/ftrace.h>
 #include <asm/ptrace.h>
+#include <asm/export.h>
 
 /*
  * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it includes MSR_CE.
@@ -1358,6 +1359,7 @@ _GLOBAL(_mcount)
        MCOUNT_RESTORE_FRAME
        bctr
 #endif
+EXPORT_SYMBOL(_mcount)
 
 _GLOBAL(ftrace_stub)
        blr
index 51df82b610843d5c490741fa33d59afb5a01c08b..6432d4bf08c889c128803cc5505546122118b964 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/context_tracking.h>
 #include <asm/tm.h>
 #include <asm/ppc-opcode.h>
+#include <asm/export.h>
 
 /*
  * System calls.
@@ -1177,6 +1178,7 @@ _GLOBAL(enter_prom)
 #ifdef CONFIG_DYNAMIC_FTRACE
 _GLOBAL(mcount)
 _GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
        mflr    r12
        mtctr   r12
        mtlr    r0
@@ -1413,6 +1415,7 @@ livepatch_handler:
 
 #else
 _GLOBAL_TOC(_mcount)
+EXPORT_SYMBOL(_mcount)
        /* Taken from output of objdump from lib64/glibc */
        mflr    r3
        ld      r11, 0(r1)
index 9f1ebf7338f1084499fcc03e50efa71b66a73b76..52ca2471ee1a4355b8fa3063bcc5d27ba61a52a1 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-compat.h>
 #include <asm/asm-offsets.h>
+#include <asm/export.h>
 
 #ifndef CONFIG_PPC64
 /* epapr_ev_idle() was derived from e500_idle() */
@@ -53,3 +54,4 @@ epapr_hypercall_start:
        nop
        nop
        blr
+EXPORT_SYMBOL(epapr_hypercall_start)
index 08992f8f50365612fd82d6be6cb2daebe3ea2918..1ba82ea9023093ae3d58eedba4b9542bae137047 100644 (file)
@@ -95,19 +95,40 @@ __start_interrupts:
 /* No virt vectors corresponding with 0x0..0x100 */
 EXC_VIRT_NONE(0x4000, 0x4100)
 
-EXC_REAL_BEGIN(system_reset, 0x100, 0x200)
-       SET_SCRATCH0(r13)
+
 #ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
-       /* Running native on arch 2.06 or later, check if we are
-        * waking up from nap/sleep/winkle.
+       /*
+        * If running native on arch 2.06 or later, check if we are waking up
+        * from nap/sleep/winkle, and branch to idle handler.
         */
-       mfspr   r13,SPRN_SRR1
-       rlwinm. r13,r13,47-31,30,31
-       beq     9f
+#define IDLETEST(n)                                                    \
+       BEGIN_FTR_SECTION ;                                             \
+       mfspr   r10,SPRN_SRR1 ;                                         \
+       rlwinm. r10,r10,47-31,30,31 ;                                   \
+       beq-    1f ;                                                    \
+       cmpwi   cr3,r10,2 ;                                             \
+       BRANCH_TO_COMMON(r10, system_reset_idle_common) ;               \
+1:                                                                     \
+       END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#else
+#define IDLETEST NOTEST
+#endif
 
-       cmpwi   cr3,r13,2
+EXC_REAL_BEGIN(system_reset, 0x100, 0x200)
+       SET_SCRATCH0(r13)
        GET_PACA(r13)
+       clrrdi  r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */
+       EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD,
+                                IDLETEST, 0x100)
+
+EXC_REAL_END(system_reset, 0x100, 0x200)
+EXC_VIRT_NONE(0x4100, 0x4200)
+
+#ifdef CONFIG_PPC_P7_NAP
+EXC_COMMON_BEGIN(system_reset_idle_common)
+BEGIN_FTR_SECTION
+       GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
        bl      pnv_restore_hyp_resource
 
        li      r0,PNV_THREAD_RUNNING
@@ -130,14 +151,8 @@ BEGIN_FTR_SECTION
        blt     cr3,2f
        b       pnv_wakeup_loss
 2:     b       pnv_wakeup_noloss
+#endif
 
-9:
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
-       EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
-                                NOTEST, 0x100)
-EXC_REAL_END(system_reset, 0x100, 0x200)
-EXC_VIRT_NONE(0x4100, 0x4200)
 EXC_COMMON(system_reset_common, 0x100, system_reset_exception)
 
 #ifdef CONFIG_PPC_PSERIES
@@ -159,7 +174,7 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x300)
        SET_SCRATCH0(r13)               /* save r13 */
        /*
         * Running native on arch 2.06 or later, we may wakeup from winkle
-        * inside machine check. If yes, then last bit of HSPGR0 would be set
+        * inside machine check. If yes, then last bit of HSPRG0 would be set
         * to 1. Hence clear it unconditionally.
         */
        GET_PACA(r13)
@@ -378,7 +393,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
        /*
         * Go back to winkle. Please note that this thread was woken up in
         * machine check from winkle and has not restored the per-subcore
-        * state. Hence before going back to winkle, set last bit of HSPGR0
+        * state. Hence before going back to winkle, set last bit of HSPRG0
         * to 1. This will make sure that if this thread gets woken up
         * again at reset vector 0x100 then it will get chance to restore
         * the subcore state.
@@ -817,10 +832,8 @@ EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00)
 TRAMP_KVM(PACA_EXGEN, 0xb00)
 EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 
-
-#define LOAD_SYSCALL_HANDLER(reg)                              \
-       ld      reg,PACAKBASE(r13);                             \
-       ori     reg,reg,(ABS_ADDR(system_call_common))@l;
+#define LOAD_SYSCALL_HANDLER(reg)                                      \
+       __LOAD_HANDLER(reg, system_call_common)
 
 /* Syscall routine is used twice, in reloc-off and reloc-on paths */
 #define SYSCALL_PSERIES_1                                      \
@@ -1377,7 +1390,7 @@ __end_interrupts:
 DEFINE_FIXED_SYMBOL(__end_interrupts)
 
 #ifdef CONFIG_PPC_970_NAP
-TRAMP_REAL_BEGIN(power4_fixup_nap)
+EXC_COMMON_BEGIN(power4_fixup_nap)
        andc    r9,r9,r10
        std     r9,TI_LOCAL_FLAGS(r11)
        ld      r10,_LINK(r1)           /* make idle task do the */
index 08d14b096eb90e8f9a0ff1c2de2a68c1ec2dd325..6c509f39bbdeb97c7c18a28e4479792a1d0fb7bf 100644 (file)
@@ -24,6 +24,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
+#include <asm/export.h>
 
 #ifdef CONFIG_VSX
 #define __REST_32FPVSRS(n,c,base)                                      \
@@ -59,6 +60,7 @@ _GLOBAL(load_fp_state)
        MTFSF_L(fr0)
        REST_32FPVSRS(0, R4, R3)
        blr
+EXPORT_SYMBOL(load_fp_state)
 
 /*
  * Store FP state into memory, including FPSCR
@@ -69,6 +71,7 @@ _GLOBAL(store_fp_state)
        mffs    fr0
        stfd    fr0,FPSTATE_FPSCR(r3)
        blr
+EXPORT_SYMBOL(store_fp_state)
 
 /*
  * This task wants to use the FPU now.
index a3f821eb7e9ab9985df07d3631d4dec2ecf03baf..9d963547d2438864d36662271912c4a2c28f21ef 100644 (file)
@@ -34,6 +34,7 @@
 #include <asm/ptrace.h>
 #include <asm/bug.h>
 #include <asm/kvm_book3s_asm.h>
+#include <asm/export.h>
 
 /* 601 only has IBAT; cr0.eq is set on 601 when using this macro */
 #define LOAD_BAT(n, reg, RA, RB)       \
@@ -738,6 +739,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
 
        .globl mol_trampoline
        .set mol_trampoline, i0x2f00
+       EXPORT_SYMBOL(mol_trampoline)
 
        . = 0x3000
 
@@ -1045,6 +1047,7 @@ _ENTRY(switch_mmu_context)
 4:     trap
        EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0
        blr
+EXPORT_SYMBOL(switch_mmu_context)
 
 /*
  * An undocumented "feature" of 604e requires that the v bit
@@ -1272,6 +1275,7 @@ sdata:
        .globl  empty_zero_page
 empty_zero_page:
        .space  4096
+EXPORT_SYMBOL(empty_zero_page)
 
        .globl  swapper_pg_dir
 swapper_pg_dir:
@@ -1285,6 +1289,7 @@ intercept_table:
        .long 0, 0, 0, 0, 0, 0, 0, 0
        .long 0, 0, 0, 0, 0, 0, 0, 0
        .long 0, 0, 0, 0, 0, 0, 0, 0
+EXPORT_SYMBOL(intercept_table)
 
 /* Room for two PTE pointers, usually the kernel and current user pointers
  * to their respective root page table.
index 7d7d8635227ac76bcd054b3fd04248ec90127c9d..41374a468d1c1d49a801f6d0856ced3908b29137 100644 (file)
@@ -41,6 +41,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
+#include <asm/export.h>
 
 /* As with the other PowerPC ports, it is expected that when code
  * execution begins here, the following registers contain valid, yet
@@ -971,6 +972,7 @@ sdata:
        .globl  empty_zero_page
 empty_zero_page:
        .space  4096
+EXPORT_SYMBOL(empty_zero_page)
        .globl  swapper_pg_dir
 swapper_pg_dir:
        .space  PGD_TABLE_SIZE
index 9cdf5c71e4263c102a2294ffb251f991f98a8c03..37e4a7cf0065be285ba00b30a7601998a9e570f0 100644 (file)
@@ -39,6 +39,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
 #include <asm/synch.h>
+#include <asm/export.h>
 #include "head_booke.h"
 
 
@@ -1254,6 +1255,7 @@ sdata:
        .globl  empty_zero_page
 empty_zero_page:
        .space  PAGE_SIZE
+EXPORT_SYMBOL(empty_zero_page)
 
 /*
  * To support >32-bit physical addresses, we use an 8KB pgdir.
index 79da0641bae24e5e9439df2a666f989b975a3896..04c546e20cc05e2db9c3f3022efcc4d2b9a91ee7 100644 (file)
@@ -43,6 +43,7 @@
 #include <asm/hw_irq.h>
 #include <asm/cputhreads.h>
 #include <asm/ppc-opcode.h>
+#include <asm/export.h>
 
 /* The physical memory is laid out such that the secondary processor
  * spin code sits at 0x0000...0x00ff. On server, the vectors follow
@@ -1002,3 +1003,4 @@ swapper_pg_dir:
        .globl  empty_zero_page
 empty_zero_page:
        .space  PAGE_SIZE
+EXPORT_SYMBOL(empty_zero_page)
index 3a185c51ce8f657d47709f12ffabb5e6480eff7f..fb133a1632636c5c252ddf222b05a1c4a6972896 100644 (file)
@@ -31,6 +31,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
 #include <asm/fixmap.h>
+#include <asm/export.h>
 
 /* Macro to make the code more readable. */
 #ifdef CONFIG_8xx_CPU6
@@ -226,7 +227,7 @@ i##n:                                                               \
                          ret_from_except)
 
 /* System reset */
-       EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD)
+       EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD)
 
 /* Machine check */
        . = 0x200
@@ -321,7 +322,7 @@ SystemCall:
 #endif
 
 InstructionTLBMiss:
-#ifdef CONFIG_8xx_CPU6
+#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
        mtspr   SPRN_SPRG_SCRATCH2, r3
 #endif
        EXCEPTION_PROLOG_0
@@ -329,23 +330,20 @@ InstructionTLBMiss:
        /* If we are faulting a kernel address, we have to use the
         * kernel page tables.
         */
+       mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
+       INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
 #if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
        /* Only modules will cause ITLB Misses as we always
         * pin the first 8MB of kernel memory */
-       mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
-       INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
-       mfcr    r10
-       IS_KERNEL(r11, r11)
+       mfcr    r3
+       IS_KERNEL(r11, r10)
+#endif
        mfspr   r11, SPRN_M_TW  /* Get level 1 table */
+#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
        BRANCH_UNLESS_KERNEL(3f)
        lis     r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
-       mtcr    r10
-       mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
-#else
-       mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
-       INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
-       mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
+       mtcr    r3
 #endif
        /* Insert level 1 index */
        rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
@@ -377,58 +375,39 @@ InstructionTLBMiss:
        MTSPR_CPU6(SPRN_MI_RPN, r10, r3)        /* Update TLB entry */
 
        /* Restore registers */
-#ifdef CONFIG_8xx_CPU6
+#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
        mfspr   r3, SPRN_SPRG_SCRATCH2
 #endif
        EXCEPTION_EPILOG_0
        rfi
 
-/*
- * Bottom part of DataStoreTLBMiss handler for IMMR area
- * not enough space in the DataStoreTLBMiss area
- */
-DTLBMissIMMR:
-       mtcr    r10
-       /* Set 512k byte guarded page and mark it valid */
-       li      r10, MD_PS512K | MD_GUARDED | MD_SVALID
-       MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
-       mfspr   r10, SPRN_IMMR                  /* Get current IMMR */
-       rlwinm  r10, r10, 0, 0xfff80000         /* Get 512 kbytes boundary */
-       ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
-                         _PAGE_PRESENT | _PAGE_NO_CACHE
-       MTSPR_CPU6(SPRN_MD_RPN, r10, r11)       /* Update TLB entry */
-
-       li      r11, RPN_PATTERN
-       mtspr   SPRN_DAR, r11   /* Tag DAR */
-       EXCEPTION_EPILOG_0
-       rfi
-
        . = 0x1200
 DataStoreTLBMiss:
+       mtspr   SPRN_SPRG_SCRATCH2, r3
        EXCEPTION_PROLOG_0
-       mfcr    r10
+       mfcr    r3
 
        /* If we are faulting a kernel address, we have to use the
         * kernel page tables.
         */
-       mfspr   r11, SPRN_MD_EPN
-       rlwinm  r11, r11, 16, 0xfff8
+       mfspr   r10, SPRN_MD_EPN
+       rlwinm  r10, r10, 16, 0xfff8
+       cmpli   cr0, r10, PAGE_OFFSET@h
+       mfspr   r11, SPRN_M_TW  /* Get level 1 table */
+       blt+    3f
 #ifndef CONFIG_PIN_TLB_IMMR
-       cmpli   cr0, r11, VIRT_IMMR_BASE@h
+       cmpli   cr0, r10, VIRT_IMMR_BASE@h
 #endif
-       cmpli   cr7, r11, PAGE_OFFSET@h
+_ENTRY(DTLBMiss_cmp)
+       cmpli   cr7, r10, (PAGE_OFFSET + 0x1800000)@h
+       lis     r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 #ifndef CONFIG_PIN_TLB_IMMR
 _ENTRY(DTLBMiss_jmp)
        beq-    DTLBMissIMMR
 #endif
-       bge-    cr7, 4f
-
-       mfspr   r11, SPRN_M_TW  /* Get level 1 table */
+       blt     cr7, DTLBMissLinear
 3:
-       mtcr    r10
-#ifdef CONFIG_8xx_CPU6
-       mtspr   SPRN_SPRG_SCRATCH2, r3
-#endif
+       mtcr    r3
        mfspr   r10, SPRN_MD_EPN
 
        /* Insert level 1 index */
@@ -481,30 +460,7 @@ _ENTRY(DTLBMiss_jmp)
        MTSPR_CPU6(SPRN_MD_RPN, r10, r3)        /* Update TLB entry */
 
        /* Restore registers */
-#ifdef CONFIG_8xx_CPU6
        mfspr   r3, SPRN_SPRG_SCRATCH2
-#endif
-       mtspr   SPRN_DAR, r11   /* Tag DAR */
-       EXCEPTION_EPILOG_0
-       rfi
-
-4:
-_ENTRY(DTLBMiss_cmp)
-       cmpli   cr0, r11, (PAGE_OFFSET + 0x1800000)@h
-       lis     r11, (swapper_pg_dir-PAGE_OFFSET)@ha
-       bge-    3b
-
-       mtcr    r10
-       /* Set 8M byte page and mark it valid */
-       li      r10, MD_PS8MEG | MD_SVALID
-       MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
-       mfspr   r10, SPRN_MD_EPN
-       rlwinm  r10, r10, 0, 0x0f800000         /* 8xx supports max 256Mb RAM */
-       ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
-                         _PAGE_PRESENT
-       MTSPR_CPU6(SPRN_MD_RPN, r10, r11)       /* Update TLB entry */
-
-       li      r11, RPN_PATTERN
        mtspr   SPRN_DAR, r11   /* Tag DAR */
        EXCEPTION_EPILOG_0
        rfi
@@ -570,6 +526,43 @@ DARFixed:/* Return from dcbx instruction bug workaround */
 
        . = 0x2000
 
+/*
+ * Bottom part of DataStoreTLBMiss handlers for IMMR area and linear RAM.
+ * There is not enough space in the DataStoreTLBMiss area.
+ */
+DTLBMissIMMR:
+       mtcr    r3
+       /* Set 512k byte guarded page and mark it valid */
+       li      r10, MD_PS512K | MD_GUARDED | MD_SVALID
+       MTSPR_CPU6(SPRN_MD_TWC, r10, r11)
+       mfspr   r10, SPRN_IMMR                  /* Get current IMMR */
+       rlwinm  r10, r10, 0, 0xfff80000         /* Get 512 kbytes boundary */
+       ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+                         _PAGE_PRESENT | _PAGE_NO_CACHE
+       MTSPR_CPU6(SPRN_MD_RPN, r10, r11)       /* Update TLB entry */
+
+       li      r11, RPN_PATTERN
+       mtspr   SPRN_DAR, r11   /* Tag DAR */
+       mfspr   r3, SPRN_SPRG_SCRATCH2
+       EXCEPTION_EPILOG_0
+       rfi
+
+DTLBMissLinear:
+       mtcr    r3
+       /* Set 8M byte page and mark it valid */
+       li      r11, MD_PS8MEG | MD_SVALID
+       MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
+       rlwinm  r10, r10, 16, 0x0f800000        /* 8xx supports max 256Mb RAM */
+       ori     r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \
+                         _PAGE_PRESENT
+       MTSPR_CPU6(SPRN_MD_RPN, r10, r11)       /* Update TLB entry */
+
+       li      r11, RPN_PATTERN
+       mtspr   SPRN_DAR, r11   /* Tag DAR */
+       mfspr   r3, SPRN_SPRG_SCRATCH2
+       EXCEPTION_EPILOG_0
+       rfi
+
 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
  * by decoding the registers used by the dcbx instruction and adding them.
  * DAR is set to the calculated address.
@@ -586,7 +579,9 @@ FixupDAR:/* Entry point for dcbx workaround. */
        rlwinm  r11, r10, 16, 0xfff8
 _ENTRY(FixupDAR_cmp)
        cmpli   cr7, r11, (PAGE_OFFSET + 0x1800000)@h
-       blt-    cr7, 200f
+       /* create physical page address from effective address */
+       tophys(r11, r10)
+       blt-    cr7, 201f
        lis     r11, (swapper_pg_dir-PAGE_OFFSET)@ha
        /* Insert level 1 index */
 3:     rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
@@ -616,10 +611,6 @@ _ENTRY(FixupDAR_cmp)
 141:   mfspr   r10,SPRN_SPRG_SCRATCH2
        b       DARFixed        /* Nope, go back to normal TLB processing */
 
-       /* create physical page address from effective address */
-200:   tophys(r11, r10)
-       b       201b
-
 144:   mfspr   r10, SPRN_DSISR
        rlwinm  r10, r10,0,7,5  /* Clear store bit for buggy dcbst insn */
        mtspr   SPRN_DSISR, r10
@@ -894,6 +885,7 @@ sdata:
        .align  PAGE_SHIFT
 empty_zero_page:
        .space  PAGE_SIZE
+EXPORT_SYMBOL(empty_zero_page)
 
        .globl  swapper_pg_dir
 swapper_pg_dir:
index 3bfa3150911f7b42f20b7bfb7e7715be6c10871e..bf4c6021515f8aedff5a8c2cfc39f37b39850982 100644 (file)
@@ -42,6 +42,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 #include <asm/ptrace.h>
+#include <asm/export.h>
 #include "head_booke.h"
 
 /* As with the other PowerPC ports, it is expected that when code
@@ -1223,6 +1224,7 @@ sdata:
        .globl  empty_zero_page
 empty_zero_page:
        .space  4096
+EXPORT_SYMBOL(empty_zero_page)
        .globl  swapper_pg_dir
 swapper_pg_dir:
        .space  PGD_TABLE_SIZE
index 9781c69eae5767adc9fdde54232e4df1329e7b92..03d089b3ed726faeb69d3121e27191ed834899bf 100644 (file)
@@ -275,7 +275,7 @@ int hw_breakpoint_handler(struct die_args *args)
        if (!stepped) {
                WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
                        "0x%lx will be disabled.", info->address);
-               perf_event_disable(bp);
+               perf_event_disable_inatomic(bp);
                goto out;
        }
        /*
index bd739fed26e3203aae73807399c43a939c248d38..72dac0b58061f023db4a4c8a4b6badd66bcd0b70 100644 (file)
@@ -90,6 +90,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
  * Threads will spin in HMT_LOW until the lock bit is cleared.
  * r14 - pointer to core_idle_state
  * r15 - used to load contents of core_idle_state
+ * r9  - used as a temporary variable
  */
 
 core_idle_lock_held:
@@ -99,6 +100,8 @@ core_idle_lock_held:
        bne     3b
        HMT_MEDIUM
        lwarx   r15,0,r14
+       andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
+       bne     core_idle_lock_held
        blr
 
 /*
@@ -163,12 +166,6 @@ _GLOBAL(pnv_powersave_common)
        std     r9,_MSR(r1)
        std     r1,PACAR1(r13)
 
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-       /* Tell KVM we're entering idle */
-       li      r4,KVM_HWTHREAD_IN_IDLE
-       stb     r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
-
        /*
         * Go to real mode to do the nap, as required by the architecture.
         * Also, we need to be in real mode before setting hwthread_state,
@@ -185,6 +182,26 @@ _GLOBAL(pnv_powersave_common)
 
        .globl pnv_enter_arch207_idle_mode
 pnv_enter_arch207_idle_mode:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+       /* Tell KVM we're entering idle */
+       li      r4,KVM_HWTHREAD_IN_IDLE
+       /******************************************************/
+       /*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
+       /* The following store to HSTATE_HWTHREAD_STATE(r13)  */
+       /* MUST occur in real mode, i.e. with the MMU off,    */
+       /* and the MMU must stay off until we clear this flag */
+       /* and test HSTATE_HWTHREAD_REQ(r13) in the system    */
+       /* reset interrupt vector in exceptions-64s.S.        */
+       /* The reason is that another thread can switch the   */
+       /* MMU to a guest context whenever this flag is set   */
+       /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
+       /* that would potentially cause this thread to start  */
+       /* executing instructions from guest memory in        */
+       /* hypervisor mode, leading to a host crash or data   */
+       /* corruption, or worse.                              */
+       /******************************************************/
+       stb     r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
        stb     r3,PACA_THREAD_IDLE_STATE(r13)
        cmpwi   cr3,r3,PNV_THREAD_SLEEP
        bge     cr3,2f
@@ -250,6 +267,12 @@ enter_winkle:
  * r3 - requested stop state
  */
 power_enter_stop:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+       /* Tell KVM we're entering idle */
+       li      r4,KVM_HWTHREAD_IN_IDLE
+       /* DO THIS IN REAL MODE!  See comment above. */
+       stb     r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
 /*
  * Check if the requested state is a deep idle state.
  */
index 0d432194c01825286b4f629ad1637cdbeed32a22..384357cb8bc0031170e764549c1a493745b5b3af 100644 (file)
@@ -18,6 +18,7 @@
 #include <asm/unistd.h>
 #include <asm/asm-compat.h>
 #include <asm/asm-offsets.h>
+#include <asm/export.h>
 
        .text
 
@@ -118,3 +119,4 @@ _GLOBAL(longjmp)
 _GLOBAL(current_stack_pointer)
        PPC_LL  r3,0(r1)
        blr
+EXPORT_SYMBOL(current_stack_pointer)
index 03756ffdcd71063dd028d7d3825d89d70e47990c..93cf7a5846a6f5875534cc43a2a3409357dc482a 100644 (file)
@@ -33,6 +33,7 @@
 #include <asm/kexec.h>
 #include <asm/bug.h>
 #include <asm/ptrace.h>
+#include <asm/export.h>
 
        .text
 
@@ -319,6 +320,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
 #endif /* CONFIG_4xx */
        isync
        blr
+EXPORT_SYMBOL(flush_instruction_cache)
 #endif /* CONFIG_PPC_8xx */
 
 /*
@@ -359,6 +361,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
        isync
        blr
 _ASM_NOKPROBE_SYMBOL(flush_icache_range)
+EXPORT_SYMBOL(flush_icache_range)
 
 /*
  * Flush a particular page from the data cache to RAM.
@@ -497,6 +500,7 @@ _GLOBAL(copy_page)
        li      r0,MAX_COPY_PREFETCH
        li      r11,4
        b       2b
+EXPORT_SYMBOL(copy_page)
 
 /*
  * Extended precision shifts.
@@ -524,6 +528,7 @@ _GLOBAL(__ashrdi3)
        sraw    r3,r3,r5        # MSW = MSW >> count
        or      r4,r4,r7        # LSW |= t2
        blr
+EXPORT_SYMBOL(__ashrdi3)
 
 _GLOBAL(__ashldi3)
        subfic  r6,r5,32
@@ -535,6 +540,7 @@ _GLOBAL(__ashldi3)
        slw     r4,r4,r5        # LSW = LSW << count
        or      r3,r3,r7        # MSW |= t2
        blr
+EXPORT_SYMBOL(__ashldi3)
 
 _GLOBAL(__lshrdi3)
        subfic  r6,r5,32
@@ -546,6 +552,7 @@ _GLOBAL(__lshrdi3)
        srw     r3,r3,r5        # MSW = MSW >> count
        or      r4,r4,r7        # LSW |= t2
        blr
+EXPORT_SYMBOL(__lshrdi3)
 
 /*
  * 64-bit comparison: __cmpdi2(s64 a, s64 b)
@@ -561,6 +568,7 @@ _GLOBAL(__cmpdi2)
        bltlr
        li      r3,2
        blr
+EXPORT_SYMBOL(__cmpdi2)
 /*
  * 64-bit comparison: __ucmpdi2(u64 a, u64 b)
  * Returns 0 if a < b, 1 if a == b, 2 if a > b.
@@ -575,6 +583,7 @@ _GLOBAL(__ucmpdi2)
        bltlr
        li      r3,2
        blr
+EXPORT_SYMBOL(__ucmpdi2)
 
 _GLOBAL(__bswapdi2)
        rotlwi  r9,r4,8
@@ -586,6 +595,7 @@ _GLOBAL(__bswapdi2)
        mr      r3,r9
        mr      r4,r10
        blr
+EXPORT_SYMBOL(__bswapdi2)
 
 #ifdef CONFIG_SMP
 _GLOBAL(start_secondary_resume)
index 9f0bed214bcb891d48078cfa094c3c1792a3a0d3..4f178671f230ccd799a9089ca4389590e94bc703 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/kexec.h>
 #include <asm/ptrace.h>
 #include <asm/mmu.h>
+#include <asm/export.h>
 
        .text
 
@@ -110,6 +111,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
        isync
        blr
 _ASM_NOKPROBE_SYMBOL(flush_icache_range)
+EXPORT_SYMBOL(flush_icache_range)
 
 /*
  * Like above, but only do the D-cache.
@@ -140,6 +142,7 @@ _GLOBAL(flush_dcache_range)
        bdnz    0b
        sync
        blr
+EXPORT_SYMBOL(flush_dcache_range)
 
 /*
  * Like above, but works on non-mapped physical addresses.
@@ -243,6 +246,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
        blr
 
 _GLOBAL(__bswapdi2)
+EXPORT_SYMBOL(__bswapdi2)
        srdi    r8,r3,32
        rlwinm  r7,r3,8,0xffffffff
        rlwimi  r7,r3,24,0,7
index 95d3769a2e267cd9ee14b1901418c74526ceb975..74bec549897202e797b2f0c7de30ad7d83ed9cfb 100644 (file)
@@ -56,6 +56,7 @@ static DECLARE_BITMAP(phb_bitmap, MAX_PHBS);
 
 /* ISA Memory physical address */
 resource_size_t isa_mem_base;
+EXPORT_SYMBOL(isa_mem_base);
 
 
 static struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
index 1f7930037cb7df036ad61d4358bc29fab994618f..678f87a63645718e0f63488f52ff5ae6fc72ae06 100644 (file)
@@ -32,6 +32,8 @@
 unsigned long isa_io_base     = 0;
 unsigned long pci_dram_offset = 0;
 int pcibios_assign_bus_offset = 1;
+EXPORT_SYMBOL(isa_io_base);
+EXPORT_SYMBOL(pci_dram_offset);
 
 void pcibios_make_OF_bus_map(void);
 
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
deleted file mode 100644 (file)
index 9f01e28..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-#include <linux/ftrace.h>
-#include <linux/mm.h>
-
-#include <asm/processor.h>
-#include <asm/switch_to.h>
-#include <asm/cacheflush.h>
-#include <asm/epapr_hcalls.h>
-
-#ifdef CONFIG_PPC64
-EXPORT_SYMBOL(flush_dcache_range);
-#endif
-EXPORT_SYMBOL(flush_icache_range);
-
-EXPORT_SYMBOL(empty_zero_page);
-
-long long __bswapdi2(long long);
-EXPORT_SYMBOL(__bswapdi2);
-
-#ifdef CONFIG_FUNCTION_TRACER
-EXPORT_SYMBOL(_mcount);
-#endif
-
-#ifdef CONFIG_PPC_FPU
-EXPORT_SYMBOL(load_fp_state);
-EXPORT_SYMBOL(store_fp_state);
-#endif
-
-#ifdef CONFIG_ALTIVEC
-EXPORT_SYMBOL(load_vr_state);
-EXPORT_SYMBOL(store_vr_state);
-#endif
-
-#ifdef CONFIG_EPAPR_PARAVIRT
-EXPORT_SYMBOL(epapr_hypercall_start);
-#endif
-
-EXPORT_SYMBOL(current_stack_pointer);
diff --git a/arch/powerpc/kernel/ppc_ksyms_32.c b/arch/powerpc/kernel/ppc_ksyms_32.c
deleted file mode 100644 (file)
index 2bfaafe..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <linux/export.h>
-#include <linux/smp.h>
-
-#include <asm/page.h>
-#include <asm/dma.h>
-#include <asm/io.h>
-#include <asm/hw_irq.h>
-#include <asm/time.h>
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-#include <asm/dcr.h>
-
-EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
-EXPORT_SYMBOL(DMA_MODE_READ);
-EXPORT_SYMBOL(DMA_MODE_WRITE);
-
-#if defined(CONFIG_PCI)
-EXPORT_SYMBOL(isa_io_base);
-EXPORT_SYMBOL(isa_mem_base);
-EXPORT_SYMBOL(pci_dram_offset);
-#endif
-
-#ifdef CONFIG_SMP
-EXPORT_SYMBOL(smp_hw_index);
-#endif
-
-long long __ashrdi3(long long, int);
-long long __ashldi3(long long, int);
-long long __lshrdi3(long long, int);
-int __ucmpdi2(unsigned long long, unsigned long long);
-int __cmpdi2(long long, long long);
-EXPORT_SYMBOL(__ashrdi3);
-EXPORT_SYMBOL(__ashldi3);
-EXPORT_SYMBOL(__lshrdi3);
-EXPORT_SYMBOL(__ucmpdi2);
-EXPORT_SYMBOL(__cmpdi2);
-
-EXPORT_SYMBOL(timer_interrupt);
-EXPORT_SYMBOL(tb_ticks_per_jiffy);
-
-EXPORT_SYMBOL(switch_mmu_context);
-
-#ifdef CONFIG_PPC_STD_MMU_32
-extern long mol_trampoline;
-EXPORT_SYMBOL(mol_trampoline); /* For MOL */
-EXPORT_SYMBOL(flush_hash_pages); /* For MOL */
-#ifdef CONFIG_SMP
-extern int mmu_hash_lock;
-EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */
-#endif /* CONFIG_SMP */
-extern long *intercept_table;
-EXPORT_SYMBOL(intercept_table);
-#endif /* CONFIG_PPC_STD_MMU_32 */
-
-#ifdef CONFIG_PPC_DCR_NATIVE
-EXPORT_SYMBOL(__mtdcr);
-EXPORT_SYMBOL(__mfdcr);
-#endif
-
-EXPORT_SYMBOL(flush_instruction_cache);
index 9e7c10fe205f7f5dc3415f54d5abf7bf2a2105b3..49a680d5ae3740041b8819095d3815dda4930417 100644 (file)
@@ -1012,7 +1012,7 @@ void restore_tm_state(struct pt_regs *regs)
        /* Ensure that restore_math() will restore */
        if (msr_diff & MSR_FP)
                current->thread.load_fp = 1;
-#ifdef CONFIG_ALIVEC
+#ifdef CONFIG_ALTIVEC
        if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC)
                current->thread.load_vec = 1;
 #endif
@@ -1215,7 +1215,7 @@ static void show_instructions(struct pt_regs *regs)
                int instr;
 
                if (!(i % 8))
-                       printk("\n");
+                       pr_cont("\n");
 
 #if !defined(CONFIG_BOOKE)
                /* If executing with the IMMU off, adjust pc rather
@@ -1227,18 +1227,18 @@ static void show_instructions(struct pt_regs *regs)
 
                if (!__kernel_text_address(pc) ||
                     probe_kernel_address((unsigned int __user *)pc, instr)) {
-                       printk(KERN_CONT "XXXXXXXX ");
+                       pr_cont("XXXXXXXX ");
                } else {
                        if (regs->nip == pc)
-                               printk(KERN_CONT "<%08x> ", instr);
+                               pr_cont("<%08x> ", instr);
                        else
-                               printk(KERN_CONT "%08x ", instr);
+                               pr_cont("%08x ", instr);
                }
 
                pc += sizeof(int);
        }
 
-       printk("\n");
+       pr_cont("\n");
 }
 
 struct regbit {
@@ -1282,7 +1282,7 @@ static void print_bits(unsigned long val, struct regbit *bits, const char *sep)
 
        for (; bits->bit; ++bits)
                if (val & bits->bit) {
-                       printk("%s%s", s, bits->name);
+                       pr_cont("%s%s", s, bits->name);
                        s = sep;
                }
 }
@@ -1305,9 +1305,9 @@ static void print_tm_bits(unsigned long val)
  *   T: Transactional  (bit 34)
  */
        if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) {
-               printk(",TM[");
+               pr_cont(",TM[");
                print_bits(val, msr_tm_bits, "");
-               printk("]");
+               pr_cont("]");
        }
 }
 #else
@@ -1316,10 +1316,10 @@ static void print_tm_bits(unsigned long val) {}
 
 static void print_msr_bits(unsigned long val)
 {
-       printk("<");
+       pr_cont("<");
        print_bits(val, msr_bits, ",");
        print_tm_bits(val);
-       printk(">");
+       pr_cont(">");
 }
 
 #ifdef CONFIG_PPC64
@@ -1347,29 +1347,29 @@ void show_regs(struct pt_regs * regs)
        printk("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
        trap = TRAP(regs);
        if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
-               printk("CFAR: "REG" ", regs->orig_gpr3);
+               pr_cont("CFAR: "REG" ", regs->orig_gpr3);
        if (trap == 0x200 || trap == 0x300 || trap == 0x600)
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
-               printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
+               pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
 #else
-               printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
+               pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
 #endif
 #ifdef CONFIG_PPC64
-       printk("SOFTE: %ld ", regs->softe);
+       pr_cont("SOFTE: %ld ", regs->softe);
 #endif
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        if (MSR_TM_ACTIVE(regs->msr))
-               printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
+               pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
 #endif
 
        for (i = 0;  i < 32;  i++) {
                if ((i % REGS_PER_LINE) == 0)
-                       printk("\nGPR%02d: ", i);
-               printk(REG " ", regs->gpr[i]);
+                       pr_cont("\nGPR%02d: ", i);
+               pr_cont(REG " ", regs->gpr[i]);
                if (i == LAST_VOLATILE && !FULL_REGS(regs))
                        break;
        }
-       printk("\n");
+       pr_cont("\n");
 #ifdef CONFIG_KALLSYMS
        /*
         * Lookup NIP late so we have the best change of getting the
@@ -1900,14 +1900,14 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
                        printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
                        if ((ip == rth) && curr_frame >= 0) {
-                               printk(" (%pS)",
+                               pr_cont(" (%pS)",
                                       (void *)current->ret_stack[curr_frame].ret);
                                curr_frame--;
                        }
 #endif
                        if (firstframe)
-                               printk(" (unreliable)");
-                       printk("\n");
+                               pr_cont(" (unreliable)");
+                       pr_cont("\n");
                }
                firstframe = 0;
 
index f52b7db327c80a3b603fb2a7179a2f97d6a054be..010b7b310237e4be38ef1d7bcc15a76050fc4469 100644 (file)
@@ -74,7 +74,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                        break;
 
                copied = access_process_vm(child, (u64)addrOthers, &tmp,
-                               sizeof(tmp), 0);
+                               sizeof(tmp), FOLL_FORCE);
                if (copied != sizeof(tmp))
                        break;
                ret = put_user(tmp, (u32 __user *)data);
@@ -179,7 +179,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                        break;
                ret = 0;
                if (access_process_vm(child, (u64)addrOthers, &tmp,
-                                       sizeof(tmp), 1) == sizeof(tmp))
+                                       sizeof(tmp),
+                                       FOLL_FORCE | FOLL_WRITE) == sizeof(tmp))
                        break;
                ret = -EIO;
                break;
index dba265c586df010fea218aa2c3d54102c5dbb708..270ee30abdcf739982438271d8b5957e7e830dbb 100644 (file)
@@ -131,15 +131,26 @@ void machine_shutdown(void)
                ppc_md.machine_shutdown();
 }
 
+static void machine_hang(void)
+{
+       pr_emerg("System Halted, OK to turn off power\n");
+       local_irq_disable();
+       while (1)
+               ;
+}
+
 void machine_restart(char *cmd)
 {
        machine_shutdown();
        if (ppc_md.restart)
                ppc_md.restart(cmd);
+
        smp_send_stop();
-       printk(KERN_EMERG "System Halted, OK to turn off power\n");
-       local_irq_disable();
-       while (1) ;
+
+       do_kernel_restart(cmd);
+       mdelay(1000);
+
+       machine_hang();
 }
 
 void machine_power_off(void)
@@ -147,10 +158,9 @@ void machine_power_off(void)
        machine_shutdown();
        if (pm_power_off)
                pm_power_off();
+
        smp_send_stop();
-       printk(KERN_EMERG "System Halted, OK to turn off power\n");
-       local_irq_disable();
-       while (1) ;
+       machine_hang();
 }
 /* Used by the G5 thermal driver */
 EXPORT_SYMBOL_GPL(machine_power_off);
@@ -163,10 +173,9 @@ void machine_halt(void)
        machine_shutdown();
        if (ppc_md.halt)
                ppc_md.halt();
+
        smp_send_stop();
-       printk(KERN_EMERG "System Halted, OK to turn off power\n");
-       local_irq_disable();
-       while (1) ;
+       machine_hang();
 }
 
 
index 24ec3ea4b3a2eeeeae2e0f713226acd252bab806..5fe79182f0fac97ef292b3ab680f7bc4cdd419e1 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/cpu.h>
 #include <linux/console.h>
 #include <linux/memblock.h>
+#include <linux/export.h>
 
 #include <asm/io.h>
 #include <asm/prom.h>
@@ -47,11 +48,16 @@ int boot_cpuid_phys;
 EXPORT_SYMBOL_GPL(boot_cpuid_phys);
 
 int smp_hw_index[NR_CPUS];
+EXPORT_SYMBOL(smp_hw_index);
 
 unsigned long ISA_DMA_THRESHOLD;
 unsigned int DMA_MODE_READ;
 unsigned int DMA_MODE_WRITE;
 
+EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
+EXPORT_SYMBOL(DMA_MODE_READ);
+EXPORT_SYMBOL(DMA_MODE_WRITE);
+
 /*
  * These are used in binfmt_elf.c to put aux entries on the stack
  * for each elf executable being started.
index 7ac8e6eaab5ba24566f1f6fe06829e22727e86ea..8d586cff8a41f7e95b9ec7fc44b7126a4904c9dc 100644 (file)
@@ -226,17 +226,25 @@ static void __init configure_exceptions(void)
                if (firmware_has_feature(FW_FEATURE_OPAL))
                        opal_configure_cores();
 
-               /* Enable AIL if supported, and we are in hypervisor mode */
-               if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
-                   early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
-                       unsigned long lpcr = mfspr(SPRN_LPCR);
-                       mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
-               }
+               /* AIL on native is done in cpu_ready_for_interrupts() */
        }
 }
 
 static void cpu_ready_for_interrupts(void)
 {
+       /*
+        * Enable AIL if supported, and we are in hypervisor mode. This
+        * is called once for every processor.
+        *
+        * If we are not in hypervisor mode the job is done once for
+        * the whole partition in configure_exceptions().
+        */
+       if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
+           early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
+               unsigned long lpcr = mfspr(SPRN_LPCR);
+               mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+       }
+
        /* Set IR and DR in PACA MSR */
        get_paca()->kernel_msr = MSR_KERNEL;
 }
index 67859b7d1c97b483a7487aedb838843911591ff4..bc3f7d0d7b7987dec3a74e4c59c2d133adb1330c 100644 (file)
@@ -596,6 +596,7 @@ void timer_interrupt(struct pt_regs * regs)
        irq_exit();
        set_irq_regs(old_regs);
 }
+EXPORT_SYMBOL(timer_interrupt);
 
 /*
  * Hypervisor decrementer interrupts shouldn't occur but are sometimes
index a1f8f5641e9e22db73c6f3a2ffc124605d1e2bfe..023a462725b5dccf39ff51521118986f824afdbe 100644 (file)
@@ -273,7 +273,6 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
        force_sig_info(signr, &info, current);
 }
 
-#ifdef CONFIG_PPC64
 void system_reset_exception(struct pt_regs *regs)
 {
        /* See if any machine dependent calls */
@@ -291,6 +290,7 @@ void system_reset_exception(struct pt_regs *regs)
        /* What should we do here? We could issue a shutdown or hard reset. */
 }
 
+#ifdef CONFIG_PPC64
 /*
  * This function is called in real mode. Strictly no printk's please.
  *
@@ -352,12 +352,11 @@ static inline int check_io_access(struct pt_regs *regs)
                 * For the debug message, we look at the preceding
                 * load or store.
                 */
-               if (*nip == 0x60000000)         /* nop */
+               if (*nip == PPC_INST_NOP)
                        nip -= 2;
-               else if (*nip == 0x4c00012c)    /* isync */
+               else if (*nip == PPC_INST_ISYNC)
                        --nip;
-               if (*nip == 0x7c0004ac || (*nip >> 26) == 3) {
-                       /* sync or twi */
+               if (*nip == PPC_INST_SYNC || (*nip >> 26) == OP_TRAP) {
                        unsigned int rb;
 
                        --nip;
@@ -668,6 +667,31 @@ int machine_check_e200(struct pt_regs *regs)
 
        return 0;
 }
+#elif defined(CONFIG_PPC_8xx)
+int machine_check_8xx(struct pt_regs *regs)
+{
+       unsigned long reason = get_mc_reason(regs);
+
+       pr_err("Machine check in kernel mode.\n");
+       pr_err("Caused by (from SRR1=%lx): ", reason);
+       if (reason & 0x40000000)
+               pr_err("Fetch error at address %lx\n", regs->nip);
+       else
+               pr_err("Data access error at address %lx\n", regs->dar);
+
+#ifdef CONFIG_PCI
+       /* the qspan pci read routines can cause machine checks -- Cort
+        *
+        * yuck !!! that totally needs to go away ! There are better ways
+        * to deal with that than having a wart in the mcheck handler.
+        * -- BenH
+        */
+       bad_page_fault(regs, regs->dar, SIGBUS);
+       return 1;
+#else
+       return 0;
+#endif
+}
 #else
 int machine_check_generic(struct pt_regs *regs)
 {
@@ -727,17 +751,6 @@ void machine_check_exception(struct pt_regs *regs)
        if (recover > 0)
                goto bail;
 
-#if defined(CONFIG_8xx) && defined(CONFIG_PCI)
-       /* the qspan pci read routines can cause machine checks -- Cort
-        *
-        * yuck !!! that totally needs to go away ! There are better ways
-        * to deal with that than having a wart in the mcheck handler.
-        * -- BenH
-        */
-       bad_page_fault(regs, regs->dar, SIGBUS);
-       goto bail;
-#endif
-
        if (debugger_fault_handler(regs))
                goto bail;
 
index bc85bdff4e01a18ed7b241faf2b319b845ac13b6..0c123f3406cd0b19fc9bd8f08f6b6aeecd7235da 100644 (file)
@@ -6,6 +6,7 @@
 #include <asm/thread_info.h>
 #include <asm/page.h>
 #include <asm/ptrace.h>
+#include <asm/export.h>
 
 /*
  * Load state from memory into VMX registers including VSCR.
@@ -17,6 +18,7 @@ _GLOBAL(load_vr_state)
        mtvscr  v0
        REST_32VRS(0,r4,r3)
        blr
+EXPORT_SYMBOL(load_vr_state)
 
 /*
  * Store VMX state into memory, including VSCR.
@@ -28,6 +30,7 @@ _GLOBAL(store_vr_state)
        li      r4, VRSTATE_VSCR
        stvx    v0, r4, r3
        blr
+EXPORT_SYMBOL(store_vr_state)
 
 /*
  * Disable VMX for the task which had it previously,
index 82ff5de8b1e7a5564df01dd323c0662ab6457b3b..a0ea63ac2b521b6f8a861aa4b211c0c08dd1062f 100644 (file)
@@ -23,6 +23,7 @@
 #include <asm/ppc-opcode.h>
 #include <asm/pnv-pci.h>
 #include <asm/opal.h>
+#include <asm/smp.h>
 
 #include "book3s_xics.h"
 
index ad5290005ca432549e8b12f268845bb90834ca76..309361e8652331b65e9bc2d28b870f9791b6d986 100644 (file)
@@ -9,7 +9,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
 CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
 
-obj-y += string.o alloc.o crtsavres.o ppc_ksyms.o code-patching.o \
+obj-y += string.o alloc.o crtsavres.o code-patching.o \
         feature-fixups.o
 
 obj-$(CONFIG_PPC32)    += div64.o copy_32.o
index aa8214f30c920e05c6bf62349c6ea52e0ca68dc9..ea29a5d67743722dee2ca0f7fb15c98ad4b01e05 100644 (file)
@@ -17,6 +17,7 @@
 #include <asm/cache.h>
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
+#include <asm/export.h>
 
        .text
 
@@ -68,6 +69,7 @@ _GLOBAL(__csum_partial)
        adde    r5,r5,r0
 5:     addze   r3,r5           /* add in final carry */
        blr
+EXPORT_SYMBOL(__csum_partial)
 
 /*
  * Computes the checksum of a memory block at src, length len,
@@ -297,3 +299,4 @@ dst_error:
        .long   41b,dst_error
        .long   50b,src_error
        .long   51b,dst_error
+EXPORT_SYMBOL(csum_partial_copy_generic)
index fdec6e613e954b8fa70f8f27a3971e378b1d6d1b..fd9176671f9fb41b1c74f866445bb2e52d29c6b9 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/processor.h>
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
+#include <asm/export.h>
 
 /*
  * Computes the checksum of a memory block at buff, length len,
@@ -176,6 +177,7 @@ _GLOBAL(__csum_partial)
        add     r3,r4,r0
        srdi    r3,r3,32
        blr
+EXPORT_SYMBOL(__csum_partial)
 
 
        .macro srcnr
@@ -430,3 +432,4 @@ dstnr;      stb     r6,0(r4)
        li      r6,-EFAULT
        stw     r6,0(r8)
        blr
+EXPORT_SYMBOL(csum_partial_copy_generic)
index 99f37f24185ca890127b3e2a1ebed7ed14f4ca55..40cce33b08d6a226f64ab94788725647c15df54d 100644 (file)
@@ -12,6 +12,7 @@
 #include <asm/cache.h>
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
+#include <asm/export.h>
 
 #define COPY_16_BYTES          \
        lwz     r7,4(r4);       \
@@ -92,6 +93,7 @@ _GLOBAL(memset)
        subf    r6,r0,r6
        cmplwi  0,r4,0
        bne     2f      /* Use normal procedure if r4 is not zero */
+EXPORT_SYMBOL(memset)
 _GLOBAL(memset_nocache_branch)
        b       2f      /* Skip optimised bloc until cache is enabled */
 
@@ -216,6 +218,8 @@ _GLOBAL(memcpy)
        stbu    r0,1(r6)
        bdnz    40b
 65:    blr
+EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(memmove)
 
 generic_memcpy:
        srwi.   r7,r5,3
@@ -507,3 +511,4 @@ _GLOBAL(__copy_tofrom_user)
        .long   112b,120b
        .long   114b,120b
        .text
+EXPORT_SYMBOL(__copy_tofrom_user)
index a3c4dc4defdd86e02ad098fa0269c68170935e78..21367b3a81465ea65ec38387d1e46e33cacbeeb6 100644 (file)
@@ -10,6 +10,7 @@
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
+#include <asm/export.h>
 
         .section        ".toc","aw"
 PPC64_CACHES:
@@ -110,3 +111,4 @@ END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
        std     r11,120(r3)
        std     r12,128(r3)
        blr
+EXPORT_SYMBOL(copy_page)
index f09899e35991711d0a57e74519af662b59e59f15..60386b2c99bb301165182e31bd876583f9c3b887 100644 (file)
@@ -8,6 +8,7 @@
  */
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
+#include <asm/export.h>
 
 #ifdef __BIG_ENDIAN__
 #define sLd sld                /* Shift towards low-numbered address. */
@@ -359,6 +360,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
        addi    r3,r3,8
 171:
 177:
+179:
        addi    r3,r3,8
 370:
 372:
@@ -373,7 +375,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 173:
 174:
 175:
-179:
 181:
 184:
 186:
@@ -671,3 +672,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
        .llong  89b,100b
        .llong  90b,100b
        .llong  91b,100b
+EXPORT_SYMBOL(__copy_tofrom_user)
index 19e66001a4f9d5ab6b1c1e7cc6f15c9b50b83b55..3de7ac154f24e7c1198ffdfc6942ff011088162f 100644 (file)
@@ -19,6 +19,7 @@
  */
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
+#include <asm/export.h>
 
 /* Note: This code relies on -mminimal-toc */
 
@@ -32,6 +33,7 @@ FTR_SECTION_ELSE
        clrldi  r3,r3,64-8
        blr
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight8)
 
 _GLOBAL(__arch_hweight16)
 BEGIN_FTR_SECTION
@@ -54,6 +56,7 @@ FTR_SECTION_ELSE
        blr
   ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight16)
 
 _GLOBAL(__arch_hweight32)
 BEGIN_FTR_SECTION
@@ -79,6 +82,7 @@ FTR_SECTION_ELSE
        blr
   ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight32)
 
 _GLOBAL(__arch_hweight64)
 BEGIN_FTR_SECTION
@@ -108,3 +112,4 @@ FTR_SECTION_ELSE
        blr
   ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight64)
index eda7a96161ab6f494e6403cc6fa5cc2da89cd4c9..85fa9869aec5848d1c22c7b24e297cb51f1d4a9d 100644 (file)
@@ -11,6 +11,7 @@
 #include <asm/processor.h>
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
+#include <asm/export.h>
 
 _GLOBAL(memset)
        neg     r0,r3
@@ -77,6 +78,7 @@ _GLOBAL(memset)
 10:    bflr    31
        stb     r4,0(r6)
        blr
+EXPORT_SYMBOL(memset)
 
 _GLOBAL_TOC(memmove)
        cmplw   0,r3,r4
@@ -119,3 +121,4 @@ _GLOBAL(backwards_memcpy)
        beq     2b
        mtctr   r7
        b       1b
+EXPORT_SYMBOL(memmove)
index 8953d2382a653a948afb340304b07bbfc51181d8..d75d18b7bd554d3d389d543d3084e43be29f1d16 100644 (file)
@@ -8,6 +8,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <asm/ppc_asm.h>
+#include <asm/export.h>
 
 #define off8   r6
 #define off16  r7
@@ -231,3 +232,4 @@ _GLOBAL(memcmp)
        ld      r28,-32(r1)
        ld      r27,-40(r1)
        blr
+EXPORT_SYMBOL(memcmp)
index 32a06ec395d2108202762c6d164b3994ca6a7644..f4d6088e2d5390b937f60e35819496dfeab17858 100644 (file)
@@ -8,6 +8,7 @@
  */
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
+#include <asm/export.h>
 
        .align  7
 _GLOBAL_TOC(memcpy)
@@ -219,3 +220,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 4:     ld      r3,-STACKFRAMESIZE+STK_REG(R31)(r1)     /* return dest pointer */
        blr
 #endif
+EXPORT_SYMBOL(memcpy)
diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c
deleted file mode 100644 (file)
index ae69d84..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <linux/string.h>
-#include <linux/uaccess.h>
-#include <linux/bitops.h>
-#include <net/checksum.h>
-
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(memcmp);
-EXPORT_SYMBOL(memchr);
-
-EXPORT_SYMBOL(strncpy);
-EXPORT_SYMBOL(strncmp);
-
-#ifndef CONFIG_GENERIC_CSUM
-EXPORT_SYMBOL(__csum_partial);
-EXPORT_SYMBOL(csum_partial_copy_generic);
-#endif
-
-EXPORT_SYMBOL(__copy_tofrom_user);
-EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(copy_page);
-
-#ifdef CONFIG_PPC64
-EXPORT_SYMBOL(__arch_hweight8);
-EXPORT_SYMBOL(__arch_hweight16);
-EXPORT_SYMBOL(__arch_hweight32);
-EXPORT_SYMBOL(__arch_hweight64);
-#endif
index beabc68d9a1e4cdb7e40d26045a906173e652790..d13e0760351955b16be34cb5ae2e29484c35c17d 100644 (file)
@@ -11,6 +11,7 @@
 #include <asm/processor.h>
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
+#include <asm/export.h>
 
        .section __ex_table,"a"
        PPC_LONG_ALIGN
@@ -36,6 +37,7 @@ _GLOBAL(strncpy)
 2:     stbu    r0,1(r6)        /* clear it out if so */
        bdnz    2b
        blr
+EXPORT_SYMBOL(strncpy)
 
 _GLOBAL(strncmp)
        PPC_LCMPI 0,r5,0
@@ -53,6 +55,7 @@ _GLOBAL(strncmp)
        blr
 2:     li      r3,0
        blr
+EXPORT_SYMBOL(strncmp)
 
 #ifdef CONFIG_PPC32
 _GLOBAL(memcmp)
@@ -68,6 +71,7 @@ _GLOBAL(memcmp)
        blr
 2:     li      r3,0
        blr
+EXPORT_SYMBOL(memcmp)
 #endif
 
 _GLOBAL(memchr)
@@ -82,6 +86,7 @@ _GLOBAL(memchr)
        beqlr
 2:     li      r3,0
        blr
+EXPORT_SYMBOL(memchr)
 
 #ifdef CONFIG_PPC32
 _GLOBAL(__clear_user)
@@ -125,4 +130,5 @@ _GLOBAL(__clear_user)
        PPC_LONG        1b,91b
        PPC_LONG        8b,92b
        .text
+EXPORT_SYMBOL(__clear_user)
 #endif
index 7bd9549a90a23f0c82fa5688f1e2ebf8c3845e7f..57ace356c9490fbce556f82c6dd8a74e2fe8655a 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
+#include <asm/export.h>
 
        .section        ".toc","aw"
 PPC64_CACHES:
@@ -200,3 +201,4 @@ err1;       dcbz    r0,r3
        cmpdi   r4,32
        blt     .Lshort_clear
        b       .Lmedium_clear
+EXPORT_SYMBOL(__clear_user)
index bb0354222b1158c57133a09ef50b98cf20de6f29..362954f98029b46d4d3d312b239bb7a2fa8fe63a 100644 (file)
@@ -106,6 +106,8 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
        switch (REGION_ID(ea)) {
        case USER_REGION_ID:
                pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
+               if (mm == NULL)
+                       return 1;
                psize = get_slice_psize(mm, ea);
                ssize = user_segment_size(ea);
                vsid = get_vsid(mm->context.id, ea, ssize);
index 115347f74ce5867199b2b329cc94cbddd0e0d5b4..09cc50c8dace34652b028ab94f8bd8123daffa0e 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
+#include <asm/export.h>
 
 #ifdef CONFIG_SMP
        .section .bss
@@ -33,6 +34,7 @@
        .globl mmu_hash_lock
 mmu_hash_lock:
        .space  4
+EXPORT_SYMBOL(mmu_hash_lock)
 #endif /* CONFIG_SMP */
 
 /*
@@ -575,6 +577,7 @@ _GLOBAL(flush_hash_pages)
        rlwinm  r8,r8,0,31,29           /* clear HASHPTE bit */
        stwcx.  r8,0,r5                 /* update the pte */
        bne-    33b
+EXPORT_SYMBOL(flush_hash_pages)
 
        /* Get the address of the primary PTE group in the hash table (r3) */
 _GLOBAL(flush_hash_patch_A)
index 90480e23fd2c5333bdff811e8e003d32b59e034e..78dabf065ba96eb755267b9099e50dd6f024653d 100644 (file)
@@ -193,8 +193,12 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
                /*
                 * Kernel read only mapped with ppp bits 0b110
                 */
-               if (!(pteflags & _PAGE_WRITE))
-                       rflags |= (HPTE_R_PP0 | 0x2);
+               if (!(pteflags & _PAGE_WRITE)) {
+                       if (mmu_has_feature(MMU_FTR_KERNEL_RO))
+                               rflags |= (HPTE_R_PP0 | 0x2);
+                       else
+                               rflags |= 0x3;
+               }
        } else {
                if (pteflags & _PAGE_RWX)
                        rflags |= 0x2;
@@ -529,7 +533,7 @@ static bool might_have_hea(void)
         */
 #ifdef CONFIG_IBMEBUS
        return !cpu_has_feature(CPU_FTR_ARCH_207S) &&
-               !firmware_has_feature(FW_FEATURE_SPLPAR);
+               firmware_has_feature(FW_FEATURE_SPLPAR);
 #else
        return false;
 #endif
@@ -1029,6 +1033,10 @@ void hash__early_init_mmu_secondary(void)
 {
        /* Initialize hash table for that CPU */
        if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+
+               if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+                       update_hid_for_hash();
+
                if (!cpu_has_feature(CPU_FTR_ARCH_300))
                        mtspr(SPRN_SDR1, _SDR1);
                else
index 75b9cd6150cc80c98aa4dfec8ab633f126638f01..a51c188b81f31bf1c3c5ecbe5f0b8dd572a65ccb 100644 (file)
@@ -845,7 +845,7 @@ void __init dump_numa_cpu_topology(void)
                return;
 
        for_each_online_node(node) {
-               printk(KERN_DEBUG "Node %d CPUs:", node);
+               pr_info("Node %d CPUs:", node);
 
                count = 0;
                /*
@@ -856,52 +856,18 @@ void __init dump_numa_cpu_topology(void)
                        if (cpumask_test_cpu(cpu,
                                        node_to_cpumask_map[node])) {
                                if (count == 0)
-                                       printk(" %u", cpu);
+                                       pr_cont(" %u", cpu);
                                ++count;
                        } else {
                                if (count > 1)
-                                       printk("-%u", cpu - 1);
+                                       pr_cont("-%u", cpu - 1);
                                count = 0;
                        }
                }
 
                if (count > 1)
-                       printk("-%u", nr_cpu_ids - 1);
-               printk("\n");
-       }
-}
-
-static void __init dump_numa_memory_topology(void)
-{
-       unsigned int node;
-       unsigned int count;
-
-       if (min_common_depth == -1 || !numa_enabled)
-               return;
-
-       for_each_online_node(node) {
-               unsigned long i;
-
-               printk(KERN_DEBUG "Node %d Memory:", node);
-
-               count = 0;
-
-               for (i = 0; i < memblock_end_of_DRAM();
-                    i += (1 << SECTION_SIZE_BITS)) {
-                       if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
-                               if (count == 0)
-                                       printk(" 0x%lx", i);
-                               ++count;
-                       } else {
-                               if (count > 0)
-                                       printk("-0x%lx", i);
-                               count = 0;
-                       }
-               }
-
-               if (count > 0)
-                       printk("-0x%lx", i);
-               printk("\n");
+                       pr_cont("-%u", nr_cpu_ids - 1);
+               pr_cont("\n");
        }
 }
 
@@ -947,8 +913,6 @@ void __init initmem_init(void)
 
        if (parse_numa_properties())
                setup_nonnuma();
-       else
-               dump_numa_memory_topology();
 
        memblock_dump_all();
 
index ed7bddc456b72b5a7ce1b647438cd2271306302a..688b54517655f1ef787023f18f3cacdcbd62ba3b 100644 (file)
@@ -388,6 +388,10 @@ void radix__early_init_mmu_secondary(void)
         * update partition table control register and UPRT
         */
        if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+
+               if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+                       update_hid_for_radix();
+
                lpcr = mfspr(SPRN_LPCR);
                mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
 
index 0e49ec541ab57c91fbf568947cf02ac4f9bbe1ac..3493cf4e045258df20f5cf47990e991b6af265b7 100644 (file)
@@ -50,6 +50,8 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
        for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
                __tlbiel_pid(pid, set, ric);
        }
+       if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+               asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
        return;
 }
 
@@ -83,6 +85,8 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
        asm volatile("ptesync": : :"memory");
+       if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+               asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
 }
 
 static inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -175,7 +179,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
        if (unlikely(pid == MMU_NO_CONTEXT))
                goto no_context;
 
-       if (!mm_is_core_local(mm)) {
+       if (!mm_is_thread_local(mm)) {
                int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
                if (lock_tlbie)
@@ -201,7 +205,7 @@ void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
        if (unlikely(pid == MMU_NO_CONTEXT))
                goto no_context;
 
-       if (!mm_is_core_local(mm)) {
+       if (!mm_is_thread_local(mm)) {
                int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
                if (lock_tlbie)
@@ -226,7 +230,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
        pid = mm ? mm->context.id : 0;
        if (unlikely(pid == MMU_NO_CONTEXT))
                goto bail;
-       if (!mm_is_core_local(mm)) {
+       if (!mm_is_thread_local(mm)) {
                int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
                if (lock_tlbie)
@@ -321,7 +325,7 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
 {
        unsigned long pid;
        unsigned long addr;
-       int local = mm_is_core_local(mm);
+       int local = mm_is_thread_local(mm);
        unsigned long ap = mmu_get_ap(psize);
        int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
        unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
index 7c7df40038207b28ca1eaeb745ad725689cc400a..994d1a959e20f4b1cd57112d359b31a1cf31c79b 100644 (file)
@@ -30,8 +30,8 @@ config EP8248E
        select 8272
        select 8260
        select FSL_SOC
-       select PHYLIB
-       select MDIO_BITBANG
+       select PHYLIB if NETDEVICES
+       select MDIO_BITBANG if PHYLIB
        help
          This enables support for the Embedded Planet EP8248E board.
 
index cdab847749e60342b41f4e2bb29e424da843f90d..8fec050f2d5b8a880afb3817ea34349bf49f0aad 100644 (file)
@@ -298,7 +298,9 @@ static const struct of_device_id of_bus_ids[] __initconst = {
 static int __init declare_of_platform_devices(void)
 {
        of_platform_bus_probe(NULL, of_bus_ids, NULL);
-       platform_driver_register(&ep8248e_mdio_driver);
+
+       if (IS_ENABLED(CONFIG_MDIO_BITBANG))
+               platform_driver_register(&ep8248e_mdio_driver);
 
        return 0;
 }
index 17e54339f8d98b1793c29076abca2bbaa0560396..575afd6eb36a405f6a13bf3b7f969d5b6611e7d4 100644 (file)
@@ -30,9 +30,7 @@
  */
 static void __init asp834x_setup_arch(void)
 {
-       if (ppc_md.progress)
-               ppc_md.progress("asp834x_setup_arch()", 0);
-
+       mpc83xx_setup_arch();
        mpc834x_usb_cfg();
 }
 
index e7fbd6366abbba9a003fa1855e4ee104ef612b56..d8642a4afc743e36183e6602a27b68baf63c221d 100644 (file)
@@ -130,10 +130,7 @@ static void __init mpc83xx_km_setup_arch(void)
        struct device_node *np;
 #endif
 
-       if (ppc_md.progress)
-               ppc_md.progress("kmpbec83xx_setup_arch()", 0);
-
-       mpc83xx_setup_pci();
+       mpc83xx_setup_arch();
 
 #ifdef CONFIG_QUICC_ENGINE
        np = of_find_node_by_name(NULL, "par_io");
index 8899aa9d11f5fae495d30209706226c1a60461c6..d75c9816a5c92ad4211d71568e90848ab7fc998a 100644 (file)
@@ -142,3 +142,11 @@ void __init mpc83xx_setup_pci(void)
                mpc83xx_add_bridge(np);
 }
 #endif
+
+void __init mpc83xx_setup_arch(void)
+{
+       if (ppc_md.progress)
+               ppc_md.progress("mpc83xx_setup_arch()", 0);
+
+       mpc83xx_setup_pci();
+}
index 040d5d0854675148fc213c7158e887b37c35fb3a..272c41c387b94a00cfe5bc77c541a7ae86a0683a 100644 (file)
  */
 static void __init mpc830x_rdb_setup_arch(void)
 {
-       if (ppc_md.progress)
-               ppc_md.progress("mpc830x_rdb_setup_arch()", 0);
-
-       mpc83xx_setup_pci();
+       mpc83xx_setup_arch();
        mpc831x_usb_cfg();
 }
 
index 40e0d8307b59e3dcca522c1b17e2f0ed1093eff4..fd80fd570e67ba4ca3261c6ee8a3f2e94b625947 100644 (file)
  */
 static void __init mpc831x_rdb_setup_arch(void)
 {
-       if (ppc_md.progress)
-               ppc_md.progress("mpc831x_rdb_setup_arch()", 0);
-
-       mpc83xx_setup_pci();
+       mpc83xx_setup_arch();
        mpc831x_usb_cfg();
 }
 
index cdfa47c4d3941bdc9a4573b3f3fbe821598587a0..bb7b25acf26ffd800d0ead5173c70031b5a488d2 100644 (file)
@@ -58,8 +58,7 @@ static void __init mpc832x_sys_setup_arch(void)
        struct device_node *np;
        u8 __iomem *bcsr_regs = NULL;
 
-       if (ppc_md.progress)
-               ppc_md.progress("mpc832x_sys_setup_arch()", 0);
+       mpc83xx_setup_arch();
 
        /* Map BCSR area */
        np = of_find_node_by_name(NULL, "bcsr");
@@ -71,8 +70,6 @@ static void __init mpc832x_sys_setup_arch(void)
                of_node_put(np);
        }
 
-       mpc83xx_setup_pci();
-
 #ifdef CONFIG_QUICC_ENGINE
        if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) {
                par_io_init(np);
index 0d6a62fc586463bff2da2240925fff9cb3178338..d7c9b186954d931c0957908dbd7a19e3f19d0610 100644 (file)
@@ -197,10 +197,7 @@ static void __init mpc832x_rdb_setup_arch(void)
        struct device_node *np;
 #endif
 
-       if (ppc_md.progress)
-               ppc_md.progress("mpc832x_rdb_setup_arch()", 0);
-
-       mpc83xx_setup_pci();
+       mpc83xx_setup_arch();
 
 #ifdef CONFIG_QUICC_ENGINE
        if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) {
index 8fd0c1e8b182815fb553aabf00d5fd376c3c22f4..73a5267df497ef267db4c277ea050582de640502 100644 (file)
@@ -57,10 +57,7 @@ machine_device_initcall(mpc834x_itx, mpc834x_itx_declare_of_platform_devices);
  */
 static void __init mpc834x_itx_setup_arch(void)
 {
-       if (ppc_md.progress)
-               ppc_md.progress("mpc834x_itx_setup_arch()", 0);
-
-       mpc83xx_setup_pci();
+       mpc83xx_setup_arch();
 
        mpc834x_usb_cfg();
 }
index eeaee6123bb34d48479811ae5759ecc32618adea..009cfc18a4ee3e5c3575fcd410386926ef6cb938 100644 (file)
@@ -76,10 +76,7 @@ static int mpc834xemds_usb_cfg(void)
  */
 static void __init mpc834x_mds_setup_arch(void)
 {
-       if (ppc_md.progress)
-               ppc_md.progress("mpc834x_mds_setup_arch()", 0);
-
-       mpc83xx_setup_pci();
+       mpc83xx_setup_arch();
 
        mpc834xemds_usb_cfg();
 }
index dacf4c2df06978d5007462e52dd7c4e826497cf2..4fc3051c2b2eee0bd4c463dec489dcab21d8f099 100644 (file)
@@ -66,8 +66,7 @@ static void __init mpc836x_mds_setup_arch(void)
        struct device_node *np;
        u8 __iomem *bcsr_regs = NULL;
 
-       if (ppc_md.progress)
-               ppc_md.progress("mpc836x_mds_setup_arch()", 0);
+       mpc83xx_setup_arch();
 
        /* Map BCSR area */
        np = of_find_node_by_name(NULL, "bcsr");
@@ -79,8 +78,6 @@ static void __init mpc836x_mds_setup_arch(void)
                of_node_put(np);
        }
 
-       mpc83xx_setup_pci();
-
 #ifdef CONFIG_QUICC_ENGINE
        if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) {
                par_io_init(np);
index cf67ac93ddcb8f341ff27667fac8f9ccf0b3d4e0..93f024fd9b459cc0dcae91af013d7d8f980fb0db 100644 (file)
@@ -31,10 +31,7 @@ machine_device_initcall(mpc836x_rdk, mpc83xx_declare_of_platform_devices);
 
 static void __init mpc836x_rdk_setup_arch(void)
 {
-       if (ppc_md.progress)
-               ppc_md.progress("mpc836x_rdk_setup_arch()", 0);
-
-       mpc83xx_setup_pci();
+       mpc83xx_setup_arch();
 }
 
 /*
index 652b97d699c9b7860d2745043af61e78b7f90729..3b34cc1f626c1ef9342e0150cc7e55d906c88961 100644 (file)
@@ -79,10 +79,7 @@ out:
  */
 static void __init mpc837x_mds_setup_arch(void)
 {
-       if (ppc_md.progress)
-               ppc_md.progress("mpc837x_mds_setup_arch()", 0);
-
-       mpc83xx_setup_pci();
+       mpc83xx_setup_arch();
        mpc837xmds_usb_cfg();
 }
 
index 667731d81676a179fe8cf6d65608bbdec0f967cc..0c55fa6af2d5481ed7844bb79ca68998014dc192 100644 (file)
@@ -50,10 +50,7 @@ static void mpc837x_rdb_sd_cfg(void)
  */
 static void __init mpc837x_rdb_setup_arch(void)
 {
-       if (ppc_md.progress)
-               ppc_md.progress("mpc837x_rdb_setup_arch()", 0);
-
-       mpc83xx_setup_pci();
+       mpc83xx_setup_arch();
        mpc837x_usb_cfg();
        mpc837x_rdb_sd_cfg();
 }
index ad484199eff7862881001ba722a738b684d19cd4..636eb9d0401ae354a4e7da56b5d99c01a3890b2c 100644 (file)
@@ -86,5 +86,6 @@ extern void mpc83xx_setup_pci(void);
 #endif
 
 extern int mpc83xx_declare_of_platform_devices(void);
+extern void mpc83xx_setup_arch(void);
 
 #endif                         /* __MPC83XX_H__ */
index b867e88dfb0d292634426d49cb58f039319c95dc..cb4bdabfdf1cd519badabb245b7e0685e515eb09 100644 (file)
  */
 static void __init sbc834x_setup_arch(void)
 {
-       if (ppc_md.progress)
-               ppc_md.progress("sbc834x_setup_arch()", 0);
-
-       mpc83xx_setup_pci();
+       mpc83xx_setup_arch();
 }
 
 machine_device_initcall(sbc834x, mpc83xx_declare_of_platform_devices);
index df25a3ed489dd43e578b2f7785116d8ecff1b89b..9dc1d28975b92e4f38c9e89983d7bae444eee495 100644 (file)
@@ -72,7 +72,7 @@ config MPC85xx_CDS
 config MPC85xx_MDS
        bool "Freescale MPC85xx MDS"
        select DEFAULT_UIMAGE
-       select PHYLIB
+       select PHYLIB if NETDEVICES
        select HAS_RAPIDIO
        select SWIOTLB
        help
index 07dd6ae3ec5251b0b0674546246759d0680e1c1a..d2f45569a02647db3abcb35ae4c20778836f6b14 100644 (file)
@@ -72,7 +72,6 @@ define_machine(bsc9132_qds) {
        .pcibios_fixup_bus      = fsl_pcibios_fixup_bus,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index e48f6710e6d5420de6a3802b5712709be92c01d9..0ffdb4a80c2afcf646d1a1d3c1a1fc5545463bf4 100644 (file)
@@ -59,7 +59,6 @@ define_machine(bsc9131_rdb) {
        .setup_arch             = bsc913x_rdb_setup_arch,
        .init_IRQ               = bsc913x_rdb_pic_init,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index 3b9e3f0f9aec2f6fa780d70a379e869a4137c7d2..4df1b4026eab4002a588afa5ad8fc887ca3bdc01 100644 (file)
@@ -65,7 +65,6 @@ define_machine(c293_pcie) {
        .setup_arch             = c293_pcie_setup_arch,
        .init_IRQ               = c293_pcie_pic_init,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index 3a6a84f07f43c5bbc876ca1b0eeff291044c11d9..1179115a4b5c64aff563af4699e645b311b0e979 100644 (file)
@@ -225,7 +225,6 @@ define_machine(corenet_generic) {
 #else
        .get_irq                = mpic_get_coreint_irq,
 #endif
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 #ifdef CONFIG_PPC64
index 14af36a7fa9caa63ef4bddb0d3544cf4ef26a490..f29c6f0909f354b089b31f3f4773bbc3fa411e10 100644 (file)
@@ -215,7 +215,6 @@ define_machine(ge_imp3a) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index 6ba687f19e45dc86dd1911a90d519546244acfad..94a7f92c858ffee2c41903f16f82a5cd492522fe 100644 (file)
@@ -77,7 +77,6 @@ define_machine(mpc8536_ds) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index 8756715c7a47fa12152bc53cd69077f43f835680..f3e055fdd1de436450d8035ba6800b950eaa62e9 100644 (file)
@@ -170,7 +170,6 @@ define_machine(mpc85xx_ads) {
        .init_IRQ               = mpc85xx_ads_pic_init,
        .show_cpuinfo           = mpc85xx_ads_show_cpuinfo,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index 86f20156178e919b5972ce4feeb22080a90a2bf9..224db30c497b0665284aafafa6e9edeec4e85ec3 100644 (file)
@@ -83,7 +83,8 @@ static int mpc85xx_exclude_device(struct pci_controller *hose,
                return PCIBIOS_SUCCESSFUL;
 }
 
-static void __noreturn mpc85xx_cds_restart(char *cmd)
+static int mpc85xx_cds_restart(struct notifier_block *this,
+                              unsigned long mode, void *cmd)
 {
        struct pci_dev *dev;
        u_char tmp;
@@ -108,12 +109,25 @@ static void __noreturn mpc85xx_cds_restart(char *cmd)
        }
 
        /*
-        *  If we can't find the VIA chip (maybe the P2P bridge is disabled)
-        *  or the VIA chip reset didn't work, just use the default reset.
+        *  If we can't find the VIA chip (maybe the P2P bridge is
+        *  disabled) or the VIA chip reset didn't work, just return
+        *  and let default reset sequence happen.
         */
-       fsl_rstcr_restart(NULL);
+       return NOTIFY_DONE;
 }
 
+static int mpc85xx_cds_restart_register(void)
+{
+       static struct notifier_block restart_handler;
+
+       restart_handler.notifier_call = mpc85xx_cds_restart;
+       restart_handler.priority = 192;
+
+       return register_restart_handler(&restart_handler);
+}
+machine_arch_initcall(mpc85xx_cds, mpc85xx_cds_restart_register);
+
+
 static void __init mpc85xx_cds_pci_irq_fixup(struct pci_dev *dev)
 {
        u_char c;
@@ -380,11 +394,8 @@ define_machine(mpc85xx_cds) {
        .show_cpuinfo   = mpc85xx_cds_show_cpuinfo,
        .get_irq        = mpic_get_irq,
 #ifdef CONFIG_PCI
-       .restart        = mpc85xx_cds_restart,
        .pcibios_fixup_bus      = mpc85xx_cds_fixup_bus,
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
-#else
-       .restart        = fsl_rstcr_restart,
 #endif
        .calibrate_decr = generic_calibrate_decr,
        .progress       = udbg_progress,
index ed69c7ee1829b94d1c060aed38eef408bf7baf3b..dc9e035cc637a749061a2b25e1a44d93998f8289 100644 (file)
@@ -204,7 +204,6 @@ define_machine(mpc8544_ds) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -219,7 +218,6 @@ define_machine(mpc8572_ds) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -234,7 +232,6 @@ define_machine(p2020_ds) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index fa9cd710d2aee15fd33ce501445fd5c84ede0b5f..d7e440e6dba3d3d45615e26f26894bf09bb1615c 100644 (file)
@@ -63,6 +63,8 @@
 #define DBG(fmt...)
 #endif
 
+#if IS_BUILTIN(CONFIG_PHYLIB)
+
 #define MV88E1111_SCR  0x10
 #define MV88E1111_SCR_125CLK   0x0010
 static int mpc8568_fixup_125_clock(struct phy_device *phydev)
@@ -152,6 +154,8 @@ static int mpc8568_mds_phy_fixups(struct phy_device *phydev)
        return err;
 }
 
+#endif
+
 /* ************************************************************************
  *
  * Setup the architecture
@@ -313,6 +317,7 @@ static void __init mpc85xx_mds_setup_arch(void)
        swiotlb_detect_4g();
 }
 
+#if IS_BUILTIN(CONFIG_PHYLIB)
 
 static int __init board_fixups(void)
 {
@@ -342,9 +347,12 @@ static int __init board_fixups(void)
 
        return 0;
 }
+
 machine_arch_initcall(mpc8568_mds, board_fixups);
 machine_arch_initcall(mpc8569_mds, board_fixups);
 
+#endif
+
 static int __init mpc85xx_publish_devices(void)
 {
        if (machine_is(mpc8568_mds))
@@ -385,7 +393,6 @@ define_machine(mpc8568_mds) {
        .setup_arch     = mpc85xx_mds_setup_arch,
        .init_IRQ       = mpc85xx_mds_pic_init,
        .get_irq        = mpic_get_irq,
-       .restart        = fsl_rstcr_restart,
        .calibrate_decr = generic_calibrate_decr,
        .progress       = udbg_progress,
 #ifdef CONFIG_PCI
@@ -405,7 +412,6 @@ define_machine(mpc8569_mds) {
        .setup_arch     = mpc85xx_mds_setup_arch,
        .init_IRQ       = mpc85xx_mds_pic_init,
        .get_irq        = mpic_get_irq,
-       .restart        = fsl_rstcr_restart,
        .calibrate_decr = generic_calibrate_decr,
        .progress       = udbg_progress,
 #ifdef CONFIG_PCI
@@ -426,7 +432,6 @@ define_machine(p1021_mds) {
        .setup_arch     = mpc85xx_mds_setup_arch,
        .init_IRQ       = mpc85xx_mds_pic_init,
        .get_irq        = mpic_get_irq,
-       .restart        = fsl_rstcr_restart,
        .calibrate_decr = generic_calibrate_decr,
        .progress       = udbg_progress,
 #ifdef CONFIG_PCI
@@ -434,4 +439,3 @@ define_machine(p1021_mds) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
 };
-
index c1499cbf3786746be655ac13787f6034b1cd086c..10069503e39f2fab8ce33c4a2b27d2e5041ed32b 100644 (file)
@@ -213,7 +213,6 @@ define_machine(p2020_rdb) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -228,7 +227,6 @@ define_machine(p1020_rdb) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -243,7 +241,6 @@ define_machine(p1021_rdb_pc) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -258,7 +255,6 @@ define_machine(p2020_rdb_pc) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -273,7 +269,6 @@ define_machine(p1025_rdb) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -288,7 +283,6 @@ define_machine(p1020_mbg_pc) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -303,7 +297,6 @@ define_machine(p1020_utm_pc) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -318,7 +311,6 @@ define_machine(p1020_rdb_pc) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -333,7 +325,6 @@ define_machine(p1020_rdb_pd) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -348,7 +339,6 @@ define_machine(p1024_rdb) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index acc3d0d6049d50c650a46002bd9859a374ce3326..d5af0723a69e97f58e6fba26e3694469440b2bc2 100644 (file)
@@ -66,7 +66,6 @@ define_machine(mvme2500) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index 661d7b59e4131d8bbe332b0b7f555d4252a7d237..78d13b364cd631e6c4b2863171fc8382796c4490 100644 (file)
@@ -79,7 +79,6 @@ define_machine(p1010_rdb) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index 63568d68c76f8c50554f4126a8672ea20118603f..0908abd7e36f684cf60162fd572a6175abfeb833 100644 (file)
@@ -568,7 +568,6 @@ define_machine(p1022_ds) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index 2f29436003011ecf24612275b22dfc8f234d5b71..276e00ab3dde97a766b6652d7f9dd714a6ea302f 100644 (file)
@@ -148,7 +148,6 @@ define_machine(p1022_rdk) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index 40d8de57c341dc73924130ad23d9a4a666069e1b..3e8cd0324dfcab91585beeb8962a863d53cab885 100644 (file)
@@ -110,7 +110,6 @@ define_machine(p1023_rdb) {
        .setup_arch             = mpc85xx_rdb_setup_arch,
        .init_IRQ               = mpc85xx_rdb_pic_init,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 #ifdef CONFIG_PCI
index 2410167b290a256232a82f37c5c04eb3d300d6eb..33c5ba644fa5683afa1f09839ae8fa23105a0285 100644 (file)
@@ -91,7 +91,6 @@ define_machine(ppa8548) {
        .init_IRQ       = ppa8548_pic_init,
        .show_cpuinfo   = ppa8548_show_cpuinfo,
        .get_irq        = mpic_get_irq,
-       .restart        = fsl_rstcr_restart,
        .calibrate_decr = generic_calibrate_decr,
        .progress       = udbg_progress,
 };
index 50d745809809a693daa11e3079a8f060e06962e1..b63a8548366f8d6e3218624784e7875edad01ee5 100644 (file)
@@ -77,7 +77,6 @@ define_machine(qemu_e500) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_coreint_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index 62b6c45a5a9b162db5b1c8e3c7ded9d0b1ccc78d..2c670848ff0804b5110a092e1429401cd1574be2 100644 (file)
@@ -130,7 +130,6 @@ define_machine(sbc8548) {
        .init_IRQ       = sbc8548_pic_init,
        .show_cpuinfo   = sbc8548_show_cpuinfo,
        .get_irq        = mpic_get_irq,
-       .restart        = fsl_rstcr_restart,
 #ifdef CONFIG_PCI
        .pcibios_fixup_bus      = fsl_pcibios_fixup_bus,
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
index 79fd0dfd4b8203f95a07d6bef19407d309708772..21d6aaa5c3e4aedced415e9409591049a63cc7cc 100644 (file)
@@ -38,18 +38,18 @@ static void gpio_halt_wfn(struct work_struct *work)
 }
 static DECLARE_WORK(gpio_halt_wq, gpio_halt_wfn);
 
-static void gpio_halt_cb(void)
+static void __noreturn gpio_halt_cb(void)
 {
        enum of_gpio_flags flags;
        int trigger, gpio;
 
        if (!halt_node)
-               return;
+               panic("No reset GPIO information was provided in DT\n");
 
        gpio = of_get_gpio_flags(halt_node, 0, &flags);
 
        if (!gpio_is_valid(gpio))
-               return;
+               panic("Provided GPIO is invalid\n");
 
        trigger = (flags == OF_GPIO_ACTIVE_LOW);
 
@@ -57,6 +57,8 @@ static void gpio_halt_cb(void)
 
        /* Probably wont return */
        gpio_set_value(gpio, trigger);
+
+       panic("Halt failed\n");
 }
 
 /* This IRQ means someone pressed the power button and it is waiting for us
index cd255acde2e29ec7e9f60705bb5069ca3d4f1816..8da4ed90338d56090fac9cb5bc371879d3a05f67 100644 (file)
@@ -91,7 +91,6 @@ define_machine(socrates) {
        .setup_arch             = socrates_setup_arch,
        .init_IRQ               = socrates_pic_init,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index 91b824c4dc082aea8add1e371c1553dcac3d5c8e..1a1d44ea17541b1bbca35562f2a936594d73af71 100644 (file)
@@ -103,7 +103,6 @@ define_machine(stx_gp3) {
        .init_IRQ               = stx_gp3_pic_init,
        .show_cpuinfo           = stx_gp3_show_cpuinfo,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index b7c54454d611a9db3b6ee1cbe882157bf25d5d6c..9fc20a37835e62b5247bf5d37cf5283e06825f7c 100644 (file)
@@ -132,7 +132,6 @@ define_machine(tqm85xx) {
        .init_IRQ               = tqm85xx_pic_init,
        .show_cpuinfo           = tqm85xx_show_cpuinfo,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index 1bc02a87f5971ddaa844b1822332e3590f48fda7..360f6253e9ffca80f009e363d47e032cf715a2ca 100644 (file)
@@ -140,7 +140,6 @@ define_machine(twr_p1025) {
        .pcibios_fixup_bus      = fsl_pcibios_fixup_bus,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index cf0c70ff026e7aa2c65948e3600b6c155ecd5819..cd6ce845f398b69334dfa8602bd7ec37c7ad44e8 100644 (file)
@@ -167,7 +167,6 @@ define_machine(xes_mpc8572) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -182,7 +181,6 @@ define_machine(xes_mpc8548) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
@@ -197,7 +195,6 @@ define_machine(xes_mpc8540) {
        .pcibios_fixup_phb      = fsl_pcibios_fixup_phb,
 #endif
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
 };
index ef684afb63c62433eaf1caec591c772592c2a640..6b99300edd36585cba83d65feeccdfb4076f6297 100644 (file)
@@ -204,7 +204,6 @@ define_machine(gef_ppc9a) {
        .init_IRQ               = gef_ppc9a_init_irq,
        .show_cpuinfo           = gef_ppc9a_show_cpuinfo,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .time_init              = mpc86xx_time_init,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
index 67dd0c23164604c732a36a31f0ac7d254441d2e8..8cdeca0611279b8608c4da431782a8269d36a4dc 100644 (file)
@@ -191,7 +191,6 @@ define_machine(gef_sbc310) {
        .init_IRQ               = gef_sbc310_init_irq,
        .show_cpuinfo           = gef_sbc310_show_cpuinfo,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .time_init              = mpc86xx_time_init,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
index 805026976cacd10fc4a22b381e691b6756dd5596..da8723ae23ecfe1a51d95bc34dcd4fea0a832519 100644 (file)
@@ -181,7 +181,6 @@ define_machine(gef_sbc610) {
        .init_IRQ               = gef_sbc610_init_irq,
        .show_cpuinfo           = gef_sbc610_show_cpuinfo,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .time_init              = mpc86xx_time_init,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
index fef0582eddf1572fd54ea80886db22421d83bf1f..a5d73fabe4d11dfa5746c9ee9f6072808f45df54 100644 (file)
@@ -331,7 +331,6 @@ define_machine(mpc86xx_hpcd) {
        .setup_arch             = mpc86xx_hpcd_setup_arch,
        .init_IRQ               = mpc86xx_init_irq,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .time_init              = mpc86xx_time_init,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
index 5ae42a037065f6d6246def005d3aa021004c7b0f..a0e989ed4b6f425036e880d886b6a15476bf9e85 100644 (file)
@@ -130,7 +130,6 @@ define_machine(mpc86xx_hpcn) {
        .init_IRQ               = mpc86xx_init_irq,
        .show_cpuinfo           = mpc86xx_hpcn_show_cpuinfo,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .time_init              = mpc86xx_time_init,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
index addb41e7cd144c9909b655073963cf759007f6ad..835352e63dc3a62213ffb22e40b48105fdb6c467 100644 (file)
@@ -111,7 +111,6 @@ define_machine(mvme7100) {
        .setup_arch             = mvme7100_setup_arch,
        .init_IRQ               = mpc86xx_init_irq,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .time_init              = mpc86xx_time_init,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
index 52af5735742e4f8784e1cfc7e944489d6c96f33b..93db35d4f6eb902c3a9efccc0354827bf2596e78 100644 (file)
@@ -82,7 +82,6 @@ define_machine(sbc8641) {
        .init_IRQ               = mpc86xx_init_irq,
        .show_cpuinfo           = sbc8641_show_cpuinfo,
        .get_irq                = mpic_get_irq,
-       .restart                = fsl_rstcr_restart,
        .time_init              = mpc86xx_time_init,
        .calibrate_decr         = generic_calibrate_decr,
        .progress               = udbg_progress,
index 86707e67843f6a152e8339ca7d600c27211fefac..aa35245d8d6d337204806bbb5888e9df4d5455ef 100644 (file)
@@ -393,7 +393,7 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
                                             unsigned long *vpn, int count,
                                             int psize, int ssize)
 {
-       unsigned long param[8];
+       unsigned long param[PLPAR_HCALL9_BUFSIZE];
        int i = 0, pix = 0, rc;
        unsigned long flags = 0;
        int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
@@ -522,7 +522,7 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
        unsigned long flags = 0;
        struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
        int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
-       unsigned long param[9];
+       unsigned long param[PLPAR_HCALL9_BUFSIZE];
        unsigned long hash, index, shift, hidx, slot;
        real_pte_t pte;
        int psize, ssize;
index 2e4ebd0e25b3e84431335c8d2be07d9485af5078..ec2d5c835170a6753a588d4ba6a16d2c51412d3c 100755 (executable)
@@ -30,6 +30,7 @@ bad_relocs=$(
        # On PPC64:
        #       R_PPC64_RELATIVE, R_PPC64_NONE
        #       R_PPC64_ADDR64 mach_<name>
+       #       R_PPC64_ADDR64 __crc_<name>
        # On PPC:
        #       R_PPC_RELATIVE, R_PPC_ADDR16_HI,
        #       R_PPC_ADDR16_HA,R_PPC_ADDR16_LO,
@@ -41,7 +42,8 @@ R_PPC_ADDR16_HI
 R_PPC_ADDR16_HA
 R_PPC_RELATIVE
 R_PPC_NONE' |
-       grep -E -v '\<R_PPC64_ADDR64[[:space:]]+mach_'
+       grep -E -v '\<R_PPC64_ADDR64[[:space:]]+mach_' |
+       grep -E -v '\<R_PPC64_ADDR64[[:space:]]+__crc_'
 )
 
 if [ -z "$bad_relocs" ]; then
index 3c0eb9b255353acf5bac69bf25ef66bee44dafdc..986cd111d4df1064161238a6b2fabad474fa5260 100644 (file)
@@ -233,8 +233,6 @@ void __init cpm_reset(void)
        else
                out_be32(&siu_conf->sc_sdcr, 1);
        immr_unmap(siu_conf);
-
-       cpm_muram_init();
 }
 
 static DEFINE_SPINLOCK(cmd_lock);
index 8dc1e24f3c2383bbaf22f2486b9ba076220413ff..f78ff841652c2fd2006b977db12e28447661aa87 100644 (file)
@@ -66,10 +66,6 @@ void __init cpm2_reset(void)
        cpm2_immr = ioremap(get_immrbase(), CPM_MAP_SIZE);
 #endif
 
-       /* Reclaim the DP memory for our use.
-        */
-       cpm_muram_init();
-
        /* Tell everyone where the comm processor resides.
         */
        cpmp = &cpm2_immr->im_cpm;
index 947f42007734c1c22d5f6eb02ae72f110cd5f44b..51bf749a4f3a7b6c8a245c8d793cee0cc47f9a51 100644 (file)
 #include <linux/of_gpio.h>
 #endif
 
+static int __init cpm_init(void)
+{
+       struct device_node *np;
+
+       np = of_find_compatible_node(NULL, NULL, "fsl,cpm1");
+       if (!np)
+               np = of_find_compatible_node(NULL, NULL, "fsl,cpm2");
+       if (!np)
+               return -ENODEV;
+       cpm_muram_init();
+       of_node_put(np);
+       return 0;
+}
+subsys_initcall(cpm_init);
+
 #ifdef CONFIG_PPC_EARLY_DEBUG_CPM
 static u32 __iomem *cpm_udbg_txdesc;
 static u8 __iomem *cpm_udbg_txbuf;
index d3098ef1404a2df72738bd1e01e51dda3f185227..e687bb2003ff0af93028c14de2c75ad9d37b2136 100644 (file)
@@ -12,6 +12,7 @@
 #include <asm/ppc_asm.h>
 #include <asm/processor.h>
 #include <asm/bug.h>
+#include <asm/export.h>
 
 #define DCR_ACCESS_PROLOG(table) \
        cmpli   cr0,r3,1024;     \
 
 _GLOBAL(__mfdcr)
        DCR_ACCESS_PROLOG(__mfdcr_table)
+EXPORT_SYMBOL(__mfdcr)
 
 _GLOBAL(__mtdcr)
        DCR_ACCESS_PROLOG(__mtdcr_table)
+EXPORT_SYMBOL(__mtdcr)
 
 __mfdcr_table:
        mfdcr  r3,0; blr
index 0ef9df49f0f2c2aca631963a188057d64ffe4098..d3a597456b6e57f83efb165e065a572d0105cc99 100644 (file)
@@ -111,8 +111,7 @@ static struct pci_ops fsl_indirect_pcie_ops =
        .write = indirect_write_config,
 };
 
-#define MAX_PHYS_ADDR_BITS     40
-static u64 pci64_dma_offset = 1ull << MAX_PHYS_ADDR_BITS;
+static u64 pci64_dma_offset;
 
 #ifdef CONFIG_SWIOTLB
 static void setup_swiotlb_ops(struct pci_controller *hose)
@@ -132,12 +131,10 @@ static int fsl_pci_dma_set_mask(struct device *dev, u64 dma_mask)
                return -EIO;
 
        /*
-        * Fixup PCI devices that are able to DMA to above the physical
-        * address width of the SoC such that we can address any internal
-        * SoC address from across PCI if needed
+        * Fix up PCI devices that are able to DMA to the large inbound
+        * mapping that allows addressing any RAM address from across PCI.
         */
-       if ((dev_is_pci(dev)) &&
-           dma_mask >= DMA_BIT_MASK(MAX_PHYS_ADDR_BITS)) {
+       if (dev_is_pci(dev) && dma_mask >= pci64_dma_offset * 2 - 1) {
                set_dma_ops(dev, &dma_direct_ops);
                set_dma_offset(dev, pci64_dma_offset);
        }
@@ -387,6 +384,7 @@ static void setup_pci_atmu(struct pci_controller *hose)
                                mem_log++;
 
                        piwar = (piwar & ~PIWAR_SZ_MASK) | (mem_log - 1);
+                       pci64_dma_offset = 1ULL << mem_log;
 
                        if (setup_inbound) {
                                /* Setup inbound memory window */
index a09ca704de58af05ce66c8229b9ddfca2bf834b1..d93056eedcb0a4a3c01e51c2ff3dc0e9b6163c77 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/fsl_devices.h>
 #include <linux/fs_enet_pd.h>
 #include <linux/fs_uart_pd.h>
+#include <linux/reboot.h>
 
 #include <linux/atomic.h>
 #include <asm/io.h>
@@ -180,23 +181,38 @@ EXPORT_SYMBOL(get_baudrate);
 #if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
 static __be32 __iomem *rstcr;
 
+static int fsl_rstcr_restart(struct notifier_block *this,
+                            unsigned long mode, void *cmd)
+{
+       local_irq_disable();
+       /* set reset control register */
+       out_be32(rstcr, 0x2);   /* HRESET_REQ */
+
+       return NOTIFY_DONE;
+}
+
 static int __init setup_rstcr(void)
 {
        struct device_node *np;
 
+       static struct notifier_block restart_handler = {
+               .notifier_call = fsl_rstcr_restart,
+               .priority = 128,
+       };
+
        for_each_node_by_name(np, "global-utilities") {
                if ((of_get_property(np, "fsl,has-rstcr", NULL))) {
                        rstcr = of_iomap(np, 0) + 0xb0;
-                       if (!rstcr)
+                       if (!rstcr) {
                                printk (KERN_ERR "Error: reset control "
                                                "register not mapped!\n");
+                       } else {
+                               register_restart_handler(&restart_handler);
+                       }
                        break;
                }
        }
 
-       if (!rstcr && ppc_md.restart == fsl_rstcr_restart)
-               printk(KERN_ERR "No RSTCR register, warm reboot won't work\n");
-
        of_node_put(np);
 
        return 0;
@@ -204,15 +220,6 @@ static int __init setup_rstcr(void)
 
 arch_initcall(setup_rstcr);
 
-void __noreturn fsl_rstcr_restart(char *cmd)
-{
-       local_irq_disable();
-       if (rstcr)
-               /* set reset control register */
-               out_be32(rstcr, 0x2);   /* HRESET_REQ */
-
-       while (1) ;
-}
 #endif
 
 #if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
index 433566a5ef192703d74c6ac64728c1441d6aa22e..d73daa4f0ccfc9005ff2cca7df398a590f709179 100644 (file)
@@ -19,8 +19,6 @@ extern u32 fsl_get_sys_freq(void);
 struct spi_board_info;
 struct device_node;
 
-extern void __noreturn fsl_rstcr_restart(char *cmd);
-
 /* The different ports that the DIU can be connected to */
 enum fsl_diu_monitor_port {
        FSL_DIU_PORT_DVI,       /* DVI */
index 4d48cecfedd1d6b7dee121db11fdf35b295bec23..b9aac951a90f69ec3e05775fe50495b25fea6716 100644 (file)
@@ -1249,7 +1249,7 @@ struct mpic * __init mpic_alloc(struct device_node *node,
        /* Pick the physical address from the device tree if unspecified */
        if (!phys_addr) {
                /* Check if it is DCR-based */
-               if (of_get_property(node, "dcr-reg", NULL)) {
+               if (of_property_read_bool(node, "dcr-reg")) {
                        flags |= MPIC_USES_DCR;
                } else {
                        struct resource r;
index 28f03ca60100a3399b501721851b789871c787e2..794bebb43d23d285370138d70bef7aab8a1e905a 100644 (file)
@@ -363,11 +363,11 @@ out:
 static int diag224_get_name_table(void)
 {
        /* memory must be below 2GB */
-       diag224_cpu_names = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+       diag224_cpu_names = (char *) __get_free_page(GFP_KERNEL | GFP_DMA);
        if (!diag224_cpu_names)
                return -ENOMEM;
        if (diag224(diag224_cpu_names)) {
-               kfree(diag224_cpu_names);
+               free_page((unsigned long) diag224_cpu_names);
                return -EOPNOTSUPP;
        }
        EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
@@ -376,7 +376,7 @@ static int diag224_get_name_table(void)
 
 static void diag224_delete_name_table(void)
 {
-       kfree(diag224_cpu_names);
+       free_page((unsigned long) diag224_cpu_names);
 }
 
 static int diag224_idx2name(int index, char *name)
index 9043d2e1e2ae0b3c01a7b6588bed848f44dd92ff..20f196b82a6e65ad9f792d5d885df27441aeddaa 100644 (file)
@@ -1,6 +1,7 @@
 
 
 generic-y += clkdev.h
+generic-y += export.h
 generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
index 64053d9ac3f23b7cb1bf23aa42aa435b9f2ae0f8..836c56290499b84c0dad7785e9aa5979d68ed0bf 100644 (file)
@@ -12,9 +12,7 @@
 
 #ifndef __ASSEMBLY__
 
-unsigned long return_address(int depth);
-
-#define ftrace_return_address(n) return_address(n)
+#define ftrace_return_address(n) __builtin_return_address(n)
 
 void _mcount(void);
 void ftrace_caller(void);
index 03323175de308fbb3f98ecd5595f66f2d92e9580..602af692efdc1b5273e466b474dbf1a6c7e25922 100644 (file)
@@ -192,7 +192,7 @@ struct task_struct;
 struct mm_struct;
 struct seq_file;
 
-typedef int (*dump_trace_func_t)(void *data, unsigned long address);
+typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
 void dump_trace(dump_trace_func_t func, void *data,
                struct task_struct *task, unsigned long sp);
 
index 02613bad8bbba4cc4a6e2de3dcc07846946536bf..3066031a73feeeecde2a3ae6f9c5913bf9f8c7d6 100644 (file)
@@ -9,6 +9,9 @@
 #include <uapi/asm/unistd.h>
 
 #define __IGNORE_time
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
index 41b51c2f4f1ba98055f7f75ac36915665b62392c..b24a64cbfeb10a91274a59117f2e76ea3c583e00 100644 (file)
@@ -96,4 +96,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _ASM_SOCKET_H */
index 72ccc41444dc64a1ddef531fd635ddf24bbbc6be..1f0fe98f6db927cea10038e5b0adbfaa57dbab93 100644 (file)
@@ -61,7 +61,7 @@ obj-y += entry.o reipl.o relocate_kernel.o
 
 extra-y                                += head.o head64.o vmlinux.lds
 
-obj-$(CONFIG_MODULES)          += s390_ksyms.o module.o
+obj-$(CONFIG_MODULES)          += module.o
 obj-$(CONFIG_SMP)              += smp.o
 obj-$(CONFIG_SCHED_TOPOLOGY)   += topology.o
 obj-$(CONFIG_HIBERNATION)      += suspend.o swsusp.o
index 43446fa2a4e55cdc49b7f89d639483f1ed738164..c74c59236f4418c3f37933ff67b4812c101e8536 100644 (file)
@@ -2014,12 +2014,12 @@ void show_code(struct pt_regs *regs)
                        *ptr++ = '\t';
                ptr += print_insn(ptr, code + start, addr);
                start += opsize;
-               printk("%s", buffer);
+               pr_cont("%s", buffer);
                ptr = buffer;
                ptr += sprintf(ptr, "\n          ");
                hops++;
        }
-       printk("\n");
+       pr_cont("\n");
 }
 
 void print_fn_code(unsigned char *code, unsigned long len)
index 6693383bc01bc7b78b4a98895a06052ccd8d77ec..55d4fe174fd9728a880016dcf14c7e281ffcdc4f 100644 (file)
@@ -38,10 +38,10 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
                if (sp < low || sp > high - sizeof(*sf))
                        return sp;
                sf = (struct stack_frame *) sp;
+               if (func(data, sf->gprs[8], 0))
+                       return sp;
                /* Follow the backchain. */
                while (1) {
-                       if (func(data, sf->gprs[8]))
-                               return sp;
                        low = sp;
                        sp = sf->back_chain;
                        if (!sp)
@@ -49,6 +49,8 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
                        if (sp <= low || sp > high - sizeof(*sf))
                                return sp;
                        sf = (struct stack_frame *) sp;
+                       if (func(data, sf->gprs[8], 1))
+                               return sp;
                }
                /* Zero backchain detected, check for interrupt frame. */
                sp = (unsigned long) (sf + 1);
@@ -56,7 +58,7 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
                        return sp;
                regs = (struct pt_regs *) sp;
                if (!user_mode(regs)) {
-                       if (func(data, regs->psw.addr))
+                       if (func(data, regs->psw.addr, 1))
                                return sp;
                }
                low = sp;
@@ -85,33 +87,12 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
 }
 EXPORT_SYMBOL_GPL(dump_trace);
 
-struct return_address_data {
-       unsigned long address;
-       int depth;
-};
-
-static int __return_address(void *data, unsigned long address)
-{
-       struct return_address_data *rd = data;
-
-       if (rd->depth--)
-               return 0;
-       rd->address = address;
-       return 1;
-}
-
-unsigned long return_address(int depth)
-{
-       struct return_address_data rd = { .depth = depth + 2 };
-
-       dump_trace(__return_address, &rd, NULL, current_stack_pointer());
-       return rd.address;
-}
-EXPORT_SYMBOL_GPL(return_address);
-
-static int show_address(void *data, unsigned long address)
+static int show_address(void *data, unsigned long address, int reliable)
 {
-       printk("([<%016lx>] %pSR)\n", address, (void *)address);
+       if (reliable)
+               printk(" [<%016lx>] %pSR \n", address, (void *)address);
+       else
+               printk("([<%016lx>] %pSR)\n", address, (void *)address);
        return 0;
 }
 
@@ -138,14 +119,14 @@ void show_stack(struct task_struct *task, unsigned long *sp)
                else
                        stack = (unsigned long *)task->thread.ksp;
        }
+       printk(KERN_DEFAULT "Stack:\n");
        for (i = 0; i < 20; i++) {
                if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
                        break;
-               if ((i * sizeof(long) % 32) == 0)
-                       printk("%s       ", i == 0 ? "" : "\n");
-               printk("%016lx ", *stack++);
+               if (i % 4 == 0)
+                       printk(KERN_DEFAULT "       ");
+               pr_cont("%016lx%c", *stack++, i % 4 == 3 ? '\n' : ' ');
        }
-       printk("\n");
        show_trace(task, (unsigned long)sp);
 }
 
@@ -163,13 +144,13 @@ void show_registers(struct pt_regs *regs)
        mode = user_mode(regs) ? "User" : "Krnl";
        printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
        if (!user_mode(regs))
-               printk(" (%pSR)", (void *)regs->psw.addr);
-       printk("\n");
+               pr_cont(" (%pSR)", (void *)regs->psw.addr);
+       pr_cont("\n");
        printk("           R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
               "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e,
               psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm);
-       printk(" RI:%x EA:%x", psw->ri, psw->eaba);
-       printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
+       pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba);
+       printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
               regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
        printk("           %016lx %016lx %016lx %016lx\n",
               regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]);
@@ -205,14 +186,14 @@ void die(struct pt_regs *regs, const char *str)
        printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
               regs->int_code >> 17, ++die_counter);
 #ifdef CONFIG_PREEMPT
-       printk("PREEMPT ");
+       pr_cont("PREEMPT ");
 #endif
 #ifdef CONFIG_SMP
-       printk("SMP ");
+       pr_cont("SMP ");
 #endif
        if (debug_pagealloc_enabled())
-               printk("DEBUG_PAGEALLOC");
-       printk("\n");
+               pr_cont("DEBUG_PAGEALLOC");
+       pr_cont("\n");
        notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
        print_modules();
        show_regs(regs);
index c51650a1ed167d533922f1b205c666368d5cc9ad..49a30737addef4103184ad92907b6f1f5b911bfa 100644 (file)
@@ -23,6 +23,7 @@
 #include <asm/vx-insn.h>
 #include <asm/setup.h>
 #include <asm/nmi.h>
+#include <asm/export.h>
 
 __PT_R0      = __PT_GPRS
 __PT_R1      = __PT_GPRS + 8
@@ -259,6 +260,8 @@ sie_exit:
 
        EX_TABLE(.Lrewind_pad,.Lsie_fault)
        EX_TABLE(sie_exit,.Lsie_fault)
+EXPORT_SYMBOL(sie64a)
+EXPORT_SYMBOL(sie_exit)
 #endif
 
 /*
@@ -825,6 +828,9 @@ ENTRY(save_fpu_regs)
        oi      __LC_CPU_FLAGS+7,_CIF_FPU
        br      %r14
 .Lsave_fpu_regs_end:
+#if IS_ENABLED(CONFIG_KVM)
+EXPORT_SYMBOL(save_fpu_regs)
+#endif
 
 /*
  * Load floating-point controls and floating-point or vector registers.
index e499370fbccb328561e6ef5ab0bfe35afb4a0b51..9a17e4475d2779d088fc3fb1deabf250dd09a94f 100644 (file)
@@ -9,6 +9,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/ftrace.h>
 #include <asm/ptrace.h>
+#include <asm/export.h>
 
        .section .kprobes.text, "ax"
 
@@ -23,6 +24,8 @@ ENTRY(ftrace_stub)
 ENTRY(_mcount)
        br      %r14
 
+EXPORT_SYMBOL(_mcount)
+
 ENTRY(ftrace_caller)
        .globl  ftrace_regs_caller
        .set    ftrace_regs_caller,ftrace_caller
index 17431f63de00279bbef1b1619ce3966f395c7e74..955a7b6fa0a453bacf588de42e80a95e07ade61a 100644 (file)
@@ -222,7 +222,7 @@ static int __init service_level_perf_register(void)
 }
 arch_initcall(service_level_perf_register);
 
-static int __perf_callchain_kernel(void *data, unsigned long address)
+static int __perf_callchain_kernel(void *data, unsigned long address, int reliable)
 {
        struct perf_callchain_entry_ctx *entry = data;
 
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
deleted file mode 100644 (file)
index e67453b..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-#include <linux/module.h>
-#include <linux/kvm_host.h>
-#include <asm/fpu/api.h>
-#include <asm/ftrace.h>
-
-#ifdef CONFIG_FUNCTION_TRACER
-EXPORT_SYMBOL(_mcount);
-#endif
-#if IS_ENABLED(CONFIG_KVM)
-EXPORT_SYMBOL(sie64a);
-EXPORT_SYMBOL(sie_exit);
-EXPORT_SYMBOL(save_fpu_regs);
-#endif
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memset);
index 44f84b23d4e5996f1ddc594ae3985d762e80aa36..355db9db82104d11bac97f7c23c59fa363859757 100644 (file)
@@ -27,12 +27,12 @@ static int __save_address(void *data, unsigned long address, int nosched)
        return 1;
 }
 
-static int save_address(void *data, unsigned long address)
+static int save_address(void *data, unsigned long address, int reliable)
 {
        return __save_address(data, address, 0);
 }
 
-static int save_address_nosched(void *data, unsigned long address)
+static int save_address_nosched(void *data, unsigned long address, int reliable)
 {
        return __save_address(data, address, 1);
 }
index 000e6e91f6a0630c53f35519d450fccf02479e55..3667d20e997f3ccac943438ad2e03588795afb33 100644 (file)
@@ -62,9 +62,11 @@ SECTIONS
 
        . = ALIGN(PAGE_SIZE);
        __start_ro_after_init = .;
+       __start_data_ro_after_init = .;
        .data..ro_after_init : {
                 *(.data..ro_after_init)
        }
+       __end_data_ro_after_init = .;
        EXCEPTION_TABLE(16)
        . = ALIGN(PAGE_SIZE);
        __end_ro_after_init = .;
index 1cab8a177d0e7b7c80e1556e659c4751b0657187..7a27eebab28ad023069d21ae92033a06f4ab482d 100644 (file)
@@ -119,8 +119,13 @@ static int handle_validity(struct kvm_vcpu *vcpu)
 
        vcpu->stat.exit_validity++;
        trace_kvm_s390_intercept_validity(vcpu, viwhy);
-       WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy);
-       return -EOPNOTSUPP;
+       KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy,
+                 current->pid, vcpu->kvm);
+
+       /* do not warn on invalid runtime instrumentation mode */
+       WARN_ONCE(viwhy != 0x44, "kvm: unhandled validity intercept 0x%x\n",
+                 viwhy);
+       return -EINVAL;
 }
 
 static int handle_instruction(struct kvm_vcpu *vcpu)
index bd98b7d252004dae3d9ea569c6705ea92c8e51f0..05c98bb853cf971117530967a94f9176f85ef049 100644 (file)
@@ -315,7 +315,7 @@ static void fill_diag(struct sthyi_sctns *sctns)
        if (r < 0)
                goto out;
 
-       diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+       diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
        if (!diag224_buf || diag224(diag224_buf))
                goto out;
 
@@ -378,7 +378,7 @@ static void fill_diag(struct sthyi_sctns *sctns)
        sctns->par.infpval1 |= PAR_WGHT_VLD;
 
 out:
-       kfree(diag224_buf);
+       free_page((unsigned long)diag224_buf);
        vfree(diag204_buf);
 }
 
index c6d553e85ab1112e3a6dffd2a0c0f0aa870b6620..be9fa65bfac4e16d1d71f5dc6ee3dcb7c9d69299 100644 (file)
@@ -5,6 +5,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/export.h>
 
 /*
  * memset implementation
@@ -60,6 +61,7 @@ ENTRY(memset)
        xc      0(1,%r1),0(%r1)
 .Lmemset_mvc:
        mvc     1(1,%r1),0(%r1)
+EXPORT_SYMBOL(memset)
 
 /*
  * memcpy implementation
@@ -86,3 +88,4 @@ ENTRY(memcpy)
        j       .Lmemcpy_rest
 .Lmemcpy_mvc:
        mvc     0(1,%r1),0(%r3)
+EXPORT_SYMBOL(memcpy)
index adb0c34bf431e121d66caff904c30fb7e63e933d..18d4107e10eefb5e2ea5935412a95e7a03f2ae41 100644 (file)
@@ -266,7 +266,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
        /* Try to get the remaining pages with get_user_pages */
        start += nr << PAGE_SHIFT;
        pages += nr;
-       ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages);
+       ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
+                                     write ? FOLL_WRITE : 0);
        /* Have to be a bit careful with return values */
        if (nr > 0)
                ret = (ret < 0) ? nr : ret + nr;
index cd404aa3931c101c963f9940e80669173453022c..4a0c5bce3552b00ba4863622fcc63de7b711614c 100644 (file)
@@ -217,6 +217,7 @@ static __init int setup_hugepagesz(char *opt)
        } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
                hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
        } else {
+               hugetlb_bad_size();
                pr_err("hugepagesz= specifies an unsupported page size %s\n",
                        string);
                return 0;
index f56a39bd8ba688afbc29832b63305fa32b3e725d..b3e9d18f2ec62b603737ef5d0080cd86b8df94a8 100644 (file)
@@ -151,36 +151,40 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 #ifdef CONFIG_MEMORY_HOTPLUG
 int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-       unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
-       unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
+       unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
        unsigned long start_pfn = PFN_DOWN(start);
        unsigned long size_pages = PFN_DOWN(size);
-       unsigned long nr_pages;
-       int rc, zone_enum;
+       pg_data_t *pgdat = NODE_DATA(nid);
+       struct zone *zone;
+       int rc, i;
 
        rc = vmem_add_mapping(start, size);
        if (rc)
                return rc;
 
-       while (size_pages > 0) {
-               if (start_pfn < dma_end_pfn) {
-                       nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
-                                  dma_end_pfn - start_pfn : size_pages;
-                       zone_enum = ZONE_DMA;
-               } else if (start_pfn < normal_end_pfn) {
-                       nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
-                                  normal_end_pfn - start_pfn : size_pages;
-                       zone_enum = ZONE_NORMAL;
+       for (i = 0; i < MAX_NR_ZONES; i++) {
+               zone = pgdat->node_zones + i;
+               if (zone_idx(zone) != ZONE_MOVABLE) {
+                       /* Add range within existing zone limits, if possible */
+                       zone_start_pfn = zone->zone_start_pfn;
+                       zone_end_pfn = zone->zone_start_pfn +
+                                      zone->spanned_pages;
                } else {
-                       nr_pages = size_pages;
-                       zone_enum = ZONE_MOVABLE;
+                       /* Add remaining range to ZONE_MOVABLE */
+                       zone_start_pfn = start_pfn;
+                       zone_end_pfn = start_pfn + size_pages;
                }
-               rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum,
-                                start_pfn, size_pages);
+               if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
+                       continue;
+               nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
+                          zone_end_pfn - start_pfn : size_pages;
+               rc = __add_pages(nid, zone, start_pfn, nr_pages);
                if (rc)
                        break;
                start_pfn += nr_pages;
                size_pages -= nr_pages;
+               if (!size_pages)
+                       break;
        }
        if (rc)
                vmem_remove_mapping(start, size);
index 16f4c3960b874b2dc2129926c45c8343a3f7403c..9a4de4599c7b99a82f4a38989a309c896f4e73f3 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <asm/processor.h>
 
-static int __s390_backtrace(void *data, unsigned long address)
+static int __s390_backtrace(void *data, unsigned long address, int reliable)
 {
        unsigned int *depth = data;
 
index 7350c8bc13a290ca362ad25b69cdb050c72ab3f4..6b2f72f523b91bb6c68347602b33ca2446807229 100644 (file)
@@ -423,7 +423,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
        dma_addr_t dma_addr_base, dma_addr;
        int flags = ZPCI_PTE_VALID;
        struct scatterlist *s;
-       unsigned long pa;
+       unsigned long pa = 0;
        int ret;
 
        size = PAGE_ALIGN(size);
index 55836188b217c170c3141558d843e1d4259bf165..4f7314d5f3347d40a6282976d538ccfb0e4d447d 100644 (file)
@@ -131,7 +131,7 @@ read_tsk_long(struct task_struct *child,
 {
        int copied;
 
-       copied = access_process_vm(child, addr, res, sizeof(*res), 0);
+       copied = access_process_vm(child, addr, res, sizeof(*res), FOLL_FORCE);
 
        return copied != sizeof(*res) ? -EIO : 0;
 }
@@ -142,7 +142,7 @@ read_tsk_short(struct task_struct *child,
 {
        int copied;
 
-       copied = access_process_vm(child, addr, res, sizeof(*res), 0);
+       copied = access_process_vm(child, addr, res, sizeof(*res), FOLL_FORCE);
 
        return copied != sizeof(*res) ? -EIO : 0;
 }
@@ -153,7 +153,8 @@ write_tsk_short(struct task_struct *child,
 {
        int copied;
 
-       copied = access_process_vm(child, addr, &val, sizeof(val), 1);
+       copied = access_process_vm(child, addr, &val, sizeof(val),
+                       FOLL_FORCE | FOLL_WRITE);
 
        return copied != sizeof(val) ? -EIO : 0;
 }
@@ -164,7 +165,8 @@ write_tsk_long(struct task_struct *child,
 {
        int copied;
 
-       copied = access_process_vm(child, addr, &val, sizeof(val), 1);
+       copied = access_process_vm(child, addr, &val, sizeof(val),
+                       FOLL_FORCE | FOLL_WRITE);
 
        return copied != sizeof(val) ? -EIO : 0;
 }
index 1517a7dcd6d92a3e49ec9f847456de3156267bfe..5cea1e750ceca0b7ad0b7baac431218e695dafb8 100644 (file)
@@ -29,6 +29,7 @@
 #include <asm/cacheflush.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
+#include <asm/uaccess.h>
 
 unsigned long exception_handlers[32];
 
index 00476662ac2c07ba1e0a3bb427e114ae2f5a7650..336f33a419d99561d57b329a3ee6a513164ca43f 100644 (file)
@@ -31,7 +31,7 @@ isa-y                                 := $(isa-y)-up
 endif
 
 cflags-$(CONFIG_CPU_SH2)               := $(call cc-option,-m2,)
-cflags-$(CONFIG_CPU_J2)                        := $(call cc-option,-mj2,)
+cflags-$(CONFIG_CPU_J2)                        += $(call cc-option,-mj2,)
 cflags-$(CONFIG_CPU_SH2A)              += $(call cc-option,-m2a,) \
                                           $(call cc-option,-m2a-nofpu,) \
                                           $(call cc-option,-m4-nofpu,)
index e9c2c42031fee497379af3e3fff29d519275eaee..4e21949593cf59f20c4290285d7c4c28db7acc30 100644 (file)
@@ -22,6 +22,16 @@ config SH_DEVICE_TREE
          have sufficient driver coverage to use this option; do not
          select it if you are using original SuperH hardware.
 
+config SH_JCORE_SOC
+       bool "J-Core SoC"
+       depends on SH_DEVICE_TREE && (CPU_SH2 || CPU_J2)
+       select CLKSRC_JCORE_PIT
+       select JCORE_AIC
+       default y if CPU_J2
+       help
+         Select this option to include drivers for core components of the
+         J-Core SoC, including interrupt controllers and timers.
+
 config SH_SOLUTION_ENGINE
        bool "SolutionEngine"
        select SOLUTION_ENGINE
index 94d1eca52f723e82df0792c96d87783c6a422245..2eb81ebe3888bf8a4bdd2af4b575c0e254f52f02 100644 (file)
@@ -8,6 +8,7 @@ CONFIG_MEMORY_START=0x10000000
 CONFIG_MEMORY_SIZE=0x04000000
 CONFIG_CPU_BIG_ENDIAN=y
 CONFIG_SH_DEVICE_TREE=y
+CONFIG_SH_JCORE_SOC=y
 CONFIG_HZ_100=y
 CONFIG_CMDLINE_OVERWRITE=y
 CONFIG_CMDLINE="console=ttyUL0 earlycon"
@@ -20,6 +21,7 @@ CONFIG_INET=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_NETDEVICES=y
+CONFIG_SERIAL_EARLYCON=y
 CONFIG_SERIAL_UARTLITE=y
 CONFIG_SERIAL_UARTLITE_CONSOLE=y
 CONFIG_I2C=y
index 40fa6c8adc43a361c60ffee8929aa39d2449e926..063c298ba56cc900479eb9b50a8c9587678f6893 100644 (file)
@@ -258,7 +258,8 @@ slow_irqon:
                pages += nr;
 
                ret = get_user_pages_unlocked(start,
-                       (end - start) >> PAGE_SHIFT, write, 0, pages);
+                       (end - start) >> PAGE_SHIFT, pages,
+                       write ? FOLL_WRITE : 0);
 
                /* Have to be a bit careful with return values */
                if (nr > 0) {
index b23c76b42d6e8c1508c04e3dbb65b05b24bce7c3..165ecdd24d22dec52108d54d2b31db4a38f55aab 100644 (file)
@@ -43,6 +43,7 @@ config SPARC
        select ARCH_HAS_SG_CHAIN
        select CPU_NO_EFFICIENT_FFS
        select HAVE_ARCH_HARDENED_USERCOPY
+       select PROVE_LOCKING_SMALL if PROVE_LOCKING
 
 config SPARC32
        def_bool !64BIT
@@ -89,6 +90,14 @@ config ARCH_DEFCONFIG
 config ARCH_PROC_KCORE_TEXT
        def_bool y
 
+config ARCH_ATU
+       bool
+       default y if SPARC64
+
+config ARCH_DMA_ADDR_T_64BIT
+       bool
+       default y if ARCH_ATU
+
 config IOMMU_HELPER
        bool
        default y if SPARC64
@@ -304,6 +313,20 @@ config ARCH_SPARSEMEM_ENABLE
 config ARCH_SPARSEMEM_DEFAULT
        def_bool y if SPARC64
 
+config FORCE_MAX_ZONEORDER
+       int "Maximum zone order"
+       default "13"
+       help
+         The kernel memory allocator divides physically contiguous memory
+         blocks into "zones", where each zone is a power of two number of
+         pages.  This option selects the largest power of two that the kernel
+         keeps in the memory allocator.  If you need to allocate very large
+         blocks of physically contiguous memory, then you may need to
+         increase this value.
+
+         This config option is actually maximum order plus one. For example,
+         a value of 13 means that the largest free memory block is 2^12 pages.
+
 source "mm/Kconfig"
 
 if SPARC64
index 6024c26c058565c76f20ebe98a3c3b0ad5a96120..cfc918067f80aa01f9d976819608afe3f6729a25 100644 (file)
@@ -6,6 +6,7 @@ generic-y += cputime.h
 generic-y += div64.h
 generic-y += emergency-restart.h
 generic-y += exec.h
+generic-y += export.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += linkage.h
index a6cfdabb6054aef28846342f49fdb2718e1263a5..5b0ed48e5b0c2e8e201a4bc5abe1cedd02833a7d 100644 (file)
@@ -24,9 +24,10 @@ typedef struct {
        unsigned int    icache_line_size;
        unsigned int    ecache_size;
        unsigned int    ecache_line_size;
-       unsigned short  sock_id;
+       unsigned short  sock_id;        /* physical package */
        unsigned short  core_id;
-       int             proc_id;
+       unsigned short  max_cache_id;   /* groupings of highest shared cache */
+       unsigned short  proc_id;        /* strand (aka HW thread) id */
 } cpuinfo_sparc;
 
 DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
index 666d5ba230d2f83f0659000e85ade4f4c025f319..73cb8978df58c9f25e2f06101a84bf1101dc7cf3 100644 (file)
@@ -2335,6 +2335,348 @@ unsigned long sun4v_vintr_set_target(unsigned long dev_handle,
  */
 #define HV_FAST_PCI_MSG_SETVALID       0xd3
 
+/* PCI IOMMU v2 definitions and services
+ *
+ * While the PCI IO definitions above are valid, IOMMU v2 adds new PCI IO
+ * definitions and services.
+ *
+ *     CTE             Clump Table Entry. First level table entry in the ATU.
+ *
+ *     pci_device_list
+ *                     A 32-bit aligned list of pci_devices.
+ *
+ *     pci_device_listp
+ *                     real address of a pci_device_list. 32-bit aligned.
+ *
+ *     iotte           IOMMU translation table entry.
+ *
+ *     iotte_attributes
+ *                     IO Attributes for IOMMU v2 mappings. In addition to
+ *                     read and write, IOMMU v2 supports relaxed ordering.
+ *
+ *     io_page_list    A 64-bit aligned list of real addresses. Each real
+ *                     address in an io_page_list must be properly aligned
+ *                     to the pagesize of the given IOTSB.
+ *
+ *     io_page_list_p  Real address of an io_page_list, 64-bit aligned.
+ *
+ *     IOTSB           IO Translation Storage Buffer. An aligned table of
+ *                     IOTTEs. Each IOTSB has a pagesize, table size, and
+ *                     virtual address associated with it that must match
+ *                     a pagesize and table size supported by the underlying
+ *                     hardware implementation. The alignment requirements
+ *                     for an IOTSB depend on the pagesize used for that IOTSB.
+ *                     Each IOTTE in an IOTSB maps one pagesize-sized page.
+ *                     The size of the IOTSB dictates how large of a virtual
+ *                     address space the IOTSB is capable of mapping.
+ *
+ *     iotsb_handle    An opaque identifier for an IOTSB. A devhandle plus
+ *                     iotsb_handle represents a binding of an IOTSB to a
+ *                     PCI root complex.
+ *
+ *     iotsb_index     Zero-based IOTTE number within an IOTSB.
+ */
+
+/* The index_count argument consists of two fields:
+ * bits 63:48 #iottes and bits 47:0 iotsb_index
+ */
+#define HV_PCI_IOTSB_INDEX_COUNT(__iottes, __iotsb_index) \
+       (((u64)(__iottes) << 48UL) | ((u64)(__iotsb_index)))
+
+/* pci_iotsb_conf()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_PCI_IOTSB_CONF
+ * ARG0:       devhandle
+ * ARG1:       r_addr
+ * ARG2:       size
+ * ARG3:       pagesize
+ * ARG4:       iova
+ * RET0:       status
+ * RET1:       iotsb_handle
+ * ERRORS:     EINVAL          Invalid devhandle, size, iova, or pagesize
+ *             EBADALIGN       r_addr is not properly aligned
+ *             ENORADDR        r_addr is not a valid real address
+ *             ETOOMANY        No further IOTSBs may be configured
+ *             EBUSY           Duplicate devhandle, r_addr, iova combination
+ *
+ * Create an IOTSB suitable for the PCI root complex identified by devhandle,
+ * for the DMA virtual address defined by the argument iova.
+ *
+ * r_addr is the properly aligned base address of the IOTSB and size is the
+ * IOTSB (table) size in bytes. The IOTSB is required to be zeroed prior to
+ * being configured. If it contains any values other than zeros then the
+ * behavior is undefined.
+ *
+ * pagesize is the size of each page in the IOTSB. Note that the combination of
+ * size (table size) and pagesize must be valid.
+ *
+ * iova is the DMA virtual address this IOTSB will map.
+ *
+ * If successful, the opaque 64-bit handle iotsb_handle is returned in ret1.
+ * Once configured, privileged access to the IOTSB memory is prohibited and
+ * creates undefined behavior. The only permitted access is indirect via these
+ * services.
+ */
+#define HV_FAST_PCI_IOTSB_CONF         0x190
+
+/* pci_iotsb_info()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_PCI_IOTSB_INFO
+ * ARG0:       devhandle
+ * ARG1:       iotsb_handle
+ * RET0:       status
+ * RET1:       r_addr
+ * RET2:       size
+ * RET3:       pagesize
+ * RET4:       iova
+ * RET5:       #bound
+ * ERRORS:     EINVAL  Invalid devhandle or iotsb_handle
+ *
+ * This service returns configuration information about an IOTSB previously
+ * created with pci_iotsb_conf.
+ *
+ * iotsb_handle value 0 may be used with this service to inquire about the
+ * legacy IOTSB that may or may not exist. If the service succeeds, the return
+ * values describe the legacy IOTSB and I/O virtual addresses mapped by that
+ * table. However, the table base address r_addr may contain the value -1 which
+ * indicates a memory range that cannot be accessed or be reclaimed.
+ *
+ * The return value #bound contains the number of PCI devices that iotsb_handle
+ * is currently bound to.
+ */
+#define HV_FAST_PCI_IOTSB_INFO         0x191
+
+/* pci_iotsb_unconf()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_PCI_IOTSB_UNCONF
+ * ARG0:       devhandle
+ * ARG1:       iotsb_handle
+ * RET0:       status
+ * ERRORS:     EINVAL  Invalid devhandle or iotsb_handle
+ *             EBUSY   The IOTSB is bound and may not be unconfigured
+ *
+ * This service unconfigures the IOTSB identified by the devhandle and
+ * iotsb_handle arguments, previously created with pci_iotsb_conf.
+ * The IOTSB must not be currently bound to any device or the service will fail
+ *
+ * If the call succeeds, iotsb_handle is no longer valid.
+ */
+#define HV_FAST_PCI_IOTSB_UNCONF       0x192
+
+/* pci_iotsb_bind()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_PCI_IOTSB_BIND
+ * ARG0:       devhandle
+ * ARG1:       iotsb_handle
+ * ARG2:       pci_device
+ * RET0:       status
+ * ERRORS:     EINVAL  Invalid devhandle, iotsb_handle, or pci_device
+ *             EBUSY   A PCI function is already bound to an IOTSB at the same
+ *                     address range as specified by devhandle, iotsb_handle.
+ *
+ * This service binds the PCI function specified by the argument pci_device to
+ * the IOTSB specified by the arguments devhandle and iotsb_handle.
+ *
+ * The PCI device function is bound to the specified IOTSB with the IOVA range
+ * specified when the IOTSB was configured via pci_iotsb_conf. If the function
+ * is already bound then it is unbound first.
+ */
+#define HV_FAST_PCI_IOTSB_BIND         0x193
+
+/* pci_iotsb_unbind()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_PCI_IOTSB_UNBIND
+ * ARG0:       devhandle
+ * ARG1:       iotsb_handle
+ * ARG2:       pci_device
+ * RET0:       status
+ * ERRORS:     EINVAL  Invalid devhandle, iotsb_handle, or pci_device
+ *             ENOMAP  The PCI function was not bound to the specified IOTSB
+ *
+ * This service unbinds the PCI device specified by the argument pci_device
+ * from the IOTSB identified by the arguments devhandle and iotsb_handle.
+ *
+ * If the PCI device is not bound to the specified IOTSB then this service will
+ * fail with status ENOMAP
+ */
+#define HV_FAST_PCI_IOTSB_UNBIND       0x194
+
+/* pci_iotsb_get_binding()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_PCI_IOTSB_GET_BINDING
+ * ARG0:       devhandle
+ * ARG1:       iotsb_handle
+ * ARG2:       iova
+ * RET0:       status
+ * RET1:       iotsb_handle
+ * ERRORS:     EINVAL  Invalid devhandle, pci_device, or iova
+ *             ENOMAP  The PCI function is not bound to an IOTSB at iova
+ *
+ * This service returns the IOTSB binding, iotsb_handle, for a given pci_device
+ * and DMA virtual address, iova.
+ *
+ * iova must be the base address of a DMA virtual address range as defined by
+ * the iommu-address-ranges property in the root complex device node defined
+ * by the argument devhandle.
+ */
+#define HV_FAST_PCI_IOTSB_GET_BINDING  0x195
+
+/* pci_iotsb_map()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_PCI_IOTSB_MAP
+ * ARG0:       devhandle
+ * ARG1:       iotsb_handle
+ * ARG2:       index_count
+ * ARG3:       iotte_attributes
+ * ARG4:       io_page_list_p
+ * RET0:       status
+ * RET1:       #mapped
+ * ERRORS:     EINVAL          Invalid devhandle, iotsb_handle, #iottes,
+ *                             iotsb_index or iotte_attributes
+ *             EBADALIGN       Improperly aligned io_page_list_p or I/O page
+ *                             address in the I/O page list.
+ *             ENORADDR        Invalid io_page_list_p or I/O page address in
+ *                             the I/O page list.
+ *
+ * This service creates and flushes mappings in the IOTSB defined by the
+ * arguments devhandle, iotsb.
+ *
+ * The index_count argument consists of two fields. Bits 63:48 contain #iotte
+ * and bits 47:0 contain iotsb_index
+ *
+ * The first mapping is created in the IOTSB index specified by iotsb_index.
+ * Subsequent mappings are  created at iotsb_index+1 and so on.
+ *
+ * The attributes of each mapping are defined by the argument iotte_attributes.
+ *
+ * The io_page_list_p specifies the real address of the 64-bit-aligned list of
+ * #iottes I/O page addresses. Each page address must be a properly aligned
+ * real address of a page to be mapped in the IOTSB. The first entry in the I/O
+ * page list contains the real address of the first page, the 2nd entry for the
+ * 2nd page, and so on.
+ *
+ * #iottes must be greater than zero.
+ *
+ * The return value #mapped is the actual number of mappings created, which may
+ * be less than or equal to the argument #iottes. If the function returns
+ * successfully with a #mapped value less than the requested #iottes then the
+ * caller should continue to invoke the service with updated iotsb_index,
+ * #iottes, and io_page_list_p arguments until all pages are mapped.
+ *
+ * This service must not be used to demap a mapping. In other words, all
+ * mappings must be valid and have  one or both of the RW attribute bits set.
+ *
+ * Note:
+ * It is implementation-defined whether I/O page real address validity checking
+ * is done at time mappings are established or deferred until they are
+ * accessed.
+ */
+#define HV_FAST_PCI_IOTSB_MAP          0x196
+
+/* pci_iotsb_map_one()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_PCI_IOTSB_MAP_ONE
+ * ARG0:       devhandle
+ * ARG1:       iotsb_handle
+ * ARG2:       iotsb_index
+ * ARG3:       iotte_attributes
+ * ARG4:       r_addr
+ * RET0:       status
+ * ERRORS:     EINVAL          Invalid devhandle, iotsb_handle, iotsb_index
+ *                             or iotte_attributes
+ *             EBADALIGN       Improperly aligned r_addr
+ *             ENORADDR        Invalid r_addr
+ *
+ * This service creates and flushes a single mapping in the IOTSB defined by the
+ * arguments devhandle, iotsb.
+ *
+ * The mapping for the page at r_addr is created at the IOTSB index specified by
+ * iotsb_index with  the attributes iotte_attributes.
+ *
+ * This service must not be used to demap a mapping. In other words, the mapping
+ * must be valid and have one or both of the RW attribute bits set.
+ *
+ * Note:
+ * It is implementation-defined whether I/O page real address validity checking
+ * is done at time mappings are established or deferred until they are
+ * accessed.
+ */
+#define HV_FAST_PCI_IOTSB_MAP_ONE      0x197
+
+/* pci_iotsb_demap()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_PCI_IOTSB_DEMAP
+ * ARG0:       devhandle
+ * ARG1:       iotsb_handle
+ * ARG2:       iotsb_index
+ * ARG3:       #iottes
+ * RET0:       status
+ * RET1:       #unmapped
+ * ERRORS:     EINVAL  Invalid devhandle, iotsb_handle, iotsb_index or #iottes
+ *
+ * This service unmaps and flushes up to #iottes mappings starting at index
+ * iotsb_index from the IOTSB defined by the arguments devhandle, iotsb.
+ *
+ * #iottes must be greater than zero.
+ *
+ * The actual number of IOTTEs unmapped is returned in #unmapped and may be less
+ * than or equal to the requested number of IOTTEs, #iottes.
+ *
+ * If #unmapped is less than #iottes, the caller should continue to invoke this
+ * service with updated iotsb_index and #iottes arguments until all pages are
+ * demapped.
+ */
+#define HV_FAST_PCI_IOTSB_DEMAP                0x198
+
+/* pci_iotsb_getmap()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_PCI_IOTSB_GETMAP
+ * ARG0:       devhandle
+ * ARG1:       iotsb_handle
+ * ARG2:       iotsb_index
+ * RET0:       status
+ * RET1:       r_addr
+ * RET2:       iotte_attributes
+ * ERRORS:     EINVAL  Invalid devhandle, iotsb_handle, or iotsb_index
+ *             ENOMAP  No mapping was found
+ *
+ * This service returns the mapping specified by index iotsb_index from the
+ * IOTSB defined by the arguments devhandle, iotsb.
+ *
+ * Upon success, the real address of the mapping shall be returned in
+ * r_addr and the IOTTE mapping attributes shall be returned in
+ * iotte_attributes.
+ *
+ * The return value iotte_attributes may not include optional features used in
+ * the call to create the  mapping.
+ */
+#define HV_FAST_PCI_IOTSB_GETMAP       0x199
+
+/* pci_iotsb_sync_mappings()
+ * TRAP:       HV_FAST_TRAP
+ * FUNCTION:   HV_FAST_PCI_IOTSB_SYNC_MAPPINGS
+ * ARG0:       devhandle
+ * ARG1:       iotsb_handle
+ * ARG2:       iotsb_index
+ * ARG3:       #iottes
+ * RET0:       status
+ * RET1:       #synced
+ * ERRORS:     EINVAL  Invalid devhandle, iotsb_handle, iotsb_index, or #iottes
+ *
+ * This service synchronizes #iottes mappings starting at index iotsb_index in
+ * the IOTSB defined by the arguments devhandle, iotsb.
+ *
+ * #iottes must be greater than zero.
+ *
+ * The actual number of IOTTEs synchronized is returned in #synced, which may
+ * be less than or equal to the requested number, #iottes.
+ *
+ * Upon a successful return, if #synced is less than #iottes, the caller should
+ * continue to invoke this service with updated iotsb_index and #iottes
+ * arguments until all pages are synchronized.
+ */
+#define HV_FAST_PCI_IOTSB_SYNC_MAPPINGS        0x19a
+
 /* Logical Domain Channel services.  */
 
 #define LDC_CHANNEL_DOWN               0
@@ -2993,6 +3335,7 @@ unsigned long sun4v_m7_set_perfreg(unsigned long reg_num,
 #define HV_GRP_SDIO                    0x0108
 #define HV_GRP_SDIO_ERR                        0x0109
 #define HV_GRP_REBOOT_DATA             0x0110
+#define HV_GRP_ATU                     0x0111
 #define HV_GRP_M7_PERF                 0x0114
 #define HV_GRP_NIAG_PERF               0x0200
 #define HV_GRP_FIRE_PERF               0x0201
index cd0d69fa7592e64d6ac564eafac8ee622ea7ccfc..f24f356f250376e0931a4d4bc5d6bc631f469a74 100644 (file)
@@ -24,8 +24,36 @@ struct iommu_arena {
        unsigned int    limit;
 };
 
+#define ATU_64_SPACE_SIZE 0x800000000 /* 32G */
+
+/* Data structures for SPARC ATU architecture */
+struct atu_iotsb {
+       void    *table;         /* IOTSB table base virtual addr */
+       u64     ra;             /* IOTSB table real addr */
+       u64     dvma_size;      /* ranges[3].size or OS selected 32G size */
+       u64     dvma_base;      /* ranges[3].base */
+       u64     table_size;     /* IOTSB table size */
+       u64     page_size;      /* IO PAGE size for IOTSB */
+       u32     iotsb_num;      /* tsbnum is same as iotsb_handle */
+};
+
+struct atu_ranges {
+       u64     base;
+       u64     size;
+};
+
+struct atu {
+       struct  atu_ranges      *ranges;
+       struct  atu_iotsb       *iotsb;
+       struct  iommu_map_table tbl;
+       u64                     base;
+       u64                     size;
+       u64                     dma_addr_mask;
+};
+
 struct iommu {
        struct iommu_map_table  tbl;
+       struct atu              *atu;
        spinlock_t              lock;
        u32                     dma_addr_mask;
        iopte_t                 *page_table;
index d9c5876c61215494df0238992da09c03f5d82211..8011e79f59c96f3658e430765e6aa56caa400098 100644 (file)
@@ -134,7 +134,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
        *(volatile __u32 *)&lp->lock = ~0U;
 }
 
-static void inline arch_write_unlock(arch_rwlock_t *lock)
+static inline void arch_write_unlock(arch_rwlock_t *lock)
 {
        __asm__ __volatile__(
 "      st              %%g0, [%0]"
index 87990b7c6b0d693eb4c715f33047e5d8cfa0e5c6..07c9f2e9bf57716eccabe5d55208dbc016b41881 100644 (file)
@@ -96,7 +96,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long fla
 
 /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */
 
-static void inline arch_read_lock(arch_rwlock_t *lock)
+static inline void arch_read_lock(arch_rwlock_t *lock)
 {
        unsigned long tmp1, tmp2;
 
@@ -119,7 +119,7 @@ static void inline arch_read_lock(arch_rwlock_t *lock)
        : "memory");
 }
 
-static int inline arch_read_trylock(arch_rwlock_t *lock)
+static inline int arch_read_trylock(arch_rwlock_t *lock)
 {
        int tmp1, tmp2;
 
@@ -140,7 +140,7 @@ static int inline arch_read_trylock(arch_rwlock_t *lock)
        return tmp1;
 }
 
-static void inline arch_read_unlock(arch_rwlock_t *lock)
+static inline void arch_read_unlock(arch_rwlock_t *lock)
 {
        unsigned long tmp1, tmp2;
 
@@ -156,7 +156,7 @@ static void inline arch_read_unlock(arch_rwlock_t *lock)
        : "memory");
 }
 
-static void inline arch_write_lock(arch_rwlock_t *lock)
+static inline void arch_write_lock(arch_rwlock_t *lock)
 {
        unsigned long mask, tmp1, tmp2;
 
@@ -181,7 +181,7 @@ static void inline arch_write_lock(arch_rwlock_t *lock)
        : "memory");
 }
 
-static void inline arch_write_unlock(arch_rwlock_t *lock)
+static inline void arch_write_unlock(arch_rwlock_t *lock)
 {
        __asm__ __volatile__(
 "      stw             %%g0, [%0]"
@@ -190,7 +190,7 @@ static void inline arch_write_unlock(arch_rwlock_t *lock)
        : "memory");
 }
 
-static int inline arch_write_trylock(arch_rwlock_t *lock)
+static inline int arch_write_trylock(arch_rwlock_t *lock)
 {
        unsigned long mask, tmp1, tmp2, result;
 
index 98b72a0c8e6e624d0ea6618e4f8ace5736369309..86f34be14ce0065671587ea5ca9558c5d3b8096d 100644 (file)
@@ -5,4 +5,38 @@
 #else
 #include <asm/string_32.h>
 #endif
+
+/* First the mem*() things. */
+#define __HAVE_ARCH_MEMMOVE
+void *memmove(void *, const void *, __kernel_size_t);
+
+#define __HAVE_ARCH_MEMCPY
+#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
+
+#define __HAVE_ARCH_MEMSET
+#define memset(s, c, count) __builtin_memset(s, c, count)
+
+#define __HAVE_ARCH_MEMSCAN
+
+#define memscan(__arg0, __char, __arg2)                                                \
+({                                                                             \
+       void *__memscan_zero(void *, size_t);                                   \
+       void *__memscan_generic(void *, int, size_t);                           \
+       void *__retval, *__addr = (__arg0);                                     \
+       size_t __size = (__arg2);                                               \
+                                                                               \
+       if(__builtin_constant_p(__char) && !(__char))                           \
+               __retval = __memscan_zero(__addr, __size);                      \
+       else                                                                    \
+               __retval = __memscan_generic(__addr, (__char), __size);         \
+                                                                               \
+       __retval;                                                               \
+})
+
+#define __HAVE_ARCH_MEMCMP
+int memcmp(const void *,const void *,__kernel_size_t);
+
+#define __HAVE_ARCH_STRNCMP
+int strncmp(const char *, const char *, __kernel_size_t);
+
 #endif
index 69974e924611191ae448d8707affaec8ac4c3de9..649412476a6949b69d57e753fcb766ceb35f9960 100644 (file)
 
 #include <asm/page.h>
 
-/* Really, userland/ksyms should not see any of this stuff. */
-
-#ifdef __KERNEL__
-
-void __memmove(void *,const void *,__kernel_size_t);
-
-#ifndef EXPORT_SYMTAB_STROPS
-
-/* First the mem*() things. */
-#define __HAVE_ARCH_MEMMOVE
-#undef memmove
-#define memmove(_to, _from, _n) \
-({ \
-       void *_t = (_to); \
-       __memmove(_t, (_from), (_n)); \
-       _t; \
-})
-
-#define __HAVE_ARCH_MEMCPY
-#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
-
-#define __HAVE_ARCH_MEMSET
-#define memset(s, c, count) __builtin_memset(s, c, count)
-
-#define __HAVE_ARCH_MEMSCAN
-
-#undef memscan
-#define memscan(__arg0, __char, __arg2)                                                \
-({                                                                             \
-       void *__memscan_zero(void *, size_t);                                   \
-       void *__memscan_generic(void *, int, size_t);                           \
-       void *__retval, *__addr = (__arg0);                                     \
-       size_t __size = (__arg2);                                               \
-                                                                               \
-       if(__builtin_constant_p(__char) && !(__char))                           \
-               __retval = __memscan_zero(__addr, __size);                      \
-       else                                                                    \
-               __retval = __memscan_generic(__addr, (__char), __size);         \
-                                                                               \
-       __retval;                                                               \
-})
-
-#define __HAVE_ARCH_MEMCMP
-int memcmp(const void *,const void *,__kernel_size_t);
-
-/* Now the str*() stuff... */
-#define __HAVE_ARCH_STRLEN
-__kernel_size_t strlen(const char *);
-
-#define __HAVE_ARCH_STRNCMP
-int strncmp(const char *, const char *, __kernel_size_t);
-
-#endif /* !EXPORT_SYMTAB_STROPS */
-
-#endif /* __KERNEL__ */
-
 #endif /* !(__SPARC_STRING_H__) */
index 5936b8ff3c050c15d873e2fc650185ede4912c1e..6b9ccb3086057a09feb565ade3c3f48859e3246f 100644 (file)
@@ -9,54 +9,10 @@
 #ifndef __SPARC64_STRING_H__
 #define __SPARC64_STRING_H__
 
-/* Really, userland/ksyms should not see any of this stuff. */
-
-#ifdef __KERNEL__
-
 #include <asm/asi.h>
 
-#ifndef EXPORT_SYMTAB_STROPS
-
-/* First the mem*() things. */
-#define __HAVE_ARCH_MEMMOVE
-void *memmove(void *, const void *, __kernel_size_t);
-
-#define __HAVE_ARCH_MEMCPY
-#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
-
-#define __HAVE_ARCH_MEMSET
-#define memset(s, c, count) __builtin_memset(s, c, count)
-
-#define __HAVE_ARCH_MEMSCAN
-
-#undef memscan
-#define memscan(__arg0, __char, __arg2)                                        \
-({                                                                     \
-       void *__memscan_zero(void *, size_t);                           \
-       void *__memscan_generic(void *, int, size_t);                   \
-       void *__retval, *__addr = (__arg0);                             \
-       size_t __size = (__arg2);                                       \
-                                                                       \
-       if(__builtin_constant_p(__char) && !(__char))                   \
-               __retval = __memscan_zero(__addr, __size);              \
-       else                                                            \
-               __retval = __memscan_generic(__addr, (__char), __size); \
-                                                                       \
-       __retval;                                                       \
-})
-
-#define __HAVE_ARCH_MEMCMP
-int memcmp(const void *,const void *,__kernel_size_t);
-
 /* Now the str*() stuff... */
 #define __HAVE_ARCH_STRLEN
 __kernel_size_t strlen(const char *);
 
-#define __HAVE_ARCH_STRNCMP
-int strncmp(const char *, const char *, __kernel_size_t);
-
-#endif /* !EXPORT_SYMTAB_STROPS */
-
-#endif /* __KERNEL__ */
-
 #endif /* !(__SPARC64_STRING_H__) */
index bec481aaca1635999f8889a52c4dde68bc109014..7b4898a36eee8e09c125e3619188b9ba5916c6d4 100644 (file)
@@ -44,14 +44,20 @@ int __node_distance(int, int);
 #define topology_physical_package_id(cpu)      (cpu_data(cpu).proc_id)
 #define topology_core_id(cpu)                  (cpu_data(cpu).core_id)
 #define topology_core_cpumask(cpu)             (&cpu_core_sib_map[cpu])
+#define topology_core_cache_cpumask(cpu)       (&cpu_core_sib_cache_map[cpu])
 #define topology_sibling_cpumask(cpu)          (&per_cpu(cpu_sibling_map, cpu))
 #endif /* CONFIG_SMP */
 
 extern cpumask_t cpu_core_map[NR_CPUS];
 extern cpumask_t cpu_core_sib_map[NR_CPUS];
+extern cpumask_t cpu_core_sib_cache_map[NR_CPUS];
+
+/**
+ * Return the cores that share the last-level cache.
+ */
 static inline const struct cpumask *cpu_coregroup_mask(int cpu)
 {
-        return &cpu_core_map[cpu];
+       return &cpu_core_sib_cache_map[cpu];
 }
 
 #endif /* _ASM_SPARC64_TOPOLOGY_H */
index b68acc563235cc4a4fbffc197ab9e2d107aaae11..5373136c412bf33814c9721c95f0a6738ee45ddf 100644 (file)
@@ -82,7 +82,6 @@ static inline int access_ok(int type, const void __user * addr, unsigned long si
        return 1;
 }
 
-void __ret_efault(void);
 void __retl_efault(void);
 
 /* Uh, these should become the main single-value transfer routines..
@@ -189,55 +188,34 @@ int __get_user_bad(void);
 unsigned long __must_check ___copy_from_user(void *to,
                                             const void __user *from,
                                             unsigned long size);
-unsigned long copy_from_user_fixup(void *to, const void __user *from,
-                                  unsigned long size);
 static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long size)
 {
-       unsigned long ret;
-
        check_object_size(to, size, false);
 
-       ret = ___copy_from_user(to, from, size);
-       if (unlikely(ret))
-               ret = copy_from_user_fixup(to, from, size);
-
-       return ret;
+       return ___copy_from_user(to, from, size);
 }
 #define __copy_from_user copy_from_user
 
 unsigned long __must_check ___copy_to_user(void __user *to,
                                           const void *from,
                                           unsigned long size);
-unsigned long copy_to_user_fixup(void __user *to, const void *from,
-                                unsigned long size);
 static inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long size)
 {
-       unsigned long ret;
-
        check_object_size(from, size, true);
 
-       ret = ___copy_to_user(to, from, size);
-       if (unlikely(ret))
-               ret = copy_to_user_fixup(to, from, size);
-       return ret;
+       return ___copy_to_user(to, from, size);
 }
 #define __copy_to_user copy_to_user
 
 unsigned long __must_check ___copy_in_user(void __user *to,
                                           const void __user *from,
                                           unsigned long size);
-unsigned long copy_in_user_fixup(void __user *to, void __user *from,
-                                unsigned long size);
 static inline unsigned long __must_check
 copy_in_user(void __user *to, void __user *from, unsigned long size)
 {
-       unsigned long ret = ___copy_in_user(to, from, size);
-
-       if (unlikely(ret))
-               ret = copy_in_user_fixup(to, from, size);
-       return ret;
+       return ___copy_in_user(to, from, size);
 }
 #define __copy_in_user copy_in_user
 
index 31aede3af088034934c4205f2ab3bf7003956370..a25dc32f5d6a163c1b0e7b7ae775f43898ba4d58 100644 (file)
@@ -86,6 +86,8 @@
 
 #define SO_CNX_ADVICE          0x0037
 
+#define SCM_TIMESTAMPING_OPT_STATS     0x0038
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION             0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT       0x5002
index fdb13327fded36a313b054783adf1654725d237f..fa3c02d411389e9f9ef118c8ccd8016c0a678559 100644 (file)
@@ -86,7 +86,7 @@ obj-y                     += auxio_$(BITS).o
 obj-$(CONFIG_SUN_PM)      += apc.o pmc.o
 
 obj-$(CONFIG_MODULES)     += module.o
-obj-$(CONFIG_MODULES)     += sparc_ksyms_$(BITS).o
+obj-$(CONFIG_MODULES)     += sparc_ksyms.o
 obj-$(CONFIG_SPARC_LED)   += led.o
 obj-$(CONFIG_KGDB)        += kgdb_$(BITS).o
 
index 07918ab3062e1a219ee456593044fb7ee61044e4..d85bdb9998193ce53d40e4108809de0241e90919 100644 (file)
@@ -29,6 +29,7 @@
 #include <asm/unistd.h>
 
 #include <asm/asmmacro.h>
+#include <asm/export.h>
 
 #define curptr      g6
 
@@ -1207,6 +1208,8 @@ delay_continue:
        
        ret
        restore
+EXPORT_SYMBOL(__udelay)
+EXPORT_SYMBOL(__ndelay)
 
        /* Handle a software breakpoint */
        /* We have to inform parent that child has stopped */
index 3d92c0a8f6c4928c95046eb108f5b9a9cc8c8bf8..7bb317b87ddeb4180a1381faa0c89140dc4b7e80 100644 (file)
@@ -24,6 +24,7 @@
 #include <asm/thread_info.h>   /* TI_UWINMASK */
 #include <asm/errno.h>
 #include <asm/pgtsrmmu.h>      /* SRMMU_PGDIR_SHIFT */
+#include <asm/export.h>
 
        .data
 /* The following are used with the prom_vector node-ops to figure out
@@ -60,6 +61,7 @@ sun4e_notsup:
  */
        .globl empty_zero_page
 empty_zero_page:       .skip PAGE_SIZE
+EXPORT_SYMBOL(empty_zero_page)
 
        .global root_flags
        .global ram_flags
@@ -813,3 +815,4 @@ lvl14_save:
 __ret_efault:
         ret
          restore %g0, -EFAULT, %o0
+EXPORT_SYMBOL(__ret_efault)
index a076b4249e622a4ebfe47eab6c9bcd7cb98b36ce..6aa3da152c20008a08e752c4f9708ff6a89e3d72 100644 (file)
@@ -32,7 +32,8 @@
 #include <asm/estate.h>
 #include <asm/sfafsr.h>
 #include <asm/unistd.h>
-       
+#include <asm/export.h>
+
 /* This section from from _start to sparc64_boot_end should fit into
  * 0x0000000000404000 to 0x0000000000408000.
  */
@@ -143,6 +144,7 @@ prom_cpu_compatible:
        .skip   64
 prom_root_node:
        .word   0
+EXPORT_SYMBOL(prom_root_node)
 prom_mmu_ihandle_cache:
        .word   0
 prom_boot_mapped_pc:
@@ -158,6 +160,7 @@ is_sun4v:
        .word   0
 sun4v_chip_type:
        .word   SUN4V_CHIP_INVALID
+EXPORT_SYMBOL(sun4v_chip_type)
 1:
        rd      %pc, %l0
 
@@ -920,49 +923,14 @@ swapper_4m_tsb:
        .globl  prom_tba, tlb_type
 prom_tba:      .xword  0
 tlb_type:      .word   0       /* Must NOT end up in BSS */
+EXPORT_SYMBOL(tlb_type)
        .section        ".fixup",#alloc,#execinstr
 
-       .globl  __ret_efault, __retl_efault, __ret_one, __retl_one
-ENTRY(__ret_efault)
-       ret
-        restore %g0, -EFAULT, %o0
-ENDPROC(__ret_efault)
-
 ENTRY(__retl_efault)
        retl
         mov    -EFAULT, %o0
 ENDPROC(__retl_efault)
 
-ENTRY(__retl_one)
-       retl
-        mov    1, %o0
-ENDPROC(__retl_one)
-
-ENTRY(__retl_one_fp)
-       VISExitHalf
-       retl
-        mov    1, %o0
-ENDPROC(__retl_one_fp)
-
-ENTRY(__ret_one_asi)
-       wr      %g0, ASI_AIUS, %asi
-       ret
-        restore %g0, 1, %o0
-ENDPROC(__ret_one_asi)
-
-ENTRY(__retl_one_asi)
-       wr      %g0, ASI_AIUS, %asi
-       retl
-        mov    1, %o0
-ENDPROC(__retl_one_asi)
-
-ENTRY(__retl_one_asi_fp)
-       wr      %g0, ASI_AIUS, %asi
-       VISExitHalf
-       retl
-        mov    1, %o0
-ENDPROC(__retl_one_asi_fp)
-
 ENTRY(__retl_o1)
        retl
         mov    %o1, %o0
index 314dd0c9fc5b24e5c8064f92520cc37e7cdd3c47..e4e5b832fcb6f9313b353400e45a840afbd29e26 100644 (file)
@@ -15,6 +15,7 @@ __flushw_user:
 2:     retl
         nop
        .size   __flushw_user,.-__flushw_user
+EXPORT_SYMBOL(__flushw_user)
 
        /* Flush %fp and %i7 to the stack for all register
         * windows active inside of the cpu.  This allows
@@ -61,3 +62,4 @@ real_hard_smp_processor_id:
        .size           hard_smp_processor_id,.-hard_smp_processor_id
 #endif
        .size           real_hard_smp_processor_id,.-real_hard_smp_processor_id
+EXPORT_SYMBOL_GPL(real_hard_smp_processor_id)
index 662500fa555f74160f6449143e6d0785af2643e9..267731234ce8a2ee2cdc03686377c11a2ea6205f 100644 (file)
@@ -39,6 +39,7 @@ static struct api_info api_table[] = {
        { .group = HV_GRP_SDIO,                                 },
        { .group = HV_GRP_SDIO_ERR,                             },
        { .group = HV_GRP_REBOOT_DATA,                          },
+       { .group = HV_GRP_ATU,          .flags = FLAG_PRE_API   },
        { .group = HV_GRP_NIAG_PERF,    .flags = FLAG_PRE_API   },
        { .group = HV_GRP_FIRE_PERF,                            },
        { .group = HV_GRP_N2_CPU,                               },
index d127130bf4246032d39cf923248fce7eebf92867..4116ee5c77913221593421836eaae0803115a028 100644 (file)
@@ -343,6 +343,7 @@ ENTRY(sun4v_mach_set_watchdog)
 0:     retl
         nop
 ENDPROC(sun4v_mach_set_watchdog)
+EXPORT_SYMBOL(sun4v_mach_set_watchdog)
 
        /* No inputs and does not return.  */
 ENTRY(sun4v_mach_sir)
@@ -776,6 +777,7 @@ ENTRY(sun4v_niagara_getperf)
        retl
         nop
 ENDPROC(sun4v_niagara_getperf)
+EXPORT_SYMBOL(sun4v_niagara_getperf)
 
 ENTRY(sun4v_niagara_setperf)
        mov     HV_FAST_SET_PERFREG, %o5
@@ -783,6 +785,7 @@ ENTRY(sun4v_niagara_setperf)
        retl
         nop
 ENDPROC(sun4v_niagara_setperf)
+EXPORT_SYMBOL(sun4v_niagara_setperf)
 
 ENTRY(sun4v_niagara2_getperf)
        mov     %o0, %o4
@@ -792,6 +795,7 @@ ENTRY(sun4v_niagara2_getperf)
        retl
         nop
 ENDPROC(sun4v_niagara2_getperf)
+EXPORT_SYMBOL(sun4v_niagara2_getperf)
 
 ENTRY(sun4v_niagara2_setperf)
        mov     HV_FAST_N2_SET_PERFREG, %o5
@@ -799,6 +803,7 @@ ENTRY(sun4v_niagara2_setperf)
        retl
         nop
 ENDPROC(sun4v_niagara2_setperf)
+EXPORT_SYMBOL(sun4v_niagara2_setperf)
 
 ENTRY(sun4v_reboot_data_set)
        mov     HV_FAST_REBOOT_DATA_SET, %o5
index 5c615abff030fdb6c26a7c68c86d5c3fa0544b94..852a3291db968986a0f77d240ccf7b58c3f9070a 100644 (file)
@@ -760,8 +760,12 @@ int dma_supported(struct device *dev, u64 device_mask)
        struct iommu *iommu = dev->archdata.iommu;
        u64 dma_addr_mask = iommu->dma_addr_mask;
 
-       if (device_mask >= (1UL << 32UL))
-               return 0;
+       if (device_mask > DMA_BIT_MASK(32)) {
+               if (iommu->atu)
+                       dma_addr_mask = iommu->atu->dma_addr_mask;
+               else
+                       return 0;
+       }
 
        if ((device_mask & dma_addr_mask) == dma_addr_mask)
                return 1;
index b40cec25290503b72dbf1d8c18e8f374ab51b44e..828493329f68d05e7ab1c47d6449a9f76adedddd 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/scatterlist.h>
 #include <linux/device.h>
 #include <linux/iommu-helper.h>
-#include <linux/scatterlist.h>
 
 #include <asm/iommu.h>
 
index 59bbeff550243dc10ea558a013d09825303e4793..07933b9e9ce00a34fc3677ee614a24ce2c0532a5 100644 (file)
 void arch_jump_label_transform(struct jump_entry *entry,
                               enum jump_label_type type)
 {
-       u32 val;
        u32 *insn = (u32 *) (unsigned long) entry->code;
+       u32 val;
 
        if (type == JUMP_LABEL_JMP) {
                s32 off = (s32)entry->target - (s32)entry->code;
+               bool use_v9_branch = false;
+
+               BUG_ON(off & 3);
 
 #ifdef CONFIG_SPARC64
-               /* ba,pt %xcc, . + (off << 2) */
-               val = 0x10680000 | ((u32) off >> 2);
-#else
-               /* ba . + (off << 2) */
-               val = 0x10800000 | ((u32) off >> 2);
+               if (off <= 0xfffff && off >= -0x100000)
+                       use_v9_branch = true;
 #endif
+               if (use_v9_branch) {
+                       /* WDISP19 - target is . + immed << 2 */
+                       /* ba,pt %xcc, . + off */
+                       val = 0x10680000 | (((u32) off >> 2) & 0x7ffff);
+               } else {
+                       /* WDISP22 - target is . + immed << 2 */
+                       BUG_ON(off > 0x7fffff);
+                       BUG_ON(off < -0x800000);
+                       /* ba . + off */
+                       val = 0x10800000 | (((u32) off >> 2) & 0x3fffff);
+               }
        } else {
                val = 0x01000000;
        }
index 11228861d9b4716dde53881c4b2184538dec6197..8a6982dfd7334fe1ff41cd830bbe320f05e25b30 100644 (file)
@@ -645,13 +645,20 @@ static void __mark_core_id(struct mdesc_handle *hp, u64 node,
                cpu_data(*id).core_id = core_id;
 }
 
-static void __mark_sock_id(struct mdesc_handle *hp, u64 node,
-                          int sock_id)
+static void __mark_max_cache_id(struct mdesc_handle *hp, u64 node,
+                               int max_cache_id)
 {
        const u64 *id = mdesc_get_property(hp, node, "id", NULL);
 
-       if (*id < num_possible_cpus())
-               cpu_data(*id).sock_id = sock_id;
+       if (*id < num_possible_cpus()) {
+               cpu_data(*id).max_cache_id = max_cache_id;
+
+               /**
+                * On systems without explicit socket descriptions, the
+                * socket id is max_cache_id.
+                */
+               cpu_data(*id).sock_id = max_cache_id;
+       }
 }
 
 static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
@@ -660,10 +667,11 @@ static void mark_core_ids(struct mdesc_handle *hp, u64 mp,
        find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10);
 }
 
-static void mark_sock_ids(struct mdesc_handle *hp, u64 mp,
-                         int sock_id)
+static void mark_max_cache_ids(struct mdesc_handle *hp, u64 mp,
+                              int max_cache_id)
 {
-       find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10);
+       find_back_node_value(hp, mp, "cpu", __mark_max_cache_id,
+                            max_cache_id, 10);
 }
 
 static void set_core_ids(struct mdesc_handle *hp)
@@ -694,14 +702,15 @@ static void set_core_ids(struct mdesc_handle *hp)
        }
 }
 
-static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
+static int set_max_cache_ids_by_cache(struct mdesc_handle *hp, int level)
 {
        u64 mp;
        int idx = 1;
        int fnd = 0;
 
-       /* Identify unique sockets by looking for cpus backpointed to by
-        * shared level n caches.
+       /**
+        * Identify unique highest level of shared cache by looking for cpus
+        * backpointed to by shared level N caches.
         */
        mdesc_for_each_node_by_name(hp, mp, "cache") {
                const u64 *cur_lvl;
@@ -709,8 +718,7 @@ static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level)
                cur_lvl = mdesc_get_property(hp, mp, "level", NULL);
                if (*cur_lvl != level)
                        continue;
-
-               mark_sock_ids(hp, mp, idx);
+               mark_max_cache_ids(hp, mp, idx);
                idx++;
                fnd = 1;
        }
@@ -745,15 +753,17 @@ static void set_sock_ids(struct mdesc_handle *hp)
 {
        u64 mp;
 
-       /* If machine description exposes sockets data use it.
-        * Otherwise fallback to use shared L3 or L2 caches.
+       /**
+        * Find the highest level of shared cache, which on pre-T7
+        * systems is also the socket.
         */
+       if (!set_max_cache_ids_by_cache(hp, 3))
+               set_max_cache_ids_by_cache(hp, 2);
+
+       /* If the machine description exposes sockets data, use it. */
        mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets");
        if (mp != MDESC_NODE_NULL)
-               return set_sock_ids_by_socket(hp, mp);
-
-       if (!set_sock_ids_by_cache(hp, 3))
-               set_sock_ids_by_cache(hp, 2);
+               set_sock_ids_by_socket(hp, mp);
 }
 
 static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id)
index db57d8acdc01cf52aeb1664acdc35fef77b3a21a..06981cc716b68022712dc3a872e611565e4e4817 100644 (file)
@@ -44,6 +44,9 @@ static struct vpci_version vpci_versions[] = {
        { .major = 1, .minor = 1 },
 };
 
+static unsigned long vatu_major = 1;
+static unsigned long vatu_minor = 1;
+
 #define PGLIST_NENTS   (PAGE_SIZE / sizeof(u64))
 
 struct iommu_batch {
@@ -69,34 +72,57 @@ static inline void iommu_batch_start(struct device *dev, unsigned long prot, uns
 }
 
 /* Interrupts must be disabled.  */
-static long iommu_batch_flush(struct iommu_batch *p)
+static long iommu_batch_flush(struct iommu_batch *p, u64 mask)
 {
        struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
+       u64 *pglist = p->pglist;
+       u64 index_count;
        unsigned long devhandle = pbm->devhandle;
        unsigned long prot = p->prot;
        unsigned long entry = p->entry;
-       u64 *pglist = p->pglist;
        unsigned long npages = p->npages;
+       unsigned long iotsb_num;
+       unsigned long ret;
+       long num;
 
        /* VPCI maj=1, min=[0,1] only supports read and write */
        if (vpci_major < 2)
                prot &= (HV_PCI_MAP_ATTR_READ | HV_PCI_MAP_ATTR_WRITE);
 
        while (npages != 0) {
-               long num;
-
-               num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
-                                         npages, prot, __pa(pglist));
-               if (unlikely(num < 0)) {
-                       if (printk_ratelimit())
-                               printk("iommu_batch_flush: IOMMU map of "
-                                      "[%08lx:%08llx:%lx:%lx:%lx] failed with "
-                                      "status %ld\n",
-                                      devhandle, HV_PCI_TSBID(0, entry),
-                                      npages, prot, __pa(pglist), num);
-                       return -1;
+               if (mask <= DMA_BIT_MASK(32)) {
+                       num = pci_sun4v_iommu_map(devhandle,
+                                                 HV_PCI_TSBID(0, entry),
+                                                 npages,
+                                                 prot,
+                                                 __pa(pglist));
+                       if (unlikely(num < 0)) {
+                               pr_err_ratelimited("%s: IOMMU map of [%08lx:%08llx:%lx:%lx:%lx] failed with status %ld\n",
+                                                  __func__,
+                                                  devhandle,
+                                                  HV_PCI_TSBID(0, entry),
+                                                  npages, prot, __pa(pglist),
+                                                  num);
+                               return -1;
+                       }
+               } else {
+                       index_count = HV_PCI_IOTSB_INDEX_COUNT(npages, entry),
+                       iotsb_num = pbm->iommu->atu->iotsb->iotsb_num;
+                       ret = pci_sun4v_iotsb_map(devhandle,
+                                                 iotsb_num,
+                                                 index_count,
+                                                 prot,
+                                                 __pa(pglist),
+                                                 &num);
+                       if (unlikely(ret != HV_EOK)) {
+                               pr_err_ratelimited("%s: ATU map of [%08lx:%lx:%llx:%lx:%lx] failed with status %ld\n",
+                                                  __func__,
+                                                  devhandle, iotsb_num,
+                                                  index_count, prot,
+                                                  __pa(pglist), ret);
+                               return -1;
+                       }
                }
-
                entry += num;
                npages -= num;
                pglist += num;
@@ -108,19 +134,19 @@ static long iommu_batch_flush(struct iommu_batch *p)
        return 0;
 }
 
-static inline void iommu_batch_new_entry(unsigned long entry)
+static inline void iommu_batch_new_entry(unsigned long entry, u64 mask)
 {
        struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
 
        if (p->entry + p->npages == entry)
                return;
        if (p->entry != ~0UL)
-               iommu_batch_flush(p);
+               iommu_batch_flush(p, mask);
        p->entry = entry;
 }
 
 /* Interrupts must be disabled.  */
-static inline long iommu_batch_add(u64 phys_page)
+static inline long iommu_batch_add(u64 phys_page, u64 mask)
 {
        struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
 
@@ -128,28 +154,31 @@ static inline long iommu_batch_add(u64 phys_page)
 
        p->pglist[p->npages++] = phys_page;
        if (p->npages == PGLIST_NENTS)
-               return iommu_batch_flush(p);
+               return iommu_batch_flush(p, mask);
 
        return 0;
 }
 
 /* Interrupts must be disabled.  */
-static inline long iommu_batch_end(void)
+static inline long iommu_batch_end(u64 mask)
 {
        struct iommu_batch *p = this_cpu_ptr(&iommu_batch);
 
        BUG_ON(p->npages >= PGLIST_NENTS);
 
-       return iommu_batch_flush(p);
+       return iommu_batch_flush(p, mask);
 }
 
 static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
                                   dma_addr_t *dma_addrp, gfp_t gfp,
                                   unsigned long attrs)
 {
+       u64 mask;
        unsigned long flags, order, first_page, npages, n;
        unsigned long prot = 0;
        struct iommu *iommu;
+       struct atu *atu;
+       struct iommu_map_table *tbl;
        struct page *page;
        void *ret;
        long entry;
@@ -174,14 +203,21 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
        memset((char *)first_page, 0, PAGE_SIZE << order);
 
        iommu = dev->archdata.iommu;
+       atu = iommu->atu;
+
+       mask = dev->coherent_dma_mask;
+       if (mask <= DMA_BIT_MASK(32))
+               tbl = &iommu->tbl;
+       else
+               tbl = &atu->tbl;
 
-       entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+       entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
                                      (unsigned long)(-1), 0);
 
        if (unlikely(entry == IOMMU_ERROR_CODE))
                goto range_alloc_fail;
 
-       *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
+       *dma_addrp = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
        ret = (void *) first_page;
        first_page = __pa(first_page);
 
@@ -193,12 +229,12 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
                          entry);
 
        for (n = 0; n < npages; n++) {
-               long err = iommu_batch_add(first_page + (n * PAGE_SIZE));
+               long err = iommu_batch_add(first_page + (n * PAGE_SIZE), mask);
                if (unlikely(err < 0L))
                        goto iommu_map_fail;
        }
 
-       if (unlikely(iommu_batch_end() < 0L))
+       if (unlikely(iommu_batch_end(mask) < 0L))
                goto iommu_map_fail;
 
        local_irq_restore(flags);
@@ -206,25 +242,71 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
        return ret;
 
 iommu_map_fail:
-       iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
+       iommu_tbl_range_free(tbl, *dma_addrp, npages, IOMMU_ERROR_CODE);
 
 range_alloc_fail:
        free_pages(first_page, order);
        return NULL;
 }
 
-static void dma_4v_iommu_demap(void *demap_arg, unsigned long entry,
-                              unsigned long npages)
+unsigned long dma_4v_iotsb_bind(unsigned long devhandle,
+                               unsigned long iotsb_num,
+                               struct pci_bus *bus_dev)
+{
+       struct pci_dev *pdev;
+       unsigned long err;
+       unsigned int bus;
+       unsigned int device;
+       unsigned int fun;
+
+       list_for_each_entry(pdev, &bus_dev->devices, bus_list) {
+               if (pdev->subordinate) {
+                       /* Don't bind the PCI bridge; walk the bus behind it */
+                       dma_4v_iotsb_bind(devhandle, iotsb_num,
+                                         pdev->subordinate);
+               } else {
+                       bus = bus_dev->number;
+                       device = PCI_SLOT(pdev->devfn);
+                       fun = PCI_FUNC(pdev->devfn);
+                       err = pci_sun4v_iotsb_bind(devhandle, iotsb_num,
+                                                  HV_PCI_DEVICE_BUILD(bus,
+                                                                      device,
+                                                                      fun));
+
+                       /* If bind fails for one device it is going to fail
+                        * for the rest of the devices because they share
+                        * the IOTSB. So in case of failure simply return
+                        * with the error.
+                        */
+                       if (err)
+                               return err;
+               }
+       }
+
+       return 0;
+}
+
+static void dma_4v_iommu_demap(struct device *dev, unsigned long devhandle,
+                              dma_addr_t dvma, unsigned long iotsb_num,
+                              unsigned long entry, unsigned long npages)
 {
-       u32 devhandle = *(u32 *)demap_arg;
        unsigned long num, flags;
+       unsigned long ret;
 
        local_irq_save(flags);
        do {
-               num = pci_sun4v_iommu_demap(devhandle,
-                                           HV_PCI_TSBID(0, entry),
-                                           npages);
-
+               if (dvma <= DMA_BIT_MASK(32)) {
+                       num = pci_sun4v_iommu_demap(devhandle,
+                                                   HV_PCI_TSBID(0, entry),
+                                                   npages);
+               } else {
+                       ret = pci_sun4v_iotsb_demap(devhandle, iotsb_num,
+                                                   entry, npages, &num);
+                       if (unlikely(ret != HV_EOK)) {
+                               pr_err_ratelimited("pci_iotsb_demap() failed with error: %ld\n",
+                                                  ret);
+                       }
+               }
                entry += num;
                npages -= num;
        } while (npages != 0);
@@ -236,16 +318,28 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
 {
        struct pci_pbm_info *pbm;
        struct iommu *iommu;
+       struct atu *atu;
+       struct iommu_map_table *tbl;
        unsigned long order, npages, entry;
+       unsigned long iotsb_num;
        u32 devhandle;
 
        npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
        iommu = dev->archdata.iommu;
        pbm = dev->archdata.host_controller;
+       atu = iommu->atu;
        devhandle = pbm->devhandle;
-       entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT);
-       dma_4v_iommu_demap(&devhandle, entry, npages);
-       iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE);
+
+       if (dvma <= DMA_BIT_MASK(32)) {
+               tbl = &iommu->tbl;
+               iotsb_num = 0; /* we don't care for legacy iommu */
+       } else {
+               tbl = &atu->tbl;
+               iotsb_num = atu->iotsb->iotsb_num;
+       }
+       entry = ((dvma - tbl->table_map_base) >> IO_PAGE_SHIFT);
+       dma_4v_iommu_demap(dev, devhandle, dvma, iotsb_num, entry, npages);
+       iommu_tbl_range_free(tbl, dvma, npages, IOMMU_ERROR_CODE);
        order = get_order(size);
        if (order < 10)
                free_pages((unsigned long)cpu, order);
@@ -257,13 +351,17 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
                                  unsigned long attrs)
 {
        struct iommu *iommu;
+       struct atu *atu;
+       struct iommu_map_table *tbl;
+       u64 mask;
        unsigned long flags, npages, oaddr;
        unsigned long i, base_paddr;
-       u32 bus_addr, ret;
        unsigned long prot;
+       dma_addr_t bus_addr, ret;
        long entry;
 
        iommu = dev->archdata.iommu;
+       atu = iommu->atu;
 
        if (unlikely(direction == DMA_NONE))
                goto bad;
@@ -272,13 +370,19 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
        npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
        npages >>= IO_PAGE_SHIFT;
 
-       entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL,
+       mask = *dev->dma_mask;
+       if (mask <= DMA_BIT_MASK(32))
+               tbl = &iommu->tbl;
+       else
+               tbl = &atu->tbl;
+
+       entry = iommu_tbl_range_alloc(dev, tbl, npages, NULL,
                                      (unsigned long)(-1), 0);
 
        if (unlikely(entry == IOMMU_ERROR_CODE))
                goto bad;
 
-       bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT));
+       bus_addr = (tbl->table_map_base + (entry << IO_PAGE_SHIFT));
        ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
        base_paddr = __pa(oaddr & IO_PAGE_MASK);
        prot = HV_PCI_MAP_ATTR_READ;
@@ -293,11 +397,11 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
        iommu_batch_start(dev, prot, entry);
 
        for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
-               long err = iommu_batch_add(base_paddr);
+               long err = iommu_batch_add(base_paddr, mask);
                if (unlikely(err < 0L))
                        goto iommu_map_fail;
        }
-       if (unlikely(iommu_batch_end() < 0L))
+       if (unlikely(iommu_batch_end(mask) < 0L))
                goto iommu_map_fail;
 
        local_irq_restore(flags);
@@ -310,7 +414,7 @@ bad:
        return DMA_ERROR_CODE;
 
 iommu_map_fail:
-       iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
+       iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
        return DMA_ERROR_CODE;
 }
 
@@ -320,7 +424,10 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
 {
        struct pci_pbm_info *pbm;
        struct iommu *iommu;
+       struct atu *atu;
+       struct iommu_map_table *tbl;
        unsigned long npages;
+       unsigned long iotsb_num;
        long entry;
        u32 devhandle;
 
@@ -332,14 +439,23 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
 
        iommu = dev->archdata.iommu;
        pbm = dev->archdata.host_controller;
+       atu = iommu->atu;
        devhandle = pbm->devhandle;
 
        npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
        npages >>= IO_PAGE_SHIFT;
        bus_addr &= IO_PAGE_MASK;
-       entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT;
-       dma_4v_iommu_demap(&devhandle, entry, npages);
-       iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE);
+
+       if (bus_addr <= DMA_BIT_MASK(32)) {
+               iotsb_num = 0; /* we don't care for legacy iommu */
+               tbl = &iommu->tbl;
+       } else {
+               iotsb_num = atu->iotsb->iotsb_num;
+               tbl = &atu->tbl;
+       }
+       entry = (bus_addr - tbl->table_map_base) >> IO_PAGE_SHIFT;
+       dma_4v_iommu_demap(dev, devhandle, bus_addr, iotsb_num, entry, npages);
+       iommu_tbl_range_free(tbl, bus_addr, npages, IOMMU_ERROR_CODE);
 }
 
 static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -353,12 +469,17 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
        unsigned long seg_boundary_size;
        int outcount, incount, i;
        struct iommu *iommu;
+       struct atu *atu;
+       struct iommu_map_table *tbl;
+       u64 mask;
        unsigned long base_shift;
        long err;
 
        BUG_ON(direction == DMA_NONE);
 
        iommu = dev->archdata.iommu;
+       atu = iommu->atu;
+
        if (nelems == 0 || !iommu)
                return 0;
        
@@ -384,7 +505,15 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
        max_seg_size = dma_get_max_seg_size(dev);
        seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
                                  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
-       base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT;
+
+       mask = *dev->dma_mask;
+       if (mask <= DMA_BIT_MASK(32))
+               tbl = &iommu->tbl;
+       else
+               tbl = &atu->tbl;
+
+       base_shift = tbl->table_map_base >> IO_PAGE_SHIFT;
+
        for_each_sg(sglist, s, nelems, i) {
                unsigned long paddr, npages, entry, out_entry = 0, slen;
 
@@ -397,27 +526,26 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
                /* Allocate iommu entries for that segment */
                paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
                npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
-               entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages,
+               entry = iommu_tbl_range_alloc(dev, tbl, npages,
                                              &handle, (unsigned long)(-1), 0);
 
                /* Handle failure */
                if (unlikely(entry == IOMMU_ERROR_CODE)) {
-                       if (printk_ratelimit())
-                               printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
-                                      " npages %lx\n", iommu, paddr, npages);
+                       pr_err_ratelimited("iommu_alloc failed, iommu %p paddr %lx npages %lx\n",
+                                          tbl, paddr, npages);
                        goto iommu_map_failed;
                }
 
-               iommu_batch_new_entry(entry);
+               iommu_batch_new_entry(entry, mask);
 
                /* Convert entry to a dma_addr_t */
-               dma_addr = iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT);
+               dma_addr = tbl->table_map_base + (entry << IO_PAGE_SHIFT);
                dma_addr |= (s->offset & ~IO_PAGE_MASK);
 
                /* Insert into HW table */
                paddr &= IO_PAGE_MASK;
                while (npages--) {
-                       err = iommu_batch_add(paddr);
+                       err = iommu_batch_add(paddr, mask);
                        if (unlikely(err < 0L))
                                goto iommu_map_failed;
                        paddr += IO_PAGE_SIZE;
@@ -452,7 +580,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
                dma_next = dma_addr + slen;
        }
 
-       err = iommu_batch_end();
+       err = iommu_batch_end(mask);
 
        if (unlikely(err < 0L))
                goto iommu_map_failed;
@@ -475,7 +603,7 @@ iommu_map_failed:
                        vaddr = s->dma_address & IO_PAGE_MASK;
                        npages = iommu_num_pages(s->dma_address, s->dma_length,
                                                 IO_PAGE_SIZE);
-                       iommu_tbl_range_free(&iommu->tbl, vaddr, npages,
+                       iommu_tbl_range_free(tbl, vaddr, npages,
                                             IOMMU_ERROR_CODE);
                        /* XXX demap? XXX */
                        s->dma_address = DMA_ERROR_CODE;
@@ -496,13 +624,16 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
        struct pci_pbm_info *pbm;
        struct scatterlist *sg;
        struct iommu *iommu;
+       struct atu *atu;
        unsigned long flags, entry;
+       unsigned long iotsb_num;
        u32 devhandle;
 
        BUG_ON(direction == DMA_NONE);
 
        iommu = dev->archdata.iommu;
        pbm = dev->archdata.host_controller;
+       atu = iommu->atu;
        devhandle = pbm->devhandle;
        
        local_irq_save(flags);
@@ -512,15 +643,24 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
                dma_addr_t dma_handle = sg->dma_address;
                unsigned int len = sg->dma_length;
                unsigned long npages;
-               struct iommu_map_table *tbl = &iommu->tbl;
+               struct iommu_map_table *tbl;
                unsigned long shift = IO_PAGE_SHIFT;
 
                if (!len)
                        break;
                npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
+
+               if (dma_handle <= DMA_BIT_MASK(32)) {
+                       iotsb_num = 0; /* we don't care for legacy iommu */
+                       tbl = &iommu->tbl;
+               } else {
+                       iotsb_num = atu->iotsb->iotsb_num;
+                       tbl = &atu->tbl;
+               }
                entry = ((dma_handle - tbl->table_map_base) >> shift);
-               dma_4v_iommu_demap(&devhandle, entry, npages);
-               iommu_tbl_range_free(&iommu->tbl, dma_handle, npages,
+               dma_4v_iommu_demap(dev, devhandle, dma_handle, iotsb_num,
+                                  entry, npages);
+               iommu_tbl_range_free(tbl, dma_handle, npages,
                                     IOMMU_ERROR_CODE);
                sg = sg_next(sg);
        }
@@ -581,6 +721,132 @@ static unsigned long probe_existing_entries(struct pci_pbm_info *pbm,
        return cnt;
 }
 
+/* Allocate the IOTSB for this PBM's ATU, register it with the hypervisor
+ * through pci_sun4v_iotsb_conf() and bind it with dma_4v_iotsb_bind().
+ *
+ * Reads atu->size and atu->base, so both must be set by the caller.
+ *
+ * Returns 0 on success, with atu->iotsb pointing at the new descriptor.
+ * On failure the descriptor and table pages allocated here are released,
+ * atu->iotsb is reset to NULL and a non-zero value is returned: -ENOMEM
+ * for allocation failures, otherwise the status of the failing call.
+ */
+static int pci_sun4v_atu_alloc_iotsb(struct pci_pbm_info *pbm)
+{
+       struct atu *atu = pbm->iommu->atu;
+       struct atu_iotsb *iotsb;
+       void *table;
+       u64 table_size;
+       u64 iotsb_num;
+       unsigned long order;
+       unsigned long err;
+
+       iotsb = kzalloc(sizeof(*iotsb), GFP_KERNEL);
+       if (!iotsb) {
+               err = -ENOMEM;
+               goto out_err;
+       }
+       atu->iotsb = iotsb;
+
+       /* calculate size of IOTSB: 8 bytes for each IO page of ATU space */
+       table_size = (atu->size / IO_PAGE_SIZE) * 8;
+       order = get_order(table_size);
+       table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
+       if (!table) {
+               err = -ENOMEM;
+               goto table_failed;
+       }
+       iotsb->table = table;
+       iotsb->ra = __pa(table);
+       iotsb->dvma_size = atu->size;
+       iotsb->dvma_base = atu->base;
+       iotsb->table_size = table_size;
+       iotsb->page_size = IO_PAGE_SIZE;
+
+       /* configure and register IOTSB with HV */
+       err = pci_sun4v_iotsb_conf(pbm->devhandle,
+                                  iotsb->ra,
+                                  iotsb->table_size,
+                                  iotsb->page_size,
+                                  iotsb->dvma_base,
+                                  &iotsb_num);
+       if (err) {
+               pr_err(PFX "pci_iotsb_conf failed error: %ld\n", err);
+               goto iotsb_conf_failed;
+       }
+       iotsb->iotsb_num = iotsb_num;
+
+       /* NOTE(review): if the bind below fails, the IOTSB has already been
+        * configured with the hypervisor and no unconfigure call is made
+        * before the table pages are freed - confirm that this is safe.
+        */
+       err = dma_4v_iotsb_bind(pbm->devhandle, iotsb_num, pbm->pci_bus);
+       if (err) {
+               pr_err(PFX "pci_iotsb_bind failed error: %ld\n", err);
+               goto iotsb_conf_failed;
+       }
+
+       return 0;
+
+iotsb_conf_failed:
+       free_pages((unsigned long)table, order);
+table_failed:
+       /* do not leave atu->iotsb pointing at the freed descriptor */
+       atu->iotsb = NULL;
+       kfree(iotsb);
+out_err:
+       return err;
+}
+
+/* Set up the ATU of this PBM: read the "iommu-address-ranges" and
+ * "iommu-pagesizes" properties, pick the 64-bit DVMA window, create the
+ * IOTSB and initialise the ATU's iommu allocation map.
+ *
+ * Returns 0 on success and a non-zero error otherwise.  Nothing allocated
+ * by this function is left behind on failure; releasing the struct atu
+ * itself is up to the caller.
+ */
+static int pci_sun4v_atu_init(struct pci_pbm_info *pbm)
+{
+       struct atu *atu = pbm->iommu->atu;
+       const u64 *ranges;
+       u64 map_size, num_iotte;
+       u64 dma_mask;
+       const u32 *page_size;
+       int len;
+       int err;
+
+       ranges = of_get_property(pbm->op->dev.of_node, "iommu-address-ranges",
+                                &len);
+       if (!ranges) {
+               pr_err(PFX "No iommu-address-ranges\n");
+               return -EINVAL;
+       }
+
+       /* ranges[3] is read below, so all four entries must be present */
+       if (len < (int)(4 * sizeof(struct atu_ranges))) {
+               pr_err(PFX "iommu-address-ranges too short\n");
+               return -EINVAL;
+       }
+
+       /* Only the presence of the property is checked; its value is not
+        * used here.
+        */
+       page_size = of_get_property(pbm->op->dev.of_node, "iommu-pagesizes",
+                                   NULL);
+       if (!page_size) {
+               pr_err(PFX "No iommu-pagesizes\n");
+               return -EINVAL;
+       }
+
+       /* There are 4 iommu-address-ranges supported. Each range is a pair of
+        * {base, size}. ranges[0] and ranges[1] are 32bit address space
+        * while ranges[2] and ranges[3] are 64bit space.  We want to use the
+        * 64bit address ranges to support 64bit addressing. Because the
+        * 'size' of ranges[2] and ranges[3] is the same we can select either
+        * of them for mapping. However, since that 'size' is too large for
+        * the OS to allocate an IOTSB for, we use a fixed size of 32G
+        * (ATU_64_SPACE_SIZE), which is more than enough for all PCIe
+        * devices to share.
+        */
+       atu->ranges = (struct atu_ranges *)ranges;
+       atu->base = atu->ranges[3].base;
+       atu->size = ATU_64_SPACE_SIZE;
+
+       /* Allocate the ATU iommu map first, so that a failure here needs no
+        * IOTSB teardown.  One bit represents one iotte in IOTSB table.
+        */
+       dma_mask = (roundup_pow_of_two(atu->size) - 1UL);
+       num_iotte = atu->size / IO_PAGE_SIZE;
+       map_size = num_iotte / 8;
+       atu->tbl.map = kzalloc(map_size, GFP_KERNEL);
+       if (!atu->tbl.map)
+               return -ENOMEM;
+
+       /* Create IOTSB */
+       err = pci_sun4v_atu_alloc_iotsb(pbm);
+       if (err) {
+               pr_err(PFX "Error creating ATU IOTSB\n");
+               kfree(atu->tbl.map);
+               atu->tbl.map = NULL;
+               return err;
+       }
+
+       atu->tbl.table_map_base = atu->base;
+       atu->dma_addr_mask = dma_mask;
+
+       iommu_tbl_pool_init(&atu->tbl, num_iotte, IO_PAGE_SHIFT,
+                           NULL, false /* no large_pool */,
+                           0 /* default npools */,
+                           false /* want span boundary checking */);
+
+       return 0;
+}
+
 static int pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
 {
        static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
@@ -918,6 +1184,18 @@ static int pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
 
        pci_sun4v_scan_bus(pbm, &op->dev);
 
+       /* If atu_init fails it is not a complete failure:
+        * we can still continue using the legacy iommu.
+        */
+       if (pbm->iommu->atu) {
+               err = pci_sun4v_atu_init(pbm);
+               if (err) {
+                       kfree(pbm->iommu->atu);
+                       pbm->iommu->atu = NULL;
+                       pr_err(PFX "ATU init failed, err=%d\n", err);
+               }
+       }
+
        pbm->next = pci_pbm_root;
        pci_pbm_root = pbm;
 
@@ -931,8 +1209,10 @@ static int pci_sun4v_probe(struct platform_device *op)
        struct pci_pbm_info *pbm;
        struct device_node *dp;
        struct iommu *iommu;
+       struct atu *atu;
        u32 devhandle;
        int i, err = -ENODEV;
+       static bool hv_atu = true;
 
        dp = op->dev.of_node;
 
@@ -954,6 +1234,19 @@ static int pci_sun4v_probe(struct platform_device *op)
                pr_info(PFX "Registered hvapi major[%lu] minor[%lu]\n",
                        vpci_major, vpci_minor);
 
+               err = sun4v_hvapi_register(HV_GRP_ATU, vatu_major, &vatu_minor);
+               if (err) {
+                       /* don't return an error if we fail to register the
+                        * ATU group, but ATU hcalls won't be available.
+                        */
+                       hv_atu = false;
+                       pr_err(PFX "Could not register hvapi ATU err=%d\n",
+                              err);
+               } else {
+                       pr_info(PFX "Registered hvapi ATU major[%lu] minor[%lu]\n",
+                               vatu_major, vatu_minor);
+               }
+
                dma_ops = &sun4v_dma_ops;
        }
 
@@ -991,6 +1284,14 @@ static int pci_sun4v_probe(struct platform_device *op)
        }
 
        pbm->iommu = iommu;
+       iommu->atu = NULL;
+       if (hv_atu) {
+               atu = kzalloc(sizeof(*atu), GFP_KERNEL);
+               if (!atu)
+                       pr_err(PFX "Could not allocate atu\n");
+               else
+                       iommu->atu = atu;
+       }
 
        err = pci_sun4v_pbm_init(pbm, op, devhandle);
        if (err)
@@ -1001,6 +1302,7 @@ static int pci_sun4v_probe(struct platform_device *op)
        return 0;
 
 out_free_iommu:
+       kfree(iommu->atu);
        kfree(pbm->iommu);
 
 out_free_controller:
index 5642212390b2ea7911cea77b1fcc2f6c133f882b..22603a4e48bf1883d3bb72292ec23e10ee6673b3 100644 (file)
@@ -89,4 +89,25 @@ unsigned long pci_sun4v_msg_setvalid(unsigned long devhandle,
                                     unsigned long msinum,
                                     unsigned long valid);
 
+/* Sun4v HV IOMMU v2 APIs */
+unsigned long pci_sun4v_iotsb_conf(unsigned long devhandle,
+                                  unsigned long ra,
+                                  unsigned long table_size,
+                                  unsigned long page_size,
+                                  unsigned long dvma_base,
+                                  u64 *iotsb_num);
+unsigned long pci_sun4v_iotsb_bind(unsigned long devhandle,
+                                  unsigned long iotsb_num,
+                                  unsigned int pci_device);
+unsigned long pci_sun4v_iotsb_map(unsigned long devhandle,
+                                 unsigned long iotsb_num,
+                                 unsigned long iotsb_index_iottes,
+                                 unsigned long io_attributes,
+                                 unsigned long io_page_list_pa,
+                                 long *mapped);
+unsigned long pci_sun4v_iotsb_demap(unsigned long devhandle,
+                                   unsigned long iotsb_num,
+                                   unsigned long iotsb_index,
+                                   unsigned long iottes,
+                                   unsigned long *demapped);
 #endif /* !(_PCI_SUN4V_H) */
index e606d46c68159a9c773dfd571fb60b6659882055..578f09657916305b2aab785e2c0eca683863987a 100644 (file)
@@ -360,3 +360,71 @@ ENTRY(pci_sun4v_msg_setvalid)
         mov    %o0, %o0
 ENDPROC(pci_sun4v_msg_setvalid)
 
+       /*
+        * pci_sun4v_iotsb_conf: issue the HV_FAST_PCI_IOTSB_CONF fast trap
+        * and store the returned %o1 through the pointer passed in %o5.
+        *
+        * %o0: devhandle
+        * %o1: r_addr
+        * %o2: size
+        * %o3: pagesize
+        * %o4: virt
+        * %o5: &iotsb_num/&iotsb_handle
+        *
+        * returns %o0: status
+        *         %o1: iotsb_num/iotsb_handle
+        */
+ENTRY(pci_sun4v_iotsb_conf)
+       /* %o5 is reloaded with the function number, so keep the result
+        * pointer in %g1 across the trap.
+        */
+       mov     %o5, %g1
+       mov     HV_FAST_PCI_IOTSB_CONF, %o5
+       ta      HV_FAST_TRAP
+       retl
+       /* retl delay slot: write the returned %o1 to the saved pointer */
+        stx    %o1, [%g1]
+ENDPROC(pci_sun4v_iotsb_conf)
+
+       /*
+        * pci_sun4v_iotsb_bind: issue the HV_FAST_PCI_IOTSB_BIND fast trap.
+        * Only the status in %o0 is returned; nothing is stored to memory.
+        *
+        * %o0: devhandle
+        * %o1: iotsb_num/iotsb_handle
+        * %o2: pci_device
+        *
+        * returns %o0: status
+        */
+ENTRY(pci_sun4v_iotsb_bind)
+       mov     HV_FAST_PCI_IOTSB_BIND, %o5
+       ta      HV_FAST_TRAP
+       retl
+        nop
+ENDPROC(pci_sun4v_iotsb_bind)
+
+       /*
+        * pci_sun4v_iotsb_map: issue the HV_FAST_PCI_IOTSB_MAP fast trap
+        * and store the returned %o1 through the pointer passed in %o5.
+        *
+        * %o0: devhandle
+        * %o1: iotsb_num/iotsb_handle
+        * %o2: index_count
+        * %o3: iotte_attributes
+        * %o4: io_page_list_p
+        * %o5: &mapped
+        *
+        * returns %o0: status
+        *         %o1: #mapped
+        */
+ENTRY(pci_sun4v_iotsb_map)
+       /* %o5 is reloaded with the function number, so keep the result
+        * pointer in %g1 across the trap.
+        */
+       mov     %o5, %g1
+       mov     HV_FAST_PCI_IOTSB_MAP, %o5
+       ta      HV_FAST_TRAP
+       retl
+       /* retl delay slot: write the returned %o1 to the saved pointer */
+        stx    %o1, [%g1]
+ENDPROC(pci_sun4v_iotsb_map)
+
+       /*
+        * pci_sun4v_iotsb_demap: issue the HV_FAST_PCI_IOTSB_DEMAP fast trap
+        * and store the returned %o1 through the pointer passed in %o4.
+        *
+        * %o0: devhandle
+        * %o1: iotsb_num/iotsb_handle
+        * %o2: iotsb_index
+        * %o3: #iottes
+        * %o4: &demapped
+        *
+        * returns %o0: status
+        *         %o1: #demapped
+        */
+ENTRY(pci_sun4v_iotsb_demap)
+       /* The result pointer is in %o4, which this stub does not overwrite,
+        * so unlike the conf/map stubs it is not copied to %g1 first.
+        */
+       mov     HV_FAST_PCI_IOTSB_DEMAP, %o5
+       ta      HV_FAST_TRAP
+       retl
+       /* retl delay slot: write the returned %o1 through %o4 */
+        stx    %o1, [%o4]
+ENDPROC(pci_sun4v_iotsb_demap)
index 9ddc4928a089b599568331792097c2bc35ea0be8..ac082dd8c67d5a4f1fbf527145b82bd251fbdc8d 100644 (file)
@@ -127,7 +127,8 @@ static int get_from_target(struct task_struct *target, unsigned long uaddr,
                if (copy_from_user(kbuf, (void __user *) uaddr, len))
                        return -EFAULT;
        } else {
-               int len2 = access_process_vm(target, uaddr, kbuf, len, 0);
+               int len2 = access_process_vm(target, uaddr, kbuf, len,
+                               FOLL_FORCE);
                if (len2 != len)
                        return -EFAULT;
        }
@@ -141,7 +142,8 @@ static int set_to_target(struct task_struct *target, unsigned long uaddr,
                if (copy_to_user((void __user *) uaddr, kbuf, len))
                        return -EFAULT;
        } else {
-               int len2 = access_process_vm(target, uaddr, kbuf, len, 1);
+               int len2 = access_process_vm(target, uaddr, kbuf, len,
+                               FOLL_FORCE | FOLL_WRITE);
                if (len2 != len)
                        return -EFAULT;
        }
@@ -505,7 +507,8 @@ static int genregs32_get(struct task_struct *target,
                                if (access_process_vm(target,
                                                      (unsigned long)
                                                      &reg_window[pos],
-                                                     k, sizeof(*k), 0)
+                                                     k, sizeof(*k),
+                                                     FOLL_FORCE)
                                    != sizeof(*k))
                                        return -EFAULT;
                                k++;
@@ -531,12 +534,14 @@ static int genregs32_get(struct task_struct *target,
                                if (access_process_vm(target,
                                                      (unsigned long)
                                                      &reg_window[pos],
-                                                     &reg, sizeof(reg), 0)
+                                                     &reg, sizeof(reg),
+                                                     FOLL_FORCE)
                                    != sizeof(reg))
                                        return -EFAULT;
                                if (access_process_vm(target,
                                                      (unsigned long) u,
-                                                     &reg, sizeof(reg), 1)
+                                                     &reg, sizeof(reg),
+                                                     FOLL_FORCE | FOLL_WRITE)
                                    != sizeof(reg))
                                        return -EFAULT;
                                pos++;
@@ -615,7 +620,8 @@ static int genregs32_set(struct task_struct *target,
                                                      (unsigned long)
                                                      &reg_window[pos],
                                                      (void *) k,
-                                                     sizeof(*k), 1)
+                                                     sizeof(*k),
+                                                     FOLL_FORCE | FOLL_WRITE)
                                    != sizeof(*k))
                                        return -EFAULT;
                                k++;
@@ -642,13 +648,15 @@ static int genregs32_set(struct task_struct *target,
                                if (access_process_vm(target,
                                                      (unsigned long)
                                                      u,
-                                                     &reg, sizeof(reg), 0)
+                                                     &reg, sizeof(reg),
+                                                     FOLL_FORCE)
                                    != sizeof(reg))
                                        return -EFAULT;
                                if (access_process_vm(target,
                                                      (unsigned long)
                                                      &reg_window[pos],
-                                                     &reg, sizeof(reg), 1)
+                                                     &reg, sizeof(reg),
+                                                     FOLL_FORCE | FOLL_WRITE)
                                    != sizeof(reg))
                                        return -EFAULT;
                                pos++;
index c3c12efe0bc004053fea6b49c2f34e51e1ffc32f..9c0c8fd0b2922cacd2ad6ad81f3238a707286d69 100644 (file)
@@ -89,7 +89,7 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
        sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];
 
        /* 1. Make sure we are not getting garbage from the user */
-       if (!invalid_frame_pointer(sf, sizeof(*sf)))
+       if (invalid_frame_pointer(sf, sizeof(*sf)))
                goto segv_and_exit;
 
        if (get_user(ufp, &sf->info.si_regs.u_regs[UREG_FP]))
@@ -150,7 +150,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 
        synchronize_user_stack();
        sf = (struct rt_signal_frame __user *) regs->u_regs[UREG_FP];
-       if (!invalid_frame_pointer(sf, sizeof(*sf)))
+       if (invalid_frame_pointer(sf, sizeof(*sf)))
                goto segv;
 
        if (get_user(ufp, &sf->regs.u_regs[UREG_FP]))
index d3035ba6cd3181fb2ada3b4f6bbdcf80422a02a8..8182f7caf5b1faa0b5d3cdc3a767411da4ad9f2a 100644 (file)
@@ -63,9 +63,13 @@ cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
 cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
        [0 ... NR_CPUS-1] = CPU_MASK_NONE };
 
+cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = {
+       [0 ... NR_CPUS - 1] = CPU_MASK_NONE };
+
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 EXPORT_SYMBOL(cpu_core_map);
 EXPORT_SYMBOL(cpu_core_sib_map);
+EXPORT_SYMBOL(cpu_core_sib_cache_map);
 
 static cpumask_t smp_commenced_mask;
 
@@ -1265,6 +1269,10 @@ void smp_fill_in_sib_core_maps(void)
                unsigned int j;
 
                for_each_present_cpu(j)  {
+                       if (cpu_data(i).max_cache_id ==
+                           cpu_data(j).max_cache_id)
+                               cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]);
+
                        if (cpu_data(i).sock_id == cpu_data(j).sock_id)
                                cpumask_set_cpu(j, &cpu_core_sib_map[i]);
                }
diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c
new file mode 100644 (file)
index 0000000..09aa69e
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ * arch/sparc/kernel/sparc_ksyms.c: Sparc specific ksyms support.
+ *
+ * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+
+/* This is needed only for drivers/sbus/char/openprom.c */
+EXPORT_SYMBOL(saved_command_line);
diff --git a/arch/sparc/kernel/sparc_ksyms_32.c b/arch/sparc/kernel/sparc_ksyms_32.c
deleted file mode 100644 (file)
index bf4ccb1..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * arch/sparc/kernel/ksyms.c: Sparc specific ksyms support.
- *
- * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
- * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
- */
-
-#include <linux/module.h>
-
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
-#include <asm/delay.h>
-#include <asm/head.h>
-#include <asm/dma.h>
-
-struct poll {
-       int fd;
-       short events;
-       short revents;
-};
-
-/* from entry.S */
-EXPORT_SYMBOL(__udelay);
-EXPORT_SYMBOL(__ndelay);
-
-/* from head_32.S */
-EXPORT_SYMBOL(__ret_efault);
-EXPORT_SYMBOL(empty_zero_page);
-
-/* Exporting a symbol from /init/main.c */
-EXPORT_SYMBOL(saved_command_line);
diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
deleted file mode 100644 (file)
index 9e034f2..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/* arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support.
- *
- * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net)
- * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
- * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
- */
-
-#include <linux/export.h>
-#include <linux/pci.h>
-#include <linux/bitops.h>
-
-#include <asm/cpudata.h>
-#include <asm/uaccess.h>
-#include <asm/spitfire.h>
-#include <asm/oplib.h>
-#include <asm/hypervisor.h>
-#include <asm/cacheflush.h>
-
-struct poll {
-       int fd;
-       short events;
-       short revents;
-};
-
-/* from helpers.S */
-EXPORT_SYMBOL(__flushw_user);
-EXPORT_SYMBOL_GPL(real_hard_smp_processor_id);
-
-/* from head_64.S */
-EXPORT_SYMBOL(__ret_efault);
-EXPORT_SYMBOL(tlb_type);
-EXPORT_SYMBOL(sun4v_chip_type);
-EXPORT_SYMBOL(prom_root_node);
-
-/* from hvcalls.S */
-EXPORT_SYMBOL(sun4v_niagara_getperf);
-EXPORT_SYMBOL(sun4v_niagara_setperf);
-EXPORT_SYMBOL(sun4v_niagara2_getperf);
-EXPORT_SYMBOL(sun4v_niagara2_setperf);
-EXPORT_SYMBOL(sun4v_mach_set_watchdog);
-
-/* from hweight.S */
-EXPORT_SYMBOL(__arch_hweight8);
-EXPORT_SYMBOL(__arch_hweight16);
-EXPORT_SYMBOL(__arch_hweight32);
-EXPORT_SYMBOL(__arch_hweight64);
-
-/* from ffs_ffz.S */
-EXPORT_SYMBOL(ffs);
-EXPORT_SYMBOL(__ffs);
-
-/* Exporting a symbol from /init/main.c */
-EXPORT_SYMBOL(saved_command_line);
index b7d0bd6b14063bc1e62cfed3c0e32f0b43799396..69a439fa2fc1ac809a969d6651e9b9e0f9be11b9 100644 (file)
@@ -3,11 +3,11 @@
  * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_LD(x)               \
+#define EX_LD(x,y)             \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one;  \
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
index 780550e1afc74fd6efe38ef16f73a07ea021a8f2..9947427ce3549799b2e0c5592b3f972e516093ed 100644 (file)
@@ -3,11 +3,11 @@
  * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_ST(x)               \
+#define EX_ST(x,y)             \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one;  \
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
index 89358ee948516cf2ad84c60d1e96e021434bcd32..059ea24ad73dcd91b81ef920f3bb18a1475dceb1 100644 (file)
@@ -4,21 +4,18 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #define GLOBAL_SPARE   %g7
 #else
 #define GLOBAL_SPARE   %g5
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)       x
+#define EX_LD(x,y)     x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)       x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)   x
+#define EX_ST(x,y)     x
 #endif
 
 #ifndef LOAD
        .register       %g3,#scratch
 
        .text
+
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)   x
+/* Fault fixup targets: these labels are passed as the second argument of
+ * the EX_LD()/EX_ST() annotations in the copy loops of this file.  Each
+ * one returns to the caller with %o0 computed from the loop counters
+ * (presumably the number of bytes left uncopied - confirm against the
+ * callers).
+ */
+/* %o0 = %o4 + %o2 + 1 */
+ENTRY(GEN_retl_o4_1)
+       add     %o4, %o2, %o4
+       retl
+        add    %o4, 1, %o0
+ENDPROC(GEN_retl_o4_1)
+/* %o0 = %g1 + %o2 + 8 */
+ENTRY(GEN_retl_g1_8)
+       add     %g1, %o2, %g1
+       retl
+        add    %g1, 8, %o0
+ENDPROC(GEN_retl_g1_8)
+/* %o0 = %o2 + 4 */
+ENTRY(GEN_retl_o2_4)
+       retl
+        add    %o2, 4, %o0
+ENDPROC(GEN_retl_o2_4)
+/* %o0 = %o2 + 1 */
+ENTRY(GEN_retl_o2_1)
+       retl
+        add    %o2, 1, %o0
+ENDPROC(GEN_retl_o2_1)
+#endif
+
        .align          64
 
        .globl  FUNC_NAME
@@ -73,8 +93,8 @@ FUNC_NAME:    /* %o0=dst, %o1=src, %o2=len */
        sub             %g0, %o4, %o4
        sub             %o2, %o4, %o2
 1:     subcc           %o4, 1, %o4
-       EX_LD(LOAD(ldub, %o1, %g1))
-       EX_ST(STORE(stb, %g1, %o0))
+       EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1)
+       EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1)
        add             %o1, 1, %o1
        bne,pt          %XCC, 1b
        add             %o0, 1, %o0
@@ -82,8 +102,8 @@ FUNC_NAME:   /* %o0=dst, %o1=src, %o2=len */
        andn            %o2, 0x7, %g1
        sub             %o2, %g1, %o2
 1:     subcc           %g1, 0x8, %g1
-       EX_LD(LOAD(ldx, %o1, %g2))
-       EX_ST(STORE(stx, %g2, %o0))
+       EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8)
+       EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8)
        add             %o1, 0x8, %o1
        bne,pt          %XCC, 1b
         add            %o0, 0x8, %o0
@@ -100,8 +120,8 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
 
 1:
        subcc           %o2, 4, %o2
-       EX_LD(LOAD(lduw, %o1, %g1))
-       EX_ST(STORE(stw, %g1, %o1 + %o3))
+       EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4)
+       EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4)
        bgu,pt          %XCC, 1b
         add            %o1, 4, %o1
 
@@ -111,8 +131,8 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
        .align          32
 90:
        subcc           %o2, 1, %o2
-       EX_LD(LOAD(ldub, %o1, %g1))
-       EX_ST(STORE(stb, %g1, %o1 + %o3))
+       EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1)
+       EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1)
        bgu,pt          %XCC, 90b
         add            %o1, 1, %o1
        retl
index 3269b0234093bfdbd6b6dcd22388cdc65a627d0a..69912d2f8b54e903ef040b346371cc27204b9d15 100644 (file)
@@ -38,10 +38,9 @@ lib-$(CONFIG_SPARC64) +=  NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o
 lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o
 lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o
 
-lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o
+lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o
 lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o
 
 obj-$(CONFIG_SPARC64) += iomap.o
 obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
-obj-y                 += ksyms.o
 obj-$(CONFIG_SPARC64) += PeeCeeI.o
index d5242b8c4f9495fe4241ee39de01255364e877af..b79a6998d87c82eaeb12f5c1c23ab8fcbc8b10de 100644 (file)
@@ -3,19 +3,19 @@
  * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_LD(x)               \
+#define EX_LD(x,y)             \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one_asi;\
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
-#define EX_LD_FP(x)            \
+#define EX_LD_FP(x,y)          \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one_asi_fp;\
+       .word 98b, y##_fp;      \
        .text;                  \
        .align 4;
 
index 4e962d993b10cdff7677f8d51e61ad877901facc..dcec55f254ab214dc9aa959c4a575ed58f5a19d8 100644 (file)
@@ -3,19 +3,19 @@
  * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_ST(x)               \
+#define EX_ST(x,y)             \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one_asi;\
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
-#define EX_ST_FP(x)            \
+#define EX_ST_FP(x,y)          \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one_asi_fp;\
+       .word 98b, y##_fp;      \
        .text;                  \
        .align 4;
 
index d5f585df2f3fc345c87f04fb1a420fa766816cb5..c629dbd121b6e4fe64494c62bcad656747f05ef2 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #define GLOBAL_SPARE   %g7
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)       x
+#define EX_LD(x,y)     x
 #endif
 #ifndef EX_LD_FP
-#define EX_LD_FP(x)    x
+#define EX_LD_FP(x,y)  x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)       x
+#define EX_ST(x,y)     x
 #endif
 #ifndef EX_ST_FP
-#define EX_ST_FP(x)    x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)   x
+#define EX_ST_FP(x,y)  x
 #endif
 
 #ifndef LOAD
        fsrc2           %x6, %f12; \
        fsrc2           %x7, %f14;
 #define FREG_LOAD_1(base, x0) \
-       EX_LD_FP(LOAD(ldd, base + 0x00, %x0))
+       EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
 #define FREG_LOAD_2(base, x0, x1) \
-       EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-       EX_LD_FP(LOAD(ldd, base + 0x08, %x1));
+       EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_3(base, x0, x1, x2) \
-       EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-       EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-       EX_LD_FP(LOAD(ldd, base + 0x10, %x2));
+       EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_4(base, x0, x1, x2, x3) \
-       EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-       EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-       EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
-       EX_LD_FP(LOAD(ldd, base + 0x18, %x3));
+       EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
-       EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-       EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-       EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
-       EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
-       EX_LD_FP(LOAD(ldd, base + 0x20, %x4));
+       EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
-       EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-       EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-       EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
-       EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
-       EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
-       EX_LD_FP(LOAD(ldd, base + 0x28, %x5));
+       EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
 #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
-       EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \
-       EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \
-       EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \
-       EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \
-       EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \
-       EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \
-       EX_LD_FP(LOAD(ldd, base + 0x30, %x6));
+       EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
+       EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
 
        .register       %g2,#scratch
        .register       %g3,#scratch
 
        .text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)   x
+__restore_fp:  /* common tail of the NG2_retl_*_fp handlers; falls through into __restore_asi */
+       VISExitHalf  /* NOTE(review): presumably the VIS/FPU exit macro from <asm/visasm.h> - confirm */
+__restore_asi:  /* common tail of the non-FP NG2_retl_* handlers */
+       retl  /* return to the caller; %o0 was already set by the handler that branched here */
+        wr     %g0, ASI_AIUS, %asi  /* delay slot of retl: sets %asi to ASI_AIUS */
+ENTRY(NG2_retl_o2)
+       ba,pt   %xcc, __restore_asi
+        mov    %o2, %o0
+ENDPROC(NG2_retl_o2)
+ENTRY(NG2_retl_o2_plus_1)
+       ba,pt   %xcc, __restore_asi
+        add    %o2, 1, %o0
+ENDPROC(NG2_retl_o2_plus_1)
+ENTRY(NG2_retl_o2_plus_4)
+       ba,pt   %xcc, __restore_asi
+        add    %o2, 4, %o0
+ENDPROC(NG2_retl_o2_plus_4)
+ENTRY(NG2_retl_o2_plus_8)
+       ba,pt   %xcc, __restore_asi
+        add    %o2, 8, %o0
+ENDPROC(NG2_retl_o2_plus_8)
+ENTRY(NG2_retl_o2_plus_o4_plus_1)
+       add     %o4, 1, %o4
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_1)
+ENTRY(NG2_retl_o2_plus_o4_plus_8)
+       add     %o4, 8, %o4
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_8)
+ENTRY(NG2_retl_o2_plus_o4_plus_16)
+       add     %o4, 16, %o4
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG2_retl_o2_plus_o4_plus_16)
+ENTRY(NG2_retl_o2_plus_g1_fp)
+       ba,pt   %xcc, __restore_fp
+        add    %o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_fp)
+ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
+       add     %g1, 64, %g1
+       ba,pt   %xcc, __restore_fp
+        add    %o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
+ENTRY(NG2_retl_o2_plus_g1_plus_1)
+       add     %g1, 1, %g1
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %g1, %o0
+ENDPROC(NG2_retl_o2_plus_g1_plus_1)
+ENTRY(NG2_retl_o2_and_7_plus_o4)
+       and     %o2, 7, %o2
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG2_retl_o2_and_7_plus_o4)
+ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
+       and     %o2, 7, %o2
+       add     %o4, 8, %o4
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
+#endif
+
        .align          64
 
        .globl  FUNC_NAME
@@ -230,8 +292,8 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
        sub             %g0, %o4, %o4   ! bytes to align dst
        sub             %o2, %o4, %o2
 1:     subcc           %o4, 1, %o4
-       EX_LD(LOAD(ldub, %o1, %g1))
-       EX_ST(STORE(stb, %g1, %o0))
+       EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
+       EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
        add             %o1, 1, %o1
        bne,pt          %XCC, 1b
        add             %o0, 1, %o0
@@ -281,11 +343,11 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
         nop
        /* fall through for 0 < low bits < 8 */
 110:   sub             %o4, 64, %g2
-       EX_LD_FP(LOAD_BLK(%g2, %f0))
-1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-       EX_LD_FP(LOAD_BLK(%o4, %f16))
+       EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
+1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
        FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
-       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
        FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
        subcc           %g1, 64, %g1
        add             %o4, 64, %o4
@@ -296,10 +358,10 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
 
 120:   sub             %o4, 56, %g2
        FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
-1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-       EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
        FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
-       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
        FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
        subcc           %g1, 64, %g1
        add             %o4, 64, %o4
@@ -310,10 +372,10 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
 
 130:   sub             %o4, 48, %g2
        FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
-1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-       EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
        FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
-       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
        FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
        subcc           %g1, 64, %g1
        add             %o4, 64, %o4
@@ -324,10 +386,10 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
 
 140:   sub             %o4, 40, %g2
        FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
-1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-       EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
        FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
-       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
        FREG_MOVE_5(f22, f24, f26, f28, f30)
        subcc           %g1, 64, %g1
        add             %o4, 64, %o4
@@ -338,10 +400,10 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
 
 150:   sub             %o4, 32, %g2
        FREG_LOAD_4(%g2, f0, f2, f4, f6)
-1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-       EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
        FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
-       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
        FREG_MOVE_4(f24, f26, f28, f30)
        subcc           %g1, 64, %g1
        add             %o4, 64, %o4
@@ -352,10 +414,10 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
 
 160:   sub             %o4, 24, %g2
        FREG_LOAD_3(%g2, f0, f2, f4)
-1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-       EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
        FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
-       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
        FREG_MOVE_3(f26, f28, f30)
        subcc           %g1, 64, %g1
        add             %o4, 64, %o4
@@ -366,10 +428,10 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
 
 170:   sub             %o4, 16, %g2
        FREG_LOAD_2(%g2, f0, f2)
-1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-       EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
        FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
-       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
        FREG_MOVE_2(f28, f30)
        subcc           %g1, 64, %g1
        add             %o4, 64, %o4
@@ -380,10 +442,10 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
 
 180:   sub             %o4, 8, %g2
        FREG_LOAD_1(%g2, f0)
-1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
-       EX_LD_FP(LOAD_BLK(%o4, %f16))
+1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
+       EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
        FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
-       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
        FREG_MOVE_1(f30)
        subcc           %g1, 64, %g1
        add             %o4, 64, %o4
@@ -393,10 +455,10 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
         nop
 
 190:
-1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3))
+1:     EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
        subcc           %g1, 64, %g1
-       EX_LD_FP(LOAD_BLK(%o4, %f0))
-       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3))
+       EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
+       EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
        add             %o4, 64, %o4
        bne,pt          %xcc, 1b
         LOAD(prefetch, %o4 + 64, #one_read)
@@ -423,28 +485,28 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
        andn            %o2, 0xf, %o4
        and             %o2, 0xf, %o2
 1:     subcc           %o4, 0x10, %o4
-       EX_LD(LOAD(ldx, %o1, %o5))
+       EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
        add             %o1, 0x08, %o1
-       EX_LD(LOAD(ldx, %o1, %g1))
+       EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
        sub             %o1, 0x08, %o1
-       EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
+       EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
        add             %o1, 0x8, %o1
-       EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE))
+       EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
        bgu,pt          %XCC, 1b
         add            %o1, 0x8, %o1
 73:    andcc           %o2, 0x8, %g0
        be,pt           %XCC, 1f
         nop
        sub             %o2, 0x8, %o2
-       EX_LD(LOAD(ldx, %o1, %o5))
-       EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE))
+       EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
+       EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
        add             %o1, 0x8, %o1
 1:     andcc           %o2, 0x4, %g0
        be,pt           %XCC, 1f
         nop
        sub             %o2, 0x4, %o2
-       EX_LD(LOAD(lduw, %o1, %o5))
-       EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE))
+       EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
+       EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
        add             %o1, 0x4, %o1
 1:     cmp             %o2, 0
        be,pt           %XCC, 85f
@@ -460,8 +522,8 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
        sub             %o2, %g1, %o2
 
 1:     subcc           %g1, 1, %g1
-       EX_LD(LOAD(ldub, %o1, %o5))
-       EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE))
+       EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
+       EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
        bgu,pt          %icc, 1b
         add            %o1, 1, %o1
 
@@ -477,16 +539,16 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
 
 8:     mov             64, GLOBAL_SPARE
        andn            %o1, 0x7, %o1
-       EX_LD(LOAD(ldx, %o1, %g2))
+       EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
        sub             GLOBAL_SPARE, %g1, GLOBAL_SPARE
        andn            %o2, 0x7, %o4
        sllx            %g2, %g1, %g2
 1:     add             %o1, 0x8, %o1
-       EX_LD(LOAD(ldx, %o1, %g3))
+       EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
        subcc           %o4, 0x8, %o4
        srlx            %g3, GLOBAL_SPARE, %o5
        or              %o5, %g2, %o5
-       EX_ST(STORE(stx, %o5, %o0))
+       EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
        add             %o0, 0x8, %o0
        bgu,pt          %icc, 1b
         sllx           %g3, %g1, %g2
@@ -506,8 +568,8 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
 
 1:
        subcc           %o2, 4, %o2
-       EX_LD(LOAD(lduw, %o1, %g1))
-       EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE))
+       EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
+       EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
        bgu,pt          %XCC, 1b
         add            %o1, 4, %o1
 
@@ -517,8 +579,8 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
        .align          32
 90:
        subcc           %o2, 1, %o2
-       EX_LD(LOAD(ldub, %o1, %g1))
-       EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE))
+       EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
+       EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
        bgu,pt          %XCC, 90b
         add            %o1, 1, %o1
        retl
index 2e8ee7ad07a9ce06129cd63c4129ccade674ab77..16a286c1a52836ee92b2a7d6bac66d3341b0f905 100644 (file)
@@ -3,19 +3,19 @@
  * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_LD(x)               \
+#define EX_LD(x, y)            \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one_asi;\
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
-#define EX_LD_FP(x)            \
+#define EX_LD_FP(x,y)          \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one_asi_fp;\
+       .word 98b, y##_fp;      \
        .text;                  \
        .align 4;
 
index be0bf4590df8971ddf29a81153f05de7c0ef30da..6b0276ffc858c4777d95dffcff95f048dd340cc2 100644 (file)
@@ -3,19 +3,19 @@
  * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_ST(x)               \
+#define EX_ST(x,y)             \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one_asi;\
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
-#define EX_ST_FP(x)            \
+#define EX_ST_FP(x,y)          \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one_asi_fp;\
+       .word 98b, y##_fp;      \
        .text;                  \
        .align 4;
 
index 8e13ee1f4454ea2b6478d302a9a1048bfeb60aff..75bb93b1437f7f6f29ab17c96bc0b4c322f2a373 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #define GLOBAL_SPARE   %g7
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)       x
+#define EX_LD(x,y)     x
 #endif
 #ifndef EX_LD_FP
-#define EX_LD_FP(x)    x
+#define EX_LD_FP(x,y)  x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)       x
+#define EX_ST(x,y)     x
 #endif
 #ifndef EX_ST_FP
-#define EX_ST_FP(x)    x
+#define EX_ST_FP(x,y)  x
 #endif
 
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)   x
-#endif
 
 #ifndef LOAD
 #define LOAD(type,addr,dest)   type [addr], dest
        .register       %g3,#scratch
 
        .text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)   x
+__restore_asi_fp:  /* common tail of the NG4_retl_*_fp handlers; falls through into __restore_asi */
+       VISExitHalf  /* NOTE(review): presumably the VIS/FPU exit macro from <asm/visasm.h> - confirm */
+__restore_asi:  /* common tail of the non-FP NG4_retl_* handlers */
+       retl  /* return to the caller; %o0 was already set by the handler that branched here */
+        wr     %g0, ASI_AIUS, %asi  /* delay slot of retl: sets %asi to ASI_AIUS */
+
+ENTRY(NG4_retl_o2)
+       ba,pt   %xcc, __restore_asi
+        mov    %o2, %o0
+ENDPROC(NG4_retl_o2)
+ENTRY(NG4_retl_o2_plus_1)
+       ba,pt   %xcc, __restore_asi
+        add    %o2, 1, %o0
+ENDPROC(NG4_retl_o2_plus_1)
+ENTRY(NG4_retl_o2_plus_4)
+       ba,pt   %xcc, __restore_asi
+        add    %o2, 4, %o0
+ENDPROC(NG4_retl_o2_plus_4)
+ENTRY(NG4_retl_o2_plus_o5)
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5)
+ENTRY(NG4_retl_o2_plus_o5_plus_4)
+       add     %o5, 4, %o5
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_4)
+ENTRY(NG4_retl_o2_plus_o5_plus_8)
+       add     %o5, 8, %o5
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_8)
+ENTRY(NG4_retl_o2_plus_o5_plus_16)
+       add     %o5, 16, %o5
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_16)
+ENTRY(NG4_retl_o2_plus_o5_plus_24)
+       add     %o5, 24, %o5
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_24)
+ENTRY(NG4_retl_o2_plus_o5_plus_32)
+       add     %o5, 32, %o5
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o5, %o0
+ENDPROC(NG4_retl_o2_plus_o5_plus_32)
+ENTRY(NG4_retl_o2_plus_g1)
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1)
+ENTRY(NG4_retl_o2_plus_g1_plus_1)
+       add     %g1, 1, %g1
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1_plus_1)
+ENTRY(NG4_retl_o2_plus_g1_plus_8)
+       add     %g1, 8, %g1
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %g1, %o0
+ENDPROC(NG4_retl_o2_plus_g1_plus_8)
+ENTRY(NG4_retl_o2_plus_o4)
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4)
+ENTRY(NG4_retl_o2_plus_o4_plus_8)
+       add     %o4, 8, %o4
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_8)
+ENTRY(NG4_retl_o2_plus_o4_plus_16)
+       add     %o4, 16, %o4
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_16)
+ENTRY(NG4_retl_o2_plus_o4_plus_24)
+       add     %o4, 24, %o4
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_24)
+ENTRY(NG4_retl_o2_plus_o4_plus_32)
+       add     %o4, 32, %o4
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_32)
+ENTRY(NG4_retl_o2_plus_o4_plus_40)
+       add     %o4, 40, %o4
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_40)
+ENTRY(NG4_retl_o2_plus_o4_plus_48)
+       add     %o4, 48, %o4
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_48)
+ENTRY(NG4_retl_o2_plus_o4_plus_56)
+       add     %o4, 56, %o4
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_56)
+ENTRY(NG4_retl_o2_plus_o4_plus_64)
+       add     %o4, 64, %o4
+       ba,pt   %xcc, __restore_asi
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_64)
+ENTRY(NG4_retl_o2_plus_o4_fp)
+       ba,pt   %xcc, __restore_asi_fp
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_8_fp)
+       add     %o4, 8, %o4
+       ba,pt   %xcc, __restore_asi_fp
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_16_fp)
+       add     %o4, 16, %o4
+       ba,pt   %xcc, __restore_asi_fp
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_24_fp)
+       add     %o4, 24, %o4
+       ba,pt   %xcc, __restore_asi_fp
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_32_fp)
+       add     %o4, 32, %o4
+       ba,pt   %xcc, __restore_asi_fp
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_40_fp)
+       add     %o4, 40, %o4
+       ba,pt   %xcc, __restore_asi_fp
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_48_fp)
+       add     %o4, 48, %o4
+       ba,pt   %xcc, __restore_asi_fp
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_56_fp)
+       add     %o4, 56, %o4
+       ba,pt   %xcc, __restore_asi_fp
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp)
+ENTRY(NG4_retl_o2_plus_o4_plus_64_fp)
+       add     %o4, 64, %o4
+       ba,pt   %xcc, __restore_asi_fp
+        add    %o2, %o4, %o0
+ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp)
+#endif
        .align          64
 
        .globl  FUNC_NAME
@@ -124,12 +274,13 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
        brz,pt          %g1, 51f
         sub            %o2, %g1, %o2
 
-1:     EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+
+1:     EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
        add             %o1, 1, %o1
        subcc           %g1, 1, %g1
        add             %o0, 1, %o0
        bne,pt          %icc, 1b
-        EX_ST(STORE(stb, %g2, %o0 - 0x01))
+        EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
 
 51:    LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
        LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
@@ -154,43 +305,43 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
        brz,pt          %g1, .Llarge_aligned
         sub            %o2, %g1, %o2
 
-1:     EX_LD(LOAD(ldx, %o1 + 0x00, %g2))
+1:     EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
        add             %o1, 8, %o1
        subcc           %g1, 8, %g1
        add             %o0, 8, %o0
        bne,pt          %icc, 1b
-        EX_ST(STORE(stx, %g2, %o0 - 0x08))
+        EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
 
 .Llarge_aligned:
        /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
        andn            %o2, 0x3f, %o4
        sub             %o2, %o4, %o2
 
-1:     EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+1:     EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4)
        add             %o1, 0x40, %o1
-       EX_LD(LOAD(ldx, %o1 - 0x38, %g2))
+       EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4)
        subcc           %o4, 0x40, %o4
-       EX_LD(LOAD(ldx, %o1 - 0x30, %g3))
-       EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE))
-       EX_LD(LOAD(ldx, %o1 - 0x20, %o5))
-       EX_ST(STORE_INIT(%g1, %o0))
+       EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64)
+       EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64)
+       EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64)
+       EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64)
        add             %o0, 0x08, %o0
-       EX_ST(STORE_INIT(%g2, %o0))
+       EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56)
        add             %o0, 0x08, %o0
-       EX_LD(LOAD(ldx, %o1 - 0x18, %g2))
-       EX_ST(STORE_INIT(%g3, %o0))
+       EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48)
+       EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48)
        add             %o0, 0x08, %o0
-       EX_LD(LOAD(ldx, %o1 - 0x10, %g3))
-       EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+       EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40)
+       EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40)
        add             %o0, 0x08, %o0
-       EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE))
-       EX_ST(STORE_INIT(%o5, %o0))
+       EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32)
+       EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32)
        add             %o0, 0x08, %o0
-       EX_ST(STORE_INIT(%g2, %o0))
+       EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24)
        add             %o0, 0x08, %o0
-       EX_ST(STORE_INIT(%g3, %o0))
+       EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16)
        add             %o0, 0x08, %o0
-       EX_ST(STORE_INIT(GLOBAL_SPARE, %o0))
+       EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
        add             %o0, 0x08, %o0
        bne,pt          %icc, 1b
         LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
@@ -216,17 +367,17 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
        sub             %o2, %o4, %o2
        alignaddr       %o1, %g0, %g1
        add             %o1, %o4, %o1
-       EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0))
-1:     EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2))
+       EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4)
+1:     EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4)
        subcc           %o4, 0x40, %o4
-       EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4))
-       EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6))
-       EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8))
-       EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10))
-       EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12))
-       EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14))
+       EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64)
+       EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64)
+       EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64)
+       EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64)
+       EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64)
+       EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64)
        faligndata      %f0, %f2, %f16
-       EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0))
+       EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64)
        faligndata      %f2, %f4, %f18
        add             %g1, 0x40, %g1
        faligndata      %f4, %f6, %f20
@@ -235,14 +386,14 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
        faligndata      %f10, %f12, %f26
        faligndata      %f12, %f14, %f28
        faligndata      %f14, %f0, %f30
-       EX_ST_FP(STORE(std, %f16, %o0 + 0x00))
-       EX_ST_FP(STORE(std, %f18, %o0 + 0x08))
-       EX_ST_FP(STORE(std, %f20, %o0 + 0x10))
-       EX_ST_FP(STORE(std, %f22, %o0 + 0x18))
-       EX_ST_FP(STORE(std, %f24, %o0 + 0x20))
-       EX_ST_FP(STORE(std, %f26, %o0 + 0x28))
-       EX_ST_FP(STORE(std, %f28, %o0 + 0x30))
-       EX_ST_FP(STORE(std, %f30, %o0 + 0x38))
+       EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64)
+       EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56)
+       EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48)
+       EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40)
+       EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32)
+       EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24)
+       EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
+       EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
        add             %o0, 0x40, %o0
        bne,pt          %icc, 1b
         LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
@@ -270,37 +421,38 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
        andncc          %o2, 0x20 - 1, %o5
        be,pn           %icc, 2f
         sub            %o2, %o5, %o2
-1:     EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
-       EX_LD(LOAD(ldx, %o1 + 0x08, %g2))
-       EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE))
-       EX_LD(LOAD(ldx, %o1 + 0x18, %o4))
+1:     EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
+       EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
+       EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5)
+       EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5)
        add             %o1, 0x20, %o1
        subcc           %o5, 0x20, %o5
-       EX_ST(STORE(stx, %g1, %o0 + 0x00))
-       EX_ST(STORE(stx, %g2, %o0 + 0x08))
-       EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10))
-       EX_ST(STORE(stx, %o4, %o0 + 0x18))
+       /* %o5 was already reduced by 0x20 above, so each store's fixup adds
+        * back the bytes of this 32-byte group that are not yet stored:
+        * 32, 24, 16, 8.
+        */
+       EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32)
+       EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
+       EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_16)
+       EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
        bne,pt          %icc, 1b
         add            %o0, 0x20, %o0
 2:     andcc           %o2, 0x18, %o5
        be,pt           %icc, 3f
         sub            %o2, %o5, %o2
-1:     EX_LD(LOAD(ldx, %o1 + 0x00, %g1))
+
+1:     EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
        add             %o1, 0x08, %o1
        add             %o0, 0x08, %o0
        subcc           %o5, 0x08, %o5
        bne,pt          %icc, 1b
-        EX_ST(STORE(stx, %g1, %o0 - 0x08))
+        EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
 3:     brz,pt          %o2, .Lexit
         cmp            %o2, 0x04
        bl,pn           %icc, .Ltiny
         nop
-       EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+       EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
        add             %o1, 0x04, %o1
        add             %o0, 0x04, %o0
        subcc           %o2, 0x04, %o2
        bne,pn          %icc, .Ltiny
-        EX_ST(STORE(stw, %g1, %o0 - 0x04))
+        EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
        ba,a,pt         %icc, .Lexit
 .Lmedium_unaligned:
        /* First get dest 8 byte aligned.  */
@@ -309,12 +461,12 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
        brz,pt          %g1, 2f
         sub            %o2, %g1, %o2
 
-1:     EX_LD(LOAD(ldub, %o1 + 0x00, %g2))
+1:     EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1)
        add             %o1, 1, %o1
        subcc           %g1, 1, %g1
        add             %o0, 1, %o0
        bne,pt          %icc, 1b
-        EX_ST(STORE(stb, %g2, %o0 - 0x01))
+        EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
 2:
        and             %o1, 0x7, %g1
        brz,pn          %g1, .Lmedium_noprefetch
@@ -322,16 +474,16 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
        mov             64, %g2
        sub             %g2, %g1, %g2
        andn            %o1, 0x7, %o1
-       EX_LD(LOAD(ldx, %o1 + 0x00, %o4))
+       EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2)
        sllx            %o4, %g1, %o4
        andn            %o2, 0x08 - 1, %o5
        sub             %o2, %o5, %o2
-1:     EX_LD(LOAD(ldx, %o1 + 0x08, %g3))
+1:     EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5)
        add             %o1, 0x08, %o1
        subcc           %o5, 0x08, %o5
        srlx            %g3, %g2, GLOBAL_SPARE
        or              GLOBAL_SPARE, %o4, GLOBAL_SPARE
-       EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00))
+       EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
        add             %o0, 0x08, %o0
        bne,pt          %icc, 1b
         sllx           %g3, %g1, %o4
@@ -342,17 +494,17 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
        ba,pt           %icc, .Lsmall_unaligned
 
 .Ltiny:
-       EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+       EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
        subcc           %o2, 1, %o2
        be,pn           %icc, .Lexit
-        EX_ST(STORE(stb, %g1, %o0 + 0x00))
-       EX_LD(LOAD(ldub, %o1 + 0x01, %g1))
+        EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
+       EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
        subcc           %o2, 1, %o2
        be,pn           %icc, .Lexit
-        EX_ST(STORE(stb, %g1, %o0 + 0x01))
-       EX_LD(LOAD(ldub, %o1 + 0x02, %g1))
+        EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
+       EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
        ba,pt           %icc, .Lexit
-        EX_ST(STORE(stb, %g1, %o0 + 0x02))
+        EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
 
 .Lsmall:
        andcc           %g2, 0x3, %g0
@@ -360,22 +512,22 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
         andn           %o2, 0x4 - 1, %o5
        sub             %o2, %o5, %o2
 1:
-       EX_LD(LOAD(lduw, %o1 + 0x00, %g1))
+       EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
        add             %o1, 0x04, %o1
        subcc           %o5, 0x04, %o5
        add             %o0, 0x04, %o0
        bne,pt          %icc, 1b
-        EX_ST(STORE(stw, %g1, %o0 - 0x04))
+        EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
        brz,pt          %o2, .Lexit
         nop
        ba,a,pt         %icc, .Ltiny
 
 .Lsmall_unaligned:
-1:     EX_LD(LOAD(ldub, %o1 + 0x00, %g1))
+1:     EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
        add             %o1, 1, %o1
        add             %o0, 1, %o0
        subcc           %o2, 1, %o2
        bne,pt          %icc, 1b
-        EX_ST(STORE(stb, %g1, %o0 - 0x01))
+        EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
        ba,a,pt         %icc, .Lexit
        .size           FUNC_NAME, .-FUNC_NAME
index 5d1e4d1ac21edf09a664663dc005e1fcae805b6a..9cd42fcbc781152ca95e3c8051962ea7de1538a9 100644 (file)
@@ -3,11 +3,11 @@
  * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_LD(x)               \
+#define EX_LD(x,y)             \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __ret_one_asi;\
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
index ff630dcb273c9649de6fc9e145fce42e72667787..5c358afd464e24f3513cb6716f2983ca9f201dff 100644 (file)
@@ -3,11 +3,11 @@
  * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
  */
 
-#define EX_ST(x)               \
+#define EX_ST(x,y)             \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __ret_one_asi;\
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
index 96a14caf6966282cab2d97071f7c3837b2a6727a..d88c4ed50a0023cd8e2daa06689abc0d46d79f46 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/thread_info.h>
 #define GLOBAL_SPARE   %g7
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)       x
+#define EX_LD(x,y)     x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)       x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)   x
+#define EX_ST(x,y)     x
 #endif
 
 #ifndef LOAD
        .register       %g3,#scratch
 
        .text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)   x
+/* Common tail for the NG_ret_* fault fixups below: set %asi back to
+ * ASI_AIUS and return.  The restore has to sit in the delay slot of ret,
+ * otherwise it is never executed and the register window (and with it the
+ * %i0 result computed by the fixup) is not handed back to the caller.
+ */
+__restore_asi:
+       wr      %g0, ASI_AIUS, %asi
+       ret
+        restore
+/* Fault fixup: %i0 = %i2 + %i4 + 1.
+ * Used by the dst-alignment byte loop, which decrements %i4 before the
+ * load/store that may fault, so the byte in flight is added back here.
+ * Modifies %i4.
+ */
+ENTRY(NG_ret_i2_plus_i4_plus_1)
+       add     %i4, 1, %i4
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %i4, %i0
+ENDPROC(NG_ret_i2_plus_i4_plus_1)
+ENTRY(NG_ret_i2_plus_g1) /* fault fixup: %i0 = %i2 + %g1, then __restore_asi */
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1)
+ENTRY(NG_ret_i2_plus_g1_minus_8) /* %i0 = %i2 + %g1 - 8 (modifies %g1) */
+       sub     %g1, 8, %g1
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_8)
+ENTRY(NG_ret_i2_plus_g1_minus_16) /* %i0 = %i2 + %g1 - 16 (modifies %g1) */
+       sub     %g1, 16, %g1
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_16)
+ENTRY(NG_ret_i2_plus_g1_minus_24) /* %i0 = %i2 + %g1 - 24 (modifies %g1) */
+       sub     %g1, 24, %g1
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_24)
+ENTRY(NG_ret_i2_plus_g1_minus_32) /* %i0 = %i2 + %g1 - 32 (modifies %g1) */
+       sub     %g1, 32, %g1
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_32)
+ENTRY(NG_ret_i2_plus_g1_minus_40) /* %i0 = %i2 + %g1 - 40 (modifies %g1) */
+       sub     %g1, 40, %g1
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_40)
+ENTRY(NG_ret_i2_plus_g1_minus_48) /* %i0 = %i2 + %g1 - 48 (modifies %g1) */
+       sub     %g1, 48, %g1
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_48)
+ENTRY(NG_ret_i2_plus_g1_minus_56) /* %i0 = %i2 + %g1 - 56 (modifies %g1) */
+       sub     %g1, 56, %g1
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_minus_56)
+ENTRY(NG_ret_i2_plus_i4) /* %i0 = %i2 + %i4 */
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %i4, %i0
+ENDPROC(NG_ret_i2_plus_i4)
+ENTRY(NG_ret_i2_plus_i4_minus_8) /* %i0 = %i2 + %i4 - 8 (modifies %i4) */
+       sub     %i4, 8, %i4
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %i4, %i0
+ENDPROC(NG_ret_i2_plus_i4_minus_8)
+ENTRY(NG_ret_i2_plus_8) /* %i0 = %i2 + 8 */
+       ba,pt   %xcc, __restore_asi
+        add    %i2, 8, %i0
+ENDPROC(NG_ret_i2_plus_8)
+ENTRY(NG_ret_i2_plus_4) /* %i0 = %i2 + 4 */
+       ba,pt   %xcc, __restore_asi
+        add    %i2, 4, %i0
+ENDPROC(NG_ret_i2_plus_4)
+ENTRY(NG_ret_i2_plus_1) /* %i0 = %i2 + 1 */
+       ba,pt   %xcc, __restore_asi
+        add    %i2, 1, %i0
+ENDPROC(NG_ret_i2_plus_1)
+ENTRY(NG_ret_i2_plus_g1_plus_1) /* %i0 = %i2 + %g1 + 1 (modifies %g1) */
+       add     %g1, 1, %g1
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %g1, %i0
+ENDPROC(NG_ret_i2_plus_g1_plus_1)
+ENTRY(NG_ret_i2) /* %i0 = %i2 */
+       ba,pt   %xcc, __restore_asi
+        mov    %i2, %i0
+ENDPROC(NG_ret_i2)
+ENTRY(NG_ret_i2_and_7_plus_i4) /* %i0 = (%i2 & 7) + %i4 (modifies %i2) */
+       and     %i2, 7, %i2
+       ba,pt   %xcc, __restore_asi
+        add    %i2, %i4, %i0
+ENDPROC(NG_ret_i2_and_7_plus_i4)
+#endif
+
        .align          64
 
        .globl  FUNC_NAME
@@ -126,8 +209,8 @@ FUNC_NAME:  /* %i0=dst, %i1=src, %i2=len */
        sub             %g0, %i4, %i4   ! bytes to align dst
        sub             %i2, %i4, %i2
 1:     subcc           %i4, 1, %i4
-       EX_LD(LOAD(ldub, %i1, %g1))
-       EX_ST(STORE(stb, %g1, %o0))
+       EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
+       EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
        add             %i1, 1, %i1
        bne,pt          %XCC, 1b
        add             %o0, 1, %o0
@@ -160,7 +243,7 @@ FUNC_NAME:  /* %i0=dst, %i1=src, %i2=len */
        and             %i4, 0x7, GLOBAL_SPARE
        sll             GLOBAL_SPARE, 3, GLOBAL_SPARE
        mov             64, %i5
-       EX_LD(LOAD_TWIN(%i1, %g2, %g3))
+       EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
        sub             %i5, GLOBAL_SPARE, %i5
        mov             16, %o4
        mov             32, %o5
@@ -178,31 +261,31 @@ FUNC_NAME:        /* %i0=dst, %i1=src, %i2=len */
        srlx            WORD3, PRE_SHIFT, TMP; \
        or              WORD2, TMP, WORD2;
 
-8:     EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
+8:     EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
        MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
        LOAD(prefetch, %i1 + %i3, #one_read)
 
-       EX_ST(STORE_INIT(%g2, %o0 + 0x00))
-       EX_ST(STORE_INIT(%g3, %o0 + 0x08))
+       EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
+       EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
 
-       EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
+       EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
        MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
 
-       EX_ST(STORE_INIT(%o2, %o0 + 0x10))
-       EX_ST(STORE_INIT(%o3, %o0 + 0x18))
+       EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+       EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
 
-       EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+       EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
        MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
 
-       EX_ST(STORE_INIT(%g2, %o0 + 0x20))
-       EX_ST(STORE_INIT(%g3, %o0 + 0x28))
+       EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+       EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
 
-       EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
+       EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
        add             %i1, 64, %i1
        MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
 
-       EX_ST(STORE_INIT(%o2, %o0 + 0x30))
-       EX_ST(STORE_INIT(%o3, %o0 + 0x38))
+       EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+       EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 
        subcc           %g1, 64, %g1
        bne,pt          %XCC, 8b
@@ -211,31 +294,31 @@ FUNC_NAME:        /* %i0=dst, %i1=src, %i2=len */
        ba,pt           %XCC, 60f
         add            %i1, %i4, %i1
 
-9:     EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3))
+9:     EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
        MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
        LOAD(prefetch, %i1 + %i3, #one_read)
 
-       EX_ST(STORE_INIT(%g3, %o0 + 0x00))
-       EX_ST(STORE_INIT(%o2, %o0 + 0x08))
+       EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
+       EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
 
-       EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3))
+       EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
        MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
 
-       EX_ST(STORE_INIT(%o3, %o0 + 0x10))
-       EX_ST(STORE_INIT(%g2, %o0 + 0x18))
+       EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+       EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
 
-       EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+       EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
        MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
 
-       EX_ST(STORE_INIT(%g3, %o0 + 0x20))
-       EX_ST(STORE_INIT(%o2, %o0 + 0x28))
+       EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+       EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
 
-       EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3))
+       EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
        add             %i1, 64, %i1
        MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
 
-       EX_ST(STORE_INIT(%o3, %o0 + 0x30))
-       EX_ST(STORE_INIT(%g2, %o0 + 0x38))
+       EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+       EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
 
        subcc           %g1, 64, %g1
        bne,pt          %XCC, 9b
@@ -249,25 +332,25 @@ FUNC_NAME:        /* %i0=dst, %i1=src, %i2=len */
         * one twin load ahead, then add 8 back into source when
         * we finish the loop.
         */
-       EX_LD(LOAD_TWIN(%i1, %o4, %o5))
+       EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
        mov     16, %o7
        mov     32, %g2
        mov     48, %g3
        mov     64, %o1
-1:     EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+1:     EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
        LOAD(prefetch, %i1 + %o1, #one_read)
-       EX_ST(STORE_INIT(%o5, %o0 + 0x00))      ! initializes cache line
-       EX_ST(STORE_INIT(%o2, %o0 + 0x08))
-       EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
-       EX_ST(STORE_INIT(%o3, %o0 + 0x10))
-       EX_ST(STORE_INIT(%o4, %o0 + 0x18))
-       EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
-       EX_ST(STORE_INIT(%o5, %o0 + 0x20))
-       EX_ST(STORE_INIT(%o2, %o0 + 0x28))
-       EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5))
+       EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1)   ! initializes cache line
+       EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+       EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
+       EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+       EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+       EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
+       EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+       EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+       EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
        add             %i1, 64, %i1
-       EX_ST(STORE_INIT(%o3, %o0 + 0x30))
-       EX_ST(STORE_INIT(%o4, %o0 + 0x38))
+       EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+       EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
        subcc           %g1, 64, %g1
        bne,pt          %XCC, 1b
         add            %o0, 64, %o0
@@ -282,20 +365,20 @@ FUNC_NAME:        /* %i0=dst, %i1=src, %i2=len */
        mov     32, %g2
        mov     48, %g3
        mov     64, %o1
-1:     EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5))
-       EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3))
+1:     EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
+       EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
        LOAD(prefetch, %i1 + %o1, #one_read)
-       EX_ST(STORE_INIT(%o4, %o0 + 0x00))      ! initializes cache line
-       EX_ST(STORE_INIT(%o5, %o0 + 0x08))
-       EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5))
-       EX_ST(STORE_INIT(%o2, %o0 + 0x10))
-       EX_ST(STORE_INIT(%o3, %o0 + 0x18))
-       EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3))
+       EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1)   ! initializes cache line
+       EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
+       EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
+       EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
+       EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
+       EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
        add     %i1, 64, %i1
-       EX_ST(STORE_INIT(%o4, %o0 + 0x20))
-       EX_ST(STORE_INIT(%o5, %o0 + 0x28))
-       EX_ST(STORE_INIT(%o2, %o0 + 0x30))
-       EX_ST(STORE_INIT(%o3, %o0 + 0x38))
+       EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
+       EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
+       EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
+       EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
        subcc   %g1, 64, %g1
        bne,pt  %XCC, 1b
         add    %o0, 64, %o0
@@ -321,28 +404,28 @@ FUNC_NAME:        /* %i0=dst, %i1=src, %i2=len */
        andn            %i2, 0xf, %i4
        and             %i2, 0xf, %i2
 1:     subcc           %i4, 0x10, %i4
-       EX_LD(LOAD(ldx, %i1, %o4))
+       EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
        add             %i1, 0x08, %i1
-       EX_LD(LOAD(ldx, %i1, %g1))
+       EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
        sub             %i1, 0x08, %i1
-       EX_ST(STORE(stx, %o4, %i1 + %i3))
+       EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
        add             %i1, 0x8, %i1
-       EX_ST(STORE(stx, %g1, %i1 + %i3))
+       EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
        bgu,pt          %XCC, 1b
         add            %i1, 0x8, %i1
 73:    andcc           %i2, 0x8, %g0
        be,pt           %XCC, 1f
         nop
        sub             %i2, 0x8, %i2
-       EX_LD(LOAD(ldx, %i1, %o4))
-       EX_ST(STORE(stx, %o4, %i1 + %i3))
+       EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
+       EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
        add             %i1, 0x8, %i1
 1:     andcc           %i2, 0x4, %g0
        be,pt           %XCC, 1f
         nop
        sub             %i2, 0x4, %i2
-       EX_LD(LOAD(lduw, %i1, %i5))
-       EX_ST(STORE(stw, %i5, %i1 + %i3))
+       EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
+       EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
        add             %i1, 0x4, %i1
 1:     cmp             %i2, 0
        be,pt           %XCC, 85f
@@ -358,8 +441,8 @@ FUNC_NAME:  /* %i0=dst, %i1=src, %i2=len */
        sub             %i2, %g1, %i2
 
 1:     subcc           %g1, 1, %g1
-       EX_LD(LOAD(ldub, %i1, %i5))
-       EX_ST(STORE(stb, %i5, %i1 + %i3))
+       EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
+       EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
        bgu,pt          %icc, 1b
         add            %i1, 1, %i1
 
@@ -375,16 +458,16 @@ FUNC_NAME:        /* %i0=dst, %i1=src, %i2=len */
 
 8:     mov             64, %i3
        andn            %i1, 0x7, %i1
-       EX_LD(LOAD(ldx, %i1, %g2))
+       EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
        sub             %i3, %g1, %i3
        andn            %i2, 0x7, %i4
        sllx            %g2, %g1, %g2
 1:     add             %i1, 0x8, %i1
-       EX_LD(LOAD(ldx, %i1, %g3))
+       EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
        subcc           %i4, 0x8, %i4
        srlx            %g3, %i3, %i5
        or              %i5, %g2, %i5
-       EX_ST(STORE(stx, %i5, %o0))
+       EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
        add             %o0, 0x8, %o0
        bgu,pt          %icc, 1b
         sllx           %g3, %g1, %g2
@@ -404,8 +487,8 @@ FUNC_NAME:  /* %i0=dst, %i1=src, %i2=len */
 
 1:
        subcc           %i2, 4, %i2
-       EX_LD(LOAD(lduw, %i1, %g1))
-       EX_ST(STORE(stw, %g1, %i1 + %i3))
+       EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
+       EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
        bgu,pt          %XCC, 1b
         add            %i1, 4, %i1
 
@@ -415,8 +498,8 @@ FUNC_NAME:  /* %i0=dst, %i1=src, %i2=len */
        .align          32
 90:
        subcc           %i2, 1, %i2
-       EX_LD(LOAD(ldub, %i1, %g1))
-       EX_ST(STORE(stb, %g1, %i1 + %i3))
+       EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
+       EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
        bgu,pt          %XCC, 90b
         add            %i1, 1, %i1
        ret
index ecc5692fa2b49a3acfc6a6592c6c247835417b4c..bb6ff73229e3e5e7eac225389db53c27d989abc1 100644 (file)
@@ -3,19 +3,19 @@
  * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
 
-#define EX_LD(x)               \
+#define EX_LD(x,y)             \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one;  \
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
-#define EX_LD_FP(x)            \
+#define EX_LD_FP(x,y)          \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one_fp;\
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
index 9eea392e44d471679ba85c22867a9767b34912a5..ed92ce73955889dba9faa8f028b7e5e6c4326ecf 100644 (file)
@@ -3,19 +3,19 @@
  * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
 
-#define EX_ST(x)               \
+#define EX_ST(x,y)             \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one;  \
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
-#define EX_ST_FP(x)            \
+#define EX_ST_FP(x,y)          \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one_fp;\
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
index 3e6209ebb7d7865fd62bb21df56dabda07fe4f5f..4f0d50b33a72482e98411a2bd1b891de7229089d 100644 (file)
@@ -5,8 +5,10 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
+#include <asm/export.h>
 #define GLOBAL_SPARE   g7
 #else
 #define GLOBAL_SPARE   g5
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)       x
+#define EX_LD(x,y)     x
 #endif
 #ifndef EX_LD_FP
-#define EX_LD_FP(x)    x
+#define EX_LD_FP(x,y)  x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)       x
+#define EX_ST(x,y)     x
 #endif
 #ifndef EX_ST_FP
-#define EX_ST_FP(x)    x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)   x
+#define EX_ST_FP(x,y)  x
 #endif
 
 #ifndef LOAD
        faligndata              %f7, %f8, %f60;                 \
        faligndata              %f8, %f9, %f62;
 
-#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)   \
-       EX_LD_FP(LOAD_BLK(%src, %fdest));                               \
-       EX_ST_FP(STORE_BLK(%fsrc, %dest));                              \
-       add                     %src, 0x40, %src;               \
-       subcc                   %len, 0x40, %len;               \
-       be,pn                   %xcc, jmptgt;                   \
-        add                    %dest, 0x40, %dest;             \
-
-#define LOOP_CHUNK1(src, dest, len, branch_dest)               \
-       MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
-#define LOOP_CHUNK2(src, dest, len, branch_dest)               \
-       MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
-#define LOOP_CHUNK3(src, dest, len, branch_dest)               \
-       MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
+#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt)                        \
+       EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp);                  \
+       EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp);                 \
+       add                     %src, 0x40, %src;                       \
+       subcc                   %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE;     \
+       be,pn                   %xcc, jmptgt;                           \
+        add                    %dest, 0x40, %dest;                     \
+
+#define LOOP_CHUNK1(src, dest, branch_dest)            \
+       MAIN_LOOP_CHUNK(src, dest, f0,  f48, branch_dest)
+#define LOOP_CHUNK2(src, dest, branch_dest)            \
+       MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
+#define LOOP_CHUNK3(src, dest, branch_dest)            \
+       MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
 
 #define DO_SYNC                        membar  #Sync;
 #define STORE_SYNC(dest, fsrc)                         \
-       EX_ST_FP(STORE_BLK(%fsrc, %dest));                      \
+       EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
        add                     %dest, 0x40, %dest;     \
        DO_SYNC
 
 #define STORE_JUMP(dest, fsrc, target)                 \
-       EX_ST_FP(STORE_BLK(%fsrc, %dest));                      \
+       EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
        add                     %dest, 0x40, %dest;     \
        ba,pt                   %xcc, target;           \
         nop;
 
-#define FINISH_VISCHUNK(dest, f0, f1, left)    \
-       subcc                   %left, 8, %left;\
-       bl,pn                   %xcc, 95f;      \
-        faligndata             %f0, %f1, %f48; \
-       EX_ST_FP(STORE(std, %f48, %dest));              \
+#define FINISH_VISCHUNK(dest, f0, f1)                  \
+       subcc                   %g3, 8, %g3;            \
+       bl,pn                   %xcc, 95f;              \
+        faligndata             %f0, %f1, %f48;         \
+       EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp);  \
        add                     %dest, 8, %dest;
 
-#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)       \
-       subcc                   %left, 8, %left;        \
-       bl,pn                   %xcc, 95f;              \
+#define UNEVEN_VISCHUNK_LAST(dest, f0, f1)     \
+       subcc                   %g3, 8, %g3;    \
+       bl,pn                   %xcc, 95f;      \
         fsrc2                  %f0, %f1;
 
-#define UNEVEN_VISCHUNK(dest, f0, f1, left)            \
-       UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)        \
+#define UNEVEN_VISCHUNK(dest, f0, f1)          \
+       UNEVEN_VISCHUNK_LAST(dest, f0, f1)      \
        ba,a,pt                 %xcc, 93f;
 
        .register       %g2,#scratch
        .register       %g3,#scratch
 
        .text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)   x
+ENTRY(U1_g1_1_fp) /* fault fixup: VISExitHalf, then %o0 = %g1 + 1 + %g2 + %o2 (modifies %g1) */
+       VISExitHalf
+       add             %g1, 1, %g1
+       add             %g1, %g2, %g1
+       retl
+        add            %g1, %o2, %o0
+ENDPROC(U1_g1_1_fp)
+ENTRY(U1_g2_0_fp) /* VISExitHalf, then %o0 = %g2 + %o2 */
+       VISExitHalf
+       retl
+        add            %g2, %o2, %o0
+ENDPROC(U1_g2_0_fp)
+ENTRY(U1_g2_8_fp) /* VISExitHalf, then %o0 = %g2 + 8 + %o2 (modifies %g2) */
+       VISExitHalf
+       add             %g2, 8, %g2
+       retl
+        add            %g2, %o2, %o0
+ENDPROC(U1_g2_8_fp)
+ENTRY(U1_gs_0_fp) /* VISExitHalf, then %o0 = %GLOBAL_SPARE + %g3 + %o2 */
+       VISExitHalf
+       add             %GLOBAL_SPARE, %g3, %o0
+       retl
+        add            %o0, %o2, %o0
+ENDPROC(U1_gs_0_fp)
+ENTRY(U1_gs_80_fp) /* VISExitHalf, then %o0 = %GLOBAL_SPARE + 0x80 + %g3 + %o2 (modifies %GLOBAL_SPARE) */
+       VISExitHalf
+       add             %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
+       add             %GLOBAL_SPARE, %g3, %o0
+       retl
+        add            %o0, %o2, %o0
+ENDPROC(U1_gs_80_fp)
+ENTRY(U1_gs_40_fp) /* VISExitHalf, then %o0 = %GLOBAL_SPARE + 0x40 + %g3 + %o2 (modifies %GLOBAL_SPARE) */
+       VISExitHalf
+       add             %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
+       add             %GLOBAL_SPARE, %g3, %o0
+       retl
+        add            %o0, %o2, %o0
+ENDPROC(U1_gs_40_fp)
+ENTRY(U1_g3_0_fp) /* VISExitHalf, then %o0 = %g3 + %o2 */
+       VISExitHalf
+       retl
+        add            %g3, %o2, %o0
+ENDPROC(U1_g3_0_fp)
+ENTRY(U1_g3_8_fp) /* VISExitHalf, then %o0 = %g3 + 8 + %o2 (modifies %g3) */
+       VISExitHalf
+       add             %g3, 8, %g3
+       retl
+        add            %g3, %o2, %o0
+ENDPROC(U1_g3_8_fp)
+ENTRY(U1_o2_0_fp) /* VISExitHalf, then %o0 = %o2 */
+       VISExitHalf
+       retl
+        mov            %o2, %o0
+ENDPROC(U1_o2_0_fp)
+ENTRY(U1_o2_1_fp) /* VISExitHalf, then %o0 = %o2 + 1 */
+       VISExitHalf
+       retl
+        add            %o2, 1, %o0
+ENDPROC(U1_o2_1_fp)
+ENTRY(U1_gs_0) /* %o0 = %GLOBAL_SPARE + %o2; NOTE(review): runs VISExitHalf although the name has no _fp suffix - confirm intended */
+       VISExitHalf
+       retl
+        add            %GLOBAL_SPARE, %o2, %o0
+ENDPROC(U1_gs_0)
+ENTRY(U1_gs_8) /* %o0 = %GLOBAL_SPARE + %o2 + 0x8 (modifies %GLOBAL_SPARE); also runs VISExitHalf - confirm intended */
+       VISExitHalf
+       add             %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
+       retl
+        add            %GLOBAL_SPARE, 0x8, %o0
+ENDPROC(U1_gs_8)
+ENTRY(U1_gs_10) /* %o0 = %GLOBAL_SPARE + %o2 + 0x10 (modifies %GLOBAL_SPARE); also runs VISExitHalf - confirm intended */
+       VISExitHalf
+       add             %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
+       retl
+        add            %GLOBAL_SPARE, 0x10, %o0
+ENDPROC(U1_gs_10)
+ENTRY(U1_o2_0) /* %o0 = %o2 */
+       retl
+        mov            %o2, %o0
+ENDPROC(U1_o2_0)
+ENTRY(U1_o2_8) /* %o0 = %o2 + 8 */
+       retl
+        add            %o2, 8, %o0
+ENDPROC(U1_o2_8)
+ENTRY(U1_o2_4) /* %o0 = %o2 + 4 */
+       retl
+        add            %o2, 4, %o0
+ENDPROC(U1_o2_4)
+ENTRY(U1_o2_1) /* %o0 = %o2 + 1 */
+       retl
+        add            %o2, 1, %o0
+ENDPROC(U1_o2_1)
+ENTRY(U1_g1_0) /* %o0 = %g1 + %o2 */
+       retl
+        add            %g1, %o2, %o0
+ENDPROC(U1_g1_0)
+ENTRY(U1_g1_1) /* %o0 = %g1 + 1 + %o2 (modifies %g1) */
+       add             %g1, 1, %g1
+       retl
+        add            %g1, %o2, %o0
+ENDPROC(U1_g1_1)
+ENTRY(U1_gs_0_o2_adj) /* %o0 = %GLOBAL_SPARE + (%o2 & 7) (modifies %o2) */
+       and             %o2, 7, %o2
+       retl
+        add            %GLOBAL_SPARE, %o2, %o0
+ENDPROC(U1_gs_0_o2_adj)
+ENTRY(U1_gs_8_o2_adj) /* %o0 = %GLOBAL_SPARE + 8 + (%o2 & 7) (modifies %o2, %GLOBAL_SPARE) */
+       and             %o2, 7, %o2
+       add             %GLOBAL_SPARE, 8, %GLOBAL_SPARE
+       retl
+        add            %GLOBAL_SPARE, %o2, %o0
+ENDPROC(U1_gs_8_o2_adj)
+#endif
+
        .align          64
 
        .globl          FUNC_NAME
@@ -166,8 +280,8 @@ FUNC_NAME:          /* %o0=dst, %o1=src, %o2=len */
         and            %g2, 0x38, %g2
 
 1:     subcc           %g1, 0x1, %g1
-       EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
-       EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE))
+       EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
+       EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
        bgu,pt          %XCC, 1b
         add            %o1, 0x1, %o1
 
@@ -178,20 +292,20 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
        be,pt           %icc, 3f
         alignaddr      %o1, %g0, %o1
 
-       EX_LD_FP(LOAD(ldd, %o1, %f4))
-1:     EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
+       EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
+1:     EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
        add             %o1, 0x8, %o1
        subcc           %g2, 0x8, %g2
        faligndata      %f4, %f6, %f0
-       EX_ST_FP(STORE(std, %f0, %o0))
+       EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
        be,pn           %icc, 3f
         add            %o0, 0x8, %o0
 
-       EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
        add             %o1, 0x8, %o1
        subcc           %g2, 0x8, %g2
        faligndata      %f6, %f4, %f0
-       EX_ST_FP(STORE(std, %f0, %o0))
+       EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
        bne,pt          %icc, 1b
         add            %o0, 0x8, %o0
 
@@ -214,13 +328,13 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
        add             %g1, %GLOBAL_SPARE, %g1
        subcc           %o2, %g3, %o2
 
-       EX_LD_FP(LOAD_BLK(%o1, %f0))
+       EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
        add             %o1, 0x40, %o1
        add             %g1, %g3, %g1
-       EX_LD_FP(LOAD_BLK(%o1, %f16))
+       EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
        add             %o1, 0x40, %o1
        sub             %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
-       EX_LD_FP(LOAD_BLK(%o1, %f32))
+       EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
        add             %o1, 0x40, %o1
 
        /* There are 8 instances of the unrolled loop,
@@ -240,11 +354,11 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
 
        .align          64
 1:     FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
-       LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+       LOOP_CHUNK1(o1, o0, 1f)
        FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
-       LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+       LOOP_CHUNK2(o1, o0, 2f)
        FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
-       LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+       LOOP_CHUNK3(o1, o0, 3f)
        ba,pt           %xcc, 1b+4
         faligndata     %f0, %f2, %f48
 1:     FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
@@ -261,11 +375,11 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
        STORE_JUMP(o0, f48, 56f)
 
 1:     FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
-       LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+       LOOP_CHUNK1(o1, o0, 1f)
        FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
-       LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+       LOOP_CHUNK2(o1, o0, 2f)
        FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
-       LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+       LOOP_CHUNK3(o1, o0, 3f)
        ba,pt           %xcc, 1b+4
         faligndata     %f2, %f4, %f48
 1:     FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
@@ -282,11 +396,11 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
        STORE_JUMP(o0, f48, 57f)
 
 1:     FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
-       LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+       LOOP_CHUNK1(o1, o0, 1f)
        FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
-       LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+       LOOP_CHUNK2(o1, o0, 2f)
        FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
-       LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+       LOOP_CHUNK3(o1, o0, 3f)
        ba,pt           %xcc, 1b+4
         faligndata     %f4, %f6, %f48
 1:     FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
@@ -303,11 +417,11 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
        STORE_JUMP(o0, f48, 58f)
 
 1:     FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
-       LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+       LOOP_CHUNK1(o1, o0, 1f)
        FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
-       LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+       LOOP_CHUNK2(o1, o0, 2f)
        FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) 
-       LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+       LOOP_CHUNK3(o1, o0, 3f)
        ba,pt           %xcc, 1b+4
         faligndata     %f6, %f8, %f48
 1:     FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
@@ -324,11 +438,11 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
        STORE_JUMP(o0, f48, 59f)
 
 1:     FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
-       LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+       LOOP_CHUNK1(o1, o0, 1f)
        FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
-       LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+       LOOP_CHUNK2(o1, o0, 2f)
        FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
-       LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+       LOOP_CHUNK3(o1, o0, 3f)
        ba,pt           %xcc, 1b+4
         faligndata     %f8, %f10, %f48
 1:     FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
@@ -345,11 +459,11 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
        STORE_JUMP(o0, f48, 60f)
 
 1:     FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
-       LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+       LOOP_CHUNK1(o1, o0, 1f)
        FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
-       LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+       LOOP_CHUNK2(o1, o0, 2f)
        FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
-       LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+       LOOP_CHUNK3(o1, o0, 3f)
        ba,pt           %xcc, 1b+4
         faligndata     %f10, %f12, %f48
 1:     FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
@@ -366,11 +480,11 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
        STORE_JUMP(o0, f48, 61f)
 
 1:     FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
-       LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+       LOOP_CHUNK1(o1, o0, 1f)
        FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
-       LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+       LOOP_CHUNK2(o1, o0, 2f)
        FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
-       LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+       LOOP_CHUNK3(o1, o0, 3f)
        ba,pt           %xcc, 1b+4
         faligndata     %f12, %f14, %f48
 1:     FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
@@ -387,11 +501,11 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
        STORE_JUMP(o0, f48, 62f)
 
 1:     FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
-       LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
+       LOOP_CHUNK1(o1, o0, 1f)
        FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
-       LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f)
+       LOOP_CHUNK2(o1, o0, 2f)
        FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
-       LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f)
+       LOOP_CHUNK3(o1, o0, 3f)
        ba,pt           %xcc, 1b+4
         faligndata     %f14, %f16, %f48
 1:     FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
@@ -407,53 +521,53 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
        FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
        STORE_JUMP(o0, f48, 63f)
 
-40:    FINISH_VISCHUNK(o0, f0,  f2,  g3)
-41:    FINISH_VISCHUNK(o0, f2,  f4,  g3)
-42:    FINISH_VISCHUNK(o0, f4,  f6,  g3)
-43:    FINISH_VISCHUNK(o0, f6,  f8,  g3)
-44:    FINISH_VISCHUNK(o0, f8,  f10, g3)
-45:    FINISH_VISCHUNK(o0, f10, f12, g3)
-46:    FINISH_VISCHUNK(o0, f12, f14, g3)
-47:    UNEVEN_VISCHUNK(o0, f14, f0,  g3)
-48:    FINISH_VISCHUNK(o0, f16, f18, g3)
-49:    FINISH_VISCHUNK(o0, f18, f20, g3)
-50:    FINISH_VISCHUNK(o0, f20, f22, g3)
-51:    FINISH_VISCHUNK(o0, f22, f24, g3)
-52:    FINISH_VISCHUNK(o0, f24, f26, g3)
-53:    FINISH_VISCHUNK(o0, f26, f28, g3)
-54:    FINISH_VISCHUNK(o0, f28, f30, g3)
-55:    UNEVEN_VISCHUNK(o0, f30, f0,  g3)
-56:    FINISH_VISCHUNK(o0, f32, f34, g3)
-57:    FINISH_VISCHUNK(o0, f34, f36, g3)
-58:    FINISH_VISCHUNK(o0, f36, f38, g3)
-59:    FINISH_VISCHUNK(o0, f38, f40, g3)
-60:    FINISH_VISCHUNK(o0, f40, f42, g3)
-61:    FINISH_VISCHUNK(o0, f42, f44, g3)
-62:    FINISH_VISCHUNK(o0, f44, f46, g3)
-63:    UNEVEN_VISCHUNK_LAST(o0, f46, f0,  g3)
-
-93:    EX_LD_FP(LOAD(ldd, %o1, %f2))
+40:    FINISH_VISCHUNK(o0, f0,  f2)
+41:    FINISH_VISCHUNK(o0, f2,  f4)
+42:    FINISH_VISCHUNK(o0, f4,  f6)
+43:    FINISH_VISCHUNK(o0, f6,  f8)
+44:    FINISH_VISCHUNK(o0, f8,  f10)
+45:    FINISH_VISCHUNK(o0, f10, f12)
+46:    FINISH_VISCHUNK(o0, f12, f14)
+47:    UNEVEN_VISCHUNK(o0, f14, f0)
+48:    FINISH_VISCHUNK(o0, f16, f18)
+49:    FINISH_VISCHUNK(o0, f18, f20)
+50:    FINISH_VISCHUNK(o0, f20, f22)
+51:    FINISH_VISCHUNK(o0, f22, f24)
+52:    FINISH_VISCHUNK(o0, f24, f26)
+53:    FINISH_VISCHUNK(o0, f26, f28)
+54:    FINISH_VISCHUNK(o0, f28, f30)
+55:    UNEVEN_VISCHUNK(o0, f30, f0)
+56:    FINISH_VISCHUNK(o0, f32, f34)
+57:    FINISH_VISCHUNK(o0, f34, f36)
+58:    FINISH_VISCHUNK(o0, f36, f38)
+59:    FINISH_VISCHUNK(o0, f38, f40)
+60:    FINISH_VISCHUNK(o0, f40, f42)
+61:    FINISH_VISCHUNK(o0, f42, f44)
+62:    FINISH_VISCHUNK(o0, f44, f46)
+63:    UNEVEN_VISCHUNK_LAST(o0, f46, f0)
+
+93:    EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
        add             %o1, 8, %o1
        subcc           %g3, 8, %g3
        faligndata      %f0, %f2, %f8
-       EX_ST_FP(STORE(std, %f8, %o0))
+       EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
        bl,pn           %xcc, 95f
         add            %o0, 8, %o0
-       EX_LD_FP(LOAD(ldd, %o1, %f0))
+       EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
        add             %o1, 8, %o1
        subcc           %g3, 8, %g3
        faligndata      %f2, %f0, %f8
-       EX_ST_FP(STORE(std, %f8, %o0))
+       EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
        bge,pt          %xcc, 93b
         add            %o0, 8, %o0
 
 95:    brz,pt          %o2, 2f
         mov            %g1, %o1
 
-1:     EX_LD_FP(LOAD(ldub, %o1, %o3))
+1:     EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
        add             %o1, 1, %o1
        subcc           %o2, 1, %o2
-       EX_ST_FP(STORE(stb, %o3, %o0))
+       EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
        bne,pt          %xcc, 1b
         add            %o0, 1, %o0
 
@@ -469,27 +583,27 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
 
 72:    andn            %o2, 0xf, %GLOBAL_SPARE
        and             %o2, 0xf, %o2
-1:     EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
-       EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
+1:     EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
+       EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
        subcc           %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
-       EX_ST(STORE(stx, %o5, %o1 + %o3))
+       EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
        add             %o1, 0x8, %o1
-       EX_ST(STORE(stx, %g1, %o1 + %o3))
+       EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
        bgu,pt          %XCC, 1b
         add            %o1, 0x8, %o1
 73:    andcc           %o2, 0x8, %g0
        be,pt           %XCC, 1f
         nop
-       EX_LD(LOAD(ldx, %o1, %o5))
+       EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
        sub             %o2, 0x8, %o2
-       EX_ST(STORE(stx, %o5, %o1 + %o3))
+       EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
        add             %o1, 0x8, %o1
 1:     andcc           %o2, 0x4, %g0
        be,pt           %XCC, 1f
         nop
-       EX_LD(LOAD(lduw, %o1, %o5))
+       EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
        sub             %o2, 0x4, %o2
-       EX_ST(STORE(stw, %o5, %o1 + %o3))
+       EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
        add             %o1, 0x4, %o1
 1:     cmp             %o2, 0
        be,pt           %XCC, 85f
@@ -503,9 +617,9 @@ FUNC_NAME:          /* %o0=dst, %o1=src, %o2=len */
         sub            %g0, %g1, %g1
        sub             %o2, %g1, %o2
 
-1:     EX_LD(LOAD(ldub, %o1, %o5))
+1:     EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
        subcc           %g1, 1, %g1
-       EX_ST(STORE(stb, %o5, %o1 + %o3))
+       EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
        bgu,pt          %icc, 1b
         add            %o1, 1, %o1
 
@@ -521,16 +635,16 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
 
 8:     mov             64, %o3
        andn            %o1, 0x7, %o1
-       EX_LD(LOAD(ldx, %o1, %g2))
+       EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
        sub             %o3, %g1, %o3
        andn            %o2, 0x7, %GLOBAL_SPARE
        sllx            %g2, %g1, %g2
-1:     EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
+1:     EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
        subcc           %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
        add             %o1, 0x8, %o1
        srlx            %g3, %o3, %o5
        or              %o5, %g2, %o5
-       EX_ST(STORE(stx, %o5, %o0))
+       EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
        add             %o0, 0x8, %o0
        bgu,pt          %icc, 1b
         sllx           %g3, %g1, %g2
@@ -548,9 +662,9 @@ FUNC_NAME:          /* %o0=dst, %o1=src, %o2=len */
        bne,pn          %XCC, 90f
         sub            %o0, %o1, %o3
 
-1:     EX_LD(LOAD(lduw, %o1, %g1))
+1:     EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
        subcc           %o2, 4, %o2
-       EX_ST(STORE(stw, %g1, %o1 + %o3))
+       EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
        bgu,pt          %XCC, 1b
         add            %o1, 4, %o1
 
@@ -558,12 +672,13 @@ FUNC_NAME:                /* %o0=dst, %o1=src, %o2=len */
         mov            EX_RETVAL(%o4), %o0
 
        .align          32
-90:    EX_LD(LOAD(ldub, %o1, %g1))
+90:    EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
        subcc           %o2, 1, %o2
-       EX_ST(STORE(stb, %g1, %o1 + %o3))
+       EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
        bgu,pt          %XCC, 90b
         add            %o1, 1, %o1
        retl
         mov            EX_RETVAL(%o4), %o0
 
        .size           FUNC_NAME, .-FUNC_NAME
+EXPORT_SYMBOL(FUNC_NAME)
index 88ad73d86fe44b64c2313483e7490cc8ae0ee438..db73010a1af8f18d5baa7515203001045995ce31 100644 (file)
@@ -3,19 +3,19 @@
  * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
 
-#define EX_LD(x)               \
+#define EX_LD(x,y)             \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one;  \
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
-#define EX_LD_FP(x)            \
+#define EX_LD_FP(x,y)          \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one_fp;\
+       .word 98b, y##_fp;      \
        .text;                  \
        .align 4;
 
index 845139d7553720ce5fe98d6e30bcb11215f71e2f..c4ee858e352a2be0e028ac757dd399938e89db04 100644 (file)
@@ -3,19 +3,19 @@
  * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
  */
 
-#define EX_ST(x)               \
+#define EX_ST(x,y)             \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one;  \
+       .word 98b, y;           \
        .text;                  \
        .align 4;
 
-#define EX_ST_FP(x)            \
+#define EX_ST_FP(x,y)          \
 98:    x;                      \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one_fp;\
+       .word 98b, y##_fp;      \
        .text;                  \
        .align 4;
 
index 491ee69e49951fc2040640e77b896552872c8a0a..54f98706b03b2f53025adb99e086002a0629e9f0 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #ifdef __KERNEL__
+#include <linux/linkage.h>
 #include <asm/visasm.h>
 #include <asm/asi.h>
 #define GLOBAL_SPARE   %g7
 #endif
 
 #ifndef EX_LD
-#define EX_LD(x)       x
+#define EX_LD(x,y)     x
 #endif
 #ifndef EX_LD_FP
-#define EX_LD_FP(x)    x
+#define EX_LD_FP(x,y)  x
 #endif
 
 #ifndef EX_ST
-#define EX_ST(x)       x
+#define EX_ST(x,y)     x
 #endif
 #ifndef EX_ST_FP
-#define EX_ST_FP(x)    x
-#endif
-
-#ifndef EX_RETVAL
-#define EX_RETVAL(x)   x
+#define EX_ST_FP(x,y)  x
 #endif
 
 #ifndef LOAD
         */
 
        .text
+#ifndef EX_RETVAL
+#define EX_RETVAL(x)   x
+__restore_fp: /* shared tail for the *_fp stubs: VISExitHalf, then return; callers set %o0 in their branch delay slot */
+       VISExitHalf
+       retl
+        nop /* delay slot left empty; %o0 was already set by the branching stub */
+ENDPROC placeholder
+ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) /* fixup stub: return %o2 + %g2 + %g1 + 1 via __restore_fp */
+       add     %g1, 1, %g1
+       add     %g2, %g1, %g2 /* %g2 = %g2 + %g1 + 1 */
+       ba,pt   %xcc, __restore_fp
+        add    %o2, %g2, %o0 /* branch delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
+ENTRY(U3_retl_o2_plus_g2_fp) /* fixup stub: return %o2 + %g2 via __restore_fp */
+       ba,pt   %xcc, __restore_fp
+        add    %o2, %g2, %o0 /* branch delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_plus_g2_fp)
+ENTRY(U3_retl_o2_plus_g2_plus_8_fp) /* fixup stub: return %o2 + %g2 + 8 via __restore_fp */
+       add     %g2, 8, %g2 /* presumably undoes a subtract of 8 done before the faulting store */
+       ba,pt   %xcc, __restore_fp
+        add    %o2, %g2, %o0 /* branch delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
+ENTRY(U3_retl_o2) /* fixup stub: return %o2 unchanged */
+       retl
+        mov    %o2, %o0 /* retl delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2)
+ENTRY(U3_retl_o2_plus_1) /* fixup stub: return %o2 + 1 */
+       retl
+        add    %o2, 1, %o0 /* retl delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_plus_1)
+ENTRY(U3_retl_o2_plus_4) /* fixup stub: return %o2 + 4 */
+       retl
+        add    %o2, 4, %o0 /* retl delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_plus_4)
+ENTRY(U3_retl_o2_plus_8) /* fixup stub: return %o2 + 8 */
+       retl
+        add    %o2, 8, %o0 /* retl delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_plus_8)
+ENTRY(U3_retl_o2_plus_g1_plus_1) /* fixup stub: return %o2 + %g1 + 1 */
+       add     %g1, 1, %g1 /* presumably undoes a decrement of %g1 done before the faulting access */
+       retl
+        add    %o2, %g1, %o0 /* retl delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_plus_g1_plus_1)
+ENTRY(U3_retl_o2_fp) /* fixup stub: return %o2 unchanged via __restore_fp */
+       ba,pt   %xcc, __restore_fp
+        mov    %o2, %o0 /* branch delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_fp)
+ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) /* fixup stub: return %o2 + (%o3 << 6) + 0x80 via __restore_fp */
+       sll     %o3, 6, %o3 /* %o3 * 64; presumably %o3 counts 64-byte blocks */
+       add     %o3, 0x80, %o3
+       ba,pt   %xcc, __restore_fp
+        add    %o2, %o3, %o0 /* branch delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
+ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) /* fixup stub: return %o2 + (%o3 << 6) + 0x40 via __restore_fp */
+       sll     %o3, 6, %o3 /* %o3 * 64; presumably %o3 counts 64-byte blocks */
+       add     %o3, 0x40, %o3
+       ba,pt   %xcc, __restore_fp
+        add    %o2, %o3, %o0 /* branch delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
+ENTRY(U3_retl_o2_plus_GS_plus_0x10) /* fixup stub: return %o2 + GLOBAL_SPARE + 0x10 */
+       add     GLOBAL_SPARE, 0x10, GLOBAL_SPARE /* presumably undoes a subtract of 0x10 done before the faulting access */
+       retl
+        add    %o2, GLOBAL_SPARE, %o0 /* retl delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
+ENTRY(U3_retl_o2_plus_GS_plus_0x08) /* fixup stub: return %o2 + GLOBAL_SPARE + 0x08 */
+       add     GLOBAL_SPARE, 0x08, GLOBAL_SPARE
+       retl
+        add    %o2, GLOBAL_SPARE, %o0 /* retl delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
+/* Fixup stub: return (%o2 & 7) + GLOBAL_SPARE.  The result must land in
+ * %o0 like every other stub here; writing it to %o2 left %o0 holding
+ * whatever it contained at fault time.
+ */
+ENTRY(U3_retl_o2_and_7_plus_GS)
+       and     %o2, 7, %o2
+       retl
+        add    %o2, GLOBAL_SPARE, %o0  /* retl delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_and_7_plus_GS)
+/* Fixup stub: return (%o2 & 7) + GLOBAL_SPARE + 8.  The result must land
+ * in %o0 like every other stub here; writing it to %o2 left %o0 holding
+ * whatever it contained at fault time.
+ */
+ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
+       add     GLOBAL_SPARE, 8, GLOBAL_SPARE
+       and     %o2, 7, %o2
+       retl
+        add    %o2, GLOBAL_SPARE, %o0  /* retl delay slot: result goes in %o0 */
+ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
+#endif
+
        .align          64
 
        /* The cheetah's flexible spine, oversized liver, enlarged heart,
@@ -126,8 +204,8 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
         and            %g2, 0x38, %g2
 
 1:     subcc           %g1, 0x1, %g1
-       EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3))
-       EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE))
+       EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
+       EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
        bgu,pt          %XCC, 1b
         add            %o1, 0x1, %o1
 
@@ -138,20 +216,20 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
        be,pt           %icc, 3f
         alignaddr      %o1, %g0, %o1
 
-       EX_LD_FP(LOAD(ldd, %o1, %f4))
-1:     EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6))
+       EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
+1:     EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
        add             %o1, 0x8, %o1
        subcc           %g2, 0x8, %g2
        faligndata      %f4, %f6, %f0
-       EX_ST_FP(STORE(std, %f0, %o0))
+       EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
        be,pn           %icc, 3f
         add            %o0, 0x8, %o0
 
-       EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
        add             %o1, 0x8, %o1
        subcc           %g2, 0x8, %g2
        faligndata      %f6, %f4, %f2
-       EX_ST_FP(STORE(std, %f2, %o0))
+       EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
        bne,pt          %icc, 1b
         add            %o0, 0x8, %o0
 
@@ -161,25 +239,25 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
        LOAD(prefetch, %o1 + 0x080, #one_read)
        LOAD(prefetch, %o1 + 0x0c0, #one_read)
        LOAD(prefetch, %o1 + 0x100, #one_read)
-       EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
        LOAD(prefetch, %o1 + 0x140, #one_read)
-       EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
        LOAD(prefetch, %o1 + 0x180, #one_read)
-       EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
        LOAD(prefetch, %o1 + 0x1c0, #one_read)
        faligndata      %f0, %f2, %f16
-       EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
        faligndata      %f2, %f4, %f18
-       EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
        faligndata      %f4, %f6, %f20
-       EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
        faligndata      %f6, %f8, %f22
 
-       EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
        faligndata      %f8, %f10, %f24
-       EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
        faligndata      %f10, %f12, %f26
-       EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
 
        subcc           GLOBAL_SPARE, 0x80, GLOBAL_SPARE
        add             %o1, 0x40, %o1
@@ -190,26 +268,26 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
 
        .align          64
 1:
-       EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
        faligndata      %f12, %f14, %f28
-       EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
        faligndata      %f14, %f0, %f30
-       EX_ST_FP(STORE_BLK(%f16, %o0))
-       EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
+       EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+       EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
        faligndata      %f0, %f2, %f16
        add             %o0, 0x40, %o0
 
-       EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
        faligndata      %f2, %f4, %f18
-       EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
        faligndata      %f4, %f6, %f20
-       EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
        subcc           %o3, 0x01, %o3
        faligndata      %f6, %f8, %f22
-       EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
 
        faligndata      %f8, %f10, %f24
-       EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
        LOAD(prefetch, %o1 + 0x1c0, #one_read)
        faligndata      %f10, %f12, %f26
        bg,pt           %XCC, 1b
@@ -217,29 +295,29 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
 
        /* Finally we copy the last full 64-byte block. */
 2:
-       EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
        faligndata      %f12, %f14, %f28
-       EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
        faligndata      %f14, %f0, %f30
-       EX_ST_FP(STORE_BLK(%f16, %o0))
-       EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6))
+       EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
+       EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
        faligndata      %f0, %f2, %f16
-       EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
        faligndata      %f2, %f4, %f18
-       EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
        faligndata      %f4, %f6, %f20
-       EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
        faligndata      %f6, %f8, %f22
-       EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
        faligndata      %f8, %f10, %f24
        cmp             %g1, 0
        be,pt           %XCC, 1f
         add            %o0, 0x40, %o0
-       EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
 1:     faligndata      %f10, %f12, %f26
        faligndata      %f12, %f14, %f28
        faligndata      %f14, %f0, %f30
-       EX_ST_FP(STORE_BLK(%f16, %o0))
+       EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
        add             %o0, 0x40, %o0
        add             %o1, 0x40, %o1
        membar          #Sync
@@ -259,20 +337,20 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
 
        sub             %o2, %g2, %o2
        be,a,pt         %XCC, 1f
-        EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0))
+        EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
 
-1:     EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2))
+1:     EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
        add             %o1, 0x8, %o1
        subcc           %g2, 0x8, %g2
        faligndata      %f0, %f2, %f8
-       EX_ST_FP(STORE(std, %f8, %o0))
+       EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
        be,pn           %XCC, 2f
         add            %o0, 0x8, %o0
-       EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0))
+       EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
        add             %o1, 0x8, %o1
        subcc           %g2, 0x8, %g2
        faligndata      %f2, %f0, %f8
-       EX_ST_FP(STORE(std, %f8, %o0))
+       EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
        bne,pn          %XCC, 1b
         add            %o0, 0x8, %o0
 
@@ -292,30 +370,33 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
         andcc          %o2, 0x8, %g0
        be,pt           %icc, 1f
         nop
-       EX_LD(LOAD(ldx, %o1, %o5))
-       EX_ST(STORE(stx, %o5, %o1 + %o3))
+       EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
+       EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
        add             %o1, 0x8, %o1
+       sub             %o2, 8, %o2
 
 1:     andcc           %o2, 0x4, %g0
        be,pt           %icc, 1f
         nop
-       EX_LD(LOAD(lduw, %o1, %o5))
-       EX_ST(STORE(stw, %o5, %o1 + %o3))
+       EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
+       EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
        add             %o1, 0x4, %o1
+       sub             %o2, 4, %o2
 
 1:     andcc           %o2, 0x2, %g0
        be,pt           %icc, 1f
         nop
-       EX_LD(LOAD(lduh, %o1, %o5))
-       EX_ST(STORE(sth, %o5, %o1 + %o3))
+       EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
+       EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
        add             %o1, 0x2, %o1
+       sub             %o2, 2, %o2
 
 1:     andcc           %o2, 0x1, %g0
        be,pt           %icc, 85f
         nop
-       EX_LD(LOAD(ldub, %o1, %o5))
+       EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
        ba,pt           %xcc, 85f
-        EX_ST(STORE(stb, %o5, %o1 + %o3))
+        EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
 
        .align          64
 70: /* 16 < len <= 64 */
@@ -326,26 +407,26 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
        andn            %o2, 0xf, GLOBAL_SPARE
        and             %o2, 0xf, %o2
 1:     subcc           GLOBAL_SPARE, 0x10, GLOBAL_SPARE
-       EX_LD(LOAD(ldx, %o1 + 0x00, %o5))
-       EX_LD(LOAD(ldx, %o1 + 0x08, %g1))
-       EX_ST(STORE(stx, %o5, %o1 + %o3))
+       EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
+       EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
+       EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
        add             %o1, 0x8, %o1
-       EX_ST(STORE(stx, %g1, %o1 + %o3))
+       EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
        bgu,pt          %XCC, 1b
         add            %o1, 0x8, %o1
 73:    andcc           %o2, 0x8, %g0
        be,pt           %XCC, 1f
         nop
        sub             %o2, 0x8, %o2
-       EX_LD(LOAD(ldx, %o1, %o5))
-       EX_ST(STORE(stx, %o5, %o1 + %o3))
+       EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
+       EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
        add             %o1, 0x8, %o1
 1:     andcc           %o2, 0x4, %g0
        be,pt           %XCC, 1f
         nop
        sub             %o2, 0x4, %o2
-       EX_LD(LOAD(lduw, %o1, %o5))
-       EX_ST(STORE(stw, %o5, %o1 + %o3))
+       EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
+       EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
        add             %o1, 0x4, %o1
 1:     cmp             %o2, 0
        be,pt           %XCC, 85f
@@ -361,8 +442,8 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
        sub             %o2, %g1, %o2
 
 1:     subcc           %g1, 1, %g1
-       EX_LD(LOAD(ldub, %o1, %o5))
-       EX_ST(STORE(stb, %o5, %o1 + %o3))
+       EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
+       EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
        bgu,pt          %icc, 1b
         add            %o1, 1, %o1
 
@@ -378,16 +459,16 @@ FUNC_NAME:        /* %o0=dst, %o1=src, %o2=len */
 
 8:     mov             64, %o3
        andn            %o1, 0x7, %o1
-       EX_LD(LOAD(ldx, %o1, %g2))
+       EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2)
        sub             %o3, %g1, %o3
        andn            %o2, 0x7, GLOBAL_SPARE
        sllx            %g2, %g1, %g2
-1:     EX_LD(LOAD(ldx, %o1 + 0x8, %g3))
+1:     EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS)
        subcc           GLOBAL_SPARE, 0x8, GLOBAL_SPARE
        add             %o1, 0x8, %o1
        srlx            %g3, %o3, %o5
        or              %o5, %g2, %o5
-       EX_ST(STORE(stx, %o5, %o0))
+       EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
        add             %o0, 0x8, %o0
        bgu,pt          %icc, 1b
         sllx           %g3, %g1, %g2
@@ -407,8 +488,8 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
 
 1:
        subcc           %o2, 4, %o2
-       EX_LD(LOAD(lduw, %o1, %g1))
-       EX_ST(STORE(stw, %g1, %o1 + %o3))
+       EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
+       EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4)
        bgu,pt          %XCC, 1b
         add            %o1, 4, %o1
 
@@ -418,8 +499,8 @@ FUNC_NAME:  /* %o0=dst, %o1=src, %o2=len */
        .align          32
 90:
        subcc           %o2, 1, %o2
-       EX_LD(LOAD(ldub, %o1, %g1))
-       EX_ST(STORE(stb, %g1, %o1 + %o3))
+       EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
+       EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1)
        bgu,pt          %XCC, 90b
         add            %o1, 1, %o1
        retl
index 62c2647bd5cefaa832edf622c641bd0f3c17e8dc..1c7b6a39b9424acdc887c6640f3e4e7139cf5ed1 100644 (file)
@@ -13,6 +13,7 @@
 #include <asm/ptrace.h>
 #include <asm/visasm.h>
 #include <asm/thread_info.h>
+#include <asm/export.h>
 
        /* On entry: %o5=current FPRS value, %g7 is callers address */
        /* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */
@@ -79,3 +80,4 @@ vis1: ldub            [%g6 + TI_FPSAVED], %g3
 80:    jmpl            %g7 + %g0, %g0
         nop
 ENDPROC(VISenter)
+EXPORT_SYMBOL(VISenter)
index 86f60de07b0a3526b620e6f76a94233b6e42e393..c8b1cf71bc73227e490c5662fe18759d58c012c9 100644 (file)
@@ -6,6 +6,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/export.h>
 
        .text
 ENTRY(__ashldi3)
@@ -33,3 +34,4 @@ ENTRY(__ashldi3)
        retl
         nop
 ENDPROC(__ashldi3)
+EXPORT_SYMBOL(__ashldi3)
index 6eb8ba2dd50e0de2a80a4c91bf734ac24017fd1c..4310256e796475b5e9ad3e1fb81b0839a521b027 100644 (file)
@@ -6,6 +6,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/export.h>
 
        .text
 ENTRY(__ashrdi3)
@@ -35,3 +36,4 @@ ENTRY(__ashrdi3)
        jmpl    %o7 + 8, %g0
         nop
 ENDPROC(__ashrdi3)
+EXPORT_SYMBOL(__ashrdi3)
index a5c5a0279cccc7b3487e9361500cf8162a3c89ad..1c6a1bde51388ea0402bc4271f796d5935dda4fb 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/backoff.h>
+#include <asm/export.h>
 
        .text
 
@@ -29,6 +30,7 @@ ENTRY(atomic_##op) /* %o0 = increment, %o1 = atomic_ptr */            \
         nop;                                                           \
 2:     BACKOFF_SPIN(%o2, %o3, 1b);                                     \
 ENDPROC(atomic_##op);                                                  \
+EXPORT_SYMBOL(atomic_##op);
 
 #define ATOMIC_OP_RETURN(op)                                           \
 ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */    \
@@ -42,7 +44,8 @@ ENTRY(atomic_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */   \
        retl;                                                           \
         sra    %g1, 0, %o0;                                            \
 2:     BACKOFF_SPIN(%o2, %o3, 1b);                                     \
-ENDPROC(atomic_##op##_return);
+ENDPROC(atomic_##op##_return);                                         \
+EXPORT_SYMBOL(atomic_##op##_return);
 
 #define ATOMIC_FETCH_OP(op)                                            \
 ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */       \
@@ -56,7 +59,8 @@ ENTRY(atomic_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */      \
        retl;                                                           \
         sra    %g1, 0, %o0;                                            \
 2:     BACKOFF_SPIN(%o2, %o3, 1b);                                     \
-ENDPROC(atomic_fetch_##op);
+ENDPROC(atomic_fetch_##op);                                            \
+EXPORT_SYMBOL(atomic_fetch_##op);
 
 #define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
 
@@ -88,6 +92,7 @@ ENTRY(atomic64_##op) /* %o0 = increment, %o1 = atomic_ptr */          \
         nop;                                                           \
 2:     BACKOFF_SPIN(%o2, %o3, 1b);                                     \
 ENDPROC(atomic64_##op);                                                        \
+EXPORT_SYMBOL(atomic64_##op);
 
 #define ATOMIC64_OP_RETURN(op)                                         \
 ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */  \
@@ -101,7 +106,8 @@ ENTRY(atomic64_##op##_return) /* %o0 = increment, %o1 = atomic_ptr */       \
        retl;                                                           \
         op     %g1, %o0, %o0;                                          \
 2:     BACKOFF_SPIN(%o2, %o3, 1b);                                     \
-ENDPROC(atomic64_##op##_return);
+ENDPROC(atomic64_##op##_return);                                       \
+EXPORT_SYMBOL(atomic64_##op##_return);
 
 #define ATOMIC64_FETCH_OP(op)                                          \
 ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */     \
@@ -115,7 +121,8 @@ ENTRY(atomic64_fetch_##op) /* %o0 = increment, %o1 = atomic_ptr */  \
        retl;                                                           \
         mov    %g1, %o0;                                               \
 2:     BACKOFF_SPIN(%o2, %o3, 1b);                                     \
-ENDPROC(atomic64_fetch_##op);
+ENDPROC(atomic64_fetch_##op);                                          \
+EXPORT_SYMBOL(atomic64_fetch_##op);
 
 #define ATOMIC64_OPS(op) ATOMIC64_OP(op) ATOMIC64_OP_RETURN(op) ATOMIC64_FETCH_OP(op)
 
@@ -147,3 +154,4 @@ ENTRY(atomic64_dec_if_positive) /* %o0 = atomic_ptr */
         sub    %g1, 1, %o0
 2:     BACKOFF_SPIN(%o2, %o3, 1b)
 ENDPROC(atomic64_dec_if_positive)
+EXPORT_SYMBOL(atomic64_dec_if_positive)
index 36f72cc0e67e682dc347d7efe66ae7c7c2eb5658..7031bf1587cb6136569f9f56af3c502a77f1350f 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/linkage.h>
 #include <asm/asi.h>
 #include <asm/backoff.h>
+#include <asm/export.h>
 
        .text
 
@@ -29,6 +30,7 @@ ENTRY(test_and_set_bit)       /* %o0=nr, %o1=addr */
         nop
 2:     BACKOFF_SPIN(%o3, %o4, 1b)
 ENDPROC(test_and_set_bit)
+EXPORT_SYMBOL(test_and_set_bit)
 
 ENTRY(test_and_clear_bit) /* %o0=nr, %o1=addr */
        BACKOFF_SETUP(%o3)
@@ -50,6 +52,7 @@ ENTRY(test_and_clear_bit) /* %o0=nr, %o1=addr */
         nop
 2:     BACKOFF_SPIN(%o3, %o4, 1b)
 ENDPROC(test_and_clear_bit)
+EXPORT_SYMBOL(test_and_clear_bit)
 
 ENTRY(test_and_change_bit) /* %o0=nr, %o1=addr */
        BACKOFF_SETUP(%o3)
@@ -71,6 +74,7 @@ ENTRY(test_and_change_bit) /* %o0=nr, %o1=addr */
         nop
 2:     BACKOFF_SPIN(%o3, %o4, 1b)
 ENDPROC(test_and_change_bit)
+EXPORT_SYMBOL(test_and_change_bit)
 
 ENTRY(set_bit) /* %o0=nr, %o1=addr */
        BACKOFF_SETUP(%o3)
@@ -90,6 +94,7 @@ ENTRY(set_bit) /* %o0=nr, %o1=addr */
         nop
 2:     BACKOFF_SPIN(%o3, %o4, 1b)
 ENDPROC(set_bit)
+EXPORT_SYMBOL(set_bit)
 
 ENTRY(clear_bit) /* %o0=nr, %o1=addr */
        BACKOFF_SETUP(%o3)
@@ -109,6 +114,7 @@ ENTRY(clear_bit) /* %o0=nr, %o1=addr */
         nop
 2:     BACKOFF_SPIN(%o3, %o4, 1b)
 ENDPROC(clear_bit)
+EXPORT_SYMBOL(clear_bit)
 
 ENTRY(change_bit) /* %o0=nr, %o1=addr */
        BACKOFF_SETUP(%o3)
@@ -128,3 +134,4 @@ ENTRY(change_bit) /* %o0=nr, %o1=addr */
         nop
 2:     BACKOFF_SPIN(%o3, %o4, 1b)
 ENDPROC(change_bit)
+EXPORT_SYMBOL(change_bit)
index 3c771011ff4b036d04aa424e3861972d08317517..1f2692d59d18ebe4a206d6ae27d875b1292da852 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <linux/linkage.h>
 #include <asm/page.h>
+#include <asm/export.h>
 
        /* Zero out 64 bytes of memory at (buf + offset).
         * Assumes %g1 contains zero.
@@ -64,6 +65,7 @@ ENTRY(bzero_1page)
        retl
         nop
 ENDPROC(bzero_1page)
+EXPORT_SYMBOL(bzero_1page)
 
 ENTRY(__copy_1page)
 /* NOTE: If you change the number of insns of this routine, please check
@@ -87,3 +89,4 @@ ENTRY(__copy_1page)
        retl
         nop
 ENDPROC(__copy_1page)
+EXPORT_SYMBOL(__copy_1page)
index 8c058114b64901ea507d70c8b1a0f59f2c50372f..3bb1914c4fa487aecd598e11b4bd74687287c441 100644 (file)
@@ -5,6 +5,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/export.h>
 
        .text
 
@@ -78,6 +79,8 @@ __bzero_done:
         mov            %o3, %o0
 ENDPROC(__bzero)
 ENDPROC(memset)
+EXPORT_SYMBOL(__bzero)
+EXPORT_SYMBOL(memset)
 
 #define EX_ST(x,y)             \
 98:    x,y;                    \
@@ -143,3 +146,4 @@ __clear_user_done:
        retl
         clr            %o0
 ENDPROC(__clear_user)
+EXPORT_SYMBOL(__clear_user)
index 0084c3361e15afa77530071ada7a6e30c3ece3b5..c9d8b62321116aa7f472904afae12793c57c0527 100644 (file)
@@ -14,6 +14,7 @@
  */
 
 #include <asm/errno.h>
+#include <asm/export.h>
 
 #define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5)        \
        ldd     [buf + offset + 0x00], t0;                      \
@@ -104,6 +105,7 @@ csum_partial_fix_alignment:
         * buffer of size 0x20.  Follow the code path for that case.
         */
        .globl  csum_partial
+       EXPORT_SYMBOL(csum_partial)
 csum_partial:                  /* %o0=buf, %o1=len, %o2=sum */
        andcc   %o0, 0x7, %g0                           ! alignment problems?
        bne     csum_partial_fix_alignment              ! yep, handle it
@@ -335,6 +337,7 @@ cc_dword_align:
         */
        .align  8
        .globl  __csum_partial_copy_sparc_generic
+       EXPORT_SYMBOL(__csum_partial_copy_sparc_generic)
 __csum_partial_copy_sparc_generic:
                                        /* %o0=src, %o1=dest, %g1=len, %g7=sum */
        xor     %o0, %o1, %o4           ! get changing bits
index 1d230f693dc4b1806e4c80b8df2918abeee878b8..f6732174fe6bdcceeb4228e64a92e2357cd81e00 100644 (file)
@@ -13,6 +13,7 @@
  *     BSD4.4 portable checksum routine
  */
 
+#include <asm/export.h>
        .text
 
 csum_partial_fix_alignment:
@@ -37,6 +38,7 @@ csum_partial_fix_alignment:
 
        .align          32
        .globl          csum_partial
+       EXPORT_SYMBOL(csum_partial)
 csum_partial:          /* %o0=buff, %o1=len, %o2=sum */
        prefetch        [%o0 + 0x000], #n_reads
        clr             %o4
index 46272dfc26e81e9ea4882b6789636f70bd8c5c47..f30d6b78afbd658fbcb9009fe34f6ce0bc2e99e0 100644 (file)
@@ -10,6 +10,7 @@
 #include <asm/pgtable.h>
 #include <asm/spitfire.h>
 #include <asm/head.h>
+#include <asm/export.h>
 
        /* What we used to do was lock a TLB entry into a specific
         * TLB slot, clear the page with interrupts disabled, then
@@ -26,6 +27,7 @@
        .text
 
        .globl          _clear_page
+       EXPORT_SYMBOL(_clear_page)
 _clear_page:           /* %o0=dest */
        ba,pt           %xcc, clear_page_common
         clr            %o4
@@ -35,6 +37,7 @@ _clear_page:          /* %o0=dest */
         */
        .align          32
        .globl          clear_user_page
+       EXPORT_SYMBOL(clear_user_page)
 clear_user_page:       /* %o0=dest, %o1=vaddr */
        lduw            [%g6 + TI_PRE_COUNT], %o2
        sethi           %hi(PAGE_OFFSET), %g2
index 302c0e60dc2ceb48a3212840ac7891f337419d14..0252b218de45ac0685c440de07d0d2d0891834e9 100644 (file)
@@ -5,21 +5,37 @@
 
 #include <linux/linkage.h>
 #include <asm/asi.h>
+#include <asm/export.h>
 
 #define XCC xcc
 
-#define EX(x,y)                        \
+#define EX(x,y,z)              \
 98:    x,y;                    \
        .section __ex_table,"a";\
        .align 4;               \
-       .word 98b, __retl_one;  \
+       .word 98b, z;           \
        .text;                  \
        .align 4;
 
+#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8)
+#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4)
+#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1)
+
        .register       %g2,#scratch
        .register       %g3,#scratch
 
        .text
+__retl_o4_plus_8:
+       add     %o4, %o2, %o4
+       retl
+        add    %o4, 8, %o0
+__retl_o2_plus_4:
+       retl
+        add    %o2, 4, %o0
+__retl_o2_plus_1:
+       retl
+        add    %o2, 1, %o0
+
        .align  32
 
        /* Don't try to get too fancy here, just nice and
@@ -44,8 +60,8 @@ ENTRY(___copy_in_user)        /* %o0=dst, %o1=src, %o2=len */
        andn            %o2, 0x7, %o4
        and             %o2, 0x7, %o2
 1:     subcc           %o4, 0x8, %o4
-       EX(ldxa [%o1] %asi, %o5)
-       EX(stxa %o5, [%o0] %asi)
+       EX_O4(ldxa [%o1] %asi, %o5)
+       EX_O4(stxa %o5, [%o0] %asi)
        add             %o1, 0x8, %o1
        bgu,pt          %XCC, 1b
         add            %o0, 0x8, %o0
@@ -53,8 +69,8 @@ ENTRY(___copy_in_user)        /* %o0=dst, %o1=src, %o2=len */
        be,pt           %XCC, 1f
         nop
        sub             %o2, 0x4, %o2
-       EX(lduwa [%o1] %asi, %o5)
-       EX(stwa %o5, [%o0] %asi)
+       EX_O2_4(lduwa [%o1] %asi, %o5)
+       EX_O2_4(stwa %o5, [%o0] %asi)
        add             %o1, 0x4, %o1
        add             %o0, 0x4, %o0
 1:     cmp             %o2, 0
@@ -70,8 +86,8 @@ ENTRY(___copy_in_user)        /* %o0=dst, %o1=src, %o2=len */
 
 82:
        subcc           %o2, 4, %o2
-       EX(lduwa [%o1] %asi, %g1)
-       EX(stwa %g1, [%o0] %asi)
+       EX_O2_4(lduwa [%o1] %asi, %g1)
+       EX_O2_4(stwa %g1, [%o0] %asi)
        add             %o1, 4, %o1
        bgu,pt          %XCC, 82b
         add            %o0, 4, %o0
@@ -82,11 +98,12 @@ ENTRY(___copy_in_user)      /* %o0=dst, %o1=src, %o2=len */
        .align  32
 90:
        subcc           %o2, 1, %o2
-       EX(lduba [%o1] %asi, %g1)
-       EX(stba %g1, [%o0] %asi)
+       EX_O2_1(lduba [%o1] %asi, %g1)
+       EX_O2_1(stba %g1, [%o0] %asi)
        add             %o1, 1, %o1
        bgu,pt          %XCC, 90b
         add            %o0, 1, %o0
        retl
         clr            %o0
 ENDPROC(___copy_in_user)
+EXPORT_SYMBOL(___copy_in_user)
index dd16c61f3263689f05b52d2da71f7c40685f1a0f..7197b72508951b68b7a1cc91087e29675bea183f 100644 (file)
@@ -10,6 +10,7 @@
 #include <asm/pgtable.h>
 #include <asm/spitfire.h>
 #include <asm/head.h>
+#include <asm/export.h>
 
        /* What we used to do was lock a TLB entry into a specific
         * TLB slot, clear the page with interrupts disabled, then
@@ -44,6 +45,7 @@
        .align          32
        .globl          copy_user_page
        .type           copy_user_page,#function
+       EXPORT_SYMBOL(copy_user_page)
 copy_user_page:                /* %o0=dest, %o1=src, %o2=vaddr */
        lduw            [%g6 + TI_PRE_COUNT], %o4
        sethi           %hi(PAGE_OFFSET), %g2
index ef095b6c43b157dc7ad076b641893b59e25775de..cea644dc67a628e8db216d6d5934fad2ec15e899 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/asmmacro.h>
 #include <asm/page.h>
 #include <asm/thread_info.h>
+#include <asm/export.h>
 
 /* Work around cpp -rob */
 #define ALLOC #alloc
 __copy_user_begin:
 
        .globl  __copy_user
+       EXPORT_SYMBOL(__copy_user)
 dword_align:
        andcc   %o1, 1, %g0
        be      4f
index e566c770a0f6372de5e4e7856ff520a9ac8920ac..0ecbafc30fd00e21cd7bd453d5e09d99349b8a82 100644 (file)
@@ -3,6 +3,8 @@
  * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
  */
 
+#include <asm/export.h>
+
 #ifdef __KERNEL__
 #define GLOBAL_SPARE   %g7
 #else
@@ -63,6 +65,7 @@
         add            %o5, %o4, %o4
 
        .globl          FUNC_NAME
+       EXPORT_SYMBOL(FUNC_NAME)
 FUNC_NAME:             /* %o0=src, %o1=dst, %o2=len, %o3=sum */
        LOAD(prefetch, %o0 + 0x000, #n_reads)
        xor             %o0, %o1, %g1
index 9614b48b6ef839fe52d8dcb463419634fe2d5770..a2b5a976be33612cce76a5b76c531b5bd1f06adb 100644 (file)
@@ -17,6 +17,7 @@ along with GNU CC; see the file COPYING.  If not, write to
 the Free Software Foundation, 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.  */
 
+#include <asm/export.h>
        .text
        .align 4
        .globl __divdi3
@@ -279,3 +280,4 @@ __divdi3:
 .LL81:
        ret
        restore
+EXPORT_SYMBOL(__divdi3)
index b39389f6989954aa6d9c0455d0fb3e99216763fa..23aab144d28e5b67b4db11710c9afb43746af6d4 100644 (file)
@@ -1,4 +1,5 @@
 #include <linux/linkage.h>
+#include <asm/export.h>
 
        .register       %g2,#scratch
 
@@ -65,6 +66,8 @@ ENTRY(__ffs)
         add    %o2, %g1, %o0
 ENDPROC(ffs)
 ENDPROC(__ffs)
+EXPORT_SYMBOL(__ffs)
+EXPORT_SYMBOL(ffs)
 
        .section        .popc_6insn_patch, "ax"
        .word           ffs
index 95414e0a6808d596f85c4809c519914499c28d1d..f9985f129fb68e2c599b944a3be5c0a4ebddb8b3 100644 (file)
@@ -1,4 +1,5 @@
 #include <linux/linkage.h>
+#include <asm/export.h>
 
        .text
        .align  32
@@ -7,6 +8,7 @@ ENTRY(__arch_hweight8)
         nop
        nop
 ENDPROC(__arch_hweight8)
+EXPORT_SYMBOL(__arch_hweight8)
        .section        .popc_3insn_patch, "ax"
        .word           __arch_hweight8
        sllx            %o0, 64-8, %g1
@@ -19,6 +21,7 @@ ENTRY(__arch_hweight16)
         nop
        nop
 ENDPROC(__arch_hweight16)
+EXPORT_SYMBOL(__arch_hweight16)
        .section        .popc_3insn_patch, "ax"
        .word           __arch_hweight16
        sllx            %o0, 64-16, %g1
@@ -31,6 +34,7 @@ ENTRY(__arch_hweight32)
         nop
        nop
 ENDPROC(__arch_hweight32)
+EXPORT_SYMBOL(__arch_hweight32)
        .section        .popc_3insn_patch, "ax"
        .word           __arch_hweight32
        sllx            %o0, 64-32, %g1
@@ -43,6 +47,7 @@ ENTRY(__arch_hweight64)
         nop
        nop
 ENDPROC(__arch_hweight64)
+EXPORT_SYMBOL(__arch_hweight64)
        .section        .popc_3insn_patch, "ax"
        .word           __arch_hweight64
        retl
index 4742d59029ee3677d0a77cd121d4437bb823e151..5d61648b53dd29d62225fb6053a6d1a9e1d5d7e5 100644 (file)
@@ -1,4 +1,5 @@
 #include <linux/linkage.h>
+#include <asm/export.h>
 
        .text
 ENTRY(ip_fast_csum) /* %o0 = iph, %o1 = ihl */
@@ -31,3 +32,4 @@ ENTRY(ip_fast_csum) /* %o0 = iph, %o1 = ihl */
        retl
         and    %o2, %o1, %o0
 ENDPROC(ip_fast_csum)
+EXPORT_SYMBOL(ip_fast_csum)
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c
deleted file mode 100644 (file)
index de5e978..0000000
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Export of symbols defined in assembler
- */
-
-/* Tell string.h we don't want memcpy etc. as cpp defines */
-#define EXPORT_SYMTAB_STROPS
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/types.h>
-
-#include <asm/checksum.h>
-#include <asm/uaccess.h>
-#include <asm/ftrace.h>
-
-/* string functions */
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strncmp);
-
-/* mem* functions */
-extern void *__memscan_zero(void *, size_t);
-extern void *__memscan_generic(void *, int, size_t);
-extern void *__bzero(void *, size_t);
-
-EXPORT_SYMBOL(memscan);
-EXPORT_SYMBOL(__memscan_zero);
-EXPORT_SYMBOL(__memscan_generic);
-EXPORT_SYMBOL(memcmp);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(__bzero);
-
-/* Networking helper routines. */
-EXPORT_SYMBOL(csum_partial);
-
-#ifdef CONFIG_MCOUNT
-EXPORT_SYMBOL(_mcount);
-#endif
-
-/*
- * sparc
- */
-#ifdef CONFIG_SPARC32
-extern int __ashrdi3(int, int);
-extern int __ashldi3(int, int);
-extern int __lshrdi3(int, int);
-extern int __muldi3(int, int);
-extern int __divdi3(int, int);
-
-extern void (*__copy_1page)(void *, const void *);
-extern void (*bzero_1page)(void *);
-
-extern void ___rw_read_enter(void);
-extern void ___rw_read_try(void);
-extern void ___rw_read_exit(void);
-extern void ___rw_write_enter(void);
-
-/* Networking helper routines. */
-EXPORT_SYMBOL(__csum_partial_copy_sparc_generic);
-
-/* Special internal versions of library functions. */
-EXPORT_SYMBOL(__copy_1page);
-EXPORT_SYMBOL(__memmove);
-EXPORT_SYMBOL(bzero_1page);
-
-/* Moving data to/from/in userspace. */
-EXPORT_SYMBOL(__copy_user);
-
-/* Used by asm/spinlock.h */
-#ifdef CONFIG_SMP
-EXPORT_SYMBOL(___rw_read_enter);
-EXPORT_SYMBOL(___rw_read_try);
-EXPORT_SYMBOL(___rw_read_exit);
-EXPORT_SYMBOL(___rw_write_enter);
-#endif
-
-EXPORT_SYMBOL(__ashrdi3);
-EXPORT_SYMBOL(__ashldi3);
-EXPORT_SYMBOL(__lshrdi3);
-EXPORT_SYMBOL(__muldi3);
-EXPORT_SYMBOL(__divdi3);
-#endif
-
-/*
- * sparc64
- */
-#ifdef CONFIG_SPARC64
-/* Networking helper routines. */
-EXPORT_SYMBOL(csum_partial_copy_nocheck);
-EXPORT_SYMBOL(__csum_partial_copy_from_user);
-EXPORT_SYMBOL(__csum_partial_copy_to_user);
-EXPORT_SYMBOL(ip_fast_csum);
-
-/* Moving data to/from/in userspace. */
-EXPORT_SYMBOL(___copy_to_user);
-EXPORT_SYMBOL(___copy_from_user);
-EXPORT_SYMBOL(___copy_in_user);
-EXPORT_SYMBOL(__clear_user);
-
-/* Atomic counter implementation. */
-#define ATOMIC_OP(op)                                                  \
-EXPORT_SYMBOL(atomic_##op);                                            \
-EXPORT_SYMBOL(atomic64_##op);
-
-#define ATOMIC_OP_RETURN(op)                                           \
-EXPORT_SYMBOL(atomic_##op##_return);                                   \
-EXPORT_SYMBOL(atomic64_##op##_return);
-
-#define ATOMIC_FETCH_OP(op)                                            \
-EXPORT_SYMBOL(atomic_fetch_##op);                                      \
-EXPORT_SYMBOL(atomic64_fetch_##op);
-
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_OP_RETURN(op) ATOMIC_FETCH_OP(op)
-
-ATOMIC_OPS(add)
-ATOMIC_OPS(sub)
-
-#undef ATOMIC_OPS
-#define ATOMIC_OPS(op) ATOMIC_OP(op) ATOMIC_FETCH_OP(op)
-
-ATOMIC_OPS(and)
-ATOMIC_OPS(or)
-ATOMIC_OPS(xor)
-
-#undef ATOMIC_OPS
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
-
-EXPORT_SYMBOL(atomic64_dec_if_positive);
-
-/* Atomic bit operations. */
-EXPORT_SYMBOL(test_and_set_bit);
-EXPORT_SYMBOL(test_and_clear_bit);
-EXPORT_SYMBOL(test_and_change_bit);
-EXPORT_SYMBOL(set_bit);
-EXPORT_SYMBOL(clear_bit);
-EXPORT_SYMBOL(change_bit);
-
-/* Special internal versions of library functions. */
-EXPORT_SYMBOL(_clear_page);
-EXPORT_SYMBOL(clear_user_page);
-EXPORT_SYMBOL(copy_user_page);
-
-/* RAID code needs this */
-void VISenter(void);
-EXPORT_SYMBOL(VISenter);
-
-extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
-extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
-               unsigned long *);
-extern void xor_vis_4(unsigned long, unsigned long *, unsigned long *,
-               unsigned long *, unsigned long *);
-extern void xor_vis_5(unsigned long, unsigned long *, unsigned long *,
-               unsigned long *, unsigned long *, unsigned long *);
-EXPORT_SYMBOL(xor_vis_2);
-EXPORT_SYMBOL(xor_vis_3);
-EXPORT_SYMBOL(xor_vis_4);
-EXPORT_SYMBOL(xor_vis_5);
-
-extern void xor_niagara_2(unsigned long, unsigned long *, unsigned long *);
-extern void xor_niagara_3(unsigned long, unsigned long *, unsigned long *,
-               unsigned long *);
-extern void xor_niagara_4(unsigned long, unsigned long *, unsigned long *,
-               unsigned long *, unsigned long *);
-extern void xor_niagara_5(unsigned long, unsigned long *, unsigned long *,
-               unsigned long *, unsigned long *, unsigned long *);
-
-EXPORT_SYMBOL(xor_niagara_2);
-EXPORT_SYMBOL(xor_niagara_3);
-EXPORT_SYMBOL(xor_niagara_4);
-EXPORT_SYMBOL(xor_niagara_5);
-#endif
index 64f53f2b673de1cb91a80dc2eb3929ac65ad2db8..f38c4e59d078fefec684bc68bad68865e1945662 100644 (file)
@@ -10,6 +10,7 @@
 #include <asm/psr.h>
 #include <asm/smp.h>
 #include <asm/spinlock.h>
+#include <asm/export.h>
 
        .text
        .align  4
@@ -48,6 +49,7 @@ ___rw_write_enter_spin_on_wlock:
         ld     [%g1], %g2
 
        .globl  ___rw_read_enter
+EXPORT_SYMBOL(___rw_read_enter)
 ___rw_read_enter:
        orcc    %g2, 0x0, %g0
        bne,a   ___rw_read_enter_spin_on_wlock
@@ -59,6 +61,7 @@ ___rw_read_enter:
         mov    %g4, %o7
 
        .globl  ___rw_read_exit
+EXPORT_SYMBOL(___rw_read_exit)
 ___rw_read_exit:
        orcc    %g2, 0x0, %g0
        bne,a   ___rw_read_exit_spin_on_wlock
@@ -70,6 +73,7 @@ ___rw_read_exit:
         mov    %g4, %o7
 
        .globl  ___rw_read_try
+EXPORT_SYMBOL(___rw_read_try)
 ___rw_read_try:
        orcc    %g2, 0x0, %g0
        bne     ___rw_read_try_spin_on_wlock
@@ -81,6 +85,7 @@ ___rw_read_try:
         mov    %g4, %o7
 
        .globl  ___rw_write_enter
+EXPORT_SYMBOL(___rw_write_enter)
 ___rw_write_enter:
        orcc    %g2, 0x0, %g0
        bne     ___rw_write_enter_spin_on_wlock
index 60ebc7cdbee04a89b772b4ea28d515f86a5cae0e..c9b9373f8d8104587dc5b001f207ddc880775689 100644 (file)
@@ -1,4 +1,5 @@
 #include <linux/linkage.h>
+#include <asm/export.h>
 
 ENTRY(__lshrdi3)
        cmp     %o2, 0
@@ -25,3 +26,4 @@ ENTRY(__lshrdi3)
        retl 
         nop 
 ENDPROC(__lshrdi3)
+EXPORT_SYMBOL(__lshrdi3)
index 0b0ed4d34219bc6dfccd1a0ab337727b2417a5b0..194f383611c02f815431f90dfd662c3718f3b4a8 100644 (file)
@@ -6,6 +6,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/export.h>
 
 /*
  * This is the main variant and is called by C code.  GCC's -pg option
@@ -16,6 +17,7 @@
        .align          32
        .globl          _mcount
        .type           _mcount,#function
+       EXPORT_SYMBOL(_mcount)
        .globl          mcount
        .type           mcount,#function
 _mcount:
index efa106c41ed0af7777d4a6a8b70ca9585d9b694d..cee7f30dbb61352cb33552ed1240821269c76dfa 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <linux/linkage.h>
 #include <asm/asm.h>
+#include <asm/export.h>
 
        .text
 ENTRY(memcmp)
@@ -25,3 +26,4 @@ ENTRY(memcmp)
 2:     retl
         mov    0, %o0
 ENDPROC(memcmp)
+EXPORT_SYMBOL(memcmp)
index 4d8c497517bd6f3f17110f2a3b4e6b6fe204e39e..8913feaa7ac7e1bb0ec677a531c58f6cc946b948 100644 (file)
@@ -7,6 +7,7 @@
  * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
+#include <asm/export.h>
 #define FUNC(x)                \
        .globl  x;              \
        .type   x,@function;    \
@@ -58,93 +59,11 @@ x:
        stb     %t0, [%dst - (offset) - 0x02]; \
        stb     %t1, [%dst - (offset) - 0x01];
 
-/* Both these macros have to start with exactly the same insn */
-#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
-       ldd     [%src - (offset) - 0x20], %t0; \
-       ldd     [%src - (offset) - 0x18], %t2; \
-       ldd     [%src - (offset) - 0x10], %t4; \
-       ldd     [%src - (offset) - 0x08], %t6; \
-       st      %t0, [%dst - (offset) - 0x20]; \
-       st      %t1, [%dst - (offset) - 0x1c]; \
-       st      %t2, [%dst - (offset) - 0x18]; \
-       st      %t3, [%dst - (offset) - 0x14]; \
-       st      %t4, [%dst - (offset) - 0x10]; \
-       st      %t5, [%dst - (offset) - 0x0c]; \
-       st      %t6, [%dst - (offset) - 0x08]; \
-       st      %t7, [%dst - (offset) - 0x04];
-
-#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
-       ldd     [%src - (offset) - 0x20], %t0; \
-       ldd     [%src - (offset) - 0x18], %t2; \
-       ldd     [%src - (offset) - 0x10], %t4; \
-       ldd     [%src - (offset) - 0x08], %t6; \
-       std     %t0, [%dst - (offset) - 0x20]; \
-       std     %t2, [%dst - (offset) - 0x18]; \
-       std     %t4, [%dst - (offset) - 0x10]; \
-       std     %t6, [%dst - (offset) - 0x08];
-
-#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
-       ldd     [%src + (offset) + 0x00], %t0; \
-       ldd     [%src + (offset) + 0x08], %t2; \
-       st      %t0, [%dst + (offset) + 0x00]; \
-       st      %t1, [%dst + (offset) + 0x04]; \
-       st      %t2, [%dst + (offset) + 0x08]; \
-       st      %t3, [%dst + (offset) + 0x0c];
-
-#define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
-       ldub    [%src + (offset) + 0x00], %t0; \
-       ldub    [%src + (offset) + 0x01], %t1; \
-       stb     %t0, [%dst + (offset) + 0x00]; \
-       stb     %t1, [%dst + (offset) + 0x01];
-
-#define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
-       ldd     [%src + (offset) + 0x00], %t0; \
-       ldd     [%src + (offset) + 0x08], %t2; \
-       srl     %t0, shir, %t5; \
-       srl     %t1, shir, %t6; \
-       sll     %t0, shil, %t0; \
-       or      %t5, %prev, %t5; \
-       sll     %t1, shil, %prev; \
-       or      %t6, %t0, %t0; \
-       srl     %t2, shir, %t1; \
-       srl     %t3, shir, %t6; \
-       sll     %t2, shil, %t2; \
-       or      %t1, %prev, %t1; \
-       std     %t4, [%dst + (offset) + (offset2) - 0x04]; \
-       std     %t0, [%dst + (offset) + (offset2) + 0x04]; \
-       sll     %t3, shil, %prev; \
-       or      %t6, %t2, %t4;
-
-#define SMOVE_ALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
-       ldd     [%src + (offset) + 0x00], %t0; \
-       ldd     [%src + (offset) + 0x08], %t2; \
-       srl     %t0, shir, %t4; \
-       srl     %t1, shir, %t5; \
-       sll     %t0, shil, %t6; \
-       or      %t4, %prev, %t0; \
-       sll     %t1, shil, %prev; \
-       or      %t5, %t6, %t1; \
-       srl     %t2, shir, %t4; \
-       srl     %t3, shir, %t5; \
-       sll     %t2, shil, %t6; \
-       or      %t4, %prev, %t2; \
-       sll     %t3, shil, %prev; \
-       or      %t5, %t6, %t3; \
-       std     %t0, [%dst + (offset) + (offset2) + 0x00]; \
-       std     %t2, [%dst + (offset) + (offset2) + 0x08];
-
        .text
        .align  4
 
-0:
-       retl
-        nop            ! Only bcopy returns here and it retuns void...
-
-#ifdef __KERNEL__
-FUNC(amemmove)
-FUNC(__memmove)
-#endif
 FUNC(memmove)
+EXPORT_SYMBOL(memmove)
        cmp             %o0, %o1
        mov             %o0, %g7
        bleu            9f
@@ -202,6 +121,7 @@ FUNC(memmove)
         add            %o0, 2, %o0
 
 FUNC(memcpy)   /* %o0=dst %o1=src %o2=len */
+EXPORT_SYMBOL(memcpy)
 
        sub             %o0, %o1, %o4
        mov             %o0, %g7
index 857ad4f8905f942f44ac40770c9277323e4e3c09..012cdb6ca4677b09da150b3f8b61ae98879b5952 100644 (file)
@@ -5,6 +5,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/export.h>
 
        .text
 ENTRY(memmove) /* o0=dst o1=src o2=len */
@@ -57,3 +58,4 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */
         stb            %g7, [%o0 - 0x1]
        ba,a,pt         %xcc, 99b
 ENDPROC(memmove)
+EXPORT_SYMBOL(memmove)
index 4ff1657dfc246a65a432cf228ba3088d853db2c4..51ce690c42a8ec3a06e43edb92b098de36db0b4d 100644 (file)
@@ -4,6 +4,8 @@
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <asm/export.h>
+
 /* In essence, this is just a fancy strlen. */
 
 #define LO_MAGIC 0x01010101
@@ -13,6 +15,8 @@
        .align  4
        .globl  __memscan_zero, __memscan_generic
        .globl  memscan
+EXPORT_SYMBOL(__memscan_zero)
+EXPORT_SYMBOL(__memscan_generic)
 __memscan_zero:
        /* %o0 = addr, %o1 = size */
        cmp     %o1, 0
index 5686dfa5dc1595ef14c47dfc5664b44a537f572b..daa96f4b03e6007fbc415a70514f3f56be8a4ef1 100644 (file)
@@ -5,6 +5,8 @@
  * Copyright (C) 1998 David S. Miller (davem@redhat.com)
  */
 
+       #include <asm/export.h>
+
 #define HI_MAGIC       0x8080808080808080
 #define LO_MAGIC       0x0101010101010101
 #define ASI_PL         0x88
@@ -13,6 +15,8 @@
        .align  32
        .globl          __memscan_zero, __memscan_generic
        .globl          memscan
+       EXPORT_SYMBOL(__memscan_zero)
+       EXPORT_SYMBOL(__memscan_generic)
 
 __memscan_zero:
        /* %o0 = bufp, %o1 = size */
index f75e6906df146aae9a99cc83c6750903f9853783..bb539b42b088ace45ecf2d1419ee808986f12d72 100644 (file)
@@ -9,6 +9,7 @@
  */
 
 #include <asm/ptrace.h>
+#include <asm/export.h>
 
 /* Work around cpp -rob */
 #define ALLOC #alloc
@@ -63,6 +64,8 @@ __bzero_begin:
 
        .globl  __bzero
        .globl  memset
+       EXPORT_SYMBOL(__bzero)
+       EXPORT_SYMBOL(memset)
        .globl  __memset_start, __memset_end
 __memset_start:
 memset:
index 9794939d1c12626187016451fd803d7653523d0a..17a0f49aef3c0d18d1e1e03d44c1f664c3384ee4 100644 (file)
@@ -17,6 +17,7 @@ along with GNU CC; see the file COPYING.  If not, write to
 the Free Software Foundation, 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.  */
 
+#include <asm/export.h>
        .text
        .align 4
        .globl __muldi3
@@ -74,3 +75,4 @@ __muldi3:
        add  %l2, %l0, %i0
        ret 
        restore  %g0, %l3, %o1
+EXPORT_SYMBOL(__muldi3)
index 536f83507fbff1dc669f998f5e6b7336812225a5..ca0e7077e87136fae3875839b216b9fd80694fbb 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/linkage.h>
 #include <asm/asm.h>
+#include <asm/export.h>
 
 #define LO_MAGIC 0x01010101
 #define HI_MAGIC 0x80808080
@@ -78,3 +79,4 @@ ENTRY(strlen)
        retl
         mov    2, %o0
 ENDPROC(strlen)
+EXPORT_SYMBOL(strlen)
index c0d1b568c1c561f443356678d06ae6b7d4831053..e3fe014813af85bf1040fbef9165dadbb66f25f2 100644 (file)
@@ -4,6 +4,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/export.h>
 
        .text
 ENTRY(strncmp)
@@ -116,3 +117,4 @@ ENTRY(strncmp)
        retl
         sub    %o3, %o0, %o0
 ENDPROC(strncmp)
+EXPORT_SYMBOL(strncmp)
index 0656627166f38e3070731893beeba2acbad1b571..efb5f884330d73538f6ce74b09ace90076782023 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <linux/linkage.h>
 #include <asm/asi.h>
+#include <asm/export.h>
 
        .text
 ENTRY(strncmp)
@@ -28,3 +29,4 @@ ENTRY(strncmp)
        retl
         clr    %o0
 ENDPROC(strncmp)
+EXPORT_SYMBOL(strncmp)
diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c
deleted file mode 100644 (file)
index ac96ae2..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/* user_fixup.c: Fix up user copy faults.
- *
- * Copyright (C) 2004 David S. Miller <davem@redhat.com>
- */
-
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-
-#include <asm/uaccess.h>
-
-/* Calculating the exact fault address when using
- * block loads and stores can be very complicated.
- *
- * Instead of trying to be clever and handling all
- * of the cases, just fix things up simply here.
- */
-
-static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset)
-{
-       unsigned long fault_addr = current_thread_info()->fault_address;
-       unsigned long end = start + size;
-
-       if (fault_addr < start || fault_addr >= end) {
-               *offset = 0;
-       } else {
-               *offset = fault_addr - start;
-               size = end - fault_addr;
-       }
-       return size;
-}
-
-unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size)
-{
-       unsigned long offset;
-
-       size = compute_size((unsigned long) from, size, &offset);
-       if (likely(size))
-               memset(to + offset, 0, size);
-
-       return size;
-}
-EXPORT_SYMBOL(copy_from_user_fixup);
-
-unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size)
-{
-       unsigned long offset;
-
-       return compute_size((unsigned long) to, size, &offset);
-}
-EXPORT_SYMBOL(copy_to_user_fixup);
-
-unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size)
-{
-       unsigned long fault_addr = current_thread_info()->fault_address;
-       unsigned long start = (unsigned long) to;
-       unsigned long end = start + size;
-
-       if (fault_addr >= start && fault_addr < end)
-               return end - fault_addr;
-
-       start = (unsigned long) from;
-       end = start + size;
-       if (fault_addr >= start && fault_addr < end)
-               return end - fault_addr;
-
-       return size;
-}
-EXPORT_SYMBOL(copy_in_user_fixup);
index 2c05641c326390949f51749e01cedc1b36002015..45a49cb618b52bea832ee2bd128406ea8c20d500 100644 (file)
@@ -13,6 +13,7 @@
 #include <asm/asi.h>
 #include <asm/dcu.h>
 #include <asm/spitfire.h>
+#include <asm/export.h>
 
 /*
  *     Requirements:
@@ -90,6 +91,7 @@ ENTRY(xor_vis_2)
        retl
          wr    %g0, 0, %fprs
 ENDPROC(xor_vis_2)
+EXPORT_SYMBOL(xor_vis_2)
 
 ENTRY(xor_vis_3)
        rd      %fprs, %o5
@@ -156,6 +158,7 @@ ENTRY(xor_vis_3)
        retl
         wr     %g0, 0, %fprs
 ENDPROC(xor_vis_3)
+EXPORT_SYMBOL(xor_vis_3)
 
 ENTRY(xor_vis_4)
        rd      %fprs, %o5
@@ -241,6 +244,7 @@ ENTRY(xor_vis_4)
        retl
         wr     %g0, 0, %fprs
 ENDPROC(xor_vis_4)
+EXPORT_SYMBOL(xor_vis_4)
 
 ENTRY(xor_vis_5)
        save    %sp, -192, %sp
@@ -347,6 +351,7 @@ ENTRY(xor_vis_5)
        ret
         restore
 ENDPROC(xor_vis_5)
+EXPORT_SYMBOL(xor_vis_5)
 
        /* Niagara versions. */
 ENTRY(xor_niagara_2) /* %o0=bytes, %o1=dest, %o2=src */
@@ -393,6 +398,7 @@ ENTRY(xor_niagara_2) /* %o0=bytes, %o1=dest, %o2=src */
        ret
         restore
 ENDPROC(xor_niagara_2)
+EXPORT_SYMBOL(xor_niagara_2)
 
 ENTRY(xor_niagara_3) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */
        save            %sp, -192, %sp
@@ -454,6 +460,7 @@ ENTRY(xor_niagara_3) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2 */
        ret
         restore
 ENDPROC(xor_niagara_3)
+EXPORT_SYMBOL(xor_niagara_3)
 
 ENTRY(xor_niagara_4) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */
        save            %sp, -192, %sp
@@ -536,6 +543,7 @@ ENTRY(xor_niagara_4) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3 */
        ret
         restore
 ENDPROC(xor_niagara_4)
+EXPORT_SYMBOL(xor_niagara_4)
 
 ENTRY(xor_niagara_5) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=src4 */
        save            %sp, -192, %sp
@@ -634,3 +642,4 @@ ENTRY(xor_niagara_5) /* %o0=bytes, %o1=dest, %o2=src1, %o3=src2, %o4=src3, %o5=s
        ret
         restore
 ENDPROC(xor_niagara_5)
+EXPORT_SYMBOL(xor_niagara_5)
index 4e06750a5d295649660ff4ca0998eb03b00da9e9..cd0e32bbcb1de0f6b16bce4ccd3f8b89acaa18b9 100644 (file)
@@ -238,7 +238,8 @@ slow:
                pages += nr;
 
                ret = get_user_pages_unlocked(start,
-                       (end - start) >> PAGE_SHIFT, write, 0, pages);
+                       (end - start) >> PAGE_SHIFT, pages,
+                       write ? FOLL_WRITE : 0);
 
                /* Have to be a bit careful with return values */
                if (nr > 0) {
index 439784b7b7ac6e5951a6c1c6ab126dddc5fae915..37aa537b3ad841522d9a13c2b4695edb7b092a28 100644 (file)
@@ -802,8 +802,10 @@ struct mdesc_mblock {
 };
 static struct mdesc_mblock *mblocks;
 static int num_mblocks;
+static int find_numa_node_for_addr(unsigned long pa,
+                                  struct node_mem_mask *pnode_mask);
 
-static unsigned long ra_to_pa(unsigned long addr)
+static unsigned long __init ra_to_pa(unsigned long addr)
 {
        int i;
 
@@ -819,8 +821,11 @@ static unsigned long ra_to_pa(unsigned long addr)
        return addr;
 }
 
-static int find_node(unsigned long addr)
+static int __init find_node(unsigned long addr)
 {
+       static bool search_mdesc = true;
+       static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL };
+       static int last_index;
        int i;
 
        addr = ra_to_pa(addr);
@@ -830,13 +835,30 @@ static int find_node(unsigned long addr)
                if ((addr & p->mask) == p->val)
                        return i;
        }
-       /* The following condition has been observed on LDOM guests.*/
-       WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node"
-               " rule. Some physical memory will be owned by node 0.");
-       return 0;
+       /* The following condition has been observed on LDOM guests because
+        * node_masks only contains the best latency mask and value.
+        * LDOM guest's mdesc can contain a single latency group to
+        * cover multiple address ranges. Print a warning message only if the
+        * address cannot be found in node_masks nor mdesc.
+        */
+       if ((search_mdesc) &&
+           ((addr & last_mem_mask.mask) != last_mem_mask.val)) {
+               /* find the available node in the mdesc */
+               last_index = find_numa_node_for_addr(addr, &last_mem_mask);
+               numadbg("find_node: latency group for address 0x%lx is %d\n",
+                       addr, last_index);
+               if ((last_index < 0) || (last_index >= num_node_masks)) {
+                       /* WARN_ONCE() and use default group 0 */
+                       WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0.");
+                       search_mdesc = false;
+                       last_index = 0;
+               }
+       }
+
+       return last_index;
 }
 
-static u64 memblock_nid_range(u64 start, u64 end, int *nid)
+static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
 {
        *nid = find_node(start);
        start += PAGE_SIZE;
@@ -1160,6 +1182,41 @@ int __node_distance(int from, int to)
        return numa_latency[from][to];
 }
 
+static int find_numa_node_for_addr(unsigned long pa,
+                                  struct node_mem_mask *pnode_mask)
+{      /* Returns the index of the first "group" node with an mlgroup matching pa, or -1. */
+       struct mdesc_handle *md = mdesc_grab(); /* released on every return path below */
+       u64 node, arc;
+       int i = 0;      /* index of the "group" node currently being examined */
+
+       node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups");
+       if (node == MDESC_NODE_NULL)
+               goto out;       /* no "latency-groups" node: report -1 */
+
+       mdesc_for_each_node_by_name(md, node, "group") {
+               mdesc_for_each_arc(arc, md, node, MDESC_ARC_TYPE_FWD) {
+                       u64 target = mdesc_arc_target(md, arc);
+                       struct mdesc_mlgroup *m = find_mlgroup(target);
+
+                       if (!m) /* arc target has no mlgroup entry; try the next arc */
+                               continue;
+                       if ((pa & m->mask) == m->match) {
+                               if (pnode_mask) {       /* out-parameter is optional */
+                                       pnode_mask->mask = m->mask;
+                                       pnode_mask->val = m->match;
+                               }
+                               mdesc_release(md);
+                               return i;
+                       }
+               }
+               i++;
+       }
+
+out:
+       mdesc_release(md);
+       return -1;      /* no group matched pa */
+}
+
 static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
 {
        int i;
index f2b77112e9d8bb50f4f05346e955fb4c5f6746a9..e20fbbafb0b04af0fa85b21188cd6c851c132e6e 100644 (file)
@@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr)
        return (tag == (vaddr >> 22));
 }
 
+static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
+{      /* Invalidate every swapper_tsb entry whose rebuilt address lies in [start, end). */
+       unsigned long idx;
+
+       for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {       /* visits the whole table */
+               struct tsb *ent = &swapper_tsb[idx];
+               unsigned long match = idx << 13;        /* low bits come from the slot index */
+
+               match |= (ent->tag << 22);      /* tag_compare() above stores vaddr >> 22 as the tag */
+               if (match >= start && match < end)
+                       ent->tag = (1UL << TSB_TAG_INVALID_BIT);
+       }
+}
+
 /* TSB flushes need only occur on the processor initiating the address
  * space modification, not on each cpu the address space has run on.
  * Only the TLB flush needs that treatment.
@@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
 {
        unsigned long v;
 
+       if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
+               return flush_tsb_kernel_range_scan(start, end);
+
        for (v = start; v < end; v += PAGE_SIZE) {
                unsigned long hash = tsb_hash(v, PAGE_SHIFT,
                                              KERNEL_TSB_NENTRIES);
index b4f4733abc6ea8f9e6ef6abafbc75f0b16b08003..5d2fd6cd31896b87a3373a59cbfc3130808c6908 100644 (file)
@@ -30,7 +30,7 @@
        .text
        .align          32
        .globl          __flush_tlb_mm
-__flush_tlb_mm:                /* 18 insns */
+__flush_tlb_mm:                /* 19 insns */
        /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */
        ldxa            [%o1] ASI_DMMU, %g2
        cmp             %g2, %o0
@@ -81,7 +81,7 @@ __flush_tlb_page:     /* 22 insns */
 
        .align          32
        .globl          __flush_tlb_pending
-__flush_tlb_pending:   /* 26 insns */
+__flush_tlb_pending:   /* 27 insns */
        /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
        rdpr            %pstate, %g7
        sllx            %o1, 3, %o1
@@ -113,12 +113,14 @@ __flush_tlb_pending:      /* 26 insns */
 
        .align          32
        .globl          __flush_tlb_kernel_range
-__flush_tlb_kernel_range:      /* 16 insns */
+__flush_tlb_kernel_range:      /* 31 insns */
        /* %o0=start, %o1=end */
        cmp             %o0, %o1
        be,pn           %xcc, 2f
+        sub            %o1, %o0, %o3
+       srlx            %o3, 18, %o4
+       brnz,pn         %o4, __spitfire_flush_tlb_kernel_range_slow
         sethi          %hi(PAGE_SIZE), %o4
-       sub             %o1, %o0, %o3
        sub             %o3, %o4, %o3
        or              %o0, 0x20, %o0          ! Nucleus
 1:     stxa            %g0, [%o0 + %o3] ASI_DMMU_DEMAP
@@ -131,6 +133,41 @@ __flush_tlb_kernel_range:  /* 16 insns */
        retl
         nop
        nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+
+__spitfire_flush_tlb_kernel_range_slow:
+       mov             63 * 8, %o4
+1:     ldxa            [%o4] ASI_ITLB_DATA_ACCESS, %o3
+       andcc           %o3, 0x40, %g0                  /* _PAGE_L_4U */
+       bne,pn          %xcc, 2f
+        mov            TLB_TAG_ACCESS, %o3
+       stxa            %g0, [%o3] ASI_IMMU
+       stxa            %g0, [%o4] ASI_ITLB_DATA_ACCESS
+       membar          #Sync
+2:     ldxa            [%o4] ASI_DTLB_DATA_ACCESS, %o3
+       andcc           %o3, 0x40, %g0
+       bne,pn          %xcc, 2f
+        mov            TLB_TAG_ACCESS, %o3
+       stxa            %g0, [%o3] ASI_DMMU
+       stxa            %g0, [%o4] ASI_DTLB_DATA_ACCESS
+       membar          #Sync
+2:     sub             %o4, 8, %o4
+       brgez,pt        %o4, 1b
+        nop
+       retl
+        nop
 
 __spitfire_flush_tlb_mm_slow:
        rdpr            %pstate, %g1
@@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending:       /* 27 insns */
        retl
         wrpr           %g7, 0x0, %pstate
 
+__cheetah_flush_tlb_kernel_range:      /* 31 insns */
+       /* %o0=start, %o1=end */
+       cmp             %o0, %o1
+       be,pn           %xcc, 2f                /* empty range: skip the demap loop */
+        sub            %o1, %o0, %o3           /* delay slot: %o3 = end - start */
+       srlx            %o3, 18, %o4            /* nonzero when the range is >= 1 << 18 bytes */
+       brnz,pn         %o4, 3f                 /* large range: take the path at 3: instead of looping */
+        sethi          %hi(PAGE_SIZE), %o4     /* delay slot: %o4 = PAGE_SIZE, the loop step */
+       sub             %o3, %o4, %o3           /* offset of the last page in the range */
+       or              %o0, 0x20, %o0          ! Nucleus
+1:     stxa            %g0, [%o0 + %o3] ASI_DMMU_DEMAP
+       stxa            %g0, [%o0 + %o3] ASI_IMMU_DEMAP
+       membar          #Sync
+       brnz,pt         %o3, 1b                 /* loop until offset 0 has been handled */
+        sub            %o3, %o4, %o3           /* delay slot: step down one page */
+2:     sethi           %hi(KERNBASE), %o3
+       flush           %o3
+       retl
+        nop
+3:     mov             0x80, %o4               /* NOTE(review): presumably the demap-all operation - confirm */
+       stxa            %g0, [%o4] ASI_DMMU_DEMAP
+       membar          #Sync
+       stxa            %g0, [%o4] ASI_IMMU_DEMAP
+       membar          #Sync
+       retl
+        nop
+       nop                                     /* nop padding up to the 31-insn count in the header */
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+
+
 #ifdef DCACHE_ALIASING_POSSIBLE
 __cheetah_flush_dcache_page: /* 11 insns */
        sethi           %hi(PAGE_OFFSET), %g1
@@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error:
        ret
         restore
 
-__hypervisor_flush_tlb_mm: /* 10 insns */
+__hypervisor_flush_tlb_mm: /* 19 insns */
        mov             %o0, %o2        /* ARG2: mmu context */
        mov             0, %o0          /* ARG0: CPU lists unimplemented */
        mov             0, %o1          /* ARG1: CPU lists unimplemented */
        mov             HV_MMU_ALL, %o3 /* ARG3: flags */
        mov             HV_FAST_MMU_DEMAP_CTX, %o5
        ta              HV_FAST_TRAP
-       brnz,pn         %o0, __hypervisor_tlb_tl0_error
+       brnz,pn         %o0, 1f
         mov            HV_FAST_MMU_DEMAP_CTX, %o1
        retl
         nop
+1:     sethi           %hi(__hypervisor_tlb_tl0_error), %o5
+       jmpl            %o5 + %lo(__hypervisor_tlb_tl0_error), %g0
+        nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
 
-__hypervisor_flush_tlb_page: /* 11 insns */
+__hypervisor_flush_tlb_page: /* 22 insns */
        /* %o0 = context, %o1 = vaddr */
        mov             %o0, %g2
        mov             %o1, %o0              /* ARG0: vaddr + IMMU-bit */
@@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */
        srlx            %o0, PAGE_SHIFT, %o0
        sllx            %o0, PAGE_SHIFT, %o0
        ta              HV_MMU_UNMAP_ADDR_TRAP
-       brnz,pn         %o0, __hypervisor_tlb_tl0_error
+       brnz,pn         %o0, 1f
         mov            HV_MMU_UNMAP_ADDR_TRAP, %o1
        retl
         nop
+1:     sethi           %hi(__hypervisor_tlb_tl0_error), %o2
+       jmpl            %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
+        nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
 
-__hypervisor_flush_tlb_pending: /* 16 insns */
+__hypervisor_flush_tlb_pending: /* 27 insns */
        /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */
        sllx            %o1, 3, %g1
        mov             %o2, %g2
@@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */
        srlx            %o0, PAGE_SHIFT, %o0
        sllx            %o0, PAGE_SHIFT, %o0
        ta              HV_MMU_UNMAP_ADDR_TRAP
-       brnz,pn         %o0, __hypervisor_tlb_tl0_error
+       brnz,pn         %o0, 1f
         mov            HV_MMU_UNMAP_ADDR_TRAP, %o1
        brnz,pt         %g1, 1b
         nop
        retl
         nop
+1:     sethi           %hi(__hypervisor_tlb_tl0_error), %o2
+       jmpl            %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
+        nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
 
-__hypervisor_flush_tlb_kernel_range: /* 16 insns */
+__hypervisor_flush_tlb_kernel_range: /* 31 insns */
        /* %o0=start, %o1=end */
        cmp             %o0, %o1
        be,pn           %xcc, 2f
-        sethi          %hi(PAGE_SIZE), %g3
-       mov             %o0, %g1
-       sub             %o1, %g1, %g2
+        sub            %o1, %o0, %g2
+       srlx            %g2, 18, %g3
+       brnz,pn         %g3, 4f
+        mov            %o0, %g1
+       sethi           %hi(PAGE_SIZE), %g3
        sub             %g2, %g3, %g2
 1:     add             %g1, %g2, %o0   /* ARG0: virtual address */
        mov             0, %o1          /* ARG1: mmu context */
        mov             HV_MMU_ALL, %o2 /* ARG2: flags */
        ta              HV_MMU_UNMAP_ADDR_TRAP
-       brnz,pn         %o0, __hypervisor_tlb_tl0_error
+       brnz,pn         %o0, 3f
         mov            HV_MMU_UNMAP_ADDR_TRAP, %o1
        brnz,pt         %g2, 1b
         sub            %g2, %g3, %g2
 2:     retl
         nop
+3:     sethi           %hi(__hypervisor_tlb_tl0_error), %o2
+       jmpl            %o2 + %lo(__hypervisor_tlb_tl0_error), %g0
+        nop
+4:     mov             0, %o0          /* ARG0: CPU lists unimplemented */
+       mov             0, %o1          /* ARG1: CPU lists unimplemented */
+       mov             0, %o2          /* ARG2: mmu context == nucleus */
+       mov             HV_MMU_ALL, %o3 /* ARG3: flags */
+       mov             HV_FAST_MMU_DEMAP_CTX, %o5
+       ta              HV_FAST_TRAP
+       brnz,pn         %o0, 3b
+        mov            HV_FAST_MMU_DEMAP_CTX, %o1
+       retl
+        nop
 
 #ifdef DCACHE_ALIASING_POSSIBLE
        /* XXX Niagara and friends have an 8K cache, so no aliasing is
@@ -394,43 +511,6 @@ tlb_patch_one:
        retl
         nop
 
-       .globl          cheetah_patch_cachetlbops
-cheetah_patch_cachetlbops:
-       save            %sp, -128, %sp
-
-       sethi           %hi(__flush_tlb_mm), %o0
-       or              %o0, %lo(__flush_tlb_mm), %o0
-       sethi           %hi(__cheetah_flush_tlb_mm), %o1
-       or              %o1, %lo(__cheetah_flush_tlb_mm), %o1
-       call            tlb_patch_one
-        mov            19, %o2
-
-       sethi           %hi(__flush_tlb_page), %o0
-       or              %o0, %lo(__flush_tlb_page), %o0
-       sethi           %hi(__cheetah_flush_tlb_page), %o1
-       or              %o1, %lo(__cheetah_flush_tlb_page), %o1
-       call            tlb_patch_one
-        mov            22, %o2
-
-       sethi           %hi(__flush_tlb_pending), %o0
-       or              %o0, %lo(__flush_tlb_pending), %o0
-       sethi           %hi(__cheetah_flush_tlb_pending), %o1
-       or              %o1, %lo(__cheetah_flush_tlb_pending), %o1
-       call            tlb_patch_one
-        mov            27, %o2
-
-#ifdef DCACHE_ALIASING_POSSIBLE
-       sethi           %hi(__flush_dcache_page), %o0
-       or              %o0, %lo(__flush_dcache_page), %o0
-       sethi           %hi(__cheetah_flush_dcache_page), %o1
-       or              %o1, %lo(__cheetah_flush_dcache_page), %o1
-       call            tlb_patch_one
-        mov            11, %o2
-#endif /* DCACHE_ALIASING_POSSIBLE */
-
-       ret
-        restore
-
 #ifdef CONFIG_SMP
        /* These are all called by the slaves of a cross call, at
         * trap level 1, with interrupts fully disabled.
@@ -447,7 +527,7 @@ cheetah_patch_cachetlbops:
         */
        .align          32
        .globl          xcall_flush_tlb_mm
-xcall_flush_tlb_mm:    /* 21 insns */
+xcall_flush_tlb_mm:    /* 24 insns */
        mov             PRIMARY_CONTEXT, %g2
        ldxa            [%g2] ASI_DMMU, %g3
        srlx            %g3, CTX_PGSZ1_NUC_SHIFT, %g4
@@ -469,9 +549,12 @@ xcall_flush_tlb_mm:        /* 21 insns */
        nop
        nop
        nop
+       nop
+       nop
+       nop
 
        .globl          xcall_flush_tlb_page
-xcall_flush_tlb_page:  /* 17 insns */
+xcall_flush_tlb_page:  /* 20 insns */
        /* %g5=context, %g1=vaddr */
        mov             PRIMARY_CONTEXT, %g4
        ldxa            [%g4] ASI_DMMU, %g2
@@ -490,15 +573,20 @@ xcall_flush_tlb_page:     /* 17 insns */
        retry
        nop
        nop
+       nop
+       nop
+       nop
 
        .globl          xcall_flush_tlb_kernel_range
-xcall_flush_tlb_kernel_range:  /* 25 insns */
+xcall_flush_tlb_kernel_range:  /* 44 insns */
        sethi           %hi(PAGE_SIZE - 1), %g2
        or              %g2, %lo(PAGE_SIZE - 1), %g2
        andn            %g1, %g2, %g1
        andn            %g7, %g2, %g7
        sub             %g7, %g1, %g3
-       add             %g2, 1, %g2
+       srlx            %g3, 18, %g2            /* clobbers %g2; nonzero when range >= 1 << 18 bytes */
+       brnz,pn         %g2, 2f
+        sethi          %hi(PAGE_SIZE), %g2     /* reload the page step; %g2 no longer holds PAGE_SIZE - 1 */
        sub             %g3, %g2, %g3
        or              %g1, 0x20, %g1          ! Nucleus
 1:     stxa            %g0, [%g1 + %g3] ASI_DMMU_DEMAP
@@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range:      /* 25 insns */
        brnz,pt         %g3, 1b
         sub            %g3, %g2, %g3
        retry
-       nop
-       nop
+2:     mov             63 * 8, %g1
+1:     ldxa            [%g1] ASI_ITLB_DATA_ACCESS, %g2
+       andcc           %g2, 0x40, %g0                  /* _PAGE_L_4U */
+       bne,pn          %xcc, 2f
+        mov            TLB_TAG_ACCESS, %g2
+       stxa            %g0, [%g2] ASI_IMMU
+       stxa            %g0, [%g1] ASI_ITLB_DATA_ACCESS
+       membar          #Sync
+2:     ldxa            [%g1] ASI_DTLB_DATA_ACCESS, %g2
+       andcc           %g2, 0x40, %g0
+       bne,pn          %xcc, 2f
+        mov            TLB_TAG_ACCESS, %g2
+       stxa            %g0, [%g2] ASI_DMMU
+       stxa            %g0, [%g1] ASI_DTLB_DATA_ACCESS
+       membar          #Sync
+2:     sub             %g1, 8, %g1
+       brgez,pt        %g1, 1b
+        nop
+       retry
        nop
        nop
        nop
@@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4:
 
        retry
 
+__cheetah_xcall_flush_tlb_kernel_range:        /* 44 insns */
+       sethi           %hi(PAGE_SIZE - 1), %g2
+       or              %g2, %lo(PAGE_SIZE - 1), %g2
+       andn            %g1, %g2, %g1           /* page-align start */
+       andn            %g7, %g2, %g7           /* page-align end */
+       sub             %g7, %g1, %g3           /* %g3 = aligned range size */
+       srlx            %g3, 18, %g2            /* clobbers %g2; nonzero when range >= 1 << 18 bytes */
+       brnz,pn         %g2, 2f                 /* large range: take the path at 2: instead of looping */
+        sethi          %hi(PAGE_SIZE), %g2     /* reload the page step; %g2 no longer holds PAGE_SIZE - 1 */
+       sub             %g3, %g2, %g3           /* offset of the last page in the range */
+       or              %g1, 0x20, %g1          ! Nucleus
+1:     stxa            %g0, [%g1 + %g3] ASI_DMMU_DEMAP
+       stxa            %g0, [%g1 + %g3] ASI_IMMU_DEMAP
+       membar          #Sync
+       brnz,pt         %g3, 1b                 /* loop until offset 0 has been handled */
+        sub            %g3, %g2, %g3           /* delay slot: step down one page */
+       retry
+2:     mov             0x80, %g2
+       stxa            %g0, [%g2] ASI_DMMU_DEMAP
+       membar          #Sync
+       stxa            %g0, [%g2] ASI_IMMU_DEMAP
+       membar          #Sync
+       retry
+       nop                                     /* nop padding up to the 44-insn count in the header */
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+       nop
+
+
 #ifdef DCACHE_ALIASING_POSSIBLE
        .align          32
        .globl          xcall_flush_dcache_page_cheetah
@@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error:
        ba,a,pt %xcc, rtrap
 
        .globl          __hypervisor_xcall_flush_tlb_mm
-__hypervisor_xcall_flush_tlb_mm: /* 21 insns */
+__hypervisor_xcall_flush_tlb_mm: /* 24 insns */
        /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */
        mov             %o0, %g2
        mov             %o1, %g3
@@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
        mov             HV_FAST_MMU_DEMAP_CTX, %o5
        ta              HV_FAST_TRAP
        mov             HV_FAST_MMU_DEMAP_CTX, %g6
-       brnz,pn         %o0, __hypervisor_tlb_xcall_error
+       brnz,pn         %o0, 1f
         mov            %o0, %g5
        mov             %g2, %o0
        mov             %g3, %o1
@@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */
        mov             %g7, %o5
        membar          #Sync
        retry
+1:     sethi           %hi(__hypervisor_tlb_xcall_error), %g4
+       jmpl            %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
+        nop
 
        .globl          __hypervisor_xcall_flush_tlb_page
-__hypervisor_xcall_flush_tlb_page: /* 17 insns */
+__hypervisor_xcall_flush_tlb_page: /* 20 insns */
        /* %g5=ctx, %g1=vaddr */
        mov             %o0, %g2
        mov             %o1, %g3
@@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */
        sllx            %o0, PAGE_SHIFT, %o0
        ta              HV_MMU_UNMAP_ADDR_TRAP
        mov             HV_MMU_UNMAP_ADDR_TRAP, %g6
-       brnz,a,pn       %o0, __hypervisor_tlb_xcall_error
+       brnz,a,pn       %o0, 1f
         mov            %o0, %g5
        mov             %g2, %o0
        mov             %g3, %o1
        mov             %g4, %o2
        membar          #Sync
        retry
+1:     sethi           %hi(__hypervisor_tlb_xcall_error), %g4
+       jmpl            %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
+        nop
 
        .globl          __hypervisor_xcall_flush_tlb_kernel_range
-__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */
+__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */
        /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */
        sethi           %hi(PAGE_SIZE - 1), %g2
        or              %g2, %lo(PAGE_SIZE - 1), %g2
        andn            %g1, %g2, %g1
        andn            %g7, %g2, %g7
        sub             %g7, %g1, %g3
+       srlx            %g3, 18, %g7
        add             %g2, 1, %g2
        sub             %g3, %g2, %g3
        mov             %o0, %g2
        mov             %o1, %g4
-       mov             %o2, %g7
+       brnz,pn         %g7, 2f
+        mov            %o2, %g7
 1:     add             %g1, %g3, %o0   /* ARG0: virtual address */
        mov             0, %o1          /* ARG1: mmu context */
        mov             HV_MMU_ALL, %o2 /* ARG2: flags */
        ta              HV_MMU_UNMAP_ADDR_TRAP
        mov             HV_MMU_UNMAP_ADDR_TRAP, %g6
-       brnz,pn         %o0, __hypervisor_tlb_xcall_error
+       brnz,pn         %o0, 1f
         mov            %o0, %g5
        sethi           %hi(PAGE_SIZE), %o2
        brnz,pt         %g3, 1b
         sub            %g3, %o2, %g3
-       mov             %g2, %o0
+5:     mov             %g2, %o0
        mov             %g4, %o1
        mov             %g7, %o2
        membar          #Sync
        retry
+1:     sethi           %hi(__hypervisor_tlb_xcall_error), %g4
+       jmpl            %g4 + %lo(__hypervisor_tlb_xcall_error), %g0
+        nop
+2:     mov             %o3, %g1
+       mov             %o5, %g3
+       mov             0, %o0          /* ARG0: CPU lists unimplemented */
+       mov             0, %o1          /* ARG1: CPU lists unimplemented */
+       mov             0, %o2          /* ARG2: mmu context == nucleus */
+       mov             HV_MMU_ALL, %o3 /* ARG3: flags */
+       mov             HV_FAST_MMU_DEMAP_CTX, %o5
+       ta              HV_FAST_TRAP
+       mov             %g1, %o3
+       brz,pt          %o0, 5b
+        mov            %g3, %o5
+       mov             HV_FAST_MMU_DEMAP_CTX, %g6
+       ba,pt           %xcc, 1b
+        clr            %g5
 
        /* These just get rescheduled to PIL vectors. */
        .globl          xcall_call_function
@@ -809,6 +985,58 @@ xcall_kgdb_capture:
 
 #endif /* CONFIG_SMP */
 
+       .globl          cheetah_patch_cachetlbops
+cheetah_patch_cachetlbops:
+       save            %sp, -128, %sp
+
+       sethi           %hi(__flush_tlb_mm), %o0
+       or              %o0, %lo(__flush_tlb_mm), %o0
+       sethi           %hi(__cheetah_flush_tlb_mm), %o1
+       or              %o1, %lo(__cheetah_flush_tlb_mm), %o1
+       call            tlb_patch_one
+        mov            19, %o2
+
+       sethi           %hi(__flush_tlb_page), %o0
+       or              %o0, %lo(__flush_tlb_page), %o0
+       sethi           %hi(__cheetah_flush_tlb_page), %o1
+       or              %o1, %lo(__cheetah_flush_tlb_page), %o1
+       call            tlb_patch_one
+        mov            22, %o2
+
+       sethi           %hi(__flush_tlb_pending), %o0
+       or              %o0, %lo(__flush_tlb_pending), %o0
+       sethi           %hi(__cheetah_flush_tlb_pending), %o1
+       or              %o1, %lo(__cheetah_flush_tlb_pending), %o1
+       call            tlb_patch_one
+        mov            27, %o2
+
+       sethi           %hi(__flush_tlb_kernel_range), %o0
+       or              %o0, %lo(__flush_tlb_kernel_range), %o0
+       sethi           %hi(__cheetah_flush_tlb_kernel_range), %o1
+       or              %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1
+       call            tlb_patch_one
+        mov            31, %o2
+
+#ifdef DCACHE_ALIASING_POSSIBLE
+       sethi           %hi(__flush_dcache_page), %o0
+       or              %o0, %lo(__flush_dcache_page), %o0
+       sethi           %hi(__cheetah_flush_dcache_page), %o1
+       or              %o1, %lo(__cheetah_flush_dcache_page), %o1
+       call            tlb_patch_one
+        mov            11, %o2
+#endif /* DCACHE_ALIASING_POSSIBLE */
+
+#ifdef CONFIG_SMP
+       sethi           %hi(xcall_flush_tlb_kernel_range), %o0
+       or              %o0, %lo(xcall_flush_tlb_kernel_range), %o0
+       sethi           %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1
+       or              %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1
+       call            tlb_patch_one
+        mov            44, %o2
+#endif /* CONFIG_SMP */
+
+       ret
+        restore
 
        .globl          hypervisor_patch_cachetlbops
 hypervisor_patch_cachetlbops:
@@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops:
        sethi           %hi(__hypervisor_flush_tlb_mm), %o1
        or              %o1, %lo(__hypervisor_flush_tlb_mm), %o1
        call            tlb_patch_one
-        mov            10, %o2
+        mov            19, %o2
 
        sethi           %hi(__flush_tlb_page), %o0
        or              %o0, %lo(__flush_tlb_page), %o0
        sethi           %hi(__hypervisor_flush_tlb_page), %o1
        or              %o1, %lo(__hypervisor_flush_tlb_page), %o1
        call            tlb_patch_one
-        mov            11, %o2
+        mov            22, %o2
 
        sethi           %hi(__flush_tlb_pending), %o0
        or              %o0, %lo(__flush_tlb_pending), %o0
        sethi           %hi(__hypervisor_flush_tlb_pending), %o1
        or              %o1, %lo(__hypervisor_flush_tlb_pending), %o1
        call            tlb_patch_one
-        mov            16, %o2
+        mov            27, %o2
 
        sethi           %hi(__flush_tlb_kernel_range), %o0
        or              %o0, %lo(__flush_tlb_kernel_range), %o0
        sethi           %hi(__hypervisor_flush_tlb_kernel_range), %o1
        or              %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1
        call            tlb_patch_one
-        mov            16, %o2
+        mov            31, %o2
 
 #ifdef DCACHE_ALIASING_POSSIBLE
        sethi           %hi(__flush_dcache_page), %o0
@@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops:
        sethi           %hi(__hypervisor_xcall_flush_tlb_mm), %o1
        or              %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1
        call            tlb_patch_one
-        mov            21, %o2
+        mov            24, %o2
 
        sethi           %hi(xcall_flush_tlb_page), %o0
        or              %o0, %lo(xcall_flush_tlb_page), %o0
        sethi           %hi(__hypervisor_xcall_flush_tlb_page), %o1
        or              %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1
        call            tlb_patch_one
-        mov            17, %o2
+        mov            20, %o2
 
        sethi           %hi(xcall_flush_tlb_kernel_range), %o0
        or              %o0, %lo(xcall_flush_tlb_kernel_range), %o0
        sethi           %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1
        or              %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1
        call            tlb_patch_one
-        mov            25, %o2
+        mov            44, %o2
 #endif /* CONFIG_SMP */
 
        ret
index 6160761d5f611319ecd9f838d3f407c934e61ae6..4810e48dbbbf57cc8d77ff4ce7bcb8356b142341 100644 (file)
@@ -61,4 +61,7 @@
  */
 #define __write_once __read_mostly
 
+/* __ro_after_init is the generic name for the tile arch __write_once. */
+#define __ro_after_init __read_mostly
+
 #endif /* _ASM_TILE_CACHE_H */
index 178989e6d3e3ae1403ae9dc1f108126fedb6e2c8..ea960d6609177faa86780e6164f26e4d3ef51ac2 100644 (file)
@@ -218,8 +218,8 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num)
  */
 unsigned long long sched_clock(void)
 {
-       return clocksource_cyc2ns(get_cycles(),
-                                 sched_clock_mult, SCHED_CLOCK_SHIFT);
+       return mult_frac(get_cycles(),
+                        sched_clock_mult, 1ULL << SCHED_CLOCK_SHIFT);
 }
 
 int setup_profiling_timer(unsigned int multiplier)
index 536ccfcc01c673c4e3196a4fe57276cf7d7d53a8..34d9e15857c3ba69681038b3ee032c0efa8126f8 100644 (file)
@@ -40,8 +40,8 @@ GCOV_PROFILE := n
 UBSAN_SANITIZE :=n
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
-ifeq ($(CONFIG_RELOCATABLE),y)
-# If kernel is relocatable, build compressed kernel as PIE.
+# Compressed kernel should be built as PIE since it may be loaded at any
+# address by the bootloader.
 ifeq ($(CONFIG_X86_32),y)
 LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker)
 else
@@ -51,7 +51,6 @@ else
 LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \
        && echo "-z noreloc-overflow -pie --no-dynamic-linker")
 endif
-endif
 LDFLAGS_vmlinux := -T
 
 hostprogs-y    := mkpiggy
index 26240dde081e82e696b9a828a191b01c7bdaf0a9..4224ede43b4edc3df93b5e669cc091d73b9186b5 100644 (file)
@@ -87,6 +87,12 @@ int validate_cpu(void)
                return -1;
        }
 
+       if (CONFIG_X86_MINIMUM_CPU_FAMILY <= 4 && !IS_ENABLED(CONFIG_M486) &&
+           !has_eflag(X86_EFLAGS_ID)) {
+               printf("This kernel requires a CPU with the CPUID instruction.  Build with CONFIG_M486=y to run on this CPU.\n");
+               return -1;
+       }
+
        if (err_flags) {
                puts("This kernel requires the following features "
                     "not present on the CPU:\n");
index 0ab5ee1c26af057d9965470d42d5a9edd65c9ae7..aa8b0672f87a451865283f5f3e9f3d2c03992870 100644 (file)
@@ -888,7 +888,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
        unsigned long auth_tag_len = crypto_aead_authsize(tfm);
        u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
        struct scatter_walk src_sg_walk;
-       struct scatter_walk dst_sg_walk;
+       struct scatter_walk dst_sg_walk = {};
        unsigned int i;
 
        /* Assuming we are supporting rfc4106 64-bit extended */
@@ -968,7 +968,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
        u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
        u8 authTag[16];
        struct scatter_walk src_sg_walk;
-       struct scatter_walk dst_sg_walk;
+       struct scatter_walk dst_sg_walk = {};
        unsigned int i;
 
        if (unlikely(req->assoclen != 16 && req->assoclen != 20))
index 77f28ce9c6464e71a942f767082391502c8fa80d..9976fcecd17edfca5d5dbfe94c2dad0a635edf2d 100644 (file)
@@ -5,8 +5,8 @@
 OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
 OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
 
-CFLAGS_syscall_64.o            += -Wno-override-init
-CFLAGS_syscall_32.o            += -Wno-override-init
+CFLAGS_syscall_64.o            += $(call cc-option,-Wno-override-init,)
+CFLAGS_syscall_32.o            += $(call cc-option,-Wno-override-init,)
 obj-y                          := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
 obj-y                          += common.o
 
index b75a8bcd2d23cced23df9bade46e6304499d926b..21b352a11b493f4868b1a091dc416c7e2279ad34 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
+#include <asm/export.h>
 
        .section .entry.text, "ax"
 
@@ -991,6 +992,7 @@ trace:
        jmp     ftrace_stub
 END(mcount)
 #endif /* CONFIG_DYNAMIC_FTRACE */
+EXPORT_SYMBOL(mcount)
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
index c98ec2efd7500d9e7673fc1f215e3e561afd8641..ef766a358b37dd355e8d5d75c10a61d47a83e04f 100644 (file)
@@ -35,6 +35,7 @@
 #include <asm/asm.h>
 #include <asm/smap.h>
 #include <asm/pgtable_types.h>
+#include <asm/export.h>
 #include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
@@ -875,6 +876,7 @@ ENTRY(native_load_gs_index)
        popfq
        ret
 END(native_load_gs_index)
+EXPORT_SYMBOL(native_load_gs_index)
 
        _ASM_EXTABLE(.Lgs_change, bad_gs)
        .section .fixup, "ax"
index ff6ef7b3082237e5d223de9a9986202761566d17..2b361854254414662c17b531e2cacb8d5bb696b9 100644 (file)
 380    i386    pkey_mprotect           sys_pkey_mprotect
 381    i386    pkey_alloc              sys_pkey_alloc
 382    i386    pkey_free               sys_pkey_free
-#383   i386    pkey_get                sys_pkey_get
-#384   i386    pkey_set                sys_pkey_set
index 2f024d02511da47e12cacfbeea9826c440de8f9e..e93ef0b38db8e16a38f83e2e3f08dfb8d5fff4a0 100644 (file)
 329    common  pkey_mprotect           sys_pkey_mprotect
 330    common  pkey_alloc              sys_pkey_alloc
 331    common  pkey_free               sys_pkey_free
-#332   common  pkey_get                sys_pkey_get
-#333   common  pkey_set                sys_pkey_set
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
index e5a17114a8c4f917b56819bc6f7ff80ff0ac4798..fee6bc79b987faf56353ee367ca2aac61a87d49a 100644 (file)
@@ -6,6 +6,7 @@
  */
        #include <linux/linkage.h>
        #include <asm/asm.h>
+       #include <asm/export.h>
 
        /* put return address in eax (arg1) */
        .macro THUNK name, func, put_ret_addr_in_eax=0
@@ -36,5 +37,7 @@
 #ifdef CONFIG_PREEMPT
        THUNK ___preempt_schedule, preempt_schedule
        THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
+       EXPORT_SYMBOL(___preempt_schedule)
+       EXPORT_SYMBOL(___preempt_schedule_notrace)
 #endif
 
index 627ecbcb2e6267796b634e05b16873285300a977..be36bf4e0957ec0df1437407fa98800cb7869737 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/linkage.h>
 #include "calling.h"
 #include <asm/asm.h>
+#include <asm/export.h>
 
        /* rdi: arg1 ... normal C conventions. rax is saved/restored. */
        .macro THUNK name, func, put_ret_addr_in_rdi=0
@@ -49,6 +50,8 @@
 #ifdef CONFIG_PREEMPT
        THUNK ___preempt_schedule, preempt_schedule
        THUNK ___preempt_schedule_notrace, preempt_schedule_notrace
+       EXPORT_SYMBOL(___preempt_schedule)
+       EXPORT_SYMBOL(___preempt_schedule_notrace)
 #endif
 
 #if defined(CONFIG_TRACE_IRQFLAGS) \
index f5f4b3fbbbc2924cbac3fe24d45d949e0997dc8e..afb222b63caeb0217ef34d9b2b193b6b59bd190d 100644 (file)
@@ -662,7 +662,13 @@ static int __init amd_core_pmu_init(void)
                pr_cont("Fam15h ");
                x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
                break;
-
+       case 0x17:
+               pr_cont("Fam17h ");
+               /*
+                * In family 17h, there are no event constraints in the PMC hardware.
+                * We fall back to using the default amd_get_event_constraints.
+                */
+               break;
        default:
                pr_err("core perfctr but no constraints; unknown hardware!\n");
                return -ENODEV;
index d31735f37ed7d0435b8aca5d2f476e04a249b6e8..9d4bf3ab049ec19f8e8dd3c3f859dc310f92c466 100644 (file)
@@ -2352,7 +2352,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
                frame.next_frame     = 0;
                frame.return_address = 0;
 
-               if (!access_ok(VERIFY_READ, fp, 8))
+               if (!valid_user_frame(fp, sizeof(frame)))
                        break;
 
                bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4);
@@ -2362,9 +2362,6 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent
                if (bytes != 0)
                        break;
 
-               if (!valid_user_frame(fp, sizeof(frame)))
-                       break;
-
                perf_callchain_store(entry, cs_base + frame.return_address);
                fp = compat_ptr(ss_base + frame.next_frame);
        }
@@ -2413,7 +2410,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
                frame.next_frame             = NULL;
                frame.return_address = 0;
 
-               if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2))
+               if (!valid_user_frame(fp, sizeof(frame)))
                        break;
 
                bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp));
@@ -2423,9 +2420,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
                if (bytes != 0)
                        break;
 
-               if (!valid_user_frame(fp, sizeof(frame)))
-                       break;
-
                perf_callchain_store(entry, frame.return_address);
                fp = (void __user *)frame.next_frame;
        }
index a3a9eb84b5cf16ffebd5bf46c69037db4d1e3794..a74a2dbc01801a1ccc3c041a6037214c369957bb 100644 (file)
@@ -3607,10 +3607,14 @@ __init int intel_pmu_init(void)
 
        /*
         * Quirk: v2 perfmon does not report fixed-purpose events, so
-        * assume at least 3 events:
+        * assume at least 3 events, when not running in a hypervisor:
         */
-       if (version > 1)
-               x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
+       if (version > 1) {
+               int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
+
+               x86_pmu.num_counters_fixed =
+                       max((int)edx.split.num_counters_fixed, assume);
+       }
 
        if (boot_cpu_has(X86_FEATURE_PDCM)) {
                u64 capabilities;
@@ -3898,6 +3902,7 @@ __init int intel_pmu_init(void)
                break;
 
        case INTEL_FAM6_XEON_PHI_KNL:
+       case INTEL_FAM6_XEON_PHI_KNM:
                memcpy(hw_cache_event_ids,
                       slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs,
@@ -3912,7 +3917,7 @@ __init int intel_pmu_init(void)
                x86_pmu.flags |= PMU_FL_HAS_RSP_1;
                x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 
-               pr_cont("Knights Landing events, ");
+               pr_cont("Knights Landing/Mill events, ");
                break;
 
        case INTEL_FAM6_SKYLAKE_MOBILE:
index 3ca87b5a8677608c86ac8d748b59ead0d160f580..4f5ac726335f899a4faefd01a41aac241b9160be 100644 (file)
@@ -48,7 +48,8 @@
  *                            Scope: Core
  *     MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
  *                            perf code: 0x02
- *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
+ *                                             SKL,KNL
  *                            Scope: Core
  *     MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
  *                            perf code: 0x03
  *                            Scope: Core
  *     MSR_PKG_C2_RESIDENCY:  Package C2 Residency Counter.
  *                            perf code: 0x00
- *                            Available model: SNB,IVB,HSW,BDW,SKL
+ *                            Available model: SNB,IVB,HSW,BDW,SKL,KNL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C3_RESIDENCY:  Package C3 Residency Counter.
  *                            perf code: 0x01
- *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C6_RESIDENCY:  Package C6 Residency Counter.
  *                            perf code: 0x02
- *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL
+ *                            Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
+ *                                             SKL,KNL
  *                            Scope: Package (physical package)
  *     MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
  *                            perf code: 0x03
@@ -118,6 +120,7 @@ struct cstate_model {
 
 /* Quirk flags */
 #define SLM_PKG_C6_USE_C7_MSR  (1UL << 0)
+#define KNL_CORE_C6_MSR                (1UL << 1)
 
 struct perf_cstate_msr {
        u64     msr;
@@ -488,6 +491,18 @@ static const struct cstate_model slm_cstates __initconst = {
        .quirks                 = SLM_PKG_C6_USE_C7_MSR,
 };
 
+
+static const struct cstate_model knl_cstates __initconst = {
+       .core_events            = BIT(PERF_CSTATE_CORE_C6_RES),
+
+       .pkg_events             = BIT(PERF_CSTATE_PKG_C2_RES) |
+                                 BIT(PERF_CSTATE_PKG_C3_RES) |
+                                 BIT(PERF_CSTATE_PKG_C6_RES),
+       .quirks                 = KNL_CORE_C6_MSR,
+};
+
+
+
 #define X86_CSTATES_MODEL(model, states)                               \
        { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) }
 
@@ -523,6 +538,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
 
        X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE,  snb_cstates),
        X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates),
+
+       X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates),
        { },
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
@@ -558,6 +575,11 @@ static int __init cstate_probe(const struct cstate_model *cm)
        if (cm->quirks & SLM_PKG_C6_USE_C7_MSR)
                pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY;
 
+       /* KNL has different MSR for CORE C6 */
+       if (cm->quirks & KNL_CORE_C6_MSR)
+               pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY;
+
+
        has_cstate_core = cstate_probe_msr(cm->core_events,
                                           PERF_CSTATE_CORE_EVENT_MAX,
                                           core_msr, core_events_attrs);
index 0319311dbdbb548eef4f5b2431c285deea5dd00c..be202390bbd37b00106864123a647786497ce2cd 100644 (file)
@@ -1108,20 +1108,20 @@ static void setup_pebs_sample_data(struct perf_event *event,
        }
 
        /*
-        * We use the interrupt regs as a base because the PEBS record
-        * does not contain a full regs set, specifically it seems to
-        * lack segment descriptors, which get used by things like
-        * user_mode().
+        * We use the interrupt regs as a base because the PEBS record does not
+        * contain a full regs set, specifically it seems to lack segment
+        * descriptors, which get used by things like user_mode().
         *
-        * In the simple case fix up only the IP and BP,SP regs, for
-        * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
-        * A possible PERF_SAMPLE_REGS will have to transfer all regs.
+        * In the simple case fix up only the IP for PERF_SAMPLE_IP.
+        *
+        * We must however always use BP,SP from iregs for the unwinder to stay
+        * sane; the record BP,SP can point into thin air when the record is
+        * from a previous PMI context or an (I)RET happened between the record
+        * and PMI.
         */
        *regs = *iregs;
        regs->flags = pebs->flags;
        set_linear_ip(regs, pebs->ip);
-       regs->bp = pebs->bp;
-       regs->sp = pebs->sp;
 
        if (sample_type & PERF_SAMPLE_REGS_INTR) {
                regs->ax = pebs->ax;
@@ -1130,10 +1130,21 @@ static void setup_pebs_sample_data(struct perf_event *event,
                regs->dx = pebs->dx;
                regs->si = pebs->si;
                regs->di = pebs->di;
-               regs->bp = pebs->bp;
-               regs->sp = pebs->sp;
 
-               regs->flags = pebs->flags;
+               /*
+                * Per the above; only set BP,SP if we don't need callchains.
+                *
+                * XXX: does this make sense?
+                */
+               if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+                       regs->bp = pebs->bp;
+                       regs->sp = pebs->sp;
+               }
+
+               /*
+                * Preserve PERF_EFLAGS_VM from set_linear_ip().
+                */
+               regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM);
 #ifndef CONFIG_X86_32
                regs->r8 = pebs->r8;
                regs->r9 = pebs->r9;
index fc6cf21c535e19f4c07deccf4a6444199e5ae39c..81b321ace8e0194d3ce18b29fcb42c20b834a918 100644 (file)
@@ -458,8 +458,8 @@ void intel_pmu_lbr_del(struct perf_event *event)
        if (!x86_pmu.lbr_nr)
                return;
 
-       if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
-                                       event->ctx->task_ctx_data) {
+       if (branch_user_callstack(cpuc->br_sel) &&
+           event->ctx->task_ctx_data) {
                task_ctx = event->ctx->task_ctx_data;
                task_ctx->lbr_callstack_users--;
        }
index b0f0e835a770f7ee959681513bd7e41af39827a0..0a535cea8ff31adf6e06fc32ff763d423a73ab74 100644 (file)
@@ -763,6 +763,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init),
 
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init),
+       X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init),
 
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE,  skl_rapl_init),
        X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
index d9844cc74486e602c7b768e225856323a37025b8..dbaaf7dc8373cb0248a4637abe4a211f266fb2db 100644 (file)
@@ -319,9 +319,9 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
  */
 static int uncore_pmu_event_init(struct perf_event *event);
 
-static bool is_uncore_event(struct perf_event *event)
+static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event)
 {
-       return event->pmu->event_init == uncore_pmu_event_init;
+       return &box->pmu->pmu == event->pmu;
 }
 
 static int
@@ -340,7 +340,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
 
        n = box->n_events;
 
-       if (is_uncore_event(leader)) {
+       if (is_box_event(box, leader)) {
                box->event_list[n] = leader;
                n++;
        }
@@ -349,7 +349,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader,
                return n;
 
        list_for_each_entry(event, &leader->sibling_list, group_entry) {
-               if (!is_uncore_event(event) ||
+               if (!is_box_event(box, event) ||
                    event->state <= PERF_EVENT_STATE_OFF)
                        continue;
 
@@ -1349,6 +1349,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
        X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X,    bdx_uncore_init),
        X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init),
        X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL,   knl_uncore_init),
+       X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM,   knl_uncore_init),
        X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
        X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
        X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X,      skx_uncore_init),
index 5f845eef9a4d682a3598c13946dbdec9fb70adeb..a3dcc12bef4ab3a67aab29c6acf3480920e06952 100644 (file)
@@ -8,8 +8,12 @@
 #define PCI_DEVICE_ID_INTEL_HSW_IMC    0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC  0x0a04
 #define PCI_DEVICE_ID_INTEL_BDW_IMC    0x1604
-#define PCI_DEVICE_ID_INTEL_SKL_IMC    0x191f
-#define PCI_DEVICE_ID_INTEL_SKL_U_IMC  0x190c
+#define PCI_DEVICE_ID_INTEL_SKL_U_IMC  0x1904
+#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC  0x190c
+#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900
+#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910
+#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f
+#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK                        0x000000ff
@@ -486,24 +490,12 @@ static int snb_uncore_imc_event_add(struct perf_event *event, int flags)
 
        snb_uncore_imc_event_start(event, 0);
 
-       box->n_events++;
-
        return 0;
 }
 
 static void snb_uncore_imc_event_del(struct perf_event *event, int flags)
 {
-       struct intel_uncore_box *box = uncore_event_to_box(event);
-       int i;
-
        snb_uncore_imc_event_stop(event, PERF_EF_UPDATE);
-
-       for (i = 0; i < box->n_events; i++) {
-               if (event == box->event_list[i]) {
-                       --box->n_events;
-                       break;
-               }
-       }
 }
 
 int snb_pci2phy_map_init(int devid)
@@ -616,13 +608,29 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = {
 
 static const struct pci_device_id skl_uncore_pci_ids[] = {
        { /* IMC */
-               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC),
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC),
                .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
        },
        { /* IMC */
                PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC),
                .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
        },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
+       { /* IMC */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC),
+               .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+       },
 
        { /* end: all zeroes */ },
 };
@@ -666,8 +674,12 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
        IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
        IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
        IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
-       IMC_DEV(SKL_IMC, &skl_uncore_pci_driver),    /* 6th Gen Core */
+       IMC_DEV(SKL_Y_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core Y */
        IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core U */
+       IMC_DEV(SKL_HD_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core H Dual Core */
+       IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core H Quad Core */
+       IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core S Dual Core */
+       IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core S Quad Core */
        {  /* end marker */ }
 };
 
index 5874d8de1f8da111e3e0a682835e42b23af875ff..a77ee026643d23fac4808a1f1c32ca42e4999769 100644 (file)
@@ -113,7 +113,7 @@ struct debug_store {
  * Per register state.
  */
 struct er_account {
-       raw_spinlock_t          lock;   /* per-core: protect structure */
+       raw_spinlock_t      lock;       /* per-core: protect structure */
        u64                 config;     /* extra MSR config */
        u64                 reg;        /* extra MSR number */
        atomic_t            ref;        /* reference count */
index 1188bc849ee3b3253fd8229fca21bf2d6c87856e..a39629206864e5bb74aaddea15ca1ab762877042 100644 (file)
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT   ( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h
new file mode 100644 (file)
index 0000000..138de56
--- /dev/null
@@ -0,0 +1,4 @@
+#ifdef CONFIG_64BIT
+#define KSYM_ALIGN 16
+#endif
+#include <asm-generic/export.h>
index 9ae5ab80a497c35a9fa3048c532bdb9239c395e1..34a46dc076d3610212e6f5c9f0abfb2ab9bc3629 100644 (file)
@@ -64,5 +64,6 @@
 /* Xeon Phi */
 
 #define INTEL_FAM6_XEON_PHI_KNL                0x57 /* Knights Landing */
+#define INTEL_FAM6_XEON_PHI_KNM                0x85 /* Knights Mill */
 
 #endif /* _ASM_X86_INTEL_FAMILY_H */
index 5b6753d1f7f4e35f8066e89555c715badc46ca23..49da9f497b908b676d9ec0c22c8646817bf1f52b 100644 (file)
@@ -17,6 +17,7 @@
 
 extern int intel_mid_pci_init(void);
 extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state);
+extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev);
 
 extern void intel_mid_pwr_power_off(void);
 
index de25aad0785389c399dd12c843ca2d4d2ff0d112..d34bd370074b46662e5a96014ed4cd5b521f6045 100644 (file)
@@ -351,4 +351,10 @@ extern void arch_phys_wc_del(int handle);
 #define arch_phys_wc_add arch_phys_wc_add
 #endif
 
+#ifdef CONFIG_X86_PAT
+extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size);
+extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size);
+#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
+#endif
+
 #endif /* _ASM_X86_IO_H */
index 4b20f7304b9c241f58dcaa6b33a486a7308af9c4..bdde80731f490ecc05af1234d99c6ae2820975d4 100644 (file)
@@ -948,7 +948,6 @@ struct kvm_x86_ops {
        int (*get_lpage_level)(void);
        bool (*rdtscp_supported)(void);
        bool (*invpcid_supported)(void);
-       void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment);
 
        void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
 
@@ -958,8 +957,6 @@ struct kvm_x86_ops {
 
        void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
-       u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc);
-
        void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
 
        int (*check_intercept)(struct kvm_vcpu *vcpu,
index 56f4c6676b29034830d4fdb9c7cdcedd0c14e2a1..78f3760ca1f2985cfcbb278d59fbe71f92f69ea7 100644 (file)
@@ -88,7 +88,6 @@
 
 #define MSR_IA32_RTIT_CTL              0x00000570
 #define MSR_IA32_RTIT_STATUS           0x00000571
-#define MSR_IA32_RTIT_STATUS           0x00000571
 #define MSR_IA32_RTIT_ADDR0_A          0x00000580
 #define MSR_IA32_RTIT_ADDR0_B          0x00000581
 #define MSR_IA32_RTIT_ADDR1_A          0x00000582
index e02e3f80d363bbb87d8db6c74b2a0d5370818419..84f58de08c2bdbff7ce98119ff2e360d7d5175a1 100644 (file)
@@ -521,7 +521,8 @@ do {                                                                        \
 static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
                         const unsigned long __percpu *addr)
 {
-       unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
+       unsigned long __percpu *a =
+               (unsigned long __percpu *)addr + nr / BITS_PER_LONG;
 
 #ifdef CONFIG_X86_64
        return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
@@ -538,7 +539,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
        asm volatile("bt "__percpu_arg(2)",%1\n\t"
                        CC_SET(c)
                        : CC_OUT(c) (oldbit)
-                       : "m" (*(unsigned long *)addr), "Ir" (nr));
+                       : "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
 
        return oldbit;
 }
index 3d33a719f5c1d8652be749c43ba4b7dc5560c4d5..a34e0d4b957d639afb5978863e57fefb74a173f2 100644 (file)
@@ -103,8 +103,10 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
 ({                                                     \
        long tmp;                                       \
        struct rw_semaphore* ret;                       \
+       register void *__sp asm(_ASM_SP);               \
+                                                       \
        asm volatile("# beginning down_write\n\t"       \
-                    LOCK_PREFIX "  xadd      %1,(%3)\n\t"      \
+                    LOCK_PREFIX "  xadd      %1,(%4)\n\t"      \
                     /* adds 0xffff0001, returns the old value */ \
                     "  test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
                     /* was the active mask 0 before? */\
@@ -112,7 +114,7 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem)
                     "  call " slow_path "\n"           \
                     "1:\n"                             \
                     "# ending down_write"              \
-                    : "+m" (sem->count), "=d" (tmp), "=a" (ret)        \
+                    : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \
                     : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \
                     : "memory", "cc");                 \
        ret;                                            \
index 2aaca53c097416bbb305c0014acac1ba6d72dbb0..ad6f5eb07a95bd221fe4e13c8cdb3af0cd27aa37 100644 (file)
@@ -52,6 +52,15 @@ struct task_struct;
 #include <asm/cpufeature.h>
 #include <linux/atomic.h>
 
+struct thread_info {
+       unsigned long           flags;          /* low level flags */
+};
+
+#define INIT_THREAD_INFO(tsk)                  \
+{                                              \
+       .flags          = 0,                    \
+}
+
 #define init_stack             (init_thread_union.stack)
 
 #else /* !__ASSEMBLY__ */
index 4dd5d500eb6024cc27492d49a2458213e44bdca6..79076d75bdbfde2aec556e14fd8622e7c017682a 100644 (file)
@@ -46,9 +46,7 @@ obj-$(CONFIG_MODIFY_LDT_SYSCALL)      += ldt.o
 obj-y                  += setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-y                  += probe_roms.o
-obj-$(CONFIG_X86_32)   += i386_ksyms_32.o
-obj-$(CONFIG_X86_64)   += sys_x86_64.o x8664_ksyms_64.o
-obj-$(CONFIG_X86_64)   += mcount_64.o
+obj-$(CONFIG_X86_64)   += sys_x86_64.o mcount_64.o
 obj-$(CONFIG_X86_ESPFIX64)     += espfix_64.o
 obj-$(CONFIG_SYSFS)    += ksysfs.o
 obj-y                  += bootflag.o e820.o
index 8a5abaa7d4533e1592bc6977569c737762089516..931ced8ca345114397536ae1998a438a84889193 100644 (file)
@@ -454,6 +454,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
                polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
 
        mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+       acpi_penalize_sci_irq(bus_irq, trigger, polarity);
 
        /*
         * stash over-ride to indicate we've been here
index c7364bd633e1d8c1a346c69534ded295bc2ba48d..51287cd90bf65f4ffa6215c4dea0c112aa5f6697 100644 (file)
@@ -1042,8 +1042,11 @@ static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
 
        if (apm_info.get_power_status_broken)
                return APM_32_UNSUPPORTED;
-       if (apm_bios_call(&call))
+       if (apm_bios_call(&call)) {
+               if (!call.err)
+                       return APM_NO_ERROR;
                return call.err;
+       }
        *status = call.ebx;
        *bat = call.ecx;
        if (apm_info.get_power_status_swabinminutes) {
index b81fe2d63e15751c2cb7e61fd10dc85cc7f906b0..1e81a37c034e7821580f5165eb4359e209ef7823 100644 (file)
@@ -347,7 +347,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
 #ifdef CONFIG_SMP
        unsigned bits;
        int cpu = smp_processor_id();
-       unsigned int socket_id, core_complex_id;
 
        bits = c->x86_coreid_bits;
        /* Low order bits define the core id (index of core in socket) */
@@ -365,10 +364,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
         if (c->x86 != 0x17 || !cpuid_edx(0x80000006))
                return;
 
-       socket_id       = (c->apicid >> bits) - 1;
-       core_complex_id = (c->apicid & ((1 << bits) - 1)) >> 3;
-
-       per_cpu(cpu_llc_id, cpu) = (socket_id << 3) | core_complex_id;
+       per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
 #endif
 }
 
index 9bd910a7dd0abc0d994c6e50250c6d104550dc1e..cc9e980c68ec47b39a8c4ae7ad3497d3d94bd53d 100644 (file)
@@ -978,6 +978,35 @@ static void x86_init_cache_qos(struct cpuinfo_x86 *c)
        }
 }
 
+/*
+ * The physical to logical package id mapping is initialized from the
+ * acpi/mptables information. Make sure that CPUID actually agrees with
+ * that.
+ */
+static void sanitize_package_id(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_SMP
+       unsigned int pkg, apicid, cpu = smp_processor_id();
+
+       apicid = apic->cpu_present_to_apicid(cpu);
+       pkg = apicid >> boot_cpu_data.x86_coreid_bits;
+
+       if (apicid != c->initial_apicid) {
+               pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n",
+                      cpu, apicid, c->initial_apicid);
+               c->initial_apicid = apicid;
+       }
+       if (pkg != c->phys_proc_id) {
+               pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n",
+                      cpu, pkg, c->phys_proc_id);
+               c->phys_proc_id = pkg;
+       }
+       c->logical_proc_id = topology_phys_to_logical_pkg(pkg);
+#else
+       c->logical_proc_id = 0;
+#endif
+}
+
 /*
  * This does the hard work of actually picking apart the CPU stuff...
  */
@@ -1103,8 +1132,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 #ifdef CONFIG_NUMA
        numa_add_cpu(smp_processor_id());
 #endif
-       /* The boot/hotplug time assigment got cleared, restore it */
-       c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id);
+       sanitize_package_id(c);
 }
 
 /*
index 620ab06bcf4571c8841b70e35a57657dda2e9985..017bda12caaed9c46f60fccc90f05d861e58e1ba 100644 (file)
@@ -429,7 +429,7 @@ int __init save_microcode_in_initrd_amd(void)
         * We need the physical address of the container for both bitness since
         * boot_params.hdr.ramdisk_image is a physical address.
         */
-       cont    = __pa(container);
+       cont    = __pa_nodebug(container);
        cont_va = container;
 #endif
 
index 8cb57df9398d91a74eec678134844d8ac1052463..1db8dc490b665e751f43f3411cc21079eea75ae2 100644 (file)
@@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 
        static const struct cpuid_bit cpuid_bits[] = {
                { X86_FEATURE_INTEL_PT,         CR_EBX,25, 0x00000007, 0 },
+               { X86_FEATURE_AVX512_4VNNIW,    CR_EDX, 2, 0x00000007, 0 },
+               { X86_FEATURE_AVX512_4FMAPS,    CR_EDX, 3, 0x00000007, 0 },
                { X86_FEATURE_APERFMPERF,       CR_ECX, 0, 0x00000006, 0 },
                { X86_FEATURE_EPB,              CR_ECX, 3, 0x00000006, 0 },
                { X86_FEATURE_HW_PSTATE,        CR_EDX, 7, 0x80000007, 0 },
index 81160578b91ac9a053bf5a5f172ea2fb6da1c241..5130985b758b98ea74380ec8991ca1f72d709521 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/div64.h>
 #include <asm/x86_init.h>
 #include <asm/hypervisor.h>
+#include <asm/timer.h>
 #include <asm/apic.h>
 
 #define CPUID_VMWARE_INFO_LEAF 0x40000000
@@ -94,6 +95,10 @@ static void __init vmware_platform_setup(void)
        } else {
                pr_warn("Failed to get TSC freq from the hypervisor\n");
        }
+
+#ifdef CONFIG_X86_IO_APIC
+       no_timer_check = 1;
+#endif
 }
 
 /*
index 9b7cf5c28f5fa8557d23083047995bd93af1a68a..85f854b98a9d24c3e0e6a3d9d83fd6c5b6c57e3f 100644 (file)
@@ -112,7 +112,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                for (; stack < stack_info.end; stack++) {
                        unsigned long real_addr;
                        int reliable = 0;
-                       unsigned long addr = *stack;
+                       unsigned long addr = READ_ONCE_NOCHECK(*stack);
                        unsigned long *ret_addr_p =
                                unwind_get_return_address_ptr(&state);
 
index b85fe5f91c3fe4901cf766aa1f6996710f844b0d..90e8dde3ec26b1d97d10309d2d796489078b5cdf 100644 (file)
@@ -350,7 +350,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
                 * continue building up new bios map based on this
                 * information
                 */
-               if (current_type != last_type) {
+               if (current_type != last_type || current_type == E820_PRAM) {
                        if (last_type != 0)      {
                                new_bios[new_bios_entry].size =
                                        change_point[chgidx]->addr - last_addr;
index 47004010ad5dd42ec03e5ca075d832ef0c925e38..ebb4e95fbd741b5842b6c4a6320688f99bcf541a 100644 (file)
@@ -521,14 +521,14 @@ void fpu__clear(struct fpu *fpu)
 {
        WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
 
-       if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
-               /* FPU state will be reallocated lazily at the first use. */
-               fpu__drop(fpu);
-       } else {
-               if (!fpu->fpstate_active) {
-                       fpu__activate_curr(fpu);
-                       user_fpu_begin();
-               }
+       fpu__drop(fpu);
+
+       /*
+        * Make sure fpstate is cleared and initialized.
+        */
+       if (static_cpu_has(X86_FEATURE_FPU)) {
+               fpu__activate_curr(fpu);
+               user_fpu_begin();
                copy_init_fpstate_to_fpregs();
        }
 }
index 124aa5c593f8da7aba6643bc609a332744d10ba6..095ef7ddd6ae4d1c6d5476d6e63561963786e793 100644 (file)
@@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
        setup_clear_cpu_cap(X86_FEATURE_MPX);
        setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
        setup_clear_cpu_cap(X86_FEATURE_PKU);
+       setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+       setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
 }
 
 /*
index 5f401262f12d08c50d6411d80de056ef87b6c6e9..2dabea46f03935f435493117c058a297f5011878 100644 (file)
@@ -23,6 +23,7 @@
 #include <asm/percpu.h>
 #include <asm/nops.h>
 #include <asm/bootparam.h>
+#include <asm/export.h>
 
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
@@ -664,15 +665,19 @@ __PAGE_ALIGNED_BSS
 initial_pg_pmd:
        .fill 1024*KPMDS,4,0
 #else
-ENTRY(initial_page_table)
+.globl initial_page_table
+initial_page_table:
        .fill 1024,4,0
 #endif
 initial_pg_fixmap:
        .fill 1024,4,0
-ENTRY(empty_zero_page)
+.globl empty_zero_page
+empty_zero_page:
        .fill 4096,1,0
-ENTRY(swapper_pg_dir)
+.globl swapper_pg_dir
+swapper_pg_dir:
        .fill 1024,4,0
+EXPORT_SYMBOL(empty_zero_page)
 
 /*
  * This starts the data section.
index c98a559c346ed0c2b5092181130b2a3a2f7b1dcf..b4421cc191b056727f8f8c0def78a750b319a1c4 100644 (file)
@@ -21,6 +21,7 @@
 #include <asm/percpu.h>
 #include <asm/nops.h>
 #include "../entry/calling.h"
+#include <asm/export.h>
 
 #ifdef CONFIG_PARAVIRT
 #include <asm/asm-offsets.h>
@@ -486,10 +487,12 @@ early_gdt_descr_base:
 ENTRY(phys_base)
        /* This must match the first entry in level2_kernel_pgt */
        .quad   0x0000000000000000
+EXPORT_SYMBOL(phys_base)
 
 #include "../../x86/xen/xen-head.S"
        
        __PAGE_ALIGNED_BSS
 NEXT_PAGE(empty_zero_page)
        .skip PAGE_SIZE
+EXPORT_SYMBOL(empty_zero_page)
 
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
deleted file mode 100644 (file)
index 1f9b878..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-#include <linux/export.h>
-#include <linux/spinlock_types.h>
-
-#include <asm/checksum.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/ftrace.h>
-
-#ifdef CONFIG_FUNCTION_TRACER
-/* mcount is defined in assembly */
-EXPORT_SYMBOL(mcount);
-#endif
-
-/*
- * Note, this is a prototype to get at the symbol for
- * the export, but dont use it from C code, it is used
- * by assembly code and is not using C calling convention!
- */
-#ifndef CONFIG_X86_CMPXCHG64
-extern void cmpxchg8b_emu(void);
-EXPORT_SYMBOL(cmpxchg8b_emu);
-#endif
-
-/* Networking helper routines. */
-EXPORT_SYMBOL(csum_partial_copy_generic);
-
-EXPORT_SYMBOL(__get_user_1);
-EXPORT_SYMBOL(__get_user_2);
-EXPORT_SYMBOL(__get_user_4);
-EXPORT_SYMBOL(__get_user_8);
-
-EXPORT_SYMBOL(__put_user_1);
-EXPORT_SYMBOL(__put_user_2);
-EXPORT_SYMBOL(__put_user_4);
-EXPORT_SYMBOL(__put_user_8);
-
-EXPORT_SYMBOL(strstr);
-
-EXPORT_SYMBOL(csum_partial);
-EXPORT_SYMBOL(empty_zero_page);
-
-#ifdef CONFIG_PREEMPT
-EXPORT_SYMBOL(___preempt_schedule);
-EXPORT_SYMBOL(___preempt_schedule_notrace);
-#endif
-
-EXPORT_SYMBOL(__sw_hweight32);
index 28cee019209ce7f7fd9ec160e4409fecf000819f..d9d8d16b69db89df63b5ff33a4e597fcf4e5eba6 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
 #include <linux/frame.h>
+#include <linux/kasan.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -1057,9 +1058,10 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
         * tailcall optimization. So, to be absolutely safe
         * we also save and restore enough stack bytes to cover
         * the argument area.
+        * Use __memcpy() to avoid KASAN stack out-of-bounds reports as we copy
+        * a raw stack chunk with redzones:
         */
-       memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
-              MIN_STACK_SIZE(addr));
+       __memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
        regs->flags &= ~X86_EFLAGS_IF;
        trace_hardirqs_off();
        regs->ip = (unsigned long)(jp->entry);
@@ -1080,6 +1082,9 @@ void jprobe_return(void)
 {
        struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
+       /* Unpoison stack redzones in the frames we are going to jump over. */
+       kasan_unpoison_stack_above_sp_to(kcb->jprobe_saved_sp);
+
        asm volatile (
 #ifdef CONFIG_X86_64
                        "       xchg   %%rbx,%%rsp      \n"
@@ -1118,7 +1123,7 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
                /* It's OK to start function graph tracing again */
                unpause_graph_tracing();
                *regs = kcb->jprobe_saved_regs;
-               memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
+               __memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
                preempt_enable_no_resched();
                return 1;
        }
index 61924222a9e1ceb4e582b961a0568a1301dc4b6b..7b0d3da52fb42f288a947f1befe8886319b37ec5 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/linkage.h>
 #include <asm/ptrace.h>
 #include <asm/ftrace.h>
+#include <asm/export.h>
 
 
        .code64
 
 #ifdef CC_USING_FENTRY
 # define function_hook __fentry__
+EXPORT_SYMBOL(__fentry__)
 #else
 # define function_hook mcount
+EXPORT_SYMBOL(mcount)
 #endif
 
 /* All cases save the original rbp (8 bytes) */
index 51402a7e4ca6ed040bd727477a0cee9c23426887..0bee04d41bed04406de00732cb1d73e5f6f32c33 100644 (file)
@@ -625,8 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
                        amd_disable_seq_and_redirect_scrub);
 
-#endif
-
 #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
 #include <linux/jump_label.h>
 #include <asm/string_64.h>
@@ -657,3 +655,4 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
 #endif
+#endif
index bbfbca5fea0cda10cf7f56e92d2a19d95a37ecb7..9c337b0e8ba7c4ac5b82a3e4ad979c7d4011b4bf 100644 (file)
@@ -1221,11 +1221,16 @@ void __init setup_arch(char **cmdline_p)
         */
        get_smp_config();
 
+       /*
+        * Systems w/o ACPI and mptables might not have mapped the local
+        * APIC yet, but prefill_possible_map() might need to access it.
+        */
+       init_apic_mappings();
+
        prefill_possible_map();
 
        init_cpu_to_node();
 
-       init_apic_mappings();
        io_apic_init_mappings();
 
        kvm_guest_init();
index 40df33753bae8d71390b7f4bd81113211a7b67da..ec1f756f9dc9ace1badccd544b6032d64e520360 100644 (file)
@@ -105,9 +105,6 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
        /* Don't let flags to be set from userspace */
        act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
 
-       if (user_64bit_mode(current_pt_regs()))
-               return;
-
        if (in_ia32_syscall())
                act->sa.sa_flags |= SA_IA32_ABI;
        if (in_x32_syscall())
index 68f8cc222f255aa1cf5266e2d84d7ceeb2417977..c00cb64bc0a12e5f6f36c37215b05a3afd178db9 100644 (file)
@@ -261,8 +261,10 @@ static inline void __smp_reschedule_interrupt(void)
 
 __visible void smp_reschedule_interrupt(struct pt_regs *regs)
 {
+       irq_enter();
        ack_APIC_irq();
        __smp_reschedule_interrupt();
+       irq_exit();
        /*
         * KVM uses this interrupt to force a cpu out of guest mode
         */
index 951f093a96fe90709827a7f75430ad042c318774..42f5eb7b4f6c85251f4ab65d6de44268b6de06ea 100644 (file)
@@ -1409,15 +1409,17 @@ __init void prefill_possible_map(void)
 
        /* No boot processor was found in mptable or ACPI MADT */
        if (!num_processors) {
-               int apicid = boot_cpu_physical_apicid;
-               int cpu = hard_smp_processor_id();
+               if (boot_cpu_has(X86_FEATURE_APIC)) {
+                       int apicid = boot_cpu_physical_apicid;
+                       int cpu = hard_smp_processor_id();
 
-               pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);
+                       pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);
 
-               /* Make sure boot cpu is enumerated */
-               if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
-                   apic->apic_id_valid(apicid))
-                       generic_processor_info(apicid, boot_cpu_apic_version);
+                       /* Make sure boot cpu is enumerated */
+                       if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
+                           apic->apic_id_valid(apicid))
+                               generic_processor_info(apicid, boot_cpu_apic_version);
+               }
 
                if (!num_processors)
                        num_processors = 1;
index c9a073866ca7b1f4c6efa5b9db110c591083b3af..a23ce84a3f6ccfefe36a0d3070880aa45d2bc0f5 100644 (file)
@@ -57,7 +57,8 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
        unsigned char opcode[15];
        unsigned long addr = convert_ip_to_linear(child, regs);
 
-       copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
+       copied = access_process_vm(child, addr, opcode, sizeof(opcode),
+                       FOLL_FORCE);
        for (i = 0; i < copied; i++) {
                switch (opcode[i]) {
                /* popf and iret */
index 764a29f84de7feea6346ce1d822af64f14af63ff..85195d447a922785857db0caacc73d6bc9b9490c 100644 (file)
@@ -66,13 +66,36 @@ __init int create_simplefb(const struct screen_info *si,
 {
        struct platform_device *pd;
        struct resource res;
-       unsigned long len;
+       u64 base, size;
+       u32 length;
 
-       /* don't use lfb_size as it may contain the whole VMEM instead of only
-        * the part that is occupied by the framebuffer */
-       len = mode->height * mode->stride;
-       len = PAGE_ALIGN(len);
-       if (len > (u64)si->lfb_size << 16) {
+       /*
+        * If the 64BIT_BASE capability is set, ext_lfb_base will contain the
+        * upper half of the base address. Assemble the address, then make sure
+        * it is valid and we can actually access it.
+        */
+       base = si->lfb_base;
+       if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE)
+               base |= (u64)si->ext_lfb_base << 32;
+       if (!base || (u64)(resource_size_t)base != base) {
+               printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n");
+               return -EINVAL;
+       }
+
+       /*
+        * Don't use lfb_size as IORESOURCE size, since it may contain the
+        * entire VMEM, and thus require huge mappings. Use just the part we
+        * need, that is, the part where the framebuffer is located. But verify
+        * that it does not exceed the advertised VMEM.
+        * Note that in case of VBE, the lfb_size is shifted by 16 bits for
+        * historical reasons.
+        */
+       size = si->lfb_size;
+       if (si->orig_video_isVGA == VIDEO_TYPE_VLFB)
+               size <<= 16;
+       length = mode->height * mode->stride;
+       length = PAGE_ALIGN(length);
+       if (length > size) {
                printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
                return -EINVAL;
        }
@@ -81,8 +104,8 @@ __init int create_simplefb(const struct screen_info *si,
        memset(&res, 0, sizeof(res));
        res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
        res.name = simplefb_resname;
-       res.start = si->lfb_base;
-       res.end = si->lfb_base + len - 1;
+       res.start = base;
+       res.end = res.start + length - 1;
        if (res.end <= res.start)
                return -EINVAL;
 
index 9298993dc8b715adb79e78f2f71be74402e68b7d..b80e8bf43cc63b4350185ab1125ee85f1c43d05f 100644 (file)
@@ -7,11 +7,13 @@
 
 unsigned long unwind_get_return_address(struct unwind_state *state)
 {
+       unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+
        if (unwind_done(state))
                return 0;
 
        return ftrace_graph_ret_addr(state->task, &state->graph_idx,
-                                    *state->sp, state->sp);
+                                    addr, state->sp);
 }
 EXPORT_SYMBOL_GPL(unwind_get_return_address);
 
@@ -23,8 +25,10 @@ bool unwind_next_frame(struct unwind_state *state)
                return false;
 
        do {
+               unsigned long addr = READ_ONCE_NOCHECK(*state->sp);
+
                for (state->sp++; state->sp < info->end; state->sp++)
-                       if (__kernel_text_address(*state->sp))
+                       if (__kernel_text_address(addr))
                                return true;
 
                state->sp = info->next_sp;
@@ -47,7 +51,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
        get_stack_info(first_frame, state->task, &state->stack_info,
                       &state->stack_mask);
 
-       if (!__kernel_text_address(*first_frame))
+       /*
+        * The caller can provide the address of the first frame directly
+        * (first_frame) or indirectly (regs->sp) to indicate which stack frame
+        * to start unwinding at.  Skip ahead until we reach it.
+        */
+       if (!unwind_done(state) &&
+           (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
+           !__kernel_text_address(*first_frame)))
                unwind_next_frame(state);
 }
 EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
deleted file mode 100644 (file)
index b2cee3d..0000000
+++ /dev/null
@@ -1,85 +0,0 @@
-/* Exports for assembly files.
-   All C exports should go in the respective C files. */
-
-#include <linux/export.h>
-#include <linux/spinlock_types.h>
-#include <linux/smp.h>
-
-#include <net/checksum.h>
-
-#include <asm/processor.h>
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
-#include <asm/desc.h>
-#include <asm/ftrace.h>
-
-#ifdef CONFIG_FUNCTION_TRACER
-/* mcount and __fentry__ are defined in assembly */
-#ifdef CC_USING_FENTRY
-EXPORT_SYMBOL(__fentry__);
-#else
-EXPORT_SYMBOL(mcount);
-#endif
-#endif
-
-EXPORT_SYMBOL(__get_user_1);
-EXPORT_SYMBOL(__get_user_2);
-EXPORT_SYMBOL(__get_user_4);
-EXPORT_SYMBOL(__get_user_8);
-EXPORT_SYMBOL(__put_user_1);
-EXPORT_SYMBOL(__put_user_2);
-EXPORT_SYMBOL(__put_user_4);
-EXPORT_SYMBOL(__put_user_8);
-
-EXPORT_SYMBOL(copy_user_generic_string);
-EXPORT_SYMBOL(copy_user_generic_unrolled);
-EXPORT_SYMBOL(copy_user_enhanced_fast_string);
-EXPORT_SYMBOL(__copy_user_nocache);
-EXPORT_SYMBOL(_copy_from_user);
-EXPORT_SYMBOL(_copy_to_user);
-
-EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled);
-
-EXPORT_SYMBOL(copy_page);
-EXPORT_SYMBOL(clear_page);
-
-EXPORT_SYMBOL(csum_partial);
-
-EXPORT_SYMBOL(__sw_hweight32);
-EXPORT_SYMBOL(__sw_hweight64);
-
-/*
- * Export string functions. We normally rely on gcc builtin for most of these,
- * but gcc sometimes decides not to inline them.
- */
-#undef memcpy
-#undef memset
-#undef memmove
-
-extern void *__memset(void *, int, __kernel_size_t);
-extern void *__memcpy(void *, const void *, __kernel_size_t);
-extern void *__memmove(void *, const void *, __kernel_size_t);
-extern void *memset(void *, int, __kernel_size_t);
-extern void *memcpy(void *, const void *, __kernel_size_t);
-extern void *memmove(void *, const void *, __kernel_size_t);
-
-EXPORT_SYMBOL(__memset);
-EXPORT_SYMBOL(__memcpy);
-EXPORT_SYMBOL(__memmove);
-
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memmove);
-
-#ifndef CONFIG_DEBUG_VIRTUAL
-EXPORT_SYMBOL(phys_base);
-#endif
-EXPORT_SYMBOL(empty_zero_page);
-#ifndef CONFIG_PARAVIRT
-EXPORT_SYMBOL(native_load_gs_index);
-#endif
-
-#ifdef CONFIG_PREEMPT
-EXPORT_SYMBOL(___preempt_schedule);
-EXPORT_SYMBOL(___preempt_schedule_notrace);
-#endif
index 4e95d3eb29557bcb99219fddb7b909e23ff3b090..a3ce9d260d68756fa675ce40994b79aedd206413 100644 (file)
@@ -2105,16 +2105,10 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
        int rc;
-       unsigned short sel, old_sel;
-       struct desc_struct old_desc, new_desc;
-       const struct x86_emulate_ops *ops = ctxt->ops;
+       unsigned short sel;
+       struct desc_struct new_desc;
        u8 cpl = ctxt->ops->cpl(ctxt);
 
-       /* Assignment of RIP may only fail in 64-bit mode */
-       if (ctxt->mode == X86EMUL_MODE_PROT64)
-               ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
-                                VCPU_SREG_CS);
-
        memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
        rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
@@ -2124,12 +2118,10 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
                return rc;
 
        rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
-       if (rc != X86EMUL_CONTINUE) {
-               WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
-               /* assigning eip failed; restore the old cs */
-               ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
-               return rc;
-       }
+       /* Error handling is not implemented. */
+       if (rc != X86EMUL_CONTINUE)
+               return X86EMUL_UNHANDLEABLE;
+
        return rc;
 }
 
@@ -2189,14 +2181,8 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
        int rc;
        unsigned long eip, cs;
-       u16 old_cs;
        int cpl = ctxt->ops->cpl(ctxt);
-       struct desc_struct old_desc, new_desc;
-       const struct x86_emulate_ops *ops = ctxt->ops;
-
-       if (ctxt->mode == X86EMUL_MODE_PROT64)
-               ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
-                                VCPU_SREG_CS);
+       struct desc_struct new_desc;
 
        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
        if (rc != X86EMUL_CONTINUE)
@@ -2213,10 +2199,10 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rc = assign_eip_far(ctxt, eip, &new_desc);
-       if (rc != X86EMUL_CONTINUE) {
-               WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
-               ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
-       }
+       /* Error handling is not implemented. */
+       if (rc != X86EMUL_CONTINUE)
+               return X86EMUL_UNHANDLEABLE;
+
        return rc;
 }
 
@@ -5045,7 +5031,7 @@ done_prefixes:
        /* Decode and fetch the destination operand: register or memory. */
        rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
-       if (ctxt->rip_relative)
+       if (ctxt->rip_relative && likely(ctxt->memopp))
                ctxt->memopp->addr.mem.ea = address_mask(ctxt,
                                        ctxt->memopp->addr.mem.ea + ctxt->_eip);
 
index c7220ba94aa776dceb3db4413ea9dec4ebace324..6e219e5c07d27c5dc41786953b1114b1e475e346 100644 (file)
@@ -94,7 +94,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
 {
        ioapic->rtc_status.pending_eoi = 0;
-       bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPUS);
+       bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID);
 }
 
 static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
@@ -594,7 +594,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
        ioapic->irr = 0;
        ioapic->irr_delivered = 0;
        ioapic->id = 0;
-       memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
+       memset(ioapic->irq_eoi, 0x00, sizeof(ioapic->irq_eoi));
        rtc_irq_eoi_tracking_reset(ioapic);
 }
 
index 7d2692a4965756143825d65d37688837d82aaca2..1cc6e54436dbaa71e4a68943456b9beaceec00f6 100644 (file)
@@ -42,13 +42,13 @@ struct kvm_vcpu;
 
 struct dest_map {
        /* vcpu bitmap where IRQ has been sent */
-       DECLARE_BITMAP(map, KVM_MAX_VCPUS);
+       DECLARE_BITMAP(map, KVM_MAX_VCPU_ID);
 
        /*
         * Vector sent to a given vcpu, only valid when
         * the vcpu's bit in map is set
         */
-       u8 vectors[KVM_MAX_VCPUS];
+       u8 vectors[KVM_MAX_VCPU_ID];
 };
 
 
index 25810b144b58d979c5ba9355fc7b0907494f5869..6c0191615f23a34bae5c0972a20025cf5b695af9 100644 (file)
@@ -41,6 +41,15 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
                           bool line_status)
 {
        struct kvm_pic *pic = pic_irqchip(kvm);
+
+       /*
+        * XXX: rejecting pic routes when pic isn't in use would be better,
+        * but the default routing table is installed while kvm->arch.vpic is
+        * NULL and KVM_CREATE_IRQCHIP can race with KVM_IRQ_LINE.
+        */
+       if (!pic)
+               return -1;
+
        return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
 }
 
@@ -49,6 +58,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
                              bool line_status)
 {
        struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+
+       if (!ioapic)
+               return -1;
+
        return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level,
                                line_status);
 }
@@ -156,6 +169,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
 }
 
 
+static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
+                   struct kvm *kvm, int irq_source_id, int level,
+                   bool line_status)
+{
+       if (!level)
+               return -1;
+
+       return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
+}
+
 int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
                              struct kvm *kvm, int irq_source_id, int level,
                              bool line_status)
@@ -163,18 +186,26 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
        struct kvm_lapic_irq irq;
        int r;
 
-       if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
-               return -EWOULDBLOCK;
+       switch (e->type) {
+       case KVM_IRQ_ROUTING_HV_SINT:
+               return kvm_hv_set_sint(e, kvm, irq_source_id, level,
+                                      line_status);
 
-       if (kvm_msi_route_invalid(kvm, e))
-               return -EINVAL;
+       case KVM_IRQ_ROUTING_MSI:
+               if (kvm_msi_route_invalid(kvm, e))
+                       return -EINVAL;
 
-       kvm_set_msi_irq(kvm, e, &irq);
+               kvm_set_msi_irq(kvm, e, &irq);
 
-       if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
-               return r;
-       else
-               return -EWOULDBLOCK;
+               if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
+                       return r;
+               break;
+
+       default:
+               break;
+       }
+
+       return -EWOULDBLOCK;
 }
 
 int kvm_request_irq_source_id(struct kvm *kvm)
@@ -254,16 +285,6 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
        srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
-static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
-                   struct kvm *kvm, int irq_source_id, int level,
-                   bool line_status)
-{
-       if (!level)
-               return -1;
-
-       return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
-}
-
 int kvm_set_routing_entry(struct kvm *kvm,
                          struct kvm_kernel_irq_routing_entry *e,
                          const struct kvm_irq_routing_entry *ue)
@@ -423,18 +444,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
        srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
-int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm,
-                    int irq_source_id, int level, bool line_status)
-{
-       switch (irq->type) {
-       case KVM_IRQ_ROUTING_HV_SINT:
-               return kvm_hv_set_sint(irq, kvm, irq_source_id, level,
-                                      line_status);
-       default:
-               return -EWOULDBLOCK;
-       }
-}
-
 void kvm_arch_irq_routing_update(struct kvm *kvm)
 {
        kvm_hv_irq_routing_update(kvm);
index 23b99f3053825d40c6d697cf8c58dfb4ae941208..6f69340f9fa31496cf9ec8d91f69a5fc0b550ff6 100644 (file)
@@ -138,7 +138,7 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
                *mask = dest_id & 0xff;
                return true;
        case KVM_APIC_MODE_XAPIC_CLUSTER:
-               *cluster = map->xapic_cluster_map[dest_id >> 4];
+               *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
                *mask = dest_id & 0xf;
                return true;
        default:
index f8157a36ab099a2d3336ef422208b5f078154064..8ca1eca5038d5ce50f6376393abb83df79c4524f 100644 (file)
@@ -1138,21 +1138,6 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
        mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
-static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
-{
-       struct vcpu_svm *svm = to_svm(vcpu);
-
-       svm->vmcb->control.tsc_offset += adjustment;
-       if (is_guest_mode(vcpu))
-               svm->nested.hsave->control.tsc_offset += adjustment;
-       else
-               trace_kvm_write_tsc_offset(vcpu->vcpu_id,
-                                    svm->vmcb->control.tsc_offset - adjustment,
-                                    svm->vmcb->control.tsc_offset);
-
-       mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
-}
-
 static void avic_init_vmcb(struct vcpu_svm *svm)
 {
        struct vmcb *vmcb = svm->vmcb;
@@ -3449,12 +3434,6 @@ static int cr8_write_interception(struct vcpu_svm *svm)
        return 0;
 }
 
-static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
-{
-       struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
-       return vmcb->control.tsc_offset + host_tsc;
-}
-
 static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
@@ -5422,8 +5401,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
        .has_wbinvd_exit = svm_has_wbinvd_exit,
 
        .write_tsc_offset = svm_write_tsc_offset,
-       .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest,
-       .read_l1_tsc = svm_read_l1_tsc,
 
        .set_tdp_cr3 = set_tdp_cr3,
 
index cf1b16dbc98a90d4035a480362a549f299faf55f..5382b82462fcba28fed9a5064776cfb527e8eaa3 100644 (file)
@@ -187,6 +187,7 @@ struct vmcs {
  */
 struct loaded_vmcs {
        struct vmcs *vmcs;
+       struct vmcs *shadow_vmcs;
        int cpu;
        int launched;
        struct list_head loaded_vmcss_on_cpu_link;
@@ -411,7 +412,6 @@ struct nested_vmx {
         * memory during VMXOFF, VMCLEAR, VMPTRLD.
         */
        struct vmcs12 *cached_vmcs12;
-       struct vmcs *current_shadow_vmcs;
        /*
         * Indicates if the shadow vmcs must be updated with the
         * data hold by vmcs12
@@ -421,7 +421,6 @@ struct nested_vmx {
        /* vmcs02_list cache of VMCSs recently used to run L2 guests */
        struct list_head vmcs02_pool;
        int vmcs02_num;
-       u64 vmcs01_tsc_offset;
        bool change_vmcs01_virtual_x2apic_mode;
        /* L2 must run next, and mustn't decide to exit to L1. */
        bool nested_run_pending;
@@ -1419,6 +1418,8 @@ static void vmcs_clear(struct vmcs *vmcs)
 static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
 {
        vmcs_clear(loaded_vmcs->vmcs);
+       if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched)
+               vmcs_clear(loaded_vmcs->shadow_vmcs);
        loaded_vmcs->cpu = -1;
        loaded_vmcs->launched = 0;
 }
@@ -2604,20 +2605,6 @@ static u64 guest_read_tsc(struct kvm_vcpu *vcpu)
        return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset;
 }
 
-/*
- * Like guest_read_tsc, but always returns L1's notion of the timestamp
- * counter, even if a nested guest (L2) is currently running.
- */
-static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
-{
-       u64 tsc_offset;
-
-       tsc_offset = is_guest_mode(vcpu) ?
-               to_vmx(vcpu)->nested.vmcs01_tsc_offset :
-               vmcs_read64(TSC_OFFSET);
-       return host_tsc + tsc_offset;
-}
-
 /*
  * writes 'offset' into guest's timestamp counter offset register
  */
@@ -2631,7 +2618,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
                 * to the newly set TSC to get L2's TSC.
                 */
                struct vmcs12 *vmcs12;
-               to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset;
                /* recalculate vmcs02.TSC_OFFSET: */
                vmcs12 = get_vmcs12(vcpu);
                vmcs_write64(TSC_OFFSET, offset +
@@ -2644,19 +2630,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
        }
 }
 
-static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
-{
-       u64 offset = vmcs_read64(TSC_OFFSET);
-
-       vmcs_write64(TSC_OFFSET, offset + adjustment);
-       if (is_guest_mode(vcpu)) {
-               /* Even when running L2, the adjustment needs to apply to L1 */
-               to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment;
-       } else
-               trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset,
-                                          offset + adjustment);
-}
-
 static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0);
@@ -3562,6 +3535,7 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
        loaded_vmcs_clear(loaded_vmcs);
        free_vmcs(loaded_vmcs->vmcs);
        loaded_vmcs->vmcs = NULL;
+       WARN_ON(loaded_vmcs->shadow_vmcs != NULL);
 }
 
 static void free_kvm_area(void)
@@ -6696,6 +6670,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
        if (!item)
                return NULL;
        item->vmcs02.vmcs = alloc_vmcs();
+       item->vmcs02.shadow_vmcs = NULL;
        if (!item->vmcs02.vmcs) {
                kfree(item);
                return NULL;
@@ -7072,7 +7047,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
                shadow_vmcs->revision_id |= (1u << 31);
                /* init shadow vmcs */
                vmcs_clear(shadow_vmcs);
-               vmx->nested.current_shadow_vmcs = shadow_vmcs;
+               vmx->vmcs01.shadow_vmcs = shadow_vmcs;
        }
 
        INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool));
@@ -7174,8 +7149,11 @@ static void free_nested(struct vcpu_vmx *vmx)
                free_page((unsigned long)vmx->nested.msr_bitmap);
                vmx->nested.msr_bitmap = NULL;
        }
-       if (enable_shadow_vmcs)
-               free_vmcs(vmx->nested.current_shadow_vmcs);
+       if (enable_shadow_vmcs) {
+               vmcs_clear(vmx->vmcs01.shadow_vmcs);
+               free_vmcs(vmx->vmcs01.shadow_vmcs);
+               vmx->vmcs01.shadow_vmcs = NULL;
+       }
        kfree(vmx->nested.cached_vmcs12);
        /* Unpin physical memory we referred to in current vmcs02 */
        if (vmx->nested.apic_access_page) {
@@ -7352,7 +7330,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
        int i;
        unsigned long field;
        u64 field_value;
-       struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
+       struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
        const unsigned long *fields = shadow_read_write_fields;
        const int num_fields = max_shadow_read_write_fields;
 
@@ -7401,7 +7379,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
        int i, q;
        unsigned long field;
        u64 field_value = 0;
-       struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
+       struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
 
        vmcs_load(shadow_vmcs);
 
@@ -7591,7 +7569,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
                        vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
                                      SECONDARY_EXEC_SHADOW_VMCS);
                        vmcs_write64(VMCS_LINK_POINTER,
-                                    __pa(vmx->nested.current_shadow_vmcs));
+                                    __pa(vmx->vmcs01.shadow_vmcs));
                        vmx->nested.sync_shadow_vmcs = true;
                }
        }
@@ -7659,7 +7637,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 
        types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
 
-       if (!(types & (1UL << type))) {
+       if (type >= 32 || !(types & (1 << type))) {
                nested_vmx_failValid(vcpu,
                                VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
                skip_emulated_instruction(vcpu);
@@ -7722,7 +7700,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
 
        types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;
 
-       if (!(types & (1UL << type))) {
+       if (type >= 32 || !(types & (1 << type))) {
                nested_vmx_failValid(vcpu,
                        VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
                skip_emulated_instruction(vcpu);
@@ -9156,6 +9134,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
        vmx->loaded_vmcs = &vmx->vmcs01;
        vmx->loaded_vmcs->vmcs = alloc_vmcs();
+       vmx->loaded_vmcs->shadow_vmcs = NULL;
        if (!vmx->loaded_vmcs->vmcs)
                goto free_msrs;
        if (!vmm_exclusive)
@@ -10061,9 +10040,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
        if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
                vmcs_write64(TSC_OFFSET,
-                       vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
+                       vcpu->arch.tsc_offset + vmcs12->tsc_offset);
        else
-               vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+               vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
        if (kvm_has_tsc_control)
                decache_tsc_multiplier(vmx);
 
@@ -10293,8 +10272,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
        enter_guest_mode(vcpu);
 
-       vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
-
        if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
                vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
 
@@ -10818,7 +10795,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
        load_vmcs12_host_state(vcpu, vmcs12);
 
        /* Update any VMCS fields that might have changed while L2 ran */
-       vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+       vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
        if (vmx->hv_deadline_tsc == -1)
                vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
                                PIN_BASED_VMX_PREEMPTION_TIMER);
@@ -11339,8 +11316,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 
        .write_tsc_offset = vmx_write_tsc_offset,
-       .adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest,
-       .read_l1_tsc = vmx_read_l1_tsc,
 
        .set_tdp_cr3 = vmx_set_cr3,
 
index 6c633de84dd7339637e24604952fb4d2c5180562..04c5d96b1d678a6eeec993b71efb93f509496bdf 100644 (file)
@@ -210,7 +210,18 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
        struct kvm_shared_msrs *locals
                = container_of(urn, struct kvm_shared_msrs, urn);
        struct kvm_shared_msr_values *values;
+       unsigned long flags;
 
+       /*
+        * Disabling irqs at this point since the following code could be
+        * interrupted and executed through kvm_arch_hardware_disable()
+        */
+       local_irq_save(flags);
+       if (locals->registered) {
+               locals->registered = false;
+               user_return_notifier_unregister(urn);
+       }
+       local_irq_restore(flags);
        for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
                values = &locals->values[slot];
                if (values->host != values->curr) {
@@ -218,8 +229,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
                        values->curr = values->host;
                }
        }
-       locals->registered = false;
-       user_return_notifier_unregister(urn);
 }
 
 static void shared_msr_update(unsigned slot, u32 msr)
@@ -1409,7 +1418,7 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
 {
-       return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc));
+       return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc);
 }
 EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
 
@@ -1547,7 +1556,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
                                           s64 adjustment)
 {
-       kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
+       kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
 }
 
 static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -1555,7 +1564,7 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
        if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio)
                WARN_ON(adjustment < 0);
        adjustment = kvm_scale_tsc(vcpu, (u64) adjustment);
-       kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment);
+       adjust_tsc_offset_guest(vcpu, adjustment);
 }
 
 #ifdef CONFIG_X86_64
@@ -1724,18 +1733,23 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
 
 static u64 __get_kvmclock_ns(struct kvm *kvm)
 {
-       struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0);
        struct kvm_arch *ka = &kvm->arch;
-       s64 ns;
+       struct pvclock_vcpu_time_info hv_clock;
 
-       if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) {
-               u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
-               ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc);
-       } else {
-               ns = ktime_get_boot_ns() + ka->kvmclock_offset;
+       spin_lock(&ka->pvclock_gtod_sync_lock);
+       if (!ka->use_master_clock) {
+               spin_unlock(&ka->pvclock_gtod_sync_lock);
+               return ktime_get_boot_ns() + ka->kvmclock_offset;
        }
 
-       return ns;
+       hv_clock.tsc_timestamp = ka->master_cycle_now;
+       hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
+       spin_unlock(&ka->pvclock_gtod_sync_lock);
+
+       kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+                          &hv_clock.tsc_shift,
+                          &hv_clock.tsc_to_system_mul);
+       return __pvclock_read_cycles(&hv_clock, rdtsc());
 }
 
 u64 get_kvmclock_ns(struct kvm *kvm)
@@ -2262,7 +2276,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                /* Drop writes to this legacy MSR -- see rdmsr
                 * counterpart for further detail.
                 */
-               vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data);
+               vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
                break;
        case MSR_AMD64_OSVW_ID_LENGTH:
                if (!guest_cpuid_has_osvw(vcpu))
@@ -2280,11 +2294,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                if (kvm_pmu_is_valid_msr(vcpu, msr))
                        return kvm_pmu_set_msr(vcpu, msr_info);
                if (!ignore_msrs) {
-                       vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
+                       vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
                                    msr, data);
                        return 1;
                } else {
-                       vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n",
+                       vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
                                    msr, data);
                        break;
                }
@@ -2596,7 +2610,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_PIT_STATE2:
        case KVM_CAP_SET_IDENTITY_MAP_ADDR:
        case KVM_CAP_XEN_HVM:
-       case KVM_CAP_ADJUST_CLOCK:
        case KVM_CAP_VCPU_EVENTS:
        case KVM_CAP_HYPERV:
        case KVM_CAP_HYPERV_VAPIC:
@@ -2623,6 +2636,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 #endif
                r = 1;
                break;
+       case KVM_CAP_ADJUST_CLOCK:
+               r = KVM_CLOCK_TSC_STABLE;
+               break;
        case KVM_CAP_X86_SMM:
                /* SMBASE is usually relocated above 1M on modern chipsets,
                 * and SMM handlers might indeed rely on 4G segment limits,
@@ -3415,6 +3431,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        };
        case KVM_SET_VAPIC_ADDR: {
                struct kvm_vapic_addr va;
+               int idx;
 
                r = -EINVAL;
                if (!lapic_in_kernel(vcpu))
@@ -3422,7 +3439,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = -EFAULT;
                if (copy_from_user(&va, argp, sizeof va))
                        goto out;
+               idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        }
        case KVM_X86_SETUP_MCE: {
@@ -4103,9 +4122,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
                struct kvm_clock_data user_ns;
                u64 now_ns;
 
-               now_ns = get_kvmclock_ns(kvm);
+               local_irq_disable();
+               now_ns = __get_kvmclock_ns(kvm);
                user_ns.clock = now_ns;
-               user_ns.flags = 0;
+               user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
+               local_irq_enable();
                memset(&user_ns.pad, 0, sizeof(user_ns.pad));
 
                r = -EFAULT;
@@ -5733,13 +5754,13 @@ static int kvmclock_cpu_online(unsigned int cpu)
 
 static void kvm_timer_init(void)
 {
-       int cpu;
-
        max_tsc_khz = tsc_khz;
 
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 #ifdef CONFIG_CPU_FREQ
                struct cpufreq_policy policy;
+               int cpu;
+
                memset(&policy, 0, sizeof(policy));
                cpu = get_cpu();
                cpufreq_get_policy(&policy, cpu);
@@ -7410,10 +7431,12 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+       void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
+
        kvmclock_reset(vcpu);
 
-       free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
        kvm_x86_ops->vcpu_free(vcpu);
+       free_cpumask_var(wbinvd_dirty_mask);
 }
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
index c1e6232098531f0d8ce14d492215041994443b81..4d34bb548b41ebdddc20fcf464aad4cb44c6a763 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/asm.h>
+#include <asm/export.h>
                                
 /*
  * computes a partial checksum, e.g. for TCP/UDP fragments
@@ -251,6 +252,7 @@ ENTRY(csum_partial)
 ENDPROC(csum_partial)
                                
 #endif
+EXPORT_SYMBOL(csum_partial)
 
 /*
 unsigned int csum_partial_copy_generic (const char *src, char *dst,
@@ -490,3 +492,4 @@ ENDPROC(csum_partial_copy_generic)
 #undef ROUND1          
                
 #endif
+EXPORT_SYMBOL(csum_partial_copy_generic)
index 65be7cfaf947228d454f2324541e5f5227d85183..5e2af3a88cf5e47e2505926938d3f239d55f2ff3 100644 (file)
@@ -1,6 +1,7 @@
 #include <linux/linkage.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
+#include <asm/export.h>
 
 /*
  * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
@@ -23,6 +24,7 @@ ENTRY(clear_page)
        rep stosq
        ret
 ENDPROC(clear_page)
+EXPORT_SYMBOL(clear_page)
 
 ENTRY(clear_page_orig)
 
index ad53497784904b2c1f420fd41576c9ffeea8ce0b..03a186fc06eab6217a4d461174409ac1cc9366ee 100644 (file)
@@ -7,6 +7,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/export.h>
 
 .text
 
@@ -48,3 +49,4 @@ ENTRY(cmpxchg8b_emu)
        ret
 
 ENDPROC(cmpxchg8b_emu)
+EXPORT_SYMBOL(cmpxchg8b_emu)
index 24ef1c2104d422c35a9ce783934306a0dd9215f1..e8508156c99d5e0a8f3aa8a6dc2ad659d59c021e 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/linkage.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
+#include <asm/export.h>
 
 /*
  * Some CPUs run faster using the string copy instructions (sane microcode).
@@ -17,6 +18,7 @@ ENTRY(copy_page)
        rep     movsq
        ret
 ENDPROC(copy_page)
+EXPORT_SYMBOL(copy_page)
 
 ENTRY(copy_page_regs)
        subq    $2*8,   %rsp
index bf603ebbfd8e26eb81f915f52103a2e9aa691cd9..d376e4b48f881b89170802ab7b6aa072c458ed8f 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
+#include <asm/export.h>
 
 /* Standard copy_to_user with segment limit checking */
 ENTRY(_copy_to_user)
@@ -29,6 +30,7 @@ ENTRY(_copy_to_user)
                      "jmp copy_user_enhanced_fast_string",     \
                      X86_FEATURE_ERMS
 ENDPROC(_copy_to_user)
+EXPORT_SYMBOL(_copy_to_user)
 
 /* Standard copy_from_user with segment limit checking */
 ENTRY(_copy_from_user)
@@ -44,6 +46,8 @@ ENTRY(_copy_from_user)
                      "jmp copy_user_enhanced_fast_string",     \
                      X86_FEATURE_ERMS
 ENDPROC(_copy_from_user)
+EXPORT_SYMBOL(_copy_from_user)
+
 
        .section .fixup,"ax"
        /* must zero dest */
@@ -155,6 +159,7 @@ ENTRY(copy_user_generic_unrolled)
        _ASM_EXTABLE(21b,50b)
        _ASM_EXTABLE(22b,50b)
 ENDPROC(copy_user_generic_unrolled)
+EXPORT_SYMBOL(copy_user_generic_unrolled)
 
 /* Some CPUs run faster using the string copy instructions.
  * This is also a lot simpler. Use them when possible.
@@ -200,6 +205,7 @@ ENTRY(copy_user_generic_string)
        _ASM_EXTABLE(1b,11b)
        _ASM_EXTABLE(3b,12b)
 ENDPROC(copy_user_generic_string)
+EXPORT_SYMBOL(copy_user_generic_string)
 
 /*
  * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
@@ -229,6 +235,7 @@ ENTRY(copy_user_enhanced_fast_string)
 
        _ASM_EXTABLE(1b,12b)
 ENDPROC(copy_user_enhanced_fast_string)
+EXPORT_SYMBOL(copy_user_enhanced_fast_string)
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
@@ -379,3 +386,4 @@ ENTRY(__copy_user_nocache)
        _ASM_EXTABLE(40b,.L_fixup_1b_copy)
        _ASM_EXTABLE(41b,.L_fixup_1b_copy)
 ENDPROC(__copy_user_nocache)
+EXPORT_SYMBOL(__copy_user_nocache)
index 9a7fe6a70491e20c7c648d97d04a2009ac0460b6..378e5d5bf9b13834dec061738f356b8fc674b47e 100644 (file)
@@ -135,6 +135,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum)
        return (__force __wsum)add32_with_carry(do_csum(buff, len),
                                                (__force u32)sum);
 }
+EXPORT_SYMBOL(csum_partial);
 
 /*
  * this routine is used for miscellaneous IP-like checksums, mainly
index 0ef5128c2de8b05ce60e8beac1e43f53adcba2ba..37b62d4121481df990741fac20ee48c2e68492e7 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/thread_info.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
+#include <asm/export.h>
 
        .text
 ENTRY(__get_user_1)
@@ -44,6 +45,7 @@ ENTRY(__get_user_1)
        ASM_CLAC
        ret
 ENDPROC(__get_user_1)
+EXPORT_SYMBOL(__get_user_1)
 
 ENTRY(__get_user_2)
        add $1,%_ASM_AX
@@ -57,6 +59,7 @@ ENTRY(__get_user_2)
        ASM_CLAC
        ret
 ENDPROC(__get_user_2)
+EXPORT_SYMBOL(__get_user_2)
 
 ENTRY(__get_user_4)
        add $3,%_ASM_AX
@@ -70,6 +73,7 @@ ENTRY(__get_user_4)
        ASM_CLAC
        ret
 ENDPROC(__get_user_4)
+EXPORT_SYMBOL(__get_user_4)
 
 ENTRY(__get_user_8)
 #ifdef CONFIG_X86_64
@@ -97,6 +101,7 @@ ENTRY(__get_user_8)
        ret
 #endif
 ENDPROC(__get_user_8)
+EXPORT_SYMBOL(__get_user_8)
 
 
 bad_get_user:
index 8a602a1e404a262f32fbe708e926e986636dcfca..23d893cbc2001094b0885ebf1811cf391192bbad 100644 (file)
@@ -1,4 +1,5 @@
 #include <linux/linkage.h>
+#include <asm/export.h>
 
 #include <asm/asm.h>
 
@@ -32,6 +33,7 @@ ENTRY(__sw_hweight32)
        __ASM_SIZE(pop,) %__ASM_REG(dx)
        ret
 ENDPROC(__sw_hweight32)
+EXPORT_SYMBOL(__sw_hweight32)
 
 ENTRY(__sw_hweight64)
 #ifdef CONFIG_X86_64
@@ -77,3 +79,4 @@ ENTRY(__sw_hweight64)
        ret
 #endif
 ENDPROC(__sw_hweight64)
+EXPORT_SYMBOL(__sw_hweight64)
index 49e6ebac7e73e33b0a03327cb65c95a29afc1c67..779782f5832476582becc24e5a0f0f5b10ea0b53 100644 (file)
@@ -4,6 +4,7 @@
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
+#include <asm/export.h>
 
 /*
  * We build a jump to memcpy_orig by default which gets NOPped out on
@@ -40,6 +41,8 @@ ENTRY(memcpy)
        ret
 ENDPROC(memcpy)
 ENDPROC(__memcpy)
+EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(__memcpy)
 
 /*
  * memcpy_erms() - enhanced fast string memcpy. This is faster and
@@ -274,6 +277,7 @@ ENTRY(memcpy_mcsafe_unrolled)
        xorq %rax, %rax
        ret
 ENDPROC(memcpy_mcsafe_unrolled)
+EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
 
        .section .fixup, "ax"
        /* Return -EFAULT for any failure */
index 90ce01bee00c17f173719652659edb5972ecef07..15de86cd15b05bf95d0275cce0d52188999ac5d0 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/linkage.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
+#include <asm/export.h>
 
 #undef memmove
 
@@ -207,3 +208,5 @@ ENTRY(__memmove)
        retq
 ENDPROC(__memmove)
 ENDPROC(memmove)
+EXPORT_SYMBOL(__memmove)
+EXPORT_SYMBOL(memmove)
index e1229ecd2a82057cc6f0171169b6871f0e438167..55b95db30a61c08df145e2c321fb0362696a623f 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/linkage.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative-asm.h>
+#include <asm/export.h>
 
 .weak memset
 
@@ -43,6 +44,8 @@ ENTRY(__memset)
        ret
 ENDPROC(memset)
 ENDPROC(__memset)
+EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL(__memset)
 
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
index c891ece81e5b11a9b2337eac142c65c95286ad72..cd5d716d289783cd5c0d853b461701822db96a63 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/errno.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
+#include <asm/export.h>
 
 
 /*
@@ -43,6 +44,7 @@ ENTRY(__put_user_1)
        xor %eax,%eax
        EXIT
 ENDPROC(__put_user_1)
+EXPORT_SYMBOL(__put_user_1)
 
 ENTRY(__put_user_2)
        ENTER
@@ -55,6 +57,7 @@ ENTRY(__put_user_2)
        xor %eax,%eax
        EXIT
 ENDPROC(__put_user_2)
+EXPORT_SYMBOL(__put_user_2)
 
 ENTRY(__put_user_4)
        ENTER
@@ -67,6 +70,7 @@ ENTRY(__put_user_4)
        xor %eax,%eax
        EXIT
 ENDPROC(__put_user_4)
+EXPORT_SYMBOL(__put_user_4)
 
 ENTRY(__put_user_8)
        ENTER
@@ -82,6 +86,7 @@ ENTRY(__put_user_8)
        xor %eax,%eax
        EXIT
 ENDPROC(__put_user_8)
+EXPORT_SYMBOL(__put_user_8)
 
 bad_put_user:
        movl $-EFAULT,%eax
index 8e2d55f754bff8f7f221cdb96b0c01c2352f32fa..a03b1c750bfed32e061b93fcfa45c81f9c9ecdad 100644 (file)
@@ -1,4 +1,5 @@
 #include <linux/string.h>
+#include <linux/export.h>
 
 char *strstr(const char *cs, const char *ct)
 {
@@ -28,4 +29,4 @@ __asm__ __volatile__(
        : "dx", "di");
 return __res;
 }
-
+EXPORT_SYMBOL(strstr);
index 79ae939970d3f49065fa4f0ed3dcc6d769ddf48a..fcd06f7526de31a6cd429e6d800ea93fcac294f9 100644 (file)
@@ -135,7 +135,12 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
        if (early_recursion_flag > 2)
                goto halt_loop;
 
-       if (regs->cs != __KERNEL_CS)
+       /*
+        * Old CPUs leave the high bits of CS on the stack
+        * undefined.  I'm not sure which CPUs do this, but at least
+        * the 486 DX works this way.
+        */
+       if ((regs->cs & 0xFFFF) != __KERNEL_CS)
                goto fail;
 
        /*
index b8b6a60b32cf47837070518bd44d4c4742910697..0d4fb3ebbbac9872aaaf26514211ae543a253299 100644 (file)
@@ -435,7 +435,7 @@ slow_irqon:
 
                ret = get_user_pages_unlocked(start,
                                              (end - start) >> PAGE_SHIFT,
-                                             write, 0, pages);
+                                             pages, write ? FOLL_WRITE : 0);
 
                /* Have to be a bit careful with return values */
                if (nr > 0) {
index ddd2661c4502922a63fbcd169615d34dddd6fcae..887e57182716828b7f4f4946fe7145d106ec5bea 100644 (file)
@@ -104,10 +104,10 @@ void __init kernel_randomize_memory(void)
         * consistent with the vaddr_start/vaddr_end variables.
         */
        BUILD_BUG_ON(vaddr_start >= vaddr_end);
-       BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) &&
+       BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
                     vaddr_end >= EFI_VA_START);
-       BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) ||
-                     config_enabled(CONFIG_EFI)) &&
+       BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
+                     IS_ENABLED(CONFIG_EFI)) &&
                     vaddr_end >= __START_KERNEL_map);
        BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
 
index 80476878eb4ca5c8ad56340bb52b2423995a7867..e4f800999b32dc94d5ba1a1283591649a818fbb7 100644 (file)
@@ -544,10 +544,9 @@ static int mpx_resolve_fault(long __user *addr, int write)
 {
        long gup_ret;
        int nr_pages = 1;
-       int force = 0;
 
-       gup_ret = get_user_pages((unsigned long)addr, nr_pages, write,
-                       force, NULL, NULL);
+       gup_ret = get_user_pages((unsigned long)addr, nr_pages,
+                       write ? FOLL_WRITE : 0, NULL, NULL);
        /*
         * get_user_pages() returns number of pages gotten.
         * 0 means we failed to fault in and get anything,
index 170cc4ff057b398382bef3dd635d6a9115460d88..83e701f160a9128dc72316376d8b6a66233e223c 100644 (file)
@@ -730,6 +730,20 @@ void io_free_memtype(resource_size_t start, resource_size_t end)
        free_memtype(start, end);
 }
 
+int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
+{
+       enum page_cache_mode type = _PAGE_CACHE_MODE_WC;
+
+       return io_reserve_memtype(start, start + size, &type);
+}
+EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
+
+void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
+{
+       io_free_memtype(start, start + size);
+}
+EXPORT_SYMBOL(arch_io_free_memtype_wc);
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
                                unsigned long size, pgprot_t vma_prot)
 {
index bf99aa7005eb3eda505893352be0bd6a02f312b1..936a488d6cf6df3c2aadbbdbc036b8eb06701cb0 100644 (file)
@@ -861,7 +861,7 @@ static void __init __efi_enter_virtual_mode(void)
        int count = 0, pg_shift = 0;
        void *new_memmap = NULL;
        efi_status_t status;
-       phys_addr_t pa;
+       unsigned long pa;
 
        efi.systab = NULL;
 
index 58b0f801f66f97212fb9904c77d88ad44c2703de..319148bd4b05091d24576a7535b10aad7bec0c2d 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/io.h>
 #include <linux/reboot.h>
 #include <linux/slab.h>
+#include <linux/ucs2_string.h>
 
 #include <asm/setup.h>
 #include <asm/page.h>
@@ -211,6 +212,35 @@ void efi_sync_low_kernel_mappings(void)
        memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
 }
 
+/*
+ * Wrapper for slow_virt_to_phys() that handles NULL addresses.
+ */
+static inline phys_addr_t
+virt_to_phys_or_null_size(void *va, unsigned long size)
+{
+       bool bad_size;
+
+       if (!va)
+               return 0;
+
+       if (virt_addr_valid(va))
+               return virt_to_phys(va);
+
+       /*
+        * A fully aligned variable on the stack is guaranteed not to
+        * cross a page boundary. Try to catch strings on the stack by
+        * checking that 'size' is a power of two.
+        */
+       bad_size = size > PAGE_SIZE || !is_power_of_2(size);
+
+       WARN_ON(!IS_ALIGNED((unsigned long)va, size) || bad_size);
+
+       return slow_virt_to_phys(va);
+}
+
+#define virt_to_phys_or_null(addr)                             \
+       virt_to_phys_or_null_size((addr), sizeof(*(addr)))
+
 int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
        unsigned long pfn, text;
@@ -494,8 +524,8 @@ static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 
        spin_lock(&rtc_lock);
 
-       phys_tm = virt_to_phys(tm);
-       phys_tc = virt_to_phys(tc);
+       phys_tm = virt_to_phys_or_null(tm);
+       phys_tc = virt_to_phys_or_null(tc);
 
        status = efi_thunk(get_time, phys_tm, phys_tc);
 
@@ -511,7 +541,7 @@ static efi_status_t efi_thunk_set_time(efi_time_t *tm)
 
        spin_lock(&rtc_lock);
 
-       phys_tm = virt_to_phys(tm);
+       phys_tm = virt_to_phys_or_null(tm);
 
        status = efi_thunk(set_time, phys_tm);
 
@@ -529,9 +559,9 @@ efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
 
        spin_lock(&rtc_lock);
 
-       phys_enabled = virt_to_phys(enabled);
-       phys_pending = virt_to_phys(pending);
-       phys_tm = virt_to_phys(tm);
+       phys_enabled = virt_to_phys_or_null(enabled);
+       phys_pending = virt_to_phys_or_null(pending);
+       phys_tm = virt_to_phys_or_null(tm);
 
        status = efi_thunk(get_wakeup_time, phys_enabled,
                             phys_pending, phys_tm);
@@ -549,7 +579,7 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
 
        spin_lock(&rtc_lock);
 
-       phys_tm = virt_to_phys(tm);
+       phys_tm = virt_to_phys_or_null(tm);
 
        status = efi_thunk(set_wakeup_time, enabled, phys_tm);
 
@@ -558,6 +588,10 @@ efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
        return status;
 }
 
+static unsigned long efi_name_size(efi_char16_t *name)
+{
+       return ucs2_strsize(name, EFI_VAR_NAME_LEN) + 1;
+}
 
 static efi_status_t
 efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
@@ -567,11 +601,11 @@ efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor,
        u32 phys_name, phys_vendor, phys_attr;
        u32 phys_data_size, phys_data;
 
-       phys_data_size = virt_to_phys(data_size);
-       phys_vendor = virt_to_phys(vendor);
-       phys_name = virt_to_phys(name);
-       phys_attr = virt_to_phys(attr);
-       phys_data = virt_to_phys(data);
+       phys_data_size = virt_to_phys_or_null(data_size);
+       phys_vendor = virt_to_phys_or_null(vendor);
+       phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
+       phys_attr = virt_to_phys_or_null(attr);
+       phys_data = virt_to_phys_or_null_size(data, *data_size);
 
        status = efi_thunk(get_variable, phys_name, phys_vendor,
                           phys_attr, phys_data_size, phys_data);
@@ -586,9 +620,9 @@ efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor,
        u32 phys_name, phys_vendor, phys_data;
        efi_status_t status;
 
-       phys_name = virt_to_phys(name);
-       phys_vendor = virt_to_phys(vendor);
-       phys_data = virt_to_phys(data);
+       phys_name = virt_to_phys_or_null_size(name, efi_name_size(name));
+       phys_vendor = virt_to_phys_or_null(vendor);
+       phys_data = virt_to_phys_or_null_size(data, data_size);
 
        /* If data_size is > sizeof(u32) we've got problems */
        status = efi_thunk(set_variable, phys_name, phys_vendor,
@@ -605,9 +639,9 @@ efi_thunk_get_next_variable(unsigned long *name_size,
        efi_status_t status;
        u32 phys_name_size, phys_name, phys_vendor;
 
-       phys_name_size = virt_to_phys(name_size);
-       phys_vendor = virt_to_phys(vendor);
-       phys_name = virt_to_phys(name);
+       phys_name_size = virt_to_phys_or_null(name_size);
+       phys_vendor = virt_to_phys_or_null(vendor);
+       phys_name = virt_to_phys_or_null_size(name, *name_size);
 
        status = efi_thunk(get_next_variable, phys_name_size,
                           phys_name, phys_vendor);
@@ -621,7 +655,7 @@ efi_thunk_get_next_high_mono_count(u32 *count)
        efi_status_t status;
        u32 phys_count;
 
-       phys_count = virt_to_phys(count);
+       phys_count = virt_to_phys_or_null(count);
        status = efi_thunk(get_next_high_mono_count, phys_count);
 
        return status;
@@ -633,7 +667,7 @@ efi_thunk_reset_system(int reset_type, efi_status_t status,
 {
        u32 phys_data;
 
-       phys_data = virt_to_phys(data);
+       phys_data = virt_to_phys_or_null_size(data, data_size);
 
        efi_thunk(reset_system, reset_type, status, data_size, phys_data);
 }
@@ -661,9 +695,9 @@ efi_thunk_query_variable_info(u32 attr, u64 *storage_space,
        if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
                return EFI_UNSUPPORTED;
 
-       phys_storage = virt_to_phys(storage_space);
-       phys_remaining = virt_to_phys(remaining_space);
-       phys_max = virt_to_phys(max_variable_size);
+       phys_storage = virt_to_phys_or_null(storage_space);
+       phys_remaining = virt_to_phys_or_null(remaining_space);
+       phys_max = virt_to_phys_or_null(max_variable_size);
 
        status = efi_thunk(query_variable_info, attr, phys_storage,
                           phys_remaining, phys_max);
index 429d08be7848a2df6334aef7e5e2c4e79e4b6d4a..dd6cfa4ad3ac35713da9c93951bd65bd70106a58 100644 (file)
@@ -28,4 +28,4 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o
 obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
 # MISC Devices
 obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
-obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_wdt.o
+obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c
new file mode 100644 (file)
index 0000000..3f1f1c7
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Intel Merrifield watchdog platform device library file
+ *
+ * (C) Copyright 2014 Intel Corporation
+ * Author: David Cohen <david.a.cohen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/intel-mid_wdt.h>
+
+#include <asm/intel-mid.h>
+#include <asm/intel_scu_ipc.h>
+#include <asm/io_apic.h>
+
+#define TANGIER_EXT_TIMER0_MSI 15
+
+static struct platform_device wdt_dev = {
+       .name = "intel_mid_wdt",
+       .id = -1,
+};
+
+static int tangier_probe(struct platform_device *pdev)
+{
+       int gsi;
+       struct irq_alloc_info info;
+       struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
+
+       if (!pdata)
+               return -EINVAL;
+
+       /* IOAPIC builds identity mapping between GSI and IRQ on MID */
+       gsi = pdata->irq;
+       ioapic_set_alloc_attr(&info, cpu_to_node(0), 1, 0);
+       if (mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info) <= 0) {
+               dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n",
+                        gsi);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static struct intel_mid_wdt_pdata tangier_pdata = {
+       .irq = TANGIER_EXT_TIMER0_MSI,
+       .probe = tangier_probe,
+};
+
+static int wdt_scu_status_change(struct notifier_block *nb,
+                                unsigned long code, void *data)
+{
+       if (code == SCU_DOWN) {
+               platform_device_unregister(&wdt_dev);
+               return 0;
+       }
+
+       return platform_device_register(&wdt_dev);
+}
+
+static struct notifier_block wdt_scu_notifier = {
+       .notifier_call  = wdt_scu_status_change,
+};
+
+static int __init register_mid_wdt(void)
+{
+       if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+               return -ENODEV;
+
+       wdt_dev.dev.platform_data = &tangier_pdata;
+
+       /*
+        * We need to be sure that the SCU IPC is ready before watchdog device
+        * can be registered:
+        */
+       intel_scu_notifier_add(&wdt_scu_notifier);
+
+       return 0;
+}
+rootfs_initcall(register_mid_wdt);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_wdt.c
deleted file mode 100644 (file)
index de73413..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * platform_wdt.c: Watchdog platform library file
- *
- * (C) Copyright 2014 Intel Corporation
- * Author: David Cohen <david.a.cohen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/platform_data/intel-mid_wdt.h>
-#include <asm/intel-mid.h>
-#include <asm/io_apic.h>
-
-#define TANGIER_EXT_TIMER0_MSI 15
-
-static struct platform_device wdt_dev = {
-       .name = "intel_mid_wdt",
-       .id = -1,
-};
-
-static int tangier_probe(struct platform_device *pdev)
-{
-       int gsi;
-       struct irq_alloc_info info;
-       struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
-
-       if (!pdata)
-               return -EINVAL;
-
-       /* IOAPIC builds identity mapping between GSI and IRQ on MID */
-       gsi = pdata->irq;
-       ioapic_set_alloc_attr(&info, cpu_to_node(0), 1, 0);
-       if (mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info) <= 0) {
-               dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n",
-                        gsi);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static struct intel_mid_wdt_pdata tangier_pdata = {
-       .irq = TANGIER_EXT_TIMER0_MSI,
-       .probe = tangier_probe,
-};
-
-static int __init register_mid_wdt(void)
-{
-       if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) {
-               wdt_dev.dev.platform_data = &tangier_pdata;
-               return platform_device_register(&wdt_dev);
-       }
-
-       return -ENODEV;
-}
-
-rootfs_initcall(register_mid_wdt);
index 5d3b45ad1c034d4ca922ccdd10ad7d2edb565384..67375dda451c1bec9fe900899dee5a5898882281 100644 (file)
@@ -272,6 +272,25 @@ int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state)
 }
 EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state);
 
+pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev)
+{
+       struct mid_pwr *pwr = midpwr;
+       int id, reg, bit;
+       u32 power;
+
+       if (!pwr || !pwr->available)
+               return PCI_UNKNOWN;
+
+       id = intel_mid_pwr_get_lss_id(pdev);
+       if (id < 0)
+               return PCI_UNKNOWN;
+
+       reg = (id * LSS_PWS_BITS) / 32;
+       bit = (id * LSS_PWS_BITS) % 32;
+       power = mid_pwr_get_state(pwr, reg);
+       return (__force pci_power_t)((power >> bit) & 3);
+}
+
 void intel_mid_pwr_power_off(void)
 {
        struct mid_pwr *pwr = midpwr;
index b4d5e95fe4dfea57c3d9bdcdfa2d35d46a48bc5d..4a6a5a26c58295e4245b09be21a658c3ddae86a2 100644 (file)
@@ -40,7 +40,15 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
                 */
                return BIOS_STATUS_UNIMPLEMENTED;
 
-       ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+       /*
+        * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI
+        * callback method, which uses efi_call() directly, with the kernel page tables:
+        */
+       if (unlikely(test_bit(EFI_OLD_MEMMAP, &efi.flags)))
+               ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5);
+       else
+               ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(uv_bios_call);
index ac58c1616408b515813403389f9d0c91bc19124c..555b9fa0ad43cbd4148b2fb268692d4b2de167c4 100644 (file)
@@ -16,6 +16,7 @@ KCOV_INSTRUMENT := n
 
 KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large
 KBUILD_CFLAGS += -m$(BITS)
+KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
 
 $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
                $(call if_changed,ld)
index 3ee2bb6b440bd3296c4f616c0311aefe07c2d5ce..e7e7055a86589dea6d0f9ca043365db4ab8b1b9c 100644 (file)
@@ -8,7 +8,7 @@ else
        BITS := 64
 endif
 
-obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \
+obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \
        ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \
        stub_$(BITS).o stub_segv.o \
        sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \
index fa4b8b9841ff7071a993e61d2b55ca3bc8b35575..b9933eb9274aeae84159a70c3e5e82a506536826 100644 (file)
@@ -27,6 +27,7 @@
 
 #include <asm/errno.h>
 #include <asm/asm.h>
+#include <asm/export.h>
                                
 /*
  * computes a partial checksum, e.g. for TCP/UDP fragments
@@ -214,3 +215,4 @@ csum_partial:
        ret
                                
 #endif
+       EXPORT_SYMBOL(csum_partial)
diff --git a/arch/x86/um/ksyms.c b/arch/x86/um/ksyms.c
deleted file mode 100644 (file)
index 2e8f43e..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-#include <linux/module.h>
-#include <asm/string.h>
-#include <asm/checksum.h>
-
-#ifndef CONFIG_X86_32
-/*XXX: we need them because they would be exported by x86_64 */
-#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
-EXPORT_SYMBOL(memcpy);
-#else
-EXPORT_SYMBOL(__memcpy);
-#endif
-#endif
-EXPORT_SYMBOL(csum_partial);
index 5766ead6fdb9679907c63aa78f8becccc8d9ecff..60a5a5a85505dd25305aea03d0850613e51ee791 100644 (file)
@@ -36,7 +36,8 @@ int is_syscall(unsigned long addr)
                 * slow, but that doesn't matter, since it will be called only
                 * in case of singlestepping, if copy_from_user failed.
                 */
-               n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
+               n = access_process_vm(current, addr, &instr, sizeof(instr),
+                               FOLL_FORCE);
                if (n != sizeof(instr)) {
                        printk(KERN_ERR "is_syscall : failed to read "
                               "instruction from 0x%lx\n", addr);
index 0b5c184dd5b3b80894c3db60ead6432ad20dca57..e30202b1716efb4243372d02e61f5f8269203419 100644 (file)
@@ -212,7 +212,8 @@ int is_syscall(unsigned long addr)
                 * slow, but that doesn't matter, since it will be called only
                 * in case of singlestepping, if copy_from_user failed.
                 */
-               n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
+               n = access_process_vm(current, addr, &instr, sizeof(instr),
+                               FOLL_FORCE);
                if (n != sizeof(instr)) {
                        printk("is_syscall : failed to read instruction from "
                               "0x%lx\n", addr);
index c0fdd57da7aad477534b5e4d019ebe4e06e2df07..bdd85568540382ebe3b5683534fb2ab7144e39a3 100644 (file)
@@ -1837,6 +1837,7 @@ static void __init init_hvm_pv_info(void)
 
        xen_domain_type = XEN_HVM_DOMAIN;
 }
+#endif
 
 static int xen_cpu_up_prepare(unsigned int cpu)
 {
@@ -1887,6 +1888,7 @@ static int xen_cpu_up_online(unsigned int cpu)
        return 0;
 }
 
+#ifdef CONFIG_XEN_PVHVM
 #ifdef CONFIG_KEXEC_CORE
 static void xen_hvm_shutdown(void)
 {
index 81435d995e1183d07ed0aac8903503e174a5c21e..9fdbe1fe0473802caaf04782f9a5c05ca813f013 100644 (file)
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* _XTENSA_SOCKET_H */
index de9b14b2d348b84ea078e412d3d9f3883e444cb4..cd400af4a6b25597756cda04826278fea75ecf33 100644 (file)
@@ -767,7 +767,14 @@ __SYSCALL(346, sys_preadv2, 6)
 #define __NR_pwritev2                          347
 __SYSCALL(347, sys_pwritev2, 6)
 
-#define __NR_syscall_count                     348
+#define __NR_pkey_mprotect                     348
+__SYSCALL(348, sys_pkey_mprotect, 4)
+#define __NR_pkey_alloc                                349
+__SYSCALL(349, sys_pkey_alloc, 2)
+#define __NR_pkey_free                         350
+__SYSCALL(350, sys_pkey_free, 1)
+
+#define __NR_syscall_count                     351
 
 /*
  * sysxtensa syscall handler
index 9a5bcd0381a71e987b29499d194061af8a1b2d22..be81e69b25bc98e46514c46e4a5a030afb87ecca 100644 (file)
@@ -172,10 +172,11 @@ void __init time_init(void)
 {
        of_clk_init(NULL);
 #ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
-       printk("Calibrating CPU frequency ");
+       pr_info("Calibrating CPU frequency ");
        calibrate_ccount();
-       printk("%d.%02d MHz\n", (int)ccount_freq/1000000,
-                       (int)(ccount_freq/10000)%100);
+       pr_cont("%d.%02d MHz\n",
+               (int)ccount_freq / 1000000,
+               (int)(ccount_freq / 10000) % 100);
 #else
        ccount_freq = CONFIG_XTENSA_CPU_CLOCK*1000000UL;
 #endif
@@ -210,9 +211,8 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
 void calibrate_delay(void)
 {
        loops_per_jiffy = ccount_freq / HZ;
-       printk("Calibrating delay loop (skipped)... "
-              "%lu.%02lu BogoMIPS preset\n",
-              loops_per_jiffy/(1000000/HZ),
-              (loops_per_jiffy/(10000/HZ)) % 100);
+       pr_info("Calibrating delay loop (skipped)... %lu.%02lu BogoMIPS preset\n",
+               loops_per_jiffy / (1000000 / HZ),
+               (loops_per_jiffy / (10000 / HZ)) % 100);
 }
 #endif
index d02fc304b31c10ea9019172b8fd15d8aaea41bdc..ce37d5b899fead50d312f06ed3c3f3982309d294 100644 (file)
@@ -465,26 +465,25 @@ void show_regs(struct pt_regs * regs)
 
        for (i = 0; i < 16; i++) {
                if ((i % 8) == 0)
-                       printk(KERN_INFO "a%02d:", i);
-               printk(KERN_CONT " %08lx", regs->areg[i]);
+                       pr_info("a%02d:", i);
+               pr_cont(" %08lx", regs->areg[i]);
        }
-       printk(KERN_CONT "\n");
-
-       printk("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n",
-              regs->pc, regs->ps, regs->depc, regs->excvaddr);
-       printk("lbeg: %08lx, lend: %08lx lcount: %08lx, sar: %08lx\n",
-              regs->lbeg, regs->lend, regs->lcount, regs->sar);
+       pr_cont("\n");
+       pr_info("pc: %08lx, ps: %08lx, depc: %08lx, excvaddr: %08lx\n",
+               regs->pc, regs->ps, regs->depc, regs->excvaddr);
+       pr_info("lbeg: %08lx, lend: %08lx lcount: %08lx, sar: %08lx\n",
+               regs->lbeg, regs->lend, regs->lcount, regs->sar);
        if (user_mode(regs))
-               printk("wb: %08lx, ws: %08lx, wmask: %08lx, syscall: %ld\n",
-                      regs->windowbase, regs->windowstart, regs->wmask,
-                      regs->syscall);
+               pr_cont("wb: %08lx, ws: %08lx, wmask: %08lx, syscall: %ld\n",
+                       regs->windowbase, regs->windowstart, regs->wmask,
+                       regs->syscall);
 }
 
 static int show_trace_cb(struct stackframe *frame, void *data)
 {
        if (kernel_text_address(frame->pc)) {
-               printk(" [<%08lx>] ", frame->pc);
-               print_symbol("%s\n", frame->pc);
+               pr_cont(" [<%08lx>]", frame->pc);
+               print_symbol(" %s\n", frame->pc);
        }
        return 0;
 }
@@ -494,19 +493,13 @@ void show_trace(struct task_struct *task, unsigned long *sp)
        if (!sp)
                sp = stack_pointer(task);
 
-       printk("Call Trace:");
-#ifdef CONFIG_KALLSYMS
-       printk("\n");
-#endif
+       pr_info("Call Trace:\n");
        walk_stackframe(sp, show_trace_cb, NULL);
-       printk("\n");
+#ifndef CONFIG_KALLSYMS
+       pr_cont("\n");
+#endif
 }
 
-/*
- * This routine abuses get_user()/put_user() to reference pointers
- * with at least a bit of error checking ...
- */
-
 static int kstack_depth_to_print = 24;
 
 void show_stack(struct task_struct *task, unsigned long *sp)
@@ -518,52 +511,29 @@ void show_stack(struct task_struct *task, unsigned long *sp)
                sp = stack_pointer(task);
        stack = sp;
 
-       printk("\nStack: ");
+       pr_info("Stack:\n");
 
        for (i = 0; i < kstack_depth_to_print; i++) {
                if (kstack_end(sp))
                        break;
-               if (i && ((i % 8) == 0))
-                       printk("\n       ");
-               printk("%08lx ", *sp++);
+               pr_cont(" %08lx", *sp++);
+               if (i % 8 == 7)
+                       pr_cont("\n");
        }
-       printk("\n");
        show_trace(task, stack);
 }
 
-void show_code(unsigned int *pc)
-{
-       long i;
-
-       printk("\nCode:");
-
-       for(i = -3 ; i < 6 ; i++) {
-               unsigned long insn;
-               if (__get_user(insn, pc + i)) {
-                       printk(" (Bad address in pc)\n");
-                       break;
-               }
-               printk("%c%08lx%c",(i?' ':'<'),insn,(i?' ':'>'));
-       }
-}
-
 DEFINE_SPINLOCK(die_lock);
 
 void die(const char * str, struct pt_regs * regs, long err)
 {
        static int die_counter;
-       int nl = 0;
 
        console_verbose();
        spin_lock_irq(&die_lock);
 
-       printk("%s: sig: %ld [#%d]\n", str, err, ++die_counter);
-#ifdef CONFIG_PREEMPT
-       printk("PREEMPT ");
-       nl = 1;
-#endif
-       if (nl)
-               printk("\n");
+       pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter,
+               IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "");
        show_regs(regs);
        if (!user_mode(regs))
                show_stack(NULL, (unsigned long*)regs->areg[1]);
index 7be53cb1cc3cebf3e9f5c30e004f6f74b50bfdbf..6ebcef28231486ae2b9dcefadfe61412b2112f11 100644 (file)
@@ -133,6 +133,26 @@ retry:
 }
 EXPORT_SYMBOL_GPL(badblocks_check);
 
+static void badblocks_update_acked(struct badblocks *bb)
+{
+       u64 *p = bb->page;
+       int i;
+       bool unacked = false;
+
+       if (!bb->unacked_exist)
+               return;
+
+       for (i = 0; i < bb->count ; i++) {
+               if (!BB_ACK(p[i])) {
+                       unacked = true;
+                       break;
+               }
+       }
+
+       if (!unacked)
+               bb->unacked_exist = 0;
+}
+
 /**
  * badblocks_set() - Add a range of bad blocks to the table.
  * @bb:                the badblocks structure that holds all badblock information
@@ -294,6 +314,8 @@ int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
        bb->changed = 1;
        if (!acknowledged)
                bb->unacked_exist = 1;
+       else
+               badblocks_update_acked(bb);
        write_sequnlock_irqrestore(&bb->lock, flags);
 
        return rv;
@@ -354,7 +376,8 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
                 * current range.  Earlier ranges could also overlap,
                 * but only this one can overlap the end of the range.
                 */
-               if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
+               if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
+                   (BB_OFFSET(p[lo]) < target)) {
                        /* Partial overlap, leave the tail of this range */
                        int ack = BB_ACK(p[lo]);
                        sector_t a = BB_OFFSET(p[lo]);
@@ -377,7 +400,8 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
                        lo--;
                }
                while (lo >= 0 &&
-                      BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+                      (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
+                      (BB_OFFSET(p[lo]) < target)) {
                        /* This range does overlap */
                        if (BB_OFFSET(p[lo]) < s) {
                                /* Keep the early parts of this range. */
@@ -399,6 +423,7 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
                }
        }
 
+       badblocks_update_acked(bb);
        bb->changed = 1;
 out:
        write_sequnlock_irq(&bb->lock);
index dd38e5ced4a3fa7510f00c2ee7ff9dc636b2a7fc..b08ccbb9393a75800a3299f90537f6ee881b00f0 100644 (file)
@@ -1340,10 +1340,8 @@ int blkcg_policy_register(struct blkcg_policy *pol)
                        struct blkcg_policy_data *cpd;
 
                        cpd = pol->cpd_alloc_fn(GFP_KERNEL);
-                       if (!cpd) {
-                               mutex_unlock(&blkcg_pol_mutex);
+                       if (!cpd)
                                goto err_free_cpds;
-                       }
 
                        blkcg->cpd[pol->plid] = cpd;
                        cpd->blkcg = blkcg;
index 6a14b68b91358bdd28d90fb5ab23100a77af31dd..3c882cbc75417d60bfa92c20f5da27aaaa86c4bf 100644 (file)
@@ -342,6 +342,34 @@ static void flush_data_end_io(struct request *rq, int error)
        struct request_queue *q = rq->q;
        struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
 
+       /*
+        * Updating q->in_flight[] here for making this tag usable
+        * early. Because in blk_queue_start_tag(),
+        * q->in_flight[BLK_RW_ASYNC] is used to limit async I/O and
+        * reserve tags for sync I/O.
+        *
+        * More importantly this way can avoid the following I/O
+        * deadlock:
+        *
+        * - suppose there are 40 fua requests coming to flush queue
+        *   and queue depth is 31
+        * - 30 rqs are scheduled then blk_queue_start_tag() can't alloc
+        *   tag for async I/O any more
+        * - all the 30 rqs are completed before FLUSH_PENDING_TIMEOUT
+        *   and flush_data_end_io() is called
+        * - the other rqs still can't go ahead if not updating
+        *   q->in_flight[BLK_RW_ASYNC] here, meantime these rqs
+        *   are held in flush data queue and make no progress of
+        *   handling post flush rq
+        * - only after the post flush rq is handled, all these rqs
+        *   can be completed
+        */
+
+       elv_completed_request(q, rq);
+
+       /* for avoiding double accounting */
+       rq->cmd_flags &= ~REQ_STARTED;
+
        /*
         * After populating an empty queue, kick it to avoid stall.  Read
         * the comment in flush_end_io().
index ddc2eed6477146320073b061a61e15ebe4d587eb..f3d27a6dee09dfa48dd7b78bc024f4a9809e8f88 100644 (file)
@@ -1217,9 +1217,9 @@ static struct request *blk_mq_map_request(struct request_queue *q,
        blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
        rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
 
-       hctx->queued++;
-       data->hctx = hctx;
-       data->ctx = ctx;
+       data->hctx = alloc_data.hctx;
+       data->ctx = alloc_data.ctx;
+       data->hctx->queued++;
        return rq;
 }
 
index 96631e6a22b9628f124eb7d0b106261d43cfb21f..06cf9807f49a3be1742a632f9be61c0232fcaf5c 100644 (file)
@@ -18,7 +18,7 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
  * Softirq action handler - move entries to local list and loop over them
  * while passing them to the queue registered handler.
  */
-static void blk_done_softirq(struct softirq_action *h)
+static __latent_entropy void blk_done_softirq(struct softirq_action *h)
 {
        struct list_head *cpu_list, local_list;
 
index 80a0f1a7855181930afa0a545f5bf79584141e5c..8948392c0525db3d6d87bbc53d97f252335381e7 100644 (file)
@@ -132,28 +132,27 @@ static void aead_wmem_wakeup(struct sock *sk)
 
 static int aead_wait_for_data(struct sock *sk, unsigned flags)
 {
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct alg_sock *ask = alg_sk(sk);
        struct aead_ctx *ctx = ask->private;
        long timeout;
-       DEFINE_WAIT(wait);
        int err = -ERESTARTSYS;
 
        if (flags & MSG_DONTWAIT)
                return -EAGAIN;
 
        sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
+       add_wait_queue(sk_sleep(sk), &wait);
        for (;;) {
                if (signal_pending(current))
                        break;
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                timeout = MAX_SCHEDULE_TIMEOUT;
-               if (sk_wait_event(sk, &timeout, !ctx->more)) {
+               if (sk_wait_event(sk, &timeout, !ctx->more, &wait)) {
                        err = 0;
                        break;
                }
        }
-       finish_wait(sk_sleep(sk), &wait);
+       remove_wait_queue(sk_sleep(sk), &wait);
 
        sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 
index 2d8466f9e49b8632527ed1e2f35617ff02f5fac1..d19b09cdf284d93dc63820a7cfc648217b220a7d 100644 (file)
@@ -214,23 +214,26 @@ static int hash_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
        ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
 
-       if (ctx->more) {
+       if (!result && !ctx->more) {
+               err = af_alg_wait_for_completion(
+                               crypto_ahash_init(&ctx->req),
+                               &ctx->completion);
+               if (err)
+                       goto unlock;
+       }
+
+       if (!result || ctx->more) {
                ctx->more = 0;
                err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req),
                                                 &ctx->completion);
                if (err)
                        goto unlock;
-       } else if (!result) {
-               err = af_alg_wait_for_completion(
-                               crypto_ahash_digest(&ctx->req),
-                               &ctx->completion);
        }
 
        err = memcpy_to_msg(msg, ctx->result, len);
 
-       hash_free_result(sk, ctx);
-
 unlock:
+       hash_free_result(sk, ctx);
        release_sock(sk);
 
        return err ?: len;
index 28556fce42671e2f182d5239d3dc6468e5b1d970..1e38aaa8303ea831aef6ba0f7eedde2ac5ee4810 100644 (file)
@@ -199,26 +199,26 @@ static void skcipher_free_sgl(struct sock *sk)
 
 static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags)
 {
-       long timeout;
-       DEFINE_WAIT(wait);
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        int err = -ERESTARTSYS;
+       long timeout;
 
        if (flags & MSG_DONTWAIT)
                return -EAGAIN;
 
        sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
+       add_wait_queue(sk_sleep(sk), &wait);
        for (;;) {
                if (signal_pending(current))
                        break;
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                timeout = MAX_SCHEDULE_TIMEOUT;
-               if (sk_wait_event(sk, &timeout, skcipher_writable(sk))) {
+               if (sk_wait_event(sk, &timeout, skcipher_writable(sk), &wait)) {
                        err = 0;
                        break;
                }
        }
-       finish_wait(sk_sleep(sk), &wait);
+       remove_wait_queue(sk_sleep(sk), &wait);
 
        return err;
 }
@@ -242,10 +242,10 @@ static void skcipher_wmem_wakeup(struct sock *sk)
 
 static int skcipher_wait_for_data(struct sock *sk, unsigned flags)
 {
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct alg_sock *ask = alg_sk(sk);
        struct skcipher_ctx *ctx = ask->private;
        long timeout;
-       DEFINE_WAIT(wait);
        int err = -ERESTARTSYS;
 
        if (flags & MSG_DONTWAIT) {
@@ -254,17 +254,17 @@ static int skcipher_wait_for_data(struct sock *sk, unsigned flags)
 
        sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 
+       add_wait_queue(sk_sleep(sk), &wait);
        for (;;) {
                if (signal_pending(current))
                        break;
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                timeout = MAX_SCHEDULE_TIMEOUT;
-               if (sk_wait_event(sk, &timeout, ctx->used)) {
+               if (sk_wait_event(sk, &timeout, ctx->used, &wait)) {
                        err = 0;
                        break;
                }
        }
-       finish_wait(sk_sleep(sk), &wait);
+       remove_wait_queue(sk_sleep(sk), &wait);
 
        sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 
index 865f46ea724f285046542fab1639a4377d2a2aa1..c80765b211cf0fae7a91c35494dc9ed45247eafe 100644 (file)
@@ -133,7 +133,6 @@ struct x509_certificate *x509_cert_parse(const void *data, size_t datalen)
        return cert;
 
 error_decode:
-       kfree(cert->pub->key);
        kfree(ctx);
 error_no_ctx:
        x509_free_certificate(cert);
index 52ce17a3dd63079c3f5bb6eacde40ec1a61aa2ab..c16c94f88733e738f4ee1dff7a1334f9c2e39eb4 100644 (file)
@@ -68,10 +68,6 @@ void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg,
 
        sg = scatterwalk_ffwd(tmp, sg, start);
 
-       if (sg_page(sg) == virt_to_page(buf) &&
-           sg->offset == offset_in_page(buf))
-               return;
-
        scatterwalk_start(&walk, sg);
        scatterwalk_copychunks(buf, &walk, nbytes, out);
        scatterwalk_done(&walk, out, 0);
index f0afdfb3c7df5c79a2d7cc21ef0bf10e0b30cbb2..194d20bee7dce40070964fa2343bc19ea946d627 100644 (file)
@@ -21,7 +21,7 @@ obj-y                         += video/
 obj-y                          += idle/
 
 # IPMI must come before ACPI in order to provide IPMI opregion support
-obj-$(CONFIG_IPMI_HANDLER)     += char/ipmi/
+obj-y                          += char/ipmi/
 
 obj-$(CONFIG_ACPI)             += acpi/
 obj-$(CONFIG_SFI)              += sfi/
index d58fbf7f04e6c7f4d901f648d9f26a0708cc6f15..7dd70927991e7e9e0de14a3af89db442eca8d175 100644 (file)
@@ -122,7 +122,7 @@ static int acpi_apd_create_device(struct acpi_device *adev,
        int ret;
 
        if (!dev_desc) {
-               pdev = acpi_create_platform_device(adev);
+               pdev = acpi_create_platform_device(adev, NULL);
                return IS_ERR_OR_NULL(pdev) ? PTR_ERR(pdev) : 1;
        }
 
@@ -139,14 +139,8 @@ static int acpi_apd_create_device(struct acpi_device *adev,
                        goto err_out;
        }
 
-       if (dev_desc->properties) {
-               ret = device_add_properties(&adev->dev, dev_desc->properties);
-               if (ret)
-                       goto err_out;
-       }
-
        adev->driver_data = pdata;
-       pdev = acpi_create_platform_device(adev);
+       pdev = acpi_create_platform_device(adev, dev_desc->properties);
        if (!IS_ERR_OR_NULL(pdev))
                return 1;
 
index 5520102881357e005361ffd879dc7a245d2fd253..373657f7e35a9cac3a2a951cd030c00ef6fbd06d 100644 (file)
@@ -395,7 +395,7 @@ static int acpi_lpss_create_device(struct acpi_device *adev,
 
        dev_desc = (const struct lpss_device_desc *)id->driver_data;
        if (!dev_desc) {
-               pdev = acpi_create_platform_device(adev);
+               pdev = acpi_create_platform_device(adev, NULL);
                return IS_ERR_OR_NULL(pdev) ? PTR_ERR(pdev) : 1;
        }
        pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
@@ -451,14 +451,8 @@ static int acpi_lpss_create_device(struct acpi_device *adev,
                goto err_out;
        }
 
-       if (dev_desc->properties) {
-               ret = device_add_properties(&adev->dev, dev_desc->properties);
-               if (ret)
-                       goto err_out;
-       }
-
        adev->driver_data = pdata;
-       pdev = acpi_create_platform_device(adev);
+       pdev = acpi_create_platform_device(adev, dev_desc->properties);
        if (!IS_ERR_OR_NULL(pdev)) {
                return 1;
        }
index 8ea8211b2d589c6618d5037e40d653a3771edc55..eb76a4c10dbfb170e43680d8b3f836bec169ad01 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <asm/mwait.h>
+#include <xen/xen.h>
 
 #define ACPI_PROCESSOR_AGGREGATOR_CLASS        "acpi_pad"
 #define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator"
@@ -477,6 +478,10 @@ static struct acpi_driver acpi_pad_driver = {
 
 static int __init acpi_pad_init(void)
 {
+       /* Xen ACPI PAD is used when running as Xen Dom0. */
+       if (xen_initial_domain())
+               return -ENODEV;
+
        power_saving_mwait_init();
        if (power_saving_mwait_eax == 0)
                return -EINVAL;
index b200ae1f3c6fb0fbef0a38135fd0b27e6da45041..b4c1a6a51da482a953051959279fc9d39cd29d49 100644 (file)
@@ -50,6 +50,7 @@ static void acpi_platform_fill_resource(struct acpi_device *adev,
 /**
  * acpi_create_platform_device - Create platform device for ACPI device node
  * @adev: ACPI device node to create a platform device for.
+ * @properties: Optional collection of built-in properties.
  *
  * Check if the given @adev can be represented as a platform device and, if
  * that's the case, create and register a platform device, populate its common
@@ -57,7 +58,8 @@ static void acpi_platform_fill_resource(struct acpi_device *adev,
  *
  * Name of the platform device will be the same as @adev's.
  */
-struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
+struct platform_device *acpi_create_platform_device(struct acpi_device *adev,
+                                       struct property_entry *properties)
 {
        struct platform_device *pdev = NULL;
        struct platform_device_info pdevinfo;
@@ -106,6 +108,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev)
        pdevinfo.res = resources;
        pdevinfo.num_res = count;
        pdevinfo.fwnode = acpi_fwnode_handle(adev);
+       pdevinfo.properties = properties;
 
        if (acpi_dma_supported(adev))
                pdevinfo.dma_mask = DMA_BIT_MASK(32);
index f1e6dcc7a8271c6527e954299774841e5f4eaf78..54d48b90de2cd025710a71c03bcc88ea1ca75f32 100644 (file)
@@ -46,6 +46,7 @@
 #include "acdispat.h"
 #include "acnamesp.h"
 #include "actables.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_DISPATCHER
 ACPI_MODULE_NAME("dsinit")
@@ -214,23 +215,17 @@ acpi_ds_initialize_objects(u32 table_index,
 
        /* Walk entire namespace from the supplied root */
 
-       status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
-       if (ACPI_FAILURE(status)) {
-               return_ACPI_STATUS(status);
-       }
-
        /*
         * We don't use acpi_walk_namespace since we do not want to acquire
         * the namespace reader lock.
         */
        status =
            acpi_ns_walk_namespace(ACPI_TYPE_ANY, start_node, ACPI_UINT32_MAX,
-                                  ACPI_NS_WALK_UNLOCK, acpi_ds_init_one_object,
-                                  NULL, &info, NULL);
+                                  0, acpi_ds_init_one_object, NULL, &info,
+                                  NULL);
        if (ACPI_FAILURE(status)) {
                ACPI_EXCEPTION((AE_INFO, status, "During WalkNamespace"));
        }
-       (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
 
        status = acpi_get_table_by_index(table_index, &table);
        if (ACPI_FAILURE(status)) {
index 32e9ddc0cf2bbbf4a73afc640956fcc92715bbcd..2b3210f42a46966f608c7729f02aef6b3aadffde 100644 (file)
@@ -99,14 +99,11 @@ acpi_ds_auto_serialize_method(struct acpi_namespace_node *node,
                          "Method auto-serialization parse [%4.4s] %p\n",
                          acpi_ut_get_node_name(node), node));
 
-       acpi_ex_enter_interpreter();
-
        /* Create/Init a root op for the method parse tree */
 
        op = acpi_ps_alloc_op(AML_METHOD_OP, obj_desc->method.aml_start);
        if (!op) {
-               status = AE_NO_MEMORY;
-               goto unlock;
+               return_ACPI_STATUS(AE_NO_MEMORY);
        }
 
        acpi_ps_set_name(op, node->name.integer);
@@ -118,8 +115,7 @@ acpi_ds_auto_serialize_method(struct acpi_namespace_node *node,
            acpi_ds_create_walk_state(node->owner_id, NULL, NULL, NULL);
        if (!walk_state) {
                acpi_ps_free_op(op);
-               status = AE_NO_MEMORY;
-               goto unlock;
+               return_ACPI_STATUS(AE_NO_MEMORY);
        }
 
        status = acpi_ds_init_aml_walk(walk_state, op, node,
@@ -138,8 +134,6 @@ acpi_ds_auto_serialize_method(struct acpi_namespace_node *node,
        status = acpi_ps_parse_aml(walk_state);
 
        acpi_ps_delete_parse_tree(op);
-unlock:
-       acpi_ex_exit_interpreter();
        return_ACPI_STATUS(status);
 }
 
@@ -730,26 +724,6 @@ acpi_ds_terminate_control_method(union acpi_operand_object *method_desc,
 
                acpi_ds_method_data_delete_all(walk_state);
 
-               /*
-                * If method is serialized, release the mutex and restore the
-                * current sync level for this thread
-                */
-               if (method_desc->method.mutex) {
-
-                       /* Acquisition Depth handles recursive calls */
-
-                       method_desc->method.mutex->mutex.acquisition_depth--;
-                       if (!method_desc->method.mutex->mutex.acquisition_depth) {
-                               walk_state->thread->current_sync_level =
-                                   method_desc->method.mutex->mutex.
-                                   original_sync_level;
-
-                               acpi_os_release_mutex(method_desc->method.
-                                                     mutex->mutex.os_mutex);
-                               method_desc->method.mutex->mutex.thread_id = 0;
-                       }
-               }
-
                /*
                 * Delete any namespace objects created anywhere within the
                 * namespace by the execution of this method. Unless:
@@ -786,6 +760,26 @@ acpi_ds_terminate_control_method(union acpi_operand_object *method_desc,
                                    ~ACPI_METHOD_MODIFIED_NAMESPACE;
                        }
                }
+
+               /*
+                * If method is serialized, release the mutex and restore the
+                * current sync level for this thread
+                */
+               if (method_desc->method.mutex) {
+
+                       /* Acquisition Depth handles recursive calls */
+
+                       method_desc->method.mutex->mutex.acquisition_depth--;
+                       if (!method_desc->method.mutex->mutex.acquisition_depth) {
+                               walk_state->thread->current_sync_level =
+                                   method_desc->method.mutex->mutex.
+                                   original_sync_level;
+
+                               acpi_os_release_mutex(method_desc->method.
+                                                     mutex->mutex.os_mutex);
+                               method_desc->method.mutex->mutex.thread_id = 0;
+                       }
+               }
        }
 
        /* Decrement the thread count on the method */
index 028b22a3154ebb888245d030bd38db6ce04cb3ab..e36218206bb013d030fc3d75487834403ecf5458 100644 (file)
@@ -607,11 +607,9 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state)
                                }
                        }
 
-                       acpi_ex_exit_interpreter();
                        status =
                            acpi_ev_initialize_region
                            (acpi_ns_get_attached_object(node), FALSE);
-                       acpi_ex_enter_interpreter();
 
                        if (ACPI_FAILURE(status)) {
                                /*
index 3843f1fc5dbbd6166a2005302d8df22726fd0fa4..75ddd160a716faaa10c81ac92ddca37d1a73dcbf 100644 (file)
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "acevents.h"
 #include "acnamesp.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_EVENTS
 ACPI_MODULE_NAME("evrgnini")
@@ -597,9 +598,11 @@ acpi_ev_initialize_region(union acpi_operand_object *region_obj,
                                        }
                                }
 
+                               acpi_ex_exit_interpreter();
                                status =
                                    acpi_ev_execute_reg_method(region_obj,
                                                               ACPI_REG_CONNECT);
+                               acpi_ex_enter_interpreter();
 
                                if (acpi_ns_locked) {
                                        status =
index 334d3c5ba617dc23a46198bdd3ccaafe9b12223f..d1f20143bb113ffdc751514d793f929aab38bec1 100644 (file)
@@ -137,7 +137,9 @@ unlock:
        ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                          "**** Begin Table Object Initialization\n"));
 
+       acpi_ex_enter_interpreter();
        status = acpi_ds_initialize_objects(table_index, node);
+       acpi_ex_exit_interpreter();
 
        ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                          "**** Completed Table Object Initialization\n"));
index 046c4d0394eedd9a5c84cf813deb01506b52068b..5fb838e592dc430c49b657ce6fba3dce1572676f 100644 (file)
@@ -480,19 +480,17 @@ static void acpi_tb_convert_fadt(void)
        u32 i;
 
        /*
-        * For ACPI 1.0 FADTs (revision 1), ensure that reserved fields which
+        * For ACPI 1.0 FADTs (revision 1 or 2), ensure that reserved fields which
         * should be zero are indeed zero. This will workaround BIOSs that
         * inadvertently place values in these fields.
         *
         * The ACPI 1.0 reserved fields that will be zeroed are the bytes located
         * at offset 45, 55, 95, and the word located at offset 109, 110.
         *
-        * Note: The FADT revision value is unreliable because of BIOS errors.
-        * The table length is instead used as the final word on the version.
-        *
-        * Note: FADT revision 3 is the ACPI 2.0 version of the FADT.
+        * Note: The FADT revision value is unreliable. Only the length can be
+        * trusted.
         */
-       if (acpi_gbl_FADT.header.length <= ACPI_FADT_V3_SIZE) {
+       if (acpi_gbl_FADT.header.length <= ACPI_FADT_V2_SIZE) {
                acpi_gbl_FADT.preferred_profile = 0;
                acpi_gbl_FADT.pstate_control = 0;
                acpi_gbl_FADT.cst_control = 0;
index f0a029e68d3e2989b7e3ad79a02d1d0c36e1b0f4..0d099a24f776ac9bd665f3aab8006793031aef48 100644 (file)
@@ -662,7 +662,7 @@ static int ghes_proc(struct ghes *ghes)
        ghes_do_proc(ghes, ghes->estatus);
 out:
        ghes_clear_estatus(ghes);
-       return 0;
+       return rc;
 }
 
 static void ghes_add_timer(struct ghes *ghes)
index 33505c651f62792e136cedc73a8b022c10f2ba21..86364097e236e5d2d7d99b4e4e94f67b7560ebfd 100644 (file)
@@ -34,11 +34,11 @@ static int int340x_thermal_handler_attach(struct acpi_device *adev,
                                        const struct acpi_device_id *id)
 {
        if (IS_ENABLED(CONFIG_INT340X_THERMAL))
-               acpi_create_platform_device(adev);
+               acpi_create_platform_device(adev, NULL);
        /* Intel SoC DTS thermal driver needs INT3401 to set IRQ descriptor */
        else if (IS_ENABLED(CONFIG_INTEL_SOC_DTS_THERMAL) &&
                 id->driver_data == INT3401_DEVICE)
-               acpi_create_platform_device(adev);
+               acpi_create_platform_device(adev, NULL);
        return 1;
 }
 
index 6805310621607335a2cf14763acc48f629d3ba31..48e19d013170936ef494d0fa88d275d37f25071a 100644 (file)
@@ -526,6 +526,7 @@ static void acpi_ec_enable_event(struct acpi_ec *ec)
                acpi_ec_clear(ec);
 }
 
+#ifdef CONFIG_PM_SLEEP
 static bool acpi_ec_query_flushed(struct acpi_ec *ec)
 {
        bool flushed;
@@ -557,6 +558,7 @@ static void acpi_ec_disable_event(struct acpi_ec *ec)
        spin_unlock_irqrestore(&ec->lock, flags);
        __acpi_ec_flush_event(ec);
 }
+#endif /* CONFIG_PM_SLEEP */
 
 static bool acpi_ec_guard_event(struct acpi_ec *ec)
 {
index e24ea4e796e4b920cdc437f3f76a69eb391b85c7..7fceb3b4691b0f5d4b42043d42ec422a60319051 100644 (file)
@@ -82,8 +82,8 @@ static const struct genl_multicast_group acpi_event_mcgrps[] = {
        { .name = ACPI_GENL_MCAST_GROUP_NAME, },
 };
 
-static struct genl_family acpi_event_genl_family = {
-       .id = GENL_ID_GENERATE,
+static struct genl_family acpi_event_genl_family __ro_after_init = {
+       .module = THIS_MODULE,
        .name = ACPI_GENL_FAMILY_NAME,
        .version = ACPI_GENL_VERSION,
        .maxattr = ACPI_GENL_ATTR_MAX,
@@ -144,7 +144,7 @@ int acpi_bus_generate_netlink_event(const char *device_class,
 
 EXPORT_SYMBOL(acpi_bus_generate_netlink_event);
 
-static int acpi_event_genetlink_init(void)
+static int __init acpi_event_genetlink_init(void)
 {
        return genl_register_family(&acpi_event_genl_family);
 }
index 384cfc3083e1d98c544918b49ec517a9399c27a3..6cf4988206f24e8292099ae7141914ced024affd 100644 (file)
@@ -129,8 +129,18 @@ static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state)
 
        control = obj->package.elements[1].integer.value;
        for (i = 0; i < fan->fps_count; i++) {
-               if (control == fan->fps[i].control)
+               /*
+                * When Fine Grain Control is set, return the state
+                * corresponding to maximum fan->fps[i].control
+                * value compared to the current speed. Here the
+                * fan->fps[] is sorted array with increasing speed.
+                */
+               if (fan->fif.fine_grain_ctrl && control < fan->fps[i].control) {
+                       i = (i > 0) ? i - 1 : 0;
                        break;
+               } else if (control == fan->fps[i].control) {
+                       break;
+               }
        }
        if (i == fan->fps_count) {
                dev_dbg(&device->dev, "Invalid control value returned\n");
index 4305ee9db4b2d880faaf6314cde028546b5207a6..416953a4251094ce2066d724c1d88583b8e89705 100644 (file)
@@ -162,11 +162,18 @@ void acpi_os_vprintf(const char *fmt, va_list args)
        if (acpi_in_debugger) {
                kdb_printf("%s", buffer);
        } else {
-               printk(KERN_CONT "%s", buffer);
+               if (printk_get_level(buffer))
+                       printk("%s", buffer);
+               else
+                       printk(KERN_CONT "%s", buffer);
        }
 #else
-       if (acpi_debugger_write_log(buffer) < 0)
-               printk(KERN_CONT "%s", buffer);
+       if (acpi_debugger_write_log(buffer) < 0) {
+               if (printk_get_level(buffer))
+                       printk("%s", buffer);
+               else
+                       printk(KERN_CONT "%s", buffer);
+       }
 #endif
 }
 
index c983bf733ad37d7b608c9410108dcef32cdbf02b..bc3d914dfc3e397880581b56b33188f575ea160d 100644 (file)
@@ -87,6 +87,7 @@ struct acpi_pci_link {
 
 static LIST_HEAD(acpi_link_list);
 static DEFINE_MUTEX(acpi_link_lock);
+static int sci_irq = -1, sci_penalty;
 
 /* --------------------------------------------------------------------------
                             PCI Link Device Management
@@ -496,25 +497,13 @@ static int acpi_irq_get_penalty(int irq)
 {
        int penalty = 0;
 
-       /*
-       * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict
-       * with PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be
-       * use for PCI IRQs.
-       */
-       if (irq == acpi_gbl_FADT.sci_interrupt) {
-               u32 type = irq_get_trigger_type(irq) & IRQ_TYPE_SENSE_MASK;
-
-               if (type != IRQ_TYPE_LEVEL_LOW)
-                       penalty += PIRQ_PENALTY_ISA_ALWAYS;
-               else
-                       penalty += PIRQ_PENALTY_PCI_USING;
-       }
+       if (irq == sci_irq)
+               penalty += sci_penalty;
 
        if (irq < ACPI_MAX_ISA_IRQS)
                return penalty + acpi_isa_irq_penalty[irq];
 
-       penalty += acpi_irq_pci_sharing_penalty(irq);
-       return penalty;
+       return penalty + acpi_irq_pci_sharing_penalty(irq);
 }
 
 int __init acpi_irq_penalty_init(void)
@@ -619,6 +608,10 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link)
                            acpi_device_bid(link->device));
                return -ENODEV;
        } else {
+               if (link->irq.active < ACPI_MAX_ISA_IRQS)
+                       acpi_isa_irq_penalty[link->irq.active] +=
+                               PIRQ_PENALTY_PCI_USING;
+
                printk(KERN_WARNING PREFIX "%s [%s] enabled at IRQ %d\n",
                       acpi_device_name(link->device),
                       acpi_device_bid(link->device), link->irq.active);
@@ -849,7 +842,7 @@ static int __init acpi_irq_penalty_update(char *str, int used)
                        continue;
 
                if (used)
-                       new_penalty = acpi_irq_get_penalty(irq) +
+                       new_penalty = acpi_isa_irq_penalty[irq] +
                                        PIRQ_PENALTY_ISA_USED;
                else
                        new_penalty = 0;
@@ -871,7 +864,7 @@ static int __init acpi_irq_penalty_update(char *str, int used)
 void acpi_penalize_isa_irq(int irq, int active)
 {
        if ((irq >= 0) && (irq < ARRAY_SIZE(acpi_isa_irq_penalty)))
-               acpi_isa_irq_penalty[irq] = acpi_irq_get_penalty(irq) +
+               acpi_isa_irq_penalty[irq] +=
                  (active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING);
 }
 
@@ -881,6 +874,17 @@ bool acpi_isa_irq_available(int irq)
                    acpi_irq_get_penalty(irq) < PIRQ_PENALTY_ISA_ALWAYS);
 }
 
+void acpi_penalize_sci_irq(int irq, int trigger, int polarity)
+{
+       sci_irq = irq;
+
+       if (trigger == ACPI_MADT_TRIGGER_LEVEL &&
+           polarity == ACPI_MADT_POLARITY_ACTIVE_LOW)
+               sci_penalty = PIRQ_PENALTY_PCI_USING;
+       else
+               sci_penalty = PIRQ_PENALTY_ISA_ALWAYS;
+}
+
 /*
  * Over-ride default table to reserve additional IRQs for use by ISA
  * e.g. acpi_irq_isa=5
index f2fd3fee588a8233a80bdc5da3549fea5703b9f8..03f5ec11ab3197de66ced4e7f09d39dbab6b8232 100644 (file)
@@ -468,10 +468,11 @@ static int acpi_data_get_property_array(struct acpi_device_data *data,
 }
 
 /**
- * acpi_data_get_property_reference - returns handle to the referenced object
- * @data: ACPI device data object containing the property
+ * __acpi_node_get_property_reference - returns handle to the referenced object
+ * @fwnode: Firmware node to get the property from
  * @propname: Name of the property
  * @index: Index of the reference to return
+ * @num_args: Maximum number of arguments after each reference
  * @args: Location to store the returned reference with optional arguments
  *
  * Find property with @name, verifify that it is a package containing at least
@@ -482,17 +483,40 @@ static int acpi_data_get_property_array(struct acpi_device_data *data,
  * If there's more than one reference in the property value package, @index is
  * used to select the one to return.
  *
+ * It is possible to leave holes in the property value set like in the
+ * example below:
+ *
+ * Package () {
+ *     "cs-gpios",
+ *     Package () {
+ *        ^GPIO, 19, 0, 0,
+ *        ^GPIO, 20, 0, 0,
+ *        0,
+ *        ^GPIO, 21, 0, 0,
+ *     }
+ * }
+ *
+ * Calling this function with index %2 returns %-ENOENT and with index %3
+ * returns the last entry. If the property does not contain any more values
+ * %-ENODATA is returned. The NULL entry must be a single integer and
+ * preferably contain the value %0.
+ *
  * Return: %0 on success, negative error code on failure.
  */
-static int acpi_data_get_property_reference(struct acpi_device_data *data,
-                                           const char *propname, size_t index,
-                                           struct acpi_reference_args *args)
+int __acpi_node_get_property_reference(struct fwnode_handle *fwnode,
+       const char *propname, size_t index, size_t num_args,
+       struct acpi_reference_args *args)
 {
        const union acpi_object *element, *end;
        const union acpi_object *obj;
+       struct acpi_device_data *data;
        struct acpi_device *device;
        int ret, idx = 0;
 
+       data = acpi_device_data_of_node(fwnode);
+       if (!data)
+               return -EINVAL;
+
        ret = acpi_data_get_property(data, propname, ACPI_TYPE_ANY, &obj);
        if (ret)
                return ret;
@@ -532,59 +556,54 @@ static int acpi_data_get_property_reference(struct acpi_device_data *data,
        while (element < end) {
                u32 nargs, i;
 
-               if (element->type != ACPI_TYPE_LOCAL_REFERENCE)
-                       return -EPROTO;
-
-               ret = acpi_bus_get_device(element->reference.handle, &device);
-               if (ret)
-                       return -ENODEV;
-
-               element++;
-               nargs = 0;
-
-               /* assume following integer elements are all args */
-               for (i = 0; element + i < end; i++) {
-                       int type = element[i].type;
+               if (element->type == ACPI_TYPE_LOCAL_REFERENCE) {
+                       ret = acpi_bus_get_device(element->reference.handle,
+                                                 &device);
+                       if (ret)
+                               return -ENODEV;
+
+                       nargs = 0;
+                       element++;
+
+                       /* assume following integer elements are all args */
+                       for (i = 0; element + i < end && i < num_args; i++) {
+                               int type = element[i].type;
+
+                               if (type == ACPI_TYPE_INTEGER)
+                                       nargs++;
+                               else if (type == ACPI_TYPE_LOCAL_REFERENCE)
+                                       break;
+                               else
+                                       return -EPROTO;
+                       }
 
-                       if (type == ACPI_TYPE_INTEGER)
-                               nargs++;
-                       else if (type == ACPI_TYPE_LOCAL_REFERENCE)
-                               break;
-                       else
+                       if (nargs > MAX_ACPI_REFERENCE_ARGS)
                                return -EPROTO;
-               }
 
-               if (idx++ == index) {
-                       args->adev = device;
-                       args->nargs = nargs;
-                       for (i = 0; i < nargs; i++)
-                               args->args[i] = element[i].integer.value;
+                       if (idx == index) {
+                               args->adev = device;
+                               args->nargs = nargs;
+                               for (i = 0; i < nargs; i++)
+                                       args->args[i] = element[i].integer.value;
 
-                       return 0;
+                               return 0;
+                       }
+
+                       element += nargs;
+               } else if (element->type == ACPI_TYPE_INTEGER) {
+                       if (idx == index)
+                               return -ENOENT;
+                       element++;
+               } else {
+                       return -EPROTO;
                }
 
-               element += nargs;
+               idx++;
        }
 
-       return -EPROTO;
-}
-
-/**
- * acpi_node_get_property_reference - get a handle to the referenced object.
- * @fwnode: Firmware node to get the property from.
- * @propname: Name of the property.
- * @index: Index of the reference to return.
- * @args: Location to store the returned reference with optional arguments.
- */
-int acpi_node_get_property_reference(struct fwnode_handle *fwnode,
-                                    const char *name, size_t index,
-                                    struct acpi_reference_args *args)
-{
-       struct acpi_device_data *data = acpi_device_data_of_node(fwnode);
-
-       return data ? acpi_data_get_property_reference(data, name, index, args) : -EINVAL;
+       return -ENODATA;
 }
-EXPORT_SYMBOL_GPL(acpi_node_get_property_reference);
+EXPORT_SYMBOL_GPL(__acpi_node_get_property_reference);
 
 static int acpi_data_prop_read_single(struct acpi_device_data *data,
                                      const char *propname,
index 035ac646d8db55272bf2182f690e0452b8d7a485..3d1856f1f4d03eb8c47e5e4043684d7b5b76b046 100644 (file)
@@ -1734,7 +1734,7 @@ static void acpi_default_enumeration(struct acpi_device *device)
                               &is_spi_i2c_slave);
        acpi_dev_free_resource_list(&resource_list);
        if (!is_spi_i2c_slave) {
-               acpi_create_platform_device(device);
+               acpi_create_platform_device(device, NULL);
                acpi_device_set_enumerated(device);
        } else {
                blocking_notifier_call_chain(&acpi_reconfig_chain,
index deb0ff78eba8705b56292d1acf2e206541ee8bf8..54abb26b736639ca54aa7051ae742d6657a501bc 100644 (file)
@@ -47,32 +47,15 @@ static void acpi_sleep_tts_switch(u32 acpi_state)
        }
 }
 
-static void acpi_sleep_pts_switch(u32 acpi_state)
-{
-       acpi_status status;
-
-       status = acpi_execute_simple_method(NULL, "\\_PTS", acpi_state);
-       if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
-               /*
-                * OS can't evaluate the _PTS object correctly. Some warning
-                * message will be printed. But it won't break anything.
-                */
-               printk(KERN_NOTICE "Failure in evaluating _PTS object\n");
-       }
-}
-
-static int sleep_notify_reboot(struct notifier_block *this,
+static int tts_notify_reboot(struct notifier_block *this,
                        unsigned long code, void *x)
 {
        acpi_sleep_tts_switch(ACPI_STATE_S5);
-
-       acpi_sleep_pts_switch(ACPI_STATE_S5);
-
        return NOTIFY_DONE;
 }
 
-static struct notifier_block sleep_notifier = {
-       .notifier_call  = sleep_notify_reboot,
+static struct notifier_block tts_notifier = {
+       .notifier_call  = tts_notify_reboot,
        .next           = NULL,
        .priority       = 0,
 };
@@ -916,9 +899,9 @@ int __init acpi_sleep_init(void)
        pr_info(PREFIX "(supports%s)\n", supported);
 
        /*
-        * Register the sleep_notifier to reboot notifier list so that the _TTS
-        * and _PTS object can also be evaluated when the system enters S5.
+        * Register the tts_notifier to reboot notifier list so that the _TTS
+        * object can also be evaluated when the system enters S5.
         */
-       register_reboot_notifier(&sleep_notifier);
+       register_reboot_notifier(&tts_notifier);
        return 0;
 }
index 562af94bec357f09ab1a33394a2ec742f9f88977..3c71b982bf2a35ae1a406e35fac2683577edb2b8 100644 (file)
@@ -1002,7 +1002,7 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal)
 
 
 static struct binder_ref *binder_get_ref(struct binder_proc *proc,
-                                        uint32_t desc)
+                                        u32 desc, bool need_strong_ref)
 {
        struct rb_node *n = proc->refs_by_desc.rb_node;
        struct binder_ref *ref;
@@ -1010,12 +1010,16 @@ static struct binder_ref *binder_get_ref(struct binder_proc *proc,
        while (n) {
                ref = rb_entry(n, struct binder_ref, rb_node_desc);
 
-               if (desc < ref->desc)
+               if (desc < ref->desc) {
                        n = n->rb_left;
-               else if (desc > ref->desc)
+               } else if (desc > ref->desc) {
                        n = n->rb_right;
-               else
+               } else if (need_strong_ref && !ref->strong) {
+                       binder_user_error("tried to use weak ref as strong ref\n");
+                       return NULL;
+               } else {
                        return ref;
+               }
        }
        return NULL;
 }
@@ -1285,7 +1289,10 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
                } break;
                case BINDER_TYPE_HANDLE:
                case BINDER_TYPE_WEAK_HANDLE: {
-                       struct binder_ref *ref = binder_get_ref(proc, fp->handle);
+                       struct binder_ref *ref;
+
+                       ref = binder_get_ref(proc, fp->handle,
+                                            fp->type == BINDER_TYPE_HANDLE);
 
                        if (ref == NULL) {
                                pr_err("transaction release %d bad handle %d\n",
@@ -1380,7 +1387,7 @@ static void binder_transaction(struct binder_proc *proc,
                if (tr->target.handle) {
                        struct binder_ref *ref;
 
-                       ref = binder_get_ref(proc, tr->target.handle);
+                       ref = binder_get_ref(proc, tr->target.handle, true);
                        if (ref == NULL) {
                                binder_user_error("%d:%d got transaction to invalid handle\n",
                                        proc->pid, thread->pid);
@@ -1577,7 +1584,9 @@ static void binder_transaction(struct binder_proc *proc,
                                fp->type = BINDER_TYPE_HANDLE;
                        else
                                fp->type = BINDER_TYPE_WEAK_HANDLE;
+                       fp->binder = 0;
                        fp->handle = ref->desc;
+                       fp->cookie = 0;
                        binder_inc_ref(ref, fp->type == BINDER_TYPE_HANDLE,
                                       &thread->todo);
 
@@ -1589,7 +1598,10 @@ static void binder_transaction(struct binder_proc *proc,
                } break;
                case BINDER_TYPE_HANDLE:
                case BINDER_TYPE_WEAK_HANDLE: {
-                       struct binder_ref *ref = binder_get_ref(proc, fp->handle);
+                       struct binder_ref *ref;
+
+                       ref = binder_get_ref(proc, fp->handle,
+                                            fp->type == BINDER_TYPE_HANDLE);
 
                        if (ref == NULL) {
                                binder_user_error("%d:%d got transaction with invalid handle, %d\n",
@@ -1624,7 +1636,9 @@ static void binder_transaction(struct binder_proc *proc,
                                        return_error = BR_FAILED_REPLY;
                                        goto err_binder_get_ref_for_node_failed;
                                }
+                               fp->binder = 0;
                                fp->handle = new_ref->desc;
+                               fp->cookie = 0;
                                binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL);
                                trace_binder_transaction_ref_to_ref(t, ref,
                                                                    new_ref);
@@ -1678,6 +1692,7 @@ static void binder_transaction(struct binder_proc *proc,
                        binder_debug(BINDER_DEBUG_TRANSACTION,
                                     "        fd %d -> %d\n", fp->handle, target_fd);
                        /* TODO: fput? */
+                       fp->binder = 0;
                        fp->handle = target_fd;
                } break;
 
@@ -1800,7 +1815,9 @@ static int binder_thread_write(struct binder_proc *proc,
                                                ref->desc);
                                }
                        } else
-                               ref = binder_get_ref(proc, target);
+                               ref = binder_get_ref(proc, target,
+                                                    cmd == BC_ACQUIRE ||
+                                                    cmd == BC_RELEASE);
                        if (ref == NULL) {
                                binder_user_error("%d:%d refcount change on invalid ref %d\n",
                                        proc->pid, thread->pid, target);
@@ -1996,7 +2013,7 @@ static int binder_thread_write(struct binder_proc *proc,
                        if (get_user(cookie, (binder_uintptr_t __user *)ptr))
                                return -EFAULT;
                        ptr += sizeof(binder_uintptr_t);
-                       ref = binder_get_ref(proc, target);
+                       ref = binder_get_ref(proc, target, false);
                        if (ref == NULL) {
                                binder_user_error("%d:%d %s invalid ref %d\n",
                                        proc->pid, thread->pid,
index 90eabaf812150dbfcff32e173f4c48f2cfcd801c..9669fc7c19df7fe05b922daf25e0392677433e26 100644 (file)
@@ -1400,142 +1400,59 @@ static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
 }
 #endif
 
-/*
- * ahci_init_msix() - optionally enable per-port MSI-X otherwise defer
- * to single msi.
- */
-static int ahci_init_msix(struct pci_dev *pdev, unsigned int n_ports,
-                         struct ahci_host_priv *hpriv, unsigned long flags)
+static int ahci_get_irq_vector(struct ata_host *host, int port)
 {
-       int nvec, i, rc;
-
-       /* Do not init MSI-X if MSI is disabled for the device */
-       if (hpriv->flags & AHCI_HFLAG_NO_MSI)
-               return -ENODEV;
-
-       nvec = pci_msix_vec_count(pdev);
-       if (nvec < 0)
-               return nvec;
-
-       /*
-        * Proper MSI-X implementations will have a vector per-port.
-        * Barring that, we prefer single-MSI over single-MSIX.  If this
-        * check fails (not enough MSI-X vectors for all ports) we will
-        * be called again with the flag clear iff ahci_init_msi()
-        * fails.
-        */
-       if (flags & AHCI_HFLAG_MULTI_MSIX) {
-               if (nvec < n_ports)
-                       return -ENODEV;
-               nvec = n_ports;
-       } else if (nvec) {
-               nvec = 1;
-       } else {
-               /*
-                * Emit dev_err() since this was the non-legacy irq
-                * method of last resort.
-                */
-               rc = -ENODEV;
-               goto fail;
-       }
-
-       for (i = 0; i < nvec; i++)
-               hpriv->msix[i].entry = i;
-       rc = pci_enable_msix_exact(pdev, hpriv->msix, nvec);
-       if (rc < 0)
-               goto fail;
-
-       if (nvec > 1)
-               hpriv->flags |= AHCI_HFLAG_MULTI_MSIX;
-       hpriv->irq = hpriv->msix[0].vector; /* for single msi-x */
-
-       return nvec;
-fail:
-       dev_err(&pdev->dev,
-               "failed to enable MSI-X with error %d, # of vectors: %d\n",
-               rc, nvec);
-
-       return rc;
+       return pci_irq_vector(to_pci_dev(host->dev), port);
 }
 
 static int ahci_init_msi(struct pci_dev *pdev, unsigned int n_ports,
                        struct ahci_host_priv *hpriv)
 {
-       int rc, nvec;
+       int nvec;
 
        if (hpriv->flags & AHCI_HFLAG_NO_MSI)
                return -ENODEV;
 
-       nvec = pci_msi_vec_count(pdev);
-       if (nvec < 0)
-               return nvec;
-
        /*
         * If number of MSIs is less than number of ports then Sharing Last
         * Message mode could be enforced. In this case assume that advantage
         * of multipe MSIs is negated and use single MSI mode instead.
         */
-       if (nvec < n_ports)
-               goto single_msi;
+       if (n_ports > 1) {
+               nvec = pci_alloc_irq_vectors(pdev, n_ports, INT_MAX,
+                               PCI_IRQ_MSIX | PCI_IRQ_MSI);
+               if (nvec > 0) {
+                       if (!(readl(hpriv->mmio + HOST_CTL) & HOST_MRSM)) {
+                               hpriv->get_irq_vector = ahci_get_irq_vector;
+                               hpriv->flags |= AHCI_HFLAG_MULTI_MSI;
+                               return nvec;
+                       }
 
-       rc = pci_enable_msi_exact(pdev, nvec);
-       if (rc == -ENOSPC)
-               goto single_msi;
-       if (rc < 0)
-               return rc;
+                       /*
+                        * Fallback to single MSI mode if the controller
+                        * enforced MRSM mode.
+                        */
+                       printk(KERN_INFO
+                               "ahci: MRSM is on, fallback to single MSI\n");
+                       pci_free_irq_vectors(pdev);
+               }
 
-       /* fallback to single MSI mode if the controller enforced MRSM mode */
-       if (readl(hpriv->mmio + HOST_CTL) & HOST_MRSM) {
-               pci_disable_msi(pdev);
-               printk(KERN_INFO "ahci: MRSM is on, fallback to single MSI\n");
-               goto single_msi;
+               /*
+                * -ENOSPC indicates we don't have enough vectors.  Don't bother
+                * trying a single vector for any other error:
+                */
+               if (nvec < 0 && nvec != -ENOSPC)
+                       return nvec;
        }
 
-       if (nvec > 1)
-               hpriv->flags |= AHCI_HFLAG_MULTI_MSI;
-
-       goto out;
-
-single_msi:
-       nvec = 1;
-
-       rc = pci_enable_msi(pdev);
-       if (rc < 0)
-               return rc;
-out:
-       hpriv->irq = pdev->irq;
-
-       return nvec;
-}
-
-static int ahci_init_interrupts(struct pci_dev *pdev, unsigned int n_ports,
-                               struct ahci_host_priv *hpriv)
-{
-       int nvec;
-
        /*
-        * Try to enable per-port MSI-X.  If the host is not capable
-        * fall back to single MSI before finally attempting single
-        * MSI-X.
+        * If the host is not capable of supporting per-port vectors, fall
+        * back to single MSI before finally attempting single MSI-X.
         */
-       nvec = ahci_init_msix(pdev, n_ports, hpriv, AHCI_HFLAG_MULTI_MSIX);
-       if (nvec >= 0)
-               return nvec;
-
-       nvec = ahci_init_msi(pdev, n_ports, hpriv);
-       if (nvec >= 0)
-               return nvec;
-
-       /* try single-msix */
-       nvec = ahci_init_msix(pdev, n_ports, hpriv, 0);
-       if (nvec >= 0)
+       nvec = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+       if (nvec == 1)
                return nvec;
-
-       /* legacy intx interrupts */
-       pci_intx(pdev, 1);
-       hpriv->irq = pdev->irq;
-
-       return 0;
+       return pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX);
 }
 
 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
@@ -1698,11 +1615,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (!host)
                return -ENOMEM;
        host->private_data = hpriv;
-       hpriv->msix = devm_kzalloc(&pdev->dev,
-                       sizeof(struct msix_entry) * n_ports, GFP_KERNEL);
-       if (!hpriv->msix)
-               return -ENOMEM;
-       ahci_init_interrupts(pdev, n_ports, hpriv);
+
+       if (ahci_init_msi(pdev, n_ports, hpriv) < 0) {
+               /* legacy intx interrupts */
+               pci_intx(pdev, 1);
+       }
+       hpriv->irq = pci_irq_vector(pdev, 0);
 
        if (!(hpriv->cap & HOST_CAP_SSS) || ahci_ignore_sss)
                host->flags |= ATA_HOST_PARALLEL_SCAN;
index 70b06bcfb7e3535b47e02a913da680f67595888e..0cc08f892feaeeacf1915df8588d236e32e8960d 100644 (file)
@@ -242,12 +242,10 @@ enum {
        AHCI_HFLAG_NO_FBS               = (1 << 18), /* no FBS */
 
 #ifdef CONFIG_PCI_MSI
-       AHCI_HFLAG_MULTI_MSI            = (1 << 20), /* multiple PCI MSIs */
-       AHCI_HFLAG_MULTI_MSIX           = (1 << 21), /* per-port MSI-X */
+       AHCI_HFLAG_MULTI_MSI            = (1 << 20), /* per-port MSI(-X) */
 #else
        /* compile out MSI infrastructure */
        AHCI_HFLAG_MULTI_MSI            = 0,
-       AHCI_HFLAG_MULTI_MSIX           = 0,
 #endif
        AHCI_HFLAG_WAKE_BEFORE_STOP     = (1 << 22), /* wake before DMA stop */
 
@@ -351,7 +349,6 @@ struct ahci_host_priv {
         * the PHY position in this array.
         */
        struct phy              **phys;
-       struct msix_entry       *msix;          /* Optional MSI-X support */
        unsigned                nports;         /* Number of ports */
        void                    *plat_data;     /* Other platform data */
        unsigned int            irq;            /* interrupt line */
@@ -362,22 +359,11 @@ struct ahci_host_priv {
         */
        void                    (*start_engine)(struct ata_port *ap);
        irqreturn_t             (*irq_handler)(int irq, void *dev_instance);
-};
 
-#ifdef CONFIG_PCI_MSI
-static inline int ahci_irq_vector(struct ahci_host_priv *hpriv, int port)
-{
-       if (hpriv->flags & AHCI_HFLAG_MULTI_MSIX)
-               return hpriv->msix[port].vector;
-       else
-               return hpriv->irq + port;
-}
-#else
-static inline int ahci_irq_vector(struct ahci_host_priv *hpriv, int port)
-{
-       return hpriv->irq;
-}
-#endif
+       /* only required for per-port MSI(-X) support */
+       int                     (*get_irq_vector)(struct ata_host *host,
+                                                 int port);
+};
 
 extern int ahci_ignore_sss;
 
index 7bdee9bd8786638049c51d952520f2fe298df694..1eba8dff875eb6d45566779baab68aef4ba412e2 100644 (file)
 #define PORT_PHY3      0xB0
 #define PORT_PHY4      0xB4
 #define PORT_PHY5      0xB8
+#define PORT_AXICC     0xBC
 #define PORT_TRANS     0xC8
 
 /* port register default value */
 #define AHCI_PORT_PHY_1_CFG    0xa003fffe
 #define AHCI_PORT_TRANS_CFG    0x08000029
+#define AHCI_PORT_AXICC_CFG    0x3fffffff
 
 /* for ls1021a */
 #define LS1021A_PORT_PHY2      0x28183414
 #define LS1021A_PORT_PHY3      0x0e080e06
 #define LS1021A_PORT_PHY4      0x064a080b
 #define LS1021A_PORT_PHY5      0x2aa86470
+#define LS1021A_AXICC_ADDR     0xC0
 
 #define SATA_ECC_DISABLE       0x00020000
 
-/* for ls1043a */
-#define LS1043A_PORT_PHY2      0x28184d1f
-#define LS1043A_PORT_PHY3      0x0e081509
-
 enum ahci_qoriq_type {
        AHCI_LS1021A,
        AHCI_LS1043A,
@@ -137,7 +136,7 @@ static struct ata_port_operations ahci_qoriq_ops = {
        .hardreset      = ahci_qoriq_hardreset,
 };
 
-static struct ata_port_info ahci_qoriq_port_info = {
+static const struct ata_port_info ahci_qoriq_port_info = {
        .flags          = AHCI_FLAG_COMMON | ATA_FLAG_NCQ,
        .pio_mask       = ATA_PIO4,
        .udma_mask      = ATA_UDMA6,
@@ -162,18 +161,19 @@ static int ahci_qoriq_phy_init(struct ahci_host_priv *hpriv)
                writel(LS1021A_PORT_PHY4, reg_base + PORT_PHY4);
                writel(LS1021A_PORT_PHY5, reg_base + PORT_PHY5);
                writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
+               writel(AHCI_PORT_AXICC_CFG, reg_base + LS1021A_AXICC_ADDR);
                break;
 
        case AHCI_LS1043A:
                writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
-               writel(LS1043A_PORT_PHY2, reg_base + PORT_PHY2);
-               writel(LS1043A_PORT_PHY3, reg_base + PORT_PHY3);
                writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
+               writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
                break;
 
        case AHCI_LS2080A:
                writel(AHCI_PORT_PHY_1_CFG, reg_base + PORT_PHY1);
                writel(AHCI_PORT_TRANS_CFG, reg_base + PORT_TRANS);
+               writel(AHCI_PORT_AXICC_CFG, reg_base + PORT_AXICC);
                break;
        }
 
@@ -221,12 +221,6 @@ static int ahci_qoriq_probe(struct platform_device *pdev)
        if (rc)
                goto disable_resources;
 
-       /* Workaround for ls2080a */
-       if (qoriq_priv->type == AHCI_LS2080A) {
-               hpriv->flags |= AHCI_HFLAG_NO_NCQ;
-               ahci_qoriq_port_info.flags &= ~ATA_FLAG_NCQ;
-       }
-
        rc = ahci_platform_init_host(pdev, hpriv, &ahci_qoriq_port_info,
                                     &ahci_qoriq_sht);
        if (rc)
index 8ff428fe8e0fa00659218f58f041cf948ae8327c..bc345f24955531fba69e36e1bc0233da3bb6fc88 100644 (file)
@@ -147,6 +147,7 @@ static struct scsi_host_template ahci_platform_sht = {
 
 static int st_ahci_probe(struct platform_device *pdev)
 {
+       struct device *dev = &pdev->dev;
        struct st_ahci_drv_data *drv_data;
        struct ahci_host_priv *hpriv;
        int err;
@@ -170,6 +171,9 @@ static int st_ahci_probe(struct platform_device *pdev)
 
        st_ahci_configure_oob(hpriv->mmio);
 
+       of_property_read_u32(dev->of_node,
+                            "ports-implemented", &hpriv->force_port_map);
+
        err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info,
                                      &ahci_platform_sht);
        if (err) {
index dcf2c724fd066c33cf74a3f502d2ae6ba3d276bc..0d028ead99e85b632d1802af0f438287bc5a7c54 100644 (file)
@@ -2520,7 +2520,7 @@ static int ahci_host_activate_multi_irqs(struct ata_host *host,
         */
        for (i = 0; i < host->n_ports; i++) {
                struct ahci_port_priv *pp = host->ports[i]->private_data;
-               int irq = ahci_irq_vector(hpriv, i);
+               int irq = hpriv->get_irq_vector(host, i);
 
                /* Do not receive interrupts sent by dummy ports */
                if (!pp) {
@@ -2556,10 +2556,15 @@ int ahci_host_activate(struct ata_host *host, struct scsi_host_template *sht)
        int irq = hpriv->irq;
        int rc;
 
-       if (hpriv->flags & (AHCI_HFLAG_MULTI_MSI | AHCI_HFLAG_MULTI_MSIX)) {
+       if (hpriv->flags & AHCI_HFLAG_MULTI_MSI) {
                if (hpriv->irq_handler)
                        dev_warn(host->dev,
                                 "both AHCI_HFLAG_MULTI_MSI flag set and custom irq handler implemented\n");
+               if (!hpriv->get_irq_vector) {
+                       dev_err(host->dev,
+                               "AHCI_HFLAG_MULTI_MSI requires ->get_irq_vector!\n");
+                       return -EIO;
+               }
 
                rc = ahci_host_activate_multi_irqs(host, sht);
        } else {
index e207b33e4ce9d602ca5d72144a7332c91f2515c9..9cceb4a875a58caa19fdd809ad82181f22676fc2 100644 (file)
@@ -1159,8 +1159,6 @@ static void ata_scsi_sdev_config(struct scsi_device *sdev)
 {
        sdev->use_10_for_rw = 1;
        sdev->use_10_for_ms = 1;
-       sdev->no_report_opcodes = 1;
-       sdev->no_write_same = 1;
 
        /* Schedule policy is determined by ->qc_defer() callback and
         * it needs to see every deferred qc.  Set dev_blocked to 1 to
@@ -3282,18 +3280,125 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc)
        return 1;
 }
 
+/**
+ * ata_format_dsm_trim_descr() - SATL Write Same to DSM Trim
+ * @cmd: SCSI command being translated
+ * @trmax: Maximum number of entries that will fit in sector_size bytes.
+ * @sector: Starting sector
+ * @count: Total Range of request in logical sectors
+ *
+ * Rewrite the WRITE SAME descriptor to be a DSM TRIM little-endian formatted
+ * descriptor.
+ *
+ * Up to 64 entries of the format:
+ *   63:48 Range Length
+ *   47:0  LBA
+ *
+ *  Range Length of 0 is ignored.
+ *  LBAs should be in sorted order and must not overlap.
+ *
+ * NOTE: this is the same format as ADD LBA(S) TO NV CACHE PINNED SET
+ *
+ * Return: Number of bytes copied into sglist.
+ */
+static size_t ata_format_dsm_trim_descr(struct scsi_cmnd *cmd, u32 trmax,
+                                       u64 sector, u32 count)
+{
+       struct scsi_device *sdp = cmd->device;
+       size_t len = sdp->sector_size;
+       size_t r;
+       __le64 *buf;
+       u32 i = 0;
+       unsigned long flags;
+
+       WARN_ON(len > ATA_SCSI_RBUF_SIZE);
+
+       if (len > ATA_SCSI_RBUF_SIZE)
+               len = ATA_SCSI_RBUF_SIZE;
+
+       spin_lock_irqsave(&ata_scsi_rbuf_lock, flags);
+       buf = ((void *)ata_scsi_rbuf);
+       memset(buf, 0, len);
+       while (i < trmax) {
+               u64 entry = sector |
+                       ((u64)(count > 0xffff ? 0xffff : count) << 48);
+               buf[i++] = __cpu_to_le64(entry);
+               if (count <= 0xffff)
+                       break;
+               count -= 0xffff;
+               sector += 0xffff;
+       }
+       r = sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, len);
+       spin_unlock_irqrestore(&ata_scsi_rbuf_lock, flags);
+
+       return r;
+}
+
+/**
+ * ata_format_sct_write_same() - SATL Write Same to ATA SCT Write Same
+ * @cmd: SCSI command being translated
+ * @lba: Starting sector
+ * @num: Number of sectors to be zero'd.
+ *
+ * Rewrite the WRITE SAME payload to be an SCT Write Same formatted
+ * descriptor.
+ * NOTE: Writes a pattern (0's) in the foreground.
+ *
+ * Return: Number of bytes copied into sglist.
+ */
+static size_t ata_format_sct_write_same(struct scsi_cmnd *cmd, u64 lba, u64 num)
+{
+       struct scsi_device *sdp = cmd->device;
+       size_t len = sdp->sector_size;
+       size_t r;
+       u16 *buf;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ata_scsi_rbuf_lock, flags);
+       buf = ((void *)ata_scsi_rbuf);
+
+       put_unaligned_le16(0x0002,  &buf[0]); /* SCT_ACT_WRITE_SAME */
+       put_unaligned_le16(0x0101,  &buf[1]); /* WRITE PTRN FG */
+       put_unaligned_le64(lba,     &buf[2]);
+       put_unaligned_le64(num,     &buf[6]);
+       put_unaligned_le32(0u,      &buf[10]); /* pattern */
+
+       WARN_ON(len > ATA_SCSI_RBUF_SIZE);
+
+       if (len > ATA_SCSI_RBUF_SIZE)
+               len = ATA_SCSI_RBUF_SIZE;
+
+       r = sg_copy_from_buffer(scsi_sglist(cmd), scsi_sg_count(cmd), buf, len);
+       spin_unlock_irqrestore(&ata_scsi_rbuf_lock, flags);
+
+       return r;
+}
+
+/**
+ * ata_scsi_write_same_xlat() - SATL Write Same to ATA SCT Write Same
+ * @qc: Command to be translated
+ *
+ * Translate a SCSI WRITE SAME command to be either a DSM TRIM command or
+ * an SCT Write Same command.
+ * The choice depends on whether WRITE SAME has the UNMAP flag:
+ *   When set translate to DSM TRIM
+ *   When clear translate to SCT Write Same
+ */
 static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
 {
        struct ata_taskfile *tf = &qc->tf;
        struct scsi_cmnd *scmd = qc->scsicmd;
+       struct scsi_device *sdp = scmd->device;
+       size_t len = sdp->sector_size;
        struct ata_device *dev = qc->dev;
        const u8 *cdb = scmd->cmnd;
        u64 block;
        u32 n_block;
+       const u32 trmax = len >> 3;
        u32 size;
-       void *buf;
        u16 fp;
        u8 bp = 0xff;
+       u8 unmap = cdb[1] & 0x8;
 
        /* we may not issue DMA commands if no DMA mode is set */
        if (unlikely(!dev->dma_mode))
@@ -3305,11 +3410,26 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
        }
        scsi_16_lba_len(cdb, &block, &n_block);
 
-       /* for now we only support WRITE SAME with the unmap bit set */
-       if (unlikely(!(cdb[1] & 0x8))) {
-               fp = 1;
-               bp = 3;
-               goto invalid_fld;
+       if (unmap) {
+               /* If trim is not enabled the cmd is invalid. */
+               if ((dev->horkage & ATA_HORKAGE_NOTRIM) ||
+                   !ata_id_has_trim(dev->id)) {
+                       fp = 1;
+                       bp = 3;
+                       goto invalid_fld;
+               }
+               /* If the request is too large the cmd is invalid */
+               if (n_block > 0xffff * trmax) {
+                       fp = 2;
+                       goto invalid_fld;
+               }
+       } else {
+               /* If write same is not available the cmd is invalid */
+               if (!ata_id_sct_write_same(dev->id)) {
+                       fp = 1;
+                       bp = 3;
+                       goto invalid_fld;
+               }
        }
 
        /*
@@ -3319,32 +3439,54 @@ static unsigned int ata_scsi_write_same_xlat(struct ata_queued_cmd *qc)
        if (!scsi_sg_count(scmd))
                goto invalid_param_len;
 
-       buf = page_address(sg_page(scsi_sglist(scmd)));
-
-       if (n_block <= 65535 * ATA_MAX_TRIM_RNUM) {
-               size = ata_set_lba_range_entries(buf, ATA_MAX_TRIM_RNUM, block, n_block);
-       } else {
-               fp = 2;
-               goto invalid_fld;
-       }
+       /*
+        * size must match sector size in bytes
+        * For DATA SET MANAGEMENT TRIM in ACS-2 nsect (aka count)
+        * is defined as number of 512 byte blocks to be transferred.
+        */
+       if (unmap) {
+               size = ata_format_dsm_trim_descr(scmd, trmax, block, n_block);
+               if (size != len)
+                       goto invalid_param_len;
 
-       if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) {
-               /* Newer devices support queued TRIM commands */
-               tf->protocol = ATA_PROT_NCQ;
-               tf->command = ATA_CMD_FPDMA_SEND;
-               tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f;
-               tf->nsect = qc->tag << 3;
-               tf->hob_feature = (size / 512) >> 8;
-               tf->feature = size / 512;
+               if (ata_ncq_enabled(dev) && ata_fpdma_dsm_supported(dev)) {
+                       /* Newer devices support queued TRIM commands */
+                       tf->protocol = ATA_PROT_NCQ;
+                       tf->command = ATA_CMD_FPDMA_SEND;
+                       tf->hob_nsect = ATA_SUBCMD_FPDMA_SEND_DSM & 0x1f;
+                       tf->nsect = qc->tag << 3;
+                       tf->hob_feature = (size / 512) >> 8;
+                       tf->feature = size / 512;
 
-               tf->auxiliary = 1;
+                       tf->auxiliary = 1;
+               } else {
+                       tf->protocol = ATA_PROT_DMA;
+                       tf->hob_feature = 0;
+                       tf->feature = ATA_DSM_TRIM;
+                       tf->hob_nsect = (size / 512) >> 8;
+                       tf->nsect = size / 512;
+                       tf->command = ATA_CMD_DSM;
+               }
        } else {
-               tf->protocol = ATA_PROT_DMA;
+               size = ata_format_sct_write_same(scmd, block, n_block);
+               if (size != len)
+                       goto invalid_param_len;
+
                tf->hob_feature = 0;
-               tf->feature = ATA_DSM_TRIM;
-               tf->hob_nsect = (size / 512) >> 8;
-               tf->nsect = size / 512;
-               tf->command = ATA_CMD_DSM;
+               tf->feature = 0;
+               tf->hob_nsect = 0;
+               tf->nsect = 1;
+               tf->lbah = 0;
+               tf->lbam = 0;
+               tf->lbal = ATA_CMD_STANDBYNOW1;
+               tf->hob_lbah = 0;
+               tf->hob_lbam = 0;
+               tf->hob_lbal = 0;
+               tf->device = ATA_CMD_STANDBYNOW1;
+               tf->protocol = ATA_PROT_DMA;
+               tf->command = ATA_CMD_WRITE_LOG_DMA_EXT;
+               if (unlikely(dev->flags & ATA_DFLAG_PIO))
+                       tf->command = ATA_CMD_WRITE_LOG_EXT;
        }
 
        tf->flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE | ATA_TFLAG_LBA48 |
@@ -3367,6 +3509,76 @@ invalid_opcode:
        return 1;
 }
 
+/**
+ *     ata_scsiop_maint_in - Simulate a subset of MAINTENANCE_IN
+ *     @args: device MAINTENANCE_IN data / SCSI command of interest.
+ *     @rbuf: Response buffer, to which simulated SCSI cmd output is sent.
+ *
+ *     Yields a subset to satisfy scsi_report_opcode()
+ *
+ *     LOCKING:
+ *     spin_lock_irqsave(host lock)
+ */
+static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf)
+{
+       struct ata_device *dev = args->dev;
+       u8 *cdb = args->cmd->cmnd;
+       u8 supported = 0;
+       unsigned int err = 0;
+
+       if (cdb[2] != 1) {
+               ata_dev_warn(dev, "invalid command format %d\n", cdb[2]);
+               err = 2;
+               goto out;
+       }
+       switch (cdb[3]) {
+       case INQUIRY:
+       case MODE_SENSE:
+       case MODE_SENSE_10:
+       case READ_CAPACITY:
+       case SERVICE_ACTION_IN_16:
+       case REPORT_LUNS:
+       case REQUEST_SENSE:
+       case SYNCHRONIZE_CACHE:
+       case REZERO_UNIT:
+       case SEEK_6:
+       case SEEK_10:
+       case TEST_UNIT_READY:
+       case SEND_DIAGNOSTIC:
+       case MAINTENANCE_IN:
+       case READ_6:
+       case READ_10:
+       case READ_16:
+       case WRITE_6:
+       case WRITE_10:
+       case WRITE_16:
+       case ATA_12:
+       case ATA_16:
+       case VERIFY:
+       case VERIFY_16:
+       case MODE_SELECT:
+       case MODE_SELECT_10:
+       case START_STOP:
+               supported = 3;
+               break;
+       case WRITE_SAME_16:
+               if (!ata_id_sct_write_same(dev->id))
+                       break;
+               /* fallthrough: if SCT ... only enable for ZBC */
+       case ZBC_IN:
+       case ZBC_OUT:
+               if (ata_id_zoned_cap(dev->id) ||
+                   dev->class == ATA_DEV_ZAC)
+                       supported = 3;
+               break;
+       default:
+               break;
+       }
+out:
+       rbuf[1] = supported; /* supported */
+       return err;
+}
+
 /**
  *     ata_scsi_report_zones_complete - convert ATA output
  *     @qc: command structure returning the data
@@ -3610,7 +3822,7 @@ static int ata_mselect_caching(struct ata_queued_cmd *qc,
 {
        struct ata_taskfile *tf = &qc->tf;
        struct ata_device *dev = qc->dev;
-       char mpage[CACHE_MPAGE_LEN];
+       u8 mpage[CACHE_MPAGE_LEN];
        u8 wce;
        int i;
 
@@ -3666,7 +3878,7 @@ static int ata_mselect_control(struct ata_queued_cmd *qc,
                               const u8 *buf, int len, u16 *fp)
 {
        struct ata_device *dev = qc->dev;
-       char mpage[CONTROL_MPAGE_LEN];
+       u8 mpage[CONTROL_MPAGE_LEN];
        u8 d_sense;
        int i;
 
@@ -3701,8 +3913,6 @@ static int ata_mselect_control(struct ata_queued_cmd *qc,
                dev->flags |= ATA_DFLAG_D_SENSE;
        else
                dev->flags &= ~ATA_DFLAG_D_SENSE;
-       qc->scsicmd->result = SAM_STAT_GOOD;
-       qc->scsicmd->scsi_done(qc->scsicmd);
        return 0;
 }
 
@@ -3829,6 +4039,8 @@ static unsigned int ata_scsi_mode_select_xlat(struct ata_queued_cmd *qc)
                if (ata_mselect_control(qc, p, pg_len, &fp) < 0) {
                        fp += hdr_len + bd_len;
                        goto invalid_param;
+               } else {
+                       goto skip; /* No ATA command to send */
                }
                break;
        default:                /* invalid page code */
@@ -4147,6 +4359,13 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd)
                        ata_scsi_invalid_field(dev, cmd, 1);
                break;
 
+       case MAINTENANCE_IN:
+               if (scsicmd[1] == MI_REPORT_SUPPORTED_OPERATION_CODES)
+                       ata_scsi_rbuf_fill(&args, ata_scsiop_maint_in);
+               else
+                       ata_scsi_invalid_field(dev, cmd, 1);
+               break;
+
        /* all other commands */
        default:
                ata_scsi_set_sense(dev, cmd, ILLEGAL_REQUEST, 0x20, 0x0);
@@ -4179,7 +4398,6 @@ int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht)
                shost->max_lun = 1;
                shost->max_channel = 1;
                shost->max_cmd_len = 16;
-               shost->no_write_same = 1;
 
                /* Schedule policy is determined by ->qc_defer()
                 * callback and it needs to see every deferred qc.
index 9f27b14009f9e0f9b34b095b99dfeae76f5d72c4..1611e0e8d767816d86127f007f36818ab8a04434 100644 (file)
@@ -347,10 +347,8 @@ static int at91sam9_smc_fields_init(struct device *dev)
 
        field.reg = AT91SAM9_SMC_MODE(AT91SAM9_SMC_GENERIC);
        fields.mode = devm_regmap_field_alloc(dev, smc, field);
-       if (IS_ERR(fields.mode))
-               return PTR_ERR(fields.mode);
 
-       return 0;
+       return PTR_ERR_OR_ZERO(fields.mode);
 }
 
 static int pata_at91_probe(struct platform_device *pdev)
index 27245957eee3cd906f546d67853d2ebd6ce54d30..475a006694273bed1262dd457e8a9f683ba0367e 100644 (file)
@@ -152,8 +152,7 @@ static void octeon_cf_set_piomode(struct ata_port *ap, struct ata_device *dev)
                div = 8;
        T = (int)((1000000000000LL * div) / octeon_get_io_clock_rate());
 
-       if (ata_timing_compute(dev, dev->pio_mode, &timing, T, T))
-               BUG();
+       BUG_ON(ata_timing_compute(dev, dev->pio_mode, &timing, T, T));
 
        t1 = timing.setup;
        if (t1)
index 745489a1c86ab2c11701b124f8998126608c398a..efc48bf89d5182a6b4b4150b522fabc2cc1844ee 100644 (file)
@@ -1727,15 +1727,13 @@ static int mv_port_start(struct ata_port *ap)
                return -ENOMEM;
        ap->private_data = pp;
 
-       pp->crqb = dma_pool_alloc(hpriv->crqb_pool, GFP_KERNEL, &pp->crqb_dma);
+       pp->crqb = dma_pool_zalloc(hpriv->crqb_pool, GFP_KERNEL, &pp->crqb_dma);
        if (!pp->crqb)
                return -ENOMEM;
-       memset(pp->crqb, 0, MV_CRQB_Q_SZ);
 
-       pp->crpb = dma_pool_alloc(hpriv->crpb_pool, GFP_KERNEL, &pp->crpb_dma);
+       pp->crpb = dma_pool_zalloc(hpriv->crpb_pool, GFP_KERNEL, &pp->crpb_dma);
        if (!pp->crpb)
                goto out_port_free_dma_mem;
-       memset(pp->crpb, 0, MV_CRPB_Q_SZ);
 
        /* 6041/6081 Rev. "C0" (and newer) are okay with async notify */
        if (hpriv->hp_flags & MV_HP_ERRATA_60X1C0)
index 6ac2b2b1e8dec204921fa3cd3d3383f45a3a2794..5ad037c07ec7201b23233d53faef2d9f2e5f5dac 100644 (file)
@@ -584,7 +584,7 @@ static ssize_t hardware_show(struct device *dev, struct device_attribute *attr,
        return sprintf(buf, "%d\n", data32);
 }
 
-static DEVICE_ATTR(console, 0644, console_show, console_store);
+static DEVICE_ATTR_RW(console);
 
 
 #define SOLOS_ATTR_RO(x) static DEVICE_ATTR(x, 0444, solos_param_show, NULL);
index c07e725ea93db3ffdffb933917b0be456911b724..10e1b9eee10eaf6ab4e5ac82b81d2ecfea83c10a 100644 (file)
@@ -119,4 +119,13 @@ config CFAG12864B_RATE
          If you compile this as a module, you can still override this
          value using the module parameters.
 
+config IMG_ASCII_LCD
+       tristate "Imagination Technologies ASCII LCD Display"
+       depends on HAS_IOMEM
+       default y if MIPS_MALTA || MIPS_SEAD3
+       select MFD_SYSCON
+       help
+         Enable this to support the simple ASCII LCD displays found on
+         development boards such as the MIPS Boston, MIPS Malta & MIPS SEAD3
+         from Imagination Technologies.
+
 endif # AUXDISPLAY
index 8a8936a468b9f3acf446a5f250da8e73c629b6ab..3127175c89df53769ea7cdf50a1e2ed13f216fa2 100644 (file)
@@ -4,3 +4,4 @@
 
 obj-$(CONFIG_KS0108)           += ks0108.o
 obj-$(CONFIG_CFAG12864B)       += cfag12864b.o cfag12864bfb.o
+obj-$(CONFIG_IMG_ASCII_LCD)    += img-ascii-lcd.o
diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c
new file mode 100644 (file)
index 0000000..bf43b5d
--- /dev/null
@@ -0,0 +1,443 @@
+/*
+ * Copyright (C) 2016 Imagination Technologies
+ * Author: Paul Burton <paul.burton@imgtec.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <generated/utsrelease.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+
+struct img_ascii_lcd_ctx;
+
+/**
+ * struct img_ascii_lcd_config - Configuration information about an LCD model
+ * @num_chars: the number of characters the LCD can display
+ * @external_regmap: true if registers are in a system controller, else false
+ * @update: function called to update the LCD
+ */
+struct img_ascii_lcd_config {
+       unsigned int num_chars;
+       bool external_regmap;
+       void (*update)(struct img_ascii_lcd_ctx *ctx);
+};
+
+/**
+ * struct img_ascii_lcd_ctx - Private data structure
+ * @pdev: the ASCII LCD platform device
+ * @base: the base address of the LCD registers
+ * @regmap: the regmap through which LCD registers are accessed
+ * @offset: the offset within regmap to the start of the LCD registers
+ * @cfg: pointer to the LCD model configuration
+ * @message: the full message to display or scroll on the LCD
+ * @message_len: the length of the @message string
+ * @scroll_pos: index of the first character of @message currently displayed
+ * @scroll_rate: scroll interval in jiffies
+ * @timer: timer used to implement scrolling
+ * @curr: the string currently displayed on the LCD
+ */
+struct img_ascii_lcd_ctx {
+       struct platform_device *pdev;
+       union {
+               void __iomem *base;
+               struct regmap *regmap;
+       };
+       u32 offset;
+       const struct img_ascii_lcd_config *cfg;
+       char *message;
+       unsigned int message_len;
+       unsigned int scroll_pos;
+       unsigned int scroll_rate;
+       struct timer_list timer;
+       char curr[] __aligned(8);
+};
+
+/*
+ * MIPS Boston development board
+ */
+
+static void boston_update(struct img_ascii_lcd_ctx *ctx)
+{
+       ulong val;
+
+#if BITS_PER_LONG == 64
+       val = *((u64 *)&ctx->curr[0]);
+       __raw_writeq(val, ctx->base);
+#elif BITS_PER_LONG == 32
+       val = *((u32 *)&ctx->curr[0]);
+       __raw_writel(val, ctx->base);
+       val = *((u32 *)&ctx->curr[4]);
+       __raw_writel(val, ctx->base + 4);
+#else
+# error Not 32 or 64 bit
+#endif
+}
+
+static struct img_ascii_lcd_config boston_config = {
+       .num_chars = 8,
+       .update = boston_update,
+};
+
+/*
+ * MIPS Malta development board
+ */
+
+/**
+ * malta_update() - push the current string to the Malta LCD
+ * @ctx: pointer to the private data structure
+ *
+ * Write each character of @ctx->curr to its own register through the
+ * regmap, stopping at the first write that fails. Registers for successive
+ * characters are 8 bytes apart, starting at @ctx->offset.
+ */
+static void malta_update(struct img_ascii_lcd_ctx *ctx)
+{
+       unsigned int i;
+       /* must start at 0: err is tested below even if the loop never runs */
+       int err = 0;
+
+       for (i = 0; i < ctx->cfg->num_chars; i++) {
+               err = regmap_write(ctx->regmap,
+                                  ctx->offset + (i * 8), ctx->curr[i]);
+               if (err)
+                       break;
+       }
+
+       if (unlikely(err))
+               pr_err_ratelimited("Failed to update LCD display: %d\n", err);
+}
+
+static struct img_ascii_lcd_config malta_config = {
+       .num_chars = 8,
+       .external_regmap = true,
+       .update = malta_update,
+};
+
+/*
+ * MIPS SEAD3 development board
+ */
+
+enum {
+       SEAD3_REG_LCD_CTRL              = 0x00,
+#define SEAD3_REG_LCD_CTRL_SETDRAM     BIT(7)
+       SEAD3_REG_LCD_DATA              = 0x08,
+       SEAD3_REG_CPLD_STATUS           = 0x10,
+#define SEAD3_REG_CPLD_STATUS_BUSY     BIT(0)
+       SEAD3_REG_CPLD_DATA             = 0x18,
+#define SEAD3_REG_CPLD_DATA_BUSY       BIT(7)
+};
+
+static int sead3_wait_sm_idle(struct img_ascii_lcd_ctx *ctx)
+{
+       unsigned int status;
+       int err;
+
+       do {
+               err = regmap_read(ctx->regmap,
+                                 ctx->offset + SEAD3_REG_CPLD_STATUS,
+                                 &status);
+               if (err)
+                       return err;
+       } while (status & SEAD3_REG_CPLD_STATUS_BUSY);
+
+       return 0;
+
+}
+
+static int sead3_wait_lcd_idle(struct img_ascii_lcd_ctx *ctx)
+{
+       unsigned int cpld_data;
+       int err;
+
+       err = sead3_wait_sm_idle(ctx);
+       if (err)
+               return err;
+
+       do {
+               err = regmap_read(ctx->regmap,
+                                 ctx->offset + SEAD3_REG_LCD_CTRL,
+                                 &cpld_data);
+               if (err)
+                       return err;
+
+               err = sead3_wait_sm_idle(ctx);
+               if (err)
+                       return err;
+
+               err = regmap_read(ctx->regmap,
+                                 ctx->offset + SEAD3_REG_CPLD_DATA,
+                                 &cpld_data);
+               if (err)
+                       return err;
+       } while (cpld_data & SEAD3_REG_CPLD_DATA_BUSY);
+
+       return 0;
+}
+
+/**
+ * sead3_update() - push the current string to the SEAD3 LCD
+ * @ctx: pointer to the private data structure
+ *
+ * For each character: wait for the LCD to go idle, write the character
+ * index to the control register with the SETDRAM bit set, wait for idle
+ * again, then write the character to the data register. The first failing
+ * regmap access aborts the update.
+ */
+static void sead3_update(struct img_ascii_lcd_ctx *ctx)
+{
+       unsigned int i;
+       /* must start at 0: err is tested below even if the loop never runs */
+       int err = 0;
+
+       for (i = 0; i < ctx->cfg->num_chars; i++) {
+               err = sead3_wait_lcd_idle(ctx);
+               if (err)
+                       break;
+
+               err = regmap_write(ctx->regmap,
+                                  ctx->offset + SEAD3_REG_LCD_CTRL,
+                                  SEAD3_REG_LCD_CTRL_SETDRAM | i);
+               if (err)
+                       break;
+
+               err = sead3_wait_lcd_idle(ctx);
+               if (err)
+                       break;
+
+               err = regmap_write(ctx->regmap,
+                                  ctx->offset + SEAD3_REG_LCD_DATA,
+                                  ctx->curr[i]);
+               if (err)
+                       break;
+       }
+
+       if (unlikely(err))
+               pr_err_ratelimited("Failed to update LCD display: %d\n", err);
+}
+
+static struct img_ascii_lcd_config sead3_config = {
+       .num_chars = 16,
+       .external_regmap = true,
+       .update = sead3_update,
+};
+
+/*
+ * Device tree match table. Each entry's .data points at the configuration
+ * for that LCD model. The table must end with an empty sentinel entry so
+ * that the OF matching code knows where to stop.
+ */
+static const struct of_device_id img_ascii_lcd_matches[] = {
+       { .compatible = "img,boston-lcd", .data = &boston_config },
+       { .compatible = "mti,malta-lcd", .data = &malta_config },
+       { .compatible = "mti,sead3-lcd", .data = &sead3_config },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, img_ascii_lcd_matches);
+
+/**
+ * img_ascii_lcd_scroll() - scroll the display by a character
+ * @arg: really a pointer to the private data structure
+ *
+ * Fill the display buffer with the message starting at the current scroll
+ * position, wrapping around to the start of the message as needed, then push
+ * it to the LCD. The scroll position is advanced by one character and the
+ * timer is rearmed only if the message is longer than the display.
+ *
+ * An empty message blanks the display and leaves the timer stopped.
+ */
+static void img_ascii_lcd_scroll(unsigned long arg)
+{
+       struct img_ascii_lcd_ctx *ctx = (struct img_ascii_lcd_ctx *)arg;
+       unsigned int i, ch = ctx->scroll_pos;
+       unsigned int num_chars = ctx->cfg->num_chars;
+
+       /*
+        * With no characters to copy the fill loop below would never
+        * terminate and the modulo would divide by zero, so just blank
+        * the display.
+        */
+       if (!ctx->message_len) {
+               memset(ctx->curr, ' ', num_chars);
+               ctx->cfg->update(ctx);
+               return;
+       }
+
+       /* update the current message string */
+       for (i = 0; i < num_chars;) {
+               /* copy as many characters from the string as possible */
+               for (; i < num_chars && ch < ctx->message_len; i++, ch++)
+                       ctx->curr[i] = ctx->message[ch];
+
+               /* wrap around to the start of the string */
+               ch = 0;
+       }
+
+       /* update the LCD */
+       ctx->cfg->update(ctx);
+
+       /* move on to the next character */
+       ctx->scroll_pos++;
+       ctx->scroll_pos %= ctx->message_len;
+
+       /* rearm the timer */
+       if (ctx->message_len > ctx->cfg->num_chars)
+               mod_timer(&ctx->timer, jiffies + ctx->scroll_rate);
+}
+
+/**
+ * img_ascii_lcd_display() - set the message to be displayed
+ * @ctx: pointer to the private data structure
+ * @msg: the message to display
+ * @count: length of msg, or -1
+ *
+ * Display a new message @msg on the LCD. @msg can be longer than the number of
+ * characters the LCD can display, in which case it will begin scrolling across
+ * the LCD display. A single trailing newline is trimmed. A message that is
+ * empty after trimming blanks the display.
+ *
+ * The scroll timer is stopped on entry and only restarted once the new
+ * message is in place.
+ *
+ * Return: 0 on success, -ENOMEM on memory allocation failure
+ */
+static int img_ascii_lcd_display(struct img_ascii_lcd_ctx *ctx,
+                            const char *msg, ssize_t count)
+{
+       char *new_msg;
+
+       /* stop the scroll timer */
+       del_timer_sync(&ctx->timer);
+
+       if (count == -1)
+               count = strlen(msg);
+
+       /*
+        * if the string ends with a newline, trim it; guard against an
+        * empty string so that we never read msg[-1]
+        */
+       if (count > 0 && msg[count - 1] == '\n')
+               count--;
+
+       new_msg = devm_kmalloc(&ctx->pdev->dev, count + 1, GFP_KERNEL);
+       if (!new_msg)
+               return -ENOMEM;
+
+       memcpy(new_msg, msg, count);
+       new_msg[count] = 0;
+
+       if (ctx->message)
+               devm_kfree(&ctx->pdev->dev, ctx->message);
+
+       ctx->message = new_msg;
+       ctx->message_len = count;
+       ctx->scroll_pos = 0;
+
+       if (!count) {
+               /*
+                * There is nothing to scroll, and the scroll routine cannot
+                * be handed a zero length message, so blank the LCD here.
+                */
+               memset(ctx->curr, ' ', ctx->cfg->num_chars);
+               ctx->cfg->update(ctx);
+               return 0;
+       }
+
+       /* update the LCD */
+       img_ascii_lcd_scroll((unsigned long)ctx);
+
+       return 0;
+}
+
+/**
+ * message_show() - read message via sysfs
+ * @dev: the LCD device
+ * @attr: the LCD message attribute
+ * @buf: the buffer to read the message into
+ *
+ * Read the current message being displayed or scrolled across the LCD display
+ * into @buf, for reads from sysfs. The output is bounded to PAGE_SIZE, so a
+ * message that fills a whole page is truncated rather than written past the
+ * end of @buf.
+ *
+ * Return: the number of characters written to @buf
+ */
+static ssize_t message_show(struct device *dev, struct device_attribute *attr,
+                           char *buf)
+{
+       struct img_ascii_lcd_ctx *ctx = dev_get_drvdata(dev);
+
+       /* the message plus "\n" may not fit in the buffer, so bound it */
+       return scnprintf(buf, PAGE_SIZE, "%s\n", ctx->message);
+}
+
+/**
+ * message_store() - write a new message via sysfs
+ * @dev: the LCD device
+ * @attr: the LCD message attribute
+ * @buf: the buffer containing the new message
+ * @count: the size of the message in @buf
+ *
+ * Write a new message to display or scroll across the LCD display from sysfs.
+ *
+ * Return: the size of the message on success, else -ERRNO
+ */
+static ssize_t message_store(struct device *dev, struct device_attribute *attr,
+                            const char *buf, size_t count)
+{
+       struct img_ascii_lcd_ctx *ctx = dev_get_drvdata(dev);
+       int err;
+
+       err = img_ascii_lcd_display(ctx, buf, count);
+       return err ?: count;
+}
+
+static DEVICE_ATTR_RW(message);
+
+/**
+ * img_ascii_lcd_probe() - probe an LCD display device
+ * @pdev: the LCD platform device
+ *
+ * Probe an LCD display device, ensuring that we have the required resources in
+ * order to access the LCD & setting up private data as well as sysfs files.
+ *
+ * Return: 0 on success, else -ERRNO
+ */
+static int img_ascii_lcd_probe(struct platform_device *pdev)
+{
+       const struct of_device_id *match;
+       const struct img_ascii_lcd_config *cfg;
+       struct img_ascii_lcd_ctx *ctx;
+       struct resource *res;
+       int err;
+
+       match = of_match_device(img_ascii_lcd_matches, &pdev->dev);
+       if (!match)
+               return -ENODEV;
+
+       cfg = match->data;
+       ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx) + cfg->num_chars,
+                          GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
+
+       if (cfg->external_regmap) {
+               ctx->regmap = syscon_node_to_regmap(pdev->dev.parent->of_node);
+               if (IS_ERR(ctx->regmap))
+                       return PTR_ERR(ctx->regmap);
+
+               if (of_property_read_u32(pdev->dev.of_node, "offset",
+                                        &ctx->offset))
+                       return -EINVAL;
+       } else {
+               res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+               ctx->base = devm_ioremap_resource(&pdev->dev, res);
+               if (IS_ERR(ctx->base))
+                       return PTR_ERR(ctx->base);
+       }
+
+       ctx->pdev = pdev;
+       ctx->cfg = cfg;
+       ctx->message = NULL;
+       ctx->scroll_pos = 0;
+       ctx->scroll_rate = HZ / 2;
+
+       /* initialise a timer for scrolling the message */
+       init_timer(&ctx->timer);
+       ctx->timer.function = img_ascii_lcd_scroll;
+       ctx->timer.data = (unsigned long)ctx;
+
+       platform_set_drvdata(pdev, ctx);
+
+       /* display a default message */
+       err = img_ascii_lcd_display(ctx, "Linux " UTS_RELEASE "       ", -1);
+       if (err)
+               goto out_del_timer;
+
+       err = device_create_file(&pdev->dev, &dev_attr_message);
+       if (err)
+               goto out_del_timer;
+
+       return 0;
+out_del_timer:
+       del_timer_sync(&ctx->timer);
+       return err;
+}
+
+/**
+ * img_ascii_lcd_remove() - remove an LCD display device
+ * @pdev: the LCD platform device
+ *
+ * Remove an LCD display device, freeing private resources & ensuring that the
+ * driver stops using the LCD display registers.
+ *
+ * Return: 0
+ */
+static int img_ascii_lcd_remove(struct platform_device *pdev)
+{
+       struct img_ascii_lcd_ctx *ctx = platform_get_drvdata(pdev);
+
+       device_remove_file(&pdev->dev, &dev_attr_message);
+       del_timer_sync(&ctx->timer);
+       return 0;
+}
+
+/* Platform driver glue: devices are bound through the OF match table. */
+static struct platform_driver img_ascii_lcd_driver = {
+       .driver = {
+               .name           = "img-ascii-lcd",
+               .of_match_table = img_ascii_lcd_matches,
+       },
+       .probe  = img_ascii_lcd_probe,
+       .remove = img_ascii_lcd_remove,
+};
+module_platform_driver(img_ascii_lcd_driver);
+
+/*
+ * The driver can be built as a module, so it needs a licence tag; "GPL"
+ * matches the GPL v2 or later notice at the top of the file.
+ */
+MODULE_DESCRIPTION("Imagination Technologies ASCII LCD Display");
+MODULE_AUTHOR("Paul Burton <paul.burton@imgtec.com>");
+MODULE_LICENSE("GPL");
index fdf44cac08e6d0026dab6095f46b55924cd89b1d..d02e7c0f5bfdff1c6c56372113e230e55be54f5f 100644 (file)
@@ -213,14 +213,16 @@ config DEBUG_DEVRES
          If you are unsure about this, Say N here.
 
 config DEBUG_TEST_DRIVER_REMOVE
-       bool "Test driver remove calls during probe"
+       bool "Test driver remove calls during probe (UNSTABLE)"
        depends on DEBUG_KERNEL
        help
          Say Y here if you want the Driver core to test driver remove functions
          by calling probe, remove, probe. This tests the remove path without
          having to unbind the driver or unload the driver module.
 
-         If you are unsure about this, say N here.
+         This option is expected to find errors and may render your system
+         unusable. You should say N here unless you are explicitly looking to
+         test this functionality.
 
 config SYS_HYPERVISOR
        bool
index d22a7260f42b26f498037f35c943b547300181dc..d76cd97a98b6badff85740180dfec97c8966754a 100644 (file)
@@ -324,7 +324,8 @@ static int really_probe(struct device *dev, struct device_driver *drv)
 {
        int ret = -EPROBE_DEFER;
        int local_trigger_count = atomic_read(&deferred_trigger_count);
-       bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE);
+       bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE) &&
+                          !drv->suppress_bind_attrs;
 
        if (defer_all_probes) {
                /*
@@ -383,7 +384,7 @@ re_probe:
        if (test_remove) {
                test_remove = false;
 
-               if (dev->bus && dev->bus->remove)
+               if (dev->bus->remove)
                        dev->bus->remove(dev);
                else if (drv->remove)
                        drv->remove(dev);
index 8fc654f0807bff66c2473b88010e67fe5ab0cb36..71d577025285bb816a46973877099fea7747e5d4 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/percpu.h>
 
 #include "base.h"
 
@@ -985,3 +986,68 @@ void devm_free_pages(struct device *dev, unsigned long addr)
                               &devres));
 }
 EXPORT_SYMBOL_GPL(devm_free_pages);
+
+static void devm_percpu_release(struct device *dev, void *pdata)
+{
+       void __percpu *p;
+
+       p = *(void __percpu **)pdata;
+       free_percpu(p);
+}
+
+static int devm_percpu_match(struct device *dev, void *data, void *p)
+{
+       struct devres *devr = container_of(data, struct devres, data);
+
+       return *(void **)devr->data == p;
+}
+
+/**
+ * __devm_alloc_percpu - Resource-managed alloc_percpu
+ * @dev: Device to allocate per-cpu memory for
+ * @size: Size of per-cpu memory to allocate
+ * @align: Alignment of per-cpu memory to allocate
+ *
+ * Managed alloc_percpu. Per-cpu memory allocated with this function is
+ * automatically freed on driver detach.
+ *
+ * RETURNS:
+ * Pointer to allocated memory on success, NULL on failure.
+ */
+void __percpu *__devm_alloc_percpu(struct device *dev, size_t size,
+               size_t align)
+{
+       void *p;
+       void __percpu *pcpu;
+
+       pcpu = __alloc_percpu(size, align);
+       if (!pcpu)
+               return NULL;
+
+       p = devres_alloc(devm_percpu_release, sizeof(void *), GFP_KERNEL);
+       if (!p) {
+               free_percpu(pcpu);
+               return NULL;
+       }
+
+       *(void __percpu **)p = pcpu;
+
+       devres_add(dev, p);
+
+       return pcpu;
+}
+EXPORT_SYMBOL_GPL(__devm_alloc_percpu);
+
+/**
+ * devm_free_percpu - Resource-managed free_percpu
+ * @dev: Device this memory belongs to
+ * @pdata: Per-cpu memory to free
+ *
+ * Free memory allocated with devm_alloc_percpu().
+ */
+void devm_free_percpu(struct device *dev, void __percpu *pdata)
+{
+       WARN_ON(devres_destroy(dev, devm_percpu_release, devm_percpu_match,
+                              (void *)pdata));
+}
+EXPORT_SYMBOL_GPL(devm_free_percpu);
index e44944f4be77d0a573e4e46850849b52ba09a349..2932a5bd892f7e2b400d5b806c7df36b3cdc30a8 100644 (file)
@@ -1027,6 +1027,8 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a
        TRACE_DEVICE(dev);
        TRACE_SUSPEND(0);
 
+       dpm_wait_for_children(dev, async);
+
        if (async_error)
                goto Complete;
 
@@ -1038,8 +1040,6 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a
        if (dev->power.syscore || dev->power.direct_complete)
                goto Complete;
 
-       dpm_wait_for_children(dev, async);
-
        if (dev->pm_domain) {
                info = "noirq power domain ";
                callback = pm_noirq_op(&dev->pm_domain->ops, state);
@@ -1174,6 +1174,8 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as
 
        __pm_runtime_disable(dev, false);
 
+       dpm_wait_for_children(dev, async);
+
        if (async_error)
                goto Complete;
 
@@ -1185,8 +1187,6 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as
        if (dev->power.syscore || dev->power.direct_complete)
                goto Complete;
 
-       dpm_wait_for_children(dev, async);
-
        if (dev->pm_domain) {
                info = "late power domain ";
                callback = pm_late_early_op(&dev->pm_domain->ops, state);
index 811e11c82f32907a98a02a442ecb76cb1da471fb..0809cda93cc031360257c7f0802964e15f0f0d51 100644 (file)
@@ -2954,7 +2954,7 @@ DAC960_DetectController(struct pci_dev *PCI_Device,
        case DAC960_PD_Controller:
          if (!request_region(Controller->IO_Address, 0x80,
                              Controller->FullModelName)) {
-               DAC960_Error("IO port 0x%d busy for Controller at\n",
+               DAC960_Error("IO port 0x%lx busy for Controller at\n",
                             Controller, Controller->IO_Address);
                goto Failure;
          }
@@ -2990,7 +2990,7 @@ DAC960_DetectController(struct pci_dev *PCI_Device,
        case DAC960_P_Controller:
          if (!request_region(Controller->IO_Address, 0x80,
                              Controller->FullModelName)){
-               DAC960_Error("IO port 0x%d busy for Controller at\n",
+               DAC960_Error("IO port 0x%lx busy for Controller at\n",
                             Controller, Controller->IO_Address);
                goto Failure;
          }
index ab19adb07a126ae0cae4f0d37170a2929367b115..3c606c09fd5acbd2897c680c3249929f30b6a9a8 100644 (file)
@@ -853,45 +853,6 @@ rqbiocnt(struct request *r)
        return n;
 }
 
-/* This can be removed if we are certain that no users of the block
- * layer will ever use zero-count pages in bios.  Otherwise we have to
- * protect against the put_page sometimes done by the network layer.
- *
- * See http://oss.sgi.com/archives/xfs/2007-01/msg00594.html for
- * discussion.
- *
- * We cannot use get_page in the workaround, because it insists on a
- * positive page count as a precondition.  So we use _refcount directly.
- */
-static void
-bio_pageinc(struct bio *bio)
-{
-       struct bio_vec bv;
-       struct page *page;
-       struct bvec_iter iter;
-
-       bio_for_each_segment(bv, bio, iter) {
-               /* Non-zero page count for non-head members of
-                * compound pages is no longer allowed by the kernel.
-                */
-               page = compound_head(bv.bv_page);
-               page_ref_inc(page);
-       }
-}
-
-static void
-bio_pagedec(struct bio *bio)
-{
-       struct page *page;
-       struct bio_vec bv;
-       struct bvec_iter iter;
-
-       bio_for_each_segment(bv, bio, iter) {
-               page = compound_head(bv.bv_page);
-               page_ref_dec(page);
-       }
-}
-
 static void
 bufinit(struct buf *buf, struct request *rq, struct bio *bio)
 {
@@ -899,7 +860,6 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio)
        buf->rq = rq;
        buf->bio = bio;
        buf->iter = bio->bi_iter;
-       bio_pageinc(bio);
 }
 
 static struct buf *
@@ -1127,7 +1087,6 @@ aoe_end_buf(struct aoedev *d, struct buf *buf)
        if (buf == d->ip.buf)
                d->ip.buf = NULL;
        rq = buf->rq;
-       bio_pagedec(buf->bio);
        mempool_free(buf, d->bufpool);
        n = (unsigned long) rq->special;
        rq->special = (void *) --n;
index 100be556e6137ce1e5731fd68eec5b317ed3bcc8..83482721bc012739cf25ee627fd2b85b2fd094ab 100644 (file)
@@ -1871,7 +1871,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
                drbd_update_congested(connection);
        }
        do {
-               rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
+               rv = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
                if (rv == -EAGAIN) {
                        if (we_should_drop_the_connection(connection, sock))
                                break;
index ba405b55329fb7d784213ea582eae429f05c2bc2..7a104875591400a896ab3025e5feb4e37ec3e86a 100644 (file)
@@ -164,7 +164,7 @@ static void sock_shutdown(struct nbd_device *nbd)
        spin_lock(&nbd->sock_lock);
 
        if (!nbd->sock) {
-               spin_unlock_irq(&nbd->sock_lock);
+               spin_unlock(&nbd->sock_lock);
                return;
        }
 
@@ -599,7 +599,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
                        return -EINVAL;
 
                sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0);
-               if (!sreq)
+               if (IS_ERR(sreq))
                        return -ENOMEM;
 
                mutex_unlock(&nbd->tx_lock);
index abb71628ab614628d62acd3d16a91ee9eae8c9b4..7b274ff4632c6944e32b95d605ee9aa9769f7082 100644 (file)
@@ -415,15 +415,15 @@ struct rbd_device {
 };
 
 /*
- * Flag bits for rbd_dev->flags.  If atomicity is required,
- * rbd_dev->lock is used to protect access.
- *
- * Currently, only the "removing" flag (which is coupled with the
- * "open_count" field) requires atomic access.
+ * Flag bits for rbd_dev->flags:
+ * - REMOVING (which is coupled with rbd_dev->open_count) is protected
+ *   by rbd_dev->lock
+ * - BLACKLISTED is protected by rbd_dev->lock_rwsem
  */
 enum rbd_dev_flags {
        RBD_DEV_FLAG_EXISTS,    /* mapped snapshot has not been deleted */
        RBD_DEV_FLAG_REMOVING,  /* this mapping is being removed */
+       RBD_DEV_FLAG_BLACKLISTED, /* our ceph_client is blacklisted */
 };
 
 static DEFINE_MUTEX(client_mutex);     /* Serialize client creation */
@@ -3926,6 +3926,7 @@ static void rbd_reregister_watch(struct work_struct *work)
        struct rbd_device *rbd_dev = container_of(to_delayed_work(work),
                                            struct rbd_device, watch_dwork);
        bool was_lock_owner = false;
+       bool need_to_wake = false;
        int ret;
 
        dout("%s rbd_dev %p\n", __func__, rbd_dev);
@@ -3935,19 +3936,27 @@ static void rbd_reregister_watch(struct work_struct *work)
                was_lock_owner = rbd_release_lock(rbd_dev);
 
        mutex_lock(&rbd_dev->watch_mutex);
-       if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR)
-               goto fail_unlock;
+       if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) {
+               mutex_unlock(&rbd_dev->watch_mutex);
+               goto out;
+       }
 
        ret = __rbd_register_watch(rbd_dev);
        if (ret) {
                rbd_warn(rbd_dev, "failed to reregister watch: %d", ret);
-               if (ret != -EBLACKLISTED)
+               if (ret == -EBLACKLISTED || ret == -ENOENT) {
+                       set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags);
+                       need_to_wake = true;
+               } else {
                        queue_delayed_work(rbd_dev->task_wq,
                                           &rbd_dev->watch_dwork,
                                           RBD_RETRY_DELAY);
-               goto fail_unlock;
+               }
+               mutex_unlock(&rbd_dev->watch_mutex);
+               goto out;
        }
 
+       need_to_wake = true;
        rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED;
        rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id;
        mutex_unlock(&rbd_dev->watch_mutex);
@@ -3963,13 +3972,10 @@ static void rbd_reregister_watch(struct work_struct *work)
                                 ret);
        }
 
+out:
        up_write(&rbd_dev->lock_rwsem);
-       wake_requests(rbd_dev, true);
-       return;
-
-fail_unlock:
-       mutex_unlock(&rbd_dev->watch_mutex);
-       up_write(&rbd_dev->lock_rwsem);
+       if (need_to_wake)
+               wake_requests(rbd_dev, true);
 }
 
 /*
@@ -4074,7 +4080,9 @@ static void rbd_wait_state_locked(struct rbd_device *rbd_dev)
                up_read(&rbd_dev->lock_rwsem);
                schedule();
                down_read(&rbd_dev->lock_rwsem);
-       } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED);
+       } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
+                !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags));
+
        finish_wait(&rbd_dev->lock_waitq, &wait);
 }
 
@@ -4166,8 +4174,16 @@ static void rbd_queue_workfn(struct work_struct *work)
 
        if (must_be_locked) {
                down_read(&rbd_dev->lock_rwsem);
-               if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED)
+               if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED &&
+                   !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags))
                        rbd_wait_state_locked(rbd_dev);
+
+               WARN_ON((rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) ^
+                       !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags));
+               if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) {
+                       result = -EBLACKLISTED;
+                       goto err_unlock;
+               }
        }
 
        img_request = rbd_img_request_create(rbd_dev, offset, length, op_type,
index 2dc5c96c186aa3455ea124aa2bb824e889e1e15f..5545a679abd8887123fc83d57beb37dede77a685 100644 (file)
@@ -376,7 +376,7 @@ static void virtblk_config_changed(struct virtio_device *vdev)
 
 static int init_vq(struct virtio_blk *vblk)
 {
-       int err = 0;
+       int err;
        int i;
        vq_callback_t **callbacks;
        const char **names;
@@ -390,13 +390,13 @@ static int init_vq(struct virtio_blk *vblk)
        if (err)
                num_vqs = 1;
 
-       vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
+       vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
        if (!vblk->vqs)
                return -ENOMEM;
 
-       names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
-       callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
-       vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
+       names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL);
+       callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL);
+       vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL);
        if (!names || !callbacks || !vqs) {
                err = -ENOMEM;
                goto out;
index ef51c9c864c59e2ae0cfa89f2dede3b282d4830e..b6bb58c41df5b7c553e6bfd05d7c8ce3adece509 100644 (file)
@@ -310,7 +310,7 @@ static int bt_ti_probe(struct platform_device *pdev)
        BT_DBG("HCI device registered (hdev %p)", hdev);
 
        dev_set_drvdata(&pdev->dev, hst);
-       return err;
+       return 0;
 }
 
 static int bt_ti_remove(struct platform_device *pdev)
index 5ccb90ef0146e5f324dd6da2590bea6a19a37521..8f6c23c20c52d83b4097dabfa64b82d6b360cf4b 100644 (file)
@@ -643,6 +643,14 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
                },
                .driver_data = &acpi_active_low,
        },
+       {       /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */
+               .ident = "Lenovo ThinkPad 8",
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"),
+               },
+               .driver_data = &acpi_active_low,
+       },
        { }
 };
 
index 7010dcac93288aa1984ea9d7d232bdd163e4d37a..78751057164aedfe29b270a69d10c7c6f7b3b73a 100644 (file)
@@ -111,6 +111,7 @@ config OMAP_OCP2SCP
 config QCOM_EBI2
        bool "Qualcomm External Bus Interface 2 (EBI2)"
        depends on HAS_IOMEM
+       depends on ARCH_QCOM || COMPILE_TEST
        help
          Say y here to enable support for the Qualcomm External Bus
          Interface 2, which can be used to connect things like NAND Flash,
index 482794526e8cd52418dc1c1e6e5b200943d3c4c8..d2d2c89de5b4428e627eb06d9733ec1bcf2c2b0d 100644 (file)
@@ -84,14 +84,14 @@ static size_t rng_buffer_size(void)
 
 static void add_early_randomness(struct hwrng *rng)
 {
-       unsigned char bytes[16];
        int bytes_read;
+       size_t size = min_t(size_t, 16, rng_buffer_size());
 
        mutex_lock(&reading_mutex);
-       bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1);
+       bytes_read = rng_get_data(rng, rng_buffer, size, 1);
        mutex_unlock(&reading_mutex);
        if (bytes_read > 0)
-               add_device_randomness(bytes, bytes_read);
+               add_device_randomness(rng_buffer, bytes_read);
 }
 
 static inline void cleanup_rng(struct kref *kref)
index 5a9350b1069a3495b2d1e4d116fcb2d780bbd9d2..7f816655cbbfafc9ea3f16097ba4a6f8a0f4b91b 100644 (file)
@@ -76,3 +76,11 @@ config IPMI_POWEROFF
         the IPMI management controller is capable of this.
 
 endif # IPMI_HANDLER
+
+config ASPEED_BT_IPMI_BMC
+       depends on ARCH_ASPEED
+       tristate "BT IPMI bmc driver"
+       help
+         Provides a driver for the BT (Block Transfer) IPMI interface
+         found on Aspeed SOCs (AST2400 and AST2500). The driver
+         implements the BMC side of the BT interface.
index f3ffde1f5f1f53c3c702458b5e2d33f84866e0b9..0d98cd91def1dba3f70f8aae98d01bc04522c8b1 100644 (file)
@@ -11,3 +11,4 @@ obj-$(CONFIG_IPMI_SSIF) += ipmi_ssif.o
 obj-$(CONFIG_IPMI_POWERNV) += ipmi_powernv.o
 obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o
 obj-$(CONFIG_IPMI_POWEROFF) += ipmi_poweroff.o
+obj-$(CONFIG_ASPEED_BT_IPMI_BMC) += bt-bmc.o
diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c
new file mode 100644 (file)
index 0000000..fc9e889
--- /dev/null
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2015-2016, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/atomic.h>
+#include <linux/bt-bmc.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+
+/*
+ * This is a BMC device used to communicate to the host
+ */
+/* Name used for the misc device, the IRQ and the platform driver. */
+#define DEVICE_NAME    "ipmi-bt-host"
+
+/*
+ * Values that bt_bmc_probe() shifts into BT_CR0 (presumably the
+ * host-visible I/O base and interrupt number -- confirm against the
+ * SoC datasheet).
+ */
+#define BT_IO_BASE     0xe4
+#define BT_IRQ         10
+
+/*
+ * Register offsets from the mapped base; the indented names below
+ * each register are fields of that register.
+ */
+#define BT_CR0         0x0
+/* BT_CR0_IO_BASE and BT_CR0_IRQ are shift counts; the rest are bit masks */
+#define   BT_CR0_IO_BASE               16
+#define   BT_CR0_IRQ                   12
+#define   BT_CR0_EN_CLR_SLV_RDP                0x8
+#define   BT_CR0_EN_CLR_SLV_WRP                0x4
+#define   BT_CR0_ENABLE_IBT            0x1
+/* BT_CR1: bt_bmc_config_irq() sets these bits to enable the interrupts */
+#define BT_CR1         0x4
+#define   BT_CR1_IRQ_H2B       0x01
+#define   BT_CR1_IRQ_HBUSY     0x40
+/* BT_CR2: read by bt_bmc_irq() for pending interrupts, written back to ack */
+#define BT_CR2         0x8
+#define   BT_CR2_IRQ_H2B       0x01
+#define   BT_CR2_IRQ_HBUSY     0x40
+/* BT_CR3 is not referenced in this file */
+#define BT_CR3         0xc
+/* BT_CTRL: handshake flags shared with the host */
+#define BT_CTRL                0x10
+#define   BT_CTRL_B_BUSY               0x80
+#define   BT_CTRL_H_BUSY               0x40
+#define   BT_CTRL_OEM0                 0x20
+#define   BT_CTRL_SMS_ATN              0x10
+#define   BT_CTRL_B2H_ATN              0x08
+#define   BT_CTRL_H2B_ATN              0x04
+#define   BT_CTRL_CLR_RD_PTR           0x02
+#define   BT_CTRL_CLR_WR_PTR           0x01
+/* Data register: bt_read() and bt_write() both go through this offset */
+#define BT_BMC2HOST    0x14
+/* BT_INTMASK and its bits are not referenced in this file */
+#define BT_INTMASK     0x18
+#define   BT_INTMASK_B2H_IRQEN         0x01
+#define   BT_INTMASK_B2H_IRQ           0x02
+#define   BT_INTMASK_BMC_HWRST         0x80
+
+/* Size of the on-stack bounce buffers in bt_bmc_read()/bt_bmc_write() */
+#define BT_BMC_BUFFER_SIZE 256
+
+/* Per-device state, allocated in bt_bmc_probe(). */
+struct bt_bmc {
+       struct device           dev;            /* NOTE(review): never referenced in this file */
+       struct miscdevice       miscdev;        /* file_bt_bmc() maps an open file back via this member */
+       void __iomem            *base;          /* mapped BT register block */
+       int                     irq;            /* 0 means "no IRQ, poll with poll_timer" */
+       wait_queue_head_t       queue;          /* readers/writers sleep here; woken by IRQ or timer */
+       struct timer_list       poll_timer;     /* only armed when irq == 0 */
+       struct mutex            mutex;          /* held around a message transfer in read/write */
+};
+
+/*
+ * Number of current opens of the device node; bt_bmc_open() refuses a
+ * second one with -EBUSY. File scope, so it is shared by every device
+ * instance this driver binds.
+ */
+static atomic_t open_count = ATOMIC_INIT(0);
+
+/* Return the byte read from the BT register at offset @reg. */
+static u8 bt_inb(struct bt_bmc *bt_bmc, int reg)
+{
+       void __iomem *addr = bt_bmc->base + reg;
+
+       return ioread8(addr);
+}
+
+/* Write the byte @data to the BT register at offset @reg. */
+static void bt_outb(struct bt_bmc *bt_bmc, u8 data, int reg)
+{
+       void __iomem *addr = bt_bmc->base + reg;
+
+       iowrite8(data, addr);
+}
+
+/* Write CLR_RD_PTR to BT_CTRL; bt_bmc_read() does this before draining a message. */
+static void clr_rd_ptr(struct bt_bmc *bt_bmc)
+{
+       const u8 bit = BT_CTRL_CLR_RD_PTR;
+
+       bt_outb(bt_bmc, bit, BT_CTRL);
+}
+
+/* Write CLR_WR_PTR to BT_CTRL; bt_bmc_write() does this before filling a response. */
+static void clr_wr_ptr(struct bt_bmc *bt_bmc)
+{
+       const u8 bit = BT_CTRL_CLR_WR_PTR;
+
+       bt_outb(bt_bmc, bit, BT_CTRL);
+}
+
+/* Write the H2B_ATN bit to BT_CTRL; bt_bmc_read() does this once it starts on a message. */
+static void clr_h2b_atn(struct bt_bmc *bt_bmc)
+{
+       const u8 bit = BT_CTRL_H2B_ATN;
+
+       bt_outb(bt_bmc, bit, BT_CTRL);
+}
+
+/*
+ * Make sure B_BUSY reads back as set. The bit is only written when it
+ * is currently clear, i.e. the code relies on a write of B_BUSY
+ * flipping its state (see clr_b_busy(), which writes the same value).
+ */
+static void set_b_busy(struct bt_bmc *bt_bmc)
+{
+       u8 ctrl = bt_inb(bt_bmc, BT_CTRL);
+
+       if (ctrl & BT_CTRL_B_BUSY)
+               return;
+
+       bt_outb(bt_bmc, BT_CTRL_B_BUSY, BT_CTRL);
+}
+
+/*
+ * Make sure B_BUSY reads back as clear. The bit is only written when it
+ * is currently set, i.e. the code relies on a write of B_BUSY flipping
+ * its state (see set_b_busy(), which writes the same value).
+ */
+static void clr_b_busy(struct bt_bmc *bt_bmc)
+{
+       u8 ctrl = bt_inb(bt_bmc, BT_CTRL);
+
+       if (!(ctrl & BT_CTRL_B_BUSY))
+               return;
+
+       bt_outb(bt_bmc, BT_CTRL_B_BUSY, BT_CTRL);
+}
+
+/* Write the B2H_ATN bit to BT_CTRL; bt_bmc_write() does this after queuing a response. */
+static void set_b2h_atn(struct bt_bmc *bt_bmc)
+{
+       const u8 bit = BT_CTRL_B2H_ATN;
+
+       bt_outb(bt_bmc, bit, BT_CTRL);
+}
+
+/* Fetch one byte from the BT data register. */
+static u8 bt_read(struct bt_bmc *bt_bmc)
+{
+       return ioread8(bt_bmc->base + BT_BMC2HOST);
+}
+
+/*
+ * Read @n bytes from the BT data register into @buf, one byte per
+ * register access. Always returns @n.
+ */
+static ssize_t bt_readn(struct bt_bmc *bt_bmc, u8 *buf, size_t n)
+{
+       size_t i;       /* same type as the bound: no signed/unsigned compare */
+
+       for (i = 0; i < n; i++)
+               buf[i] = bt_read(bt_bmc);
+       return n;
+}
+
+/* Push one byte into the BT data register. */
+static void bt_write(struct bt_bmc *bt_bmc, u8 c)
+{
+       iowrite8(c, bt_bmc->base + BT_BMC2HOST);
+}
+
+/*
+ * Write @n bytes from @buf to the BT data register, one byte per
+ * register access. @buf is only read. Always returns @n.
+ */
+static ssize_t bt_writen(struct bt_bmc *bt_bmc, const u8 *buf, size_t n)
+{
+       size_t i;       /* same type as the bound: no signed/unsigned compare */
+
+       for (i = 0; i < n; i++)
+               bt_write(bt_bmc, buf[i]);
+       return n;
+}
+
+/* Write the SMS_ATN bit to BT_CTRL; reached from the BT_BMC_IOCTL_SMS_ATN ioctl. */
+static void set_sms_atn(struct bt_bmc *bt_bmc)
+{
+       const u8 bit = BT_CTRL_SMS_ATN;
+
+       bt_outb(bt_bmc, bit, BT_CTRL);
+}
+
+/*
+ * Map an open file back to its bt_bmc. Relies on file->private_data
+ * pointing at the miscdevice embedded in struct bt_bmc (presumably set
+ * by the misc core on open -- confirm).
+ */
+static struct bt_bmc *file_bt_bmc(struct file *file)
+{
+       struct miscdevice *misc = file->private_data;
+
+       return container_of(misc, struct bt_bmc, miscdev);
+}
+
+/*
+ * Open the device node. Only one open is allowed at a time: a second
+ * opener gets -EBUSY. The successful opener clears B_BUSY.
+ */
+static int bt_bmc_open(struct inode *inode, struct file *file)
+{
+       struct bt_bmc *bt_bmc = file_bt_bmc(file);
+
+       if (atomic_inc_return(&open_count) != 1) {
+               /* somebody else holds the device: undo our increment */
+               atomic_dec(&open_count);
+               return -EBUSY;
+       }
+
+       clr_b_busy(bt_bmc);
+       return 0;
+}
+
+/*
+ * The BT (Block Transfer) interface means that entire messages are
+ * buffered by the host before a notification is sent to the BMC that
+ * there is data to be read. The first byte is the length and the
+ * message data follows. The read operation just tries to capture the
+ * whole message before returning it to userspace.
+ *
+ * BT Message format :
+ *
+ *    Byte 1  Byte 2     Byte 3  Byte 4  Byte 5:N
+ *    Length  NetFn/LUN  Seq     Cmd     Data
+ *
+ */
+static ssize_t bt_bmc_read(struct file *file, char __user *buf,
+                          size_t count, loff_t *ppos)
+{
+       struct bt_bmc *bt_bmc = file_bt_bmc(file);
+       u8 len;
+       int len_byte = 1;
+       u8 kbuffer[BT_BMC_BUFFER_SIZE];
+       ssize_t ret = 0;
+       ssize_t nread;
+
+       /*
+        * A zero-byte read has no room even for the length byte. Return
+        * before "count - 1" below wraps around and a message is copied
+        * into a buffer userspace declared to be empty.
+        */
+       if (!count)
+               return 0;
+
+       if (!access_ok(VERIFY_WRITE, buf, count))
+               return -EFAULT;
+
+       WARN_ON(*ppos);
+
+       /* Sleep until the host flags a message (woken by IRQ or poll timer) */
+       if (wait_event_interruptible(bt_bmc->queue,
+                                    bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_H2B_ATN))
+               return -ERESTARTSYS;
+
+       mutex_lock(&bt_bmc->mutex);
+
+       /* Re-check under the mutex: the flag was sampled without it above */
+       if (unlikely(!(bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_H2B_ATN))) {
+               ret = -EIO;
+               goto out_unlock;
+       }
+
+       set_b_busy(bt_bmc);
+       clr_h2b_atn(bt_bmc);
+       clr_rd_ptr(bt_bmc);
+
+       /*
+        * The BT frames start with the message length, which does not
+        * include the length byte.
+        */
+       kbuffer[0] = bt_read(bt_bmc);
+       len = kbuffer[0];
+
+       /*
+        * We pass the length back to userspace as well. If the caller's
+        * buffer is too small the payload is truncated to fit; the length
+        * byte handed back still holds the value read from the hardware.
+        */
+       if (len + 1 > count)
+               len = count - 1;
+
+       while (len) {
+               nread = min_t(ssize_t, len, sizeof(kbuffer) - len_byte);
+
+               bt_readn(bt_bmc, kbuffer + len_byte, nread);
+
+               if (copy_to_user(buf, kbuffer, nread + len_byte)) {
+                       ret = -EFAULT;
+                       break;
+               }
+               len -= nread;
+               buf += nread + len_byte;
+               ret += nread + len_byte;
+               /* the length byte only precedes the first chunk */
+               len_byte = 0;
+       }
+
+       clr_b_busy(bt_bmc);
+
+out_unlock:
+       mutex_unlock(&bt_bmc->mutex);
+       return ret;
+}
+
+/*
+ * BT Message response format :
+ *
+ *    Byte 1  Byte 2     Byte 3  Byte 4  Byte 5  Byte 6:N
+ *    Length  NetFn/LUN  Seq     Cmd     Code    Data
+ */
+static ssize_t bt_bmc_write(struct file *file, const char __user *buf,
+                           size_t count, loff_t *ppos)
+{
+       struct bt_bmc *bt_bmc = file_bt_bmc(file);
+       u8 kbuffer[BT_BMC_BUFFER_SIZE];
+       ssize_t ret = 0;
+       ssize_t nwritten;
+
+       /*
+        * send a minimum response size
+        */
+       if (count < 5)
+               return -EINVAL;
+
+       if (!access_ok(VERIFY_READ, buf, count))
+               return -EFAULT;
+
+       WARN_ON(*ppos);
+
+       /*
+        * There's no interrupt for clearing bmc busy so we have to
+        * poll
+        */
+       if (wait_event_interruptible(bt_bmc->queue,
+                                    !(bt_inb(bt_bmc, BT_CTRL) &
+                                      (BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN))))
+               return -ERESTARTSYS;
+
+       mutex_lock(&bt_bmc->mutex);
+
+       /* Re-check under the mutex: the flags were sampled without it above */
+       if (unlikely(bt_inb(bt_bmc, BT_CTRL) &
+                    (BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN))) {
+               ret = -EIO;
+               goto out_unlock;
+       }
+
+       clr_wr_ptr(bt_bmc);
+
+       /* Bounce the response through kbuffer, one buffer-full at a time */
+       while (count) {
+               nwritten = min_t(ssize_t, count, sizeof(kbuffer));
+               if (copy_from_user(kbuffer, buf, nwritten)) {
+                       /*
+                        * Leave without raising B2H_ATN: only part of
+                        * the response was queued, so the host must not
+                        * be told that a message is ready.
+                        */
+                       ret = -EFAULT;
+                       goto out_unlock;
+               }
+
+               bt_writen(bt_bmc, kbuffer, nwritten);
+
+               count -= nwritten;
+               buf += nwritten;
+               ret += nwritten;
+       }
+
+       /* Whole response queued: signal the host */
+       set_b2h_atn(bt_bmc);
+
+out_unlock:
+       mutex_unlock(&bt_bmc->mutex);
+       return ret;
+}
+
+/*
+ * ioctl entry point. BT_BMC_IOCTL_SMS_ATN writes the SMS_ATN bit and
+ * returns 0; every other command returns -EINVAL. @param is unused.
+ */
+static long bt_bmc_ioctl(struct file *file, unsigned int cmd,
+                        unsigned long param)
+{
+       struct bt_bmc *bt_bmc = file_bt_bmc(file);
+
+       if (cmd != BT_BMC_IOCTL_SMS_ATN)
+               return -EINVAL;
+
+       set_sms_atn(bt_bmc);
+       return 0;
+}
+
+/*
+ * Last close of the device node: flag the BMC side as busy again and
+ * let the next opener in.
+ *
+ * B_BUSY is set before open_count is dropped. bt_bmc_open() can only
+ * succeed (and clear B_BUSY) after the decrement, so this order keeps
+ * a concurrent open from having its cleared B_BUSY set again by us.
+ */
+static int bt_bmc_release(struct inode *inode, struct file *file)
+{
+       struct bt_bmc *bt_bmc = file_bt_bmc(file);
+
+       set_b_busy(bt_bmc);
+       atomic_dec(&open_count);
+       return 0;
+}
+
+/*
+ * poll() support. Reports POLLIN while H2B_ATN is set and POLLOUT
+ * while neither H_BUSY nor B2H_ATN is set, based on a single read of
+ * BT_CTRL.
+ */
+static unsigned int bt_bmc_poll(struct file *file, poll_table *wait)
+{
+       struct bt_bmc *bt_bmc = file_bt_bmc(file);
+       const u8 write_blockers = BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN;
+       unsigned int events = 0;
+       u8 status;
+
+       poll_wait(file, &bt_bmc->queue, wait);
+
+       status = bt_inb(bt_bmc, BT_CTRL);
+
+       if ((status & write_blockers) == 0)
+               events |= POLLOUT;
+
+       if ((status & BT_CTRL_H2B_ATN) != 0)
+               events |= POLLIN;
+
+       return events;
+}
+
+/* File operations of the misc device registered in bt_bmc_probe(). */
+static const struct file_operations bt_bmc_fops = {
+       .owner          = THIS_MODULE,
+
+       /* open/close */
+       .open           = bt_bmc_open,
+       .release        = bt_bmc_release,
+
+       /* message transfer */
+       .read           = bt_bmc_read,
+       .write          = bt_bmc_write,
+       .poll           = bt_bmc_poll,
+
+       /* control */
+       .unlocked_ioctl = bt_bmc_ioctl,
+};
+
+/*
+ * Timer callback used when the device has no IRQ: wake anyone sleeping
+ * on the wait queue so they re-read BT_CTRL, then re-arm the timer
+ * 500 ms after its previous expiry.
+ */
+static void poll_timer(unsigned long data)
+{
+       struct bt_bmc *bt_bmc = (void *)data;
+       struct timer_list *timer = &bt_bmc->poll_timer;
+
+       wake_up(&bt_bmc->queue);
+
+       timer->expires += msecs_to_jiffies(500);
+       add_timer(timer);
+}
+
+/*
+ * Interrupt handler. Returns IRQ_NONE when neither the H2B nor the
+ * HBUSY bit is set in BT_CR2 (the line is requested as shared);
+ * otherwise writes the set bits back and wakes the wait queue.
+ */
+static irqreturn_t bt_bmc_irq(int irq, void *arg)
+{
+       struct bt_bmc *bt_bmc = arg;
+       void __iomem *cr2 = bt_bmc->base + BT_CR2;
+       u32 pending;
+
+       pending = ioread32(cr2) & (BT_CR2_IRQ_H2B | BT_CR2_IRQ_HBUSY);
+       if (pending == 0)
+               return IRQ_NONE;
+
+       /* ack pending IRQs */
+       iowrite32(pending, cr2);
+
+       wake_up(&bt_bmc->queue);
+       return IRQ_HANDLED;
+}
+
+/*
+ * Look up and request the device interrupt, then enable the H2B and
+ * HBUSY interrupt sources in BT_CR1.
+ *
+ * Returns 0 on success or a negative errno. On any failure bt_bmc->irq
+ * is left at 0, which bt_bmc_probe() and bt_bmc_remove() take to mean
+ * "no IRQ, use the poll timer".
+ */
+static int bt_bmc_config_irq(struct bt_bmc *bt_bmc,
+                            struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       u32 reg;
+       int rc;
+
+       bt_bmc->irq = 0;
+
+       /* platform_get_irq() reports failure as a negative errno, not 0 */
+       rc = platform_get_irq(pdev, 0);
+       if (rc < 0)
+               return rc;
+       if (!rc)
+               return -ENODEV;
+       bt_bmc->irq = rc;
+
+       rc = devm_request_irq(dev, bt_bmc->irq, bt_bmc_irq, IRQF_SHARED,
+                             DEVICE_NAME, bt_bmc);
+       if (rc < 0) {
+               dev_warn(dev, "Unable to request IRQ %d\n", bt_bmc->irq);
+               bt_bmc->irq = 0;
+               return rc;
+       }
+
+       /*
+        * Enable the H2B and HBUSY interrupt sources: H2B fires when the
+        * host has a message for us; HBUSY fires when the host clears
+        * H_BUSY, which is what bt_bmc_write() waits for before it may
+        * queue the next response.
+        */
+       reg = ioread32(bt_bmc->base + BT_CR1);
+       reg |= BT_CR1_IRQ_H2B | BT_CR1_IRQ_HBUSY;
+       iowrite32(reg, bt_bmc->base + BT_CR1);
+
+       return 0;
+}
+
+/*
+ * Bind to a BT device described in the device tree: map its registers,
+ * register the misc device, set up either the interrupt or a polling
+ * timer, and finally enable the BT interface in BT_CR0.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int bt_bmc_probe(struct platform_device *pdev)
+{
+       struct bt_bmc *bt_bmc;
+       struct device *dev;
+       struct resource *res;
+       int rc;
+
+       if (!pdev || !pdev->dev.of_node)
+               return -ENODEV;
+
+       dev = &pdev->dev;
+       dev_info(dev, "Found bt bmc device\n");
+
+       bt_bmc = devm_kzalloc(dev, sizeof(*bt_bmc), GFP_KERNEL);
+       if (!bt_bmc)
+               return -ENOMEM;
+
+       dev_set_drvdata(dev, bt_bmc);
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       bt_bmc->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(bt_bmc->base))
+               return PTR_ERR(bt_bmc->base);
+
+       mutex_init(&bt_bmc->mutex);
+       init_waitqueue_head(&bt_bmc->queue);
+
+       bt_bmc->miscdev.minor = MISC_DYNAMIC_MINOR;
+       bt_bmc->miscdev.name = DEVICE_NAME;
+       bt_bmc->miscdev.fops = &bt_bmc_fops;
+       bt_bmc->miscdev.parent = dev;
+       rc = misc_register(&bt_bmc->miscdev);
+       if (rc) {
+               dev_err(dev, "Unable to register misc device\n");
+               return rc;
+       }
+
+       /*
+        * The return value is deliberately ignored: without an IRQ we
+        * fall back to the poll timer, and bt_bmc->irq says which.
+        */
+       bt_bmc_config_irq(bt_bmc, pdev);
+
+       if (bt_bmc->irq) {
+               dev_info(dev, "Using IRQ %d\n", bt_bmc->irq);
+       } else {
+               dev_info(dev, "No IRQ; using timer\n");
+               setup_timer(&bt_bmc->poll_timer, poll_timer,
+                           (unsigned long)bt_bmc);
+               bt_bmc->poll_timer.expires = jiffies + msecs_to_jiffies(10);
+               add_timer(&bt_bmc->poll_timer);
+       }
+
+       /* Program I/O base and IRQ number into BT_CR0 and enable the interface */
+       iowrite32((BT_IO_BASE << BT_CR0_IO_BASE) |
+                 (BT_IRQ << BT_CR0_IRQ) |
+                 BT_CR0_EN_CLR_SLV_RDP |
+                 BT_CR0_EN_CLR_SLV_WRP |
+                 BT_CR0_ENABLE_IBT,
+                 bt_bmc->base + BT_CR0);
+
+       clr_b_busy(bt_bmc);
+
+       return 0;
+}
+
+/*
+ * Unbind: take the misc device away and, if probe armed the polling
+ * timer (no IRQ), stop it. Always returns 0.
+ */
+static int bt_bmc_remove(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct bt_bmc *bt_bmc = dev_get_drvdata(dev);
+       bool polling = !bt_bmc->irq;
+
+       misc_deregister(&bt_bmc->miscdev);
+
+       if (polling)
+               del_timer_sync(&bt_bmc->poll_timer);
+
+       return 0;
+}
+
+/* Device-tree compatible strings this driver binds to. */
+static const struct of_device_id bt_bmc_match[] = {
+       { .compatible = "aspeed,ast2400-ibt-bmc" },
+       { },    /* empty terminating entry */
+};
+
+/* Platform driver glue; registered by module_platform_driver(). */
+static struct platform_driver bt_bmc_driver = {
+       .probe  = bt_bmc_probe,
+       .remove = bt_bmc_remove,
+       .driver = {
+               .of_match_table = bt_bmc_match,
+               .name           = DEVICE_NAME,
+       },
+};
+
+module_platform_driver(bt_bmc_driver);
+
+MODULE_DEVICE_TABLE(of, bt_bmc_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Alistair Popple <alistair@popple.id.au>");
+MODULE_DESCRIPTION("Linux device interface to the IPMI BT interface");
index d8619998cfb57ffffa9fa29e80498072d390db86..fcdd886819f5c85d88767d9d2042b8fef7a1d11f 100644 (file)
@@ -2891,11 +2891,11 @@ int ipmi_register_smi(const struct ipmi_smi_handlers *handlers,
                intf->curr_channel = IPMI_MAX_CHANNELS;
        }
 
+       rv = ipmi_bmc_register(intf, i);
+
        if (rv == 0)
                rv = add_proc_entries(intf, i);
 
-       rv = ipmi_bmc_register(intf, i);
-
  out:
        if (rv) {
                if (intf->proc_dir)
@@ -2982,8 +2982,6 @@ int ipmi_unregister_smi(ipmi_smi_t intf)
        int intf_num = intf->intf_num;
        ipmi_user_t user;
 
-       ipmi_bmc_unregister(intf);
-
        mutex_lock(&smi_watchers_mutex);
        mutex_lock(&ipmi_interfaces_mutex);
        intf->intf_num = -1;
@@ -3007,6 +3005,7 @@ int ipmi_unregister_smi(ipmi_smi_t intf)
        mutex_unlock(&ipmi_interfaces_mutex);
 
        remove_proc_entries(intf);
+       ipmi_bmc_unregister(intf);
 
        /*
         * Call all the watcher interfaces to tell them that
index d23368874710f726d485917e4d3f1ef93f3e3843..6af1ce04b3dac9ab1fd2e20d52d1930e6798502b 100644 (file)
@@ -748,10 +748,7 @@ static int pp_release(struct inode *inode, struct file *file)
        }
 
        if (pp->pdev) {
-               const char *name = pp->pdev->name;
-
                parport_unregister_device(pp->pdev);
-               kfree(name);
                pp->pdev = NULL;
                pr_debug(CHRDEV "%x: unregistered pardevice\n", minor);
        }
index d131e152c8ce6a3e70a7e0fe287275269f0c40c6..d6876d50622075f1b490bf7a19831b4e8784adb6 100644 (file)
@@ -479,8 +479,8 @@ static ssize_t _extract_entropy(struct entropy_store *r, void *buf,
 
 static void crng_reseed(struct crng_state *crng, struct entropy_store *r);
 static void push_to_pool(struct work_struct *work);
-static __u32 input_pool_data[INPUT_POOL_WORDS];
-static __u32 blocking_pool_data[OUTPUT_POOL_WORDS];
+static __u32 input_pool_data[INPUT_POOL_WORDS] __latent_entropy;
+static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy;
 
 static struct entropy_store input_pool = {
        .poolinfo = &poolinfo_table[0],
index 8de61876f6336bc2bd239ce935191b0b4c01eca4..3a9149cf011048f21fa742785c9ff0b6f9da6c0b 100644 (file)
@@ -813,9 +813,6 @@ int tpm_do_selftest(struct tpm_chip *chip)
                        continue;
                }
 
-               if (rc < TPM_HEADER_SIZE)
-                       return -EFAULT;
-
                if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) {
                        dev_info(&chip->dev,
                                 "TPM is disabled/deactivated (0x%X)\n", rc);
index d433b1db1fdd79469ae7b744cf8f2514c3a0fab0..5649234b73162aaefcb0d74a5f7b9ec4820be486 100644 (file)
@@ -1539,19 +1539,29 @@ static void remove_port_data(struct port *port)
        spin_lock_irq(&port->inbuf_lock);
        /* Remove unused data this port might have received. */
        discard_port_data(port);
+       spin_unlock_irq(&port->inbuf_lock);
 
        /* Remove buffers we queued up for the Host to send us data in. */
-       while ((buf = virtqueue_detach_unused_buf(port->in_vq)))
-               free_buf(buf, true);
-       spin_unlock_irq(&port->inbuf_lock);
+       do {
+               spin_lock_irq(&port->inbuf_lock);
+               buf = virtqueue_detach_unused_buf(port->in_vq);
+               spin_unlock_irq(&port->inbuf_lock);
+               if (buf)
+                       free_buf(buf, true);
+       } while (buf);
 
        spin_lock_irq(&port->outvq_lock);
        reclaim_consumed_buffers(port);
+       spin_unlock_irq(&port->outvq_lock);
 
        /* Free pending buffers from the out-queue. */
-       while ((buf = virtqueue_detach_unused_buf(port->out_vq)))
-               free_buf(buf, true);
-       spin_unlock_irq(&port->outvq_lock);
+       do {
+               spin_lock_irq(&port->outvq_lock);
+               buf = virtqueue_detach_unused_buf(port->out_vq);
+               spin_unlock_irq(&port->outvq_lock);
+               if (buf)
+                       free_buf(buf, true);
+       } while (buf);
 }
 
 /*
index 190122e64a3a5e78079423c4f08f96a7ac52a1b2..85a449cf61e3fa79b36849f84a831e05683d48dc 100644 (file)
@@ -203,7 +203,7 @@ at91_clk_register_programmable(struct regmap *regmap,
        ret = clk_hw_register(NULL, &prog->hw);
        if (ret) {
                kfree(prog);
-               hw = &prog->hw;
+               hw = ERR_PTR(ret);
        }
 
        return hw;
index b68bf573dcfb743a353f8312b67ec1585421f1f8..8c7763fd9efc52b30f02d9ebcd4fdb10d2876465 100644 (file)
@@ -502,8 +502,12 @@ static long bcm2835_pll_rate_from_divisors(unsigned long parent_rate,
 static long bcm2835_pll_round_rate(struct clk_hw *hw, unsigned long rate,
                                   unsigned long *parent_rate)
 {
+       struct bcm2835_pll *pll = container_of(hw, struct bcm2835_pll, hw);
+       const struct bcm2835_pll_data *data = pll->data;
        u32 ndiv, fdiv;
 
+       rate = clamp(rate, data->min_rate, data->max_rate);
+
        bcm2835_pll_choose_ndiv_and_fdiv(rate, *parent_rate, &ndiv, &fdiv);
 
        return bcm2835_pll_rate_from_divisors(*parent_rate, ndiv, fdiv, 1);
@@ -608,13 +612,6 @@ static int bcm2835_pll_set_rate(struct clk_hw *hw,
        u32 ana[4];
        int i;
 
-       if (rate < data->min_rate || rate > data->max_rate) {
-               dev_err(cprman->dev, "%s: rate out of spec: %lu vs (%lu, %lu)\n",
-                       clk_hw_get_name(hw), rate,
-                       data->min_rate, data->max_rate);
-               return -EINVAL;
-       }
-
        if (rate > data->max_fb_rate) {
                use_fb_prediv = true;
                rate /= 2;
index edf3b96b3b737f0ec0aad0661bc0ded04177e58f..1d99292e2039ee5ff6e187e4e0cdbddf85d860e1 100644 (file)
@@ -685,7 +685,7 @@ static void __init berlin2_clock_setup(struct device_node *np)
        }
 
        /* register clk-provider */
-       of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data);
+       of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
 
        return;
 
index 0718e831475fdace5f7bd0c06190147da1fb3870..3b784b593afde7fea97650f938b03c55c79d8ac0 100644 (file)
@@ -382,7 +382,7 @@ static void __init berlin2q_clock_setup(struct device_node *np)
        }
 
        /* register clk-provider */
-       of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data);
+       of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
 
        return;
 
index 8802a2dd56ac415c1576d62eb8a0bff0c81c0f9e..f674778fb3ac5912e05fb7448c6b1fb21eefa3d8 100644 (file)
@@ -82,6 +82,6 @@ static void __init efm32gg_cmu_init(struct device_node *np)
        hws[clk_HFPERCLKDAC0] = clk_hw_register_gate(NULL, "HFPERCLK.DAC0",
                        "HFXO", 0, base + CMU_HFPERCLKEN0, 17, 0, NULL);
 
-       of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data);
+       of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
 }
 CLK_OF_DECLARE(efm32ggcmu, "efm32gg,cmu", efm32gg_cmu_init);
index b637f5979023f92659b2c6cd927f030a275b1532..eb953d3b0b69bef048312fe9b6ec3c2678d815bb 100644 (file)
@@ -216,6 +216,7 @@ static int max77686_clk_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
+       drv_data->num_clks = num_clks;
        drv_data->max_clk_data = devm_kcalloc(dev, num_clks,
                                              sizeof(*drv_data->max_clk_data),
                                              GFP_KERNEL);
index 20b105584f82fb9bad9ba17ec4b1a6136affc656..80ae2a51452d7410c1edc856f8126d6451c75913 100644 (file)
@@ -700,6 +700,7 @@ static struct clk * __init create_mux_common(struct clockgen *cg,
                                             struct mux_hwclock *hwc,
                                             const struct clk_ops *ops,
                                             unsigned long min_rate,
+                                            unsigned long max_rate,
                                             unsigned long pct80_rate,
                                             const char *fmt, int idx)
 {
@@ -728,6 +729,8 @@ static struct clk * __init create_mux_common(struct clockgen *cg,
                        continue;
                if (rate < min_rate)
                        continue;
+               if (rate > max_rate)
+                       continue;
 
                parent_names[j] = div->name;
                hwc->parent_to_clksel[j] = i;
@@ -759,7 +762,7 @@ static struct clk * __init create_one_cmux(struct clockgen *cg, int idx)
        struct mux_hwclock *hwc;
        const struct clockgen_pll_div *div;
        unsigned long plat_rate, min_rate;
-       u64 pct80_rate;
+       u64 max_rate, pct80_rate;
        u32 clksel;
 
        hwc = kzalloc(sizeof(*hwc), GFP_KERNEL);
@@ -787,8 +790,8 @@ static struct clk * __init create_one_cmux(struct clockgen *cg, int idx)
                return NULL;
        }
 
-       pct80_rate = clk_get_rate(div->clk);
-       pct80_rate *= 8;
+       max_rate = clk_get_rate(div->clk);
+       pct80_rate = max_rate * 8;
        do_div(pct80_rate, 10);
 
        plat_rate = clk_get_rate(cg->pll[PLATFORM_PLL].div[PLL_DIV1].clk);
@@ -798,7 +801,7 @@ static struct clk * __init create_one_cmux(struct clockgen *cg, int idx)
        else
                min_rate = plat_rate / 2;
 
-       return create_mux_common(cg, hwc, &cmux_ops, min_rate,
+       return create_mux_common(cg, hwc, &cmux_ops, min_rate, max_rate,
                                 pct80_rate, "cg-cmux%d", idx);
 }
 
@@ -813,7 +816,7 @@ static struct clk * __init create_one_hwaccel(struct clockgen *cg, int idx)
        hwc->reg = cg->regs + 0x20 * idx + 0x10;
        hwc->info = cg->info.hwaccel[idx];
 
-       return create_mux_common(cg, hwc, &hwaccel_ops, 0, 0,
+       return create_mux_common(cg, hwc, &hwaccel_ops, 0, ULONG_MAX, 0,
                                 "cg-hwaccel%d", idx);
 }
 
index 5daddf5ecc4b2ca403203d5e5cae5eabf825915f..bc37030e38ba62833316302a383d86cdfd671921 100644 (file)
@@ -463,22 +463,20 @@ static int xgene_clk_enable(struct clk_hw *hw)
        struct xgene_clk *pclk = to_xgene_clk(hw);
        unsigned long flags = 0;
        u32 data;
-       phys_addr_t reg;
 
        if (pclk->lock)
                spin_lock_irqsave(pclk->lock, flags);
 
        if (pclk->param.csr_reg != NULL) {
                pr_debug("%s clock enabled\n", clk_hw_get_name(hw));
-               reg = __pa(pclk->param.csr_reg);
                /* First enable the clock */
                data = xgene_clk_read(pclk->param.csr_reg +
                                        pclk->param.reg_clk_offset);
                data |= pclk->param.reg_clk_mask;
                xgene_clk_write(data, pclk->param.csr_reg +
                                        pclk->param.reg_clk_offset);
-               pr_debug("%s clock PADDR base %pa clk offset 0x%08X mask 0x%08X value 0x%08X\n",
-                       clk_hw_get_name(hw), &reg,
+               pr_debug("%s clk offset 0x%08X mask 0x%08X value 0x%08X\n",
+                       clk_hw_get_name(hw),
                        pclk->param.reg_clk_offset, pclk->param.reg_clk_mask,
                        data);
 
@@ -488,8 +486,8 @@ static int xgene_clk_enable(struct clk_hw *hw)
                data &= ~pclk->param.reg_csr_mask;
                xgene_clk_write(data, pclk->param.csr_reg +
                                        pclk->param.reg_csr_offset);
-               pr_debug("%s CSR RESET PADDR base %pa csr offset 0x%08X mask 0x%08X value 0x%08X\n",
-                       clk_hw_get_name(hw), &reg,
+               pr_debug("%s csr offset 0x%08X mask 0x%08X value 0x%08X\n",
+                       clk_hw_get_name(hw),
                        pclk->param.reg_csr_offset, pclk->param.reg_csr_mask,
                        data);
        }
index fe364e63f8de899867d2c6e8bb47394b8e871426..c0e8e1f196aae4f15c39bf39a725db8fd192f101 100644 (file)
@@ -195,7 +195,7 @@ static void __init hi6220_clk_sys_init(struct device_node *np)
        hi6220_clk_register_divider(hi6220_div_clks_sys,
                        ARRAY_SIZE(hi6220_div_clks_sys), clk_data);
 }
-CLK_OF_DECLARE(hi6220_clk_sys, "hisilicon,hi6220-sysctrl", hi6220_clk_sys_init);
+CLK_OF_DECLARE_DRIVER(hi6220_clk_sys, "hisilicon,hi6220-sysctrl", hi6220_clk_sys_init);
 
 
 /* clocks in media controller */
@@ -252,7 +252,7 @@ static void __init hi6220_clk_media_init(struct device_node *np)
        hi6220_clk_register_divider(hi6220_div_clks_media,
                                ARRAY_SIZE(hi6220_div_clks_media), clk_data);
 }
-CLK_OF_DECLARE(hi6220_clk_media, "hisilicon,hi6220-mediactrl", hi6220_clk_media_init);
+CLK_OF_DECLARE_DRIVER(hi6220_clk_media, "hisilicon,hi6220-mediactrl", hi6220_clk_media_init);
 
 
 /* clocks in pmctrl */
index 19f9b622981a5281bc375a756d437cc117c63640..7a6acc3e4a927c1ae874bdb74bbe826284602c3a 100644 (file)
@@ -223,7 +223,7 @@ static unsigned long clk_pllv3_av_recalc_rate(struct clk_hw *hw,
        temp64 *= mfn;
        do_div(temp64, mfd);
 
-       return (parent_rate * div) + (u32)temp64;
+       return parent_rate * div + (unsigned long)temp64;
 }
 
 static long clk_pllv3_av_round_rate(struct clk_hw *hw, unsigned long rate,
@@ -247,7 +247,11 @@ static long clk_pllv3_av_round_rate(struct clk_hw *hw, unsigned long rate,
        do_div(temp64, parent_rate);
        mfn = temp64;
 
-       return parent_rate * div + parent_rate * mfn / mfd;
+       temp64 = (u64)parent_rate;
+       temp64 *= mfn;
+       do_div(temp64, mfd);
+
+       return parent_rate * div + (unsigned long)temp64;
 }
 
 static int clk_pllv3_av_set_rate(struct clk_hw *hw, unsigned long rate,
index 380c372d528ec1b0ec593e00eb052547515208e8..f042bd2a6a998651481e7ca594333c3856f6d8dc 100644 (file)
@@ -8,6 +8,7 @@ config COMMON_CLK_MEDIATEK
 
 config COMMON_CLK_MT8135
        bool "Clock driver for Mediatek MT8135"
+       depends on ARCH_MEDIATEK || COMPILE_TEST
        select COMMON_CLK_MEDIATEK
        default ARCH_MEDIATEK
        ---help---
@@ -15,6 +16,7 @@ config COMMON_CLK_MT8135
 
 config COMMON_CLK_MT8173
        bool "Clock driver for Mediatek MT8173"
+       depends on ARCH_MEDIATEK || COMPILE_TEST
        select COMMON_CLK_MEDIATEK
        default ARCH_MEDIATEK
        ---help---
index 3a51fff1b0e76b7bbebdbc969d7f1d67fe796887..9adaf48aea2317625a520bb42635fa8362946893 100644 (file)
@@ -313,7 +313,7 @@ static void __init mmp2_clk_init(struct device_node *np)
        }
 
        pxa_unit->apmu_base = of_iomap(np, 1);
-       if (!pxa_unit->mpmu_base) {
+       if (!pxa_unit->apmu_base) {
                pr_err("failed to map apmu registers\n");
                return;
        }
index 87f2317b2a005aca6aed18a39de7509010edd19e..f110c02e83cb6142c0653357f4c13d05b0f17018 100644 (file)
@@ -262,7 +262,7 @@ static void __init pxa168_clk_init(struct device_node *np)
        }
 
        pxa_unit->apmu_base = of_iomap(np, 1);
-       if (!pxa_unit->mpmu_base) {
+       if (!pxa_unit->apmu_base) {
                pr_err("failed to map apmu registers\n");
                return;
        }
index e22a67f76d932546eba42aec3b69be266d775238..64d1ef49caebedd9d35226afcec9e0a624f0965f 100644 (file)
@@ -282,7 +282,7 @@ static void __init pxa910_clk_init(struct device_node *np)
        }
 
        pxa_unit->apmu_base = of_iomap(np, 1);
-       if (!pxa_unit->mpmu_base) {
+       if (!pxa_unit->apmu_base) {
                pr_err("failed to map apmu registers\n");
                return;
        }
@@ -294,7 +294,7 @@ static void __init pxa910_clk_init(struct device_node *np)
        }
 
        pxa_unit->apbcp_base = of_iomap(np, 3);
-       if (!pxa_unit->mpmu_base) {
+       if (!pxa_unit->apbcp_base) {
                pr_err("failed to map apbcp registers\n");
                return;
        }
index 45905fc0d75b3e650403da4aa83513322f630cea..cecb0fdfaef6cd354893f9f4628427c7dee655e5 100644 (file)
@@ -305,7 +305,7 @@ static const struct of_device_id armada_3700_periph_clock_of_match[] = {
 };
 static int armada_3700_add_composite_clk(const struct clk_periph_data *data,
                                         void __iomem *reg, spinlock_t *lock,
-                                        struct device *dev, struct clk_hw *hw)
+                                        struct device *dev, struct clk_hw **hw)
 {
        const struct clk_ops *mux_ops = NULL, *gate_ops = NULL,
                *rate_ops = NULL;
@@ -329,6 +329,7 @@ static int armada_3700_add_composite_clk(const struct clk_periph_data *data,
                gate->lock = lock;
                gate_ops = gate_hw->init->ops;
                gate->reg = reg + (u64)gate->reg;
+               gate->flags = CLK_GATE_SET_TO_DISABLE;
        }
 
        if (data->rate_hw) {
@@ -353,13 +354,13 @@ static int armada_3700_add_composite_clk(const struct clk_periph_data *data,
                }
        }
 
-       hw = clk_hw_register_composite(dev, data->name, data->parent_names,
+       *hw = clk_hw_register_composite(dev, data->name, data->parent_names,
                                       data->num_parents, mux_hw,
                                       mux_ops, rate_hw, rate_ops,
                                       gate_hw, gate_ops, CLK_IGNORE_UNUSED);
 
-       if (IS_ERR(hw))
-               return PTR_ERR(hw);
+       if (IS_ERR(*hw))
+               return PTR_ERR(*hw);
 
        return 0;
 }
@@ -400,7 +401,7 @@ static int armada_3700_periph_clock_probe(struct platform_device *pdev)
        spin_lock_init(&driver_data->lock);
 
        for (i = 0; i < num_periph; i++) {
-               struct clk_hw *hw = driver_data->hw_data->hws[i];
+               struct clk_hw **hw = &driver_data->hw_data->hws[i];
 
                if (armada_3700_add_composite_clk(&data[i], reg,
                                                  &driver_data->lock, dev, hw))
index 8feba93672c5e4248a98c7116887995756bc77f5..e8075359366b0d9ef9cf84611d6c36b19fc22c4a 100644 (file)
@@ -144,11 +144,8 @@ struct clk *rockchip_clk_register_ddrclk(const char *name, int flags,
        ddrclk->ddr_flag = ddr_flag;
 
        clk = clk_register(NULL, &ddrclk->hw);
-       if (IS_ERR(clk)) {
-               pr_err("%s: could not register ddrclk %s\n", __func__,  name);
+       if (IS_ERR(clk))
                kfree(ddrclk);
-               return NULL;
-       }
 
        return clk;
 }
index 51d152f735cc5be186c9b0b1e2bf00cef2381b7b..17e68a724945608382924ce18b64f3df9d172db8 100644 (file)
@@ -106,6 +106,7 @@ static const struct of_device_id exynos_audss_clk_of_match[] = {
        },
        { },
 };
+MODULE_DEVICE_TABLE(of, exynos_audss_clk_of_match);
 
 static void exynos_audss_clk_teardown(void)
 {
index 96fab6cfb2027f805e8ed941012a464d58cbb3dd..6c6afb87b4ce3babf8d7625581a5f7a288b1a1d5 100644 (file)
@@ -132,28 +132,34 @@ free_clkout:
        pr_err("%s: failed to register clkout clock\n", __func__);
 }
 
+/*
+ * We use CLK_OF_DECLARE_DRIVER initialization method to avoid setting
+ * the OF_POPULATED flag on the pmu device tree node, so later the
+ * Exynos PMU platform device can be properly probed with PMU driver.
+ */
+
 static void __init exynos4_clkout_init(struct device_node *node)
 {
        exynos_clkout_init(node, EXYNOS4_CLKOUT_MUX_MASK);
 }
-CLK_OF_DECLARE(exynos4210_clkout, "samsung,exynos4210-pmu",
+CLK_OF_DECLARE_DRIVER(exynos4210_clkout, "samsung,exynos4210-pmu",
                exynos4_clkout_init);
-CLK_OF_DECLARE(exynos4212_clkout, "samsung,exynos4212-pmu",
+CLK_OF_DECLARE_DRIVER(exynos4212_clkout, "samsung,exynos4212-pmu",
                exynos4_clkout_init);
-CLK_OF_DECLARE(exynos4412_clkout, "samsung,exynos4412-pmu",
+CLK_OF_DECLARE_DRIVER(exynos4412_clkout, "samsung,exynos4412-pmu",
                exynos4_clkout_init);
-CLK_OF_DECLARE(exynos3250_clkout, "samsung,exynos3250-pmu",
+CLK_OF_DECLARE_DRIVER(exynos3250_clkout, "samsung,exynos3250-pmu",
                exynos4_clkout_init);
 
 static void __init exynos5_clkout_init(struct device_node *node)
 {
        exynos_clkout_init(node, EXYNOS5_CLKOUT_MUX_MASK);
 }
-CLK_OF_DECLARE(exynos5250_clkout, "samsung,exynos5250-pmu",
+CLK_OF_DECLARE_DRIVER(exynos5250_clkout, "samsung,exynos5250-pmu",
                exynos5_clkout_init);
-CLK_OF_DECLARE(exynos5410_clkout, "samsung,exynos5410-pmu",
+CLK_OF_DECLARE_DRIVER(exynos5410_clkout, "samsung,exynos5410-pmu",
                exynos5_clkout_init);
-CLK_OF_DECLARE(exynos5420_clkout, "samsung,exynos5420-pmu",
+CLK_OF_DECLARE_DRIVER(exynos5420_clkout, "samsung,exynos5420-pmu",
                exynos5_clkout_init);
-CLK_OF_DECLARE(exynos5433_clkout, "samsung,exynos5433-pmu",
+CLK_OF_DECLARE_DRIVER(exynos5433_clkout, "samsung,exynos5433-pmu",
                exynos5_clkout_init);
index 79596463e0d94b66a3135e7fe60d07cfcd023ad3..4a82a49cff5e604a290b395a84614fc5158dd27d 100644 (file)
@@ -191,6 +191,8 @@ static struct clk_div_table axi_div_table[] = {
 static SUNXI_CCU_DIV_TABLE(axi_clk, "axi", "cpu",
                           0x050, 0, 3, axi_div_table, 0);
 
+#define SUN6I_A31_AHB1_REG  0x054
+
 static const char * const ahb1_parents[] = { "osc32k", "osc24M",
                                             "axi", "pll-periph" };
 
@@ -1230,6 +1232,16 @@ static void __init sun6i_a31_ccu_setup(struct device_node *node)
        val &= BIT(16);
        writel(val, reg + SUN6I_A31_PLL_MIPI_REG);
 
+       /* Force AHB1 to PLL6 / 3 */
+       val = readl(reg + SUN6I_A31_AHB1_REG);
+       /* set PLL6 pre-div = 3 */
+       val &= ~GENMASK(7, 6);
+       val |= 0x2 << 6;
+       /* select PLL6 / pre-div */
+       val &= ~GENMASK(13, 12);
+       val |= 0x3 << 12;
+       writel(val, reg + SUN6I_A31_AHB1_REG);
+
        sunxi_ccu_probe(node, reg, &sun6i_a31_ccu_desc);
 
        ccu_mux_notifier_register(pll_cpu_clk.common.hw.clk,
index 838b22aa8b67fbabdfef5386485d5c3316adf296..f2c9274b8bd570c586b56d10ec17e829e6a710c7 100644 (file)
@@ -373,7 +373,7 @@ static void sun4i_get_apb1_factors(struct factors_request *req)
        else
                calcp = 3;
 
-       calcm = (req->parent_rate >> calcp) - 1;
+       calcm = (div >> calcp) - 1;
 
        req->rate = (req->parent_rate >> calcp) / (calcm + 1);
        req->m = calcm;
index 5ffb898d0839df896ac7a82b266c46202bbdf0e9..26c53f7963a438dcaaac252229cea31419ab44ea 100644 (file)
@@ -79,7 +79,7 @@ static int uniphier_clk_probe(struct platform_device *pdev)
        hw_data->num = clk_num;
 
        /* avoid returning NULL for unused idx */
-       for (; clk_num >= 0; clk_num--)
+       while (--clk_num >= 0)
                hw_data->hws[clk_num] = ERR_PTR(-EINVAL);
 
        for (p = data; p->name; p++) {
@@ -110,6 +110,10 @@ static int uniphier_clk_remove(struct platform_device *pdev)
 
 static const struct of_device_id uniphier_clk_match[] = {
        /* System clock */
+       {
+               .compatible = "socionext,uniphier-sld3-clock",
+               .data = uniphier_sld3_sys_clk_data,
+       },
        {
                .compatible = "socionext,uniphier-ld4-clock",
                .data = uniphier_ld4_sys_clk_data,
@@ -138,7 +142,7 @@ static const struct of_device_id uniphier_clk_match[] = {
                .compatible = "socionext,uniphier-ld20-clock",
                .data = uniphier_ld20_sys_clk_data,
        },
-       /* Media I/O clock */
+       /* Media I/O clock, SD clock */
        {
                .compatible = "socionext,uniphier-sld3-mio-clock",
                .data = uniphier_sld3_mio_clk_data,
@@ -156,20 +160,20 @@ static const struct of_device_id uniphier_clk_match[] = {
                .data = uniphier_sld3_mio_clk_data,
        },
        {
-               .compatible = "socionext,uniphier-pro5-mio-clock",
-               .data = uniphier_pro5_mio_clk_data,
+               .compatible = "socionext,uniphier-pro5-sd-clock",
+               .data = uniphier_pro5_sd_clk_data,
        },
        {
-               .compatible = "socionext,uniphier-pxs2-mio-clock",
-               .data = uniphier_pro5_mio_clk_data,
+               .compatible = "socionext,uniphier-pxs2-sd-clock",
+               .data = uniphier_pro5_sd_clk_data,
        },
        {
                .compatible = "socionext,uniphier-ld11-mio-clock",
                .data = uniphier_sld3_mio_clk_data,
        },
        {
-               .compatible = "socionext,uniphier-ld20-mio-clock",
-               .data = uniphier_pro5_mio_clk_data,
+               .compatible = "socionext,uniphier-ld20-sd-clock",
+               .data = uniphier_pro5_sd_clk_data,
        },
        /* Peripheral clock */
        {
index 6aa7ec768d0bfad0141ed2ccc24611be6c34962e..218d20f099cec2da27eb2a80fc761b8b6b1c263e 100644 (file)
@@ -93,7 +93,7 @@ const struct uniphier_clk_data uniphier_sld3_mio_clk_data[] = {
        { /* sentinel */ }
 };
 
-const struct uniphier_clk_data uniphier_pro5_mio_clk_data[] = {
+const struct uniphier_clk_data uniphier_pro5_sd_clk_data[] = {
        UNIPHIER_MIO_CLK_SD_FIXED,
        UNIPHIER_MIO_CLK_SD(0, 0),
        UNIPHIER_MIO_CLK_SD(1, 1),
index 15a2f2cbe0d90e4ab5d68d4d9eb310bdf5164cf2..2c243a894f3b9fe19bd9575e2ca639f5939fa503 100644 (file)
@@ -42,7 +42,7 @@ static u8 uniphier_clk_mux_get_parent(struct clk_hw *hw)
        struct uniphier_clk_mux *mux = to_uniphier_clk_mux(hw);
        int num_parents = clk_hw_get_num_parents(hw);
        int ret;
-       u32 val;
+       unsigned int val;
        u8 i;
 
        ret = regmap_read(mux->regmap, mux->reg, &val);
index 3ae184062388bf487be8b91c5b52193fd7383d75..0244dba1f4cf554567c37bd15978bd8813fc3be9 100644 (file)
@@ -115,7 +115,7 @@ extern const struct uniphier_clk_data uniphier_pxs2_sys_clk_data[];
 extern const struct uniphier_clk_data uniphier_ld11_sys_clk_data[];
 extern const struct uniphier_clk_data uniphier_ld20_sys_clk_data[];
 extern const struct uniphier_clk_data uniphier_sld3_mio_clk_data[];
-extern const struct uniphier_clk_data uniphier_pro5_mio_clk_data[];
+extern const struct uniphier_clk_data uniphier_pro5_sd_clk_data[];
 extern const struct uniphier_clk_data uniphier_ld4_peri_clk_data[];
 extern const struct uniphier_clk_data uniphier_pro4_peri_clk_data[];
 
index 245190839359301edd5896e9bb2196c961a2847e..e2c6e43cf8ca31e27af1b6c8630f7a0c590e8b7a 100644 (file)
@@ -417,6 +417,16 @@ config SYS_SUPPORTS_SH_TMU
 config SYS_SUPPORTS_EM_STI
         bool
 
+config CLKSRC_JCORE_PIT
+       bool "J-Core PIT timer driver" if COMPILE_TEST
+       depends on OF
+       depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
+       select CLKSRC_MMIO
+       help
+         This enables build of clocksource and clockevent driver for
+         the integrated PIT in the J-Core synthesizable, open source SoC.
+
 config SH_TIMER_CMT
        bool "Renesas CMT timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
index fd9d6df0bbc0993c3b7862a08f89dc3a9725be9d..cf87f407f1adbfaab8d485bea93f6ee859d8e3d6 100644 (file)
@@ -5,6 +5,7 @@ obj-$(CONFIG_ATMEL_TCB_CLKSRC)  += tcb_clksrc.o
 obj-$(CONFIG_X86_PM_TIMER)     += acpi_pm.o
 obj-$(CONFIG_SCx200HR_TIMER)   += scx200_hrt.o
 obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC)   += cs5535-clockevt.o
+obj-$(CONFIG_CLKSRC_JCORE_PIT)         += jcore-pit.o
 obj-$(CONFIG_SH_TIMER_CMT)     += sh_cmt.o
 obj-$(CONFIG_SH_TIMER_MTU2)    += sh_mtu2.o
 obj-$(CONFIG_SH_TIMER_TMU)     += sh_tmu.o
diff --git a/drivers/clocksource/jcore-pit.c b/drivers/clocksource/jcore-pit.c
new file mode 100644 (file)
index 0000000..54e1665
--- /dev/null
@@ -0,0 +1,249 @@
+/*
+ * J-Core SoC PIT/clocksource driver
+ *
+ * Copyright (C) 2015-2016 Smart Energy Instruments, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/sched_clock.h>
+#include <linux/cpu.h>
+#include <linux/cpuhotplug.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#define PIT_IRQ_SHIFT          12
+#define PIT_PRIO_SHIFT         20
+#define PIT_ENABLE_SHIFT       26
+#define PIT_PRIO_MASK          0xf
+
+#define REG_PITEN              0x00
+#define REG_THROT              0x10
+#define REG_COUNT              0x14
+#define REG_BUSPD              0x18
+#define REG_SECHI              0x20
+#define REG_SECLO              0x24
+#define REG_NSEC               0x28
+
+struct jcore_pit {
+       struct clock_event_device       ced;
+       void __iomem                    *base;
+       unsigned long                   periodic_delta;
+       u32                             enable_val;
+};
+
+static void __iomem *jcore_pit_base;
+static struct jcore_pit __percpu *jcore_pit_percpu;
+
+static notrace u64 jcore_sched_clock_read(void)
+{
+       u32 seclo, nsec, seclo0;
+       __iomem void *base = jcore_pit_base;
+
+       seclo = readl(base + REG_SECLO);
+       do {
+               seclo0 = seclo;
+               nsec  = readl(base + REG_NSEC);
+               seclo = readl(base + REG_SECLO);
+       } while (seclo0 != seclo);
+
+       return seclo * NSEC_PER_SEC + nsec;
+}
+
+static cycle_t jcore_clocksource_read(struct clocksource *cs)
+{
+       return jcore_sched_clock_read();
+}
+
+static int jcore_pit_disable(struct jcore_pit *pit)
+{
+       writel(0, pit->base + REG_PITEN);
+       return 0;
+}
+
+static int jcore_pit_set(unsigned long delta, struct jcore_pit *pit)
+{
+       jcore_pit_disable(pit);
+       writel(delta, pit->base + REG_THROT);
+       writel(pit->enable_val, pit->base + REG_PITEN);
+       return 0;
+}
+
+static int jcore_pit_set_state_shutdown(struct clock_event_device *ced)
+{
+       struct jcore_pit *pit = container_of(ced, struct jcore_pit, ced);
+
+       return jcore_pit_disable(pit);
+}
+
+static int jcore_pit_set_state_oneshot(struct clock_event_device *ced)
+{
+       struct jcore_pit *pit = container_of(ced, struct jcore_pit, ced);
+
+       return jcore_pit_disable(pit);
+}
+
+static int jcore_pit_set_state_periodic(struct clock_event_device *ced)
+{
+       struct jcore_pit *pit = container_of(ced, struct jcore_pit, ced);
+
+       return jcore_pit_set(pit->periodic_delta, pit);
+}
+
+static int jcore_pit_set_next_event(unsigned long delta,
+                                   struct clock_event_device *ced)
+{
+       struct jcore_pit *pit = container_of(ced, struct jcore_pit, ced);
+
+       return jcore_pit_set(delta, pit);
+}
+
+static int jcore_pit_local_init(unsigned cpu)
+{
+       struct jcore_pit *pit = this_cpu_ptr(jcore_pit_percpu);
+       unsigned buspd, freq;
+
+       pr_info("Local J-Core PIT init on cpu %u\n", cpu);
+
+       buspd = readl(pit->base + REG_BUSPD);
+       freq = DIV_ROUND_CLOSEST(NSEC_PER_SEC, buspd);
+       pit->periodic_delta = DIV_ROUND_CLOSEST(NSEC_PER_SEC, HZ * buspd);
+
+       clockevents_config_and_register(&pit->ced, freq, 1, ULONG_MAX);
+
+       return 0;
+}
+
+static irqreturn_t jcore_timer_interrupt(int irq, void *dev_id)
+{
+       struct jcore_pit *pit = this_cpu_ptr(dev_id);
+
+       if (clockevent_state_oneshot(&pit->ced))
+               jcore_pit_disable(pit);
+
+       pit->ced.event_handler(&pit->ced);
+
+       return IRQ_HANDLED;
+}
+
+static int __init jcore_pit_init(struct device_node *node)
+{
+       int err;
+       unsigned pit_irq, cpu;
+       unsigned long hwirq;
+       u32 irqprio, enable_val;
+
+       jcore_pit_base = of_iomap(node, 0);
+       if (!jcore_pit_base) {
+               pr_err("Error: Cannot map base address for J-Core PIT\n");
+               return -ENXIO;
+       }
+
+       pit_irq = irq_of_parse_and_map(node, 0);
+       if (!pit_irq) {
+               pr_err("Error: J-Core PIT has no IRQ\n");
+               return -ENXIO;
+       }
+
+       pr_info("Initializing J-Core PIT at %p IRQ %d\n",
+               jcore_pit_base, pit_irq);
+
+       err = clocksource_mmio_init(jcore_pit_base, "jcore_pit_cs",
+                                   NSEC_PER_SEC, 400, 32,
+                                   jcore_clocksource_read);
+       if (err) {
+               pr_err("Error registering clocksource device: %d\n", err);
+               return err;
+       }
+
+       sched_clock_register(jcore_sched_clock_read, 32, NSEC_PER_SEC);
+
+       jcore_pit_percpu = alloc_percpu(struct jcore_pit);
+       if (!jcore_pit_percpu) {
+               pr_err("Failed to allocate memory for clock event device\n");
+               return -ENOMEM;
+       }
+
+       err = request_irq(pit_irq, jcore_timer_interrupt,
+                         IRQF_TIMER | IRQF_PERCPU,
+                         "jcore_pit", jcore_pit_percpu);
+       if (err) {
+               pr_err("pit irq request failed: %d\n", err);
+               free_percpu(jcore_pit_percpu);
+               return err;
+       }
+
+       /*
+        * The J-Core PIT is not hard-wired to a particular IRQ, but
+        * integrated with the interrupt controller such that the IRQ it
+        * generates is programmable, as follows:
+        *
+        * The bit layout of the PIT enable register is:
+        *
+        *      .....e..ppppiiiiiiii............
+        *
+        * where the .'s indicate unrelated/unused bits, e is enable,
+        * p is priority, and i is hard irq number.
+        *
+        * For the PIT included in AIC1 (obsolete but still in use),
+        * any hard irq (trap number) can be programmed via the 8
+        * iiiiiiii bits, and a priority (0-15) is programmable
+        * separately in the pppp bits.
+        *
+        * For the PIT included in AIC2 (current), the programming
+        * interface is equivalent modulo interrupt mapping. This is
+        * why a different compatible tag was not used. However only
+        * traps 64-127 (the ones actually intended to be used for
+        * interrupts, rather than syscalls/exceptions/etc.) can be
+        * programmed (the high 2 bits of i are ignored) and the
+        * priority pppp is <<2'd and or'd onto the irq number. This
+        * choice seems to have been made on the hardware engineering
+        * side under an assumption that preserving old AIC1 priority
+        * mappings was important. Future models will likely ignore
+        * the pppp field.
+        */
+       hwirq = irq_get_irq_data(pit_irq)->hwirq;
+       irqprio = (hwirq >> 2) & PIT_PRIO_MASK;
+       enable_val = (1U << PIT_ENABLE_SHIFT)
+                  | (hwirq << PIT_IRQ_SHIFT)
+                  | (irqprio << PIT_PRIO_SHIFT);
+
+       for_each_present_cpu(cpu) {
+               struct jcore_pit *pit = per_cpu_ptr(jcore_pit_percpu, cpu);
+
+               pit->base = of_iomap(node, cpu);
+               if (!pit->base) {
+                       pr_err("Unable to map PIT for cpu %u\n", cpu);
+                       continue;
+               }
+
+               pit->ced.name = "jcore_pit";
+               pit->ced.features = CLOCK_EVT_FEAT_PERIODIC
+                                 | CLOCK_EVT_FEAT_ONESHOT
+                                 | CLOCK_EVT_FEAT_PERCPU;
+               pit->ced.cpumask = cpumask_of(cpu);
+               pit->ced.rating = 400;
+               pit->ced.irq = pit_irq;
+               pit->ced.set_state_shutdown = jcore_pit_set_state_shutdown;
+               pit->ced.set_state_periodic = jcore_pit_set_state_periodic;
+               pit->ced.set_state_oneshot = jcore_pit_set_state_oneshot;
+               pit->ced.set_next_event = jcore_pit_set_next_event;
+
+               pit->enable_val = enable_val;
+       }
+
+       cpuhp_setup_state(CPUHP_AP_JCORE_TIMER_STARTING,
+                         "AP_JCORE_TIMER_STARTING",
+                         jcore_pit_local_init, NULL);
+
+       return 0;
+}
+
+CLOCKSOURCE_OF_DECLARE(jcore_pit, "jcore,pit", jcore_pit_init);
index c184eb84101e9f9a72354baf4c87c4d2ada92d19..4f87f3e76d8328ec6ca462882f873a51b2f72995 100644 (file)
@@ -152,6 +152,13 @@ static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+static cycle_t sun5i_clksrc_read(struct clocksource *clksrc)
+{
+       struct sun5i_timer_clksrc *cs = to_sun5i_timer_clksrc(clksrc);
+
+       return ~readl(cs->timer.base + TIMER_CNTVAL_LO_REG(1));
+}
+
 static int sun5i_rate_cb_clksrc(struct notifier_block *nb,
                                unsigned long event, void *data)
 {
@@ -210,8 +217,13 @@ static int __init sun5i_setup_clocksource(struct device_node *node,
        writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
               base + TIMER_CTL_REG(1));
 
-       ret = clocksource_mmio_init(base + TIMER_CNTVAL_LO_REG(1), node->name,
-                                   rate, 340, 32, clocksource_mmio_readl_down);
+       cs->clksrc.name = node->name;
+       cs->clksrc.rating = 340;
+       cs->clksrc.read = sun5i_clksrc_read;
+       cs->clksrc.mask = CLOCKSOURCE_MASK(32);
+       cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+
+       ret = clocksource_register_hz(&cs->clksrc, rate);
        if (ret) {
                pr_err("Couldn't register clock source.\n");
                goto err_remove_notifier;
index 1b2f28f69a8142fce5edcd02c2f85f2e4f0c4a72..4852d9efe74e7c470169b25babbb3355e8a5a0b6 100644 (file)
@@ -80,11 +80,17 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
 {
        struct cppc_cpudata *cpu;
        struct cpufreq_freqs freqs;
+       u32 desired_perf;
        int ret = 0;
 
        cpu = all_cpu_data[policy->cpu];
 
-       cpu->perf_ctrls.desired_perf = (u64)target_freq * policy->max / cppc_dmi_max_khz;
+       desired_perf = (u64)target_freq * cpu->perf_caps.highest_perf / cppc_dmi_max_khz;
+       /* Return if it is exactly the same perf */
+       if (desired_perf == cpu->perf_ctrls.desired_perf)
+               return ret;
+
+       cpu->perf_ctrls.desired_perf = desired_perf;
        freqs.old = policy->cur;
        freqs.new = target_freq;
 
index 18da4f8051d372d06eb74e68e591f57e40569c82..13475890d792d7bb24209a8fa8617f011883e660 100644 (file)
@@ -17,6 +17,7 @@
 struct cs_policy_dbs_info {
        struct policy_dbs_info policy_dbs;
        unsigned int down_skip;
+       unsigned int requested_freq;
 };
 
 static inline struct cs_policy_dbs_info *to_dbs_info(struct policy_dbs_info *policy_dbs)
@@ -61,6 +62,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
 {
        struct policy_dbs_info *policy_dbs = policy->governor_data;
        struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy_dbs);
+       unsigned int requested_freq = dbs_info->requested_freq;
        struct dbs_data *dbs_data = policy_dbs->dbs_data;
        struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
        unsigned int load = dbs_update(policy);
@@ -72,10 +74,16 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
        if (cs_tuners->freq_step == 0)
                goto out;
 
+       /*
+        * If requested_freq is out of range, it is likely that the limits
+        * changed in the meantime, so fall back to current frequency in that
+        * case.
+        */
+       if (requested_freq > policy->max || requested_freq < policy->min)
+               requested_freq = policy->cur;
+
        /* Check for frequency increase */
        if (load > dbs_data->up_threshold) {
-               unsigned int requested_freq = policy->cur;
-
                dbs_info->down_skip = 0;
 
                /* if we are already at full speed then break out early */
@@ -83,8 +91,11 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
                        goto out;
 
                requested_freq += get_freq_target(cs_tuners, policy);
+               if (requested_freq > policy->max)
+                       requested_freq = policy->max;
 
                __cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_H);
+               dbs_info->requested_freq = requested_freq;
                goto out;
        }
 
@@ -95,7 +106,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
 
        /* Check for frequency decrease */
        if (load < cs_tuners->down_threshold) {
-               unsigned int freq_target, requested_freq = policy->cur;
+               unsigned int freq_target;
                /*
                 * if we cannot reduce the frequency anymore, break out early
                 */
@@ -109,6 +120,7 @@ static unsigned int cs_dbs_timer(struct cpufreq_policy *policy)
                        requested_freq = policy->min;
 
                __cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_L);
+               dbs_info->requested_freq = requested_freq;
        }
 
  out:
@@ -287,6 +299,7 @@ static void cs_start(struct cpufreq_policy *policy)
        struct cs_policy_dbs_info *dbs_info = to_dbs_info(policy->governor_data);
 
        dbs_info->down_skip = 0;
+       dbs_info->requested_freq = policy->cur;
 }
 
 static struct dbs_governor cs_governor = {
index 806f2039571e56ec69bd2cad697f788821826c1a..4737520ec8230a830d80e81c0dbc9dbaa96d0dc7 100644 (file)
@@ -179,6 +179,7 @@ struct _pid {
 /**
  * struct cpudata -    Per CPU instance data storage
  * @cpu:               CPU number for this instance data
+ * @policy:            CPUFreq policy value
  * @update_util:       CPUFreq utility callback information
  * @update_util_set:   CPUFreq utility callback is set
  * @iowait_boost:      iowait-related boost fraction
@@ -201,6 +202,7 @@ struct _pid {
 struct cpudata {
        int cpu;
 
+       unsigned int policy;
        struct update_util_data update_util;
        bool   update_util_set;
 
@@ -225,7 +227,7 @@ struct cpudata {
 static struct cpudata **all_cpu_data;
 
 /**
- * struct pid_adjust_policy - Stores static PID configuration data
+ * struct pstate_adjust_policy - Stores static PID configuration data
  * @sample_rate_ms:    PID calculation sample rate in ms
  * @sample_rate_ns:    Sample rate calculation in ns
  * @deadband:          PID deadband
@@ -562,12 +564,12 @@ static void intel_pstate_hwp_set(const struct cpumask *cpumask)
        int min, hw_min, max, hw_max, cpu, range, adj_range;
        u64 value, cap;
 
-       rdmsrl(MSR_HWP_CAPABILITIES, cap);
-       hw_min = HWP_LOWEST_PERF(cap);
-       hw_max = HWP_HIGHEST_PERF(cap);
-       range = hw_max - hw_min;
-
        for_each_cpu(cpu, cpumask) {
+               rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+               hw_min = HWP_LOWEST_PERF(cap);
+               hw_max = HWP_HIGHEST_PERF(cap);
+               range = hw_max - hw_min;
+
                rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
                adj_range = limits->min_perf_pct * range / 100;
                min = hw_min + adj_range;
@@ -1142,10 +1144,8 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
        *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
 }
 
-static void intel_pstate_set_min_pstate(struct cpudata *cpu)
+static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
 {
-       int pstate = cpu->pstate.min_pstate;
-
        trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
        cpu->pstate.current_pstate = pstate;
        /*
@@ -1157,6 +1157,20 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu)
                      pstate_funcs.get_val(cpu, pstate));
 }
 
+static void intel_pstate_set_min_pstate(struct cpudata *cpu)
+{
+       intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate);
+}
+
+static void intel_pstate_max_within_limits(struct cpudata *cpu)
+{
+       int min_pstate, max_pstate;
+
+       update_turbo_state();
+       intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate);
+       intel_pstate_set_pstate(cpu, max_pstate);
+}
+
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 {
        cpu->pstate.min_pstate = pstate_funcs.get_min();
@@ -1232,6 +1246,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 {
        struct sample *sample = &cpu->sample;
        int32_t busy_frac, boost;
+       int target, avg_pstate;
 
        busy_frac = div_fp(sample->mperf, sample->tsc);
 
@@ -1242,7 +1257,26 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
                busy_frac = boost;
 
        sample->busy_scaled = busy_frac * 100;
-       return get_avg_pstate(cpu) - pid_calc(&cpu->pid, sample->busy_scaled);
+
+       target = limits->no_turbo || limits->turbo_disabled ?
+                       cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
+       target += target >> 2;
+       target = mul_fp(target, busy_frac);
+       if (target < cpu->pstate.min_pstate)
+               target = cpu->pstate.min_pstate;
+
+       /*
+        * If the average P-state during the previous cycle was higher than the
+        * current target, add 50% of the difference to the target to reduce
+        * possible performance oscillations and offset possible performance
+        * loss related to moving the workload from one CPU to another within
+        * a package/module.
+        */
+       avg_pstate = get_avg_pstate(cpu);
+       if (avg_pstate > target)
+               target += (avg_pstate - target) >> 1;
+
+       return target;
 }
 
 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
@@ -1251,10 +1285,11 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
        u64 duration_ns;
 
        /*
-        * perf_scaled is the average performance during the last sampling
-        * period scaled by the ratio of the maximum P-state to the P-state
-        * requested last time (in percent).  That measures the system's
-        * response to the previous P-state selection.
+        * perf_scaled is the ratio of the average P-state during the last
+        * sampling period to the P-state requested last time (in percent).
+        *
+        * That measures the system's response to the previous P-state
+        * selection.
         */
        max_pstate = cpu->pstate.max_pstate_physical;
        current_pstate = cpu->pstate.current_pstate;
@@ -1304,7 +1339,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
 
        from = cpu->pstate.current_pstate;
 
-       target_pstate = pstate_funcs.get_target_pstate(cpu);
+       target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ?
+               cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu);
 
        intel_pstate_update_pstate(cpu, target_pstate);
 
@@ -1470,7 +1506,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
        pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
                 policy->cpuinfo.max_freq, policy->max);
 
-       cpu = all_cpu_data[0];
+       cpu = all_cpu_data[policy->cpu];
+       cpu->policy = policy->policy;
+
        if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
            policy->max < policy->cpuinfo.max_freq &&
            policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
@@ -1478,7 +1516,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
                policy->max = policy->cpuinfo.max_freq;
        }
 
-       if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
+       if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
                limits = &performance_limits;
                if (policy->max >= policy->cpuinfo.max_freq) {
                        pr_debug("set performance\n");
@@ -1514,6 +1552,15 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
        limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
 
  out:
+       if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) {
+               /*
+                * NOHZ_FULL CPUs need this as the governor callback may not
+                * be invoked on them.
+                */
+               intel_pstate_clear_update_util_hook(policy->cpu);
+               intel_pstate_max_within_limits(cpu);
+       }
+
        intel_pstate_set_update_util_hook(policy->cpu);
 
        intel_pstate_hwp_set_policy(policy);
index 4102be01d06a03db5d98f91473b0083db81158d7..512ee37b374b1cf82b4762ccde10d233d35b98a0 100644 (file)
@@ -5,7 +5,7 @@ config MIPS_CPS_CPUIDLE
        bool "CPU Idle driver for MIPS CPS platforms"
        depends on CPU_IDLE && MIPS_CPS
        depends on SYS_SUPPORTS_MIPS_CPS
-       select ARCH_NEEDS_CPU_IDLE_COUPLED if MIPS_MT
+       select ARCH_NEEDS_CPU_IDLE_COUPLED if MIPS_MT || CPU_MIPSR6
        select GENERIC_CLOCKEVENTS_BROADCAST if SMP
        select MIPS_CPS_PM
        default y
index 1adb6980b707ced6b9737e1d1df79ce10af41def..926ba9871c628ac4e1e352363f578cfc22835788 100644 (file)
@@ -163,7 +163,7 @@ static int __init cps_cpuidle_init(void)
                core = cpu_data[cpu].core;
                device = &per_cpu(cpuidle_dev, cpu);
                device->cpu = cpu;
-#ifdef CONFIG_MIPS_MT
+#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
                cpumask_copy(&device->coupled_cpus, &cpu_sibling_map[cpu]);
 #endif
 
index 156aad167cd6fcd44e66ea9456e253f8e3e36866..954a64c7757b10e1551672861a6ef28623e57019 100644 (file)
@@ -137,7 +137,7 @@ static void dbg_dump_sg(const char *level, const char *prefix_str,
                }
 
                buf = it_page + it->offset;
-               len = min(tlen, it->length);
+               len = min_t(size_t, tlen, it->length);
                print_hex_dump(level, prefix_str, prefix_type, rowsize,
                               groupsize, buf, len, ascii);
                tlen -= len;
@@ -4583,6 +4583,15 @@ static int __init caam_algapi_init(void)
                if (!aes_inst && (alg_sel == OP_ALG_ALGSEL_AES))
                                continue;
 
+               /*
+                * Check support for AES modes not available
+                * on LP devices.
+                */
+               if ((cha_vid & CHA_ID_LS_AES_MASK) == CHA_ID_LS_AES_LP)
+                       if ((alg->class1_alg_type & OP_ALG_AAI_MASK) ==
+                            OP_ALG_AAI_XTS)
+                               continue;
+
                t_alg = caam_alg_alloc(alg);
                if (IS_ERR(t_alg)) {
                        err = PTR_ERR(t_alg);
index e4ddb921d7b3e156ab7d3b68a59fbc709d853fed..56b15380546286d61058e61ea88c060f409c21f1 100644 (file)
@@ -592,16 +592,18 @@ badkey_err:
 
 static int cxgb4_is_crypto_q_full(struct net_device *dev, unsigned int idx)
 {
-       int ret = 0;
-       struct sge_ofld_txq *q;
        struct adapter *adap = netdev2adap(dev);
+       struct sge_uld_txq_info *txq_info =
+               adap->sge.uld_txq_info[CXGB4_TX_CRYPTO];
+       struct sge_uld_txq *txq;
+       int ret = 0;
 
        local_bh_disable();
-       q = &adap->sge.ofldtxq[idx];
-       spin_lock(&q->sendq.lock);
-       if (q->full)
+       txq = &txq_info->uldtxq[idx];
+       spin_lock(&txq->sendq.lock);
+       if (txq->full)
                ret = -1;
-       spin_unlock(&q->sendq.lock);
+       spin_unlock(&txq->sendq.lock);
        local_bh_enable();
        return ret;
 }
@@ -674,11 +676,11 @@ static int chcr_device_init(struct chcr_context *ctx)
                }
                u_ctx = ULD_CTX(ctx);
                rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan;
-               ctx->dev->tx_channel_id = 0;
                rxq_idx = ctx->dev->tx_channel_id * rxq_perchan;
                rxq_idx += id % rxq_perchan;
                spin_lock(&ctx->dev->lock_chcr_dev);
                ctx->tx_channel_id = rxq_idx;
+               ctx->dev->tx_channel_id = !ctx->dev->tx_channel_id;
                spin_unlock(&ctx->dev->lock_chcr_dev);
        }
 out:
index fb5f9bbfa09cd3b643246334fe86a455da6eeaf0..4d7f6700fd7e4e988572393b3734d6a8952c0bd3 100644 (file)
@@ -42,6 +42,7 @@ static chcr_handler_func work_handlers[NUM_CPL_CMDS] = {
 static struct cxgb4_uld_info chcr_uld_info = {
        .name = DRV_MODULE_NAME,
        .nrxq = MAX_ULD_QSETS,
+       .ntxq = MAX_ULD_QSETS,
        .rxq_size = 1024,
        .add = chcr_uld_add,
        .state_change = chcr_uld_state_change,
@@ -126,7 +127,7 @@ static int cpl_fw6_pld_handler(struct chcr_dev *dev,
 
 int chcr_send_wr(struct sk_buff *skb)
 {
-       return cxgb4_ofld_send(skb->dev, skb);
+       return cxgb4_crypto_send(skb->dev, skb);
 }
 
 static void *chcr_uld_add(const struct cxgb4_lld_info *lld)
index daadd20aa936a85e087ba0145147ffa48eac05a9..3e2ab3b14eea205f19e8b436291e5117cec9567d 100644 (file)
@@ -14,7 +14,7 @@ if DEV_DAX
 
 config DEV_DAX_PMEM
        tristate "PMEM DAX: direct access to persistent memory"
-       depends on NVDIMM_DAX
+       depends on LIBNVDIMM && NVDIMM_DAX
        default DEV_DAX
        help
          Support raw access to persistent memory.  Note that this
index 0e499bfca41ccda4752463f770dd495259fb255e..3d94ff20fdca2bd64beeb86674e42abe736117de 100644 (file)
@@ -270,8 +270,8 @@ static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma,
        if (!dax_dev->alive)
                return -ENXIO;
 
-       /* prevent private / writable mappings from being established */
-       if ((vma->vm_flags & (VM_NORESERVE|VM_SHARED|VM_WRITE)) == VM_WRITE) {
+       /* prevent private mappings from being established */
+       if ((vma->vm_flags & VM_SHARED) != VM_SHARED) {
                dev_info(dev, "%s: %s: fail, attempted private mapping\n",
                                current->comm, func);
                return -EINVAL;
index 9630d8837ba94ed3badc9a3755b53c08123b9a51..73c6ce93a0d9204227818465a707db9f7d5806fb 100644 (file)
@@ -44,7 +44,6 @@ static void dax_pmem_percpu_exit(void *data)
 
        dev_dbg(dax_pmem->dev, "%s\n", __func__);
        percpu_ref_exit(ref);
-       wait_for_completion(&dax_pmem->cmp);
 }
 
 static void dax_pmem_percpu_kill(void *data)
@@ -54,6 +53,7 @@ static void dax_pmem_percpu_kill(void *data)
 
        dev_dbg(dax_pmem->dev, "%s\n", __func__);
        percpu_ref_kill(ref);
+       wait_for_completion(&dax_pmem->cmp);
 }
 
 static int dax_pmem_probe(struct device *dev)
@@ -78,7 +78,9 @@ static int dax_pmem_probe(struct device *dev)
        nsio = to_nd_namespace_io(&ndns->dev);
 
        /* parse the 'pfn' info block via ->rw_bytes */
-       devm_nsio_enable(dev, nsio);
+       rc = devm_nsio_enable(dev, nsio);
+       if (rc)
+               return rc;
        altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap);
        if (IS_ERR(altmap))
                return PTR_ERR(altmap);
index 478006b7764a5919259e8b582dedfd3a1f5358f5..bf3ea7603a58a9705d51feb271a61507f5c779e9 100644 (file)
@@ -137,6 +137,10 @@ static int devfreq_update_status(struct devfreq *devfreq, unsigned long freq)
 
        cur_time = jiffies;
 
+       /* Immediately exit if previous_freq is not initialized yet. */
+       if (!devfreq->previous_freq)
+               goto out;
+
        prev_lev = devfreq_get_freq_level(devfreq, devfreq->previous_freq);
        if (prev_lev < 0) {
                ret = prev_lev;
@@ -594,17 +598,19 @@ struct devfreq *devfreq_add_device(struct device *dev,
        if (devfreq->governor)
                err = devfreq->governor->event_handler(devfreq,
                                        DEVFREQ_GOV_START, NULL);
-       mutex_unlock(&devfreq_list_lock);
        if (err) {
                dev_err(dev, "%s: Unable to start governor for the device\n",
                        __func__);
                goto err_init;
        }
+       mutex_unlock(&devfreq_list_lock);
 
        return devfreq;
 
 err_init:
        list_del(&devfreq->node);
+       mutex_unlock(&devfreq_list_lock);
+
        device_unregister(&devfreq->dev);
 err_out:
        return ERR_PTR(err);
index 0fdae86089613fe130d35246318fe75d41a755c5..cd949800eed962cffa34b599d3ce66f386763d11 100644 (file)
@@ -17,6 +17,7 @@ config DEVFREQ_EVENT_EXYNOS_NOCP
        tristate "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver"
        depends on ARCH_EXYNOS || COMPILE_TEST
        select PM_OPP
+       select REGMAP_MMIO
        help
          This add the devfreq-event driver for Exynos SoC. It provides NoC
          (Network on Chip) Probe counters to measure the bandwidth of AXI bus.
index a5841403bde8b64c314c921148b6189d7dcef82f..49e712aca0c15367e2168d91a3542cdb6ff2ff5b 100644 (file)
@@ -176,9 +176,6 @@ static int exynos_nocp_get_event(struct devfreq_event_dev *edev,
        return 0;
 
 out:
-       edata->load_count = 0;
-       edata->total_count = 0;
-
        dev_err(nocp->dev, "Failed to read the counter of NoC probe device\n");
 
        return ret;
index af63a6bcf564a53574eba818a9e6c2cd07ff9faa..141aefbe37ec93d1f4f38d1be5e2cf8d93266725 100644 (file)
@@ -306,6 +306,7 @@ config MMP_TDMA
        depends on ARCH_MMP || COMPILE_TEST
        select DMA_ENGINE
        select MMP_SRAM if ARCH_MMP
+       select GENERIC_ALLOCATOR
        help
          Support the MMP Two-Channel DMA engine.
          This engine used for MMP Audio DMA and pxa910 SQU.
index bac5f023013b23c92519f052f8523b6008524271..d5ba43a87a682b6e718d5e2ad7c804498bad61de 100644 (file)
@@ -317,6 +317,12 @@ static irqreturn_t cppi41_irq(int irq, void *data)
 
                while (val) {
                        u32 desc, len;
+                       int error;
+
+                       error = pm_runtime_get(cdd->ddev.dev);
+                       if (error < 0)
+                               dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n",
+                                       __func__, error);
 
                        q_num = __fls(val);
                        val &= ~(1 << q_num);
@@ -338,7 +344,6 @@ static irqreturn_t cppi41_irq(int irq, void *data)
                        dma_cookie_complete(&c->txd);
                        dmaengine_desc_get_callback_invoke(&c->txd, NULL);
 
-                       /* Paired with cppi41_dma_issue_pending */
                        pm_runtime_mark_last_busy(cdd->ddev.dev);
                        pm_runtime_put_autosuspend(cdd->ddev.dev);
                }
@@ -362,8 +367,13 @@ static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan)
        int error;
 
        error = pm_runtime_get_sync(cdd->ddev.dev);
-       if (error < 0)
+       if (error < 0) {
+               dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n",
+                       __func__, error);
+               pm_runtime_put_noidle(cdd->ddev.dev);
+
                return error;
+       }
 
        dma_cookie_init(chan);
        dma_async_tx_descriptor_init(&c->txd, chan);
@@ -385,8 +395,11 @@ static void cppi41_dma_free_chan_resources(struct dma_chan *chan)
        int error;
 
        error = pm_runtime_get_sync(cdd->ddev.dev);
-       if (error < 0)
+       if (error < 0) {
+               pm_runtime_put_noidle(cdd->ddev.dev);
+
                return;
+       }
 
        WARN_ON(!list_empty(&cdd->pending));
 
@@ -460,9 +473,9 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan)
        struct cppi41_dd *cdd = c->cdd;
        int error;
 
-       /* PM runtime paired with dmaengine_desc_get_callback_invoke */
        error = pm_runtime_get(cdd->ddev.dev);
        if ((error != -EINPROGRESS) && error < 0) {
+               pm_runtime_put_noidle(cdd->ddev.dev);
                dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n",
                        error);
 
@@ -473,6 +486,9 @@ static void cppi41_dma_issue_pending(struct dma_chan *chan)
                push_desc_queue(c);
        else
                pending_desc(c);
+
+       pm_runtime_mark_last_busy(cdd->ddev.dev);
+       pm_runtime_put_autosuspend(cdd->ddev.dev);
 }
 
 static u32 get_host_pd0(u32 length)
@@ -1059,8 +1075,8 @@ err_chans:
        deinit_cppi41(dev, cdd);
 err_init_cppi:
        pm_runtime_dont_use_autosuspend(dev);
-       pm_runtime_put_sync(dev);
 err_get_sync:
+       pm_runtime_put_sync(dev);
        pm_runtime_disable(dev);
        iounmap(cdd->usbss_mem);
        iounmap(cdd->ctrl_mem);
@@ -1072,7 +1088,12 @@ err_get_sync:
 static int cppi41_dma_remove(struct platform_device *pdev)
 {
        struct cppi41_dd *cdd = platform_get_drvdata(pdev);
+       int error;
 
+       error = pm_runtime_get_sync(&pdev->dev);
+       if (error < 0)
+               dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n",
+                       __func__, error);
        of_dma_controller_free(pdev->dev.of_node);
        dma_async_device_unregister(&cdd->ddev);
 
index e18a58068bca75862bfcaee08952575925d284a7..77242b37ef87866acf4681c0934c906ecb75de77 100644 (file)
@@ -1628,6 +1628,7 @@ static int edma_alloc_chan_resources(struct dma_chan *chan)
        if (echan->slot[0] < 0) {
                dev_err(dev, "Entry slot allocation failed for channel %u\n",
                        EDMA_CHAN_SLOT(echan->ch_num));
+               ret = echan->slot[0];
                goto err_slot;
        }
 
index 83461994e4181a67b73c5ceaa7bfb24bada1f9d5..a2358780ab2c3ca6ea52c834fa692a644e35c0cb 100644 (file)
@@ -578,7 +578,7 @@ static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy(
 
        burst = convert_burst(8);
        width = convert_buswidth(DMA_SLAVE_BUSWIDTH_4_BYTES);
-       v_lli->cfg |= DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) |
+       v_lli->cfg = DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) |
                DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) |
                DMA_CHAN_CFG_DST_LINEAR_MODE |
                DMA_CHAN_CFG_SRC_LINEAR_MODE |
index ca957a5f4291228a77c85b65cf08f0fb690bf522..b8cde096a808f552fde97e0cb85c101ab4139881 100644 (file)
@@ -51,7 +51,7 @@ static void qcom_usb_extcon_detect_cable(struct work_struct *work)
        if (ret)
                return;
 
-       extcon_set_state(info->edev, EXTCON_USB_HOST, !id);
+       extcon_set_state_sync(info->edev, EXTCON_USB_HOST, !id);
 }
 
 static irqreturn_t qcom_usb_irq_handler(int irq, void *dev_id)
index 03715e7d9d92125f15e8ade08bc58350e241c506..5d3640264f2da1322c76b0aacf01bd2d5cd0f00e 100644 (file)
@@ -73,13 +73,13 @@ struct rfc2734_header {
 
 #define fwnet_get_hdr_lf(h)            (((h)->w0 & 0xc0000000) >> 30)
 #define fwnet_get_hdr_ether_type(h)    (((h)->w0 & 0x0000ffff))
-#define fwnet_get_hdr_dg_size(h)       (((h)->w0 & 0x0fff0000) >> 16)
+#define fwnet_get_hdr_dg_size(h)       ((((h)->w0 & 0x0fff0000) >> 16) + 1)
 #define fwnet_get_hdr_fg_off(h)                (((h)->w0 & 0x00000fff))
 #define fwnet_get_hdr_dgl(h)           (((h)->w1 & 0xffff0000) >> 16)
 
-#define fwnet_set_hdr_lf(lf)           ((lf)  << 30)
+#define fwnet_set_hdr_lf(lf)           ((lf) << 30)
 #define fwnet_set_hdr_ether_type(et)   (et)
-#define fwnet_set_hdr_dg_size(dgs)     ((dgs) << 16)
+#define fwnet_set_hdr_dg_size(dgs)     (((dgs) - 1) << 16)
 #define fwnet_set_hdr_fg_off(fgo)      (fgo)
 
 #define fwnet_set_hdr_dgl(dgl)         ((dgl) << 16)
@@ -578,6 +578,9 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
        int retval;
        u16 ether_type;
 
+       if (len <= RFC2374_UNFRAG_HDR_SIZE)
+               return 0;
+
        hdr.w0 = be32_to_cpu(buf[0]);
        lf = fwnet_get_hdr_lf(&hdr);
        if (lf == RFC2374_HDR_UNFRAG) {
@@ -602,7 +605,12 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
                return fwnet_finish_incoming_packet(net, skb, source_node_id,
                                                    is_broadcast, ether_type);
        }
+
        /* A datagram fragment has been received, now the fun begins. */
+
+       if (len <= RFC2374_FRAG_HDR_SIZE)
+               return 0;
+
        hdr.w1 = ntohl(buf[1]);
        buf += 2;
        len -= RFC2374_FRAG_HDR_SIZE;
@@ -614,7 +622,10 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len,
                fg_off = fwnet_get_hdr_fg_off(&hdr);
        }
        datagram_label = fwnet_get_hdr_dgl(&hdr);
-       dg_size = fwnet_get_hdr_dg_size(&hdr); /* ??? + 1 */
+       dg_size = fwnet_get_hdr_dg_size(&hdr);
+
+       if (fg_off + len > dg_size)
+               return 0;
 
        spin_lock_irqsave(&dev->lock, flags);
 
@@ -722,6 +733,22 @@ static void fwnet_receive_packet(struct fw_card *card, struct fw_request *r,
        fw_send_response(card, r, rcode);
 }
 
+static int gasp_source_id(__be32 *p)
+{
+       return be32_to_cpu(p[0]) >> 16;
+}
+
+static u32 gasp_specifier_id(__be32 *p)
+{
+       return (be32_to_cpu(p[0]) & 0xffff) << 8 |
+              (be32_to_cpu(p[1]) & 0xff000000) >> 24;
+}
+
+static u32 gasp_version(__be32 *p)
+{
+       return be32_to_cpu(p[1]) & 0xffffff;
+}
+
 static void fwnet_receive_broadcast(struct fw_iso_context *context,
                u32 cycle, size_t header_length, void *header, void *data)
 {
@@ -731,9 +758,6 @@ static void fwnet_receive_broadcast(struct fw_iso_context *context,
        __be32 *buf_ptr;
        int retval;
        u32 length;
-       u16 source_node_id;
-       u32 specifier_id;
-       u32 ver;
        unsigned long offset;
        unsigned long flags;
 
@@ -750,22 +774,17 @@ static void fwnet_receive_broadcast(struct fw_iso_context *context,
 
        spin_unlock_irqrestore(&dev->lock, flags);
 
-       specifier_id =    (be32_to_cpu(buf_ptr[0]) & 0xffff) << 8
-                       | (be32_to_cpu(buf_ptr[1]) & 0xff000000) >> 24;
-       ver = be32_to_cpu(buf_ptr[1]) & 0xffffff;
-       source_node_id = be32_to_cpu(buf_ptr[0]) >> 16;
-
-       if (specifier_id == IANA_SPECIFIER_ID &&
-           (ver == RFC2734_SW_VERSION
+       if (length > IEEE1394_GASP_HDR_SIZE &&
+           gasp_specifier_id(buf_ptr) == IANA_SPECIFIER_ID &&
+           (gasp_version(buf_ptr) == RFC2734_SW_VERSION
 #if IS_ENABLED(CONFIG_IPV6)
-            || ver == RFC3146_SW_VERSION
+            || gasp_version(buf_ptr) == RFC3146_SW_VERSION
 #endif
-           )) {
-               buf_ptr += 2;
-               length -= IEEE1394_GASP_HDR_SIZE;
-               fwnet_incoming_packet(dev, buf_ptr, length, source_node_id,
+           ))
+               fwnet_incoming_packet(dev, buf_ptr + 2,
+                                     length - IEEE1394_GASP_HDR_SIZE,
+                                     gasp_source_id(buf_ptr),
                                      context->card->generation, true);
-       }
 
        packet.payload_length = dev->rcv_buffer_size;
        packet.interrupt = 1;
@@ -1465,7 +1484,7 @@ static int fwnet_probe(struct fw_unit *unit,
 
        net->mtu = 1500U;
        net->min_mtu = ETH_MIN_MTU;
-       net->max_mtu = ETH_MAX_MTU;
+       net->max_mtu = 0xfff;
 
        /* Set our hardware address while we're at it */
        ha = (union fwnet_hwaddr *)net->dev_addr;
index 631c977b0da5f817b7ecf950d94f90198a53abbe..180f0a96528cee063d3da64fa620d6de0eebdfcf 100644 (file)
@@ -566,6 +566,11 @@ add_card(struct pci_dev *dev, const struct pci_device_id *unused)
 
        lynx->registers = ioremap_nocache(pci_resource_start(dev, 0),
                                          PCILYNX_MAX_REGISTER);
+       if (lynx->registers == NULL) {
+               dev_err(&dev->dev, "Failed to map registers\n");
+               ret = -ENOMEM;
+               goto fail_deallocate_lynx;
+       }
 
        lynx->rcv_start_pcl = pci_alloc_consistent(lynx->pci_device,
                                sizeof(struct pcl), &lynx->rcv_start_pcl_bus);
@@ -578,7 +583,7 @@ add_card(struct pci_dev *dev, const struct pci_device_id *unused)
            lynx->rcv_buffer == NULL) {
                dev_err(&dev->dev, "Failed to allocate receive buffer\n");
                ret = -ENOMEM;
-               goto fail_deallocate;
+               goto fail_deallocate_buffers;
        }
        lynx->rcv_start_pcl->next       = cpu_to_le32(lynx->rcv_pcl_bus);
        lynx->rcv_pcl->next             = cpu_to_le32(PCL_NEXT_INVALID);
@@ -641,7 +646,7 @@ add_card(struct pci_dev *dev, const struct pci_device_id *unused)
                dev_err(&dev->dev,
                        "Failed to allocate shared interrupt %d\n", dev->irq);
                ret = -EIO;
-               goto fail_deallocate;
+               goto fail_deallocate_buffers;
        }
 
        lynx->misc.parent = &dev->dev;
@@ -668,7 +673,7 @@ fail_free_irq:
        reg_write(lynx, PCI_INT_ENABLE, 0);
        free_irq(lynx->pci_device->irq, lynx);
 
-fail_deallocate:
+fail_deallocate_buffers:
        if (lynx->rcv_start_pcl)
                pci_free_consistent(lynx->pci_device, sizeof(struct pcl),
                                lynx->rcv_start_pcl, lynx->rcv_start_pcl_bus);
@@ -679,6 +684,8 @@ fail_deallocate:
                pci_free_consistent(lynx->pci_device, PAGE_SIZE,
                                lynx->rcv_buffer, lynx->rcv_buffer_bus);
        iounmap(lynx->registers);
+
+fail_deallocate_lynx:
        kfree(lynx);
 
 fail_disable:
index c06945160a4154a5f8d518192b7adb16c4ab325d..5e23e2d305e71db52a7f9e27a63ce760f4c752fd 100644 (file)
@@ -11,7 +11,7 @@ cflags-$(CONFIG_X86)          += -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 \
                                   -mno-mmx -mno-sse
 
 cflags-$(CONFIG_ARM64)         := $(subst -pg,,$(KBUILD_CFLAGS))
-cflags-$(CONFIG_ARM)           := $(subst -pg,,$(KBUILD_CFLAGS)) \
+cflags-$(CONFIG_ARM)           := $(subst -pg,,$(KBUILD_CFLAGS)) -g0 \
                                   -fno-builtin -fpic -mno-single-pic-base
 
 cflags-$(CONFIG_EFI_ARMSTUB)   += -I$(srctree)/scripts/dtc/libfdt
@@ -79,5 +79,6 @@ quiet_cmd_stubcopy = STUBCPY $@
 # decompressor. So move our .data to .data.efistub, which is preserved
 # explicitly by the decompressor linker script.
 #
-STUBCOPY_FLAGS-$(CONFIG_ARM)   += --rename-section .data=.data.efistub
+STUBCOPY_FLAGS-$(CONFIG_ARM)   += --rename-section .data=.data.efistub \
+                                  -R ___ksymtab+sort -R ___kcrctab+sort
 STUBCOPY_RELOC-$(CONFIG_ARM)   := R_ARM_ABS
index 26ee00f6bd5829c04d66b4621643375959c161da..ed37e5908b910cd51cb378ffc171fe2a9104c082 100644 (file)
@@ -22,10 +22,6 @@ menuconfig GPIOLIB
 
 if GPIOLIB
 
-config GPIO_DEVRES
-       def_bool y
-       depends on HAS_IOMEM
-
 config OF_GPIO
        def_bool y
        depends on OF
@@ -284,7 +280,7 @@ config GPIO_MM_LANTIQ
 
 config GPIO_MOCKUP
        tristate "GPIO Testing Driver"
-       depends on GPIOLIB
+       depends on GPIOLIB && SYSFS
        select GPIO_SYSFS
        help
          This enables GPIO Testing driver, which provides a way to test GPIO
index ab28a2daeacc92fb1dcd648bbecb09c0b2ce8fc4..d074c2299393dc9cef3456b0068ff2d229677c27 100644 (file)
@@ -2,7 +2,7 @@
 
 ccflags-$(CONFIG_DEBUG_GPIO)   += -DDEBUG
 
-obj-$(CONFIG_GPIO_DEVRES)      += devres.o
+obj-$(CONFIG_GPIOLIB)          += devres.o
 obj-$(CONFIG_GPIOLIB)          += gpiolib.o
 obj-$(CONFIG_GPIOLIB)          += gpiolib-legacy.o
 obj-$(CONFIG_OF_GPIO)          += gpiolib-of.o
index 9457e2022bf6c1e0c4f8cc4cd11acf224ff68e54..dc37dbe4b46d8889bfaddeac0c492dd6ed56090c 100644 (file)
@@ -219,6 +219,7 @@ static const struct of_device_id ath79_gpio_of_match[] = {
        { .compatible = "qca,ar9340-gpio" },
        {},
 };
+MODULE_DEVICE_TABLE(of, ath79_gpio_of_match);
 
 static int ath79_gpio_probe(struct platform_device *pdev)
 {
index 425501c39527038509a0c3af752598d0c53ee049..793518a30afe6c97a97bfe5db1d62cb1651077c1 100644 (file)
@@ -239,7 +239,7 @@ static int mpc8xxx_gpio_irq_map(struct irq_domain *h, unsigned int irq,
                                irq_hw_number_t hwirq)
 {
        irq_set_chip_data(irq, h->host_data);
-       irq_set_chip_and_handler(irq, &mpc8xxx_irq_chip, handle_level_irq);
+       irq_set_chip_and_handler(irq, &mpc8xxx_irq_chip, handle_edge_irq);
 
        return 0;
 }
index cd5dc27320a273ab52bc119f57f2f87f9cf240b2..1ed6132b993c6fbcf28997c4327572011c054930 100644 (file)
@@ -293,10 +293,10 @@ static void mvebu_gpio_irq_ack(struct irq_data *d)
 {
        struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
        struct mvebu_gpio_chip *mvchip = gc->private;
-       u32 mask = ~(1 << (d->irq - gc->irq_base));
+       u32 mask = d->mask;
 
        irq_gc_lock(gc);
-       writel_relaxed(mask, mvebu_gpioreg_edge_cause(mvchip));
+       writel_relaxed(~mask, mvebu_gpioreg_edge_cause(mvchip));
        irq_gc_unlock(gc);
 }
 
@@ -305,7 +305,7 @@ static void mvebu_gpio_edge_irq_mask(struct irq_data *d)
        struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
        struct mvebu_gpio_chip *mvchip = gc->private;
        struct irq_chip_type *ct = irq_data_get_chip_type(d);
-       u32 mask = 1 << (d->irq - gc->irq_base);
+       u32 mask = d->mask;
 
        irq_gc_lock(gc);
        ct->mask_cache_priv &= ~mask;
@@ -319,8 +319,7 @@ static void mvebu_gpio_edge_irq_unmask(struct irq_data *d)
        struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
        struct mvebu_gpio_chip *mvchip = gc->private;
        struct irq_chip_type *ct = irq_data_get_chip_type(d);
-
-       u32 mask = 1 << (d->irq - gc->irq_base);
+       u32 mask = d->mask;
 
        irq_gc_lock(gc);
        ct->mask_cache_priv |= mask;
@@ -333,8 +332,7 @@ static void mvebu_gpio_level_irq_mask(struct irq_data *d)
        struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
        struct mvebu_gpio_chip *mvchip = gc->private;
        struct irq_chip_type *ct = irq_data_get_chip_type(d);
-
-       u32 mask = 1 << (d->irq - gc->irq_base);
+       u32 mask = d->mask;
 
        irq_gc_lock(gc);
        ct->mask_cache_priv &= ~mask;
@@ -347,8 +345,7 @@ static void mvebu_gpio_level_irq_unmask(struct irq_data *d)
        struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
        struct mvebu_gpio_chip *mvchip = gc->private;
        struct irq_chip_type *ct = irq_data_get_chip_type(d);
-
-       u32 mask = 1 << (d->irq - gc->irq_base);
+       u32 mask = d->mask;
 
        irq_gc_lock(gc);
        ct->mask_cache_priv |= mask;
@@ -462,7 +459,7 @@ static void mvebu_gpio_irq_handler(struct irq_desc *desc)
        for (i = 0; i < mvchip->chip.ngpio; i++) {
                int irq;
 
-               irq = mvchip->irqbase + i;
+               irq = irq_find_mapping(mvchip->domain, i);
 
                if (!(cause & (1 << i)))
                        continue;
@@ -655,6 +652,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
        struct irq_chip_type *ct;
        struct clk *clk;
        unsigned int ngpios;
+       bool have_irqs;
        int soc_variant;
        int i, cpu, id;
        int err;
@@ -665,6 +663,9 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
        else
                soc_variant = MVEBU_GPIO_SOC_VARIANT_ORION;
 
+       /* Some gpio controllers do not provide irq support */
+       have_irqs = of_irq_count(np) != 0;
+
        mvchip = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_gpio_chip),
                              GFP_KERNEL);
        if (!mvchip)
@@ -697,7 +698,8 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
        mvchip->chip.get = mvebu_gpio_get;
        mvchip->chip.direction_output = mvebu_gpio_direction_output;
        mvchip->chip.set = mvebu_gpio_set;
-       mvchip->chip.to_irq = mvebu_gpio_to_irq;
+       if (have_irqs)
+               mvchip->chip.to_irq = mvebu_gpio_to_irq;
        mvchip->chip.base = id * MVEBU_MAX_GPIO_PER_BANK;
        mvchip->chip.ngpio = ngpios;
        mvchip->chip.can_sleep = false;
@@ -758,34 +760,30 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
        devm_gpiochip_add_data(&pdev->dev, &mvchip->chip, mvchip);
 
        /* Some gpio controllers do not provide irq support */
-       if (!of_irq_count(np))
+       if (!have_irqs)
                return 0;
 
-       /* Setup the interrupt handlers. Each chip can have up to 4
-        * interrupt handlers, with each handler dealing with 8 GPIO
-        * pins. */
-       for (i = 0; i < 4; i++) {
-               int irq = platform_get_irq(pdev, i);
-
-               if (irq < 0)
-                       continue;
-               irq_set_chained_handler_and_data(irq, mvebu_gpio_irq_handler,
-                                                mvchip);
-       }
-
-       mvchip->irqbase = irq_alloc_descs(-1, 0, ngpios, -1);
-       if (mvchip->irqbase < 0) {
-               dev_err(&pdev->dev, "no irqs\n");
-               return mvchip->irqbase;
+       mvchip->domain =
+           irq_domain_add_linear(np, ngpios, &irq_generic_chip_ops, NULL);
+       if (!mvchip->domain) {
+               dev_err(&pdev->dev, "couldn't allocate irq domain %s (DT).\n",
+                       mvchip->chip.label);
+               return -ENODEV;
        }
 
-       gc = irq_alloc_generic_chip("mvebu_gpio_irq", 2, mvchip->irqbase,
-                                   mvchip->membase, handle_level_irq);
-       if (!gc) {
-               dev_err(&pdev->dev, "Cannot allocate generic irq_chip\n");
-               return -ENOMEM;
+       err = irq_alloc_domain_generic_chips(
+           mvchip->domain, ngpios, 2, np->name, handle_level_irq,
+           IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_LEVEL, 0, 0);
+       if (err) {
+               dev_err(&pdev->dev, "couldn't allocate irq chips %s (DT).\n",
+                       mvchip->chip.label);
+               goto err_domain;
        }
 
+       /* NOTE: The common accessors cannot be used because of the percpu
+        * access to the mask registers
+        */
+       gc = irq_get_domain_generic_chip(mvchip->domain, 0);
        gc->private = mvchip;
        ct = &gc->chip_types[0];
        ct->type = IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW;
@@ -803,27 +801,23 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
        ct->handler = handle_edge_irq;
        ct->chip.name = mvchip->chip.label;
 
-       irq_setup_generic_chip(gc, IRQ_MSK(ngpios), 0,
-                              IRQ_NOREQUEST, IRQ_LEVEL | IRQ_NOPROBE);
+       /* Setup the interrupt handlers. Each chip can have up to 4
+        * interrupt handlers, with each handler dealing with 8 GPIO
+        * pins.
+        */
+       for (i = 0; i < 4; i++) {
+               int irq = platform_get_irq(pdev, i);
 
-       /* Setup irq domain on top of the generic chip. */
-       mvchip->domain = irq_domain_add_simple(np, mvchip->chip.ngpio,
-                                              mvchip->irqbase,
-                                              &irq_domain_simple_ops,
-                                              mvchip);
-       if (!mvchip->domain) {
-               dev_err(&pdev->dev, "couldn't allocate irq domain %s (DT).\n",
-                       mvchip->chip.label);
-               err = -ENODEV;
-               goto err_generic_chip;
+               if (irq < 0)
+                       continue;
+               irq_set_chained_handler_and_data(irq, mvebu_gpio_irq_handler,
+                                                mvchip);
        }
 
        return 0;
 
-err_generic_chip:
-       irq_remove_generic_chip(gc, IRQ_MSK(ngpios), IRQ_NOREQUEST,
-                               IRQ_LEVEL | IRQ_NOPROBE);
-       kfree(gc);
+err_domain:
+       irq_domain_remove(mvchip->domain);
 
        return err;
 }
index b9daa0bf32a46375784c2f6ade582ba4c46c4a74..ee1724806f46db13d7eb2b41ce900fd2043b3d91 100644 (file)
@@ -308,8 +308,10 @@ static int mxs_gpio_probe(struct platform_device *pdev)
        writel(~0U, port->base + PINCTRL_IRQSTAT(port) + MXS_CLR);
 
        irq_base = irq_alloc_descs(-1, 0, 32, numa_node_id());
-       if (irq_base < 0)
-               return irq_base;
+       if (irq_base < 0) {
+               err = irq_base;
+               goto out_iounmap;
+       }
 
        port->domain = irq_domain_add_legacy(np, 32, irq_base, 0,
                                             &irq_domain_simple_ops, NULL);
@@ -349,6 +351,8 @@ out_irqdomain_remove:
        irq_domain_remove(port->domain);
 out_irqdesc_free:
        irq_free_descs(irq_base, 32);
+out_iounmap:
+       iounmap(port->base);
        return err;
 }
 
index 45c8817d068c60649ee9e5648dbf9451f3bf5667..fe731f09425712b546655d7097a84b7a47a35d68 100644 (file)
@@ -372,14 +372,15 @@ static void pca953x_gpio_set_multiple(struct gpio_chip *gc,
 
        bank_shift = fls((chip->gpio_chip.ngpio - 1) / BANK_SZ);
 
-       memcpy(reg_val, chip->reg_output, NBANK(chip));
        mutex_lock(&chip->i2c_lock);
+       memcpy(reg_val, chip->reg_output, NBANK(chip));
        for (bank = 0; bank < NBANK(chip); bank++) {
                bank_mask = mask[bank / sizeof(*mask)] >>
                           ((bank % sizeof(*mask)) * 8);
                if (bank_mask) {
                        bank_val = bits[bank / sizeof(*bits)] >>
                                  ((bank % sizeof(*bits)) * 8);
+                       bank_val &= bank_mask;
                        reg_val[bank] = (reg_val[bank] & ~bank_mask) | bank_val;
                }
        }
@@ -607,7 +608,6 @@ static int pca953x_irq_setup(struct pca953x_chip *chip,
 
        if (client->irq && irq_base != -1
                        && (chip->driver_data & PCA_INT)) {
-
                ret = pca953x_read_regs(chip,
                                        chip->regs->input, chip->irq_stat);
                if (ret)
@@ -794,6 +794,22 @@ static int pca953x_probe(struct i2c_client *client,
        }
 
        mutex_init(&chip->i2c_lock);
+       /*
+        * In case we have an i2c-mux controlled by a GPIO provided by an
+        * expander using the same driver higher on the device tree, read the
+        * i2c adapter nesting depth and use the retrieved value as lockdep
+        * subclass for chip->i2c_lock.
+        *
+        * REVISIT: This solution is not complete. It protects us from lockdep
+        * false positives when the expander controlling the i2c-mux is on
+        * a different level on the device tree, but not when it's on the same
+        * level on a different branch (in which case the subclass number
+        * would be the same).
+        *
+        * TODO: Once a correct solution is developed, a similar fix should be
+        * applied to all other i2c-controlled GPIO expanders (and potentially
+        * regmap-i2c).
+        */
        lockdep_set_subclass(&chip->i2c_lock,
                             i2c_adapter_depth(client->adapter));
 
index e7d422a6b90bd71427694b8f3efc607d78c254cd..5b0042776ec7081f49d3eaff1dc56fe08733f3d1 100644 (file)
@@ -409,7 +409,7 @@ static irqreturn_t stmpe_gpio_irq(int irq, void *dev)
                 * 801/1801/1600, bits are cleared when read.
                 * Edge detect register is not present on 801/1600/1801
                 */
-               if (stmpe->partnum != STMPE801 || stmpe->partnum != STMPE1600 ||
+               if (stmpe->partnum != STMPE801 && stmpe->partnum != STMPE1600 &&
                    stmpe->partnum != STMPE1801) {
                        stmpe_reg_write(stmpe, statmsbreg + i, status[i]);
                        stmpe_reg_write(stmpe,
index 5a5a6cb00eea9cc7e21666812ed6571fa55ff92d..d6e21f1a70a9dc685aa8dfc786230ad620022691 100644 (file)
@@ -97,7 +97,7 @@ static int tc3589x_gpio_get_direction(struct gpio_chip *chip,
        if (ret < 0)
                return ret;
 
-       return !!(ret & BIT(pos));
+       return !(ret & BIT(pos));
 }
 
 static int tc3589x_gpio_set_single_ended(struct gpio_chip *chip,
index 99256115bea55c12710ae6c19a94c09a8e01d918..c2a80b4cbf32c2ce3b83eedf82339a8ad0177b16 100644 (file)
@@ -66,6 +66,7 @@ static const struct of_device_id ts4800_gpio_of_match[] = {
        { .compatible = "technologic,ts4800-gpio", },
        {},
 };
+MODULE_DEVICE_TABLE(of, ts4800_gpio_of_match);
 
 static struct platform_driver ts4800_gpio_driver = {
        .driver = {
index 58ece201b8e62328741a1f62a1fbb33dfedbf996..72a4b326fd0da2f1d84f9e05a97416ff5761c9d3 100644 (file)
@@ -653,14 +653,17 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
 {
        int idx, i;
        unsigned int irq_flags;
+       int ret = -ENOENT;
 
        for (i = 0, idx = 0; idx <= index; i++) {
                struct acpi_gpio_info info;
                struct gpio_desc *desc;
 
                desc = acpi_get_gpiod_by_index(adev, NULL, i, &info);
-               if (IS_ERR(desc))
+               if (IS_ERR(desc)) {
+                       ret = PTR_ERR(desc);
                        break;
+               }
                if (info.gpioint && idx++ == index) {
                        int irq = gpiod_to_irq(desc);
 
@@ -679,7 +682,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
                }
 
        }
-       return -ENOENT;
+       return ret;
 }
 EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_get);
 
index ecad3f0e3b772772440bcae1735c3430a71f030d..193f15d50bbaa33e9728f98c462f8fddaf96971b 100644 (file)
 
 #include "gpiolib.h"
 
-static int of_gpiochip_match_node(struct gpio_chip *chip, void *data)
+static int of_gpiochip_match_node_and_xlate(struct gpio_chip *chip, void *data)
 {
-       return chip->gpiodev->dev.of_node == data;
+       struct of_phandle_args *gpiospec = data;
+
+       return chip->gpiodev->dev.of_node == gpiospec->np &&
+                               chip->of_xlate(chip, gpiospec, NULL) >= 0;
 }
 
-static struct gpio_chip *of_find_gpiochip_by_node(struct device_node *np)
+static struct gpio_chip *of_find_gpiochip_by_xlate(
+                                       struct of_phandle_args *gpiospec)
 {
-       return gpiochip_find(np, of_gpiochip_match_node);
+       return gpiochip_find(gpiospec, of_gpiochip_match_node_and_xlate);
 }
 
 static struct gpio_desc *of_xlate_and_get_gpiod_flags(struct gpio_chip *chip,
@@ -79,7 +83,7 @@ struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
                return ERR_PTR(ret);
        }
 
-       chip = of_find_gpiochip_by_node(gpiospec.np);
+       chip = of_find_gpiochip_by_xlate(&gpiospec);
        if (!chip) {
                desc = ERR_PTR(-EPROBE_DEFER);
                goto out;
index f0fc3a0d37c829de62e3c136f37cdb7a835a11b7..868128a676bae832090c2c18b1f45e94c2996a5f 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/uaccess.h>
 #include <linux/compat.h>
 #include <linux/anon_inodes.h>
+#include <linux/file.h>
 #include <linux/kfifo.h>
 #include <linux/poll.h>
 #include <linux/timekeeping.h>
@@ -333,6 +334,13 @@ struct linehandle_state {
        u32 numdescs;
 };
 
+#define GPIOHANDLE_REQUEST_VALID_FLAGS \
+       (GPIOHANDLE_REQUEST_INPUT | \
+       GPIOHANDLE_REQUEST_OUTPUT | \
+       GPIOHANDLE_REQUEST_ACTIVE_LOW | \
+       GPIOHANDLE_REQUEST_OPEN_DRAIN | \
+       GPIOHANDLE_REQUEST_OPEN_SOURCE)
+
 static long linehandle_ioctl(struct file *filep, unsigned int cmd,
                             unsigned long arg)
 {
@@ -344,6 +352,8 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd,
        if (cmd == GPIOHANDLE_GET_LINE_VALUES_IOCTL) {
                int val;
 
+               memset(&ghd, 0, sizeof(ghd));
+
                /* TODO: check if descriptors are really input */
                for (i = 0; i < lh->numdescs; i++) {
                        val = gpiod_get_value_cansleep(lh->descs[i]);
@@ -414,6 +424,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
 {
        struct gpiohandle_request handlereq;
        struct linehandle_state *lh;
+       struct file *file;
        int fd, i, ret;
 
        if (copy_from_user(&handlereq, ip, sizeof(handlereq)))
@@ -444,6 +455,17 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
                u32 lflags = handlereq.flags;
                struct gpio_desc *desc;
 
+               if (offset >= gdev->ngpio) {
+                       ret = -EINVAL;
+                       goto out_free_descs;
+               }
+
+               /* Return an error if an unknown flag is set */
+               if (lflags & ~GPIOHANDLE_REQUEST_VALID_FLAGS) {
+                       ret = -EINVAL;
+                       goto out_free_descs;
+               }
+
                desc = &gdev->descs[offset];
                ret = gpiod_request(desc, lh->label);
                if (ret)
@@ -479,26 +501,41 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip)
        i--;
        lh->numdescs = handlereq.lines;
 
-       fd = anon_inode_getfd("gpio-linehandle",
-                             &linehandle_fileops,
-                             lh,
-                             O_RDONLY | O_CLOEXEC);
+       fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
        if (fd < 0) {
                ret = fd;
                goto out_free_descs;
        }
 
+       file = anon_inode_getfile("gpio-linehandle",
+                                 &linehandle_fileops,
+                                 lh,
+                                 O_RDONLY | O_CLOEXEC);
+       if (IS_ERR(file)) {
+               ret = PTR_ERR(file);
+               goto out_put_unused_fd;
+       }
+
        handlereq.fd = fd;
        if (copy_to_user(ip, &handlereq, sizeof(handlereq))) {
-               ret = -EFAULT;
-               goto out_free_descs;
+               /*
+                * fput() will trigger the release() callback, so do not go onto
+                * the regular error cleanup path here.
+                */
+               fput(file);
+               put_unused_fd(fd);
+               return -EFAULT;
        }
 
+       fd_install(fd, file);
+
        dev_dbg(&gdev->dev, "registered chardev handle for %d lines\n",
                lh->numdescs);
 
        return 0;
 
+out_put_unused_fd:
+       put_unused_fd(fd);
 out_free_descs:
        for (; i >= 0; i--)
                gpiod_free(lh->descs[i]);
@@ -536,6 +573,10 @@ struct lineevent_state {
        struct mutex read_lock;
 };
 
+#define GPIOEVENT_REQUEST_VALID_FLAGS \
+       (GPIOEVENT_REQUEST_RISING_EDGE | \
+       GPIOEVENT_REQUEST_FALLING_EDGE)
+
 static unsigned int lineevent_poll(struct file *filep,
                                   struct poll_table_struct *wait)
 {
@@ -623,6 +664,8 @@ static long lineevent_ioctl(struct file *filep, unsigned int cmd,
        if (cmd == GPIOHANDLE_GET_LINE_VALUES_IOCTL) {
                int val;
 
+               memset(&ghd, 0, sizeof(ghd));
+
                val = gpiod_get_value_cansleep(le->desc);
                if (val < 0)
                        return val;
@@ -695,6 +738,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
        struct gpioevent_request eventreq;
        struct lineevent_state *le;
        struct gpio_desc *desc;
+       struct file *file;
        u32 offset;
        u32 lflags;
        u32 eflags;
@@ -726,6 +770,18 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
        lflags = eventreq.handleflags;
        eflags = eventreq.eventflags;
 
+       if (offset >= gdev->ngpio) {
+               ret = -EINVAL;
+               goto out_free_label;
+       }
+
+       /* Return an error if an unknown flag is set */
+       if ((lflags & ~GPIOHANDLE_REQUEST_VALID_FLAGS) ||
+           (eflags & ~GPIOEVENT_REQUEST_VALID_FLAGS)) {
+               ret = -EINVAL;
+               goto out_free_label;
+       }
+
        /* This is just wrong: we don't look for events on output lines */
        if (lflags & GPIOHANDLE_REQUEST_OUTPUT) {
                ret = -EINVAL;
@@ -777,23 +833,38 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
        if (ret)
                goto out_free_desc;
 
-       fd = anon_inode_getfd("gpio-event",
-                             &lineevent_fileops,
-                             le,
-                             O_RDONLY | O_CLOEXEC);
+       fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
        if (fd < 0) {
                ret = fd;
                goto out_free_irq;
        }
 
+       file = anon_inode_getfile("gpio-event",
+                                 &lineevent_fileops,
+                                 le,
+                                 O_RDONLY | O_CLOEXEC);
+       if (IS_ERR(file)) {
+               ret = PTR_ERR(file);
+               goto out_put_unused_fd;
+       }
+
        eventreq.fd = fd;
        if (copy_to_user(ip, &eventreq, sizeof(eventreq))) {
-               ret = -EFAULT;
-               goto out_free_irq;
+               /*
+                * fput() will trigger the release() callback, so do not go onto
+                * the regular error cleanup path here.
+                */
+               fput(file);
+               put_unused_fd(fd);
+               return -EFAULT;
        }
 
+       fd_install(fd, file);
+
        return 0;
 
+out_put_unused_fd:
+       put_unused_fd(fd);
 out_free_irq:
        free_irq(le->irq, le);
 out_free_desc:
@@ -823,6 +894,8 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        if (cmd == GPIO_GET_CHIPINFO_IOCTL) {
                struct gpiochip_info chipinfo;
 
+               memset(&chipinfo, 0, sizeof(chipinfo));
+
                strncpy(chipinfo.name, dev_name(&gdev->dev),
                        sizeof(chipinfo.name));
                chipinfo.name[sizeof(chipinfo.name)-1] = '\0';
@@ -839,7 +912,7 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
                if (copy_from_user(&lineinfo, ip, sizeof(lineinfo)))
                        return -EFAULT;
-               if (lineinfo.line_offset > gdev->ngpio)
+               if (lineinfo.line_offset >= gdev->ngpio)
                        return -EINVAL;
 
                desc = &gdev->descs[lineinfo.line_offset];
@@ -2664,8 +2737,11 @@ int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset)
        if (IS_ERR(desc))
                return PTR_ERR(desc);
 
-       /* Flush direction if something changed behind our back */
-       if (chip->get_direction) {
+       /*
+        * If it's fast: flush the direction setting if something changed
+        * behind our back
+        */
+       if (!chip->can_sleep && chip->get_direction) {
                int dir = chip->get_direction(chip, offset);
 
                if (dir)
index 039b57e4644c3936bfe345426ff17754847b199b..496f72b134eb07777f623f62d10eda6743565fa1 100644 (file)
@@ -459,6 +459,7 @@ struct amdgpu_bo {
        u64                             metadata_flags;
        void                            *metadata;
        u32                             metadata_size;
+       unsigned                        prime_shared_count;
        /* list of all virtual address to which this bo
         * is associated to
         */
index 892d60fb225b56b25d7a44edec959ab56f45ed90..2057683f7b5998d3641cf20af336838ee72d27d2 100644 (file)
@@ -395,9 +395,12 @@ static int acp_hw_fini(void *handle)
 {
        int i, ret;
        struct device *dev;
-
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       /* return early if no ACP */
+       if (!adev->acp.acp_genpd)
+               return 0;
+
        for (i = 0; i < ACP_DEVS ; i++) {
                dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
                ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev);
index dae35a96a694d0b6ffc3de5aae94e7344a69e565..02ca5dd978f664ad31cbe248674514e707ce143f 100644 (file)
@@ -34,6 +34,7 @@ struct amdgpu_atpx {
 
 static struct amdgpu_atpx_priv {
        bool atpx_detected;
+       bool bridge_pm_usable;
        /* handle for device - and atpx */
        acpi_handle dhandle;
        acpi_handle other_handle;
@@ -205,7 +206,11 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
        atpx->is_hybrid = false;
        if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
                printk("ATPX Hybrid Graphics\n");
-               atpx->functions.power_cntl = false;
+               /*
+                * Disable legacy PM methods only when pcie port PM is usable,
+                * otherwise the device might fail to power off or power on.
+                */
+               atpx->functions.power_cntl = !amdgpu_atpx_priv.bridge_pm_usable;
                atpx->is_hybrid = true;
        }
 
@@ -480,6 +485,7 @@ static int amdgpu_atpx_power_state(enum vga_switcheroo_client_id id,
  */
 static bool amdgpu_atpx_pci_probe_handle(struct pci_dev *pdev)
 {
+       struct pci_dev *parent_pdev = pci_upstream_bridge(pdev);
        acpi_handle dhandle, atpx_handle;
        acpi_status status;
 
@@ -494,6 +500,7 @@ static bool amdgpu_atpx_pci_probe_handle(struct pci_dev *pdev)
        }
        amdgpu_atpx_priv.dhandle = dhandle;
        amdgpu_atpx_priv.atpx.handle = atpx_handle;
+       amdgpu_atpx_priv.bridge_pm_usable = parent_pdev && parent_pdev->bridge_d3;
        return true;
 }
 
index 651115dcce12c6ff332eac87de6e9e5712b6d69c..c02db01f6583e620d885542f37a0da6f1780152e 100644 (file)
@@ -132,7 +132,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
                entry->priority = min(info[i].bo_priority,
                                      AMDGPU_BO_LIST_MAX_PRIORITY);
                entry->tv.bo = &entry->robj->tbo;
-               entry->tv.shared = true;
+               entry->tv.shared = !entry->robj->prime_shared_count;
 
                if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS)
                        gds_obj = entry->robj;
index 7a8bfa34682fdd8a92959d43328a7de058b42b7f..662976292535856d57dde4788364c20f7fba38e5 100644 (file)
@@ -795,10 +795,19 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
                if (!adev->pm.fw) {
                        switch (adev->asic_type) {
                        case CHIP_TOPAZ:
-                               strcpy(fw_name, "amdgpu/topaz_smc.bin");
+                               if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) ||
+                                   ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) ||
+                                   ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87)))
+                                       strcpy(fw_name, "amdgpu/topaz_k_smc.bin");
+                               else
+                                       strcpy(fw_name, "amdgpu/topaz_smc.bin");
                                break;
                        case CHIP_TONGA:
-                               strcpy(fw_name, "amdgpu/tonga_smc.bin");
+                               if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) ||
+                                   ((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1)))
+                                       strcpy(fw_name, "amdgpu/tonga_k_smc.bin");
+                               else
+                                       strcpy(fw_name, "amdgpu/tonga_smc.bin");
                                break;
                        case CHIP_FIJI:
                                strcpy(fw_name, "amdgpu/fiji_smc.bin");
index 2e3a0543760d0967164b2a5833cf9c39a63bf90b..086aa5c9c6348c45888d0b732f9417c0ec964cbe 100644 (file)
@@ -765,14 +765,20 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
        return ret;
 }
 
-static void amdgpu_connector_destroy(struct drm_connector *connector)
+static void amdgpu_connector_unregister(struct drm_connector *connector)
 {
        struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
 
-       if (amdgpu_connector->ddc_bus->has_aux) {
+       if (amdgpu_connector->ddc_bus && amdgpu_connector->ddc_bus->has_aux) {
                drm_dp_aux_unregister(&amdgpu_connector->ddc_bus->aux);
                amdgpu_connector->ddc_bus->has_aux = false;
        }
+}
+
+static void amdgpu_connector_destroy(struct drm_connector *connector)
+{
+       struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
+
        amdgpu_connector_free_edid(connector);
        kfree(amdgpu_connector->con_priv);
        drm_connector_unregister(connector);
@@ -826,6 +832,7 @@ static const struct drm_connector_funcs amdgpu_connector_lvds_funcs = {
        .dpms = drm_helper_connector_dpms,
        .detect = amdgpu_connector_lvds_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
+       .early_unregister = amdgpu_connector_unregister,
        .destroy = amdgpu_connector_destroy,
        .set_property = amdgpu_connector_set_lcd_property,
 };
@@ -936,6 +943,7 @@ static const struct drm_connector_funcs amdgpu_connector_vga_funcs = {
        .dpms = drm_helper_connector_dpms,
        .detect = amdgpu_connector_vga_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
+       .early_unregister = amdgpu_connector_unregister,
        .destroy = amdgpu_connector_destroy,
        .set_property = amdgpu_connector_set_property,
 };
@@ -1203,6 +1211,7 @@ static const struct drm_connector_funcs amdgpu_connector_dvi_funcs = {
        .detect = amdgpu_connector_dvi_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
        .set_property = amdgpu_connector_set_property,
+       .early_unregister = amdgpu_connector_unregister,
        .destroy = amdgpu_connector_destroy,
        .force = amdgpu_connector_dvi_force,
 };
@@ -1493,6 +1502,7 @@ static const struct drm_connector_funcs amdgpu_connector_dp_funcs = {
        .detect = amdgpu_connector_dp_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
        .set_property = amdgpu_connector_set_property,
+       .early_unregister = amdgpu_connector_unregister,
        .destroy = amdgpu_connector_destroy,
        .force = amdgpu_connector_dvi_force,
 };
@@ -1502,6 +1512,7 @@ static const struct drm_connector_funcs amdgpu_connector_edp_funcs = {
        .detect = amdgpu_connector_dp_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
        .set_property = amdgpu_connector_set_lcd_property,
+       .early_unregister = amdgpu_connector_unregister,
        .destroy = amdgpu_connector_destroy,
        .force = amdgpu_connector_dvi_force,
 };
index b0f6e6957536a7827acc979856f3ef5280b3ce79..82dc8d20e28acfdd2c2c4c2e9dca8ea0cca88d53 100644 (file)
@@ -519,7 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
                                           &duplicates);
                if (unlikely(r != 0)) {
-                       DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+                       if (r != -ERESTARTSYS)
+                               DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
                        goto error_free_pages;
                }
 
index e203e5561107146badbb45a2260763c87e06f8e0..a5e2fcbef0f0f24f54bcf54164eb610c463edd49 100644 (file)
@@ -43,6 +43,9 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
                ctx->rings[i].sequence = 1;
                ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
        }
+
+       ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+
        /* create context entity for each ring */
        for (i = 0; i < adev->num_rings; i++) {
                struct amdgpu_ring *ring = adev->rings[i];
index 7dbe85d67d2682854e8158e35f9ee8f8e0e65d94..3161d77bf29989f5a6edb6d817973630e2b996b4 100644 (file)
@@ -658,12 +658,10 @@ static bool amdgpu_vpost_needed(struct amdgpu_device *adev)
                return false;
 
        if (amdgpu_passthrough(adev)) {
-               /* for FIJI: In whole GPU pass-through virtualization case
-                * old smc fw won't clear some registers (e.g. MEM_SIZE, BIOS_SCRATCH)
-                * so amdgpu_card_posted return false and driver will incorrectly skip vPost.
-                * but if we force vPost do in pass-through case, the driver reload will hang.
-                * whether doing vPost depends on amdgpu_card_posted if smc version is above
-                * 00160e00 for FIJI.
+               /* for FIJI: In the whole GPU pass-through virtualization case, after VM reboot
+                * some old smc fw still needs the driver to do vPost, otherwise the gpu hangs,
+                * while smc fw versions above 22.15 don't have this flaw, so we force
+                * vPost to be executed for smc versions below 22.15
                 */
                if (adev->asic_type == CHIP_FIJI) {
                        int err;
@@ -674,22 +672,11 @@ static bool amdgpu_vpost_needed(struct amdgpu_device *adev)
                                return true;
 
                        fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
-                       if (fw_ver >= 0x00160e00)
-                               return !amdgpu_card_posted(adev);
+                       if (fw_ver < 0x00160e00)
+                               return true;
                }
-       } else {
-               /* in bare-metal case, amdgpu_card_posted return false
-                * after system reboot/boot, and return true if driver
-                * reloaded.
-                * we shouldn't do vPost after driver reload otherwise GPU
-                * could hang.
-                */
-               if (amdgpu_card_posted(adev))
-                       return false;
        }
-
-       /* we assume vPost is neede for all other cases */
-       return true;
+       return !amdgpu_card_posted(adev);
 }
 
 /**
@@ -1408,16 +1395,6 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
        for (i = 0; i < adev->num_ip_blocks; i++) {
                if (!adev->ip_block_status[i].valid)
                        continue;
-               if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_UVD ||
-                       adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_VCE)
-                       continue;
-               /* enable clockgating to save power */
-               r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
-                                                                   AMD_CG_STATE_GATE);
-               if (r) {
-                       DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", adev->ip_blocks[i].funcs->name, r);
-                       return r;
-               }
                if (adev->ip_blocks[i].funcs->late_init) {
                        r = adev->ip_blocks[i].funcs->late_init((void *)adev);
                        if (r) {
@@ -1426,6 +1403,18 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
                        }
                        adev->ip_block_status[i].late_initialized = true;
                }
+               /* skip CG for VCE/UVD, it's handled specially */
+               if (adev->ip_blocks[i].type != AMD_IP_BLOCK_TYPE_UVD &&
+                   adev->ip_blocks[i].type != AMD_IP_BLOCK_TYPE_VCE) {
+                       /* enable clockgating to save power */
+                       r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
+                                                                           AMD_CG_STATE_GATE);
+                       if (r) {
+                               DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
+                                         adev->ip_blocks[i].funcs->name, r);
+                               return r;
+                       }
+               }
        }
 
        return 0;
@@ -1435,6 +1424,30 @@ static int amdgpu_fini(struct amdgpu_device *adev)
 {
        int i, r;
 
+       /* need to disable SMC first */
+       for (i = 0; i < adev->num_ip_blocks; i++) {
+               if (!adev->ip_block_status[i].hw)
+                       continue;
+               if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_SMC) {
+                       /* ungate blocks before hw fini so that we can shutdown the blocks safely */
+                       r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
+                                                                           AMD_CG_STATE_UNGATE);
+                       if (r) {
+                               DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
+                                         adev->ip_blocks[i].funcs->name, r);
+                               return r;
+                       }
+                       r = adev->ip_blocks[i].funcs->hw_fini((void *)adev);
+                       /* XXX handle errors */
+                       if (r) {
+                               DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
+                                         adev->ip_blocks[i].funcs->name, r);
+                       }
+                       adev->ip_block_status[i].hw = false;
+                       break;
+               }
+       }
+
        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
                if (!adev->ip_block_status[i].hw)
                        continue;
@@ -1933,6 +1946,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
        /* evict remaining vram memory */
        amdgpu_bo_evict_vram(adev);
 
+       amdgpu_atombios_scratch_regs_save(adev);
        pci_save_state(dev->pdev);
        if (suspend) {
                /* Shut down the device */
@@ -1984,6 +1998,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
                        return r;
                }
        }
+       amdgpu_atombios_scratch_regs_restore(adev);
 
        /* post card */
        if (!amdgpu_card_posted(adev) || !resume) {
@@ -2073,7 +2088,8 @@ static bool amdgpu_check_soft_reset(struct amdgpu_device *adev)
                if (!adev->ip_block_status[i].valid)
                        continue;
                if (adev->ip_blocks[i].funcs->check_soft_reset)
-                       adev->ip_blocks[i].funcs->check_soft_reset(adev);
+                       adev->ip_block_status[i].hang =
+                               adev->ip_blocks[i].funcs->check_soft_reset(adev);
                if (adev->ip_block_status[i].hang) {
                        DRM_INFO("IP block:%d is hang!\n", i);
                        asic_hang = true;
@@ -2102,12 +2118,20 @@ static int amdgpu_pre_soft_reset(struct amdgpu_device *adev)
 
 static bool amdgpu_need_full_reset(struct amdgpu_device *adev)
 {
-       if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang ||
-           adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang ||
-           adev->ip_block_status[AMD_IP_BLOCK_TYPE_ACP].hang ||
-           adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang) {
-               DRM_INFO("Some block need full reset!\n");
-               return true;
+       int i;
+
+       for (i = 0; i < adev->num_ip_blocks; i++) {
+               if (!adev->ip_block_status[i].valid)
+                       continue;
+               if ((adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_GMC) ||
+                   (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_SMC) ||
+                   (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_ACP) ||
+                   (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_DCE)) {
+                       if (adev->ip_block_status[i].hang) {
+                               DRM_INFO("Some block need full reset!\n");
+                               return true;
+                       }
+               }
        }
        return false;
 }
@@ -2233,8 +2257,6 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
        }
 
        if (need_full_reset) {
-               /* save scratch */
-               amdgpu_atombios_scratch_regs_save(adev);
                r = amdgpu_suspend(adev);
 
 retry:
@@ -2244,8 +2266,9 @@ retry:
                        amdgpu_display_stop_mc_access(adev, &save);
                        amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
                }
-
+               amdgpu_atombios_scratch_regs_save(adev);
                r = amdgpu_asic_reset(adev);
+               amdgpu_atombios_scratch_regs_restore(adev);
                /* post card */
                amdgpu_atom_asic_init(adev->mode_info.atom_context);
 
@@ -2253,8 +2276,6 @@ retry:
                        dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
                        r = amdgpu_resume(adev);
                }
-               /* restore scratch */
-               amdgpu_atombios_scratch_regs_restore(adev);
        }
        if (!r) {
                amdgpu_irq_gpu_reset_resume_helper(adev);
index fe36caf1b7d7b084762d19fb7fa6516a42dd3fa3..14f57d9915e3fc0aa8d5a8e77b734073f05ecb9b 100644 (file)
@@ -113,24 +113,26 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
        printk("\n");
 }
 
+
 u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev)
 {
        struct drm_device *dev = adev->ddev;
        struct drm_crtc *crtc;
        struct amdgpu_crtc *amdgpu_crtc;
-       u32 line_time_us, vblank_lines;
+       u32 vblank_in_pixels;
        u32 vblank_time_us = 0xffffffff; /* if the displays are off, vblank time is max */
 
        if (adev->mode_info.num_crtc && adev->mode_info.mode_config_initialized) {
                list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
                        amdgpu_crtc = to_amdgpu_crtc(crtc);
                        if (crtc->enabled && amdgpu_crtc->enabled && amdgpu_crtc->hw_mode.clock) {
-                               line_time_us = (amdgpu_crtc->hw_mode.crtc_htotal * 1000) /
-                                       amdgpu_crtc->hw_mode.clock;
-                               vblank_lines = amdgpu_crtc->hw_mode.crtc_vblank_end -
+                               vblank_in_pixels =
+                                       amdgpu_crtc->hw_mode.crtc_htotal *
+                                       (amdgpu_crtc->hw_mode.crtc_vblank_end -
                                        amdgpu_crtc->hw_mode.crtc_vdisplay +
-                                       (amdgpu_crtc->v_border * 2);
-                               vblank_time_us = vblank_lines * line_time_us;
+                                       (amdgpu_crtc->v_border * 2));
+
+                               vblank_time_us = vblank_in_pixels * 1000 / amdgpu_crtc->hw_mode.clock;
                                break;
                        }
                }
index 71ed27eb3ddebd3d2463cc136fb268db74aceb60..02ff0747197c13e91ee378696a0fc46fe2b193f0 100644 (file)
@@ -735,8 +735,20 @@ static struct pci_driver amdgpu_kms_pci_driver = {
 
 static int __init amdgpu_init(void)
 {
-       amdgpu_sync_init();
-       amdgpu_fence_slab_init();
+       int r;
+
+       r = amdgpu_sync_init();
+       if (r)
+               goto error_sync;
+
+       r = amdgpu_fence_slab_init();
+       if (r)
+               goto error_fence;
+
+       r = amd_sched_fence_slab_init();
+       if (r)
+               goto error_sched;
+
        if (vgacon_text_force()) {
                DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
                return -EINVAL;
@@ -748,6 +760,15 @@ static int __init amdgpu_init(void)
        amdgpu_register_atpx_handler();
        /* let modprobe override vga console setting */
        return drm_pci_init(driver, pdriver);
+
+error_sched:
+       amdgpu_fence_slab_fini();
+
+error_fence:
+       amdgpu_sync_fini();
+
+error_sync:
+       return r;
 }
 
 static void __exit amdgpu_exit(void)
@@ -756,6 +777,7 @@ static void __exit amdgpu_exit(void)
        drm_pci_exit(driver, pdriver);
        amdgpu_unregister_atpx_handler();
        amdgpu_sync_fini();
+       amd_sched_fence_slab_fini();
        amdgpu_fence_slab_fini();
 }
 
index 3a2e42f4b897647520f49db963412789672d0229..77b34ec9263215f1cd13d158587589dfa7fb0484 100644 (file)
@@ -68,6 +68,7 @@ int amdgpu_fence_slab_init(void)
 
 void amdgpu_fence_slab_fini(void)
 {
+       rcu_barrier();
        kmem_cache_destroy(amdgpu_fence_slab);
 }
 /*
index 278708f5a744eebb69f0d719bfcb198efcf2ec11..9fa809876339dd47e7cb225af2d8695f7c12b593 100644 (file)
@@ -239,6 +239,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
        if (r) {
                adev->irq.installed = false;
                flush_work(&adev->hotplug_work);
+               cancel_work_sync(&adev->reset_work);
                return r;
        }
 
@@ -264,6 +265,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
                if (adev->irq.msi_enabled)
                        pci_disable_msi(adev->pdev);
                flush_work(&adev->hotplug_work);
+               cancel_work_sync(&adev->reset_work);
        }
 
        for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; ++i) {
index c2c7fb140338061f77cc3d2560e2f99f46e37346..3938fca1ea8e5f4c69fd5e0746fcc7ec60c6d2f1 100644 (file)
@@ -99,6 +99,8 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 
        if ((amdgpu_runtime_pm != 0) &&
            amdgpu_has_atpx() &&
+           (amdgpu_is_atpx_hybrid() ||
+            amdgpu_has_atpx_dgpu_power_cntl()) &&
            ((flags & AMD_IS_APU) == 0))
                flags |= AMD_IS_PX;
 
@@ -459,10 +461,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
                /* return all clocks in KHz */
                dev_info.gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10;
                if (adev->pm.dpm_enabled) {
-                       dev_info.max_engine_clock =
-                               adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk * 10;
-                       dev_info.max_memory_clock =
-                               adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.mclk * 10;
+                       dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10;
+                       dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10;
                } else {
                        dev_info.max_engine_clock = adev->pm.default_sclk * 10;
                        dev_info.max_memory_clock = adev->pm.default_mclk * 10;
index aa074fac0c7f66ef796b2da0704c314c96f2b5d5..f3efb1c5dae96469ecdb6944e991f832b43e26dd 100644 (file)
@@ -754,6 +754,10 @@ static const char *amdgpu_vram_names[] = {
 
 int amdgpu_bo_init(struct amdgpu_device *adev)
 {
+       /* reserve PAT memory space to WC for VRAM */
+       arch_io_reserve_memtype_wc(adev->mc.aper_base,
+                                  adev->mc.aper_size);
+
        /* Add an MTRR for the VRAM */
        adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base,
                                              adev->mc.aper_size);
@@ -769,6 +773,7 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)
 {
        amdgpu_ttm_fini(adev);
        arch_phys_wc_del(adev->mc.vram_mtrr);
+       arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size);
 }
 
 int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
index 7700dc22f2432bb3f6d020a7c3acf3f3ccaaae9f..3826d5aea0a6a55d00d9aae2bda9f7b04489ec60 100644 (file)
@@ -74,20 +74,36 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
        if (ret)
                return ERR_PTR(ret);
 
+       bo->prime_shared_count = 1;
        return &bo->gem_base;
 }
 
 int amdgpu_gem_prime_pin(struct drm_gem_object *obj)
 {
        struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
-       int ret = 0;
+       long ret = 0;
 
        ret = amdgpu_bo_reserve(bo, false);
        if (unlikely(ret != 0))
                return ret;
 
+       /*
+        * Wait for all shared fences to complete before we switch to future
+        * use of exclusive fence on this prime shared bo.
+        */
+       ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
+                                                 MAX_SCHEDULE_TIMEOUT);
+       if (unlikely(ret < 0)) {
+               DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
+               amdgpu_bo_unreserve(bo);
+               return ret;
+       }
+
        /* pin buffer into GTT */
        ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+       if (likely(ret == 0))
+               bo->prime_shared_count++;
+
        amdgpu_bo_unreserve(bo);
        return ret;
 }
@@ -102,6 +118,8 @@ void amdgpu_gem_prime_unpin(struct drm_gem_object *obj)
                return;
 
        amdgpu_bo_unpin(bo);
+       if (bo->prime_shared_count)
+               bo->prime_shared_count--;
        amdgpu_bo_unreserve(bo);
 }
 
index e1fa8731d1e2db7b16c09edf44d195e8d457cf07..3cb5e903cd62896529d2ea18d5ea7f49c2feae73 100644 (file)
@@ -345,8 +345,8 @@ static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
        ent = debugfs_create_file(name,
                                  S_IFREG | S_IRUGO, root,
                                  ring, &amdgpu_debugfs_ring_fops);
-       if (IS_ERR(ent))
-               return PTR_ERR(ent);
+       if (!ent)
+               return -ENOMEM;
 
        i_size_write(ent->d_inode, ring->ring_size + 12);
        ring->ent = ent;
index 887483b8b818383c5139ec8c2059e3fd19f16014..dcaf691f56b5577352d2238bdfd27e2f1aca2386 100644 (file)
@@ -555,10 +555,13 @@ struct amdgpu_ttm_tt {
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
        struct amdgpu_ttm_tt *gtt = (void *)ttm;
-       int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+       unsigned int flags = 0;
        unsigned pinned = 0;
        int r;
 
+       if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
+               flags |= FOLL_WRITE;
+
        if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
                /* check that we only use anonymous memory
                   to prevent problems with writeback */
@@ -581,7 +584,7 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
                list_add(&guptask.list, &gtt->guptasks);
                spin_unlock(&gtt->guptasklock);
 
-               r = get_user_pages(userptr, num_pages, write, 0, p, NULL);
+               r = get_user_pages(userptr, num_pages, flags, p, NULL);
 
                spin_lock(&gtt->guptasklock);
                list_del(&guptask.list);
index 06f24322e7c31bcfbb9dc867909ebe234968accb..968c4260d7a7e0ccfa94f4a3f069acc9f73e1830 100644 (file)
@@ -1758,5 +1758,6 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
                fence_put(adev->vm_manager.ids[i].first);
                amdgpu_sync_free(&adev->vm_manager.ids[i].active);
                fence_put(id->flushed_updates);
+               fence_put(id->last_flush);
        }
 }
index 1d8c375a3561c9f872a4d87c0b435048f5a4b10b..5be788b269e22232a61b75e83bdd822bf142c583 100644 (file)
@@ -4075,7 +4075,7 @@ static int ci_enable_uvd_dpm(struct amdgpu_device *adev, bool enable)
                                                          pi->dpm_level_enable_mask.mclk_dpm_enable_mask);
                }
        } else {
-               if (pi->last_mclk_dpm_enable_mask & 0x1) {
+               if (pi->uvd_enabled) {
                        pi->uvd_enabled = false;
                        pi->dpm_level_enable_mask.mclk_dpm_enable_mask |= 1;
                        amdgpu_ci_send_msg_to_smc_with_parameter(adev,
@@ -6236,6 +6236,8 @@ static int ci_dpm_sw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       flush_work(&adev->pm.dpm.thermal.work);
+
        mutex_lock(&adev->pm.mutex);
        amdgpu_pm_sysfs_fini(adev);
        ci_dpm_fini(adev);
index f80a0834e889e8ff07b77846f28fcb619632ca14..3c082e1437303d3247da620fcd08ba8a031e62e0 100644 (file)
@@ -1514,14 +1514,16 @@ static int cz_dpm_set_powergating_state(void *handle,
        return 0;
 }
 
-/* borrowed from KV, need future unify */
 static int cz_dpm_get_temperature(struct amdgpu_device *adev)
 {
        int actual_temp = 0;
-       uint32_t temp = RREG32_SMC(0xC0300E0C);
+       uint32_t val = RREG32_SMC(ixTHM_TCON_CUR_TMP);
+       uint32_t temp = REG_GET_FIELD(val, THM_TCON_CUR_TMP, CUR_TEMP);
 
-       if (temp)
+       if (REG_GET_FIELD(val, THM_TCON_CUR_TMP, CUR_TEMP_RANGE_SEL))
                actual_temp = 1000 * ((temp / 8) - 49);
+       else
+               actual_temp = 1000 * (temp / 8);
 
        return actual_temp;
 }
index 613ebb7ed50f5e33699fb254bd417b238f6c2783..9260caef74fa07f7045f1bbc30e5481bb6f5558e 100644 (file)
@@ -3151,10 +3151,6 @@ static int dce_v10_0_hw_fini(void *handle)
 
 static int dce_v10_0_suspend(void *handle)
 {
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-       amdgpu_atombios_scratch_regs_save(adev);
-
        return dce_v10_0_hw_fini(handle);
 }
 
@@ -3165,8 +3161,6 @@ static int dce_v10_0_resume(void *handle)
 
        ret = dce_v10_0_hw_init(handle);
 
-       amdgpu_atombios_scratch_regs_restore(adev);
-
        /* turn on the BL */
        if (adev->mode_info.bl_encoder) {
                u8 bl_level = amdgpu_display_backlight_get_level(adev,
@@ -3188,16 +3182,11 @@ static int dce_v10_0_wait_for_idle(void *handle)
        return 0;
 }
 
-static int dce_v10_0_check_soft_reset(void *handle)
+static bool dce_v10_0_check_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (dce_v10_0_is_display_hung(adev))
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang = true;
-       else
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang = false;
-
-       return 0;
+       return dce_v10_0_is_display_hung(adev);
 }
 
 static int dce_v10_0_soft_reset(void *handle)
@@ -3205,9 +3194,6 @@ static int dce_v10_0_soft_reset(void *handle)
        u32 srbm_soft_reset = 0, tmp;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang)
-               return 0;
-
        if (dce_v10_0_is_display_hung(adev))
                srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
 
index f264b8f17ad1b302ebb51f0f37a1805655529470..367739bd19279fa5f968675b54733bad289b65e0 100644 (file)
@@ -3215,10 +3215,6 @@ static int dce_v11_0_hw_fini(void *handle)
 
 static int dce_v11_0_suspend(void *handle)
 {
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-       amdgpu_atombios_scratch_regs_save(adev);
-
        return dce_v11_0_hw_fini(handle);
 }
 
@@ -3229,8 +3225,6 @@ static int dce_v11_0_resume(void *handle)
 
        ret = dce_v11_0_hw_init(handle);
 
-       amdgpu_atombios_scratch_regs_restore(adev);
-
        /* turn on the BL */
        if (adev->mode_info.bl_encoder) {
                u8 bl_level = amdgpu_display_backlight_get_level(adev,
index b948d6cb139936670228d6a9e805ecd7fbe9b225..15f9fc0514b29b800f1fb5c83cfd3f43ea52ec04 100644 (file)
@@ -2482,10 +2482,6 @@ static int dce_v6_0_hw_fini(void *handle)
 
 static int dce_v6_0_suspend(void *handle)
 {
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-       amdgpu_atombios_scratch_regs_save(adev);
-
        return dce_v6_0_hw_fini(handle);
 }
 
@@ -2496,8 +2492,6 @@ static int dce_v6_0_resume(void *handle)
 
        ret = dce_v6_0_hw_init(handle);
 
-       amdgpu_atombios_scratch_regs_restore(adev);
-
        /* turn on the BL */
        if (adev->mode_info.bl_encoder) {
                u8 bl_level = amdgpu_display_backlight_get_level(adev,
index 5966166ec94c886d48035f1492b42ea89a24d354..8c4d808db0f1279af1b5a0c05e364c6e6c07bdfd 100644 (file)
@@ -3033,10 +3033,6 @@ static int dce_v8_0_hw_fini(void *handle)
 
 static int dce_v8_0_suspend(void *handle)
 {
-       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-       amdgpu_atombios_scratch_regs_save(adev);
-
        return dce_v8_0_hw_fini(handle);
 }
 
@@ -3047,8 +3043,6 @@ static int dce_v8_0_resume(void *handle)
 
        ret = dce_v8_0_hw_init(handle);
 
-       amdgpu_atombios_scratch_regs_restore(adev);
-
        /* turn on the BL */
        if (adev->mode_info.bl_encoder) {
                u8 bl_level = amdgpu_display_backlight_get_level(adev,
index 6c6ff57b1c95f2824537933c8c8ecd6525bda3ab..bb97182dc74991ae5b5ad0ab5d4121757ae54789 100644 (file)
@@ -640,7 +640,6 @@ static const u32 stoney_mgcg_cgcg_init[] =
        mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
        mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
-       mmATC_MISC_CG, 0xffffffff, 0x000c0200,
 };
 
 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -4087,14 +4086,21 @@ static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
 {
        int r;
+       u32 tmp;
 
        gfx_v8_0_rlc_stop(adev);
 
        /* disable CG */
-       WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
+       tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
+       tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
+                RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
+       WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
        if (adev->asic_type == CHIP_POLARIS11 ||
-           adev->asic_type == CHIP_POLARIS10)
-               WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
+           adev->asic_type == CHIP_POLARIS10) {
+               tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
+               tmp &= ~0x3;
+               WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
+       }
 
        /* disable PG */
        WREG32(mmRLC_PG_CNTL, 0);
@@ -5137,7 +5143,7 @@ static int gfx_v8_0_wait_for_idle(void *handle)
        return -ETIMEDOUT;
 }
 
-static int gfx_v8_0_check_soft_reset(void *handle)
+static bool gfx_v8_0_check_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
@@ -5189,16 +5195,14 @@ static int gfx_v8_0_check_soft_reset(void *handle)
                                                SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
 
        if (grbm_soft_reset || srbm_soft_reset) {
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang = true;
                adev->gfx.grbm_soft_reset = grbm_soft_reset;
                adev->gfx.srbm_soft_reset = srbm_soft_reset;
+               return true;
        } else {
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang = false;
                adev->gfx.grbm_soft_reset = 0;
                adev->gfx.srbm_soft_reset = 0;
+               return false;
        }
-
-       return 0;
 }
 
 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
@@ -5226,7 +5230,8 @@ static int gfx_v8_0_pre_soft_reset(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang)
+       if ((!adev->gfx.grbm_soft_reset) &&
+           (!adev->gfx.srbm_soft_reset))
                return 0;
 
        grbm_soft_reset = adev->gfx.grbm_soft_reset;
@@ -5264,7 +5269,8 @@ static int gfx_v8_0_soft_reset(void *handle)
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
        u32 tmp;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang)
+       if ((!adev->gfx.grbm_soft_reset) &&
+           (!adev->gfx.srbm_soft_reset))
                return 0;
 
        grbm_soft_reset = adev->gfx.grbm_soft_reset;
@@ -5334,7 +5340,8 @@ static int gfx_v8_0_post_soft_reset(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang)
+       if ((!adev->gfx.grbm_soft_reset) &&
+           (!adev->gfx.srbm_soft_reset))
                return 0;
 
        grbm_soft_reset = adev->gfx.grbm_soft_reset;
index 1b319f5bc6962d5d6250db12fcb18302db789ade..a16b2201d52cac3a53870e7253e1d367160765ef 100644 (file)
@@ -100,6 +100,7 @@ static const u32 cz_mgcg_cgcg_init[] =
 
 static const u32 stoney_mgcg_cgcg_init[] =
 {
+       mmATC_MISC_CG, 0xffffffff, 0x000c0200,
        mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
 };
 
@@ -1099,7 +1100,7 @@ static int gmc_v8_0_wait_for_idle(void *handle)
 
 }
 
-static int gmc_v8_0_check_soft_reset(void *handle)
+static bool gmc_v8_0_check_soft_reset(void *handle)
 {
        u32 srbm_soft_reset = 0;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1116,20 +1117,19 @@ static int gmc_v8_0_check_soft_reset(void *handle)
                                                        SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
        }
        if (srbm_soft_reset) {
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang = true;
                adev->mc.srbm_soft_reset = srbm_soft_reset;
+               return true;
        } else {
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang = false;
                adev->mc.srbm_soft_reset = 0;
+               return false;
        }
-       return 0;
 }
 
 static int gmc_v8_0_pre_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang)
+       if (!adev->mc.srbm_soft_reset)
                return 0;
 
        gmc_v8_0_mc_stop(adev, &adev->mc.save);
@@ -1145,7 +1145,7 @@ static int gmc_v8_0_soft_reset(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang)
+       if (!adev->mc.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->mc.srbm_soft_reset;
 
@@ -1175,7 +1175,7 @@ static int gmc_v8_0_post_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang)
+       if (!adev->mc.srbm_soft_reset)
                return 0;
 
        gmc_v8_0_mc_resume(adev, &adev->mc.save);
index f8618a3881a841a3160115eabd065a9b311f862b..71d2856222fa9be710be004ad40f3d32c5ab17cb 100644 (file)
@@ -3063,6 +3063,8 @@ static int kv_dpm_sw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       flush_work(&adev->pm.dpm.thermal.work);
+
        mutex_lock(&adev->pm.mutex);
        amdgpu_pm_sysfs_fini(adev);
        kv_dpm_fini(adev);
index f325fd86430b9e3d565f28fd5b11ace2c2a76667..a9d10941fb53d9ab2290fd6bdbe3351d91b0acaa 100644 (file)
@@ -1268,7 +1268,7 @@ static int sdma_v3_0_wait_for_idle(void *handle)
        return -ETIMEDOUT;
 }
 
-static int sdma_v3_0_check_soft_reset(void *handle)
+static bool sdma_v3_0_check_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;
@@ -1281,14 +1281,12 @@ static int sdma_v3_0_check_soft_reset(void *handle)
        }
 
        if (srbm_soft_reset) {
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang = true;
                adev->sdma.srbm_soft_reset = srbm_soft_reset;
+               return true;
        } else {
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang = false;
                adev->sdma.srbm_soft_reset = 0;
+               return false;
        }
-
-       return 0;
 }
 
 static int sdma_v3_0_pre_soft_reset(void *handle)
@@ -1296,7 +1294,7 @@ static int sdma_v3_0_pre_soft_reset(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang)
+       if (!adev->sdma.srbm_soft_reset)
                return 0;
 
        srbm_soft_reset = adev->sdma.srbm_soft_reset;
@@ -1315,7 +1313,7 @@ static int sdma_v3_0_post_soft_reset(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang)
+       if (!adev->sdma.srbm_soft_reset)
                return 0;
 
        srbm_soft_reset = adev->sdma.srbm_soft_reset;
@@ -1335,7 +1333,7 @@ static int sdma_v3_0_soft_reset(void *handle)
        u32 srbm_soft_reset = 0;
        u32 tmp;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang)
+       if (!adev->sdma.srbm_soft_reset)
                return 0;
 
        srbm_soft_reset = adev->sdma.srbm_soft_reset;
index 8bd08925b370b753fbe217031c1e29e5b3b4426e..d6f85b1a0b93540e60399b337310cee3be6753c0 100644 (file)
@@ -3477,6 +3477,49 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
        int i;
        struct si_dpm_quirk *p = si_dpm_quirk_list;
 
+       /* limit all SI kickers */
+       if (adev->asic_type == CHIP_PITCAIRN) {
+               if ((adev->pdev->revision == 0x81) ||
+                   (adev->pdev->device == 0x6810) ||
+                   (adev->pdev->device == 0x6811) ||
+                   (adev->pdev->device == 0x6816) ||
+                   (adev->pdev->device == 0x6817) ||
+                   (adev->pdev->device == 0x6806))
+                       max_mclk = 120000;
+       } else if (adev->asic_type == CHIP_VERDE) {
+               if ((adev->pdev->revision == 0x81) ||
+                   (adev->pdev->revision == 0x83) ||
+                   (adev->pdev->revision == 0x87) ||
+                   (adev->pdev->device == 0x6820) ||
+                   (adev->pdev->device == 0x6821) ||
+                   (adev->pdev->device == 0x6822) ||
+                   (adev->pdev->device == 0x6823) ||
+                   (adev->pdev->device == 0x682A) ||
+                   (adev->pdev->device == 0x682B)) {
+                       max_sclk = 75000;
+                       max_mclk = 80000;
+               }
+       } else if (adev->asic_type == CHIP_OLAND) {
+               if ((adev->pdev->revision == 0xC7) ||
+                   (adev->pdev->revision == 0x80) ||
+                   (adev->pdev->revision == 0x81) ||
+                   (adev->pdev->revision == 0x83) ||
+                   (adev->pdev->device == 0x6604) ||
+                   (adev->pdev->device == 0x6605)) {
+                       max_sclk = 75000;
+                       max_mclk = 80000;
+               }
+       } else if (adev->asic_type == CHIP_HAINAN) {
+               if ((adev->pdev->revision == 0x81) ||
+                   (adev->pdev->revision == 0x83) ||
+                   (adev->pdev->revision == 0xC3) ||
+                   (adev->pdev->device == 0x6664) ||
+                   (adev->pdev->device == 0x6665) ||
+                   (adev->pdev->device == 0x6667)) {
+                       max_sclk = 75000;
+                       max_mclk = 80000;
+               }
+       }
        /* Apply dpm quirks */
        while (p && p->chip_device != 0) {
                if (adev->pdev->vendor == p->chip_vendor &&
@@ -3489,16 +3532,6 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev,
                }
                ++p;
        }
-       /* limit mclk on all R7 370 parts for stability */
-       if (adev->pdev->device == 0x6811 &&
-           adev->pdev->revision == 0x81)
-               max_mclk = 120000;
-       /* limit sclk/mclk on Jet parts for stability */
-       if (adev->pdev->device == 0x6665 &&
-           adev->pdev->revision == 0xc3) {
-               max_sclk = 75000;
-               max_mclk = 80000;
-       }
 
        if (rps->vce_active) {
                rps->evclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].evclk;
@@ -7771,6 +7804,8 @@ static int si_dpm_sw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       flush_work(&adev->pm.dpm.thermal.work);
+
        mutex_lock(&adev->pm.mutex);
        amdgpu_pm_sysfs_fini(adev);
        si_dpm_fini(adev);
index d127d59f953a8ded522884fa7f9eba77e648db2d..b4ea229bb4498ff1f84209dec7f211198ed788ed 100644 (file)
@@ -373,7 +373,7 @@ static int tonga_ih_wait_for_idle(void *handle)
        return -ETIMEDOUT;
 }
 
-static int tonga_ih_check_soft_reset(void *handle)
+static bool tonga_ih_check_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;
@@ -384,21 +384,19 @@ static int tonga_ih_check_soft_reset(void *handle)
                                                SOFT_RESET_IH, 1);
 
        if (srbm_soft_reset) {
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang = true;
                adev->irq.srbm_soft_reset = srbm_soft_reset;
+               return true;
        } else {
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang = false;
                adev->irq.srbm_soft_reset = 0;
+               return false;
        }
-
-       return 0;
 }
 
 static int tonga_ih_pre_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang)
+       if (!adev->irq.srbm_soft_reset)
                return 0;
 
        return tonga_ih_hw_fini(adev);
@@ -408,7 +406,7 @@ static int tonga_ih_post_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang)
+       if (!adev->irq.srbm_soft_reset)
                return 0;
 
        return tonga_ih_hw_init(adev);
@@ -419,7 +417,7 @@ static int tonga_ih_soft_reset(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang)
+       if (!adev->irq.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->irq.srbm_soft_reset;
 
index e0fd9f21ed9585ce37c310605f2ea56524fa60bc..ab3df6d756562ee33b97d2c48aaf6f7bfadc6f2a 100644 (file)
@@ -770,7 +770,7 @@ static int uvd_v6_0_wait_for_idle(void *handle)
 }
 
 #define AMDGPU_UVD_STATUS_BUSY_MASK    0xfd
-static int uvd_v6_0_check_soft_reset(void *handle)
+static bool uvd_v6_0_check_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;
@@ -782,19 +782,19 @@ static int uvd_v6_0_check_soft_reset(void *handle)
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
 
        if (srbm_soft_reset) {
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang = true;
                adev->uvd.srbm_soft_reset = srbm_soft_reset;
+               return true;
        } else {
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang = false;
                adev->uvd.srbm_soft_reset = 0;
+               return false;
        }
-       return 0;
 }
+
 static int uvd_v6_0_pre_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang)
+       if (!adev->uvd.srbm_soft_reset)
                return 0;
 
        uvd_v6_0_stop(adev);
@@ -806,7 +806,7 @@ static int uvd_v6_0_soft_reset(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang)
+       if (!adev->uvd.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->uvd.srbm_soft_reset;
 
@@ -836,7 +836,7 @@ static int uvd_v6_0_post_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang)
+       if (!adev->uvd.srbm_soft_reset)
                return 0;
 
        mdelay(5);
index 3f6db4ec0102d0f54d2b5cba6c33a76b9d64ad46..6feed726e299378e39d08cf74f5d7e71b20a2cc4 100644 (file)
@@ -52,6 +52,8 @@
 #define VCE_V3_0_STACK_SIZE    (64 * 1024)
 #define VCE_V3_0_DATA_SIZE     ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024))
 
+#define FW_52_8_3      ((52 << 24) | (8 << 16) | (3 << 8))
+
 static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
 static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -382,6 +384,10 @@ static int vce_v3_0_sw_init(void *handle)
        if (r)
                return r;
 
+       /* 52.8.3 required for 3 ring support */
+       if (adev->vce.fw_version < FW_52_8_3)
+               adev->vce.num_rings = 2;
+
        r = amdgpu_vce_resume(adev);
        if (r)
                return r;
@@ -561,7 +567,7 @@ static int vce_v3_0_wait_for_idle(void *handle)
 #define  AMDGPU_VCE_STATUS_BUSY_MASK (VCE_STATUS_VCPU_REPORT_AUTO_BUSY_MASK | \
                                      VCE_STATUS_VCPU_REPORT_RB0_BUSY_MASK)
 
-static int vce_v3_0_check_soft_reset(void *handle)
+static bool vce_v3_0_check_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset = 0;
@@ -591,16 +597,15 @@ static int vce_v3_0_check_soft_reset(void *handle)
                srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
        }
        WREG32_FIELD(GRBM_GFX_INDEX, INSTANCE_INDEX, 0);
+       mutex_unlock(&adev->grbm_idx_mutex);
 
        if (srbm_soft_reset) {
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = true;
                adev->vce.srbm_soft_reset = srbm_soft_reset;
+               return true;
        } else {
-               adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang = false;
                adev->vce.srbm_soft_reset = 0;
+               return false;
        }
-       mutex_unlock(&adev->grbm_idx_mutex);
-       return 0;
 }
 
 static int vce_v3_0_soft_reset(void *handle)
@@ -608,7 +613,7 @@ static int vce_v3_0_soft_reset(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        u32 srbm_soft_reset;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang)
+       if (!adev->vce.srbm_soft_reset)
                return 0;
        srbm_soft_reset = adev->vce.srbm_soft_reset;
 
@@ -638,7 +643,7 @@ static int vce_v3_0_pre_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang)
+       if (!adev->vce.srbm_soft_reset)
                return 0;
 
        mdelay(5);
@@ -651,7 +656,7 @@ static int vce_v3_0_post_soft_reset(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-       if (!adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang)
+       if (!adev->vce.srbm_soft_reset)
                return 0;
 
        mdelay(5);
index c0d9aad7126f4a16e067e19e76067d0a3248e8d9..f62f1a74f890d0d806506f343bb14ec6090578e9 100644 (file)
@@ -80,7 +80,9 @@
 #include "dce_virtual.h"
 
 MODULE_FIRMWARE("amdgpu/topaz_smc.bin");
+MODULE_FIRMWARE("amdgpu/topaz_k_smc.bin");
 MODULE_FIRMWARE("amdgpu/tonga_smc.bin");
+MODULE_FIRMWARE("amdgpu/tonga_k_smc.bin");
 MODULE_FIRMWARE("amdgpu/fiji_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_smc_sk.bin");
@@ -1651,7 +1653,7 @@ static int vi_common_early_init(void *handle)
                        AMD_CG_SUPPORT_SDMA_MGCG |
                        AMD_CG_SUPPORT_SDMA_LS |
                        AMD_CG_SUPPORT_VCE_MGCG;
-               adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+               adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
                        AMD_PG_SUPPORT_GFX_SMG |
                        AMD_PG_SUPPORT_GFX_PIPELINE |
                        AMD_PG_SUPPORT_UVD |
index c934b78c9e2f056b9ba20ec9f2b182cfa1910e74..bec8125bceb0d2555b49b8badaafbf4494ac47f7 100644 (file)
@@ -165,7 +165,7 @@ struct amd_ip_funcs {
        /* poll for idle */
        int (*wait_for_idle)(void *handle);
        /* check soft reset the IP block */
-       int (*check_soft_reset)(void *handle);
+       bool (*check_soft_reset)(void *handle);
        /* pre soft reset the IP block */
        int (*pre_soft_reset)(void *handle);
        /* soft reset the IP block */
index 92b1178438755ab4e981dfa14741f62a3353da9f..8cee4e0f9fde60c736b56344b378c67c2107d867 100644 (file)
@@ -49,6 +49,7 @@ static const pem_event_action * const uninitialize_event[] = {
        uninitialize_display_phy_access_tasks,
        disable_gfx_voltage_island_power_gating_tasks,
        disable_gfx_clock_gating_tasks,
+       uninitialize_thermal_controller_tasks,
        set_boot_state_tasks,
        adjust_power_state_tasks,
        disable_dynamic_state_management_tasks,
index 7e4fcbbbe08652735c6796a2cbfd62d122434e69..960424913496d671d70fa220600fab6874dbd67d 100644 (file)
@@ -1785,6 +1785,21 @@ static int cz_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_c
        return 0;
 }
 
+static int cz_thermal_get_temperature(struct pp_hwmgr *hwmgr)
+{
+       int actual_temp = 0;
+       uint32_t val = cgs_read_ind_register(hwmgr->device,
+                                            CGS_IND_REG__SMC, ixTHM_TCON_CUR_TMP);
+       uint32_t temp = PHM_GET_FIELD(val, THM_TCON_CUR_TMP, CUR_TEMP);
+
+       if (PHM_GET_FIELD(val, THM_TCON_CUR_TMP, CUR_TEMP_RANGE_SEL))
+               actual_temp = ((temp / 8) - 49) * PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+       else
+               actual_temp = (temp / 8) * PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+
+       return actual_temp;
+}
+
 static int cz_read_sensor(struct pp_hwmgr *hwmgr, int idx, int32_t *value)
 {
        struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend);
@@ -1881,6 +1896,9 @@ static int cz_read_sensor(struct pp_hwmgr *hwmgr, int idx, int32_t *value)
        case AMDGPU_PP_SENSOR_VCE_POWER:
                *value = cz_hwmgr->vce_power_gated ? 0 : 1;
                return 0;
+       case AMDGPU_PP_SENSOR_GPU_TEMP:
+               *value = cz_thermal_get_temperature(hwmgr);
+               return 0;
        default:
                return -EINVAL;
        }
index 14f8c1f4da3d7a385202f2e8ec24ee5e6ab0f3ad..0723758ed0650616ee111af60e9ac14287f73ae2 100644 (file)
@@ -272,7 +272,7 @@ bool phm_check_smc_update_required_for_display_configuration(struct pp_hwmgr *hw
        PHM_FUNC_CHECK(hwmgr);
 
        if (hwmgr->hwmgr_func->check_smc_update_required_for_display_configuration == NULL)
-               return -EINVAL;
+               return false;
 
        return hwmgr->hwmgr_func->check_smc_update_required_for_display_configuration(hwmgr);
 }
index 1167205057b337d7f968f08d4218487c218797a9..e03dcb6ea9c17c0a3ea5dbf7dc547bf8b55464af 100644 (file)
@@ -710,13 +710,15 @@ int phm_get_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
        uint32_t vol;
        int ret = 0;
 
-       if (hwmgr->chip_id < CHIP_POLARIS10) {
-               atomctrl_get_voltage_evv_on_sclk(hwmgr, voltage_type, sclk, id, voltage);
+       if (hwmgr->chip_id < CHIP_TONGA) {
+               ret = atomctrl_get_voltage_evv(hwmgr, id, voltage);
+       } else if (hwmgr->chip_id < CHIP_POLARIS10) {
+               ret = atomctrl_get_voltage_evv_on_sclk(hwmgr, voltage_type, sclk, id, voltage);
                if (*voltage >= 2000 || *voltage == 0)
                        *voltage = 1150;
        } else {
                ret = atomctrl_get_voltage_evv_on_sclk_ai(hwmgr, voltage_type, sclk, id, &vol);
-               *voltage = (uint16_t)vol/100;
+               *voltage = (uint16_t)(vol/100);
        }
        return ret;
 }
index 1126bd4f74dcc61d6e48c452ec281723b2fe81ec..0894527d932f4849cbe147eba4298a6a2f36264d 100644 (file)
@@ -1320,7 +1320,8 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_
        if (0 != result)
                return result;
 
-       *voltage = le32_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)(&get_voltage_info_param_space))->ulVoltageLevel);
+       *voltage = le32_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)
+                               (&get_voltage_info_param_space))->ulVoltageLevel);
 
        return result;
 }
index 7de701d8a450a624bcc57c9802338676ab573b0b..4477c55a58e32f903d33005cf9f2cc13a07215ba 100644 (file)
@@ -1201,12 +1201,15 @@ static uint32_t make_classification_flags(struct pp_hwmgr *hwmgr,
 static int ppt_get_num_of_vce_state_table_entries_v1_0(struct pp_hwmgr *hwmgr)
 {
        const ATOM_Tonga_POWERPLAYTABLE *pp_table = get_powerplay_table(hwmgr);
-       const ATOM_Tonga_VCE_State_Table *vce_state_table =
-                               (ATOM_Tonga_VCE_State_Table *)(((unsigned long)pp_table) + le16_to_cpu(pp_table->usVCEStateTableOffset));
+       const ATOM_Tonga_VCE_State_Table *vce_state_table;
 
-       if (vce_state_table == NULL)
+
+       if (pp_table == NULL)
                return 0;
 
+       vce_state_table = (void *)pp_table +
+                       le16_to_cpu(pp_table->usVCEStateTableOffset);
+
        return vce_state_table->ucNumEntries;
 }
 
index 508245d49d3394055835683f3067e021f6d482d0..08cd0bd3ebe5b1e34b14940a4cff4805027518e1 100644 (file)
@@ -1030,20 +1030,19 @@ static int smu7_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
        struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 
        /* disable SCLK dpm */
-       if (!data->sclk_dpm_key_disabled)
-               PP_ASSERT_WITH_CODE(
-                               (smum_send_msg_to_smc(hwmgr->smumgr,
-                                               PPSMC_MSG_DPM_Disable) == 0),
-                               "Failed to disable SCLK DPM!",
-                               return -EINVAL);
+       if (!data->sclk_dpm_key_disabled) {
+               PP_ASSERT_WITH_CODE(true == smum_is_dpm_running(hwmgr),
+                               "Trying to disable SCLK DPM when DPM is disabled",
+                               return 0);
+               smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_DPM_Disable);
+       }
 
        /* disable MCLK dpm */
        if (!data->mclk_dpm_key_disabled) {
-               PP_ASSERT_WITH_CODE(
-                               (smum_send_msg_to_smc(hwmgr->smumgr,
-                                               PPSMC_MSG_MCLKDPM_Disable) == 0),
-                               "Failed to disable MCLK DPM!",
-                               return -EINVAL);
+               PP_ASSERT_WITH_CODE(true == smum_is_dpm_running(hwmgr),
+                               "Trying to disable MCLK DPM when DPM is disabled",
+                               return 0);
+               smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_MCLKDPM_Disable);
        }
 
        return 0;
@@ -1069,10 +1068,13 @@ static int smu7_stop_dpm(struct pp_hwmgr *hwmgr)
                                return -EINVAL);
        }
 
-       if (smu7_disable_sclk_mclk_dpm(hwmgr)) {
-               printk(KERN_ERR "Failed to disable Sclk DPM and Mclk DPM!");
-               return -EINVAL;
-       }
+       smu7_disable_sclk_mclk_dpm(hwmgr);
+
+       PP_ASSERT_WITH_CODE(true == smum_is_dpm_running(hwmgr),
+                       "Trying to disable voltage DPM when DPM is disabled",
+                       return 0);
+
+       smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_Voltage_Cntl_Disable);
 
        return 0;
 }
@@ -1166,8 +1168,8 @@ int smu7_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 
        tmp_result = (!smum_is_dpm_running(hwmgr)) ? 0 : -1;
        PP_ASSERT_WITH_CODE(tmp_result == 0,
-                       "DPM is already running right now, no need to enable DPM!",
-                       return 0);
+                       "DPM is already running",
+                       );
 
        if (smu7_voltage_control(hwmgr)) {
                tmp_result = smu7_enable_voltage_control(hwmgr);
@@ -1226,7 +1228,7 @@ int smu7_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
        PP_ASSERT_WITH_CODE((0 == tmp_result),
                        "Failed to enable VR hot GPIO interrupt!", result = tmp_result);
 
-       smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay);
+       smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_NoDisplay);
 
        tmp_result = smu7_enable_sclk_control(hwmgr);
        PP_ASSERT_WITH_CODE((0 == tmp_result),
@@ -1306,6 +1308,12 @@ int smu7_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
        PP_ASSERT_WITH_CODE((tmp_result == 0),
                        "Failed to disable thermal auto throttle!", result = tmp_result);
 
+       if (1 == PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, FEATURE_STATUS, AVS_ON)) {
+               PP_ASSERT_WITH_CODE((0 == smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_DisableAvfs)),
+                                       "Failed to disable AVFS!",
+                                       return -EINVAL);
+       }
+
        tmp_result = smu7_stop_dpm(hwmgr);
        PP_ASSERT_WITH_CODE((tmp_result == 0),
                        "Failed to stop DPM!", result = tmp_result);
@@ -1452,17 +1460,17 @@ static int smu7_get_evv_voltages(struct pp_hwmgr *hwmgr)
        struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table = NULL;
 
 
-       if (table_info != NULL)
-               sclk_table = table_info->vdd_dep_on_sclk;
-
        for (i = 0; i < SMU7_MAX_LEAKAGE_COUNT; i++) {
                vv_id = ATOM_VIRTUAL_VOLTAGE_ID0 + i;
 
                if (data->vdd_gfx_control == SMU7_VOLTAGE_CONTROL_BY_SVID2) {
-                       if (0 == phm_get_sclk_for_voltage_evv(hwmgr,
+                       if ((hwmgr->pp_table_version == PP_TABLE_V1)
+                           && !phm_get_sclk_for_voltage_evv(hwmgr,
                                                table_info->vddgfx_lookup_table, vv_id, &sclk)) {
                                if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
                                                        PHM_PlatformCaps_ClockStretcher)) {
+                                       sclk_table = table_info->vdd_dep_on_sclk;
+
                                        for (j = 1; j < sclk_table->count; j++) {
                                                if (sclk_table->entries[j].clk == sclk &&
                                                                sclk_table->entries[j].cks_enable == 0) {
@@ -1488,12 +1496,15 @@ static int smu7_get_evv_voltages(struct pp_hwmgr *hwmgr)
                                }
                        }
                } else {
-
                        if ((hwmgr->pp_table_version == PP_TABLE_V0)
                                || !phm_get_sclk_for_voltage_evv(hwmgr,
                                        table_info->vddc_lookup_table, vv_id, &sclk)) {
                                if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
                                                PHM_PlatformCaps_ClockStretcher)) {
+                                       if (table_info == NULL)
+                                               return -EINVAL;
+                                       sclk_table = table_info->vdd_dep_on_sclk;
+
                                        for (j = 1; j < sclk_table->count; j++) {
                                                if (sclk_table->entries[j].clk == sclk &&
                                                                sclk_table->entries[j].cks_enable == 0) {
@@ -2117,15 +2128,20 @@ static int smu7_patch_acp_vddc(struct pp_hwmgr *hwmgr,
 }
 
 static int smu7_patch_limits_vddc(struct pp_hwmgr *hwmgr,
-                                    struct phm_clock_and_voltage_limits *tab)
+                                 struct phm_clock_and_voltage_limits *tab)
 {
+       uint32_t vddc, vddci;
        struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 
        if (tab) {
-               smu7_patch_ppt_v0_with_vdd_leakage(hwmgr, (uint32_t *)&tab->vddc,
-                                                       &data->vddc_leakage);
-               smu7_patch_ppt_v0_with_vdd_leakage(hwmgr, (uint32_t *)&tab->vddci,
-                                                       &data->vddci_leakage);
+               vddc = tab->vddc;
+               smu7_patch_ppt_v0_with_vdd_leakage(hwmgr, &vddc,
+                                                  &data->vddc_leakage);
+               tab->vddc = vddc;
+               vddci = tab->vddci;
+               smu7_patch_ppt_v0_with_vdd_leakage(hwmgr, &vddci,
+                                                  &data->vddci_leakage);
+               tab->vddci = vddci;
        }
 
        return 0;
@@ -2968,19 +2984,19 @@ static int smu7_get_pp_table_entry_callback_func_v0(struct pp_hwmgr *hwmgr,
        if (!(data->mc_micro_code_feature & DISABLE_MC_LOADMICROCODE) && memory_clock > data->highest_mclk)
                data->highest_mclk = memory_clock;
 
-       performance_level = &(ps->performance_levels
-                       [ps->performance_level_count++]);
-
        PP_ASSERT_WITH_CODE(
                        (ps->performance_level_count < smum_get_mac_definition(hwmgr->smumgr, SMU_MAX_LEVELS_GRAPHICS)),
                        "Performance levels exceeds SMC limit!",
                        return -EINVAL);
 
        PP_ASSERT_WITH_CODE(
-                       (ps->performance_level_count <=
+                       (ps->performance_level_count <
                                        hwmgr->platform_descriptor.hardwareActivityPerformanceLevels),
-                       "Performance levels exceeds Driver limit!",
-                       return -EINVAL);
+                       "Performance levels exceeds Driver limit, Skip!",
+                       return 0);
+
+       performance_level = &(ps->performance_levels
+                       [ps->performance_level_count++]);
 
        /* Performance levels are arranged from low to high. */
        performance_level->memory_clock = memory_clock;
@@ -3802,13 +3818,15 @@ static inline bool smu7_are_power_levels_equal(const struct smu7_performance_lev
 
 int smu7_check_states_equal(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *pstate1, const struct pp_hw_power_state *pstate2, bool *equal)
 {
-       const struct smu7_power_state *psa = cast_const_phw_smu7_power_state(pstate1);
-       const struct smu7_power_state *psb = cast_const_phw_smu7_power_state(pstate2);
+       const struct smu7_power_state *psa;
+       const struct smu7_power_state *psb;
        int i;
 
        if (pstate1 == NULL || pstate2 == NULL || equal == NULL)
                return -EINVAL;
 
+       psa = cast_const_phw_smu7_power_state(pstate1);
+       psb = cast_const_phw_smu7_power_state(pstate2);
        /* If the two states don't even have the same number of performance levels they cannot be the same state. */
        if (psa->performance_level_count != psb->performance_level_count) {
                *equal = false;
@@ -4213,18 +4231,26 @@ static int smu7_get_sclks(struct pp_hwmgr *hwmgr, struct amd_pp_clocks *clocks)
 {
        struct phm_ppt_v1_information *table_info =
                        (struct phm_ppt_v1_information *)hwmgr->pptable;
-       struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table;
+       struct phm_ppt_v1_clock_voltage_dependency_table *dep_sclk_table = NULL;
+       struct phm_clock_voltage_dependency_table *sclk_table;
        int i;
 
-       if (table_info == NULL)
-               return -EINVAL;
-
-       dep_sclk_table = table_info->vdd_dep_on_sclk;
-
-       for (i = 0; i < dep_sclk_table->count; i++) {
-               clocks->clock[i] = dep_sclk_table->entries[i].clk;
-               clocks->count++;
+       if (hwmgr->pp_table_version == PP_TABLE_V1) {
+               if (table_info == NULL || table_info->vdd_dep_on_sclk == NULL)
+                       return -EINVAL;
+               dep_sclk_table = table_info->vdd_dep_on_sclk;
+               for (i = 0; i < dep_sclk_table->count; i++) {
+                       clocks->clock[i] = dep_sclk_table->entries[i].clk;
+                       clocks->count++;
+               }
+       } else if (hwmgr->pp_table_version == PP_TABLE_V0) {
+               sclk_table = hwmgr->dyn_state.vddc_dependency_on_sclk;
+               for (i = 0; i < sclk_table->count; i++) {
+                       clocks->clock[i] = sclk_table->entries[i].clk;
+                       clocks->count++;
+               }
        }
+
        return 0;
 }
 
@@ -4246,17 +4272,24 @@ static int smu7_get_mclks(struct pp_hwmgr *hwmgr, struct amd_pp_clocks *clocks)
                        (struct phm_ppt_v1_information *)hwmgr->pptable;
        struct phm_ppt_v1_clock_voltage_dependency_table *dep_mclk_table;
        int i;
+       struct phm_clock_voltage_dependency_table *mclk_table;
 
-       if (table_info == NULL)
-               return -EINVAL;
-
-       dep_mclk_table = table_info->vdd_dep_on_mclk;
-
-       for (i = 0; i < dep_mclk_table->count; i++) {
-               clocks->clock[i] = dep_mclk_table->entries[i].clk;
-               clocks->latency[i] = smu7_get_mem_latency(hwmgr,
+       if (hwmgr->pp_table_version == PP_TABLE_V1) {
+               if (table_info == NULL)
+                       return -EINVAL;
+               dep_mclk_table = table_info->vdd_dep_on_mclk;
+               for (i = 0; i < dep_mclk_table->count; i++) {
+                       clocks->clock[i] = dep_mclk_table->entries[i].clk;
+                       clocks->latency[i] = smu7_get_mem_latency(hwmgr,
                                                dep_mclk_table->entries[i].clk);
-               clocks->count++;
+                       clocks->count++;
+               }
+       } else if (hwmgr->pp_table_version == PP_TABLE_V0) {
+               mclk_table = hwmgr->dyn_state.vddc_dependency_on_mclk;
+               for (i = 0; i < mclk_table->count; i++) {
+                       clocks->clock[i] = mclk_table->entries[i].clk;
+                       clocks->count++;
+               }
        }
        return 0;
 }
@@ -4324,6 +4357,7 @@ static const struct pp_hwmgr_func smu7_hwmgr_funcs = {
        .set_mclk_od = smu7_set_mclk_od,
        .get_clock_by_type = smu7_get_clock_by_type,
        .read_sensor = smu7_read_sensor,
+       .dynamic_state_management_disable = smu7_disable_dpm_tasks,
 };
 
 uint8_t smu7_get_sleep_divider_id_from_clock(uint32_t clock,
index fb6c6f6106d5fe233e8df5bc1f29cf12385b6b7d..29d0319b22e6c68cfa46466ad13d795a7a7f37cf 100644 (file)
@@ -30,7 +30,7 @@ int smu7_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr,
                struct phm_fan_speed_info *fan_speed_info)
 {
        if (hwmgr->thermal_controller.fanInfo.bNoFan)
-               return 0;
+               return -ENODEV;
 
        fan_speed_info->supports_percent_read = true;
        fan_speed_info->supports_percent_write = true;
@@ -60,7 +60,7 @@ int smu7_fan_ctrl_get_fan_speed_percent(struct pp_hwmgr *hwmgr,
        uint64_t tmp64;
 
        if (hwmgr->thermal_controller.fanInfo.bNoFan)
-               return 0;
+               return -ENODEV;
 
        duty100 = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
                        CG_FDO_CTRL1, FMAX_DUTY100);
@@ -89,7 +89,7 @@ int smu7_fan_ctrl_get_fan_speed_rpm(struct pp_hwmgr *hwmgr, uint32_t *speed)
        if (hwmgr->thermal_controller.fanInfo.bNoFan ||
                        (hwmgr->thermal_controller.fanInfo.
                                ucTachometerPulsesPerRevolution == 0))
-               return 0;
+               return -ENODEV;
 
        tach_period = PHM_READ_VFPF_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
                        CG_TACH_STATUS, TACH_PERIOD);
index eda802bc63c888ead2082e4362768ba763d1d83f..8c889caba420dc2d55ec9340d46ab0a92d468514 100644 (file)
@@ -2458,7 +2458,7 @@ static int iceland_set_mc_special_registers(struct pp_hwmgr *hwmgr,
                        PP_ASSERT_WITH_CODE((j <= SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE),
                                "Invalid VramInfo table.", return -EINVAL);
 
-                       if (!data->is_memory_gddr5) {
+                       if (!data->is_memory_gddr5 && j < SMU71_DISCRETE_MC_REGISTER_ARRAY_SIZE) {
                                table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD;
                                table->mc_reg_address[j].s0 = mmMC_PMG_AUTO_CMD;
                                for (k = 0; k < table->num_entries; k++) {
index 963a24d46a93d336e2d52bf1f4c7d045c6c2f57e..ffe1f85ce30019dc75b7aff2550ad60c6e70a5f1 100644 (file)
@@ -34,9 +34,6 @@ static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
 static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
 static void amd_sched_process_job(struct fence *f, struct fence_cb *cb);
 
-struct kmem_cache *sched_fence_slab;
-atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
-
 /* Initialize a given run queue struct */
 static void amd_sched_rq_init(struct amd_sched_rq *rq)
 {
@@ -618,13 +615,6 @@ int amd_sched_init(struct amd_gpu_scheduler *sched,
        INIT_LIST_HEAD(&sched->ring_mirror_list);
        spin_lock_init(&sched->job_list_lock);
        atomic_set(&sched->hw_rq_count, 0);
-       if (atomic_inc_return(&sched_fence_slab_ref) == 1) {
-               sched_fence_slab = kmem_cache_create(
-                       "amd_sched_fence", sizeof(struct amd_sched_fence), 0,
-                       SLAB_HWCACHE_ALIGN, NULL);
-               if (!sched_fence_slab)
-                       return -ENOMEM;
-       }
 
        /* Each scheduler will run on a seperate kernel thread */
        sched->thread = kthread_run(amd_sched_main, sched, sched->name);
@@ -645,6 +635,4 @@ void amd_sched_fini(struct amd_gpu_scheduler *sched)
 {
        if (sched->thread)
                kthread_stop(sched->thread);
-       if (atomic_dec_and_test(&sched_fence_slab_ref))
-               kmem_cache_destroy(sched_fence_slab);
 }
index 7cbbbfb502ef1342caa2a5b36aa970368fed5055..51068e6c3d9af4746e40ebca3f81384e95d5c16c 100644 (file)
@@ -30,9 +30,6 @@
 struct amd_gpu_scheduler;
 struct amd_sched_rq;
 
-extern struct kmem_cache *sched_fence_slab;
-extern atomic_t sched_fence_slab_ref;
-
 /**
  * A scheduler entity is a wrapper around a job queue or a group
  * of other entities. Entities take turns emitting jobs from their
@@ -145,6 +142,9 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
                           struct amd_sched_entity *entity);
 void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
 
+int amd_sched_fence_slab_init(void);
+void amd_sched_fence_slab_fini(void);
+
 struct amd_sched_fence *amd_sched_fence_create(
        struct amd_sched_entity *s_entity, void *owner);
 void amd_sched_fence_scheduled(struct amd_sched_fence *fence);
index 6b63beaf75746848720f98d4eb2b5329c299267b..88fc2d66257990876507b8ab65391ad2a6deca0f 100644 (file)
 #include <drm/drmP.h>
 #include "gpu_scheduler.h"
 
+static struct kmem_cache *sched_fence_slab;
+
+int amd_sched_fence_slab_init(void)
+{
+       sched_fence_slab = kmem_cache_create(
+               "amd_sched_fence", sizeof(struct amd_sched_fence), 0,
+               SLAB_HWCACHE_ALIGN, NULL);
+       if (!sched_fence_slab)
+               return -ENOMEM;
+
+       return 0;
+}
+
+void amd_sched_fence_slab_fini(void)
+{
+       rcu_barrier();
+       kmem_cache_destroy(sched_fence_slab);
+}
+
 struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *entity,
                                               void *owner)
 {
@@ -103,7 +122,7 @@ static void amd_sched_fence_free(struct rcu_head *rcu)
 }
 
 /**
- * amd_sched_fence_release - callback that fence can be freed
+ * amd_sched_fence_release_scheduled - callback that fence can be freed
  *
  * @fence: fence
  *
@@ -118,7 +137,7 @@ static void amd_sched_fence_release_scheduled(struct fence *f)
 }
 
 /**
- * amd_sched_fence_release_scheduled - drop extra reference
+ * amd_sched_fence_release_finished - drop extra reference
  *
  * @f: fence
  *
index b7a8b2ac4055b6594daa4e15d0c68009dd1e1cdc..b69c66b4897e46e2ea77890e39235f3d9e94b57d 100644 (file)
  *
  */
 
-#include <drm/drm_crtc_helper.h>
+#include <drm/drm_crtc.h>
 #include <drm/drm_encoder_slave.h>
-#include <drm/drm_atomic_helper.h>
 
 #include "arcpgu.h"
 
-struct arcpgu_drm_connector {
-       struct drm_connector connector;
-       struct drm_encoder_slave *encoder_slave;
-};
-
-static int arcpgu_drm_connector_get_modes(struct drm_connector *connector)
-{
-       const struct drm_encoder_slave_funcs *sfuncs;
-       struct drm_encoder_slave *slave;
-       struct arcpgu_drm_connector *con =
-               container_of(connector, struct arcpgu_drm_connector, connector);
-
-       slave = con->encoder_slave;
-       if (slave == NULL) {
-               dev_err(connector->dev->dev,
-                       "connector_get_modes: cannot find slave encoder for connector\n");
-               return 0;
-       }
-
-       sfuncs = slave->slave_funcs;
-       if (sfuncs->get_modes == NULL)
-               return 0;
-
-       return sfuncs->get_modes(&slave->base, connector);
-}
-
-static enum drm_connector_status
-arcpgu_drm_connector_detect(struct drm_connector *connector, bool force)
-{
-       enum drm_connector_status status = connector_status_unknown;
-       const struct drm_encoder_slave_funcs *sfuncs;
-       struct drm_encoder_slave *slave;
-
-       struct arcpgu_drm_connector *con =
-               container_of(connector, struct arcpgu_drm_connector, connector);
-
-       slave = con->encoder_slave;
-       if (slave == NULL) {
-               dev_err(connector->dev->dev,
-                       "connector_detect: cannot find slave encoder for connector\n");
-               return status;
-       }
-
-       sfuncs = slave->slave_funcs;
-       if (sfuncs && sfuncs->detect)
-               return sfuncs->detect(&slave->base, connector);
-
-       dev_err(connector->dev->dev, "connector_detect: could not detect slave funcs\n");
-       return status;
-}
-
-static void arcpgu_drm_connector_destroy(struct drm_connector *connector)
-{
-       drm_connector_unregister(connector);
-       drm_connector_cleanup(connector);
-}
-
-static const struct drm_connector_helper_funcs
-arcpgu_drm_connector_helper_funcs = {
-       .get_modes = arcpgu_drm_connector_get_modes,
-};
-
-static const struct drm_connector_funcs arcpgu_drm_connector_funcs = {
-       .dpms = drm_helper_connector_dpms,
-       .reset = drm_atomic_helper_connector_reset,
-       .detect = arcpgu_drm_connector_detect,
-       .fill_modes = drm_helper_probe_single_connector_modes,
-       .destroy = arcpgu_drm_connector_destroy,
-       .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
-       .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
-};
-
-static struct drm_encoder_helper_funcs arcpgu_drm_encoder_helper_funcs = {
-       .dpms = drm_i2c_encoder_dpms,
-       .mode_fixup = drm_i2c_encoder_mode_fixup,
-       .mode_set = drm_i2c_encoder_mode_set,
-       .prepare = drm_i2c_encoder_prepare,
-       .commit = drm_i2c_encoder_commit,
-       .detect = drm_i2c_encoder_detect,
-};
-
 static struct drm_encoder_funcs arcpgu_drm_encoder_funcs = {
        .destroy = drm_encoder_cleanup,
 };
 
 int arcpgu_drm_hdmi_init(struct drm_device *drm, struct device_node *np)
 {
-       struct arcpgu_drm_connector *arcpgu_connector;
-       struct drm_i2c_encoder_driver *driver;
-       struct drm_encoder_slave *encoder;
-       struct drm_connector *connector;
-       struct i2c_client *i2c_slave;
-       int ret;
+       struct drm_encoder *encoder;
+       struct drm_bridge *bridge;
+
+       int ret = 0;
 
        encoder = devm_kzalloc(drm->dev, sizeof(*encoder), GFP_KERNEL);
        if (encoder == NULL)
                return -ENOMEM;
 
-       i2c_slave = of_find_i2c_device_by_node(np);
-       if (!i2c_slave || !i2c_get_clientdata(i2c_slave)) {
-               dev_err(drm->dev, "failed to find i2c slave encoder\n");
-               return -EPROBE_DEFER;
-       }
-
-       if (i2c_slave->dev.driver == NULL) {
-               dev_err(drm->dev, "failed to find i2c slave driver\n");
+       /* Locate drm bridge from the hdmi encoder DT node */
+       bridge = of_drm_find_bridge(np);
+       if (!bridge)
                return -EPROBE_DEFER;
-       }
 
-       driver =
-           to_drm_i2c_encoder_driver(to_i2c_driver(i2c_slave->dev.driver));
-       ret = driver->encoder_init(i2c_slave, drm, encoder);
-       if (ret) {
-               dev_err(drm->dev, "failed to initialize i2c encoder slave\n");
-               return ret;
-       }
-
-       encoder->base.possible_crtcs = 1;
-       encoder->base.possible_clones = 0;
-       ret = drm_encoder_init(drm, &encoder->base, &arcpgu_drm_encoder_funcs,
+       encoder->possible_crtcs = 1;
+       encoder->possible_clones = 0;
+       ret = drm_encoder_init(drm, encoder, &arcpgu_drm_encoder_funcs,
                               DRM_MODE_ENCODER_TMDS, NULL);
        if (ret)
                return ret;
 
-       drm_encoder_helper_add(&encoder->base,
-                              &arcpgu_drm_encoder_helper_funcs);
-
-       arcpgu_connector = devm_kzalloc(drm->dev, sizeof(*arcpgu_connector),
-                                       GFP_KERNEL);
-       if (!arcpgu_connector) {
-               ret = -ENOMEM;
-               goto error_encoder_cleanup;
-       }
-
-       connector = &arcpgu_connector->connector;
-       drm_connector_helper_add(connector, &arcpgu_drm_connector_helper_funcs);
-       ret = drm_connector_init(drm, connector, &arcpgu_drm_connector_funcs,
-                       DRM_MODE_CONNECTOR_HDMIA);
-       if (ret < 0) {
-               dev_err(drm->dev, "failed to initialize drm connector\n");
-               goto error_encoder_cleanup;
-       }
+       /* Link drm_bridge to encoder */
+       bridge->encoder = encoder;
+       encoder->bridge = bridge;
 
-       ret = drm_mode_connector_attach_encoder(connector, &encoder->base);
-       if (ret < 0) {
-               dev_err(drm->dev, "could not attach connector to encoder\n");
-               drm_connector_unregister(connector);
-               goto error_connector_cleanup;
-       }
-
-       arcpgu_connector->encoder_slave = encoder;
-
-       return 0;
-
-error_connector_cleanup:
-       drm_connector_cleanup(connector);
+       ret = drm_bridge_attach(drm, bridge);
+       if (ret)
+               drm_encoder_cleanup(encoder);
 
-error_encoder_cleanup:
-       drm_encoder_cleanup(&encoder->base);
        return ret;
 }
index 48019ae22ddba5fcbb3283e94a016543fba3d8b3..28341b32067f89e347c13db6f6a4b97e94022b96 100644 (file)
@@ -150,15 +150,14 @@ static void hdlcd_crtc_enable(struct drm_crtc *crtc)
        clk_prepare_enable(hdlcd->clk);
        hdlcd_crtc_mode_set_nofb(crtc);
        hdlcd_write(hdlcd, HDLCD_REG_COMMAND, 1);
+       drm_crtc_vblank_on(crtc);
 }
 
 static void hdlcd_crtc_disable(struct drm_crtc *crtc)
 {
        struct hdlcd_drm_private *hdlcd = crtc_to_hdlcd_priv(crtc);
 
-       if (!crtc->state->active)
-               return;
-
+       drm_crtc_vblank_off(crtc);
        hdlcd_write(hdlcd, HDLCD_REG_COMMAND, 0);
        clk_disable_unprepare(hdlcd->clk);
 }
index 2f58e9e2a59cb4346e339a43953e3ade8941889a..a51f8cbcfe26d9d4d5d52d2037ed94292533c41e 100644 (file)
@@ -332,17 +332,19 @@ static void armada_drm_crtc_dpms(struct drm_crtc *crtc, int dpms)
 {
        struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc);
 
-       if (dcrtc->dpms != dpms) {
-               dcrtc->dpms = dpms;
-               if (!IS_ERR(dcrtc->clk) && !dpms_blanked(dpms))
-                       WARN_ON(clk_prepare_enable(dcrtc->clk));
-               armada_drm_crtc_update(dcrtc);
-               if (!IS_ERR(dcrtc->clk) && dpms_blanked(dpms))
-                       clk_disable_unprepare(dcrtc->clk);
+       if (dpms_blanked(dcrtc->dpms) != dpms_blanked(dpms)) {
                if (dpms_blanked(dpms))
                        armada_drm_vblank_off(dcrtc);
-               else
+               else if (!IS_ERR(dcrtc->clk))
+                       WARN_ON(clk_prepare_enable(dcrtc->clk));
+               dcrtc->dpms = dpms;
+               armada_drm_crtc_update(dcrtc);
+               if (!dpms_blanked(dpms))
                        drm_crtc_vblank_on(&dcrtc->crtc);
+               else if (!IS_ERR(dcrtc->clk))
+                       clk_disable_unprepare(dcrtc->clk);
+       } else if (dcrtc->dpms != dpms) {
+               dcrtc->dpms = dpms;
        }
 }
 
index 608df4c90520278e59bfe75d3c3348d66af51e6c..0743e65cb24020fd7c8dffc679b76f15145b0d97 100644 (file)
@@ -267,6 +267,8 @@ int ast_mm_init(struct ast_private *ast)
                return ret;
        }
 
+       arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
+                                  pci_resource_len(dev->pdev, 0));
        ast->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
                                        pci_resource_len(dev->pdev, 0));
 
@@ -275,11 +277,15 @@ int ast_mm_init(struct ast_private *ast)
 
 void ast_mm_fini(struct ast_private *ast)
 {
+       struct drm_device *dev = ast->dev;
+
        ttm_bo_device_release(&ast->ttm.bdev);
 
        ast_ttm_global_release(ast);
 
        arch_phys_wc_del(ast->fb_mtrr);
+       arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
+                               pci_resource_len(dev->pdev, 0));
 }
 
 void ast_ttm_placement(struct ast_bo *bo, int domain)
index bb2438dd8733f4c2c64618629abf1e946395f02a..5e7e63ce7bcef9bd81058c01e886244e28e15f4a 100644 (file)
@@ -267,6 +267,9 @@ int cirrus_mm_init(struct cirrus_device *cirrus)
                return ret;
        }
 
+       arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
+                                  pci_resource_len(dev->pdev, 0));
+
        cirrus->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
                                           pci_resource_len(dev->pdev, 0));
 
@@ -276,6 +279,8 @@ int cirrus_mm_init(struct cirrus_device *cirrus)
 
 void cirrus_mm_fini(struct cirrus_device *cirrus)
 {
+       struct drm_device *dev = cirrus->dev;
+
        if (!cirrus->mm_inited)
                return;
 
@@ -285,6 +290,8 @@ void cirrus_mm_fini(struct cirrus_device *cirrus)
 
        arch_phys_wc_del(cirrus->fb_mtrr);
        cirrus->fb_mtrr = 0;
+       arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
+                               pci_resource_len(dev->pdev, 0));
 }
 
 void cirrus_ttm_placement(struct cirrus_bo *bo, int domain)
index 23739609427d86b9cd64d81ddad719bf5fc2bd78..e6862a7442104f59fa476a79a28f48bd0daf98a2 100644 (file)
@@ -420,18 +420,21 @@ drm_atomic_replace_property_blob_from_id(struct drm_crtc *crtc,
                                         ssize_t expected_size,
                                         bool *replaced)
 {
-       struct drm_device *dev = crtc->dev;
        struct drm_property_blob *new_blob = NULL;
 
        if (blob_id != 0) {
-               new_blob = drm_property_lookup_blob(dev, blob_id);
+               new_blob = drm_property_lookup_blob(crtc->dev, blob_id);
                if (new_blob == NULL)
                        return -EINVAL;
-               if (expected_size > 0 && expected_size != new_blob->length)
+
+               if (expected_size > 0 && expected_size != new_blob->length) {
+                       drm_property_unreference_blob(new_blob);
                        return -EINVAL;
+               }
        }
 
        drm_atomic_replace_property_blob(blob, new_blob, replaced);
+       drm_property_unreference_blob(new_blob);
 
        return 0;
 }
index c3f83476f99601c2ff91711b602c902b55170f71..21f9926055415e7c0507aa4555e2a3b6daa7ca52 100644 (file)
@@ -594,10 +594,6 @@ drm_atomic_helper_check_planes(struct drm_device *dev,
        struct drm_plane_state *plane_state;
        int i, ret = 0;
 
-       ret = drm_atomic_normalize_zpos(dev, state);
-       if (ret)
-               return ret;
-
        for_each_plane_in_state(state, plane, plane_state, i) {
                const struct drm_plane_helper_funcs *funcs;
 
index 04e457117980b8d554e3561139f52cdc53dedc0d..aa644487749c9cbab104f2ea77c819baf7d6b250 100644 (file)
@@ -914,6 +914,7 @@ static void drm_dp_destroy_port(struct kref *kref)
                /* no need to clean up vcpi
                 * as if we have no connector we never setup a vcpi */
                drm_dp_port_teardown_pdt(port, port->pdt);
+               port->pdt = DP_PEER_DEVICE_NONE;
        }
        kfree(port);
 }
@@ -1159,7 +1160,9 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb,
                        drm_dp_put_port(port);
                        goto out;
                }
-               if (port->port_num >= DP_MST_LOGICAL_PORT_0) {
+               if ((port->pdt == DP_PEER_DEVICE_DP_LEGACY_CONV ||
+                    port->pdt == DP_PEER_DEVICE_SST_SINK) &&
+                   port->port_num >= DP_MST_LOGICAL_PORT_0) {
                        port->cached_edid = drm_get_edid(port->connector, &port->aux.ddc);
                        drm_mode_connector_set_tile_property(port->connector);
                }
@@ -2919,6 +2922,7 @@ static void drm_dp_destroy_connector_work(struct work_struct *work)
                mgr->cbs->destroy_connector(mgr, port->connector);
 
                drm_dp_port_teardown_pdt(port, port->pdt);
+               port->pdt = DP_PEER_DEVICE_NONE;
 
                if (!port->input && port->vcpi.vcpi > 0) {
                        drm_dp_mst_reset_vcpi_slots(mgr, port);
index 03414bde1f152637a7ed6002ed8a88e30611fec8..6c75e62c0b2254cee15cb88642ed37c1219946c2 100644 (file)
@@ -131,7 +131,12 @@ int drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper)
        return 0;
 fail:
        for (i = 0; i < fb_helper->connector_count; i++) {
-               kfree(fb_helper->connector_info[i]);
+               struct drm_fb_helper_connector *fb_helper_connector =
+                       fb_helper->connector_info[i];
+
+               drm_connector_unreference(fb_helper_connector->connector);
+
+               kfree(fb_helper_connector);
                fb_helper->connector_info[i] = NULL;
        }
        fb_helper->connector_count = 0;
@@ -603,6 +608,24 @@ int drm_fb_helper_blank(int blank, struct fb_info *info)
 }
 EXPORT_SYMBOL(drm_fb_helper_blank);
 
+static void drm_fb_helper_modeset_release(struct drm_fb_helper *helper,
+                                         struct drm_mode_set *modeset)
+{
+       int i;
+
+       for (i = 0; i < modeset->num_connectors; i++) {
+               drm_connector_unreference(modeset->connectors[i]);
+               modeset->connectors[i] = NULL;
+       }
+       modeset->num_connectors = 0;
+
+       drm_mode_destroy(helper->dev, modeset->mode);
+       modeset->mode = NULL;
+
+       /* FIXME should hold a ref? */
+       modeset->fb = NULL;
+}
+
 static void drm_fb_helper_crtc_free(struct drm_fb_helper *helper)
 {
        int i;
@@ -612,10 +635,12 @@ static void drm_fb_helper_crtc_free(struct drm_fb_helper *helper)
                kfree(helper->connector_info[i]);
        }
        kfree(helper->connector_info);
+
        for (i = 0; i < helper->crtc_count; i++) {
-               kfree(helper->crtc_info[i].mode_set.connectors);
-               if (helper->crtc_info[i].mode_set.mode)
-                       drm_mode_destroy(helper->dev, helper->crtc_info[i].mode_set.mode);
+               struct drm_mode_set *modeset = &helper->crtc_info[i].mode_set;
+
+               drm_fb_helper_modeset_release(helper, modeset);
+               kfree(modeset->connectors);
        }
        kfree(helper->crtc_info);
 }
@@ -644,7 +669,9 @@ static void drm_fb_helper_dirty_work(struct work_struct *work)
        clip->x2 = clip->y2 = 0;
        spin_unlock_irqrestore(&helper->dirty_lock, flags);
 
-       helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, &clip_copy, 1);
+       /* call dirty callback only when it has been really touched */
+       if (clip_copy.x1 < clip_copy.x2 && clip_copy.y1 < clip_copy.y2)
+               helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, &clip_copy, 1);
 }
 
 /**
@@ -2088,7 +2115,6 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper)
        struct drm_fb_helper_crtc **crtcs;
        struct drm_display_mode **modes;
        struct drm_fb_offset *offsets;
-       struct drm_mode_set *modeset;
        bool *enabled;
        int width, height;
        int i;
@@ -2136,45 +2162,35 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper)
 
        /* need to set the modesets up here for use later */
        /* fill out the connector<->crtc mappings into the modesets */
-       for (i = 0; i < fb_helper->crtc_count; i++) {
-               modeset = &fb_helper->crtc_info[i].mode_set;
-               modeset->num_connectors = 0;
-               modeset->fb = NULL;
-       }
+       for (i = 0; i < fb_helper->crtc_count; i++)
+               drm_fb_helper_modeset_release(fb_helper,
+                                             &fb_helper->crtc_info[i].mode_set);
 
        for (i = 0; i < fb_helper->connector_count; i++) {
                struct drm_display_mode *mode = modes[i];
                struct drm_fb_helper_crtc *fb_crtc = crtcs[i];
                struct drm_fb_offset *offset = &offsets[i];
-               modeset = &fb_crtc->mode_set;
+               struct drm_mode_set *modeset = &fb_crtc->mode_set;
 
                if (mode && fb_crtc) {
+                       struct drm_connector *connector =
+                               fb_helper->connector_info[i]->connector;
+
                        DRM_DEBUG_KMS("desired mode %s set on crtc %d (%d,%d)\n",
                                      mode->name, fb_crtc->mode_set.crtc->base.id, offset->x, offset->y);
+
                        fb_crtc->desired_mode = mode;
                        fb_crtc->x = offset->x;
                        fb_crtc->y = offset->y;
-                       if (modeset->mode)
-                               drm_mode_destroy(dev, modeset->mode);
                        modeset->mode = drm_mode_duplicate(dev,
                                                           fb_crtc->desired_mode);
-                       modeset->connectors[modeset->num_connectors++] = fb_helper->connector_info[i]->connector;
+                       drm_connector_reference(connector);
+                       modeset->connectors[modeset->num_connectors++] = connector;
                        modeset->fb = fb_helper->fb;
                        modeset->x = offset->x;
                        modeset->y = offset->y;
                }
        }
-
-       /* Clear out any old modes if there are no more connected outputs. */
-       for (i = 0; i < fb_helper->crtc_count; i++) {
-               modeset = &fb_helper->crtc_info[i].mode_set;
-               if (modeset->num_connectors == 0) {
-                       BUG_ON(modeset->fb);
-                       if (modeset->mode)
-                               drm_mode_destroy(dev, modeset->mode);
-                       modeset->mode = NULL;
-               }
-       }
 out:
        kfree(crtcs);
        kfree(modes);
index 1df2d33d0b40ed43e0cbd4623e014851b34ce581..ffb2ab389d1d14863ed1e69c17419d560910afe5 100644 (file)
@@ -54,9 +54,6 @@ int drm_name_info(struct seq_file *m, void *data)
 
        mutex_lock(&dev->master_mutex);
        master = dev->master;
-       if (!master)
-               goto out_unlock;
-
        seq_printf(m, "%s", dev->driver->name);
        if (dev->dev)
                seq_printf(m, " dev=%s", dev_name(dev->dev));
@@ -65,7 +62,6 @@ int drm_name_info(struct seq_file *m, void *data)
        if (dev->unique)
                seq_printf(m, " unique=%s", dev->unique);
        seq_printf(m, "\n");
-out_unlock:
        mutex_unlock(&dev->master_mutex);
 
        return 0;
index cb86c7e5495c58b5a855cede81f20e387d5d6ec0..d9230132dfbcc51d1da070769617b1841ee3a248 100644 (file)
@@ -329,20 +329,34 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
        /*
         * Append a LINK to the submitted command buffer to return to
         * the ring buffer.  return_target is the ring target address.
-        * We need three dwords: event, wait, link.
+        * We need at most 7 dwords in the return target: 2 cache flush +
+        * 2 semaphore stall + 1 event + 1 wait + 1 link.
         */
-       return_dwords = 3;
+       return_dwords = 7;
        return_target = etnaviv_buffer_reserve(gpu, buffer, return_dwords);
        CMD_LINK(cmdbuf, return_dwords, return_target);
 
        /*
-        * Append event, wait and link pointing back to the wait
-        * command to the ring buffer.
+        * Append a cache flush, stall, event, wait and link pointing back to
+        * the wait command to the ring buffer.
         */
+       if (gpu->exec_state == ETNA_PIPE_2D) {
+               CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
+                                      VIVS_GL_FLUSH_CACHE_PE2D);
+       } else {
+               CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
+                                      VIVS_GL_FLUSH_CACHE_DEPTH |
+                                      VIVS_GL_FLUSH_CACHE_COLOR);
+               CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
+                                      VIVS_TS_FLUSH_CACHE_FLUSH);
+       }
+       CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
+       CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
        CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
                       VIVS_GL_EVENT_FROM_PE);
        CMD_WAIT(buffer);
-       CMD_LINK(buffer, 2, return_target + 8);
+       CMD_LINK(buffer, 2, etnaviv_iommu_get_cmdbuf_va(gpu, buffer) +
+                           buffer->user_size - 4);
 
        if (drm_debug & DRM_UT_DRIVER)
                pr_info("stream link to 0x%08x @ 0x%08x %p\n",
index 5ce3603e6eacb1aab7c59ed66399f743d5346990..0370b842d9cc20c2fdae37406c82a9546106e69b 100644 (file)
@@ -748,19 +748,22 @@ static struct page **etnaviv_gem_userptr_do_get_pages(
        int ret = 0, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
        struct page **pvec;
        uintptr_t ptr;
+       unsigned int flags = 0;
 
        pvec = drm_malloc_ab(npages, sizeof(struct page *));
        if (!pvec)
                return ERR_PTR(-ENOMEM);
 
+       if (!etnaviv_obj->userptr.ro)
+               flags |= FOLL_WRITE;
+
        pinned = 0;
        ptr = etnaviv_obj->userptr.ptr;
 
        down_read(&mm->mmap_sem);
        while (pinned < npages) {
                ret = get_user_pages_remote(task, mm, ptr, npages - pinned,
-                                           !etnaviv_obj->userptr.ro, 0,
-                                           pvec + pinned, NULL);
+                                           flags, pvec + pinned, NULL);
                if (ret < 0)
                        break;
 
index d3796ed8d8c5b2808cd9edba22d5d10a57563917..169ac96e8f0861f9648e0e3ca3292ca1da61556c 100644 (file)
@@ -330,7 +330,8 @@ u32 etnaviv_iommu_get_cmdbuf_va(struct etnaviv_gpu *gpu,
                        return (u32)buf->vram_node.start;
 
                mutex_lock(&mmu->lock);
-               ret = etnaviv_iommu_find_iova(mmu, &buf->vram_node, buf->size);
+               ret = etnaviv_iommu_find_iova(mmu, &buf->vram_node,
+                                             buf->size + SZ_64K);
                if (ret < 0) {
                        mutex_unlock(&mmu->lock);
                        return 0;
index def78c8c1780a90ef8a8354770a3618276a506da..f86e7c8466785caf06c44c625ffd4c44cabae707 100644 (file)
@@ -262,6 +262,26 @@ int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state,
        return 0;
 }
 
+int exynos_atomic_check(struct drm_device *dev,
+                       struct drm_atomic_state *state)
+{
+       int ret;
+
+       ret = drm_atomic_helper_check_modeset(dev, state);
+       if (ret)
+               return ret;
+
+       ret = drm_atomic_normalize_zpos(dev, state);
+       if (ret)
+               return ret;
+
+       ret = drm_atomic_helper_check_planes(dev, state);
+       if (ret)
+               return ret;
+
+       return ret;
+}
+
 static int exynos_drm_open(struct drm_device *dev, struct drm_file *file)
 {
        struct drm_exynos_file_private *file_priv;
index d215149e737b1d19128740f861fb9d83bb7f3606..80c4d5b81689e5a304cd0cc60438adf5b354ad00 100644 (file)
@@ -301,6 +301,7 @@ static inline int exynos_dpi_bind(struct drm_device *dev,
 
 int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state,
                         bool nonblock);
+int exynos_atomic_check(struct drm_device *dev, struct drm_atomic_state *state);
 
 
 extern struct platform_driver fimd_driver;
index 40ce841eb9529b2f8ce3f3be8049780b64bd91a2..23cce0a3f5fcc842cd708edb594fea9700feec5f 100644 (file)
@@ -190,7 +190,7 @@ dma_addr_t exynos_drm_fb_dma_addr(struct drm_framebuffer *fb, int index)
 static const struct drm_mode_config_funcs exynos_drm_mode_config_funcs = {
        .fb_create = exynos_user_fb_create,
        .output_poll_changed = exynos_drm_output_poll_changed,
-       .atomic_check = drm_atomic_helper_check,
+       .atomic_check = exynos_atomic_check,
        .atomic_commit = exynos_atomic_commit,
 };
 
index aa92decf4233df4a4f06252a85c60ac3a7cd7a37..fbd13fabdf2daf93aeb79cb976c47e467fae6971 100644 (file)
@@ -488,7 +488,8 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev,
                goto err_free;
        }
 
-       ret = get_vaddr_frames(start, npages, true, true, g2d_userptr->vec);
+       ret = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
+               g2d_userptr->vec);
        if (ret != npages) {
                DRM_ERROR("failed to get user pages from userptr.\n");
                if (ret < 0)
index e8fb6ef947eea388ac0425054c6ce928bf453a5d..38eaa63afb31f61125314893d154ec03171cb3e1 100644 (file)
@@ -1907,6 +1907,8 @@ err_disable_pm_runtime:
 err_hdmiphy:
        if (hdata->hdmiphy_port)
                put_device(&hdata->hdmiphy_port->dev);
+       if (hdata->regs_hdmiphy)
+               iounmap(hdata->regs_hdmiphy);
 err_ddc:
        put_device(&hdata->ddc_adpt->dev);
 
@@ -1929,6 +1931,9 @@ static int hdmi_remove(struct platform_device *pdev)
        if (hdata->hdmiphy_port)
                put_device(&hdata->hdmiphy_port->dev);
 
+       if (hdata->regs_hdmiphy)
+               iounmap(hdata->regs_hdmiphy);
+
        put_device(&hdata->ddc_adpt->dev);
 
        return 0;
index 3371635cd4d707192e39104ed1bc13671fc19c69..deb57435cc89736e5531e411ef12bcbe8f4d35a1 100644 (file)
 static void fsl_dcu_drm_crtc_atomic_flush(struct drm_crtc *crtc,
                                          struct drm_crtc_state *old_crtc_state)
 {
+       struct drm_device *dev = crtc->dev;
+       struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
        struct drm_pending_vblank_event *event = crtc->state->event;
 
+       regmap_write(fsl_dev->regmap,
+                    DCU_UPDATE_MODE, DCU_UPDATE_MODE_READREG);
+
        if (event) {
                crtc->state->event = NULL;
 
@@ -39,11 +44,15 @@ static void fsl_dcu_drm_crtc_atomic_flush(struct drm_crtc *crtc,
        }
 }
 
-static void fsl_dcu_drm_disable_crtc(struct drm_crtc *crtc)
+static void fsl_dcu_drm_crtc_atomic_disable(struct drm_crtc *crtc,
+                                       struct drm_crtc_state *old_crtc_state)
 {
        struct drm_device *dev = crtc->dev;
        struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
 
+       /* always disable planes on the CRTC */
+       drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, true);
+
        drm_crtc_vblank_off(crtc);
 
        regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
@@ -51,6 +60,7 @@ static void fsl_dcu_drm_disable_crtc(struct drm_crtc *crtc)
                           DCU_MODE_DCU_MODE(DCU_MODE_OFF));
        regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
                     DCU_UPDATE_MODE_READREG);
+       clk_disable_unprepare(fsl_dev->pix_clk);
 }
 
 static void fsl_dcu_drm_crtc_enable(struct drm_crtc *crtc)
@@ -58,6 +68,7 @@ static void fsl_dcu_drm_crtc_enable(struct drm_crtc *crtc)
        struct drm_device *dev = crtc->dev;
        struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
 
+       clk_prepare_enable(fsl_dev->pix_clk);
        regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
                           DCU_MODE_DCU_MODE_MASK,
                           DCU_MODE_DCU_MODE(DCU_MODE_NORMAL));
@@ -116,14 +127,12 @@ static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc)
                     DCU_THRESHOLD_LS_BF_VS(BF_VS_VAL) |
                     DCU_THRESHOLD_OUT_BUF_HIGH(BUF_MAX_VAL) |
                     DCU_THRESHOLD_OUT_BUF_LOW(BUF_MIN_VAL));
-       regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-                    DCU_UPDATE_MODE_READREG);
        return;
 }
 
 static const struct drm_crtc_helper_funcs fsl_dcu_drm_crtc_helper_funcs = {
+       .atomic_disable = fsl_dcu_drm_crtc_atomic_disable,
        .atomic_flush = fsl_dcu_drm_crtc_atomic_flush,
-       .disable = fsl_dcu_drm_disable_crtc,
        .enable = fsl_dcu_drm_crtc_enable,
        .mode_set_nofb = fsl_dcu_drm_crtc_mode_set_nofb,
 };
index 0884c45aefe84a9800b2ec95c57d0f44d1259835..cc2fde2ae5eff272c8f5e15548b61b525a54c6cc 100644 (file)
@@ -59,8 +59,6 @@ static int fsl_dcu_drm_irq_init(struct drm_device *dev)
 
        regmap_write(fsl_dev->regmap, DCU_INT_STATUS, 0);
        regmap_write(fsl_dev->regmap, DCU_INT_MASK, ~0);
-       regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-                    DCU_UPDATE_MODE_READREG);
 
        return ret;
 }
@@ -139,8 +137,6 @@ static irqreturn_t fsl_dcu_drm_irq(int irq, void *arg)
                drm_handle_vblank(dev, 0);
 
        regmap_write(fsl_dev->regmap, DCU_INT_STATUS, int_status);
-       regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-                    DCU_UPDATE_MODE_READREG);
 
        return IRQ_HANDLED;
 }
@@ -267,12 +263,8 @@ static int fsl_dcu_drm_pm_resume(struct device *dev)
                return ret;
        }
 
-       ret = clk_prepare_enable(fsl_dev->pix_clk);
-       if (ret < 0) {
-               dev_err(dev, "failed to enable pix clk\n");
-               goto disable_dcu_clk;
-       }
-
+       if (fsl_dev->tcon)
+               fsl_tcon_bypass_enable(fsl_dev->tcon);
        fsl_dcu_drm_init_planes(fsl_dev->drm);
        drm_atomic_helper_resume(fsl_dev->drm, fsl_dev->state);
 
@@ -284,10 +276,6 @@ static int fsl_dcu_drm_pm_resume(struct device *dev)
        enable_irq(fsl_dev->irq);
 
        return 0;
-
-disable_dcu_clk:
-       clk_disable_unprepare(fsl_dev->clk);
-       return ret;
 }
 #endif
 
@@ -401,18 +389,12 @@ static int fsl_dcu_drm_probe(struct platform_device *pdev)
                goto disable_clk;
        }
 
-       ret = clk_prepare_enable(fsl_dev->pix_clk);
-       if (ret < 0) {
-               dev_err(dev, "failed to enable pix clk\n");
-               goto unregister_pix_clk;
-       }
-
        fsl_dev->tcon = fsl_tcon_init(dev);
 
        drm = drm_dev_alloc(driver, dev);
        if (IS_ERR(drm)) {
                ret = PTR_ERR(drm);
-               goto disable_pix_clk;
+               goto unregister_pix_clk;
        }
 
        fsl_dev->dev = dev;
@@ -433,8 +415,6 @@ static int fsl_dcu_drm_probe(struct platform_device *pdev)
 
 unref:
        drm_dev_unref(drm);
-disable_pix_clk:
-       clk_disable_unprepare(fsl_dev->pix_clk);
 unregister_pix_clk:
        clk_unregister(fsl_dev->pix_clk);
 disable_clk:
@@ -447,7 +427,6 @@ static int fsl_dcu_drm_remove(struct platform_device *pdev)
        struct fsl_dcu_drm_device *fsl_dev = platform_get_drvdata(pdev);
 
        clk_disable_unprepare(fsl_dev->clk);
-       clk_disable_unprepare(fsl_dev->pix_clk);
        clk_unregister(fsl_dev->pix_clk);
        drm_put_dev(fsl_dev->drm);
 
index a7e5486bd1e934be88374df0ce08731f6286ee1a..a99f4884742058ab6820b8f9cd5e086326caabb5 100644 (file)
@@ -160,11 +160,6 @@ static void fsl_dcu_drm_plane_atomic_update(struct drm_plane *plane,
                             DCU_LAYER_POST_SKIP(0) |
                             DCU_LAYER_PRE_SKIP(0));
        }
-       regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
-                          DCU_MODE_DCU_MODE_MASK,
-                          DCU_MODE_DCU_MODE(DCU_MODE_NORMAL));
-       regmap_write(fsl_dev->regmap,
-                    DCU_UPDATE_MODE, DCU_UPDATE_MODE_READREG);
 
        return;
 }
@@ -211,11 +206,6 @@ void fsl_dcu_drm_init_planes(struct drm_device *dev)
                for (j = 1; j <= fsl_dev->soc->layer_regs; j++)
                        regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(i, j), 0);
        }
-       regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
-                          DCU_MODE_DCU_MODE_MASK,
-                          DCU_MODE_DCU_MODE(DCU_MODE_OFF));
-       regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-                    DCU_UPDATE_MODE_READREG);
 }
 
 struct drm_plane *fsl_dcu_drm_primary_create_plane(struct drm_device *dev)
index 26edcc899712d16db8959cbb2a5b01c932431c0b..e1dd75b181189bbc27f81eb92ad3f308b735aa2e 100644 (file)
 #include "fsl_dcu_drm_drv.h"
 #include "fsl_tcon.h"
 
-static int
-fsl_dcu_drm_encoder_atomic_check(struct drm_encoder *encoder,
-                                struct drm_crtc_state *crtc_state,
-                                struct drm_connector_state *conn_state)
-{
-       return 0;
-}
-
-static void fsl_dcu_drm_encoder_disable(struct drm_encoder *encoder)
-{
-       struct drm_device *dev = encoder->dev;
-       struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
-
-       if (fsl_dev->tcon)
-               fsl_tcon_bypass_disable(fsl_dev->tcon);
-}
-
-static void fsl_dcu_drm_encoder_enable(struct drm_encoder *encoder)
-{
-       struct drm_device *dev = encoder->dev;
-       struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
-
-       if (fsl_dev->tcon)
-               fsl_tcon_bypass_enable(fsl_dev->tcon);
-}
-
-static const struct drm_encoder_helper_funcs encoder_helper_funcs = {
-       .atomic_check = fsl_dcu_drm_encoder_atomic_check,
-       .disable = fsl_dcu_drm_encoder_disable,
-       .enable = fsl_dcu_drm_encoder_enable,
-};
-
 static void fsl_dcu_drm_encoder_destroy(struct drm_encoder *encoder)
 {
        drm_encoder_cleanup(encoder);
@@ -68,13 +36,16 @@ int fsl_dcu_drm_encoder_create(struct fsl_dcu_drm_device *fsl_dev,
        int ret;
 
        encoder->possible_crtcs = 1;
+
+       /* Use bypass mode for parallel RGB/LVDS encoder */
+       if (fsl_dev->tcon)
+               fsl_tcon_bypass_enable(fsl_dev->tcon);
+
        ret = drm_encoder_init(fsl_dev->drm, encoder, &encoder_funcs,
                               DRM_MODE_ENCODER_LVDS, NULL);
        if (ret < 0)
                return ret;
 
-       drm_encoder_helper_add(encoder, &encoder_helper_funcs);
-
        return 0;
 }
 
index bfb2efd8d4d44e996d6af1d75299f12fab534c92..18dfdd5c1b3b1ba5fc8b9c660b37c036d5ea87dd 100644 (file)
@@ -1447,8 +1447,6 @@ static int i915_drm_suspend(struct drm_device *dev)
 
        dev_priv->suspend_count++;
 
-       intel_display_set_init_power(dev_priv, false);
-
        intel_csr_ucode_suspend(dev_priv);
 
 out:
@@ -1466,6 +1464,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
 
        disable_rpm_wakeref_asserts(dev_priv);
 
+       intel_display_set_init_power(dev_priv, false);
+
        fw_csr = !IS_BROXTON(dev_priv) &&
                suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload;
        /*
index 8b9ee4e390c0a1fac6d6449c2ceb2f4f51ca3364..685e9e065287983a50b82aa02faa9822d74d1582 100644 (file)
@@ -2883,6 +2883,11 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level,
 extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
                              unsigned long arg);
 #endif
+extern const struct dev_pm_ops i915_pm_ops;
+
+extern int i915_driver_load(struct pci_dev *pdev,
+                           const struct pci_device_id *ent);
+extern void i915_driver_unload(struct drm_device *dev);
 extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
 extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
 extern void i915_reset(struct drm_i915_private *dev_priv);
index 947e82c2b1757993e6b5fff2e6fdf29d4f584ffa..91ab7e9d6d2ead0827c0b452f119062d8be50a0a 100644 (file)
@@ -1806,7 +1806,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
                /* Use a partial view if it is bigger than available space */
                chunk_size = MIN_CHUNK_PAGES;
                if (i915_gem_object_is_tiled(obj))
-                       chunk_size = max(chunk_size, tile_row_pages(obj));
+                       chunk_size = roundup(chunk_size, tile_row_pages(obj));
 
                memset(&view, 0, sizeof(view));
                view.type = I915_GGTT_VIEW_PARTIAL;
@@ -3543,15 +3543,27 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
        if (view->type == I915_GGTT_VIEW_NORMAL)
                vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
                                               PIN_MAPPABLE | PIN_NONBLOCK);
-       if (IS_ERR(vma))
-               vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0);
+       if (IS_ERR(vma)) {
+               struct drm_i915_private *i915 = to_i915(obj->base.dev);
+               unsigned int flags;
+
+               /* Valleyview is definitely limited to scanning out the first
+                * 512MiB. Lets presume this behaviour was inherited from the
+                * g4x display engine and that all earlier gen are similarly
+                * limited. Testing suggests that it is a little more
+                * complicated than this. For example, Cherryview appears quite
+                * happy to scanout from anywhere within its global aperture.
+                */
+               flags = 0;
+               if (HAS_GMCH_DISPLAY(i915))
+                       flags = PIN_MAPPABLE;
+               vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
+       }
        if (IS_ERR(vma))
                goto err_unpin_display;
 
        vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
-       WARN_ON(obj->pin_display > i915_vma_pin_count(vma));
-
        i915_gem_object_flush_cpu_write_domain(obj);
 
        old_write_domain = obj->base.write_domain;
@@ -3588,7 +3600,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma)
                list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 
        i915_vma_unpin(vma);
-       WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma));
 }
 
 /**
@@ -3745,7 +3756,12 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
        mappable = (vma->node.start + fence_size <=
                    dev_priv->ggtt.mappable_end);
 
-       if (mappable && fenceable)
+       /*
+        * Explicitly disable for rotated VMA since the display does not
+        * need the fence and the VMA is not accessible to other users.
+        */
+       if (mappable && fenceable &&
+           vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED)
                vma->flags |= I915_VMA_CAN_FENCE;
        else
                vma->flags &= ~I915_VMA_CAN_FENCE;
index 7adb4c77cc7f449698f0207d5f51cbf037cff179..a218c2e395e759e9e3b9c367324311550f4db0a8 100644 (file)
@@ -1281,6 +1281,12 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
        return ctx;
 }
 
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+       return !(obj->cache_level == I915_CACHE_NONE ||
+                obj->cache_level == I915_CACHE_WT);
+}
+
 void i915_vma_move_to_active(struct i915_vma *vma,
                             struct drm_i915_gem_request *req,
                             unsigned int flags)
@@ -1311,6 +1317,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 
                /* update for the implicit flush after a batch */
                obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+               if (!obj->cache_dirty && gpu_write_needs_clflush(obj))
+                       obj->cache_dirty = true;
        }
 
        if (flags & EXEC_OBJECT_NEEDS_FENCE)
index 8df1fa7234e8e031e9c61da4608c5c4b7610476b..2c7ba0ee127c6a230eff6bf7deff705a1259364b 100644 (file)
@@ -290,6 +290,8 @@ i915_vma_put_fence(struct i915_vma *vma)
 {
        struct drm_i915_fence_reg *fence = vma->fence;
 
+       assert_rpm_wakelock_held(to_i915(vma->vm->dev));
+
        if (!fence)
                return 0;
 
@@ -341,6 +343,8 @@ i915_vma_get_fence(struct i915_vma *vma)
        struct drm_i915_fence_reg *fence;
        struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL;
 
+       assert_rpm_wakelock_held(to_i915(vma->vm->dev));
+
        /* Just update our place in the LRU if our fence is getting reused. */
        if (vma->fence) {
                fence = vma->fence;
@@ -371,6 +375,12 @@ void i915_gem_restore_fences(struct drm_device *dev)
        struct drm_i915_private *dev_priv = to_i915(dev);
        int i;
 
+       /* Note that this may be called outside of struct_mutex, by
+        * runtime suspend/resume. The barrier we require is enforced by
+        * rpm itself - all access to fences/GTT are only within an rpm
+        * wakeref, and to acquire that wakeref you must pass through here.
+        */
+
        for (i = 0; i < dev_priv->num_fence_regs; i++) {
                struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
                struct i915_vma *vma = reg->vma;
@@ -379,10 +389,17 @@ void i915_gem_restore_fences(struct drm_device *dev)
                 * Commit delayed tiling changes if we have an object still
                 * attached to the fence, otherwise just clear the fence.
                 */
-               if (vma && !i915_gem_object_is_tiled(vma->obj))
+               if (vma && !i915_gem_object_is_tiled(vma->obj)) {
+                       GEM_BUG_ON(!reg->dirty);
+                       GEM_BUG_ON(vma->obj->fault_mappable);
+
+                       list_move(&reg->link, &dev_priv->mm.fence_list);
+                       vma->fence = NULL;
                        vma = NULL;
+               }
 
-               fence_update(reg, vma);
+               fence_write(reg, vma);
+               reg->vma = vma;
        }
 }
 
index e537930c64b53d5a18ebbf7fcb79be68f0114acc..c6f780f5abc9cf3776edcfa67a6fa08a6b53239a 100644 (file)
@@ -508,6 +508,10 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
        pvec = drm_malloc_gfp(npages, sizeof(struct page *), GFP_TEMPORARY);
        if (pvec != NULL) {
                struct mm_struct *mm = obj->userptr.mm->mm;
+               unsigned int flags = 0;
+
+               if (!obj->userptr.read_only)
+                       flags |= FOLL_WRITE;
 
                ret = -EFAULT;
                if (atomic_inc_not_zero(&mm->mm_users)) {
@@ -517,7 +521,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
                                        (work->task, mm,
                                         obj->userptr.ptr + pinned * PAGE_SIZE,
                                         npages - pinned,
-                                        !obj->userptr.read_only, 0,
+                                        flags,
                                         pvec + pinned, NULL);
                                if (ret < 0)
                                        break;
index 687c768833b3e4e3d0ef87f4604a5838121d9fdd..31e6edd08dd0525ce9b4477df9e77a3c1ed0c2d5 100644 (file)
@@ -431,9 +431,6 @@ static const struct pci_device_id pciidlist[] = {
 };
 MODULE_DEVICE_TABLE(pci, pciidlist);
 
-extern int i915_driver_load(struct pci_dev *pdev,
-                           const struct pci_device_id *ent);
-
 static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        struct intel_device_info *intel_info =
@@ -463,8 +460,6 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        return i915_driver_load(pdev, ent);
 }
 
-extern void i915_driver_unload(struct drm_device *dev);
-
 static void i915_pci_remove(struct pci_dev *pdev)
 {
        struct drm_device *dev = pci_get_drvdata(pdev);
@@ -473,8 +468,6 @@ static void i915_pci_remove(struct pci_dev *pdev)
        drm_dev_unref(dev);
 }
 
-extern const struct dev_pm_ops i915_pm_ops;
-
 static struct pci_driver i915_pci_driver = {
        .name = DRIVER_NAME,
        .id_table = pciidlist,
index c6e69e4cfa8314a051277797bed0ec1fbcceb3ea..cf2560708e031d5957442e60783377ebe4106e11 100644 (file)
@@ -1031,6 +1031,77 @@ static u8 translate_iboost(u8 val)
        return mapping[val];
 }
 
+static void sanitize_ddc_pin(struct drm_i915_private *dev_priv,
+                            enum port port)
+{
+       const struct ddi_vbt_port_info *info =
+               &dev_priv->vbt.ddi_port_info[port];
+       enum port p;
+
+       if (!info->alternate_ddc_pin)
+               return;
+
+       for_each_port_masked(p, (1 << port) - 1) {
+               struct ddi_vbt_port_info *i = &dev_priv->vbt.ddi_port_info[p];
+
+               if (info->alternate_ddc_pin != i->alternate_ddc_pin)
+                       continue;
+
+               DRM_DEBUG_KMS("port %c trying to use the same DDC pin (0x%x) as port %c, "
+                             "disabling port %c DVI/HDMI support\n",
+                             port_name(p), i->alternate_ddc_pin,
+                             port_name(port), port_name(p));
+
+               /*
+                * If we have multiple ports supposedly sharing the
+                * pin, then dvi/hdmi couldn't exist on the shared
+                * port. Otherwise they share the same ddc pin and
+                * system couldn't communicate with them separately.
+                *
+                * Due to parsing the ports in alphabetical order,
+                * a higher port will always clobber a lower one.
+                */
+               i->supports_dvi = false;
+               i->supports_hdmi = false;
+               i->alternate_ddc_pin = 0;
+       }
+}
+
+static void sanitize_aux_ch(struct drm_i915_private *dev_priv,
+                           enum port port)
+{
+       const struct ddi_vbt_port_info *info =
+               &dev_priv->vbt.ddi_port_info[port];
+       enum port p;
+
+       if (!info->alternate_aux_channel)
+               return;
+
+       for_each_port_masked(p, (1 << port) - 1) {
+               struct ddi_vbt_port_info *i = &dev_priv->vbt.ddi_port_info[p];
+
+               if (info->alternate_aux_channel != i->alternate_aux_channel)
+                       continue;
+
+               DRM_DEBUG_KMS("port %c trying to use the same AUX CH (0x%x) as port %c, "
+                             "disabling port %c DP support\n",
+                             port_name(p), i->alternate_aux_channel,
+                             port_name(port), port_name(p));
+
+               /*
+                * If we have multiple ports supposedly sharing the
+                * aux channel, then DP couldn't exist on the shared
+                * port. Otherwise they share the same aux channel
+                * and system couldn't communicate with them separately.
+                *
+                * Due to parsing the ports in alphabetical order,
+                * a higher port will always clobber a lower one.
+                */
+               i->supports_dp = false;
+               i->alternate_aux_channel = 0;
+       }
+}
+
 static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
                           const struct bdb_header *bdb)
 {
@@ -1072,7 +1143,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
        if (!child)
                return;
 
-       aux_channel = child->raw[25];
+       aux_channel = child->common.aux_channel;
        ddc_pin = child->common.ddc_pin;
 
        is_dvi = child->common.device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING;
@@ -1105,54 +1176,15 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
                DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port));
 
        if (is_dvi) {
-               if (port == PORT_E) {
-                       info->alternate_ddc_pin = ddc_pin;
-                       /* if DDIE share ddc pin with other port, then
-                        * dvi/hdmi couldn't exist on the shared port.
-                        * Otherwise they share the same ddc bin and system
-                        * couldn't communicate with them seperately. */
-                       if (ddc_pin == DDC_PIN_B) {
-                               dev_priv->vbt.ddi_port_info[PORT_B].supports_dvi = 0;
-                               dev_priv->vbt.ddi_port_info[PORT_B].supports_hdmi = 0;
-                       } else if (ddc_pin == DDC_PIN_C) {
-                               dev_priv->vbt.ddi_port_info[PORT_C].supports_dvi = 0;
-                               dev_priv->vbt.ddi_port_info[PORT_C].supports_hdmi = 0;
-                       } else if (ddc_pin == DDC_PIN_D) {
-                               dev_priv->vbt.ddi_port_info[PORT_D].supports_dvi = 0;
-                               dev_priv->vbt.ddi_port_info[PORT_D].supports_hdmi = 0;
-                       }
-               } else if (ddc_pin == DDC_PIN_B && port != PORT_B)
-                       DRM_DEBUG_KMS("Unexpected DDC pin for port B\n");
-               else if (ddc_pin == DDC_PIN_C && port != PORT_C)
-                       DRM_DEBUG_KMS("Unexpected DDC pin for port C\n");
-               else if (ddc_pin == DDC_PIN_D && port != PORT_D)
-                       DRM_DEBUG_KMS("Unexpected DDC pin for port D\n");
+               info->alternate_ddc_pin = ddc_pin;
+
+               sanitize_ddc_pin(dev_priv, port);
        }
 
        if (is_dp) {
-               if (port == PORT_E) {
-                       info->alternate_aux_channel = aux_channel;
-                       /* if DDIE share aux channel with other port, then
-                        * DP couldn't exist on the shared port. Otherwise
-                        * they share the same aux channel and system
-                        * couldn't communicate with them seperately. */
-                       if (aux_channel == DP_AUX_A)
-                               dev_priv->vbt.ddi_port_info[PORT_A].supports_dp = 0;
-                       else if (aux_channel == DP_AUX_B)
-                               dev_priv->vbt.ddi_port_info[PORT_B].supports_dp = 0;
-                       else if (aux_channel == DP_AUX_C)
-                               dev_priv->vbt.ddi_port_info[PORT_C].supports_dp = 0;
-                       else if (aux_channel == DP_AUX_D)
-                               dev_priv->vbt.ddi_port_info[PORT_D].supports_dp = 0;
-               }
-               else if (aux_channel == DP_AUX_A && port != PORT_A)
-                       DRM_DEBUG_KMS("Unexpected AUX channel for port A\n");
-               else if (aux_channel == DP_AUX_B && port != PORT_B)
-                       DRM_DEBUG_KMS("Unexpected AUX channel for port B\n");
-               else if (aux_channel == DP_AUX_C && port != PORT_C)
-                       DRM_DEBUG_KMS("Unexpected AUX channel for port C\n");
-               else if (aux_channel == DP_AUX_D && port != PORT_D)
-                       DRM_DEBUG_KMS("Unexpected AUX channel for port D\n");
+               info->alternate_aux_channel = aux_channel;
+
+               sanitize_aux_ch(dev_priv, port);
        }
 
        if (bdb->version >= 158) {
@@ -1641,7 +1673,8 @@ bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port)
        return false;
 }
 
-bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port)
+static bool child_dev_is_dp_dual_mode(const union child_device_config *p_child,
+                                     enum port port)
 {
        static const struct {
                u16 dp, hdmi;
@@ -1655,22 +1688,35 @@ bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum por
                [PORT_D] = { DVO_PORT_DPD, DVO_PORT_HDMID, },
                [PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, },
        };
-       int i;
 
        if (port == PORT_A || port >= ARRAY_SIZE(port_mapping))
                return false;
 
-       if (!dev_priv->vbt.child_dev_num)
+       if ((p_child->common.device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) !=
+           (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS))
                return false;
 
+       if (p_child->common.dvo_port == port_mapping[port].dp)
+               return true;
+
+       /* Only accept a HDMI dvo_port as DP++ if it has an AUX channel */
+       if (p_child->common.dvo_port == port_mapping[port].hdmi &&
+           p_child->common.aux_channel != 0)
+               return true;
+
+       return false;
+}
+
+bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv,
+                                    enum port port)
+{
+       int i;
+
        for (i = 0; i < dev_priv->vbt.child_dev_num; i++) {
                const union child_device_config *p_child =
                        &dev_priv->vbt.child_dev[i];
 
-               if ((p_child->common.dvo_port == port_mapping[port].dp ||
-                    p_child->common.dvo_port == port_mapping[port].hdmi) &&
-                   (p_child->common.device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) ==
-                   (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS))
+               if (child_dev_is_dp_dual_mode(p_child, port))
                        return true;
        }
 
index 73b6858600acf56b30ef75e62c9a63804ed305e3..1b20e160bc1f68f0819cc4875c23d00a117d51d2 100644 (file)
@@ -192,7 +192,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
        struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
        const int s_max = 3, ss_max = 3, eu_max = 8;
        int s, ss;
-       u32 fuse2, eu_disable[s_max];
+       u32 fuse2, eu_disable[3]; /* s_max */
 
        fuse2 = I915_READ(GEN8_FUSE2);
        sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
index fbcfed63a76e16ec59f465c96e9b5c7c8d37ffe2..81c11499bcf059356669cf77778bb53ae17bb0e6 100644 (file)
@@ -2978,7 +2978,8 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state)
        /* Rotate src coordinates to match rotated GTT view */
        if (intel_rotation_90_or_270(rotation))
                drm_rect_rotate(&plane_state->base.src,
-                               fb->width, fb->height, DRM_ROTATE_270);
+                               fb->width << 16, fb->height << 16,
+                               DRM_ROTATE_270);
 
        /*
         * Handle the AUX surface first since
@@ -10242,6 +10243,29 @@ static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state)
        bxt_set_cdclk(to_i915(dev), req_cdclk);
 }
 
+static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state,
+                                         int pixel_rate)
+{
+       struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
+
+       /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
+       if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled)
+               pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95);
+
+       /* BSpec says "Do not use DisplayPort with CDCLK less than
+        * 432 MHz, audio enabled, port width x4, and link rate
+        * HBR2 (5.4 GHz), or else there may be audio corruption or
+        * screen corruption."
+        */
+       if (intel_crtc_has_dp_encoder(crtc_state) &&
+           crtc_state->has_audio &&
+           crtc_state->port_clock >= 540000 &&
+           crtc_state->lane_count == 4)
+               pixel_rate = max(432000, pixel_rate);
+
+       return pixel_rate;
+}
+
 /* compute the max rate for new configuration */
 static int ilk_max_pixel_rate(struct drm_atomic_state *state)
 {
@@ -10267,9 +10291,9 @@ static int ilk_max_pixel_rate(struct drm_atomic_state *state)
 
                pixel_rate = ilk_pipe_pixel_rate(crtc_state);
 
-               /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */
-               if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled)
-                       pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95);
+               if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv))
+                       pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state,
+                                                                   pixel_rate);
 
                intel_state->min_pixclk[i] = pixel_rate;
        }
@@ -14310,7 +14334,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 
        for_each_plane_in_state(state, plane, plane_state, i) {
                struct intel_plane_state *intel_plane_state =
-                       to_intel_plane_state(plane_state);
+                       to_intel_plane_state(plane->state);
 
                if (!intel_plane_state->wait_req)
                        continue;
index 14a3cf0b72133a2734cb98059fbdd524578dc392..bf344d08356a2b48a77726fa8bf749c55eb2651f 100644 (file)
@@ -1108,6 +1108,44 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
        return ret;
 }
 
+static enum port intel_aux_port(struct drm_i915_private *dev_priv,
+                               enum port port)
+{
+       const struct ddi_vbt_port_info *info =
+               &dev_priv->vbt.ddi_port_info[port];
+       enum port aux_port;
+
+       if (!info->alternate_aux_channel) {
+               DRM_DEBUG_KMS("using AUX %c for port %c (platform default)\n",
+                             port_name(port), port_name(port));
+               return port;
+       }
+
+       switch (info->alternate_aux_channel) {
+       case DP_AUX_A:
+               aux_port = PORT_A;
+               break;
+       case DP_AUX_B:
+               aux_port = PORT_B;
+               break;
+       case DP_AUX_C:
+               aux_port = PORT_C;
+               break;
+       case DP_AUX_D:
+               aux_port = PORT_D;
+               break;
+       default:
+               MISSING_CASE(info->alternate_aux_channel);
+               aux_port = PORT_A;
+               break;
+       }
+
+       DRM_DEBUG_KMS("using AUX %c for port %c (VBT)\n",
+                     port_name(aux_port), port_name(port));
+
+       return aux_port;
+}
+
 static i915_reg_t g4x_aux_ctl_reg(struct drm_i915_private *dev_priv,
                                       enum port port)
 {
@@ -1168,36 +1206,9 @@ static i915_reg_t ilk_aux_data_reg(struct drm_i915_private *dev_priv,
        }
 }
 
-/*
- * On SKL we don't have Aux for port E so we rely
- * on VBT to set a proper alternate aux channel.
- */
-static enum port skl_porte_aux_port(struct drm_i915_private *dev_priv)
-{
-       const struct ddi_vbt_port_info *info =
-               &dev_priv->vbt.ddi_port_info[PORT_E];
-
-       switch (info->alternate_aux_channel) {
-       case DP_AUX_A:
-               return PORT_A;
-       case DP_AUX_B:
-               return PORT_B;
-       case DP_AUX_C:
-               return PORT_C;
-       case DP_AUX_D:
-               return PORT_D;
-       default:
-               MISSING_CASE(info->alternate_aux_channel);
-               return PORT_A;
-       }
-}
-
 static i915_reg_t skl_aux_ctl_reg(struct drm_i915_private *dev_priv,
                                       enum port port)
 {
-       if (port == PORT_E)
-               port = skl_porte_aux_port(dev_priv);
-
        switch (port) {
        case PORT_A:
        case PORT_B:
@@ -1213,9 +1224,6 @@ static i915_reg_t skl_aux_ctl_reg(struct drm_i915_private *dev_priv,
 static i915_reg_t skl_aux_data_reg(struct drm_i915_private *dev_priv,
                                        enum port port, int index)
 {
-       if (port == PORT_E)
-               port = skl_porte_aux_port(dev_priv);
-
        switch (port) {
        case PORT_A:
        case PORT_B:
@@ -1253,7 +1261,8 @@ static i915_reg_t intel_aux_data_reg(struct drm_i915_private *dev_priv,
 static void intel_aux_reg_init(struct intel_dp *intel_dp)
 {
        struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp));
-       enum port port = dp_to_dig_port(intel_dp)->port;
+       enum port port = intel_aux_port(dev_priv,
+                                       dp_to_dig_port(intel_dp)->port);
        int i;
 
        intel_dp->aux_ch_ctl_reg = intel_aux_ctl_reg(dev_priv, port);
@@ -3551,8 +3560,8 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
        /* Read the eDP Display control capabilities registers */
        if ((intel_dp->dpcd[DP_EDP_CONFIGURATION_CAP] & DP_DPCD_DISPLAY_CONTROL_CAPABLE) &&
            drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV,
-                            intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd) ==
-                            sizeof(intel_dp->edp_dpcd)))
+                            intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) ==
+                            sizeof(intel_dp->edp_dpcd))
                DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd),
                              intel_dp->edp_dpcd);
 
@@ -4454,21 +4463,11 @@ static enum drm_connector_status
 intel_dp_detect(struct drm_connector *connector, bool force)
 {
        struct intel_dp *intel_dp = intel_attached_dp(connector);
-       struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
-       struct intel_encoder *intel_encoder = &intel_dig_port->base;
        enum drm_connector_status status = connector->status;
 
        DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
                      connector->base.id, connector->name);
 
-       if (intel_dp->is_mst) {
-               /* MST devices are disconnected from a monitor POV */
-               intel_dp_unset_edid(intel_dp);
-               if (intel_encoder->type != INTEL_OUTPUT_EDP)
-                       intel_encoder->type = INTEL_OUTPUT_DP;
-               return connector_status_disconnected;
-       }
-
        /* If full detect is not performed yet, do a full detect */
        if (!intel_dp->detect_done)
                status = intel_dp_long_pulse(intel_dp->attached_connector);
index faa67624e1ed734b80b0c0b9734b63c173d9389c..c43dd9abce790cf797804bde73781363c396d91d 100644 (file)
@@ -104,8 +104,10 @@ static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv,
        int lines;
 
        intel_fbc_get_plane_source_size(cache, NULL, &lines);
-       if (INTEL_INFO(dev_priv)->gen >= 7)
+       if (INTEL_GEN(dev_priv) == 7)
                lines = min(lines, 2048);
+       else if (INTEL_GEN(dev_priv) >= 8)
+               lines = min(lines, 2560);
 
        /* Hardware needs the full buffer stride, not just the active area. */
        return lines * cache->fb.stride;
index f40a35f2913a8222f2800bfa081b3adb1c636288..13c306173f27b9be7dde7e831d1dca942b59a4e0 100644 (file)
@@ -1799,6 +1799,50 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c
        intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_NONE;
 }
 
+static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv,
+                            enum port port)
+{
+       const struct ddi_vbt_port_info *info =
+               &dev_priv->vbt.ddi_port_info[port];
+       u8 ddc_pin;
+
+       if (info->alternate_ddc_pin) {
+               DRM_DEBUG_KMS("Using DDC pin 0x%x for port %c (VBT)\n",
+                             info->alternate_ddc_pin, port_name(port));
+               return info->alternate_ddc_pin;
+       }
+
+       switch (port) {
+       case PORT_B:
+               if (IS_BROXTON(dev_priv))
+                       ddc_pin = GMBUS_PIN_1_BXT;
+               else
+                       ddc_pin = GMBUS_PIN_DPB;
+               break;
+       case PORT_C:
+               if (IS_BROXTON(dev_priv))
+                       ddc_pin = GMBUS_PIN_2_BXT;
+               else
+                       ddc_pin = GMBUS_PIN_DPC;
+               break;
+       case PORT_D:
+               if (IS_CHERRYVIEW(dev_priv))
+                       ddc_pin = GMBUS_PIN_DPD_CHV;
+               else
+                       ddc_pin = GMBUS_PIN_DPD;
+               break;
+       default:
+               MISSING_CASE(port);
+               ddc_pin = GMBUS_PIN_DPB;
+               break;
+       }
+
+       DRM_DEBUG_KMS("Using DDC pin 0x%x for port %c (platform default)\n",
+                     ddc_pin, port_name(port));
+
+       return ddc_pin;
+}
+
 void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
                               struct intel_connector *intel_connector)
 {
@@ -1808,7 +1852,6 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
        struct drm_device *dev = intel_encoder->base.dev;
        struct drm_i915_private *dev_priv = to_i915(dev);
        enum port port = intel_dig_port->port;
-       uint8_t alternate_ddc_pin;
 
        DRM_DEBUG_KMS("Adding HDMI connector on port %c\n",
                      port_name(port));
@@ -1826,12 +1869,10 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
        connector->doublescan_allowed = 0;
        connector->stereo_allowed = 1;
 
+       intel_hdmi->ddc_bus = intel_hdmi_ddc_pin(dev_priv, port);
+
        switch (port) {
        case PORT_B:
-               if (IS_BROXTON(dev_priv))
-                       intel_hdmi->ddc_bus = GMBUS_PIN_1_BXT;
-               else
-                       intel_hdmi->ddc_bus = GMBUS_PIN_DPB;
                /*
                 * On BXT A0/A1, sw needs to activate DDIA HPD logic and
                 * interrupts to check the external panel connection.
@@ -1842,46 +1883,17 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port,
                        intel_encoder->hpd_pin = HPD_PORT_B;
                break;
        case PORT_C:
-               if (IS_BROXTON(dev_priv))
-                       intel_hdmi->ddc_bus = GMBUS_PIN_2_BXT;
-               else
-                       intel_hdmi->ddc_bus = GMBUS_PIN_DPC;
                intel_encoder->hpd_pin = HPD_PORT_C;
                break;
        case PORT_D:
-               if (WARN_ON(IS_BROXTON(dev_priv)))
-                       intel_hdmi->ddc_bus = GMBUS_PIN_DISABLED;
-               else if (IS_CHERRYVIEW(dev_priv))
-                       intel_hdmi->ddc_bus = GMBUS_PIN_DPD_CHV;
-               else
-                       intel_hdmi->ddc_bus = GMBUS_PIN_DPD;
                intel_encoder->hpd_pin = HPD_PORT_D;
                break;
        case PORT_E:
-               /* On SKL PORT E doesn't have seperate GMBUS pin
-                *  We rely on VBT to set a proper alternate GMBUS pin. */
-               alternate_ddc_pin =
-                       dev_priv->vbt.ddi_port_info[PORT_E].alternate_ddc_pin;
-               switch (alternate_ddc_pin) {
-               case DDC_PIN_B:
-                       intel_hdmi->ddc_bus = GMBUS_PIN_DPB;
-                       break;
-               case DDC_PIN_C:
-                       intel_hdmi->ddc_bus = GMBUS_PIN_DPC;
-                       break;
-               case DDC_PIN_D:
-                       intel_hdmi->ddc_bus = GMBUS_PIN_DPD;
-                       break;
-               default:
-                       MISSING_CASE(alternate_ddc_pin);
-               }
                intel_encoder->hpd_pin = HPD_PORT_E;
                break;
-       case PORT_A:
-               intel_encoder->hpd_pin = HPD_PORT_A;
-               /* Internal port only for eDP. */
        default:
-               BUG();
+               MISSING_CASE(port);
+               return;
        }
 
        if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
index a2f751cd187a2fe2d8552758b71326439aa205de..db24f898853cbbbc98d2043b9a511956dd0bc1bd 100644 (file)
@@ -3362,13 +3362,15 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
        int num_active;
        int id, i;
 
+       /* Clear the partitioning for disabled planes. */
+       memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
+       memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe]));
+
        if (WARN_ON(!state))
                return 0;
 
        if (!cstate->base.active) {
                ddb->pipe[pipe].start = ddb->pipe[pipe].end = 0;
-               memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe]));
-               memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe]));
                return 0;
        }
 
@@ -3468,12 +3470,6 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate,
        return 0;
 }
 
-static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config)
-{
-       /* TODO: Take into account the scalers once we support them */
-       return config->base.adjusted_mode.crtc_clock;
-}
-
 /*
  * The max latency should be 257 (max the punit can code is 255 and we add 2us
  * for the read latency) and cpp should always be <= 8, so that
@@ -3524,7 +3520,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst
         * Adjusted plane pixel rate is just the pipe's adjusted pixel rate
         * with additional adjustments for plane-specific scaling.
         */
-       adjusted_pixel_rate = skl_pipe_pixel_rate(cstate);
+       adjusted_pixel_rate = ilk_pipe_pixel_rate(cstate);
        downscale_amount = skl_plane_downscale_amount(pstate);
 
        pixel_rate = adjusted_pixel_rate * downscale_amount >> 16;
@@ -3736,11 +3732,11 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate)
        if (!cstate->base.active)
                return 0;
 
-       if (WARN_ON(skl_pipe_pixel_rate(cstate) == 0))
+       if (WARN_ON(ilk_pipe_pixel_rate(cstate) == 0))
                return 0;
 
        return DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal * 1000,
-                           skl_pipe_pixel_rate(cstate));
+                           ilk_pipe_pixel_rate(cstate));
 }
 
 static void skl_compute_transition_wm(struct intel_crtc_state *cstate,
@@ -4050,6 +4046,12 @@ skl_compute_ddb(struct drm_atomic_state *state)
                intel_state->wm_results.dirty_pipes = ~0;
        }
 
+       /*
+        * We're not recomputing for the pipes not included in the commit, so
+        * make sure we start with the current state.
+        */
+       memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb));
+
        for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) {
                struct intel_crtc_state *cstate;
 
index 6c11168facd63c7fd18eeb64e49abfa283e9c87a..a38c2fefe85a6fc2abb1fcaabe2143da3c95be3a 100644 (file)
@@ -1139,7 +1139,9 @@ static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv)
 
        intel_power_sequencer_reset(dev_priv);
 
-       intel_hpd_poll_init(dev_priv);
+       /* Prevent us from re-enabling polling on accident in late suspend */
+       if (!dev_priv->drm.dev->power.is_suspended)
+               intel_hpd_poll_init(dev_priv);
 }
 
 static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv,
index 73a521fdf1bdf760e7d487f0e632b80020e43e49..dbed12c484c9d5d5f3af0529caa4746737f4b007 100644 (file)
@@ -358,7 +358,7 @@ vlv_update_plane(struct drm_plane *dplane,
        int plane = intel_plane->plane;
        u32 sprctl;
        u32 sprsurf_offset, linear_offset;
-       unsigned int rotation = dplane->state->rotation;
+       unsigned int rotation = plane_state->base.rotation;
        const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
        int crtc_x = plane_state->base.dst.x1;
        int crtc_y = plane_state->base.dst.y1;
index 68db9621f1f0db0dcdbd715d14c02715c19b5b95..8886cab19f985abf7e70451c991a5a84c9a60adf 100644 (file)
@@ -280,7 +280,8 @@ struct common_child_dev_config {
        u8 dp_support:1;
        u8 tmds_support:1;
        u8 support_reserved:5;
-       u8 not_common3[12];
+       u8 aux_channel;
+       u8 not_common3[11];
        u8 iboost_level;
 } __packed;
 
index 98df09c2b3885b0ad0e5d99434063005a11f6712..9672b579f9506942fa1a697b4708737edc8ad7a0 100644 (file)
@@ -357,8 +357,8 @@ static int imx_drm_bind(struct device *dev)
        int ret;
 
        drm = drm_dev_alloc(&imx_drm_driver, dev);
-       if (!drm)
-               return -ENOMEM;
+       if (IS_ERR(drm))
+               return PTR_ERR(drm);
 
        imxdrm = devm_kzalloc(dev, sizeof(*imxdrm), GFP_KERNEL);
        if (!imxdrm) {
@@ -436,9 +436,11 @@ static int imx_drm_bind(struct device *dev)
 
 err_fbhelper:
        drm_kms_helper_poll_fini(drm);
+#if IS_ENABLED(CONFIG_DRM_FBDEV_EMULATION)
        if (imxdrm->fbhelper)
                drm_fbdev_cma_fini(imxdrm->fbhelper);
 err_unbind:
+#endif
        component_unbind_all(drm->dev, drm);
 err_vblank:
        drm_vblank_cleanup(drm);
index 4e1ae3fc462dc65591d2fa5b3f6dffe3ee8a4ad4..6be515a9fb694b5fdac53d880430ba218b08e544 100644 (file)
@@ -68,6 +68,12 @@ static void ipu_crtc_atomic_disable(struct drm_crtc *crtc,
 
        ipu_dc_disable_channel(ipu_crtc->dc);
        ipu_di_disable(ipu_crtc->di);
+       /*
+        * Planes must be disabled before DC clock is removed, as otherwise the
+        * attached IDMACs will be left in undefined state, possibly hanging
+        * the IPU or even system.
+        */
+       drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, false);
        ipu_dc_disable(ipu);
 
        spin_lock_irq(&crtc->dev->event_lock);
@@ -77,9 +83,6 @@ static void ipu_crtc_atomic_disable(struct drm_crtc *crtc,
        }
        spin_unlock_irq(&crtc->dev->event_lock);
 
-       /* always disable planes on the CRTC */
-       drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, true);
-
        drm_crtc_vblank_off(crtc);
 }
 
index ce22d0a0ddc8116e8026571f7bd700f461f5ee08..d5864ed4d772fdd2f4d5dfd0be9a13e372e27569 100644 (file)
@@ -103,11 +103,11 @@ drm_plane_state_to_vbo(struct drm_plane_state *state)
               (state->src_x >> 16) / 2 - eba;
 }
 
-static void ipu_plane_atomic_set_base(struct ipu_plane *ipu_plane,
-                                     struct drm_plane_state *old_state)
+static void ipu_plane_atomic_set_base(struct ipu_plane *ipu_plane)
 {
        struct drm_plane *plane = &ipu_plane->base;
        struct drm_plane_state *state = plane->state;
+       struct drm_crtc_state *crtc_state = state->crtc->state;
        struct drm_framebuffer *fb = state->fb;
        unsigned long eba, ubo, vbo;
        int active;
@@ -117,7 +117,7 @@ static void ipu_plane_atomic_set_base(struct ipu_plane *ipu_plane,
        switch (fb->pixel_format) {
        case DRM_FORMAT_YUV420:
        case DRM_FORMAT_YVU420:
-               if (old_state->fb)
+               if (!drm_atomic_crtc_needs_modeset(crtc_state))
                        break;
 
                /*
@@ -149,7 +149,7 @@ static void ipu_plane_atomic_set_base(struct ipu_plane *ipu_plane,
                break;
        }
 
-       if (old_state->fb) {
+       if (!drm_atomic_crtc_needs_modeset(crtc_state)) {
                active = ipu_idmac_get_current_buffer(ipu_plane->ipu_ch);
                ipu_cpmem_set_buffer(ipu_plane->ipu_ch, !active, eba);
                ipu_idmac_select_buffer(ipu_plane->ipu_ch, !active);
@@ -259,6 +259,7 @@ static int ipu_plane_atomic_check(struct drm_plane *plane,
        struct drm_framebuffer *fb = state->fb;
        struct drm_framebuffer *old_fb = old_state->fb;
        unsigned long eba, ubo, vbo, old_ubo, old_vbo;
+       int hsub, vsub;
 
        /* Ok to disable */
        if (!fb)
@@ -355,7 +356,9 @@ static int ipu_plane_atomic_check(struct drm_plane *plane,
                if ((ubo > 0xfffff8) || (vbo > 0xfffff8))
                        return -EINVAL;
 
-               if (old_fb) {
+               if (old_fb &&
+                   (old_fb->pixel_format == DRM_FORMAT_YUV420 ||
+                    old_fb->pixel_format == DRM_FORMAT_YVU420)) {
                        old_ubo = drm_plane_state_to_ubo(old_state);
                        old_vbo = drm_plane_state_to_vbo(old_state);
                        if (ubo != old_ubo || vbo != old_vbo)
@@ -370,6 +373,16 @@ static int ipu_plane_atomic_check(struct drm_plane *plane,
 
                if (old_fb && old_fb->pitches[1] != fb->pitches[1])
                        crtc_state->mode_changed = true;
+
+               /*
+                * The x/y offsets must be even in case of horizontal/vertical
+                * chroma subsampling.
+                */
+               hsub = drm_format_horz_chroma_subsampling(fb->pixel_format);
+               vsub = drm_format_vert_chroma_subsampling(fb->pixel_format);
+               if (((state->src_x >> 16) & (hsub - 1)) ||
+                   ((state->src_y >> 16) & (vsub - 1)))
+                       return -EINVAL;
        }
 
        return 0;
@@ -392,7 +405,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
                struct drm_crtc_state *crtc_state = state->crtc->state;
 
                if (!drm_atomic_crtc_needs_modeset(crtc_state)) {
-                       ipu_plane_atomic_set_base(ipu_plane, old_state);
+                       ipu_plane_atomic_set_base(ipu_plane);
                        return;
                }
        }
@@ -424,6 +437,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
                        ipu_dp_set_global_alpha(ipu_plane->dp, false, 0, false);
                        break;
                default:
+                       ipu_dp_set_global_alpha(ipu_plane->dp, true, 0, true);
                        break;
                }
        }
@@ -437,7 +451,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane,
        ipu_cpmem_set_high_priority(ipu_plane->ipu_ch);
        ipu_idmac_set_double_buffer(ipu_plane->ipu_ch, 1);
        ipu_cpmem_set_stride(ipu_plane->ipu_ch, state->fb->pitches[0]);
-       ipu_plane_atomic_set_base(ipu_plane, old_state);
+       ipu_plane_atomic_set_base(ipu_plane);
        ipu_plane_enable(ipu_plane);
 }
 
index 019b7ca392d7a49db0ffcb25ccb36f293f9d530e..c70310206ac56fd76eeb93ae76af669f1f2870a7 100644 (file)
@@ -80,6 +80,7 @@ static void mtk_ovl_enable_vblank(struct mtk_ddp_comp *comp,
                                                 ddp_comp);
 
        priv->crtc = crtc;
+       writel(0x0, comp->regs + DISP_REG_OVL_INTSTA);
        writel_relaxed(OVL_FME_CPL_INT, comp->regs + DISP_REG_OVL_INTEN);
 }
 
@@ -250,13 +251,6 @@ static int mtk_disp_ovl_probe(struct platform_device *pdev)
        if (irq < 0)
                return irq;
 
-       ret = devm_request_irq(dev, irq, mtk_disp_ovl_irq_handler,
-                              IRQF_TRIGGER_NONE, dev_name(dev), priv);
-       if (ret < 0) {
-               dev_err(dev, "Failed to request irq %d: %d\n", irq, ret);
-               return ret;
-       }
-
        comp_id = mtk_ddp_comp_get_id(dev->of_node, MTK_DISP_OVL);
        if (comp_id < 0) {
                dev_err(dev, "Failed to identify by alias: %d\n", comp_id);
@@ -272,6 +266,13 @@ static int mtk_disp_ovl_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, priv);
 
+       ret = devm_request_irq(dev, irq, mtk_disp_ovl_irq_handler,
+                              IRQF_TRIGGER_NONE, dev_name(dev), priv);
+       if (ret < 0) {
+               dev_err(dev, "Failed to request irq %d: %d\n", irq, ret);
+               return ret;
+       }
+
        ret = component_add(dev, &mtk_disp_ovl_component_ops);
        if (ret)
                dev_err(dev, "Failed to add component: %d\n", ret);
index 0186e500d2a544d8c90769428c7673bec1cd68c0..90fb831ef031b9794df4f1ef95609204bedce93a 100644 (file)
@@ -432,11 +432,16 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi,
        unsigned long pll_rate;
        unsigned int factor;
 
+       /* let pll_rate can fix the valid range of tvdpll (1G~2GHz) */
        pix_rate = 1000UL * mode->clock;
-       if (mode->clock <= 74000)
+       if (mode->clock <= 27000)
+               factor = 16 * 3;
+       else if (mode->clock <= 84000)
                factor = 8 * 3;
-       else
+       else if (mode->clock <= 167000)
                factor = 4 * 3;
+       else
+               factor = 2 * 3;
        pll_rate = pix_rate * factor;
 
        dev_dbg(dpi->dev, "Want PLL %lu Hz, pixel clock %lu Hz\n",
index df33b3ca6ffd5b2038e3e01d81ada28ffd33462e..48cc01fd20c78db1533ff8da0edf6c76e7f163e4 100644 (file)
@@ -123,7 +123,7 @@ static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w,
                          unsigned int bpc)
 {
        writel(w << 16 | h, comp->regs + DISP_OD_SIZE);
-       writel(OD_RELAYMODE, comp->regs + OD_RELAYMODE);
+       writel(OD_RELAYMODE, comp->regs + DISP_OD_CFG);
        mtk_dither_set(comp, bpc, DISP_OD_CFG);
 }
 
index 28b2044ed9f285ad7c1bf1441822c3bcd24d9196..eaa5a2240c0c9db36b3ca918e589e117aee01077 100644 (file)
@@ -86,7 +86,7 @@
 
 #define DSI_PHY_TIMECON0       0x110
 #define LPX                            (0xff << 0)
-#define HS_PRPR                                (0xff << 8)
+#define HS_PREP                                (0xff << 8)
 #define HS_ZERO                                (0xff << 16)
 #define HS_TRAIL                       (0xff << 24)
 
 #define CLK_TRAIL                      (0xff << 24)
 
 #define DSI_PHY_TIMECON3       0x11c
-#define CLK_HS_PRPR                    (0xff << 0)
+#define CLK_HS_PREP                    (0xff << 0)
 #define CLK_HS_POST                    (0xff << 8)
 #define CLK_HS_EXIT                    (0xff << 16)
 
+#define T_LPX          5
+#define T_HS_PREP      6
+#define T_HS_TRAIL     8
+#define T_HS_EXIT      7
+#define T_HS_ZERO      10
+
 #define NS_TO_CYCLE(n, c)    ((n) / (c) + (((n) % (c)) ? 1 : 0))
 
 struct phy;
@@ -161,20 +167,18 @@ static void mtk_dsi_mask(struct mtk_dsi *dsi, u32 offset, u32 mask, u32 data)
 static void dsi_phy_timconfig(struct mtk_dsi *dsi)
 {
        u32 timcon0, timcon1, timcon2, timcon3;
-       unsigned int ui, cycle_time;
-       unsigned int lpx;
+       u32 ui, cycle_time;
 
        ui = 1000 / dsi->data_rate + 0x01;
        cycle_time = 8000 / dsi->data_rate + 0x01;
-       lpx = 5;
 
-       timcon0 = (8 << 24) | (0xa << 16) | (0x6 << 8) | lpx;
-       timcon1 = (7 << 24) | (5 * lpx << 16) | ((3 * lpx) / 2) << 8 |
-                 (4 * lpx);
+       timcon0 = T_LPX | T_HS_PREP << 8 | T_HS_ZERO << 16 | T_HS_TRAIL << 24;
+       timcon1 = 4 * T_LPX | (3 * T_LPX / 2) << 8 | 5 * T_LPX << 16 |
+                 T_HS_EXIT << 24;
        timcon2 = ((NS_TO_CYCLE(0x64, cycle_time) + 0xa) << 24) |
                  (NS_TO_CYCLE(0x150, cycle_time) << 16);
-       timcon3 = (2 * lpx) << 16 | NS_TO_CYCLE(80 + 52 * ui, cycle_time) << 8 |
-                  NS_TO_CYCLE(0x40, cycle_time);
+       timcon3 = NS_TO_CYCLE(0x40, cycle_time) | (2 * T_LPX) << 16 |
+                 NS_TO_CYCLE(80 + 52 * ui, cycle_time) << 8;
 
        writel(timcon0, dsi->regs + DSI_PHY_TIMECON0);
        writel(timcon1, dsi->regs + DSI_PHY_TIMECON1);
@@ -202,19 +206,47 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi)
 {
        struct device *dev = dsi->dev;
        int ret;
+       u64 pixel_clock, total_bits;
+       u32 htotal, htotal_bits, bit_per_pixel, overhead_cycles, overhead_bits;
 
        if (++dsi->refcount != 1)
                return 0;
 
+       switch (dsi->format) {
+       case MIPI_DSI_FMT_RGB565:
+               bit_per_pixel = 16;
+               break;
+       case MIPI_DSI_FMT_RGB666_PACKED:
+               bit_per_pixel = 18;
+               break;
+       case MIPI_DSI_FMT_RGB666:
+       case MIPI_DSI_FMT_RGB888:
+       default:
+               bit_per_pixel = 24;
+               break;
+       }
+
        /**
-        * data_rate = (pixel_clock / 1000) * pixel_dipth * mipi_ratio;
-        * pixel_clock unit is Khz, data_rata unit is MHz, so need divide 1000.
-        * mipi_ratio is mipi clk coefficient for balance the pixel clk in mipi.
-        * we set mipi_ratio is 1.05.
+        * vm.pixelclock is in kHz, pixel_clock unit is Hz, so multiply by 1000
+        * htotal_time = htotal * byte_per_pixel / num_lanes
+        * overhead_time = lpx + hs_prepare + hs_zero + hs_trail + hs_exit
+        * mipi_ratio = (htotal_time + overhead_time) / htotal_time
+        * data_rate = pixel_clock * bit_per_pixel * mipi_ratio / num_lanes;
         */
-       dsi->data_rate = dsi->vm.pixelclock * 3 * 21 / (1 * 1000 * 10);
+       pixel_clock = dsi->vm.pixelclock * 1000;
+       htotal = dsi->vm.hactive + dsi->vm.hback_porch + dsi->vm.hfront_porch +
+                       dsi->vm.hsync_len;
+       htotal_bits = htotal * bit_per_pixel;
+
+       overhead_cycles = T_LPX + T_HS_PREP + T_HS_ZERO + T_HS_TRAIL +
+                       T_HS_EXIT;
+       overhead_bits = overhead_cycles * dsi->lanes * 8;
+       total_bits = htotal_bits + overhead_bits;
+
+       dsi->data_rate = DIV_ROUND_UP_ULL(pixel_clock * total_bits,
+                                         htotal * dsi->lanes);
 
-       ret = clk_set_rate(dsi->hs_clk, dsi->data_rate * 1000000);
+       ret = clk_set_rate(dsi->hs_clk, dsi->data_rate);
        if (ret < 0) {
                dev_err(dev, "Failed to set data rate: %d\n", ret);
                goto err_refcount;
index 71227deef21b1ea96d6a2594b98a584d41ad6320..0e8c4d9af34069f55e8784d8e43b6e4e56251cfa 100644 (file)
@@ -1133,12 +1133,6 @@ static int mtk_hdmi_output_set_display_mode(struct mtk_hdmi *hdmi,
        phy_power_on(hdmi->phy);
        mtk_hdmi_aud_output_config(hdmi, mode);
 
-       mtk_hdmi_setup_audio_infoframe(hdmi);
-       mtk_hdmi_setup_avi_infoframe(hdmi, mode);
-       mtk_hdmi_setup_spd_infoframe(hdmi, "mediatek", "On-chip HDMI");
-       if (mode->flags & DRM_MODE_FLAG_3D_MASK)
-               mtk_hdmi_setup_vendor_specific_infoframe(hdmi, mode);
-
        mtk_hdmi_hw_vid_black(hdmi, false);
        mtk_hdmi_hw_aud_unmute(hdmi);
        mtk_hdmi_hw_send_av_unmute(hdmi);
@@ -1401,6 +1395,16 @@ static void mtk_hdmi_bridge_pre_enable(struct drm_bridge *bridge)
        hdmi->powered = true;
 }
 
+static void mtk_hdmi_send_infoframe(struct mtk_hdmi *hdmi,
+                                   struct drm_display_mode *mode)
+{
+       mtk_hdmi_setup_audio_infoframe(hdmi);
+       mtk_hdmi_setup_avi_infoframe(hdmi, mode);
+       mtk_hdmi_setup_spd_infoframe(hdmi, "mediatek", "On-chip HDMI");
+       if (mode->flags & DRM_MODE_FLAG_3D_MASK)
+               mtk_hdmi_setup_vendor_specific_infoframe(hdmi, mode);
+}
+
 static void mtk_hdmi_bridge_enable(struct drm_bridge *bridge)
 {
        struct mtk_hdmi *hdmi = hdmi_ctx_from_bridge(bridge);
@@ -1409,6 +1413,7 @@ static void mtk_hdmi_bridge_enable(struct drm_bridge *bridge)
        clk_prepare_enable(hdmi->clk[MTK_HDMI_CLK_HDMI_PLL]);
        clk_prepare_enable(hdmi->clk[MTK_HDMI_CLK_HDMI_PIXEL]);
        phy_power_on(hdmi->phy);
+       mtk_hdmi_send_infoframe(hdmi, &hdmi->mode);
 
        hdmi->enabled = true;
 }
index 8a24754b440f06585ec67bda44a182cc34e00c84..51cb9cfb6646e3f46a63a2073b923ef6b3f02b2d 100644 (file)
@@ -265,6 +265,9 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate,
        struct mtk_hdmi_phy *hdmi_phy = to_mtk_hdmi_phy(hw);
        unsigned int pre_div;
        unsigned int div;
+       unsigned int pre_ibias;
+       unsigned int hdmi_ibias;
+       unsigned int imp_en;
 
        dev_dbg(hdmi_phy->dev, "%s: %lu Hz, parent: %lu Hz\n", __func__,
                rate, parent_rate);
@@ -298,18 +301,31 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate,
                          (0x1 << PLL_BR_SHIFT),
                          RG_HDMITX_PLL_BP | RG_HDMITX_PLL_BC |
                          RG_HDMITX_PLL_BR);
-       mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON3, RG_HDMITX_PRD_IMP_EN);
+       if (rate < 165000000) {
+               mtk_hdmi_phy_clear_bits(hdmi_phy, HDMI_CON3,
+                                       RG_HDMITX_PRD_IMP_EN);
+               pre_ibias = 0x3;
+               imp_en = 0x0;
+               hdmi_ibias = hdmi_phy->ibias;
+       } else {
+               mtk_hdmi_phy_set_bits(hdmi_phy, HDMI_CON3,
+                                     RG_HDMITX_PRD_IMP_EN);
+               pre_ibias = 0x6;
+               imp_en = 0xf;
+               hdmi_ibias = hdmi_phy->ibias_up;
+       }
        mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON4,
-                         (0x3 << PRD_IBIAS_CLK_SHIFT) |
-                         (0x3 << PRD_IBIAS_D2_SHIFT) |
-                         (0x3 << PRD_IBIAS_D1_SHIFT) |
-                         (0x3 << PRD_IBIAS_D0_SHIFT),
+                         (pre_ibias << PRD_IBIAS_CLK_SHIFT) |
+                         (pre_ibias << PRD_IBIAS_D2_SHIFT) |
+                         (pre_ibias << PRD_IBIAS_D1_SHIFT) |
+                         (pre_ibias << PRD_IBIAS_D0_SHIFT),
                          RG_HDMITX_PRD_IBIAS_CLK |
                          RG_HDMITX_PRD_IBIAS_D2 |
                          RG_HDMITX_PRD_IBIAS_D1 |
                          RG_HDMITX_PRD_IBIAS_D0);
        mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON3,
-                         (0x0 << DRV_IMP_EN_SHIFT), RG_HDMITX_DRV_IMP_EN);
+                         (imp_en << DRV_IMP_EN_SHIFT),
+                         RG_HDMITX_DRV_IMP_EN);
        mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON6,
                          (hdmi_phy->drv_imp_clk << DRV_IMP_CLK_SHIFT) |
                          (hdmi_phy->drv_imp_d2 << DRV_IMP_D2_SHIFT) |
@@ -318,12 +334,14 @@ static int mtk_hdmi_pll_set_rate(struct clk_hw *hw, unsigned long rate,
                          RG_HDMITX_DRV_IMP_CLK | RG_HDMITX_DRV_IMP_D2 |
                          RG_HDMITX_DRV_IMP_D1 | RG_HDMITX_DRV_IMP_D0);
        mtk_hdmi_phy_mask(hdmi_phy, HDMI_CON5,
-                         (hdmi_phy->ibias << DRV_IBIAS_CLK_SHIFT) |
-                         (hdmi_phy->ibias << DRV_IBIAS_D2_SHIFT) |
-                         (hdmi_phy->ibias << DRV_IBIAS_D1_SHIFT) |
-                         (hdmi_phy->ibias << DRV_IBIAS_D0_SHIFT),
-                         RG_HDMITX_DRV_IBIAS_CLK | RG_HDMITX_DRV_IBIAS_D2 |
-                         RG_HDMITX_DRV_IBIAS_D1 | RG_HDMITX_DRV_IBIAS_D0);
+                         (hdmi_ibias << DRV_IBIAS_CLK_SHIFT) |
+                         (hdmi_ibias << DRV_IBIAS_D2_SHIFT) |
+                         (hdmi_ibias << DRV_IBIAS_D1_SHIFT) |
+                         (hdmi_ibias << DRV_IBIAS_D0_SHIFT),
+                         RG_HDMITX_DRV_IBIAS_CLK |
+                         RG_HDMITX_DRV_IBIAS_D2 |
+                         RG_HDMITX_DRV_IBIAS_D1 |
+                         RG_HDMITX_DRV_IBIAS_D0);
        return 0;
 }
 
index 919b35f2ad2487443c97dc3af28916d67ba6c0fb..dcf7d11ac380d0e6b25e1b7e64440913572d7609 100644 (file)
@@ -266,6 +266,9 @@ int mgag200_mm_init(struct mga_device *mdev)
                return ret;
        }
 
+       arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0),
+                                  pci_resource_len(dev->pdev, 0));
+
        mdev->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0),
                                         pci_resource_len(dev->pdev, 0));
 
@@ -274,10 +277,14 @@ int mgag200_mm_init(struct mga_device *mdev)
 
 void mgag200_mm_fini(struct mga_device *mdev)
 {
+       struct drm_device *dev = mdev->dev;
+
        ttm_bo_device_release(&mdev->ttm.bdev);
 
        mgag200_ttm_global_release(mdev);
 
+       arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0),
+                               pci_resource_len(dev->pdev, 0));
        arch_phys_wc_del(mdev->fb_mtrr);
        mdev->fb_mtrr = 0;
 }
index f05ed0e1f3d655d0009438fd923e9438c79be1d5..6f240021705b0c493457560f1782fdd9483c5b98 100644 (file)
@@ -139,6 +139,7 @@ struct msm_dsi_host {
 
        u32 err_work_state;
        struct work_struct err_work;
+       struct work_struct hpd_work;
        struct workqueue_struct *workqueue;
 
        /* DSI 6G TX buffer*/
@@ -1294,6 +1295,14 @@ static void dsi_sw_reset_restore(struct msm_dsi_host *msm_host)
        wmb();  /* make sure dsi controller enabled again */
 }
 
+static void dsi_hpd_worker(struct work_struct *work)
+{
+       struct msm_dsi_host *msm_host =
+               container_of(work, struct msm_dsi_host, hpd_work);
+
+       drm_helper_hpd_irq_event(msm_host->dev);
+}
+
 static void dsi_err_worker(struct work_struct *work)
 {
        struct msm_dsi_host *msm_host =
@@ -1480,7 +1489,7 @@ static int dsi_host_attach(struct mipi_dsi_host *host,
 
        DBG("id=%d", msm_host->id);
        if (msm_host->dev)
-               drm_helper_hpd_irq_event(msm_host->dev);
+               queue_work(msm_host->workqueue, &msm_host->hpd_work);
 
        return 0;
 }
@@ -1494,7 +1503,7 @@ static int dsi_host_detach(struct mipi_dsi_host *host,
 
        DBG("id=%d", msm_host->id);
        if (msm_host->dev)
-               drm_helper_hpd_irq_event(msm_host->dev);
+               queue_work(msm_host->workqueue, &msm_host->hpd_work);
 
        return 0;
 }
@@ -1748,6 +1757,7 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi)
        /* setup workqueue */
        msm_host->workqueue = alloc_ordered_workqueue("dsi_drm_work", 0);
        INIT_WORK(&msm_host->err_work, dsi_err_worker);
+       INIT_WORK(&msm_host->hpd_work, dsi_hpd_worker);
 
        msm_dsi->host = &msm_host->base;
        msm_dsi->id = msm_host->id;
index 598fdaff0a41a051fa165d79e386aab7a7f9bd84..26e3a01a99c2b71dde9fed5bd548ee17b61e2df4 100644 (file)
@@ -521,6 +521,7 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm)
                .parent_names = (const char *[]){ "xo" },
                .num_parents = 1,
                .name = vco_name,
+               .flags = CLK_IGNORE_UNUSED,
                .ops = &clk_ops_dsi_pll_28nm_vco,
        };
        struct device *dev = &pll_28nm->pdev->dev;
index 38c90e1eb00286b4ce0bb9dd49e30115f60869bc..49008451085b86ccb84ee3760c406554db51fd49 100644 (file)
@@ -412,6 +412,7 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm)
        struct clk_init_data vco_init = {
                .parent_names = (const char *[]){ "pxo" },
                .num_parents = 1,
+               .flags = CLK_IGNORE_UNUSED,
                .ops = &clk_ops_dsi_pll_28nm_vco,
        };
        struct device *dev = &pll_28nm->pdev->dev;
index aa94a553794f50eed7d5c49dd01d594c96113baa..143eab46ba687b0b1a89969598736bc75e750809 100644 (file)
@@ -702,6 +702,7 @@ static struct clk_init_data pll_init = {
        .ops = &hdmi_8996_pll_ops,
        .parent_names = hdmi_pll_parents,
        .num_parents = ARRAY_SIZE(hdmi_pll_parents),
+       .flags = CLK_IGNORE_UNUSED,
 };
 
 int msm_hdmi_pll_8996_init(struct platform_device *pdev)
index 92da69aa6187e64fa6cbb515a4698bf32f57db54..99590758c68b7cf8e296928db1bab5ce5b2df4c4 100644 (file)
@@ -424,6 +424,7 @@ static struct clk_init_data pll_init = {
        .ops = &hdmi_pll_ops,
        .parent_names = hdmi_pll_parents,
        .num_parents = ARRAY_SIZE(hdmi_pll_parents),
+       .flags = CLK_IGNORE_UNUSED,
 };
 
 int msm_hdmi_pll_8960_init(struct platform_device *pdev)
index ac9e4cde13804f78333d5d6e101b37511949a5e8..8b4e3004f4518d19341b24c140606404853e379b 100644 (file)
@@ -272,7 +272,7 @@ const struct mdp5_cfg_hw msm8x16_config = {
                .count = 2,
                .base = { 0x14000, 0x16000 },
                .caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
-                               MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_DECIMATION,
+                               MDP_PIPE_CAP_DECIMATION,
        },
        .pipe_dma = {
                .count = 1,
@@ -282,7 +282,7 @@ const struct mdp5_cfg_hw msm8x16_config = {
        .lm = {
                .count = 2, /* LM0 and LM3 */
                .base = { 0x44000, 0x47000 },
-               .nb_stages = 5,
+               .nb_stages = 8,
                .max_width = 2048,
                .max_height = 0xFFFF,
        },
index fa2be7ce9468768eb6737a12ec29d11fc52f49f0..c205c360e16dcbdf83120e102e0a6854b852959f 100644 (file)
@@ -223,12 +223,7 @@ static void blend_setup(struct drm_crtc *crtc)
                plane_cnt++;
        }
 
-       /*
-       * If there is no base layer, enable border color.
-       * Although it's not possbile in current blend logic,
-       * put it here as a reminder.
-       */
-       if (!pstates[STAGE_BASE] && plane_cnt) {
+       if (!pstates[STAGE_BASE]) {
                ctl_blend_flags |= MDP5_CTL_BLEND_OP_FLAG_BORDER_OUT;
                DBG("Border Color is enabled");
        }
@@ -365,6 +360,15 @@ static int pstate_cmp(const void *a, const void *b)
        return pa->state->zpos - pb->state->zpos;
 }
 
+/* is there a helper for this? */
+static bool is_fullscreen(struct drm_crtc_state *cstate,
+               struct drm_plane_state *pstate)
+{
+       return (pstate->crtc_x <= 0) && (pstate->crtc_y <= 0) &&
+               ((pstate->crtc_x + pstate->crtc_w) >= cstate->mode.hdisplay) &&
+               ((pstate->crtc_y + pstate->crtc_h) >= cstate->mode.vdisplay);
+}
+
 static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
                struct drm_crtc_state *state)
 {
@@ -375,21 +379,11 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
        struct plane_state pstates[STAGE_MAX + 1];
        const struct mdp5_cfg_hw *hw_cfg;
        const struct drm_plane_state *pstate;
-       int cnt = 0, i;
+       int cnt = 0, base = 0, i;
 
        DBG("%s: check", mdp5_crtc->name);
 
-       /* verify that there are not too many planes attached to crtc
-        * and that we don't have conflicting mixer stages:
-        */
-       hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
        drm_atomic_crtc_state_for_each_plane_state(plane, pstate, state) {
-               if (cnt >= (hw_cfg->lm.nb_stages)) {
-                       dev_err(dev->dev, "too many planes!\n");
-                       return -EINVAL;
-               }
-
-
                pstates[cnt].plane = plane;
                pstates[cnt].state = to_mdp5_plane_state(pstate);
 
@@ -399,8 +393,24 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
        /* assign a stage based on sorted zpos property */
        sort(pstates, cnt, sizeof(pstates[0]), pstate_cmp, NULL);
 
+       /* if the bottom-most layer is not fullscreen, we need to use
+        * it for solid-color:
+        */
+       if ((cnt > 0) && !is_fullscreen(state, &pstates[0].state->base))
+               base++;
+
+       /* verify that there are not too many planes attached to crtc
+        * and that we don't have conflicting mixer stages:
+        */
+       hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
+
+       if ((cnt + base) >= hw_cfg->lm.nb_stages) {
+               dev_err(dev->dev, "too many planes!\n");
+               return -EINVAL;
+       }
+
        for (i = 0; i < cnt; i++) {
-               pstates[i].state->stage = STAGE_BASE + i;
+               pstates[i].state->stage = STAGE_BASE + i + base;
                DBG("%s: assign pipe %s on stage=%d", mdp5_crtc->name,
                                pipe2name(mdp5_plane_pipe(pstates[i].plane)),
                                pstates[i].state->stage);
index 951c002b05df2701977eb3f8570aa004f1cddd28..83bf997dda03cd9df09bd356ae661156351aaee2 100644 (file)
@@ -292,8 +292,7 @@ static int mdp5_plane_atomic_check(struct drm_plane *plane,
                format = to_mdp_format(msm_framebuffer_format(state->fb));
                if (MDP_FORMAT_IS_YUV(format) &&
                        !pipe_supports_yuv(mdp5_plane->caps)) {
-                       dev_err(plane->dev->dev,
-                               "Pipe doesn't support YUV\n");
+                       DBG("Pipe doesn't support YUV\n");
 
                        return -EINVAL;
                }
@@ -301,8 +300,7 @@ static int mdp5_plane_atomic_check(struct drm_plane *plane,
                if (!(mdp5_plane->caps & MDP_PIPE_CAP_SCALE) &&
                        (((state->src_w >> 16) != state->crtc_w) ||
                        ((state->src_h >> 16) != state->crtc_h))) {
-                       dev_err(plane->dev->dev,
-                               "Pipe doesn't support scaling (%dx%d -> %dx%d)\n",
+                       DBG("Pipe doesn't support scaling (%dx%d -> %dx%d)\n",
                                state->src_w >> 16, state->src_h >> 16,
                                state->crtc_w, state->crtc_h);
 
@@ -313,8 +311,7 @@ static int mdp5_plane_atomic_check(struct drm_plane *plane,
                vflip = !!(state->rotation & DRM_REFLECT_Y);
                if ((vflip && !(mdp5_plane->caps & MDP_PIPE_CAP_VFLIP)) ||
                        (hflip && !(mdp5_plane->caps & MDP_PIPE_CAP_HFLIP))) {
-                       dev_err(plane->dev->dev,
-                               "Pipe doesn't support flip\n");
+                       DBG("Pipe doesn't support flip\n");
 
                        return -EINVAL;
                }
index fb5c0b0a7594adcb0f38858cce0fc87d786f4eaa..46568fc80848f1376122d4936e662310e6cdb4b4 100644 (file)
@@ -228,7 +228,7 @@ static int msm_drm_uninit(struct device *dev)
        flush_workqueue(priv->atomic_wq);
        destroy_workqueue(priv->atomic_wq);
 
-       if (kms)
+       if (kms && kms->funcs)
                kms->funcs->destroy(kms);
 
        if (gpu) {
index 283d2841ba58137efa28ea4c2cef3872b4cb5202..192b2d3a79cb221e4b067b5b153d3a913edd6ffc 100644 (file)
@@ -163,6 +163,9 @@ void msm_gem_shrinker_init(struct drm_device *dev)
 void msm_gem_shrinker_cleanup(struct drm_device *dev)
 {
        struct msm_drm_private *priv = dev->dev_private;
-       WARN_ON(unregister_vmap_purge_notifier(&priv->vmap_notifier));
-       unregister_shrinker(&priv->shrinker);
+
+       if (priv->shrinker.nr_deferred) {
+               WARN_ON(unregister_vmap_purge_notifier(&priv->vmap_notifier));
+               unregister_shrinker(&priv->shrinker);
+       }
 }
index dc57b628e07473ad6e0810085c5fd960ef6b49bc..193573d191e520a12ccdde4a791ebdf8e64a334c 100644 (file)
@@ -240,7 +240,8 @@ static bool nouveau_pr3_present(struct pci_dev *pdev)
        if (!parent_adev)
                return false;
 
-       return acpi_has_method(parent_adev->handle, "_PR3");
+       return parent_adev->power.flags.power_resources &&
+               acpi_has_method(parent_adev->handle, "_PR3");
 }
 
 static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out,
index 1825dbc331926c84de69e13606f38cf4c80a2bed..a6dbe8258040d24b67a7e774f4785e1b1efeb32f 100644 (file)
@@ -398,6 +398,9 @@ nouveau_ttm_init(struct nouveau_drm *drm)
        /* VRAM init */
        drm->gem.vram_available = drm->device.info.ram_user;
 
+       arch_io_reserve_memtype_wc(device->func->resource_addr(device, 1),
+                                  device->func->resource_size(device, 1));
+
        ret = ttm_bo_init_mm(&drm->ttm.bdev, TTM_PL_VRAM,
                              drm->gem.vram_available >> PAGE_SHIFT);
        if (ret) {
@@ -430,6 +433,8 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 void
 nouveau_ttm_fini(struct nouveau_drm *drm)
 {
+       struct nvkm_device *device = nvxx_device(&drm->device);
+
        ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_VRAM);
        ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_TT);
 
@@ -439,4 +444,7 @@ nouveau_ttm_fini(struct nouveau_drm *drm)
 
        arch_phys_wc_del(drm->ttm.mtrr);
        drm->ttm.mtrr = 0;
+       arch_io_free_memtype_wc(device->func->resource_addr(device, 1),
+                               device->func->resource_size(device, 1));
+
 }
index 103fc8650197bfe8efd6903ba29ba1c3906ccd2a..a0d4a0522fdc98582e99b1016d482de148e62f16 100644 (file)
@@ -1396,9 +1396,7 @@ static void cayman_pcie_gart_fini(struct radeon_device *rdev)
 void cayman_cp_int_cntl_setup(struct radeon_device *rdev,
                              int ring, u32 cp_int_cntl)
 {
-       u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3;
-
-       WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3));
+       WREG32(SRBM_GFX_CNTL, RINGID(ring));
        WREG32(CP_INT_CNTL, cp_int_cntl);
 }
 
index 6a4b020dd0b45aa504bcb7acd68c49a0ddfc9328..5a26eb4545aae4afea2dc83d6b5e615b1ddcfff3 100644 (file)
@@ -156,19 +156,20 @@ u32 r600_dpm_get_vblank_time(struct radeon_device *rdev)
        struct drm_device *dev = rdev->ddev;
        struct drm_crtc *crtc;
        struct radeon_crtc *radeon_crtc;
-       u32 line_time_us, vblank_lines;
+       u32 vblank_in_pixels;
        u32 vblank_time_us = 0xffffffff; /* if the displays are off, vblank time is max */
 
        if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) {
                list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
                        radeon_crtc = to_radeon_crtc(crtc);
                        if (crtc->enabled && radeon_crtc->enabled && radeon_crtc->hw_mode.clock) {
-                               line_time_us = (radeon_crtc->hw_mode.crtc_htotal * 1000) /
-                                       radeon_crtc->hw_mode.clock;
-                               vblank_lines = radeon_crtc->hw_mode.crtc_vblank_end -
-                                       radeon_crtc->hw_mode.crtc_vdisplay +
-                                       (radeon_crtc->v_border * 2);
-                               vblank_time_us = vblank_lines * line_time_us;
+                               vblank_in_pixels =
+                                       radeon_crtc->hw_mode.crtc_htotal *
+                                       (radeon_crtc->hw_mode.crtc_vblank_end -
+                                        radeon_crtc->hw_mode.crtc_vdisplay +
+                                        (radeon_crtc->v_border * 2));
+
+                               vblank_time_us = vblank_in_pixels * 1000 / radeon_crtc->hw_mode.clock;
                                break;
                        }
                }
index 2fdcd04bc93f7b9c6abf5d84752836e154d566b0..4129b12521a67e9296582353a74f418bbf84d1c6 100644 (file)
@@ -34,6 +34,7 @@ struct radeon_atpx {
 
 static struct radeon_atpx_priv {
        bool atpx_detected;
+       bool bridge_pm_usable;
        /* handle for device - and atpx */
        acpi_handle dhandle;
        struct radeon_atpx atpx;
@@ -203,7 +204,11 @@ static int radeon_atpx_validate(struct radeon_atpx *atpx)
        atpx->is_hybrid = false;
        if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
                printk("ATPX Hybrid Graphics\n");
-               atpx->functions.power_cntl = false;
+               /*
+                * Disable legacy PM methods only when pcie port PM is usable,
+                * otherwise the device might fail to power off or power on.
+                */
+               atpx->functions.power_cntl = !radeon_atpx_priv.bridge_pm_usable;
                atpx->is_hybrid = true;
        }
 
@@ -474,6 +479,7 @@ static int radeon_atpx_power_state(enum vga_switcheroo_client_id id,
  */
 static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev)
 {
+       struct pci_dev *parent_pdev = pci_upstream_bridge(pdev);
        acpi_handle dhandle, atpx_handle;
        acpi_status status;
 
@@ -487,6 +493,7 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev)
 
        radeon_atpx_priv.dhandle = dhandle;
        radeon_atpx_priv.atpx.handle = atpx_handle;
+       radeon_atpx_priv.bridge_pm_usable = parent_pdev && parent_pdev->bridge_d3;
        return true;
 }
 
index 50e96d2c593dafe05c0e0205288c7f2b7c6a71d9..27affbde058c626eec6ae5b5b77cd9a2d4601ba9 100644 (file)
@@ -927,6 +927,16 @@ radeon_lvds_detect(struct drm_connector *connector, bool force)
        return ret;
 }
 
+static void radeon_connector_unregister(struct drm_connector *connector)
+{
+       struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+
+       if (radeon_connector->ddc_bus && radeon_connector->ddc_bus->has_aux) {
+               drm_dp_aux_unregister(&radeon_connector->ddc_bus->aux);
+               radeon_connector->ddc_bus->has_aux = false;
+       }
+}
+
 static void radeon_connector_destroy(struct drm_connector *connector)
 {
        struct radeon_connector *radeon_connector = to_radeon_connector(connector);
@@ -984,6 +994,7 @@ static const struct drm_connector_funcs radeon_lvds_connector_funcs = {
        .dpms = drm_helper_connector_dpms,
        .detect = radeon_lvds_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
+       .early_unregister = radeon_connector_unregister,
        .destroy = radeon_connector_destroy,
        .set_property = radeon_lvds_set_property,
 };
@@ -1111,6 +1122,7 @@ static const struct drm_connector_funcs radeon_vga_connector_funcs = {
        .dpms = drm_helper_connector_dpms,
        .detect = radeon_vga_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
+       .early_unregister = radeon_connector_unregister,
        .destroy = radeon_connector_destroy,
        .set_property = radeon_connector_set_property,
 };
@@ -1188,6 +1200,7 @@ static const struct drm_connector_funcs radeon_tv_connector_funcs = {
        .dpms = drm_helper_connector_dpms,
        .detect = radeon_tv_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
+       .early_unregister = radeon_connector_unregister,
        .destroy = radeon_connector_destroy,
        .set_property = radeon_connector_set_property,
 };
@@ -1519,6 +1532,7 @@ static const struct drm_connector_funcs radeon_dvi_connector_funcs = {
        .detect = radeon_dvi_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
        .set_property = radeon_connector_set_property,
+       .early_unregister = radeon_connector_unregister,
        .destroy = radeon_connector_destroy,
        .force = radeon_dvi_force,
 };
@@ -1832,6 +1846,7 @@ static const struct drm_connector_funcs radeon_dp_connector_funcs = {
        .detect = radeon_dp_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
        .set_property = radeon_connector_set_property,
+       .early_unregister = radeon_connector_unregister,
        .destroy = radeon_connector_destroy,
        .force = radeon_dvi_force,
 };
@@ -1841,6 +1856,7 @@ static const struct drm_connector_funcs radeon_edp_connector_funcs = {
        .detect = radeon_dp_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
        .set_property = radeon_lvds_set_property,
+       .early_unregister = radeon_connector_unregister,
        .destroy = radeon_connector_destroy,
        .force = radeon_dvi_force,
 };
@@ -1850,6 +1866,7 @@ static const struct drm_connector_funcs radeon_lvds_bridge_connector_funcs = {
        .detect = radeon_dp_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
        .set_property = radeon_lvds_set_property,
+       .early_unregister = radeon_connector_unregister,
        .destroy = radeon_connector_destroy,
        .force = radeon_dvi_force,
 };
index eb92aef46e3cfcf99a07d26272fac7725d0cfaa8..621af069a3d2a5f1175bac236d4421db594eca07 100644 (file)
@@ -104,6 +104,14 @@ static const char radeon_family_name[][16] = {
        "LAST",
 };
 
+#if defined(CONFIG_VGA_SWITCHEROO)
+bool radeon_has_atpx_dgpu_power_cntl(void);
+bool radeon_is_atpx_hybrid(void);
+#else
+static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; }
+static inline bool radeon_is_atpx_hybrid(void) { return false; }
+#endif
+
 #define RADEON_PX_QUIRK_DISABLE_PX  (1 << 0)
 #define RADEON_PX_QUIRK_LONG_WAKEUP (1 << 1)
 
@@ -160,6 +168,11 @@ static void radeon_device_handle_px_quirks(struct radeon_device *rdev)
 
        if (rdev->px_quirk_flags & RADEON_PX_QUIRK_DISABLE_PX)
                rdev->flags &= ~RADEON_IS_PX;
+
+       /* disable PX is the system doesn't support dGPU power control or hybrid gfx */
+       if (!radeon_is_atpx_hybrid() &&
+           !radeon_has_atpx_dgpu_power_cntl())
+               rdev->flags &= ~RADEON_IS_PX;
 }
 
 /**
index b8ab30a7dd6d2f2d215415751539c652a3b1ed55..cdb8cb568c15310589039b2f0c56faf5cbf11b9c 100644 (file)
@@ -1675,20 +1675,20 @@ int radeon_modeset_init(struct radeon_device *rdev)
 
 void radeon_modeset_fini(struct radeon_device *rdev)
 {
-       radeon_fbdev_fini(rdev);
-       kfree(rdev->mode_info.bios_hardcoded_edid);
-
-       /* free i2c buses */
-       radeon_i2c_fini(rdev);
-
        if (rdev->mode_info.mode_config_initialized) {
-               radeon_afmt_fini(rdev);
                drm_kms_helper_poll_fini(rdev->ddev);
                radeon_hpd_fini(rdev);
                drm_crtc_force_disable_all(rdev->ddev);
+               radeon_fbdev_fini(rdev);
+               radeon_afmt_fini(rdev);
                drm_mode_config_cleanup(rdev->ddev);
                rdev->mode_info.mode_config_initialized = false;
        }
+
+       kfree(rdev->mode_info.bios_hardcoded_edid);
+
+       /* free i2c buses */
+       radeon_i2c_fini(rdev);
 }
 
 static bool is_hdtv_mode(const struct drm_display_mode *mode)
index 2d465648856a03156c878993ab2cc24755aec74f..474a8a1886f712114caf20b8484929f5009a3313 100644 (file)
@@ -105,7 +105,7 @@ radeon_dp_aux_transfer_native(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg
 
        tmp &= AUX_HPD_SEL(0x7);
        tmp |= AUX_HPD_SEL(chan->rec.hpd);
-       tmp |= AUX_EN | AUX_LS_READ_EN | AUX_HPD_DISCON(0x1);
+       tmp |= AUX_EN | AUX_LS_READ_EN;
 
        WREG32(AUX_CONTROL + aux_offset[instance], tmp);
 
index 91c8f433956605e1f5106b69df1af9123e3bd2c5..00ea0002b539b9e9b5b0a063f62deb3b7638fd56 100644 (file)
  *   2.45.0 - Allow setting shader registers using DMA/COPY packet3 on SI
  *   2.46.0 - Add PFP_SYNC_ME support on evergreen
  *   2.47.0 - Add UVD_NO_OP register support
+ *   2.48.0 - TA_CS_BC_BASE_ADDR allowed on SI
  */
 #define KMS_DRIVER_MAJOR       2
-#define KMS_DRIVER_MINOR       47
+#define KMS_DRIVER_MINOR       48
 #define KMS_DRIVER_PATCHLEVEL  0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
index 021aa005623f804be130179398a617291032c92b..29f7817af821babcfe36e148c6f5a0fb8065e035 100644 (file)
@@ -982,9 +982,8 @@ void radeon_i2c_destroy(struct radeon_i2c_chan *i2c)
 {
        if (!i2c)
                return;
+       WARN_ON(i2c->has_aux);
        i2c_del_adapter(&i2c->adapter);
-       if (i2c->has_aux)
-               drm_dp_aux_unregister(&i2c->aux);
        kfree(i2c);
 }
 
index be30861afae9a8e2bb1fcc6ee261b5285f4dcb44..41b72ce6613febc033c0e37fe68655faeca65554 100644 (file)
@@ -446,6 +446,10 @@ void radeon_bo_force_delete(struct radeon_device *rdev)
 
 int radeon_bo_init(struct radeon_device *rdev)
 {
+       /* reserve PAT memory space to WC for VRAM */
+       arch_io_reserve_memtype_wc(rdev->mc.aper_base,
+                                  rdev->mc.aper_size);
+
        /* Add an MTRR for the VRAM */
        if (!rdev->fastfb_working) {
                rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
@@ -463,6 +467,7 @@ void radeon_bo_fini(struct radeon_device *rdev)
 {
        radeon_ttm_fini(rdev);
        arch_phys_wc_del(rdev->mc.vram_mtrr);
+       arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size);
 }
 
 /* Returns how many bytes TTM can move per IB.
index 455268214b893eac36e8bbd65d5e2b18d2735483..3de5e6e216628233ef1ba997bfcab6ae09d24621 100644 (file)
@@ -566,7 +566,8 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm)
                uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
                struct page **pages = ttm->pages + pinned;
 
-               r = get_user_pages(userptr, num_pages, write, 0, pages, NULL);
+               r = get_user_pages(userptr, num_pages, write ? FOLL_WRITE : 0,
+                                  pages, NULL);
                if (r < 0)
                        goto release_pages;
 
index 7ee9aafbdf744bc0a97f358767e5b2178f24c41a..e402be8821c45271a276f721f1eb9c94fcf4afb5 100644 (file)
@@ -4431,6 +4431,7 @@ static bool si_vm_reg_valid(u32 reg)
        case SPI_CONFIG_CNTL:
        case SPI_CONFIG_CNTL_1:
        case TA_CNTL_AUX:
+       case TA_CS_BC_BASE_ADDR:
                return true;
        default:
                DRM_ERROR("Invalid register 0x%x in CS\n", reg);
index 89bdf20344aeffdcbbcef609e131b8a37f6423cf..c49934527a87852bf207b26e04c4de5939df8c53 100644 (file)
@@ -2999,6 +2999,49 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
        int i;
        struct si_dpm_quirk *p = si_dpm_quirk_list;
 
+       /* limit all SI kickers */
+       if (rdev->family == CHIP_PITCAIRN) {
+               if ((rdev->pdev->revision == 0x81) ||
+                   (rdev->pdev->device == 0x6810) ||
+                   (rdev->pdev->device == 0x6811) ||
+                   (rdev->pdev->device == 0x6816) ||
+                   (rdev->pdev->device == 0x6817) ||
+                   (rdev->pdev->device == 0x6806))
+                       max_mclk = 120000;
+       } else if (rdev->family == CHIP_VERDE) {
+               if ((rdev->pdev->revision == 0x81) ||
+                   (rdev->pdev->revision == 0x83) ||
+                   (rdev->pdev->revision == 0x87) ||
+                   (rdev->pdev->device == 0x6820) ||
+                   (rdev->pdev->device == 0x6821) ||
+                   (rdev->pdev->device == 0x6822) ||
+                   (rdev->pdev->device == 0x6823) ||
+                   (rdev->pdev->device == 0x682A) ||
+                   (rdev->pdev->device == 0x682B)) {
+                       max_sclk = 75000;
+                       max_mclk = 80000;
+               }
+       } else if (rdev->family == CHIP_OLAND) {
+               if ((rdev->pdev->revision == 0xC7) ||
+                   (rdev->pdev->revision == 0x80) ||
+                   (rdev->pdev->revision == 0x81) ||
+                   (rdev->pdev->revision == 0x83) ||
+                   (rdev->pdev->device == 0x6604) ||
+                   (rdev->pdev->device == 0x6605)) {
+                       max_sclk = 75000;
+                       max_mclk = 80000;
+               }
+       } else if (rdev->family == CHIP_HAINAN) {
+               if ((rdev->pdev->revision == 0x81) ||
+                   (rdev->pdev->revision == 0x83) ||
+                   (rdev->pdev->revision == 0xC3) ||
+                   (rdev->pdev->device == 0x6664) ||
+                   (rdev->pdev->device == 0x6665) ||
+                   (rdev->pdev->device == 0x6667)) {
+                       max_sclk = 75000;
+                       max_mclk = 80000;
+               }
+       }
        /* Apply dpm quirks */
        while (p && p->chip_device != 0) {
                if (rdev->pdev->vendor == p->chip_vendor &&
@@ -3011,16 +3054,6 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
                }
                ++p;
        }
-       /* limit mclk on all R7 370 parts for stability */
-       if (rdev->pdev->device == 0x6811 &&
-           rdev->pdev->revision == 0x81)
-               max_mclk = 120000;
-       /* limit sclk/mclk on Jet parts for stability */
-       if (rdev->pdev->device == 0x6665 &&
-           rdev->pdev->revision == 0xc3) {
-               max_sclk = 75000;
-               max_mclk = 80000;
-       }
 
        if (rps->vce_active) {
                rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;
index eb220eecba789aaf40ced21852a8d98441e55a3b..65a911ddd509d29d2a5d460ada79674e947a96f3 100644 (file)
 #define        SPI_LB_CU_MASK                                  0x9354
 
 #define        TA_CNTL_AUX                                     0x9508
+#define        TA_CS_BC_BASE_ADDR                              0x950C
 
 #define CC_RB_BACKEND_DISABLE                          0x98F4
 #define                BACKEND_DISABLE(x)                      ((x) << 16)
index bd9c3bb9252c68520af8233412c69253bfd04838..392c7e6de04272dc3336f0c197c283f3319ad993 100644 (file)
@@ -231,8 +231,16 @@ static int rcar_du_atomic_check(struct drm_device *dev,
        struct rcar_du_device *rcdu = dev->dev_private;
        int ret;
 
-       ret = drm_atomic_helper_check(dev, state);
-       if (ret < 0)
+       ret = drm_atomic_helper_check_modeset(dev, state);
+       if (ret)
+               return ret;
+
+       ret = drm_atomic_normalize_zpos(dev, state);
+       if (ret)
+               return ret;
+
+       ret = drm_atomic_helper_check_planes(dev, state);
+       if (ret)
                return ret;
 
        if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE))
index 2784919a73664c6287e7d5812839347d572011ca..9df308565f6cac7b68ff8b81cd97a3ef3a005834 100644 (file)
@@ -195,6 +195,26 @@ static void sti_atomic_work(struct work_struct *work)
        sti_atomic_complete(private, private->commit.state);
 }
 
+static int sti_atomic_check(struct drm_device *dev,
+                           struct drm_atomic_state *state)
+{
+       int ret;
+
+       ret = drm_atomic_helper_check_modeset(dev, state);
+       if (ret)
+               return ret;
+
+       ret = drm_atomic_normalize_zpos(dev, state);
+       if (ret)
+               return ret;
+
+       ret = drm_atomic_helper_check_planes(dev, state);
+       if (ret)
+               return ret;
+
+       return ret;
+}
+
 static int sti_atomic_commit(struct drm_device *drm,
                             struct drm_atomic_state *state, bool nonblock)
 {
@@ -248,7 +268,7 @@ static void sti_output_poll_changed(struct drm_device *ddev)
 static const struct drm_mode_config_funcs sti_mode_config_funcs = {
        .fb_create = drm_fb_cma_create,
        .output_poll_changed = sti_output_poll_changed,
-       .atomic_check = drm_atomic_helper_check,
+       .atomic_check = sti_atomic_check,
        .atomic_commit = sti_atomic_commit,
 };
 
index 0da9862ad8ed928e23a6a1e089551967ad5273e8..70e9fd59c5a24650f7e89f2db5d1a780cc2683d9 100644 (file)
@@ -142,9 +142,9 @@ static int sun4i_drv_bind(struct device *dev)
 
        /* Create our layers */
        drv->layers = sun4i_layers_init(drm);
-       if (!drv->layers) {
+       if (IS_ERR(drv->layers)) {
                dev_err(drm->dev, "Couldn't create the planes\n");
-               ret = -EINVAL;
+               ret = PTR_ERR(drv->layers);
                goto free_drm;
        }
 
index c3ff10f559cc4811755f764d6ebbfd999a8ee7e9..d198ad7e53234794b0e78933ff395b38b90cfd59 100644 (file)
@@ -152,15 +152,13 @@ static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder)
 
        DRM_DEBUG_DRIVER("Enabling RGB output\n");
 
-       if (!IS_ERR(tcon->panel)) {
+       if (!IS_ERR(tcon->panel))
                drm_panel_prepare(tcon->panel);
-               drm_panel_enable(tcon->panel);
-       }
-
-       /* encoder->bridge can be NULL; drm_bridge_enable checks for it */
-       drm_bridge_enable(encoder->bridge);
 
        sun4i_tcon_channel_enable(tcon, 0);
+
+       if (!IS_ERR(tcon->panel))
+               drm_panel_enable(tcon->panel);
 }
 
 static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder)
@@ -171,15 +169,13 @@ static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder)
 
        DRM_DEBUG_DRIVER("Disabling RGB output\n");
 
-       sun4i_tcon_channel_disable(tcon, 0);
+       if (!IS_ERR(tcon->panel))
+               drm_panel_disable(tcon->panel);
 
-       /* encoder->bridge can be NULL; drm_bridge_disable checks for it */
-       drm_bridge_disable(encoder->bridge);
+       sun4i_tcon_channel_disable(tcon, 0);
 
-       if (!IS_ERR(tcon->panel)) {
-               drm_panel_disable(tcon->panel);
+       if (!IS_ERR(tcon->panel))
                drm_panel_unprepare(tcon->panel);
-       }
 }
 
 static void sun4i_rgb_encoder_mode_set(struct drm_encoder *encoder,
index 29f0207fa677064dc4b7bd93ea360fd5d3f414a4..873f010d9616f702deb3d59947831c28a899b853 100644 (file)
@@ -98,17 +98,23 @@ success:
 static int udl_select_std_channel(struct udl_device *udl)
 {
        int ret;
-       u8 set_def_chn[] = {0x57, 0xCD, 0xDC, 0xA7,
-                           0x1C, 0x88, 0x5E, 0x15,
-                           0x60, 0xFE, 0xC6, 0x97,
-                           0x16, 0x3D, 0x47, 0xF2};
+       static const u8 set_def_chn[] = {0x57, 0xCD, 0xDC, 0xA7,
+                                        0x1C, 0x88, 0x5E, 0x15,
+                                        0x60, 0xFE, 0xC6, 0x97,
+                                        0x16, 0x3D, 0x47, 0xF2};
+       void *sendbuf;
+
+       sendbuf = kmemdup(set_def_chn, sizeof(set_def_chn), GFP_KERNEL);
+       if (!sendbuf)
+               return -ENOMEM;
 
        ret = usb_control_msg(udl->udev,
                              usb_sndctrlpipe(udl->udev, 0),
                              NR_USB_REQUEST_CHANNEL,
                              (USB_DIR_OUT | USB_TYPE_VENDOR), 0, 0,
-                             set_def_chn, sizeof(set_def_chn),
+                             sendbuf, sizeof(set_def_chn),
                              USB_CTRL_SET_TIMEOUT);
+       kfree(sendbuf);
        return ret < 0 ? ret : 0;
 }
 
index 7e2a12c4fed2a49bb5f35714e8ef42f24cf8f7d1..1a3ad769f8c85bc0506741d105669dd66013b533 100644 (file)
@@ -241,8 +241,8 @@ via_lock_all_dma_pages(drm_via_sg_info_t *vsg,  drm_via_dmablit_t *xfer)
        down_read(&current->mm->mmap_sem);
        ret = get_user_pages((unsigned long)xfer->mem_addr,
                             vsg->num_pages,
-                            (vsg->direction == DMA_FROM_DEVICE),
-                            0, vsg->pages, NULL);
+                            (vsg->direction == DMA_FROM_DEVICE) ? FOLL_WRITE : 0,
+                            vsg->pages, NULL);
 
        up_read(&current->mm->mmap_sem);
        if (ret != vsg->num_pages) {
index 7cf3678623c3a1b0254b88b76d8f8ad95f7a08ce..58048709c34e6ca0f48b03b3bc5638a3b93d7226 100644 (file)
@@ -338,8 +338,7 @@ static void vgdev_atomic_commit_tail(struct drm_atomic_state *state)
 
        drm_atomic_helper_commit_modeset_disables(dev, state);
        drm_atomic_helper_commit_modeset_enables(dev, state);
-       drm_atomic_helper_commit_planes(dev, state,
-                                       DRM_PLANE_COMMIT_ACTIVE_ONLY);
+       drm_atomic_helper_commit_planes(dev, state, 0);
 
        drm_atomic_helper_commit_hw_done(state);
 
index e8ae3dc476d16fb756fd5ec4380fe644f1f858d0..18061a4bc2f287f9d99debc461a22bcd9c9f3c4d 100644 (file)
@@ -241,15 +241,15 @@ static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
                              void *ptr);
 
 MODULE_PARM_DESC(enable_fbdev, "Enable vmwgfx fbdev");
-module_param_named(enable_fbdev, enable_fbdev, int, 0600);
+module_param_named(enable_fbdev, enable_fbdev, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(force_dma_api, "Force using the DMA API for TTM pages");
-module_param_named(force_dma_api, vmw_force_iommu, int, 0600);
+module_param_named(force_dma_api, vmw_force_iommu, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(restrict_iommu, "Try to limit IOMMU usage for TTM pages");
-module_param_named(restrict_iommu, vmw_restrict_iommu, int, 0600);
+module_param_named(restrict_iommu, vmw_restrict_iommu, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages");
-module_param_named(force_coherent, vmw_force_coherent, int, 0600);
+module_param_named(force_coherent, vmw_force_coherent, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(restrict_dma_mask, "Restrict DMA mask to 44 bits with IOMMU");
-module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, 0600);
+module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, S_IRUSR | S_IWUSR);
 MODULE_PARM_DESC(assume_16bpp, "Assume 16-bpp when filtering modes");
 module_param_named(assume_16bpp, vmw_assume_16bpp, int, 0600);
 
index 070d750af16d33022f8b3496f38d3667d77169ca..1e59a486bba8a65577bc24464e4ea441731049c4 100644 (file)
@@ -43,7 +43,7 @@
 
 #define VMWGFX_DRIVER_DATE "20160210"
 #define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 10
+#define VMWGFX_DRIVER_MINOR 11
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
index dc5beff2b4aaff83c18dc77187647102efea5266..c7b53d987f06c923c53eaafb0fccdd9631ba72a7 100644 (file)
 
 #define VMW_RES_HT_ORDER 12
 
+/**
+ * enum vmw_resource_relocation_type - Relocation type for resources
+ *
+ * @vmw_res_rel_normal: Traditional relocation. The resource id in the
+ * command stream is replaced with the actual id after validation.
+ * @vmw_res_rel_nop: NOP relocation. The command is unconditionally replaced
+ * with a NOP.
+ * @vmw_res_rel_cond_nop: Conditional NOP relocation. If the resource id
+ * after validation is -1, the command is replaced with a NOP. Otherwise no
+ * action.
+ */
+enum vmw_resource_relocation_type {
+       vmw_res_rel_normal,
+       vmw_res_rel_nop,
+       vmw_res_rel_cond_nop,
+       vmw_res_rel_max
+};
+
 /**
  * struct vmw_resource_relocation - Relocation info for resources
  *
  * @head: List head for the software context's relocation list.
  * @res: Non-ref-counted pointer to the resource.
- * @offset: Offset of 4 byte entries into the command buffer where the
+ * @offset: Offset of single byte entries into the command buffer where the
  * id that needs fixup is located.
+ * @rel_type: Type of relocation.
  */
 struct vmw_resource_relocation {
        struct list_head head;
        const struct vmw_resource *res;
-       unsigned long offset;
+       u32 offset:29;
+       enum vmw_resource_relocation_type rel_type:3;
 };
 
 /**
@@ -109,7 +129,18 @@ static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context,
                                   struct vmw_dma_buffer *vbo,
                                   bool validate_as_mob,
                                   uint32_t *p_val_node);
-
+/**
+ * vmw_ptr_diff - Compute the offset from a to b in bytes
+ *
+ * @a: A starting pointer.
+ * @b: A pointer offset in the same address space.
+ *
+ * Returns: The offset in bytes between the two pointers.
+ */
+static size_t vmw_ptr_diff(void *a, void *b)
+{
+       return (unsigned long) b - (unsigned long) a;
+}
 
 /**
  * vmw_resources_unreserve - unreserve resources previously reserved for
@@ -409,11 +440,14 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv,
  * @list: Pointer to head of relocation list.
  * @res: The resource.
  * @offset: Offset into the command buffer currently being parsed where the
- * id that needs fixup is located. Granularity is 4 bytes.
+ * id that needs fixup is located. Granularity is one byte.
+ * @rel_type: Relocation type.
  */
 static int vmw_resource_relocation_add(struct list_head *list,
                                       const struct vmw_resource *res,
-                                      unsigned long offset)
+                                      unsigned long offset,
+                                      enum vmw_resource_relocation_type
+                                      rel_type)
 {
        struct vmw_resource_relocation *rel;
 
@@ -425,6 +459,7 @@ static int vmw_resource_relocation_add(struct list_head *list,
 
        rel->res = res;
        rel->offset = offset;
+       rel->rel_type = rel_type;
        list_add_tail(&rel->head, list);
 
        return 0;
@@ -459,11 +494,24 @@ static void vmw_resource_relocations_apply(uint32_t *cb,
 {
        struct vmw_resource_relocation *rel;
 
+       /* Validate the struct vmw_resource_relocation member size */
+       BUILD_BUG_ON(SVGA_CB_MAX_SIZE >= (1 << 29));
+       BUILD_BUG_ON(vmw_res_rel_max >= (1 << 3));
+
        list_for_each_entry(rel, list, head) {
-               if (likely(rel->res != NULL))
-                       cb[rel->offset] = rel->res->id;
-               else
-                       cb[rel->offset] = SVGA_3D_CMD_NOP;
+               u32 *addr = (u32 *)((unsigned long) cb + rel->offset);
+               switch (rel->rel_type) {
+               case vmw_res_rel_normal:
+                       *addr = rel->res->id;
+                       break;
+               case vmw_res_rel_nop:
+                       *addr = SVGA_3D_CMD_NOP;
+                       break;
+               default:
+                       if (rel->res->id == -1)
+                               *addr = SVGA_3D_CMD_NOP;
+                       break;
+               }
        }
 }
 
@@ -655,7 +703,9 @@ static int vmw_cmd_res_reloc_add(struct vmw_private *dev_priv,
        *p_val = NULL;
        ret = vmw_resource_relocation_add(&sw_context->res_relocations,
                                          res,
-                                         id_loc - sw_context->buf_start);
+                                         vmw_ptr_diff(sw_context->buf_start,
+                                                      id_loc),
+                                         vmw_res_rel_normal);
        if (unlikely(ret != 0))
                return ret;
 
@@ -721,7 +771,8 @@ vmw_cmd_res_check(struct vmw_private *dev_priv,
 
                return vmw_resource_relocation_add
                        (&sw_context->res_relocations, res,
-                        id_loc - sw_context->buf_start);
+                        vmw_ptr_diff(sw_context->buf_start, id_loc),
+                        vmw_res_rel_normal);
        }
 
        ret = vmw_user_resource_lookup_handle(dev_priv,
@@ -2143,10 +2194,10 @@ static int vmw_cmd_shader_define(struct vmw_private *dev_priv,
                return ret;
 
        return vmw_resource_relocation_add(&sw_context->res_relocations,
-                                          NULL, &cmd->header.id -
-                                          sw_context->buf_start);
-
-       return 0;
+                                          NULL,
+                                          vmw_ptr_diff(sw_context->buf_start,
+                                                       &cmd->header.id),
+                                          vmw_res_rel_nop);
 }
 
 /**
@@ -2188,10 +2239,10 @@ static int vmw_cmd_shader_destroy(struct vmw_private *dev_priv,
                return ret;
 
        return vmw_resource_relocation_add(&sw_context->res_relocations,
-                                          NULL, &cmd->header.id -
-                                          sw_context->buf_start);
-
-       return 0;
+                                          NULL,
+                                          vmw_ptr_diff(sw_context->buf_start,
+                                                       &cmd->header.id),
+                                          vmw_res_rel_nop);
 }
 
 /**
@@ -2848,8 +2899,7 @@ static int vmw_cmd_dx_cid_check(struct vmw_private *dev_priv,
  * @header: Pointer to the command header in the command stream.
  *
  * Check that the view exists, and if it was not created using this
- * command batch, make sure it's validated (present in the device) so that
- * the remove command will not confuse the device.
+ * command batch, conditionally make this command a NOP.
  */
 static int vmw_cmd_dx_view_remove(struct vmw_private *dev_priv,
                                  struct vmw_sw_context *sw_context,
@@ -2877,10 +2927,16 @@ static int vmw_cmd_dx_view_remove(struct vmw_private *dev_priv,
                return ret;
 
        /*
-        * Add view to the validate list iff it was not created using this
-        * command batch.
+        * If the view wasn't created during this command batch, it might
+        * have been removed due to a context swapout, so add a
+        * relocation to conditionally make this command a NOP to avoid
+        * device errors.
         */
-       return vmw_view_res_val_add(sw_context, view);
+       return vmw_resource_relocation_add(&sw_context->res_relocations,
+                                          view,
+                                          vmw_ptr_diff(sw_context->buf_start,
+                                                       &cmd->header.id),
+                                          vmw_res_rel_cond_nop);
 }
 
 /**
@@ -3029,6 +3085,35 @@ static int vmw_cmd_dx_genmips(struct vmw_private *dev_priv,
                                   cmd->body.shaderResourceViewId);
 }
 
+/**
+ * vmw_cmd_dx_transfer_from_buffer -
+ * Validate an SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER command
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @sw_context: The software context being used for this batch.
+ * @header: Pointer to the command header in the command stream.
+ */
+static int vmw_cmd_dx_transfer_from_buffer(struct vmw_private *dev_priv,
+                                          struct vmw_sw_context *sw_context,
+                                          SVGA3dCmdHeader *header)
+{
+       struct {
+               SVGA3dCmdHeader header;
+               SVGA3dCmdDXTransferFromBuffer body;
+       } *cmd = container_of(header, typeof(*cmd), header);
+       int ret;
+
+       ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+                               user_surface_converter,
+                               &cmd->body.srcSid, NULL);
+       if (ret != 0)
+               return ret;
+
+       return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
+                                user_surface_converter,
+                                &cmd->body.destSid, NULL);
+}
+
 static int vmw_cmd_check_not_3d(struct vmw_private *dev_priv,
                                struct vmw_sw_context *sw_context,
                                void *buf, uint32_t *size)
@@ -3379,6 +3464,9 @@ static const struct vmw_cmd_entry vmw_cmd_entries[SVGA_3D_CMD_MAX] = {
                    &vmw_cmd_buffer_copy_check, true, false, true),
        VMW_CMD_DEF(SVGA_3D_CMD_DX_PRED_COPY_REGION,
                    &vmw_cmd_pred_copy_check, true, false, true),
+       VMW_CMD_DEF(SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER,
+                   &vmw_cmd_dx_transfer_from_buffer,
+                   true, false, true),
 };
 
 static int vmw_cmd_check(struct vmw_private *dev_priv,
@@ -3848,14 +3936,14 @@ static void *vmw_execbuf_cmdbuf(struct vmw_private *dev_priv,
        int ret;
 
        *header = NULL;
-       if (!dev_priv->cman || kernel_commands)
-               return kernel_commands;
-
        if (command_size > SVGA_CB_MAX_SIZE) {
                DRM_ERROR("Command buffer is too large.\n");
                return ERR_PTR(-EINVAL);
        }
 
+       if (!dev_priv->cman || kernel_commands)
+               return kernel_commands;
+
        /* If possible, add a little space for fencing. */
        cmdbuf_size = command_size + 512;
        cmdbuf_size = min_t(size_t, cmdbuf_size, SVGA_CB_MAX_SIZE);
@@ -4232,9 +4320,6 @@ void __vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
        ttm_bo_unref(&query_val.bo);
        ttm_bo_unref(&pinned_val.bo);
        vmw_dmabuf_unreference(&dev_priv->pinned_bo);
-       DRM_INFO("Dummy query bo pin count: %d\n",
-                dev_priv->dummy_query_bo->pin_count);
-
 out_unlock:
        return;
 
index 6a328d507a285e027bf9f355b2f5cb9b9a6da8c7..52ca1c9d070ee734f3ce80c516bd3bed3f087252 100644 (file)
@@ -574,10 +574,8 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo,
                bool nonblock = !!(flags & drm_vmw_synccpu_dontblock);
                long lret;
 
-               if (nonblock)
-                       return reservation_object_test_signaled_rcu(bo->resv, true) ? 0 : -EBUSY;
-
-               lret = reservation_object_wait_timeout_rcu(bo->resv, true, true, MAX_SCHEDULE_TIMEOUT);
+               lret = reservation_object_wait_timeout_rcu(bo->resv, true, true,
+                                       nonblock ? 0 : MAX_SCHEDULE_TIMEOUT);
                if (!lret)
                        return -EBUSY;
                else if (lret < 0)
index c2a721a8cef9d99f9e4fe28032eb150ae4d5f472..b445ce9b9757861ecc1ece1071f1c8c3a02a166f 100644 (file)
@@ -324,7 +324,7 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res)
        if (res->id != -1) {
 
                cmd = vmw_fifo_reserve(dev_priv, vmw_surface_destroy_size());
-               if (unlikely(cmd == NULL)) {
+               if (unlikely(!cmd)) {
                        DRM_ERROR("Failed reserving FIFO space for surface "
                                  "destruction.\n");
                        return;
@@ -397,7 +397,7 @@ static int vmw_legacy_srf_create(struct vmw_resource *res)
 
        submit_size = vmw_surface_define_size(srf);
        cmd = vmw_fifo_reserve(dev_priv, submit_size);
-       if (unlikely(cmd == NULL)) {
+       if (unlikely(!cmd)) {
                DRM_ERROR("Failed reserving FIFO space for surface "
                          "creation.\n");
                ret = -ENOMEM;
@@ -446,11 +446,10 @@ static int vmw_legacy_srf_dma(struct vmw_resource *res,
        uint8_t *cmd;
        struct vmw_private *dev_priv = res->dev_priv;
 
-       BUG_ON(val_buf->bo == NULL);
-
+       BUG_ON(!val_buf->bo);
        submit_size = vmw_surface_dma_size(srf);
        cmd = vmw_fifo_reserve(dev_priv, submit_size);
-       if (unlikely(cmd == NULL)) {
+       if (unlikely(!cmd)) {
                DRM_ERROR("Failed reserving FIFO space for surface "
                          "DMA.\n");
                return -ENOMEM;
@@ -538,7 +537,7 @@ static int vmw_legacy_srf_destroy(struct vmw_resource *res)
 
        submit_size = vmw_surface_destroy_size();
        cmd = vmw_fifo_reserve(dev_priv, submit_size);
-       if (unlikely(cmd == NULL)) {
+       if (unlikely(!cmd)) {
                DRM_ERROR("Failed reserving FIFO space for surface "
                          "eviction.\n");
                return -ENOMEM;
@@ -578,7 +577,7 @@ static int vmw_surface_init(struct vmw_private *dev_priv,
        int ret;
        struct vmw_resource *res = &srf->res;
 
-       BUG_ON(res_free == NULL);
+       BUG_ON(!res_free);
        if (!dev_priv->has_mob)
                vmw_fifo_resource_inc(dev_priv);
        ret = vmw_resource_init(dev_priv, res, true, res_free,
@@ -700,7 +699,6 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
        struct drm_vmw_surface_create_req *req = &arg->req;
        struct drm_vmw_surface_arg *rep = &arg->rep;
        struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
-       struct drm_vmw_size __user *user_sizes;
        int ret;
        int i, j;
        uint32_t cur_bo_offset;
@@ -748,7 +746,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
        }
 
        user_srf = kzalloc(sizeof(*user_srf), GFP_KERNEL);
-       if (unlikely(user_srf == NULL)) {
+       if (unlikely(!user_srf)) {
                ret = -ENOMEM;
                goto out_no_user_srf;
        }
@@ -763,29 +761,21 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
        memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
        srf->num_sizes = num_sizes;
        user_srf->size = size;
-
-       srf->sizes = kmalloc(srf->num_sizes * sizeof(*srf->sizes), GFP_KERNEL);
-       if (unlikely(srf->sizes == NULL)) {
-               ret = -ENOMEM;
+       srf->sizes = memdup_user((struct drm_vmw_size __user *)(unsigned long)
+                                req->size_addr,
+                                sizeof(*srf->sizes) * srf->num_sizes);
+       if (IS_ERR(srf->sizes)) {
+               ret = PTR_ERR(srf->sizes);
                goto out_no_sizes;
        }
-       srf->offsets = kmalloc(srf->num_sizes * sizeof(*srf->offsets),
-                              GFP_KERNEL);
-       if (unlikely(srf->offsets == NULL)) {
+       srf->offsets = kmalloc_array(srf->num_sizes,
+                                    sizeof(*srf->offsets),
+                                    GFP_KERNEL);
+       if (unlikely(!srf->offsets)) {
                ret = -ENOMEM;
                goto out_no_offsets;
        }
 
-       user_sizes = (struct drm_vmw_size __user *)(unsigned long)
-           req->size_addr;
-
-       ret = copy_from_user(srf->sizes, user_sizes,
-                            srf->num_sizes * sizeof(*srf->sizes));
-       if (unlikely(ret != 0)) {
-               ret = -EFAULT;
-               goto out_no_copy;
-       }
-
        srf->base_size = *srf->sizes;
        srf->autogen_filter = SVGA3D_TEX_FILTER_NONE;
        srf->multisample_count = 0;
@@ -923,7 +913,7 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv,
 
        ret = -EINVAL;
        base = ttm_base_object_lookup_for_ref(dev_priv->tdev, handle);
-       if (unlikely(base == NULL)) {
+       if (unlikely(!base)) {
                DRM_ERROR("Could not find surface to reference.\n");
                goto out_no_lookup;
        }
@@ -1069,7 +1059,7 @@ static int vmw_gb_surface_create(struct vmw_resource *res)
 
        cmd = vmw_fifo_reserve(dev_priv, submit_len);
        cmd2 = (typeof(cmd2))cmd;
-       if (unlikely(cmd == NULL)) {
+       if (unlikely(!cmd)) {
                DRM_ERROR("Failed reserving FIFO space for surface "
                          "creation.\n");
                ret = -ENOMEM;
@@ -1135,7 +1125,7 @@ static int vmw_gb_surface_bind(struct vmw_resource *res,
        submit_size = sizeof(*cmd1) + (res->backup_dirty ? sizeof(*cmd2) : 0);
 
        cmd1 = vmw_fifo_reserve(dev_priv, submit_size);
-       if (unlikely(cmd1 == NULL)) {
+       if (unlikely(!cmd1)) {
                DRM_ERROR("Failed reserving FIFO space for surface "
                          "binding.\n");
                return -ENOMEM;
@@ -1185,7 +1175,7 @@ static int vmw_gb_surface_unbind(struct vmw_resource *res,
 
        submit_size = sizeof(*cmd3) + (readback ? sizeof(*cmd1) : sizeof(*cmd2));
        cmd = vmw_fifo_reserve(dev_priv, submit_size);
-       if (unlikely(cmd == NULL)) {
+       if (unlikely(!cmd)) {
                DRM_ERROR("Failed reserving FIFO space for surface "
                          "unbinding.\n");
                return -ENOMEM;
@@ -1244,7 +1234,7 @@ static int vmw_gb_surface_destroy(struct vmw_resource *res)
        vmw_binding_res_list_scrub(&res->binding_head);
 
        cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
-       if (unlikely(cmd == NULL)) {
+       if (unlikely(!cmd)) {
                DRM_ERROR("Failed reserving FIFO space for surface "
                          "destruction.\n");
                mutex_unlock(&dev_priv->binding_mutex);
@@ -1410,7 +1400,7 @@ int vmw_gb_surface_reference_ioctl(struct drm_device *dev, void *data,
 
        user_srf = container_of(base, struct vmw_user_surface, prime.base);
        srf = &user_srf->srf;
-       if (srf->res.backup == NULL) {
+       if (!srf->res.backup) {
                DRM_ERROR("Shared GB surface is missing a backup buffer.\n");
                goto out_bad_resource;
        }
@@ -1524,7 +1514,7 @@ int vmw_surface_gb_priv_define(struct drm_device *dev,
        }
 
        user_srf = kzalloc(sizeof(*user_srf), GFP_KERNEL);
-       if (unlikely(user_srf == NULL)) {
+       if (unlikely(!user_srf)) {
                ret = -ENOMEM;
                goto out_no_user_srf;
        }
index 2ba7d437a2afc7a0758402690526de5366abb718..805b6fa7b5f4c2f7ca98582ff028e1268bc66540 100644 (file)
@@ -1617,7 +1617,7 @@ ipu_image_convert(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
        ctx = ipu_image_convert_prepare(ipu, ic_task, in, out, rot_mode,
                                        complete, complete_context);
        if (IS_ERR(ctx))
-               return ERR_PTR(PTR_ERR(ctx));
+               return ERR_CAST(ctx);
 
        run = kzalloc(sizeof(*run), GFP_KERNEL);
        if (!run) {
index 086d8a50715789d2237d19a59d8e24f407d35e23..60d30203a5faf9c7b69dcddb09ec0a086ca96a08 100644 (file)
 #include <linux/usb/ch9.h>
 #include "hid-ids.h"
 
+#define CP2112_REPORT_MAX_LENGTH               64
+#define CP2112_GPIO_CONFIG_LENGTH              5
+#define CP2112_GPIO_GET_LENGTH                 2
+#define CP2112_GPIO_SET_LENGTH                 3
+
 enum {
        CP2112_GPIO_CONFIG              = 0x02,
        CP2112_GPIO_GET                 = 0x03,
@@ -161,6 +166,8 @@ struct cp2112_device {
        atomic_t read_avail;
        atomic_t xfer_avail;
        struct gpio_chip gc;
+       u8 *in_out_buffer;
+       spinlock_t lock;
 };
 
 static int gpio_push_pull = 0xFF;
@@ -171,62 +178,86 @@ static int cp2112_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 {
        struct cp2112_device *dev = gpiochip_get_data(chip);
        struct hid_device *hdev = dev->hdev;
-       u8 buf[5];
+       u8 *buf = dev->in_out_buffer;
+       unsigned long flags;
        int ret;
 
+       spin_lock_irqsave(&dev->lock, flags);
+
        ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf,
-                                      sizeof(buf), HID_FEATURE_REPORT,
-                                      HID_REQ_GET_REPORT);
-       if (ret != sizeof(buf)) {
+                                CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT,
+                                HID_REQ_GET_REPORT);
+       if (ret != CP2112_GPIO_CONFIG_LENGTH) {
                hid_err(hdev, "error requesting GPIO config: %d\n", ret);
-               return ret;
+               goto exit;
        }
 
        buf[1] &= ~(1 << offset);
        buf[2] = gpio_push_pull;
 
-       ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, sizeof(buf),
-                                HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+       ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf,
+                                CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT,
+                                HID_REQ_SET_REPORT);
        if (ret < 0) {
                hid_err(hdev, "error setting GPIO config: %d\n", ret);
-               return ret;
+               goto exit;
        }
 
-       return 0;
+       ret = 0;
+
+exit:
+       spin_unlock_irqrestore(&dev->lock, flags);
+       return ret <= 0 ? ret : -EIO;
 }
 
 static void cp2112_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
 {
        struct cp2112_device *dev = gpiochip_get_data(chip);
        struct hid_device *hdev = dev->hdev;
-       u8 buf[3];
+       u8 *buf = dev->in_out_buffer;
+       unsigned long flags;
        int ret;
 
+       spin_lock_irqsave(&dev->lock, flags);
+
        buf[0] = CP2112_GPIO_SET;
        buf[1] = value ? 0xff : 0;
        buf[2] = 1 << offset;
 
-       ret = hid_hw_raw_request(hdev, CP2112_GPIO_SET, buf, sizeof(buf),
-                                HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+       ret = hid_hw_raw_request(hdev, CP2112_GPIO_SET, buf,
+                                CP2112_GPIO_SET_LENGTH, HID_FEATURE_REPORT,
+                                HID_REQ_SET_REPORT);
        if (ret < 0)
                hid_err(hdev, "error setting GPIO values: %d\n", ret);
+
+       spin_unlock_irqrestore(&dev->lock, flags);
 }
 
 static int cp2112_gpio_get(struct gpio_chip *chip, unsigned offset)
 {
        struct cp2112_device *dev = gpiochip_get_data(chip);
        struct hid_device *hdev = dev->hdev;
-       u8 buf[2];
+       u8 *buf = dev->in_out_buffer;
+       unsigned long flags;
        int ret;
 
-       ret = hid_hw_raw_request(hdev, CP2112_GPIO_GET, buf, sizeof(buf),
-                                      HID_FEATURE_REPORT, HID_REQ_GET_REPORT);
-       if (ret != sizeof(buf)) {
+       spin_lock_irqsave(&dev->lock, flags);
+
+       ret = hid_hw_raw_request(hdev, CP2112_GPIO_GET, buf,
+                                CP2112_GPIO_GET_LENGTH, HID_FEATURE_REPORT,
+                                HID_REQ_GET_REPORT);
+       if (ret != CP2112_GPIO_GET_LENGTH) {
                hid_err(hdev, "error requesting GPIO values: %d\n", ret);
-               return ret;
+               ret = ret < 0 ? ret : -EIO;
+               goto exit;
        }
 
-       return (buf[1] >> offset) & 1;
+       ret = (buf[1] >> offset) & 1;
+
+exit:
+       spin_unlock_irqrestore(&dev->lock, flags);
+
+       return ret;
 }
 
 static int cp2112_gpio_direction_output(struct gpio_chip *chip,
@@ -234,27 +265,33 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip,
 {
        struct cp2112_device *dev = gpiochip_get_data(chip);
        struct hid_device *hdev = dev->hdev;
-       u8 buf[5];
+       u8 *buf = dev->in_out_buffer;
+       unsigned long flags;
        int ret;
 
+       spin_lock_irqsave(&dev->lock, flags);
+
        ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf,
-                                      sizeof(buf), HID_FEATURE_REPORT,
-                                      HID_REQ_GET_REPORT);
-       if (ret != sizeof(buf)) {
+                                CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT,
+                                HID_REQ_GET_REPORT);
+       if (ret != CP2112_GPIO_CONFIG_LENGTH) {
                hid_err(hdev, "error requesting GPIO config: %d\n", ret);
-               return ret;
+               goto fail;
        }
 
        buf[1] |= 1 << offset;
        buf[2] = gpio_push_pull;
 
-       ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf, sizeof(buf),
-                                HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+       ret = hid_hw_raw_request(hdev, CP2112_GPIO_CONFIG, buf,
+                                CP2112_GPIO_CONFIG_LENGTH, HID_FEATURE_REPORT,
+                                HID_REQ_SET_REPORT);
        if (ret < 0) {
                hid_err(hdev, "error setting GPIO config: %d\n", ret);
-               return ret;
+               goto fail;
        }
 
+       spin_unlock_irqrestore(&dev->lock, flags);
+
        /*
         * Set gpio value when output direction is already set,
         * as specified in AN495, Rev. 0.2, cpt. 4.4
@@ -262,6 +299,10 @@ static int cp2112_gpio_direction_output(struct gpio_chip *chip,
        cp2112_gpio_set(chip, offset, value);
 
        return 0;
+
+fail:
+       spin_unlock_irqrestore(&dev->lock, flags);
+       return ret < 0 ? ret : -EIO;
 }
 
 static int cp2112_hid_get(struct hid_device *hdev, unsigned char report_number,
@@ -1007,6 +1048,17 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id)
        struct cp2112_smbus_config_report config;
        int ret;
 
+       dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL);
+       if (!dev)
+               return -ENOMEM;
+
+       dev->in_out_buffer = devm_kzalloc(&hdev->dev, CP2112_REPORT_MAX_LENGTH,
+                                         GFP_KERNEL);
+       if (!dev->in_out_buffer)
+               return -ENOMEM;
+
+       spin_lock_init(&dev->lock);
+
        ret = hid_parse(hdev);
        if (ret) {
                hid_err(hdev, "parse failed\n");
@@ -1063,12 +1115,6 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id)
                goto err_power_normal;
        }
 
-       dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-       if (!dev) {
-               ret = -ENOMEM;
-               goto err_power_normal;
-       }
-
        hid_set_drvdata(hdev, (void *)dev);
        dev->hdev               = hdev;
        dev->adap.owner         = THIS_MODULE;
@@ -1087,7 +1133,7 @@ static int cp2112_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
        if (ret) {
                hid_err(hdev, "error registering i2c adapter\n");
-               goto err_free_dev;
+               goto err_power_normal;
        }
 
        hid_dbg(hdev, "adapter registered\n");
@@ -1123,8 +1169,6 @@ err_gpiochip_remove:
        gpiochip_remove(&dev->gc);
 err_free_i2c:
        i2c_del_adapter(&dev->adap);
-err_free_dev:
-       kfree(dev);
 err_power_normal:
        hid_hw_power(hdev, PM_HINT_NORMAL);
 err_hid_close:
@@ -1149,7 +1193,6 @@ static void cp2112_remove(struct hid_device *hdev)
         */
        hid_hw_close(hdev);
        hid_hw_stop(hdev);
-       kfree(dev);
 }
 
 static int cp2112_raw_event(struct hid_device *hdev, struct hid_report *report,
index 8fd4bf77f264940ec04631252062e06aafe148c8..818ea7d935333046adc5036d141ffa6cf6be5c40 100644 (file)
@@ -234,58 +234,6 @@ static __u8 pid0011_rdesc_fixed[] = {
        0xC0                /*  End Collection                  */
 };
 
-static __u8 pid0006_rdesc_fixed[] = {
-       0x05, 0x01,        /* Usage Page (Generic Desktop)      */
-       0x09, 0x04,        /* Usage (Joystick)                  */
-       0xA1, 0x01,        /* Collection (Application)          */
-       0xA1, 0x02,        /*   Collection (Logical)            */
-       0x75, 0x08,        /*     Report Size (8)               */
-       0x95, 0x05,        /*     Report Count (5)              */
-       0x15, 0x00,        /*     Logical Minimum (0)           */
-       0x26, 0xFF, 0x00,  /*     Logical Maximum (255)         */
-       0x35, 0x00,        /*     Physical Minimum (0)          */
-       0x46, 0xFF, 0x00,  /*     Physical Maximum (255)        */
-       0x09, 0x30,        /*     Usage (X)                     */
-       0x09, 0x33,        /*     Usage (Ry)                    */
-       0x09, 0x32,        /*     Usage (Z)                     */
-       0x09, 0x31,        /*     Usage (Y)                     */
-       0x09, 0x34,        /*     Usage (Ry)                    */
-       0x81, 0x02,        /*     Input (Variable)              */
-       0x75, 0x04,        /*     Report Size (4)               */
-       0x95, 0x01,        /*     Report Count (1)              */
-       0x25, 0x07,        /*     Logical Maximum (7)           */
-       0x46, 0x3B, 0x01,  /*     Physical Maximum (315)        */
-       0x65, 0x14,        /*     Unit (Centimeter)             */
-       0x09, 0x39,        /*     Usage (Hat switch)            */
-       0x81, 0x42,        /*     Input (Variable)              */
-       0x65, 0x00,        /*     Unit (None)                   */
-       0x75, 0x01,        /*     Report Size (1)               */
-       0x95, 0x0C,        /*     Report Count (12)             */
-       0x25, 0x01,        /*     Logical Maximum (1)           */
-       0x45, 0x01,        /*     Physical Maximum (1)          */
-       0x05, 0x09,        /*     Usage Page (Button)           */
-       0x19, 0x01,        /*     Usage Minimum (0x01)          */
-       0x29, 0x0C,        /*     Usage Maximum (0x0C)          */
-       0x81, 0x02,        /*     Input (Variable)              */
-       0x06, 0x00, 0xFF,  /*     Usage Page (Vendor Defined)   */
-       0x75, 0x01,        /*     Report Size (1)               */
-       0x95, 0x08,        /*     Report Count (8)              */
-       0x25, 0x01,        /*     Logical Maximum (1)           */
-       0x45, 0x01,        /*     Physical Maximum (1)          */
-       0x09, 0x01,        /*     Usage (0x01)                  */
-       0x81, 0x02,        /*     Input (Variable)              */
-       0xC0,              /*   End Collection                  */
-       0xA1, 0x02,        /*   Collection (Logical)            */
-       0x75, 0x08,        /*     Report Size (8)               */
-       0x95, 0x07,        /*     Report Count (7)              */
-       0x46, 0xFF, 0x00,  /*     Physical Maximum (255)        */
-       0x26, 0xFF, 0x00,  /*     Logical Maximum (255)         */
-       0x09, 0x02,        /*     Usage (0x02)                  */
-       0x91, 0x02,        /*     Output (Variable)             */
-       0xC0,              /*   End Collection                  */
-       0xC0               /* End Collection                    */
-};
-
 static __u8 *dr_report_fixup(struct hid_device *hdev, __u8 *rdesc,
                                unsigned int *rsize)
 {
@@ -296,16 +244,34 @@ static __u8 *dr_report_fixup(struct hid_device *hdev, __u8 *rdesc,
                        *rsize = sizeof(pid0011_rdesc_fixed);
                }
                break;
-       case 0x0006:
-               if (*rsize == sizeof(pid0006_rdesc_fixed)) {
-                       rdesc = pid0006_rdesc_fixed;
-                       *rsize = sizeof(pid0006_rdesc_fixed);
-               }
-               break;
        }
        return rdesc;
 }
 
+#define map_abs(c)      hid_map_usage(hi, usage, bit, max, EV_ABS, (c))
+#define map_rel(c)      hid_map_usage(hi, usage, bit, max, EV_REL, (c))
+
+static int dr_input_mapping(struct hid_device *hdev, struct hid_input *hi,
+                           struct hid_field *field, struct hid_usage *usage,
+                           unsigned long **bit, int *max)
+{
+       switch (usage->hid) {
+       /*
+        * revert to the old hid-input behavior where axes
+        * can be randomly assigned when hid->usage is reused.
+        */
+       case HID_GD_X: case HID_GD_Y: case HID_GD_Z:
+       case HID_GD_RX: case HID_GD_RY: case HID_GD_RZ:
+               if (field->flags & HID_MAIN_ITEM_RELATIVE)
+                       map_rel(usage->hid & 0xf);
+               else
+                       map_abs(usage->hid & 0xf);
+               return 1;
+       }
+
+       return 0;
+}
+
 static int dr_probe(struct hid_device *hdev, const struct hid_device_id *id)
 {
        int ret;
@@ -352,6 +318,7 @@ static struct hid_driver dr_driver = {
        .id_table = dr_devices,
        .report_fixup = dr_report_fixup,
        .probe = dr_probe,
+       .input_mapping = dr_input_mapping,
 };
 module_hid_driver(dr_driver);
 
index cd59c79eebdd2bd896c1ff19d687f25686d7fb35..575aa65436d182c31fb874a432807688747dd785 100644 (file)
@@ -64,6 +64,9 @@
 #define USB_VENDOR_ID_AKAI             0x2011
 #define USB_DEVICE_ID_AKAI_MPKMINI2    0x0715
 
+#define USB_VENDOR_ID_AKAI_09E8                0x09E8
+#define USB_DEVICE_ID_AKAI_09E8_MIDIMIX        0x0031
+
 #define USB_VENDOR_ID_ALCOR            0x058f
 #define USB_DEVICE_ID_ALCOR_USBRS232   0x9720
 
 #define USB_DEVICE_ID_ATEN_4PORTKVM    0x2205
 #define USB_DEVICE_ID_ATEN_4PORTKVMC   0x2208
 #define USB_DEVICE_ID_ATEN_CS682       0x2213
+#define USB_DEVICE_ID_ATEN_CS692       0x8021
 
 #define USB_VENDOR_ID_ATMEL            0x03eb
 #define USB_DEVICE_ID_ATMEL_MULTITOUCH 0x211c
index d8d55f37b4f5604e2d10672513267c483726f05e..d3e1ab162f7c6ade6e2a83053ec1d719f1a08daa 100644 (file)
@@ -100,6 +100,7 @@ struct hidled_device {
        const struct hidled_config *config;
        struct hid_device       *hdev;
        struct hidled_rgb       *rgb;
+       u8                      *buf;
        struct mutex            lock;
 };
 
@@ -118,13 +119,19 @@ static int hidled_send(struct hidled_device *ldev, __u8 *buf)
 
        mutex_lock(&ldev->lock);
 
+       /*
+        * buffer provided to hid_hw_raw_request must not be on the stack
+        * and must not be part of a data structure
+        */
+       memcpy(ldev->buf, buf, ldev->config->report_size);
+
        if (ldev->config->report_type == RAW_REQUEST)
-               ret = hid_hw_raw_request(ldev->hdev, buf[0], buf,
+               ret = hid_hw_raw_request(ldev->hdev, buf[0], ldev->buf,
                                         ldev->config->report_size,
                                         HID_FEATURE_REPORT,
                                         HID_REQ_SET_REPORT);
        else if (ldev->config->report_type == OUTPUT_REPORT)
-               ret = hid_hw_output_report(ldev->hdev, buf,
+               ret = hid_hw_output_report(ldev->hdev, ldev->buf,
                                           ldev->config->report_size);
        else
                ret = -EINVAL;
@@ -147,17 +154,21 @@ static int hidled_recv(struct hidled_device *ldev, __u8 *buf)
 
        mutex_lock(&ldev->lock);
 
-       ret = hid_hw_raw_request(ldev->hdev, buf[0], buf,
+       memcpy(ldev->buf, buf, ldev->config->report_size);
+
+       ret = hid_hw_raw_request(ldev->hdev, buf[0], ldev->buf,
                                 ldev->config->report_size,
                                 HID_FEATURE_REPORT,
                                 HID_REQ_SET_REPORT);
        if (ret < 0)
                goto err;
 
-       ret = hid_hw_raw_request(ldev->hdev, buf[0], buf,
+       ret = hid_hw_raw_request(ldev->hdev, buf[0], ldev->buf,
                                 ldev->config->report_size,
                                 HID_FEATURE_REPORT,
                                 HID_REQ_GET_REPORT);
+
+       memcpy(buf, ldev->buf, ldev->config->report_size);
 err:
        mutex_unlock(&ldev->lock);
 
@@ -447,6 +458,10 @@ static int hidled_probe(struct hid_device *hdev, const struct hid_device_id *id)
        if (!ldev)
                return -ENOMEM;
 
+       ldev->buf = devm_kmalloc(&hdev->dev, MAX_REPORT_SIZE, GFP_KERNEL);
+       if (!ldev->buf)
+               return -ENOMEM;
+
        ret = hid_parse(hdev);
        if (ret)
                return ret;
index 76f644deb0a75cab6d6810517d4196465be18314..c5c5fbe9d60577f44085d86a7fb5cf60efb6acd3 100644 (file)
@@ -756,11 +756,16 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
        /* Setup wireless link with Logitech Wii wheel */
        if (hdev->product == USB_DEVICE_ID_LOGITECH_WII_WHEEL) {
-               unsigned char buf[] = { 0x00, 0xAF,  0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+               const unsigned char cbuf[] = { 0x00, 0xAF,  0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+               u8 *buf = kmemdup(cbuf, sizeof(cbuf), GFP_KERNEL);
 
-               ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(buf),
-                                       HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+               if (!buf) {
+                       ret = -ENOMEM;
+                       goto err_free;
+               }
 
+               ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(cbuf),
+                                       HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
                if (ret >= 0) {
                        /* insert a little delay of 10 jiffies ~ 40ms */
                        wait_queue_head_t wait;
@@ -772,9 +777,10 @@ static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id)
                        buf[1] = 0xB2;
                        get_random_bytes(&buf[2], 2);
 
-                       ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(buf),
+                       ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(cbuf),
                                        HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
                }
+               kfree(buf);
        }
 
        if (drv_data->quirks & LG_FF)
index d6fa496d0ca25c17035233315ba9f5d5ebddc4fd..20b40ad2632503754685b84cc07d8787a4a44515 100644 (file)
@@ -493,7 +493,8 @@ static int magicmouse_input_configured(struct hid_device *hdev,
 static int magicmouse_probe(struct hid_device *hdev,
        const struct hid_device_id *id)
 {
-       __u8 feature[] = { 0xd7, 0x01 };
+       const u8 feature[] = { 0xd7, 0x01 };
+       u8 *buf;
        struct magicmouse_sc *msc;
        struct hid_report *report;
        int ret;
@@ -544,6 +545,12 @@ static int magicmouse_probe(struct hid_device *hdev,
        }
        report->size = 6;
 
+       buf = kmemdup(feature, sizeof(feature), GFP_KERNEL);
+       if (!buf) {
+               ret = -ENOMEM;
+               goto err_stop_hw;
+       }
+
        /*
         * Some devices repond with 'invalid report id' when feature
         * report switching it into multitouch mode is sent to it.
@@ -552,8 +559,9 @@ static int magicmouse_probe(struct hid_device *hdev,
         * but there seems to be no other way of switching the mode.
         * Thus the super-ugly hacky success check below.
         */
-       ret = hid_hw_raw_request(hdev, feature[0], feature, sizeof(feature),
+       ret = hid_hw_raw_request(hdev, buf[0], buf, sizeof(feature),
                                HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+       kfree(buf);
        if (ret != -EIO && ret != sizeof(feature)) {
                hid_err(hdev, "unable to request touch data (%d)\n", ret);
                goto err_stop_hw;
index 9cd2ca34a6be5583dbcd82a64feb018f66b20bf8..be89bcbf6a71b23f266c8bb0b38b0aa66cca1ae0 100644 (file)
@@ -188,10 +188,16 @@ static int rmi_set_page(struct hid_device *hdev, u8 page)
 static int rmi_set_mode(struct hid_device *hdev, u8 mode)
 {
        int ret;
-       u8 txbuf[2] = {RMI_SET_RMI_MODE_REPORT_ID, mode};
+       const u8 txbuf[2] = {RMI_SET_RMI_MODE_REPORT_ID, mode};
+       u8 *buf;
 
-       ret = hid_hw_raw_request(hdev, RMI_SET_RMI_MODE_REPORT_ID, txbuf,
+       buf = kmemdup(txbuf, sizeof(txbuf), GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       ret = hid_hw_raw_request(hdev, RMI_SET_RMI_MODE_REPORT_ID, buf,
                        sizeof(txbuf), HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+       kfree(buf);
        if (ret < 0) {
                dev_err(&hdev->dev, "unable to set rmi mode to %d (%d)\n", mode,
                        ret);
index 5614fee82347f34b0ad96d906d800d3de7906052..3a84aaf1418b45c725531903b41e29ae4ce62236 100644 (file)
@@ -292,11 +292,11 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr,
        bool input = false;
        int value = 0;
 
-       if (sscanf(attr->attr.name, "feature-%d-%x-%s", &index, &usage,
+       if (sscanf(attr->attr.name, "feature-%x-%x-%s", &index, &usage,
                   name) == 3) {
                feature = true;
                field_index = index + sensor_inst->input_field_count;
-       } else if (sscanf(attr->attr.name, "input-%d-%x-%s", &index, &usage,
+       } else if (sscanf(attr->attr.name, "input-%x-%x-%s", &index, &usage,
                   name) == 3) {
                input = true;
                field_index = index;
@@ -398,7 +398,7 @@ static ssize_t store_value(struct device *dev, struct device_attribute *attr,
        char name[HID_CUSTOM_NAME_LENGTH];
        int value;
 
-       if (sscanf(attr->attr.name, "feature-%d-%x-%s", &index, &usage,
+       if (sscanf(attr->attr.name, "feature-%x-%x-%s", &index, &usage,
                   name) == 3) {
                field_index = index + sensor_inst->input_field_count;
        } else
index 658a607dc6d9eb1f7c57da3c4afe065e32ffd156..60875625cbdff45725532b92a395fe7f329163ee 100644 (file)
@@ -212,6 +212,7 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
        __s32 value;
        int ret = 0;
 
+       memset(buffer, 0, buffer_size);
        mutex_lock(&data->mutex);
        report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT);
        if (!report || (field_index >= report->maxfield)) {
@@ -251,6 +252,9 @@ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
        struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev);
        int report_size;
        int ret = 0;
+       u8 *val_ptr;
+       int buffer_index = 0;
+       int i;
 
        mutex_lock(&data->mutex);
        report = sensor_hub_report(report_id, hsdev->hdev, HID_FEATURE_REPORT);
@@ -271,7 +275,17 @@ int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
                goto done_proc;
        }
        ret = min(report_size, buffer_size);
-       memcpy(buffer, report->field[field_index]->value, ret);
+
+       val_ptr = (u8 *)report->field[field_index]->value;
+       for (i = 0; i < report->field[field_index]->report_count; ++i) {
+               if (buffer_index >= ret)
+                       break;
+
+               memcpy(&((u8 *)buffer)[buffer_index], val_ptr,
+                      report->field[field_index]->report_size / 8);
+               val_ptr += sizeof(__s32);
+               buffer_index += (report->field[field_index]->report_size / 8);
+       }
 
 done_proc:
        mutex_unlock(&data->mutex);
index e2517c11e0ee053c68ff27c5f11325bcf601ba2a..0c9ac4d5d85007e52e45c7f2a3c413c304901264 100644 (file)
@@ -637,6 +637,58 @@ eoi:
        return  IRQ_HANDLED;
 }
 
+/**
+ * ish_disable_dma() - disable dma communication between host and ISHFW
+ * @dev: ishtp device pointer
+ *
+ * Clear the dma enable bit and wait for dma inactive.
+ *
+ * Return: 0 for success else error code.
+ */
+static int ish_disable_dma(struct ishtp_device *dev)
+{
+       unsigned int    dma_delay;
+
+       /* Clear the dma enable bit */
+       ish_reg_write(dev, IPC_REG_ISH_RMP2, 0);
+
+       /* wait for dma inactive */
+       for (dma_delay = 0; dma_delay < MAX_DMA_DELAY &&
+               _ish_read_fw_sts_reg(dev) & (IPC_ISH_IN_DMA);
+               dma_delay += 5)
+               mdelay(5);
+
+       if (dma_delay >= MAX_DMA_DELAY) {
+               dev_err(dev->devc,
+                       "Wait for DMA inactive timeout\n");
+               return  -EBUSY;
+       }
+
+       return 0;
+}
+
+/**
+ * ish_wakeup() - wakeup ishfw from waiting-for-host state
+ * @dev: ishtp device pointer
+ *
+ * Set the dma enable bit and send a void message to FW,
+ * it will wake up FW from waiting-for-host state.
+ */
+static void ish_wakeup(struct ishtp_device *dev)
+{
+       /* Set dma enable bit */
+       ish_reg_write(dev, IPC_REG_ISH_RMP2, IPC_RMP2_DMA_ENABLED);
+
+       /*
+        * Send 0 IPC message so that ISH FW wakes up if it was already
+        * asleep.
+        */
+       ish_reg_write(dev, IPC_REG_HOST2ISH_DRBL, IPC_DRBL_BUSY_BIT);
+
+       /* Flush writes to doorbell and REMAP2 */
+       ish_reg_read(dev, IPC_REG_ISH_HOST_FWSTS);
+}
+
 /**
  * _ish_hw_reset() - HW reset
  * @dev: ishtp device pointer
@@ -649,7 +701,6 @@ static int _ish_hw_reset(struct ishtp_device *dev)
 {
        struct pci_dev *pdev = dev->pdev;
        int     rv;
-       unsigned int    dma_delay;
        uint16_t csr;
 
        if (!pdev)
@@ -664,15 +715,8 @@ static int _ish_hw_reset(struct ishtp_device *dev)
                return  -EINVAL;
        }
 
-       /* Now trigger reset to FW */
-       ish_reg_write(dev, IPC_REG_ISH_RMP2, 0);
-
-       for (dma_delay = 0; dma_delay < MAX_DMA_DELAY &&
-               _ish_read_fw_sts_reg(dev) & (IPC_ISH_IN_DMA);
-               dma_delay += 5)
-               mdelay(5);
-
-       if (dma_delay >= MAX_DMA_DELAY) {
+       /* Disable dma communication between FW and host */
+       if (ish_disable_dma(dev)) {
                dev_err(&pdev->dev,
                        "Can't reset - stuck with DMA in-progress\n");
                return  -EBUSY;
@@ -690,16 +734,8 @@ static int _ish_hw_reset(struct ishtp_device *dev)
        csr |= PCI_D0;
        pci_write_config_word(pdev, pdev->pm_cap + PCI_PM_CTRL, csr);
 
-       ish_reg_write(dev, IPC_REG_ISH_RMP2, IPC_RMP2_DMA_ENABLED);
-
-       /*
-        * Send 0 IPC message so that ISH FW wakes up if it was already
-        * asleep
-        */
-       ish_reg_write(dev, IPC_REG_HOST2ISH_DRBL, IPC_DRBL_BUSY_BIT);
-
-       /* Flush writes to doorbell and REMAP2 */
-       ish_reg_read(dev, IPC_REG_ISH_HOST_FWSTS);
+       /* Now we can enable ISH DMA operation and wakeup ISHFW */
+       ish_wakeup(dev);
 
        return  0;
 }
@@ -758,16 +794,9 @@ static int _ish_ipc_reset(struct ishtp_device *dev)
 int ish_hw_start(struct ishtp_device *dev)
 {
        ish_set_host_rdy(dev);
-       /* After that we can enable ISH DMA operation */
-       ish_reg_write(dev, IPC_REG_ISH_RMP2, IPC_RMP2_DMA_ENABLED);
 
-       /*
-        * Send 0 IPC message so that ISH FW wakes up if it was already
-        * asleep
-        */
-       ish_reg_write(dev, IPC_REG_HOST2ISH_DRBL, IPC_DRBL_BUSY_BIT);
-       /* Flush write to doorbell */
-       ish_reg_read(dev, IPC_REG_ISH_HOST_FWSTS);
+       /* After that we can enable ISH DMA operation and wakeup ISHFW */
+       ish_wakeup(dev);
 
        set_host_ready(dev);
 
@@ -876,6 +905,21 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev)
  */
 void   ish_device_disable(struct ishtp_device *dev)
 {
+       struct pci_dev *pdev = dev->pdev;
+
+       if (!pdev)
+               return;
+
+       /* Disable dma communication between FW and host */
+       if (ish_disable_dma(dev)) {
+               dev_err(&pdev->dev,
+                       "Can't reset - stuck with DMA in-progress\n");
+               return;
+       }
+
+       /* Put ISH to D3hot state for power saving */
+       pci_set_power_state(pdev, PCI_D3hot);
+
        dev->dev_state = ISHTP_DEV_DISABLED;
        ish_clr_host_rdy(dev);
 }
index 42f0beeb09fd4e2ab4fe6ad246f9f21786bf3e3d..20d647d2dd2cbfa5fb57fbc85de236147cf448ab 100644 (file)
@@ -146,7 +146,7 @@ static int ish_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
 
        /* request and enable interrupt */
-       ret = request_irq(pdev->irq, ish_irq_handler, IRQF_NO_SUSPEND,
+       ret = request_irq(pdev->irq, ish_irq_handler, IRQF_SHARED,
                          KBUILD_MODNAME, dev);
        if (ret) {
                dev_err(&pdev->dev, "ISH: request IRQ failure (%d)\n",
@@ -202,6 +202,7 @@ static void ish_remove(struct pci_dev *pdev)
        kfree(ishtp_dev);
 }
 
+#ifdef CONFIG_PM
 static struct device *ish_resume_device;
 
 /**
@@ -293,7 +294,6 @@ static int ish_resume(struct device *device)
        return 0;
 }
 
-#ifdef CONFIG_PM
 static const struct dev_pm_ops ish_pm_ops = {
        .suspend = ish_suspend,
        .resume = ish_resume,
@@ -301,7 +301,7 @@ static const struct dev_pm_ops ish_pm_ops = {
 #define ISHTP_ISH_PM_OPS       (&ish_pm_ops)
 #else
 #define ISHTP_ISH_PM_OPS       NULL
-#endif
+#endif /* CONFIG_PM */
 
 static struct pci_driver ish_driver = {
        .name = KBUILD_MODNAME,
index 0a0eca5da47d0274cca10822e3b8d6aded20b6c7..e6cfd323babc62d653146730e4d7325ee8a24696 100644 (file)
@@ -56,12 +56,14 @@ static const struct hid_blacklist {
 
        { USB_VENDOR_ID_AIREN, USB_DEVICE_ID_AIREN_SLIMPLUS, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_AKAI, USB_DEVICE_ID_AKAI_MPKMINI2, HID_QUIRK_NO_INIT_REPORTS },
+       { USB_VENDOR_ID_AKAI_09E8, USB_DEVICE_ID_AKAI_09E8_MIDIMIX, HID_QUIRK_NO_INIT_REPORTS },
        { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS124U, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVM, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_4PORTKVMC, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS682, HID_QUIRK_NOGET },
+       { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS692, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FIGHTERSTICK, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_COMBATSTICK, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_ECLIPSE_YOKE, HID_QUIRK_NOGET },
index 4aa3cb63fd41f4506254187608c995e22359aa28..bcd06306f3e894a379603a4216cadab475be6b69 100644 (file)
@@ -314,10 +314,14 @@ static void heartbeat_onchannelcallback(void *context)
        u8 *hbeat_txf_buf = util_heartbeat.recv_buffer;
        struct icmsg_negotiate *negop = NULL;
 
-       vmbus_recvpacket(channel, hbeat_txf_buf,
-                        PAGE_SIZE, &recvlen, &requestid);
+       while (1) {
+
+               vmbus_recvpacket(channel, hbeat_txf_buf,
+                                PAGE_SIZE, &recvlen, &requestid);
+
+               if (!recvlen)
+                       break;
 
-       if (recvlen > 0) {
                icmsghdrp = (struct icmsg_hdr *)&hbeat_txf_buf[
                                sizeof(struct vmbuspipe_hdr)];
 
index a259e18d22d5b0e3293230e97979a5120d3265fa..0276d2ef06ee5f3d61811b38b77249df1c03f581 100644 (file)
@@ -961,7 +961,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
 {
        int ret = 0;
 
-       dev_set_name(&child_device_obj->device, "vmbus-%pUl",
+       dev_set_name(&child_device_obj->device, "%pUl",
                     child_device_obj->channel->offermsg.offer.if_instance.b);
 
        child_device_obj->device.bus = &hv_bus;
index 98114cef1e43962eb433560700d0103a96ed7050..2fe1828bd10b79fc18f1dfbe1f2e0e9c00e99cd3 100644 (file)
@@ -194,10 +194,10 @@ static struct adm9240_data *adm9240_update_device(struct device *dev)
                 * 0.5'C per two measurement cycles thus ignore possible
                 * but unlikely aliasing error on lsb reading. --Grant
                 */
-               data->temp = ((i2c_smbus_read_byte_data(client,
+               data->temp = (i2c_smbus_read_byte_data(client,
                                        ADM9240_REG_TEMP) << 8) |
                                        i2c_smbus_read_byte_data(client,
-                                       ADM9240_REG_TEMP_CONF)) / 128;
+                                       ADM9240_REG_TEMP_CONF);
 
                for (i = 0; i < 2; i++) { /* read fans */
                        data->fan[i] = i2c_smbus_read_byte_data(client,
@@ -263,7 +263,7 @@ static ssize_t show_temp(struct device *dev, struct device_attribute *dummy,
                char *buf)
 {
        struct adm9240_data *data = adm9240_update_device(dev);
-       return sprintf(buf, "%d\n", data->temp * 500); /* 9-bit value */
+       return sprintf(buf, "%d\n", data->temp / 128 * 500); /* 9-bit value */
 }
 
 static ssize_t show_max(struct device *dev, struct device_attribute *devattr,
index adae6848ffb2311b46d59a509fb0883e49cfd6e5..a74c075a30ec49608308ddd45715f1addc104c84 100644 (file)
@@ -536,8 +536,10 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
 
                hwdev->groups = devm_kcalloc(dev, ngroups, sizeof(*groups),
                                             GFP_KERNEL);
-               if (!hwdev->groups)
-                       return ERR_PTR(-ENOMEM);
+               if (!hwdev->groups) {
+                       err = -ENOMEM;
+                       goto free_hwmon;
+               }
 
                attrs = __hwmon_create_attrs(dev, drvdata, chip);
                if (IS_ERR(attrs)) {
index bef84e08597307893b699a854b7d97b94dca234c..c1b9275978f9d9ee9172e99e569de7dca48b491d 100644 (file)
@@ -268,11 +268,13 @@ static int max31790_read_pwm(struct device *dev, u32 attr, int channel,
                             long *val)
 {
        struct max31790_data *data = max31790_update_device(dev);
-       u8 fan_config = data->fan_config[channel];
+       u8 fan_config;
 
        if (IS_ERR(data))
                return PTR_ERR(data);
 
+       fan_config = data->fan_config[channel];
+
        switch (attr) {
        case hwmon_pwm_input:
                *val = data->pwm[channel] >> 8;
index d223650a97e426e582b2cfb58db3ecaedcf7cd4a..11edabf425ae34aac0b72d7c2811c714fa5796f2 100644 (file)
@@ -59,7 +59,6 @@ config I2C_CHARDEV
 
 config I2C_MUX
        tristate "I2C bus multiplexing support"
-       depends on HAS_IOMEM
        help
          Say Y here if you want the I2C core to support the ability to
          handle multiplexed I2C bus topologies, by presenting each
index 6d94e2ec5b4f7183734fbd1159db5d7013fa6f74..d252276feadf6b0b05cbe370330ed7cef5a16857 100644 (file)
@@ -79,12 +79,12 @@ config I2C_AMD8111
 
 config I2C_HIX5HD2
        tristate "Hix5hd2 high-speed I2C driver"
-       depends on ARCH_HIX5HD2 || COMPILE_TEST
+       depends on ARCH_HISI || ARCH_HIX5HD2 || COMPILE_TEST
        help
-         Say Y here to include support for high-speed I2C controller in the
-         Hisilicon based hix5hd2 SoCs.
+         Say Y here to include support for the high-speed I2C controller
+         used in HiSilicon hix5hd2 SoCs.
 
-         This driver can also be built as a module.  If so, the module
+         This driver can also be built as a module. If so, the module
          will be called i2c-hix5hd2.
 
 config I2C_I801
@@ -589,10 +589,10 @@ config I2C_IMG
 
 config I2C_IMX
        tristate "IMX I2C interface"
-       depends on ARCH_MXC || ARCH_LAYERSCAPE
+       depends on ARCH_MXC || ARCH_LAYERSCAPE || COLDFIRE
        help
          Say Y here if you want to use the IIC bus controller on
-         the Freescale i.MX/MXC or Layerscape processors.
+         the Freescale i.MX/MXC, Layerscape or ColdFire processors.
 
          This driver can also be built as a module.  If so, the module
          will be called i2c-imx.
index 1fe93c43215cf9e5d26385727e7c4f35a7e2fe89..b403fa5ecf4994c538b9b1d005f9658a93a45072 100644 (file)
                                         DW_IC_INTR_TX_ABRT | \
                                         DW_IC_INTR_STOP_DET)
 
-#define DW_IC_STATUS_ACTIVITY          0x1
-#define DW_IC_STATUS_TFE               BIT(2)
-#define DW_IC_STATUS_MST_ACTIVITY      BIT(5)
+#define DW_IC_STATUS_ACTIVITY  0x1
+
+#define DW_IC_SDA_HOLD_RX_SHIFT                16
+#define DW_IC_SDA_HOLD_RX_MASK         GENMASK(23, DW_IC_SDA_HOLD_RX_SHIFT)
 
 #define DW_IC_ERR_TX_ABRT      0x1
 
@@ -420,12 +421,20 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
        /* Configure SDA Hold Time if required */
        reg = dw_readl(dev, DW_IC_COMP_VERSION);
        if (reg >= DW_IC_SDA_HOLD_MIN_VERS) {
-               if (dev->sda_hold_time) {
-                       dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD);
-               } else {
+               if (!dev->sda_hold_time) {
                        /* Keep previous hold time setting if no one set it */
                        dev->sda_hold_time = dw_readl(dev, DW_IC_SDA_HOLD);
                }
+               /*
+                * Workaround for avoiding TX arbitration lost in case I2C
+                * slave pulls SDA down "too quickly" after falling edge of
+                * SCL by enabling non-zero SDA RX hold. Specification says it
+                * extends incoming SDA low to high transition while SCL is
+                * high but it appears to also help with the above issue.
+                */
+               if (!(dev->sda_hold_time & DW_IC_SDA_HOLD_RX_MASK))
+                       dev->sda_hold_time |= 1 << DW_IC_SDA_HOLD_RX_SHIFT;
+               dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD);
        } else {
                dev_warn(dev->dev,
                        "Hardware too old to adjust SDA hold time.\n");
@@ -467,25 +476,9 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
 {
        struct i2c_msg *msgs = dev->msgs;
        u32 ic_tar = 0;
-       bool enabled;
-
-       enabled = dw_readl(dev, DW_IC_ENABLE_STATUS) & 1;
-
-       if (enabled) {
-               u32 ic_status;
 
-               /*
-                * Only disable adapter if ic_tar and ic_con can't be
-                * dynamically updated
-                */
-               ic_status = dw_readl(dev, DW_IC_STATUS);
-               if (!dev->dynamic_tar_update_enabled ||
-                   (ic_status & DW_IC_STATUS_MST_ACTIVITY) ||
-                   !(ic_status & DW_IC_STATUS_TFE)) {
-                       __i2c_dw_enable_and_wait(dev, false);
-                       enabled = false;
-               }
-       }
+       /* Disable the adapter */
+       __i2c_dw_enable_and_wait(dev, false);
 
        /* if the slave address is ten bit address, enable 10BITADDR */
        if (dev->dynamic_tar_update_enabled) {
@@ -515,8 +508,8 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
        /* enforce disabled interrupts (due to HW issues) */
        i2c_dw_disable_int(dev);
 
-       if (!enabled)
-               __i2c_dw_enable(dev, true);
+       /* Enable the adapter */
+       __i2c_dw_enable(dev, true);
 
        /* Clear and enable interrupts */
        dw_readl(dev, DW_IC_CLR_INTR);
@@ -600,7 +593,7 @@ i2c_dw_xfer_msg(struct dw_i2c_dev *dev)
                        if (msgs[dev->msg_write_idx].flags & I2C_M_RD) {
 
                                /* avoid rx buffer overrun */
-                               if (rx_limit - dev->rx_outstanding <= 0)
+                               if (dev->rx_outstanding >= dev->rx_fifo_depth)
                                        break;
 
                                dw_writel(dev, cmd | 0x100, DW_IC_DATA_CMD);
@@ -697,8 +690,7 @@ static int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev)
 }
 
 /*
- * Prepare controller for a transaction and start transfer by calling
- * i2c_dw_xfer_init()
+ * Prepare controller for a transaction and call i2c_dw_xfer_msg
  */
 static int
 i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
@@ -741,13 +733,23 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
                goto done;
        }
 
+       /*
+        * We must disable the adapter before returning and signaling the end
+        * of the current transfer. Otherwise the hardware might continue
+        * generating interrupts which in turn causes a race condition with
+        * the following transfer.  Needs some more investigation if the
+        * additional interrupts are a hardware bug or this driver doesn't
+        * handle them correctly yet.
+        */
+       __i2c_dw_enable(dev, false);
+
        if (dev->msg_err) {
                ret = dev->msg_err;
                goto done;
        }
 
        /* no error */
-       if (likely(!dev->cmd_err)) {
+       if (likely(!dev->cmd_err && !dev->status)) {
                ret = num;
                goto done;
        }
@@ -757,6 +759,11 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
                ret = i2c_dw_handle_tx_abort(dev);
                goto done;
        }
+
+       if (dev->status)
+               dev_err(dev->dev,
+                       "transfer terminated early - interrupt latency too high?\n");
+
        ret = -EIO;
 
 done:
@@ -877,19 +884,9 @@ static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
         */
 
 tx_aborted:
-       if ((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET))
-                       || dev->msg_err) {
-               /*
-                * We must disable interruts before returning and signaling
-                * the end of the current transfer. Otherwise the hardware
-                * might continue generating interrupts for non-existent
-                * transfers.
-                */
-               i2c_dw_disable_int(dev);
-               dw_readl(dev, DW_IC_CLR_INTR);
-
+       if ((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET)) || dev->msg_err)
                complete(&dev->cmd_complete);
-       else if (unlikely(dev->accessor_flags & ACCESS_INTR_MASK)) {
+       else if (unlikely(dev->accessor_flags & ACCESS_INTR_MASK)) {
                /* workaround to trigger pending interrupt */
                stat = dw_readl(dev, DW_IC_INTR_MASK);
                i2c_dw_disable_int(dev);
index 9604024e0eb0959e4b77816214eaf3ac939c29ca..50813a24c541d216ee32f53e8b52fbde6ab09b5c 100644 (file)
@@ -347,7 +347,7 @@ static int dc_i2c_probe(struct platform_device *pdev)
 
        ret = i2c_add_adapter(&i2c->adap);
        if (ret < 0) {
-               clk_unprepare(i2c->clk);
+               clk_disable_unprepare(i2c->clk);
                return ret;
        }
 
@@ -368,6 +368,7 @@ static const struct of_device_id dc_i2c_match[] = {
        { .compatible = "cnxt,cx92755-i2c" },
        { },
 };
+MODULE_DEVICE_TABLE(of, dc_i2c_match);
 
 static struct platform_driver dc_i2c_driver = {
        .probe   = dc_i2c_probe,
index 08847e8b899872e2bc1bffd1d0936f63ee4f38d7..eb3627f35d12002776447982d493835fe36dc064 100644 (file)
 #define SMBHSTCFG_HST_EN       1
 #define SMBHSTCFG_SMB_SMI_EN   2
 #define SMBHSTCFG_I2C_EN       4
+#define SMBHSTCFG_SPD_WD       0x10
 
 /* TCO configuration bits for TCOCTL */
 #define TCOCTL_EN              0x0100
@@ -865,9 +866,16 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
                block = 1;
                break;
        case I2C_SMBUS_I2C_BLOCK_DATA:
-               /* NB: page 240 of ICH5 datasheet shows that the R/#W
-                * bit should be cleared here, even when reading */
-               outb_p((addr & 0x7f) << 1, SMBHSTADD(priv));
+               /*
+                * NB: page 240 of ICH5 datasheet shows that the R/#W
+                * bit should be cleared here, even when reading.
+                * However if SPD Write Disable is set (Lynx Point and later),
+                * the read will fail if we don't set the R/#W bit.
+                */
+               outb_p(((addr & 0x7f) << 1) |
+                      ((priv->original_hstcfg & SMBHSTCFG_SPD_WD) ?
+                       (read_write & 0x01) : 0),
+                      SMBHSTADD(priv));
                if (read_write == I2C_SMBUS_READ) {
                        /* NB: page 240 of ICH5 datasheet also shows
                         * that DATA1 is the cmd field when reading */
@@ -1573,6 +1581,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
                /* Disable SMBus interrupt feature if SMBus using SMI# */
                priv->features &= ~FEATURE_IRQ;
        }
+       if (temp & SMBHSTCFG_SPD_WD)
+               dev_info(&dev->dev, "SPD Write Disable is set\n");
 
        /* Clear special mode bits */
        if (priv->features & (FEATURE_SMBUS_PEC | FEATURE_BLOCK_BUFFER))
index 592a8f26a708db4cb331da5906c492f7af06a6a8..47fc1f1acff7db60a6cf909a43f6d443c04ca91e 100644 (file)
@@ -1009,10 +1009,13 @@ static int i2c_imx_init_recovery_info(struct imx_i2c_struct *i2c_imx,
        rinfo->sda_gpio = of_get_named_gpio(pdev->dev.of_node, "sda-gpios", 0);
        rinfo->scl_gpio = of_get_named_gpio(pdev->dev.of_node, "scl-gpios", 0);
 
-       if (!gpio_is_valid(rinfo->sda_gpio) ||
-           !gpio_is_valid(rinfo->scl_gpio) ||
-           IS_ERR(i2c_imx->pinctrl_pins_default) ||
-           IS_ERR(i2c_imx->pinctrl_pins_gpio)) {
+       if (rinfo->sda_gpio == -EPROBE_DEFER ||
+           rinfo->scl_gpio == -EPROBE_DEFER) {
+               return -EPROBE_DEFER;
+       } else if (!gpio_is_valid(rinfo->sda_gpio) ||
+                  !gpio_is_valid(rinfo->scl_gpio) ||
+                  IS_ERR(i2c_imx->pinctrl_pins_default) ||
+                  IS_ERR(i2c_imx->pinctrl_pins_gpio)) {
                dev_dbg(&pdev->dev, "recovery information incomplete\n");
                return 0;
        }
index b8ea62105f42c99205c1a241176ebb7210a55c08..30132c3957cdc3d2d76e03a5a82511d9b0a27829 100644 (file)
@@ -729,6 +729,7 @@ static const struct of_device_id jz4780_i2c_of_matches[] = {
        { .compatible = "ingenic,jz4780-i2c", },
        { /* sentinel */ }
 };
+MODULE_DEVICE_TABLE(of, jz4780_i2c_of_matches);
 
 static int jz4780_i2c_probe(struct platform_device *pdev)
 {
index 50702c7bb244beec821e4d04e0352c07ee516a21..df220666d62741f15eb4bbf99c960d3ec4a0f444 100644 (file)
@@ -694,6 +694,8 @@ static int rk3x_i2c_v0_calc_timings(unsigned long clk_rate,
        t_calc->div_low--;
        t_calc->div_high--;
 
+       /* Give the tuning value 0, that would not update con register */
+       t_calc->tuning = 0;
        /* Maximum divider supported by hw is 0xffff */
        if (t_calc->div_low > 0xffff) {
                t_calc->div_low = 0xffff;
index 263685c7a5128773f12dfc00bd4ba75324aeee4c..05cf192ef1acae340397d9ff67f942bca6d08d3e 100644 (file)
@@ -105,7 +105,7 @@ struct slimpro_i2c_dev {
        struct mbox_chan *mbox_chan;
        struct mbox_client mbox_client;
        struct completion rd_complete;
-       u8 dma_buffer[I2C_SMBUS_BLOCK_MAX];
+       u8 dma_buffer[I2C_SMBUS_BLOCK_MAX + 1]; /* dma_buffer[0] is used for length */
        u32 *resp_msg;
 };
 
index 2a972ed7aa0df113185b9fb567e76d11638739e4..e29ff37a43bd615bba54a39402c78d5574af0d92 100644 (file)
@@ -426,6 +426,7 @@ static const struct of_device_id xlp9xx_i2c_of_match[] = {
        { .compatible = "netlogic,xlp980-i2c", },
        { /* sentinel */ },
 };
+MODULE_DEVICE_TABLE(of, xlp9xx_i2c_of_match);
 
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id xlp9xx_i2c_acpi_ids[] = {
index 0968f59b6df58690207b182b4fe192d5422e349b..ad17d88d857361663ae98d8faff52a0d9cedbeab 100644 (file)
@@ -358,6 +358,7 @@ static const struct of_device_id xlr_i2c_dt_ids[] = {
        },
        { }
 };
+MODULE_DEVICE_TABLE(of, xlr_i2c_dt_ids);
 
 static int xlr_i2c_probe(struct platform_device *pdev)
 {
index 98fffa3a09f7fe5e716415c79963b7ac15587b9c..b432b64e307a81740b0ce8a292dc9b80a3921a40 100644 (file)
@@ -1680,7 +1680,8 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
 
 static void of_i2c_register_devices(struct i2c_adapter *adap)
 {
-       struct device_node *node;
+       struct device_node *bus, *node;
+       struct i2c_client *client;
 
        /* Only register child devices if the adapter has a node pointer set */
        if (!adap->dev.of_node)
@@ -1688,11 +1689,24 @@ static void of_i2c_register_devices(struct i2c_adapter *adap)
 
        dev_dbg(&adap->dev, "of_i2c: walking child nodes\n");
 
-       for_each_available_child_of_node(adap->dev.of_node, node) {
+       bus = of_get_child_by_name(adap->dev.of_node, "i2c-bus");
+       if (!bus)
+               bus = of_node_get(adap->dev.of_node);
+
+       for_each_available_child_of_node(bus, node) {
                if (of_node_test_and_set_flag(node, OF_POPULATED))
                        continue;
-               of_i2c_register_device(adap, node);
+
+               client = of_i2c_register_device(adap, node);
+               if (IS_ERR(client)) {
+                       dev_warn(&adap->dev,
+                                "Failed to create I2C device for %s\n",
+                                node->full_name);
+                       of_node_clear_flag(node, OF_POPULATED);
+               }
        }
+
+       of_node_put(bus);
 }
 
 static int of_dev_node_match(struct device *dev, void *data)
@@ -2165,6 +2179,7 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
        /* add the driver to the list of i2c drivers in the driver core */
        driver->driver.owner = owner;
        driver->driver.bus = &i2c_bus_type;
+       INIT_LIST_HEAD(&driver->clients);
 
        /* When registration returns, the driver core
         * will have called probe() for all matching-but-unbound devices.
@@ -2175,7 +2190,6 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
 
        pr_debug("driver [%s] registered\n", driver->driver.name);
 
-       INIT_LIST_HEAD(&driver->clients);
        /* Walk the adapters that are already present */
        i2c_for_each_dev(driver, __process_new_driver);
 
@@ -2293,6 +2307,7 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action,
                if (IS_ERR(client)) {
                        dev_err(&adap->dev, "failed to create client for '%s'\n",
                                 rd->dn->full_name);
+                       of_node_clear_flag(rd->dn, OF_POPULATED);
                        return notifier_from_errno(PTR_ERR(client));
                }
                break;
index e280c8ecc0b59bcb76d6ca56830aa30aea01a2e3..96de9ce5669b64daa0d77cc856e20a9ea589c234 100644 (file)
@@ -63,6 +63,7 @@ config I2C_MUX_PINCTRL
 
 config I2C_MUX_REG
        tristate "Register-based I2C multiplexer"
+       depends on HAS_IOMEM
        help
          If you say yes to this option, support will be included for a
          register based I2C multiplexer. This driver provides access to
index b3893f6282ba5b38920388657d5d1a70129a0148..3e6fe1760d82fc9b654540ef6da6b955ca7daa81 100644 (file)
@@ -69,10 +69,28 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne
                goto err_with_revert;
        }
 
-       p = devm_pinctrl_get_select(adap->dev.parent, priv->bus_name);
+       /*
+        * Check if there are pinctrl states at all. Note: we can't use
+        * devm_pinctrl_get_select() because we need to distinguish between
+        * the -ENODEV from devm_pinctrl_get() and pinctrl_lookup_state().
+        */
+       p = devm_pinctrl_get(adap->dev.parent);
        if (IS_ERR(p)) {
                ret = PTR_ERR(p);
-               goto err_with_put;
+               /* continue if just no pinctrl states (e.g. i2c-gpio), otherwise exit */
+               if (ret != -ENODEV)
+                       goto err_with_put;
+       } else {
+               /* there are states. check and use them */
+               struct pinctrl_state *s = pinctrl_lookup_state(p, priv->bus_name);
+
+               if (IS_ERR(s)) {
+                       ret = PTR_ERR(s);
+                       goto err_with_put;
+               }
+               ret = pinctrl_select_state(p, s);
+               if (ret < 0)
+                       goto err_with_put;
        }
 
        priv->chan[new_chan].parent_adap = adap;
index 1091346f2480a6e4c4a79dd1836964894b9c8169..8bc3d36d28379ee9bf834b49d8022a310e55aee4 100644 (file)
@@ -268,9 +268,9 @@ static int pca954x_probe(struct i2c_client *client,
                                /* discard unconfigured channels */
                                break;
                        idle_disconnect_pd = pdata->modes[num].deselect_on_exit;
-                       data->deselect |= (idle_disconnect_pd
-                                          || idle_disconnect_dt) << num;
                }
+               data->deselect |= (idle_disconnect_pd ||
+                                  idle_disconnect_dt) << num;
 
                ret = i2c_mux_add_adapter(muxc, force, num, class);
 
index da3fb069ec5c06dc2bdd99868ee1ad4e114b471a..ce69048c88e98ba9825fbee17e8fd186b94e8d8f 100644 (file)
@@ -743,8 +743,8 @@ static int st_accel_read_raw(struct iio_dev *indio_dev,
 
                return IIO_VAL_INT;
        case IIO_CHAN_INFO_SCALE:
-               *val = 0;
-               *val2 = adata->current_fullscale->gain;
+               *val = adata->current_fullscale->gain / 1000000;
+               *val2 = adata->current_fullscale->gain % 1000000;
                return IIO_VAL_INT_PLUS_MICRO;
        case IIO_CHAN_INFO_SAMP_FREQ:
                *val = adata->odr;
@@ -763,9 +763,13 @@ static int st_accel_write_raw(struct iio_dev *indio_dev,
        int err;
 
        switch (mask) {
-       case IIO_CHAN_INFO_SCALE:
-               err = st_sensors_set_fullscale_by_gain(indio_dev, val2);
+       case IIO_CHAN_INFO_SCALE: {
+               int gain;
+
+               gain = val * 1000000 + val2;
+               err = st_sensors_set_fullscale_by_gain(indio_dev, gain);
                break;
+       }
        case IIO_CHAN_INFO_SAMP_FREQ:
                if (val2)
                        return -EINVAL;
index 7edcf32386206cfb391afaa10055e1dff0b3eb72..99c051490effa736e91d978442e5882fbc2a54f6 100644 (file)
@@ -437,6 +437,8 @@ config STX104
 config TI_ADC081C
        tristate "Texas Instruments ADC081C/ADC101C/ADC121C family"
        depends on I2C
+       select IIO_BUFFER
+       select IIO_TRIGGERED_BUFFER
        help
          If you say yes here you get support for Texas Instruments ADC081C,
          ADC101C and ADC121C ADC chips.
index bd321b305a0a03a38d2a26ec282eb22e7f37d5b6..ef761a5086304b4b032afa6095e2d9a605128caa 100644 (file)
@@ -213,13 +213,14 @@ static int atlas_check_ec_calibration(struct atlas_data *data)
        struct device *dev = &data->client->dev;
        int ret;
        unsigned int val;
+       __be16  rval;
 
-       ret = regmap_bulk_read(data->regmap, ATLAS_REG_EC_PROBE, &val, 2);
+       ret = regmap_bulk_read(data->regmap, ATLAS_REG_EC_PROBE, &rval, 2);
        if (ret)
                return ret;
 
-       dev_info(dev, "probe set to K = %d.%.2d", be16_to_cpu(val) / 100,
-                                                be16_to_cpu(val) % 100);
+       val = be16_to_cpu(rval);
+       dev_info(dev, "probe set to K = %d.%.2d", val / 100, val % 100);
 
        ret = regmap_read(data->regmap, ATLAS_REG_EC_CALIB_STATUS, &val);
        if (ret)
index dc33c1dd5191a57aaa8c3c66cdaa75a31866463c..b5beea53d6f6551aba68e570be25deca87d8288c 100644 (file)
@@ -30,26 +30,26 @@ static struct {
        u32 usage_id;
        int unit; /* 0 for default others from HID sensor spec */
        int scale_val0; /* scale, whole number */
-       int scale_val1; /* scale, fraction in micros */
+       int scale_val1; /* scale, fraction in nanos */
 } unit_conversion[] = {
-       {HID_USAGE_SENSOR_ACCEL_3D, 0, 9, 806650},
+       {HID_USAGE_SENSOR_ACCEL_3D, 0, 9, 806650000},
        {HID_USAGE_SENSOR_ACCEL_3D,
                HID_USAGE_SENSOR_UNITS_METERS_PER_SEC_SQRD, 1, 0},
        {HID_USAGE_SENSOR_ACCEL_3D,
-               HID_USAGE_SENSOR_UNITS_G, 9, 806650},
+               HID_USAGE_SENSOR_UNITS_G, 9, 806650000},
 
-       {HID_USAGE_SENSOR_GYRO_3D, 0, 0, 17453},
+       {HID_USAGE_SENSOR_GYRO_3D, 0, 0, 17453293},
        {HID_USAGE_SENSOR_GYRO_3D,
                HID_USAGE_SENSOR_UNITS_RADIANS_PER_SECOND, 1, 0},
        {HID_USAGE_SENSOR_GYRO_3D,
-               HID_USAGE_SENSOR_UNITS_DEGREES_PER_SECOND, 0, 17453},
+               HID_USAGE_SENSOR_UNITS_DEGREES_PER_SECOND, 0, 17453293},
 
-       {HID_USAGE_SENSOR_COMPASS_3D, 0, 0, 1000},
+       {HID_USAGE_SENSOR_COMPASS_3D, 0, 0, 1000000},
        {HID_USAGE_SENSOR_COMPASS_3D, HID_USAGE_SENSOR_UNITS_GAUSS, 1, 0},
 
-       {HID_USAGE_SENSOR_INCLINOMETER_3D, 0, 0, 17453},
+       {HID_USAGE_SENSOR_INCLINOMETER_3D, 0, 0, 17453293},
        {HID_USAGE_SENSOR_INCLINOMETER_3D,
-               HID_USAGE_SENSOR_UNITS_DEGREES, 0, 17453},
+               HID_USAGE_SENSOR_UNITS_DEGREES, 0, 17453293},
        {HID_USAGE_SENSOR_INCLINOMETER_3D,
                HID_USAGE_SENSOR_UNITS_RADIANS, 1, 0},
 
@@ -57,7 +57,7 @@ static struct {
        {HID_USAGE_SENSOR_ALS, HID_USAGE_SENSOR_UNITS_LUX, 1, 0},
 
        {HID_USAGE_SENSOR_PRESSURE, 0, 100, 0},
-       {HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 0, 1000},
+       {HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 0, 1000000},
 };
 
 static int pow_10(unsigned power)
@@ -266,15 +266,15 @@ EXPORT_SYMBOL(hid_sensor_write_raw_hyst_value);
 /*
  * This function applies the unit exponent to the scale.
  * For example:
- * 9.806650 ->exp:2-> val0[980]val1[665000]
- * 9.000806 ->exp:2-> val0[900]val1[80600]
- * 0.174535 ->exp:2-> val0[17]val1[453500]
- * 1.001745 ->exp:0-> val0[1]val1[1745]
- * 1.001745 ->exp:2-> val0[100]val1[174500]
- * 1.001745 ->exp:4-> val0[10017]val1[450000]
- * 9.806650 ->exp:-2-> val0[0]val1[98066]
+ * 9.806650000 ->exp:2-> val0[980]val1[665000000]
+ * 9.000806000 ->exp:2-> val0[900]val1[80600000]
+ * 0.174535293 ->exp:2-> val0[17]val1[453529300]
+ * 1.001745329 ->exp:0-> val0[1]val1[1745329]
+ * 1.001745329 ->exp:2-> val0[100]val1[174532900]
+ * 1.001745329 ->exp:4-> val0[10017]val1[453290000]
+ * 9.806650000 ->exp:-2-> val0[0]val1[98066500]
  */
-static void adjust_exponent_micro(int *val0, int *val1, int scale0,
+static void adjust_exponent_nano(int *val0, int *val1, int scale0,
                                  int scale1, int exp)
 {
        int i;
@@ -285,32 +285,32 @@ static void adjust_exponent_micro(int *val0, int *val1, int scale0,
        if (exp > 0) {
                *val0 = scale0 * pow_10(exp);
                res = 0;
-               if (exp > 6) {
+               if (exp > 9) {
                        *val1 = 0;
                        return;
                }
                for (i = 0; i < exp; ++i) {
-                       x = scale1 / pow_10(5 - i);
+                       x = scale1 / pow_10(8 - i);
                        res += (pow_10(exp - 1 - i) * x);
-                       scale1 = scale1 % pow_10(5 - i);
+                       scale1 = scale1 % pow_10(8 - i);
                }
                *val0 += res;
                        *val1 = scale1 * pow_10(exp);
        } else if (exp < 0) {
                exp = abs(exp);
-               if (exp > 6) {
+               if (exp > 9) {
                        *val0 = *val1 = 0;
                        return;
                }
                *val0 = scale0 / pow_10(exp);
                rem = scale0 % pow_10(exp);
                res = 0;
-               for (i = 0; i < (6 - exp); ++i) {
-                       x = scale1 / pow_10(5 - i);
-                       res += (pow_10(5 - exp - i) * x);
-                       scale1 = scale1 % pow_10(5 - i);
+               for (i = 0; i < (9 - exp); ++i) {
+                       x = scale1 / pow_10(8 - i);
+                       res += (pow_10(8 - exp - i) * x);
+                       scale1 = scale1 % pow_10(8 - i);
                }
-               *val1 = rem * pow_10(6 - exp) + res;
+               *val1 = rem * pow_10(9 - exp) + res;
        } else {
                *val0 = scale0;
                *val1 = scale1;
@@ -332,14 +332,14 @@ int hid_sensor_format_scale(u32 usage_id,
                        unit_conversion[i].unit == attr_info->units) {
                        exp  = hid_sensor_convert_exponent(
                                                attr_info->unit_expo);
-                       adjust_exponent_micro(val0, val1,
+                       adjust_exponent_nano(val0, val1,
                                        unit_conversion[i].scale_val0,
                                        unit_conversion[i].scale_val1, exp);
                        break;
                }
        }
 
-       return IIO_VAL_INT_PLUS_MICRO;
+       return IIO_VAL_INT_PLUS_NANO;
 }
 EXPORT_SYMBOL(hid_sensor_format_scale);
 
index 285a64a589d7137e7c2f188a851e4a36526a2824..975a1f19f74760e5e1c17c786e36d9a86ae5a2a6 100644 (file)
@@ -612,7 +612,7 @@ EXPORT_SYMBOL(st_sensors_sysfs_sampling_frequency_avail);
 ssize_t st_sensors_sysfs_scale_avail(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
-       int i, len = 0;
+       int i, len = 0, q, r;
        struct iio_dev *indio_dev = dev_get_drvdata(dev);
        struct st_sensor_data *sdata = iio_priv(indio_dev);
 
@@ -621,8 +621,10 @@ ssize_t st_sensors_sysfs_scale_avail(struct device *dev,
                if (sdata->sensor_settings->fs.fs_avl[i].num == 0)
                        break;
 
-               len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ",
-                               sdata->sensor_settings->fs.fs_avl[i].gain);
+               q = sdata->sensor_settings->fs.fs_avl[i].gain / 1000000;
+               r = sdata->sensor_settings->fs.fs_avl[i].gain % 1000000;
+
+               len += scnprintf(buf + len, PAGE_SIZE - len, "%u.%06u ", q, r);
        }
        mutex_unlock(&indio_dev->mlock);
        buf[len - 1] = '\n';
index b98b9d94d184cd058b7ddff7c40e7b9d87f965ac..a97e802ca523138227e9930a585357fd2f873e94 100644 (file)
@@ -335,6 +335,7 @@ static struct platform_driver hid_dev_rot_platform_driver = {
        .id_table = hid_dev_rot_ids,
        .driver = {
                .name   = KBUILD_MODNAME,
+               .pm     = &hid_sensor_pm_ops,
        },
        .probe          = hid_dev_rot_probe,
        .remove         = hid_dev_rot_remove,
index 39dd2026ccc928a7f3c0c1be60948969923594ee..f962f31a5eb223c4f41d273743ed75f30e648a18 100644 (file)
@@ -123,22 +123,26 @@ static int maxim_thermocouple_read(struct maxim_thermocouple_data *data,
 {
        unsigned int storage_bytes = data->chip->read_size;
        unsigned int shift = chan->scan_type.shift + (chan->address * 8);
-       unsigned int buf;
+       __be16 buf16;
+       __be32 buf32;
        int ret;
 
-       ret = spi_read(data->spi, (void *) &buf, storage_bytes);
-       if (ret)
-               return ret;
-
        switch (storage_bytes) {
        case 2:
-               *val = be16_to_cpu(buf);
+               ret = spi_read(data->spi, (void *)&buf16, storage_bytes);
+               *val = be16_to_cpu(buf16);
                break;
        case 4:
-               *val = be32_to_cpu(buf);
+               ret = spi_read(data->spi, (void *)&buf32, storage_bytes);
+               *val = be32_to_cpu(buf32);
                break;
+       default:
+               ret = -EINVAL;
        }
 
+       if (ret)
+               return ret;
+
        /* check to be sure this is a valid reading */
        if (*val & data->chip->status_bit)
                return -EINVAL;
index 19a418a1b631250f3f652c256a75bea30e76f3a3..fb3fb89640e59c3d396e133aab2b47ddd9b6ab15 100644 (file)
@@ -89,4 +89,6 @@ source "drivers/infiniband/sw/rxe/Kconfig"
 
 source "drivers/infiniband/hw/hfi1/Kconfig"
 
+source "drivers/infiniband/hw/qedr/Kconfig"
+
 endif # INFINIBAND
index b136d3acc5bde63f5af8d43f16af3d3107a6db05..0f58f46dbad7e0b4059cbab4ffca95623cad8bb9 100644 (file)
@@ -699,13 +699,16 @@ EXPORT_SYMBOL(rdma_addr_cancel);
 struct resolve_cb_context {
        struct rdma_dev_addr *addr;
        struct completion comp;
+       int status;
 };
 
 static void resolve_cb(int status, struct sockaddr *src_addr,
             struct rdma_dev_addr *addr, void *context)
 {
-       memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
-                               rdma_dev_addr));
+       if (!status)
+               memcpy(((struct resolve_cb_context *)context)->addr,
+                      addr, sizeof(struct rdma_dev_addr));
+       ((struct resolve_cb_context *)context)->status = status;
        complete(&((struct resolve_cb_context *)context)->comp);
 }
 
@@ -743,6 +746,10 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 
        wait_for_completion(&ctx.comp);
 
+       ret = ctx.status;
+       if (ret)
+               return ret;
+
        memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
        dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
        if (!dev)
index c99525512b3434d24a7882d4f869f6e675da44b2..71c7c4c328ef6bb438273395eb9b2d40667e5da3 100644 (file)
@@ -80,6 +80,8 @@ static struct ib_cm {
        __be32 random_id_operand;
        struct list_head timewait_list;
        struct workqueue_struct *wq;
+       /* Sync on cm change port state */
+       spinlock_t state_lock;
 } cm;
 
 /* Counter indexes ordered by attribute ID */
@@ -161,6 +163,8 @@ struct cm_port {
        struct ib_mad_agent *mad_agent;
        struct kobject port_obj;
        u8 port_num;
+       struct list_head cm_priv_prim_list;
+       struct list_head cm_priv_altr_list;
        struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
 };
 
@@ -241,6 +245,12 @@ struct cm_id_private {
        u8 service_timeout;
        u8 target_ack_delay;
 
+       struct list_head prim_list;
+       struct list_head altr_list;
+       /* Indicates that the send port mad is registered and av is set */
+       int prim_send_port_not_ready;
+       int altr_send_port_not_ready;
+
        struct list_head work_list;
        atomic_t work_count;
 };
@@ -259,20 +269,47 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
        struct ib_mad_agent *mad_agent;
        struct ib_mad_send_buf *m;
        struct ib_ah *ah;
+       struct cm_av *av;
+       unsigned long flags, flags2;
+       int ret = 0;
 
+       /* don't let the port be released until the agent is down */
+       spin_lock_irqsave(&cm.state_lock, flags2);
+       spin_lock_irqsave(&cm.lock, flags);
+       if (!cm_id_priv->prim_send_port_not_ready)
+               av = &cm_id_priv->av;
+       else if (!cm_id_priv->altr_send_port_not_ready &&
+                (cm_id_priv->alt_av.port))
+               av = &cm_id_priv->alt_av;
+       else {
+               pr_info("%s: not valid CM id\n", __func__);
+               ret = -ENODEV;
+               spin_unlock_irqrestore(&cm.lock, flags);
+               goto out;
+       }
+       spin_unlock_irqrestore(&cm.lock, flags);
+       /* Make sure the port hasn't released the mad yet */
        mad_agent = cm_id_priv->av.port->mad_agent;
-       ah = ib_create_ah(mad_agent->qp->pd, &cm_id_priv->av.ah_attr);
-       if (IS_ERR(ah))
-               return PTR_ERR(ah);
+       if (!mad_agent) {
+               pr_info("%s: not a valid MAD agent\n", __func__);
+               ret = -ENODEV;
+               goto out;
+       }
+       ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
+       if (IS_ERR(ah)) {
+               ret = PTR_ERR(ah);
+               goto out;
+       }
 
        m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
-                              cm_id_priv->av.pkey_index,
+                              av->pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC,
                               IB_MGMT_BASE_VERSION);
        if (IS_ERR(m)) {
                ib_destroy_ah(ah);
-               return PTR_ERR(m);
+               ret = PTR_ERR(m);
+               goto out;
        }
 
        /* Timeout set by caller if response is expected. */
@@ -282,7 +319,10 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
        atomic_inc(&cm_id_priv->refcount);
        m->context[0] = cm_id_priv;
        *msg = m;
-       return 0;
+
+out:
+       spin_unlock_irqrestore(&cm.state_lock, flags2);
+       return ret;
 }
 
 static int cm_alloc_response_msg(struct cm_port *port,
@@ -352,7 +392,8 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                           grh, &av->ah_attr);
 }
 
-static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
+static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
+                             struct cm_id_private *cm_id_priv)
 {
        struct cm_device *cm_dev;
        struct cm_port *port = NULL;
@@ -387,7 +428,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
                             &av->ah_attr);
        av->timeout = path->packet_life_time + 1;
 
-       return 0;
+       spin_lock_irqsave(&cm.lock, flags);
+       if (&cm_id_priv->av == av)
+               list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
+       else if (&cm_id_priv->alt_av == av)
+               list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
+       else
+               ret = -EINVAL;
+
+       spin_unlock_irqrestore(&cm.lock, flags);
+
+       return ret;
 }
 
 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
@@ -677,6 +728,8 @@ struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
        spin_lock_init(&cm_id_priv->lock);
        init_completion(&cm_id_priv->comp);
        INIT_LIST_HEAD(&cm_id_priv->work_list);
+       INIT_LIST_HEAD(&cm_id_priv->prim_list);
+       INIT_LIST_HEAD(&cm_id_priv->altr_list);
        atomic_set(&cm_id_priv->work_count, -1);
        atomic_set(&cm_id_priv->refcount, 1);
        return &cm_id_priv->id;
@@ -892,6 +945,15 @@ retest:
                break;
        }
 
+       spin_lock_irq(&cm.lock);
+       if (!list_empty(&cm_id_priv->altr_list) &&
+           (!cm_id_priv->altr_send_port_not_ready))
+               list_del(&cm_id_priv->altr_list);
+       if (!list_empty(&cm_id_priv->prim_list) &&
+           (!cm_id_priv->prim_send_port_not_ready))
+               list_del(&cm_id_priv->prim_list);
+       spin_unlock_irq(&cm.lock);
+
        cm_free_id(cm_id->local_id);
        cm_deref_id(cm_id_priv);
        wait_for_completion(&cm_id_priv->comp);
@@ -1192,12 +1254,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
                goto out;
        }
 
-       ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av);
+       ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
+                                cm_id_priv);
        if (ret)
                goto error1;
        if (param->alternate_path) {
                ret = cm_init_av_by_path(param->alternate_path,
-                                        &cm_id_priv->alt_av);
+                                        &cm_id_priv->alt_av, cm_id_priv);
                if (ret)
                        goto error1;
        }
@@ -1653,7 +1716,8 @@ static int cm_req_handler(struct cm_work *work)
                        dev_put(gid_attr.ndev);
                }
                work->path[0].gid_type = gid_attr.gid_type;
-               ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+               ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+                                        cm_id_priv);
        }
        if (ret) {
                int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
@@ -1672,7 +1736,8 @@ static int cm_req_handler(struct cm_work *work)
                goto rejected;
        }
        if (req_msg->alt_local_lid) {
-               ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
+               ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
+                                        cm_id_priv);
                if (ret) {
                        ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
                                       &work->path[0].sgid,
@@ -2727,7 +2792,8 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
                goto out;
        }
 
-       ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
+       ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
+                                cm_id_priv);
        if (ret)
                goto out;
        cm_id_priv->alt_av.timeout =
@@ -2839,7 +2905,8 @@ static int cm_lap_handler(struct cm_work *work)
        cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
                                work->mad_recv_wc->recv_buf.grh,
                                &cm_id_priv->av);
-       cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av);
+       cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
+                          cm_id_priv);
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -3031,7 +3098,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
                return -EINVAL;
 
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-       ret = cm_init_av_by_path(param->path, &cm_id_priv->av);
+       ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
        if (ret)
                goto out;
 
@@ -3468,7 +3535,9 @@ out:
 static int cm_migrate(struct ib_cm_id *cm_id)
 {
        struct cm_id_private *cm_id_priv;
+       struct cm_av tmp_av;
        unsigned long flags;
+       int tmp_send_port_not_ready;
        int ret = 0;
 
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
@@ -3477,7 +3546,14 @@ static int cm_migrate(struct ib_cm_id *cm_id)
            (cm_id->lap_state == IB_CM_LAP_UNINIT ||
             cm_id->lap_state == IB_CM_LAP_IDLE)) {
                cm_id->lap_state = IB_CM_LAP_IDLE;
+               /* Swap address vector */
+               tmp_av = cm_id_priv->av;
                cm_id_priv->av = cm_id_priv->alt_av;
+               cm_id_priv->alt_av = tmp_av;
+               /* Swap port send ready state */
+               tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
+               cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
+               cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
        } else
                ret = -EINVAL;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -3888,6 +3964,9 @@ static void cm_add_one(struct ib_device *ib_device)
                port->cm_dev = cm_dev;
                port->port_num = i;
 
+               INIT_LIST_HEAD(&port->cm_priv_prim_list);
+               INIT_LIST_HEAD(&port->cm_priv_altr_list);
+
                ret = cm_create_port_fs(port);
                if (ret)
                        goto error1;
@@ -3945,6 +4024,8 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 {
        struct cm_device *cm_dev = client_data;
        struct cm_port *port;
+       struct cm_id_private *cm_id_priv;
+       struct ib_mad_agent *cur_mad_agent;
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_CM_SUP
        };
@@ -3968,15 +4049,27 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
 
                port = cm_dev->port[i-1];
                ib_modify_port(ib_device, port->port_num, 0, &port_modify);
+               /* Mark all the cm_id's as not valid */
+               spin_lock_irq(&cm.lock);
+               list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
+                       cm_id_priv->altr_send_port_not_ready = 1;
+               list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
+                       cm_id_priv->prim_send_port_not_ready = 1;
+               spin_unlock_irq(&cm.lock);
                /*
                 * We flush the queue here after the going_down set, this
                 * verify that no new works will be queued in the recv handler,
                 * after that we can call the unregister_mad_agent
                 */
                flush_workqueue(cm.wq);
-               ib_unregister_mad_agent(port->mad_agent);
+               spin_lock_irq(&cm.state_lock);
+               cur_mad_agent = port->mad_agent;
+               port->mad_agent = NULL;
+               spin_unlock_irq(&cm.state_lock);
+               ib_unregister_mad_agent(cur_mad_agent);
                cm_remove_port_fs(port);
        }
+
        device_unregister(cm_dev->device);
        kfree(cm_dev);
 }
@@ -3989,6 +4082,7 @@ static int __init ib_cm_init(void)
        INIT_LIST_HEAD(&cm.device_list);
        rwlock_init(&cm.device_lock);
        spin_lock_init(&cm.lock);
+       spin_lock_init(&cm.state_lock);
        cm.listen_service_table = RB_ROOT;
        cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
        cm.remote_id_table = RB_ROOT;
index 36bf50ebb187eb40195f0ede0d2d3abcc47d8ccb..22fcf284dd8b633e2b43f7617d53d76c2dbac832 100644 (file)
@@ -116,7 +116,7 @@ static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
 static struct workqueue_struct *cma_wq;
-static int cma_pernet_id;
+static unsigned int cma_pernet_id;
 
 struct cma_pernet {
        struct idr tcp_ps;
@@ -1094,47 +1094,47 @@ static void cma_save_ib_info(struct sockaddr *src_addr,
        }
 }
 
-static void cma_save_ip4_info(struct sockaddr *src_addr,
-                             struct sockaddr *dst_addr,
+static void cma_save_ip4_info(struct sockaddr_in *src_addr,
+                             struct sockaddr_in *dst_addr,
                              struct cma_hdr *hdr,
                              __be16 local_port)
 {
-       struct sockaddr_in *ip4;
-
        if (src_addr) {
-               ip4 = (struct sockaddr_in *)src_addr;
-               ip4->sin_family = AF_INET;
-               ip4->sin_addr.s_addr = hdr->dst_addr.ip4.addr;
-               ip4->sin_port = local_port;
+               *src_addr = (struct sockaddr_in) {
+                       .sin_family = AF_INET,
+                       .sin_addr.s_addr = hdr->dst_addr.ip4.addr,
+                       .sin_port = local_port,
+               };
        }
 
        if (dst_addr) {
-               ip4 = (struct sockaddr_in *)dst_addr;
-               ip4->sin_family = AF_INET;
-               ip4->sin_addr.s_addr = hdr->src_addr.ip4.addr;
-               ip4->sin_port = hdr->port;
+               *dst_addr = (struct sockaddr_in) {
+                       .sin_family = AF_INET,
+                       .sin_addr.s_addr = hdr->src_addr.ip4.addr,
+                       .sin_port = hdr->port,
+               };
        }
 }
 
-static void cma_save_ip6_info(struct sockaddr *src_addr,
-                             struct sockaddr *dst_addr,
+static void cma_save_ip6_info(struct sockaddr_in6 *src_addr,
+                             struct sockaddr_in6 *dst_addr,
                              struct cma_hdr *hdr,
                              __be16 local_port)
 {
-       struct sockaddr_in6 *ip6;
-
        if (src_addr) {
-               ip6 = (struct sockaddr_in6 *)src_addr;
-               ip6->sin6_family = AF_INET6;
-               ip6->sin6_addr = hdr->dst_addr.ip6;
-               ip6->sin6_port = local_port;
+               *src_addr = (struct sockaddr_in6) {
+                       .sin6_family = AF_INET6,
+                       .sin6_addr = hdr->dst_addr.ip6,
+                       .sin6_port = local_port,
+               };
        }
 
        if (dst_addr) {
-               ip6 = (struct sockaddr_in6 *)dst_addr;
-               ip6->sin6_family = AF_INET6;
-               ip6->sin6_addr = hdr->src_addr.ip6;
-               ip6->sin6_port = hdr->port;
+               *dst_addr = (struct sockaddr_in6) {
+                       .sin6_family = AF_INET6,
+                       .sin6_addr = hdr->src_addr.ip6,
+                       .sin6_port = hdr->port,
+               };
        }
 }
 
@@ -1159,10 +1159,12 @@ static int cma_save_ip_info(struct sockaddr *src_addr,
 
        switch (cma_get_ip_ver(hdr)) {
        case 4:
-               cma_save_ip4_info(src_addr, dst_addr, hdr, port);
+               cma_save_ip4_info((struct sockaddr_in *)src_addr,
+                                 (struct sockaddr_in *)dst_addr, hdr, port);
                break;
        case 6:
-               cma_save_ip6_info(src_addr, dst_addr, hdr, port);
+               cma_save_ip6_info((struct sockaddr_in6 *)src_addr,
+                                 (struct sockaddr_in6 *)dst_addr, hdr, port);
                break;
        default:
                return -EAFNOSUPPORT;
@@ -2436,6 +2438,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
        return 0;
 }
 
+static enum ib_gid_type cma_route_gid_type(enum rdma_network_type network_type,
+                                          unsigned long supported_gids,
+                                          enum ib_gid_type default_gid)
+{
+       if ((network_type == RDMA_NETWORK_IPV4 ||
+            network_type == RDMA_NETWORK_IPV6) &&
+           test_bit(IB_GID_TYPE_ROCE_UDP_ENCAP, &supported_gids))
+               return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+       return default_gid;
+}
+
 static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 {
        struct rdma_route *route = &id_priv->id.route;
@@ -2461,6 +2475,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
        route->num_paths = 1;
 
        if (addr->dev_addr.bound_dev_if) {
+               unsigned long supported_gids;
+
                ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
                if (!ndev) {
                        ret = -ENODEV;
@@ -2484,7 +2500,12 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 
                route->path_rec->net = &init_net;
                route->path_rec->ifindex = ndev->ifindex;
-               route->path_rec->gid_type = id_priv->gid_type;
+               supported_gids = roce_gid_type_mask_support(id_priv->id.device,
+                                                           id_priv->id.port_num);
+               route->path_rec->gid_type =
+                       cma_route_gid_type(addr->dev_addr.network,
+                                          supported_gids,
+                                          id_priv->gid_type);
        }
        if (!ndev) {
                ret = -ENODEV;
index c68746ce6624cdd7f0fcc9ecd4db851e45c4b497..84b4eff90395e5eb2afc7d66a2f932de8afd28f2 100644 (file)
@@ -94,6 +94,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
        unsigned long dma_attrs = 0;
        struct scatterlist *sg, *sg_list_start;
        int need_release = 0;
+       unsigned int gup_flags = FOLL_WRITE;
 
        if (dmasync)
                dma_attrs |= DMA_ATTR_WRITE_BARRIER;
@@ -174,7 +175,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
        cur_base = addr & PAGE_MASK;
 
-       if (npages == 0) {
+       if (npages == 0 || npages > UINT_MAX) {
                ret = -EINVAL;
                goto out;
        }
@@ -183,6 +184,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
        if (ret)
                goto out;
 
+       if (!umem->writable)
+               gup_flags |= FOLL_FORCE;
+
        need_release = 1;
        sg_list_start = umem->sg_head.sgl;
 
@@ -190,7 +194,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
                ret = get_user_pages(cur_base,
                                     min_t(unsigned long, npages,
                                           PAGE_SIZE / sizeof (struct page *)),
-                                    1, !umem->writable, page_list, vma_list);
+                                    gup_flags, page_list, vma_list);
 
                if (ret < 0)
                        goto out;
index 75077a018675e1aa77c7955878250ad521c9d25d..1f0fe3217f2386e03caf4b7e14cd228bddac2020 100644 (file)
@@ -527,6 +527,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
        u64 off;
        int j, k, ret = 0, start_idx, npages = 0;
        u64 base_virt_addr;
+       unsigned int flags = 0;
 
        if (access_mask == 0)
                return -EINVAL;
@@ -556,6 +557,9 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
                goto out_put_task;
        }
 
+       if (access_mask & ODP_WRITE_ALLOWED_BIT)
+               flags |= FOLL_WRITE;
+
        start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
        k = start_idx;
 
@@ -574,8 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
                 */
                npages = get_user_pages_remote(owning_process, owning_mm,
                                user_virt, gup_num_pages,
-                               access_mask & ODP_WRITE_ALLOWED_BIT,
-                               0, local_page_list, NULL);
+                               flags, local_page_list, NULL);
                up_read(&owning_mm->mmap_sem);
 
                if (npages < 0)
index 0012fa58c105ded78fa433b89024b8ab176aced0..44b1104eb168e50b7d598a2c5f756c5681d1e416 100644 (file)
@@ -262,12 +262,9 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                        container_of(uobj, struct ib_uqp_object, uevent.uobject);
 
                idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
-               if (qp != qp->real_qp) {
-                       ib_close_qp(qp);
-               } else {
+               if (qp == qp->real_qp)
                        ib_uverbs_detach_umcast(qp, uqp);
-                       ib_destroy_qp(qp);
-               }
+               ib_destroy_qp(qp);
                ib_uverbs_release_uevent(file, &uqp->uevent);
                kfree(uqp);
        }
index 21fe401ff178b96ba1c9c7a52c31e03d09505c3e..e7a5ed9f6f3fbbe6f3bc146469e3a00d0729b04f 100644 (file)
@@ -10,3 +10,4 @@ obj-$(CONFIG_INFINIBAND_OCRDMA)               += ocrdma/
 obj-$(CONFIG_INFINIBAND_USNIC)         += usnic/
 obj-$(CONFIG_INFINIBAND_HFI1)          += hfi1/
 obj-$(CONFIG_INFINIBAND_HNS)           += hns/
+obj-$(CONFIG_INFINIBAND_QEDR)          += qedr/
index 867b8cf82be8eb092f62c464802bedae0c3efe3a..19c6477af19f1416d17c15363e239307542b438d 100644 (file)
@@ -666,18 +666,6 @@ skip_cqe:
        return ret;
 }
 
-static void invalidate_mr(struct c4iw_dev *rhp, u32 rkey)
-{
-       struct c4iw_mr *mhp;
-       unsigned long flags;
-
-       spin_lock_irqsave(&rhp->lock, flags);
-       mhp = get_mhp(rhp, rkey >> 8);
-       if (mhp)
-               mhp->attr.state = 0;
-       spin_unlock_irqrestore(&rhp->lock, flags);
-}
-
 /*
  * Get one cq entry from c4iw and map it to openib.
  *
@@ -733,7 +721,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
                    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
                        wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
                        wc->wc_flags |= IB_WC_WITH_INVALIDATE;
-                       invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
+                       c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
                }
        } else {
                switch (CQE_OPCODE(&cqe)) {
@@ -762,7 +750,8 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 
                        /* Invalidate the MR if the fastreg failed */
                        if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS)
-                               invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe));
+                               c4iw_invalidate_mr(qhp->rhp,
+                                                  CQE_WRID_FR_STAG(&cqe));
                        break;
                default:
                        printk(KERN_ERR MOD "Unexpected opcode %d "
index 93e3d270a98af630f849947c2d6f6a1849b8d688..4e5baf4fe15ec3e1d839ef8d70ea9a20f8f4749d 100644 (file)
@@ -1481,6 +1481,7 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
 static struct cxgb4_uld_info c4iw_uld_info = {
        .name = DRV_NAME,
        .nrxq = MAX_ULD_QSETS,
+       .ntxq = MAX_ULD_QSETS,
        .rxq_size = 511,
        .ciq = true,
        .lro = false,
index 7e7f79e5500654f6aa8b1c42b29c20f9491b3c1a..4788e1a46fdee23cce2956cc17ba8d09b0f3eb56 100644 (file)
@@ -999,6 +999,6 @@ extern int db_coalescing_threshold;
 extern int use_dsgl;
 void c4iw_drain_rq(struct ib_qp *qp);
 void c4iw_drain_sq(struct ib_qp *qp);
-
+void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey);
 
 #endif
index 80e27749420a782b1f7c81b7f08611ea81333835..410408f886c1906a7abdbdbd3178ac9dc541ce3b 100644 (file)
@@ -770,3 +770,15 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
        kfree(mhp);
        return 0;
 }
+
+void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey)
+{
+       struct c4iw_mr *mhp;
+       unsigned long flags;
+
+       spin_lock_irqsave(&rhp->lock, flags);
+       mhp = get_mhp(rhp, rkey >> 8);
+       if (mhp)
+               mhp->attr.state = 0;
+       spin_unlock_irqrestore(&rhp->lock, flags);
+}
index f57deba6717ce69d08b12842ea7ff5fa8514c67a..b7ac97b27c88c2fe11ad11f564ce786085d3217c 100644 (file)
@@ -706,12 +706,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
        return 0;
 }
 
-static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe,
-                         struct ib_send_wr *wr, u8 *len16)
+static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
 {
-       struct c4iw_mr *mhp = get_mhp(dev, wr->ex.invalidate_rkey >> 8);
-
-       mhp->attr.state = 0;
        wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
        wqe->inv.r2 = 0;
        *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
@@ -797,11 +793,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        spin_lock_irqsave(&qhp->lock, flag);
        if (t4_wq_in_error(&qhp->wq)) {
                spin_unlock_irqrestore(&qhp->lock, flag);
+               *bad_wr = wr;
                return -EINVAL;
        }
        num_wrs = t4_sq_avail(&qhp->wq);
        if (num_wrs == 0) {
                spin_unlock_irqrestore(&qhp->lock, flag);
+               *bad_wr = wr;
                return -ENOMEM;
        }
        while (wr) {
@@ -840,10 +838,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                case IB_WR_RDMA_READ_WITH_INV:
                        fw_opcode = FW_RI_RDMA_READ_WR;
                        swsqe->opcode = FW_RI_READ_REQ;
-                       if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
+                       if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
+                               c4iw_invalidate_mr(qhp->rhp,
+                                                  wr->sg_list[0].lkey);
                                fw_flags = FW_RI_RDMA_READ_INVALIDATE;
-                       else
+                       } else {
                                fw_flags = 0;
+                       }
                        err = build_rdma_read(wqe, wr, &len16);
                        if (err)
                                break;
@@ -876,7 +877,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                fw_flags |= FW_RI_LOCAL_FENCE_FLAG;
                        fw_opcode = FW_RI_INV_LSTAG_WR;
                        swsqe->opcode = FW_RI_LOCAL_INV;
-                       err = build_inv_stag(qhp->rhp, wqe, wr, &len16);
+                       err = build_inv_stag(wqe, wr, &len16);
+                       c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
                        break;
                default:
                        PDBG("%s post of type=%d TBD!\n", __func__,
@@ -934,11 +936,13 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
        spin_lock_irqsave(&qhp->lock, flag);
        if (t4_wq_in_error(&qhp->wq)) {
                spin_unlock_irqrestore(&qhp->lock, flag);
+               *bad_wr = wr;
                return -EINVAL;
        }
        num_wrs = t4_rq_avail(&qhp->wq);
        if (num_wrs == 0) {
                spin_unlock_irqrestore(&qhp->lock, flag);
+               *bad_wr = wr;
                return -ENOMEM;
        }
        while (wr) {
index a26a9a0bfc417c536ed764499097b5e90c265d78..67ea85a569452e82d037b8e1ee8a3f2567d4f07f 100644 (file)
@@ -775,75 +775,3 @@ void hfi1_put_proc_affinity(int cpu)
        }
        mutex_unlock(&affinity->lock);
 }
-
-int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
-                          size_t count)
-{
-       struct hfi1_affinity_node *entry;
-       cpumask_var_t mask;
-       int ret, i;
-
-       mutex_lock(&node_affinity.lock);
-       entry = node_affinity_lookup(dd->node);
-
-       if (!entry) {
-               ret = -EINVAL;
-               goto unlock;
-       }
-
-       ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
-       if (!ret) {
-               ret = -ENOMEM;
-               goto unlock;
-       }
-
-       ret = cpulist_parse(buf, mask);
-       if (ret)
-               goto out;
-
-       if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) {
-               dd_dev_warn(dd, "Invalid CPU mask\n");
-               ret = -EINVAL;
-               goto out;
-       }
-
-       /* reset the SDMA interrupt affinity details */
-       init_cpu_mask_set(&entry->def_intr);
-       cpumask_copy(&entry->def_intr.mask, mask);
-
-       /* Reassign the affinity for each SDMA interrupt. */
-       for (i = 0; i < dd->num_msix_entries; i++) {
-               struct hfi1_msix_entry *msix;
-
-               msix = &dd->msix_entries[i];
-               if (msix->type != IRQ_SDMA)
-                       continue;
-
-               ret = get_irq_affinity(dd, msix);
-
-               if (ret)
-                       break;
-       }
-out:
-       free_cpumask_var(mask);
-unlock:
-       mutex_unlock(&node_affinity.lock);
-       return ret ? ret : strnlen(buf, PAGE_SIZE);
-}
-
-int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
-{
-       struct hfi1_affinity_node *entry;
-
-       mutex_lock(&node_affinity.lock);
-       entry = node_affinity_lookup(dd->node);
-
-       if (!entry) {
-               mutex_unlock(&node_affinity.lock);
-               return -EINVAL;
-       }
-
-       cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
-       mutex_unlock(&node_affinity.lock);
-       return strnlen(buf, PAGE_SIZE);
-}
index b89ea3c0ee1afcd4a3eff2e6916fd9fb0e38d7ca..42e63316afd193f4baf932f0c420eeca52738b39 100644 (file)
@@ -102,10 +102,6 @@ int hfi1_get_proc_affinity(int);
 /* Release a CPU used by a user process. */
 void hfi1_put_proc_affinity(int);
 
-int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf);
-int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
-                          size_t count);
-
 struct hfi1_affinity_node {
        int node;
        struct cpu_mask_set def_intr;
index 9bf5f23544d4cf529e67adcd9838fcc1692260bd..24d0820873cf2df220892b9523907aeb9485eb8b 100644 (file)
@@ -6301,19 +6301,8 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
        /* leave shared count at zero for both global and VL15 */
        write_global_credit(dd, vau, vl15buf, 0);
 
-       /* We may need some credits for another VL when sending packets
-        * with the snoop interface. Dividing it down the middle for VL15
-        * and VL0 should suffice.
-        */
-       if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
-               write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
-                   << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
-               write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
-                   << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
-       } else {
-               write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
-                       << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
-       }
+       write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
+                 << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
 }
 
 /*
@@ -9915,9 +9904,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
        u32 mask = ~((1U << ppd->lmc) - 1);
        u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
 
-       if (dd->hfi1_snoop.mode_flag)
-               dd_dev_info(dd, "Set lid/lmc while snooping");
-
        c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
                | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
        c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
@@ -12112,7 +12098,7 @@ static void update_synth_timer(unsigned long opaque)
        mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
 }
 
-#define C_MAX_NAME 13 /* 12 chars + one for /0 */
+#define C_MAX_NAME 16 /* 15 chars + one for \0 */
 static int init_cntrs(struct hfi1_devdata *dd)
 {
        int i, rcv_ctxts, j;
@@ -14463,7 +14449,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
         * Any error printing is already done by the init code.
         * On return, we have the chip mapped.
         */
-       ret = hfi1_pcie_ddinit(dd, pdev, ent);
+       ret = hfi1_pcie_ddinit(dd, pdev);
        if (ret < 0)
                goto bail_free;
 
@@ -14691,6 +14677,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
        if (ret)
                goto bail_free_cntrs;
 
+       init_completion(&dd->user_comp);
+
+       /* The user refcount starts with one to indicate an active device */
+       atomic_set(&dd->user_refcount, 1);
+
        goto bail;
 
 bail_free_rcverr:
index 92345259a8f47932faaa45ef16e314c9831fd428..043fd21dc5f3193828ce07da3fa20565c81f8bc0 100644 (file)
 /* DC_DC8051_CFG_MODE.GENERAL bits */
 #define DISABLE_SELF_GUID_CHECK 0x2
 
+/* Bad L2 frame error code */
+#define BAD_L2_ERR      0x6
+
 /*
  * Eager buffer minimum and maximum sizes supported by the hardware.
  * All power-of-two sizes in between are supported as well.
index 6563e4d38b80c141df9d9fd766d5eac9779eb914..c5efff29c1479138ea37e585395eccd17059640b 100644 (file)
@@ -599,7 +599,6 @@ static void __prescan_rxq(struct hfi1_packet *packet)
                                         dd->rhf_offset;
                struct rvt_qp *qp;
                struct ib_header *hdr;
-               struct ib_other_headers *ohdr;
                struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
                u64 rhf = rhf_to_cpu(rhf_addr);
                u32 etype = rhf_rcv_type(rhf), qpn, bth1;
@@ -615,18 +614,21 @@ static void __prescan_rxq(struct hfi1_packet *packet)
                if (etype != RHF_RCV_TYPE_IB)
                        goto next;
 
-               hdr = hfi1_get_msgheader(dd, rhf_addr);
+               packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
+               hdr = packet->hdr;
 
                lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 
-               if (lnh == HFI1_LRH_BTH)
-                       ohdr = &hdr->u.oth;
-               else if (lnh == HFI1_LRH_GRH)
-                       ohdr = &hdr->u.l.oth;
-               else
+               if (lnh == HFI1_LRH_BTH) {
+                       packet->ohdr = &hdr->u.oth;
+               } else if (lnh == HFI1_LRH_GRH) {
+                       packet->ohdr = &hdr->u.l.oth;
+                       packet->rcv_flags |= HFI1_HAS_GRH;
+               } else {
                        goto next; /* just in case */
+               }
 
-               bth1 = be32_to_cpu(ohdr->bth[1]);
+               bth1 = be32_to_cpu(packet->ohdr->bth[1]);
                is_ecn = !!(bth1 & (HFI1_FECN_SMASK | HFI1_BECN_SMASK));
 
                if (!is_ecn)
@@ -646,7 +648,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
 
                /* turn off BECN, FECN */
                bth1 &= ~(HFI1_FECN_SMASK | HFI1_BECN_SMASK);
-               ohdr->bth[1] = cpu_to_be32(bth1);
+               packet->ohdr->bth[1] = cpu_to_be32(bth1);
 next:
                update_ps_mdata(&mdata, rcd);
        }
@@ -1360,12 +1362,25 @@ int process_receive_ib(struct hfi1_packet *packet)
 
 int process_receive_bypass(struct hfi1_packet *packet)
 {
+       struct hfi1_devdata *dd = packet->rcd->dd;
+
        if (unlikely(rhf_err_flags(packet->rhf)))
                handle_eflags(packet);
 
-       dd_dev_err(packet->rcd->dd,
+       dd_dev_err(dd,
                   "Bypass packets are not supported in normal operation. Dropping\n");
-       incr_cntr64(&packet->rcd->dd->sw_rcv_bypass_packet_errors);
+       incr_cntr64(&dd->sw_rcv_bypass_packet_errors);
+       if (!(dd->err_info_rcvport.status_and_code & OPA_EI_STATUS_SMASK)) {
+               u64 *flits = packet->ebuf;
+
+               if (flits && !(packet->rhf & RHF_LEN_ERR)) {
+                       dd->err_info_rcvport.packet_flit1 = flits[0];
+                       dd->err_info_rcvport.packet_flit2 =
+                               packet->tlen > sizeof(flits[0]) ? flits[1] : 0;
+               }
+               dd->err_info_rcvport.status_and_code |=
+                       (OPA_EI_STATUS_SMASK | BAD_L2_ERR);
+       }
        return RHF_RCV_CONTINUE;
 }
 
index 677efa0e8cd689f167ab72bc13eb2b60dd91b8ed..bd786b7bd30b1256f523a89357e52d12f2a4266c 100644 (file)
@@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
                                               struct hfi1_devdata,
                                               user_cdev);
 
+       if (!atomic_inc_not_zero(&dd->user_refcount))
+               return -ENXIO;
+
        /* Just take a ref now. Not all opens result in a context assign */
        kobject_get(&dd->kobj);
 
@@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
                fd->rec_cpu_num = -1; /* no cpu affinity by default */
                fd->mm = current->mm;
                atomic_inc(&fd->mm->mm_count);
-       }
+               fp->private_data = fd;
+       } else {
+               fp->private_data = NULL;
+
+               if (atomic_dec_and_test(&dd->user_refcount))
+                       complete(&dd->user_comp);
 
-       fp->private_data = fd;
+               return -ENOMEM;
+       }
 
-       return fd ? 0 : -ENOMEM;
+       return 0;
 }
 
 static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
@@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
 done:
        mmdrop(fdata->mm);
        kobject_put(&dd->kobj);
+
+       if (atomic_dec_and_test(&dd->user_refcount))
+               complete(&dd->user_comp);
+
        kfree(fdata);
        return 0;
 }
index 7eef11b316ff327e50b8330860db7fd88173ca51..cc87fd4e534bbefd1235be4c010c5df4be9c065c 100644 (file)
@@ -367,26 +367,6 @@ struct hfi1_packet {
        u8 etype;
 };
 
-/*
- * Private data for snoop/capture support.
- */
-struct hfi1_snoop_data {
-       int mode_flag;
-       struct cdev cdev;
-       struct device *class_dev;
-       /* protect snoop data */
-       spinlock_t snoop_lock;
-       struct list_head queue;
-       wait_queue_head_t waitq;
-       void *filter_value;
-       int (*filter_callback)(void *hdr, void *data, void *value);
-       u64 dcc_cfg; /* saved value of DCC Cfg register */
-};
-
-/* snoop mode_flag values */
-#define HFI1_PORT_SNOOP_MODE     1U
-#define HFI1_PORT_CAPTURE_MODE   2U
-
 struct rvt_sge_state;
 
 /*
@@ -613,8 +593,6 @@ struct hfi1_pportdata {
        struct mutex hls_lock;
        u32 host_link_state;
 
-       spinlock_t            sdma_alllock ____cacheline_aligned_in_smp;
-
        u32 lstate;     /* logical link state */
 
        /* these are the "32 bit" regs */
@@ -1104,8 +1082,6 @@ struct hfi1_devdata {
        char *portcntrnames;
        size_t portcntrnameslen;
 
-       struct hfi1_snoop_data hfi1_snoop;
-
        struct err_info_rcvport err_info_rcvport;
        struct err_info_constraint err_info_rcv_constraint;
        struct err_info_constraint err_info_xmit_constraint;
@@ -1141,8 +1117,8 @@ struct hfi1_devdata {
        rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
 
        /*
-        * Handlers for outgoing data so that snoop/capture does not
-        * have to have its hooks in the send path
+        * Capability to have different send engines simply by changing a
+        * pointer value.
         */
        send_routine process_pio_send;
        send_routine process_dma_send;
@@ -1174,6 +1150,10 @@ struct hfi1_devdata {
        spinlock_t aspm_lock;
        /* Number of verbs contexts which have disabled ASPM */
        atomic_t aspm_disabled_cnt;
+       /* Keeps track of user space clients */
+       atomic_t user_refcount;
+       /* Used to wait for outstanding user space clients before dev removal */
+       struct completion user_comp;
 
        struct hfi1_affinity *affinity;
        struct rhashtable sdma_rht;
@@ -1221,8 +1201,6 @@ struct hfi1_devdata *hfi1_lookup(int unit);
 extern u32 hfi1_cpulist_count;
 extern unsigned long *hfi1_cpulist;
 
-extern unsigned int snoop_drop_send;
-extern unsigned int snoop_force_capture;
 int hfi1_init(struct hfi1_devdata *, int);
 int hfi1_count_units(int *npresentp, int *nupp);
 int hfi1_count_active_units(void);
@@ -1557,13 +1535,6 @@ void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf);
 void reset_link_credits(struct hfi1_devdata *dd);
 void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu);
 
-int snoop_recv_handler(struct hfi1_packet *packet);
-int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
-                          u64 pbc);
-int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
-                          u64 pbc);
-void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf,
-                          u64 pbc, const void *from, size_t count);
 int set_buffer_control(struct hfi1_pportdata *ppd, struct buffer_control *bc);
 
 static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd)
@@ -1763,8 +1734,7 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
 
 int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *);
 void hfi1_pcie_cleanup(struct pci_dev *);
-int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *,
-                    const struct pci_device_id *);
+int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *);
 void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
 void hfi1_pcie_flr(struct hfi1_devdata *);
 int pcie_speeds(struct hfi1_devdata *);
@@ -1799,8 +1769,6 @@ int kdeth_process_expected(struct hfi1_packet *packet);
 int kdeth_process_eager(struct hfi1_packet *packet);
 int process_receive_invalid(struct hfi1_packet *packet);
 
-extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8];
-
 void update_sge(struct rvt_sge_state *ss, u32 length);
 
 /* global module parameter variables */
@@ -1827,9 +1795,6 @@ extern struct mutex hfi1_mutex;
 #define DRIVER_NAME            "hfi1"
 #define HFI1_USER_MINOR_BASE     0
 #define HFI1_TRACE_MINOR         127
-#define HFI1_DIAGPKT_MINOR       128
-#define HFI1_DIAG_MINOR_BASE     129
-#define HFI1_SNOOP_CAPTURE_BASE  200
 #define HFI1_NMINORS             255
 
 #define PCI_VENDOR_ID_INTEL 0x8086
@@ -1848,7 +1813,13 @@ extern struct mutex hfi1_mutex;
 static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
                                                  u16 ctxt_type)
 {
-       u64 base_sc_integrity =
+       u64 base_sc_integrity;
+
+       /* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
+       if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
+               return 0;
+
+       base_sc_integrity =
        SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
        | SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK
        | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
@@ -1863,7 +1834,6 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
        | SEND_CTXT_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
        | SEND_CTXT_CHECK_ENABLE_CHECK_OPCODE_SMASK
        | SEND_CTXT_CHECK_ENABLE_CHECK_SLID_SMASK
-       | SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK
        | SEND_CTXT_CHECK_ENABLE_CHECK_VL_SMASK
        | SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK;
 
@@ -1872,18 +1842,23 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
        else
                base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
 
-       if (is_ax(dd))
-               /* turn off send-side job key checks - A0 */
-               return base_sc_integrity &
-                      ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+       /* turn on send-side job key checks if !A0 */
+       if (!is_ax(dd))
+               base_sc_integrity |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+
        return base_sc_integrity;
 }
 
 static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
 {
-       u64 base_sdma_integrity =
+       u64 base_sdma_integrity;
+
+       /* No integrity checks if HFI1_CAP_NO_INTEGRITY is set */
+       if (HFI1_CAP_IS_KSET(NO_INTEGRITY))
+               return 0;
+
+       base_sdma_integrity =
        SEND_DMA_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK
-       | SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK
        | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
        | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK
        | SEND_DMA_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK
@@ -1895,14 +1870,18 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
        | SEND_DMA_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK
        | SEND_DMA_CHECK_ENABLE_CHECK_OPCODE_SMASK
        | SEND_DMA_CHECK_ENABLE_CHECK_SLID_SMASK
-       | SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK
        | SEND_DMA_CHECK_ENABLE_CHECK_VL_SMASK
        | SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK;
 
-       if (is_ax(dd))
-               /* turn off send-side job key checks - A0 */
-               return base_sdma_integrity &
-                      ~SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+       if (!HFI1_CAP_IS_KSET(STATIC_RATE_CTRL))
+               base_sdma_integrity |=
+               SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK;
+
+       /* turn on send-side job key checks if !A0 */
+       if (!is_ax(dd))
+               base_sdma_integrity |=
+                       SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
+
        return base_sdma_integrity;
 }
 
index 60db61536fedf396c7a76dcdf777f7000a189eec..e3b5bc93bc70edd5e253766a6a76db6a13068d4d 100644 (file)
@@ -144,6 +144,8 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
                struct hfi1_ctxtdata *rcd;
 
                ppd = dd->pport + (i % dd->num_pports);
+
+               /* dd->rcd[i] gets assigned inside the callee */
                rcd = hfi1_create_ctxtdata(ppd, i, dd->node);
                if (!rcd) {
                        dd_dev_err(dd,
@@ -169,8 +171,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
                if (!rcd->sc) {
                        dd_dev_err(dd,
                                   "Unable to allocate kernel send context, failing\n");
-                       dd->rcd[rcd->ctxt] = NULL;
-                       hfi1_free_ctxtdata(dd, rcd);
                        goto nomem;
                }
 
@@ -178,9 +178,6 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
                if (ret < 0) {
                        dd_dev_err(dd,
                                   "Failed to setup kernel receive context, failing\n");
-                       sc_free(rcd->sc);
-                       dd->rcd[rcd->ctxt] = NULL;
-                       hfi1_free_ctxtdata(dd, rcd);
                        ret = -EFAULT;
                        goto bail;
                }
@@ -196,6 +193,10 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
 nomem:
        ret = -ENOMEM;
 bail:
+       if (dd->rcd) {
+               for (i = 0; i < dd->num_rcv_contexts; ++i)
+                       hfi1_free_ctxtdata(dd, dd->rcd[i]);
+       }
        kfree(dd->rcd);
        dd->rcd = NULL;
        return ret;
@@ -216,7 +217,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
            dd->num_rcv_contexts - dd->first_user_ctxt)
                kctxt_ngroups = (dd->rcv_entries.nctxt_extra -
                                 (dd->num_rcv_contexts - dd->first_user_ctxt));
-       rcd = kzalloc(sizeof(*rcd), GFP_KERNEL);
+       rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, numa);
        if (rcd) {
                u32 rcvtids, max_entries;
 
@@ -261,13 +262,6 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
                }
                rcd->eager_base = base * dd->rcv_entries.group_size;
 
-               /* Validate and initialize Rcv Hdr Q variables */
-               if (rcvhdrcnt % HDRQ_INCREMENT) {
-                       dd_dev_err(dd,
-                                  "ctxt%u: header queue count %d must be divisible by %lu\n",
-                                  rcd->ctxt, rcvhdrcnt, HDRQ_INCREMENT);
-                       goto bail;
-               }
                rcd->rcvhdrq_cnt = rcvhdrcnt;
                rcd->rcvhdrqentsize = hfi1_hdrq_entsize;
                /*
@@ -506,7 +500,6 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
        INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
 
        mutex_init(&ppd->hls_lock);
-       spin_lock_init(&ppd->sdma_alllock);
        spin_lock_init(&ppd->qsfp_info.qsfp_lock);
 
        ppd->qsfp_info.ppd = ppd;
@@ -1399,28 +1392,43 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
        hfi1_free_devdata(dd);
 }
 
+static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
+{
+       if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
+               hfi1_early_err(dev, "Receive header queue count too small\n");
+               return -EINVAL;
+       }
+
+       if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
+               hfi1_early_err(dev,
+                              "Receive header queue count cannot be greater than %u\n",
+                              HFI1_MAX_HDRQ_EGRBUF_CNT);
+               return -EINVAL;
+       }
+
+       if (thecnt % HDRQ_INCREMENT) {
+               hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
+                              thecnt, HDRQ_INCREMENT);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        int ret = 0, j, pidx, initfail;
-       struct hfi1_devdata *dd = ERR_PTR(-EINVAL);
+       struct hfi1_devdata *dd;
        struct hfi1_pportdata *ppd;
 
        /* First, lock the non-writable module parameters */
        HFI1_CAP_LOCK();
 
        /* Validate some global module parameters */
-       if (rcvhdrcnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
-               hfi1_early_err(&pdev->dev, "Header queue  count too small\n");
-               ret = -EINVAL;
-               goto bail;
-       }
-       if (rcvhdrcnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
-               hfi1_early_err(&pdev->dev,
-                              "Receive header queue count cannot be greater than %u\n",
-                              HFI1_MAX_HDRQ_EGRBUF_CNT);
-               ret = -EINVAL;
+       ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
+       if (ret)
                goto bail;
-       }
+
        /* use the encoding function as a sanitization check */
        if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
                hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
@@ -1461,26 +1469,25 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (ret)
                goto bail;
 
-       /*
-        * Do device-specific initialization, function table setup, dd
-        * allocation, etc.
-        */
-       switch (ent->device) {
-       case PCI_DEVICE_ID_INTEL0:
-       case PCI_DEVICE_ID_INTEL1:
-               dd = hfi1_init_dd(pdev, ent);
-               break;
-       default:
+       if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
+             ent->device == PCI_DEVICE_ID_INTEL1)) {
                hfi1_early_err(&pdev->dev,
                               "Failing on unknown Intel deviceid 0x%x\n",
                               ent->device);
                ret = -ENODEV;
+               goto clean_bail;
        }
 
-       if (IS_ERR(dd))
+       /*
+        * Do device-specific initialization, function table setup, dd
+        * allocation, etc.
+        */
+       dd = hfi1_init_dd(pdev, ent);
+
+       if (IS_ERR(dd)) {
                ret = PTR_ERR(dd);
-       if (ret)
                goto clean_bail; /* error already printed */
+       }
 
        ret = create_workqueues(dd);
        if (ret)
@@ -1538,12 +1545,31 @@ bail:
        return ret;
 }
 
+static void wait_for_clients(struct hfi1_devdata *dd)
+{
+       /*
+        * Remove the device init value and complete the device if there is
+        * no clients or wait for active clients to finish.
+        */
+       if (atomic_dec_and_test(&dd->user_refcount))
+               complete(&dd->user_comp);
+
+       wait_for_completion(&dd->user_comp);
+}
+
 static void remove_one(struct pci_dev *pdev)
 {
        struct hfi1_devdata *dd = pci_get_drvdata(pdev);
 
        /* close debugfs files before ib unregister */
        hfi1_dbg_ibdev_exit(&dd->verbs_dev);
+
+       /* remove the /dev hfi1 interface */
+       hfi1_device_remove(dd);
+
+       /* wait for existing user space clients to finish */
+       wait_for_clients(dd);
+
        /* unregister from IB core */
        hfi1_unregister_ib_device(dd);
 
@@ -1558,8 +1584,6 @@ static void remove_one(struct pci_dev *pdev)
        /* wait until all of our (qsfp) queue_work() calls complete */
        flush_workqueue(ib_wq);
 
-       hfi1_device_remove(dd);
-
        postinit_cleanup(dd);
 }
 
index 89c68da1c273297c71476fe0acbe37d93f7f97a3..4ac8f330c5cb8bcd02aaee2551441eb6292489d6 100644 (file)
@@ -157,8 +157,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev)
  * fields required to re-initialize after a chip reset, or for
  * various other purposes
  */
-int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev,
-                    const struct pci_device_id *ent)
+int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
 {
        unsigned long len;
        resource_size_t addr;
index 50a3a36d93632d3e6fbb3a7afced466be2c4547b..d89b8745d4c1c3012e89c8355b3066bb116e26c9 100644 (file)
@@ -668,19 +668,12 @@ void sc_set_cr_threshold(struct send_context *sc, u32 new_threshold)
 void set_pio_integrity(struct send_context *sc)
 {
        struct hfi1_devdata *dd = sc->dd;
-       u64 reg = 0;
        u32 hw_context = sc->hw_context;
        int type = sc->type;
 
-       /*
-        * No integrity checks if HFI1_CAP_NO_INTEGRITY is set, or if
-        * we're snooping.
-        */
-       if (likely(!HFI1_CAP_IS_KSET(NO_INTEGRITY)) &&
-           dd->hfi1_snoop.mode_flag != HFI1_PORT_SNOOP_MODE)
-               reg = hfi1_pkt_default_send_ctxt_mask(dd, type);
-
-       write_kctxt_csr(dd, hw_context, SC(CHECK_ENABLE), reg);
+       write_kctxt_csr(dd, hw_context,
+                       SC(CHECK_ENABLE),
+                       hfi1_pkt_default_send_ctxt_mask(dd, type));
 }
 
 static u32 get_buffers_allocated(struct send_context *sc)
index 8bc5013f39a1ae7ef6f28af82c56017c89edacdb..83198a8a87979baeeb58c591fba0e4d06db6e014 100644 (file)
@@ -89,7 +89,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
 
        lockdep_assert_held(&qp->s_lock);
        qp->s_flags |= RVT_S_WAIT_RNR;
-       qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
+       priv->s_rnr_timer.expires = jiffies + usecs_to_jiffies(to);
        add_timer(&priv->s_rnr_timer);
 }
 
index fd39bcaa062d69683fdbefff9b610f26e15d898b..9cbe52d210778fdce2b3a5ad66d8e06dac801637 100644 (file)
@@ -2009,11 +2009,6 @@ static void sdma_hw_start_up(struct sdma_engine *sde)
        write_sde_csr(sde, SD(ENG_ERR_CLEAR), reg);
 }
 
-#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
-(r &= ~SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
-
-#define SET_STATIC_RATE_CONTROL_SMASK(r) \
-(r |= SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
 /*
  * set_sdma_integrity
  *
@@ -2022,19 +2017,9 @@ static void sdma_hw_start_up(struct sdma_engine *sde)
 static void set_sdma_integrity(struct sdma_engine *sde)
 {
        struct hfi1_devdata *dd = sde->dd;
-       u64 reg;
-
-       if (unlikely(HFI1_CAP_IS_KSET(NO_INTEGRITY)))
-               return;
-
-       reg = hfi1_pkt_base_sdma_integrity(dd);
-
-       if (HFI1_CAP_IS_KSET(STATIC_RATE_CTRL))
-               CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
-       else
-               SET_STATIC_RATE_CONTROL_SMASK(reg);
 
-       write_sde_csr(sde, SD(CHECK_ENABLE), reg);
+       write_sde_csr(sde, SD(CHECK_ENABLE),
+                     hfi1_pkt_base_sdma_integrity(dd));
 }
 
 static void init_sdma_regs(
index edba22461a9c16dc86af705b2b86952d9fb85867..919a5474e6512a237fa0b6059f3f77f4a671103e 100644 (file)
@@ -49,7 +49,6 @@
 #include "hfi.h"
 #include "mad.h"
 #include "trace.h"
-#include "affinity.h"
 
 /*
  * Start of per-port congestion control structures and support code
@@ -623,27 +622,6 @@ static ssize_t show_tempsense(struct device *device,
        return ret;
 }
 
-static ssize_t show_sdma_affinity(struct device *device,
-                                 struct device_attribute *attr, char *buf)
-{
-       struct hfi1_ibdev *dev =
-               container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
-       struct hfi1_devdata *dd = dd_from_dev(dev);
-
-       return hfi1_get_sdma_affinity(dd, buf);
-}
-
-static ssize_t store_sdma_affinity(struct device *device,
-                                  struct device_attribute *attr,
-                                  const char *buf, size_t count)
-{
-       struct hfi1_ibdev *dev =
-               container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
-       struct hfi1_devdata *dd = dd_from_dev(dev);
-
-       return hfi1_set_sdma_affinity(dd, buf, count);
-}
-
 /*
  * end of per-unit (or driver, in some cases, but replicated
  * per unit) functions
@@ -658,8 +636,6 @@ static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
 static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
 static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
 static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-static DEVICE_ATTR(sdma_affinity, S_IWUSR | S_IRUGO, show_sdma_affinity,
-                  store_sdma_affinity);
 
 static struct device_attribute *hfi1_attributes[] = {
        &dev_attr_hw_rev,
@@ -670,7 +646,6 @@ static struct device_attribute *hfi1_attributes[] = {
        &dev_attr_boardversion,
        &dev_attr_tempsense,
        &dev_attr_chip_reset,
-       &dev_attr_sdma_affinity,
 };
 
 int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
index 11e02b22892281f5e5a2248a00f5f7ad4d1e9834..f77e59fb43fee7934fc15096b703cfb962f89f5a 100644 (file)
@@ -253,66 +253,6 @@ TRACE_EVENT(hfi1_mmu_invalidate,
                      )
            );
 
-#define SNOOP_PRN \
-       "slid %.4x dlid %.4x qpn 0x%.6x opcode 0x%.2x,%s " \
-       "svc lvl %d pkey 0x%.4x [header = %d bytes] [data = %d bytes]"
-
-TRACE_EVENT(snoop_capture,
-           TP_PROTO(struct hfi1_devdata *dd,
-                    int hdr_len,
-                    struct ib_header *hdr,
-                    int data_len,
-                    void *data),
-           TP_ARGS(dd, hdr_len, hdr, data_len, data),
-           TP_STRUCT__entry(
-                            DD_DEV_ENTRY(dd)
-                            __field(u16, slid)
-                            __field(u16, dlid)
-                            __field(u32, qpn)
-                            __field(u8, opcode)
-                            __field(u8, sl)
-                            __field(u16, pkey)
-                            __field(u32, hdr_len)
-                            __field(u32, data_len)
-                            __field(u8, lnh)
-                            __dynamic_array(u8, raw_hdr, hdr_len)
-                            __dynamic_array(u8, raw_pkt, data_len)
-                            ),
-           TP_fast_assign(
-               struct ib_other_headers *ohdr;
-
-               __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
-               if (__entry->lnh == HFI1_LRH_BTH)
-               ohdr = &hdr->u.oth;
-               else
-               ohdr = &hdr->u.l.oth;
-               DD_DEV_ASSIGN(dd);
-               __entry->slid = be16_to_cpu(hdr->lrh[3]);
-               __entry->dlid = be16_to_cpu(hdr->lrh[1]);
-               __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
-               __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
-               __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
-               __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff;
-               __entry->hdr_len = hdr_len;
-               __entry->data_len = data_len;
-               memcpy(__get_dynamic_array(raw_hdr), hdr, hdr_len);
-               memcpy(__get_dynamic_array(raw_pkt), data, data_len);
-               ),
-           TP_printk(
-               "[%s] " SNOOP_PRN,
-               __get_str(dev),
-               __entry->slid,
-               __entry->dlid,
-               __entry->qpn,
-               __entry->opcode,
-               show_ib_opcode(__entry->opcode),
-               __entry->sl,
-               __entry->pkey,
-               __entry->hdr_len,
-               __entry->data_len
-               )
-);
-
 #endif /* __HFI1_TRACE_RX_H */
 
 #undef TRACE_INCLUDE_PATH
index a761f804111eea026855bc3c2d033430f4e81807..77697d690f3eb12add14aed6a9fe19dafd498601 100644 (file)
@@ -1144,7 +1144,7 @@ static int pin_vector_pages(struct user_sdma_request *req,
        rb_node = hfi1_mmu_rb_extract(pq->handler,
                                      (unsigned long)iovec->iov.iov_base,
                                      iovec->iov.iov_len);
-       if (rb_node && !IS_ERR(rb_node))
+       if (rb_node)
                node = container_of(rb_node, struct sdma_mmu_node, rb);
        else
                rb_node = NULL;
index 875597b0e69c59196e666e958e7b25404779eb20..097365932b0961f578fe27c7c8b650d564317476 100644 (file)
@@ -83,8 +83,7 @@ static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev,
 static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
                             struct hns_roce_mtt *hr_mtt,
                             struct hns_roce_uar *hr_uar,
-                            struct hns_roce_cq *hr_cq, int vector,
-                            int collapsed)
+                            struct hns_roce_cq *hr_cq, int vector)
 {
        struct hns_roce_cmd_mailbox *mailbox = NULL;
        struct hns_roce_cq_table *cq_table = NULL;
@@ -153,6 +152,9 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
        hr_cq->cons_index = 0;
        hr_cq->uar = hr_uar;
 
+       atomic_set(&hr_cq->refcount, 1);
+       init_completion(&hr_cq->free);
+
        return 0;
 
 err_radix:
@@ -192,6 +194,11 @@ static void hns_roce_free_cq(struct hns_roce_dev *hr_dev,
        /* Waiting interrupt process procedure carried out */
        synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq);
 
+       /* wait for all interrupt processed */
+       if (atomic_dec_and_test(&hr_cq->refcount))
+               complete(&hr_cq->free);
+       wait_for_completion(&hr_cq->free);
+
        spin_lock_irq(&cq_table->lock);
        radix_tree_delete(&cq_table->tree, hr_cq->cqn);
        spin_unlock_irq(&cq_table->lock);
@@ -300,10 +307,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
 
        cq_entries = roundup_pow_of_two((unsigned int)cq_entries);
        hr_cq->ib_cq.cqe = cq_entries - 1;
-       mutex_init(&hr_cq->resize_mutex);
        spin_lock_init(&hr_cq->lock);
-       hr_cq->hr_resize_buf = NULL;
-       hr_cq->resize_umem = NULL;
 
        if (context) {
                if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
@@ -338,8 +342,8 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
        }
 
        /* Allocate cq index, fill cq_context */
-       ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt,
-                               uar, hr_cq, vector, 0);
+       ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, uar,
+                               hr_cq, vector);
        if (ret) {
                dev_err(dev, "Creat CQ .Failed to cq_alloc.\n");
                goto err_mtt;
@@ -353,12 +357,15 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
        if (context) {
                if (ib_copy_to_udata(udata, &hr_cq->cqn, sizeof(u64))) {
                        ret = -EFAULT;
-                       goto err_mtt;
+                       goto err_cqc;
                }
        }
 
        return &hr_cq->ib_cq;
 
+err_cqc:
+       hns_roce_free_cq(hr_dev, hr_cq);
+
 err_mtt:
        hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
        if (context)
index ea735800eb18e8c6c5962349aa94152b13d636bf..341731553a60167d4fff793edb49dc8958371422 100644 (file)
@@ -62,7 +62,7 @@
 #define HNS_ROCE_AEQE_OF_VEC_NUM               1
 
 /* 4G/4K = 1M */
-#define HNS_ROCE_SL_SHIFT                      29
+#define HNS_ROCE_SL_SHIFT                      28
 #define HNS_ROCE_TCLASS_SHIFT                  20
 #define HNS_ROCE_FLOW_LABLE_MASK               0xfffff
 
@@ -74,7 +74,9 @@
 #define MR_TYPE_DMA                            0x03
 
 #define PKEY_ID                                        0xffff
+#define GUID_LEN                               8
 #define NODE_DESC_SIZE                         64
+#define DB_REG_OFFSET                          0x1000
 
 #define SERV_TYPE_RC                           0
 #define SERV_TYPE_RD                           1
@@ -282,20 +284,11 @@ struct hns_roce_cq_buf {
        struct hns_roce_mtt hr_mtt;
 };
 
-struct hns_roce_cq_resize {
-       struct hns_roce_cq_buf  hr_buf;
-       int                     cqe;
-};
-
 struct hns_roce_cq {
        struct ib_cq                    ib_cq;
        struct hns_roce_cq_buf          hr_buf;
-       /* pointer to store information after resize*/
-       struct hns_roce_cq_resize       *hr_resize_buf;
        spinlock_t                      lock;
-       struct mutex                    resize_mutex;
        struct ib_umem                  *umem;
-       struct ib_umem                  *resize_umem;
        void (*comp)(struct hns_roce_cq *);
        void (*event)(struct hns_roce_cq *, enum hns_roce_event);
 
@@ -408,6 +401,7 @@ struct hns_roce_qp {
        u32                     buff_size;
        struct mutex            mutex;
        u8                      port;
+       u8                      phy_port;
        u8                      sl;
        u8                      resp_depth;
        u8                      state;
@@ -471,7 +465,6 @@ struct hns_roce_caps {
        u32             max_rq_desc_sz; /* 64 */
        int             max_qp_init_rdma;
        int             max_qp_dest_rdma;
-       int             sqp_start;
        int             num_cqs;
        int             max_cqes;
        int             reserved_cqs;
@@ -512,6 +505,8 @@ struct hns_roce_hw {
        void (*write_cqc)(struct hns_roce_dev *hr_dev,
                          struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
                          dma_addr_t dma_handle, int nent, u32 vector);
+       int (*clear_hem)(struct hns_roce_dev *hr_dev,
+                        struct hns_roce_hem_table *table, int obj);
        int (*query_qp)(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
                        int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
        int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
@@ -533,7 +528,6 @@ struct hns_roce_dev {
        struct hns_roce_uar     priv_uar;
        const char              *irq_names[HNS_ROCE_MAX_IRQ_NUM];
        spinlock_t              sm_lock;
-       spinlock_t              cq_db_lock;
        spinlock_t              bt_cmd_lock;
        struct hns_roce_ib_iboe iboe;
 
index 98af7fecf2f1e9d42d92fec1a776c1e1bc6d3080..21e21b03cfb52a966f1412e65b2ec4709975ecae 100644 (file)
@@ -66,9 +66,6 @@ static void hns_roce_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
 {
        struct device *dev = &hr_dev->pdev->dev;
 
-       qpn = roce_get_field(aeqe->event.qp_event.qp,
-                            HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
-                            HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S);
        dev_warn(dev, "Local Work Queue Catastrophic Error.\n");
        switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
                               HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
@@ -96,13 +93,6 @@ static void hns_roce_wq_catas_err_handle(struct hns_roce_dev *hr_dev,
        default:
                break;
        }
-
-       hns_roce_qp_event(hr_dev, roce_get_field(aeqe->event.qp_event.qp,
-                                       HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
-                                       HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S),
-                         roce_get_field(aeqe->asyn,
-                                       HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
-                                       HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
 }
 
 static void hns_roce_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
@@ -111,9 +101,6 @@ static void hns_roce_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
 {
        struct device *dev = &hr_dev->pdev->dev;
 
-       qpn = roce_get_field(aeqe->event.qp_event.qp,
-                            HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
-                            HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S);
        dev_warn(dev, "Local Access Violation Work Queue Error.\n");
        switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M,
                               HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) {
@@ -141,13 +128,69 @@ static void hns_roce_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
        default:
                break;
        }
+}
+
+static void hns_roce_qp_err_handle(struct hns_roce_dev *hr_dev,
+                                  struct hns_roce_aeqe *aeqe,
+                                  int event_type)
+{
+       struct device *dev = &hr_dev->pdev->dev;
+       int phy_port;
+       int qpn;
+
+       qpn = roce_get_field(aeqe->event.qp_event.qp,
+                            HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
+                            HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S);
+       phy_port = roce_get_field(aeqe->event.qp_event.qp,
+                       HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M,
+                       HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S);
+       if (qpn <= 1)
+               qpn = HNS_ROCE_MAX_PORTS * qpn + phy_port;
+
+       switch (event_type) {
+       case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+               dev_warn(dev, "Invalid Req Local Work Queue Error.\n"
+                             "QP %d, phy_port %d.\n", qpn, phy_port);
+               break;
+       case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+               hns_roce_wq_catas_err_handle(hr_dev, aeqe, qpn);
+               break;
+       case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+               hns_roce_local_wq_access_err_handle(hr_dev, aeqe, qpn);
+               break;
+       default:
+               break;
+       }
+
+       hns_roce_qp_event(hr_dev, qpn, event_type);
+}
+
+static void hns_roce_cq_err_handle(struct hns_roce_dev *hr_dev,
+                                  struct hns_roce_aeqe *aeqe,
+                                  int event_type)
+{
+       struct device *dev = &hr_dev->pdev->dev;
+       u32 cqn;
+
+       cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq,
+                   HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
+                   HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S));
+
+       switch (event_type) {
+       case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+               dev_warn(dev, "CQ 0x%x access err.\n", cqn);
+               break;
+       case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+               dev_warn(dev, "CQ 0x%x overflow\n", cqn);
+               break;
+       case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
+               dev_warn(dev, "CQ 0x%x ID invalid.\n", cqn);
+               break;
+       default:
+               break;
+       }
 
-       hns_roce_qp_event(hr_dev, roce_get_field(aeqe->event.qp_event.qp,
-                                        HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
-                                        HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S),
-                         roce_get_field(aeqe->asyn,
-                                        HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
-                                        HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
+       hns_roce_cq_event(hr_dev, cqn, event_type);
 }
 
 static void hns_roce_db_overflow_handle(struct hns_roce_dev *hr_dev,
@@ -185,7 +228,7 @@ static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
        struct device *dev = &hr_dev->pdev->dev;
        struct hns_roce_aeqe *aeqe;
        int aeqes_found = 0;
-       int qpn = 0;
+       int event_type;
 
        while ((aeqe = next_aeqe_sw(eq))) {
                dev_dbg(dev, "aeqe = %p, aeqe->asyn.event_type = 0x%lx\n", aeqe,
@@ -195,9 +238,10 @@ static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
                /* Memory barrier */
                rmb();
 
-               switch (roce_get_field(aeqe->asyn,
-                       HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
-                       HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)) {
+               event_type = roce_get_field(aeqe->asyn,
+                               HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
+                               HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S);
+               switch (event_type) {
                case HNS_ROCE_EVENT_TYPE_PATH_MIG:
                        dev_warn(dev, "PATH MIG not supported\n");
                        break;
@@ -211,23 +255,9 @@ static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
                        dev_warn(dev, "PATH MIG failed\n");
                        break;
                case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
-                       dev_warn(dev, "qpn = 0x%lx\n",
-                       roce_get_field(aeqe->event.qp_event.qp,
-                                      HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
-                                      HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S));
-                       hns_roce_qp_event(hr_dev,
-                               roce_get_field(aeqe->event.qp_event.qp,
-                                       HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M,
-                                       HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S),
-                               roce_get_field(aeqe->asyn,
-                                       HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
-                                       HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
-                       break;
                case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
-                       hns_roce_wq_catas_err_handle(hr_dev, aeqe, qpn);
-                       break;
                case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
-                       hns_roce_local_wq_access_err_handle(hr_dev, aeqe, qpn);
+                       hns_roce_qp_err_handle(hr_dev, aeqe, event_type);
                        break;
                case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
                case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
@@ -235,40 +265,9 @@ static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
                        dev_warn(dev, "SRQ not support!\n");
                        break;
                case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
-                       dev_warn(dev, "CQ 0x%lx access err.\n",
-                       roce_get_field(aeqe->event.cq_event.cq,
-                                      HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
-                                      HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S));
-                       hns_roce_cq_event(hr_dev,
-                       le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq,
-                                   HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
-                                   HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)),
-                       roce_get_field(aeqe->asyn,
-                                      HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
-                                      HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
-                       break;
                case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
-                       dev_warn(dev, "CQ 0x%lx overflow\n",
-                       roce_get_field(aeqe->event.cq_event.cq,
-                                      HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
-                                      HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S));
-                       hns_roce_cq_event(hr_dev,
-                       le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq,
-                                   HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
-                                   HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)),
-                       roce_get_field(aeqe->asyn,
-                                      HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
-                                      HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
-                       break;
                case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID:
-                       dev_warn(dev, "CQ ID invalid.\n");
-                       hns_roce_cq_event(hr_dev,
-                       le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq,
-                                   HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
-                                   HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)),
-                       roce_get_field(aeqe->asyn,
-                                      HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
-                                      HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S));
+                       hns_roce_cq_err_handle(hr_dev, aeqe, event_type);
                        break;
                case HNS_ROCE_EVENT_TYPE_PORT_CHANGE:
                        dev_warn(dev, "port change.\n");
@@ -290,11 +289,8 @@ static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
                                     HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S));
                        break;
                default:
-                       dev_warn(dev, "Unhandled event 0x%lx on EQ %d at index %u\n",
-                                roce_get_field(aeqe->asyn,
-                                             HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M,
-                                             HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S),
-                                eq->eqn, eq->cons_index);
+                       dev_warn(dev, "Unhandled event %d on EQ %d at index %u\n",
+                                event_type, eq->eqn, eq->cons_index);
                        break;
                };
 
index fe4388191a3c255c74a3668256a3a5e91f897772..c6d212d12e033372f1c1e99b46c764b8eefcde28 100644 (file)
@@ -107,6 +107,10 @@ struct hns_roce_aeqe {
 #define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M   \
        (((1UL << 24) - 1) << HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S)
 
+#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S 25
+#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M   \
+       (((1UL << 3) - 1) << HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S)
+
 #define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S 0
 #define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M   \
        (((1UL << 16) - 1) << HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)
index d53d643623899e0fdd2eaf771dcb852bb817a6fe..250d8f2803908f4c5fcae2d224baae5df68ed1fb 100644 (file)
 #include "hns_roce_hem.h"
 #include "hns_roce_common.h"
 
-#define HW_SYNC_TIMEOUT_MSECS          500
-#define HW_SYNC_SLEEP_TIME_INTERVAL    20
-
 #define HNS_ROCE_HEM_ALLOC_SIZE                (1 << 17)
 #define HNS_ROCE_TABLE_CHUNK_SIZE      (1 << 17)
 
 #define DMA_ADDR_T_SHIFT               12
-#define BT_CMD_SYNC_SHIFT              31
 #define BT_BA_SHIFT                    32
 
 struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, int npages,
@@ -213,74 +209,6 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
        return ret;
 }
 
-static int hns_roce_clear_hem(struct hns_roce_dev *hr_dev,
-                             struct hns_roce_hem_table *table,
-                             unsigned long obj)
-{
-       struct device *dev = &hr_dev->pdev->dev;
-       unsigned long end = 0;
-       unsigned long flags;
-       void __iomem *bt_cmd;
-       uint32_t bt_cmd_val[2];
-       u32 bt_cmd_h_val = 0;
-       int ret = 0;
-
-       switch (table->type) {
-       case HEM_TYPE_QPC:
-               roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
-                              ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC);
-               break;
-       case HEM_TYPE_MTPT:
-               roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
-                              ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S,
-                              HEM_TYPE_MTPT);
-               break;
-       case HEM_TYPE_CQC:
-               roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
-                              ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC);
-               break;
-       case HEM_TYPE_SRQC:
-               roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
-                              ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S,
-                              HEM_TYPE_SRQC);
-               break;
-       default:
-               return ret;
-       }
-       roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
-                      ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
-       roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
-       roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
-       roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
-                      ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, 0);
-
-       spin_lock_irqsave(&hr_dev->bt_cmd_lock, flags);
-
-       bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
-
-       end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies;
-       while (1) {
-               if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
-                       if (!(time_before(jiffies, end))) {
-                               dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n");
-                               spin_unlock_irqrestore(&hr_dev->bt_cmd_lock,
-                                                      flags);
-                               return -EBUSY;
-                       }
-               } else {
-                       break;
-               }
-               msleep(HW_SYNC_SLEEP_TIME_INTERVAL);
-       }
-
-       bt_cmd_val[0] = 0;
-       bt_cmd_val[1] = bt_cmd_h_val;
-       hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
-       spin_unlock_irqrestore(&hr_dev->bt_cmd_lock, flags);
-
-       return ret;
-}
-
 int hns_roce_table_get(struct hns_roce_dev *hr_dev,
                       struct hns_roce_hem_table *table, unsigned long obj)
 {
@@ -333,7 +261,7 @@ void hns_roce_table_put(struct hns_roce_dev *hr_dev,
 
        if (--table->hem[i]->refcount == 0) {
                /* Clear HEM base address */
-               if (hns_roce_clear_hem(hr_dev, table, obj))
+               if (hr_dev->hw->clear_hem(hr_dev, table, obj))
                        dev_warn(dev, "Clear HEM base address failed.\n");
 
                hns_roce_free_hem(hr_dev, table->hem[i]);
@@ -456,7 +384,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
 
        for (i = 0; i < table->num_hem; ++i)
                if (table->hem[i]) {
-                       if (hns_roce_clear_hem(hr_dev, table,
+                       if (hr_dev->hw->clear_hem(hr_dev, table,
                            i * HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size))
                                dev_err(dev, "Clear HEM base address failed.\n");
 
index ad6617588fba9a15eee65e12eb948219ba706670..435748858252d756d065315e5ce4347db46c4477 100644 (file)
 #ifndef _HNS_ROCE_HEM_H
 #define _HNS_ROCE_HEM_H
 
+#define HW_SYNC_TIMEOUT_MSECS          500
+#define HW_SYNC_SLEEP_TIME_INTERVAL    20
+#define BT_CMD_SYNC_SHIFT              31
+
 enum {
        /* MAP HEM(Hardware Entry Memory) */
        HEM_TYPE_QPC = 0,
index 399f5dedaf2dd6dfcd075d7875903b943bed4ec3..71232e5fabf6acccf4002f1fedded380f4376274 100644 (file)
@@ -73,8 +73,14 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
        u32 ind = 0;
        int ret = 0;
 
-       spin_lock_irqsave(&qp->sq.lock, flags);
+       if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
+               ibqp->qp_type != IB_QPT_RC)) {
+               dev_err(dev, "un-supported QP type\n");
+               *bad_wr = NULL;
+               return -EOPNOTSUPP;
+       }
 
+       spin_lock_irqsave(&qp->sq.lock, flags);
        ind = qp->sq_next_wqe;
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
                if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
@@ -162,7 +168,7 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        roce_set_field(ud_sq_wqe->u32_36,
                                       UD_SEND_WQE_U32_36_SGID_INDEX_M,
                                       UD_SEND_WQE_U32_36_SGID_INDEX_S,
-                                      hns_get_gid_index(hr_dev, qp->port,
+                                      hns_get_gid_index(hr_dev, qp->phy_port,
                                                         ah->av.gid_index));
 
                        roce_set_field(ud_sq_wqe->u32_40,
@@ -205,8 +211,7 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                      (wr->send_flags & IB_SEND_FENCE ?
                                      (cpu_to_le32(HNS_ROCE_WQE_FENCE)) : 0);
 
-                       wqe = (struct hns_roce_wqe_ctrl_seg *)wqe +
-                              sizeof(struct hns_roce_wqe_ctrl_seg);
+                       wqe += sizeof(struct hns_roce_wqe_ctrl_seg);
 
                        switch (wr->opcode) {
                        case IB_WR_RDMA_READ:
@@ -235,8 +240,7 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                break;
                        }
                        ctrl->flag |= cpu_to_le32(ps_opcode);
-                       wqe = (struct hns_roce_wqe_raddr_seg *)wqe +
-                              sizeof(struct hns_roce_wqe_raddr_seg);
+                       wqe += sizeof(struct hns_roce_wqe_raddr_seg);
 
                        dseg = wqe;
                        if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) {
@@ -253,8 +257,7 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                        memcpy(wqe, ((void *) (uintptr_t)
                                               wr->sg_list[i].addr),
                                               wr->sg_list[i].length);
-                                       wqe = (struct hns_roce_wqe_raddr_seg *)
-                                              wqe + wr->sg_list[i].length;
+                                       wqe += wr->sg_list[i].length;
                                }
                                ctrl->flag |= HNS_ROCE_WQE_INLINE;
                        } else {
@@ -266,9 +269,6 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                              HNS_ROCE_WQE_SGE_NUM_BIT);
                        }
                        ind++;
-               } else {
-                       dev_dbg(dev, "unSupported QP type\n");
-                       break;
                }
        }
 
@@ -285,7 +285,7 @@ out:
                               SQ_DOORBELL_U32_4_SQ_HEAD_S,
                              (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)));
                roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M,
-                              SQ_DOORBELL_U32_4_PORT_S, qp->port);
+                              SQ_DOORBELL_U32_4_PORT_S, qp->phy_port);
                roce_set_field(sq_db.u32_8, SQ_DOORBELL_U32_8_QPN_M,
                               SQ_DOORBELL_U32_8_QPN_S, qp->doorbell_qpn);
                roce_set_bit(sq_db.u32_8, SQ_DOORBELL_HW_SYNC_S, 1);
@@ -365,14 +365,14 @@ out:
                        /* SW update GSI rq header */
                        reg_val = roce_read(to_hr_dev(ibqp->device),
                                            ROCEE_QP1C_CFG3_0_REG +
-                                           QP1C_CFGN_OFFSET * hr_qp->port);
+                                           QP1C_CFGN_OFFSET * hr_qp->phy_port);
                        roce_set_field(reg_val,
                                       ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M,
                                       ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S,
                                       hr_qp->rq.head);
                        roce_write(to_hr_dev(ibqp->device),
                                   ROCEE_QP1C_CFG3_0_REG +
-                                  QP1C_CFGN_OFFSET * hr_qp->port, reg_val);
+                                  QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val);
                } else {
                        rq_db.u32_4 = 0;
                        rq_db.u32_8 = 0;
@@ -789,6 +789,66 @@ static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag)
        }
 }
 
+static int hns_roce_bt_init(struct hns_roce_dev *hr_dev)
+{
+       struct device *dev = &hr_dev->pdev->dev;
+       struct hns_roce_v1_priv *priv;
+       int ret;
+
+       priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+
+       priv->bt_table.qpc_buf.buf = dma_alloc_coherent(dev,
+               HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.qpc_buf.map,
+               GFP_KERNEL);
+       if (!priv->bt_table.qpc_buf.buf)
+               return -ENOMEM;
+
+       priv->bt_table.mtpt_buf.buf = dma_alloc_coherent(dev,
+               HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.mtpt_buf.map,
+               GFP_KERNEL);
+       if (!priv->bt_table.mtpt_buf.buf) {
+               ret = -ENOMEM;
+               goto err_failed_alloc_mtpt_buf;
+       }
+
+       priv->bt_table.cqc_buf.buf = dma_alloc_coherent(dev,
+               HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.cqc_buf.map,
+               GFP_KERNEL);
+       if (!priv->bt_table.cqc_buf.buf) {
+               ret = -ENOMEM;
+               goto err_failed_alloc_cqc_buf;
+       }
+
+       return 0;
+
+err_failed_alloc_cqc_buf:
+       dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
+               priv->bt_table.mtpt_buf.buf, priv->bt_table.mtpt_buf.map);
+
+err_failed_alloc_mtpt_buf:
+       dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
+               priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
+
+       return ret;
+}
+
+static void hns_roce_bt_free(struct hns_roce_dev *hr_dev)
+{
+       struct device *dev = &hr_dev->pdev->dev;
+       struct hns_roce_v1_priv *priv;
+
+       priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+
+       dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
+               priv->bt_table.cqc_buf.buf, priv->bt_table.cqc_buf.map);
+
+       dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
+               priv->bt_table.mtpt_buf.buf, priv->bt_table.mtpt_buf.map);
+
+       dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE,
+               priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
+}
+
 /**
  * hns_roce_v1_reset - reset RoCE
  * @hr_dev: RoCE device struct pointer
@@ -879,7 +939,6 @@ void hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
        caps->mtt_entry_sz      = HNS_ROCE_V1_MTT_ENTRY_SIZE;
        caps->cq_entry_sz       = HNS_ROCE_V1_CQE_ENTRY_SIZE;
        caps->page_size_cap     = HNS_ROCE_V1_PAGE_SIZE_SUPPORT;
-       caps->sqp_start         = 0;
        caps->reserved_lkey     = 0;
        caps->reserved_pds      = 0;
        caps->reserved_mrws     = 1;
@@ -944,8 +1003,18 @@ int hns_roce_v1_init(struct hns_roce_dev *hr_dev)
 
        hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP);
 
+       ret = hns_roce_bt_init(hr_dev);
+       if (ret) {
+               dev_err(dev, "bt init failed!\n");
+               goto error_failed_bt_init;
+       }
+
        return 0;
 
+error_failed_bt_init:
+       hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
+       hns_roce_raq_free(hr_dev);
+
 error_failed_raq_init:
        hns_roce_db_free(hr_dev);
        return ret;
@@ -953,6 +1022,7 @@ error_failed_raq_init:
 
 void hns_roce_v1_exit(struct hns_roce_dev *hr_dev)
 {
+       hns_roce_bt_free(hr_dev);
        hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
        hns_roce_raq_free(hr_dev);
        hns_roce_db_free(hr_dev);
@@ -1192,9 +1262,7 @@ static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq)
        return get_sw_cqe(hr_cq, hr_cq->cons_index);
 }
 
-void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index,
-                          spinlock_t *doorbell_lock)
-
+void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
 {
        u32 doorbell[2];
 
@@ -1254,8 +1322,7 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
                */
                wmb();
 
-               hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index,
-                                  &to_hr_dev(hr_cq->ib_cq.device)->cq_db_lock);
+               hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
        }
 }
 
@@ -1485,7 +1552,8 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,
                /* SQ conrespond to CQE */
                sq_wqe = get_send_wqe(*cur_qp, roce_get_field(cqe->cqe_byte_4,
                                                CQE_BYTE_4_WQE_INDEX_M,
-                                               CQE_BYTE_4_WQE_INDEX_S));
+                                               CQE_BYTE_4_WQE_INDEX_S)&
+                                               ((*cur_qp)->sq.wqe_cnt-1));
                switch (sq_wqe->flag & HNS_ROCE_WQE_OPCODE_MASK) {
                case HNS_ROCE_WQE_OPCODE_SEND:
                        wc->opcode = IB_WC_SEND;
@@ -1591,10 +1659,8 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
                        break;
        }
 
-       if (npolled) {
-               hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index,
-                                     &to_hr_dev(ibcq->device)->cq_db_lock);
-       }
+       if (npolled)
+               hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
 
        spin_unlock_irqrestore(&hr_cq->lock, flags);
 
@@ -1604,6 +1670,74 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
                return ret;
 }
 
+int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
+               struct hns_roce_hem_table *table, int obj)
+{
+       struct device *dev = &hr_dev->pdev->dev;
+       struct hns_roce_v1_priv *priv;
+       unsigned long end = 0, flags = 0;
+       uint32_t bt_cmd_val[2] = {0};
+       void __iomem *bt_cmd;
+       u64 bt_ba = 0;
+
+       priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+
+       switch (table->type) {
+       case HEM_TYPE_QPC:
+               roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
+                       ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC);
+               bt_ba = priv->bt_table.qpc_buf.map >> 12;
+               break;
+       case HEM_TYPE_MTPT:
+               roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
+                       ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_MTPT);
+               bt_ba = priv->bt_table.mtpt_buf.map >> 12;
+               break;
+       case HEM_TYPE_CQC:
+               roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M,
+                       ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC);
+               bt_ba = priv->bt_table.cqc_buf.map >> 12;
+               break;
+       case HEM_TYPE_SRQC:
+               dev_dbg(dev, "HEM_TYPE_SRQC not support.\n");
+               return -EINVAL;
+       default:
+               return 0;
+       }
+       roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M,
+               ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj);
+       roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0);
+       roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1);
+
+       spin_lock_irqsave(&hr_dev->bt_cmd_lock, flags);
+
+       bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG;
+
+       end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies;
+       while (1) {
+               if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) {
+                       if (!(time_before(jiffies, end))) {
+                               dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n");
+                               spin_unlock_irqrestore(&hr_dev->bt_cmd_lock,
+                                       flags);
+                               return -EBUSY;
+                       }
+               } else {
+                       break;
+               }
+               msleep(HW_SYNC_SLEEP_TIME_INTERVAL);
+       }
+
+       bt_cmd_val[0] = (uint32_t)bt_ba;
+       roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
+               ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32);
+       hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
+
+       spin_unlock_irqrestore(&hr_dev->bt_cmd_lock, flags);
+
+       return 0;
+}
+
 static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
                                 struct hns_roce_mtt *mtt,
                                 enum hns_roce_qp_state cur_state,
@@ -1733,13 +1867,10 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
                roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M,
                               QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head);
                roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_PORT_NUM_M,
-                              QP1C_BYTES_16_PORT_NUM_S, hr_qp->port);
+                              QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port);
                roce_set_bit(context->qp1c_bytes_16,
                             QP1C_BYTES_16_SIGNALING_TYPE_S,
                             hr_qp->sq_signal_bits);
-               roce_set_bit(context->qp1c_bytes_16,
-                            QP1C_BYTES_16_LOCAL_ENABLE_E2E_CREDIT_S,
-                            hr_qp->sq_signal_bits);
                roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S,
                             1);
                roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S,
@@ -1784,7 +1915,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 
                /* Copy context to QP1C register */
                addr = (u32 *)(hr_dev->reg_base + ROCEE_QP1C_CFG0_0_REG +
-                       hr_qp->port * sizeof(*context));
+                       hr_qp->phy_port * sizeof(*context));
 
                writel(context->qp1c_bytes_4, addr);
                writel(context->sq_rq_bt_l, addr + 1);
@@ -1795,15 +1926,16 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
                writel(context->qp1c_bytes_28, addr + 6);
                writel(context->qp1c_bytes_32, addr + 7);
                writel(context->cur_sq_wqe_ba_l, addr + 8);
+               writel(context->qp1c_bytes_40, addr + 9);
        }
 
        /* Modify QP1C status */
        reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG +
-                           hr_qp->port * sizeof(*context));
+                           hr_qp->phy_port * sizeof(*context));
        roce_set_field(reg_val, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M,
                       ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state);
        roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG +
-                   hr_qp->port * sizeof(*context), reg_val);
+                   hr_qp->phy_port * sizeof(*context), reg_val);
 
        hr_qp->state = new_state;
        if (new_state == IB_QPS_RESET) {
@@ -1836,12 +1968,10 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
        struct device *dev = &hr_dev->pdev->dev;
        struct hns_roce_qp_context *context;
-       struct hns_roce_rq_db rq_db;
        dma_addr_t dma_handle_2 = 0;
        dma_addr_t dma_handle = 0;
        uint32_t doorbell[2] = {0};
        int rq_pa_start = 0;
-       u32 reg_val = 0;
        u64 *mtts_2 = NULL;
        int ret = -EINVAL;
        u64 *mtts = NULL;
@@ -2119,7 +2249,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 
                roce_set_field(context->qpc_bytes_68,
                               QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M,
-                              QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S, 0);
+                              QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S,
+                              hr_qp->rq.head);
                roce_set_field(context->qpc_bytes_68,
                               QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M,
                               QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0);
@@ -2186,7 +2317,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
                roce_set_field(context->qpc_bytes_156,
                               QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
                               QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S,
-                              hr_qp->port);
+                              hr_qp->phy_port);
                roce_set_field(context->qpc_bytes_156,
                               QP_CONTEXT_QPC_BYTES_156_SL_M,
                               QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl);
@@ -2257,20 +2388,17 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
                roce_set_bit(context->qpc_bytes_140,
                             QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S, 0);
 
-               roce_set_field(context->qpc_bytes_144,
-                              QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
-                              QP_CONTEXT_QPC_BYTES_144_QP_STATE_S,
-                              attr->qp_state);
-
                roce_set_field(context->qpc_bytes_148,
                               QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M,
                               QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S, 0);
                roce_set_field(context->qpc_bytes_148,
                               QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
-                              QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S, 0);
+                              QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S,
+                              attr->retry_cnt);
                roce_set_field(context->qpc_bytes_148,
                               QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M,
-                              QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S, 0);
+                              QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S,
+                              attr->rnr_retry);
                roce_set_field(context->qpc_bytes_148,
                               QP_CONTEXT_QPC_BYTES_148_LSN_M,
                               QP_CONTEXT_QPC_BYTES_148_LSN_S, 0x100);
@@ -2281,10 +2409,19 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
                               QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M,
                               QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S,
                               attr->retry_cnt);
-               roce_set_field(context->qpc_bytes_156,
-                              QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
-                              QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S,
-                              attr->timeout);
+               if (attr->timeout < 0x12) {
+                       dev_info(dev, "ack timeout value(0x%x) must bigger than 0x12.\n",
+                                attr->timeout);
+                       roce_set_field(context->qpc_bytes_156,
+                                      QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
+                                      QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S,
+                                      0x12);
+               } else {
+                       roce_set_field(context->qpc_bytes_156,
+                                      QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M,
+                                      QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S,
+                                      attr->timeout);
+               }
                roce_set_field(context->qpc_bytes_156,
                               QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M,
                               QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S,
@@ -2292,7 +2429,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
                roce_set_field(context->qpc_bytes_156,
                               QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
                               QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S,
-                              hr_qp->port);
+                              hr_qp->phy_port);
                roce_set_field(context->qpc_bytes_156,
                               QP_CONTEXT_QPC_BYTES_156_SL_M,
                               QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl);
@@ -2357,21 +2494,15 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
                               QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M,
                               QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S,
                               0);
-       } else if ((cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) ||
+       } else if (!((cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) ||
                   (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) ||
                   (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) ||
                   (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) ||
                   (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) ||
                   (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) ||
                   (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) ||
-                  (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) {
-               roce_set_field(context->qpc_bytes_144,
-                              QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
-                              QP_CONTEXT_QPC_BYTES_144_QP_STATE_S,
-                              attr->qp_state);
-
-       } else {
-               dev_err(dev, "not support this modify\n");
+                  (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR))) {
+               dev_err(dev, "not support this status migration\n");
                goto out;
        }
 
@@ -2397,43 +2528,32 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
        if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
                /* Memory barrier */
                wmb();
-               if (hr_qp->ibqp.qp_type == IB_QPT_GSI) {
-                       /* SW update GSI rq header */
-                       reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG3_0_REG +
-                                           QP1C_CFGN_OFFSET * hr_qp->port);
-                       roce_set_field(reg_val,
-                                      ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M,
-                                      ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S,
-                                      hr_qp->rq.head);
-                       roce_write(hr_dev, ROCEE_QP1C_CFG3_0_REG +
-                                   QP1C_CFGN_OFFSET * hr_qp->port, reg_val);
-               } else {
-                       rq_db.u32_4 = 0;
-                       rq_db.u32_8 = 0;
-
-                       roce_set_field(rq_db.u32_4, RQ_DOORBELL_U32_4_RQ_HEAD_M,
-                                      RQ_DOORBELL_U32_4_RQ_HEAD_S,
-                                      hr_qp->rq.head);
-                       roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_QPN_M,
-                                      RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn);
-                       roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_CMD_M,
-                                      RQ_DOORBELL_U32_8_CMD_S, 1);
-                       roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S,
-                                    1);
 
-                       doorbell[0] = rq_db.u32_4;
-                       doorbell[1] = rq_db.u32_8;
-
-                       hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l);
+               roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M,
+                              RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head);
+               roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M,
+                              RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn);
+               roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_CMD_M,
+                              RQ_DOORBELL_U32_8_CMD_S, 1);
+               roce_set_bit(doorbell[1], RQ_DOORBELL_U32_8_HW_SYNC_S, 1);
+
+               if (ibqp->uobject) {
+                       hr_qp->rq.db_reg_l = hr_dev->reg_base +
+                                    ROCEE_DB_OTHERS_L_0_REG +
+                                    DB_REG_OFFSET * hr_dev->priv_uar.index;
                }
+
+               hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l);
        }
 
        hr_qp->state = new_state;
 
        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
                hr_qp->resp_depth = attr->max_dest_rd_atomic;
-       if (attr_mask & IB_QP_PORT)
-               hr_qp->port = (attr->port_num - 1);
+       if (attr_mask & IB_QP_PORT) {
+               hr_qp->port = attr->port_num - 1;
+               hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
+       }
 
        if (new_state == IB_QPS_RESET && !ibqp->uobject) {
                hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn,
@@ -2789,6 +2909,7 @@ struct hns_roce_hw hns_roce_hw_v1 = {
        .set_mtu = hns_roce_v1_set_mtu,
        .write_mtpt = hns_roce_v1_write_mtpt,
        .write_cqc = hns_roce_v1_write_cqc,
+       .clear_hem = hns_roce_v1_clear_hem,
        .modify_qp = hns_roce_v1_modify_qp,
        .query_qp = hns_roce_v1_query_qp,
        .destroy_qp = hns_roce_v1_destroy_qp,
index 316b592b1636df29be7939c4d7ca11abbdfce125..539b0a3b92b09a17dd663497c62b2607a8e94878 100644 (file)
 #define HNS_ROCE_V1_EXT_ODB_ALFUL      \
        (HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD)
 
+#define HNS_ROCE_BT_RSV_BUF_SIZE                       (1 << 17)
+
 #define HNS_ROCE_ODB_POLL_MODE                         0
 
 #define HNS_ROCE_SDB_NORMAL_MODE                       0
@@ -971,9 +973,16 @@ struct hns_roce_db_table {
        struct hns_roce_ext_db *ext_db;
 };
 
+struct hns_roce_bt_table {
+       struct hns_roce_buf_list qpc_buf;
+       struct hns_roce_buf_list mtpt_buf;
+       struct hns_roce_buf_list cqc_buf;
+};
+
 struct hns_roce_v1_priv {
        struct hns_roce_db_table  db_table;
        struct hns_roce_raq_table raq_table;
+       struct hns_roce_bt_table  bt_table;
 };
 
 int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset);
index f64f0dde9a882c7ee697d990e4bf7acae6765610..764e35a54457e7c0c8bfde46577df1e3962d833c 100644 (file)
@@ -355,8 +355,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
        props->max_qp = hr_dev->caps.num_qps;
        props->max_qp_wr = hr_dev->caps.max_wqes;
        props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
-                                 IB_DEVICE_RC_RNR_NAK_GEN |
-                                 IB_DEVICE_LOCAL_DMA_LKEY;
+                                 IB_DEVICE_RC_RNR_NAK_GEN;
        props->max_sge = hr_dev->caps.max_sq_sg;
        props->max_sge_rd = 1;
        props->max_cq = hr_dev->caps.num_cqs;
@@ -372,6 +371,25 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
        return 0;
 }
 
+/* Look up the net device recorded for an IB port.
+ * Returns NULL when port_num is outside 1..caps.num_ports or when no
+ * netdev is stored for that port; otherwise returns the netdev after
+ * taking a reference on it with dev_hold().
+ */
+static struct net_device *hns_roce_get_netdev(struct ib_device *ib_dev,
+                                             u8 port_num)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+       struct net_device *ndev = NULL;
+
+       if (port_num >= 1 && port_num <= hr_dev->caps.num_ports) {
+               rcu_read_lock();
+
+               /* Ports are 1-based, the netdevs[] array is 0-based */
+               ndev = hr_dev->iboe.netdevs[port_num - 1];
+               if (ndev)
+                       dev_hold(ndev);
+
+               rcu_read_unlock();
+       }
+
+       return ndev;
+}
+
 static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
                               struct ib_port_attr *props)
 {
@@ -584,6 +602,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
        struct device *dev = &hr_dev->pdev->dev;
 
        iboe = &hr_dev->iboe;
+       spin_lock_init(&iboe->lock);
 
        ib_dev = &hr_dev->ib_dev;
        strlcpy(ib_dev->name, "hisi_%d", IB_DEVICE_NAME_MAX);
@@ -618,6 +637,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
        ib_dev->query_port              = hns_roce_query_port;
        ib_dev->modify_port             = hns_roce_modify_port;
        ib_dev->get_link_layer          = hns_roce_get_link_layer;
+       ib_dev->get_netdev              = hns_roce_get_netdev;
        ib_dev->query_gid               = hns_roce_query_gid;
        ib_dev->query_pkey              = hns_roce_query_pkey;
        ib_dev->alloc_ucontext          = hns_roce_alloc_ucontext;
@@ -667,8 +687,6 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
                goto error_failed_setup_mtu_gids;
        }
 
-       spin_lock_init(&iboe->lock);
-
        iboe->nb.notifier_call = hns_roce_netdev_event;
        ret = register_netdevice_notifier(&iboe->nb);
        if (ret) {
@@ -777,6 +795,15 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev)
        if (IS_ERR(hr_dev->reg_base))
                return PTR_ERR(hr_dev->reg_base);
 
+       /* read the node_guid of IB device from the DT or ACPI */
+       ret = device_property_read_u8_array(dev, "node-guid",
+                                           (u8 *)&hr_dev->ib_dev.node_guid,
+                                           GUID_LEN);
+       if (ret) {
+               dev_err(dev, "couldn't get node_guid from DT or ACPI!\n");
+               return ret;
+       }
+
        /* get the RoCE associated ethernet ports or netdevices */
        for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) {
                if (dev_of_node(dev)) {
@@ -923,7 +950,6 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
        struct device *dev = &hr_dev->pdev->dev;
 
        spin_lock_init(&hr_dev->sm_lock);
-       spin_lock_init(&hr_dev->cq_db_lock);
        spin_lock_init(&hr_dev->bt_cmd_lock);
 
        ret = hns_roce_init_uar_table(hr_dev);
index 59f5e2be046b326c7a3f0a8a2355f2ac33df4ab5..fb87883ead340423e087e14331487f735ec6e55f 100644 (file)
@@ -564,11 +564,14 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        if (mr->umem->page_size != HNS_ROCE_HEM_PAGE_SIZE) {
                dev_err(dev, "Just support 4K page size but is 0x%x now!\n",
                        mr->umem->page_size);
+               ret = -EINVAL;
+               goto err_umem;
        }
 
        if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
                dev_err(dev, " MR len %lld err. MR is limited to 4G at most!\n",
                        length);
+               ret = -EINVAL;
                goto err_umem;
        }
 
index 16271b5bd1701deceaab511e9ad7d8813d555a6d..05db7d59812a6db5eb014a87e0d3566122442719 100644 (file)
 
 static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn)
 {
-       struct device *dev = &hr_dev->pdev->dev;
-       unsigned long pd_number;
-       int ret = 0;
-
-       ret = hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, &pd_number);
-       if (ret == -1) {
-               dev_err(dev, "alloc pdn from pdbitmap failed\n");
-               return -ENOMEM;
-       }
-
-       *pdn = pd_number;
-
-       return 0;
+       return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn);
 }
 
 static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn)
@@ -117,9 +105,15 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
        if (ret == -1)
                return -ENOMEM;
 
-       uar->index = (uar->index - 1) % hr_dev->caps.phy_num_uars + 1;
+       if (uar->index > 0)
+               uar->index = (uar->index - 1) %
+                            (hr_dev->caps.phy_num_uars - 1) + 1;
 
        res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(&hr_dev->pdev->dev, "memory resource not found!\n");
+               return -EINVAL;
+       }
        uar->pfn = ((res->start) >> PAGE_SHIFT) + uar->index;
 
        return 0;
index 645c18d809a5de94bfb2120caca67efda242e928..e86dd8d0677760c4aa38432405b9dd977b35c610 100644 (file)
  */
 
 #include <linux/platform_device.h>
+#include <rdma/ib_addr.h>
 #include <rdma/ib_umem.h>
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
 #include "hns_roce_hem.h"
 #include "hns_roce_user.h"
 
-#define DB_REG_OFFSET                  0x1000
-#define SQP_NUM                                12
+#define SQP_NUM                                (2 * HNS_ROCE_MAX_PORTS)
 
 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
 {
@@ -113,16 +113,8 @@ static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt,
                                     int align, unsigned long *base)
 {
        struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
-       int ret = 0;
-       unsigned long qpn;
-
-       ret = hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, &qpn);
-       if (ret == -1)
-               return -ENOMEM;
-
-       *base = qpn;
 
-       return 0;
+       return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, base);
 }
 
 enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
@@ -255,7 +247,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
 {
        struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
 
-       if (base_qpn < (hr_dev->caps.sqp_start + 2 * hr_dev->caps.num_ports))
+       if (base_qpn < SQP_NUM)
                return;
 
        hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt);
@@ -345,12 +337,10 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
 
 static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
                                       struct ib_qp_cap *cap,
-                                      enum ib_qp_type type,
                                       struct hns_roce_qp *hr_qp)
 {
        struct device *dev = &hr_dev->pdev->dev;
        u32 max_cnt;
-       (void)type;
 
        if (cap->max_send_wr  > hr_dev->caps.max_wqes  ||
            cap->max_send_sge > hr_dev->caps.max_sq_sg ||
@@ -476,7 +466,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 
                /* Set SQ size */
                ret = hns_roce_set_kernel_sq_size(hr_dev, &init_attr->cap,
-                                                 init_attr->qp_type, hr_qp);
+                                                 hr_qp);
                if (ret) {
                        dev_err(dev, "hns_roce_set_kernel_sq_size error!\n");
                        goto err_out;
@@ -617,21 +607,19 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
                        return ERR_PTR(-ENOMEM);
 
                hr_qp = &hr_sqp->hr_qp;
+               hr_qp->port = init_attr->port_num - 1;
+               hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
+               hr_qp->ibqp.qp_num = HNS_ROCE_MAX_PORTS +
+                                    hr_dev->iboe.phy_port[hr_qp->port];
 
                ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata,
-                                               hr_dev->caps.sqp_start +
-                                               hr_dev->caps.num_ports +
-                                               init_attr->port_num - 1, hr_qp);
+                                               hr_qp->ibqp.qp_num, hr_qp);
                if (ret) {
                        dev_err(dev, "Create GSI QP failed!\n");
                        kfree(hr_sqp);
                        return ERR_PTR(ret);
                }
 
-               hr_qp->port = (init_attr->port_num - 1);
-               hr_qp->ibqp.qp_num = hr_dev->caps.sqp_start +
-                                    hr_dev->caps.num_ports +
-                                    init_attr->port_num - 1;
                break;
        }
        default:{
@@ -670,6 +658,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        struct device *dev = &hr_dev->pdev->dev;
        int ret = -EINVAL;
        int p;
+       enum ib_mtu active_mtu;
 
        mutex_lock(&hr_qp->mutex);
 
@@ -700,6 +689,19 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
                }
        }
 
+       if (attr_mask & IB_QP_PATH_MTU) {
+               p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port;
+               active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu);
+
+               if (attr->path_mtu > IB_MTU_2048 ||
+                   attr->path_mtu < IB_MTU_256 ||
+                   attr->path_mtu > active_mtu) {
+                       dev_err(dev, "attr path_mtu(%d)invalid while modify qp",
+                               attr->path_mtu);
+                       goto out;
+               }
+       }
+
        if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
            attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) {
                dev_err(dev, "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n",
@@ -782,29 +784,11 @@ static void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
 
 void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
 {
-       struct ib_qp *ibqp = &hr_qp->ibqp;
-       struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
-
-       if ((n < 0) || (n > hr_qp->rq.wqe_cnt)) {
-               dev_err(&hr_dev->pdev->dev, "rq wqe index:%d,rq wqe cnt:%d\r\n",
-                       n, hr_qp->rq.wqe_cnt);
-               return NULL;
-       }
-
        return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift));
 }
 
 void *get_send_wqe(struct hns_roce_qp *hr_qp, int n)
 {
-       struct ib_qp *ibqp = &hr_qp->ibqp;
-       struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
-
-       if ((n < 0) || (n > hr_qp->sq.wqe_cnt)) {
-               dev_err(&hr_dev->pdev->dev, "sq wqe index:%d,sq wqe cnt:%d\r\n",
-                       n, hr_qp->sq.wqe_cnt);
-               return NULL;
-       }
-
        return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift));
 }
 
@@ -837,8 +821,7 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev)
 
        /* A port include two SQP, six port total 12 */
        ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps,
-                                  hr_dev->caps.num_qps - 1,
-                                  hr_dev->caps.sqp_start + SQP_NUM,
+                                  hr_dev->caps.num_qps - 1, SQP_NUM,
                                   reserved_from_top);
        if (ret) {
                dev_err(&hr_dev->pdev->dev, "qp bitmap init failed!error=%d\n",
index 5fc62336273132a80eebe1e8f3045758edd12d72..b9bf0759f10a54f37e242b8d07c54c56a063e859 100644 (file)
@@ -102,7 +102,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
        if (vlan_tag < 0x1000)
                vlan_tag |= (ah_attr->sl & 7) << 13;
        ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
-       ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+       ret = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
+       if (ret < 0)
+               return ERR_PTR(ret);
+       ah->av.eth.gid_index = ret;
        ah->av.eth.vlan = cpu_to_be16(vlan_tag);
        ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
        if (ah_attr->static_rate) {
index 1ea686b9e0f963cbfb49dc22fa4333f30922431e..6a0fec357daecdd1e049690af1495fc333b8c9bc 100644 (file)
@@ -253,11 +253,14 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
        if (context)
                if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof (__u32))) {
                        err = -EFAULT;
-                       goto err_dbmap;
+                       goto err_cq_free;
                }
 
        return &cq->ibcq;
 
+err_cq_free:
+       mlx4_cq_free(dev->dev, &cq->mcq);
+
 err_dbmap:
        if (context)
                mlx4_ib_db_unmap_user(to_mucontext(context), &cq->db);
index 79d017baf6f49dac160090d1e75ec75fd8b3cf2c..fcd04b881ec1924eb679827e18c0a2f8f6f3fd39 100644 (file)
@@ -932,8 +932,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
                if (err)
                        goto err_create;
        } else {
-               /* for now choose 64 bytes till we have a proper interface */
-               cqe_size = 64;
+               cqe_size = cache_line_size() == 128 ? 128 : 64;
                err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
                                       &index, &inlen);
                if (err)
index 22174774dbb8c392709936b0eb225d3e6768d2c4..2be65ddf56ba8ba311cce5497258caf030bfc47a 100644 (file)
@@ -1019,7 +1019,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
        resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
        if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
                resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
-       resp.cache_line_size = L1_CACHE_BYTES;
+       resp.cache_line_size = cache_line_size();
        resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
        resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
        resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
@@ -1771,13 +1771,13 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
        mutex_lock(&dev->flow_db.lock);
 
        list_for_each_entry_safe(iter, tmp, &handler->list, list) {
-               mlx5_del_flow_rule(iter->rule);
+               mlx5_del_flow_rules(iter->rule);
                put_flow_table(dev, iter->prio, true);
                list_del(&iter->list);
                kfree(iter);
        }
 
-       mlx5_del_flow_rule(handler->rule);
+       mlx5_del_flow_rules(handler->rule);
        put_flow_table(dev, handler->prio, true);
        mutex_unlock(&dev->flow_db.lock);
 
@@ -1857,7 +1857,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
                ft = mlx5_create_auto_grouped_flow_table(ns, priority,
                                                         num_entries,
                                                         num_groups,
-                                                        0);
+                                                        0, 0);
 
                if (!IS_ERR(ft)) {
                        prio->refcount = 0;
@@ -1877,10 +1877,10 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 {
        struct mlx5_flow_table  *ft = ft_prio->flow_table;
        struct mlx5_ib_flow_handler *handler;
+       struct mlx5_flow_act flow_act = {0};
        struct mlx5_flow_spec *spec;
        const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
        unsigned int spec_index;
-       u32 action;
        int err = 0;
 
        if (!is_valid_attr(flow_attr))
@@ -1905,12 +1905,12 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
        }
 
        spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
-       action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+       flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
                MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
-       handler->rule = mlx5_add_flow_rule(ft, spec,
-                                          action,
-                                          MLX5_FS_DEFAULT_FLOW_TAG,
-                                          dst);
+       flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+       handler->rule = mlx5_add_flow_rules(ft, spec,
+                                           &flow_act,
+                                           dst, 1);
 
        if (IS_ERR(handler->rule)) {
                err = PTR_ERR(handler->rule);
@@ -1941,7 +1941,7 @@ static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *de
                handler_dst = create_flow_rule(dev, ft_prio,
                                               flow_attr, dst);
                if (IS_ERR(handler_dst)) {
-                       mlx5_del_flow_rule(handler->rule);
+                       mlx5_del_flow_rules(handler->rule);
                        ft_prio->refcount--;
                        kfree(handler);
                        handler = handler_dst;
@@ -2004,7 +2004,7 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de
                                                 &leftovers_specs[LEFTOVERS_UC].flow_attr,
                                                 dst);
                if (IS_ERR(handler_ucast)) {
-                       mlx5_del_flow_rule(handler->rule);
+                       mlx5_del_flow_rules(handler->rule);
                        ft_prio->refcount--;
                        kfree(handler);
                        handler = handler_ucast;
@@ -2046,7 +2046,7 @@ static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
        return handler_rx;
 
 err_tx:
-       mlx5_del_flow_rule(handler_rx->rule);
+       mlx5_del_flow_rules(handler_rx->rule);
        ft_rx->refcount--;
        kfree(handler_rx);
 err:
@@ -2311,14 +2311,14 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 {
        struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
        struct ib_event ibev;
-
+       bool fatal = false;
        u8 port = 0;
 
        switch (event) {
        case MLX5_DEV_EVENT_SYS_ERROR:
-               ibdev->ib_active = false;
                ibev.event = IB_EVENT_DEVICE_FATAL;
                mlx5_ib_handle_internal_error(ibdev);
+               fatal = true;
                break;
 
        case MLX5_DEV_EVENT_PORT_UP:
@@ -2358,6 +2358,8 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
                ibev.event = IB_EVENT_CLIENT_REREGISTER;
                port = (u8)param;
                break;
+       default:
+               return;
        }
 
        ibev.device           = &ibdev->ib_dev;
@@ -2370,6 +2372,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 
        if (ibdev->ib_active)
                ib_dispatch_event(&ibev);
+
+       if (fatal)
+               ibdev->ib_active = false;
 }
 
 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
@@ -3115,7 +3120,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        }
        err = init_node_data(dev);
        if (err)
-               goto err_dealloc;
+               goto err_free_port;
 
        mutex_init(&dev->flow_db.lock);
        mutex_init(&dev->cap_mask_mutex);
@@ -3125,7 +3130,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        if (ll == IB_LINK_LAYER_ETHERNET) {
                err = mlx5_enable_roce(dev);
                if (err)
-                       goto err_dealloc;
+                       goto err_free_port;
        }
 
        err = create_dev_resources(&dev->devr);
index dcdcd195fe53a4dd003b0d22426d146bb81c0380..854748b6121204bf26eeebe920c6195c35b2dd04 100644 (file)
@@ -153,7 +153,7 @@ struct mlx5_ib_flow_handler {
        struct list_head                list;
        struct ib_flow                  ibflow;
        struct mlx5_ib_flow_prio        *prio;
-       struct mlx5_flow_rule   *rule;
+       struct mlx5_flow_handle         *rule;
 };
 
 struct mlx5_ib_flow_db {
@@ -626,6 +626,8 @@ struct mlx5_ib_dev {
        struct mlx5_ib_resources        devr;
        struct mlx5_mr_cache            cache;
        struct timer_list               delay_timer;
+       /* Prevents soft lock on massive reg MRs */
+       struct mutex                    slow_path_mutex;
        int                             fill_delay;
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
        struct ib_odp_caps      odp_caps;
index d4ad672b905bf0068b59b2ebffe8de8a85f035e1..4e9012463c37de6381cddde1c527e4c2ff6e84ae 100644 (file)
@@ -610,6 +610,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
        int err;
        int i;
 
+       mutex_init(&dev->slow_path_mutex);
        cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
        if (!cache->wq) {
                mlx5_ib_warn(dev, "failed to create work queue\n");
@@ -1182,9 +1183,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                goto error;
        }
 
-       if (!mr)
+       if (!mr) {
+               mutex_lock(&dev->slow_path_mutex);
                mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
                                page_shift, access_flags);
+               mutex_unlock(&dev->slow_path_mutex);
+       }
 
        if (IS_ERR(mr)) {
                err = PTR_ERR(mr);
index 41f4c2afbcdd6264a05c38d9c0cd2ce7d807bccc..d1e921816bfee3596c961e6671d87a84a3caa0ab 100644 (file)
@@ -52,7 +52,6 @@ enum {
 
 enum {
        MLX5_IB_SQ_STRIDE       = 6,
-       MLX5_IB_CACHE_LINE_SIZE = 64,
 };
 
 static const u32 mlx5_ib_opcode[] = {
@@ -2052,8 +2051,8 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 
                mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
                            qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
-                           to_mcq(init_attr->recv_cq)->mcq.cqn,
-                           to_mcq(init_attr->send_cq)->mcq.cqn);
+                           init_attr->recv_cq ? to_mcq(init_attr->recv_cq)->mcq.cqn : -1,
+                           init_attr->send_cq ? to_mcq(init_attr->send_cq)->mcq.cqn : -1);
 
                qp->trans_qp.xrcdn = xrcdn;
 
@@ -4815,6 +4814,14 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
                                 udata->inlen))
                return ERR_PTR(-EOPNOTSUPP);
 
+       if (init_attr->log_ind_tbl_size >
+           MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
+               mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n",
+                           init_attr->log_ind_tbl_size,
+                           MLX5_CAP_GEN(dev->mdev, log_max_rqt_size));
+               return ERR_PTR(-EINVAL);
+       }
+
        min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
        if (udata->outlen && udata->outlen < min_resp_len)
                return ERR_PTR(-EINVAL);
index 6c00d04b8b2852369f4f43d78c39c68898ff4640..c6fe89d79248fa28d05282440ede6f67cd35e75c 100644 (file)
@@ -472,7 +472,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
                goto out;
        }
 
-       ret = get_user_pages(uaddr & PAGE_MASK, 1, 1, 0, pages, NULL);
+       ret = get_user_pages(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages, NULL);
        if (ret < 0)
                goto out;
 
diff --git a/drivers/infiniband/hw/qedr/Kconfig b/drivers/infiniband/hw/qedr/Kconfig
new file mode 100644 (file)
index 0000000..6c9f392
--- /dev/null
@@ -0,0 +1,8 @@
+config INFINIBAND_QEDR
+       tristate "QLogic RoCE driver"
+       depends on 64BIT && QEDE
+       select QED_LL2
+       select QED_RDMA
+       ---help---
+         This driver provides low-level InfiniBand over Ethernet
+         support for QLogic QED host channel adapters (HCAs).
diff --git a/drivers/infiniband/hw/qedr/Makefile b/drivers/infiniband/hw/qedr/Makefile
new file mode 100644 (file)
index 0000000..ba7067c
--- /dev/null
@@ -0,0 +1,3 @@
+obj-$(CONFIG_INFINIBAND_QEDR) := qedr.o
+
+qedr-y := main.o verbs.o qedr_cm.o
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
new file mode 100644 (file)
index 0000000..7b74d09
--- /dev/null
@@ -0,0 +1,914 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/netdevice.h>
+#include <linux/iommu.h>
+#include <net/addrconf.h>
+#include <linux/qed/qede_roce.h>
+#include <linux/qed/qed_chain.h>
+#include <linux/qed/qed_if.h>
+#include "qedr.h"
+#include "verbs.h"
+#include <rdma/qedr-abi.h>
+
+MODULE_DESCRIPTION("QLogic 40G/100G ROCE Driver");
+MODULE_AUTHOR("QLogic Corporation");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(QEDR_MODULE_VERSION);
+
+#define QEDR_WQ_MULTIPLIER_DFT (3)
+
+/* Build an ib_event of the given type for port_num on this device's
+ * embedded ib_device and pass it to ib_dispatch_event().
+ */
+void qedr_ib_dispatch_event(struct qedr_dev *dev, u8 port_num,
+                           enum ib_event_type type)
+{
+       struct ib_event ibev = {
+               .device = &dev->ibdev,
+               .element.port_num = port_num,
+               .event = type,
+       };
+
+       ib_dispatch_event(&ibev);
+}
+
+/* Link-layer query: always reports IB_LINK_LAYER_ETHERNET; neither the
+ * device nor the port number is consulted.
+ */
+static enum rdma_link_layer qedr_link_layer(struct ib_device *device,
+                                           u8 port_num)
+{
+       return IB_LINK_LAYER_ETHERNET;
+}
+
+/* Format the firmware version into str.
+ * The low 32 bits of attr.fw_ver are split into four bytes, most
+ * significant first, and printed in decimal; snprintf() bounds the
+ * output to str_len bytes.
+ * NOTE(review): the fields are separated by ". " (dot plus space) -
+ * confirm that the embedded spaces are intended.
+ */
+static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str,
+                               size_t str_len)
+{
+       struct qedr_dev *qedr = get_qedr_dev(ibdev);
+       u32 fw_ver = (u32)qedr->attr.fw_ver;
+
+       /* The masked fields have type unsigned int, so print them with %u */
+       snprintf(str, str_len, "%u. %u. %u. %u",
+                (fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF,
+                (fw_ver >> 8) & 0xFF, fw_ver & 0xFF);
+}
+
+/* Return the net device stored in the qedr device, after taking a
+ * reference on it with dev_hold(). port_num is not consulted.
+ */
+static struct net_device *qedr_get_netdev(struct ib_device *dev, u8 port_num)
+{
+       struct net_device *ndev = get_qedr_dev(dev)->ndev;
+
+       /* The HW vendor's device driver must guarantee
+        * that this function returns NULL before the net device reaches
+        * NETDEV_UNREGISTER_FINAL state.
+        * NOTE(review): nothing in this function returns NULL, so that
+        * guarantee has to come from elsewhere - confirm.
+        */
+       dev_hold(ndev);
+
+       return ndev;
+}
+
+/* Populate the ib_device embedded in dev - name template, node identity,
+ * user-verbs command mask, port/vector counts and the verb callbacks -
+ * and register it. Returns the result of ib_register_device().
+ */
+static int qedr_register_device(struct qedr_dev *dev)
+{
+       struct ib_device *ibdev = &dev->ibdev;
+
+       /* Identity */
+       strlcpy(ibdev->name, "qedr%d", IB_DEVICE_NAME_MAX);
+
+       ibdev->node_guid = dev->attr.node_guid;
+       memcpy(ibdev->node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
+       ibdev->owner = THIS_MODULE;
+       ibdev->uverbs_abi_ver = QEDR_ABI_VERSION;
+
+       /* User-verbs commands advertised for this device */
+       ibdev->uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) |
+                                QEDR_UVERBS(QUERY_DEVICE) |
+                                QEDR_UVERBS(QUERY_PORT) |
+                                QEDR_UVERBS(ALLOC_PD) |
+                                QEDR_UVERBS(DEALLOC_PD) |
+                                QEDR_UVERBS(CREATE_COMP_CHANNEL) |
+                                QEDR_UVERBS(CREATE_CQ) |
+                                QEDR_UVERBS(RESIZE_CQ) |
+                                QEDR_UVERBS(DESTROY_CQ) |
+                                QEDR_UVERBS(REQ_NOTIFY_CQ) |
+                                QEDR_UVERBS(CREATE_QP) |
+                                QEDR_UVERBS(MODIFY_QP) |
+                                QEDR_UVERBS(QUERY_QP) |
+                                QEDR_UVERBS(DESTROY_QP) |
+                                QEDR_UVERBS(REG_MR) |
+                                QEDR_UVERBS(DEREG_MR) |
+                                QEDR_UVERBS(POLL_CQ) |
+                                QEDR_UVERBS(POST_SEND) |
+                                QEDR_UVERBS(POST_RECV);
+
+       ibdev->phys_port_cnt = 1;
+       ibdev->num_comp_vectors = dev->num_cnq;
+       ibdev->node_type = RDMA_NODE_IB_CA;
+
+       /* Device and port queries */
+       ibdev->query_device = qedr_query_device;
+       ibdev->query_port = qedr_query_port;
+       ibdev->modify_port = qedr_modify_port;
+
+       /* GID table */
+       ibdev->query_gid = qedr_query_gid;
+       ibdev->add_gid = qedr_add_gid;
+       ibdev->del_gid = qedr_del_gid;
+
+       /* User contexts */
+       ibdev->alloc_ucontext = qedr_alloc_ucontext;
+       ibdev->dealloc_ucontext = qedr_dealloc_ucontext;
+       ibdev->mmap = qedr_mmap;
+
+       /* Protection domains */
+       ibdev->alloc_pd = qedr_alloc_pd;
+       ibdev->dealloc_pd = qedr_dealloc_pd;
+
+       /* Completion queues */
+       ibdev->create_cq = qedr_create_cq;
+       ibdev->destroy_cq = qedr_destroy_cq;
+       ibdev->resize_cq = qedr_resize_cq;
+       ibdev->req_notify_cq = qedr_arm_cq;
+
+       /* Queue pairs */
+       ibdev->create_qp = qedr_create_qp;
+       ibdev->modify_qp = qedr_modify_qp;
+       ibdev->query_qp = qedr_query_qp;
+       ibdev->destroy_qp = qedr_destroy_qp;
+
+       ibdev->query_pkey = qedr_query_pkey;
+
+       /* Address handles */
+       ibdev->create_ah = qedr_create_ah;
+       ibdev->destroy_ah = qedr_destroy_ah;
+
+       /* Memory regions */
+       ibdev->get_dma_mr = qedr_get_dma_mr;
+       ibdev->dereg_mr = qedr_dereg_mr;
+       ibdev->reg_user_mr = qedr_reg_user_mr;
+       ibdev->alloc_mr = qedr_alloc_mr;
+       ibdev->map_mr_sg = qedr_map_mr_sg;
+
+       /* Work request posting and completion polling */
+       ibdev->poll_cq = qedr_poll_cq;
+       ibdev->post_send = qedr_post_send;
+       ibdev->post_recv = qedr_post_recv;
+
+       ibdev->process_mad = qedr_process_mad;
+       ibdev->get_port_immutable = qedr_port_immutable;
+       ibdev->get_netdev = qedr_get_netdev;
+
+       /* DMA goes through the underlying PCI device */
+       ibdev->dma_device = &dev->pdev->dev;
+
+       ibdev->get_link_layer = qedr_link_layer;
+       ibdev->get_dev_fw_str = qedr_get_dev_fw_str;
+
+       return ib_register_device(ibdev, NULL);
+}
+
+/* Allocate DMA-coherent memory for one fast-path status block and
+ * initialize it through the common ops sb_init() as a QED_SB_TYPE_CNQ
+ * block with id sb_id.
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the sb_init()
+ * error code; in the latter case the memory is freed again.
+ */
+static int qedr_alloc_mem_sb(struct qedr_dev *dev,
+                            struct qed_sb_info *sb_info, u16 sb_id)
+{
+       struct device *dma_dev = &dev->pdev->dev;
+       struct status_block *sb_virt;
+       dma_addr_t sb_phys;
+       int rc;
+
+       sb_virt = dma_alloc_coherent(dma_dev, sizeof(*sb_virt), &sb_phys,
+                                    GFP_KERNEL);
+       if (!sb_virt)
+               return -ENOMEM;
+
+       rc = dev->ops->common->sb_init(dev->cdev, sb_info,
+                                      sb_virt, sb_phys, sb_id,
+                                      QED_SB_TYPE_CNQ);
+       if (!rc)
+               return 0;
+
+       /* Initialization failed: give the buffer back before reporting */
+       pr_err("Status block initialization failed\n");
+       dma_free_coherent(dma_dev, sizeof(*sb_virt), sb_virt, sb_phys);
+
+       return rc;
+}
+
+/* Release one status block: tell the qed core to release it, then free its
+ * DMA-coherent memory. A status block without backing memory is ignored.
+ */
+static void qedr_free_mem_sb(struct qedr_dev *dev,
+                            struct qed_sb_info *sb_info, int sb_id)
+{
+       if (!sb_info->sb_virt)
+               return;
+
+       dev->ops->common->sb_release(dev->cdev, sb_info, sb_id);
+       dma_free_coherent(&dev->pdev->dev, sizeof(*sb_info->sb_virt),
+                         (void *)sb_info->sb_virt, sb_info->sb_phys);
+}
+
+/* Free everything qedr_alloc_resources() set up: the status block and PBL
+ * chain of every CNQ, then the CNQ, status-block and SGID arrays.
+ */
+static void qedr_free_resources(struct qedr_dev *dev)
+{
+       int idx;
+
+       for (idx = 0; idx < dev->num_cnq; idx++) {
+               struct qed_sb_info *sb = &dev->sb_array[idx];
+               struct qedr_cnq *cnq = &dev->cnq_array[idx];
+
+               qedr_free_mem_sb(dev, sb, dev->sb_start + idx);
+               dev->ops->common->chain_free(dev->cdev, &cnq->pbl);
+       }
+
+       kfree(dev->cnq_array);
+       kfree(dev->sb_array);
+       kfree(dev->sgid_tbl);
+}
+
+/* Allocate the per-device software resources: the source-GID table and,
+ * for each of the dev->num_cnq completion notification queues (CNQs), a
+ * status block and a PBL chain.
+ *
+ * Each CNQ chain element is read by qedr_irq_handler() as a struct regpair
+ * (hi/lo halves of a CQ handle), so the chain element size is the size of
+ * that structure, not of a pointer to it.
+ *
+ * Returns 0 on success. On failure everything allocated here is released
+ * again and a negative errno is returned.
+ */
+static int qedr_alloc_resources(struct qedr_dev *dev)
+{
+       struct qedr_cnq *cnq;
+       __le16 *cons_pi;
+       u16 n_entries;
+       int i, rc;
+
+       dev->sgid_tbl = kcalloc(QEDR_MAX_SGID, sizeof(*dev->sgid_tbl),
+                               GFP_KERNEL);
+       if (!dev->sgid_tbl)
+               return -ENOMEM;
+
+       spin_lock_init(&dev->sgid_lock);
+
+       /* Allocate Status blocks for CNQ */
+       dev->sb_array = kcalloc(dev->num_cnq, sizeof(*dev->sb_array),
+                               GFP_KERNEL);
+       if (!dev->sb_array) {
+               rc = -ENOMEM;
+               goto err1;
+       }
+
+       dev->cnq_array = kcalloc(dev->num_cnq,
+                                sizeof(*dev->cnq_array), GFP_KERNEL);
+       if (!dev->cnq_array) {
+               rc = -ENOMEM;
+               goto err2;
+       }
+
+       dev->sb_start = dev->ops->rdma_get_start_sb(dev->cdev);
+
+       /* Allocate CNQ PBLs */
+       n_entries = min_t(u32, QED_RDMA_MAX_CNQ_SIZE, QEDR_ROCE_MAX_CNQ_SIZE);
+       for (i = 0; i < dev->num_cnq; i++) {
+               cnq = &dev->cnq_array[i];
+
+               rc = qedr_alloc_mem_sb(dev, &dev->sb_array[i],
+                                      dev->sb_start + i);
+               if (rc)
+                       goto err3;
+
+               rc = dev->ops->common->chain_alloc(dev->cdev,
+                                                  QED_CHAIN_USE_TO_CONSUME,
+                                                  QED_CHAIN_MODE_PBL,
+                                                  QED_CHAIN_CNT_TYPE_U16,
+                                                  n_entries,
+                                                  sizeof(struct regpair),
+                                                  &cnq->pbl);
+               if (rc)
+                       goto err4;
+
+               cnq->dev = dev;
+               cnq->sb = &dev->sb_array[i];
+               cons_pi = dev->sb_array[i].sb_virt->pi_array;
+               cnq->hw_cons_ptr = &cons_pi[QED_ROCE_PROTOCOL_INDEX];
+               cnq->index = i;
+               /* Bounded write: the name is later used as the IRQ name */
+               snprintf(cnq->name, sizeof(cnq->name), "qedr%d@pci:%s", i,
+                        pci_name(dev->pdev));
+
+               DP_DEBUG(dev, QEDR_MSG_INIT, "cnq[%d].cons=%d\n",
+                        i, qed_chain_get_cons_idx(&cnq->pbl));
+       }
+
+       return 0;
+err4:
+       /* CNQ i has a status block but no chain yet */
+       qedr_free_mem_sb(dev, &dev->sb_array[i], dev->sb_start + i);
+err3:
+       /* Unwind the fully set up CNQs 0..i-1 */
+       for (--i; i >= 0; i--) {
+               dev->ops->common->chain_free(dev->cdev, &dev->cnq_array[i].pbl);
+               qedr_free_mem_sb(dev, &dev->sb_array[i], dev->sb_start + i);
+       }
+       kfree(dev->cnq_array);
+err2:
+       kfree(dev->sb_array);
+err1:
+       kfree(dev->sgid_tbl);
+       return rc;
+}
+
+/* QEDR sysfs interface */
+
+/* Show callback of the "hw_rev" attribute: prints the PCI vendor ID of the
+ * underlying PCI device in hexadecimal.
+ */
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+                       char *buf)
+{
+       struct qedr_dev *qdev = dev_get_drvdata(device);
+       struct pci_dev *pdev = qdev->pdev;
+
+       return scnprintf(buf, PAGE_SIZE, "0x%x\n", pdev->vendor);
+}
+
+/* Show callback of the "hca_type" attribute. Currently prints a fixed
+ * placeholder string rather than a real HCA type.
+ */
+static ssize_t show_hca_type(struct device *device,
+                            struct device_attribute *attr, char *buf)
+{
+       return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
+}
+
+/* Read-only sysfs attributes exposed on the IB device */
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
+
+/* Table walked by qedr_add() to create and by qedr_remove_sysfiles() to
+ * remove the attribute files.
+ */
+static struct device_attribute *qedr_attributes[] = {
+       &dev_attr_hw_rev,
+       &dev_attr_hca_type
+};
+
+/* Remove every sysfs attribute file listed in qedr_attributes[] from the
+ * IB device.
+ */
+static void qedr_remove_sysfiles(struct qedr_dev *dev)
+{
+       struct device *ib_dev = &dev->ibdev.dev;
+       int idx;
+
+       for (idx = 0; idx < ARRAY_SIZE(qedr_attributes); idx++)
+               device_remove_file(ib_dev, qedr_attributes[idx]);
+}
+
+/* Decide whether PCIe atomic operations can be used and record the result
+ * in dev->atomic_cap.
+ *
+ * The capability defaults to IB_ATOMIC_NONE. It is raised to IB_ATOMIC_GLOB,
+ * and the AtomicOp requester bit is set in pdev's Device Control 2 register,
+ * only if every bridge visited on the way up supports AtomicOp routing and
+ * the bridge directly above pdev reports 64-bit AtomicOp completer support.
+ * In every other case that reaches the capability checks the requester bit
+ * is cleared.
+ */
+static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev)
+{
+       struct pci_dev *bridge;
+       u32 val;
+
+       dev->atomic_cap = IB_ATOMIC_NONE;
+
+       /* No upstream bridge: leave atomics disabled and touch nothing */
+       bridge = pdev->bus->self;
+       if (!bridge)
+               return;
+
+       /* Check whether we are connected directly or via a switch */
+       while (bridge && bridge->bus->parent) {
+               DP_DEBUG(dev, QEDR_MSG_INIT,
+                        "Device is not connected directly to root. bridge->bus->number=%d primary=%d\n",
+                        bridge->bus->number, bridge->bus->primary);
+               /* Need to check Atomic Op Routing Supported all the way to
+                * root complex.
+                */
+               pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
+               if (!(val & PCI_EXP_DEVCAP2_ATOMIC_ROUTE)) {
+                       pcie_capability_clear_word(pdev,
+                                                  PCI_EXP_DEVCTL2,
+                                                  PCI_EXP_DEVCTL2_ATOMIC_REQ);
+                       return;
+               }
+               bridge = bridge->bus->parent->self;
+       }
+       /* NOTE(review): the completer check below uses the bridge directly
+        * above the device, not the last bridge reached by the walk above -
+        * confirm this is the intended device to query.
+        */
+       bridge = pdev->bus->self;
+
+       /* according to bridge capability */
+       pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
+       if (val & PCI_EXP_DEVCAP2_ATOMIC_COMP64) {
+               pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2,
+                                        PCI_EXP_DEVCTL2_ATOMIC_REQ);
+               dev->atomic_cap = IB_ATOMIC_GLOB;
+       } else {
+               pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2,
+                                          PCI_EXP_DEVCTL2_ATOMIC_REQ);
+       }
+}
+
+/* qed RDMA operations table; assigned in qedr_add() from qed_get_rdma_ops()
+ * and shared by all devices handled by this module.
+ */
+static const struct qed_rdma_ops *qed_ops;
+
+/* Combine two 32-bit halves into one 64-bit value (hi in the upper bits) */
+#define HILO_U64(hi, lo)               ((((u64)(hi)) << 32) + (lo))
+
+/* Interrupt handler for one CNQ; 'handle' is the struct qedr_cnq passed to
+ * request_irq().
+ *
+ * Consumes CNQ chain elements until the software consumer index catches up
+ * with the hardware one. Each element carries a CQ handle (hi/lo) that is
+ * turned back into a struct qedr_cq pointer, whose completion handler is
+ * then invoked. Finally the new consumer index is reported through
+ * rdma_cnq_prod_update and the status block interrupt is re-enabled.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t qedr_irq_handler(int irq, void *handle)
+{
+       u16 hw_comp_cons, sw_comp_cons;
+       struct qedr_cnq *cnq = handle;
+       struct regpair *cq_handle;
+       struct qedr_cq *cq;
+
+       /* Ack with interrupts disabled while the queue is being processed */
+       qed_sb_ack(cnq->sb, IGU_INT_DISABLE, 0);
+
+       qed_sb_update_sb_idx(cnq->sb);
+
+       hw_comp_cons = le16_to_cpu(*cnq->hw_cons_ptr);
+       sw_comp_cons = qed_chain_get_cons_idx(&cnq->pbl);
+
+       /* Align protocol-index and chain reads */
+       rmb();
+
+       while (sw_comp_cons != hw_comp_cons) {
+               cq_handle = (struct regpair *)qed_chain_consume(&cnq->pbl);
+               cq = (struct qedr_cq *)(uintptr_t)HILO_U64(cq_handle->hi,
+                               cq_handle->lo);
+
+               /* A NULL or corrupt handle stops processing of this batch */
+               if (cq == NULL) {
+                       DP_ERR(cnq->dev,
+                              "Received NULL CQ cq_handle->hi=%d cq_handle->lo=%d sw_comp_cons=%d hw_comp_cons=%d\n",
+                              cq_handle->hi, cq_handle->lo, sw_comp_cons,
+                              hw_comp_cons);
+
+                       break;
+               }
+
+               if (cq->sig != QEDR_CQ_MAGIC_NUMBER) {
+                       DP_ERR(cnq->dev,
+                              "Problem with cq signature, cq_handle->hi=%d ch_handle->lo=%d cq=%p\n",
+                              cq_handle->hi, cq_handle->lo, cq);
+                       break;
+               }
+
+               /* The notification consumed the arm request */
+               cq->arm_flags = 0;
+
+               if (cq->ibcq.comp_handler)
+                       (*cq->ibcq.comp_handler)
+                               (&cq->ibcq, cq->ibcq.cq_context);
+
+               sw_comp_cons = qed_chain_get_cons_idx(&cnq->pbl);
+
+               cnq->n_comp++;
+
+       }
+
+       qed_ops->rdma_cnq_prod_update(cnq->dev->rdma_ctx, cnq->index,
+                                     sw_comp_cons);
+
+       qed_sb_ack(cnq->sb, IGU_INT_ENABLE, 1);
+
+       return IRQ_HANDLED;
+}
+
+/* Wait for and release every CNQ interrupt that was requested so far
+ * (dev->int_info.used_cnt of them), then reset the count to zero.
+ */
+static void qedr_sync_free_irqs(struct qedr_dev *dev)
+{
+       u32 vector;
+       int i;
+
+       for (i = 0; i < dev->int_info.used_cnt; i++) {
+               /* Only MSI-X vectors are requested by this driver */
+               if (!dev->int_info.msix_cnt)
+                       continue;
+
+               vector = dev->int_info.msix[i * dev->num_hwfns].vector;
+               synchronize_irq(vector);
+               free_irq(vector, &dev->cnq_array[i]);
+       }
+
+       dev->int_info.used_cnt = 0;
+}
+
+/* Request one MSI-X interrupt per CNQ, handled by qedr_irq_handler() with
+ * the matching cnq_array entry as cookie.
+ *
+ * dev->int_info.used_cnt counts the vectors requested so far so that
+ * qedr_sync_free_irqs() releases exactly those. On the first failure the
+ * vectors requested so far are released and the error is returned at once:
+ * carrying on with the remaining CNQs would leave used_cnt out of step with
+ * the vectors actually held and could hide the error behind a later
+ * successful request.
+ *
+ * Returns 0 on success, -EINVAL if there are fewer MSI-X vectors than CNQs,
+ * or the error returned by request_irq().
+ */
+static int qedr_req_msix_irqs(struct qedr_dev *dev)
+{
+       int i, rc;
+
+       if (dev->num_cnq > dev->int_info.msix_cnt) {
+               DP_ERR(dev,
+                      "Interrupt mismatch: %d CNQ queues > %d MSI-x vectors\n",
+                      dev->num_cnq, dev->int_info.msix_cnt);
+               return -EINVAL;
+       }
+
+       for (i = 0; i < dev->num_cnq; i++) {
+               rc = request_irq(dev->int_info.msix[i * dev->num_hwfns].vector,
+                                qedr_irq_handler, 0, dev->cnq_array[i].name,
+                                &dev->cnq_array[i]);
+               if (rc) {
+                       DP_ERR(dev, "Request cnq %d irq failed\n", i);
+                       qedr_sync_free_irqs(dev);
+                       return rc;
+               }
+
+               DP_DEBUG(dev, QEDR_MSG_INIT,
+                        "Requested cnq irq for %s [entry %d]. Cookie is at %p\n",
+                        dev->cnq_array[i].name, i,
+                        &dev->cnq_array[i]);
+               dev->int_info.used_cnt++;
+       }
+
+       return 0;
+}
+
+/* Configure the RDMA interrupts: ask the qed core for dev->num_cnq
+ * interrupts, read back the resulting interrupt information and, when
+ * MSI-X vectors are reported, request one IRQ per CNQ.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int qedr_setup_irqs(struct qedr_dev *dev)
+{
+       int err;
+
+       DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_setup_irqs\n");
+
+       /* Learn Interrupt configuration */
+       err = dev->ops->rdma_set_rdma_int(dev->cdev, dev->num_cnq);
+       if (err < 0)
+               return err;
+
+       err = dev->ops->rdma_get_rdma_int(dev->cdev, &dev->int_info);
+       if (err) {
+               DP_DEBUG(dev, QEDR_MSG_INIT, "get_rdma_int failed\n");
+               return err;
+       }
+
+       if (dev->int_info.msix_cnt) {
+               DP_DEBUG(dev, QEDR_MSG_INIT, "rdma msix_cnt = %d\n",
+                        dev->int_info.msix_cnt);
+
+               err = qedr_req_msix_irqs(dev);
+               if (err)
+                       return err;
+       }
+
+       DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_setup_irqs succeeded\n");
+
+       return 0;
+}
+
+/* Query the device capabilities from the qed core and cache them in
+ * dev->attr, clamping the queue depths to the limits of this driver.
+ *
+ * Returns 0 on success, or -ENODEV if the minimum page size derived from
+ * the device's page_size_caps is larger than the kernel PAGE_SIZE.
+ */
+static int qedr_set_device_attr(struct qedr_dev *dev)
+{
+       struct qed_rdma_device *qed_attr;
+       struct qedr_device_attr *attr;
+       u32 page_size;
+
+       /* Part 1 - query core capabilities */
+       qed_attr = dev->ops->rdma_query_device(dev->rdma_ctx);
+
+       /* Part 2 - check capabilities
+        *
+        * The check must read the freshly queried qed_attr: dev->attr is
+        * only filled in by part 3 below, so it does not hold the device's
+        * page_size_caps yet.
+        */
+       page_size = ~qed_attr->page_size_caps + 1;
+       if (page_size > PAGE_SIZE) {
+               DP_ERR(dev,
+                      "Kernel PAGE_SIZE is %ld which is smaller than minimum page size (%d) required by qedr\n",
+                      PAGE_SIZE, page_size);
+               return -ENODEV;
+       }
+
+       /* Part 3 - copy and update capabilities */
+       attr = &dev->attr;
+       attr->vendor_id = qed_attr->vendor_id;
+       attr->vendor_part_id = qed_attr->vendor_part_id;
+       attr->hw_ver = qed_attr->hw_ver;
+       attr->fw_ver = qed_attr->fw_ver;
+       attr->node_guid = qed_attr->node_guid;
+       attr->sys_image_guid = qed_attr->sys_image_guid;
+       attr->max_cnq = qed_attr->max_cnq;
+       attr->max_sge = qed_attr->max_sge;
+       attr->max_inline = qed_attr->max_inline;
+       /* Queue depths are capped by what the driver's PBLs can describe */
+       attr->max_sqe = min_t(u32, qed_attr->max_wqe, QEDR_MAX_SQE);
+       attr->max_rqe = min_t(u32, qed_attr->max_wqe, QEDR_MAX_RQE);
+       attr->max_qp_resp_rd_atomic_resc = qed_attr->max_qp_resp_rd_atomic_resc;
+       attr->max_qp_req_rd_atomic_resc = qed_attr->max_qp_req_rd_atomic_resc;
+       attr->max_dev_resp_rd_atomic_resc =
+           qed_attr->max_dev_resp_rd_atomic_resc;
+       attr->max_cq = qed_attr->max_cq;
+       attr->max_qp = qed_attr->max_qp;
+       attr->max_mr = qed_attr->max_mr;
+       attr->max_mr_size = qed_attr->max_mr_size;
+       attr->max_cqe = min_t(u64, qed_attr->max_cqe, QEDR_MAX_CQES);
+       attr->max_mw = qed_attr->max_mw;
+       attr->max_fmr = qed_attr->max_fmr;
+       attr->max_mr_mw_fmr_pbl = qed_attr->max_mr_mw_fmr_pbl;
+       attr->max_mr_mw_fmr_size = qed_attr->max_mr_mw_fmr_size;
+       attr->max_pd = qed_attr->max_pd;
+       attr->max_ah = qed_attr->max_ah;
+       attr->max_pkey = qed_attr->max_pkey;
+       attr->max_srq = qed_attr->max_srq;
+       attr->max_srq_wr = qed_attr->max_srq_wr;
+       attr->dev_caps = qed_attr->dev_caps;
+       attr->page_size_caps = qed_attr->page_size_caps;
+       attr->dev_ack_delay = qed_attr->dev_ack_delay;
+       attr->reserved_lkey = qed_attr->reserved_lkey;
+       attr->bad_pkey_counter = qed_attr->bad_pkey_counter;
+       attr->max_stats_queues = qed_attr->max_stats_queues;
+
+       return 0;
+}
+
+/* Callback for events that are not tied to a specific CQ or QP; registered
+ * in qedr_init_hw(). Not implemented yet: it only logs an error.
+ */
+void qedr_unaffiliated_event(void *context,
+                            u8 event_code)
+{
+       pr_err("unaffiliated event not implemented yet\n");
+}
+
+/* Callback for asynchronous events tied to a CQ or QP; registered in
+ * qedr_init_hw().
+ *
+ * @context:   the struct qedr_dev that was registered with the events
+ * @e_code:    firmware event code (ROCE_ASYNC_EVENT_*)
+ * @fw_handle: event ring data whose roce_handle holds the driver's CQ/QP
+ *             pointer split into hi/lo halves
+ *
+ * The event code is translated into an IB event and delivered to the event
+ * handler of the affected CQ or QP, if one is set. Unknown codes are only
+ * logged.
+ */
+void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
+{
+#define EVENT_TYPE_NOT_DEFINED 0
+#define EVENT_TYPE_CQ          1
+#define EVENT_TYPE_QP          2
+       struct qedr_dev *dev = (struct qedr_dev *)context;
+       union event_ring_data *data = fw_handle;
+       u64 roce_handle64 = ((u64)data->roce_handle.hi << 32) +
+                           data->roce_handle.lo;
+       u8 event_type = EVENT_TYPE_NOT_DEFINED;
+       struct ib_event event;
+       struct ib_cq *ibcq;
+       struct ib_qp *ibqp;
+       struct qedr_cq *cq;
+       struct qedr_qp *qp;
+
+       /* Step 1: map the firmware code to an IB event and object type.
+        * event.event stays unset for unknown codes, but is then never
+        * used because event_type remains EVENT_TYPE_NOT_DEFINED.
+        */
+       switch (e_code) {
+       case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR:
+               event.event = IB_EVENT_CQ_ERR;
+               event_type = EVENT_TYPE_CQ;
+               break;
+       case ROCE_ASYNC_EVENT_SQ_DRAINED:
+               event.event = IB_EVENT_SQ_DRAINED;
+               event_type = EVENT_TYPE_QP;
+               break;
+       case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR:
+               event.event = IB_EVENT_QP_FATAL;
+               event_type = EVENT_TYPE_QP;
+               break;
+       case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR:
+               event.event = IB_EVENT_QP_REQ_ERR;
+               event_type = EVENT_TYPE_QP;
+               break;
+       case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR:
+               event.event = IB_EVENT_QP_ACCESS_ERR;
+               event_type = EVENT_TYPE_QP;
+               break;
+       default:
+               DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code,
+                      roce_handle64);
+       }
+
+       /* Step 2: recover the object from the handle and notify its owner */
+       switch (event_type) {
+       case EVENT_TYPE_CQ:
+               cq = (struct qedr_cq *)(uintptr_t)roce_handle64;
+               if (cq) {
+                       ibcq = &cq->ibcq;
+                       if (ibcq->event_handler) {
+                               event.device = ibcq->device;
+                               event.element.cq = ibcq;
+                               ibcq->event_handler(&event, ibcq->cq_context);
+                       }
+               } else {
+                       WARN(1,
+                            "Error: CQ event with NULL pointer ibcq. Handle=%llx\n",
+                            roce_handle64);
+               }
+               DP_ERR(dev, "CQ event %d on hanlde %p\n", e_code, cq);
+               break;
+       case EVENT_TYPE_QP:
+               qp = (struct qedr_qp *)(uintptr_t)roce_handle64;
+               if (qp) {
+                       ibqp = &qp->ibqp;
+                       if (ibqp->event_handler) {
+                               event.device = ibqp->device;
+                               event.element.qp = ibqp;
+                               ibqp->event_handler(&event, ibqp->qp_context);
+                       }
+               } else {
+                       WARN(1,
+                            "Error: QP event with NULL pointer ibqp. Handle=%llx\n",
+                            roce_handle64);
+               }
+               DP_ERR(dev, "QP event %d on hanlde %p\n", e_code, qp);
+               break;
+       default:
+               break;
+       }
+}
+
+/* Start the RDMA function in the qed core and set up the driver's own
+ * doorbell (DPI) context.
+ *
+ * Builds the start parameters (one PBL descriptor per CNQ, event callbacks,
+ * CQ mode, MTU and MAC address of the net device), calls rdma_init, adds a
+ * user to obtain the doorbell addresses and finally caches the device
+ * attributes.
+ *
+ * Returns 0 on success or a negative errno.
+ *
+ * NOTE(review): a failure after rdma_init or rdma_add_user succeeded is not
+ * unwound here (no rdma_remove_user/rdma_stop), and qedr_add() does not call
+ * qedr_stop_hw() for this failure either - confirm whether the qed core
+ * tolerates that.
+ */
+static int qedr_init_hw(struct qedr_dev *dev)
+{
+       struct qed_rdma_add_user_out_params out_params;
+       struct qed_rdma_start_in_params *in_params;
+       struct qed_rdma_cnq_params *cur_pbl;
+       struct qed_rdma_events events;
+       dma_addr_t p_phys_table;
+       u32 page_cnt;
+       int rc = 0;
+       int i;
+
+       in_params =  kzalloc(sizeof(*in_params), GFP_KERNEL);
+       if (!in_params) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       /* Describe the PBL of every CNQ chain to the core */
+       in_params->desired_cnq = dev->num_cnq;
+       for (i = 0; i < dev->num_cnq; i++) {
+               cur_pbl = &in_params->cnq_pbl_list[i];
+
+               page_cnt = qed_chain_get_page_cnt(&dev->cnq_array[i].pbl);
+               cur_pbl->num_pbl_pages = page_cnt;
+
+               p_phys_table = qed_chain_get_pbl_phys(&dev->cnq_array[i].pbl);
+               cur_pbl->pbl_ptr = (u64)p_phys_table;
+       }
+
+       events.affiliated_event = qedr_affiliated_event;
+       events.unaffiliated_event = qedr_unaffiliated_event;
+       events.context = dev;
+
+       /* NOTE(review): 'events' lives on this function's stack; presumably
+        * rdma_init copies it rather than keeping the pointer - confirm.
+        */
+       in_params->events = &events;
+       in_params->cq_mode = QED_RDMA_CQ_MODE_32_BITS;
+       in_params->max_mtu = dev->ndev->mtu;
+       ether_addr_copy(&in_params->mac_addr[0], dev->ndev->dev_addr);
+
+       rc = dev->ops->rdma_init(dev->cdev, in_params);
+       if (rc)
+               goto out;
+
+       rc = dev->ops->rdma_add_user(dev->rdma_ctx, &out_params);
+       if (rc)
+               goto out;
+
+       /* Remember the doorbell window handed out for the kernel user */
+       dev->db_addr = (void *)(uintptr_t)out_params.dpi_addr;
+       dev->db_phys_addr = out_params.dpi_phys_addr;
+       dev->db_size = out_params.dpi_size;
+       dev->dpi = out_params.dpi;
+
+       rc = qedr_set_device_attr(dev);
+out:
+       /* kfree(NULL) is a no-op, so this also covers the allocation failure */
+       kfree(in_params);
+       if (rc)
+               DP_ERR(dev, "Init HW Failed rc = %d\n", rc);
+
+       return rc;
+}
+
+/* Counterpart of qedr_init_hw(): remove the driver's user (DPI) from the
+ * qed core and stop the RDMA function.
+ */
+void qedr_stop_hw(struct qedr_dev *dev)
+{
+       dev->ops->rdma_remove_user(dev->rdma_ctx, dev->dpi);
+       dev->ops->rdma_stop(dev->rdma_ctx);
+}
+
+/* Create and register a qedr device on top of a qed/qede device.
+ *
+ * @cdev: qed core device
+ * @pdev: underlying PCI device
+ * @ndev: associated net device
+ *
+ * Allocates the IB device, obtains the qed RDMA operations, allocates the
+ * CNQ resources, starts the hardware, sets up the interrupts, registers
+ * with the IB core and creates the sysfs attribute files. Every step is
+ * unwound in reverse order if a later one fails.
+ *
+ * Returns the new device, or NULL on any failure (the error code is only
+ * logged).
+ */
+static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
+                                struct net_device *ndev)
+{
+       struct qed_dev_rdma_info dev_info;
+       struct qedr_dev *dev;
+       int rc = 0, i;
+
+       dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev));
+       if (!dev) {
+               pr_err("Unable to allocate ib device\n");
+               return NULL;
+       }
+
+       DP_DEBUG(dev, QEDR_MSG_INIT, "qedr add device called\n");
+
+       dev->pdev = pdev;
+       dev->ndev = ndev;
+       dev->cdev = cdev;
+
+       qed_ops = qed_get_rdma_ops();
+       if (!qed_ops) {
+               DP_ERR(dev, "Failed to get qed roce operations\n");
+               /* Give the final failure log a meaningful error code */
+               rc = -ENODEV;
+               goto init_err;
+       }
+
+       dev->ops = qed_ops;
+       rc = qed_ops->fill_dev_info(cdev, &dev_info);
+       if (rc)
+               goto init_err;
+
+       dev->num_hwfns = dev_info.common.num_hwfns;
+       dev->rdma_ctx = dev->ops->rdma_get_rdma_ctx(cdev);
+
+       dev->num_cnq = dev->ops->rdma_get_min_cnq_msix(cdev);
+       if (!dev->num_cnq) {
+               DP_ERR(dev, "not enough CNQ resources.\n");
+               rc = -ENOMEM;
+               goto init_err;
+       }
+
+       dev->wq_multiplier = QEDR_WQ_MULTIPLIER_DFT;
+
+       qedr_pci_set_atomic(dev, pdev);
+
+       rc = qedr_alloc_resources(dev);
+       if (rc)
+               goto init_err;
+
+       rc = qedr_init_hw(dev);
+       if (rc)
+               goto alloc_err;
+
+       rc = qedr_setup_irqs(dev);
+       if (rc)
+               goto irq_err;
+
+       rc = qedr_register_device(dev);
+       if (rc) {
+               DP_ERR(dev, "Unable to allocate register device\n");
+               goto reg_err;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++) {
+               rc = device_create_file(&dev->ibdev.dev, qedr_attributes[i]);
+               if (rc)
+                       goto sysfs_err;
+       }
+
+       DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n");
+       return dev;
+
+sysfs_err:
+       ib_unregister_device(&dev->ibdev);
+reg_err:
+       qedr_sync_free_irqs(dev);
+irq_err:
+       qedr_stop_hw(dev);
+alloc_err:
+       qedr_free_resources(dev);
+init_err:
+       /* Log first: the message is built from dev, which must not be
+        * touched once ib_dealloc_device() has released it.
+        */
+       DP_ERR(dev, "qedr driver load failed rc=%d\n", rc);
+       ib_dealloc_device(&dev->ibdev);
+
+       return NULL;
+}
+
+/* Tear down a device created by qedr_add(): remove the sysfs files,
+ * unregister from the IB core, stop the hardware, release the interrupts
+ * and software resources, and finally free the IB device itself.
+ */
+static void qedr_remove(struct qedr_dev *dev)
+{
+       /* First unregister with stack to stop all the active traffic
+        * of the registered clients.
+        */
+       qedr_remove_sysfiles(dev);
+       ib_unregister_device(&dev->ibdev);
+
+       qedr_stop_hw(dev);
+       qedr_sync_free_irqs(dev);
+       qedr_free_resources(dev);
+       /* dev must not be used after this call */
+       ib_dealloc_device(&dev->ibdev);
+}
+
+/* Report port 1 as being in error state to the IB stack. Always returns 0. */
+static int qedr_close(struct qedr_dev *dev)
+{
+       qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
+
+       return 0;
+}
+
+/* Signal the port error to the IB stack and then remove the device.
+ * dev is freed by qedr_remove() and must not be used afterwards.
+ */
+static void qedr_shutdown(struct qedr_dev *dev)
+{
+       qedr_close(dev);
+       qedr_remove(dev);
+}
+
+/* React to a MAC address change of the net device: rebuild entry 0 of the
+ * SGID table from the new address, update the LL2 MAC filter, remember the
+ * new address for the GSI LL2 connection and notify the IB stack with a
+ * GID change event on port 1.
+ */
+static void qedr_mac_address_change(struct qedr_dev *dev)
+{
+       union ib_gid *sgid = &dev->sgid_tbl[0];
+       u8 guid[8], mac_addr[6];
+       int rc;
+
+       /* Update SGID */
+       ether_addr_copy(&mac_addr[0], dev->ndev->dev_addr);
+       /* Derive the 8-byte interface ID from the 6-byte MAC: flip bit 1 of
+        * the first octet and insert ff:fe in the middle.
+        */
+       guid[0] = mac_addr[0] ^ 2;
+       guid[1] = mac_addr[1];
+       guid[2] = mac_addr[2];
+       guid[3] = 0xff;
+       guid[4] = 0xfe;
+       guid[5] = mac_addr[3];
+       guid[6] = mac_addr[4];
+       guid[7] = mac_addr[5];
+       /* fe80::/64 prefix followed by the interface ID */
+       sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+       memcpy(&sgid->raw[8], guid, sizeof(guid));
+
+       /* Update LL2 */
+       rc = dev->ops->roce_ll2_set_mac_filter(dev->cdev,
+                                              dev->gsi_ll2_mac_address,
+                                              dev->ndev->dev_addr);
+
+       /* The stored address and the event are updated even if the filter
+        * update failed; the failure is only logged below.
+        */
+       ether_addr_copy(dev->gsi_ll2_mac_address, dev->ndev->dev_addr);
+
+       qedr_ib_dispatch_event(dev, 1, IB_EVENT_GID_CHANGE);
+
+       if (rc)
+               DP_ERR(dev, "Error updating mac filter\n");
+}
+
+/* Notification entry point called through the NIC driver.
+ *
+ * Handling events via the NIC driver ensures that all NIC-specific
+ * initialization is done before the RoCE driver notifies the stack of the
+ * event.
+ *
+ * QEDE_CLOSE removes and frees the device, so dev must not be used by the
+ * caller after that event.
+ */
+static void qedr_notify(struct qedr_dev *dev, enum qede_roce_event event)
+{
+       switch (event) {
+       case QEDE_UP:
+               qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
+               break;
+       case QEDE_DOWN:
+               qedr_close(dev);
+               break;
+       case QEDE_CLOSE:
+               qedr_shutdown(dev);
+               break;
+       case QEDE_CHANGE_ADDR:
+               qedr_mac_address_change(dev);
+               break;
+       default:
+               pr_err("Event not supported\n");
+       }
+}
+
+/* Callbacks handed to the qede RoCE layer at module load */
+static struct qedr_driver qedr_drv = {
+       .name = "qedr_driver",
+       .add = qedr_add,
+       .remove = qedr_remove,
+       .notify = qedr_notify,
+};
+
+/* Module load: register the qedr callbacks with the qede RoCE layer */
+static int __init qedr_init_module(void)
+{
+       return qede_roce_register_driver(&qedr_drv);
+}
+
+/* Module unload: unregister the qedr callbacks again */
+static void __exit qedr_exit_module(void)
+{
+       qede_roce_unregister_driver(&qedr_drv);
+}
+
+module_init(qedr_init_module);
+module_exit(qedr_exit_module);
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
new file mode 100644 (file)
index 0000000..620badd
--- /dev/null
@@ -0,0 +1,495 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __QEDR_H__
+#define __QEDR_H__
+
+#include <linux/pci.h>
+#include <rdma/ib_addr.h>
+#include <linux/qed/qed_if.h>
+#include <linux/qed/qed_chain.h>
+#include <linux/qed/qed_roce_if.h>
+#include <linux/qed/qede_roce.h>
+#include "qedr_hsi.h"
+
+#define QEDR_MODULE_VERSION    "8.10.10.0"
+#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
+/* Name used to prefix log messages: the IB device name */
+#define DP_NAME(dev) ((dev)->ibdev.name)
+
+/* Debug print prefixed with the device name and a module tag; 'module' must
+ * be a string literal (one of the QEDR_MSG_* tags below) because it is
+ * concatenated into the format string.
+ */
+#define DP_DEBUG(dev, module, fmt, ...)                                        \
+       pr_debug("(%s) " module ": " fmt,                               \
+                DP_NAME(dev) ? DP_NAME(dev) : "", ## __VA_ARGS__)
+
+/* Module tags for DP_DEBUG, padded to a common width */
+#define QEDR_MSG_INIT "INIT"
+#define QEDR_MSG_MISC "MISC"
+#define QEDR_MSG_CQ   "  CQ"
+#define QEDR_MSG_MR   "  MR"
+#define QEDR_MSG_RQ   "  RQ"
+#define QEDR_MSG_SQ   "  SQ"
+#define QEDR_MSG_QP   "  QP"
+#define QEDR_MSG_GSI  " GSI"
+
+/* Signature expected in qedr_cq.sig; checked by the CNQ interrupt handler
+ * before a CQ handle received from the hardware is used.
+ */
+#define QEDR_CQ_MAGIC_NUMBER   (0x11223344)
+
+struct qedr_dev;
+
+/* State of one completion notification queue (CNQ) */
+struct qedr_cnq {
+       struct qedr_dev         *dev;           /* owning device */
+       struct qed_chain        pbl;            /* chain holding CQ handles */
+       struct qed_sb_info      *sb;            /* status block of this CNQ */
+       char                    name[32];       /* name passed to request_irq() */
+       u64                     n_comp;         /* notifications handled */
+       __le16                  *hw_cons_ptr;   /* HW consumer index in the SB */
+       u8                      index;          /* position in cnq_array */
+};
+
+/* Number of entries in the per-device source-GID table */
+#define QEDR_MAX_SGID 128
+
+/* Device capabilities cached by the driver. The fields are copied from the
+ * attributes reported by the qed core; max_sqe, max_rqe and max_cqe are
+ * additionally clamped to QEDR_MAX_SQE, QEDR_MAX_RQE and QEDR_MAX_CQES.
+ */
+struct qedr_device_attr {
+       u32     vendor_id;
+       u32     vendor_part_id;
+       u32     hw_ver;
+       u64     fw_ver;
+       u64     node_guid;
+       u64     sys_image_guid;
+       u8      max_cnq;
+       u8      max_sge;
+       u16     max_inline;
+       u32     max_sqe;
+       u32     max_rqe;
+       u8      max_qp_resp_rd_atomic_resc;
+       u8      max_qp_req_rd_atomic_resc;
+       u64     max_dev_resp_rd_atomic_resc;
+       u32     max_cq;
+       u32     max_qp;
+       u32     max_mr;
+       u64     max_mr_size;
+       u32     max_cqe;
+       u32     max_mw;
+       u32     max_fmr;
+       u32     max_mr_mw_fmr_pbl;
+       u64     max_mr_mw_fmr_size;
+       u32     max_pd;
+       u32     max_ah;
+       u8      max_pkey;
+       u32     max_srq;
+       u32     max_srq_wr;
+       u8      max_srq_sge;
+       u8      max_stats_queues;
+       u32     dev_caps;
+
+       u64     page_size_caps;
+       u8      dev_ack_delay;
+       u32     reserved_lkey;
+       u32     bad_pkey_counter;
+       struct qed_rdma_events events;
+};
+
+/* Per-device driver state. The structure is allocated by ib_alloc_device()
+ * and cast from its return value, which relies on ibdev being the first
+ * member.
+ */
+struct qedr_dev {
+       struct ib_device        ibdev;
+       struct qed_dev          *cdev;          /* qed core device */
+       struct pci_dev          *pdev;
+       struct net_device       *ndev;
+
+       /* Set by qedr_pci_set_atomic() */
+       enum ib_atomic_cap      atomic_cap;
+
+       void *rdma_ctx;
+       struct qedr_device_attr attr;
+
+       const struct qed_rdma_ops *ops;
+       struct qed_int_info     int_info;
+
+       /* One status block and one CNQ per completion notification queue;
+        * status block IDs start at sb_start.
+        */
+       struct qed_sb_info      *sb_array;
+       struct qedr_cnq         *cnq_array;
+       int                     num_cnq;
+       int                     sb_start;
+
+       /* Doorbell window obtained from rdma_add_user */
+       void __iomem            *db_addr;
+       u64                     db_phys_addr;
+       u32                     db_size;
+       u16                     dpi;
+
+       /* Source-GID table with QEDR_MAX_SGID entries */
+       union ib_gid *sgid_tbl;
+
+       /* Lock for sgid table */
+       spinlock_t sgid_lock;
+
+       u64                     guid;
+
+       u32                     dp_module;
+       u8                      dp_level;
+       u8                      num_hwfns;
+       uint                    wq_multiplier;
+       /* MAC address currently programmed into the LL2 MAC filter */
+       u8                      gsi_ll2_mac_address[ETH_ALEN];
+       int                     gsi_qp_created;
+       struct qedr_cq          *gsi_sqcq;
+       struct qedr_cq          *gsi_rqcq;
+       struct qedr_qp          *gsi_qp;
+};
+
+/* SQ: the maximum number of send WQEs is derived from the number of PBL
+ * entries, the ring page size and the number of elements one WQE may use.
+ */
+#define QEDR_MAX_SQ_PBL                        (0x8000)
+#define QEDR_MAX_SQ_PBL_ENTRIES                (0x10000 / sizeof(void *))
+#define QEDR_SQE_ELEMENT_SIZE          (sizeof(struct rdma_sq_sge))
+#define QEDR_MAX_SQE_ELEMENTS_PER_SQE  (ROCE_REQ_MAX_SINGLE_SQ_WQE_SIZE / \
+                                        QEDR_SQE_ELEMENT_SIZE)
+#define QEDR_MAX_SQE_ELEMENTS_PER_PAGE ((RDMA_RING_PAGE_SIZE) / \
+                                        QEDR_SQE_ELEMENT_SIZE)
+#define QEDR_MAX_SQE                   ((QEDR_MAX_SQ_PBL_ENTRIES) *\
+                                        (RDMA_RING_PAGE_SIZE) / \
+                                        (QEDR_SQE_ELEMENT_SIZE) /\
+                                        (QEDR_MAX_SQE_ELEMENTS_PER_SQE))
+/* RQ */
+#define QEDR_MAX_RQ_PBL                        (0x2000)
+#define QEDR_MAX_RQ_PBL_ENTRIES                (0x10000 / sizeof(void *))
+#define QEDR_RQE_ELEMENT_SIZE          (sizeof(struct rdma_rq_sge))
+#define QEDR_MAX_RQE_ELEMENTS_PER_RQE  (RDMA_MAX_SGE_PER_RQ_WQE)
+#define QEDR_MAX_RQE_ELEMENTS_PER_PAGE ((RDMA_RING_PAGE_SIZE) / \
+                                        QEDR_RQE_ELEMENT_SIZE)
+#define QEDR_MAX_RQE                   ((QEDR_MAX_RQ_PBL_ENTRIES) *\
+                                        (RDMA_RING_PAGE_SIZE) / \
+                                        (QEDR_RQE_ELEMENT_SIZE) /\
+                                        (QEDR_MAX_RQE_ELEMENTS_PER_RQE))
+
+/* CQ: the maximum number of CQEs follows from the PBL size limit */
+#define QEDR_CQE_SIZE  (sizeof(union rdma_cqe))
+#define QEDR_MAX_CQE_PBL_SIZE (512 * 1024)
+#define QEDR_MAX_CQE_PBL_ENTRIES (((QEDR_MAX_CQE_PBL_SIZE) / \
+                                 sizeof(u64)) - 1)
+#define QEDR_MAX_CQES ((u32)((QEDR_MAX_CQE_PBL_ENTRIES) * \
+                            (QED_CHAIN_PAGE_SIZE) / QEDR_CQE_SIZE))
+
+/* Driver-side upper bound on the number of entries in a CNQ chain */
+#define QEDR_ROCE_MAX_CNQ_SIZE         (0x4000)
+
+#define QEDR_MAX_PORT                  (1)
+
+/* Bit mask for one user-verbs command, e.g. QEDR_UVERBS(CREATE_CQ) */
+#define QEDR_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
+
+#define QEDR_ROCE_PKEY_MAX 1
+#define QEDR_ROCE_PKEY_TABLE_LEN 1
+#define QEDR_ROCE_PKEY_DEFAULT 0xffff
+
+/* One PBL page, linkable into a list: virtual address plus DMA address.
+ * NOTE(review): presumably one page of a page-buffer list - confirm
+ * against the users of this structure.
+ */
+struct qedr_pbl {
+       struct list_head list_entry;
+       void *va;
+       dma_addr_t pa;
+};
+
+/* Driver user context wrapping struct ib_ucontext. The dpi_* fields mirror
+ * the doorbell fields of struct qedr_dev (address, physical address, size
+ * and DPI number).
+ */
+struct qedr_ucontext {
+       struct ib_ucontext ibucontext;
+       struct qedr_dev *dev;
+       struct qedr_pd *pd;
+       u64 dpi_addr;
+       u64 dpi_phys_addr;
+       u32 dpi_size;
+       u16 dpi;
+
+       struct list_head mm_head;
+
+       /* Lock to protect mm list */
+       struct mutex mm_list_lock;
+};
+
+/* 64-bit doorbell value: structured view and raw view of the same bits */
+union db_prod64 {
+       struct rdma_pwm_val32_data data;
+       u64 raw;
+};
+
+/* Kind of completion queue, stored in qedr_cq.cq_type */
+enum qedr_cq_type {
+       QEDR_CQ_TYPE_GSI,
+       QEDR_CQ_TYPE_KERNEL,
+       QEDR_CQ_TYPE_USER,
+};
+
+/* Geometry of a PBL: counts and sizes of PBLs and of their entries (PBEs),
+ * and whether the table is two-layered.
+ */
+struct qedr_pbl_info {
+       u32 num_pbls;
+       u32 num_pbes;
+       u32 pbl_size;
+       u32 pbe_size;
+       bool two_layered;
+};
+
+/* Description of a queue buffer: the ib_umem handle, its PBL geometry and
+ * table, and the buffer address and length. Embedded in qedr_cq (q) and in
+ * qedr_qp (usq/urq) for objects created from user space.
+ */
+struct qedr_userq {
+       struct ib_umem *umem;
+       struct qedr_pbl_info pbl_info;
+       struct qedr_pbl *pbl_tbl;
+       u64 buf_addr;
+       size_t buf_len;
+};
+
+/* Driver-private completion queue. It embeds the core struct ib_cq, from
+ * which get_qedr_cq() recovers this structure via container_of().
+ */
+struct qedr_cq {
+       struct ib_cq ibcq;
+
+       enum qedr_cq_type cq_type;
+       u32 sig;
+
+       /* id passed to rdma_destroy_cq() in qedr_destroy_gsi_cq() */
+       u16 icid;
+
+       /* Lock to protect completion handler */
+       spinlock_t comp_handler_lock;
+
+       /* Lock to protect multiple CQs */
+       spinlock_t cq_lock;
+       u8 arm_flags;
+       struct qed_chain pbl;
+
+       void __iomem *db_addr;
+       union db_prod64 db;
+
+       u8 pbl_toggle;
+       union rdma_cqe *latest_cqe;
+       union rdma_cqe *toggle_cqe;
+
+       u32 cq_cons;
+
+       struct qedr_userq q;
+};
+
+/* Driver-private protection domain. It embeds the core struct ib_pd, from
+ * which get_qedr_pd() recovers this structure via container_of().
+ */
+struct qedr_pd {
+       struct ib_pd ibpd;
+       u32 pd_id;
+       struct qedr_ucontext *uctx;
+};
+
+/* List node keyed by an (address, length) pair.
+ * NOTE(review): presumably linked on qedr_ucontext.mm_head; confirm at the
+ * insertion site.
+ */
+struct qedr_mm {
+       struct {
+               u64 phy_addr;
+               unsigned long len;
+       } key;
+       struct list_head entry;
+};
+
+/* 32-bit value accessible either through the structured
+ * rdma_pwm_val16_data layout or as a single raw u32.
+ */
+union db_prod32 {
+       struct rdma_pwm_val16_data data;
+       u32 raw;
+};
+
+/* Per-queue bookkeeping for one side (SQ or RQ) of a QP; struct qedr_qp
+ * holds one instance for each.
+ */
+struct qedr_qp_hwq_info {
+       /* WQE Elements */
+       struct qed_chain pbl;
+       u64 p_phys_addr_tbl;
+       u32 max_sges;
+
+       /* WQE */
+       /* prod and cons wrap modulo max_wr (qedr_inc_sw_prod/_cons) */
+       u16 prod;
+       u16 cons;
+       /* incremented without wrapping by qedr_inc_sw_cons() */
+       u16 wqe_cons;
+       /* wraps modulo max_wr; advanced by qedr_inc_sw_gsi_cons() */
+       u16 gsi_cons;
+       u16 max_wr;
+
+       /* DB */
+       void __iomem *db;
+       union db_prod32 db_data;
+};
+
+/* Advance the software index p_info->index by one and AND it with the value
+ * returned by qed_chain_get_capacity() for the queue's chain.
+ *
+ * The assignment must be terminated with ';' inside the do/while body,
+ * otherwise every expansion of the macro is a syntax error. p_info is
+ * parenthesized so that an expression argument expands correctly.
+ * NOTE(review): p_info->pbl is passed as written in the original; confirm
+ * whether qed_chain_get_capacity() expects the chain by pointer.
+ */
+#define QEDR_INC_SW_IDX(p_info, index)                                 \
+       do {                                                            \
+               (p_info)->index = ((p_info)->index + 1) &               \
+                               qed_chain_get_capacity((p_info)->pbl);  \
+       } while (0)
+
+/* Error flags, one distinct bit each, so several can be OR-ed together.
+ * NOTE(review): presumably accumulated in qedr_qp.err_bitmap; confirm.
+ */
+enum qedr_qp_err_bitmap {
+       QEDR_QP_ERR_SQ_FULL = 1,
+       QEDR_QP_ERR_RQ_FULL = 2,
+       QEDR_QP_ERR_BAD_SR = 4,
+       QEDR_QP_ERR_BAD_RR = 8,
+       QEDR_QP_ERR_SQ_PBL_FULL = 16,
+       QEDR_QP_ERR_RQ_PBL_FULL = 32,
+};
+
+/* Driver-private queue pair. It embeds the core struct ib_qp, from which
+ * get_qedr_qp() recovers this structure via container_of().
+ */
+struct qedr_qp {
+       struct ib_qp ibqp;      /* must be first */
+       struct qedr_dev *dev;
+
+       struct qedr_qp_hwq_info sq;
+       struct qedr_qp_hwq_info rq;
+
+       u32 max_inline_data;
+
+       /* Lock for QP's */
+       spinlock_t q_lock;
+       struct qedr_cq *sq_cq;
+       struct qedr_cq *rq_cq;
+       struct qedr_srq *srq;
+       enum qed_roce_qp_state state;
+       u32 id;
+       struct qedr_pd *pd;
+       enum ib_qp_type qp_type;
+       struct qed_rdma_qp *qed_qp;
+       u32 qp_id;
+       u16 icid;
+       u16 mtu;
+       int sgid_idx;
+       u32 rq_psn;
+       /* post-incremented per GSI send in qedr_gsi_build_header() */
+       u32 sq_psn;
+       u32 qkey;
+       u32 dest_qp_num;
+
+       /* Relevant to qps created from kernel space only (ULPs) */
+       u8 prev_wqe_size;
+       u16 wqe_cons;
+       u32 err_bitmap;
+       bool signaled;
+
+       /* SQ shadow: array indexed by sq.prod / sq.cons in the GSI paths */
+       struct {
+               u64 wr_id;
+               enum ib_wc_opcode opcode;
+               u32 bytes_len;
+               u8 wqe_size;
+               bool signaled;
+               dma_addr_t icrc_mapping;
+               u32 *icrc;
+               struct qedr_mr *mr;
+       } *wqe_wr_id;
+
+       /* RQ shadow: array indexed by rq.prod / rq.cons / rq.gsi_cons in
+        * the GSI paths
+        */
+       struct {
+               u64 wr_id;
+               struct ib_sge sg_list[RDMA_MAX_SGE_PER_RQ_WQE];
+               u8 wqe_size;
+
+               /* filled in by qedr_ll2_rx_cb() for GSI receives */
+               u8 smac[ETH_ALEN];
+               u16 vlan_id;
+               int rc;
+       } *rqe_wr_id;
+
+       /* Relevant to qps created from user space only (applications) */
+       struct qedr_userq usq;
+       struct qedr_userq urq;
+};
+
+/* Driver-private address handle. It embeds the core struct ib_ah, from
+ * which get_qedr_ah() recovers this structure, and carries the
+ * ib_ah_attr that qedr_gsi_build_header() reads.
+ */
+struct qedr_ah {
+       struct ib_ah ibah;
+       struct ib_ah_attr attr;
+};
+
+/* Kind of memory region, stored in qedr_mr.type. */
+enum qedr_mr_type {
+       QEDR_MR_USER,
+       QEDR_MR_KERNEL,
+       QEDR_MR_DMA,
+       QEDR_MR_FRMR,
+};
+
+/* PBL bookkeeping embedded in struct qedr_mr: the PBL table and its
+ * geometry, free and in-use PBL lists, and two completion counters.
+ */
+struct mr_info {
+       struct qedr_pbl *pbl_table;
+       struct qedr_pbl_info pbl_info;
+       struct list_head free_pbl_list;
+       struct list_head inuse_pbl_list;
+       u32 completed;
+       u32 completed_handled;
+};
+
+/* Driver-private memory region. It embeds the core struct ib_mr, from
+ * which get_qedr_mr() recovers this structure via container_of().
+ */
+struct qedr_mr {
+       struct ib_mr ibmr;
+       struct ib_umem *umem;
+
+       struct qed_rdma_register_tid_in_params hw_mr;
+       enum qedr_mr_type type;
+
+       struct qedr_dev *dev;
+       struct mr_info info;
+
+       u64 *pages;
+       u32 npages;
+};
+
+/* OR (flag << <name>_SHIFT) into value. The flag is not masked and existing
+ * bits in value are not cleared first.
+ */
+#define SET_FIELD2(value, name, flag) ((value) |= ((flag) << (name ## _SHIFT)))
+
+/* Responder CQE flag masks shifted into their bit positions. */
+#define QEDR_RESP_IMM  (RDMA_CQE_RESPONDER_IMM_FLG_MASK << \
+                        RDMA_CQE_RESPONDER_IMM_FLG_SHIFT)
+#define QEDR_RESP_RDMA (RDMA_CQE_RESPONDER_RDMA_FLG_MASK << \
+                        RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT)
+/* Both of the above flags set. */
+#define QEDR_RESP_RDMA_IMM (QEDR_RESP_IMM | QEDR_RESP_RDMA)
+
+/* Consume one entry: bump the free-running wqe_cons counter and step cons
+ * forward by one, wrapping it modulo max_wr.
+ */
+static inline void qedr_inc_sw_cons(struct qedr_qp_hwq_info *info)
+{
+       int next = info->cons + 1;
+
+       info->wqe_cons++;
+       info->cons = next % info->max_wr;
+}
+
+/* Produce one entry: step prod forward by one, wrapping it modulo max_wr. */
+static inline void qedr_inc_sw_prod(struct qedr_qp_hwq_info *info)
+{
+       int next = info->prod + 1;
+
+       info->prod = next % info->max_wr;
+}
+
+/* Copy the destination MAC of an address handle into mac_addr.
+ *
+ * @dev:      device, used for error logging only
+ * @ah_attr:  address-handle attributes; grh.dgid and dmac are read
+ * @mac_addr: output buffer, ETH_ALEN bytes
+ *
+ * Returns 0 on success. If the destination GID is all zeroes, mac_addr is
+ * zeroed and -EINVAL is returned.
+ *
+ * The original also copied the DGID into a local in6_addr that was never
+ * read; that dead copy is removed.
+ */
+static inline int qedr_get_dmac(struct qedr_dev *dev,
+                               struct ib_ah_attr *ah_attr, u8 *mac_addr)
+{
+       union ib_gid zero_sgid = { { 0 } };
+
+       if (!memcmp(&ah_attr->grh.dgid, &zero_sgid, sizeof(union ib_gid))) {
+               DP_ERR(dev, "Local port GID not supported\n");
+               eth_zero_addr(mac_addr);
+               return -EINVAL;
+       }
+
+       ether_addr_copy(mac_addr, ah_attr->dmac);
+
+       return 0;
+}
+
+/* Map an embedded ib_ucontext back to its enclosing qedr_ucontext. */
+static inline
+struct qedr_ucontext *get_qedr_ucontext(struct ib_ucontext *ibucontext)
+{
+       struct qedr_ucontext *uctx;
+
+       uctx = container_of(ibucontext, struct qedr_ucontext, ibucontext);
+       return uctx;
+}
+
+/* Map an embedded ib_device back to its enclosing qedr_dev. */
+static inline struct qedr_dev *get_qedr_dev(struct ib_device *ibdev)
+{
+       struct qedr_dev *qdev = container_of(ibdev, struct qedr_dev, ibdev);
+
+       return qdev;
+}
+
+/* Map an embedded ib_pd back to its enclosing qedr_pd. */
+static inline struct qedr_pd *get_qedr_pd(struct ib_pd *ibpd)
+{
+       struct qedr_pd *pd = container_of(ibpd, struct qedr_pd, ibpd);
+
+       return pd;
+}
+
+/* Map an embedded ib_cq back to its enclosing qedr_cq. */
+static inline struct qedr_cq *get_qedr_cq(struct ib_cq *ibcq)
+{
+       struct qedr_cq *cq = container_of(ibcq, struct qedr_cq, ibcq);
+
+       return cq;
+}
+
+/* Map an embedded ib_qp back to its enclosing qedr_qp. */
+static inline struct qedr_qp *get_qedr_qp(struct ib_qp *ibqp)
+{
+       struct qedr_qp *qp = container_of(ibqp, struct qedr_qp, ibqp);
+
+       return qp;
+}
+
+/* Map an embedded ib_ah back to its enclosing qedr_ah. */
+static inline struct qedr_ah *get_qedr_ah(struct ib_ah *ibah)
+{
+       struct qedr_ah *ah = container_of(ibah, struct qedr_ah, ibah);
+
+       return ah;
+}
+
+/* Map an embedded ib_mr back to its enclosing qedr_mr. */
+static inline struct qedr_mr *get_qedr_mr(struct ib_mr *ibmr)
+{
+       struct qedr_mr *mr = container_of(ibmr, struct qedr_mr, ibmr);
+
+       return mr;
+}
+#endif
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c
new file mode 100644 (file)
index 0000000..63890eb
--- /dev/null
@@ -0,0 +1,622 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/crc32.h>
+#include <linux/iommu.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+
+#include "qedr_hsi.h"
+#include <linux/qed/qed_if.h>
+#include <linux/qed/qed_roce_if.h>
+#include "qedr.h"
+#include "qedr_hsi.h"
+#include "verbs.h"
+#include <rdma/qedr-abi.h>
+#include "qedr_hsi.h"
+#include "qedr_cm.h"
+
+/* Step the queue's gsi_cons index forward by one, wrapping modulo max_wr. */
+void qedr_inc_sw_gsi_cons(struct qedr_qp_hwq_info *info)
+{
+       int next = info->gsi_cons + 1;
+
+       info->gsi_cons = next % info->max_wr;
+}
+
+/* Record the GSI QP and its send/recv CQs on the device and flag that the
+ * GSI QP has been created.
+ */
+void qedr_store_gsi_qp_cq(struct qedr_dev *dev, struct qedr_qp *qp,
+                         struct ib_qp_init_attr *attrs)
+{
+       struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq);
+       struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq);
+
+       dev->gsi_qp_created = 1;
+       dev->gsi_sqcq = send_cq;
+       dev->gsi_rqcq = recv_cq;
+       dev->gsi_qp = qp;
+}
+
+/* LL2 TX callback for the GSI QP.
+ *
+ * Frees the packet's coherent header buffer and the packet itself, advances
+ * the SQ gsi_cons index under the QP lock, then calls the send CQ's
+ * completion handler (if one is set) under comp_handler_lock.
+ */
+void qedr_ll2_tx_cb(void *_qdev, struct qed_roce_ll2_packet *pkt)
+{
+       struct qedr_dev *dev = _qdev;
+       struct qedr_qp *gsi_qp = dev->gsi_qp;
+       struct qedr_cq *send_cq = dev->gsi_sqcq;
+       unsigned long irq_flags;
+
+       DP_DEBUG(dev, QEDR_MSG_GSI,
+                "LL2 TX CB: gsi_sqcq=%p, gsi_rqcq=%p, gsi_cons=%d, ibcq_comp=%s\n",
+                dev->gsi_sqcq, dev->gsi_rqcq, gsi_qp->sq.gsi_cons,
+                send_cq->ibcq.comp_handler ? "Yes" : "No");
+
+       dma_free_coherent(&dev->pdev->dev, pkt->header.len, pkt->header.vaddr,
+                         pkt->header.baddr);
+       kfree(pkt);
+
+       spin_lock_irqsave(&gsi_qp->q_lock, irq_flags);
+       qedr_inc_sw_gsi_cons(&gsi_qp->sq);
+       spin_unlock_irqrestore(&gsi_qp->q_lock, irq_flags);
+
+       if (!send_cq->ibcq.comp_handler)
+               return;
+
+       spin_lock_irqsave(&send_cq->comp_handler_lock, irq_flags);
+       (*send_cq->ibcq.comp_handler) (&send_cq->ibcq,
+                                      send_cq->ibcq.cq_context);
+       spin_unlock_irqrestore(&send_cq->comp_handler_lock, irq_flags);
+}
+
+/* LL2 RX callback for the GSI QP.
+ *
+ * Under the QP lock, stores the receive result (rc, vlan_id, first payload
+ * length, source MAC) in the RQ shadow entry at rq.gsi_cons and advances
+ * that index. Then calls the recv CQ's completion handler (if one is set)
+ * under comp_handler_lock.
+ */
+void qedr_ll2_rx_cb(void *_dev, struct qed_roce_ll2_packet *pkt,
+                   struct qed_roce_ll2_rx_params *params)
+{
+       struct qedr_dev *dev = _dev;
+       struct qedr_qp *gsi_qp = dev->gsi_qp;
+       struct qedr_cq *recv_cq = dev->gsi_rqcq;
+       unsigned long irq_flags;
+       u16 slot;
+
+       spin_lock_irqsave(&gsi_qp->q_lock, irq_flags);
+
+       slot = gsi_qp->rq.gsi_cons;
+       gsi_qp->rqe_wr_id[slot].rc = params->rc;
+       gsi_qp->rqe_wr_id[slot].vlan_id = params->vlan_id;
+       gsi_qp->rqe_wr_id[slot].sg_list[0].length = pkt->payload[0].len;
+       ether_addr_copy(gsi_qp->rqe_wr_id[slot].smac, params->smac);
+
+       qedr_inc_sw_gsi_cons(&gsi_qp->rq);
+
+       spin_unlock_irqrestore(&gsi_qp->q_lock, irq_flags);
+
+       if (!recv_cq->ibcq.comp_handler)
+               return;
+
+       spin_lock_irqsave(&recv_cq->comp_handler_lock, irq_flags);
+       (*recv_cq->ibcq.comp_handler) (&recv_cq->ibcq,
+                                      recv_cq->ibcq.cq_context);
+       spin_unlock_irqrestore(&recv_cq->comp_handler_lock, irq_flags);
+}
+
+/* Destroy the CQ(s) named in attrs through the qed ops and free their
+ * chains. The recv CQ is destroyed only when its icid differs from the
+ * send CQ's, i.e. when it is not the same CQ.
+ */
+static void qedr_destroy_gsi_cq(struct qedr_dev *dev,
+                               struct ib_qp_init_attr *attrs)
+{
+       struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq);
+       struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq);
+       struct qed_rdma_destroy_cq_out_params oparams;
+       struct qed_rdma_destroy_cq_in_params iparams;
+
+       iparams.icid = send_cq->icid;
+       dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
+       dev->ops->common->chain_free(dev->cdev, &send_cq->pbl);
+
+       /* nothing more to do when send and recv share one CQ */
+       if (iparams.icid == recv_cq->icid)
+               return;
+
+       /* a dedicated recv_cq was used, delete it too */
+       iparams.icid = recv_cq->icid;
+       dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
+       dev->ops->common->chain_free(dev->cdev, &recv_cq->pbl);
+}
+
+/* Validate the requested GSI QP capabilities against the driver limits.
+ * Returns 0 when all fit, or -EINVAL (after logging) for the first
+ * capability that exceeds its limit.
+ */
+static inline int qedr_check_gsi_qp_attrs(struct qedr_dev *dev,
+                                         struct ib_qp_init_attr *attrs)
+{
+       struct ib_qp_cap *cap = &attrs->cap;
+
+       if (cap->max_recv_sge > QEDR_GSI_MAX_RECV_SGE) {
+               DP_ERR(dev,
+                      " create gsi qp: failed. max_recv_sge is larger the max %d>%d\n",
+                      cap->max_recv_sge, QEDR_GSI_MAX_RECV_SGE);
+               return -EINVAL;
+       }
+
+       if (cap->max_recv_wr > QEDR_GSI_MAX_RECV_WR) {
+               DP_ERR(dev,
+                      " create gsi qp: failed. max_recv_wr is too large %d>%d\n",
+                      cap->max_recv_wr, QEDR_GSI_MAX_RECV_WR);
+               return -EINVAL;
+       }
+
+       if (cap->max_send_wr > QEDR_GSI_MAX_SEND_WR) {
+               DP_ERR(dev,
+                      " create gsi qp: failed. max_send_wr is too large %d>%d\n",
+                      cap->max_send_wr, QEDR_GSI_MAX_SEND_WR);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* Create the GSI QP (QP number 1) on top of LL2.
+ *
+ * @dev:   qedr device
+ * @attrs: requested QP attributes; the capabilities are validated and the
+ *         send/recv CQs are taken from here
+ * @qp:    caller-allocated QP to initialize
+ *
+ * Starts LL2 with the TX/RX callbacks of this file, allocates the SQ and RQ
+ * shadow arrays, records the QP and its CQs on the device, destroys the CQs
+ * through the qed ops and marks both as QEDR_CQ_TYPE_GSI.
+ *
+ * Returns &qp->ibqp on success or an ERR_PTR() on failure. On an
+ * allocation failure LL2 is stopped again and ERR_PTR(-ENOMEM) is returned.
+ */
+struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
+                                struct ib_qp_init_attr *attrs,
+                                struct qedr_qp *qp)
+{
+       struct qed_roce_ll2_params ll2_params;
+       int rc;
+
+       rc = qedr_check_gsi_qp_attrs(dev, attrs);
+       if (rc)
+               return ERR_PTR(rc);
+
+       /* configure and start LL2 */
+       memset(&ll2_params, 0, sizeof(ll2_params));
+       ll2_params.max_tx_buffers = attrs->cap.max_send_wr;
+       ll2_params.max_rx_buffers = attrs->cap.max_recv_wr;
+       ll2_params.cbs.tx_cb = qedr_ll2_tx_cb;
+       ll2_params.cbs.rx_cb = qedr_ll2_rx_cb;
+       ll2_params.cb_cookie = (void *)dev;
+       ll2_params.mtu = dev->ndev->mtu;
+       ether_addr_copy(ll2_params.mac_address, dev->ndev->dev_addr);
+       rc = dev->ops->roce_ll2_start(dev->cdev, &ll2_params);
+       if (rc) {
+               DP_ERR(dev, "create gsi qp: failed on ll2 start. rc=%d\n", rc);
+               return ERR_PTR(rc);
+       }
+
+       /* create QP */
+       qp->ibqp.qp_num = 1;
+       qp->rq.max_wr = attrs->cap.max_recv_wr;
+       qp->sq.max_wr = attrs->cap.max_send_wr;
+
+       qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
+                               GFP_KERNEL);
+       if (!qp->rqe_wr_id)
+               goto err;
+       qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
+                               GFP_KERNEL);
+       if (!qp->wqe_wr_id)
+               goto err;
+
+       qedr_store_gsi_qp_cq(dev, qp, attrs);
+       ether_addr_copy(dev->gsi_ll2_mac_address, dev->ndev->dev_addr);
+
+       /* the GSI CQ is handled by the driver so remove it from the FW */
+       qedr_destroy_gsi_cq(dev, attrs);
+       /* Mark BOTH CQs as GSI; the original set the recv CQ twice and left
+        * the send CQ's type untouched.
+        */
+       dev->gsi_sqcq->cq_type = QEDR_CQ_TYPE_GSI;
+       dev->gsi_rqcq->cq_type = QEDR_CQ_TYPE_GSI;
+
+       DP_DEBUG(dev, QEDR_MSG_GSI, "created GSI QP %p\n", qp);
+
+       return &qp->ibqp;
+
+err:
+       /* kfree(NULL) is a no-op, so this covers both allocation failures */
+       kfree(qp->rqe_wr_id);
+
+       rc = dev->ops->roce_ll2_stop(dev->cdev);
+       if (rc)
+               DP_ERR(dev, "create gsi qp: failed destroy on create\n");
+
+       return ERR_PTR(-ENOMEM);
+}
+
+/* Stop LL2 for the GSI QP and log the outcome. Returns the result of
+ * roce_ll2_stop() (0 on success).
+ */
+int qedr_destroy_gsi_qp(struct qedr_dev *dev)
+{
+       int rc = dev->ops->roce_ll2_stop(dev->cdev);
+
+       if (!rc) {
+               DP_DEBUG(dev, QEDR_MSG_GSI, "destroy gsi qp: success\n");
+               return rc;
+       }
+
+       DP_ERR(dev, "destroy gsi qp: failed (rc=%d)\n", rc);
+       return rc;
+}
+
+#define QEDR_MAX_UD_HEADER_SIZE        (100)
+#define QEDR_GSI_QPN           (1)
+/* Fill in the UD header for a GSI send.
+ *
+ * @dev:       qedr device (logging, source MAC)
+ * @qp:        GSI QP; sq_psn is post-incremented
+ * @swr:       send work request; its AH supplies the addressing info
+ * @udh:       header to initialize and fill
+ * @roce_mode: out, set to ROCE_V1, ROCE_V2_IPV4 or ROCE_V2_IPV6 according
+ *             to the source GID's type
+ *
+ * Returns 0 on success, the error from ib_get_cached_gid() or
+ * ib_ud_header_init(), or -ENOENT if the cached source GID is the zero GID.
+ */
+static inline int qedr_gsi_build_header(struct qedr_dev *dev,
+                                       struct qedr_qp *qp,
+                                       struct ib_send_wr *swr,
+                                       struct ib_ud_header *udh,
+                                       int *roce_mode)
+{
+       bool has_vlan = false, has_grh_ipv6 = true;
+       struct ib_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr;
+       struct ib_global_route *grh = &ah_attr->grh;
+       union ib_gid sgid;
+       int send_size = 0;
+       u16 vlan_id = 0;
+       u16 ether_type;
+       struct ib_gid_attr sgid_attr;
+       int rc;
+       int ip_ver = 0;
+
+       bool has_udp = false;
+       int i;
+
+       /* payload size is the sum of all SGE lengths */
+       for (i = 0; i < swr->num_sge; ++i)
+               send_size += swr->sg_list[i].length;
+
+       rc = ib_get_cached_gid(qp->ibqp.device, ah_attr->port_num,
+                              grh->sgid_index, &sgid, &sgid_attr);
+       if (rc) {
+               DP_ERR(dev,
+                      "gsi post send: failed to get cached GID (port=%d, ix=%d)\n",
+                      ah_attr->port_num, grh->sgid_index);
+               return rc;
+       }
+
+       /* sgid_attr.ndev may be NULL, so only look up the VLAN id (and
+        * release the netdev with dev_put()) when one was returned. The
+        * original called rdma_vlan_dev_vlan_id() before the NULL check.
+        */
+       if (sgid_attr.ndev) {
+               vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
+               if (vlan_id < VLAN_CFI_MASK)
+                       has_vlan = true;
+               dev_put(sgid_attr.ndev);
+       }
+
+       if (!memcmp(&sgid, &zgid, sizeof(sgid))) {
+               DP_ERR(dev, "gsi post send: GID not found GID index %d\n",
+                      ah_attr->grh.sgid_index);
+               return -ENOENT;
+       }
+
+       has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
+       if (!has_udp) {
+               /* RoCE v1 */
+               ether_type = ETH_P_ROCE;
+               *roce_mode = ROCE_V1;
+       } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) {
+               /* RoCE v2 IPv4 */
+               ip_ver = 4;
+               ether_type = ETH_P_IP;
+               has_grh_ipv6 = false;
+               *roce_mode = ROCE_V2_IPV4;
+       } else {
+               /* RoCE v2 IPv6 */
+               ip_ver = 6;
+               ether_type = ETH_P_IPV6;
+               *roce_mode = ROCE_V2_IPV6;
+       }
+
+       rc = ib_ud_header_init(send_size, false, true, has_vlan,
+                              has_grh_ipv6, ip_ver, has_udp, 0, udh);
+       if (rc) {
+               DP_ERR(dev, "gsi post send: failed to init header\n");
+               return rc;
+       }
+
+       /* ENET + VLAN headers */
+       ether_addr_copy(udh->eth.dmac_h, ah_attr->dmac);
+       ether_addr_copy(udh->eth.smac_h, dev->ndev->dev_addr);
+       if (has_vlan) {
+               udh->eth.type = htons(ETH_P_8021Q);
+               udh->vlan.tag = htons(vlan_id);
+               udh->vlan.type = htons(ether_type);
+       } else {
+               udh->eth.type = htons(ether_type);
+       }
+
+       /* BTH */
+       udh->bth.solicited_event = !!(swr->send_flags & IB_SEND_SOLICITED);
+       udh->bth.pkey = QEDR_ROCE_PKEY_DEFAULT;
+       udh->bth.destination_qpn = htonl(ud_wr(swr)->remote_qpn);
+       /* PSN is limited to its low 24 bits */
+       udh->bth.psn = htonl((qp->sq_psn++) & ((1 << 24) - 1));
+       udh->bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+
+       /* DETH */
+       udh->deth.qkey = htonl(0x80010000);
+       udh->deth.source_qpn = htonl(QEDR_GSI_QPN);
+
+       if (has_grh_ipv6) {
+               /* GRH / IPv6 header */
+               udh->grh.traffic_class = grh->traffic_class;
+               udh->grh.flow_label = grh->flow_label;
+               udh->grh.hop_limit = grh->hop_limit;
+               udh->grh.destination_gid = grh->dgid;
+               memcpy(&udh->grh.source_gid.raw, &sgid.raw,
+                      sizeof(udh->grh.source_gid.raw));
+       } else {
+               /* IPv4 header */
+               u32 ipv4_addr;
+
+               udh->ip4.protocol = IPPROTO_UDP;
+               /* NOTE(review): tos is derived from htonl(flow_label);
+                * confirm this is the intended mapping.
+                */
+               udh->ip4.tos = htonl(ah_attr->grh.flow_label);
+               udh->ip4.frag_off = htons(IP_DF);
+               udh->ip4.ttl = ah_attr->grh.hop_limit;
+
+               ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw);
+               udh->ip4.saddr = ipv4_addr;
+               ipv4_addr = qedr_get_ipv4_from_gid(ah_attr->grh.dgid.raw);
+               udh->ip4.daddr = ipv4_addr;
+               /* note: checksum is calculated by the device */
+       }
+
+       /* UDP */
+       if (has_udp) {
+               udh->udp.sport = htons(QEDR_ROCE_V2_UDP_SPORT);
+               udh->udp.dport = htons(ROCE_V2_UDP_DPORT);
+               udh->udp.csum = 0;
+               /* UDP length is untouched hence is zero */
+       }
+       return 0;
+}
+
+/* Build an LL2 packet for a GSI send.
+ *
+ * @dev:      qedr device
+ * @qp:       GSI QP
+ * @swr:      send work request; its SGEs become the packet payload
+ * @p_packet: out, the newly allocated packet; NULL on failure
+ *
+ * The UD header is built and packed on the stack, then copied into a
+ * DMA-coherent buffer owned by the packet. Both allocations use GFP_ATOMIC.
+ * On success the caller owns the packet and its header buffer.
+ *
+ * Returns 0 on success, the error from qedr_gsi_build_header(), or -ENOMEM.
+ */
+static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
+                                       struct qedr_qp *qp,
+                                       struct ib_send_wr *swr,
+                                       struct qed_roce_ll2_packet **p_packet)
+{
+       u8 ud_header_buffer[QEDR_MAX_UD_HEADER_SIZE];
+       struct qed_roce_ll2_packet *packet;
+       struct pci_dev *pdev = dev->pdev;
+       int roce_mode, header_size;
+       struct ib_ud_header udh;
+       int i, rc;
+
+       *p_packet = NULL;
+
+       rc = qedr_gsi_build_header(dev, qp, swr, &udh, &roce_mode);
+       if (rc)
+               return rc;
+
+       header_size = ib_ud_header_pack(&udh, &ud_header_buffer);
+
+       packet = kzalloc(sizeof(*packet), GFP_ATOMIC);
+       if (!packet)
+               return -ENOMEM;
+
+       packet->header.vaddr = dma_alloc_coherent(&pdev->dev, header_size,
+                                                 &packet->header.baddr,
+                                                 GFP_ATOMIC);
+       if (!packet->header.vaddr) {
+               kfree(packet);
+               return -ENOMEM;
+       }
+
+       /* A frame whose destination MAC equals our own source MAC is for
+        * this port and must be looped back; anything else goes out to the
+        * network. The original had the two destinations swapped.
+        */
+       if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h))
+               packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB;
+       else
+               packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW;
+
+       packet->roce_mode = roce_mode;
+       memcpy(packet->header.vaddr, ud_header_buffer, header_size);
+       packet->header.len = header_size;
+       packet->n_seg = swr->num_sge;
+       for (i = 0; i < packet->n_seg; i++) {
+               packet->payload[i].baddr = swr->sg_list[i].addr;
+               packet->payload[i].len = swr->sg_list[i].length;
+       }
+
+       *p_packet = packet;
+
+       return 0;
+}
+
+/* Post a single send work request on the GSI QP.
+ *
+ * @ibqp:   the GSI QP
+ * @wr:     work request to send; only IB_WR_SEND with at most
+ *          RDMA_MAX_SGE_PER_SQ_WQE SGEs is accepted
+ * @bad_wr: out, set to the failing work request on error
+ *
+ * Returns 0 on success. Returns -EINVAL if the QP is not in RTS, the WR is
+ * unsupported, or a second WR is chained (in which case the first WR has
+ * already been handed to LL2 and *bad_wr points at wr->next). Returns
+ * -EAGAIN if LL2 failed to transmit, or the error from
+ * qedr_gsi_build_packet().
+ *
+ * The state-check log message used to read "gsi post recv: failed to post
+ * rx buffer", copied from the receive path; it now names the send path.
+ */
+int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+                      struct ib_send_wr **bad_wr)
+{
+       struct qed_roce_ll2_packet *pkt = NULL;
+       struct qedr_qp *qp = get_qedr_qp(ibqp);
+       struct qed_roce_ll2_tx_params params;
+       struct qedr_dev *dev = qp->dev;
+       unsigned long flags;
+       int rc;
+
+       if (qp->state != QED_ROCE_QP_STATE_RTS) {
+               *bad_wr = wr;
+               DP_ERR(dev,
+                      "gsi post send: failed to post tx buffer. state is %d and not QED_ROCE_QP_STATE_RTS\n",
+                      qp->state);
+               return -EINVAL;
+       }
+
+       if (wr->num_sge > RDMA_MAX_SGE_PER_SQ_WQE) {
+               DP_ERR(dev, "gsi post send: num_sge is too large (%d>%d)\n",
+                      wr->num_sge, RDMA_MAX_SGE_PER_SQ_WQE);
+               rc = -EINVAL;
+               goto err;
+       }
+
+       if (wr->opcode != IB_WR_SEND) {
+               DP_ERR(dev,
+                      "gsi post send: failed due to unsupported opcode %d\n",
+                      wr->opcode);
+               rc = -EINVAL;
+               goto err;
+       }
+
+       memset(&params, 0, sizeof(params));
+
+       spin_lock_irqsave(&qp->q_lock, flags);
+
+       rc = qedr_gsi_build_packet(dev, qp, wr, &pkt);
+       if (rc) {
+               spin_unlock_irqrestore(&qp->q_lock, flags);
+               goto err;
+       }
+
+       rc = dev->ops->roce_ll2_tx(dev->cdev, pkt, &params);
+       if (!rc) {
+               /* remember the wr_id so poll_cq can report the completion */
+               qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
+               qedr_inc_sw_prod(&qp->sq);
+               DP_DEBUG(qp->dev, QEDR_MSG_GSI,
+                        "gsi post send: opcode=%d, in_irq=%ld, irqs_disabled=%d, wr_id=%llx\n",
+                        wr->opcode, in_irq(), irqs_disabled(), wr->wr_id);
+       } else {
+               if (rc == QED_ROCE_TX_HEAD_FAILURE) {
+                       /* TX failed while posting header - release resources */
+                       dma_free_coherent(&dev->pdev->dev, pkt->header.len,
+                                         pkt->header.vaddr, pkt->header.baddr);
+                       kfree(pkt);
+               } else if (rc == QED_ROCE_TX_FRAG_FAILURE) {
+                       /* NTD since TX failed while posting a fragment. We will
+                        * release the resources on TX callback
+                        */
+               }
+
+               DP_ERR(dev, "gsi post send: failed to transmit (rc=%d)\n", rc);
+               rc = -EAGAIN;
+               *bad_wr = wr;
+       }
+
+       spin_unlock_irqrestore(&qp->q_lock, flags);
+
+       if (wr->next) {
+               DP_ERR(dev,
+                      "gsi post send: failed second WR. Only one WR may be passed at a time\n");
+               *bad_wr = wr->next;
+               rc = -EINVAL;
+       }
+
+       return rc;
+
+err:
+       *bad_wr = wr;
+       return rc;
+}
+
+/* Post a chain of receive work requests on the GSI QP.
+ *
+ * @ibqp:   the GSI QP
+ * @wr:     first work request of the chain; only sg_list[0] of each WR is
+ *          posted to LL2
+ * @bad_wr: out, set to the first work request that was not posted
+ *
+ * Returns 0 when every WR was posted. Returns -EINVAL if the QP is in
+ * neither RTR nor RTS, or if a WR carries more than QEDR_GSI_MAX_RECV_SGE
+ * SGEs (this used to return -ENOMEM, unlike the matching check in
+ * qedr_gsi_post_send()). Returns -ENOMEM if LL2 rejects a buffer.
+ * WRs before the failing one remain posted.
+ */
+int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+                      struct ib_recv_wr **bad_wr)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibqp->device);
+       struct qedr_qp *qp = get_qedr_qp(ibqp);
+       struct qed_roce_ll2_buffer buf;
+       unsigned long flags;
+       int rc = 0;
+
+       if ((qp->state != QED_ROCE_QP_STATE_RTR) &&
+           (qp->state != QED_ROCE_QP_STATE_RTS)) {
+               *bad_wr = wr;
+               DP_ERR(dev,
+                      "gsi post recv: failed to post rx buffer. state is %d and not QED_ROCE_QP_STATE_RTR/S\n",
+                      qp->state);
+               return -EINVAL;
+       }
+
+       memset(&buf, 0, sizeof(buf));
+
+       spin_lock_irqsave(&qp->q_lock, flags);
+
+       while (wr) {
+               if (wr->num_sge > QEDR_GSI_MAX_RECV_SGE) {
+                       DP_ERR(dev,
+                              "gsi post recv: failed to post rx buffer. too many sges %d>%d\n",
+                              wr->num_sge, QEDR_GSI_MAX_RECV_SGE);
+                       rc = -EINVAL;
+                       goto err;
+               }
+
+               buf.baddr = wr->sg_list[0].addr;
+               buf.len = wr->sg_list[0].length;
+
+               rc = dev->ops->roce_ll2_post_rx_buffer(dev->cdev, &buf, 0, 1);
+               if (rc) {
+                       DP_ERR(dev,
+                              "gsi post recv: failed to post rx buffer (rc=%d)\n",
+                              rc);
+                       /* keep the errno callers have always seen here */
+                       rc = -ENOMEM;
+                       goto err;
+               }
+
+               /* reset the shadow entry, then record the SGE and wr_id */
+               memset(&qp->rqe_wr_id[qp->rq.prod], 0,
+                      sizeof(qp->rqe_wr_id[qp->rq.prod]));
+               qp->rqe_wr_id[qp->rq.prod].sg_list[0] = wr->sg_list[0];
+               qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
+
+               qedr_inc_sw_prod(&qp->rq);
+
+               wr = wr->next;
+       }
+
+       spin_unlock_irqrestore(&qp->q_lock, flags);
+
+       return 0;
+err:
+       spin_unlock_irqrestore(&qp->q_lock, flags);
+       *bad_wr = wr;
+       return rc;
+}
+
+/* Poll completions of the GSI QP into wc[].
+ *
+ * Under cq_lock, first reports receive completions (RQ entries between
+ * rq.cons and rq.gsi_cons), then send completions (SQ entries between
+ * sq.cons and sq.gsi_cons), stopping once num_entries have been written.
+ * Returns the number of wc entries filled.
+ */
+int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibcq->device);
+       struct qedr_cq *cq = get_qedr_cq(ibcq);
+       struct qedr_qp *qp = dev->gsi_qp;
+       struct ib_wc *entry;
+       unsigned long flags;
+       int polled = 0;
+       u16 slot;
+
+       spin_lock_irqsave(&cq->cq_lock, flags);
+
+       while (polled < num_entries && qp->rq.cons != qp->rq.gsi_cons) {
+               slot = qp->rq.cons;
+               entry = &wc[polled];
+               memset(entry, 0, sizeof(*entry));
+
+               entry->qp = &qp->ibqp;
+               entry->wr_id = qp->rqe_wr_id[slot].wr_id;
+               entry->opcode = IB_WC_RECV;
+               entry->pkey_index = 0;
+               if (qp->rqe_wr_id[slot].rc)
+                       entry->status = IB_WC_GENERAL_ERR;
+               else
+                       entry->status = IB_WC_SUCCESS;
+               /* 0 - currently only one recv sg is supported */
+               entry->byte_len = qp->rqe_wr_id[slot].sg_list[0].length;
+               ether_addr_copy(entry->smac, qp->rqe_wr_id[slot].smac);
+               entry->wc_flags |= IB_WC_GRH | IB_WC_IP_CSUM_OK |
+                                  IB_WC_WITH_SMAC;
+               if (qp->rqe_wr_id[slot].vlan_id) {
+                       entry->wc_flags |= IB_WC_WITH_VLAN;
+                       entry->vlan_id = qp->rqe_wr_id[slot].vlan_id;
+               }
+
+               qedr_inc_sw_cons(&qp->rq);
+               polled++;
+       }
+
+       while (polled < num_entries && qp->sq.cons != qp->sq.gsi_cons) {
+               slot = qp->sq.cons;
+               entry = &wc[polled];
+               memset(entry, 0, sizeof(*entry));
+
+               entry->qp = &qp->ibqp;
+               entry->wr_id = qp->wqe_wr_id[slot].wr_id;
+               entry->opcode = IB_WC_SEND;
+               entry->status = IB_WC_SUCCESS;
+
+               qedr_inc_sw_cons(&qp->sq);
+               polled++;
+       }
+
+       spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+       DP_DEBUG(dev, QEDR_MSG_GSI,
+                "gsi poll_cq: requested entries=%d, actual=%d, qp->rq.cons=%d, qp->rq.gsi_cons=%x, qp->sq.cons=%d, qp->sq.gsi_cons=%d, qp_num=%d\n",
+                num_entries, polled, qp->rq.cons, qp->rq.gsi_cons, qp->sq.cons,
+                qp->sq.gsi_cons, qp->ibqp.qp_num);
+
+       return polled;
+}
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.h b/drivers/infiniband/hw/qedr/qedr_cm.h
new file mode 100644 (file)
index 0000000..9ba6e15
--- /dev/null
@@ -0,0 +1,61 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef LINUX_QEDR_CM_H_
+#define LINUX_QEDR_CM_H_
+
+/* Upper bounds enforced on the GSI QP capabilities at creation time */
+#define QEDR_GSI_MAX_RECV_WR   (4096)
+#define QEDR_GSI_MAX_SEND_WR   (4096)
+
+#define QEDR_GSI_MAX_RECV_SGE  (1)     /* LL2 FW limitation */
+
+/* Ethertype used for RoCE v1 frames when building the GSI header */
+#define ETH_P_ROCE             (0x8915)
+/* UDP source port used for RoCE v2 GSI packets */
+#define QEDR_ROCE_V2_UDP_SPORT (0000)
+
+/* Return the 32-bit word stored in bytes 12..15 of a GID, read as-is with
+ * no byte-order conversion.
+ */
+static inline u32 qedr_get_ipv4_from_gid(u8 *gid)
+{
+       u32 *ipv4_word = (u32 *)(void *)(gid + 12);
+
+       return *ipv4_word;
+}
+
+/* RDMA CM */
+int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+                      struct ib_recv_wr **bad_wr);
+int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+                      struct ib_send_wr **bad_wr);
+struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
+                                struct ib_qp_init_attr *attrs,
+                                struct qedr_qp *qp);
+void qedr_store_gsi_qp_cq(struct qedr_dev *dev,
+                         struct qedr_qp *qp, struct ib_qp_init_attr *attrs);
+int qedr_destroy_gsi_qp(struct qedr_dev *dev);
+void qedr_inc_sw_gsi_cons(struct qedr_qp_hwq_info *info);
+#endif
diff --git a/drivers/infiniband/hw/qedr/qedr_hsi.h b/drivers/infiniband/hw/qedr/qedr_hsi.h
new file mode 100644 (file)
index 0000000..66d2752
--- /dev/null
@@ -0,0 +1,56 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __QED_HSI_ROCE__
+#define __QED_HSI_ROCE__
+
+#include <linux/qed/common_hsi.h>
+#include <linux/qed/roce_common.h>
+#include "qedr_hsi_rdma.h"
+
+/* Affiliated asynchronous events / errors enumeration */
+enum roce_async_events_type {
+       ROCE_ASYNC_EVENT_NONE = 0,
+       ROCE_ASYNC_EVENT_COMM_EST = 1,
+       ROCE_ASYNC_EVENT_SQ_DRAINED,
+       ROCE_ASYNC_EVENT_SRQ_LIMIT,
+       ROCE_ASYNC_EVENT_LAST_WQE_REACHED,
+       ROCE_ASYNC_EVENT_CQ_ERR,
+       ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR,
+       ROCE_ASYNC_EVENT_LOCAL_CATASTROPHIC_ERR,
+       ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR,
+       ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR,
+       ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR,
+       ROCE_ASYNC_EVENT_SRQ_EMPTY,
+       MAX_ROCE_ASYNC_EVENTS_TYPE
+};
+
+#endif /* __QED_HSI_ROCE__ */
diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
new file mode 100644 (file)
index 0000000..5c98d20
--- /dev/null
@@ -0,0 +1,748 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __QED_HSI_RDMA__
+#define __QED_HSI_RDMA__
+
+#include <linux/qed/rdma_common.h>
+
+/* RDMA completion notification queue element */
+struct rdma_cnqe {
+       struct regpair  cq_handle;
+};
+
+struct rdma_cqe_responder {
+       struct regpair srq_wr_id;
+       struct regpair qp_handle;
+       __le32 imm_data_or_inv_r_Key;
+       __le32 length;
+       __le32 imm_data_hi;
+       __le16 rq_cons;
+       u8 flags;
+#define RDMA_CQE_RESPONDER_TOGGLE_BIT_MASK  0x1
+#define RDMA_CQE_RESPONDER_TOGGLE_BIT_SHIFT 0
+#define RDMA_CQE_RESPONDER_TYPE_MASK        0x3
+#define RDMA_CQE_RESPONDER_TYPE_SHIFT       1
+#define RDMA_CQE_RESPONDER_INV_FLG_MASK     0x1
+#define RDMA_CQE_RESPONDER_INV_FLG_SHIFT    3
+#define RDMA_CQE_RESPONDER_IMM_FLG_MASK     0x1
+#define RDMA_CQE_RESPONDER_IMM_FLG_SHIFT    4
+#define RDMA_CQE_RESPONDER_RDMA_FLG_MASK    0x1
+#define RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT   5
+#define RDMA_CQE_RESPONDER_RESERVED2_MASK   0x3
+#define RDMA_CQE_RESPONDER_RESERVED2_SHIFT  6
+       u8 status;
+};
+
+struct rdma_cqe_requester {
+       __le16 sq_cons;
+       __le16 reserved0;
+       __le32 reserved1;
+       struct regpair qp_handle;
+       struct regpair reserved2;
+       __le32 reserved3;
+       __le16 reserved4;
+       u8 flags;
+#define RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK  0x1
+#define RDMA_CQE_REQUESTER_TOGGLE_BIT_SHIFT 0
+#define RDMA_CQE_REQUESTER_TYPE_MASK        0x3
+#define RDMA_CQE_REQUESTER_TYPE_SHIFT       1
+#define RDMA_CQE_REQUESTER_RESERVED5_MASK   0x1F
+#define RDMA_CQE_REQUESTER_RESERVED5_SHIFT  3
+       u8 status;
+};
+
+struct rdma_cqe_common {
+       struct regpair reserved0;
+       struct regpair qp_handle;
+       __le16 reserved1[7];
+       u8 flags;
+#define RDMA_CQE_COMMON_TOGGLE_BIT_MASK  0x1
+#define RDMA_CQE_COMMON_TOGGLE_BIT_SHIFT 0
+#define RDMA_CQE_COMMON_TYPE_MASK        0x3
+#define RDMA_CQE_COMMON_TYPE_SHIFT       1
+#define RDMA_CQE_COMMON_RESERVED2_MASK   0x1F
+#define RDMA_CQE_COMMON_RESERVED2_SHIFT  3
+       u8 status;
+};
+
+/* RDMA completion queue element */
+union rdma_cqe {
+       struct rdma_cqe_responder resp;
+       struct rdma_cqe_requester req;
+       struct rdma_cqe_common cmn;
+};
+
+/* CQE requester status enumeration */
+enum rdma_cqe_requester_status_enum {
+       RDMA_CQE_REQ_STS_OK,
+       RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR,
+       RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR,
+       RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR,
+       RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR,
+       RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR,
+       RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR,
+       RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR,
+       RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR,
+       RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR,
+       RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR,
+       RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR,
+       MAX_RDMA_CQE_REQUESTER_STATUS_ENUM
+};
+
+/* CQE responder status enumeration */
+enum rdma_cqe_responder_status_enum {
+       RDMA_CQE_RESP_STS_OK,
+       RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR,
+       RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR,
+       RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR,
+       RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR,
+       RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR,
+       RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR,
+       RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR,
+       MAX_RDMA_CQE_RESPONDER_STATUS_ENUM
+};
+
+/* CQE type enumeration */
+enum rdma_cqe_type {
+       RDMA_CQE_TYPE_REQUESTER,
+       RDMA_CQE_TYPE_RESPONDER_RQ,
+       RDMA_CQE_TYPE_RESPONDER_SRQ,
+       RDMA_CQE_TYPE_INVALID,
+       MAX_RDMA_CQE_TYPE
+};
+
+struct rdma_sq_sge {
+       __le32 length;
+       struct regpair  addr;
+       __le32 l_key;
+};
+
+struct rdma_rq_sge {
+       struct regpair addr;
+       __le32 length;
+       __le32 flags;
+#define RDMA_RQ_SGE_L_KEY_MASK      0x3FFFFFF
+#define RDMA_RQ_SGE_L_KEY_SHIFT     0
+#define RDMA_RQ_SGE_NUM_SGES_MASK   0x7
+#define RDMA_RQ_SGE_NUM_SGES_SHIFT  26
+#define RDMA_RQ_SGE_RESERVED0_MASK  0x7
+#define RDMA_RQ_SGE_RESERVED0_SHIFT 29
+};
+
+struct rdma_srq_sge {
+       struct regpair addr;
+       __le32 length;
+       __le32 l_key;
+};
+
+/* RDMA doorbell data for SQ and RQ */
+struct rdma_pwm_val16_data {
+       __le16 icid;
+       __le16 value;
+};
+
+union rdma_pwm_val16_data_union {
+       struct rdma_pwm_val16_data as_struct;
+       __le32 as_dword;
+};
+
+/* RDMA doorbell data for CQ */
+struct rdma_pwm_val32_data {
+       __le16 icid;
+       u8 agg_flags;
+       u8 params;
+#define RDMA_PWM_VAL32_DATA_AGG_CMD_MASK    0x3
+#define RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT   0
+#define RDMA_PWM_VAL32_DATA_BYPASS_EN_MASK  0x1
+#define RDMA_PWM_VAL32_DATA_BYPASS_EN_SHIFT 2
+#define RDMA_PWM_VAL32_DATA_RESERVED_MASK   0x1F
+#define RDMA_PWM_VAL32_DATA_RESERVED_SHIFT  3
+       __le32 value;
+};
+
+/* DIF Block size options */
+enum rdma_dif_block_size {
+       RDMA_DIF_BLOCK_512 = 0,
+       RDMA_DIF_BLOCK_4096 = 1,
+       MAX_RDMA_DIF_BLOCK_SIZE
+};
+
+/* DIF CRC initial value */
+enum rdma_dif_crc_seed {
+       RDMA_DIF_CRC_SEED_0000 = 0,
+       RDMA_DIF_CRC_SEED_FFFF = 1,
+       MAX_RDMA_DIF_CRC_SEED
+};
+
+/* RDMA DIF Error Result Structure */
+struct rdma_dif_error_result {
+       __le32 error_intervals;
+       __le32 dif_error_1st_interval;
+       u8 flags;
+#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_CRC_MASK      0x1
+#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_CRC_SHIFT     0
+#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_APP_TAG_MASK  0x1
+#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_APP_TAG_SHIFT 1
+#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_REF_TAG_MASK  0x1
+#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_REF_TAG_SHIFT 2
+#define RDMA_DIF_ERROR_RESULT_RESERVED0_MASK               0xF
+#define RDMA_DIF_ERROR_RESULT_RESERVED0_SHIFT              3
+#define RDMA_DIF_ERROR_RESULT_TOGGLE_BIT_MASK              0x1
+#define RDMA_DIF_ERROR_RESULT_TOGGLE_BIT_SHIFT             7
+       u8 reserved1[55];
+};
+
+/* DIF IO direction */
+enum rdma_dif_io_direction_flg {
+       RDMA_DIF_DIR_RX = 0,
+       RDMA_DIF_DIR_TX = 1,
+       MAX_RDMA_DIF_IO_DIRECTION_FLG
+};
+
+/* RDMA DIF Runt Result Structure */
+struct rdma_dif_runt_result {
+       __le16 guard_tag;
+       __le16 reserved[3];
+};
+
+/* Memory window type enumeration */
+enum rdma_mw_type {
+       RDMA_MW_TYPE_1,
+       RDMA_MW_TYPE_2A,
+       MAX_RDMA_MW_TYPE
+};
+
+struct rdma_sq_atomic_wqe {
+       __le32 reserved1;
+       __le32 length;
+       __le32 xrc_srq;
+       u8 req_type;
+       u8 flags;
+#define RDMA_SQ_ATOMIC_WQE_COMP_FLG_MASK         0x1
+#define RDMA_SQ_ATOMIC_WQE_COMP_FLG_SHIFT        0
+#define RDMA_SQ_ATOMIC_WQE_RD_FENCE_FLG_MASK     0x1
+#define RDMA_SQ_ATOMIC_WQE_RD_FENCE_FLG_SHIFT    1
+#define RDMA_SQ_ATOMIC_WQE_INV_FENCE_FLG_MASK    0x1
+#define RDMA_SQ_ATOMIC_WQE_INV_FENCE_FLG_SHIFT   2
+#define RDMA_SQ_ATOMIC_WQE_SE_FLG_MASK           0x1
+#define RDMA_SQ_ATOMIC_WQE_SE_FLG_SHIFT          3
+#define RDMA_SQ_ATOMIC_WQE_INLINE_FLG_MASK       0x1
+#define RDMA_SQ_ATOMIC_WQE_INLINE_FLG_SHIFT      4
+#define RDMA_SQ_ATOMIC_WQE_DIF_ON_HOST_FLG_MASK  0x1
+#define RDMA_SQ_ATOMIC_WQE_DIF_ON_HOST_FLG_SHIFT 5
+#define RDMA_SQ_ATOMIC_WQE_RESERVED0_MASK        0x3
+#define RDMA_SQ_ATOMIC_WQE_RESERVED0_SHIFT       6
+       u8 wqe_size;
+       u8 prev_wqe_size;
+       struct regpair remote_va;
+       __le32 r_key;
+       __le32 reserved2;
+       struct regpair cmp_data;
+       struct regpair swap_data;
+};
+
+/* First element (16 bytes) of atomic wqe */
+struct rdma_sq_atomic_wqe_1st {
+       __le32 reserved1;
+       __le32 length;
+       __le32 xrc_srq;
+       u8 req_type;
+       u8 flags;
+#define RDMA_SQ_ATOMIC_WQE_1ST_COMP_FLG_MASK       0x1
+#define RDMA_SQ_ATOMIC_WQE_1ST_COMP_FLG_SHIFT      0
+#define RDMA_SQ_ATOMIC_WQE_1ST_RD_FENCE_FLG_MASK   0x1
+#define RDMA_SQ_ATOMIC_WQE_1ST_RD_FENCE_FLG_SHIFT  1
+#define RDMA_SQ_ATOMIC_WQE_1ST_INV_FENCE_FLG_MASK  0x1
+#define RDMA_SQ_ATOMIC_WQE_1ST_INV_FENCE_FLG_SHIFT 2
+#define RDMA_SQ_ATOMIC_WQE_1ST_SE_FLG_MASK         0x1
+#define RDMA_SQ_ATOMIC_WQE_1ST_SE_FLG_SHIFT        3
+#define RDMA_SQ_ATOMIC_WQE_1ST_INLINE_FLG_MASK     0x1
+#define RDMA_SQ_ATOMIC_WQE_1ST_INLINE_FLG_SHIFT    4
+#define RDMA_SQ_ATOMIC_WQE_1ST_RESERVED0_MASK      0x7
+#define RDMA_SQ_ATOMIC_WQE_1ST_RESERVED0_SHIFT     5
+       u8 wqe_size;
+       u8 prev_wqe_size;
+};
+
+/* Second element (16 bytes) of atomic wqe */
+struct rdma_sq_atomic_wqe_2nd {
+       struct regpair remote_va;
+       __le32 r_key;
+       __le32 reserved2;
+};
+
+/* Third element (16 bytes) of atomic wqe */
+struct rdma_sq_atomic_wqe_3rd {
+       struct regpair cmp_data;
+       struct regpair swap_data;
+};
+
+struct rdma_sq_bind_wqe {
+       struct regpair addr;
+       __le32 l_key;
+       u8 req_type;
+       u8 flags;
+#define RDMA_SQ_BIND_WQE_COMP_FLG_MASK       0x1
+#define RDMA_SQ_BIND_WQE_COMP_FLG_SHIFT      0
+#define RDMA_SQ_BIND_WQE_RD_FENCE_FLG_MASK   0x1
+#define RDMA_SQ_BIND_WQE_RD_FENCE_FLG_SHIFT  1
+#define RDMA_SQ_BIND_WQE_INV_FENCE_FLG_MASK  0x1
+#define RDMA_SQ_BIND_WQE_INV_FENCE_FLG_SHIFT 2
+#define RDMA_SQ_BIND_WQE_SE_FLG_MASK         0x1
+#define RDMA_SQ_BIND_WQE_SE_FLG_SHIFT        3
+#define RDMA_SQ_BIND_WQE_INLINE_FLG_MASK     0x1
+#define RDMA_SQ_BIND_WQE_INLINE_FLG_SHIFT    4
+#define RDMA_SQ_BIND_WQE_RESERVED0_MASK      0x7
+#define RDMA_SQ_BIND_WQE_RESERVED0_SHIFT     5
+       u8 wqe_size;
+       u8 prev_wqe_size;
+       u8 bind_ctrl;
+#define RDMA_SQ_BIND_WQE_ZERO_BASED_MASK     0x1
+#define RDMA_SQ_BIND_WQE_ZERO_BASED_SHIFT    0
+#define RDMA_SQ_BIND_WQE_MW_TYPE_MASK        0x1
+#define RDMA_SQ_BIND_WQE_MW_TYPE_SHIFT       1
+#define RDMA_SQ_BIND_WQE_RESERVED1_MASK      0x3F
+#define RDMA_SQ_BIND_WQE_RESERVED1_SHIFT     2
+       u8 access_ctrl;
+#define RDMA_SQ_BIND_WQE_REMOTE_READ_MASK    0x1
+#define RDMA_SQ_BIND_WQE_REMOTE_READ_SHIFT   0
+#define RDMA_SQ_BIND_WQE_REMOTE_WRITE_MASK   0x1
+#define RDMA_SQ_BIND_WQE_REMOTE_WRITE_SHIFT  1
+#define RDMA_SQ_BIND_WQE_ENABLE_ATOMIC_MASK  0x1
+#define RDMA_SQ_BIND_WQE_ENABLE_ATOMIC_SHIFT 2
+#define RDMA_SQ_BIND_WQE_LOCAL_READ_MASK     0x1
+#define RDMA_SQ_BIND_WQE_LOCAL_READ_SHIFT    3
+#define RDMA_SQ_BIND_WQE_LOCAL_WRITE_MASK    0x1
+#define RDMA_SQ_BIND_WQE_LOCAL_WRITE_SHIFT   4
+#define RDMA_SQ_BIND_WQE_RESERVED2_MASK      0x7
+#define RDMA_SQ_BIND_WQE_RESERVED2_SHIFT     5
+       u8 reserved3;
+       u8 length_hi;
+       __le32 length_lo;
+       __le32 parent_l_key;
+       __le32 reserved4;
+};
+
+/* First element (16 bytes) of bind wqe */
+struct rdma_sq_bind_wqe_1st {
+       struct regpair addr;
+       __le32 l_key;
+       u8 req_type;
+       u8 flags;
+#define RDMA_SQ_BIND_WQE_1ST_COMP_FLG_MASK       0x1
+#define RDMA_SQ_BIND_WQE_1ST_COMP_FLG_SHIFT      0
+#define RDMA_SQ_BIND_WQE_1ST_RD_FENCE_FLG_MASK   0x1
+#define RDMA_SQ_BIND_WQE_1ST_RD_FENCE_FLG_SHIFT  1
+#define RDMA_SQ_BIND_WQE_1ST_INV_FENCE_FLG_MASK  0x1
+#define RDMA_SQ_BIND_WQE_1ST_INV_FENCE_FLG_SHIFT 2
+#define RDMA_SQ_BIND_WQE_1ST_SE_FLG_MASK         0x1
+#define RDMA_SQ_BIND_WQE_1ST_SE_FLG_SHIFT        3
+#define RDMA_SQ_BIND_WQE_1ST_INLINE_FLG_MASK     0x1
+#define RDMA_SQ_BIND_WQE_1ST_INLINE_FLG_SHIFT    4
+#define RDMA_SQ_BIND_WQE_1ST_RESERVED0_MASK      0x7
+#define RDMA_SQ_BIND_WQE_1ST_RESERVED0_SHIFT     5
+       u8 wqe_size;
+       u8 prev_wqe_size;
+};
+
+/* Second element (16 bytes) of bind wqe */
+struct rdma_sq_bind_wqe_2nd {
+       u8 bind_ctrl;
+#define RDMA_SQ_BIND_WQE_2ND_ZERO_BASED_MASK     0x1
+#define RDMA_SQ_BIND_WQE_2ND_ZERO_BASED_SHIFT    0
+#define RDMA_SQ_BIND_WQE_2ND_MW_TYPE_MASK        0x1
+#define RDMA_SQ_BIND_WQE_2ND_MW_TYPE_SHIFT       1
+#define RDMA_SQ_BIND_WQE_2ND_RESERVED1_MASK      0x3F
+#define RDMA_SQ_BIND_WQE_2ND_RESERVED1_SHIFT     2
+       u8 access_ctrl;
+#define RDMA_SQ_BIND_WQE_2ND_REMOTE_READ_MASK    0x1
+#define RDMA_SQ_BIND_WQE_2ND_REMOTE_READ_SHIFT   0
+#define RDMA_SQ_BIND_WQE_2ND_REMOTE_WRITE_MASK   0x1
+#define RDMA_SQ_BIND_WQE_2ND_REMOTE_WRITE_SHIFT  1
+#define RDMA_SQ_BIND_WQE_2ND_ENABLE_ATOMIC_MASK  0x1
+#define RDMA_SQ_BIND_WQE_2ND_ENABLE_ATOMIC_SHIFT 2
+#define RDMA_SQ_BIND_WQE_2ND_LOCAL_READ_MASK     0x1
+#define RDMA_SQ_BIND_WQE_2ND_LOCAL_READ_SHIFT    3
+#define RDMA_SQ_BIND_WQE_2ND_LOCAL_WRITE_MASK    0x1
+#define RDMA_SQ_BIND_WQE_2ND_LOCAL_WRITE_SHIFT   4
+#define RDMA_SQ_BIND_WQE_2ND_RESERVED2_MASK      0x7
+#define RDMA_SQ_BIND_WQE_2ND_RESERVED2_SHIFT     5
+       u8 reserved3;
+       u8 length_hi;
+       __le32 length_lo;
+       __le32 parent_l_key;
+       __le32 reserved4;
+};
+
+/* Structure holding only the fields common to the SQ WQEs.
+ * Its size is one SQ element (16 bytes).
+ */
+struct rdma_sq_common_wqe {
+       __le32 reserved1[3];
+       u8 req_type;
+       u8 flags;
+#define RDMA_SQ_COMMON_WQE_COMP_FLG_MASK       0x1
+#define RDMA_SQ_COMMON_WQE_COMP_FLG_SHIFT      0
+#define RDMA_SQ_COMMON_WQE_RD_FENCE_FLG_MASK   0x1
+#define RDMA_SQ_COMMON_WQE_RD_FENCE_FLG_SHIFT  1
+#define RDMA_SQ_COMMON_WQE_INV_FENCE_FLG_MASK  0x1
+#define RDMA_SQ_COMMON_WQE_INV_FENCE_FLG_SHIFT 2
+#define RDMA_SQ_COMMON_WQE_SE_FLG_MASK         0x1
+#define RDMA_SQ_COMMON_WQE_SE_FLG_SHIFT        3
+#define RDMA_SQ_COMMON_WQE_INLINE_FLG_MASK     0x1
+#define RDMA_SQ_COMMON_WQE_INLINE_FLG_SHIFT    4
+#define RDMA_SQ_COMMON_WQE_RESERVED0_MASK      0x7
+#define RDMA_SQ_COMMON_WQE_RESERVED0_SHIFT     5
+       u8 wqe_size;
+       u8 prev_wqe_size;
+};
+
+struct rdma_sq_fmr_wqe {
+       struct regpair addr;
+       __le32 l_key;
+       u8 req_type;
+       u8 flags;
+#define RDMA_SQ_FMR_WQE_COMP_FLG_MASK                0x1
+#define RDMA_SQ_FMR_WQE_COMP_FLG_SHIFT               0
+#define RDMA_SQ_FMR_WQE_RD_FENCE_FLG_MASK            0x1
+#define RDMA_SQ_FMR_WQE_RD_FENCE_FLG_SHIFT           1
+#define RDMA_SQ_FMR_WQE_INV_FENCE_FLG_MASK           0x1
+#define RDMA_SQ_FMR_WQE_INV_FENCE_FLG_SHIFT          2
+#define RDMA_SQ_FMR_WQE_SE_FLG_MASK                  0x1
+#define RDMA_SQ_FMR_WQE_SE_FLG_SHIFT                 3
+#define RDMA_SQ_FMR_WQE_INLINE_FLG_MASK              0x1
+#define RDMA_SQ_FMR_WQE_INLINE_FLG_SHIFT             4
+#define RDMA_SQ_FMR_WQE_DIF_ON_HOST_FLG_MASK         0x1
+#define RDMA_SQ_FMR_WQE_DIF_ON_HOST_FLG_SHIFT        5
+#define RDMA_SQ_FMR_WQE_RESERVED0_MASK               0x3
+#define RDMA_SQ_FMR_WQE_RESERVED0_SHIFT              6
+       u8 wqe_size;
+       u8 prev_wqe_size;
+       u8 fmr_ctrl;
+#define RDMA_SQ_FMR_WQE_PAGE_SIZE_LOG_MASK           0x1F
+#define RDMA_SQ_FMR_WQE_PAGE_SIZE_LOG_SHIFT          0
+#define RDMA_SQ_FMR_WQE_ZERO_BASED_MASK              0x1
+#define RDMA_SQ_FMR_WQE_ZERO_BASED_SHIFT             5
+#define RDMA_SQ_FMR_WQE_BIND_EN_MASK                 0x1
+#define RDMA_SQ_FMR_WQE_BIND_EN_SHIFT                6
+#define RDMA_SQ_FMR_WQE_RESERVED1_MASK               0x1
+#define RDMA_SQ_FMR_WQE_RESERVED1_SHIFT              7
+       u8 access_ctrl;
+#define RDMA_SQ_FMR_WQE_REMOTE_READ_MASK             0x1
+#define RDMA_SQ_FMR_WQE_REMOTE_READ_SHIFT            0
+#define RDMA_SQ_FMR_WQE_REMOTE_WRITE_MASK            0x1
+#define RDMA_SQ_FMR_WQE_REMOTE_WRITE_SHIFT           1
+#define RDMA_SQ_FMR_WQE_ENABLE_ATOMIC_MASK           0x1
+#define RDMA_SQ_FMR_WQE_ENABLE_ATOMIC_SHIFT          2
+#define RDMA_SQ_FMR_WQE_LOCAL_READ_MASK              0x1
+#define RDMA_SQ_FMR_WQE_LOCAL_READ_SHIFT             3
+#define RDMA_SQ_FMR_WQE_LOCAL_WRITE_MASK             0x1
+#define RDMA_SQ_FMR_WQE_LOCAL_WRITE_SHIFT            4
+#define RDMA_SQ_FMR_WQE_RESERVED2_MASK               0x7
+#define RDMA_SQ_FMR_WQE_RESERVED2_SHIFT              5
+       u8 reserved3;
+       u8 length_hi;
+       __le32 length_lo;
+       struct regpair pbl_addr;
+       __le32 dif_base_ref_tag;
+       __le16 dif_app_tag;
+       __le16 dif_app_tag_mask;
+       __le16 dif_runt_crc_value;
+       __le16 dif_flags;
+#define RDMA_SQ_FMR_WQE_DIF_IO_DIRECTION_FLG_MASK    0x1
+#define RDMA_SQ_FMR_WQE_DIF_IO_DIRECTION_FLG_SHIFT   0
+#define RDMA_SQ_FMR_WQE_DIF_BLOCK_SIZE_MASK          0x1
+#define RDMA_SQ_FMR_WQE_DIF_BLOCK_SIZE_SHIFT         1
+#define RDMA_SQ_FMR_WQE_DIF_RUNT_VALID_FLG_MASK      0x1
+#define RDMA_SQ_FMR_WQE_DIF_RUNT_VALID_FLG_SHIFT     2
+#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_CRC_GUARD_MASK  0x1
+#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_CRC_GUARD_SHIFT 3
+#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_REF_TAG_MASK    0x1
+#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_REF_TAG_SHIFT   4
+#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_APP_TAG_MASK    0x1
+#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_APP_TAG_SHIFT   5
+#define RDMA_SQ_FMR_WQE_DIF_CRC_SEED_MASK            0x1
+#define RDMA_SQ_FMR_WQE_DIF_CRC_SEED_SHIFT           6
+#define RDMA_SQ_FMR_WQE_RESERVED4_MASK               0x1FF
+#define RDMA_SQ_FMR_WQE_RESERVED4_SHIFT              7
+       __le32 Reserved5;
+};
+
+/* First element (16 bytes) of fmr wqe */
+struct rdma_sq_fmr_wqe_1st {
+       struct regpair addr;
+       __le32 l_key;
+       u8 req_type;
+       u8 flags;
+#define RDMA_SQ_FMR_WQE_1ST_COMP_FLG_MASK         0x1
+#define RDMA_SQ_FMR_WQE_1ST_COMP_FLG_SHIFT        0
+#define RDMA_SQ_FMR_WQE_1ST_RD_FENCE_FLG_MASK     0x1
+#define RDMA_SQ_FMR_WQE_1ST_RD_FENCE_FLG_SHIFT    1
+#define RDMA_SQ_FMR_WQE_1ST_INV_FENCE_FLG_MASK    0x1
+#define RDMA_SQ_FMR_WQE_1ST_INV_FENCE_FLG_SHIFT   2
+#define RDMA_SQ_FMR_WQE_1ST_SE_FLG_MASK           0x1
+#define RDMA_SQ_FMR_WQE_1ST_SE_FLG_SHIFT          3
+#define RDMA_SQ_FMR_WQE_1ST_INLINE_FLG_MASK       0x1
+#define RDMA_SQ_FMR_WQE_1ST_INLINE_FLG_SHIFT      4
+#define RDMA_SQ_FMR_WQE_1ST_DIF_ON_HOST_FLG_MASK  0x1
+#define RDMA_SQ_FMR_WQE_1ST_DIF_ON_HOST_FLG_SHIFT 5
+#define RDMA_SQ_FMR_WQE_1ST_RESERVED0_MASK        0x3
+#define RDMA_SQ_FMR_WQE_1ST_RESERVED0_SHIFT       6
+       u8 wqe_size;
+       u8 prev_wqe_size;
+};
+
+/* Second element (16 bytes) of fmr wqe */
+struct rdma_sq_fmr_wqe_2nd {
+       u8 fmr_ctrl;
+#define RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG_MASK  0x1F
+#define RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG_SHIFT 0
+#define RDMA_SQ_FMR_WQE_2ND_ZERO_BASED_MASK     0x1
+#define RDMA_SQ_FMR_WQE_2ND_ZERO_BASED_SHIFT    5
+#define RDMA_SQ_FMR_WQE_2ND_BIND_EN_MASK        0x1
+#define RDMA_SQ_FMR_WQE_2ND_BIND_EN_SHIFT       6
+#define RDMA_SQ_FMR_WQE_2ND_RESERVED1_MASK      0x1
+#define RDMA_SQ_FMR_WQE_2ND_RESERVED1_SHIFT     7
+       u8 access_ctrl;
+#define RDMA_SQ_FMR_WQE_2ND_REMOTE_READ_MASK    0x1
+#define RDMA_SQ_FMR_WQE_2ND_REMOTE_READ_SHIFT   0
+#define RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE_MASK   0x1
+#define RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE_SHIFT  1
+#define RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC_MASK  0x1
+#define RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC_SHIFT 2
+#define RDMA_SQ_FMR_WQE_2ND_LOCAL_READ_MASK     0x1
+#define RDMA_SQ_FMR_WQE_2ND_LOCAL_READ_SHIFT    3
+#define RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE_MASK    0x1
+#define RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE_SHIFT   4
+#define RDMA_SQ_FMR_WQE_2ND_RESERVED2_MASK      0x7
+#define RDMA_SQ_FMR_WQE_2ND_RESERVED2_SHIFT     5
+       u8 reserved3;
+       u8 length_hi;
+       __le32 length_lo;
+       struct regpair pbl_addr;
+};
+
+/* Third element (16 bytes) of fmr wqe */
+struct rdma_sq_fmr_wqe_3rd {
+       __le32 dif_base_ref_tag;
+       __le16 dif_app_tag;
+       __le16 dif_app_tag_mask;
+       __le16 dif_runt_crc_value;
+       __le16 dif_flags;
+#define RDMA_SQ_FMR_WQE_3RD_DIF_IO_DIRECTION_FLG_MASK    0x1
+#define RDMA_SQ_FMR_WQE_3RD_DIF_IO_DIRECTION_FLG_SHIFT   0
+#define RDMA_SQ_FMR_WQE_3RD_DIF_BLOCK_SIZE_MASK          0x1
+#define RDMA_SQ_FMR_WQE_3RD_DIF_BLOCK_SIZE_SHIFT         1
+#define RDMA_SQ_FMR_WQE_3RD_DIF_RUNT_VALID_FLG_MASK      0x1
+#define RDMA_SQ_FMR_WQE_3RD_DIF_RUNT_VALID_FLG_SHIFT     2
+#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_CRC_GUARD_MASK  0x1
+#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_CRC_GUARD_SHIFT 3
+#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_REF_TAG_MASK    0x1
+#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_REF_TAG_SHIFT   4
+#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_APP_TAG_MASK    0x1
+#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_APP_TAG_SHIFT   5
+#define RDMA_SQ_FMR_WQE_3RD_DIF_CRC_SEED_MASK            0x1
+#define RDMA_SQ_FMR_WQE_3RD_DIF_CRC_SEED_SHIFT           6
+#define RDMA_SQ_FMR_WQE_3RD_RESERVED4_MASK               0x1FF
+#define RDMA_SQ_FMR_WQE_3RD_RESERVED4_SHIFT              7
+       __le32 Reserved5;
+};
+
+struct rdma_sq_local_inv_wqe {
+       struct regpair reserved;
+       __le32 inv_l_key;
+       u8 req_type;
+       u8 flags;
+#define RDMA_SQ_LOCAL_INV_WQE_COMP_FLG_MASK         0x1
+#define RDMA_SQ_LOCAL_INV_WQE_COMP_FLG_SHIFT        0
+#define RDMA_SQ_LOCAL_INV_WQE_RD_FENCE_FLG_MASK     0x1
+#define RDMA_SQ_LOCAL_INV_WQE_RD_FENCE_FLG_SHIFT    1
+#define RDMA_SQ_LOCAL_INV_WQE_INV_FENCE_FLG_MASK    0x1
+#define RDMA_SQ_LOCAL_INV_WQE_INV_FENCE_FLG_SHIFT   2
+#define RDMA_SQ_LOCAL_INV_WQE_SE_FLG_MASK           0x1
+#define RDMA_SQ_LOCAL_INV_WQE_SE_FLG_SHIFT          3
+#define RDMA_SQ_LOCAL_INV_WQE_INLINE_FLG_MASK       0x1
+#define RDMA_SQ_LOCAL_INV_WQE_INLINE_FLG_SHIFT      4
+#define RDMA_SQ_LOCAL_INV_WQE_DIF_ON_HOST_FLG_MASK  0x1
+#define RDMA_SQ_LOCAL_INV_WQE_DIF_ON_HOST_FLG_SHIFT 5
+#define RDMA_SQ_LOCAL_INV_WQE_RESERVED0_MASK        0x3
+#define RDMA_SQ_LOCAL_INV_WQE_RESERVED0_SHIFT       6
+       u8 wqe_size;
+       u8 prev_wqe_size;
+};
+
+struct rdma_sq_rdma_wqe {
+       __le32 imm_data;
+       __le32 length;
+       __le32 xrc_srq;
+       u8 req_type;
+       u8 flags;
+#define RDMA_SQ_RDMA_WQE_COMP_FLG_MASK                  0x1
+#define RDMA_SQ_RDMA_WQE_COMP_FLG_SHIFT                 0
+#define RDMA_SQ_RDMA_WQE_RD_FENCE_FLG_MASK              0x1
+#define RDMA_SQ_RDMA_WQE_RD_FENCE_FLG_SHIFT             1
+#define RDMA_SQ_RDMA_WQE_INV_FENCE_FLG_MASK             0x1
+#define RDMA_SQ_RDMA_WQE_INV_FENCE_FLG_SHIFT            2
+#define RDMA_SQ_RDMA_WQE_SE_FLG_MASK                    0x1
+#define RDMA_SQ_RDMA_WQE_SE_FLG_SHIFT                   3
+#define RDMA_SQ_RDMA_WQE_INLINE_FLG_MASK                0x1
+#define RDMA_SQ_RDMA_WQE_INLINE_FLG_SHIFT               4
+#define RDMA_SQ_RDMA_WQE_DIF_ON_HOST_FLG_MASK           0x1
+#define RDMA_SQ_RDMA_WQE_DIF_ON_HOST_FLG_SHIFT          5
+#define RDMA_SQ_RDMA_WQE_RESERVED0_MASK                 0x3
+#define RDMA_SQ_RDMA_WQE_RESERVED0_SHIFT                6
+       u8 wqe_size;
+       u8 prev_wqe_size;
+       struct regpair remote_va;
+       __le32 r_key;
+       u8 dif_flags;
+#define RDMA_SQ_RDMA_WQE_DIF_BLOCK_SIZE_MASK            0x1
+#define RDMA_SQ_RDMA_WQE_DIF_BLOCK_SIZE_SHIFT           0
+#define RDMA_SQ_RDMA_WQE_DIF_FIRST_RDMA_IN_IO_FLG_MASK  0x1
+#define RDMA_SQ_RDMA_WQE_DIF_FIRST_RDMA_IN_IO_FLG_SHIFT 1
+#define RDMA_SQ_RDMA_WQE_DIF_LAST_RDMA_IN_IO_FLG_MASK   0x1
+#define RDMA_SQ_RDMA_WQE_DIF_LAST_RDMA_IN_IO_FLG_SHIFT  2
+#define RDMA_SQ_RDMA_WQE_RESERVED1_MASK                 0x1F
+#define RDMA_SQ_RDMA_WQE_RESERVED1_SHIFT                3
+       u8 reserved2[3];
+};
+
+/* First element (16 bytes) of rdma wqe */
+struct rdma_sq_rdma_wqe_1st {
+       __le32 imm_data;
+       __le32 length;
+       __le32 xrc_srq;
+       u8 req_type;
+       u8 flags;
+#define RDMA_SQ_RDMA_WQE_1ST_COMP_FLG_MASK         0x1
+#define RDMA_SQ_RDMA_WQE_1ST_COMP_FLG_SHIFT        0
+#define RDMA_SQ_RDMA_WQE_1ST_RD_FENCE_FLG_MASK     0x1
+#define RDMA_SQ_RDMA_WQE_1ST_RD_FENCE_FLG_SHIFT    1
+#define RDMA_SQ_RDMA_WQE_1ST_INV_FENCE_FLG_MASK    0x1
+#define RDMA_SQ_RDMA_WQE_1ST_INV_FENCE_FLG_SHIFT   2
+#define RDMA_SQ_RDMA_WQE_1ST_SE_FLG_MASK           0x1
+#define RDMA_SQ_RDMA_WQE_1ST_SE_FLG_SHIFT          3
+#define RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG_MASK       0x1
+#define RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG_SHIFT      4
+#define RDMA_SQ_RDMA_WQE_1ST_DIF_ON_HOST_FLG_MASK  0x1
+#define RDMA_SQ_RDMA_WQE_1ST_DIF_ON_HOST_FLG_SHIFT 5
+#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_MASK        0x3
+#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_SHIFT       6
+       u8 wqe_size;
+       u8 prev_wqe_size;
+};
+
+/* Second element (16 bytes) of rdma wqe */
+struct rdma_sq_rdma_wqe_2nd {
+       struct regpair remote_va;
+       __le32 r_key;
+       u8 dif_flags;
+#define RDMA_SQ_RDMA_WQE_2ND_DIF_BLOCK_SIZE_MASK         0x1
+#define RDMA_SQ_RDMA_WQE_2ND_DIF_BLOCK_SIZE_SHIFT        0
+#define RDMA_SQ_RDMA_WQE_2ND_DIF_FIRST_SEGMENT_FLG_MASK  0x1
+#define RDMA_SQ_RDMA_WQE_2ND_DIF_FIRST_SEGMENT_FLG_SHIFT 1
+#define RDMA_SQ_RDMA_WQE_2ND_DIF_LAST_SEGMENT_FLG_MASK   0x1
+#define RDMA_SQ_RDMA_WQE_2ND_DIF_LAST_SEGMENT_FLG_SHIFT  2
+#define RDMA_SQ_RDMA_WQE_2ND_RESERVED1_MASK              0x1F
+#define RDMA_SQ_RDMA_WQE_2ND_RESERVED1_SHIFT             3
+       u8 reserved2[3];
+};
+
+/* SQ WQE req type enumeration */
+enum rdma_sq_req_type {
+       RDMA_SQ_REQ_TYPE_SEND,
+       RDMA_SQ_REQ_TYPE_SEND_WITH_IMM,
+       RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE,
+       RDMA_SQ_REQ_TYPE_RDMA_WR,
+       RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM,
+       RDMA_SQ_REQ_TYPE_RDMA_RD,
+       RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP,
+       RDMA_SQ_REQ_TYPE_ATOMIC_ADD,
+       RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE,
+       RDMA_SQ_REQ_TYPE_FAST_MR,
+       RDMA_SQ_REQ_TYPE_BIND,
+       RDMA_SQ_REQ_TYPE_INVALID,
+       MAX_RDMA_SQ_REQ_TYPE
+};
+
+struct rdma_sq_send_wqe {
+       __le32 inv_key_or_imm_data;
+       __le32 length;
+       __le32 xrc_srq;
+       u8 req_type;
+       u8 flags;
+#define RDMA_SQ_SEND_WQE_COMP_FLG_MASK         0x1
+#define RDMA_SQ_SEND_WQE_COMP_FLG_SHIFT        0
+#define RDMA_SQ_SEND_WQE_RD_FENCE_FLG_MASK     0x1
+#define RDMA_SQ_SEND_WQE_RD_FENCE_FLG_SHIFT    1
+#define RDMA_SQ_SEND_WQE_INV_FENCE_FLG_MASK    0x1
+#define RDMA_SQ_SEND_WQE_INV_FENCE_FLG_SHIFT   2
+#define RDMA_SQ_SEND_WQE_SE_FLG_MASK           0x1
+#define RDMA_SQ_SEND_WQE_SE_FLG_SHIFT          3
+#define RDMA_SQ_SEND_WQE_INLINE_FLG_MASK       0x1
+#define RDMA_SQ_SEND_WQE_INLINE_FLG_SHIFT      4
+#define RDMA_SQ_SEND_WQE_DIF_ON_HOST_FLG_MASK  0x1
+#define RDMA_SQ_SEND_WQE_DIF_ON_HOST_FLG_SHIFT 5
+#define RDMA_SQ_SEND_WQE_RESERVED0_MASK        0x3
+#define RDMA_SQ_SEND_WQE_RESERVED0_SHIFT       6
+       u8 wqe_size;
+       u8 prev_wqe_size;
+       __le32 reserved1[4];
+};
+
+struct rdma_sq_send_wqe_1st {
+       __le32 inv_key_or_imm_data;
+       __le32 length;
+       __le32 xrc_srq;
+       u8 req_type;
+       u8 flags;
+#define RDMA_SQ_SEND_WQE_1ST_COMP_FLG_MASK       0x1
+#define RDMA_SQ_SEND_WQE_1ST_COMP_FLG_SHIFT      0
+#define RDMA_SQ_SEND_WQE_1ST_RD_FENCE_FLG_MASK   0x1
+#define RDMA_SQ_SEND_WQE_1ST_RD_FENCE_FLG_SHIFT  1
+#define RDMA_SQ_SEND_WQE_1ST_INV_FENCE_FLG_MASK  0x1
+#define RDMA_SQ_SEND_WQE_1ST_INV_FENCE_FLG_SHIFT 2
+#define RDMA_SQ_SEND_WQE_1ST_SE_FLG_MASK         0x1
+#define RDMA_SQ_SEND_WQE_1ST_SE_FLG_SHIFT        3
+#define RDMA_SQ_SEND_WQE_1ST_INLINE_FLG_MASK     0x1
+#define RDMA_SQ_SEND_WQE_1ST_INLINE_FLG_SHIFT    4
+#define RDMA_SQ_SEND_WQE_1ST_RESERVED0_MASK      0x7
+#define RDMA_SQ_SEND_WQE_1ST_RESERVED0_SHIFT     5
+       u8 wqe_size;
+       u8 prev_wqe_size;
+};
+
+struct rdma_sq_send_wqe_2st {
+       __le32 reserved1[4];
+};
+
+#endif /* __QED_HSI_RDMA__ */
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
new file mode 100644 (file)
index 0000000..a615142
--- /dev/null
@@ -0,0 +1,3547 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/crc32.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+#include <linux/iommu.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+
+#include "qedr_hsi.h"
+#include <linux/qed/qed_if.h>
+#include "qedr.h"
+#include "verbs.h"
+#include <rdma/qedr-abi.h>
+#include "qedr_cm.h"
+
+#define DB_ADDR_SHIFT(addr)            ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
+
+/* Report the P_Key stored at @index of the P_Key table.
+ *
+ * Every valid slot holds QEDR_ROCE_PKEY_DEFAULT. Valid indices are
+ * 0 .. QEDR_ROCE_PKEY_TABLE_LEN - 1.
+ *
+ * Return: 0 on success, -EINVAL if @index is outside the table.
+ */
+int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+       /* The table has QEDR_ROCE_PKEY_TABLE_LEN entries, so an index equal
+        * to the table length is already out of range.
+        */
+       if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
+               return -EINVAL;
+
+       *pkey = QEDR_ROCE_PKEY_DEFAULT;
+       return 0;
+}
+
+/* Fetch the GID at @index of @port's cached GID table into @sgid.
+ *
+ * Returns -ENODEV when the port has no RoCE GID table. A lookup that
+ * reports -EAGAIN is converted into success with @sgid set to the zero
+ * GID; any other lookup result is logged and returned unchanged.
+ */
+int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
+                  union ib_gid *sgid)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibdev);
+       int status;
+
+       if (!rdma_cap_roce_gid_table(ibdev, port))
+               return -ENODEV;
+
+       status = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
+       if (status != -EAGAIN) {
+               DP_DEBUG(dev, QEDR_MSG_INIT,
+                        "query gid: index=%d %llx:%llx\n", index,
+                        sgid->global.interface_id,
+                        sgid->global.subnet_prefix);
+               return status;
+       }
+
+       /* Cache not ready for this slot: hand back the all-zero GID. */
+       memcpy(sgid, &zgid, sizeof(*sgid));
+       return 0;
+}
+
+/* GID-table "add" callback: only validates its arguments.
+ *
+ * Returns -EINVAL when the port has no RoCE GID table, when @port_num
+ * exceeds QEDR_MAX_PORT or when @context is NULL; otherwise 0. Nothing is
+ * stored by this function.
+ */
+int qedr_add_gid(struct ib_device *device, u8 port_num,
+                unsigned int index, const union ib_gid *gid,
+                const struct ib_gid_attr *attr, void **context)
+{
+       /* Checks are evaluated left to right, same order as before. */
+       if (!rdma_cap_roce_gid_table(device, port_num) ||
+           port_num > QEDR_MAX_PORT ||
+           !context)
+               return -EINVAL;
+
+       return 0;
+}
+
+/* GID-table "delete" callback: only validates its arguments.
+ *
+ * Returns -EINVAL when the port has no RoCE GID table, when @port_num
+ * exceeds QEDR_MAX_PORT or when @context is NULL; otherwise 0. Nothing is
+ * removed by this function.
+ */
+int qedr_del_gid(struct ib_device *device, u8 port_num,
+                unsigned int index, void **context)
+{
+       /* Checks are evaluated left to right, same order as before. */
+       if (!rdma_cap_roce_gid_table(device, port_num) ||
+           port_num > QEDR_MAX_PORT ||
+           !context)
+               return -EINVAL;
+
+       return 0;
+}
+
+/* Fill @attr with the device capabilities cached in dev->attr.
+ *
+ * @attr is zeroed first, so every field not assigned below reads as 0.
+ * @udata is not used.
+ *
+ * Return: 0 on success, -EINVAL if the device has no RDMA context.
+ */
+int qedr_query_device(struct ib_device *ibdev,
+                     struct ib_device_attr *attr, struct ib_udata *udata)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibdev);
+       struct qedr_device_attr *qattr = &dev->attr;
+
+       if (!dev->rdma_ctx) {
+               DP_ERR(dev,
+                      "qedr_query_device called with invalid params rdma_ctx=%p\n",
+                      dev->rdma_ctx);
+               return -EINVAL;
+       }
+
+       memset(attr, 0, sizeof(*attr));
+
+       attr->fw_ver = qattr->fw_ver;
+       attr->sys_image_guid = qattr->sys_image_guid;
+       attr->max_mr_size = qattr->max_mr_size;
+       attr->page_size_cap = qattr->page_size_caps;
+       attr->vendor_id = qattr->vendor_id;
+       attr->vendor_part_id = qattr->vendor_part_id;
+       attr->hw_ver = qattr->hw_ver;
+       attr->max_qp = qattr->max_qp;
+       /* A single work-request limit is reported: the larger of SQ and RQ */
+       attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
+       attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
+           IB_DEVICE_RC_RNR_NAK_GEN |
+           IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
+
+       attr->max_sge = qattr->max_sge;
+       attr->max_sge_rd = qattr->max_sge;
+       attr->max_cq = qattr->max_cq;
+       attr->max_cqe = qattr->max_cqe;
+       attr->max_mr = qattr->max_mr;
+       attr->max_mw = qattr->max_mw;
+       attr->max_pd = qattr->max_pd;
+       attr->atomic_cap = dev->atomic_cap;
+       attr->max_fmr = qattr->max_fmr;
+       attr->max_map_per_fmr = 16;
+       /* Round the requester resource count down to a power of two.
+        * NOTE(review): a count of 0 would make the shift amount -1 -
+        * confirm the attribute is always non-zero.
+        */
+       attr->max_qp_init_rd_atom =
+           1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
+       /* Same rounding for the responder, capped by the requester value */
+       attr->max_qp_rd_atom =
+           min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
+               attr->max_qp_init_rd_atom);
+
+       attr->max_srq = qattr->max_srq;
+       attr->max_srq_sge = qattr->max_srq_sge;
+       attr->max_srq_wr = qattr->max_srq_wr;
+
+       attr->local_ca_ack_delay = qattr->dev_ack_delay;
+       /* NOTE(review): derived from max_mr / 8 - confirm this is the
+        * intended source for the fast-reg page list length.
+        */
+       attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
+       attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
+       attr->max_ah = qattr->max_ah;
+
+       return 0;
+}
+
+#define QEDR_SPEED_SDR         (1)
+#define QEDR_SPEED_DDR         (2)
+#define QEDR_SPEED_QDR         (4)
+#define QEDR_SPEED_FDR10       (8)
+#define QEDR_SPEED_FDR         (16)
+#define QEDR_SPEED_EDR         (32)
+
+/* Translate a link speed into an (ib_speed, ib_width) pair.
+ *
+ * @speed is compared against 1000, 10000, 20000, 25000, 40000, 50000 and
+ * 100000. Any other value, like 1000, yields QEDR_SPEED_SDR with
+ * IB_WIDTH_1X. 40000 and 50000 share the same QDR/4X result.
+ */
+static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
+                                           u8 *ib_width)
+{
+       /* Fallback for 1000 and for every unsupported speed. */
+       u8 rate = QEDR_SPEED_SDR;
+       u8 lanes = IB_WIDTH_1X;
+
+       if (speed == 10000) {
+               rate = QEDR_SPEED_QDR;
+       } else if (speed == 20000) {
+               rate = QEDR_SPEED_DDR;
+               lanes = IB_WIDTH_4X;
+       } else if (speed == 25000) {
+               rate = QEDR_SPEED_EDR;
+       } else if (speed == 40000 || speed == 50000) {
+               rate = QEDR_SPEED_QDR;
+               lanes = IB_WIDTH_4X;
+       } else if (speed == 100000) {
+               rate = QEDR_SPEED_EDR;
+               lanes = IB_WIDTH_4X;
+       }
+
+       *ib_speed = rate;
+       *ib_width = lanes;
+}
+
+/* Fill @attr with the current state of @port.
+ *
+ * Only port numbers 0 and 1 pass the range check. @attr is zeroed before
+ * being populated from the qed port query, the netdev MTU and driver
+ * constants.
+ *
+ * Return: 0 on success, -EINVAL for a bad port or a missing RDMA context.
+ */
+int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibdev);
+       struct qed_rdma_port *port_info;
+       bool link_up;
+
+       if (port > 1) {
+               DP_ERR(dev, "invalid_port=0x%x\n", port);
+               return -EINVAL;
+       }
+
+       if (!dev->rdma_ctx) {
+               DP_ERR(dev, "rdma_ctx is NULL\n");
+               return -EINVAL;
+       }
+
+       port_info = dev->ops->rdma_query_port(dev->rdma_ctx);
+       memset(attr, 0, sizeof(*attr));
+
+       link_up = (port_info->port_state == QED_RDMA_PORT_UP);
+       attr->state = link_up ? IB_PORT_ACTIVE : IB_PORT_DOWN;
+       attr->phys_state = link_up ? 5 : 3;
+
+       attr->max_mtu = IB_MTU_4096;
+       attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
+
+       /* LID/SM related fields are reported as zero. */
+       attr->lid = 0;
+       attr->lmc = 0;
+       attr->sm_lid = 0;
+       attr->sm_sl = 0;
+
+       attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
+       attr->gid_tbl_len = QEDR_MAX_SGID;
+       attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
+       attr->bad_pkey_cntr = port_info->pkey_bad_counter;
+       attr->qkey_viol_cntr = 0;
+
+       get_link_speed_and_width(port_info->link_speed,
+                                &attr->active_speed, &attr->active_width);
+
+       attr->max_msg_sz = port_info->max_msg_size;
+       attr->max_vl_num = 4;
+
+       return 0;
+}
+
+/* Port-modify callback: validates @port and changes nothing.
+ *
+ * Return: 0 for port numbers 0 and 1, -EINVAL otherwise. @mask and @props
+ * are not used.
+ */
+int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
+                    struct ib_port_modify *props)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibdev);
+
+       if (port <= 1)
+               return 0;
+
+       DP_ERR(dev, "invalid_port=0x%x\n", port);
+       return -EINVAL;
+}
+
+/* Record (@phy_addr, @len) in @uctx's list of regions that may later be
+ * mapped through qedr_mmap().
+ *
+ * Return: 0 on success, -ENOMEM if the list node cannot be allocated.
+ */
+static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
+                        unsigned long len)
+{
+       struct qedr_mm *node = kzalloc(sizeof(*node), GFP_KERNEL);
+
+       if (!node)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&node->entry);
+       node->key.phy_addr = phy_addr;
+       /* Callers may pass a length that is not a multiple of PAGE_SIZE,
+        * whereas mappings are PAGE_SIZE grained and the kernel enlarges
+        * the requested size accordingly. qedr_mmap() looks the region up
+        * using that enlarged length, so store the rounded-up value here
+        * to keep the lookup from failing.
+        */
+       node->key.len = roundup(len, PAGE_SIZE);
+
+       mutex_lock(&uctx->mm_list_lock);
+       list_add(&node->entry, &uctx->mm_head);
+       mutex_unlock(&uctx->mm_list_lock);
+
+       DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
+                "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
+                (unsigned long long)node->key.phy_addr,
+                (unsigned long)node->key.len, uctx);
+
+       return 0;
+}
+
+/* Check whether (@phy_addr, @len) was registered with qedr_add_mmap().
+ *
+ * Both the address and the length must match an entry exactly. The list
+ * is walked under uctx->mm_list_lock.
+ *
+ * Return: true if a matching entry exists, false otherwise.
+ */
+static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
+                            unsigned long len)
+{
+       bool found = false;
+       struct qedr_mm *mm;
+
+       mutex_lock(&uctx->mm_list_lock);
+       list_for_each_entry(mm, &uctx->mm_head, entry) {
+               if (len != mm->key.len || phy_addr != mm->key.phy_addr)
+                       continue;
+
+               found = true;
+               break;
+       }
+       mutex_unlock(&uctx->mm_list_lock);
+
+       /* Log the requested key, not the list cursor: when nothing matches
+        * (or the list is empty) the cursor does not point at a real
+        * struct qedr_mm, and after unlocking the entry may be gone anyway.
+        */
+       DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
+                "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
+                (unsigned long long)phy_addr, len, uctx, found);
+
+       return found;
+}
+
+/* Create a user context: obtain a DPI (doorbell page) from qed, report
+ * the doorbell window and queue limits to user space through @udata and
+ * register the doorbell window as mmap-able.
+ *
+ * Return: the new ib_ucontext, or an ERR_PTR() on failure. On every
+ * failure after the DPI was obtained the DPI is handed back to qed.
+ */
+struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
+                                       struct ib_udata *udata)
+{
+       int rc;
+       struct qedr_ucontext *ctx;
+       struct qedr_alloc_ucontext_resp uresp;
+       struct qedr_dev *dev = get_qedr_dev(ibdev);
+       struct qed_rdma_add_user_out_params oparams;
+
+       if (!udata)
+               return ERR_PTR(-EFAULT);
+
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+       if (!ctx)
+               return ERR_PTR(-ENOMEM);
+
+       rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
+       if (rc) {
+               DP_ERR(dev,
+                      "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
+                      rc);
+               goto err_free_ctx;
+       }
+
+       ctx->dpi = oparams.dpi;
+       ctx->dpi_addr = oparams.dpi_addr;
+       ctx->dpi_phys_addr = oparams.dpi_phys_addr;
+       ctx->dpi_size = oparams.dpi_size;
+       INIT_LIST_HEAD(&ctx->mm_head);
+       mutex_init(&ctx->mm_list_lock);
+
+       /* Zero the whole response so no stack bytes reach user space. */
+       memset(&uresp, 0, sizeof(uresp));
+
+       uresp.db_pa = ctx->dpi_phys_addr;
+       uresp.db_size = ctx->dpi_size;
+       uresp.max_send_wr = dev->attr.max_sqe;
+       uresp.max_recv_wr = dev->attr.max_rqe;
+       uresp.max_srq_wr = dev->attr.max_srq_wr;
+       uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
+       uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
+       uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
+       uresp.max_cqes = QEDR_MAX_CQES;
+
+       rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+       if (rc)
+               goto err_remove_user;
+
+       /* qedr_add_mmap() logs through ctx->dev, so set it first. */
+       ctx->dev = dev;
+
+       rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
+       if (rc)
+               goto err_remove_user;
+
+       DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
+                &ctx->ibucontext);
+       return &ctx->ibucontext;
+
+err_remove_user:
+       /* Give the DPI back; otherwise it stays allocated forever. */
+       dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
+err_free_ctx:
+       kfree(ctx);
+       return ERR_PTR(rc);
+}
+
+/* Tear down a user context: release its DPI, drop every registered
+ * mmap region and free the context itself. Always returns 0.
+ */
+int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
+{
+       struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
+       struct qedr_dev *dev = uctx->dev;
+       struct qedr_mm *cur, *next;
+
+       DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
+                uctx);
+       dev->ops->rdma_remove_user(dev->rdma_ctx, uctx->dpi);
+
+       /* The _safe iterator is required because entries are freed while
+        * walking the list.
+        */
+       list_for_each_entry_safe(cur, next, &uctx->mm_head, entry) {
+               DP_DEBUG(dev, QEDR_MSG_MISC,
+                        "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
+                        cur->key.phy_addr, cur->key.len, uctx);
+               list_del(&cur->entry);
+               kfree(cur);
+       }
+
+       kfree(uctx);
+       return 0;
+}
+
+/* mmap handler for a qedr user context.
+ *
+ * The requested (offset, length) pair must have been registered with
+ * qedr_add_mmap(). Offsets inside the doorbell window are mapped
+ * write-combined and must not be requested readable; everything else is
+ * mapped with the VMA's own protection.
+ *
+ * Return: 0 on success, -EINVAL for an unaligned or unknown region,
+ * -EPERM for a readable doorbell mapping, or the remap helper's error.
+ */
+int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+       struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
+       struct qedr_dev *dev = get_qedr_dev(context->device);
+       unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
+       u64 unmapped_db = dev->db_phys_addr;
+       unsigned long len = (vma->vm_end - vma->vm_start);
+       int rc = 0;
+       bool found;
+
+       DP_DEBUG(dev, QEDR_MSG_INIT,
+                "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
+                vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
+       if (vma->vm_start & (PAGE_SIZE - 1)) {
+               DP_ERR(dev, "Vma_start not page aligned = %ld\n",
+                      vma->vm_start);
+               return -EINVAL;
+       }
+
+       found = qedr_search_mmap(ucontext, vm_page, len);
+       if (!found) {
+               DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
+                      vma->vm_pgoff);
+               return -EINVAL;
+       }
+
+       /* The mapping kind is only known after the range test below, so
+        * the "Mapping ..." message is emitted per branch and not up front.
+        *
+        * NOTE(review): the upper bound is inclusive (<=); an offset equal
+        * to unmapped_db + db_size is treated as a doorbell page - confirm.
+        */
+       if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
+                                                    dev->db_size))) {
+               DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
+               if (vma->vm_flags & VM_READ) {
+                       DP_ERR(dev, "Trying to map doorbell bar for read\n");
+                       return -EPERM;
+               }
+
+               vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+               /* NOTE(review): only PAGE_SIZE bytes are remapped here even
+                * when len is larger - confirm this is intended.
+                */
+               rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+                                       PAGE_SIZE, vma->vm_page_prot);
+       } else {
+               DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
+               rc = remap_pfn_range(vma, vma->vm_start,
+                                    vma->vm_pgoff, len, vma->vm_page_prot);
+       }
+       DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
+       return rc;
+}
+
+/* Allocate a protection domain.
+ *
+ * A PD id is obtained from qed. When called on behalf of user space
+ * (both @udata and @context set) the id is copied to user space and the
+ * PD is linked with the user context.
+ *
+ * Return: the new ib_pd, or an ERR_PTR() on failure. No PD id or memory
+ * stays allocated on failure.
+ */
+struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
+                           struct ib_ucontext *context, struct ib_udata *udata)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibdev);
+       struct qedr_ucontext *uctx = NULL;
+       struct qedr_alloc_pd_uresp uresp;
+       struct qedr_pd *pd;
+       u16 pd_id;
+       int rc;
+
+       DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
+                (udata && context) ? "User Lib" : "Kernel");
+
+       if (!dev->rdma_ctx) {
+               DP_ERR(dev, "invalid RDMA context\n");
+               return ERR_PTR(-EINVAL);
+       }
+
+       pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+       if (!pd)
+               return ERR_PTR(-ENOMEM);
+
+       /* pd_id is only meaningful when the allocation succeeded. */
+       rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
+       if (rc)
+               goto err_free_pd;
+
+       pd->pd_id = pd_id;
+
+       if (udata && context) {
+               /* Zero first so no stack bytes leak to user space. */
+               memset(&uresp, 0, sizeof(uresp));
+               uresp.pd_id = pd_id;
+
+               rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+               if (rc) {
+                       DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
+                       goto err_dealloc_pd;
+               }
+
+               uctx = get_qedr_ucontext(context);
+               uctx->pd = pd;
+               pd->uctx = uctx;
+       }
+
+       return &pd->ibpd;
+
+err_dealloc_pd:
+       dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
+err_free_pd:
+       kfree(pd);
+       return ERR_PTR(rc);
+}
+
+/* Release a protection domain: return its id to qed and free it.
+ *
+ * Return: 0 on success, -EINVAL if no PD could be derived from @ibpd.
+ */
+int qedr_dealloc_pd(struct ib_pd *ibpd)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+       struct qedr_pd *pd = get_qedr_pd(ibpd);
+
+       if (!pd) {
+               pr_err("Invalid PD received in dealloc_pd\n");
+               /* Bail out: everything below dereferences pd. */
+               return -EINVAL;
+       }
+
+       DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
+       dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
+
+       kfree(pd);
+
+       return 0;
+}
+
+/* Free a PBL table: release the DMA-coherent page of every populated
+ * slot (slots whose va is NULL are skipped), then the table array.
+ */
+static void qedr_free_pbl(struct qedr_dev *dev,
+                         struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
+{
+       struct pci_dev *pdev = dev->pdev;
+       int idx = 0;
+
+       while (idx < pbl_info->num_pbls) {
+               struct qedr_pbl *slot = &pbl[idx++];
+
+               if (slot->va)
+                       dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
+                                         slot->va, slot->pa);
+       }
+
+       kfree(pbl);
+}
+
+#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
+#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
+
+#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
+#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
+#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
+
+/* Allocate the PBL table described by @pbl_info.
+ *
+ * One zeroed DMA-coherent page of pbl_info->pbl_size bytes is allocated
+ * per PBL. The first page is then filled with the DMA addresses of all
+ * the following pages (a no-op when there is only one PBL).
+ *
+ * Return: the table array, or ERR_PTR(-ENOMEM). On failure everything
+ * allocated so far is released exactly once.
+ */
+static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
+                                          struct qedr_pbl_info *pbl_info,
+                                          gfp_t flags)
+{
+       struct pci_dev *pdev = dev->pdev;
+       struct qedr_pbl *pbl_table;
+       dma_addr_t *pbl_main_tbl;
+       dma_addr_t pa;
+       void *va;
+       int i;
+
+       /* kcalloc zeroes the array, so unpopulated slots have va == NULL */
+       pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
+       if (!pbl_table)
+               return ERR_PTR(-ENOMEM);
+
+       for (i = 0; i < pbl_info->num_pbls; i++) {
+               va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
+                                       &pa, flags);
+               if (!va)
+                       goto err;
+
+               memset(va, 0, pbl_info->pbl_size);
+               pbl_table[i].va = va;
+               pbl_table[i].pa = pa;
+       }
+
+       /* Two-Layer PBLs, if we have more than one pbl we need to initialize
+        * the first one with physical pointers to all of the rest
+        */
+       pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
+       for (i = 0; i < pbl_info->num_pbls - 1; i++)
+               pbl_main_tbl[i] = pbl_table[i + 1].pa;
+
+       return pbl_table;
+
+err:
+       /* qedr_free_pbl() frees every slot with a non-NULL va and then the
+        * array itself. Freeing the pages here as well would release each
+        * of them twice.
+        */
+       qedr_free_pbl(dev, pbl_info, pbl_table);
+
+       return ERR_PTR(-ENOMEM);
+}
+
+/* Work out the PBL geometry needed to describe @num_pbes page entries
+ * and store it in @pbl_info (num_pbls, pbl_size, num_pbes, two_layered).
+ *
+ * A two-layer layout is chosen only when @two_layer_capable is set and
+ * the entries do not fit a single maximum-size PBL page; the page size is
+ * then doubled from the minimum until (entries per page)^2 covers
+ * @num_pbes. Otherwise a single PBL page is used.
+ *
+ * Return: 0 on success, -EINVAL if even two layers cannot hold @num_pbes.
+ */
+static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
+                               struct qedr_pbl_info *pbl_info,
+                               u32 num_pbes, int two_layer_capable)
+{
+       bool need_two_layers = (num_pbes > MAX_PBES_ON_PAGE) &&
+                              two_layer_capable;
+       u32 page_size;
+       u32 page_cnt;
+
+       if (need_two_layers && num_pbes > MAX_PBES_TWO_LAYER) {
+               DP_ERR(dev, "prepare pbl table: too many pages %d\n",
+                      num_pbes);
+               return -EINVAL;
+       }
+
+       if (need_two_layers) {
+               u32 per_page;
+               u32 capacity;
+
+               /* Grow the page until two levels can address num_pbes. */
+               page_size = MIN_FW_PBL_PAGE_SIZE;
+               for (;;) {
+                       per_page = page_size / sizeof(u64);
+                       capacity = per_page * per_page;
+                       if (capacity >= num_pbes)
+                               break;
+                       page_size *= 2;
+               }
+
+               /* Data PBLs plus one layer-0 PBL that points at them. */
+               page_cnt = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(page_size));
+               page_cnt++;
+       } else {
+               /* Single PBL page, at least the minimum FW page size. */
+               page_cnt = 1;
+               page_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
+                                 roundup_pow_of_two((num_pbes * sizeof(u64))));
+       }
+
+       pbl_info->two_layered = need_two_layers;
+       pbl_info->num_pbls = page_cnt;
+       pbl_info->pbl_size = page_size;
+       pbl_info->num_pbes = num_pbes;
+
+       DP_DEBUG(dev, QEDR_MSG_MR,
+                "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
+                pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
+
+       return 0;
+}
+
+/* Write the DMA address of every page of @umem into the PBL pages of
+ * @pbl, as little-endian lo/hi 32-bit pairs.
+ *
+ * Writing stops once pbl_info->num_pbes entries have been stored. Nothing
+ * is written when num_pbes is 0 or the first data PBL has no virtual
+ * address.
+ */
+static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
+                              struct qedr_pbl *pbl,
+                              struct qedr_pbl_info *pbl_info)
+{
+       int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+       struct qedr_pbl *pbl_tbl;
+       struct scatterlist *sg;
+       struct regpair *pbe;
+       int entry;
+       u32 addr;
+
+       if (!pbl_info->num_pbes)
+               return;
+
+       /* If we have a two layered pbl, the first pbl points to the rest
+        * of the pbls and the first entry lays on the second pbl in the table
+        */
+       if (pbl_info->two_layered)
+               pbl_tbl = &pbl[1];
+       else
+               pbl_tbl = pbl;
+
+       pbe = (struct regpair *)pbl_tbl->va;
+       if (!pbe) {
+               DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
+               return;
+       }
+
+       /* pbe_cnt counts entries written into the current PBL page only */
+       pbe_cnt = 0;
+
+       /* log2 of the umem page size, used to turn byte lengths into pages */
+       shift = ilog2(umem->page_size);
+
+       for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+               /* number of umem pages covered by this scatterlist entry */
+               pages = sg_dma_len(sg) >> shift;
+               for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
+                       /* store the page address in pbe */
+                       /* lo is given the full address expression and relies
+                        * on the 32-bit conversion to keep only the low half
+                        */
+                       pbe->lo = cpu_to_le32(sg_dma_address(sg) +
+                                             umem->page_size * pg_cnt);
+                       addr = upper_32_bits(sg_dma_address(sg) +
+                                            umem->page_size * pg_cnt);
+                       pbe->hi = cpu_to_le32(addr);
+                       pbe_cnt++;
+                       total_num_pbes++;
+                       pbe++;
+
+                       /* All requested entries written: done */
+                       if (total_num_pbes == pbl_info->num_pbes)
+                               return;
+
+                       /* If the given pbl is full storing the pbes,
+                        * move to next pbl.
+                        */
+                       if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
+                               pbl_tbl++;
+                               pbe = (struct regpair *)pbl_tbl->va;
+                               pbe_cnt = 0;
+                       }
+               }
+       }
+}
+
+/* Send the create-CQ response (doorbell offset and icid) to user space.
+ *
+ * Return: 0 on success, otherwise the ib_copy_to_udata() error, which is
+ * also logged.
+ */
+static int qedr_copy_cq_uresp(struct qedr_dev *dev,
+                             struct qedr_cq *cq, struct ib_udata *udata)
+{
+       struct qedr_create_cq_uresp resp;
+       int err;
+
+       /* Zero everything, then fill in the two reported fields. */
+       memset(&resp, 0, sizeof(resp));
+       resp.icid = cq->icid;
+       resp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+
+       err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+       if (!err)
+               return 0;
+
+       DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
+       return err;
+}
+
+/* Advance to the next CQE of the chain. When the element being left is
+ * the designated toggle element, the expected toggle bit is flipped
+ * first.
+ */
+static void consume_cqe(struct qedr_cq *cq)
+{
+       const bool leaving_toggle_elem = (cq->toggle_cqe == cq->latest_cqe);
+
+       if (leaving_toggle_elem)
+               cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
+
+       cq->latest_cqe = qed_chain_consume(&cq->pbl);
+}
+
+/* Return how many CQEs fit in the page-aligned buffer needed for
+ * @entries + 1 CQEs. The extra entry is allocated but not reported to
+ * the FW.
+ */
+static inline int qedr_align_cq_entries(int entries)
+{
+       u64 bytes = (entries + 1) * QEDR_CQE_SIZE;
+       u64 padded = ALIGN(bytes, PAGE_SIZE);
+
+       return padded / QEDR_CQE_SIZE;
+}
+
+/* Pin a user-space queue buffer and build the PBL describing it.
+ *
+ * On success q->umem holds the pinned memory and q->pbl_tbl a populated
+ * single-layer PBL table.
+ *
+ * Return: 0 on success or a negative errno. On failure the umem is
+ * released and no PBL table is left allocated.
+ */
+static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
+                                      struct qedr_dev *dev,
+                                      struct qedr_userq *q,
+                                      u64 buf_addr, size_t buf_len,
+                                      int access, int dmasync)
+{
+       int page_cnt;
+       int rc;
+
+       q->buf_addr = buf_addr;
+       q->buf_len = buf_len;
+       q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
+       if (IS_ERR(q->umem)) {
+               DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
+                      PTR_ERR(q->umem));
+               return PTR_ERR(q->umem);
+       }
+
+       page_cnt = ib_umem_page_count(q->umem);
+       rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
+       if (rc)
+               goto err0;
+
+       q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
+       if (IS_ERR_OR_NULL(q->pbl_tbl)) {
+               /* rc is still 0 from the successful prepare step above;
+                * without setting it the caller would see success.
+                */
+               rc = q->pbl_tbl ? PTR_ERR(q->pbl_tbl) : -ENOMEM;
+               goto err0;
+       }
+
+       qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
+
+       return 0;
+
+err0:
+       ib_umem_release(q->umem);
+
+       return rc;
+}
+
+/* Fill the qed create-CQ input parameters.
+ *
+ * The CQ pointer is split into the hi/lo handle words, the reported size
+ * is @chain_entries - 1, and the DPI comes from @ctx when one is given
+ * and from @dev otherwise. A single-level PBL is always requested.
+ */
+static inline void qedr_init_cq_params(struct qedr_cq *cq,
+                                      struct qedr_ucontext *ctx,
+                                      struct qedr_dev *dev, int vector,
+                                      int chain_entries, int page_cnt,
+                                      u64 pbl_ptr,
+                                      struct qed_rdma_create_cq_in_params
+                                      *params)
+{
+       const uintptr_t handle = (uintptr_t)cq;
+
+       memset(params, 0, sizeof(*params));
+
+       params->cq_handle_lo = lower_32_bits(handle);
+       params->cq_handle_hi = upper_32_bits(handle);
+       params->cnq_id = vector;
+       params->cq_size = chain_entries - 1;
+
+       if (ctx)
+               params->dpi = ctx->dpi;
+       else
+               params->dpi = dev->dpi;
+
+       params->pbl_ptr = pbl_ptr;
+       params->pbl_num_pages = page_cnt;
+       params->pbl_two_level = 0;
+}
+
+/* Ring the CQ doorbell: store @flags and the little-endian @cons value in
+ * the cached doorbell record and write the whole record to cq->db_addr
+ * with a single 64-bit MMIO write.
+ */
+static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
+{
+       /* Flush data before signalling doorbell */
+       wmb();
+       cq->db.data.agg_flags = flags;
+       cq->db.data.value = cpu_to_le32(cons);
+       /* db.raw is the 64-bit view of the record updated just above */
+       writeq(cq->db.raw, cq->db_addr);
+
+       /* Make sure write would stick */
+       mmiowb();
+}
+
+/* Request a completion notification on @ibcq.
+ *
+ * GSI CQs are left untouched. For every other CQ the arm flags are
+ * rebuilt from @flags and pushed through the doorbell, together with the
+ * consumer index minus one, under the CQ lock. Always returns 0.
+ */
+int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+       struct qedr_cq *cq = get_qedr_cq(ibcq);
+       unsigned long irq_state;
+
+       if (cq->cq_type != QEDR_CQ_TYPE_GSI) {
+               spin_lock_irqsave(&cq->cq_lock, irq_state);
+
+               cq->arm_flags = 0;
+               if (flags & IB_CQ_NEXT_COMP)
+                       cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
+               if (flags & IB_CQ_SOLICITED)
+                       cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
+
+               doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
+
+               spin_unlock_irqrestore(&cq->cq_lock, irq_state);
+       }
+
+       return 0;
+}
+
+/* Create a completion queue.
+ *
+ * With @udata set the CQ buffer lives in user space: it is pinned and
+ * described by a PBL built from the user request. Without @udata a qed
+ * chain is allocated for a kernel CQ and the doorbell state is set up.
+ * In both cases the CQ is then created in qed.
+ *
+ * Return: the new ib_cq, or an ERR_PTR() carrying the error of the step
+ * that failed. Everything acquired before the failure is released.
+ */
+struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
+                            const struct ib_cq_init_attr *attr,
+                            struct ib_ucontext *ib_ctx, struct ib_udata *udata)
+{
+       struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
+       struct qed_rdma_destroy_cq_out_params destroy_oparams;
+       struct qed_rdma_destroy_cq_in_params destroy_iparams;
+       struct qedr_dev *dev = get_qedr_dev(ibdev);
+       struct qed_rdma_create_cq_in_params params;
+       struct qedr_create_cq_ureq ureq;
+       int vector = attr->comp_vector;
+       int entries = attr->cqe;
+       struct qedr_cq *cq;
+       int chain_entries;
+       int page_cnt;
+       u64 pbl_ptr;
+       u16 icid;
+       int rc;
+
+       DP_DEBUG(dev, QEDR_MSG_INIT,
+                "create_cq: called from %s. entries=%d, vector=%d\n",
+                udata ? "User Lib" : "Kernel", entries, vector);
+
+       if (entries > QEDR_MAX_CQES) {
+               DP_ERR(dev,
+                      "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
+                      entries, QEDR_MAX_CQES);
+               return ERR_PTR(-EINVAL);
+       }
+
+       chain_entries = qedr_align_cq_entries(entries);
+       chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
+
+       cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+       if (!cq)
+               return ERR_PTR(-ENOMEM);
+
+       if (udata) {
+               memset(&ureq, 0, sizeof(ureq));
+               rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
+               if (rc) {
+                       DP_ERR(dev,
+                              "create cq: problem copying data from user space\n");
+                       goto err0;
+               }
+
+               if (!ureq.len) {
+                       DP_ERR(dev,
+                              "create cq: cannot create a cq with 0 entries\n");
+                       rc = -EINVAL;
+                       goto err0;
+               }
+
+               cq->cq_type = QEDR_CQ_TYPE_USER;
+
+               rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
+                                         ureq.len, IB_ACCESS_LOCAL_WRITE, 1);
+               if (rc)
+                       goto err0;
+
+               pbl_ptr = cq->q.pbl_tbl->pa;
+               page_cnt = cq->q.pbl_info.num_pbes;
+       } else {
+               cq->cq_type = QEDR_CQ_TYPE_KERNEL;
+
+               rc = dev->ops->common->chain_alloc(dev->cdev,
+                                                  QED_CHAIN_USE_TO_CONSUME,
+                                                  QED_CHAIN_MODE_PBL,
+                                                  QED_CHAIN_CNT_TYPE_U32,
+                                                  chain_entries,
+                                                  sizeof(union rdma_cqe),
+                                                  &cq->pbl);
+               if (rc)
+                       goto err1;
+
+               page_cnt = qed_chain_get_page_cnt(&cq->pbl);
+               pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
+       }
+
+       qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
+                           pbl_ptr, &params);
+
+       rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
+       if (rc)
+               goto err2;
+
+       cq->icid = icid;
+       cq->sig = QEDR_CQ_MAGIC_NUMBER;
+       spin_lock_init(&cq->cq_lock);
+
+       /* Use the same user/kernel discriminator (udata) as the setup above
+        * and the unwind below, so the response is copied only when there
+        * is a udata buffer to copy it into.
+        */
+       if (udata) {
+               rc = qedr_copy_cq_uresp(dev, cq, udata);
+               if (rc)
+                       goto err3;
+       } else {
+               /* Generate doorbell address. */
+               cq->db_addr = dev->db_addr +
+                   DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+               cq->db.data.icid = cq->icid;
+               cq->db.data.params = DB_AGG_CMD_SET <<
+                   RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
+
+               /* point to the very last element, passing it we will toggle */
+               cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
+               cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
+               cq->latest_cqe = NULL;
+               consume_cqe(cq);
+               cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
+       }
+
+       DP_DEBUG(dev, QEDR_MSG_CQ,
+                "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
+                cq->icid, cq, params.cq_size);
+
+       return &cq->ibcq;
+
+err3:
+       destroy_iparams.icid = cq->icid;
+       dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
+                                 &destroy_oparams);
+err2:
+       if (udata)
+               qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
+       else
+               dev->ops->common->chain_free(dev->cdev, &cq->pbl);
+err1:
+       if (udata)
+               ib_umem_release(cq->q.umem);
+err0:
+       kfree(cq);
+       /* Report the real cause. A non-negative value must never reach
+        * ERR_PTR(): IS_ERR() would not recognise it as an error.
+        */
+       return ERR_PTR(rc < 0 ? rc : -EINVAL);
+}
+
+int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibcq->device);
+       struct qedr_cq *cq = get_qedr_cq(ibcq);
+
+       DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
+
+       return 0;
+}
+
+/* Destroy a completion queue and free the qedr_cq that wraps it.
+ *
+ * For every CQ type other than GSI the CQ is destroyed through the
+ * rdma_destroy_cq op and its chain is handed to chain_free. When the CQ
+ * has a user object with a context, the PBL table and the umem are
+ * released as well. Always returns 0.
+ */
+int qedr_destroy_cq(struct ib_cq *ibcq)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibcq->device);
+       struct qed_rdma_destroy_cq_out_params oparams;
+       struct qed_rdma_destroy_cq_in_params iparams;
+       struct qedr_cq *cq = get_qedr_cq(ibcq);
+
+       /* Terminate the message with a newline like the other log calls */
+       DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq: cq_id %d\n", cq->icid);
+
+       /* GSIs CQs are handled by driver, so they don't exist in the FW */
+       if (cq->cq_type != QEDR_CQ_TYPE_GSI) {
+               iparams.icid = cq->icid;
+               dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
+               dev->ops->common->chain_free(dev->cdev, &cq->pbl);
+       }
+
+       /* User-space CQs additionally own a PBL table and a umem */
+       if (ibcq->uobject && ibcq->uobject->context) {
+               qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
+               ib_umem_release(cq->q.umem);
+       }
+
+       kfree(cq);
+
+       return 0;
+}
+
+/* Fill the source/destination GID, VLAN id and RoCE mode of @qp_params from
+ * the cached GID table entry selected by attr->ah_attr.grh.sgid_index.
+ *
+ * Returns 0 on success, the error from ib_get_cached_gid() when the lookup
+ * fails, or -ENOENT when the entry holds the zero GID.
+ */
+static inline int get_gid_info_from_table(struct ib_qp *ibqp,
+                                         struct ib_qp_attr *attr,
+                                         int attr_mask,
+                                         struct qed_rdma_modify_qp_in_params
+                                         *qp_params)
+{
+       enum rdma_network_type net_type;
+       struct ib_gid_attr sgid_attr;
+       union ib_gid sgid;
+       u32 src_ip, dst_ip;
+       int idx;
+       int rc;
+
+       rc = ib_get_cached_gid(ibqp->device, attr->ah_attr.port_num,
+                              attr->ah_attr.grh.sgid_index, &sgid,
+                              &sgid_attr);
+       if (rc)
+               return rc;
+
+       if (memcmp(&sgid, &zgid, sizeof(sgid)) == 0)
+               return -ENOENT;
+
+       if (sgid_attr.ndev) {
+               qp_params->vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
+               dev_put(sgid_attr.ndev);
+
+               net_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+               if (net_type == RDMA_NETWORK_IPV4) {
+                       /* Only the IPv4 address part of each GID is used */
+                       memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
+                       memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
+                       src_ip = qedr_get_ipv4_from_gid(sgid.raw);
+                       qp_params->sgid.ipv4_addr = src_ip;
+                       dst_ip =
+                           qedr_get_ipv4_from_gid(attr->ah_attr.grh.dgid.raw);
+                       qp_params->dgid.ipv4_addr = dst_ip;
+                       qp_params->roce_mode = ROCE_V2_IPV4;
+                       SET_FIELD(qp_params->modify_flags,
+                                 QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
+               } else if (net_type == RDMA_NETWORK_IPV6 ||
+                          net_type == RDMA_NETWORK_IB) {
+                       /* Both cases carry the full GIDs */
+                       memcpy(&qp_params->sgid.bytes[0], &sgid.raw[0],
+                              sizeof(qp_params->sgid));
+                       memcpy(&qp_params->dgid.bytes[0],
+                              &attr->ah_attr.grh.dgid,
+                              sizeof(qp_params->dgid));
+
+                       if (net_type == RDMA_NETWORK_IPV6) {
+                               qp_params->roce_mode = ROCE_V2_IPV6;
+                               SET_FIELD(qp_params->modify_flags,
+                                         QED_ROCE_MODIFY_QP_VALID_ROCE_MODE,
+                                         1);
+                       } else {
+                               qp_params->roce_mode = ROCE_V1;
+                       }
+               }
+       }
+
+       /* Convert every GID dword with ntohl() */
+       for (idx = 0; idx < 4; idx++) {
+               qp_params->sgid.dwords[idx] =
+                   ntohl(qp_params->sgid.dwords[idx]);
+               qp_params->dgid.dwords[idx] =
+                   ntohl(qp_params->dgid.dwords[idx]);
+       }
+
+       /* VLAN ids at or above VLAN_CFI_MASK are replaced by 0 */
+       if (qp_params->vlan_id >= VLAN_CFI_MASK)
+               qp_params->vlan_id = 0;
+
+       return 0;
+}
+
+/* Undo the user-space SQ setup: hand the SQ PBL table to qedr_free_pbl()
+ * and release the SQ umem.
+ */
+static void qedr_cleanup_user_sq(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+       qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
+       ib_umem_release(qp->usq.umem);
+}
+
+/* Undo the user-space RQ setup: hand the RQ PBL table to qedr_free_pbl()
+ * and release the RQ umem.
+ */
+static void qedr_cleanup_user_rq(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+       qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
+       ib_umem_release(qp->urq.umem);
+}
+
+/* Undo the kernel SQ setup: free the SQ chain and the wqe_wr_id array. */
+static void qedr_cleanup_kernel_sq(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+       dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
+       kfree(qp->wqe_wr_id);
+}
+
+/* Undo the kernel RQ setup: free the RQ chain and the rqe_wr_id array. */
+static void qedr_cleanup_kernel_rq(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+       dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
+       kfree(qp->rqe_wr_id);
+}
+
+/* Validate the requested QP attributes against the device limits.
+ *
+ * Only RC and GSI QPs are accepted, the requested capabilities must not
+ * exceed the limits in dev->attr, and a PD that has a user object may not
+ * create a GSI QP. Returns 0 when acceptable, -EINVAL otherwise.
+ */
+static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
+                              struct ib_qp_init_attr *attrs)
+{
+       struct qedr_device_attr *limits = &dev->attr;
+       struct ib_qp_cap *cap = &attrs->cap;
+       bool is_gsi = attrs->qp_type == IB_QPT_GSI;
+
+       /* QP0... attrs->qp_type == IB_QPT_GSI */
+       if (!(attrs->qp_type == IB_QPT_RC || is_gsi)) {
+               DP_DEBUG(dev, QEDR_MSG_QP,
+                        "create qp: unsupported qp type=0x%x requested\n",
+                        attrs->qp_type);
+               return -EINVAL;
+       }
+
+       if (cap->max_send_wr > limits->max_sqe) {
+               DP_ERR(dev,
+                      "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
+                      cap->max_send_wr, limits->max_sqe);
+               return -EINVAL;
+       }
+
+       if (cap->max_inline_data > limits->max_inline) {
+               DP_ERR(dev,
+                      "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
+                      cap->max_inline_data, limits->max_inline);
+               return -EINVAL;
+       }
+
+       /* Send and receive SGE counts share the same device limit */
+       if (cap->max_send_sge > limits->max_sge) {
+               DP_ERR(dev,
+                      "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
+                      cap->max_send_sge, limits->max_sge);
+               return -EINVAL;
+       }
+
+       if (cap->max_recv_sge > limits->max_sge) {
+               DP_ERR(dev,
+                      "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
+                      cap->max_recv_sge, limits->max_sge);
+               return -EINVAL;
+       }
+
+       /* Unprivileged user space cannot create special QP */
+       if (is_gsi && ibpd->uobject) {
+               DP_ERR(dev,
+                      "create qp: userspace can't create special QPs of type=0x%x\n",
+                      attrs->qp_type);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* Fill the RQ part of the create-QP user response: the RQ producer doorbell
+ * offset and the icid used for the RQ, which is qp->icid.
+ */
+static void qedr_copy_rq_uresp(struct qedr_create_qp_uresp *uresp,
+                              struct qedr_qp *qp)
+{
+       uresp->rq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
+       uresp->rq_icid = qp->icid;
+}
+
+/* Fill the SQ part of the create-QP user response: the SQ producer doorbell
+ * offset and the icid used for the SQ, which is qp->icid + 1.
+ */
+static void qedr_copy_sq_uresp(struct qedr_create_qp_uresp *uresp,
+                              struct qedr_qp *qp)
+{
+       uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
+       uresp->sq_icid = qp->icid + 1;
+}
+
+/* Build the create-QP response and copy it to user space through @udata.
+ *
+ * Returns 0 on success or the error reported by ib_copy_to_udata().
+ */
+static int qedr_copy_qp_uresp(struct qedr_dev *dev,
+                             struct qedr_qp *qp, struct ib_udata *udata)
+{
+       struct qedr_create_qp_uresp resp;
+       int err;
+
+       /* Start from an all-zero response before filling the fields */
+       memset(&resp, 0, sizeof(resp));
+
+       resp.qp_id = qp->qp_id;
+       resp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
+       qedr_copy_sq_uresp(&resp, qp);
+       qedr_copy_rq_uresp(&resp, qp);
+
+       err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+       if (!err)
+               return 0;
+
+       DP_ERR(dev,
+              "create qp: failed a copy to user space with qp icid=0x%x.\n",
+              qp->icid);
+       return err;
+}
+
+/* Initialise the driver-side QP fields from the creation attributes and
+ * log the resulting QP, SQ and RQ parameters. The QP starts in the RESET
+ * state.
+ */
+static void qedr_set_qp_init_params(struct qedr_dev *dev,
+                                   struct qedr_qp *qp,
+                                   struct qedr_pd *pd,
+                                   struct ib_qp_init_attr *attrs)
+{
+       spin_lock_init(&qp->q_lock);
+
+       /* Ownership */
+       qp->dev = dev;
+       qp->pd = pd;
+       qp->sq_cq = get_qedr_cq(attrs->send_cq);
+       qp->rq_cq = get_qedr_cq(attrs->recv_cq);
+
+       /* Requested properties */
+       qp->qp_type = attrs->qp_type;
+       qp->state = QED_ROCE_QP_STATE_RESET;
+       qp->signaled = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
+       qp->max_inline_data = attrs->cap.max_inline_data;
+       qp->sq.max_sges = attrs->cap.max_send_sge;
+       qp->rq.max_sges = attrs->cap.max_recv_sge;
+
+       DP_DEBUG(dev, QEDR_MSG_QP,
+                "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
+                pd->pd_id, qp->qp_type, qp->max_inline_data,
+                qp->state, qp->signaled, !!attrs->srq);
+       DP_DEBUG(dev, QEDR_MSG_QP,
+                "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
+                qp->sq.max_sges, qp->sq_cq->icid);
+       DP_DEBUG(dev, QEDR_MSG_QP,
+                "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
+                qp->rq.max_sges, qp->rq_cq->icid);
+}
+
+/* For a user QP, take the QP handle from the user request @ureq and store
+ * it in the create-QP input parameters.
+ */
+static inline void
+qedr_init_qp_user_params(struct qed_rdma_create_qp_in_params *params,
+                        struct qedr_create_qp_ureq *ureq)
+{
+       /* QP handle to be written in CQE */
+       params->qp_handle_lo = ureq->qp_handle_lo;
+       params->qp_handle_hi = ureq->qp_handle_hi;
+}
+
+/* Set up the kernel SQ doorbell: its address inside the device doorbell
+ * area and the icid written with it, which is qp->icid + 1.
+ */
+static inline void
+qedr_init_qp_kernel_doorbell_sq(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+       qp->sq.db = dev->db_addr +
+                   DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
+       qp->sq.db_data.data.icid = qp->icid + 1;
+}
+
+/* Set up the kernel RQ doorbell: its address inside the device doorbell
+ * area and the icid written with it, which is qp->icid.
+ */
+static inline void
+qedr_init_qp_kernel_doorbell_rq(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+       qp->rq.db = dev->db_addr +
+                   DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
+       qp->rq.db_data.data.icid = qp->icid;
+}
+
+/* Allocate the zeroed rqe_wr_id array with qp->rq.max_wr entries.
+ *
+ * Returns 0 on success or -ENOMEM when the allocation fails.
+ */
+static inline int
+qedr_init_qp_kernel_params_rq(struct qedr_dev *dev,
+                             struct qedr_qp *qp, struct ib_qp_init_attr *attrs)
+{
+       /* Driver internal RQ array, one entry per receive WR */
+       qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
+                               GFP_KERNEL);
+       if (qp->rqe_wr_id) {
+               DP_DEBUG(dev, QEDR_MSG_QP, "RQ max_wr set to %d.\n",
+                        qp->rq.max_wr);
+               return 0;
+       }
+
+       return -ENOMEM;
+}
+
+/* Size and allocate the wqe_wr_id array for a kernel QP and store the QP
+ * pointer as the handle in the create-QP input parameters.
+ *
+ * qp->sq.max_wr is the requested max_send_wr scaled by dev->wq_multiplier
+ * and capped at dev->attr.max_sqe. Returns 0 on success or -ENOMEM.
+ */
+static inline int
+qedr_init_qp_kernel_params_sq(struct qedr_dev *dev,
+                             struct qedr_qp *qp,
+                             struct ib_qp_init_attr *attrs,
+                             struct qed_rdma_create_qp_in_params *params)
+{
+       u32 wr_cnt;
+
+       wr_cnt = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
+                      dev->attr.max_sqe);
+
+       /* wr_cnt < attr->max_sqe < u16 so the casting is safe */
+       qp->sq.max_wr = (u16)wr_cnt;
+
+       /* Driver internal SQ array, one entry per send WR */
+       qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
+                               GFP_KERNEL);
+       if (!qp->wqe_wr_id)
+               return -ENOMEM;
+
+       DP_DEBUG(dev, QEDR_MSG_QP, "SQ max_wr set to %d.\n", qp->sq.max_wr);
+
+       /* The QP pointer itself is the handle to be written in the CQE */
+       params->qp_handle_hi = upper_32_bits((uintptr_t)qp);
+       params->qp_handle_lo = lower_32_bits((uintptr_t)qp);
+
+       return 0;
+}
+
+/* Allocate the PBL chain backing a kernel QP's send queue.
+ *
+ * Returns 0 on success or the error reported by chain_alloc.
+ */
+static inline int qedr_init_qp_kernel_sq(struct qedr_dev *dev,
+                                        struct qedr_qp *qp,
+                                        struct ib_qp_init_attr *attrs)
+{
+       u32 wr_cnt, elem_cnt;
+       int rc;
+
+       /* Cap the request at the device limit, but keep room for at least
+        * one WR even if the user asked for none. Each WR may occupy up to
+        * QEDR_MAX_SQE_ELEMENTS_PER_SQE ring elements.
+        */
+       wr_cnt = min_t(u32, attrs->cap.max_send_wr, dev->attr.max_sqe);
+       wr_cnt = max_t(u32, wr_cnt, 1);
+       elem_cnt = wr_cnt * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
+
+       rc = dev->ops->common->chain_alloc(dev->cdev,
+                                          QED_CHAIN_USE_TO_PRODUCE,
+                                          QED_CHAIN_MODE_PBL,
+                                          QED_CHAIN_CNT_TYPE_U32,
+                                          elem_cnt,
+                                          QEDR_SQE_ELEMENT_SIZE,
+                                          &qp->sq.pbl);
+       if (!rc) {
+               DP_DEBUG(dev, QEDR_MSG_SQ,
+                        "SQ Pbl base addr = %llx max_send_wr=%d max_wr=%d capacity=%d, rc=%d\n",
+                        qed_chain_get_pbl_phys(&qp->sq.pbl),
+                        attrs->cap.max_send_wr, wr_cnt,
+                        qed_chain_get_capacity(&qp->sq.pbl), rc);
+               return 0;
+       }
+
+       DP_ERR(dev, "failed to allocate QP %p SQ\n", qp);
+       return rc;
+}
+
+/* Allocate the PBL chain backing a kernel QP's receive queue and record
+ * the number of receive WRs in qp->rq.max_wr.
+ *
+ * Returns 0 on success or -ENOMEM when chain_alloc fails.
+ */
+static inline int qedr_init_qp_kernel_rq(struct qedr_dev *dev,
+                                        struct qedr_qp *qp,
+                                        struct ib_qp_init_attr *attrs)
+{
+       u32 wr_cnt, elem_cnt;
+       int rc;
+
+       /* Keep room for at least one WR even if the user asked for none.
+        * Each WR may occupy up to QEDR_MAX_RQE_ELEMENTS_PER_RQE ring
+        * elements.
+        */
+       wr_cnt = max_t(u32, attrs->cap.max_recv_wr, 1);
+       elem_cnt = wr_cnt * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
+
+       rc = dev->ops->common->chain_alloc(dev->cdev,
+                                          QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+                                          QED_CHAIN_MODE_PBL,
+                                          QED_CHAIN_CNT_TYPE_U32,
+                                          elem_cnt,
+                                          QEDR_RQE_ELEMENT_SIZE,
+                                          &qp->rq.pbl);
+       if (rc) {
+               DP_ERR(dev, "failed to allocate memory for QP %p RQ\n", qp);
+               return -ENOMEM;
+       }
+
+       DP_DEBUG(dev, QEDR_MSG_RQ,
+                "RQ Pbl base addr = %llx max_recv_wr=%d max_wr=%d capacity=%d, rc=%d\n",
+                qed_chain_get_pbl_phys(&qp->rq.pbl), attrs->cap.max_recv_wr,
+                wr_cnt, qed_chain_get_capacity(&qp->rq.pbl), rc);
+
+       /* wr_cnt is expected to fit in a u16.
+        * NOTE(review): no bound on max_recv_wr is visible here - confirm
+        * that callers guarantee it.
+        */
+       qp->rq.max_wr = (u16)wr_cnt;
+
+       return 0;
+}
+
+/* Fill the SQ-related and common fields of the create-QP input parameters.
+ *
+ * With @udata set, the SQ page list comes from the user queue PBL;
+ * otherwise it comes from the kernel SQ chain.
+ */
+static inline void
+qedr_init_qp_in_params_sq(struct qedr_dev *dev,
+                         struct qedr_pd *pd,
+                         struct qedr_qp *qp,
+                         struct ib_qp_init_attr *attrs,
+                         struct ib_udata *udata,
+                         struct qed_rdma_create_qp_in_params *params)
+{
+       /* The QP pointer is the handle to be written in an async event */
+       params->qp_handle_async_hi = upper_32_bits((uintptr_t)qp);
+       params->qp_handle_async_lo = lower_32_bits((uintptr_t)qp);
+
+       params->pd = pd->pd_id;
+       /* Use the user context's DPI when the PD has one */
+       if (pd->uctx)
+               params->dpi = pd->uctx->dpi;
+       else
+               params->dpi = dev->dpi;
+
+       params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
+       params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
+       /* Set for kernel QPs only */
+       params->fmr_and_reserved_lkey = !udata;
+       params->stats_queue = 0;
+       params->max_sq_sges = 0;
+
+       if (!udata) {
+               params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
+               params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
+       } else {
+               params->sq_pbl_ptr = qp->usq.pbl_tbl->pa;
+               params->sq_num_pages = qp->usq.pbl_info.num_pbes;
+       }
+}
+
+/* Fill the RQ-related fields of the create-QP input parameters. SRQ use is
+ * always disabled here.
+ *
+ * With @udata set, the RQ page list comes from the user queue PBL;
+ * otherwise it comes from the kernel RQ chain.
+ */
+static inline void
+qedr_init_qp_in_params_rq(struct qedr_qp *qp,
+                         struct ib_qp_init_attr *attrs,
+                         struct ib_udata *udata,
+                         struct qed_rdma_create_qp_in_params *params)
+{
+       params->use_srq = false;
+       params->srq_id = 0;
+       params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
+
+       if (!udata) {
+               params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
+               params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
+       } else {
+               params->rq_pbl_ptr = qp->urq.pbl_tbl->pa;
+               params->rq_num_pages = qp->urq.pbl_info.num_pbes;
+       }
+}
+
+/* Debug-log a newly created user QP together with the address and length
+ * of its SQ and RQ user buffers.
+ */
+static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+       DP_DEBUG(dev, QEDR_MSG_QP,
+                "create qp: successfully created user QP. qp=%p, sq_addr=0x%llx, sq_len=%zd, rq_addr=0x%llx, rq_len=%zd\n",
+                qp, qp->usq.buf_addr, qp->usq.buf_len, qp->urq.buf_addr,
+                qp->urq.buf_len);
+}
+
+/* Set up the user-space SQ and RQ described by @ureq.
+ *
+ * If the RQ setup fails, the already initialised SQ is cleaned up again.
+ * Returns 0 on success or the error from qedr_init_user_queue().
+ */
+static inline int qedr_init_user_qp(struct ib_ucontext *ib_ctx,
+                                   struct qedr_dev *dev,
+                                   struct qedr_qp *qp,
+                                   struct qedr_create_qp_ureq *ureq)
+{
+       int err;
+
+       /* SQ - read access only (0), dma sync not required (0) */
+       err = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq->sq_addr,
+                                  ureq->sq_len, 0, 0);
+       if (err)
+               return err;
+
+       /* RQ - read access only (0), dma sync not required (0) */
+       err = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq->rq_addr,
+                                  ureq->rq_len, 0, 0);
+       if (!err)
+               return 0;
+
+       /* RQ failed: unwind the SQ that was set up above */
+       qedr_cleanup_user_sq(dev, qp);
+       return err;
+}
+
+/* Set up the kernel-side SQ and RQ of a QP: the SQ chain, the SQ wr_id
+ * array, the RQ chain and the RQ wr_id array, in that order.
+ *
+ * On failure everything set up by the earlier steps is released again.
+ * Returns 0 on success or the error of the step that failed.
+ */
+static inline int
+qedr_init_kernel_qp(struct qedr_dev *dev,
+                   struct qedr_qp *qp,
+                   struct ib_qp_init_attr *attrs,
+                   struct qed_rdma_create_qp_in_params *params)
+{
+       int rc;
+
+       rc = qedr_init_qp_kernel_sq(dev, qp, attrs);
+       if (rc) {
+               DP_ERR(dev, "failed to init kernel QP %p SQ\n", qp);
+               return rc;
+       }
+
+       rc = qedr_init_qp_kernel_params_sq(dev, qp, attrs, params);
+       if (rc) {
+               DP_ERR(dev, "failed to init kernel QP %p SQ params\n", qp);
+               goto free_sq_chain;
+       }
+
+       rc = qedr_init_qp_kernel_rq(dev, qp, attrs);
+       if (rc) {
+               DP_ERR(dev, "failed to init kernel QP %p RQ\n", qp);
+               goto cleanup_sq;
+       }
+
+       rc = qedr_init_qp_kernel_params_rq(dev, qp, attrs);
+       if (rc) {
+               DP_ERR(dev, "failed to init kernel QP %p RQ params\n", qp);
+               goto free_rq_chain;
+       }
+
+       return rc;
+
+free_rq_chain:
+       dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
+cleanup_sq:
+       /* Frees both the SQ chain and the SQ wr_id array */
+       qedr_cleanup_kernel_sq(dev, qp);
+       return rc;
+
+free_sq_chain:
+       /* Only the SQ chain exists at this point */
+       dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
+       return rc;
+}
+
+/* Create a queue pair on protection domain @ibpd.
+ *
+ * @attrs describes the requested QP; @udata is non-NULL when the request
+ * comes from user space. GSI QPs are handed to qedr_create_gsi_qp(). For
+ * other QPs the queues are set up from the user request or allocated in
+ * the kernel, and the QP is then created through the rdma_create_qp op.
+ *
+ * Returns the new ib_qp on success. On failure returns ERR_PTR() with the
+ * code from qedr_check_qp_attrs(), -EINVAL when an SRQ is requested,
+ * -ENOMEM when the qedr_qp cannot be allocated, and -EFAULT for every
+ * later failure.
+ */
+struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
+                            struct ib_qp_init_attr *attrs,
+                            struct ib_udata *udata)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+       struct qed_rdma_create_qp_out_params out_params;
+       struct qed_rdma_create_qp_in_params in_params;
+       struct qedr_pd *pd = get_qedr_pd(ibpd);
+       struct ib_ucontext *ib_ctx = NULL;
+       struct qedr_ucontext *ctx = NULL;
+       struct qedr_create_qp_ureq ureq;
+       struct qedr_qp *qp;
+       int rc = 0;
+
+       DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
+                udata ? "user library" : "kernel", pd);
+
+       rc = qedr_check_qp_attrs(ibpd, dev, attrs);
+       if (rc)
+               return ERR_PTR(rc);
+
+       /* Reject SRQs before allocating the QP, so that this early return
+        * cannot leak the allocation.
+        */
+       if (attrs->srq)
+               return ERR_PTR(-EINVAL);
+
+       qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+       if (!qp)
+               return ERR_PTR(-ENOMEM);
+
+       DP_DEBUG(dev, QEDR_MSG_QP,
+                "create qp: sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
+                get_qedr_cq(attrs->send_cq),
+                get_qedr_cq(attrs->send_cq)->icid,
+                get_qedr_cq(attrs->recv_cq),
+                get_qedr_cq(attrs->recv_cq)->icid);
+
+       qedr_set_qp_init_params(dev, qp, pd, attrs);
+
+       if (attrs->qp_type == IB_QPT_GSI) {
+               if (udata) {
+                       DP_ERR(dev,
+                              "create qp: unexpected udata when creating GSI QP\n");
+                       goto err0;
+               }
+               return qedr_create_gsi_qp(dev, attrs, qp);
+       }
+
+       memset(&in_params, 0, sizeof(in_params));
+
+       if (udata) {
+               /* A user request needs a PD that has a user context */
+               if (!ibpd->uobject || !ibpd->uobject->context)
+                       goto err0;
+
+               ib_ctx = ibpd->uobject->context;
+               ctx = get_qedr_ucontext(ib_ctx);
+
+               memset(&ureq, 0, sizeof(ureq));
+               if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+                       DP_ERR(dev,
+                              "create qp: problem copying data from user space\n");
+                       goto err0;
+               }
+
+               rc = qedr_init_user_qp(ib_ctx, dev, qp, &ureq);
+               if (rc)
+                       goto err0;
+
+               qedr_init_qp_user_params(&in_params, &ureq);
+       } else {
+               rc = qedr_init_kernel_qp(dev, qp, attrs, &in_params);
+               if (rc)
+                       goto err0;
+       }
+
+       qedr_init_qp_in_params_sq(dev, pd, qp, attrs, udata, &in_params);
+       qedr_init_qp_in_params_rq(qp, attrs, udata, &in_params);
+
+       qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
+                                             &in_params, &out_params);
+
+       if (!qp->qed_qp)
+               goto err1;
+
+       qp->qp_id = out_params.qp_id;
+       qp->icid = out_params.icid;
+       qp->ibqp.qp_num = qp->qp_id;
+
+       if (udata) {
+               rc = qedr_copy_qp_uresp(dev, qp, udata);
+               if (rc)
+                       goto err2;
+
+               qedr_qp_user_print(dev, qp);
+       } else {
+               qedr_init_qp_kernel_doorbell_sq(dev, qp);
+               qedr_init_qp_kernel_doorbell_rq(dev, qp);
+       }
+
+       DP_DEBUG(dev, QEDR_MSG_QP, "created %s space QP %p\n",
+                udata ? "user" : "kernel", qp);
+
+       return &qp->ibqp;
+
+err2:
+       /* The QP was already created through rdma_create_qp: destroy it */
+       rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+       if (rc)
+               DP_ERR(dev, "create qp: fatal fault. rc=%d\n", rc);
+err1:
+       /* Release the SQ/RQ resources set up before rdma_create_qp */
+       if (udata) {
+               qedr_cleanup_user_sq(dev, qp);
+               qedr_cleanup_user_rq(dev, qp);
+       } else {
+               qedr_cleanup_kernel_sq(dev, qp);
+               qedr_cleanup_kernel_rq(dev, qp);
+       }
+
+err0:
+       kfree(qp);
+
+       return ERR_PTR(-EFAULT);
+}
+
+/* Translate a qed RoCE QP state into the matching IB QP state. Any state
+ * without an explicit mapping is reported as IB_QPS_ERR.
+ */
+enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
+{
+       enum ib_qp_state ib_state = IB_QPS_ERR;
+
+       switch (qp_state) {
+       case QED_ROCE_QP_STATE_RESET:
+               ib_state = IB_QPS_RESET;
+               break;
+       case QED_ROCE_QP_STATE_INIT:
+               ib_state = IB_QPS_INIT;
+               break;
+       case QED_ROCE_QP_STATE_RTR:
+               ib_state = IB_QPS_RTR;
+               break;
+       case QED_ROCE_QP_STATE_RTS:
+               ib_state = IB_QPS_RTS;
+               break;
+       case QED_ROCE_QP_STATE_SQD:
+               ib_state = IB_QPS_SQD;
+               break;
+       case QED_ROCE_QP_STATE_SQE:
+               ib_state = IB_QPS_SQE;
+               break;
+       default:
+               /* QED_ROCE_QP_STATE_ERR and anything else keep IB_QPS_ERR */
+               break;
+       }
+
+       return ib_state;
+}
+
+/* Translate an IB QP state into the matching qed RoCE QP state. Any state
+ * without an explicit mapping is reported as QED_ROCE_QP_STATE_ERR.
+ */
+enum qed_roce_qp_state qedr_get_state_from_ibqp(enum ib_qp_state qp_state)
+{
+       enum qed_roce_qp_state qed_state = QED_ROCE_QP_STATE_ERR;
+
+       switch (qp_state) {
+       case IB_QPS_RESET:
+               qed_state = QED_ROCE_QP_STATE_RESET;
+               break;
+       case IB_QPS_INIT:
+               qed_state = QED_ROCE_QP_STATE_INIT;
+               break;
+       case IB_QPS_RTR:
+               qed_state = QED_ROCE_QP_STATE_RTR;
+               break;
+       case IB_QPS_RTS:
+               qed_state = QED_ROCE_QP_STATE_RTS;
+               break;
+       case IB_QPS_SQD:
+               qed_state = QED_ROCE_QP_STATE_SQD;
+               break;
+       default:
+               /* IB_QPS_ERR and anything else keep QED_ROCE_QP_STATE_ERR */
+               break;
+       }
+
+       return qed_state;
+}
+
+/* Reset the bookkeeping of one work queue: reset its chain, zero the
+ * producer/consumer/wqe_cons counters and zero the doorbell value.
+ */
+static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
+{
+       qed_chain_reset(&qph->pbl);
+       qph->prod = 0;
+       qph->cons = 0;
+       qph->wqe_cons = 0;
+       qph->db_data.data.value = cpu_to_le16(0);
+}
+
+/* Validate a QP state transition and apply its driver-side side effects.
+ *
+ * Returns 1 when @new_state equals the current state, 0 when the
+ * transition is allowed and -EINVAL otherwise. qp->state itself is not
+ * modified here.
+ */
+static int qedr_update_qp_state(struct qedr_dev *dev,
+                               struct qedr_qp *qp,
+                               enum qed_roce_qp_state new_state)
+{
+       bool to_err = new_state == QED_ROCE_QP_STATE_ERR;
+
+       if (qp->state == new_state)
+               return 1;
+
+       switch (qp->state) {
+       case QED_ROCE_QP_STATE_RESET:
+               /* RESET->INIT only: start from clean SQ/RQ bookkeeping */
+               if (new_state != QED_ROCE_QP_STATE_INIT)
+                       return -EINVAL;
+
+               qp->prev_wqe_size = 0;
+               qedr_reset_qp_hwq_info(&qp->sq);
+               qedr_reset_qp_hwq_info(&qp->rq);
+               return 0;
+       case QED_ROCE_QP_STATE_INIT:
+               /* INIT->ERR needs no extra work */
+               if (to_err)
+                       return 0;
+               if (new_state != QED_ROCE_QP_STATE_RTR)
+                       return -EINVAL;
+
+               /* Update doorbell (in case post_recv was
+                * done before move to RTR)
+                */
+               wmb();
+               writel(qp->rq.db_data.raw, qp->rq.db);
+               /* Make sure write takes effect */
+               mmiowb();
+               return 0;
+       case QED_ROCE_QP_STATE_RTR:
+               /* RTR->RTS or RTR->ERR */
+               return (to_err || new_state == QED_ROCE_QP_STATE_RTS) ?
+                       0 : -EINVAL;
+       case QED_ROCE_QP_STATE_RTS:
+               /* RTS->SQD or RTS->ERR */
+               return (to_err || new_state == QED_ROCE_QP_STATE_SQD) ?
+                       0 : -EINVAL;
+       case QED_ROCE_QP_STATE_SQD:
+               /* SQD->RTS or SQD->ERR */
+               return (to_err || new_state == QED_ROCE_QP_STATE_RTS) ?
+                       0 : -EINVAL;
+       case QED_ROCE_QP_STATE_ERR:
+               /* ERR->RESET only */
+               return (new_state == QED_ROCE_QP_STATE_RESET) ? 0 : -EINVAL;
+       default:
+               return -EINVAL;
+       }
+}
+
+int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                  int attr_mask, struct ib_udata *udata)
+{
+       struct qedr_qp *qp = get_qedr_qp(ibqp);
+       struct qed_rdma_modify_qp_in_params qp_params = { 0 };
+       struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
+       enum ib_qp_state old_qp_state, new_qp_state;
+       int rc = 0;
+
+       DP_DEBUG(dev, QEDR_MSG_QP,
+                "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask,
+                attr->qp_state);
+
+       old_qp_state = qedr_get_ibqp_state(qp->state);
+       if (attr_mask & IB_QP_STATE)
+               new_qp_state = attr->qp_state;
+       else
+               new_qp_state = old_qp_state;
+
+       if (!ib_modify_qp_is_ok
+           (old_qp_state, new_qp_state, ibqp->qp_type, attr_mask,
+            IB_LINK_LAYER_ETHERNET)) {
+               DP_ERR(dev,
+                      "modify qp: invalid attribute mask=0x%x specified for\n"
+                      "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
+                      attr_mask, qp->qp_id, ibqp->qp_type, old_qp_state,
+                      new_qp_state);
+               rc = -EINVAL;
+               goto err;
+       }
+
+       /* Translate the masks... */
+       if (attr_mask & IB_QP_STATE) {
+               SET_FIELD(qp_params.modify_flags,
+                         QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
+               qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
+       }
+
+       if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
+               qp_params.sqd_async = true;
+
+       if (attr_mask & IB_QP_PKEY_INDEX) {
+               SET_FIELD(qp_params.modify_flags,
+                         QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
+               if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
+                       rc = -EINVAL;
+                       goto err;
+               }
+
+               qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
+       }
+
+       if (attr_mask & IB_QP_QKEY)
+               qp->qkey = attr->qkey;
+
+       if (attr_mask & IB_QP_ACCESS_FLAGS) {
+               SET_FIELD(qp_params.modify_flags,
+                         QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
+               qp_params.incoming_rdma_read_en = attr->qp_access_flags &
+                                                 IB_ACCESS_REMOTE_READ;
+               qp_params.incoming_rdma_write_en = attr->qp_access_flags &
+                                                  IB_ACCESS_REMOTE_WRITE;
+               qp_params.incoming_atomic_en = attr->qp_access_flags &
+                                              IB_ACCESS_REMOTE_ATOMIC;
+       }
+
+       if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
+               if (attr_mask & IB_QP_PATH_MTU) {
+                       if (attr->path_mtu < IB_MTU_256 ||
+                           attr->path_mtu > IB_MTU_4096) {
+                               pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
+                               rc = -EINVAL;
+                               goto err;
+                       }
+                       qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
+                                     ib_mtu_enum_to_int(iboe_get_mtu
+                                                        (dev->ndev->mtu)));
+               }
+
+               if (!qp->mtu) {
+                       qp->mtu =
+                       ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
+                       pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
+               }
+
+               SET_FIELD(qp_params.modify_flags,
+                         QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
+
+               qp_params.traffic_class_tos = attr->ah_attr.grh.traffic_class;
+               qp_params.flow_label = attr->ah_attr.grh.flow_label;
+               qp_params.hop_limit_ttl = attr->ah_attr.grh.hop_limit;
+
+               qp->sgid_idx = attr->ah_attr.grh.sgid_index;
+
+               rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
+               if (rc) {
+                       DP_ERR(dev,
+                              "modify qp: problems with GID index %d (rc=%d)\n",
+                              attr->ah_attr.grh.sgid_index, rc);
+                       return rc;
+               }
+
+               rc = qedr_get_dmac(dev, &attr->ah_attr,
+                                  qp_params.remote_mac_addr);
+               if (rc)
+                       return rc;
+
+               qp_params.use_local_mac = true;
+               ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
+
+               DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
+                        qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
+                        qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
+               DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
+                        qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
+                        qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
+               DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
+                        qp_params.remote_mac_addr);
+;
+
+               qp_params.mtu = qp->mtu;
+               qp_params.lb_indication = false;
+       }
+
+       if (!qp_params.mtu) {
+               /* Stay with current MTU */
+               if (qp->mtu)
+                       qp_params.mtu = qp->mtu;
+               else
+                       qp_params.mtu =
+                           ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
+       }
+
+       if (attr_mask & IB_QP_TIMEOUT) {
+               SET_FIELD(qp_params.modify_flags,
+                         QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
+
+               qp_params.ack_timeout = attr->timeout;
+               if (attr->timeout) {
+                       u32 temp;
+
+                       temp = 4096 * (1UL << attr->timeout) / 1000 / 1000;
+                       /* FW requires [msec] */
+                       qp_params.ack_timeout = temp;
+               } else {
+                       /* Infinite */
+                       qp_params.ack_timeout = 0;
+               }
+       }
+       if (attr_mask & IB_QP_RETRY_CNT) {
+               SET_FIELD(qp_params.modify_flags,
+                         QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
+               qp_params.retry_cnt = attr->retry_cnt;
+       }
+
+       if (attr_mask & IB_QP_RNR_RETRY) {
+               SET_FIELD(qp_params.modify_flags,
+                         QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
+               qp_params.rnr_retry_cnt = attr->rnr_retry;
+       }
+
+       if (attr_mask & IB_QP_RQ_PSN) {
+               SET_FIELD(qp_params.modify_flags,
+                         QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
+               qp_params.rq_psn = attr->rq_psn;
+               qp->rq_psn = attr->rq_psn;
+       }
+
+       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+               if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
+                       rc = -EINVAL;
+                       DP_ERR(dev,
+                              "unsupported max_rd_atomic=%d, supported=%d\n",
+                              attr->max_rd_atomic,
+                              dev->attr.max_qp_req_rd_atomic_resc);
+                       goto err;
+               }
+
+               SET_FIELD(qp_params.modify_flags,
+                         QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
+               qp_params.max_rd_atomic_req = attr->max_rd_atomic;
+       }
+
+       if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+               SET_FIELD(qp_params.modify_flags,
+                         QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
+               qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
+       }
+
+       if (attr_mask & IB_QP_SQ_PSN) {
+               SET_FIELD(qp_params.modify_flags,
+                         QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
+               qp_params.sq_psn = attr->sq_psn;
+               qp->sq_psn = attr->sq_psn;
+       }
+
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+               if (attr->max_dest_rd_atomic >
+                   dev->attr.max_qp_resp_rd_atomic_resc) {
+                       DP_ERR(dev,
+                              "unsupported max_dest_rd_atomic=%d, supported=%d\n",
+                              attr->max_dest_rd_atomic,
+                              dev->attr.max_qp_resp_rd_atomic_resc);
+
+                       rc = -EINVAL;
+                       goto err;
+               }
+
+               SET_FIELD(qp_params.modify_flags,
+                         QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
+               qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
+       }
+
+       if (attr_mask & IB_QP_DEST_QPN) {
+               SET_FIELD(qp_params.modify_flags,
+                         QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
+
+               qp_params.dest_qp = attr->dest_qp_num;
+               qp->dest_qp_num = attr->dest_qp_num;
+       }
+
+       if (qp->qp_type != IB_QPT_GSI)
+               rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
+                                             qp->qed_qp, &qp_params);
+
+       if (attr_mask & IB_QP_STATE) {
+               if ((qp->qp_type != IB_QPT_GSI) && (!udata))
+                       qedr_update_qp_state(dev, qp, qp_params.new_state);
+               qp->state = qp_params.new_state;
+       }
+
+err:
+       return rc;
+}
+
+/* Translate the access enables reported by a qed QP query into IB access
+ * flags. IB_ACCESS_LOCAL_WRITE is always set; each remote flag mirrors the
+ * matching incoming_* enable in @params.
+ */
+static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
+{
+       int flags = IB_ACCESS_LOCAL_WRITE;
+
+       flags |= params->incoming_rdma_write_en ? IB_ACCESS_REMOTE_WRITE : 0;
+       flags |= params->incoming_rdma_read_en ? IB_ACCESS_REMOTE_READ : 0;
+       flags |= params->incoming_atomic_en ? IB_ACCESS_REMOTE_ATOMIC : 0;
+
+       return flags;
+}
+
+/* Report the current attributes of @ibqp.
+ *
+ * The qed core is queried for the QP; on success @qp_attr and @qp_init_attr
+ * are zeroed and then filled from the reply and from the values cached in
+ * the qedr QP. When the query fails both output structures are left
+ * untouched. @attr_mask is not consulted.
+ *
+ * Returns 0 on success or the error returned by rdma_query_qp().
+ */
+int qedr_query_qp(struct ib_qp *ibqp,
+                 struct ib_qp_attr *qp_attr,
+                 int attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+       struct qed_rdma_query_qp_out_params params;
+       struct qedr_qp *qp = get_qedr_qp(ibqp);
+       struct qedr_dev *dev = qp->dev;
+       struct ib_ah_attr *ah = &qp_attr->ah_attr;
+       int rc;
+
+       memset(&params, 0, sizeof(params));
+
+       rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
+       if (rc)
+               return rc;
+
+       memset(qp_attr, 0, sizeof(*qp_attr));
+       memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+       /* State, MTU and sequence numbers come from the query reply */
+       qp_attr->qp_state = qedr_get_ibqp_state(params.state);
+       qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
+       qp_attr->sq_draining = params.state == QED_ROCE_QP_STATE_SQD;
+       qp_attr->en_sqd_async_notify = !!params.sqd_async;
+       qp_attr->path_mtu = iboe_get_mtu(params.mtu);
+       qp_attr->path_mig_state = IB_MIG_MIGRATED;
+       qp_attr->rq_psn = params.rq_psn;
+       qp_attr->sq_psn = params.sq_psn;
+       qp_attr->dest_qp_num = params.dest_qp;
+       qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
+
+       /* Timers, retries and atomics */
+       qp_attr->timeout = params.timeout;
+       qp_attr->retry_cnt = params.retry_cnt;
+       qp_attr->rnr_retry = params.rnr_retry;
+       qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
+       qp_attr->max_rd_atomic = params.max_rd_atomic;
+       qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
+       qp_attr->pkey_index = params.pkey_index;
+       qp_attr->port_num = 1;
+
+       /* Queue capabilities are the ones cached in the qedr QP */
+       qp_attr->cap.max_send_wr = qp->sq.max_wr;
+       qp_attr->cap.max_recv_wr = qp->rq.max_wr;
+       qp_attr->cap.max_send_sge = qp->sq.max_sges;
+       qp_attr->cap.max_recv_sge = qp->rq.max_sges;
+       qp_attr->cap.max_inline_data = qp->max_inline_data;
+       qp_init_attr->cap = qp_attr->cap;
+
+       /* Primary address vector */
+       memcpy(ah->grh.dgid.raw, params.dgid.bytes, sizeof(ah->grh.dgid.raw));
+       ah->grh.flow_label = params.flow_label;
+       ah->grh.sgid_index = qp->sgid_idx;
+       ah->grh.hop_limit = params.hop_limit_ttl;
+       ah->grh.traffic_class = params.traffic_class_tos;
+       ah->ah_flags = IB_AH_GRH;
+       ah->port_num = 1;
+       ah->sl = 0;
+       ah->src_path_bits = 0;
+       ah->static_rate = 0;
+
+       /* The alternate path is always reported as zero */
+       qp_attr->alt_pkey_index = 0;
+       qp_attr->alt_port_num = 0;
+       qp_attr->alt_timeout = 0;
+       memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
+
+       DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
+                qp_attr->cap.max_inline_data);
+
+       return rc;
+}
+
+/* Destroy @ibqp and free the driver resources attached to it.
+ *
+ * A QP that is not already in the RESET, ERR or INIT state is first moved
+ * to ERR through qedr_modify_qp(). Non-GSI QPs are then destroyed in the
+ * qed core (GSI QPs go through qedr_destroy_gsi_qp()), the user or kernel
+ * SQ/RQ resources are cleaned up and the qedr_qp is freed.
+ *
+ * Returns 0 on success, or the error from rdma_destroy_qp(); in the error
+ * case the QP is not freed.
+ */
+int qedr_destroy_qp(struct ib_qp *ibqp)
+{
+       struct qedr_qp *qp = get_qedr_qp(ibqp);
+       struct qedr_dev *dev = qp->dev;
+       struct ib_qp_attr attr;
+       int attr_mask = 0;
+       int rc = 0;
+
+       DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
+                qp, qp->qp_type);
+
+       /* Test each state on its own: comparing against the OR of the
+        * three constants only excludes that single combined value and
+        * does not exclude the three states themselves.
+        */
+       if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
+           (qp->state != QED_ROCE_QP_STATE_ERR) &&
+           (qp->state != QED_ROCE_QP_STATE_INIT)) {
+               /* Only qp_state is set; IB_QP_STATE is the only mask bit */
+               attr.qp_state = IB_QPS_ERR;
+               attr_mask |= IB_QP_STATE;
+
+               /* Change the QP state to ERROR */
+               qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
+       }
+
+       if (qp->qp_type != IB_QPT_GSI) {
+               rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+               if (rc)
+                       return rc;
+       } else {
+               qedr_destroy_gsi_qp(dev);
+       }
+
+       /* A QP with a user context owns user queues, otherwise kernel ones */
+       if (ibqp->uobject && ibqp->uobject->context) {
+               qedr_cleanup_user_sq(dev, qp);
+               qedr_cleanup_user_rq(dev, qp);
+       } else {
+               qedr_cleanup_kernel_sq(dev, qp);
+               qedr_cleanup_kernel_rq(dev, qp);
+       }
+
+       kfree(qp);
+
+       return rc;
+}
+
+/* Create an address handle holding a private copy of @attr.
+ *
+ * The handle is allocated with GFP_ATOMIC. Returns the embedded ib_ah, or
+ * ERR_PTR(-ENOMEM) when the allocation fails. @ibpd is not used.
+ */
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+{
+       struct qedr_ah *new_ah = kzalloc(sizeof(*new_ah), GFP_ATOMIC);
+
+       if (new_ah == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       new_ah->attr = *attr;
+       return &new_ah->ibah;
+}
+
+/* Free the qedr address handle that embeds @ibah. Always returns 0. */
+int qedr_destroy_ah(struct ib_ah *ibah)
+{
+       kfree(get_qedr_ah(ibah));
+
+       return 0;
+}
+
+/* Free every PBL tracked by @info.
+ *
+ * The active table (if any) and all in-use PBLs are first collected on the
+ * free list, which is then drained with qedr_free_pbl().
+ */
+static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
+{
+       struct list_head *free_list = &info->free_pbl_list;
+
+       if (info->pbl_table)
+               list_add_tail(&info->pbl_table->list_entry, free_list);
+
+       if (!list_empty(&info->inuse_pbl_list))
+               list_splice(&info->inuse_pbl_list, free_list);
+
+       while (!list_empty(free_list)) {
+               struct qedr_pbl *pbl;
+
+               pbl = list_first_entry(free_list, struct qedr_pbl, list_entry);
+               list_del(&pbl->list_entry);
+               qedr_free_pbl(dev, &info->pbl_info, pbl);
+       }
+}
+
+/* Initialise the PBL bookkeeping of an MR.
+ *
+ * Sets up the free/in-use lists, prepares the PBL layout for
+ * @page_list_len pages and allocates the active PBL table. A second table
+ * is also allocated and parked on the free list; failing to get it is not
+ * an error.
+ *
+ * Returns 0 on success. On failure the error code is returned after
+ * free_mr_info() has released whatever was allocated.
+ */
+static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
+                       size_t page_list_len, bool two_layered)
+{
+       struct qedr_pbl *spare;
+       int rc;
+
+       INIT_LIST_HEAD(&info->free_pbl_list);
+       INIT_LIST_HEAD(&info->inuse_pbl_list);
+
+       rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
+                                 page_list_len, two_layered);
+       if (rc)
+               goto fail;
+
+       info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
+       if (!info->pbl_table) {
+               rc = -ENOMEM;
+               goto fail;
+       }
+
+       DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
+                &info->pbl_table->pa);
+
+       /* Two PBLs are used in the common case, so try to keep a spare
+        * one ready on the free list.
+        */
+       spare = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
+       if (spare) {
+               list_add_tail(&spare->list_entry, &info->free_pbl_list);
+               DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n",
+                        &spare->pa);
+       } else {
+               DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
+       }
+
+       return 0;
+
+fail:
+       free_mr_info(dev, info);
+       return rc;
+}
+
+/* Register a user memory region.
+ *
+ * Obtains the umem for [@start, @start + @len) with ib_umem_get(), builds
+ * the PBLs describing it, then allocates and registers a TID with the qed
+ * core using the access rights in @acc. @usr_addr is programmed as the
+ * MR's virtual address. @udata is not used.
+ *
+ * Returns the new ib_mr, or an ERR_PTR(): -EINVAL when remote write is
+ * requested without local write, -ENOMEM when the MR cannot be allocated,
+ * -EFAULT when ib_umem_get() fails, otherwise the error from
+ * init_mr_info() or the qed core. Every resource taken before the failure
+ * is released.
+ */
+struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
+                              u64 usr_addr, int acc, struct ib_udata *udata)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+       struct qedr_mr *mr;
+       struct qedr_pd *pd;
+       int rc = -ENOMEM;
+
+       pd = get_qedr_pd(ibpd);
+       DP_DEBUG(dev, QEDR_MSG_MR,
+                "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
+                pd->pd_id, start, len, usr_addr, acc);
+
+       if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
+               return ERR_PTR(-EINVAL);
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(rc);
+
+       mr->type = QEDR_MR_USER;
+
+       mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
+       if (IS_ERR(mr->umem)) {
+               rc = -EFAULT;
+               goto err0;
+       }
+
+       /* init_mr_info() frees its own allocations on failure, so only the
+        * umem is left to release.
+        */
+       rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
+       if (rc)
+               goto err_umem;
+
+       qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
+                          &mr->info.pbl_info);
+
+       rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
+       if (rc) {
+               DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
+               goto err1;
+       }
+
+       /* Index only, 18 bit long, lkey = itid << 8 | key */
+       mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
+       mr->hw_mr.key = 0;
+       mr->hw_mr.pd = pd->pd_id;
+       mr->hw_mr.local_read = 1;
+       mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
+       mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
+       mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+       mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
+       mr->hw_mr.mw_bind = false;
+       mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
+       mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
+       mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
+       mr->hw_mr.page_size_log = ilog2(mr->umem->page_size);
+       mr->hw_mr.fbo = ib_umem_offset(mr->umem);
+       mr->hw_mr.length = len;
+       mr->hw_mr.vaddr = usr_addr;
+       mr->hw_mr.zbva = false;
+       mr->hw_mr.phy_mr = false;
+       mr->hw_mr.dma_mr = false;
+
+       rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
+       if (rc) {
+               DP_ERR(dev, "roce register tid returned an error %d\n", rc);
+               goto err2;
+       }
+
+       mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
+       /* An rkey is only handed out when some remote access was granted */
+       if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
+           mr->hw_mr.remote_atomic)
+               mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
+
+       DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
+                mr->ibmr.lkey);
+       return &mr->ibmr;
+
+err2:
+       dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
+err1:
+       /* Frees the active table and the spare PBL on the free list */
+       free_mr_info(dev, &mr->info);
+err_umem:
+       ib_umem_release(mr->umem);
+err0:
+       kfree(mr);
+       return ERR_PTR(rc);
+}
+
+/* Deregister @ib_mr and free everything the driver holds for it.
+ *
+ * The TID is deregistered and freed in the qed core, the MR's PBLs and
+ * umem (if any) are released and the qedr_mr is freed.
+ *
+ * Returns 0 on success, or the error from rdma_deregister_tid(); in the
+ * error case nothing is freed.
+ */
+int qedr_dereg_mr(struct ib_mr *ib_mr)
+{
+       struct qedr_mr *mr = get_qedr_mr(ib_mr);
+       struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
+       int rc = 0;
+
+       rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
+       if (rc)
+               return rc;
+
+       dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
+
+       /* User and fast-reg MRs are set up by init_mr_info(), which may
+        * leave PBLs on the free/in-use lists besides the active table, so
+        * release all of them. DMA MRs own no PBLs. Any other type keeps
+        * the previous behaviour of freeing the active table only.
+        */
+       if ((mr->type == QEDR_MR_USER) || (mr->type == QEDR_MR_FRMR))
+               free_mr_info(dev, &mr->info);
+       else if (mr->type != QEDR_MR_DMA)
+               qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
+
+       /* it could be user registered memory. */
+       if (mr->umem)
+               ib_umem_release(mr->umem);
+
+       kfree(mr);
+
+       return rc;
+}
+
+/* Allocate a fast-registration MR able to map up to @max_page_list_len
+ * pages.
+ *
+ * Sets up the PBLs with init_mr_info(), then allocates and registers a
+ * TID of type QED_RDMA_TID_FMR with local read access only.
+ *
+ * Returns the new qedr_mr, or an ERR_PTR() carrying -ENOMEM or the error
+ * from init_mr_info() / the qed core. Every resource taken before the
+ * failure is released.
+ */
+struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, int max_page_list_len)
+{
+       struct qedr_pd *pd = get_qedr_pd(ibpd);
+       struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+       struct qedr_mr *mr;
+       int rc = -ENOMEM;
+
+       DP_DEBUG(dev, QEDR_MSG_MR,
+                "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
+                max_page_list_len);
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (!mr)
+               return ERR_PTR(rc);
+
+       mr->dev = dev;
+       mr->type = QEDR_MR_FRMR;
+
+       /* init_mr_info() frees its own allocations on failure */
+       rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
+       if (rc)
+               goto err0;
+
+       rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
+       if (rc) {
+               DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
+               goto err1;
+       }
+
+       /* Index only, 18 bit long, lkey = itid << 8 | key */
+       mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
+       mr->hw_mr.key = 0;
+       mr->hw_mr.pd = pd->pd_id;
+       mr->hw_mr.local_read = 1;
+       mr->hw_mr.local_write = 0;
+       mr->hw_mr.remote_read = 0;
+       mr->hw_mr.remote_write = 0;
+       mr->hw_mr.remote_atomic = 0;
+       mr->hw_mr.mw_bind = false;
+       mr->hw_mr.pbl_ptr = 0;
+       mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
+       mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
+       mr->hw_mr.fbo = 0;
+       mr->hw_mr.length = 0;
+       mr->hw_mr.vaddr = 0;
+       mr->hw_mr.zbva = false;
+       mr->hw_mr.phy_mr = true;
+       mr->hw_mr.dma_mr = false;
+
+       rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
+       if (rc) {
+               DP_ERR(dev, "roce register tid returned an error %d\n", rc);
+               goto err2;
+       }
+
+       mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
+       mr->ibmr.rkey = mr->ibmr.lkey;
+
+       DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
+       return mr;
+
+err2:
+       dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
+err1:
+       /* Release the PBLs set up by init_mr_info() */
+       free_mr_info(dev, &mr->info);
+err0:
+       kfree(mr);
+       return ERR_PTR(rc);
+}
+
+/* Allocate a fast-registration MR with room for @max_num_sg pages.
+ *
+ * Only IB_MR_TYPE_MEM_REG is supported; any other @mr_type yields
+ * ERR_PTR(-EINVAL). A failure of __qedr_alloc_mr() is passed on unchanged
+ * so the caller sees the real cause (e.g. -ENOMEM).
+ */
+struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
+                           enum ib_mr_type mr_type, u32 max_num_sg)
+{
+       struct qedr_mr *mr;
+
+       if (mr_type != IB_MR_TYPE_MEM_REG)
+               return ERR_PTR(-EINVAL);
+
+       mr = __qedr_alloc_mr(ibpd, max_num_sg);
+
+       if (IS_ERR(mr))
+               return ERR_CAST(mr);
+
+       return &mr->ibmr;
+}
+
+/* Store @addr as the next page entry of the MR embedding @ibmr; passed as
+ * the per-page callback to ib_sg_to_pages().
+ *
+ * The entry index is mr->npages: the PBL is selected by npages divided by
+ * the number of 64-bit entries per PBL, the slot by the remainder. The
+ * address is stored as two little-endian 32-bit halves.
+ *
+ * Returns 0 on success or -ENOMEM once all PBEs are in use.
+ */
+static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
+{
+       struct qedr_mr *mr = get_qedr_mr(ibmr);
+       struct qedr_pbl *pbl;
+       struct regpair *slot;
+       u32 pbes_per_pbl;
+
+       if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
+               DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages);
+               return -ENOMEM;
+       }
+
+       DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
+                mr->npages, addr);
+
+       pbes_per_pbl = mr->info.pbl_info.pbl_size / sizeof(u64);
+       pbl = &mr->info.pbl_table[mr->npages / pbes_per_pbl];
+       slot = (struct regpair *)pbl->va + (mr->npages % pbes_per_pbl);
+
+       slot->lo = cpu_to_le32(lower_32_bits(addr));
+       slot->hi = cpu_to_le32((u32)upper_32_bits(addr));
+
+       mr->npages++;
+
+       return 0;
+}
+
+/* Return PBLs of completed fast-registrations to the free list.
+ *
+ * Moves up to (completed - completed_handled - 1) PBLs from the head of
+ * the in-use list to the tail of the free list, bumping completed_handled
+ * once per PBL moved.
+ */
+static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
+{
+       struct list_head *inuse = &info->inuse_pbl_list;
+       int work = info->completed - info->completed_handled - 1;
+
+       DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
+
+       /* Free all the page lists that can be freed (all the ones that
+        * were invalidated), under the assumption that if an FMR completed
+        * successfully, any invalidate operation issued before it has also
+        * ended.
+        */
+       for (; work > 0 && !list_empty(inuse); work--) {
+               struct qedr_pbl *pbl;
+
+               pbl = list_first_entry(inuse, struct qedr_pbl, list_entry);
+               list_del(&pbl->list_entry);
+               list_add_tail(&pbl->list_entry, &info->free_pbl_list);
+               info->completed_handled++;
+       }
+}
+
+/* Map a scatterlist onto the fast-registration MR embedding @ibmr.
+ *
+ * Resets the MR's page count, recycles PBLs of completed registrations and
+ * lets ib_sg_to_pages() feed each page to qedr_set_page(). @sg_offset is
+ * not forwarded; NULL is passed to ib_sg_to_pages() instead.
+ *
+ * Returns whatever ib_sg_to_pages() returns.
+ */
+int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+                  int sg_nents, unsigned int *sg_offset)
+{
+       struct qedr_mr *qmr = get_qedr_mr(ibmr);
+       struct mr_info *info = &qmr->info;
+
+       qmr->npages = 0;
+       handle_completed_mrs(qmr->dev, info);
+
+       return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
+}
+
+/* Create a DMA MR on @ibpd with the access rights in @acc.
+ *
+ * Allocates and registers a TID flagged as dma_mr; no PBLs are set up.
+ * The rkey is only filled in when some remote access is granted.
+ *
+ * Returns the new ib_mr, or an ERR_PTR() carrying -ENOMEM or the error
+ * reported by the qed core.
+ */
+struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+       struct qedr_pd *pd = get_qedr_pd(ibpd);
+       struct qedr_mr *mr;
+       int rc;
+
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+       if (mr == NULL)
+               return ERR_PTR(-ENOMEM);
+
+       mr->type = QEDR_MR_DMA;
+
+       rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
+       if (rc) {
+               DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
+               goto free_mr;
+       }
+
+       /* index only, 18 bit long, lkey = itid << 8 | key */
+       mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
+       mr->hw_mr.pd = pd->pd_id;
+       mr->hw_mr.dma_mr = true;
+       mr->hw_mr.local_read = 1;
+       mr->hw_mr.local_write = !!(acc & IB_ACCESS_LOCAL_WRITE);
+       mr->hw_mr.remote_read = !!(acc & IB_ACCESS_REMOTE_READ);
+       mr->hw_mr.remote_write = !!(acc & IB_ACCESS_REMOTE_WRITE);
+       mr->hw_mr.remote_atomic = !!(acc & IB_ACCESS_REMOTE_ATOMIC);
+
+       rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
+       if (rc) {
+               DP_ERR(dev, "roce register tid returned an error %d\n", rc);
+               goto free_tid;
+       }
+
+       mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
+       if (mr->hw_mr.remote_atomic || mr->hw_mr.remote_read ||
+           mr->hw_mr.remote_write)
+               mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
+
+       DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
+       return &mr->ibmr;
+
+free_tid:
+       dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
+free_mr:
+       kfree(mr);
+       return ERR_PTR(rc);
+}
+
+/* Non-zero when advancing the producer by one (modulo max_wr) would make
+ * it equal to the consumer, i.e. the work queue cannot take another WR.
+ */
+static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
+{
+       return wq->cons == ((wq->prod + 1) % wq->max_wr);
+}
+
+/* Sum of the length fields of the first @num_sge entries of @sg_list. */
+static int sge_data_len(struct ib_sge *sg_list, int num_sge)
+{
+       int total = 0;
+       int idx = 0;
+
+       while (idx < num_sge) {
+               total += sg_list[idx].length;
+               idx++;
+       }
+
+       return total;
+}
+
+/* Apply cpu_to_be64(cpu_to_le64()) in place to every 64-bit word of one
+ * SQ element (QEDR_SQE_ELEMENT_SIZE bytes) starting at @p.
+ */
+static void swap_wqe_data64(u64 *p)
+{
+       u64 *end = p + QEDR_SQE_ELEMENT_SIZE / sizeof(u64);
+
+       while (p < end) {
+               *p = cpu_to_be64(cpu_to_le64(*p));
+               p++;
+       }
+}
+
+/* Copy the payload of @wr inline into newly produced SQ chain elements.
+ *
+ * The data referenced by the WR's SGEs is copied back to back into
+ * elements obtained from qed_chain_produce(); each element takes up to
+ * sizeof(struct rdma_sq_common_wqe) bytes and bumps *@wqe_size by one.
+ * Every element is passed through swap_wqe_data64() once it is full, as
+ * is a trailing, partially filled one. @bit is OR-ed into *@bits when
+ * there is data to copy.
+ *
+ * Returns the total number of payload bytes. Returns 0 both for an empty
+ * payload and when the payload exceeds ROCE_REQ_MAX_INLINE_DATA_SIZE; in
+ * the latter case *@bad_wr is set to @wr and nothing is produced.
+ */
+static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
+                                      struct qedr_qp *qp, u8 *wqe_size,
+                                      struct ib_send_wr *wr,
+                                      struct ib_send_wr **bad_wr, u8 *bits,
+                                      u8 bit)
+{
+       u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
+       char *seg_prt, *wqe;
+       int i, seg_siz;
+
+       if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
+               DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
+               *bad_wr = wr;
+               return 0;
+       }
+
+       /* Nothing to copy: leave *bits and *wqe_size untouched */
+       if (!data_size)
+               return data_size;
+
+       *bits |= bit;
+
+       /* seg_siz == 0 forces a new element on the first copied byte */
+       seg_prt = NULL;
+       wqe = NULL;
+       seg_siz = 0;
+
+       /* Copy data inline */
+       for (i = 0; i < wr->num_sge; i++) {
+               u32 len = wr->sg_list[i].length;
+               /* The SGE address is used directly as a CPU pointer */
+               void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
+
+               while (len > 0) {
+                       u32 cur;
+
+                       /* New segment required */
+                       if (!seg_siz) {
+                               wqe = (char *)qed_chain_produce(&qp->sq.pbl);
+                               seg_prt = wqe;
+                               seg_siz = sizeof(struct rdma_sq_common_wqe);
+                               (*wqe_size)++;
+                       }
+
+                       /* Calculate currently allowed length */
+                       cur = min_t(u32, len, seg_siz);
+                       memcpy(seg_prt, src, cur);
+
+                       /* Update segment variables */
+                       seg_prt += cur;
+                       seg_siz -= cur;
+
+                       /* Update sge variables */
+                       src += cur;
+                       len -= cur;
+
+                       /* Swap fully-completed segments */
+                       if (!seg_siz)
+                               swap_wqe_data64((u64 *)wqe);
+               }
+       }
+
+       /* swap last not completed segment */
+       if (seg_siz)
+               swap_wqe_data64((u64 *)wqe);
+
+       return data_size;
+}
+
+/* Fill the addr, length and flags fields of the RQ SGE pointed to by @sge.
+ * @sge is parenthesised on every use so any pointer expression can be
+ * passed safely.
+ */
+#define RQ_SGE_SET(sge, vaddr, vlength, vflags)                        \
+       do {                                                    \
+               DMA_REGPAIR_LE((sge)->addr, vaddr);             \
+               (sge)->length = cpu_to_le32(vlength);           \
+               (sge)->flags = cpu_to_le32(vflags);             \
+       } while (0)
+
+/* Fill the wr_id and num_sges fields of the SRQ header pointed to by @hdr.
+ * @hdr is parenthesised on every use so any pointer expression can be
+ * passed safely.
+ */
+#define SRQ_HDR_SET(hdr, vwr_id, num_sge)                      \
+       do {                                                    \
+               DMA_REGPAIR_LE((hdr)->wr_id, vwr_id);           \
+               (hdr)->num_sges = num_sge;                      \
+       } while (0)
+
+/* Fill the addr, length and l_key fields of the SRQ SGE pointed to by
+ * @sge. @sge is parenthesised on every use so any pointer expression can
+ * be passed safely.
+ */
+#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)                        \
+       do {                                                    \
+               DMA_REGPAIR_LE((sge)->addr, vaddr);             \
+               (sge)->length = cpu_to_le32(vlength);           \
+               (sge)->l_key = cpu_to_le32(vlkey);              \
+       } while (0)
+
+/* Produce one SQ chain element per SGE of @wr and fill it with the SGE's
+ * address, lkey and length.
+ *
+ * When @wqe_size is not NULL it is increased by the number of SGEs.
+ * Returns the sum of the SGE lengths.
+ */
+static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
+                               struct ib_send_wr *wr)
+{
+       u32 total = 0;
+       int idx;
+
+       for (idx = 0; idx < wr->num_sge; idx++) {
+               struct ib_sge *src = &wr->sg_list[idx];
+               struct rdma_sq_sge *dst = qed_chain_produce(&qp->sq.pbl);
+
+               DMA_REGPAIR_LE(dst->addr, src->addr);
+               dst->l_key = cpu_to_le32(src->lkey);
+               dst->length = cpu_to_le32(src->length);
+
+               total += src->length;
+       }
+
+       if (wqe_size != NULL)
+               *wqe_size += wr->num_sge;
+
+       return total;
+}
+
+/* Fill the remote key/address of an RDMA WQE and attach the payload.
+ *
+ * The payload is copied inline when @wr carries IB_SEND_INLINE, otherwise
+ * one SGE element is produced per scatter/gather entry.
+ *
+ * Returns the payload size reported by the helper that was used.
+ */
+static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
+                                    struct qedr_qp *qp,
+                                    struct rdma_sq_rdma_wqe_1st *rwqe,
+                                    struct rdma_sq_rdma_wqe_2nd *rwqe2,
+                                    struct ib_send_wr *wr,
+                                    struct ib_send_wr **bad_wr)
+{
+       u8 inline_flag = 0;
+
+       rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
+       DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
+
+       if (!(wr->send_flags & IB_SEND_INLINE))
+               return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
+
+       SET_FIELD2(inline_flag, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
+
+       return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
+                                          bad_wr, &rwqe->flags, inline_flag);
+}
+
+/* Clear the second send WQE element and attach the payload of @wr.
+ *
+ * The payload is copied inline when @wr carries IB_SEND_INLINE, otherwise
+ * one SGE element is produced per scatter/gather entry.
+ *
+ * Returns the payload size reported by the helper that was used.
+ */
+static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
+                                    struct qedr_qp *qp,
+                                    struct rdma_sq_send_wqe_1st *swqe,
+                                    struct rdma_sq_send_wqe_2st *swqe2,
+                                    struct ib_send_wr *wr,
+                                    struct ib_send_wr **bad_wr)
+{
+       u8 inline_flag = 0;
+
+       memset(swqe2, 0, sizeof(*swqe2));
+
+       if (!(wr->send_flags & IB_SEND_INLINE))
+               return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
+
+       SET_FIELD2(inline_flag, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
+
+       return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
+                                          bad_wr, &swqe->flags, inline_flag);
+}
+
+/* Build the two FMR WQE elements for the registration request @wr.
+ *
+ * The second element is produced from the SQ chain here; the first one is
+ * supplied by the caller in @fwqe1. The MR is recorded in the wqe_wr_id
+ * slot of the current SQ producer index.
+ *
+ * Always returns 0.
+ */
+static int qedr_prepare_reg(struct qedr_qp *qp,
+                           struct rdma_sq_fmr_wqe_1st *fwqe1,
+                           struct ib_reg_wr *wr)
+{
+       struct qedr_mr *mr = get_qedr_mr(wr->mr);
+       struct rdma_sq_fmr_wqe_2nd *fwqe2;
+
+       fwqe2 = qed_chain_produce(&qp->sq.pbl);
+
+       /* First element: MR address and key */
+       fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
+       fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
+       fwqe1->l_key = wr->key;
+
+       /* Second element: access rights; local read is always granted */
+       SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
+                  !!(wr->access & IB_ACCESS_REMOTE_READ));
+       SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
+                  !!(wr->access & IB_ACCESS_REMOTE_WRITE));
+       SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
+                  !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
+       SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
+       SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
+                  !!(wr->access & IB_ACCESS_LOCAL_WRITE));
+
+       /* Page size is encoded as log2 minus 12 */
+       fwqe2->fmr_ctrl = 0;
+       SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
+                  ilog2(mr->ibmr.page_size) - 12);
+
+       fwqe2->length_hi = 0;
+       fwqe2->length_lo = mr->ibmr.length;
+
+       fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
+       fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
+
+       qp->wqe_wr_id[qp->sq.prod].mr = mr;
+
+       return 0;
+}
+
+/* Map a send work-request opcode to the work-completion opcode reported
+ * for it. All send variants, and any opcode not listed, map to IB_WC_SEND.
+ */
+enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
+{
+       enum ib_wc_opcode wc_opcode;
+
+       switch (opcode) {
+       case IB_WR_RDMA_WRITE:
+       case IB_WR_RDMA_WRITE_WITH_IMM:
+               wc_opcode = IB_WC_RDMA_WRITE;
+               break;
+       case IB_WR_RDMA_READ:
+               wc_opcode = IB_WC_RDMA_READ;
+               break;
+       case IB_WR_ATOMIC_CMP_AND_SWP:
+               wc_opcode = IB_WC_COMP_SWAP;
+               break;
+       case IB_WR_ATOMIC_FETCH_AND_ADD:
+               wc_opcode = IB_WC_FETCH_ADD;
+               break;
+       case IB_WR_REG_MR:
+               wc_opcode = IB_WC_REG_MR;
+               break;
+       case IB_WR_LOCAL_INV:
+               wc_opcode = IB_WC_LOCAL_INV;
+               break;
+       case IB_WR_SEND:
+       case IB_WR_SEND_WITH_IMM:
+       case IB_WR_SEND_WITH_INV:
+       default:
+               wc_opcode = IB_WC_SEND;
+               break;
+       }
+
+       return wc_opcode;
+}
+
+/* Check whether @wr can be posted on the SQ of @qp.
+ *
+ * Posting is refused when the work queue is full, when the WR has more
+ * SGEs than the SQ supports, or when the SQ PBL has fewer than
+ * QEDR_MAX_SQE_ELEMENTS_PER_SQE elements left. Each reason is logged only
+ * the first time it occurs on the QP, tracked through qp->err_bitmap.
+ *
+ * Returns true when the WR may be posted, false otherwise.
+ */
+inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
+{
+       struct qedr_dev *dev = qp->dev;
+       bool too_many_sges, sq_full, pbl_full;
+
+       /* prevent SQ overflow and/or processing of a bad WR */
+       too_many_sges = wr->num_sge > qp->sq.max_sges;
+       sq_full = qedr_wq_is_full(&qp->sq);
+       pbl_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
+                  QEDR_MAX_SQE_ELEMENTS_PER_SQE;
+
+       if (!sq_full && !too_many_sges && !pbl_full)
+               return true;
+
+       if (sq_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
+               DP_ERR(dev,
+                      "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
+                      qp);
+               qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
+       }
+
+       if (too_many_sges && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
+               DP_ERR(dev,
+                      "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
+                      qp);
+               qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
+       }
+
+       if (pbl_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
+               DP_ERR(dev,
+                      "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
+                      qp);
+               qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
+       }
+
+       return false;
+}
+
+/* Build the SQ WQE element(s) for a single send work request.
+ *
+ * The first element is produced from qp->sq.pbl and filled with the flags
+ * common to all request types; further elements are produced per opcode.
+ * The bookkeeping slot qp->wqe_wr_id[qp->sq.prod] records the signaled
+ * flag, completion opcode, WQE size and (for send/RDMA requests) length.
+ *
+ * This function neither advances qp->sq.prod nor rings the doorbell; in
+ * this file qedr_post_send() does both after a successful return.
+ *
+ * Returns 0 on success, -ENOMEM when qedr_can_post_send() rejects the WR
+ * and -EINVAL for every other failure.  On failure *bad_wr is set to @wr.
+ * The rollback check at the end relies on *bad_wr being NULL on entry.
+ */
+int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+                    struct ib_send_wr **bad_wr)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibqp->device);
+       struct qedr_qp *qp = get_qedr_qp(ibqp);
+       struct rdma_sq_atomic_wqe_1st *awqe1;
+       struct rdma_sq_atomic_wqe_2nd *awqe2;
+       struct rdma_sq_atomic_wqe_3rd *awqe3;
+       struct rdma_sq_send_wqe_2st *swqe2;
+       struct rdma_sq_local_inv_wqe *iwqe;
+       struct rdma_sq_rdma_wqe_2nd *rwqe2;
+       struct rdma_sq_send_wqe_1st *swqe;
+       struct rdma_sq_rdma_wqe_1st *rwqe;
+       struct rdma_sq_fmr_wqe_1st *fwqe1;
+       struct rdma_sq_common_wqe *wqe;
+       u32 length;
+       int rc = 0;
+       bool comp;
+
+       /* Nothing has been produced on the chain yet, so no rollback needed */
+       if (!qedr_can_post_send(qp, wr)) {
+               *bad_wr = wr;
+               return -ENOMEM;
+       }
+
+       /* First element of the WQE; it is reinterpreted per opcode below */
+       wqe = qed_chain_produce(&qp->sq.pbl);
+       qp->wqe_wr_id[qp->sq.prod].signaled =
+               !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
+
+       wqe->flags = 0;
+       SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
+                  !!(wr->send_flags & IB_SEND_SOLICITED));
+       /* Same condition as the 'signaled' bookkeeping flag stored above */
+       comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
+       SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
+       SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
+                  !!(wr->send_flags & IB_SEND_FENCE));
+       /* Also serves as the saved value for the rollback at the end */
+       wqe->prev_wqe_size = qp->prev_wqe_size;
+
+       qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
+
+       switch (wr->opcode) {
+       case IB_WR_SEND_WITH_IMM:
+               wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
+               swqe = (struct rdma_sq_send_wqe_1st *)wqe;
+               swqe->wqe_size = 2;
+               swqe2 = qed_chain_produce(&qp->sq.pbl);
+
+               swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.imm_data);
+               length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
+                                                  wr, bad_wr);
+               swqe->length = cpu_to_le32(length);
+               qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
+               qp->prev_wqe_size = swqe->wqe_size;
+               /* NOTE(review): this stores the cpu_to_le32() converted value
+                * rather than 'length'; confirm this is intended on
+                * big-endian hosts (same pattern in the cases below).
+                */
+               qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
+               break;
+       case IB_WR_SEND:
+               wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
+               swqe = (struct rdma_sq_send_wqe_1st *)wqe;
+
+               swqe->wqe_size = 2;
+               swqe2 = qed_chain_produce(&qp->sq.pbl);
+               length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
+                                                  wr, bad_wr);
+               swqe->length = cpu_to_le32(length);
+               qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
+               qp->prev_wqe_size = swqe->wqe_size;
+               qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
+               break;
+       case IB_WR_SEND_WITH_INV:
+               wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
+               swqe = (struct rdma_sq_send_wqe_1st *)wqe;
+               swqe2 = qed_chain_produce(&qp->sq.pbl);
+               swqe->wqe_size = 2;
+               swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
+               length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
+                                                  wr, bad_wr);
+               swqe->length = cpu_to_le32(length);
+               qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
+               qp->prev_wqe_size = swqe->wqe_size;
+               qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
+               break;
+
+       case IB_WR_RDMA_WRITE_WITH_IMM:
+               wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
+               rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
+
+               rwqe->wqe_size = 2;
+               /* NOTE(review): unlike IB_WR_SEND_WITH_IMM above, the
+                * immediate is additionally passed through htonl() here;
+                * confirm which byte order the FW expects.
+                */
+               rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
+               rwqe2 = qed_chain_produce(&qp->sq.pbl);
+               length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
+                                                  wr, bad_wr);
+               rwqe->length = cpu_to_le32(length);
+               qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
+               qp->prev_wqe_size = rwqe->wqe_size;
+               qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
+               break;
+       case IB_WR_RDMA_WRITE:
+               wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
+               rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
+
+               rwqe->wqe_size = 2;
+               rwqe2 = qed_chain_produce(&qp->sq.pbl);
+               length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
+                                                  wr, bad_wr);
+               rwqe->length = cpu_to_le32(length);
+               qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
+               qp->prev_wqe_size = rwqe->wqe_size;
+               qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
+               break;
+       case IB_WR_RDMA_READ_WITH_INV:
+               /* The first element was already produced above; setting
+                * *bad_wr makes the rollback at the end rewind the chain.
+                */
+               DP_ERR(dev,
+                      "RDMA READ WITH INVALIDATE not supported\n");
+               *bad_wr = wr;
+               rc = -EINVAL;
+               break;
+
+       case IB_WR_RDMA_READ:
+               wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
+               rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
+
+               rwqe->wqe_size = 2;
+               rwqe2 = qed_chain_produce(&qp->sq.pbl);
+               length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
+                                                  wr, bad_wr);
+               rwqe->length = cpu_to_le32(length);
+               qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
+               qp->prev_wqe_size = rwqe->wqe_size;
+               qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
+               break;
+
+       case IB_WR_ATOMIC_CMP_AND_SWP:
+       case IB_WR_ATOMIC_FETCH_AND_ADD:
+               awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
+               awqe1->wqe_size = 4;
+
+               /* Second element: remote address and rkey */
+               awqe2 = qed_chain_produce(&qp->sq.pbl);
+               DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
+               awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
+
+               /* Third element: operand(s) of the atomic operation */
+               awqe3 = qed_chain_produce(&qp->sq.pbl);
+
+               if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+                       wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
+                       /* The add operand is carried in the swap_data field */
+                       DMA_REGPAIR_LE(awqe3->swap_data,
+                                      atomic_wr(wr)->compare_add);
+               } else {
+                       wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
+                       DMA_REGPAIR_LE(awqe3->swap_data,
+                                      atomic_wr(wr)->swap);
+                       DMA_REGPAIR_LE(awqe3->cmp_data,
+                                      atomic_wr(wr)->compare_add);
+               }
+
+               qedr_prepare_sq_sges(qp, NULL, wr);
+
+               qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
+               qp->prev_wqe_size = awqe1->wqe_size;
+               break;
+
+       case IB_WR_LOCAL_INV:
+               iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
+               iwqe->wqe_size = 1;
+
+               iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
+               /* NOTE(review): stored without cpu_to_le32(), unlike the key
+                * in IB_WR_SEND_WITH_INV - confirm.
+                */
+               iwqe->inv_l_key = wr->ex.invalidate_rkey;
+               qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
+               qp->prev_wqe_size = iwqe->wqe_size;
+               break;
+       case IB_WR_REG_MR:
+               DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
+               wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
+               fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
+               fwqe1->wqe_size = 2;
+
+               rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
+               if (rc) {
+                       DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
+                       /* rc is replaced by -EINVAL in the rollback below */
+                       *bad_wr = wr;
+                       break;
+               }
+
+               qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
+               qp->prev_wqe_size = fwqe1->wqe_size;
+               break;
+       default:
+               DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
+               rc = -EINVAL;
+               *bad_wr = wr;
+               break;
+       }
+
+       /* *bad_wr may have been set in the switch above or by one of the
+        * prepare helpers that receive bad_wr.
+        */
+       if (*bad_wr) {
+               u16 value;
+
+               /* Restore prod to its position before
+                * this WR was processed
+                */
+               value = le16_to_cpu(qp->sq.db_data.data.value);
+               qed_chain_set_prod(&qp->sq.pbl, value, wqe);
+
+               /* Restore prev_wqe_size */
+               qp->prev_wqe_size = wqe->prev_wqe_size;
+               rc = -EINVAL;
+               DP_ERR(dev, "POST SEND FAILED\n");
+       }
+
+       return rc;
+}
+
+/* Post a list of send work requests on a QP and ring the SQ doorbell.
+ *
+ * GSI QPs are handed to qedr_gsi_post_send().  For all other QPs the WRs
+ * are processed one by one under qp->q_lock; processing stops at the first
+ * WR that __qedr_post_send() rejects.  The doorbell is written once after
+ * the loop, whether or not a WR failed.
+ *
+ * @ibqp:   QP to post on
+ * @wr:     head of the WR list (linked through wr->next)
+ * @bad_wr: cleared on entry; points at the offending WR on failure
+ *
+ * Returns 0 on success, -EINVAL if the QP is in RESET or ERR state or if
+ * @wr is NULL, otherwise the error returned by __qedr_post_send().
+ */
+int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+                  struct ib_send_wr **bad_wr)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibqp->device);
+       struct qedr_qp *qp = get_qedr_qp(ibqp);
+       unsigned long flags;
+       int rc = 0;
+
+       *bad_wr = NULL;
+
+       if (qp->qp_type == IB_QPT_GSI)
+               return qedr_gsi_post_send(ibqp, wr, bad_wr);
+
+       spin_lock_irqsave(&qp->q_lock, flags);
+
+       if ((qp->state == QED_ROCE_QP_STATE_RESET) ||
+           (qp->state == QED_ROCE_QP_STATE_ERR)) {
+               spin_unlock_irqrestore(&qp->q_lock, flags);
+               *bad_wr = wr;
+               DP_DEBUG(dev, QEDR_MSG_CQ,
+                        "QP in wrong state! QP icid=0x%x state %d\n",
+                        qp->icid, qp->state);
+               return -EINVAL;
+       }
+
+       if (!wr) {
+               /* Every exit path must drop q_lock and restore the IRQ
+                * state; returning with the lock held would deadlock the
+                * next post on this QP.
+                */
+               spin_unlock_irqrestore(&qp->q_lock, flags);
+               DP_ERR(dev, "Got an empty post send.\n");
+               return -EINVAL;
+       }
+
+       while (wr) {
+               rc = __qedr_post_send(ibqp, wr, bad_wr);
+               if (rc)
+                       break;
+
+               qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
+
+               qedr_inc_sw_prod(&qp->sq);
+
+               qp->sq.db_data.data.value++;
+
+               wr = wr->next;
+       }
+
+       /* Trigger doorbell
+        * If there was a failure in the first WR then it will be triggered in
+        * vain. However this is not harmful (as long as the producer value is
+        * unchanged). For performance reasons we avoid checking for this
+        * redundant doorbell.
+        */
+       wmb();
+       writel(qp->sq.db_data.raw, qp->sq.db);
+
+       /* Make sure write sticks */
+       mmiowb();
+
+       spin_unlock_irqrestore(&qp->q_lock, flags);
+
+       return rc;
+}
+
+/* Post a list of receive work requests on a QP.
+ *
+ * GSI QPs are handed to qedr_gsi_post_recv().  For all other QPs each WR
+ * is turned into one RQ chain element per SGE (or a single zero-length
+ * element when the WR has no SGEs), recorded in qp->rqe_wr_id[], and the
+ * RQ doorbell is written once per WR, all under qp->q_lock.
+ *
+ * Returns 0 on success, -EINVAL if the QP is in RESET or ERR state, or
+ * -ENOMEM when a WR does not fit (too little room left in the chain or
+ * too many SGEs).  On failure *bad_wr points at the offending WR; WRs
+ * before it have already been posted.  *bad_wr is not written on success.
+ */
+int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+                  struct ib_recv_wr **bad_wr)
+{
+       struct qedr_qp *qp = get_qedr_qp(ibqp);
+       struct qedr_dev *dev = qp->dev;
+       unsigned long flags;
+       int status = 0;
+
+       if (qp->qp_type == IB_QPT_GSI)
+               return qedr_gsi_post_recv(ibqp, wr, bad_wr);
+
+       spin_lock_irqsave(&qp->q_lock, flags);
+
+       if ((qp->state == QED_ROCE_QP_STATE_RESET) ||
+           (qp->state == QED_ROCE_QP_STATE_ERR)) {
+               spin_unlock_irqrestore(&qp->q_lock, flags);
+               *bad_wr = wr;
+               return -EINVAL;
+       }
+
+       while (wr) {
+               int i;
+
+               /* Reject the WR if the chain cannot hold a maximum-size RQE
+                * or if the WR carries more SGEs than the RQ supports.
+                */
+               if (qed_chain_get_elem_left_u32(&qp->rq.pbl) <
+                   QEDR_MAX_RQE_ELEMENTS_PER_RQE ||
+                   wr->num_sge > qp->rq.max_sges) {
+                       DP_ERR(dev, "Can't post WR  (%d < %d) || (%d > %d)\n",
+                              qed_chain_get_elem_left_u32(&qp->rq.pbl),
+                              QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge,
+                              qp->rq.max_sges);
+                       status = -ENOMEM;
+                       *bad_wr = wr;
+                       break;
+               }
+               for (i = 0; i < wr->num_sge; i++) {
+                       /* Shadows the IRQ 'flags' of the enclosing function;
+                        * this one holds the SGE flags word.
+                        */
+                       u32 flags = 0;
+                       struct rdma_rq_sge *rqe =
+                           qed_chain_produce(&qp->rq.pbl);
+
+                       /* First one must include the number
+                        * of SGE in the list
+                        */
+                       if (!i)
+                               SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
+                                         wr->num_sge);
+
+                       SET_FIELD(flags, RDMA_RQ_SGE_L_KEY,
+                                 wr->sg_list[i].lkey);
+
+                       RQ_SGE_SET(rqe, wr->sg_list[i].addr,
+                                  wr->sg_list[i].length, flags);
+               }
+
+               /* Special case of no sges. FW requires between 1-4 sges...
+                * in this case we need to post 1 sge with length zero. this is
+                * because rdma write with immediate consumes an RQ.
+                */
+               if (!wr->num_sge) {
+                       /* Shadows the IRQ 'flags' as well (see above) */
+                       u32 flags = 0;
+                       struct rdma_rq_sge *rqe =
+                           qed_chain_produce(&qp->rq.pbl);
+
+                       /* Single placeholder SGE: lkey 0, SGE count 1,
+                        * address 0 and length 0
+                        */
+                       SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
+                       SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
+
+                       RQ_SGE_SET(rqe, 0, 0, flags);
+                       /* One chain element was produced for this WR */
+                       i = 1;
+               }
+
+               /* wqe_size is the number of chain elements to consume when
+                * this WR completes
+                */
+               qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
+               qp->rqe_wr_id[qp->rq.prod].wqe_size = i;
+
+               qedr_inc_sw_prod(&qp->rq);
+
+               /* Flush all the writes before signalling doorbell */
+               wmb();
+
+               qp->rq.db_data.data.value++;
+
+               writel(qp->rq.db_data.raw, qp->rq.db);
+
+               /* Make sure write sticks */
+               mmiowb();
+
+               wr = wr->next;
+       }
+
+       spin_unlock_irqrestore(&qp->q_lock, flags);
+
+       return status;
+}
+
+/* A CQE is considered valid when the toggle bit in its flags equals the
+ * CQ's current pbl_toggle value.  The flags are read through the requester
+ * view of the CQE union.  Returns non-zero when valid.
+ */
+static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe)
+{
+       return cq->pbl_toggle ==
+              (cqe->req.flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK);
+}
+
+/* Recover the QP pointer stored in the CQE: the hi/lo halves of qp_handle
+ * are combined into a 64-bit value and cast back to a pointer.  The handle
+ * is read through the requester view of the CQE union.
+ */
+static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe)
+{
+       return (struct qedr_qp *)(uintptr_t)HILO_GEN(cqe->req.qp_handle.hi,
+                                                    cqe->req.qp_handle.lo,
+                                                    u64);
+}
+
+/* Extract the CQE type field from the CQE flags, read through the
+ * requester view of the CQE union.
+ */
+static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
+{
+       return GET_FIELD(cqe->req.flags, RDMA_CQE_REQUESTER_TYPE);
+}
+
+/* Fetch the CQE the CQ currently points at, i.e. the next one to process */
+static union rdma_cqe *get_cqe(struct qedr_cq *cq)
+{
+       union rdma_cqe *next_cqe = cq->latest_cqe;
+
+       return next_cqe;
+}
+
+/* Account for a completed fast-registration (REG_MR) work request.
+ *
+ * The FMR algorithm uses the per-MR 'completed' counter to decide whether
+ * a PBL can be freed, so the counter has to be bumped for every completed
+ * REG_MR, signaled or not.  This helper is therefore called from the path
+ * that skips unsignaled WRs, so that an unsignaled REG_MR is not missed.
+ */
+static inline void qedr_chk_if_fmr(struct qedr_qp *qp)
+{
+       if (qp->wqe_wr_id[qp->sq.cons].opcode != IB_WC_REG_MR)
+               return;
+
+       qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
+}
+
+/* Complete send-queue WRs up to (not including) @hw_cons.
+ *
+ * Walks the SQ bookkeeping entries starting at qp->sq.cons until either
+ * @num_entries work completions were written or qp->sq.wqe_cons reaches
+ * @hw_cons.  Unsignaled WRs produce no work completion unless @force is
+ * set, but their chain elements are consumed all the same.
+ *
+ * @wc:     array that receives the work completions
+ * @status: status written into every work completion generated here
+ * @force:  generate a work completion even for an unsignaled WR
+ *
+ * Returns the number of work completions written to @wc.
+ */
+static int process_req(struct qedr_dev *dev, struct qedr_qp *qp,
+                      struct qedr_cq *cq, int num_entries,
+                      struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status,
+                      int force)
+{
+       u16 cnt = 0;
+
+       while (num_entries && qp->sq.wqe_cons != hw_cons) {
+               if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
+                       /* An unsignaled REG_MR must still be counted */
+                       qedr_chk_if_fmr(qp);
+                       /* skip WC */
+                       goto next_cqe;
+               }
+
+               /* fill WC */
+               wc->status = status;
+               wc->wc_flags = 0;
+               wc->src_qp = qp->id;
+               wc->qp = &qp->ibqp;
+
+               wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
+               wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
+
+               /* byte_len is only filled in for RDMA write and atomic
+                * completions; other opcodes leave it untouched.
+                */
+               switch (wc->opcode) {
+               case IB_WC_RDMA_WRITE:
+                       wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
+                       break;
+               case IB_WC_COMP_SWAP:
+               case IB_WC_FETCH_ADD:
+                       wc->byte_len = 8;
+                       break;
+               case IB_WC_REG_MR:
+                       /* Same accounting as qedr_chk_if_fmr() does on the
+                        * skip path
+                        */
+                       qp->wqe_wr_id[qp->sq.cons].mr->info.completed++;
+                       break;
+               default:
+                       break;
+               }
+
+               num_entries--;
+               wc++;
+               cnt++;
+next_cqe:
+               /* Release every chain element that belonged to this WR,
+                * then advance the software consumer.
+                */
+               while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
+                       qed_chain_consume(&qp->sq.pbl);
+               qedr_inc_sw_cons(&qp->sq);
+       }
+
+       return cnt;
+}
+
+/* Turn one requester CQE into work completions.
+ *
+ * For OK and FLUSHED CQEs every WR up to the consumer index reported by
+ * the CQE is completed with the matching status.  For any other status the
+ * QP is moved to the ERR state, the WRs before the reported consumer are
+ * completed successfully and, if there is still room in @wc, one more work
+ * completion carrying the translated error status is generated for the
+ * failing WR.
+ *
+ * The CQE's sq_cons field is converted with le16_to_cpu() before use, the
+ * same way try_consume_req_cqe() reads it, so that the comparison against
+ * qp->sq.wqe_cons in process_req() is done in CPU byte order.
+ *
+ * Returns the number of work completions written to @wc.
+ */
+static int qedr_poll_cq_req(struct qedr_dev *dev,
+                           struct qedr_qp *qp, struct qedr_cq *cq,
+                           int num_entries, struct ib_wc *wc,
+                           struct rdma_cqe_requester *req)
+{
+       u16 sq_cons = le16_to_cpu(req->sq_cons);
+       int cnt = 0;
+
+       switch (req->status) {
+       case RDMA_CQE_REQ_STS_OK:
+               cnt = process_req(dev, qp, cq, num_entries, wc, sq_cons,
+                                 IB_WC_SUCCESS, 0);
+               break;
+       case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
+               DP_ERR(dev,
+                      "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+                      cq->icid, qp->icid);
+               cnt = process_req(dev, qp, cq, num_entries, wc, sq_cons,
+                                 IB_WC_WR_FLUSH_ERR, 0);
+               break;
+       default:
+               /* process all WQE before the consumer */
+               qp->state = QED_ROCE_QP_STATE_ERR;
+               cnt = process_req(dev, qp, cq, num_entries, wc,
+                                 sq_cons - 1, IB_WC_SUCCESS, 0);
+               wc += cnt;
+               /* if we have extra WC fill it with actual error info */
+               if (cnt < num_entries) {
+                       enum ib_wc_status wc_status;
+
+                       switch (req->status) {
+                       case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
+                               DP_ERR(dev,
+                                      "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+                                      cq->icid, qp->icid);
+                               wc_status = IB_WC_BAD_RESP_ERR;
+                               break;
+                       case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
+                               DP_ERR(dev,
+                                      "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+                                      cq->icid, qp->icid);
+                               wc_status = IB_WC_LOC_LEN_ERR;
+                               break;
+                       case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
+                               DP_ERR(dev,
+                                      "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+                                      cq->icid, qp->icid);
+                               wc_status = IB_WC_LOC_QP_OP_ERR;
+                               break;
+                       case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
+                               DP_ERR(dev,
+                                      "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+                                      cq->icid, qp->icid);
+                               wc_status = IB_WC_LOC_PROT_ERR;
+                               break;
+                       case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
+                               DP_ERR(dev,
+                                      "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+                                      cq->icid, qp->icid);
+                               wc_status = IB_WC_MW_BIND_ERR;
+                               break;
+                       case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
+                               DP_ERR(dev,
+                                      "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+                                      cq->icid, qp->icid);
+                               wc_status = IB_WC_REM_INV_REQ_ERR;
+                               break;
+                       case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
+                               DP_ERR(dev,
+                                      "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+                                      cq->icid, qp->icid);
+                               wc_status = IB_WC_REM_ACCESS_ERR;
+                               break;
+                       case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
+                               DP_ERR(dev,
+                                      "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+                                      cq->icid, qp->icid);
+                               wc_status = IB_WC_REM_OP_ERR;
+                               break;
+                       case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
+                               DP_ERR(dev,
+                                      "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+                                      cq->icid, qp->icid);
+                               wc_status = IB_WC_RNR_RETRY_EXC_ERR;
+                               break;
+                       case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
+                               DP_ERR(dev,
+                                      "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+                                      cq->icid, qp->icid);
+                               wc_status = IB_WC_RETRY_EXC_ERR;
+                               break;
+                       default:
+                               DP_ERR(dev,
+                                      "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n",
+                                      cq->icid, qp->icid);
+                               wc_status = IB_WC_GENERAL_ERR;
+                       }
+                       /* Force a WC for the failing WR even if unsignaled */
+                       cnt += process_req(dev, qp, cq, 1, wc, sq_cons,
+                                          wc_status, 1);
+               }
+       }
+
+       return cnt;
+}
+
+/* Fill one receive work completion from a responder CQE.
+ *
+ * The CQE status is translated into an ib_wc_status.  Only for a
+ * successful CQE are byte_len and, when the CQE flags indicate immediate
+ * data, the opcode/immediate fields filled in.  A CQE status without an
+ * explicit mapping is reported as IB_WC_GENERAL_ERR.
+ *
+ * @wc:    work completion to fill
+ * @resp:  responder CQE being processed
+ * @wr_id: wr_id of the receive WR this CQE completes
+ */
+static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
+                              struct qedr_cq *cq, struct ib_wc *wc,
+                              struct rdma_cqe_responder *resp, u64 wr_id)
+{
+       enum ib_wc_status wc_status = IB_WC_SUCCESS;
+       u8 flags;
+
+       wc->opcode = IB_WC_RECV;
+       wc->wc_flags = 0;
+
+       switch (resp->status) {
+       case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
+               wc_status = IB_WC_LOC_ACCESS_ERR;
+               break;
+       case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
+               wc_status = IB_WC_LOC_LEN_ERR;
+               break;
+       case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
+               wc_status = IB_WC_LOC_QP_OP_ERR;
+               break;
+       case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
+               wc_status = IB_WC_LOC_PROT_ERR;
+               break;
+       case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
+               wc_status = IB_WC_MW_BIND_ERR;
+               break;
+       case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
+               wc_status = IB_WC_REM_INV_RD_REQ_ERR;
+               break;
+       case RDMA_CQE_RESP_STS_OK:
+               wc_status = IB_WC_SUCCESS;
+               wc->byte_len = le32_to_cpu(resp->length);
+
+               flags = resp->flags & QEDR_RESP_RDMA_IMM;
+
+               if (flags == QEDR_RESP_RDMA_IMM)
+                       wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+
+               if (flags == QEDR_RESP_RDMA_IMM || flags == QEDR_RESP_IMM) {
+                       wc->ex.imm_data =
+                               le32_to_cpu(resp->imm_data_or_inv_r_Key);
+                       wc->wc_flags |= IB_WC_WITH_IMM;
+               }
+               break;
+       default:
+               /* Record the error in wc_status: wc->status is assigned
+                * from it below, so writing wc->status here would be
+                * overwritten with IB_WC_SUCCESS.
+                */
+               wc_status = IB_WC_GENERAL_ERR;
+               DP_ERR(dev, "Invalid CQE status detected\n");
+       }
+
+       /* fill WC */
+       wc->status = wc_status;
+       wc->src_qp = qp->id;
+       wc->qp = &qp->ibqp;
+       wc->wr_id = wr_id;
+}
+
+/* Complete the receive WR at the RQ software consumer: fill @wc from the
+ * responder CQE, release the chain elements that belonged to the WR and
+ * advance the software consumer.  Always returns 1, the number of work
+ * completions written.
+ */
+static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
+                           struct qedr_cq *cq, struct ib_wc *wc,
+                           struct rdma_cqe_responder *resp)
+{
+       __process_resp_one(dev, qp, cq, wc, resp,
+                          qp->rqe_wr_id[qp->rq.cons].wr_id);
+
+       /* Release every chain element recorded for this WR */
+       while (qp->rqe_wr_id[qp->rq.cons].wqe_size-- != 0)
+               qed_chain_consume(&qp->rq.pbl);
+
+       qedr_inc_sw_cons(&qp->rq);
+
+       return 1;
+}
+
+/* Flush receive WRs: generate an IB_WC_WR_FLUSH_ERR work completion for
+ * each posted receive WR until either @num_entries completions were written
+ * or qp->rq.wqe_cons reaches @hw_cons.  The chain elements of every flushed
+ * WR are released and the software consumer is advanced.
+ *
+ * Returns the number of work completions written to @wc.
+ */
+static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq,
+                             int num_entries, struct ib_wc *wc, u16 hw_cons)
+{
+       u16 cnt;
+
+       for (cnt = 0; num_entries && qp->rq.wqe_cons != hw_cons;
+            cnt++, wc++, num_entries--) {
+               /* fill WC */
+               wc->status = IB_WC_WR_FLUSH_ERR;
+               wc->wc_flags = 0;
+               wc->src_qp = qp->id;
+               wc->byte_len = 0;
+               wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
+               wc->qp = &qp->ibqp;
+
+               /* Release every chain element recorded for this WR */
+               while (qp->rqe_wr_id[qp->rq.cons].wqe_size-- != 0)
+                       qed_chain_consume(&qp->rq.pbl);
+
+               qedr_inc_sw_cons(&qp->rq);
+       }
+
+       return cnt;
+}
+
+/* Consume the responder CQE only once the RQ software consumer has caught
+ * up with the consumer index reported in the CQE; in that case *update is
+ * flagged so the caller knows a CQE was consumed.
+ */
+static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
+                                struct rdma_cqe_responder *resp, int *update)
+{
+       if (qp->rq.wqe_cons != le16_to_cpu(resp->rq_cons))
+               return;
+
+       consume_cqe(cq);
+       *update |= 1;
+}
+
+/* Turn one responder CQE into work completions.
+ *
+ * A FLUSHED CQE may cover several receive WRs: they are flushed up to the
+ * consumer index reported in the CQE and the CQE is consumed only once all
+ * of them were handled.  Any other CQE completes exactly one receive WR
+ * and is consumed right away.
+ *
+ * The CQE's rq_cons field is converted with le16_to_cpu() before use, the
+ * same way try_consume_resp_cqe() reads it, so that the comparison against
+ * qp->rq.wqe_cons in process_resp_flush() is done in CPU byte order.
+ *
+ * *update is flagged whenever a CQE was consumed.  Returns the number of
+ * work completions written to @wc.
+ */
+static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
+                            struct qedr_cq *cq, int num_entries,
+                            struct ib_wc *wc, struct rdma_cqe_responder *resp,
+                            int *update)
+{
+       int cnt;
+
+       if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
+               cnt = process_resp_flush(qp, cq, num_entries, wc,
+                                        le16_to_cpu(resp->rq_cons));
+               try_consume_resp_cqe(cq, qp, resp, update);
+       } else {
+               cnt = process_resp_one(dev, qp, cq, wc, resp);
+               consume_cqe(cq);
+               *update |= 1;
+       }
+
+       return cnt;
+}
+
+/* Consume the requester CQE only once the SQ software consumer has caught
+ * up with the consumer index reported in the CQE; in that case *update is
+ * flagged so the caller knows a CQE was consumed.
+ */
+static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
+                               struct rdma_cqe_requester *req, int *update)
+{
+       if (qp->sq.wqe_cons != le16_to_cpu(req->sq_cons))
+               return;
+
+       consume_cqe(cq);
+       *update |= 1;
+}
+
+/* Poll a completion queue.
+ *
+ * GSI CQs are handed to qedr_gsi_poll_cq().  For all other CQs, valid CQEs
+ * are processed under cq->cq_lock until @num_entries work completions were
+ * written or no valid CQE is left.  If at least one CQE was consumed, the
+ * CQ doorbell is updated with the new consumer value.
+ *
+ * @ibcq:        CQ to poll
+ * @num_entries: capacity of @wc
+ * @wc:          array that receives the work completions
+ *
+ * Returns the number of work completions written to @wc.
+ */
+int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibcq->device);
+       struct qedr_cq *cq = get_qedr_cq(ibcq);
+       union rdma_cqe *cqe;
+       u32 old_cons, new_cons;
+       unsigned long flags;
+       int update = 0;
+       int done = 0;
+
+       if (cq->cq_type == QEDR_CQ_TYPE_GSI)
+               return qedr_gsi_poll_cq(ibcq, num_entries, wc);
+
+       spin_lock_irqsave(&cq->cq_lock, flags);
+       /* latest_cqe must be sampled under cq_lock: another poller may
+        * advance it until we own the lock, which would leave us with a
+        * stale CQE pointer.
+        */
+       cqe = cq->latest_cqe;
+       old_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
+       while (num_entries && is_valid_cqe(cq, cqe)) {
+               struct qedr_qp *qp;
+               int cnt = 0;
+
+               /* prevent speculative reads of any field of CQE */
+               rmb();
+
+               qp = cqe_get_qp(cqe);
+               if (!qp) {
+                       WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
+                       break;
+               }
+
+               wc->qp = &qp->ibqp;
+
+               switch (cqe_get_type(cqe)) {
+               case RDMA_CQE_TYPE_REQUESTER:
+                       cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc,
+                                              &cqe->req);
+                       try_consume_req_cqe(cq, qp, &cqe->req, &update);
+                       break;
+               case RDMA_CQE_TYPE_RESPONDER_RQ:
+                       cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
+                                               &cqe->resp, &update);
+                       break;
+               case RDMA_CQE_TYPE_INVALID:
+               default:
+                       /* NOTE(review): nothing is consumed on this path and
+                        * cnt stays 0, so the loop looks like it would
+                        * revisit the same CQE - confirm this cannot happen
+                        * in practice.
+                        */
+                       DP_ERR(dev, "Error: invalid CQE type = %d\n",
+                              cqe_get_type(cqe));
+               }
+               num_entries -= cnt;
+               wc += cnt;
+               done += cnt;
+
+               cqe = get_cqe(cq);
+       }
+       new_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
+
+       /* Account for every CQE consumed during this poll */
+       cq->cq_cons += new_cons - old_cons;
+
+       if (update)
+               /* doorbell notifies about latest VALID entry,
+                * but chain already point to the next INVALID one
+                */
+               doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
+
+       spin_unlock_irqrestore(&cq->cq_lock, flags);
+       return done;
+}
+
+/* MAD processing hook.
+ *
+ * Only traces the fields of the incoming MAD header at debug level and
+ * reports IB_MAD_RESULT_SUCCESS; none of the output parameters is written.
+ */
+int qedr_process_mad(struct ib_device *ibdev,
+                    int process_mad_flags,
+                    u8 port_num,
+                    const struct ib_wc *in_wc,
+                    const struct ib_grh *in_grh,
+                    const struct ib_mad_hdr *mad_hdr,
+                    size_t in_mad_size,
+                    struct ib_mad_hdr *out_mad,
+                    size_t *out_mad_size,
+                    u16 *out_mad_pkey_index)
+{
+       struct qedr_dev *dev = get_qedr_dev(ibdev);
+
+       DP_DEBUG(dev, QEDR_MSG_GSI,
+                "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n",
+                mad_hdr->attr_id,
+                mad_hdr->base_version,
+                mad_hdr->attr_mod,
+                mad_hdr->class_specific,
+                mad_hdr->class_version,
+                mad_hdr->method,
+                mad_hdr->mgmt_class,
+                mad_hdr->status);
+
+       return IB_MAD_RESULT_SUCCESS;
+}
+
+/* Fill in the immutable attributes of a port.
+ *
+ * The table lengths are taken from qedr_query_port(); the core capability
+ * flags advertise RoCE and RoCE with UDP encapsulation, and the maximum
+ * MAD size is IB_MGMT_MAD_SIZE.
+ *
+ * Returns 0 on success or the error returned by qedr_query_port(), in
+ * which case @immutable is left untouched.
+ */
+int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
+                       struct ib_port_immutable *immutable)
+{
+       struct ib_port_attr port_attr;
+       int rc = qedr_query_port(ibdev, port_num, &port_attr);
+
+       if (rc)
+               return rc;
+
+       immutable->pkey_tbl_len = port_attr.pkey_tbl_len;
+       immutable->gid_tbl_len = port_attr.gid_tbl_len;
+       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+                                   RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+       immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+
+       return 0;
+}
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
new file mode 100644 (file)
index 0000000..a9b5e67
--- /dev/null
@@ -0,0 +1,101 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __QEDR_VERBS_H__
+#define __QEDR_VERBS_H__
+
+int qedr_query_device(struct ib_device *ibdev,
+                     struct ib_device_attr *attr, struct ib_udata *udata);
+int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
+int qedr_modify_port(struct ib_device *, u8 port, int mask,
+                    struct ib_port_modify *props);
+
+int qedr_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid);
+
+int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
+
+struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *);
+int qedr_dealloc_ucontext(struct ib_ucontext *);
+
+int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
+int qedr_del_gid(struct ib_device *device, u8 port_num,
+                unsigned int index, void **context);
+int qedr_add_gid(struct ib_device *device, u8 port_num,
+                unsigned int index, const union ib_gid *gid,
+                const struct ib_gid_attr *attr, void **context);
+struct ib_pd *qedr_alloc_pd(struct ib_device *,
+                           struct ib_ucontext *, struct ib_udata *);
+int qedr_dealloc_pd(struct ib_pd *pd);
+
+struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
+                            const struct ib_cq_init_attr *attr,
+                            struct ib_ucontext *ib_ctx,
+                            struct ib_udata *udata);
+int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
+int qedr_destroy_cq(struct ib_cq *);
+int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs,
+                            struct ib_udata *);
+int qedr_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
+                  int attr_mask, struct ib_udata *udata);
+int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr,
+                 int qp_attr_mask, struct ib_qp_init_attr *);
+int qedr_destroy_qp(struct ib_qp *ibqp);
+
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr);
+int qedr_destroy_ah(struct ib_ah *ibah);
+
+int qedr_dereg_mr(struct ib_mr *);
+struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc);
+
+struct ib_mr *qedr_reg_user_mr(struct ib_pd *, u64 start, u64 length,
+                              u64 virt, int acc, struct ib_udata *);
+
+int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+                  int sg_nents, unsigned int *sg_offset);
+
+struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+                           u32 max_num_sg);
+int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
+int qedr_post_send(struct ib_qp *, struct ib_send_wr *,
+                  struct ib_send_wr **bad_wr);
+int qedr_post_recv(struct ib_qp *, struct ib_recv_wr *,
+                  struct ib_recv_wr **bad_wr);
+int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
+                    u8 port_num, const struct ib_wc *in_wc,
+                    const struct ib_grh *in_grh,
+                    const struct ib_mad_hdr *in_mad,
+                    size_t in_mad_size, struct ib_mad_hdr *out_mad,
+                    size_t *out_mad_size, u16 *out_mad_pkey_index);
+
+int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
+                       struct ib_port_immutable *immutable);
+#endif
index 2d2b94fd3633bff1ddbed16d077ee436bbe0ae58..75f08624ac052abed2347cb462990c0545c8d828 100644 (file)
@@ -67,7 +67,8 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
 
        for (got = 0; got < num_pages; got += ret) {
                ret = get_user_pages(start_page + got * PAGE_SIZE,
-                                    num_pages - got, 1, 1,
+                                    num_pages - got,
+                                    FOLL_WRITE | FOLL_FORCE,
                                     p + got, NULL);
                if (ret < 0)
                        goto bail_release;
index a0b6ebee4d8a047e2fdffb8bbd831e5c36107fec..1ccee6ea5bc3092f196689c4481b21d9f27b796c 100644 (file)
@@ -111,6 +111,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
        int i;
        int flags;
        dma_addr_t pa;
+       unsigned int gup_flags;
 
        if (!can_do_mlock())
                return -EPERM;
@@ -135,6 +136,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 
        flags = IOMMU_READ | IOMMU_CACHE;
        flags |= (writable) ? IOMMU_WRITE : 0;
+       gup_flags = FOLL_WRITE;
+       gup_flags |= (writable) ? 0 : FOLL_FORCE;
        cur_base = addr & PAGE_MASK;
        ret = 0;
 
@@ -142,7 +145,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
                ret = get_user_pages(cur_base,
                                        min_t(unsigned long, npages,
                                        PAGE_SIZE / sizeof(struct page *)),
-                                       1, !writable, page_list, NULL);
+                                       gup_flags, page_list, NULL);
 
                if (ret < 0)
                        goto out;
index 01f71caa3ac4f95994e20b72fedd91cb09a3fb9e..f2cefb0d918083516e3bd2319bcbc318fa852ef2 100644 (file)
@@ -90,9 +90,6 @@ static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page,
        if (WARN_ON(!valid_dma_direction(direction)))
                return BAD_DMA_ADDRESS;
 
-       if (offset + size > PAGE_SIZE)
-               return BAD_DMA_ADDRESS;
-
        addr = (u64)page_address(page);
        if (addr)
                addr += offset;
index b8258e4f0aeaf656cfa8f98f6f80bf3761b6bd2b..ffff5a54cb340d9d95d6bef83ce78ff456011d46 100644 (file)
@@ -243,10 +243,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
 {
        int err;
        struct socket *sock;
-       struct udp_port_cfg udp_cfg;
-       struct udp_tunnel_sock_cfg tnl_cfg;
-
-       memset(&udp_cfg, 0, sizeof(udp_cfg));
+       struct udp_port_cfg udp_cfg = {0};
+       struct udp_tunnel_sock_cfg tnl_cfg = {0};
 
        if (ipv6) {
                udp_cfg.family = AF_INET6;
@@ -264,10 +262,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
                return ERR_PTR(err);
        }
 
-       tnl_cfg.sk_user_data = NULL;
        tnl_cfg.encap_type = 1;
        tnl_cfg.encap_rcv = rxe_udp_encap_recv;
-       tnl_cfg.encap_destroy = NULL;
 
        /* Setup UDP tunnel */
        setup_udp_tunnel_sock(net, sock, &tnl_cfg);
index b8036cfbce04041d77a3679979ed5def0d9a0b46..c3e60e4bde6e2a3ba5e0953b531a42f65927b717 100644 (file)
@@ -522,6 +522,7 @@ static void rxe_qp_reset(struct rxe_qp *qp)
        if (qp->sq.queue) {
                __rxe_do_task(&qp->comp.task);
                __rxe_do_task(&qp->req.task);
+               rxe_queue_reset(qp->sq.queue);
        }
 
        /* cleanup attributes */
@@ -573,6 +574,7 @@ void rxe_qp_error(struct rxe_qp *qp)
 {
        qp->req.state = QP_STATE_ERROR;
        qp->resp.state = QP_STATE_ERROR;
+       qp->attr.qp_state = IB_QPS_ERR;
 
        /* drain work and packet queues */
        rxe_run_task(&qp->resp.task, 1);
index 08274254eb887531068d8239dd13c95a16ef190c..d14bf496d62d3af7581bb307fa1350989a90f042 100644 (file)
@@ -84,6 +84,15 @@ err1:
        return -EINVAL;
 }
 
+inline void rxe_queue_reset(struct rxe_queue *q)
+{
+       /* The buffer is a struct rxe_queue_buf management header followed
+        * by the element storage (see rxe_queue.h). Zero only the element
+        * storage; the management header is left as it is.
+        */
+       memset(q->buf->data, 0, q->buf_size - sizeof(struct rxe_queue_buf));
+}
+
 struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
                                 int *num_elem,
                                 unsigned int elem_size)
index 239fd609c31ef51b47d306420a54a26d3ddeb185..8c8641c87817f3fcb6024e58ae31311179d4d060 100644 (file)
@@ -84,6 +84,8 @@ int do_mmap_info(struct rxe_dev *rxe,
                 size_t buf_size,
                 struct rxe_mmap_info **ip_p);
 
+void rxe_queue_reset(struct rxe_queue *q);
+
 struct rxe_queue *rxe_queue_init(struct rxe_dev *rxe,
                                 int *num_elem,
                                 unsigned int elem_size);
index 832846b73ea0252f8b8eda61b0af58d92ab92afc..22bd9630dcd924315012019ff0a39242d7476a59 100644 (file)
@@ -696,7 +696,8 @@ next_wqe:
                                                       qp->req.wqe_index);
                        wqe->state = wqe_state_done;
                        wqe->status = IB_WC_SUCCESS;
-                       goto complete;
+                       __rxe_do_task(&qp->comp.task);
+                       return 0;
                }
                payload = mtu;
        }
@@ -745,13 +746,17 @@ err:
        wqe->status = IB_WC_LOC_PROT_ERR;
        wqe->state = wqe_state_error;
 
-complete:
-       if (qp_type(qp) != IB_QPT_RC) {
-               while (rxe_completer(qp) == 0)
-                       ;
-       }
-
-       return 0;
+       /*
+        * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS
+        * ---------8<---------8<-------------
+        * ...Note that if a completion error occurs, a Work Completion
+        * will always be generated, even if the signaling
+        * indicator requests an Unsignaled Completion.
+        * ---------8<---------8<-------------
+        */
+       wqe->wr.send_flags |= IB_SEND_SIGNALED;
+       __rxe_do_task(&qp->comp.task);
+       return -EAGAIN;
 
 exit:
        return -EAGAIN;
index 7b8d2d9e22633f140b601d0056438bd7d9ca3e68..da12717a3eb794f100438988c564eb56004ef0d7 100644 (file)
@@ -63,6 +63,8 @@ enum ipoib_flush_level {
 
 enum {
        IPOIB_ENCAP_LEN           = 4,
+       IPOIB_PSEUDO_LEN          = 20,
+       IPOIB_HARD_LEN            = IPOIB_ENCAP_LEN + IPOIB_PSEUDO_LEN,
 
        IPOIB_UD_HEAD_SIZE        = IB_GRH_BYTES + IPOIB_ENCAP_LEN,
        IPOIB_UD_RX_SG            = 2, /* max buffer needed for 4K mtu */
@@ -134,15 +136,21 @@ struct ipoib_header {
        u16     reserved;
 };
 
-struct ipoib_cb {
-       struct qdisc_skb_cb     qdisc_cb;
-       u8                      hwaddr[INFINIBAND_ALEN];
+struct ipoib_pseudo_header {
+       u8      hwaddr[INFINIBAND_ALEN];
 };
 
-static inline struct ipoib_cb *ipoib_skb_cb(const struct sk_buff *skb)
+static inline void skb_add_pseudo_hdr(struct sk_buff *skb)
 {
-       BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct ipoib_cb));
-       return (struct ipoib_cb *)skb->cb;
+       char *data = skb_push(skb, IPOIB_PSEUDO_LEN);
+
+       /*
+        * only the ipoib header is present now, make room for a dummy
+        * pseudo header and set skb field accordingly
+        */
+       memset(data, 0, IPOIB_PSEUDO_LEN);
+       skb_reset_mac_header(skb);
+       skb_pull(skb, IPOIB_HARD_LEN);
 }
 
 /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
index 4ad297d3de897789141c87847d26d6fdf91a062e..339a1eecdfe3083e2b15d09473a1053113b7acaa 100644 (file)
@@ -63,6 +63,8 @@ MODULE_PARM_DESC(cm_data_debug_level,
 #define IPOIB_CM_RX_DELAY       (3 * 256 * HZ)
 #define IPOIB_CM_RX_UPDATE_MASK (0x3)
 
+#define IPOIB_CM_RX_RESERVE     (ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN)
+
 static struct ib_qp_attr ipoib_cm_err_attr = {
        .qp_state = IB_QPS_ERR
 };
@@ -146,15 +148,15 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
        struct sk_buff *skb;
        int i;
 
-       skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
+       skb = dev_alloc_skb(ALIGN(IPOIB_CM_HEAD_SIZE + IPOIB_PSEUDO_LEN, 16));
        if (unlikely(!skb))
                return NULL;
 
        /*
-        * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
+        * IPoIB adds a IPOIB_ENCAP_LEN byte header, this will align the
         * IP header to a multiple of 16.
         */
-       skb_reserve(skb, 12);
+       skb_reserve(skb, IPOIB_CM_RX_RESERVE);
 
        mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
                                       DMA_FROM_DEVICE);
@@ -624,9 +626,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        if (wc->byte_len < IPOIB_CM_COPYBREAK) {
                int dlen = wc->byte_len;
 
-               small_skb = dev_alloc_skb(dlen + 12);
+               small_skb = dev_alloc_skb(dlen + IPOIB_CM_RX_RESERVE);
                if (small_skb) {
-                       skb_reserve(small_skb, 12);
+                       skb_reserve(small_skb, IPOIB_CM_RX_RESERVE);
                        ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
                                                   dlen, DMA_FROM_DEVICE);
                        skb_copy_from_linear_data(skb, small_skb->data, dlen);
@@ -663,8 +665,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 
 copied:
        skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-       skb_reset_mac_header(skb);
-       skb_pull(skb, IPOIB_ENCAP_LEN);
+       skb_add_pseudo_hdr(skb);
 
        ++dev->stats.rx_packets;
        dev->stats.rx_bytes += skb->len;
index be11d5d5b8c1d9ca84ab884bac32000e018c4c60..830fecb6934c8edf60a4c1d138cd4815be4b1314 100644 (file)
@@ -128,16 +128,15 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
 
        buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
 
-       skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN);
+       skb = dev_alloc_skb(buf_size + IPOIB_HARD_LEN);
        if (unlikely(!skb))
                return NULL;
 
        /*
-        * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte
-        * header.  So we need 4 more bytes to get to 48 and align the
-        * IP header to a multiple of 16.
+        * the IP header will be at IPOIB_HARD_LEN + IB_GRH_BYTES, that is
+        * 64 bytes aligned
         */
-       skb_reserve(skb, 4);
+       skb_reserve(skb, sizeof(struct ipoib_pseudo_header));
 
        mapping = priv->rx_ring[id].mapping;
        mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size,
@@ -253,8 +252,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        skb_pull(skb, IB_GRH_BYTES);
 
        skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-       skb_reset_mac_header(skb);
-       skb_pull(skb, IPOIB_ENCAP_LEN);
+       skb_add_pseudo_hdr(skb);
 
        ++dev->stats.rx_packets;
        dev->stats.rx_bytes += skb->len;
index ae5d7cd100a54c6c15cce2f50899a499d29c76c3..c50794fb92db4cf5454507b5c35ecdc4ccf8511f 100644 (file)
@@ -938,9 +938,12 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
                                ipoib_neigh_free(neigh);
                                goto err_drop;
                        }
-                       if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+                       if (skb_queue_len(&neigh->queue) <
+                           IPOIB_MAX_PATH_REC_QUEUE) {
+                               /* put pseudoheader back on for next time */
+                               skb_push(skb, IPOIB_PSEUDO_LEN);
                                __skb_queue_tail(&neigh->queue, skb);
-                       else {
+                       } else {
                                ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
                                           skb_queue_len(&neigh->queue));
                                goto err_drop;
@@ -977,7 +980,7 @@ err_drop:
 }
 
 static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
-                            struct ipoib_cb *cb)
+                            struct ipoib_pseudo_header *phdr)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_path *path;
@@ -985,16 +988,18 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
 
        spin_lock_irqsave(&priv->lock, flags);
 
-       path = __path_find(dev, cb->hwaddr + 4);
+       path = __path_find(dev, phdr->hwaddr + 4);
        if (!path || !path->valid) {
                int new_path = 0;
 
                if (!path) {
-                       path = path_rec_create(dev, cb->hwaddr + 4);
+                       path = path_rec_create(dev, phdr->hwaddr + 4);
                        new_path = 1;
                }
                if (path) {
                        if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+                               /* put pseudoheader back on for next time */
+                               skb_push(skb, IPOIB_PSEUDO_LEN);
                                __skb_queue_tail(&path->queue, skb);
                        } else {
                                ++dev->stats.tx_dropped;
@@ -1022,10 +1027,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
                          be16_to_cpu(path->pathrec.dlid));
 
                spin_unlock_irqrestore(&priv->lock, flags);
-               ipoib_send(dev, skb, path->ah, IPOIB_QPN(cb->hwaddr));
+               ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
                return;
        } else if ((path->query || !path_rec_start(dev, path)) &&
                   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+               /* put pseudoheader back on for next time */
+               skb_push(skb, IPOIB_PSEUDO_LEN);
                __skb_queue_tail(&path->queue, skb);
        } else {
                ++dev->stats.tx_dropped;
@@ -1039,13 +1046,15 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_neigh *neigh;
-       struct ipoib_cb *cb = ipoib_skb_cb(skb);
+       struct ipoib_pseudo_header *phdr;
        struct ipoib_header *header;
        unsigned long flags;
 
+       phdr = (struct ipoib_pseudo_header *) skb->data;
+       skb_pull(skb, sizeof(*phdr));
        header = (struct ipoib_header *) skb->data;
 
-       if (unlikely(cb->hwaddr[4] == 0xff)) {
+       if (unlikely(phdr->hwaddr[4] == 0xff)) {
                /* multicast, arrange "if" according to probability */
                if ((header->proto != htons(ETH_P_IP)) &&
                    (header->proto != htons(ETH_P_IPV6)) &&
@@ -1058,13 +1067,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
                        return NETDEV_TX_OK;
                }
                /* Add in the P_Key for multicast*/
-               cb->hwaddr[8] = (priv->pkey >> 8) & 0xff;
-               cb->hwaddr[9] = priv->pkey & 0xff;
+               phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
+               phdr->hwaddr[9] = priv->pkey & 0xff;
 
-               neigh = ipoib_neigh_get(dev, cb->hwaddr);
+               neigh = ipoib_neigh_get(dev, phdr->hwaddr);
                if (likely(neigh))
                        goto send_using_neigh;
-               ipoib_mcast_send(dev, cb->hwaddr, skb);
+               ipoib_mcast_send(dev, phdr->hwaddr, skb);
                return NETDEV_TX_OK;
        }
 
@@ -1073,16 +1082,16 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
        case htons(ETH_P_IP):
        case htons(ETH_P_IPV6):
        case htons(ETH_P_TIPC):
-               neigh = ipoib_neigh_get(dev, cb->hwaddr);
+               neigh = ipoib_neigh_get(dev, phdr->hwaddr);
                if (unlikely(!neigh)) {
-                       neigh_add_path(skb, cb->hwaddr, dev);
+                       neigh_add_path(skb, phdr->hwaddr, dev);
                        return NETDEV_TX_OK;
                }
                break;
        case htons(ETH_P_ARP):
        case htons(ETH_P_RARP):
                /* for unicast ARP and RARP should always perform path find */
-               unicast_arp_send(skb, dev, cb);
+               unicast_arp_send(skb, dev, phdr);
                return NETDEV_TX_OK;
        default:
                /* ethertype not supported by IPoIB */
@@ -1099,11 +1108,13 @@ send_using_neigh:
                        goto unref;
                }
        } else if (neigh->ah) {
-               ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr));
+               ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr));
                goto unref;
        }
 
        if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+               /* put pseudoheader back on for next time */
+               skb_push(skb, sizeof(*phdr));
                spin_lock_irqsave(&priv->lock, flags);
                __skb_queue_tail(&neigh->queue, skb);
                spin_unlock_irqrestore(&priv->lock, flags);
@@ -1135,8 +1146,8 @@ static int ipoib_hard_header(struct sk_buff *skb,
                             unsigned short type,
                             const void *daddr, const void *saddr, unsigned len)
 {
+       struct ipoib_pseudo_header *phdr;
        struct ipoib_header *header;
-       struct ipoib_cb *cb = ipoib_skb_cb(skb);
 
        header = (struct ipoib_header *) skb_push(skb, sizeof *header);
 
@@ -1145,12 +1156,13 @@ static int ipoib_hard_header(struct sk_buff *skb,
 
        /*
         * we don't rely on dst_entry structure,  always stuff the
-        * destination address into skb->cb so we can figure out where
+        * destination address into skb hard header so we can figure out where
         * to send the packet later.
         */
-       memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN);
+       phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr));
+       memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
 
-       return sizeof *header;
+       return IPOIB_HARD_LEN;
 }
 
 static void ipoib_set_mcast_list(struct net_device *dev)
@@ -1772,7 +1784,7 @@ void ipoib_setup(struct net_device *dev)
 
        dev->flags              |= IFF_BROADCAST | IFF_MULTICAST;
 
-       dev->hard_header_len     = IPOIB_ENCAP_LEN;
+       dev->hard_header_len     = IPOIB_HARD_LEN;
        dev->addr_len            = INFINIBAND_ALEN;
        dev->type                = ARPHRD_INFINIBAND;
        dev->tx_queue_len        = ipoib_sendq_size * 2;
index d3394b6add24a0303dd51710d72f86087a36c7fe..1909dd252c9406ba4700e96d5730a67c51ba1a91 100644 (file)
@@ -796,9 +796,11 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
                        __ipoib_mcast_add(dev, mcast);
                        list_add_tail(&mcast->list, &priv->multicast_list);
                }
-               if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
+               if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) {
+                       /* put pseudoheader back on for next time */
+                       skb_push(skb, sizeof(struct ipoib_pseudo_header));
                        skb_queue_tail(&mcast->pkt_queue, skb);
-               else {
+               } else {
                        ++dev->stats.tx_dropped;
                        dev_kfree_skb_any(skb);
                }
index 936f07a4e35f41265de96e84c2bab08f13544ad6..6d7de9bfed9a7f2bdf872474f156613740632539 100644 (file)
@@ -103,6 +103,7 @@ static const struct alps_nibble_commands alps_v6_nibble_commands[] = {
                                           6-byte ALPS packet */
 #define ALPS_STICK_BITS                0x100   /* separate stick button bits */
 #define ALPS_BUTTONPAD         0x200   /* device is a clickpad */
+#define ALPS_DUALPOINT_WITH_PRESSURE   0x400   /* device can report trackpoint pressure */
 
 static const struct alps_model_info alps_model_data[] = {
        { { 0x32, 0x02, 0x14 }, 0x00, { ALPS_PROTO_V2, 0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT } },      /* Toshiba Salellite Pro M10 */
@@ -1156,15 +1157,28 @@ static unsigned char alps_get_pkt_id_ss4_v2(unsigned char *byte)
 {
        unsigned char pkt_id = SS4_PACKET_ID_IDLE;
 
-       if (byte[0] == 0x18 && byte[1] == 0x10 && byte[2] == 0x00 &&
-           (byte[3] & 0x88) == 0x08 && byte[4] == 0x10 && byte[5] == 0x00) {
-               pkt_id = SS4_PACKET_ID_IDLE;
-       } else if (!(byte[3] & 0x10)) {
-               pkt_id = SS4_PACKET_ID_ONE;
-       } else if (!(byte[3] & 0x20)) {
+       switch (byte[3] & 0x30) {
+       case 0x00:
+               if (byte[0] == 0x18 && byte[1] == 0x10 && byte[2] == 0x00 &&
+                   (byte[3] & 0x88) == 0x08 && byte[4] == 0x10 &&
+                   byte[5] == 0x00) {
+                       pkt_id = SS4_PACKET_ID_IDLE;
+               } else {
+                       pkt_id = SS4_PACKET_ID_ONE;
+               }
+               break;
+       case 0x10:
+               /* first and second finger positions */
                pkt_id = SS4_PACKET_ID_TWO;
-       } else {
+               break;
+       case 0x20:
+               /* stick pointer */
+               pkt_id = SS4_PACKET_ID_STICK;
+               break;
+       case 0x30:
+               /* third and fourth finger positions */
                pkt_id = SS4_PACKET_ID_MULTI;
+               break;
        }
 
        return pkt_id;
@@ -1185,7 +1199,13 @@ static int alps_decode_ss4_v2(struct alps_fields *f,
                f->mt[0].x = SS4_1F_X_V2(p);
                f->mt[0].y = SS4_1F_Y_V2(p);
                f->pressure = ((SS4_1F_Z_V2(p)) * 2) & 0x7f;
-               f->fingers = 1;
+               /*
+                * When a button is held the device will give us events
+                * with x, y, and pressure of 0. This causes annoying jumps
+                * if a touch is released while the button is held.
+                * Handle this by claiming zero contacts.
+                */
+               f->fingers = f->pressure > 0 ? 1 : 0;
                f->first_mp = 0;
                f->is_mp = 0;
                break;
@@ -1246,16 +1266,40 @@ static int alps_decode_ss4_v2(struct alps_fields *f,
                }
                break;
 
+       case SS4_PACKET_ID_STICK:
+               if (!(priv->flags & ALPS_DUALPOINT)) {
+                       psmouse_warn(psmouse,
+                                    "Rejected trackstick packet from non DualPoint device");
+               } else {
+                       int x = (s8)(((p[0] & 1) << 7) | (p[1] & 0x7f));
+                       int y = (s8)(((p[3] & 1) << 7) | (p[2] & 0x7f));
+                       int pressure = (s8)(p[4] & 0x7f);
+
+                       input_report_rel(priv->dev2, REL_X, x);
+                       input_report_rel(priv->dev2, REL_Y, -y);
+                       input_report_abs(priv->dev2, ABS_PRESSURE, pressure);
+               }
+               break;
+
        case SS4_PACKET_ID_IDLE:
        default:
                memset(f, 0, sizeof(struct alps_fields));
                break;
        }
 
-       f->left = !!(SS4_BTN_V2(p) & 0x01);
-       if (!(priv->flags & ALPS_BUTTONPAD)) {
-               f->right = !!(SS4_BTN_V2(p) & 0x02);
-               f->middle = !!(SS4_BTN_V2(p) & 0x04);
+       /* handle buttons */
+       if (pkt_id == SS4_PACKET_ID_STICK) {
+               f->ts_left = !!(SS4_BTN_V2(p) & 0x01);
+               if (!(priv->flags & ALPS_BUTTONPAD)) {
+                       f->ts_right = !!(SS4_BTN_V2(p) & 0x02);
+                       f->ts_middle = !!(SS4_BTN_V2(p) & 0x04);
+               }
+       } else {
+               f->left = !!(SS4_BTN_V2(p) & 0x01);
+               if (!(priv->flags & ALPS_BUTTONPAD)) {
+                       f->right = !!(SS4_BTN_V2(p) & 0x02);
+                       f->middle = !!(SS4_BTN_V2(p) & 0x04);
+               }
        }
 
        return 0;
@@ -1266,6 +1310,7 @@ static void alps_process_packet_ss4_v2(struct psmouse *psmouse)
        struct alps_data *priv = psmouse->private;
        unsigned char *packet = psmouse->packet;
        struct input_dev *dev = psmouse->dev;
+       struct input_dev *dev2 = priv->dev2;
        struct alps_fields *f = &priv->f;
 
        memset(f, 0, sizeof(struct alps_fields));
@@ -1311,6 +1356,13 @@ static void alps_process_packet_ss4_v2(struct psmouse *psmouse)
 
        input_report_abs(dev, ABS_PRESSURE, f->pressure);
        input_sync(dev);
+
+       if (priv->flags & ALPS_DUALPOINT) {
+               input_report_key(dev2, BTN_LEFT, f->ts_left);
+               input_report_key(dev2, BTN_RIGHT, f->ts_right);
+               input_report_key(dev2, BTN_MIDDLE, f->ts_middle);
+               input_sync(dev2);
+       }
 }
 
 static bool alps_is_valid_package_ss4_v2(struct psmouse *psmouse)
@@ -2695,6 +2747,10 @@ static int alps_set_protocol(struct psmouse *psmouse,
                if (alps_set_defaults_ss4_v2(psmouse, priv))
                        return -EIO;
 
+               if (priv->fw_ver[1] == 0x1)
+                       priv->flags |= ALPS_DUALPOINT |
+                                       ALPS_DUALPOINT_WITH_PRESSURE;
+
                break;
        }
 
@@ -2767,6 +2823,9 @@ static int alps_identify(struct psmouse *psmouse, struct alps_data *priv)
                } else if (e7[0] == 0x73 && e7[1] == 0x03 &&
                           e7[2] == 0x14 && ec[1] == 0x02) {
                        protocol = &alps_v8_protocol_data;
+               } else if (e7[0] == 0x73 && e7[1] == 0x03 &&
+                          e7[2] == 0x28 && ec[1] == 0x01) {
+                       protocol = &alps_v8_protocol_data;
                } else {
                        psmouse_dbg(psmouse,
                                    "Likely not an ALPS touchpad: E7=%3ph, EC=%3ph\n", e7, ec);
@@ -2949,6 +3008,10 @@ int alps_init(struct psmouse *psmouse)
 
                input_set_capability(dev2, EV_REL, REL_X);
                input_set_capability(dev2, EV_REL, REL_Y);
+               if (priv->flags & ALPS_DUALPOINT_WITH_PRESSURE) {
+                       input_set_capability(dev2, EV_ABS, ABS_PRESSURE);
+                       input_set_abs_params(dev2, ABS_PRESSURE, 0, 127, 0, 0);
+               }
                input_set_capability(dev2, EV_KEY, BTN_LEFT);
                input_set_capability(dev2, EV_KEY, BTN_RIGHT);
                input_set_capability(dev2, EV_KEY, BTN_MIDDLE);
index d37f814dc4478186eb3db8694d84bb2b0e7a9175..b9417e2d7ad3a7ba01aff4d7c5504a619090b0b5 100644 (file)
  *  or there's button activities.
  * SS4_PACKET_ID_TWO: There's two or more fingers on touchpad
  * SS4_PACKET_ID_MULTI: There's three or more fingers on touchpad
+ * SS4_PACKET_ID_STICK: A stick pointer packet
 */
 enum SS4_PACKET_ID {
        SS4_PACKET_ID_IDLE = 0,
        SS4_PACKET_ID_ONE,
        SS4_PACKET_ID_TWO,
        SS4_PACKET_ID_MULTI,
+       SS4_PACKET_ID_STICK,
 };
 
 #define SS4_COUNT_PER_ELECTRODE                256
index 08e252a424802df0885711509e3e26528ec7b7ba..db7d1d666ac1092e23806793e3c3b4b8e292a766 100644 (file)
@@ -1134,7 +1134,7 @@ static int elantech_get_resolution_v4(struct psmouse *psmouse,
  * System76 Pangolin       0x250f01        ?               2 hw buttons
  * (*) + 3 trackpoint buttons
  * (**) + 0 trackpoint buttons
- * Note: Lenovo L430 and Lenovo L430 have the same fw_version/caps
+ * Note: Lenovo L430 and Lenovo L530 have the same fw_version/caps
  */
 static void elantech_set_buttonpad_prop(struct psmouse *psmouse)
 {
@@ -1159,6 +1159,13 @@ static const struct dmi_system_id elantech_dmi_has_middle_button[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "CELSIUS H730"),
                },
        },
+       {
+               /* Fujitsu H760 also has a middle button */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "CELSIUS H760"),
+               },
+       },
 #endif
        { }
 };
@@ -1503,10 +1510,10 @@ static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = {
                },
        },
        {
-               /* Fujitsu LIFEBOOK E554  does not work with crc_enabled == 0 */
+               /* Fujitsu H760 does not work with crc_enabled == 0 */
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E554"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "CELSIUS H760"),
                },
        },
        {
@@ -1516,6 +1523,20 @@ static const struct dmi_system_id elantech_dmi_force_crc_enabled[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E544"),
                },
        },
+       {
+               /* Fujitsu LIFEBOOK E554  does not work with crc_enabled == 0 */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E554"),
+               },
+       },
+       {
+               /* Fujitsu LIFEBOOK E556 does not work with crc_enabled == 0 */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E556"),
+               },
+       },
        {
                /* Fujitsu LIFEBOOK U745 does not work with crc_enabled == 0 */
                .matches = {
index 54eceb30ede5090e67415a7e70ae5f43d7dd9b3f..a7d39689bbfb6fc7df84f6a01a8b78bdc0cc6a99 100644 (file)
@@ -43,7 +43,7 @@ int focaltech_detect(struct psmouse *psmouse, bool set_properties)
 
        if (set_properties) {
                psmouse->vendor = "FocalTech";
-               psmouse->name = "FocalTech Touchpad";
+               psmouse->name = "Touchpad";
        }
 
        return 0;
@@ -146,8 +146,8 @@ static void focaltech_report_state(struct psmouse *psmouse)
        }
        input_mt_report_pointer_emulation(dev, true);
 
-       input_report_key(psmouse->dev, BTN_LEFT, state->pressed);
-       input_sync(psmouse->dev);
+       input_report_key(dev, BTN_LEFT, state->pressed);
+       input_sync(dev);
 }
 
 static void focaltech_process_touch_packet(struct psmouse *psmouse,
index 6f2e0e4f0296999885a40e382bc319ba15fc0253..1ebc2c1debae31e5d1479466085a9d7fbcfc586f 100644 (file)
@@ -221,6 +221,21 @@ static const struct of_device_id rmi_i2c_of_match[] = {
 MODULE_DEVICE_TABLE(of, rmi_i2c_of_match);
 #endif
 
+static void rmi_i2c_regulator_bulk_disable(void *data)
+{
+       struct rmi_i2c_xport *rmi_i2c = data;
+
+       regulator_bulk_disable(ARRAY_SIZE(rmi_i2c->supplies),
+                              rmi_i2c->supplies);
+}
+
+static void rmi_i2c_unregister_transport(void *data)
+{
+       struct rmi_i2c_xport *rmi_i2c = data;
+
+       rmi_unregister_transport_device(&rmi_i2c->xport);
+}
+
 static int rmi_i2c_probe(struct i2c_client *client,
                         const struct i2c_device_id *id)
 {
@@ -264,6 +279,12 @@ static int rmi_i2c_probe(struct i2c_client *client,
        if (retval < 0)
                return retval;
 
+       retval = devm_add_action_or_reset(&client->dev,
+                                         rmi_i2c_regulator_bulk_disable,
+                                         rmi_i2c);
+       if (retval)
+               return retval;
+
        of_property_read_u32(client->dev.of_node, "syna,startup-delay-ms",
                             &rmi_i2c->startup_delay);
 
@@ -294,6 +315,11 @@ static int rmi_i2c_probe(struct i2c_client *client,
                        client->addr);
                return retval;
        }
+       retval = devm_add_action_or_reset(&client->dev,
+                                         rmi_i2c_unregister_transport,
+                                         rmi_i2c);
+       if (retval)
+               return retval;
 
        retval = rmi_i2c_init_irq(client);
        if (retval < 0)
@@ -304,17 +330,6 @@ static int rmi_i2c_probe(struct i2c_client *client,
        return 0;
 }
 
-static int rmi_i2c_remove(struct i2c_client *client)
-{
-       struct rmi_i2c_xport *rmi_i2c = i2c_get_clientdata(client);
-
-       rmi_unregister_transport_device(&rmi_i2c->xport);
-       regulator_bulk_disable(ARRAY_SIZE(rmi_i2c->supplies),
-                              rmi_i2c->supplies);
-
-       return 0;
-}
-
 #ifdef CONFIG_PM_SLEEP
 static int rmi_i2c_suspend(struct device *dev)
 {
@@ -431,7 +446,6 @@ static struct i2c_driver rmi_i2c_driver = {
        },
        .id_table       = rmi_id,
        .probe          = rmi_i2c_probe,
-       .remove         = rmi_i2c_remove,
 };
 
 module_i2c_driver(rmi_i2c_driver);
index 55bd1b34970c90a62005264e257ed774280cfd22..4ebef607e2141ad574fe161366308c08e101dc68 100644 (file)
@@ -396,6 +396,13 @@ static inline int rmi_spi_of_probe(struct spi_device *spi,
 }
 #endif
 
+static void rmi_spi_unregister_transport(void *data)
+{
+       struct rmi_spi_xport *rmi_spi = data;
+
+       rmi_unregister_transport_device(&rmi_spi->xport);
+}
+
 static int rmi_spi_probe(struct spi_device *spi)
 {
        struct rmi_spi_xport *rmi_spi;
@@ -464,6 +471,11 @@ static int rmi_spi_probe(struct spi_device *spi)
                dev_err(&spi->dev, "failed to register transport.\n");
                return retval;
        }
+       retval = devm_add_action_or_reset(&spi->dev,
+                                         rmi_spi_unregister_transport,
+                                         rmi_spi);
+       if (retval)
+               return retval;
 
        retval = rmi_spi_init_irq(spi);
        if (retval < 0)
@@ -473,15 +485,6 @@ static int rmi_spi_probe(struct spi_device *spi)
        return 0;
 }
 
-static int rmi_spi_remove(struct spi_device *spi)
-{
-       struct rmi_spi_xport *rmi_spi = spi_get_drvdata(spi);
-
-       rmi_unregister_transport_device(&rmi_spi->xport);
-
-       return 0;
-}
-
 #ifdef CONFIG_PM_SLEEP
 static int rmi_spi_suspend(struct device *dev)
 {
@@ -577,7 +580,6 @@ static struct spi_driver rmi_spi_driver = {
        },
        .id_table       = rmi_id,
        .probe          = rmi_spi_probe,
-       .remove         = rmi_spi_remove,
 };
 
 module_spi_driver(rmi_spi_driver);
index a5eed2ade53de3a4e1d06776288ec17baba52530..34da81c006b6d80d3df98feffc7651d8eaa3f2c8 100644 (file)
@@ -81,7 +81,7 @@ static inline int i8042_platform_init(void)
                return -EBUSY;
 #endif
 
-       i8042_reset = 1;
+       i8042_reset = I8042_RESET_ALWAYS;
        return 0;
 }
 
index ee1ad27d6ed06ef70370f3096352b37acb1c1368..08a1c10a1448d12db092a1c3f7525e4f5f1b76a2 100644 (file)
@@ -61,7 +61,7 @@ static inline int i8042_platform_init(void)
                return -EBUSY;
 #endif
 
-       i8042_reset = 1;
+       i8042_reset = I8042_RESET_ALWAYS;
 
        return 0;
 }
index f708c75d16f1d919f4e054c94437fb6e4e587866..1aabea43329edf56f2067c73eafca4d140d3ac4b 100644 (file)
@@ -44,7 +44,7 @@ static inline void i8042_write_command(int val)
 
 static inline int i8042_platform_init(void)
 {
-       i8042_reset = 1;
+       i8042_reset = I8042_RESET_ALWAYS;
        return 0;
 }
 
index afcd1c1a05b272b1d615b481f2de9c0a812a2889..6231d63860ee324031d36e6e1c42dd57407dc27f 100644 (file)
@@ -130,7 +130,7 @@ static int __init i8042_platform_init(void)
                }
        }
 
-       i8042_reset = 1;
+       i8042_reset = I8042_RESET_ALWAYS;
 
        return 0;
 }
index 73f5cc124a3606a5cb73406e207d8a27fd4c339e..455747552f858a8819ec18d03317980352e514ea 100644 (file)
@@ -61,7 +61,7 @@ static inline int i8042_platform_init(void)
        if (!request_mem_region(I8042_REGION_START, I8042_REGION_SIZE, "i8042"))
                return -EBUSY;
 
-       i8042_reset = 1;
+       i8042_reset = I8042_RESET_ALWAYS;
        return 0;
 }
 
index 68f5f4a0f1e72f10b35e45243218e6cfbf3d586e..073246c7d1634eef411c1bf07d7f72ab47cad369 100644 (file)
@@ -510,6 +510,90 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = {
        { }
 };
 
+/*
+ * On some Asus laptops, just running self tests cause problems.
+ */
+static const struct dmi_system_id i8042_dmi_noselftest_table[] = {
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "A455LD"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "K401LB"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "K501LB"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "K501LX"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "R409L"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "V502LX"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X302LA"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X450LCP"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X450LD"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X455LAB"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X455LDB"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X455LF"),
+               },
+       },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Z450LA"),
+               },
+       },
+       { }
+};
 static const struct dmi_system_id __initconst i8042_dmi_reset_table[] = {
        {
                /* MSI Wind U-100 */
@@ -793,6 +877,13 @@ static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "P34"),
                },
        },
+       {
+               /* Schenker XMG C504 - Elantech touchpad */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "XMG"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "C504"),
+               },
+       },
        { }
 };
 
@@ -1072,12 +1163,18 @@ static int __init i8042_platform_init(void)
                return retval;
 
 #if defined(__ia64__)
-        i8042_reset = true;
+        i8042_reset = I8042_RESET_ALWAYS;
 #endif
 
 #ifdef CONFIG_X86
-       if (dmi_check_system(i8042_dmi_reset_table))
-               i8042_reset = true;
+       /* Honor module parameter when value is not default */
+       if (i8042_reset == I8042_RESET_DEFAULT) {
+               if (dmi_check_system(i8042_dmi_reset_table))
+                       i8042_reset = I8042_RESET_ALWAYS;
+
+               if (dmi_check_system(i8042_dmi_noselftest_table))
+                       i8042_reset = I8042_RESET_NEVER;
+       }
 
        if (dmi_check_system(i8042_dmi_noloop_table))
                i8042_noloop = true;
index 405252a884dd41e233da4399ab109f213becb85a..89abfdb539ac750ff50eca67f77b2fe6898ff6f0 100644 (file)
@@ -48,9 +48,39 @@ static bool i8042_unlock;
 module_param_named(unlock, i8042_unlock, bool, 0);
 MODULE_PARM_DESC(unlock, "Ignore keyboard lock.");
 
-static bool i8042_reset;
-module_param_named(reset, i8042_reset, bool, 0);
-MODULE_PARM_DESC(reset, "Reset controller during init and cleanup.");
+enum i8042_controller_reset_mode {
+       I8042_RESET_NEVER,
+       I8042_RESET_ALWAYS,
+       I8042_RESET_ON_S2RAM,
+#define I8042_RESET_DEFAULT    I8042_RESET_ON_S2RAM
+};
+static enum i8042_controller_reset_mode i8042_reset = I8042_RESET_DEFAULT;
+static int i8042_set_reset(const char *val, const struct kernel_param *kp)
+{
+       enum i8042_controller_reset_mode *arg = kp->arg;
+       int error;
+       bool reset;
+
+       if (val) {
+               error = kstrtobool(val, &reset);
+               if (error)
+                       return error;
+       } else {
+               reset = true;
+       }
+
+       *arg = reset ? I8042_RESET_ALWAYS : I8042_RESET_NEVER;
+       return 0;
+}
+
+static const struct kernel_param_ops param_ops_reset_param = {
+       .flags = KERNEL_PARAM_OPS_FL_NOARG,
+       .set = i8042_set_reset,
+};
+#define param_check_reset_param(name, p)       \
+       __param_check(name, p, enum i8042_controller_reset_mode)
+module_param_named(reset, i8042_reset, reset_param, 0);
+MODULE_PARM_DESC(reset, "Reset controller on resume, cleanup or both");
 
 static bool i8042_direct;
 module_param_named(direct, i8042_direct, bool, 0);
@@ -1019,7 +1049,7 @@ static int i8042_controller_init(void)
  * Reset the controller and reset CRT to the original value set by BIOS.
  */
 
-static void i8042_controller_reset(bool force_reset)
+static void i8042_controller_reset(bool s2r_wants_reset)
 {
        i8042_flush();
 
@@ -1044,8 +1074,10 @@ static void i8042_controller_reset(bool force_reset)
  * Reset the controller if requested.
  */
 
-       if (i8042_reset || force_reset)
+       if (i8042_reset == I8042_RESET_ALWAYS ||
+           (i8042_reset == I8042_RESET_ON_S2RAM && s2r_wants_reset)) {
                i8042_controller_selftest();
+       }
 
 /*
  * Restore the original control register setting.
@@ -1110,7 +1142,7 @@ static void i8042_dritek_enable(void)
  * before suspending.
  */
 
-static int i8042_controller_resume(bool force_reset)
+static int i8042_controller_resume(bool s2r_wants_reset)
 {
        int error;
 
@@ -1118,7 +1150,8 @@ static int i8042_controller_resume(bool force_reset)
        if (error)
                return error;
 
-       if (i8042_reset || force_reset) {
+       if (i8042_reset == I8042_RESET_ALWAYS ||
+           (i8042_reset == I8042_RESET_ON_S2RAM && s2r_wants_reset)) {
                error = i8042_controller_selftest();
                if (error)
                        return error;
@@ -1195,7 +1228,7 @@ static int i8042_pm_resume_noirq(struct device *dev)
 
 static int i8042_pm_resume(struct device *dev)
 {
-       bool force_reset;
+       bool want_reset;
        int i;
 
        for (i = 0; i < I8042_NUM_PORTS; i++) {
@@ -1218,9 +1251,9 @@ static int i8042_pm_resume(struct device *dev)
         * off control to the platform firmware, otherwise we can simply restore
         * the mode.
         */
-       force_reset = pm_resume_via_firmware();
+       want_reset = pm_resume_via_firmware();
 
-       return i8042_controller_resume(force_reset);
+       return i8042_controller_resume(want_reset);
 }
 
 static int i8042_pm_thaw(struct device *dev)
@@ -1482,7 +1515,7 @@ static int __init i8042_probe(struct platform_device *dev)
 
        i8042_platform_device = dev;
 
-       if (i8042_reset) {
+       if (i8042_reset == I8042_RESET_ALWAYS) {
                error = i8042_controller_selftest();
                if (error)
                        return error;
index fb5fb9140ca9536c59f0163ccd4cb5e8b8f4d9ac..552a3773f79d0b7815d6d180f7aabc9083ff6c92 100644 (file)
@@ -157,6 +157,7 @@ struct mip4_ts {
 
        char phys[32];
        char product_name[16];
+       char ic_name[4];
 
        unsigned int max_x;
        unsigned int max_y;
@@ -263,6 +264,18 @@ static int mip4_query_device(struct mip4_ts *ts)
                dev_dbg(&ts->client->dev, "product name: %.*s\n",
                        (int)sizeof(ts->product_name), ts->product_name);
 
+       /* IC name */
+       cmd[0] = MIP4_R0_INFO;
+       cmd[1] = MIP4_R1_INFO_IC_NAME;
+       error = mip4_i2c_xfer(ts, cmd, sizeof(cmd),
+                             ts->ic_name, sizeof(ts->ic_name));
+       if (error)
+               dev_warn(&ts->client->dev,
+                        "Failed to retrieve IC name: %d\n", error);
+       else
+               dev_dbg(&ts->client->dev, "IC name: %.*s\n",
+                       (int)sizeof(ts->ic_name), ts->ic_name);
+
        /* Firmware version */
        error = mip4_get_fw_version(ts);
        if (error)
@@ -1326,7 +1339,7 @@ static ssize_t mip4_sysfs_read_hw_version(struct device *dev,
         * paired with current firmware in the chip.
         */
        count = snprintf(buf, PAGE_SIZE, "%.*s\n",
-               (int)sizeof(ts->product_name), ts->product_name);
+                        (int)sizeof(ts->product_name), ts->product_name);
 
        mutex_unlock(&ts->input->mutex);
 
@@ -1335,9 +1348,30 @@ static ssize_t mip4_sysfs_read_hw_version(struct device *dev,
 
 static DEVICE_ATTR(hw_version, S_IRUGO, mip4_sysfs_read_hw_version, NULL);
 
+static ssize_t mip4_sysfs_read_ic_name(struct device *dev,
+                                         struct device_attribute *attr,
+                                         char *buf)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct mip4_ts *ts = i2c_get_clientdata(client);
+       size_t count;
+
+       mutex_lock(&ts->input->mutex);
+
+       count = snprintf(buf, PAGE_SIZE, "%.*s\n",
+                        (int)sizeof(ts->ic_name), ts->ic_name);
+
+       mutex_unlock(&ts->input->mutex);
+
+       return count;
+}
+
+static DEVICE_ATTR(ic_name, S_IRUGO, mip4_sysfs_read_ic_name, NULL);
+
 static struct attribute *mip4_attrs[] = {
        &dev_attr_fw_version.attr,
        &dev_attr_hw_version.attr,
+       &dev_attr_ic_name.attr,
        &dev_attr_update_fw.attr,
        NULL,
 };
@@ -1538,6 +1572,6 @@ static struct i2c_driver mip4_driver = {
 module_i2c_driver(mip4_driver);
 
 MODULE_DESCRIPTION("MELFAS MIP4 Touchscreen");
-MODULE_VERSION("2016.03.12");
+MODULE_VERSION("2016.09.28");
 MODULE_AUTHOR("Sangwon Jee <jeesw@melfas.com>");
 MODULE_LICENSE("GPL");
index 15c01c3cd540b6b0416002ca39c8a72951b3cc75..e6f9b2d745ca0eefbe02cb5e9c2680dcff60af46 100644 (file)
@@ -2636,17 +2636,26 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
        /* And we're up. Go go go! */
        of_iommu_set_ops(dev->of_node, &arm_smmu_ops);
 #ifdef CONFIG_PCI
-       pci_request_acs();
-       ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
-       if (ret)
-               return ret;
+       if (pci_bus_type.iommu_ops != &arm_smmu_ops) {
+               pci_request_acs();
+               ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
+               if (ret)
+                       return ret;
+       }
 #endif
 #ifdef CONFIG_ARM_AMBA
-       ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
-       if (ret)
-               return ret;
+       if (amba_bustype.iommu_ops != &arm_smmu_ops) {
+               ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
+               if (ret)
+                       return ret;
+       }
 #endif
-       return bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
+       if (platform_bus_type.iommu_ops != &arm_smmu_ops) {
+               ret = bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
+               if (ret)
+                       return ret;
+       }
+       return 0;
 }
 
 static int arm_smmu_device_remove(struct platform_device *pdev)
index c841eb7a1a7417af301e6c51a9ba464d05b1472a..8f7281444551dfc8a7b1a1bbfdb66a81f28e01ca 100644 (file)
@@ -324,8 +324,10 @@ struct arm_smmu_master_cfg {
 #define INVALID_SMENDX                 -1
 #define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
 #define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
+#define fwspec_smendx(fw, i) \
+       (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
 #define for_each_cfg_sme(fw, i, idx) \
-       for (i = 0; idx = __fwspec_cfg(fw)->smendx[i], i < fw->num_ids; ++i)
+       for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
 
 struct arm_smmu_device {
        struct device                   *dev;
@@ -1228,6 +1230,16 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
                return -ENXIO;
        }
 
+       /*
+        * FIXME: The arch/arm DMA API code tries to attach devices to its own
+        * domains between of_xlate() and add_device() - we have no way to cope
+        * with that, so until ARM gets converted to rely on groups and default
+        * domains, just say no (but more politely than by dereferencing NULL).
+        * This should be at least a WARN_ON once that's sorted.
+        */
+       if (!fwspec->iommu_priv)
+               return -ENODEV;
+
        smmu = fwspec_smmu(fwspec);
        /* Ensure that the domain is finalised */
        ret = arm_smmu_init_domain_context(domain, smmu);
@@ -1390,7 +1402,7 @@ static int arm_smmu_add_device(struct device *dev)
                fwspec = dev->iommu_fwspec;
                if (ret)
                        goto out_free;
-       } else if (fwspec) {
+       } else if (fwspec && fwspec->ops == &arm_smmu_ops) {
                smmu = arm_smmu_get_by_node(to_of_node(fwspec->iommu_fwnode));
        } else {
                return -ENODEV;
index a4407eabf0e64fbacba5573bc3eb91204c97a19c..3965e73db51cef0f3d3b5593da18c9d9fe25fd9e 100644 (file)
@@ -1711,6 +1711,7 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
        if (!iommu->domains || !iommu->domain_ids)
                return;
 
+again:
        spin_lock_irqsave(&device_domain_lock, flags);
        list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
                struct dmar_domain *domain;
@@ -1723,10 +1724,19 @@ static void disable_dmar_iommu(struct intel_iommu *iommu)
 
                domain = info->domain;
 
-               dmar_remove_one_dev_info(domain, info->dev);
+               __dmar_remove_one_dev_info(info);
 
-               if (!domain_type_is_vm_or_si(domain))
+               if (!domain_type_is_vm_or_si(domain)) {
+                       /*
+                        * The domain_exit() function  can't be called under
+                        * device_domain_lock, as it takes this lock itself.
+                        * So release the lock here and re-run the loop
+                        * afterwards.
+                        */
+                       spin_unlock_irqrestore(&device_domain_lock, flags);
                        domain_exit(domain);
+                       goto again;
+               }
        }
        spin_unlock_irqrestore(&device_domain_lock, flags);
 
index c0e7b624ce5475aed0022d3b6f79a043110740ff..12102448fdddf1371f5d561c81e04a55091e8b07 100644 (file)
@@ -178,7 +178,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
                       idev->id_vendor, idev->id_device);
 }
 
-ipack_device_attr(id_format, "0x%hhu\n");
+ipack_device_attr(id_format, "0x%hhx\n");
 
 static DEVICE_ATTR_RO(id);
 static DEVICE_ATTR_RO(id_device);
index 82b0b5daf3f5b2af6a91c18923271dc7647a7cef..bc0af3307bbfd6612d2ae18d37a15c8d87a6ea39 100644 (file)
@@ -158,8 +158,8 @@ config PIC32_EVIC
        select IRQ_DOMAIN
 
 config JCORE_AIC
-       bool "J-Core integrated AIC"
-       depends on OF && (SUPERH || COMPILE_TEST)
+       bool "J-Core integrated AIC" if COMPILE_TEST
+       depends on OF
        select IRQ_DOMAIN
        help
          Support for the J-Core integrated AIC.
index efbf0e4304b7073d5aa12bfa97a1d727aace81b7..2a7a38830a8df3ef08fdeebe6f98cd24191f8808 100644 (file)
@@ -85,7 +85,7 @@ static void nps400_irq_eoi_global(struct irq_data *irqd)
        nps_ack_gic();
 }
 
-static void nps400_irq_eoi(struct irq_data *irqd)
+static void nps400_irq_ack(struct irq_data *irqd)
 {
        unsigned int __maybe_unused irq = irqd_to_hwirq(irqd);
 
@@ -103,7 +103,7 @@ static struct irq_chip nps400_irq_chip_percpu = {
        .name           = "NPS400 IC",
        .irq_mask       = nps400_irq_mask,
        .irq_unmask     = nps400_irq_unmask,
-       .irq_eoi        = nps400_irq_eoi,
+       .irq_ack        = nps400_irq_ack,
 };
 
 static int nps400_irq_map(struct irq_domain *d, unsigned int virq,
@@ -135,7 +135,7 @@ static const struct irq_domain_ops nps400_irq_ops = {
 static int __init nps400_of_init(struct device_node *node,
                                 struct device_node *parent)
 {
-       static struct irq_domain *nps400_root_domain;
+       struct irq_domain *nps400_root_domain;
 
        if (parent) {
                pr_err("DeviceTree incore ic not a root irq controller\n");
index 003495d91f9cfd34ea77eec0b52a4f070e58bfd5..c5dee300e8a3e1a40c30d22bba5e1a968e83a21a 100644 (file)
@@ -1023,7 +1023,7 @@ static void its_free_tables(struct its_node *its)
 
 static int its_alloc_tables(struct its_node *its)
 {
-       u64 typer = readq_relaxed(its->base + GITS_TYPER);
+       u64 typer = gic_read_typer(its->base + GITS_TYPER);
        u32 ids = GITS_TYPER_DEVBITS(typer);
        u64 shr = GITS_BASER_InnerShareable;
        u64 cache = GITS_BASER_WaWb;
@@ -1198,7 +1198,7 @@ static void its_cpu_init_collection(void)
                 * We now have to bind each collection to its target
                 * redistributor.
                 */
-               if (readq_relaxed(its->base + GITS_TYPER) & GITS_TYPER_PTA) {
+               if (gic_read_typer(its->base + GITS_TYPER) & GITS_TYPER_PTA) {
                        /*
                         * This ITS wants the physical address of the
                         * redistributor.
@@ -1208,7 +1208,7 @@ static void its_cpu_init_collection(void)
                        /*
                         * This ITS wants a linear CPU number.
                         */
-                       target = readq_relaxed(gic_data_rdist_rd_base() + GICR_TYPER);
+                       target = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER);
                        target = GICR_TYPER_CPU_NUMBER(target) << 16;
                }
 
@@ -1691,7 +1691,7 @@ static int __init its_probe_one(struct resource *res,
        INIT_LIST_HEAD(&its->its_device_list);
        its->base = its_base;
        its->phys_base = res->start;
-       its->ite_size = ((readl_relaxed(its_base + GITS_TYPER) >> 4) & 0xf) + 1;
+       its->ite_size = ((gic_read_typer(its_base + GITS_TYPER) >> 4) & 0xf) + 1;
        its->numa_node = numa_node;
 
        its->cmd_base = kzalloc(ITS_CMD_QUEUE_SZ, GFP_KERNEL);
@@ -1763,7 +1763,7 @@ out_unmap:
 
 static bool gic_rdists_supports_plpis(void)
 {
-       return !!(readl_relaxed(gic_data_rdist_rd_base() + GICR_TYPER) & GICR_TYPER_PLPIS);
+       return !!(gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER) & GICR_TYPER_PLPIS);
 }
 
 int its_cpu_init(void)
index 9b81bd8b929c0bf925d6e3fe578d03b0c2af7afd..19d642eae096b2495f5707470ea5f857d2f9b876 100644 (file)
@@ -153,7 +153,7 @@ static void gic_enable_redist(bool enable)
                        return; /* No PM support in this redistributor */
        }
 
-       while (count--) {
+       while (--count) {
                val = readl_relaxed(rbase + GICR_WAKER);
                if (enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep))
                        break;
index 58e5b4e870561c6361ff50a9e5869cf8abd4e356..d6c404b3584d5118799f8376d384c2f71b728a69 100644 (file)
@@ -1279,7 +1279,7 @@ static bool gic_check_eoimode(struct device_node *node, void __iomem **base)
                 */
                *base += 0xf000;
                cpuif_res.start += 0xf000;
-               pr_warn("GIC: Adjusting CPU interface base to %pa",
+               pr_warn("GIC: Adjusting CPU interface base to %pa\n",
                        &cpuif_res.start);
        }
 
index 6b304eb39bd22c96b64c6a99629ef4195db721b5..1aec12c6d9ac83cb4338d4c0c44a78b83c1e01d8 100644 (file)
@@ -38,6 +38,7 @@ static void disable_8259A_irq(struct irq_data *d);
 static void enable_8259A_irq(struct irq_data *d);
 static void mask_and_ack_8259A(struct irq_data *d);
 static void init_8259A(int auto_eoi);
+static int (*i8259_poll)(void) = i8259_irq;
 
 static struct irq_chip i8259A_chip = {
        .name                   = "XT-PIC",
@@ -51,6 +52,11 @@ static struct irq_chip i8259A_chip = {
  * 8259A PIC functions to handle ISA devices:
  */
 
+void i8259_set_poll(int (*poll)(void))
+{
+       i8259_poll = poll;
+}
+
 /*
  * This contains the irq mask for both 8259A irq controllers,
  */
@@ -89,24 +95,6 @@ static void enable_8259A_irq(struct irq_data *d)
        raw_spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
-int i8259A_irq_pending(unsigned int irq)
-{
-       unsigned int mask;
-       unsigned long flags;
-       int ret;
-
-       irq -= I8259A_IRQ_BASE;
-       mask = 1 << irq;
-       raw_spin_lock_irqsave(&i8259A_lock, flags);
-       if (irq < 8)
-               ret = inb(PIC_MASTER_CMD) & mask;
-       else
-               ret = inb(PIC_SLAVE_CMD) & (mask >> 8);
-       raw_spin_unlock_irqrestore(&i8259A_lock, flags);
-
-       return ret;
-}
-
 void make_8259A_irq(unsigned int irq)
 {
        disable_irq_nosync(irq);
@@ -355,7 +343,7 @@ void __init init_i8259_irqs(void)
 static void i8259_irq_dispatch(struct irq_desc *desc)
 {
        struct irq_domain *domain = irq_desc_get_handler_data(desc);
-       int hwirq = i8259_irq();
+       int hwirq = i8259_poll();
        unsigned int irq;
 
        if (hwirq < 0)
@@ -370,13 +358,15 @@ int __init i8259_of_init(struct device_node *node, struct device_node *parent)
        struct irq_domain *domain;
        unsigned int parent_irq;
 
+       domain = __init_i8259_irqs(node);
+
        parent_irq = irq_of_parse_and_map(node, 0);
        if (!parent_irq) {
                pr_err("Failed to map i8259 parent IRQ\n");
+               irq_domain_remove(domain);
                return -ENODEV;
        }
 
-       domain = __init_i8259_irqs(node);
        irq_set_chained_handler_and_data(parent_irq, i8259_irq_dispatch,
                                         domain);
        return 0;
index 84b01dec277dfee6f35c286c43448ed258a48199..033bccb41455c46c32d5473fa46940c2770973e9 100644 (file)
 
 static struct irq_chip jcore_aic;
 
+/*
+ * The J-Core AIC1 and AIC2 are cpu-local interrupt controllers and do
+ * not distinguish or use distinct irq number ranges for per-cpu event
+ * interrupts (timer, IPI). Since information to determine whether a
+ * particular irq number should be treated as per-cpu is not available
+ * at mapping time, we use a wrapper handler function which chooses
+ * the right handler at runtime based on whether IRQF_PERCPU was used
+ * when requesting the irq.
+ */
+
+static void handle_jcore_irq(struct irq_desc *desc)
+{
+       if (irqd_is_per_cpu(irq_desc_get_irq_data(desc)))
+               handle_percpu_irq(desc);
+       else
+               handle_simple_irq(desc);
+}
+
 static int jcore_aic_irqdomain_map(struct irq_domain *d, unsigned int irq,
                                   irq_hw_number_t hwirq)
 {
        struct irq_chip *aic = d->host_data;
 
-       irq_set_chip_and_handler(irq, aic, handle_simple_irq);
+       irq_set_chip_and_handler(irq, aic, handle_jcore_irq);
 
        return 0;
 }
index 08c87fadca8ce610e9baff8706dabe2fb7ac82a4..1f32688c312d717639ecfe7de3ca94b35d31a637 100644 (file)
@@ -65,6 +65,7 @@
 #include <linux/mailbox_controller.h>
 #include <linux/mailbox_client.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <acpi/pcc.h>
 
 #include "mailbox.h"
 
@@ -267,6 +268,8 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl,
        if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone)
                chan->txdone_method |= TXDONE_BY_ACK;
 
+       spin_unlock_irqrestore(&chan->lock, flags);
+
        if (pcc_doorbell_irq[subspace_id] > 0) {
                int rc;
 
@@ -275,12 +278,11 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl,
                if (unlikely(rc)) {
                        dev_err(dev, "failed to register PCC interrupt %d\n",
                                pcc_doorbell_irq[subspace_id]);
+                       pcc_mbox_free_channel(chan);
                        chan = ERR_PTR(rc);
                }
        }
 
-       spin_unlock_irqrestore(&chan->lock, flags);
-
        return chan;
 }
 EXPORT_SYMBOL_GPL(pcc_mbox_request_channel);
@@ -304,20 +306,19 @@ void pcc_mbox_free_channel(struct mbox_chan *chan)
                return;
        }
 
+       if (pcc_doorbell_irq[id] > 0)
+               devm_free_irq(chan->mbox->dev, pcc_doorbell_irq[id], chan);
+
        spin_lock_irqsave(&chan->lock, flags);
        chan->cl = NULL;
        chan->active_req = NULL;
        if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
                chan->txdone_method = TXDONE_BY_POLL;
 
-       if (pcc_doorbell_irq[id] > 0)
-               devm_free_irq(chan->mbox->dev, pcc_doorbell_irq[id], chan);
-
        spin_unlock_irqrestore(&chan->lock, flags);
 }
 EXPORT_SYMBOL_GPL(pcc_mbox_free_channel);
 
-
 /**
  * pcc_send_data - Called from Mailbox Controller code. Used
  *             here only to ring the channel doorbell. The PCC client
index 8abde6b8cedc4540dac80b73256317fb671dbd69..6d53810963f7531a7e5048dad5c55ef53e8aa914 100644 (file)
@@ -266,7 +266,7 @@ static struct raid_type {
        {"raid10_offset", "raid10 offset (striped mirrors)",        0, 2, 10, ALGORITHM_RAID10_OFFSET},
        {"raid10_near",   "raid10 near (striped mirrors)",          0, 2, 10, ALGORITHM_RAID10_NEAR},
        {"raid10",        "raid10 (striped mirrors)",               0, 2, 10, ALGORITHM_RAID10_DEFAULT},
-       {"raid4",         "raid4 (dedicated last parity disk)",     1, 2, 4,  ALGORITHM_PARITY_N}, /* raid4 layout = raid5_n */
+       {"raid4",         "raid4 (dedicated first parity disk)",    1, 2, 5,  ALGORITHM_PARITY_0}, /* raid4 layout = raid5_0 */
        {"raid5_n",       "raid5 (dedicated last parity disk)",     1, 2, 5,  ALGORITHM_PARITY_N},
        {"raid5_ls",      "raid5 (left symmetric)",                 1, 2, 5,  ALGORITHM_LEFT_SYMMETRIC},
        {"raid5_rs",      "raid5 (right symmetric)",                1, 2, 5,  ALGORITHM_RIGHT_SYMMETRIC},
@@ -2087,11 +2087,11 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
                /*
                 * No takeover/reshaping, because we don't have the extended v1.9.0 metadata
                 */
-               if (le32_to_cpu(sb->level) != mddev->level) {
+               if (le32_to_cpu(sb->level) != mddev->new_level) {
                        DMERR("Reshaping/takeover raid sets not yet supported. (raid level/stripes/size change)");
                        return -EINVAL;
                }
-               if (le32_to_cpu(sb->layout) != mddev->layout) {
+               if (le32_to_cpu(sb->layout) != mddev->new_layout) {
                        DMERR("Reshaping raid sets not yet supported. (raid layout change)");
                        DMERR("  0x%X vs 0x%X", le32_to_cpu(sb->layout), mddev->layout);
                        DMERR("  Old layout: %s w/ %d copies",
@@ -2102,7 +2102,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
                              raid10_md_layout_to_copies(mddev->layout));
                        return -EINVAL;
                }
-               if (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors) {
+               if (le32_to_cpu(sb->stripe_sectors) != mddev->new_chunk_sectors) {
                        DMERR("Reshaping raid sets not yet supported. (stripe sectors change)");
                        return -EINVAL;
                }
@@ -2115,6 +2115,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
                        return -EINVAL;
                }
 
+               DMINFO("Discovered old metadata format; upgrading to extended metadata format");
+
                /* Table line is checked vs. authoritative superblock */
                rs_set_new(rs);
        }
@@ -2258,7 +2260,8 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
        if (!mddev->events && super_init_validation(rs, rdev))
                return -EINVAL;
 
-       if (le32_to_cpu(sb->compat_features) != FEATURE_FLAG_SUPPORTS_V190) {
+       if (le32_to_cpu(sb->compat_features) &&
+           le32_to_cpu(sb->compat_features) != FEATURE_FLAG_SUPPORTS_V190) {
                rs->ti->error = "Unable to assemble array: Unknown flag(s) in compatible feature flags";
                return -EINVAL;
        }
@@ -3646,7 +3649,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
        .name = "raid",
-       .version = {1, 9, 0},
+       .version = {1, 9, 1},
        .module = THIS_MODULE,
        .ctr = raid_ctr,
        .dtr = raid_dtr,
index bdf1606f67bcfbfcfadcfdc65f2c559c68fff5c9..9a8b71067c6eba52dca7a4050a75cc51199ffbe5 100644 (file)
@@ -145,7 +145,6 @@ static void dispatch_bios(void *context, struct bio_list *bio_list)
 
 struct dm_raid1_bio_record {
        struct mirror *m;
-       /* if details->bi_bdev == NULL, details were not saved */
        struct dm_bio_details details;
        region_t write_region;
 };
@@ -1200,8 +1199,6 @@ static int mirror_map(struct dm_target *ti, struct bio *bio)
        struct dm_raid1_bio_record *bio_record =
          dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record));
 
-       bio_record->details.bi_bdev = NULL;
-
        if (rw == WRITE) {
                /* Save region for mirror_end_io() handler */
                bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio);
@@ -1260,22 +1257,12 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
        }
 
        if (error == -EOPNOTSUPP)
-               goto out;
+               return error;
 
        if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD))
-               goto out;
+               return error;
 
        if (unlikely(error)) {
-               if (!bio_record->details.bi_bdev) {
-                       /*
-                        * There wasn't enough memory to record necessary
-                        * information for a retry or there was no other
-                        * mirror in-sync.
-                        */
-                       DMERR_LIMIT("Mirror read failed.");
-                       return -EIO;
-               }
-
                m = bio_record->m;
 
                DMERR("Mirror read failed from %s. Trying alternative device.",
@@ -1291,7 +1278,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
                        bd = &bio_record->details;
 
                        dm_bio_restore(bd, bio);
-                       bio_record->details.bi_bdev = NULL;
+                       bio->bi_error = 0;
 
                        queue_bio(ms, bio, rw);
                        return DM_ENDIO_INCOMPLETE;
@@ -1299,9 +1286,6 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
                DMERR("All replicated volumes dead, failing I/O");
        }
 
-out:
-       bio_record->details.bi_bdev = NULL;
-
        return error;
 }
 
index dc75bea0d541b3a2892d688c7eaf3093e1168790..1d0d2adc050a5539a4b430bd9e055f99e1abb5d7 100644 (file)
@@ -856,8 +856,11 @@ int dm_old_init_request_queue(struct mapped_device *md)
        kthread_init_worker(&md->kworker);
        md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
                                       "kdmwork-%s", dm_device_name(md));
-       if (IS_ERR(md->kworker_task))
-               return PTR_ERR(md->kworker_task);
+       if (IS_ERR(md->kworker_task)) {
+               int error = PTR_ERR(md->kworker_task);
+               md->kworker_task = NULL;
+               return error;
+       }
 
        elv_register_queue(md->queue);
 
index 3e407a9cde1f190baade6d22e78c30ae5b27b438..c4b53b332607bec3f303671da385656713498e04 100644 (file)
@@ -695,37 +695,32 @@ int dm_table_add_target(struct dm_table *t, const char *type,
 
        tgt->type = dm_get_target_type(type);
        if (!tgt->type) {
-               DMERR("%s: %s: unknown target type", dm_device_name(t->md),
-                     type);
+               DMERR("%s: %s: unknown target type", dm_device_name(t->md), type);
                return -EINVAL;
        }
 
        if (dm_target_needs_singleton(tgt->type)) {
                if (t->num_targets) {
-                       DMERR("%s: target type %s must appear alone in table",
-                             dm_device_name(t->md), type);
-                       return -EINVAL;
+                       tgt->error = "singleton target type must appear alone in table";
+                       goto bad;
                }
                t->singleton = true;
        }
 
        if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) {
-               DMERR("%s: target type %s may not be included in read-only tables",
-                     dm_device_name(t->md), type);
-               return -EINVAL;
+               tgt->error = "target type may not be included in a read-only table";
+               goto bad;
        }
 
        if (t->immutable_target_type) {
                if (t->immutable_target_type != tgt->type) {
-                       DMERR("%s: immutable target type %s cannot be mixed with other target types",
-                             dm_device_name(t->md), t->immutable_target_type->name);
-                       return -EINVAL;
+                       tgt->error = "immutable target type cannot be mixed with other target types";
+                       goto bad;
                }
        } else if (dm_target_is_immutable(tgt->type)) {
                if (t->num_targets) {
-                       DMERR("%s: immutable target type %s cannot be mixed with other target types",
-                             dm_device_name(t->md), tgt->type->name);
-                       return -EINVAL;
+                       tgt->error = "immutable target type cannot be mixed with other target types";
+                       goto bad;
                }
                t->immutable_target_type = tgt->type;
        }
@@ -740,7 +735,6 @@ int dm_table_add_target(struct dm_table *t, const char *type,
         */
        if (!adjoin(t, tgt)) {
                tgt->error = "Gap in table";
-               r = -EINVAL;
                goto bad;
        }
 
index 147af9536d0c10d4054f42306383e6fe118a6ce4..ef7bf1dd6900893c8faf728cb57a93ad8b17af54 100644 (file)
@@ -1423,8 +1423,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
        if (md->bs)
                bioset_free(md->bs);
 
-       cleanup_srcu_struct(&md->io_barrier);
-
        if (md->disk) {
                spin_lock(&_minor_lock);
                md->disk->private_data = NULL;
@@ -1436,6 +1434,8 @@ static void cleanup_mapped_device(struct mapped_device *md)
        if (md->queue)
                blk_cleanup_queue(md->queue);
 
+       cleanup_srcu_struct(&md->io_barrier);
+
        if (md->bdev) {
                bdput(md->bdev);
                md->bdev = NULL;
index eac84d8ff7244b659bef2ccea8c8f4ada8b7bc2f..2089d46b0eb89cf280bd6d90202a9caaeff13d47 100644 (file)
@@ -3887,10 +3887,10 @@ array_state_show(struct mddev *mddev, char *page)
                        st = read_auto;
                        break;
                case 0:
-                       if (mddev->in_sync)
-                               st = clean;
-                       else if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
+                       if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
                                st = write_pending;
+                       else if (mddev->in_sync)
+                               st = clean;
                        else if (mddev->safemode)
                                st = active_idle;
                        else
@@ -8144,14 +8144,14 @@ void md_do_sync(struct md_thread *thread)
 
        if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
            !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
-           mddev->curr_resync > 2) {
+           mddev->curr_resync > 3) {
                mddev->curr_resync_completed = mddev->curr_resync;
                sysfs_notify(&mddev->kobj, NULL, "sync_completed");
        }
        mddev->pers->sync_request(mddev, max_sectors, &skipped);
 
        if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
-           mddev->curr_resync > 2) {
+           mddev->curr_resync > 3) {
                if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
                        if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
                                if (mddev->curr_resync >= mddev->recovery_cp) {
index 1961d827dbd19b5bbe2f4dbee356ac1f2a29fea7..29e2df5cd77b282fd4dd0cff8aa5599d7aacc7ef 100644 (file)
@@ -403,11 +403,14 @@ static void raid1_end_write_request(struct bio *bio)
        struct bio *to_put = NULL;
        int mirror = find_bio_disk(r1_bio, bio);
        struct md_rdev *rdev = conf->mirrors[mirror].rdev;
+       bool discard_error;
+
+       discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
 
        /*
         * 'one mirror IO has finished' event handler:
         */
-       if (bio->bi_error) {
+       if (bio->bi_error && !discard_error) {
                set_bit(WriteErrorSeen, &rdev->flags);
                if (!test_and_set_bit(WantReplacement, &rdev->flags))
                        set_bit(MD_RECOVERY_NEEDED, &
@@ -444,7 +447,7 @@ static void raid1_end_write_request(struct bio *bio)
 
                /* Maybe we can clear some bad blocks. */
                if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors,
-                               &first_bad, &bad_sectors)) {
+                               &first_bad, &bad_sectors) && !discard_error) {
                        r1_bio->bios[mirror] = IO_MADE_GOOD;
                        set_bit(R1BIO_MadeGood, &r1_bio->state);
                }
@@ -2294,17 +2297,23 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
         * This is all done synchronously while the array is
         * frozen
         */
+
+       bio = r1_bio->bios[r1_bio->read_disk];
+       bdevname(bio->bi_bdev, b);
+       bio_put(bio);
+       r1_bio->bios[r1_bio->read_disk] = NULL;
+
        if (mddev->ro == 0) {
                freeze_array(conf, 1);
                fix_read_error(conf, r1_bio->read_disk,
                               r1_bio->sector, r1_bio->sectors);
                unfreeze_array(conf);
-       } else
-               md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+       } else {
+               r1_bio->bios[r1_bio->read_disk] = IO_BLOCKED;
+       }
+
        rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev);
 
-       bio = r1_bio->bios[r1_bio->read_disk];
-       bdevname(bio->bi_bdev, b);
 read_more:
        disk = read_balance(conf, r1_bio, &max_sectors);
        if (disk == -1) {
@@ -2315,11 +2324,6 @@ read_more:
        } else {
                const unsigned long do_sync
                        = r1_bio->master_bio->bi_opf & REQ_SYNC;
-               if (bio) {
-                       r1_bio->bios[r1_bio->read_disk] =
-                               mddev->ro ? IO_BLOCKED : NULL;
-                       bio_put(bio);
-               }
                r1_bio->read_disk = disk;
                bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
                bio_trim(bio, r1_bio->sector - bio->bi_iter.bi_sector,
index be1a9fca3b2d2ade369359d109d1a53ddf30d077..39fddda2fef2d918863699e3f283f441f6fb52c1 100644 (file)
@@ -447,6 +447,9 @@ static void raid10_end_write_request(struct bio *bio)
        struct r10conf *conf = r10_bio->mddev->private;
        int slot, repl;
        struct md_rdev *rdev = NULL;
+       bool discard_error;
+
+       discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD;
 
        dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
 
@@ -460,7 +463,7 @@ static void raid10_end_write_request(struct bio *bio)
        /*
         * this branch is our 'one mirror IO has finished' event handler:
         */
-       if (bio->bi_error) {
+       if (bio->bi_error && !discard_error) {
                if (repl)
                        /* Never record new bad blocks to replacement,
                         * just fail it.
@@ -503,7 +506,7 @@ static void raid10_end_write_request(struct bio *bio)
                if (is_badblock(rdev,
                                r10_bio->devs[slot].addr,
                                r10_bio->sectors,
-                               &first_bad, &bad_sectors)) {
+                               &first_bad, &bad_sectors) && !discard_error) {
                        bio_put(bio);
                        if (repl)
                                r10_bio->devs[slot].repl_bio = IO_MADE_GOOD;
index 1b1ab4a1d132b39f0145b8bc3305484fad5a7091..a227a9f3ee6556b1af15223080e791fda7817989 100644 (file)
@@ -1087,7 +1087,7 @@ static int r5l_recovery_log(struct r5l_log *log)
         * 1's seq + 10 and let superblock points to meta2. The same recovery will
         * not think meta 3 is a valid meta, because its seq doesn't match
         */
-       if (ctx.seq > log->last_cp_seq + 1) {
+       if (ctx.seq > log->last_cp_seq) {
                int ret;
 
                ret = r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq + 10);
@@ -1096,6 +1096,8 @@ static int r5l_recovery_log(struct r5l_log *log)
                log->seq = ctx.seq + 11;
                log->log_start = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS);
                r5l_write_super(log, ctx.pos);
+               log->last_checkpoint = ctx.pos;
+               log->next_checkpoint = ctx.pos;
        } else {
                log->log_start = ctx.pos;
                log->seq = ctx.seq;
@@ -1154,6 +1156,7 @@ create:
        if (create_super) {
                log->last_cp_seq = prandom_u32();
                cp = 0;
+               r5l_log_write_empty_meta_block(log, cp, log->last_cp_seq);
                /*
                 * Make sure super points to correct address. Log might have
                 * data very soon. If super hasn't correct log tail address,
@@ -1168,6 +1171,7 @@ create:
        if (log->max_free_space > RECLAIM_MAX_FREE_SPACE)
                log->max_free_space = RECLAIM_MAX_FREE_SPACE;
        log->last_checkpoint = cp;
+       log->next_checkpoint = cp;
 
        __free_page(page);
 
index 012225587c258abb18f6946e31757112ca7f8981..b71b747ee0baae22a68c081b24132bef5f05bf2a 100644 (file)
@@ -513,6 +513,11 @@ config DVB_AS102_FE
        depends on DVB_CORE
        default DVB_AS102
 
+config DVB_GP8PSK_FE
+       tristate
+       depends on DVB_CORE
+       default DVB_USB_GP8PSK
+
 comment "DVB-C (cable) frontends"
        depends on DVB_CORE
 
index e90165ad361bbae7324cb966a1d668187be471ca..93921a4eaa275997a5ed4178de1acb1a54409430 100644 (file)
@@ -121,6 +121,7 @@ obj-$(CONFIG_DVB_RTL2832_SDR) += rtl2832_sdr.o
 obj-$(CONFIG_DVB_M88RS2000) += m88rs2000.o
 obj-$(CONFIG_DVB_AF9033) += af9033.o
 obj-$(CONFIG_DVB_AS102_FE) += as102_fe.o
+obj-$(CONFIG_DVB_GP8PSK_FE) += gp8psk-fe.o
 obj-$(CONFIG_DVB_TC90522) += tc90522.o
 obj-$(CONFIG_DVB_HORUS3A) += horus3a.o
 obj-$(CONFIG_DVB_ASCOT2E) += ascot2e.o
diff --git a/drivers/media/dvb-frontends/gp8psk-fe.c b/drivers/media/dvb-frontends/gp8psk-fe.c
new file mode 100644 (file)
index 0000000..93f59bf
--- /dev/null
@@ -0,0 +1,400 @@
+/*
+ * Frontend driver for the GENPIX 8psk/qpsk/DCII USB2.0 DVB-S module
+ *
+ * Copyright (C) 2006,2007 Alan Nisota (alannisota@gmail.com)
+ * Copyright (C) 2006,2007 Genpix Electronics (genpix@genpix-electronics.com)
+ *
+ * Thanks to GENPIX for the sample code used to implement this module.
+ *
+ * This module is based off the vp7045 and vp702x modules
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, version 2.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "gp8psk-fe.h"
+#include "dvb_frontend.h"
+
+/* Module parameter: a non-zero value enables the dprintk() output below. */
+static int debug;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "Turn on/off debugging (default:off).");
+
+/*
+ * Debug print, emitted at KERN_DEBUG level only when "debug" is non-zero.
+ * The macro itself prepends "<caller>: " using __func__, so the function
+ * name is printed even when the caller's format string does not include it.
+ */
+#define dprintk(fmt, arg...) do {                                      \
+       if (debug)                                                      \
+               printk(KERN_DEBUG pr_fmt("%s: " fmt),                   \
+                      __func__, ##arg);                                \
+} while (0)
+
+/* Per-frontend driver state; allocated in gp8psk_fe_attach(), freed in gp8psk_fe_release(). */
+struct gp8psk_fe_state {
+       struct dvb_frontend fe;                 /* embedded frontend; its address is what attach returns */
+       void *priv;                             /* opaque cookie passed as first argument to every ops callback */
+       const struct gp8psk_fe_ops *ops;        /* in/out/reload callbacks supplied by the caller of attach */
+       bool is_rev1;                           /* enables the rev1-only steps in set_frontend */
+       u8 lock;                                /* last byte read with GET_SIGNAL_LOCK; non-zero is treated as locked */
+       u16 snr;                                /* first two bytes of GET_SIGNAL_STRENGTH, low byte first */
+       unsigned long next_status_check;        /* jiffies value after which lock/snr are re-read */
+       unsigned long status_check_interval;    /* re-read period in milliseconds */
+};
+
+/*
+ * Read the device's 8PSK configuration byte (GET_8PSK_CONFIG) and test
+ * the bmDCtuned bit.
+ *
+ * Returns non-zero when the bit is set, 0 otherwise.
+ */
+static int gp8psk_tuned_to_DCII(struct dvb_frontend *fe)
+{
+       struct gp8psk_fe_state *st = fe->demodulator_priv;
+       /*
+        * Pre-cleared: the result of the read below is not checked, so if
+        * the callback leaves the buffer untouched we must not end up
+        * testing whatever happened to be on the stack.
+        */
+       u8 status = 0;
+
+       st->ops->in(st->priv, GET_8PSK_CONFIG, 0, 0, &status, 1);
+       return status & bmDCtuned;
+}
+
+/* Issue SET_8PSK_CONFIG with @mode as its value; returns the callback's result. */
+static int gp8psk_set_tuner_mode(struct dvb_frontend *fe, int mode)
+{
+       struct gp8psk_fe_state *state = fe->demodulator_priv;
+       int ret;
+
+       ret = state->ops->out(state->priv, SET_8PSK_CONFIG, mode, 0, NULL, 0);
+
+       return ret;
+}
+
+/*
+ * Refresh the cached lock byte and SNR from the device, but only once
+ * jiffies has passed next_status_check; otherwise the cached values are
+ * left as they are.
+ *
+ * Always returns 0.
+ */
+static int gp8psk_fe_update_status(struct gp8psk_fe_state *st)
+{
+       /*
+        * Zeroed up front: the read results are not checked, so a read that
+        * fills nothing must yield snr == 0 rather than stack garbage.
+        */
+       u8 buf[6] = { 0 };
+
+       /* Cached values are still considered fresh. */
+       if (!time_after(jiffies, st->next_status_check))
+               return 0;
+
+       st->ops->in(st->priv, GET_SIGNAL_LOCK, 0, 0, &st->lock, 1);
+       st->ops->in(st->priv, GET_SIGNAL_STRENGTH, 0, 0, buf, sizeof(buf));
+
+       /* The first two bytes carry the SNR, low byte first. */
+       st->snr = (buf[1]) << 8 | buf[0];
+
+       /* status_check_interval is in milliseconds; convert to jiffies. */
+       st->next_status_check = jiffies + (st->status_check_interval * HZ) / 1000;
+
+       return 0;
+}
+
+/*
+ * Report the frontend status derived from the cached lock byte: every
+ * FE_HAS_* flag when locked, nothing otherwise. The status polling
+ * interval is then set to 1000 ms when locked and 100 ms when not.
+ *
+ * Always returns 0.
+ */
+static int gp8psk_fe_read_status(struct dvb_frontend *fe,
+                                enum fe_status *status)
+{
+       struct gp8psk_fe_state *state = fe->demodulator_priv;
+
+       gp8psk_fe_update_status(state);
+
+       *status = 0;
+       if (state->lock)
+               *status = FE_HAS_LOCK | FE_HAS_SYNC | FE_HAS_VITERBI | FE_HAS_SIGNAL | FE_HAS_CARRIER;
+
+       state->status_check_interval = (*status & FE_HAS_LOCK) ? 1000 : 100;
+
+       return 0;
+}
+
+/* Bit error rate is not supported by this frontend: always reports 0. */
+static int gp8psk_fe_read_ber(struct dvb_frontend *fe, u32 *ber)
+{
+       (void)fe;
+
+       *ber = 0;
+
+       return 0;
+}
+
+/* Uncorrected block count is not supported by this frontend: always reports 0. */
+static int gp8psk_fe_read_unc_blocks(struct dvb_frontend *fe, u32 *unc)
+{
+       (void)fe;
+
+       *unc = 0;
+
+       return 0;
+}
+
+/* Return the cached SNR value after a rate-limited refresh. Always returns 0. */
+static int gp8psk_fe_read_snr(struct dvb_frontend *fe, u16 *snr)
+{
+       struct gp8psk_fe_state *state = fe->demodulator_priv;
+
+       gp8psk_fe_update_status(state);
+
+       /* snr is reported in dBu*256 */
+       *snr = state->snr;
+
+       return 0;
+}
+
+/*
+ * Derive a 16-bit signal strength from the cached SNR.
+ *
+ * snr is reported in dBu*256, and snr / 38.4 ~= 100% strength. Scaling
+ * by 17 brings the top of the accepted range (0xf00) close to 65535;
+ * anything above that range is reported as full scale.
+ *
+ * Always returns 0.
+ */
+static int gp8psk_fe_read_signal_strength(struct dvb_frontend *fe, u16 *strength)
+{
+       struct gp8psk_fe_state *state = fe->demodulator_priv;
+
+       gp8psk_fe_update_status(state);
+
+       if (state->snr <= 0xf00)
+               *strength = state->snr * 17;
+       else
+               *strength = 0xffff;
+
+       return 0;
+}
+
+/* Fill in the tune settings: only min_delay_ms is set, to 800. Always returns 0. */
+static int gp8psk_fe_get_tune_settings(struct dvb_frontend *fe,
+                                      struct dvb_frontend_tune_settings *tune)
+{
+       (void)fe;
+
+       tune->min_delay_ms = 800;
+
+       return 0;
+}
+
+/*
+ * Tune the device from the cached DTV properties.
+ *
+ * Builds a 10-byte TUNE_8PSK command:
+ *   bytes 0-3  symbol rate, low byte first
+ *   bytes 4-7  c->frequency * 1000, low byte first
+ *   byte  8    modulation code (ADV_MOD_*)
+ *   byte  9    FEC code, numbered per modulation
+ *
+ * Returns -EOPNOTSUPP for an unsupported delivery system or modulation,
+ * 0 otherwise. The results of the tuner-mode and TUNE_8PSK transfers are
+ * not checked.
+ */
+static int gp8psk_fe_set_frontend(struct dvb_frontend *fe)
+{
+       struct gp8psk_fe_state *st = fe->demodulator_priv;
+       struct dtv_frontend_properties *c = &fe->dtv_property_cache;
+       u8 cmd[10];
+       u32 freq = c->frequency * 1000;
+
+       dprintk("%s()\n", __func__);
+
+       /* Bytes 4-7: scaled frequency, least-significant byte first. */
+       cmd[4] = freq         & 0xff;
+       cmd[5] = (freq >> 8)  & 0xff;
+       cmd[6] = (freq >> 16) & 0xff;
+       cmd[7] = (freq >> 24) & 0xff;
+
+       /* backwards compatibility: DVB-S + 8-PSK were used for Turbo-FEC */
+       if (c->delivery_system == SYS_DVBS && c->modulation == PSK_8)
+               c->delivery_system = SYS_TURBO;
+
+       /* Validate the delivery system; plain DVB-S is QPSK-only with FEC forced to auto. */
+       switch (c->delivery_system) {
+       case SYS_DVBS:
+               if (c->modulation != QPSK) {
+                       dprintk("%s: unsupported modulation selected (%d)\n",
+                               __func__, c->modulation);
+                       return -EOPNOTSUPP;
+               }
+               c->fec_inner = FEC_AUTO;
+               break;
+       case SYS_DVBS2: /* kept for backwards compatibility */
+               dprintk("%s: DVB-S2 delivery system selected\n", __func__);
+               break;
+       case SYS_TURBO:
+               dprintk("%s: Turbo-FEC delivery system selected\n", __func__);
+               break;
+
+       default:
+               dprintk("%s: unsupported delivery system selected (%d)\n",
+                       __func__, c->delivery_system);
+               return -EOPNOTSUPP;
+       }
+
+       /* Bytes 0-3: symbol rate, least-significant byte first. */
+       cmd[0] =  c->symbol_rate        & 0xff;
+       cmd[1] = (c->symbol_rate >>  8) & 0xff;
+       cmd[2] = (c->symbol_rate >> 16) & 0xff;
+       cmd[3] = (c->symbol_rate >> 24) & 0xff;
+       /* Byte 8: modulation code; byte 9: FEC code (numbering differs per modulation). */
+       switch (c->modulation) {
+       case QPSK:
+               /* rev1 only: invoke the reload callback when the DCII-tuned bit is set. */
+               if (st->is_rev1)
+                       if (gp8psk_tuned_to_DCII(fe))
+                               st->ops->reload(st->priv);
+               switch (c->fec_inner) {
+               case FEC_1_2:
+                       cmd[9] = 0; break;
+               case FEC_2_3:
+                       cmd[9] = 1; break;
+               case FEC_3_4:
+                       cmd[9] = 2; break;
+               case FEC_5_6:
+                       cmd[9] = 3; break;
+               case FEC_7_8:
+                       cmd[9] = 4; break;
+               case FEC_AUTO:
+                       cmd[9] = 5; break;
+               default:
+                       /* Any other FEC value is sent with the same code as FEC_AUTO. */
+                       cmd[9] = 5; break;
+               }
+               if (c->delivery_system == SYS_TURBO)
+                       cmd[8] = ADV_MOD_TURBO_QPSK;
+               else
+                       cmd[8] = ADV_MOD_DVB_QPSK;
+               break;
+       case PSK_8: /* PSK_8 is for compatibility with DN */
+               cmd[8] = ADV_MOD_TURBO_8PSK;
+               switch (c->fec_inner) {
+               case FEC_2_3:
+                       cmd[9] = 0; break;
+               case FEC_3_4:
+                       cmd[9] = 1; break;
+               case FEC_3_5:
+                       cmd[9] = 2; break;
+               case FEC_5_6:
+                       cmd[9] = 3; break;
+               case FEC_8_9:
+                       cmd[9] = 4; break;
+               default:
+                       /* Any other FEC value is sent with the same code as FEC_2_3. */
+                       cmd[9] = 0; break;
+               }
+               break;
+       case QAM_16: /* QAM_16 is for compatibility with DN */
+               cmd[8] = ADV_MOD_TURBO_16QAM;
+               cmd[9] = 0;
+               break;
+       default: /* Unknown modulation */
+               dprintk("%s: unsupported modulation selected (%d)\n",
+                       __func__, c->modulation);
+               return -EOPNOTSUPP;
+       }
+
+       /* rev1 only: set tuner mode 0 before sending the tune command. */
+       if (st->is_rev1)
+               gp8psk_set_tuner_mode(fe, 0);
+       st->ops->out(st->priv, TUNE_8PSK, 0, 0, cmd, 10);
+
+       /*
+        * Drop the cached lock and make the status cache stale so that the
+        * next status read polls the device again; poll every 200 ms until
+        * read_status picks its own interval.
+        */
+       st->lock = 0;
+       st->next_status_check = jiffies;
+       st->status_check_interval = 200;
+
+       return 0;
+}
+
+/*
+ * Send a DiSEqC master command: the first message byte goes in the
+ * request's value field and the whole message is sent as the payload.
+ *
+ * Returns 0 on success, -EINVAL when the transfer callback reports failure.
+ */
+static int gp8psk_fe_send_diseqc_msg(struct dvb_frontend *fe,
+                                    struct dvb_diseqc_master_cmd *m)
+{
+       struct gp8psk_fe_state *state = fe->demodulator_priv;
+
+       dprintk("%s\n", __func__);
+
+       if (!state->ops->out(state->priv, SEND_DISEQC_COMMAND, m->msg[0], 0,
+                            m->msg, m->msg_len))
+               return 0;
+
+       return -EINVAL;
+}
+
+/*
+ * Send a DiSEqC mini burst. The burst selector travels in the request's
+ * value field (0x00 for SEC_MINI_A, 0x01 otherwise); the payload length
+ * passed to the callback is 0.
+ *
+ * Returns 0 on success, -EINVAL when the transfer callback reports failure.
+ */
+static int gp8psk_fe_send_diseqc_burst(struct dvb_frontend *fe,
+                                      enum fe_sec_mini_cmd burst)
+{
+       struct gp8psk_fe_state *state = fe->demodulator_priv;
+       u8 cmd = 0x01;
+
+       dprintk("%s\n", __func__);
+
+       /* These commands are certainly wrong */
+       if (burst == SEC_MINI_A)
+               cmd = 0x00;
+
+       if (!state->ops->out(state->priv, SEND_DISEQC_COMMAND, cmd, 0,
+                            &cmd, 0))
+               return 0;
+
+       return -EINVAL;
+}
+
+/*
+ * Switch the 22 kHz tone: sends SET_22KHZ_TONE with value 1 for
+ * SEC_TONE_ON and 0 for anything else.
+ *
+ * Returns 0 on success, -EINVAL when the transfer callback reports failure.
+ */
+static int gp8psk_fe_set_tone(struct dvb_frontend *fe,
+                             enum fe_sec_tone_mode tone)
+{
+       struct gp8psk_fe_state *state = fe->demodulator_priv;
+
+       if (!state->ops->out(state->priv, SET_22KHZ_TONE,
+                            (tone == SEC_TONE_ON), 0, NULL, 0))
+               return 0;
+
+       return -EINVAL;
+}
+
+/*
+ * Select the LNB voltage: sends SET_LNB_VOLTAGE with value 1 for
+ * SEC_VOLTAGE_18 and 0 for anything else.
+ *
+ * Returns 0 on success, -EINVAL when the transfer callback reports failure.
+ */
+static int gp8psk_fe_set_voltage(struct dvb_frontend *fe,
+                                enum fe_sec_voltage voltage)
+{
+       struct gp8psk_fe_state *state = fe->demodulator_priv;
+
+       if (!state->ops->out(state->priv, SET_LNB_VOLTAGE,
+                            voltage == SEC_VOLTAGE_18, 0, NULL, 0))
+               return 0;
+
+       return -EINVAL;
+}
+
+/* Send USE_EXTRA_VOLT with @onoff as its value; returns the callback's result. */
+static int gp8psk_fe_enable_high_lnb_voltage(struct dvb_frontend *fe, long onoff)
+{
+       struct gp8psk_fe_state *state = fe->demodulator_priv;
+       int ret;
+
+       ret = state->ops->out(state->priv, USE_EXTRA_VOLT, onoff, 0, NULL, 0);
+
+       return ret;
+}
+
+/*
+ * Send a legacy dish switch command: the low seven bits of @sw_cmd go out
+ * with SET_DN_SWITCH, then bit 7 goes out as the SET_LNB_VOLTAGE value.
+ * The second transfer is skipped when the first one fails.
+ *
+ * Returns 0 on success, -EINVAL when either transfer reports failure.
+ */
+static int gp8psk_fe_send_legacy_dish_cmd(struct dvb_frontend *fe, unsigned long sw_cmd)
+{
+       struct gp8psk_fe_state *state = fe->demodulator_priv;
+       u8 switch_bits = sw_cmd & 0x7f;
+
+       if (state->ops->out(state->priv, SET_DN_SWITCH, switch_bits, 0, NULL, 0) ||
+           state->ops->out(state->priv, SET_LNB_VOLTAGE, !!(sw_cmd & 0x80),
+                           0, NULL, 0))
+               return -EINVAL;
+
+       return 0;
+}
+
+/* Free the state allocated by gp8psk_fe_attach(); the frontend is embedded in it. */
+static void gp8psk_fe_release(struct dvb_frontend *fe)
+{
+       kfree(fe->demodulator_priv);
+}
+
+static struct dvb_frontend_ops gp8psk_fe_ops;
+
+/*
+ * Allocate and initialise a gp8psk frontend.
+ *
+ * @ops:     callbacks used to talk to the device; in, out and reload
+ *           must all be set
+ * @priv:    opaque pointer handed back to every callback
+ * @is_rev1: selects the revision 1 handling in the tuning path
+ *
+ * Returns the embedded frontend, or NULL when @ops is incomplete or the
+ * allocation fails.
+ */
+struct dvb_frontend *gp8psk_fe_attach(const struct gp8psk_fe_ops *ops,
+                                     void *priv, bool is_rev1)
+{
+       struct gp8psk_fe_state *state;
+       bool ops_complete = ops && ops->in && ops->out && ops->reload;
+
+       if (!ops_complete) {
+               pr_err("Error! gp8psk-fe ops not defined.\n");
+               return NULL;
+       }
+
+       state = kzalloc(sizeof(*state), GFP_KERNEL);
+       if (!state)
+               return NULL;
+
+       /* Each frontend gets its own copy of the ops template. */
+       memcpy(&state->fe.ops, &gp8psk_fe_ops, sizeof(gp8psk_fe_ops));
+       state->fe.demodulator_priv = state;
+       state->priv = priv;
+       state->ops = ops;
+       state->is_rev1 = is_rev1;
+
+       pr_info("Frontend %sattached\n", is_rev1 ? "revision 1 " : "");
+
+       return &state->fe;
+}
+EXPORT_SYMBOL_GPL(gp8psk_fe_attach);
+
+/*
+ * Frontend operations template; gp8psk_fe_attach() copies it into each
+ * newly allocated frontend. Only SYS_DVBS is listed in delsys.
+ */
+static struct dvb_frontend_ops gp8psk_fe_ops = {
+       .delsys = { SYS_DVBS },
+       .info = {
+               .name                   = "Genpix DVB-S",
+               .frequency_min          = 800000,
+               .frequency_max          = 2250000,
+               .frequency_stepsize     = 100,
+               .symbol_rate_min        = 1000000,
+               .symbol_rate_max        = 45000000,
+               .symbol_rate_tolerance  = 500,  /* ppm */
+               .caps = FE_CAN_INVERSION_AUTO |
+                       FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
+                       FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
+                       /*
+                        * FE_CAN_QAM_16 is for compatibility
+                        * (Myth incorrectly detects Turbo-QPSK as plain QAM-16)
+                        */
+                       FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_TURBO_FEC
+       },
+
+       .release = gp8psk_fe_release,
+
+       /* No init or sleep handlers are provided. */
+       .init = NULL,
+       .sleep = NULL,
+
+       .set_frontend = gp8psk_fe_set_frontend,
+
+       .get_tune_settings = gp8psk_fe_get_tune_settings,
+
+       .read_status = gp8psk_fe_read_status,
+       .read_ber = gp8psk_fe_read_ber,         /* always reports 0 */
+       .read_signal_strength = gp8psk_fe_read_signal_strength,
+       .read_snr = gp8psk_fe_read_snr,
+       .read_ucblocks = gp8psk_fe_read_unc_blocks,     /* always reports 0 */
+
+       .diseqc_send_master_cmd = gp8psk_fe_send_diseqc_msg,
+       .diseqc_send_burst = gp8psk_fe_send_diseqc_burst,
+       .set_tone = gp8psk_fe_set_tone,
+       .set_voltage = gp8psk_fe_set_voltage,
+       .dishnetwork_send_legacy_command = gp8psk_fe_send_legacy_dish_cmd,
+       .enable_high_lnb_voltage = gp8psk_fe_enable_high_lnb_voltage
+};
+
+/* Module metadata; the author address matches the copyright line in the file header. */
+MODULE_AUTHOR("Alan Nisota <alannisota@gmail.com>");
+MODULE_DESCRIPTION("Frontend Driver for Genpix DVB-S");
+MODULE_VERSION("1.1");
+MODULE_LICENSE("GPL");
diff --git a/drivers/media/dvb-frontends/gp8psk-fe.h b/drivers/media/dvb-frontends/gp8psk-fe.h
new file mode 100644 (file)
index 0000000..6c7944b
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * gp8psk_fe driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef GP8PSK_FE_H
+#define GP8PSK_FE_H
+
+#include <linux/types.h>
+
+/* gp8psk commands */
+
+#define GET_8PSK_CONFIG                 0x80    /* in */
+#define SET_8PSK_CONFIG                 0x81
+#define I2C_WRITE                      0x83
+#define I2C_READ                       0x84
+#define ARM_TRANSFER                    0x85
+#define TUNE_8PSK                       0x86
+#define GET_SIGNAL_STRENGTH             0x87    /* in */
+#define LOAD_BCM4500                    0x88
+#define BOOT_8PSK                       0x89    /* in */
+#define START_INTERSIL                  0x8A    /* in */
+#define SET_LNB_VOLTAGE                 0x8B
+#define SET_22KHZ_TONE                  0x8C
+#define SEND_DISEQC_COMMAND             0x8D
+#define SET_DVB_MODE                    0x8E
+#define SET_DN_SWITCH                   0x8F
+#define GET_SIGNAL_LOCK                 0x90    /* in */
+#define GET_FW_VERS                    0x92
+#define GET_SERIAL_NUMBER               0x93    /* in */
+#define USE_EXTRA_VOLT                  0x94
+#define GET_FPGA_VERS                  0x95
+#define CW3K_INIT                      0x9d
+
+/* PSK_configuration bits */
+#define bm8pskStarted                   0x01
+#define bm8pskFW_Loaded                 0x02
+#define bmIntersilOn                    0x04
+#define bmDVBmode                       0x08
+#define bm22kHz                         0x10
+#define bmSEL18V                        0x20
+#define bmDCtuned                       0x40
+#define bmArmed                         0x80
+
+/* Satellite modulation modes */
+#define ADV_MOD_DVB_QPSK 0     /* DVB-S QPSK */
+#define ADV_MOD_TURBO_QPSK 1   /* Turbo QPSK */
+#define ADV_MOD_TURBO_8PSK 2   /* Turbo 8PSK (also used for Trellis 8PSK) */
+#define ADV_MOD_TURBO_16QAM 3  /* Turbo 16QAM (also used for Trellis 8PSK) */
+
+#define ADV_MOD_DCII_C_QPSK 4  /* Digicipher II Combo */
+#define ADV_MOD_DCII_I_QPSK 5  /* Digicipher II I-stream */
+#define ADV_MOD_DCII_Q_QPSK 6  /* Digicipher II Q-stream */
+#define ADV_MOD_DCII_C_OQPSK 7 /* Digicipher II offset QPSK */
+#define ADV_MOD_DSS_QPSK 8     /* DSS (DIRECTV) QPSK */
+#define ADV_MOD_DVB_BPSK 9     /* DVB-S BPSK */
+
+/* firmware revision id's */
+#define GP8PSK_FW_REV1                 0x020604
+#define GP8PSK_FW_REV2                 0x020704
+#define GP8PSK_FW_VERS(_fw_vers) \
+       ((_fw_vers)[2]<<0x10 | (_fw_vers)[1]<<0x08 | (_fw_vers)[0])
+
+struct gp8psk_fe_ops {
+       int (*in)(void *priv, u8 req, u16 value, u16 index, u8 *b, int blen);
+       int (*out)(void *priv, u8 req, u16 value, u16 index, u8 *b, int blen);
+       int (*reload)(void *priv);
+};
+
+struct dvb_frontend *gp8psk_fe_attach(const struct gp8psk_fe_ops *ops,
+                                     void *priv, bool is_rev1);
+
+#endif
index f95a6bc839d58f5f6cde8c37170f297f24ca8b3b..cede3975d04bd90441abcb29e6f169857561f935 100644 (file)
@@ -118,7 +118,7 @@ static int get_key_haup_common(struct IR_i2c *ir, enum rc_type *protocol,
                        *protocol = RC_TYPE_RC6_MCE;
                        dev &= 0x7f;
                        dprintk(1, "ir hauppauge (rc6-mce): t%d vendor=%d dev=%d code=%d\n",
-                                               toggle, vendor, dev, code);
+                                               *ptoggle, vendor, dev, code);
                } else {
                        *ptoggle = 0;
                        *protocol = RC_TYPE_RC6_6A_32;
index 4769469fe842964574eae3a72b812f579ab5f63f..2c9232ef7baa4b57efd020547d0b2af3f42199d4 100644 (file)
@@ -124,8 +124,8 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
        }
 
        /* Get user pages for DMA Xfer */
-       err = get_user_pages_unlocked(user_dma.uaddr, user_dma.page_count, 0,
-                       1, dma->map);
+       err = get_user_pages_unlocked(user_dma.uaddr, user_dma.page_count,
+                       dma->map, FOLL_FORCE);
 
        if (user_dma.page_count != err) {
                IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n",
index b094054cda6e55b64852598d6d22dd6d57e6cdaa..f7299d3d82449ddaefc1ee76fd3f8cb33650e5b4 100644 (file)
@@ -76,11 +76,12 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
 
        /* Get user pages for DMA Xfer */
        y_pages = get_user_pages_unlocked(y_dma.uaddr,
-                       y_dma.page_count, 0, 1, &dma->map[0]);
+                       y_dma.page_count, &dma->map[0], FOLL_FORCE);
        uv_pages = 0; /* silence gcc. value is set and consumed only if: */
        if (y_pages == y_dma.page_count) {
                uv_pages = get_user_pages_unlocked(uv_dma.uaddr,
-                               uv_dma.page_count, 0, 1, &dma->map[y_pages]);
+                               uv_dma.page_count, &dma->map[y_pages],
+                               FOLL_FORCE);
        }
 
        if (y_pages != y_dma.page_count || uv_pages != uv_dma.page_count) {
index e668dde6d85722d2b68d7f399452d22bbf6d8342..a31b95cb3b09c0623be42762cea8d4dc08c99170 100644 (file)
@@ -214,7 +214,7 @@ static int omap_vout_get_userptr(struct videobuf_buffer *vb, u32 virtp,
        if (!vec)
                return -ENOMEM;
 
-       ret = get_vaddr_frames(virtp, 1, true, false, vec);
+       ret = get_vaddr_frames(virtp, 1, FOLL_WRITE, vec);
        if (ret != 1) {
                frame_vector_destroy(vec);
                return -EINVAL;
index 317ef63ee78999d673f85b62af14bed7e664549e..8d96a22647b396c5a8790775a883ff58eabce7ce 100644 (file)
@@ -281,6 +281,14 @@ static void free_firmware(struct xc2028_data *priv)
        int i;
        tuner_dbg("%s called\n", __func__);
 
+       /* free allocated f/w string */
+       if (priv->fname != firmware_name)
+               kfree(priv->fname);
+       priv->fname = NULL;
+
+       priv->state = XC2028_NO_FIRMWARE;
+       memset(&priv->cur_fw, 0, sizeof(priv->cur_fw));
+
        if (!priv->firm)
                return;
 
@@ -291,9 +299,6 @@ static void free_firmware(struct xc2028_data *priv)
 
        priv->firm = NULL;
        priv->firm_size = 0;
-       priv->state = XC2028_NO_FIRMWARE;
-
-       memset(&priv->cur_fw, 0, sizeof(priv->cur_fw));
 }
 
 static int load_all_firmwares(struct dvb_frontend *fe,
@@ -884,9 +889,8 @@ read_not_reliable:
        return 0;
 
 fail:
-       priv->state = XC2028_NO_FIRMWARE;
+       free_firmware(priv);
 
-       memset(&priv->cur_fw, 0, sizeof(priv->cur_fw));
        if (retry_count < 8) {
                msleep(50);
                retry_count++;
@@ -1332,11 +1336,8 @@ static int xc2028_dvb_release(struct dvb_frontend *fe)
        mutex_lock(&xc2028_list_mutex);
 
        /* only perform final cleanup if this is the last instance */
-       if (hybrid_tuner_report_instance_count(priv) == 1) {
+       if (hybrid_tuner_report_instance_count(priv) == 1)
                free_firmware(priv);
-               kfree(priv->ctrl.fname);
-               priv->ctrl.fname = NULL;
-       }
 
        if (priv)
                hybrid_tuner_release_state(priv);
@@ -1399,19 +1400,8 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg)
 
        /*
         * Copy the config data.
-        * For the firmware name, keep a local copy of the string,
-        * in order to avoid troubles during device release.
         */
-       kfree(priv->ctrl.fname);
-       priv->ctrl.fname = NULL;
        memcpy(&priv->ctrl, p, sizeof(priv->ctrl));
-       if (p->fname) {
-               priv->ctrl.fname = kstrdup(p->fname, GFP_KERNEL);
-               if (priv->ctrl.fname == NULL) {
-                       rc = -ENOMEM;
-                       goto unlock;
-               }
-       }
 
        /*
         * If firmware name changed, frees firmware. As free_firmware will
@@ -1426,10 +1416,15 @@ static int xc2028_set_config(struct dvb_frontend *fe, void *priv_cfg)
 
        if (priv->state == XC2028_NO_FIRMWARE) {
                if (!firmware_name[0])
-                       priv->fname = priv->ctrl.fname;
+                       priv->fname = kstrdup(p->fname, GFP_KERNEL);
                else
                        priv->fname = firmware_name;
 
+               if (!priv->fname) {
+                       rc = -ENOMEM;
+                       goto unlock;
+               }
+
                rc = request_firmware_nowait(THIS_MODULE, 1,
                                             priv->fname,
                                             priv->i2c_props.adap->dev.parent,
index d4bdba60b0f71436065e4884d5622cc34dffbfe6..52bc42da8a4ce4dde4848c3a2497c5b286b6ae8b 100644 (file)
@@ -73,23 +73,34 @@ static int flexcop_usb_readwrite_dw(struct flexcop_device *fc, u16 wRegOffsPCI,
        u8 request_type = (read ? USB_DIR_IN : USB_DIR_OUT) | USB_TYPE_VENDOR;
        u8 wAddress = B2C2_FLEX_PCIOFFSET_TO_INTERNALADDR(wRegOffsPCI) |
                (read ? 0x80 : 0);
+       int ret;
+
+       mutex_lock(&fc_usb->data_mutex);
+       if (!read)
+               memcpy(fc_usb->data, val, sizeof(*val));
 
-       int len = usb_control_msg(fc_usb->udev,
+       ret = usb_control_msg(fc_usb->udev,
                        read ? B2C2_USB_CTRL_PIPE_IN : B2C2_USB_CTRL_PIPE_OUT,
                        request,
                        request_type, /* 0xc0 read or 0x40 write */
                        wAddress,
                        0,
-                       val,
+                       fc_usb->data,
                        sizeof(u32),
                        B2C2_WAIT_FOR_OPERATION_RDW * HZ);
 
-       if (len != sizeof(u32)) {
+       if (ret != sizeof(u32)) {
                err("error while %s dword from %d (%d).", read ? "reading" :
                                "writing", wAddress, wRegOffsPCI);
-               return -EIO;
+               if (ret >= 0)
+                       ret = -EIO;
        }
-       return 0;
+
+       if (read && ret >= 0)
+               memcpy(val, fc_usb->data, sizeof(*val));
+       mutex_unlock(&fc_usb->data_mutex);
+
+       return ret;
 }
 /*
  * DKT 010817 - add support for V8 memory read/write and flash update
@@ -100,9 +111,14 @@ static int flexcop_usb_v8_memory_req(struct flexcop_usb *fc_usb,
 {
        u8 request_type = USB_TYPE_VENDOR;
        u16 wIndex;
-       int nWaitTime, pipe, len;
+       int nWaitTime, pipe, ret;
        wIndex = page << 8;
 
+       if (buflen > sizeof(fc_usb->data)) {
+               err("Buffer size bigger than max URB control message\n");
+               return -EIO;
+       }
+
        switch (req) {
        case B2C2_USB_READ_V8_MEM:
                nWaitTime = B2C2_WAIT_FOR_OPERATION_V8READ;
@@ -127,17 +143,32 @@ static int flexcop_usb_v8_memory_req(struct flexcop_usb *fc_usb,
        deb_v8("v8mem: %02x %02x %04x %04x, len: %d\n", request_type, req,
                        wAddress, wIndex, buflen);
 
-       len = usb_control_msg(fc_usb->udev, pipe,
+       mutex_lock(&fc_usb->data_mutex);
+
+       if ((request_type & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT)
+               memcpy(fc_usb->data, pbBuffer, buflen);
+
+       ret = usb_control_msg(fc_usb->udev, pipe,
                        req,
                        request_type,
                        wAddress,
                        wIndex,
-                       pbBuffer,
+                       fc_usb->data,
                        buflen,
                        nWaitTime * HZ);
+       if (ret != buflen)
+               ret = -EIO;
+
+       if (ret >= 0) {
+               ret = 0;
+               if ((request_type & USB_ENDPOINT_DIR_MASK) == USB_DIR_IN)
+                       memcpy(pbBuffer, fc_usb->data, buflen);
+       }
 
-       debug_dump(pbBuffer, len, deb_v8);
-       return len == buflen ? 0 : -EIO;
+       mutex_unlock(&fc_usb->data_mutex);
+
+       debug_dump(pbBuffer, ret, deb_v8);
+       return ret;
 }
 
 #define bytes_left_to_read_on_page(paddr,buflen) \
@@ -196,29 +227,6 @@ static int flexcop_usb_get_mac_addr(struct flexcop_device *fc, int extended)
                fc->dvb_adapter.proposed_mac, 6);
 }
 
-#if 0
-static int flexcop_usb_utility_req(struct flexcop_usb *fc_usb, int set,
-               flexcop_usb_utility_function_t func, u8 extra, u16 wIndex,
-               u16 buflen, u8 *pvBuffer)
-{
-       u16 wValue;
-       u8 request_type = (set ? USB_DIR_OUT : USB_DIR_IN) | USB_TYPE_VENDOR;
-       int nWaitTime = 2,
-           pipe = set ? B2C2_USB_CTRL_PIPE_OUT : B2C2_USB_CTRL_PIPE_IN, len;
-       wValue = (func << 8) | extra;
-
-       len = usb_control_msg(fc_usb->udev,pipe,
-                       B2C2_USB_UTILITY,
-                       request_type,
-                       wValue,
-                       wIndex,
-                       pvBuffer,
-                       buflen,
-                       nWaitTime * HZ);
-       return len == buflen ? 0 : -EIO;
-}
-#endif
-
 /* usb i2c stuff */
 static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c,
                flexcop_usb_request_t req, flexcop_usb_i2c_function_t func,
@@ -226,9 +234,14 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c,
 {
        struct flexcop_usb *fc_usb = i2c->fc->bus_specific;
        u16 wValue, wIndex;
-       int nWaitTime,pipe,len;
+       int nWaitTime, pipe, ret;
        u8 request_type = USB_TYPE_VENDOR;
 
+       if (buflen > sizeof(fc_usb->data)) {
+               err("Buffer size bigger than max URB control message\n");
+               return -EIO;
+       }
+
        switch (func) {
        case USB_FUNC_I2C_WRITE:
        case USB_FUNC_I2C_MULTIWRITE:
@@ -257,15 +270,32 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c,
                        wValue & 0xff, wValue >> 8,
                        wIndex & 0xff, wIndex >> 8);
 
-       len = usb_control_msg(fc_usb->udev,pipe,
+       mutex_lock(&fc_usb->data_mutex);
+
+       if ((request_type & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT)
+               memcpy(fc_usb->data, buf, buflen);
+
+       ret = usb_control_msg(fc_usb->udev, pipe,
                        req,
                        request_type,
                        wValue,
                        wIndex,
-                       buf,
+                       fc_usb->data,
                        buflen,
                        nWaitTime * HZ);
-       return len == buflen ? 0 : -EREMOTEIO;
+
+       if (ret != buflen)
+               ret = -EIO;
+
+       if (ret >= 0) {
+               ret = 0;
+               if ((request_type & USB_ENDPOINT_DIR_MASK) == USB_DIR_IN)
+                       memcpy(buf, fc_usb->data, buflen);
+       }
+
+       mutex_unlock(&fc_usb->data_mutex);
+
+       return 0;
 }
 
 /* actual bus specific access functions,
@@ -516,6 +546,7 @@ static int flexcop_usb_probe(struct usb_interface *intf,
        /* general flexcop init */
        fc_usb = fc->bus_specific;
        fc_usb->fc_dev = fc;
+       mutex_init(&fc_usb->data_mutex);
 
        fc->read_ibi_reg  = flexcop_usb_read_ibi_reg;
        fc->write_ibi_reg = flexcop_usb_write_ibi_reg;
index 92529a9c4475b71ca5381670ceb16d5ccfcd079c..25ad43166e78c759226c603e66fa6e6b81a361f8 100644 (file)
@@ -29,6 +29,10 @@ struct flexcop_usb {
 
        u8 tmp_buffer[1023+190];
        int tmp_buffer_length;
+
+       /* for URB control messages */
+       u8 data[80];
+       struct mutex data_mutex;
 };
 
 #if 0
index 13620cdf05996fd3d549a5668043db0b12b1d381..e9100a23583104d3f6fcdcb4f0dfa9535877704a 100644 (file)
@@ -545,18 +545,30 @@ static void free_sbufs(struct camera_data *cam)
 static int write_packet(struct usb_device *udev,
                        u8 request, u8 * registers, u16 start, size_t size)
 {
+       unsigned char *buf;
+       int ret;
+
        if (!registers || size <= 0)
                return -EINVAL;
 
-       return usb_control_msg(udev,
+       buf = kmalloc(size, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       memcpy(buf, registers, size);
+
+       ret = usb_control_msg(udev,
                               usb_sndctrlpipe(udev, 0),
                               request,
                               USB_TYPE_VENDOR | USB_RECIP_DEVICE,
                               start,   /* value */
                               0,       /* index */
-                              registers,       /* buffer */
+                              buf,     /* buffer */
                               size,
                               HZ);
+
+       kfree(buf);
+       return ret;
 }
 
 /****************************************************************************
@@ -567,18 +579,32 @@ static int write_packet(struct usb_device *udev,
 static int read_packet(struct usb_device *udev,
                       u8 request, u8 * registers, u16 start, size_t size)
 {
+       unsigned char *buf;
+       int ret;
+
        if (!registers || size <= 0)
                return -EINVAL;
 
-       return usb_control_msg(udev,
+       buf = kmalloc(size, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       ret = usb_control_msg(udev,
                               usb_rcvctrlpipe(udev, 0),
                               request,
                               USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_DEVICE,
                               start,   /* value */
                               0,       /* index */
-                              registers,       /* buffer */
+                              buf,     /* buffer */
                               size,
                               HZ);
+
+       if (ret >= 0)
+               memcpy(registers, buf, size);
+
+       kfree(buf);
+
+       return ret;
 }
 
 /******************************************************************************
index 2a7b5a963acfd902f832e8af5f7fbaa9f0f4a76c..3b3f32b426d192af84b874c0c329ce52bb8baec7 100644 (file)
@@ -8,7 +8,7 @@ obj-$(CONFIG_DVB_USB_VP7045) += dvb-usb-vp7045.o
 dvb-usb-vp702x-objs := vp702x.o vp702x-fe.o
 obj-$(CONFIG_DVB_USB_VP702X) += dvb-usb-vp702x.o
 
-dvb-usb-gp8psk-objs := gp8psk.o gp8psk-fe.o
+dvb-usb-gp8psk-objs := gp8psk.o
 obj-$(CONFIG_DVB_USB_GP8PSK) += dvb-usb-gp8psk.o
 
 dvb-usb-dtt200u-objs := dtt200u.o dtt200u-fe.o
index efa782ed6e2d833630f7984646a7ddd2a6a90bdb..7853261906b1afd3334e8e67e74a3f4a5311a13e 100644 (file)
@@ -52,17 +52,15 @@ u8 regmask[8] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff };
 struct af9005_device_state {
        u8 sequence;
        int led_state;
+       unsigned char data[256];
 };
 
 static int af9005_generic_read_write(struct dvb_usb_device *d, u16 reg,
                              int readwrite, int type, u8 * values, int len)
 {
        struct af9005_device_state *st = d->priv;
-       u8 obuf[16] = { 0 };
-       u8 ibuf[17] = { 0 };
-       u8 command;
-       int i;
-       int ret;
+       u8 command, seq;
+       int i, ret;
 
        if (len < 1) {
                err("generic read/write, less than 1 byte. Makes no sense.");
@@ -73,16 +71,17 @@ static int af9005_generic_read_write(struct dvb_usb_device *d, u16 reg,
                return -EINVAL;
        }
 
-       obuf[0] = 14;           /* rest of buffer length low */
-       obuf[1] = 0;            /* rest of buffer length high */
+       mutex_lock(&d->data_mutex);
+       st->data[0] = 14;               /* rest of buffer length low */
+       st->data[1] = 0;                /* rest of buffer length high */
 
-       obuf[2] = AF9005_REGISTER_RW;   /* register operation */
-       obuf[3] = 12;           /* rest of buffer length */
+       st->data[2] = AF9005_REGISTER_RW;       /* register operation */
+       st->data[3] = 12;               /* rest of buffer length */
 
-       obuf[4] = st->sequence++;       /* sequence number */
+       st->data[4] = seq = st->sequence++;     /* sequence number */
 
-       obuf[5] = (u8) (reg >> 8);      /* register address */
-       obuf[6] = (u8) (reg & 0xff);
+       st->data[5] = (u8) (reg >> 8);  /* register address */
+       st->data[6] = (u8) (reg & 0xff);
 
        if (type == AF9005_OFDM_REG) {
                command = AF9005_CMD_OFDM_REG;
@@ -96,51 +95,52 @@ static int af9005_generic_read_write(struct dvb_usb_device *d, u16 reg,
        command |= readwrite;
        if (readwrite == AF9005_CMD_WRITE)
                for (i = 0; i < len; i++)
-                       obuf[8 + i] = values[i];
+                       st->data[8 + i] = values[i];
        else if (type == AF9005_TUNER_REG)
                /* read command for tuner, the first byte contains the i2c address */
-               obuf[8] = values[0];
-       obuf[7] = command;
+               st->data[8] = values[0];
+       st->data[7] = command;
 
-       ret = dvb_usb_generic_rw(d, obuf, 16, ibuf, 17, 0);
+       ret = dvb_usb_generic_rw(d, st->data, 16, st->data, 17, 0);
        if (ret)
-               return ret;
+               goto ret;
 
        /* sanity check */
-       if (ibuf[2] != AF9005_REGISTER_RW_ACK) {
+       if (st->data[2] != AF9005_REGISTER_RW_ACK) {
                err("generic read/write, wrong reply code.");
-               return -EIO;
+               ret = -EIO;
+               goto ret;
        }
-       if (ibuf[3] != 0x0d) {
+       if (st->data[3] != 0x0d) {
                err("generic read/write, wrong length in reply.");
-               return -EIO;
+               ret = -EIO;
+               goto ret;
        }
-       if (ibuf[4] != obuf[4]) {
+       if (st->data[4] != seq) {
                err("generic read/write, wrong sequence in reply.");
-               return -EIO;
+               ret = -EIO;
+               goto ret;
        }
        /*
-          Windows driver doesn't check these fields, in fact sometimes
-          the register in the reply is different that what has been sent
-
-          if (ibuf[5] != obuf[5] || ibuf[6] != obuf[6]) {
-          err("generic read/write, wrong register in reply.");
-          return -EIO;
-          }
-          if (ibuf[7] != command) {
-          err("generic read/write wrong command in reply.");
-          return -EIO;
-          }
+        * In theory, both input and output buffers should have
+        * identical values for st->data[5] to st->data[8].
+        * However, the Windows driver doesn't check these fields; in fact,
+        * sometimes the register in the reply is different from what
+        * has been sent.
         */
-       if (ibuf[16] != 0x01) {
+       if (st->data[16] != 0x01) {
                err("generic read/write wrong status code in reply.");
-               return -EIO;
+               ret = -EIO;
+               goto ret;
        }
+
        if (readwrite == AF9005_CMD_READ)
                for (i = 0; i < len; i++)
-                       values[i] = ibuf[8 + i];
+                       values[i] = st->data[8 + i];
 
-       return 0;
+ret:
+       mutex_unlock(&d->data_mutex);
+       return ret;
 
 }
 
@@ -464,8 +464,7 @@ int af9005_send_command(struct dvb_usb_device *d, u8 command, u8 * wbuf,
        struct af9005_device_state *st = d->priv;
 
        int ret, i, packet_len;
-       u8 buf[64];
-       u8 ibuf[64];
+       u8 seq;
 
        if (wlen < 0) {
                err("send command, wlen less than 0 bytes. Makes no sense.");
@@ -480,94 +479,97 @@ int af9005_send_command(struct dvb_usb_device *d, u8 command, u8 * wbuf,
                return -EINVAL;
        }
        packet_len = wlen + 5;
-       buf[0] = (u8) (packet_len & 0xff);
-       buf[1] = (u8) ((packet_len & 0xff00) >> 8);
-
-       buf[2] = 0x26;          /* packet type */
-       buf[3] = wlen + 3;
-       buf[4] = st->sequence++;
-       buf[5] = command;
-       buf[6] = wlen;
+
+       mutex_lock(&d->data_mutex);
+
+       st->data[0] = (u8) (packet_len & 0xff);
+       st->data[1] = (u8) ((packet_len & 0xff00) >> 8);
+
+       st->data[2] = 0x26;             /* packet type */
+       st->data[3] = wlen + 3;
+       st->data[4] = seq = st->sequence++;
+       st->data[5] = command;
+       st->data[6] = wlen;
        for (i = 0; i < wlen; i++)
-               buf[7 + i] = wbuf[i];
-       ret = dvb_usb_generic_rw(d, buf, wlen + 7, ibuf, rlen + 7, 0);
-       if (ret)
-               return ret;
-       if (ibuf[2] != 0x27) {
+               st->data[7 + i] = wbuf[i];
+       ret = dvb_usb_generic_rw(d, st->data, wlen + 7, st->data, rlen + 7, 0);
+       if (st->data[2] != 0x27) {
                err("send command, wrong reply code.");
-               return -EIO;
-       }
-       if (ibuf[4] != buf[4]) {
+               ret = -EIO;
+       } else if (st->data[4] != seq) {
                err("send command, wrong sequence in reply.");
-               return -EIO;
-       }
-       if (ibuf[5] != 0x01) {
+               ret = -EIO;
+       } else if (st->data[5] != 0x01) {
                err("send command, wrong status code in reply.");
-               return -EIO;
-       }
-       if (ibuf[6] != rlen) {
+               ret = -EIO;
+       } else if (st->data[6] != rlen) {
                err("send command, invalid data length in reply.");
-               return -EIO;
+               ret = -EIO;
        }
-       for (i = 0; i < rlen; i++)
-               rbuf[i] = ibuf[i + 7];
-       return 0;
+       if (!ret) {
+               for (i = 0; i < rlen; i++)
+                       rbuf[i] = st->data[i + 7];
+       }
+
+       mutex_unlock(&d->data_mutex);
+       return ret;
 }
 
 int af9005_read_eeprom(struct dvb_usb_device *d, u8 address, u8 * values,
                       int len)
 {
        struct af9005_device_state *st = d->priv;
-       u8 obuf[16], ibuf[14];
+       u8 seq;
        int ret, i;
 
-       memset(obuf, 0, sizeof(obuf));
-       memset(ibuf, 0, sizeof(ibuf));
+       mutex_lock(&d->data_mutex);
 
-       obuf[0] = 14;           /* length of rest of packet low */
-       obuf[1] = 0;            /* length of rest of packer high */
+       memset(st->data, 0, sizeof(st->data));
 
-       obuf[2] = 0x2a;         /* read/write eeprom */
+       st->data[0] = 14;               /* length of rest of packet low */
+       st->data[1] = 0;                /* length of rest of packer high */
 
-       obuf[3] = 12;           /* size */
+       st->data[2] = 0x2a;             /* read/write eeprom */
 
-       obuf[4] = st->sequence++;
+       st->data[3] = 12;               /* size */
 
-       obuf[5] = 0;            /* read */
+       st->data[4] = seq = st->sequence++;
 
-       obuf[6] = len;
-       obuf[7] = address;
-       ret = dvb_usb_generic_rw(d, obuf, 16, ibuf, 14, 0);
-       if (ret)
-               return ret;
-       if (ibuf[2] != 0x2b) {
+       st->data[5] = 0;                /* read */
+
+       st->data[6] = len;
+       st->data[7] = address;
+       ret = dvb_usb_generic_rw(d, st->data, 16, st->data, 14, 0);
+       if (st->data[2] != 0x2b) {
                err("Read eeprom, invalid reply code");
-               return -EIO;
-       }
-       if (ibuf[3] != 10) {
+               ret = -EIO;
+       } else if (st->data[3] != 10) {
                err("Read eeprom, invalid reply length");
-               return -EIO;
-       }
-       if (ibuf[4] != obuf[4]) {
+               ret = -EIO;
+       } else if (st->data[4] != seq) {
                err("Read eeprom, wrong sequence in reply ");
-               return -EIO;
-       }
-       if (ibuf[5] != 1) {
+               ret = -EIO;
+       } else if (st->data[5] != 1) {
                err("Read eeprom, wrong status in reply ");
-               return -EIO;
+               ret = -EIO;
        }
-       for (i = 0; i < len; i++) {
-               values[i] = ibuf[6 + i];
+
+       if (!ret) {
+               for (i = 0; i < len; i++)
+                       values[i] = st->data[6 + i];
        }
-       return 0;
+       mutex_unlock(&d->data_mutex);
+
+       return ret;
 }
 
-static int af9005_boot_packet(struct usb_device *udev, int type, u8 * reply)
+static int af9005_boot_packet(struct usb_device *udev, int type, u8 *reply,
+                             u8 *buf, int size)
 {
-       u8 buf[FW_BULKOUT_SIZE + 2];
        u16 checksum;
        int act_len, i, ret;
-       memset(buf, 0, sizeof(buf));
+
+       memset(buf, 0, size);
        buf[0] = (u8) (FW_BULKOUT_SIZE & 0xff);
        buf[1] = (u8) ((FW_BULKOUT_SIZE >> 8) & 0xff);
        switch (type) {
@@ -720,15 +722,21 @@ static int af9005_download_firmware(struct usb_device *udev, const struct firmwa
 {
        int i, packets, ret, act_len;
 
-       u8 buf[FW_BULKOUT_SIZE + 2];
+       u8 *buf;
        u8 reply;
 
-       ret = af9005_boot_packet(udev, FW_CONFIG, &reply);
+       buf = kmalloc(FW_BULKOUT_SIZE + 2, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       ret = af9005_boot_packet(udev, FW_CONFIG, &reply, buf,
+                                FW_BULKOUT_SIZE + 2);
        if (ret)
-               return ret;
+               goto err;
        if (reply != 0x01) {
                err("before downloading firmware, FW_CONFIG expected 0x01, received 0x%x", reply);
-               return -EIO;
+               ret = -EIO;
+               goto err;
        }
        packets = fw->size / FW_BULKOUT_SIZE;
        buf[0] = (u8) (FW_BULKOUT_SIZE & 0xff);
@@ -743,28 +751,35 @@ static int af9005_download_firmware(struct usb_device *udev, const struct firmwa
                                   buf, FW_BULKOUT_SIZE + 2, &act_len, 1000);
                if (ret) {
                        err("firmware download failed at packet %d with code %d", i, ret);
-                       return ret;
+                       goto err;
                }
        }
-       ret = af9005_boot_packet(udev, FW_CONFIRM, &reply);
+       ret = af9005_boot_packet(udev, FW_CONFIRM, &reply,
+                                buf, FW_BULKOUT_SIZE + 2);
        if (ret)
-               return ret;
+               goto err;
        if (reply != (u8) (packets & 0xff)) {
                err("after downloading firmware, FW_CONFIRM expected 0x%x, received 0x%x", packets & 0xff, reply);
-               return -EIO;
+               ret = -EIO;
+               goto err;
        }
-       ret = af9005_boot_packet(udev, FW_BOOT, &reply);
+       ret = af9005_boot_packet(udev, FW_BOOT, &reply, buf,
+                                FW_BULKOUT_SIZE + 2);
        if (ret)
-               return ret;
-       ret = af9005_boot_packet(udev, FW_CONFIG, &reply);
+               goto err;
+       ret = af9005_boot_packet(udev, FW_CONFIG, &reply, buf,
+                                FW_BULKOUT_SIZE + 2);
        if (ret)
-               return ret;
+               goto err;
        if (reply != 0x02) {
                err("after downloading firmware, FW_CONFIG expected 0x02, received 0x%x", reply);
-               return -EIO;
+               ret = -EIO;
+               goto err;
        }
 
-       return 0;
+err:
+       kfree(buf);
+       return ret;
 
 }
 
@@ -823,53 +838,59 @@ static int af9005_rc_query(struct dvb_usb_device *d, u32 * event, int *state)
 {
        struct af9005_device_state *st = d->priv;
        int ret, len;
-
-       u8 obuf[5];
-       u8 ibuf[256];
+       u8 seq;
 
        *state = REMOTE_NO_KEY_PRESSED;
        if (rc_decode == NULL) {
                /* it shouldn't never come here */
                return 0;
        }
+
+       mutex_lock(&d->data_mutex);
+
        /* deb_info("rc_query\n"); */
-       obuf[0] = 3;            /* rest of packet length low */
-       obuf[1] = 0;            /* rest of packet lentgh high */
-       obuf[2] = 0x40;         /* read remote */
-       obuf[3] = 1;            /* rest of packet length */
-       obuf[4] = st->sequence++;       /* sequence number */
-       ret = dvb_usb_generic_rw(d, obuf, 5, ibuf, 256, 0);
+       st->data[0] = 3;                /* rest of packet length low */
+       st->data[1] = 0;                /* rest of packet lentgh high */
+       st->data[2] = 0x40;             /* read remote */
+       st->data[3] = 1;                /* rest of packet length */
+       st->data[4] = seq = st->sequence++;     /* sequence number */
+       ret = dvb_usb_generic_rw(d, st->data, 5, st->data, 256, 0);
        if (ret) {
                err("rc query failed");
-               return ret;
+               goto ret;
        }
-       if (ibuf[2] != 0x41) {
+       if (st->data[2] != 0x41) {
                err("rc query bad header.");
-               return -EIO;
-       }
-       if (ibuf[4] != obuf[4]) {
+               ret = -EIO;
+               goto ret;
+       } else if (st->data[4] != seq) {
                err("rc query bad sequence.");
-               return -EIO;
+               ret = -EIO;
+               goto ret;
        }
-       len = ibuf[5];
+       len = st->data[5];
        if (len > 246) {
                err("rc query invalid length");
-               return -EIO;
+               ret = -EIO;
+               goto ret;
        }
        if (len > 0) {
                deb_rc("rc data (%d) ", len);
-               debug_dump((ibuf + 6), len, deb_rc);
-               ret = rc_decode(d, &ibuf[6], len, event, state);
+               debug_dump((st->data + 6), len, deb_rc);
+               ret = rc_decode(d, &st->data[6], len, event, state);
                if (ret) {
                        err("rc_decode failed");
-                       return ret;
+                       goto ret;
                } else {
                        deb_rc("rc_decode state %x event %x\n", *state, *event);
                        if (*state == REMOTE_KEY_REPEAT)
                                *event = d->last_event;
                }
        }
-       return 0;
+
+ret:
+       mutex_unlock(&d->data_mutex);
+       return ret;
 }
 
 static int af9005_power_ctrl(struct dvb_usb_device *d, int onoff)
@@ -953,10 +974,16 @@ static int af9005_identify_state(struct usb_device *udev,
                                 int *cold)
 {
        int ret;
-       u8 reply;
-       ret = af9005_boot_packet(udev, FW_CONFIG, &reply);
+       u8 reply, *buf;
+
+       buf = kmalloc(FW_BULKOUT_SIZE + 2, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       ret = af9005_boot_packet(udev, FW_CONFIG, &reply,
+                                buf, FW_BULKOUT_SIZE + 2);
        if (ret)
-               return ret;
+               goto err;
        deb_info("result of FW_CONFIG in identify state %d\n", reply);
        if (reply == 0x01)
                *cold = 1;
@@ -965,7 +992,10 @@ static int af9005_identify_state(struct usb_device *udev,
        else
                return -EIO;
        deb_info("Identify state cold = %d\n", *cold);
-       return 0;
+
+err:
+       kfree(buf);
+       return ret;
 }
 
 static struct dvb_usb_device_properties af9005_properties;
@@ -974,7 +1004,7 @@ static int af9005_usb_probe(struct usb_interface *intf,
                            const struct usb_device_id *id)
 {
        return dvb_usb_device_init(intf, &af9005_properties,
-                                  THIS_MODULE, NULL, adapter_nr);
+                                 THIS_MODULE, NULL, adapter_nr);
 }
 
 enum af9005_usb_table_entry {
index 9fd1527494ebd65ab200c38f13a20e28330034a1..290275bc7fdee038be993754f9f103c2e22cef36 100644 (file)
@@ -41,6 +41,7 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);
 
 struct cinergyt2_state {
        u8 rc_counter;
+       unsigned char data[64];
 };
 
 /* We are missing a release hook with usb_device data */
@@ -50,38 +51,57 @@ static struct dvb_usb_device_properties cinergyt2_properties;
 
 static int cinergyt2_streaming_ctrl(struct dvb_usb_adapter *adap, int enable)
 {
-       char buf[] = { CINERGYT2_EP1_CONTROL_STREAM_TRANSFER, enable ? 1 : 0 };
-       char result[64];
-       return dvb_usb_generic_rw(adap->dev, buf, sizeof(buf), result,
-                               sizeof(result), 0);
+       struct dvb_usb_device *d = adap->dev;
+       struct cinergyt2_state *st = d->priv;
+       int ret;
+
+       mutex_lock(&d->data_mutex);
+       st->data[0] = CINERGYT2_EP1_CONTROL_STREAM_TRANSFER;
+       st->data[1] = enable ? 1 : 0;
+
+       ret = dvb_usb_generic_rw(d, st->data, 2, st->data, 64, 0);
+       mutex_unlock(&d->data_mutex);
+
+       return ret;
 }
 
 static int cinergyt2_power_ctrl(struct dvb_usb_device *d, int enable)
 {
-       char buf[] = { CINERGYT2_EP1_SLEEP_MODE, enable ? 0 : 1 };
-       char state[3];
-       return dvb_usb_generic_rw(d, buf, sizeof(buf), state, sizeof(state), 0);
+       struct cinergyt2_state *st = d->priv;
+       int ret;
+
+       mutex_lock(&d->data_mutex);
+       st->data[0] = CINERGYT2_EP1_SLEEP_MODE;
+       st->data[1] = enable ? 0 : 1;
+
+       ret = dvb_usb_generic_rw(d, st->data, 2, st->data, 3, 0);
+       mutex_unlock(&d->data_mutex);
+
+       return ret;
 }
 
 static int cinergyt2_frontend_attach(struct dvb_usb_adapter *adap)
 {
-       char query[] = { CINERGYT2_EP1_GET_FIRMWARE_VERSION };
-       char state[3];
+       struct dvb_usb_device *d = adap->dev;
+       struct cinergyt2_state *st = d->priv;
        int ret;
 
        adap->fe_adap[0].fe = cinergyt2_fe_attach(adap->dev);
 
-       ret = dvb_usb_generic_rw(adap->dev, query, sizeof(query), state,
-                               sizeof(state), 0);
+       mutex_lock(&d->data_mutex);
+       st->data[0] = CINERGYT2_EP1_GET_FIRMWARE_VERSION;
+
+       ret = dvb_usb_generic_rw(d, st->data, 1, st->data, 3, 0);
        if (ret < 0) {
                deb_rc("cinergyt2_power_ctrl() Failed to retrieve sleep "
                        "state info\n");
        }
+       mutex_unlock(&d->data_mutex);
 
        /* Copy this pointer as we are gonna need it in the release phase */
        cinergyt2_usb_device = adap->dev;
 
-       return 0;
+       return ret;
 }
 
 static struct rc_map_table rc_map_cinergyt2_table[] = {
@@ -141,13 +161,18 @@ static int repeatable_keys[] = {
 static int cinergyt2_rc_query(struct dvb_usb_device *d, u32 *event, int *state)
 {
        struct cinergyt2_state *st = d->priv;
-       u8 key[5] = {0, 0, 0, 0, 0}, cmd = CINERGYT2_EP1_GET_RC_EVENTS;
-       int i;
+       int i, ret;
 
        *state = REMOTE_NO_KEY_PRESSED;
 
-       dvb_usb_generic_rw(d, &cmd, 1, key, sizeof(key), 0);
-       if (key[4] == 0xff) {
+       mutex_lock(&d->data_mutex);
+       st->data[0] = CINERGYT2_EP1_GET_RC_EVENTS;
+
+       ret = dvb_usb_generic_rw(d, st->data, 1, st->data, 5, 0);
+       if (ret < 0)
+               goto ret;
+
+       if (st->data[4] == 0xff) {
                /* key repeat */
                st->rc_counter++;
                if (st->rc_counter > RC_REPEAT_DELAY) {
@@ -157,34 +182,36 @@ static int cinergyt2_rc_query(struct dvb_usb_device *d, u32 *event, int *state)
                                        *event = d->last_event;
                                        deb_rc("repeat key, event %x\n",
                                                   *event);
-                                       return 0;
+                                       goto ret;
                                }
                        }
                        deb_rc("repeated key (non repeatable)\n");
                }
-               return 0;
+               goto ret;
        }
 
        /* hack to pass checksum on the custom field */
-       key[2] = ~key[1];
-       dvb_usb_nec_rc_key_to_event(d, key, event, state);
-       if (key[0] != 0) {
+       st->data[2] = ~st->data[1];
+       dvb_usb_nec_rc_key_to_event(d, st->data, event, state);
+       if (st->data[0] != 0) {
                if (*event != d->last_event)
                        st->rc_counter = 0;
 
-               deb_rc("key: %*ph\n", 5, key);
+               deb_rc("key: %*ph\n", 5, st->data);
        }
-       return 0;
+
+ret:
+       mutex_unlock(&d->data_mutex);
+       return ret;
 }
 
 static int cinergyt2_usb_probe(struct usb_interface *intf,
                                const struct usb_device_id *id)
 {
        return dvb_usb_device_init(intf, &cinergyt2_properties,
-                                       THIS_MODULE, NULL, adapter_nr);
+                                  THIS_MODULE, NULL, adapter_nr);
 }
 
-
 static struct usb_device_id cinergyt2_usb_table[] = {
        { USB_DEVICE(USB_VID_TERRATEC, 0x0038) },
        { 0 }
index b3ec743a7a2e6816eafa6d8f20a128eaabead3c4..2d29b4174dba0e94977aa988e6a309704898af6d 100644 (file)
@@ -139,32 +139,42 @@ static uint16_t compute_tps(struct dtv_frontend_properties *op)
 struct cinergyt2_fe_state {
        struct dvb_frontend fe;
        struct dvb_usb_device *d;
+
+       unsigned char data[64];
+       struct mutex data_mutex;
+
+       struct dvbt_get_status_msg status;
 };
 
 static int cinergyt2_fe_read_status(struct dvb_frontend *fe,
                                    enum fe_status *status)
 {
        struct cinergyt2_fe_state *state = fe->demodulator_priv;
-       struct dvbt_get_status_msg result;
-       u8 cmd[] = { CINERGYT2_EP1_GET_TUNER_STATUS };
        int ret;
 
-       ret = dvb_usb_generic_rw(state->d, cmd, sizeof(cmd), (u8 *)&result,
-                       sizeof(result), 0);
+       mutex_lock(&state->data_mutex);
+       state->data[0] = CINERGYT2_EP1_GET_TUNER_STATUS;
+
+       ret = dvb_usb_generic_rw(state->d, state->data, 1,
+                                state->data, sizeof(state->status), 0);
+       if (!ret)
+               memcpy(&state->status, state->data, sizeof(state->status));
+       mutex_unlock(&state->data_mutex);
+
        if (ret < 0)
                return ret;
 
        *status = 0;
 
-       if (0xffff - le16_to_cpu(result.gain) > 30)
+       if (0xffff - le16_to_cpu(state->status.gain) > 30)
                *status |= FE_HAS_SIGNAL;
-       if (result.lock_bits & (1 << 6))
+       if (state->status.lock_bits & (1 << 6))
                *status |= FE_HAS_LOCK;
-       if (result.lock_bits & (1 << 5))
+       if (state->status.lock_bits & (1 << 5))
                *status |= FE_HAS_SYNC;
-       if (result.lock_bits & (1 << 4))
+       if (state->status.lock_bits & (1 << 4))
                *status |= FE_HAS_CARRIER;
-       if (result.lock_bits & (1 << 1))
+       if (state->status.lock_bits & (1 << 1))
                *status |= FE_HAS_VITERBI;
 
        if ((*status & (FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC)) !=
@@ -177,34 +187,16 @@ static int cinergyt2_fe_read_status(struct dvb_frontend *fe,
 static int cinergyt2_fe_read_ber(struct dvb_frontend *fe, u32 *ber)
 {
        struct cinergyt2_fe_state *state = fe->demodulator_priv;
-       struct dvbt_get_status_msg status;
-       char cmd[] = { CINERGYT2_EP1_GET_TUNER_STATUS };
-       int ret;
-
-       ret = dvb_usb_generic_rw(state->d, cmd, sizeof(cmd), (char *)&status,
-                               sizeof(status), 0);
-       if (ret < 0)
-               return ret;
 
-       *ber = le32_to_cpu(status.viterbi_error_rate);
+       *ber = le32_to_cpu(state->status.viterbi_error_rate);
        return 0;
 }
 
 static int cinergyt2_fe_read_unc_blocks(struct dvb_frontend *fe, u32 *unc)
 {
        struct cinergyt2_fe_state *state = fe->demodulator_priv;
-       struct dvbt_get_status_msg status;
-       u8 cmd[] = { CINERGYT2_EP1_GET_TUNER_STATUS };
-       int ret;
 
-       ret = dvb_usb_generic_rw(state->d, cmd, sizeof(cmd), (u8 *)&status,
-                               sizeof(status), 0);
-       if (ret < 0) {
-               err("cinergyt2_fe_read_unc_blocks() Failed! (Error=%d)\n",
-                       ret);
-               return ret;
-       }
-       *unc = le32_to_cpu(status.uncorrected_block_count);
+       *unc = le32_to_cpu(state->status.uncorrected_block_count);
        return 0;
 }
 
@@ -212,35 +204,16 @@ static int cinergyt2_fe_read_signal_strength(struct dvb_frontend *fe,
                                                u16 *strength)
 {
        struct cinergyt2_fe_state *state = fe->demodulator_priv;
-       struct dvbt_get_status_msg status;
-       char cmd[] = { CINERGYT2_EP1_GET_TUNER_STATUS };
-       int ret;
 
-       ret = dvb_usb_generic_rw(state->d, cmd, sizeof(cmd), (char *)&status,
-                               sizeof(status), 0);
-       if (ret < 0) {
-               err("cinergyt2_fe_read_signal_strength() Failed!"
-                       " (Error=%d)\n", ret);
-               return ret;
-       }
-       *strength = (0xffff - le16_to_cpu(status.gain));
+       *strength = (0xffff - le16_to_cpu(state->status.gain));
        return 0;
 }
 
 static int cinergyt2_fe_read_snr(struct dvb_frontend *fe, u16 *snr)
 {
        struct cinergyt2_fe_state *state = fe->demodulator_priv;
-       struct dvbt_get_status_msg status;
-       char cmd[] = { CINERGYT2_EP1_GET_TUNER_STATUS };
-       int ret;
 
-       ret = dvb_usb_generic_rw(state->d, cmd, sizeof(cmd), (char *)&status,
-                               sizeof(status), 0);
-       if (ret < 0) {
-               err("cinergyt2_fe_read_snr() Failed! (Error=%d)\n", ret);
-               return ret;
-       }
-       *snr = (status.snr << 8) | status.snr;
+       *snr = (state->status.snr << 8) | state->status.snr;
        return 0;
 }
 
@@ -266,34 +239,36 @@ static int cinergyt2_fe_set_frontend(struct dvb_frontend *fe)
 {
        struct dtv_frontend_properties *fep = &fe->dtv_property_cache;
        struct cinergyt2_fe_state *state = fe->demodulator_priv;
-       struct dvbt_set_parameters_msg param;
-       char result[2];
+       struct dvbt_set_parameters_msg *param;
        int err;
 
-       param.cmd = CINERGYT2_EP1_SET_TUNER_PARAMETERS;
-       param.tps = cpu_to_le16(compute_tps(fep));
-       param.freq = cpu_to_le32(fep->frequency / 1000);
-       param.flags = 0;
+       mutex_lock(&state->data_mutex);
+
+       param = (void *)state->data;
+       param->cmd = CINERGYT2_EP1_SET_TUNER_PARAMETERS;
+       param->tps = cpu_to_le16(compute_tps(fep));
+       param->freq = cpu_to_le32(fep->frequency / 1000);
+       param->flags = 0;
 
        switch (fep->bandwidth_hz) {
        default:
        case 8000000:
-               param.bandwidth = 8;
+               param->bandwidth = 8;
                break;
        case 7000000:
-               param.bandwidth = 7;
+               param->bandwidth = 7;
                break;
        case 6000000:
-               param.bandwidth = 6;
+               param->bandwidth = 6;
                break;
        }
 
-       err = dvb_usb_generic_rw(state->d,
-                       (char *)&param, sizeof(param),
-                       result, sizeof(result), 0);
+       err = dvb_usb_generic_rw(state->d, state->data, sizeof(*param),
+                                state->data, 2, 0);
        if (err < 0)
                err("cinergyt2_fe_set_frontend() Failed! err=%d\n", err);
 
+       mutex_unlock(&state->data_mutex);
        return (err < 0) ? err : 0;
 }
 
@@ -315,6 +290,7 @@ struct dvb_frontend *cinergyt2_fe_attach(struct dvb_usb_device *d)
        s->d = d;
        memcpy(&s->fe.ops, &cinergyt2_fe_ops, sizeof(struct dvb_frontend_ops));
        s->fe.demodulator_priv = s;
+       mutex_init(&s->data_mutex);
        return &s->fe;
 }
 
index 907ac01ae2979a56657e6ddc32fafecfbb9c9042..243403081fa53f2860a5b67469593b5ecffae62a 100644 (file)
@@ -45,9 +45,6 @@
 #include "si2168.h"
 #include "si2157.h"
 
-/* Max transfer size done by I2C transfer functions */
-#define MAX_XFER_SIZE  80
-
 /* debug */
 static int dvb_usb_cxusb_debug;
 module_param_named(debug, dvb_usb_cxusb_debug, int, 0644);
@@ -61,23 +58,27 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);
 static int cxusb_ctrl_msg(struct dvb_usb_device *d,
                          u8 cmd, u8 *wbuf, int wlen, u8 *rbuf, int rlen)
 {
-       int wo = (rbuf == NULL || rlen == 0); /* write-only */
-       u8 sndbuf[MAX_XFER_SIZE];
+       struct cxusb_state *st = d->priv;
+       int ret, wo;
 
-       if (1 + wlen > sizeof(sndbuf)) {
-               warn("i2c wr: len=%d is too big!\n",
-                    wlen);
+       if (1 + wlen > MAX_XFER_SIZE) {
+               warn("i2c wr: len=%d is too big!\n", wlen);
                return -EOPNOTSUPP;
        }
 
-       memset(sndbuf, 0, 1+wlen);
+       wo = (rbuf == NULL || rlen == 0); /* write-only */
 
-       sndbuf[0] = cmd;
-       memcpy(&sndbuf[1], wbuf, wlen);
+       mutex_lock(&d->data_mutex);
+       st->data[0] = cmd;
+       memcpy(&st->data[1], wbuf, wlen);
        if (wo)
-               return dvb_usb_generic_write(d, sndbuf, 1+wlen);
+               ret = dvb_usb_generic_write(d, st->data, 1 + wlen);
        else
-               return dvb_usb_generic_rw(d, sndbuf, 1+wlen, rbuf, rlen, 0);
+               ret = dvb_usb_generic_rw(d, st->data, 1 + wlen,
+                                        rbuf, rlen, 0);
+
+       mutex_unlock(&d->data_mutex);
+       return ret;
 }
 
 /* GPIO */
index 527ff7905e1590961b64b8edd3b62b859a9f903a..18acda19527a644fc31e0596e7206c70193cf1d0 100644 (file)
 #define CMD_ANALOG        0x50
 #define CMD_DIGITAL       0x51
 
+/* Max transfer size done by I2C transfer functions */
+#define MAX_XFER_SIZE  80
+
 struct cxusb_state {
        u8 gpio_write_state[3];
        struct i2c_client *i2c_client_demod;
        struct i2c_client *i2c_client_tuner;
+
+       unsigned char data[MAX_XFER_SIZE];
 };
 
 #endif
index f3196658fb700706e12b61fd8b0951d25c2f1152..47ce9d5de4c678e78d1cd6cc075661ddaf40b0f6 100644 (file)
@@ -213,7 +213,7 @@ static int dib0700_i2c_xfer_new(struct i2c_adapter *adap, struct i2c_msg *msg,
                                                 usb_rcvctrlpipe(d->udev, 0),
                                                 REQUEST_NEW_I2C_READ,
                                                 USB_TYPE_VENDOR | USB_DIR_IN,
-                                                value, index, msg[i].buf,
+                                                value, index, st->buf,
                                                 msg[i].len,
                                                 USB_CTRL_GET_TIMEOUT);
                        if (result < 0) {
@@ -221,6 +221,14 @@ static int dib0700_i2c_xfer_new(struct i2c_adapter *adap, struct i2c_msg *msg,
                                break;
                        }
 
+                       if (msg[i].len > sizeof(st->buf)) {
+                               deb_info("buffer too small to fit %d bytes\n",
+                                        msg[i].len);
+                               return -EIO;
+                       }
+
+                       memcpy(msg[i].buf, st->buf, msg[i].len);
+
                        deb_data("<<< ");
                        debug_dump(msg[i].buf, msg[i].len, deb_data);
 
@@ -238,6 +246,13 @@ static int dib0700_i2c_xfer_new(struct i2c_adapter *adap, struct i2c_msg *msg,
                        /* I2C ctrl + FE bus; */
                        st->buf[3] = ((gen_mode << 6) & 0xC0) |
                                 ((bus_mode << 4) & 0x30);
+
+                       if (msg[i].len > sizeof(st->buf) - 4) {
+                               deb_info("i2c message to big: %d\n",
+                                        msg[i].len);
+                               return -EIO;
+                       }
+
                        /* The Actual i2c payload */
                        memcpy(&st->buf[4], msg[i].buf, msg[i].len);
 
@@ -283,6 +298,11 @@ static int dib0700_i2c_xfer_legacy(struct i2c_adapter *adap,
                /* fill in the address */
                st->buf[1] = msg[i].addr << 1;
                /* fill the buffer */
+               if (msg[i].len > sizeof(st->buf) - 2) {
+                       deb_info("i2c xfer to big: %d\n",
+                               msg[i].len);
+                       return -EIO;
+               }
                memcpy(&st->buf[2], msg[i].buf, msg[i].len);
 
                /* write/read request */
@@ -292,13 +312,20 @@ static int dib0700_i2c_xfer_legacy(struct i2c_adapter *adap,
 
                        /* special thing in the current firmware: when length is zero the read-failed */
                        len = dib0700_ctrl_rd(d, st->buf, msg[i].len + 2,
-                                       msg[i+1].buf, msg[i+1].len);
+                                             st->buf, msg[i + 1].len);
                        if (len <= 0) {
                                deb_info("I2C read failed on address 0x%02x\n",
                                                msg[i].addr);
                                break;
                        }
 
+                       if (msg[i + 1].len > sizeof(st->buf)) {
+                               deb_info("i2c xfer buffer to small for %d\n",
+                                       msg[i].len);
+                               return -EIO;
+                       }
+                       memcpy(msg[i + 1].buf, st->buf, msg[i + 1].len);
+
                        msg[i+1].len = len;
 
                        i++;
@@ -677,7 +704,7 @@ static void dib0700_rc_urb_completion(struct urb *purb)
        struct dvb_usb_device *d = purb->context;
        struct dib0700_rc_response *poll_reply;
        enum rc_type protocol;
-       u32 uninitialized_var(keycode);
+       u32 keycode;
        u8 toggle;
 
        deb_info("%s()\n", __func__);
@@ -718,7 +745,8 @@ static void dib0700_rc_urb_completion(struct urb *purb)
                    poll_reply->nec.data       == 0x00 &&
                    poll_reply->nec.not_data   == 0xff) {
                        poll_reply->data_state = 2;
-                       break;
+                       rc_repeat(d->rc_dev);
+                       goto resubmit;
                }
 
                if ((poll_reply->nec.data ^ poll_reply->nec.not_data) != 0xff) {
index 0857b56e652cf96515d34b890ff65693c5936507..ef1b8ee75c577f00c61b07b80859ef063722d907 100644 (file)
@@ -508,8 +508,6 @@ static int stk7700ph_tuner_attach(struct dvb_usb_adapter *adap)
 
 #define DEFAULT_RC_INTERVAL 50
 
-static u8 rc_request[] = { REQUEST_POLL_RC, 0 };
-
 /*
  * This function is used only when firmware is < 1.20 version. Newer
  * firmwares use bulk mode, with functions implemented at dib0700_core,
@@ -517,7 +515,6 @@ static u8 rc_request[] = { REQUEST_POLL_RC, 0 };
  */
 static int dib0700_rc_query_old_firmware(struct dvb_usb_device *d)
 {
-       u8 key[4];
        enum rc_type protocol;
        u32 scancode;
        u8 toggle;
@@ -532,39 +529,43 @@ static int dib0700_rc_query_old_firmware(struct dvb_usb_device *d)
                return 0;
        }
 
-       i = dib0700_ctrl_rd(d, rc_request, 2, key, 4);
+       st->buf[0] = REQUEST_POLL_RC;
+       st->buf[1] = 0;
+
+       i = dib0700_ctrl_rd(d, st->buf, 2, st->buf, 4);
        if (i <= 0) {
                err("RC Query Failed");
-               return -1;
+               return -EIO;
        }
 
        /* losing half of KEY_0 events from Philipps rc5 remotes.. */
-       if (key[0] == 0 && key[1] == 0 && key[2] == 0 && key[3] == 0)
+       if (st->buf[0] == 0 && st->buf[1] == 0
+           && st->buf[2] == 0 && st->buf[3] == 0)
                return 0;
 
-       /* info("%d: %2X %2X %2X %2X",dvb_usb_dib0700_ir_proto,(int)key[3-2],(int)key[3-3],(int)key[3-1],(int)key[3]);  */
+       /* info("%d: %2X %2X %2X %2X",dvb_usb_dib0700_ir_proto,(int)st->buf[3 - 2],(int)st->buf[3 - 3],(int)st->buf[3 - 1],(int)st->buf[3]);  */
 
        dib0700_rc_setup(d, NULL); /* reset ir sensor data to prevent false events */
 
        switch (d->props.rc.core.protocol) {
        case RC_BIT_NEC:
                /* NEC protocol sends repeat code as 0 0 0 FF */
-               if ((key[3-2] == 0x00) && (key[3-3] == 0x00) &&
-                   (key[3] == 0xff)) {
+               if ((st->buf[3 - 2] == 0x00) && (st->buf[3 - 3] == 0x00) &&
+                   (st->buf[3] == 0xff)) {
                        rc_repeat(d->rc_dev);
                        return 0;
                }
 
                protocol = RC_TYPE_NEC;
-               scancode = RC_SCANCODE_NEC(key[3-2], key[3-3]);
+               scancode = RC_SCANCODE_NEC(st->buf[3 - 2], st->buf[3 - 3]);
                toggle = 0;
                break;
 
        default:
                /* RC-5 protocol changes toggle bit on new keypress */
                protocol = RC_TYPE_RC5;
-               scancode = RC_SCANCODE_RC5(key[3-2], key[3-3]);
-               toggle = key[3-1];
+               scancode = RC_SCANCODE_RC5(st->buf[3 - 2], st->buf[3 - 3]);
+               toggle = st->buf[3 - 1];
                break;
        }
 
index 18ed3bfbb5e2a95f5127b41d0e1f15cce01b1f07..de3ee2547479428cd1c14347db5332ea1090c60d 100644 (file)
@@ -62,72 +62,117 @@ EXPORT_SYMBOL(dibusb_pid_filter_ctrl);
 
 int dibusb_power_ctrl(struct dvb_usb_device *d, int onoff)
 {
-       u8 b[3];
+       u8 *b;
        int ret;
+
+       b = kmalloc(3, GFP_KERNEL);
+       if (!b)
+               return -ENOMEM;
+
        b[0] = DIBUSB_REQ_SET_IOCTL;
        b[1] = DIBUSB_IOCTL_CMD_POWER_MODE;
        b[2] = onoff ? DIBUSB_IOCTL_POWER_WAKEUP : DIBUSB_IOCTL_POWER_SLEEP;
-       ret = dvb_usb_generic_write(d,b,3);
+
+       ret = dvb_usb_generic_write(d, b, 3);
+
+       kfree(b);
+
        msleep(10);
+
        return ret;
 }
 EXPORT_SYMBOL(dibusb_power_ctrl);
 
 int dibusb2_0_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff)
 {
-       u8 b[3] = { 0 };
        int ret;
+       u8 *b;
+
+       b = kmalloc(3, GFP_KERNEL);
+       if (!b)
+               return -ENOMEM;
 
        if ((ret = dibusb_streaming_ctrl(adap,onoff)) < 0)
-               return ret;
+               goto ret;
 
        if (onoff) {
                b[0] = DIBUSB_REQ_SET_STREAMING_MODE;
                b[1] = 0x00;
-               if ((ret = dvb_usb_generic_write(adap->dev,b,2)) < 0)
-                       return ret;
+               ret = dvb_usb_generic_write(adap->dev, b, 2);
+               if (ret  < 0)
+                       goto ret;
        }
 
        b[0] = DIBUSB_REQ_SET_IOCTL;
        b[1] = onoff ? DIBUSB_IOCTL_CMD_ENABLE_STREAM : DIBUSB_IOCTL_CMD_DISABLE_STREAM;
-       return dvb_usb_generic_write(adap->dev,b,3);
+       ret = dvb_usb_generic_write(adap->dev, b, 3);
+
+ret:
+       kfree(b);
+       return ret;
 }
 EXPORT_SYMBOL(dibusb2_0_streaming_ctrl);
 
 int dibusb2_0_power_ctrl(struct dvb_usb_device *d, int onoff)
 {
-       if (onoff) {
-               u8 b[3] = { DIBUSB_REQ_SET_IOCTL, DIBUSB_IOCTL_CMD_POWER_MODE, DIBUSB_IOCTL_POWER_WAKEUP };
-               return dvb_usb_generic_write(d,b,3);
-       } else
+       u8 *b;
+       int ret;
+
+       if (!onoff)
                return 0;
+
+       b = kmalloc(3, GFP_KERNEL);
+       if (!b)
+               return -ENOMEM;
+
+       b[0] = DIBUSB_REQ_SET_IOCTL;
+       b[1] = DIBUSB_IOCTL_CMD_POWER_MODE;
+       b[2] = DIBUSB_IOCTL_POWER_WAKEUP;
+
+       ret = dvb_usb_generic_write(d, b, 3);
+
+       kfree(b);
+
+       return ret;
 }
 EXPORT_SYMBOL(dibusb2_0_power_ctrl);
 
 static int dibusb_i2c_msg(struct dvb_usb_device *d, u8 addr,
                          u8 *wbuf, u16 wlen, u8 *rbuf, u16 rlen)
 {
-       u8 sndbuf[MAX_XFER_SIZE]; /* lead(1) devaddr,direction(1) addr(2) data(wlen) (len(2) (when reading)) */
+       u8 *sndbuf;
+       int ret, wo, len;
+
        /* write only ? */
-       int wo = (rbuf == NULL || rlen == 0),
-               len = 2 + wlen + (wo ? 0 : 2);
+       wo = (rbuf == NULL || rlen == 0);
+
+       len = 2 + wlen + (wo ? 0 : 2);
+
+       sndbuf = kmalloc(MAX_XFER_SIZE, GFP_KERNEL);
+       if (!sndbuf)
+               return -ENOMEM;
 
-       if (4 + wlen > sizeof(sndbuf)) {
+       if (4 + wlen > MAX_XFER_SIZE) {
                warn("i2c wr: len=%d is too big!\n", wlen);
-               return -EOPNOTSUPP;
+               ret = -EOPNOTSUPP;
+               goto ret;
        }
 
        sndbuf[0] = wo ? DIBUSB_REQ_I2C_WRITE : DIBUSB_REQ_I2C_READ;
        sndbuf[1] = (addr << 1) | (wo ? 0 : 1);
 
-       memcpy(&sndbuf[2],wbuf,wlen);
+       memcpy(&sndbuf[2], wbuf, wlen);
 
        if (!wo) {
-               sndbuf[wlen+2] = (rlen >> 8) & 0xff;
-               sndbuf[wlen+3] = rlen & 0xff;
+               sndbuf[wlen + 2] = (rlen >> 8) & 0xff;
+               sndbuf[wlen + 3] = rlen & 0xff;
        }
 
-       return dvb_usb_generic_rw(d,sndbuf,len,rbuf,rlen,0);
+       ret = dvb_usb_generic_rw(d, sndbuf, len, rbuf, rlen, 0);
+
+ret:
+       kfree(sndbuf);
+       return ret;
 }
 
 /*
@@ -319,11 +364,27 @@ EXPORT_SYMBOL(rc_map_dibusb_table);
 
 int dibusb_rc_query(struct dvb_usb_device *d, u32 *event, int *state)
 {
-       u8 key[5],cmd = DIBUSB_REQ_POLL_REMOTE;
-       dvb_usb_generic_rw(d,&cmd,1,key,5,0);
-       dvb_usb_nec_rc_key_to_event(d,key,event,state);
-       if (key[0] != 0)
-               deb_info("key: %*ph\n", 5, key);
-       return 0;
+       u8 *buf;
+       int ret;
+
+       buf = kmalloc(5, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       buf[0] = DIBUSB_REQ_POLL_REMOTE;
+
+       ret = dvb_usb_generic_rw(d, buf, 1, buf, 5, 0);
+       if (ret < 0)
+               goto ret;
+
+       dvb_usb_nec_rc_key_to_event(d, buf, event, state);
+
+       if (buf[0] != 0)
+               deb_info("key: %*ph\n", 5, buf);
+
+       kfree(buf);
+
+ret:
+       return ret;
 }
 EXPORT_SYMBOL(dibusb_rc_query);
index 3f82163d8ab89061410bdff4b68bea2e1d153c27..697be2a17adef131b68a2f34a2401b2108174b68 100644 (file)
@@ -96,6 +96,9 @@
 #define DIBUSB_IOCTL_CMD_ENABLE_STREAM 0x01
 #define DIBUSB_IOCTL_CMD_DISABLE_STREAM        0x02
 
+/* Max transfer size done by I2C transfer functions */
+#define MAX_XFER_SIZE  64
+
 struct dibusb_state {
        struct dib_fe_xfer_ops ops;
        int mt2060_present;
index 63134335c99406ca7df3d284cd3ba95d4154cae3..4284f6984dc1ffe14698b6fcdc42aad8cf69bf32 100644 (file)
@@ -28,22 +28,26 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);
 static int digitv_ctrl_msg(struct dvb_usb_device *d,
                u8 cmd, u8 vv, u8 *wbuf, int wlen, u8 *rbuf, int rlen)
 {
-       int wo = (rbuf == NULL || rlen == 0); /* write-only */
-       u8 sndbuf[7],rcvbuf[7];
-       memset(sndbuf,0,7); memset(rcvbuf,0,7);
+       struct digitv_state *st = d->priv;
+       int ret, wo;
 
-       sndbuf[0] = cmd;
-       sndbuf[1] = vv;
-       sndbuf[2] = wo ? wlen : rlen;
+       wo = (rbuf == NULL || rlen == 0); /* write-only */
+
+       memset(st->sndbuf, 0, 7);
+       memset(st->rcvbuf, 0, 7);
+
+       st->sndbuf[0] = cmd;
+       st->sndbuf[1] = vv;
+       st->sndbuf[2] = wo ? wlen : rlen;
 
        if (wo) {
-               memcpy(&sndbuf[3],wbuf,wlen);
-               dvb_usb_generic_write(d,sndbuf,7);
+               memcpy(&st->sndbuf[3], wbuf, wlen);
+               ret = dvb_usb_generic_write(d, st->sndbuf, 7);
        } else {
-               dvb_usb_generic_rw(d,sndbuf,7,rcvbuf,7,10);
-               memcpy(rbuf,&rcvbuf[3],rlen);
+               ret = dvb_usb_generic_rw(d, st->sndbuf, 7, st->rcvbuf, 7, 10);
+               memcpy(rbuf, &st->rcvbuf[3], rlen);
        }
-       return 0;
+       return ret;
 }
 
 /* I2C */
index 908c09f4966b89b8923569b35a5a2cbc3b8a6727..581e09c25491bfe54a683fbfd094a890c0351c4a 100644 (file)
@@ -5,7 +5,10 @@
 #include "dvb-usb.h"
 
 struct digitv_state {
-    int is_nxt6000;
+       int is_nxt6000;
+
+       unsigned char sndbuf[7];
+       unsigned char rcvbuf[7];
 };
 
 /* protocol (from usblogging and the SDK:
index c09332bd99cb7c5e41699e2a9d435c3072432277..f5c042baa254e0cf3be54eb759918161f82202b1 100644 (file)
@@ -18,17 +18,28 @@ struct dtt200u_fe_state {
 
        struct dtv_frontend_properties fep;
        struct dvb_frontend frontend;
+
+       unsigned char data[80];
+       struct mutex data_mutex;
 };
 
 static int dtt200u_fe_read_status(struct dvb_frontend *fe,
                                  enum fe_status *stat)
 {
        struct dtt200u_fe_state *state = fe->demodulator_priv;
-       u8 st = GET_TUNE_STATUS, b[3];
+       int ret;
+
+       mutex_lock(&state->data_mutex);
+       state->data[0] = GET_TUNE_STATUS;
 
-       dvb_usb_generic_rw(state->d,&st,1,b,3,0);
+       ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 3, 0);
+       if (ret < 0) {
+               *stat = 0;
+               mutex_unlock(&state->data_mutex);
+               return ret;
+       }
 
-       switch (b[0]) {
+       switch (state->data[0]) {
                case 0x01:
                        *stat = FE_HAS_SIGNAL | FE_HAS_CARRIER |
                                FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_LOCK;
@@ -41,51 +52,86 @@ static int dtt200u_fe_read_status(struct dvb_frontend *fe,
                        *stat = 0;
                        break;
        }
+       mutex_unlock(&state->data_mutex);
        return 0;
 }
 
 static int dtt200u_fe_read_ber(struct dvb_frontend* fe, u32 *ber)
 {
        struct dtt200u_fe_state *state = fe->demodulator_priv;
-       u8 bw = GET_VIT_ERR_CNT,b[3];
-       dvb_usb_generic_rw(state->d,&bw,1,b,3,0);
-       *ber = (b[0] << 16) | (b[1] << 8) | b[2];
-       return 0;
+       int ret;
+
+       mutex_lock(&state->data_mutex);
+       state->data[0] = GET_VIT_ERR_CNT;
+
+       ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 3, 0);
+       if (ret >= 0)
+               *ber = (state->data[0] << 16) | (state->data[1] << 8) | state->data[2];
+
+       mutex_unlock(&state->data_mutex);
+       return ret;
 }
 
 static int dtt200u_fe_read_unc_blocks(struct dvb_frontend* fe, u32 *unc)
 {
        struct dtt200u_fe_state *state = fe->demodulator_priv;
-       u8 bw = GET_RS_UNCOR_BLK_CNT,b[2];
+       int ret;
 
-       dvb_usb_generic_rw(state->d,&bw,1,b,2,0);
-       *unc = (b[0] << 8) | b[1];
-       return 0;
+       mutex_lock(&state->data_mutex);
+       state->data[0] = GET_RS_UNCOR_BLK_CNT;
+
+       ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 2, 0);
+       if (ret >= 0)
+               *unc = (state->data[0] << 8) | state->data[1];
+
+       mutex_unlock(&state->data_mutex);
+       return ret;
 }
 
 static int dtt200u_fe_read_signal_strength(struct dvb_frontend* fe, u16 *strength)
 {
        struct dtt200u_fe_state *state = fe->demodulator_priv;
-       u8 bw = GET_AGC, b;
-       dvb_usb_generic_rw(state->d,&bw,1,&b,1,0);
-       *strength = (b << 8) | b;
-       return 0;
+       int ret;
+
+       mutex_lock(&state->data_mutex);
+       state->data[0] = GET_AGC;
+
+       ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 1, 0);
+       if (ret >= 0)
+               *strength = (state->data[0] << 8) | state->data[0];
+
+       mutex_unlock(&state->data_mutex);
+       return ret;
 }
 
 static int dtt200u_fe_read_snr(struct dvb_frontend* fe, u16 *snr)
 {
        struct dtt200u_fe_state *state = fe->demodulator_priv;
-       u8 bw = GET_SNR,br;
-       dvb_usb_generic_rw(state->d,&bw,1,&br,1,0);
-       *snr = ~((br << 8) | br);
-       return 0;
+       int ret;
+
+       mutex_lock(&state->data_mutex);
+       state->data[0] = GET_SNR;
+
+       ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 1, 0);
+       if (ret >= 0)
+               *snr = ~((state->data[0] << 8) | state->data[0]);
+
+       mutex_unlock(&state->data_mutex);
+       return ret;
 }
 
 static int dtt200u_fe_init(struct dvb_frontend* fe)
 {
        struct dtt200u_fe_state *state = fe->demodulator_priv;
-       u8 b = SET_INIT;
-       return dvb_usb_generic_write(state->d,&b,1);
+       int ret;
+
+       mutex_lock(&state->data_mutex);
+       state->data[0] = SET_INIT;
+
+       ret = dvb_usb_generic_write(state->d, state->data, 1);
+       mutex_unlock(&state->data_mutex);
+
+       return ret;
 }
 
 static int dtt200u_fe_sleep(struct dvb_frontend* fe)
@@ -105,39 +151,40 @@ static int dtt200u_fe_set_frontend(struct dvb_frontend *fe)
 {
        struct dtv_frontend_properties *fep = &fe->dtv_property_cache;
        struct dtt200u_fe_state *state = fe->demodulator_priv;
-       int i;
-       enum fe_status st;
+       int ret;
        u16 freq = fep->frequency / 250000;
-       u8 bwbuf[2] = { SET_BANDWIDTH, 0 },freqbuf[3] = { SET_RF_FREQ, 0, 0 };
 
+       mutex_lock(&state->data_mutex);
+       state->data[0] = SET_BANDWIDTH;
        switch (fep->bandwidth_hz) {
        case 8000000:
-               bwbuf[1] = 8;
+               state->data[1] = 8;
                break;
        case 7000000:
-               bwbuf[1] = 7;
+               state->data[1] = 7;
                break;
        case 6000000:
-               bwbuf[1] = 6;
+               state->data[1] = 6;
                break;
        default:
-               return -EINVAL;
+               ret = -EINVAL;
+               goto ret;
        }
 
-       dvb_usb_generic_write(state->d,bwbuf,2);
+       ret = dvb_usb_generic_write(state->d, state->data, 2);
+       if (ret < 0)
+               goto ret;
 
-       freqbuf[1] = freq & 0xff;
-       freqbuf[2] = (freq >> 8) & 0xff;
-       dvb_usb_generic_write(state->d,freqbuf,3);
+       state->data[0] = SET_RF_FREQ;
+       state->data[1] = freq & 0xff;
+       state->data[2] = (freq >> 8) & 0xff;
+       ret = dvb_usb_generic_write(state->d, state->data, 3);
+       if (ret < 0)
+               goto ret;
 
-       for (i = 0; i < 30; i++) {
-               msleep(20);
-               dtt200u_fe_read_status(fe, &st);
-               if (st & FE_TIMEDOUT)
-                       continue;
-       }
-
-       return 0;
+ret:
+       mutex_unlock(&state->data_mutex);
+       return ret;
 }
 
 static int dtt200u_fe_get_frontend(struct dvb_frontend* fe,
@@ -169,6 +216,7 @@ struct dvb_frontend* dtt200u_fe_attach(struct dvb_usb_device *d)
        deb_info("attaching frontend dtt200u\n");
 
        state->d = d;
+       mutex_init(&state->data_mutex);
 
        memcpy(&state->frontend.ops,&dtt200u_fe_ops,sizeof(struct dvb_frontend_ops));
        state->frontend.demodulator_priv = state;
index d2a01b50af0daf7ce7ab7d17686b845574c291a1..fcbff7fb0c4e192b9409b23bc10e7e1c66222d44 100644 (file)
@@ -20,75 +20,115 @@ MODULE_PARM_DESC(debug, "set debugging level (1=info,xfer=2 (or-able))." DVB_USB
 
 DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);
 
+struct dtt200u_state {
+       unsigned char data[80];
+};
+
 static int dtt200u_power_ctrl(struct dvb_usb_device *d, int onoff)
 {
-       u8 b = SET_INIT;
+       struct dtt200u_state *st = d->priv;
+       int ret = 0;
+
+       mutex_lock(&d->data_mutex);
+
+       st->data[0] = SET_INIT;
 
        if (onoff)
-               dvb_usb_generic_write(d,&b,2);
+               ret = dvb_usb_generic_write(d, st->data, 2);
 
-       return 0;
+       mutex_unlock(&d->data_mutex);
+       return ret;
 }
 
 static int dtt200u_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff)
 {
-       u8 b_streaming[2] = { SET_STREAMING, onoff };
-       u8 b_rst_pid = RESET_PID_FILTER;
+       struct dvb_usb_device *d = adap->dev;
+       struct dtt200u_state *st = d->priv;
+       int ret;
 
-       dvb_usb_generic_write(adap->dev, b_streaming, 2);
+       mutex_lock(&d->data_mutex);
+       st->data[0] = SET_STREAMING;
+       st->data[1] = onoff;
 
-       if (onoff == 0)
-               dvb_usb_generic_write(adap->dev, &b_rst_pid, 1);
-       return 0;
+       ret = dvb_usb_generic_write(adap->dev, st->data, 2);
+       if (ret < 0)
+               goto ret;
+
+       if (onoff)
+               goto ret;
+
+       st->data[0] = RESET_PID_FILTER;
+       ret = dvb_usb_generic_write(adap->dev, st->data, 1);
+
+ret:
+       mutex_unlock(&d->data_mutex);
+
+       return ret;
 }
 
 static int dtt200u_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, int onoff)
 {
-       u8 b_pid[4];
+       struct dvb_usb_device *d = adap->dev;
+       struct dtt200u_state *st = d->priv;
+       int ret;
+
        pid = onoff ? pid : 0;
 
-       b_pid[0] = SET_PID_FILTER;
-       b_pid[1] = index;
-       b_pid[2] = pid & 0xff;
-       b_pid[3] = (pid >> 8) & 0x1f;
+       mutex_lock(&d->data_mutex);
+       st->data[0] = SET_PID_FILTER;
+       st->data[1] = index;
+       st->data[2] = pid & 0xff;
+       st->data[3] = (pid >> 8) & 0x1f;
+
+       ret = dvb_usb_generic_write(adap->dev, st->data, 4);
+       mutex_unlock(&d->data_mutex);
 
-       return dvb_usb_generic_write(adap->dev, b_pid, 4);
+       return ret;
 }
 
 static int dtt200u_rc_query(struct dvb_usb_device *d)
 {
-       u8 key[5],cmd = GET_RC_CODE;
+       struct dtt200u_state *st = d->priv;
        u32 scancode;
+       int ret;
+
+       mutex_lock(&d->data_mutex);
+       st->data[0] = GET_RC_CODE;
 
-       dvb_usb_generic_rw(d,&cmd,1,key,5,0);
-       if (key[0] == 1) {
+       ret = dvb_usb_generic_rw(d, st->data, 1, st->data, 5, 0);
+       if (ret < 0)
+               goto ret;
+
+       if (st->data[0] == 1) {
                enum rc_type proto = RC_TYPE_NEC;
 
-               scancode = key[1];
-               if ((u8) ~key[1] != key[2]) {
+               scancode = st->data[1];
+               if ((u8) ~st->data[1] != st->data[2]) {
                        /* Extended NEC */
                        scancode = scancode << 8;
-                       scancode |= key[2];
+                       scancode |= st->data[2];
                        proto = RC_TYPE_NECX;
                }
                scancode = scancode << 8;
-               scancode |= key[3];
+               scancode |= st->data[3];
 
                /* Check command checksum is ok */
-               if ((u8) ~key[3] == key[4])
+               if ((u8) ~st->data[3] == st->data[4])
                        rc_keydown(d->rc_dev, proto, scancode, 0);
                else
                        rc_keyup(d->rc_dev);
-       } else if (key[0] == 2) {
+       } else if (st->data[0] == 2) {
                rc_repeat(d->rc_dev);
        } else {
                rc_keyup(d->rc_dev);
        }
 
-       if (key[0] != 0)
-               deb_info("key: %*ph\n", 5, key);
+       if (st->data[0] != 0)
+               deb_info("st->data: %*ph\n", 5, st->data);
 
-       return 0;
+ret:
+       mutex_unlock(&d->data_mutex);
+       return ret;
 }
 
 static int dtt200u_frontend_attach(struct dvb_usb_adapter *adap)
@@ -140,6 +180,8 @@ static struct dvb_usb_device_properties dtt200u_properties = {
        .usb_ctrl = CYPRESS_FX2,
        .firmware = "dvb-usb-dtt200u-01.fw",
 
+       .size_of_priv     = sizeof(struct dtt200u_state),
+
        .num_adapters = 1,
        .adapter = {
                {
@@ -190,6 +232,8 @@ static struct dvb_usb_device_properties wt220u_properties = {
        .usb_ctrl = CYPRESS_FX2,
        .firmware = "dvb-usb-wt220u-02.fw",
 
+       .size_of_priv     = sizeof(struct dtt200u_state),
+
        .num_adapters = 1,
        .adapter = {
                {
@@ -240,6 +284,8 @@ static struct dvb_usb_device_properties wt220u_fc_properties = {
        .usb_ctrl = CYPRESS_FX2,
        .firmware = "dvb-usb-wt220u-fc03.fw",
 
+       .size_of_priv     = sizeof(struct dtt200u_state),
+
        .num_adapters = 1,
        .adapter = {
                {
@@ -290,6 +336,8 @@ static struct dvb_usb_device_properties wt220u_zl0353_properties = {
        .usb_ctrl = CYPRESS_FX2,
        .firmware = "dvb-usb-wt220u-zl0353-01.fw",
 
+       .size_of_priv     = sizeof(struct dtt200u_state),
+
        .num_adapters = 1,
        .adapter = {
                {
@@ -340,6 +388,8 @@ static struct dvb_usb_device_properties wt220u_miglia_properties = {
        .usb_ctrl = CYPRESS_FX2,
        .firmware = "dvb-usb-wt220u-miglia-01.fw",
 
+       .size_of_priv     = sizeof(struct dtt200u_state),
+
        .num_adapters = 1,
        .generic_bulk_ctrl_endpoint = 0x01,
 
index 3d11df41cac0300c00a5d635412b233d232d338f..c60fb54f445f582357ca949b3e4f6775e0822e12 100644 (file)
@@ -31,9 +31,14 @@ module_param_named(debug, dvb_usb_dtv5100_debug, int, 0644);
 MODULE_PARM_DESC(debug, "set debugging level" DVB_USB_DEBUG_STATUS);
 DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);
 
+struct dtv5100_state {
+       unsigned char data[80];
+};
+
 static int dtv5100_i2c_msg(struct dvb_usb_device *d, u8 addr,
                           u8 *wbuf, u16 wlen, u8 *rbuf, u16 rlen)
 {
+       struct dtv5100_state *st = d->priv;
        u8 request;
        u8 type;
        u16 value;
@@ -60,9 +65,10 @@ static int dtv5100_i2c_msg(struct dvb_usb_device *d, u8 addr,
        }
        index = (addr << 8) + wbuf[0];
 
+       memcpy(st->data, rbuf, rlen);
        msleep(1); /* avoid I2C errors */
        return usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), request,
-                              type, value, index, rbuf, rlen,
+                              type, value, index, st->data, rlen,
                               DTV5100_USB_TIMEOUT);
 }
 
@@ -176,7 +182,7 @@ static struct dvb_usb_device_properties dtv5100_properties = {
        .caps = DVB_USB_IS_AN_I2C_ADAPTER,
        .usb_ctrl = DEVICE_SPECIFIC,
 
-       .size_of_priv = 0,
+       .size_of_priv = sizeof(struct dtv5100_state),
 
        .num_adapters = 1,
        .adapter = {{
index 3896ba9a4179670687eadb63bdd199e251dc904e..84308569e7dc12a7911304fc4c8c2abb72b49082 100644 (file)
@@ -142,6 +142,7 @@ static int dvb_usb_init(struct dvb_usb_device *d, short *adapter_nums)
 {
        int ret = 0;
 
+       mutex_init(&d->data_mutex);
        mutex_init(&d->usb_mutex);
        mutex_init(&d->i2c_mutex);
 
index 639c4678c65b96c82293f7546919768401f43dbd..107255b08b2b1f5c5454286938319d63b48208a9 100644 (file)
@@ -404,8 +404,12 @@ struct dvb_usb_adapter {
  *  Powered is in/decremented for each call to modify the state.
  * @udev: pointer to the device's struct usb_device.
  *
- * @usb_mutex: semaphore of USB control messages (reading needs two messages)
- * @i2c_mutex: semaphore for i2c-transfers
+ * @data_mutex: mutex to protect the data structure used to store URB data
+ * @usb_mutex: mutex of USB control messages (reading needs two messages).
+ *     Please note that this mutex is used internally by the generic
+ *     URB control functions. So, drivers using dvb_usb_generic_rw() and
+ *     derived functions should not lock it internally.
+ * @i2c_mutex: mutex for i2c-transfers
  *
  * @i2c_adap: device's i2c_adapter if it uses I2CoverUSB
  *
@@ -433,6 +437,7 @@ struct dvb_usb_device {
        int powered;
 
        /* locking */
+       struct mutex data_mutex;
        struct mutex usb_mutex;
 
        /* i2c */
index 5fb0c650926e3561684981013cc004a17975a290..2c720cb2fb00f0ddecd0eb82c6daf0f3a6731094 100644 (file)
@@ -852,7 +852,7 @@ static int su3000_power_ctrl(struct dvb_usb_device *d, int i)
        if (i && !state->initialized) {
                state->initialized = 1;
                /* reset board */
-               dvb_usb_generic_rw(d, obuf, 2, NULL, 0, 0);
+               return dvb_usb_generic_rw(d, obuf, 2, NULL, 0, 0);
        }
 
        return 0;
diff --git a/drivers/media/usb/dvb-usb/gp8psk-fe.c b/drivers/media/usb/dvb-usb/gp8psk-fe.c
deleted file mode 100644 (file)
index db6eb79..0000000
+++ /dev/null
@@ -1,372 +0,0 @@
-/* DVB USB compliant Linux driver for the
- *  - GENPIX 8pks/qpsk/DCII USB2.0 DVB-S module
- *
- * Copyright (C) 2006,2007 Alan Nisota (alannisota@gmail.com)
- * Copyright (C) 2006,2007 Genpix Electronics (genpix@genpix-electronics.com)
- *
- * Thanks to GENPIX for the sample code used to implement this module.
- *
- * This module is based off the vp7045 and vp702x modules
- *
- *     This program is free software; you can redistribute it and/or modify it
- *     under the terms of the GNU General Public License as published by the Free
- *     Software Foundation, version 2.
- *
- * see Documentation/dvb/README.dvb-usb for more information
- */
-#include "gp8psk.h"
-
-struct gp8psk_fe_state {
-       struct dvb_frontend fe;
-       struct dvb_usb_device *d;
-       u8 lock;
-       u16 snr;
-       unsigned long next_status_check;
-       unsigned long status_check_interval;
-};
-
-static int gp8psk_tuned_to_DCII(struct dvb_frontend *fe)
-{
-       struct gp8psk_fe_state *st = fe->demodulator_priv;
-       u8 status;
-       gp8psk_usb_in_op(st->d, GET_8PSK_CONFIG, 0, 0, &status, 1);
-       return status & bmDCtuned;
-}
-
-static int gp8psk_set_tuner_mode(struct dvb_frontend *fe, int mode)
-{
-       struct gp8psk_fe_state *state = fe->demodulator_priv;
-       return gp8psk_usb_out_op(state->d, SET_8PSK_CONFIG, mode, 0, NULL, 0);
-}
-
-static int gp8psk_fe_update_status(struct gp8psk_fe_state *st)
-{
-       u8 buf[6];
-       if (time_after(jiffies,st->next_status_check)) {
-               gp8psk_usb_in_op(st->d, GET_SIGNAL_LOCK, 0,0,&st->lock,1);
-               gp8psk_usb_in_op(st->d, GET_SIGNAL_STRENGTH, 0,0,buf,6);
-               st->snr = (buf[1]) << 8 | buf[0];
-               st->next_status_check = jiffies + (st->status_check_interval*HZ)/1000;
-       }
-       return 0;
-}
-
-static int gp8psk_fe_read_status(struct dvb_frontend *fe,
-                                enum fe_status *status)
-{
-       struct gp8psk_fe_state *st = fe->demodulator_priv;
-       gp8psk_fe_update_status(st);
-
-       if (st->lock)
-               *status = FE_HAS_LOCK | FE_HAS_SYNC | FE_HAS_VITERBI | FE_HAS_SIGNAL | FE_HAS_CARRIER;
-       else
-               *status = 0;
-
-       if (*status & FE_HAS_LOCK)
-               st->status_check_interval = 1000;
-       else
-               st->status_check_interval = 100;
-       return 0;
-}
-
-/* not supported by this Frontend */
-static int gp8psk_fe_read_ber(struct dvb_frontend* fe, u32 *ber)
-{
-       (void) fe;
-       *ber = 0;
-       return 0;
-}
-
-/* not supported by this Frontend */
-static int gp8psk_fe_read_unc_blocks(struct dvb_frontend* fe, u32 *unc)
-{
-       (void) fe;
-       *unc = 0;
-       return 0;
-}
-
-static int gp8psk_fe_read_snr(struct dvb_frontend* fe, u16 *snr)
-{
-       struct gp8psk_fe_state *st = fe->demodulator_priv;
-       gp8psk_fe_update_status(st);
-       /* snr is reported in dBu*256 */
-       *snr = st->snr;
-       return 0;
-}
-
-static int gp8psk_fe_read_signal_strength(struct dvb_frontend* fe, u16 *strength)
-{
-       struct gp8psk_fe_state *st = fe->demodulator_priv;
-       gp8psk_fe_update_status(st);
-       /* snr is reported in dBu*256 */
-       /* snr / 38.4 ~= 100% strength */
-       /* snr * 17 returns 100% strength as 65535 */
-       if (st->snr > 0xf00)
-               *strength = 0xffff;
-       else
-               *strength = (st->snr << 4) + st->snr; /* snr*17 */
-       return 0;
-}
-
-static int gp8psk_fe_get_tune_settings(struct dvb_frontend* fe, struct dvb_frontend_tune_settings *tune)
-{
-       tune->min_delay_ms = 800;
-       return 0;
-}
-
-static int gp8psk_fe_set_frontend(struct dvb_frontend *fe)
-{
-       struct gp8psk_fe_state *state = fe->demodulator_priv;
-       struct dtv_frontend_properties *c = &fe->dtv_property_cache;
-       u8 cmd[10];
-       u32 freq = c->frequency * 1000;
-       int gp_product_id = le16_to_cpu(state->d->udev->descriptor.idProduct);
-
-       deb_fe("%s()\n", __func__);
-
-       cmd[4] = freq         & 0xff;
-       cmd[5] = (freq >> 8)  & 0xff;
-       cmd[6] = (freq >> 16) & 0xff;
-       cmd[7] = (freq >> 24) & 0xff;
-
-       /* backwards compatibility: DVB-S + 8-PSK were used for Turbo-FEC */
-       if (c->delivery_system == SYS_DVBS && c->modulation == PSK_8)
-               c->delivery_system = SYS_TURBO;
-
-       switch (c->delivery_system) {
-       case SYS_DVBS:
-               if (c->modulation != QPSK) {
-                       deb_fe("%s: unsupported modulation selected (%d)\n",
-                               __func__, c->modulation);
-                       return -EOPNOTSUPP;
-               }
-               c->fec_inner = FEC_AUTO;
-               break;
-       case SYS_DVBS2: /* kept for backwards compatibility */
-               deb_fe("%s: DVB-S2 delivery system selected\n", __func__);
-               break;
-       case SYS_TURBO:
-               deb_fe("%s: Turbo-FEC delivery system selected\n", __func__);
-               break;
-
-       default:
-               deb_fe("%s: unsupported delivery system selected (%d)\n",
-                       __func__, c->delivery_system);
-               return -EOPNOTSUPP;
-       }
-
-       cmd[0] =  c->symbol_rate        & 0xff;
-       cmd[1] = (c->symbol_rate >>  8) & 0xff;
-       cmd[2] = (c->symbol_rate >> 16) & 0xff;
-       cmd[3] = (c->symbol_rate >> 24) & 0xff;
-       switch (c->modulation) {
-       case QPSK:
-               if (gp_product_id == USB_PID_GENPIX_8PSK_REV_1_WARM)
-                       if (gp8psk_tuned_to_DCII(fe))
-                               gp8psk_bcm4500_reload(state->d);
-               switch (c->fec_inner) {
-               case FEC_1_2:
-                       cmd[9] = 0; break;
-               case FEC_2_3:
-                       cmd[9] = 1; break;
-               case FEC_3_4:
-                       cmd[9] = 2; break;
-               case FEC_5_6:
-                       cmd[9] = 3; break;
-               case FEC_7_8:
-                       cmd[9] = 4; break;
-               case FEC_AUTO:
-                       cmd[9] = 5; break;
-               default:
-                       cmd[9] = 5; break;
-               }
-               if (c->delivery_system == SYS_TURBO)
-                       cmd[8] = ADV_MOD_TURBO_QPSK;
-               else
-                       cmd[8] = ADV_MOD_DVB_QPSK;
-               break;
-       case PSK_8: /* PSK_8 is for compatibility with DN */
-               cmd[8] = ADV_MOD_TURBO_8PSK;
-               switch (c->fec_inner) {
-               case FEC_2_3:
-                       cmd[9] = 0; break;
-               case FEC_3_4:
-                       cmd[9] = 1; break;
-               case FEC_3_5:
-                       cmd[9] = 2; break;
-               case FEC_5_6:
-                       cmd[9] = 3; break;
-               case FEC_8_9:
-                       cmd[9] = 4; break;
-               default:
-                       cmd[9] = 0; break;
-               }
-               break;
-       case QAM_16: /* QAM_16 is for compatibility with DN */
-               cmd[8] = ADV_MOD_TURBO_16QAM;
-               cmd[9] = 0;
-               break;
-       default: /* Unknown modulation */
-               deb_fe("%s: unsupported modulation selected (%d)\n",
-                       __func__, c->modulation);
-               return -EOPNOTSUPP;
-       }
-
-       if (gp_product_id == USB_PID_GENPIX_8PSK_REV_1_WARM)
-               gp8psk_set_tuner_mode(fe, 0);
-       gp8psk_usb_out_op(state->d, TUNE_8PSK, 0, 0, cmd, 10);
-
-       state->lock = 0;
-       state->next_status_check = jiffies;
-       state->status_check_interval = 200;
-
-       return 0;
-}
-
-static int gp8psk_fe_send_diseqc_msg (struct dvb_frontend* fe,
-                                   struct dvb_diseqc_master_cmd *m)
-{
-       struct gp8psk_fe_state *st = fe->demodulator_priv;
-
-       deb_fe("%s\n",__func__);
-
-       if (gp8psk_usb_out_op(st->d,SEND_DISEQC_COMMAND, m->msg[0], 0,
-                       m->msg, m->msg_len)) {
-               return -EINVAL;
-       }
-       return 0;
-}
-
-static int gp8psk_fe_send_diseqc_burst(struct dvb_frontend *fe,
-                                      enum fe_sec_mini_cmd burst)
-{
-       struct gp8psk_fe_state *st = fe->demodulator_priv;
-       u8 cmd;
-
-       deb_fe("%s\n",__func__);
-
-       /* These commands are certainly wrong */
-       cmd = (burst == SEC_MINI_A) ? 0x00 : 0x01;
-
-       if (gp8psk_usb_out_op(st->d,SEND_DISEQC_COMMAND, cmd, 0,
-                       &cmd, 0)) {
-               return -EINVAL;
-       }
-       return 0;
-}
-
-static int gp8psk_fe_set_tone(struct dvb_frontend *fe,
-                             enum fe_sec_tone_mode tone)
-{
-       struct gp8psk_fe_state* state = fe->demodulator_priv;
-
-       if (gp8psk_usb_out_op(state->d,SET_22KHZ_TONE,
-                (tone == SEC_TONE_ON), 0, NULL, 0)) {
-               return -EINVAL;
-       }
-       return 0;
-}
-
-static int gp8psk_fe_set_voltage(struct dvb_frontend *fe,
-                                enum fe_sec_voltage voltage)
-{
-       struct gp8psk_fe_state* state = fe->demodulator_priv;
-
-       if (gp8psk_usb_out_op(state->d,SET_LNB_VOLTAGE,
-                        voltage == SEC_VOLTAGE_18, 0, NULL, 0)) {
-               return -EINVAL;
-       }
-       return 0;
-}
-
-static int gp8psk_fe_enable_high_lnb_voltage(struct dvb_frontend* fe, long onoff)
-{
-       struct gp8psk_fe_state* state = fe->demodulator_priv;
-       return gp8psk_usb_out_op(state->d, USE_EXTRA_VOLT, onoff, 0,NULL,0);
-}
-
-static int gp8psk_fe_send_legacy_dish_cmd (struct dvb_frontend* fe, unsigned long sw_cmd)
-{
-       struct gp8psk_fe_state* state = fe->demodulator_priv;
-       u8 cmd = sw_cmd & 0x7f;
-
-       if (gp8psk_usb_out_op(state->d,SET_DN_SWITCH, cmd, 0,
-                       NULL, 0)) {
-               return -EINVAL;
-       }
-       if (gp8psk_usb_out_op(state->d,SET_LNB_VOLTAGE, !!(sw_cmd & 0x80),
-                       0, NULL, 0)) {
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static void gp8psk_fe_release(struct dvb_frontend* fe)
-{
-       struct gp8psk_fe_state *state = fe->demodulator_priv;
-       kfree(state);
-}
-
-static struct dvb_frontend_ops gp8psk_fe_ops;
-
-struct dvb_frontend * gp8psk_fe_attach(struct dvb_usb_device *d)
-{
-       struct gp8psk_fe_state *s = kzalloc(sizeof(struct gp8psk_fe_state), GFP_KERNEL);
-       if (s == NULL)
-               goto error;
-
-       s->d = d;
-       memcpy(&s->fe.ops, &gp8psk_fe_ops, sizeof(struct dvb_frontend_ops));
-       s->fe.demodulator_priv = s;
-
-       goto success;
-error:
-       return NULL;
-success:
-       return &s->fe;
-}
-
-
-static struct dvb_frontend_ops gp8psk_fe_ops = {
-       .delsys = { SYS_DVBS },
-       .info = {
-               .name                   = "Genpix DVB-S",
-               .frequency_min          = 800000,
-               .frequency_max          = 2250000,
-               .frequency_stepsize     = 100,
-               .symbol_rate_min        = 1000000,
-               .symbol_rate_max        = 45000000,
-               .symbol_rate_tolerance  = 500,  /* ppm */
-               .caps = FE_CAN_INVERSION_AUTO |
-                       FE_CAN_FEC_1_2 | FE_CAN_FEC_2_3 | FE_CAN_FEC_3_4 |
-                       FE_CAN_FEC_5_6 | FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
-                       /*
-                        * FE_CAN_QAM_16 is for compatibility
-                        * (Myth incorrectly detects Turbo-QPSK as plain QAM-16)
-                        */
-                       FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_TURBO_FEC
-       },
-
-       .release = gp8psk_fe_release,
-
-       .init = NULL,
-       .sleep = NULL,
-
-       .set_frontend = gp8psk_fe_set_frontend,
-
-       .get_tune_settings = gp8psk_fe_get_tune_settings,
-
-       .read_status = gp8psk_fe_read_status,
-       .read_ber = gp8psk_fe_read_ber,
-       .read_signal_strength = gp8psk_fe_read_signal_strength,
-       .read_snr = gp8psk_fe_read_snr,
-       .read_ucblocks = gp8psk_fe_read_unc_blocks,
-
-       .diseqc_send_master_cmd = gp8psk_fe_send_diseqc_msg,
-       .diseqc_send_burst = gp8psk_fe_send_diseqc_burst,
-       .set_tone = gp8psk_fe_set_tone,
-       .set_voltage = gp8psk_fe_set_voltage,
-       .dishnetwork_send_legacy_command = gp8psk_fe_send_legacy_dish_cmd,
-       .enable_high_lnb_voltage = gp8psk_fe_enable_high_lnb_voltage
-};
index 5d0384dd45b5ed1ed2e638acdddf9fe86ea2e628..993bb7a72985f05140b311c811f335db7258b7ec 100644 (file)
@@ -15,6 +15,7 @@
  * see Documentation/dvb/README.dvb-usb for more information
  */
 #include "gp8psk.h"
+#include "gp8psk-fe.h"
 
 /* debug */
 static char bcm4500_firmware[] = "dvb-usb-gp8psk-02.fw";
@@ -24,37 +25,19 @@ MODULE_PARM_DESC(debug, "set debugging level (1=info,xfer=2,rc=4 (or-able))." DV
 
 DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);
 
-static int gp8psk_get_fw_version(struct dvb_usb_device *d, u8 *fw_vers)
-{
-       return (gp8psk_usb_in_op(d, GET_FW_VERS, 0, 0, fw_vers, 6));
-}
-
-static int gp8psk_get_fpga_version(struct dvb_usb_device *d, u8 *fpga_vers)
-{
-       return (gp8psk_usb_in_op(d, GET_FPGA_VERS, 0, 0, fpga_vers, 1));
-}
-
-static void gp8psk_info(struct dvb_usb_device *d)
-{
-       u8 fpga_vers, fw_vers[6];
-
-       if (!gp8psk_get_fw_version(d, fw_vers))
-               info("FW Version = %i.%02i.%i (0x%x)  Build %4i/%02i/%02i",
-               fw_vers[2], fw_vers[1], fw_vers[0], GP8PSK_FW_VERS(fw_vers),
-               2000 + fw_vers[5], fw_vers[4], fw_vers[3]);
-       else
-               info("failed to get FW version");
-
-       if (!gp8psk_get_fpga_version(d, &fpga_vers))
-               info("FPGA Version = %i", fpga_vers);
-       else
-               info("failed to get FPGA version");
-}
+struct gp8psk_state {
+       unsigned char data[80];
+};
 
-int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 *b, int blen)
+static int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value,
+                           u16 index, u8 *b, int blen)
 {
+       struct gp8psk_state *st = d->priv;
        int ret = 0,try = 0;
 
+       if (blen > sizeof(st->data))
+               return -EIO;
+
        if ((ret = mutex_lock_interruptible(&d->usb_mutex)))
                return ret;
 
@@ -63,7 +46,7 @@ int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8
                        usb_rcvctrlpipe(d->udev,0),
                        req,
                        USB_TYPE_VENDOR | USB_DIR_IN,
-                       value,index,b,blen,
+                       value, index, st->data, blen,
                        2000);
                deb_info("reading number %d (ret: %d)\n",try,ret);
                try++;
@@ -72,8 +55,10 @@ int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8
        if (ret < 0 || ret != blen) {
                warn("usb in %d operation failed.", req);
                ret = -EIO;
-       } else
+       } else {
                ret = 0;
+               memcpy(b, st->data, blen);
+       }
 
        deb_xfer("in: req. %x, val: %x, ind: %x, buffer: ",req,value,index);
        debug_dump(b,blen,deb_xfer);
@@ -83,22 +68,27 @@ int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8
        return ret;
 }
 
-int gp8psk_usb_out_op(struct dvb_usb_device *d, u8 req, u16 value,
+static int gp8psk_usb_out_op(struct dvb_usb_device *d, u8 req, u16 value,
                             u16 index, u8 *b, int blen)
 {
+       struct gp8psk_state *st = d->priv;
        int ret;
 
        deb_xfer("out: req. %x, val: %x, ind: %x, buffer: ",req,value,index);
        debug_dump(b,blen,deb_xfer);
 
+       if (blen > sizeof(st->data))
+               return -EIO;
+
        if ((ret = mutex_lock_interruptible(&d->usb_mutex)))
                return ret;
 
+       memcpy(st->data, b, blen);
        if (usb_control_msg(d->udev,
                        usb_sndctrlpipe(d->udev,0),
                        req,
                        USB_TYPE_VENDOR | USB_DIR_OUT,
-                       value,index,b,blen,
+                       value, index, st->data, blen,
                        2000) != blen) {
                warn("usb out operation failed.");
                ret = -EIO;
@@ -109,6 +99,34 @@ int gp8psk_usb_out_op(struct dvb_usb_device *d, u8 req, u16 value,
        return ret;
 }
 
+
+static int gp8psk_get_fw_version(struct dvb_usb_device *d, u8 *fw_vers)
+{
+       return gp8psk_usb_in_op(d, GET_FW_VERS, 0, 0, fw_vers, 6);
+}
+
+static int gp8psk_get_fpga_version(struct dvb_usb_device *d, u8 *fpga_vers)
+{
+       return gp8psk_usb_in_op(d, GET_FPGA_VERS, 0, 0, fpga_vers, 1);
+}
+
+static void gp8psk_info(struct dvb_usb_device *d)
+{
+       u8 fpga_vers, fw_vers[6];
+
+       if (!gp8psk_get_fw_version(d, fw_vers))
+               info("FW Version = %i.%02i.%i (0x%x)  Build %4i/%02i/%02i",
+               fw_vers[2], fw_vers[1], fw_vers[0], GP8PSK_FW_VERS(fw_vers),
+               2000 + fw_vers[5], fw_vers[4], fw_vers[3]);
+       else
+               info("failed to get FW version");
+
+       if (!gp8psk_get_fpga_version(d, &fpga_vers))
+               info("FPGA Version = %i", fpga_vers);
+       else
+               info("failed to get FPGA version");
+}
+
 static int gp8psk_load_bcm4500fw(struct dvb_usb_device *d)
 {
        int ret;
@@ -143,6 +161,11 @@ static int gp8psk_load_bcm4500fw(struct dvb_usb_device *d)
                        err("failed to load bcm4500 firmware.");
                        goto out_free;
                }
+               if (buflen > 64) {
+                       err("firmare chunk size bigger than 64 bytes.");
+                       goto out_free;
+               }
+
                memcpy(buf, ptr, buflen);
                if (dvb_usb_generic_write(d, buf, buflen)) {
                        err("failed to load bcm4500 firmware.");
@@ -206,10 +229,13 @@ static int gp8psk_power_ctrl(struct dvb_usb_device *d, int onoff)
        return 0;
 }
 
-int gp8psk_bcm4500_reload(struct dvb_usb_device *d)
+static int gp8psk_bcm4500_reload(struct dvb_usb_device *d)
 {
        u8 buf;
        int gp_product_id = le16_to_cpu(d->udev->descriptor.idProduct);
+
+       deb_xfer("reloading firmware\n");
+
        /* Turn off 8psk power */
        if (gp8psk_usb_in_op(d, BOOT_8PSK, 0, 0, &buf, 1))
                return -EINVAL;
@@ -228,9 +254,47 @@ static int gp8psk_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff)
        return gp8psk_usb_out_op(adap->dev, ARM_TRANSFER, onoff, 0 , NULL, 0);
 }
 
+/* Callbacks for gp8psk-fe.c */
+
+static int gp8psk_fe_in(void *priv, u8 req, u16 value,
+                           u16 index, u8 *b, int blen)
+{
+       struct dvb_usb_device *d = priv;
+
+       return gp8psk_usb_in_op(d, req, value, index, b, blen);
+}
+
+static int gp8psk_fe_out(void *priv, u8 req, u16 value,
+                           u16 index, u8 *b, int blen)
+{
+       struct dvb_usb_device *d = priv;
+
+       return gp8psk_usb_out_op(d, req, value, index, b, blen);
+}
+
+static int gp8psk_fe_reload(void *priv)
+{
+       struct dvb_usb_device *d = priv;
+
+       return gp8psk_bcm4500_reload(d);
+}
+
+const struct gp8psk_fe_ops gp8psk_fe_ops = {
+       .in = gp8psk_fe_in,
+       .out = gp8psk_fe_out,
+       .reload = gp8psk_fe_reload,
+};
+
 static int gp8psk_frontend_attach(struct dvb_usb_adapter *adap)
 {
-       adap->fe_adap[0].fe = gp8psk_fe_attach(adap->dev);
+       struct dvb_usb_device *d = adap->dev;
+       int id = le16_to_cpu(d->udev->descriptor.idProduct);
+       int is_rev1;
+
+       is_rev1 = (id == USB_PID_GENPIX_8PSK_REV_1_WARM) ? true : false;
+
+       adap->fe_adap[0].fe = dvb_attach(gp8psk_fe_attach,
+                                        &gp8psk_fe_ops, d, is_rev1);
        return 0;
 }
 
@@ -265,6 +329,8 @@ static struct dvb_usb_device_properties gp8psk_properties = {
        .usb_ctrl = CYPRESS_FX2,
        .firmware = "dvb-usb-gp8psk-01.fw",
 
+       .size_of_priv = sizeof(struct gp8psk_state),
+
        .num_adapters = 1,
        .adapter = {
                {
index ed32b9da484364d8f49e94f4c29342149ef563da..d8975b866deeceda0e3820a7810f5cfc2b7205d6 100644 (file)
@@ -24,58 +24,6 @@ extern int dvb_usb_gp8psk_debug;
 #define deb_info(args...) dprintk(dvb_usb_gp8psk_debug,0x01,args)
 #define deb_xfer(args...) dprintk(dvb_usb_gp8psk_debug,0x02,args)
 #define deb_rc(args...)   dprintk(dvb_usb_gp8psk_debug,0x04,args)
-#define deb_fe(args...)   dprintk(dvb_usb_gp8psk_debug,0x08,args)
-
-/* Twinhan Vendor requests */
-#define TH_COMMAND_IN                     0xC0
-#define TH_COMMAND_OUT                    0xC1
-
-/* gp8psk commands */
-
-#define GET_8PSK_CONFIG                 0x80    /* in */
-#define SET_8PSK_CONFIG                 0x81
-#define I2C_WRITE                      0x83
-#define I2C_READ                       0x84
-#define ARM_TRANSFER                    0x85
-#define TUNE_8PSK                       0x86
-#define GET_SIGNAL_STRENGTH             0x87    /* in */
-#define LOAD_BCM4500                    0x88
-#define BOOT_8PSK                       0x89    /* in */
-#define START_INTERSIL                  0x8A    /* in */
-#define SET_LNB_VOLTAGE                 0x8B
-#define SET_22KHZ_TONE                  0x8C
-#define SEND_DISEQC_COMMAND             0x8D
-#define SET_DVB_MODE                    0x8E
-#define SET_DN_SWITCH                   0x8F
-#define GET_SIGNAL_LOCK                 0x90    /* in */
-#define GET_FW_VERS                    0x92
-#define GET_SERIAL_NUMBER               0x93    /* in */
-#define USE_EXTRA_VOLT                  0x94
-#define GET_FPGA_VERS                  0x95
-#define CW3K_INIT                      0x9d
-
-/* PSK_configuration bits */
-#define bm8pskStarted                   0x01
-#define bm8pskFW_Loaded                 0x02
-#define bmIntersilOn                    0x04
-#define bmDVBmode                       0x08
-#define bm22kHz                         0x10
-#define bmSEL18V                        0x20
-#define bmDCtuned                       0x40
-#define bmArmed                         0x80
-
-/* Satellite modulation modes */
-#define ADV_MOD_DVB_QPSK 0     /* DVB-S QPSK */
-#define ADV_MOD_TURBO_QPSK 1   /* Turbo QPSK */
-#define ADV_MOD_TURBO_8PSK 2   /* Turbo 8PSK (also used for Trellis 8PSK) */
-#define ADV_MOD_TURBO_16QAM 3  /* Turbo 16QAM (also used for Trellis 8PSK) */
-
-#define ADV_MOD_DCII_C_QPSK 4  /* Digicipher II Combo */
-#define ADV_MOD_DCII_I_QPSK 5  /* Digicipher II I-stream */
-#define ADV_MOD_DCII_Q_QPSK 6  /* Digicipher II Q-stream */
-#define ADV_MOD_DCII_C_OQPSK 7 /* Digicipher II offset QPSK */
-#define ADV_MOD_DSS_QPSK 8     /* DSS (DIRECTV) QPSK */
-#define ADV_MOD_DVB_BPSK 9     /* DVB-S BPSK */
 
 #define GET_USB_SPEED                     0x07
 
@@ -86,15 +34,4 @@ extern int dvb_usb_gp8psk_debug;
 #define PRODUCT_STRING_READ               0x0D
 #define FW_BCD_VERSION_READ               0x14
 
-/* firmware revision id's */
-#define GP8PSK_FW_REV1                 0x020604
-#define GP8PSK_FW_REV2                 0x020704
-#define GP8PSK_FW_VERS(_fw_vers)       ((_fw_vers)[2]<<0x10 | (_fw_vers)[1]<<0x08 | (_fw_vers)[0])
-
-extern struct dvb_frontend * gp8psk_fe_attach(struct dvb_usb_device *d);
-extern int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 *b, int blen);
-extern int gp8psk_usb_out_op(struct dvb_usb_device *d, u8 req, u16 value,
-                            u16 index, u8 *b, int blen);
-extern int gp8psk_bcm4500_reload(struct dvb_usb_device *d);
-
 #endif
index fc7569e2728d2201e14897cddd41c9a9504a9716..1babd334191069d30419e2c07fc25fd04e0931d4 100644 (file)
@@ -74,22 +74,31 @@ static struct rc_map_table rc_map_haupp_table[] = {
  */
 static int nova_t_rc_query(struct dvb_usb_device *d, u32 *event, int *state)
 {
-       u8 key[5],cmd[2] = { DIBUSB_REQ_POLL_REMOTE, 0x35 }, data,toggle,custom;
+       u8 *buf, data, toggle, custom;
        u16 raw;
-       int i;
+       int i, ret;
        struct dibusb_device_state *st = d->priv;
 
-       dvb_usb_generic_rw(d,cmd,2,key,5,0);
+       buf = kmalloc(5, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       buf[0] = DIBUSB_REQ_POLL_REMOTE;
+       buf[1] = 0x35;
+       ret = dvb_usb_generic_rw(d, buf, 2, buf, 5, 0);
+       if (ret < 0)
+               goto ret;
 
        *state = REMOTE_NO_KEY_PRESSED;
-       switch (key[0]) {
+       switch (buf[0]) {
                case DIBUSB_RC_HAUPPAUGE_KEY_PRESSED:
-                       raw = ((key[1] << 8) | key[2]) >> 3;
+                       raw = ((buf[1] << 8) | buf[2]) >> 3;
                        toggle = !!(raw & 0x800);
                        data = raw & 0x3f;
                        custom = (raw >> 6) & 0x1f;
 
-                       deb_rc("raw key code 0x%02x, 0x%02x, 0x%02x to c: %02x d: %02x toggle: %d\n",key[1],key[2],key[3],custom,data,toggle);
+                       deb_rc("raw key code 0x%02x, 0x%02x, 0x%02x to c: %02x d: %02x toggle: %d\n",
+                              buf[1], buf[2], buf[3], custom, data, toggle);
 
                        for (i = 0; i < ARRAY_SIZE(rc_map_haupp_table); i++) {
                                if (rc5_data(&rc_map_haupp_table[i]) == data &&
@@ -117,7 +126,9 @@ static int nova_t_rc_query(struct dvb_usb_device *d, u32 *event, int *state)
                        break;
        }
 
-       return 0;
+ret:
+       kfree(buf);
+       return ret;
 }
 
 static int nova_t_read_mac_address (struct dvb_usb_device *d, u8 mac[6])
index c05de1b088a4e3abf0e6de4309b43df6305d4678..07fa08be9e994a3f7d5952251b73f871fe4ecdca 100644 (file)
@@ -97,48 +97,53 @@ struct pctv452e_state {
        u8 c;      /* transaction counter, wraps around...  */
        u8 initialized; /* set to 1 if 0x15 has been sent */
        u16 last_rc_key;
+
+       unsigned char data[80];
 };
 
 static int tt3650_ci_msg(struct dvb_usb_device *d, u8 cmd, u8 *data,
                         unsigned int write_len, unsigned int read_len)
 {
        struct pctv452e_state *state = (struct pctv452e_state *)d->priv;
-       u8 buf[64];
        u8 id;
        unsigned int rlen;
        int ret;
 
-       BUG_ON(NULL == data && 0 != (write_len | read_len));
-       BUG_ON(write_len > 64 - 4);
-       BUG_ON(read_len > 64 - 4);
+       if (!data || (write_len > 64 - 4) || (read_len > 64 - 4)) {
+               err("%s: transfer data invalid", __func__);
+               return -EIO;
+       }
 
+       mutex_lock(&state->ca_mutex);
        id = state->c++;
 
-       buf[0] = SYNC_BYTE_OUT;
-       buf[1] = id;
-       buf[2] = cmd;
-       buf[3] = write_len;
+       state->data[0] = SYNC_BYTE_OUT;
+       state->data[1] = id;
+       state->data[2] = cmd;
+       state->data[3] = write_len;
 
-       memcpy(buf + 4, data, write_len);
+       memcpy(state->data + 4, data, write_len);
 
        rlen = (read_len > 0) ? 64 : 0;
-       ret = dvb_usb_generic_rw(d, buf, 4 + write_len,
-                                 buf, rlen, /* delay_ms */ 0);
+       ret = dvb_usb_generic_rw(d, state->data, 4 + write_len,
+                                 state->data, rlen, /* delay_ms */ 0);
        if (0 != ret)
                goto failed;
 
        ret = -EIO;
-       if (SYNC_BYTE_IN != buf[0] || id != buf[1])
+       if (SYNC_BYTE_IN != state->data[0] || id != state->data[1])
                goto failed;
 
-       memcpy(data, buf + 4, read_len);
+       memcpy(data, state->data + 4, read_len);
 
+       mutex_unlock(&state->ca_mutex);
        return 0;
 
 failed:
        err("CI error %d; %02X %02X %02X -> %*ph.",
-            ret, SYNC_BYTE_OUT, id, cmd, 3, buf);
+            ret, SYNC_BYTE_OUT, id, cmd, 3, state->data);
 
+       mutex_unlock(&state->ca_mutex);
        return ret;
 }
 
@@ -405,52 +410,53 @@ static int pctv452e_i2c_msg(struct dvb_usb_device *d, u8 addr,
                                u8 *rcv_buf, u8 rcv_len)
 {
        struct pctv452e_state *state = (struct pctv452e_state *)d->priv;
-       u8 buf[64];
        u8 id;
        int ret;
 
+       mutex_lock(&state->ca_mutex);
        id = state->c++;
 
        ret = -EINVAL;
        if (snd_len > 64 - 7 || rcv_len > 64 - 7)
                goto failed;
 
-       buf[0] = SYNC_BYTE_OUT;
-       buf[1] = id;
-       buf[2] = PCTV_CMD_I2C;
-       buf[3] = snd_len + 3;
-       buf[4] = addr << 1;
-       buf[5] = snd_len;
-       buf[6] = rcv_len;
+       state->data[0] = SYNC_BYTE_OUT;
+       state->data[1] = id;
+       state->data[2] = PCTV_CMD_I2C;
+       state->data[3] = snd_len + 3;
+       state->data[4] = addr << 1;
+       state->data[5] = snd_len;
+       state->data[6] = rcv_len;
 
-       memcpy(buf + 7, snd_buf, snd_len);
+       memcpy(state->data + 7, snd_buf, snd_len);
 
-       ret = dvb_usb_generic_rw(d, buf, 7 + snd_len,
-                                 buf, /* rcv_len */ 64,
+       ret = dvb_usb_generic_rw(d, state->data, 7 + snd_len,
+                                 state->data, /* rcv_len */ 64,
                                  /* delay_ms */ 0);
        if (ret < 0)
                goto failed;
 
        /* TT USB protocol error. */
        ret = -EIO;
-       if (SYNC_BYTE_IN != buf[0] || id != buf[1])
+       if (SYNC_BYTE_IN != state->data[0] || id != state->data[1])
                goto failed;
 
        /* I2C device didn't respond as expected. */
        ret = -EREMOTEIO;
-       if (buf[5] < snd_len || buf[6] < rcv_len)
+       if (state->data[5] < snd_len || state->data[6] < rcv_len)
                goto failed;
 
-       memcpy(rcv_buf, buf + 7, rcv_len);
+       memcpy(rcv_buf, state->data + 7, rcv_len);
+       mutex_unlock(&state->ca_mutex);
 
        return rcv_len;
 
 failed:
-       err("I2C error %d; %02X %02X  %02X %02X %02X -> "
-            "%02X %02X  %02X %02X %02X.",
+       err("I2C error %d; %02X %02X  %02X %02X %02X -> %*ph",
             ret, SYNC_BYTE_OUT, id, addr << 1, snd_len, rcv_len,
-            buf[0], buf[1], buf[4], buf[5], buf[6]);
+            7, state->data);
 
+       mutex_unlock(&state->ca_mutex);
        return ret;
 }
 
@@ -499,8 +505,7 @@ static u32 pctv452e_i2c_func(struct i2c_adapter *adapter)
 static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i)
 {
        struct pctv452e_state *state = (struct pctv452e_state *)d->priv;
-       u8 b0[] = { 0xaa, 0, PCTV_CMD_RESET, 1, 0 };
-       u8 rx[PCTV_ANSWER_LEN];
+       u8 *rx;
        int ret;
 
        info("%s: %d\n", __func__, i);
@@ -511,6 +516,11 @@ static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i)
        if (state->initialized)
                return 0;
 
+       rx = kmalloc(PCTV_ANSWER_LEN, GFP_KERNEL);
+       if (!rx)
+               return -ENOMEM;
+
+       mutex_lock(&state->ca_mutex);
        /* hmm where shoud this should go? */
        ret = usb_set_interface(d->udev, 0, ISOC_INTERFACE_ALTERNATIVE);
        if (ret != 0)
@@ -518,65 +528,75 @@ static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i)
                        __func__, ret);
 
        /* this is a one-time initialization, dont know where to put */
-       b0[1] = state->c++;
+       state->data[0] = 0xaa;
+       state->data[1] = state->c++;
+       state->data[2] = PCTV_CMD_RESET;
+       state->data[3] = 1;
+       state->data[4] = 0;
        /* reset board */
-       ret = dvb_usb_generic_rw(d, b0, sizeof(b0), rx, PCTV_ANSWER_LEN, 0);
+       ret = dvb_usb_generic_rw(d, state->data, 5, rx, PCTV_ANSWER_LEN, 0);
        if (ret)
-               return ret;
+               goto ret;
 
-       b0[1] = state->c++;
-       b0[4] = 1;
+       state->data[1] = state->c++;
+       state->data[4] = 1;
        /* reset board (again?) */
-       ret = dvb_usb_generic_rw(d, b0, sizeof(b0), rx, PCTV_ANSWER_LEN, 0);
+       ret = dvb_usb_generic_rw(d, state->data, 5, rx, PCTV_ANSWER_LEN, 0);
        if (ret)
-               return ret;
+               goto ret;
 
        state->initialized = 1;
 
-       return 0;
+ret:
+       mutex_unlock(&state->ca_mutex);
+       kfree(rx);
+       return ret;
 }
 
 static int pctv452e_rc_query(struct dvb_usb_device *d)
 {
        struct pctv452e_state *state = (struct pctv452e_state *)d->priv;
-       u8 b[CMD_BUFFER_SIZE];
-       u8 rx[PCTV_ANSWER_LEN];
        int ret, i;
-       u8 id = state->c++;
+       u8 id;
+
+       mutex_lock(&state->ca_mutex);
+       id = state->c++;
 
        /* prepare command header  */
-       b[0] = SYNC_BYTE_OUT;
-       b[1] = id;
-       b[2] = PCTV_CMD_IR;
-       b[3] = 0;
+       state->data[0] = SYNC_BYTE_OUT;
+       state->data[1] = id;
+       state->data[2] = PCTV_CMD_IR;
+       state->data[3] = 0;
 
        /* send ir request */
-       ret = dvb_usb_generic_rw(d, b, 4, rx, PCTV_ANSWER_LEN, 0);
+       ret = dvb_usb_generic_rw(d, state->data, 4,
+                                state->data, PCTV_ANSWER_LEN, 0);
        if (ret != 0)
-               return ret;
+               goto ret;
 
        if (debug > 3) {
-               info("%s: read: %2d: %*ph: ", __func__, ret, 3, rx);
-               for (i = 0; (i < rx[3]) && ((i+3) < PCTV_ANSWER_LEN); i++)
-                       info(" %02x", rx[i+3]);
+               info("%s: read: %2d: %*ph: ", __func__, ret, 3, state->data);
+               for (i = 0; (i < state->data[3]) && ((i + 3) < PCTV_ANSWER_LEN); i++)
+                       info(" %02x", state->data[i + 3]);
 
                info("\n");
        }
 
-       if ((rx[3] == 9) &&  (rx[12] & 0x01)) {
+       if ((state->data[3] == 9) &&  (state->data[12] & 0x01)) {
                /* got a "press" event */
-               state->last_rc_key = RC_SCANCODE_RC5(rx[7], rx[6]);
+               state->last_rc_key = RC_SCANCODE_RC5(state->data[7], state->data[6]);
                if (debug > 2)
                        info("%s: cmd=0x%02x sys=0x%02x\n",
-                               __func__, rx[6], rx[7]);
+                               __func__, state->data[6], state->data[7]);
 
                rc_keydown(d->rc_dev, RC_TYPE_RC5, state->last_rc_key, 0);
        } else if (state->last_rc_key) {
                rc_keyup(d->rc_dev);
                state->last_rc_key = 0;
        }
-
-       return 0;
+ret:
+       mutex_unlock(&state->ca_mutex);
+       return ret;
 }
 
 static int pctv452e_read_mac_address(struct dvb_usb_device *d, u8 mac[6])
index d9f3262bf0712642b6d15edd216a26ee48c7ecf9..4706628a3ed5ea5345e30a9dc59cad7fb385286d 100644 (file)
@@ -89,9 +89,13 @@ struct technisat_usb2_state {
 static int technisat_usb2_i2c_access(struct usb_device *udev,
                u8 device_addr, u8 *tx, u8 txlen, u8 *rx, u8 rxlen)
 {
-       u8 b[64];
+       u8 *b;
        int ret, actual_length;
 
+       b = kmalloc(64, GFP_KERNEL);
+       if (!b)
+               return -ENOMEM;
+
        deb_i2c("i2c-access: %02x, tx: ", device_addr);
        debug_dump(tx, txlen, deb_i2c);
        deb_i2c(" ");
@@ -123,7 +127,7 @@ static int technisat_usb2_i2c_access(struct usb_device *udev,
 
        if (ret < 0) {
                err("i2c-error: out failed %02x = %d", device_addr, ret);
-               return -ENODEV;
+               goto err;
        }
 
        ret = usb_bulk_msg(udev,
@@ -131,7 +135,7 @@ static int technisat_usb2_i2c_access(struct usb_device *udev,
                        b, 64, &actual_length, 1000);
        if (ret < 0) {
                err("i2c-error: in failed %02x = %d", device_addr, ret);
-               return -ENODEV;
+               goto err;
        }
 
        if (b[0] != I2C_STATUS_OK) {
@@ -140,7 +144,7 @@ static int technisat_usb2_i2c_access(struct usb_device *udev,
                if (!(b[0] == I2C_STATUS_NAK &&
                                device_addr == 0x60
                                /* && device_is_technisat_usb2 */))
-                       return -ENODEV;
+                       goto err;
        }
 
        deb_i2c("status: %d, ", b[0]);
@@ -154,7 +158,9 @@ static int technisat_usb2_i2c_access(struct usb_device *udev,
 
        deb_i2c("\n");
 
-       return 0;
+err:
+       kfree(b);
+       return ret;
 }
 
 static int technisat_usb2_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msg,
index c3a0e87066ebbbd78cda420a48f7dc76ab511107..f7bb78c1873c915d9db9574ea024cc77a355c413 100644 (file)
@@ -1901,19 +1901,30 @@ static long s2255_vendor_req(struct s2255_dev *dev, unsigned char Request,
                             s32 TransferBufferLength, int bOut)
 {
        int r;
+       unsigned char *buf;
+
+       buf = kmalloc(TransferBufferLength, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
        if (!bOut) {
                r = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0),
                                    Request,
                                    USB_TYPE_VENDOR | USB_RECIP_DEVICE |
                                    USB_DIR_IN,
-                                   Value, Index, TransferBuffer,
+                                   Value, Index, buf,
                                    TransferBufferLength, HZ * 5);
+
+               if (r >= 0)
+                       memcpy(TransferBuffer, buf, TransferBufferLength);
        } else {
+               memcpy(buf, TransferBuffer, TransferBufferLength);
                r = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0),
                                    Request, USB_TYPE_VENDOR | USB_RECIP_DEVICE,
-                                   Value, Index, TransferBuffer,
+                                   Value, Index, buf,
                                    TransferBufferLength, HZ * 5);
        }
+       kfree(buf);
        return r;
 }
 
index db200c9d796d3683d23b9279db73d6eb625420da..22a9aae16291b31adb5eeae0af4cd14c747c0ec5 100644 (file)
@@ -147,20 +147,26 @@ int stk_camera_write_reg(struct stk_camera *dev, u16 index, u8 value)
 int stk_camera_read_reg(struct stk_camera *dev, u16 index, int *value)
 {
        struct usb_device *udev = dev->udev;
+       unsigned char *buf;
        int ret;
 
+       buf = kmalloc(sizeof(u8), GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
        ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
                        0x00,
                        USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
                        0x00,
                        index,
-                       (u8 *) value,
+                       buf,
                        sizeof(u8),
                        500);
-       if (ret < 0)
-               return ret;
-       else
-               return 0;
+       if (ret >= 0)
+               memcpy(value, buf, sizeof(u8));
+
+       kfree(buf);
+       return ret;
 }
 
 static int stk_start_stream(struct stk_camera *dev)
index 29b3436d0910fbd0c1a591b0aea408dc34aca74a..367523a3c774c29b4e015419d06a9c4c8275eeaa 100644 (file)
@@ -27,7 +27,7 @@ config VIDEO_FIXED_MINOR_RANGES
 
 config VIDEO_PCI_SKELETON
        tristate "Skeleton PCI V4L2 driver"
-       depends on PCI && BUILD_DOCSRC
+       depends on PCI
        depends on VIDEO_V4L2 && VIDEOBUF2_CORE
        depends on VIDEOBUF2_MEMOPS && VIDEOBUF2_DMA_CONTIG
        ---help---
index f300f060b3f34cdfeb8b1e12b90d9ab345c6b629..1db0af6c7f94810bf9270b0ebb5961de6b1802a5 100644 (file)
@@ -156,6 +156,7 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
 {
        unsigned long first, last;
        int err, rw = 0;
+       unsigned int flags = FOLL_FORCE;
 
        dma->direction = direction;
        switch (dma->direction) {
@@ -178,12 +179,14 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
        if (NULL == dma->pages)
                return -ENOMEM;
 
+       if (rw == READ)
+               flags |= FOLL_WRITE;
+
        dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n",
                data, size, dma->nr_pages);
 
        err = get_user_pages(data & PAGE_MASK, dma->nr_pages,
-                            rw == READ, 1, /* force */
-                            dma->pages, NULL);
+                            flags, dma->pages, NULL);
 
        if (err != dma->nr_pages) {
                dma->nr_pages = (err >= 0) ? err : 0;
index 3c3b517f1d1cacb5a7131263cd80c112bd22d619..1cd322e939c70520e9e915575590bf516c0e46ee 100644 (file)
@@ -42,6 +42,10 @@ struct frame_vector *vb2_create_framevec(unsigned long start,
        unsigned long first, last;
        unsigned long nr;
        struct frame_vector *vec;
+       unsigned int flags = FOLL_FORCE;
+
+       if (write)
+               flags |= FOLL_WRITE;
 
        first = start >> PAGE_SHIFT;
        last = (start + length - 1) >> PAGE_SHIFT;
@@ -49,7 +53,7 @@ struct frame_vector *vb2_create_framevec(unsigned long start,
        vec = frame_vector_create(nr);
        if (!vec)
                return ERR_PTR(-ENOMEM);
-       ret = get_vaddr_frames(start & PAGE_MASK, nr, write, true, vec);
+       ret = get_vaddr_frames(start & PAGE_MASK, nr, flags, vec);
        if (ret < 0)
                goto out_destroy;
        /* We accept only complete set of PFNs */
index d34bc35303851634d143f924a790a47a7f006e8c..2e3cf012ef485f863dd2de36d26dae8caae7156b 100644 (file)
@@ -524,6 +524,7 @@ static void rtsx_usb_ms_handle_req(struct work_struct *work)
        int rc;
 
        if (!host->req) {
+               pm_runtime_get_sync(ms_dev(host));
                do {
                        rc = memstick_next_req(msh, &host->req);
                        dev_dbg(ms_dev(host), "next req %d\n", rc);
@@ -544,6 +545,7 @@ static void rtsx_usb_ms_handle_req(struct work_struct *work)
                                                host->req->error);
                        }
                } while (!rc);
+               pm_runtime_put(ms_dev(host));
        }
 
 }
@@ -570,6 +572,7 @@ static int rtsx_usb_ms_set_param(struct memstick_host *msh,
        dev_dbg(ms_dev(host), "%s: param = %d, value = %d\n",
                        __func__, param, value);
 
+       pm_runtime_get_sync(ms_dev(host));
        mutex_lock(&ucr->dev_mutex);
 
        err = rtsx_usb_card_exclusive_check(ucr, RTSX_USB_MS_CARD);
@@ -635,6 +638,7 @@ static int rtsx_usb_ms_set_param(struct memstick_host *msh,
        }
 out:
        mutex_unlock(&ucr->dev_mutex);
+       pm_runtime_put(ms_dev(host));
 
        /* power-on delay */
        if (param == MEMSTICK_POWER && value == MEMSTICK_POWER_ON)
@@ -681,6 +685,7 @@ static int rtsx_usb_detect_ms_card(void *__host)
        int err;
 
        for (;;) {
+               pm_runtime_get_sync(ms_dev(host));
                mutex_lock(&ucr->dev_mutex);
 
                /* Check pending MS card changes */
@@ -703,6 +708,7 @@ static int rtsx_usb_detect_ms_card(void *__host)
                }
 
 poll_again:
+               pm_runtime_put(ms_dev(host));
                if (host->eject)
                        break;
 
index 3228fd182a99ecfd32391f90d41d61be46b0251c..9ff243970e93ef1c025df40ca3e4474f59c371f5 100644 (file)
@@ -123,19 +123,6 @@ static const struct intel_lpss_platform_info apl_i2c_info = {
        .properties = apl_i2c_properties,
 };
 
-static const struct intel_lpss_platform_info kbl_info = {
-       .clk_rate = 120000000,
-};
-
-static const struct intel_lpss_platform_info kbl_uart_info = {
-       .clk_rate = 120000000,
-       .clk_con_id = "baudclk",
-};
-
-static const struct intel_lpss_platform_info kbl_i2c_info = {
-       .clk_rate = 133000000,
-};
-
 static const struct pci_device_id intel_lpss_pci_ids[] = {
        /* BXT A-Step */
        { PCI_VDEVICE(INTEL, 0x0aac), (kernel_ulong_t)&bxt_i2c_info },
@@ -207,15 +194,15 @@ static const struct pci_device_id intel_lpss_pci_ids[] = {
        { PCI_VDEVICE(INTEL, 0xa161), (kernel_ulong_t)&spt_i2c_info },
        { PCI_VDEVICE(INTEL, 0xa166), (kernel_ulong_t)&spt_uart_info },
        /* KBL-H */
-       { PCI_VDEVICE(INTEL, 0xa2a7), (kernel_ulong_t)&kbl_uart_info },
-       { PCI_VDEVICE(INTEL, 0xa2a8), (kernel_ulong_t)&kbl_uart_info },
-       { PCI_VDEVICE(INTEL, 0xa2a9), (kernel_ulong_t)&kbl_info },
-       { PCI_VDEVICE(INTEL, 0xa2aa), (kernel_ulong_t)&kbl_info },
-       { PCI_VDEVICE(INTEL, 0xa2e0), (kernel_ulong_t)&kbl_i2c_info },
-       { PCI_VDEVICE(INTEL, 0xa2e1), (kernel_ulong_t)&kbl_i2c_info },
-       { PCI_VDEVICE(INTEL, 0xa2e2), (kernel_ulong_t)&kbl_i2c_info },
-       { PCI_VDEVICE(INTEL, 0xa2e3), (kernel_ulong_t)&kbl_i2c_info },
-       { PCI_VDEVICE(INTEL, 0xa2e6), (kernel_ulong_t)&kbl_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa2a7), (kernel_ulong_t)&spt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa2a8), (kernel_ulong_t)&spt_uart_info },
+       { PCI_VDEVICE(INTEL, 0xa2a9), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa2aa), (kernel_ulong_t)&spt_info },
+       { PCI_VDEVICE(INTEL, 0xa2e0), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa2e1), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa2e2), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa2e3), (kernel_ulong_t)&spt_i2c_info },
+       { PCI_VDEVICE(INTEL, 0xa2e6), (kernel_ulong_t)&spt_uart_info },
        { }
 };
 MODULE_DEVICE_TABLE(pci, intel_lpss_pci_ids);
index 41b113875d6452acc545085ffbc4c52c23079338..70c646b0097d8c70ded6c4f62a92711aa89bfd22 100644 (file)
@@ -502,9 +502,6 @@ int intel_lpss_suspend(struct device *dev)
        for (i = 0; i < LPSS_PRIV_REG_COUNT; i++)
                lpss->priv_ctx[i] = readl(lpss->priv + i * 4);
 
-       /* Put the device into reset state */
-       writel(0, lpss->priv + LPSS_PRIV_RESETS);
-
        return 0;
 }
 EXPORT_SYMBOL_GPL(intel_lpss_suspend);
index 43e54b7e908f0cc5a117f950db323ebfa6b246cf..f9a8c5203873a2f8b6ac4a68e5582eddd69b3103 100644 (file)
@@ -86,6 +86,7 @@ enum bxtwc_irqs_level2 {
        BXTWC_THRM2_IRQ,
        BXTWC_BCU_IRQ,
        BXTWC_ADC_IRQ,
+       BXTWC_USBC_IRQ,
        BXTWC_CHGR0_IRQ,
        BXTWC_CHGR1_IRQ,
        BXTWC_GPIO0_IRQ,
@@ -111,7 +112,8 @@ static const struct regmap_irq bxtwc_regmap_irqs_level2[] = {
        REGMAP_IRQ_REG(BXTWC_THRM2_IRQ, 2, 0xff),
        REGMAP_IRQ_REG(BXTWC_BCU_IRQ, 3, 0x1f),
        REGMAP_IRQ_REG(BXTWC_ADC_IRQ, 4, 0xff),
-       REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 5, 0x3f),
+       REGMAP_IRQ_REG(BXTWC_USBC_IRQ, 5, BIT(5)),
+       REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 5, 0x1f),
        REGMAP_IRQ_REG(BXTWC_CHGR1_IRQ, 6, 0x1f),
        REGMAP_IRQ_REG(BXTWC_GPIO0_IRQ, 7, 0xff),
        REGMAP_IRQ_REG(BXTWC_GPIO1_IRQ, 8, 0x3f),
@@ -146,7 +148,7 @@ static struct resource adc_resources[] = {
 };
 
 static struct resource usbc_resources[] = {
-       DEFINE_RES_IRQ_NAMED(BXTWC_CHGR0_IRQ, "USBC"),
+       DEFINE_RES_IRQ(BXTWC_USBC_IRQ),
 };
 
 static struct resource charger_resources[] = {
index 3ac486a597f3c31e8e362f1f9954098cdf081086..c57e407020f11dd19aff3a9dd4b9ba7ce9c7b351 100644 (file)
@@ -399,6 +399,8 @@ int mfd_clone_cell(const char *cell, const char **clones, size_t n_clones)
                                        clones[i]);
        }
 
+       put_device(dev);
+
        return 0;
 }
 EXPORT_SYMBOL(mfd_clone_cell);
index cfdae8a3d77976b3a5b543551d07834069a7b45d..b0c7bcdaf5df522f9d1a208fe325cf053c063fab 100644 (file)
@@ -851,6 +851,8 @@ static int stmpe_reset(struct stmpe *stmpe)
        if (ret < 0)
                return ret;
 
+       msleep(10);
+
        timeout = jiffies + msecs_to_jiffies(100);
        while (time_before(jiffies, timeout)) {
                ret = __stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_SYS_CTRL]);
index 2f2225e845efe960a9904fefc1d16fc5cde6b081..b93fe4c4957a06c947650ae67987cec11aca7d48 100644 (file)
@@ -73,8 +73,10 @@ static struct syscon *of_syscon_register(struct device_node *np)
        /* Parse the device's DT node for an endianness specification */
        if (of_property_read_bool(np, "big-endian"))
                syscon_config.val_format_endian = REGMAP_ENDIAN_BIG;
-        else if (of_property_read_bool(np, "little-endian"))
+       else if (of_property_read_bool(np, "little-endian"))
                syscon_config.val_format_endian = REGMAP_ENDIAN_LITTLE;
+       else if (of_property_read_bool(np, "native-endian"))
+               syscon_config.val_format_endian = REGMAP_ENDIAN_NATIVE;
 
        /*
         * search for reg-io-width property in DT. If it is not provided,
index 7eec619a6023c717803d05e318537e0589f6967e..8588dbad330119149ad112c0bf493e1c979d59a3 100644 (file)
@@ -393,8 +393,13 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
                BUG();
                goto err;
        }
-               
-       ret = devm_regulator_bulk_get(wm8994->dev, wm8994->num_supplies,
+
+       /*
+        * Can't use devres helper here as some of the supplies are provided by
+        * wm8994->dev's children (regulators) and those regulators are
+        * unregistered by the devres core before the supplies are freed.
+        */
+       ret = regulator_bulk_get(wm8994->dev, wm8994->num_supplies,
                                 wm8994->supplies);
        if (ret != 0) {
                dev_err(wm8994->dev, "Failed to get supplies: %d\n", ret);
@@ -405,7 +410,7 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq)
                                    wm8994->supplies);
        if (ret != 0) {
                dev_err(wm8994->dev, "Failed to enable supplies: %d\n", ret);
-               goto err;
+               goto err_regulator_free;
        }
 
        ret = wm8994_reg_read(wm8994, WM8994_SOFTWARE_RESET);
@@ -596,6 +601,8 @@ err_irq:
 err_enable:
        regulator_bulk_disable(wm8994->num_supplies,
                               wm8994->supplies);
+err_regulator_free:
+       regulator_bulk_free(wm8994->num_supplies, wm8994->supplies);
 err:
        mfd_remove_devices(wm8994->dev);
        return ret;
@@ -604,10 +611,11 @@ err:
 static void wm8994_device_exit(struct wm8994 *wm8994)
 {
        pm_runtime_disable(wm8994->dev);
-       mfd_remove_devices(wm8994->dev);
        wm8994_irq_exit(wm8994);
        regulator_bulk_disable(wm8994->num_supplies,
                               wm8994->supplies);
+       regulator_bulk_free(wm8994->num_supplies, wm8994->supplies);
+       mfd_remove_devices(wm8994->dev);
 }
 
 static const struct of_device_id wm8994_of_match[] = {
index f3d34b941f8599e91e121b7dfe8cbc115e9f138e..2e5233b6097110e72ae147f21ed15cf259b0a5a9 100644 (file)
@@ -229,6 +229,14 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
        if (ctx->status == STARTED)
                goto out; /* already started */
 
+       /*
+        * Increment the mapped context count for adapter. This also checks
+        * if adapter_context_lock is taken.
+        */
+       rc = cxl_adapter_context_get(ctx->afu->adapter);
+       if (rc)
+               goto out;
+
        if (task) {
                ctx->pid = get_task_pid(task, PIDTYPE_PID);
                ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
@@ -239,7 +247,10 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
        cxl_ctx_get();
 
        if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
+               put_pid(ctx->glpid);
                put_pid(ctx->pid);
+               ctx->glpid = ctx->pid = NULL;
+               cxl_adapter_context_put(ctx->afu->adapter);
                cxl_ctx_put();
                goto out;
        }
index c466ee2b0c973a7c77cb16566770dec3b426db33..5e506c19108ad22da4a002957fd056711138b0f3 100644 (file)
@@ -238,6 +238,9 @@ int __detach_context(struct cxl_context *ctx)
        put_pid(ctx->glpid);
 
        cxl_ctx_put();
+
+       /* Decrease the attached context count on the adapter */
+       cxl_adapter_context_put(ctx->afu->adapter);
        return 0;
 }
 
index 01d372aba131416524ec565748ea4f9e52a795ee..a144073593fa1e5170bba669d7ba467eb06ada5b 100644 (file)
@@ -618,6 +618,14 @@ struct cxl {
        bool perst_select_user;
        bool perst_same_image;
        bool psl_timebase_synced;
+
+       /*
+        * number of contexts mapped on to this card. Possible values are:
+        * >0: Number of contexts mapped and new one can be mapped.
+        *  0: No active contexts and new ones can be mapped.
+        * -1: No contexts mapped and new ones cannot be mapped.
+        */
+       atomic_t contexts_num;
 };
 
 int cxl_pci_alloc_one_irq(struct cxl *adapter);
@@ -944,4 +952,20 @@ bool cxl_pci_is_vphb_device(struct pci_dev *dev);
 
 /* decode AFU error bits in the PSL register PSL_SERR_An */
 void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr);
+
+/*
+ * Increments the number of attached contexts on an adapter.
+ * In case an adapter_context_lock is taken then return -EBUSY.
+ */
+int cxl_adapter_context_get(struct cxl *adapter);
+
+/* Decrements the number of attached contexts on an adapter */
+void cxl_adapter_context_put(struct cxl *adapter);
+
+/* If no active contexts then prevents contexts from being attached */
+int cxl_adapter_context_lock(struct cxl *adapter);
+
+/* Unlock the contexts-lock if taken. Warn and force unlock otherwise */
+void cxl_adapter_context_unlock(struct cxl *adapter);
+
 #endif
index 5fb9894b157faaae236fd75d4b41cceba069b3d1..77080cc5fa0aa4cdbc476729e4cdabcac8afae7b 100644 (file)
@@ -193,6 +193,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 
        ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF);
 
+       /*
+        * Increment the mapped context count for adapter. This also checks
+        * if adapter_context_lock is taken.
+        */
+       rc = cxl_adapter_context_get(ctx->afu->adapter);
+       if (rc) {
+               afu_release_irqs(ctx, ctx);
+               goto out;
+       }
+
        /*
         * We grab the PID here and not in the file open to allow for the case
         * where a process (master, some daemon, etc) has opened the chardev on
@@ -205,11 +215,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
        ctx->pid = get_task_pid(current, PIDTYPE_PID);
        ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID);
 
+
        trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
 
        if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
                                                        amr))) {
                afu_release_irqs(ctx, ctx);
+               cxl_adapter_context_put(ctx->afu->adapter);
+               put_pid(ctx->glpid);
+               put_pid(ctx->pid);
+               ctx->glpid = ctx->pid = NULL;
                goto out;
        }
 
index 9aa58a77a24d13f102546a9c45aa48d780326364..3e102cd6ed914d992152128423cce8cbab33e830 100644 (file)
@@ -1152,6 +1152,9 @@ struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_devic
        if ((rc = cxl_sysfs_adapter_add(adapter)))
                goto err_put1;
 
+       /* release the context lock as the adapter is configured */
+       cxl_adapter_context_unlock(adapter);
+
        return adapter;
 
 err_put1:
index d9be23b24aa3b88dce63a8c3f1bf38f9c66ca5ef..62e0dfb5f15b62d64980bb008d448038f8fd3c36 100644 (file)
@@ -243,8 +243,10 @@ struct cxl *cxl_alloc_adapter(void)
        if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))
                goto err2;
 
-       return adapter;
+       /* start with context lock taken */
+       atomic_set(&adapter->contexts_num, -1);
 
+       return adapter;
 err2:
        cxl_remove_adapter_nr(adapter);
 err1:
@@ -286,6 +288,44 @@ int cxl_afu_select_best_mode(struct cxl_afu *afu)
        return 0;
 }
 
+int cxl_adapter_context_get(struct cxl *adapter)
+{
+       /* Zero (never a negative value) means contexts_num is < 0: locked */
+       if (!atomic_inc_unless_negative(&adapter->contexts_num))
+               return -EBUSY;
+       return 0;
+}
+
+void cxl_adapter_context_put(struct cxl *adapter)
+{
+       atomic_dec_if_positive(&adapter->contexts_num);
+}
+
+int cxl_adapter_context_lock(struct cxl *adapter)
+{
+       int rc;
+       /* swap 0 (no active contexts) for -1 (locked); else report -EBUSY */
+       rc = atomic_cmpxchg(&adapter->contexts_num, 0, -1);
+       return rc ? -EBUSY : 0;
+}
+
+void cxl_adapter_context_unlock(struct cxl *adapter)
+{
+       int val = atomic_cmpxchg(&adapter->contexts_num, -1, 0);
+
+       /*
+        * contexts lock taken -> contexts_num == -1
+        * If not true then show a warning and force reset the lock.
+        * This will happen when context_unlock was requested without
+        * doing a context_lock.
+        */
+       if (val != -1) {
+               atomic_set(&adapter->contexts_num, 0);
+               WARN(1, "Adapter context unlocked with %d active contexts",
+                    val);
+       }
+}
+
 static int __init init_cxl(void)
 {
        int rc = 0;
index 7afad8477ad55358f3608abefcf85795407a99a1..e96be9ca4e60437db6bfba9b098fad852790f722 100644 (file)
@@ -1487,6 +1487,8 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
        if ((rc = cxl_native_register_psl_err_irq(adapter)))
                goto err;
 
+       /* Release the context lock as adapter is configured */
+       cxl_adapter_context_unlock(adapter);
        return 0;
 
 err:
index b043c20f158f122d11bbf5dc765ae957ebf9155d..a8b6d6a635e962b057325d9d5d6af96382474eba 100644 (file)
@@ -75,12 +75,31 @@ static ssize_t reset_adapter_store(struct device *device,
        int val;
 
        rc = sscanf(buf, "%i", &val);
-       if ((rc != 1) || (val != 1))
+       if ((rc != 1) || (val != 1 && val != -1))
                return -EINVAL;
 
-       if ((rc = cxl_ops->adapter_reset(adapter)))
-               return rc;
-       return count;
+       /*
+        * See if we can lock the context mapping that's only allowed
+        * when there are no contexts attached to the adapter. Once
+        * taken this will also prevent any context from getting activated.
+        */
+       if (val == 1) {
+               rc =  cxl_adapter_context_lock(adapter);
+               if (rc)
+                       goto out;
+
+               rc = cxl_ops->adapter_reset(adapter);
+               /* In case reset failed release context lock */
+               if (rc)
+                       cxl_adapter_context_unlock(adapter);
+
+       } else if (val == -1) {
+               /* Perform a forced adapter reset */
+               rc = cxl_ops->adapter_reset(adapter);
+       }
+
+out:
+       return rc ? rc : count;
 }
 
 static ssize_t load_image_on_perst_show(struct device *device,
index 8a679ecc8fd108d25ac4ecae8f1f24a33890107e..fc2794b513faf12ca043e760524046ad2d72a9ed 100644 (file)
@@ -352,17 +352,27 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
                if (copy_from_user(sgl->lpage, user_addr + user_size -
                                   sgl->lpage_size, sgl->lpage_size)) {
                        rc = -EFAULT;
-                       goto err_out1;
+                       goto err_out2;
                }
        }
        return 0;
 
+ err_out2:
+       __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
+                                sgl->lpage_dma_addr);
+       sgl->lpage = NULL;
+       sgl->lpage_dma_addr = 0;
  err_out1:
        __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
                                 sgl->fpage_dma_addr);
+       sgl->fpage = NULL;
+       sgl->fpage_dma_addr = 0;
  err_out:
        __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
                                 sgl->sgl_dma_addr);
+       sgl->sgl = NULL;
+       sgl->sgl_dma_addr = 0;
+       sgl->sgl_size = 0;
        return -ENOMEM;
 }
 
index e9e6ea3ab73cf3500657d8c6001cb4eff5712568..75b9d4ac8b1e37fa3eb317aa728a4024c530539e 100644 (file)
@@ -178,7 +178,7 @@ static int mei_nfc_if_version(struct mei_cl *cl,
 
        ret = 0;
        bytes_recv = __mei_cl_recv(cl, (u8 *)reply, if_version_length);
-       if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) {
+       if (bytes_recv < if_version_length) {
                dev_err(bus->dev, "Could not read IF version\n");
                ret = -EIO;
                goto err;
index e6e5e55a12ed45f09577899198e7a8735ae7a0e8..60415a2bfcbd4c3f67a832bf3bc388b0af9a1270 100644 (file)
@@ -981,11 +981,13 @@ static bool mei_txe_check_and_ack_intrs(struct mei_device *dev, bool do_ack)
        hisr = mei_txe_br_reg_read(hw, HISR_REG);
 
        aliveness = mei_txe_aliveness_get(dev);
-       if (hhisr & IPC_HHIER_SEC && aliveness)
+       if (hhisr & IPC_HHIER_SEC && aliveness) {
                ipc_isr = mei_txe_sec_reg_read_silent(hw,
                                SEC_IPC_HOST_INT_STATUS_REG);
-       else
+       } else {
                ipc_isr = 0;
+               hhisr &= ~IPC_HHIER_SEC;
+       }
 
        generated = generated ||
                (hisr & HISR_INT_STS_MSK) ||
index e0203b1a20fd17fe88d9db3dc882d4d64eed2cc5..f806a4471eb913f388042886de29ec8de996cbf5 100644 (file)
@@ -1396,8 +1396,7 @@ retry:
                pinned_pages->nr_pages = get_user_pages(
                                (u64)addr,
                                nr_pages,
-                               !!(prot & SCIF_PROT_WRITE),
-                               0,
+                               (prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0,
                                pinned_pages->pages,
                                NULL);
                up_write(&mm->mmap_sem);
index a2d97b9b17e3463cebea03c70ec062cafcdac08c..6fb773dbcd0c3233d62136dcf673afb7b80efcea 100644 (file)
@@ -198,7 +198,7 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
 #else
        *pageshift = PAGE_SHIFT;
 #endif
-       if (get_user_pages(vaddr, 1, write, 0, &page, NULL) <= 0)
+       if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0)
                return -EFAULT;
        *paddr = page_to_phys(page);
        put_page(page);
index 1525870f460aa65d0aa1b24baf119cb490ca35ed..33741ad4a74a0ee19af50cac294d52f5aa5726ab 100644 (file)
@@ -283,7 +283,7 @@ static void gru_unload_mm_tracker(struct gru_state *gru,
        spin_lock(&gru->gs_asid_lock);
        BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
        asids->mt_ctxbitmap ^= ctxbitmap;
-       gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum 0x%d, asidmap 0x%lx\n",
+       gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n",
                gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
        spin_unlock(&gru->gs_asid_lock);
        spin_unlock(&gms->ms_asid_lock);
index a8cee33ae8d2eaa6187945026e307168c96d0b45..b3fa738ae0050b48ba3f07ef3a02460d72898ea4 100644 (file)
@@ -431,6 +431,12 @@ int vmci_doorbell_create(struct vmci_handle *handle,
        if (vmci_handle_is_invalid(*handle)) {
                u32 context_id = vmci_get_context_id();
 
+               if (context_id == VMCI_INVALID_ID) {
+                       pr_warn("Failed to get context ID\n");
+                       result = VMCI_ERROR_NO_RESOURCES;
+                       goto free_mem;
+               }
+
                /* Let resource code allocate a free ID for us */
                new_handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
        } else {
@@ -525,7 +531,7 @@ int vmci_doorbell_destroy(struct vmci_handle handle)
 
        entry = container_of(resource, struct dbell_entry, resource);
 
-       if (vmci_guest_code_active()) {
+       if (!hlist_unhashed(&entry->node)) {
                int result;
 
                dbell_index_table_remove(entry);
index 896be150e28fa5e0802f85e47cf882fe2ea4104d..d7eaf1eb11e7f3e67646dd7da053213a78fd35bd 100644 (file)
@@ -113,5 +113,5 @@ module_exit(vmci_drv_exit);
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface.");
-MODULE_VERSION("1.1.4.0-k");
+MODULE_VERSION("1.1.5.0-k");
 MODULE_LICENSE("GPL v2");
index c3335112e68c29cfe4a16eae0aabf4368682d357..709a872ed484a9da1ce620238c3222190c612f86 100644 (file)
@@ -46,6 +46,7 @@
 #include <asm/uaccess.h>
 
 #include "queue.h"
+#include "block.h"
 
 MODULE_ALIAS("mmc:block");
 #ifdef MODULE_PARAM_PREFIX
@@ -1786,7 +1787,7 @@ static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq,
        struct mmc_blk_data *md = mq->data;
        struct mmc_packed *packed = mqrq->packed;
        bool do_rel_wr, do_data_tag;
-       u32 *packed_cmd_hdr;
+       __le32 *packed_cmd_hdr;
        u8 hdr_blocks;
        u8 i = 1;
 
index 5a8dc5a76e0dffae3fef30d3eb93591994d733c8..3678220964fe62948a9a4d1aa2fed06b9c1a3a66 100644 (file)
@@ -2347,7 +2347,7 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test,
        struct mmc_test_req *rq = mmc_test_req_alloc();
        struct mmc_host *host = test->card->host;
        struct mmc_test_area *t = &test->area;
-       struct mmc_async_req areq;
+       struct mmc_test_async_req test_areq = { .test = test };
        struct mmc_request *mrq;
        unsigned long timeout;
        bool expired = false;
@@ -2363,8 +2363,8 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test,
                mrq->sbc = &rq->sbc;
        mrq->cap_cmd_during_tfr = true;
 
-       areq.mrq = mrq;
-       areq.err_check = mmc_test_check_result_async;
+       test_areq.areq.mrq = mrq;
+       test_areq.areq.err_check = mmc_test_check_result_async;
 
        mmc_test_prepare_mrq(test, mrq, t->sg, t->sg_len, dev_addr, t->blocks,
                             512, write);
@@ -2378,7 +2378,7 @@ static int mmc_test_ongoing_transfer(struct mmc_test_card *test,
 
        /* Start ongoing data request */
        if (use_areq) {
-               mmc_start_req(host, &areq, &ret);
+               mmc_start_req(host, &test_areq.areq, &ret);
                if (ret)
                        goto out_free;
        } else {
index 3c15a75bae862d0466ad0ca43bef2b5e49ba13ed..342f1e3f301e9e6e7d0cc918f3f38aae32a88480 100644 (file)
@@ -31,7 +31,7 @@ enum mmc_packed_type {
 
 struct mmc_packed {
        struct list_head        list;
-       u32                     cmd_hdr[1024];
+       __le32                  cmd_hdr[1024];
        unsigned int            blocks;
        u8                      nr_entries;
        u8                      retries;
index 3486bc7fbb64a67a4cf1156c2ac7652541ef60bb..df19777068a6237f388bb4bb9c1a8ee6917ea64b 100644 (file)
@@ -26,6 +26,8 @@
 #include "mmc_ops.h"
 #include "sd_ops.h"
 
+#define DEFAULT_CMD6_TIMEOUT_MS        500
+
 static const unsigned int tran_exp[] = {
        10000,          100000,         1000000,        10000000,
        0,              0,              0,              0
@@ -571,6 +573,7 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
                card->erased_byte = 0x0;
 
        /* eMMC v4.5 or later */
+       card->ext_csd.generic_cmd6_time = DEFAULT_CMD6_TIMEOUT_MS;
        if (card->ext_csd.rev >= 6) {
                card->ext_csd.feature_support |= MMC_DISCARD_FEATURE;
 
@@ -1263,6 +1266,16 @@ static int mmc_select_hs400es(struct mmc_card *card)
                goto out_err;
        }
 
+       if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400_1_2V)
+               err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
+
+       if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400_1_8V)
+               err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
+
+       /* If fails try again during next card power cycle */
+       if (err)
+               goto out_err;
+
        err = mmc_select_bus_width(card);
        if (err < 0)
                goto out_err;
@@ -1272,6 +1285,8 @@ static int mmc_select_hs400es(struct mmc_card *card)
        if (err)
                goto out_err;
 
+       mmc_set_clock(host, card->ext_csd.hs_max_dtr);
+
        err = mmc_switch_status(card);
        if (err)
                goto out_err;
index c0bb0c793e84b2744586db19a5f559f31d207cd2..dbbc4303bdd0fb2ce0fa6c5d478b0f5a9f1ed70b 100644 (file)
@@ -46,12 +46,13 @@ int dw_mci_pltfm_register(struct platform_device *pdev,
        host->pdata = pdev->dev.platform_data;
 
        regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       /* Get registers' physical base address */
-       host->phy_regs = regs->start;
        host->regs = devm_ioremap_resource(&pdev->dev, regs);
        if (IS_ERR(host->regs))
                return PTR_ERR(host->regs);
 
+       /* Get registers' physical base address */
+       host->phy_regs = regs->start;
+
        platform_set_drvdata(pdev, host);
        return dw_mci_probe(host);
 }
index 4fcbc4012ed03b554185cf42eeb1567b915745f6..df478ae72e23235ca8939128b2ad37497aaf4b71 100644 (file)
@@ -1058,6 +1058,7 @@ static int dw_mci_submit_data_dma(struct dw_mci *host, struct mmc_data *data)
        spin_unlock_irqrestore(&host->irq_lock, irqflags);
 
        if (host->dma_ops->start(host, sg_len)) {
+               host->dma_ops->stop(host);
                /* We can't do DMA, try PIO for this one */
                dev_dbg(host->dev,
                        "%s: fall back to PIO mode for current transfer\n",
@@ -2940,7 +2941,7 @@ static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host)
                return ERR_PTR(-ENOMEM);
 
        /* find reset controller when exist */
-       pdata->rstc = devm_reset_control_get_optional(dev, NULL);
+       pdata->rstc = devm_reset_control_get_optional(dev, "reset");
        if (IS_ERR(pdata->rstc)) {
                if (PTR_ERR(pdata->rstc) == -EPROBE_DEFER)
                        return ERR_PTR(-EPROBE_DEFER);
index d839147e591d24f5d5d0a97d389ea04ffbaa9883..44ecebd1ea8c1834a5d311fbe36ddfc8383e3ecd 100644 (file)
@@ -661,13 +661,13 @@ static int mxs_mmc_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, mmc);
 
+       spin_lock_init(&host->lock);
+
        ret = devm_request_irq(&pdev->dev, irq_err, mxs_mmc_irq_handler, 0,
                               dev_name(&pdev->dev), host);
        if (ret)
                goto out_free_dma;
 
-       spin_lock_init(&host->lock);
-
        ret = mmc_add_host(mmc);
        if (ret)
                goto out_free_dma;
index 4106295527b9d0c5a4128f44e09c190791328ca9..6e9c0f8fddb1064c64c195812fdf8ad220728360 100644 (file)
@@ -1138,11 +1138,6 @@ static void sdmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
        dev_dbg(sdmmc_dev(host), "%s\n", __func__);
        mutex_lock(&ucr->dev_mutex);
 
-       if (rtsx_usb_card_exclusive_check(ucr, RTSX_USB_SD_CARD)) {
-               mutex_unlock(&ucr->dev_mutex);
-               return;
-       }
-
        sd_set_power_mode(host, ios->power_mode);
        sd_set_bus_width(host, ios->bus_width);
        sd_set_timing(host, ios->timing, &host->ddr_mode);
@@ -1314,6 +1309,7 @@ static void rtsx_usb_update_led(struct work_struct *work)
                container_of(work, struct rtsx_usb_sdmmc, led_work);
        struct rtsx_ucr *ucr = host->ucr;
 
+       pm_runtime_get_sync(sdmmc_dev(host));
        mutex_lock(&ucr->dev_mutex);
 
        if (host->led.brightness == LED_OFF)
@@ -1322,6 +1318,7 @@ static void rtsx_usb_update_led(struct work_struct *work)
                rtsx_usb_turn_on_led(ucr);
 
        mutex_unlock(&ucr->dev_mutex);
+       pm_runtime_put(sdmmc_dev(host));
 }
 #endif
 
index 1f54fd8755c8e026fd8fb99f7fdb1cbc154acc87..7123ef96ed18523c88553146035103f3517bd372 100644 (file)
@@ -346,7 +346,8 @@ static void esdhc_writel_le(struct sdhci_host *host, u32 val, int reg)
        struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host);
        u32 data;
 
-       if (unlikely(reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE)) {
+       if (unlikely(reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE ||
+                       reg == SDHCI_INT_STATUS)) {
                if ((val & SDHCI_INT_CARD_INT) && !esdhc_is_usdhc(imx_data)) {
                        /*
                         * Clear and then set D3CD bit to avoid missing the
@@ -555,6 +556,25 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg)
        esdhc_clrset_le(host, 0xffff, val, reg);
 }
 
+static u8 esdhc_readb_le(struct sdhci_host *host, int reg)
+{
+       u8 ret;
+       u32 val;
+
+       switch (reg) {
+       case SDHCI_HOST_CONTROL:
+               val = readl(host->ioaddr + reg);
+
+               ret = val & SDHCI_CTRL_LED;
+               ret |= (val >> 5) & SDHCI_CTRL_DMA_MASK;
+               ret |= (val & ESDHC_CTRL_4BITBUS);
+               ret |= (val & ESDHC_CTRL_8BITBUS) << 3;
+               return ret;
+       }
+
+       return readb(host->ioaddr + reg);
+}
+
 static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg)
 {
        struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -947,6 +967,7 @@ static void esdhc_set_timeout(struct sdhci_host *host, struct mmc_command *cmd)
 static struct sdhci_ops sdhci_esdhc_ops = {
        .read_l = esdhc_readl_le,
        .read_w = esdhc_readw_le,
+       .read_b = esdhc_readb_le,
        .write_l = esdhc_writel_le,
        .write_w = esdhc_writew_le,
        .write_b = esdhc_writeb_le,
index 8ef44a2a2fd94b6572e0cc5feda1efd0b698f7a6..90ed2e12d345d4ee91f6088aecac306897e2c266 100644 (file)
@@ -647,6 +647,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
        if (msm_host->pwr_irq < 0) {
                dev_err(&pdev->dev, "Get pwr_irq failed (%d)\n",
                        msm_host->pwr_irq);
+               ret = msm_host->pwr_irq;
                goto clk_disable;
        }
 
index da8e40af6f85e82143bb222f38d5663a2dbf1766..410a55b1c25fe5f2ef32ff8f2d26c4c3286f4b71 100644 (file)
@@ -250,7 +250,7 @@ static void sdhci_arasan_hs400_enhanced_strobe(struct mmc_host *mmc,
        writel(vendor, host->ioaddr + SDHCI_ARASAN_VENDOR_REGISTER);
 }
 
-void sdhci_arasan_reset(struct sdhci_host *host, u8 mask)
+static void sdhci_arasan_reset(struct sdhci_host *host, u8 mask)
 {
        u8 ctrl;
        struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -265,6 +265,28 @@ void sdhci_arasan_reset(struct sdhci_host *host, u8 mask)
        }
 }
 
+static int sdhci_arasan_voltage_switch(struct mmc_host *mmc,
+                                      struct mmc_ios *ios)
+{
+       switch (ios->signal_voltage) {
+       case MMC_SIGNAL_VOLTAGE_180:
+               /*
+                * Please don't switch to 1V8: arasan,5.1 doesn't
+                * actually refer to this setting to indicate the
+                * signal voltage, and its state machine breaks if
+                * we force 1V8 on. It is effectively a broken-hardware
+                * quirk, which we work around here by reporting success.
+                */
+               return 0;
+       case MMC_SIGNAL_VOLTAGE_330:
+       case MMC_SIGNAL_VOLTAGE_120:
+               /* We don't support 3V3 and 1V2 */
+               break;
+       }
+
+       return -EINVAL;
+}
+
 static struct sdhci_ops sdhci_arasan_ops = {
        .set_clock = sdhci_arasan_set_clock,
        .get_max_clock = sdhci_pltfm_clk_get_max_clock,
@@ -661,6 +683,8 @@ static int sdhci_arasan_probe(struct platform_device *pdev)
 
                host->mmc_host_ops.hs400_enhanced_strobe =
                                        sdhci_arasan_hs400_enhanced_strobe;
+               host->mmc_host_ops.start_signal_voltage_switch =
+                                       sdhci_arasan_voltage_switch;
        }
 
        ret = sdhci_add_host(host);
index fb71c866eacc7028918e1abb667b230e784a47f4..1bb11e4a9fe53f7e01eb81bb87482f6bd96a56f1 100644 (file)
@@ -66,6 +66,20 @@ static u32 esdhc_readl_fixup(struct sdhci_host *host,
                        return ret;
                }
        }
+       /*
+        * The DAT[3:0] line signal levels and the CMD line signal level are
+        * not compatible with standard SDHC register. The line signal levels
+        * DAT[7:0] are at bits 31:24 and the command line signal level is at
+        * bit 23. All other bits are the same as in the standard SDHC
+        * register.
+        */
+       if (spec_reg == SDHCI_PRESENT_STATE) {
+               ret = value & 0x000fffff;
+               ret |= (value >> 4) & SDHCI_DATA_LVL_MASK;
+               ret |= (value << 1) & SDHCI_CMD_LVL;
+               return ret;
+       }
+
        ret = value;
        return ret;
 }
index 72a1f1f5180a9cc12f5bdc7fffaa03685500f6e0..1d9e00a00e9fc986eb0bb01ff55a0966f03f7d5c 100644 (file)
 #include "sdhci-pci.h"
 #include "sdhci-pci-o2micro.h"
 
+static int sdhci_pci_enable_dma(struct sdhci_host *host);
+static void sdhci_pci_set_bus_width(struct sdhci_host *host, int width);
+static void sdhci_pci_hw_reset(struct sdhci_host *host);
+static int sdhci_pci_select_drive_strength(struct sdhci_host *host,
+                                          struct mmc_card *card,
+                                          unsigned int max_dtr, int host_drv,
+                                          int card_drv, int *drv_type);
+
 /*****************************************************************************\
  *                                                                           *
  * Hardware specific quirk handling                                          *
@@ -390,6 +398,45 @@ static int byt_sd_probe_slot(struct sdhci_pci_slot *slot)
        return 0;
 }
 
+#define SDHCI_INTEL_PWR_TIMEOUT_CNT    20
+#define SDHCI_INTEL_PWR_TIMEOUT_UDELAY 100
+
+static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode,
+                                 unsigned short vdd)
+{
+       int cntr;
+       u8 reg;
+
+       sdhci_set_power(host, mode, vdd);
+
+       if (mode == MMC_POWER_OFF)
+               return;
+
+       /*
+        * Bus power might not enable after D3 -> D0 transition due to the
+        * present state not yet having propagated. Retry for up to 2ms.
+        */
+       for (cntr = 0; cntr < SDHCI_INTEL_PWR_TIMEOUT_CNT; cntr++) {
+               reg = sdhci_readb(host, SDHCI_POWER_CONTROL);
+               if (reg & SDHCI_POWER_ON)
+                       break;
+               udelay(SDHCI_INTEL_PWR_TIMEOUT_UDELAY);
+               reg |= SDHCI_POWER_ON;
+               sdhci_writeb(host, reg, SDHCI_POWER_CONTROL);
+       }
+}
+
+static const struct sdhci_ops sdhci_intel_byt_ops = {
+       .set_clock              = sdhci_set_clock,
+       .set_power              = sdhci_intel_set_power,
+       .enable_dma             = sdhci_pci_enable_dma,
+       .set_bus_width          = sdhci_pci_set_bus_width,
+       .reset                  = sdhci_reset,
+       .set_uhs_signaling      = sdhci_set_uhs_signaling,
+       .hw_reset               = sdhci_pci_hw_reset,
+       .select_drive_strength  = sdhci_pci_select_drive_strength,
+};
+
 static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = {
        .allow_runtime_pm = true,
        .probe_slot     = byt_emmc_probe_slot,
@@ -397,6 +444,7 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = {
        .quirks2        = SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
                          SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400 |
                          SDHCI_QUIRK2_STOP_WITH_TC,
+       .ops            = &sdhci_intel_byt_ops,
 };
 
 static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = {
@@ -405,6 +453,7 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = {
                        SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
        .allow_runtime_pm = true,
        .probe_slot     = byt_sdio_probe_slot,
+       .ops            = &sdhci_intel_byt_ops,
 };
 
 static const struct sdhci_pci_fixes sdhci_intel_byt_sd = {
@@ -415,6 +464,7 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sd = {
        .allow_runtime_pm = true,
        .own_cd_for_runtime_pm = true,
        .probe_slot     = byt_sd_probe_slot,
+       .ops            = &sdhci_intel_byt_ops,
 };
 
 /* Define Host controllers for Intel Merrifield platform */
@@ -1648,7 +1698,9 @@ static struct sdhci_pci_slot *sdhci_pci_probe_slot(
        }
 
        host->hw_name = "PCI";
-       host->ops = &sdhci_pci_ops;
+       host->ops = chip->fixes && chip->fixes->ops ?
+                   chip->fixes->ops :
+                   &sdhci_pci_ops;
        host->quirks = chip->quirks;
        host->quirks2 = chip->quirks2;
 
index 9c7c08b9322387f7914024ed404055ed7ba158cb..6bccf56bc5fff654d203ead074a6a53ae0409623 100644 (file)
@@ -65,6 +65,8 @@ struct sdhci_pci_fixes {
 
        int                     (*suspend) (struct sdhci_pci_chip *);
        int                     (*resume) (struct sdhci_pci_chip *);
+
+       const struct sdhci_ops  *ops;
 };
 
 struct sdhci_pci_slot {
index dd1938d341f7ae2af1de80bfa4f39678b865441d..d0f5c05fbc195fcf568d482efa0c5a41a0754a44 100644 (file)
@@ -315,7 +315,7 @@ static void pxav3_set_power(struct sdhci_host *host, unsigned char mode,
        struct mmc_host *mmc = host->mmc;
        u8 pwr = host->pwr;
 
-       sdhci_set_power(host, mode, vdd);
+       sdhci_set_power_noreg(host, mode, vdd);
 
        if (host->pwr == pwr)
                return;
index 48055666c6557a98286dd6d07118c82a218f5c76..42ef3ebb1d8cf9d57f30e48d21c3a5250aea16ea 100644 (file)
@@ -687,7 +687,7 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd)
                         * host->clock is in Hz.  target_timeout is in us.
                         * Hence, us = 1000000 * cycles / Hz.  Round up.
                         */
-                       val = 1000000 * data->timeout_clks;
+                       val = 1000000ULL * data->timeout_clks;
                        if (do_div(val, host->clock))
                                target_timeout++;
                        target_timeout += val;
@@ -1077,6 +1077,10 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd)
        /* Initially, a command has no error */
        cmd->error = 0;
 
+       if ((host->quirks2 & SDHCI_QUIRK2_STOP_WITH_TC) &&
+           cmd->opcode == MMC_STOP_TRANSMISSION)
+               cmd->flags |= MMC_RSP_BUSY;
+
        /* Wait max 10 ms */
        timeout = 10;
 
@@ -1390,8 +1394,8 @@ static void sdhci_set_power_reg(struct sdhci_host *host, unsigned char mode,
                sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
 }
 
-void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
-                    unsigned short vdd)
+void sdhci_set_power_noreg(struct sdhci_host *host, unsigned char mode,
+                          unsigned short vdd)
 {
        u8 pwr = 0;
 
@@ -1455,20 +1459,17 @@ void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
                        mdelay(10);
        }
 }
-EXPORT_SYMBOL_GPL(sdhci_set_power);
+EXPORT_SYMBOL_GPL(sdhci_set_power_noreg);
 
-static void __sdhci_set_power(struct sdhci_host *host, unsigned char mode,
-                             unsigned short vdd)
+void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
+                    unsigned short vdd)
 {
-       struct mmc_host *mmc = host->mmc;
-
-       if (host->ops->set_power)
-               host->ops->set_power(host, mode, vdd);
-       else if (!IS_ERR(mmc->supply.vmmc))
-               sdhci_set_power_reg(host, mode, vdd);
+       if (IS_ERR(host->mmc->supply.vmmc))
+               sdhci_set_power_noreg(host, mode, vdd);
        else
-               sdhci_set_power(host, mode, vdd);
+               sdhci_set_power_reg(host, mode, vdd);
 }
+EXPORT_SYMBOL_GPL(sdhci_set_power);
 
 /*****************************************************************************\
  *                                                                           *
@@ -1609,7 +1610,10 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
                }
        }
 
-       __sdhci_set_power(host, ios->power_mode, ios->vdd);
+       if (host->ops->set_power)
+               host->ops->set_power(host, ios->power_mode, ios->vdd);
+       else
+               sdhci_set_power(host, ios->power_mode, ios->vdd);
 
        if (host->ops->platform_send_init_74_clocks)
                host->ops->platform_send_init_74_clocks(host, ios->power_mode);
@@ -2082,6 +2086,10 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 
                if (!host->tuning_done) {
                        pr_info(DRIVER_NAME ": Timeout waiting for Buffer Read Ready interrupt during tuning procedure, falling back to fixed sampling clock\n");
+
+                       sdhci_do_reset(host, SDHCI_RESET_CMD);
+                       sdhci_do_reset(host, SDHCI_RESET_DATA);
+
                        ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
                        ctrl &= ~SDHCI_CTRL_TUNED_CLK;
                        ctrl &= ~SDHCI_CTRL_EXEC_TUNING;
@@ -2282,10 +2290,8 @@ static bool sdhci_request_done(struct sdhci_host *host)
 
        for (i = 0; i < SDHCI_MAX_MRQS; i++) {
                mrq = host->mrqs_done[i];
-               if (mrq) {
-                       host->mrqs_done[i] = NULL;
+               if (mrq)
                        break;
-               }
        }
 
        if (!mrq) {
@@ -2316,6 +2322,17 @@ static bool sdhci_request_done(struct sdhci_host *host)
         * upon error conditions.
         */
        if (sdhci_needs_reset(host, mrq)) {
+               /*
+                * Do not finish until command and data lines are available for
+                * reset. Note there can only be one other mrq, so it cannot
+                * also be in mrqs_done, otherwise host->cmd and host->data_cmd
+                * would both be null.
+                */
+               if (host->cmd || host->data_cmd) {
+                       spin_unlock_irqrestore(&host->lock, flags);
+                       return true;
+               }
+
                /* Some controllers need this kick or reset won't work here */
                if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET)
                        /* This is to force an update */
@@ -2323,10 +2340,8 @@ static bool sdhci_request_done(struct sdhci_host *host)
 
                /* Spec says we should do both at the same time, but Ricoh
                   controllers do not like that. */
-               if (!host->cmd)
-                       sdhci_do_reset(host, SDHCI_RESET_CMD);
-               if (!host->data_cmd)
-                       sdhci_do_reset(host, SDHCI_RESET_DATA);
+               sdhci_do_reset(host, SDHCI_RESET_CMD);
+               sdhci_do_reset(host, SDHCI_RESET_DATA);
 
                host->pending_reset = false;
        }
@@ -2334,6 +2349,8 @@ static bool sdhci_request_done(struct sdhci_host *host)
        if (!sdhci_has_requests(host))
                sdhci_led_deactivate(host);
 
+       host->mrqs_done[i] = NULL;
+
        mmiowb();
        spin_unlock_irqrestore(&host->lock, flags);
 
@@ -2409,7 +2426,7 @@ static void sdhci_timeout_data_timer(unsigned long data)
  *                                                                           *
 \*****************************************************************************/
 
-static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *mask)
+static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask)
 {
        if (!host->cmd) {
                /*
@@ -2453,11 +2470,6 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *mask)
                return;
        }
 
-       if ((host->quirks2 & SDHCI_QUIRK2_STOP_WITH_TC) &&
-           !(host->cmd->flags & MMC_RSP_BUSY) && !host->data &&
-           host->cmd->opcode == MMC_STOP_TRANSMISSION)
-               *mask &= ~SDHCI_INT_DATA_END;
-
        if (intmask & SDHCI_INT_RESPONSE)
                sdhci_finish_command(host);
 }
@@ -2513,9 +2525,6 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
        if (!host->data) {
                struct mmc_command *data_cmd = host->data_cmd;
 
-               if (data_cmd)
-                       host->data_cmd = NULL;
-
                /*
                 * The "data complete" interrupt is also used to
                 * indicate that a busy state has ended. See comment
@@ -2523,11 +2532,13 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
                 */
                if (data_cmd && (data_cmd->flags & MMC_RSP_BUSY)) {
                        if (intmask & SDHCI_INT_DATA_TIMEOUT) {
+                               host->data_cmd = NULL;
                                data_cmd->error = -ETIMEDOUT;
                                sdhci_finish_mrq(host, data_cmd->mrq);
                                return;
                        }
                        if (intmask & SDHCI_INT_DATA_END) {
+                               host->data_cmd = NULL;
                                /*
                                 * Some cards handle busy-end interrupt
                                 * before the command completed, so make
@@ -2680,8 +2691,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
                }
 
                if (intmask & SDHCI_INT_CMD_MASK)
-                       sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK,
-                                     &intmask);
+                       sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK);
 
                if (intmask & SDHCI_INT_DATA_MASK)
                        sdhci_data_irq(host, intmask & SDHCI_INT_DATA_MASK);
@@ -2914,6 +2924,10 @@ int sdhci_runtime_resume_host(struct sdhci_host *host)
                spin_unlock_irqrestore(&host->lock, flags);
        }
 
+       if ((mmc->caps2 & MMC_CAP2_HS400_ES) &&
+           mmc->ops->hs400_enhanced_strobe)
+               mmc->ops->hs400_enhanced_strobe(mmc, &mmc->ios);
+
        spin_lock_irqsave(&host->lock, flags);
 
        host->runtime_suspended = false;
index c722cd23205cd2306ed42feb20f17173493d049e..2570455b219a469c1669ff4d3f9935ca89e37c6f 100644 (file)
@@ -73,6 +73,7 @@
 #define  SDHCI_DATA_LVL_MASK   0x00F00000
 #define   SDHCI_DATA_LVL_SHIFT 20
 #define   SDHCI_DATA_0_LVL_MASK        0x00100000
+#define  SDHCI_CMD_LVL         0x01000000
 
 #define SDHCI_HOST_CONTROL     0x28
 #define  SDHCI_CTRL_LED                0x01
@@ -683,6 +684,8 @@ u16 sdhci_calc_clk(struct sdhci_host *host, unsigned int clock,
 void sdhci_set_clock(struct sdhci_host *host, unsigned int clock);
 void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
                     unsigned short vdd);
+void sdhci_set_power_noreg(struct sdhci_host *host, unsigned char mode,
+                          unsigned short vdd);
 void sdhci_set_bus_width(struct sdhci_host *host, int width);
 void sdhci_reset(struct sdhci_host *host, u8 mask);
 void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing);
index 0f68a99fc4ad0135b5852327be6b7177b6fb5cfd..141bd70a49c2c5c888d290b724b2ed6a59af2216 100644 (file)
@@ -161,7 +161,7 @@ int gpmi_init(struct gpmi_nand_data *this)
 
        ret = gpmi_enable_clk(this);
        if (ret)
-               goto err_out;
+               return ret;
        ret = gpmi_reset_block(r->gpmi_regs, false);
        if (ret)
                goto err_out;
@@ -197,6 +197,7 @@ int gpmi_init(struct gpmi_nand_data *this)
        gpmi_disable_clk(this);
        return 0;
 err_out:
+       gpmi_disable_clk(this);
        return ret;
 }
 
@@ -270,7 +271,7 @@ int bch_set_geometry(struct gpmi_nand_data *this)
 
        ret = gpmi_enable_clk(this);
        if (ret)
-               goto err_out;
+               return ret;
 
        /*
        * Due to erratum #2847 of the MX23, the BCH cannot be soft reset on this
@@ -308,6 +309,7 @@ int bch_set_geometry(struct gpmi_nand_data *this)
        gpmi_disable_clk(this);
        return 0;
 err_out:
+       gpmi_disable_clk(this);
        return ret;
 }
 
index d54f666417e183c4f148826db042931b5d81f47c..dbf256217b3eb75a0486a0f2ec8774c05f793fe8 100644 (file)
@@ -86,6 +86,8 @@ struct mtk_ecc {
        struct completion done;
        struct mutex lock;
        u32 sectors;
+
+       u8 eccdata[112];
 };
 
 static inline void mtk_ecc_wait_idle(struct mtk_ecc *ecc,
@@ -366,9 +368,8 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config,
                   u8 *data, u32 bytes)
 {
        dma_addr_t addr;
-       u8 *p;
-       u32 len, i, val;
-       int ret = 0;
+       u32 len;
+       int ret;
 
        addr = dma_map_single(ecc->dev, data, bytes, DMA_TO_DEVICE);
        ret = dma_mapping_error(ecc->dev, addr);
@@ -393,14 +394,12 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config,
 
        /* Program ECC bytes to OOB: per sector oob = FDM + ECC + SPARE */
        len = (config->strength * ECC_PARITY_BITS + 7) >> 3;
-       p = data + bytes;
 
-       /* write the parity bytes generated by the ECC back to the OOB region */
-       for (i = 0; i < len; i++) {
-               if ((i % 4) == 0)
-                       val = readl(ecc->regs + ECC_ENCPAR(i / 4));
-               p[i] = (val >> ((i % 4) * 8)) & 0xff;
-       }
+       /* write the parity bytes generated by the ECC back to temp buffer */
+       __ioread32_copy(ecc->eccdata, ecc->regs + ECC_ENCPAR(0), round_up(len, 4));
+
+       /* copy into possibly unaligned OOB region with actual length */
+       memcpy(data + bytes, ecc->eccdata, len);
 timeout:
 
        dma_unmap_single(ecc->dev, addr, bytes, DMA_TO_DEVICE);
index e5718e5ecf925868012eb0332c43d071f32a727f..3bde96a3f7bfd5b8f066fc56af91fb1983279cdf 100644 (file)
@@ -1095,10 +1095,11 @@ static void nand_release_data_interface(struct nand_chip *chip)
 /**
  * nand_reset - Reset and initialize a NAND device
  * @chip: The NAND chip
+ * @chipnr: Internal die id
  *
  * Returns 0 for success or negative error code otherwise
  */
-int nand_reset(struct nand_chip *chip)
+int nand_reset(struct nand_chip *chip, int chipnr)
 {
        struct mtd_info *mtd = nand_to_mtd(chip);
        int ret;
@@ -1107,9 +1108,17 @@ int nand_reset(struct nand_chip *chip)
        if (ret)
                return ret;
 
+       /*
+        * The CS line has to be released before we can apply the new NAND
+        * interface settings, hence this weird ->select_chip() dance.
+        */
+       chip->select_chip(mtd, chipnr);
        chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+       chip->select_chip(mtd, -1);
 
+       chip->select_chip(mtd, chipnr);
        ret = nand_setup_data_interface(chip);
+       chip->select_chip(mtd, -1);
        if (ret)
                return ret;
 
@@ -1185,8 +1194,6 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
        /* Shift to get chip number */
        chipnr = ofs >> chip->chip_shift;
 
-       chip->select_chip(mtd, chipnr);
-
        /*
         * Reset the chip.
         * If we want to check the WP through READ STATUS and check the bit 7
@@ -1194,7 +1201,9 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
         * some operation can also clear the bit 7 of status register
         * eg. erase/program a locked block
         */
-       nand_reset(chip);
+       nand_reset(chip, chipnr);
+
+       chip->select_chip(mtd, chipnr);
 
        /* Check, if it is write protected */
        if (nand_check_wp(mtd)) {
@@ -1244,8 +1253,6 @@ int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
        /* Shift to get chip number */
        chipnr = ofs >> chip->chip_shift;
 
-       chip->select_chip(mtd, chipnr);
-
        /*
         * Reset the chip.
         * If we want to check the WP through READ STATUS and check the bit 7
@@ -1253,7 +1260,9 @@ int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
         * some operation can also clear the bit 7 of status register
         * eg. erase/program a locked block
         */
-       nand_reset(chip);
+       nand_reset(chip, chipnr);
+
+       chip->select_chip(mtd, chipnr);
 
        /* Check, if it is write protected */
        if (nand_check_wp(mtd)) {
@@ -2940,10 +2949,6 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
        }
 
        chipnr = (int)(to >> chip->chip_shift);
-       chip->select_chip(mtd, chipnr);
-
-       /* Shift to get page */
-       page = (int)(to >> chip->page_shift);
 
        /*
         * Reset the chip. Some chips (like the Toshiba TC5832DC found in one
@@ -2951,7 +2956,12 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
         * if we don't do this. I have no clue why, but I seem to have 'fixed'
         * it in the doc2000 driver in August 1999.  dwmw2.
         */
-       nand_reset(chip);
+       nand_reset(chip, chipnr);
+
+       chip->select_chip(mtd, chipnr);
+
+       /* Shift to get page */
+       page = (int)(to >> chip->page_shift);
 
        /* Check, if it is write protected */
        if (nand_check_wp(mtd)) {
@@ -3984,14 +3994,14 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
        int i, maf_idx;
        u8 id_data[8];
 
-       /* Select the device */
-       chip->select_chip(mtd, 0);
-
        /*
         * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx)
         * after power-up.
         */
-       nand_reset(chip);
+       nand_reset(chip, 0);
+
+       /* Select the device */
+       chip->select_chip(mtd, 0);
 
        /* Send the command for reading device ID */
        chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
@@ -4329,17 +4339,31 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips,
                return PTR_ERR(type);
        }
 
+       /* Initialize the ->data_interface field. */
        ret = nand_init_data_interface(chip);
        if (ret)
                return ret;
 
+       /*
+        * Setup the data interface correctly on the chip and controller side.
+        * This explicit call to nand_setup_data_interface() is only required
+        * for the first die, because nand_reset() has been called before
+        * ->data_interface and ->default_onfi_timing_mode were set.
+        * For the other dies, nand_reset() will automatically switch to the
+        * best mode for us.
+        */
+       ret = nand_setup_data_interface(chip);
+       if (ret)
+               return ret;
+
        chip->select_chip(mtd, -1);
 
        /* Check for a chip array */
        for (i = 1; i < maxchips; i++) {
-               chip->select_chip(mtd, i);
                /* See comment in nand_get_flash_type for reset */
-               nand_reset(chip);
+               nand_reset(chip, i);
+
+               chip->select_chip(mtd, i);
                /* Send the command for reading device ID */
                chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
                /* Read manufacturer and device IDs */
index 95c4048a371e87b6f5517b01109b19db85cedc56..388e46be6ad92805f2a6633da6960d8c56b1b837 100644 (file)
@@ -741,6 +741,7 @@ static int try_recover_peb(struct ubi_volume *vol, int pnum, int lnum,
                goto out_put;
        }
 
+       vid_hdr = ubi_get_vid_hdr(vidb);
        ubi_assert(vid_hdr->vol_type == UBI_VID_DYNAMIC);
 
        mutex_lock(&ubi->buf_mutex);
index d6384d9657885c31c01e8fa1a5a2ed82530b0891..c1f5c29e458ef86305376fa7b404f9c5b8e05681 100644 (file)
@@ -287,7 +287,7 @@ static int update_vol(struct ubi_device *ubi, struct ubi_attach_info *ai,
 
                /* new_aeb is newer */
                if (cmp_res & 1) {
-                       victim = ubi_alloc_aeb(ai, aeb->ec, aeb->pnum);
+                       victim = ubi_alloc_aeb(ai, aeb->pnum, aeb->ec);
                        if (!victim)
                                return -ENOMEM;
 
@@ -707,11 +707,11 @@ static int ubi_attach_fastmap(struct ubi_device *ubi,
                             fmvhdr->vol_type,
                             be32_to_cpu(fmvhdr->last_eb_bytes));
 
-               if (!av)
-                       goto fail_bad;
-               if (PTR_ERR(av) == -EINVAL) {
-                       ubi_err(ubi, "volume (ID %i) already exists",
-                               fmvhdr->vol_id);
+               if (IS_ERR(av)) {
+                       if (PTR_ERR(av) == -EEXIST)
+                               ubi_err(ubi, "volume (ID %i) already exists",
+                                       fmvhdr->vol_id);
+
                        goto fail_bad;
                }
 
index c9944d86d0457beea1adf29b218a284892892d28..8029dd4912b6f950e8ab5f06d5de9747d6832a02 100644 (file)
@@ -199,7 +199,7 @@ MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where "
 atomic_t netpoll_block_tx = ATOMIC_INIT(0);
 #endif
 
-int bond_net_id __read_mostly;
+unsigned int bond_net_id __read_mostly;
 
 static __be32 arp_target[BOND_MAX_ARP_TARGETS];
 static int arp_ip_count;
@@ -4080,16 +4080,16 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
        return ret;
 }
 
-static int bond_ethtool_get_settings(struct net_device *bond_dev,
-                                    struct ethtool_cmd *ecmd)
+static int bond_ethtool_get_link_ksettings(struct net_device *bond_dev,
+                                          struct ethtool_link_ksettings *cmd)
 {
        struct bonding *bond = netdev_priv(bond_dev);
        unsigned long speed = 0;
        struct list_head *iter;
        struct slave *slave;
 
-       ecmd->duplex = DUPLEX_UNKNOWN;
-       ecmd->port = PORT_OTHER;
+       cmd->base.duplex = DUPLEX_UNKNOWN;
+       cmd->base.port = PORT_OTHER;
 
        /* Since bond_slave_can_tx returns false for all inactive or down slaves, we
         * do not need to check mode.  Though link speed might not represent
@@ -4100,12 +4100,12 @@ static int bond_ethtool_get_settings(struct net_device *bond_dev,
                if (bond_slave_can_tx(slave)) {
                        if (slave->speed != SPEED_UNKNOWN)
                                speed += slave->speed;
-                       if (ecmd->duplex == DUPLEX_UNKNOWN &&
+                       if (cmd->base.duplex == DUPLEX_UNKNOWN &&
                            slave->duplex != DUPLEX_UNKNOWN)
-                               ecmd->duplex = slave->duplex;
+                               cmd->base.duplex = slave->duplex;
                }
        }
-       ethtool_cmd_speed_set(ecmd, speed ? : SPEED_UNKNOWN);
+       cmd->base.speed = speed ? : SPEED_UNKNOWN;
 
        return 0;
 }
@@ -4121,8 +4121,8 @@ static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
 
 static const struct ethtool_ops bond_ethtool_ops = {
        .get_drvinfo            = bond_ethtool_get_drvinfo,
-       .get_settings           = bond_ethtool_get_settings,
        .get_link               = ethtool_op_get_link,
+       .get_link_ksettings     = bond_ethtool_get_link_ksettings,
 };
 
 static const struct net_device_ops bond_netdev_ops = {
index 3eb7430dffbf1378df8c4c9c40f92a99e06e879d..f8ff25c8ee2e46122083de6f45816648f60c0370 100644 (file)
@@ -142,6 +142,9 @@ struct plx_pci_card {
 #define CTI_PCI_VENDOR_ID              0x12c4
 #define CTI_PCI_DEVICE_ID_CRG001       0x0900
 
+#define MOXA_PCI_VENDOR_ID             0x1393
+#define MOXA_PCI_DEVICE_ID             0x0100
+
 static void plx_pci_reset_common(struct pci_dev *pdev);
 static void plx9056_pci_reset_common(struct pci_dev *pdev);
 static void plx_pci_reset_marathon_pci(struct pci_dev *pdev);
@@ -258,6 +261,14 @@ static struct plx_pci_card_info plx_pci_card_info_elcus = {
        /* based on PLX9030 */
 };
 
+static struct plx_pci_card_info plx_pci_card_info_moxa = {
+       "MOXA", 2,
+       PLX_PCI_CAN_CLOCK, PLX_PCI_OCR, PLX_PCI_CDR,
+       {0, 0x00, 0x00}, { {0, 0x00, 0x80}, {1, 0x00, 0x80} },
+       &plx_pci_reset_common
+        /* based on PLX9052 */
+};
+
 static const struct pci_device_id plx_pci_tbl[] = {
        {
                /* Adlink PCI-7841/cPCI-7841 */
@@ -357,6 +368,13 @@ static const struct pci_device_id plx_pci_tbl[] = {
                0, 0,
                (kernel_ulong_t)&plx_pci_card_info_elcus
        },
+       {
+               /* moxa */
+               MOXA_PCI_VENDOR_ID, MOXA_PCI_DEVICE_ID,
+               PCI_ANY_ID, PCI_ANY_ID,
+               0, 0,
+               (kernel_ulong_t)&plx_pci_card_info_moxa
+       },
        { 0,}
 };
 MODULE_DEVICE_TABLE(pci, plx_pci_tbl);
index e128826aa117857b6ac36f9f1f273c1d9eab9044..91c876a0a647ba1b17bdc734db168c9ee456409e 100644 (file)
@@ -7,9 +7,6 @@
  *
  */
 
-
-#include <linux/module.h>
-
 #include <linux/kernel.h>
 #include <linux/delay.h>
 #include <linux/types.h>
@@ -411,6 +408,7 @@ etrax_ethernet_init(void)
        led_next_time = jiffies;
        return 0;
 }
+device_initcall(etrax_ethernet_init)
 
 /* set MAC address of the interface. called from the core after a
  * SIOCSIFADDR ioctl, and from the bootup above.
@@ -1714,11 +1712,6 @@ e100_netpoll(struct net_device* netdev)
 }
 #endif
 
-static int
-etrax_init_module(void)
-{
-       return etrax_ethernet_init();
-}
 
 static int __init
 e100_boot_setup(char* str)
@@ -1741,5 +1734,3 @@ e100_boot_setup(char* str)
 }
 
 __setup("etrax100_eth=", e100_boot_setup);
-
-module_init(etrax_init_module);
index 7717b19dc806bf1532ac263525a35ce643c7cdd6..947adda3397d64ce9e86f5cfe8e300b4e8650827 100644 (file)
@@ -962,9 +962,10 @@ static void b53_vlan_add(struct dsa_switch *ds, int port,
 
                vl->members |= BIT(port) | BIT(cpu_port);
                if (untagged)
-                       vl->untag |= BIT(port) | BIT(cpu_port);
+                       vl->untag |= BIT(port);
                else
-                       vl->untag &= ~(BIT(port) | BIT(cpu_port));
+                       vl->untag &= ~BIT(port);
+               vl->untag &= ~BIT(cpu_port);
 
                b53_set_vlan_entry(dev, vid, vl);
                b53_fast_age_vlan(dev, vid);
@@ -973,8 +974,6 @@ static void b53_vlan_add(struct dsa_switch *ds, int port,
        if (pvid) {
                b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port),
                            vlan->vid_end);
-               b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(cpu_port),
-                           vlan->vid_end);
                b53_fast_age_vlan(dev, vid);
        }
 }
@@ -984,7 +983,6 @@ static int b53_vlan_del(struct dsa_switch *ds, int port,
 {
        struct b53_device *dev = ds->priv;
        bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
-       unsigned int cpu_port = dev->cpu_port;
        struct b53_vlan *vl;
        u16 vid;
        u16 pvid;
@@ -997,8 +995,6 @@ static int b53_vlan_del(struct dsa_switch *ds, int port,
                b53_get_vlan_entry(dev, vid, vl);
 
                vl->members &= ~BIT(port);
-               if ((vl->members & BIT(cpu_port)) == BIT(cpu_port))
-                       vl->members = 0;
 
                if (pvid == vid) {
                        if (is5325(dev) || is5365(dev))
@@ -1007,18 +1003,14 @@ static int b53_vlan_del(struct dsa_switch *ds, int port,
                                pvid = 0;
                }
 
-               if (untagged) {
+               if (untagged)
                        vl->untag &= ~(BIT(port));
-                       if ((vl->untag & BIT(cpu_port)) == BIT(cpu_port))
-                               vl->untag = 0;
-               }
 
                b53_set_vlan_entry(dev, vid, vl);
                b53_fast_age_vlan(dev, vid);
        }
 
        b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), pvid);
-       b53_write16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(cpu_port), pvid);
        b53_fast_age_vlan(dev, pvid);
 
        return 0;
index 76fb8552c9d93a6d3e2c424ee3ad0ef55be04adf..ef63d24fef8149a1fd26614933a5ea4dd5dae86f 100644 (file)
@@ -256,6 +256,7 @@ static const struct of_device_id b53_mmap_of_table[] = {
        { .compatible = "brcm,bcm63xx-switch" },
        { /* sentinel */ },
 };
+MODULE_DEVICE_TABLE(of, b53_mmap_of_table);
 
 static struct platform_driver b53_mmap_driver = {
        .probe = b53_mmap_probe,
index e218887f18b79e352435416d8d9b3047cf52ae66..9ec33b51a0edad879701bef79d6c8f250d778b91 100644 (file)
@@ -588,6 +588,7 @@ static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port,
                                   struct phy_device *phydev)
 {
        struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+       struct ethtool_eee *p = &priv->port_sts[port].eee;
        u32 id_mode_dis = 0, port_mode;
        const char *str = NULL;
        u32 reg;
@@ -662,6 +663,9 @@ force_link:
                reg |= DUPLX_MODE;
 
        core_writel(priv, reg, CORE_STS_OVERRIDE_GMIIP_PORT(port));
+
+       if (!phydev->is_pseudo_fixed_link)
+               p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev);
 }
 
 static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port,
@@ -1133,6 +1137,20 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev)
        return 0;
 }
 
+static void bcm_sf2_sw_shutdown(struct platform_device *pdev)
+{
+       struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
+
+       /* For a kernel about to be kexec'd we want to keep the GPHY on for a
+        * successful MDIO bus scan to occur. If we did turn off the GPHY
+        * before (e.g: port_disable), this will also power it back on.
+        *
+        * Do not rely on kexec_in_progress, just power the PHY on.
+        */
+       if (priv->hw_params.num_gphy == 1)
+               bcm_sf2_gphy_enable_set(priv->dev->ds, true);
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int bcm_sf2_suspend(struct device *dev)
 {
@@ -1158,10 +1176,12 @@ static const struct of_device_id bcm_sf2_of_match[] = {
        { .compatible = "brcm,bcm7445-switch-v4.0" },
        { /* sentinel */ },
 };
+MODULE_DEVICE_TABLE(of, bcm_sf2_of_match);
 
 static struct platform_driver bcm_sf2_driver = {
        .probe  = bcm_sf2_sw_probe,
        .remove = bcm_sf2_sw_remove,
+       .shutdown = bcm_sf2_sw_shutdown,
        .driver = {
                .name = "brcm-sf2",
                .of_match_table = bcm_sf2_of_match,
index 486668813e15eba3a8972919a27cc1b89bce02b7..1aaa7a95ebc4a24b4692e4d78f2da063aaf1779c 100644 (file)
@@ -1,6 +1,7 @@
 config NET_DSA_MV88E6XXX
        tristate "Marvell 88E6xxx Ethernet switch fabric support"
        depends on NET_DSA
+       select IRQ_DOMAIN
        select NET_DSA_TAG_EDSA
        select NET_DSA_TAG_DSA
        help
index 10ce820daa4875bf19f5478ad46ba1924b8747e5..c36be318de1aaf1d6d936ad97194b1524112bcbd 100644 (file)
@@ -2,3 +2,4 @@ obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o
 mv88e6xxx-objs := chip.o
 mv88e6xxx-objs += global1.o
 mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
+mv88e6xxx-objs += port.o
index 72b9dac29901b7772ff87795a44dcdb724ecd268..ce2f7ff8066e0c2b413e8d3518307811378d9700 100644 (file)
@@ -37,6 +37,7 @@
 #include "mv88e6xxx.h"
 #include "global1.h"
 #include "global2.h"
+#include "port.h"
 
 static void assert_reg_lock(struct mv88e6xxx_chip *chip)
 {
@@ -221,22 +222,6 @@ int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val)
        return 0;
 }
 
-static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
-                              u16 *val)
-{
-       int addr = chip->info->port_base_addr + port;
-
-       return mv88e6xxx_read(chip, addr, reg, val);
-}
-
-static int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
-                               u16 val)
-{
-       int addr = chip->info->port_base_addr + port;
-
-       return mv88e6xxx_write(chip, addr, reg, val);
-}
-
 static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy,
                              int reg, u16 *val)
 {
@@ -428,19 +413,26 @@ static const struct irq_domain_ops mv88e6xxx_g1_irq_domain_ops = {
 static void mv88e6xxx_g1_irq_free(struct mv88e6xxx_chip *chip)
 {
        int irq, virq;
+       u16 mask;
 
-       for (irq = 0; irq < 16; irq++) {
-               virq = irq_find_mapping(chip->g2_irq.domain, irq);
+       mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, &mask);
+       mask |= GENMASK(chip->g1_irq.nirqs, 0);
+       mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, mask);
+
+       free_irq(chip->irq, chip);
+
+       for (irq = 0; irq < chip->g1_irq.nirqs; irq++) {
+               virq = irq_find_mapping(chip->g1_irq.domain, irq);
                irq_dispose_mapping(virq);
        }
 
-       irq_domain_remove(chip->g2_irq.domain);
+       irq_domain_remove(chip->g1_irq.domain);
 }
 
 static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
 {
-       int err, irq;
-       u16 reg;
+       int err, irq, virq;
+       u16 reg, mask;
 
        chip->g1_irq.nirqs = chip->info->g1_irqs;
        chip->g1_irq.domain = irq_domain_add_simple(
@@ -455,32 +447,41 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
        chip->g1_irq.chip = mv88e6xxx_g1_irq_chip;
        chip->g1_irq.masked = ~0;
 
-       err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, &reg);
+       err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, &mask);
        if (err)
-               goto out;
+               goto out_mapping;
 
-       reg &= ~GENMASK(chip->g1_irq.nirqs, 0);
+       mask &= ~GENMASK(chip->g1_irq.nirqs, 0);
 
-       err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, reg);
+       err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, mask);
        if (err)
-               goto out;
+               goto out_disable;
 
        /* Reading the interrupt status clears (most of) them */
        err = mv88e6xxx_g1_read(chip, GLOBAL_STATUS, &reg);
        if (err)
-               goto out;
+               goto out_disable;
 
        err = request_threaded_irq(chip->irq, NULL,
                                   mv88e6xxx_g1_irq_thread_fn,
                                   IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
                                   dev_name(chip->dev), chip);
        if (err)
-               goto out;
+               goto out_disable;
 
        return 0;
 
-out:
-       mv88e6xxx_g1_irq_free(chip);
+out_disable:
+       mask |= GENMASK(chip->g1_irq.nirqs, 0);
+       mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, mask);
+
+out_mapping:
+       for (irq = 0; irq < 16; irq++) {
+               virq = irq_find_mapping(chip->g1_irq.domain, irq);
+               irq_dispose_mapping(virq);
+       }
+
+       irq_domain_remove(chip->g1_irq.domain);
 
        return err;
 }
@@ -716,231 +717,152 @@ static bool mv88e6xxx_6352_family(struct mv88e6xxx_chip *chip)
        return chip->info->family == MV88E6XXX_FAMILY_6352;
 }
 
-/* We expect the switch to perform auto negotiation if there is a real
- * phy. However, in the case of a fixed link phy, we force the port
- * settings from the fixed link settings.
- */
-static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port,
-                                 struct phy_device *phydev)
+static int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port,
+                                   int link, int speed, int duplex,
+                                   phy_interface_t mode)
 {
-       struct mv88e6xxx_chip *chip = ds->priv;
-       u16 reg;
        int err;
 
-       if (!phy_is_pseudo_fixed_link(phydev))
-               return;
-
-       mutex_lock(&chip->reg_lock);
+       if (!chip->info->ops->port_set_link)
+               return 0;
 
-       err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg);
+       /* Port's MAC control must not be changed unless the link is down */
+       err = chip->info->ops->port_set_link(chip, port, 0);
        if (err)
-               goto out;
-
-       reg &= ~(PORT_PCS_CTRL_LINK_UP |
-                PORT_PCS_CTRL_FORCE_LINK |
-                PORT_PCS_CTRL_DUPLEX_FULL |
-                PORT_PCS_CTRL_FORCE_DUPLEX |
-                PORT_PCS_CTRL_UNFORCED);
-
-       reg |= PORT_PCS_CTRL_FORCE_LINK;
-       if (phydev->link)
-               reg |= PORT_PCS_CTRL_LINK_UP;
-
-       if (mv88e6xxx_6065_family(chip) && phydev->speed > SPEED_100)
-               goto out;
+               return err;
 
-       switch (phydev->speed) {
-       case SPEED_1000:
-               reg |= PORT_PCS_CTRL_1000;
-               break;
-       case SPEED_100:
-               reg |= PORT_PCS_CTRL_100;
-               break;
-       case SPEED_10:
-               reg |= PORT_PCS_CTRL_10;
-               break;
-       default:
-               pr_info("Unknown speed");
-               goto out;
+       if (chip->info->ops->port_set_speed) {
+               err = chip->info->ops->port_set_speed(chip, port, speed);
+               if (err && err != -EOPNOTSUPP)
+                       goto restore_link;
        }
 
-       reg |= PORT_PCS_CTRL_FORCE_DUPLEX;
-       if (phydev->duplex == DUPLEX_FULL)
-               reg |= PORT_PCS_CTRL_DUPLEX_FULL;
-
-       if ((mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip)) &&
-           (port >= mv88e6xxx_num_ports(chip) - 2)) {
-               if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
-                       reg |= PORT_PCS_CTRL_RGMII_DELAY_RXCLK;
-               if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
-                       reg |= PORT_PCS_CTRL_RGMII_DELAY_TXCLK;
-               if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
-                       reg |= (PORT_PCS_CTRL_RGMII_DELAY_RXCLK |
-                               PORT_PCS_CTRL_RGMII_DELAY_TXCLK);
+       if (chip->info->ops->port_set_duplex) {
+               err = chip->info->ops->port_set_duplex(chip, port, duplex);
+               if (err && err != -EOPNOTSUPP)
+                       goto restore_link;
        }
-       mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg);
 
-out:
-       mutex_unlock(&chip->reg_lock);
-}
-
-static int _mv88e6xxx_stats_wait(struct mv88e6xxx_chip *chip)
-{
-       u16 val;
-       int i, err;
-
-       for (i = 0; i < 10; i++) {
-               err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_OP, &val);
-               if ((val & GLOBAL_STATS_OP_BUSY) == 0)
-                       return 0;
+       if (chip->info->ops->port_set_rgmii_delay) {
+               err = chip->info->ops->port_set_rgmii_delay(chip, port, mode);
+               if (err && err != -EOPNOTSUPP)
+                       goto restore_link;
        }
 
-       return -ETIMEDOUT;
-}
-
-static int _mv88e6xxx_stats_snapshot(struct mv88e6xxx_chip *chip, int port)
-{
-       int err;
-
-       if (mv88e6xxx_6320_family(chip) || mv88e6xxx_6352_family(chip))
-               port = (port + 1) << 5;
-
-       /* Snapshot the hardware statistics counters for this port. */
-       err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP,
-                                GLOBAL_STATS_OP_CAPTURE_PORT |
-                                GLOBAL_STATS_OP_HIST_RX_TX | port);
-       if (err)
-               return err;
+       err = 0;
+restore_link:
+       if (chip->info->ops->port_set_link(chip, port, link))
+               netdev_err(chip->ds->ports[port].netdev,
+                          "failed to restore MAC's link\n");
 
-       /* Wait for the snapshotting to complete. */
-       return _mv88e6xxx_stats_wait(chip);
+       return err;
 }
 
-static void _mv88e6xxx_stats_read(struct mv88e6xxx_chip *chip,
-                                 int stat, u32 *val)
+/* We expect the switch to perform auto negotiation if there is a real
+ * phy. However, in the case of a fixed link phy, we force the port
+ * settings from the fixed link settings.
+ */
+static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port,
+                                 struct phy_device *phydev)
 {
-       u32 value;
-       u16 reg;
+       struct mv88e6xxx_chip *chip = ds->priv;
        int err;
 
-       *val = 0;
-
-       err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP,
-                                GLOBAL_STATS_OP_READ_CAPTURED |
-                                GLOBAL_STATS_OP_HIST_RX_TX | stat);
-       if (err)
-               return;
-
-       err = _mv88e6xxx_stats_wait(chip);
-       if (err)
+       if (!phy_is_pseudo_fixed_link(phydev))
                return;
 
-       err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_COUNTER_32, &reg);
-       if (err)
-               return;
+       mutex_lock(&chip->reg_lock);
+       err = mv88e6xxx_port_setup_mac(chip, port, phydev->link, phydev->speed,
+                                      phydev->duplex, phydev->interface);
+       mutex_unlock(&chip->reg_lock);
 
-       value = reg << 16;
+       if (err && err != -EOPNOTSUPP)
+               netdev_err(ds->ports[port].netdev, "failed to configure MAC\n");
+}
 
-       err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_COUNTER_01, &reg);
-       if (err)
-               return;
+static int mv88e6xxx_stats_snapshot(struct mv88e6xxx_chip *chip, int port)
+{
+       if (!chip->info->ops->stats_snapshot)
+               return -EOPNOTSUPP;
 
-       *val = value | reg;
+       return chip->info->ops->stats_snapshot(chip, port);
 }
 
 static struct mv88e6xxx_hw_stat mv88e6xxx_hw_stats[] = {
-       { "in_good_octets",     8, 0x00, BANK0, },
-       { "in_bad_octets",      4, 0x02, BANK0, },
-       { "in_unicast",         4, 0x04, BANK0, },
-       { "in_broadcasts",      4, 0x06, BANK0, },
-       { "in_multicasts",      4, 0x07, BANK0, },
-       { "in_pause",           4, 0x16, BANK0, },
-       { "in_undersize",       4, 0x18, BANK0, },
-       { "in_fragments",       4, 0x19, BANK0, },
-       { "in_oversize",        4, 0x1a, BANK0, },
-       { "in_jabber",          4, 0x1b, BANK0, },
-       { "in_rx_error",        4, 0x1c, BANK0, },
-       { "in_fcs_error",       4, 0x1d, BANK0, },
-       { "out_octets",         8, 0x0e, BANK0, },
-       { "out_unicast",        4, 0x10, BANK0, },
-       { "out_broadcasts",     4, 0x13, BANK0, },
-       { "out_multicasts",     4, 0x12, BANK0, },
-       { "out_pause",          4, 0x15, BANK0, },
-       { "excessive",          4, 0x11, BANK0, },
-       { "collisions",         4, 0x1e, BANK0, },
-       { "deferred",           4, 0x05, BANK0, },
-       { "single",             4, 0x14, BANK0, },
-       { "multiple",           4, 0x17, BANK0, },
-       { "out_fcs_error",      4, 0x03, BANK0, },
-       { "late",               4, 0x1f, BANK0, },
-       { "hist_64bytes",       4, 0x08, BANK0, },
-       { "hist_65_127bytes",   4, 0x09, BANK0, },
-       { "hist_128_255bytes",  4, 0x0a, BANK0, },
-       { "hist_256_511bytes",  4, 0x0b, BANK0, },
-       { "hist_512_1023bytes", 4, 0x0c, BANK0, },
-       { "hist_1024_max_bytes", 4, 0x0d, BANK0, },
-       { "sw_in_discards",     4, 0x10, PORT, },
-       { "sw_in_filtered",     2, 0x12, PORT, },
-       { "sw_out_filtered",    2, 0x13, PORT, },
-       { "in_discards",        4, 0x00 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "in_filtered",        4, 0x01 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "in_accepted",        4, 0x02 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "in_bad_accepted",    4, 0x03 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "in_good_avb_class_a", 4, 0x04 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "in_good_avb_class_b", 4, 0x05 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "in_bad_avb_class_a", 4, 0x06 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "in_bad_avb_class_b", 4, 0x07 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "tcam_counter_0",     4, 0x08 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "tcam_counter_1",     4, 0x09 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "tcam_counter_2",     4, 0x0a | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "tcam_counter_3",     4, 0x0b | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "in_da_unknown",      4, 0x0e | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "in_management",      4, 0x0f | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "out_queue_0",        4, 0x10 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "out_queue_1",        4, 0x11 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "out_queue_2",        4, 0x12 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "out_queue_3",        4, 0x13 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "out_queue_4",        4, 0x14 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "out_queue_5",        4, 0x15 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "out_queue_6",        4, 0x16 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "out_queue_7",        4, 0x17 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "out_cut_through",    4, 0x18 | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "out_octets_a",       4, 0x1a | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "out_octets_b",       4, 0x1b | GLOBAL_STATS_OP_BANK_1, BANK1, },
-       { "out_management",     4, 0x1f | GLOBAL_STATS_OP_BANK_1, BANK1, },
+       { "in_good_octets",             8, 0x00, STATS_TYPE_BANK0, },
+       { "in_bad_octets",              4, 0x02, STATS_TYPE_BANK0, },
+       { "in_unicast",                 4, 0x04, STATS_TYPE_BANK0, },
+       { "in_broadcasts",              4, 0x06, STATS_TYPE_BANK0, },
+       { "in_multicasts",              4, 0x07, STATS_TYPE_BANK0, },
+       { "in_pause",                   4, 0x16, STATS_TYPE_BANK0, },
+       { "in_undersize",               4, 0x18, STATS_TYPE_BANK0, },
+       { "in_fragments",               4, 0x19, STATS_TYPE_BANK0, },
+       { "in_oversize",                4, 0x1a, STATS_TYPE_BANK0, },
+       { "in_jabber",                  4, 0x1b, STATS_TYPE_BANK0, },
+       { "in_rx_error",                4, 0x1c, STATS_TYPE_BANK0, },
+       { "in_fcs_error",               4, 0x1d, STATS_TYPE_BANK0, },
+       { "out_octets",                 8, 0x0e, STATS_TYPE_BANK0, },
+       { "out_unicast",                4, 0x10, STATS_TYPE_BANK0, },
+       { "out_broadcasts",             4, 0x13, STATS_TYPE_BANK0, },
+       { "out_multicasts",             4, 0x12, STATS_TYPE_BANK0, },
+       { "out_pause",                  4, 0x15, STATS_TYPE_BANK0, },
+       { "excessive",                  4, 0x11, STATS_TYPE_BANK0, },
+       { "collisions",                 4, 0x1e, STATS_TYPE_BANK0, },
+       { "deferred",                   4, 0x05, STATS_TYPE_BANK0, },
+       { "single",                     4, 0x14, STATS_TYPE_BANK0, },
+       { "multiple",                   4, 0x17, STATS_TYPE_BANK0, },
+       { "out_fcs_error",              4, 0x03, STATS_TYPE_BANK0, },
+       { "late",                       4, 0x1f, STATS_TYPE_BANK0, },
+       { "hist_64bytes",               4, 0x08, STATS_TYPE_BANK0, },
+       { "hist_65_127bytes",           4, 0x09, STATS_TYPE_BANK0, },
+       { "hist_128_255bytes",          4, 0x0a, STATS_TYPE_BANK0, },
+       { "hist_256_511bytes",          4, 0x0b, STATS_TYPE_BANK0, },
+       { "hist_512_1023bytes",         4, 0x0c, STATS_TYPE_BANK0, },
+       { "hist_1024_max_bytes",        4, 0x0d, STATS_TYPE_BANK0, },
+       { "sw_in_discards",             4, 0x10, STATS_TYPE_PORT, },
+       { "sw_in_filtered",             2, 0x12, STATS_TYPE_PORT, },
+       { "sw_out_filtered",            2, 0x13, STATS_TYPE_PORT, },
+       { "in_discards",                4, 0x00, STATS_TYPE_BANK1, },
+       { "in_filtered",                4, 0x01, STATS_TYPE_BANK1, },
+       { "in_accepted",                4, 0x02, STATS_TYPE_BANK1, },
+       { "in_bad_accepted",            4, 0x03, STATS_TYPE_BANK1, },
+       { "in_good_avb_class_a",        4, 0x04, STATS_TYPE_BANK1, },
+       { "in_good_avb_class_b",        4, 0x05, STATS_TYPE_BANK1, },
+       { "in_bad_avb_class_a",         4, 0x06, STATS_TYPE_BANK1, },
+       { "in_bad_avb_class_b",         4, 0x07, STATS_TYPE_BANK1, },
+       { "tcam_counter_0",             4, 0x08, STATS_TYPE_BANK1, },
+       { "tcam_counter_1",             4, 0x09, STATS_TYPE_BANK1, },
+       { "tcam_counter_2",             4, 0x0a, STATS_TYPE_BANK1, },
+       { "tcam_counter_3",             4, 0x0b, STATS_TYPE_BANK1, },
+       { "in_da_unknown",              4, 0x0e, STATS_TYPE_BANK1, },
+       { "in_management",              4, 0x0f, STATS_TYPE_BANK1, },
+       { "out_queue_0",                4, 0x10, STATS_TYPE_BANK1, },
+       { "out_queue_1",                4, 0x11, STATS_TYPE_BANK1, },
+       { "out_queue_2",                4, 0x12, STATS_TYPE_BANK1, },
+       { "out_queue_3",                4, 0x13, STATS_TYPE_BANK1, },
+       { "out_queue_4",                4, 0x14, STATS_TYPE_BANK1, },
+       { "out_queue_5",                4, 0x15, STATS_TYPE_BANK1, },
+       { "out_queue_6",                4, 0x16, STATS_TYPE_BANK1, },
+       { "out_queue_7",                4, 0x17, STATS_TYPE_BANK1, },
+       { "out_cut_through",            4, 0x18, STATS_TYPE_BANK1, },
+       { "out_octets_a",               4, 0x1a, STATS_TYPE_BANK1, },
+       { "out_octets_b",               4, 0x1b, STATS_TYPE_BANK1, },
+       { "out_management",             4, 0x1f, STATS_TYPE_BANK1, },
 };
 
-static bool mv88e6xxx_has_stat(struct mv88e6xxx_chip *chip,
-                              struct mv88e6xxx_hw_stat *stat)
-{
-       switch (stat->type) {
-       case BANK0:
-               return true;
-       case BANK1:
-               return mv88e6xxx_6320_family(chip);
-       case PORT:
-               return mv88e6xxx_6095_family(chip) ||
-                       mv88e6xxx_6185_family(chip) ||
-                       mv88e6xxx_6097_family(chip) ||
-                       mv88e6xxx_6165_family(chip) ||
-                       mv88e6xxx_6351_family(chip) ||
-                       mv88e6xxx_6352_family(chip);
-       }
-       return false;
-}
-
 static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
                                            struct mv88e6xxx_hw_stat *s,
-                                           int port)
+                                           int port, u16 bank1_select,
+                                           u16 histogram)
 {
        u32 low;
        u32 high = 0;
+       u16 reg = 0;
        int err;
-       u16 reg;
        u64 value;
 
        switch (s->type) {
-       case PORT:
+       case STATS_TYPE_PORT:
                err = mv88e6xxx_port_read(chip, port, s->reg, &reg);
                if (err)
                        return UINT64_MAX;
@@ -953,26 +875,28 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip,
                        high = reg;
                }
                break;
-       case BANK0:
-       case BANK1:
-               _mv88e6xxx_stats_read(chip, s->reg, &low);
+       case STATS_TYPE_BANK1:
+               reg = bank1_select;
+               /* fall through */
+       case STATS_TYPE_BANK0:
+               reg |= s->reg | histogram;
+               mv88e6xxx_g1_stats_read(chip, reg, &low);
                if (s->sizeof_stat == 8)
-                       _mv88e6xxx_stats_read(chip, s->reg + 1, &high);
+                       mv88e6xxx_g1_stats_read(chip, reg + 1, &high);
        }
        value = (((u64)high) << 16) | low;
        return value;
 }
 
-static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
-                                 uint8_t *data)
+static void mv88e6xxx_stats_get_strings(struct mv88e6xxx_chip *chip,
+                                       uint8_t *data, int types)
 {
-       struct mv88e6xxx_chip *chip = ds->priv;
        struct mv88e6xxx_hw_stat *stat;
        int i, j;
 
        for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) {
                stat = &mv88e6xxx_hw_stats[i];
-               if (mv88e6xxx_has_stat(chip, stat)) {
+               if (stat->type & types) {
                        memcpy(data + j * ETH_GSTRING_LEN, stat->string,
                               ETH_GSTRING_LEN);
                        j++;
@@ -980,46 +904,142 @@ static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
        }
 }
 
-static int mv88e6xxx_get_sset_count(struct dsa_switch *ds)
+static void mv88e6095_stats_get_strings(struct mv88e6xxx_chip *chip,
+                                       uint8_t *data)
+{
+       mv88e6xxx_stats_get_strings(chip, data,
+                                   STATS_TYPE_BANK0 | STATS_TYPE_PORT);
+}
+
+static void mv88e6320_stats_get_strings(struct mv88e6xxx_chip *chip,
+                                       uint8_t *data)
+{
+       mv88e6xxx_stats_get_strings(chip, data,
+                                   STATS_TYPE_BANK0 | STATS_TYPE_BANK1);
+}
+
+static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
+                                 uint8_t *data)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
+
+       if (chip->info->ops->stats_get_strings)
+               chip->info->ops->stats_get_strings(chip, data);
+}
+
+static int mv88e6xxx_stats_get_sset_count(struct mv88e6xxx_chip *chip,
+                                         int types)
+{
        struct mv88e6xxx_hw_stat *stat;
        int i, j;
 
        for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) {
                stat = &mv88e6xxx_hw_stats[i];
-               if (mv88e6xxx_has_stat(chip, stat))
+               if (stat->type & types)
                        j++;
        }
        return j;
 }
 
+static int mv88e6095_stats_get_sset_count(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_stats_get_sset_count(chip, STATS_TYPE_BANK0 |
+                                             STATS_TYPE_PORT);
+}
+
+static int mv88e6320_stats_get_sset_count(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_stats_get_sset_count(chip, STATS_TYPE_BANK0 |
+                                             STATS_TYPE_BANK1);
+}
+
+static int mv88e6xxx_get_sset_count(struct dsa_switch *ds)
+{
+       struct mv88e6xxx_chip *chip = ds->priv;
+
+       if (chip->info->ops->stats_get_sset_count)
+               return chip->info->ops->stats_get_sset_count(chip);
+
+       return 0;
+}
+
+static void mv88e6xxx_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+                                     uint64_t *data, int types,
+                                     u16 bank1_select, u16 histogram)
+{
+       struct mv88e6xxx_hw_stat *stat;
+       int i, j;
+
+       for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) {
+               stat = &mv88e6xxx_hw_stats[i];
+               if (stat->type & types) {
+                       data[j] = _mv88e6xxx_get_ethtool_stat(chip, stat, port,
+                                                             bank1_select,
+                                                             histogram);
+                       j++;
+               }
+       }
+}
+
+static void mv88e6095_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+                                     uint64_t *data)
+{
+       return mv88e6xxx_stats_get_stats(chip, port, data,
+                                        STATS_TYPE_BANK0 | STATS_TYPE_PORT,
+                                        0, GLOBAL_STATS_OP_HIST_RX_TX);
+}
+
+static void mv88e6320_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+                                     uint64_t *data)
+{
+       return mv88e6xxx_stats_get_stats(chip, port, data,
+                                        STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
+                                        GLOBAL_STATS_OP_BANK_1_BIT_9,
+                                        GLOBAL_STATS_OP_HIST_RX_TX);
+}
+
+static void mv88e6390_stats_get_stats(struct mv88e6xxx_chip *chip, int port,
+                                     uint64_t *data)
+{
+       return mv88e6xxx_stats_get_stats(chip, port, data,
+                                        STATS_TYPE_BANK0 | STATS_TYPE_BANK1,
+                                        GLOBAL_STATS_OP_BANK_1_BIT_10, 0);
+}
+
+static void mv88e6xxx_get_stats(struct mv88e6xxx_chip *chip, int port,
+                               uint64_t *data)
+{
+       if (chip->info->ops->stats_get_stats)
+               chip->info->ops->stats_get_stats(chip, port, data);
+}
+
 static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
                                        uint64_t *data)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
-       struct mv88e6xxx_hw_stat *stat;
        int ret;
-       int i, j;
 
        mutex_lock(&chip->reg_lock);
 
-       ret = _mv88e6xxx_stats_snapshot(chip, port);
+       ret = mv88e6xxx_stats_snapshot(chip, port);
        if (ret < 0) {
                mutex_unlock(&chip->reg_lock);
                return;
        }
-       for (i = 0, j = 0; i < ARRAY_SIZE(mv88e6xxx_hw_stats); i++) {
-               stat = &mv88e6xxx_hw_stats[i];
-               if (mv88e6xxx_has_stat(chip, stat)) {
-                       data[j] = _mv88e6xxx_get_ethtool_stat(chip, stat, port);
-                       j++;
-               }
-       }
+
+       mv88e6xxx_get_stats(chip, port, data);
 
        mutex_unlock(&chip->reg_lock);
 }
 
+static int mv88e6xxx_stats_set_histogram(struct mv88e6xxx_chip *chip)
+{
+       if (chip->info->ops->stats_set_histogram)
+               return chip->info->ops->stats_set_histogram(chip);
+
+       return 0;
+}
+
 static int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port)
 {
        return 32 * sizeof(u16);
@@ -1230,54 +1250,16 @@ static int _mv88e6xxx_atu_remove(struct mv88e6xxx_chip *chip, u16 fid,
        return _mv88e6xxx_atu_move(chip, fid, port, 0x0f, static_too);
 }
 
-static const char * const mv88e6xxx_port_state_names[] = {
-       [PORT_CONTROL_STATE_DISABLED] = "Disabled",
-       [PORT_CONTROL_STATE_BLOCKING] = "Blocking/Listening",
-       [PORT_CONTROL_STATE_LEARNING] = "Learning",
-       [PORT_CONTROL_STATE_FORWARDING] = "Forwarding",
-};
-
-static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port,
-                                u8 state)
-{
-       struct dsa_switch *ds = chip->ds;
-       u16 reg;
-       int err;
-       u8 oldstate;
-
-       err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, &reg);
-       if (err)
-               return err;
-
-       oldstate = reg & PORT_CONTROL_STATE_MASK;
-
-       reg &= ~PORT_CONTROL_STATE_MASK;
-       reg |= state;
-
-       err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
-       if (err)
-               return err;
-
-       netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n",
-                  mv88e6xxx_port_state_names[state],
-                  mv88e6xxx_port_state_names[oldstate]);
-
-       return 0;
-}
-
 static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port)
 {
        struct net_device *bridge = chip->ports[port].bridge_dev;
-       const u16 mask = (1 << mv88e6xxx_num_ports(chip)) - 1;
        struct dsa_switch *ds = chip->ds;
        u16 output_ports = 0;
-       u16 reg;
-       int err;
        int i;
 
        /* allow CPU port or DSA link(s) to send frames to every port */
        if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
-               output_ports = mask;
+               output_ports = ~0;
        } else {
                for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
                        /* allow sending frames to every group member */
@@ -1293,14 +1275,7 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port)
        /* prevent frames from going back out of the port they came in on */
        output_ports &= ~BIT(port);
 
-       err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &reg);
-       if (err)
-               return err;
-
-       reg &= ~mask;
-       reg |= output_ports & mask;
-
-       return mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg);
+       return mv88e6xxx_port_set_vlan_map(chip, port, output_ports);
 }
 
 static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
@@ -1328,13 +1303,11 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
        }
 
        mutex_lock(&chip->reg_lock);
-       err = _mv88e6xxx_port_state(chip, port, stp_state);
+       err = mv88e6xxx_port_set_state(chip, port, stp_state);
        mutex_unlock(&chip->reg_lock);
 
        if (err)
-               netdev_err(ds->ports[port].netdev,
-                          "failed to update state to %s\n",
-                          mv88e6xxx_port_state_names[stp_state]);
+               netdev_err(ds->ports[port].netdev, "failed to update state\n");
 }
 
 static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port)
@@ -1350,49 +1323,6 @@ static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port)
                netdev_err(ds->ports[port].netdev, "failed to flush ATU\n");
 }
 
-static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port,
-                               u16 *new, u16 *old)
-{
-       struct dsa_switch *ds = chip->ds;
-       u16 pvid, reg;
-       int err;
-
-       err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, &reg);
-       if (err)
-               return err;
-
-       pvid = reg & PORT_DEFAULT_VLAN_MASK;
-
-       if (new) {
-               reg &= ~PORT_DEFAULT_VLAN_MASK;
-               reg |= *new & PORT_DEFAULT_VLAN_MASK;
-
-               err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg);
-               if (err)
-                       return err;
-
-               netdev_dbg(ds->ports[port].netdev,
-                          "DefaultVID %d (was %d)\n", *new, pvid);
-       }
-
-       if (old)
-               *old = pvid;
-
-       return 0;
-}
-
-static int _mv88e6xxx_port_pvid_get(struct mv88e6xxx_chip *chip,
-                                   int port, u16 *pvid)
-{
-       return _mv88e6xxx_port_pvid(chip, port, NULL, pvid);
-}
-
-static int _mv88e6xxx_port_pvid_set(struct mv88e6xxx_chip *chip,
-                                   int port, u16 pvid)
-{
-       return _mv88e6xxx_port_pvid(chip, port, &pvid, NULL);
-}
-
 static int _mv88e6xxx_vtu_wait(struct mv88e6xxx_chip *chip)
 {
        return mv88e6xxx_g1_wait(chip, GLOBAL_VTU_OP, GLOBAL_VTU_OP_BUSY);
@@ -1572,7 +1502,7 @@ static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port,
 
        mutex_lock(&chip->reg_lock);
 
-       err = _mv88e6xxx_port_pvid_get(chip, port, &pvid);
+       err = mv88e6xxx_port_get_pvid(chip, port, &pvid);
        if (err)
                goto unlock;
 
@@ -1736,75 +1666,6 @@ loadpurge:
        return _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_LOAD_PURGE);
 }
 
-static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port,
-                              u16 *new, u16 *old)
-{
-       struct dsa_switch *ds = chip->ds;
-       u16 upper_mask;
-       u16 fid;
-       u16 reg;
-       int err;
-
-       if (mv88e6xxx_num_databases(chip) == 4096)
-               upper_mask = 0xff;
-       else if (mv88e6xxx_num_databases(chip) == 256)
-               upper_mask = 0xf;
-       else
-               return -EOPNOTSUPP;
-
-       /* Port's default FID bits 3:0 are located in reg 0x06, offset 12 */
-       err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &reg);
-       if (err)
-               return err;
-
-       fid = (reg & PORT_BASE_VLAN_FID_3_0_MASK) >> 12;
-
-       if (new) {
-               reg &= ~PORT_BASE_VLAN_FID_3_0_MASK;
-               reg |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK;
-
-               err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg);
-               if (err)
-                       return err;
-       }
-
-       /* Port's default FID bits 11:4 are located in reg 0x05, offset 0 */
-       err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, &reg);
-       if (err)
-               return err;
-
-       fid |= (reg & upper_mask) << 4;
-
-       if (new) {
-               reg &= ~upper_mask;
-               reg |= (*new >> 4) & upper_mask;
-
-               err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, reg);
-               if (err)
-                       return err;
-
-               netdev_dbg(ds->ports[port].netdev,
-                          "FID %d (was %d)\n", *new, fid);
-       }
-
-       if (old)
-               *old = fid;
-
-       return 0;
-}
-
-static int _mv88e6xxx_port_fid_get(struct mv88e6xxx_chip *chip,
-                                  int port, u16 *fid)
-{
-       return _mv88e6xxx_port_fid(chip, port, NULL, fid);
-}
-
-static int _mv88e6xxx_port_fid_set(struct mv88e6xxx_chip *chip,
-                                  int port, u16 fid)
-{
-       return _mv88e6xxx_port_fid(chip, port, &fid, NULL);
-}
-
 static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid)
 {
        DECLARE_BITMAP(fid_bitmap, MV88E6XXX_N_FID);
@@ -1815,7 +1676,7 @@ static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid)
 
        /* Set every FID bit used by the (un)bridged ports */
        for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
-               err = _mv88e6xxx_port_fid_get(chip, i, fid);
+               err = mv88e6xxx_port_get_fid(chip, i, fid);
                if (err)
                        return err;
 
@@ -1980,48 +1841,19 @@ unlock:
        return err;
 }
 
-static const char * const mv88e6xxx_port_8021q_mode_names[] = {
-       [PORT_CONTROL_2_8021Q_DISABLED] = "Disabled",
-       [PORT_CONTROL_2_8021Q_FALLBACK] = "Fallback",
-       [PORT_CONTROL_2_8021Q_CHECK] = "Check",
-       [PORT_CONTROL_2_8021Q_SECURE] = "Secure",
-};
-
 static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port,
                                         bool vlan_filtering)
 {
        struct mv88e6xxx_chip *chip = ds->priv;
-       u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE :
+       u16 mode = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE :
                PORT_CONTROL_2_8021Q_DISABLED;
-       u16 reg;
        int err;
 
        if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU))
                return -EOPNOTSUPP;
 
        mutex_lock(&chip->reg_lock);
-
-       err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, &reg);
-       if (err)
-               goto unlock;
-
-       old = reg & PORT_CONTROL_2_8021Q_MASK;
-
-       if (new != old) {
-               reg &= ~PORT_CONTROL_2_8021Q_MASK;
-               reg |= new & PORT_CONTROL_2_8021Q_MASK;
-
-               err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg);
-               if (err)
-                       goto unlock;
-
-               netdev_dbg(ds->ports[port].netdev, "802.1Q Mode %s (was %s)\n",
-                          mv88e6xxx_port_8021q_mode_names[new],
-                          mv88e6xxx_port_8021q_mode_names[old]);
-       }
-
-       err = 0;
-unlock:
+       err = mv88e6xxx_port_set_8021q_mode(chip, port, mode);
        mutex_unlock(&chip->reg_lock);
 
        return err;
@@ -2089,7 +1921,7 @@ static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port,
                                   "failed to add VLAN %d%c\n",
                                   vid, untagged ? 'u' : 't');
 
-       if (pvid && _mv88e6xxx_port_pvid_set(chip, port, vlan->vid_end))
+       if (pvid && mv88e6xxx_port_set_pvid(chip, port, vlan->vid_end))
                netdev_err(ds->ports[port].netdev, "failed to set PVID %d\n",
                           vlan->vid_end);
 
@@ -2144,7 +1976,7 @@ static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port,
 
        mutex_lock(&chip->reg_lock);
 
-       err = _mv88e6xxx_port_pvid_get(chip, port, &pvid);
+       err = mv88e6xxx_port_get_pvid(chip, port, &pvid);
        if (err)
                goto unlock;
 
@@ -2154,7 +1986,7 @@ static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port,
                        goto unlock;
 
                if (vid == pvid) {
-                       err = _mv88e6xxx_port_pvid_set(chip, port, 0);
+                       err = mv88e6xxx_port_set_pvid(chip, port, 0);
                        if (err)
                                goto unlock;
                }
@@ -2265,7 +2097,7 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
 
        /* Null VLAN ID corresponds to the port private database */
        if (vid == 0)
-               err = _mv88e6xxx_port_fid_get(chip, port, &vlan.fid);
+               err = mv88e6xxx_port_get_fid(chip, port, &vlan.fid);
        else
                err = _mv88e6xxx_vtu_get(chip, vid, &vlan, false);
        if (err)
@@ -2441,7 +2273,7 @@ static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
        int err;
 
        /* Dump port's default Filtering Information Database (VLAN ID 0) */
-       err = _mv88e6xxx_port_fid_get(chip, port, &fid);
+       err = mv88e6xxx_port_get_fid(chip, port, &fid);
        if (err)
                return err;
 
@@ -2541,12 +2373,8 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip)
 
        /* Set all ports to the disabled state. */
        for (i = 0; i < mv88e6xxx_num_ports(chip); i++) {
-               err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, &reg);
-               if (err)
-                       return err;
-
-               err = mv88e6xxx_port_write(chip, i, PORT_CONTROL,
-                                          reg & 0xfffc);
+               err = mv88e6xxx_port_set_state(chip, i,
+                                              PORT_CONTROL_STATE_DISABLED);
                if (err)
                        return err;
        }
@@ -2616,35 +2444,20 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
        int err;
        u16 reg;
 
-       if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
-           mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) ||
-           mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip) ||
-           mv88e6xxx_6065_family(chip) || mv88e6xxx_6320_family(chip)) {
-               /* MAC Forcing register: don't force link, speed,
-                * duplex or flow control state to any particular
-                * values on physical ports, but force the CPU port
-                * and all DSA ports to their maximum bandwidth and
-                * full duplex.
-                */
-               err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg);
-               if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
-                       reg &= ~PORT_PCS_CTRL_UNFORCED;
-                       reg |= PORT_PCS_CTRL_FORCE_LINK |
-                               PORT_PCS_CTRL_LINK_UP |
-                               PORT_PCS_CTRL_DUPLEX_FULL |
-                               PORT_PCS_CTRL_FORCE_DUPLEX;
-                       if (mv88e6xxx_6065_family(chip))
-                               reg |= PORT_PCS_CTRL_100;
-                       else
-                               reg |= PORT_PCS_CTRL_1000;
-               } else {
-                       reg |= PORT_PCS_CTRL_UNFORCED;
-               }
-
-               err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg);
-               if (err)
-                       return err;
-       }
+       /* MAC Forcing register: don't force link, speed, duplex or flow control
+        * state to any particular values on physical ports, but force the CPU
+        * port and all DSA ports to their maximum bandwidth and full duplex.
+        */
+       if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+               err = mv88e6xxx_port_setup_mac(chip, port, LINK_FORCED_UP,
+                                              SPEED_MAX, DUPLEX_FULL,
+                                              PHY_INTERFACE_MODE_NA);
+       else
+               err = mv88e6xxx_port_setup_mac(chip, port, LINK_UNFORCED,
+                                              SPEED_UNFORCED, DUPLEX_UNFORCED,
+                                              PHY_INTERFACE_MODE_NA);
+       if (err)
+               return err;
 
        /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock,
         * disable Header mode, enable IGMP/MLD snooping, disable VLAN
@@ -2848,7 +2661,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
         * database, and allow bidirectional communication between the
         * CPU and DSA port(s), and the other ports.
         */
-       err = _mv88e6xxx_port_fid_set(chip, port, 0);
+       err = mv88e6xxx_port_set_fid(chip, port, 0);
        if (err)
                return err;
 
@@ -3015,6 +2828,11 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip)
        if (err)
                return err;
 
+       /* Initialize the statistics unit */
+       err = mv88e6xxx_stats_set_histogram(chip);
+       if (err)
+               return err;
+
        /* Clear the statistics counters for all ports */
        err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP,
                                 GLOBAL_STATS_OP_FLUSH_ALL);
@@ -3022,7 +2840,7 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip)
                return err;
 
        /* Wait for the flush to complete. */
-       err = _mv88e6xxx_stats_wait(chip);
+       err = mv88e6xxx_g1_stats_wait(chip);
        if (err)
                return err;
 
@@ -3364,117 +3182,387 @@ static int mv88e6xxx_set_eeprom(struct dsa_switch *ds,
 }
 
 static const struct mv88e6xxx_ops mv88e6085_ops = {
+       /* MV88E6XXX_FAMILY_6097 */
        .set_switch_mac = mv88e6xxx_g1_set_switch_mac,
        .phy_read = mv88e6xxx_phy_ppu_read,
        .phy_write = mv88e6xxx_phy_ppu_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6095_ops = {
+       /* MV88E6XXX_FAMILY_6095 */
        .set_switch_mac = mv88e6xxx_g1_set_switch_mac,
        .phy_read = mv88e6xxx_phy_ppu_read,
        .phy_write = mv88e6xxx_phy_ppu_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
+};
+
+static const struct mv88e6xxx_ops mv88e6097_ops = {
+       /* MV88E6XXX_FAMILY_6097 */
+       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+       .phy_read = mv88e6xxx_g2_smi_phy_read,
+       .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6123_ops = {
+       /* MV88E6XXX_FAMILY_6165 */
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_read,
        .phy_write = mv88e6xxx_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6131_ops = {
+       /* MV88E6XXX_FAMILY_6185 */
        .set_switch_mac = mv88e6xxx_g1_set_switch_mac,
        .phy_read = mv88e6xxx_phy_ppu_read,
        .phy_write = mv88e6xxx_phy_ppu_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6161_ops = {
+       /* MV88E6XXX_FAMILY_6165 */
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_read,
        .phy_write = mv88e6xxx_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6165_ops = {
+       /* MV88E6XXX_FAMILY_6165 */
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_read,
        .phy_write = mv88e6xxx_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6171_ops = {
+       /* MV88E6XXX_FAMILY_6351 */
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6320_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6172_ops = {
+       /* MV88E6XXX_FAMILY_6352 */
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
+       .port_set_speed = mv88e6352_port_set_speed,
+       .stats_snapshot = mv88e6320_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6175_ops = {
+       /* MV88E6XXX_FAMILY_6351 */
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6320_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6176_ops = {
+       /* MV88E6XXX_FAMILY_6352 */
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
+       .port_set_speed = mv88e6352_port_set_speed,
+       .stats_snapshot = mv88e6320_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6185_ops = {
+       /* MV88E6XXX_FAMILY_6185 */
        .set_switch_mac = mv88e6xxx_g1_set_switch_mac,
        .phy_read = mv88e6xxx_phy_ppu_read,
        .phy_write = mv88e6xxx_phy_ppu_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6xxx_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
+};
+
+static const struct mv88e6xxx_ops mv88e6190_ops = {
+       /* MV88E6XXX_FAMILY_6390 */
+       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+       .phy_read = mv88e6xxx_g2_smi_phy_read,
+       .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
+       .port_set_speed = mv88e6390_port_set_speed,
+       .stats_snapshot = mv88e6390_g1_stats_snapshot,
+       .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
+       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
+       .stats_get_strings = mv88e6320_stats_get_strings,
+       .stats_get_stats = mv88e6390_stats_get_stats,
+};
+
+static const struct mv88e6xxx_ops mv88e6190x_ops = {
+       /* MV88E6XXX_FAMILY_6390 */
+       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+       .phy_read = mv88e6xxx_g2_smi_phy_read,
+       .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
+       .port_set_speed = mv88e6390x_port_set_speed,
+       .stats_snapshot = mv88e6390_g1_stats_snapshot,
+       .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
+       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
+       .stats_get_strings = mv88e6320_stats_get_strings,
+       .stats_get_stats = mv88e6390_stats_get_stats,
+};
+
+static const struct mv88e6xxx_ops mv88e6191_ops = {
+       /* MV88E6XXX_FAMILY_6390 */
+       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+       .phy_read = mv88e6xxx_g2_smi_phy_read,
+       .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
+       .port_set_speed = mv88e6390_port_set_speed,
+       .stats_snapshot = mv88e6390_g1_stats_snapshot,
+       .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
+       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
+       .stats_get_strings = mv88e6320_stats_get_strings,
+       .stats_get_stats = mv88e6390_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6240_ops = {
+       /* MV88E6XXX_FAMILY_6352 */
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
+       .port_set_speed = mv88e6352_port_set_speed,
+       .stats_snapshot = mv88e6320_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
+};
+
+static const struct mv88e6xxx_ops mv88e6290_ops = {
+       /* MV88E6XXX_FAMILY_6390 */
+       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+       .phy_read = mv88e6xxx_g2_smi_phy_read,
+       .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
+       .port_set_speed = mv88e6390_port_set_speed,
+       .stats_snapshot = mv88e6390_g1_stats_snapshot,
+       .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
+       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
+       .stats_get_strings = mv88e6320_stats_get_strings,
+       .stats_get_stats = mv88e6390_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6320_ops = {
+       /* MV88E6XXX_FAMILY_6320 */
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6320_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
+       .stats_get_strings = mv88e6320_stats_get_strings,
+       .stats_get_stats = mv88e6320_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6321_ops = {
+       /* MV88E6XXX_FAMILY_6320 */
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6320_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
+       .stats_get_strings = mv88e6320_stats_get_strings,
+       .stats_get_stats = mv88e6320_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6350_ops = {
+       /* MV88E6XXX_FAMILY_6351 */
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6320_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6351_ops = {
+       /* MV88E6XXX_FAMILY_6351 */
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
+       .port_set_speed = mv88e6185_port_set_speed,
+       .stats_snapshot = mv88e6320_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
 };
 
 static const struct mv88e6xxx_ops mv88e6352_ops = {
+       /* MV88E6XXX_FAMILY_6352 */
        .get_eeprom = mv88e6xxx_g2_get_eeprom16,
        .set_eeprom = mv88e6xxx_g2_set_eeprom16,
        .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
        .phy_read = mv88e6xxx_g2_smi_phy_read,
        .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6352_port_set_rgmii_delay,
+       .port_set_speed = mv88e6352_port_set_speed,
+       .stats_snapshot = mv88e6320_g1_stats_snapshot,
+       .stats_get_sset_count = mv88e6095_stats_get_sset_count,
+       .stats_get_strings = mv88e6095_stats_get_strings,
+       .stats_get_stats = mv88e6095_stats_get_stats,
+};
+
+static const struct mv88e6xxx_ops mv88e6390_ops = {
+       /* MV88E6XXX_FAMILY_6390 */
+       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+       .phy_read = mv88e6xxx_g2_smi_phy_read,
+       .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
+       .port_set_speed = mv88e6390_port_set_speed,
+       .stats_snapshot = mv88e6390_g1_stats_snapshot,
+       .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
+       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
+       .stats_get_strings = mv88e6320_stats_get_strings,
+       .stats_get_stats = mv88e6390_stats_get_stats,
+};
+
+static const struct mv88e6xxx_ops mv88e6390x_ops = {
+       /* MV88E6XXX_FAMILY_6390 */
+       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+       .phy_read = mv88e6xxx_g2_smi_phy_read,
+       .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
+       .port_set_speed = mv88e6390x_port_set_speed,
+       .stats_snapshot = mv88e6390_g1_stats_snapshot,
+       .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
+       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
+       .stats_get_strings = mv88e6320_stats_get_strings,
+       .stats_get_stats = mv88e6390_stats_get_stats,
+};
+
+static const struct mv88e6xxx_ops mv88e6391_ops = {
+       /* MV88E6XXX_FAMILY_6390 */
+       .set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+       .phy_read = mv88e6xxx_g2_smi_phy_read,
+       .phy_write = mv88e6xxx_g2_smi_phy_write,
+       .port_set_link = mv88e6xxx_port_set_link,
+       .port_set_duplex = mv88e6xxx_port_set_duplex,
+       .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay,
+       .port_set_speed = mv88e6390_port_set_speed,
+       .stats_snapshot = mv88e6390_g1_stats_snapshot,
+       .stats_set_histogram = mv88e6390_g1_stats_set_histogram,
+       .stats_get_sset_count = mv88e6320_stats_get_sset_count,
+       .stats_get_strings = mv88e6320_stats_get_strings,
+       .stats_get_stats = mv88e6390_stats_get_stats,
 };
 
 static const struct mv88e6xxx_info mv88e6xxx_table[] = {
@@ -3506,6 +3594,20 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .ops = &mv88e6095_ops,
        },
 
+       [MV88E6097] = {
+               .prod_num = PORT_SWITCH_ID_PROD_NUM_6097,
+               .family = MV88E6XXX_FAMILY_6097,
+               .name = "Marvell 88E6097/88E6097F",
+               .num_databases = 4096,
+               .num_ports = 11,
+               .port_base_addr = 0x10,
+               .global1_addr = 0x1b,
+               .age_time_coeff = 15000,
+               .g1_irqs = 8,
+               .flags = MV88E6XXX_FLAGS_FAMILY_6097,
+               .ops = &mv88e6097_ops,
+       },
+
        [MV88E6123] = {
                .prod_num = PORT_SWITCH_ID_PROD_NUM_6123,
                .family = MV88E6XXX_FAMILY_6165,
@@ -3632,6 +3734,47 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .ops = &mv88e6185_ops,
        },
 
+       [MV88E6190] = {
+               .prod_num = PORT_SWITCH_ID_PROD_NUM_6190,
+               .family = MV88E6XXX_FAMILY_6390,
+               .name = "Marvell 88E6190",
+               .num_databases = 4096,
+               .num_ports = 11,        /* 10 + Z80 */
+               .port_base_addr = 0x0,
+               .global1_addr = 0x1b,
+               .age_time_coeff = 15000,
+               .g1_irqs = 9,
+               .flags = MV88E6XXX_FLAGS_FAMILY_6390,
+               .ops = &mv88e6190_ops,
+       },
+
+       [MV88E6190X] = {
+               .prod_num = PORT_SWITCH_ID_PROD_NUM_6190X,
+               .family = MV88E6XXX_FAMILY_6390,
+               .name = "Marvell 88E6190X",
+               .num_databases = 4096,
+               .num_ports = 11,        /* 10 + Z80 */
+               .port_base_addr = 0x0,
+               .global1_addr = 0x1b,
+               .age_time_coeff = 15000,
+               .g1_irqs = 9,
+               .flags = MV88E6XXX_FLAGS_FAMILY_6390,
+               .ops = &mv88e6190x_ops,
+       },
+
+       [MV88E6191] = {
+               .prod_num = PORT_SWITCH_ID_PROD_NUM_6191,
+               .family = MV88E6XXX_FAMILY_6390,
+               .name = "Marvell 88E6191",
+               .num_databases = 4096,
+               .num_ports = 11,        /* 10 + Z80 */
+               .port_base_addr = 0x0,
+               .global1_addr = 0x1b,
+               .age_time_coeff = 15000,
+               .flags = MV88E6XXX_FLAGS_FAMILY_6390,
+               .ops = &mv88e6191_ops,
+       },
+
        [MV88E6240] = {
                .prod_num = PORT_SWITCH_ID_PROD_NUM_6240,
                .family = MV88E6XXX_FAMILY_6352,
@@ -3646,6 +3789,20 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .ops = &mv88e6240_ops,
        },
 
+       [MV88E6290] = {
+               .prod_num = PORT_SWITCH_ID_PROD_NUM_6290,
+               .family = MV88E6XXX_FAMILY_6390,
+               .name = "Marvell 88E6290",
+               .num_databases = 4096,
+               .num_ports = 11,        /* 10 + Z80 */
+               .port_base_addr = 0x0,
+               .global1_addr = 0x1b,
+               .age_time_coeff = 15000,
+               .g1_irqs = 9,
+               .flags = MV88E6XXX_FLAGS_FAMILY_6390,
+               .ops = &mv88e6290_ops,
+       },
+
        [MV88E6320] = {
                .prod_num = PORT_SWITCH_ID_PROD_NUM_6320,
                .family = MV88E6XXX_FAMILY_6320,
@@ -3715,6 +3872,32 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .flags = MV88E6XXX_FLAGS_FAMILY_6352,
                .ops = &mv88e6352_ops,
        },
+       [MV88E6390] = {
+               .prod_num = PORT_SWITCH_ID_PROD_NUM_6390,
+               .family = MV88E6XXX_FAMILY_6390,
+               .name = "Marvell 88E6390",
+               .num_databases = 4096,
+               .num_ports = 11,        /* 10 + Z80 */
+               .port_base_addr = 0x0,
+               .global1_addr = 0x1b,
+               .age_time_coeff = 15000,
+               .g1_irqs = 9,
+               .flags = MV88E6XXX_FLAGS_FAMILY_6390,
+               .ops = &mv88e6390_ops,
+       },
+       [MV88E6390X] = {
+               .prod_num = PORT_SWITCH_ID_PROD_NUM_6390X,
+               .family = MV88E6XXX_FAMILY_6390,
+               .name = "Marvell 88E6390X",
+               .num_databases = 4096,
+               .num_ports = 11,        /* 10 + Z80 */
+               .port_base_addr = 0x0,
+               .global1_addr = 0x1b,
+               .age_time_coeff = 15000,
+               .g1_irqs = 9,
+               .flags = MV88E6XXX_FLAGS_FAMILY_6390,
+               .ops = &mv88e6390x_ops,
+       },
 };
 
 static const struct mv88e6xxx_info *mv88e6xxx_lookup_info(unsigned int prod_num)
@@ -4007,16 +4190,16 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
        if (err)
                return err;
 
+       chip->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+       if (IS_ERR(chip->reset))
+               return PTR_ERR(chip->reset);
+
        err = mv88e6xxx_detect(chip);
        if (err)
                return err;
 
        mv88e6xxx_phy_init(chip);
 
-       chip->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_ASIS);
-       if (IS_ERR(chip->reset))
-               return PTR_ERR(chip->reset);
-
        if (chip->info->ops->get_eeprom &&
            !of_property_read_u32(np, "eeprom-length", &eeprom_len))
                chip->eeprom_len = eeprom_len;
@@ -4065,10 +4248,14 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev)
 out_mdio:
        mv88e6xxx_mdio_unregister(chip);
 out_g2_irq:
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT))
+       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT) && chip->irq > 0)
                mv88e6xxx_g2_irq_free(chip);
 out_g1_irq:
-       mv88e6xxx_g1_irq_free(chip);
+       if (chip->irq > 0) {
+               mutex_lock(&chip->reg_lock);
+               mv88e6xxx_g1_irq_free(chip);
+               mutex_unlock(&chip->reg_lock);
+       }
 out:
        return err;
 }
@@ -4082,9 +4269,11 @@ static void mv88e6xxx_remove(struct mdio_device *mdiodev)
        mv88e6xxx_unregister_switch(chip);
        mv88e6xxx_mdio_unregister(chip);
 
-       if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT))
-               mv88e6xxx_g2_irq_free(chip);
-       mv88e6xxx_g1_irq_free(chip);
+       if (chip->irq > 0) {
+               if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_INT))
+                       mv88e6xxx_g2_irq_free(chip);
+               mv88e6xxx_g1_irq_free(chip);
+       }
 }
 
 static const struct of_device_id mv88e6xxx_of_match[] = {
@@ -4092,6 +4281,10 @@ static const struct of_device_id mv88e6xxx_of_match[] = {
                .compatible = "marvell,mv88e6085",
                .data = &mv88e6xxx_table[MV88E6085],
        },
+       {
+               .compatible = "marvell,mv88e6190",
+               .data = &mv88e6xxx_table[MV88E6190],
+       },
        { /* sentinel */ },
 };
 
index d358720b6c2d5ed99d3ae28150fc49e7e7e32ceb..5fcf23dbf04bdc3cff53a9197c53fd7355044a39 100644 (file)
@@ -32,3 +32,96 @@ int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
 {
        return mv88e6xxx_wait(chip, chip->info->global1_addr, reg, mask);
 }
+
+/* Offset 0x1c: Global Control 2 */
+
+/* Turn on both the RX and TX histogram bits in Global Control 2 with a
+ * read-modify-write, leaving the other bits of the register as read.
+ * Returns 0 on success or the error from the register access.
+ */
+int mv88e6390_g1_stats_set_histogram(struct mv88e6xxx_chip *chip)
+{
+       u16 ctrl2;
+       int ret = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL_2, &ctrl2);
+
+       if (ret)
+               return ret;
+
+       return mv88e6xxx_g1_write(chip, GLOBAL_CONTROL_2,
+                                 ctrl2 | GLOBAL_CONTROL_2_HIST_RX_TX);
+}
+
+/* Offset 0x1d: Statistics Operation 2 */
+
+/* Wait on the BUSY bit of the Statistics Operation register through the
+ * generic Global 1 wait helper; returns whatever that helper returns.
+ */
+int mv88e6xxx_g1_stats_wait(struct mv88e6xxx_chip *chip)
+{
+       return mv88e6xxx_g1_wait(chip, GLOBAL_STATS_OP, GLOBAL_STATS_OP_BUSY);
+}
+
+/* Issue a capture-port statistics operation (with the RX+TX histogram
+ * bits) for @port, then wait for the operation to finish.
+ * @port is OR-ed into the operation word exactly as given by the caller.
+ * Returns 0 on success or the error from the write or the wait.
+ */
+int mv88e6xxx_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port)
+{
+       const u16 op = GLOBAL_STATS_OP_CAPTURE_PORT |
+                      GLOBAL_STATS_OP_HIST_RX_TX | port;
+       int ret;
+
+       /* Kick off the capture of this port's counters... */
+       ret = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP, op);
+
+       /* ...and only wait for completion if the write went through. */
+       return ret ? ret : mv88e6xxx_g1_stats_wait(chip);
+}
+
+/* 88E6320 flavour of the snapshot: the port is encoded as (port + 1)
+ * shifted left by 5 before being handed to the common snapshot routine.
+ */
+int mv88e6320_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port)
+{
+       const int encoded = (port + 1) << 5;
+
+       return mv88e6xxx_g1_stats_snapshot(chip, encoded);
+}
+
+/* 88E6390 flavour of the snapshot: same (port + 1) << 5 port encoding,
+ * but no histogram bits are put in the operation word.
+ * Returns 0 on success or the error from the write or the wait.
+ */
+int mv88e6390_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port)
+{
+       const u16 op = GLOBAL_STATS_OP_CAPTURE_PORT | ((port + 1) << 5);
+       int ret;
+
+       /* Kick off the capture of this port's counters... */
+       ret = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP, op);
+       if (ret)
+               return ret;
+
+       /* ...then wait for the hardware to finish it. */
+       return mv88e6xxx_g1_stats_wait(chip);
+}
+
+/* Read one 32-bit counter out of the previously captured statistics.
+ *
+ * @stat is OR-ed into a READ_CAPTURED statistics operation; once the
+ * operation completes, the upper 16 bits are read from
+ * GLOBAL_STATS_COUNTER_32 and the lower 16 bits from
+ * GLOBAL_STATS_COUNTER_01.
+ *
+ * There is no error return: *val is zeroed up front and is left at 0 if
+ * any register access or the wait fails.
+ */
+void mv88e6xxx_g1_stats_read(struct mv88e6xxx_chip *chip, int stat, u32 *val)
+{
+       u32 value;
+       u16 reg;
+       int err;
+
+       *val = 0;
+
+       err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP,
+                                GLOBAL_STATS_OP_READ_CAPTURED | stat);
+       if (err)
+               return;
+
+       err = mv88e6xxx_g1_stats_wait(chip);
+       if (err)
+               return;
+
+       err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_COUNTER_32, &reg);
+       if (err)
+               return;
+
+       /* Widen before shifting: a bare u16 is promoted to signed int, so
+        * shifting a value with bit 15 set would overflow into the sign bit.
+        */
+       value = (u32)reg << 16;
+
+       err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_COUNTER_01, &reg);
+       if (err)
+               return;
+
+       *val = value | reg;
+}
index 62291e6fe3a3fec276d7918c94b0d62109757e87..df3794cdbfb9aed287e9dfb948fc28d36d16b7c4 100644 (file)
 int mv88e6xxx_g1_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
 int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
 int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
+int mv88e6xxx_g1_stats_wait(struct mv88e6xxx_chip *chip);
+int mv88e6xxx_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port);
+int mv88e6320_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port);
+int mv88e6390_g1_stats_snapshot(struct mv88e6xxx_chip *chip, int port);
+int mv88e6390_g1_stats_set_histogram(struct mv88e6xxx_chip *chip);
+void mv88e6xxx_g1_stats_read(struct mv88e6xxx_chip *chip, int stat, u32 *val);
 
 #endif /* _MV88E6XXX_GLOBAL1_H */
index 1a0b13521d1378aa404f2a472812fba972a7c7a3..536a27c9735fd853cbbb982bf0fd04d217839b94 100644 (file)
@@ -507,6 +507,9 @@ void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip)
 {
        int irq, virq;
 
+       free_irq(chip->device_irq, chip);
+       irq_dispose_mapping(chip->device_irq);
+
        for (irq = 0; irq < 16; irq++) {
                virq = irq_find_mapping(chip->g2_irq.domain, irq);
                irq_dispose_mapping(virq);
@@ -517,8 +520,7 @@ void mv88e6xxx_g2_irq_free(struct mv88e6xxx_chip *chip)
 
 int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
 {
-       int device_irq;
-       int err, irq;
+       int err, irq, virq;
 
        if (!chip->dev->of_node)
                return -EINVAL;
@@ -534,22 +536,28 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
        chip->g2_irq.chip = mv88e6xxx_g2_irq_chip;
        chip->g2_irq.masked = ~0;
 
-       device_irq = irq_find_mapping(chip->g1_irq.domain,
-                                     GLOBAL_STATUS_IRQ_DEVICE);
-       if (device_irq < 0) {
-               err = device_irq;
+       chip->device_irq = irq_find_mapping(chip->g1_irq.domain,
+                                           GLOBAL_STATUS_IRQ_DEVICE);
+       if (chip->device_irq < 0) {
+               err = chip->device_irq;
                goto out;
        }
 
-       err = devm_request_threaded_irq(chip->dev, device_irq, NULL,
-                                       mv88e6xxx_g2_irq_thread_fn,
-                                       IRQF_ONESHOT, "mv88e6xxx-g1", chip);
+       err = request_threaded_irq(chip->device_irq, NULL,
+                                  mv88e6xxx_g2_irq_thread_fn,
+                                  IRQF_ONESHOT, "mv88e6xxx-g1", chip);
        if (err)
                goto out;
 
        return 0;
+
 out:
-       mv88e6xxx_g2_irq_free(chip);
+       for (irq = 0; irq < 16; irq++) {
+               virq = irq_find_mapping(chip->g2_irq.domain, irq);
+               irq_dispose_mapping(virq);
+       }
+
+       irq_domain_remove(chip->g2_irq.domain);
 
        return err;
 }
index 6f48e5886b2adbbae021576cf2bf51c59f901059..ab52c3772c7879b49e2dc8311ad97466c8bd228b 100644 (file)
 #define PORT_PCS_CTRL          0x01
 #define PORT_PCS_CTRL_RGMII_DELAY_RXCLK        BIT(15)
 #define PORT_PCS_CTRL_RGMII_DELAY_TXCLK        BIT(14)
+#define PORT_PCS_CTRL_FORCE_SPEED      BIT(13) /* 6390 */
+#define PORT_PCS_CTRL_ALTSPEED         BIT(12) /* 6390 */
+#define PORT_PCS_CTRL_200BASE          BIT(12) /* 6352 */
 #define PORT_PCS_CTRL_FC               BIT(7)
 #define PORT_PCS_CTRL_FORCE_FC         BIT(6)
 #define PORT_PCS_CTRL_LINK_UP          BIT(5)
 #define PORT_PCS_CTRL_FORCE_LINK       BIT(4)
 #define PORT_PCS_CTRL_DUPLEX_FULL      BIT(3)
 #define PORT_PCS_CTRL_FORCE_DUPLEX     BIT(2)
-#define PORT_PCS_CTRL_10               0x00
-#define PORT_PCS_CTRL_100              0x01
-#define PORT_PCS_CTRL_1000             0x02
-#define PORT_PCS_CTRL_UNFORCED         0x03
+#define PORT_PCS_CTRL_SPEED_MASK       (0x03)
+#define PORT_PCS_CTRL_SPEED_10         (0x00)
+#define PORT_PCS_CTRL_SPEED_100                (0x01)
+#define PORT_PCS_CTRL_SPEED_200                (0x02) /* 6065 and non Gb chips */
+#define PORT_PCS_CTRL_SPEED_1000       (0x02)
+#define PORT_PCS_CTRL_SPEED_10000      (0x03) /* 6390X */
+#define PORT_PCS_CTRL_SPEED_UNFORCED   (0x03)
 #define PORT_PAUSE_CTRL                0x02
 #define PORT_SWITCH_ID         0x03
 #define PORT_SWITCH_ID_PROD_NUM_6085   0x04a
 #define PORT_SWITCH_ID_PROD_NUM_6095   0x095
+#define PORT_SWITCH_ID_PROD_NUM_6097   0x099
 #define PORT_SWITCH_ID_PROD_NUM_6131   0x106
 #define PORT_SWITCH_ID_PROD_NUM_6320   0x115
 #define PORT_SWITCH_ID_PROD_NUM_6123   0x121
 #define PORT_SWITCH_ID_PROD_NUM_6175   0x175
 #define PORT_SWITCH_ID_PROD_NUM_6176   0x176
 #define PORT_SWITCH_ID_PROD_NUM_6185   0x1a7
+#define PORT_SWITCH_ID_PROD_NUM_6190   0x190
+#define PORT_SWITCH_ID_PROD_NUM_6190X  0x0a0
+#define PORT_SWITCH_ID_PROD_NUM_6191   0x191
 #define PORT_SWITCH_ID_PROD_NUM_6240   0x240
+#define PORT_SWITCH_ID_PROD_NUM_6290   0x290
 #define PORT_SWITCH_ID_PROD_NUM_6321   0x310
 #define PORT_SWITCH_ID_PROD_NUM_6352   0x352
 #define PORT_SWITCH_ID_PROD_NUM_6350   0x371
 #define PORT_SWITCH_ID_PROD_NUM_6351   0x375
+#define PORT_SWITCH_ID_PROD_NUM_6390   0x390
+#define PORT_SWITCH_ID_PROD_NUM_6390X  0x0a1
 #define PORT_CONTROL           0x04
 #define PORT_CONTROL_USE_CORE_TAG      BIT(15)
 #define PORT_CONTROL_DROP_ON_LOCK      BIT(14)
 #define GLOBAL_CONTROL_2       0x1c
 #define GLOBAL_CONTROL_2_NO_CASCADE            0xe000
 #define GLOBAL_CONTROL_2_MULTIPLE_CASCADE      0xf000
-
+#define GLOBAL_CONTROL_2_HIST_RX              (0x1 << 6)
+#define GLOBAL_CONTROL_2_HIST_TX              (0x2 << 6)
+#define GLOBAL_CONTROL_2_HIST_RX_TX           (0x3 << 6)
 #define GLOBAL_STATS_OP                0x1d
 #define GLOBAL_STATS_OP_BUSY   BIT(15)
 #define GLOBAL_STATS_OP_NOP            (0 << 12)
 #define GLOBAL_STATS_OP_HIST_RX                ((1 << 10) | GLOBAL_STATS_OP_BUSY)
 #define GLOBAL_STATS_OP_HIST_TX                ((2 << 10) | GLOBAL_STATS_OP_BUSY)
 #define GLOBAL_STATS_OP_HIST_RX_TX     ((3 << 10) | GLOBAL_STATS_OP_BUSY)
-#define GLOBAL_STATS_OP_BANK_1 BIT(9)
+#define GLOBAL_STATS_OP_BANK_1_BIT_9   BIT(9)
+#define GLOBAL_STATS_OP_BANK_1_BIT_10  BIT(10)
 #define GLOBAL_STATS_COUNTER_32        0x1e
 #define GLOBAL_STATS_COUNTER_01        0x1f
 
 enum mv88e6xxx_model {
        MV88E6085,
        MV88E6095,
+       MV88E6097,
        MV88E6123,
        MV88E6131,
        MV88E6161,
@@ -372,12 +389,18 @@ enum mv88e6xxx_model {
        MV88E6175,
        MV88E6176,
        MV88E6185,
+       MV88E6190,
+       MV88E6190X,
+       MV88E6191,
        MV88E6240,
+       MV88E6290,
        MV88E6320,
        MV88E6321,
        MV88E6350,
        MV88E6351,
        MV88E6352,
+       MV88E6390,
+       MV88E6390X,
 };
 
 enum mv88e6xxx_family {
@@ -390,6 +413,7 @@ enum mv88e6xxx_family {
        MV88E6XXX_FAMILY_6320,  /* 6320 6321 */
        MV88E6XXX_FAMILY_6351,  /* 6171 6175 6350 6351 */
        MV88E6XXX_FAMILY_6352,  /* 6172 6176 6240 6352 */
+       MV88E6XXX_FAMILY_6390,  /* 6190 6190X 6191 6290 6390 6390X */
 };
 
 enum mv88e6xxx_cap {
@@ -609,6 +633,18 @@ enum mv88e6xxx_cap {
 
 struct mv88e6xxx_ops;
 
+#define MV88E6XXX_FLAGS_FAMILY_6390    \
+       (MV88E6XXX_FLAG_EEE |           \
+        MV88E6XXX_FLAG_GLOBAL2 |       \
+        MV88E6XXX_FLAG_PPU_ACTIVE |    \
+        MV88E6XXX_FLAG_STU |           \
+        MV88E6XXX_FLAG_TEMP |          \
+        MV88E6XXX_FLAG_TEMP_LIMIT |    \
+        MV88E6XXX_FLAG_VTU |           \
+        MV88E6XXX_FLAGS_IRL |          \
+        MV88E6XXX_FLAGS_MULTI_CHIP |   \
+        MV88E6XXX_FLAGS_PVT)
+
 struct mv88e6xxx_info {
        enum mv88e6xxx_family family;
        u16 prod_num;
@@ -708,6 +744,7 @@ struct mv88e6xxx_chip {
        struct mv88e6xxx_irq g1_irq;
        struct mv88e6xxx_irq g2_irq;
        int irq;
+       int device_irq;
 };
 
 struct mv88e6xxx_bus_ops {
@@ -727,19 +764,68 @@ struct mv88e6xxx_ops {
                        u16 *val);
        int (*phy_write)(struct mv88e6xxx_chip *chip, int addr, int reg,
                         u16 val);
-};
 
-enum stat_type {
-       BANK0,
-       BANK1,
-       PORT,
+       /* RGMII Receive/Transmit Timing Control
+        * Add delay on PHY_INTERFACE_MODE_RGMII_*ID, no delay otherwise.
+        */
+       int (*port_set_rgmii_delay)(struct mv88e6xxx_chip *chip, int port,
+                                   phy_interface_t mode);
+
+#define LINK_FORCED_DOWN       0
+#define LINK_FORCED_UP         1
+#define LINK_UNFORCED          -2
+
+       /* Port's MAC link state
+        * Use LINK_FORCED_UP or LINK_FORCED_DOWN to force link up or down,
+        * or LINK_UNFORCED for normal link detection.
+        */
+       int (*port_set_link)(struct mv88e6xxx_chip *chip, int port, int link);
+
+#define DUPLEX_UNFORCED                -2
+
+       /* Port's MAC duplex mode
+        *
+        * Use DUPLEX_HALF or DUPLEX_FULL to force half or full duplex,
+        * or DUPLEX_UNFORCED for normal duplex detection.
+        */
+       int (*port_set_duplex)(struct mv88e6xxx_chip *chip, int port, int dup);
+
+#define SPEED_MAX              INT_MAX
+#define SPEED_UNFORCED         -2
+
+       /* Port's MAC speed (in Mbps)
+        *
+        * Depending on the chip, 10, 100, 200, 1000, 2500, 10000 are valid.
+        * Use SPEED_UNFORCED for normal detection, SPEED_MAX for max value.
+        */
+       int (*port_set_speed)(struct mv88e6xxx_chip *chip, int port, int speed);
+
+       /* Snapshot the statistics for a port. The statistics can then
+        * be read back at leisure but still with a consistent view.
+        */
+       int (*stats_snapshot)(struct mv88e6xxx_chip *chip, int port);
+
+       /* Set the histogram mode for statistics, when the control registers
+        * are separated out of the STATS_OP register.
+        */
+       int (*stats_set_histogram)(struct mv88e6xxx_chip *chip);
+
+       /* Return the number of strings describing statistics */
+       int (*stats_get_sset_count)(struct mv88e6xxx_chip *chip);
+       void (*stats_get_strings)(struct mv88e6xxx_chip *chip,  uint8_t *data);
+       void (*stats_get_stats)(struct mv88e6xxx_chip *chip,  int port,
+                               uint64_t *data);
 };
 
+#define STATS_TYPE_PORT                BIT(0)
+#define STATS_TYPE_BANK0       BIT(1)
+#define STATS_TYPE_BANK1       BIT(2)
+
 struct mv88e6xxx_hw_stat {
        char string[ETH_GSTRING_LEN];
        int sizeof_stat;
        int reg;
-       enum stat_type type;
+       int type;
 };
 
 static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip,
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
new file mode 100644 (file)
index 0000000..af4772d
--- /dev/null
@@ -0,0 +1,498 @@
+/*
+ * Marvell 88E6xxx Switch Port Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "mv88e6xxx.h"
+#include "port.h"
+
+/* Read register @reg of switch port @port into @val. The device address
+ * used is the chip's port base address plus the port number.
+ */
+int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
+                       u16 *val)
+{
+       return mv88e6xxx_read(chip, chip->info->port_base_addr + port, reg,
+                             val);
+}
+
+/* Write @val to register @reg of switch port @port. The device address
+ * used is the chip's port base address plus the port number.
+ */
+int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
+                        u16 val)
+{
+       return mv88e6xxx_write(chip, chip->info->port_base_addr + port, reg,
+                              val);
+}
+
+/* Offset 0x01: MAC (or PCS or Physical) Control Register
+ *
+ * Link, Duplex and Flow Control have one force bit, one value bit.
+ *
+ * For port's MAC speed, ForceSpd (or SpdValue) bits 1:0 program the value.
+ * Alternative values require the 200BASE (or AltSpeed) bit 12 set.
+ * Newer chips need a ForcedSpd bit 13 set to consider the value.
+ */
+
+/* Program the RGMII RX/TX clock delay bits of a port's PCS control
+ * register according to the PHY interface mode. Modes other than the
+ * four RGMII variants return 0 without writing the register.
+ */
+static int mv88e6xxx_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port,
+                                         phy_interface_t mode)
+{
+       const u16 both = PORT_PCS_CTRL_RGMII_DELAY_RXCLK |
+                        PORT_PCS_CTRL_RGMII_DELAY_TXCLK;
+       u16 delay = 0;
+       u16 pcs;
+       int ret;
+
+       ret = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &pcs);
+       if (ret)
+               return ret;
+
+       switch (mode) {
+       case PHY_INTERFACE_MODE_RGMII:
+               /* plain RGMII: no delay on either clock */
+               break;
+       case PHY_INTERFACE_MODE_RGMII_ID:
+               delay = both;
+               break;
+       case PHY_INTERFACE_MODE_RGMII_RXID:
+               delay = PORT_PCS_CTRL_RGMII_DELAY_RXCLK;
+               break;
+       case PHY_INTERFACE_MODE_RGMII_TXID:
+               delay = PORT_PCS_CTRL_RGMII_DELAY_TXCLK;
+               break;
+       default:
+               /* not an RGMII mode: leave the register alone */
+               return 0;
+       }
+
+       pcs = (pcs & ~both) | delay;
+
+       ret = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, pcs);
+       if (ret)
+               return ret;
+
+       netdev_dbg(chip->ds->ports[port].netdev, "delay RXCLK %s, TXCLK %s\n",
+                  pcs & PORT_PCS_CTRL_RGMII_DELAY_RXCLK ? "yes" : "no",
+                  pcs & PORT_PCS_CTRL_RGMII_DELAY_TXCLK ? "yes" : "no");
+
+       return 0;
+}
+
+/* 88E6352 variant: ports below 5 are rejected with -EOPNOTSUPP, every
+ * other port goes through the common RGMII delay helper.
+ */
+int mv88e6352_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port,
+                                  phy_interface_t mode)
+{
+       if (port >= 5)
+               return mv88e6xxx_port_set_rgmii_delay(chip, port, mode);
+
+       return -EOPNOTSUPP;
+}
+
+/* 88E6390 variant: only port 0 is accepted, any other port gets
+ * -EOPNOTSUPP. Port 0 goes through the common RGMII delay helper.
+ */
+int mv88e6390_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port,
+                                  phy_interface_t mode)
+{
+       if (port == 0)
+               return mv88e6xxx_port_set_rgmii_delay(chip, port, mode);
+
+       return -EOPNOTSUPP;
+}
+
+/* Force a port's MAC link up or down, or hand it back to normal link
+ * detection.
+ *
+ * @link is LINK_FORCED_UP, LINK_FORCED_DOWN or LINK_UNFORCED; any other
+ * value gives -EINVAL (the register has been read by then, but nothing
+ * is written).
+ */
+int mv88e6xxx_port_set_link(struct mv88e6xxx_chip *chip, int port, int link)
+{
+       u16 force = 0;
+       u16 pcs;
+       int ret;
+
+       ret = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &pcs);
+       if (ret)
+               return ret;
+
+       if (link == LINK_FORCED_UP)
+               force = PORT_PCS_CTRL_FORCE_LINK | PORT_PCS_CTRL_LINK_UP;
+       else if (link == LINK_FORCED_DOWN)
+               force = PORT_PCS_CTRL_FORCE_LINK;
+       else if (link != LINK_UNFORCED)
+               return -EINVAL;
+
+       /* LINK_UNFORCED leaves both bits clear: normal link detection */
+       pcs &= ~(PORT_PCS_CTRL_FORCE_LINK | PORT_PCS_CTRL_LINK_UP);
+       pcs |= force;
+
+       ret = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, pcs);
+       if (ret)
+               return ret;
+
+       netdev_dbg(chip->ds->ports[port].netdev, "%s link %s\n",
+                  pcs & PORT_PCS_CTRL_FORCE_LINK ? "Force" : "Unforce",
+                  pcs & PORT_PCS_CTRL_LINK_UP ? "up" : "down");
+
+       return 0;
+}
+
+/* Force a port's MAC duplex, or hand it back to normal duplex detection.
+ *
+ * @dup is DUPLEX_HALF, DUPLEX_FULL or DUPLEX_UNFORCED; any other value
+ * gives -EINVAL (the register has been read by then, but nothing is
+ * written).
+ */
+int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup)
+{
+       u16 force = 0;
+       u16 pcs;
+       int ret;
+
+       ret = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &pcs);
+       if (ret)
+               return ret;
+
+       if (dup == DUPLEX_FULL)
+               force = PORT_PCS_CTRL_FORCE_DUPLEX | PORT_PCS_CTRL_DUPLEX_FULL;
+       else if (dup == DUPLEX_HALF)
+               force = PORT_PCS_CTRL_FORCE_DUPLEX;
+       else if (dup != DUPLEX_UNFORCED)
+               return -EINVAL;
+
+       /* DUPLEX_UNFORCED leaves both bits clear: normal duplex detection */
+       pcs &= ~(PORT_PCS_CTRL_FORCE_DUPLEX | PORT_PCS_CTRL_DUPLEX_FULL);
+       pcs |= force;
+
+       ret = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, pcs);
+       if (ret)
+               return ret;
+
+       netdev_dbg(chip->ds->ports[port].netdev, "%s %s duplex\n",
+                  pcs & PORT_PCS_CTRL_FORCE_DUPLEX ? "Force" : "Unforce",
+                  pcs & PORT_PCS_CTRL_DUPLEX_FULL ? "full" : "half");
+
+       return 0;
+}
+
+/* Common helper behind the per-family port_set_speed() operations.
+ *
+ * @speed is a speed in Mbps (10, 100, 200, 1000, 2500 or 10000) or
+ * SPEED_UNFORCED; any other value yields -EOPNOTSUPP before the hardware
+ * is touched. 10000 and SPEED_UNFORCED share the same speed encoding.
+ *
+ * @alt_bit: bit 12 (200BASE/AltSpeed) is cleared before the new value is
+ * applied, and 200 Mbps is encoded as 100 + AltSpeed. Without it,
+ * 200 Mbps uses the plain PORT_PCS_CTRL_SPEED_200 encoding.
+ * @force_bit: the ForcedSpd bit 13 is cleared, then set again for every
+ * speed except SPEED_UNFORCED.
+ *
+ * Returns 0 on success or the error from the register access.
+ */
+static int mv88e6xxx_port_set_speed(struct mv88e6xxx_chip *chip, int port,
+                                   int speed, bool alt_bit, bool force_bit)
+{
+       u16 reg, ctrl;
+       int err;
+
+       switch (speed) {
+       case 10:
+               ctrl = PORT_PCS_CTRL_SPEED_10;
+               break;
+       case 100:
+               ctrl = PORT_PCS_CTRL_SPEED_100;
+               break;
+       case 200:
+               if (alt_bit)
+                       ctrl = PORT_PCS_CTRL_SPEED_100 | PORT_PCS_CTRL_ALTSPEED;
+               else
+                       ctrl = PORT_PCS_CTRL_SPEED_200;
+               break;
+       case 1000:
+               ctrl = PORT_PCS_CTRL_SPEED_1000;
+               break;
+       case 2500:
+               ctrl = PORT_PCS_CTRL_SPEED_1000 | PORT_PCS_CTRL_ALTSPEED;
+               break;
+       case 10000:
+               /* all bits set, fall through... */
+       case SPEED_UNFORCED:
+               ctrl = PORT_PCS_CTRL_SPEED_UNFORCED;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg);
+       if (err)
+               return err;
+
+       reg &= ~PORT_PCS_CTRL_SPEED_MASK;
+       if (alt_bit)
+               reg &= ~PORT_PCS_CTRL_ALTSPEED;
+       if (force_bit) {
+               reg &= ~PORT_PCS_CTRL_FORCE_SPEED;
+               if (speed != SPEED_UNFORCED)
+                       ctrl |= PORT_PCS_CTRL_FORCE_SPEED;
+       }
+       reg |= ctrl;
+
+       err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg);
+       if (err)
+               return err;
+
+       /* SPEED_UNFORCED is a non-zero sentinel, so it must be compared
+        * explicitly; a plain truth test on @speed never picks this branch.
+        */
+       if (speed != SPEED_UNFORCED)
+               netdev_dbg(chip->ds->ports[port].netdev,
+                          "Speed set to %d Mbps\n", speed);
+       else
+               netdev_dbg(chip->ds->ports[port].netdev, "Speed unforced\n");
+
+       return 0;
+}
+
+/* Support 10, 100, 200 Mbps (e.g. 88E6065 family).
+ * SPEED_MAX resolves to 200; anything above 200 is -EOPNOTSUPP.
+ */
+int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
+{
+       const int wanted = (speed == SPEED_MAX) ? 200 : speed;
+
+       if (wanted > 200)
+               return -EOPNOTSUPP;
+
+       /* Setting 200 Mbps on port 0 to 3 selects 100 Mbps */
+       return mv88e6xxx_port_set_speed(chip, port, wanted, false, false);
+}
+
+/* Support 10, 100, 1000 Mbps (e.g. 88E6185 family).
+ * SPEED_MAX resolves to 1000; 200 and anything above 1000 are
+ * -EOPNOTSUPP.
+ */
+int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
+{
+       const int wanted = (speed == SPEED_MAX) ? 1000 : speed;
+
+       if (wanted > 1000 || wanted == 200)
+               return -EOPNOTSUPP;
+
+       return mv88e6xxx_port_set_speed(chip, port, wanted, false, false);
+}
+
+/* Support 10, 100, 200, 1000 Mbps (e.g. 88E6352 family).
+ * SPEED_MAX resolves to 1000; anything above 1000 is -EOPNOTSUPP, and so
+ * is 200 Mbps on ports below 5.
+ */
+int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
+{
+       const int wanted = (speed == SPEED_MAX) ? 1000 : speed;
+
+       if (wanted > 1000 || (wanted == 200 && port < 5))
+               return -EOPNOTSUPP;
+
+       return mv88e6xxx_port_set_speed(chip, port, wanted, true, false);
+}
+
+/* Support 10, 100, 200, 1000, 2500 Mbps (e.g. 88E6390).
+ * SPEED_MAX resolves to 1000 on ports below 9 and to 2500 on the others.
+ * -EOPNOTSUPP for anything above 2500, for 200 Mbps on a port other
+ * than 0, and for 2500 Mbps on ports below 9.
+ */
+int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
+{
+       const bool low_port = port < 9;
+       int wanted = speed;
+
+       if (wanted == SPEED_MAX)
+               wanted = low_port ? 1000 : 2500;
+
+       if (wanted > 2500 ||
+           (wanted == 200 && port != 0) ||
+           (wanted == 2500 && low_port))
+               return -EOPNOTSUPP;
+
+       return mv88e6xxx_port_set_speed(chip, port, wanted, true, true);
+}
+
+/* Support 10, 100, 200, 1000, 2500, 10000 Mbps (e.g. 88E6190X).
+ * SPEED_MAX resolves to 1000 on ports below 9 and to 10000 on the others.
+ * -EOPNOTSUPP for 200 Mbps on a port other than 0 and for 2500 Mbps or
+ * more on ports below 9.
+ */
+int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed)
+{
+       const bool low_port = port < 9;
+       int wanted = speed;
+
+       if (wanted == SPEED_MAX)
+               wanted = low_port ? 1000 : 10000;
+
+       if ((wanted == 200 && port != 0) || (wanted >= 2500 && low_port))
+               return -EOPNOTSUPP;
+
+       return mv88e6xxx_port_set_speed(chip, port, wanted, true, true);
+}
+
+/* Offset 0x04: Port Control Register */
+
+/* Human-readable names of the port states, indexed by the
+ * PORT_CONTROL_STATE_* value; used for the debug message printed by
+ * mv88e6xxx_port_set_state().
+ */
+static const char * const mv88e6xxx_port_state_names[] = {
+       [PORT_CONTROL_STATE_DISABLED] = "Disabled",
+       [PORT_CONTROL_STATE_BLOCKING] = "Blocking/Listening",
+       [PORT_CONTROL_STATE_LEARNING] = "Learning",
+       [PORT_CONTROL_STATE_FORWARDING] = "Forwarding",
+};
+
+/* Set the state field of a port's Port Control register.
+ *
+ * @state must be one of the PORT_CONTROL_STATE_* values; it is written
+ * into the field selected by PORT_CONTROL_STATE_MASK and the other bits
+ * of the register are preserved.
+ *
+ * Returns 0 on success, -EINVAL for a state with no entry in the name
+ * table, or the error from the register access.
+ */
+int mv88e6xxx_port_set_state(struct mv88e6xxx_chip *chip, int port, u8 state)
+{
+       u16 reg;
+       int err;
+
+       /* @state is OR-ed into the register and used to index the name
+        * table below, so refuse anything the table does not cover before
+        * touching the hardware.
+        */
+       if (state >= ARRAY_SIZE(mv88e6xxx_port_state_names))
+               return -EINVAL;
+
+       err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, &reg);
+       if (err)
+               return err;
+
+       reg &= ~PORT_CONTROL_STATE_MASK;
+       reg |= state;
+
+       err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
+       if (err)
+               return err;
+
+       netdev_dbg(chip->ds->ports[port].netdev, "PortState set to %s\n",
+                  mv88e6xxx_port_state_names[state]);
+
+       return 0;
+}
+
+/* Offset 0x05: Port Control 1 */
+
+/* Offset 0x06: Port Based VLAN Map */
+
+/* Replace the port-based VLAN map of @port: the low bits of the
+ * PORT_BASE_VLAN register, one per switch port, are taken from @map and
+ * the remaining bits of the register are preserved.
+ * Returns 0 on success or the error from the register access.
+ */
+int mv88e6xxx_port_set_vlan_map(struct mv88e6xxx_chip *chip, int port, u16 map)
+{
+       const u16 port_mask = GENMASK(mv88e6xxx_num_ports(chip) - 1, 0);
+       u16 vlan;
+       int ret;
+
+       ret = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &vlan);
+       if (ret)
+               return ret;
+
+       vlan = (vlan & ~port_mask) | (map & port_mask);
+
+       ret = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, vlan);
+       if (ret)
+               return ret;
+
+       netdev_dbg(chip->ds->ports[port].netdev, "VLANTable set to %.3x\n",
+                  map);
+
+       return 0;
+}
+
+/* Read back the default FID of @port into @fid.
+ *
+ * The lower 4 bits come from PORT_BASE_VLAN; when the chip has more than
+ * 16 databases the upper bits are read from PORT_CONTROL_1 as well.
+ * Returns 0 on success or the error from the register access; if the
+ * second read fails, @fid already holds the lower 4 bits.
+ */
+int mv88e6xxx_port_get_fid(struct mv88e6xxx_chip *chip, int port, u16 *fid)
+{
+       const u16 hi_mask = (mv88e6xxx_num_databases(chip) - 1) >> 4;
+       u16 val;
+       int ret;
+
+       /* Port's default FID lower 4 bits are located in reg 0x06, offset 12 */
+       ret = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &val);
+       if (ret)
+               return ret;
+
+       *fid = val >> 12;
+
+       if (!hi_mask)
+               return 0;
+
+       /* Port's default FID upper bits are located in reg 0x05, offset 0 */
+       ret = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, &val);
+       if (ret)
+               return ret;
+
+       *fid |= (val & hi_mask) << 4;
+
+       return 0;
+}
+
+/* Set the default FID of @port.
+ *
+ * The lower 4 bits go to PORT_BASE_VLAN; when the chip has more than 16
+ * databases the upper bits are written to PORT_CONTROL_1 as well.
+ * Returns -EINVAL if @fid is not below the chip's number of databases,
+ * 0 on success, or the error from the register access.
+ */
+int mv88e6xxx_port_set_fid(struct mv88e6xxx_chip *chip, int port, u16 fid)
+{
+       const u16 hi_mask = (mv88e6xxx_num_databases(chip) - 1) >> 4;
+       u16 val;
+       int ret;
+
+       if (fid >= mv88e6xxx_num_databases(chip))
+               return -EINVAL;
+
+       /* Port's default FID lower 4 bits are located in reg 0x06, offset 12 */
+       ret = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &val);
+       if (ret)
+               return ret;
+
+       val = (val & 0x0fff) | ((fid & 0x000f) << 12);
+
+       ret = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, val);
+       if (ret)
+               return ret;
+
+       /* Port's default FID upper bits are located in reg 0x05, offset 0 */
+       if (hi_mask) {
+               ret = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, &val);
+               if (ret)
+                       return ret;
+
+               val = (val & ~hi_mask) | ((fid >> 4) & hi_mask);
+
+               ret = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, val);
+               if (ret)
+                       return ret;
+       }
+
+       netdev_dbg(chip->ds->ports[port].netdev, "FID set to %u\n", fid);
+
+       return 0;
+}
+
+/* Offset 0x07: Default Port VLAN ID & Priority */
+
+/* Read the default VLAN ID of @port into @pvid: the PORT_DEFAULT_VLAN
+ * register masked with PORT_DEFAULT_VLAN_MASK. @pvid is left untouched
+ * when the read fails. Returns 0 on success or the read error.
+ */
+int mv88e6xxx_port_get_pvid(struct mv88e6xxx_chip *chip, int port, u16 *pvid)
+{
+       u16 val;
+       int ret = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, &val);
+
+       if (ret)
+               return ret;
+
+       *pvid = val & PORT_DEFAULT_VLAN_MASK;
+       return 0;
+}
+
+/* Set the default VLAN ID of @port: only the PORT_DEFAULT_VLAN_MASK bits
+ * of the PORT_DEFAULT_VLAN register are replaced, the rest is preserved.
+ * Returns 0 on success or the error from the register access.
+ */
+int mv88e6xxx_port_set_pvid(struct mv88e6xxx_chip *chip, int port, u16 pvid)
+{
+       u16 val;
+       int ret;
+
+       ret = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, &val);
+       if (ret)
+               return ret;
+
+       val = (val & ~PORT_DEFAULT_VLAN_MASK) |
+             (pvid & PORT_DEFAULT_VLAN_MASK);
+
+       ret = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, val);
+       if (ret)
+               return ret;
+
+       netdev_dbg(chip->ds->ports[port].netdev, "DefaultVID set to %u\n",
+                  pvid);
+
+       return 0;
+}
+
+/* Offset 0x08: Port Control 2 Register */
+
+/* Human-readable names of the 802.1Q modes, indexed by the
+ * PORT_CONTROL_2_8021Q_* value; used for the debug message printed by
+ * mv88e6xxx_port_set_8021q_mode().
+ */
+static const char * const mv88e6xxx_port_8021q_mode_names[] = {
+       [PORT_CONTROL_2_8021Q_DISABLED] = "Disabled",
+       [PORT_CONTROL_2_8021Q_FALLBACK] = "Fallback",
+       [PORT_CONTROL_2_8021Q_CHECK] = "Check",
+       [PORT_CONTROL_2_8021Q_SECURE] = "Secure",
+};
+
+/* Select the 802.1Q mode of a port in its Port Control 2 register.
+ *
+ * Only the bits of @mode covered by PORT_CONTROL_2_8021Q_MASK are used;
+ * the other bits of the register are preserved.
+ *
+ * Returns 0 on success or the error from the register access.
+ */
+int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port,
+                                 u16 mode)
+{
+       u16 reg;
+       int err;
+
+       /* Mask once up front so the value written to the register and the
+        * index into the name table below are the same; indexing with the
+        * raw @mode could run past the table.
+        * NOTE(review): assumes the PORT_CONTROL_2_8021Q_* values span the
+        * whole mask - confirm against the header.
+        */
+       mode &= PORT_CONTROL_2_8021Q_MASK;
+
+       err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, &reg);
+       if (err)
+               return err;
+
+       reg &= ~PORT_CONTROL_2_8021Q_MASK;
+       reg |= mode;
+
+       err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg);
+       if (err)
+               return err;
+
+       netdev_dbg(chip->ds->ports[port].netdev, "802.1QMode set to %s\n",
+                  mv88e6xxx_port_8021q_mode_names[mode]);
+
+       return 0;
+}
diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h
new file mode 100644 (file)
index 0000000..499129c
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Marvell 88E6xxx Switch Port Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_PORT_H
+#define _MV88E6XXX_PORT_H
+
+#include "mv88e6xxx.h"
+
+int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
+                       u16 *val);
+int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
+                        u16 val);
+
+int mv88e6352_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port,
+                                  phy_interface_t mode);
+int mv88e6390_port_set_rgmii_delay(struct mv88e6xxx_chip *chip, int port,
+                                  phy_interface_t mode);
+
+int mv88e6xxx_port_set_link(struct mv88e6xxx_chip *chip, int port, int link);
+
+int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup);
+
+int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
+int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
+int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
+int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
+int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed);
+
+int mv88e6xxx_port_set_state(struct mv88e6xxx_chip *chip, int port, u8 state);
+
+int mv88e6xxx_port_set_vlan_map(struct mv88e6xxx_chip *chip, int port, u16 map);
+
+int mv88e6xxx_port_get_fid(struct mv88e6xxx_chip *chip, int port, u16 *fid);
+int mv88e6xxx_port_set_fid(struct mv88e6xxx_chip *chip, int port, u16 fid);
+
+int mv88e6xxx_port_get_pvid(struct mv88e6xxx_chip *chip, int port, u16 *pvid);
+int mv88e6xxx_port_set_pvid(struct mv88e6xxx_chip *chip, int port, u16 pvid);
+
+int mv88e6xxx_port_set_8021q_mode(struct mv88e6xxx_chip *chip, int port,
+                                 u16 mode);
+
+#endif /* _MV88E6XXX_PORT_H */
index 9f9a5f440e2f71f656299b41a6828b70a30898d8..a7533780dddc011cddbb96556cc5adae5d09bcc5 100644 (file)
@@ -1040,67 +1040,68 @@ el3_link_ok(struct net_device *dev)
 }
 
 static int
-el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd)
+el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_link_ksettings *cmd)
 {
        u16 tmp;
        int ioaddr = dev->base_addr;
+       u32 supported;
 
        EL3WINDOW(0);
        /* obtain current transceiver via WN4_MEDIA? */
        tmp = inw(ioaddr + WN0_ADDR_CONF);
-       ecmd->transceiver = XCVR_INTERNAL;
        switch (tmp >> 14) {
        case 0:
-               ecmd->port = PORT_TP;
+               cmd->base.port = PORT_TP;
                break;
        case 1:
-               ecmd->port = PORT_AUI;
-               ecmd->transceiver = XCVR_EXTERNAL;
+               cmd->base.port = PORT_AUI;
                break;
        case 3:
-               ecmd->port = PORT_BNC;
+               cmd->base.port = PORT_BNC;
        default:
                break;
        }
 
-       ecmd->duplex = DUPLEX_HALF;
-       ecmd->supported = 0;
+       cmd->base.duplex = DUPLEX_HALF;
+       supported = 0;
        tmp = inw(ioaddr + WN0_CONF_CTRL);
        if (tmp & (1<<13))
-               ecmd->supported |= SUPPORTED_AUI;
+               supported |= SUPPORTED_AUI;
        if (tmp & (1<<12))
-               ecmd->supported |= SUPPORTED_BNC;
+               supported |= SUPPORTED_BNC;
        if (tmp & (1<<9)) {
-               ecmd->supported |= SUPPORTED_TP | SUPPORTED_10baseT_Half |
+               supported |= SUPPORTED_TP | SUPPORTED_10baseT_Half |
                                SUPPORTED_10baseT_Full; /* hmm... */
                EL3WINDOW(4);
                tmp = inw(ioaddr + WN4_NETDIAG);
                if (tmp & FD_ENABLE)
-                       ecmd->duplex = DUPLEX_FULL;
+                       cmd->base.duplex = DUPLEX_FULL;
        }
 
-       ethtool_cmd_speed_set(ecmd, SPEED_10);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       cmd->base.speed = SPEED_10;
        EL3WINDOW(1);
        return 0;
 }
 
 static int
-el3_netdev_set_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd)
+el3_netdev_set_ecmd(struct net_device *dev,
+                   const struct ethtool_link_ksettings *cmd)
 {
        u16 tmp;
        int ioaddr = dev->base_addr;
 
-       if (ecmd->speed != SPEED_10)
+       if (cmd->base.speed != SPEED_10)
                return -EINVAL;
-       if ((ecmd->duplex != DUPLEX_HALF) && (ecmd->duplex != DUPLEX_FULL))
-               return -EINVAL;
-       if ((ecmd->transceiver != XCVR_INTERNAL) && (ecmd->transceiver != XCVR_EXTERNAL))
+       if ((cmd->base.duplex != DUPLEX_HALF) &&
+           (cmd->base.duplex != DUPLEX_FULL))
                return -EINVAL;
 
        /* change XCVR type */
        EL3WINDOW(0);
        tmp = inw(ioaddr + WN0_ADDR_CONF);
-       switch (ecmd->port) {
+       switch (cmd->base.port) {
        case PORT_TP:
                tmp &= ~(3<<14);
                dev->if_port = 0;
@@ -1130,7 +1131,7 @@ el3_netdev_set_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd)
 
        EL3WINDOW(4);
        tmp = inw(ioaddr + WN4_NETDIAG);
-       if (ecmd->duplex == DUPLEX_FULL)
+       if (cmd->base.duplex == DUPLEX_FULL)
                tmp |= FD_ENABLE;
        else
                tmp &= ~FD_ENABLE;
@@ -1146,24 +1147,26 @@ static void el3_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info
        strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 }
 
-static int el3_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int el3_get_link_ksettings(struct net_device *dev,
+                                 struct ethtool_link_ksettings *cmd)
 {
        struct el3_private *lp = netdev_priv(dev);
        int ret;
 
        spin_lock_irq(&lp->lock);
-       ret = el3_netdev_get_ecmd(dev, ecmd);
+       ret = el3_netdev_get_ecmd(dev, cmd);
        spin_unlock_irq(&lp->lock);
        return ret;
 }
 
-static int el3_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int el3_set_link_ksettings(struct net_device *dev,
+                                 const struct ethtool_link_ksettings *cmd)
 {
        struct el3_private *lp = netdev_priv(dev);
        int ret;
 
        spin_lock_irq(&lp->lock);
-       ret = el3_netdev_set_ecmd(dev, ecmd);
+       ret = el3_netdev_set_ecmd(dev, cmd);
        spin_unlock_irq(&lp->lock);
        return ret;
 }
@@ -1191,11 +1194,11 @@ static void el3_set_msglevel(struct net_device *dev, u32 v)
 
 static const struct ethtool_ops ethtool_ops = {
        .get_drvinfo = el3_get_drvinfo,
-       .get_settings = el3_get_settings,
-       .set_settings = el3_set_settings,
        .get_link = el3_get_link,
        .get_msglevel = el3_get_msglevel,
        .set_msglevel = el3_set_msglevel,
+       .get_link_ksettings = el3_get_link_ksettings,
+       .set_link_ksettings = el3_set_link_ksettings,
 };
 
 static void
index 3ecf61382269d0a190ccb3aca80308cedf1f9e56..b3560a364e536666d7e22dd08700e5eb3267439a 100644 (file)
@@ -2907,18 +2907,20 @@ static int vortex_nway_reset(struct net_device *dev)
        return mii_nway_restart(&vp->mii);
 }
 
-static int vortex_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int vortex_get_link_ksettings(struct net_device *dev,
+                                    struct ethtool_link_ksettings *cmd)
 {
        struct vortex_private *vp = netdev_priv(dev);
 
-       return mii_ethtool_gset(&vp->mii, cmd);
+       return mii_ethtool_get_link_ksettings(&vp->mii, cmd);
 }
 
-static int vortex_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int vortex_set_link_ksettings(struct net_device *dev,
+                                    const struct ethtool_link_ksettings *cmd)
 {
        struct vortex_private *vp = netdev_priv(dev);
 
-       return mii_ethtool_sset(&vp->mii, cmd);
+       return mii_ethtool_set_link_ksettings(&vp->mii, cmd);
 }
 
 static u32 vortex_get_msglevel(struct net_device *dev)
@@ -3031,13 +3033,13 @@ static const struct ethtool_ops vortex_ethtool_ops = {
        .set_msglevel           = vortex_set_msglevel,
        .get_ethtool_stats      = vortex_get_ethtool_stats,
        .get_sset_count         = vortex_get_sset_count,
-       .get_settings           = vortex_get_settings,
-       .set_settings           = vortex_set_settings,
        .get_link               = ethtool_op_get_link,
        .nway_reset             = vortex_nway_reset,
        .get_wol                = vortex_get_wol,
        .set_wol                = vortex_set_wol,
        .get_ts_info            = ethtool_op_get_ts_info,
+       .get_link_ksettings     = vortex_get_link_ksettings,
+       .set_link_ksettings     = vortex_set_link_ksettings,
 };
 
 #ifdef CONFIG_PCI
index 506b507b415810cd9833782615cc9b575a82278e..a0cacbe846ba3347cf5d1d62a8e3ad84a5b83428 100644 (file)
@@ -996,28 +996,30 @@ typhoon_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 }
 
 static int
-typhoon_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+typhoon_get_link_ksettings(struct net_device *dev,
+                          struct ethtool_link_ksettings *cmd)
 {
        struct typhoon *tp = netdev_priv(dev);
+       u32 supported, advertising = 0;
 
-       cmd->supported = SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
+       supported = SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
                                SUPPORTED_Autoneg;
 
        switch (tp->xcvr_select) {
        case TYPHOON_XCVR_10HALF:
-               cmd->advertising = ADVERTISED_10baseT_Half;
+               advertising = ADVERTISED_10baseT_Half;
                break;
        case TYPHOON_XCVR_10FULL:
-               cmd->advertising = ADVERTISED_10baseT_Full;
+               advertising = ADVERTISED_10baseT_Full;
                break;
        case TYPHOON_XCVR_100HALF:
-               cmd->advertising = ADVERTISED_100baseT_Half;
+               advertising = ADVERTISED_100baseT_Half;
                break;
        case TYPHOON_XCVR_100FULL:
-               cmd->advertising = ADVERTISED_100baseT_Full;
+               advertising = ADVERTISED_100baseT_Full;
                break;
        case TYPHOON_XCVR_AUTONEG:
-               cmd->advertising = ADVERTISED_10baseT_Half |
+               advertising = ADVERTISED_10baseT_Half |
                                            ADVERTISED_10baseT_Full |
                                            ADVERTISED_100baseT_Half |
                                            ADVERTISED_100baseT_Full |
@@ -1026,54 +1028,57 @@ typhoon_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
        }
 
        if(tp->capabilities & TYPHOON_FIBER) {
-               cmd->supported |= SUPPORTED_FIBRE;
-               cmd->advertising |= ADVERTISED_FIBRE;
-               cmd->port = PORT_FIBRE;
+               supported |= SUPPORTED_FIBRE;
+               advertising |= ADVERTISED_FIBRE;
+               cmd->base.port = PORT_FIBRE;
        } else {
-               cmd->supported |= SUPPORTED_10baseT_Half |
+               supported |= SUPPORTED_10baseT_Half |
                                        SUPPORTED_10baseT_Full |
                                        SUPPORTED_TP;
-               cmd->advertising |= ADVERTISED_TP;
-               cmd->port = PORT_TP;
+               advertising |= ADVERTISED_TP;
+               cmd->base.port = PORT_TP;
        }
 
        /* need to get stats to make these link speed/duplex valid */
        typhoon_do_get_stats(tp);
-       ethtool_cmd_speed_set(cmd, tp->speed);
-       cmd->duplex = tp->duplex;
-       cmd->phy_address = 0;
-       cmd->transceiver = XCVR_INTERNAL;
+       cmd->base.speed = tp->speed;
+       cmd->base.duplex = tp->duplex;
+       cmd->base.phy_address = 0;
        if(tp->xcvr_select == TYPHOON_XCVR_AUTONEG)
-               cmd->autoneg = AUTONEG_ENABLE;
+               cmd->base.autoneg = AUTONEG_ENABLE;
        else
-               cmd->autoneg = AUTONEG_DISABLE;
-       cmd->maxtxpkt = 1;
-       cmd->maxrxpkt = 1;
+               cmd->base.autoneg = AUTONEG_DISABLE;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        return 0;
 }
 
 static int
-typhoon_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+typhoon_set_link_ksettings(struct net_device *dev,
+                          const struct ethtool_link_ksettings *cmd)
 {
        struct typhoon *tp = netdev_priv(dev);
-       u32 speed = ethtool_cmd_speed(cmd);
+       u32 speed = cmd->base.speed;
        struct cmd_desc xp_cmd;
        __le16 xcvr;
        int err;
 
        err = -EINVAL;
-       if (cmd->autoneg == AUTONEG_ENABLE) {
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
                xcvr = TYPHOON_XCVR_AUTONEG;
        } else {
-               if (cmd->duplex == DUPLEX_HALF) {
+               if (cmd->base.duplex == DUPLEX_HALF) {
                        if (speed == SPEED_10)
                                xcvr = TYPHOON_XCVR_10HALF;
                        else if (speed == SPEED_100)
                                xcvr = TYPHOON_XCVR_100HALF;
                        else
                                goto out;
-               } else if (cmd->duplex == DUPLEX_FULL) {
+               } else if (cmd->base.duplex == DUPLEX_FULL) {
                        if (speed == SPEED_10)
                                xcvr = TYPHOON_XCVR_10FULL;
                        else if (speed == SPEED_100)
@@ -1091,12 +1096,12 @@ typhoon_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                goto out;
 
        tp->xcvr_select = xcvr;
-       if(cmd->autoneg == AUTONEG_ENABLE) {
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
                tp->speed = 0xff;       /* invalid */
                tp->duplex = 0xff;      /* invalid */
        } else {
                tp->speed = speed;
-               tp->duplex = cmd->duplex;
+               tp->duplex = cmd->base.duplex;
        }
 
 out:
@@ -1145,13 +1150,13 @@ typhoon_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ering)
 }
 
 static const struct ethtool_ops typhoon_ethtool_ops = {
-       .get_settings           = typhoon_get_settings,
-       .set_settings           = typhoon_set_settings,
        .get_drvinfo            = typhoon_get_drvinfo,
        .get_wol                = typhoon_get_wol,
        .set_wol                = typhoon_set_wol,
        .get_link               = ethtool_op_get_link,
        .get_ringparam          = typhoon_get_ringparam,
+       .get_link_ksettings     = typhoon_get_link_ksettings,
+       .set_link_ksettings     = typhoon_set_link_ksettings,
 };
 
 static int
index 8cc7467b6c1f63b468a81afdca03a802f96e4944..6738fbb357cdeee86789706346b2d3034daafe1c 100644 (file)
@@ -165,6 +165,7 @@ source "drivers/net/ethernet/seeq/Kconfig"
 source "drivers/net/ethernet/silan/Kconfig"
 source "drivers/net/ethernet/sis/Kconfig"
 source "drivers/net/ethernet/sfc/Kconfig"
+source "drivers/net/ethernet/sfc/falcon/Kconfig"
 source "drivers/net/ethernet/sgi/Kconfig"
 source "drivers/net/ethernet/smsc/Kconfig"
 source "drivers/net/ethernet/stmicro/Kconfig"
index a09423df83f278134e32b987a11755eb67b0d51e..e76244521a2ed56ff1b40580142e48e1d292b44f 100644 (file)
@@ -75,6 +75,7 @@ obj-$(CONFIG_NET_VENDOR_SEEQ) += seeq/
 obj-$(CONFIG_NET_VENDOR_SILAN) += silan/
 obj-$(CONFIG_NET_VENDOR_SIS) += sis/
 obj-$(CONFIG_SFC) += sfc/
+obj-$(CONFIG_SFC_FALCON) += sfc/falcon/
 obj-$(CONFIG_NET_VENDOR_SGI) += sgi/
 obj-$(CONFIG_NET_VENDOR_SMSC) += smsc/
 obj-$(CONFIG_NET_VENDOR_STMICRO) += stmicro/
index 4a9a16e25666199d14b8ac3704d0870500bb7df4..3aaad33cdbc6f48d19f1646a6565ae81be96a91e 100644 (file)
@@ -1816,21 +1816,23 @@ static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
        strlcpy(info->bus_info, pci_name(np->pci_dev), sizeof(info->bus_info));
 }
 
-static int get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int get_link_ksettings(struct net_device *dev,
+                             struct ethtool_link_ksettings *cmd)
 {
        struct netdev_private *np = netdev_priv(dev);
        spin_lock_irq(&np->lock);
-       mii_ethtool_gset(&np->mii_if, ecmd);
+       mii_ethtool_get_link_ksettings(&np->mii_if, cmd);
        spin_unlock_irq(&np->lock);
        return 0;
 }
 
-static int set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int set_link_ksettings(struct net_device *dev,
+                             const struct ethtool_link_ksettings *cmd)
 {
        struct netdev_private *np = netdev_priv(dev);
        int res;
        spin_lock_irq(&np->lock);
-       res = mii_ethtool_sset(&np->mii_if, ecmd);
+       res = mii_ethtool_set_link_ksettings(&np->mii_if, cmd);
        spin_unlock_irq(&np->lock);
        check_duplex(dev);
        return res;
@@ -1861,12 +1863,12 @@ static void set_msglevel(struct net_device *dev, u32 val)
 static const struct ethtool_ops ethtool_ops = {
        .begin = check_if_running,
        .get_drvinfo = get_drvinfo,
-       .get_settings = get_settings,
-       .set_settings = set_settings,
        .nway_reset = nway_reset,
        .get_link = get_link,
        .get_msglevel = get_msglevel,
        .set_msglevel = set_msglevel,
+       .get_link_ksettings = get_link_ksettings,
+       .set_link_ksettings = set_link_ksettings,
 };
 
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
index f8df8248035ec8f6ba7f14aab225b6e7cb3dba64..93def92f999703d234b44508725f45863ef39a18 100644 (file)
@@ -1290,15 +1290,6 @@ static int greth_mdio_probe(struct net_device *dev)
        return 0;
 }
 
-static inline int phy_aneg_done(struct phy_device *phydev)
-{
-       int retval;
-
-       retval = phy_read(phydev, MII_BMSR);
-
-       return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE);
-}
-
 static int greth_mdio_init(struct greth_private *greth)
 {
        int ret;
index af27f9dbedf2e366c4948449046f03c4d3625fb8..c8f4d26fc9d414a957faf3c6939ac2488782342c 100644 (file)
 
 #define EMAC_MAX_FRAME_LEN     0x0600
 
+#define EMAC_DEFAULT_MSG_ENABLE 0x0000
+static int debug = -1;     /* defaults above */;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "debug message flags");
+
 /* Transmit timeout, default 5 seconds. */
 static int watchdog = 5000;
 module_param(watchdog, int, 0400);
@@ -225,11 +230,27 @@ static void emac_get_drvinfo(struct net_device *dev,
        strlcpy(info->bus_info, dev_name(&dev->dev), sizeof(info->bus_info));
 }
 
+static u32 emac_get_msglevel(struct net_device *dev)
+{
+       struct emac_board_info *db = netdev_priv(dev);
+
+       return db->msg_enable;
+}
+
+static void emac_set_msglevel(struct net_device *dev, u32 value)
+{
+       struct emac_board_info *db = netdev_priv(dev);
+
+       db->msg_enable = value;
+}
+
 static const struct ethtool_ops emac_ethtool_ops = {
        .get_drvinfo    = emac_get_drvinfo,
        .get_link       = ethtool_op_get_link,
        .get_link_ksettings = phy_ethtool_get_link_ksettings,
        .set_link_ksettings = phy_ethtool_set_link_ksettings,
+       .get_msglevel   = emac_get_msglevel,
+       .set_msglevel   = emac_set_msglevel,
 };
 
 static unsigned int emac_setup(struct net_device *ndev)
@@ -571,8 +592,7 @@ static void emac_rx(struct net_device *dev)
                /* A packet ready now  & Get status/length */
                good_packet = true;
 
-               emac_inblk_32bit(db->membase + EMAC_RX_IO_DATA_REG,
-                               &rxhdr, sizeof(rxhdr));
+               rxhdr = readl(db->membase + EMAC_RX_IO_DATA_REG);
 
                if (netif_msg_rx_status(db))
                        dev_dbg(db->dev, "rxhdr: %x\n", *((int *)(&rxhdr)));
@@ -804,6 +824,7 @@ static int emac_probe(struct platform_device *pdev)
        db->dev = &pdev->dev;
        db->ndev = ndev;
        db->pdev = pdev;
+       db->msg_enable = netif_msg_init(debug, EMAC_DEFAULT_MSG_ENABLE);
 
        spin_lock_init(&db->lock);
 
index a5c1e290677a44bf8445c065c2e1cb154d714830..16f0c70266bce75f503a7910709fa67684898539 100644 (file)
@@ -429,14 +429,16 @@ static const char version[] =
   "acenic.c: v0.92 08/05/2002  Jes Sorensen, linux-acenic@SunSITE.dk\n"
   "                            http://home.cern.ch/~jes/gige/acenic.html\n";
 
-static int ace_get_settings(struct net_device *, struct ethtool_cmd *);
-static int ace_set_settings(struct net_device *, struct ethtool_cmd *);
+static int ace_get_link_ksettings(struct net_device *,
+                                 struct ethtool_link_ksettings *);
+static int ace_set_link_ksettings(struct net_device *,
+                                 const struct ethtool_link_ksettings *);
 static void ace_get_drvinfo(struct net_device *, struct ethtool_drvinfo *);
 
 static const struct ethtool_ops ace_ethtool_ops = {
-       .get_settings = ace_get_settings,
-       .set_settings = ace_set_settings,
        .get_drvinfo = ace_get_drvinfo,
+       .get_link_ksettings = ace_get_link_ksettings,
+       .set_link_ksettings = ace_set_link_ksettings,
 };
 
 static void ace_watchdog(struct net_device *dev);
@@ -2579,43 +2581,44 @@ static int ace_change_mtu(struct net_device *dev, int new_mtu)
        return 0;
 }
 
-static int ace_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int ace_get_link_ksettings(struct net_device *dev,
+                                 struct ethtool_link_ksettings *cmd)
 {
        struct ace_private *ap = netdev_priv(dev);
        struct ace_regs __iomem *regs = ap->regs;
        u32 link;
+       u32 supported;
 
-       memset(ecmd, 0, sizeof(struct ethtool_cmd));
-       ecmd->supported =
-               (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
-                SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
-                SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full |
-                SUPPORTED_Autoneg | SUPPORTED_FIBRE);
+       memset(cmd, 0, sizeof(struct ethtool_link_ksettings));
 
-       ecmd->port = PORT_FIBRE;
-       ecmd->transceiver = XCVR_INTERNAL;
+       supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
+                    SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
+                    SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full |
+                    SUPPORTED_Autoneg | SUPPORTED_FIBRE);
+
+       cmd->base.port = PORT_FIBRE;
 
        link = readl(&regs->GigLnkState);
-       if (link & LNK_1000MB)
-               ethtool_cmd_speed_set(ecmd, SPEED_1000);
-       else {
+       if (link & LNK_1000MB) {
+               cmd->base.speed = SPEED_1000;
+       } else {
                link = readl(&regs->FastLnkState);
                if (link & LNK_100MB)
-                       ethtool_cmd_speed_set(ecmd, SPEED_100);
+                       cmd->base.speed = SPEED_100;
                else if (link & LNK_10MB)
-                       ethtool_cmd_speed_set(ecmd, SPEED_10);
+                       cmd->base.speed = SPEED_10;
                else
-                       ethtool_cmd_speed_set(ecmd, 0);
+                       cmd->base.speed = 0;
        }
        if (link & LNK_FULL_DUPLEX)
-               ecmd->duplex = DUPLEX_FULL;
+               cmd->base.duplex = DUPLEX_FULL;
        else
-               ecmd->duplex = DUPLEX_HALF;
+               cmd->base.duplex = DUPLEX_HALF;
 
        if (link & LNK_NEGOTIATE)
-               ecmd->autoneg = AUTONEG_ENABLE;
+               cmd->base.autoneg = AUTONEG_ENABLE;
        else
-               ecmd->autoneg = AUTONEG_DISABLE;
+               cmd->base.autoneg = AUTONEG_DISABLE;
 
 #if 0
        /*
@@ -2626,13 +2629,15 @@ static int ace_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
        ecmd->txcoal = readl(&regs->TuneTxCoalTicks);
        ecmd->rxcoal = readl(&regs->TuneRxCoalTicks);
 #endif
-       ecmd->maxtxpkt = readl(&regs->TuneMaxTxDesc);
-       ecmd->maxrxpkt = readl(&regs->TuneMaxRxDesc);
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
 
        return 0;
 }
 
-static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int ace_set_link_ksettings(struct net_device *dev,
+                                 const struct ethtool_link_ksettings *cmd)
 {
        struct ace_private *ap = netdev_priv(dev);
        struct ace_regs __iomem *regs = ap->regs;
@@ -2655,11 +2660,11 @@ static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
                LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL;
        if (!ACE_IS_TIGON_I(ap))
                link |= LNK_TX_FLOW_CTL_Y;
-       if (ecmd->autoneg == AUTONEG_ENABLE)
+       if (cmd->base.autoneg == AUTONEG_ENABLE)
                link |= LNK_NEGOTIATE;
-       if (ethtool_cmd_speed(ecmd) != speed) {
+       if (cmd->base.speed != speed) {
                link &= ~(LNK_1000MB | LNK_100MB | LNK_10MB);
-               switch (ethtool_cmd_speed(ecmd)) {
+               switch (cmd->base.speed) {
                case SPEED_1000:
                        link |= LNK_1000MB;
                        break;
@@ -2672,7 +2677,7 @@ static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
                }
        }
 
-       if (ecmd->duplex == DUPLEX_FULL)
+       if (cmd->base.duplex == DUPLEX_FULL)
                link |= LNK_FULL_DUPLEX;
 
        if (link != ap->link) {
index 9b640c8fbc28b96a97d2bf1da58d7e6b851b102b..e2feee87180a84a91708b2d43e12a3f2d7b5b650 100644 (file)
 #define MAC_CMDCFG_DISABLE_READ_TIMEOUT_GET(v) GET_BIT_VALUE(v, 27)
 #define MAC_CMDCFG_CNT_RESET_GET(v)            GET_BIT_VALUE(v, 31)
 
+/* SGMII PCS register addresses
+ */
+#define SGMII_PCS_SCRATCH      0x10
+#define SGMII_PCS_REV          0x11
+#define SGMII_PCS_LINK_TIMER_0 0x12
+#define SGMII_PCS_LINK_TIMER_1 0x13
+#define SGMII_PCS_IF_MODE      0x14
+#define SGMII_PCS_DIS_READ_TO  0x15
+#define SGMII_PCS_READ_TO      0x16
+#define SGMII_PCS_SW_RESET_TIMEOUT 100 /* usecs */
+
 /* MDIO registers within MAC register Space
  */
 struct altera_tse_mdio {
index 8e92084342622732ecdcd4d8e4b164551906c74e..d8bca949d810d790d1cdc58e70de481fb7ed960a 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mii.h>
 #include <linux/netdevice.h>
 #include <linux/of_device.h>
 #include <linux/of_mdio.h>
@@ -96,6 +97,27 @@ static inline u32 tse_tx_avail(struct altera_tse_private *priv)
        return priv->tx_cons + priv->tx_ring_size - priv->tx_prod - 1;
 }
 
+/* PCS Register read/write functions
+ */
+static u16 sgmii_pcs_read(struct altera_tse_private *priv, int regnum)
+{
+       return csrrd32(priv->mac_dev,
+                      tse_csroffs(mdio_phy0) + regnum * 4) & 0xffff;
+}
+
+static void sgmii_pcs_write(struct altera_tse_private *priv, int regnum,
+                               u16 value)
+{
+       csrwr32(value, priv->mac_dev, tse_csroffs(mdio_phy0) + regnum * 4);
+}
+
+/* Check PCS scratch memory */
+static int sgmii_pcs_scratch_test(struct altera_tse_private *priv, u16 value)
+{
+       sgmii_pcs_write(priv, SGMII_PCS_SCRATCH, value);
+       return (sgmii_pcs_read(priv, SGMII_PCS_SCRATCH) == value);
+}
+
 /* MDIO specific functions
  */
 static int altera_tse_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
@@ -1083,6 +1105,66 @@ static void tse_set_rx_mode(struct net_device *dev)
        spin_unlock(&priv->mac_cfg_lock);
 }
 
+/* Initialise (if necessary) the SGMII PCS component
+ */
+static int init_sgmii_pcs(struct net_device *dev)
+{
+       struct altera_tse_private *priv = netdev_priv(dev);
+       int n;
+       unsigned int tmp_reg = 0;
+
+       if (priv->phy_iface != PHY_INTERFACE_MODE_SGMII)
+               return 0; /* Nothing to do, not in SGMII mode */
+
+       /* The TSE SGMII PCS block looks a little like a PHY, it is
+        * mapped into the zeroth MDIO space of the MAC and it has
+        * ID registers like a PHY would.  Sadly this is often
+        * configured to zeroes, so don't be surprised if it does
+        * show 0x00000000.
+        */
+
+       if (sgmii_pcs_scratch_test(priv, 0x0000) &&
+               sgmii_pcs_scratch_test(priv, 0xffff) &&
+               sgmii_pcs_scratch_test(priv, 0xa5a5) &&
+               sgmii_pcs_scratch_test(priv, 0x5a5a)) {
+               netdev_info(dev, "PCS PHY ID: 0x%04x%04x\n",
+                               sgmii_pcs_read(priv, MII_PHYSID1),
+                               sgmii_pcs_read(priv, MII_PHYSID2));
+       } else {
+               netdev_err(dev, "SGMII PCS Scratch memory test failed.\n");
+               return -ENOMEM;
+       }
+
+       /* Starting on page 5-29 of the MegaCore Function User Guide
+        * Set SGMII Link timer to 1.6ms
+        */
+       sgmii_pcs_write(priv, SGMII_PCS_LINK_TIMER_0, 0x0D40);
+       sgmii_pcs_write(priv, SGMII_PCS_LINK_TIMER_1, 0x03);
+
+       /* Enable SGMII Interface and Enable SGMII Auto Negotiation */
+       sgmii_pcs_write(priv, SGMII_PCS_IF_MODE, 0x3);
+
+       /* Enable Autonegotiation */
+       tmp_reg = sgmii_pcs_read(priv, MII_BMCR);
+       tmp_reg |= (BMCR_SPEED1000 | BMCR_FULLDPLX | BMCR_ANENABLE);
+       sgmii_pcs_write(priv, MII_BMCR, tmp_reg);
+
+       /* Reset PCS block */
+       tmp_reg |= BMCR_RESET;
+       sgmii_pcs_write(priv, MII_BMCR, tmp_reg);
+       for (n = 0; n < SGMII_PCS_SW_RESET_TIMEOUT; n++) {
+               if (!(sgmii_pcs_read(priv, MII_BMCR) & BMCR_RESET)) {
+                       netdev_info(dev, "SGMII PCS block initialised OK\n");
+                       return 0;
+               }
+               udelay(1);
+       }
+
+       /* We failed to reset the block, return a timeout */
+       netdev_err(dev, "SGMII PCS block reset failed.\n");
+       return -ETIMEDOUT;
+}
+
 /* Open and initialize the interface
  */
 static int tse_open(struct net_device *dev)
@@ -1107,6 +1189,15 @@ static int tse_open(struct net_device *dev)
                netdev_warn(dev, "TSE revision %x\n", priv->revision);
 
        spin_lock(&priv->mac_cfg_lock);
+       /* no-op if MAC not operating in SGMII mode*/
+       ret = init_sgmii_pcs(dev);
+       if (ret) {
+               netdev_err(dev,
+                          "Cannot init the SGMII PCS (error: %d)\n", ret);
+               spin_unlock(&priv->mac_cfg_lock);
+               goto phy_error;
+       }
+
        ret = reset_mac(priv);
        /* Note that reset_mac will fail if the clocks are gated by the PHY
         * due to the PHY being put into isolation or power down mode.
index 0038709fd317d83ca8f4226cab3357673c25565f..7ab6efbe4189d9cc823e17f0e5c70f8233b92f94 100644 (file)
@@ -173,11 +173,13 @@ config SUNLANCE
 
 config AMD_XGBE
        tristate "AMD 10GbE Ethernet driver"
-       depends on ((OF_NET && OF_ADDRESS) || ACPI) && HAS_IOMEM && HAS_DMA
-       depends on ARM64 || COMPILE_TEST
+       depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM && HAS_DMA
+       depends on X86 || ARM64 || COMPILE_TEST
        select BITREVERSE
        select CRC32
        select PTP_1588_CLOCK
+       select PHYLIB
+       select AMD_XGBE_HAVE_ECC if X86
        ---help---
          This driver supports the AMD 10GbE Ethernet device found on an
          AMD SoC.
@@ -195,4 +197,8 @@ config AMD_XGBE_DCB
 
          If unsure, say N.
 
+config AMD_XGBE_HAVE_ECC
+       bool
+       default n
+
 endif # NET_VENDOR_AMD
index 84b4ffbd084ab5f8192118d54b1ebba23bbf1822..11cf1e3e0295def5bf7056aceec892692db3c026 100644 (file)
@@ -1421,21 +1421,23 @@ static void amd8111e_get_regs(struct net_device *dev, struct ethtool_regs *regs,
        amd8111e_read_regs(lp, buf);
 }
 
-static int amd8111e_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int amd8111e_get_link_ksettings(struct net_device *dev,
+                                      struct ethtool_link_ksettings *cmd)
 {
        struct amd8111e_priv *lp = netdev_priv(dev);
        spin_lock_irq(&lp->lock);
-       mii_ethtool_gset(&lp->mii_if, ecmd);
+       mii_ethtool_get_link_ksettings(&lp->mii_if, cmd);
        spin_unlock_irq(&lp->lock);
        return 0;
 }
 
-static int amd8111e_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int amd8111e_set_link_ksettings(struct net_device *dev,
+                                      const struct ethtool_link_ksettings *cmd)
 {
        struct amd8111e_priv *lp = netdev_priv(dev);
        int res;
        spin_lock_irq(&lp->lock);
-       res = mii_ethtool_sset(&lp->mii_if, ecmd);
+       res = mii_ethtool_set_link_ksettings(&lp->mii_if, cmd);
        spin_unlock_irq(&lp->lock);
        return res;
 }
@@ -1482,12 +1484,12 @@ static const struct ethtool_ops ops = {
        .get_drvinfo = amd8111e_get_drvinfo,
        .get_regs_len = amd8111e_get_regs_len,
        .get_regs = amd8111e_get_regs,
-       .get_settings = amd8111e_get_settings,
-       .set_settings = amd8111e_set_settings,
        .nway_reset = amd8111e_nway_reset,
        .get_link = amd8111e_get_link,
        .get_wol = amd8111e_get_wol,
        .set_wol = amd8111e_set_wol,
+       .get_link_ksettings = amd8111e_get_link_ksettings,
+       .set_link_ksettings = amd8111e_set_link_ksettings,
 };
 
 /* This function handles all the  ethtool ioctls. It gives driver info,
index e53ccc3b7d8dcf73eff299469a0dbb677f61058d..796c37a5bbde0d6ba693eddf7b7099884de8ebcc 100644 (file)
@@ -1012,13 +1012,9 @@ static int lance_rx( struct net_device *dev )
                                        u_char *data = PKTBUF_ADDR(head);
 
                                        printk(KERN_DEBUG "%s: RX pkt type 0x%04x from %pM to %pM "
-                                                  "data %02x %02x %02x %02x %02x %02x %02x %02x "
-                                                  "len %d\n",
+                                                  "data %8ph len %d\n",
                                                   dev->name, ((u_short *)data)[6],
-                                                  &data[6], data,
-                                                  data[15], data[16], data[17], data[18],
-                                                  data[19], data[20], data[21], data[22],
-                                                  pkt_len);
+                                                  &data[6], data, &data[15], pkt_len);
                                }
 
                                skb_reserve( skb, 2 );  /* 16 byte align */
index adc7ab99a2f692b8d71fb27fc1904a1bbc837b46..41e58cca8feed284391d91f7469eaf4b79e52ca1 100644 (file)
@@ -677,7 +677,8 @@ static void pcnet32_poll_controller(struct net_device *dev)
 }
 #endif
 
-static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int pcnet32_get_link_ksettings(struct net_device *dev,
+                                     struct ethtool_link_ksettings *cmd)
 {
        struct pcnet32_private *lp = netdev_priv(dev);
        unsigned long flags;
@@ -685,14 +686,15 @@ static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 
        if (lp->mii) {
                spin_lock_irqsave(&lp->lock, flags);
-               mii_ethtool_gset(&lp->mii_if, cmd);
+               mii_ethtool_get_link_ksettings(&lp->mii_if, cmd);
                spin_unlock_irqrestore(&lp->lock, flags);
                r = 0;
        }
        return r;
 }
 
-static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int pcnet32_set_link_ksettings(struct net_device *dev,
+                                     const struct ethtool_link_ksettings *cmd)
 {
        struct pcnet32_private *lp = netdev_priv(dev);
        unsigned long flags;
@@ -700,7 +702,7 @@ static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 
        if (lp->mii) {
                spin_lock_irqsave(&lp->lock, flags);
-               r = mii_ethtool_sset(&lp->mii_if, cmd);
+               r = mii_ethtool_set_link_ksettings(&lp->mii_if, cmd);
                spin_unlock_irqrestore(&lp->lock, flags);
        }
        return r;
@@ -1440,8 +1442,6 @@ static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs,
 }
 
 static const struct ethtool_ops pcnet32_ethtool_ops = {
-       .get_settings           = pcnet32_get_settings,
-       .set_settings           = pcnet32_set_settings,
        .get_drvinfo            = pcnet32_get_drvinfo,
        .get_msglevel           = pcnet32_get_msglevel,
        .set_msglevel           = pcnet32_set_msglevel,
@@ -1455,6 +1455,8 @@ static const struct ethtool_ops pcnet32_ethtool_ops = {
        .get_regs_len           = pcnet32_get_regs_len,
        .get_regs               = pcnet32_get_regs,
        .get_sset_count         = pcnet32_get_sset_count,
+       .get_link_ksettings     = pcnet32_get_link_ksettings,
+       .set_link_ksettings     = pcnet32_set_link_ksettings,
 };
 
 /* only probes for non-PCI devices, the rest are handled by
index 171a7e68048d79c43867aeb6646ff4262097966d..0dea8f5da899d2480015c82ed571c812f9548828 100644 (file)
@@ -2,7 +2,10 @@ obj-$(CONFIG_AMD_XGBE) += amd-xgbe.o
 
 amd-xgbe-objs := xgbe-main.o xgbe-drv.o xgbe-dev.o \
                 xgbe-desc.o xgbe-ethtool.o xgbe-mdio.o \
-                xgbe-ptp.o
+                xgbe-ptp.o \
+                xgbe-i2c.o xgbe-phy-v1.o xgbe-phy-v2.o \
+                xgbe-platform.o
 
+amd-xgbe-$(CONFIG_PCI) += xgbe-pci.o
 amd-xgbe-$(CONFIG_AMD_XGBE_DCB) += xgbe-dcb.o
 amd-xgbe-$(CONFIG_DEBUG_FS) += xgbe-debugfs.o
index bbef95973c27355cad33c3e7ffc7c53d15964f24..5b7ba25e006522a6143d6c73a9b8141a7ac76f85 100644 (file)
 #define DMA_ISR_MACIS_WIDTH            1
 #define DMA_ISR_MTLIS_INDEX            16
 #define DMA_ISR_MTLIS_WIDTH            1
+#define DMA_MR_INTM_INDEX              12
+#define DMA_MR_INTM_WIDTH              2
 #define DMA_MR_SWR_INDEX               0
 #define DMA_MR_SWR_WIDTH               1
 #define DMA_SBMR_EAME_INDEX            11
 #define MAC_HWF0R                      0x011c
 #define MAC_HWF1R                      0x0120
 #define MAC_HWF2R                      0x0124
+#define MAC_MDIOSCAR                   0x0200
+#define MAC_MDIOSCCDR                  0x0204
+#define MAC_MDIOISR                    0x0214
+#define MAC_MDIOIER                    0x0218
+#define MAC_MDIOCL22R                  0x0220
 #define MAC_GPIOCR                     0x0278
 #define MAC_GPIOSR                     0x027c
 #define MAC_MACA0HR                    0x0300
 #define MAC_ISR_MMCTXIS_WIDTH          1
 #define MAC_ISR_PMTIS_INDEX            4
 #define MAC_ISR_PMTIS_WIDTH            1
+#define MAC_ISR_SMI_INDEX              1
+#define MAC_ISR_SMI_WIDTH              1
 #define MAC_ISR_TSIS_INDEX             12
 #define MAC_ISR_TSIS_WIDTH             1
 #define MAC_MACA1HR_AE_INDEX           31
 #define MAC_MACA1HR_AE_WIDTH           1
+#define MAC_MDIOIER_SNGLCOMPIE_INDEX   12
+#define MAC_MDIOIER_SNGLCOMPIE_WIDTH   1
+#define MAC_MDIOISR_SNGLCOMPINT_INDEX  12
+#define MAC_MDIOISR_SNGLCOMPINT_WIDTH  1
+#define MAC_MDIOSCAR_DA_INDEX          21
+#define MAC_MDIOSCAR_DA_WIDTH          5
+#define MAC_MDIOSCAR_PA_INDEX          16
+#define MAC_MDIOSCAR_PA_WIDTH          5
+#define MAC_MDIOSCAR_RA_INDEX          0
+#define MAC_MDIOSCAR_RA_WIDTH          16
+#define MAC_MDIOSCAR_REG_INDEX         0
+#define MAC_MDIOSCAR_REG_WIDTH         21
+#define MAC_MDIOSCCDR_BUSY_INDEX       22
+#define MAC_MDIOSCCDR_BUSY_WIDTH       1
+#define MAC_MDIOSCCDR_CMD_INDEX                16
+#define MAC_MDIOSCCDR_CMD_WIDTH                2
+#define MAC_MDIOSCCDR_CR_INDEX         19
+#define MAC_MDIOSCCDR_CR_WIDTH         3
+#define MAC_MDIOSCCDR_DATA_INDEX       0
+#define MAC_MDIOSCCDR_DATA_WIDTH       16
+#define MAC_MDIOSCCDR_SADDR_INDEX      18
+#define MAC_MDIOSCCDR_SADDR_WIDTH      1
 #define MAC_PFR_HMC_INDEX              2
 #define MAC_PFR_HMC_WIDTH              1
 #define MAC_PFR_HPF_INDEX              10
 #define MTL_Q_RQOMR_RSF_WIDTH          1
 #define MTL_Q_RQOMR_RTC_INDEX          0
 #define MTL_Q_RQOMR_RTC_WIDTH          2
+#define MTL_Q_TQDR_TRCSTS_INDEX                1
+#define MTL_Q_TQDR_TRCSTS_WIDTH                2
+#define MTL_Q_TQDR_TXQSTS_INDEX                4
+#define MTL_Q_TQDR_TXQSTS_WIDTH                1
 #define MTL_Q_TQOMR_FTQ_INDEX          0
 #define MTL_Q_TQOMR_FTQ_WIDTH          1
 #define MTL_Q_TQOMR_Q2TCMAP_INDEX      8
 #define MTL_TSA_SP                     0x00
 #define MTL_TSA_ETS                    0x02
 
-/* PCS MMD select register offset
- *  The MMD select register is used for accessing PCS registers
- *  when the underlying APB3 interface is using indirect addressing.
- *  Indirect addressing requires accessing registers in two phases,
- *  an address phase and a data phase.  The address phases requires
- *  writing an address selection value to the MMD select regiesters.
- */
-#define PCS_MMD_SELECT                 0xff
+/* PCS register offsets */
+#define PCS_V1_WINDOW_SELECT           0x03fc
+#define PCS_V2_WINDOW_DEF              0x9060
+#define PCS_V2_WINDOW_SELECT           0x9064
+
+/* PCS register entry bit positions and sizes */
+#define PCS_V2_WINDOW_DEF_OFFSET_INDEX 6
+#define PCS_V2_WINDOW_DEF_OFFSET_WIDTH 14
+#define PCS_V2_WINDOW_DEF_SIZE_INDEX   2
+#define PCS_V2_WINDOW_DEF_SIZE_WIDTH   4
 
 /* SerDes integration register offsets */
 #define SIR0_KR_RT_1                   0x002c
 #define RXTX_REG129_RXDFE_CONFIG_INDEX 14
 #define RXTX_REG129_RXDFE_CONFIG_WIDTH 2
 
+/* MAC Control register offsets */
+#define XP_PROP_0                      0x0000
+#define XP_PROP_1                      0x0004
+#define XP_PROP_2                      0x0008
+#define XP_PROP_3                      0x000c
+#define XP_PROP_4                      0x0010
+#define XP_PROP_5                      0x0014
+#define XP_MAC_ADDR_LO                 0x0020
+#define XP_MAC_ADDR_HI                 0x0024
+#define XP_ECC_ISR                     0x0030
+#define XP_ECC_IER                     0x0034
+#define XP_ECC_CNT0                    0x003c
+#define XP_ECC_CNT1                    0x0040
+#define XP_DRIVER_INT_REQ              0x0060
+#define XP_DRIVER_INT_RO               0x0064
+#define XP_DRIVER_SCRATCH_0            0x0068
+#define XP_DRIVER_SCRATCH_1            0x006c
+#define XP_INT_EN                      0x0078
+#define XP_I2C_MUTEX                   0x0080
+#define XP_MDIO_MUTEX                  0x0084
+
+/* MAC Control register entry bit positions and sizes */
+#define XP_DRIVER_INT_REQ_REQUEST_INDEX                0
+#define XP_DRIVER_INT_REQ_REQUEST_WIDTH                1
+#define XP_DRIVER_INT_RO_STATUS_INDEX          0
+#define XP_DRIVER_INT_RO_STATUS_WIDTH          1
+#define XP_DRIVER_SCRATCH_0_COMMAND_INDEX      0
+#define XP_DRIVER_SCRATCH_0_COMMAND_WIDTH      8
+#define XP_DRIVER_SCRATCH_0_SUB_COMMAND_INDEX  8
+#define XP_DRIVER_SCRATCH_0_SUB_COMMAND_WIDTH  8
+#define XP_ECC_CNT0_RX_DED_INDEX               24
+#define XP_ECC_CNT0_RX_DED_WIDTH               8
+#define XP_ECC_CNT0_RX_SEC_INDEX               16
+#define XP_ECC_CNT0_RX_SEC_WIDTH               8
+#define XP_ECC_CNT0_TX_DED_INDEX               8
+#define XP_ECC_CNT0_TX_DED_WIDTH               8
+#define XP_ECC_CNT0_TX_SEC_INDEX               0
+#define XP_ECC_CNT0_TX_SEC_WIDTH               8
+#define XP_ECC_CNT1_DESC_DED_INDEX             8
+#define XP_ECC_CNT1_DESC_DED_WIDTH             8
+#define XP_ECC_CNT1_DESC_SEC_INDEX             0
+#define XP_ECC_CNT1_DESC_SEC_WIDTH             8
+#define XP_ECC_IER_DESC_DED_INDEX              0
+#define XP_ECC_IER_DESC_DED_WIDTH              1
+#define XP_ECC_IER_DESC_SEC_INDEX              1
+#define XP_ECC_IER_DESC_SEC_WIDTH              1
+#define XP_ECC_IER_RX_DED_INDEX                        2
+#define XP_ECC_IER_RX_DED_WIDTH                        1
+#define XP_ECC_IER_RX_SEC_INDEX                        3
+#define XP_ECC_IER_RX_SEC_WIDTH                        1
+#define XP_ECC_IER_TX_DED_INDEX                        4
+#define XP_ECC_IER_TX_DED_WIDTH                        1
+#define XP_ECC_IER_TX_SEC_INDEX                        5
+#define XP_ECC_IER_TX_SEC_WIDTH                        1
+#define XP_ECC_ISR_DESC_DED_INDEX              0
+#define XP_ECC_ISR_DESC_DED_WIDTH              1
+#define XP_ECC_ISR_DESC_SEC_INDEX              1
+#define XP_ECC_ISR_DESC_SEC_WIDTH              1
+#define XP_ECC_ISR_RX_DED_INDEX                        2
+#define XP_ECC_ISR_RX_DED_WIDTH                        1
+#define XP_ECC_ISR_RX_SEC_INDEX                        3
+#define XP_ECC_ISR_RX_SEC_WIDTH                        1
+#define XP_ECC_ISR_TX_DED_INDEX                        4
+#define XP_ECC_ISR_TX_DED_WIDTH                        1
+#define XP_ECC_ISR_TX_SEC_INDEX                        5
+#define XP_ECC_ISR_TX_SEC_WIDTH                        1
+#define XP_I2C_MUTEX_BUSY_INDEX                        31
+#define XP_I2C_MUTEX_BUSY_WIDTH                        1
+#define XP_I2C_MUTEX_ID_INDEX                  29
+#define XP_I2C_MUTEX_ID_WIDTH                  2
+#define XP_I2C_MUTEX_ACTIVE_INDEX              0
+#define XP_I2C_MUTEX_ACTIVE_WIDTH              1
+#define XP_MAC_ADDR_HI_VALID_INDEX             31
+#define XP_MAC_ADDR_HI_VALID_WIDTH             1
+#define XP_PROP_0_CONN_TYPE_INDEX              28
+#define XP_PROP_0_CONN_TYPE_WIDTH              3
+#define XP_PROP_0_MDIO_ADDR_INDEX              16
+#define XP_PROP_0_MDIO_ADDR_WIDTH              5
+#define XP_PROP_0_PORT_ID_INDEX                        0
+#define XP_PROP_0_PORT_ID_WIDTH                        8
+#define XP_PROP_0_PORT_MODE_INDEX              8
+#define XP_PROP_0_PORT_MODE_WIDTH              4
+#define XP_PROP_0_PORT_SPEEDS_INDEX            23
+#define XP_PROP_0_PORT_SPEEDS_WIDTH            4
+#define XP_PROP_1_MAX_RX_DMA_INDEX             24
+#define XP_PROP_1_MAX_RX_DMA_WIDTH             5
+#define XP_PROP_1_MAX_RX_QUEUES_INDEX          8
+#define XP_PROP_1_MAX_RX_QUEUES_WIDTH          5
+#define XP_PROP_1_MAX_TX_DMA_INDEX             16
+#define XP_PROP_1_MAX_TX_DMA_WIDTH             5
+#define XP_PROP_1_MAX_TX_QUEUES_INDEX          0
+#define XP_PROP_1_MAX_TX_QUEUES_WIDTH          5
+#define XP_PROP_2_RX_FIFO_SIZE_INDEX           16
+#define XP_PROP_2_RX_FIFO_SIZE_WIDTH           16
+#define XP_PROP_2_TX_FIFO_SIZE_INDEX           0
+#define XP_PROP_2_TX_FIFO_SIZE_WIDTH           16
+#define XP_PROP_3_GPIO_MASK_INDEX              28
+#define XP_PROP_3_GPIO_MASK_WIDTH              4
+#define XP_PROP_3_GPIO_MOD_ABS_INDEX           20
+#define XP_PROP_3_GPIO_MOD_ABS_WIDTH           4
+#define XP_PROP_3_GPIO_RATE_SELECT_INDEX       16
+#define XP_PROP_3_GPIO_RATE_SELECT_WIDTH       4
+#define XP_PROP_3_GPIO_RX_LOS_INDEX            24
+#define XP_PROP_3_GPIO_RX_LOS_WIDTH            4
+#define XP_PROP_3_GPIO_TX_FAULT_INDEX          12
+#define XP_PROP_3_GPIO_TX_FAULT_WIDTH          4
+#define XP_PROP_3_GPIO_ADDR_INDEX              8
+#define XP_PROP_3_GPIO_ADDR_WIDTH              3
+#define XP_PROP_3_MDIO_RESET_INDEX             0
+#define XP_PROP_3_MDIO_RESET_WIDTH             2
+#define XP_PROP_3_MDIO_RESET_I2C_ADDR_INDEX    8
+#define XP_PROP_3_MDIO_RESET_I2C_ADDR_WIDTH    3
+#define XP_PROP_3_MDIO_RESET_I2C_GPIO_INDEX    12
+#define XP_PROP_3_MDIO_RESET_I2C_GPIO_WIDTH    4
+#define XP_PROP_3_MDIO_RESET_INT_GPIO_INDEX    4
+#define XP_PROP_3_MDIO_RESET_INT_GPIO_WIDTH    2
+#define XP_PROP_4_MUX_ADDR_HI_INDEX            8
+#define XP_PROP_4_MUX_ADDR_HI_WIDTH            5
+#define XP_PROP_4_MUX_ADDR_LO_INDEX            0
+#define XP_PROP_4_MUX_ADDR_LO_WIDTH            3
+#define XP_PROP_4_MUX_CHAN_INDEX               4
+#define XP_PROP_4_MUX_CHAN_WIDTH               3
+#define XP_PROP_4_REDRV_ADDR_INDEX             16
+#define XP_PROP_4_REDRV_ADDR_WIDTH             7
+#define XP_PROP_4_REDRV_IF_INDEX               23
+#define XP_PROP_4_REDRV_IF_WIDTH               1
+#define XP_PROP_4_REDRV_LANE_INDEX             24
+#define XP_PROP_4_REDRV_LANE_WIDTH             3
+#define XP_PROP_4_REDRV_MODEL_INDEX            28
+#define XP_PROP_4_REDRV_MODEL_WIDTH            3
+#define XP_PROP_4_REDRV_PRESENT_INDEX          31
+#define XP_PROP_4_REDRV_PRESENT_WIDTH          1
+
+/* I2C Control register offsets */
+#define IC_CON                                 0x0000
+#define IC_TAR                                 0x0004
+#define IC_DATA_CMD                            0x0010
+#define IC_INTR_STAT                           0x002c
+#define IC_INTR_MASK                           0x0030
+#define IC_RAW_INTR_STAT                       0x0034
+#define IC_CLR_INTR                            0x0040
+#define IC_CLR_TX_ABRT                         0x0054
+#define IC_CLR_STOP_DET                                0x0060
+#define IC_ENABLE                              0x006c
+#define IC_TXFLR                               0x0074
+#define IC_RXFLR                               0x0078
+#define IC_TX_ABRT_SOURCE                      0x0080
+#define IC_ENABLE_STATUS                       0x009c
+#define IC_COMP_PARAM_1                                0x00f4
+
+/* I2C Control register entry bit positions and sizes */
+#define IC_COMP_PARAM_1_MAX_SPEED_MODE_INDEX   2
+#define IC_COMP_PARAM_1_MAX_SPEED_MODE_WIDTH   2
+#define IC_COMP_PARAM_1_RX_BUFFER_DEPTH_INDEX  8
+#define IC_COMP_PARAM_1_RX_BUFFER_DEPTH_WIDTH  8
+#define IC_COMP_PARAM_1_TX_BUFFER_DEPTH_INDEX  16
+#define IC_COMP_PARAM_1_TX_BUFFER_DEPTH_WIDTH  8
+#define IC_CON_MASTER_MODE_INDEX               0
+#define IC_CON_MASTER_MODE_WIDTH               1
+#define IC_CON_RESTART_EN_INDEX                        5
+#define IC_CON_RESTART_EN_WIDTH                        1
+#define IC_CON_RX_FIFO_FULL_HOLD_INDEX         9
+#define IC_CON_RX_FIFO_FULL_HOLD_WIDTH         1
+#define IC_CON_SLAVE_DISABLE_INDEX             6
+#define IC_CON_SLAVE_DISABLE_WIDTH             1
+#define IC_CON_SPEED_INDEX                     1
+#define IC_CON_SPEED_WIDTH                     2
+#define IC_DATA_CMD_CMD_INDEX                  8
+#define IC_DATA_CMD_CMD_WIDTH                  1
+#define IC_DATA_CMD_STOP_INDEX                 9
+#define IC_DATA_CMD_STOP_WIDTH                 1
+#define IC_ENABLE_ABORT_INDEX                  1
+#define IC_ENABLE_ABORT_WIDTH                  1
+#define IC_ENABLE_EN_INDEX                     0
+#define IC_ENABLE_EN_WIDTH                     1
+#define IC_ENABLE_STATUS_EN_INDEX              0
+#define IC_ENABLE_STATUS_EN_WIDTH              1
+#define IC_INTR_MASK_TX_EMPTY_INDEX            4
+#define IC_INTR_MASK_TX_EMPTY_WIDTH            1
+#define IC_RAW_INTR_STAT_RX_FULL_INDEX         2
+#define IC_RAW_INTR_STAT_RX_FULL_WIDTH         1
+#define IC_RAW_INTR_STAT_STOP_DET_INDEX                9
+#define IC_RAW_INTR_STAT_STOP_DET_WIDTH                1
+#define IC_RAW_INTR_STAT_TX_ABRT_INDEX         6
+#define IC_RAW_INTR_STAT_TX_ABRT_WIDTH         1
+#define IC_RAW_INTR_STAT_TX_EMPTY_INDEX                4
+#define IC_RAW_INTR_STAT_TX_EMPTY_WIDTH                1
+
+/* I2C Control register value */
+#define IC_TX_ABRT_7B_ADDR_NOACK               0x0001
+#define IC_TX_ABRT_ARB_LOST                    0x1000
+
 /* Descriptor/Packet entry bit positions and sizes */
 #define RX_PACKET_ERRORS_CRC_INDEX             2
 #define RX_PACKET_ERRORS_CRC_WIDTH             1
 #define MDIO_PMA_10GBR_FECCTRL         0x00ab
 #endif
 
+#ifndef MDIO_PCS_DIG_CTRL
+#define MDIO_PCS_DIG_CTRL              0x8000
+#endif
+
 #ifndef MDIO_AN_XNP
 #define MDIO_AN_XNP                    0x0016
 #endif
 #define MDIO_AN_INT                    0x8002
 #endif
 
+#ifndef MDIO_VEND2_AN_ADVERTISE
+#define MDIO_VEND2_AN_ADVERTISE                0x0004
+#endif
+
+#ifndef MDIO_VEND2_AN_LP_ABILITY
+#define MDIO_VEND2_AN_LP_ABILITY       0x0005
+#endif
+
+#ifndef MDIO_VEND2_AN_CTRL
+#define MDIO_VEND2_AN_CTRL             0x8001
+#endif
+
+#ifndef MDIO_VEND2_AN_STAT
+#define MDIO_VEND2_AN_STAT             0x8002
+#endif
+
 #ifndef MDIO_CTRL1_SPEED1G
 #define MDIO_CTRL1_SPEED1G             (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100)
 #endif
 
+#ifndef MDIO_VEND2_CTRL1_AN_ENABLE
+#define MDIO_VEND2_CTRL1_AN_ENABLE     BIT(12)
+#endif
+
+#ifndef MDIO_VEND2_CTRL1_AN_RESTART
+#define MDIO_VEND2_CTRL1_AN_RESTART    BIT(9)
+#endif
+
+#ifndef MDIO_VEND2_CTRL1_SS6
+#define MDIO_VEND2_CTRL1_SS6           BIT(6)
+#endif
+
+#ifndef MDIO_VEND2_CTRL1_SS13
+#define MDIO_VEND2_CTRL1_SS13          BIT(13)
+#endif
+
 /* MDIO mask values */
+#define XGBE_AN_CL73_INT_CMPLT         BIT(0)
+#define XGBE_AN_CL73_INC_LINK          BIT(1)
+#define XGBE_AN_CL73_PG_RCV            BIT(2)
+#define XGBE_AN_CL73_INT_MASK          0x07
+
 #define XGBE_XNP_MCF_NULL_MESSAGE      0x001
 #define XGBE_XNP_ACK_PROCESSED         BIT(12)
 #define XGBE_XNP_MP_FORMATTED          BIT(13)
 #define XGBE_KR_TRAINING_START         BIT(0)
 #define XGBE_KR_TRAINING_ENABLE                BIT(1)
 
+#define XGBE_PCS_CL37_BP               BIT(12)
+
+#define XGBE_AN_CL37_INT_CMPLT         BIT(0)
+#define XGBE_AN_CL37_INT_MASK          0x01
+
+#define XGBE_AN_CL37_HD_MASK           0x40
+#define XGBE_AN_CL37_FD_MASK           0x20
+
+#define XGBE_AN_CL37_PCS_MODE_MASK     0x06
+#define XGBE_AN_CL37_PCS_MODE_BASEX    0x00
+#define XGBE_AN_CL37_PCS_MODE_SGMII    0x04
+#define XGBE_AN_CL37_TX_CONFIG_MASK    0x08
+
 /* Bit setting and getting macros
  *  The get macro will extract the current bit field value from within
  *  the variable
@@ -1195,12 +1478,28 @@ do {                                                                    \
 /* Macros for building, reading or writing register values or bits
  * within the register values of XPCS registers.
  */
-#define XPCS_IOWRITE(_pdata, _off, _val)                               \
+#define XPCS_GET_BITS(_var, _prefix, _field)                           \
+       GET_BITS((_var),                                                \
+                _prefix##_##_field##_INDEX,                            \
+                _prefix##_##_field##_WIDTH)
+
+#define XPCS_SET_BITS(_var, _prefix, _field, _val)                      \
+       SET_BITS((_var),                                                \
+                _prefix##_##_field##_INDEX,                            \
+                _prefix##_##_field##_WIDTH, (_val))
+
+#define XPCS32_IOWRITE(_pdata, _off, _val)                             \
        iowrite32(_val, (_pdata)->xpcs_regs + (_off))
 
-#define XPCS_IOREAD(_pdata, _off)                                      \
+#define XPCS32_IOREAD(_pdata, _off)                                    \
        ioread32((_pdata)->xpcs_regs + (_off))
 
+#define XPCS16_IOWRITE(_pdata, _off, _val)                             \
+       iowrite16(_val, (_pdata)->xpcs_regs + (_off))
+
+#define XPCS16_IOREAD(_pdata, _off)                                    \
+       ioread16((_pdata)->xpcs_regs + (_off))
+
 /* Macros for building, reading or writing register values or bits
  * within the register values of SerDes integration registers.
  */
@@ -1277,6 +1576,72 @@ do {                                                                     \
        XRXTX_IOWRITE((_pdata), _reg, reg_val);                         \
 } while (0)
 
+/* Macros for building, reading or writing register values or bits
+ * within the register values of MAC Control registers.
+ */
+#define XP_GET_BITS(_var, _prefix, _field)                             \
+       GET_BITS((_var),                                                \
+                _prefix##_##_field##_INDEX,                            \
+                _prefix##_##_field##_WIDTH)
+
+#define XP_SET_BITS(_var, _prefix, _field, _val)                       \
+       SET_BITS((_var),                                                \
+                _prefix##_##_field##_INDEX,                            \
+                _prefix##_##_field##_WIDTH, (_val))
+
+#define XP_IOREAD(_pdata, _reg)                                                \
+       ioread32((_pdata)->xprop_regs + (_reg))
+
+#define XP_IOREAD_BITS(_pdata, _reg, _field)                           \
+       GET_BITS(XP_IOREAD((_pdata), (_reg)),                           \
+                _reg##_##_field##_INDEX,                               \
+                _reg##_##_field##_WIDTH)
+
+#define XP_IOWRITE(_pdata, _reg, _val)                                 \
+       iowrite32((_val), (_pdata)->xprop_regs + (_reg))
+
+#define XP_IOWRITE_BITS(_pdata, _reg, _field, _val)                    \
+do {                                                                   \
+       u32 reg_val = XP_IOREAD((_pdata), (_reg));                      \
+       SET_BITS(reg_val,                                               \
+                _reg##_##_field##_INDEX,                               \
+                _reg##_##_field##_WIDTH, (_val));                      \
+       XP_IOWRITE((_pdata), (_reg), reg_val);                          \
+} while (0)
+
+/* Macros for building, reading or writing register values or bits
+ * within the register values of I2C Control registers.
+ */
+#define XI2C_GET_BITS(_var, _prefix, _field)                           \
+       GET_BITS((_var),                                                \
+                _prefix##_##_field##_INDEX,                            \
+                _prefix##_##_field##_WIDTH)
+
+#define XI2C_SET_BITS(_var, _prefix, _field, _val)                     \
+       SET_BITS((_var),                                                \
+                _prefix##_##_field##_INDEX,                            \
+                _prefix##_##_field##_WIDTH, (_val))
+
+#define XI2C_IOREAD(_pdata, _reg)                                      \
+       ioread32((_pdata)->xi2c_regs + (_reg))
+
+#define XI2C_IOREAD_BITS(_pdata, _reg, _field)                         \
+       GET_BITS(XI2C_IOREAD((_pdata), (_reg)),                         \
+                _reg##_##_field##_INDEX,                               \
+                _reg##_##_field##_WIDTH)
+
+#define XI2C_IOWRITE(_pdata, _reg, _val)                               \
+       iowrite32((_val), (_pdata)->xi2c_regs + (_reg))
+
+#define XI2C_IOWRITE_BITS(_pdata, _reg, _field, _val)                  \
+do {                                                                   \
+       u32 reg_val = XI2C_IOREAD((_pdata), (_reg));                    \
+       SET_BITS(reg_val,                                               \
+                _reg##_##_field##_INDEX,                               \
+                _reg##_##_field##_WIDTH, (_val));                      \
+       XI2C_IOWRITE((_pdata), (_reg), reg_val);                        \
+} while (0)
+
 /* Macros for building, reading or writing register values or bits
  * using MDIO.  Different from above because of the use of standardized
  * Linux include values.  No shifting is performed with the bit
index 96f485ab612e679dc7065b1e214cb9d73c690d43..7546b660d6b52a2e0138ea667ad6a733ba185325 100644 (file)
@@ -153,7 +153,7 @@ static ssize_t xgbe_common_write(const char __user *buffer, size_t count,
        int ret;
 
        if (*ppos != 0)
-               return 0;
+               return -EINVAL;
 
        if (count >= sizeof(workarea))
                return -ENOSPC;
@@ -316,6 +316,126 @@ static const struct file_operations xpcs_reg_value_fops = {
        .write = xpcs_reg_value_write,
 };
 
+static ssize_t xprop_reg_addr_read(struct file *filp, char __user *buffer,
+                                  size_t count, loff_t *ppos)
+{
+       struct xgbe_prv_data *pdata = filp->private_data;
+
+       return xgbe_common_read(buffer, count, ppos, pdata->debugfs_xprop_reg);
+}
+
+static ssize_t xprop_reg_addr_write(struct file *filp,
+                                   const char __user *buffer,
+                                   size_t count, loff_t *ppos)
+{
+       struct xgbe_prv_data *pdata = filp->private_data;
+
+       return xgbe_common_write(buffer, count, ppos,
+                                &pdata->debugfs_xprop_reg);
+}
+
+static ssize_t xprop_reg_value_read(struct file *filp, char __user *buffer,
+                                   size_t count, loff_t *ppos)
+{
+       struct xgbe_prv_data *pdata = filp->private_data;
+       unsigned int value;
+
+       value = XP_IOREAD(pdata, pdata->debugfs_xprop_reg);
+
+       return xgbe_common_read(buffer, count, ppos, value);
+}
+
+static ssize_t xprop_reg_value_write(struct file *filp,
+                                    const char __user *buffer,
+                                    size_t count, loff_t *ppos)
+{
+       struct xgbe_prv_data *pdata = filp->private_data;
+       unsigned int value;
+       ssize_t len;
+
+       len = xgbe_common_write(buffer, count, ppos, &value);
+       if (len < 0)
+               return len;
+
+       XP_IOWRITE(pdata, pdata->debugfs_xprop_reg, value);
+
+       return len;
+}
+
+static const struct file_operations xprop_reg_addr_fops = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read =  xprop_reg_addr_read,
+       .write = xprop_reg_addr_write,
+};
+
+static const struct file_operations xprop_reg_value_fops = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read =  xprop_reg_value_read,
+       .write = xprop_reg_value_write,
+};
+
+static ssize_t xi2c_reg_addr_read(struct file *filp, char __user *buffer,
+                                 size_t count, loff_t *ppos)
+{
+       struct xgbe_prv_data *pdata = filp->private_data;
+
+       return xgbe_common_read(buffer, count, ppos, pdata->debugfs_xi2c_reg);
+}
+
+static ssize_t xi2c_reg_addr_write(struct file *filp,
+                                  const char __user *buffer,
+                                  size_t count, loff_t *ppos)
+{
+       struct xgbe_prv_data *pdata = filp->private_data;
+
+       return xgbe_common_write(buffer, count, ppos,
+                                &pdata->debugfs_xi2c_reg);
+}
+
+static ssize_t xi2c_reg_value_read(struct file *filp, char __user *buffer,
+                                  size_t count, loff_t *ppos)
+{
+       struct xgbe_prv_data *pdata = filp->private_data;
+       unsigned int value;
+
+       value = XI2C_IOREAD(pdata, pdata->debugfs_xi2c_reg);
+
+       return xgbe_common_read(buffer, count, ppos, value);
+}
+
+static ssize_t xi2c_reg_value_write(struct file *filp,
+                                   const char __user *buffer,
+                                   size_t count, loff_t *ppos)
+{
+       struct xgbe_prv_data *pdata = filp->private_data;
+       unsigned int value;
+       ssize_t len;
+
+       len = xgbe_common_write(buffer, count, ppos, &value);
+       if (len < 0)
+               return len;
+
+       XI2C_IOWRITE(pdata, pdata->debugfs_xi2c_reg, value);
+
+       return len;
+}
+
+static const struct file_operations xi2c_reg_addr_fops = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read =  xi2c_reg_addr_read,
+       .write = xi2c_reg_addr_write,
+};
+
+static const struct file_operations xi2c_reg_value_fops = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read =  xi2c_reg_value_read,
+       .write = xi2c_reg_value_write,
+};
+
 void xgbe_debugfs_init(struct xgbe_prv_data *pdata)
 {
        struct dentry *pfile;
@@ -367,6 +487,38 @@ void xgbe_debugfs_init(struct xgbe_prv_data *pdata)
        if (!pfile)
                netdev_err(pdata->netdev, "debugfs_create_file failed\n");
 
+       if (pdata->xprop_regs) {
+               pfile = debugfs_create_file("xprop_register", 0600,
+                                           pdata->xgbe_debugfs, pdata,
+                                           &xprop_reg_addr_fops);
+               if (!pfile)
+                       netdev_err(pdata->netdev,
+                                  "debugfs_create_file failed\n");
+
+               pfile = debugfs_create_file("xprop_register_value", 0600,
+                                           pdata->xgbe_debugfs, pdata,
+                                           &xprop_reg_value_fops);
+               if (!pfile)
+                       netdev_err(pdata->netdev,
+                                  "debugfs_create_file failed\n");
+       }
+
+       if (pdata->xi2c_regs) {
+               pfile = debugfs_create_file("xi2c_register", 0600,
+                                           pdata->xgbe_debugfs, pdata,
+                                           &xi2c_reg_addr_fops);
+               if (!pfile)
+                       netdev_err(pdata->netdev,
+                                  "debugfs_create_file failed\n");
+
+               pfile = debugfs_create_file("xi2c_register_value", 0600,
+                                           pdata->xgbe_debugfs, pdata,
+                                           &xi2c_reg_value_fops);
+               if (!pfile)
+                       netdev_err(pdata->netdev,
+                                  "debugfs_create_file failed\n");
+       }
+
        kfree(buf);
 }
 
index 1babcc11a2488b57a57fb9eec8b0d86638ff5df4..aaf0350076a90a1dcd91502b9ce8e4a81cff7174 100644 (file)
 #include "xgbe.h"
 #include "xgbe-common.h"
 
+static inline unsigned int xgbe_get_max_frame(struct xgbe_prv_data *pdata)
+{
+       return pdata->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+}
+
 static unsigned int xgbe_usec_to_riwt(struct xgbe_prv_data *pdata,
                                      unsigned int usec)
 {
@@ -491,6 +496,27 @@ static void xgbe_config_rss(struct xgbe_prv_data *pdata)
                           "error configuring RSS, RSS disabled\n");
 }
 
+/* Report whether any priority that pdata->prio2q_map steers to @queue
+ * belongs to a traffic class whose bit is set in pdata->pfc->pfc_en.
+ *
+ * pdata->ets and pdata->pfc are dereferenced without a check, so both
+ * must be non-NULL when this is called.
+ */
+static bool xgbe_is_pfc_queue(struct xgbe_prv_data *pdata,
+                             unsigned int queue)
+{
+       unsigned int p;
+
+       for (p = 0; p < IEEE_8021QAZ_MAX_TCS; p++) {
+               /* Only priorities handled by this queue matter; for
+                * those, test the PFC enable bit of their traffic class.
+                */
+               if (pdata->prio2q_map[p] == queue &&
+                   (pdata->pfc->pfc_en & (1 << pdata->ets->prio_tc[p])))
+                       return true;
+       }
+
+       return false;
+}
+
 static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata)
 {
        unsigned int max_q_count, q_count;
@@ -528,27 +554,14 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata)
        for (i = 0; i < pdata->rx_q_count; i++) {
                unsigned int ehfc = 0;
 
-               if (pfc && ets) {
-                       unsigned int prio;
-
-                       for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) {
-                               unsigned int tc;
-
-                               /* Does this queue handle the priority? */
-                               if (pdata->prio2q_map[prio] != i)
-                                       continue;
-
-                               /* Get the Traffic Class for this priority */
-                               tc = ets->prio_tc[prio];
-
-                               /* Check if flow control should be enabled */
-                               if (pfc->pfc_en & (1 << tc)) {
+               if (pdata->rx_rfd[i]) {
+                       /* Flow control thresholds are established */
+                       if (pfc && ets) {
+                               if (xgbe_is_pfc_queue(pdata, i))
                                        ehfc = 1;
-                                       break;
-                               }
+                       } else {
+                               ehfc = 1;
                        }
-               } else {
-                       ehfc = 1;
                }
 
                XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, ehfc);
@@ -633,6 +646,11 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
        unsigned int dma_ch_isr, dma_ch_ier;
        unsigned int i;
 
+       /* Set the interrupt mode if supported */
+       if (pdata->channel_irq_mode)
+               XGMAC_IOWRITE_BITS(pdata, DMA_MR, INTM,
+                                  pdata->channel_irq_mode);
+
        channel = pdata->channel;
        for (i = 0; i < pdata->channel_count; i++, channel++) {
                /* Clear all the interrupts which are set */
@@ -654,19 +672,21 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
                if (channel->tx_ring) {
                        /* Enable the following Tx interrupts
                         *   TIE  - Transmit Interrupt Enable (unless using
-                        *          per channel interrupts)
+                        *          per channel interrupts in edge triggered
+                        *          mode)
                         */
-                       if (!pdata->per_channel_irq)
+                       if (!pdata->per_channel_irq || pdata->channel_irq_mode)
                                XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 1);
                }
                if (channel->rx_ring) {
                        /* Enable following Rx interrupts
                         *   RBUE - Receive Buffer Unavailable Enable
                         *   RIE  - Receive Interrupt Enable (unless using
-                        *          per channel interrupts)
+                        *          per channel interrupts in edge triggered
+                        *          mode)
                         */
                        XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RBUE, 1);
-                       if (!pdata->per_channel_irq)
+                       if (!pdata->per_channel_irq || pdata->channel_irq_mode)
                                XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 1);
                }
 
@@ -702,34 +722,90 @@ static void xgbe_enable_mac_interrupts(struct xgbe_prv_data *pdata)
        /* Enable all counter interrupts */
        XGMAC_IOWRITE_BITS(pdata, MMC_RIER, ALL_INTERRUPTS, 0xffffffff);
        XGMAC_IOWRITE_BITS(pdata, MMC_TIER, ALL_INTERRUPTS, 0xffffffff);
+
+       /* Enable MDIO single command completion interrupt */
+       XGMAC_IOWRITE_BITS(pdata, MAC_MDIOIER, SNGLCOMPIE, 1);
 }
 
-static int xgbe_set_gmii_speed(struct xgbe_prv_data *pdata)
+static void xgbe_enable_ecc_interrupts(struct xgbe_prv_data *pdata)
 {
-       if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) == 0x3)
-               return 0;
+       unsigned int ecc_isr, ecc_ier = 0;
 
-       XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, 0x3);
+       if (!pdata->vdata->ecc_support)
+               return;
 
-       return 0;
+       /* Clear all the interrupts which are set */
+       ecc_isr = XP_IOREAD(pdata, XP_ECC_ISR);
+       XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr);
+
+       /* Enable ECC interrupts */
+       XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_DED, 1);
+       XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_SEC, 1);
+       XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_DED, 1);
+       XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_SEC, 1);
+       XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_DED, 1);
+       XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_SEC, 1);
+
+       XP_IOWRITE(pdata, XP_ECC_IER, ecc_ier);
 }
 
-static int xgbe_set_gmii_2500_speed(struct xgbe_prv_data *pdata)
+static void xgbe_disable_ecc_ded(struct xgbe_prv_data *pdata)
 {
-       if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) == 0x2)
-               return 0;
+       unsigned int ecc_ier;
 
-       XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, 0x2);
+       ecc_ier = XP_IOREAD(pdata, XP_ECC_IER);
 
-       return 0;
+       /* Disable ECC DED interrupts */
+       XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_DED, 0);
+       XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_DED, 0);
+       XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_DED, 0);
+
+       XP_IOWRITE(pdata, XP_ECC_IER, ecc_ier);
 }
 
-static int xgbe_set_xgmii_speed(struct xgbe_prv_data *pdata)
+/* Clear the SEC interrupt enable bit selected by @sec in XP_ECC_IER.
+ *
+ * The register is read, the one bit is cleared and the value is written
+ * back, so the other enable bits keep the value that was read.  A @sec
+ * value not listed in the switch writes the register back unchanged.
+ */
+static void xgbe_disable_ecc_sec(struct xgbe_prv_data *pdata,
+                                enum xgbe_ecc_sec sec)
 {
-       if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) == 0)
-               return 0;
+       unsigned int ecc_ier;
+
+       ecc_ier = XP_IOREAD(pdata, XP_ECC_IER);
+
+       /* Disable ECC SEC interrupt */
+       switch (sec) {
+       case XGBE_ECC_SEC_TX:
+               XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_SEC, 0);
+               break;
+       case XGBE_ECC_SEC_RX:
+               XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_SEC, 0);
+               break;
+       case XGBE_ECC_SEC_DESC:
+               XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_SEC, 0);
+               break;
+       }
+
+       XP_IOWRITE(pdata, XP_ECC_IER, ecc_ier);
+}
+
+static int xgbe_set_speed(struct xgbe_prv_data *pdata, int speed)
+{
+       unsigned int ss;
+
+       switch (speed) {
+       case SPEED_1000:
+               ss = 0x03;
+               break;
+       case SPEED_2500:
+               ss = 0x02;
+               break;
+       case SPEED_10000:
+               ss = 0x00;
+               break;
+       default:
+               return -EINVAL;
+       }
 
-       XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, 0);
+       if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) != ss)
+               XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, ss);
 
        return 0;
 }
@@ -1019,8 +1095,101 @@ static int xgbe_config_rx_mode(struct xgbe_prv_data *pdata)
        return 0;
 }
 
-static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
-                             int mmd_reg)
+/* Clear bit (@gpio + 16) of MAC_GPIOSR with a read-modify-write.
+ *
+ * Returns 0 on success, or -EINVAL when @gpio is above 15.
+ */
+static int xgbe_clr_gpio(struct xgbe_prv_data *pdata, unsigned int gpio)
+{
+       unsigned int reg;
+
+       if (gpio > 15)
+               return -EINVAL;
+
+       reg = XGMAC_IOREAD(pdata, MAC_GPIOSR);
+
+       /* Use an unsigned constant: for gpio 15 the shift count is 31,
+        * which would overflow a signed int.
+        */
+       reg &= ~(1U << (gpio + 16));
+       XGMAC_IOWRITE(pdata, MAC_GPIOSR, reg);
+
+       return 0;
+}
+
+/* Set bit (@gpio + 16) of MAC_GPIOSR with a read-modify-write.
+ *
+ * Returns 0 on success, or -EINVAL when @gpio is above 15.
+ */
+static int xgbe_set_gpio(struct xgbe_prv_data *pdata, unsigned int gpio)
+{
+       unsigned int reg;
+
+       if (gpio > 15)
+               return -EINVAL;
+
+       reg = XGMAC_IOREAD(pdata, MAC_GPIOSR);
+
+       /* Use an unsigned constant: for gpio 15 the shift count is 31,
+        * which would overflow a signed int.
+        */
+       reg |= (1U << (gpio + 16));
+       XGMAC_IOWRITE(pdata, MAC_GPIOSR, reg);
+
+       return 0;
+}
+
+/* Read one MMD register through the windowed (v2) XPCS access method.
+ *
+ * @prtad is not used by this function.  When @mmd_reg carries
+ * MII_ADDR_C45 the flag is stripped and the rest is taken as the full
+ * address; otherwise pdata->mdio_mmd supplies the bits above the low 16.
+ *
+ * Returns the value read by XPCS16_IOREAD() from the selected window.
+ */
+static int xgbe_read_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad,
+                                int mmd_reg)
+{
+       unsigned long flags;
+       unsigned int mmd_address, index, offset;
+       int mmd_data;
+
+       if (mmd_reg & MII_ADDR_C45)
+               mmd_address = mmd_reg & ~MII_ADDR_C45;
+       else
+               mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff);
+
+       /* The PCS registers are accessed using mmio. The underlying
+        * management interface uses indirect addressing to access the MMD
+        * register sets. This requires accessing of the PCS register in two
+        * phases, an address phase and a data phase.
+        *
+        * The mmio interface is based on 16-bit offsets and values. All
+        * register offsets must therefore be adjusted by left shifting the
+        * offset 1 bit and reading 16 bits of data.
+        *
+        * The shifted address is then split with xpcs_window_mask: the
+        * bits outside the mask go to the window select register, the
+        * bits inside the mask are the offset from xpcs_window.
+        */
+       mmd_address <<= 1;
+       index = mmd_address & ~pdata->xpcs_window_mask;
+       offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask);
+
+       /* Both phases run under xpcs_lock.
+        * NOTE(review): presumably so the selected window cannot change
+        * before the data access - assumes every window user takes it.
+        */
+       spin_lock_irqsave(&pdata->xpcs_lock, flags);
+       XPCS32_IOWRITE(pdata, PCS_V2_WINDOW_SELECT, index);
+       mmd_data = XPCS16_IOREAD(pdata, offset);
+       spin_unlock_irqrestore(&pdata->xpcs_lock, flags);
+
+       return mmd_data;
+}
+
+/* Write @mmd_data to one MMD register through the windowed (v2) XPCS
+ * access method.
+ *
+ * @prtad is not used by this function.  When @mmd_reg carries
+ * MII_ADDR_C45 the flag is stripped and the rest is taken as the full
+ * address; otherwise pdata->mdio_mmd supplies the bits above the low 16.
+ */
+static void xgbe_write_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad,
+                                  int mmd_reg, int mmd_data)
+{
+       unsigned long flags;
+       unsigned int mmd_address, index, offset;
+
+       if (mmd_reg & MII_ADDR_C45)
+               mmd_address = mmd_reg & ~MII_ADDR_C45;
+       else
+               mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff);
+
+       /* The PCS registers are accessed using mmio. The underlying
+        * management interface uses indirect addressing to access the MMD
+        * register sets. This requires accessing of the PCS register in two
+        * phases, an address phase and a data phase.
+        *
+        * The mmio interface is based on 16-bit offsets and values. All
+        * register offsets must therefore be adjusted by left shifting the
+        * offset 1 bit and writing 16 bits of data.
+        *
+        * The shifted address is then split with xpcs_window_mask: the
+        * bits outside the mask go to the window select register, the
+        * bits inside the mask are the offset from xpcs_window.
+        */
+       mmd_address <<= 1;
+       index = mmd_address & ~pdata->xpcs_window_mask;
+       offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask);
+
+       /* Both phases run under xpcs_lock.
+        * NOTE(review): presumably so the selected window cannot change
+        * before the data access - assumes every window user takes it.
+        */
+       spin_lock_irqsave(&pdata->xpcs_lock, flags);
+       XPCS32_IOWRITE(pdata, PCS_V2_WINDOW_SELECT, index);
+       XPCS16_IOWRITE(pdata, offset, mmd_data);
+       spin_unlock_irqrestore(&pdata->xpcs_lock, flags);
+}
+
+static int xgbe_read_mmd_regs_v1(struct xgbe_prv_data *pdata, int prtad,
+                                int mmd_reg)
 {
        unsigned long flags;
        unsigned int mmd_address;
@@ -1041,15 +1210,15 @@ static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
         * offset 2 bits and reading 32 bits of data.
         */
        spin_lock_irqsave(&pdata->xpcs_lock, flags);
-       XPCS_IOWRITE(pdata, PCS_MMD_SELECT << 2, mmd_address >> 8);
-       mmd_data = XPCS_IOREAD(pdata, (mmd_address & 0xff) << 2);
+       XPCS32_IOWRITE(pdata, PCS_V1_WINDOW_SELECT, mmd_address >> 8);
+       mmd_data = XPCS32_IOREAD(pdata, (mmd_address & 0xff) << 2);
        spin_unlock_irqrestore(&pdata->xpcs_lock, flags);
 
        return mmd_data;
 }
 
-static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
-                               int mmd_reg, int mmd_data)
+static void xgbe_write_mmd_regs_v1(struct xgbe_prv_data *pdata, int prtad,
+                                  int mmd_reg, int mmd_data)
 {
        unsigned int mmd_address;
        unsigned long flags;
@@ -1066,14 +1235,113 @@ static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
         *
         * The mmio interface is based on 32-bit offsets and values. All
         * register offsets must therefore be adjusted by left shifting the
-        * offset 2 bits and reading 32 bits of data.
+        * offset 2 bits and writing 32 bits of data.
         */
        spin_lock_irqsave(&pdata->xpcs_lock, flags);
-       XPCS_IOWRITE(pdata, PCS_MMD_SELECT << 2, mmd_address >> 8);
-       XPCS_IOWRITE(pdata, (mmd_address & 0xff) << 2, mmd_data);
+       XPCS32_IOWRITE(pdata, PCS_V1_WINDOW_SELECT, mmd_address >> 8);
+       XPCS32_IOWRITE(pdata, (mmd_address & 0xff) << 2, mmd_data);
        spin_unlock_irqrestore(&pdata->xpcs_lock, flags);
 }
 
+/* Read an MMD register with the XPCS access method of this device: the
+ * v1 accessor for XGBE_XPCS_ACCESS_V1, the v2 accessor for anything
+ * else.  Returns whatever the chosen accessor returns.
+ */
+static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
+                             int mmd_reg)
+{
+       if (pdata->vdata->xpcs_access == XGBE_XPCS_ACCESS_V1)
+               return xgbe_read_mmd_regs_v1(pdata, prtad, mmd_reg);
+
+       /* XGBE_XPCS_ACCESS_V2 and any unrecognised value */
+       return xgbe_read_mmd_regs_v2(pdata, prtad, mmd_reg);
+}
+
+/* Write an MMD register with the XPCS access method of this device: the
+ * v1 accessor for XGBE_XPCS_ACCESS_V1, the v2 accessor for anything
+ * else.
+ */
+static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
+                               int mmd_reg, int mmd_data)
+{
+       /* Plain calls rather than "return <void call>": a return statement
+        * with an expression is not valid ISO C in a void function.
+        */
+       switch (pdata->vdata->xpcs_access) {
+       case XGBE_XPCS_ACCESS_V1:
+               xgbe_write_mmd_regs_v1(pdata, prtad, mmd_reg, mmd_data);
+               break;
+
+       case XGBE_XPCS_ACCESS_V2:
+       default:
+               xgbe_write_mmd_regs_v2(pdata, prtad, mmd_reg, mmd_data);
+               break;
+       }
+}
+
+/* Issue one MDIO write and wait for it to finish.
+ *
+ * @addr goes into the DA field and @reg into the REG field of
+ * MAC_MDIOSCAR; @val goes into the DATA field of MAC_MDIOSCCDR.
+ *
+ * Returns 0 on success, or -ETIMEDOUT when pdata->mdio_complete is not
+ * signalled within HZ jiffies.
+ */
+static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr,
+                                  int reg, u16 val)
+{
+       unsigned int mdio_sca, mdio_sccd;
+
+       /* Re-arm the completion before the command is started.
+        * NOTE(review): mdio_complete is presumably signalled from the
+        * MDIO single command completion interrupt path - not visible
+        * here, confirm.
+        */
+       reinit_completion(&pdata->mdio_complete);
+
+       mdio_sca = 0;
+       XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg);
+       XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr);
+       XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca);
+
+       /* NOTE(review): CMD value 1 is presumably the write opcode and
+        * BUSY presumably starts the transfer - confirm against the
+        * hardware documentation.
+        */
+       mdio_sccd = 0;
+       XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, DATA, val);
+       XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, CMD, 1);
+       XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, BUSY, 1);
+       XGMAC_IOWRITE(pdata, MAC_MDIOSCCDR, mdio_sccd);
+
+       if (!wait_for_completion_timeout(&pdata->mdio_complete, HZ)) {
+               netdev_err(pdata->netdev, "mdio write operation timed out\n");
+               return -ETIMEDOUT;
+       }
+
+       return 0;
+}
+
+/* Issue one MDIO read and wait for it to finish.
+ *
+ * @addr goes into the DA field and @reg into the REG field of
+ * MAC_MDIOSCAR.
+ *
+ * Returns the DATA field of MAC_MDIOSCCDR on success, or -ETIMEDOUT when
+ * pdata->mdio_complete is not signalled within HZ jiffies.
+ */
+static int xgbe_read_ext_mii_regs(struct xgbe_prv_data *pdata, int addr,
+                                 int reg)
+{
+       unsigned int mdio_sca, mdio_sccd;
+
+       /* Re-arm the completion before the command is started.
+        * NOTE(review): mdio_complete is presumably signalled from the
+        * MDIO single command completion interrupt path - not visible
+        * here, confirm.
+        */
+       reinit_completion(&pdata->mdio_complete);
+
+       mdio_sca = 0;
+       XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg);
+       XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr);
+       XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca);
+
+       /* NOTE(review): CMD value 3 is presumably the read opcode and
+        * BUSY presumably starts the transfer - confirm against the
+        * hardware documentation.
+        */
+       mdio_sccd = 0;
+       XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, CMD, 3);
+       XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, BUSY, 1);
+       XGMAC_IOWRITE(pdata, MAC_MDIOSCCDR, mdio_sccd);
+
+       if (!wait_for_completion_timeout(&pdata->mdio_complete, HZ)) {
+               netdev_err(pdata->netdev, "mdio read operation timed out\n");
+               return -ETIMEDOUT;
+       }
+
+       return XGMAC_IOREAD_BITS(pdata, MAC_MDIOSCCDR, DATA);
+}
+
+/* Select the MDIO mode by programming MAC_MDIOCL22R.
+ *
+ * XGBE_MDIO_MODE_CL22 writes a value with only bit @port set and
+ * XGBE_MDIO_MODE_CL45 writes zero; the whole register is overwritten in
+ * both cases.
+ *
+ * Returns 0 on success, or -EINVAL for an unknown @mode or a CL22 @port
+ * above XGMAC_MAX_C22_PORT.
+ */
+static int xgbe_set_ext_mii_mode(struct xgbe_prv_data *pdata, unsigned int port,
+                                enum xgbe_mdio_mode mode)
+{
+       unsigned int cl22_ports;
+
+       if (mode == XGBE_MDIO_MODE_CL22) {
+               if (port > XGMAC_MAX_C22_PORT)
+                       return -EINVAL;
+               cl22_ports = 1 << port;
+       } else if (mode == XGBE_MDIO_MODE_CL45) {
+               cl22_ports = 0;
+       } else {
+               return -EINVAL;
+       }
+
+       XGMAC_IOWRITE(pdata, MAC_MDIOCL22R, cl22_ports);
+
+       return 0;
+}
+
 static int xgbe_tx_complete(struct xgbe_ring_desc *rdesc)
 {
        return !XGMAC_GET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, OWN);
@@ -1264,14 +1532,21 @@ static u64 xgbe_get_tstamp_time(struct xgbe_prv_data *pdata)
 
 static u64 xgbe_get_tx_tstamp(struct xgbe_prv_data *pdata)
 {
-       unsigned int tx_snr;
+       unsigned int tx_snr, tx_ssr;
        u64 nsec;
 
-       tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR);
+       if (pdata->vdata->tx_tstamp_workaround) {
+               tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR);
+               tx_ssr = XGMAC_IOREAD(pdata, MAC_TXSSR);
+       } else {
+               tx_ssr = XGMAC_IOREAD(pdata, MAC_TXSSR);
+               tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR);
+       }
+
        if (XGMAC_GET_BITS(tx_snr, MAC_TXSNR, TXTSSTSMIS))
                return 0;
 
-       nsec = XGMAC_IOREAD(pdata, MAC_TXSSR);
+       nsec = tx_ssr;
        nsec *= NSEC_PER_SEC;
        nsec += tx_snr;
 
@@ -1327,163 +1602,63 @@ static int xgbe_config_tstamp(struct xgbe_prv_data *pdata,
        return 0;
 }
 
-static void xgbe_config_tc(struct xgbe_prv_data *pdata)
+static void xgbe_tx_start_xmit(struct xgbe_channel *channel,
+                              struct xgbe_ring *ring)
 {
-       unsigned int offset, queue, prio;
-       u8 i;
-
-       netdev_reset_tc(pdata->netdev);
-       if (!pdata->num_tcs)
-               return;
+       struct xgbe_prv_data *pdata = channel->pdata;
+       struct xgbe_ring_data *rdata;
 
-       netdev_set_num_tc(pdata->netdev, pdata->num_tcs);
+       /* Make sure everything is written before the register write */
+       wmb();
 
-       for (i = 0, queue = 0, offset = 0; i < pdata->num_tcs; i++) {
-               while ((queue < pdata->tx_q_count) &&
-                      (pdata->q2tc_map[queue] == i))
-                       queue++;
+       /* Issue a poll command to Tx DMA by writing address
+        * of next immediate free descriptor */
+       rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
+       XGMAC_DMA_IOWRITE(channel, DMA_CH_TDTR_LO,
+                         lower_32_bits(rdata->rdesc_dma));
 
-               netif_dbg(pdata, drv, pdata->netdev, "TC%u using TXq%u-%u\n",
-                         i, offset, queue - 1);
-               netdev_set_tc_queue(pdata->netdev, i, queue - offset, offset);
-               offset = queue;
+       /* Start the Tx timer */
+       if (pdata->tx_usecs && !channel->tx_timer_active) {
+               channel->tx_timer_active = 1;
+               mod_timer(&channel->tx_timer,
+                         jiffies + usecs_to_jiffies(pdata->tx_usecs));
        }
 
-       if (!pdata->ets)
-               return;
-
-       for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++)
-               netdev_set_prio_tc_map(pdata->netdev, prio,
-                                      pdata->ets->prio_tc[prio]);
+       ring->tx.xmit_more = 0;
 }
 
-static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata)
+static void xgbe_dev_xmit(struct xgbe_channel *channel)
 {
-       struct ieee_ets *ets = pdata->ets;
-       unsigned int total_weight, min_weight, weight;
-       unsigned int mask, reg, reg_val;
-       unsigned int i, prio;
-
-       if (!ets)
-               return;
-
-       /* Set Tx to deficit weighted round robin scheduling algorithm (when
-        * traffic class is using ETS algorithm)
-        */
-       XGMAC_IOWRITE_BITS(pdata, MTL_OMR, ETSALG, MTL_ETSALG_DWRR);
+       struct xgbe_prv_data *pdata = channel->pdata;
+       struct xgbe_ring *ring = channel->tx_ring;
+       struct xgbe_ring_data *rdata;
+       struct xgbe_ring_desc *rdesc;
+       struct xgbe_packet_data *packet = &ring->packet_data;
+       unsigned int csum, tso, vlan;
+       unsigned int tso_context, vlan_context;
+       unsigned int tx_set_ic;
+       int start_index = ring->cur;
+       int cur_index = ring->cur;
+       int i;
 
-       /* Set Traffic Class algorithms */
-       total_weight = pdata->netdev->mtu * pdata->hw_feat.tc_cnt;
-       min_weight = total_weight / 100;
-       if (!min_weight)
-               min_weight = 1;
+       DBGPR("-->xgbe_dev_xmit\n");
 
-       for (i = 0; i < pdata->hw_feat.tc_cnt; i++) {
-               /* Map the priorities to the traffic class */
-               mask = 0;
-               for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) {
-                       if (ets->prio_tc[prio] == i)
-                               mask |= (1 << prio);
-               }
-               mask &= 0xff;
+       csum = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+                             CSUM_ENABLE);
+       tso = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+                            TSO_ENABLE);
+       vlan = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
+                             VLAN_CTAG);
 
-               netif_dbg(pdata, drv, pdata->netdev, "TC%u PRIO mask=%#x\n",
-                         i, mask);
-               reg = MTL_TCPM0R + (MTL_TCPM_INC * (i / MTL_TCPM_TC_PER_REG));
-               reg_val = XGMAC_IOREAD(pdata, reg);
+       if (tso && (packet->mss != ring->tx.cur_mss))
+               tso_context = 1;
+       else
+               tso_context = 0;
 
-               reg_val &= ~(0xff << ((i % MTL_TCPM_TC_PER_REG) << 3));
-               reg_val |= (mask << ((i % MTL_TCPM_TC_PER_REG) << 3));
-
-               XGMAC_IOWRITE(pdata, reg, reg_val);
-
-               /* Set the traffic class algorithm */
-               switch (ets->tc_tsa[i]) {
-               case IEEE_8021QAZ_TSA_STRICT:
-                       netif_dbg(pdata, drv, pdata->netdev,
-                                 "TC%u using SP\n", i);
-                       XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA,
-                                              MTL_TSA_SP);
-                       break;
-               case IEEE_8021QAZ_TSA_ETS:
-                       weight = total_weight * ets->tc_tx_bw[i] / 100;
-                       weight = clamp(weight, min_weight, total_weight);
-
-                       netif_dbg(pdata, drv, pdata->netdev,
-                                 "TC%u using DWRR (weight %u)\n", i, weight);
-                       XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA,
-                                              MTL_TSA_ETS);
-                       XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_QWR, QW,
-                                              weight);
-                       break;
-               }
-       }
-
-       xgbe_config_tc(pdata);
-}
-
-static void xgbe_config_dcb_pfc(struct xgbe_prv_data *pdata)
-{
-       xgbe_config_flow_control(pdata);
-}
-
-static void xgbe_tx_start_xmit(struct xgbe_channel *channel,
-                              struct xgbe_ring *ring)
-{
-       struct xgbe_prv_data *pdata = channel->pdata;
-       struct xgbe_ring_data *rdata;
-
-       /* Make sure everything is written before the register write */
-       wmb();
-
-       /* Issue a poll command to Tx DMA by writing address
-        * of next immediate free descriptor */
-       rdata = XGBE_GET_DESC_DATA(ring, ring->cur);
-       XGMAC_DMA_IOWRITE(channel, DMA_CH_TDTR_LO,
-                         lower_32_bits(rdata->rdesc_dma));
-
-       /* Start the Tx timer */
-       if (pdata->tx_usecs && !channel->tx_timer_active) {
-               channel->tx_timer_active = 1;
-               mod_timer(&channel->tx_timer,
-                         jiffies + usecs_to_jiffies(pdata->tx_usecs));
-       }
-
-       ring->tx.xmit_more = 0;
-}
-
-static void xgbe_dev_xmit(struct xgbe_channel *channel)
-{
-       struct xgbe_prv_data *pdata = channel->pdata;
-       struct xgbe_ring *ring = channel->tx_ring;
-       struct xgbe_ring_data *rdata;
-       struct xgbe_ring_desc *rdesc;
-       struct xgbe_packet_data *packet = &ring->packet_data;
-       unsigned int csum, tso, vlan;
-       unsigned int tso_context, vlan_context;
-       unsigned int tx_set_ic;
-       int start_index = ring->cur;
-       int cur_index = ring->cur;
-       int i;
-
-       DBGPR("-->xgbe_dev_xmit\n");
-
-       csum = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
-                             CSUM_ENABLE);
-       tso = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
-                            TSO_ENABLE);
-       vlan = XGMAC_GET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES,
-                             VLAN_CTAG);
-
-       if (tso && (packet->mss != ring->tx.cur_mss))
-               tso_context = 1;
-       else
-               tso_context = 0;
-
-       if (vlan && (packet->vlan_ctag != ring->tx.cur_vlan_ctag))
-               vlan_context = 1;
-       else
-               vlan_context = 0;
+       if (vlan && (packet->vlan_ctag != ring->tx.cur_vlan_ctag))
+               vlan_context = 1;
+       else
+               vlan_context = 0;
 
        /* Determine if an interrupt should be generated for this Tx:
         *   Interrupt:
@@ -1901,7 +2076,7 @@ static int xgbe_disable_int(struct xgbe_channel *channel,
        return 0;
 }
 
-static int xgbe_exit(struct xgbe_prv_data *pdata)
+static int __xgbe_exit(struct xgbe_prv_data *pdata)
 {
        unsigned int count = 2000;
 
@@ -1923,6 +2098,20 @@ static int xgbe_exit(struct xgbe_prv_data *pdata)
        return 0;
 }
 
+/* Issue the software reset (__xgbe_exit()) twice in a row to guard
+ * against possible incorrectly generated interrupts.  The second reset
+ * is only attempted when the first one returns 0; a non-zero result
+ * from either call is returned unchanged.
+ */
+static int xgbe_exit(struct xgbe_prv_data *pdata)
+{
+       int ret = __xgbe_exit(pdata);
+
+       if (!ret)
+               ret = __xgbe_exit(pdata);
+
+       return ret;
+}
+
 static int xgbe_flush_tx_queues(struct xgbe_prv_data *pdata)
 {
        unsigned int i, count;
@@ -2000,61 +2189,331 @@ static void xgbe_config_mtl_mode(struct xgbe_prv_data *pdata)
        XGMAC_IOWRITE_BITS(pdata, MTL_OMR, RAA, MTL_RAA_SP);
 }
 
-static unsigned int xgbe_calculate_per_queue_fifo(unsigned int fifo_size,
-                                                 unsigned int queue_count)
+/* Compute the Rx flow control threshold settings for one queue and store
+ * them in pdata->rx_rfa[queue] and pdata->rx_rfd[queue].
+ *
+ * @pdata:       driver private data; pfcq[] and pfc_rfa are read, rx_rfa[]
+ *               and rx_rfd[] are written
+ * @queue:       index of the Rx queue
+ * @q_fifo_size: fifo size assigned to the queue, in bytes
+ *
+ * For fifos no larger than the aligned maximum frame size, fixed register
+ * encodings are stored directly and the function returns early.  In all
+ * other cases byte values are computed for rfa/rfd and converted with
+ * XGMAC_FLOW_CONTROL_VALUE() before being stored.
+ */
+static void xgbe_queue_flow_control_threshold(struct xgbe_prv_data *pdata,
+                                             unsigned int queue,
+                                             unsigned int q_fifo_size)
+{
+       unsigned int frame_fifo_size;
+       unsigned int rfa, rfd;
+
+       /* Maximum frame size rounded by the flow control alignment macro */
+       frame_fifo_size = XGMAC_FLOW_CONTROL_ALIGN(xgbe_get_max_frame(pdata));
+
+       if (pdata->pfcq[queue] && (q_fifo_size > pdata->pfc_rfa)) {
+               /* PFC is active for this queue */
+               rfa = pdata->pfc_rfa;
+               rfd = rfa + frame_fifo_size;
+               /* Cap rfd at the maximum, and keep rfa one unit below it */
+               if (rfd > XGMAC_FLOW_CONTROL_MAX)
+                       rfd = XGMAC_FLOW_CONTROL_MAX;
+               if (rfa >= XGMAC_FLOW_CONTROL_MAX)
+                       rfa = XGMAC_FLOW_CONTROL_MAX - XGMAC_FLOW_CONTROL_UNIT;
+       } else {
+               /* This path deals with just maximum frame sizes which are
+                * limited to a jumbo frame of 9,000 (plus headers, etc.)
+                * so we can never exceed the maximum allowable RFA/RFD
+                * values.
+                */
+               if (q_fifo_size <= 2048) {
+                       /* rx_rfd to zero to signal no flow control */
+                       pdata->rx_rfa[queue] = 0;
+                       pdata->rx_rfd[queue] = 0;
+                       return;
+               }
+
+               if (q_fifo_size <= 4096) {
+                       /* Between 2048 and 4096 */
+                       pdata->rx_rfa[queue] = 0;       /* Full - 1024 bytes */
+                       pdata->rx_rfd[queue] = 1;       /* Full - 1536 bytes */
+                       return;
+               }
+
+               if (q_fifo_size <= frame_fifo_size) {
+                       /* Between 4096 and max-frame */
+                       pdata->rx_rfa[queue] = 2;       /* Full - 2048 bytes */
+                       pdata->rx_rfd[queue] = 5;       /* Full - 3584 bytes */
+                       return;
+               }
+
+               if (q_fifo_size <= (frame_fifo_size * 3)) {
+                       /* Between max-frame and 3 max-frames,
+                        * trigger if we get just over a frame of data and
+                        * resume when we have just under half a frame left.
+                        */
+                       rfa = q_fifo_size - frame_fifo_size;
+                       rfd = rfa + (frame_fifo_size / 2);
+               } else {
+                       /* Above 3 max-frames - trigger when just over
+                        * 2 frames of space available
+                        */
+                       rfa = frame_fifo_size * 2;
+                       rfa += XGMAC_FLOW_CONTROL_UNIT;
+                       rfd = rfa + frame_fifo_size;
+               }
+       }
+
+       /* Convert the byte values into the stored per-queue settings */
+       pdata->rx_rfa[queue] = XGMAC_FLOW_CONTROL_VALUE(rfa);
+       pdata->rx_rfd[queue] = XGMAC_FLOW_CONTROL_VALUE(rfd);
+}
+
+static void xgbe_calculate_flow_control_threshold(struct xgbe_prv_data *pdata,
+                                                 unsigned int *fifo)
 {
        unsigned int q_fifo_size;
-       unsigned int p_fifo;
+       unsigned int i;
+
+       for (i = 0; i < pdata->rx_q_count; i++) {
+               q_fifo_size = (fifo[i] + 1) * XGMAC_FIFO_UNIT;
+
+               xgbe_queue_flow_control_threshold(pdata, i, q_fifo_size);
+       }
+}
+
+static void xgbe_config_flow_control_threshold(struct xgbe_prv_data *pdata)
+{
+       unsigned int i;
 
-       /* Calculate the configured fifo size */
-       q_fifo_size = 1 << (fifo_size + 7);
+       for (i = 0; i < pdata->rx_q_count; i++) {
+               XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFA,
+                                      pdata->rx_rfa[i]);
+               XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFD,
+                                      pdata->rx_rfd[i]);
+       }
+}
 
+static unsigned int xgbe_get_tx_fifo_size(struct xgbe_prv_data *pdata)
+{
        /* The configured value may not be the actual amount of fifo RAM */
-       q_fifo_size = min_t(unsigned int, XGBE_FIFO_MAX, q_fifo_size);
+       return min_t(unsigned int, pdata->tx_max_fifo_size,
+                    pdata->hw_feat.tx_fifo_size);
+}
 
-       q_fifo_size = q_fifo_size / queue_count;
+/* Return the Rx fifo size to use: the smaller of pdata->rx_max_fifo_size
+ * and pdata->hw_feat.rx_fifo_size.
+ */
+static unsigned int xgbe_get_rx_fifo_size(struct xgbe_prv_data *pdata)
+{
+       /* The configured value may not be the actual amount of fifo RAM */
+       return min_t(unsigned int, pdata->rx_max_fifo_size,
+                    pdata->hw_feat.rx_fifo_size);
+}
 
-       /* Each increment in the queue fifo size represents 256 bytes of
-        * fifo, with 0 representing 256 bytes. Distribute the fifo equally
-        * between the queues.
+static void xgbe_calculate_equal_fifo(unsigned int fifo_size,
+                                     unsigned int queue_count,
+                                     unsigned int *fifo)
+{
+       unsigned int q_fifo_size;
+       unsigned int p_fifo;
+       unsigned int i;
+
+       q_fifo_size = fifo_size / queue_count;
+
+       /* Calculate the fifo setting by dividing the queue's fifo size
+        * by the fifo allocation increment (with 0 representing the
+        * base allocation increment so decrement the result by 1).
         */
-       p_fifo = q_fifo_size / 256;
+       p_fifo = q_fifo_size / XGMAC_FIFO_UNIT;
        if (p_fifo)
                p_fifo--;
 
-       return p_fifo;
+       /* Distribute the fifo equally amongst the queues */
+       for (i = 0; i < queue_count; i++)
+               fifo[i] = p_fifo;
+}
+
+/* Assign the minimum fifo allocation to every queue whose index is at or
+ * above IEEE_8021QAZ_MAX_TCS.
+ *
+ * @fifo_size:   total fifo size available
+ * @queue_count: number of queues
+ * @fifo:        per-queue fifo settings; only entries with an index of
+ *               IEEE_8021QAZ_MAX_TCS or higher are written
+ *
+ * Returns the fifo size that remains after XGMAC_FIFO_MIN_ALLOC has been
+ * subtracted for each such queue, or @fifo_size unchanged when
+ * @queue_count does not exceed IEEE_8021QAZ_MAX_TCS.
+ *
+ * NOTE(review): the subtraction is unsigned and is not checked against
+ * @fifo_size; presumably the fifo is always large enough - confirm.
+ */
+static unsigned int xgbe_set_nonprio_fifos(unsigned int fifo_size,
+                                          unsigned int queue_count,
+                                          unsigned int *fifo)
+{
+       unsigned int i;
+
+       BUILD_BUG_ON_NOT_POWER_OF_2(XGMAC_FIFO_MIN_ALLOC);
+
+       if (queue_count <= IEEE_8021QAZ_MAX_TCS)
+               return fifo_size;
+
+       /* Rx queues 9 and up are for specialized packets,
+        * such as PTP or DCB control packets, etc. and
+        * don't require a large fifo
+        */
+       for (i = IEEE_8021QAZ_MAX_TCS; i < queue_count; i++) {
+               /* Stored setting is the number of fifo units minus one */
+               fifo[i] = (XGMAC_FIFO_MIN_ALLOC / XGMAC_FIFO_UNIT) - 1;
+               fifo_size -= XGMAC_FIFO_MIN_ALLOC;
+       }
+
+       return fifo_size;
+}
+
+/* Return the PFC delay allowance used to derive pdata->pfc_rfa.
+ *
+ * When pdata->pfc->delay is non-zero, that value divided by 8 is returned.
+ * Otherwise the allowance is built up from two maximum size frames (each
+ * with preamble), one PFC frame (data, Ethernet header, FCS and preamble)
+ * and XGMAC_PFC_DELAYS.
+ *
+ * NOTE(review): the division by 8 presumably converts a bit count into
+ * bytes - confirm against the definition of the pfc delay field.
+ */
+static unsigned int xgbe_get_pfc_delay(struct xgbe_prv_data *pdata)
+{
+       unsigned int delay;
+
+       /* If a delay has been provided, use that */
+       if (pdata->pfc->delay)
+               return pdata->pfc->delay / 8;
+
+       /* Allow for two maximum size frames */
+       delay = xgbe_get_max_frame(pdata);
+       delay += XGMAC_ETH_PREAMBLE;
+       delay *= 2;
+
+       /* Allow for PFC frame */
+       delay += XGMAC_PFC_DATA_LEN;
+       delay += ETH_HLEN + ETH_FCS_LEN;
+       delay += XGMAC_ETH_PREAMBLE;
+
+       /* Allow for miscellaneous delays (LPI exit, cable, etc.) */
+       delay += XGMAC_PFC_DELAYS;
+
+       return delay;
+}
+
+/* Mark the priority queues for which xgbe_is_pfc_queue() is true and count
+ * them.
+ *
+ * Side effect: sets pdata->pfcq[i] to 1 for each such queue; entries for
+ * other queues are left as they are.
+ *
+ * Returns the number of queues marked, or 0 without touching pdata->pfcq[]
+ * when pdata->pfc->pfc_en is zero.
+ */
+static unsigned int xgbe_get_pfc_queues(struct xgbe_prv_data *pdata)
+{
+       unsigned int count, prio_queues;
+       unsigned int i;
+
+       if (!pdata->pfc->pfc_en)
+               return 0;
+
+       count = 0;
+       prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count);
+       for (i = 0; i < prio_queues; i++) {
+               if (!xgbe_is_pfc_queue(pdata, i))
+                       continue;
+
+               pdata->pfcq[i] = 1;
+               count++;
+       }
+
+       return count;
+}
+
+/* Calculate the per-queue Rx fifo settings for the priority queues when
+ * DCB information is present.
+ *
+ * @pdata:     driver private data; pfcq[] and pfc_rfa are updated
+ * @fifo_size: fifo size available to the priority queues
+ * @fifo:      per-queue fifo settings to fill in for the priority queues
+ *
+ * Falls back to xgbe_calculate_equal_fifo() when no queue has PFC enabled
+ * or when @fifo_size cannot hold one aligned maximum size frame per
+ * priority queue.  Otherwise every priority queue receives one aligned
+ * maximum size frame of fifo, PFC queues receive additional fifo (starting
+ * from the highest queue index) while any remains, and whatever is left is
+ * divided across all priority queues.
+ */
+static void xgbe_calculate_dcb_fifo(struct xgbe_prv_data *pdata,
+                                   unsigned int fifo_size,
+                                   unsigned int *fifo)
+{
+       unsigned int q_fifo_size, rem_fifo, addn_fifo;
+       unsigned int prio_queues;
+       unsigned int pfc_count;
+       unsigned int i;
+
+       q_fifo_size = XGMAC_FIFO_ALIGN(xgbe_get_max_frame(pdata));
+       prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count);
+       pfc_count = xgbe_get_pfc_queues(pdata);
+
+       if (!pfc_count || ((q_fifo_size * prio_queues) > fifo_size)) {
+               /* No traffic classes with PFC enabled or can't do lossless */
+               xgbe_calculate_equal_fifo(fifo_size, prio_queues, fifo);
+               return;
+       }
+
+       /* Calculate how much fifo we have to play with */
+       rem_fifo = fifo_size - (q_fifo_size * prio_queues);
+
+       /* Calculate how much more than base fifo PFC needs, which also
+        * becomes the threshold activation point (RFA)
+        */
+       pdata->pfc_rfa = xgbe_get_pfc_delay(pdata);
+       pdata->pfc_rfa = XGMAC_FLOW_CONTROL_ALIGN(pdata->pfc_rfa);
+
+       if (pdata->pfc_rfa > q_fifo_size) {
+               addn_fifo = pdata->pfc_rfa - q_fifo_size;
+               addn_fifo = XGMAC_FIFO_ALIGN(addn_fifo);
+       } else {
+               addn_fifo = 0;
+       }
+
+       /* Calculate DCB fifo settings:
+        *   - distribute remaining fifo between the VLAN priority
+        *     queues based on traffic class PFC enablement and overall
+        *     priority (0 is lowest priority, so start at highest)
+        */
+       i = prio_queues;
+       while (i > 0) {
+               i--;
+
+               /* Base allocation: one aligned maximum size frame */
+               fifo[i] = (q_fifo_size / XGMAC_FIFO_UNIT) - 1;
+
+               if (!pdata->pfcq[i] || !addn_fifo)
+                       continue;
+
+               if (addn_fifo > rem_fifo) {
+                       netdev_warn(pdata->netdev,
+                                   "RXq%u cannot set needed fifo size\n", i);
+                       if (!rem_fifo)
+                               continue;
+
+                       /* Give this queue whatever is left; addn_fifo stays
+                        * reduced for the remaining iterations
+                        */
+                       addn_fifo = rem_fifo;
+               }
+
+               fifo[i] += (addn_fifo / XGMAC_FIFO_UNIT);
+               rem_fifo -= addn_fifo;
+       }
+
+       if (rem_fifo) {
+               unsigned int inc_fifo = rem_fifo / prio_queues;
+
+               /* Distribute remaining fifo across queues */
+               for (i = 0; i < prio_queues; i++)
+                       fifo[i] += (inc_fifo / XGMAC_FIFO_UNIT);
+       }
 }
 
 static void xgbe_config_tx_fifo_size(struct xgbe_prv_data *pdata)
 {
        unsigned int fifo_size;
+       unsigned int fifo[XGBE_MAX_QUEUES];
        unsigned int i;
 
-       fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.tx_fifo_size,
-                                                 pdata->tx_q_count);
+       fifo_size = xgbe_get_tx_fifo_size(pdata);
+
+       xgbe_calculate_equal_fifo(fifo_size, pdata->tx_q_count, fifo);
 
        for (i = 0; i < pdata->tx_q_count; i++)
-               XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TQS, fifo_size);
+               XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TQS, fifo[i]);
 
        netif_info(pdata, drv, pdata->netdev,
                   "%d Tx hardware queues, %d byte fifo per queue\n",
-                  pdata->tx_q_count, ((fifo_size + 1) * 256));
+                  pdata->tx_q_count, ((fifo[0] + 1) * XGMAC_FIFO_UNIT));
 }
 
 static void xgbe_config_rx_fifo_size(struct xgbe_prv_data *pdata)
 {
        unsigned int fifo_size;
+       unsigned int fifo[XGBE_MAX_QUEUES];
+       unsigned int prio_queues;
        unsigned int i;
 
-       fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.rx_fifo_size,
-                                                 pdata->rx_q_count);
+       /* Clear any DCB related fifo/queue information */
+       memset(pdata->pfcq, 0, sizeof(pdata->pfcq));
+       pdata->pfc_rfa = 0;
+
+       fifo_size = xgbe_get_rx_fifo_size(pdata);
+       prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count);
+
+       /* Assign a minimum fifo to the non-VLAN priority queues */
+       fifo_size = xgbe_set_nonprio_fifos(fifo_size, pdata->rx_q_count, fifo);
+
+       if (pdata->pfc && pdata->ets)
+               xgbe_calculate_dcb_fifo(pdata, fifo_size, fifo);
+       else
+               xgbe_calculate_equal_fifo(fifo_size, prio_queues, fifo);
 
        for (i = 0; i < pdata->rx_q_count; i++)
-               XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RQS, fifo_size);
+               XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RQS, fifo[i]);
 
-       netif_info(pdata, drv, pdata->netdev,
-                  "%d Rx hardware queues, %d byte fifo per queue\n",
-                  pdata->rx_q_count, ((fifo_size + 1) * 256));
+       xgbe_calculate_flow_control_threshold(pdata, fifo);
+       xgbe_config_flow_control_threshold(pdata);
+
+       if (pdata->pfc && pdata->ets && pdata->pfc->pfc_en) {
+               netif_info(pdata, drv, pdata->netdev,
+                          "%u Rx hardware queues\n", pdata->rx_q_count);
+               for (i = 0; i < pdata->rx_q_count; i++)
+                       netif_info(pdata, drv, pdata->netdev,
+                                  "RxQ%u, %u byte fifo queue\n", i,
+                                  ((fifo[i] + 1) * XGMAC_FIFO_UNIT));
+       } else {
+               netif_info(pdata, drv, pdata->netdev,
+                          "%u Rx hardware queues, %u byte fifo per queue\n",
+                          pdata->rx_q_count,
+                          ((fifo[0] + 1) * XGMAC_FIFO_UNIT));
+       }
 }
 
 static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata)
@@ -2090,8 +2549,7 @@ static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata)
        }
 
        /* Map the 8 VLAN priority values to available MTL Rx queues */
-       prio_queues = min_t(unsigned int, IEEE_8021QAZ_MAX_TCS,
-                           pdata->rx_q_count);
+       prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count);
        ppq = IEEE_8021QAZ_MAX_TCS / prio_queues;
        ppq_extra = IEEE_8021QAZ_MAX_TCS % prio_queues;
 
@@ -2139,16 +2597,120 @@ static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata)
        }
 }
 
-static void xgbe_config_flow_control_threshold(struct xgbe_prv_data *pdata)
+static void xgbe_config_tc(struct xgbe_prv_data *pdata)
 {
-       unsigned int i;
+       unsigned int offset, queue, prio;
+       u8 i;
 
-       for (i = 0; i < pdata->rx_q_count; i++) {
-               /* Activate flow control when less than 4k left in fifo */
-               XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFA, 2);
+       netdev_reset_tc(pdata->netdev);
+       if (!pdata->num_tcs)
+               return;
+
+       netdev_set_num_tc(pdata->netdev, pdata->num_tcs);
+
+       for (i = 0, queue = 0, offset = 0; i < pdata->num_tcs; i++) {
+               while ((queue < pdata->tx_q_count) &&
+                      (pdata->q2tc_map[queue] == i))
+                       queue++;
 
-               /* De-activate flow control when more than 6k left in fifo */
-               XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFD, 4);
+               netif_dbg(pdata, drv, pdata->netdev, "TC%u using TXq%u-%u\n",
+                         i, offset, queue - 1);
+               netdev_set_tc_queue(pdata->netdev, i, queue - offset, offset);
+               offset = queue;
+       }
+
+       if (!pdata->ets)
+               return;
+
+       for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++)
+               netdev_set_prio_tc_map(pdata->netdev, prio,
+                                      pdata->ets->prio_tc[prio]);
+}
+
+/* Program the traffic class configuration held in pdata->ets.
+ *
+ * Does nothing when pdata->ets is NULL.  Otherwise, for each of the
+ * pdata->hw_feat.tc_cnt traffic classes:
+ *   - the priorities mapped to the class (ets->prio_tc[]) are written as a
+ *     bit mask into that class's 8-bit field of the MTL_TCPM registers
+ *   - the transmission selection algorithm is set from ets->tc_tsa[]; only
+ *     IEEE_8021QAZ_TSA_STRICT and IEEE_8021QAZ_TSA_ETS are handled, any
+ *     other value leaves the TSA setting untouched
+ *
+ * xgbe_config_tc() is called at the end to update the netdev mappings.
+ */
+static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata)
+{
+       struct ieee_ets *ets = pdata->ets;
+       unsigned int total_weight, min_weight, weight;
+       unsigned int mask, reg, reg_val;
+       unsigned int i, prio;
+
+       if (!ets)
+               return;
+
+       /* Set Tx to deficit weighted round robin scheduling algorithm (when
+        * traffic class is using ETS algorithm)
+        */
+       XGMAC_IOWRITE_BITS(pdata, MTL_OMR, ETSALG, MTL_ETSALG_DWRR);
+
+       /* Set Traffic Class algorithms */
+       /* min_weight is 1% of total_weight, but never less than 1 */
+       total_weight = pdata->netdev->mtu * pdata->hw_feat.tc_cnt;
+       min_weight = total_weight / 100;
+       if (!min_weight)
+               min_weight = 1;
+
+       for (i = 0; i < pdata->hw_feat.tc_cnt; i++) {
+               /* Map the priorities to the traffic class */
+               mask = 0;
+               for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) {
+                       if (ets->prio_tc[prio] == i)
+                               mask |= (1 << prio);
+               }
+               mask &= 0xff;
+
+               netif_dbg(pdata, drv, pdata->netdev, "TC%u PRIO mask=%#x\n",
+                         i, mask);
+               reg = MTL_TCPM0R + (MTL_TCPM_INC * (i / MTL_TCPM_TC_PER_REG));
+               reg_val = XGMAC_IOREAD(pdata, reg);
+
+               /* Replace only this traffic class's 8-bit field */
+               reg_val &= ~(0xff << ((i % MTL_TCPM_TC_PER_REG) << 3));
+               reg_val |= (mask << ((i % MTL_TCPM_TC_PER_REG) << 3));
+
+               XGMAC_IOWRITE(pdata, reg, reg_val);
+
+               /* Set the traffic class algorithm */
+               switch (ets->tc_tsa[i]) {
+               case IEEE_8021QAZ_TSA_STRICT:
+                       netif_dbg(pdata, drv, pdata->netdev,
+                                 "TC%u using SP\n", i);
+                       XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA,
+                                              MTL_TSA_SP);
+                       break;
+               case IEEE_8021QAZ_TSA_ETS:
+                       /* Bandwidth percentage of total_weight, kept within
+                        * [min_weight, total_weight]
+                        */
+                       weight = total_weight * ets->tc_tx_bw[i] / 100;
+                       weight = clamp(weight, min_weight, total_weight);
+
+                       netif_dbg(pdata, drv, pdata->netdev,
+                                 "TC%u using DWRR (weight %u)\n", i, weight);
+                       XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA,
+                                              MTL_TSA_ETS);
+                       XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_QWR, QW,
+                                              weight);
+                       break;
+               }
+       }
+
+       xgbe_config_tc(pdata);
+}
+
+/* Recalculate the Rx fifo sizes and reapply flow control.
+ *
+ * Unless XGBE_DOWN is set in pdata->dev_state, the Tx queues are stopped
+ * and Rx is disabled before the update, and both are resumed afterwards.
+ */
+static void xgbe_config_dcb_pfc(struct xgbe_prv_data *pdata)
+{
+       if (!test_bit(XGBE_DOWN, &pdata->dev_state)) {
+               /* Just stop the Tx queues while Rx fifo is changed */
+               netif_tx_stop_all_queues(pdata->netdev);
+
+               /* Suspend Rx so that the fifos can be adjusted */
+               pdata->hw_if.disable_rx(pdata);
+       }
+
+       xgbe_config_rx_fifo_size(pdata);
+       xgbe_config_flow_control(pdata);
+
+       if (!test_bit(XGBE_DOWN, &pdata->dev_state)) {
+               /* Resume Rx */
+               pdata->hw_if.enable_rx(pdata);
+
+               /* Resume Tx queues */
+               netif_tx_start_all_queues(pdata->netdev);
        }
 }
 
@@ -2175,19 +2737,7 @@ static void xgbe_config_jumbo_enable(struct xgbe_prv_data *pdata)
 
 static void xgbe_config_mac_speed(struct xgbe_prv_data *pdata)
 {
-       switch (pdata->phy_speed) {
-       case SPEED_10000:
-               xgbe_set_xgmii_speed(pdata);
-               break;
-
-       case SPEED_2500:
-               xgbe_set_gmii_2500_speed(pdata);
-               break;
-
-       case SPEED_1000:
-               xgbe_set_gmii_speed(pdata);
-               break;
-       }
+       xgbe_set_speed(pdata, pdata->phy_speed);
 }
 
 static void xgbe_config_checksum_offload(struct xgbe_prv_data *pdata)
@@ -2223,17 +2773,33 @@ static u64 xgbe_mmc_read(struct xgbe_prv_data *pdata, unsigned int reg_lo)
        bool read_hi;
        u64 val;
 
-       switch (reg_lo) {
-       /* These registers are always 64 bit */
-       case MMC_TXOCTETCOUNT_GB_LO:
-       case MMC_TXOCTETCOUNT_G_LO:
-       case MMC_RXOCTETCOUNT_GB_LO:
-       case MMC_RXOCTETCOUNT_G_LO:
-               read_hi = true;
-               break;
+       if (pdata->vdata->mmc_64bit) {
+               switch (reg_lo) {
+               /* These registers are always 32 bit */
+               case MMC_RXRUNTERROR:
+               case MMC_RXJABBERERROR:
+               case MMC_RXUNDERSIZE_G:
+               case MMC_RXOVERSIZE_G:
+               case MMC_RXWATCHDOGERROR:
+                       read_hi = false;
+                       break;
 
-       default:
-               read_hi = false;
+               default:
+                       read_hi = true;
+               }
+       } else {
+               switch (reg_lo) {
+               /* These registers are always 64 bit */
+               case MMC_TXOCTETCOUNT_GB_LO:
+               case MMC_TXOCTETCOUNT_G_LO:
+               case MMC_RXOCTETCOUNT_GB_LO:
+               case MMC_RXOCTETCOUNT_G_LO:
+                       read_hi = true;
+                       break;
+
+               default:
+                       read_hi = false;
+               }
        }
 
        val = XGMAC_IOREAD(pdata, reg_lo);
@@ -2563,20 +3129,48 @@ static void xgbe_config_mmc(struct xgbe_prv_data *pdata)
        XGMAC_IOWRITE_BITS(pdata, MMC_CR, CR, 1);
 }
 
+/* Wait for a Tx queue to drain before the Tx engine is stopped.
+ *
+ * @pdata: driver private data
+ * @queue: index of the Tx queue to wait on
+ *
+ * Polls MTL_Q_TQDR until TRCSTS is not 1 and TXQSTS is 0, sleeping between
+ * reads, for at most XGBE_DMA_STOP_TIMEOUT * HZ jiffies.  A message is
+ * logged when the deadline has passed; nothing is returned to the caller.
+ *
+ * NOTE(review): sleeps via usleep_range(), so presumably callers must be
+ * in a context where sleeping is allowed - confirm.
+ */
+static void xgbe_txq_prepare_tx_stop(struct xgbe_prv_data *pdata,
+                                    unsigned int queue)
+{
+       unsigned int tx_status;
+       unsigned long tx_timeout;
+
+       /* The Tx engine cannot be stopped if it is actively processing
+        * packets. Wait for the Tx queue to empty the Tx fifo.  Don't
+        * wait forever though...
+        */
+       tx_timeout = jiffies + (XGBE_DMA_STOP_TIMEOUT * HZ);
+       while (time_before(jiffies, tx_timeout)) {
+               tx_status = XGMAC_MTL_IOREAD(pdata, queue, MTL_Q_TQDR);
+               if ((XGMAC_GET_BITS(tx_status, MTL_Q_TQDR, TRCSTS) != 1) &&
+                   (XGMAC_GET_BITS(tx_status, MTL_Q_TQDR, TXQSTS) == 0))
+                       break;
+
+               usleep_range(500, 1000);
+       }
+
+       /* NOTE(review): the deadline is re-read here rather than recording
+        * why the loop ended, so a queue that empties just as the deadline
+        * passes would still be reported as timed out - confirm acceptable.
+        */
+       if (!time_before(jiffies, tx_timeout))
+               netdev_info(pdata->netdev,
+                           "timed out waiting for Tx queue %u to empty\n",
+                           queue);
+}
+
 static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata,
-                                struct xgbe_channel *channel)
+                                unsigned int queue)
 {
        unsigned int tx_dsr, tx_pos, tx_qidx;
        unsigned int tx_status;
        unsigned long tx_timeout;
 
+       if (XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER) > 0x20)
+               return xgbe_txq_prepare_tx_stop(pdata, queue);
+
        /* Calculate the status register to read and the position within */
-       if (channel->queue_index < DMA_DSRX_FIRST_QUEUE) {
+       if (queue < DMA_DSRX_FIRST_QUEUE) {
                tx_dsr = DMA_DSR0;
-               tx_pos = (channel->queue_index * DMA_DSR_Q_WIDTH) +
-                        DMA_DSR0_TPS_START;
+               tx_pos = (queue * DMA_DSR_Q_WIDTH) + DMA_DSR0_TPS_START;
        } else {
-               tx_qidx = channel->queue_index - DMA_DSRX_FIRST_QUEUE;
+               tx_qidx = queue - DMA_DSRX_FIRST_QUEUE;
 
                tx_dsr = DMA_DSR1 + ((tx_qidx / DMA_DSRX_QPR) * DMA_DSRX_INC);
                tx_pos = ((tx_qidx % DMA_DSRX_QPR) * DMA_DSR_Q_WIDTH) +
@@ -2601,7 +3195,7 @@ static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata,
        if (!time_before(jiffies, tx_timeout))
                netdev_info(pdata->netdev,
                            "timed out waiting for Tx DMA channel %u to stop\n",
-                           channel->queue_index);
+                           queue);
 }
 
 static void xgbe_enable_tx(struct xgbe_prv_data *pdata)
@@ -2633,13 +3227,8 @@ static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
        unsigned int i;
 
        /* Prepare for Tx DMA channel stop */
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->tx_ring)
-                       break;
-
-               xgbe_prepare_tx_stop(pdata, channel);
-       }
+       for (i = 0; i < pdata->tx_q_count; i++)
+               xgbe_prepare_tx_stop(pdata, i);
 
        /* Disable MAC Tx */
        XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
@@ -2763,13 +3352,8 @@ static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata)
        unsigned int i;
 
        /* Prepare for Tx DMA channel stop */
-       channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (!channel->tx_ring)
-                       break;
-
-               xgbe_prepare_tx_stop(pdata, channel);
-       }
+       for (i = 0; i < pdata->tx_q_count; i++)
+               xgbe_prepare_tx_stop(pdata, i);
 
        /* Disable MAC Tx */
        XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
@@ -2856,12 +3440,10 @@ static int xgbe_init(struct xgbe_prv_data *pdata)
        xgbe_config_rx_threshold(pdata, pdata->rx_threshold);
        xgbe_config_tx_fifo_size(pdata);
        xgbe_config_rx_fifo_size(pdata);
-       xgbe_config_flow_control_threshold(pdata);
        /*TODO: Error Packet and undersized good Packet forwarding enable
                (FEP and FUP)
         */
        xgbe_config_dcb_tc(pdata);
-       xgbe_config_dcb_pfc(pdata);
        xgbe_enable_mtl_interrupts(pdata);
 
        /*
@@ -2877,6 +3459,11 @@ static int xgbe_init(struct xgbe_prv_data *pdata)
        xgbe_config_mmc(pdata);
        xgbe_enable_mac_interrupts(pdata);
 
+       /*
+        * Initialize ECC related features
+        */
+       xgbe_enable_ecc_interrupts(pdata);
+
        DBGPR("<--xgbe_init\n");
 
        return 0;
@@ -2903,9 +3490,14 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
        hw_if->read_mmd_regs = xgbe_read_mmd_regs;
        hw_if->write_mmd_regs = xgbe_write_mmd_regs;
 
-       hw_if->set_gmii_speed = xgbe_set_gmii_speed;
-       hw_if->set_gmii_2500_speed = xgbe_set_gmii_2500_speed;
-       hw_if->set_xgmii_speed = xgbe_set_xgmii_speed;
+       hw_if->set_speed = xgbe_set_speed;
+
+       hw_if->set_ext_mii_mode = xgbe_set_ext_mii_mode;
+       hw_if->read_ext_mii_regs = xgbe_read_ext_mii_regs;
+       hw_if->write_ext_mii_regs = xgbe_write_ext_mii_regs;
+
+       hw_if->set_gpio = xgbe_set_gpio;
+       hw_if->clr_gpio = xgbe_clr_gpio;
 
        hw_if->enable_tx = xgbe_enable_tx;
        hw_if->disable_tx = xgbe_disable_tx;
@@ -2984,5 +3576,9 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
        hw_if->set_rss_hash_key = xgbe_set_rss_hash_key;
        hw_if->set_rss_lookup_table = xgbe_set_rss_lookup_table;
 
+       /* For ECC */
+       hw_if->disable_ecc_ded = xgbe_disable_ecc_ded;
+       hw_if->disable_ecc_sec = xgbe_disable_ecc_sec;
+
        DBGPR("<--xgbe_init_function_ptrs\n");
 }
index c4e668208e0474eed26292626fdfeafbc048314a..155190db682d29a6a97b2267550954fb4eba639d 100644 (file)
  *     THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/platform_device.h>
+#include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
 #include "xgbe.h"
 #include "xgbe-common.h"
 
+/* Default ECC error thresholds and periods.  These are always defined, but
+ * are only registered as module parameters (owner-writable, world-readable)
+ * when CONFIG_AMD_XGBE_HAVE_ECC is set.
+ */
+static unsigned int ecc_sec_info_threshold = 10;
+static unsigned int ecc_sec_warn_threshold = 10000;
+static unsigned int ecc_sec_period = 600;
+static unsigned int ecc_ded_threshold = 2;
+static unsigned int ecc_ded_period = 600;
+
+#ifdef CONFIG_AMD_XGBE_HAVE_ECC
+/* Only expose the ECC parameters if supported */
+module_param(ecc_sec_info_threshold, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ecc_sec_info_threshold,
+                " ECC corrected error informational threshold setting");
+
+module_param(ecc_sec_warn_threshold, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ecc_sec_warn_threshold,
+                " ECC corrected error warning threshold setting");
+
+module_param(ecc_sec_period, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ecc_sec_period, " ECC corrected error period (in seconds)");
+
+module_param(ecc_ded_threshold, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ecc_ded_threshold, " ECC detected error threshold setting");
+
+module_param(ecc_ded_period, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ecc_ded_period, " ECC detected error period (in seconds)");
+#endif
+
 static int xgbe_one_poll(struct napi_struct *, int);
 static int xgbe_all_poll(struct napi_struct *, int);
+static void xgbe_stop(struct xgbe_prv_data *);
 
 static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 {
@@ -160,18 +187,8 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
                channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE +
                                    (DMA_CH_INC * i);
 
-               if (pdata->per_channel_irq) {
-                       /* Get the DMA interrupt (offset 1) */
-                       ret = platform_get_irq(pdata->pdev, i + 1);
-                       if (ret < 0) {
-                               netdev_err(pdata->netdev,
-                                          "platform_get_irq %u failed\n",
-                                          i + 1);
-                               goto err_irq;
-                       }
-
-                       channel->dma_irq = ret;
-               }
+               if (pdata->per_channel_irq)
+                       channel->dma_irq = pdata->channel_irq[i];
 
                if (i < pdata->tx_ring_count) {
                        spin_lock_init(&tx_ring->lock);
@@ -194,9 +211,6 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 
        return 0;
 
-err_irq:
-       kfree(rx_ring);
-
 err_rx_ring:
        kfree(tx_ring);
 
@@ -266,48 +280,161 @@ static int xgbe_calc_rx_buf_size(struct net_device *netdev, unsigned int mtu)
        return rx_buf_size;
 }
 
-static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata)
+static void xgbe_enable_rx_tx_int(struct xgbe_prv_data *pdata,
+                                 struct xgbe_channel *channel)
 {
        struct xgbe_hw_if *hw_if = &pdata->hw_if;
-       struct xgbe_channel *channel;
        enum xgbe_int int_id;
+
+       if (channel->tx_ring && channel->rx_ring)
+               int_id = XGMAC_INT_DMA_CH_SR_TI_RI;
+       else if (channel->tx_ring)
+               int_id = XGMAC_INT_DMA_CH_SR_TI;
+       else if (channel->rx_ring)
+               int_id = XGMAC_INT_DMA_CH_SR_RI;
+       else
+               return;
+
+       hw_if->enable_int(channel, int_id);
+}
+
+static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_channel *channel;
        unsigned int i;
 
        channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (channel->tx_ring && channel->rx_ring)
-                       int_id = XGMAC_INT_DMA_CH_SR_TI_RI;
-               else if (channel->tx_ring)
-                       int_id = XGMAC_INT_DMA_CH_SR_TI;
-               else if (channel->rx_ring)
-                       int_id = XGMAC_INT_DMA_CH_SR_RI;
-               else
-                       continue;
+       for (i = 0; i < pdata->channel_count; i++, channel++)
+               xgbe_enable_rx_tx_int(pdata, channel);
+}
 
-               hw_if->enable_int(channel, int_id);
-       }
+static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata,
+                                  struct xgbe_channel *channel)
+{
+       struct xgbe_hw_if *hw_if = &pdata->hw_if;
+       enum xgbe_int int_id;
+
+       if (channel->tx_ring && channel->rx_ring)
+               int_id = XGMAC_INT_DMA_CH_SR_TI_RI;
+       else if (channel->tx_ring)
+               int_id = XGMAC_INT_DMA_CH_SR_TI;
+       else if (channel->rx_ring)
+               int_id = XGMAC_INT_DMA_CH_SR_RI;
+       else
+               return;
+
+       hw_if->disable_int(channel, int_id);
 }
 
 static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *pdata)
 {
-       struct xgbe_hw_if *hw_if = &pdata->hw_if;
        struct xgbe_channel *channel;
-       enum xgbe_int int_id;
        unsigned int i;
 
        channel = pdata->channel;
-       for (i = 0; i < pdata->channel_count; i++, channel++) {
-               if (channel->tx_ring && channel->rx_ring)
-                       int_id = XGMAC_INT_DMA_CH_SR_TI_RI;
-               else if (channel->tx_ring)
-                       int_id = XGMAC_INT_DMA_CH_SR_TI;
-               else if (channel->rx_ring)
-                       int_id = XGMAC_INT_DMA_CH_SR_RI;
-               else
-                       continue;
+       for (i = 0; i < pdata->channel_count; i++, channel++)
+               xgbe_disable_rx_tx_int(pdata, channel);
+}
+
+static bool xgbe_ecc_sec(struct xgbe_prv_data *pdata, unsigned long *period,
+                        unsigned int *count, const char *area)
+{
+       if (time_before(jiffies, *period)) {
+               (*count)++;
+       } else {
+               *period = jiffies + (ecc_sec_period * HZ);
+               *count = 1;
+       }
+
+       if (*count > ecc_sec_info_threshold)
+               dev_warn_once(pdata->dev,
+                             "%s ECC corrected errors exceed informational threshold\n",
+                             area);
+
+       if (*count > ecc_sec_warn_threshold) {
+               dev_warn_once(pdata->dev,
+                             "%s ECC corrected errors exceed warning threshold\n",
+                             area);
+               return true;
+       }
+
+       return false;
+}
+
+static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period,
+                        unsigned int *count, const char *area)
+{
+       if (time_before(jiffies, *period)) {
+               (*count)++;
+       } else {
+               *period = jiffies + (ecc_ded_period * HZ);
+               *count = 1;
+       }
+
+       if (*count > ecc_ded_threshold) {
+               netdev_alert(pdata->netdev,
+                            "%s ECC detected errors exceed threshold\n",
+                            area);
+               return true;
+       }
+
+       return false;
+}
+
+static irqreturn_t xgbe_ecc_isr(int irq, void *data)
+{
+       struct xgbe_prv_data *pdata = data;
+       unsigned int ecc_isr;
+       bool stop = false;
+
+       /* Mask status with only the interrupts we care about */
+       ecc_isr = XP_IOREAD(pdata, XP_ECC_ISR);
+       ecc_isr &= XP_IOREAD(pdata, XP_ECC_IER);
+       netif_dbg(pdata, intr, pdata->netdev, "ECC_ISR=%#010x\n", ecc_isr);
+
+       if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, TX_DED)) {
+               stop |= xgbe_ecc_ded(pdata, &pdata->tx_ded_period,
+                                    &pdata->tx_ded_count, "TX fifo");
+       }
 
-               hw_if->disable_int(channel, int_id);
+       if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, RX_DED)) {
+               stop |= xgbe_ecc_ded(pdata, &pdata->rx_ded_period,
+                                    &pdata->rx_ded_count, "RX fifo");
        }
+
+       if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, DESC_DED)) {
+               stop |= xgbe_ecc_ded(pdata, &pdata->desc_ded_period,
+                                    &pdata->desc_ded_count,
+                                    "descriptor cache");
+       }
+
+       if (stop) {
+               pdata->hw_if.disable_ecc_ded(pdata);
+               schedule_work(&pdata->stopdev_work);
+               goto out;
+       }
+
+       if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, TX_SEC)) {
+               if (xgbe_ecc_sec(pdata, &pdata->tx_sec_period,
+                                &pdata->tx_sec_count, "TX fifo"))
+                       pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_TX);
+       }
+
+       if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, RX_SEC))
+               if (xgbe_ecc_sec(pdata, &pdata->rx_sec_period,
+                                &pdata->rx_sec_count, "RX fifo"))
+                       pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_RX);
+
+       if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, DESC_SEC))
+               if (xgbe_ecc_sec(pdata, &pdata->desc_sec_period,
+                                &pdata->desc_sec_count, "descriptor cache"))
+                       pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_DESC);
+
+out:
+       /* Clear all ECC interrupts */
+       XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr);
+
+       return IRQ_HANDLED;
 }
 
 static irqreturn_t xgbe_isr(int irq, void *data)
@@ -316,7 +443,7 @@ static irqreturn_t xgbe_isr(int irq, void *data)
        struct xgbe_hw_if *hw_if = &pdata->hw_if;
        struct xgbe_channel *channel;
        unsigned int dma_isr, dma_ch_isr;
-       unsigned int mac_isr, mac_tssr;
+       unsigned int mac_isr, mac_tssr, mac_mdioisr;
        unsigned int i;
 
        /* The DMA interrupt status register also reports MAC and MTL
@@ -353,6 +480,13 @@ static irqreturn_t xgbe_isr(int irq, void *data)
                                /* Turn on polling */
                                __napi_schedule_irqoff(&pdata->napi);
                        }
+               } else {
+                       /* Don't clear Rx/Tx status if doing per channel DMA
+                        * interrupts, these will be cleared by the ISR for
+                        * per channel DMA interrupts.
+                        */
+                       XGMAC_SET_BITS(dma_ch_isr, DMA_CH_SR, TI, 0);
+                       XGMAC_SET_BITS(dma_ch_isr, DMA_CH_SR, RI, 0);
                }
 
                if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RBU))
@@ -362,13 +496,16 @@ static irqreturn_t xgbe_isr(int irq, void *data)
                if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, FBE))
                        schedule_work(&pdata->restart_work);
 
-               /* Clear all interrupt signals */
+               /* Clear interrupt signals */
                XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_ch_isr);
        }
 
        if (XGMAC_GET_BITS(dma_isr, DMA_ISR, MACIS)) {
                mac_isr = XGMAC_IOREAD(pdata, MAC_ISR);
 
+               netif_dbg(pdata, intr, pdata->netdev, "MAC_ISR=%#010x\n",
+                         mac_isr);
+
                if (XGMAC_GET_BITS(mac_isr, MAC_ISR, MMCTXIS))
                        hw_if->tx_mmc_int(pdata);
 
@@ -378,6 +515,9 @@ static irqreturn_t xgbe_isr(int irq, void *data)
                if (XGMAC_GET_BITS(mac_isr, MAC_ISR, TSIS)) {
                        mac_tssr = XGMAC_IOREAD(pdata, MAC_TSSR);
 
+                       netif_dbg(pdata, intr, pdata->netdev,
+                                 "MAC_TSSR=%#010x\n", mac_tssr);
+
                        if (XGMAC_GET_BITS(mac_tssr, MAC_TSSR, TXTSC)) {
                                /* Read Tx Timestamp to clear interrupt */
                                pdata->tx_tstamp =
@@ -386,8 +526,31 @@ static irqreturn_t xgbe_isr(int irq, void *data)
                                           &pdata->tx_tstamp_work);
                        }
                }
+
+               if (XGMAC_GET_BITS(mac_isr, MAC_ISR, SMI)) {
+                       mac_mdioisr = XGMAC_IOREAD(pdata, MAC_MDIOISR);
+
+                       netif_dbg(pdata, intr, pdata->netdev,
+                                 "MAC_MDIOISR=%#010x\n", mac_mdioisr);
+
+                       if (XGMAC_GET_BITS(mac_mdioisr, MAC_MDIOISR,
+                                          SNGLCOMPINT))
+                               complete(&pdata->mdio_complete);
+               }
        }
 
+       /* If there is not a separate AN irq, handle it here */
+       if (pdata->dev_irq == pdata->an_irq)
+               pdata->phy_if.an_isr(irq, pdata);
+
+       /* If there is not a separate ECC irq, handle it here */
+       if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq))
+               xgbe_ecc_isr(irq, pdata);
+
+       /* If there is not a separate I2C irq, handle it here */
+       if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq))
+               pdata->i2c_if.i2c_isr(irq, pdata);
+
 isr_done:
        return IRQ_HANDLED;
 }
@@ -395,18 +558,29 @@ isr_done:
 static irqreturn_t xgbe_dma_isr(int irq, void *data)
 {
        struct xgbe_channel *channel = data;
+       struct xgbe_prv_data *pdata = channel->pdata;
+       unsigned int dma_status;
 
        /* Per channel DMA interrupts are enabled, so we use the per
         * channel napi structure and not the private data napi structure
         */
        if (napi_schedule_prep(&channel->napi)) {
                /* Disable Tx and Rx interrupts */
-               disable_irq_nosync(channel->dma_irq);
+               if (pdata->channel_irq_mode)
+                       xgbe_disable_rx_tx_int(pdata, channel);
+               else
+                       disable_irq_nosync(channel->dma_irq);
 
                /* Turn on polling */
                __napi_schedule_irqoff(&channel->napi);
        }
 
+       /* Clear Tx/Rx signals */
+       dma_status = 0;
+       XGMAC_SET_BITS(dma_status, DMA_CH_SR, TI, 1);
+       XGMAC_SET_BITS(dma_status, DMA_CH_SR, RI, 1);
+       XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_status);
+
        return IRQ_HANDLED;
 }
 
@@ -423,7 +597,10 @@ static void xgbe_tx_timer(unsigned long data)
        if (napi_schedule_prep(napi)) {
                /* Disable Tx and Rx interrupts */
                if (pdata->per_channel_irq)
-                       disable_irq_nosync(channel->dma_irq);
+                       if (pdata->channel_irq_mode)
+                               xgbe_disable_rx_tx_int(pdata, channel);
+                       else
+                               disable_irq_nosync(channel->dma_irq);
                else
                        xgbe_disable_rx_tx_ints(pdata);
 
@@ -590,6 +767,10 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata)
        hw_feat->tx_ch_cnt++;
        hw_feat->tc_cnt++;
 
+       /* Translate the fifo sizes into actual numbers */
+       hw_feat->rx_fifo_size = 1 << (hw_feat->rx_fifo_size + 7);
+       hw_feat->tx_fifo_size = 1 << (hw_feat->tx_fifo_size + 7);
+
        DBGPR("<--xgbe_get_all_hw_features\n");
 }
 
@@ -652,6 +833,16 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
                return ret;
        }
 
+       if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) {
+               ret = devm_request_irq(pdata->dev, pdata->ecc_irq, xgbe_ecc_isr,
+                                      0, pdata->ecc_name, pdata);
+               if (ret) {
+                       netdev_alert(netdev, "error requesting ecc irq %d\n",
+                                    pdata->ecc_irq);
+                       goto err_dev_irq;
+               }
+       }
+
        if (!pdata->per_channel_irq)
                return 0;
 
@@ -668,17 +859,21 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
                if (ret) {
                        netdev_alert(netdev, "error requesting irq %d\n",
                                     channel->dma_irq);
-                       goto err_irq;
+                       goto err_dma_irq;
                }
        }
 
        return 0;
 
-err_irq:
+err_dma_irq:
        /* Using an unsigned int, 'i' will go to UINT_MAX and exit */
        for (i--, channel--; i < pdata->channel_count; i--, channel--)
                devm_free_irq(pdata->dev, channel->dma_irq, channel);
 
+       if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
+               devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
+
+err_dev_irq:
        devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
 
        return ret;
@@ -691,6 +886,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
 
        devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
 
+       if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
+               devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
+
        if (!pdata->per_channel_irq)
                return;
 
@@ -778,7 +976,7 @@ static void xgbe_free_rx_data(struct xgbe_prv_data *pdata)
        DBGPR("<--xgbe_free_rx_data\n");
 }
 
-static int xgbe_phy_init(struct xgbe_prv_data *pdata)
+static int xgbe_phy_reset(struct xgbe_prv_data *pdata)
 {
        pdata->phy_link = -1;
        pdata->phy_speed = SPEED_UNKNOWN;
@@ -874,16 +1072,16 @@ static int xgbe_start(struct xgbe_prv_data *pdata)
 
        hw_if->init(pdata);
 
-       ret = phy_if->phy_start(pdata);
-       if (ret)
-               goto err_phy;
-
        xgbe_napi_enable(pdata, 1);
 
        ret = xgbe_request_irqs(pdata);
        if (ret)
                goto err_napi;
 
+       ret = phy_if->phy_start(pdata);
+       if (ret)
+               goto err_irqs;
+
        hw_if->enable_tx(pdata);
        hw_if->enable_rx(pdata);
 
@@ -892,16 +1090,18 @@ static int xgbe_start(struct xgbe_prv_data *pdata)
        xgbe_start_timers(pdata);
        queue_work(pdata->dev_workqueue, &pdata->service_work);
 
+       clear_bit(XGBE_STOPPED, &pdata->dev_state);
+
        DBGPR("<--xgbe_start\n");
 
        return 0;
 
+err_irqs:
+       xgbe_free_irqs(pdata);
+
 err_napi:
        xgbe_napi_disable(pdata, 1);
 
-       phy_if->phy_stop(pdata);
-
-err_phy:
        hw_if->exit(pdata);
 
        return ret;
@@ -918,6 +1118,9 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
 
        DBGPR("-->xgbe_stop\n");
 
+       if (test_bit(XGBE_STOPPED, &pdata->dev_state))
+               return;
+
        netif_tx_stop_all_queues(netdev);
 
        xgbe_stop_timers(pdata);
@@ -943,9 +1146,29 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
                netdev_tx_reset_queue(txq);
        }
 
+       set_bit(XGBE_STOPPED, &pdata->dev_state);
+
        DBGPR("<--xgbe_stop\n");
 }
 
+static void xgbe_stopdev(struct work_struct *work)
+{
+       struct xgbe_prv_data *pdata = container_of(work,
+                                                  struct xgbe_prv_data,
+                                                  stopdev_work);
+
+       rtnl_lock();
+
+       xgbe_stop(pdata);
+
+       xgbe_free_tx_data(pdata);
+       xgbe_free_rx_data(pdata);
+
+       rtnl_unlock();
+
+       netdev_alert(pdata->netdev, "device stopped\n");
+}
+
 static void xgbe_restart_dev(struct xgbe_prv_data *pdata)
 {
        DBGPR("-->xgbe_restart_dev\n");
@@ -1292,8 +1515,8 @@ static int xgbe_open(struct net_device *netdev)
 
        DBGPR("-->xgbe_open\n");
 
-       /* Initialize the phy */
-       ret = xgbe_phy_init(pdata);
+       /* Reset the phy settings */
+       ret = xgbe_phy_reset(pdata);
        if (ret)
                return ret;
 
@@ -1328,6 +1551,7 @@ static int xgbe_open(struct net_device *netdev)
 
        INIT_WORK(&pdata->service_work, xgbe_service);
        INIT_WORK(&pdata->restart_work, xgbe_restart);
+       INIT_WORK(&pdata->stopdev_work, xgbe_stopdev);
        INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp);
        xgbe_init_timers(pdata);
 
@@ -2036,6 +2260,7 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget)
 {
        struct xgbe_channel *channel = container_of(napi, struct xgbe_channel,
                                                    napi);
+       struct xgbe_prv_data *pdata = channel->pdata;
        int processed = 0;
 
        DBGPR("-->xgbe_one_poll: budget=%d\n", budget);
@@ -2052,7 +2277,10 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget)
                napi_complete_done(napi, processed);
 
                /* Enable Tx and Rx interrupts */
-               enable_irq(channel->dma_irq);
+               if (pdata->channel_irq_mode)
+                       xgbe_enable_rx_tx_int(pdata, channel);
+               else
+                       enable_irq(channel->dma_irq);
        }
 
        DBGPR("<--xgbe_one_poll: received = %d\n", processed);
index 4007b429c80c4a962c628199623028f32ac59792..920566a3a5996c3032e325e42ccd4736844a11de 100644 (file)
@@ -272,97 +272,86 @@ static int xgbe_set_pauseparam(struct net_device *netdev,
        return ret;
 }
 
-static int xgbe_get_settings(struct net_device *netdev,
-                            struct ethtool_cmd *cmd)
+static int xgbe_get_link_ksettings(struct net_device *netdev,
+                                  struct ethtool_link_ksettings *cmd)
 {
        struct xgbe_prv_data *pdata = netdev_priv(netdev);
 
-       cmd->phy_address = pdata->phy.address;
+       cmd->base.phy_address = pdata->phy.address;
 
-       cmd->supported = pdata->phy.supported;
-       cmd->advertising = pdata->phy.advertising;
-       cmd->lp_advertising = pdata->phy.lp_advertising;
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               pdata->phy.supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               pdata->phy.advertising);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising,
+                                               pdata->phy.lp_advertising);
 
-       cmd->autoneg = pdata->phy.autoneg;
-       ethtool_cmd_speed_set(cmd, pdata->phy.speed);
-       cmd->duplex = pdata->phy.duplex;
+       cmd->base.autoneg = pdata->phy.autoneg;
+       cmd->base.speed = pdata->phy.speed;
+       cmd->base.duplex = pdata->phy.duplex;
 
-       cmd->port = PORT_NONE;
-       cmd->transceiver = XCVR_INTERNAL;
+       cmd->base.port = PORT_NONE;
 
        return 0;
 }
 
-static int xgbe_set_settings(struct net_device *netdev,
-                            struct ethtool_cmd *cmd)
+static int xgbe_set_link_ksettings(struct net_device *netdev,
+                                  const struct ethtool_link_ksettings *cmd)
 {
        struct xgbe_prv_data *pdata = netdev_priv(netdev);
+       u32 advertising;
        u32 speed;
        int ret;
 
-       speed = ethtool_cmd_speed(cmd);
+       speed = cmd->base.speed;
 
-       if (cmd->phy_address != pdata->phy.address) {
+       if (cmd->base.phy_address != pdata->phy.address) {
                netdev_err(netdev, "invalid phy address %hhu\n",
-                          cmd->phy_address);
+                          cmd->base.phy_address);
                return -EINVAL;
        }
 
-       if ((cmd->autoneg != AUTONEG_ENABLE) &&
-           (cmd->autoneg != AUTONEG_DISABLE)) {
+       if ((cmd->base.autoneg != AUTONEG_ENABLE) &&
+           (cmd->base.autoneg != AUTONEG_DISABLE)) {
                netdev_err(netdev, "unsupported autoneg %hhu\n",
-                          cmd->autoneg);
+                          cmd->base.autoneg);
                return -EINVAL;
        }
 
-       if (cmd->autoneg == AUTONEG_DISABLE) {
-               switch (speed) {
-               case SPEED_10000:
-                       break;
-               case SPEED_2500:
-                       if (pdata->speed_set != XGBE_SPEEDSET_2500_10000) {
-                               netdev_err(netdev, "unsupported speed %u\n",
-                                          speed);
-                               return -EINVAL;
-                       }
-                       break;
-               case SPEED_1000:
-                       if (pdata->speed_set != XGBE_SPEEDSET_1000_10000) {
-                               netdev_err(netdev, "unsupported speed %u\n",
-                                          speed);
-                               return -EINVAL;
-                       }
-                       break;
-               default:
+       if (cmd->base.autoneg == AUTONEG_DISABLE) {
+               if (!pdata->phy_if.phy_valid_speed(pdata, speed)) {
                        netdev_err(netdev, "unsupported speed %u\n", speed);
                        return -EINVAL;
                }
 
-               if (cmd->duplex != DUPLEX_FULL) {
+               if (cmd->base.duplex != DUPLEX_FULL) {
                        netdev_err(netdev, "unsupported duplex %hhu\n",
-                                  cmd->duplex);
+                                  cmd->base.duplex);
                        return -EINVAL;
                }
        }
 
+       ethtool_convert_link_mode_to_legacy_u32(&advertising,
+                                               cmd->link_modes.advertising);
+
        netif_dbg(pdata, link, netdev,
                  "requested advertisement %#x, phy supported %#x\n",
-                 cmd->advertising, pdata->phy.supported);
+                 advertising, pdata->phy.supported);
 
-       cmd->advertising &= pdata->phy.supported;
-       if ((cmd->autoneg == AUTONEG_ENABLE) && !cmd->advertising) {
+       advertising &= pdata->phy.supported;
+       if ((cmd->base.autoneg == AUTONEG_ENABLE) && !advertising) {
                netdev_err(netdev,
                           "unsupported requested advertisement\n");
                return -EINVAL;
        }
 
        ret = 0;
-       pdata->phy.autoneg = cmd->autoneg;
+       pdata->phy.autoneg = cmd->base.autoneg;
        pdata->phy.speed = speed;
-       pdata->phy.duplex = cmd->duplex;
-       pdata->phy.advertising = cmd->advertising;
+       pdata->phy.duplex = cmd->base.duplex;
+       pdata->phy.advertising = advertising;
 
-       if (cmd->autoneg == AUTONEG_ENABLE)
+       if (cmd->base.autoneg == AUTONEG_ENABLE)
                pdata->phy.advertising |= ADVERTISED_Autoneg;
        else
                pdata->phy.advertising &= ~ADVERTISED_Autoneg;
@@ -602,8 +591,6 @@ static int xgbe_get_ts_info(struct net_device *netdev,
 }
 
 static const struct ethtool_ops xgbe_ethtool_ops = {
-       .get_settings = xgbe_get_settings,
-       .set_settings = xgbe_set_settings,
        .get_drvinfo = xgbe_get_drvinfo,
        .get_msglevel = xgbe_get_msglevel,
        .set_msglevel = xgbe_set_msglevel,
@@ -621,6 +608,8 @@ static const struct ethtool_ops xgbe_ethtool_ops = {
        .get_rxfh = xgbe_get_rxfh,
        .set_rxfh = xgbe_set_rxfh,
        .get_ts_info = xgbe_get_ts_info,
+       .get_link_ksettings = xgbe_get_link_ksettings,
+       .set_link_ksettings = xgbe_set_link_ksettings,
 };
 
 const struct ethtool_ops *xgbe_get_ethtool_ops(void)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
new file mode 100644 (file)
index 0000000..0c7088a
--- /dev/null
@@ -0,0 +1,492 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/delay.h>
+#include <linux/completion.h>
+#include <linux/mutex.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+/* Maximum number of times xgbe_i2c_abort() polls the ABORT bit */
+#define XGBE_ABORT_COUNT       500
+/* Maximum number of enable/disable attempts in xgbe_i2c_set_enable() */
+#define XGBE_DISABLE_COUNT     1000
+
+/* Value written to the IC_CON SPEED field by xgbe_i2c_set_mode() */
+#define XGBE_STD_SPEED         1
+
+/* Interrupt bits, expressed as masks built from the IC_RAW_INTR_STAT
+ * bit indexes
+ */
+#define XGBE_INTR_RX_FULL      BIT(IC_RAW_INTR_STAT_RX_FULL_INDEX)
+#define XGBE_INTR_TX_EMPTY     BIT(IC_RAW_INTR_STAT_TX_EMPTY_INDEX)
+#define XGBE_INTR_TX_ABRT      BIT(IC_RAW_INTR_STAT_TX_ABRT_INDEX)
+#define XGBE_INTR_STOP_DET     BIT(IC_RAW_INTR_STAT_STOP_DET_INDEX)
+/* Mask written to IC_INTR_MASK by xgbe_i2c_enable_interrupts() */
+#define XGBE_DEFAULT_INT_MASK  (XGBE_INTR_RX_FULL  |   \
+                                XGBE_INTR_TX_EMPTY |   \
+                                XGBE_INTR_TX_ABRT  |   \
+                                XGBE_INTR_STOP_DET)
+
+/* Command word flags; XGBE_I2C_READ is the command word that
+ * xgbe_i2c_write() queues for each byte of a read operation.
+ * NOTE(review): XGBE_I2C_STOP is not referenced in this file, the STOP
+ * bit is set through XI2C_SET_BITS() instead - confirm it is still needed.
+ */
+#define XGBE_I2C_READ          BIT(8)
+#define XGBE_I2C_STOP          BIT(9)
+
+/* Request an abort of the current I2C activity and wait for it to finish
+ *
+ * Sets the EN and ABORT bits in IC_ENABLE, then polls the ABORT bit up to
+ * XGBE_ABORT_COUNT times, sleeping 500-600us between polls.
+ *
+ * Returns 0 once the ABORT bit reads back as zero, or -EBUSY if it is
+ * still set after the last poll.
+ */
+static int xgbe_i2c_abort(struct xgbe_prv_data *pdata)
+{
+       unsigned int wait = XGBE_ABORT_COUNT;
+
+       /* Must be enabled to recognize the abort request */
+       XI2C_IOWRITE_BITS(pdata, IC_ENABLE, EN, 1);
+
+       /* Issue the abort */
+       XI2C_IOWRITE_BITS(pdata, IC_ENABLE, ABORT, 1);
+
+       /* ABORT reading back as zero is treated as "abort complete" */
+       while (wait--) {
+               if (!XI2C_IOREAD_BITS(pdata, IC_ENABLE, ABORT))
+                       return 0;
+
+               usleep_range(500, 600);
+       }
+
+       return -EBUSY;
+}
+
+/* Enable or disable the I2C controller and wait for the change to apply
+ *
+ * Writes the requested state to the EN bit of IC_ENABLE and checks that
+ * the EN bit of IC_ENABLE_STATUS matches it. The write is repeated on
+ * every attempt, up to XGBE_DISABLE_COUNT times, sleeping 100-110us
+ * between attempts.
+ *
+ * Returns 0 once the status matches, or -EBUSY if it never does.
+ */
+static int xgbe_i2c_set_enable(struct xgbe_prv_data *pdata, bool enable)
+{
+       unsigned int wait = XGBE_DISABLE_COUNT;
+       unsigned int mode = enable ? 1 : 0;
+
+       while (wait--) {
+               XI2C_IOWRITE_BITS(pdata, IC_ENABLE, EN, mode);
+               if (XI2C_IOREAD_BITS(pdata, IC_ENABLE_STATUS, EN) == mode)
+                       return 0;
+
+               usleep_range(100, 110);
+       }
+
+       return -EBUSY;
+}
+
+/* Disable the I2C controller, falling back to an abort if necessary
+ *
+ * First tries a plain disable. If that fails, an abort is issued through
+ * xgbe_i2c_abort() and, when the abort succeeds, the disable is retried.
+ *
+ * Returns 0 on success or the negative errno from the step that failed
+ * (xgbe_i2c_abort() or the final xgbe_i2c_set_enable()).
+ */
+static int xgbe_i2c_disable(struct xgbe_prv_data *pdata)
+{
+       /* Signed: holds the negative errno values returned by the helpers */
+       int ret;
+
+       ret = xgbe_i2c_set_enable(pdata, false);
+       if (ret) {
+               /* Disable failed, try an abort */
+               ret = xgbe_i2c_abort(pdata);
+               if (ret)
+                       return ret;
+
+               /* Abort succeeded, try to disable again */
+               ret = xgbe_i2c_set_enable(pdata, false);
+       }
+
+       return ret;
+}
+
+/* Enable the I2C controller
+ *
+ * Returns the result of xgbe_i2c_set_enable(): 0 on success, -EBUSY if
+ * the controller never reports the enabled state.
+ */
+static int xgbe_i2c_enable(struct xgbe_prv_data *pdata)
+{
+       return xgbe_i2c_set_enable(pdata, true);
+}
+
+/* Clear all pending I2C interrupts
+ *
+ * Performs a read of IC_CLR_INTR and discards the value.
+ * NOTE(review): presumably the register is read-to-clear - confirm
+ * against the controller documentation.
+ */
+static void xgbe_i2c_clear_all_interrupts(struct xgbe_prv_data *pdata)
+{
+       XI2C_IOREAD(pdata, IC_CLR_INTR);
+}
+
+/* Mask every I2C interrupt source by writing zero to IC_INTR_MASK */
+static void xgbe_i2c_disable_interrupts(struct xgbe_prv_data *pdata)
+{
+       XI2C_IOWRITE(pdata, IC_INTR_MASK, 0);
+}
+
+/* Program IC_INTR_MASK with the default set of sources
+ * (RX_FULL, TX_EMPTY, TX_ABRT and STOP_DET)
+ */
+static void xgbe_i2c_enable_interrupts(struct xgbe_prv_data *pdata)
+{
+       XI2C_IOWRITE(pdata, IC_INTR_MASK, XGBE_DEFAULT_INT_MASK);
+}
+
+/* Queue as many command words as the Tx fifo can currently take
+ *
+ * The number of free slots is the Tx fifo size minus the level read
+ * from IC_TXFLR. For a read operation each command word is
+ * XGBE_I2C_READ; otherwise it is the next byte taken from the operation
+ * buffer. The STOP bit is set on the last command word of the operation.
+ * Once nothing is left to queue, TX_EMPTY is removed from IC_INTR_MASK.
+ */
+static void xgbe_i2c_write(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
+       unsigned int tx_slots;
+       unsigned int cmd;
+
+       /* Configured to never receive Rx overflows, so fill up Tx fifo */
+       tx_slots = pdata->i2c.tx_fifo_size - XI2C_IOREAD(pdata, IC_TXFLR);
+       while (tx_slots && state->tx_len) {
+               if (state->op->cmd == XGBE_I2C_CMD_READ)
+                       cmd = XGBE_I2C_READ;
+               else
+                       cmd = *state->tx_buf++;
+
+               /* Last command word of the operation: request a STOP */
+               if (state->tx_len == 1)
+                       XI2C_SET_BITS(cmd, IC_DATA_CMD, STOP, 1);
+
+               XI2C_IOWRITE(pdata, IC_DATA_CMD, cmd);
+
+               tx_slots--;
+               state->tx_len--;
+       }
+
+       /* No more Tx operations, so ignore TX_EMPTY and return */
+       if (!state->tx_len)
+               XI2C_IOWRITE_BITS(pdata, IC_INTR_MASK, TX_EMPTY, 0);
+}
+
+/* Drain received data from the Rx fifo into the operation buffer
+ *
+ * Does nothing unless the current operation is a read. Otherwise copies
+ * entries from IC_DATA_CMD into the receive buffer until either the
+ * level read from IC_RXFLR or the remaining receive length is exhausted.
+ */
+static void xgbe_i2c_read(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
+       unsigned int rx_slots;
+
+       /* Anything to be read? */
+       if (state->op->cmd != XGBE_I2C_CMD_READ)
+               return;
+
+       /* IC_RXFLR is sampled once; only that many entries are consumed */
+       rx_slots = XI2C_IOREAD(pdata, IC_RXFLR);
+       while (rx_slots && state->rx_len) {
+               *state->rx_buf++ = XI2C_IOREAD(pdata, IC_DATA_CMD);
+               state->rx_len--;
+               rx_slots--;
+       }
+}
+
+/* Acknowledge the TX_ABRT and STOP_DET conditions reported in @isr
+ *
+ * For TX_ABRT, IC_TX_ABRT_SOURCE is saved into the operation state
+ * before IC_CLR_TX_ABRT is read. For STOP_DET, IC_CLR_STOP_DET is read.
+ * Other bits in @isr are not touched here.
+ */
+static void xgbe_i2c_clear_isr_interrupts(struct xgbe_prv_data *pdata,
+                                         unsigned int isr)
+{
+       struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
+
+       if (isr & XGBE_INTR_TX_ABRT) {
+               /* Capture the abort reason for xgbe_i2c_xfer() to inspect */
+               state->tx_abort_source = XI2C_IOREAD(pdata, IC_TX_ABRT_SOURCE);
+               XI2C_IOREAD(pdata, IC_CLR_TX_ABRT);
+       }
+
+       if (isr & XGBE_INTR_STOP_DET)
+               XI2C_IOREAD(pdata, IC_CLR_STOP_DET);
+}
+
+/* I2C interrupt handler
+ *
+ * @data is the driver private data (struct xgbe_prv_data).
+ *
+ * Reads IC_RAW_INTR_STAT and acknowledges TX_ABRT/STOP_DET. On TX_ABRT
+ * the interrupts are masked and the operation result is set to -EIO.
+ * Otherwise the Rx fifo is drained and the Tx fifo refilled. The
+ * waiter in xgbe_i2c_xfer() is completed when an error has been
+ * recorded or a STOP condition was detected.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t xgbe_i2c_isr(int irq, void *data)
+{
+       struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+       struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
+       unsigned int isr;
+
+       isr = XI2C_IOREAD(pdata, IC_RAW_INTR_STAT);
+       netif_dbg(pdata, intr, pdata->netdev,
+                 "I2C interrupt received: status=%#010x\n", isr);
+
+       xgbe_i2c_clear_isr_interrupts(pdata, isr);
+
+       if (isr & XGBE_INTR_TX_ABRT) {
+               netif_dbg(pdata, link, pdata->netdev,
+                         "I2C TX_ABRT received (%#010x) for target %#04x\n",
+                         state->tx_abort_source, state->op->target);
+
+               xgbe_i2c_disable_interrupts(pdata);
+
+               state->ret = -EIO;
+               goto out;
+       }
+
+       /* Check for data in the Rx fifo */
+       xgbe_i2c_read(pdata);
+
+       /* Fill up the Tx fifo next */
+       xgbe_i2c_write(pdata);
+
+out:
+       /* Complete on an error or STOP condition */
+       if (state->ret || XI2C_GET_BITS(isr, IC_RAW_INTR_STAT, STOP_DET))
+               complete(&pdata->i2c_complete);
+
+       return IRQ_HANDLED;
+}
+
+/* Configure the controller operating mode
+ *
+ * Read-modify-write of IC_CON that sets MASTER_MODE, SLAVE_DISABLE,
+ * RESTART_EN and RX_FIFO_FULL_HOLD to 1 and SPEED to XGBE_STD_SPEED;
+ * the remaining IC_CON bits keep the values that were read.
+ */
+static void xgbe_i2c_set_mode(struct xgbe_prv_data *pdata)
+{
+       unsigned int reg;
+
+       reg = XI2C_IOREAD(pdata, IC_CON);
+       XI2C_SET_BITS(reg, IC_CON, MASTER_MODE, 1);
+       XI2C_SET_BITS(reg, IC_CON, SLAVE_DISABLE, 1);
+       XI2C_SET_BITS(reg, IC_CON, RESTART_EN, 1);
+       XI2C_SET_BITS(reg, IC_CON, SPEED, XGBE_STD_SPEED);
+       XI2C_SET_BITS(reg, IC_CON, RX_FIFO_FULL_HOLD, 1);
+       XI2C_IOWRITE(pdata, IC_CON, reg);
+}
+
+/* Read the controller capabilities from IC_COMP_PARAM_1
+ *
+ * Stores the MAX_SPEED_MODE, RX_BUFFER_DEPTH and TX_BUFFER_DEPTH fields
+ * in pdata->i2c (max_speed_mode, rx_fifo_size, tx_fifo_size) and logs
+ * them when probe messages are enabled.
+ */
+static void xgbe_i2c_get_features(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_i2c *i2c = &pdata->i2c;
+       unsigned int reg;
+
+       reg = XI2C_IOREAD(pdata, IC_COMP_PARAM_1);
+       i2c->max_speed_mode = XI2C_GET_BITS(reg, IC_COMP_PARAM_1,
+                                           MAX_SPEED_MODE);
+       i2c->rx_fifo_size = XI2C_GET_BITS(reg, IC_COMP_PARAM_1,
+                                         RX_BUFFER_DEPTH);
+       i2c->tx_fifo_size = XI2C_GET_BITS(reg, IC_COMP_PARAM_1,
+                                         TX_BUFFER_DEPTH);
+
+       if (netif_msg_probe(pdata))
+               dev_dbg(pdata->dev, "I2C features: %s=%u, %s=%u, %s=%u\n",
+                       "MAX_SPEED_MODE", i2c->max_speed_mode,
+                       "RX_BUFFER_DEPTH", i2c->rx_fifo_size,
+                       "TX_BUFFER_DEPTH", i2c->tx_fifo_size);
+}
+
+/* Select the target device by writing @addr to IC_TAR */
+static void xgbe_i2c_set_target(struct xgbe_prv_data *pdata, unsigned int addr)
+{
+       XI2C_IOWRITE(pdata, IC_TAR, addr);
+}
+
+/* I2C interrupt entry point exported through the i2c_isr function pointer
+ *
+ * Returns IRQ_HANDLED without further work when IC_RAW_INTR_STAT reads
+ * zero; otherwise hands off to xgbe_i2c_isr().
+ * NOTE(review): presumably called from the device interrupt handler when
+ * the I2C block shares the device irq - confirm against the caller.
+ */
+static irqreturn_t xgbe_i2c_combined_isr(int irq, struct xgbe_prv_data *pdata)
+{
+       if (!XI2C_IOREAD(pdata, IC_RAW_INTR_STAT))
+               return IRQ_HANDLED;
+
+       return xgbe_i2c_isr(irq, pdata);
+}
+
+/* Perform a single I2C operation and wait for it to finish
+ *
+ * Runs under pdata->i2c_mutex. The controller is disabled, the target
+ * address is programmed, the per-operation state is reset and the
+ * controller is re-enabled. The transfer itself is driven by the
+ * interrupt handler; this function waits up to HZ jiffies for it to
+ * signal completion. The same buffer and length from @op are used for
+ * both the transmit and receive sides of the state.
+ *
+ * Returns 0 on success, otherwise:
+ *   -ETIMEDOUT if no completion arrived in time,
+ *   -ENOTCONN  if the abort source has IC_TX_ABRT_7B_ADDR_NOACK set,
+ *   -EAGAIN    if the abort source has IC_TX_ABRT_ARB_LOST set,
+ *   the error recorded by the interrupt handler for any other abort, or
+ *   the error from xgbe_i2c_disable()/xgbe_i2c_enable() if the
+ *   controller could not be disabled/enabled.
+ */
+static int xgbe_i2c_xfer(struct xgbe_prv_data *pdata, struct xgbe_i2c_op *op)
+{
+       struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
+       int ret;
+
+       mutex_lock(&pdata->i2c_mutex);
+
+       reinit_completion(&pdata->i2c_complete);
+
+       ret = xgbe_i2c_disable(pdata);
+       if (ret) {
+               netdev_err(pdata->netdev, "failed to disable i2c master\n");
+               goto unlock;
+       }
+
+       xgbe_i2c_set_target(pdata, op->target);
+
+       /* Start from a clean state: ret and tx_abort_source become zero */
+       memset(state, 0, sizeof(*state));
+       state->op = op;
+       state->tx_len = op->len;
+       state->tx_buf = op->buf;
+       state->rx_len = op->len;
+       state->rx_buf = op->buf;
+
+       xgbe_i2c_clear_all_interrupts(pdata);
+       ret = xgbe_i2c_enable(pdata);
+       if (ret) {
+               /* Interrupts have not been enabled yet, so just unlock */
+               netdev_err(pdata->netdev, "failed to enable i2c master\n");
+               goto unlock;
+       }
+
+       /* Enabling the interrupts will cause the TX FIFO empty interrupt to
+        * fire and begin to process the command via the ISR.
+        */
+       xgbe_i2c_enable_interrupts(pdata);
+
+       if (!wait_for_completion_timeout(&pdata->i2c_complete, HZ)) {
+               netdev_err(pdata->netdev, "i2c operation timed out\n");
+               ret = -ETIMEDOUT;
+               goto disable;
+       }
+
+       /* Refine the generic error using the saved abort source */
+       ret = state->ret;
+       if (ret) {
+               if (state->tx_abort_source & IC_TX_ABRT_7B_ADDR_NOACK)
+                       ret = -ENOTCONN;
+               else if (state->tx_abort_source & IC_TX_ABRT_ARB_LOST)
+                       ret = -EAGAIN;
+       }
+
+disable:
+       /* The result of this disable is not reported to the caller */
+       xgbe_i2c_disable_interrupts(pdata);
+       xgbe_i2c_disable(pdata);
+
+unlock:
+       mutex_unlock(&pdata->i2c_mutex);
+
+       return ret;
+}
+
+/* Stop I2C support
+ *
+ * No-op when I2C has not been started. Otherwise clears the started
+ * flag, masks the interrupts, disables the controller (the result is
+ * ignored), clears pending interrupts and frees the I2C irq when it is
+ * distinct from the device irq.
+ */
+static void xgbe_i2c_stop(struct xgbe_prv_data *pdata)
+{
+       if (!pdata->i2c.started)
+               return;
+
+       netif_dbg(pdata, link, pdata->netdev, "stopping I2C\n");
+
+       pdata->i2c.started = 0;
+
+       xgbe_i2c_disable_interrupts(pdata);
+       xgbe_i2c_disable(pdata);
+       xgbe_i2c_clear_all_interrupts(pdata);
+
+       /* Mirrors the conditional irq request in xgbe_i2c_start() */
+       if (pdata->dev_irq != pdata->i2c_irq)
+               devm_free_irq(pdata->dev, pdata->i2c_irq, pdata);
+}
+
+/* Start I2C support
+ *
+ * No-op (returns 0) when I2C is already started. When the I2C irq is
+ * distinct from the device irq, xgbe_i2c_isr() is registered for it.
+ * The started flag is only set after the irq request has succeeded.
+ *
+ * Returns 0 on success or the error from devm_request_irq().
+ */
+static int xgbe_i2c_start(struct xgbe_prv_data *pdata)
+{
+       int ret;
+
+       if (pdata->i2c.started)
+               return 0;
+
+       netif_dbg(pdata, link, pdata->netdev, "starting I2C\n");
+
+       /* If we have a separate I2C irq, enable it */
+       if (pdata->dev_irq != pdata->i2c_irq) {
+               ret = devm_request_irq(pdata->dev, pdata->i2c_irq,
+                                      xgbe_i2c_isr, 0, pdata->i2c_name,
+                                      pdata);
+               if (ret) {
+                       netdev_err(pdata->netdev, "i2c irq request failed\n");
+                       return ret;
+               }
+       }
+
+       pdata->i2c.started = 1;
+
+       return 0;
+}
+
+/* One-time I2C controller initialization
+ *
+ * Masks the interrupts, disables the controller, reads its features,
+ * programs the operating mode and clears any pending interrupts. The
+ * controller is left disabled.
+ *
+ * Returns 0 on success or the error from xgbe_i2c_disable().
+ */
+static int xgbe_i2c_init(struct xgbe_prv_data *pdata)
+{
+       int ret;
+
+       xgbe_i2c_disable_interrupts(pdata);
+
+       ret = xgbe_i2c_disable(pdata);
+       if (ret) {
+               dev_err(pdata->dev, "failed to disable i2c master\n");
+               return ret;
+       }
+
+       xgbe_i2c_get_features(pdata);
+
+       xgbe_i2c_set_mode(pdata);
+
+       xgbe_i2c_clear_all_interrupts(pdata);
+
+       return 0;
+}
+
+/* Populate @i2c_if with this file's I2C implementation
+ *
+ * Note that i2c_isr is set to xgbe_i2c_combined_isr(), not to
+ * xgbe_i2c_isr(); the latter is only registered directly by
+ * xgbe_i2c_start() when a separate I2C irq exists.
+ */
+void xgbe_init_function_ptrs_i2c(struct xgbe_i2c_if *i2c_if)
+{
+       i2c_if->i2c_init                = xgbe_i2c_init;
+
+       i2c_if->i2c_start               = xgbe_i2c_start;
+       i2c_if->i2c_stop                = xgbe_i2c_stop;
+
+       i2c_if->i2c_xfer                = xgbe_i2c_xfer;
+
+       i2c_if->i2c_isr                 = xgbe_i2c_combined_isr;
+}
index 6997f1110ecebaf9e5e203c82146fd918acd01ad..b87a89988ffd60979a4b19508911f7910e1b3bbb 100644 (file)
 
 #include <linux/module.h>
 #include <linux/device.h>
-#include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_net.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
-#include <linux/clk.h>
-#include <linux/property.h>
-#include <linux/acpi.h>
-#include <linux/mdio.h>
 
 #include "xgbe.h"
 #include "xgbe-common.h"
@@ -145,42 +136,6 @@ MODULE_PARM_DESC(debug, " Network interface message level setting");
 static const u32 default_msg_level = (NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
                                      NETIF_MSG_IFUP);
 
-static const u32 xgbe_serdes_blwc[] = {
-       XGBE_SPEED_1000_BLWC,
-       XGBE_SPEED_2500_BLWC,
-       XGBE_SPEED_10000_BLWC,
-};
-
-static const u32 xgbe_serdes_cdr_rate[] = {
-       XGBE_SPEED_1000_CDR,
-       XGBE_SPEED_2500_CDR,
-       XGBE_SPEED_10000_CDR,
-};
-
-static const u32 xgbe_serdes_pq_skew[] = {
-       XGBE_SPEED_1000_PQ,
-       XGBE_SPEED_2500_PQ,
-       XGBE_SPEED_10000_PQ,
-};
-
-static const u32 xgbe_serdes_tx_amp[] = {
-       XGBE_SPEED_1000_TXAMP,
-       XGBE_SPEED_2500_TXAMP,
-       XGBE_SPEED_10000_TXAMP,
-};
-
-static const u32 xgbe_serdes_dfe_tap_cfg[] = {
-       XGBE_SPEED_1000_DFE_TAP_CONFIG,
-       XGBE_SPEED_2500_DFE_TAP_CONFIG,
-       XGBE_SPEED_10000_DFE_TAP_CONFIG,
-};
-
-static const u32 xgbe_serdes_dfe_tap_ena[] = {
-       XGBE_SPEED_1000_DFE_TAP_ENABLE,
-       XGBE_SPEED_2500_DFE_TAP_ENABLE,
-       XGBE_SPEED_10000_DFE_TAP_ENABLE,
-};
-
 static void xgbe_default_config(struct xgbe_prv_data *pdata)
 {
        DBGPR("-->xgbe_default_config\n");
@@ -206,456 +161,124 @@ static void xgbe_init_all_fptrs(struct xgbe_prv_data *pdata)
 {
        xgbe_init_function_ptrs_dev(&pdata->hw_if);
        xgbe_init_function_ptrs_phy(&pdata->phy_if);
+       xgbe_init_function_ptrs_i2c(&pdata->i2c_if);
        xgbe_init_function_ptrs_desc(&pdata->desc_if);
-}
-
-#ifdef CONFIG_ACPI
-static int xgbe_acpi_support(struct xgbe_prv_data *pdata)
-{
-       struct device *dev = pdata->dev;
-       u32 property;
-       int ret;
-
-       /* Obtain the system clock setting */
-       ret = device_property_read_u32(dev, XGBE_ACPI_DMA_FREQ, &property);
-       if (ret) {
-               dev_err(dev, "unable to obtain %s property\n",
-                       XGBE_ACPI_DMA_FREQ);
-               return ret;
-       }
-       pdata->sysclk_rate = property;
-
-       /* Obtain the PTP clock setting */
-       ret = device_property_read_u32(dev, XGBE_ACPI_PTP_FREQ, &property);
-       if (ret) {
-               dev_err(dev, "unable to obtain %s property\n",
-                       XGBE_ACPI_PTP_FREQ);
-               return ret;
-       }
-       pdata->ptpclk_rate = property;
 
-       return 0;
+       pdata->vdata->init_function_ptrs_phy_impl(&pdata->phy_if);
 }
-#else   /* CONFIG_ACPI */
-static int xgbe_acpi_support(struct xgbe_prv_data *pdata)
-{
-       return -EINVAL;
-}
-#endif  /* CONFIG_ACPI */
 
-#ifdef CONFIG_OF
-static int xgbe_of_support(struct xgbe_prv_data *pdata)
-{
-       struct device *dev = pdata->dev;
-
-       /* Obtain the system clock setting */
-       pdata->sysclk = devm_clk_get(dev, XGBE_DMA_CLOCK);
-       if (IS_ERR(pdata->sysclk)) {
-               dev_err(dev, "dma devm_clk_get failed\n");
-               return PTR_ERR(pdata->sysclk);
-       }
-       pdata->sysclk_rate = clk_get_rate(pdata->sysclk);
-
-       /* Obtain the PTP clock setting */
-       pdata->ptpclk = devm_clk_get(dev, XGBE_PTP_CLOCK);
-       if (IS_ERR(pdata->ptpclk)) {
-               dev_err(dev, "ptp devm_clk_get failed\n");
-               return PTR_ERR(pdata->ptpclk);
-       }
-       pdata->ptpclk_rate = clk_get_rate(pdata->ptpclk);
-
-       return 0;
-}
-
-static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata)
-{
-       struct device *dev = pdata->dev;
-       struct device_node *phy_node;
-       struct platform_device *phy_pdev;
-
-       phy_node = of_parse_phandle(dev->of_node, "phy-handle", 0);
-       if (phy_node) {
-               /* Old style device tree:
-                *   The XGBE and PHY resources are separate
-                */
-               phy_pdev = of_find_device_by_node(phy_node);
-               of_node_put(phy_node);
-       } else {
-               /* New style device tree:
-                *   The XGBE and PHY resources are grouped together with
-                *   the PHY resources listed last
-                */
-               get_device(dev);
-               phy_pdev = pdata->pdev;
-       }
-
-       return phy_pdev;
-}
-#else   /* CONFIG_OF */
-static int xgbe_of_support(struct xgbe_prv_data *pdata)
-{
-       return -EINVAL;
-}
-
-static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata)
-{
-       return NULL;
-}
-#endif  /* CONFIG_OF */
-
-static unsigned int xgbe_resource_count(struct platform_device *pdev,
-                                       unsigned int type)
-{
-       unsigned int count;
-       int i;
-
-       for (i = 0, count = 0; i < pdev->num_resources; i++) {
-               struct resource *res = &pdev->resource[i];
-
-               if (type == resource_type(res))
-                       count++;
-       }
-
-       return count;
-}
-
-static struct platform_device *xgbe_get_phy_pdev(struct xgbe_prv_data *pdata)
-{
-       struct platform_device *phy_pdev;
-
-       if (pdata->use_acpi) {
-               get_device(pdata->dev);
-               phy_pdev = pdata->pdev;
-       } else {
-               phy_pdev = xgbe_of_get_phy_pdev(pdata);
-       }
-
-       return phy_pdev;
-}
-
-static int xgbe_probe(struct platform_device *pdev)
+struct xgbe_prv_data *xgbe_alloc_pdata(struct device *dev)
 {
        struct xgbe_prv_data *pdata;
        struct net_device *netdev;
-       struct device *dev = &pdev->dev, *phy_dev;
-       struct platform_device *phy_pdev;
-       struct resource *res;
-       const char *phy_mode;
-       unsigned int i, phy_memnum, phy_irqnum;
-       enum dev_dma_attr attr;
-       int ret;
-
-       DBGPR("--> xgbe_probe\n");
 
        netdev = alloc_etherdev_mq(sizeof(struct xgbe_prv_data),
                                   XGBE_MAX_DMA_CHANNELS);
        if (!netdev) {
-               dev_err(dev, "alloc_etherdev failed\n");
-               ret = -ENOMEM;
-               goto err_alloc;
+               dev_err(dev, "alloc_etherdev_mq failed\n");
+               return ERR_PTR(-ENOMEM);
        }
        SET_NETDEV_DEV(netdev, dev);
        pdata = netdev_priv(netdev);
        pdata->netdev = netdev;
-       pdata->pdev = pdev;
-       pdata->adev = ACPI_COMPANION(dev);
        pdata->dev = dev;
-       platform_set_drvdata(pdev, netdev);
 
        spin_lock_init(&pdata->lock);
        spin_lock_init(&pdata->xpcs_lock);
        mutex_init(&pdata->rss_mutex);
        spin_lock_init(&pdata->tstamp_lock);
+       mutex_init(&pdata->i2c_mutex);
+       init_completion(&pdata->i2c_complete);
+       init_completion(&pdata->mdio_complete);
 
        pdata->msg_enable = netif_msg_init(debug, default_msg_level);
 
        set_bit(XGBE_DOWN, &pdata->dev_state);
+       set_bit(XGBE_STOPPED, &pdata->dev_state);
 
-       /* Check if we should use ACPI or DT */
-       pdata->use_acpi = dev->of_node ? 0 : 1;
-
-       phy_pdev = xgbe_get_phy_pdev(pdata);
-       if (!phy_pdev) {
-               dev_err(dev, "unable to obtain phy device\n");
-               ret = -EINVAL;
-               goto err_phydev;
-       }
-       phy_dev = &phy_pdev->dev;
-
-       if (pdev == phy_pdev) {
-               /* New style device tree or ACPI:
-                *   The XGBE and PHY resources are grouped together with
-                *   the PHY resources listed last
-                */
-               phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3;
-               phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1;
-       } else {
-               /* Old style device tree:
-                *   The XGBE and PHY resources are separate
-                */
-               phy_memnum = 0;
-               phy_irqnum = 0;
-       }
-
-       /* Set and validate the number of descriptors for a ring */
-       BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_TX_DESC_CNT);
-       pdata->tx_desc_count = XGBE_TX_DESC_CNT;
-       if (pdata->tx_desc_count & (pdata->tx_desc_count - 1)) {
-               dev_err(dev, "tx descriptor count (%d) is not valid\n",
-                       pdata->tx_desc_count);
-               ret = -EINVAL;
-               goto err_io;
-       }
-       BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_RX_DESC_CNT);
-       pdata->rx_desc_count = XGBE_RX_DESC_CNT;
-       if (pdata->rx_desc_count & (pdata->rx_desc_count - 1)) {
-               dev_err(dev, "rx descriptor count (%d) is not valid\n",
-                       pdata->rx_desc_count);
-               ret = -EINVAL;
-               goto err_io;
-       }
-
-       /* Obtain the mmio areas for the device */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       pdata->xgmac_regs = devm_ioremap_resource(dev, res);
-       if (IS_ERR(pdata->xgmac_regs)) {
-               dev_err(dev, "xgmac ioremap failed\n");
-               ret = PTR_ERR(pdata->xgmac_regs);
-               goto err_io;
-       }
-       if (netif_msg_probe(pdata))
-               dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs);
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       pdata->xpcs_regs = devm_ioremap_resource(dev, res);
-       if (IS_ERR(pdata->xpcs_regs)) {
-               dev_err(dev, "xpcs ioremap failed\n");
-               ret = PTR_ERR(pdata->xpcs_regs);
-               goto err_io;
-       }
-       if (netif_msg_probe(pdata))
-               dev_dbg(dev, "xpcs_regs  = %p\n", pdata->xpcs_regs);
-
-       res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
-       pdata->rxtx_regs = devm_ioremap_resource(dev, res);
-       if (IS_ERR(pdata->rxtx_regs)) {
-               dev_err(dev, "rxtx ioremap failed\n");
-               ret = PTR_ERR(pdata->rxtx_regs);
-               goto err_io;
-       }
-       if (netif_msg_probe(pdata))
-               dev_dbg(dev, "rxtx_regs  = %p\n", pdata->rxtx_regs);
-
-       res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
-       pdata->sir0_regs = devm_ioremap_resource(dev, res);
-       if (IS_ERR(pdata->sir0_regs)) {
-               dev_err(dev, "sir0 ioremap failed\n");
-               ret = PTR_ERR(pdata->sir0_regs);
-               goto err_io;
-       }
-       if (netif_msg_probe(pdata))
-               dev_dbg(dev, "sir0_regs  = %p\n", pdata->sir0_regs);
-
-       res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
-       pdata->sir1_regs = devm_ioremap_resource(dev, res);
-       if (IS_ERR(pdata->sir1_regs)) {
-               dev_err(dev, "sir1 ioremap failed\n");
-               ret = PTR_ERR(pdata->sir1_regs);
-               goto err_io;
-       }
-       if (netif_msg_probe(pdata))
-               dev_dbg(dev, "sir1_regs  = %p\n", pdata->sir1_regs);
-
-       /* Retrieve the MAC address */
-       ret = device_property_read_u8_array(dev, XGBE_MAC_ADDR_PROPERTY,
-                                           pdata->mac_addr,
-                                           sizeof(pdata->mac_addr));
-       if (ret || !is_valid_ether_addr(pdata->mac_addr)) {
-               dev_err(dev, "invalid %s property\n", XGBE_MAC_ADDR_PROPERTY);
-               if (!ret)
-                       ret = -EINVAL;
-               goto err_io;
-       }
-
-       /* Retrieve the PHY mode - it must be "xgmii" */
-       ret = device_property_read_string(dev, XGBE_PHY_MODE_PROPERTY,
-                                         &phy_mode);
-       if (ret || strcmp(phy_mode, phy_modes(PHY_INTERFACE_MODE_XGMII))) {
-               dev_err(dev, "invalid %s property\n", XGBE_PHY_MODE_PROPERTY);
-               if (!ret)
-                       ret = -EINVAL;
-               goto err_io;
-       }
-       pdata->phy_mode = PHY_INTERFACE_MODE_XGMII;
+       return pdata;
+}
 
-       /* Check for per channel interrupt support */
-       if (device_property_present(dev, XGBE_DMA_IRQS_PROPERTY))
-               pdata->per_channel_irq = 1;
+void xgbe_free_pdata(struct xgbe_prv_data *pdata)
+{
+       struct net_device *netdev = pdata->netdev;
 
-       /* Retrieve the PHY speedset */
-       ret = device_property_read_u32(phy_dev, XGBE_SPEEDSET_PROPERTY,
-                                      &pdata->speed_set);
-       if (ret) {
-               dev_err(dev, "invalid %s property\n", XGBE_SPEEDSET_PROPERTY);
-               goto err_io;
-       }
+       free_netdev(netdev);
+}
 
-       switch (pdata->speed_set) {
-       case XGBE_SPEEDSET_1000_10000:
-       case XGBE_SPEEDSET_2500_10000:
-               break;
-       default:
-               dev_err(dev, "invalid %s property\n", XGBE_SPEEDSET_PROPERTY);
-               ret = -EINVAL;
-               goto err_io;
-       }
+void xgbe_set_counts(struct xgbe_prv_data *pdata)
+{
+       /* Set all the function pointers */
+       xgbe_init_all_fptrs(pdata);
 
-       /* Retrieve the PHY configuration properties */
-       if (device_property_present(phy_dev, XGBE_BLWC_PROPERTY)) {
-               ret = device_property_read_u32_array(phy_dev,
-                                                    XGBE_BLWC_PROPERTY,
-                                                    pdata->serdes_blwc,
-                                                    XGBE_SPEEDS);
-               if (ret) {
-                       dev_err(dev, "invalid %s property\n",
-                               XGBE_BLWC_PROPERTY);
-                       goto err_io;
-               }
-       } else {
-               memcpy(pdata->serdes_blwc, xgbe_serdes_blwc,
-                      sizeof(pdata->serdes_blwc));
-       }
+       /* Populate the hardware features */
+       xgbe_get_all_hw_features(pdata);
 
-       if (device_property_present(phy_dev, XGBE_CDR_RATE_PROPERTY)) {
-               ret = device_property_read_u32_array(phy_dev,
-                                                    XGBE_CDR_RATE_PROPERTY,
-                                                    pdata->serdes_cdr_rate,
-                                                    XGBE_SPEEDS);
-               if (ret) {
-                       dev_err(dev, "invalid %s property\n",
-                               XGBE_CDR_RATE_PROPERTY);
-                       goto err_io;
-               }
-       } else {
-               memcpy(pdata->serdes_cdr_rate, xgbe_serdes_cdr_rate,
-                      sizeof(pdata->serdes_cdr_rate));
-       }
+       /* Set default max values if not provided */
+       if (!pdata->tx_max_channel_count)
+               pdata->tx_max_channel_count = pdata->hw_feat.tx_ch_cnt;
+       if (!pdata->rx_max_channel_count)
+               pdata->rx_max_channel_count = pdata->hw_feat.rx_ch_cnt;
 
-       if (device_property_present(phy_dev, XGBE_PQ_SKEW_PROPERTY)) {
-               ret = device_property_read_u32_array(phy_dev,
-                                                    XGBE_PQ_SKEW_PROPERTY,
-                                                    pdata->serdes_pq_skew,
-                                                    XGBE_SPEEDS);
-               if (ret) {
-                       dev_err(dev, "invalid %s property\n",
-                               XGBE_PQ_SKEW_PROPERTY);
-                       goto err_io;
-               }
-       } else {
-               memcpy(pdata->serdes_pq_skew, xgbe_serdes_pq_skew,
-                      sizeof(pdata->serdes_pq_skew));
-       }
+       if (!pdata->tx_max_q_count)
+               pdata->tx_max_q_count = pdata->hw_feat.tx_q_cnt;
+       if (!pdata->rx_max_q_count)
+               pdata->rx_max_q_count = pdata->hw_feat.rx_q_cnt;
 
-       if (device_property_present(phy_dev, XGBE_TX_AMP_PROPERTY)) {
-               ret = device_property_read_u32_array(phy_dev,
-                                                    XGBE_TX_AMP_PROPERTY,
-                                                    pdata->serdes_tx_amp,
-                                                    XGBE_SPEEDS);
-               if (ret) {
-                       dev_err(dev, "invalid %s property\n",
-                               XGBE_TX_AMP_PROPERTY);
-                       goto err_io;
-               }
-       } else {
-               memcpy(pdata->serdes_tx_amp, xgbe_serdes_tx_amp,
-                      sizeof(pdata->serdes_tx_amp));
-       }
+       /* Calculate the number of Tx and Rx rings to be created
+        *  -Tx (DMA) Channels map 1-to-1 to Tx Queues so set
+        *   the number of Tx queues to the number of Tx channels
+        *   enabled
+        *  -Rx (DMA) Channels do not map 1-to-1 so use the actual
+        *   number of Rx queues or maximum allowed
+        */
+       pdata->tx_ring_count = min_t(unsigned int, num_online_cpus(),
+                                    pdata->hw_feat.tx_ch_cnt);
+       pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count,
+                                    pdata->tx_max_channel_count);
+       pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count,
+                                    pdata->tx_max_q_count);
 
-       if (device_property_present(phy_dev, XGBE_DFE_CFG_PROPERTY)) {
-               ret = device_property_read_u32_array(phy_dev,
-                                                    XGBE_DFE_CFG_PROPERTY,
-                                                    pdata->serdes_dfe_tap_cfg,
-                                                    XGBE_SPEEDS);
-               if (ret) {
-                       dev_err(dev, "invalid %s property\n",
-                               XGBE_DFE_CFG_PROPERTY);
-                       goto err_io;
-               }
-       } else {
-               memcpy(pdata->serdes_dfe_tap_cfg, xgbe_serdes_dfe_tap_cfg,
-                      sizeof(pdata->serdes_dfe_tap_cfg));
-       }
+       pdata->tx_q_count = pdata->tx_ring_count;
 
-       if (device_property_present(phy_dev, XGBE_DFE_ENA_PROPERTY)) {
-               ret = device_property_read_u32_array(phy_dev,
-                                                    XGBE_DFE_ENA_PROPERTY,
-                                                    pdata->serdes_dfe_tap_ena,
-                                                    XGBE_SPEEDS);
-               if (ret) {
-                       dev_err(dev, "invalid %s property\n",
-                               XGBE_DFE_ENA_PROPERTY);
-                       goto err_io;
-               }
-       } else {
-               memcpy(pdata->serdes_dfe_tap_ena, xgbe_serdes_dfe_tap_ena,
-                      sizeof(pdata->serdes_dfe_tap_ena));
-       }
+       pdata->rx_ring_count = min_t(unsigned int, num_online_cpus(),
+                                    pdata->hw_feat.rx_ch_cnt);
+       pdata->rx_ring_count = min_t(unsigned int, pdata->rx_ring_count,
+                                    pdata->rx_max_channel_count);
 
-       /* Obtain device settings unique to ACPI/OF */
-       if (pdata->use_acpi)
-               ret = xgbe_acpi_support(pdata);
-       else
-               ret = xgbe_of_support(pdata);
-       if (ret)
-               goto err_io;
-
-       /* Set the DMA coherency values */
-       attr = device_get_dma_attr(dev);
-       if (attr == DEV_DMA_NOT_SUPPORTED) {
-               dev_err(dev, "DMA is not supported");
-               ret = -ENODEV;
-               goto err_io;
-       }
-       pdata->coherent = (attr == DEV_DMA_COHERENT);
-       if (pdata->coherent) {
-               pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
-               pdata->arcache = XGBE_DMA_OS_ARCACHE;
-               pdata->awcache = XGBE_DMA_OS_AWCACHE;
-       } else {
-               pdata->axdomain = XGBE_DMA_SYS_AXDOMAIN;
-               pdata->arcache = XGBE_DMA_SYS_ARCACHE;
-               pdata->awcache = XGBE_DMA_SYS_AWCACHE;
-       }
+       pdata->rx_q_count = min_t(unsigned int, pdata->hw_feat.rx_q_cnt,
+                                 pdata->rx_max_q_count);
 
-       /* Get the device interrupt */
-       ret = platform_get_irq(pdev, 0);
-       if (ret < 0) {
-               dev_err(dev, "platform_get_irq 0 failed\n");
-               goto err_io;
+       if (netif_msg_probe(pdata)) {
+               dev_dbg(pdata->dev, "TX/RX DMA channel count = %u/%u\n",
+                       pdata->tx_ring_count, pdata->rx_ring_count);
+               dev_dbg(pdata->dev, "TX/RX hardware queue count = %u/%u\n",
+                       pdata->tx_q_count, pdata->rx_q_count);
        }
-       pdata->dev_irq = ret;
+}
 
-       /* Get the auto-negotiation interrupt */
-       ret = platform_get_irq(phy_pdev, phy_irqnum++);
-       if (ret < 0) {
-               dev_err(dev, "platform_get_irq phy 0 failed\n");
-               goto err_io;
-       }
-       pdata->an_irq = ret;
+int xgbe_config_netdev(struct xgbe_prv_data *pdata)
+{
+       struct net_device *netdev = pdata->netdev;
+       struct device *dev = pdata->dev;
+       unsigned int i;
+       int ret;
 
        netdev->irq = pdata->dev_irq;
        netdev->base_addr = (unsigned long)pdata->xgmac_regs;
        memcpy(netdev->dev_addr, pdata->mac_addr, netdev->addr_len);
 
-       /* Set all the function pointers */
-       xgbe_init_all_fptrs(pdata);
+       /* Initialize ECC timestamps */
+       pdata->tx_sec_period = jiffies;
+       pdata->tx_ded_period = jiffies;
+       pdata->rx_sec_period = jiffies;
+       pdata->rx_ded_period = jiffies;
+       pdata->desc_sec_period = jiffies;
+       pdata->desc_ded_period = jiffies;
 
        /* Issue software reset to device */
        pdata->hw_if.exit(pdata);
 
-       /* Populate the hardware features */
-       xgbe_get_all_hw_features(pdata);
-
        /* Set default configuration data */
        xgbe_default_config(pdata);
 
@@ -664,33 +287,46 @@ static int xgbe_probe(struct platform_device *pdev)
                                        DMA_BIT_MASK(pdata->hw_feat.dma_width));
        if (ret) {
                dev_err(dev, "dma_set_mask_and_coherent failed\n");
-               goto err_io;
+               return ret;
        }
 
-       /* Calculate the number of Tx and Rx rings to be created
-        *  -Tx (DMA) Channels map 1-to-1 to Tx Queues so set
-        *   the number of Tx queues to the number of Tx channels
-        *   enabled
-        *  -Rx (DMA) Channels do not map 1-to-1 so use the actual
-        *   number of Rx queues
-        */
-       pdata->tx_ring_count = min_t(unsigned int, num_online_cpus(),
-                                    pdata->hw_feat.tx_ch_cnt);
-       pdata->tx_q_count = pdata->tx_ring_count;
+       /* Set default max values if not provided */
+       if (!pdata->tx_max_fifo_size)
+               pdata->tx_max_fifo_size = pdata->hw_feat.tx_fifo_size;
+       if (!pdata->rx_max_fifo_size)
+               pdata->rx_max_fifo_size = pdata->hw_feat.rx_fifo_size;
+
+       /* Set and validate the number of descriptors for a ring */
+       BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_TX_DESC_CNT);
+       pdata->tx_desc_count = XGBE_TX_DESC_CNT;
+
+       BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_RX_DESC_CNT);
+       pdata->rx_desc_count = XGBE_RX_DESC_CNT;
+
+       /* Adjust the number of queues based on interrupts assigned */
+       if (pdata->channel_irq_count) {
+               pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count,
+                                            pdata->channel_irq_count);
+               pdata->rx_ring_count = min_t(unsigned int, pdata->rx_ring_count,
+                                            pdata->channel_irq_count);
+
+               if (netif_msg_probe(pdata))
+                       dev_dbg(pdata->dev,
+                               "adjusted TX/RX DMA channel count = %u/%u\n",
+                               pdata->tx_ring_count, pdata->rx_ring_count);
+       }
+
+       /* Set the number of queues */
        ret = netif_set_real_num_tx_queues(netdev, pdata->tx_ring_count);
        if (ret) {
                dev_err(dev, "error setting real tx queue count\n");
-               goto err_io;
+               return ret;
        }
 
-       pdata->rx_ring_count = min_t(unsigned int,
-                                    netif_get_num_default_rss_queues(),
-                                    pdata->hw_feat.rx_ch_cnt);
-       pdata->rx_q_count = pdata->hw_feat.rx_q_cnt;
        ret = netif_set_real_num_rx_queues(netdev, pdata->rx_ring_count);
        if (ret) {
                dev_err(dev, "error setting real rx queue count\n");
-               goto err_io;
+               return ret;
        }
 
        /* Initialize RSS hash key and lookup table */
@@ -705,7 +341,9 @@ static int xgbe_probe(struct platform_device *pdev)
        XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1);
 
        /* Call MDIO/PHY initialization routine */
-       pdata->phy_if.phy_init(pdata);
+       ret = pdata->phy_if.phy_init(pdata);
+       if (ret)
+               return ret;
 
        /* Set device operations */
        netdev->netdev_ops = xgbe_get_netdev_ops();
@@ -752,13 +390,21 @@ static int xgbe_probe(struct platform_device *pdev)
        ret = register_netdev(netdev);
        if (ret) {
                dev_err(dev, "net device registration failed\n");
-               goto err_io;
+               return ret;
        }
 
        /* Create the PHY/ANEG name based on netdev name */
        snprintf(pdata->an_name, sizeof(pdata->an_name) - 1, "%s-pcs",
                 netdev_name(netdev));
 
+       /* Create the ECC name based on netdev name */
+       snprintf(pdata->ecc_name, sizeof(pdata->ecc_name) - 1, "%s-ecc",
+                netdev_name(netdev));
+
+       /* Create the I2C name based on netdev name */
+       snprintf(pdata->i2c_name, sizeof(pdata->i2c_name) - 1, "%s-i2c",
+                netdev_name(netdev));
+
        /* Create workqueues */
        pdata->dev_workqueue =
                create_singlethread_workqueue(netdev_name(netdev));
@@ -780,11 +426,10 @@ static int xgbe_probe(struct platform_device *pdev)
 
        xgbe_debugfs_init(pdata);
 
-       platform_device_put(phy_pdev);
-
-       netdev_notice(netdev, "net device enabled\n");
-
-       DBGPR("<-- xgbe_probe\n");
+       netif_dbg(pdata, drv, pdata->netdev, "%u Tx software queues\n",
+                 pdata->tx_ring_count);
+       netif_dbg(pdata, drv, pdata->netdev, "%u Rx software queues\n",
+                 pdata->rx_ring_count);
 
        return 0;
 
@@ -794,29 +439,19 @@ err_wq:
 err_netdev:
        unregister_netdev(netdev);
 
-err_io:
-       platform_device_put(phy_pdev);
-
-err_phydev:
-       free_netdev(netdev);
-
-err_alloc:
-       dev_notice(dev, "net device not enabled\n");
-
        return ret;
 }
 
-static int xgbe_remove(struct platform_device *pdev)
+void xgbe_deconfig_netdev(struct xgbe_prv_data *pdata)
 {
-       struct net_device *netdev = platform_get_drvdata(pdev);
-       struct xgbe_prv_data *pdata = netdev_priv(netdev);
-
-       DBGPR("-->xgbe_remove\n");
+       struct net_device *netdev = pdata->netdev;
 
        xgbe_debugfs_exit(pdata);
 
        xgbe_ptp_unregister(pdata);
 
+       pdata->phy_if.phy_exit(pdata);
+
        flush_workqueue(pdata->an_workqueue);
        destroy_workqueue(pdata->an_workqueue);
 
@@ -824,94 +459,29 @@ static int xgbe_remove(struct platform_device *pdev)
        destroy_workqueue(pdata->dev_workqueue);
 
        unregister_netdev(netdev);
-
-       free_netdev(netdev);
-
-       DBGPR("<--xgbe_remove\n");
-
-       return 0;
 }
 
-#ifdef CONFIG_PM
-static int xgbe_suspend(struct device *dev)
+static int __init xgbe_mod_init(void)
 {
-       struct net_device *netdev = dev_get_drvdata(dev);
-       struct xgbe_prv_data *pdata = netdev_priv(netdev);
-       int ret = 0;
-
-       DBGPR("-->xgbe_suspend\n");
-
-       if (netif_running(netdev))
-               ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT);
+       int ret;
 
-       pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
-       pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER;
-       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
+       ret = xgbe_platform_init();
+       if (ret)
+               return ret;
 
-       DBGPR("<--xgbe_suspend\n");
+       ret = xgbe_pci_init();
+       if (ret)
+               return ret;
 
-       return ret;
+       return 0;
 }
 
-static int xgbe_resume(struct device *dev)
+static void __exit xgbe_mod_exit(void)
 {
-       struct net_device *netdev = dev_get_drvdata(dev);
-       struct xgbe_prv_data *pdata = netdev_priv(netdev);
-       int ret = 0;
-
-       DBGPR("-->xgbe_resume\n");
-
-       pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER;
-       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
-
-       if (netif_running(netdev)) {
-               ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT);
-
-               /* Schedule a restart in case the link or phy state changed
-                * while we were powered down.
-                */
-               schedule_work(&pdata->restart_work);
-       }
-
-       DBGPR("<--xgbe_resume\n");
+       xgbe_pci_exit();
 
-       return ret;
+       xgbe_platform_exit();
 }
-#endif /* CONFIG_PM */
-
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id xgbe_acpi_match[] = {
-       { "AMDI8001", 0 },
-       {},
-};
-
-MODULE_DEVICE_TABLE(acpi, xgbe_acpi_match);
-#endif
-
-#ifdef CONFIG_OF
-static const struct of_device_id xgbe_of_match[] = {
-       { .compatible = "amd,xgbe-seattle-v1a", },
-       {},
-};
-
-MODULE_DEVICE_TABLE(of, xgbe_of_match);
-#endif
-
-static SIMPLE_DEV_PM_OPS(xgbe_pm_ops, xgbe_suspend, xgbe_resume);
-
-static struct platform_driver xgbe_driver = {
-       .driver = {
-               .name = "amd-xgbe",
-#ifdef CONFIG_ACPI
-               .acpi_match_table = xgbe_acpi_match,
-#endif
-#ifdef CONFIG_OF
-               .of_match_table = xgbe_of_match,
-#endif
-               .pm = &xgbe_pm_ops,
-       },
-       .probe = xgbe_probe,
-       .remove = xgbe_remove,
-};
 
-module_platform_driver(xgbe_driver);
+module_init(xgbe_mod_init);
+module_exit(xgbe_mod_exit);
index 84c5d296d13e85b2ee7425ac75833312cf6819e5..4c5b90eea4af2e389decec1d80b31e7134cee140 100644 (file)
 #include "xgbe.h"
 #include "xgbe-common.h"
 
-static void xgbe_an_enable_kr_training(struct xgbe_prv_data *pdata)
+static void xgbe_an37_clear_interrupts(struct xgbe_prv_data *pdata)
 {
-       unsigned int reg;
-
-       reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
+       int reg;
 
-       reg |= XGBE_KR_TRAINING_ENABLE;
-       XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+       reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT);
+       reg &= ~XGBE_AN_CL37_INT_MASK;
+       XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT, reg);
 }
 
-static void xgbe_an_disable_kr_training(struct xgbe_prv_data *pdata)
+static void xgbe_an37_disable_interrupts(struct xgbe_prv_data *pdata)
 {
-       unsigned int reg;
+       int reg;
 
-       reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
+       reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL);
+       reg &= ~XGBE_AN_CL37_INT_MASK;
+       XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg);
 
-       reg &= ~XGBE_KR_TRAINING_ENABLE;
-       XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL);
+       reg &= ~XGBE_PCS_CL37_BP;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL, reg);
 }
 
-static void xgbe_pcs_power_cycle(struct xgbe_prv_data *pdata)
+static void xgbe_an37_enable_interrupts(struct xgbe_prv_data *pdata)
 {
-       unsigned int reg;
-
-       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+       int reg;
 
-       reg |= MDIO_CTRL1_LPOWER;
-       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL);
+       reg |= XGBE_PCS_CL37_BP;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL, reg);
 
-       usleep_range(75, 100);
-
-       reg &= ~MDIO_CTRL1_LPOWER;
-       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+       reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL);
+       reg |= XGBE_AN_CL37_INT_MASK;
+       XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg);
 }
 
-static void xgbe_serdes_start_ratechange(struct xgbe_prv_data *pdata)
+static void xgbe_an73_clear_interrupts(struct xgbe_prv_data *pdata)
 {
-       /* Assert Rx and Tx ratechange */
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 1);
+       XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0);
 }
 
-static void xgbe_serdes_complete_ratechange(struct xgbe_prv_data *pdata)
+static void xgbe_an73_disable_interrupts(struct xgbe_prv_data *pdata)
 {
-       unsigned int wait;
-       u16 status;
-
-       /* Release Rx and Tx ratechange */
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 0);
+       XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0);
+}
 
-       /* Wait for Rx and Tx ready */
-       wait = XGBE_RATECHANGE_COUNT;
-       while (wait--) {
-               usleep_range(50, 75);
+static void xgbe_an73_enable_interrupts(struct xgbe_prv_data *pdata)
+{
+       XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, XGBE_AN_CL73_INT_MASK);
+}
 
-               status = XSIR0_IOREAD(pdata, SIR0_STATUS);
-               if (XSIR_GET_BITS(status, SIR0_STATUS, RX_READY) &&
-                   XSIR_GET_BITS(status, SIR0_STATUS, TX_READY))
-                       goto rx_reset;
+static void xgbe_an_enable_interrupts(struct xgbe_prv_data *pdata)
+{
+       switch (pdata->an_mode) {
+       case XGBE_AN_MODE_CL73:
+       case XGBE_AN_MODE_CL73_REDRV:
+               xgbe_an73_enable_interrupts(pdata);
+               break;
+       case XGBE_AN_MODE_CL37:
+       case XGBE_AN_MODE_CL37_SGMII:
+               xgbe_an37_enable_interrupts(pdata);
+               break;
+       default:
+               break;
        }
+}
 
-       netif_dbg(pdata, link, pdata->netdev, "SerDes rx/tx not ready (%#hx)\n",
-                 status);
-
-rx_reset:
-       /* Perform Rx reset for the DFE changes */
-       XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 0);
-       XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 1);
+static void xgbe_an_clear_interrupts_all(struct xgbe_prv_data *pdata)
+{
+       xgbe_an73_clear_interrupts(pdata);
+       xgbe_an37_clear_interrupts(pdata);
 }
 
-static void xgbe_xgmii_mode(struct xgbe_prv_data *pdata)
+static void xgbe_an73_enable_kr_training(struct xgbe_prv_data *pdata)
 {
        unsigned int reg;
 
-       /* Enable KR training */
-       xgbe_an_enable_kr_training(pdata);
-
-       /* Set MAC to 10G speed */
-       pdata->hw_if.set_xgmii_speed(pdata);
-
-       /* Set PCS to KR/10G speed */
-       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
-       reg &= ~MDIO_PCS_CTRL2_TYPE;
-       reg |= MDIO_PCS_CTRL2_10GBR;
-       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg);
+       reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
 
-       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
-       reg &= ~MDIO_CTRL1_SPEEDSEL;
-       reg |= MDIO_CTRL1_SPEED10G;
-       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+       reg |= XGBE_KR_TRAINING_ENABLE;
+       XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+}
 
-       xgbe_pcs_power_cycle(pdata);
+static void xgbe_an73_disable_kr_training(struct xgbe_prv_data *pdata)
+{
+       unsigned int reg;
 
-       /* Set SerDes to 10G speed */
-       xgbe_serdes_start_ratechange(pdata);
+       reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
 
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_10000_RATE);
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_10000_WORD);
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_10000_PLL);
+       reg &= ~XGBE_KR_TRAINING_ENABLE;
+       XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+}
 
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE,
-                          pdata->serdes_cdr_rate[XGBE_SPEED_10000]);
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP,
-                          pdata->serdes_tx_amp[XGBE_SPEED_10000]);
-       XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA,
-                          pdata->serdes_blwc[XGBE_SPEED_10000]);
-       XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG,
-                          pdata->serdes_pq_skew[XGBE_SPEED_10000]);
-       XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG,
-                          pdata->serdes_dfe_tap_cfg[XGBE_SPEED_10000]);
-       XRXTX_IOWRITE(pdata, RXTX_REG22,
-                     pdata->serdes_dfe_tap_ena[XGBE_SPEED_10000]);
+static void xgbe_kr_mode(struct xgbe_prv_data *pdata)
+{
+       /* Enable KR training */
+       xgbe_an73_enable_kr_training(pdata);
 
-       xgbe_serdes_complete_ratechange(pdata);
+       /* Set MAC to 10G speed */
+       pdata->hw_if.set_speed(pdata, SPEED_10000);
 
-       netif_dbg(pdata, link, pdata->netdev, "10GbE KR mode set\n");
+       /* Call PHY implementation support to complete rate change */
+       pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_KR);
 }
 
-static void xgbe_gmii_2500_mode(struct xgbe_prv_data *pdata)
+static void xgbe_kx_2500_mode(struct xgbe_prv_data *pdata)
 {
-       unsigned int reg;
-
        /* Disable KR training */
-       xgbe_an_disable_kr_training(pdata);
+       xgbe_an73_disable_kr_training(pdata);
 
        /* Set MAC to 2.5G speed */
-       pdata->hw_if.set_gmii_2500_speed(pdata);
+       pdata->hw_if.set_speed(pdata, SPEED_2500);
 
-       /* Set PCS to KX/1G speed */
-       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
-       reg &= ~MDIO_PCS_CTRL2_TYPE;
-       reg |= MDIO_PCS_CTRL2_10GBX;
-       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg);
+       /* Call PHY implementation support to complete rate change */
+       pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_KX_2500);
+}
 
-       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
-       reg &= ~MDIO_CTRL1_SPEEDSEL;
-       reg |= MDIO_CTRL1_SPEED1G;
-       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+static void xgbe_kx_1000_mode(struct xgbe_prv_data *pdata)
+{
+       /* Disable KR training */
+       xgbe_an73_disable_kr_training(pdata);
 
-       xgbe_pcs_power_cycle(pdata);
+       /* Set MAC to 1G speed */
+       pdata->hw_if.set_speed(pdata, SPEED_1000);
 
-       /* Set SerDes to 2.5G speed */
-       xgbe_serdes_start_ratechange(pdata);
+       /* Call PHY implementation support to complete rate change */
+       pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_KX_1000);
+}
 
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_2500_RATE);
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_2500_WORD);
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_2500_PLL);
+static void xgbe_sfi_mode(struct xgbe_prv_data *pdata)
+{
+       /* If a KR re-driver is present, change to KR mode instead */
+       if (pdata->kr_redrv)
+               return xgbe_kr_mode(pdata);
 
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE,
-                          pdata->serdes_cdr_rate[XGBE_SPEED_2500]);
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP,
-                          pdata->serdes_tx_amp[XGBE_SPEED_2500]);
-       XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA,
-                          pdata->serdes_blwc[XGBE_SPEED_2500]);
-       XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG,
-                          pdata->serdes_pq_skew[XGBE_SPEED_2500]);
-       XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG,
-                          pdata->serdes_dfe_tap_cfg[XGBE_SPEED_2500]);
-       XRXTX_IOWRITE(pdata, RXTX_REG22,
-                     pdata->serdes_dfe_tap_ena[XGBE_SPEED_2500]);
+       /* Disable KR training */
+       xgbe_an73_disable_kr_training(pdata);
 
-       xgbe_serdes_complete_ratechange(pdata);
+       /* Set MAC to 10G speed */
+       pdata->hw_if.set_speed(pdata, SPEED_10000);
 
-       netif_dbg(pdata, link, pdata->netdev, "2.5GbE KX mode set\n");
+       /* Call PHY implementation support to complete rate change */
+       pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SFI);
 }
 
-static void xgbe_gmii_mode(struct xgbe_prv_data *pdata)
+static void xgbe_x_mode(struct xgbe_prv_data *pdata)
 {
-       unsigned int reg;
-
        /* Disable KR training */
-       xgbe_an_disable_kr_training(pdata);
+       xgbe_an73_disable_kr_training(pdata);
 
        /* Set MAC to 1G speed */
-       pdata->hw_if.set_gmii_speed(pdata);
+       pdata->hw_if.set_speed(pdata, SPEED_1000);
 
-       /* Set PCS to KX/1G speed */
-       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
-       reg &= ~MDIO_PCS_CTRL2_TYPE;
-       reg |= MDIO_PCS_CTRL2_10GBX;
-       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg);
-
-       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
-       reg &= ~MDIO_CTRL1_SPEEDSEL;
-       reg |= MDIO_CTRL1_SPEED1G;
-       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+       /* Call PHY implementation support to complete rate change */
+       pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_X);
+}
 
-       xgbe_pcs_power_cycle(pdata);
+static void xgbe_sgmii_1000_mode(struct xgbe_prv_data *pdata)
+{
+       /* Disable KR training */
+       xgbe_an73_disable_kr_training(pdata);
 
-       /* Set SerDes to 1G speed */
-       xgbe_serdes_start_ratechange(pdata);
+       /* Set MAC to 1G speed */
+       pdata->hw_if.set_speed(pdata, SPEED_1000);
 
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_1000_RATE);
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_1000_WORD);
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_1000_PLL);
+       /* Call PHY implementation support to complete rate change */
+       pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SGMII_1000);
+}
 
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE,
-                          pdata->serdes_cdr_rate[XGBE_SPEED_1000]);
-       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP,
-                          pdata->serdes_tx_amp[XGBE_SPEED_1000]);
-       XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA,
-                          pdata->serdes_blwc[XGBE_SPEED_1000]);
-       XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG,
-                          pdata->serdes_pq_skew[XGBE_SPEED_1000]);
-       XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG,
-                          pdata->serdes_dfe_tap_cfg[XGBE_SPEED_1000]);
-       XRXTX_IOWRITE(pdata, RXTX_REG22,
-                     pdata->serdes_dfe_tap_ena[XGBE_SPEED_1000]);
+static void xgbe_sgmii_100_mode(struct xgbe_prv_data *pdata)
+{
+       /* Disable KR training */
+       xgbe_an73_disable_kr_training(pdata);
 
-       xgbe_serdes_complete_ratechange(pdata);
+       /* Set MAC to 1G speed */
+       pdata->hw_if.set_speed(pdata, SPEED_1000);
 
-       netif_dbg(pdata, link, pdata->netdev, "1GbE KX mode set\n");
+       /* Call PHY implementation support to complete rate change */
+       pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SGMII_100);
 }
 
-static void xgbe_cur_mode(struct xgbe_prv_data *pdata,
-                         enum xgbe_mode *mode)
+static enum xgbe_mode xgbe_cur_mode(struct xgbe_prv_data *pdata)
 {
-       unsigned int reg;
-
-       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
-       if ((reg & MDIO_PCS_CTRL2_TYPE) == MDIO_PCS_CTRL2_10GBR)
-               *mode = XGBE_MODE_KR;
-       else
-               *mode = XGBE_MODE_KX;
+       return pdata->phy_if.phy_impl.cur_mode(pdata);
 }
 
 static bool xgbe_in_kr_mode(struct xgbe_prv_data *pdata)
 {
-       enum xgbe_mode mode;
-
-       xgbe_cur_mode(pdata, &mode);
+       return (xgbe_cur_mode(pdata) == XGBE_MODE_KR);
+}
 
-       return (mode == XGBE_MODE_KR);
+static void xgbe_change_mode(struct xgbe_prv_data *pdata,
+                            enum xgbe_mode mode)
+{
+       switch (mode) {
+       case XGBE_MODE_KX_1000:
+               xgbe_kx_1000_mode(pdata);
+               break;
+       case XGBE_MODE_KX_2500:
+               xgbe_kx_2500_mode(pdata);
+               break;
+       case XGBE_MODE_KR:
+               xgbe_kr_mode(pdata);
+               break;
+       case XGBE_MODE_SGMII_100:
+               xgbe_sgmii_100_mode(pdata);
+               break;
+       case XGBE_MODE_SGMII_1000:
+               xgbe_sgmii_1000_mode(pdata);
+               break;
+       case XGBE_MODE_X:
+               xgbe_x_mode(pdata);
+               break;
+       case XGBE_MODE_SFI:
+               xgbe_sfi_mode(pdata);
+               break;
+       case XGBE_MODE_UNKNOWN:
+               break;
+       default:
+               netif_dbg(pdata, link, pdata->netdev,
+                         "invalid operation mode requested (%u)\n", mode);
+       }
 }
 
 static void xgbe_switch_mode(struct xgbe_prv_data *pdata)
 {
-       /* If we are in KR switch to KX, and vice-versa */
-       if (xgbe_in_kr_mode(pdata)) {
-               if (pdata->speed_set == XGBE_SPEEDSET_1000_10000)
-                       xgbe_gmii_mode(pdata);
-               else
-                       xgbe_gmii_2500_mode(pdata);
-       } else {
-               xgbe_xgmii_mode(pdata);
-       }
+       xgbe_change_mode(pdata, pdata->phy_if.phy_impl.switch_mode(pdata));
 }
 
 static void xgbe_set_mode(struct xgbe_prv_data *pdata,
                          enum xgbe_mode mode)
 {
-       enum xgbe_mode cur_mode;
+       if (mode == xgbe_cur_mode(pdata))
+               return;
 
-       xgbe_cur_mode(pdata, &cur_mode);
-       if (mode != cur_mode)
-               xgbe_switch_mode(pdata);
+       xgbe_change_mode(pdata, mode);
 }
 
-static bool xgbe_use_xgmii_mode(struct xgbe_prv_data *pdata)
+static bool xgbe_use_mode(struct xgbe_prv_data *pdata,
+                         enum xgbe_mode mode)
 {
-       if (pdata->phy.autoneg == AUTONEG_ENABLE) {
-               if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full)
-                       return true;
-       } else {
-               if (pdata->phy.speed == SPEED_10000)
-                       return true;
-       }
+       return pdata->phy_if.phy_impl.use_mode(pdata, mode);
+}
+
+static void xgbe_an37_set(struct xgbe_prv_data *pdata, bool enable,
+                         bool restart)
+{
+       unsigned int reg;
+
+       reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_CTRL1);
+       reg &= ~MDIO_VEND2_CTRL1_AN_ENABLE;
 
-       return false;
+       if (enable)
+               reg |= MDIO_VEND2_CTRL1_AN_ENABLE;
+
+       if (restart)
+               reg |= MDIO_VEND2_CTRL1_AN_RESTART;
+
+       XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_CTRL1, reg);
 }
 
-static bool xgbe_use_gmii_2500_mode(struct xgbe_prv_data *pdata)
+static void xgbe_an37_restart(struct xgbe_prv_data *pdata)
 {
-       if (pdata->phy.autoneg == AUTONEG_ENABLE) {
-               if (pdata->phy.advertising & ADVERTISED_2500baseX_Full)
-                       return true;
-       } else {
-               if (pdata->phy.speed == SPEED_2500)
-                       return true;
-       }
+       xgbe_an37_enable_interrupts(pdata);
+       xgbe_an37_set(pdata, true, true);
 
-       return false;
+       netif_dbg(pdata, link, pdata->netdev, "CL37 AN enabled/restarted\n");
 }
 
-static bool xgbe_use_gmii_mode(struct xgbe_prv_data *pdata)
+static void xgbe_an37_disable(struct xgbe_prv_data *pdata)
 {
-       if (pdata->phy.autoneg == AUTONEG_ENABLE) {
-               if (pdata->phy.advertising & ADVERTISED_1000baseKX_Full)
-                       return true;
-       } else {
-               if (pdata->phy.speed == SPEED_1000)
-                       return true;
-       }
+       xgbe_an37_set(pdata, false, false);
+       xgbe_an37_disable_interrupts(pdata);
 
-       return false;
+       netif_dbg(pdata, link, pdata->netdev, "CL37 AN disabled\n");
 }
 
-static void xgbe_set_an(struct xgbe_prv_data *pdata, bool enable, bool restart)
+static void xgbe_an73_set(struct xgbe_prv_data *pdata, bool enable,
+                         bool restart)
 {
        unsigned int reg;
 
@@ -437,22 +418,62 @@ static void xgbe_set_an(struct xgbe_prv_data *pdata, bool enable, bool restart)
        XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_CTRL1, reg);
 }
 
-static void xgbe_restart_an(struct xgbe_prv_data *pdata)
+static void xgbe_an73_restart(struct xgbe_prv_data *pdata)
+{
+       xgbe_an73_enable_interrupts(pdata);
+       xgbe_an73_set(pdata, true, true);
+
+       netif_dbg(pdata, link, pdata->netdev, "CL73 AN enabled/restarted\n");
+}
+
+static void xgbe_an73_disable(struct xgbe_prv_data *pdata)
 {
-       xgbe_set_an(pdata, true, true);
+       xgbe_an73_set(pdata, false, false);
+       xgbe_an73_disable_interrupts(pdata);
 
-       netif_dbg(pdata, link, pdata->netdev, "AN enabled/restarted\n");
+       netif_dbg(pdata, link, pdata->netdev, "CL73 AN disabled\n");
+}
+
+static void xgbe_an_restart(struct xgbe_prv_data *pdata)
+{
+       switch (pdata->an_mode) {
+       case XGBE_AN_MODE_CL73:
+       case XGBE_AN_MODE_CL73_REDRV:
+               xgbe_an73_restart(pdata);
+               break;
+       case XGBE_AN_MODE_CL37:
+       case XGBE_AN_MODE_CL37_SGMII:
+               xgbe_an37_restart(pdata);
+               break;
+       default:
+               break;
+       }
 }
 
-static void xgbe_disable_an(struct xgbe_prv_data *pdata)
+static void xgbe_an_disable(struct xgbe_prv_data *pdata)
 {
-       xgbe_set_an(pdata, false, false);
+       switch (pdata->an_mode) {
+       case XGBE_AN_MODE_CL73:
+       case XGBE_AN_MODE_CL73_REDRV:
+               xgbe_an73_disable(pdata);
+               break;
+       case XGBE_AN_MODE_CL37:
+       case XGBE_AN_MODE_CL37_SGMII:
+               xgbe_an37_disable(pdata);
+               break;
+       default:
+               break;
+       }
+}
 
-       netif_dbg(pdata, link, pdata->netdev, "AN disabled\n");
+static void xgbe_an_disable_all(struct xgbe_prv_data *pdata)
+{
+       xgbe_an73_disable(pdata);
+       xgbe_an37_disable(pdata);
 }
 
-static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata,
-                                       enum xgbe_rx *state)
+static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata,
+                                         enum xgbe_rx *state)
 {
        unsigned int ad_reg, lp_reg, reg;
 
@@ -476,13 +497,15 @@ static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata,
        /* Start KR training */
        reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
        if (reg & XGBE_KR_TRAINING_ENABLE) {
-               XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 1);
+               if (pdata->phy_if.phy_impl.kr_training_pre)
+                       pdata->phy_if.phy_impl.kr_training_pre(pdata);
 
                reg |= XGBE_KR_TRAINING_START;
                XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL,
                            reg);
 
-               XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 0);
+               if (pdata->phy_if.phy_impl.kr_training_post)
+                       pdata->phy_if.phy_impl.kr_training_post(pdata);
 
                netif_dbg(pdata, link, pdata->netdev,
                          "KR training initiated\n");
@@ -491,8 +514,8 @@ static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata,
        return XGBE_AN_PAGE_RECEIVED;
 }
 
-static enum xgbe_an xgbe_an_tx_xnp(struct xgbe_prv_data *pdata,
-                                  enum xgbe_rx *state)
+static enum xgbe_an xgbe_an73_tx_xnp(struct xgbe_prv_data *pdata,
+                                    enum xgbe_rx *state)
 {
        u16 msg;
 
@@ -508,8 +531,8 @@ static enum xgbe_an xgbe_an_tx_xnp(struct xgbe_prv_data *pdata,
        return XGBE_AN_PAGE_RECEIVED;
 }
 
-static enum xgbe_an xgbe_an_rx_bpa(struct xgbe_prv_data *pdata,
-                                  enum xgbe_rx *state)
+static enum xgbe_an xgbe_an73_rx_bpa(struct xgbe_prv_data *pdata,
+                                    enum xgbe_rx *state)
 {
        unsigned int link_support;
        unsigned int reg, ad_reg, lp_reg;
@@ -528,12 +551,12 @@ static enum xgbe_an xgbe_an_rx_bpa(struct xgbe_prv_data *pdata,
 
        return ((ad_reg & XGBE_XNP_NP_EXCHANGE) ||
                (lp_reg & XGBE_XNP_NP_EXCHANGE))
-              ? xgbe_an_tx_xnp(pdata, state)
-              : xgbe_an_tx_training(pdata, state);
+              ? xgbe_an73_tx_xnp(pdata, state)
+              : xgbe_an73_tx_training(pdata, state);
 }
 
-static enum xgbe_an xgbe_an_rx_xnp(struct xgbe_prv_data *pdata,
-                                  enum xgbe_rx *state)
+static enum xgbe_an xgbe_an73_rx_xnp(struct xgbe_prv_data *pdata,
+                                    enum xgbe_rx *state)
 {
        unsigned int ad_reg, lp_reg;
 
@@ -543,11 +566,11 @@ static enum xgbe_an xgbe_an_rx_xnp(struct xgbe_prv_data *pdata,
 
        return ((ad_reg & XGBE_XNP_NP_EXCHANGE) ||
                (lp_reg & XGBE_XNP_NP_EXCHANGE))
-              ? xgbe_an_tx_xnp(pdata, state)
-              : xgbe_an_tx_training(pdata, state);
+              ? xgbe_an73_tx_xnp(pdata, state)
+              : xgbe_an73_tx_training(pdata, state);
 }
 
-static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata)
+static enum xgbe_an xgbe_an73_page_received(struct xgbe_prv_data *pdata)
 {
        enum xgbe_rx *state;
        unsigned long an_timeout;
@@ -566,20 +589,20 @@ static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata)
                        pdata->an_start = jiffies;
 
                        netif_dbg(pdata, link, pdata->netdev,
-                                 "AN timed out, resetting state\n");
+                                 "CL73 AN timed out, resetting state\n");
                }
        }
 
        state = xgbe_in_kr_mode(pdata) ? &pdata->kr_state
-                                          : &pdata->kx_state;
+                                      : &pdata->kx_state;
 
        switch (*state) {
        case XGBE_RX_BPA:
-               ret = xgbe_an_rx_bpa(pdata, state);
+               ret = xgbe_an73_rx_bpa(pdata, state);
                break;
 
        case XGBE_RX_XNP:
-               ret = xgbe_an_rx_xnp(pdata, state);
+               ret = xgbe_an73_rx_xnp(pdata, state);
                break;
 
        default:
@@ -589,7 +612,7 @@ static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata)
        return ret;
 }
 
-static enum xgbe_an xgbe_an_incompat_link(struct xgbe_prv_data *pdata)
+static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata)
 {
        /* Be sure we aren't looping trying to negotiate */
        if (xgbe_in_kr_mode(pdata)) {
@@ -611,23 +634,43 @@ static enum xgbe_an xgbe_an_incompat_link(struct xgbe_prv_data *pdata)
                        return XGBE_AN_NO_LINK;
        }
 
-       xgbe_disable_an(pdata);
+       xgbe_an73_disable(pdata);
 
        xgbe_switch_mode(pdata);
 
-       xgbe_restart_an(pdata);
+       xgbe_an73_restart(pdata);
 
        return XGBE_AN_INCOMPAT_LINK;
 }
 
-static irqreturn_t xgbe_an_isr(int irq, void *data)
+static void xgbe_an37_isr(struct xgbe_prv_data *pdata)
 {
-       struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+       unsigned int reg;
 
-       netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n");
+       /* Disable AN interrupts */
+       xgbe_an37_disable_interrupts(pdata);
+
+       /* Save the interrupt(s) that fired */
+       reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT);
+       pdata->an_int = reg & XGBE_AN_CL37_INT_MASK;
+       pdata->an_status = reg & ~XGBE_AN_CL37_INT_MASK;
 
+       if (pdata->an_int) {
+               /* Clear the interrupt(s) that fired and process them */
+               reg &= ~XGBE_AN_CL37_INT_MASK;
+               XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT, reg);
+
+               queue_work(pdata->an_workqueue, &pdata->an_irq_work);
+       } else {
+               /* Enable AN interrupts */
+               xgbe_an37_enable_interrupts(pdata);
+       }
+}
+
+static void xgbe_an73_isr(struct xgbe_prv_data *pdata)
+{
        /* Disable AN interrupts */
-       XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0);
+       xgbe_an73_disable_interrupts(pdata);
 
        /* Save the interrupt(s) that fired */
        pdata->an_int = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_INT);
@@ -639,13 +682,37 @@ static irqreturn_t xgbe_an_isr(int irq, void *data)
                queue_work(pdata->an_workqueue, &pdata->an_irq_work);
        } else {
                /* Enable AN interrupts */
-               XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK,
-                           XGBE_AN_INT_MASK);
+               xgbe_an73_enable_interrupts(pdata);
+       }
+}
+
+static irqreturn_t xgbe_an_isr(int irq, void *data)
+{
+       struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+
+       netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n");
+
+       switch (pdata->an_mode) {
+       case XGBE_AN_MODE_CL73:
+       case XGBE_AN_MODE_CL73_REDRV:
+               xgbe_an73_isr(pdata);
+               break;
+       case XGBE_AN_MODE_CL37:
+       case XGBE_AN_MODE_CL37_SGMII:
+               xgbe_an37_isr(pdata);
+               break;
+       default:
+               break;
        }
 
        return IRQ_HANDLED;
 }
 
+static irqreturn_t xgbe_an_combined_isr(int irq, struct xgbe_prv_data *pdata)
+{
+       return xgbe_an_isr(irq, pdata);
+}
+
 static void xgbe_an_irq_work(struct work_struct *work)
 {
        struct xgbe_prv_data *pdata = container_of(work,
@@ -679,36 +746,87 @@ static const char *xgbe_state_as_string(enum xgbe_an state)
        }
 }
 
-static void xgbe_an_state_machine(struct work_struct *work)
+static void xgbe_an37_state_machine(struct xgbe_prv_data *pdata)
 {
-       struct xgbe_prv_data *pdata = container_of(work,
-                                                  struct xgbe_prv_data,
-                                                  an_work);
        enum xgbe_an cur_state = pdata->an_state;
 
-       mutex_lock(&pdata->an_mutex);
+       if (!pdata->an_int)
+               return;
+
+       if (pdata->an_int & XGBE_AN_CL37_INT_CMPLT) {
+               pdata->an_state = XGBE_AN_COMPLETE;
+               pdata->an_int &= ~XGBE_AN_CL37_INT_CMPLT;
+
+               /* If SGMII is enabled, check the link status */
+               if ((pdata->an_mode == XGBE_AN_MODE_CL37_SGMII) &&
+                   !(pdata->an_status & XGBE_SGMII_AN_LINK_STATUS))
+                       pdata->an_state = XGBE_AN_NO_LINK;
+       }
+
+       netif_dbg(pdata, link, pdata->netdev, "CL37 AN %s\n",
+                 xgbe_state_as_string(pdata->an_state));
+
+       cur_state = pdata->an_state;
+
+       switch (pdata->an_state) {
+       case XGBE_AN_READY:
+               break;
+
+       case XGBE_AN_COMPLETE:
+               netif_dbg(pdata, link, pdata->netdev,
+                         "Auto negotiation successful\n");
+               break;
+
+       case XGBE_AN_NO_LINK:
+               break;
+
+       default:
+               pdata->an_state = XGBE_AN_ERROR;
+       }
+
+       if (pdata->an_state == XGBE_AN_ERROR) {
+               netdev_err(pdata->netdev,
+                          "error during auto-negotiation, state=%u\n",
+                          cur_state);
+
+               pdata->an_int = 0;
+               xgbe_an37_clear_interrupts(pdata);
+       }
+
+       if (pdata->an_state >= XGBE_AN_COMPLETE) {
+               pdata->an_result = pdata->an_state;
+               pdata->an_state = XGBE_AN_READY;
+
+               netif_dbg(pdata, link, pdata->netdev, "CL37 AN result: %s\n",
+                         xgbe_state_as_string(pdata->an_result));
+       }
+
+       xgbe_an37_enable_interrupts(pdata);
+}
+
+static void xgbe_an73_state_machine(struct xgbe_prv_data *pdata)
+{
+       enum xgbe_an cur_state = pdata->an_state;
 
        if (!pdata->an_int)
-               goto out;
+               return;
 
 next_int:
-       if (pdata->an_int & XGBE_AN_PG_RCV) {
+       if (pdata->an_int & XGBE_AN_CL73_PG_RCV) {
                pdata->an_state = XGBE_AN_PAGE_RECEIVED;
-               pdata->an_int &= ~XGBE_AN_PG_RCV;
-       } else if (pdata->an_int & XGBE_AN_INC_LINK) {
+               pdata->an_int &= ~XGBE_AN_CL73_PG_RCV;
+       } else if (pdata->an_int & XGBE_AN_CL73_INC_LINK) {
                pdata->an_state = XGBE_AN_INCOMPAT_LINK;
-               pdata->an_int &= ~XGBE_AN_INC_LINK;
-       } else if (pdata->an_int & XGBE_AN_INT_CMPLT) {
+               pdata->an_int &= ~XGBE_AN_CL73_INC_LINK;
+       } else if (pdata->an_int & XGBE_AN_CL73_INT_CMPLT) {
                pdata->an_state = XGBE_AN_COMPLETE;
-               pdata->an_int &= ~XGBE_AN_INT_CMPLT;
+               pdata->an_int &= ~XGBE_AN_CL73_INT_CMPLT;
        } else {
                pdata->an_state = XGBE_AN_ERROR;
        }
 
-       pdata->an_result = pdata->an_state;
-
 again:
-       netif_dbg(pdata, link, pdata->netdev, "AN %s\n",
+       netif_dbg(pdata, link, pdata->netdev, "CL73 AN %s\n",
                  xgbe_state_as_string(pdata->an_state));
 
        cur_state = pdata->an_state;
@@ -719,14 +837,14 @@ again:
                break;
 
        case XGBE_AN_PAGE_RECEIVED:
-               pdata->an_state = xgbe_an_page_received(pdata);
+               pdata->an_state = xgbe_an73_page_received(pdata);
                pdata->an_supported++;
                break;
 
        case XGBE_AN_INCOMPAT_LINK:
                pdata->an_supported = 0;
                pdata->parallel_detect = 0;
-               pdata->an_state = xgbe_an_incompat_link(pdata);
+               pdata->an_state = xgbe_an73_incompat_link(pdata);
                break;
 
        case XGBE_AN_COMPLETE:
@@ -745,14 +863,14 @@ again:
 
        if (pdata->an_state == XGBE_AN_NO_LINK) {
                pdata->an_int = 0;
-               XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0);
+               xgbe_an73_clear_interrupts(pdata);
        } else if (pdata->an_state == XGBE_AN_ERROR) {
                netdev_err(pdata->netdev,
                           "error during auto-negotiation, state=%u\n",
                           cur_state);
 
                pdata->an_int = 0;
-               XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0);
+               xgbe_an73_clear_interrupts(pdata);
        }
 
        if (pdata->an_state >= XGBE_AN_COMPLETE) {
@@ -762,7 +880,7 @@ again:
                pdata->kx_state = XGBE_RX_BPA;
                pdata->an_start = 0;
 
-               netif_dbg(pdata, link, pdata->netdev, "AN result: %s\n",
+               netif_dbg(pdata, link, pdata->netdev, "CL73 AN result: %s\n",
                          xgbe_state_as_string(pdata->an_result));
        }
 
@@ -772,20 +890,88 @@ again:
        if (pdata->an_int)
                goto next_int;
 
-out:
-       /* Enable AN interrupts on the way out */
-       XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, XGBE_AN_INT_MASK);
+       xgbe_an73_enable_interrupts(pdata);
+}
+
+static void xgbe_an_state_machine(struct work_struct *work)
+{
+       struct xgbe_prv_data *pdata = container_of(work,
+                                                  struct xgbe_prv_data,
+                                                  an_work);
+
+       mutex_lock(&pdata->an_mutex);
+
+       switch (pdata->an_mode) {
+       case XGBE_AN_MODE_CL73:
+       case XGBE_AN_MODE_CL73_REDRV:
+               xgbe_an73_state_machine(pdata);
+               break;
+       case XGBE_AN_MODE_CL37:
+       case XGBE_AN_MODE_CL37_SGMII:
+               xgbe_an37_state_machine(pdata);
+               break;
+       default:
+               break;
+       }
 
        mutex_unlock(&pdata->an_mutex);
 }
 
-static void xgbe_an_init(struct xgbe_prv_data *pdata)
+static void xgbe_an37_init(struct xgbe_prv_data *pdata)
 {
-       unsigned int reg;
+       unsigned int advertising, reg;
+
+       advertising = pdata->phy_if.phy_impl.an_advertising(pdata);
+
+       /* Set up Advertisement register */
+       reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE);
+       if (advertising & ADVERTISED_Pause)
+               reg |= 0x100;
+       else
+               reg &= ~0x100;
+
+       if (advertising & ADVERTISED_Asym_Pause)
+               reg |= 0x80;
+       else
+               reg &= ~0x80;
+
+       /* Full duplex, but not half */
+       reg |= XGBE_AN_CL37_FD_MASK;
+       reg &= ~XGBE_AN_CL37_HD_MASK;
+
+       XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE, reg);
+
+       /* Set up the Control register */
+       reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL);
+       reg &= ~XGBE_AN_CL37_TX_CONFIG_MASK;
+       reg &= ~XGBE_AN_CL37_PCS_MODE_MASK;
+
+       switch (pdata->an_mode) {
+       case XGBE_AN_MODE_CL37:
+               reg |= XGBE_AN_CL37_PCS_MODE_BASEX;
+               break;
+       case XGBE_AN_MODE_CL37_SGMII:
+               reg |= XGBE_AN_CL37_PCS_MODE_SGMII;
+               break;
+       default:
+               break;
+       }
+
+       XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg);
+
+       netif_dbg(pdata, link, pdata->netdev, "CL37 AN (%s) initialized\n",
+                 (pdata->an_mode == XGBE_AN_MODE_CL37) ? "BaseX" : "SGMII");
+}
+
+static void xgbe_an73_init(struct xgbe_prv_data *pdata)
+{
+       unsigned int advertising, reg;
+
+       advertising = pdata->phy_if.phy_impl.an_advertising(pdata);
 
        /* Set up Advertisement register 3 first */
        reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
-       if (pdata->phy.advertising & ADVERTISED_10000baseR_FEC)
+       if (advertising & ADVERTISED_10000baseR_FEC)
                reg |= 0xc000;
        else
                reg &= ~0xc000;
@@ -794,13 +980,13 @@ static void xgbe_an_init(struct xgbe_prv_data *pdata)
 
        /* Set up Advertisement register 2 next */
        reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
-       if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full)
+       if (advertising & ADVERTISED_10000baseKR_Full)
                reg |= 0x80;
        else
                reg &= ~0x80;
 
-       if ((pdata->phy.advertising & ADVERTISED_1000baseKX_Full) ||
-           (pdata->phy.advertising & ADVERTISED_2500baseX_Full))
+       if ((advertising & ADVERTISED_1000baseKX_Full) ||
+           (advertising & ADVERTISED_2500baseX_Full))
                reg |= 0x20;
        else
                reg &= ~0x20;
@@ -809,12 +995,12 @@ static void xgbe_an_init(struct xgbe_prv_data *pdata)
 
        /* Set up Advertisement register 1 last */
        reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
-       if (pdata->phy.advertising & ADVERTISED_Pause)
+       if (advertising & ADVERTISED_Pause)
                reg |= 0x400;
        else
                reg &= ~0x400;
 
-       if (pdata->phy.advertising & ADVERTISED_Asym_Pause)
+       if (advertising & ADVERTISED_Asym_Pause)
                reg |= 0x800;
        else
                reg &= ~0x800;
@@ -824,7 +1010,25 @@ static void xgbe_an_init(struct xgbe_prv_data *pdata)
 
        XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE, reg);
 
-       netif_dbg(pdata, link, pdata->netdev, "AN initialized\n");
+       netif_dbg(pdata, link, pdata->netdev, "CL73 AN initialized\n");
+}
+
+static void xgbe_an_init(struct xgbe_prv_data *pdata)
+{
+       /* Set up advertisement registers based on current settings */
+       pdata->an_mode = pdata->phy_if.phy_impl.an_mode(pdata);
+       switch (pdata->an_mode) {
+       case XGBE_AN_MODE_CL73:
+       case XGBE_AN_MODE_CL73_REDRV:
+               xgbe_an73_init(pdata);
+               break;
+       case XGBE_AN_MODE_CL37:
+       case XGBE_AN_MODE_CL37_SGMII:
+               xgbe_an37_init(pdata);
+               break;
+       default:
+               break;
+       }
 }
 
 static const char *xgbe_phy_fc_string(struct xgbe_prv_data *pdata)
@@ -842,6 +1046,8 @@ static const char *xgbe_phy_fc_string(struct xgbe_prv_data *pdata)
 static const char *xgbe_phy_speed_string(int speed)
 {
        switch (speed) {
+       case SPEED_100:
+               return "100Mbps";
        case SPEED_1000:
                return "1Gbps";
        case SPEED_2500:
@@ -907,24 +1113,32 @@ static void xgbe_phy_adjust_link(struct xgbe_prv_data *pdata)
                xgbe_phy_print_status(pdata);
 }
 
+static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed)
+{
+       return pdata->phy_if.phy_impl.valid_speed(pdata, speed);
+}
+
 static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata)
 {
+       enum xgbe_mode mode;
+
        netif_dbg(pdata, link, pdata->netdev, "fixed PHY configuration\n");
 
        /* Disable auto-negotiation */
-       xgbe_disable_an(pdata);
-
-       /* Validate/Set specified speed */
-       switch (pdata->phy.speed) {
-       case SPEED_10000:
-               xgbe_set_mode(pdata, XGBE_MODE_KR);
+       xgbe_an_disable(pdata);
+
+       /* Set specified mode for specified speed */
+       mode = pdata->phy_if.phy_impl.get_mode(pdata, pdata->phy.speed);
+       switch (mode) {
+       case XGBE_MODE_KX_1000:
+       case XGBE_MODE_KX_2500:
+       case XGBE_MODE_KR:
+       case XGBE_MODE_SGMII_100:
+       case XGBE_MODE_SGMII_1000:
+       case XGBE_MODE_X:
+       case XGBE_MODE_SFI:
                break;
-
-       case SPEED_2500:
-       case SPEED_1000:
-               xgbe_set_mode(pdata, XGBE_MODE_KX);
-               break;
-
+       case XGBE_MODE_UNKNOWN:
        default:
                return -EINVAL;
        }
@@ -933,38 +1147,60 @@ static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata)
        if (pdata->phy.duplex != DUPLEX_FULL)
                return -EINVAL;
 
+       xgbe_set_mode(pdata, mode);
+
        return 0;
 }
 
 static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata)
 {
+       int ret;
+
        set_bit(XGBE_LINK_INIT, &pdata->dev_state);
        pdata->link_check = jiffies;
 
-       if (pdata->phy.autoneg != AUTONEG_ENABLE)
-               return xgbe_phy_config_fixed(pdata);
+       ret = pdata->phy_if.phy_impl.an_config(pdata);
+       if (ret)
+               return ret;
+
+       if (pdata->phy.autoneg != AUTONEG_ENABLE) {
+               ret = xgbe_phy_config_fixed(pdata);
+               if (ret || !pdata->kr_redrv)
+                       return ret;
 
-       netif_dbg(pdata, link, pdata->netdev, "AN PHY configuration\n");
+               netif_dbg(pdata, link, pdata->netdev, "AN redriver support\n");
+       } else {
+               netif_dbg(pdata, link, pdata->netdev, "AN PHY configuration\n");
+       }
 
        /* Disable auto-negotiation interrupt */
        disable_irq(pdata->an_irq);
 
        /* Start auto-negotiation in a supported mode */
-       if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) {
+       if (xgbe_use_mode(pdata, XGBE_MODE_KR)) {
                xgbe_set_mode(pdata, XGBE_MODE_KR);
-       } else if ((pdata->phy.advertising & ADVERTISED_1000baseKX_Full) ||
-                  (pdata->phy.advertising & ADVERTISED_2500baseX_Full)) {
-               xgbe_set_mode(pdata, XGBE_MODE_KX);
+       } else if (xgbe_use_mode(pdata, XGBE_MODE_KX_2500)) {
+               xgbe_set_mode(pdata, XGBE_MODE_KX_2500);
+       } else if (xgbe_use_mode(pdata, XGBE_MODE_KX_1000)) {
+               xgbe_set_mode(pdata, XGBE_MODE_KX_1000);
+       } else if (xgbe_use_mode(pdata, XGBE_MODE_SFI)) {
+               xgbe_set_mode(pdata, XGBE_MODE_SFI);
+       } else if (xgbe_use_mode(pdata, XGBE_MODE_X)) {
+               xgbe_set_mode(pdata, XGBE_MODE_X);
+       } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_1000)) {
+               xgbe_set_mode(pdata, XGBE_MODE_SGMII_1000);
+       } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_100)) {
+               xgbe_set_mode(pdata, XGBE_MODE_SGMII_100);
        } else {
                enable_irq(pdata->an_irq);
                return -EINVAL;
        }
 
        /* Disable and stop any in progress auto-negotiation */
-       xgbe_disable_an(pdata);
+       xgbe_an_disable_all(pdata);
 
        /* Clear any auto-negotitation interrupts */
-       XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0);
+       xgbe_an_clear_interrupts_all(pdata);
 
        pdata->an_result = XGBE_AN_READY;
        pdata->an_state = XGBE_AN_READY;
@@ -974,11 +1210,8 @@ static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata)
        /* Re-enable auto-negotiation interrupt */
        enable_irq(pdata->an_irq);
 
-       /* Set up advertisement registers based on current settings */
        xgbe_an_init(pdata);
-
-       /* Enable and start auto-negotiation */
-       xgbe_restart_an(pdata);
+       xgbe_an_restart(pdata);
 
        return 0;
 }
@@ -1016,108 +1249,52 @@ static void xgbe_check_link_timeout(struct xgbe_prv_data *pdata)
        }
 }
 
-static void xgbe_phy_status_force(struct xgbe_prv_data *pdata)
+static enum xgbe_mode xgbe_phy_status_aneg(struct xgbe_prv_data *pdata)
 {
-       if (xgbe_in_kr_mode(pdata)) {
-               pdata->phy.speed = SPEED_10000;
-       } else {
-               switch (pdata->speed_set) {
-               case XGBE_SPEEDSET_1000_10000:
-                       pdata->phy.speed = SPEED_1000;
-                       break;
-
-               case XGBE_SPEEDSET_2500_10000:
-                       pdata->phy.speed = SPEED_2500;
-                       break;
-               }
-       }
-       pdata->phy.duplex = DUPLEX_FULL;
+       return pdata->phy_if.phy_impl.an_outcome(pdata);
 }
 
-static void xgbe_phy_status_aneg(struct xgbe_prv_data *pdata)
+static void xgbe_phy_status_result(struct xgbe_prv_data *pdata)
 {
-       unsigned int ad_reg, lp_reg;
+       enum xgbe_mode mode;
 
        pdata->phy.lp_advertising = 0;
 
        if ((pdata->phy.autoneg != AUTONEG_ENABLE) || pdata->parallel_detect)
-               return xgbe_phy_status_force(pdata);
-
-       pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
-       pdata->phy.lp_advertising |= ADVERTISED_Backplane;
-
-       /* Compare Advertisement and Link Partner register 1 */
-       ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
-       lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA);
-       if (lp_reg & 0x400)
-               pdata->phy.lp_advertising |= ADVERTISED_Pause;
-       if (lp_reg & 0x800)
-               pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
-
-       if (pdata->phy.pause_autoneg) {
-               /* Set flow control based on auto-negotiation result */
-               pdata->phy.tx_pause = 0;
-               pdata->phy.rx_pause = 0;
-
-               if (ad_reg & lp_reg & 0x400) {
-                       pdata->phy.tx_pause = 1;
-                       pdata->phy.rx_pause = 1;
-               } else if (ad_reg & lp_reg & 0x800) {
-                       if (ad_reg & 0x400)
-                               pdata->phy.rx_pause = 1;
-                       else if (lp_reg & 0x400)
-                               pdata->phy.tx_pause = 1;
-               }
-       }
-
-       /* Compare Advertisement and Link Partner register 2 */
-       ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
-       lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1);
-       if (lp_reg & 0x80)
-               pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full;
-       if (lp_reg & 0x20) {
-               switch (pdata->speed_set) {
-               case XGBE_SPEEDSET_1000_10000:
-                       pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full;
-                       break;
-               case XGBE_SPEEDSET_2500_10000:
-                       pdata->phy.lp_advertising |= ADVERTISED_2500baseX_Full;
-                       break;
-               }
-       }
+               mode = xgbe_cur_mode(pdata);
+       else
+               mode = xgbe_phy_status_aneg(pdata);
 
-       ad_reg &= lp_reg;
-       if (ad_reg & 0x80) {
+       switch (mode) {
+       case XGBE_MODE_SGMII_100:
+               pdata->phy.speed = SPEED_100;
+               break;
+       case XGBE_MODE_X:
+       case XGBE_MODE_KX_1000:
+       case XGBE_MODE_SGMII_1000:
+               pdata->phy.speed = SPEED_1000;
+               break;
+       case XGBE_MODE_KX_2500:
+               pdata->phy.speed = SPEED_2500;
+               break;
+       case XGBE_MODE_KR:
+       case XGBE_MODE_SFI:
                pdata->phy.speed = SPEED_10000;
-               xgbe_set_mode(pdata, XGBE_MODE_KR);
-       } else if (ad_reg & 0x20) {
-               switch (pdata->speed_set) {
-               case XGBE_SPEEDSET_1000_10000:
-                       pdata->phy.speed = SPEED_1000;
-                       break;
-
-               case XGBE_SPEEDSET_2500_10000:
-                       pdata->phy.speed = SPEED_2500;
-                       break;
-               }
-
-               xgbe_set_mode(pdata, XGBE_MODE_KX);
-       } else {
+               break;
+       case XGBE_MODE_UNKNOWN:
+       default:
                pdata->phy.speed = SPEED_UNKNOWN;
        }
 
-       /* Compare Advertisement and Link Partner register 3 */
-       ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
-       lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2);
-       if (lp_reg & 0xc000)
-               pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC;
-
        pdata->phy.duplex = DUPLEX_FULL;
+
+       xgbe_set_mode(pdata, mode);
 }
 
 static void xgbe_phy_status(struct xgbe_prv_data *pdata)
 {
-       unsigned int reg, link_aneg;
+       unsigned int link_aneg;
+       int an_restart;
 
        if (test_bit(XGBE_LINK_ERR, &pdata->dev_state)) {
                netif_carrier_off(pdata->netdev);
@@ -1128,12 +1305,12 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
 
        link_aneg = (pdata->phy.autoneg == AUTONEG_ENABLE);
 
-       /* Get the link status. Link status is latched low, so read
-        * once to clear and then read again to get current state
-        */
-       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
-       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
-       pdata->phy.link = (reg & MDIO_STAT1_LSTATUS) ? 1 : 0;
+       pdata->phy.link = pdata->phy_if.phy_impl.link_status(pdata,
+                                                            &an_restart);
+       if (an_restart) {
+               xgbe_phy_config_aneg(pdata);
+               return;
+       }
 
        if (pdata->phy.link) {
                if (link_aneg && !xgbe_phy_aneg_done(pdata)) {
@@ -1141,7 +1318,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
                        return;
                }
 
-               xgbe_phy_status_aneg(pdata);
+               xgbe_phy_status_result(pdata);
 
                if (test_bit(XGBE_LINK_INIT, &pdata->dev_state))
                        clear_bit(XGBE_LINK_INIT, &pdata->dev_state);
@@ -1155,7 +1332,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
                                return;
                }
 
-               xgbe_phy_status_aneg(pdata);
+               xgbe_phy_status_result(pdata);
 
                netif_carrier_off(pdata->netdev);
        }
@@ -1168,13 +1345,19 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
 {
        netif_dbg(pdata, link, pdata->netdev, "stopping PHY\n");
 
+       if (!pdata->phy_started)
+               return;
+
+       /* Indicate the PHY is down */
+       pdata->phy_started = 0;
+
        /* Disable auto-negotiation */
-       xgbe_disable_an(pdata);
+       xgbe_an_disable_all(pdata);
 
-       /* Disable auto-negotiation interrupts */
-       XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0);
+       if (pdata->dev_irq != pdata->an_irq)
+               devm_free_irq(pdata->dev, pdata->an_irq, pdata);
 
-       devm_free_irq(pdata->dev, pdata->an_irq, pdata);
+       pdata->phy_if.phy_impl.stop(pdata);
 
        pdata->phy.link = 0;
        netif_carrier_off(pdata->netdev);
@@ -1189,64 +1372,74 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata)
 
        netif_dbg(pdata, link, pdata->netdev, "starting PHY\n");
 
-       ret = devm_request_irq(pdata->dev, pdata->an_irq,
-                              xgbe_an_isr, 0, pdata->an_name,
-                              pdata);
-       if (ret) {
-               netdev_err(netdev, "phy irq request failed\n");
+       ret = pdata->phy_if.phy_impl.start(pdata);
+       if (ret)
                return ret;
+
+       /* If we have a separate AN irq, enable it */
+       if (pdata->dev_irq != pdata->an_irq) {
+               ret = devm_request_irq(pdata->dev, pdata->an_irq,
+                                      xgbe_an_isr, 0, pdata->an_name,
+                                      pdata);
+               if (ret) {
+                       netdev_err(netdev, "phy irq request failed\n");
+                       goto err_stop;
+               }
        }
 
        /* Set initial mode - call the mode setting routines
         * directly to insure we are properly configured
         */
-       if (xgbe_use_xgmii_mode(pdata)) {
-               xgbe_xgmii_mode(pdata);
-       } else if (xgbe_use_gmii_mode(pdata)) {
-               xgbe_gmii_mode(pdata);
-       } else if (xgbe_use_gmii_2500_mode(pdata)) {
-               xgbe_gmii_2500_mode(pdata);
+       if (xgbe_use_mode(pdata, XGBE_MODE_KR)) {
+               xgbe_kr_mode(pdata);
+       } else if (xgbe_use_mode(pdata, XGBE_MODE_KX_2500)) {
+               xgbe_kx_2500_mode(pdata);
+       } else if (xgbe_use_mode(pdata, XGBE_MODE_KX_1000)) {
+               xgbe_kx_1000_mode(pdata);
+       } else if (xgbe_use_mode(pdata, XGBE_MODE_SFI)) {
+               xgbe_sfi_mode(pdata);
+       } else if (xgbe_use_mode(pdata, XGBE_MODE_X)) {
+               xgbe_x_mode(pdata);
+       } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_1000)) {
+               xgbe_sgmii_1000_mode(pdata);
+       } else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_100)) {
+               xgbe_sgmii_100_mode(pdata);
        } else {
                ret = -EINVAL;
                goto err_irq;
        }
 
-       /* Set up advertisement registers based on current settings */
-       xgbe_an_init(pdata);
+       /* Indicate the PHY is up and running */
+       pdata->phy_started = 1;
 
-       /* Enable auto-negotiation interrupts */
-       XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0x07);
+       xgbe_an_init(pdata);
+       xgbe_an_enable_interrupts(pdata);
 
        return xgbe_phy_config_aneg(pdata);
 
 err_irq:
-       devm_free_irq(pdata->dev, pdata->an_irq, pdata);
+       if (pdata->dev_irq != pdata->an_irq)
+               devm_free_irq(pdata->dev, pdata->an_irq, pdata);
+
+err_stop:
+       pdata->phy_if.phy_impl.stop(pdata);
 
        return ret;
 }
 
 static int xgbe_phy_reset(struct xgbe_prv_data *pdata)
 {
-       unsigned int count, reg;
-
-       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
-       reg |= MDIO_CTRL1_RESET;
-       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
-
-       count = 50;
-       do {
-               msleep(20);
-               reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
-       } while ((reg & MDIO_CTRL1_RESET) && --count);
+       int ret;
 
-       if (reg & MDIO_CTRL1_RESET)
-               return -ETIMEDOUT;
+       ret = pdata->phy_if.phy_impl.reset(pdata);
+       if (ret)
+               return ret;
 
        /* Disable auto-negotiation for now */
-       xgbe_disable_an(pdata);
+       xgbe_an_disable_all(pdata);
 
        /* Clear auto-negotiation interrupts */
-       XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0);
+       xgbe_an_clear_interrupts_all(pdata);
 
        return 0;
 }
@@ -1257,74 +1450,96 @@ static void xgbe_dump_phy_registers(struct xgbe_prv_data *pdata)
 
        dev_dbg(dev, "\n************* PHY Reg dump **********************\n");
 
-       dev_dbg(dev, "PCS Control Reg (%#04x) = %#04x\n", MDIO_CTRL1,
+       dev_dbg(dev, "PCS Control Reg (%#06x) = %#06x\n", MDIO_CTRL1,
                XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1));
-       dev_dbg(dev, "PCS Status Reg (%#04x) = %#04x\n", MDIO_STAT1,
+       dev_dbg(dev, "PCS Status Reg (%#06x) = %#06x\n", MDIO_STAT1,
                XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1));
-       dev_dbg(dev, "Phy Id (PHYS ID 1 %#04x)= %#04x\n", MDIO_DEVID1,
+       dev_dbg(dev, "Phy Id (PHYS ID 1 %#06x)= %#06x\n", MDIO_DEVID1,
                XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID1));
-       dev_dbg(dev, "Phy Id (PHYS ID 2 %#04x)= %#04x\n", MDIO_DEVID2,
+       dev_dbg(dev, "Phy Id (PHYS ID 2 %#06x)= %#06x\n", MDIO_DEVID2,
                XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID2));
-       dev_dbg(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS1,
+       dev_dbg(dev, "Devices in Package (%#06x)= %#06x\n", MDIO_DEVS1,
                XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS1));
-       dev_dbg(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS2,
+       dev_dbg(dev, "Devices in Package (%#06x)= %#06x\n", MDIO_DEVS2,
                XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS2));
 
-       dev_dbg(dev, "Auto-Neg Control Reg (%#04x) = %#04x\n", MDIO_CTRL1,
+       dev_dbg(dev, "Auto-Neg Control Reg (%#06x) = %#06x\n", MDIO_CTRL1,
                XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1));
-       dev_dbg(dev, "Auto-Neg Status Reg (%#04x) = %#04x\n", MDIO_STAT1,
+       dev_dbg(dev, "Auto-Neg Status Reg (%#06x) = %#06x\n", MDIO_STAT1,
                XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_STAT1));
-       dev_dbg(dev, "Auto-Neg Ad Reg 1 (%#04x) = %#04x\n",
+       dev_dbg(dev, "Auto-Neg Ad Reg 1 (%#06x) = %#06x\n",
                MDIO_AN_ADVERTISE,
                XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE));
-       dev_dbg(dev, "Auto-Neg Ad Reg 2 (%#04x) = %#04x\n",
+       dev_dbg(dev, "Auto-Neg Ad Reg 2 (%#06x) = %#06x\n",
                MDIO_AN_ADVERTISE + 1,
                XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1));
-       dev_dbg(dev, "Auto-Neg Ad Reg 3 (%#04x) = %#04x\n",
+       dev_dbg(dev, "Auto-Neg Ad Reg 3 (%#06x) = %#06x\n",
                MDIO_AN_ADVERTISE + 2,
                XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2));
-       dev_dbg(dev, "Auto-Neg Completion Reg (%#04x) = %#04x\n",
+       dev_dbg(dev, "Auto-Neg Completion Reg (%#06x) = %#06x\n",
                MDIO_AN_COMP_STAT,
                XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_COMP_STAT));
 
        dev_dbg(dev, "\n*************************************************\n");
 }
 
-static void xgbe_phy_init(struct xgbe_prv_data *pdata)
+/* Return the fastest speed found in pdata->phy.advertising.
+ *
+ * The ADVERTISED_* bits are tested from fastest to slowest (10000, 2500,
+ * 1000, 100) and the first match wins.  SPEED_UNKNOWN is returned when
+ * none of the tested bits is set.
+ */
+static int xgbe_phy_best_advertised_speed(struct xgbe_prv_data *pdata)
 {
+       if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full)
+               return SPEED_10000;
+       else if (pdata->phy.advertising & ADVERTISED_10000baseT_Full)
+               return SPEED_10000;
+       else if (pdata->phy.advertising & ADVERTISED_2500baseX_Full)
+               return SPEED_2500;
+       else if (pdata->phy.advertising & ADVERTISED_1000baseKX_Full)
+               return SPEED_1000;
+       else if (pdata->phy.advertising & ADVERTISED_1000baseT_Full)
+               return SPEED_1000;
+       else if (pdata->phy.advertising & ADVERTISED_100baseT_Full)
+               return SPEED_100;
+
+       return SPEED_UNKNOWN;
+}
+
+/* Tear down the PHY: stop it through xgbe_phy_stop() and then call the
+ * PHY implementation's exit hook.
+ */
+static void xgbe_phy_exit(struct xgbe_prv_data *pdata)
+{
+       xgbe_phy_stop(pdata);
+
+       pdata->phy_if.phy_impl.exit(pdata);
+}
+
+static int xgbe_phy_init(struct xgbe_prv_data *pdata)
+{
+       int ret;
+
        mutex_init(&pdata->an_mutex);
        INIT_WORK(&pdata->an_irq_work, xgbe_an_irq_work);
        INIT_WORK(&pdata->an_work, xgbe_an_state_machine);
        pdata->mdio_mmd = MDIO_MMD_PCS;
 
-       /* Initialize supported features */
-       pdata->phy.supported = SUPPORTED_Autoneg;
-       pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
-       pdata->phy.supported |= SUPPORTED_Backplane;
-       pdata->phy.supported |= SUPPORTED_10000baseKR_Full;
-       switch (pdata->speed_set) {
-       case XGBE_SPEEDSET_1000_10000:
-               pdata->phy.supported |= SUPPORTED_1000baseKX_Full;
-               break;
-       case XGBE_SPEEDSET_2500_10000:
-               pdata->phy.supported |= SUPPORTED_2500baseX_Full;
-               break;
-       }
-
+       /* Check for FEC support */
        pdata->fec_ability = XMDIO_READ(pdata, MDIO_MMD_PMAPMD,
                                        MDIO_PMA_10GBR_FECABLE);
        pdata->fec_ability &= (MDIO_PMA_10GBR_FECABLE_ABLE |
                               MDIO_PMA_10GBR_FECABLE_ERRABLE);
-       if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
-               pdata->phy.supported |= SUPPORTED_10000baseR_FEC;
 
+       /* Setup the phy (including supported features) */
+       ret = pdata->phy_if.phy_impl.init(pdata);
+       if (ret)
+               return ret;
        pdata->phy.advertising = pdata->phy.supported;
 
        pdata->phy.address = 0;
 
-       pdata->phy.autoneg = AUTONEG_ENABLE;
-       pdata->phy.speed = SPEED_UNKNOWN;
-       pdata->phy.duplex = DUPLEX_UNKNOWN;
+       if (pdata->phy.advertising & ADVERTISED_Autoneg) {
+               pdata->phy.autoneg = AUTONEG_ENABLE;
+               pdata->phy.speed = SPEED_UNKNOWN;
+               pdata->phy.duplex = DUPLEX_UNKNOWN;
+       } else {
+               pdata->phy.autoneg = AUTONEG_DISABLE;
+               pdata->phy.speed = xgbe_phy_best_advertised_speed(pdata);
+               pdata->phy.duplex = DUPLEX_FULL;
+       }
 
        pdata->phy.link = 0;
 
@@ -1346,11 +1561,14 @@ static void xgbe_phy_init(struct xgbe_prv_data *pdata)
 
        if (netif_msg_drv(pdata))
                xgbe_dump_phy_registers(pdata);
+
+       return 0;
 }
 
 void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *phy_if)
 {
        phy_if->phy_init        = xgbe_phy_init;
+       phy_if->phy_exit        = xgbe_phy_exit;
 
        phy_if->phy_reset       = xgbe_phy_reset;
        phy_if->phy_start       = xgbe_phy_start;
@@ -1358,4 +1576,8 @@ void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *phy_if)
 
        phy_if->phy_status      = xgbe_phy_status;
        phy_if->phy_config_aneg = xgbe_phy_config_aneg;
+
+       phy_if->phy_valid_speed = xgbe_phy_valid_speed;
+
+       phy_if->an_isr          = xgbe_an_combined_isr;
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
new file mode 100644 (file)
index 0000000..e76b7f6
--- /dev/null
@@ -0,0 +1,529 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/log2.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+/* Enable MSI interrupts and record the resulting irq numbers in pdata.
+ *
+ * A multi-vector allocation is tried first; if that fails a single MSI
+ * vector is requested instead.  Returns 0 on success or the negative
+ * value returned by pci_enable_msi() when even one vector is unavailable.
+ */
+static int xgbe_config_msi(struct xgbe_prv_data *pdata)
+{
+       unsigned int msi_count;
+       unsigned int i, j;
+       int ret;
+
+       /* Base (non-DMA) vectors plus one per ring of the larger direction,
+        * rounded up to a power of two.
+        * NOTE(review): presumably rounded because multi-message MSI is
+        * allocated in power-of-two sizes - confirm against the PCI core.
+        */
+       msi_count = XGBE_MSIX_BASE_COUNT;
+       msi_count += max(pdata->rx_ring_count,
+                        pdata->tx_ring_count);
+       msi_count = roundup_pow_of_two(msi_count);
+
+       ret = pci_enable_msi_exact(pdata->pcidev, msi_count);
+       if (ret < 0) {
+               dev_info(pdata->dev, "MSI request for %u interrupts failed\n",
+                        msi_count);
+
+               /* Fall back to a single MSI vector */
+               ret = pci_enable_msi(pdata->pcidev);
+               if (ret < 0) {
+                       dev_info(pdata->dev, "MSI enablement failed\n");
+                       return ret;
+               }
+
+               msi_count = 1;
+       }
+
+       pdata->irq_count = msi_count;
+
+       pdata->dev_irq = pdata->pcidev->irq;
+
+       if (msi_count > 1) {
+               /* Vectors are taken as consecutive numbers starting at
+                * pcidev->irq: +1 ECC, +2 I2C, +3 auto-negotiation, then
+                * the DMA channels from XGBE_MSIX_BASE_COUNT onwards.
+                */
+               pdata->ecc_irq = pdata->pcidev->irq + 1;
+               pdata->i2c_irq = pdata->pcidev->irq + 2;
+               pdata->an_irq = pdata->pcidev->irq + 3;
+
+               for (i = XGBE_MSIX_BASE_COUNT, j = 0;
+                    (i < msi_count) && (j < XGBE_MAX_DMA_CHANNELS);
+                    i++, j++)
+                       pdata->channel_irq[j] = pdata->pcidev->irq + i;
+               pdata->channel_irq_count = j;
+
+               pdata->per_channel_irq = 1;
+               pdata->channel_irq_mode = XGBE_IRQ_MODE_LEVEL;
+       } else {
+               /* Single vector: every source shares the device irq */
+               pdata->ecc_irq = pdata->pcidev->irq;
+               pdata->i2c_irq = pdata->pcidev->irq;
+               pdata->an_irq = pdata->pcidev->irq;
+       }
+
+       if (netif_msg_probe(pdata))
+               dev_dbg(pdata->dev, "MSI interrupts enabled\n");
+
+       return 0;
+}
+
+/* Enable MSI-X interrupts and record the resulting vectors in pdata.
+ *
+ * Requests between XGBE_MSIX_MIN_COUNT and (XGBE_MSIX_BASE_COUNT + the
+ * larger of the rx/tx ring counts) vectors.  Returns 0 on success,
+ * -ENOMEM if the entry table cannot be allocated, or the negative value
+ * returned by pci_enable_msix_range().
+ */
+static int xgbe_config_msix(struct xgbe_prv_data *pdata)
+{
+       unsigned int msix_count;
+       unsigned int i, j;
+       int ret;
+
+       msix_count = XGBE_MSIX_BASE_COUNT;
+       msix_count += max(pdata->rx_ring_count,
+                         pdata->tx_ring_count);
+
+       pdata->msix_entries = devm_kcalloc(pdata->dev, msix_count,
+                                          sizeof(struct msix_entry),
+                                          GFP_KERNEL);
+       if (!pdata->msix_entries)
+               return -ENOMEM;
+
+       for (i = 0; i < msix_count; i++)
+               pdata->msix_entries[i].entry = i;
+
+       ret = pci_enable_msix_range(pdata->pcidev, pdata->msix_entries,
+                                   XGBE_MSIX_MIN_COUNT, msix_count);
+       if (ret < 0) {
+               dev_info(pdata->dev, "MSI-X enablement failed\n");
+               devm_kfree(pdata->dev, pdata->msix_entries);
+               pdata->msix_entries = NULL;
+               return ret;
+       }
+
+       /* A non-negative return is used as the number of vectors granted */
+       pdata->irq_count = ret;
+
+       /* Entries 0-3 have fixed roles: device, ECC, I2C, auto-negotiation.
+        * NOTE(review): relies on XGBE_MSIX_MIN_COUNT being at least 4 so
+        * these entries are always valid - confirm in xgbe.h.
+        */
+       pdata->dev_irq = pdata->msix_entries[0].vector;
+       pdata->ecc_irq = pdata->msix_entries[1].vector;
+       pdata->i2c_irq = pdata->msix_entries[2].vector;
+       pdata->an_irq = pdata->msix_entries[3].vector;
+
+       /* The remaining granted vectors go to the DMA channels */
+       for (i = XGBE_MSIX_BASE_COUNT, j = 0; i < ret; i++, j++)
+               pdata->channel_irq[j] = pdata->msix_entries[i].vector;
+       pdata->channel_irq_count = j;
+
+       pdata->per_channel_irq = 1;
+       pdata->channel_irq_mode = XGBE_IRQ_MODE_LEVEL;
+
+       if (netif_msg_probe(pdata))
+               dev_dbg(pdata->dev, "MSI-X interrupts enabled\n");
+
+       return 0;
+}
+
+/* Select the interrupt scheme for the device.
+ *
+ * MSI-X is tried first, then MSI.  If both fail, the single pcidev->irq
+ * is used for every interrupt source and marked as shared.  The chosen
+ * irq numbers are logged when probe messages are enabled.
+ *
+ * Always returns 0.
+ */
+static int xgbe_config_irqs(struct xgbe_prv_data *pdata)
+{
+       int ret;
+
+       ret = xgbe_config_msix(pdata);
+       if (!ret)
+               goto out;
+
+       ret = xgbe_config_msi(pdata);
+       if (!ret)
+               goto out;
+
+       /* Neither MSI-X nor MSI is available: use the one device irq */
+       pdata->irq_count = 1;
+       pdata->irq_shared = 1;
+
+       pdata->dev_irq = pdata->pcidev->irq;
+       pdata->ecc_irq = pdata->pcidev->irq;
+       pdata->i2c_irq = pdata->pcidev->irq;
+       pdata->an_irq = pdata->pcidev->irq;
+
+out:
+       if (netif_msg_probe(pdata)) {
+               unsigned int i;
+
+               dev_dbg(pdata->dev, " dev irq=%d\n", pdata->dev_irq);
+               dev_dbg(pdata->dev, " ecc irq=%d\n", pdata->ecc_irq);
+               dev_dbg(pdata->dev, " i2c irq=%d\n", pdata->i2c_irq);
+               dev_dbg(pdata->dev, "  an irq=%d\n", pdata->an_irq);
+               for (i = 0; i < pdata->channel_irq_count; i++)
+                       dev_dbg(pdata->dev, " dma%u irq=%d\n",
+                               i, pdata->channel_irq[i]);
+       }
+
+       return 0;
+}
+
+/* PCI probe routine for the xgbe device.
+ *
+ * Allocates the driver private data, enables the PCI device, maps its
+ * memory BARs, reads the PCS window definition, the MAC address and the
+ * channel/queue/fifo limits from the property registers, configures the
+ * interrupts and finally sets up the net device.
+ *
+ * @pdev: PCI device being probed
+ * @id:   matching xgbe_pci_table entry; its driver_data supplies the
+ *        struct xgbe_version_data for this device
+ *
+ * Returns 0 on success or a negative errno.  On any failure after the
+ * private data has been allocated it is released with xgbe_free_pdata().
+ */
+static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct xgbe_prv_data *pdata;
+       struct device *dev = &pdev->dev;
+       void __iomem * const *iomap_table;
+       unsigned int ma_lo, ma_hi;
+       unsigned int reg;
+       int bar_mask;
+       int ret;
+
+       pdata = xgbe_alloc_pdata(dev);
+       if (IS_ERR(pdata)) {
+               ret = PTR_ERR(pdata);
+               goto err_alloc;
+       }
+
+       pdata->pcidev = pdev;
+       pci_set_drvdata(pdev, pdata);
+
+       /* Get the version data */
+       pdata->vdata = (struct xgbe_version_data *)id->driver_data;
+
+       ret = pcim_enable_device(pdev);
+       if (ret) {
+               dev_err(dev, "pcim_enable_device failed\n");
+               goto err_pci_enable;
+       }
+
+       /* Obtain the mmio areas for the device */
+       bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+       ret = pcim_iomap_regions(pdev, bar_mask, XGBE_DRV_NAME);
+       if (ret) {
+               dev_err(dev, "pcim_iomap_regions failed\n");
+               goto err_pci_enable;
+       }
+
+       iomap_table = pcim_iomap_table(pdev);
+       if (!iomap_table) {
+               dev_err(dev, "pcim_iomap_table failed\n");
+               ret = -ENOMEM;
+               goto err_pci_enable;
+       }
+
+       /* The property and I2C register blocks live at fixed offsets
+        * inside the XGMAC BAR
+        */
+       pdata->xgmac_regs = iomap_table[XGBE_XGMAC_BAR];
+       if (!pdata->xgmac_regs) {
+               dev_err(dev, "xgmac ioremap failed\n");
+               ret = -ENOMEM;
+               goto err_pci_enable;
+       }
+       pdata->xprop_regs = pdata->xgmac_regs + XGBE_MAC_PROP_OFFSET;
+       pdata->xi2c_regs = pdata->xgmac_regs + XGBE_I2C_CTRL_OFFSET;
+       if (netif_msg_probe(pdata)) {
+               dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs);
+               dev_dbg(dev, "xprop_regs = %p\n", pdata->xprop_regs);
+               dev_dbg(dev, "xi2c_regs  = %p\n", pdata->xi2c_regs);
+       }
+
+       pdata->xpcs_regs = iomap_table[XGBE_XPCS_BAR];
+       if (!pdata->xpcs_regs) {
+               dev_err(dev, "xpcs ioremap failed\n");
+               ret = -ENOMEM;
+               goto err_pci_enable;
+       }
+       if (netif_msg_probe(pdata))
+               dev_dbg(dev, "xpcs_regs  = %p\n", pdata->xpcs_regs);
+
+       /* Configure the PCS indirect addressing support */
+       reg = XPCS32_IOREAD(pdata, PCS_V2_WINDOW_DEF);
+       pdata->xpcs_window = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, OFFSET);
+       pdata->xpcs_window <<= 6;
+       pdata->xpcs_window_size = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, SIZE);
+       pdata->xpcs_window_size = 1 << (pdata->xpcs_window_size + 7);
+       pdata->xpcs_window_mask = pdata->xpcs_window_size - 1;
+       if (netif_msg_probe(pdata)) {
+               dev_dbg(dev, "xpcs window      = %#010x\n",
+                       pdata->xpcs_window);
+               dev_dbg(dev, "xpcs window size = %#010x\n",
+                       pdata->xpcs_window_size);
+               dev_dbg(dev, "xpcs window mask = %#010x\n",
+                       pdata->xpcs_window_mask);
+       }
+
+       pci_set_master(pdev);
+
+       /* Enable all interrupts in the hardware */
+       XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff);
+
+       /* Retrieve the MAC address */
+       ma_lo = XP_IOREAD(pdata, XP_MAC_ADDR_LO);
+       ma_hi = XP_IOREAD(pdata, XP_MAC_ADDR_HI);
+       pdata->mac_addr[0] = ma_lo & 0xff;
+       pdata->mac_addr[1] = (ma_lo >> 8) & 0xff;
+       pdata->mac_addr[2] = (ma_lo >> 16) & 0xff;
+       pdata->mac_addr[3] = (ma_lo >> 24) & 0xff;
+       pdata->mac_addr[4] = ma_hi & 0xff;
+       pdata->mac_addr[5] = (ma_hi >> 8) & 0xff;
+       if (!XP_GET_BITS(ma_hi, XP_MAC_ADDR_HI, VALID) ||
+           !is_valid_ether_addr(pdata->mac_addr)) {
+               dev_err(dev, "invalid mac address\n");
+               ret = -EINVAL;
+               goto err_pci_enable;
+       }
+
+       /* Clock settings */
+       pdata->sysclk_rate = XGBE_V2_DMA_CLOCK_FREQ;
+       pdata->ptpclk_rate = XGBE_V2_PTP_CLOCK_FREQ;
+
+       /* Set the DMA coherency values */
+       pdata->coherent = 1;
+       pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
+       pdata->arcache = XGBE_DMA_OS_ARCACHE;
+       pdata->awcache = XGBE_DMA_OS_AWCACHE;
+
+       /* Set the maximum channels and queues */
+       reg = XP_IOREAD(pdata, XP_PROP_1);
+       pdata->tx_max_channel_count = XP_GET_BITS(reg, XP_PROP_1, MAX_TX_DMA);
+       pdata->rx_max_channel_count = XP_GET_BITS(reg, XP_PROP_1, MAX_RX_DMA);
+       pdata->tx_max_q_count = XP_GET_BITS(reg, XP_PROP_1, MAX_TX_QUEUES);
+       pdata->rx_max_q_count = XP_GET_BITS(reg, XP_PROP_1, MAX_RX_QUEUES);
+       if (netif_msg_probe(pdata)) {
+               dev_dbg(dev, "max tx/rx channel count = %u/%u\n",
+                       pdata->tx_max_channel_count,
+                       pdata->rx_max_channel_count);
+               dev_dbg(dev, "max tx/rx hw queue count = %u/%u\n",
+                       pdata->tx_max_q_count, pdata->rx_max_q_count);
+       }
+
+       /* Set the hardware channel and queue counts */
+       xgbe_set_counts(pdata);
+
+       /* Set the maximum fifo amounts: the hardware-reported value is
+        * scaled by 16384 and capped at the per-version limit
+        */
+       reg = XP_IOREAD(pdata, XP_PROP_2);
+       pdata->tx_max_fifo_size = XP_GET_BITS(reg, XP_PROP_2, TX_FIFO_SIZE);
+       pdata->tx_max_fifo_size *= 16384;
+       pdata->tx_max_fifo_size = min(pdata->tx_max_fifo_size,
+                                     pdata->vdata->tx_max_fifo_size);
+       pdata->rx_max_fifo_size = XP_GET_BITS(reg, XP_PROP_2, RX_FIFO_SIZE);
+       pdata->rx_max_fifo_size *= 16384;
+       pdata->rx_max_fifo_size = min(pdata->rx_max_fifo_size,
+                                     pdata->vdata->rx_max_fifo_size);
+       if (netif_msg_probe(pdata))
+               dev_dbg(dev, "max tx/rx max fifo size = %u/%u\n",
+                       pdata->tx_max_fifo_size, pdata->rx_max_fifo_size);
+
+       /* Configure interrupt support */
+       ret = xgbe_config_irqs(pdata);
+       if (ret)
+               goto err_pci_enable;
+
+       /* Configure the netdev resource */
+       ret = xgbe_config_netdev(pdata);
+       if (ret)
+               goto err_pci_enable;
+
+       netdev_notice(pdata->netdev, "net device enabled\n");
+
+       return 0;
+
+err_pci_enable:
+       xgbe_free_pdata(pdata);
+
+err_alloc:
+       dev_notice(dev, "net device not enabled\n");
+
+       return ret;
+}
+
+/* PCI remove routine: tear down the net device, then free the driver
+ * private data that was stored as the PCI drvdata at probe time.
+ */
+static void xgbe_pci_remove(struct pci_dev *pdev)
+{
+       struct xgbe_prv_data *pdata = pci_get_drvdata(pdev);
+
+       xgbe_deconfig_netdev(pdata);
+
+       xgbe_free_pdata(pdata);
+}
+
+#ifdef CONFIG_PM
+/* Legacy PCI suspend hook.
+ *
+ * Powers the device down if the interface is running, then sets the
+ * low-power bit in the PCS control register.  The value written is kept
+ * in pdata->lpm_ctrl so that resume can clear the bit again.
+ *
+ * Returns the xgbe_powerdown() result, or 0 if the interface was not
+ * running.
+ */
+static int xgbe_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       struct xgbe_prv_data *pdata = pci_get_drvdata(pdev);
+       struct net_device *netdev = pdata->netdev;
+       int ret = 0;
+
+       if (netif_running(netdev))
+               ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT);
+
+       pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+       pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
+
+       return ret;
+}
+
+/* Legacy PCI resume hook.
+ *
+ * Clears the low-power bit from the control value saved in
+ * pdata->lpm_ctrl and writes it back to the PCS control register.  If
+ * the interface is running, the device is powered up and a restart is
+ * scheduled.
+ *
+ * Returns the xgbe_powerup() result, or 0 if the interface was not
+ * running.
+ */
+static int xgbe_pci_resume(struct pci_dev *pdev)
+{
+       struct xgbe_prv_data *pdata = pci_get_drvdata(pdev);
+       struct net_device *netdev = pdata->netdev;
+       int ret = 0;
+
+       pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
+
+       if (netif_running(netdev)) {
+               ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT);
+
+               /* Schedule a restart in case the link or phy state changed
+                * while we were powered down.
+                */
+               schedule_work(&pdata->restart_work);
+       }
+
+       return ret;
+}
+#endif /* CONFIG_PM */
+
+/* Version data attached to PCI device id 0x1458.  The fifo sizes are
+ * used at probe time as an upper bound on the hardware-reported values.
+ */
+static const struct xgbe_version_data xgbe_v2a = {
+       .init_function_ptrs_phy_impl    = xgbe_init_function_ptrs_phy_v2,
+       .xpcs_access                    = XGBE_XPCS_ACCESS_V2,
+       .mmc_64bit                      = 1,
+       .tx_max_fifo_size               = 229376,
+       .rx_max_fifo_size               = 229376,
+       .tx_tstamp_workaround           = 1,
+       .ecc_support                    = 1,
+       .i2c_support                    = 1,
+};
+
+/* Version data attached to PCI device id 0x1459.  Identical to xgbe_v2a
+ * except for the smaller (65536 byte) tx/rx fifo size limits.
+ */
+static const struct xgbe_version_data xgbe_v2b = {
+       .init_function_ptrs_phy_impl    = xgbe_init_function_ptrs_phy_v2,
+       .xpcs_access                    = XGBE_XPCS_ACCESS_V2,
+       .mmc_64bit                      = 1,
+       .tx_max_fifo_size               = 65536,
+       .rx_max_fifo_size               = 65536,
+       .tx_tstamp_workaround           = 1,
+       .ecc_support                    = 1,
+       .i2c_support                    = 1,
+};
+
+/* PCI ids handled by this driver.  driver_data carries a pointer to the
+ * matching xgbe_version_data, which the probe routine reads back into
+ * pdata->vdata.
+ */
+static const struct pci_device_id xgbe_pci_table[] = {
+       { PCI_VDEVICE(AMD, 0x1458),
+         .driver_data = (kernel_ulong_t)&xgbe_v2a },
+       { PCI_VDEVICE(AMD, 0x1459),
+         .driver_data = (kernel_ulong_t)&xgbe_v2b },
+       /* Last entry must be zero */
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, xgbe_pci_table);
+
+/* PCI driver description; the suspend/resume hooks are only wired up
+ * when CONFIG_PM is enabled.
+ */
+static struct pci_driver xgbe_driver = {
+       .name = XGBE_DRV_NAME,
+       .id_table = xgbe_pci_table,
+       .probe = xgbe_pci_probe,
+       .remove = xgbe_pci_remove,
+#ifdef CONFIG_PM
+       .suspend = xgbe_pci_suspend,
+       .resume = xgbe_pci_resume,
+#endif
+};
+
+/* Register the xgbe PCI driver; returns the pci_register_driver() result */
+int xgbe_pci_init(void)
+{
+       return pci_register_driver(&xgbe_driver);
+}
+
+/* Unregister the xgbe PCI driver */
+void xgbe_pci_exit(void)
+{
+       pci_unregister_driver(&xgbe_driver);
+}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c
new file mode 100644 (file)
index 0000000..c75edca
--- /dev/null
@@ -0,0 +1,845 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/device.h>
+#include <linux/property.h>
+#include <linux/mdio.h>
+#include <linux/phy.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+/* Names of the optional device properties that xgbe_phy_init() reads to
+ * override the default per-speed SerDes settings.
+ */
+#define XGBE_BLWC_PROPERTY             "amd,serdes-blwc"
+#define XGBE_CDR_RATE_PROPERTY         "amd,serdes-cdr-rate"
+#define XGBE_PQ_SKEW_PROPERTY          "amd,serdes-pq-skew"
+#define XGBE_TX_AMP_PROPERTY           "amd,serdes-tx-amp"
+#define XGBE_DFE_CFG_PROPERTY          "amd,serdes-dfe-tap-config"
+#define XGBE_DFE_ENA_PROPERTY          "amd,serdes-dfe-tap-enable"
+
+/* Default SerDes settings, one group per speed (1GbE, 2.5GbE, 10GbE).
+ *   The BLWC, CDR, PQ, TXAMP and DFE_TAP values seed the default tables
+ *   below and may be replaced by device properties.  The RATE, WORD and
+ *   PLL values are written directly by the mode-setting routines.
+ */
+#define XGBE_SPEED_1000_BLWC           1
+#define XGBE_SPEED_1000_CDR            0x2
+#define XGBE_SPEED_1000_PLL            0x0
+#define XGBE_SPEED_1000_PQ             0xa
+#define XGBE_SPEED_1000_RATE           0x3
+#define XGBE_SPEED_1000_TXAMP          0xf
+#define XGBE_SPEED_1000_WORD           0x1
+#define XGBE_SPEED_1000_DFE_TAP_CONFIG 0x3
+#define XGBE_SPEED_1000_DFE_TAP_ENABLE 0x0
+
+#define XGBE_SPEED_2500_BLWC           1
+#define XGBE_SPEED_2500_CDR            0x2
+#define XGBE_SPEED_2500_PLL            0x0
+#define XGBE_SPEED_2500_PQ             0xa
+#define XGBE_SPEED_2500_RATE           0x1
+#define XGBE_SPEED_2500_TXAMP          0xf
+#define XGBE_SPEED_2500_WORD           0x1
+#define XGBE_SPEED_2500_DFE_TAP_CONFIG 0x3
+#define XGBE_SPEED_2500_DFE_TAP_ENABLE 0x0
+
+#define XGBE_SPEED_10000_BLWC          0
+#define XGBE_SPEED_10000_CDR           0x7
+#define XGBE_SPEED_10000_PLL           0x1
+#define XGBE_SPEED_10000_PQ            0x12
+#define XGBE_SPEED_10000_RATE          0x0
+#define XGBE_SPEED_10000_TXAMP         0xa
+#define XGBE_SPEED_10000_WORD          0x7
+#define XGBE_SPEED_10000_DFE_TAP_CONFIG        0x1
+#define XGBE_SPEED_10000_DFE_TAP_ENABLE        0x7f
+
+/* Rate-change complete wait/retry count: upper bound on the number of
+ * status polls made by xgbe_phy_complete_ratechange()
+ */
+#define XGBE_RATECHANGE_COUNT          500
+
+/* Default BLWC values (1GbE, 2.5GbE, 10GbE order), copied into
+ * xgbe_phy_data when the XGBE_BLWC_PROPERTY device property is absent
+ */
+static const u32 xgbe_phy_blwc[] = {
+       XGBE_SPEED_1000_BLWC,
+       XGBE_SPEED_2500_BLWC,
+       XGBE_SPEED_10000_BLWC,
+};
+
+/* Default CDR rate values (1GbE, 2.5GbE, 10GbE order), copied into
+ * xgbe_phy_data when the XGBE_CDR_RATE_PROPERTY device property is absent
+ */
+static const u32 xgbe_phy_cdr_rate[] = {
+       XGBE_SPEED_1000_CDR,
+       XGBE_SPEED_2500_CDR,
+       XGBE_SPEED_10000_CDR,
+};
+
+/* Default PQ skew values (1GbE, 2.5GbE, 10GbE order), copied into
+ * xgbe_phy_data when the XGBE_PQ_SKEW_PROPERTY device property is absent
+ */
+static const u32 xgbe_phy_pq_skew[] = {
+       XGBE_SPEED_1000_PQ,
+       XGBE_SPEED_2500_PQ,
+       XGBE_SPEED_10000_PQ,
+};
+
+/* Default Tx amplitude values (1GbE, 2.5GbE, 10GbE order), copied into
+ * xgbe_phy_data when the XGBE_TX_AMP_PROPERTY device property is absent
+ */
+static const u32 xgbe_phy_tx_amp[] = {
+       XGBE_SPEED_1000_TXAMP,
+       XGBE_SPEED_2500_TXAMP,
+       XGBE_SPEED_10000_TXAMP,
+};
+
+/* Default DFE tap configuration values (1GbE, 2.5GbE, 10GbE order), copied
+ * into xgbe_phy_data when the XGBE_DFE_CFG_PROPERTY device property is absent
+ */
+static const u32 xgbe_phy_dfe_tap_cfg[] = {
+       XGBE_SPEED_1000_DFE_TAP_CONFIG,
+       XGBE_SPEED_2500_DFE_TAP_CONFIG,
+       XGBE_SPEED_10000_DFE_TAP_CONFIG,
+};
+
+/* Default DFE tap enable values (1GbE, 2.5GbE, 10GbE order), copied into
+ * xgbe_phy_data when the XGBE_DFE_ENA_PROPERTY device property is absent
+ */
+static const u32 xgbe_phy_dfe_tap_ena[] = {
+       XGBE_SPEED_1000_DFE_TAP_ENABLE,
+       XGBE_SPEED_2500_DFE_TAP_ENABLE,
+       XGBE_SPEED_10000_DFE_TAP_ENABLE,
+};
+
+/* Private state of this PHY implementation.  Allocated and filled in by
+ * xgbe_phy_init() and reachable afterwards through pdata->phy_data.
+ */
+struct xgbe_phy_data {
+       /* 1000/10000 vs 2500/10000 indicator */
+       unsigned int speed_set;
+
+       /* SerDes UEFI configurable settings.
+        *   Switching between modes/speeds requires new values for some
+        *   SerDes settings.  The values can be supplied as device
+        *   properties in array format.  The first array entry is for
+        *   1GbE, second for 2.5GbE and third for 10GbE
+        */
+       u32 blwc[XGBE_SPEEDS];
+       u32 cdr_rate[XGBE_SPEEDS];
+       u32 pq_skew[XGBE_SPEEDS];
+       u32 tx_amp[XGBE_SPEEDS];
+       u32 dfe_tap_cfg[XGBE_SPEEDS];
+       u32 dfe_tap_ena[XGBE_SPEEDS];
+};
+
+/* kr_training_pre hook: set the RESET field of SIR0_KR_RT_1 */
+static void xgbe_phy_kr_training_pre(struct xgbe_prv_data *pdata)
+{
+       XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 1);
+}
+
+/* kr_training_post hook: clear the RESET field of SIR0_KR_RT_1 */
+static void xgbe_phy_kr_training_post(struct xgbe_prv_data *pdata)
+{
+       XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 0);
+}
+
+/* Evaluate the result of auto-negotiation.
+ *
+ * Records what the link partner advertised in pdata->phy.lp_advertising,
+ * resolves Tx/Rx pause when pause auto-negotiation is enabled, and returns
+ * the mode advertised by both ends: KR if common, otherwise KX (1000 or
+ * 2500 depending on the configured speed set), otherwise XGBE_MODE_UNKNOWN.
+ */
+static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       enum xgbe_mode mode;
+       unsigned int ad_reg, lp_reg;
+
+       pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
+       pdata->phy.lp_advertising |= ADVERTISED_Backplane;
+
+       /* Compare Advertisement and Link Partner register 1 */
+       ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
+       lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA);
+       /* 0x400 is treated as the Pause bit, 0x800 as the Asym_Pause bit */
+       if (lp_reg & 0x400)
+               pdata->phy.lp_advertising |= ADVERTISED_Pause;
+       if (lp_reg & 0x800)
+               pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
+
+       if (pdata->phy.pause_autoneg) {
+               /* Set flow control based on auto-negotiation result */
+               pdata->phy.tx_pause = 0;
+               pdata->phy.rx_pause = 0;
+
+               if (ad_reg & lp_reg & 0x400) {
+                       /* Both ends advertise Pause: pause in both directions */
+                       pdata->phy.tx_pause = 1;
+                       pdata->phy.rx_pause = 1;
+               } else if (ad_reg & lp_reg & 0x800) {
+                       /* Both advertise Asym_Pause only: the direction
+                        * depends on which side also advertises Pause
+                        */
+                       if (ad_reg & 0x400)
+                               pdata->phy.rx_pause = 1;
+                       else if (lp_reg & 0x400)
+                               pdata->phy.tx_pause = 1;
+               }
+       }
+
+       /* Compare Advertisement and Link Partner register 2 */
+       ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
+       lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1);
+       /* 0x80 is treated as the KR bit, 0x20 as the KX bit */
+       if (lp_reg & 0x80)
+               pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full;
+       if (lp_reg & 0x20) {
+               if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000)
+                       pdata->phy.lp_advertising |= ADVERTISED_2500baseX_Full;
+               else
+                       pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full;
+       }
+
+       /* Keep only what both sides advertise; KR wins over KX */
+       ad_reg &= lp_reg;
+       if (ad_reg & 0x80) {
+               mode = XGBE_MODE_KR;
+       } else if (ad_reg & 0x20) {
+               if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000)
+                       mode = XGBE_MODE_KX_2500;
+               else
+                       mode = XGBE_MODE_KX_1000;
+       } else {
+               mode = XGBE_MODE_UNKNOWN;
+       }
+
+       /* Compare Advertisement and Link Partner register 3 */
+       /* NOTE(review): ad_reg is read here but never used afterwards; only
+        * the link partner value contributes to the FEC flag below
+        */
+       ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
+       lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2);
+       if (lp_reg & 0xc000)
+               pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC;
+
+       return mode;
+}
+
+/* an_advertising hook: report pdata->phy.advertising unchanged */
+static unsigned int xgbe_phy_an_advertising(struct xgbe_prv_data *pdata)
+{
+       return pdata->phy.advertising;
+}
+
+/* an_config hook: no work to do for this PHY implementation; always
+ * returns 0
+ */
+static int xgbe_phy_an_config(struct xgbe_prv_data *pdata)
+{
+       /* Nothing uniquely required for an configuration */
+       return 0;
+}
+
+/* an_mode hook: this PHY implementation always reports XGBE_AN_MODE_CL73 */
+static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata)
+{
+       return XGBE_AN_MODE_CL73;
+}
+
+/* Pulse the PCS low-power bit: set MDIO_CTRL1_LPOWER in PCS CTRL1, sleep
+ * for 75-100us, then clear it again.  The register is read once and the
+ * same value (apart from LPOWER) is written back both times.
+ */
+static void xgbe_phy_pcs_power_cycle(struct xgbe_prv_data *pdata)
+{
+       unsigned int reg;
+
+       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+
+       reg |= MDIO_CTRL1_LPOWER;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+
+       usleep_range(75, 100);
+
+       reg &= ~MDIO_CTRL1_LPOWER;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+}
+
+/* Open a SerDes rate change by setting the RATECHANGE field of SIR1_SPEED.
+ * The mode-setting routines program the new speed values next and then
+ * call xgbe_phy_complete_ratechange().
+ */
+static void xgbe_phy_start_ratechange(struct xgbe_prv_data *pdata)
+{
+       /* Assert Rx and Tx ratechange */
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 1);
+}
+
+/* Finish a SerDes rate change opened by xgbe_phy_start_ratechange().
+ *
+ * Clears RATECHANGE, polls SIR0_STATUS until both RX_READY and TX_READY
+ * are set (at most XGBE_RATECHANGE_COUNT polls, sleeping 50-75us before
+ * each one) and finally pulses RESETB_RXD.  A timeout is only reported
+ * through netif_dbg(); the Rx reset is issued either way.
+ */
+static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata)
+{
+       unsigned int tries;
+       bool ready = false;
+       u16 status;
+
+       /* Release Rx and Tx ratechange */
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 0);
+
+       /* Wait for Rx and Tx ready */
+       for (tries = XGBE_RATECHANGE_COUNT; tries && !ready; tries--) {
+               usleep_range(50, 75);
+
+               status = XSIR0_IOREAD(pdata, SIR0_STATUS);
+               ready = XSIR_GET_BITS(status, SIR0_STATUS, RX_READY) &&
+                       XSIR_GET_BITS(status, SIR0_STATUS, TX_READY);
+       }
+
+       if (!ready)
+               netif_dbg(pdata, link, pdata->netdev,
+                         "SerDes rx/tx not ready (%#hx)\n", status);
+
+       /* Perform Rx reset for the DFE changes */
+       XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 0);
+       XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 1);
+}
+
+/* Put the PCS and SerDes into 10GbE KR mode.
+ *
+ * Selects the 10GBR PCS type and the 10G speed selection, power-cycles the
+ * PCS, then, between xgbe_phy_start_ratechange() and
+ * xgbe_phy_complete_ratechange(), programs the fixed 10G rate/word/PLL
+ * values and the XGBE_SPEED_10000 entries of the per-speed settings held
+ * in xgbe_phy_data.
+ */
+static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int reg;
+
+       /* Set PCS to KR/10G speed */
+       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
+       reg &= ~MDIO_PCS_CTRL2_TYPE;
+       reg |= MDIO_PCS_CTRL2_10GBR;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg);
+
+       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+       reg &= ~MDIO_CTRL1_SPEEDSEL;
+       reg |= MDIO_CTRL1_SPEED10G;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+
+       xgbe_phy_pcs_power_cycle(pdata);
+
+       /* Set SerDes to 10G speed */
+       xgbe_phy_start_ratechange(pdata);
+
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_10000_RATE);
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_10000_WORD);
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_10000_PLL);
+
+       /* Per-speed values: built-in defaults or device property overrides */
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE,
+                          phy_data->cdr_rate[XGBE_SPEED_10000]);
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP,
+                          phy_data->tx_amp[XGBE_SPEED_10000]);
+       XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA,
+                          phy_data->blwc[XGBE_SPEED_10000]);
+       XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG,
+                          phy_data->pq_skew[XGBE_SPEED_10000]);
+       XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG,
+                          phy_data->dfe_tap_cfg[XGBE_SPEED_10000]);
+       XRXTX_IOWRITE(pdata, RXTX_REG22,
+                     phy_data->dfe_tap_ena[XGBE_SPEED_10000]);
+
+       xgbe_phy_complete_ratechange(pdata);
+
+       netif_dbg(pdata, link, pdata->netdev, "10GbE KR mode set\n");
+}
+
+/* Put the PCS and SerDes into 2.5GbE KX mode.
+ *
+ * The PCS is programmed exactly as in xgbe_phy_kx_1000_mode() (10GBX type,
+ * 1G speed selection); only the SerDes values differ, using the fixed 2.5G
+ * rate/word/PLL values and the XGBE_SPEED_2500 entries of the per-speed
+ * settings held in xgbe_phy_data.
+ */
+static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int reg;
+
+       /* Set PCS to KX/1G speed */
+       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
+       reg &= ~MDIO_PCS_CTRL2_TYPE;
+       reg |= MDIO_PCS_CTRL2_10GBX;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg);
+
+       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+       reg &= ~MDIO_CTRL1_SPEEDSEL;
+       reg |= MDIO_CTRL1_SPEED1G;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+
+       xgbe_phy_pcs_power_cycle(pdata);
+
+       /* Set SerDes to 2.5G speed */
+       xgbe_phy_start_ratechange(pdata);
+
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_2500_RATE);
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_2500_WORD);
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_2500_PLL);
+
+       /* Per-speed values: built-in defaults or device property overrides */
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE,
+                          phy_data->cdr_rate[XGBE_SPEED_2500]);
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP,
+                          phy_data->tx_amp[XGBE_SPEED_2500]);
+       XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA,
+                          phy_data->blwc[XGBE_SPEED_2500]);
+       XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG,
+                          phy_data->pq_skew[XGBE_SPEED_2500]);
+       XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG,
+                          phy_data->dfe_tap_cfg[XGBE_SPEED_2500]);
+       XRXTX_IOWRITE(pdata, RXTX_REG22,
+                     phy_data->dfe_tap_ena[XGBE_SPEED_2500]);
+
+       xgbe_phy_complete_ratechange(pdata);
+
+       netif_dbg(pdata, link, pdata->netdev, "2.5GbE KX mode set\n");
+}
+
+/* Put the PCS and SerDes into 1GbE KX mode.
+ *
+ * Selects the 10GBX PCS type and the 1G speed selection, power-cycles the
+ * PCS, then, between xgbe_phy_start_ratechange() and
+ * xgbe_phy_complete_ratechange(), programs the fixed 1G rate/word/PLL
+ * values and the XGBE_SPEED_1000 entries of the per-speed settings held
+ * in xgbe_phy_data.
+ */
+static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int reg;
+
+       /* Set PCS to KX/1G speed */
+       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
+       reg &= ~MDIO_PCS_CTRL2_TYPE;
+       reg |= MDIO_PCS_CTRL2_10GBX;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg);
+
+       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+       reg &= ~MDIO_CTRL1_SPEEDSEL;
+       reg |= MDIO_CTRL1_SPEED1G;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+
+       xgbe_phy_pcs_power_cycle(pdata);
+
+       /* Set SerDes to 1G speed */
+       xgbe_phy_start_ratechange(pdata);
+
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_1000_RATE);
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_1000_WORD);
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_1000_PLL);
+
+       /* Per-speed values: built-in defaults or device property overrides */
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE,
+                          phy_data->cdr_rate[XGBE_SPEED_1000]);
+       XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP,
+                          phy_data->tx_amp[XGBE_SPEED_1000]);
+       XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA,
+                          phy_data->blwc[XGBE_SPEED_1000]);
+       XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG,
+                          phy_data->pq_skew[XGBE_SPEED_1000]);
+       XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG,
+                          phy_data->dfe_tap_cfg[XGBE_SPEED_1000]);
+       XRXTX_IOWRITE(pdata, RXTX_REG22,
+                     phy_data->dfe_tap_ena[XGBE_SPEED_1000]);
+
+       xgbe_phy_complete_ratechange(pdata);
+
+       netif_dbg(pdata, link, pdata->netdev, "1GbE KX mode set\n");
+}
+
+/* Report the mode the PCS is currently set to.
+ *
+ * A PCS type of 10GBR in CTRL2 means KR.  Any other type is reported as
+ * the KX flavour that belongs to the configured speed set.
+ */
+static enum xgbe_mode xgbe_phy_cur_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int pcs_type;
+
+       pcs_type = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
+       pcs_type &= MDIO_PCS_CTRL2_TYPE;
+
+       if (pcs_type == MDIO_PCS_CTRL2_10GBR)
+               return XGBE_MODE_KR;
+
+       if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000)
+               return XGBE_MODE_KX_2500;
+
+       return XGBE_MODE_KX_1000;
+}
+
+/* Return the mode to try next: KR when currently in a KX mode, and the KX
+ * flavour of the configured speed set when currently in KR.  Nothing is
+ * written to the hardware here.
+ */
+static enum xgbe_mode xgbe_phy_switch_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       /* Not in KR means we are in KX, so KR is the alternative */
+       if (xgbe_phy_cur_mode(pdata) != XGBE_MODE_KR)
+               return XGBE_MODE_KR;
+
+       return (phy_data->speed_set == XGBE_SPEEDSET_2500_10000)
+               ? XGBE_MODE_KX_2500 : XGBE_MODE_KX_1000;
+}
+
+/* Map a link speed to the mode that provides it.
+ *
+ * SPEED_10000 always maps to KR.  SPEED_1000 and SPEED_2500 map to their
+ * KX mode only when the configured speed set includes that speed.  Every
+ * other combination yields XGBE_MODE_UNKNOWN.
+ */
+static enum xgbe_mode xgbe_phy_get_mode(struct xgbe_prv_data *pdata,
+                                       int speed)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       if (speed == SPEED_10000)
+               return XGBE_MODE_KR;
+
+       if (speed == SPEED_1000 &&
+           phy_data->speed_set == XGBE_SPEEDSET_1000_10000)
+               return XGBE_MODE_KX_1000;
+
+       if (speed == SPEED_2500 &&
+           phy_data->speed_set == XGBE_SPEEDSET_2500_10000)
+               return XGBE_MODE_KX_2500;
+
+       return XGBE_MODE_UNKNOWN;
+}
+
+/* Program the hardware for @mode by calling the matching mode routine.
+ * Modes other than KX_1000, KX_2500 and KR are ignored.
+ */
+static void xgbe_phy_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode)
+{
+       if (mode == XGBE_MODE_KX_1000)
+               xgbe_phy_kx_1000_mode(pdata);
+       else if (mode == XGBE_MODE_KX_2500)
+               xgbe_phy_kx_2500_mode(pdata);
+       else if (mode == XGBE_MODE_KR)
+               xgbe_phy_kr_mode(pdata);
+}
+
+/* Decide whether @mode may be used.
+ *
+ * With auto-negotiation enabled the answer is whether any bit of @advert
+ * is set in pdata->phy.advertising.  With it disabled the answer is
+ * whether @mode is the mode implied by the configured speed.
+ */
+static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata,
+                               enum xgbe_mode mode, u32 advert)
+{
+       if (pdata->phy.autoneg == AUTONEG_ENABLE)
+               return (pdata->phy.advertising & advert) != 0;
+
+       return xgbe_phy_get_mode(pdata, pdata->phy.speed) == mode;
+}
+
+/* use_mode hook: pair @mode with its ADVERTISED_* bit and hand both to
+ * xgbe_phy_check_mode().  Modes without such a bit are never usable.
+ */
+static bool xgbe_phy_use_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode)
+{
+       u32 advert;
+
+       switch (mode) {
+       case XGBE_MODE_KX_1000:
+               advert = ADVERTISED_1000baseKX_Full;
+               break;
+       case XGBE_MODE_KX_2500:
+               advert = ADVERTISED_2500baseX_Full;
+               break;
+       case XGBE_MODE_KR:
+               advert = ADVERTISED_10000baseKR_Full;
+               break;
+       default:
+               return false;
+       }
+
+       return xgbe_phy_check_mode(pdata, mode, advert);
+}
+
+/* valid_speed hook: SPEED_10000 is always accepted; SPEED_1000 and
+ * SPEED_2500 only when the configured speed set contains them; anything
+ * else is rejected.
+ */
+static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       switch (speed) {
+       case SPEED_1000:
+               return phy_data->speed_set == XGBE_SPEEDSET_1000_10000;
+       case SPEED_2500:
+               return phy_data->speed_set == XGBE_SPEEDSET_2500_10000;
+       case SPEED_10000:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/* link_status hook.
+ *
+ * Always stores 0 in *an_restart.  Returns 1 when MDIO_STAT1_LSTATUS is
+ * set in the PCS status register and 0 otherwise.
+ */
+static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
+{
+       unsigned int stat1;
+
+       *an_restart = 0;
+
+       /* Link status is latched low, so the first read only clears the
+        * latch; the second read reflects the current state
+        */
+       stat1 = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+       stat1 = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+
+       return !!(stat1 & MDIO_STAT1_LSTATUS);
+}
+
+/* stop hook: intentionally empty for this PHY implementation */
+static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
+{
+       /* Nothing uniquely required for stop */
+}
+
+/* start hook: no work to do for this PHY implementation; always returns 0 */
+static int xgbe_phy_start(struct xgbe_prv_data *pdata)
+{
+       /* Nothing uniquely required for start */
+       return 0;
+}
+
+/* reset hook: request a PCS software reset and wait for it to finish.
+ *
+ * Sets MDIO_CTRL1_RESET and then polls the register up to 50 times with a
+ * 20ms sleep before each poll.  Returns 0 once the bit reads back clear,
+ * or -ETIMEDOUT if it is still set after the last poll.
+ */
+static int xgbe_phy_reset(struct xgbe_prv_data *pdata)
+{
+       unsigned int ctrl1, tries;
+
+       /* Perform a software reset of the PCS */
+       ctrl1 = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+       ctrl1 |= MDIO_CTRL1_RESET;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, ctrl1);
+
+       for (tries = 50; tries; tries--) {
+               msleep(20);
+
+               ctrl1 = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+               if (!(ctrl1 & MDIO_CTRL1_RESET))
+                       return 0;
+       }
+
+       return -ETIMEDOUT;
+}
+
+/* exit hook: intentionally empty.  The state allocated in xgbe_phy_init()
+ * comes from devm_kzalloc(), so nothing is freed here.
+ */
+static void xgbe_phy_exit(struct xgbe_prv_data *pdata)
+{
+       /* Nothing uniquely required for exit */
+}
+
+/* Load one per-speed SerDes setting.
+ *
+ * When the device property @name exists on pdata->phy_dev it must hold
+ * XGBE_SPEEDS u32 values, which are read into @values; a failed read is
+ * logged and its error code returned.  When the property is absent the
+ * built-in @defaults are copied instead.  Both @values and @defaults must
+ * have room for XGBE_SPEEDS entries.
+ */
+static int xgbe_phy_get_serdes_prop(struct xgbe_prv_data *pdata,
+                                   const char *name, u32 *values,
+                                   const u32 *defaults)
+{
+       int ret;
+
+       if (!device_property_present(pdata->phy_dev, name)) {
+               memcpy(values, defaults, XGBE_SPEEDS * sizeof(*values));
+               return 0;
+       }
+
+       ret = device_property_read_u32_array(pdata->phy_dev, name, values,
+                                            XGBE_SPEEDS);
+       if (ret)
+               dev_err(pdata->dev, "invalid %s property\n", name);
+
+       return ret;
+}
+
+/* init hook: allocate and populate the PHY private data.
+ *
+ * Reads the mandatory speed set property (only 1000/10000 and 2500/10000
+ * are accepted), loads each per-speed SerDes setting from its device
+ * property or from the built-in defaults, and fills in
+ * pdata->phy.supported.  pdata->phy_data is only assigned once everything
+ * has succeeded.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, -EINVAL for an
+ * unknown speed set, or the error code of a failed property read.  The
+ * allocation is device-managed, so the error paths do not free it.
+ */
+static int xgbe_phy_init(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data;
+       int ret;
+
+       phy_data = devm_kzalloc(pdata->dev, sizeof(*phy_data), GFP_KERNEL);
+       if (!phy_data)
+               return -ENOMEM;
+
+       /* Retrieve the PHY speedset */
+       ret = device_property_read_u32(pdata->phy_dev, XGBE_SPEEDSET_PROPERTY,
+                                      &phy_data->speed_set);
+       if (ret) {
+               dev_err(pdata->dev, "invalid %s property\n",
+                       XGBE_SPEEDSET_PROPERTY);
+               return ret;
+       }
+
+       switch (phy_data->speed_set) {
+       case XGBE_SPEEDSET_1000_10000:
+       case XGBE_SPEEDSET_2500_10000:
+               break;
+       default:
+               dev_err(pdata->dev, "invalid %s property\n",
+                       XGBE_SPEEDSET_PROPERTY);
+               return -EINVAL;
+       }
+
+       /* Retrieve the PHY configuration properties */
+       ret = xgbe_phy_get_serdes_prop(pdata, XGBE_BLWC_PROPERTY,
+                                      phy_data->blwc, xgbe_phy_blwc);
+       if (ret)
+               return ret;
+
+       ret = xgbe_phy_get_serdes_prop(pdata, XGBE_CDR_RATE_PROPERTY,
+                                      phy_data->cdr_rate, xgbe_phy_cdr_rate);
+       if (ret)
+               return ret;
+
+       ret = xgbe_phy_get_serdes_prop(pdata, XGBE_PQ_SKEW_PROPERTY,
+                                      phy_data->pq_skew, xgbe_phy_pq_skew);
+       if (ret)
+               return ret;
+
+       ret = xgbe_phy_get_serdes_prop(pdata, XGBE_TX_AMP_PROPERTY,
+                                      phy_data->tx_amp, xgbe_phy_tx_amp);
+       if (ret)
+               return ret;
+
+       ret = xgbe_phy_get_serdes_prop(pdata, XGBE_DFE_CFG_PROPERTY,
+                                      phy_data->dfe_tap_cfg,
+                                      xgbe_phy_dfe_tap_cfg);
+       if (ret)
+               return ret;
+
+       ret = xgbe_phy_get_serdes_prop(pdata, XGBE_DFE_ENA_PROPERTY,
+                                      phy_data->dfe_tap_ena,
+                                      xgbe_phy_dfe_tap_ena);
+       if (ret)
+               return ret;
+
+       /* Initialize supported features */
+       pdata->phy.supported = SUPPORTED_Autoneg;
+       pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+       pdata->phy.supported |= SUPPORTED_Backplane;
+       pdata->phy.supported |= SUPPORTED_10000baseKR_Full;
+       switch (phy_data->speed_set) {
+       case XGBE_SPEEDSET_1000_10000:
+               pdata->phy.supported |= SUPPORTED_1000baseKX_Full;
+               break;
+       case XGBE_SPEEDSET_2500_10000:
+               pdata->phy.supported |= SUPPORTED_2500baseX_Full;
+               break;
+       }
+
+       if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
+               pdata->phy.supported |= SUPPORTED_10000baseR_FEC;
+
+       pdata->phy_data = phy_data;
+
+       return 0;
+}
+
+/* Install this file's PHY implementation callbacks into
+ * @phy_if->phy_impl.  Every assignment is independent of the others.
+ */
+void xgbe_init_function_ptrs_phy_v1(struct xgbe_phy_if *phy_if)
+{
+       struct xgbe_phy_impl_if *impl = &phy_if->phy_impl;
+
+       /* Lifecycle */
+       impl->init              = xgbe_phy_init;
+       impl->exit              = xgbe_phy_exit;
+       impl->reset             = xgbe_phy_reset;
+       impl->start             = xgbe_phy_start;
+       impl->stop              = xgbe_phy_stop;
+
+       /* Link state and speed validation */
+       impl->link_status       = xgbe_phy_link_status;
+       impl->valid_speed       = xgbe_phy_valid_speed;
+
+       /* Mode selection */
+       impl->use_mode          = xgbe_phy_use_mode;
+       impl->set_mode          = xgbe_phy_set_mode;
+       impl->get_mode          = xgbe_phy_get_mode;
+       impl->switch_mode       = xgbe_phy_switch_mode;
+       impl->cur_mode          = xgbe_phy_cur_mode;
+
+       /* Auto-negotiation */
+       impl->an_mode           = xgbe_phy_an_mode;
+       impl->an_config         = xgbe_phy_an_config;
+       impl->an_advertising    = xgbe_phy_an_advertising;
+       impl->an_outcome        = xgbe_phy_an_outcome;
+
+       /* KR training */
+       impl->kr_training_pre   = xgbe_phy_kr_training_pre;
+       impl->kr_training_post  = xgbe_phy_kr_training_post;
+}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
new file mode 100644 (file)
index 0000000..9d8c953
--- /dev/null
@@ -0,0 +1,3084 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kmod.h>
+#include <linux/mdio.h>
+#include <linux/phy.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+/* Port speed flags; tested as a bitmask against phy_data->port_speeds */
+#define XGBE_PHY_PORT_SPEED_100                BIT(0)
+#define XGBE_PHY_PORT_SPEED_1000       BIT(1)
+#define XGBE_PHY_PORT_SPEED_2500       BIT(2)
+#define XGBE_PHY_PORT_SPEED_10000      BIT(3)
+
+/* Value written to the I2C and MDIO mutex registers to clear them */
+#define XGBE_MUTEX_RELEASE             0x80000000
+
+/* NOTE(review): presumably the mux address setting that means the SFP is
+ * accessed directly (no mux) - confirm at the point of use.
+ */
+#define XGBE_SFP_DIRECT                        7
+
+/* I2C target addresses */
+#define XGBE_SFP_SERIAL_ID_ADDRESS     0x50
+#define XGBE_SFP_DIAG_INFO_ADDRESS     0x51
+#define XGBE_SFP_PHY_ADDRESS           0x56
+#define XGBE_GPIO_ADDRESS_PCA9555      0x20
+
+/* SFP sideband signal indicators */
+#define XGBE_GPIO_NO_TX_FAULT          BIT(0)
+#define XGBE_GPIO_NO_RATE_SELECT       BIT(1)
+#define XGBE_GPIO_NO_MOD_ABSENT                BIT(2)
+#define XGBE_GPIO_NO_RX_LOS            BIT(3)
+
+/* Rate-change complete wait/retry count */
+#define XGBE_RATECHANGE_COUNT          500
+
+/* Port modes.  Values are sequential starting at the reserved value 0, so
+ * XGBE_PORT_MODE_MAX is one greater than the last defined mode.
+ */
+enum xgbe_port_mode {
+       XGBE_PORT_MODE_RSVD = 0,
+       XGBE_PORT_MODE_BACKPLANE,
+       XGBE_PORT_MODE_BACKPLANE_2500,
+       XGBE_PORT_MODE_1000BASE_T,
+       XGBE_PORT_MODE_1000BASE_X,
+       XGBE_PORT_MODE_NBASE_T,
+       XGBE_PORT_MODE_10GBASE_T,
+       XGBE_PORT_MODE_10GBASE_R,
+       XGBE_PORT_MODE_SFP,
+       XGBE_PORT_MODE_MAX,
+};
+
+/* Connection types.  These are sequential values (NONE = 0, SFP = 1,
+ * MDIO = 2, RSVD1 = 3, BACKPLANE = 4), not individual bit flags.
+ */
+enum xgbe_conn_type {
+       XGBE_CONN_TYPE_NONE = 0,
+       XGBE_CONN_TYPE_SFP,
+       XGBE_CONN_TYPE_MDIO,
+       XGBE_CONN_TYPE_RSVD1,
+       XGBE_CONN_TYPE_BACKPLANE,
+       XGBE_CONN_TYPE_MAX,
+};
+
+/* SFP/SFP+ related definitions */
+
+/* How the SFP is reached over I2C.  With XGBE_SFP_COMM_DIRECT the mux
+ * get/put helpers return without issuing any transfer; otherwise a channel
+ * select byte is written to the mux.
+ */
+enum xgbe_sfp_comm {
+       XGBE_SFP_COMM_DIRECT = 0,
+       XGBE_SFP_COMM_PCA9545,
+};
+
+/* Cable type recorded for the SFP in phy_data->sfp_cable */
+enum xgbe_sfp_cable {
+       XGBE_SFP_CABLE_UNKNOWN = 0,
+       XGBE_SFP_CABLE_ACTIVE,
+       XGBE_SFP_CABLE_PASSIVE,
+};
+
+/* SFP module type recorded in phy_data->sfp_base.  It selects the speed,
+ * duplex, autoneg and TP/FIBRE settings in xgbe_phy_sfp_phy_settings().
+ */
+enum xgbe_sfp_base {
+       XGBE_SFP_BASE_UNKNOWN = 0,
+       XGBE_SFP_BASE_1000_T,
+       XGBE_SFP_BASE_1000_SX,
+       XGBE_SFP_BASE_1000_LX,
+       XGBE_SFP_BASE_1000_CX,
+       XGBE_SFP_BASE_10000_SR,
+       XGBE_SFP_BASE_10000_LR,
+       XGBE_SFP_BASE_10000_LRM,
+       XGBE_SFP_BASE_10000_ER,
+       XGBE_SFP_BASE_10000_CR,
+};
+
+/* SFP speed class recorded in phy_data->sfp_speed.  It selects which speeds
+ * are advertised in xgbe_phy_sfp_phy_settings() and which bit rate range is
+ * checked in xgbe_phy_sfp_bit_rate().
+ */
+enum xgbe_sfp_speed {
+       XGBE_SFP_SPEED_UNKNOWN = 0,
+       XGBE_SFP_SPEED_100_1000,
+       XGBE_SFP_SPEED_1000,
+       XGBE_SFP_SPEED_10000,
+};
+
+/* SFP Serial ID Base ID values relative to an offset of 0 */
+/* The un-suffixed names below are byte indexes into xgbe_sfp_eeprom.base[];
+ * the names that follow each index are values or bit flags for that byte.
+ */
+#define XGBE_SFP_BASE_ID                       0
+#define XGBE_SFP_ID_SFP                                0x03
+
+#define XGBE_SFP_BASE_EXT_ID                   1
+#define XGBE_SFP_EXT_ID_SFP                    0x04
+
+#define XGBE_SFP_BASE_10GBE_CC                 3
+#define XGBE_SFP_BASE_10GBE_CC_SR              BIT(4)
+#define XGBE_SFP_BASE_10GBE_CC_LR              BIT(5)
+#define XGBE_SFP_BASE_10GBE_CC_LRM             BIT(6)
+#define XGBE_SFP_BASE_10GBE_CC_ER              BIT(7)
+
+#define XGBE_SFP_BASE_1GBE_CC                  6
+#define XGBE_SFP_BASE_1GBE_CC_SX               BIT(0)
+#define XGBE_SFP_BASE_1GBE_CC_LX               BIT(1)
+#define XGBE_SFP_BASE_1GBE_CC_CX               BIT(2)
+#define XGBE_SFP_BASE_1GBE_CC_T                        BIT(3)
+
+#define XGBE_SFP_BASE_CABLE                    8
+#define XGBE_SFP_BASE_CABLE_PASSIVE            BIT(2)
+#define XGBE_SFP_BASE_CABLE_ACTIVE             BIT(3)
+
+/* Bit rate byte; xgbe_phy_sfp_bit_rate() checks it against the inclusive
+ * MIN/MAX ranges below (presumably in units of 100 MBd - confirm against
+ * the SFP specification).
+ */
+#define XGBE_SFP_BASE_BR                       12
+#define XGBE_SFP_BASE_BR_1GBE_MIN              0x0a
+#define XGBE_SFP_BASE_BR_1GBE_MAX              0x0d
+#define XGBE_SFP_BASE_BR_10GBE_MIN             0x64
+#define XGBE_SFP_BASE_BR_10GBE_MAX             0x68
+
+#define XGBE_SFP_BASE_CU_CABLE_LEN             18
+
+#define XGBE_SFP_BASE_VENDOR_NAME              20
+#define XGBE_SFP_BASE_VENDOR_NAME_LEN          16
+#define XGBE_SFP_BASE_VENDOR_PN                        40
+#define XGBE_SFP_BASE_VENDOR_PN_LEN            16
+#define XGBE_SFP_BASE_VENDOR_REV               56
+#define XGBE_SFP_BASE_VENDOR_REV_LEN           4
+
+#define XGBE_SFP_BASE_CC                       63
+
+/* SFP Serial ID Extended ID values relative to an offset of 64 */
+/* NOTE(review): XGBE_SFP_BASE_VENDOR_SN carries the BASE prefix but is
+ * listed with the extended ID offsets; presumably it indexes
+ * xgbe_sfp_eeprom.extd[] - confirm at the point of use.
+ */
+#define XGBE_SFP_BASE_VENDOR_SN                        4
+#define XGBE_SFP_BASE_VENDOR_SN_LEN            16
+
+#define XGBE_SFP_EXTD_DIAG                     28
+#define XGBE_SFP_EXTD_DIAG_ADDR_CHANGE         BIT(2)
+
+#define XGBE_SFP_EXTD_SFF_8472                 30
+
+#define XGBE_SFP_EXTD_CC                       31
+
+/* SFP EEPROM contents split into three consecutive areas: 64 bytes of base
+ * ID fields, 32 bytes of extended ID fields and 32 vendor bytes (128 bytes
+ * in total).
+ */
+struct xgbe_sfp_eeprom {
+       u8 base[64];
+       u8 extd[32];
+       u8 vendor[32];
+};
+
+/* Space-padded, 16 character vendor name and part number strings that
+ * xgbe_phy_belfuse_parse_quirks() compares (memcmp, fixed length) with the
+ * corresponding SFP EEPROM fields.
+ */
+#define XGBE_BEL_FUSE_VENDOR   "BEL-FUSE        "
+#define XGBE_BEL_FUSE_PARTNO   "1GBT-SFP06      "
+
+/* Character buffer for one SFP vendor field at a time.  The members share
+ * storage (union) and each is sized one byte larger than its EEPROM field
+ * (presumably room for a terminating NUL - confirm at the point of use).
+ */
+struct xgbe_sfp_ascii {
+       union {
+               char vendor[XGBE_SFP_BASE_VENDOR_NAME_LEN + 1];
+               char partno[XGBE_SFP_BASE_VENDOR_PN_LEN + 1];
+               char rev[XGBE_SFP_BASE_VENDOR_REV_LEN + 1];
+               char serno[XGBE_SFP_BASE_VENDOR_SN_LEN + 1];
+       } u;
+};
+
+/* MDIO PHY reset types (stored in phy_data->mdio_reset); XGBE_MDIO_RESET_MAX
+ * is one greater than the last defined type.
+ */
+enum xgbe_mdio_reset {
+       XGBE_MDIO_RESET_NONE = 0,
+       XGBE_MDIO_RESET_I2C_GPIO,
+       XGBE_MDIO_RESET_INT_GPIO,
+       XGBE_MDIO_RESET_MAX,
+};
+
+/* Re-driver related definitions */
+
+/* Interface used to reach the re-driver (MDIO or I2C) */
+enum xgbe_phy_redrv_if {
+       XGBE_PHY_REDRV_IF_MDIO = 0,
+       XGBE_PHY_REDRV_IF_I2C,
+       XGBE_PHY_REDRV_IF_MAX,
+};
+
+/* Re-driver models known to the driver */
+enum xgbe_phy_redrv_model {
+       XGBE_PHY_REDRV_MODEL_4223 = 0,
+       XGBE_PHY_REDRV_MODEL_4227,
+       XGBE_PHY_REDRV_MODEL_MAX,
+};
+
+/* Re-driver mode values.  Unlike the other enums here these are explicit,
+ * non-sequential numbers (presumably the values programmed into the mode
+ * register - confirm at the point of use).
+ */
+enum xgbe_phy_redrv_mode {
+       XGBE_PHY_REDRV_MODE_CX = 5,
+       XGBE_PHY_REDRV_MODE_SR = 9,
+};
+
+/* Re-driver mode register address */
+#define XGBE_PHY_REDRV_MODE_REG        0x12b0
+
+/* PHY related configuration information */
+struct xgbe_phy_data {
+       enum xgbe_port_mode port_mode;
+
+       /* Written to the ID field of the hardware mutex registers */
+       unsigned int port_id;
+
+       /* Bitmask of XGBE_PHY_PORT_SPEED_* flags */
+       unsigned int port_speeds;
+
+       enum xgbe_conn_type conn_type;
+
+       enum xgbe_mode cur_mode;
+       enum xgbe_mode start_mode;
+
+       unsigned int rrc_count;
+
+       /* Bus address passed to get_phy_device() for the external PHY */
+       unsigned int mdio_addr;
+
+       /* Set to 1 by xgbe_phy_get_comm_ownership() once the software and
+        * hardware mutexes are held, cleared by
+        * xgbe_phy_put_comm_ownership(); checked before every I2C transfer
+        */
+       unsigned int comm_owned;
+
+       /* SFP Support */
+       enum xgbe_sfp_comm sfp_comm;
+       /* I2C target of the mux and the channel (bit position) to select */
+       unsigned int sfp_mux_address;
+       unsigned int sfp_mux_channel;
+
+       unsigned int sfp_gpio_address;
+       unsigned int sfp_gpio_mask;
+       unsigned int sfp_gpio_rx_los;
+       unsigned int sfp_gpio_tx_fault;
+       unsigned int sfp_gpio_mod_absent;
+       unsigned int sfp_gpio_rate_select;
+
+       unsigned int sfp_rx_los;
+       unsigned int sfp_tx_fault;
+       unsigned int sfp_mod_absent;
+       unsigned int sfp_diags;
+       unsigned int sfp_changed;
+       /* Set to 1 when a 1000Base-T module's PHY answered an I2C read */
+       unsigned int sfp_phy_avail;
+       unsigned int sfp_cable_len;
+       enum xgbe_sfp_base sfp_base;
+       enum xgbe_sfp_cable sfp_cable;
+       enum xgbe_sfp_speed sfp_speed;
+       struct xgbe_sfp_eeprom sfp_eeprom;
+
+       /* External PHY support */
+       enum xgbe_mdio_mode phydev_mode;
+       struct mii_bus *mii;
+       /* Attached external PHY, or NULL when none is attached */
+       struct phy_device *phydev;
+       enum xgbe_mdio_reset mdio_reset;
+       unsigned int mdio_reset_addr;
+       unsigned int mdio_reset_gpio;
+
+       /* Re-driver support */
+       unsigned int redrv;
+       unsigned int redrv_if;
+       /* I2C target used by xgbe_phy_redrv_write() */
+       unsigned int redrv_addr;
+       unsigned int redrv_lane;
+       unsigned int redrv_model;
+};
+
+/* I2C, MDIO and GPIO lines are muxed, so only one device at a time.
+ * This lock is taken by xgbe_phy_get_comm_ownership() and released by
+ * xgbe_phy_put_comm_ownership().
+ */
+static DEFINE_MUTEX(xgbe_phy_comm_lock);
+
+/* Forward declaration */
+static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata);
+
+/* Hand an I2C operation to the driver's I2C interface.
+ *
+ * Returns -EIO (after a WARN) when communication ownership is not held,
+ * otherwise whatever the i2c_xfer callback returns.
+ */
+static int xgbe_phy_i2c_xfer(struct xgbe_prv_data *pdata,
+                            struct xgbe_i2c_op *i2c_op)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       int ret = -EIO;
+
+       /* Only touch the bus while ownership is held */
+       if (!WARN_ON(!phy_data->comm_owned))
+               ret = pdata->i2c_if.i2c_xfer(pdata, i2c_op);
+
+       return ret;
+}
+
+/* Write a 16-bit value to a re-driver register over I2C.
+ *
+ * The message is five bytes: two register bytes, the value in big-endian
+ * byte order and a one byte checksum.  After the write a single status byte
+ * is read back; anything other than 0xff is reported as a checksum error.
+ * Each of the two transfers is retried once when it fails with -EAGAIN.
+ *
+ * Returns 0 on success, -EIO on a checksum error, or the error returned by
+ * the failing I2C transfer.
+ */
+static int xgbe_phy_redrv_write(struct xgbe_prv_data *pdata, unsigned int reg,
+                               unsigned int val)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       struct xgbe_i2c_op i2c_op;
+       u8 redrv_data[5], csum;
+       unsigned int i, retry;
+       int ret;
+
+       /* High byte of register contains read/write indicator */
+       redrv_data[0] = ((reg >> 8) & 0xff) << 1;
+       redrv_data[1] = reg & 0xff;
+
+       /* Store the low 16 bits of the value big-endian, one byte at a
+        * time.  The buffer is a plain byte array with no 16-bit alignment
+        * guarantee, so it must not be written through a __be16 pointer.
+        */
+       redrv_data[2] = (val >> 8) & 0xff;
+       redrv_data[3] = val & 0xff;
+
+       /* Calculate 1 byte checksum: sum the bytes, adding any carry out
+        * of the 8-bit sum back in, then invert.
+        */
+       csum = 0;
+       for (i = 0; i < 4; i++) {
+               csum += redrv_data[i];
+               if (redrv_data[i] > csum)
+                       csum++;
+       }
+       redrv_data[4] = ~csum;
+
+       /* Send the message, retrying once on -EAGAIN */
+       retry = 1;
+again1:
+       i2c_op.cmd = XGBE_I2C_CMD_WRITE;
+       i2c_op.target = phy_data->redrv_addr;
+       i2c_op.len = sizeof(redrv_data);
+       i2c_op.buf = redrv_data;
+       ret = xgbe_phy_i2c_xfer(pdata, &i2c_op);
+       if (ret) {
+               if ((ret == -EAGAIN) && retry--)
+                       goto again1;
+
+               return ret;
+       }
+
+       /* Read back the one byte status, retrying once on -EAGAIN */
+       retry = 1;
+again2:
+       i2c_op.cmd = XGBE_I2C_CMD_READ;
+       i2c_op.target = phy_data->redrv_addr;
+       i2c_op.len = 1;
+       i2c_op.buf = redrv_data;
+       ret = xgbe_phy_i2c_xfer(pdata, &i2c_op);
+       if (ret) {
+               if ((ret == -EAGAIN) && retry--)
+                       goto again2;
+
+               return ret;
+       }
+
+       if (redrv_data[0] != 0xff) {
+               netif_dbg(pdata, drv, pdata->netdev,
+                         "Redriver write checksum error\n");
+               ret = -EIO;
+       }
+
+       return ret;
+}
+
+/* Write @val_len bytes from @val to the I2C device at @target.
+ *
+ * The transfer is attempted at most twice: a second attempt is made only
+ * when the first one fails with -EAGAIN.  Returns the result of the last
+ * transfer attempted.
+ */
+static int xgbe_phy_i2c_write(struct xgbe_prv_data *pdata, unsigned int target,
+                             void *val, unsigned int val_len)
+{
+       struct xgbe_i2c_op write_op;
+       int attempts = 2;
+       int ret;
+
+       do {
+               /* Write the specified register */
+               write_op.cmd = XGBE_I2C_CMD_WRITE;
+               write_op.target = target;
+               write_op.len = val_len;
+               write_op.buf = val;
+               ret = xgbe_phy_i2c_xfer(pdata, &write_op);
+       } while ((ret == -EAGAIN) && --attempts);
+
+       return ret;
+}
+
+/* Read @val_len bytes into @val from the I2C device at @target.
+ *
+ * The read is done as two transfers: first @reg_len bytes from @reg are
+ * written to select what to read, then the data is read.  Each transfer is
+ * attempted at most twice, the second time only after -EAGAIN.
+ *
+ * Returns the error from the write transfer if it fails, otherwise the
+ * result of the read transfer.
+ */
+static int xgbe_phy_i2c_read(struct xgbe_prv_data *pdata, unsigned int target,
+                            void *reg, unsigned int reg_len,
+                            void *val, unsigned int val_len)
+{
+       struct xgbe_i2c_op xfer;
+       int attempts, ret;
+
+       attempts = 2;
+       do {
+               /* Set the specified register to read */
+               xfer.cmd = XGBE_I2C_CMD_WRITE;
+               xfer.target = target;
+               xfer.len = reg_len;
+               xfer.buf = reg;
+               ret = xgbe_phy_i2c_xfer(pdata, &xfer);
+       } while ((ret == -EAGAIN) && --attempts);
+
+       if (ret)
+               return ret;
+
+       attempts = 2;
+       do {
+               /* Read the specified register */
+               xfer.cmd = XGBE_I2C_CMD_READ;
+               xfer.target = target;
+               xfer.len = val_len;
+               xfer.buf = val;
+               ret = xgbe_phy_i2c_xfer(pdata, &xfer);
+       } while ((ret == -EAGAIN) && --attempts);
+
+       return ret;
+}
+
+/* Deselect all SFP mux channels.
+ *
+ * Returns 0 without any transfer when the SFP is accessed directly,
+ * otherwise the result of writing a zero channel mask to the mux.
+ */
+static int xgbe_phy_sfp_put_mux(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       struct xgbe_i2c_op mux_op;
+       u8 no_channels = 0;
+
+       /* No mux in the path, nothing to deselect */
+       if (phy_data->sfp_comm == XGBE_SFP_COMM_DIRECT)
+               return 0;
+
+       /* Select no mux channels */
+       mux_op.cmd = XGBE_I2C_CMD_WRITE;
+       mux_op.target = phy_data->sfp_mux_address;
+       mux_op.len = sizeof(no_channels);
+       mux_op.buf = &no_channels;
+
+       return xgbe_phy_i2c_xfer(pdata, &mux_op);
+}
+
+/* Select this port's SFP mux channel.
+ *
+ * Returns 0 without any transfer when the SFP is accessed directly,
+ * otherwise the result of writing the channel mask (a single bit at
+ * position sfp_mux_channel) to the mux.
+ */
+static int xgbe_phy_sfp_get_mux(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       struct xgbe_i2c_op mux_op;
+       u8 channel_mask;
+
+       /* No mux in the path, nothing to select */
+       if (phy_data->sfp_comm == XGBE_SFP_COMM_DIRECT)
+               return 0;
+
+       /* Select desired mux channel */
+       channel_mask = 1 << phy_data->sfp_mux_channel;
+
+       mux_op.cmd = XGBE_I2C_CMD_WRITE;
+       mux_op.target = phy_data->sfp_mux_address;
+       mux_op.len = sizeof(channel_mask);
+       mux_op.buf = &channel_mask;
+
+       return xgbe_phy_i2c_xfer(pdata, &mux_op);
+}
+
+/* Give up communication ownership: clear the comm_owned flag and release
+ * xgbe_phy_comm_lock.  The hardware mutex registers are not written here.
+ */
+static void xgbe_phy_put_comm_ownership(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       /* Clear the flag while the lock is still held */
+       phy_data->comm_owned = 0;
+
+       mutex_unlock(&xgbe_phy_comm_lock);
+}
+
+/* Take communication ownership of the shared I2C and MDIO/GPIO busses.
+ *
+ * Returns 0 immediately when comm_owned is already set.  Otherwise takes
+ * xgbe_phy_comm_lock and polls for up to 5 seconds for both hardware mutex
+ * registers to read zero before writing this port's mutex id to them.
+ *
+ * Returns 0 with xgbe_phy_comm_lock held and comm_owned set on success, or
+ * -ETIMEDOUT with the lock released when the hardware mutexes could not be
+ * obtained.
+ */
+static int xgbe_phy_get_comm_ownership(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned long timeout;
+       unsigned int mutex_id;
+
+       /* NOTE(review): this flag is tested before the lock is taken -
+        * confirm callers cannot race on it.
+        */
+       if (phy_data->comm_owned)
+               return 0;
+
+       /* The I2C and MDIO/GPIO bus is multiplexed between multiple devices,
+        * the driver needs to take the software mutex and then the hardware
+        * mutexes before being able to use the busses.
+        */
+       mutex_lock(&xgbe_phy_comm_lock);
+
+       /* Clear the mutexes */
+       XP_IOWRITE(pdata, XP_I2C_MUTEX, XGBE_MUTEX_RELEASE);
+       XP_IOWRITE(pdata, XP_MDIO_MUTEX, XGBE_MUTEX_RELEASE);
+
+       /* Mutex formats are the same for I2C and MDIO/GPIO */
+       mutex_id = 0;
+       XP_SET_BITS(mutex_id, XP_I2C_MUTEX, ID, phy_data->port_id);
+       XP_SET_BITS(mutex_id, XP_I2C_MUTEX, ACTIVE, 1);
+
+       /* Poll for at most 5 seconds */
+       timeout = jiffies + (5 * HZ);
+       while (time_before(jiffies, timeout)) {
+               /* Must be all zeroes in order to obtain the mutex */
+               if (XP_IOREAD(pdata, XP_I2C_MUTEX) ||
+                   XP_IOREAD(pdata, XP_MDIO_MUTEX)) {
+                       usleep_range(100, 200);
+                       continue;
+               }
+
+               /* Obtain the mutex */
+               XP_IOWRITE(pdata, XP_I2C_MUTEX, mutex_id);
+               XP_IOWRITE(pdata, XP_MDIO_MUTEX, mutex_id);
+
+               /* Success: return with xgbe_phy_comm_lock still held */
+               phy_data->comm_owned = 1;
+               return 0;
+       }
+
+       /* Timed out: drop the software lock before reporting the failure */
+       mutex_unlock(&xgbe_phy_comm_lock);
+
+       netdev_err(pdata->netdev, "unable to obtain hardware mutexes\n");
+
+       return -ETIMEDOUT;
+}
+
+/* Write an external PHY register over MDIO.
+ *
+ * The access type implied by @reg (clause 45 when MII_ADDR_C45 is set,
+ * clause 22 otherwise) must match the configured phydev_mode; if it does
+ * not, -ENOTSUPP is returned.  Otherwise returns the result of the
+ * write_ext_mii_regs hardware callback.
+ */
+static int xgbe_phy_mdio_mii_write(struct xgbe_prv_data *pdata, int addr,
+                                  int reg, u16 val)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       enum xgbe_mdio_mode wanted;
+
+       wanted = (reg & MII_ADDR_C45) ? XGBE_MDIO_MODE_CL45
+                                     : XGBE_MDIO_MODE_CL22;
+       if (phy_data->phydev_mode != wanted)
+               return -ENOTSUPP;
+
+       return pdata->hw_if.write_ext_mii_regs(pdata, addr, reg, val);
+}
+
+/* Write a 16-bit value to a register of the PHY inside the SFP, over I2C.
+ *
+ * The message is three bytes: the low 8 bits of @reg followed by @val in
+ * big-endian byte order.  The SFP mux channel is selected for the duration
+ * of the write and deselected afterwards; the result of the deselect is
+ * not checked.
+ *
+ * Returns the error from selecting the mux channel if that fails, otherwise
+ * the result of the I2C write.
+ */
+static int xgbe_phy_i2c_mii_write(struct xgbe_prv_data *pdata, int reg, u16 val)
+{
+       u8 mii_data[3];
+       int ret;
+
+       ret = xgbe_phy_sfp_get_mux(pdata);
+       if (ret)
+               return ret;
+
+       /* Build the message one byte at a time.  The value starts at an
+        * odd offset in a byte array, so it must not be stored through a
+        * __be16 pointer (that would be a misaligned 16-bit access).
+        */
+       mii_data[0] = reg & 0xff;
+       mii_data[1] = (val >> 8) & 0xff;
+       mii_data[2] = val & 0xff;
+
+       ret = xgbe_phy_i2c_write(pdata, XGBE_SFP_PHY_ADDRESS,
+                                mii_data, sizeof(mii_data));
+
+       xgbe_phy_sfp_put_mux(pdata);
+
+       return ret;
+}
+
+/* MII bus write callback.
+ *
+ * Takes communication ownership, routes the write over I2C (SFP
+ * connection) or MDIO, then releases ownership.
+ *
+ * Returns the error from obtaining ownership, -ENOTSUPP for any other
+ * connection type, or the result of the selected write routine.
+ */
+static int xgbe_phy_mii_write(struct mii_bus *mii, int addr, int reg, u16 val)
+{
+       struct xgbe_prv_data *pdata = mii->priv;
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       int ret;
+
+       ret = xgbe_phy_get_comm_ownership(pdata);
+       if (ret)
+               return ret;
+
+       /* NOTE(review): conn_type is a sequential enum, yet the MDIO test
+        * is a bitwise AND, so it also matches XGBE_CONN_TYPE_RSVD1 (3).
+        * Behaviour is kept as is - confirm whether "==" was intended.
+        */
+       ret = -ENOTSUPP;
+       if (phy_data->conn_type == XGBE_CONN_TYPE_SFP)
+               ret = xgbe_phy_i2c_mii_write(pdata, reg, val);
+       else if (phy_data->conn_type & XGBE_CONN_TYPE_MDIO)
+               ret = xgbe_phy_mdio_mii_write(pdata, addr, reg, val);
+
+       xgbe_phy_put_comm_ownership(pdata);
+
+       return ret;
+}
+
+/* Read an external PHY register over MDIO.
+ *
+ * The access type implied by @reg (clause 45 when MII_ADDR_C45 is set,
+ * clause 22 otherwise) must match the configured phydev_mode; if it does
+ * not, -ENOTSUPP is returned.  Otherwise returns the result of the
+ * read_ext_mii_regs hardware callback.
+ */
+static int xgbe_phy_mdio_mii_read(struct xgbe_prv_data *pdata, int addr,
+                                 int reg)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       enum xgbe_mdio_mode wanted;
+
+       wanted = (reg & MII_ADDR_C45) ? XGBE_MDIO_MODE_CL45
+                                     : XGBE_MDIO_MODE_CL22;
+       if (phy_data->phydev_mode != wanted)
+               return -ENOTSUPP;
+
+       return pdata->hw_if.read_ext_mii_regs(pdata, addr, reg);
+}
+
+/* Read a 16-bit register of the PHY inside the SFP, over I2C.
+ *
+ * The SFP mux channel is selected for the duration of the read and
+ * deselected afterwards; the result of the deselect is not checked.
+ *
+ * Returns the register value (converted from big-endian) on success, the
+ * error from selecting the mux channel, or the error from the I2C read.
+ */
+static int xgbe_phy_i2c_mii_read(struct xgbe_prv_data *pdata, int reg)
+{
+       __be16 raw_val;
+       u8 reg_addr = reg;
+       int ret;
+
+       ret = xgbe_phy_sfp_get_mux(pdata);
+       if (ret)
+               return ret;
+
+       ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_PHY_ADDRESS,
+                               &reg_addr, sizeof(reg_addr),
+                               &raw_val, sizeof(raw_val));
+       if (ret == 0)
+               ret = be16_to_cpu(raw_val);
+
+       xgbe_phy_sfp_put_mux(pdata);
+
+       return ret;
+}
+
+/* MII bus read callback.
+ *
+ * Takes communication ownership, routes the read over I2C (SFP connection)
+ * or MDIO, then releases ownership.
+ *
+ * Returns the error from obtaining ownership, -ENOTSUPP for any other
+ * connection type, or the result of the selected read routine.
+ */
+static int xgbe_phy_mii_read(struct mii_bus *mii, int addr, int reg)
+{
+       struct xgbe_prv_data *pdata = mii->priv;
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       int ret;
+
+       ret = xgbe_phy_get_comm_ownership(pdata);
+       if (ret)
+               return ret;
+
+       /* NOTE(review): conn_type is a sequential enum, yet the MDIO test
+        * is a bitwise AND, so it also matches XGBE_CONN_TYPE_RSVD1 (3).
+        * Behaviour is kept as is - confirm whether "==" was intended.
+        */
+       ret = -ENOTSUPP;
+       if (phy_data->conn_type == XGBE_CONN_TYPE_SFP)
+               ret = xgbe_phy_i2c_mii_read(pdata, reg);
+       else if (phy_data->conn_type & XGBE_CONN_TYPE_MDIO)
+               ret = xgbe_phy_mdio_mii_read(pdata, addr, reg);
+
+       xgbe_phy_put_comm_ownership(pdata);
+
+       return ret;
+}
+
+/* Derive pdata->phy speed, duplex, autoneg and advertising settings from
+ * the SFP state (module presence, module type and speed class) and from the
+ * speeds the port supports.
+ */
+static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int speeds = phy_data->port_speeds;
+
+       /* Without a module, start from everything that is supported */
+       if (phy_data->sfp_mod_absent) {
+               pdata->phy.speed = SPEED_UNKNOWN;
+               pdata->phy.duplex = DUPLEX_UNKNOWN;
+               pdata->phy.autoneg = AUTONEG_ENABLE;
+               pdata->phy.advertising = pdata->phy.supported;
+       }
+
+       /* Clear every bit that is decided again below */
+       pdata->phy.advertising &= ~(ADVERTISED_Autoneg |
+                                   ADVERTISED_TP |
+                                   ADVERTISED_FIBRE |
+                                   ADVERTISED_100baseT_Full |
+                                   ADVERTISED_1000baseT_Full |
+                                   ADVERTISED_10000baseT_Full |
+                                   ADVERTISED_10000baseR_FEC);
+
+       /* 1G modules auto-negotiate; everything else (the 10G modules and
+        * unknown types) is fixed at 10G full duplex
+        */
+       switch (phy_data->sfp_base) {
+       case XGBE_SFP_BASE_1000_T:
+       case XGBE_SFP_BASE_1000_SX:
+       case XGBE_SFP_BASE_1000_LX:
+       case XGBE_SFP_BASE_1000_CX:
+               pdata->phy.speed = SPEED_UNKNOWN;
+               pdata->phy.duplex = DUPLEX_UNKNOWN;
+               pdata->phy.autoneg = AUTONEG_ENABLE;
+               pdata->phy.advertising |= ADVERTISED_Autoneg;
+               break;
+       default:
+               pdata->phy.speed = SPEED_10000;
+               pdata->phy.duplex = DUPLEX_FULL;
+               pdata->phy.autoneg = AUTONEG_DISABLE;
+               break;
+       }
+
+       /* T, CX and CR modules are reported as TP, the rest as FIBRE */
+       switch (phy_data->sfp_base) {
+       case XGBE_SFP_BASE_1000_T:
+       case XGBE_SFP_BASE_1000_CX:
+       case XGBE_SFP_BASE_10000_CR:
+               pdata->phy.advertising |= ADVERTISED_TP;
+               break;
+       default:
+               pdata->phy.advertising |= ADVERTISED_FIBRE;
+               break;
+       }
+
+       /* Advertise only speeds the port itself supports */
+       switch (phy_data->sfp_speed) {
+       case XGBE_SFP_SPEED_100_1000:
+               if (speeds & XGBE_PHY_PORT_SPEED_100)
+                       pdata->phy.advertising |= ADVERTISED_100baseT_Full;
+               if (speeds & XGBE_PHY_PORT_SPEED_1000)
+                       pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+               break;
+       case XGBE_SFP_SPEED_1000:
+               if (speeds & XGBE_PHY_PORT_SPEED_1000)
+                       pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+               break;
+       case XGBE_SFP_SPEED_10000:
+               if (speeds & XGBE_PHY_PORT_SPEED_10000)
+                       pdata->phy.advertising |= ADVERTISED_10000baseT_Full;
+               break;
+       default:
+               /* Choose the fastest supported speed */
+               if (speeds & XGBE_PHY_PORT_SPEED_10000)
+                       pdata->phy.advertising |= ADVERTISED_10000baseT_Full;
+               else if (speeds & XGBE_PHY_PORT_SPEED_1000)
+                       pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+               else if (speeds & XGBE_PHY_PORT_SPEED_100)
+                       pdata->phy.advertising |= ADVERTISED_100baseT_Full;
+               break;
+       }
+}
+
+/* Check whether the bit rate byte of the SFP EEPROM lies within the
+ * inclusive range defined for @sfp_speed.
+ *
+ * Only XGBE_SFP_SPEED_1000 and XGBE_SFP_SPEED_10000 have a range; any
+ * other speed yields false.
+ */
+static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom,
+                                 enum xgbe_sfp_speed sfp_speed)
+{
+       u8 rate = sfp_eeprom->base[XGBE_SFP_BASE_BR];
+
+       if (sfp_speed == XGBE_SFP_SPEED_1000)
+               return (rate >= XGBE_SFP_BASE_BR_1GBE_MIN) &&
+                      (rate <= XGBE_SFP_BASE_BR_1GBE_MAX);
+
+       if (sfp_speed == XGBE_SFP_SPEED_10000)
+               return (rate >= XGBE_SFP_BASE_BR_10GBE_MIN) &&
+                      (rate <= XGBE_SFP_BASE_BR_10GBE_MAX);
+
+       return false;
+}
+
+/* Detach, remove and free the external PHY device, if one is attached,
+ * and forget the pointer to it.
+ */
+static void xgbe_phy_free_phy_device(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       struct phy_device *phydev = phy_data->phydev;
+
+       if (!phydev)
+               return;
+
+       phy_detach(phydev);
+       phy_device_remove(phydev);
+       phy_device_free(phydev);
+
+       phy_data->phydev = NULL;
+}
+
+/* Apply the Finisar PHY quirk when the attached PHY id matches.
+ *
+ * A PHY matches when its id, ignoring the low four bits, is 0x01ff0cc0.
+ * For a match a fixed sequence of register writes is issued (their results
+ * are not checked) and the PHY's supported/advertising masks are replaced
+ * with the gigabit feature set plus symmetric and asymmetric pause.
+ *
+ * Returns true when the quirk was applied, false when the PHY does not
+ * match.
+ */
+static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       struct phy_device *phydev = phy_data->phydev;
+
+       if ((phydev->phy_id & 0xfffffff0) != 0x01ff0cc0)
+               return false;
+
+       /* Enable Base-T AN */
+       phy_write(phydev, 0x16, 0x0001);
+       phy_write(phydev, 0x00, 0x9140);
+       phy_write(phydev, 0x16, 0x0000);
+
+       /* Enable SGMII at 100Base-T/1000Base-T Full Duplex */
+       phy_write(phydev, 0x1b, 0x9084);
+       phy_write(phydev, 0x09, 0x0e00);
+       phy_write(phydev, 0x00, 0x8140);
+       phy_write(phydev, 0x04, 0x0d01);
+       phy_write(phydev, 0x00, 0x9140);
+
+       /* Advertise everything the quirked PHY supports */
+       phydev->supported = PHY_GBIT_FEATURES;
+       phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+       phydev->advertising = phydev->supported;
+
+       netif_dbg(pdata, drv, pdata->netdev,
+                 "Finisar PHY quirk in place\n");
+
+       return true;
+}
+
+/* Apply external PHY quirks.  The Finisar quirk is the only one tried
+ * here; the function returns as soon as a quirk reports that it matched.
+ */
+static void xgbe_phy_external_phy_quirks(struct xgbe_prv_data *pdata)
+{
+       if (xgbe_phy_finisar_phy_quirks(pdata))
+               return;
+}
+
+/* Create, register and attach the external PHY device when one is in use.
+ *
+ * Nothing is done (and 0 is returned) when a PHY is already attached, when
+ * the MDIO mode is XGBE_MDIO_MODE_NONE, or when the port is an SFP port
+ * whose module has no accessible PHY.  After a successful attach the PHY
+ * quirks are applied, the PHY's advertising mask is limited to what the
+ * driver advertises and auto-negotiation is started.
+ *
+ * Returns 0 on success, -ENODEV when get_phy_device() fails, or the error
+ * from phy_device_register() / phy_attach_direct().
+ */
+static int xgbe_phy_find_phy_device(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       struct phy_device *phydev;
+       bool is_c45;
+       int ret;
+
+       /* Done if a PHY is already attached or no external PHY is used */
+       if (phy_data->phydev ||
+           (phy_data->phydev_mode == XGBE_MDIO_MODE_NONE))
+               return 0;
+
+       /* For SFP, only use an external PHY if available */
+       if ((phy_data->port_mode == XGBE_PORT_MODE_SFP) &&
+           !phy_data->sfp_phy_avail)
+               return 0;
+
+       /* Create and connect to the PHY device */
+       is_c45 = (phy_data->phydev_mode == XGBE_MDIO_MODE_CL45);
+       phydev = get_phy_device(phy_data->mii, phy_data->mdio_addr, is_c45);
+       if (IS_ERR(phydev)) {
+               netdev_err(pdata->netdev, "get_phy_device failed\n");
+               return -ENODEV;
+       }
+       netif_dbg(pdata, drv, pdata->netdev, "external PHY id is %#010x\n",
+                 phydev->phy_id);
+
+       /*TODO: If c45, add request_module based on one of the MMD ids? */
+
+       ret = phy_device_register(phydev);
+       if (ret) {
+               netdev_err(pdata->netdev, "phy_device_register failed\n");
+               goto err_free;
+       }
+
+       ret = phy_attach_direct(pdata->netdev, phydev, phydev->dev_flags,
+                               PHY_INTERFACE_MODE_SGMII);
+       if (ret) {
+               netdev_err(pdata->netdev, "phy_attach_direct failed\n");
+               goto err_remove;
+       }
+       phy_data->phydev = phydev;
+
+       xgbe_phy_external_phy_quirks(pdata);
+       phydev->advertising &= pdata->phy.advertising;
+
+       phy_start_aneg(phydev);
+
+       return 0;
+
+err_remove:
+       phy_device_remove(phydev);
+err_free:
+       phy_device_free(phydev);
+
+       return ret;
+}
+
+/* Determine whether the SFP module contains a PHY that can be accessed.
+ *
+ * Only evaluated when the SFP has changed.  sfp_phy_avail is cleared and
+ * then set again only for a 1000Base-T module whose BMCR register can be
+ * read over I2C.
+ */
+static void xgbe_phy_sfp_external_phy(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       if (!phy_data->sfp_changed)
+               return;
+
+       phy_data->sfp_phy_avail = 0;
+
+       /* Check access to the PHY by reading CTRL1; a successful read
+        * means the PHY is usable
+        */
+       if ((phy_data->sfp_base == XGBE_SFP_BASE_1000_T) &&
+           (xgbe_phy_i2c_mii_read(pdata, MII_BMCR) >= 0))
+               phy_data->sfp_phy_avail = 1;
+}
+
+/* Bel-Fuse quirk: when both the vendor name and the part number stored in
+ * the cached SFP EEPROM match, force the module type, cable type and speed
+ * and return true. Returns false when either field differs.
+ */
+static bool xgbe_phy_belfuse_parse_quirks(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       u8 *base = phy_data->sfp_eeprom.base;
+
+       /* Both the vendor name and the part number have to match */
+       if (memcmp(&base[XGBE_SFP_BASE_VENDOR_NAME], XGBE_BEL_FUSE_VENDOR,
+                  XGBE_SFP_BASE_VENDOR_NAME_LEN) ||
+           memcmp(&base[XGBE_SFP_BASE_VENDOR_PN], XGBE_BEL_FUSE_PARTNO,
+                  XGBE_SFP_BASE_VENDOR_PN_LEN))
+               return false;
+
+       /* Override what would otherwise be decoded from the EEPROM */
+       phy_data->sfp_base = XGBE_SFP_BASE_1000_SX;
+       phy_data->sfp_cable = XGBE_SFP_CABLE_ACTIVE;
+       phy_data->sfp_speed = XGBE_SFP_SPEED_1000;
+
+       /* Only report the quirk when the SFP has changed */
+       if (phy_data->sfp_changed)
+               netif_dbg(pdata, drv, pdata->netdev,
+                         "Bel-Fuse SFP quirk in place\n");
+
+       return true;
+}
+
+/* Run the known SFP quirk handlers; true means a quirk was applied. */
+static bool xgbe_phy_sfp_parse_quirks(struct xgbe_prv_data *pdata)
+{
+       return xgbe_phy_belfuse_parse_quirks(pdata);
+}
+
+/* Derive the cable type, module type and speed from the cached SFP EEPROM.
+ * Returns without changing anything when the identifier bytes do not describe
+ * an SFP, and stops early when a quirk handler has matched the module.
+ */
+static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom;
+       u8 *sfp_base = sfp_eeprom->base;
+       u8 cc_10g, cc_1g;
+
+       /* Only modules identifying themselves as SFP are decoded */
+       if ((sfp_base[XGBE_SFP_BASE_ID] != XGBE_SFP_ID_SFP) ||
+           (sfp_base[XGBE_SFP_BASE_EXT_ID] != XGBE_SFP_EXT_ID_SFP))
+               return;
+
+       /* A matching quirk replaces the generic decoding below */
+       if (xgbe_phy_sfp_parse_quirks(pdata))
+               return;
+
+       /* Assume ACTIVE cable unless told it is PASSIVE */
+       if (!(sfp_base[XGBE_SFP_BASE_CABLE] & XGBE_SFP_BASE_CABLE_PASSIVE)) {
+               phy_data->sfp_cable = XGBE_SFP_CABLE_ACTIVE;
+       } else {
+               phy_data->sfp_cable = XGBE_SFP_CABLE_PASSIVE;
+               phy_data->sfp_cable_len = sfp_base[XGBE_SFP_BASE_CU_CABLE_LEN];
+       }
+
+       /* Determine the type of SFP; 10GbE codes are checked before 1GbE */
+       cc_10g = sfp_base[XGBE_SFP_BASE_10GBE_CC];
+       cc_1g = sfp_base[XGBE_SFP_BASE_1GBE_CC];
+
+       if (cc_10g & XGBE_SFP_BASE_10GBE_CC_SR)
+               phy_data->sfp_base = XGBE_SFP_BASE_10000_SR;
+       else if (cc_10g & XGBE_SFP_BASE_10GBE_CC_LR)
+               phy_data->sfp_base = XGBE_SFP_BASE_10000_LR;
+       else if (cc_10g & XGBE_SFP_BASE_10GBE_CC_LRM)
+               phy_data->sfp_base = XGBE_SFP_BASE_10000_LRM;
+       else if (cc_10g & XGBE_SFP_BASE_10GBE_CC_ER)
+               phy_data->sfp_base = XGBE_SFP_BASE_10000_ER;
+       else if (cc_1g & XGBE_SFP_BASE_1GBE_CC_SX)
+               phy_data->sfp_base = XGBE_SFP_BASE_1000_SX;
+       else if (cc_1g & XGBE_SFP_BASE_1GBE_CC_LX)
+               phy_data->sfp_base = XGBE_SFP_BASE_1000_LX;
+       else if (cc_1g & XGBE_SFP_BASE_1GBE_CC_CX)
+               phy_data->sfp_base = XGBE_SFP_BASE_1000_CX;
+       else if (cc_1g & XGBE_SFP_BASE_1GBE_CC_T)
+               phy_data->sfp_base = XGBE_SFP_BASE_1000_T;
+       else if ((phy_data->sfp_cable == XGBE_SFP_CABLE_PASSIVE) &&
+                xgbe_phy_sfp_bit_rate(sfp_eeprom, XGBE_SFP_SPEED_10000))
+               phy_data->sfp_base = XGBE_SFP_BASE_10000_CR;
+
+       /* The speed follows from the module type; unknown types keep theirs */
+       switch (phy_data->sfp_base) {
+       case XGBE_SFP_BASE_1000_T:
+               phy_data->sfp_speed = XGBE_SFP_SPEED_100_1000;
+               break;
+       case XGBE_SFP_BASE_1000_SX:
+       case XGBE_SFP_BASE_1000_LX:
+       case XGBE_SFP_BASE_1000_CX:
+               phy_data->sfp_speed = XGBE_SFP_SPEED_1000;
+               break;
+       case XGBE_SFP_BASE_10000_SR:
+       case XGBE_SFP_BASE_10000_LR:
+       case XGBE_SFP_BASE_10000_LRM:
+       case XGBE_SFP_BASE_10000_ER:
+       case XGBE_SFP_BASE_10000_CR:
+               phy_data->sfp_speed = XGBE_SFP_SPEED_10000;
+               break;
+       default:
+               break;
+       }
+}
+
+/* Emit debug messages with the vendor name, part number, revision level and
+ * serial number found in the given SFP EEPROM image. Each field is copied to
+ * a scratch buffer and NUL terminated before it is printed.
+ */
+static void xgbe_phy_sfp_eeprom_info(struct xgbe_prv_data *pdata,
+                                    struct xgbe_sfp_eeprom *sfp_eeprom)
+{
+       struct xgbe_sfp_ascii ascii;
+       char *text = (char *)&ascii;
+       u8 *base = sfp_eeprom->base;
+       u8 *extd = sfp_eeprom->extd;
+
+       netif_dbg(pdata, drv, pdata->netdev, "SFP detected:\n");
+
+       /* Vendor name */
+       memcpy(text, base + XGBE_SFP_BASE_VENDOR_NAME,
+              XGBE_SFP_BASE_VENDOR_NAME_LEN);
+       text[XGBE_SFP_BASE_VENDOR_NAME_LEN] = '\0';
+       netif_dbg(pdata, drv, pdata->netdev, "  vendor:         %s\n",
+                 text);
+
+       /* Vendor part number */
+       memcpy(text, base + XGBE_SFP_BASE_VENDOR_PN,
+              XGBE_SFP_BASE_VENDOR_PN_LEN);
+       text[XGBE_SFP_BASE_VENDOR_PN_LEN] = '\0';
+       netif_dbg(pdata, drv, pdata->netdev, "  part number:    %s\n",
+                 text);
+
+       /* Vendor revision level */
+       memcpy(text, base + XGBE_SFP_BASE_VENDOR_REV,
+              XGBE_SFP_BASE_VENDOR_REV_LEN);
+       text[XGBE_SFP_BASE_VENDOR_REV_LEN] = '\0';
+       netif_dbg(pdata, drv, pdata->netdev, "  revision level: %s\n",
+                 text);
+
+       /* Vendor serial number, taken from the extended area */
+       memcpy(text, extd + XGBE_SFP_BASE_VENDOR_SN,
+              XGBE_SFP_BASE_VENDOR_SN_LEN);
+       text[XGBE_SFP_BASE_VENDOR_SN_LEN] = '\0';
+       netif_dbg(pdata, drv, pdata->netdev, "  serial number:  %s\n",
+                 text);
+}
+
+/* Return true when the 8-bit (wrapping) sum of the len bytes starting at buf
+ * equals cc_in.
+ */
+static bool xgbe_phy_sfp_verify_eeprom(u8 cc_in, u8 *buf, unsigned int len)
+{
+       u8 sum = 0;
+
+       while (len--)
+               sum += *buf++;
+
+       return sum == cc_in;
+}
+
+/* Read the SFP serial ID EEPROM over I2C, verify the checksums of its base
+ * and extended areas and refresh the copy cached in phy_data.
+ *
+ * phy_data->sfp_changed is set to 1 when the freshly read contents differ
+ * from the cached copy (any attached PHY device is then released) and to 0
+ * when they are identical.
+ *
+ * Returns 0 on success, the error from selecting the SFP mux or from the I2C
+ * read, or -EINVAL when either checksum does not verify.
+ */
+static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       struct xgbe_sfp_eeprom sfp_eeprom;
+       u8 eeprom_addr;
+       int ret;
+
+       ret = xgbe_phy_sfp_get_mux(pdata);
+       if (ret) {
+               netdev_err(pdata->netdev, "I2C error setting SFP MUX\n");
+               return ret;
+       }
+
+       /* Read the SFP serial ID eeprom */
+       eeprom_addr = 0;
+       ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_SERIAL_ID_ADDRESS,
+                               &eeprom_addr, sizeof(eeprom_addr),
+                               &sfp_eeprom, sizeof(sfp_eeprom));
+       if (ret) {
+               netdev_err(pdata->netdev, "I2C error reading SFP EEPROM\n");
+               goto put;
+       }
+
+       /* Validate the contents read */
+       /* The last byte of each area is left out of the sum - presumably
+        * because it holds the checksum itself (TODO confirm against the
+        * XGBE_SFP_*_CC offsets)
+        */
+       if (!xgbe_phy_sfp_verify_eeprom(sfp_eeprom.base[XGBE_SFP_BASE_CC],
+                                       sfp_eeprom.base,
+                                       sizeof(sfp_eeprom.base) - 1)) {
+               ret = -EINVAL;
+               goto put;
+       }
+
+       if (!xgbe_phy_sfp_verify_eeprom(sfp_eeprom.extd[XGBE_SFP_EXTD_CC],
+                                       sfp_eeprom.extd,
+                                       sizeof(sfp_eeprom.extd) - 1)) {
+               ret = -EINVAL;
+               goto put;
+       }
+
+       /* Check for an added or changed SFP */
+       if (memcmp(&phy_data->sfp_eeprom, &sfp_eeprom, sizeof(sfp_eeprom))) {
+               phy_data->sfp_changed = 1;
+
+               if (netif_msg_drv(pdata))
+                       xgbe_phy_sfp_eeprom_info(pdata, &sfp_eeprom);
+
+               /* Remember the new contents for the next comparison */
+               memcpy(&phy_data->sfp_eeprom, &sfp_eeprom, sizeof(sfp_eeprom));
+
+               /* Diagnostics are flagged only when the SFF-8472 byte is
+                * non-zero and the address-change bit is clear
+                */
+               if (sfp_eeprom.extd[XGBE_SFP_EXTD_SFF_8472]) {
+                       u8 diag_type = sfp_eeprom.extd[XGBE_SFP_EXTD_DIAG];
+
+                       if (!(diag_type & XGBE_SFP_EXTD_DIAG_ADDR_CHANGE))
+                               phy_data->sfp_diags = 1;
+               }
+
+               /* Drop any PHY device attached for the previous module */
+               xgbe_phy_free_phy_device(pdata);
+       } else {
+               phy_data->sfp_changed = 0;
+       }
+
+       /* Every path that selected the mux releases it here */
+put:
+       xgbe_phy_sfp_put_mux(pdata);
+
+       return ret;
+}
+
+/* Sample the SFP GPIO inputs over I2C and update the module-absent, RX
+ * loss-of-signal and TX fault flags in phy_data. The flags are only ever
+ * moved away from their reset values here (mod_absent cleared, rx_los and
+ * tx_fault set); on an I2C error they are left untouched.
+ */
+static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       u8 reg = 0, ports[2];
+       unsigned int inputs;
+       int ret;
+
+       /* Read the input port registers */
+       ret = xgbe_phy_i2c_read(pdata, phy_data->sfp_gpio_address,
+                               &reg, sizeof(reg),
+                               ports, sizeof(ports));
+       if (ret) {
+               netdev_err(pdata->netdev, "I2C error reading SFP GPIOs\n");
+               return;
+       }
+
+       /* Second byte read is the high half of the 16-bit input word */
+       inputs = (ports[1] << 8) | ports[0];
+
+       /* Without a MOD_ABSENT GPIO just assume the module is present for
+        * now, otherwise a clear GPIO bit means the module is present
+        */
+       if ((phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT) ||
+           !(inputs & (1 << phy_data->sfp_gpio_mod_absent)))
+               phy_data->sfp_mod_absent = 0;
+
+       if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS) &&
+           (inputs & (1 << phy_data->sfp_gpio_rx_los)))
+               phy_data->sfp_rx_los = 1;
+
+       if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT) &&
+           (inputs & (1 << phy_data->sfp_gpio_tx_fault)))
+               phy_data->sfp_tx_fault = 1;
+}
+
+/* Put the SFP bookkeeping into the "no module" state: release any attached
+ * PHY device, flag the module as absent and clear the cached EEPROM image.
+ */
+static void xgbe_phy_sfp_mod_absent(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       struct xgbe_sfp_eeprom *cached = &phy_data->sfp_eeprom;
+
+       xgbe_phy_free_phy_device(pdata);
+
+       phy_data->sfp_mod_absent = 1;
+       phy_data->sfp_phy_avail = 0;
+
+       /* Forget the EEPROM contents of the previous module */
+       memset(cached, 0, sizeof(*cached));
+}
+
+/* Return the SFP signal flags and the decoded module information to their
+ * defaults: module absent, no faults, everything else unknown.
+ */
+static void xgbe_phy_sfp_reset(struct xgbe_phy_data *phy_data)
+{
+       /* Signal state */
+       phy_data->sfp_mod_absent = 1;
+       phy_data->sfp_rx_los = 0;
+       phy_data->sfp_tx_fault = 0;
+
+       /* Decoded module information */
+       phy_data->sfp_base = XGBE_SFP_BASE_UNKNOWN;
+       phy_data->sfp_cable = XGBE_SFP_CABLE_UNKNOWN;
+       phy_data->sfp_speed = XGBE_SFP_SPEED_UNKNOWN;
+       phy_data->sfp_diags = 0;
+}
+
+/* Detect the SFP module: reset the cached state, sample the GPIO signals,
+ * read and decode the EEPROM and finally refresh the PHY settings. All bus
+ * accesses happen while holding communication ownership; if ownership cannot
+ * be obtained only the reset takes effect.
+ */
+static void xgbe_phy_sfp_detect(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       /* Reset the SFP signals and info */
+       xgbe_phy_sfp_reset(phy_data);
+
+       if (xgbe_phy_get_comm_ownership(pdata))
+               return;
+
+       /* Read the SFP signals and check for module presence */
+       xgbe_phy_sfp_signals(pdata);
+
+       if (phy_data->sfp_mod_absent) {
+               xgbe_phy_sfp_mod_absent(pdata);
+       } else if (xgbe_phy_sfp_read_eeprom(pdata)) {
+               /* Treat any error as if there isn't an SFP plugged in */
+               xgbe_phy_sfp_reset(phy_data);
+               xgbe_phy_sfp_mod_absent(pdata);
+       } else {
+               xgbe_phy_sfp_parse_eeprom(pdata);
+
+               xgbe_phy_sfp_external_phy(pdata);
+       }
+
+       /* The settings are refreshed whatever the detection outcome was */
+       xgbe_phy_sfp_phy_settings(pdata);
+
+       xgbe_phy_put_comm_ownership(pdata);
+}
+
+/* Resolve TX/RX pause from the external PHY: the local abilities come from
+ * the PHY's advertising mask, the link partner's from its pause/asym_pause
+ * fields. Both pause settings are cleared first and stay cleared when no
+ * external PHY is attached.
+ */
+static void xgbe_phy_phydev_flowctrl(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       struct phy_device *phydev = phy_data->phydev;
+       u16 local = 0, remote = 0;
+       u8 resolved;
+
+       pdata->phy.tx_pause = 0;
+       pdata->phy.rx_pause = 0;
+
+       if (!phydev)
+               return;
+
+       /* What we advertise */
+       if (phydev->advertising & ADVERTISED_Pause)
+               local |= ADVERTISE_PAUSE_CAP;
+       if (phydev->advertising & ADVERTISED_Asym_Pause)
+               local |= ADVERTISE_PAUSE_ASYM;
+
+       /* What the link partner advertises, also recorded for reporting */
+       if (phydev->pause) {
+               pdata->phy.lp_advertising |= ADVERTISED_Pause;
+               remote |= LPA_PAUSE_CAP;
+       }
+       if (phydev->asym_pause) {
+               pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
+               remote |= LPA_PAUSE_ASYM;
+       }
+
+       resolved = mii_resolve_flowctrl_fdx(local, remote);
+       if (resolved & FLOW_CTRL_TX)
+               pdata->phy.tx_pause = 1;
+       if (resolved & FLOW_CTRL_RX)
+               pdata->phy.rx_pause = 1;
+}
+
+/* Translate the Clause 37 SGMII auto-negotiation status into a mode. Only
+ * full duplex at 100 or 1000 yields a mode; half duplex is recorded in
+ * lp_advertising but results in XGBE_MODE_UNKNOWN, as does any other speed.
+ */
+static enum xgbe_mode xgbe_phy_an37_sgmii_outcome(struct xgbe_prv_data *pdata)
+{
+       enum xgbe_mode mode = XGBE_MODE_UNKNOWN;
+       bool full_duplex;
+
+       pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
+       pdata->phy.lp_advertising |= ADVERTISED_TP;
+
+       /* Use external PHY to determine flow control */
+       if (pdata->phy.pause_autoneg)
+               xgbe_phy_phydev_flowctrl(pdata);
+
+       full_duplex = !!(pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX);
+
+       switch (pdata->an_status & XGBE_SGMII_AN_LINK_SPEED) {
+       case XGBE_SGMII_AN_LINK_SPEED_100:
+               if (full_duplex) {
+                       pdata->phy.lp_advertising |= ADVERTISED_100baseT_Full;
+                       mode = XGBE_MODE_SGMII_100;
+               } else {
+                       /* Half-duplex not supported */
+                       pdata->phy.lp_advertising |= ADVERTISED_100baseT_Half;
+               }
+               break;
+       case XGBE_SGMII_AN_LINK_SPEED_1000:
+               if (full_duplex) {
+                       pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Full;
+                       mode = XGBE_MODE_SGMII_1000;
+               } else {
+                       /* Half-duplex not supported */
+                       pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Half;
+               }
+               break;
+       default:
+               break;
+       }
+
+       return mode;
+}
+
+/* Evaluate the Clause 37 auto-negotiation result: record what the link
+ * partner advertised, resolve pause when pause auto-negotiation is enabled
+ * and return XGBE_MODE_X when both ends advertise the full duplex bit,
+ * XGBE_MODE_UNKNOWN otherwise.
+ */
+static enum xgbe_mode xgbe_phy_an37_outcome(struct xgbe_prv_data *pdata)
+{
+       unsigned int ad_reg, lp_reg, common;
+
+       pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
+       pdata->phy.lp_advertising |= ADVERTISED_FIBRE;
+
+       /* Compare Advertisement and Link Partner register */
+       ad_reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE);
+       lp_reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_LP_ABILITY);
+       common = ad_reg & lp_reg;
+
+       /* Record everything the link partner advertised */
+       if (lp_reg & 0x100)
+               pdata->phy.lp_advertising |= ADVERTISED_Pause;
+       if (lp_reg & 0x80)
+               pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
+       if (lp_reg & 0x40)
+               pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Half;
+       if (lp_reg & 0x20)
+               pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Full;
+
+       if (pdata->phy.pause_autoneg) {
+               /* Set flow control based on auto-negotiation result */
+               pdata->phy.tx_pause = 0;
+               pdata->phy.rx_pause = 0;
+
+               if (common & 0x100) {
+                       pdata->phy.tx_pause = 1;
+                       pdata->phy.rx_pause = 1;
+               } else if (common & 0x80) {
+                       if (ad_reg & 0x100)
+                               pdata->phy.rx_pause = 1;
+                       else if (lp_reg & 0x100)
+                               pdata->phy.tx_pause = 1;
+               }
+       }
+
+       /* Half duplex is not supported */
+       return (common & 0x20) ? XGBE_MODE_X : XGBE_MODE_UNKNOWN;
+}
+
+/* Evaluate the Clause 73 auto-negotiation result when a KR re-driver is in
+ * use. The link partner's abilities are recorded in lp_advertising, flow
+ * control is taken from the external PHY and the returned mode is chosen
+ * from the common speed bit together with the port mode (and, for SFP ports,
+ * the module type). Returns XGBE_MODE_UNKNOWN when no common speed is found.
+ */
+static enum xgbe_mode xgbe_phy_an73_redrv_outcome(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       enum xgbe_mode mode;
+       unsigned int ad_reg, lp_reg;
+
+       pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
+       pdata->phy.lp_advertising |= ADVERTISED_Backplane;
+
+       /* Use external PHY to determine flow control */
+       if (pdata->phy.pause_autoneg)
+               xgbe_phy_phydev_flowctrl(pdata);
+
+       /* Compare Advertisement and Link Partner register 2 */
+       ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
+       lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1);
+       if (lp_reg & 0x80)
+               pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full;
+       if (lp_reg & 0x20)
+               pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full;
+
+       /* Keep only the abilities both ends advertise; 0x80 (the 10G bit)
+        * is checked before 0x20 (the 1G bit)
+        */
+       ad_reg &= lp_reg;
+       if (ad_reg & 0x80) {
+               /* 10G: KR on a backplane port, SFI on everything else */
+               switch (phy_data->port_mode) {
+               case XGBE_PORT_MODE_BACKPLANE:
+                       mode = XGBE_MODE_KR;
+                       break;
+               default:
+                       mode = XGBE_MODE_SFI;
+                       break;
+               }
+       } else if (ad_reg & 0x20) {
+               /* 1G: the actual mode depends on what the port connects to */
+               switch (phy_data->port_mode) {
+               case XGBE_PORT_MODE_BACKPLANE:
+                       mode = XGBE_MODE_KX_1000;
+                       break;
+               case XGBE_PORT_MODE_1000BASE_X:
+                       mode = XGBE_MODE_X;
+                       break;
+               case XGBE_PORT_MODE_SFP:
+                       switch (phy_data->sfp_base) {
+                       case XGBE_SFP_BASE_1000_T:
+                               /* SGMII speed follows the external PHY,
+                                * 1000 unless it reports 100
+                                */
+                               if (phy_data->phydev &&
+                                   (phy_data->phydev->speed == SPEED_100))
+                                       mode = XGBE_MODE_SGMII_100;
+                               else
+                                       mode = XGBE_MODE_SGMII_1000;
+                               break;
+                       case XGBE_SFP_BASE_1000_SX:
+                       case XGBE_SFP_BASE_1000_LX:
+                       case XGBE_SFP_BASE_1000_CX:
+                       default:
+                               mode = XGBE_MODE_X;
+                               break;
+                       }
+                       break;
+               default:
+                       /* All remaining port modes use SGMII, 1000 unless
+                        * the external PHY reports 100
+                        */
+                       if (phy_data->phydev &&
+                           (phy_data->phydev->speed == SPEED_100))
+                               mode = XGBE_MODE_SGMII_100;
+                       else
+                               mode = XGBE_MODE_SGMII_1000;
+                       break;
+               }
+       } else {
+               mode = XGBE_MODE_UNKNOWN;
+       }
+
+       /* Compare Advertisement and Link Partner register 3 */
+       /* NOTE(review): only lp_reg is used below; the ad_reg value read
+        * here is not consulted afterwards
+        */
+       ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
+       lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2);
+       if (lp_reg & 0xc000)
+               pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC;
+
+       return mode;
+}
+
+/* Evaluate the Clause 73 auto-negotiation result: record the link partner's
+ * abilities, resolve pause when pause auto-negotiation is enabled and return
+ * KR or KX-1000 according to the highest common speed, XGBE_MODE_UNKNOWN when
+ * there is none.
+ */
+static enum xgbe_mode xgbe_phy_an73_outcome(struct xgbe_prv_data *pdata)
+{
+       enum xgbe_mode mode = XGBE_MODE_UNKNOWN;
+       unsigned int ad_reg, lp_reg, common;
+
+       pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
+       pdata->phy.lp_advertising |= ADVERTISED_Backplane;
+
+       /* Compare Advertisement and Link Partner register 1 */
+       ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
+       lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA);
+       common = ad_reg & lp_reg;
+
+       if (lp_reg & 0x400)
+               pdata->phy.lp_advertising |= ADVERTISED_Pause;
+       if (lp_reg & 0x800)
+               pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
+
+       if (pdata->phy.pause_autoneg) {
+               /* Set flow control based on auto-negotiation result */
+               pdata->phy.tx_pause = 0;
+               pdata->phy.rx_pause = 0;
+
+               if (common & 0x400) {
+                       pdata->phy.tx_pause = 1;
+                       pdata->phy.rx_pause = 1;
+               } else if (common & 0x800) {
+                       if (ad_reg & 0x400)
+                               pdata->phy.rx_pause = 1;
+                       else if (lp_reg & 0x400)
+                               pdata->phy.tx_pause = 1;
+               }
+       }
+
+       /* Compare Advertisement and Link Partner register 2 */
+       ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
+       lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1);
+       common = ad_reg & lp_reg;
+
+       if (lp_reg & 0x80)
+               pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full;
+       if (lp_reg & 0x20)
+               pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full;
+
+       /* The 10G bit takes precedence over the 1G bit */
+       if (common & 0x80)
+               mode = XGBE_MODE_KR;
+       else if (common & 0x20)
+               mode = XGBE_MODE_KX_1000;
+
+       /* Compare Advertisement and Link Partner register 3 */
+       ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
+       lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2);
+       if (lp_reg & 0xc000)
+               pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC;
+
+       return mode;
+}
+
+/* Dispatch to the outcome handler matching the active auto-negotiation mode;
+ * modes without a handler yield XGBE_MODE_UNKNOWN.
+ */
+static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata)
+{
+       enum xgbe_mode mode;
+
+       switch (pdata->an_mode) {
+       case XGBE_AN_MODE_CL73:
+               mode = xgbe_phy_an73_outcome(pdata);
+               break;
+       case XGBE_AN_MODE_CL73_REDRV:
+               mode = xgbe_phy_an73_redrv_outcome(pdata);
+               break;
+       case XGBE_AN_MODE_CL37:
+               mode = xgbe_phy_an37_outcome(pdata);
+               break;
+       case XGBE_AN_MODE_CL37_SGMII:
+               mode = xgbe_phy_an37_sgmii_outcome(pdata);
+               break;
+       default:
+               mode = XGBE_MODE_UNKNOWN;
+               break;
+       }
+
+       return mode;
+}
+
+/* Return the advertising mask to use for auto-negotiation. Without a
+ * re-driver this is the current mask unchanged; with one, the KX and KR bits
+ * are replaced by the single speed bit that fits the port mode.
+ */
+static unsigned int xgbe_phy_an_advertising(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int advertising, speed;
+
+       /* Without a re-driver, just return current advertising */
+       if (!phy_data->redrv)
+               return pdata->phy.advertising;
+
+       /* With the KR re-driver we need to advertise a single speed */
+       advertising = pdata->phy.advertising;
+       advertising &= ~(ADVERTISED_1000baseKX_Full |
+                        ADVERTISED_10000baseKR_Full);
+
+       switch (phy_data->port_mode) {
+       case XGBE_PORT_MODE_BACKPLANE_2500:
+       case XGBE_PORT_MODE_1000BASE_T:
+       case XGBE_PORT_MODE_1000BASE_X:
+       case XGBE_PORT_MODE_NBASE_T:
+               speed = ADVERTISED_1000baseKX_Full;
+               break;
+       case XGBE_PORT_MODE_10GBASE_T:
+               /* Follow the external PHY, KX unless it reports 10G */
+               if (phy_data->phydev &&
+                   (phy_data->phydev->speed == SPEED_10000))
+                       speed = ADVERTISED_10000baseKR_Full;
+               else
+                       speed = ADVERTISED_1000baseKX_Full;
+               break;
+       case XGBE_PORT_MODE_SFP:
+               /* 1G module types use KX, everything else KR */
+               switch (phy_data->sfp_base) {
+               case XGBE_SFP_BASE_1000_T:
+               case XGBE_SFP_BASE_1000_SX:
+               case XGBE_SFP_BASE_1000_LX:
+               case XGBE_SFP_BASE_1000_CX:
+                       speed = ADVERTISED_1000baseKX_Full;
+                       break;
+               default:
+                       speed = ADVERTISED_10000baseKR_Full;
+                       break;
+               }
+               break;
+       case XGBE_PORT_MODE_BACKPLANE:
+       case XGBE_PORT_MODE_10GBASE_R:
+       default:
+               speed = ADVERTISED_10000baseKR_Full;
+               break;
+       }
+
+       return advertising | speed;
+}
+
+/* Push the current auto-negotiation settings to the external PHY, if there
+ * is one, and (re)start its negotiation.
+ *
+ * Returns the error from locating the PHY device, 0 when no external PHY is
+ * in use, otherwise the result of phy_start_aneg().
+ */
+static int xgbe_phy_an_config(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       struct phy_device *phydev;
+       int ret;
+
+       ret = xgbe_phy_find_phy_device(pdata);
+       if (ret)
+               return ret;
+
+       /* Looked up only now, as the call above may have attached a PHY */
+       phydev = phy_data->phydev;
+       if (!phydev)
+               return 0;
+
+       phydev->autoneg = pdata->phy.autoneg;
+       phydev->advertising = phydev->supported & pdata->phy.advertising;
+
+       /* A forced link also needs the speed and duplex handed over */
+       if (pdata->phy.autoneg != AUTONEG_ENABLE) {
+               phydev->speed = pdata->phy.speed;
+               phydev->duplex = pdata->phy.duplex;
+       }
+
+       return phy_start_aneg(phydev);
+}
+
+/* Pick the auto-negotiation mode for an SFP port from the module type:
+ * CL37 SGMII for 1000BASE-T, CL37 for the other 1G types, none otherwise.
+ */
+static enum xgbe_an_mode xgbe_phy_an_sfp_mode(struct xgbe_phy_data *phy_data)
+{
+       enum xgbe_an_mode an_mode = XGBE_AN_MODE_NONE;
+
+       switch (phy_data->sfp_base) {
+       case XGBE_SFP_BASE_1000_T:
+               an_mode = XGBE_AN_MODE_CL37_SGMII;
+               break;
+       case XGBE_SFP_BASE_1000_SX:
+       case XGBE_SFP_BASE_1000_LX:
+       case XGBE_SFP_BASE_1000_CX:
+               an_mode = XGBE_AN_MODE_CL37;
+               break;
+       default:
+               break;
+       }
+
+       return an_mode;
+}
+
+/* Select the auto-negotiation mode for the port. A re-driver always forces
+ * the CL73 re-driver mode; otherwise the choice follows the port mode, with
+ * SFP ports deferring to the module type.
+ */
+static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       /* A KR re-driver will always require CL73 AN */
+       if (phy_data->redrv)
+               return XGBE_AN_MODE_CL73_REDRV;
+
+       switch (phy_data->port_mode) {
+       case XGBE_PORT_MODE_BACKPLANE:
+       case XGBE_PORT_MODE_10GBASE_T:
+               return XGBE_AN_MODE_CL73;
+       case XGBE_PORT_MODE_1000BASE_T:
+       case XGBE_PORT_MODE_NBASE_T:
+               return XGBE_AN_MODE_CL37_SGMII;
+       case XGBE_PORT_MODE_1000BASE_X:
+               return XGBE_AN_MODE_CL37;
+       case XGBE_PORT_MODE_SFP:
+               return xgbe_phy_an_sfp_mode(phy_data);
+       case XGBE_PORT_MODE_BACKPLANE_2500:
+       case XGBE_PORT_MODE_10GBASE_R:
+       default:
+               return XGBE_AN_MODE_NONE;
+       }
+}
+
+/* Write the re-driver mode through the external MII register interface.
+ * The register is the mode register offset by 0x1000 per re-driver lane.
+ * Returns whatever write_ext_mii_regs() returns.
+ */
+static int xgbe_phy_set_redrv_mode_mdio(struct xgbe_prv_data *pdata,
+                                       enum xgbe_phy_redrv_mode mode)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       u16 reg = XGBE_PHY_REDRV_MODE_REG + (phy_data->redrv_lane * 0x1000);
+       u16 val = (u16)mode;
+
+       return pdata->hw_if.write_ext_mii_regs(pdata, phy_data->redrv_addr,
+                                              reg, val);
+}
+
+/* Write the re-driver mode through the I2C re-driver interface. Returns
+ * whatever xgbe_phy_redrv_write() returns.
+ */
+static int xgbe_phy_set_redrv_mode_i2c(struct xgbe_prv_data *pdata,
+                                      enum xgbe_phy_redrv_mode mode)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int reg;
+
+       /* Calculate the register to write */
+       reg = XGBE_PHY_REDRV_MODE_REG + (phy_data->redrv_lane * 0x1000);
+
+       return xgbe_phy_redrv_write(pdata, reg, mode);
+}
+
+/* Program the re-driver, when one is present, for the current connection:
+ * SR mode for an SFP port whose module is neither 1000BASE-CX nor 10GBASE-CR,
+ * CX mode in every other case. The write is performed while holding
+ * communication ownership and its result is not checked.
+ */
+static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       enum xgbe_phy_redrv_mode mode;
+
+       if (!phy_data->redrv)
+               return;
+
+       mode = ((phy_data->port_mode == XGBE_PORT_MODE_SFP) &&
+               (phy_data->sfp_base != XGBE_SFP_BASE_1000_CX) &&
+               (phy_data->sfp_base != XGBE_SFP_BASE_10000_CR)) ?
+              XGBE_PHY_REDRV_MODE_SR : XGBE_PHY_REDRV_MODE_CX;
+
+       if (xgbe_phy_get_comm_ownership(pdata))
+               return;
+
+       /* redrv_if selects the I2C interface, otherwise MDIO is used */
+       if (phy_data->redrv_if)
+               xgbe_phy_set_redrv_mode_i2c(pdata, mode);
+       else
+               xgbe_phy_set_redrv_mode_mdio(pdata, mode);
+
+       xgbe_phy_put_comm_ownership(pdata);
+}
+
+/* Called before a new firmware mailbox command is issued: emits a debug
+ * message when the STATUS bit still reads non-zero. Nothing is waited for
+ * or changed here.
+ */
+static void xgbe_phy_start_ratechange(struct xgbe_prv_data *pdata)
+{
+       /* Log if a previous command did not complete */
+       if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
+               netif_dbg(pdata, link, pdata->netdev,
+                         "firmware mailbox not ready for command\n");
+}
+
+/* Poll for completion of a firmware mailbox command: check the STATUS bit up
+ * to XGBE_RATECHANGE_COUNT times, sleeping 1-2 ms after each unsuccessful
+ * check, and emit a debug message when it never clears.
+ */
+static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata)
+{
+       unsigned int tries;
+
+       /* Wait for command to complete */
+       for (tries = XGBE_RATECHANGE_COUNT; tries; tries--) {
+               if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
+                       return;
+
+               usleep_range(1000, 2000);
+       }
+
+       netif_dbg(pdata, link, pdata->netdev,
+                 "firmware mailbox command did not complete\n");
+}
+
+/* Ask the firmware for a receiver reset cycle (mailbox command 5,
+ * sub-command 0) and wait for the command to complete.
+ */
+static void xgbe_phy_rrc(struct xgbe_prv_data *pdata)
+{
+       unsigned int s0;
+
+       xgbe_phy_start_ratechange(pdata);
+
+       /* Receiver Reset Cycle */
+       s0 = 0;
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 5);
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
+
+       /* Call FW to make the change */
+       /* Both scratch registers are written before the request is raised */
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+       xgbe_phy_complete_ratechange(pdata);
+
+       netif_dbg(pdata, link, pdata->netdev, "receiver reset complete\n");
+}
+
+/* Power the PHY off by issuing an all-zero mailbox command to the firmware,
+ * then mark the current mode as unknown.
+ */
+static void xgbe_phy_power_off(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       xgbe_phy_start_ratechange(pdata);
+
+       /* Call FW to make the change */
+       /* Both scratch registers are written before the request is raised */
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, 0);
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+       xgbe_phy_complete_ratechange(pdata);
+
+       phy_data->cur_mode = XGBE_MODE_UNKNOWN;
+
+       phy_data->cur_mode = XGBE_MODE_UNKNOWN;
+
+       netif_dbg(pdata, link, pdata->netdev, "phy powered off\n");
+}
+
+/* Switch the PHY to 10G SFI mode: program the re-driver, issue mailbox
+ * command 3 with a sub-command chosen from the SFP cable type and length,
+ * wait for completion and record XGBE_MODE_SFI as the current mode.
+ */
+static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int s0;
+
+       xgbe_phy_set_redrv_mode(pdata);
+
+       xgbe_phy_start_ratechange(pdata);
+
+       /* 10G/SFI */
+       s0 = 0;
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 3);
+       /* Sub-command 0 for anything but a passive cable; passive cables
+        * use 1, 2 or 3 for a length of up to 1, up to 3 or more than 3
+        * (length presumably in metres - TODO confirm)
+        */
+       if (phy_data->sfp_cable != XGBE_SFP_CABLE_PASSIVE) {
+               XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
+       } else {
+               if (phy_data->sfp_cable_len <= 1)
+                       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1);
+               else if (phy_data->sfp_cable_len <= 3)
+                       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2);
+               else
+                       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
+       }
+
+       /* Call FW to make the change */
+       /* Both scratch registers are written before the request is raised */
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+       xgbe_phy_complete_ratechange(pdata);
+
+       phy_data->cur_mode = XGBE_MODE_SFI;
+
+       netif_dbg(pdata, link, pdata->netdev, "10GbE SFI mode set\n");
+}
+
+/* Switch the PHY to 1G X mode: program the re-driver, issue mailbox command
+ * 1 / sub-command 3, wait for completion and record XGBE_MODE_X as the
+ * current mode.
+ */
+static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int s0;
+
+       xgbe_phy_set_redrv_mode(pdata);
+
+       xgbe_phy_start_ratechange(pdata);
+
+       /* 1G/X */
+       s0 = 0;
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
+
+       /* Call FW to make the change */
+       /* Both scratch registers are written before the request is raised */
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+       xgbe_phy_complete_ratechange(pdata);
+
+       phy_data->cur_mode = XGBE_MODE_X;
+
+       netif_dbg(pdata, link, pdata->netdev, "1GbE X mode set\n");
+}
+
+/* Switch the PHY to 1G SGMII mode: program the re-driver, issue mailbox
+ * command 1 / sub-command 2, wait for completion and record
+ * XGBE_MODE_SGMII_1000 as the current mode.
+ */
+static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int s0;
+
+       xgbe_phy_set_redrv_mode(pdata);
+
+       xgbe_phy_start_ratechange(pdata);
+
+       /* 1G/SGMII */
+       s0 = 0;
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2);
+
+       /* Call FW to make the change */
+       /* Both scratch registers are written before the request is raised */
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+       xgbe_phy_complete_ratechange(pdata);
+
+       phy_data->cur_mode = XGBE_MODE_SGMII_1000;
+
+       netif_dbg(pdata, link, pdata->netdev, "1GbE SGMII mode set\n");
+}
+
+/* Ask the firmware to switch to 100MbE SGMII mode and record it as current.
+ *
+ * The re-driver mode is set first; the request itself is bracketed by
+ * xgbe_phy_start_ratechange() and xgbe_phy_complete_ratechange().
+ */
+static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int s0;
+
+       xgbe_phy_set_redrv_mode(pdata);
+
+       xgbe_phy_start_ratechange(pdata);
+
+       /* 100M/SGMII */
+       s0 = 0;
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1);
+
+       /* Call FW to make the change */
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+       xgbe_phy_complete_ratechange(pdata);
+
+       /* Remember the mode so xgbe_phy_cur_mode() reports it */
+       phy_data->cur_mode = XGBE_MODE_SGMII_100;
+
+       netif_dbg(pdata, link, pdata->netdev, "100MbE SGMII mode set\n");
+}
+
+/* Ask the firmware to switch to 10GbE KR mode and record it as current.
+ *
+ * The re-driver mode is set first; the request itself is bracketed by
+ * xgbe_phy_start_ratechange() and xgbe_phy_complete_ratechange().
+ */
+static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int s0;
+
+       xgbe_phy_set_redrv_mode(pdata);
+
+       xgbe_phy_start_ratechange(pdata);
+
+       /* 10G/KR */
+       s0 = 0;
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 4);
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
+
+       /* Call FW to make the change */
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+       xgbe_phy_complete_ratechange(pdata);
+
+       /* Remember the mode so xgbe_phy_cur_mode() reports it */
+       phy_data->cur_mode = XGBE_MODE_KR;
+
+       netif_dbg(pdata, link, pdata->netdev, "10GbE KR mode set\n");
+}
+
+/* Ask the firmware to switch to 2.5GbE KX mode and record it as current.
+ *
+ * The re-driver mode is set first; the request itself is bracketed by
+ * xgbe_phy_start_ratechange() and xgbe_phy_complete_ratechange().
+ */
+static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int s0;
+
+       xgbe_phy_set_redrv_mode(pdata);
+
+       xgbe_phy_start_ratechange(pdata);
+
+       /* 2.5G/KX */
+       s0 = 0;
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 2);
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
+
+       /* Call FW to make the change */
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+       xgbe_phy_complete_ratechange(pdata);
+
+       /* Remember the mode so xgbe_phy_cur_mode() reports it */
+       phy_data->cur_mode = XGBE_MODE_KX_2500;
+
+       netif_dbg(pdata, link, pdata->netdev, "2.5GbE KX mode set\n");
+}
+
+/* Ask the firmware to switch to 1GbE KX mode and record it as current.
+ *
+ * The re-driver mode is set first; the request itself is bracketed by
+ * xgbe_phy_start_ratechange() and xgbe_phy_complete_ratechange().
+ */
+static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int s0;
+
+       xgbe_phy_set_redrv_mode(pdata);
+
+       xgbe_phy_start_ratechange(pdata);
+
+       /* 1G/KX */
+       s0 = 0;
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
+       XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
+
+       /* Call FW to make the change */
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+       XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+       XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+       xgbe_phy_complete_ratechange(pdata);
+
+       /* Remember the mode so xgbe_phy_cur_mode() reports it */
+       phy_data->cur_mode = XGBE_MODE_KX_1000;
+
+       netif_dbg(pdata, link, pdata->netdev, "1GbE KX mode set\n");
+}
+
+/* Return the mode most recently recorded in phy_data->cur_mode. */
+static enum xgbe_mode xgbe_phy_cur_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       return phy_data->cur_mode;
+}
+
+/* Pick the mode to try next on a BASE-T port.
+ *
+ * Only a 10GBase-T port alternates: either SGMII mode moves to KR and
+ * anything else moves to 1GbE SGMII. Other port modes keep the current
+ * mode.
+ */
+static enum xgbe_mode xgbe_phy_switch_baset_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       enum xgbe_mode cur = xgbe_phy_cur_mode(pdata);
+
+       /* No switching if not 10GBase-T */
+       if (phy_data->port_mode != XGBE_PORT_MODE_10GBASE_T)
+               return cur;
+
+       if (cur == XGBE_MODE_SGMII_100 || cur == XGBE_MODE_SGMII_1000)
+               return XGBE_MODE_KR;
+
+       /* KR, or any unexpected mode, falls back to 1GbE SGMII */
+       return XGBE_MODE_SGMII_1000;
+}
+
+/* A 2.5GbE backplane port has a single mode, so "switching" always
+ * yields XGBE_MODE_KX_2500; @pdata is not consulted.
+ */
+static enum xgbe_mode xgbe_phy_switch_bp_2500_mode(struct xgbe_prv_data *pdata)
+{
+       return XGBE_MODE_KX_2500;
+}
+
+/* Pick the mode to try next on a backplane port: 1GbE KX moves to KR,
+ * while KR (or any other current mode) moves to 1GbE KX.
+ */
+static enum xgbe_mode xgbe_phy_switch_bp_mode(struct xgbe_prv_data *pdata)
+{
+       if (xgbe_phy_cur_mode(pdata) == XGBE_MODE_KX_1000)
+               return XGBE_MODE_KR;
+
+       return XGBE_MODE_KX_1000;
+}
+
+/* Return the mode to switch to next, as decided by the port mode.
+ *
+ * Port modes that never switch report the current mode; an unrecognised
+ * port mode yields XGBE_MODE_UNKNOWN.
+ */
+static enum xgbe_mode xgbe_phy_switch_mode(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       enum xgbe_mode next = XGBE_MODE_UNKNOWN;
+
+       switch (phy_data->port_mode) {
+       case XGBE_PORT_MODE_BACKPLANE:
+               next = xgbe_phy_switch_bp_mode(pdata);
+               break;
+       case XGBE_PORT_MODE_BACKPLANE_2500:
+               next = xgbe_phy_switch_bp_2500_mode(pdata);
+               break;
+       case XGBE_PORT_MODE_1000BASE_T:
+       case XGBE_PORT_MODE_NBASE_T:
+       case XGBE_PORT_MODE_10GBASE_T:
+               next = xgbe_phy_switch_baset_mode(pdata);
+               break;
+       case XGBE_PORT_MODE_1000BASE_X:
+       case XGBE_PORT_MODE_10GBASE_R:
+       case XGBE_PORT_MODE_SFP:
+               /* These ports do not switch; stay where we are */
+               next = xgbe_phy_cur_mode(pdata);
+               break;
+       default:
+               break;
+       }
+
+       return next;
+}
+
+/* Map a speed to the mode used on a BASE-X/BASE-R port: 1000 -> X,
+ * 10000 -> KR, anything else -> XGBE_MODE_UNKNOWN. @phy_data is unused.
+ */
+static enum xgbe_mode xgbe_phy_get_basex_mode(struct xgbe_phy_data *phy_data,
+                                             int speed)
+{
+       if (speed == SPEED_1000)
+               return XGBE_MODE_X;
+
+       if (speed == SPEED_10000)
+               return XGBE_MODE_KR;
+
+       return XGBE_MODE_UNKNOWN;
+}
+
+/* Map a speed to the mode used on a BASE-T port: 100 -> 100MbE SGMII,
+ * 1000 -> 1GbE SGMII, 10000 -> KR, anything else -> XGBE_MODE_UNKNOWN.
+ * @phy_data is unused.
+ */
+static enum xgbe_mode xgbe_phy_get_baset_mode(struct xgbe_phy_data *phy_data,
+                                             int speed)
+{
+       if (speed == SPEED_100)
+               return XGBE_MODE_SGMII_100;
+
+       if (speed == SPEED_1000)
+               return XGBE_MODE_SGMII_1000;
+
+       if (speed == SPEED_10000)
+               return XGBE_MODE_KR;
+
+       return XGBE_MODE_UNKNOWN;
+}
+
+/* Map a speed to the mode used on an SFP port.
+ *
+ * At 1000 the choice depends on the detected module base: 1000BASE-T
+ * modules use 1GbE SGMII, all others use X. Both 10000 and an unknown
+ * speed select SFI.
+ */
+static enum xgbe_mode xgbe_phy_get_sfp_mode(struct xgbe_phy_data *phy_data,
+                                           int speed)
+{
+       if (speed == SPEED_100)
+               return XGBE_MODE_SGMII_100;
+
+       if (speed == SPEED_1000)
+               return (phy_data->sfp_base == XGBE_SFP_BASE_1000_T) ?
+                       XGBE_MODE_SGMII_1000 : XGBE_MODE_X;
+
+       if (speed == SPEED_10000 || speed == SPEED_UNKNOWN)
+               return XGBE_MODE_SFI;
+
+       return XGBE_MODE_UNKNOWN;
+}
+
+/* Map a speed to the mode used on a 2.5GbE backplane port: only 2500
+ * is recognised.
+ */
+static enum xgbe_mode xgbe_phy_get_bp_2500_mode(int speed)
+{
+       if (speed == SPEED_2500)
+               return XGBE_MODE_KX_2500;
+
+       return XGBE_MODE_UNKNOWN;
+}
+
+/* Map a speed to the mode used on a backplane port: 1000 -> 1GbE KX,
+ * 10000 -> KR, anything else -> XGBE_MODE_UNKNOWN.
+ */
+static enum xgbe_mode xgbe_phy_get_bp_mode(int speed)
+{
+       if (speed == SPEED_1000)
+               return XGBE_MODE_KX_1000;
+
+       if (speed == SPEED_10000)
+               return XGBE_MODE_KR;
+
+       return XGBE_MODE_UNKNOWN;
+}
+
+/* Translate a link speed into the operating mode for this port.
+ *
+ * The port mode selects which per-family lookup is consulted; a port
+ * mode with no lookup yields XGBE_MODE_UNKNOWN.
+ */
+static enum xgbe_mode xgbe_phy_get_mode(struct xgbe_prv_data *pdata,
+                                       int speed)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       enum xgbe_mode mode = XGBE_MODE_UNKNOWN;
+
+       switch (phy_data->port_mode) {
+       case XGBE_PORT_MODE_BACKPLANE:
+               mode = xgbe_phy_get_bp_mode(speed);
+               break;
+       case XGBE_PORT_MODE_BACKPLANE_2500:
+               mode = xgbe_phy_get_bp_2500_mode(speed);
+               break;
+       case XGBE_PORT_MODE_1000BASE_T:
+       case XGBE_PORT_MODE_NBASE_T:
+       case XGBE_PORT_MODE_10GBASE_T:
+               mode = xgbe_phy_get_baset_mode(phy_data, speed);
+               break;
+       case XGBE_PORT_MODE_1000BASE_X:
+       case XGBE_PORT_MODE_10GBASE_R:
+               mode = xgbe_phy_get_basex_mode(phy_data, speed);
+               break;
+       case XGBE_PORT_MODE_SFP:
+               mode = xgbe_phy_get_sfp_mode(phy_data, speed);
+               break;
+       default:
+               break;
+       }
+
+       return mode;
+}
+
+/* Program the hardware for @mode by calling the matching mode routine.
+ * Modes without a routine are silently ignored.
+ */
+static void xgbe_phy_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode)
+{
+       switch (mode) {
+       case XGBE_MODE_KX_1000:
+               xgbe_phy_kx_1000_mode(pdata);
+               return;
+       case XGBE_MODE_KX_2500:
+               xgbe_phy_kx_2500_mode(pdata);
+               return;
+       case XGBE_MODE_KR:
+               xgbe_phy_kr_mode(pdata);
+               return;
+       case XGBE_MODE_SGMII_100:
+               xgbe_phy_sgmii_100_mode(pdata);
+               return;
+       case XGBE_MODE_SGMII_1000:
+               xgbe_phy_sgmii_1000_mode(pdata);
+               return;
+       case XGBE_MODE_X:
+               xgbe_phy_x_mode(pdata);
+               return;
+       case XGBE_MODE_SFI:
+               xgbe_phy_sfi_mode(pdata);
+               return;
+       default:
+               /* Nothing to program for any other mode */
+               return;
+       }
+}
+
+/* Decide whether @mode is one the port should currently operate in.
+ *
+ * With auto-negotiation enabled the answer is whether any bit of @advert
+ * is present in the advertised mask; otherwise it is whether the
+ * configured speed maps to @mode.
+ */
+static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata,
+                               enum xgbe_mode mode, u32 advert)
+{
+       if (pdata->phy.autoneg != AUTONEG_ENABLE)
+               return xgbe_phy_get_mode(pdata, pdata->phy.speed) == mode;
+
+       return (pdata->phy.advertising & advert) != 0;
+}
+
+/* Report whether @mode may be used on a BASE-X/BASE-R port. Only X and
+ * KR are candidates; each is checked against its advertising bit.
+ */
+static bool xgbe_phy_use_basex_mode(struct xgbe_prv_data *pdata,
+                                   enum xgbe_mode mode)
+{
+       u32 advert;
+
+       switch (mode) {
+       case XGBE_MODE_X:
+               advert = ADVERTISED_1000baseT_Full;
+               break;
+       case XGBE_MODE_KR:
+               advert = ADVERTISED_10000baseT_Full;
+               break;
+       default:
+               return false;
+       }
+
+       return xgbe_phy_check_mode(pdata, mode, advert);
+}
+
+/* Report whether @mode may be used on a BASE-T port. Only the two SGMII
+ * modes and KR are candidates; each is checked against its advertising
+ * bit.
+ */
+static bool xgbe_phy_use_baset_mode(struct xgbe_prv_data *pdata,
+                                   enum xgbe_mode mode)
+{
+       u32 advert;
+
+       switch (mode) {
+       case XGBE_MODE_SGMII_100:
+               advert = ADVERTISED_100baseT_Full;
+               break;
+       case XGBE_MODE_SGMII_1000:
+               advert = ADVERTISED_1000baseT_Full;
+               break;
+       case XGBE_MODE_KR:
+               advert = ADVERTISED_10000baseT_Full;
+               break;
+       default:
+               return false;
+       }
+
+       return xgbe_phy_check_mode(pdata, mode, advert);
+}
+
+/* Report whether @mode may be used on an SFP port.
+ *
+ * X is rejected for 1000BASE-T modules and the SGMII modes are rejected
+ * for everything else; the surviving candidates are then checked against
+ * their advertising bit.
+ */
+static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata,
+                                 enum xgbe_mode mode)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       bool base_t = (phy_data->sfp_base == XGBE_SFP_BASE_1000_T);
+       u32 advert;
+
+       switch (mode) {
+       case XGBE_MODE_X:
+               if (base_t)
+                       return false;
+               advert = ADVERTISED_1000baseT_Full;
+               break;
+       case XGBE_MODE_SGMII_100:
+               if (!base_t)
+                       return false;
+               advert = ADVERTISED_100baseT_Full;
+               break;
+       case XGBE_MODE_SGMII_1000:
+               if (!base_t)
+                       return false;
+               advert = ADVERTISED_1000baseT_Full;
+               break;
+       case XGBE_MODE_SFI:
+               advert = ADVERTISED_10000baseT_Full;
+               break;
+       default:
+               return false;
+       }
+
+       return xgbe_phy_check_mode(pdata, mode, advert);
+}
+
+/* Report whether @mode may be used on a 2.5GbE backplane port. Only
+ * 2.5GbE KX is a candidate.
+ */
+static bool xgbe_phy_use_bp_2500_mode(struct xgbe_prv_data *pdata,
+                                     enum xgbe_mode mode)
+{
+       if (mode != XGBE_MODE_KX_2500)
+               return false;
+
+       return xgbe_phy_check_mode(pdata, mode, ADVERTISED_2500baseX_Full);
+}
+
+/* Report whether @mode may be used on a backplane port. Only 1GbE KX
+ * and KR are candidates; each is checked against its advertising bit.
+ */
+static bool xgbe_phy_use_bp_mode(struct xgbe_prv_data *pdata,
+                                enum xgbe_mode mode)
+{
+       u32 advert;
+
+       switch (mode) {
+       case XGBE_MODE_KX_1000:
+               advert = ADVERTISED_1000baseKX_Full;
+               break;
+       case XGBE_MODE_KR:
+               advert = ADVERTISED_10000baseKR_Full;
+               break;
+       default:
+               return false;
+       }
+
+       return xgbe_phy_check_mode(pdata, mode, advert);
+}
+
+/* Report whether @mode may be used on this port, delegating to the
+ * check for the port's family. Unrecognised port modes allow nothing.
+ */
+static bool xgbe_phy_use_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       bool usable = false;
+
+       switch (phy_data->port_mode) {
+       case XGBE_PORT_MODE_BACKPLANE:
+               usable = xgbe_phy_use_bp_mode(pdata, mode);
+               break;
+       case XGBE_PORT_MODE_BACKPLANE_2500:
+               usable = xgbe_phy_use_bp_2500_mode(pdata, mode);
+               break;
+       case XGBE_PORT_MODE_1000BASE_T:
+       case XGBE_PORT_MODE_NBASE_T:
+       case XGBE_PORT_MODE_10GBASE_T:
+               usable = xgbe_phy_use_baset_mode(pdata, mode);
+               break;
+       case XGBE_PORT_MODE_1000BASE_X:
+       case XGBE_PORT_MODE_10GBASE_R:
+               usable = xgbe_phy_use_basex_mode(pdata, mode);
+               break;
+       case XGBE_PORT_MODE_SFP:
+               usable = xgbe_phy_use_sfp_mode(pdata, mode);
+               break;
+       default:
+               break;
+       }
+
+       return usable;
+}
+
+/* Report whether @speed is valid on a BASE-X/BASE-R port: 1000 only on
+ * a 1000BASE-X port and 10000 only on a 10GBASE-R port.
+ */
+static bool xgbe_phy_valid_speed_basex_mode(struct xgbe_phy_data *phy_data,
+                                           int speed)
+{
+       if (speed == SPEED_1000)
+               return phy_data->port_mode == XGBE_PORT_MODE_1000BASE_X;
+
+       if (speed == SPEED_10000)
+               return phy_data->port_mode == XGBE_PORT_MODE_10GBASE_R;
+
+       return false;
+}
+
+/* Report whether @speed is valid on a BASE-T port: 100 and 1000 always,
+ * 10000 only on a 10GBASE-T port.
+ */
+static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_phy_data *phy_data,
+                                           int speed)
+{
+       if (speed == SPEED_100 || speed == SPEED_1000)
+               return true;
+
+       if (speed == SPEED_10000)
+               return phy_data->port_mode == XGBE_PORT_MODE_10GBASE_T;
+
+       return false;
+}
+
+/* Report whether @speed is valid for the detected SFP speed class. */
+static bool xgbe_phy_valid_speed_sfp_mode(struct xgbe_phy_data *phy_data,
+                                         int speed)
+{
+       if (speed == SPEED_100)
+               return phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000;
+
+       if (speed == SPEED_1000)
+               return phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000 ||
+                      phy_data->sfp_speed == XGBE_SFP_SPEED_1000;
+
+       if (speed == SPEED_10000)
+               return phy_data->sfp_speed == XGBE_SFP_SPEED_10000;
+
+       return false;
+}
+
+/* A 2.5GbE backplane port accepts only a speed of 2500. */
+static bool xgbe_phy_valid_speed_bp_2500_mode(int speed)
+{
+       return speed == SPEED_2500;
+}
+
+/* A backplane port accepts speeds of 1000 and 10000 only. */
+static bool xgbe_phy_valid_speed_bp_mode(int speed)
+{
+       return speed == SPEED_1000 || speed == SPEED_10000;
+}
+
+/* Report whether @speed is valid for this port, delegating to the check
+ * for the port's family. Unrecognised port modes accept no speed.
+ */
+static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       bool valid = false;
+
+       switch (phy_data->port_mode) {
+       case XGBE_PORT_MODE_BACKPLANE:
+               valid = xgbe_phy_valid_speed_bp_mode(speed);
+               break;
+       case XGBE_PORT_MODE_BACKPLANE_2500:
+               valid = xgbe_phy_valid_speed_bp_2500_mode(speed);
+               break;
+       case XGBE_PORT_MODE_1000BASE_T:
+       case XGBE_PORT_MODE_NBASE_T:
+       case XGBE_PORT_MODE_10GBASE_T:
+               valid = xgbe_phy_valid_speed_baset_mode(phy_data, speed);
+               break;
+       case XGBE_PORT_MODE_1000BASE_X:
+       case XGBE_PORT_MODE_10GBASE_R:
+               valid = xgbe_phy_valid_speed_basex_mode(phy_data, speed);
+               break;
+       case XGBE_PORT_MODE_SFP:
+               valid = xgbe_phy_valid_speed_sfp_mode(phy_data, speed);
+               break;
+       default:
+               break;
+       }
+
+       return valid;
+}
+
+/* Determine whether the link is up.
+ *
+ * Returns 1 when the PCS status register reports link, 0 otherwise.
+ * *an_restart is cleared on entry and set to 1 only when an SFP change
+ * was detected. The checks run in order: SFP signals (SFP ports only),
+ * the external PHY (if one is attached), then the PCS link status bit.
+ */
+static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int reg;
+       int ret;
+
+       *an_restart = 0;
+
+       if (phy_data->port_mode == XGBE_PORT_MODE_SFP) {
+               /* Check SFP signals */
+               xgbe_phy_sfp_detect(pdata);
+
+               /* A changed SFP reports link down and asks for an AN restart */
+               if (phy_data->sfp_changed) {
+                       *an_restart = 1;
+                       return 0;
+               }
+
+               if (phy_data->sfp_mod_absent || phy_data->sfp_rx_los)
+                       return 0;
+       }
+
+       if (phy_data->phydev) {
+               /* Check external PHY */
+               ret = phy_read_status(phy_data->phydev);
+               if (ret < 0)
+                       return 0;
+
+               if ((pdata->phy.autoneg == AUTONEG_ENABLE) &&
+                   !phy_aneg_done(phy_data->phydev))
+                       return 0;
+
+               if (!phy_data->phydev->link)
+                       return 0;
+       }
+
+       /* Link status is latched low, so read once to clear
+        * and then read again to get current state
+        */
+       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+       reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+       if (reg & MDIO_STAT1_LSTATUS)
+               return 1;
+
+       /* No link, attempt a receiver reset cycle. The post-increment means
+        * a zero counter is only bumped; the reset cycle runs (and the
+        * counter is cleared) on every other pass that reaches this point.
+        */
+       if (phy_data->rrc_count++) {
+               phy_data->rrc_count = 0;
+               xgbe_phy_rrc(pdata);
+       }
+
+       return 0;
+}
+
+/* Read XP_PROP_3 and cache the SFP GPIO settings it describes.
+ *
+ * The GPIO expander address is the GPIO_ADDR field added to
+ * XGBE_GPIO_ADDRESS_PCA9555; the remaining fields are copied as-is. The
+ * values are logged when probe messages are enabled.
+ */
+static void xgbe_phy_sfp_gpio_setup(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int prop3 = XP_IOREAD(pdata, XP_PROP_3);
+
+       phy_data->sfp_gpio_address = XGBE_GPIO_ADDRESS_PCA9555 +
+                                    XP_GET_BITS(prop3, XP_PROP_3, GPIO_ADDR);
+       phy_data->sfp_gpio_mask = XP_GET_BITS(prop3, XP_PROP_3, GPIO_MASK);
+       phy_data->sfp_gpio_rx_los = XP_GET_BITS(prop3, XP_PROP_3,
+                                               GPIO_RX_LOS);
+       phy_data->sfp_gpio_tx_fault = XP_GET_BITS(prop3, XP_PROP_3,
+                                                 GPIO_TX_FAULT);
+       phy_data->sfp_gpio_mod_absent = XP_GET_BITS(prop3, XP_PROP_3,
+                                                   GPIO_MOD_ABS);
+       phy_data->sfp_gpio_rate_select = XP_GET_BITS(prop3, XP_PROP_3,
+                                                    GPIO_RATE_SELECT);
+
+       if (!netif_msg_probe(pdata))
+               return;
+
+       dev_dbg(pdata->dev, "SFP: gpio_address=%#x\n",
+               phy_data->sfp_gpio_address);
+       dev_dbg(pdata->dev, "SFP: gpio_mask=%#x\n",
+               phy_data->sfp_gpio_mask);
+       dev_dbg(pdata->dev, "SFP: gpio_rx_los=%u\n",
+               phy_data->sfp_gpio_rx_los);
+       dev_dbg(pdata->dev, "SFP: gpio_tx_fault=%u\n",
+               phy_data->sfp_gpio_tx_fault);
+       dev_dbg(pdata->dev, "SFP: gpio_mod_absent=%u\n",
+               phy_data->sfp_gpio_mod_absent);
+       dev_dbg(pdata->dev, "SFP: gpio_rate_select=%u\n",
+               phy_data->sfp_gpio_rate_select);
+}
+
+/* Read XP_PROP_4 and cache the SFP mux settings it describes.
+ *
+ * When the low mux address field equals XGBE_SFP_DIRECT nothing is
+ * stored (presumably the SFP is reached without a mux - confirm against
+ * the hardware documentation). Otherwise the PCA9545 mux address is the
+ * high field shifted left by two plus the low field.
+ */
+static void xgbe_phy_sfp_comm_setup(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int prop4 = XP_IOREAD(pdata, XP_PROP_4);
+       unsigned int addr_hi = XP_GET_BITS(prop4, XP_PROP_4, MUX_ADDR_HI);
+       unsigned int addr_lo = XP_GET_BITS(prop4, XP_PROP_4, MUX_ADDR_LO);
+
+       if (addr_lo == XGBE_SFP_DIRECT)
+               return;
+
+       phy_data->sfp_comm = XGBE_SFP_COMM_PCA9545;
+       phy_data->sfp_mux_address = (addr_hi << 2) + addr_lo;
+       phy_data->sfp_mux_channel = XP_GET_BITS(prop4, XP_PROP_4, MUX_CHAN);
+
+       if (!netif_msg_probe(pdata))
+               return;
+
+       dev_dbg(pdata->dev, "SFP: mux_address=%#x\n",
+               phy_data->sfp_mux_address);
+       dev_dbg(pdata->dev, "SFP: mux_channel=%u\n",
+               phy_data->sfp_mux_channel);
+}
+
+/* Cache the SFP communication (mux) and GPIO settings, in that order. */
+static void xgbe_phy_sfp_setup(struct xgbe_prv_data *pdata)
+{
+       xgbe_phy_sfp_comm_setup(pdata);
+       xgbe_phy_sfp_gpio_setup(pdata);
+}
+
+/* Pulse the internal MDIO reset GPIO: set it, then clear it.
+ *
+ * Returns 0 on success or the non-zero status of whichever GPIO
+ * operation failed first. If setting the pin fails the clear is not
+ * attempted.
+ */
+static int xgbe_phy_int_mdio_reset(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       /* Signed to match this function's return type, so a negative
+        * status from the GPIO callbacks is propagated unchanged.
+        */
+       int ret;
+
+       ret = pdata->hw_if.set_gpio(pdata, phy_data->mdio_reset_gpio);
+       if (ret)
+               return ret;
+
+       ret = pdata->hw_if.clr_gpio(pdata, phy_data->mdio_reset_gpio);
+
+       return ret;
+}
+
+/* Pulse the MDIO reset pin on the I2C GPIO device at mdio_reset_addr.
+ *
+ * The two output port bytes are read, the reset pin is set and written
+ * back, then cleared and written back again. Returns 0 on success or
+ * the first non-zero status from the I2C read/write helpers.
+ */
+static int xgbe_phy_i2c_mdio_reset(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       u8 gpio_reg, gpio_ports[2], gpio_data[3];
+       int ret;
+
+       /* Read the output port registers */
+       gpio_reg = 2;
+       ret = xgbe_phy_i2c_read(pdata, phy_data->mdio_reset_addr,
+                               &gpio_reg, sizeof(gpio_reg),
+                               gpio_ports, sizeof(gpio_ports));
+       if (ret)
+               return ret;
+
+       /* Prepare to write the GPIO data: register number followed by the
+        * two port bytes just read, so the other pins keep their state
+        */
+       gpio_data[0] = 2;
+       gpio_data[1] = gpio_ports[0];
+       gpio_data[2] = gpio_ports[1];
+
+       /* Set the GPIO pin (pins 0-7 are in the first port byte, higher
+        * pins in the second)
+        */
+       if (phy_data->mdio_reset_gpio < 8)
+               gpio_data[1] |= (1 << (phy_data->mdio_reset_gpio % 8));
+       else
+               gpio_data[2] |= (1 << (phy_data->mdio_reset_gpio % 8));
+
+       /* Write the output port registers */
+       ret = xgbe_phy_i2c_write(pdata, phy_data->mdio_reset_addr,
+                                gpio_data, sizeof(gpio_data));
+       if (ret)
+               return ret;
+
+       /* Clear the GPIO pin */
+       if (phy_data->mdio_reset_gpio < 8)
+               gpio_data[1] &= ~(1 << (phy_data->mdio_reset_gpio % 8));
+       else
+               gpio_data[2] &= ~(1 << (phy_data->mdio_reset_gpio % 8));
+
+       /* Write the output port registers */
+       ret = xgbe_phy_i2c_write(pdata, phy_data->mdio_reset_addr,
+                                gpio_data, sizeof(gpio_data));
+
+       return ret;
+}
+
+/* Reset the external PHY through whichever MDIO reset method is
+ * configured.
+ *
+ * Nothing is done (and 0 is returned) unless the connection type is
+ * MDIO. Communication ownership is held around the reset and released
+ * afterwards regardless of the reset's outcome. Returns 0 on success or
+ * the status of the failed step.
+ */
+static int xgbe_phy_mdio_reset(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       int ret;
+
+       if (phy_data->conn_type != XGBE_CONN_TYPE_MDIO)
+               return 0;
+
+       ret = xgbe_phy_get_comm_ownership(pdata);
+       if (ret)
+               return ret;
+
+       switch (phy_data->mdio_reset) {
+       case XGBE_MDIO_RESET_I2C_GPIO:
+               ret = xgbe_phy_i2c_mdio_reset(pdata);
+               break;
+       case XGBE_MDIO_RESET_INT_GPIO:
+               ret = xgbe_phy_int_mdio_reset(pdata);
+               break;
+       default:
+               /* No reset method: ret is still 0 from the ownership call */
+               break;
+       }
+
+       xgbe_phy_put_comm_ownership(pdata);
+
+       return ret;
+}
+
+/* Validate the re-driver description in @phy_data.
+ *
+ * Returns true when a re-driver is present but its interface, model or
+ * lane number is out of range; false when no re-driver is present or
+ * the description is acceptable.
+ */
+static bool xgbe_phy_redrv_error(struct xgbe_phy_data *phy_data)
+{
+       int max_lane;
+
+       if (!phy_data->redrv)
+               return false;
+
+       if (phy_data->redrv_if >= XGBE_PHY_REDRV_IF_MAX)
+               return true;
+
+       /* Highest lane number each supported model accepts */
+       switch (phy_data->redrv_model) {
+       case XGBE_PHY_REDRV_MODEL_4223:
+               max_lane = 3;
+               break;
+       case XGBE_PHY_REDRV_MODEL_4227:
+               max_lane = 1;
+               break;
+       default:
+               return true;
+       }
+
+       return phy_data->redrv_lane > max_lane;
+}
+
+/* Read the MDIO reset method from XP_PROP_3 and cache its parameters.
+ *
+ * Only applies to MDIO connections; other connection types return 0
+ * without touching phy_data. Returns -EINVAL (after logging) for a
+ * reset method that is not one of the three known values, 0 otherwise.
+ */
+static int xgbe_phy_mdio_reset_setup(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int prop3;
+
+       if (phy_data->conn_type != XGBE_CONN_TYPE_MDIO)
+               return 0;
+
+       prop3 = XP_IOREAD(pdata, XP_PROP_3);
+       phy_data->mdio_reset = XP_GET_BITS(prop3, XP_PROP_3, MDIO_RESET);
+
+       switch (phy_data->mdio_reset) {
+       case XGBE_MDIO_RESET_NONE:
+               break;
+       case XGBE_MDIO_RESET_I2C_GPIO:
+               phy_data->mdio_reset_addr = XGBE_GPIO_ADDRESS_PCA9555 +
+                                           XP_GET_BITS(prop3, XP_PROP_3,
+                                                       MDIO_RESET_I2C_ADDR);
+               phy_data->mdio_reset_gpio = XP_GET_BITS(prop3, XP_PROP_3,
+                                                       MDIO_RESET_I2C_GPIO);
+               break;
+       case XGBE_MDIO_RESET_INT_GPIO:
+               phy_data->mdio_reset_gpio = XP_GET_BITS(prop3, XP_PROP_3,
+                                                       MDIO_RESET_INT_GPIO);
+               break;
+       default:
+               dev_err(pdata->dev, "unsupported MDIO reset (%#x)\n",
+                       phy_data->mdio_reset);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/* Check the port speeds against the port mode.
+ *
+ * Each port mode has a set of speeds it can run at. Returns false when
+ * port_speeds contains at least one of them, true (mismatch) when it
+ * contains none or the port mode is not recognised.
+ */
+static bool xgbe_phy_port_mode_mismatch(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       unsigned int allowed;
+
+       switch (phy_data->port_mode) {
+       case XGBE_PORT_MODE_BACKPLANE:
+               allowed = XGBE_PHY_PORT_SPEED_1000 |
+                         XGBE_PHY_PORT_SPEED_10000;
+               break;
+       case XGBE_PORT_MODE_BACKPLANE_2500:
+               allowed = XGBE_PHY_PORT_SPEED_2500;
+               break;
+       case XGBE_PORT_MODE_1000BASE_T:
+               allowed = XGBE_PHY_PORT_SPEED_100 |
+                         XGBE_PHY_PORT_SPEED_1000;
+               break;
+       case XGBE_PORT_MODE_1000BASE_X:
+               allowed = XGBE_PHY_PORT_SPEED_1000;
+               break;
+       case XGBE_PORT_MODE_NBASE_T:
+               allowed = XGBE_PHY_PORT_SPEED_100 |
+                         XGBE_PHY_PORT_SPEED_1000 |
+                         XGBE_PHY_PORT_SPEED_2500;
+               break;
+       case XGBE_PORT_MODE_10GBASE_T:
+               allowed = XGBE_PHY_PORT_SPEED_100 |
+                         XGBE_PHY_PORT_SPEED_1000 |
+                         XGBE_PHY_PORT_SPEED_10000;
+               break;
+       case XGBE_PORT_MODE_10GBASE_R:
+               allowed = XGBE_PHY_PORT_SPEED_10000;
+               break;
+       case XGBE_PORT_MODE_SFP:
+               allowed = XGBE_PHY_PORT_SPEED_100 |
+                         XGBE_PHY_PORT_SPEED_1000 |
+                         XGBE_PHY_PORT_SPEED_10000;
+               break;
+       default:
+               /* Unknown port mode: nothing can match */
+               allowed = 0;
+               break;
+       }
+
+       return !(phy_data->port_speeds & allowed);
+}
+
+/* Check the connection type against the port mode.
+ *
+ * Backplane port modes need a backplane connection, the BASE-T/X/R port
+ * modes need an MDIO connection and the SFP port mode needs an SFP
+ * connection. Returns true on a mismatch or an unrecognised port mode.
+ */
+static bool xgbe_phy_conn_type_mismatch(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       switch (phy_data->port_mode) {
+       case XGBE_PORT_MODE_BACKPLANE:
+       case XGBE_PORT_MODE_BACKPLANE_2500:
+               return phy_data->conn_type != XGBE_CONN_TYPE_BACKPLANE;
+       case XGBE_PORT_MODE_1000BASE_T:
+       case XGBE_PORT_MODE_1000BASE_X:
+       case XGBE_PORT_MODE_NBASE_T:
+       case XGBE_PORT_MODE_10GBASE_T:
+       case XGBE_PORT_MODE_10GBASE_R:
+               return phy_data->conn_type != XGBE_CONN_TYPE_MDIO;
+       case XGBE_PORT_MODE_SFP:
+               return phy_data->conn_type != XGBE_CONN_TYPE_SFP;
+       default:
+               return true;
+       }
+}
+
+/* A port counts as enabled only when XP_PROP_0 carries both a non-zero
+ * PORT_SPEEDS field and a non-zero CONN_TYPE field.
+ */
+static bool xgbe_phy_port_enabled(struct xgbe_prv_data *pdata)
+{
+       unsigned int prop0 = XP_IOREAD(pdata, XP_PROP_0);
+
+       return XP_GET_BITS(prop0, XP_PROP_0, PORT_SPEEDS) &&
+              XP_GET_BITS(prop0, XP_PROP_0, CONN_TYPE);
+}
+
+/* Stop the PHY: release any external PHY device, reset the cached SFP
+ * state, power the PHY off and finally stop the I2C controller.
+ */
+static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       /* If we have an external PHY, free it */
+       xgbe_phy_free_phy_device(pdata);
+
+       /* Reset SFP data */
+       xgbe_phy_sfp_reset(phy_data);
+       xgbe_phy_sfp_mod_absent(pdata);
+
+       /* Power off the PHY */
+       xgbe_phy_power_off(pdata);
+
+       /* Stop the I2C controller */
+       pdata->i2c_if.i2c_stop(pdata);
+}
+
+/* Start the PHY.
+ *
+ * Starts the I2C controller, programs the start mode, runs SFP detection
+ * on SFP ports and then looks for an external PHY device. Returns 0 on
+ * success or the failing step's status; if the external PHY lookup
+ * fails the I2C controller is stopped again before returning.
+ */
+static int xgbe_phy_start(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       int ret;
+
+       /* Start the I2C controller */
+       ret = pdata->i2c_if.i2c_start(pdata);
+       if (ret)
+               return ret;
+
+       /* Start in highest supported mode */
+       xgbe_phy_set_mode(pdata, phy_data->start_mode);
+
+       /* After starting the I2C controller, we can check for an SFP */
+       switch (phy_data->port_mode) {
+       case XGBE_PORT_MODE_SFP:
+               xgbe_phy_sfp_detect(pdata);
+               break;
+       default:
+               break;
+       }
+
+       /* If we have an external PHY, start it */
+       ret = xgbe_phy_find_phy_device(pdata);
+       if (ret)
+               goto err_i2c;
+
+       return 0;
+
+err_i2c:
+       /* Undo the I2C start performed above */
+       pdata->i2c_if.i2c_stop(pdata);
+
+       return ret;
+}
+
+/* Reset the PHY by powering it off and re-programming the mode it was
+ * in, then reset and re-initialise the external PHY if one is attached.
+ *
+ * Returns 0 when there is no external PHY, otherwise the status of the
+ * MDIO reset or of phy_init_hw().
+ */
+static int xgbe_phy_reset(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+       enum xgbe_mode cur_mode;
+       int ret;
+
+       /* Reset by power cycling the PHY; the mode is saved before the
+        * power-off and restored straight after it
+        */
+       cur_mode = phy_data->cur_mode;
+       xgbe_phy_power_off(pdata);
+       xgbe_phy_set_mode(pdata, cur_mode);
+
+       if (!phy_data->phydev)
+               return 0;
+
+       /* Reset the external PHY */
+       ret = xgbe_phy_mdio_reset(pdata);
+       if (ret)
+               return ret;
+
+       return phy_init_hw(phy_data->phydev);
+}
+
+/* Tear down PHY support by unregistering the MDIO bus in phy_data->mii. */
+static void xgbe_phy_exit(struct xgbe_prv_data *pdata)
+{
+       struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+       /* Unregister for driving external PHYs */
+       mdiobus_unregister(phy_data->mii);
+}
+
+/* One-time PHY initialization.
+ *
+ * Reads the port and re-driver properties from the XP_PROP_0/XP_PROP_4
+ * registers, validates them, derives the supported link features and the
+ * starting mode from the port mode and speeds, programs the external MDIO
+ * mode where required and finally registers an MDIO bus.
+ *
+ * Returns 0 on success or a negative errno: -ENODEV when the port is not
+ * enabled, -ENOMEM on allocation failure, -EINVAL on inconsistent
+ * settings, or the error returned by a helper.
+ */
+static int xgbe_phy_init(struct xgbe_prv_data *pdata)
+{
+       struct device *dev = pdata->dev;
+       struct xgbe_phy_data *priv;
+       struct mii_bus *bus;
+       unsigned int prop;
+       int err;
+
+       /* A port that is not enabled cannot be used */
+       if (!xgbe_phy_port_enabled(pdata)) {
+               dev_info(dev, "device is not enabled\n");
+               return -ENODEV;
+       }
+
+       /* Bring up the I2C controller */
+       err = pdata->i2c_if.i2c_init(pdata);
+       if (err)
+               return err;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+       pdata->phy_data = priv;
+
+       /* Decode the port properties */
+       prop = XP_IOREAD(pdata, XP_PROP_0);
+       priv->port_mode = XP_GET_BITS(prop, XP_PROP_0, PORT_MODE);
+       priv->port_id = XP_GET_BITS(prop, XP_PROP_0, PORT_ID);
+       priv->port_speeds = XP_GET_BITS(prop, XP_PROP_0, PORT_SPEEDS);
+       priv->conn_type = XP_GET_BITS(prop, XP_PROP_0, CONN_TYPE);
+       priv->mdio_addr = XP_GET_BITS(prop, XP_PROP_0, MDIO_ADDR);
+       if (netif_msg_probe(pdata)) {
+               dev_dbg(dev, "port mode=%u\n", priv->port_mode);
+               dev_dbg(dev, "port id=%u\n", priv->port_id);
+               dev_dbg(dev, "port speeds=%#x\n", priv->port_speeds);
+               dev_dbg(dev, "conn type=%u\n", priv->conn_type);
+               dev_dbg(dev, "mdio addr=%u\n", priv->mdio_addr);
+       }
+
+       /* Decode the re-driver properties */
+       prop = XP_IOREAD(pdata, XP_PROP_4);
+       priv->redrv = XP_GET_BITS(prop, XP_PROP_4, REDRV_PRESENT);
+       priv->redrv_if = XP_GET_BITS(prop, XP_PROP_4, REDRV_IF);
+       priv->redrv_addr = XP_GET_BITS(prop, XP_PROP_4, REDRV_ADDR);
+       priv->redrv_lane = XP_GET_BITS(prop, XP_PROP_4, REDRV_LANE);
+       priv->redrv_model = XP_GET_BITS(prop, XP_PROP_4, REDRV_MODEL);
+       if (priv->redrv && netif_msg_probe(pdata)) {
+               dev_dbg(dev, "redrv present\n");
+               dev_dbg(dev, "redrv i/f=%u\n", priv->redrv_if);
+               dev_dbg(dev, "redrv addr=%#x\n", priv->redrv_addr);
+               dev_dbg(dev, "redrv lane=%u\n", priv->redrv_lane);
+               dev_dbg(dev, "redrv model=%u\n", priv->redrv_model);
+       }
+
+       /* The port mode has to agree with the connection type ... */
+       if (xgbe_phy_conn_type_mismatch(pdata)) {
+               dev_err(dev, "phy mode/connection mismatch (%#x/%#x)\n",
+                       priv->port_mode, priv->conn_type);
+               return -EINVAL;
+       }
+
+       /* ... and with the port speeds */
+       if (xgbe_phy_port_mode_mismatch(pdata)) {
+               dev_err(dev, "phy mode/speed mismatch (%#x/%#x)\n",
+                       priv->port_mode, priv->port_speeds);
+               return -EINVAL;
+       }
+
+       /* Set up (and validate) MDIO reset support */
+       err = xgbe_phy_mdio_reset_setup(pdata);
+       if (err)
+               return err;
+
+       /* Reject inconsistent re-driver information */
+       if (xgbe_phy_redrv_error(priv)) {
+               dev_err(dev, "phy re-driver settings error\n");
+               return -EINVAL;
+       }
+       pdata->kr_redrv = priv->redrv;
+
+       /* No mode has been selected yet */
+       priv->cur_mode = XGBE_MODE_UNKNOWN;
+
+       /* Build the supported feature mask from scratch */
+       pdata->phy.supported = 0;
+
+       switch (priv->port_mode) {
+       /* Backplane */
+       case XGBE_PORT_MODE_BACKPLANE:
+               pdata->phy.supported |= SUPPORTED_Autoneg |
+                                       SUPPORTED_Pause |
+                                       SUPPORTED_Asym_Pause |
+                                       SUPPORTED_Backplane;
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
+                       pdata->phy.supported |= SUPPORTED_1000baseKX_Full;
+                       priv->start_mode = XGBE_MODE_KX_1000;
+               }
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
+                       pdata->phy.supported |= SUPPORTED_10000baseKR_Full;
+                       if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
+                               pdata->phy.supported |=
+                                       SUPPORTED_10000baseR_FEC;
+                       priv->start_mode = XGBE_MODE_KR;
+               }
+
+               priv->phydev_mode = XGBE_MDIO_MODE_NONE;
+               break;
+
+       /* 2.5G backplane (no auto-negotiation flag is set here) */
+       case XGBE_PORT_MODE_BACKPLANE_2500:
+               pdata->phy.supported |= SUPPORTED_Pause |
+                                       SUPPORTED_Asym_Pause |
+                                       SUPPORTED_Backplane |
+                                       SUPPORTED_2500baseX_Full;
+               priv->start_mode = XGBE_MODE_KX_2500;
+
+               priv->phydev_mode = XGBE_MDIO_MODE_NONE;
+               break;
+
+       /* MDIO 1GBase-T */
+       case XGBE_PORT_MODE_1000BASE_T:
+               pdata->phy.supported |= SUPPORTED_Autoneg |
+                                       SUPPORTED_Pause |
+                                       SUPPORTED_Asym_Pause |
+                                       SUPPORTED_TP;
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_100) {
+                       pdata->phy.supported |= SUPPORTED_100baseT_Full;
+                       priv->start_mode = XGBE_MODE_SGMII_100;
+               }
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
+                       pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+                       priv->start_mode = XGBE_MODE_SGMII_1000;
+               }
+
+               priv->phydev_mode = XGBE_MDIO_MODE_CL22;
+               break;
+
+       /* MDIO Base-X */
+       case XGBE_PORT_MODE_1000BASE_X:
+               pdata->phy.supported |= SUPPORTED_Autoneg |
+                                       SUPPORTED_Pause |
+                                       SUPPORTED_Asym_Pause |
+                                       SUPPORTED_FIBRE |
+                                       SUPPORTED_1000baseT_Full;
+               priv->start_mode = XGBE_MODE_X;
+
+               priv->phydev_mode = XGBE_MDIO_MODE_CL22;
+               break;
+
+       /* MDIO NBase-T */
+       case XGBE_PORT_MODE_NBASE_T:
+               pdata->phy.supported |= SUPPORTED_Autoneg |
+                                       SUPPORTED_Pause |
+                                       SUPPORTED_Asym_Pause |
+                                       SUPPORTED_TP;
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_100) {
+                       pdata->phy.supported |= SUPPORTED_100baseT_Full;
+                       priv->start_mode = XGBE_MODE_SGMII_100;
+               }
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
+                       pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+                       priv->start_mode = XGBE_MODE_SGMII_1000;
+               }
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_2500) {
+                       pdata->phy.supported |= SUPPORTED_2500baseX_Full;
+                       priv->start_mode = XGBE_MODE_KX_2500;
+               }
+
+               priv->phydev_mode = XGBE_MDIO_MODE_CL45;
+               break;
+
+       /* 10GBase-T */
+       case XGBE_PORT_MODE_10GBASE_T:
+               pdata->phy.supported |= SUPPORTED_Autoneg |
+                                       SUPPORTED_Pause |
+                                       SUPPORTED_Asym_Pause |
+                                       SUPPORTED_TP;
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_100) {
+                       pdata->phy.supported |= SUPPORTED_100baseT_Full;
+                       priv->start_mode = XGBE_MODE_SGMII_100;
+               }
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
+                       pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+                       priv->start_mode = XGBE_MODE_SGMII_1000;
+               }
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
+                       pdata->phy.supported |= SUPPORTED_10000baseT_Full;
+                       priv->start_mode = XGBE_MODE_KR;
+               }
+
+               priv->phydev_mode = XGBE_MDIO_MODE_NONE;
+               break;
+
+       /* 10GBase-R */
+       case XGBE_PORT_MODE_10GBASE_R:
+               pdata->phy.supported |= SUPPORTED_Autoneg |
+                                       SUPPORTED_Pause |
+                                       SUPPORTED_Asym_Pause |
+                                       SUPPORTED_TP |
+                                       SUPPORTED_10000baseT_Full;
+               if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
+                       pdata->phy.supported |= SUPPORTED_10000baseR_FEC;
+               priv->start_mode = XGBE_MODE_SFI;
+
+               priv->phydev_mode = XGBE_MDIO_MODE_NONE;
+               break;
+
+       /* SFP */
+       case XGBE_PORT_MODE_SFP:
+               pdata->phy.supported |= SUPPORTED_Autoneg |
+                                       SUPPORTED_Pause |
+                                       SUPPORTED_Asym_Pause |
+                                       SUPPORTED_TP |
+                                       SUPPORTED_FIBRE;
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_100) {
+                       pdata->phy.supported |= SUPPORTED_100baseT_Full;
+                       priv->start_mode = XGBE_MODE_SGMII_100;
+               }
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
+                       pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+                       priv->start_mode = XGBE_MODE_SGMII_1000;
+               }
+               if (priv->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
+                       pdata->phy.supported |= SUPPORTED_10000baseT_Full;
+                       priv->start_mode = XGBE_MODE_SFI;
+                       if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
+                               pdata->phy.supported |=
+                                       SUPPORTED_10000baseR_FEC;
+               }
+
+               priv->phydev_mode = XGBE_MDIO_MODE_CL22;
+
+               xgbe_phy_sfp_setup(pdata);
+               break;
+
+       /* Any other port mode is rejected */
+       default:
+               return -EINVAL;
+       }
+
+       if (netif_msg_probe(pdata))
+               dev_dbg(dev, "phy supported=%#x\n", pdata->phy.supported);
+
+       /* Program the MDIO clause for an MDIO-connected external PHY */
+       if ((priv->conn_type & XGBE_CONN_TYPE_MDIO) &&
+           (priv->phydev_mode != XGBE_MDIO_MODE_NONE)) {
+               err = pdata->hw_if.set_ext_mii_mode(pdata, priv->mdio_addr,
+                                                   priv->phydev_mode);
+               if (err) {
+                       dev_err(dev,
+                               "mdio port/clause not compatible (%d/%u)\n",
+                               priv->mdio_addr, priv->phydev_mode);
+                       return -EINVAL;
+               }
+       }
+
+       /* A re-driver whose interface field is zero is set up for CL22 */
+       if (priv->redrv && !priv->redrv_if) {
+               err = pdata->hw_if.set_ext_mii_mode(pdata, priv->redrv_addr,
+                                                   XGBE_MDIO_MODE_CL22);
+               if (err) {
+                       dev_err(dev,
+                               "redriver mdio port not compatible (%u)\n",
+                               priv->redrv_addr);
+                       return -EINVAL;
+               }
+       }
+
+       /* Create the MDIO bus used for driving external PHYs */
+       bus = devm_mdiobus_alloc(dev);
+       if (!bus) {
+               dev_err(dev, "mdiobus_alloc failed\n");
+               return -ENOMEM;
+       }
+
+       bus->priv = pdata;
+       bus->name = "amd-xgbe-mii";
+       bus->read = xgbe_phy_mii_read;
+       bus->write = xgbe_phy_mii_write;
+       bus->parent = dev;
+       bus->phy_mask = ~0;
+       snprintf(bus->id, sizeof(bus->id), "%s", dev_name(dev));
+
+       err = mdiobus_register(bus);
+       if (err) {
+               dev_err(dev, "mdiobus_register failed\n");
+               return err;
+       }
+       priv->mii = bus;
+
+       return 0;
+}
+
+/* Fill in the PHY implementation callbacks of @phy_if with the routines
+ * defined in this file.
+ */
+void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *phy_if)
+{
+       struct xgbe_phy_impl_if *impl = &phy_if->phy_impl;
+
+       /* Setup and teardown */
+       impl->init = xgbe_phy_init;
+       impl->exit = xgbe_phy_exit;
+
+       /* Run-time control */
+       impl->reset = xgbe_phy_reset;
+       impl->start = xgbe_phy_start;
+       impl->stop = xgbe_phy_stop;
+
+       /* Link and speed queries */
+       impl->link_status = xgbe_phy_link_status;
+       impl->valid_speed = xgbe_phy_valid_speed;
+
+       /* Mode handling */
+       impl->use_mode = xgbe_phy_use_mode;
+       impl->set_mode = xgbe_phy_set_mode;
+       impl->get_mode = xgbe_phy_get_mode;
+       impl->switch_mode = xgbe_phy_switch_mode;
+       impl->cur_mode = xgbe_phy_cur_mode;
+
+       /* Auto-negotiation */
+       impl->an_mode = xgbe_phy_an_mode;
+       impl->an_config = xgbe_phy_an_config;
+       impl->an_advertising = xgbe_phy_an_advertising;
+       impl->an_outcome = xgbe_phy_an_outcome;
+}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
new file mode 100644 (file)
index 0000000..8c530dc
--- /dev/null
@@ -0,0 +1,642 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+#include <linux/clk.h>
+#include <linux/property.h>
+#include <linux/acpi.h>
+#include <linux/mdio.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id xgbe_acpi_match[];
+
+/* Look the device up in the ACPI match table and return the version data
+ * stored in the matching entry, or NULL when nothing matches.
+ */
+static struct xgbe_version_data *xgbe_acpi_vdata(struct xgbe_prv_data *pdata)
+{
+       const struct acpi_device_id *match;
+
+       match = acpi_match_device(xgbe_acpi_match, pdata->dev);
+       if (!match)
+               return NULL;
+
+       return (struct xgbe_version_data *)match->driver_data;
+}
+
+/* Read the ACPI-provided clock rates into @pdata.
+ *
+ * Both the XGBE_ACPI_DMA_FREQ and XGBE_ACPI_PTP_FREQ device properties
+ * are required; the error from device_property_read_u32() is returned
+ * when either one cannot be read.
+ */
+static int xgbe_acpi_support(struct xgbe_prv_data *pdata)
+{
+       struct device *dev = pdata->dev;
+       u32 rate;
+       int err;
+
+       /* System clock rate */
+       err = device_property_read_u32(dev, XGBE_ACPI_DMA_FREQ, &rate);
+       if (err) {
+               dev_err(dev, "unable to obtain %s property\n",
+                       XGBE_ACPI_DMA_FREQ);
+               return err;
+       }
+       pdata->sysclk_rate = rate;
+
+       /* PTP clock rate */
+       err = device_property_read_u32(dev, XGBE_ACPI_PTP_FREQ, &rate);
+       if (err) {
+               dev_err(dev, "unable to obtain %s property\n",
+                       XGBE_ACPI_PTP_FREQ);
+               return err;
+       }
+       pdata->ptpclk_rate = rate;
+
+       return 0;
+}
+#else   /* CONFIG_ACPI */
+static struct xgbe_version_data *xgbe_acpi_vdata(struct xgbe_prv_data *pdata)
+{
+       return NULL;    /* !CONFIG_ACPI stub: no ACPI match data exists */
+}
+
+static int xgbe_acpi_support(struct xgbe_prv_data *pdata)
+{
+       return -EINVAL; /* !CONFIG_ACPI stub: always reports failure */
+}
+#endif  /* CONFIG_ACPI */
+
+#ifdef CONFIG_OF
+static const struct of_device_id xgbe_of_match[];
+
+/* Look the device up in the device tree match table and return the
+ * version data stored in the matching entry, or NULL when nothing matches.
+ */
+static struct xgbe_version_data *xgbe_of_vdata(struct xgbe_prv_data *pdata)
+{
+       const struct of_device_id *match;
+
+       match = of_match_device(xgbe_of_match, pdata->dev);
+       if (!match)
+               return NULL;
+
+       return (struct xgbe_version_data *)match->data;
+}
+
+/* Obtain the device tree provided clocks and record their rates.
+ *
+ * Both the XGBE_DMA_CLOCK and XGBE_PTP_CLOCK clocks are required; the
+ * error encoded in the clock pointer is returned when either lookup fails.
+ */
+static int xgbe_of_support(struct xgbe_prv_data *pdata)
+{
+       struct device *dev = pdata->dev;
+       int err;
+
+       /* System clock */
+       pdata->sysclk = devm_clk_get(dev, XGBE_DMA_CLOCK);
+       if (IS_ERR(pdata->sysclk)) {
+               err = PTR_ERR(pdata->sysclk);
+               dev_err(dev, "dma devm_clk_get failed\n");
+               return err;
+       }
+       pdata->sysclk_rate = clk_get_rate(pdata->sysclk);
+
+       /* PTP clock */
+       pdata->ptpclk = devm_clk_get(dev, XGBE_PTP_CLOCK);
+       if (IS_ERR(pdata->ptpclk)) {
+               err = PTR_ERR(pdata->ptpclk);
+               dev_err(dev, "ptp devm_clk_get failed\n");
+               return err;
+       }
+       pdata->ptpclk_rate = clk_get_rate(pdata->ptpclk);
+
+       return 0;
+}
+
+/* Find the platform device that carries the PHY resources.
+ *
+ * With a "phy-handle" phandle (old style device tree) the PHY is a
+ * separate device and is looked up by node.  Without it (new style device
+ * tree) the PHY resources are grouped with the XGBE resources, listed
+ * last, so the XGBE platform device itself is returned after taking a
+ * reference on its struct device.
+ */
+static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata)
+{
+       struct device *dev = pdata->dev;
+       struct platform_device *found;
+       struct device_node *np;
+
+       np = of_parse_phandle(dev->of_node, "phy-handle", 0);
+       if (!np) {
+               /* New style: the XGBE device doubles as the PHY device */
+               get_device(dev);
+               return pdata->platdev;
+       }
+
+       /* Old style: resolve the separate PHY device, then drop the node */
+       found = of_find_device_by_node(np);
+       of_node_put(np);
+
+       return found;
+}
+#else   /* CONFIG_OF */
+static struct xgbe_version_data *xgbe_of_vdata(struct xgbe_prv_data *pdata)
+{
+       return NULL;    /* !CONFIG_OF stub: no device tree match data exists */
+}
+
+static int xgbe_of_support(struct xgbe_prv_data *pdata)
+{
+       return -EINVAL; /* !CONFIG_OF stub: always reports failure */
+}
+
+static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata)
+{
+       return NULL;    /* !CONFIG_OF stub: no PHY device can be looked up */
+}
+#endif  /* CONFIG_OF */
+
+/* Return how many resources of @pdev have the resource type @type. */
+static unsigned int xgbe_resource_count(struct platform_device *pdev,
+                                       unsigned int type)
+{
+       unsigned int matches = 0;
+       int idx;
+
+       for (idx = 0; idx < pdev->num_resources; idx++) {
+               if (resource_type(&pdev->resource[idx]) == type)
+                       matches++;
+       }
+
+       return matches;
+}
+
+/* Return the platform device that carries the PHY resources.
+ *
+ * For ACPI this is the XGBE platform device itself, returned after taking
+ * a reference on its struct device; for device tree the lookup is left to
+ * xgbe_of_get_phy_pdev().
+ */
+static struct platform_device *xgbe_get_phy_pdev(struct xgbe_prv_data *pdata)
+{
+       if (!pdata->use_acpi)
+               return xgbe_of_get_phy_pdev(pdata);
+
+       get_device(pdata->dev);
+
+       return pdata->platdev;
+}
+
+/* Fetch the version data from ACPI or device tree, depending on which of
+ * the two is in use for this device.
+ */
+static struct xgbe_version_data *xgbe_get_vdata(struct xgbe_prv_data *pdata)
+{
+       if (pdata->use_acpi)
+               return xgbe_acpi_vdata(pdata);
+
+       return xgbe_of_vdata(pdata);
+}
+
+/* Probe an XGBE platform device.
+ *
+ * Allocates the private data, decides between ACPI and device tree,
+ * locates the device carrying the PHY resources, maps the register areas,
+ * reads the MAC address, PHY mode, clock and DMA settings, collects the
+ * interrupts and finally configures the net device.
+ *
+ * Returns 0 on success or a negative errno.  On failure the reference on
+ * the PHY device (once taken) is dropped and the private data is freed.
+ */
+static int xgbe_platform_probe(struct platform_device *pdev)
+{
+       struct xgbe_prv_data *pdata;
+       struct device *dev = &pdev->dev;
+       struct platform_device *phy_pdev;
+       struct resource *res;
+       const char *phy_mode;
+       unsigned int phy_memnum, phy_irqnum;
+       unsigned int dma_irqnum, dma_irqend;
+       enum dev_dma_attr attr;
+       int ret;
+
+       pdata = xgbe_alloc_pdata(dev);
+       if (IS_ERR(pdata)) {
+               ret = PTR_ERR(pdata);
+               goto err_alloc;
+       }
+
+       pdata->platdev = pdev;
+       pdata->adev = ACPI_COMPANION(dev);
+       platform_set_drvdata(pdev, pdata);
+
+       /* Check if we should use ACPI or DT */
+       pdata->use_acpi = dev->of_node ? 0 : 1;
+
+       /* Get the version data - it is dereferenced further down, so a
+        * device without a match table entry cannot be supported
+        */
+       pdata->vdata = xgbe_get_vdata(pdata);
+       if (!pdata->vdata) {
+               dev_err(dev, "unable to obtain version data\n");
+               ret = -ENODEV;
+               goto err_phydev;
+       }
+
+       phy_pdev = xgbe_get_phy_pdev(pdata);
+       if (!phy_pdev) {
+               dev_err(dev, "unable to obtain phy device\n");
+               ret = -EINVAL;
+               goto err_phydev;
+       }
+       pdata->phy_platdev = phy_pdev;
+       pdata->phy_dev = &phy_pdev->dev;
+
+       if (pdev == phy_pdev) {
+               /* New style device tree or ACPI:
+                *   The XGBE and PHY resources are grouped together with
+                *   the PHY resources listed last
+                */
+               phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3;
+               phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1;
+               dma_irqnum = 1;
+               dma_irqend = phy_irqnum;
+       } else {
+               /* Old style device tree:
+                *   The XGBE and PHY resources are separate
+                */
+               phy_memnum = 0;
+               phy_irqnum = 0;
+               dma_irqnum = 1;
+               dma_irqend = xgbe_resource_count(pdev, IORESOURCE_IRQ);
+       }
+
+       /* Obtain the mmio areas for the device */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       pdata->xgmac_regs = devm_ioremap_resource(dev, res);
+       if (IS_ERR(pdata->xgmac_regs)) {
+               dev_err(dev, "xgmac ioremap failed\n");
+               ret = PTR_ERR(pdata->xgmac_regs);
+               goto err_io;
+       }
+       if (netif_msg_probe(pdata))
+               dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs);
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       pdata->xpcs_regs = devm_ioremap_resource(dev, res);
+       if (IS_ERR(pdata->xpcs_regs)) {
+               dev_err(dev, "xpcs ioremap failed\n");
+               ret = PTR_ERR(pdata->xpcs_regs);
+               goto err_io;
+       }
+       if (netif_msg_probe(pdata))
+               dev_dbg(dev, "xpcs_regs  = %p\n", pdata->xpcs_regs);
+
+       /* The remaining three areas come from the PHY device */
+       res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
+       pdata->rxtx_regs = devm_ioremap_resource(dev, res);
+       if (IS_ERR(pdata->rxtx_regs)) {
+               dev_err(dev, "rxtx ioremap failed\n");
+               ret = PTR_ERR(pdata->rxtx_regs);
+               goto err_io;
+       }
+       if (netif_msg_probe(pdata))
+               dev_dbg(dev, "rxtx_regs  = %p\n", pdata->rxtx_regs);
+
+       res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
+       pdata->sir0_regs = devm_ioremap_resource(dev, res);
+       if (IS_ERR(pdata->sir0_regs)) {
+               dev_err(dev, "sir0 ioremap failed\n");
+               ret = PTR_ERR(pdata->sir0_regs);
+               goto err_io;
+       }
+       if (netif_msg_probe(pdata))
+               dev_dbg(dev, "sir0_regs  = %p\n", pdata->sir0_regs);
+
+       res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
+       pdata->sir1_regs = devm_ioremap_resource(dev, res);
+       if (IS_ERR(pdata->sir1_regs)) {
+               dev_err(dev, "sir1 ioremap failed\n");
+               ret = PTR_ERR(pdata->sir1_regs);
+               goto err_io;
+       }
+       if (netif_msg_probe(pdata))
+               dev_dbg(dev, "sir1_regs  = %p\n", pdata->sir1_regs);
+
+       /* Retrieve the MAC address */
+       ret = device_property_read_u8_array(dev, XGBE_MAC_ADDR_PROPERTY,
+                                           pdata->mac_addr,
+                                           sizeof(pdata->mac_addr));
+       if (ret || !is_valid_ether_addr(pdata->mac_addr)) {
+               dev_err(dev, "invalid %s property\n", XGBE_MAC_ADDR_PROPERTY);
+               if (!ret)
+                       ret = -EINVAL;
+               goto err_io;
+       }
+
+       /* Retrieve the PHY mode - it must be "xgmii" */
+       ret = device_property_read_string(dev, XGBE_PHY_MODE_PROPERTY,
+                                         &phy_mode);
+       if (ret || strcmp(phy_mode, phy_modes(PHY_INTERFACE_MODE_XGMII))) {
+               dev_err(dev, "invalid %s property\n", XGBE_PHY_MODE_PROPERTY);
+               if (!ret)
+                       ret = -EINVAL;
+               goto err_io;
+       }
+       pdata->phy_mode = PHY_INTERFACE_MODE_XGMII;
+
+       /* Check for per channel interrupt support */
+       if (device_property_present(dev, XGBE_DMA_IRQS_PROPERTY)) {
+               pdata->per_channel_irq = 1;
+               pdata->channel_irq_mode = XGBE_IRQ_MODE_EDGE;
+       }
+
+       /* Obtain device settings unique to ACPI/OF */
+       if (pdata->use_acpi)
+               ret = xgbe_acpi_support(pdata);
+       else
+               ret = xgbe_of_support(pdata);
+       if (ret)
+               goto err_io;
+
+       /* Set the DMA coherency values */
+       attr = device_get_dma_attr(dev);
+       if (attr == DEV_DMA_NOT_SUPPORTED) {
+               dev_err(dev, "DMA is not supported\n");
+               ret = -ENODEV;
+               goto err_io;
+       }
+       pdata->coherent = (attr == DEV_DMA_COHERENT);
+       if (pdata->coherent) {
+               pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
+               pdata->arcache = XGBE_DMA_OS_ARCACHE;
+               pdata->awcache = XGBE_DMA_OS_AWCACHE;
+       } else {
+               pdata->axdomain = XGBE_DMA_SYS_AXDOMAIN;
+               pdata->arcache = XGBE_DMA_SYS_ARCACHE;
+               pdata->awcache = XGBE_DMA_SYS_AWCACHE;
+       }
+
+       /* Set the maximum fifo amounts */
+       pdata->tx_max_fifo_size = pdata->vdata->tx_max_fifo_size;
+       pdata->rx_max_fifo_size = pdata->vdata->rx_max_fifo_size;
+
+       /* Set the hardware channel and queue counts */
+       xgbe_set_counts(pdata);
+
+       /* Always have XGMAC and XPCS (auto-negotiation) interrupts */
+       pdata->irq_count = 2;
+
+       /* Get the device interrupt */
+       ret = platform_get_irq(pdev, 0);
+       if (ret < 0) {
+               dev_err(dev, "platform_get_irq 0 failed\n");
+               goto err_io;
+       }
+       pdata->dev_irq = ret;
+
+       /* Get the per channel DMA interrupts */
+       if (pdata->per_channel_irq) {
+               unsigned int i, max = ARRAY_SIZE(pdata->channel_irq);
+
+               for (i = 0; (i < max) && (dma_irqnum < dma_irqend); i++) {
+                       ret = platform_get_irq(pdata->platdev, dma_irqnum++);
+                       if (ret < 0) {
+                               netdev_err(pdata->netdev,
+                                          "platform_get_irq %u failed\n",
+                                          dma_irqnum - 1);
+                               goto err_io;
+                       }
+
+                       pdata->channel_irq[i] = ret;
+               }
+
+               /* NOTE(review): the counts below use the array size even
+                * when the loop above stopped early at dma_irqend - confirm
+                * that this is intended
+                */
+               pdata->channel_irq_count = max;
+
+               pdata->irq_count += max;
+       }
+
+       /* Get the auto-negotiation interrupt */
+       ret = platform_get_irq(phy_pdev, phy_irqnum++);
+       if (ret < 0) {
+               dev_err(dev, "platform_get_irq phy 0 failed\n");
+               goto err_io;
+       }
+       pdata->an_irq = ret;
+
+       /* Configure the netdev resource */
+       ret = xgbe_config_netdev(pdata);
+       if (ret)
+               goto err_io;
+
+       netdev_notice(pdata->netdev, "net device enabled\n");
+
+       return 0;
+
+err_io:
+       platform_device_put(phy_pdev);
+
+err_phydev:
+       xgbe_free_pdata(pdata);
+
+err_alloc:
+       dev_notice(dev, "net device not enabled\n");
+
+       return ret;
+}
+/* Platform driver remove callback (installed as xgbe_driver.remove).
+ * Deconfigures the net device, then drops the PHY platform device and frees
+ * the private data, mirroring the err_io/err_phydev unwinding earlier in this
+ * file. Always returns 0.
+ */
+static int xgbe_platform_remove(struct platform_device *pdev)
+{
+       struct xgbe_prv_data *pdata = platform_get_drvdata(pdev);
+
+       xgbe_deconfig_netdev(pdata);
+
+       platform_device_put(pdata->phy_platdev);
+
+       xgbe_free_pdata(pdata);
+
+       return 0;
+}
+
+/* System sleep support.
+ *
+ * Guarded by CONFIG_PM_SLEEP rather than CONFIG_PM: SIMPLE_DEV_PM_OPS() only
+ * references the suspend/resume callbacks when CONFIG_PM_SLEEP is enabled, so
+ * a CONFIG_PM=y / CONFIG_PM_SLEEP=n build would otherwise leave both
+ * functions defined but unused.
+ */
+#ifdef CONFIG_PM_SLEEP
+/* Suspend callback for xgbe_platform_pm_ops.
+ *
+ * Powers the device down when the interface is running, then sets the
+ * low-power bit in the PCS CTRL1 register. The value written is kept in
+ * pdata->lpm_ctrl so that resume can clear the bit again.
+ *
+ * Returns the xgbe_powerdown() result, or 0 if the interface was not running.
+ */
+static int xgbe_platform_suspend(struct device *dev)
+{
+       struct xgbe_prv_data *pdata = dev_get_drvdata(dev);
+       struct net_device *netdev = pdata->netdev;
+       int ret = 0;
+
+       DBGPR("-->xgbe_suspend\n");
+
+       if (netif_running(netdev))
+               ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT);
+
+       /* Remember CTRL1 (with low-power set) for xgbe_platform_resume() */
+       pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+       pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
+
+       DBGPR("<--xgbe_suspend\n");
+
+       return ret;
+}
+
+/* Resume callback for xgbe_platform_pm_ops.
+ *
+ * Clears the low-power bit in the CTRL1 value saved by suspend and writes it
+ * back, then powers the device up again when the interface is running.
+ *
+ * Returns the xgbe_powerup() result, or 0 if the interface was not running.
+ */
+static int xgbe_platform_resume(struct device *dev)
+{
+       struct xgbe_prv_data *pdata = dev_get_drvdata(dev);
+       struct net_device *netdev = pdata->netdev;
+       int ret = 0;
+
+       DBGPR("-->xgbe_resume\n");
+
+       pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER;
+       XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
+
+       if (netif_running(netdev)) {
+               ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT);
+
+               /* Schedule a restart in case the link or phy state changed
+                * while we were powered down.
+                */
+               schedule_work(&pdata->restart_work);
+       }
+
+       DBGPR("<--xgbe_resume\n");
+
+       return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+/* Version data referenced by the ACPI and OF match tables in this file */
+static const struct xgbe_version_data xgbe_v1 = {
+       .init_function_ptrs_phy_impl    = xgbe_init_function_ptrs_phy_v1,
+       .xpcs_access                    = XGBE_XPCS_ACCESS_V1,
+       .tx_max_fifo_size               = 81920,
+       .rx_max_fifo_size               = 81920,
+       .tx_tstamp_workaround           = 1,
+};
+/* ACPI IDs handled by this driver; driver_data points at the version data */
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id xgbe_acpi_match[] = {
+       { .id = "AMDI8001",
+         .driver_data = (kernel_ulong_t)&xgbe_v1 },
+       {},
+};
+
+MODULE_DEVICE_TABLE(acpi, xgbe_acpi_match);
+#endif
+/* Device-tree compatibles handled by this driver; data is the version data */
+#ifdef CONFIG_OF
+static const struct of_device_id xgbe_of_match[] = {
+       { .compatible = "amd,xgbe-seattle-v1a",
+         .data = &xgbe_v1 },
+       {},
+};
+
+MODULE_DEVICE_TABLE(of, xgbe_of_match);
+#endif
+/* Suspend/resume operations installed through xgbe_driver.driver.pm */
+static SIMPLE_DEV_PM_OPS(xgbe_platform_pm_ops,
+                        xgbe_platform_suspend, xgbe_platform_resume);
+/* Platform driver definition; match tables are set only when configured */
+static struct platform_driver xgbe_driver = {
+       .driver = {
+               .name = XGBE_DRV_NAME,
+#ifdef CONFIG_ACPI
+               .acpi_match_table = xgbe_acpi_match,
+#endif
+#ifdef CONFIG_OF
+               .of_match_table = xgbe_of_match,
+#endif
+               .pm = &xgbe_platform_pm_ops,
+       },
+       .probe = xgbe_platform_probe,
+       .remove = xgbe_platform_remove,
+};
+/* Register xgbe_driver; returns the platform_driver_register() result */
+int xgbe_platform_init(void)
+{
+       return platform_driver_register(&xgbe_driver);
+}
+/* Unregister xgbe_driver; counterpart of xgbe_platform_init() */
+void xgbe_platform_exit(void)
+{
+       platform_driver_unregister(&xgbe_driver);
+}
index 5dd17dcea2f847a83e03cbc220c975e351fcea21..f52a9bd05baca559d7afd6e1184010d103e1c08a 100644 (file)
 #include <linux/timecounter.h>
 #include <linux/net_tstamp.h>
 #include <net/dcbnl.h>
+#include <linux/completion.h>
 
 #define XGBE_DRV_NAME          "amd-xgbe"
-#define XGBE_DRV_VERSION       "1.0.2"
+#define XGBE_DRV_VERSION       "1.0.3"
 #define XGBE_DRV_DESC          "AMD 10 Gigabit Ethernet Driver"
 
 /* Descriptor related defines */
 
 #define XGBE_MAX_DMA_CHANNELS  16
 #define XGBE_MAX_QUEUES                16
-#define XGBE_DMA_STOP_TIMEOUT  5
+#define XGBE_PRIORITY_QUEUES   8
+#define XGBE_DMA_STOP_TIMEOUT  1
 
 /* DMA cache settings - Outer sharable, write-back, write-allocate */
 #define XGBE_DMA_OS_AXDOMAIN   0x2
 #define XGBE_DMA_SYS_ARCACHE   0x0
 #define XGBE_DMA_SYS_AWCACHE   0x0
 
+/* DMA channel interrupt modes */
+#define XGBE_IRQ_MODE_EDGE     0
+#define XGBE_IRQ_MODE_LEVEL    1
+
 #define XGBE_DMA_INTERRUPT_MASK        0x31c7
 
 #define XGMAC_MIN_PACKET       60
 #define XGMAC_MAX_STD_PACKET   1518
 #define XGMAC_JUMBO_PACKET_MTU 9000
 #define XGMAC_MAX_JUMBO_PACKET 9018
+#define XGMAC_ETH_PREAMBLE     (12 + 8)        /* Inter-frame gap + preamble */
+
+#define XGMAC_PFC_DATA_LEN     46
+#define XGMAC_PFC_DELAYS       14000
+
+#define XGMAC_PRIO_QUEUES(_cnt)                                        \
+       min_t(unsigned int, IEEE_8021QAZ_MAX_TCS, (_cnt))
 
 /* Common property names */
 #define XGBE_MAC_ADDR_PROPERTY "mac-address"
 #define XGBE_PHY_MODE_PROPERTY "phy-mode"
 #define XGBE_DMA_IRQS_PROPERTY "amd,per-channel-interrupt"
 #define XGBE_SPEEDSET_PROPERTY "amd,speed-set"
-#define XGBE_BLWC_PROPERTY     "amd,serdes-blwc"
-#define XGBE_CDR_RATE_PROPERTY "amd,serdes-cdr-rate"
-#define XGBE_PQ_SKEW_PROPERTY  "amd,serdes-pq-skew"
-#define XGBE_TX_AMP_PROPERTY   "amd,serdes-tx-amp"
-#define XGBE_DFE_CFG_PROPERTY  "amd,serdes-dfe-tap-config"
-#define XGBE_DFE_ENA_PROPERTY  "amd,serdes-dfe-tap-enable"
 
 /* Device-tree clock names */
 #define XGBE_DMA_CLOCK         "dma_clk"
 #define XGBE_ACPI_DMA_FREQ     "amd,dma-freq"
 #define XGBE_ACPI_PTP_FREQ     "amd,ptp-freq"
 
+/* PCI BAR mapping */
+#define XGBE_XGMAC_BAR         0
+#define XGBE_XPCS_BAR          1
+#define XGBE_MAC_PROP_OFFSET   0x1d000
+#define XGBE_I2C_CTRL_OFFSET   0x1e000
+
+/* PCI MSIx support */
+#define XGBE_MSIX_BASE_COUNT   4
+#define XGBE_MSIX_MIN_COUNT    (XGBE_MSIX_BASE_COUNT + 1)
+
+/* PCI clock frequencies */
+#define XGBE_V2_DMA_CLOCK_FREQ 500000000       /* 500 MHz */
+#define XGBE_V2_PTP_CLOCK_FREQ 125000000       /* 125 MHz */
+
 /* Timestamp support - values based on 50MHz PTP clock
  *   50MHz => 20 nsec
  */
 #define XGMAC_DRIVER_CONTEXT   1
 #define XGMAC_IOCTL_CONTEXT    2
 
-#define XGBE_FIFO_MAX          81920
+#define XGMAC_FIFO_MIN_ALLOC   2048
+#define XGMAC_FIFO_UNIT                256
+#define XGMAC_FIFO_ALIGN(_x)                           \
+       (((_x) + XGMAC_FIFO_UNIT - 1) & ~(XGMAC_FIFO_UNIT - 1))
+#define XGMAC_FIFO_FC_OFF      2048
+#define XGMAC_FIFO_FC_MIN      4096
 
 #define XGBE_TC_MIN_QUANTUM    10
 
 /* Flow control queue count */
 #define XGMAC_MAX_FLOW_CONTROL_QUEUES  8
 
+/* Flow control threshold units */
+#define XGMAC_FLOW_CONTROL_UNIT                512
+#define XGMAC_FLOW_CONTROL_ALIGN(_x)                           \
+       (((_x) + XGMAC_FLOW_CONTROL_UNIT - 1) & ~(XGMAC_FLOW_CONTROL_UNIT - 1))
+#define XGMAC_FLOW_CONTROL_VALUE(_x)                           \
+       (((_x) < 1024) ? 0 : ((_x) / XGMAC_FLOW_CONTROL_UNIT) - 2)
+#define XGMAC_FLOW_CONTROL_MAX         33280
+
 /* Maximum MAC address hash table size (256 bits = 8 bytes) */
 #define XGBE_MAC_HASH_TABLE_SIZE       8
 
 
 /* Auto-negotiation */
 #define XGBE_AN_MS_TIMEOUT             500
-#define XGBE_LINK_TIMEOUT              10
-
-#define XGBE_AN_INT_CMPLT              0x01
-#define XGBE_AN_INC_LINK               0x02
-#define XGBE_AN_PG_RCV                 0x04
-#define XGBE_AN_INT_MASK               0x07
-
-/* Rate-change complete wait/retry count */
-#define XGBE_RATECHANGE_COUNT          500
-
-/* Default SerDes settings */
-#define XGBE_SPEED_10000_BLWC          0
-#define XGBE_SPEED_10000_CDR           0x7
-#define XGBE_SPEED_10000_PLL           0x1
-#define XGBE_SPEED_10000_PQ            0x12
-#define XGBE_SPEED_10000_RATE          0x0
-#define XGBE_SPEED_10000_TXAMP         0xa
-#define XGBE_SPEED_10000_WORD          0x7
-#define XGBE_SPEED_10000_DFE_TAP_CONFIG        0x1
-#define XGBE_SPEED_10000_DFE_TAP_ENABLE        0x7f
-
-#define XGBE_SPEED_2500_BLWC           1
-#define XGBE_SPEED_2500_CDR            0x2
-#define XGBE_SPEED_2500_PLL            0x0
-#define XGBE_SPEED_2500_PQ             0xa
-#define XGBE_SPEED_2500_RATE           0x1
-#define XGBE_SPEED_2500_TXAMP          0xf
-#define XGBE_SPEED_2500_WORD           0x1
-#define XGBE_SPEED_2500_DFE_TAP_CONFIG 0x3
-#define XGBE_SPEED_2500_DFE_TAP_ENABLE 0x0
-
-#define XGBE_SPEED_1000_BLWC           1
-#define XGBE_SPEED_1000_CDR            0x2
-#define XGBE_SPEED_1000_PLL            0x0
-#define XGBE_SPEED_1000_PQ             0xa
-#define XGBE_SPEED_1000_RATE           0x3
-#define XGBE_SPEED_1000_TXAMP          0xf
-#define XGBE_SPEED_1000_WORD           0x1
-#define XGBE_SPEED_1000_DFE_TAP_CONFIG 0x3
-#define XGBE_SPEED_1000_DFE_TAP_ENABLE 0x0
+#define XGBE_LINK_TIMEOUT              5
+
+#define XGBE_SGMII_AN_LINK_STATUS      BIT(1)
+#define XGBE_SGMII_AN_LINK_SPEED       (BIT(2) | BIT(3))
+#define XGBE_SGMII_AN_LINK_SPEED_100   0x04
+#define XGBE_SGMII_AN_LINK_SPEED_1000  0x08
+#define XGBE_SGMII_AN_LINK_DUPLEX      BIT(4)
+
+/* ECC correctable error notification window (seconds) */
+#define XGBE_ECC_LIMIT                 60
+
+/* MDIO port types */
+#define XGMAC_MAX_C22_PORT             3
 
 struct xgbe_prv_data;
 
@@ -461,6 +468,7 @@ enum xgbe_state {
        XGBE_DOWN,
        XGBE_LINK_INIT,
        XGBE_LINK_ERR,
+       XGBE_STOPPED,
 };
 
 enum xgbe_int {
@@ -480,6 +488,12 @@ enum xgbe_int_state {
        XGMAC_INT_STATE_RESTORE,
 };
 
+enum xgbe_ecc_sec {
+       XGBE_ECC_SEC_TX,
+       XGBE_ECC_SEC_RX,
+       XGBE_ECC_SEC_DESC,
+};
+
 enum xgbe_speed {
        XGBE_SPEED_1000 = 0,
        XGBE_SPEED_2500,
@@ -487,6 +501,19 @@ enum xgbe_speed {
        XGBE_SPEEDS,
 };
 
+enum xgbe_xpcs_access {
+       XGBE_XPCS_ACCESS_V1 = 0,
+       XGBE_XPCS_ACCESS_V2,
+};
+
+enum xgbe_an_mode {
+       XGBE_AN_MODE_CL73 = 0,
+       XGBE_AN_MODE_CL73_REDRV,
+       XGBE_AN_MODE_CL37,
+       XGBE_AN_MODE_CL37_SGMII,
+       XGBE_AN_MODE_NONE,
+};
+
 enum xgbe_an {
        XGBE_AN_READY = 0,
        XGBE_AN_PAGE_RECEIVED,
@@ -504,8 +531,14 @@ enum xgbe_rx {
 };
 
 enum xgbe_mode {
-       XGBE_MODE_KR = 0,
-       XGBE_MODE_KX,
+       XGBE_MODE_KX_1000 = 0,
+       XGBE_MODE_KX_2500,
+       XGBE_MODE_KR,
+       XGBE_MODE_X,
+       XGBE_MODE_SGMII_100,
+       XGBE_MODE_SGMII_1000,
+       XGBE_MODE_SFI,
+       XGBE_MODE_UNKNOWN,
 };
 
 enum xgbe_speedset {
@@ -513,6 +546,12 @@ enum xgbe_speedset {
        XGBE_SPEEDSET_2500_10000,
 };
 
+enum xgbe_mdio_mode {
+       XGBE_MDIO_MODE_NONE = 0,
+       XGBE_MDIO_MODE_CL22,
+       XGBE_MDIO_MODE_CL45,
+};
+
 struct xgbe_phy {
        u32 supported;
        u32 advertising;
@@ -531,6 +570,43 @@ struct xgbe_phy {
        int rx_pause;
 };
 
+enum xgbe_i2c_cmd {
+       XGBE_I2C_CMD_READ = 0,
+       XGBE_I2C_CMD_WRITE,
+};
+
+struct xgbe_i2c_op {
+       enum xgbe_i2c_cmd cmd;
+
+       unsigned int target;
+
+       void *buf;
+       unsigned int len;
+};
+
+struct xgbe_i2c_op_state {
+       struct xgbe_i2c_op *op;
+
+       unsigned int tx_len;
+       unsigned char *tx_buf;
+
+       unsigned int rx_len;
+       unsigned char *rx_buf;
+
+       unsigned int tx_abort_source;
+
+       int ret;
+};
+
+struct xgbe_i2c {
+       unsigned int started;
+       unsigned int max_speed_mode;
+       unsigned int rx_fifo_size;
+       unsigned int tx_fifo_size;
+
+       struct xgbe_i2c_op_state op_state;
+};
+
 struct xgbe_mmc_stats {
        /* Tx Stats */
        u64 txoctetcount_gb;
@@ -601,9 +677,15 @@ struct xgbe_hw_if {
 
        int (*read_mmd_regs)(struct xgbe_prv_data *, int, int);
        void (*write_mmd_regs)(struct xgbe_prv_data *, int, int, int);
-       int (*set_gmii_speed)(struct xgbe_prv_data *);
-       int (*set_gmii_2500_speed)(struct xgbe_prv_data *);
-       int (*set_xgmii_speed)(struct xgbe_prv_data *);
+       int (*set_speed)(struct xgbe_prv_data *, int);
+
+       int (*set_ext_mii_mode)(struct xgbe_prv_data *, unsigned int,
+                               enum xgbe_mdio_mode);
+       int (*read_ext_mii_regs)(struct xgbe_prv_data *, int, int);
+       int (*write_ext_mii_regs)(struct xgbe_prv_data *, int, int, u16);
+
+       int (*set_gpio)(struct xgbe_prv_data *, unsigned int);
+       int (*clr_gpio)(struct xgbe_prv_data *, unsigned int);
 
        void (*enable_tx)(struct xgbe_prv_data *);
        void (*disable_tx)(struct xgbe_prv_data *);
@@ -682,11 +764,65 @@ struct xgbe_hw_if {
        int (*disable_rss)(struct xgbe_prv_data *);
        int (*set_rss_hash_key)(struct xgbe_prv_data *, const u8 *);
        int (*set_rss_lookup_table)(struct xgbe_prv_data *, const u32 *);
+
+       /* For ECC */
+       void (*disable_ecc_ded)(struct xgbe_prv_data *);
+       void (*disable_ecc_sec)(struct xgbe_prv_data *, enum xgbe_ecc_sec);
+};
+
+/* This structure represents implementation specific routines for an
+ * implementation of a PHY. All routines are required unless noted below.
+ *   Optional routines:
+ *     kr_training_pre, kr_training_post
+ */
+struct xgbe_phy_impl_if {
+       /* Perform Setup/teardown actions */
+       int (*init)(struct xgbe_prv_data *);
+       void (*exit)(struct xgbe_prv_data *);
+
+       /* Perform start/stop specific actions */
+       int (*reset)(struct xgbe_prv_data *);
+       int (*start)(struct xgbe_prv_data *);
+       void (*stop)(struct xgbe_prv_data *);
+
+       /* Return the link status */
+       int (*link_status)(struct xgbe_prv_data *, int *);
+
+       /* Indicate if a particular speed is valid */
+       bool (*valid_speed)(struct xgbe_prv_data *, int);
+
+       /* Check if the specified mode can/should be used */
+       bool (*use_mode)(struct xgbe_prv_data *, enum xgbe_mode);
+       /* Switch the PHY into various modes */
+       void (*set_mode)(struct xgbe_prv_data *, enum xgbe_mode);
+       /* Retrieve mode needed for a specific speed */
+       enum xgbe_mode (*get_mode)(struct xgbe_prv_data *, int);
+       /* Retrieve new/next mode when trying to auto-negotiate */
+       enum xgbe_mode (*switch_mode)(struct xgbe_prv_data *);
+       /* Retrieve current mode */
+       enum xgbe_mode (*cur_mode)(struct xgbe_prv_data *);
+
+       /* Retrieve current auto-negotiation mode */
+       enum xgbe_an_mode (*an_mode)(struct xgbe_prv_data *);
+
+       /* Configure auto-negotiation settings */
+       int (*an_config)(struct xgbe_prv_data *);
+
+       /* Set/override auto-negotiation advertisement settings */
+       unsigned int (*an_advertising)(struct xgbe_prv_data *);
+
+       /* Process results of auto-negotiation */
+       enum xgbe_mode (*an_outcome)(struct xgbe_prv_data *);
+
+       /* Pre/Post KR training enablement support */
+       void (*kr_training_pre)(struct xgbe_prv_data *);
+       void (*kr_training_post)(struct xgbe_prv_data *);
 };
 
 struct xgbe_phy_if {
-       /* For initial PHY setup */
-       void (*phy_init)(struct xgbe_prv_data *);
+       /* For PHY setup/teardown */
+       int (*phy_init)(struct xgbe_prv_data *);
+       void (*phy_exit)(struct xgbe_prv_data *);
 
        /* For PHY support when setting device up/down */
        int (*phy_reset)(struct xgbe_prv_data *);
@@ -696,6 +832,30 @@ struct xgbe_phy_if {
        /* For PHY support while device is up */
        void (*phy_status)(struct xgbe_prv_data *);
        int (*phy_config_aneg)(struct xgbe_prv_data *);
+
+       /* For PHY settings validation */
+       bool (*phy_valid_speed)(struct xgbe_prv_data *, int);
+
+       /* For single interrupt support */
+       irqreturn_t (*an_isr)(int, struct xgbe_prv_data *);
+
+       /* PHY implementation specific services */
+       struct xgbe_phy_impl_if phy_impl;
+};
+
+struct xgbe_i2c_if {
+       /* For initial I2C setup */
+       int (*i2c_init)(struct xgbe_prv_data *);
+
+       /* For I2C support when setting device up/down */
+       int (*i2c_start)(struct xgbe_prv_data *);
+       void (*i2c_stop)(struct xgbe_prv_data *);
+
+       /* For performing I2C operations */
+       int (*i2c_xfer)(struct xgbe_prv_data *, struct xgbe_i2c_op *);
+
+       /* For single interrupt support */
+       irqreturn_t (*i2c_isr)(int, struct xgbe_prv_data *);
 };
 
 struct xgbe_desc_if {
@@ -755,11 +915,28 @@ struct xgbe_hw_features {
        unsigned int aux_snap_num;      /* Number of Aux snapshot inputs */
 };
 
+struct xgbe_version_data {     /* per-device-version settings */
+       void (*init_function_ptrs_phy_impl)(struct xgbe_phy_if *);
+       enum xgbe_xpcs_access xpcs_access;
+       unsigned int mmc_64bit;
+       unsigned int tx_max_fifo_size;  /* seeds pdata->tx_max_fifo_size */
+       unsigned int rx_max_fifo_size;  /* seeds pdata->rx_max_fifo_size */
+       unsigned int tx_tstamp_workaround;
+       unsigned int ecc_support;
+       unsigned int i2c_support;
+};
+
 struct xgbe_prv_data {
        struct net_device *netdev;
-       struct platform_device *pdev;
+       struct pci_dev *pcidev;
+       struct platform_device *platdev;
        struct acpi_device *adev;
        struct device *dev;
+       struct platform_device *phy_platdev;
+       struct device *phy_dev;
+
+       /* Version related data */
+       struct xgbe_version_data *vdata;
 
        /* ACPI or DT flag */
        unsigned int use_acpi;
@@ -770,12 +947,17 @@ struct xgbe_prv_data {
        void __iomem *rxtx_regs;        /* SerDes Rx/Tx CSRs */
        void __iomem *sir0_regs;        /* SerDes integration registers (1/2) */
        void __iomem *sir1_regs;        /* SerDes integration registers (2/2) */
+       void __iomem *xprop_regs;       /* XGBE property registers */
+       void __iomem *xi2c_regs;        /* XGBE I2C CSRs */
 
        /* Overall device lock */
        spinlock_t lock;
 
        /* XPCS indirect addressing lock */
        spinlock_t xpcs_lock;
+       unsigned int xpcs_window;
+       unsigned int xpcs_window_size;
+       unsigned int xpcs_window_mask;
 
        /* RSS addressing mutex */
        struct mutex rss_mutex;
@@ -783,12 +965,39 @@ struct xgbe_prv_data {
        /* Flags representing xgbe_state */
        unsigned long dev_state;
 
+       /* ECC support */
+       unsigned long tx_sec_period;
+       unsigned long tx_ded_period;
+       unsigned long rx_sec_period;
+       unsigned long rx_ded_period;
+       unsigned long desc_sec_period;
+       unsigned long desc_ded_period;
+
+       unsigned int tx_sec_count;
+       unsigned int tx_ded_count;
+       unsigned int rx_sec_count;
+       unsigned int rx_ded_count;
+       unsigned int desc_ded_count;
+       unsigned int desc_sec_count;
+
+       struct msix_entry *msix_entries;
        int dev_irq;
+       int ecc_irq;
+       int i2c_irq;
+       int channel_irq[XGBE_MAX_DMA_CHANNELS];
+
        unsigned int per_channel_irq;
+       unsigned int irq_shared;
+       unsigned int irq_count;
+       unsigned int channel_irq_count;
+       unsigned int channel_irq_mode;
+
+       char ecc_name[IFNAMSIZ + 32];
 
        struct xgbe_hw_if hw_if;
        struct xgbe_phy_if phy_if;
        struct xgbe_desc_if desc_if;
+       struct xgbe_i2c_if i2c_if;
 
        /* AXI DMA settings */
        unsigned int coherent;
@@ -803,12 +1012,16 @@ struct xgbe_prv_data {
 
        /* Rings for Tx/Rx on a DMA channel */
        struct xgbe_channel *channel;
+       unsigned int tx_max_channel_count;
+       unsigned int rx_max_channel_count;
        unsigned int channel_count;
        unsigned int tx_ring_count;
        unsigned int tx_desc_count;
        unsigned int rx_ring_count;
        unsigned int rx_desc_count;
 
+       unsigned int tx_max_q_count;
+       unsigned int rx_max_q_count;
        unsigned int tx_q_count;
        unsigned int rx_q_count;
 
@@ -820,11 +1033,13 @@ struct xgbe_prv_data {
        unsigned int tx_threshold;
        unsigned int tx_pbl;
        unsigned int tx_osp_mode;
+       unsigned int tx_max_fifo_size;
 
        /* Rx settings */
        unsigned int rx_sf_mode;
        unsigned int rx_threshold;
        unsigned int rx_pbl;
+       unsigned int rx_max_fifo_size;
 
        /* Tx coalescing settings */
        unsigned int tx_usecs;
@@ -842,6 +1057,8 @@ struct xgbe_prv_data {
        unsigned int pause_autoneg;
        unsigned int tx_pause;
        unsigned int rx_pause;
+       unsigned int rx_rfa[XGBE_MAX_QUEUES];
+       unsigned int rx_rfd[XGBE_MAX_QUEUES];
 
        /* Receive Side Scaling settings */
        u8 rss_key[XGBE_RSS_HASH_KEY_SIZE];
@@ -881,13 +1098,16 @@ struct xgbe_prv_data {
        struct ieee_pfc *pfc;
        unsigned int q2tc_map[XGBE_MAX_QUEUES];
        unsigned int prio2q_map[IEEE_8021QAZ_MAX_TCS];
+       unsigned int pfcq[XGBE_MAX_QUEUES];
+       unsigned int pfc_rfa;
        u8 num_tcs;
 
        /* Hardware features of the device */
        struct xgbe_hw_features hw_feat;
 
-       /* Device restart work structure */
+       /* Device work structures */
        struct work_struct restart_work;
+       struct work_struct stopdev_work;
 
        /* Keeps track of power mode */
        unsigned int power_down;
@@ -901,9 +1121,14 @@ struct xgbe_prv_data {
        int phy_speed;
 
        /* MDIO/PHY related settings */
+       unsigned int phy_started;
+       void *phy_data;
        struct xgbe_phy phy;
        int mdio_mmd;
        unsigned long link_check;
+       struct completion mdio_complete;
+
+       unsigned int kr_redrv;
 
        char an_name[IFNAMSIZ + 32];
        struct workqueue_struct *an_workqueue;
@@ -911,23 +1136,9 @@ struct xgbe_prv_data {
        int an_irq;
        struct work_struct an_irq_work;
 
-       unsigned int speed_set;
-
-       /* SerDes UEFI configurable settings.
-        *   Switching between modes/speeds requires new values for some
-        *   SerDes settings.  The values can be supplied as device
-        *   properties in array format.  The first array entry is for
-        *   1GbE, second for 2.5GbE and third for 10GbE
-        */
-       u32 serdes_blwc[XGBE_SPEEDS];
-       u32 serdes_cdr_rate[XGBE_SPEEDS];
-       u32 serdes_pq_skew[XGBE_SPEEDS];
-       u32 serdes_tx_amp[XGBE_SPEEDS];
-       u32 serdes_dfe_tap_cfg[XGBE_SPEEDS];
-       u32 serdes_dfe_tap_ena[XGBE_SPEEDS];
-
        /* Auto-negotiation state machine support */
        unsigned int an_int;
+       unsigned int an_status;
        struct mutex an_mutex;
        enum xgbe_an an_result;
        enum xgbe_an an_state;
@@ -938,6 +1149,13 @@ struct xgbe_prv_data {
        unsigned int parallel_detect;
        unsigned int fec_ability;
        unsigned long an_start;
+       enum xgbe_an_mode an_mode;
+
+       /* I2C support */
+       struct xgbe_i2c i2c;
+       struct mutex i2c_mutex;
+       struct completion i2c_complete;
+       char i2c_name[IFNAMSIZ + 32];
 
        unsigned int lpm_ctrl;          /* CTRL1 for resume */
 
@@ -948,14 +1166,36 @@ struct xgbe_prv_data {
 
        unsigned int debugfs_xpcs_mmd;
        unsigned int debugfs_xpcs_reg;
+
+       unsigned int debugfs_xprop_reg;
+
+       unsigned int debugfs_xi2c_reg;
 #endif
 };
 
 /* Function prototypes*/
+struct xgbe_prv_data *xgbe_alloc_pdata(struct device *);
+void xgbe_free_pdata(struct xgbe_prv_data *);
+void xgbe_set_counts(struct xgbe_prv_data *);
+int xgbe_config_netdev(struct xgbe_prv_data *);
+void xgbe_deconfig_netdev(struct xgbe_prv_data *);
+
+int xgbe_platform_init(void);
+void xgbe_platform_exit(void);
+#ifdef CONFIG_PCI
+int xgbe_pci_init(void);
+void xgbe_pci_exit(void);
+#else
+static inline int xgbe_pci_init(void) { return 0; }
+static inline void xgbe_pci_exit(void) { }
+#endif
 
 void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *);
 void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *);
+void xgbe_init_function_ptrs_phy_v1(struct xgbe_phy_if *);
+void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *);
 void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *);
+void xgbe_init_function_ptrs_i2c(struct xgbe_i2c_if *);
 const struct net_device_ops *xgbe_get_netdev_ops(void);
 const struct ethtool_ops *xgbe_get_ethtool_ops(void);
 
index c481f104a8febc4acacf223ad2c3fd5f4147ef41..5390ae89136c6b7870c5d915655854707192b0d5 100644 (file)
@@ -204,17 +204,6 @@ static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring)
        return num_msgs;
 }
 
-static void xgene_enet_setup_coalescing(struct xgene_enet_desc_ring *ring)
-{
-       u32 data = 0x7777;
-
-       xgene_enet_ring_wr32(ring, CSR_PBM_COAL, 0x8e);
-       xgene_enet_ring_wr32(ring, CSR_PBM_CTICK1, data);
-       xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data << 16);
-       xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x40);
-       xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x80);
-}
-
 void xgene_enet_parse_error(struct xgene_enet_desc_ring *ring,
                            struct xgene_enet_pdata *pdata,
                            enum xgene_enet_err_code status)
@@ -929,5 +918,4 @@ struct xgene_ring_ops xgene_ring1_ops = {
        .clear = xgene_enet_clear_ring,
        .wr_cmd = xgene_enet_wr_cmd,
        .len = xgene_enet_ring_len,
-       .coalesce = xgene_enet_setup_coalescing,
 };
index 8456337a237db91a837593570b3a659e66835d69..06e598c8bc16e5618c110fcfbd5b183b464f86c3 100644 (file)
@@ -55,8 +55,10 @@ enum xgene_enet_rm {
 #define PREFETCH_BUF_EN                BIT(21)
 #define CSR_RING_ID_BUF                0x000c
 #define CSR_PBM_COAL           0x0014
+#define CSR_PBM_CTICK0         0x0018
 #define CSR_PBM_CTICK1         0x001c
 #define CSR_PBM_CTICK2         0x0020
+#define CSR_PBM_CTICK3         0x0024
 #define CSR_THRESHOLD0_SET1    0x0030
 #define CSR_THRESHOLD1_SET1    0x0034
 #define CSR_RING_NE_INT_MODE   0x017c
index 3fc7b0db952bbf404e2182da71da670f566accf1..1352b5245fcc7a7ccd4e63f32b3e4a969e03831b 100644 (file)
@@ -1188,7 +1188,8 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
                tx_ring->dst_ring_num = xgene_enet_dst_ring_num(cp_ring);
        }
 
-       pdata->ring_ops->coalesce(pdata->tx_ring[0]);
+       if (pdata->ring_ops->coalesce)
+               pdata->ring_ops->coalesce(pdata->tx_ring[0]);
        pdata->tx_qcnt_hi = pdata->tx_ring[0]->slots - 128;
 
        return 0;
index 2b76732add5dbbd6d25fd6fd6e1b887c317d167a..af51dd5844ceeeee5d7aa9bfd5a81979c24f7d56 100644 (file)
@@ -30,7 +30,7 @@ static void xgene_enet_ring_init(struct xgene_enet_desc_ring *ring)
                ring_cfg[0] |= SET_VAL(X2_INTLINE, ring->id & RING_BUFNUM_MASK);
                ring_cfg[3] |= SET_BIT(X2_DEQINTEN);
        }
-       ring_cfg[0] |= SET_VAL(X2_CFGCRID, 1);
+       ring_cfg[0] |= SET_VAL(X2_CFGCRID, 2);
 
        addr >>= 8;
        ring_cfg[2] |= QCOHERENT | SET_VAL(RINGADDRL, addr);
@@ -192,13 +192,15 @@ static u32 xgene_enet_ring_len(struct xgene_enet_desc_ring *ring)
 
 static void xgene_enet_setup_coalescing(struct xgene_enet_desc_ring *ring)
 {
-       u32 data = 0x7777;
+       u32 data = 0x77777777;
 
        xgene_enet_ring_wr32(ring, CSR_PBM_COAL, 0x8e);
+       xgene_enet_ring_wr32(ring, CSR_PBM_CTICK0, data);
        xgene_enet_ring_wr32(ring, CSR_PBM_CTICK1, data);
-       xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data << 16);
-       xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x40);
-       xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x80);
+       xgene_enet_ring_wr32(ring, CSR_PBM_CTICK2, data);
+       xgene_enet_ring_wr32(ring, CSR_PBM_CTICK3, data);
+       xgene_enet_ring_wr32(ring, CSR_THRESHOLD0_SET1, 0x08);
+       xgene_enet_ring_wr32(ring, CSR_THRESHOLD1_SET1, 0x10);
 }
 
 struct xgene_ring_ops xgene_ring2_ops = {
index 689045186064a117c81d420d014a6ab1857f3330..e743ddf46343302fe69c4c562c7cba239fe06dd9 100644 (file)
@@ -17,13 +17,14 @@ if NET_VENDOR_ARC
 
 config ARC_EMAC_CORE
        tristate
+       depends on ARC || ARCH_ROCKCHIP || COMPILE_TEST
        select MII
        select PHYLIB
 
 config ARC_EMAC
        tristate "ARC EMAC support"
        select ARC_EMAC_CORE
-       depends on OF_IRQ && OF_NET && HAS_DMA
+       depends on OF_IRQ && OF_NET && HAS_DMA && (ARC || COMPILE_TEST)
        ---help---
          On some legacy ARC (Synopsys) FPGA boards such as ARCAngel4/ML50x
          non-standard on-chip ethernet device ARC EMAC 10/100 is used.
@@ -32,7 +33,7 @@ config ARC_EMAC
 config EMAC_ROCKCHIP
        tristate "Rockchip EMAC support"
        select ARC_EMAC_CORE
-       depends on OF_IRQ && OF_NET && REGULATOR && HAS_DMA
+       depends on OF_IRQ && OF_NET && REGULATOR && HAS_DMA && (ARCH_ROCKCHIP || COMPILE_TEST)
        ---help---
          Support for Rockchip RK3036/RK3066/RK3188 EMAC ethernet controllers.
          This selects Rockchip SoC glue layer support for the
index 95d8b3ea7bc3bf06e4fdd3dfe654a8d727963b45..abc9f2a590546e0be4c5a5dabcf6ff3d219f1936 100644 (file)
@@ -460,7 +460,7 @@ static void arc_emac_set_rx_mode(struct net_device *ndev)
                if (ndev->flags & IFF_ALLMULTI) {
                        arc_reg_set(priv, R_LAFL, ~0);
                        arc_reg_set(priv, R_LAFH, ~0);
-               } else {
+               } else if (ndev->flags & IFF_MULTICAST) {
                        struct netdev_hw_addr *ha;
                        unsigned int filter[2] = { 0, 0 };
                        int bit;
@@ -472,6 +472,9 @@ static void arc_emac_set_rx_mode(struct net_device *ndev)
 
                        arc_reg_set(priv, R_LAFL, filter[0]);
                        arc_reg_set(priv, R_LAFH, filter[1]);
+               } else {
+                       arc_reg_set(priv, R_LAFL, 0);
+                       arc_reg_set(priv, R_LAFH, 0);
                }
        }
 }
@@ -764,8 +767,6 @@ int arc_emac_probe(struct net_device *ndev, int interface)
        ndev->netdev_ops = &arc_emac_netdev_ops;
        ndev->ethtool_ops = &arc_emac_ethtool_ops;
        ndev->watchdog_timeo = TX_TIMEOUT;
-       /* FIXME :: no multicast support yet */
-       ndev->flags &= ~IFF_MULTICAST;
 
        priv = netdev_priv(ndev);
        priv->dev = dev;
index 6cac919272eaee7d5239619eea43eb8cbda291d0..d4a409139ea2c9cc13a03d7d63511d322210cc7a 100644 (file)
@@ -50,6 +50,10 @@ struct alx_buffer {
 };
 
 struct alx_rx_queue {
+       struct net_device *netdev;
+       struct device *dev;
+       struct alx_napi *np;
+
        struct alx_rrd *rrd;
        dma_addr_t rrd_dma;
 
@@ -58,16 +62,26 @@ struct alx_rx_queue {
 
        struct alx_buffer *bufs;
 
+       u16 count;
        u16 write_idx, read_idx;
        u16 rrd_read_idx;
+       u16 queue_idx;
 };
 #define ALX_RX_ALLOC_THRESH    32
 
 struct alx_tx_queue {
+       struct net_device *netdev;
+       struct device *dev;
+
        struct alx_txd *tpd;
        dma_addr_t tpd_dma;
+
        struct alx_buffer *bufs;
+
+       u16 count;
        u16 write_idx, read_idx;
+       u16 queue_idx;
+       u16 p_reg, c_reg;
 };
 
 #define ALX_DEFAULT_TX_WORK 128
@@ -76,6 +90,18 @@ enum alx_device_quirks {
        ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG = BIT(0),
 };
 
+struct alx_napi {
+       struct napi_struct      napi;
+       struct alx_priv         *alx;
+       struct alx_rx_queue     *rxq;
+       struct alx_tx_queue     *txq;
+       int                     vec_idx;
+       u32                     vec_mask;
+       char                    irq_lbl[IFNAMSIZ + 8];
+};
+
+#define ALX_MAX_NAPIS 8
+
 #define ALX_FLAG_USING_MSIX    BIT(0)
 #define ALX_FLAG_USING_MSI     BIT(1)
 
@@ -87,7 +113,6 @@ struct alx_priv {
        /* msi-x vectors */
        int num_vec;
        struct msix_entry *msix_entries;
-       char irq_lbl[IFNAMSIZ + 8];
 
        /* all descriptor memory */
        struct {
@@ -96,6 +121,11 @@ struct alx_priv {
                unsigned int size;
        } descmem;
 
+       struct alx_napi *qnapi[ALX_MAX_NAPIS];
+       int num_txq;
+       int num_rxq;
+       int num_napi;
+
        /* protect int_mask updates */
        spinlock_t irq_lock;
        u32 int_mask;
@@ -104,10 +134,6 @@ struct alx_priv {
        unsigned int rx_ringsz;
        unsigned int rxbuf_size;
 
-       struct napi_struct napi;
-       struct alx_tx_queue txq;
-       struct alx_rx_queue rxq;
-
        struct work_struct link_check_wk;
        struct work_struct reset_wk;
 
index 08e22df2a300ad1a5453e56f548f89bccd057cb6..2f4eabf652e800ff9a2722a43acac174f8ddc427 100644 (file)
@@ -125,64 +125,75 @@ static u32 alx_get_supported_speeds(struct alx_hw *hw)
        return supported;
 }
 
-static int alx_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+static int alx_get_link_ksettings(struct net_device *netdev,
+                                 struct ethtool_link_ksettings *cmd)
 {
        struct alx_priv *alx = netdev_priv(netdev);
        struct alx_hw *hw = &alx->hw;
+       u32 supported, advertising;
 
-       ecmd->supported = SUPPORTED_Autoneg |
+       supported = SUPPORTED_Autoneg |
                          SUPPORTED_TP |
                          SUPPORTED_Pause |
                          SUPPORTED_Asym_Pause;
        if (alx_hw_giga(hw))
-               ecmd->supported |= SUPPORTED_1000baseT_Full;
-       ecmd->supported |= alx_get_supported_speeds(hw);
+               supported |= SUPPORTED_1000baseT_Full;
+       supported |= alx_get_supported_speeds(hw);
 
-       ecmd->advertising = ADVERTISED_TP;
+       advertising = ADVERTISED_TP;
        if (hw->adv_cfg & ADVERTISED_Autoneg)
-               ecmd->advertising |= hw->adv_cfg;
+               advertising |= hw->adv_cfg;
 
-       ecmd->port = PORT_TP;
-       ecmd->phy_address = 0;
+       cmd->base.port = PORT_TP;
+       cmd->base.phy_address = 0;
 
        if (hw->adv_cfg & ADVERTISED_Autoneg)
-               ecmd->autoneg = AUTONEG_ENABLE;
+               cmd->base.autoneg = AUTONEG_ENABLE;
        else
-               ecmd->autoneg = AUTONEG_DISABLE;
-       ecmd->transceiver = XCVR_INTERNAL;
+               cmd->base.autoneg = AUTONEG_DISABLE;
 
        if (hw->flowctrl & ALX_FC_ANEG && hw->adv_cfg & ADVERTISED_Autoneg) {
                if (hw->flowctrl & ALX_FC_RX) {
-                       ecmd->advertising |= ADVERTISED_Pause;
+                       advertising |= ADVERTISED_Pause;
 
                        if (!(hw->flowctrl & ALX_FC_TX))
-                               ecmd->advertising |= ADVERTISED_Asym_Pause;
+                               advertising |= ADVERTISED_Asym_Pause;
                } else if (hw->flowctrl & ALX_FC_TX) {
-                       ecmd->advertising |= ADVERTISED_Asym_Pause;
+                       advertising |= ADVERTISED_Asym_Pause;
                }
        }
 
-       ethtool_cmd_speed_set(ecmd, hw->link_speed);
-       ecmd->duplex = hw->duplex;
+       cmd->base.speed = hw->link_speed;
+       cmd->base.duplex = hw->duplex;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        return 0;
 }
 
-static int alx_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+static int alx_set_link_ksettings(struct net_device *netdev,
+                                 const struct ethtool_link_ksettings *cmd)
 {
        struct alx_priv *alx = netdev_priv(netdev);
        struct alx_hw *hw = &alx->hw;
        u32 adv_cfg;
+       u32 advertising;
 
        ASSERT_RTNL();
 
-       if (ecmd->autoneg == AUTONEG_ENABLE) {
-               if (ecmd->advertising & ~alx_get_supported_speeds(hw))
+       ethtool_convert_link_mode_to_legacy_u32(&advertising,
+                                               cmd->link_modes.advertising);
+
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
+               if (advertising & ~alx_get_supported_speeds(hw))
                        return -EINVAL;
-               adv_cfg = ecmd->advertising | ADVERTISED_Autoneg;
+               adv_cfg = advertising | ADVERTISED_Autoneg;
        } else {
-               adv_cfg = alx_speed_to_ethadv(ethtool_cmd_speed(ecmd),
-                                             ecmd->duplex);
+               adv_cfg = alx_speed_to_ethadv(cmd->base.speed,
+                                             cmd->base.duplex);
 
                if (!adv_cfg || adv_cfg == ADVERTISED_1000baseT_Full)
                        return -EINVAL;
@@ -300,8 +311,6 @@ static int alx_get_sset_count(struct net_device *netdev, int sset)
 }
 
 const struct ethtool_ops alx_ethtool_ops = {
-       .get_settings   = alx_get_settings,
-       .set_settings   = alx_set_settings,
        .get_pauseparam = alx_get_pauseparam,
        .set_pauseparam = alx_set_pauseparam,
        .get_msglevel   = alx_get_msglevel,
@@ -310,4 +319,6 @@ const struct ethtool_ops alx_ethtool_ops = {
        .get_strings    = alx_get_strings,
        .get_sset_count = alx_get_sset_count,
        .get_ethtool_stats      = alx_get_ethtool_stats,
+       .get_link_ksettings     = alx_get_link_ksettings,
+       .set_link_ksettings     = alx_set_link_ksettings,
 };
index eccbacd96201d00b9e7c03926aa07cb25c98badb..c8f525574d68c67cbed2899c8461ace4606a7515 100644 (file)
 
 const char alx_drv_name[] = "alx";
 
-static bool msix = false;
-module_param(msix, bool, 0);
-MODULE_PARM_DESC(msix, "Enable msi-x interrupt support");
-
-static void alx_free_txbuf(struct alx_priv *alx, int entry)
+static void alx_free_txbuf(struct alx_tx_queue *txq, int entry)
 {
-       struct alx_buffer *txb = &alx->txq.bufs[entry];
+       struct alx_buffer *txb = &txq->bufs[entry];
 
        if (dma_unmap_len(txb, size)) {
-               dma_unmap_single(&alx->hw.pdev->dev,
+               dma_unmap_single(txq->dev,
                                 dma_unmap_addr(txb, dma),
                                 dma_unmap_len(txb, size),
                                 DMA_TO_DEVICE);
@@ -75,7 +71,7 @@ static void alx_free_txbuf(struct alx_priv *alx, int entry)
 
 static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 {
-       struct alx_rx_queue *rxq = &alx->rxq;
+       struct alx_rx_queue *rxq = alx->qnapi[0]->rxq;
        struct sk_buff *skb;
        struct alx_buffer *cur_buf;
        dma_addr_t dma;
@@ -143,24 +139,42 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
        return count;
 }
 
-static inline int alx_tpd_avail(struct alx_priv *alx)
+static struct alx_tx_queue *alx_tx_queue_mapping(struct alx_priv *alx,
+                                                struct sk_buff *skb)
 {
-       struct alx_tx_queue *txq = &alx->txq;
+       unsigned int r_idx = skb->queue_mapping;
+
+       if (r_idx >= alx->num_txq)
+               r_idx = r_idx % alx->num_txq;
 
+       return alx->qnapi[r_idx]->txq;
+}
+
+static struct netdev_queue *alx_get_tx_queue(const struct alx_tx_queue *txq)
+{
+       return netdev_get_tx_queue(txq->netdev, txq->queue_idx);
+}
+
+static inline int alx_tpd_avail(struct alx_tx_queue *txq)
+{
        if (txq->write_idx >= txq->read_idx)
-               return alx->tx_ringsz + txq->read_idx - txq->write_idx - 1;
+               return txq->count + txq->read_idx - txq->write_idx - 1;
        return txq->read_idx - txq->write_idx - 1;
 }
 
-static bool alx_clean_tx_irq(struct alx_priv *alx)
+static bool alx_clean_tx_irq(struct alx_tx_queue *txq)
 {
-       struct alx_tx_queue *txq = &alx->txq;
+       struct alx_priv *alx;
+       struct netdev_queue *tx_queue;
        u16 hw_read_idx, sw_read_idx;
        unsigned int total_bytes = 0, total_packets = 0;
        int budget = ALX_DEFAULT_TX_WORK;
 
+       alx = netdev_priv(txq->netdev);
+       tx_queue = alx_get_tx_queue(txq);
+
        sw_read_idx = txq->read_idx;
-       hw_read_idx = alx_read_mem16(&alx->hw, ALX_TPD_PRI0_CIDX);
+       hw_read_idx = alx_read_mem16(&alx->hw, txq->c_reg);
 
        if (sw_read_idx != hw_read_idx) {
                while (sw_read_idx != hw_read_idx && budget > 0) {
@@ -173,19 +187,19 @@ static bool alx_clean_tx_irq(struct alx_priv *alx)
                                budget--;
                        }
 
-                       alx_free_txbuf(alx, sw_read_idx);
+                       alx_free_txbuf(txq, sw_read_idx);
 
-                       if (++sw_read_idx == alx->tx_ringsz)
+                       if (++sw_read_idx == txq->count)
                                sw_read_idx = 0;
                }
                txq->read_idx = sw_read_idx;
 
-               netdev_completed_queue(alx->dev, total_packets, total_bytes);
+               netdev_tx_completed_queue(tx_queue, total_packets, total_bytes);
        }
 
-       if (netif_queue_stopped(alx->dev) && netif_carrier_ok(alx->dev) &&
-           alx_tpd_avail(alx) > alx->tx_ringsz/4)
-               netif_wake_queue(alx->dev);
+       if (netif_tx_queue_stopped(tx_queue) && netif_carrier_ok(alx->dev) &&
+           alx_tpd_avail(txq) > txq->count / 4)
+               netif_tx_wake_queue(tx_queue);
 
        return sw_read_idx == hw_read_idx;
 }
@@ -200,15 +214,17 @@ static void alx_schedule_reset(struct alx_priv *alx)
        schedule_work(&alx->reset_wk);
 }
 
-static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
+static int alx_clean_rx_irq(struct alx_rx_queue *rxq, int budget)
 {
-       struct alx_rx_queue *rxq = &alx->rxq;
+       struct alx_priv *alx;
        struct alx_rrd *rrd;
        struct alx_buffer *rxb;
        struct sk_buff *skb;
        u16 length, rfd_cleaned = 0;
        int work = 0;
 
+       alx = netdev_priv(rxq->netdev);
+
        while (work < budget) {
                rrd = &rxq->rrd[rxq->rrd_read_idx];
                if (!(rrd->word3 & cpu_to_le32(1 << RRD_UPDATED_SHIFT)))
@@ -224,7 +240,7 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
                }
 
                rxb = &rxq->bufs[rxq->read_idx];
-               dma_unmap_single(&alx->hw.pdev->dev,
+               dma_unmap_single(rxq->dev,
                                 dma_unmap_addr(rxb, dma),
                                 dma_unmap_len(rxb, size),
                                 DMA_FROM_DEVICE);
@@ -242,7 +258,7 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
                length = ALX_GET_FIELD(le32_to_cpu(rrd->word3),
                                       RRD_PKTLEN) - ETH_FCS_LEN;
                skb_put(skb, length);
-               skb->protocol = eth_type_trans(skb, alx->dev);
+               skb->protocol = eth_type_trans(skb, rxq->netdev);
 
                skb_checksum_none_assert(skb);
                if (alx->dev->features & NETIF_F_RXCSUM &&
@@ -259,13 +275,13 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
                        }
                }
 
-               napi_gro_receive(&alx->napi, skb);
+               napi_gro_receive(&rxq->np->napi, skb);
                work++;
 
 next_pkt:
-               if (++rxq->read_idx == alx->rx_ringsz)
+               if (++rxq->read_idx == rxq->count)
                        rxq->read_idx = 0;
-               if (++rxq->rrd_read_idx == alx->rx_ringsz)
+               if (++rxq->rrd_read_idx == rxq->count)
                        rxq->rrd_read_idx = 0;
 
                if (++rfd_cleaned > ALX_RX_ALLOC_THRESH)
@@ -280,23 +296,26 @@ next_pkt:
 
 static int alx_poll(struct napi_struct *napi, int budget)
 {
-       struct alx_priv *alx = container_of(napi, struct alx_priv, napi);
+       struct alx_napi *np = container_of(napi, struct alx_napi, napi);
+       struct alx_priv *alx = np->alx;
        struct alx_hw *hw = &alx->hw;
        unsigned long flags;
-       bool tx_complete;
-       int work;
+       bool tx_complete = true;
+       int work = 0;
 
-       tx_complete = alx_clean_tx_irq(alx);
-       work = alx_clean_rx_irq(alx, budget);
+       if (np->txq)
+               tx_complete = alx_clean_tx_irq(np->txq);
+       if (np->rxq)
+               work = alx_clean_rx_irq(np->rxq, budget);
 
        if (!tx_complete || work == budget)
                return budget;
 
-       napi_complete(&alx->napi);
+       napi_complete(&np->napi);
 
        /* enable interrupt */
        if (alx->flags & ALX_FLAG_USING_MSIX) {
-               alx_mask_msix(hw, 1, false);
+               alx_mask_msix(hw, np->vec_idx, false);
        } else {
                spin_lock_irqsave(&alx->irq_lock, flags);
                alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
@@ -350,7 +369,7 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
                goto out;
 
        if (intr & (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0)) {
-               napi_schedule(&alx->napi);
+               napi_schedule(&alx->qnapi[0]->napi);
                /* mask rx/tx interrupt, enable them when napi complete */
                alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
                alx_write_mem32(hw, ALX_IMR, alx->int_mask);
@@ -365,15 +384,15 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
 
 static irqreturn_t alx_intr_msix_ring(int irq, void *data)
 {
-       struct alx_priv *alx = data;
-       struct alx_hw *hw = &alx->hw;
+       struct alx_napi *np = data;
+       struct alx_hw *hw = &np->alx->hw;
 
        /* mask interrupt to ACK chip */
-       alx_mask_msix(hw, 1, true);
+       alx_mask_msix(hw, np->vec_idx, true);
        /* clear interrupt status */
-       alx_write_mem32(hw, ALX_ISR, (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0));
+       alx_write_mem32(hw, ALX_ISR, np->vec_mask);
 
-       napi_schedule(&alx->napi);
+       napi_schedule(&np->napi);
 
        return IRQ_HANDLED;
 }
@@ -424,63 +443,79 @@ static irqreturn_t alx_intr_legacy(int irq, void *data)
        return alx_intr_handle(alx, intr);
 }
 
+static const u16 txring_header_reg[] = {ALX_TPD_PRI0_ADDR_LO,
+                                       ALX_TPD_PRI1_ADDR_LO,
+                                       ALX_TPD_PRI2_ADDR_LO,
+                                       ALX_TPD_PRI3_ADDR_LO};
+
 static void alx_init_ring_ptrs(struct alx_priv *alx)
 {
        struct alx_hw *hw = &alx->hw;
        u32 addr_hi = ((u64)alx->descmem.dma) >> 32;
+       struct alx_napi *np;
+       int i;
+
+       for (i = 0; i < alx->num_napi; i++) {
+               np = alx->qnapi[i];
+               if (np->txq) {
+                       np->txq->read_idx = 0;
+                       np->txq->write_idx = 0;
+                       alx_write_mem32(hw,
+                                       txring_header_reg[np->txq->queue_idx],
+                                       np->txq->tpd_dma);
+               }
+
+               if (np->rxq) {
+                       np->rxq->read_idx = 0;
+                       np->rxq->write_idx = 0;
+                       np->rxq->rrd_read_idx = 0;
+                       alx_write_mem32(hw, ALX_RRD_ADDR_LO, np->rxq->rrd_dma);
+                       alx_write_mem32(hw, ALX_RFD_ADDR_LO, np->rxq->rfd_dma);
+               }
+       }
+
+       alx_write_mem32(hw, ALX_TX_BASE_ADDR_HI, addr_hi);
+       alx_write_mem32(hw, ALX_TPD_RING_SZ, alx->tx_ringsz);
 
-       alx->rxq.read_idx = 0;
-       alx->rxq.write_idx = 0;
-       alx->rxq.rrd_read_idx = 0;
        alx_write_mem32(hw, ALX_RX_BASE_ADDR_HI, addr_hi);
-       alx_write_mem32(hw, ALX_RRD_ADDR_LO, alx->rxq.rrd_dma);
        alx_write_mem32(hw, ALX_RRD_RING_SZ, alx->rx_ringsz);
-       alx_write_mem32(hw, ALX_RFD_ADDR_LO, alx->rxq.rfd_dma);
        alx_write_mem32(hw, ALX_RFD_RING_SZ, alx->rx_ringsz);
        alx_write_mem32(hw, ALX_RFD_BUF_SZ, alx->rxbuf_size);
 
-       alx->txq.read_idx = 0;
-       alx->txq.write_idx = 0;
-       alx_write_mem32(hw, ALX_TX_BASE_ADDR_HI, addr_hi);
-       alx_write_mem32(hw, ALX_TPD_PRI0_ADDR_LO, alx->txq.tpd_dma);
-       alx_write_mem32(hw, ALX_TPD_RING_SZ, alx->tx_ringsz);
-
        /* load these pointers into the chip */
        alx_write_mem32(hw, ALX_SRAM9, ALX_SRAM_LOAD_PTR);
 }
 
-static void alx_free_txring_buf(struct alx_priv *alx)
+static void alx_free_txring_buf(struct alx_tx_queue *txq)
 {
-       struct alx_tx_queue *txq = &alx->txq;
        int i;
 
        if (!txq->bufs)
                return;
 
-       for (i = 0; i < alx->tx_ringsz; i++)
-               alx_free_txbuf(alx, i);
+       for (i = 0; i < txq->count; i++)
+               alx_free_txbuf(txq, i);
 
-       memset(txq->bufs, 0, alx->tx_ringsz * sizeof(struct alx_buffer));
-       memset(txq->tpd, 0, alx->tx_ringsz * sizeof(struct alx_txd));
+       memset(txq->bufs, 0, txq->count * sizeof(struct alx_buffer));
+       memset(txq->tpd, 0, txq->count * sizeof(struct alx_txd));
        txq->write_idx = 0;
        txq->read_idx = 0;
 
-       netdev_reset_queue(alx->dev);
+       netdev_tx_reset_queue(alx_get_tx_queue(txq));
 }
 
-static void alx_free_rxring_buf(struct alx_priv *alx)
+static void alx_free_rxring_buf(struct alx_rx_queue *rxq)
 {
-       struct alx_rx_queue *rxq = &alx->rxq;
        struct alx_buffer *cur_buf;
        u16 i;
 
-       if (rxq == NULL)
+       if (!rxq->bufs)
                return;
 
-       for (i = 0; i < alx->rx_ringsz; i++) {
+       for (i = 0; i < rxq->count; i++) {
                cur_buf = rxq->bufs + i;
                if (cur_buf->skb) {
-                       dma_unmap_single(&alx->hw.pdev->dev,
+                       dma_unmap_single(rxq->dev,
                                         dma_unmap_addr(cur_buf, dma),
                                         dma_unmap_len(cur_buf, size),
                                         DMA_FROM_DEVICE);
@@ -498,8 +533,14 @@ static void alx_free_rxring_buf(struct alx_priv *alx)
 
 static void alx_free_buffers(struct alx_priv *alx)
 {
-       alx_free_txring_buf(alx);
-       alx_free_rxring_buf(alx);
+       int i;
+
+       for (i = 0; i < alx->num_txq; i++)
+               if (alx->qnapi[i] && alx->qnapi[i]->txq)
+                       alx_free_txring_buf(alx->qnapi[i]->txq);
+
+       if (alx->qnapi[0] && alx->qnapi[0]->rxq)
+               alx_free_rxring_buf(alx->qnapi[0]->rxq);
 }
 
 static int alx_reinit_rings(struct alx_priv *alx)
@@ -573,19 +614,41 @@ static int alx_set_mac_address(struct net_device *netdev, void *data)
        return 0;
 }
 
-static int alx_alloc_descriptors(struct alx_priv *alx)
+static int alx_alloc_tx_ring(struct alx_priv *alx, struct alx_tx_queue *txq,
+                            int offset)
 {
-       alx->txq.bufs = kcalloc(alx->tx_ringsz,
-                               sizeof(struct alx_buffer),
-                               GFP_KERNEL);
-       if (!alx->txq.bufs)
+       txq->bufs = kcalloc(txq->count, sizeof(struct alx_buffer), GFP_KERNEL);
+       if (!txq->bufs)
                return -ENOMEM;
 
-       alx->rxq.bufs = kcalloc(alx->rx_ringsz,
-                               sizeof(struct alx_buffer),
-                               GFP_KERNEL);
-       if (!alx->rxq.bufs)
-               goto out_free;
+       txq->tpd = alx->descmem.virt + offset;
+       txq->tpd_dma = alx->descmem.dma + offset;
+       offset += sizeof(struct alx_txd) * txq->count;
+
+       return offset;
+}
+
+static int alx_alloc_rx_ring(struct alx_priv *alx, struct alx_rx_queue *rxq,
+                            int offset)
+{
+       rxq->bufs = kcalloc(rxq->count, sizeof(struct alx_buffer), GFP_KERNEL);
+       if (!rxq->bufs)
+               return -ENOMEM;
+
+       rxq->rrd = alx->descmem.virt + offset;
+       rxq->rrd_dma = alx->descmem.dma + offset;
+       offset += sizeof(struct alx_rrd) * rxq->count;
+
+       rxq->rfd = alx->descmem.virt + offset;
+       rxq->rfd_dma = alx->descmem.dma + offset;
+       offset += sizeof(struct alx_rfd) * rxq->count;
+
+       return offset;
+}
+
+static int alx_alloc_rings(struct alx_priv *alx)
+{
+       int i, offset = 0;
 
        /* physical tx/rx ring descriptors
         *
@@ -593,7 +656,8 @@ static int alx_alloc_descriptors(struct alx_priv *alx)
         * 4G boundary (hardware has a single register for high 32 bits
         * of addresses only)
         */
-       alx->descmem.size = sizeof(struct alx_txd) * alx->tx_ringsz +
+       alx->descmem.size = sizeof(struct alx_txd) * alx->tx_ringsz *
+                           alx->num_txq +
                            sizeof(struct alx_rrd) * alx->rx_ringsz +
                            sizeof(struct alx_rfd) * alx->rx_ringsz;
        alx->descmem.virt = dma_zalloc_coherent(&alx->hw.pdev->dev,
@@ -601,87 +665,178 @@ static int alx_alloc_descriptors(struct alx_priv *alx)
                                                &alx->descmem.dma,
                                                GFP_KERNEL);
        if (!alx->descmem.virt)
-               goto out_free;
-
-       alx->txq.tpd = alx->descmem.virt;
-       alx->txq.tpd_dma = alx->descmem.dma;
+               return -ENOMEM;
 
-       /* alignment requirement for next block */
+       /* alignment requirements */
        BUILD_BUG_ON(sizeof(struct alx_txd) % 8);
+       BUILD_BUG_ON(sizeof(struct alx_rrd) % 8);
 
-       alx->rxq.rrd =
-               (void *)((u8 *)alx->descmem.virt +
-                        sizeof(struct alx_txd) * alx->tx_ringsz);
-       alx->rxq.rrd_dma = alx->descmem.dma +
-                          sizeof(struct alx_txd) * alx->tx_ringsz;
+       for (i = 0; i < alx->num_txq; i++) {
+               offset = alx_alloc_tx_ring(alx, alx->qnapi[i]->txq, offset);
+               if (offset < 0) {
+                       netdev_err(alx->dev, "Allocation of tx buffer failed!\n");
+                       return -ENOMEM;
+               }
+       }
 
-       /* alignment requirement for next block */
-       BUILD_BUG_ON(sizeof(struct alx_rrd) % 8);
+       offset = alx_alloc_rx_ring(alx, alx->qnapi[0]->rxq, offset);
+       if (offset < 0) {
+               netdev_err(alx->dev, "Allocation of rx buffer failed!\n");
+               return -ENOMEM;
+       }
 
-       alx->rxq.rfd =
-               (void *)((u8 *)alx->descmem.virt +
-                        sizeof(struct alx_txd) * alx->tx_ringsz +
-                        sizeof(struct alx_rrd) * alx->rx_ringsz);
-       alx->rxq.rfd_dma = alx->descmem.dma +
-                          sizeof(struct alx_txd) * alx->tx_ringsz +
-                          sizeof(struct alx_rrd) * alx->rx_ringsz;
+       alx_reinit_rings(alx);
 
        return 0;
-out_free:
-       kfree(alx->txq.bufs);
-       kfree(alx->rxq.bufs);
-       return -ENOMEM;
 }
 
-static int alx_alloc_rings(struct alx_priv *alx)
+static void alx_free_rings(struct alx_priv *alx)
 {
-       int err;
+       int i;
 
-       err = alx_alloc_descriptors(alx);
-       if (err)
-               return err;
+       alx_free_buffers(alx);
 
-       alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
-       alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
+       for (i = 0; i < alx->num_txq; i++)
+               if (alx->qnapi[i] && alx->qnapi[i]->txq)
+                       kfree(alx->qnapi[i]->txq->bufs);
 
-       netif_napi_add(alx->dev, &alx->napi, alx_poll, 64);
+       if (alx->qnapi[0] && alx->qnapi[0]->rxq)
+               kfree(alx->qnapi[0]->rxq->bufs);
 
-       alx_reinit_rings(alx);
-       return 0;
+       if (!alx->descmem.virt)
+               dma_free_coherent(&alx->hw.pdev->dev,
+                                 alx->descmem.size,
+                                 alx->descmem.virt,
+                                 alx->descmem.dma);
 }
 
-static void alx_free_rings(struct alx_priv *alx)
+static void alx_free_napis(struct alx_priv *alx)
 {
-       netif_napi_del(&alx->napi);
-       alx_free_buffers(alx);
+       struct alx_napi *np;
+       int i;
 
-       kfree(alx->txq.bufs);
-       kfree(alx->rxq.bufs);
+       for (i = 0; i < alx->num_napi; i++) {
+               np = alx->qnapi[i];
+               if (!np)
+                       continue;
 
-       dma_free_coherent(&alx->hw.pdev->dev,
-                         alx->descmem.size,
-                         alx->descmem.virt,
-                         alx->descmem.dma);
+               netif_napi_del(&np->napi);
+               kfree(np->txq);
+               kfree(np->rxq);
+               kfree(np);
+               alx->qnapi[i] = NULL;
+       }
+}
+
+static const u16 tx_pidx_reg[] = {ALX_TPD_PRI0_PIDX, ALX_TPD_PRI1_PIDX,
+                                 ALX_TPD_PRI2_PIDX, ALX_TPD_PRI3_PIDX};
+static const u16 tx_cidx_reg[] = {ALX_TPD_PRI0_CIDX, ALX_TPD_PRI1_CIDX,
+                                 ALX_TPD_PRI2_CIDX, ALX_TPD_PRI3_CIDX};
+static const u32 tx_vect_mask[] = {ALX_ISR_TX_Q0, ALX_ISR_TX_Q1,
+                                  ALX_ISR_TX_Q2, ALX_ISR_TX_Q3};
+static const u32 rx_vect_mask[] = {ALX_ISR_RX_Q0, ALX_ISR_RX_Q1,
+                                  ALX_ISR_RX_Q2, ALX_ISR_RX_Q3,
+                                  ALX_ISR_RX_Q4, ALX_ISR_RX_Q5,
+                                  ALX_ISR_RX_Q6, ALX_ISR_RX_Q7};
+
+static int alx_alloc_napis(struct alx_priv *alx)
+{
+       struct alx_napi *np;
+       struct alx_rx_queue *rxq;
+       struct alx_tx_queue *txq;
+       int i;
+
+       alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
+
+       /* allocate alx_napi structures */
+       for (i = 0; i < alx->num_napi; i++) {
+               np = kzalloc(sizeof(struct alx_napi), GFP_KERNEL);
+               if (!np)
+                       goto err_out;
+
+               np->alx = alx;
+               netif_napi_add(alx->dev, &np->napi, alx_poll, 64);
+               alx->qnapi[i] = np;
+       }
+
+       /* allocate tx queues */
+       for (i = 0; i < alx->num_txq; i++) {
+               np = alx->qnapi[i];
+               txq = kzalloc(sizeof(*txq), GFP_KERNEL);
+               if (!txq)
+                       goto err_out;
+
+               np->txq = txq;
+               txq->p_reg = tx_pidx_reg[i];
+               txq->c_reg = tx_cidx_reg[i];
+               txq->queue_idx = i;
+               txq->count = alx->tx_ringsz;
+               txq->netdev = alx->dev;
+               txq->dev = &alx->hw.pdev->dev;
+               np->vec_mask |= tx_vect_mask[i];
+               alx->int_mask |= tx_vect_mask[i];
+       }
+
+       /* allocate rx queues */
+       np = alx->qnapi[0];
+       rxq = kzalloc(sizeof(*rxq), GFP_KERNEL);
+       if (!rxq)
+               goto err_out;
+
+       np->rxq = rxq;
+       rxq->np = alx->qnapi[0];
+       rxq->queue_idx = 0;
+       rxq->count = alx->rx_ringsz;
+       rxq->netdev = alx->dev;
+       rxq->dev = &alx->hw.pdev->dev;
+       np->vec_mask |= rx_vect_mask[0];
+       alx->int_mask |= rx_vect_mask[0];
+
+       return 0;
+
+err_out:
+       netdev_err(alx->dev, "error allocating internal structures\n");
+       alx_free_napis(alx);
+       return -ENOMEM;
 }
 
+static const int txq_vec_mapping_shift[] = {
+       0, ALX_MSI_MAP_TBL1_TXQ0_SHIFT,
+       0, ALX_MSI_MAP_TBL1_TXQ1_SHIFT,
+       1, ALX_MSI_MAP_TBL2_TXQ2_SHIFT,
+       1, ALX_MSI_MAP_TBL2_TXQ3_SHIFT,
+};
+
 static void alx_config_vector_mapping(struct alx_priv *alx)
 {
        struct alx_hw *hw = &alx->hw;
-       u32 tbl = 0;
+       u32 tbl[2] = {0, 0};
+       int i, vector, idx, shift;
 
        if (alx->flags & ALX_FLAG_USING_MSIX) {
-               tbl |= 1 << ALX_MSI_MAP_TBL1_TXQ0_SHIFT;
-               tbl |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT;
+               /* tx mappings */
+               for (i = 0, vector = 1; i < alx->num_txq; i++, vector++) {
+                       idx = txq_vec_mapping_shift[i * 2];
+                       shift = txq_vec_mapping_shift[i * 2 + 1];
+                       tbl[idx] |= vector << shift;
+               }
+
+               /* rx mapping */
+               tbl[0] |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT;
        }
 
-       alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl);
-       alx_write_mem32(hw, ALX_MSI_MAP_TBL2, 0);
+       alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl[0]);
+       alx_write_mem32(hw, ALX_MSI_MAP_TBL2, tbl[1]);
        alx_write_mem32(hw, ALX_MSI_ID_MAP, 0);
 }
 
 static bool alx_enable_msix(struct alx_priv *alx)
 {
-       int i, err, num_vec = 2;
+       int i, err, num_vec, num_txq, num_rxq;
+
+       num_txq = min_t(int, num_online_cpus(), ALX_MAX_TX_QUEUES);
+       num_rxq = 1;
+       num_vec = max_t(int, num_txq, num_rxq) + 1;
 
        alx->msix_entries = kcalloc(num_vec, sizeof(struct msix_entry),
                                    GFP_KERNEL);
@@ -701,6 +856,10 @@ static bool alx_enable_msix(struct alx_priv *alx)
        }
 
        alx->num_vec = num_vec;
+       alx->num_napi = num_vec - 1;
+       alx->num_txq = num_txq;
+       alx->num_rxq = num_rxq;
+
        return true;
 }
 
@@ -714,14 +873,29 @@ static int alx_request_msix(struct alx_priv *alx)
        if (err)
                goto out_err;
 
-       vector++;
-       sprintf(alx->irq_lbl, "%s-TxRx-0", netdev->name);
-
-       err = request_irq(alx->msix_entries[vector].vector,
-                         alx_intr_msix_ring, 0, alx->irq_lbl, alx);
+       for (i = 0; i < alx->num_napi; i++) {
+               struct alx_napi *np = alx->qnapi[i];
+
+               vector++;
+
+               if (np->txq && np->rxq)
+                       sprintf(np->irq_lbl, "%s-TxRx-%u", netdev->name,
+                               np->txq->queue_idx);
+               else if (np->txq)
+                       sprintf(np->irq_lbl, "%s-tx-%u", netdev->name,
+                               np->txq->queue_idx);
+               else if (np->rxq)
+                       sprintf(np->irq_lbl, "%s-rx-%u", netdev->name,
+                               np->rxq->queue_idx);
+               else
+                       sprintf(np->irq_lbl, "%s-unused", netdev->name);
+
+               np->vec_idx = vector;
+               err = request_irq(alx->msix_entries[vector].vector,
+                                 alx_intr_msix_ring, 0, np->irq_lbl, np);
                if (err)
                        goto out_free;
-
+       }
        return 0;
 
 out_free:
@@ -729,7 +903,8 @@ out_free:
 
        vector--;
        for (i = 0; i < vector; i++)
-               free_irq(alx->msix_entries[free_vector++].vector, alx);
+               free_irq(alx->msix_entries[free_vector++].vector,
+                        alx->qnapi[i]);
 
 out_err:
        return err;
@@ -744,6 +919,9 @@ static void alx_init_intr(struct alx_priv *alx, bool msix)
 
        if (!(alx->flags & ALX_FLAG_USING_MSIX)) {
                alx->num_vec = 1;
+               alx->num_napi = 1;
+               alx->num_txq = 1;
+               alx->num_rxq = 1;
 
                if (!pci_enable_msi(alx->hw.pdev))
                        alx->flags |= ALX_FLAG_USING_MSI;
@@ -799,6 +977,25 @@ static void alx_irq_disable(struct alx_priv *alx)
        }
 }
 
+static int alx_realloc_resources(struct alx_priv *alx)
+{
+       int err;
+
+       alx_free_rings(alx);
+       alx_free_napis(alx);
+       alx_disable_advanced_intr(alx);
+
+       err = alx_alloc_napis(alx);
+       if (err)
+               return err;
+
+       err = alx_alloc_rings(alx);
+       if (err)
+               return err;
+
+       return 0;
+}
+
 static int alx_request_irq(struct alx_priv *alx)
 {
        struct pci_dev *pdev = alx->hw.pdev;
@@ -815,8 +1012,9 @@ static int alx_request_irq(struct alx_priv *alx)
                        goto out;
 
                /* msix request failed, realloc resources */
-               alx_disable_advanced_intr(alx);
-               alx_init_intr(alx, false);
+               err = alx_realloc_resources(alx);
+               if (err)
+                       goto out;
        }
 
        if (alx->flags & ALX_FLAG_USING_MSI) {
@@ -845,12 +1043,13 @@ out:
 static void alx_free_irq(struct alx_priv *alx)
 {
        struct pci_dev *pdev = alx->hw.pdev;
-       int i;
+       int i, vector = 0;
 
        if (alx->flags & ALX_FLAG_USING_MSIX) {
-               /* we have only 2 vectors without multi queue support */
-               for (i = 0; i < 2; i++)
-                       free_irq(alx->msix_entries[i].vector, alx);
+               free_irq(alx->msix_entries[vector++].vector, alx);
+               for (i = 0; i < alx->num_napi; i++)
+                       free_irq(alx->msix_entries[vector++].vector,
+                                alx->qnapi[i]);
        } else {
                free_irq(pdev->irq, alx);
        }
@@ -935,11 +1134,14 @@ static netdev_features_t alx_fix_features(struct net_device *netdev,
 
 static void alx_netif_stop(struct alx_priv *alx)
 {
+       int i;
+
        netif_trans_update(alx->dev);
        if (netif_carrier_ok(alx->dev)) {
                netif_carrier_off(alx->dev);
                netif_tx_disable(alx->dev);
-               napi_disable(&alx->napi);
+               for (i = 0; i < alx->num_napi; i++)
+                       napi_disable(&alx->qnapi[i]->napi);
        }
 }
 
@@ -1008,8 +1210,11 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 
 static void alx_netif_start(struct alx_priv *alx)
 {
+       int i;
+
        netif_tx_wake_all_queues(alx->dev);
-       napi_enable(&alx->napi);
+       for (i = 0; i < alx->num_napi; i++)
+               napi_enable(&alx->qnapi[i]->napi);
        netif_carrier_on(alx->dev);
 }
 
@@ -1017,21 +1222,28 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 {
        int err;
 
-       alx_init_intr(alx, msix);
+       alx_init_intr(alx, true);
 
        if (!resume)
                netif_carrier_off(alx->dev);
 
-       err = alx_alloc_rings(alx);
+       err = alx_alloc_napis(alx);
        if (err)
                goto out_disable_adv_intr;
 
+       err = alx_alloc_rings(alx);
+       if (err)
+               goto out_free_rings;
+
        alx_configure(alx);
 
        err = alx_request_irq(alx);
        if (err)
                goto out_free_rings;
 
+       netif_set_real_num_tx_queues(alx->dev, alx->num_txq);
+       netif_set_real_num_rx_queues(alx->dev, alx->num_rxq);
+
        /* clear old interrupts */
        alx_write_mem32(&alx->hw, ALX_ISR, ~(u32)ALX_ISR_DIS);
 
@@ -1045,6 +1257,7 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 
 out_free_rings:
        alx_free_rings(alx);
+       alx_free_napis(alx);
 out_disable_adv_intr:
        alx_disable_advanced_intr(alx);
        return err;
@@ -1055,6 +1268,7 @@ static void __alx_stop(struct alx_priv *alx)
        alx_halt(alx);
        alx_free_irq(alx);
        alx_free_rings(alx);
+       alx_free_napis(alx);
 }
 
 static const char *alx_speed_desc(struct alx_hw *hw)
@@ -1237,9 +1451,8 @@ static int alx_tso(struct sk_buff *skb, struct alx_txd *first)
        return 1;
 }
 
-static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
+static int alx_map_tx_skb(struct alx_tx_queue *txq, struct sk_buff *skb)
 {
-       struct alx_tx_queue *txq = &alx->txq;
        struct alx_txd *tpd, *first_tpd;
        dma_addr_t dma;
        int maplen, f, first_idx = txq->write_idx;
@@ -1248,7 +1461,7 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
        tpd = first_tpd;
 
        if (tpd->word1 & (1 << TPD_LSO_V2_SHIFT)) {
-               if (++txq->write_idx == alx->tx_ringsz)
+               if (++txq->write_idx == txq->count)
                        txq->write_idx = 0;
 
                tpd = &txq->tpd[txq->write_idx];
@@ -1258,9 +1471,9 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
        }
 
        maplen = skb_headlen(skb);
-       dma = dma_map_single(&alx->hw.pdev->dev, skb->data, maplen,
+       dma = dma_map_single(txq->dev, skb->data, maplen,
                             DMA_TO_DEVICE);
-       if (dma_mapping_error(&alx->hw.pdev->dev, dma))
+       if (dma_mapping_error(txq->dev, dma))
                goto err_dma;
 
        dma_unmap_len_set(&txq->bufs[txq->write_idx], size, maplen);
@@ -1274,16 +1487,16 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 
                frag = &skb_shinfo(skb)->frags[f];
 
-               if (++txq->write_idx == alx->tx_ringsz)
+               if (++txq->write_idx == txq->count)
                        txq->write_idx = 0;
                tpd = &txq->tpd[txq->write_idx];
 
                tpd->word1 = first_tpd->word1;
 
                maplen = skb_frag_size(frag);
-               dma = skb_frag_dma_map(&alx->hw.pdev->dev, frag, 0,
+               dma = skb_frag_dma_map(txq->dev, frag, 0,
                                       maplen, DMA_TO_DEVICE);
-               if (dma_mapping_error(&alx->hw.pdev->dev, dma))
+               if (dma_mapping_error(txq->dev, dma))
                        goto err_dma;
                dma_unmap_len_set(&txq->bufs[txq->write_idx], size, maplen);
                dma_unmap_addr_set(&txq->bufs[txq->write_idx], dma, dma);
@@ -1296,7 +1509,7 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
        tpd->word1 |= cpu_to_le32(1 << TPD_EOP_SHIFT);
        txq->bufs[txq->write_idx].skb = skb;
 
-       if (++txq->write_idx == alx->tx_ringsz)
+       if (++txq->write_idx == txq->count)
                txq->write_idx = 0;
 
        return 0;
@@ -1304,23 +1517,24 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 err_dma:
        f = first_idx;
        while (f != txq->write_idx) {
-               alx_free_txbuf(alx, f);
-               if (++f == alx->tx_ringsz)
+               alx_free_txbuf(txq, f);
+               if (++f == txq->count)
                        f = 0;
        }
        return -ENOMEM;
 }
 
-static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
-                                 struct net_device *netdev)
+static netdev_tx_t alx_start_xmit_ring(struct sk_buff *skb,
+                                      struct alx_tx_queue *txq)
 {
-       struct alx_priv *alx = netdev_priv(netdev);
-       struct alx_tx_queue *txq = &alx->txq;
+       struct alx_priv *alx;
        struct alx_txd *first;
        int tso;
 
-       if (alx_tpd_avail(alx) < alx_tpd_req(skb)) {
-               netif_stop_queue(alx->dev);
+       alx = netdev_priv(txq->netdev);
+
+       if (alx_tpd_avail(txq) < alx_tpd_req(skb)) {
+               netif_tx_stop_queue(alx_get_tx_queue(txq));
                goto drop;
        }
 
@@ -1333,17 +1547,17 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
        else if (!tso && alx_tx_csum(skb, first))
                goto drop;
 
-       if (alx_map_tx_skb(alx, skb) < 0)
+       if (alx_map_tx_skb(txq, skb) < 0)
                goto drop;
 
-       netdev_sent_queue(alx->dev, skb->len);
+       netdev_tx_sent_queue(alx_get_tx_queue(txq), skb->len);
 
        /* flush updates before updating hardware */
        wmb();
-       alx_write_mem16(&alx->hw, ALX_TPD_PRI0_PIDX, txq->write_idx);
+       alx_write_mem16(&alx->hw, txq->p_reg, txq->write_idx);
 
-       if (alx_tpd_avail(alx) < alx->tx_ringsz/8)
-               netif_stop_queue(alx->dev);
+       if (alx_tpd_avail(txq) < txq->count / 8)
+               netif_tx_stop_queue(alx_get_tx_queue(txq));
 
        return NETDEV_TX_OK;
 
@@ -1352,6 +1566,13 @@ drop:
        return NETDEV_TX_OK;
 }
 
+static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
+                                 struct net_device *netdev)
+{
+       struct alx_priv *alx = netdev_priv(netdev);
+       return alx_start_xmit_ring(skb, alx_tx_queue_mapping(alx, skb));
+}
+
 static void alx_tx_timeout(struct net_device *dev)
 {
        struct alx_priv *alx = netdev_priv(dev);
@@ -1409,10 +1630,12 @@ static int alx_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 static void alx_poll_controller(struct net_device *netdev)
 {
        struct alx_priv *alx = netdev_priv(netdev);
+       int i;
 
        if (alx->flags & ALX_FLAG_USING_MSIX) {
                alx_intr_msix_misc(0, alx);
-               alx_intr_msix_ring(0, alx);
+               for (i = 0; i < alx->num_txq; i++)
+                       alx_intr_msix_ring(0, alx->qnapi[i]);
        } else if (alx->flags & ALX_FLAG_USING_MSI)
                alx_intr_msi(0, alx);
        else
@@ -1529,7 +1752,8 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto out_pci_release;
        }
 
-       netdev = alloc_etherdev(sizeof(*alx));
+       netdev = alloc_etherdev_mqs(sizeof(*alx),
+                                   ALX_MAX_TX_QUEUES, 1);
        if (!netdev) {
                err = -ENOMEM;
                goto out_pci_release;
index 872b7abb01962e2d70d327c6f7137241fedd9008..cfe86a20c899dd3bf58e5780e138b9bd7056daf5 100644 (file)
 
 #include "atl1c.h"
 
-static int atl1c_get_settings(struct net_device *netdev,
-                             struct ethtool_cmd *ecmd)
+static int atl1c_get_link_ksettings(struct net_device *netdev,
+                                   struct ethtool_link_ksettings *cmd)
 {
        struct atl1c_adapter *adapter = netdev_priv(netdev);
        struct atl1c_hw *hw = &adapter->hw;
+       u32 supported, advertising;
 
-       ecmd->supported = (SUPPORTED_10baseT_Half  |
+       supported = (SUPPORTED_10baseT_Half  |
                           SUPPORTED_10baseT_Full  |
                           SUPPORTED_100baseT_Half |
                           SUPPORTED_100baseT_Full |
                           SUPPORTED_Autoneg       |
                           SUPPORTED_TP);
        if (hw->link_cap_flags & ATL1C_LINK_CAP_1000M)
-               ecmd->supported |= SUPPORTED_1000baseT_Full;
+               supported |= SUPPORTED_1000baseT_Full;
 
-       ecmd->advertising = ADVERTISED_TP;
+       advertising = ADVERTISED_TP;
 
-       ecmd->advertising |= hw->autoneg_advertised;
+       advertising |= hw->autoneg_advertised;
 
-       ecmd->port = PORT_TP;
-       ecmd->phy_address = 0;
-       ecmd->transceiver = XCVR_INTERNAL;
+       cmd->base.port = PORT_TP;
+       cmd->base.phy_address = 0;
 
        if (adapter->link_speed != SPEED_0) {
-               ethtool_cmd_speed_set(ecmd, adapter->link_speed);
+               cmd->base.speed = adapter->link_speed;
                if (adapter->link_duplex == FULL_DUPLEX)
-                       ecmd->duplex = DUPLEX_FULL;
+                       cmd->base.duplex = DUPLEX_FULL;
                else
-                       ecmd->duplex = DUPLEX_HALF;
+                       cmd->base.duplex = DUPLEX_HALF;
        } else {
-               ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-               ecmd->duplex = DUPLEX_UNKNOWN;
+               cmd->base.speed = SPEED_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
        }
 
-       ecmd->autoneg = AUTONEG_ENABLE;
+       cmd->base.autoneg = AUTONEG_ENABLE;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
+
        return 0;
 }
 
-static int atl1c_set_settings(struct net_device *netdev,
-                             struct ethtool_cmd *ecmd)
+static int atl1c_set_link_ksettings(struct net_device *netdev,
+                                   const struct ethtool_link_ksettings *cmd)
 {
        struct atl1c_adapter *adapter = netdev_priv(netdev);
        struct atl1c_hw *hw = &adapter->hw;
@@ -74,12 +80,12 @@ static int atl1c_set_settings(struct net_device *netdev,
        while (test_and_set_bit(__AT_RESETTING, &adapter->flags))
                msleep(1);
 
-       if (ecmd->autoneg == AUTONEG_ENABLE) {
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
                autoneg_advertised = ADVERTISED_Autoneg;
        } else {
-               u32 speed = ethtool_cmd_speed(ecmd);
+               u32 speed = cmd->base.speed;
                if (speed == SPEED_1000) {
-                       if (ecmd->duplex != DUPLEX_FULL) {
+                       if (cmd->base.duplex != DUPLEX_FULL) {
                                if (netif_msg_link(adapter))
                                        dev_warn(&adapter->pdev->dev,
                                                "1000M half is invalid\n");
@@ -88,12 +94,12 @@ static int atl1c_set_settings(struct net_device *netdev,
                        }
                        autoneg_advertised = ADVERTISED_1000baseT_Full;
                } else if (speed == SPEED_100) {
-                       if (ecmd->duplex == DUPLEX_FULL)
+                       if (cmd->base.duplex == DUPLEX_FULL)
                                autoneg_advertised = ADVERTISED_100baseT_Full;
                        else
                                autoneg_advertised = ADVERTISED_100baseT_Half;
                } else {
-                       if (ecmd->duplex == DUPLEX_FULL)
+                       if (cmd->base.duplex == DUPLEX_FULL)
                                autoneg_advertised = ADVERTISED_10baseT_Full;
                        else
                                autoneg_advertised = ADVERTISED_10baseT_Half;
@@ -284,8 +290,6 @@ static int atl1c_nway_reset(struct net_device *netdev)
 }
 
 static const struct ethtool_ops atl1c_ethtool_ops = {
-       .get_settings           = atl1c_get_settings,
-       .set_settings           = atl1c_set_settings,
        .get_drvinfo            = atl1c_get_drvinfo,
        .get_regs_len           = atl1c_get_regs_len,
        .get_regs               = atl1c_get_regs,
@@ -297,6 +301,8 @@ static const struct ethtool_ops atl1c_ethtool_ops = {
        .get_link               = ethtool_op_get_link,
        .get_eeprom_len         = atl1c_get_eeprom_len,
        .get_eeprom             = atl1c_get_eeprom,
+       .get_link_ksettings     = atl1c_get_link_ksettings,
+       .set_link_ksettings     = atl1c_set_link_ksettings,
 };
 
 void atl1c_set_ethtool_ops(struct net_device *netdev)
index 8e3dbd4d9f79eab2ac291f903816667d6f13b72b..cb489e7e8374b5247cf1d431903b04ea81f2474a 100644 (file)
 
 #include "atl1e.h"
 
-static int atl1e_get_settings(struct net_device *netdev,
-                             struct ethtool_cmd *ecmd)
+static int atl1e_get_link_ksettings(struct net_device *netdev,
+                                   struct ethtool_link_ksettings *cmd)
 {
        struct atl1e_adapter *adapter = netdev_priv(netdev);
        struct atl1e_hw *hw = &adapter->hw;
+       u32 supported, advertising;
 
-       ecmd->supported = (SUPPORTED_10baseT_Half  |
+       supported = (SUPPORTED_10baseT_Half  |
                           SUPPORTED_10baseT_Full  |
                           SUPPORTED_100baseT_Half |
                           SUPPORTED_100baseT_Full |
                           SUPPORTED_Autoneg       |
                           SUPPORTED_TP);
        if (hw->nic_type == athr_l1e)
-               ecmd->supported |= SUPPORTED_1000baseT_Full;
+               supported |= SUPPORTED_1000baseT_Full;
 
-       ecmd->advertising = ADVERTISED_TP;
+       advertising = ADVERTISED_TP;
 
-       ecmd->advertising |= ADVERTISED_Autoneg;
-       ecmd->advertising |= hw->autoneg_advertised;
+       advertising |= ADVERTISED_Autoneg;
+       advertising |= hw->autoneg_advertised;
 
-       ecmd->port = PORT_TP;
-       ecmd->phy_address = 0;
-       ecmd->transceiver = XCVR_INTERNAL;
+       cmd->base.port = PORT_TP;
+       cmd->base.phy_address = 0;
 
        if (adapter->link_speed != SPEED_0) {
-               ethtool_cmd_speed_set(ecmd, adapter->link_speed);
+               cmd->base.speed = adapter->link_speed;
                if (adapter->link_duplex == FULL_DUPLEX)
-                       ecmd->duplex = DUPLEX_FULL;
+                       cmd->base.duplex = DUPLEX_FULL;
                else
-                       ecmd->duplex = DUPLEX_HALF;
+                       cmd->base.duplex = DUPLEX_HALF;
        } else {
-               ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-               ecmd->duplex = DUPLEX_UNKNOWN;
+               cmd->base.speed = SPEED_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
        }
 
-       ecmd->autoneg = AUTONEG_ENABLE;
+       cmd->base.autoneg = AUTONEG_ENABLE;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
+
        return 0;
 }
 
-static int atl1e_set_settings(struct net_device *netdev,
-                             struct ethtool_cmd *ecmd)
+static int atl1e_set_link_ksettings(struct net_device *netdev,
+                                   const struct ethtool_link_ksettings *cmd)
 {
        struct atl1e_adapter *adapter = netdev_priv(netdev);
        struct atl1e_hw *hw = &adapter->hw;
+       u32 advertising;
+
+       ethtool_convert_link_mode_to_legacy_u32(&advertising,
+                                               cmd->link_modes.advertising);
 
        while (test_and_set_bit(__AT_RESETTING, &adapter->flags))
                msleep(1);
 
-       if (ecmd->autoneg == AUTONEG_ENABLE) {
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
                u16 adv4, adv9;
 
-               if ((ecmd->advertising&ADVERTISE_1000_FULL)) {
+               if (advertising & ADVERTISE_1000_FULL) {
                        if (hw->nic_type == athr_l1e) {
                                hw->autoneg_advertised =
-                                       ecmd->advertising & AT_ADV_MASK;
+                                       advertising & AT_ADV_MASK;
                        } else {
                                clear_bit(__AT_RESETTING, &adapter->flags);
                                return -EINVAL;
                        }
-               } else if (ecmd->advertising&ADVERTISE_1000_HALF) {
+               } else if (advertising & ADVERTISE_1000_HALF) {
                        clear_bit(__AT_RESETTING, &adapter->flags);
                        return -EINVAL;
                } else {
                        hw->autoneg_advertised =
-                               ecmd->advertising & AT_ADV_MASK;
+                               advertising & AT_ADV_MASK;
                }
-               ecmd->advertising = hw->autoneg_advertised |
+               advertising = hw->autoneg_advertised |
                                    ADVERTISED_TP | ADVERTISED_Autoneg;
 
                adv4 = hw->mii_autoneg_adv_reg & ~ADVERTISE_ALL;
@@ -367,8 +377,6 @@ static int atl1e_nway_reset(struct net_device *netdev)
 }
 
 static const struct ethtool_ops atl1e_ethtool_ops = {
-       .get_settings           = atl1e_get_settings,
-       .set_settings           = atl1e_set_settings,
        .get_drvinfo            = atl1e_get_drvinfo,
        .get_regs_len           = atl1e_get_regs_len,
        .get_regs               = atl1e_get_regs,
@@ -380,6 +388,8 @@ static const struct ethtool_ops atl1e_ethtool_ops = {
        .get_eeprom_len         = atl1e_get_eeprom_len,
        .get_eeprom             = atl1e_get_eeprom,
        .set_eeprom             = atl1e_set_eeprom,
+       .get_link_ksettings     = atl1e_get_link_ksettings,
+       .set_link_ksettings     = atl1e_set_link_ksettings,
 };
 
 void atl1e_set_ethtool_ops(struct net_device *netdev)
index 9aede18aa70f4470a17100e21591d303864c20b9..7dad8e4b9d2a8aabdf6a5b8f12d3046e51313a07 100644 (file)
@@ -3214,66 +3214,72 @@ static int atl1_get_sset_count(struct net_device *netdev, int sset)
        }
 }
 
-static int atl1_get_settings(struct net_device *netdev,
-       struct ethtool_cmd *ecmd)
+static int atl1_get_link_ksettings(struct net_device *netdev,
+                                  struct ethtool_link_ksettings *cmd)
 {
        struct atl1_adapter *adapter = netdev_priv(netdev);
        struct atl1_hw *hw = &adapter->hw;
+       u32 supported, advertising;
 
-       ecmd->supported = (SUPPORTED_10baseT_Half |
+       supported = (SUPPORTED_10baseT_Half |
                           SUPPORTED_10baseT_Full |
                           SUPPORTED_100baseT_Half |
                           SUPPORTED_100baseT_Full |
                           SUPPORTED_1000baseT_Full |
                           SUPPORTED_Autoneg | SUPPORTED_TP);
-       ecmd->advertising = ADVERTISED_TP;
+       advertising = ADVERTISED_TP;
        if (hw->media_type == MEDIA_TYPE_AUTO_SENSOR ||
            hw->media_type == MEDIA_TYPE_1000M_FULL) {
-               ecmd->advertising |= ADVERTISED_Autoneg;
+               advertising |= ADVERTISED_Autoneg;
                if (hw->media_type == MEDIA_TYPE_AUTO_SENSOR) {
-                       ecmd->advertising |= ADVERTISED_Autoneg;
-                       ecmd->advertising |=
+                       advertising |= ADVERTISED_Autoneg;
+                       advertising |=
                            (ADVERTISED_10baseT_Half |
                             ADVERTISED_10baseT_Full |
                             ADVERTISED_100baseT_Half |
                             ADVERTISED_100baseT_Full |
                             ADVERTISED_1000baseT_Full);
                } else
-                       ecmd->advertising |= (ADVERTISED_1000baseT_Full);
+                       advertising |= (ADVERTISED_1000baseT_Full);
        }
-       ecmd->port = PORT_TP;
-       ecmd->phy_address = 0;
-       ecmd->transceiver = XCVR_INTERNAL;
+       cmd->base.port = PORT_TP;
+       cmd->base.phy_address = 0;
 
        if (netif_carrier_ok(adapter->netdev)) {
                u16 link_speed, link_duplex;
                atl1_get_speed_and_duplex(hw, &link_speed, &link_duplex);
-               ethtool_cmd_speed_set(ecmd, link_speed);
+               cmd->base.speed = link_speed;
                if (link_duplex == FULL_DUPLEX)
-                       ecmd->duplex = DUPLEX_FULL;
+                       cmd->base.duplex = DUPLEX_FULL;
                else
-                       ecmd->duplex = DUPLEX_HALF;
+                       cmd->base.duplex = DUPLEX_HALF;
        } else {
-               ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-               ecmd->duplex = DUPLEX_UNKNOWN;
+               cmd->base.speed = SPEED_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
        }
        if (hw->media_type == MEDIA_TYPE_AUTO_SENSOR ||
            hw->media_type == MEDIA_TYPE_1000M_FULL)
-               ecmd->autoneg = AUTONEG_ENABLE;
+               cmd->base.autoneg = AUTONEG_ENABLE;
        else
-               ecmd->autoneg = AUTONEG_DISABLE;
+               cmd->base.autoneg = AUTONEG_DISABLE;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        return 0;
 }
 
-static int atl1_set_settings(struct net_device *netdev,
-       struct ethtool_cmd *ecmd)
+static int atl1_set_link_ksettings(struct net_device *netdev,
+                                  const struct ethtool_link_ksettings *cmd)
 {
        struct atl1_adapter *adapter = netdev_priv(netdev);
        struct atl1_hw *hw = &adapter->hw;
        u16 phy_data;
        int ret_val = 0;
        u16 old_media_type = hw->media_type;
+       u32 advertising;
 
        if (netif_running(adapter->netdev)) {
                if (netif_msg_link(adapter))
@@ -3282,12 +3288,12 @@ static int atl1_set_settings(struct net_device *netdev,
                atl1_down(adapter);
        }
 
-       if (ecmd->autoneg == AUTONEG_ENABLE)
+       if (cmd->base.autoneg == AUTONEG_ENABLE)
                hw->media_type = MEDIA_TYPE_AUTO_SENSOR;
        else {
-               u32 speed = ethtool_cmd_speed(ecmd);
+               u32 speed = cmd->base.speed;
                if (speed == SPEED_1000) {
-                       if (ecmd->duplex != DUPLEX_FULL) {
+                       if (cmd->base.duplex != DUPLEX_FULL) {
                                if (netif_msg_link(adapter))
                                        dev_warn(&adapter->pdev->dev,
                                                "1000M half is invalid\n");
@@ -3296,12 +3302,12 @@ static int atl1_set_settings(struct net_device *netdev,
                        }
                        hw->media_type = MEDIA_TYPE_1000M_FULL;
                } else if (speed == SPEED_100) {
-                       if (ecmd->duplex == DUPLEX_FULL)
+                       if (cmd->base.duplex == DUPLEX_FULL)
                                hw->media_type = MEDIA_TYPE_100M_FULL;
                        else
                                hw->media_type = MEDIA_TYPE_100M_HALF;
                } else {
-                       if (ecmd->duplex == DUPLEX_FULL)
+                       if (cmd->base.duplex == DUPLEX_FULL)
                                hw->media_type = MEDIA_TYPE_10M_FULL;
                        else
                                hw->media_type = MEDIA_TYPE_10M_HALF;
@@ -3309,7 +3315,7 @@ static int atl1_set_settings(struct net_device *netdev,
        }
        switch (hw->media_type) {
        case MEDIA_TYPE_AUTO_SENSOR:
-               ecmd->advertising =
+               advertising =
                    ADVERTISED_10baseT_Half |
                    ADVERTISED_10baseT_Full |
                    ADVERTISED_100baseT_Half |
@@ -3318,12 +3324,12 @@ static int atl1_set_settings(struct net_device *netdev,
                    ADVERTISED_Autoneg | ADVERTISED_TP;
                break;
        case MEDIA_TYPE_1000M_FULL:
-               ecmd->advertising =
+               advertising =
                    ADVERTISED_1000baseT_Full |
                    ADVERTISED_Autoneg | ADVERTISED_TP;
                break;
        default:
-               ecmd->advertising = 0;
+               advertising = 0;
                break;
        }
        if (atl1_phy_setup_autoneg_adv(hw)) {
@@ -3663,8 +3669,6 @@ static int atl1_nway_reset(struct net_device *netdev)
 }
 
 static const struct ethtool_ops atl1_ethtool_ops = {
-       .get_settings           = atl1_get_settings,
-       .set_settings           = atl1_set_settings,
        .get_drvinfo            = atl1_get_drvinfo,
        .get_wol                = atl1_get_wol,
        .set_wol                = atl1_set_wol,
@@ -3681,6 +3685,8 @@ static const struct ethtool_ops atl1_ethtool_ops = {
        .nway_reset             = atl1_nway_reset,
        .get_ethtool_stats      = atl1_get_ethtool_stats,
        .get_sset_count         = atl1_get_sset_count,
+       .get_link_ksettings     = atl1_get_link_ksettings,
+       .set_link_ksettings     = atl1_set_link_ksettings,
 };
 
 module_pci_driver(atl1_driver);
index 6911394115b20e7023d89feb4d757750c50f604d..63f2deec2a52994684fa7a58763f68502265542e 100644 (file)
@@ -1737,81 +1737,87 @@ static void atl2_write_pci_cfg(struct atl2_hw *hw, u32 reg, u16 *value)
        pci_write_config_word(adapter->pdev, reg, *value);
 }
 
-static int atl2_get_settings(struct net_device *netdev,
-       struct ethtool_cmd *ecmd)
+static int atl2_get_link_ksettings(struct net_device *netdev,
+                                  struct ethtool_link_ksettings *cmd)
 {
        struct atl2_adapter *adapter = netdev_priv(netdev);
        struct atl2_hw *hw = &adapter->hw;
+       u32 supported, advertising;
 
-       ecmd->supported = (SUPPORTED_10baseT_Half |
+       supported = (SUPPORTED_10baseT_Half |
                SUPPORTED_10baseT_Full |
                SUPPORTED_100baseT_Half |
                SUPPORTED_100baseT_Full |
                SUPPORTED_Autoneg |
                SUPPORTED_TP);
-       ecmd->advertising = ADVERTISED_TP;
+       advertising = ADVERTISED_TP;
 
-       ecmd->advertising |= ADVERTISED_Autoneg;
-       ecmd->advertising |= hw->autoneg_advertised;
+       advertising |= ADVERTISED_Autoneg;
+       advertising |= hw->autoneg_advertised;
 
-       ecmd->port = PORT_TP;
-       ecmd->phy_address = 0;
-       ecmd->transceiver = XCVR_INTERNAL;
+       cmd->base.port = PORT_TP;
+       cmd->base.phy_address = 0;
 
        if (adapter->link_speed != SPEED_0) {
-               ethtool_cmd_speed_set(ecmd, adapter->link_speed);
+               cmd->base.speed = adapter->link_speed;
                if (adapter->link_duplex == FULL_DUPLEX)
-                       ecmd->duplex = DUPLEX_FULL;
+                       cmd->base.duplex = DUPLEX_FULL;
                else
-                       ecmd->duplex = DUPLEX_HALF;
+                       cmd->base.duplex = DUPLEX_HALF;
        } else {
-               ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-               ecmd->duplex = DUPLEX_UNKNOWN;
+               cmd->base.speed = SPEED_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
        }
 
-       ecmd->autoneg = AUTONEG_ENABLE;
+       cmd->base.autoneg = AUTONEG_ENABLE;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
+
        return 0;
 }
 
-static int atl2_set_settings(struct net_device *netdev,
-       struct ethtool_cmd *ecmd)
+static int atl2_set_link_ksettings(struct net_device *netdev,
+                                  const struct ethtool_link_ksettings *cmd)
 {
        struct atl2_adapter *adapter = netdev_priv(netdev);
        struct atl2_hw *hw = &adapter->hw;
+       u32 advertising;
+
+       ethtool_convert_link_mode_to_legacy_u32(&advertising,
+                                               cmd->link_modes.advertising);
 
        while (test_and_set_bit(__ATL2_RESETTING, &adapter->flags))
                msleep(1);
 
-       if (ecmd->autoneg == AUTONEG_ENABLE) {
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
 #define MY_ADV_MASK    (ADVERTISE_10_HALF | \
                         ADVERTISE_10_FULL | \
                         ADVERTISE_100_HALF| \
                         ADVERTISE_100_FULL)
 
-               if ((ecmd->advertising & MY_ADV_MASK) == MY_ADV_MASK) {
+               if ((advertising & MY_ADV_MASK) == MY_ADV_MASK) {
                        hw->MediaType = MEDIA_TYPE_AUTO_SENSOR;
                        hw->autoneg_advertised =  MY_ADV_MASK;
-               } else if ((ecmd->advertising & MY_ADV_MASK) ==
-                               ADVERTISE_100_FULL) {
+               } else if ((advertising & MY_ADV_MASK) == ADVERTISE_100_FULL) {
                        hw->MediaType = MEDIA_TYPE_100M_FULL;
                        hw->autoneg_advertised = ADVERTISE_100_FULL;
-               } else if ((ecmd->advertising & MY_ADV_MASK) ==
-                               ADVERTISE_100_HALF) {
+               } else if ((advertising & MY_ADV_MASK) == ADVERTISE_100_HALF) {
                        hw->MediaType = MEDIA_TYPE_100M_HALF;
                        hw->autoneg_advertised = ADVERTISE_100_HALF;
-               } else if ((ecmd->advertising & MY_ADV_MASK) ==
-                               ADVERTISE_10_FULL) {
+               } else if ((advertising & MY_ADV_MASK) == ADVERTISE_10_FULL) {
                        hw->MediaType = MEDIA_TYPE_10M_FULL;
                        hw->autoneg_advertised = ADVERTISE_10_FULL;
-               }  else if ((ecmd->advertising & MY_ADV_MASK) ==
-                               ADVERTISE_10_HALF) {
+               }  else if ((advertising & MY_ADV_MASK) == ADVERTISE_10_HALF) {
                        hw->MediaType = MEDIA_TYPE_10M_HALF;
                        hw->autoneg_advertised = ADVERTISE_10_HALF;
                } else {
                        clear_bit(__ATL2_RESETTING, &adapter->flags);
                        return -EINVAL;
                }
-               ecmd->advertising = hw->autoneg_advertised |
+               advertising = hw->autoneg_advertised |
                        ADVERTISED_TP | ADVERTISED_Autoneg;
        } else {
                clear_bit(__ATL2_RESETTING, &adapter->flags);
@@ -2080,8 +2086,6 @@ static int atl2_nway_reset(struct net_device *netdev)
 }
 
 static const struct ethtool_ops atl2_ethtool_ops = {
-       .get_settings           = atl2_get_settings,
-       .set_settings           = atl2_set_settings,
        .get_drvinfo            = atl2_get_drvinfo,
        .get_regs_len           = atl2_get_regs_len,
        .get_regs               = atl2_get_regs,
@@ -2094,6 +2098,8 @@ static const struct ethtool_ops atl2_ethtool_ops = {
        .get_eeprom_len         = atl2_get_eeprom_len,
        .get_eeprom             = atl2_get_eeprom,
        .set_eeprom             = atl2_set_eeprom,
+       .get_link_ksettings     = atl2_get_link_ksettings,
+       .set_link_ksettings     = atl2_set_link_ksettings,
 };
 
 #define LBYTESWAP(a)  ((((a) & 0x00ff00ff) << 8) | \
index 99c40552ea9084d55dd8558d613f6e7f51bb92a5..07ff6492402a06340635a9c6384baf6127fe8841 100644 (file)
@@ -1037,16 +1037,6 @@ static const struct net_device_ops nb8800_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
 };
 
-static int nb8800_nway_reset(struct net_device *dev)
-{
-       struct phy_device *phydev = dev->phydev;
-
-       if (!phydev)
-               return -ENODEV;
-
-       return genphy_restart_aneg(phydev);
-}
-
 static void nb8800_get_pauseparam(struct net_device *dev,
                                  struct ethtool_pauseparam *pp)
 {
@@ -1165,7 +1155,7 @@ static void nb8800_get_ethtool_stats(struct net_device *dev,
 }
 
 static const struct ethtool_ops nb8800_ethtool_ops = {
-       .nway_reset             = nb8800_nway_reset,
+       .nway_reset             = phy_ethtool_nway_reset,
        .get_link               = ethtool_op_get_link,
        .get_pauseparam         = nb8800_get_pauseparam,
        .set_pauseparam         = nb8800_set_pauseparam,
@@ -1359,6 +1349,7 @@ static const struct of_device_id nb8800_dt_ids[] = {
        },
        { }
 };
+MODULE_DEVICE_TABLE(of, nb8800_dt_ids);
 
 static int nb8800_probe(struct platform_device *pdev)
 {
index 7e513cacb57af8acb6eccf48cde68a9fde7d5cfd..3b14d51442280b8a399b0d9b7145bebfc1560597 100644 (file)
@@ -1126,7 +1126,8 @@ out_freeirq:
        free_irq(dev->irq, dev);
 
 out_phy_disconnect:
-       phy_disconnect(phydev);
+       if (priv->has_phy)
+               phy_disconnect(phydev);
 
        return ret;
 }
@@ -1433,11 +1434,8 @@ static int bcm_enet_nway_reset(struct net_device *dev)
        struct bcm_enet_priv *priv;
 
        priv = netdev_priv(dev);
-       if (priv->has_phy) {
-               if (!dev->phydev)
-                       return -ENODEV;
-               return genphy_restart_aneg(dev->phydev);
-       }
+       if (priv->has_phy)
+               return phy_ethtool_nway_reset(dev);
 
        return -EOPNOTSUPP;
 }
index c16ec3a51876aacee21ad81dc714a343afb75ba7..4a4ffc0c4c65d6d1777d8a24ffc5b0e0cdc63aeb 100644 (file)
@@ -80,6 +80,24 @@ static void bcma_bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset, u32 mask,
        bcma_maskset32(bgmac->bcma.cmn, offset, mask, set);
 }
 
+static int bcma_phy_connect(struct bgmac *bgmac)
+{
+       struct phy_device *phy_dev;
+       char bus_id[MII_BUS_ID_SIZE + 3];
+
+       /* Connect to the PHY */
+       snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, bgmac->mii_bus->id,
+                bgmac->phyaddr);
+       phy_dev = phy_connect(bgmac->net_dev, bus_id, bgmac_adjust_link,
+                             PHY_INTERFACE_MODE_MII);
+       if (IS_ERR(phy_dev)) {
+               dev_err(bgmac->dev, "PHY connection failed\n");
+               return PTR_ERR(phy_dev);
+       }
+
+       return 0;
+}
+
 static const struct bcma_device_id bgmac_bcma_tbl[] = {
        BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT,
                  BCMA_ANY_REV, BCMA_ANY_CLASS),
@@ -275,6 +293,10 @@ static int bgmac_probe(struct bcma_device *core)
        bgmac->cco_ctl_maskset = bcma_bgmac_cco_ctl_maskset;
        bgmac->get_bus_clock = bcma_bgmac_get_bus_clock;
        bgmac->cmn_maskset32 = bcma_bgmac_cmn_maskset32;
+       if (bgmac->mii_bus)
+               bgmac->phy_connect = bcma_phy_connect;
+       else
+               bgmac->phy_connect = bgmac_phy_connect_direct;
 
        err = bgmac_enet_probe(bgmac);
        if (err)
index be52f270c2c1448f058a4399af44c0d4fb21b722..6f736c19872fe304f093f47951ecfe5dd5d2e727 100644 (file)
 #define pr_fmt(fmt)            KBUILD_MODNAME ": " fmt
 
 #include <linux/bcma/bcma.h>
+#include <linux/brcmphy.h>
 #include <linux/etherdevice.h>
 #include <linux/of_address.h>
+#include <linux/of_mdio.h>
 #include <linux/of_net.h>
 #include "bgmac.h"
 
+#define NICPM_IOMUX_CTRL               0x00000008
+
+#define NICPM_IOMUX_CTRL_INIT_VAL      0x3196e000
+#define NICPM_IOMUX_CTRL_SPD_SHIFT     10
+#define NICPM_IOMUX_CTRL_SPD_10M       0
+#define NICPM_IOMUX_CTRL_SPD_100M      1
+#define NICPM_IOMUX_CTRL_SPD_1000M     2
+
 static u32 platform_bgmac_read(struct bgmac *bgmac, u16 offset)
 {
        return readl(bgmac->plat.base + offset);
@@ -86,6 +96,54 @@ static void platform_bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset,
        WARN_ON(1);
 }
 
+static void bgmac_nicpm_speed_set(struct net_device *net_dev)
+{
+       struct bgmac *bgmac = netdev_priv(net_dev);
+       u32 val;
+
+       if (!bgmac->plat.nicpm_base)
+               return;
+
+       val = NICPM_IOMUX_CTRL_INIT_VAL;
+       switch (bgmac->net_dev->phydev->speed) {
+       default:
+               netdev_err(net_dev, "Unsupported speed. Defaulting to 1000Mb\n");
+       case SPEED_1000:
+               val |= NICPM_IOMUX_CTRL_SPD_1000M << NICPM_IOMUX_CTRL_SPD_SHIFT;
+               break;
+       case SPEED_100:
+               val |= NICPM_IOMUX_CTRL_SPD_100M << NICPM_IOMUX_CTRL_SPD_SHIFT;
+               break;
+       case SPEED_10:
+               val |= NICPM_IOMUX_CTRL_SPD_10M << NICPM_IOMUX_CTRL_SPD_SHIFT;
+               break;
+       }
+
+       writel(val, bgmac->plat.nicpm_base + NICPM_IOMUX_CTRL);
+
+       bgmac_adjust_link(bgmac->net_dev);
+}
+
+static int platform_phy_connect(struct bgmac *bgmac)
+{
+       struct phy_device *phy_dev;
+
+       if (bgmac->plat.nicpm_base)
+               phy_dev = of_phy_get_and_connect(bgmac->net_dev,
+                                                bgmac->dev->of_node,
+                                                bgmac_nicpm_speed_set);
+       else
+               phy_dev = of_phy_get_and_connect(bgmac->net_dev,
+                                                bgmac->dev->of_node,
+                                                bgmac_adjust_link);
+       if (!phy_dev) {
+               dev_err(bgmac->dev, "PHY connection failed\n");
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
 static int bgmac_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
@@ -102,7 +160,6 @@ static int bgmac_probe(struct platform_device *pdev)
        /* Set the features of the 4707 family */
        bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
        bgmac->feature_flags |= BGMAC_FEAT_NO_RESET;
-       bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
        bgmac->feature_flags |= BGMAC_FEAT_CMDCFG_SR_REV4;
        bgmac->feature_flags |= BGMAC_FEAT_TX_MASK_SETUP;
        bgmac->feature_flags |= BGMAC_FEAT_RX_MASK_SETUP;
@@ -142,6 +199,14 @@ static int bgmac_probe(struct platform_device *pdev)
        if (IS_ERR(bgmac->plat.idm_base))
                return PTR_ERR(bgmac->plat.idm_base);
 
+       regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nicpm_base");
+       if (regs) {
+               bgmac->plat.nicpm_base = devm_ioremap_resource(&pdev->dev,
+                                                              regs);
+               if (IS_ERR(bgmac->plat.nicpm_base))
+                       return PTR_ERR(bgmac->plat.nicpm_base);
+       }
+
        bgmac->read = platform_bgmac_read;
        bgmac->write = platform_bgmac_write;
        bgmac->idm_read = platform_bgmac_idm_read;
@@ -151,6 +216,12 @@ static int bgmac_probe(struct platform_device *pdev)
        bgmac->cco_ctl_maskset = platform_bgmac_cco_ctl_maskset;
        bgmac->get_bus_clock = platform_bgmac_get_bus_clock;
        bgmac->cmn_maskset32 = platform_bgmac_cmn_maskset32;
+       if (of_parse_phandle(np, "phy-handle", 0)) {
+               bgmac->phy_connect = platform_phy_connect;
+       } else {
+               bgmac->phy_connect = bgmac_phy_connect_direct;
+               bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
+       }
 
        return bgmac_enet_probe(bgmac);
 }
@@ -167,6 +238,7 @@ static int bgmac_remove(struct platform_device *pdev)
 static const struct of_device_id bgmac_of_enet_match[] = {
        {.compatible = "brcm,amac",},
        {.compatible = "brcm,nsp-amac",},
+       {.compatible = "brcm,ns2-amac",},
        {},
 };
 
index 856379cbb40265ed8e4e34a9e23ded1a32ecda0b..0e066dc6b8cc32436f0a5bbcab0505d7af376c1a 100644 (file)
@@ -307,6 +307,10 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
        u32 ctl;
 
        ctl = bgmac_read(bgmac, ring->mmio_base + BGMAC_DMA_RX_CTL);
+
+       /* preserve ONLY bits 16-17 from current hardware value */
+       ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
+
        if (bgmac->feature_flags & BGMAC_FEAT_RX_MASK_SETUP) {
                ctl &= ~BGMAC_DMA_RX_BL_MASK;
                ctl |= BGMAC_DMA_RX_BL_128 << BGMAC_DMA_RX_BL_SHIFT;
@@ -317,7 +321,6 @@ static void bgmac_dma_rx_enable(struct bgmac *bgmac,
                ctl &= ~BGMAC_DMA_RX_PT_MASK;
                ctl |= BGMAC_DMA_RX_PT_1 << BGMAC_DMA_RX_PT_SHIFT;
        }
-       ctl &= BGMAC_DMA_RX_ADDREXT_MASK;
        ctl |= BGMAC_DMA_RX_ENABLE;
        ctl |= BGMAC_DMA_RX_PARITY_DISABLE;
        ctl |= BGMAC_DMA_RX_OVERFLOW_CONT;
@@ -1046,9 +1049,9 @@ static void bgmac_enable(struct bgmac *bgmac)
 
        mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >>
                BGMAC_DS_MM_SHIFT;
-       if (!(bgmac->feature_flags & BGMAC_FEAT_CLKCTLST) || mode != 0)
+       if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST || mode != 0)
                bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT);
-       if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST && mode == 2)
+       if (!(bgmac->feature_flags & BGMAC_FEAT_CLKCTLST) && mode == 2)
                bgmac_cco_ctl_maskset(bgmac, 1, ~0,
                                      BGMAC_CHIPCTL_1_RXC_DLL_BYPASS);
 
@@ -1082,6 +1085,9 @@ static void bgmac_enable(struct bgmac *bgmac)
 /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipinit */
 static void bgmac_chip_init(struct bgmac *bgmac)
 {
+       /* Clear any erroneously pending interrupts */
+       bgmac_write(bgmac, BGMAC_INT_STATUS, ~0);
+
        /* 1 interrupt per received frame */
        bgmac_write(bgmac, BGMAC_INT_RECV_LAZY, 1 << BGMAC_IRL_FC_SHIFT);
 
@@ -1388,7 +1394,7 @@ static const struct ethtool_ops bgmac_ethtool_ops = {
  * MII
  **************************************************/
 
-static void bgmac_adjust_link(struct net_device *net_dev)
+void bgmac_adjust_link(struct net_device *net_dev)
 {
        struct bgmac *bgmac = netdev_priv(net_dev);
        struct phy_device *phy_dev = net_dev->phydev;
@@ -1411,8 +1417,9 @@ static void bgmac_adjust_link(struct net_device *net_dev)
                phy_print_status(phy_dev);
        }
 }
+EXPORT_SYMBOL_GPL(bgmac_adjust_link);
 
-static int bgmac_phy_connect_direct(struct bgmac *bgmac)
+int bgmac_phy_connect_direct(struct bgmac *bgmac)
 {
        struct fixed_phy_status fphy_status = {
                .link = 1,
@@ -1437,24 +1444,7 @@ static int bgmac_phy_connect_direct(struct bgmac *bgmac)
 
        return err;
 }
-
-static int bgmac_phy_connect(struct bgmac *bgmac)
-{
-       struct phy_device *phy_dev;
-       char bus_id[MII_BUS_ID_SIZE + 3];
-
-       /* Connect to the PHY */
-       snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, bgmac->mii_bus->id,
-                bgmac->phyaddr);
-       phy_dev = phy_connect(bgmac->net_dev, bus_id, &bgmac_adjust_link,
-                             PHY_INTERFACE_MODE_MII);
-       if (IS_ERR(phy_dev)) {
-               dev_err(bgmac->dev, "PHY connecton failed\n");
-               return PTR_ERR(phy_dev);
-       }
-
-       return 0;
-}
+EXPORT_SYMBOL_GPL(bgmac_phy_connect_direct);
 
 int bgmac_enet_probe(struct bgmac *info)
 {
@@ -1507,10 +1497,7 @@ int bgmac_enet_probe(struct bgmac *info)
 
        netif_napi_add(net_dev, &bgmac->napi, bgmac_poll, BGMAC_WEIGHT);
 
-       if (!bgmac->mii_bus)
-               err = bgmac_phy_connect_direct(bgmac);
-       else
-               err = bgmac_phy_connect(bgmac);
+       err = bgmac_phy_connect(bgmac);
        if (err) {
                dev_err(bgmac->dev, "Cannot connect to phy\n");
                goto err_dma_free;
index 80836b4c9f3814cf115f85a8eb093e54538acd99..71f493f2451f7d333492965d1bc9da94a2e55695 100644 (file)
@@ -463,6 +463,7 @@ struct bgmac {
                struct {
                        void *base;
                        void *idm_base;
+                       void *nicpm_base;
                } plat;
                struct {
                        struct bcma_device *core;
@@ -513,10 +514,13 @@ struct bgmac {
        u32 (*get_bus_clock)(struct bgmac *bgmac);
        void (*cmn_maskset32)(struct bgmac *bgmac, u16 offset, u32 mask,
                              u32 set);
+       int (*phy_connect)(struct bgmac *bgmac);
 };
 
 int bgmac_enet_probe(struct bgmac *info);
 void bgmac_enet_remove(struct bgmac *bgmac);
+void bgmac_adjust_link(struct net_device *net_dev);
+int bgmac_phy_connect_direct(struct bgmac *bgmac);
 
 struct mii_bus *bcma_mdio_mii_register(struct bcma_device *core, u8 phyaddr);
 void bcma_mdio_mii_unregister(struct mii_bus *mii_bus);
@@ -583,4 +587,9 @@ static inline void bgmac_set(struct bgmac *bgmac, u16 offset, u32 set)
 {
        bgmac_maskset(bgmac, offset, ~0, set);
 }
+
+static inline int bgmac_phy_connect(struct bgmac *bgmac)
+{
+       return bgmac->phy_connect(bgmac);
+}
 #endif /* _BGMAC_H */
index 2a5df3f71e9f18f39da8325631cf274d59037fb3..d5d1026be4b70a320c48af7545a1891c74e19a47 100644 (file)
@@ -49,6 +49,7 @@
 #include <linux/firmware.h>
 #include <linux/log2.h>
 #include <linux/aer.h>
+#include <linux/crash_dump.h>
 
 #if IS_ENABLED(CONFIG_CNIC)
 #define BCM_CNIC 1
@@ -253,13 +254,10 @@ static inline u32 bnx2_tx_avail(struct bnx2 *bp, struct bnx2_tx_ring_info *txr)
 {
        u32 diff;
 
-       /* Tell compiler to fetch tx_prod and tx_cons from memory. */
-       barrier();
-
        /* The ring uses 256 indices for 255 entries, one of them
         * needs to be skipped.
         */
-       diff = txr->tx_prod - txr->tx_cons;
+       diff = READ_ONCE(txr->tx_prod) - READ_ONCE(txr->tx_cons);
        if (unlikely(diff >= BNX2_TX_DESC_CNT)) {
                diff &= 0xffff;
                if (diff == BNX2_TX_DESC_CNT)
@@ -271,22 +269,25 @@ static inline u32 bnx2_tx_avail(struct bnx2 *bp, struct bnx2_tx_ring_info *txr)
 static u32
 bnx2_reg_rd_ind(struct bnx2 *bp, u32 offset)
 {
+       unsigned long flags;
        u32 val;
 
-       spin_lock_bh(&bp->indirect_lock);
+       spin_lock_irqsave(&bp->indirect_lock, flags);
        BNX2_WR(bp, BNX2_PCICFG_REG_WINDOW_ADDRESS, offset);
        val = BNX2_RD(bp, BNX2_PCICFG_REG_WINDOW);
-       spin_unlock_bh(&bp->indirect_lock);
+       spin_unlock_irqrestore(&bp->indirect_lock, flags);
        return val;
 }
 
 static void
 bnx2_reg_wr_ind(struct bnx2 *bp, u32 offset, u32 val)
 {
-       spin_lock_bh(&bp->indirect_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&bp->indirect_lock, flags);
        BNX2_WR(bp, BNX2_PCICFG_REG_WINDOW_ADDRESS, offset);
        BNX2_WR(bp, BNX2_PCICFG_REG_WINDOW, val);
-       spin_unlock_bh(&bp->indirect_lock);
+       spin_unlock_irqrestore(&bp->indirect_lock, flags);
 }
 
 static void
@@ -304,8 +305,10 @@ bnx2_shmem_rd(struct bnx2 *bp, u32 offset)
 static void
 bnx2_ctx_wr(struct bnx2 *bp, u32 cid_addr, u32 offset, u32 val)
 {
+       unsigned long flags;
+
        offset += cid_addr;
-       spin_lock_bh(&bp->indirect_lock);
+       spin_lock_irqsave(&bp->indirect_lock, flags);
        if (BNX2_CHIP(bp) == BNX2_CHIP_5709) {
                int i;
 
@@ -322,7 +325,7 @@ bnx2_ctx_wr(struct bnx2 *bp, u32 cid_addr, u32 offset, u32 val)
                BNX2_WR(bp, BNX2_CTX_DATA_ADR, offset);
                BNX2_WR(bp, BNX2_CTX_DATA, val);
        }
-       spin_unlock_bh(&bp->indirect_lock);
+       spin_unlock_irqrestore(&bp->indirect_lock, flags);
 }
 
 #ifdef BCM_CNIC
@@ -2833,10 +2836,8 @@ bnx2_get_hw_tx_cons(struct bnx2_napi *bnapi)
 {
        u16 cons;
 
-       /* Tell compiler that status block fields can change. */
-       barrier();
-       cons = *bnapi->hw_tx_cons_ptr;
-       barrier();
+       cons = READ_ONCE(*bnapi->hw_tx_cons_ptr);
+
        if (unlikely((cons & BNX2_MAX_TX_DESC_CNT) == BNX2_MAX_TX_DESC_CNT))
                cons++;
        return cons;
@@ -3135,10 +3136,8 @@ bnx2_get_hw_rx_cons(struct bnx2_napi *bnapi)
 {
        u16 cons;
 
-       /* Tell compiler that status block fields can change. */
-       barrier();
-       cons = *bnapi->hw_rx_cons_ptr;
-       barrier();
+       cons = READ_ONCE(*bnapi->hw_rx_cons_ptr);
+
        if (unlikely((cons & BNX2_MAX_RX_DESC_CNT) == BNX2_MAX_RX_DESC_CNT))
                cons++;
        return cons;
@@ -4759,15 +4758,16 @@ bnx2_setup_msix_tbl(struct bnx2 *bp)
        BNX2_WR(bp, BNX2_PCI_GRC_WINDOW3_ADDR, BNX2_MSIX_PBA_ADDR);
 }
 
-static int
-bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
+static void
+bnx2_wait_dma_complete(struct bnx2 *bp)
 {
        u32 val;
-       int i, rc = 0;
-       u8 old_port;
+       int i;
 
-       /* Wait for the current PCI transaction to complete before
-        * issuing a reset. */
+       /*
+        * Wait for the current PCI transaction to complete before
+        * issuing a reset.
+        */
        if ((BNX2_CHIP(bp) == BNX2_CHIP_5706) ||
            (BNX2_CHIP(bp) == BNX2_CHIP_5708)) {
                BNX2_WR(bp, BNX2_MISC_ENABLE_CLR_BITS,
@@ -4791,6 +4791,21 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
                }
        }
 
+       return;
+}
+
+
+static int
+bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
+{
+       u32 val;
+       int i, rc = 0;
+       u8 old_port;
+
+       /* Wait for the current PCI transaction to complete before
+        * issuing a reset. */
+       bnx2_wait_dma_complete(bp);
+
        /* Wait for the firmware to tell us it is ok to issue a reset. */
        bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1, 1);
 
@@ -6356,6 +6371,10 @@ bnx2_open(struct net_device *dev)
        struct bnx2 *bp = netdev_priv(dev);
        int rc;
 
+       rc = bnx2_request_firmware(bp);
+       if (rc < 0)
+               goto out;
+
        netif_carrier_off(dev);
 
        bnx2_disable_int(bp);
@@ -6424,6 +6443,7 @@ open_err:
        bnx2_free_irq(bp);
        bnx2_free_mem(bp);
        bnx2_del_napi(bp);
+       bnx2_release_firmware(bp);
        goto out;
 }
 
@@ -6877,12 +6897,14 @@ bnx2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *net_stats)
 /* All ethtool functions called with rtnl_lock */
 
 static int
-bnx2_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+bnx2_get_link_ksettings(struct net_device *dev,
+                       struct ethtool_link_ksettings *cmd)
 {
        struct bnx2 *bp = netdev_priv(dev);
        int support_serdes = 0, support_copper = 0;
+       u32 supported, advertising;
 
-       cmd->supported = SUPPORTED_Autoneg;
+       supported = SUPPORTED_Autoneg;
        if (bp->phy_flags & BNX2_PHY_FLAG_REMOTE_PHY_CAP) {
                support_serdes = 1;
                support_copper = 1;
@@ -6892,56 +6914,59 @@ bnx2_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                support_copper = 1;
 
        if (support_serdes) {
-               cmd->supported |= SUPPORTED_1000baseT_Full |
+               supported |= SUPPORTED_1000baseT_Full |
                        SUPPORTED_FIBRE;
                if (bp->phy_flags & BNX2_PHY_FLAG_2_5G_CAPABLE)
-                       cmd->supported |= SUPPORTED_2500baseX_Full;
-
+                       supported |= SUPPORTED_2500baseX_Full;
        }
        if (support_copper) {
-               cmd->supported |= SUPPORTED_10baseT_Half |
+               supported |= SUPPORTED_10baseT_Half |
                        SUPPORTED_10baseT_Full |
                        SUPPORTED_100baseT_Half |
                        SUPPORTED_100baseT_Full |
                        SUPPORTED_1000baseT_Full |
                        SUPPORTED_TP;
-
        }
 
        spin_lock_bh(&bp->phy_lock);
-       cmd->port = bp->phy_port;
-       cmd->advertising = bp->advertising;
+       cmd->base.port = bp->phy_port;
+       advertising = bp->advertising;
 
        if (bp->autoneg & AUTONEG_SPEED) {
-               cmd->autoneg = AUTONEG_ENABLE;
+               cmd->base.autoneg = AUTONEG_ENABLE;
        } else {
-               cmd->autoneg = AUTONEG_DISABLE;
+               cmd->base.autoneg = AUTONEG_DISABLE;
        }
 
        if (netif_carrier_ok(dev)) {
-               ethtool_cmd_speed_set(cmd, bp->line_speed);
-               cmd->duplex = bp->duplex;
+               cmd->base.speed = bp->line_speed;
+               cmd->base.duplex = bp->duplex;
                if (!(bp->phy_flags & BNX2_PHY_FLAG_SERDES)) {
                        if (bp->phy_flags & BNX2_PHY_FLAG_MDIX)
-                               cmd->eth_tp_mdix = ETH_TP_MDI_X;
+                               cmd->base.eth_tp_mdix = ETH_TP_MDI_X;
                        else
-                               cmd->eth_tp_mdix = ETH_TP_MDI;
+                               cmd->base.eth_tp_mdix = ETH_TP_MDI;
                }
        }
        else {
-               ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
-               cmd->duplex = DUPLEX_UNKNOWN;
+               cmd->base.speed = SPEED_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
        }
        spin_unlock_bh(&bp->phy_lock);
 
-       cmd->transceiver = XCVR_INTERNAL;
-       cmd->phy_address = bp->phy_addr;
+       cmd->base.phy_address = bp->phy_addr;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        return 0;
 }
 
 static int
-bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+bnx2_set_link_ksettings(struct net_device *dev,
+                       const struct ethtool_link_ksettings *cmd)
 {
        struct bnx2 *bp = netdev_priv(dev);
        u8 autoneg = bp->autoneg;
@@ -6952,24 +6977,26 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 
        spin_lock_bh(&bp->phy_lock);
 
-       if (cmd->port != PORT_TP && cmd->port != PORT_FIBRE)
+       if (cmd->base.port != PORT_TP && cmd->base.port != PORT_FIBRE)
                goto err_out_unlock;
 
-       if (cmd->port != bp->phy_port &&
+       if (cmd->base.port != bp->phy_port &&
            !(bp->phy_flags & BNX2_PHY_FLAG_REMOTE_PHY_CAP))
                goto err_out_unlock;
 
        /* If device is down, we can store the settings only if the user
         * is setting the currently active port.
         */
-       if (!netif_running(dev) && cmd->port != bp->phy_port)
+       if (!netif_running(dev) && cmd->base.port != bp->phy_port)
                goto err_out_unlock;
 
-       if (cmd->autoneg == AUTONEG_ENABLE) {
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
                autoneg |= AUTONEG_SPEED;
 
-               advertising = cmd->advertising;
-               if (cmd->port == PORT_TP) {
+               ethtool_convert_link_mode_to_legacy_u32(
+                       &advertising, cmd->link_modes.advertising);
+
+               if (cmd->base.port == PORT_TP) {
                        advertising &= ETHTOOL_ALL_COPPER_SPEED;
                        if (!advertising)
                                advertising = ETHTOOL_ALL_COPPER_SPEED;
@@ -6981,11 +7008,12 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
                advertising |= ADVERTISED_Autoneg;
        }
        else {
-               u32 speed = ethtool_cmd_speed(cmd);
-               if (cmd->port == PORT_FIBRE) {
+               u32 speed = cmd->base.speed;
+
+               if (cmd->base.port == PORT_FIBRE) {
                        if ((speed != SPEED_1000 &&
                             speed != SPEED_2500) ||
-                           (cmd->duplex != DUPLEX_FULL))
+                           (cmd->base.duplex != DUPLEX_FULL))
                                goto err_out_unlock;
 
                        if (speed == SPEED_2500 &&
@@ -6996,7 +7024,7 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 
                autoneg &= ~AUTONEG_SPEED;
                req_line_speed = speed;
-               req_duplex = cmd->duplex;
+               req_duplex = cmd->base.duplex;
                advertising = 0;
        }
 
@@ -7010,7 +7038,7 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
         * brought up.
         */
        if (netif_running(dev))
-               err = bnx2_setup_phy(bp, cmd->port);
+               err = bnx2_setup_phy(bp, cmd->base.port);
 
 err_out_unlock:
        spin_unlock_bh(&bp->phy_lock);
@@ -7795,8 +7823,6 @@ static int bnx2_set_channels(struct net_device *dev,
 }
 
 static const struct ethtool_ops bnx2_ethtool_ops = {
-       .get_settings           = bnx2_get_settings,
-       .set_settings           = bnx2_set_settings,
        .get_drvinfo            = bnx2_get_drvinfo,
        .get_regs_len           = bnx2_get_regs_len,
        .get_regs               = bnx2_get_regs,
@@ -7820,6 +7846,8 @@ static const struct ethtool_ops bnx2_ethtool_ops = {
        .get_sset_count         = bnx2_get_sset_count,
        .get_channels           = bnx2_get_channels,
        .set_channels           = bnx2_set_channels,
+       .get_link_ksettings     = bnx2_get_link_ksettings,
+       .set_link_ksettings     = bnx2_set_link_ksettings,
 };
 
 /* Called with rtnl_lock */
@@ -8566,12 +8594,15 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        pci_set_drvdata(pdev, dev);
 
-       rc = bnx2_request_firmware(bp);
-       if (rc < 0)
-               goto error;
-
+       /*
+        * In-flight DMA from 1st kernel could continue going in kdump kernel.
+        * New io-page table has been created before bnx2 does reset at open stage.
+        * We have to wait for the in-flight DMA to complete to avoid it look up
+        * into the newly created io-page table.
+        */
+       if (is_kdump_kernel())
+               bnx2_wait_dma_complete(bp);
 
-       bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
        memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
 
        dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
@@ -8606,7 +8637,6 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        return 0;
 
 error:
-       bnx2_release_firmware(bp);
        pci_iounmap(pdev, bp->regview);
        pci_release_regions(pdev);
        pci_disable_device(pdev);
index ed42c100968551525fd393212cfb68ed1723e9c0..3fd36b421d51bcafeb4591832253622db05038d9 100644 (file)
@@ -3248,13 +3248,14 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
                        rmb();
 
                        if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
-                               napi_complete(napi);
-                               /* Re-enable interrupts */
-                               DP(NETIF_MSG_RX_STATUS,
-                                  "Update index to %d\n", fp->fp_hc_idx);
-                               bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID,
-                                            le16_to_cpu(fp->fp_hc_idx),
-                                            IGU_INT_ENABLE, 1);
+                               if (napi_complete_done(napi, rx_work_done)) {
+                                       /* Re-enable interrupts */
+                                       DP(NETIF_MSG_RX_STATUS,
+                                          "Update index to %d\n", fp->fp_hc_idx);
+                                       bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID,
+                                                    le16_to_cpu(fp->fp_hc_idx),
+                                                    IGU_INT_ENABLE, 1);
+                               }
                        } else {
                                rx_work_done = budget;
                        }
index 67b6180bdbf684c6447178f311388be3c25bfa90..ab990da677d56a798bc93c6d1db6ee8418b2777e 100644 (file)
@@ -15244,7 +15244,7 @@ static void bnx2x_init_cyclecounter(struct bnx2x *bp)
        memset(&bp->cyclecounter, 0, sizeof(bp->cyclecounter));
        bp->cyclecounter.read = bnx2x_cyclecounter_read;
        bp->cyclecounter.mask = CYCLECOUNTER_MASK(64);
-       bp->cyclecounter.shift = 1;
+       bp->cyclecounter.shift = 0;
        bp->cyclecounter.mult = 1;
 }
 
index a042da1ff4b90e9aae4f76db71c99c2c4da321d3..0e4f168bea9e0f0d816101534f3a3c3ffccc4379 100644 (file)
@@ -1499,6 +1499,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
                        netdev_warn(bp->dev, "Link speed %d no longer supported\n",
                                    speed);
                }
+               set_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT, &bp->sp_event);
                /* fall thru */
        }
        case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE:
@@ -1811,6 +1812,9 @@ static int bnxt_busy_poll(struct napi_struct *napi)
        if (atomic_read(&bp->intr_sem) != 0)
                return LL_FLUSH_FAILED;
 
+       if (!bp->link_info.link_up)
+               return LL_FLUSH_FAILED;
+
        if (!bnxt_lock_poll(bnapi))
                return LL_FLUSH_BUSY;
 
@@ -3210,11 +3214,17 @@ static int bnxt_hwrm_tunnel_dst_port_alloc(struct bnxt *bp, __be16 port,
                goto err_out;
        }
 
-       if (tunnel_type & TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN)
+       switch (tunnel_type) {
+       case TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN:
                bp->vxlan_fw_dst_port_id = resp->tunnel_dst_port_id;
-
-       else if (tunnel_type & TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE)
+               break;
+       case TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE:
                bp->nge_fw_dst_port_id = resp->tunnel_dst_port_id;
+               break;
+       default:
+               break;
+       }
+
 err_out:
        mutex_unlock(&bp->hwrm_cmd_lock);
        return rc;
@@ -3424,13 +3434,7 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss)
 
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1);
        if (set_rss) {
-               vnic->hash_type = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 |
-                                 VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 |
-                                 VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 |
-                                 VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6;
-
-               req.hash_type = cpu_to_le32(vnic->hash_type);
-
+               req.hash_type = cpu_to_le32(bp->rss_hash_cfg);
                if (vnic->flags & BNXT_VNIC_RSS_FLAG) {
                        if (BNXT_CHIP_TYPE_NITRO_A0(bp))
                                max_rings = bp->rx_nr_rings - 1;
@@ -4934,6 +4938,10 @@ static void bnxt_del_napi(struct bnxt *bp)
                napi_hash_del(&bnapi->napi);
                netif_napi_del(&bnapi->napi);
        }
+       /* We called napi_hash_del() before netif_napi_del(), we need
+        * to respect an RCU grace period before freeing napi structures.
+        */
+       synchronize_net();
 }
 
 static void bnxt_init_napi(struct bnxt *bp)
@@ -4954,7 +4962,6 @@ static void bnxt_init_napi(struct bnxt *bp)
                        bnapi = bp->bnapi[cp_nr_rings];
                        netif_napi_add(bp->dev, &bnapi->napi,
                                       bnxt_poll_nitroa0, 64);
-                       napi_hash_add(&bnapi->napi);
                }
        } else {
                bnapi = bp->bnapi[0];
@@ -5096,6 +5103,7 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
        struct hwrm_port_phy_qcfg_input req = {0};
        struct hwrm_port_phy_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
        u8 link_up = link_info->link_up;
+       u16 diff;
 
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCFG, -1, -1);
 
@@ -5183,6 +5191,23 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
                link_info->link_up = 0;
        }
        mutex_unlock(&bp->hwrm_cmd_lock);
+
+       diff = link_info->support_auto_speeds ^ link_info->advertising;
+       if ((link_info->support_auto_speeds | diff) !=
+           link_info->support_auto_speeds) {
+               /* An advertised speed is no longer supported, so we need to
+                * update the advertisement settings.  See bnxt_reset() for
+                * comments about the rtnl_lock() sequence below.
+                */
+               clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+               rtnl_lock();
+               link_info->advertising = link_info->support_auto_speeds;
+               if (test_bit(BNXT_STATE_OPEN, &bp->state) &&
+                   (link_info->autoneg & BNXT_AUTONEG_SPEED))
+                       bnxt_hwrm_set_link_setting(bp, true, false);
+               set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+               rtnl_unlock();
+       }
        return 0;
 }
 
@@ -5347,7 +5372,7 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp)
                return 0;
 
        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
-       req.flags = cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DOWN);
+       req.flags = cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN);
        return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
@@ -5410,6 +5435,12 @@ static int bnxt_update_phy_setting(struct bnxt *bp)
                        update_link = true;
        }
 
+       /* The last close may have shutdown the link, so need to call
+        * PHY_CFG to bring it back up.
+        */
+       if (!netif_carrier_ok(bp->dev))
+               update_link = true;
+
        if (!bnxt_eee_config_ok(bp))
                update_eee = true;
 
@@ -6103,6 +6134,10 @@ static void bnxt_sp_task(struct work_struct *work)
        if (test_and_clear_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event))
                bnxt_cfg_ntp_filters(bp);
        if (test_and_clear_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event)) {
+               if (test_and_clear_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT,
+                                      &bp->sp_event))
+                       bnxt_hwrm_phy_qcaps(bp);
+
                rc = bnxt_update_link(bp, true);
                if (rc)
                        netdev_err(bp->dev, "SP task can't update link (rc: %x)\n",
@@ -6306,6 +6341,7 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
                         struct tc_to_netdev *ntc)
 {
        struct bnxt *bp = netdev_priv(dev);
+       bool sh = false;
        u8 tc;
 
        if (ntc->type != TC_SETUP_MQPRIO)
@@ -6322,12 +6358,11 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
        if (netdev_get_num_tc(dev) == tc)
                return 0;
 
+       if (bp->flags & BNXT_FLAG_SHARED_RINGS)
+               sh = true;
+
        if (tc) {
                int max_rx_rings, max_tx_rings, rc;
-               bool sh = false;
-
-               if (bp->flags & BNXT_FLAG_SHARED_RINGS)
-                       sh = true;
 
                rc = bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, sh);
                if (rc || bp->tx_nr_rings_per_tc * tc > max_tx_rings)
@@ -6345,7 +6380,8 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
                bp->tx_nr_rings = bp->tx_nr_rings_per_tc;
                netdev_reset_tc(dev);
        }
-       bp->cp_nr_rings = max_t(int, bp->tx_nr_rings, bp->rx_nr_rings);
+       bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
+                              bp->tx_nr_rings + bp->rx_nr_rings;
        bp->num_stat_ctxs = bp->cp_nr_rings;
 
        if (netif_running(bp->dev))
@@ -6911,6 +6947,19 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 #endif
        bnxt_set_dflt_rings(bp);
 
+       /* Default RSS hash cfg. */
+       bp->rss_hash_cfg = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 |
+                          VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 |
+                          VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 |
+                          VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6;
+       if (!BNXT_CHIP_NUM_57X0X(bp->chip_num) &&
+           !BNXT_CHIP_TYPE_NITRO_A0(bp) &&
+           bp->hwrm_spec_code >= 0x10501) {
+               bp->flags |= BNXT_FLAG_UDP_RSS_CAP;
+               bp->rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 |
+                                   VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6;
+       }
+
        if (BNXT_PF(bp) && !BNXT_CHIP_TYPE_NITRO_A0(bp)) {
                dev->hw_features |= NETIF_F_NTUPLE;
                if (bnxt_rfs_capable(bp)) {
index 51b164a0e8442200c55bd4dd9a54034c27bbb9fc..47be7894c67bcea3e1f1002fcfc88a1992d10b22 100644 (file)
@@ -700,7 +700,6 @@ struct bnxt_vnic_info {
        u8              *uc_list;
 
        u16             *fw_grp_ids;
-       u16             hash_type;
        dma_addr_t      rss_table_dma_addr;
        __le16          *rss_table;
        dma_addr_t      rss_hash_key_dma_addr;
@@ -952,6 +951,7 @@ struct bnxt {
        #define BNXT_FLAG_RFS           0x100
        #define BNXT_FLAG_SHARED_RINGS  0x200
        #define BNXT_FLAG_PORT_STATS    0x400
+       #define BNXT_FLAG_UDP_RSS_CAP   0x800
        #define BNXT_FLAG_EEE_CAP       0x1000
        #define BNXT_FLAG_CHIP_NITRO_A0 0x1000000
 
@@ -1007,6 +1007,7 @@ struct bnxt {
        struct bnxt_ring_grp_info       *grp_info;
        struct bnxt_vnic_info   *vnic_info;
        int                     nr_vnics;
+       u32                     rss_hash_cfg;
 
        u8                      max_tc;
        struct bnxt_queue_info  q_info[BNXT_MAX_QUEUE];
@@ -1089,6 +1090,7 @@ struct bnxt {
 #define BNXT_RESET_TASK_SILENT_SP_EVENT        11
 #define BNXT_GENEVE_ADD_PORT_SP_EVENT  12
 #define BNXT_GENEVE_DEL_PORT_SP_EVENT  13
+#define BNXT_LINK_SPEED_CHNG_SP_EVENT  14
 
        struct bnxt_pf_info     pf;
 #ifdef CONFIG_BNXT_SRIOV
index a7e04ff4eaedefac2e01f6ecb90f224d63f50f6f..fa6125eb24afc576cddf2c1bd5128a0dda26fb18 100644 (file)
@@ -542,6 +542,146 @@ fltr_err:
 
        return rc;
 }
+#endif
+
+static u64 get_ethtool_ipv4_rss(struct bnxt *bp)
+{
+       if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4)
+               return RXH_IP_SRC | RXH_IP_DST;
+       return 0;
+}
+
+static u64 get_ethtool_ipv6_rss(struct bnxt *bp)
+{
+       if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6)
+               return RXH_IP_SRC | RXH_IP_DST;
+       return 0;
+}
+
+static int bnxt_grxfh(struct bnxt *bp, struct ethtool_rxnfc *cmd)
+{
+       cmd->data = 0;
+       switch (cmd->flow_type) {
+       case TCP_V4_FLOW:
+               if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4)
+                       cmd->data |= RXH_IP_SRC | RXH_IP_DST |
+                                    RXH_L4_B_0_1 | RXH_L4_B_2_3;
+               cmd->data |= get_ethtool_ipv4_rss(bp);
+               break;
+       case UDP_V4_FLOW:
+               if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4)
+                       cmd->data |= RXH_IP_SRC | RXH_IP_DST |
+                                    RXH_L4_B_0_1 | RXH_L4_B_2_3;
+               /* fall through */
+       case SCTP_V4_FLOW:
+       case AH_ESP_V4_FLOW:
+       case AH_V4_FLOW:
+       case ESP_V4_FLOW:
+       case IPV4_FLOW:
+               cmd->data |= get_ethtool_ipv4_rss(bp);
+               break;
+
+       case TCP_V6_FLOW:
+               if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6)
+                       cmd->data |= RXH_IP_SRC | RXH_IP_DST |
+                                    RXH_L4_B_0_1 | RXH_L4_B_2_3;
+               cmd->data |= get_ethtool_ipv6_rss(bp);
+               break;
+       case UDP_V6_FLOW:
+               if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6)
+                       cmd->data |= RXH_IP_SRC | RXH_IP_DST |
+                                    RXH_L4_B_0_1 | RXH_L4_B_2_3;
+               /* fall through */
+       case SCTP_V6_FLOW:
+       case AH_ESP_V6_FLOW:
+       case AH_V6_FLOW:
+       case ESP_V6_FLOW:
+       case IPV6_FLOW:
+               cmd->data |= get_ethtool_ipv6_rss(bp);
+               break;
+       }
+       return 0;
+}
+
+#define RXH_4TUPLE (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3)
+#define RXH_2TUPLE (RXH_IP_SRC | RXH_IP_DST)
+
+static int bnxt_srxfh(struct bnxt *bp, struct ethtool_rxnfc *cmd)
+{
+       u32 rss_hash_cfg = bp->rss_hash_cfg;
+       int tuple, rc = 0;
+
+       if (cmd->data == RXH_4TUPLE)
+               tuple = 4;
+       else if (cmd->data == RXH_2TUPLE)
+               tuple = 2;
+       else if (!cmd->data)
+               tuple = 0;
+       else
+               return -EINVAL;
+
+       if (cmd->flow_type == TCP_V4_FLOW) {
+               rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4;
+               if (tuple == 4)
+                       rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4;
+       } else if (cmd->flow_type == UDP_V4_FLOW) {
+               if (tuple == 4 && !(bp->flags & BNXT_FLAG_UDP_RSS_CAP))
+                       return -EINVAL;
+               rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4;
+               if (tuple == 4)
+                       rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4;
+       } else if (cmd->flow_type == TCP_V6_FLOW) {
+               rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6;
+               if (tuple == 4)
+                       rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6;
+       } else if (cmd->flow_type == UDP_V6_FLOW) {
+               if (tuple == 4 && !(bp->flags & BNXT_FLAG_UDP_RSS_CAP))
+                       return -EINVAL;
+               rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6;
+               if (tuple == 4)
+                       rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6;
+       } else if (tuple == 4) {
+               return -EINVAL;
+       }
+
+       switch (cmd->flow_type) {
+       case TCP_V4_FLOW:
+       case UDP_V4_FLOW:
+       case SCTP_V4_FLOW:
+       case AH_ESP_V4_FLOW:
+       case AH_V4_FLOW:
+       case ESP_V4_FLOW:
+       case IPV4_FLOW:
+               if (tuple == 2)
+                       rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4;
+               else if (!tuple)
+                       rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4;
+               break;
+
+       case TCP_V6_FLOW:
+       case UDP_V6_FLOW:
+       case SCTP_V6_FLOW:
+       case AH_ESP_V6_FLOW:
+       case AH_V6_FLOW:
+       case ESP_V6_FLOW:
+       case IPV6_FLOW:
+               if (tuple == 2)
+                       rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6;
+               else if (!tuple)
+                       rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6;
+               break;
+       }
+
+       if (bp->rss_hash_cfg == rss_hash_cfg)
+               return 0;
+
+       bp->rss_hash_cfg = rss_hash_cfg;
+       if (netif_running(bp->dev)) {
+               bnxt_close_nic(bp, false, false);
+               rc = bnxt_open_nic(bp, false, false);
+       }
+       return rc;
+}
 
 static int bnxt_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
                          u32 *rule_locs)
@@ -550,6 +690,7 @@ static int bnxt_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
        int rc = 0;
 
        switch (cmd->cmd) {
+#ifdef CONFIG_RFS_ACCEL
        case ETHTOOL_GRXRINGS:
                cmd->data = bp->rx_nr_rings;
                break;
@@ -566,6 +707,11 @@ static int bnxt_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
        case ETHTOOL_GRXCLSRULE:
                rc = bnxt_grxclsrule(bp, cmd);
                break;
+#endif
+
+       case ETHTOOL_GRXFH:
+               rc = bnxt_grxfh(bp, cmd);
+               break;
 
        default:
                rc = -EOPNOTSUPP;
@@ -574,7 +720,23 @@ static int bnxt_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 
        return rc;
 }
-#endif
+
+static int bnxt_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+       struct bnxt *bp = netdev_priv(dev);
+       int rc;
+
+       switch (cmd->cmd) {
+       case ETHTOOL_SRXFH:
+               rc = bnxt_srxfh(bp, cmd);
+               break;
+
+       default:
+               rc = -EOPNOTSUPP;
+               break;
+       }
+       return rc;
+}
 
 static u32 bnxt_get_rxfh_indir_size(struct net_device *dev)
 {
@@ -1885,9 +2047,8 @@ const struct ethtool_ops bnxt_ethtool_ops = {
        .get_ringparam          = bnxt_get_ringparam,
        .get_channels           = bnxt_get_channels,
        .set_channels           = bnxt_set_channels,
-#ifdef CONFIG_RFS_ACCEL
        .get_rxnfc              = bnxt_get_rxnfc,
-#endif
+       .set_rxnfc              = bnxt_set_rxnfc,
        .get_rxfh_indir_size    = bnxt_get_rxfh_indir_size,
        .get_rxfh_key_size      = bnxt_get_rxfh_key_size,
        .get_rxfh               = bnxt_get_rxfh,
index 04a96cc3498a8035e8aa3820ba04d1a13b7b11e3..0456d5b5d689d84fe2155e15759c729633a07fdf 100644 (file)
@@ -215,6 +215,9 @@ struct hwrm_async_event_cmpl_dcb_config_change {
        __le16 event_id;
        #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL
        __le32 event_data2;
+       #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA2_ETS 0x1UL
+       #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA2_PFC 0x2UL
+       #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA2_APP 0x4UL
        u8 opaque_v;
        #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_V           0x1UL
        #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_OPAQUE_MASK 0xfeUL
@@ -224,6 +227,14 @@ struct hwrm_async_event_cmpl_dcb_config_change {
        __le32 event_data1;
        #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffffUL
        #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_PORT_ID_SFT 0
+       #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_ROCE_PRIORITY_MASK 0xff0000UL
+       #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_ROCE_PRIORITY_SFT 16
+       #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_ROCE_PRIORITY_NONE (0xffUL << 16)
+       #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_ROCE_PRIORITY_LAST    HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_ROCE_PRIORITY_NONE
+       #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_L2_PRIORITY_MASK 0xff000000UL
+       #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_L2_PRIORITY_SFT 24
+       #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_L2_PRIORITY_NONE (0xffUL << 24)
+       #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_L2_PRIORITY_LAST    HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_L2_PRIORITY_NONE
 };
 
 /* HWRM Asynchronous Event Completion Record for port connection not allowed (16 bytes) */
@@ -485,12 +496,12 @@ struct hwrm_async_event_cmpl_hwrm_error {
        #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP 0x1UL
 };
 
-/* HW Resource Manager Specification 1.5.1 */
+/* HW Resource Manager Specification 1.5.4 */
 #define HWRM_VERSION_MAJOR     1
 #define HWRM_VERSION_MINOR     5
-#define HWRM_VERSION_UPDATE    1
+#define HWRM_VERSION_UPDATE    4
 
-#define HWRM_VERSION_STR       "1.5.1"
+#define HWRM_VERSION_STR       "1.5.4"
 /*
  * Following is the signature for HWRM message field that indicates not
  * applicable (All F's). Need to cast it the size of the field if needed.
@@ -612,6 +623,9 @@ struct cmd_nums {
        #define HWRM_FW_QSTATUS                            (0xc1UL)
        #define HWRM_FW_SET_TIME                                   (0xc8UL)
        #define HWRM_FW_GET_TIME                                   (0xc9UL)
+       #define HWRM_FW_SET_STRUCTURED_DATA                        (0xcaUL)
+       #define HWRM_FW_GET_STRUCTURED_DATA                        (0xcbUL)
+       #define HWRM_FW_IPC_MAILBOX                                (0xccUL)
        #define HWRM_EXEC_FWD_RESP                                 (0xd0UL)
        #define HWRM_REJECT_FWD_RESP                               (0xd1UL)
        #define HWRM_FWD_RESP                                      (0xd2UL)
@@ -626,6 +640,8 @@ struct cmd_nums {
        #define HWRM_DBG_WRITE_DIRECT                              (0xff12UL)
        #define HWRM_DBG_WRITE_INDIRECT                    (0xff13UL)
        #define HWRM_DBG_DUMP                                      (0xff14UL)
+       #define HWRM_NVM_GET_VARIABLE                              (0xfff1UL)
+       #define HWRM_NVM_SET_VARIABLE                              (0xfff2UL)
        #define HWRM_NVM_INSTALL_UPDATE                    (0xfff3UL)
        #define HWRM_NVM_MODIFY                            (0xfff4UL)
        #define HWRM_NVM_VERIFY_UPDATE                             (0xfff5UL)
@@ -1399,6 +1415,7 @@ struct hwrm_func_drv_rgtr_input {
        #define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI                     0x68UL
        #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864                   0x73UL
        #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2                0x74UL
+       #define FUNC_DRV_RGTR_REQ_OS_TYPE_UEFI                     0x8000UL
        u8 ver_maj;
        u8 ver_min;
        u8 ver_upd;
@@ -1549,7 +1566,7 @@ struct hwrm_port_phy_cfg_input {
        __le64 resp_addr;
        __le32 flags;
        #define PORT_PHY_CFG_REQ_FLAGS_RESET_PHY                    0x1UL
-       #define PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DOWN              0x2UL
+       #define PORT_PHY_CFG_REQ_FLAGS_DEPRECATED                   0x2UL
        #define PORT_PHY_CFG_REQ_FLAGS_FORCE                        0x4UL
        #define PORT_PHY_CFG_REQ_FLAGS_RESTART_AUTONEG              0x8UL
        #define PORT_PHY_CFG_REQ_FLAGS_EEE_ENABLE                   0x10UL
@@ -1562,6 +1579,7 @@ struct hwrm_port_phy_cfg_input {
        #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE         0x800UL
        #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_ENABLE          0x1000UL
        #define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_DISABLE         0x2000UL
+       #define PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN               0x4000UL
        __le32 enables;
        #define PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE                  0x1UL
        #define PORT_PHY_CFG_REQ_ENABLES_AUTO_DUPLEX                0x2UL
@@ -4023,6 +4041,71 @@ struct hwrm_fw_set_time_output {
        u8 valid;
 };
 
+/* hwrm_fw_set_structured_data */
+/* Input (32 bytes) */
+struct hwrm_fw_set_structured_data_input {
+       __le16 req_type;
+       __le16 cmpl_ring;
+       __le16 seq_id;
+       __le16 target_id;
+       __le64 resp_addr;
+       __le64 src_data_addr;
+       __le16 data_len;
+       u8 hdr_cnt;
+       u8 unused_0[5];
+};
+
+/* Output (16 bytes) */
+struct hwrm_fw_set_structured_data_output {
+       __le16 error_code;
+       __le16 req_type;
+       __le16 seq_id;
+       __le16 resp_len;
+       __le32 unused_0;
+       u8 unused_1;
+       u8 unused_2;
+       u8 unused_3;
+       u8 valid;
+};
+
+/* hwrm_fw_get_structured_data */
+/* Input (32 bytes) */
+struct hwrm_fw_get_structured_data_input {
+       __le16 req_type;
+       __le16 cmpl_ring;
+       __le16 seq_id;
+       __le16 target_id;
+       __le64 resp_addr;
+       __le64 dest_data_addr;
+       __le16 data_len;
+       __le16 structure_id;
+       __le16 subtype;
+       #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_ALL             0xffffUL
+       #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NEAR_BRIDGE_ADMIN 0x100UL
+       #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NEAR_BRIDGE_PEER 0x101UL
+       #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NEAR_BRIDGE_OPERATIONAL 0x102UL
+       #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_ADMIN 0x200UL
+       #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_PEER  0x201UL
+       #define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_OPERATIONAL 0x202UL
+       u8 count;
+       u8 unused_0;
+};
+
+/* Output (16 bytes) */
+struct hwrm_fw_get_structured_data_output {
+       __le16 error_code;
+       __le16 req_type;
+       __le16 seq_id;
+       __le16 resp_len;
+       u8 hdr_cnt;
+       u8 unused_0;
+       __le16 unused_1;
+       u8 unused_2;
+       u8 unused_3;
+       u8 unused_4;
+       u8 valid;
+};
+
 /* hwrm_exec_fwd_resp */
 /* Input (128 bytes) */
 struct hwrm_exec_fwd_resp_input {
index ec6cd18842c3cf15d6fa215fee145b693a04466e..60e2af8678bdc2bcd71f5015d35ca8599268b8c5 100644 (file)
@@ -774,8 +774,8 @@ static int bnxt_vf_set_link(struct bnxt *bp, struct bnxt_vf_info *vf)
 
                if (vf->flags & BNXT_VF_LINK_UP) {
                        /* if physical link is down, force link up on VF */
-                       if (phy_qcfg_resp.link ==
-                           PORT_PHY_QCFG_RESP_LINK_NO_LINK) {
+                       if (phy_qcfg_resp.link !=
+                           PORT_PHY_QCFG_RESP_LINK_LINK) {
                                phy_qcfg_resp.link =
                                        PORT_PHY_QCFG_RESP_LINK_LINK;
                                phy_qcfg_resp.link_speed = cpu_to_le16(
index 4464bc5db9347654806831b5e0f1d6042a3f8c75..33638dc0e104be19ee4277c1e37dc1482bef61ca 100644 (file)
@@ -971,13 +971,6 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
        return phy_ethtool_set_eee(priv->phydev, e);
 }
 
-static int bcmgenet_nway_reset(struct net_device *dev)
-{
-       struct bcmgenet_priv *priv = netdev_priv(dev);
-
-       return genphy_restart_aneg(priv->phydev);
-}
-
 /* standard ethtool support functions. */
 static const struct ethtool_ops bcmgenet_ethtool_ops = {
        .get_strings            = bcmgenet_get_strings,
@@ -991,7 +984,7 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = {
        .set_wol                = bcmgenet_set_wol,
        .get_eee                = bcmgenet_get_eee,
        .set_eee                = bcmgenet_set_eee,
-       .nway_reset             = bcmgenet_nway_reset,
+       .nway_reset             = phy_ethtool_nway_reset,
        .get_coalesce           = bcmgenet_get_coalesce,
        .set_coalesce           = bcmgenet_set_coalesce,
        .get_link_ksettings     = bcmgenet_get_link_ksettings,
index cb312e4c89f46ef8e29fda76d107093b72ae9d3a..435a2e4739d16d721918b55cc54c855fd3f50a56 100644 (file)
@@ -2219,7 +2219,7 @@ static int sbmac_init(struct platform_device *pldev, long long base)
 
        dev->netdev_ops = &sbmac_netdev_ops;
        dev->watchdog_timeo = TX_TIMEOUT;
-       dev->max_mtu = 0;
+       dev->min_mtu = 0;
        dev->max_mtu = ENET_PACKET_SIZE;
 
        netif_napi_add(dev, &sc->napi, sbmac_poll, 16);
index b200a783443eb983a87a617cf7620b2227430107..112030828c4b7074a00c8475696461fb4a6c97cc 100644 (file)
@@ -177,6 +177,7 @@ bnad_txcmpl_process(struct bnad *bnad, struct bna_tcb *tcb)
                return 0;
 
        hw_cons = *(tcb->hw_consumer_index);
+       rmb();
        cons = tcb->consumer_index;
        q_depth = tcb->q_depth;
 
@@ -3094,7 +3095,7 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev)
        BNA_QE_INDX_INC(prod, q_depth);
        tcb->producer_index = prod;
 
-       smp_mb();
+       wmb();
 
        if (unlikely(!test_bit(BNAD_TXQ_TX_STARTED, &tcb->flags)))
                return NETDEV_TX_OK;
@@ -3102,7 +3103,6 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev)
        skb_tx_timestamp(skb);
 
        bna_txq_prod_indx_doorbell(tcb);
-       smp_mb();
 
        return NETDEV_TX_OK;
 }
index 31f61a744d6655b1b999ea34d4aab9bee7ecd3bc..286593922139e1f59e6c66c33df8a4d54ee33314 100644 (file)
@@ -240,40 +240,46 @@ static const char *bnad_net_stats_strings[] = {
 #define BNAD_ETHTOOL_STATS_NUM ARRAY_SIZE(bnad_net_stats_strings)
 
 static int
-bnad_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
+bnad_get_link_ksettings(struct net_device *netdev,
+                       struct ethtool_link_ksettings *cmd)
 {
-       cmd->supported = SUPPORTED_10000baseT_Full;
-       cmd->advertising = ADVERTISED_10000baseT_Full;
-       cmd->autoneg = AUTONEG_DISABLE;
-       cmd->supported |= SUPPORTED_FIBRE;
-       cmd->advertising |= ADVERTISED_FIBRE;
-       cmd->port = PORT_FIBRE;
-       cmd->phy_address = 0;
+       u32 supported, advertising;
+
+       supported = SUPPORTED_10000baseT_Full;
+       advertising = ADVERTISED_10000baseT_Full;
+       cmd->base.autoneg = AUTONEG_DISABLE;
+       supported |= SUPPORTED_FIBRE;
+       advertising |= ADVERTISED_FIBRE;
+       cmd->base.port = PORT_FIBRE;
+       cmd->base.phy_address = 0;
 
        if (netif_carrier_ok(netdev)) {
-               ethtool_cmd_speed_set(cmd, SPEED_10000);
-               cmd->duplex = DUPLEX_FULL;
+               cmd->base.speed = SPEED_10000;
+               cmd->base.duplex = DUPLEX_FULL;
        } else {
-               ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
-               cmd->duplex = DUPLEX_UNKNOWN;
+               cmd->base.speed = SPEED_UNKNOWN;
+               cmd->base.duplex = DUPLEX_UNKNOWN;
        }
-       cmd->transceiver = XCVR_EXTERNAL;
-       cmd->maxtxpkt = 0;
-       cmd->maxrxpkt = 0;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
 
        return 0;
 }
 
 static int
-bnad_set_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
+bnad_set_link_ksettings(struct net_device *netdev,
+                       const struct ethtool_link_ksettings *cmd)
 {
        /* 10G full duplex setting supported only */
-       if (cmd->autoneg == AUTONEG_ENABLE)
-               return -EOPNOTSUPP; else {
-               if ((ethtool_cmd_speed(cmd) == SPEED_10000)
-                   && (cmd->duplex == DUPLEX_FULL))
-                       return 0;
-       }
+       if (cmd->base.autoneg == AUTONEG_ENABLE)
+               return -EOPNOTSUPP;
+
+       if ((cmd->base.speed == SPEED_10000) &&
+           (cmd->base.duplex == DUPLEX_FULL))
+               return 0;
 
        return -EOPNOTSUPP;
 }
@@ -1118,8 +1124,6 @@ out:
 }
 
 static const struct ethtool_ops bnad_ethtool_ops = {
-       .get_settings = bnad_get_settings,
-       .set_settings = bnad_set_settings,
        .get_drvinfo = bnad_get_drvinfo,
        .get_wol = bnad_get_wol,
        .get_link = ethtool_op_get_link,
@@ -1137,6 +1141,8 @@ static const struct ethtool_ops bnad_ethtool_ops = {
        .set_eeprom = bnad_set_eeprom,
        .flash_device = bnad_flash_device,
        .get_ts_info = ethtool_op_get_ts_info,
+       .get_link_ksettings = bnad_get_link_ksettings,
+       .set_link_ksettings = bnad_set_link_ksettings,
 };
 
 void
index e1847ce6308deba39998fcdd81e9d3d272a23873..0e489bb82456adc5018694495cb52955c04d79fd 100644 (file)
@@ -32,7 +32,9 @@
 #include <linux/of_gpio.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
-
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
 #include "macb.h"
 
 #define MACB_RX_BUFFER_SIZE    128
                                        | MACB_BIT(TXERR))
 #define MACB_TX_INT_FLAGS      (MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP))
 
-#define MACB_MAX_TX_LEN                ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1))
-#define GEM_MAX_TX_LEN         ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1))
+/* Max length of transmit frame must be a multiple of 8 bytes */
+#define MACB_TX_LEN_ALIGN      8
+#define MACB_MAX_TX_LEN                ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1)))
+#define GEM_MAX_TX_LEN         ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1)))
 
 #define GEM_MTU_MIN_SIZE       ETH_MIN_MTU
+#define MACB_NETIF_LSO         (NETIF_F_TSO | NETIF_F_UFO)
 
 #define MACB_WOL_HAS_MAGIC_PACKET      (0x1 << 0)
 #define MACB_WOL_ENABLED               (0x1 << 1)
@@ -1223,7 +1228,8 @@ static void macb_poll_controller(struct net_device *dev)
 
 static unsigned int macb_tx_map(struct macb *bp,
                                struct macb_queue *queue,
-                               struct sk_buff *skb)
+                               struct sk_buff *skb,
+                               unsigned int hdrlen)
 {
        dma_addr_t mapping;
        unsigned int len, entry, i, tx_head = queue->tx_head;
@@ -1231,14 +1237,27 @@ static unsigned int macb_tx_map(struct macb *bp,
        struct macb_dma_desc *desc;
        unsigned int offset, size, count = 0;
        unsigned int f, nr_frags = skb_shinfo(skb)->nr_frags;
-       unsigned int eof = 1;
-       u32 ctrl;
+       unsigned int eof = 1, mss_mfs = 0;
+       u32 ctrl, lso_ctrl = 0, seq_ctrl = 0;
+
+       /* LSO */
+       if (skb_shinfo(skb)->gso_size != 0) {
+               if (ip_hdr(skb)->protocol == IPPROTO_UDP)
+                       /* UDP - UFO */
+                       lso_ctrl = MACB_LSO_UFO_ENABLE;
+               else
+                       /* TCP - TSO */
+                       lso_ctrl = MACB_LSO_TSO_ENABLE;
+       }
 
        /* First, map non-paged data */
        len = skb_headlen(skb);
+
+       /* first buffer length */
+       size = hdrlen;
+
        offset = 0;
        while (len) {
-               size = min(len, bp->max_tx_length);
                entry = macb_tx_ring_wrap(bp, tx_head);
                tx_skb = &queue->tx_skb[entry];
 
@@ -1258,6 +1277,8 @@ static unsigned int macb_tx_map(struct macb *bp,
                offset += size;
                count++;
                tx_head++;
+
+               size = min(len, bp->max_tx_length);
        }
 
        /* Then, map paged data from fragments */
@@ -1311,6 +1332,21 @@ static unsigned int macb_tx_map(struct macb *bp,
        desc = &queue->tx_ring[entry];
        desc->ctrl = ctrl;
 
+       if (lso_ctrl) {
+               if (lso_ctrl == MACB_LSO_UFO_ENABLE)
+                       /* include header and FCS in value given to h/w */
+                       mss_mfs = skb_shinfo(skb)->gso_size +
+                                       skb_transport_offset(skb) +
+                                       ETH_FCS_LEN;
+               else /* TSO */ {
+                       mss_mfs = skb_shinfo(skb)->gso_size;
+                       /* TCP Sequence Number Source Select
+                        * can be set only for TSO
+                        */
+                       seq_ctrl = 0;
+               }
+       }
+
        do {
                i--;
                entry = macb_tx_ring_wrap(bp, i);
@@ -1325,6 +1361,16 @@ static unsigned int macb_tx_map(struct macb *bp,
                if (unlikely(entry == (bp->tx_ring_size - 1)))
                        ctrl |= MACB_BIT(TX_WRAP);
 
+               /* First descriptor is header descriptor */
+               if (i == queue->tx_head) {
+                       ctrl |= MACB_BF(TX_LSO, lso_ctrl);
+                       ctrl |= MACB_BF(TX_TCP_SEQ_SRC, seq_ctrl);
+               } else
+                       /* Only set MSS/MFS on payload descriptors
+                        * (second or later descriptor)
+                        */
+                       ctrl |= MACB_BF(MSS_MFS, mss_mfs);
+
                /* Set TX buffer descriptor */
                macb_set_addr(desc, tx_skb->mapping);
                /* desc->addr must be visible to hardware before clearing
@@ -1350,6 +1396,43 @@ dma_error:
        return 0;
 }
 
+/* Decide whether an skb's buffer layout is compatible with LSO.
+ *
+ * Linear skbs (a single buffer) keep all of @features. For non-linear skbs
+ * the payload part of the linear area and every page fragment except the
+ * last must have a size that is a multiple of MACB_TX_LEN_ALIGN; if any of
+ * them does not, MACB_NETIF_LSO is cleared from the returned feature set.
+ *
+ * @skb:      packet about to be transmitted
+ * @dev:      transmitting device (unused here)
+ * @features: feature set proposed for this packet
+ *
+ * Returns the feature set to use for @skb.
+ */
+static netdev_features_t macb_features_check(struct sk_buff *skb,
+                                            struct net_device *dev,
+                                            netdev_features_t features)
+{
+       unsigned int nr_frags, f;
+       unsigned int hdrlen;
+
+       /* Validate LSO compatibility */
+
+       /* there is only one buffer */
+       if (!skb_is_nonlinear(skb))
+               return features;
+
+       /* length of header */
+       hdrlen = skb_transport_offset(skb);
+       if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+               hdrlen += tcp_hdrlen(skb);
+
+       /* For LSO:
+        * When software supplies two or more payload buffers all payload buffers
+        * apart from the last must be a multiple of 8 bytes in size.
+        */
+       if (!IS_ALIGNED(skb_headlen(skb) - hdrlen, MACB_TX_LEN_ALIGN))
+               return features & ~MACB_NETIF_LSO;
+
+       nr_frags = skb_shinfo(skb)->nr_frags;
+       /* No need to check the last fragment. The bound is written as
+        * "f + 1 < nr_frags" instead of decrementing nr_frags, because a
+        * non-linear skb may have nr_frags == 0 (e.g. data held only in a
+        * frag_list) and the decrement would wrap the unsigned counter,
+        * walking far past the end of frags[].
+        */
+       for (f = 0; f + 1 < nr_frags; f++) {
+               const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
+
+               if (!IS_ALIGNED(skb_frag_size(frag), MACB_TX_LEN_ALIGN))
+                       return features & ~MACB_NETIF_LSO;
+       }
+       return features;
+}
+
 static inline int macb_clear_csum(struct sk_buff *skb)
 {
        /* no change for packets without checksum offloading */
@@ -1374,7 +1457,28 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
        struct macb *bp = netdev_priv(dev);
        struct macb_queue *queue = &bp->queues[queue_index];
        unsigned long flags;
-       unsigned int count, nr_frags, frag_size, f;
+       unsigned int desc_cnt, nr_frags, frag_size, f;
+       unsigned int hdrlen;
+       bool is_lso, is_udp = 0;
+
+       is_lso = (skb_shinfo(skb)->gso_size != 0);
+
+       if (is_lso) {
+               is_udp = !!(ip_hdr(skb)->protocol == IPPROTO_UDP);
+
+               /* length of headers */
+               if (is_udp)
+                       /* only queue eth + ip headers separately for UDP */
+                       hdrlen = skb_transport_offset(skb);
+               else
+                       hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
+               if (skb_headlen(skb) < hdrlen) {
+                       netdev_err(bp->dev, "Error - LSO headers fragmented!!!\n");
+                       /* if this is required, would need to copy to single buffer */
+                       return NETDEV_TX_BUSY;
+               }
+       } else
+               hdrlen = min(skb_headlen(skb), bp->max_tx_length);
 
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
        netdev_vdbg(bp->dev,
@@ -1389,18 +1493,22 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
         * socket buffer: skb fragments of jumbo frames may need to be
         * split into many buffer descriptors.
         */
-       count = DIV_ROUND_UP(skb_headlen(skb), bp->max_tx_length);
+       if (is_lso && (skb_headlen(skb) > hdrlen))
+               /* extra header descriptor if also payload in first buffer */
+               desc_cnt = DIV_ROUND_UP((skb_headlen(skb) - hdrlen), bp->max_tx_length) + 1;
+       else
+               desc_cnt = DIV_ROUND_UP(skb_headlen(skb), bp->max_tx_length);
        nr_frags = skb_shinfo(skb)->nr_frags;
        for (f = 0; f < nr_frags; f++) {
                frag_size = skb_frag_size(&skb_shinfo(skb)->frags[f]);
-               count += DIV_ROUND_UP(frag_size, bp->max_tx_length);
+               desc_cnt += DIV_ROUND_UP(frag_size, bp->max_tx_length);
        }
 
        spin_lock_irqsave(&bp->lock, flags);
 
        /* This is a hard error, log it. */
        if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
-                      bp->tx_ring_size) < count) {
+                      bp->tx_ring_size) < desc_cnt) {
                netif_stop_subqueue(dev, queue_index);
                spin_unlock_irqrestore(&bp->lock, flags);
                netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n",
@@ -1414,7 +1522,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
        }
 
        /* Map socket buffer for DMA transfer */
-       if (!macb_tx_map(bp, queue, skb)) {
+       if (!macb_tx_map(bp, queue, skb, hdrlen)) {
                dev_kfree_skb_any(skb);
                goto unlock;
        }
@@ -2354,6 +2462,7 @@ static const struct net_device_ops macb_netdev_ops = {
        .ndo_poll_controller    = macb_poll_controller,
 #endif
        .ndo_set_features       = macb_set_features,
+       .ndo_features_check     = macb_features_check,
 };
 
 /* Configure peripheral capabilities according to device tree
@@ -2560,6 +2669,11 @@ static int macb_init(struct platform_device *pdev)
 
        /* Set features */
        dev->hw_features = NETIF_F_SG;
+
+       /* Check LSO capability */
+       if (GEM_BFEXT(PBUF_LSO, gem_readl(bp, DCFG6)))
+               dev->hw_features |= MACB_NETIF_LSO;
+
        /* Checksum offload is only available on gem with packet buffer */
        if (macb_is_gem(bp) && !(bp->caps & MACB_CAPS_FIFO_MODE))
                dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
@@ -2732,6 +2846,12 @@ static int at91ether_start_xmit(struct sk_buff *skb, struct net_device *dev)
                lp->skb_length = skb->len;
                lp->skb_physaddr = dma_map_single(NULL, skb->data, skb->len,
                                                        DMA_TO_DEVICE);
+               if (dma_mapping_error(NULL, lp->skb_physaddr)) {
+                       dev_kfree_skb_any(skb);
+                       dev->stats.tx_dropped++;
+                       netdev_err(dev, "%s: DMA mapping error\n", __func__);
+                       return NETDEV_TX_OK;
+               }
 
                /* Set address of the data in the Transmit Address register */
                macb_writel(lp, TAR, lp->skb_physaddr);
index 1216950c97d15233761c3392f4b4229d6c6068fa..d67adad67be1c097a339d993a866597b4d202f4d 100644 (file)
 #define GEM_TX_PKT_BUFF_OFFSET                 21
 #define GEM_TX_PKT_BUFF_SIZE                   1
 
+/* Bitfields in DCFG6. */
+#define GEM_PBUF_LSO_OFFSET                    27
+#define GEM_PBUF_LSO_SIZE                      1
+
 /* Constants for CLK */
 #define MACB_CLK_DIV8                          0
 #define MACB_CLK_DIV16                         1
 #define MACB_CAPS_SG_DISABLED                  0x40000000
 #define MACB_CAPS_MACB_IS_GEM                  0x80000000
 
+/* LSO settings */
+#define MACB_LSO_UFO_ENABLE                    0x01
+#define MACB_LSO_TSO_ENABLE                    0x02
+
 /* Bit manipulation macros */
 #define MACB_BIT(name)                                 \
        (1 << MACB_##name##_OFFSET)
@@ -545,6 +553,12 @@ struct macb_dma_desc {
 #define MACB_TX_LAST_SIZE                      1
 #define MACB_TX_NOCRC_OFFSET                   16
 #define MACB_TX_NOCRC_SIZE                     1
+#define MACB_MSS_MFS_OFFSET                    16
+#define MACB_MSS_MFS_SIZE                      14
+#define MACB_TX_LSO_OFFSET                     17
+#define MACB_TX_LSO_SIZE                       2
+#define MACB_TX_TCP_SEQ_SRC_OFFSET             19
+#define MACB_TX_TCP_SEQ_SRC_SIZE               1
 #define MACB_TX_BUF_EXHAUSTED_OFFSET           27
 #define MACB_TX_BUF_EXHAUSTED_SIZE             1
 #define MACB_TX_UNDERRUN_OFFSET                        28
index 92f411c9f0df0da3e827d2ae571f15d2a357410f..c0679c21638ad3cbc01816942bf4ed51d11b85b9 100644 (file)
@@ -74,4 +74,16 @@ config OCTEON_MGMT_ETHERNET
          port on Cavium Networks' Octeon CN57XX, CN56XX, CN55XX,
          CN54XX, CN52XX, and CN6XXX chips.
 
+config LIQUIDIO_VF
+       tristate "Cavium LiquidIO VF support"
+       depends on 64BIT && PCI_MSI
+       select PTP_1588_CLOCK
+       ---help---
+         This driver supports Cavium LiquidIO Intelligent Server Adapter
+         based on CN23XX chips.
+
+         To compile this driver as a module, choose M here: The module
+         will be called liquidio_vf. MSI-X interrupt support is required
+         for this driver to work correctly.
+
 endif # NET_VENDOR_CAVIUM
index 5a27b2a440392e8487118046902cc5ca19bd0dc4..c4d411d1aa287a76ff8d8a446fa56a4c352d694a 100644 (file)
@@ -11,8 +11,32 @@ liquidio-$(CONFIG_LIQUIDIO) += lio_ethtool.o \
                        cn66xx_device.o    \
                        cn68xx_device.o    \
                        cn23xx_pf_device.o \
+                       cn23xx_vf_device.o \
+                       octeon_mailbox.o   \
                        octeon_mem_ops.o   \
                        octeon_droq.o      \
                        octeon_nic.o
 
 liquidio-objs := lio_main.o octeon_console.o $(liquidio-y)
+
+obj-$(CONFIG_LIQUIDIO_VF) += liquidio_vf.o
+
+ifeq ($(CONFIG_LIQUIDIO)$(CONFIG_LIQUIDIO_VF), yy)
+       liquidio_vf-objs := lio_vf_main.o
+else
+liquidio_vf-$(CONFIG_LIQUIDIO_VF) += lio_ethtool.o \
+                       lio_core.o         \
+                       request_manager.o  \
+                       response_manager.o \
+                       octeon_device.o    \
+                       cn66xx_device.o    \
+                       cn68xx_device.o    \
+                       cn23xx_pf_device.o \
+                       cn23xx_vf_device.o \
+                       octeon_mailbox.o   \
+                       octeon_mem_ops.o   \
+                       octeon_droq.o      \
+                       octeon_nic.o
+
+liquidio_vf-objs := lio_vf_main.o $(liquidio_vf-y)
+endif
index 380a64115a982bb15d1f6bbe5c4122711aa79c1a..962dcbcef8b5a05d93c42960532923daf22dfe93 100644 (file)
@@ -1,28 +1,23 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
-#include <linux/netdevice.h>
 #include <linux/vmalloc.h>
+#include <linux/etherdevice.h>
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
@@ -30,6 +25,7 @@
 #include "octeon_device.h"
 #include "cn23xx_pf_device.h"
 #include "octeon_main.h"
+#include "octeon_mailbox.h"
 
 #define RESET_NOTDONE 0
 #define RESET_DONE 1
  */
 #define CN23XX_INPUT_JABBER 64600
 
-#define LIOLUT_RING_DISTRIBUTION 9
-const int liolut_num_vfs_to_rings_per_vf[LIOLUT_RING_DISTRIBUTION] = {
-       0, 8, 4, 2, 2, 2, 1, 1, 1
-};
-
 void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct)
 {
        int i = 0;
@@ -309,9 +300,10 @@ u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us)
 
 static void cn23xx_setup_global_mac_regs(struct octeon_device *oct)
 {
-       u64 reg_val;
        u16 mac_no = oct->pcie_port;
        u16 pf_num = oct->pf_num;
+       u64 reg_val;
+       u64 temp;
 
        /* programming SRN and TRS for each MAC(0..3)  */
 
@@ -333,6 +325,14 @@ static void cn23xx_setup_global_mac_regs(struct octeon_device *oct)
        /* setting TRS <23:16> */
        reg_val = reg_val |
                  (oct->sriov_info.trs << CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS);
+       /* setting RPVF <39:32> */
+       temp = oct->sriov_info.rings_per_vf & 0xff;
+       reg_val |= (temp << CN23XX_PKT_MAC_CTL_RINFO_RPVF_BIT_POS);
+
+       /* setting NVFS <55:48> */
+       temp = oct->sriov_info.max_vfs & 0xff;
+       reg_val |= (temp << CN23XX_PKT_MAC_CTL_RINFO_NVFS_BIT_POS);
+
        /* write these settings to MAC register */
        octeon_write_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num),
                           reg_val);
@@ -399,11 +399,12 @@ static int cn23xx_reset_io_queues(struct octeon_device *oct)
 
 static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct)
 {
+       struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+       struct octeon_instr_queue *iq;
+       u64 intr_threshold, reg_val;
        u32 q_no, ern, srn;
        u64 pf_num;
-       u64 intr_threshold, reg_val;
-       struct octeon_instr_queue *iq;
-       struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+       u64 vf_num;
 
        pf_num = oct->pf_num;
 
@@ -414,12 +415,22 @@ static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct)
                return -1;
 
        /** Set the MAC_NUM and PVF_NUM in IQ_PKT_CONTROL reg
-       * for all queues.Only PF can set these bits.
-       * bits 29:30 indicate the MAC num.
-       * bits 32:47 indicate the PVF num.
-       */
+        * for all queues.Only PF can set these bits.
+        * bits 29:30 indicate the MAC num.
+        * bits 32:47 indicate the PVF num.
+        */
        for (q_no = 0; q_no < ern; q_no++) {
                reg_val = oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS;
+
+               /* for VF assigned queues. */
+               if (q_no < oct->sriov_info.pf_srn) {
+                       vf_num = q_no / oct->sriov_info.rings_per_vf;
+                       vf_num += 1; /* VF1, VF2,........ */
+               } else {
+                       vf_num = 0;
+               }
+
+               reg_val |= vf_num << CN23XX_PKT_INPUT_CTL_VF_NUM_POS;
                reg_val |= pf_num << CN23XX_PKT_INPUT_CTL_PF_NUM_POS;
 
                octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
@@ -530,8 +541,8 @@ static void cn23xx_pf_setup_global_output_regs(struct octeon_device *oct)
        writeq(0x40, (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_WMARK);
 
        /** Disabling setting OQs in reset when ring has no dorebells
-         * enabling this will cause of head of line blocking
-         */
+        * enabling this will cause of head of line blocking
+        */
        /* Do it only for pass1.1. and pass1.2 */
        if ((oct->rev_id == OCTEON_CN23XX_REV_1_0) ||
            (oct->rev_id == OCTEON_CN23XX_REV_1_1))
@@ -662,6 +673,118 @@ static void cn23xx_setup_oq_regs(struct octeon_device *oct, u32 oq_no)
        }
 }
 
+/* Delayed-work handler for the PF side of the PF/VF mailbox.
+ *
+ * On revisions at or above OCTEON_CN23XX_REV_1_1 it only processes the
+ * message of the mailbox that owns this work item. On older revisions it
+ * works as a poller: it reads the mailbox interrupt register and writes the
+ * value back, checks the first ring of every allocated VF for a pending
+ * message, handles what it finds, and re-queues itself to run in 10 ms.
+ */
+static void cn23xx_pf_mbox_thread(struct work_struct *work)
+{
+       struct cavium_wk *wk = (struct cavium_wk *)work;
+       struct octeon_mbox *mbox = (struct octeon_mbox *)wk->ctxptr;
+       struct octeon_device *oct = mbox->oct_dev;
+       struct octeon_mbox *vf_mbox;
+       u64 pending, sig;
+       u32 vf;
+
+       if (oct->rev_id >= OCTEON_CN23XX_REV_1_1) {
+               octeon_mbox_process_message(mbox);
+               return;
+       }
+
+       /* read and clear by writing 1 */
+       pending = readq(mbox->mbox_int_reg);
+       writeq(pending, mbox->mbox_int_reg);
+
+       for (vf = 0; vf < oct->sriov_info.num_vfs_alloced; vf++) {
+               /* a VF's mailbox is indexed by that VF's first ring */
+               vf_mbox = oct->mbox[vf * oct->sriov_info.rings_per_vf];
+
+               sig = readq(vf_mbox->mbox_write_reg);
+
+               /* nothing written, or only an ack: no message to handle */
+               if (!sig || sig == OCTEON_PFVFACK)
+                       continue;
+
+               if (octeon_mbox_read(vf_mbox))
+                       octeon_mbox_process_message(vf_mbox);
+       }
+
+       schedule_delayed_work(&wk->work, msecs_to_jiffies(10));
+}
+
+/* Allocate and initialise one PF/VF mailbox for every possible VF.
+ *
+ * The mailbox of VF i is stored in oct->mbox[] at the index of that VF's
+ * first ring (i * rings_per_vf). On revisions older than
+ * OCTEON_CN23XX_REV_1_1 the polling work of mailbox 0 is queued as well.
+ *
+ * Returns 0 on success or when max_vfs is 0, and 1 if an allocation fails;
+ * in that case every mailbox created so far is freed and its slot cleared.
+ */
+static int cn23xx_setup_pf_mbox(struct octeon_device *oct)
+{
+       struct octeon_mbox *mbox = NULL;
+       u16 mac_no = oct->pcie_port;
+       u16 pf_num = oct->pf_num;
+       u32 q_no, i;
+
+       if (!oct->sriov_info.max_vfs)
+               return 0;
+
+       for (i = 0; i < oct->sriov_info.max_vfs; i++) {
+               q_no = i * oct->sriov_info.rings_per_vf;
+
+               /* zeroed allocation; replaces vmalloc() + memset() */
+               mbox = vzalloc(sizeof(*mbox));
+               if (!mbox)
+                       goto free_mbox;
+
+               spin_lock_init(&mbox->lock);
+
+               mbox->oct_dev = oct;
+
+               mbox->q_no = q_no;
+
+               mbox->state = OCTEON_MBOX_STATE_IDLE;
+
+               /* PF mbox interrupt reg */
+               mbox->mbox_int_reg = (u8 *)oct->mmio[0].hw_addr +
+                                    CN23XX_SLI_MAC_PF_MBOX_INT(mac_no, pf_num);
+
+               /* PF writes into SIG0 reg */
+               mbox->mbox_write_reg = (u8 *)oct->mmio[0].hw_addr +
+                                      CN23XX_SLI_PKT_PF_VF_MBOX_SIG(q_no, 0);
+
+               /* PF reads from SIG1 reg */
+               mbox->mbox_read_reg = (u8 *)oct->mmio[0].hw_addr +
+                                     CN23XX_SLI_PKT_PF_VF_MBOX_SIG(q_no, 1);
+
+               /*Mail Box Thread creation*/
+               INIT_DELAYED_WORK(&mbox->mbox_poll_wk.work,
+                                 cn23xx_pf_mbox_thread);
+               mbox->mbox_poll_wk.ctxptr = (void *)mbox;
+
+               oct->mbox[q_no] = mbox;
+
+               writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+       }
+
+       if (oct->rev_id < OCTEON_CN23XX_REV_1_1)
+               schedule_delayed_work(&oct->mbox[0]->mbox_poll_wk.work,
+                                     msecs_to_jiffies(0));
+
+       return 0;
+
+free_mbox:
+       /* Unwind with the same VF -> ring mapping used for storing: the
+        * mailboxes sit at oct->mbox[i * rings_per_vf], not oct->mbox[i].
+        * Clear each slot so no stale pointer is left behind.
+        */
+       while (i) {
+               i--;
+               q_no = i * oct->sriov_info.rings_per_vf;
+               vfree(oct->mbox[q_no]);
+               oct->mbox[q_no] = NULL;
+       }
+
+       return 1;
+}
+
+/* Release the PF/VF mailboxes of all possible VFs.
+ *
+ * For every VF the mailbox stored at that VF's first ring index has its
+ * delayed work cancelled (waiting for a running instance to finish) before
+ * the mailbox memory is freed. The slot is then cleared so that a stale
+ * pointer cannot be reused or freed a second time.
+ *
+ * Always returns 0.
+ */
+static int cn23xx_free_pf_mbox(struct octeon_device *oct)
+{
+       u32 q_no, i;
+
+       if (!oct->sriov_info.max_vfs)
+               return 0;
+
+       for (i = 0; i < oct->sriov_info.max_vfs; i++) {
+               q_no = i * oct->sriov_info.rings_per_vf;
+
+               /* Skip slots holding no mailbox.
+                * NOTE(review): assumes unused oct->mbox[] entries are
+                * NULL - confirm against the octeon_device allocation.
+                */
+               if (!oct->mbox[q_no])
+                       continue;
+
+               cancel_delayed_work_sync(
+                   &oct->mbox[q_no]->mbox_poll_wk.work);
+               vfree(oct->mbox[q_no]);
+               oct->mbox[q_no] = NULL;
+       }
+
+       return 0;
+}
+
 static int cn23xx_enable_io_queues(struct octeon_device *oct)
 {
        u64 reg_val;
@@ -856,6 +979,29 @@ static u64 cn23xx_pf_msix_interrupt_handler(void *dev)
        return ret;
 }
 
+/* Service the PF mailbox interrupt.
+ *
+ * The PF mailbox interrupt register is read once. For each allocated VF
+ * whose first-ring bit is set in that value, the bit is written back to the
+ * register (presumably acknowledging it), the mailbox is read and, when
+ * octeon_mbox_read() returns non-zero, the mailbox's work item is queued
+ * with no delay.
+ */
+static void cn23xx_handle_pf_mbox_intr(struct octeon_device *oct)
+{
+       struct octeon_mbox *vf_mbox;
+       u64 pending;
+       u32 vf, q_no;
+
+       pending = readq(oct->mbox[0]->mbox_int_reg);
+
+       for (vf = 0; vf < oct->sriov_info.num_vfs_alloced; vf++) {
+               q_no = vf * oct->sriov_info.rings_per_vf;
+
+               if (!(pending & BIT_ULL(q_no)))
+                       continue;
+
+               writeq(BIT_ULL(q_no), oct->mbox[0]->mbox_int_reg);
+
+               vf_mbox = oct->mbox[q_no];
+               if (octeon_mbox_read(vf_mbox))
+                       schedule_delayed_work(&vf_mbox->mbox_poll_wk.work,
+                                             msecs_to_jiffies(0));
+       }
+}
+
 static irqreturn_t cn23xx_interrupt_handler(void *dev)
 {
        struct octeon_device *oct = (struct octeon_device *)dev;
@@ -871,6 +1017,10 @@ static irqreturn_t cn23xx_interrupt_handler(void *dev)
                dev_err(&oct->pci_dev->dev, "OCTEON[%d]: Error Intr: 0x%016llx\n",
                        oct->octeon_id, CVM_CAST64(intr64));
 
+       /* When VFs write into the MBOX_SIG2 reg, this intr is set in PF */
+       if (intr64 & CN23XX_INTR_VF_MBOX)
+               cn23xx_handle_pf_mbox_intr(oct);
+
        if (oct->msix_on != LIO_FLAG_MSIX_ENABLED) {
                if (intr64 & CN23XX_INTR_PKT_DATA)
                        oct->int_status |= OCT_DEV_INTR_PKT_DATA;
@@ -961,6 +1111,13 @@ static void cn23xx_enable_pf_interrupt(struct octeon_device *oct, u8 intr_flag)
                intr_val = readq(cn23xx->intr_enb_reg64);
                intr_val |= CN23XX_INTR_PKT_DATA;
                writeq(intr_val, cn23xx->intr_enb_reg64);
+       } else if ((intr_flag & OCTEON_MBOX_INTR) &&
+                  (oct->sriov_info.max_vfs > 0)) {
+               if (oct->rev_id >= OCTEON_CN23XX_REV_1_1) {
+                       intr_val = readq(cn23xx->intr_enb_reg64);
+                       intr_val |= CN23XX_INTR_VF_MBOX;
+                       writeq(intr_val, cn23xx->intr_enb_reg64);
+               }
        }
 }
 
@@ -976,6 +1133,13 @@ static void cn23xx_disable_pf_interrupt(struct octeon_device *oct, u8 intr_flag)
                intr_val = readq(cn23xx->intr_enb_reg64);
                intr_val &= ~CN23XX_INTR_PKT_DATA;
                writeq(intr_val, cn23xx->intr_enb_reg64);
+       } else if ((intr_flag & OCTEON_MBOX_INTR) &&
+                  (oct->sriov_info.max_vfs > 0)) {
+               if (oct->rev_id >= OCTEON_CN23XX_REV_1_1) {
+                       intr_val = readq(cn23xx->intr_enb_reg64);
+                       intr_val &= ~CN23XX_INTR_VF_MBOX;
+                       writeq(intr_val, cn23xx->intr_enb_reg64);
+               }
        }
 }
 
@@ -1048,50 +1212,59 @@ static void cn23xx_setup_reg_address(struct octeon_device *oct)
 
 static int cn23xx_sriov_config(struct octeon_device *oct)
 {
-       u32 total_rings;
        struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
-       /* num_vfs is already filled for us */
+       u32 max_rings, total_rings, max_vfs, rings_per_vf;
        u32 pf_srn, num_pf_rings;
+       u32 max_possible_vfs;
 
        cn23xx->conf =
-           (struct octeon_config *)oct_get_config_info(oct, LIO_23XX);
+               (struct octeon_config *)oct_get_config_info(oct, LIO_23XX);
        switch (oct->rev_id) {
        case OCTEON_CN23XX_REV_1_0:
-               total_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_0;
+               max_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_0;
+               max_possible_vfs = CN23XX_MAX_VFS_PER_PF_PASS_1_0;
                break;
        case OCTEON_CN23XX_REV_1_1:
-               total_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_1;
+               max_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_1;
+               max_possible_vfs = CN23XX_MAX_VFS_PER_PF_PASS_1_1;
                break;
        default:
-               total_rings = CN23XX_MAX_RINGS_PER_PF;
+               max_rings = CN23XX_MAX_RINGS_PER_PF;
+               max_possible_vfs = CN23XX_MAX_VFS_PER_PF;
                break;
        }
-       if (!oct->sriov_info.num_pf_rings) {
-               if (total_rings > num_present_cpus())
-                       num_pf_rings = num_present_cpus();
-               else
-                       num_pf_rings = total_rings;
-       } else {
-               num_pf_rings = oct->sriov_info.num_pf_rings;
 
-               if (num_pf_rings > total_rings) {
-                       dev_warn(&oct->pci_dev->dev,
-                                "num_queues_per_pf requested %u is more than available rings. Reducing to %u\n",
-                                num_pf_rings, total_rings);
-                       num_pf_rings = total_rings;
-               }
-       }
+       if (max_rings <= num_present_cpus())
+               num_pf_rings = 1;
+       else
+               num_pf_rings = num_present_cpus();
+
+#ifdef CONFIG_PCI_IOV
+       max_vfs = min_t(u32,
+                       (max_rings - num_pf_rings), max_possible_vfs);
+       rings_per_vf = 1;
+#else
+       max_vfs = 0;
+       rings_per_vf = 0;
+#endif
+
+       total_rings = num_pf_rings + max_vfs;
 
-       total_rings = num_pf_rings;
        /* the first ring of the pf */
        pf_srn = total_rings - num_pf_rings;
 
        oct->sriov_info.trs = total_rings;
+       oct->sriov_info.max_vfs = max_vfs;
+       oct->sriov_info.rings_per_vf = rings_per_vf;
        oct->sriov_info.pf_srn = pf_srn;
        oct->sriov_info.num_pf_rings = num_pf_rings;
-       dev_dbg(&oct->pci_dev->dev, "trs:%d pf_srn:%d num_pf_rings:%d\n",
-               oct->sriov_info.trs, oct->sriov_info.pf_srn,
-               oct->sriov_info.num_pf_rings);
+       dev_notice(&oct->pci_dev->dev, "trs:%d max_vfs:%d rings_per_vf:%d pf_srn:%d num_pf_rings:%d\n",
+                  oct->sriov_info.trs, oct->sriov_info.max_vfs,
+                  oct->sriov_info.rings_per_vf, oct->sriov_info.pf_srn,
+                  oct->sriov_info.num_pf_rings);
+
+       oct->sriov_info.sriov_enabled = 0;
+
        return 0;
 }
 
@@ -1119,6 +1292,9 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct)
 
        oct->fn_list.setup_iq_regs = cn23xx_setup_iq_regs;
        oct->fn_list.setup_oq_regs = cn23xx_setup_oq_regs;
+       oct->fn_list.setup_mbox = cn23xx_setup_pf_mbox;
+       oct->fn_list.free_mbox = cn23xx_free_pf_mbox;
+
        oct->fn_list.process_interrupt_regs = cn23xx_interrupt_handler;
        oct->fn_list.msix_interrupt_handler = cn23xx_pf_msix_interrupt_handler;
 
@@ -1209,8 +1385,7 @@ void cn23xx_dump_iq_regs(struct octeon_device *oct)
                dev_dbg(&oct->pci_dev->dev, "SLI_PKT[%d]_INPUT_CTL [0x%x]: 0x%016llx\n",
                        q_no, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
                        CVM_CAST64(octeon_read_csr64
-                               (oct,
-                                       CN23XX_SLI_IQ_PKT_CONTROL64(q_no))));
+                               (oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no))));
        }
 
        pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, &regval);
@@ -1235,3 +1410,24 @@ int cn23xx_fw_loaded(struct octeon_device *oct)
        val = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1);
        return (val >> 1) & 1ULL;
 }
+
+/* Tell a VF, through its mailbox, that the PF changed the VF's MAC address.
+ *
+ * Nothing is sent unless bit @vfidx is set in
+ * oct->sriov_info.vf_drv_loaded_mask. The request carries @mac as its
+ * parameter, asks for no response and registers no completion callback.
+ *
+ * @oct:   PF device
+ * @vfidx: index of the VF to notify
+ * @mac:   new MAC address (ETH_ALEN bytes)
+ */
+void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
+                                       u8 *mac)
+{
+       struct octeon_mbox_cmd mbox_cmd;
+
+       if (!(oct->sriov_info.vf_drv_loaded_mask & BIT_ULL(vfidx)))
+               return;
+
+       /* clear the whole message word before filling individual fields */
+       mbox_cmd.msg.u64 = 0;
+       mbox_cmd.msg.s.type = OCTEON_MBOX_REQUEST;
+       mbox_cmd.msg.s.cmd = OCTEON_PF_CHANGED_VF_MACADDR;
+       mbox_cmd.msg.s.resp_needed = 0;
+       mbox_cmd.msg.s.len = 1;
+       ether_addr_copy(mbox_cmd.msg.s.params, mac);
+
+       /* the mailbox is addressed by the VF's first ring */
+       mbox_cmd.q_no = vfidx * oct->sriov_info.rings_per_vf;
+
+       /* no reply expected, so no receive state or callback */
+       mbox_cmd.recv_len = 0;
+       mbox_cmd.recv_status = 0;
+       mbox_cmd.fn = NULL;
+       mbox_cmd.fn_arg = 0;
+
+       octeon_mbox_write(oct, &mbox_cmd);
+}
index 21b5c9051967b747d4fed6549e93385d1da76d07..2fedd91f3df88fb5ea88f288e5121a8fc8fb3cf0 100644 (file)
@@ -1,34 +1,31 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file  cn23xx_device.h
  * \brief Host Driver: Routines that perform CN23XX specific operations.
-*/
+ */
 
 #ifndef __CN23XX_PF_DEVICE_H__
 #define __CN23XX_PF_DEVICE_H__
 
 #include "cn23xx_pf_regs.h"
 
+#define LIO_CMD_WAIT_TM 100
+
 /* Register address and configuration for a CN23XX devices.
  * If device specific changes need to be made then add a struct to include
  * device specific fields as shown in the commented section
@@ -56,4 +53,7 @@ u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us);
 void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct);
 
 int cn23xx_fw_loaded(struct octeon_device *oct);
+
+void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
+                                       u8 *mac);
 #endif
index 03d79d95ab75f6d342e2bdb69969e26002e47417..e6d4ad99cc3874bff3ba5b70a51876936393d2d4 100644 (file)
@@ -1,29 +1,24 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file cn23xx_regs.h
  * \brief Host Driver: Register Address and Register Mask values for
  * Octeon CN23XX devices.
-*/
+ */
 
 #ifndef __CN23XX_PF_REGS_H__
 #define __CN23XX_PF_REGS_H__
@@ -63,7 +58,7 @@
 
 #define     CN23XX_CONFIG_SRIOV_BAR_START         0x19C
 #define     CN23XX_CONFIG_SRIOV_BARX(i)                \
-               (CN23XX_CONFIG_SRIOV_BAR_START + (i * 4))
+               (CN23XX_CONFIG_SRIOV_BAR_START + ((i) * 4))
 #define     CN23XX_CONFIG_SRIOV_BAR_PF            0x08
 #define     CN23XX_CONFIG_SRIOV_BAR_64BIT         0x04
 #define     CN23XX_CONFIG_SRIOV_BAR_IO            0x01
 /* 4 Registers (64 - bit) */
 #define    CN23XX_SLI_S2M_PORT_CTL_START         0x23D80
 #define    CN23XX_SLI_S2M_PORTX_CTL(port)      \
-               (CN23XX_SLI_S2M_PORT_CTL_START + (port * 0x10))
+               (CN23XX_SLI_S2M_PORT_CTL_START + ((port) * 0x10))
 
 #define    CN23XX_SLI_MAC_NUMBER                 0x20050
 
  * Provides DMA Engine Queue Enable
  */
 #define    CN23XX_DPI_DMA_ENG0_ENB        0x0001df0000000080ULL
-#define    CN23XX_DPI_DMA_ENG_ENB(eng) (CN23XX_DPI_DMA_ENG0_ENB + (eng * 8))
+#define    CN23XX_DPI_DMA_ENG_ENB(eng) (CN23XX_DPI_DMA_ENG0_ENB + ((eng) * 8))
 
 /* 8 register (64-bit) - DPI_DMA(0..7)_REQQ_CTL
  * Provides control bits for transaction on 8 Queues
  */
 #define    CN23XX_DPI_DMA_REQQ0_CTL       0x0001df0000000180ULL
 #define    CN23XX_DPI_DMA_REQQ_CTL(q_no)       \
-               (CN23XX_DPI_DMA_REQQ0_CTL + (q_no * 8))
+               (CN23XX_DPI_DMA_REQQ0_CTL + ((q_no) * 8))
 
 /* 6 register (64-bit) - DPI_ENG(0..5)_BUF
  * Provides DMA Engine FIFO (Queue) Size
  */
 #define    CN23XX_DPI_DMA_ENG0_BUF        0x0001df0000000880ULL
 #define    CN23XX_DPI_DMA_ENG_BUF(eng)   \
-               (CN23XX_DPI_DMA_ENG0_BUF + (eng * 8))
+               (CN23XX_DPI_DMA_ENG0_BUF + ((eng) * 8))
 
 /* 4 Registers (64-bit) */
 #define    CN23XX_DPI_SLI_PRT_CFG_START   0x0001df0000000900ULL
 #define    CN23XX_DPI_SLI_PRTX_CFG(port)        \
-               (CN23XX_DPI_SLI_PRT_CFG_START + (port * 0x8))
+               (CN23XX_DPI_SLI_PRT_CFG_START + ((port) * 0x8))
 
 /* Masks for DPI_DMA_CONTROL Register */
 #define    CN23XX_DPI_DMA_COMMIT_MODE     BIT_ULL(58)
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
new file mode 100644 (file)
index 0000000..108e487
--- /dev/null
@@ -0,0 +1,701 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include "liquidio_common.h"
+#include "octeon_droq.h"
+#include "octeon_iq.h"
+#include "response_manager.h"
+#include "octeon_device.h"
+#include "cn23xx_vf_device.h"
+#include "octeon_main.h"
+#include "octeon_mailbox.h"
+
+/* Convert an interrupt time given in microseconds into output queue ticks,
+ * based on the per-microsecond clock count held in oct->pfvf_hsword.
+ * One oq tick is 1024 clock cycles. Every step is truncating u32 arithmetic.
+ */
+u32 cn23xx_vf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us)
+{
+       /* clock cycles per microsecond */
+       u32 ticks = (u32)oct->pfvf_hsword.coproc_tics_per_us;
+
+       /* clock cycles per millisecond, then oq ticks
+        * (1024 clock cycles each) per millisecond
+        */
+       ticks = (ticks * 1000) / 1024;
+
+       /* time_intr is in microseconds: scale the per-millisecond rate by
+        * it and divide by 1000 to get the oq ticks corresponding to
+        * time_intr.
+        */
+       ticks = (ticks * time_intr_in_us) / 1000;
+
+       return ticks;
+}
+
+/* Reset the first num_queues rings of this VF.
+ *
+ * First sets the RST bit in the packet control register of every ring.
+ * Then, ring by ring, polls until RST is clear or QUIET is set, clears RST
+ * and reads the register back to check that RST stayed clear.
+ *
+ * Returns 0 on success and -1 on failure. A poll timeout returns at once;
+ * a failed RST clear is remembered and reported after all rings were tried.
+ */
+static int cn23xx_vf_reset_io_queues(struct octeon_device *oct, u32 num_queues)
+{
+       int ret_val = 0;
+       u32 q_no;
+       u64 d64;
+
+       for (q_no = 0; q_no < num_queues; q_no++) {
+               /* set RST bit to 1. This bit applies to both IQ and OQ */
+               d64 = octeon_read_csr64(oct,
+                                       CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+               d64 |= CN23XX_PKT_INPUT_CTL_RST;
+               octeon_write_csr64(oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no),
+                                  d64);
+       }
+
+       /* wait until the RST bit is clear or the RST and QUIET bits are set */
+       for (q_no = 0; q_no < num_queues; q_no++) {
+               /* Each ring gets its own poll budget; with one budget shared
+                * by all rings, a slow ring eats into the time left for the
+                * rings that follow it.
+                */
+               u32 loop = BUSY_READING_REG_VF_LOOP_COUNT;
+               u64 reg_val;
+
+               reg_val = octeon_read_csr64(oct,
+                                       CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+               while ((reg_val & CN23XX_PKT_INPUT_CTL_RST) &&
+                      !(reg_val & CN23XX_PKT_INPUT_CTL_QUIET) &&
+                      loop) {
+                       reg_val = octeon_read_csr64(
+                           oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+                       loop--;
+               }
+
+               /* Decide on the register state, not on the counter: the
+                * last permitted read may be the one that succeeded.
+                */
+               if ((reg_val & CN23XX_PKT_INPUT_CTL_RST) &&
+                   !(reg_val & CN23XX_PKT_INPUT_CTL_QUIET)) {
+                       dev_err(&oct->pci_dev->dev,
+                               "clearing the reset reg failed or setting the quiet reg failed for qno: %u\n",
+                               q_no);
+                       return -1;
+               }
+
+               /* take the ring out of reset */
+               reg_val &= ~CN23XX_PKT_INPUT_CTL_RST;
+               octeon_write_csr64(oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no),
+                                  reg_val);
+
+               /* read back to confirm that RST really is clear */
+               reg_val = octeon_read_csr64(
+                   oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+               if (reg_val & CN23XX_PKT_INPUT_CTL_RST) {
+                       dev_err(&oct->pci_dev->dev,
+                               "clearing the reset failed for qno: %u\n",
+                               q_no);
+                       ret_val = -1;
+               }
+       }
+
+       return ret_val;
+}
+
+/* Reset this VF's rings and program the per-ring input queue registers.
+ *
+ * For each of the rings_per_vf rings: write 0xFFFFFFFF to the doorbell
+ * register, clear bit 60 of the instruction count register, load the
+ * packet control register with CN23XX_PKT_INPUT_CTL_MASK and set the
+ * watermark field of the instruction count register from the configured
+ * IQ interrupt packet count.
+ *
+ * Returns 0 on success, -1 if the ring reset fails.
+ */
+static int cn23xx_vf_setup_global_input_regs(struct octeon_device *oct)
+{
+       struct octeon_cn23xx_vf *cn23xx = (struct octeon_cn23xx_vf *)oct->chip;
+       struct octeon_instr_queue *iq;
+       u64 q_no, intr_threshold;
+       u64 d64;
+
+       if (cn23xx_vf_reset_io_queues(oct, oct->sriov_info.rings_per_vf))
+               return -1;
+
+       for (q_no = 0; q_no < (oct->sriov_info.rings_per_vf); q_no++) {
+               void __iomem *inst_cnt_reg;
+
+               octeon_write_csr64(oct, CN23XX_VF_SLI_IQ_DOORBELL(q_no),
+                                  0xFFFFFFFF);
+               iq = oct->instr_queue[q_no];
+
+               /* Use the register address stored in the queue when the
+                * queue exists, otherwise compute it from the mmio[0]
+                * mapping.
+                */
+               if (iq)
+                       inst_cnt_reg = iq->inst_cnt_reg;
+               else
+                       inst_cnt_reg = (u8 *)oct->mmio[0].hw_addr +
+                                      CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no);
+
+               d64 = octeon_read_csr64(oct,
+                                       CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no));
+
+               /* clear bit 60, keep every other bit as read */
+               d64 &= 0xEFFFFFFFFFFFFFFFL;
+
+               octeon_write_csr64(oct, CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no),
+                                  d64);
+
+               /* Select ES, RO, NS, RDSIZE, DPTR Format#0 for
+                * the Input Queues
+                */
+               octeon_write_csr64(oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no),
+                                  CN23XX_PKT_INPUT_CTL_MASK);
+
+               /* set the wmark level to trigger PI_INT */
+               intr_threshold = CFG_GET_IQ_INTR_PKT(cn23xx->conf) &
+                                CN23XX_PKT_IN_DONE_WMARK_MASK;
+
+               /* read-modify-write: replace only the watermark field */
+               writeq((readq(inst_cnt_reg) &
+                       ~(CN23XX_PKT_IN_DONE_WMARK_MASK <<
+                         CN23XX_PKT_IN_DONE_WMARK_BIT_POS)) |
+                      (intr_threshold << CN23XX_PKT_IN_DONE_WMARK_BIT_POS),
+                      inst_cnt_reg);
+       }
+       return 0;
+}
+
+/* Program the per-ring output queue registers of this VF.
+ *
+ * For each of the rings_per_vf rings: write 0xFFFFFFFF to the credit
+ * register, then read-modify-write the packet control register so that
+ * IPTR, DPTR and ES are set, BMODE, ROR_P, NSR_P, ROR and NSR are cleared,
+ * and ES_P follows the host bitfield endianness.
+ */
+static void cn23xx_vf_setup_global_output_regs(struct octeon_device *oct)
+{
+       u32 reg_val;
+       u32 q_no;
+
+       for (q_no = 0; q_no < (oct->sriov_info.rings_per_vf); q_no++) {
+               octeon_write_csr(oct, CN23XX_VF_SLI_OQ_PKTS_CREDIT(q_no),
+                                0xFFFFFFFF);
+
+               /* NOTE(review): the value read from PKTS_SENT is masked and
+                * then overwritten by the next read without ever being
+                * written back. The mask cannot change a u32 either, since
+                * its low 32 bits are all ones. Possibly a write-back like
+                * the one on the input side was intended - confirm.
+                */
+               reg_val =
+                   octeon_read_csr(oct, CN23XX_VF_SLI_OQ_PKTS_SENT(q_no));
+
+               reg_val &= 0xEFFFFFFFFFFFFFFFL;
+
+               reg_val =
+                   octeon_read_csr(oct, CN23XX_VF_SLI_OQ_PKT_CONTROL(q_no));
+
+               /* set IPTR & DPTR */
+               reg_val |=
+                   (CN23XX_PKT_OUTPUT_CTL_IPTR | CN23XX_PKT_OUTPUT_CTL_DPTR);
+
+               /* reset BMODE */
+               reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_BMODE);
+
+               /* No Relaxed Ordering, No Snoop, 64-bit Byte swap
+                * for Output Queue ScatterList reset ROR_P, NSR_P
+                */
+               reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ROR_P);
+               reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_NSR_P);
+
+               /* ES_P is cleared on little-endian-bitfield hosts and set
+                * otherwise
+                */
+#ifdef __LITTLE_ENDIAN_BITFIELD
+               reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ES_P);
+#else
+               reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES_P);
+#endif
+               /* No Relaxed Ordering, No Snoop, 64-bit Byte swap
+                * for Output Queue Data reset ROR, NSR
+                */
+               reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ROR);
+               reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_NSR);
+               /* set the ES bit */
+               reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES);
+
+               /* write all the selected settings */
+               octeon_write_csr(oct, CN23XX_VF_SLI_OQ_PKT_CONTROL(q_no),
+                                reg_val);
+       }
+}
+
+/* Program the VF's global input queue registers and, if that worked, the
+ * global output queue registers.
+ * Returns 0 on success, -1 when the input side setup fails.
+ */
+static int cn23xx_setup_vf_device_regs(struct octeon_device *oct)
+{
+       int err = cn23xx_vf_setup_global_input_regs(oct);
+
+       if (!err)
+               cn23xx_vf_setup_global_output_regs(oct);
+
+       return err ? -1 : 0;
+}
+
+/* Program the registers of input queue iq_no.
+ *
+ * Writes the ring's DMA base address and size, stores the addresses of
+ * the doorbell and instruction count registers in the queue structure
+ * and, when MSI-X is in use, sets CINT_ENB in the instruction count
+ * register.
+ */
+static void cn23xx_setup_vf_iq_regs(struct octeon_device *oct, u32 iq_no)
+{
+       struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
+       u64 pkt_in_done;
+
+       /* Write the start of the input queue's ring and its size */
+       octeon_write_csr64(oct, CN23XX_VF_SLI_IQ_BASE_ADDR64(iq_no),
+                          iq->base_addr_dma);
+       octeon_write_csr(oct, CN23XX_VF_SLI_IQ_SIZE(iq_no), iq->max_count);
+
+       /* Remember the doorbell & instruction count register addr
+        * for this queue
+        */
+       iq->doorbell_reg =
+           (u8 *)oct->mmio[0].hw_addr + CN23XX_VF_SLI_IQ_DOORBELL(iq_no);
+       iq->inst_cnt_reg =
+           (u8 *)oct->mmio[0].hw_addr + CN23XX_VF_SLI_IQ_INSTR_COUNT64(iq_no);
+       dev_dbg(&oct->pci_dev->dev, "InstQ[%d]:dbell reg @ 0x%p instcnt_reg @ 0x%p\n",
+               iq_no, iq->doorbell_reg, iq->inst_cnt_reg);
+
+       /* Store the current instruction counter (used in flush_iq
+        * calculation)
+        */
+       pkt_in_done = readq(iq->inst_cnt_reg);
+
+       if (oct->msix_on) {
+               /* Set CINT_ENB to enable IQ interrupt */
+               writeq((pkt_in_done | CN23XX_INTR_CINT_ENB),
+                      iq->inst_cnt_reg);
+       }
+       iq->reset_instr_cnt = 0;
+}
+
+/* Program the registers of output queue oq_no.
+ *
+ * Writes the descriptor ring's DMA base address and size and the
+ * buffer/info size register, then stores the addresses of the pkts_sent
+ * and pkts_credit registers in the droq structure.
+ */
+static void cn23xx_setup_vf_oq_regs(struct octeon_device *oct, u32 oq_no)
+{
+       struct octeon_droq *droq = oct->droq[oq_no];
+
+       octeon_write_csr64(oct, CN23XX_VF_SLI_OQ_BASE_ADDR64(oq_no),
+                          droq->desc_ring_dma);
+       octeon_write_csr(oct, CN23XX_VF_SLI_OQ_SIZE(oq_no), droq->max_count);
+
+       /* buffer size in the low bits, OCT_RH_SIZE starting at bit 16 */
+       octeon_write_csr(oct, CN23XX_VF_SLI_OQ_BUFF_INFO_SIZE(oq_no),
+                        (droq->buffer_size | (OCT_RH_SIZE << 16)));
+
+       /* Get the mapped address of the pkt_sent and pkts_credit regs */
+       droq->pkts_sent_reg =
+           (u8 *)oct->mmio[0].hw_addr + CN23XX_VF_SLI_OQ_PKTS_SENT(oq_no);
+       droq->pkts_credit_reg =
+           (u8 *)oct->mmio[0].hw_addr + CN23XX_VF_SLI_OQ_PKTS_CREDIT(oq_no);
+}
+
+/* Work handler for the mailbox poll work: hands the mailbox stored in the
+ * work item's ctxptr to octeon_mbox_process_message().
+ */
+static void cn23xx_vf_mbox_thread(struct work_struct *work)
+{
+       struct cavium_wk *poll_wk = (struct cavium_wk *)work;
+
+       octeon_mbox_process_message((struct octeon_mbox *)poll_wk->ctxptr);
+}
+
+/* Cancel the mailbox poll work and free the VF mailbox.
+ * Always returns 0.
+ */
+static int cn23xx_free_vf_mbox(struct octeon_device *oct)
+{
+       /* the poll work uses the mailbox, so cancel it before freeing */
+       cancel_delayed_work_sync(&oct->mbox[0]->mbox_poll_wk.work);
+       /* NOTE(review): oct->mbox[0] is not cleared and keeps pointing at
+        * the freed memory - confirm nothing uses it afterwards.
+        */
+       vfree(oct->mbox[0]);
+       return 0;
+}
+
+/* Allocate and initialise the VF mailbox and publish it as oct->mbox[0].
+ *
+ * Records the addresses of the mailbox interrupt register and of the
+ * SIG0 (read) and SIG1 (write) registers, prepares the poll work item and
+ * finally writes OCTEON_PFVFSIG to the read register.
+ *
+ * Returns 0 on success, 1 if the allocation fails.
+ */
+static int cn23xx_setup_vf_mbox(struct octeon_device *oct)
+{
+       struct octeon_mbox *mbox;
+
+       /* vzalloc() returns zeroed memory, so no separate memset() */
+       mbox = vzalloc(sizeof(*mbox));
+       if (!mbox)
+               return 1;
+
+       spin_lock_init(&mbox->lock);
+
+       mbox->oct_dev = oct;
+
+       mbox->q_no = 0;
+
+       mbox->state = OCTEON_MBOX_STATE_IDLE;
+
+       /* VF mbox interrupt reg */
+       mbox->mbox_int_reg =
+           (u8 *)oct->mmio[0].hw_addr + CN23XX_VF_SLI_PKT_MBOX_INT(0);
+       /* VF reads from SIG0 reg */
+       mbox->mbox_read_reg =
+           (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_PKT_PF_VF_MBOX_SIG(0, 0);
+       /* VF writes into SIG1 reg */
+       mbox->mbox_write_reg =
+           (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_PKT_PF_VF_MBOX_SIG(0, 1);
+
+       INIT_DELAYED_WORK(&mbox->mbox_poll_wk.work,
+                         cn23xx_vf_mbox_thread);
+
+       /* the work handler gets the mailbox back through ctxptr */
+       mbox->mbox_poll_wk.ctxptr = mbox;
+
+       oct->mbox[0] = mbox;
+
+       writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+
+       return 0;
+}
+
+/* Enable the input and output rings selected in oct->io_qmask.
+ *
+ * For input queues below num_iqs, sets IS_64B for rings flagged in
+ * io_qmask.iq64B and RING_ENB for rings flagged in io_qmask.iq. For
+ * output queues below num_oqs, sets RING_ENB for rings flagged in
+ * io_qmask.oq. Each update is a read-modify-write of the ring's packet
+ * control register.
+ *
+ * Always returns 0.
+ */
+static int cn23xx_enable_vf_io_queues(struct octeon_device *oct)
+{
+       u32 q_no;
+
+       for (q_no = 0; q_no < oct->num_iqs; q_no++) {
+               u64 reg_val;
+
+               /* set the corresponding IQ IS_64B bit */
+               if (oct->io_qmask.iq64B & BIT_ULL(q_no)) {
+                       reg_val = octeon_read_csr64(
+                           oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+                       reg_val |= CN23XX_PKT_INPUT_CTL_IS_64B;
+                       octeon_write_csr64(
+                           oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no), reg_val);
+               }
+
+               /* set the corresponding IQ ENB bit */
+               if (oct->io_qmask.iq & BIT_ULL(q_no)) {
+                       reg_val = octeon_read_csr64(
+                           oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+                       reg_val |= CN23XX_PKT_INPUT_CTL_RING_ENB;
+                       octeon_write_csr64(
+                           oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no), reg_val);
+               }
+       }
+       for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+               u32 reg_val;
+
+               /* set the corresponding OQ ENB bit */
+               if (oct->io_qmask.oq & BIT_ULL(q_no)) {
+                       reg_val = octeon_read_csr(
+                           oct, CN23XX_VF_SLI_OQ_PKT_CONTROL(q_no));
+                       reg_val |= CN23XX_PKT_OUTPUT_CTL_RING_ENB;
+                       octeon_write_csr(
+                           oct, CN23XX_VF_SLI_OQ_PKT_CONTROL(q_no), reg_val);
+               }
+       }
+
+       return 0;
+}
+
+/* Disable the VF's rings by resetting them. The number of rings reset is
+ * the larger of num_iqs and num_oqs.
+ */
+static void cn23xx_disable_vf_io_queues(struct octeon_device *oct)
+{
+       u32 in_cnt = oct->num_iqs;
+       u32 ring_cnt;
+
+       /* per HRM, rings can only be disabled via reset operation,
+        * NOT via SLI_PKT()_INPUT/OUTPUT_CONTROL[ENB]
+        */
+       ring_cnt = (in_cnt < oct->num_oqs) ? oct->num_oqs : in_cnt;
+
+       cn23xx_vf_reset_io_queues(oct, ring_cnt);
+}
+
+/* Send an OCTEON_VF_FLR_REQUEST mailbox request to the PF.
+ * The request is one word long, asks for no response (resp_needed = 0)
+ * and carries no completion callback.
+ */
+void cn23xx_vf_ask_pf_to_do_flr(struct octeon_device *oct)
+{
+       struct octeon_mbox_cmd mbox_cmd;
+
+       mbox_cmd.msg.u64 = 0;
+       mbox_cmd.msg.s.type = OCTEON_MBOX_REQUEST;
+       mbox_cmd.msg.s.resp_needed = 0;
+       mbox_cmd.msg.s.cmd = OCTEON_VF_FLR_REQUEST;
+       mbox_cmd.msg.s.len = 1;
+       mbox_cmd.q_no = 0;
+       mbox_cmd.recv_len = 0;
+       mbox_cmd.recv_status = 0;
+       mbox_cmd.fn = NULL;
+       /* fn_arg is a pointer: use NULL rather than a plain 0 */
+       mbox_cmd.fn_arg = NULL;
+
+       octeon_mbox_write(oct, &mbox_cmd);
+}
+
+/* Mailbox callback for the PF/VF handshake.
+ *
+ * Copies CN23XX_MAILBOX_MSGPARAM_SIZE bytes of message parameters into
+ * oct->pfvf_hsword. When more than one word was received, the major
+ * version is taken from the struct lio_version in cmd->data. The atomic_t
+ * that arg points to is then set to (major << 16) | 1, which is never 0.
+ */
+static void octeon_pfvf_hs_callback(struct octeon_device *oct,
+                                   struct octeon_mbox_cmd *cmd,
+                                   void *arg)
+{
+       atomic_t *done = arg;
+       u32 pf_major = 0;
+
+       memcpy(&oct->pfvf_hsword, cmd->msg.s.params,
+              CN23XX_MAILBOX_MSGPARAM_SIZE);
+
+       if (cmd->recv_len > 1) {
+               struct lio_version *pf_ver = (struct lio_version *)cmd->data;
+
+               pf_major = pf_ver->major;
+               pf_major <<= 16;
+       }
+
+       atomic_set(done, pf_major | 1);
+}
+
+/* Perform the VF_ACTIVE handshake with the PF driver.
+ *
+ * Sends an OCTEON_VF_ACTIVE mailbox request carrying this driver's base
+ * version, enables all interrupts while waiting for the answer, and polls
+ * a status word that octeon_pfvf_hs_callback() sets to (PF major << 16) | 1.
+ * On success the pkind taken from the handshake word is stored in every
+ * instruction queue.
+ *
+ * Returns 0 on success, 1 on timeout or when the PF and VF major versions
+ * differ.
+ */
+int cn23xx_octeon_pfvf_handshake(struct octeon_device *oct)
+{
+       struct octeon_mbox_cmd mbox_cmd;
+       u32 q_no, count = 0;
+       atomic_t status;
+       u32 pfmajor;
+       u32 vfmajor;
+       u32 ret;
+
+       /* Sending VF_ACTIVE indication to the PF driver */
+       dev_dbg(&oct->pci_dev->dev, "requesting info from pf\n");
+
+       mbox_cmd.msg.u64 = 0;
+       mbox_cmd.msg.s.type = OCTEON_MBOX_REQUEST;
+       mbox_cmd.msg.s.resp_needed = 1;
+       mbox_cmd.msg.s.cmd = OCTEON_VF_ACTIVE;
+       mbox_cmd.msg.s.len = 2;
+       mbox_cmd.data[0] = 0;
+       ((struct lio_version *)&mbox_cmd.data[0])->major =
+                                               LIQUIDIO_BASE_MAJOR_VERSION;
+       ((struct lio_version *)&mbox_cmd.data[0])->minor =
+                                               LIQUIDIO_BASE_MINOR_VERSION;
+       ((struct lio_version *)&mbox_cmd.data[0])->micro =
+                                               LIQUIDIO_BASE_MICRO_VERSION;
+       mbox_cmd.q_no = 0;
+       mbox_cmd.recv_len = 0;
+       mbox_cmd.recv_status = 0;
+       mbox_cmd.fn = (octeon_mbox_callback_t)octeon_pfvf_hs_callback;
+       mbox_cmd.fn_arg = &status;
+
+       /* Clear the status word before the request goes out. Clearing it
+        * after the write could wipe out an answer that the callback has
+        * already stored, turning a fast reply into a timeout.
+        */
+       atomic_set(&status, 0);
+
+       /* Interrupts are not enabled at this point.
+        * Enable them with default oq ticks
+        */
+       oct->fn_list.enable_interrupt(oct, OCTEON_ALL_INTR);
+
+       octeon_mbox_write(oct, &mbox_cmd);
+
+       /* sleep one jiffy per round until the callback stores a non-zero
+        * status or the retry limit is reached
+        */
+       do {
+               schedule_timeout_uninterruptible(1);
+       } while ((!atomic_read(&status)) && (count++ < 100000));
+
+       /* Disable the interrupt so that the interrupts will be reenabled
+        * with the oq ticks received from the PF
+        */
+       oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
+
+       ret = atomic_read(&status);
+       if (!ret) {
+               /* NOTE(review): status lives on this stack frame; if the
+                * mailbox code keeps fn_arg and a late reply arrives after
+                * this return, the callback would write to a dead frame -
+                * confirm against octeon_mbox_write().
+                */
+               dev_err(&oct->pci_dev->dev, "octeon_pfvf_handshake timeout\n");
+               return 1;
+       }
+
+       for (q_no = 0 ; q_no < oct->num_iqs ; q_no++)
+               oct->instr_queue[q_no]->txpciq.s.pkind = oct->pfvf_hsword.pkind;
+
+       /* the callback packed the PF major version into the upper 16 bits */
+       vfmajor = LIQUIDIO_BASE_MAJOR_VERSION;
+       pfmajor = ret >> 16;
+       if (pfmajor != vfmajor) {
+               dev_err(&oct->pci_dev->dev,
+                       "VF Liquidio driver (major version %d) is not compatible with Liquidio PF driver (major version %d)\n",
+                       vfmajor, pfmajor);
+               return 1;
+       }
+
+       dev_dbg(&oct->pci_dev->dev,
+               "VF Liquidio driver (major version %d), Liquidio PF driver (major version %d)\n",
+               vfmajor, pfmajor);
+
+       dev_dbg(&oct->pci_dev->dev, "got data from pf pkind is %d\n",
+               oct->pfvf_hsword.pkind);
+
+       return 0;
+}
+
+/* Handle a mailbox interrupt. Only the vector with droq_index 0 does any
+ * work: it acknowledges the interrupt and, when octeon_mbox_read() returns
+ * non-zero, schedules the mailbox poll work with no delay.
+ */
+static void cn23xx_handle_vf_mbox_intr(struct octeon_ioq_vector *ioq_vector)
+{
+       struct octeon_device *oct = ioq_vector->oct_dev;
+       struct octeon_mbox *mbox;
+       u64 pending;
+
+       if (ioq_vector->droq_index)
+               return;
+
+       mbox = oct->mbox[0];
+
+       /* read and clear by writing 1 */
+       pending = readq(mbox->mbox_int_reg);
+       writeq(pending, mbox->mbox_int_reg);
+
+       if (!octeon_mbox_read(mbox))
+               return;
+
+       schedule_delayed_work(&mbox->mbox_poll_wk.work, msecs_to_jiffies(0));
+}
+
+/* MSI-X handler for one ioq vector.
+ *
+ * Reads the pkts_sent register of the vector's output queue and turns its
+ * interrupt bits into a mask of MSIX_PO_INT, MSIX_PI_INT and
+ * MSIX_MBOX_INT. A mailbox interrupt is also handled here.
+ *
+ * Returns the mask, or 0 when the register reads as 0 or as all ones.
+ */
+static u64 cn23xx_vf_msix_interrupt_handler(void *dev)
+{
+       struct octeon_ioq_vector *vec = (struct octeon_ioq_vector *)dev;
+       struct octeon_device *oct = vec->oct_dev;
+       struct octeon_droq *droq = oct->droq[vec->droq_index];
+       u64 events = 0;
+       u64 cnt_reg;
+
+       dev_dbg(&oct->pci_dev->dev, "In %s octeon_dev @ %p\n", __func__, oct);
+       cnt_reg = readq(droq->pkts_sent_reg);
+
+       /* Nothing to do when no bit is set. All f's is checked as well,
+        * for an interrupt triggered on an error where the PCI read fails.
+        */
+       if (cnt_reg == 0 || cnt_reg == 0xFFFFFFFFFFFFFFFFULL)
+               return 0;
+
+       /* Output interrupt; the count reg in sli_pkt_cnts is written
+        * elsewhere to clear it.
+        */
+       if (cnt_reg & CN23XX_INTR_PO_INT)
+               events |= MSIX_PO_INT;
+
+       /* We will clear the count when we update the read_index. */
+       if (cnt_reg & CN23XX_INTR_PI_INT)
+               events |= MSIX_PI_INT;
+
+       if (cnt_reg & CN23XX_INTR_MBOX_INT) {
+               cn23xx_handle_vf_mbox_intr(vec);
+               events |= MSIX_MBOX_INT;
+       }
+
+       return events;
+}
+
+/* Enable the interrupt sources selected by intr_flag.
+ *
+ * OCTEON_OUTPUT_INTR: program the packet count and time thresholds of
+ * every output queue from the configuration.
+ * OCTEON_INPUT_INTR: set CINT_ENB in the instruction count registers,
+ * clearing the bits covered by CN23XX_PKT_IN_DONE_CNT_MASK.
+ * OCTEON_MBOX_INTR: set MBOX_ENB in the queue-0 mailbox interrupt register.
+ */
+static void cn23xx_enable_vf_interrupt(struct octeon_device *oct, u8 intr_flag)
+{
+       struct octeon_cn23xx_vf *cn23xx = (struct octeon_cn23xx_vf *)oct->chip;
+       u32 q_no, time_threshold;
+
+       if (intr_flag & OCTEON_OUTPUT_INTR) {
+               for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+                       /* Set up interrupt packet and time thresholds
+                        * for all the OQs
+                        */
+                       time_threshold = cn23xx_vf_get_oq_ticks(
+                               oct, (u32)CFG_GET_OQ_INTR_TIME(cn23xx->conf));
+
+                       /* packet threshold in the low half, time threshold
+                        * in the upper 32 bits
+                        */
+                       octeon_write_csr64(
+                           oct, CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(q_no),
+                           (CFG_GET_OQ_INTR_PKT(cn23xx->conf) |
+                            ((u64)time_threshold << 32)));
+               }
+       }
+
+       if (intr_flag & OCTEON_INPUT_INTR) {
+               /* NOTE(review): this loop touches input queue registers but
+                * is bounded by num_oqs rather than num_iqs - confirm that
+                * is intended.
+                */
+               for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+                       /* Set CINT_ENB to enable IQ interrupt */
+                       octeon_write_csr64(
+                           oct, CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no),
+                           ((octeon_read_csr64(
+                                 oct, CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no)) &
+                             ~CN23XX_PKT_IN_DONE_CNT_MASK) |
+                            CN23XX_INTR_CINT_ENB));
+               }
+       }
+
+       /* Set queue-0 MBOX_ENB to enable VF mailbox interrupt */
+       if (intr_flag & OCTEON_MBOX_INTR) {
+               octeon_write_csr64(
+                   oct, CN23XX_VF_SLI_PKT_MBOX_INT(0),
+                   (octeon_read_csr64(oct, CN23XX_VF_SLI_PKT_MBOX_INT(0)) |
+                    CN23XX_INTR_MBOX_ENB));
+       }
+}
+
+/* Disable the interrupt sources selected by intr_flag.
+ *
+ * OCTEON_OUTPUT_INTR: write 0x3fffffffffffff to the interrupt level
+ * register of every output queue.
+ * OCTEON_INPUT_INTR: clear CINT_ENB and the bits covered by
+ * CN23XX_PKT_IN_DONE_CNT_MASK in the instruction count registers.
+ * OCTEON_MBOX_INTR: clear MBOX_ENB in the queue-0 mailbox interrupt
+ * register.
+ */
+static void cn23xx_disable_vf_interrupt(struct octeon_device *oct, u8 intr_flag)
+{
+       u32 q_no;
+
+       if (intr_flag & OCTEON_OUTPUT_INTR) {
+               for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+                       /* Write all 1's in INT_LEVEL reg to disable PO_INT */
+                       octeon_write_csr64(
+                           oct, CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(q_no),
+                           0x3fffffffffffff);
+               }
+       }
+       if (intr_flag & OCTEON_INPUT_INTR) {
+               /* NOTE(review): this loop touches input queue registers but
+                * is bounded by num_oqs rather than num_iqs - confirm that
+                * is intended.
+                */
+               for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+                       octeon_write_csr64(
+                           oct, CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no),
+                           (octeon_read_csr64(
+                                oct, CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no)) &
+                            ~(CN23XX_INTR_CINT_ENB |
+                              CN23XX_PKT_IN_DONE_CNT_MASK)));
+               }
+       }
+
+       if (intr_flag & OCTEON_MBOX_INTR) {
+               octeon_write_csr64(
+                   oct, CN23XX_VF_SLI_PKT_MBOX_INT(0),
+                   (octeon_read_csr64(oct, CN23XX_VF_SLI_PKT_MBOX_INT(0)) &
+                    ~CN23XX_INTR_MBOX_ENB));
+       }
+}
+
+/* Set up a CN23XX VF device.
+ *
+ * Maps BAR 0, reads the PF number, VF number and rings-per-VF count from
+ * the packet control register of ring 0, looks up the device
+ * configuration, settles on the number of rings this VF will use and
+ * installs the CN23XX VF callbacks in oct->fn_list.
+ *
+ * Returns 0 on success, 1 if BAR 0 cannot be mapped or no configuration
+ * is found (BAR 0 is unmapped again in the latter case).
+ */
+int cn23xx_setup_octeon_vf_device(struct octeon_device *oct)
+{
+       struct octeon_cn23xx_vf *cn23xx = (struct octeon_cn23xx_vf *)oct->chip;
+       u32 rings_per_vf;
+       u64 reg_val;
+
+       if (octeon_map_pci_barx(oct, 0, 0))
+               return 1;
+
+       /* INPUT_CONTROL[RPVF] gives the VF IOq count */
+       reg_val = octeon_read_csr64(oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(0));
+
+       oct->pf_num = (reg_val >> CN23XX_PKT_INPUT_CTL_PF_NUM_POS) &
+                     CN23XX_PKT_INPUT_CTL_PF_NUM_MASK;
+       oct->vf_num = (reg_val >> CN23XX_PKT_INPUT_CTL_VF_NUM_POS) &
+                     CN23XX_PKT_INPUT_CTL_VF_NUM_MASK;
+
+       reg_val = reg_val >> CN23XX_PKT_INPUT_CTL_RPVF_POS;
+
+       rings_per_vf = reg_val & CN23XX_PKT_INPUT_CTL_RPVF_MASK;
+
+       cn23xx->conf = oct_get_config_info(oct, LIO_23XX);
+       if (!cn23xx->conf) {
+               dev_err(&oct->pci_dev->dev, "%s No Config found for CN23XX\n",
+                       __func__);
+               octeon_unmap_pci_barx(oct, 0);
+               return 1;
+       }
+
+       /* Never use more rings than the PF configured, and never more than
+        * there are present CPUs.
+        */
+       if (oct->sriov_info.rings_per_vf > rings_per_vf) {
+               dev_warn(&oct->pci_dev->dev,
+                        "num_queues:%d greater than PF configured rings_per_vf:%d. Reducing to %d.\n",
+                        oct->sriov_info.rings_per_vf, rings_per_vf,
+                        rings_per_vf);
+               oct->sriov_info.rings_per_vf = rings_per_vf;
+       } else if (rings_per_vf > num_present_cpus()) {
+               dev_warn(&oct->pci_dev->dev,
+                        "PF configured rings_per_vf:%d greater than num_cpu:%d. Using rings_per_vf:%d equal to num cpus\n",
+                        rings_per_vf,
+                        num_present_cpus(),
+                        num_present_cpus());
+               oct->sriov_info.rings_per_vf =
+                       num_present_cpus();
+       } else {
+               oct->sriov_info.rings_per_vf = rings_per_vf;
+       }
+
+       oct->fn_list.setup_iq_regs = cn23xx_setup_vf_iq_regs;
+       oct->fn_list.setup_oq_regs = cn23xx_setup_vf_oq_regs;
+       oct->fn_list.setup_mbox = cn23xx_setup_vf_mbox;
+       oct->fn_list.free_mbox = cn23xx_free_vf_mbox;
+
+       oct->fn_list.msix_interrupt_handler = cn23xx_vf_msix_interrupt_handler;
+
+       oct->fn_list.setup_device_regs = cn23xx_setup_vf_device_regs;
+
+       oct->fn_list.enable_interrupt = cn23xx_enable_vf_interrupt;
+       oct->fn_list.disable_interrupt = cn23xx_disable_vf_interrupt;
+
+       oct->fn_list.enable_io_queues = cn23xx_enable_vf_io_queues;
+       oct->fn_list.disable_io_queues = cn23xx_disable_vf_io_queues;
+
+       return 0;
+}
+
+/* Debug helper: print, via dev_dbg(), the doorbell, base address and size
+ * registers of input queue 0, the packet control register of every ring
+ * of this VF and the PCIe device control word from config space.
+ *
+ * NOTE(review): cn23xx_vf_device.h declares
+ * cn23xx_dump_vf_initialized_regs(), not this name - confirm which one is
+ * intended.
+ */
+void cn23xx_dump_vf_iq_regs(struct octeon_device *oct)
+{
+       u32 regval, q_no;
+
+       dev_dbg(&oct->pci_dev->dev, "SLI_IQ_DOORBELL_0 [0x%x]: 0x%016llx\n",
+               CN23XX_VF_SLI_IQ_DOORBELL(0),
+               CVM_CAST64(octeon_read_csr64(
+                                       oct, CN23XX_VF_SLI_IQ_DOORBELL(0))));
+
+       dev_dbg(&oct->pci_dev->dev, "SLI_IQ_BASEADDR_0 [0x%x]: 0x%016llx\n",
+               CN23XX_VF_SLI_IQ_BASE_ADDR64(0),
+               CVM_CAST64(octeon_read_csr64(
+                       oct, CN23XX_VF_SLI_IQ_BASE_ADDR64(0))));
+
+       dev_dbg(&oct->pci_dev->dev, "SLI_IQ_FIFO_RSIZE_0 [0x%x]: 0x%016llx\n",
+               CN23XX_VF_SLI_IQ_SIZE(0),
+               CVM_CAST64(octeon_read_csr64(oct, CN23XX_VF_SLI_IQ_SIZE(0))));
+
+       for (q_no = 0; q_no < oct->sriov_info.rings_per_vf; q_no++) {
+               dev_dbg(&oct->pci_dev->dev, "SLI_PKT[%d]_INPUT_CTL [0x%x]: 0x%016llx\n",
+                       q_no, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no),
+                       CVM_CAST64(octeon_read_csr64(
+                               oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no))));
+       }
+
+       /* this value comes from PCI config space, not from a CSR */
+       pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, &regval);
+       dev_dbg(&oct->pci_dev->dev, "Config DevCtl [0x%x]: 0x%08x\n",
+               CN23XX_CONFIG_PCIE_DEVCTL, regval);
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h
new file mode 100644 (file)
index 0000000..6715df3
--- /dev/null
@@ -0,0 +1,48 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+/*! \file  cn23xx_vf_device.h
+ * \brief Host Driver: Routines that perform CN23XX specific operations.
+ */
+
+#ifndef __CN23XX_VF_DEVICE_H__
+#define __CN23XX_VF_DEVICE_H__
+
+#include "cn23xx_vf_regs.h"
+
+/* Chip-specific state of a CN23XX VF device.
+ * If device specific changes need to be made then add the device specific
+ * fields to this struct.
+ */
+struct octeon_cn23xx_vf {
+       /* configuration used for this device */
+       struct octeon_config *conf;
+};
+
+#define BUSY_READING_REG_VF_LOOP_COUNT         10000
+
+#define CN23XX_MAILBOX_MSGPARAM_SIZE           6
+
+#define MAX_VF_IP_OP_PENDING_PKT_COUNT         100
+
+void cn23xx_vf_ask_pf_to_do_flr(struct octeon_device *oct);
+
+int cn23xx_octeon_pfvf_handshake(struct octeon_device *oct);
+
+int cn23xx_setup_octeon_vf_device(struct octeon_device *oct);
+
+void cn23xx_dump_vf_initialized_regs(struct octeon_device *oct);
+#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_regs.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_regs.h
new file mode 100644 (file)
index 0000000..d33dd8f
--- /dev/null
@@ -0,0 +1,274 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+/*! \file cn23xx_vf_regs.h
+ * \brief Host Driver: Register Address and Register Mask values for
+ * Octeon CN23XX vf functions.
+ */
+
+#ifndef __CN23XX_VF_REGS_H__
+#define __CN23XX_VF_REGS_H__
+
+#define     CN23XX_CONFIG_XPANSION_BAR             0x38
+
+#define     CN23XX_CONFIG_PCIE_CAP                 0x70
+#define     CN23XX_CONFIG_PCIE_DEVCAP              0x74
+#define     CN23XX_CONFIG_PCIE_DEVCTL              0x78
+#define     CN23XX_CONFIG_PCIE_LINKCAP             0x7C
+#define     CN23XX_CONFIG_PCIE_LINKCTL             0x80
+#define     CN23XX_CONFIG_PCIE_SLOTCAP             0x84
+#define     CN23XX_CONFIG_PCIE_SLOTCTL             0x88
+
+#define     CN23XX_CONFIG_PCIE_FLTMSK              0x720
+
+/* The input jabber is used to determine the TSO max size.
+ * Due to a H/W limitation, this needs to be reduced to 60000
+ * in order to do H/W TSO and avoid the WQE malformation
+ * PKO_BUG_24989_WQE_LEN
+ */
+#define    CN23XX_DEFAULT_INPUT_JABBER             0xEA60 /*60000*/
+
+/* ##############  BAR0 Registers ################ */
+
+/* Register sets of consecutive Input Queues are 0x20000 bytes apart in BAR0 */
+#define    CN23XX_VF_IQ_OFFSET                     0x20000
+
+/*###################### REQUEST QUEUE #########################*/
+
+/* 64 registers for Input Queue Instr Count - SLI_PKT_IN_DONE0_CNTS */
+#define    CN23XX_VF_SLI_IQ_INSTR_COUNT_START64     0x10040
+
+/* 64 registers for Input Queues Start Addr - SLI_PKT0_INSTR_BADDR */
+#define    CN23XX_VF_SLI_IQ_BASE_ADDR_START64       0x10010
+
+/* 64 registers for Input Doorbell - SLI_PKT0_INSTR_BAOFF_DBELL */
+#define    CN23XX_VF_SLI_IQ_DOORBELL_START          0x10020
+
+/* 64 registers for Input Queue size - SLI_PKT0_INSTR_FIFO_RSIZE */
+#define    CN23XX_VF_SLI_IQ_SIZE_START              0x10030
+
+/* 64 registers (64-bit) - ES, RO, NS, Arbitration for Input Queue Data &
+ * gather list fetches. SLI_PKT(0..63)_INPUT_CONTROL.
+ */
+#define    CN23XX_VF_SLI_IQ_PKT_CONTROL_START64     0x10000
+
+/*------- Request Queue Macros ---------*/
+#define CN23XX_VF_SLI_IQ_PKT_CONTROL64(iq)             \
+       (CN23XX_VF_SLI_IQ_PKT_CONTROL_START64 + ((iq) * CN23XX_VF_IQ_OFFSET))
+
+#define CN23XX_VF_SLI_IQ_BASE_ADDR64(iq)               \
+       (CN23XX_VF_SLI_IQ_BASE_ADDR_START64 + ((iq) * CN23XX_VF_IQ_OFFSET))
+
+#define CN23XX_VF_SLI_IQ_SIZE(iq)                      \
+       (CN23XX_VF_SLI_IQ_SIZE_START + ((iq) * CN23XX_VF_IQ_OFFSET))
+
+#define CN23XX_VF_SLI_IQ_DOORBELL(iq)                  \
+       (CN23XX_VF_SLI_IQ_DOORBELL_START + ((iq) * CN23XX_VF_IQ_OFFSET))
+
+#define CN23XX_VF_SLI_IQ_INSTR_COUNT64(iq)             \
+       (CN23XX_VF_SLI_IQ_INSTR_COUNT_START64 + ((iq) * CN23XX_VF_IQ_OFFSET))
+
+/*------------------ Masks ----------------*/
+#define    CN23XX_PKT_INPUT_CTL_VF_NUM                  BIT_ULL(32)
+#define    CN23XX_PKT_INPUT_CTL_MAC_NUM                 BIT(29)
+/* Number of instructions to be read in one MAC read request.
+ * setting to Max value(4)
+ */
+#define    CN23XX_PKT_INPUT_CTL_RDSIZE                  (3 << 25)
+#define    CN23XX_PKT_INPUT_CTL_IS_64B                  BIT(24)
+#define    CN23XX_PKT_INPUT_CTL_RST                     BIT(23)
+#define    CN23XX_PKT_INPUT_CTL_QUIET                   BIT(28)
+#define    CN23XX_PKT_INPUT_CTL_RING_ENB                BIT(22)
+#define    CN23XX_PKT_INPUT_CTL_DATA_NS                 BIT(8)
+#define    CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP        BIT(6)
+#define    CN23XX_PKT_INPUT_CTL_DATA_RO                 BIT(5)
+#define    CN23XX_PKT_INPUT_CTL_USE_CSR                 BIT(4)
+#define    CN23XX_PKT_INPUT_CTL_GATHER_NS               BIT(3)
+#define    CN23XX_PKT_INPUT_CTL_GATHER_ES_64B_SWAP      (2)
+#define    CN23XX_PKT_INPUT_CTL_GATHER_RO               (1)
+
+/** Rings per Virtual Function [RO] **/
+#define    CN23XX_PKT_INPUT_CTL_RPVF_MASK               (0x3F)
+#define    CN23XX_PKT_INPUT_CTL_RPVF_POS                (48)
+/* These bits[47:44][RO] give the Physical function number info within the MAC*/
+#define    CN23XX_PKT_INPUT_CTL_PF_NUM_MASK             (0x7)
+#define    CN23XX_PKT_INPUT_CTL_PF_NUM_POS              (45)
+/** These bits[43:32][RO] give the virtual function number info within the PF*/
+#define    CN23XX_PKT_INPUT_CTL_VF_NUM_MASK             (0x1FFF)
+#define    CN23XX_PKT_INPUT_CTL_VF_NUM_POS              (32)
+#define    CN23XX_PKT_INPUT_CTL_MAC_NUM_MASK            (0x3)
+#define    CN23XX_PKT_INPUT_CTL_MAC_NUM_POS             (29)
+#define    CN23XX_PKT_IN_DONE_WMARK_MASK                (0xFFFFULL)
+#define    CN23XX_PKT_IN_DONE_WMARK_BIT_POS             (32)
+#define    CN23XX_PKT_IN_DONE_CNT_MASK                  (0x00000000FFFFFFFFULL)
+
+#ifdef __LITTLE_ENDIAN_BITFIELD
+#define CN23XX_PKT_INPUT_CTL_MASK                      \
+       (CN23XX_PKT_INPUT_CTL_RDSIZE                    \
+        | CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP        \
+        | CN23XX_PKT_INPUT_CTL_USE_CSR)
+#else
+#define CN23XX_PKT_INPUT_CTL_MASK                      \
+       (CN23XX_PKT_INPUT_CTL_RDSIZE                    \
+        | CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP        \
+        | CN23XX_PKT_INPUT_CTL_USE_CSR                 \
+        | CN23XX_PKT_INPUT_CTL_GATHER_ES_64B_SWAP)
+#endif
+
+/** Masks for SLI_PKT_IN_DONE(0..63)_CNTS Register */
+#define    CN23XX_IN_DONE_CNTS_PI_INT               BIT_ULL(62)
+#define    CN23XX_IN_DONE_CNTS_CINT_ENB             BIT_ULL(48)
+
+/*############################ OUTPUT QUEUE #########################*/
+
+/* 64 registers for Output queue control - SLI_PKT(0..63)_OUTPUT_CONTROL */
+#define    CN23XX_VF_SLI_OQ_PKT_CONTROL_START       0x10050
+
+/* 64 registers for Output queue buffer and info size - SLI_PKT0_OUT_SIZE */
+#define    CN23XX_VF_SLI_OQ0_BUFF_INFO_SIZE         0x10060
+
+/* 64 registers for Output Queue Start Addr - SLI_PKT0_SLIST_BADDR */
+#define    CN23XX_VF_SLI_OQ_BASE_ADDR_START64       0x10070
+
+/* 64 registers for Output Queue Packet Credits - SLI_PKT0_SLIST_BAOFF_DBELL */
+#define    CN23XX_VF_SLI_OQ_PKT_CREDITS_START       0x10080
+
+/* 64 registers for Output Queue size - SLI_PKT0_SLIST_FIFO_RSIZE */
+#define    CN23XX_VF_SLI_OQ_SIZE_START              0x10090
+
+/* 64 registers for Output Queue Packet Count - SLI_PKT0_CNTS */
+#define    CN23XX_VF_SLI_OQ_PKT_SENT_START          0x100B0
+
+/* 64 registers for Output Queue INT Levels - SLI_PKT0_INT_LEVELS */
+#define    CN23XX_VF_SLI_OQ_PKT_INT_LEVELS_START64  0x100A0
+
+/* Register sets of consecutive Output Queues are 0x20000 bytes apart in BAR0 */
+#define    CN23XX_VF_OQ_OFFSET                      0x20000
+
+/*------- Output Queue Macros ---------*/
+
+#define CN23XX_VF_SLI_OQ_PKT_CONTROL(oq)               \
+       (CN23XX_VF_SLI_OQ_PKT_CONTROL_START + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_BASE_ADDR64(oq)               \
+       (CN23XX_VF_SLI_OQ_BASE_ADDR_START64 + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_SIZE(oq)                      \
+       (CN23XX_VF_SLI_OQ_SIZE_START + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_BUFF_INFO_SIZE(oq)            \
+       (CN23XX_VF_SLI_OQ0_BUFF_INFO_SIZE + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_PKTS_SENT(oq)         \
+       (CN23XX_VF_SLI_OQ_PKT_SENT_START + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_PKTS_CREDIT(oq)               \
+       (CN23XX_VF_SLI_OQ_PKT_CREDITS_START + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(oq)            \
+       (CN23XX_VF_SLI_OQ_PKT_INT_LEVELS_START64 + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+/* Macros for accessing CNT and TIME separately from INT_LEVELS */
+#define CN23XX_VF_SLI_OQ_PKT_INT_LEVELS_CNT(oq)        \
+       (CN23XX_VF_SLI_OQ_PKT_INT_LEVELS_START64 + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_PKT_INT_LEVELS_TIME(oq)       \
+       (CN23XX_VF_SLI_OQ_PKT_INT_LEVELS_START64 +      \
+        ((oq) * CN23XX_VF_OQ_OFFSET) + 4)
+
+/*------------------ Masks ----------------*/
+#define    CN23XX_PKT_OUTPUT_CTL_TENB                  BIT(13)
+#define    CN23XX_PKT_OUTPUT_CTL_CENB                  BIT(12)
+#define    CN23XX_PKT_OUTPUT_CTL_IPTR                  BIT(11)
+#define    CN23XX_PKT_OUTPUT_CTL_ES                    BIT(9)
+#define    CN23XX_PKT_OUTPUT_CTL_NSR                   BIT(8)
+#define    CN23XX_PKT_OUTPUT_CTL_ROR                   BIT(7)
+#define    CN23XX_PKT_OUTPUT_CTL_DPTR                  BIT(6)
+#define    CN23XX_PKT_OUTPUT_CTL_BMODE                 BIT(5)
+#define    CN23XX_PKT_OUTPUT_CTL_ES_P                  BIT(3)
+#define    CN23XX_PKT_OUTPUT_CTL_NSR_P                 BIT(2)
+#define    CN23XX_PKT_OUTPUT_CTL_ROR_P                 BIT(1)
+#define    CN23XX_PKT_OUTPUT_CTL_RING_ENB              BIT(0)
+
+/*######################### Mailbox Reg Macros ########################*/
+#define    CN23XX_VF_SLI_PKT_MBOX_INT_START            0x10210
+#define    CN23XX_SLI_PKT_PF_VF_MBOX_SIG_START         0x10200
+
+#define    CN23XX_SLI_MBOX_OFFSET                      0x20000
+#define    CN23XX_SLI_MBOX_SIG_IDX_OFFSET              0x8
+
+#define CN23XX_VF_SLI_PKT_MBOX_INT(q)  \
+       (CN23XX_VF_SLI_PKT_MBOX_INT_START + ((q) * CN23XX_SLI_MBOX_OFFSET))
+
+#define CN23XX_SLI_PKT_PF_VF_MBOX_SIG(q, idx)          \
+       (CN23XX_SLI_PKT_PF_VF_MBOX_SIG_START +          \
+        ((q) * CN23XX_SLI_MBOX_OFFSET +                \
+         (idx) * CN23XX_SLI_MBOX_SIG_IDX_OFFSET))
+
+/*######################## INTERRUPTS #########################*/
+
+#define    CN23XX_VF_SLI_INT_SUM_START           0x100D0
+
+#define CN23XX_VF_SLI_INT_SUM(q)                       \
+       (CN23XX_VF_SLI_INT_SUM_START + ((q) * CN23XX_VF_IQ_OFFSET))
+
+/*------------------ Interrupt Masks ----------------*/
+
+#define    CN23XX_INTR_PO_INT                   BIT_ULL(63)
+#define    CN23XX_INTR_PI_INT                   BIT_ULL(62)
+#define    CN23XX_INTR_MBOX_INT                 BIT_ULL(61)
+#define    CN23XX_INTR_RESEND                   BIT_ULL(60)
+
+#define    CN23XX_INTR_CINT_ENB                 BIT_ULL(48)
+#define    CN23XX_INTR_MBOX_ENB                 BIT(0)
+
+/*############################ MIO #########################*/
+#define    CN23XX_MIO_PTP_CLOCK_CFG       0x0001070000000f00ULL
+#define    CN23XX_MIO_PTP_CLOCK_LO        0x0001070000000f08ULL
+#define    CN23XX_MIO_PTP_CLOCK_HI        0x0001070000000f10ULL
+#define    CN23XX_MIO_PTP_CLOCK_COMP      0x0001070000000f18ULL
+#define    CN23XX_MIO_PTP_TIMESTAMP       0x0001070000000f20ULL
+#define    CN23XX_MIO_PTP_EVT_CNT         0x0001070000000f28ULL
+#define    CN23XX_MIO_PTP_CKOUT_THRESH_LO 0x0001070000000f30ULL
+#define    CN23XX_MIO_PTP_CKOUT_THRESH_HI 0x0001070000000f38ULL
+#define    CN23XX_MIO_PTP_CKOUT_HI_INCR   0x0001070000000f40ULL
+#define    CN23XX_MIO_PTP_CKOUT_LO_INCR   0x0001070000000f48ULL
+#define    CN23XX_MIO_PTP_PPS_THRESH_LO   0x0001070000000f50ULL
+#define    CN23XX_MIO_PTP_PPS_THRESH_HI   0x0001070000000f58ULL
+#define    CN23XX_MIO_PTP_PPS_HI_INCR     0x0001070000000f60ULL
+#define    CN23XX_MIO_PTP_PPS_LO_INCR     0x0001070000000f68ULL
+
+/*############################ RST #########################*/
+#define    CN23XX_RST_BOOT                0x0001180006001600ULL
+
+/*######################## MSIX TABLE #########################*/
+
+#define    CN23XX_MSIX_TABLE_ADDR_START    0x0
+#define    CN23XX_MSIX_TABLE_DATA_START    0x8
+
+#define    CN23XX_MSIX_TABLE_SIZE          0x10
+#define    CN23XX_MSIX_TABLE_ENTRIES       0x41
+
+#define    CN23XX_MSIX_ENTRY_VECTOR_CTL    BIT_ULL(32)
+
+#define CN23XX_MSIX_TABLE_ADDR(idx)            \
+       (CN23XX_MSIX_TABLE_ADDR_START + ((idx) * CN23XX_MSIX_TABLE_SIZE))
+
+#define CN23XX_MSIX_TABLE_DATA(idx)            \
+       (CN23XX_MSIX_TABLE_DATA_START + ((idx) * CN23XX_MSIX_TABLE_SIZE))
+
+#endif
index e779af88621b1141055274ee68a5f2a03b3d9e00..bdec051107a6bff92d3c4b1a0b708cb8f7c44116 100644 (file)
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include "liquidio_common.h"
@@ -275,7 +271,6 @@ void lio_cn6xxx_setup_iq_regs(struct octeon_device *oct, u32 iq_no)
 {
        struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
 
-       /* Disable Packet-by-Packet mode; No Parse Mode or Skip length */
        octeon_write_csr64(oct, CN6XXX_SLI_IQ_PKT_INSTR_HDR64(iq_no), 0);
 
        /* Write the start of the input queue's ring and its size  */
@@ -378,7 +373,7 @@ void lio_cn6xxx_disable_io_queues(struct octeon_device *oct)
 
        /* Reset the doorbell register for each Input queue. */
        for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
-               if (!(oct->io_qmask.iq & (1ULL << i)))
+               if (!(oct->io_qmask.iq & BIT_ULL(i)))
                        continue;
                octeon_write_csr(oct, CN6XXX_SLI_IQ_DOORBELL(i), 0xFFFFFFFF);
                d32 = octeon_read_csr(oct, CN6XXX_SLI_IQ_DOORBELL(i));
@@ -400,9 +395,8 @@ void lio_cn6xxx_disable_io_queues(struct octeon_device *oct)
        ;
 
        /* Reset the doorbell register for each Output queue. */
-       /* for (i = 0; i < oct->num_oqs; i++) { */
        for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-               if (!(oct->io_qmask.oq & (1ULL << i)))
+               if (!(oct->io_qmask.oq & BIT_ULL(i)))
                        continue;
                octeon_write_csr(oct, CN6XXX_SLI_OQ_PKTS_CREDIT(i), 0xFFFFFFFF);
                d32 = octeon_read_csr(oct, CN6XXX_SLI_OQ_PKTS_CREDIT(i));
@@ -537,15 +531,14 @@ static int lio_cn6xxx_process_droq_intr_regs(struct octeon_device *oct)
 
        oct->droq_intr = 0;
 
-       /* for (oq_no = 0; oq_no < oct->num_oqs; oq_no++) { */
        for (oq_no = 0; oq_no < MAX_OCTEON_OUTPUT_QUEUES(oct); oq_no++) {
-               if (!(droq_mask & (1ULL << oq_no)))
+               if (!(droq_mask & BIT_ULL(oq_no)))
                        continue;
 
                droq = oct->droq[oq_no];
                pkt_count = octeon_droq_check_hw_for_pkts(droq);
                if (pkt_count) {
-                       oct->droq_intr |= (1ULL << oq_no);
+                       oct->droq_intr |= BIT_ULL(oq_no);
                        if (droq->ops.poll_mode) {
                                u32 value;
                                u32 reg;
@@ -721,8 +714,6 @@ int lio_setup_cn66xx_octeon_device(struct octeon_device *oct)
 int lio_validate_cn6xxx_config_info(struct octeon_device *oct,
                                    struct octeon_config *conf6xxx)
 {
-       /* int total_instrs = 0; */
-
        if (CFG_GET_IQ_MAX_Q(conf6xxx) > CN6XXX_MAX_INPUT_QUEUES) {
                dev_err(&oct->pci_dev->dev, "%s: Num IQ (%d) exceeds Max (%d)\n",
                        __func__, CFG_GET_IQ_MAX_Q(conf6xxx),
index a40a913940796e9d71c5d5a7e22b599c261a474b..8ed57134ee0ce142d98e13f2ca006b4fe6726d30 100644 (file)
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file  cn66xx_device.h
  *  \brief Host Driver: Routines that perform CN66XX specific operations.
  */
@@ -96,8 +91,8 @@ void lio_cn6xxx_setup_reg_address(struct octeon_device *oct, void *chip,
                                  struct octeon_reg_list *reg_list);
 u32 lio_cn6xxx_coprocessor_clock(struct octeon_device *oct);
 u32 lio_cn6xxx_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us);
-int lio_setup_cn66xx_octeon_device(struct octeon_device *);
+int lio_setup_cn66xx_octeon_device(struct octeon_device *oct);
 int lio_validate_cn6xxx_config_info(struct octeon_device *oct,
-                                   struct octeon_config *);
+                                   struct octeon_config *conf6xxx);
 
 #endif
index 5e3aff242ad3848ba63f509773c2b6f0590a03d9..b248966837b4c2b2e8ae25cf05431d52c94c5832 100644 (file)
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file cn66xx_regs.h
  *  \brief Host Driver: Register Address and Register Mask values for
  *  Octeon CN66XX devices.
 #define    CN6XXX_SLI_S2M_PORT0_CTL              0x3D80
 #define    CN6XXX_SLI_S2M_PORT1_CTL              0x3D90
 #define    CN6XXX_SLI_S2M_PORTX_CTL(port)        \
-       (CN6XXX_SLI_S2M_PORT0_CTL + (port * 0x10))
+       (CN6XXX_SLI_S2M_PORT0_CTL + ((port) * 0x10))
 
 #define    CN6XXX_SLI_INT_ENB64(port)            \
-       (CN6XXX_SLI_INT_ENB64_PORT0 + (port * 0x10))
+       (CN6XXX_SLI_INT_ENB64_PORT0 + ((port) * 0x10))
 
 #define    CN6XXX_SLI_MAC_NUMBER                 0x3E00
 
 #define    CN6XXX_PCI_BAR1_OFFSET                  0x8
 
 #define    CN6XXX_BAR1_REG(idx, port) \
-               (CN6XXX_BAR1_INDEX_START + (port * CN6XXX_PEM_OFFSET) + \
+               (CN6XXX_BAR1_INDEX_START + ((port) * CN6XXX_PEM_OFFSET) + \
                (CN6XXX_PCI_BAR1_OFFSET * (idx)))
 
 /*############################ DPI #########################*/
 #define    CN6XXX_DPI_DMA_ENG0_ENB        0x0001df0000000080ULL
 
 #define    CN6XXX_DPI_DMA_ENG_ENB(q_no)   \
-       (CN6XXX_DPI_DMA_ENG0_ENB + (q_no * 8))
+       (CN6XXX_DPI_DMA_ENG0_ENB + ((q_no) * 8))
 
 #define    CN6XXX_DPI_DMA_ENG0_BUF        0x0001df0000000880ULL
 
 #define    CN6XXX_DPI_DMA_ENG_BUF(q_no)   \
-       (CN6XXX_DPI_DMA_ENG0_BUF + (q_no * 8))
+       (CN6XXX_DPI_DMA_ENG0_BUF + ((q_no) * 8))
 
 #define    CN6XXX_DPI_SLI_PRT0_CFG        0x0001df0000000900ULL
 #define    CN6XXX_DPI_SLI_PRT1_CFG        0x0001df0000000908ULL
 #define    CN6XXX_DPI_SLI_PRTX_CFG(port)        \
-       (CN6XXX_DPI_SLI_PRT0_CFG + (port * 0x10))
+       (CN6XXX_DPI_SLI_PRT0_CFG + ((port) * 0x10))
 
 #define    CN6XXX_DPI_DMA_COMMIT_MODE     BIT_ULL(58)
 #define    CN6XXX_DPI_DMA_PKT_HP          BIT_ULL(57)
index dbf3566ead538110d1c18ce21d75856a07393046..50b533ff58e6db1f65aeeb3a3591ed7121132fa4 100644 (file)
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include "liquidio_common.h"
@@ -76,7 +72,7 @@ static void lio_cn68xx_setup_pkt_ctl_regs(struct octeon_device *oct)
        pktctl = octeon_read_csr64(oct, CN6XXX_SLI_PKT_CTL);
 
        /* 68XX specific */
-       max_oqs = CFG_GET_OQ_MAX_Q(CHIP_FIELD(oct, cn6xxx, conf));
+       max_oqs = CFG_GET_OQ_MAX_Q(CHIP_CONF(oct, cn6xxx));
        tx_pipe  = octeon_read_csr64(oct, CN68XX_SLI_TX_PIPE);
        tx_pipe &= 0xffffffffff00ffffULL; /* clear out NUMP field */
        tx_pipe |= max_oqs << 16; /* put max_oqs in NUMP field */
index ea7bdcce60444ee1a8483988815686b55fb97003..66b8d6bf5ec42e7043b5c94edb1d370319128831 100644 (file)
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file  cn68xx_device.h
  *  \brief Host Driver: Routines that perform CN68XX specific operations.
  */
index d45a0f4aaf1f50daeef9f5008a25ca07dcac208d..0b742f09e49d66ee804976b174333c0a2e87af8f 100644 (file)
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file cn68xx_regs.h
  *  \brief Host Driver: Register Address and Register Mask values for
  *  Octeon CN68XX devices. The register map for CN66XX is the same
index 201eddb3013aefece7cc7290bd61145dbfd964d2..f629c2fe04a44b16794db66d2a3e4c6bf5c09876 100644 (file)
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
 #include <linux/if_vlan.h>
 #include "liquidio_common.h"
@@ -89,13 +85,6 @@ void octeon_update_tx_completion_counters(void *buf, int reqtype,
        }
 
        (*pkts_compl)++;
-/*TODO, Use some other pound define to suggest
- * the fact that iqs are not tied to netdevs
- * and can take traffic from different netdevs
- * hence bql reporting is done per packet
- * than in bulk. Usage of NO_NAPI in txq completion is
- * a little confusing
- */
        *bytes_compl += skb->len;
 }
 
@@ -264,3 +253,34 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
                        nctrl->ncmd.s.cmd);
        }
 }
+
+void octeon_pf_changed_vf_macaddr(struct octeon_device *oct, u8 *mac)
+{
+       bool macaddr_changed = false;
+       struct net_device *netdev;
+       struct lio *lio;
+
+       rtnl_lock();
+
+       netdev = oct->props[0].netdev;
+       lio = GET_LIO(netdev);
+
+       lio->linfo.macaddr_is_admin_asgnd = true;
+
+       if (!ether_addr_equal(netdev->dev_addr, mac)) {
+               macaddr_changed = true;
+               ether_addr_copy(netdev->dev_addr, mac);
+               ether_addr_copy(((u8 *)&lio->linfo.hw_addr) + 2, mac);
+               call_netdevice_notifiers(NETDEV_CHANGEADDR, netdev);
+       }
+
+       rtnl_unlock();
+
+       if (macaddr_changed)
+               dev_info(&oct->pci_dev->dev,
+                        "PF changed VF's MAC address to %pM\n", mac);
+
+       /* no need to notify the firmware of the macaddr change because
+        * the PF did that already
+        */
+}
index f163e0abbeb24070c001f8b6dd8385a5ecec69f4..e233796ed4a3a61127476c8bf8b3fe037ade6290 100644 (file)
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/netdevice.h>
 #include <linux/net_tstamp.h>
 #include <linux/pci.h>
@@ -74,7 +70,6 @@ enum {
        INTERFACE_MODE_MIXED,
 };
 
-#define ARRAY_LENGTH(a) (sizeof(a) / sizeof((a)[0]))
 #define OCT_ETHTOOL_REGDUMP_LEN  4096
 #define OCT_ETHTOOL_REGDUMP_LEN_23XX  (4096 * 11)
 #define OCT_ETHTOOL_REGSVER  1
@@ -87,9 +82,9 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
        "tx_bytes",
        "rx_errors",    /*jabber_err+l2_err+frame_err */
        "tx_errors",    /*fw_err_pko+fw_err_link+fw_err_drop */
-       "rx_dropped",   /*st->fromwire.total_rcvd - st->fromwire.fw_total_rcvd
-                       *+st->fromwire.dmac_drop + st->fromwire.fw_err_drop
-                       */
+       "rx_dropped",   /*st->fromwire.total_rcvd - st->fromwire.fw_total_rcvd +
+                        *st->fromwire.dmac_drop + st->fromwire.fw_err_drop
+                        */
        "tx_dropped",
 
        "tx_total_sent",
@@ -259,14 +254,14 @@ lio_ethtool_get_channels(struct net_device *dev,
        u32 max_rx = 0, max_tx = 0, tx_count = 0, rx_count = 0;
 
        if (OCTEON_CN6XXX(oct)) {
-               struct octeon_config *conf6x = CHIP_FIELD(oct, cn6xxx, conf);
+               struct octeon_config *conf6x = CHIP_CONF(oct, cn6xxx);
 
                max_rx = CFG_GET_OQ_MAX_Q(conf6x);
                max_tx = CFG_GET_IQ_MAX_Q(conf6x);
                rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx);
                tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx);
        } else if (OCTEON_CN23XX_PF(oct)) {
-               struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+               struct octeon_config *conf23 = CHIP_CONF(oct, cn23xx_pf);
 
                max_rx = CFG_GET_OQ_MAX_Q(conf23);
                max_tx = CFG_GET_IQ_MAX_Q(conf23);
@@ -589,14 +584,14 @@ lio_ethtool_get_ringparam(struct net_device *netdev,
            rx_pending = 0;
 
        if (OCTEON_CN6XXX(oct)) {
-               struct octeon_config *conf6x = CHIP_FIELD(oct, cn6xxx, conf);
+               struct octeon_config *conf6x = CHIP_CONF(oct, cn6xxx);
 
                tx_max_pending = CN6XXX_MAX_IQ_DESCRIPTORS;
                rx_max_pending = CN6XXX_MAX_OQ_DESCRIPTORS;
                rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf6x, lio->ifidx);
                tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf6x, lio->ifidx);
        } else if (OCTEON_CN23XX_PF(oct)) {
-               struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+               struct octeon_config *conf23 = CHIP_CONF(oct, cn23xx_pf);
 
                tx_max_pending = CN23XX_MAX_IQ_DESCRIPTORS;
                rx_max_pending = CN23XX_MAX_OQ_DESCRIPTORS;
@@ -757,9 +752,6 @@ lio_get_ethtool_stats(struct net_device *netdev,
        /*sum of oct->instr_queue[iq_no]->stats.tx_dropped */
        data[i++] = CVM_CAST64(netstats->tx_dropped);
 
-       /*data[i++] = CVM_CAST64(stats->multicast); */
-       /*data[i++] = CVM_CAST64(stats->collisions); */
-
        /* firmware tx stats */
        /*per_core_stats[cvmx_get_core_num()].link_stats[mdata->from_ifidx].
         *fromhost.fw_total_sent
@@ -910,9 +902,8 @@ lio_get_ethtool_stats(struct net_device *netdev,
        /*lio->link_changes*/
        data[i++] = CVM_CAST64(lio->link_changes);
 
-       /* TX  -- lio_update_stats(lio); */
        for (j = 0; j < MAX_OCTEON_INSTR_QUEUES(oct_dev); j++) {
-               if (!(oct_dev->io_qmask.iq & (1ULL << j)))
+               if (!(oct_dev->io_qmask.iq & BIT_ULL(j)))
                        continue;
                /*packets to network port*/
                /*# of packets tx to network */
@@ -954,9 +945,8 @@ lio_get_ethtool_stats(struct net_device *netdev,
        }
 
        /* RX */
-       /* for (j = 0; j < oct_dev->num_oqs; j++) { */
        for (j = 0; j < MAX_OCTEON_OUTPUT_QUEUES(oct_dev); j++) {
-               if (!(oct_dev->io_qmask.oq & (1ULL << j)))
+               if (!(oct_dev->io_qmask.oq & BIT_ULL(j)))
                        continue;
 
                /*packets send to TCP/IP network stack */
@@ -1030,7 +1020,7 @@ static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 
                num_iq_stats = ARRAY_SIZE(oct_iq_stats_strings);
                for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct_dev); i++) {
-                       if (!(oct_dev->io_qmask.iq & (1ULL << i)))
+                       if (!(oct_dev->io_qmask.iq & BIT_ULL(i)))
                                continue;
                        for (j = 0; j < num_iq_stats; j++) {
                                sprintf(data, "tx-%d-%s", i,
@@ -1040,9 +1030,8 @@ static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
                }
 
                num_oq_stats = ARRAY_SIZE(oct_droq_stats_strings);
-               /* for (i = 0; i < oct_dev->num_oqs; i++) { */
                for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct_dev); i++) {
-                       if (!(oct_dev->io_qmask.oq & (1ULL << i)))
+                       if (!(oct_dev->io_qmask.oq & BIT_ULL(i)))
                                continue;
                        for (j = 0; j < num_oq_stats; j++) {
                                sprintf(data, "rx-%d-%s", i,
index 71d01a77896d427cc0faa12af5eba24562eeffb9..39a9665c9d004581121f727bdb2117b9052819b7 100644 (file)
@@ -1,28 +1,22 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-#include <linux/version.h>
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
 #include <linux/firmware.h>
-#include <linux/ptp_clock_kernel.h>
 #include <net/vxlan.h>
 #include <linux/kthread.h>
 #include "liquidio_common.h"
@@ -46,6 +40,7 @@ MODULE_VERSION(LIQUIDIO_VERSION);
 MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_210SV_NAME LIO_FW_NAME_SUFFIX);
 MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_210NV_NAME LIO_FW_NAME_SUFFIX);
 MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_410NV_NAME LIO_FW_NAME_SUFFIX);
+MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_23XX_NAME LIO_FW_NAME_SUFFIX);
 
 static int ddr_timeout = 10000;
 module_param(ddr_timeout, int, 0644);
@@ -54,9 +49,6 @@ MODULE_PARM_DESC(ddr_timeout,
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
-#define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count)  \
-       (octeon_dev_ptr->instr_queue[iq_no]->stats.field += count)
-
 static int debug = -1;
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "NETIF_MSG debug bits");
@@ -65,10 +57,6 @@ static char fw_type[LIO_MAX_FW_TYPE_LEN];
 module_param_string(fw_type, fw_type, sizeof(fw_type), 0000);
 MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded. Default \"nic\"");
 
-static int conf_type;
-module_param(conf_type, int, 0);
-MODULE_PARM_DESC(conf_type, "select octeon configuration 0 default 1 ovs");
-
 static int ptp_enable = 1;
 
 /* Bit mask values for lio->ifstate */
@@ -180,6 +168,10 @@ struct octeon_device_priv {
        unsigned long napi_mask;
 };
 
+#ifdef CONFIG_PCI_IOV
+static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs);
+#endif
+
 static int octeon_device_init(struct octeon_device *);
 static int liquidio_stop(struct net_device *netdev);
 static void liquidio_remove(struct pci_dev *pdev);
@@ -197,9 +189,8 @@ static void octeon_droq_bh(unsigned long pdev)
        struct octeon_device_priv *oct_priv =
                (struct octeon_device_priv *)oct->priv;
 
-       /* for (q_no = 0; q_no < oct->num_oqs; q_no++) { */
        for (q_no = 0; q_no < MAX_OCTEON_OUTPUT_QUEUES(oct); q_no++) {
-               if (!(oct->io_qmask.oq & (1ULL << q_no)))
+               if (!(oct->io_qmask.oq & BIT_ULL(q_no)))
                        continue;
                reschedule |= octeon_droq_process_packets(oct, oct->droq[q_no],
                                                          MAX_PACKET_BUDGET);
@@ -234,7 +225,7 @@ static int lio_wait_for_oq_pkts(struct octeon_device *oct)
                pending_pkts = 0;
 
                for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-                       if (!(oct->io_qmask.oq & (1ULL << i)))
+                       if (!(oct->io_qmask.oq & BIT_ULL(i)))
                                continue;
                        pkt_cnt += octeon_droq_check_hw_for_pkts(oct->droq[i]);
                }
@@ -316,7 +307,7 @@ static inline void pcierror_quiesce_device(struct octeon_device *oct)
        for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
                struct octeon_instr_queue *iq;
 
-               if (!(oct->io_qmask.iq & (1ULL << i)))
+               if (!(oct->io_qmask.iq & BIT_ULL(i)))
                        continue;
                iq = oct->instr_queue[i];
 
@@ -382,7 +373,6 @@ static void stop_pci_io(struct octeon_device *oct)
        dev_dbg(&oct->pci_dev->dev, "Device state is now %s\n",
                lio_get_state_string(&oct->status));
 
-       /* cn63xx_cleanup_aer_uncorrect_error_status(oct->pci_dev); */
        /* making it a common function for all OCTEON models */
        cleanup_aer_uncorrect_error_status(oct->pci_dev);
 }
@@ -518,6 +508,9 @@ static struct pci_driver liquidio_pci_driver = {
        .suspend        = liquidio_suspend,
        .resume         = liquidio_resume,
 #endif
+#ifdef CONFIG_PCI_IOV
+       .sriov_configure = liquidio_enable_sriov,
+#endif
 };
 
 /**
@@ -763,6 +756,7 @@ static void delete_glists(struct lio *lio)
        }
 
        kfree((void *)lio->glist);
+       kfree((void *)lio->glist_lock);
 }
 
 /**
@@ -933,7 +927,6 @@ static inline void update_link_status(struct net_device *netdev,
 
                if (lio->linfo.link.s.link_up) {
                        netif_carrier_on(netdev);
-                       /* start_txq(netdev); */
                        txqs_wake(netdev);
                } else {
                        netif_carrier_off(netdev);
@@ -1011,7 +1004,7 @@ static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct)
        if (oct->int_status & OCT_DEV_INTR_PKT_DATA) {
                for (oq_no = 0; oq_no < MAX_OCTEON_OUTPUT_QUEUES(oct);
                     oq_no++) {
-                       if (!(oct->droq_intr & (1ULL << oq_no)))
+                       if (!(oct->droq_intr & BIT_ULL(oq_no)))
                                continue;
 
                        droq = oct->droq[oq_no];
@@ -1322,6 +1315,7 @@ liquidio_probe(struct pci_dev *pdev,
                complete(&first_stage);
 
        if (octeon_device_init(oct_dev)) {
+               complete(&hs->init);
                liquidio_remove(pdev);
                return -ENOMEM;
        }
@@ -1346,7 +1340,15 @@ liquidio_probe(struct pci_dev *pdev,
                        oct_dev->watchdog_task = kthread_create(
                            liquidio_watchdog, oct_dev,
                            "liowd/%02hhx:%02hhx.%hhx", bus, device, function);
-                       wake_up_process(oct_dev->watchdog_task);
+                       if (!IS_ERR(oct_dev->watchdog_task)) {
+                               wake_up_process(oct_dev->watchdog_task);
+                       } else {
+                               oct_dev->watchdog_task = NULL;
+                               dev_err(&oct_dev->pci_dev->dev,
+                                       "failed to create kernel_thread\n");
+                               liquidio_remove(pdev);
+                               return -1;
+                       }
                }
        }
 
@@ -1410,6 +1412,8 @@ static void octeon_destroy_resources(struct octeon_device *oct)
                if (lio_wait_for_oq_pkts(oct))
                        dev_err(&oct->pci_dev->dev, "OQ had pending packets\n");
 
+       /* fallthrough */
+       case OCT_DEV_INTR_SET_DONE:
                /* Disable interrupts  */
                oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
 
@@ -1436,12 +1440,20 @@ static void octeon_destroy_resources(struct octeon_device *oct)
                                pci_disable_msi(oct->pci_dev);
                }
 
+       /* fallthrough */
+       case OCT_DEV_MSIX_ALLOC_VECTOR_DONE:
                if (OCTEON_CN23XX_PF(oct))
                        octeon_free_ioq_vector(oct);
+
+       /* fallthrough */
+       case OCT_DEV_MBOX_SETUP_DONE:
+               if (OCTEON_CN23XX_PF(oct))
+                       oct->fn_list.free_mbox(oct);
+
        /* fallthrough */
        case OCT_DEV_IN_RESET:
        case OCT_DEV_DROQ_INIT_DONE:
-               /*atomic_set(&oct->status, OCT_DEV_DROQ_INIT_DONE);*/
+               /* Wait for any pending operations */
                mdelay(100);
                for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
                        if (!(oct->io_qmask.oq & BIT_ULL(i)))
@@ -1472,6 +1484,10 @@ static void octeon_destroy_resources(struct octeon_device *oct)
                                continue;
                        octeon_delete_instr_queue(oct, i);
                }
+#ifdef CONFIG_PCI_IOV
+               if (oct->sriov_info.sriov_enabled)
+                       pci_disable_sriov(oct->pci_dev);
+#endif
                /* fallthrough */
        case OCT_DEV_SC_BUFF_POOL_INIT_DONE:
                octeon_free_sc_buffer_pool(oct);
@@ -1491,10 +1507,13 @@ static void octeon_destroy_resources(struct octeon_device *oct)
                octeon_unmap_pci_barx(oct, 1);
 
                /* fallthrough */
-       case OCT_DEV_BEGIN_STATE:
+       case OCT_DEV_PCI_ENABLE_DONE:
+               pci_clear_master(oct->pci_dev);
                /* Disable the device, releasing the PCI INT */
                pci_disable_device(oct->pci_dev);
 
+               /* fallthrough */
+       case OCT_DEV_BEGIN_STATE:
                /* Nothing to be done here either */
                break;
        }                       /* end switch (oct->status) */
@@ -1764,6 +1783,7 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
 
        if (dma_set_mask_and_coherent(&oct->pci_dev->dev, DMA_BIT_MASK(64))) {
                dev_err(&oct->pci_dev->dev, "Unexpected DMA device capability\n");
+               pci_disable_device(oct->pci_dev);
                return 1;
        }
 
@@ -2426,7 +2446,6 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
                 * Return back if tx_done is false.
                 */
                update_txq_status(oct, iq_no);
-               /*tx_done = (iq->flush_index == iq->octeon_read_index);*/
        } else {
                dev_err(&oct->pci_dev->dev, "%s:  iq (%d) num invalid\n",
                        __func__, iq_no);
@@ -3556,7 +3575,152 @@ static void liquidio_del_vxlan_port(struct net_device *netdev,
                                    OCTNET_CMD_VXLAN_PORT_DEL);
 }
 
-static struct net_device_ops lionetdevops = {
+static int __liquidio_set_vf_mac(struct net_device *netdev, int vfidx,
+                                u8 *mac, bool is_admin_assigned)
+{
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+       struct octnic_ctrl_pkt nctrl;
+
+       if (!is_valid_ether_addr(mac))
+               return -EINVAL;
+
+       if (vfidx < 0 || vfidx >= oct->sriov_info.max_vfs)
+               return -EINVAL;
+
+       memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+       nctrl.ncmd.u64 = 0;
+       nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MACADDR;
+       /* vfidx is 0 based, but vf_num (param1) is 1 based */
+       nctrl.ncmd.s.param1 = vfidx + 1;
+       nctrl.ncmd.s.param2 = (is_admin_assigned ? 1 : 0);
+       nctrl.ncmd.s.more = 1;
+       nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+       nctrl.cb_fn = 0;
+       nctrl.wait_time = LIO_CMD_WAIT_TM;
+
+       nctrl.udd[0] = 0;
+       /* The MAC Address is presented in network byte order. */
+       ether_addr_copy((u8 *)&nctrl.udd[0] + 2, mac);
+
+       oct->sriov_info.vf_macaddr[vfidx] = nctrl.udd[0];
+
+       octnet_send_nic_ctrl_pkt(oct, &nctrl);
+
+       return 0;
+}
+
+static int liquidio_set_vf_mac(struct net_device *netdev, int vfidx, u8 *mac)
+{
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+       int retval;
+
+       retval = __liquidio_set_vf_mac(netdev, vfidx, mac, true);
+       if (!retval)
+               cn23xx_tell_vf_its_macaddr_changed(oct, vfidx, mac);
+
+       return retval;
+}
+
+static int liquidio_set_vf_vlan(struct net_device *netdev, int vfidx,
+                               u16 vlan, u8 qos, __be16 vlan_proto)
+{
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+       struct octnic_ctrl_pkt nctrl;
+       u16 vlantci;
+
+       if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced)
+               return -EINVAL;
+
+       if (vlan_proto != htons(ETH_P_8021Q))
+               return -EPROTONOSUPPORT;
+
+       if (vlan >= VLAN_N_VID || qos > 7)
+               return -EINVAL;
+
+       if (vlan)
+               vlantci = vlan | (u16)qos << VLAN_PRIO_SHIFT;
+       else
+               vlantci = 0;
+
+       if (oct->sriov_info.vf_vlantci[vfidx] == vlantci)
+               return 0;
+
+       memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+       if (vlan)
+               nctrl.ncmd.s.cmd = OCTNET_CMD_ADD_VLAN_FILTER;
+       else
+               nctrl.ncmd.s.cmd = OCTNET_CMD_DEL_VLAN_FILTER;
+
+       nctrl.ncmd.s.param1 = vlantci;
+       nctrl.ncmd.s.param2 =
+           vfidx + 1; /* vfidx is 0 based, but vf_num (param2) is 1 based */
+       nctrl.ncmd.s.more = 0;
+       nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+       nctrl.cb_fn = 0;
+       nctrl.wait_time = LIO_CMD_WAIT_TM;
+
+       octnet_send_nic_ctrl_pkt(oct, &nctrl);
+
+       oct->sriov_info.vf_vlantci[vfidx] = vlantci;
+
+       return 0;
+}
+
+static int liquidio_get_vf_config(struct net_device *netdev, int vfidx,
+                                 struct ifla_vf_info *ivi)
+{
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+       u8 *macaddr;
+
+       if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced)
+               return -EINVAL;
+
+       ivi->vf = vfidx;
+       macaddr = 2 + (u8 *)&oct->sriov_info.vf_macaddr[vfidx];
+       ether_addr_copy(&ivi->mac[0], macaddr);
+       ivi->vlan = oct->sriov_info.vf_vlantci[vfidx] & VLAN_VID_MASK;
+       ivi->qos = oct->sriov_info.vf_vlantci[vfidx] >> VLAN_PRIO_SHIFT;
+       ivi->linkstate = oct->sriov_info.vf_linkstate[vfidx];
+       return 0;
+}
+
+static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx,
+                                     int linkstate)
+{
+       struct lio *lio = GET_LIO(netdev);
+       struct octeon_device *oct = lio->oct_dev;
+       struct octnic_ctrl_pkt nctrl;
+
+       if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced)
+               return -EINVAL;
+
+       if (oct->sriov_info.vf_linkstate[vfidx] == linkstate)
+               return 0;
+
+       memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+       nctrl.ncmd.s.cmd = OCTNET_CMD_SET_VF_LINKSTATE;
+       nctrl.ncmd.s.param1 =
+           vfidx + 1; /* vfidx is 0 based, but vf_num (param1) is 1 based */
+       nctrl.ncmd.s.param2 = linkstate;
+       nctrl.ncmd.s.more = 0;
+       nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+       nctrl.cb_fn = 0;
+       nctrl.wait_time = LIO_CMD_WAIT_TM;
+
+       octnet_send_nic_ctrl_pkt(oct, &nctrl);
+
+       oct->sriov_info.vf_linkstate[vfidx] = linkstate;
+
+       return 0;
+}
+
+static const struct net_device_ops lionetdevops = {
        .ndo_open               = liquidio_open,
        .ndo_stop               = liquidio_stop,
        .ndo_start_xmit         = liquidio_xmit,
@@ -3573,6 +3737,11 @@ static struct net_device_ops lionetdevops = {
        .ndo_set_features       = liquidio_set_features,
        .ndo_udp_tunnel_add     = liquidio_add_vxlan_port,
        .ndo_udp_tunnel_del     = liquidio_del_vxlan_port,
+       .ndo_set_vf_mac         = liquidio_set_vf_mac,
+       .ndo_set_vf_vlan        = liquidio_set_vf_vlan,
+       .ndo_get_vf_config      = liquidio_get_vf_config,
+       .ndo_set_vf_link_state  = liquidio_set_vf_link_state,
+       .ndo_select_queue       = select_q
 };
 
 /** \brief Entry point for the liquidio module
@@ -3584,7 +3753,7 @@ static int __init liquidio_init(void)
 
        init_completion(&first_stage);
 
-       octeon_init_device_list(conf_type);
+       octeon_init_device_list(OCTEON_CONFIG_TYPE_DEFAULT);
 
        if (liquidio_init_pci())
                return -EINVAL;
@@ -3805,9 +3974,6 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 
                SET_NETDEV_DEV(netdev, &octeon_dev->pci_dev->dev);
 
-               if (num_iqueues > 1)
-                       lionetdevops.ndo_select_queue = select_q;
-
                /* Associate the routines that will handle different
                 * netdev tasks.
                 */
@@ -3895,6 +4061,19 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
                        "if%d gmx: %d hw_addr: 0x%llx\n", i,
                        lio->linfo.gmxport, CVM_CAST64(lio->linfo.hw_addr));
 
+               for (j = 0; j < octeon_dev->sriov_info.max_vfs; j++) {
+                       u8 vfmac[ETH_ALEN];
+
+                       random_ether_addr(&vfmac[0]);
+                       if (__liquidio_set_vf_mac(netdev, j,
+                                                 &vfmac[0], false)) {
+                               dev_err(&octeon_dev->pci_dev->dev,
+                                       "Error setting VF%d MAC address\n",
+                                       j);
+                               goto setup_nic_dev_fail;
+                       }
+               }
+
                /* 64-bit swap required on LE machines */
                octeon_swap_8B_data(&lio->linfo.hw_addr, 1);
                for (j = 0; j < 6; j++)
@@ -3990,6 +4169,101 @@ setup_nic_wait_intr:
        return -ENODEV;
 }
 
+#ifdef CONFIG_PCI_IOV
+static int octeon_enable_sriov(struct octeon_device *oct)
+{
+       unsigned int num_vfs_alloced = oct->sriov_info.num_vfs_alloced;
+       struct pci_dev *vfdev;
+       int err;
+       u32 u;
+
+       if (OCTEON_CN23XX_PF(oct) && num_vfs_alloced) {
+               err = pci_enable_sriov(oct->pci_dev,
+                                      oct->sriov_info.num_vfs_alloced);
+               if (err) {
+                       dev_err(&oct->pci_dev->dev,
+                               "OCTEON: Failed to enable PCI sriov: %d\n",
+                               err);
+                       oct->sriov_info.num_vfs_alloced = 0;
+                       return err;
+               }
+               oct->sriov_info.sriov_enabled = 1;
+
+               /* init lookup table that maps DPI ring number to VF pci_dev
+                * struct pointer
+                */
+               u = 0;
+               vfdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
+                                      OCTEON_CN23XX_VF_VID, NULL);
+               while (vfdev) {
+                       if (vfdev->is_virtfn &&
+                           (vfdev->physfn == oct->pci_dev)) {
+                               oct->sriov_info.dpiring_to_vfpcidev_lut[u] =
+                                       vfdev;
+                               u += oct->sriov_info.rings_per_vf;
+                       }
+                       vfdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
+                                              OCTEON_CN23XX_VF_VID, vfdev);
+               }
+       }
+
+       return num_vfs_alloced;
+}
+
+static int lio_pci_sriov_disable(struct octeon_device *oct)
+{
+       int u;
+
+       if (pci_vfs_assigned(oct->pci_dev)) {
+               dev_err(&oct->pci_dev->dev, "VFs are still assigned to VMs.\n");
+               return -EPERM;
+       }
+
+       pci_disable_sriov(oct->pci_dev);
+
+       u = 0;
+       while (u < MAX_POSSIBLE_VFS) {
+               oct->sriov_info.dpiring_to_vfpcidev_lut[u] = NULL;
+               u += oct->sriov_info.rings_per_vf;
+       }
+
+       oct->sriov_info.num_vfs_alloced = 0;
+       dev_info(&oct->pci_dev->dev, "oct->pf_num:%d disabled VFs\n",
+                oct->pf_num);
+
+       return 0;
+}
+
+static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs)
+{
+       struct octeon_device *oct = pci_get_drvdata(dev);
+       int ret = 0;
+
+       if ((num_vfs == oct->sriov_info.num_vfs_alloced) &&
+           (oct->sriov_info.sriov_enabled)) {
+               dev_info(&oct->pci_dev->dev, "oct->pf_num:%d already enabled num_vfs:%d\n",
+                        oct->pf_num, num_vfs);
+               return 0;
+       }
+
+       if (!num_vfs) {
+               ret = lio_pci_sriov_disable(oct);
+       } else if (num_vfs > oct->sriov_info.max_vfs) {
+               dev_err(&oct->pci_dev->dev,
+                       "OCTEON: Max allowed VFs:%d user requested:%d",
+                       oct->sriov_info.max_vfs, num_vfs);
+               ret = -EPERM;
+       } else {
+               oct->sriov_info.num_vfs_alloced = num_vfs;
+               ret = octeon_enable_sriov(oct);
+               dev_info(&oct->pci_dev->dev, "oct->pf_num:%d num_vfs:%d\n",
+                        oct->pf_num, num_vfs);
+       }
+
+       return ret;
+}
+#endif
+
 /**
  * \brief initialize the NIC
  * @param oct octeon device
@@ -4095,6 +4369,52 @@ static void nic_starter(struct work_struct *work)
        complete(&handshake[oct->octeon_id].started);
 }
 
+static int
+octeon_recv_vf_drv_notice(struct octeon_recv_info *recv_info, void *buf)
+{
+       struct octeon_device *oct = (struct octeon_device *)buf;
+       struct octeon_recv_pkt *recv_pkt = recv_info->recv_pkt;
+       int i, notice, vf_idx;
+       u64 *data, vf_num;
+
+       notice = recv_pkt->rh.r.ossp;
+       data = (u64 *)get_rbd(recv_pkt->buffer_ptr[0]);
+
+       /* the first 64-bit word of data is the vf_num */
+       vf_num = data[0];
+       octeon_swap_8B_data(&vf_num, 1);
+       vf_idx = (int)vf_num - 1;
+
+       if (notice == VF_DRV_LOADED) {
+               if (!(oct->sriov_info.vf_drv_loaded_mask & BIT_ULL(vf_idx))) {
+                       oct->sriov_info.vf_drv_loaded_mask |= BIT_ULL(vf_idx);
+                       dev_info(&oct->pci_dev->dev,
+                                "driver for VF%d was loaded\n", vf_idx);
+                       try_module_get(THIS_MODULE);
+               }
+       } else if (notice == VF_DRV_REMOVED) {
+               if (oct->sriov_info.vf_drv_loaded_mask & BIT_ULL(vf_idx)) {
+                       oct->sriov_info.vf_drv_loaded_mask &= ~BIT_ULL(vf_idx);
+                       dev_info(&oct->pci_dev->dev,
+                                "driver for VF%d was removed\n", vf_idx);
+                       module_put(THIS_MODULE);
+               }
+       } else if (notice == VF_DRV_MACADDR_CHANGED) {
+               u8 *b = (u8 *)&data[1];
+
+               oct->sriov_info.vf_macaddr[vf_idx] = data[1];
+               dev_info(&oct->pci_dev->dev,
+                        "VF driver changed VF%d's MAC address to %pM\n",
+                        vf_idx, b + 2);
+       }
+
+       for (i = 0; i < recv_pkt->buffer_count; i++)
+               recv_buffer_free(recv_pkt->buffer_ptr[i]);
+       octeon_free_recv_info(recv_info);
+
+       return 0;
+}
+
 /**
  * \brief Device initialization for each Octeon device that is probed
  * @param octeon_dev  octeon device
@@ -4114,6 +4434,8 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
        if (octeon_pci_os_setup(octeon_dev))
                return 1;
 
+       atomic_set(&octeon_dev->status, OCT_DEV_PCI_ENABLE_DONE);
+
        /* Identify the Octeon type and map the BAR address space. */
        if (octeon_chip_specific_setup(octeon_dev)) {
                dev_err(&octeon_dev->pci_dev->dev, "Chip specific setup failed\n");
@@ -4153,6 +4475,9 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
                                    octeon_core_drv_init,
                                    octeon_dev);
 
+       octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC,
+                                   OPCODE_NIC_VF_DRV_NOTICE,
+                                   octeon_recv_vf_drv_notice, octeon_dev);
        INIT_DELAYED_WORK(&octeon_dev->nic_poll_work.work, nic_starter);
        octeon_dev->nic_poll_work.ctxptr = (void *)octeon_dev;
        schedule_delayed_work(&octeon_dev->nic_poll_work.work,
@@ -4160,7 +4485,10 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 
        atomic_set(&octeon_dev->status, OCT_DEV_DISPATCH_INIT_DONE);
 
-       octeon_set_io_queues_off(octeon_dev);
+       if (octeon_set_io_queues_off(octeon_dev)) {
+               dev_err(&octeon_dev->pci_dev->dev, "setting io queues off failed\n");
+               return 1;
+       }
 
        if (OCTEON_CN23XX_PF(octeon_dev)) {
                ret = octeon_dev->fn_list.setup_device_regs(octeon_dev);
@@ -4182,9 +4510,6 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
        if (octeon_setup_instr_queues(octeon_dev)) {
                dev_err(&octeon_dev->pci_dev->dev,
                        "instruction queue initialization failed\n");
-               /* On error, release any previously allocated queues */
-               for (j = 0; j < octeon_dev->num_iqs; j++)
-                       octeon_delete_instr_queue(octeon_dev, j);
                return 1;
        }
        atomic_set(&octeon_dev->status, OCT_DEV_INSTR_QUEUE_INIT_DONE);
@@ -4200,19 +4525,23 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 
        if (octeon_setup_output_queues(octeon_dev)) {
                dev_err(&octeon_dev->pci_dev->dev, "Output queue initialization failed\n");
-               /* Release any previously allocated queues */
-               for (j = 0; j < octeon_dev->num_oqs; j++)
-                       octeon_delete_droq(octeon_dev, j);
                return 1;
        }
 
        atomic_set(&octeon_dev->status, OCT_DEV_DROQ_INIT_DONE);
 
        if (OCTEON_CN23XX_PF(octeon_dev)) {
+               if (octeon_dev->fn_list.setup_mbox(octeon_dev)) {
+                       dev_err(&octeon_dev->pci_dev->dev, "OCTEON: Mailbox setup failed\n");
+                       return 1;
+               }
+               atomic_set(&octeon_dev->status, OCT_DEV_MBOX_SETUP_DONE);
+
                if (octeon_allocate_ioq_vector(octeon_dev)) {
                        dev_err(&octeon_dev->pci_dev->dev, "OCTEON: ioq vector allocation failed\n");
                        return 1;
                }
+               atomic_set(&octeon_dev->status, OCT_DEV_MSIX_ALLOC_VECTOR_DONE);
 
        } else {
                /* The input and output queue registers were setup earlier (the
@@ -4240,6 +4569,8 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
        /* Enable Octeon device interrupts */
        octeon_dev->fn_list.enable_interrupt(octeon_dev, OCTEON_ALL_INTR);
 
+       atomic_set(&octeon_dev->status, OCT_DEV_INTR_SET_DONE);
+
        /* Enable the input and output queues for this Octeon device */
        ret = octeon_dev->fn_list.enable_io_queues(octeon_dev);
        if (ret) {
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
new file mode 100644 (file)
index 0000000..e6321f3
--- /dev/null
@@ -0,0 +1,614 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+#include <linux/pci.h>
+#include <net/vxlan.h>
+#include "liquidio_common.h"
+#include "octeon_droq.h"
+#include "octeon_iq.h"
+#include "response_manager.h"
+#include "octeon_device.h"
+#include "octeon_main.h"
+#include "cn23xx_vf_device.h"
+
+MODULE_AUTHOR("Cavium Networks, <support@cavium.com>");
+MODULE_DESCRIPTION("Cavium LiquidIO Intelligent Server Adapter Virtual Function Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(LIQUIDIO_VERSION);
+
+struct octeon_device_priv {
+       /* Tasklet structures for this device; droq_tasklet is scheduled from
+        * this file whenever output-queue packets are found pending.
+        */
+       struct tasklet_struct droq_tasklet;
+       unsigned long napi_mask; /* declared but not referenced in this file */
+};
+
+static int
+liquidio_vf_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
+static void liquidio_vf_remove(struct pci_dev *pdev);
+static int octeon_device_init(struct octeon_device *oct);
+
+/**
+ * \brief Wait for the output queues to drain
+ * @param oct Pointer to Octeon device
+ *
+ * On each pass, sums the packets reported by every output queue enabled in
+ * oct->io_qmask.oq, schedules the DROQ tasklet if any are pending and sleeps
+ * for one jiffy. Stops once a pass finds nothing pending or the retry budget
+ * (MAX_VF_IP_OP_PENDING_PKT_COUNT) is used up.
+ *
+ * Returns the number of packets still pending on the last pass, 0 if the
+ * queues drained.
+ */
+static int lio_wait_for_oq_pkts(struct octeon_device *oct)
+{
+       struct octeon_device_priv *oct_priv =
+           (struct octeon_device_priv *)oct->priv;
+       int retry = MAX_VF_IP_OP_PENDING_PKT_COUNT;
+       int pkt_cnt = 0, pending_pkts;
+       int i;
+
+       do {
+               pending_pkts = 0;
+
+               for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
+                       if (!(oct->io_qmask.oq & BIT_ULL(i)))
+                               continue;
+                       pkt_cnt += octeon_droq_check_hw_for_pkts(oct->droq[i]);
+               }
+               if (pkt_cnt > 0) {
+                       pending_pkts += pkt_cnt;
+                       tasklet_schedule(&oct_priv->droq_tasklet);
+               }
+               pkt_cnt = 0;
+               schedule_timeout_uninterruptible(1);
+
+       } while (retry-- && pending_pkts);
+
+       /* pkt_cnt is reset on every pass, so report the last pass's total
+        * instead; otherwise the caller can never see leftover packets.
+        */
+       return pending_pkts;
+}
+
+/**
+ * \brief Wait for the ordered soft-command requests to complete
+ * @param oct Pointer to Octeon device
+ *
+ * Called during shutdown sequence. Samples the pending request count of the
+ * OCTEON_ORDERED_SC_LIST response list at most MAX_VF_IP_OP_PENDING_PKT_COUNT
+ * times, sleeping HZ / 10 jiffies after each non-zero sample.
+ *
+ * Returns 0 if the count was seen at zero, 1 if requests were still pending
+ * on the last sample.
+ */
+static int wait_for_pending_requests(struct octeon_device *oct)
+{
+       int outstanding = 0;
+       int tries;
+
+       for (tries = MAX_VF_IP_OP_PENDING_PKT_COUNT; tries > 0; tries--) {
+               outstanding = atomic_read(
+                   &oct->response_list[OCTEON_ORDERED_SC_LIST]
+                        .pending_req_count);
+               if (!outstanding)
+                       break;
+
+               schedule_timeout_uninterruptible(HZ / 10);
+       }
+
+       return outstanding ? 1 : 0;
+}
+
+/* Devices claimed by this driver: the CN23XX virtual function, matched on
+ * vendor and device ID with any subsystem IDs. The zeroed entry ends the
+ * table.
+ */
+static const struct pci_device_id liquidio_vf_pci_tbl[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEON_CN23XX_VF_VID) },
+       { 0 }
+};
+MODULE_DEVICE_TABLE(pci, liquidio_vf_pci_tbl);
+
+/* PCI driver glue: probe/remove entry points for the IDs listed above. */
+static struct pci_driver liquidio_vf_pci_driver = {
+       .name           = "LiquidIO_VF",
+       .id_table       = liquidio_vf_pci_tbl,
+       .probe          = liquidio_vf_probe,
+       .remove         = liquidio_vf_remove,
+};
+
+/* Dispatch an MSI-X queue interrupt: in poll mode hand the DROQ to its NAPI
+ * callback; otherwise an output (PO) interrupt is unexpected, so log it and
+ * fall back to the DROQ tasklet. Returns 1 only in that fallback case.
+ */
+static
+int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
+{
+       struct octeon_device *oct = droq->oct_dev;
+       struct octeon_device_priv *priv = (struct octeon_device_priv *)oct->priv;
+
+       if (droq->ops.poll_mode) {
+               droq->ops.napi_fn(droq);
+               return 0;
+       }
+
+       /* An input (PI) interrupt alone needs no action here: this will be
+        * flushed periodically by check iq db.
+        */
+       if (!(ret & MSIX_PO_INT))
+               return 0;
+
+       dev_err(&oct->pci_dev->dev,
+               "should not come here should not get rx when poll mode = 0 for vf\n");
+       tasklet_schedule(&priv->droq_tasklet);
+
+       return 1;
+}
+
+/* MSI-X handler registered per ioq vector: lets the chip-specific handler
+ * decode the cause and forwards output/input queue events for the vector's
+ * DROQ. Always reports IRQ_HANDLED.
+ */
+static irqreturn_t
+liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev)
+{
+       struct octeon_ioq_vector *vec = (struct octeon_ioq_vector *)dev;
+       struct octeon_device *oct = vec->oct_dev;
+       struct octeon_droq *droq = oct->droq[vec->droq_index];
+       u64 cause = oct->fn_list.msix_interrupt_handler(vec);
+
+       if (cause & (MSIX_PO_INT | MSIX_PI_INT))
+               liquidio_schedule_msix_droq_pkt_handler(droq, cause);
+
+       return IRQ_HANDLED;
+}
+
+/**
+ * \brief Setup interrupt for octeon device
+ * @param oct octeon device
+ *
+ *  When oct->msix_on is set, allocates exactly one MSI-X vector per VF ring
+ *  (oct->sriov_info.rings_per_vf; minimum and maximum passed to
+ *  pci_enable_msix_range() are equal), installs liquidio_msix_intr_handler
+ *  on each vector and sets its affinity hint from the matching ioq_vector.
+ *  Does nothing when oct->msix_on is clear.
+ *
+ *  Returns 0 on success, 1 on failure. On failure every IRQ requested here
+ *  is released and oct->msix_entries is freed and reset to NULL.
+ */
+static int octeon_setup_interrupt(struct octeon_device *oct)
+{
+       struct msix_entry *msix_entries;
+       int num_alloc_ioq_vectors;
+       int num_ioq_vectors;
+       int irqret;
+       int i;
+
+       if (oct->msix_on) {
+               oct->num_msix_irqs = oct->sriov_info.rings_per_vf;
+
+               oct->msix_entries = kcalloc(
+                   oct->num_msix_irqs, sizeof(struct msix_entry), GFP_KERNEL);
+               if (!oct->msix_entries)
+                       return 1;
+
+               msix_entries = (struct msix_entry *)oct->msix_entries;
+
+               for (i = 0; i < oct->num_msix_irqs; i++)
+                       msix_entries[i].entry = i;
+               num_alloc_ioq_vectors = pci_enable_msix_range(
+                                               oct->pci_dev, msix_entries,
+                                               oct->num_msix_irqs,
+                                               oct->num_msix_irqs);
+               if (num_alloc_ioq_vectors < 0) {
+                       dev_err(&oct->pci_dev->dev, "unable to Allocate MSI-X interrupts\n");
+                       kfree(oct->msix_entries);
+                       oct->msix_entries = NULL;
+                       return 1;
+               }
+               dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n");
+
+               num_ioq_vectors = oct->num_msix_irqs;
+
+               for (i = 0; i < num_ioq_vectors; i++) {
+                       irqret = request_irq(msix_entries[i].vector,
+                                            liquidio_msix_intr_handler, 0,
+                                            "octeon", &oct->ioq_vector[i]);
+                       if (irqret) {
+                               dev_err(&oct->pci_dev->dev,
+                                       "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n",
+                                       irqret);
+
+                               while (i) { /* unwind the vectors already requested */
+                                       i--;
+                                       irq_set_affinity_hint(
+                                           msix_entries[i].vector, NULL);
+                                       free_irq(msix_entries[i].vector,
+                                                &oct->ioq_vector[i]);
+                               }
+                               pci_disable_msix(oct->pci_dev);
+                               kfree(oct->msix_entries);
+                               oct->msix_entries = NULL;
+                               return 1;
+                       }
+                       oct->ioq_vector[i].vector = msix_entries[i].vector;
+                       /* assign the cpu mask for this msix interrupt vector */
+                       irq_set_affinity_hint(
+                           msix_entries[i].vector,
+                           (&oct->ioq_vector[i].affinity_mask));
+               }
+               dev_dbg(&oct->pci_dev->dev,
+                       "OCTEON[%d]: MSI-X enabled\n", oct->octeon_id);
+       }
+       return 0;
+}
+
+/**
+ * \brief PCI probe handler
+ * @param pdev PCI device structure
+ * @param ent unused
+ *
+ * Allocates the octeon_device (with room for struct octeon_device_priv),
+ * marks it as MSI-X enabled, links it with pdev in both directions and runs
+ * octeon_device_init(). If initialization fails, liquidio_vf_remove() is
+ * called to tear down whatever was set up and free the device.
+ *
+ * Returns 0 on success; -ENOMEM on any failure, whatever its actual cause.
+ */
+static int
+liquidio_vf_probe(struct pci_dev *pdev,
+                 const struct pci_device_id *ent __attribute__((unused)))
+{
+       struct octeon_device *oct_dev = NULL;
+
+       oct_dev = octeon_allocate_device(pdev->device,
+                                        sizeof(struct octeon_device_priv));
+
+       if (!oct_dev) {
+               dev_err(&pdev->dev, "Unable to allocate device\n");
+               return -ENOMEM;
+       }
+       oct_dev->msix_on = LIO_FLAG_MSIX_ENABLED;
+
+       dev_info(&pdev->dev, "Initializing device %x:%x.\n",
+                (u32)pdev->vendor, (u32)pdev->device);
+
+       /* Assign octeon_device for this device to the private data area. */
+       pci_set_drvdata(pdev, oct_dev);
+
+       /* set linux specific device pointer */
+       oct_dev->pci_dev = pdev;
+
+       if (octeon_device_init(oct_dev)) {
+               liquidio_vf_remove(pdev);
+               return -ENOMEM;
+       }
+
+       dev_dbg(&oct_dev->pci_dev->dev, "Device is ready\n");
+
+       return 0;
+}
+
+/**
+ * \brief PCI FLR for each Octeon device.
+ * @param oct octeon device
+ *
+ * Saves the PCI state, locks config access, writes PCI_COMMAND with only
+ * INTX_DISABLE set, waits for the Transactions Pending bit to clear (100 ms,
+ * then one further 5 s wait), sets the FLR bit in the PCIe Device Control
+ * register regardless of the outcome, waits 100 ms, then unlocks config
+ * access and restores the saved state.
+ */
+static void octeon_pci_flr(struct octeon_device *oct)
+{
+       u16 status;
+
+       pci_save_state(oct->pci_dev);
+
+       pci_cfg_access_lock(oct->pci_dev);
+
+       /* Quiesce the device completely */
+       pci_write_config_word(oct->pci_dev, PCI_COMMAND,
+                             PCI_COMMAND_INTX_DISABLE);
+
+       /* Wait for the Transactions Pending bit to clear */
+       msleep(100);
+       pcie_capability_read_word(oct->pci_dev, PCI_EXP_DEVSTA, &status);
+       if (status & PCI_EXP_DEVSTA_TRPND) {
+               dev_info(&oct->pci_dev->dev, "Function reset incomplete after 100ms, sleeping for 5 seconds\n");
+               ssleep(5);
+               pcie_capability_read_word(oct->pci_dev, PCI_EXP_DEVSTA,
+                                         &status);
+               if (status & PCI_EXP_DEVSTA_TRPND)
+                       dev_info(&oct->pci_dev->dev, "Function reset still incomplete after 5s, reset anyway\n");
+       }
+       pcie_capability_set_word(oct->pci_dev, PCI_EXP_DEVCTL,
+                                PCI_EXP_DEVCTL_BCR_FLR);
+       mdelay(100);
+
+       pci_cfg_access_unlock(oct->pci_dev);
+
+       pci_restore_state(oct->pci_dev);
+}
+
+/**
+ *\brief Destroy resources associated with octeon device
+ * @param oct octeon device
+ *
+ * Tears the device down starting from its current oct->status: each case
+ * undoes one initialization stage and then falls through to the cases for
+ * the stages that were completed before it.
+ */
+static void octeon_destroy_resources(struct octeon_device *oct)
+{
+       struct msix_entry *msix_entries;
+       int i;
+
+       switch (atomic_read(&oct->status)) {
+       case OCT_DEV_RUNNING:
+       case OCT_DEV_CORE_OK:
+               /* No more instructions will be forwarded. */
+               atomic_set(&oct->status, OCT_DEV_IN_RESET);
+
+               dev_dbg(&oct->pci_dev->dev, "Device state is now %s\n",
+                       lio_get_state_string(&oct->status));
+
+               schedule_timeout_uninterruptible(HZ / 10);
+
+               /* fallthrough */
+       case OCT_DEV_HOST_OK:
+               /* fallthrough */
+       case OCT_DEV_IO_QUEUES_DONE:
+               if (wait_for_pending_requests(oct))
+                       dev_err(&oct->pci_dev->dev, "There were pending requests\n");
+
+               if (lio_wait_for_instr_fetch(oct))
+                       dev_err(&oct->pci_dev->dev, "IQ had pending instructions\n");
+
+               /* Disable the input and output queues now. No more packets will
+                * arrive from Octeon, but we should wait for all packet
+                * processing to finish.
+                */
+               oct->fn_list.disable_io_queues(oct);
+
+               if (lio_wait_for_oq_pkts(oct))
+                       dev_err(&oct->pci_dev->dev, "OQ had pending packets\n");
+               /* fallthrough */
+       case OCT_DEV_INTR_SET_DONE:
+               /* Disable interrupts  */
+               oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
+
+               if (oct->msix_on) {
+                       msix_entries = (struct msix_entry *)oct->msix_entries;
+                       for (i = 0; i < oct->num_msix_irqs; i++) {
+                               irq_set_affinity_hint(msix_entries[i].vector,
+                                                     NULL);
+                               free_irq(msix_entries[i].vector,
+                                        &oct->ioq_vector[i]);
+                       }
+                       pci_disable_msix(oct->pci_dev);
+                       kfree(oct->msix_entries);
+                       oct->msix_entries = NULL;
+               }
+               /* Reset the function before exiting: do the FLR ourselves when
+                * pci_dev->reset_fn is set, otherwise ask the PF to do it.
+                */
+               if (oct->pci_dev->reset_fn)
+                       octeon_pci_flr(oct);
+               else
+                       cn23xx_vf_ask_pf_to_do_flr(oct);
+
+               /* fallthrough */
+       case OCT_DEV_MSIX_ALLOC_VECTOR_DONE:
+               octeon_free_ioq_vector(oct);
+
+               /* fallthrough */
+       case OCT_DEV_MBOX_SETUP_DONE:
+               oct->fn_list.free_mbox(oct);
+
+               /* fallthrough */
+       case OCT_DEV_IN_RESET:
+       case OCT_DEV_DROQ_INIT_DONE:
+               mdelay(100);
+               for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
+                       if (!(oct->io_qmask.oq & BIT_ULL(i)))
+                               continue;
+                       octeon_delete_droq(oct, i);
+               }
+
+               /* fallthrough */
+       case OCT_DEV_RESP_LIST_INIT_DONE:
+               octeon_delete_response_list(oct);
+
+               /* fallthrough */
+       case OCT_DEV_INSTR_QUEUE_INIT_DONE:
+               for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
+                       if (!(oct->io_qmask.iq & BIT_ULL(i)))
+                               continue;
+                       octeon_delete_instr_queue(oct, i);
+               }
+
+               /* fallthrough */
+       case OCT_DEV_SC_BUFF_POOL_INIT_DONE:
+               octeon_free_sc_buffer_pool(oct);
+
+               /* fallthrough */
+       case OCT_DEV_DISPATCH_INIT_DONE:
+               octeon_delete_dispatch_list(oct);
+               cancel_delayed_work_sync(&oct->nic_poll_work.work);
+
+               /* fallthrough */
+       case OCT_DEV_PCI_MAP_DONE:
+               octeon_unmap_pci_barx(oct, 0);
+               octeon_unmap_pci_barx(oct, 1);
+
+               /* fallthrough */
+       case OCT_DEV_PCI_ENABLE_DONE:
+               pci_clear_master(oct->pci_dev);
+               /* Disable the device, releasing the PCI INT */
+               pci_disable_device(oct->pci_dev);
+
+               /* fallthrough */
+       case OCT_DEV_BEGIN_STATE:
+               /* Nothing to be done here either */
+               break;
+       }
+}
+
+/**
+ * \brief Cleans up resources at unload time
+ * @param pdev PCI device structure
+ *
+ * Also called from liquidio_vf_probe() when device initialization fails.
+ * Expects pdev's driver data to hold the octeon_device set by the probe.
+ */
+static void liquidio_vf_remove(struct pci_dev *pdev)
+{
+       struct octeon_device *oct_dev = pci_get_drvdata(pdev);
+
+       dev_dbg(&oct_dev->pci_dev->dev, "Stopping device\n");
+
+       /* Reset the octeon device and cleanup all memory allocated for
+        * the octeon device by driver.
+        */
+       octeon_destroy_resources(oct_dev);
+
+       dev_info(&oct_dev->pci_dev->dev, "Device removed\n");
+
+       /* This octeon device has been removed. Update the global
+        * data structure to reflect this. Free the device structure.
+        */
+       octeon_free_device_mem(oct_dev);
+}
+
+/**
+ * \brief PCI initialization for each Octeon device.
+ * @param oct octeon device
+ *
+ * Enables the PCI device, requires a 64-bit streaming and coherent DMA mask
+ * and turns on bus mastering. With CONFIG_PCI_IOV, a function level reset is
+ * issued first when pci_dev->physfn is not set.
+ *
+ * Returns 0 on success, 1 on failure; the device is left disabled if the
+ * DMA mask cannot be set.
+ */
+static int octeon_pci_os_setup(struct octeon_device *oct)
+{
+#ifdef CONFIG_PCI_IOV
+       /* setup PCI stuff first */
+       if (!oct->pci_dev->physfn)
+               octeon_pci_flr(oct);
+#endif
+
+       if (pci_enable_device(oct->pci_dev)) {
+               dev_err(&oct->pci_dev->dev, "pci_enable_device failed\n");
+               return 1;
+       }
+
+       if (dma_set_mask_and_coherent(&oct->pci_dev->dev, DMA_BIT_MASK(64))) {
+               dev_err(&oct->pci_dev->dev, "Unexpected DMA device capability\n");
+               pci_disable_device(oct->pci_dev);
+               return 1;
+       }
+
+       /* Enable PCI DMA Master. */
+       pci_set_master(oct->pci_dev);
+
+       return 0;
+}
+
+/**
+ * \brief Device initialization for each Octeon device that is probed
+ * @param oct  octeon device
+ *
+ * Brings the VF up stage by stage. After each stage that allocates
+ * something, oct->status is advanced so that octeon_destroy_resources()
+ * knows how far to unwind. This function never cleans up after itself; on
+ * failure the caller is expected to run the teardown.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+static int octeon_device_init(struct octeon_device *oct)
+{
+       u32 rev_id;
+       int j;
+
+       atomic_set(&oct->status, OCT_DEV_BEGIN_STATE);
+
+       /* Enable access to the octeon device and make its DMA capability
+        * known to the OS.
+        */
+       if (octeon_pci_os_setup(oct))
+               return 1;
+       atomic_set(&oct->status, OCT_DEV_PCI_ENABLE_DONE);
+
+       oct->chip_id = OCTEON_CN23XX_VF_VID;
+       /* Config offset 8 holds the revision ID in its low byte. */
+       pci_read_config_dword(oct->pci_dev, 8, &rev_id);
+       oct->rev_id = rev_id & 0xff;
+
+       if (cn23xx_setup_octeon_vf_device(oct))
+               return 1;
+
+       atomic_set(&oct->status, OCT_DEV_PCI_MAP_DONE);
+
+       /* Initialize the dispatch mechanism used to push packets arriving on
+        * Octeon Output queues.
+        */
+       if (octeon_init_dispatch_list(oct))
+               return 1;
+
+       atomic_set(&oct->status, OCT_DEV_DISPATCH_INIT_DONE);
+
+       if (octeon_set_io_queues_off(oct)) {
+               dev_err(&oct->pci_dev->dev, "setting io queues off failed\n");
+               return 1;
+       }
+
+       if (oct->fn_list.setup_device_regs(oct)) {
+               dev_err(&oct->pci_dev->dev, "device registers configuration failed\n");
+               return 1;
+       }
+
+       /* Initialize soft command buffer pool */
+       if (octeon_setup_sc_buffer_pool(oct)) {
+               dev_err(&oct->pci_dev->dev, "sc buffer pool allocation failed\n");
+               return 1;
+       }
+       atomic_set(&oct->status, OCT_DEV_SC_BUFF_POOL_INIT_DONE);
+
+       /* Setup the data structures that manage this Octeon's Input queues. */
+       if (octeon_setup_instr_queues(oct)) {
+               dev_err(&oct->pci_dev->dev, "instruction queue initialization failed\n");
+               return 1;
+       }
+       atomic_set(&oct->status, OCT_DEV_INSTR_QUEUE_INIT_DONE);
+
+       /* Initialize lists to manage the requests of different types that
+        * arrive from user & kernel applications for this octeon device.
+        */
+       if (octeon_setup_response_list(oct)) {
+               dev_err(&oct->pci_dev->dev, "Response list allocation failed\n");
+               return 1;
+       }
+       atomic_set(&oct->status, OCT_DEV_RESP_LIST_INIT_DONE);
+
+       if (octeon_setup_output_queues(oct)) {
+               dev_err(&oct->pci_dev->dev, "Output queue initialization failed\n");
+               return 1;
+       }
+       atomic_set(&oct->status, OCT_DEV_DROQ_INIT_DONE);
+
+       if (oct->fn_list.setup_mbox(oct)) {
+               dev_err(&oct->pci_dev->dev, "Mailbox setup failed\n");
+               return 1;
+       }
+       atomic_set(&oct->status, OCT_DEV_MBOX_SETUP_DONE);
+
+       if (octeon_allocate_ioq_vector(oct)) {
+               dev_err(&oct->pci_dev->dev, "ioq vector allocation failed\n");
+               return 1;
+       }
+       atomic_set(&oct->status, OCT_DEV_MSIX_ALLOC_VECTOR_DONE);
+
+       dev_info(&oct->pci_dev->dev, "OCTEON_CN23XX VF Version: %s, %d ioqs\n",
+                LIQUIDIO_VERSION, oct->sriov_info.rings_per_vf);
+
+       /* Setup the interrupt handler and record the INT SUM register address */
+       if (octeon_setup_interrupt(oct))
+               return 1;
+
+       /* The IRQs and MSI-X vectors now exist, so record that immediately.
+        * If the handshake below fails, the teardown must start at
+        * OCT_DEV_INTR_SET_DONE to free them; otherwise they are leaked.
+        */
+       atomic_set(&oct->status, OCT_DEV_INTR_SET_DONE);
+
+       if (cn23xx_octeon_pfvf_handshake(oct))
+               return 1;
+
+       /* Enable Octeon device interrupts */
+       oct->fn_list.enable_interrupt(oct, OCTEON_ALL_INTR);
+
+       /* Enable the input and output queues for this Octeon device */
+       if (oct->fn_list.enable_io_queues(oct)) {
+               dev_err(&oct->pci_dev->dev, "enabling io queues failed\n");
+               return 1;
+       }
+
+       atomic_set(&oct->status, OCT_DEV_IO_QUEUES_DONE);
+
+       atomic_set(&oct->status, OCT_DEV_HOST_OK);
+
+       /* Send Credit for Octeon Output queues. Credits are always sent after
+        * the output queue is enabled.
+        */
+       for (j = 0; j < oct->num_oqs; j++)
+               writel(oct->droq[j]->max_count, oct->droq[j]->pkts_credit_reg);
+
+       /* Packets can start arriving on the output queues from this point. */
+
+       atomic_set(&oct->status, OCT_DEV_CORE_OK);
+
+       atomic_set(&oct->status, OCT_DEV_RUNNING);
+
+       return 0;
+}
+
+/* Module entry point: initializes the octeon device list, then registers the
+ * VF PCI driver and returns the registration result.
+ */
+static int __init liquidio_vf_init(void)
+{
+       octeon_init_device_list(0);
+       return pci_register_driver(&liquidio_vf_pci_driver);
+}
+
+/* Module exit point: unregisters the VF PCI driver and logs the unload. */
+static void __exit liquidio_vf_exit(void)
+{
+       pci_unregister_driver(&liquidio_vf_pci_driver);
+
+       pr_info("LiquidIO_VF network module is now unloaded\n");
+}
+
+module_init(liquidio_vf_init);
+module_exit(liquidio_vf_exit);
index 0d990accb65e20f604373e43b4b52bfea7d4057b..f308ee49a7546aef25ebb6681a324a120f3d679f 100644 (file)
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*!  \file  liquidio_common.h
  *   \brief Common: Structures and macros used in PCI-NIC package by core and
  *   host driver.
@@ -68,12 +63,10 @@ enum octeon_tag_type {
  */
 #define OPCODE_CORE 0           /* used for generic core operations */
 #define OPCODE_NIC  1           /* used for NIC operations */
-#define OPCODE_LAST OPCODE_NIC
-
 /* Subcodes are used by host driver/apps to identify the sub-operation
  * for the core. They only need to by unique for a given subsystem.
  */
-#define OPCODE_SUBCODE(op, sub)       (((op & 0x0f) << 8) | ((sub) & 0x7f))
+#define OPCODE_SUBCODE(op, sub)       ((((op) & 0x0f) << 8) | ((sub) & 0x7f))
 
 /** OPCODE_CORE subcodes. For future use. */
 
@@ -89,13 +82,13 @@ enum octeon_tag_type {
 #define OPCODE_NIC_TIMESTAMP           0x07
 #define OPCODE_NIC_INTRMOD_CFG         0x08
 #define OPCODE_NIC_IF_CFG              0x09
+#define OPCODE_NIC_VF_DRV_NOTICE       0x0A
+#define VF_DRV_LOADED                  1
+#define VF_DRV_REMOVED                -1
+#define VF_DRV_MACADDR_CHANGED         2
 
 #define CORE_DRV_TEST_SCATTER_OP    0xFFF5
 
-#define OPCODE_SLOW_PATH(rh)  \
-       (OPCODE_SUBCODE(rh->r.opcode, rh->r.subcode) != \
-               OPCODE_SUBCODE(OPCODE_NIC, OPCODE_NIC_NW_DATA))
-
 /* Application codes advertised by the core driver initialization packet. */
 #define CVM_DRV_APP_START           0x0
 #define CVM_DRV_NO_APP              0
@@ -105,31 +98,15 @@ enum octeon_tag_type {
 #define CVM_DRV_INVALID_APP         (CVM_DRV_APP_START + 0x2)
 #define CVM_DRV_APP_END             (CVM_DRV_INVALID_APP - 1)
 
-/* Macro to increment index.
- * Index is incremented by count; if the sum exceeds
- * max, index is wrapped-around to the start.
- */
-#define INCR_INDEX(index, count, max)                \
-do {                                                 \
-       if (((index) + (count)) >= (max))            \
-               index = ((index) + (count)) - (max); \
-       else                                         \
-               index += (count);                    \
-} while (0)
-
-#define INCR_INDEX_BY1(index, max)     \
-do {                                    \
-       if ((++(index)) == (max))       \
-               index = 0;              \
-} while (0)
-
-#define DECR_INDEX(index, count, max)                  \
-do {                                                  \
-       if ((count) > (index))                         \
-               index = ((max) - ((count - index)));   \
-       else                                           \
-               index -= count;                        \
-} while (0)
+/* Advance index by count; once the sum reaches max, max is subtracted so
+ * the index wraps back towards the start.
+ */
+static inline u32 incr_index(u32 index, u32 count, u32 max)
+{
+       u32 next = index + count;
+
+       return (next >= max) ? next - max : next;
+}
 
 #define OCT_BOARD_NAME 32
 #define OCT_SERIAL_LEN 64
@@ -235,6 +212,7 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
 
 #define   OCTNET_CMD_ID_ACTIVE         0x1a
 
+#define   OCTNET_CMD_SET_VF_LINKSTATE  0x1c
 #define   OCTNET_CMD_VXLAN_PORT_ADD    0x0
 #define   OCTNET_CMD_VXLAN_PORT_DEL    0x1
 #define   OCTNET_CMD_RXCSUM_ENABLE     0x0
@@ -731,13 +709,15 @@ struct oct_link_info {
 
 #ifdef __BIG_ENDIAN_BITFIELD
        u64 gmxport:16;
-       u64 rsvd:32;
+       u64 macaddr_is_admin_asgnd:1;
+       u64 rsvd:31;
        u64 num_txpciq:8;
        u64 num_rxpciq:8;
 #else
        u64 num_rxpciq:8;
        u64 num_txpciq:8;
-       u64 rsvd:32;
+       u64 rsvd:31;
+       u64 macaddr_is_admin_asgnd:1;
        u64 gmxport:16;
 #endif
 
@@ -827,6 +807,16 @@ struct oct_link_stats {
 
 };
 
+/* Returns non-zero unless rh carries the OPCODE_NIC / OPCODE_NIC_NW_DATA
+ * opcode and subcode pair.
+ */
+static inline int opcode_slow_path(union octeon_rh *rh)
+{
+       u16 nw_data = OPCODE_SUBCODE(OPCODE_NIC, OPCODE_NIC_NW_DATA);
+       u16 received = OPCODE_SUBCODE(rh->r.opcode, rh->r.subcode);
+
+       return received != nw_data;
+}
+
 #define LIO68XX_LED_CTRL_ADDR     0x3501
 #define LIO68XX_LED_CTRL_CFGON    0x1f
 #define LIO68XX_LED_CTRL_CFGOFF   0x100
index 93819bd8602b66e925c0e7f2ed1f155d801919bc..78a3685f6fe0faac471fcdc2dabc09a8c69990f8 100644 (file)
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #ifndef _LIQUIDIO_IMAGE_H_
 #define _LIQUIDIO_IMAGE_H_
 
index c76556809ed151d9f5f3fb4ebe2da2bc9d1df198..1cb3514fc949dafbb1f4280b92416c85f6aac6f0 100644 (file)
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file  octeon_config.h
  *  \brief Host Driver: Configuration data structures for the host driver.
  */
 #define   DEFAULT_NUM_NIC_PORTS_68XX_210NV  2
 
 /* CN23xx  IQ configuration macros */
+#define   CN23XX_MAX_VFS_PER_PF_PASS_1_0 8
+#define   CN23XX_MAX_VFS_PER_PF_PASS_1_1 31
+#define   CN23XX_MAX_VFS_PER_PF          63
+#define   CN23XX_MAX_RINGS_PER_VF        8
+
 #define   CN23XX_MAX_RINGS_PER_PF_PASS_1_0 12
 #define   CN23XX_MAX_RINGS_PER_PF_PASS_1_1 32
 #define   CN23XX_MAX_RINGS_PER_PF          64
+#define   CN23XX_MAX_RINGS_PER_VF          8
 
 #define   CN23XX_MAX_INPUT_QUEUES      CN23XX_MAX_RINGS_PER_PF
 #define   CN23XX_MAX_IQ_DESCRIPTORS    2048
@@ -466,4 +467,7 @@ struct octeon_config {
 
 #define MAX_POSSIBLE_OCTEON_INSTR_QUEUES       CN23XX_MAX_INPUT_QUEUES
 #define MAX_POSSIBLE_OCTEON_OUTPUT_QUEUES      CN23XX_MAX_OUTPUT_QUEUES
+
+#define MAX_POSSIBLE_VFS                       64
+
 #endif /* __OCTEON_CONFIG_H__  */
index 01a50f3b0c8e28a342cd03828be7bf60906d41df..3265e0b7923ee4712190696e49abe011461c2386 100644 (file)
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /**
  * @file octeon_console.c
  */
@@ -76,9 +71,9 @@ MODULE_PARM_DESC(console_bitmask,
 #define OCTEON_CONSOLE_POLL_INTERVAL_MS  100    /* 10 times per second */
 
 /* First three members of cvmx_bootmem_desc are left in original
-** positions for backwards compatibility.
-** Assumes big endian target
-*/
+ * positions for backwards compatibility.
+ * Assumes big endian target
+ */
 struct cvmx_bootmem_desc {
        /** spinlock to control access to list */
        u32 lock;
@@ -142,46 +137,6 @@ struct octeon_pci_console_desc {
        /* Implicit storage for console_addr_array */
 };
 
-/**
- * This macro returns the size of a member of a structure.
- * Logically it is the same as "sizeof(s::field)" in C++, but
- * C lacks the "::" operator.
- */
-#define SIZEOF_FIELD(s, field) sizeof(((s *)NULL)->field)
-
-/**
- * This macro returns a member of the cvmx_bootmem_desc
- * structure. These members can't be directly addressed as
- * they might be in memory not directly reachable. In the case
- * where bootmem is compiled with LINUX_HOST, the structure
- * itself might be located on a remote Octeon. The argument
- * "field" is the member name of the cvmx_bootmem_desc to read.
- * Regardless of the type of the field, the return type is always
- * a u64.
- */
-#define CVMX_BOOTMEM_DESC_GET_FIELD(oct, field)                              \
-       __cvmx_bootmem_desc_get(oct, oct->bootmem_desc_addr,                 \
-                               offsetof(struct cvmx_bootmem_desc, field),   \
-                               SIZEOF_FIELD(struct cvmx_bootmem_desc, field))
-
-#define __cvmx_bootmem_lock(flags)     (flags = flags)
-#define __cvmx_bootmem_unlock(flags)   (flags = flags)
-
-/**
- * This macro returns a member of the
- * cvmx_bootmem_named_block_desc structure. These members can't
- * be directly addressed as they might be in memory not directly
- * reachable. In the case where bootmem is compiled with
- * LINUX_HOST, the structure itself might be located on a remote
- * Octeon. The argument "field" is the member name of the
- * cvmx_bootmem_named_block_desc to read. Regardless of the type
- * of the field, the return type is always a u64. The "addr"
- * parameter is the physical address of the structure.
- */
-#define CVMX_BOOTMEM_NAMED_GET_FIELD(oct, addr, field)                   \
-       __cvmx_bootmem_desc_get(oct, addr,                               \
-               offsetof(struct cvmx_bootmem_named_block_desc, field),   \
-               SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field))
 /**
  * \brief determines if a given console has debug enabled.
  * @param console console to check
@@ -263,10 +218,15 @@ static int __cvmx_bootmem_check_version(struct octeon_device *oct,
                oct->bootmem_desc_addr =
                        octeon_read_device_mem64(oct,
                                                 BOOTLOADER_PCI_READ_DESC_ADDR);
-       major_version =
-               (u32)CVMX_BOOTMEM_DESC_GET_FIELD(oct, major_version);
-       minor_version =
-               (u32)CVMX_BOOTMEM_DESC_GET_FIELD(oct, minor_version);
+       major_version = (u32)__cvmx_bootmem_desc_get(
+                       oct, oct->bootmem_desc_addr,
+                       offsetof(struct cvmx_bootmem_desc, major_version),
+                       FIELD_SIZEOF(struct cvmx_bootmem_desc, major_version));
+       minor_version = (u32)__cvmx_bootmem_desc_get(
+                       oct, oct->bootmem_desc_addr,
+                       offsetof(struct cvmx_bootmem_desc, minor_version),
+                       FIELD_SIZEOF(struct cvmx_bootmem_desc, minor_version));
+
        dev_dbg(&oct->pci_dev->dev, "%s: major_version=%d\n", __func__,
                major_version);
        if ((major_version > 3) ||
@@ -289,10 +249,20 @@ static const struct cvmx_bootmem_named_block_desc
        u64 named_addr = cvmx_bootmem_phy_named_block_find(oct, name, flags);
 
        if (named_addr) {
-               desc->base_addr = CVMX_BOOTMEM_NAMED_GET_FIELD(oct, named_addr,
-                                                              base_addr);
-               desc->size =
-                       CVMX_BOOTMEM_NAMED_GET_FIELD(oct, named_addr, size);
+               desc->base_addr = __cvmx_bootmem_desc_get(
+                               oct, named_addr,
+                               offsetof(struct cvmx_bootmem_named_block_desc,
+                                        base_addr),
+                               FIELD_SIZEOF(
+                                       struct cvmx_bootmem_named_block_desc,
+                                       base_addr));
+               desc->size = __cvmx_bootmem_desc_get(oct, named_addr,
+                               offsetof(struct cvmx_bootmem_named_block_desc,
+                                        size),
+                               FIELD_SIZEOF(
+                                       struct cvmx_bootmem_named_block_desc,
+                                       size));
+
                strncpy(desc->name, name, sizeof(desc->name));
                desc->name[sizeof(desc->name) - 1] = 0;
                return &oct->bootmem_named_block_desc;
@@ -307,22 +277,41 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct,
 {
        u64 result = 0;
 
-       __cvmx_bootmem_lock(flags);
        if (!__cvmx_bootmem_check_version(oct, 3)) {
                u32 i;
-               u64 named_block_array_addr =
-                       CVMX_BOOTMEM_DESC_GET_FIELD(oct,
-                                                   named_block_array_addr);
-               u32 num_blocks = (u32)
-                       CVMX_BOOTMEM_DESC_GET_FIELD(oct, nb_num_blocks);
-               u32 name_length = (u32)
-                       CVMX_BOOTMEM_DESC_GET_FIELD(oct, named_block_name_len);
+
+               u64 named_block_array_addr = __cvmx_bootmem_desc_get(
+                                       oct, oct->bootmem_desc_addr,
+                                       offsetof(struct cvmx_bootmem_desc,
+                                                named_block_array_addr),
+                                       FIELD_SIZEOF(struct cvmx_bootmem_desc,
+                                                    named_block_array_addr));
+               u32 num_blocks = (u32)__cvmx_bootmem_desc_get(
+                                       oct, oct->bootmem_desc_addr,
+                                       offsetof(struct cvmx_bootmem_desc,
+                                                nb_num_blocks),
+                                       FIELD_SIZEOF(struct cvmx_bootmem_desc,
+                                                    nb_num_blocks));
+
+               u32 name_length = (u32)__cvmx_bootmem_desc_get(
+                                       oct, oct->bootmem_desc_addr,
+                                       offsetof(struct cvmx_bootmem_desc,
+                                                named_block_name_len),
+                                       FIELD_SIZEOF(struct cvmx_bootmem_desc,
+                                                    named_block_name_len));
+
                u64 named_addr = named_block_array_addr;
 
                for (i = 0; i < num_blocks; i++) {
-                       u64 named_size =
-                               CVMX_BOOTMEM_NAMED_GET_FIELD(oct, named_addr,
-                                                            size);
+                       u64 named_size = __cvmx_bootmem_desc_get(
+                                       oct, named_addr,
+                                        offsetof(
+                                       struct cvmx_bootmem_named_block_desc,
+                                       size),
+                                        FIELD_SIZEOF(
+                                       struct cvmx_bootmem_named_block_desc,
+                                       size));
+
                        if (name && named_size) {
                                char *name_tmp =
                                        kmalloc(name_length + 1, GFP_KERNEL);
@@ -347,7 +336,6 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct,
                                sizeof(struct cvmx_bootmem_named_block_desc);
                }
        }
-       __cvmx_bootmem_unlock(flags);
        return result;
 }
 
index 586b68899b06a744bdf521500c5006cbcfafdbf5..6d54032b10ab1ff8c5f821984fdb0193081d1d52 100644 (file)
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
@@ -32,6 +28,7 @@
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
 #include "cn23xx_pf_device.h"
+#include "cn23xx_vf_device.h"
 
 /** Default configuration
  *  for CN66XX OCTEON Models.
@@ -520,11 +517,6 @@ static struct octeon_config default_cn23xx_conf = {
        }
 };
 
-enum {
-       OCTEON_CONFIG_TYPE_DEFAULT = 0,
-       NUM_OCTEON_CONFS,
-};
-
 static struct octeon_config_ptr {
        u32 conf_type;
 } oct_conf_info[MAX_OCTEON_DEVICES] = {
@@ -580,15 +572,17 @@ static void *__retrieve_octeon_config_info(struct octeon_device *oct,
        switch (oct_conf_info[oct_id].conf_type) {
        case OCTEON_CONFIG_TYPE_DEFAULT:
                if (oct->chip_id == OCTEON_CN66XX) {
-                       ret = (void *)&default_cn66xx_conf;
+                       ret = &default_cn66xx_conf;
                } else if ((oct->chip_id == OCTEON_CN68XX) &&
                           (card_type == LIO_210NV)) {
-                       ret =  (void *)&default_cn68xx_210nv_conf;
+                       ret = &default_cn68xx_210nv_conf;
                } else if ((oct->chip_id == OCTEON_CN68XX) &&
                           (card_type == LIO_410NV)) {
-                       ret =  (void *)&default_cn68xx_conf;
+                       ret = &default_cn68xx_conf;
                } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) {
-                       ret =  (void *)&default_cn23xx_conf;
+                       ret = &default_cn23xx_conf;
+               } else if (oct->chip_id == OCTEON_CN23XX_VF_VID) {
+                       ret = &default_cn23xx_conf;
                }
                break;
        default:
@@ -604,6 +598,7 @@ static int __verify_octeon_config_info(struct octeon_device *oct, void *conf)
        case OCTEON_CN68XX:
                return lio_validate_cn6xxx_config_info(oct, conf);
        case OCTEON_CN23XX_PF_VID:
+       case OCTEON_CN23XX_VF_VID:
                return 0;
        default:
                break;
@@ -649,12 +644,12 @@ void octeon_free_device_mem(struct octeon_device *oct)
        int i;
 
        for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-               if (oct->io_qmask.oq & (1ULL << i))
+               if (oct->io_qmask.oq & BIT_ULL(i))
                        vfree(oct->droq[i]);
        }
 
        for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
-               if (oct->io_qmask.iq & (1ULL << i))
+               if (oct->io_qmask.iq & BIT_ULL(i))
                        vfree(oct->instr_queue[i]);
        }
 
@@ -681,6 +676,9 @@ static struct octeon_device *octeon_allocate_device_mem(u32 pci_id,
        case OCTEON_CN23XX_PF_VID:
                configsize = sizeof(struct octeon_cn23xx_pf);
                break;
+       case OCTEON_CN23XX_VF_VID:
+               configsize = sizeof(struct octeon_cn23xx_vf);
+               break;
        default:
                pr_err("%s: Unknown PCI Device: 0x%x\n",
                       __func__,
@@ -756,6 +754,9 @@ octeon_allocate_ioq_vector(struct octeon_device  *oct)
 
        if (OCTEON_CN23XX_PF(oct))
                num_ioqs = oct->sriov_info.num_pf_rings;
+       else if (OCTEON_CN23XX_VF(oct))
+               num_ioqs = oct->sriov_info.rings_per_vf;
+
        size = sizeof(struct octeon_ioq_vector) * num_ioqs;
 
        oct->ioq_vector = vmalloc(size);
@@ -767,6 +768,7 @@ octeon_allocate_ioq_vector(struct octeon_device  *oct)
                ioq_vector->oct_dev     = oct;
                ioq_vector->iq_index    = i;
                ioq_vector->droq_index  = i;
+               ioq_vector->mbox        = oct->mbox[i];
 
                cpu_num = i % num_online_cpus();
                cpumask_set_cpu(cpu_num, &ioq_vector->affinity_mask);
@@ -795,10 +797,11 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
 
        if (OCTEON_CN6XXX(oct))
                num_descs =
-                       CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
+                       CFG_GET_NUM_DEF_TX_DESCS(CHIP_CONF(oct, cn6xxx));
        else if (OCTEON_CN23XX_PF(oct))
-               num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn23xx_pf,
-                                                               conf));
+               num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_CONF(oct, cn23xx_pf));
+       else if (OCTEON_CN23XX_VF(oct))
+               num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_CONF(oct, cn23xx_vf));
 
        oct->num_iqs = 0;
 
@@ -821,6 +824,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
        if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
                /* prevent memory leak */
                vfree(oct->instr_queue[0]);
+               oct->instr_queue[0] = NULL;
                return 1;
        }
 
@@ -837,14 +841,15 @@ int octeon_setup_output_queues(struct octeon_device *oct)
 
        if (OCTEON_CN6XXX(oct)) {
                num_descs =
-                       CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
+                       CFG_GET_NUM_DEF_RX_DESCS(CHIP_CONF(oct, cn6xxx));
                desc_size =
-                       CFG_GET_DEF_RX_BUF_SIZE(CHIP_FIELD(oct, cn6xxx, conf));
+                       CFG_GET_DEF_RX_BUF_SIZE(CHIP_CONF(oct, cn6xxx));
        } else if (OCTEON_CN23XX_PF(oct)) {
-               num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn23xx_pf,
-                                                               conf));
-               desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_FIELD(oct, cn23xx_pf,
-                                                              conf));
+               num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_CONF(oct, cn23xx_pf));
+               desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_CONF(oct, cn23xx_pf));
+       } else if (OCTEON_CN23XX_VF(oct)) {
+               num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_CONF(oct, cn23xx_vf));
+               desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_CONF(oct, cn23xx_vf));
        }
        oct->num_oqs = 0;
        oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node);
@@ -853,19 +858,63 @@ int octeon_setup_output_queues(struct octeon_device *oct)
        if (!oct->droq[0])
                return 1;
 
-       if (octeon_init_droq(oct, oq_no, num_descs, desc_size, NULL))
+       if (octeon_init_droq(oct, oq_no, num_descs, desc_size, NULL)) {
+               vfree(oct->droq[oq_no]);
+               oct->droq[oq_no] = NULL;
                return 1;
+       }
        oct->num_oqs++;
 
        return 0;
 }
 
-void octeon_set_io_queues_off(struct octeon_device *oct)
+int octeon_set_io_queues_off(struct octeon_device *oct)
 {
+       int loop = BUSY_READING_REG_VF_LOOP_COUNT;
+
        if (OCTEON_CN6XXX(oct)) {
                octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0);
                octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0);
+       } else if (oct->chip_id == OCTEON_CN23XX_VF_VID) {
+               u32 q_no;
+
+               /* IOQs will already be in reset.
+                * If RST bit is set, wait for quiet bit to be set.
+                * Once quiet bit is set, clear the RST bit.
+                */
+               for (q_no = 0; q_no < oct->sriov_info.rings_per_vf; q_no++) {
+                       u64 reg_val = octeon_read_csr64(
+                               oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+
+                       while ((reg_val & CN23XX_PKT_INPUT_CTL_RST) &&
+                              !(reg_val &  CN23XX_PKT_INPUT_CTL_QUIET) &&
+                              loop) {
+                               reg_val = octeon_read_csr64(
+                                       oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+                               loop--;
+                       }
+                       if (!loop) {
+                               dev_err(&oct->pci_dev->dev,
+                                       "clearing the reset reg failed or setting the quiet reg failed for qno: %u\n",
+                                       q_no);
+                               return -1;
+                       }
+
+                       reg_val = reg_val & ~CN23XX_PKT_INPUT_CTL_RST;
+                       octeon_write_csr64(oct,
+                                          CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+                                          reg_val);
+
+                       reg_val = octeon_read_csr64(
+                                       oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+                       if (reg_val & CN23XX_PKT_INPUT_CTL_RST) {
+                               dev_err(&oct->pci_dev->dev,
+                                       "unable to reset qno %u\n", q_no);
+                               return -1;
+                       }
+               }
        }
+       return 0;
 }
 
 void octeon_set_droq_pkt_op(struct octeon_device *oct,
@@ -1070,10 +1119,10 @@ int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf)
 
        if (OCTEON_CN6XXX(oct))
                num_nic_ports =
-                       CFG_GET_NUM_NIC_PORTS(CHIP_FIELD(oct, cn6xxx, conf));
+                       CFG_GET_NUM_NIC_PORTS(CHIP_CONF(oct, cn6xxx));
        else if (OCTEON_CN23XX_PF(oct))
                num_nic_ports =
-                       CFG_GET_NUM_NIC_PORTS(CHIP_FIELD(oct, cn23xx_pf, conf));
+                       CFG_GET_NUM_NIC_PORTS(CHIP_CONF(oct, cn23xx_pf));
 
        if (atomic_read(&oct->status) >= OCT_DEV_RUNNING) {
                dev_err(&oct->pci_dev->dev, "Received CORE OK when device state is 0x%x\n",
@@ -1143,7 +1192,7 @@ int octeon_get_tx_qsize(struct octeon_device *oct, u32 q_no)
 
 {
        if (oct && (q_no < MAX_OCTEON_INSTR_QUEUES(oct)) &&
-           (oct->io_qmask.iq & (1ULL << q_no)))
+           (oct->io_qmask.iq & BIT_ULL(q_no)))
                return oct->instr_queue[q_no]->max_count;
 
        return -1;
@@ -1152,7 +1201,7 @@ int octeon_get_tx_qsize(struct octeon_device *oct, u32 q_no)
 int octeon_get_rx_qsize(struct octeon_device *oct, u32 q_no)
 {
        if (oct && (q_no < MAX_OCTEON_OUTPUT_QUEUES(oct)) &&
-           (oct->io_qmask.oq & (1ULL << q_no)))
+           (oct->io_qmask.oq & BIT_ULL(q_no)))
                return oct->droq[q_no]->max_count;
        return -1;
 }
@@ -1168,10 +1217,10 @@ struct octeon_config *octeon_get_conf(struct octeon_device *oct)
 
        if (OCTEON_CN6XXX(oct)) {
                default_oct_conf =
-                       (struct octeon_config *)(CHIP_FIELD(oct, cn6xxx, conf));
+                       (struct octeon_config *)(CHIP_CONF(oct, cn6xxx));
        } else if (OCTEON_CN23XX_PF(oct)) {
                default_oct_conf = (struct octeon_config *)
-                       (CHIP_FIELD(oct, cn23xx_pf, conf));
+                       (CHIP_CONF(oct, cn23xx_pf));
        }
        return default_oct_conf;
 }
index da15c2ae93303a7e318986976b85100d9fa14b26..18f6836250a6c04c7ba98f85ca884e679b3d341e 100644 (file)
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file octeon_device.h
  *  \brief Host Driver: This file defines the octeon device structure.
  */
@@ -38,6 +33,7 @@
 #define  OCTEON_CN68XX                0x0091
 #define  OCTEON_CN66XX                0x0092
 #define  OCTEON_CN23XX_PF_VID         0x9702
+#define  OCTEON_CN23XX_VF_VID         0x9712
 
 /**RevisionId for the chips */
 #define  OCTEON_CN23XX_REV_1_0        0x00
@@ -52,7 +48,14 @@ enum octeon_pci_swap_mode {
        OCTEON_PCI_32BIT_LW_SWAP = 3
 };
 
+enum {
+       OCTEON_CONFIG_TYPE_DEFAULT = 0,
+       NUM_OCTEON_CONFS,
+};
+
+#define  OCTEON_INPUT_INTR    (1)
 #define  OCTEON_OUTPUT_INTR   (2)
+#define  OCTEON_MBOX_INTR     (4)
 #define  OCTEON_ALL_INTR      0xff
 
 /*---------------   PCI BAR1 index registers -------------*/
@@ -70,26 +73,30 @@ enum octeon_pci_swap_mode {
  *  as it is initialized.
  */
 #define    OCT_DEV_BEGIN_STATE            0x0
-#define    OCT_DEV_PCI_MAP_DONE           0x1
-#define    OCT_DEV_DISPATCH_INIT_DONE     0x2
-#define    OCT_DEV_INSTR_QUEUE_INIT_DONE  0x3
-#define    OCT_DEV_SC_BUFF_POOL_INIT_DONE 0x4
-#define    OCT_DEV_RESP_LIST_INIT_DONE    0x5
-#define    OCT_DEV_DROQ_INIT_DONE         0x6
-#define    OCT_DEV_IO_QUEUES_DONE         0x7
-#define    OCT_DEV_CONSOLE_INIT_DONE      0x8
-#define    OCT_DEV_HOST_OK                0x9
-#define    OCT_DEV_CORE_OK                0xa
-#define    OCT_DEV_RUNNING                0xb
-#define    OCT_DEV_IN_RESET               0xc
-#define    OCT_DEV_STATE_INVALID          0xd
+#define    OCT_DEV_PCI_ENABLE_DONE        0x1
+#define    OCT_DEV_PCI_MAP_DONE           0x2
+#define    OCT_DEV_DISPATCH_INIT_DONE     0x3
+#define    OCT_DEV_INSTR_QUEUE_INIT_DONE  0x4
+#define    OCT_DEV_SC_BUFF_POOL_INIT_DONE 0x5
+#define    OCT_DEV_RESP_LIST_INIT_DONE    0x6
+#define    OCT_DEV_DROQ_INIT_DONE         0x7
+#define    OCT_DEV_MBOX_SETUP_DONE        0x8
+#define    OCT_DEV_MSIX_ALLOC_VECTOR_DONE 0x9
+#define    OCT_DEV_INTR_SET_DONE          0xa
+#define    OCT_DEV_IO_QUEUES_DONE         0xb
+#define    OCT_DEV_CONSOLE_INIT_DONE      0xc
+#define    OCT_DEV_HOST_OK                0xd
+#define    OCT_DEV_CORE_OK                0xe
+#define    OCT_DEV_RUNNING                0xf
+#define    OCT_DEV_IN_RESET               0x10
+#define    OCT_DEV_STATE_INVALID          0x11
 
 #define    OCT_DEV_STATES                 OCT_DEV_STATE_INVALID
 
 /** Octeon Device interrupts
-*  These interrupt bits are set in int_status filed of
-*  octeon_device structure
-*/
+ * These interrupt bits are set in int_status filed of
+ * octeon_device structure
+ */
 #define           OCT_DEV_INTR_DMA0_FORCE        0x01
 #define           OCT_DEV_INTR_DMA1_FORCE        0x02
 #define           OCT_DEV_INTR_PKT_DATA          0x04
@@ -208,6 +215,10 @@ struct octeon_fn_list {
 
        irqreturn_t (*process_interrupt_regs)(void *);
        u64 (*msix_interrupt_handler)(void *);
+
+       int (*setup_mbox)(struct octeon_device *);
+       int (*free_mbox)(struct octeon_device *);
+
        int (*soft_reset)(struct octeon_device *);
        int (*setup_device_regs)(struct octeon_device *);
        void (*bar1_idx_setup)(struct octeon_device *, u64, u32, int);
@@ -284,6 +295,7 @@ struct octdev_props {
 #define LIO_FLAG_MSIX_ENABLED  0x1
 #define MSIX_PO_INT            0x1
 #define MSIX_PI_INT            0x2
+#define MSIX_MBOX_INT          0x4
 
 struct octeon_pf_vf_hs_word {
 #ifdef __LITTLE_ENDIAN_BITFIELD
@@ -322,14 +334,39 @@ struct octeon_pf_vf_hs_word {
 };
 
 struct octeon_sriov_info {
+       /* Number of rings assigned to VF */
+       u32     rings_per_vf;
+
+       /** Max Number of VF devices that can be enabled. This variable can
+        *  specified during load time or it will be derived after allocating
+        *  PF queues. When max_vfs is derived then each VF will get one queue
+        **/
+       u32     max_vfs;
+
+       /** Number of VF devices enabled using sysfs. */
+       u32     num_vfs_alloced;
+
        /* Actual rings left for PF device */
        u32     num_pf_rings;
 
-       /* SRN of PF usable IO queues   */
+       /* SRN of PF usable IO queues */
        u32     pf_srn;
+
        /* total pf rings */
        u32     trs;
 
+       u32     sriov_enabled;
+
+       /*lookup table that maps DPI ring number to VF pci_dev struct pointer*/
+       struct pci_dev *dpiring_to_vfpcidev_lut[MAX_POSSIBLE_VFS];
+
+       u64     vf_macaddr[MAX_POSSIBLE_VFS];
+
+       u16     vf_vlantci[MAX_POSSIBLE_VFS];
+
+       int     vf_linkstate[MAX_POSSIBLE_VFS];
+
+       u64     vf_drv_loaded_mask;
 };
 
 struct octeon_ioq_vector {
@@ -337,6 +374,7 @@ struct octeon_ioq_vector {
        int                     iq_index;
        int                     droq_index;
        int                     vector;
+       struct octeon_mbox     *mbox;
        struct cpumask          affinity_mask;
        u32                     ioq_num;
 };
@@ -365,8 +403,13 @@ struct octeon_device {
 
        /** Octeon Chip type. */
        u16 chip_id;
+
        u16 rev_id;
+
        u16 pf_num;
+
+       u16 vf_num;
+
        /** This device's id - set by the driver. */
        u32 octeon_id;
 
@@ -474,6 +517,9 @@ struct octeon_device {
 
        int msix_on;
 
+       /** Mail Box details of each octeon queue. */
+       struct octeon_mbox  *mbox[MAX_POSSIBLE_VFS];
+
        /** IOq information of it's corresponding MSI-X interrupt. */
        struct octeon_ioq_vector    *ioq_vector;
 
@@ -490,11 +536,14 @@ struct octeon_device {
 
 #define  OCT_DRV_ONLINE 1
 #define  OCT_DRV_OFFLINE 2
-#define  OCTEON_CN6XXX(oct)           ((oct->chip_id == OCTEON_CN66XX) || \
-                                      (oct->chip_id == OCTEON_CN68XX))
-#define  OCTEON_CN23XX_PF(oct)        (oct->chip_id == OCTEON_CN23XX_PF_VID)
-#define CHIP_FIELD(oct, TYPE, field)             \
-       (((struct octeon_ ## TYPE  *)(oct->chip))->field)
+#define  OCTEON_CN6XXX(oct)    ({                                      \
+                                typeof(oct) _oct = (oct);              \
+                                ((_oct->chip_id == OCTEON_CN66XX) ||   \
+                                 (_oct->chip_id == OCTEON_CN68XX));    })
+#define  OCTEON_CN23XX_PF(oct)        ((oct)->chip_id == OCTEON_CN23XX_PF_VID)
+#define  OCTEON_CN23XX_VF(oct)        ((oct)->chip_id == OCTEON_CN23XX_VF_VID)
+#define CHIP_CONF(oct, TYPE)             \
+       (((struct octeon_ ## TYPE  *)((oct)->chip))->conf)
 
 struct oct_intrmod_cmd {
        struct octeon_device *oct_dev;
@@ -508,7 +557,7 @@ struct oct_intrmod_cmd {
 void octeon_init_device_list(int conf_type);
 
 /** Free memory for Input and Output queue structures for a octeon device */
-void octeon_free_device_mem(struct octeon_device *);
+void octeon_free_device_mem(struct octeon_device *oct);
 
 /* Look up a free entry in the octeon_device table and allocate resources
  * for the octeon_device structure for an octeon device. Called at init
@@ -606,16 +655,16 @@ void lio_pci_writeq(struct octeon_device *oct, u64 val, u64 addr);
 
 /* Routines for reading and writing CSRs */
 #define   octeon_write_csr(oct_dev, reg_off, value) \
-               writel(value, oct_dev->mmio[0].hw_addr + reg_off)
+               writel(value, (oct_dev)->mmio[0].hw_addr + (reg_off))
 
 #define   octeon_write_csr64(oct_dev, reg_off, val64) \
-               writeq(val64, oct_dev->mmio[0].hw_addr + reg_off)
+               writeq(val64, (oct_dev)->mmio[0].hw_addr + (reg_off))
 
 #define   octeon_read_csr(oct_dev, reg_off)         \
-               readl(oct_dev->mmio[0].hw_addr + reg_off)
+               readl((oct_dev)->mmio[0].hw_addr + (reg_off))
 
 #define   octeon_read_csr64(oct_dev, reg_off)         \
-               readq(oct_dev->mmio[0].hw_addr + reg_off)
+               readq((oct_dev)->mmio[0].hw_addr + (reg_off))
 
 /**
  * Checks if memory access is okay
@@ -724,7 +773,7 @@ int octeon_get_rx_qsize(struct octeon_device *oct, u32 q_no);
 /** Turns off the input and output queues for the device
  *  @param oct which octeon to disable
  */
-void octeon_set_io_queues_off(struct octeon_device *oct);
+int octeon_set_io_queues_off(struct octeon_device *oct);
 
 /** Turns on or off the given output queue for the device
  *  @param oct which octeon to change
index f60e5320daf43da8fc1d0ea08846707a9afcf22c..8bf1ac76bcdccc9dda1c6af5da23ec426eff4b97 100644 (file)
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
@@ -33,9 +29,6 @@
 #include "cn66xx_device.h"
 #include "cn23xx_pf_device.h"
 
-#define     CVM_MIN(d1, d2)           (((d1) < (d2)) ? (d1) : (d2))
-#define     CVM_MAX(d1, d2)           (((d1) > (d2)) ? (d1) : (d2))
-
 struct niclist {
        struct list_head list;
        void *ptr;
@@ -258,13 +251,13 @@ int octeon_init_droq(struct octeon_device *oct,
        c_num_descs = num_descs;
        c_buf_size = desc_size;
        if (OCTEON_CN6XXX(oct)) {
-               struct octeon_config *conf6x = CHIP_FIELD(oct, cn6xxx, conf);
+               struct octeon_config *conf6x = CHIP_CONF(oct, cn6xxx);
 
                c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf6x);
                c_refill_threshold =
                        (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf6x);
        } else if (OCTEON_CN23XX_PF(oct)) {
-               struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+               struct octeon_config *conf23 = CHIP_CONF(oct, cn23xx_pf);
 
                c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23);
                c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23);
@@ -337,7 +330,7 @@ int octeon_init_droq(struct octeon_device *oct,
        /* For 56xx Pass1, this function won't be called, so no checks. */
        oct->fn_list.setup_oq_regs(oct, q_no);
 
-       oct->io_qmask.oq |= (1ULL << q_no);
+       oct->io_qmask.oq |= BIT_ULL(q_no);
 
        return 0;
 
@@ -409,7 +402,7 @@ static inline struct octeon_recv_info *octeon_create_recv_info(
                recv_pkt->buffer_ptr[i] = droq->recv_buf_list[idx].buffer;
                droq->recv_buf_list[idx].buffer = NULL;
 
-               INCR_INDEX_BY1(idx, droq->max_count);
+               idx = incr_index(idx, 1, droq->max_count);
                bytes_left -= droq->buffer_size;
                i++;
                buf_cnt--;
@@ -440,14 +433,15 @@ octeon_droq_refill_pullup_descs(struct octeon_droq *droq,
                        droq->recv_buf_list[refill_index].buffer = NULL;
                        desc_ring[refill_index].buffer_ptr = 0;
                        do {
-                               INCR_INDEX_BY1(droq->refill_idx,
-                                              droq->max_count);
+                               droq->refill_idx = incr_index(droq->refill_idx,
+                                                             1,
+                                                             droq->max_count);
                                desc_refilled++;
                                droq->refill_count--;
                        } while (droq->recv_buf_list[droq->refill_idx].
                                 buffer);
                }
-               INCR_INDEX_BY1(refill_index, droq->max_count);
+               refill_index = incr_index(refill_index, 1, droq->max_count);
        }                       /* while */
        return desc_refilled;
 }
@@ -514,7 +508,8 @@ octeon_droq_refill(struct octeon_device *octeon_dev, struct octeon_droq *droq)
                /* Reset any previous values in the length field. */
                droq->info_list[droq->refill_idx].length = 0;
 
-               INCR_INDEX_BY1(droq->refill_idx, droq->max_count);
+               droq->refill_idx = incr_index(droq->refill_idx, 1,
+                                             droq->max_count);
                desc_refilled++;
                droq->refill_count--;
        }
@@ -599,7 +594,8 @@ static inline void octeon_droq_drop_packets(struct octeon_device *oct,
                        buf_cnt = 1;
                }
 
-               INCR_INDEX(droq->read_idx, buf_cnt, droq->max_count);
+               droq->read_idx = incr_index(droq->read_idx, buf_cnt,
+                                           droq->max_count);
                droq->refill_count += buf_cnt;
        }
 }
@@ -639,11 +635,12 @@ octeon_droq_fast_process_packets(struct octeon_device *oct,
                rh = &info->rh;
 
                total_len += (u32)info->length;
-               if (OPCODE_SLOW_PATH(rh)) {
+               if (opcode_slow_path(rh)) {
                        u32 buf_cnt;
 
                        buf_cnt = octeon_droq_dispatch_pkt(oct, droq, rh, info);
-                       INCR_INDEX(droq->read_idx, buf_cnt, droq->max_count);
+                       droq->read_idx = incr_index(droq->read_idx,
+                                                   buf_cnt, droq->max_count);
                        droq->refill_count += buf_cnt;
                } else {
                        if (info->length <= droq->buffer_size) {
@@ -657,7 +654,8 @@ octeon_droq_fast_process_packets(struct octeon_device *oct,
                                droq->recv_buf_list[droq->read_idx].buffer =
                                        NULL;
 
-                               INCR_INDEX_BY1(droq->read_idx, droq->max_count);
+                               droq->read_idx = incr_index(droq->read_idx, 1,
+                                                           droq->max_count);
                                droq->refill_count++;
                        } else {
                                nicbuf = octeon_fast_packet_alloc((u32)
@@ -689,8 +687,9 @@ octeon_droq_fast_process_packets(struct octeon_device *oct,
                                        }
 
                                        pkt_len += cpy_len;
-                                       INCR_INDEX_BY1(droq->read_idx,
-                                                      droq->max_count);
+                                       droq->read_idx =
+                                               incr_index(droq->read_idx, 1,
+                                                          droq->max_count);
                                        droq->refill_count++;
                                }
                        }
@@ -804,9 +803,8 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct,
        while (total_pkts_processed < budget) {
                octeon_droq_check_hw_for_pkts(droq);
 
-               pkts_available =
-                       CVM_MIN((budget - total_pkts_processed),
-                               (u32)(atomic_read(&droq->pkts_pending)));
+               pkts_available = min((budget - total_pkts_processed),
+                                    (u32)(atomic_read(&droq->pkts_pending)));
 
                if (pkts_available == 0)
                        break;
@@ -988,7 +986,8 @@ int octeon_create_droq(struct octeon_device *oct,
        if (!droq)
                droq = vmalloc(sizeof(*droq));
        if (!droq)
-               goto create_droq_fail;
+               return -1;
+
        memset(droq, 0, sizeof(struct octeon_droq));
 
        /*Disable the pkt o/p for this Q  */
@@ -996,7 +995,11 @@ int octeon_create_droq(struct octeon_device *oct,
        oct->droq[q_no] = droq;
 
        /* Initialize the Droq */
-       octeon_init_droq(oct, q_no, num_descs, desc_size, app_ctx);
+       if (octeon_init_droq(oct, q_no, num_descs, desc_size, app_ctx)) {
+               vfree(oct->droq[q_no]);
+               oct->droq[q_no] = NULL;
+               return -1;
+       }
 
        oct->num_oqs++;
 
@@ -1009,8 +1012,4 @@ int octeon_create_droq(struct octeon_device *oct,
         * the same time.
         */
        return 0;
-
-create_droq_fail:
-       octeon_delete_droq(oct, q_no);
-       return -ENOMEM;
 }
index 5be002d5dba4fb2f4a078e2a881d11b69172df70..e62074090681d3597f973f54fadb2133b21b2931 100644 (file)
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
  * This file is distributed in the hope that it will be useful, but
  * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
- **********************************************************************/
-
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*!  \file  octeon_droq.h
  *   \brief Implementation of Octeon Output queues. "Output" is with
  *   respect to the Octeon device on the NIC. From this driver's point of
@@ -81,7 +76,7 @@ struct octeon_skb_page_info {
  *  the Octeon device. Since the descriptor ring keeps physical (bus)
  *  addresses, this field is required for the driver to keep track of
  *  the virtual address pointers.
-*/
+ */
 struct octeon_recv_buffer {
        /** Packet buffer, including metadata. */
        void *buffer;
@@ -121,7 +116,6 @@ struct oct_droq_stats {
        /** Num of Packets dropped due to receive path failures. */
        u64 rx_dropped;
 
-       /** Num of vxlan packets received; */
        u64 rx_vxlan;
 
        /** Num of failures of recv_buffer_alloc() */
@@ -359,7 +353,7 @@ struct octeon_droq {
  * @param  q_no       - droq no. ranges from 0 - 3.
  * @param app_ctx     - pointer to application context
  * @return Success: 0    Failure: 1
-*/
+ */
 int octeon_init_droq(struct octeon_device *oct_dev,
                     u32 q_no,
                     u32 num_descs,
@@ -372,7 +366,7 @@ int octeon_init_droq(struct octeon_device *oct_dev,
  *  @param oct_dev - pointer to the octeon device structure
  *  @param q_no    - droq no. ranges from 0 - 3.
  *  @return:    Success: 0    Failure: 1
-*/
+ */
 int octeon_delete_droq(struct octeon_device *oct_dev, u32 q_no);
 
 /** Register a change in droq operations. The ops field has a pointer to a
index e4d426ba18dcfa27e9e04894a0d390c9f04279ff..e04ca8f0b4a75a0af4950400fb0cba93d9ccddf8 100644 (file)
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
  * This file is distributed in the hope that it will be useful, but
  * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
- **********************************************************************/
-
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*!  \file  octeon_iq.h
  *   \brief Host Driver: Implementation of Octeon input queues. "Input" is
  *   with respect to the Octeon device on the NIC. From this driver's
@@ -69,7 +64,6 @@ struct oct_iq_stats {
        u64 tx_vxlan; /* tunnel */
        u64 tx_dmamap_fail;
        u64 tx_restart;
-       /*u64 tx_timeout_count;*/
 };
 
 #define OCT_IQ_STATS_SIZE   (sizeof(struct oct_iq_stats))
@@ -78,7 +72,7 @@ struct oct_iq_stats {
  *  The input queue is used to post raw (instruction) mode data or packet
  *  data to Octeon device from the host. Each input queue (upto 4) for
  *  a Octeon device has one such structure to represent it.
-*/
+ */
 struct octeon_instr_queue {
        struct octeon_device *oct_dev;
 
@@ -118,8 +112,8 @@ struct octeon_instr_queue {
        u32 octeon_read_index;
 
        /** This index aids in finding the window in the queue where Octeon
-         * has read the commands.
-         */
+         has read the commands.
+        */
        u32 flush_index;
 
        /** This field keeps track of the instructions pending in this queue. */
@@ -150,8 +144,8 @@ struct octeon_instr_queue {
        u64 last_db_time;
 
        /** The doorbell timeout. If the doorbell was not rung for this time and
-         * fill_cnt is non-zero, ring the doorbell again.
-         */
+        * fill_cnt is non-zero, ring the doorbell again.
+        */
        u32 db_timeout;
 
        /** Statistics for this input queue. */
@@ -309,6 +303,9 @@ struct octeon_sc_buffer_pool {
        atomic_t alloc_buf_count;
 };
 
+#define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count)  \
+               (((octeon_dev_ptr)->instr_queue[iq_no]->stats.field) += count)
+
 int octeon_setup_sc_buffer_pool(struct octeon_device *oct);
 int octeon_free_sc_buffer_pool(struct octeon_device *oct);
 struct octeon_soft_command *
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
new file mode 100644 (file)
index 0000000..73696b4
--- /dev/null
@@ -0,0 +1,318 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include "liquidio_common.h"
+#include "octeon_droq.h"
+#include "octeon_iq.h"
+#include "response_manager.h"
+#include "octeon_device.h"
+#include "octeon_main.h"
+#include "octeon_mailbox.h"
+
+/**
+ * octeon_mbox_read:
+ * @mbox: Pointer mailbox
+ *
+ * Reads one 8-byte word from the mbox read register and feeds it to the
+ * receive state machine: a header word starts a new request (when idle) or
+ * a response (when one is pending), later words are stored as payload.
+ * Writes back the acknowledgement indicating completion of read.
+ *
+ * Returns 1 when a complete message has been received or when a protocol
+ * error was flagged in mbox->state, 0 otherwise.
+ */
+int octeon_mbox_read(struct octeon_mbox *mbox)
+{
+       union octeon_mbox_message msg;
+       int ret = 0;
+
+       spin_lock(&mbox->lock);
+
+       msg.u64 = readq(mbox->mbox_read_reg);
+
+       /* The ACK and SIG patterns carry no message data. */
+       if ((msg.u64 == OCTEON_PFVFACK) || (msg.u64 == OCTEON_PFVFSIG)) {
+               spin_unlock(&mbox->lock);
+               return 0;
+       }
+
+       if (mbox->state & OCTEON_MBOX_STATE_REQUEST_RECEIVING) {
+               /* NOTE(review): assumes data[] holds OCTEON_MBOX_DATA_MAX
+                * words; the 8-bit header length can announce more.
+                */
+               if (mbox->mbox_req.recv_len > OCTEON_MBOX_DATA_MAX)
+                       goto protocol_error;
+               mbox->mbox_req.data[mbox->mbox_req.recv_len - 1] = msg.u64;
+               mbox->mbox_req.recv_len++;
+       } else if (mbox->state & OCTEON_MBOX_STATE_RESPONSE_RECEIVING) {
+               if (mbox->mbox_resp.recv_len > OCTEON_MBOX_DATA_MAX)
+                       goto protocol_error;
+               mbox->mbox_resp.data[mbox->mbox_resp.recv_len - 1] = msg.u64;
+               mbox->mbox_resp.recv_len++;
+       } else if ((mbox->state & OCTEON_MBOX_STATE_IDLE) &&
+                  (msg.s.type == OCTEON_MBOX_REQUEST)) {
+               /* Header word of a new incoming request. */
+               mbox->state &= ~OCTEON_MBOX_STATE_IDLE;
+               mbox->state |= OCTEON_MBOX_STATE_REQUEST_RECEIVING;
+               mbox->mbox_req.msg.u64 = msg.u64;
+               mbox->mbox_req.q_no = mbox->q_no;
+               mbox->mbox_req.recv_len = 1;
+       } else if ((mbox->state & OCTEON_MBOX_STATE_RESPONSE_PENDING) &&
+                  (msg.s.type == OCTEON_MBOX_RESPONSE)) {
+               /* Header word of the response we were waiting for. */
+               mbox->state &= ~OCTEON_MBOX_STATE_RESPONSE_PENDING;
+               mbox->state |= OCTEON_MBOX_STATE_RESPONSE_RECEIVING;
+               mbox->mbox_resp.msg.u64 = msg.u64;
+               mbox->mbox_resp.q_no = mbox->q_no;
+               mbox->mbox_resp.recv_len = 1;
+       } else {
+               goto protocol_error;
+       }
+
+       /* Completion is judged against the stored header: after the first
+        * word, msg holds payload and its len bits are meaningless.
+        */
+       if (mbox->state & OCTEON_MBOX_STATE_REQUEST_RECEIVING) {
+               if (mbox->mbox_req.recv_len >= mbox->mbox_req.msg.s.len) {
+                       mbox->state &= ~OCTEON_MBOX_STATE_REQUEST_RECEIVING;
+                       mbox->state |= OCTEON_MBOX_STATE_REQUEST_RECEIVED;
+                       ret = 1;
+               }
+       } else if (mbox->state & OCTEON_MBOX_STATE_RESPONSE_RECEIVING) {
+               if (mbox->mbox_resp.recv_len >= mbox->mbox_resp.msg.s.len) {
+                       mbox->state &= ~OCTEON_MBOX_STATE_RESPONSE_RECEIVING;
+                       mbox->state |= OCTEON_MBOX_STATE_RESPONSE_RECEIVED;
+                       ret = 1;
+               }
+       } else {
+               WARN_ON(1);
+       }
+
+       writeq(OCTEON_PFVFACK, mbox->mbox_read_reg);
+
+       spin_unlock(&mbox->lock);
+
+       return ret;
+
+protocol_error:
+       /* Tell the peer the word was rejected and flag the mailbox so that
+        * octeon_mbox_process_message() can reset it.
+        */
+       writeq(OCTEON_PFVFERR, mbox->mbox_read_reg);
+       mbox->state |= OCTEON_MBOX_STATE_ERROR;
+       spin_unlock(&mbox->lock);
+       return 1;
+}
+
+/**
+ * octeon_mbox_write:
+ * @oct: Pointer Octeon Device
+ * @mbox_cmd: Cmd to send to mailbox.
+ *
+ * Populates the queue specific mbox structure
+ * with cmd information.
+ * Write the cmd to mbox register, one 8-byte word at a time, waiting for
+ * the peer's acknowledgement between words.
+ *
+ * Returns OCTEON_MBOX_STATUS_SUCCESS when every word was written,
+ * OCTEON_MBOX_STATUS_BUSY when a request is issued while the mailbox is not
+ * idle, and OCTEON_MBOX_STATUS_FAILED when a response is issued without a
+ * received request or when waiting for the peer timed out.
+ */
+int octeon_mbox_write(struct octeon_device *oct,
+                     struct octeon_mbox_cmd *mbox_cmd)
+{
+       struct octeon_mbox *mbox = oct->mbox[mbox_cmd->q_no];
+       u32 count, i, ret = OCTEON_MBOX_STATUS_SUCCESS;
+       unsigned long flags;
+
+       spin_lock_irqsave(&mbox->lock, flags);
+
+       if ((mbox_cmd->msg.s.type == OCTEON_MBOX_RESPONSE) &&
+           !(mbox->state & OCTEON_MBOX_STATE_REQUEST_RECEIVED)) {
+               spin_unlock_irqrestore(&mbox->lock, flags);
+               return OCTEON_MBOX_STATUS_FAILED;
+       }
+
+       if ((mbox_cmd->msg.s.type == OCTEON_MBOX_REQUEST) &&
+           !(mbox->state & OCTEON_MBOX_STATE_IDLE)) {
+               spin_unlock_irqrestore(&mbox->lock, flags);
+               return OCTEON_MBOX_STATUS_BUSY;
+       }
+
+       if (mbox_cmd->msg.s.type == OCTEON_MBOX_REQUEST) {
+               /* Keep a copy of the request (callback included) so the
+                * response path can complete it later.
+                */
+               memcpy(&mbox->mbox_resp, mbox_cmd,
+                      sizeof(struct octeon_mbox_cmd));
+               mbox->state = OCTEON_MBOX_STATE_RESPONSE_PENDING;
+       }
+
+       spin_unlock_irqrestore(&mbox->lock, flags);
+
+       count = 0;
+
+       /* Wait until the write register shows the SIG pattern. */
+       while (readq(mbox->mbox_write_reg) != OCTEON_PFVFSIG) {
+               schedule_timeout_uninterruptible(LIO_MBOX_WRITE_WAIT_TIME);
+               if (count++ == LIO_MBOX_WRITE_WAIT_CNT) {
+                       ret = OCTEON_MBOX_STATUS_FAILED;
+                       break;
+               }
+       }
+
+       if (ret == OCTEON_MBOX_STATUS_SUCCESS) {
+               writeq(mbox_cmd->msg.u64, mbox->mbox_write_reg);
+               for (i = 0; i < (u32)(mbox_cmd->msg.s.len - 1); i++) {
+                       count = 0;
+                       while (readq(mbox->mbox_write_reg) !=
+                              OCTEON_PFVFACK) {
+                               schedule_timeout_uninterruptible(
+                                       LIO_MBOX_WRITE_WAIT_TIME);
+                               if (count++ == LIO_MBOX_WRITE_WAIT_CNT) {
+                                       ret = OCTEON_MBOX_STATUS_FAILED;
+                                       break;
+                               }
+                       }
+                       /* The previous word was never acknowledged: stop
+                        * sending instead of overwriting it.
+                        */
+                       if (ret != OCTEON_MBOX_STATUS_SUCCESS)
+                               break;
+                       writeq(mbox_cmd->data[i], mbox->mbox_write_reg);
+               }
+       }
+
+       spin_lock_irqsave(&mbox->lock, flags);
+       if (mbox_cmd->msg.s.type == OCTEON_MBOX_RESPONSE) {
+               mbox->state = OCTEON_MBOX_STATE_IDLE;
+               writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+       } else {
+               /* A request with no response expected, or one that failed
+                * to go out, must not leave the mailbox pending.
+                */
+               if ((!mbox_cmd->msg.s.resp_needed) ||
+                   (ret == OCTEON_MBOX_STATUS_FAILED)) {
+                       mbox->state &= ~OCTEON_MBOX_STATE_RESPONSE_PENDING;
+                       if (!(mbox->state &
+                             (OCTEON_MBOX_STATE_REQUEST_RECEIVING |
+                              OCTEON_MBOX_STATE_REQUEST_RECEIVED)))
+                               mbox->state = OCTEON_MBOX_STATE_IDLE;
+               }
+       }
+       spin_unlock_irqrestore(&mbox->lock, flags);
+
+       return ret;
+}
+
+/**
+ * octeon_mbox_process_cmd:
+ * @mbox: Pointer mailbox
+ * @mbox_cmd: Pointer to command received
+ *
+ * Dispatch on the command code of a received request. Unknown commands are
+ * ignored. Always returns 0.
+ */
+static int octeon_mbox_process_cmd(struct octeon_mbox *mbox,
+                                  struct octeon_mbox_cmd *mbox_cmd)
+{
+       struct octeon_device *oct = mbox->oct_dev;
+       struct lio_version *vf_ver;
+       struct pci_dev *vfdev;
+
+       switch (mbox_cmd->msg.s.cmd) {
+       case OCTEON_VF_ACTIVE:
+               dev_dbg(&oct->pci_dev->dev, "got vfactive sending data back\n");
+
+               /* Reuse the request buffer as a two-word response. */
+               mbox_cmd->msg.s.type = OCTEON_MBOX_RESPONSE;
+               mbox_cmd->msg.s.resp_needed = 1;
+               mbox_cmd->msg.s.len = 2;
+
+               /* The version travels in the first data word. */
+               mbox_cmd->data[0] = 0;
+               vf_ver = (struct lio_version *)&mbox_cmd->data[0];
+               vf_ver->major = LIQUIDIO_BASE_MAJOR_VERSION;
+               vf_ver->minor = LIQUIDIO_BASE_MINOR_VERSION;
+               vf_ver->micro = LIQUIDIO_BASE_MICRO_VERSION;
+
+               /* The handshake word fills the header's params bytes. */
+               memcpy(mbox_cmd->msg.s.params, (uint8_t *)&oct->pfvf_hsword,
+                      sizeof(mbox_cmd->msg.s.params));
+
+               /* Sending core config info to the corresponding active VF.*/
+               octeon_mbox_write(oct, mbox_cmd);
+               break;
+
+       case OCTEON_VF_FLR_REQUEST:
+               dev_info(&oct->pci_dev->dev,
+                        "got a request for FLR from VF that owns DPI ring %u\n",
+                        mbox->q_no);
+               vfdev = oct->sriov_info.dpiring_to_vfpcidev_lut[mbox->q_no];
+               pcie_capability_set_word(vfdev, PCI_EXP_DEVCTL,
+                                        PCI_EXP_DEVCTL_BCR_FLR);
+               break;
+
+       case OCTEON_PF_CHANGED_VF_MACADDR:
+               /* Only acted upon when this device is a CN23XX VF. */
+               if (OCTEON_CN23XX_VF(oct))
+                       octeon_pf_changed_vf_macaddr(oct,
+                                                    mbox_cmd->msg.s.params);
+               break;
+
+       default:
+               break;
+       }
+
+       return 0;
+}
+
+/**
+ * octeon_mbox_process_message:
+ * @mbox: Pointer mailbox
+ *
+ * Process the received mbox message: complete a pending response through
+ * its callback, hand a received request to octeon_mbox_process_cmd(), or
+ * bring the mailbox back to idle after an error.
+ *
+ * Always returns 0.
+ */
+int octeon_mbox_process_message(struct octeon_mbox *mbox)
+{
+       struct octeon_mbox_cmd mbox_cmd;
+       unsigned long flags;
+
+       spin_lock_irqsave(&mbox->lock, flags);
+
+       if (mbox->state & OCTEON_MBOX_STATE_ERROR) {
+               if (mbox->state & (OCTEON_MBOX_STATE_RESPONSE_PENDING |
+                                  OCTEON_MBOX_STATE_RESPONSE_RECEIVING)) {
+                       /* A request of ours was outstanding: report the
+                        * failure to its callback via recv_status = 1.
+                        */
+                       memcpy(&mbox_cmd, &mbox->mbox_resp,
+                              sizeof(struct octeon_mbox_cmd));
+                       mbox->state = OCTEON_MBOX_STATE_IDLE;
+                       writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+                       spin_unlock_irqrestore(&mbox->lock, flags);
+                       mbox_cmd.recv_status = 1;
+                       if (mbox_cmd.fn)
+                               mbox_cmd.fn(mbox->oct_dev, &mbox_cmd,
+                                           mbox_cmd.fn_arg);
+                       return 0;
+               }
+
+               mbox->state = OCTEON_MBOX_STATE_IDLE;
+               writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+               spin_unlock_irqrestore(&mbox->lock, flags);
+               return 0;
+       }
+
+       if (mbox->state & OCTEON_MBOX_STATE_RESPONSE_RECEIVED) {
+               /* Work on a private copy so the callback runs unlocked. */
+               memcpy(&mbox_cmd, &mbox->mbox_resp,
+                      sizeof(struct octeon_mbox_cmd));
+               mbox->state = OCTEON_MBOX_STATE_IDLE;
+               writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+               spin_unlock_irqrestore(&mbox->lock, flags);
+               mbox_cmd.recv_status = 0;
+               if (mbox_cmd.fn)
+                       mbox_cmd.fn(mbox->oct_dev, &mbox_cmd, mbox_cmd.fn_arg);
+               return 0;
+       }
+
+       if (mbox->state & OCTEON_MBOX_STATE_REQUEST_RECEIVED) {
+               memcpy(&mbox_cmd, &mbox->mbox_req,
+                      sizeof(struct octeon_mbox_cmd));
+               /* When no response is needed the request is finished here;
+                * otherwise octeon_mbox_write() resets the state once the
+                * response has been sent.
+                */
+               if (!mbox_cmd.msg.s.resp_needed) {
+                       mbox->state &= ~OCTEON_MBOX_STATE_REQUEST_RECEIVED;
+                       if (!(mbox->state &
+                             OCTEON_MBOX_STATE_RESPONSE_PENDING))
+                               mbox->state = OCTEON_MBOX_STATE_IDLE;
+                       writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+               }
+
+               spin_unlock_irqrestore(&mbox->lock, flags);
+               octeon_mbox_process_cmd(mbox, &mbox_cmd);
+               return 0;
+       }
+
+       /* No state matched: release the lock taken above before warning. */
+       spin_unlock_irqrestore(&mbox->lock, flags);
+       WARN_ON(1);
+
+       return 0;
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h
new file mode 100644 (file)
index 0000000..fe60a3e
--- /dev/null
@@ -0,0 +1,115 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+#ifndef __MAILBOX_H__
+#define __MAILBOX_H__
+
+/* Macros for Mail Box Communication */
+
+#define OCTEON_MBOX_DATA_MAX   32
+
+#define OCTEON_VF_ACTIVE               0x1
+#define OCTEON_VF_FLR_REQUEST          0x2
+#define OCTEON_PF_CHANGED_VF_MACADDR   0x4
+
+/* Macros for read acknowledgement */
+#define OCTEON_PFVFACK                 0xffffffffffffffff
+#define OCTEON_PFVFSIG                 0x1122334455667788
+#define OCTEON_PFVFERR                 0xDEADDEADDEADDEAD
+
+#define LIO_MBOX_WRITE_WAIT_CNT          1000
+#define LIO_MBOX_WRITE_WAIT_TIME           10
+
+enum octeon_mbox_cmd_status {
+       OCTEON_MBOX_STATUS_SUCCESS = 0,
+       OCTEON_MBOX_STATUS_FAILED = 1,
+       OCTEON_MBOX_STATUS_BUSY = 2
+};
+
+enum octeon_mbox_message_type {
+       OCTEON_MBOX_REQUEST = 0,
+       OCTEON_MBOX_RESPONSE = 1
+};
+
+union octeon_mbox_message {
+       u64 u64;
+       struct {
+               u16 type : 1;
+               u16 resp_needed : 1;
+               u16 cmd : 6;
+               u16 len : 8;
+               u8 params[6];
+       } s;
+};
+
+typedef void (*octeon_mbox_callback_t)(void *, void *, void *);
+
+struct octeon_mbox_cmd {        /* one mailbox request or response */
+       union octeon_mbox_message msg;  /* header: type/resp_needed/cmd/len */
+       u64 data[OCTEON_MBOX_DATA_MAX]; /* up to OCTEON_MBOX_DATA_MAX words */
+       u32 q_no;
+       u32 recv_len;
+       u32 recv_status;        /* 0 or 1 is stored here before fn is called */
+       octeon_mbox_callback_t fn;      /* if set: fn(oct_dev, cmd, fn_arg) */
+       void *fn_arg;           /* handed to fn as its third argument */
+};
+
+enum octeon_mbox_state {  /* bit flags: states are tested/cleared with & */
+       OCTEON_MBOX_STATE_IDLE = 1,
+       OCTEON_MBOX_STATE_REQUEST_RECEIVING = 2,
+       OCTEON_MBOX_STATE_REQUEST_RECEIVED = 4,
+       OCTEON_MBOX_STATE_RESPONSE_PENDING = 8,
+       OCTEON_MBOX_STATE_RESPONSE_RECEIVING = 16,
+       OCTEON_MBOX_STATE_RESPONSE_RECEIVED = 32, /* was 16: aliased RECEIVING */
+       OCTEON_MBOX_STATE_ERROR = 64
+};
+
+struct octeon_mbox {
+       /** A spinlock to protect access to this q_mbox. */
+       spinlock_t lock;
+
+       struct octeon_device *oct_dev;
+
+       u32 q_no;
+
+       enum octeon_mbox_state state;
+
+       struct cavium_wk mbox_poll_wk;
+
+       /** SLI_MAC_PF_MBOX_INT for PF, SLI_PKT_MBOX_INT for VF. */
+       void *mbox_int_reg;
+
+       /** SLI_PKT_PF_VF_MBOX_SIG(0) for PF, SLI_PKT_PF_VF_MBOX_SIG(1) for VF.
+        */
+       void *mbox_write_reg;
+
+       /** SLI_PKT_PF_VF_MBOX_SIG(1) for PF, SLI_PKT_PF_VF_MBOX_SIG(0) for VF.
+        */
+       void *mbox_read_reg;
+
+       struct octeon_mbox_cmd mbox_req;
+
+       struct octeon_mbox_cmd mbox_resp;
+
+};
+
+int octeon_mbox_read(struct octeon_mbox *mbox);
+int octeon_mbox_write(struct octeon_device *oct,
+                     struct octeon_mbox_cmd *mbox_cmd);
+int octeon_mbox_process_message(struct octeon_mbox *mbox);
+
+#endif
index 366298f7bcb2fbc4e9d38376d84960c631cd5fa3..8cd389148166654cece765f876dcb3eee89cd20c 100644 (file)
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
  * This file is distributed in the hope that it will be useful, but
  * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
- **********************************************************************/
-
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file octeon_main.h
  *  \brief Host Driver: This file is included by all host driver source files
  *  to include common definitions.
@@ -66,7 +61,7 @@ void octeon_update_tx_completion_counters(void *buf, int reqtype,
                                          unsigned int *bytes_compl);
 void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl,
                                        unsigned int bytes_compl);
-
+void octeon_pf_changed_vf_macaddr(struct octeon_device *oct, u8 *mac);
 /** Swap 8B blocks */
 static inline void octeon_swap_8B_data(u64 *data, u32 blocks)
 {
@@ -78,10 +73,10 @@ static inline void octeon_swap_8B_data(u64 *data, u32 blocks)
 }
 
 /**
 * \brief unmaps a PCI BAR
 * @param oct Pointer to Octeon device
 * @param baridx bar index
 */
+ * \brief unmaps a PCI BAR
+ * @param oct Pointer to Octeon device
+ * @param baridx bar index
+ */
 static inline void octeon_unmap_pci_barx(struct octeon_device *oct, int baridx)
 {
        dev_dbg(&oct->pci_dev->dev, "Freeing PCI mapped regions for Bar%d\n",
@@ -116,7 +111,7 @@ static inline int octeon_map_pci_barx(struct octeon_device *oct,
 
        mapped_len = oct->mmio[baridx].len;
        if (!mapped_len)
-               return 1;
+               goto err_release_region;
 
        if (max_map_len && (mapped_len > max_map_len))
                mapped_len = max_map_len;
@@ -132,11 +127,15 @@ static inline int octeon_map_pci_barx(struct octeon_device *oct,
        if (!oct->mmio[baridx].hw_addr) {
                dev_err(&oct->pci_dev->dev, "error ioremap for bar %d\n",
                        baridx);
-               return 1;
+               goto err_release_region;
        }
        oct->mmio[baridx].done = 1;
 
        return 0;
+
+err_release_region:
+       pci_release_region(oct->pci_dev, baridx * 2);
+       return 1;
 }
 
 static inline void *
@@ -203,24 +202,6 @@ out:
        return errno;
 }
 
-static inline void
-sleep_atomic_cond(wait_queue_head_t *waitq, atomic_t *pcond)
-{
-       wait_queue_t we;
-
-       init_waitqueue_entry(&we, current);
-       add_wait_queue(waitq, &we);
-       while (!atomic_read(pcond)) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               if (signal_pending(current))
-                       goto out;
-               schedule();
-       }
-out:
-       set_current_state(TASK_RUNNING);
-       remove_wait_queue(waitq, &we);
-}
-
 /* Gives up the CPU for a timeout period.
  * Check that the condition is not true before we go to sleep for a
  * timeout period.
index 0dc081a99b3078547cfd0d66a233ee7db7fa7e54..13a18c9a7a5160d0a28e916ba3939b2a9fd0d952 100644 (file)
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 #include <linux/netdevice.h>
 #include "liquidio_common.h"
@@ -39,7 +36,7 @@ octeon_toggle_bar1_swapmode(struct octeon_device *oct, u32 idx)
        oct->fn_list.bar1_idx_write(oct, idx, mask);
 }
 #else
-#define octeon_toggle_bar1_swapmode(oct, idx) (oct = oct)
+#define octeon_toggle_bar1_swapmode(oct, idx)
 #endif
 
 static void
index 11b183377b44992bf0fc3b6795fc1025c20f0a8e..bae2fdd895037a672eff60ab4e65e31f9360abc6 100644 (file)
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 
 /*!  \file octeon_mem_ops.h
index 54b9665963235b0631cf3d87f0c5264e9685e101..e94edc841cadebcb5b38b30766dc4fbc3210f8cf 100644 (file)
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 
 /*!  \file  octeon_network.h
@@ -131,7 +128,7 @@ struct lio {
 #define LIO_SIZE         (sizeof(struct lio))
 #define GET_LIO(netdev)  ((struct lio *)netdev_priv(netdev))
 
-#define CIU3_WDOG(c)                 (0x1010000020000ULL + (c << 3))
+#define CIU3_WDOG(c)                 (0x1010000020000ULL + ((c) << 3))
 #define CIU3_WDOG_MASK               12ULL
 #define LIO_MONITOR_WDOG_EXPIRE      1
 #define LIO_MONITOR_CORE_STUCK_MSGD  2
@@ -342,9 +339,9 @@ static inline void tx_buffer_free(void *buffer)
 }
 
 #define lio_dma_alloc(oct, size, dma_addr) \
-       dma_alloc_coherent(&oct->pci_dev->dev, size, dma_addr, GFP_KERNEL)
+       dma_alloc_coherent(&(oct)->pci_dev->dev, size, dma_addr, GFP_KERNEL)
 #define lio_dma_free(oct, size, virt_addr, dma_addr) \
-       dma_free_coherent(&oct->pci_dev->dev, size, virt_addr, dma_addr)
+       dma_free_coherent(&(oct)->pci_dev->dev, size, virt_addr, dma_addr)
 
 static inline
 void *get_rbd(struct sk_buff *skb)
index 40ac1fe889569de3f33660656a2d4e7187865364..c3d6a822836222e48cae80d10aa2549cb3a4f074 100644 (file)
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
index 4b8da67b995fe28e87b525cadfbd51eff2ab689a..0c7a5c9b2932d4db89066d3496b28d134a8bcedc 100644 (file)
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 
 /*!  \file octeon_nic.h
@@ -67,7 +64,7 @@ struct octnic_ctrl_pkt {
        octnic_ctrl_pkt_cb_fn_t cb_fn;
 };
 
-#define MAX_UDD_SIZE(nctrl) (sizeof(nctrl->udd))
+#define MAX_UDD_SIZE(nctrl) (sizeof((nctrl)->udd))
 
 /** Structure of data information passed by the NIC module to the OSI
  * layer when forwarding data to Octeon device software.
index 90866bb5003321163138ba9a200694998f613942..ea2b7e46631d0c1e1651dadc9b7f41bb905d024d 100644 (file)
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
@@ -31,9 +28,7 @@
 #include "octeon_network.h"
 #include "cn66xx_device.h"
 #include "cn23xx_pf_device.h"
-
-#define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count)  \
-       (octeon_dev_ptr->instr_queue[iq_no]->stats.field += count)
+#include "cn23xx_vf_device.h"
 
 struct iq_post_status {
        int status;
@@ -71,9 +66,12 @@ int octeon_init_instr_queue(struct octeon_device *oct,
        int numa_node = cpu_to_node(iq_no % num_online_cpus());
 
        if (OCTEON_CN6XXX(oct))
-               conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn6xxx, conf)));
+               conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn6xxx)));
        else if (OCTEON_CN23XX_PF(oct))
-               conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn23xx_pf, conf)));
+               conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_pf)));
+       else if (OCTEON_CN23XX_VF(oct))
+               conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_vf)));
+
        if (!conf) {
                dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n",
                        oct->chip_id);
@@ -145,7 +143,7 @@ int octeon_init_instr_queue(struct octeon_device *oct,
 
        spin_lock_init(&iq->iq_flush_running_lock);
 
-       oct->io_qmask.iq |= (1ULL << iq_no);
+       oct->io_qmask.iq |= BIT_ULL(iq_no);
 
        /* Set the 32B/64B mode for each input queue */
        oct->io_qmask.iq64B |= ((conf->instr_type == 64) << iq_no);
@@ -157,6 +155,8 @@ int octeon_init_instr_queue(struct octeon_device *oct,
                                                     WQ_MEM_RECLAIM,
                                                     0);
        if (!oct->check_db_wq[iq_no].wq) {
+               vfree(iq->request_list);
+               iq->request_list = NULL;
                lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
                dev_err(&oct->pci_dev->dev, "check db wq create failed for iq %d\n",
                        iq_no);
@@ -183,10 +183,13 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
 
        if (OCTEON_CN6XXX(oct))
                desc_size =
-                   CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf));
+                   CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn6xxx));
        else if (OCTEON_CN23XX_PF(oct))
                desc_size =
-                   CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn23xx_pf, conf));
+                   CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_pf));
+       else if (OCTEON_CN23XX_VF(oct))
+               desc_size =
+                   CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_vf));
 
        vfree(iq->request_list);
 
@@ -239,7 +242,9 @@ int octeon_setup_iq(struct octeon_device *oct,
        }
 
        oct->num_iqs++;
-       oct->fn_list.enable_io_queues(oct);
+       if (oct->fn_list.enable_io_queues(oct))
+               return 1;
+
        return 0;
 }
 
@@ -250,9 +255,8 @@ int lio_wait_for_instr_fetch(struct octeon_device *oct)
        do {
                instr_cnt = 0;
 
-               /*for (i = 0; i < oct->num_iqs; i++) {*/
                for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
-                       if (!(oct->io_qmask.iq & (1ULL << i)))
+                       if (!(oct->io_qmask.iq & BIT_ULL(i)))
                                continue;
                        pending =
                            atomic_read(&oct->
@@ -319,7 +323,8 @@ __post_command2(struct octeon_instr_queue *iq, u8 *cmd)
 
        /* "index" is returned, host_write_index is modified. */
        st.index = iq->host_write_index;
-       INCR_INDEX_BY1(iq->host_write_index, iq->max_count);
+       iq->host_write_index = incr_index(iq->host_write_index, 1,
+                                         iq->max_count);
        iq->fill_cnt++;
 
        /* Flush the command into memory. We need to be sure the data is in
@@ -434,7 +439,7 @@ lio_process_iq_request_list(struct octeon_device *oct,
 
  skip_this:
                inst_count++;
-               INCR_INDEX_BY1(old, iq->max_count);
+               old = incr_index(old, 1, iq->max_count);
 
                if ((napi_budget) && (inst_count >= napi_budget))
                        break;
@@ -577,8 +582,6 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no,
        /* This is only done here to expedite packets being flushed
         * for cases where there are no IQ completion interrupts.
         */
-       /*if (iq->do_auto_flush)*/
-       /*      octeon_flush_iq(oct, iq, 2, 0);*/
 
        return st.status;
 }
@@ -749,8 +752,10 @@ int octeon_setup_sc_buffer_pool(struct octeon_device *oct)
                        lio_dma_alloc(oct,
                                      SOFT_COMMAND_BUFFER_SIZE,
                                          (dma_addr_t *)&dma_addr);
-               if (!sc)
+               if (!sc) {
+                       octeon_free_sc_buffer_pool(oct);
                        return 1;
+               }
 
                sc->dma_addr = dma_addr;
                sc->size = SOFT_COMMAND_BUFFER_SIZE;
index be52178d8cb662e87a90b773811b2dd111100c21..fdaf742a59cb264a4d37d33744cc5d8bc89574f7 100644 (file)
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
@@ -81,11 +78,7 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev,
                spin_lock_bh(&ordered_sc_list->lock);
 
                if (ordered_sc_list->head.next == &ordered_sc_list->head) {
-                       /* ordered_sc_list is empty; there is
-                        * nothing to process
-                        */
-                       spin_unlock_bh
-                           (&ordered_sc_list->lock);
+                       spin_unlock_bh(&ordered_sc_list->lock);
                        return 1;
                }
 
index 7a48752dcb1088d2e53d21249b05dfcef19f7546..cbb2d84e89323aea4852c31c164c2ff49c40257c 100644 (file)
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 
 /*! \file response_manager.h
@@ -85,7 +82,6 @@ enum {
 /**  A value of 0x00000000 indicates no error i.e. success */
 #define DRIVER_ERROR_NONE                 0x00000000
 
-/**  (Major number: 0x0000; Minor Number: 0x0001) */
 #define DRIVER_ERROR_REQ_PENDING          0x00000001
 #define DRIVER_ERROR_REQ_TIMEOUT          0x00000003
 #define DRIVER_ERROR_REQ_EINTR            0x00000004
index 30426109711cf4ebec9f1cc28fed75e7961d79a0..e739c715356283553f4ace131a251bc4b30d6de2 100644 (file)
@@ -47,7 +47,7 @@
 
 /* Min/Max packet size */
 #define        NIC_HW_MIN_FRS                  64
-#define        NIC_HW_MAX_FRS                  9200 /* 9216 max packet including FCS */
+#define        NIC_HW_MAX_FRS                  9190 /* Excluding L2 header and FCS */
 
 /* Max pkinds */
 #define        NIC_MAX_PKIND                   16
@@ -149,6 +149,12 @@ struct nicvf_rss_info {
        u64 key[RSS_HASH_KEY_SIZE];
 } ____cacheline_aligned_in_smp;
 
+struct nicvf_pfc {
+       u8    autoneg;
+       u8    fc_rx;
+       u8    fc_tx;
+};
+
 enum rx_stats_reg_offset {
        RX_OCTS = 0x0,
        RX_UCAST = 0x1,
@@ -178,11 +184,11 @@ enum tx_stats_reg_offset {
 
 struct nicvf_hw_stats {
        u64 rx_bytes;
+       u64 rx_frames;
        u64 rx_ucast_frames;
        u64 rx_bcast_frames;
        u64 rx_mcast_frames;
-       u64 rx_fcs_errors;
-       u64 rx_l2_errors;
+       u64 rx_drops;
        u64 rx_drop_red;
        u64 rx_drop_red_bytes;
        u64 rx_drop_overrun;
@@ -191,6 +197,19 @@ struct nicvf_hw_stats {
        u64 rx_drop_mcast;
        u64 rx_drop_l3_bcast;
        u64 rx_drop_l3_mcast;
+       u64 rx_fcs_errors;
+       u64 rx_l2_errors;
+
+       u64 tx_bytes;
+       u64 tx_frames;
+       u64 tx_ucast_frames;
+       u64 tx_bcast_frames;
+       u64 tx_mcast_frames;
+       u64 tx_drops;
+};
+
+struct nicvf_drv_stats {
+       /* CQE Rx errs */
        u64 rx_bgx_truncated_pkts;
        u64 rx_jabber_errs;
        u64 rx_fcs_errs;
@@ -216,34 +235,30 @@ struct nicvf_hw_stats {
        u64 rx_l4_pclp;
        u64 rx_truncated_pkts;
 
-       u64 tx_bytes_ok;
-       u64 tx_ucast_frames_ok;
-       u64 tx_bcast_frames_ok;
-       u64 tx_mcast_frames_ok;
-       u64 tx_drops;
-};
-
-struct nicvf_drv_stats {
-       /* Rx */
-       u64 rx_frames_ok;
-       u64 rx_frames_64;
-       u64 rx_frames_127;
-       u64 rx_frames_255;
-       u64 rx_frames_511;
-       u64 rx_frames_1023;
-       u64 rx_frames_1518;
-       u64 rx_frames_jumbo;
-       u64 rx_drops;
-
+       /* CQE Tx errs */
+       u64 tx_desc_fault;
+       u64 tx_hdr_cons_err;
+       u64 tx_subdesc_err;
+       u64 tx_max_size_exceeded;
+       u64 tx_imm_size_oflow;
+       u64 tx_data_seq_err;
+       u64 tx_mem_seq_err;
+       u64 tx_lock_viol;
+       u64 tx_data_fault;
+       u64 tx_tstmp_conflict;
+       u64 tx_tstmp_timeout;
+       u64 tx_mem_fault;
+       u64 tx_csum_overlap;
+       u64 tx_csum_overflow;
+
+       /* driver debug stats */
        u64 rcv_buffer_alloc_failures;
-
-       /* Tx */
-       u64 tx_frames_ok;
-       u64 tx_drops;
        u64 tx_tso;
        u64 tx_timeout;
        u64 txq_stop;
        u64 txq_wake;
+
+       struct u64_stats_sync   syncp;
 };
 
 struct nicvf {
@@ -282,13 +297,14 @@ struct nicvf {
 
        u8                      node;
        u8                      cpi_alg;
-       u16                     mtu;
        bool                    link_up;
+       u8                      mac_type;
        u8                      duplex;
        u32                     speed;
        bool                    tns_mode;
        bool                    loopback_supported;
        struct nicvf_rss_info   rss_info;
+       struct nicvf_pfc        pfc;
        struct tasklet_struct   qs_err_task;
        struct work_struct      reset_task;
 
@@ -298,7 +314,7 @@ struct nicvf {
 
        /* Stats */
        struct nicvf_hw_stats   hw_stats;
-       struct nicvf_drv_stats  drv_stats;
+       struct nicvf_drv_stats  __percpu *drv_stats;
        struct bgx_stats        bgx_stats;
 
        /* MSI-X  */
@@ -349,6 +365,7 @@ struct nicvf {
 #define        NIC_MBOX_MSG_SNICVF_PTR         0x15    /* Send sqet nicvf ptr to PVF */
 #define        NIC_MBOX_MSG_LOOPBACK           0x16    /* Set interface in loopback */
 #define        NIC_MBOX_MSG_RESET_STAT_COUNTER 0x17    /* Reset statistics counters */
+#define        NIC_MBOX_MSG_PFC                0x18    /* Pause frame control */
 #define        NIC_MBOX_MSG_CFG_DONE           0xF0    /* VF configuration done */
 #define        NIC_MBOX_MSG_SHUTDOWN           0xF1    /* VF is being shutdown */
 
@@ -438,6 +455,7 @@ struct bgx_stats_msg {
 /* Physical interface link status */
 struct bgx_link_status {
        u8    msg;
+       u8    mac_type;
        u8    link_up;
        u8    duplex;
        u32   speed;
@@ -490,6 +508,14 @@ struct reset_stat_cfg {
        u16   sq_stat_mask;
 };
 
+struct pfc {
+       u8    msg;
+       u8    get; /* Get or set PFC settings */
+       u8    autoneg;
+       u8    fc_rx;
+       u8    fc_tx;
+};
+
 /* 128 bit shared memory between PF and each VF */
 union nic_mbx {
        struct { u8 msg; }      msg;
@@ -508,6 +534,7 @@ union nic_mbx {
        struct nicvf_ptr        nicvf;
        struct set_loopback     lbk;
        struct reset_stat_cfg   reset_stat;
+       struct pfc              pfc;
 };
 
 #define NIC_NODE_ID_MASK       0x03
index 2bbf4cbf08b21f273100d589f6893eb2e04fd3b4..767234e2e8f94bb0520bee29e1813f8934cae7a8 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/pci.h>
 #include <linux/etherdevice.h>
 #include <linux/of.h>
+#include <linux/if_vlan.h>
 
 #include "nic_reg.h"
 #include "nic.h"
@@ -260,18 +261,31 @@ static void nic_get_bgx_stats(struct nicpf *nic, struct bgx_stats_msg *bgx)
 /* Update hardware min/max frame size */
 static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf)
 {
-       if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS)) {
-               dev_err(&nic->pdev->dev,
-                       "Invalid MTU setting from VF%d rejected, should be between %d and %d\n",
-                          vf, NIC_HW_MIN_FRS, NIC_HW_MAX_FRS);
+       int bgx, lmac, lmac_cnt;
+       u64 lmac_credits;
+
+       if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS))
                return 1;
-       }
-       new_frs += ETH_HLEN;
-       if (new_frs <= nic->pkind.maxlen)
-               return 0;
 
-       nic->pkind.maxlen = new_frs;
-       nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG, *(u64 *)&nic->pkind);
+       bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+       lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+       lmac += bgx * MAX_LMAC_PER_BGX;
+
+       new_frs += VLAN_ETH_HLEN + ETH_FCS_LEN + 4;
+
+       /* Update corresponding LMAC credits */
+       lmac_cnt = bgx_get_lmac_count(nic->node, bgx);
+       lmac_credits = nic_reg_read(nic, NIC_PF_LMAC_0_7_CREDIT + (lmac * 8));
+       lmac_credits &= ~(0xFFFFFULL << 12);
+       lmac_credits |= (((((48 * 1024) / lmac_cnt) - new_frs) / 16) << 12);
+       nic_reg_write(nic, NIC_PF_LMAC_0_7_CREDIT + (lmac * 8), lmac_credits);
+
+       /* Enforce MTU in HW
+        * This config is supported only from 88xx pass 2.0 onwards.
+        */
+       if (!pass1_silicon(nic->pdev))
+               nic_reg_write(nic,
+                             NIC_PF_LMAC_0_7_CFG2 + (lmac * 8), new_frs);
        return 0;
 }
 
@@ -464,7 +478,7 @@ static int nic_init_hw(struct nicpf *nic)
 
        /* PKIND configuration */
        nic->pkind.minlen = 0;
-       nic->pkind.maxlen = NIC_HW_MAX_FRS + ETH_HLEN;
+       nic->pkind.maxlen = NIC_HW_MAX_FRS + VLAN_ETH_HLEN + ETH_FCS_LEN + 4;
        nic->pkind.lenerr_en = 1;
        nic->pkind.rx_hdr = 0;
        nic->pkind.hdr_sl = 0;
@@ -795,6 +809,15 @@ static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk)
 
        bgx_lmac_internal_loopback(nic->node, bgx_idx, lmac_idx, lbk->enable);
 
+       /* Enable moving average calculation.
+        * Keep the LVL/AVG delay at the HW-enforced minimum so that not too
+        * many packets sneak in between average calculations.
+        */
+       nic_reg_write(nic, NIC_PF_CQ_AVG_CFG,
+                     (BIT_ULL(20) | 0x2ull << 14 | 0x1));
+       nic_reg_write(nic, NIC_PF_RRM_AVG_CFG,
+                     (BIT_ULL(20) | 0x3ull << 14 | 0x1));
+
        return 0;
 }
 
@@ -837,6 +860,7 @@ static int nic_reset_stat_counters(struct nicpf *nic,
                        nic_reg_write(nic, reg_addr, 0);
                }
        }
+
        return 0;
 }
 
@@ -874,6 +898,30 @@ static void nic_enable_vf(struct nicpf *nic, int vf, bool enable)
        bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, enable);
 }
 
+/* Serve a VF's PFC mailbox request: report (cfg->get) or apply the settings */
+static void nic_pause_frame(struct nicpf *nic, int vf, struct pfc *cfg)
+{
+       union nic_mbx reply = {};
+       struct pfc cur;
+       int bgx_idx, lmac_idx;
+
+       if (vf >= nic->num_vf_en)
+               return;
+       bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+       lmac_idx = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+       if (!cfg->get) {        /* set request: program the LMAC, then ACK */
+               bgx_lmac_set_pfc(nic->node, bgx_idx, lmac_idx, cfg);
+               nic_mbx_send_ack(nic, vf);
+               return;
+       }
+       bgx_lmac_get_pfc(nic->node, bgx_idx, lmac_idx, &cur);
+       reply.pfc.msg = NIC_MBOX_MSG_PFC;
+       reply.pfc.autoneg = cur.autoneg;
+       reply.pfc.fc_rx = cur.fc_rx;
+       reply.pfc.fc_tx = cur.fc_tx;
+       nic_send_msg_to_vf(nic, vf, &reply);
+}
+
 /* Interrupt handler to handle mailbox messages from VFs */
 static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
 {
@@ -1013,6 +1061,9 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
        case NIC_MBOX_MSG_RESET_STAT_COUNTER:
                ret = nic_reset_stat_counters(nic, vf, &mbx.reset_stat);
                break;
+       case NIC_MBOX_MSG_PFC:
+               nic_pause_frame(nic, vf, &mbx.pfc);
+               goto unlock;
        default:
                dev_err(&nic->pdev->dev,
                        "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
@@ -1243,6 +1294,7 @@ static void nic_poll_for_link(struct work_struct *work)
                        mbx.link_status.link_up = link.link_up;
                        mbx.link_status.duplex = link.duplex;
                        mbx.link_status.speed = link.speed;
+                       mbx.link_status.mac_type = link.mac_type;
                        nic_send_msg_to_vf(nic, vf, &mbx);
                }
        }
index edf779f5a227022df6069e267ef697ef05e784c8..80d46337cf29183a3a0911b0d94ff23ad08f3418 100644 (file)
 #define   NIC_PF_MPI_0_2047_CFG                        (0x210000)
 #define   NIC_PF_RSSI_0_4097_RQ                        (0x220000)
 #define   NIC_PF_LMAC_0_7_CFG                  (0x240000)
+#define   NIC_PF_LMAC_0_7_CFG2                 (0x240100)
 #define   NIC_PF_LMAC_0_7_SW_XOFF              (0x242000)
 #define   NIC_PF_LMAC_0_7_CREDIT               (0x244000)
 #define   NIC_PF_CHAN_0_255_TX_CFG             (0x400000)
index ad4fddb5542160b4512643cde5228f7a9d28b2e0..b0482410052d6c2f459d91fea635abe7d03f7cfa 100644 (file)
@@ -36,11 +36,11 @@ struct nicvf_stat {
 
 static const struct nicvf_stat nicvf_hw_stats[] = {
        NICVF_HW_STAT(rx_bytes),
+       NICVF_HW_STAT(rx_frames),
        NICVF_HW_STAT(rx_ucast_frames),
        NICVF_HW_STAT(rx_bcast_frames),
        NICVF_HW_STAT(rx_mcast_frames),
-       NICVF_HW_STAT(rx_fcs_errors),
-       NICVF_HW_STAT(rx_l2_errors),
+       NICVF_HW_STAT(rx_drops),
        NICVF_HW_STAT(rx_drop_red),
        NICVF_HW_STAT(rx_drop_red_bytes),
        NICVF_HW_STAT(rx_drop_overrun),
@@ -49,50 +49,59 @@ static const struct nicvf_stat nicvf_hw_stats[] = {
        NICVF_HW_STAT(rx_drop_mcast),
        NICVF_HW_STAT(rx_drop_l3_bcast),
        NICVF_HW_STAT(rx_drop_l3_mcast),
-       NICVF_HW_STAT(rx_bgx_truncated_pkts),
-       NICVF_HW_STAT(rx_jabber_errs),
-       NICVF_HW_STAT(rx_fcs_errs),
-       NICVF_HW_STAT(rx_bgx_errs),
-       NICVF_HW_STAT(rx_prel2_errs),
-       NICVF_HW_STAT(rx_l2_hdr_malformed),
-       NICVF_HW_STAT(rx_oversize),
-       NICVF_HW_STAT(rx_undersize),
-       NICVF_HW_STAT(rx_l2_len_mismatch),
-       NICVF_HW_STAT(rx_l2_pclp),
-       NICVF_HW_STAT(rx_ip_ver_errs),
-       NICVF_HW_STAT(rx_ip_csum_errs),
-       NICVF_HW_STAT(rx_ip_hdr_malformed),
-       NICVF_HW_STAT(rx_ip_payload_malformed),
-       NICVF_HW_STAT(rx_ip_ttl_errs),
-       NICVF_HW_STAT(rx_l3_pclp),
-       NICVF_HW_STAT(rx_l4_malformed),
-       NICVF_HW_STAT(rx_l4_csum_errs),
-       NICVF_HW_STAT(rx_udp_len_errs),
-       NICVF_HW_STAT(rx_l4_port_errs),
-       NICVF_HW_STAT(rx_tcp_flag_errs),
-       NICVF_HW_STAT(rx_tcp_offset_errs),
-       NICVF_HW_STAT(rx_l4_pclp),
-       NICVF_HW_STAT(rx_truncated_pkts),
-       NICVF_HW_STAT(tx_bytes_ok),
-       NICVF_HW_STAT(tx_ucast_frames_ok),
-       NICVF_HW_STAT(tx_bcast_frames_ok),
-       NICVF_HW_STAT(tx_mcast_frames_ok),
+       NICVF_HW_STAT(rx_fcs_errors),
+       NICVF_HW_STAT(rx_l2_errors),
+       NICVF_HW_STAT(tx_bytes),
+       NICVF_HW_STAT(tx_frames),
+       NICVF_HW_STAT(tx_ucast_frames),
+       NICVF_HW_STAT(tx_bcast_frames),
+       NICVF_HW_STAT(tx_mcast_frames),
+       NICVF_HW_STAT(tx_drops),
 };
 
 static const struct nicvf_stat nicvf_drv_stats[] = {
-       NICVF_DRV_STAT(rx_frames_ok),
-       NICVF_DRV_STAT(rx_frames_64),
-       NICVF_DRV_STAT(rx_frames_127),
-       NICVF_DRV_STAT(rx_frames_255),
-       NICVF_DRV_STAT(rx_frames_511),
-       NICVF_DRV_STAT(rx_frames_1023),
-       NICVF_DRV_STAT(rx_frames_1518),
-       NICVF_DRV_STAT(rx_frames_jumbo),
-       NICVF_DRV_STAT(rx_drops),
+       NICVF_DRV_STAT(rx_bgx_truncated_pkts),
+       NICVF_DRV_STAT(rx_jabber_errs),
+       NICVF_DRV_STAT(rx_fcs_errs),
+       NICVF_DRV_STAT(rx_bgx_errs),
+       NICVF_DRV_STAT(rx_prel2_errs),
+       NICVF_DRV_STAT(rx_l2_hdr_malformed),
+       NICVF_DRV_STAT(rx_oversize),
+       NICVF_DRV_STAT(rx_undersize),
+       NICVF_DRV_STAT(rx_l2_len_mismatch),
+       NICVF_DRV_STAT(rx_l2_pclp),
+       NICVF_DRV_STAT(rx_ip_ver_errs),
+       NICVF_DRV_STAT(rx_ip_csum_errs),
+       NICVF_DRV_STAT(rx_ip_hdr_malformed),
+       NICVF_DRV_STAT(rx_ip_payload_malformed),
+       NICVF_DRV_STAT(rx_ip_ttl_errs),
+       NICVF_DRV_STAT(rx_l3_pclp),
+       NICVF_DRV_STAT(rx_l4_malformed),
+       NICVF_DRV_STAT(rx_l4_csum_errs),
+       NICVF_DRV_STAT(rx_udp_len_errs),
+       NICVF_DRV_STAT(rx_l4_port_errs),
+       NICVF_DRV_STAT(rx_tcp_flag_errs),
+       NICVF_DRV_STAT(rx_tcp_offset_errs),
+       NICVF_DRV_STAT(rx_l4_pclp),
+       NICVF_DRV_STAT(rx_truncated_pkts),
+
+       NICVF_DRV_STAT(tx_desc_fault),
+       NICVF_DRV_STAT(tx_hdr_cons_err),
+       NICVF_DRV_STAT(tx_subdesc_err),
+       NICVF_DRV_STAT(tx_max_size_exceeded),
+       NICVF_DRV_STAT(tx_imm_size_oflow),
+       NICVF_DRV_STAT(tx_data_seq_err),
+       NICVF_DRV_STAT(tx_mem_seq_err),
+       NICVF_DRV_STAT(tx_lock_viol),
+       NICVF_DRV_STAT(tx_data_fault),
+       NICVF_DRV_STAT(tx_tstmp_conflict),
+       NICVF_DRV_STAT(tx_tstmp_timeout),
+       NICVF_DRV_STAT(tx_mem_fault),
+       NICVF_DRV_STAT(tx_csum_overlap),
+       NICVF_DRV_STAT(tx_csum_overflow),
+
        NICVF_DRV_STAT(rcv_buffer_alloc_failures),
-       NICVF_DRV_STAT(tx_frames_ok),
        NICVF_DRV_STAT(tx_tso),
-       NICVF_DRV_STAT(tx_drops),
        NICVF_DRV_STAT(tx_timeout),
        NICVF_DRV_STAT(txq_stop),
        NICVF_DRV_STAT(txq_wake),
@@ -121,12 +130,42 @@ static int nicvf_get_settings(struct net_device *netdev,
                return 0;
        }
 
-       if (nic->speed <= 1000) {
-               cmd->port = PORT_MII;
+       switch (nic->speed) {
+       case SPEED_1000:
+               cmd->port = PORT_MII | PORT_TP;
                cmd->autoneg = AUTONEG_ENABLE;
-       } else {
+               cmd->supported |= SUPPORTED_MII | SUPPORTED_TP;
+               cmd->supported |= SUPPORTED_1000baseT_Full |
+                                 SUPPORTED_1000baseT_Half |
+                                 SUPPORTED_100baseT_Full  |
+                                 SUPPORTED_100baseT_Half  |
+                                 SUPPORTED_10baseT_Full   |
+                                 SUPPORTED_10baseT_Half;
+               cmd->supported |= SUPPORTED_Autoneg;
+               cmd->advertising |= ADVERTISED_1000baseT_Full |
+                                   ADVERTISED_1000baseT_Half |
+                                   ADVERTISED_100baseT_Full  |
+                                   ADVERTISED_100baseT_Half  |
+                                   ADVERTISED_10baseT_Full   |
+                                   ADVERTISED_10baseT_Half;
+               break;
+       case SPEED_10000:
+               if (nic->mac_type == BGX_MODE_RXAUI) {
+                       cmd->port = PORT_TP;
+                       cmd->supported |= SUPPORTED_TP;
+               } else {
+                       cmd->port = PORT_FIBRE;
+                       cmd->supported |= SUPPORTED_FIBRE;
+               }
+               cmd->autoneg = AUTONEG_DISABLE;
+               cmd->supported |= SUPPORTED_10000baseT_Full;
+               break;
+       case SPEED_40000:
                cmd->port = PORT_FIBRE;
                cmd->autoneg = AUTONEG_DISABLE;
+               cmd->supported |= SUPPORTED_FIBRE;
+               cmd->supported |= SUPPORTED_40000baseCR4_Full;
+               break;
        }
        cmd->duplex = nic->duplex;
        ethtool_cmd_speed_set(cmd, nic->speed);
@@ -278,8 +317,8 @@ static void nicvf_get_ethtool_stats(struct net_device *netdev,
                                    struct ethtool_stats *stats, u64 *data)
 {
        struct nicvf *nic = netdev_priv(netdev);
-       int stat;
-       int sqs;
+       int stat, tmp_stats;
+       int sqs, cpu;
 
        nicvf_update_stats(nic);
 
@@ -289,9 +328,13 @@ static void nicvf_get_ethtool_stats(struct net_device *netdev,
        for (stat = 0; stat < nicvf_n_hw_stats; stat++)
                *(data++) = ((u64 *)&nic->hw_stats)
                                [nicvf_hw_stats[stat].index];
-       for (stat = 0; stat < nicvf_n_drv_stats; stat++)
-               *(data++) = ((u64 *)&nic->drv_stats)
-                               [nicvf_drv_stats[stat].index];
+       for (stat = 0; stat < nicvf_n_drv_stats; stat++) {
+               tmp_stats = 0;
+               for_each_possible_cpu(cpu)
+                       tmp_stats += ((u64 *)per_cpu_ptr(nic->drv_stats, cpu))
+                                    [nicvf_drv_stats[stat].index];
+               *(data++) = tmp_stats;
+       }
 
        nicvf_get_qset_stats(nic, stats, &data);
 
@@ -677,6 +720,55 @@ static int nicvf_set_channels(struct net_device *dev,
        return err;
 }
 
+static void nicvf_get_pauseparam(struct net_device *dev,
+                                struct ethtool_pauseparam *pause)
+{
+       struct nicvf *nic = netdev_priv(dev);
+       union nic_mbx mbx = {};
+
+       /* Supported only for 10G/40G interfaces */
+       if ((nic->mac_type == BGX_MODE_SGMII) ||
+           (nic->mac_type == BGX_MODE_QSGMII) ||
+           (nic->mac_type == BGX_MODE_RGMII))
+               return;
+
+       mbx.pfc.msg = NIC_MBOX_MSG_PFC;
+       mbx.pfc.get = 1;
+       if (!nicvf_send_msg_to_pf(nic, &mbx)) {
+               pause->autoneg = nic->pfc.autoneg;
+               pause->rx_pause = nic->pfc.fc_rx;
+               pause->tx_pause = nic->pfc.fc_tx;
+       }
+}
+
+static int nicvf_set_pauseparam(struct net_device *dev,
+                               struct ethtool_pauseparam *pause)
+{
+       struct nicvf *nic = netdev_priv(dev);
+       union nic_mbx mbx = {};
+
+       /* Supported only for 10G/40G interfaces */
+       if ((nic->mac_type == BGX_MODE_SGMII) ||
+           (nic->mac_type == BGX_MODE_QSGMII) ||
+           (nic->mac_type == BGX_MODE_RGMII))
+               return -EOPNOTSUPP;
+
+       if (pause->autoneg)
+               return -EOPNOTSUPP;
+
+       mbx.pfc.msg = NIC_MBOX_MSG_PFC;
+       mbx.pfc.get = 0;
+       mbx.pfc.fc_rx = pause->rx_pause;
+       mbx.pfc.fc_tx = pause->tx_pause;
+       if (nicvf_send_msg_to_pf(nic, &mbx))
+               return -EAGAIN;
+
+       nic->pfc.fc_rx = pause->rx_pause;
+       nic->pfc.fc_tx = pause->tx_pause;
+
+       return 0;
+}
+
 static const struct ethtool_ops nicvf_ethtool_ops = {
        .get_settings           = nicvf_get_settings,
        .get_link               = nicvf_get_link,
@@ -698,6 +790,8 @@ static const struct ethtool_ops nicvf_ethtool_ops = {
        .set_rxfh               = nicvf_set_rxfh,
        .get_channels           = nicvf_get_channels,
        .set_channels           = nicvf_set_channels,
+       .get_pauseparam         = nicvf_get_pauseparam,
+       .set_pauseparam         = nicvf_set_pauseparam,
        .get_ts_info            = ethtool_op_get_ts_info,
 };
 
index b192712c93b7bd6bf39b1dd5cf394fbaba8da62b..2006f58b14b17ec3c5262b7244b3375539a8fae3 100644 (file)
@@ -69,25 +69,6 @@ static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
                return qidx;
 }
 
-static inline void nicvf_set_rx_frame_cnt(struct nicvf *nic,
-                                         struct sk_buff *skb)
-{
-       if (skb->len <= 64)
-               nic->drv_stats.rx_frames_64++;
-       else if (skb->len <= 127)
-               nic->drv_stats.rx_frames_127++;
-       else if (skb->len <= 255)
-               nic->drv_stats.rx_frames_255++;
-       else if (skb->len <= 511)
-               nic->drv_stats.rx_frames_511++;
-       else if (skb->len <= 1023)
-               nic->drv_stats.rx_frames_1023++;
-       else if (skb->len <= 1518)
-               nic->drv_stats.rx_frames_1518++;
-       else
-               nic->drv_stats.rx_frames_jumbo++;
-}
-
 /* The Cavium ThunderX network controller can *only* be found in SoCs
  * containing the ThunderX ARM64 CPU implementation.  All accesses to the device
  * registers on this platform are implicitly strongly ordered with respect
@@ -240,6 +221,7 @@ static void  nicvf_handle_mbx_intr(struct nicvf *nic)
                nic->link_up = mbx.link_status.link_up;
                nic->duplex = mbx.link_status.duplex;
                nic->speed = mbx.link_status.speed;
+               nic->mac_type = mbx.link_status.mac_type;
                if (nic->link_up) {
                        netdev_info(nic->netdev, "%s: Link is Up %d Mbps %s\n",
                                    nic->netdev->name, nic->speed,
@@ -274,6 +256,12 @@ static void  nicvf_handle_mbx_intr(struct nicvf *nic)
                nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf;
                nic->pf_acked = true;
                break;
+       case NIC_MBOX_MSG_PFC:
+               nic->pfc.autoneg = mbx.pfc.autoneg;
+               nic->pfc.fc_rx = mbx.pfc.fc_rx;
+               nic->pfc.fc_tx = mbx.pfc.fc_tx;
+               nic->pf_acked = true;
+               break;
        default:
                netdev_err(nic->netdev,
                           "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
@@ -492,9 +480,6 @@ int nicvf_set_real_num_queues(struct net_device *netdev,
 static int nicvf_init_resources(struct nicvf *nic)
 {
        int err;
-       union nic_mbx mbx = {};
-
-       mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
 
        /* Enable Qset */
        nicvf_qset_config(nic, true);
@@ -507,14 +492,10 @@ static int nicvf_init_resources(struct nicvf *nic)
                return err;
        }
 
-       /* Send VF config done msg to PF */
-       nicvf_write_to_mbx(nic, &mbx);
-
        return 0;
 }
 
 static void nicvf_snd_pkt_handler(struct net_device *netdev,
-                                 struct cmp_queue *cq,
                                  struct cqe_send_t *cqe_tx,
                                  int cqe_type, int budget,
                                  unsigned int *tx_pkts, unsigned int *tx_bytes)
@@ -536,7 +517,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev,
                   __func__, cqe_tx->sq_qs, cqe_tx->sq_idx,
                   cqe_tx->sqe_ptr, hdr->subdesc_cnt);
 
-       nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);
+       nicvf_check_cqe_tx_errs(nic, cqe_tx);
        skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
        if (skb) {
                /* Check for dummy descriptor used for HW TSO offload on 88xx */
@@ -630,8 +611,6 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
                return;
        }
 
-       nicvf_set_rx_frame_cnt(nic, skb);
-
        nicvf_set_rxhash(netdev, cqe_rx, skb);
 
        skb_record_rx_queue(skb, rq_idx);
@@ -665,6 +644,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
        struct cmp_queue *cq = &qs->cq[cq_idx];
        struct cqe_rx_t *cq_desc;
        struct netdev_queue *txq;
+       struct snd_queue *sq;
        unsigned int tx_pkts = 0, tx_bytes = 0;
 
        spin_lock_bh(&cq->lock);
@@ -703,7 +683,7 @@ loop:
                        work_done++;
                break;
                case CQE_TYPE_SEND:
-                       nicvf_snd_pkt_handler(netdev, cq,
+                       nicvf_snd_pkt_handler(netdev,
                                              (void *)cq_desc, CQE_TYPE_SEND,
                                              budget, &tx_pkts, &tx_bytes);
                        tx_done++;
@@ -730,17 +710,21 @@ loop:
 
 done:
        /* Wakeup TXQ if its stopped earlier due to SQ full */
-       if (tx_done) {
+       sq = &nic->qs->sq[cq_idx];
+       if (tx_done ||
+           (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) {
                netdev = nic->pnicvf->netdev;
                txq = netdev_get_tx_queue(netdev,
                                          nicvf_netdev_qidx(nic, cq_idx));
                if (tx_pkts)
                        netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
 
-               nic = nic->pnicvf;
+               /* To read updated queue and carrier status */
+               smp_mb();
                if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
-                       netif_tx_start_queue(txq);
-                       nic->drv_stats.txq_wake++;
+                       netif_tx_wake_queue(txq);
+                       nic = nic->pnicvf;
+                       this_cpu_inc(nic->drv_stats->txq_wake);
                        if (netif_msg_tx_err(nic))
                                netdev_warn(netdev,
                                            "%s: Transmit queue wakeup SQ%d\n",
@@ -1075,6 +1059,9 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
        struct nicvf *nic = netdev_priv(netdev);
        int qid = skb_get_queue_mapping(skb);
        struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid);
+       struct nicvf *snic;
+       struct snd_queue *sq;
+       int tmp;
 
        /* Check for minimum packet length */
        if (skb->len <= ETH_HLEN) {
@@ -1082,13 +1069,39 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
                return NETDEV_TX_OK;
        }
 
-       if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) {
+       snic = nic;
+       /* Get secondary Qset's SQ structure */
+       if (qid >= MAX_SND_QUEUES_PER_QS) {
+               tmp = qid / MAX_SND_QUEUES_PER_QS;
+               snic = (struct nicvf *)nic->snicvf[tmp - 1];
+               if (!snic) {
+                       netdev_warn(nic->netdev,
+                                   "Secondary Qset#%d's ptr not initialized\n",
+                                   tmp - 1);
+                       dev_kfree_skb(skb);
+                       return NETDEV_TX_OK;
+               }
+               qid = qid % MAX_SND_QUEUES_PER_QS;
+       }
+
+       sq = &snic->qs->sq[qid];
+       if (!netif_tx_queue_stopped(txq) &&
+           !nicvf_sq_append_skb(snic, sq, skb, qid)) {
                netif_tx_stop_queue(txq);
-               nic->drv_stats.txq_stop++;
-               if (netif_msg_tx_err(nic))
-                       netdev_warn(netdev,
-                                   "%s: Transmit ring full, stopping SQ%d\n",
-                                   netdev->name, qid);
+
+               /* Barrier, so that stop_queue is visible to other cpus */
+               smp_mb();
+
+               /* Check again, in case another cpu freed descriptors */
+               if (atomic_read(&sq->free_cnt) > MIN_SQ_DESC_PER_PKT_XMIT) {
+                       netif_tx_wake_queue(txq);
+               } else {
+                       this_cpu_inc(nic->drv_stats->txq_stop);
+                       if (netif_msg_tx_err(nic))
+                               netdev_warn(netdev,
+                                           "%s: Transmit ring full, stopping SQ%d\n",
+                                           netdev->name, qid);
+               }
                return NETDEV_TX_BUSY;
        }
 
@@ -1189,14 +1202,24 @@ int nicvf_stop(struct net_device *netdev)
        return 0;
 }
 
+static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
+{
+       union nic_mbx mbx = {};
+
+       mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
+       mbx.frs.max_frs = mtu;
+       mbx.frs.vf_id = nic->vf_id;
+
+       return nicvf_send_msg_to_pf(nic, &mbx);
+}
+
 int nicvf_open(struct net_device *netdev)
 {
-       int err, qidx;
+       int cpu, err, qidx;
        struct nicvf *nic = netdev_priv(netdev);
        struct queue_set *qs = nic->qs;
        struct nicvf_cq_poll *cq_poll = NULL;
-
-       nic->mtu = netdev->mtu;
+       union nic_mbx mbx = {};
 
        netif_carrier_off(netdev);
 
@@ -1248,9 +1271,17 @@ int nicvf_open(struct net_device *netdev)
        if (nic->sqs_mode)
                nicvf_get_primary_vf_struct(nic);
 
-       /* Configure receive side scaling */
-       if (!nic->sqs_mode)
+       /* Configure receive side scaling and MTU */
+       if (!nic->sqs_mode) {
                nicvf_rss_init(nic);
+               if (nicvf_update_hw_max_frs(nic, netdev->mtu))
+                       goto cleanup;
+
+               /* Clear percpu stats */
+               for_each_possible_cpu(cpu)
+                       memset(per_cpu_ptr(nic->drv_stats, cpu), 0,
+                              sizeof(struct nicvf_drv_stats));
+       }
 
        err = nicvf_register_interrupts(nic);
        if (err)
@@ -1276,8 +1307,9 @@ int nicvf_open(struct net_device *netdev)
        for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
                nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);
 
-       nic->drv_stats.txq_stop = 0;
-       nic->drv_stats.txq_wake = 0;
+       /* Send VF config done msg to PF */
+       mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
+       nicvf_write_to_mbx(nic, &mbx);
 
        return 0;
 cleanup:
@@ -1297,25 +1329,20 @@ napi_del:
        return err;
 }
 
-static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
-{
-       union nic_mbx mbx = {};
-
-       mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
-       mbx.frs.max_frs = mtu;
-       mbx.frs.vf_id = nic->vf_id;
-
-       return nicvf_send_msg_to_pf(nic, &mbx);
-}
-
 static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
 {
        struct nicvf *nic = netdev_priv(netdev);
+       int orig_mtu = netdev->mtu;
 
-       if (nicvf_update_hw_max_frs(nic, new_mtu))
-               return -EINVAL;
        netdev->mtu = new_mtu;
-       nic->mtu = new_mtu;
+
+       if (!netif_running(netdev))
+               return 0;
+
+       if (nicvf_update_hw_max_frs(nic, new_mtu)) {
+               netdev->mtu = orig_mtu;
+               return -EINVAL;
+       }
 
        return 0;
 }
@@ -1373,9 +1400,10 @@ void nicvf_update_lmac_stats(struct nicvf *nic)
 
 void nicvf_update_stats(struct nicvf *nic)
 {
-       int qidx;
+       int qidx, cpu;
+       u64 tmp_stats = 0;
        struct nicvf_hw_stats *stats = &nic->hw_stats;
-       struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
+       struct nicvf_drv_stats *drv_stats;
        struct queue_set *qs = nic->qs;
 
 #define GET_RX_STATS(reg) \
@@ -1398,21 +1426,33 @@ void nicvf_update_stats(struct nicvf *nic)
        stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
        stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);
 
-       stats->tx_bytes_ok = GET_TX_STATS(TX_OCTS);
-       stats->tx_ucast_frames_ok = GET_TX_STATS(TX_UCAST);
-       stats->tx_bcast_frames_ok = GET_TX_STATS(TX_BCAST);
-       stats->tx_mcast_frames_ok = GET_TX_STATS(TX_MCAST);
+       stats->tx_bytes = GET_TX_STATS(TX_OCTS);
+       stats->tx_ucast_frames = GET_TX_STATS(TX_UCAST);
+       stats->tx_bcast_frames = GET_TX_STATS(TX_BCAST);
+       stats->tx_mcast_frames = GET_TX_STATS(TX_MCAST);
        stats->tx_drops = GET_TX_STATS(TX_DROP);
 
-       drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok +
-                                 stats->tx_bcast_frames_ok +
-                                 stats->tx_mcast_frames_ok;
-       drv_stats->rx_frames_ok = stats->rx_ucast_frames +
-                                 stats->rx_bcast_frames +
-                                 stats->rx_mcast_frames;
-       drv_stats->rx_drops = stats->rx_drop_red +
-                             stats->rx_drop_overrun;
-       drv_stats->tx_drops = stats->tx_drops;
+       /* On T88 pass 2.0, the dummy SQE added for TSO notification
+        * via CQE has 'dont_send' set. Hence HW drops the pkt
+        * pointed to by dummy SQE and results in tx_drops counter being
+        * incremented. Subtracting it from tx_tso counter will give
+        * exact tx_drops counter.
+        */
+       if (nic->t88 && nic->hw_tso) {
+               for_each_possible_cpu(cpu) {
+                       drv_stats = per_cpu_ptr(nic->drv_stats, cpu);
+                       tmp_stats += drv_stats->tx_tso;
+               }
+               stats->tx_drops = tmp_stats - stats->tx_drops;
+       }
+       stats->tx_frames = stats->tx_ucast_frames +
+                          stats->tx_bcast_frames +
+                          stats->tx_mcast_frames;
+       stats->rx_frames = stats->rx_ucast_frames +
+                          stats->rx_bcast_frames +
+                          stats->rx_mcast_frames;
+       stats->rx_drops = stats->rx_drop_red +
+                         stats->rx_drop_overrun;
 
        /* Update RQ and SQ stats */
        for (qidx = 0; qidx < qs->rq_cnt; qidx++)
@@ -1426,18 +1466,17 @@ static struct rtnl_link_stats64 *nicvf_get_stats64(struct net_device *netdev,
 {
        struct nicvf *nic = netdev_priv(netdev);
        struct nicvf_hw_stats *hw_stats = &nic->hw_stats;
-       struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
 
        nicvf_update_stats(nic);
 
        stats->rx_bytes = hw_stats->rx_bytes;
-       stats->rx_packets = drv_stats->rx_frames_ok;
-       stats->rx_dropped = drv_stats->rx_drops;
+       stats->rx_packets = hw_stats->rx_frames;
+       stats->rx_dropped = hw_stats->rx_drops;
        stats->multicast = hw_stats->rx_mcast_frames;
 
-       stats->tx_bytes = hw_stats->tx_bytes_ok;
-       stats->tx_packets = drv_stats->tx_frames_ok;
-       stats->tx_dropped = drv_stats->tx_drops;
+       stats->tx_bytes = hw_stats->tx_bytes;
+       stats->tx_packets = hw_stats->tx_frames;
+       stats->tx_dropped = hw_stats->tx_drops;
 
        return stats;
 }
@@ -1450,7 +1489,7 @@ static void nicvf_tx_timeout(struct net_device *dev)
                netdev_warn(dev, "%s: Transmit timed out, resetting\n",
                            dev->name);
 
-       nic->drv_stats.tx_timeout++;
+       this_cpu_inc(nic->drv_stats->tx_timeout);
        schedule_work(&nic->reset_task);
 }
 
@@ -1584,6 +1623,12 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                goto err_free_netdev;
        }
 
+       nic->drv_stats = netdev_alloc_pcpu_stats(struct nicvf_drv_stats);
+       if (!nic->drv_stats) {
+               err = -ENOMEM;
+               goto err_free_netdev;
+       }
+
        err = nicvf_set_qset_resources(nic);
        if (err)
                goto err_free_netdev;
@@ -1646,6 +1691,8 @@ err_unregister_interrupts:
        nicvf_unregister_interrupts(nic);
 err_free_netdev:
        pci_set_drvdata(pdev, NULL);
+       if (nic->drv_stats)
+               free_percpu(nic->drv_stats);
        free_netdev(netdev);
 err_release_regions:
        pci_release_regions(pdev);
@@ -1673,6 +1720,8 @@ static void nicvf_remove(struct pci_dev *pdev)
                unregister_netdev(pnetdev);
        nicvf_unregister_interrupts(nic);
        pci_set_drvdata(pdev, NULL);
+       if (nic->drv_stats)
+               free_percpu(nic->drv_stats);
        free_netdev(netdev);
        pci_release_regions(pdev);
        pci_disable_device(pdev);
index a4fc501558817639fbc0da5cb9bb388f0f49a51f..d2ac133e36f177aa548a1a5e21964737e4b1e27e 100644 (file)
@@ -104,7 +104,8 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
                nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
                                           order);
                if (!nic->rb_page) {
-                       nic->drv_stats.rcv_buffer_alloc_failures++;
+                       this_cpu_inc(nic->pnicvf->drv_stats->
+                                    rcv_buffer_alloc_failures);
                        return -ENOMEM;
                }
                nic->rb_page_offset = 0;
@@ -270,7 +271,8 @@ refill:
                              rbdr_idx, new_rb);
 next_rbdr:
        /* Re-enable RBDR interrupts only if buffer allocation is success */
-       if (!nic->rb_alloc_fail && rbdr->enable)
+       if (!nic->rb_alloc_fail && rbdr->enable &&
+           netif_running(nic->pnicvf->netdev))
                nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx);
 
        if (rbdr_idx)
@@ -361,6 +363,8 @@ static int nicvf_init_snd_queue(struct nicvf *nic,
 
 static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
 {
+       struct sk_buff *skb;
+
        if (!sq)
                return;
        if (!sq->dmem.base)
@@ -371,6 +375,15 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
                                  sq->dmem.q_len * TSO_HEADER_SIZE,
                                  sq->tso_hdrs, sq->tso_hdrs_phys);
 
+       /* Free pending skbs in the queue */
+       smp_rmb();
+       while (sq->head != sq->tail) {
+               skb = (struct sk_buff *)sq->skbuff[sq->head];
+               if (skb)
+                       dev_kfree_skb_any(skb);
+               sq->head++;
+               sq->head &= (sq->dmem.q_len - 1);
+       }
        kfree(sq->skbuff);
        nicvf_free_q_desc_mem(nic, &sq->dmem);
 }
@@ -483,9 +496,12 @@ static void nicvf_reset_rcv_queue_stats(struct nicvf *nic)
 {
        union nic_mbx mbx = {};
 
-       /* Reset all RXQ's stats */
+       /* Reset all RQ/SQ and VF stats */
        mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
+       mbx.reset_stat.rx_stat_mask = 0x3FFF;
+       mbx.reset_stat.tx_stat_mask = 0x1F;
        mbx.reset_stat.rq_stat_mask = 0xFFFF;
+       mbx.reset_stat.sq_stat_mask = 0xFFFF;
        nicvf_send_msg_to_pf(nic, &mbx);
 }
 
@@ -528,19 +544,26 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
        nicvf_send_msg_to_pf(nic, &mbx);
 
        mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
-       mbx.rq.cfg = (1ULL << 63) | (1ULL << 62) | (qs->vnic_id << 0);
+       mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
+                    (RQ_PASS_RBDR_LVL << 16) | (RQ_PASS_CQ_LVL << 8) |
+                    (qs->vnic_id << 0);
        nicvf_send_msg_to_pf(nic, &mbx);
 
        /* RQ drop config
         * Enable CQ drop to reserve sufficient CQEs for all tx packets
         */
        mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
-       mbx.rq.cfg = (1ULL << 62) | (RQ_CQ_DROP << 8);
+       mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
+                    (RQ_PASS_RBDR_LVL << 40) | (RQ_DROP_RBDR_LVL << 32) |
+                    (RQ_PASS_CQ_LVL << 16) | (RQ_DROP_CQ_LVL << 8);
        nicvf_send_msg_to_pf(nic, &mbx);
 
-       nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0, 0x00);
-       if (!nic->sqs_mode)
+       if (!nic->sqs_mode && (qidx == 0)) {
+               /* Enable checking L3/L4 length and TCP/UDP checksums */
+               nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, 0,
+                                     (BIT(24) | BIT(23) | BIT(21)));
                nicvf_config_vlan_stripping(nic, nic->netdev->features);
+       }
 
        /* Enable Receive queue */
        memset(&rq_cfg, 0, sizeof(struct rq_cfg));
@@ -631,6 +654,7 @@ static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
        sq_cfg.ldwb = 0;
        sq_cfg.qsize = SND_QSIZE;
        sq_cfg.tstmp_bgx_intf = 0;
+       sq_cfg.cq_limit = 0;
        nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg);
 
        /* Set threshold value for interrupt generation */
@@ -1029,7 +1053,7 @@ nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
                hdr->tso_max_paysize = skb_shinfo(skb)->gso_size;
                /* For non-tunneled pkts, point this to L2 ethertype */
                hdr->inner_l3_offset = skb_network_offset(skb) - 2;
-               nic->drv_stats.tx_tso++;
+               this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
        }
 }
 
@@ -1161,35 +1185,17 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
 
        nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt);
 
-       nic->drv_stats.tx_tso++;
+       this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
        return 1;
 }
 
 /* Append an skb to a SQ for packet transfer. */
-int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
+int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
+                       struct sk_buff *skb, u8 sq_num)
 {
        int i, size;
        int subdesc_cnt, tso_sqe = 0;
-       int sq_num, qentry;
-       struct queue_set *qs;
-       struct snd_queue *sq;
-
-       sq_num = skb_get_queue_mapping(skb);
-       if (sq_num >= MAX_SND_QUEUES_PER_QS) {
-               /* Get secondary Qset's SQ structure */
-               i = sq_num / MAX_SND_QUEUES_PER_QS;
-               if (!nic->snicvf[i - 1]) {
-                       netdev_warn(nic->netdev,
-                                   "Secondary Qset#%d's ptr not initialized\n",
-                                   i - 1);
-                       return 1;
-               }
-               nic = (struct nicvf *)nic->snicvf[i - 1];
-               sq_num = sq_num % MAX_SND_QUEUES_PER_QS;
-       }
-
-       qs = nic->qs;
-       sq = &qs->sq[sq_num];
+       int qentry;
 
        subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
        if (subdesc_cnt > atomic_read(&sq->free_cnt))
@@ -1422,8 +1428,6 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
 /* Check for errors in the receive cmp.queue entry */
 int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 {
-       struct nicvf_hw_stats *stats = &nic->hw_stats;
-
        if (!cqe_rx->err_level && !cqe_rx->err_opcode)
                return 0;
 
@@ -1435,76 +1439,76 @@ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 
        switch (cqe_rx->err_opcode) {
        case CQ_RX_ERROP_RE_PARTIAL:
-               stats->rx_bgx_truncated_pkts++;
+               this_cpu_inc(nic->drv_stats->rx_bgx_truncated_pkts);
                break;
        case CQ_RX_ERROP_RE_JABBER:
-               stats->rx_jabber_errs++;
+               this_cpu_inc(nic->drv_stats->rx_jabber_errs);
                break;
        case CQ_RX_ERROP_RE_FCS:
-               stats->rx_fcs_errs++;
+               this_cpu_inc(nic->drv_stats->rx_fcs_errs);
                break;
        case CQ_RX_ERROP_RE_RX_CTL:
-               stats->rx_bgx_errs++;
+               this_cpu_inc(nic->drv_stats->rx_bgx_errs);
                break;
        case CQ_RX_ERROP_PREL2_ERR:
-               stats->rx_prel2_errs++;
+               this_cpu_inc(nic->drv_stats->rx_prel2_errs);
                break;
        case CQ_RX_ERROP_L2_MAL:
-               stats->rx_l2_hdr_malformed++;
+               this_cpu_inc(nic->drv_stats->rx_l2_hdr_malformed);
                break;
        case CQ_RX_ERROP_L2_OVERSIZE:
-               stats->rx_oversize++;
+               this_cpu_inc(nic->drv_stats->rx_oversize);
                break;
        case CQ_RX_ERROP_L2_UNDERSIZE:
-               stats->rx_undersize++;
+               this_cpu_inc(nic->drv_stats->rx_undersize);
                break;
        case CQ_RX_ERROP_L2_LENMISM:
-               stats->rx_l2_len_mismatch++;
+               this_cpu_inc(nic->drv_stats->rx_l2_len_mismatch);
                break;
        case CQ_RX_ERROP_L2_PCLP:
-               stats->rx_l2_pclp++;
+               this_cpu_inc(nic->drv_stats->rx_l2_pclp);
                break;
        case CQ_RX_ERROP_IP_NOT:
-               stats->rx_ip_ver_errs++;
+               this_cpu_inc(nic->drv_stats->rx_ip_ver_errs);
                break;
        case CQ_RX_ERROP_IP_CSUM_ERR:
-               stats->rx_ip_csum_errs++;
+               this_cpu_inc(nic->drv_stats->rx_ip_csum_errs);
                break;
        case CQ_RX_ERROP_IP_MAL:
-               stats->rx_ip_hdr_malformed++;
+               this_cpu_inc(nic->drv_stats->rx_ip_hdr_malformed);
                break;
        case CQ_RX_ERROP_IP_MALD:
-               stats->rx_ip_payload_malformed++;
+               this_cpu_inc(nic->drv_stats->rx_ip_payload_malformed);
                break;
        case CQ_RX_ERROP_IP_HOP:
-               stats->rx_ip_ttl_errs++;
+               this_cpu_inc(nic->drv_stats->rx_ip_ttl_errs);
                break;
        case CQ_RX_ERROP_L3_PCLP:
-               stats->rx_l3_pclp++;
+               this_cpu_inc(nic->drv_stats->rx_l3_pclp);
                break;
        case CQ_RX_ERROP_L4_MAL:
-               stats->rx_l4_malformed++;
+               this_cpu_inc(nic->drv_stats->rx_l4_malformed);
                break;
        case CQ_RX_ERROP_L4_CHK:
-               stats->rx_l4_csum_errs++;
+               this_cpu_inc(nic->drv_stats->rx_l4_csum_errs);
                break;
        case CQ_RX_ERROP_UDP_LEN:
-               stats->rx_udp_len_errs++;
+               this_cpu_inc(nic->drv_stats->rx_udp_len_errs);
                break;
        case CQ_RX_ERROP_L4_PORT:
-               stats->rx_l4_port_errs++;
+               this_cpu_inc(nic->drv_stats->rx_l4_port_errs);
                break;
        case CQ_RX_ERROP_TCP_FLAG:
-               stats->rx_tcp_flag_errs++;
+               this_cpu_inc(nic->drv_stats->rx_tcp_flag_errs);
                break;
        case CQ_RX_ERROP_TCP_OFFSET:
-               stats->rx_tcp_offset_errs++;
+               this_cpu_inc(nic->drv_stats->rx_tcp_offset_errs);
                break;
        case CQ_RX_ERROP_L4_PCLP:
-               stats->rx_l4_pclp++;
+               this_cpu_inc(nic->drv_stats->rx_l4_pclp);
                break;
        case CQ_RX_ERROP_RBDR_TRUNC:
-               stats->rx_truncated_pkts++;
+               this_cpu_inc(nic->drv_stats->rx_truncated_pkts);
                break;
        }
 
@@ -1512,53 +1516,52 @@ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 }
 
 /* Check for errors in the send cmp.queue entry */
-int nicvf_check_cqe_tx_errs(struct nicvf *nic,
-                           struct cmp_queue *cq, struct cqe_send_t *cqe_tx)
+int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx)
 {
-       struct cmp_queue_stats *stats = &cq->stats;
-
        switch (cqe_tx->send_status) {
        case CQ_TX_ERROP_GOOD:
-               stats->tx.good++;
                return 0;
        case CQ_TX_ERROP_DESC_FAULT:
-               stats->tx.desc_fault++;
+               this_cpu_inc(nic->drv_stats->tx_desc_fault);
                break;
        case CQ_TX_ERROP_HDR_CONS_ERR:
-               stats->tx.hdr_cons_err++;
+               this_cpu_inc(nic->drv_stats->tx_hdr_cons_err);
                break;
        case CQ_TX_ERROP_SUBDC_ERR:
-               stats->tx.subdesc_err++;
+               this_cpu_inc(nic->drv_stats->tx_subdesc_err);
+               break;
+       case CQ_TX_ERROP_MAX_SIZE_VIOL:
+               this_cpu_inc(nic->drv_stats->tx_max_size_exceeded);
                break;
        case CQ_TX_ERROP_IMM_SIZE_OFLOW:
-               stats->tx.imm_size_oflow++;
+               this_cpu_inc(nic->drv_stats->tx_imm_size_oflow);
                break;
        case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
-               stats->tx.data_seq_err++;
+               this_cpu_inc(nic->drv_stats->tx_data_seq_err);
                break;
        case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
-               stats->tx.mem_seq_err++;
+               this_cpu_inc(nic->drv_stats->tx_mem_seq_err);
                break;
        case CQ_TX_ERROP_LOCK_VIOL:
-               stats->tx.lock_viol++;
+               this_cpu_inc(nic->drv_stats->tx_lock_viol);
                break;
        case CQ_TX_ERROP_DATA_FAULT:
-               stats->tx.data_fault++;
+               this_cpu_inc(nic->drv_stats->tx_data_fault);
                break;
        case CQ_TX_ERROP_TSTMP_CONFLICT:
-               stats->tx.tstmp_conflict++;
+               this_cpu_inc(nic->drv_stats->tx_tstmp_conflict);
                break;
        case CQ_TX_ERROP_TSTMP_TIMEOUT:
-               stats->tx.tstmp_timeout++;
+               this_cpu_inc(nic->drv_stats->tx_tstmp_timeout);
                break;
        case CQ_TX_ERROP_MEM_FAULT:
-               stats->tx.mem_fault++;
+               this_cpu_inc(nic->drv_stats->tx_mem_fault);
                break;
        case CQ_TX_ERROP_CK_OVERLAP:
-               stats->tx.csum_overlap++;
+               this_cpu_inc(nic->drv_stats->tx_csum_overlap);
                break;
        case CQ_TX_ERROP_CK_OFLOW:
-               stats->tx.csum_overflow++;
+               this_cpu_inc(nic->drv_stats->tx_csum_overflow);
                break;
        }
 
index 869f3386028b1e765c4860cf86560f1f6b564b42..9e2104675bc9dc0cbae020f73476d04836c01870 100644 (file)
 
 #define MAX_CQES_FOR_TX                ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \
                                 MAX_CQE_PER_PKT_XMIT)
-/* Calculate number of CQEs to reserve for all SQEs.
- * Its 1/256th level of CQ size.
- * '+ 1' to account for pipelining
+
+/* RED and Backpressure levels of CQ for pkt reception
+ * For CQ, level is a measure of emptiness i.e 0x0 means full
+ * eg: For CQ of size 4K, and for pass/drop levels of 160/144
+ * HW accepts pkt if unused CQE >= 2560
+ * RED accepts pkt if unused CQE < 2560 & >= 2304
+ * DROPs pkts if unused CQE < 2304
+ */
+#define RQ_PASS_CQ_LVL         160ULL
+#define RQ_DROP_CQ_LVL         144ULL
+
+/* RED and Backpressure levels of RBDR for pkt reception
+ * For RBDR, level is a measure of fullness i.e 0x0 means empty
+ * eg: For RBDR of size 8K, and for pass/drop levels of 8/0
+ * HW accepts pkt if unused RBs >= 256
+ * RED accepts pkt if unused RBs < 256 & >= 0
+ * DROPs pkts if unused RBs < 0
  */
-#define RQ_CQ_DROP             ((256 / (CMP_QUEUE_LEN / \
-                                (CMP_QUEUE_LEN - MAX_CQES_FOR_TX))) + 1)
+#define RQ_PASS_RBDR_LVL       8ULL
+#define RQ_DROP_RBDR_LVL       0ULL
 
 /* Descriptor size in bytes */
 #define SND_QUEUE_DESC_SIZE    16
@@ -158,6 +172,7 @@ enum CQ_TX_ERROP_E {
        CQ_TX_ERROP_DESC_FAULT = 0x10,
        CQ_TX_ERROP_HDR_CONS_ERR = 0x11,
        CQ_TX_ERROP_SUBDC_ERR = 0x12,
+       CQ_TX_ERROP_MAX_SIZE_VIOL = 0x13,
        CQ_TX_ERROP_IMM_SIZE_OFLOW = 0x80,
        CQ_TX_ERROP_DATA_SEQUENCE_ERR = 0x81,
        CQ_TX_ERROP_MEM_SEQUENCE_ERR = 0x82,
@@ -171,25 +186,6 @@ enum CQ_TX_ERROP_E {
        CQ_TX_ERROP_ENUM_LAST = 0x8a,
 };
 
-struct cmp_queue_stats {
-       struct tx_stats {
-               u64 good;
-               u64 desc_fault;
-               u64 hdr_cons_err;
-               u64 subdesc_err;
-               u64 imm_size_oflow;
-               u64 data_seq_err;
-               u64 mem_seq_err;
-               u64 lock_viol;
-               u64 data_fault;
-               u64 tstmp_conflict;
-               u64 tstmp_timeout;
-               u64 mem_fault;
-               u64 csum_overlap;
-               u64 csum_overflow;
-       } tx;
-} ____cacheline_aligned_in_smp;
-
 enum RQ_SQ_STATS {
        RQ_SQ_STATS_OCTS,
        RQ_SQ_STATS_PKTS,
@@ -241,7 +237,6 @@ struct cmp_queue {
        spinlock_t      lock;  /* lock to serialize processing CQEs */
        void            *desc;
        struct q_desc_mem   dmem;
-       struct cmp_queue_stats  stats;
        int             irq;
 } ____cacheline_aligned_in_smp;
 
@@ -311,7 +306,8 @@ void nicvf_sq_disable(struct nicvf *nic, int qidx);
 void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt);
 void nicvf_sq_free_used_descs(struct net_device *netdev,
                              struct snd_queue *sq, int qidx);
-int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb);
+int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
+                       struct sk_buff *skb, u8 sq_num);
 
 struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx);
 void nicvf_rbdr_task(unsigned long data);
@@ -336,6 +332,5 @@ u64  nicvf_queue_reg_read(struct nicvf *nic,
 void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx);
 void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx);
 int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx);
-int nicvf_check_cqe_tx_errs(struct nicvf *nic,
-                           struct cmp_queue *cq, struct cqe_send_t *cqe_tx);
+int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx);
 #endif /* NICVF_QUEUES_H */
index 9e6d9876bfd0488361cc130b90fa7297fb9db0fb..f36347237a5425c7baf48540ad3fa2fd555b882d 100644 (file)
@@ -624,7 +624,9 @@ struct cq_cfg {
 
 struct sq_cfg {
 #if defined(__BIG_ENDIAN_BITFIELD)
-       u64 reserved_20_63:44;
+       u64 reserved_32_63:32;
+       u64 cq_limit:8;
+       u64 reserved_20_23:4;
        u64 ena:1;
        u64 reserved_18_18:1;
        u64 reset:1;
@@ -642,7 +644,9 @@ struct sq_cfg {
        u64 reset:1;
        u64 reserved_18_18:1;
        u64 ena:1;
-       u64 reserved_20_63:44;
+       u64 reserved_20_23:4;
+       u64 cq_limit:8;
+       u64 reserved_32_63:32;
 #endif
 };
 
index 8bbaedbb7b946353470f4bb57138f542025b068b..9211c750e0642660bfdd79bb023f9058a89c18af 100644 (file)
@@ -161,6 +161,7 @@ void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status)
                return;
 
        lmac = &bgx->lmac[lmacid];
+       link->mac_type = lmac->lmac_type;
        link->link_up = lmac->link_up;
        link->duplex = lmac->last_duplex;
        link->speed = lmac->last_speed;
@@ -211,6 +212,47 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
 }
 EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
 
+void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause)
+{
+       struct pfc *pfc = (struct pfc *)pause;
+       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       struct lmac *lmac;
+       u64 cfg;
+
+       if (!bgx)
+               return;
+       lmac = &bgx->lmac[lmacid];
+       if (lmac->is_sgmii)
+               return;
+
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_CBFC_CTL);
+       pfc->fc_rx = cfg & RX_EN;
+       pfc->fc_tx = cfg & TX_EN;
+       pfc->autoneg = 0;
+}
+EXPORT_SYMBOL(bgx_lmac_get_pfc);
+
+void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause)
+{
+       struct pfc *pfc = (struct pfc *)pause;
+       struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+       struct lmac *lmac;
+       u64 cfg;
+
+       if (!bgx)
+               return;
+       lmac = &bgx->lmac[lmacid];
+       if (lmac->is_sgmii)
+               return;
+
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_CBFC_CTL);
+       cfg &= ~(RX_EN | TX_EN);
+       cfg |= (pfc->fc_rx ? RX_EN : 0x00);
+       cfg |= (pfc->fc_tx ? TX_EN : 0x00);
+       bgx_reg_write(bgx, lmacid, BGX_SMUX_CBFC_CTL, cfg);
+}
+EXPORT_SYMBOL(bgx_lmac_set_pfc);
+
 static void bgx_sgmii_change_link_state(struct lmac *lmac)
 {
        struct bgx *bgx = lmac->bgx;
@@ -524,6 +566,18 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, struct lmac *lmac)
        cfg |= SMU_TX_CTL_DIC_EN;
        bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_CTL, cfg);
 
+       /* Enable receive and transmission of pause frames */
+       bgx_reg_write(bgx, lmacid, BGX_SMUX_CBFC_CTL, ((0xffffULL << 32) |
+                     BCK_EN | DRP_EN | TX_EN | RX_EN));
+       /* Configure pause time and interval */
+       bgx_reg_write(bgx, lmacid,
+                     BGX_SMUX_TX_PAUSE_PKT_TIME, DEFAULT_PAUSE_TIME);
+       cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_PAUSE_PKT_INTERVAL);
+       cfg &= ~0xFFFFull;
+       bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_PAUSE_PKT_INTERVAL,
+                     cfg | (DEFAULT_PAUSE_TIME - 0x1000));
+       bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_PAUSE_ZERO, 0x01);
+
        /* take lmac_count into account */
        bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_THRESH, (0x100 - 1));
        /* max packet size */
@@ -970,11 +1024,25 @@ static void bgx_set_lmac_config(struct bgx *bgx, u8 idx)
                lmac_set_training(bgx, lmac, lmac->lmacid);
                lmac_set_lane2sds(bgx, lmac);
 
-               /* Set LMAC type of other lmac on same DLM i.e LMAC 1/3 */
                olmac = &bgx->lmac[idx + 1];
-               olmac->lmac_type = lmac->lmac_type;
+               /*  Check if other LMAC on the same DLM is already configured by
+                *  firmware; if so keep that config, otherwise use the same
+                *  config as that of LMAC 0/2.
+                *  This check is needed as on 80xx only one lane of each of the
+                *  DLM of BGX0 is used, so have to rely on firmware for
+                *  distinguishing 80xx from 81xx.
+                */
+               cmr_cfg = bgx_reg_read(bgx, idx + 1, BGX_CMRX_CFG);
+               lmac_type = (u8)((cmr_cfg >> 8) & 0x07);
+               lane_to_sds = (u8)(cmr_cfg & 0xFF);
+               if ((lmac_type == 0) && (lane_to_sds == 0xE4)) {
+                       olmac->lmac_type = lmac->lmac_type;
+                       lmac_set_lane2sds(bgx, olmac);
+               } else {
+                       olmac->lmac_type = lmac_type;
+                       olmac->lane_to_sds = lane_to_sds;
+               }
                lmac_set_training(bgx, olmac, olmac->lmacid);
-               lmac_set_lane2sds(bgx, olmac);
        }
 }
 
@@ -1242,8 +1310,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid);
        if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) {
-               bgx->bgx_id =
-                   (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1;
+               bgx->bgx_id = (pci_resource_start(pdev,
+                       PCI_CFG_REG_BAR_NUM) >> 24) & BGX_ID_MASK;
                bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE;
                bgx->max_lmac = MAX_LMAC_PER_BGX;
                bgx_vnic[bgx->bgx_id] = bgx;
index d59c71e4a0008bb576c1c3a6e192eb225381c82a..c18ebfeb203919ea9b16e406bb678bd05a98a656 100644 (file)
@@ -27,6 +27,9 @@
 #define    MAX_BGX_CHANS_PER_LMAC              16
 #define    MAX_DMAC_PER_LMAC                   8
 #define    MAX_FRAME_SIZE                      9216
+#define    DEFAULT_PAUSE_TIME                  0xFFFF
+
+#define           BGX_ID_MASK                          0x3
 
 #define    MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE   2
 
 #define  SMU_RX_CTL_STATUS                     (3ull << 0)
 #define BGX_SMUX_TX_APPEND             0x20100
 #define  SMU_TX_APPEND_FCS_D                   BIT_ULL(2)
+#define BGX_SMUX_TX_PAUSE_PKT_TIME     0x20110
 #define BGX_SMUX_TX_MIN_PKT            0x20118
+#define BGX_SMUX_TX_PAUSE_PKT_INTERVAL 0x20120
+#define BGX_SMUX_TX_PAUSE_ZERO         0x20138
 #define BGX_SMUX_TX_INT                        0x20140
 #define BGX_SMUX_TX_CTL                        0x20178
 #define  SMU_TX_CTL_DIC_EN                     BIT_ULL(0)
 #define BGX_SMUX_CTL                   0x20200
 #define  SMU_CTL_RX_IDLE                       BIT_ULL(0)
 #define  SMU_CTL_TX_IDLE                       BIT_ULL(1)
+#define        BGX_SMUX_CBFC_CTL               0x20218
+#define        RX_EN                                   BIT_ULL(0)
+#define        TX_EN                                   BIT_ULL(1)
+#define        BCK_EN                                  BIT_ULL(2)
+#define        DRP_EN                                  BIT_ULL(3)
 
 #define BGX_GMP_PCS_MRX_CTL            0x30000
 #define         PCS_MRX_CTL_RST_AN                     BIT_ULL(9)
@@ -205,6 +216,9 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac);
 void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status);
 void bgx_lmac_internal_loopback(int node, int bgx_idx,
                                int lmac_idx, bool enable);
+void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause);
+void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause);
+
 void xcv_init_hw(void);
 void xcv_setup_link(bool link_up, int link_speed);
 
index 2125903043fbb12c08f29d0f6f9e3f56e81e222b..0bce1bf9ca0fc587f84b8f2f5f8bd28227b22362 100644 (file)
@@ -635,6 +635,7 @@ struct tx_sw_desc;
 
 struct sge_txq {
        unsigned int  in_use;       /* # of in-use Tx descriptors */
+       unsigned int  q_type;       /* Q type Eth/Ctrl/Ofld */
        unsigned int  size;         /* # of descriptors */
        unsigned int  cidx;         /* SW consumer index */
        unsigned int  pidx;         /* producer index */
@@ -665,7 +666,7 @@ struct sge_eth_txq {                /* state for an SGE Ethernet Tx queue */
        unsigned long mapping_err;  /* # of I/O MMU packet mapping errors */
 } ____cacheline_aligned_in_smp;
 
-struct sge_ofld_txq {               /* state for an SGE offload Tx queue */
+struct sge_uld_txq {               /* state for an SGE offload Tx queue */
        struct sge_txq q;
        struct adapter *adap;
        struct sk_buff_head sendq;  /* list of backpressured packets */
@@ -693,14 +694,20 @@ struct sge_uld_rxq_info {
        u8 uld;                 /* uld type */
 };
 
+struct sge_uld_txq_info {
+       struct sge_uld_txq *uldtxq; /* Txq's for ULD */
+       atomic_t users;         /* num users */
+       u16 ntxq;               /* # of egress uld queues */
+};
+
 struct sge {
        struct sge_eth_txq ethtxq[MAX_ETH_QSETS];
-       struct sge_ofld_txq ofldtxq[MAX_OFLD_QSETS];
        struct sge_ctrl_txq ctrlq[MAX_CTRL_QUEUES];
 
        struct sge_eth_rxq ethrxq[MAX_ETH_QSETS];
        struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
        struct sge_uld_rxq_info **uld_rxq_info;
+       struct sge_uld_txq_info **uld_txq_info;
 
        struct sge_rspq intrq ____cacheline_aligned_in_smp;
        spinlock_t intrq_lock;
@@ -1298,8 +1305,9 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
                          unsigned int cmplqid);
 int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
                        unsigned int cmplqid);
-int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
-                         struct net_device *dev, unsigned int iqid);
+int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
+                        struct net_device *dev, unsigned int iqid,
+                        unsigned int uld_type);
 irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
 int t4_sge_init(struct adapter *adap);
 void t4_sge_start(struct adapter *adap);
@@ -1661,4 +1669,7 @@ int t4_uld_mem_alloc(struct adapter *adap);
 void t4_uld_clean_up(struct adapter *adap);
 void t4_register_netevent_notifier(void);
 void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
+void free_tx_desc(struct adapter *adap, struct sge_txq *q,
+                 unsigned int n, bool unmap);
+void free_txq(struct adapter *adap, struct sge_txq *q);
 #endif /* __CXGB4_H__ */
index 20455d082cb80297a16fc8d50fb8ad9c20083116..acc231293e4d342c07b98f529e83956e4d727177 100644 (file)
@@ -2512,18 +2512,6 @@ do { \
                RL("FLLow:", fl.low);
                RL("FLStarving:", fl.starving);
 
-       } else if (ofld_idx < ofld_entries) {
-               const struct sge_ofld_txq *tx =
-                       &adap->sge.ofldtxq[ofld_idx * 4];
-               int n = min(4, adap->sge.ofldqsets - 4 * ofld_idx);
-
-               S("QType:", "OFLD-Txq");
-               T("TxQ ID:", q.cntxt_id);
-               T("TxQ size:", q.size);
-               T("TxQ inuse:", q.in_use);
-               T("TxQ CIDX:", q.cidx);
-               T("TxQ PIDX:", q.pidx);
-
        } else if (ctrl_idx < ctrl_entries) {
                const struct sge_ctrl_txq *tx = &adap->sge.ctrlq[ctrl_idx * 4];
                int n = min(4, adap->params.nports - 4 * ctrl_idx);
index b0bb23f95beb4c036d0f5e9ea720ac2153fcc337..449884f8dd67267eddc530006d56144b0d6c814f 100644 (file)
@@ -530,15 +530,15 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
 
                txq = q->adap->sge.egr_map[qid - q->adap->sge.egr_start];
                txq->restarts++;
-               if ((u8 *)txq < (u8 *)q->adap->sge.ofldtxq) {
+               if (txq->q_type == CXGB4_TXQ_ETH) {
                        struct sge_eth_txq *eq;
 
                        eq = container_of(txq, struct sge_eth_txq, q);
                        netif_tx_wake_queue(eq->txq);
                } else {
-                       struct sge_ofld_txq *oq;
+                       struct sge_uld_txq *oq;
 
-                       oq = container_of(txq, struct sge_ofld_txq, q);
+                       oq = container_of(txq, struct sge_uld_txq, q);
                        tasklet_schedule(&oq->qresume_tsk);
                }
        } else if (opcode == CPL_FW6_MSG || opcode == CPL_FW4_MSG) {
@@ -885,15 +885,6 @@ static int setup_sge_queues(struct adapter *adap)
                }
        }
 
-       j = s->ofldqsets / adap->params.nports; /* iscsi queues per channel */
-       for_each_ofldtxq(s, i) {
-               err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i],
-                                           adap->port[i / j],
-                                           s->fw_evtq.cntxt_id);
-               if (err)
-                       goto freeout;
-       }
-
        for_each_port(adap, i) {
                /* Note that cmplqid below is 0 if we don't
                 * have RDMA queues, and that's the right value.
@@ -1922,8 +1913,18 @@ static void disable_dbs(struct adapter *adap)
 
        for_each_ethrxq(&adap->sge, i)
                disable_txq_db(&adap->sge.ethtxq[i].q);
-       for_each_ofldtxq(&adap->sge, i)
-               disable_txq_db(&adap->sge.ofldtxq[i].q);
+       if (is_offload(adap)) {
+               struct sge_uld_txq_info *txq_info =
+                       adap->sge.uld_txq_info[CXGB4_TX_OFLD];
+
+               if (txq_info) {
+                       for_each_ofldtxq(&adap->sge, i) {
+                               struct sge_uld_txq *txq = &txq_info->uldtxq[i];
+
+                               disable_txq_db(&txq->q);
+                       }
+               }
+       }
        for_each_port(adap, i)
                disable_txq_db(&adap->sge.ctrlq[i].q);
 }
@@ -1934,8 +1935,18 @@ static void enable_dbs(struct adapter *adap)
 
        for_each_ethrxq(&adap->sge, i)
                enable_txq_db(adap, &adap->sge.ethtxq[i].q);
-       for_each_ofldtxq(&adap->sge, i)
-               enable_txq_db(adap, &adap->sge.ofldtxq[i].q);
+       if (is_offload(adap)) {
+               struct sge_uld_txq_info *txq_info =
+                       adap->sge.uld_txq_info[CXGB4_TX_OFLD];
+
+               if (txq_info) {
+                       for_each_ofldtxq(&adap->sge, i) {
+                               struct sge_uld_txq *txq = &txq_info->uldtxq[i];
+
+                               enable_txq_db(adap, &txq->q);
+                       }
+               }
+       }
        for_each_port(adap, i)
                enable_txq_db(adap, &adap->sge.ctrlq[i].q);
 }
@@ -2006,8 +2017,17 @@ static void recover_all_queues(struct adapter *adap)
 
        for_each_ethrxq(&adap->sge, i)
                sync_txq_pidx(adap, &adap->sge.ethtxq[i].q);
-       for_each_ofldtxq(&adap->sge, i)
-               sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q);
+       if (is_offload(adap)) {
+               struct sge_uld_txq_info *txq_info =
+                       adap->sge.uld_txq_info[CXGB4_TX_OFLD];
+               if (txq_info) {
+                       for_each_ofldtxq(&adap->sge, i) {
+                               struct sge_uld_txq *txq = &txq_info->uldtxq[i];
+
+                               sync_txq_pidx(adap, &txq->q);
+                       }
+               }
+       }
        for_each_port(adap, i)
                sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
 }
@@ -3991,7 +4011,7 @@ static inline bool is_x_10g_port(const struct link_config *lc)
 static void cfg_queues(struct adapter *adap)
 {
        struct sge *s = &adap->sge;
-       int i, n10g = 0, qidx = 0;
+       int i = 0, n10g = 0, qidx = 0;
 #ifndef CONFIG_CHELSIO_T4_DCB
        int q10g = 0;
 #endif
@@ -4006,8 +4026,7 @@ static void cfg_queues(struct adapter *adap)
                adap->params.crypto = 0;
        }
 
-       for_each_port(adap, i)
-               n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
+       n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
 #ifdef CONFIG_CHELSIO_T4_DCB
        /* For Data Center Bridging support we need to be able to support up
         * to 8 Traffic Priorities; each of which will be assigned to its
@@ -4055,7 +4074,7 @@ static void cfg_queues(struct adapter *adap)
                 * capped by the number of available cores.
                 */
                if (n10g) {
-                       i = num_online_cpus();
+                       i = min_t(int, MAX_OFLD_QSETS, num_online_cpus());
                        s->ofldqsets = roundup(i, adap->params.nports);
                } else {
                        s->ofldqsets = adap->params.nports;
@@ -4075,9 +4094,6 @@ static void cfg_queues(struct adapter *adap)
        for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++)
                s->ctrlq[i].q.size = 512;
 
-       for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
-               s->ofldtxq[i].q.size = 1024;
-
        init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64);
        init_rspq(adap, &s->intrq, 0, 1, 512, 64);
 }
index 0945fa49a5dd83251af4083535b27f081ae277b0..8098902c094a1d6e9e340dfbd54102079823e488 100644 (file)
@@ -135,15 +135,17 @@ static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
 }
 
 static int alloc_uld_rxqs(struct adapter *adap,
-                         struct sge_uld_rxq_info *rxq_info,
-                         unsigned int nq, unsigned int offset, bool lro)
+                         struct sge_uld_rxq_info *rxq_info, bool lro)
 {
        struct sge *s = &adap->sge;
-       struct sge_ofld_rxq *q = rxq_info->uldrxq + offset;
-       unsigned short *ids = rxq_info->rspq_id + offset;
-       unsigned int per_chan = nq / adap->params.nports;
+       unsigned int nq = rxq_info->nrxq + rxq_info->nciq;
+       struct sge_ofld_rxq *q = rxq_info->uldrxq;
+       unsigned short *ids = rxq_info->rspq_id;
        unsigned int bmap_idx = 0;
-       int i, err, msi_idx;
+       unsigned int per_chan;
+       int i, err, msi_idx, que_idx = 0;
+
+       per_chan = rxq_info->nrxq / adap->params.nports;
 
        if (adap->flags & USING_MSIX)
                msi_idx = 1;
@@ -151,12 +153,18 @@ static int alloc_uld_rxqs(struct adapter *adap,
                msi_idx = -((int)s->intrq.abs_id + 1);
 
        for (i = 0; i < nq; i++, q++) {
+               if (i == rxq_info->nrxq) {
+                       /* start allocation of concentrator queues */
+                       per_chan = rxq_info->nciq / adap->params.nports;
+                       que_idx = 0;
+               }
+
                if (msi_idx >= 0) {
                        bmap_idx = get_msix_idx_from_bmap(adap);
                        msi_idx = adap->msix_info_ulds[bmap_idx].idx;
                }
                err = t4_sge_alloc_rxq(adap, &q->rspq, false,
-                                      adap->port[i / per_chan],
+                                      adap->port[que_idx++ / per_chan],
                                       msi_idx,
                                       q->fl.size ? &q->fl : NULL,
                                       uldrx_handler,
@@ -165,29 +173,19 @@ static int alloc_uld_rxqs(struct adapter *adap,
                if (err)
                        goto freeout;
                if (msi_idx >= 0)
-                       rxq_info->msix_tbl[i + offset] = bmap_idx;
+                       rxq_info->msix_tbl[i] = bmap_idx;
                memset(&q->stats, 0, sizeof(q->stats));
                if (ids)
                        ids[i] = q->rspq.abs_id;
        }
        return 0;
 freeout:
-       q = rxq_info->uldrxq + offset;
+       q = rxq_info->uldrxq;
        for ( ; i; i--, q++) {
                if (q->rspq.desc)
                        free_rspq_fl(adap, &q->rspq,
                                     q->fl.size ? &q->fl : NULL);
        }
-
-       /* We need to free rxq also in case of ciq allocation failure */
-       if (offset) {
-               q = rxq_info->uldrxq + offset;
-               for ( ; i; i--, q++) {
-                       if (q->rspq.desc)
-                               free_rspq_fl(adap, &q->rspq,
-                                            q->fl.size ? &q->fl : NULL);
-               }
-       }
        return err;
 }
 
@@ -205,9 +203,7 @@ setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro)
                        return -ENOMEM;
        }
 
-       ret = !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) &&
-                !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq,
-                                rxq_info->nrxq, lro));
+       ret = !(!alloc_uld_rxqs(adap, rxq_info, lro));
 
        /* Tell uP to route control queue completions to rdma rspq */
        if (adap->flags & FULL_INIT_DONE &&
@@ -451,6 +447,106 @@ static void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type)
                quiesce_rx(adap, &rxq_info->uldrxq[idx].rspq);
 }
 
+/* Tear down the ULD Tx queues described by @txq_info.
+ *
+ * Walks all txq_info->ntxq slots; a slot whose descriptor ring pointer
+ * (q.desc) is unset is skipped.  For every other slot the resume tasklet is
+ * killed, the egress queue is freed in firmware, and the software
+ * descriptors, pending send queue and ring are released.
+ */
+static void
+free_sge_txq_uld(struct adapter *adap, struct sge_uld_txq_info *txq_info)
+{
+       int count = txq_info->ntxq;
+       int qidx;
+
+       for (qidx = 0; qidx < count; qidx++) {
+               struct sge_uld_txq *uq = &txq_info->uldtxq[qidx];
+
+               /* Nothing was allocated for this slot */
+               if (!uq || !uq->q.desc)
+                       continue;
+
+               tasklet_kill(&uq->qresume_tsk);
+               t4_ofld_eq_free(adap, adap->mbox, adap->pf, 0,
+                               uq->q.cntxt_id);
+               free_tx_desc(adap, &uq->q, uq->q.in_use, false);
+               kfree(uq->q.sdesc);
+               __skb_queue_purge(&uq->sendq);
+               free_txq(adap, &uq->q);
+       }
+}
+
+/* Allocate every ULD Tx queue in @txq_info.
+ *
+ * Each queue is sized to 1024 entries and bound to a port so that
+ * consecutive groups of (ntxq / nports) queues share the same port.  Queue
+ * completions are directed to the firmware event queue.  If any allocation
+ * fails, free_sge_txq_uld() is called on the whole set and the error from
+ * t4_sge_alloc_uld_txq() is returned; otherwise returns 0.
+ */
+static int
+alloc_sge_txq_uld(struct adapter *adap, struct sge_uld_txq_info *txq_info,
+                 unsigned int uld_type)
+{
+       int total = txq_info->ntxq;
+       int per_port = total / adap->params.nports;
+       int qidx;
+
+       for (qidx = 0; qidx < total; qidx++) {
+               struct sge_uld_txq *uq = &txq_info->uldtxq[qidx];
+               int err;
+
+               uq->q.size = 1024;
+               err = t4_sge_alloc_uld_txq(adap, uq,
+                                          adap->port[qidx / per_port],
+                                          adap->sge.fw_evtq.cntxt_id,
+                                          uld_type);
+               if (err) {
+                       free_sge_txq_uld(adap, txq_info);
+                       return err;
+               }
+       }
+
+       return 0;
+}
+
+/* Drop one user reference on the Tx queue set that @uld_type maps to.
+ *
+ * When the last reference goes away the queues, the queue array and the
+ * bookkeeping structure are freed and the adapter's slot is cleared.
+ */
+static void
+release_sge_txq_uld(struct adapter *adap, unsigned int uld_type)
+{
+       int slot = TX_ULD(uld_type);
+       struct sge_uld_txq_info *info = adap->sge.uld_txq_info[slot];
+
+       /* Nothing set up, or other users still hold the queue set */
+       if (!info || !atomic_dec_and_test(&info->users))
+               return;
+
+       free_sge_txq_uld(adap, info);
+       kfree(info->uldtxq);
+       kfree(info);
+       adap->sge.uld_txq_info[slot] = NULL;
+}
+
+/* Set up (or take a reference on) the Tx queue set used by @uld_type.
+ *
+ * The ULD type is mapped to a Tx queue set with TX_ULD().  For the offload
+ * set, an already existing queue set is shared: the user count is bumped and
+ * no new queues are created.  Otherwise a new set is allocated with
+ * min(uld_info->ntxq, online CPUs) queues, rounded up to a multiple of the
+ * number of ports, and published in adap->sge.uld_txq_info[].
+ *
+ * Returns 0 on success, -ENOMEM if the bookkeeping memory cannot be
+ * allocated, or the error reported by alloc_sge_txq_uld().
+ */
+static int
+setup_sge_txq_uld(struct adapter *adap, unsigned int uld_type,
+                 const struct cxgb4_uld_info *uld_info)
+{
+       struct sge_uld_txq_info *txq_info = NULL;
+       int tx_uld_type, i, ret;
+
+       tx_uld_type = TX_ULD(uld_type);
+       txq_info = adap->sge.uld_txq_info[tx_uld_type];
+
+       /* Offload queues are shared: later users only take a reference */
+       if ((tx_uld_type == CXGB4_TX_OFLD) && txq_info &&
+           (atomic_inc_return(&txq_info->users) > 1))
+               return 0;
+
+       txq_info = kzalloc(sizeof(*txq_info), GFP_KERNEL);
+       if (!txq_info)
+               return -ENOMEM;
+
+       i = min_t(int, uld_info->ntxq, num_online_cpus());
+       txq_info->ntxq = roundup(i, adap->params.nports);
+
+       txq_info->uldtxq = kcalloc(txq_info->ntxq, sizeof(struct sge_uld_txq),
+                                  GFP_KERNEL);
+       if (!txq_info->uldtxq) {
+               ret = -ENOMEM;
+               goto free_info;
+       }
+
+       /* Hand the real failure reason back to the caller */
+       ret = alloc_sge_txq_uld(adap, txq_info, tx_uld_type);
+       if (ret)
+               goto free_queues;
+
+       atomic_inc(&txq_info->users);
+       adap->sge.uld_txq_info[tx_uld_type] = txq_info;
+       return 0;
+
+free_queues:
+       kfree(txq_info->uldtxq);
+free_info:
+       kfree(txq_info);
+       return ret;
+}
+
 static void uld_queue_init(struct adapter *adap, unsigned int uld_type,
                           struct cxgb4_lld_info *lli)
 {
@@ -476,7 +572,15 @@ int t4_uld_mem_alloc(struct adapter *adap)
        if (!s->uld_rxq_info)
                goto err_uld;
 
+       s->uld_txq_info = kzalloc(CXGB4_TX_MAX *
+                                 sizeof(struct sge_uld_txq_info *),
+                                 GFP_KERNEL);
+       if (!s->uld_txq_info)
+               goto err_uld_rx;
        return 0;
+
+err_uld_rx:
+       kfree(s->uld_rxq_info);
 err_uld:
        kfree(adap->uld);
        return -ENOMEM;
@@ -486,6 +590,7 @@ void t4_uld_mem_free(struct adapter *adap)
 {
        struct sge *s = &adap->sge;
 
+       kfree(s->uld_txq_info);
        kfree(s->uld_rxq_info);
        kfree(adap->uld);
 }
@@ -620,6 +725,9 @@ int cxgb4_register_uld(enum cxgb4_uld type,
                        ret = -EBUSY;
                        goto free_irq;
                }
+               ret = setup_sge_txq_uld(adap, type, p);
+               if (ret)
+                       goto free_irq;
                adap->uld[type] = *p;
                uld_attach(adap, type);
                adap_idx++;
@@ -648,6 +756,7 @@ out:
                        break;
                adap->uld[type].handle = NULL;
                adap->uld[type].add = NULL;
+               release_sge_txq_uld(adap, type);
                if (adap->flags & FULL_INIT_DONE)
                        quiesce_rx_uld(adap, type);
                if (adap->flags & USING_MSIX)
@@ -683,6 +792,7 @@ int cxgb4_unregister_uld(enum cxgb4_uld type)
                        continue;
                adap->uld[type].handle = NULL;
                adap->uld[type].add = NULL;
+               release_sge_txq_uld(adap, type);
                if (adap->flags & FULL_INIT_DONE)
                        quiesce_rx_uld(adap, type);
                if (adap->flags & USING_MSIX)
index 2996793b1aaa3e463d8e11083d858c20acb91d15..4c856605fdfa17e339a7a7a8b0d4b99f497048c6 100644 (file)
@@ -77,6 +77,8 @@ enum {
 
 /* Special asynchronous notification message */
 #define CXGB4_MSG_AN ((void *)1)
+/* Map a ULD type to the Tx queue set it transmits on */
+#define TX_ULD(uld) \
+       (((uld) != CXGB4_ULD_CRYPTO) ? CXGB4_TX_OFLD : CXGB4_TX_CRYPTO)
 
 struct serv_entry {
        void *data;
@@ -223,6 +225,19 @@ enum cxgb4_uld {
        CXGB4_ULD_MAX
 };
 
+/* Tx queue sets available to ULDs; used as the index into the adapter's
+ * sge.uld_txq_info[] array (see TX_ULD()).  CXGB4_TX_MAX is the number of
+ * sets, not a valid index.
+ */
+enum cxgb4_tx_uld {
+       CXGB4_TX_OFLD,
+       CXGB4_TX_CRYPTO,
+       CXGB4_TX_MAX
+};
+
+/* Kind of SGE Tx queue, recorded in the queue's q_type field when the queue
+ * is allocated (Ethernet, ULD or control).
+ */
+enum cxgb4_txq_type {
+       CXGB4_TXQ_ETH,
+       CXGB4_TXQ_ULD,
+       CXGB4_TXQ_CTRL,
+       CXGB4_TXQ_MAX
+};
+
 enum cxgb4_state {
        CXGB4_STATE_UP,
        CXGB4_STATE_START_RECOVERY,
@@ -316,6 +331,7 @@ struct cxgb4_uld_info {
        void *handle;
        unsigned int nrxq;
        unsigned int rxq_size;
+       unsigned int ntxq;
        bool ciq;
        bool lro;
        void *(*add)(const struct cxgb4_lld_info *p);
@@ -333,6 +349,7 @@ struct cxgb4_uld_info {
 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
 int cxgb4_unregister_uld(enum cxgb4_uld type);
 int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
+int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb);
 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
 unsigned int cxgb4_port_chan(const struct net_device *dev);
 unsigned int cxgb4_port_viid(const struct net_device *dev);
index 539de764bbd30af3d2e148e9220228e710d6d3f5..cbd68a8fe2e48b54bd5a9296eac7e8cca32063e4 100644 (file)
@@ -210,8 +210,10 @@ static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p)
 
        /* Unbind queue from any existing class */
        err = t4_sched_queue_unbind(pi, p);
-       if (err)
+       if (err) {
+               t4_free_mem(qe);
                goto out;
+       }
 
        /* Bind queue to specified class */
        memset(qe, 0, sizeof(*qe));
index 1e74fd6085df43b954f2343cd47ba1f8b592deaf..9f606478c29cb2ad7fd445b9c6709eb2c92d5e8c 100644 (file)
@@ -377,8 +377,8 @@ unmap:                      dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
  *     Reclaims Tx descriptors from an SGE Tx queue and frees the associated
  *     Tx buffers.  Called with the Tx queue lock held.
  */
-static void free_tx_desc(struct adapter *adap, struct sge_txq *q,
-                        unsigned int n, bool unmap)
+void free_tx_desc(struct adapter *adap, struct sge_txq *q,
+                 unsigned int n, bool unmap)
 {
        struct tx_sw_desc *d;
        unsigned int cidx = q->cidx;
@@ -1543,7 +1543,7 @@ static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb)
  *     inability to map packets.  A periodic timer attempts to restart
  *     queues so marked.
  */
-static void txq_stop_maperr(struct sge_ofld_txq *q)
+static void txq_stop_maperr(struct sge_uld_txq *q)
 {
        q->mapping_err++;
        q->q.stops++;
@@ -1559,7 +1559,7 @@ static void txq_stop_maperr(struct sge_ofld_txq *q)
  *     Stops an offload Tx queue that has become full and modifies the packet
  *     being written to request a wakeup.
  */
-static void ofldtxq_stop(struct sge_ofld_txq *q, struct sk_buff *skb)
+static void ofldtxq_stop(struct sge_uld_txq *q, struct sk_buff *skb)
 {
        struct fw_wr_hdr *wr = (struct fw_wr_hdr *)skb->data;
 
@@ -1586,7 +1586,7 @@ static void ofldtxq_stop(struct sge_ofld_txq *q, struct sk_buff *skb)
  *     boolean "service_ofldq_running" to make sure that only one instance
  *     is ever running at a time ...
  */
-static void service_ofldq(struct sge_ofld_txq *q)
+static void service_ofldq(struct sge_uld_txq *q)
 {
        u64 *pos, *before, *end;
        int credits;
@@ -1706,7 +1706,7 @@ static void service_ofldq(struct sge_ofld_txq *q)
  *
  *     Send an offload packet through an SGE offload queue.
  */
-static int ofld_xmit(struct sge_ofld_txq *q, struct sk_buff *skb)
+static int ofld_xmit(struct sge_uld_txq *q, struct sk_buff *skb)
 {
        skb->priority = calc_tx_flits_ofld(skb);       /* save for restart */
        spin_lock(&q->sendq.lock);
@@ -1735,7 +1735,7 @@ static int ofld_xmit(struct sge_ofld_txq *q, struct sk_buff *skb)
  */
 static void restart_ofldq(unsigned long data)
 {
-       struct sge_ofld_txq *q = (struct sge_ofld_txq *)data;
+       struct sge_uld_txq *q = (struct sge_uld_txq *)data;
 
        spin_lock(&q->sendq.lock);
        q->full = 0;            /* the queue actually is completely empty now */
@@ -1767,17 +1767,23 @@ static inline unsigned int is_ctrl_pkt(const struct sk_buff *skb)
        return skb->queue_mapping & 1;
 }
 
+/* Send @skb on behalf of a ULD.
+ *
+ * Control packets (see is_ctrl_pkt()) go out on a control queue and never
+ * touch the ULD queue set, so they are handled before the set is looked up.
+ * All other packets are queued on the Tx queue selected by skb_txq() within
+ * the set chosen by @tx_uld_type.  If that set has not been set up, the
+ * packet is freed and NET_XMIT_DROP is returned rather than dereferencing a
+ * NULL pointer.
+ */
-static inline int ofld_send(struct adapter *adap, struct sk_buff *skb)
+static inline int uld_send(struct adapter *adap, struct sk_buff *skb,
+                          unsigned int tx_uld_type)
 {
+       struct sge_uld_txq_info *txq_info;
+       struct sge_uld_txq *txq;
        unsigned int idx = skb_txq(skb);
 
        if (unlikely(is_ctrl_pkt(skb))) {
                /* Single ctrl queue is a requirement for LE workaround path */
                if (adap->tids.nsftids)
                        idx = 0;
                return ctrl_xmit(&adap->sge.ctrlq[idx], skb);
        }
-       return ofld_xmit(&adap->sge.ofldtxq[idx], skb);
+
+       txq_info = adap->sge.uld_txq_info[tx_uld_type];
+       if (unlikely(!txq_info)) {
+               /* No ULD has set up this queue set (yet, or any more) */
+               WARN_ON(true);
+               kfree_skb(skb);
+               return NET_XMIT_DROP;
+       }
+
+       txq = &txq_info->uldtxq[idx];
+       return ofld_xmit(txq, skb);
 }
 
 /**
@@ -1794,7 +1800,7 @@ int t4_ofld_send(struct adapter *adap, struct sk_buff *skb)
        int ret;
 
        local_bh_disable();
-       ret = ofld_send(adap, skb);
+       ret = uld_send(adap, skb, CXGB4_TX_OFLD);
        local_bh_enable();
        return ret;
 }
@@ -1813,6 +1819,39 @@ int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(cxgb4_ofld_send);
 
+/**
+ *     t4_crypto_send - transmit a crypto packet
+ *     @adap: the adapter
+ *     @skb: the packet
+ *
+ *     Hands the packet to uld_send() on the crypto Tx queue set with bottom
+ *     halves disabled.  The Tx queue is chosen from the packet's
+ *     queue_mapping: bit 0 says whether the packet is a control or a regular
+ *     packet, bits 1-15 give the queue index.
+ */
+static int t4_crypto_send(struct adapter *adap, struct sk_buff *skb)
+{
+       int status;
+
+       local_bh_disable();
+       status = uld_send(adap, skb, CXGB4_TX_CRYPTO);
+       local_bh_enable();
+
+       return status;
+}
+
+/**
+ *     cxgb4_crypto_send - transmit a crypto packet
+ *     @dev: the net device
+ *     @skb: the packet
+ *
+ *     Exported wrapper around @t4_crypto_send for use by ULDs: resolves the
+ *     adapter that owns @dev and sends the packet through it.
+ */
+int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb)
+{
+       struct adapter *adap = netdev2adap(dev);
+
+       return t4_crypto_send(adap, skb);
+}
+EXPORT_SYMBOL(cxgb4_crypto_send);
+
 static inline void copy_frags(struct sk_buff *skb,
                              const struct pkt_gl *gl, unsigned int offset)
 {
@@ -2479,7 +2518,7 @@ static void sge_tx_timer_cb(unsigned long data)
        for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
                for (m = s->txq_maperr[i]; m; m &= m - 1) {
                        unsigned long id = __ffs(m) + i * BITS_PER_LONG;
-                       struct sge_ofld_txq *txq = s->egr_map[id];
+                       struct sge_uld_txq *txq = s->egr_map[id];
 
                        clear_bit(id, s->txq_maperr);
                        tasklet_schedule(&txq->qresume_tsk);
@@ -2799,6 +2838,7 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
                return ret;
        }
 
+       txq->q.q_type = CXGB4_TXQ_ETH;
        init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_G(ntohl(c.eqid_pkd)));
        txq->txq = netdevq;
        txq->tso = txq->tx_cso = txq->vlan_ins = 0;
@@ -2852,6 +2892,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
                return ret;
        }
 
+       txq->q.q_type = CXGB4_TXQ_CTRL;
        init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_G(ntohl(c.cmpliqid_eqid)));
        txq->adap = adap;
        skb_queue_head_init(&txq->sendq);
@@ -2872,13 +2913,15 @@ int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
        return t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
 }
 
-int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
-                         struct net_device *dev, unsigned int iqid)
+int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
+                        struct net_device *dev, unsigned int iqid,
+                        unsigned int uld_type)
 {
        int ret, nentries;
        struct fw_eq_ofld_cmd c;
        struct sge *s = &adap->sge;
        struct port_info *pi = netdev_priv(dev);
+       int cmd = FW_EQ_OFLD_CMD;
 
        /* Add status entries */
        nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
@@ -2891,7 +2934,9 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
                return -ENOMEM;
 
        memset(&c, 0, sizeof(c));
-       c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_OFLD_CMD) | FW_CMD_REQUEST_F |
+       if (unlikely(uld_type == CXGB4_TX_CRYPTO))
+               cmd = FW_EQ_CTRL_CMD;
+       c.op_to_vfn = htonl(FW_CMD_OP_V(cmd) | FW_CMD_REQUEST_F |
                            FW_CMD_WRITE_F | FW_CMD_EXEC_F |
                            FW_EQ_OFLD_CMD_PFN_V(adap->pf) |
                            FW_EQ_OFLD_CMD_VFN_V(0));
@@ -2919,6 +2964,7 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
                return ret;
        }
 
+       txq->q.q_type = CXGB4_TXQ_ULD;
        init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_G(ntohl(c.eqid_pkd)));
        txq->adap = adap;
        skb_queue_head_init(&txq->sendq);
@@ -2928,7 +2974,7 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
        return 0;
 }
 
-static void free_txq(struct adapter *adap, struct sge_txq *q)
+void free_txq(struct adapter *adap, struct sge_txq *q)
 {
        struct sge *s = &adap->sge;
 
@@ -2951,7 +2997,6 @@ void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
                   rq->cntxt_id, fl_id, 0xffff);
        dma_free_coherent(adap->pdev_dev, (rq->size + 1) * rq->iqe_len,
                          rq->desc, rq->phys_addr);
-       napi_hash_del(&rq->napi);
        netif_napi_del(&rq->napi);
        rq->netdev = NULL;
        rq->cntxt_id = rq->abs_id = 0;
@@ -3026,21 +3071,6 @@ void t4_free_sge_resources(struct adapter *adap)
                }
        }
 
-       /* clean up offload Tx queues */
-       for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) {
-               struct sge_ofld_txq *q = &adap->sge.ofldtxq[i];
-
-               if (q->q.desc) {
-                       tasklet_kill(&q->qresume_tsk);
-                       t4_ofld_eq_free(adap, adap->mbox, adap->pf, 0,
-                                       q->q.cntxt_id);
-                       free_tx_desc(adap, &q->q, q->q.in_use, false);
-                       kfree(q->q.sdesc);
-                       __skb_queue_purge(&q->sendq);
-                       free_txq(adap, &q->q);
-               }
-       }
-
        /* clean up control Tx queues */
        for (i = 0; i < ARRAY_SIZE(adap->sge.ctrlq); i++) {
                struct sge_ctrl_txq *cq = &adap->sge.ctrlq[i];
@@ -3093,12 +3123,34 @@ void t4_sge_stop(struct adapter *adap)
        if (s->tx_timer.function)
                del_timer_sync(&s->tx_timer);
 
-       for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++) {
-               struct sge_ofld_txq *q = &s->ofldtxq[i];
+       if (is_offload(adap)) {
+               struct sge_uld_txq_info *txq_info;
+
+               txq_info = adap->sge.uld_txq_info[CXGB4_TX_OFLD];
+               if (txq_info) {
+                       struct sge_uld_txq *txq = txq_info->uldtxq;
 
-               if (q->q.desc)
-                       tasklet_kill(&q->qresume_tsk);
+                       for_each_ofldtxq(&adap->sge, i) {
+                               if (txq->q.desc)
+                                       tasklet_kill(&txq->qresume_tsk);
+                       }
+               }
        }
+
+       if (is_pci_uld(adap)) {
+               struct sge_uld_txq_info *txq_info;
+
+               txq_info = adap->sge.uld_txq_info[CXGB4_TX_CRYPTO];
+               if (txq_info) {
+                       struct sge_uld_txq *txq = txq_info->uldtxq;
+
+                       for_each_ofldtxq(&adap->sge, i) {
+                               if (txq->q.desc)
+                                       tasklet_kill(&txq->qresume_tsk);
+                       }
+               }
+       }
+
        for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++) {
                struct sge_ctrl_txq *cq = &s->ctrlq[i];
 
index 50812a1d67bdf8aab1ca8cb5fe560c7c5e2cb778..df1573c4a6597e17845837d59a7e74d8fbad8838 100644 (file)
@@ -178,9 +178,9 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
        CH_PCI_ID_TABLE_FENTRY(0x6005),
        CH_PCI_ID_TABLE_FENTRY(0x6006),
        CH_PCI_ID_TABLE_FENTRY(0x6007),
+       CH_PCI_ID_TABLE_FENTRY(0x6008),
        CH_PCI_ID_TABLE_FENTRY(0x6009),
        CH_PCI_ID_TABLE_FENTRY(0x600d),
-       CH_PCI_ID_TABLE_FENTRY(0x6010),
        CH_PCI_ID_TABLE_FENTRY(0x6011),
        CH_PCI_ID_TABLE_FENTRY(0x6014),
        CH_PCI_ID_TABLE_FENTRY(0x6015),
index 130f910e47854958e7278c255eee742dd5f46d2b..9023c858715d5759064081eee54b120ddaace673 100644 (file)
@@ -33,7 +33,7 @@
 
 #define DRV_NAME               "enic"
 #define DRV_DESCRIPTION                "Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION            "2.3.0.20"
+#define DRV_VERSION            "2.3.0.31"
 #define DRV_COPYRIGHT          "Copyright 2008-2013 Cisco Systems, Inc"
 
 #define ENIC_BARS_MAX          6
index f514faf8b7093f883595569efedf338abdaee73c..cdd7a1a59aa7b4f8c1d13a41a02b3e20694270ff 100644 (file)
@@ -1166,12 +1166,18 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
                skb->protocol = eth_type_trans(skb, netdev);
                skb_record_rx_queue(skb, q_number);
                if (netdev->features & NETIF_F_RXHASH) {
-                       skb_set_hash(skb, rss_hash,
-                                    (rss_type &
-                                     (NIC_CFG_RSS_HASH_TYPE_TCP_IPV6_EX |
-                                      NIC_CFG_RSS_HASH_TYPE_TCP_IPV6 |
-                                      NIC_CFG_RSS_HASH_TYPE_TCP_IPV4)) ?
-                                    PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
+                       switch (rss_type) {
+                       case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4:
+                       case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6:
+                       case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX:
+                               skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4);
+                               break;
+                       case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4:
+                       case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6:
+                       case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX:
+                               skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3);
+                               break;
+                       }
                }
 
                /* Hardware does not provide whole packet checksum. It only
index e572a527b18dd593c54e52e33386481fc9fbe95b..36bc2c71fba981bdd05d73c7e5b4537b07e19cab 100644 (file)
@@ -169,19 +169,28 @@ int vnic_rq_disable(struct vnic_rq *rq)
 {
        unsigned int wait;
        struct vnic_dev *vdev = rq->vdev;
+       int i;
 
-       iowrite32(0, &rq->ctrl->enable);
+       /* Due to a race condition with clearing RQ "mini-cache" in hw, we need
+        * to disable the RQ twice to guarantee that stale descriptors are not
+        * used when this RQ is re-enabled.
+        */
+       for (i = 0; i < 2; i++) {
+               iowrite32(0, &rq->ctrl->enable);
 
-       /* Wait for HW to ACK disable request */
-       for (wait = 0; wait < 1000; wait++) {
-               if (!(ioread32(&rq->ctrl->running)))
-                       return 0;
-               udelay(10);
-       }
+               /* Wait for HW to ACK disable request */
+               for (wait = 20000; wait > 0; wait--)
+                       if (!ioread32(&rq->ctrl->running))
+                               break;
+               if (!wait) {
+                       vdev_neterr(vdev, "Failed to disable RQ[%d]\n",
+                                   rq->index);
 
-       vdev_neterr(vdev, "Failed to disable RQ[%d]\n", rq->index);
+                       return -ETIMEDOUT;
+               }
+       }
 
-       return -ETIMEDOUT;
+       return 0;
 }
 
 void vnic_rq_clean(struct vnic_rq *rq,
@@ -212,6 +221,11 @@ void vnic_rq_clean(struct vnic_rq *rq,
                        [fetch_index % VNIC_RQ_BUF_BLK_ENTRIES(count)];
        iowrite32(fetch_index, &rq->ctrl->posted_index);
 
+       /* Anytime we write fetch_index, we need to re-write 0 to rq->enable
+        * to re-sync internal VIC state.
+        */
+       iowrite32(0, &rq->ctrl->enable);
+
        vnic_dev_clear_desc_ring(&rq->ring);
 }
 
index 3f6152cc648c86feb94a85edbe8924bd2a95ea90..7e1633bf5a22ccf1c9c123ba541349b5dfea1064 100644 (file)
@@ -2796,7 +2796,6 @@ static void be_evt_queues_destroy(struct be_adapter *adapter)
                if (eqo->q.created) {
                        be_eq_clean(eqo);
                        be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
-                       napi_hash_del(&eqo->napi);
                        netif_napi_del(&eqo->napi);
                        free_cpumask_var(eqo->affinity_mask);
                }
index c044667a0a25ec6e5973f4ba1d15c4eb54b829d2..6456c180114bba6b7e9f2cca56bbe235f94b96ea 100644 (file)
@@ -966,6 +966,7 @@ static int ethoc_set_ringparam(struct net_device *dev,
 const struct ethtool_ops ethoc_ethtool_ops = {
        .get_regs_len = ethoc_get_regs_len,
        .get_regs = ethoc_get_regs,
+       .nway_reset = phy_ethtool_nway_reset,
        .get_link = ethtool_op_get_link,
        .get_ringparam = ethoc_get_ringparam,
        .set_ringparam = ethoc_set_ringparam,
index f928e6f79c8954ed24408ddf0ad21faed4dd4870..223f35cc034cf4f9846856d1cbb4c551a0014747 100644 (file)
@@ -669,6 +669,7 @@ static const struct of_device_id nps_enet_dt_ids[] = {
        { .compatible = "ezchip,nps-mgt-enet" },
        { /* Sentinel */ }
 };
+MODULE_DEVICE_TABLE(of, nps_enet_dt_ids);
 
 static struct platform_driver nps_enet_driver = {
        .probe = nps_enet_probe,
index d1ca45fbb1645f6490841d549c0202098dad9fab..0d415516b5778f7e5c4bdf7d291c9ed2ea64abcd 100644 (file)
@@ -8,7 +8,7 @@ config NET_VENDOR_FREESCALE
        depends on FSL_SOC || QUICC_ENGINE || CPM1 || CPM2 || PPC_MPC512x || \
                   M523x || M527x || M5272 || M528x || M520x || M532x || \
                   ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM) || \
-                  ARCH_LAYERSCAPE
+                  ARCH_LAYERSCAPE || COMPILE_TEST
        ---help---
          If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -65,6 +65,7 @@ config FSL_PQ_MDIO
 config FSL_XGMAC_MDIO
        tristate "Freescale XGMAC MDIO"
        select PHYLIB
+       depends on OF
        select OF_MDIO
        ---help---
          This driver supports the MDIO bus on the Fman 10G Ethernet MACs, and
@@ -85,6 +86,7 @@ config UGETH_TX_ON_DEMAND
 
 config GIANFAR
        tristate "Gianfar Ethernet"
+       depends on HAS_DMA
        select FSL_PQ_MDIO
        select PHYLIB
        select CRC32
@@ -93,4 +95,6 @@ config GIANFAR
          and MPC86xx family of chips, the eTSEC on LS1021A and the FEC
          on the 8540.
 
+source "drivers/net/ethernet/freescale/dpaa/Kconfig"
+
 endif # NET_VENDOR_FREESCALE
index cbe21dc7e37ee41dab11425d673ba0d20409074a..4a13115155c9c2516380eafd0bebc691c2c06f8c 100644 (file)
@@ -22,3 +22,4 @@ obj-$(CONFIG_UCC_GETH) += ucc_geth_driver.o
 ucc_geth_driver-objs := ucc_geth.o ucc_geth_ethtool.o
 
 obj-$(CONFIG_FSL_FMAN) += fman/
+obj-$(CONFIG_FSL_DPAA_ETH) += dpaa/
diff --git a/drivers/net/ethernet/freescale/dpaa/Kconfig b/drivers/net/ethernet/freescale/dpaa/Kconfig
new file mode 100644 (file)
index 0000000..f3a3454
--- /dev/null
@@ -0,0 +1,10 @@
+menuconfig FSL_DPAA_ETH
+       tristate "DPAA Ethernet"
+       depends on FSL_SOC && FSL_DPAA && FSL_FMAN
+       select PHYLIB
+       select FSL_FMAN_MAC
+       ---help---
+         Data Path Acceleration Architecture Ethernet driver,
+         supporting the Freescale QorIQ chips.
+         Depends on Freescale Buffer Manager and Queue Manager
+         driver and Frame Manager Driver.
diff --git a/drivers/net/ethernet/freescale/dpaa/Makefile b/drivers/net/ethernet/freescale/dpaa/Makefile
new file mode 100644 (file)
index 0000000..7db50bc
--- /dev/null
@@ -0,0 +1,12 @@
+#
+# Makefile for the Freescale DPAA Ethernet controllers
+#
+
+# Include FMan headers
+FMAN        = $(srctree)/drivers/net/ethernet/freescale/fman
+ccflags-y += -I$(FMAN)
+
+obj-$(CONFIG_FSL_DPAA_ETH) += fsl_dpa.o
+
+fsl_dpa-objs += dpaa_eth.o dpaa_ethtool.o dpaa_eth_sysfs.o
+CFLAGS_dpaa_eth.o := -I$(src)
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
new file mode 100644 (file)
index 0000000..3c48a84
--- /dev/null
@@ -0,0 +1,2753 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/io.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/highmem.h>
+#include <linux/percpu.h>
+#include <linux/dma-mapping.h>
+#include <linux/sort.h>
+#include <soc/fsl/bman.h>
+#include <soc/fsl/qman.h>
+
+#include "fman.h"
+#include "fman_port.h"
+#include "mac.h"
+#include "dpaa_eth.h"
+
+/* CREATE_TRACE_POINTS only needs to be defined once. Other dpaa files
+ * using trace events only need to #include "dpaa_eth_trace.h"
+ */
+#define CREATE_TRACE_POINTS
+#include "dpaa_eth_trace.h"
+
+static int debug = -1;
+module_param(debug, int, 0444);
+MODULE_PARM_DESC(debug, "Module/Driver verbosity level (0=none,...,16=all)");
+
+static u16 tx_timeout = 1000;
+module_param(tx_timeout, ushort, 0444);
+MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");
+
+#define FM_FD_STAT_RX_ERRORS                                           \
+       (FM_FD_ERR_DMA | FM_FD_ERR_PHYSICAL     | \
+        FM_FD_ERR_SIZE | FM_FD_ERR_CLS_DISCARD | \
+        FM_FD_ERR_EXTRACTION | FM_FD_ERR_NO_SCHEME     | \
+        FM_FD_ERR_PRS_TIMEOUT | FM_FD_ERR_PRS_ILL_INSTRUCT | \
+        FM_FD_ERR_PRS_HDR_ERR)
+
+#define FM_FD_STAT_TX_ERRORS \
+       (FM_FD_ERR_UNSUPPORTED_FORMAT | \
+        FM_FD_ERR_LENGTH | FM_FD_ERR_DMA)
+
+#define DPAA_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | \
+                         NETIF_MSG_LINK | NETIF_MSG_IFUP | \
+                         NETIF_MSG_IFDOWN)
+
+#define DPAA_INGRESS_CS_THRESHOLD 0x10000000
+/* Ingress congestion threshold on FMan ports
+ * The size in bytes of the ingress tail-drop threshold on FMan ports.
+ * Traffic piling up above this value will be rejected by QMan and discarded
+ * by FMan.
+ */
+
+/* Size in bytes of the FQ taildrop threshold */
+#define DPAA_FQ_TD 0x200000
+
+#define DPAA_CS_THRESHOLD_1G 0x06000000
+/* Egress congestion threshold on 1G ports, range 0x1000 .. 0x10000000
+ * The size in bytes of the egress Congestion State notification threshold on
+ * 1G ports. The 1G dTSECs can quite easily be flooded by cores doing Tx in a
+ * tight loop (e.g. by sending UDP datagrams at "while(1) speed"),
+ * and the larger the frame size, the more acute the problem.
+ * So we have to find a balance between these factors:
+ * - avoiding the device staying congested for a prolonged time (risking
+ *   the netdev watchdog to fire - see also the tx_timeout module param);
+ * - affecting performance of protocols such as TCP, which otherwise
+ *   behave well under the congestion notification mechanism;
+ * - preventing the Tx cores from tightly-looping (as if the congestion
+ *   threshold was too low to be effective);
+ * - running out of memory if the CS threshold is set too high.
+ */
+
+#define DPAA_CS_THRESHOLD_10G 0x10000000
+/* The size in bytes of the egress Congestion State notification threshold on
+ * 10G ports, range 0x1000 .. 0x10000000
+ */
+
+/* Largest value that the FQD's OAL field can hold */
+#define FSL_QMAN_MAX_OAL       127
+
+/* Default alignment for start of data in an Rx FD */
+#define DPAA_FD_DATA_ALIGNMENT  16
+
+/* Values for the L3R field of the FM Parse Results
+ */
+/* L3 Type field: First IP Present IPv4 */
+#define FM_L3_PARSE_RESULT_IPV4        0x8000
+/* L3 Type field: First IP Present IPv6 */
+#define FM_L3_PARSE_RESULT_IPV6        0x4000
+/* Values for the L4R field of the FM Parse Results */
+/* L4 Type field: UDP */
+#define FM_L4_PARSE_RESULT_UDP 0x40
+/* L4 Type field: TCP */
+#define FM_L4_PARSE_RESULT_TCP 0x20
+
+#define DPAA_SGT_MAX_ENTRIES 16 /* maximum number of entries in SG Table */
+#define DPAA_BUFF_RELEASE_MAX 8 /* maximum number of buffers released at once */
+
+#define FSL_DPAA_BPID_INV              0xff
+#define FSL_DPAA_ETH_MAX_BUF_COUNT     128
+#define FSL_DPAA_ETH_REFILL_THRESHOLD  80
+
+#define DPAA_TX_PRIV_DATA_SIZE 16
+#define DPAA_PARSE_RESULTS_SIZE sizeof(struct fman_prs_result)
+#define DPAA_TIME_STAMP_SIZE 8
+#define DPAA_HASH_RESULTS_SIZE 8
+#define DPAA_RX_PRIV_DATA_SIZE (u16)(DPAA_TX_PRIV_DATA_SIZE + \
+                                       dpaa_rx_extra_headroom)
+
+#define DPAA_ETH_RX_QUEUES     128
+
+#define DPAA_ENQUEUE_RETRIES   100000
+
+enum port_type {RX, TX};
+
+/* Frame queue pointers filled in by dpaa_alloc_all_fqs(); each one refers to
+ * the first descriptor returned by the matching dpaa_fq_alloc() call.
+ */
+struct fm_port_fqs {
+       struct dpaa_fq *tx_defq;        /* allocated as FQ_TYPE_TX_CONFIRM */
+       struct dpaa_fq *tx_errq;        /* allocated as FQ_TYPE_TX_ERROR */
+       struct dpaa_fq *rx_defq;        /* allocated as FQ_TYPE_RX_DEFAULT */
+       struct dpaa_fq *rx_errq;        /* allocated as FQ_TYPE_RX_ERROR */
+};
+
+/* All the dpa bps in use at any moment */
+static struct dpaa_bp *dpaa_bp_array[BM_MAX_NUM_OF_POOLS];
+
+/* The raw buffer size must be cacheline aligned */
+#define DPAA_BP_RAW_SIZE 4096
+/* When using more than one buffer pool, the raw sizes are as follows:
+ * 1 bp: 4KB
+ * 2 bp: 2KB, 4KB
+ * 3 bp: 1KB, 2KB, 4KB
+ * 4 bp: 1KB, 2KB, 4KB, 8KB
+ */
+static inline size_t bpool_buffer_raw_size(u8 index, u8 cnt)
+{
+       /* start from a quarter of the raw size and double it once per step */
+       size_t raw_size = DPAA_BP_RAW_SIZE / 4;
+       u8 step = cnt;
+
+       /* with three or more pools, pool 0 keeps the smallest size */
+       if (step > 3)
+               step = 3;
+
+       while (step < 3 + index) {
+               raw_size *= 2;
+               step++;
+       }
+
+       return raw_size;
+}
+
+/* FMan-DMA requires 16-byte alignment for Rx buffers, but SKB_DATA_ALIGN is
+ * even stronger (SMP_CACHE_BYTES-aligned), so we just get away with that,
+ * via SKB_WITH_OVERHEAD(). We can't rely on netdev_alloc_frag() giving us
+ * half-page-aligned buffers, so we reserve some more space for start-of-buffer
+ * alignment.
+ */
+#define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD((raw_size) - SMP_CACHE_BYTES)
+
+static int dpaa_max_frm;
+
+static int dpaa_rx_extra_headroom;
+
+#define dpaa_get_max_mtu()     \
+       (dpaa_max_frm - (VLAN_ETH_HLEN + ETH_FCS_LEN))
+
+/* Populate @net_dev from the driver private data and register it.
+ * @net_dev: net device whose netdev_priv() area is a struct dpaa_priv
+ * @dpaa_ops: net_device_ops to install on the device
+ * @tx_timeout: Tx watchdog timeout, in milliseconds
+ *
+ * Returns 0 on success, or the negative value returned by register_netdev().
+ */
+static int dpaa_netdev_init(struct net_device *net_dev,
+                           const struct net_device_ops *dpaa_ops,
+                           u16 tx_timeout)
+{
+       struct dpaa_priv *priv = netdev_priv(net_dev);
+       struct device *dev = net_dev->dev.parent;
+       struct dpaa_percpu_priv *percpu_priv;
+       const u8 *mac_addr;
+       int i, err;
+
+       /* Although we access another CPU's private data here
+        * we do it at initialization so it is safe
+        */
+       for_each_possible_cpu(i) {
+               percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
+               percpu_priv->net_dev = net_dev;
+       }
+
+       net_dev->netdev_ops = dpaa_ops;
+       mac_addr = priv->mac_dev->addr;
+
+       /* expose the MAC's resource range through the netdev */
+       net_dev->mem_start = priv->mac_dev->res->start;
+       net_dev->mem_end = priv->mac_dev->res->end;
+
+       net_dev->min_mtu = ETH_MIN_MTU;
+       net_dev->max_mtu = dpaa_get_max_mtu();
+
+       net_dev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+                                NETIF_F_LLTX);
+
+       net_dev->hw_features |= NETIF_F_SG | NETIF_F_HIGHDMA;
+       /* The kernel enables GSO automatically, if we declare NETIF_F_SG.
+        * For conformity, we'll still declare GSO explicitly.
+        */
+       net_dev->features |= NETIF_F_GSO;
+
+       net_dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+       /* we do not want shared skbs on TX */
+       net_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+
+       /* everything the hardware offers is enabled by default */
+       net_dev->features |= net_dev->hw_features;
+       net_dev->vlan_features = net_dev->features;
+
+       /* the MAC device's address is both the permanent and current one */
+       memcpy(net_dev->perm_addr, mac_addr, net_dev->addr_len);
+       memcpy(net_dev->dev_addr, mac_addr, net_dev->addr_len);
+
+       net_dev->ethtool_ops = &dpaa_ethtool_ops;
+
+       net_dev->needed_headroom = priv->tx_headroom;
+       net_dev->watchdog_timeo = msecs_to_jiffies(tx_timeout);
+
+       /* start without the RUNNING flag, phylib controls it later */
+       netif_carrier_off(net_dev);
+
+       err = register_netdev(net_dev);
+       if (err < 0) {
+               dev_err(dev, "register_netdev() = %d\n", err);
+               return err;
+       }
+
+       return 0;
+}
+
+/* Stop Tx, the MAC and the FMan ports, then disconnect the PHY.
+ * Returns the last error reported by the MAC stop or a port disable,
+ * otherwise the (non-negative) result of mac_dev->stop().
+ */
+static int dpaa_stop(struct net_device *net_dev)
+{
+       struct dpaa_priv *priv = netdev_priv(net_dev);
+       struct mac_device *mac_dev = priv->mac_dev;
+       int p, ret, port_err;
+
+       netif_tx_stop_all_queues(net_dev);
+       /* Give the FMan Tx port a moment to finish the frames that are
+        * already in flight before it gets switched off.
+        */
+       usleep_range(5000, 10000);
+
+       ret = mac_dev->stop(mac_dev);
+       if (ret < 0)
+               netif_err(priv, ifdown, net_dev, "mac_dev->stop() = %d\n",
+                         ret);
+
+       /* a failing port overrides whatever the MAC stop returned */
+       for (p = 0; p < ARRAY_SIZE(mac_dev->port); p++) {
+               port_err = fman_port_disable(mac_dev->port[p]);
+               if (port_err)
+                       ret = port_err;
+       }
+
+       if (net_dev->phydev)
+               phy_disconnect(net_dev->phydev);
+       net_dev->phydev = NULL;
+
+       return ret;
+}
+
+/* Tx watchdog handler: log how long the queue has been stuck and count a
+ * Tx error on the CPU running the handler.
+ */
+static void dpaa_tx_timeout(struct net_device *net_dev)
+{
+       const struct dpaa_priv *priv = netdev_priv(net_dev);
+       struct dpaa_percpu_priv *percpu_priv;
+       unsigned int latency_ms;
+
+       percpu_priv = this_cpu_ptr(priv->percpu_priv);
+       latency_ms = jiffies_to_msecs(jiffies - dev_trans_start(net_dev));
+
+       netif_crit(priv, timer, net_dev, "Transmit timeout latency: %u ms\n",
+                  latency_ms);
+
+       percpu_priv->stats.tx_errors++;
+}
+
+/* Calculates the statistics for the given device by adding the statistics
+ * collected by each CPU.
+ */
+static struct rtnl_link_stats64 *dpaa_get_stats64(struct net_device *net_dev,
+                                                 struct rtnl_link_stats64 *s)
+{
+       const int nr_counters = sizeof(struct rtnl_link_stats64) / sizeof(u64);
+       struct dpaa_priv *priv = netdev_priv(net_dev);
+       u64 *total = (u64 *)s;
+       int cpu, idx;
+
+       for_each_possible_cpu(cpu) {
+               struct dpaa_percpu_priv *percpu_priv;
+               u64 *percpu;
+
+               /* view this CPU's counters as a flat array of u64 */
+               percpu_priv = per_cpu_ptr(priv->percpu_priv, cpu);
+               percpu = (u64 *)&percpu_priv->stats;
+
+               /* accumulate counter by counter into the caller's buffer */
+               for (idx = 0; idx < nr_counters; idx++)
+                       total[idx] += percpu[idx];
+       }
+
+       return s;
+}
+
+/* Look up the mac_device attached to the MAC node named in the platform data.
+ * @pdev: the DPAA Ethernet platform device; its platform_data is expected to
+ *        be a struct dpaa_eth_data carrying the MAC device-tree node
+ *
+ * Returns the MAC device's driver data, ERR_PTR(-ENODEV) when there is no
+ * platform data, or ERR_PTR(-EINVAL) when the MAC platform device or its
+ * driver data cannot be found.
+ */
+static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev)
+{
+       struct platform_device *of_dev;
+       struct dpaa_eth_data *eth_data;
+       struct device *dpaa_dev, *dev;
+       struct device_node *mac_node;
+       struct mac_device *mac_dev;
+
+       dpaa_dev = &pdev->dev;
+       eth_data = dpaa_dev->platform_data;
+       if (!eth_data)
+               return ERR_PTR(-ENODEV);
+
+       mac_node = eth_data->mac_node;
+
+       of_dev = of_find_device_by_node(mac_node);
+       if (!of_dev) {
+               dev_err(dpaa_dev, "of_find_device_by_node(%s) failed\n",
+                       mac_node->full_name);
+               of_node_put(mac_node);
+               return ERR_PTR(-EINVAL);
+       }
+       of_node_put(mac_node);
+
+       dev = &of_dev->dev;
+
+       mac_dev = dev_get_drvdata(dev);
+       if (!mac_dev)
+               dev_err(dpaa_dev, "dev_get_drvdata(%s) failed\n",
+                       dev_name(dev));
+
+       /* of_find_device_by_node() took a reference on the device; it was
+        * only needed to read the driver data, so drop it on every path
+        * instead of leaking it.
+        */
+       put_device(dev);
+
+       if (!mac_dev)
+               return ERR_PTR(-EINVAL);
+
+       return mac_dev;
+}
+
+/* Change the netdev's MAC address and program it into the MAC.
+ * If the MAC rejects the new address, the previous one is restored on the
+ * netdev. Returns 0 on success or a negative error code.
+ */
+static int dpaa_set_mac_address(struct net_device *net_dev, void *addr)
+{
+       const struct dpaa_priv *priv = netdev_priv(net_dev);
+       struct mac_device *mac_dev = priv->mac_dev;
+       struct sockaddr saved;
+       int ret;
+
+       /* remember the current address so it can be restored on failure */
+       memcpy(saved.sa_data, net_dev->dev_addr, ETH_ALEN);
+
+       ret = eth_mac_addr(net_dev, addr);
+       if (ret < 0) {
+               netif_err(priv, drv, net_dev, "eth_mac_addr() = %d\n", ret);
+               return ret;
+       }
+
+       ret = mac_dev->change_addr(mac_dev->fman_mac,
+                                  (enet_addr_t *)net_dev->dev_addr);
+       if (ret >= 0)
+               return 0;
+
+       netif_err(priv, drv, net_dev, "mac_dev->change_addr() = %d\n",
+                 ret);
+       /* reverting to previous address */
+       eth_mac_addr(net_dev, &saved);
+
+       return ret;
+}
+
+/* Bring the MAC's promiscuous setting in line with IFF_PROMISC and reload
+ * the multicast filter. Failures are only logged.
+ */
+static void dpaa_set_rx_mode(struct net_device *net_dev)
+{
+       const struct dpaa_priv *priv = netdev_priv(net_dev);
+       struct mac_device *mac_dev = priv->mac_dev;
+       int ret;
+
+       /* toggle the cached state only when it disagrees with the flag */
+       if (!!(net_dev->flags & IFF_PROMISC) != mac_dev->promisc) {
+               mac_dev->promisc = !mac_dev->promisc;
+               ret = mac_dev->set_promisc(mac_dev->fman_mac,
+                                          mac_dev->promisc);
+               if (ret < 0)
+                       netif_err(priv, drv, net_dev,
+                                 "mac_dev->set_promisc() = %d\n",
+                                 ret);
+       }
+
+       ret = mac_dev->set_multi(net_dev, mac_dev);
+       if (ret < 0)
+               netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
+                         ret);
+}
+
+/* Map a buffer pool id to its dpaa_bp; warns and returns NULL when the id
+ * cannot index dpaa_bp_array, and returns NULL for an unregistered id.
+ */
+static struct dpaa_bp *dpaa_bpid2pool(int bpid)
+{
+       return WARN_ON(bpid < 0 || bpid >= BM_MAX_NUM_OF_POOLS) ?
+               NULL : dpaa_bp_array[bpid];
+}
+
+/* checks if this bpool is already allocated */
+static bool dpaa_bpid2pool_use(int bpid)
+{
+       struct dpaa_bp *bp = dpaa_bpid2pool(bpid);
+
+       if (!bp)
+               return false;
+
+       /* already registered: take one more reference on it */
+       atomic_inc(&bp->refs);
+       return true;
+}
+
+/* called only once per bpid by dpaa_bp_alloc_pool() */
+static void dpaa_bpid2pool_map(int bpid, struct dpaa_bp *dpaa_bp)
+{
+       /* publish the pool in the bpid-indexed lookup table
+        * NOTE(review): bpid is not range-checked here, unlike in
+        * dpaa_bpid2pool() - confirm callers only pass valid ids
+        */
+       dpaa_bp_array[bpid] = dpaa_bp;
+       /* the reference count starts at one */
+       atomic_set(&dpaa_bp->refs, 1);
+}
+
+/* Set up the BMan pool behind @dpaa_bp and register it in dpaa_bp_array.
+ *
+ * If the bpid is already registered, only its reference count is bumped.
+ * Otherwise a new pool is requested from BMan when no bpid was specified
+ * (FSL_DPAA_BPID_INV), the optional seed_cb() is run and the pool is
+ * registered.
+ *
+ * Returns 0 on success, -EINVAL for an unconfigured dpaa_bp, -ENODEV if
+ * bman_new_pool() fails, or the error returned by seed_cb().
+ */
+static int dpaa_bp_alloc_pool(struct dpaa_bp *dpaa_bp)
+{
+       int err;
+
+       if (dpaa_bp->size == 0 || dpaa_bp->config_count == 0) {
+               pr_err("%s: Buffer pool is not properly initialized! Missing size or initial number of buffers\n",
+                      __func__);
+               return -EINVAL;
+       }
+
+       /* If the pool is already specified, we only create one per bpid */
+       if (dpaa_bp->bpid != FSL_DPAA_BPID_INV &&
+           dpaa_bpid2pool_use(dpaa_bp->bpid))
+               return 0;
+
+       /* no bpid requested: let BMan pick one and remember it */
+       if (dpaa_bp->bpid == FSL_DPAA_BPID_INV) {
+               dpaa_bp->pool = bman_new_pool();
+               if (!dpaa_bp->pool) {
+                       pr_err("%s: bman_new_pool() failed\n",
+                              __func__);
+                       return -ENODEV;
+               }
+
+               dpaa_bp->bpid = (u8)bman_get_bpid(dpaa_bp->pool);
+       }
+
+       if (dpaa_bp->seed_cb) {
+               err = dpaa_bp->seed_cb(dpaa_bp);
+               if (err)
+                       goto pool_seed_failed;
+       }
+
+       /* registration comes last, see dpaa_bp_free() */
+       dpaa_bpid2pool_map(dpaa_bp->bpid, dpaa_bp);
+
+       return 0;
+
+pool_seed_failed:
+       pr_err("%s: pool seeding failed\n", __func__);
+       /* NOTE(review): the pool is freed here even when it was not created
+        * by the bman_new_pool() call above (bpid supplied by the caller),
+        * and dpaa_bp->bpid/pool are left as they are - confirm this path.
+        */
+       bman_free_pool(dpaa_bp->pool);
+
+       return err;
+}
+
+/* remove and free all the buffers from the given buffer pool */
+static void dpaa_bp_drain(struct dpaa_bp *bp)
+{
+       /* number of buffers requested per bman_acquire() call */
+       u8 num = 8;
+       int ret;
+
+       do {
+               struct bm_buffer bmb[8];
+               int i;
+
+               ret = bman_acquire(bp->pool, bmb, num);
+               if (ret < 0) {
+                       if (num == 8) {
+                               /* we have less than 8 buffers left;
+                                * drain them one by one
+                                */
+                               num = 1;
+                               /* continue jumps to the while () test, so
+                                * force it to stay true for the retry
+                                */
+                               ret = 1;
+                               continue;
+                       } else {
+                               /* Pool is fully drained */
+                               break;
+                       }
+               }
+
+               /* pass the first num buffers to the callback; assumes a
+                * successful bman_acquire() filled all of them - TODO confirm
+                */
+               if (bp->free_buf_cb)
+                       for (i = 0; i < num; i++)
+                               bp->free_buf_cb(bp, &bmb[i]);
+       } while (ret > 0);
+}
+
+/* Drop one reference on the registered pool behind @dpaa_bp; the last
+ * reference drains the pool (when a free callback exists), unregisters it
+ * and frees it.
+ */
+static void dpaa_bp_free(struct dpaa_bp *dpaa_bp)
+{
+       struct dpaa_bp *bp = dpaa_bpid2pool(dpaa_bp->bpid);
+
+       /* A pool is entered in the bpid table only at the very end of its
+        * allocation, so a missing entry means there is nothing to undo.
+        * A registered pool is kept for as long as other references remain.
+        */
+       if (!bp || !atomic_dec_and_test(&bp->refs))
+               return;
+
+       if (bp->free_buf_cb)
+               dpaa_bp_drain(bp);
+
+       dpaa_bp_array[bp->bpid] = NULL;
+       bman_free_pool(bp->pool);
+}
+
+/* Release every buffer pool referenced by @priv, in index order */
+static void dpaa_bps_free(struct dpaa_priv *priv)
+{
+       int idx = 0;
+
+       while (idx < DPAA_BPS_NUM) {
+               dpaa_bp_free(priv->dpaa_bps[idx]);
+               idx++;
+       }
+}
+
+/* Use multiple WQs for FQ assignment:
+ *     - Tx Confirmation queues go to WQ1.
+ *     - Rx Error and Tx Error queues go to WQ2 (giving them a better chance
+ *       to be scheduled, in case there are many more FQs in WQ3).
+ *     - Rx Default and Tx queues go to WQ3 (no differentiation between
+ *       Rx and Tx traffic).
+ * This ensures that Tx-confirmed buffers are timely released. In particular,
+ * it avoids congestion on the Tx Confirm FQs, which can pile up PFDRs if they
+ * are greatly outnumbered by other FQs in the system, while
+ * dequeue scheduling is round-robin.
+ */
+static inline void dpaa_assign_wq(struct dpaa_fq *fq)
+{
+       if (fq->fq_type == FQ_TYPE_TX_CONFIRM ||
+           fq->fq_type == FQ_TYPE_TX_CONF_MQ) {
+               /* Tx confirmation queues */
+               fq->wq = 1;
+       } else if (fq->fq_type == FQ_TYPE_RX_ERROR ||
+                  fq->fq_type == FQ_TYPE_TX_ERROR) {
+               /* error queues */
+               fq->wq = 2;
+       } else if (fq->fq_type == FQ_TYPE_RX_DEFAULT ||
+                  fq->fq_type == FQ_TYPE_TX) {
+               /* regular Rx and Tx traffic */
+               fq->wq = 3;
+       } else {
+               /* unknown type: complain and leave fq->wq untouched */
+               WARN(1, "Invalid FQ type %d for FQID %d!\n",
+                    fq->fq_type, fq->fqid);
+       }
+}
+
+/* Allocate @count dpaa_fq descriptors of one type and queue them on @list.
+ * @dev: device owning the (devm-managed) allocation
+ * @start: first FQID to assign, or 0 to leave every fqid at 0
+ * @count: number of descriptors to allocate
+ * @list: list the descriptors are appended to, in index order
+ * @fq_type: type stored in each descriptor; it also selects the work queue
+ *
+ * Returns the first descriptor of the array, or NULL if allocation failed.
+ */
+static struct dpaa_fq *dpaa_fq_alloc(struct device *dev,
+                                    u32 start, u32 count,
+                                    struct list_head *list,
+                                    enum dpaa_fq_type fq_type)
+{
+       struct dpaa_fq *dpaa_fq;
+       u32 i;
+
+       /* devm_kcalloc() fails cleanly if count * size would overflow */
+       dpaa_fq = devm_kcalloc(dev, count, sizeof(*dpaa_fq), GFP_KERNEL);
+       if (!dpaa_fq)
+               return NULL;
+
+       for (i = 0; i < count; i++) {
+               dpaa_fq[i].fq_type = fq_type;
+               dpaa_fq[i].fqid = start ? start + i : 0;
+               list_add_tail(&dpaa_fq[i].list, list);
+               dpaa_assign_wq(&dpaa_fq[i]);
+       }
+
+       return dpaa_fq;
+}
+
+/* Allocate every frame queue descriptor the interface needs, append them to
+ * @list and record the default/error queues in @port_fqs.
+ * Returns 0 on success or -ENOMEM if any dpaa_fq_alloc() call fails.
+ */
+static int dpaa_alloc_all_fqs(struct device *dev, struct list_head *list,
+                             struct fm_port_fqs *port_fqs)
+{
+       struct dpaa_fq *fqs;
+
+       /* Rx error queue */
+       fqs = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_RX_ERROR);
+       if (!fqs)
+               goto fq_alloc_failed;
+       port_fqs->rx_errq = fqs;
+
+       /* Rx default queue */
+       fqs = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_RX_DEFAULT);
+       if (!fqs)
+               goto fq_alloc_failed;
+       port_fqs->rx_defq = fqs;
+
+       /* one Tx confirmation queue per Tx queue */
+       fqs = dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list,
+                           FQ_TYPE_TX_CONF_MQ);
+       if (!fqs)
+               goto fq_alloc_failed;
+
+       /* Tx error queue */
+       fqs = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_TX_ERROR);
+       if (!fqs)
+               goto fq_alloc_failed;
+       port_fqs->tx_errq = fqs;
+
+       /* default Tx confirmation queue */
+       fqs = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_TX_CONFIRM);
+       if (!fqs)
+               goto fq_alloc_failed;
+       port_fqs->tx_defq = fqs;
+
+       /* the Tx queues themselves */
+       fqs = dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX);
+       if (!fqs)
+               goto fq_alloc_failed;
+
+       return 0;
+
+fq_alloc_failed:
+       dev_err(dev, "dpaa_fq_alloc() failed\n");
+       return -ENOMEM;
+}
+
+static u32 rx_pool_channel;
+static DEFINE_SPINLOCK(rx_pool_channel_init);
+
+/* Return the shared Rx pool channel, allocating it from QMan on first use.
+ * Returns -ENOMEM when no channel is available.
+ */
+static int dpaa_get_channel(void)
+{
+       u32 pool;
+
+       spin_lock(&rx_pool_channel_init);
+       /* a zero channel means nothing has been allocated yet */
+       if (!rx_pool_channel && !qman_alloc_pool(&pool))
+               rx_pool_channel = pool;
+       spin_unlock(&rx_pool_channel_init);
+
+       if (rx_pool_channel)
+               return rx_pool_channel;
+
+       return -ENOMEM;
+}
+
+/* Hand rx_pool_channel back through qman_release_pool().
+ * NOTE(review): rx_pool_channel is not reset to 0 here - confirm that
+ * dpaa_get_channel() is never expected to allocate again afterwards.
+ */
+static void dpaa_release_channel(void)
+{
+       qman_release_pool(rx_pool_channel);
+}
+
+/* Add the pool channel to the static dequeue set of the affine portal of
+ * every CPU reported by qman_affine_cpus().
+ */
+static void dpaa_eth_add_channel(u16 channel)
+{
+       u32 pool = QM_SDQCR_CHANNELS_POOL_CONV(channel);
+       const cpumask_t *cpus = qman_affine_cpus();
+       int cpu;
+
+       for_each_cpu(cpu, cpus)
+               qman_p_static_dequeue_add(qman_get_affine_portal(cpu), pool);
+}
+
+/* Congestion group state change notification callback.
+ * Stops the device's egress queues while they are congested and
+ * wakes them upon exiting congested state.
+ * Also updates some CGR-related stats.
+ */
+static void dpaa_eth_cgscn(struct qman_portal *qm, struct qman_cgr *cgr,
+                          int congested)
+{
+       struct dpaa_priv *priv = container_of(cgr, struct dpaa_priv,
+                                             cgr_data.cgr);
+
+       if (!congested) {
+               /* leaving congestion: account the time spent congested */
+               priv->cgr_data.congested_jiffies +=
+                       jiffies - priv->cgr_data.congestion_start_jiffies;
+               netif_tx_wake_all_queues(priv->net_dev);
+               return;
+       }
+
+       /* entering congestion: timestamp it, pause Tx and count the event */
+       priv->cgr_data.congestion_start_jiffies = jiffies;
+       netif_tx_stop_all_queues(priv->net_dev);
+       priv->cgr_data.cgr_congested_count++;
+}
+
+/* Allocate a congestion group record (CGR) id for the interface and create
+ * the CGR with congestion state change notifications and CS tail drop
+ * enabled; dpaa_eth_cgscn() is installed as the notification callback.
+ *
+ * Returns a negative error code if the id allocation or the CGR creation
+ * fails, otherwise the result of qman_create_cgr(). On creation failure the
+ * allocated id is released again.
+ */
+static int dpaa_eth_cgr_init(struct dpaa_priv *priv)
+{
+       struct qm_mcc_initcgr initcgr;
+       u32 cs_th;
+       int err;
+
+       err = qman_alloc_cgrid(&priv->cgr_data.cgr.cgrid);
+       if (err < 0) {
+               if (netif_msg_drv(priv))
+                       pr_err("%s: Error %d allocating CGR ID\n",
+                              __func__, err);
+               goto out_error;
+       }
+       priv->cgr_data.cgr.cb = dpaa_eth_cgscn;
+
+       /* Start from an all-zero command so that the fields not selected
+        * through we_mask never carry stack garbage into the init command.
+        */
+       memset(&initcgr, 0, sizeof(initcgr));
+
+       /* Enable Congestion State Change Notifications and CS taildrop */
+       initcgr.we_mask = QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES;
+       initcgr.cgr.cscn_en = QM_CGR_EN;
+
+       /* Set different thresholds based on the MAC speed.
+        * This may turn suboptimal if the MAC is reconfigured at a speed
+        * lower than its max, e.g. if a dTSEC later negotiates a 100Mbps link.
+        * In such cases, we ought to reconfigure the threshold, too.
+        */
+       if (priv->mac_dev->if_support & SUPPORTED_10000baseT_Full)
+               cs_th = DPAA_CS_THRESHOLD_10G;
+       else
+               cs_th = DPAA_CS_THRESHOLD_1G;
+       qm_cgr_cs_thres_set64(&initcgr.cgr.cs_thres, cs_th, 1);
+
+       initcgr.we_mask |= QM_CGR_WE_CSTD_EN;
+       initcgr.cgr.cstd_en = QM_CGR_EN;
+
+       err = qman_create_cgr(&priv->cgr_data.cgr, QMAN_CGR_FLAG_USE_INIT,
+                             &initcgr);
+       if (err < 0) {
+               if (netif_msg_drv(priv))
+                       pr_err("%s: Error %d creating CGR with ID %d\n",
+                              __func__, err, priv->cgr_data.cgr.cgrid);
+               /* give the id back, nothing else was set up */
+               qman_release_cgrid(priv->cgr_data.cgr.cgrid);
+               goto out_error;
+       }
+       if (netif_msg_drv(priv))
+               pr_debug("Created CGR %d for netdev with hwaddr %pM on QMan channel %d\n",
+                        priv->cgr_data.cgr.cgrid, priv->mac_dev->addr,
+                        priv->cgr_data.cgr.chan);
+
+out_error:
+       return err;
+}
+
+/* Prepare @fq as a queue the driver only dequeues from: copy the qman_fq
+ * @template into it, bind it to the netdev and direct it to priv->channel.
+ */
+static inline void dpaa_setup_ingress(const struct dpaa_priv *priv,
+                                     struct dpaa_fq *fq,
+                                     const struct qman_fq *template)
+{
+       fq->fq_base = *template;
+       fq->net_dev = priv->net_dev;
+
+       /* the driver never enqueues on this FQ */
+       fq->flags = QMAN_FQ_FLAG_NO_ENQUEUE;
+       fq->channel = priv->channel;
+}
+
+/* Prepare @fq as an egress queue: copy the qman_fq @template into it and
+ * bind it to the netdev. With a @port the FQ targets that port's QMan
+ * channel; without one it is flagged QMAN_FQ_FLAG_NO_MODIFY and its channel
+ * is left untouched.
+ */
+static inline void dpaa_setup_egress(const struct dpaa_priv *priv,
+                                    struct dpaa_fq *fq,
+                                    struct fman_port *port,
+                                    const struct qman_fq *template)
+{
+       fq->fq_base = *template;
+       fq->net_dev = priv->net_dev;
+
+       if (!port) {
+               fq->flags = QMAN_FQ_FLAG_NO_MODIFY;
+               return;
+       }
+
+       fq->flags = QMAN_FQ_FLAG_TO_DCPORTAL;
+       fq->channel = (u16)fman_port_get_qman_channel_id(port);
+}
+
+/* Initialize every FQ on priv->dpaa_fq_list according to its type and fill
+ * the priv->egress_fqs[] / priv->conf_fqs[] lookup tables.
+ * @priv: interface private data owning the FQ list
+ * @fq_cbs: qman_fq templates copied into the FQs, one per FQ role
+ * @tx_port: FMan Tx port the Tx FQs are directed to
+ *
+ * If the list holds fewer than DPAA_ETH_TXQ_NUM Tx FQs, the existing ones
+ * are reused round-robin until every egress_fqs[] slot is populated.
+ */
+static void dpaa_fq_setup(struct dpaa_priv *priv,
+                         const struct dpaa_fq_cbs *fq_cbs,
+                         struct fman_port *tx_port)
+{
+       int egress_cnt = 0, conf_cnt = 0;
+       struct dpaa_fq *fq;
+
+       /* Only the presence of affine portals matters here, so test the
+        * mask instead of collecting the channels in an NR_CPUS-sized
+        * on-stack array that is never read.
+        */
+       if (cpumask_empty(qman_affine_cpus()))
+               dev_err(priv->net_dev->dev.parent,
+                       "No Qman software (affine) channels found\n");
+
+       /* Initialize each FQ in the list */
+       list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
+               switch (fq->fq_type) {
+               case FQ_TYPE_RX_DEFAULT:
+                       dpaa_setup_ingress(priv, fq, &fq_cbs->rx_defq);
+                       break;
+               case FQ_TYPE_RX_ERROR:
+                       dpaa_setup_ingress(priv, fq, &fq_cbs->rx_errq);
+                       break;
+               case FQ_TYPE_TX:
+                       dpaa_setup_egress(priv, fq, tx_port,
+                                         &fq_cbs->egress_ern);
+                       /* If we have more Tx queues than the number of cores,
+                        * just ignore the extra ones.
+                        */
+                       if (egress_cnt < DPAA_ETH_TXQ_NUM)
+                               priv->egress_fqs[egress_cnt++] = &fq->fq_base;
+                       break;
+               case FQ_TYPE_TX_CONF_MQ:
+                       /* Same limit as for the Tx FQs they confirm: never
+                        * write past the last slot of the lookup table.
+                        */
+                       if (conf_cnt < DPAA_ETH_TXQ_NUM)
+                               priv->conf_fqs[conf_cnt++] = &fq->fq_base;
+                       /* fall through */
+               case FQ_TYPE_TX_CONFIRM:
+                       dpaa_setup_ingress(priv, fq, &fq_cbs->tx_defq);
+                       break;
+               case FQ_TYPE_TX_ERROR:
+                       dpaa_setup_ingress(priv, fq, &fq_cbs->tx_errq);
+                       break;
+               default:
+                       dev_warn(priv->net_dev->dev.parent,
+                                "Unknown FQ type detected!\n");
+                       break;
+               }
+       }
+
+       /* Without a single Tx FQ there is nothing to replicate and the loop
+        * below would never terminate.
+        */
+       if (egress_cnt == 0) {
+               dev_warn(priv->net_dev->dev.parent,
+                        "No Tx FQs found\n");
+               return;
+       }
+
+       /* Make sure all CPUs receive a corresponding Tx queue. */
+       while (egress_cnt < DPAA_ETH_TXQ_NUM) {
+               list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
+                       if (fq->fq_type != FQ_TYPE_TX)
+                               continue;
+                       priv->egress_fqs[egress_cnt++] = &fq->fq_base;
+                       if (egress_cnt == DPAA_ETH_TXQ_NUM)
+                               break;
+               }
+       }
+}
+
+/* Return the first index of @tx_fq in priv->egress_fqs[], or -EINVAL when
+ * the FQ is not one of the interface's egress FQs.
+ */
+static inline int dpaa_tx_fq_to_id(const struct dpaa_priv *priv,
+                                  struct qman_fq *tx_fq)
+{
+       int idx = 0;
+
+       while (idx < DPAA_ETH_TXQ_NUM) {
+               if (priv->egress_fqs[idx] == tx_fq)
+                       return idx;
+               idx++;
+       }
+
+       return -EINVAL;
+}
+
+static int dpaa_fq_init(struct dpaa_fq *dpaa_fq, bool td_enable)
+{
+       const struct dpaa_priv  *priv;
+       struct qman_fq *confq = NULL;
+       struct qm_mcc_initfq initfq;
+       struct device *dev;
+       struct qman_fq *fq;
+       int queue_id;
+       int err;
+
+       priv = netdev_priv(dpaa_fq->net_dev);
+       dev = dpaa_fq->net_dev->dev.parent;
+
+       if (dpaa_fq->fqid == 0)
+               dpaa_fq->flags |= QMAN_FQ_FLAG_DYNAMIC_FQID;
+
+       dpaa_fq->init = !(dpaa_fq->flags & QMAN_FQ_FLAG_NO_MODIFY);
+
+       err = qman_create_fq(dpaa_fq->fqid, dpaa_fq->flags, &dpaa_fq->fq_base);
+       if (err) {
+               dev_err(dev, "qman_create_fq() failed\n");
+               return err;
+       }
+       fq = &dpaa_fq->fq_base;
+
+       if (dpaa_fq->init) {
+               memset(&initfq, 0, sizeof(initfq));
+
+               initfq.we_mask = QM_INITFQ_WE_FQCTRL;
+               /* Note: we may get to keep an empty FQ in cache */
+               initfq.fqd.fq_ctrl = QM_FQCTRL_PREFERINCACHE;
+
+               /* Try to reduce the number of portal interrupts for
+                * Tx Confirmation FQs.
+                */
+               if (dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM)
+                       initfq.fqd.fq_ctrl |= QM_FQCTRL_HOLDACTIVE;
+
+               /* FQ placement */
+               initfq.we_mask |= QM_INITFQ_WE_DESTWQ;
+
+               qm_fqd_set_destwq(&initfq.fqd, dpaa_fq->channel, dpaa_fq->wq);
+
+               /* Put all egress queues in a congestion group of their own.
+                * Sensu stricto, the Tx confirmation queues are Rx FQs,
+                * rather than Tx - but they nonetheless account for the
+                * memory footprint on behalf of egress traffic. We therefore
+                * place them in the netdev's CGR, along with the Tx FQs.
+                */
+               if (dpaa_fq->fq_type == FQ_TYPE_TX ||
+                   dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM ||
+                   dpaa_fq->fq_type == FQ_TYPE_TX_CONF_MQ) {
+                       initfq.we_mask |= QM_INITFQ_WE_CGID;
+                       initfq.fqd.fq_ctrl |= QM_FQCTRL_CGE;
+                       initfq.fqd.cgid = (u8)priv->cgr_data.cgr.cgrid;
+                       /* Set a fixed overhead accounting, in an attempt to
+                        * reduce the impact of fixed-size skb shells and the
+                        * driver's needed headroom on system memory. This is
+                        * especially the case when the egress traffic is
+                        * composed of small datagrams.
+                        * Unfortunately, QMan's OAL value is capped to an
+                        * insufficient value, but even that is better than
+                        * no overhead accounting at all.
+                        */
+                       initfq.we_mask |= QM_INITFQ_WE_OAC;
+                       qm_fqd_set_oac(&initfq.fqd, QM_OAC_CG);
+                       qm_fqd_set_oal(&initfq.fqd,
+                                      min(sizeof(struct sk_buff) +
+                                      priv->tx_headroom,
+                                      (size_t)FSL_QMAN_MAX_OAL));
+               }
+
+               if (td_enable) {
+                       initfq.we_mask |= QM_INITFQ_WE_TDTHRESH;
+                       qm_fqd_set_taildrop(&initfq.fqd, DPAA_FQ_TD, 1);
+                       initfq.fqd.fq_ctrl = QM_FQCTRL_TDE;
+               }
+
+               if (dpaa_fq->fq_type == FQ_TYPE_TX) {
+                       queue_id = dpaa_tx_fq_to_id(priv, &dpaa_fq->fq_base);
+                       if (queue_id >= 0)
+                               confq = priv->conf_fqs[queue_id];
+                       if (confq) {
+                               initfq.we_mask |= QM_INITFQ_WE_CONTEXTA;
+                       /* ContextA: OVOM=1(use contextA2 bits instead of ICAD)
+                        *           A2V=1 (contextA A2 field is valid)
+                        *           A0V=1 (contextA A0 field is valid)
+                        *           B0V=1 (contextB field is valid)
+                        * ContextA A2: EBD=1 (deallocate buffers inside FMan)
+                        * ContextB B0(ASPID): 0 (absolute Virtual Storage ID)
+                        */
+                               initfq.fqd.context_a.hi = 0x1e000000;
+                               initfq.fqd.context_a.lo = 0x80000000;
+                       }
+               }
+
+               /* Put all the ingress queues in our "ingress CGR". */
+               if (priv->use_ingress_cgr &&
+                   (dpaa_fq->fq_type == FQ_TYPE_RX_DEFAULT ||
+                    dpaa_fq->fq_type == FQ_TYPE_RX_ERROR)) {
+                       initfq.we_mask |= QM_INITFQ_WE_CGID;
+                       initfq.fqd.fq_ctrl |= QM_FQCTRL_CGE;
+                       initfq.fqd.cgid = (u8)priv->ingress_cgr.cgrid;
+                       /* Set a fixed overhead accounting, just like for the
+                        * egress CGR.
+                        */
+                       initfq.we_mask |= QM_INITFQ_WE_OAC;
+                       qm_fqd_set_oac(&initfq.fqd, QM_OAC_CG);
+                       qm_fqd_set_oal(&initfq.fqd,
+                                      min(sizeof(struct sk_buff) +
+                                      priv->tx_headroom,
+                                      (size_t)FSL_QMAN_MAX_OAL));
+               }
+
+               /* Initialization common to all ingress queues */
+               if (dpaa_fq->flags & QMAN_FQ_FLAG_NO_ENQUEUE) {
+                       initfq.we_mask |= QM_INITFQ_WE_CONTEXTA;
+                       initfq.fqd.fq_ctrl |=
+                               QM_FQCTRL_HOLDACTIVE;
+                       initfq.fqd.context_a.stashing.exclusive =
+                               QM_STASHING_EXCL_DATA | QM_STASHING_EXCL_CTX |
+                               QM_STASHING_EXCL_ANNOTATION;
+                       qm_fqd_set_stashing(&initfq.fqd, 1, 2,
+                                           DIV_ROUND_UP(sizeof(struct qman_fq),
+                                                        64));
+               }
+
+               err = qman_init_fq(fq, QMAN_INITFQ_FLAG_SCHED, &initfq);
+               if (err < 0) {
+                       dev_err(dev, "qman_init_fq(%u) = %d\n",
+                               qman_fq_fqid(fq), err);
+                       qman_destroy_fq(fq);
+                       return err;
+               }
+       }
+
+       dpaa_fq->fqid = qman_fq_fqid(fq);
+
+       return 0;
+}
+
+/* Tear down one frame queue and unlink it from the list it sits on.
+ *
+ * When dpaa_fq->init is set, the FQ is retired and then put out of service;
+ * the second step is attempted even if the first one failed. The FQ object
+ * is destroyed and removed from its list in every case.
+ *
+ * Returns the first negative error code reported by qman_retire_fq() or
+ * qman_oos_fq(); otherwise the non-negative result of qman_retire_fq(), or 0
+ * if the FQ was never initialized. The netif message level only decides
+ * whether a failure is logged, never whether it is reported to the caller.
+ */
+static int dpaa_fq_free_entry(struct device *dev, struct qman_fq *fq)
+{
+       const struct dpaa_priv  *priv;
+       struct dpaa_fq *dpaa_fq;
+       int err, error;
+
+       err = 0;
+
+       dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
+       priv = netdev_priv(dpaa_fq->net_dev);
+
+       if (dpaa_fq->init) {
+               err = qman_retire_fq(fq, NULL);
+               if (err < 0 && netif_msg_drv(priv))
+                       dev_err(dev, "qman_retire_fq(%u) = %d\n",
+                               qman_fq_fqid(fq), err);
+
+               error = qman_oos_fq(fq);
+               if (error < 0) {
+                       if (netif_msg_drv(priv))
+                               dev_err(dev, "qman_oos_fq(%u) = %d\n",
+                                       qman_fq_fqid(fq), error);
+                       /* Keep the earliest failure as the return value */
+                       if (err >= 0)
+                               err = error;
+               }
+       }
+
+       qman_destroy_fq(fq);
+       list_del(&dpaa_fq->list);
+
+       return err;
+}
+
+/* Free every frame queue linked on @list.
+ *
+ * All entries are processed even if some of them fail. Returns 0 if no entry
+ * reported a negative error, otherwise the first negative error code seen.
+ */
+static int dpaa_fq_free(struct device *dev, struct list_head *list)
+{
+       struct dpaa_fq *dpaa_fq, *tmp;
+       int err, error;
+
+       err = 0;
+       /* _safe iteration: dpaa_fq_free_entry() unlinks the current entry */
+       list_for_each_entry_safe(dpaa_fq, tmp, list, list) {
+               /* Pass the embedded member that dpaa_fq_free_entry() feeds to
+                * container_of(), instead of relying on it sitting at offset 0.
+                */
+               error = dpaa_fq_free_entry(dev, &dpaa_fq->fq_base);
+               if (error < 0 && err >= 0)
+                       err = error;
+       }
+
+       return err;
+}
+
+/* Configure and initialize the FMan Tx port.
+ *
+ * @port:       the Tx port to bring up
+ * @errq:       FQ whose fqid becomes the port's error FQ
+ * @defq:       FQ whose fqid becomes the port's default FQ
+ * @buf_layout: supplies the private data size of the buffer prefix
+ *
+ * The three steps (config, buffer prefix content, init) are performed in
+ * order; the first failure is logged and ends the sequence, since there is
+ * no point in initializing a port whose configuration was rejected.
+ */
+static void dpaa_eth_init_tx_port(struct fman_port *port, struct dpaa_fq *errq,
+                                 struct dpaa_fq *defq,
+                                 struct dpaa_buffer_layout *buf_layout)
+{
+       struct fman_buffer_prefix_content buf_prefix_content;
+       struct fman_port_params params;
+       int err;
+
+       memset(&params, 0, sizeof(params));
+       memset(&buf_prefix_content, 0, sizeof(buf_prefix_content));
+
+       buf_prefix_content.priv_data_size = buf_layout->priv_data_size;
+       buf_prefix_content.pass_prs_result = true;
+       buf_prefix_content.pass_hash_result = true;
+       buf_prefix_content.pass_time_stamp = false;
+       buf_prefix_content.data_align = DPAA_FD_DATA_ALIGNMENT;
+
+       params.specific_params.non_rx_params.err_fqid = errq->fqid;
+       params.specific_params.non_rx_params.dflt_fqid = defq->fqid;
+
+       err = fman_port_config(port, &params);
+       if (err) {
+               pr_err("%s: fman_port_config failed\n", __func__);
+               return;
+       }
+
+       err = fman_port_cfg_buf_prefix_content(port, &buf_prefix_content);
+       if (err) {
+               pr_err("%s: fman_port_cfg_buf_prefix_content failed\n",
+                      __func__);
+               return;
+       }
+
+       err = fman_port_init(port);
+       if (err)
+               pr_err("%s: fm_port_init failed\n", __func__);
+}
+
+/* Configure and initialize the FMan Rx port.
+ *
+ * @port:       the Rx port to bring up
+ * @bps:        buffer pools the port may draw from
+ * @count:      number of entries in @bps; clamped to the capacity of the
+ *              port's external buffer pool table
+ * @errq:       FQ whose fqid becomes the port's error FQ
+ * @defq:       FQ whose fqid becomes the port's default FQ
+ * @buf_layout: supplies the private data size of the buffer prefix
+ *
+ * The three steps (config, buffer prefix content, init) are performed in
+ * order; the first failure is logged and ends the sequence, since there is
+ * no point in initializing a port whose configuration was rejected.
+ */
+static void dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
+                                 size_t count, struct dpaa_fq *errq,
+                                 struct dpaa_fq *defq,
+                                 struct dpaa_buffer_layout *buf_layout)
+{
+       struct fman_buffer_prefix_content buf_prefix_content;
+       struct fman_port_rx_params *rx_p;
+       struct fman_port_params params;
+       size_t i;
+       int err;
+
+       memset(&params, 0, sizeof(params));
+       memset(&buf_prefix_content, 0, sizeof(buf_prefix_content));
+
+       buf_prefix_content.priv_data_size = buf_layout->priv_data_size;
+       buf_prefix_content.pass_prs_result = true;
+       buf_prefix_content.pass_hash_result = true;
+       buf_prefix_content.pass_time_stamp = false;
+       buf_prefix_content.data_align = DPAA_FD_DATA_ALIGNMENT;
+
+       rx_p = &params.specific_params.rx_params;
+       rx_p->err_fqid = errq->fqid;
+       rx_p->dflt_fqid = defq->fqid;
+
+       /* Never describe more pools than the port's table can hold */
+       count = min(ARRAY_SIZE(rx_p->ext_buf_pools.ext_buf_pool), count);
+       rx_p->ext_buf_pools.num_of_pools_used = (u8)count;
+       for (i = 0; i < count; i++) {
+               rx_p->ext_buf_pools.ext_buf_pool[i].id =  bps[i]->bpid;
+               rx_p->ext_buf_pools.ext_buf_pool[i].size = (u16)bps[i]->size;
+       }
+
+       err = fman_port_config(port, &params);
+       if (err) {
+               pr_err("%s: fman_port_config failed\n", __func__);
+               return;
+       }
+
+       err = fman_port_cfg_buf_prefix_content(port, &buf_prefix_content);
+       if (err) {
+               pr_err("%s: fman_port_cfg_buf_prefix_content failed\n",
+                      __func__);
+               return;
+       }
+
+       err = fman_port_init(port);
+       if (err)
+               pr_err("%s: fm_port_init failed\n", __func__);
+}
+
+/* Bring up both FMan ports of a MAC device: the Tx port first, then the Rx
+ * port. Each port gets its own error/default FQs from @port_fqs and its own
+ * entry of @buf_layout; only the Rx port is given the buffer pools.
+ * @dev is not used here.
+ */
+static void dpaa_eth_init_ports(struct mac_device *mac_dev,
+                               struct dpaa_bp **bps, size_t count,
+                               struct fm_port_fqs *port_fqs,
+                               struct dpaa_buffer_layout *buf_layout,
+                               struct device *dev)
+{
+       struct fman_port *tx_port = mac_dev->port[TX];
+       struct fman_port *rx_port = mac_dev->port[RX];
+
+       dpaa_eth_init_tx_port(tx_port,
+                             port_fqs->tx_errq, port_fqs->tx_defq,
+                             &buf_layout[TX]);
+
+       dpaa_eth_init_rx_port(rx_port, bps, count,
+                             port_fqs->rx_errq, port_fqs->rx_defq,
+                             &buf_layout[RX]);
+}
+
+/* Release @cnt buffers from @bmb to the pool behind @dpaa_bp.
+ *
+ * Returns the number of buffers that were handed to the pool: @cnt on
+ * success, 0 if bman_release() failed. In the failure case the buffers are
+ * passed one by one to the pool's free_buf_cb, when one is set, so they are
+ * not leaked.
+ */
+static int dpaa_bman_release(const struct dpaa_bp *dpaa_bp,
+                            struct bm_buffer *bmb, int cnt)
+{
+       int err, i;
+
+       err = bman_release(dpaa_bp->pool, bmb, cnt);
+       /* Should never occur, address anyway to avoid leaking the buffers */
+       if (unlikely(WARN_ON(err))) {
+               if (dpaa_bp->free_buf_cb) {
+                       for (i = cnt - 1; i >= 0; i--)
+                               dpaa_bp->free_buf_cb(dpaa_bp, &bmb[i]);
+               }
+
+               /* Nothing reached the pool; callers add the return value to
+                * their buffer counters, so it must not be negative or claim
+                * buffers that were not released.
+                */
+               return 0;
+       }
+
+       return cnt;
+}
+
+/* Hand the buffers referenced by an S/G table back to their buffer pools.
+ *
+ * Consecutive entries that share a bpid are released together, at most
+ * DPAA_BUFF_RELEASE_MAX at a time. Processing ends after the entry marked
+ * final, or silently as soon as an entry names a bpid with no known pool.
+ */
+static void dpaa_release_sgt_members(struct qm_sg_entry *sgt)
+{
+       struct bm_buffer batch[DPAA_BUFF_RELEASE_MAX];
+       struct dpaa_bp *pool;
+       int idx = 0;
+       int n;
+
+       memset(batch, 0, sizeof(batch));
+
+       do {
+               pool = dpaa_bpid2pool(sgt[idx].bpid);
+               if (!pool)
+                       return;
+
+               /* Gather a run of entries that belong to the same pool */
+               n = 0;
+               for (;;) {
+                       WARN_ON(qm_sg_entry_is_ext(&sgt[idx]));
+
+                       bm_buffer_set64(&batch[n],
+                                       qm_sg_entry_get64(&sgt[idx]));
+                       n++;
+                       idx++;
+
+                       if (n >= ARRAY_SIZE(batch))
+                               break;
+                       /* Do not look past the final entry */
+                       if (qm_sg_entry_is_final(&sgt[idx - 1]))
+                               break;
+                       if (sgt[idx - 1].bpid != sgt[idx].bpid)
+                               break;
+               }
+
+               dpaa_bman_release(pool, batch, n);
+       } while (!qm_sg_entry_is_final(&sgt[idx - 1]));
+}
+
+/* Return the buffer(s) of a frame descriptor to the buffer pool(s).
+ *
+ * For an S/G frame the buffers listed in the S/G table are released first;
+ * the buffer holding the table is then mapped for the device again and
+ * released like the buffer of a contiguous frame. Nothing is done when
+ * fd->bpid has no known pool, and the table buffer is not released if it
+ * cannot be mapped again. @net_dev is not used here.
+ */
+static void dpaa_fd_release(const struct net_device *net_dev,
+                           const struct qm_fd *fd)
+{
+       const dma_addr_t fd_addr = qm_fd_addr(fd);
+       struct dpaa_bp *pool;
+       struct bm_buffer buf;
+       dma_addr_t remapped;
+       void *table_buf;
+
+       buf.data = 0;
+       bm_buffer_set64(&buf, fd_addr);
+
+       pool = dpaa_bpid2pool(fd->bpid);
+       if (!pool)
+               return;
+
+       if (qm_fd_get_format(fd) == qm_fd_sg) {
+               table_buf = phys_to_virt(fd_addr);
+
+               dma_unmap_single(pool->dev, fd_addr, pool->size,
+                                DMA_FROM_DEVICE);
+
+               /* The S/G table starts at the frame offset in the buffer */
+               dpaa_release_sgt_members(table_buf + qm_fd_get_offset(fd));
+
+               remapped = dma_map_single(pool->dev, table_buf, pool->size,
+                                         DMA_FROM_DEVICE);
+               if (dma_mapping_error(pool->dev, remapped)) {
+                       dev_err(pool->dev, "DMA mapping failed");
+                       return;
+               }
+               bm_buffer_set64(&buf, remapped);
+       }
+
+       dpaa_bman_release(pool, &buf, 1);
+}
+
+/* Account an enqueue rejection notification: bump the per-CPU counter that
+ * matches the rejection code carried by @msg. Codes without a dedicated
+ * counter are ignored.
+ */
+static void count_ern(struct dpaa_percpu_priv *percpu_priv,
+                     const union qm_mr_entry *msg)
+{
+       switch (msg->ern.rc & QM_MR_RC_MASK) {
+       /* tail drops */
+       case QM_MR_RC_CGR_TAILDROP:
+               ++percpu_priv->ern_cnt.cg_tdrop;
+               break;
+       case QM_MR_RC_FQ_TAILDROP:
+               ++percpu_priv->ern_cnt.fq_tdrop;
+               break;
+       /* WRED and generic errors */
+       case QM_MR_RC_WRED:
+               ++percpu_priv->ern_cnt.wred;
+               break;
+       case QM_MR_RC_ERROR:
+               ++percpu_priv->ern_cnt.err_cond;
+               break;
+       /* order restoration related rejections */
+       case QM_MR_RC_ORPWINDOW_EARLY:
+               ++percpu_priv->ern_cnt.early_window;
+               break;
+       case QM_MR_RC_ORPWINDOW_LATE:
+               ++percpu_priv->ern_cnt.late_window;
+               break;
+       case QM_MR_RC_ORPWINDOW_RETIRED:
+               ++percpu_priv->ern_cnt.fq_retired;
+               break;
+       case QM_MR_RC_ORP_ZERO:
+               ++percpu_priv->ern_cnt.orp_zero;
+               break;
+       default:
+               break;
+       }
+}
+
+/* Turn on HW checksum computation for this outgoing frame.
+ * If the stack did not ask for checksum offload (skb->ip_summed is not
+ * CHECKSUM_PARTIAL), we do nothing.
+ *
+ * @priv:          driver private data, used for rate-limited error reporting
+ * @skb:           the frame being transmitted
+ * @fd:            frame descriptor whose cmd field receives the csum flags
+ * @parse_results: start of the Parse Results area to fill in
+ *
+ * Returns 0 if all goes well (or HW csum doesn't apply), and -EIO if the L3
+ * or L4 protocol is not one we can offload.
+ *
+ * Note that this function may modify the fd->cmd field and the skb data buffer
+ * (the Parse Results area).
+ */
+static int dpaa_enable_tx_csum(struct dpaa_priv *priv,
+                              struct sk_buff *skb,
+                              struct qm_fd *fd,
+                              char *parse_results)
+{
+       struct fman_prs_result *parse_result;
+       u16 ethertype = ntohs(skb->protocol);
+       struct ipv6hdr *ipv6h;
+       struct iphdr *iph;
+       u8 l4_proto;
+
+       if (skb->ip_summed != CHECKSUM_PARTIAL)
+               return 0;
+
+       /* Note: L3 csum seems to be already computed in sw, but we can't choose
+        * L4 alone from the FM configuration anyway.
+        */
+
+       /* Fill in some fields of the Parse Results array, so the FMan
+        * can find them as if they came from the FMan Parser.
+        */
+       parse_result = (struct fman_prs_result *)parse_results;
+
+       /* If we're dealing with VLAN, get the real Ethernet type */
+       if (ethertype == ETH_P_8021Q) {
+               /* We can't always assume the MAC header is set correctly
+                * by the stack, so reset to beginning of skb->data
+                */
+               skb_reset_mac_header(skb);
+               ethertype = ntohs(vlan_eth_hdr(skb)->h_vlan_encapsulated_proto);
+       }
+
+       /* Fill in the relevant L3 parse result fields
+        * and read the L4 protocol type
+        */
+       switch (ethertype) {
+       case ETH_P_IP:
+               parse_result->l3r = cpu_to_be16(FM_L3_PARSE_RESULT_IPV4);
+               iph = ip_hdr(skb);
+               WARN_ON(!iph);
+               l4_proto = iph->protocol;
+               break;
+       case ETH_P_IPV6:
+               parse_result->l3r = cpu_to_be16(FM_L3_PARSE_RESULT_IPV6);
+               ipv6h = ipv6_hdr(skb);
+               WARN_ON(!ipv6h);
+               l4_proto = ipv6h->nexthdr;
+               break;
+       default:
+               /* We shouldn't even be here. Report the ethertype that was
+                * actually rejected: for VLAN frames that is the encapsulated
+                * protocol, not the outer skb->protocol.
+                */
+               if (net_ratelimit())
+                       netif_alert(priv, tx_err, priv->net_dev,
+                                   "Can't compute HW csum for L3 proto 0x%x\n",
+                                   ethertype);
+               return -EIO;
+       }
+
+       /* Fill in the relevant L4 parse result fields */
+       switch (l4_proto) {
+       case IPPROTO_UDP:
+               parse_result->l4r = FM_L4_PARSE_RESULT_UDP;
+               break;
+       case IPPROTO_TCP:
+               parse_result->l4r = FM_L4_PARSE_RESULT_TCP;
+               break;
+       default:
+               if (net_ratelimit())
+                       netif_alert(priv, tx_err, priv->net_dev,
+                                   "Can't compute HW csum for L4 proto 0x%x\n",
+                                   l4_proto);
+               return -EIO;
+       }
+
+       /* At index 0 is IPOffset_1 as defined in the Parse Results */
+       parse_result->ip_off[0] = (u8)skb_network_offset(skb);
+       parse_result->l4_off = (u8)skb_transport_offset(skb);
+
+       /* Enable L3 (and L4, if TCP or UDP) HW checksum. */
+       fd->cmd |= FM_FD_CMD_RPD | FM_FD_CMD_DTC;
+
+       /* On P1023 and similar platforms fd->cmd interpretation could
+        * be disabled by setting CONTEXT_A bit ICMD; currently this bit
+        * is not set so we do not need to check; in the future, if/when
+        * using context_a we need to check this bit
+        */
+
+       return 0;
+}
+
+/* Allocate up to 8 Rx buffers, map them for the device and release them to
+ * the buffer pool behind @dpaa_bp.
+ *
+ * If an allocation or a DMA mapping fails part-way, the buffers prepared so
+ * far are still released to the pool. Returns the value reported by
+ * dpaa_bman_release() for the buffers handed over, or 0 when not even one
+ * buffer could be prepared.
+ */
+static int dpaa_bp_add_8_bufs(const struct dpaa_bp *dpaa_bp)
+{
+       struct device *dev = dpaa_bp->dev;
+       struct bm_buffer bmb[8];
+       dma_addr_t addr;
+       void *new_buf;
+       u8 i;
+
+       for (i = 0; i < 8; i++) {
+               new_buf = netdev_alloc_frag(dpaa_bp->raw_size);
+               if (unlikely(!new_buf)) {
+                       dev_err(dev, "netdev_alloc_frag() failed, size %zu\n",
+                               dpaa_bp->raw_size);
+                       goto release_previous_buffs;
+               }
+               new_buf = PTR_ALIGN(new_buf, SMP_CACHE_BYTES);
+
+               addr = dma_map_single(dev, new_buf,
+                                     dpaa_bp->size, DMA_FROM_DEVICE);
+               if (unlikely(dma_mapping_error(dev, addr))) {
+                       dev_err(dpaa_bp->dev, "DMA map failed");
+                       /* This fragment will never reach the pool; give it
+                        * back instead of leaking it.
+                        */
+                       skb_free_frag(new_buf);
+                       goto release_previous_buffs;
+               }
+
+               bmb[i].data = 0;
+               bm_buffer_set64(&bmb[i], addr);
+       }
+
+release_bufs:
+       return dpaa_bman_release(dpaa_bp, bmb, i);
+
+release_previous_buffs:
+       WARN_ONCE(1, "dpaa_eth: failed to add buffers on Rx\n");
+
+       bm_buffer_set64(&bmb[i], 0);
+       /* Avoid releasing a completely null buffer; bman_release() requires
+        * at least one buffer.
+        */
+       if (likely(i))
+               goto release_bufs;
+
+       return 0;
+}
+
+/* Seed the buffer pool: for every possible CPU, request buffers in batches
+ * of 8 until "config_count" buffers have been requested, and add whatever
+ * each batch reports to that CPU's counter. Always returns 0.
+ */
+static int dpaa_bp_seed(struct dpaa_bp *dpaa_bp)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               /* Although we access another CPU's counters here
+                * we do it at boot time so it is safe
+                */
+               int *cpu_count = per_cpu_ptr(dpaa_bp->percpu_count, cpu);
+               int requested = 0;
+
+               while (requested < dpaa_bp->config_count) {
+                       *cpu_count += dpaa_bp_add_8_bufs(dpaa_bp);
+                       requested += 8;
+               }
+       }
+
+       return 0;
+}
+
+/* Top up a buffer pool for Rx processing.
+ *
+ * Nothing happens while *countptr is at or above
+ * FSL_DPAA_ETH_REFILL_THRESHOLD. Below it, buffers are added in batches until
+ * the count reaches FSL_DPAA_ETH_MAX_BUF_COUNT or a batch adds nothing.
+ * *countptr is updated with the new count.
+ *
+ * Returns 0 if no refill was needed or the pool was filled up, -ENOMEM if
+ * the refill stopped short of FSL_DPAA_ETH_MAX_BUF_COUNT.
+ */
+static int dpaa_eth_refill_bpool(struct dpaa_bp *dpaa_bp, int *countptr)
+{
+       int level = *countptr;
+       int added;
+
+       if (likely(level >= FSL_DPAA_ETH_REFILL_THRESHOLD))
+               return 0;
+
+       do {
+               added = dpaa_bp_add_8_bufs(dpaa_bp);
+               /* Avoid looping forever if we've temporarily run out of
+                * memory. We'll try again at the next NAPI cycle.
+                */
+               if (unlikely(!added))
+                       break;
+
+               level += added;
+       } while (level < FSL_DPAA_ETH_MAX_BUF_COUNT);
+
+       *countptr = level;
+
+       if (unlikely(level < FSL_DPAA_ETH_MAX_BUF_COUNT))
+               return -ENOMEM;
+
+       return 0;
+}
+
+/* Refill every buffer pool of the interface, using the current CPU's
+ * counters.
+ *
+ * Stops at the first problem: -EINVAL if a pool slot is empty, otherwise the
+ * error reported by dpaa_eth_refill_bpool(). Returns 0 when all pools were
+ * handled without error.
+ */
+static int dpaa_eth_refill_bpools(struct dpaa_priv *priv)
+{
+       int idx;
+
+       for (idx = 0; idx < DPAA_BPS_NUM; idx++) {
+               struct dpaa_bp *pool = priv->dpaa_bps[idx];
+               int *cpu_count;
+               int err;
+
+               if (!pool)
+                       return -EINVAL;
+
+               cpu_count = this_cpu_ptr(pool->percpu_count);
+               err = dpaa_eth_refill_bpool(pool, cpu_count);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/* Cleanup function for outgoing frame descriptors that were built on Tx path,
+ * either contiguous frames or scatter/gather ones.
+ * Skb freeing is not handled here.
+ *
+ * This function may be called on error paths in the Tx function, so guard
+ * against cases when not all fd relevant fields were filled in.
+ *
+ * For S/G frames the table holds 1 + nr_frags entries: sgt[0] describes the
+ * linear part of the skb and sgt[1..nr_frags] describe its page fragments.
+ * Every one of them is DMA-unmapped here.
+ *
+ * Return the skb backpointer, since for S/G frames the buffer containing it
+ * gets freed here.
+ */
+static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv,
+                                         const struct qm_fd *fd)
+{
+       const enum dma_data_direction dma_dir = DMA_TO_DEVICE;
+       struct device *dev = priv->net_dev->dev.parent;
+       dma_addr_t addr = qm_fd_addr(fd);
+       const struct qm_sg_entry *sgt;
+       struct sk_buff **skbh, *skb;
+       int nr_frags, i;
+
+       /* The skb backpointer is stored at the very start of the buffer */
+       skbh = (struct sk_buff **)phys_to_virt(addr);
+       skb = *skbh;
+
+       if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) {
+               nr_frags = skb_shinfo(skb)->nr_frags;
+               dma_unmap_single(dev, addr, qm_fd_get_offset(fd) +
+                                sizeof(struct qm_sg_entry) * (1 + nr_frags),
+                                dma_dir);
+
+               /* The sgt buffer has been allocated with netdev_alloc_frag(),
+                * it's from lowmem.
+                */
+               sgt = phys_to_virt(addr + qm_fd_get_offset(fd));
+
+               /* sgt[0] is from lowmem, was dma_map_single()-ed */
+               dma_unmap_single(dev, qm_sg_addr(&sgt[0]),
+                                qm_sg_entry_get_len(&sgt[0]), dma_dir);
+
+               /* remaining pages were mapped with skb_frag_dma_map();
+                * the last fragment lives at index nr_frags, so the bound
+                * is inclusive.
+                */
+               for (i = 1; i <= nr_frags; i++) {
+                       WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
+
+                       dma_unmap_page(dev, qm_sg_addr(&sgt[i]),
+                                      qm_sg_entry_get_len(&sgt[i]), dma_dir);
+               }
+
+               /* Free the page frag that we allocated on Tx */
+               skb_free_frag(phys_to_virt(addr));
+       } else {
+               dma_unmap_single(dev, addr,
+                                skb_tail_pointer(skb) - (u8 *)skbh, dma_dir);
+       }
+
+       return skb;
+}
+
+/* Wrap a received contiguous buffer into a linear skb.
+ * We are guaranteed there is enough room at the end of the data buffer to
+ * accommodate the shared info area of the skb.
+ *
+ * Returns the new skb, with its checksum state set to CHECKSUM_NONE. If the
+ * buffer pool is unknown or the skb cannot be built, the buffer is freed and
+ * NULL is returned.
+ */
+static struct sk_buff *contig_fd_to_skb(const struct dpaa_priv *priv,
+                                       const struct qm_fd *fd)
+{
+       void *buf = phys_to_virt(qm_fd_addr(fd));
+       ssize_t headroom = qm_fd_get_offset(fd);
+       struct dpaa_bp *pool;
+       struct sk_buff *skb;
+
+       WARN_ON(!IS_ALIGNED((unsigned long)buf, SMP_CACHE_BYTES));
+
+       pool = dpaa_bpid2pool(fd->bpid);
+       if (!pool)
+               goto drop_buffer;
+
+       skb = build_skb(buf, pool->size +
+                       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+       if (unlikely(!skb)) {
+               WARN_ONCE(1, "Build skb failure on Rx\n");
+               goto drop_buffer;
+       }
+
+       /* The frame data is expected right after the Rx headroom */
+       WARN_ON(headroom != priv->rx_headroom);
+       skb_reserve(skb, headroom);
+       skb_put(skb, qm_fd_get_length(fd));
+
+       skb->ip_summed = CHECKSUM_NONE;
+
+       return skb;
+
+drop_buffer:
+       skb_free_frag(buf);
+       return NULL;
+}
+
+/* Build an skb with the data of the first S/G entry in the linear portion and
+ * the rest of the frame as skb fragments.
+ *
+ * The per-CPU software counter of each entry's buffer pool is decremented as
+ * the entry is consumed. At most DPAA_SGT_MAX_ENTRIES entries are walked.
+ *
+ * The page fragment holding the S/G Table is freed here.
+ *
+ * Returns the skb, or NULL if a buffer pool is unknown or the skb cannot be
+ * built; in that case the buffers of the S/G entries and the table itself
+ * are freed.
+ */
+static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
+                                   const struct qm_fd *fd)
+{
+       ssize_t fd_off = qm_fd_get_offset(fd);
+       dma_addr_t addr = qm_fd_addr(fd);
+       const struct qm_sg_entry *sgt;
+       struct page *page, *head_page;
+       struct dpaa_bp *dpaa_bp;
+       void *vaddr, *sg_vaddr;
+       int frag_off, frag_len;
+       struct sk_buff *skb;
+       dma_addr_t sg_addr;
+       int page_offset;
+       unsigned int sz;
+       int *count_ptr;
+       int i;
+
+       vaddr = phys_to_virt(addr);
+       WARN_ON(!IS_ALIGNED((unsigned long)vaddr, SMP_CACHE_BYTES));
+
+       /* Iterate through the SGT entries and add data buffers to the skb */
+       sgt = vaddr + fd_off;
+       for (i = 0; i < DPAA_SGT_MAX_ENTRIES; i++) {
+               /* Extension bit is not supported */
+               WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
+
+               sg_addr = qm_sg_addr(&sgt[i]);
+               sg_vaddr = phys_to_virt(sg_addr);
+               WARN_ON(!IS_ALIGNED((unsigned long)sg_vaddr,
+                                   SMP_CACHE_BYTES));
+
+               /* We may use multiple Rx pools */
+               dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
+               if (!dpaa_bp)
+                       goto free_buffers;
+
+               count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
+               dma_unmap_single(dpaa_bp->dev, sg_addr, dpaa_bp->size,
+                                DMA_FROM_DEVICE);
+               if (i == 0) {
+                       sz = dpaa_bp->size +
+                               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+                       skb = build_skb(sg_vaddr, sz);
+                       if (WARN_ON(unlikely(!skb)))
+                               goto free_buffers;
+
+                       skb->ip_summed = CHECKSUM_NONE;
+
+                       /* Make sure forwarded skbs will have enough space
+                        * on Tx, if extra headers are added.
+                        */
+                       WARN_ON(fd_off != priv->rx_headroom);
+                       skb_reserve(skb, fd_off);
+                       skb_put(skb, qm_sg_entry_get_len(&sgt[i]));
+               } else {
+                       /* Not the first S/G entry; all data from buffer will
+                        * be added in an skb fragment; fragment index is offset
+                        * by one since first S/G entry was incorporated in the
+                        * linear part of the skb.
+                        *
+                        * Caution: 'page' may be a tail page.
+                        */
+                       page = virt_to_page(sg_vaddr);
+                       head_page = virt_to_head_page(sg_vaddr);
+
+                       /* Compute offset in (possibly tail) page */
+                       page_offset = ((unsigned long)sg_vaddr &
+                                       (PAGE_SIZE - 1)) +
+                               (page_address(page) - page_address(head_page));
+                       /* page_offset only refers to the beginning of sgt[i];
+                        * but the buffer itself may have an internal offset.
+                        */
+                       frag_off = qm_sg_entry_get_off(&sgt[i]) + page_offset;
+                       frag_len = qm_sg_entry_get_len(&sgt[i]);
+                       /* skb_add_rx_frag() does no checking on the page; if
+                        * we pass it a tail page, we'll end up with
+                        * bad page accounting and eventually with segfaults.
+                        */
+                       skb_add_rx_frag(skb, i - 1, head_page, frag_off,
+                                       frag_len, dpaa_bp->size);
+               }
+               /* Update the pool count for the current {cpu x bpool} */
+               (*count_ptr)--;
+
+               if (qm_sg_entry_is_final(&sgt[i]))
+                       break;
+       }
+       WARN_ONCE(i == DPAA_SGT_MAX_ENTRIES, "No final bit on SGT\n");
+
+       /* free the SG table buffer */
+       skb_free_frag(vaddr);
+
+       return skb;
+
+free_buffers:
+       /* compensate sw bpool counter changes: entries 0..i-1 had their pool
+        * counter decremented above (entry 0 included), and the loop below
+        * decrements the counter of every entry once more.
+        */
+       for (i--; i >= 0; i--) {
+               dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
+               if (dpaa_bp) {
+                       count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
+                       (*count_ptr)++;
+               }
+       }
+       /* free all the SG entries */
+       for (i = 0; i < DPAA_SGT_MAX_ENTRIES ; i++) {
+               sg_addr = qm_sg_addr(&sgt[i]);
+               sg_vaddr = phys_to_virt(sg_addr);
+               skb_free_frag(sg_vaddr);
+               dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
+               if (dpaa_bp) {
+                       count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
+                       (*count_ptr)--;
+               }
+
+               if (qm_sg_entry_is_final(&sgt[i]))
+                       break;
+       }
+       /* free the SGT fragment */
+       skb_free_frag(vaddr);
+
+       return NULL;
+}
+
+/* Describe a linear skb with a contiguous frame descriptor.
+ *
+ * The buffer seen by the hardware starts tx_headroom bytes before skb->data;
+ * a backpointer to the skb is stored at its very beginning so the skb can be
+ * found again later. @offset is not used here.
+ *
+ * Returns 0 on success, the error from dpaa_enable_tx_csum() if checksum
+ * offload could not be set up, or -EINVAL if the DMA mapping failed.
+ */
+static int skb_to_contig_fd(struct dpaa_priv *priv,
+                           struct sk_buff *skb, struct qm_fd *fd,
+                           int *offset)
+{
+       struct net_device *ndev = priv->net_dev;
+       struct device *dma_dev = ndev->dev.parent;
+       unsigned char *buf_start;
+       struct sk_buff **backptr;
+       dma_addr_t dma_addr;
+       int ret;
+
+       fd->bpid = FSL_DPAA_BPID_INV;
+
+       /* We are guaranteed to have at least tx_headroom bytes
+        * available, so just use that for offset.
+        */
+       buf_start = skb->data - priv->tx_headroom;
+       backptr = (struct sk_buff **)buf_start;
+       *backptr = skb;
+
+       /* Enable L3/L4 hardware checksum computation.
+        *
+        * This has to come before dma_map_single(DMA_TO_DEVICE), because it
+        * may write into the skb.
+        */
+       ret = dpaa_enable_tx_csum(priv, skb, fd,
+                                 ((char *)backptr) + DPAA_TX_PRIV_DATA_SIZE);
+       if (unlikely(ret < 0)) {
+               if (net_ratelimit())
+                       netif_err(priv, tx_err, ndev, "HW csum error: %d\n",
+                                 ret);
+               return ret;
+       }
+
+       /* Fill in the rest of the FD fields */
+       qm_fd_set_contig(fd, priv->tx_headroom, skb->len);
+       fd->cmd |= FM_FD_CMD_FCO;
+
+       /* Map the entire buffer size that may be seen by FMan, but no more */
+       dma_addr = dma_map_single(dma_dev, backptr,
+                                 skb_tail_pointer(skb) - buf_start,
+                                 DMA_TO_DEVICE);
+       if (unlikely(dma_mapping_error(dma_dev, dma_addr))) {
+               if (net_ratelimit())
+                       netif_err(priv, tx_err, ndev, "dma_map_single() failed\n");
+               return -EINVAL;
+       }
+       qm_fd_addr_set64(fd, dma_addr);
+
+       return 0;
+}
+
+/* Build a scatter/gather frame descriptor (FD) for a non-linear skb.
+ *
+ * A page fragment is allocated to hold the Tx headroom followed by the S/G
+ * table (SGT): entry 0 describes the linear part of the skb and entries
+ * 1..nr_frags describe its page fragments, the last one carrying the final
+ * flag. The skb pointer is stored at the start of that buffer, and the FD is
+ * pointed at the DMA-mapped buffer.
+ *
+ * Returns 0 on success, -ENOMEM if the SGT buffer cannot be allocated, the
+ * error reported by dpaa_enable_tx_csum(), or -EINVAL if a DMA mapping fails.
+ * On failure the mappings made here are undone and the SGT buffer is freed;
+ * the skb is not freed by this function.
+ */
+static int skb_to_sg_fd(struct dpaa_priv *priv,
+                       struct sk_buff *skb, struct qm_fd *fd)
+{
+       const enum dma_data_direction dma_dir = DMA_TO_DEVICE;
+       const int nr_frags = skb_shinfo(skb)->nr_frags;
+       struct net_device *net_dev = priv->net_dev;
+       struct device *dev = net_dev->dev.parent;
+       struct qm_sg_entry *sgt;
+       struct sk_buff **skbh;
+       int i, j, err, sz;
+       void *buffer_start;
+       skb_frag_t *frag;
+       dma_addr_t addr;
+       size_t frag_len;
+       void *sgt_buf;
+
+       /* get a page frag to store the SGTable */
+       sz = SKB_DATA_ALIGN(priv->tx_headroom +
+               sizeof(struct qm_sg_entry) * (1 + nr_frags));
+       sgt_buf = netdev_alloc_frag(sz);
+       if (unlikely(!sgt_buf)) {
+               netdev_err(net_dev, "netdev_alloc_frag() failed for size %d\n",
+                          sz);
+               return -ENOMEM;
+       }
+
+       /* Enable L3/L4 hardware checksum computation.
+        *
+        * We must do this before dma_map_single(DMA_TO_DEVICE), because we may
+        * need to write into the skb.
+        */
+       err = dpaa_enable_tx_csum(priv, skb, fd,
+                                 sgt_buf + DPAA_TX_PRIV_DATA_SIZE);
+       if (unlikely(err < 0)) {
+               if (net_ratelimit())
+                       netif_err(priv, tx_err, net_dev, "HW csum error: %d\n",
+                                 err);
+               goto csum_failed;
+       }
+
+       /* SGT entry 0 covers the linear part of the skb */
+       sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
+       qm_sg_entry_set_len(&sgt[0], skb_headlen(skb));
+       sgt[0].bpid = FSL_DPAA_BPID_INV;
+       sgt[0].offset = 0;
+       addr = dma_map_single(dev, skb->data,
+                             skb_headlen(skb), dma_dir);
+       if (unlikely(dma_mapping_error(dev, addr))) {
+               dev_err(dev, "DMA mapping failed\n");
+               err = -EINVAL;
+               goto sg0_map_failed;
+       }
+       qm_sg_entry_set64(&sgt[0], addr);
+
+       /* Until a page fragment is processed the linear part is the last
+        * entry, so its length is what the final-entry marker below needs.
+        */
+       frag_len = skb_headlen(skb);
+
+       /* populate the rest of SGT entries */
+       for (i = 1; i <= nr_frags; i++) {
+               frag = &skb_shinfo(skb)->frags[i - 1];
+               /* Map and describe each fragment with its own size */
+               frag_len = frag->size;
+               WARN_ON(!skb_frag_page(frag));
+               addr = skb_frag_dma_map(dev, frag, 0,
+                                       frag_len, dma_dir);
+               if (unlikely(dma_mapping_error(dev, addr))) {
+                       dev_err(dev, "DMA mapping failed\n");
+                       err = -EINVAL;
+                       goto sg_map_failed;
+               }
+
+               qm_sg_entry_set_len(&sgt[i], frag_len);
+               sgt[i].bpid = FSL_DPAA_BPID_INV;
+               sgt[i].offset = 0;
+
+               /* keep the offset in the address */
+               qm_sg_entry_set64(&sgt[i], addr);
+       }
+       /* Flag the last populated entry as final, keeping its length */
+       qm_sg_entry_set_f(&sgt[i - 1], frag_len);
+
+       qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
+
+       /* DMA map the SGT page */
+       buffer_start = (void *)sgt - priv->tx_headroom;
+       /* Store the skb backpointer at the very start of the buffer */
+       skbh = (struct sk_buff **)buffer_start;
+       *skbh = skb;
+
+       addr = dma_map_single(dev, buffer_start, priv->tx_headroom +
+                             sizeof(struct qm_sg_entry) * (1 + nr_frags),
+                             dma_dir);
+       if (unlikely(dma_mapping_error(dev, addr))) {
+               dev_err(dev, "DMA mapping failed\n");
+               err = -EINVAL;
+               goto sgt_map_failed;
+       }
+
+       fd->bpid = FSL_DPAA_BPID_INV;
+       fd->cmd |= FM_FD_CMD_FCO;
+       qm_fd_addr_set64(fd, addr);
+
+       return 0;
+
+sgt_map_failed:
+sg_map_failed:
+       /* Entries 0..i-1 were mapped successfully; undo them */
+       for (j = 0; j < i; j++)
+               dma_unmap_page(dev, qm_sg_addr(&sgt[j]),
+                              qm_sg_entry_get_len(&sgt[j]), dma_dir);
+sg0_map_failed:
+csum_failed:
+       skb_free_frag(sgt_buf);
+
+       return err;
+}
+
+/* Enqueue a prepared frame descriptor on the egress frame queue selected by
+ * @queue, retrying while the enqueue reports -EBUSY, and update the per-CPU
+ * Tx counters. Returns 0 on success or the negative enqueue error.
+ */
+static inline int dpaa_xmit(struct dpaa_priv *priv,
+                           struct rtnl_link_stats64 *percpu_stats,
+                           int queue,
+                           struct qm_fd *fd)
+{
+       struct qman_fq *tx_fq = priv->egress_fqs[queue];
+       int attempt;
+       int rc;
+
+       /* FDs without a valid buffer pool ID get the FQID of the matching
+        * confirmation queue OR-ed into the command word.
+        */
+       if (fd->bpid == FSL_DPAA_BPID_INV)
+               fd->cmd |= qman_fq_fqid(priv->conf_fqs[queue]);
+
+       /* Trace this Tx fd */
+       trace_dpaa_tx_fd(priv->net_dev, tx_fq, fd);
+
+       /* Only -EBUSY is worth another attempt */
+       for (attempt = 0; attempt < DPAA_ENQUEUE_RETRIES; attempt++) {
+               rc = qman_enqueue(tx_fq, fd);
+               if (rc != -EBUSY)
+                       break;
+       }
+
+       if (likely(rc >= 0)) {
+               percpu_stats->tx_packets++;
+               percpu_stats->tx_bytes += qm_fd_get_length(fd);
+               return 0;
+       }
+
+       percpu_stats->tx_errors++;
+       percpu_stats->tx_fifo_errors++;
+
+       return rc;
+}
+
+/* Transmit entry point (installed as .ndo_start_xmit in dpaa_ops).
+ *
+ * Converts the skb into either a scatter/gather or a contiguous frame
+ * descriptor and enqueues it with dpaa_xmit(). Always returns NETDEV_TX_OK:
+ * on any failure the skb is freed here and tx_errors is incremented.
+ */
+static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
+{
+       const int queue_mapping = skb_get_queue_mapping(skb);
+       bool nonlinear = skb_is_nonlinear(skb);
+       struct rtnl_link_stats64 *percpu_stats;
+       struct dpaa_percpu_priv *percpu_priv;
+       struct dpaa_priv *priv;
+       struct qm_fd fd;
+       int offset = 0;
+       int err = 0;
+
+       priv = netdev_priv(net_dev);
+       percpu_priv = this_cpu_ptr(priv->percpu_priv);
+       percpu_stats = &percpu_priv->stats;
+
+       qm_fd_clear_fd(&fd);
+
+       if (!nonlinear) {
+               /* We're going to store the skb backpointer at the beginning
+                * of the data buffer, so we need a privately owned skb
+                *
+                * We've made sure skb is not shared in dev->priv_flags,
+                * we need to verify the skb head is not cloned
+                */
+               if (skb_cow_head(skb, priv->tx_headroom))
+                       goto enomem;
+
+               WARN_ON(skb_is_nonlinear(skb));
+       }
+
+       /* MAX_SKB_FRAGS is equal or larger than our dpaa_SGT_MAX_ENTRIES;
+        * make sure we don't feed FMan with more fragments than it supports.
+        */
+       if (nonlinear &&
+           likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
+               /* Just create a S/G fd based on the skb */
+               err = skb_to_sg_fd(priv, skb, &fd);
+               /* Counted whether or not the conversion succeeded */
+               percpu_priv->tx_frag_skbuffs++;
+       } else {
+               /* If the egress skb contains more fragments than we support
+                * we have no choice but to linearize it ourselves.
+                */
+               if (unlikely(nonlinear) && __skb_linearize(skb))
+                       goto enomem;
+
+               /* Finally, create a contig FD from this skb */
+               err = skb_to_contig_fd(priv, skb, &fd, &offset);
+       }
+       if (unlikely(err < 0))
+               goto skb_to_fd_failed;
+
+       if (likely(dpaa_xmit(priv, percpu_stats, queue_mapping, &fd) == 0))
+               return NETDEV_TX_OK;
+
+       /* Enqueue failed: undo the FD setup before dropping the skb.
+        * NOTE(review): dpaa_xmit() has already incremented tx_errors on this
+        * path, so falling through to the increment below counts the same
+        * failure twice - confirm whether that is intended.
+        */
+       dpaa_cleanup_tx_fd(priv, &fd);
+skb_to_fd_failed:
+enomem:
+       percpu_stats->tx_errors++;
+       dev_kfree_skb(skb);
+       return NETDEV_TX_OK;
+}
+
+/* Account an errored Rx frame: log the masked FD status (rate limited), bump
+ * the generic and per-cause Rx error counters, then hand the frame to
+ * dpaa_fd_release(). @fqid is not used by this function.
+ */
+static void dpaa_rx_error(struct net_device *net_dev,
+                         const struct dpaa_priv *priv,
+                         struct dpaa_percpu_priv *percpu_priv,
+                         const struct qm_fd *fd,
+                         u32 fqid)
+{
+       const u32 status = fd->status;
+
+       if (net_ratelimit())
+               netif_err(priv, hw, net_dev, "Err FD status = 0x%08x\n",
+                         status & FM_FD_STAT_RX_ERRORS);
+
+       percpu_priv->stats.rx_errors++;
+
+       /* Several error bits may be set at once; count each one */
+       if (status & FM_FD_ERR_DMA)
+               percpu_priv->rx_errors.dme++;
+       if (status & FM_FD_ERR_PHYSICAL)
+               percpu_priv->rx_errors.fpe++;
+       if (status & FM_FD_ERR_SIZE)
+               percpu_priv->rx_errors.fse++;
+       if (status & FM_FD_ERR_PRS_HDR_ERR)
+               percpu_priv->rx_errors.phe++;
+
+       dpaa_fd_release(net_dev, fd);
+}
+
+static void dpaa_tx_error(struct net_device *net_dev,
+                         const struct dpaa_priv *priv,
+                         struct dpaa_percpu_priv *percpu_priv,
+                         const struct qm_fd *fd,
+                         u32 fqid)
+{
+       struct sk_buff *skb;
+
+       if (net_ratelimit())
+               netif_warn(priv, hw, net_dev, "FD status = 0x%08x\n",
+                          fd->status & FM_FD_STAT_TX_ERRORS);
+
+       percpu_priv->stats.tx_errors++;
+
+       skb = dpaa_cleanup_tx_fd(priv, fd);
+       dev_kfree_skb(skb);
+}
+
+/* NAPI poll handler: poll the portal stored in the NAPI context through
+ * qman_p_poll_dqrr() with the given @budget and return the value it reports.
+ */
+static int dpaa_eth_poll(struct napi_struct *napi, int budget)
+{
+       struct dpaa_napi_portal *np;
+       int cleaned;
+
+       np = container_of(napi, struct dpaa_napi_portal, napi);
+       cleaned = qman_p_poll_dqrr(np->p, budget);
+
+       /* Budget not exhausted: polling is complete for now */
+       if (cleaned < budget)
+               napi_complete(napi);
+
+       /* Add the DQRI interrupt source back when polling completed, or when
+        * the NAPI context has been flagged as down.
+        */
+       if (cleaned < budget || np->down)
+               qman_p_irqsource_add(np->p, QM_PIRQ_DQRI);
+
+       return cleaned;
+}
+
+static void dpaa_tx_conf(struct net_device *net_dev,
+                        const struct dpaa_priv *priv,
+                        struct dpaa_percpu_priv *percpu_priv,
+                        const struct qm_fd *fd,
+                        u32 fqid)
+{
+       struct sk_buff  *skb;
+
+       if (unlikely(fd->status & FM_FD_STAT_TX_ERRORS) != 0) {
+               if (net_ratelimit())
+                       netif_warn(priv, hw, net_dev, "FD status = 0x%08x\n",
+                                  fd->status & FM_FD_STAT_TX_ERRORS);
+
+               percpu_priv->stats.tx_errors++;
+       }
+
+       percpu_priv->tx_confirm++;
+
+       skb = dpaa_cleanup_tx_fd(priv, fd);
+
+       consume_skb(skb);
+}
+
+/* Decide whether a dequeue callback may process its frame right away.
+ *
+ * Returns 0 when called while serving a softirq and not in hard-IRQ context.
+ * Otherwise removes the portal's DQRI interrupt source, records the portal
+ * in the per-CPU NAPI context, schedules NAPI and returns 1.
+ */
+static inline int dpaa_eth_napi_schedule(struct dpaa_percpu_priv *percpu_priv,
+                                        struct qman_portal *portal)
+{
+       if (likely(!in_irq() && in_serving_softirq()))
+               return 0;
+
+       /* Disable QMan IRQ and invoke NAPI */
+       qman_p_irqsource_remove(portal, QM_PIRQ_DQRI);
+
+       percpu_priv->np.p = portal;
+       napi_schedule(&percpu_priv->np.napi);
+       percpu_priv->in_interrupt++;
+
+       return 1;
+}
+
+static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal,
+                                             struct qman_fq *fq,
+                                             const struct qm_dqrr_entry *dq)
+{
+       struct dpaa_fq *dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
+       struct dpaa_percpu_priv *percpu_priv;
+       struct net_device *net_dev;
+       struct dpaa_bp *dpaa_bp;
+       struct dpaa_priv *priv;
+
+       net_dev = dpaa_fq->net_dev;
+       priv = netdev_priv(net_dev);
+       dpaa_bp = dpaa_bpid2pool(dq->fd.bpid);
+       if (!dpaa_bp)
+               return qman_cb_dqrr_consume;
+
+       percpu_priv = this_cpu_ptr(priv->percpu_priv);
+
+       if (dpaa_eth_napi_schedule(percpu_priv, portal))
+               return qman_cb_dqrr_stop;
+
+       if (dpaa_eth_refill_bpools(priv))
+               /* Unable to refill the buffer pool due to insufficient
+                * system memory. Just release the frame back into the pool,
+                * otherwise we'll soon end up with an empty buffer pool.
+                */
+               dpaa_fd_release(net_dev, &dq->fd);
+       else
+               dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+
+       return qman_cb_dqrr_consume;
+}
+
+/* Dequeue callback for the default Rx frame queues.
+ *
+ * Returns qman_cb_dqrr_stop when processing was deferred to NAPI by
+ * dpaa_eth_napi_schedule(), and qman_cb_dqrr_consume in every other case.
+ * The frame is handed to dpaa_fd_release() when the buffer pools cannot be
+ * refilled or when the FD status reports an Rx error; otherwise it is
+ * converted to an skb and passed to netif_receive_skb().
+ */
+static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
+                                               struct qman_fq *fq,
+                                               const struct qm_dqrr_entry *dq)
+{
+       struct dpaa_fq *dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
+       struct rtnl_link_stats64 *percpu_stats;
+       struct dpaa_percpu_priv *percpu_priv;
+       const struct qm_fd *fd = &dq->fd;
+       dma_addr_t addr = qm_fd_addr(fd);
+       enum qm_fd_format fd_format;
+       struct net_device *net_dev;
+       u32 fd_status = fd->status;
+       struct dpaa_bp *dpaa_bp;
+       struct dpaa_priv *priv;
+       unsigned int skb_len;
+       struct sk_buff *skb;
+       int *count_ptr;
+
+       net_dev = dpaa_fq->net_dev;
+       priv = netdev_priv(net_dev);
+       /* fd points at dq->fd, so one pool lookup serves the whole function */
+       dpaa_bp = dpaa_bpid2pool(fd->bpid);
+       if (!dpaa_bp)
+               return qman_cb_dqrr_consume;
+
+       /* Trace the Rx fd */
+       trace_dpaa_rx_fd(net_dev, fq, fd);
+
+       percpu_priv = this_cpu_ptr(priv->percpu_priv);
+       percpu_stats = &percpu_priv->stats;
+
+       if (unlikely(dpaa_eth_napi_schedule(percpu_priv, portal)))
+               return qman_cb_dqrr_stop;
+
+       /* Make sure we didn't run out of buffers */
+       if (unlikely(dpaa_eth_refill_bpools(priv))) {
+               /* Unable to refill the buffer pool due to insufficient
+                * system memory. Just release the frame back into the pool,
+                * otherwise we'll soon end up with an empty buffer pool.
+                */
+               dpaa_fd_release(net_dev, fd);
+               return qman_cb_dqrr_consume;
+       }
+
+       if (unlikely(fd_status & FM_FD_STAT_RX_ERRORS)) {
+               if (net_ratelimit())
+                       netif_warn(priv, hw, net_dev, "FD status = 0x%08x\n",
+                                  fd_status & FM_FD_STAT_RX_ERRORS);
+
+               percpu_stats->rx_errors++;
+               dpaa_fd_release(net_dev, fd);
+               return qman_cb_dqrr_consume;
+       }
+
+       dma_unmap_single(dpaa_bp->dev, addr, dpaa_bp->size, DMA_FROM_DEVICE);
+
+       /* prefetch the first 64 bytes of the frame or the SGT start */
+       prefetch(phys_to_virt(addr) + qm_fd_get_offset(fd));
+
+       fd_format = qm_fd_get_format(fd);
+       /* The only FD types that we may receive are contig and S/G */
+       WARN_ON((fd_format != qm_fd_contig) && (fd_format != qm_fd_sg));
+
+       /* Account for either the contig buffer or the SGT buffer (depending on
+        * which case we were in) having been removed from the pool.
+        */
+       count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
+       (*count_ptr)--;
+
+       if (likely(fd_format == qm_fd_contig))
+               skb = contig_fd_to_skb(priv, fd);
+       else
+               skb = sg_fd_to_skb(priv, fd);
+       if (!skb)
+               return qman_cb_dqrr_consume;
+
+       skb->protocol = eth_type_trans(skb, net_dev);
+
+       /* Read the length now; the skb must not be touched once handed over */
+       skb_len = skb->len;
+
+       if (unlikely(netif_receive_skb(skb) == NET_RX_DROP))
+               return qman_cb_dqrr_consume;
+
+       percpu_stats->rx_packets++;
+       percpu_stats->rx_bytes += skb_len;
+
+       return qman_cb_dqrr_consume;
+}
+
+static enum qman_cb_dqrr_result conf_error_dqrr(struct qman_portal *portal,
+                                               struct qman_fq *fq,
+                                               const struct qm_dqrr_entry *dq)
+{
+       struct dpaa_percpu_priv *percpu_priv;
+       struct net_device *net_dev;
+       struct dpaa_priv *priv;
+
+       net_dev = ((struct dpaa_fq *)fq)->net_dev;
+       priv = netdev_priv(net_dev);
+
+       percpu_priv = this_cpu_ptr(priv->percpu_priv);
+
+       if (dpaa_eth_napi_schedule(percpu_priv, portal))
+               return qman_cb_dqrr_stop;
+
+       dpaa_tx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+
+       return qman_cb_dqrr_consume;
+}
+
+/* Dequeue callback for the default Tx confirmation frame queues.
+ *
+ * Returns qman_cb_dqrr_stop when processing was deferred to NAPI by
+ * dpaa_eth_napi_schedule(); otherwise passes the FD to dpaa_tx_conf() and
+ * returns qman_cb_dqrr_consume.
+ */
+static enum qman_cb_dqrr_result conf_dflt_dqrr(struct qman_portal *portal,
+                                              struct qman_fq *fq,
+                                              const struct qm_dqrr_entry *dq)
+{
+       /* Use container_of() like rx_error_dqrr() rather than a raw cast, so
+        * the lookup does not depend on where fq_base sits in struct dpaa_fq.
+        */
+       struct dpaa_fq *dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
+       struct dpaa_percpu_priv *percpu_priv;
+       struct net_device *net_dev;
+       struct dpaa_priv *priv;
+
+       net_dev = dpaa_fq->net_dev;
+       priv = netdev_priv(net_dev);
+
+       /* Trace the fd */
+       trace_dpaa_tx_conf_fd(net_dev, fq, &dq->fd);
+
+       percpu_priv = this_cpu_ptr(priv->percpu_priv);
+
+       if (dpaa_eth_napi_schedule(percpu_priv, portal))
+               return qman_cb_dqrr_stop;
+
+       dpaa_tx_conf(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+
+       return qman_cb_dqrr_consume;
+}
+
+/* Enqueue rejection notification (ERN) callback for the egress frame queues:
+ * count the frame as dropped, record the rejection through count_ern(), and
+ * free the skb recovered by dpaa_cleanup_tx_fd().
+ */
+static void egress_ern(struct qman_portal *portal,
+                      struct qman_fq *fq,
+                      const union qm_mr_entry *msg)
+{
+       /* Use container_of() like rx_error_dqrr() rather than a raw cast, so
+        * the lookup does not depend on where fq_base sits in struct dpaa_fq.
+        */
+       struct dpaa_fq *dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
+       const struct qm_fd *fd = &msg->ern.fd;
+       struct dpaa_percpu_priv *percpu_priv;
+       const struct dpaa_priv *priv;
+       struct net_device *net_dev;
+       struct sk_buff *skb;
+
+       net_dev = dpaa_fq->net_dev;
+       priv = netdev_priv(net_dev);
+       percpu_priv = this_cpu_ptr(priv->percpu_priv);
+
+       percpu_priv->stats.tx_dropped++;
+       percpu_priv->stats.tx_fifo_errors++;
+       count_ern(percpu_priv, msg);
+
+       skb = dpaa_cleanup_tx_fd(priv, fd);
+       dev_kfree_skb_any(skb);
+}
+
+/* Frame queue callback table passed to dpaa_fq_setup() from the probe
+ * routine: one dequeue (DQRR) handler per Rx/Tx default and error queue, plus
+ * the enqueue rejection (ERN) handler for the egress queues.
+ */
+static const struct dpaa_fq_cbs dpaa_fq_cbs = {
+       .rx_defq = { .cb = { .dqrr = rx_default_dqrr } },
+       .tx_defq = { .cb = { .dqrr = conf_dflt_dqrr } },
+       .rx_errq = { .cb = { .dqrr = rx_error_dqrr } },
+       .tx_errq = { .cb = { .dqrr = conf_error_dqrr } },
+       .egress_ern = { .cb = { .ern = egress_ern } }
+};
+
+/* Clear the "down" flag and enable NAPI on every possible CPU's context */
+static void dpaa_eth_napi_enable(struct dpaa_priv *priv)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct dpaa_napi_portal *np =
+                       &per_cpu_ptr(priv->percpu_priv, cpu)->np;
+
+               np->down = 0;
+               napi_enable(&np->napi);
+       }
+}
+
+/* Set the "down" flag and disable NAPI on every possible CPU's context */
+static void dpaa_eth_napi_disable(struct dpaa_priv *priv)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct dpaa_napi_portal *np =
+                       &per_cpu_ptr(priv->percpu_priv, cpu)->np;
+
+               /* Flag first; dpaa_eth_poll() reads it */
+               np->down = 1;
+               napi_disable(&np->napi);
+       }
+}
+
+/* Bring the interface up (installed as .ndo_open in dpaa_ops).
+ *
+ * Enables NAPI, initialises the PHY, enables the FMan ports, starts the MAC
+ * and finally starts the Tx queues. Returns 0 on success, -ENODEV if
+ * init_phy() yields no PHY device, or the error from fman_port_enable() or
+ * the MAC start hook. NAPI is disabled again on every failure path, so it is
+ * never left enabled when the open fails.
+ */
+static int dpaa_open(struct net_device *net_dev)
+{
+       struct mac_device *mac_dev;
+       struct dpaa_priv *priv;
+       int err, i;
+
+       priv = netdev_priv(net_dev);
+       mac_dev = priv->mac_dev;
+       dpaa_eth_napi_enable(priv);
+
+       net_dev->phydev = mac_dev->init_phy(net_dev, priv->mac_dev);
+       if (!net_dev->phydev) {
+               netif_err(priv, ifup, net_dev, "init_phy() failed\n");
+               err = -ENODEV;
+               /* No port was enabled yet; only NAPI needs undoing */
+               goto phy_init_failed;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
+               err = fman_port_enable(mac_dev->port[i]);
+               if (err)
+                       goto mac_start_failed;
+       }
+
+       err = priv->mac_dev->start(mac_dev);
+       if (err < 0) {
+               netif_err(priv, ifup, net_dev, "mac_dev->start() = %d\n", err);
+               goto mac_start_failed;
+       }
+
+       netif_tx_start_all_queues(net_dev);
+
+       return 0;
+
+mac_start_failed:
+       for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++)
+               fman_port_disable(mac_dev->port[i]);
+
+phy_init_failed:
+       dpaa_eth_napi_disable(priv);
+
+       return err;
+}
+
+/* Take the interface down (installed as .ndo_stop in dpaa_ops): run
+ * dpaa_stop(), then disable NAPI on all CPUs. Returns dpaa_stop()'s result.
+ */
+static int dpaa_eth_stop(struct net_device *net_dev)
+{
+       struct dpaa_priv *priv = netdev_priv(net_dev);
+       int err = dpaa_stop(net_dev);
+
+       /* NAPI is disabled even if dpaa_stop() reported an error */
+       dpaa_eth_napi_disable(priv);
+
+       return err;
+}
+
+/* net_device operations for DPAA interfaces; handed to dpaa_netdev_init()
+ * from the probe routine.
+ */
+static const struct net_device_ops dpaa_ops = {
+       .ndo_open = dpaa_open,
+       .ndo_start_xmit = dpaa_start_xmit,
+       .ndo_stop = dpaa_eth_stop,
+       .ndo_tx_timeout = dpaa_tx_timeout,
+       .ndo_get_stats64 = dpaa_get_stats64,
+       .ndo_set_mac_address = dpaa_set_mac_address,
+       .ndo_validate_addr = eth_validate_addr,
+       .ndo_set_rx_mode = dpaa_set_rx_mode,
+};
+
+/* Register one NAPI context per possible CPU, all polled by dpaa_eth_poll().
+ * Always returns 0.
+ */
+static int dpaa_napi_add(struct net_device *net_dev)
+{
+       struct dpaa_priv *priv = netdev_priv(net_dev);
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct dpaa_napi_portal *np =
+                       &per_cpu_ptr(priv->percpu_priv, cpu)->np;
+
+               netif_napi_add(net_dev, &np->napi, dpaa_eth_poll,
+                              NAPI_POLL_WEIGHT);
+       }
+
+       return 0;
+}
+
+/* Unregister the per-CPU NAPI contexts added by dpaa_napi_add() */
+static void dpaa_napi_del(struct net_device *net_dev)
+{
+       struct dpaa_priv *priv = netdev_priv(net_dev);
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               struct dpaa_napi_portal *np =
+                       &per_cpu_ptr(priv->percpu_priv, cpu)->np;
+
+               netif_napi_del(&np->napi);
+       }
+}
+
+/* Free one buffer described by @bmb: undo its DMA_FROM_DEVICE mapping of
+ * bp->size bytes and free the underlying page fragment. Installed as the
+ * pool's free_buf_cb by dpaa_bp_alloc().
+ */
+static inline void dpaa_bp_free_pf(const struct dpaa_bp *bp,
+                                  struct bm_buffer *bmb)
+{
+       const dma_addr_t buf_addr = bm_buf_addr(bmb);
+       void *vaddr = phys_to_virt(buf_addr);
+
+       dma_unmap_single(bp->dev, buf_addr, bp->size, DMA_FROM_DEVICE);
+       skb_free_frag(vaddr);
+}
+
+/* Alloc the dpaa_bp struct and configure default values.
+ *
+ * Both the struct and its per-CPU buffer counter are device-managed
+ * allocations tied to @dev. Returns the new pool descriptor, or
+ * ERR_PTR(-ENOMEM) if either allocation fails.
+ */
+static struct dpaa_bp *dpaa_bp_alloc(struct device *dev)
+{
+       struct dpaa_bp *dpaa_bp;
+
+       dpaa_bp = devm_kzalloc(dev, sizeof(*dpaa_bp), GFP_KERNEL);
+       if (!dpaa_bp)
+               return ERR_PTR(-ENOMEM);
+
+       dpaa_bp->bpid = FSL_DPAA_BPID_INV;
+       dpaa_bp->percpu_count = devm_alloc_percpu(dev, *dpaa_bp->percpu_count);
+       /* The Rx path dereferences percpu_count, so it must not be NULL */
+       if (!dpaa_bp->percpu_count)
+               return ERR_PTR(-ENOMEM);
+
+       dpaa_bp->config_count = FSL_DPAA_ETH_MAX_BUF_COUNT;
+
+       dpaa_bp->seed_cb = dpaa_bp_seed;
+       dpaa_bp->free_buf_cb = dpaa_bp_free_pf;
+
+       return dpaa_bp;
+}
+
+/* Place all ingress FQs (Rx Default, Rx Error) in a dedicated CGR.
+ * We won't be sending congestion notifications to FMan; for now, we just use
+ * this CGR to generate enqueue rejections to FMan in order to drop the frames
+ * before they reach our ingress queues and eat up memory.
+ *
+ * Returns a negative error code if the CGR ID cannot be allocated or the CGR
+ * cannot be created (the ID is released again in the latter case); otherwise
+ * returns the non-negative result of qman_create_cgr() and sets
+ * priv->use_ingress_cgr.
+ */
+static int dpaa_ingress_cgr_init(struct dpaa_priv *priv)
+{
+       struct qm_mcc_initcgr initcgr;
+       u32 cs_th;
+       int err;
+
+       err = qman_alloc_cgrid(&priv->ingress_cgr.cgrid);
+       if (err < 0) {
+               if (netif_msg_drv(priv))
+                       pr_err("Error %d allocating CGR ID\n", err);
+               goto out_error;
+       }
+
+       /* Start from a zeroed command so that fields not set explicitly below
+        * do not carry stack garbage into qman_create_cgr().
+        */
+       memset(&initcgr, 0, sizeof(initcgr));
+
+       /* Enable CS TD, but disable Congestion State Change Notifications.
+        * NOTE(review): cscn_en is set to QM_CGR_EN here while the write-enable
+        * mask does not include a CSCN bit - presumably the field is ignored;
+        * confirm against the QMan documentation.
+        */
+       initcgr.we_mask = QM_CGR_WE_CS_THRES;
+       initcgr.cgr.cscn_en = QM_CGR_EN;
+       cs_th = DPAA_INGRESS_CS_THRESHOLD;
+       qm_cgr_cs_thres_set64(&initcgr.cgr.cs_thres, cs_th, 1);
+
+       initcgr.we_mask |= QM_CGR_WE_CSTD_EN;
+       initcgr.cgr.cstd_en = QM_CGR_EN;
+
+       /* This CGR will be associated with the SWP affined to the current CPU.
+        * However, we'll place all our ingress FQs in it.
+        */
+       err = qman_create_cgr(&priv->ingress_cgr, QMAN_CGR_FLAG_USE_INIT,
+                             &initcgr);
+       if (err < 0) {
+               if (netif_msg_drv(priv))
+                       pr_err("Error %d creating ingress CGR with ID %d\n",
+                              err, priv->ingress_cgr.cgrid);
+               qman_release_cgrid(priv->ingress_cgr.cgrid);
+               goto out_error;
+       }
+       if (netif_msg_drv(priv))
+               pr_debug("Created ingress CGR %d for netdev with hwaddr %pM\n",
+                        priv->ingress_cgr.cgrid, priv->mac_dev->addr);
+
+       priv->use_ingress_cgr = true;
+
+out_error:
+       return err;
+}
+
+/* Forward declaration only; no definition or use of this table is visible in
+ * this part of the file. NOTE(review): confirm it is still needed.
+ */
+static const struct of_device_id dpaa_match[];
+
+/* Compute the frame headroom required by the buffer layout @bl.
+ *
+ * The frame headroom must accommodate:
+ * - the driver private data area
+ * - parse results, hash results, timestamp if selected
+ * If either hash results or time stamp are selected, both will
+ * be copied to/from the frame headroom, as TS is located between PR and
+ * HR in the IC and IC copy size has a granularity of 16bytes
+ * (see description of FMBM_RICP and FMBM_TICP registers in DPAARM)
+ *
+ * The result is rounded up to a multiple of DPAA_FD_DATA_ALIGNMENT when that
+ * constant is non-zero.
+ */
+static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl)
+{
+       u16 headroom = (u16)(bl->priv_data_size + DPAA_PARSE_RESULTS_SIZE +
+                            DPAA_TIME_STAMP_SIZE + DPAA_HASH_RESULTS_SIZE);
+
+       /* No alignment requirement configured */
+       if (!DPAA_FD_DATA_ALIGNMENT)
+               return headroom;
+
+       return ALIGN(headroom, DPAA_FD_DATA_ALIGNMENT);
+}
+
+/* Probe one DPAA Ethernet platform device.
+ *
+ * Allocates the net_device, looks up the MAC device, sets up DMA masks,
+ * buffer pools, frame queues and congestion groups, initialises the FMan
+ * ports, allocates the per-CPU private data, registers NAPI and finally
+ * initialises the netdev.
+ *
+ * Returns 0 on success or a negative error code; every failure path goes
+ * through the unwind labels at the end of the function.
+ */
+static int dpaa_eth_probe(struct platform_device *pdev)
+{
+       struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM] = {NULL};
+       struct dpaa_percpu_priv *percpu_priv;
+       struct net_device *net_dev = NULL;
+       struct dpaa_fq *dpaa_fq, *tmp;
+       struct dpaa_priv *priv = NULL;
+       struct fm_port_fqs port_fqs;
+       struct mac_device *mac_dev;
+       int err = 0, i, channel;
+       struct device *dev;
+
+       dev = &pdev->dev;
+
+       /* Allocate this early, so we can store relevant information in
+        * the private area
+        */
+       net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA_ETH_TXQ_NUM);
+       if (!net_dev) {
+               dev_err(dev, "alloc_etherdev_mq() failed\n");
+               /* Without this the probe would report success */
+               err = -ENOMEM;
+               goto alloc_etherdev_mq_failed;
+       }
+
+       /* Do this here, so we can be verbose early */
+       SET_NETDEV_DEV(net_dev, dev);
+       dev_set_drvdata(dev, net_dev);
+
+       priv = netdev_priv(net_dev);
+       priv->net_dev = net_dev;
+
+       priv->msg_enable = netif_msg_init(debug, DPAA_MSG_DEFAULT);
+
+       mac_dev = dpaa_mac_dev_get(pdev);
+       if (IS_ERR(mac_dev)) {
+               dev_err(dev, "dpaa_mac_dev_get() failed\n");
+               err = PTR_ERR(mac_dev);
+               goto mac_probe_failed;
+       }
+
+       /* If fsl_fm_max_frm is set to a higher value than the all-common 1500,
+        * we choose conservatively and let the user explicitly set a higher
+        * MTU via ifconfig. Otherwise, the user may end up with different MTUs
+        * in the same LAN.
+        * If on the other hand fsl_fm_max_frm has been chosen below 1500,
+        * start with the maximum allowed.
+        */
+       net_dev->mtu = min(dpaa_get_max_mtu(), ETH_DATA_LEN);
+
+       netdev_dbg(net_dev, "Setting initial MTU on net device: %d\n",
+                  net_dev->mtu);
+
+       priv->buf_layout[RX].priv_data_size = DPAA_RX_PRIV_DATA_SIZE; /* Rx */
+       priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */
+
+       /* device used for DMA mapping */
+       arch_setup_dma_ops(dev, 0, 0, NULL, false);
+       err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40));
+       if (err) {
+               dev_err(dev, "dma_coerce_mask_and_coherent() failed\n");
+               goto dev_mask_failed;
+       }
+
+       /* bp init */
+       for (i = 0; i < DPAA_BPS_NUM; i++) {
+               dpaa_bps[i] = dpaa_bp_alloc(dev);
+               if (IS_ERR(dpaa_bps[i])) {
+                       err = PTR_ERR(dpaa_bps[i]);
+                       /* Keep the ERR_PTR away from the cleanup loop at the
+                        * end of this function, which dereferences non-NULL
+                        * entries.
+                        */
+                       dpaa_bps[i] = NULL;
+                       dpaa_bps_free(priv);
+                       goto bp_create_failed;
+               }
+               /* the raw size of the buffers used for reception */
+               dpaa_bps[i]->raw_size = bpool_buffer_raw_size(i, DPAA_BPS_NUM);
+               /* avoid runtime computations by keeping the usable size here */
+               dpaa_bps[i]->size = dpaa_bp_size(dpaa_bps[i]->raw_size);
+               dpaa_bps[i]->dev = dev;
+
+               /* Use the function-level err so the failure is propagated */
+               err = dpaa_bp_alloc_pool(dpaa_bps[i]);
+               if (err < 0) {
+                       dpaa_bps_free(priv);
+                       priv->dpaa_bps[i] = NULL;
+                       goto bp_create_failed;
+               }
+               priv->dpaa_bps[i] = dpaa_bps[i];
+       }
+
+       INIT_LIST_HEAD(&priv->dpaa_fq_list);
+
+       memset(&port_fqs, 0, sizeof(port_fqs));
+
+       err = dpaa_alloc_all_fqs(dev, &priv->dpaa_fq_list, &port_fqs);
+       if (err < 0) {
+               dev_err(dev, "dpaa_alloc_all_fqs() failed\n");
+               goto fq_probe_failed;
+       }
+
+       priv->mac_dev = mac_dev;
+
+       channel = dpaa_get_channel();
+       if (channel < 0) {
+               dev_err(dev, "dpaa_get_channel() failed\n");
+               err = channel;
+               goto get_channel_failed;
+       }
+
+       priv->channel = (u16)channel;
+
+       /* Start a thread that will walk the CPUs with affine portals
+        * and add this pool channel to each's dequeue mask.
+        */
+       dpaa_eth_add_channel(priv->channel);
+
+       dpaa_fq_setup(priv, &dpaa_fq_cbs, priv->mac_dev->port[TX]);
+
+       /* Create a congestion group for this netdev, with
+        * dynamically-allocated CGR ID.
+        * Must be executed after probing the MAC, but before
+        * assigning the egress FQs to the CGRs.
+        */
+       err = dpaa_eth_cgr_init(priv);
+       if (err < 0) {
+               dev_err(dev, "Error initializing CGR\n");
+               goto tx_cgr_init_failed;
+       }
+
+       err = dpaa_ingress_cgr_init(priv);
+       if (err < 0) {
+               dev_err(dev, "Error initializing ingress CGR\n");
+               goto rx_cgr_init_failed;
+       }
+
+       /* Add the FQs to the interface, and make them active */
+       list_for_each_entry_safe(dpaa_fq, tmp, &priv->dpaa_fq_list, list) {
+               err = dpaa_fq_init(dpaa_fq, false);
+               if (err < 0)
+                       goto fq_alloc_failed;
+       }
+
+       priv->tx_headroom = dpaa_get_headroom(&priv->buf_layout[TX]);
+       priv->rx_headroom = dpaa_get_headroom(&priv->buf_layout[RX]);
+
+       /* All real interfaces need their ports initialized */
+       dpaa_eth_init_ports(mac_dev, dpaa_bps, DPAA_BPS_NUM, &port_fqs,
+                           &priv->buf_layout[0], dev);
+
+       priv->percpu_priv = devm_alloc_percpu(dev, *priv->percpu_priv);
+       if (!priv->percpu_priv) {
+               dev_err(dev, "devm_alloc_percpu() failed\n");
+               err = -ENOMEM;
+               goto alloc_percpu_failed;
+       }
+       for_each_possible_cpu(i) {
+               percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
+               memset(percpu_priv, 0, sizeof(*percpu_priv));
+       }
+
+       /* Initialize NAPI */
+       err = dpaa_napi_add(net_dev);
+       if (err < 0)
+               goto napi_add_failed;
+
+       err = dpaa_netdev_init(net_dev, &dpaa_ops, tx_timeout);
+       if (err < 0)
+               goto netdev_init_failed;
+
+       dpaa_eth_sysfs_init(&net_dev->dev);
+
+       netif_info(priv, probe, net_dev, "Probed interface %s\n",
+                  net_dev->name);
+
+       return 0;
+
+netdev_init_failed:
+napi_add_failed:
+       dpaa_napi_del(net_dev);
+alloc_percpu_failed:
+       dpaa_fq_free(dev, &priv->dpaa_fq_list);
+fq_alloc_failed:
+       qman_delete_cgr_safe(&priv->ingress_cgr);
+       qman_release_cgrid(priv->ingress_cgr.cgrid);
+rx_cgr_init_failed:
+       qman_delete_cgr_safe(&priv->cgr_data.cgr);
+       qman_release_cgrid(priv->cgr_data.cgr.cgrid);
+tx_cgr_init_failed:
+get_channel_failed:
+       dpaa_bps_free(priv);
+       /* NOTE(review): fq_probe_failed lands below dpaa_bps_free(), so the
+        * buffer pools created above do not appear to be released when
+        * dpaa_alloc_all_fqs() fails - confirm.
+        */
+bp_create_failed:
+fq_probe_failed:
+dev_mask_failed:
+mac_probe_failed:
+       dev_set_drvdata(dev, NULL);
+       free_netdev(net_dev);
+alloc_etherdev_mq_failed:
+       /* Stops at the first NULL slot; frees descriptors with no references */
+       for (i = 0; i < DPAA_BPS_NUM && dpaa_bps[i]; i++) {
+               if (atomic_read(&dpaa_bps[i]->refs) == 0)
+                       devm_kfree(dev, dpaa_bps[i]);
+       }
+       return err;
+}
+
+/* Platform driver remove hook: tear down what dpaa_eth_probe() set up.
+ *
+ * Returns the result of dpaa_fq_free(); the remaining teardown steps run
+ * regardless of that result.
+ */
+static int dpaa_remove(struct platform_device *pdev)
+{
+       struct net_device *net_dev;
+       struct dpaa_priv *priv;
+       struct device *dev;
+       int err;
+
+       dev = &pdev->dev;
+       net_dev = dev_get_drvdata(dev);
+
+       priv = netdev_priv(net_dev);
+
+       dpaa_eth_sysfs_remove(dev);
+
+       dev_set_drvdata(dev, NULL);
+       unregister_netdev(net_dev);
+
+       err = dpaa_fq_free(dev, &priv->dpaa_fq_list);
+
+       /* Release both congestion groups (ingress, then the Tx one) */
+       qman_delete_cgr_safe(&priv->ingress_cgr);
+       qman_release_cgrid(priv->ingress_cgr.cgrid);
+       qman_delete_cgr_safe(&priv->cgr_data.cgr);
+       qman_release_cgrid(priv->cgr_data.cgr.cgrid);
+
+       dpaa_napi_del(net_dev);
+
+       dpaa_bps_free(priv);
+
+       /* priv lives inside net_dev, so this must come last */
+       free_netdev(net_dev);
+
+       return err;
+}
+
+/* Platform device IDs this driver binds to; the empty entry ends the table.
+ * The table is never written, so it is declared const.
+ */
+static const struct platform_device_id dpaa_devtype[] = {
+       {
+               .name = "dpaa-ethernet",
+               .driver_data = 0,
+       }, {
+       }
+};
+MODULE_DEVICE_TABLE(platform, dpaa_devtype);
+
+/* Platform driver descriptor: matches devices through dpaa_devtype and is
+ * registered/unregistered by dpaa_load()/dpaa_unload().
+ */
+static struct platform_driver dpaa_driver = {
+       .driver = {
+               .name = KBUILD_MODNAME,
+       },
+       .id_table = dpaa_devtype,
+       .probe = dpaa_eth_probe,
+       .remove = dpaa_remove
+};
+
+/* Module init: cache the FMan Rx extra headroom and maximum frame size, then
+ * register the platform driver. Returns platform_driver_register()'s result.
+ */
+static int __init dpaa_load(void)
+{
+       int rc;
+
+       pr_debug("FSL DPAA Ethernet driver\n");
+
+       /* initialize dpaa_eth mirror values */
+       dpaa_rx_extra_headroom = fman_get_rx_extra_headroom();
+       dpaa_max_frm = fman_get_max_frm();
+
+       rc = platform_driver_register(&dpaa_driver);
+       if (rc >= 0)
+               return rc;
+
+       pr_err("Error, platform_driver_register() = %d\n", rc);
+
+       return rc;
+}
+module_init(dpaa_load);
+
+/* Module exit: unregister the platform driver, then release the channel */
+static void __exit dpaa_unload(void)
+{
+       platform_driver_unregister(&dpaa_driver);
+
+       /* Only one channel is used and needs to be released after all
+        * interfaces are removed
+        */
+       dpaa_release_channel();
+}
+module_exit(dpaa_unload);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("FSL DPAA Ethernet driver");
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
new file mode 100644 (file)
index 0000000..1f9aebf
--- /dev/null
@@ -0,0 +1,185 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DPAA_H
+#define __DPAA_H
+
+#include <linux/netdevice.h>
+#include <soc/fsl/qman.h>
+#include <soc/fsl/bman.h>
+
+#include "fman.h"
+#include "mac.h"
+#include "dpaa_eth_trace.h"
+
+#define DPAA_ETH_TXQ_NUM       NR_CPUS
+
+#define DPAA_BPS_NUM 3 /* number of bpools per interface */
+
+/* More detailed FQ types - used for fine-grained WQ assignments.
+ * Enumerators start at 1, so a zero-initialized fq_type matches none of them.
+ */
+enum dpaa_fq_type {
+       FQ_TYPE_RX_DEFAULT = 1, /* Rx Default FQs */
+       FQ_TYPE_RX_ERROR,       /* Rx Error FQs */
+       FQ_TYPE_TX,             /* "Real" Tx FQs */
+       FQ_TYPE_TX_CONFIRM,     /* Tx default Conf FQ (actually an Rx FQ) */
+       FQ_TYPE_TX_CONF_MQ,     /* Tx conf FQs (one for each Tx FQ) */
+       FQ_TYPE_TX_ERROR,       /* Tx Error FQs (these are actually Rx FQs) */
+};
+
+/* Driver-side bookkeeping for one frame queue, wrapping a struct qman_fq. */
+struct dpaa_fq {
+       struct qman_fq fq_base;
+       /* node in a per-interface list (see dpaa_priv.dpaa_fq_list) */
+       struct list_head list;
+       struct net_device *net_dev;
+       /* NOTE(review): presumably "initialize this FQ" vs. "already set up";
+        * confirm against the users in dpaa_eth.c
+        */
+       bool init;
+       /* frame queue ID; this is the value listed by the "fqids" attribute */
+       u32 fqid;
+       u32 flags;
+       u16 channel;
+       u8 wq;
+       /* role of this queue, see enum dpaa_fq_type */
+       enum dpaa_fq_type fq_type;
+};
+
+/* One struct qman_fq per frame queue role (Rx/Tx default, Rx/Tx error,
+ * egress ERN).
+ * NOTE(review): judging by the name these are templates holding the
+ * callbacks for each role - confirm against their use in dpaa_eth.c.
+ */
+struct dpaa_fq_cbs {
+       struct qman_fq rx_defq;
+       struct qman_fq tx_defq;
+       struct qman_fq rx_errq;
+       struct qman_fq tx_errq;
+       struct qman_fq egress_ern;
+};
+
+/* Description of one buffer pool used by an interface. */
+struct dpaa_bp {
+       /* device used in the DMA mapping operations */
+       struct device *dev;
+       /* current number of buffers in the buffer pool allotted to each CPU */
+       int __percpu *percpu_count;
+       /* all buffers allocated for this pool have this raw size */
+       size_t raw_size;
+       /* all buffers in this pool have this same usable size */
+       size_t size;
+       /* the buffer pools are initialized with config_count buffers for each
+        * CPU; at runtime the number of buffers per CPU is constantly brought
+        * back to this level
+        */
+       u16 config_count;
+       /* buffer pool ID; this is the value listed by the "bpids" attribute */
+       u8 bpid;
+       struct bman_pool *pool;
+       /* bpool can be seeded before use by this cb */
+       int (*seed_cb)(struct dpaa_bp *);
+       /* bpool can be emptied before freeing by this cb */
+       void (*free_buf_cb)(const struct dpaa_bp *, struct bm_buffer *);
+       /* NOTE(review): presumably a reference count of the pool's users;
+        * confirm against dpaa_eth.c
+        */
+       atomic_t refs;
+};
+
+/* Rx error counters. The ethtool statistics code copies this structure with
+ * memcpy() straight into the u64 stats array, so it must contain only u64
+ * fields, in the same order as the matching "rx ... error" strings.
+ */
+struct dpaa_rx_errors {
+       u64 dme;                /* DMA Error */
+       u64 fpe;                /* Frame Physical Error */
+       u64 fse;                /* Frame Size Error */
+       u64 phe;                /* Header Error */
+};
+
+/* Counters for QMan ERN frames - one counter per rejection code.
+ * The ethtool statistics code copies this structure with memcpy() straight
+ * into the u64 stats array, so it must contain only u64 fields, in the same
+ * order as the matching "qman ..." strings.
+ */
+struct dpaa_ern_cnt {
+       u64 cg_tdrop;           /* Congestion group taildrop */
+       u64 wred;               /* WRED congestion */
+       u64 err_cond;           /* Error condition */
+       u64 early_window;       /* Order restoration, frame too early */
+       u64 late_window;        /* Order restoration, frame too late */
+       u64 fq_tdrop;           /* FQ taildrop */
+       u64 fq_retired;         /* FQ is retired */
+       u64 orp_zero;           /* ORP disabled */
+};
+
+/* Pairs a NAPI context with a QMan portal pointer; one instance is embedded
+ * in each struct dpaa_percpu_priv.
+ */
+struct dpaa_napi_portal {
+       struct napi_struct napi;
+       struct qman_portal *p;
+       /* NOTE(review): presumably set while NAPI is disabled for this portal;
+        * confirm against dpaa_eth.c
+        */
+       bool down;
+};
+
+/* Per-CPU private data and statistics of an interface. The counters below
+ * are what the ethtool statistics code reports per CPU and in total.
+ */
+struct dpaa_percpu_priv {
+       struct net_device *net_dev;
+       struct dpaa_napi_portal np;
+       /* reported by ethtool as "interrupts" */
+       u64 in_interrupt;
+       /* reported by ethtool as "tx confirm" */
+       u64 tx_confirm;
+       /* fragmented (non-linear) skbuffs received from the stack */
+       u64 tx_frag_skbuffs;
+       struct rtnl_link_stats64 stats;
+       struct dpaa_rx_errors rx_errors;
+       struct dpaa_ern_cnt ern_cnt;
+};
+
+/* Buffer layout parameters; struct dpaa_priv keeps two of these.
+ * NOTE(review): presumably one for Rx and one for Tx - confirm in dpaa_eth.c.
+ */
+struct dpaa_buffer_layout {
+       u16 priv_data_size;
+};
+
+/* Private data of a DPAA net_device, as returned by netdev_priv(). */
+struct dpaa_priv {
+       struct dpaa_percpu_priv __percpu *percpu_priv;
+       /* buffer pools used by this interface */
+       struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM];
+       /* Store here the needed Tx headroom for convenience and speed
+        * (even though it can be computed based on the fields of buf_layout)
+        */
+       u16 tx_headroom;
+       struct net_device *net_dev;
+       struct mac_device *mac_dev;
+       struct qman_fq *egress_fqs[DPAA_ETH_TXQ_NUM];
+       struct qman_fq *conf_fqs[DPAA_ETH_TXQ_NUM];
+
+       u16 channel;
+       /* list of struct dpaa_fq, linked through dpaa_fq.list */
+       struct list_head dpaa_fq_list;
+
+       u32 msg_enable; /* net_device message level */
+
+       struct {
+               /* All egress queues to a given net device belong to one
+                * (and the same) congestion group.
+                */
+               struct qman_cgr cgr;
+               /* If congested, when it began. Used for performance stats. */
+               u32 congestion_start_jiffies;
+               /* Number of jiffies the Tx port was congested. */
+               u32 congested_jiffies;
+               /* Counter for the number of times the CGR
+                * entered congestion state
+                */
+               u32 cgr_congested_count;
+       } cgr_data;
+       /* Use a per-port CGR for ingress traffic. */
+       bool use_ingress_cgr;
+       struct qman_cgr ingress_cgr;
+
+       struct dpaa_buffer_layout buf_layout[2];
+       u16 rx_headroom;
+};
+
+/* from dpaa_ethtool.c */
+extern const struct ethtool_ops dpaa_ethtool_ops;
+
+/* from dpaa_eth_sysfs.c */
+void dpaa_eth_sysfs_remove(struct device *dev);
+void dpaa_eth_sysfs_init(struct device *dev);
+#endif /* __DPAA_H */
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c
new file mode 100644 (file)
index 0000000..ec75d1c
--- /dev/null
@@ -0,0 +1,165 @@
+/* Copyright 2008-2016 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/of_net.h>
+#include "dpaa_eth.h"
+#include "mac.h"
+
+/* sysfs show handler for "device_addr": prints the start address of the MAC
+ * device's resource in hex, or "none" when the interface has no MAC device.
+ * Returns the number of characters written to @buf.
+ */
+static ssize_t dpaa_eth_show_addr(struct device *dev,
+                                 struct device_attribute *attr, char *buf)
+{
+       struct net_device *net_dev = to_net_dev(dev);
+       struct dpaa_priv *priv = netdev_priv(net_dev);
+
+       if (!priv->mac_dev)
+               return sprintf(buf, "none");
+
+       return sprintf(buf, "%llx",
+                      (unsigned long long)priv->mac_dev->res->start);
+}
+
+/* Append one "<type>: <fqid>" or "<type>: <first> - <last>" line at offset
+ * @bytes of the sysfs page @buf, never writing past PAGE_SIZE.
+ * Returns the number of characters actually added.
+ */
+static ssize_t dpaa_eth_emit_fqid_range(char *buf, ssize_t bytes,
+                                       const char *type, u32 first_fqid,
+                                       u32 last_fqid)
+{
+       if (last_fqid == first_fqid)
+               return scnprintf(buf + bytes, PAGE_SIZE - bytes, "%s: %u\n",
+                                type, first_fqid);
+
+       return scnprintf(buf + bytes, PAGE_SIZE - bytes, "%s: %u - %u\n",
+                        type, first_fqid, last_fqid);
+}
+
+/* sysfs show handler for "fqids": walks the interface's frame queue list and
+ * prints one line per run of queues that share a type and have adjacent
+ * (differing by exactly 1) FQIDs. A run of one queue is printed as a single
+ * ID, a longer run as "first - last".
+ *
+ * Output is bounded to the PAGE_SIZE sysfs buffer; FQIDs are u32 and are
+ * printed as unsigned values.
+ * Returns the number of characters written to @buf.
+ */
+static ssize_t dpaa_eth_show_fqids(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       struct dpaa_priv *priv = netdev_priv(to_net_dev(dev));
+       struct dpaa_fq *prev = NULL;
+       const char *prevstr = NULL;
+       const char *str;
+       struct dpaa_fq *fq;
+       u32 first_fqid = 0;
+       u32 last_fqid = 0;
+       ssize_t bytes = 0;
+
+       /* the list is not modified here, so the plain iterator is enough */
+       list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
+               switch (fq->fq_type) {
+               case FQ_TYPE_RX_DEFAULT:
+                       str = "Rx default";
+                       break;
+               case FQ_TYPE_RX_ERROR:
+                       str = "Rx error";
+                       break;
+               case FQ_TYPE_TX_CONFIRM:
+                       str = "Tx default confirmation";
+                       break;
+               case FQ_TYPE_TX_CONF_MQ:
+                       str = "Tx confirmation (mq)";
+                       break;
+               case FQ_TYPE_TX_ERROR:
+                       str = "Tx error";
+                       break;
+               case FQ_TYPE_TX:
+                       str = "Tx";
+                       break;
+               default:
+                       str = "Unknown";
+                       break;
+               }
+
+               /* The type strings are literals assigned above, so comparing
+                * the pointers is enough to tell whether the type changed.
+                */
+               if (prev && abs(fq->fqid - prev->fqid) == 1 &&
+                   str == prevstr) {
+                       /* same type, adjacent ID: extend the current run */
+                       last_fqid = fq->fqid;
+               } else {
+                       /* flush the finished run, then start a new one */
+                       if (prev)
+                               bytes += dpaa_eth_emit_fqid_range(buf, bytes,
+                                                                 prevstr,
+                                                                 first_fqid,
+                                                                 last_fqid);
+                       first_fqid = fq->fqid;
+                       last_fqid = fq->fqid;
+               }
+
+               prev = fq;
+               prevstr = str;
+       }
+
+       /* flush the last run, if the list was not empty */
+       if (prev)
+               bytes += dpaa_eth_emit_fqid_range(buf, bytes, prevstr,
+                                                 first_fqid, last_fqid);
+
+       return bytes;
+}
+
+/* sysfs show handler for "bpids": prints the ID of each of the interface's
+ * DPAA_BPS_NUM buffer pools, one per line.
+ *
+ * scnprintf() is used because it returns the number of characters actually
+ * stored, which keeps the running offset inside the PAGE_SIZE sysfs buffer.
+ * Returns the number of characters written to @buf.
+ */
+static ssize_t dpaa_eth_show_bpids(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       struct dpaa_priv *priv = netdev_priv(to_net_dev(dev));
+       ssize_t bytes = 0;
+       int i;
+
+       for (i = 0; i < DPAA_BPS_NUM; i++)
+               bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes, "%u\n",
+                                  (unsigned int)priv->dpaa_bps[i]->bpid);
+
+       return bytes;
+}
+
+/* Read-only (0444) sysfs attributes created for each interface by
+ * dpaa_eth_sysfs_init() and removed by dpaa_eth_sysfs_remove().
+ */
+static struct device_attribute dpaa_eth_attrs[] = {
+       __ATTR(device_addr, 0444, dpaa_eth_show_addr, NULL),
+       __ATTR(fqids, 0444, dpaa_eth_show_fqids, NULL),
+       __ATTR(bpids, 0444, dpaa_eth_show_bpids, NULL),
+};
+
+/* Create every attribute in dpaa_eth_attrs on @dev. If one of them cannot be
+ * created, log an error, remove the attributes created so far and give up;
+ * the failure is not reported to the caller.
+ */
+void dpaa_eth_sysfs_init(struct device *dev)
+{
+       int created;
+
+       for (created = 0; created < ARRAY_SIZE(dpaa_eth_attrs); created++) {
+               if (!device_create_file(dev, &dpaa_eth_attrs[created]))
+                       continue;
+
+               dev_err(dev, "Error creating sysfs file\n");
+
+               /* roll back in reverse order of creation */
+               while (created-- > 0)
+                       device_remove_file(dev, &dpaa_eth_attrs[created]);
+               return;
+       }
+}
+
+/* Remove every attribute listed in dpaa_eth_attrs from @dev. */
+void dpaa_eth_sysfs_remove(struct device *dev)
+{
+       int idx = 0;
+
+       while (idx < ARRAY_SIZE(dpaa_eth_attrs)) {
+               device_remove_file(dev, &dpaa_eth_attrs[idx]);
+               idx++;
+       }
+}
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h
new file mode 100644 (file)
index 0000000..409c1dc
--- /dev/null
@@ -0,0 +1,141 @@
+/* Copyright 2013-2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM   dpaa_eth
+
+#if !defined(_DPAA_ETH_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _DPAA_ETH_TRACE_H
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include "dpaa_eth.h"
+#include <linux/tracepoint.h>
+
+/* fd_format_name() builds one { value, "name" } pair from a qm_fd_<format>
+ * constant; fd_format_list is the table of such pairs handed to
+ * __print_symbolic() to print the frame descriptor format by name.
+ */
+#define fd_format_name(format) { qm_fd_##format, #format }
+#define fd_format_list \
+       fd_format_name(contig), \
+       fd_format_name(sg)
+
+/* This is used to declare a class of events.
+ * individual events of this type will be defined below.
+ */
+
+/* Store details about a frame descriptor and the FQ on which it was
+ * transmitted/received.
+ *
+ * Arguments: the net device (only its name is recorded), the frame queue
+ * (only its fqid is recorded) and the frame descriptor, from which the
+ * address, format, offset, length and status are captured.
+ */
+DECLARE_EVENT_CLASS(dpaa_eth_fd,
+       /* Trace function prototype */
+       TP_PROTO(struct net_device *netdev,
+                struct qman_fq *fq,
+                const struct qm_fd *fd),
+
+       /* Repeat argument list here */
+       TP_ARGS(netdev, fq, fd),
+
+       /* A structure containing the relevant information we want to record.
+        * Declare name and type for each normal element, name, type and size
+        * for arrays. Use __string for variable length strings.
+        */
+       TP_STRUCT__entry(
+               __field(u32,    fqid)
+               __field(u64,    fd_addr)
+               __field(u8,     fd_format)
+               __field(u16,    fd_offset)
+               __field(u32,    fd_length)
+               __field(u32,    fd_status)
+               __string(name,  netdev->name)
+       ),
+
+       /* The function that assigns values to the above declared fields */
+       TP_fast_assign(
+               __entry->fqid = fq->fqid;
+               __entry->fd_addr = qm_fd_addr_get64(fd);
+               __entry->fd_format = qm_fd_get_format(fd);
+               __entry->fd_offset = qm_fd_get_offset(fd);
+               __entry->fd_length = qm_fd_get_length(fd);
+               __entry->fd_status = fd->status;
+               __assign_str(name, netdev->name);
+       ),
+
+       /* This is what gets printed when the trace event is triggered.
+        * NOTE(review): fqid is a u32 field but is printed with %d.
+        */
+       TP_printk("[%s] fqid=%d, fd: addr=0x%llx, format=%s, off=%u, len=%u, status=0x%08x",
+                 __get_str(name), __entry->fqid, __entry->fd_addr,
+                 __print_symbolic(__entry->fd_format, fd_format_list),
+                 __entry->fd_offset, __entry->fd_length, __entry->fd_status)
+);
+
+/* Now declare events of the above type. Format is:
+ * DEFINE_EVENT(class, name, proto, args), with proto and args same as for class
+ */
+
+/* The three events below all instantiate the dpaa_eth_fd class with the same
+ * prototype and argument list; only the event name differs.
+ */
+
+/* Tx (egress) fd */
+DEFINE_EVENT(dpaa_eth_fd, dpaa_tx_fd,
+
+       TP_PROTO(struct net_device *netdev,
+                struct qman_fq *fq,
+                const struct qm_fd *fd),
+
+       TP_ARGS(netdev, fq, fd)
+);
+
+/* Rx fd */
+DEFINE_EVENT(dpaa_eth_fd, dpaa_rx_fd,
+
+       TP_PROTO(struct net_device *netdev,
+                struct qman_fq *fq,
+                const struct qm_fd *fd),
+
+       TP_ARGS(netdev, fq, fd)
+);
+
+/* Tx confirmation fd */
+DEFINE_EVENT(dpaa_eth_fd, dpaa_tx_conf_fd,
+
+       TP_PROTO(struct net_device *netdev,
+                struct qman_fq *fq,
+                const struct qm_fd *fd),
+
+       TP_ARGS(netdev, fq, fd)
+);
+
+/* If only one event of a certain type needs to be declared, use TRACE_EVENT().
+ * The syntax is the same as for DECLARE_EVENT_CLASS().
+ */
+
+#endif /* _DPAA_ETH_TRACE_H */
+
+/* This must be outside ifdef _DPAA_ETH_TRACE_H */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE     dpaa_eth_trace
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
new file mode 100644 (file)
index 0000000..27e7044
--- /dev/null
@@ -0,0 +1,417 @@
+/* Copyright 2008-2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/string.h>
+
+#include "dpaa_eth.h"
+#include "mac.h"
+
+/* Names of the statistics reported once per CPU plus once as a total.
+ * The order must match the order in which copy_stats() stores the values.
+ */
+static const char dpaa_stats_percpu[][ETH_GSTRING_LEN] = {
+       "interrupts",
+       "rx packets",
+       "tx packets",
+       "tx confirm",
+       "tx S/G",
+       "tx error",
+       "rx error",
+};
+
+/* Names of the statistics reported once per interface. The table is only
+ * ever read, hence const like dpaa_stats_percpu. The order must match the
+ * order in which dpaa_get_ethtool_stats() stores the values: the fields of
+ * struct dpaa_rx_errors, then those of struct dpaa_ern_cnt, then the three
+ * congestion values.
+ */
+static const char dpaa_stats_global[][ETH_GSTRING_LEN] = {
+       /* dpa rx errors */
+       "rx dma error",
+       "rx frame physical error",
+       "rx frame size error",
+       "rx header error",
+
+       /* demultiplexing errors */
+       "qman cg_tdrop",
+       "qman wred",
+       "qman error cond",
+       "qman early window",
+       "qman late window",
+       "qman fq tdrop",
+       "qman fq retired",
+       "qman orp disabled",
+
+       /* congestion related stats */
+       "congestion time (ms)",
+       "entered congestion",
+       "congested (0/1)"
+};
+
+#define DPAA_STATS_PERCPU_LEN ARRAY_SIZE(dpaa_stats_percpu)
+#define DPAA_STATS_GLOBAL_LEN ARRAY_SIZE(dpaa_stats_global)
+
+/* ethtool get_settings: delegate to the attached PHY. Without a PHY nothing
+ * is filled in and 0 is returned (only a debug message is emitted).
+ */
+static int dpaa_get_settings(struct net_device *net_dev,
+                            struct ethtool_cmd *et_cmd)
+{
+       if (net_dev->phydev)
+               return phy_ethtool_gset(net_dev->phydev, et_cmd);
+
+       netdev_dbg(net_dev, "phy device not initialized\n");
+       return 0;
+}
+
+/* ethtool set_settings: delegate to the attached PHY.
+ * Returns -ENODEV when no PHY is attached, otherwise the result of
+ * phy_ethtool_sset() (a negative result is also logged).
+ */
+static int dpaa_set_settings(struct net_device *net_dev,
+                            struct ethtool_cmd *et_cmd)
+{
+       struct phy_device *phydev = net_dev->phydev;
+       int ret;
+
+       if (!phydev) {
+               netdev_err(net_dev, "phy device not initialized\n");
+               return -ENODEV;
+       }
+
+       ret = phy_ethtool_sset(phydev, et_cmd);
+       if (ret < 0)
+               netdev_err(net_dev, "phy_ethtool_sset() = %d\n", ret);
+
+       return ret;
+}
+
+/* ethtool get_drvinfo: fill in the driver name, placeholder ("0") driver and
+ * firmware versions, and the bus info taken from the name of the net
+ * device's grandparent device.
+ *
+ * Each snprintf() result is checked on its own, so truncation of either
+ * version string is reported.
+ */
+static void dpaa_get_drvinfo(struct net_device *net_dev,
+                            struct ethtool_drvinfo *drvinfo)
+{
+       int len;
+
+       strlcpy(drvinfo->driver, KBUILD_MODNAME,
+               sizeof(drvinfo->driver));
+
+       len = snprintf(drvinfo->version, sizeof(drvinfo->version),
+                      "%X", 0);
+       if (len >= sizeof(drvinfo->version)) {
+               /* Truncated output */
+               netdev_notice(net_dev, "snprintf() = %d\n", len);
+       }
+
+       len = snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+                      "%X", 0);
+       if (len >= sizeof(drvinfo->fw_version)) {
+               /* Truncated output */
+               netdev_notice(net_dev, "snprintf() = %d\n", len);
+       }
+
+       strlcpy(drvinfo->bus_info, dev_name(net_dev->dev.parent->parent),
+               sizeof(drvinfo->bus_info));
+}
+
+/* ethtool get_msglevel: return the interface's message level bitmap. */
+static u32 dpaa_get_msglevel(struct net_device *net_dev)
+{
+       struct dpaa_priv *priv = netdev_priv(net_dev);
+
+       return priv->msg_enable;
+}
+
+/* ethtool set_msglevel: store the new message level bitmap. */
+static void dpaa_set_msglevel(struct net_device *net_dev,
+                             u32 msg_enable)
+{
+       struct dpaa_priv *priv = netdev_priv(net_dev);
+
+       priv->msg_enable = msg_enable;
+}
+
+/* ethtool nway_reset: restart autonegotiation on the attached PHY.
+ * Returns -ENODEV when no PHY is attached, 0 when autonegotiation is not
+ * enabled on the PHY, otherwise the result of phy_start_aneg() (a negative
+ * result is also logged).
+ */
+static int dpaa_nway_reset(struct net_device *net_dev)
+{
+       struct phy_device *phydev = net_dev->phydev;
+       int ret;
+
+       if (!phydev) {
+               netdev_err(net_dev, "phy device not initialized\n");
+               return -ENODEV;
+       }
+
+       if (!phydev->autoneg)
+               return 0;
+
+       ret = phy_start_aneg(phydev);
+       if (ret < 0)
+               netdev_err(net_dev, "phy_start_aneg() = %d\n",
+                          ret);
+
+       return ret;
+}
+
+/* ethtool get_pauseparam: report the pause autonegotiation setting and the
+ * currently active rx/tx pause state kept in the MAC device. @epause is left
+ * untouched (and an error is logged) when no PHY is attached.
+ */
+static void dpaa_get_pauseparam(struct net_device *net_dev,
+                               struct ethtool_pauseparam *epause)
+{
+       struct dpaa_priv *priv = netdev_priv(net_dev);
+       struct mac_device *mac_dev = priv->mac_dev;
+
+       if (!net_dev->phydev) {
+               netdev_err(net_dev, "phy device not initialized\n");
+               return;
+       }
+
+       epause->tx_pause = mac_dev->tx_pause_active;
+       epause->rx_pause = mac_dev->rx_pause_active;
+       epause->autoneg = mac_dev->autoneg_pause;
+}
+
+/* ethtool set_pauseparam: record the requested pause configuration in the
+ * MAC device, update the PHY's advertised pause capabilities (restarting
+ * autonegotiation if they changed and autoneg is enabled) and program the
+ * MAC with the resulting active pause settings.
+ *
+ * Returns -ENODEV when no PHY is attached, -EINVAL when the PHY does not
+ * support the requested combination, otherwise the result of
+ * fman_set_mac_active_pause(). A failure of phy_start_aneg() is only logged.
+ */
+static int dpaa_set_pauseparam(struct net_device *net_dev,
+                              struct ethtool_pauseparam *epause)
+{
+       struct mac_device *mac_dev;
+       struct phy_device *phydev;
+       bool rx_pause, tx_pause;
+       struct dpaa_priv *priv;
+       u32 newadv, oldadv;
+       int err;
+
+       priv = netdev_priv(net_dev);
+       mac_dev = priv->mac_dev;
+
+       phydev = net_dev->phydev;
+       if (!phydev) {
+               netdev_err(net_dev, "phy device not initialized\n");
+               return -ENODEV;
+       }
+
+       /* Asymmetric requests (rx != tx) need Asym_Pause support */
+       if (!(phydev->supported & SUPPORTED_Pause) ||
+           (!(phydev->supported & SUPPORTED_Asym_Pause) &&
+           (epause->rx_pause != epause->tx_pause)))
+               return -EINVAL;
+
+       /* The MAC should know how to handle PAUSE frame autonegotiation before
+        * adjust_link is triggered by a forced renegotiation of sym/asym PAUSE
+        * settings.
+        */
+       mac_dev->autoneg_pause = !!epause->autoneg;
+       mac_dev->rx_pause_req = !!epause->rx_pause;
+       mac_dev->tx_pause_req = !!epause->tx_pause;
+
+       /* Determine the sym/asym advertised PAUSE capabilities from the desired
+        * rx/tx pause settings:
+        *   rx only   -> Pause | Asym_Pause
+        *   tx only   -> Asym_Pause
+        *   rx and tx -> Pause
+        * The tx bit is therefore XORed in: it must clear the Asym_Pause bit
+        * that the rx case set, not leave it in place.
+        */
+       newadv = 0;
+       if (epause->rx_pause)
+               newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+       if (epause->tx_pause)
+               newadv ^= ADVERTISED_Asym_Pause;
+
+       oldadv = phydev->advertising &
+                       (ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+
+       /* If there are differences between the old and the new advertised
+        * values, restart PHY autonegotiation and advertise the new values.
+        */
+       if (oldadv != newadv) {
+               phydev->advertising &= ~(ADVERTISED_Pause
+                               | ADVERTISED_Asym_Pause);
+               phydev->advertising |= newadv;
+               if (phydev->autoneg) {
+                       err = phy_start_aneg(phydev);
+                       if (err < 0)
+                               netdev_err(net_dev, "phy_start_aneg() = %d\n",
+                                          err);
+               }
+       }
+
+       fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause);
+       err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause);
+       if (err < 0)
+               netdev_err(net_dev, "set_mac_active_pause() = %d\n", err);
+
+       return err;
+}
+
+/* ethtool get_sset_count: number of statistics exposed for ETH_SS_STATS.
+ *
+ * Every per-CPU statistic and every buffer pool count is reported once per
+ * online CPU plus once as a total; the global statistics are reported once.
+ * Any other string set yields -EOPNOTSUPP.
+ */
+static int dpaa_get_sset_count(struct net_device *net_dev, int type)
+{
+       unsigned int per_stat, multi_stats;
+
+       if (type != ETH_SS_STATS)
+               return -EOPNOTSUPP;
+
+       /* one value per online CPU plus the total */
+       per_stat = num_online_cpus() + 1;
+       multi_stats = DPAA_STATS_PERCPU_LEN + DPAA_BPS_NUM;
+
+       return per_stat * multi_stats + DPAA_STATS_GLOBAL_LEN;
+}
+
+/* Store one CPU's statistics into the ethtool data array.
+ *
+ * @data is laid out as consecutive rows of (num_cpus + 1) values, one row
+ * per statistic: column @crr_cpu receives this CPU's value and the last
+ * column (index num_cpus) accumulates the total. The seven per-CPU counters
+ * come first, followed by one row per buffer pool taken from @bp_count.
+ */
+static void copy_stats(struct dpaa_percpu_priv *percpu_priv, int num_cpus,
+                      int crr_cpu, u64 *bp_count, u64 *data)
+{
+       /* same order as the dpaa_stats_percpu names */
+       const u64 values[] = {
+               percpu_priv->in_interrupt,
+               percpu_priv->stats.rx_packets,
+               percpu_priv->stats.tx_packets,
+               percpu_priv->tx_confirm,
+               percpu_priv->tx_frag_skbuffs,
+               percpu_priv->stats.tx_errors,
+               percpu_priv->stats.rx_errors,
+       };
+       int stride = num_cpus + 1;
+       u64 *row = data;
+       int k;
+
+       for (k = 0; k < ARRAY_SIZE(values); k++, row += stride) {
+               row[crr_cpu] = values[k];
+               row[num_cpus] += values[k];
+       }
+
+       for (k = 0; k < DPAA_BPS_NUM; k++, row += stride) {
+               row[crr_cpu] = bp_count[k];
+               row[num_cpus] += bp_count[k];
+       }
+}
+
+/* ethtool get_ethtool_stats: fill @data with the interface statistics.
+ *
+ * Layout of @data (all u64):
+ *   - one row of (num_cpus + 1) values per per-CPU statistic and per buffer
+ *     pool, written by copy_stats();
+ *   - the summed struct dpaa_rx_errors fields;
+ *   - the summed struct dpaa_ern_cnt fields;
+ *   - congestion time (ms), congestion entry count, congested flag.
+ *
+ * NOTE(review): the CPU id from for_each_online_cpu() is used directly as
+ * the column index while rows are num_online_cpus() + 1 wide; this assumes
+ * online CPU ids are contiguous from 0 - confirm for hotplug scenarios.
+ */
+static void dpaa_get_ethtool_stats(struct net_device *net_dev,
+                                  struct ethtool_stats *stats, u64 *data)
+{
+       u64 bp_count[DPAA_BPS_NUM], cg_time, cg_num;
+       struct dpaa_percpu_priv *percpu_priv;
+       struct dpaa_rx_errors rx_errors;
+       unsigned int num_cpus, offset;
+       struct dpaa_ern_cnt ern_cnt;
+       struct dpaa_bp *dpaa_bp;
+       struct dpaa_priv *priv;
+       int total_stats, i, j;
+       bool cg_status;
+
+       total_stats = dpaa_get_sset_count(net_dev, ETH_SS_STATS);
+       priv     = netdev_priv(net_dev);
+       num_cpus = num_online_cpus();
+
+       /* the totals are accumulated with +=, so everything starts at zero */
+       memset(&bp_count, 0, sizeof(bp_count));
+       memset(&rx_errors, 0, sizeof(struct dpaa_rx_errors));
+       memset(&ern_cnt, 0, sizeof(struct dpaa_ern_cnt));
+       memset(data, 0, total_stats * sizeof(u64));
+
+       for_each_online_cpu(i) {
+               percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
+               for (j = 0; j < DPAA_BPS_NUM; j++) {
+                       dpaa_bp = priv->dpaa_bps[j];
+                       /* pools without a per-CPU counter keep reporting 0 */
+                       if (!dpaa_bp->percpu_count)
+                               continue;
+                       bp_count[j] = *(per_cpu_ptr(dpaa_bp->percpu_count, i));
+               }
+               rx_errors.dme += percpu_priv->rx_errors.dme;
+               rx_errors.fpe += percpu_priv->rx_errors.fpe;
+               rx_errors.fse += percpu_priv->rx_errors.fse;
+               rx_errors.phe += percpu_priv->rx_errors.phe;
+
+               ern_cnt.cg_tdrop     += percpu_priv->ern_cnt.cg_tdrop;
+               ern_cnt.wred         += percpu_priv->ern_cnt.wred;
+               ern_cnt.err_cond     += percpu_priv->ern_cnt.err_cond;
+               ern_cnt.early_window += percpu_priv->ern_cnt.early_window;
+               ern_cnt.late_window  += percpu_priv->ern_cnt.late_window;
+               ern_cnt.fq_tdrop     += percpu_priv->ern_cnt.fq_tdrop;
+               ern_cnt.fq_retired   += percpu_priv->ern_cnt.fq_retired;
+               ern_cnt.orp_zero     += percpu_priv->ern_cnt.orp_zero;
+
+               copy_stats(percpu_priv, num_cpus, i, bp_count, data);
+       }
+
+       /* skip the per-CPU rows; the global counters follow them */
+       offset = (num_cpus + 1) * (DPAA_STATS_PERCPU_LEN + DPAA_BPS_NUM);
+       memcpy(data + offset, &rx_errors, sizeof(struct dpaa_rx_errors));
+
+       offset += sizeof(struct dpaa_rx_errors) / sizeof(u64);
+       memcpy(data + offset, &ern_cnt, sizeof(struct dpaa_ern_cnt));
+
+       /* gather congestion related counters */
+       cg_num    = 0;
+       cg_status = 0;
+       cg_time   = jiffies_to_msecs(priv->cgr_data.congested_jiffies);
+       if (qman_query_cgr_congested(&priv->cgr_data.cgr, &cg_status) == 0) {
+               cg_num    = priv->cgr_data.cgr_congested_count;
+
+               /* reset congestion stats (like QMan API does) */
+               priv->cgr_data.congested_jiffies   = 0;
+               priv->cgr_data.cgr_congested_count = 0;
+       }
+
+       offset += sizeof(struct dpaa_ern_cnt) / sizeof(u64);
+       data[offset++] = cg_time;
+       data[offset++] = cg_num;
+       data[offset++] = cg_status;
+}
+
+/* ethtool get_strings: emit the statistic names in the same order in which
+ * dpaa_get_ethtool_stats() emits the values.
+ *
+ * For every per-CPU statistic and every buffer pool there is one
+ * "<name> [CPU n]" entry per online CPU followed by a "<name> [TOTAL]"
+ * entry; the global names are appended at the end. Each entry occupies
+ * exactly ETH_GSTRING_LEN bytes of @data.
+ */
+static void dpaa_get_strings(struct net_device *net_dev, u32 stringset,
+                            u8 *data)
+{
+       unsigned int stat, cpu, online;
+       char label[ETH_GSTRING_LEN];
+       u8 *cursor = data;
+
+       memset(label, 0, sizeof(label));
+       online = num_online_cpus();
+
+       /* per-CPU statistics */
+       for (stat = 0; stat < DPAA_STATS_PERCPU_LEN; stat++) {
+               for (cpu = 0; cpu < online; cpu++) {
+                       snprintf(label, ETH_GSTRING_LEN, "%s [CPU %d]",
+                                dpaa_stats_percpu[stat], cpu);
+                       memcpy(cursor, label, ETH_GSTRING_LEN);
+                       cursor += ETH_GSTRING_LEN;
+               }
+
+               snprintf(label, ETH_GSTRING_LEN, "%s [TOTAL]",
+                        dpaa_stats_percpu[stat]);
+               memcpy(cursor, label, ETH_GSTRING_LEN);
+               cursor += ETH_GSTRING_LEN;
+       }
+
+       /* buffer pool counts; pools are labelled 'a', 'b', ... */
+       for (stat = 0; stat < DPAA_BPS_NUM; stat++) {
+               for (cpu = 0; cpu < online; cpu++) {
+                       snprintf(label, ETH_GSTRING_LEN,
+                                "bpool %c [CPU %d]", 'a' + stat, cpu);
+                       memcpy(cursor, label, ETH_GSTRING_LEN);
+                       cursor += ETH_GSTRING_LEN;
+               }
+
+               snprintf(label, ETH_GSTRING_LEN, "bpool %c [TOTAL]",
+                        'a' + stat);
+               memcpy(cursor, label, ETH_GSTRING_LEN);
+               cursor += ETH_GSTRING_LEN;
+       }
+
+       /* global statistics: the name table is copied verbatim */
+       memcpy(cursor, dpaa_stats_global,
+              DPAA_STATS_GLOBAL_LEN * ETH_GSTRING_LEN);
+}
+
+/* ethtool operations of the DPAA Ethernet driver; declared extern in
+ * dpaa_eth.h. Link state uses the generic ethtool_op_get_link() helper, all
+ * other callbacks are implemented in this file.
+ */
+const struct ethtool_ops dpaa_ethtool_ops = {
+       .get_settings = dpaa_get_settings,
+       .set_settings = dpaa_set_settings,
+       .get_drvinfo = dpaa_get_drvinfo,
+       .get_msglevel = dpaa_get_msglevel,
+       .set_msglevel = dpaa_set_msglevel,
+       .nway_reset = dpaa_nway_reset,
+       .get_pauseparam = dpaa_get_pauseparam,
+       .set_pauseparam = dpaa_set_pauseparam,
+       .get_link = ethtool_op_get_link,
+       .get_sset_count = dpaa_get_sset_count,
+       .get_ethtool_stats = dpaa_get_ethtool_stats,
+       .get_strings = dpaa_get_strings,
+};
index 43b2839a3d1179de8abc9b2d05a88cc93f738849..1aabe4bcc8ea60d911f9ef022349093807366a47 100644 (file)
@@ -1430,14 +1430,14 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
                skb_put(skb, pkt_len - 4);
                data = skb->data;
 
+               if (!is_copybreak && need_swap)
+                       swap_buffer(data, pkt_len);
+
 #if !defined(CONFIG_M5272)
                if (fep->quirks & FEC_QUIRK_HAS_RACC)
                        data = skb_pull_inline(skb, 2);
 #endif
 
-               if (!is_copybreak && need_swap)
-                       swap_buffer(data, pkt_len);
-
                /* Extract the enhanced buffer descriptor */
                ebdp = NULL;
                if (fep->bufdesc_ex)
@@ -2344,16 +2344,6 @@ static int fec_enet_get_sset_count(struct net_device *dev, int sset)
 }
 #endif /* !defined(CONFIG_M5272) */
 
-static int fec_enet_nway_reset(struct net_device *dev)
-{
-       struct phy_device *phydev = dev->phydev;
-
-       if (!phydev)
-               return -ENODEV;
-
-       return genphy_restart_aneg(phydev);
-}
-
 /* ITR clock source is enet system clock (clk_ahb).
  * TCTT unit is cycle_ns * 64 cycle
  * So, the ICTT value = X us / (cycle_ns * 64)
@@ -2553,7 +2543,7 @@ static const struct ethtool_ops fec_enet_ethtool_ops = {
        .get_drvinfo            = fec_enet_get_drvinfo,
        .get_regs_len           = fec_enet_get_regs_len,
        .get_regs               = fec_enet_get_regs,
-       .nway_reset             = fec_enet_nway_reset,
+       .nway_reset             = phy_ethtool_nway_reset,
        .get_link               = ethtool_op_get_link,
        .get_coalesce           = fec_enet_get_coalesce,
        .set_coalesce           = fec_enet_set_coalesce,
index efabb04a1ae8cc445fab85eab8c8aede0dcd0a58..4b0f3a50b2939aa31fc6274235e263f1277461fb 100644 (file)
@@ -722,9 +722,6 @@ int tgec_free(struct fman_mac *tgec)
 {
        free_init_resources(tgec);
 
-       if (tgec->cfg)
-               tgec->cfg = NULL;
-
        kfree(tgec->cfg);
        kfree(tgec);
 
index 44f50e1687035b019514d55663d95a42b684c344..34843c1554209caa701dd90d75c4a00202067f41 100644 (file)
@@ -807,11 +807,6 @@ static void fs_get_regs(struct net_device *dev, struct ethtool_regs *regs,
                regs->version = 0;
 }
 
-static int fs_nway_reset(struct net_device *dev)
-{
-       return 0;
-}
-
 static u32 fs_get_msglevel(struct net_device *dev)
 {
        struct fs_enet_private *fep = netdev_priv(dev);
@@ -865,7 +860,7 @@ static int fs_set_tunable(struct net_device *dev,
 static const struct ethtool_ops fs_ethtool_ops = {
        .get_drvinfo = fs_get_drvinfo,
        .get_regs_len = fs_get_regs_len,
-       .nway_reset = fs_nway_reset,
+       .nway_reset = phy_ethtool_nway_reset,
        .get_link = ethtool_op_get_link,
        .get_msglevel = fs_get_msglevel,
        .set_msglevel = fs_set_msglevel,
index 57798814160dca157d9576448a04e06bfc183255..721be13081f93895e9fec7da20b50b34efe507f7 100644 (file)
@@ -72,7 +72,7 @@ struct gianfar_ptp_registers {
 /* Bit definitions for the TMR_CTRL register */
 #define ALM1P                 (1<<31) /* Alarm1 output polarity */
 #define ALM2P                 (1<<30) /* Alarm2 output polarity */
-#define FS                    (1<<28) /* FIPER start indication */
+#define FIPERST               (1<<28) /* FIPER start indication */
 #define PP1L                  (1<<27) /* Fiper1 pulse loopback mode enabled. */
 #define PP2L                  (1<<26) /* Fiper2 pulse loopback mode enabled. */
 #define TCLK_PERIOD_SHIFT     (16) /* 1588 timer reference clock period. */
@@ -280,21 +280,26 @@ static irqreturn_t isr(int irq, void *priv)
  * PTP clock operations
  */
 
-static int ptp_gianfar_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+static int ptp_gianfar_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 {
-       u64 adj;
-       u32 diff, tmr_add;
+       u64 adj, diff;
+       u32 tmr_add;
        int neg_adj = 0;
        struct etsects *etsects = container_of(ptp, struct etsects, caps);
 
-       if (ppb < 0) {
+       if (scaled_ppm < 0) {
                neg_adj = 1;
-               ppb = -ppb;
+               scaled_ppm = -scaled_ppm;
        }
        tmr_add = etsects->tmr_add;
        adj = tmr_add;
-       adj *= ppb;
-       diff = div_u64(adj, 1000000000ULL);
+
+       /* calculate diff as adj*(scaled_ppm/65536)/1000000
+        * and round() to the nearest integer
+        */
+       adj *= scaled_ppm;
+       diff = div_u64(adj, 8000000);
+       diff = (diff >> 13) + ((diff >> 12) & 1);
 
        tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff;
 
@@ -415,7 +420,7 @@ static struct ptp_clock_info ptp_gianfar_caps = {
        .n_per_out      = 0,
        .n_pins         = 0,
        .pps            = 1,
-       .adjfreq        = ptp_gianfar_adjfreq,
+       .adjfine        = ptp_gianfar_adjfine,
        .adjtime        = ptp_gianfar_adjtime,
        .gettime64      = ptp_gianfar_gettime,
        .settime64      = ptp_gianfar_settime,
@@ -502,7 +507,7 @@ static int gianfar_ptp_probe(struct platform_device *dev)
        gfar_write(&etsects->regs->tmr_fiper1, etsects->tmr_fiper1);
        gfar_write(&etsects->regs->tmr_fiper2, etsects->tmr_fiper2);
        set_alarm(etsects);
-       gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|FS|RTPE|TE|FRD);
+       gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
 
        spin_unlock_irqrestore(&etsects->lock, flags);
 
index 812a968a78e9808c37fa9bef1d308b42fead4d95..8ba636f61b5063a5978e647145cc0963de74389f 100644 (file)
@@ -332,13 +332,6 @@ static void uec_get_ethtool_stats(struct net_device *netdev,
        }
 }
 
-static int uec_nway_reset(struct net_device *netdev)
-{
-       struct ucc_geth_private *ugeth = netdev_priv(netdev);
-
-       return phy_start_aneg(ugeth->phydev);
-}
-
 /* Report driver information */
 static void
 uec_get_drvinfo(struct net_device *netdev,
@@ -394,7 +387,7 @@ static const struct ethtool_ops uec_ethtool_ops = {
        .get_regs               = uec_get_regs,
        .get_msglevel           = uec_get_msglevel,
        .set_msglevel           = uec_set_msglevel,
-       .nway_reset             = uec_nway_reset,
+       .nway_reset             = phy_ethtool_nway_reset,
        .get_link               = ethtool_op_get_link,
        .get_ringparam          = uec_get_ringparam,
        .set_ringparam          = uec_set_ringparam,
index c54c6fac0d1de065cc891fe9103bba027c6f2df8..b6ed818f78fffe21ee2b4c385c7c6222bc5df9f3 100644 (file)
@@ -332,8 +332,10 @@ struct hnae_handle *hnae_get_handle(struct device *owner_dev,
                return ERR_PTR(-ENODEV);
 
        handle = dev->ops->get_handle(dev, port_id);
-       if (IS_ERR(handle))
+       if (IS_ERR(handle)) {
+               put_device(&dev->cls_dev);
                return handle;
+       }
 
        handle->dev = dev;
        handle->owner_dev = owner_dev;
@@ -356,6 +358,8 @@ out_when_init_queue:
        for (j = i - 1; j >= 0; j--)
                hnae_fini_queue(handle->qs[j]);
 
+       put_device(&dev->cls_dev);
+
        return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL(hnae_get_handle);
@@ -377,6 +381,8 @@ void hnae_put_handle(struct hnae_handle *h)
                dev->ops->put_handle(h);
 
        module_put(dev->owner);
+
+       put_device(&dev->cls_dev);
 }
 EXPORT_SYMBOL(hnae_put_handle);
 
index e093cbf26c8c50d22bf782741b382c96ca29e1d6..09602f1187f5493b34d9525b108d7582d16f25f3 100644 (file)
@@ -426,8 +426,14 @@ enum hnae_media_type {
  *   get mac address
  * set_mac_addr()
  *   set mac address
+ * clr_mc_addr()
+ *   clear mcast tcam table
  * set_mc_addr()
  *   set multicast mode
+ * add_uc_addr()
+ *   add ucast address
+ * rm_uc_addr()
+ *   remove ucast address
  * set_mtu()
  *   set mtu
  * update_stats()
@@ -488,6 +494,11 @@ struct hnae_ae_ops {
        void (*set_promisc_mode)(struct hnae_handle *handle, u32 en);
        int (*get_mac_addr)(struct hnae_handle *handle, void **p);
        int (*set_mac_addr)(struct hnae_handle *handle, void *p);
+       int (*add_uc_addr)(struct hnae_handle *handle,
+                          const unsigned char *addr);
+       int (*rm_uc_addr)(struct hnae_handle *handle,
+                         const unsigned char *addr);
+       int (*clr_mc_addr)(struct hnae_handle *handle);
        int (*set_mc_addr)(struct hnae_handle *handle, void *addr);
        int (*set_mtu)(struct hnae_handle *handle, int new_mtu);
        void (*set_tso_stats)(struct hnae_handle *handle, int enable);
@@ -590,7 +601,7 @@ static inline int hnae_alloc_buffer_attach(struct hnae_ring *ring, int i)
        if (ret)
                return ret;
 
-       ring->desc[i].addr = (__le64)ring->desc_cb[i].dma;
+       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
 
        return 0;
 }
@@ -621,14 +632,14 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i,
 
        bops->unmap_buffer(ring, &ring->desc_cb[i]);
        ring->desc_cb[i] = *res_cb;
-       ring->desc[i].addr = (__le64)ring->desc_cb[i].dma;
+       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
        ring->desc[i].rx.ipoff_bnum_pid_flag = 0;
 }
 
 static inline void hnae_reuse_buffer(struct hnae_ring *ring, int i)
 {
        ring->desc_cb[i].reuse_flag = 0;
-       ring->desc[i].addr = (__le64)(ring->desc_cb[i].dma
+       ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma
                + ring->desc_cb[i].page_offset);
        ring->desc[i].rx.ipoff_bnum_pid_flag = 0;
 }
index e28d960997af3189885161bd40c355b28aee84af..0a9cdf00b31afa9608414a4ad3de3089e4f61d04 100644 (file)
@@ -18,9 +18,6 @@
 #include "hns_dsaf_rcb.h"
 
 #define AE_NAME_PORT_ID_IDX 6
-#define ETH_STATIC_REG  1
-#define ETH_DUMP_REG    5
-#define ETH_GSTRING_LEN        32
 
 static struct hns_mac_cb *hns_get_mac_cb(struct hnae_handle *handle)
 {
@@ -202,11 +199,34 @@ static int hns_ae_set_mac_address(struct hnae_handle *handle, void *p)
        return 0;
 }
 
+static int hns_ae_add_uc_address(struct hnae_handle *handle,
+                                const unsigned char *addr)
+{
+       struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
+
+       if (mac_cb->mac_type != HNAE_PORT_SERVICE)
+               return -ENOSPC;
+
+       return hns_mac_add_uc_addr(mac_cb, handle->vf_id, addr);
+}
+
+static int hns_ae_rm_uc_address(struct hnae_handle *handle,
+                               const unsigned char *addr)
+{
+       struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
+
+       if (mac_cb->mac_type != HNAE_PORT_SERVICE)
+               return -ENOSPC;
+
+       return hns_mac_rm_uc_addr(mac_cb, handle->vf_id, addr);
+}
+
 static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr)
 {
        int ret;
        char *mac_addr = (char *)addr;
        struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
+       u8 port_num;
 
        assert(mac_cb);
 
@@ -221,8 +241,11 @@ static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr)
                return ret;
        }
 
-       ret = hns_mac_set_multi(mac_cb, DSAF_BASE_INNER_PORT_NUM,
-                               mac_addr, true);
+       ret = hns_mac_get_inner_port_num(mac_cb, handle->vf_id, &port_num);
+       if (ret)
+               return ret;
+
+       ret = hns_mac_set_multi(mac_cb, port_num, mac_addr, true);
        if (ret)
                dev_err(handle->owner_dev,
                        "mac add mul_mac:%pM port%d  fail, ret = %#x!\n",
@@ -231,6 +254,16 @@ static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr)
        return ret;
 }
 
+static int hns_ae_clr_multicast(struct hnae_handle *handle)
+{
+       struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
+
+       if (mac_cb->mac_type != HNAE_PORT_SERVICE)
+               return 0;
+
+       return hns_mac_clr_multicast(mac_cb, handle->vf_id);
+}
+
 static int hns_ae_set_mtu(struct hnae_handle *handle, int new_mtu)
 {
        struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
@@ -678,9 +711,6 @@ static int hns_ae_config_loopback(struct hnae_handle *handle,
                ret = -EINVAL;
        }
 
-       if (!ret)
-               hns_dsaf_set_inner_lb(mac_cb->dsaf_dev, mac_cb->mac_id, en);
-
        return ret;
 }
 
@@ -822,7 +852,10 @@ static struct hnae_ae_ops hns_dsaf_ops = {
        .get_coalesce_range = hns_ae_get_coalesce_range,
        .set_promisc_mode = hns_ae_set_promisc_mode,
        .set_mac_addr = hns_ae_set_mac_address,
+       .add_uc_addr = hns_ae_add_uc_address,
+       .rm_uc_addr = hns_ae_rm_uc_address,
        .set_mc_addr = hns_ae_set_multicast_one,
+       .clr_mc_addr = hns_ae_clr_multicast,
        .set_mtu = hns_ae_set_mtu,
        .update_stats = hns_ae_update_stats,
        .set_tso_stats = hns_ae_set_tso_stats,
index 1e1eb92998fb3d66f497f88b890817e22c4a8d3d..3382441fe7b51e84bb5e815ffc1e4fa192b09a91 100644 (file)
@@ -37,8 +37,8 @@ static const struct mac_stats_string g_gmac_stats_string[] = {
        {"gmac_rx_very_long_err", MAC_STATS_FIELD_OFF(rx_long_err)},
        {"gmac_rx_runt_err", MAC_STATS_FIELD_OFF(rx_minto64)},
        {"gmac_rx_short_err", MAC_STATS_FIELD_OFF(rx_under_min)},
-       {"gmac_rx_filt_pkt", MAC_STATS_FIELD_OFF(rx_filter_bytes)},
-       {"gmac_rx_octets_total_filt", MAC_STATS_FIELD_OFF(rx_filter_pkts)},
+       {"gmac_rx_filt_pkt", MAC_STATS_FIELD_OFF(rx_filter_pkts)},
+       {"gmac_rx_octets_total_filt", MAC_STATS_FIELD_OFF(rx_filter_bytes)},
        {"gmac_rx_overrun_cnt", MAC_STATS_FIELD_OFF(rx_fifo_overrun_err)},
        {"gmac_rx_length_err", MAC_STATS_FIELD_OFF(rx_len_err)},
        {"gmac_rx_fail_comma", MAC_STATS_FIELD_OFF(rx_comma_err)},
index d8e99416ab244d347a7c10766a28a1d93073f776..3239d27143b935dc0056490b32f700093163c74a 100644 (file)
@@ -141,9 +141,10 @@ void hns_mac_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex)
  *@port_num:port number
  *
  */
-static int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb,
-                                     u8 vmid, u8 *port_num)
+int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb, u8 vmid, u8 *port_num)
 {
+       int q_num_per_vf, vf_num_per_port;
+       int vm_queue_id;
        u8 tmp_port;
 
        if (mac_cb->dsaf_dev->dsaf_mode <= DSAF_MODE_ENABLE) {
@@ -174,6 +175,12 @@ static int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb,
                return -EINVAL;
        }
 
+       q_num_per_vf = mac_cb->dsaf_dev->rcb_common[0]->max_q_per_vf;
+       vf_num_per_port = mac_cb->dsaf_dev->rcb_common[0]->max_vfn;
+
+       vm_queue_id = vmid * q_num_per_vf +
+                       vf_num_per_port * q_num_per_vf * mac_cb->mac_id;
+
        switch (mac_cb->dsaf_dev->dsaf_mode) {
        case DSAF_MODE_ENABLE_FIX:
                tmp_port = 0;
@@ -193,7 +200,7 @@ static int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb,
        case DSAF_MODE_DISABLE_6PORT_2VM:
        case DSAF_MODE_DISABLE_6PORT_4VM:
        case DSAF_MODE_DISABLE_6PORT_16VM:
-               tmp_port = vmid;
+               tmp_port = vm_queue_id;
                break;
        default:
                dev_err(mac_cb->dev, "dsaf mode invalid, %s mac%d!\n",
@@ -256,6 +263,46 @@ int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb,
        return 0;
 }
 
+int hns_mac_add_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id,
+                       const unsigned char *addr)
+{
+       struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
+       struct dsaf_drv_mac_single_dest_entry mac_entry;
+       int ret;
+
+       if (HNS_DSAF_IS_DEBUG(dsaf_dev))
+               return -ENOSPC;
+
+       memset(&mac_entry, 0, sizeof(mac_entry));
+       memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr));
+       mac_entry.in_port_num = mac_cb->mac_id;
+       ret = hns_mac_get_inner_port_num(mac_cb, vf_id, &mac_entry.port_num);
+       if (ret)
+               return ret;
+
+       return hns_dsaf_set_mac_uc_entry(dsaf_dev, &mac_entry);
+}
+
+int hns_mac_rm_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id,
+                      const unsigned char *addr)
+{
+       struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
+       struct dsaf_drv_mac_single_dest_entry mac_entry;
+       int ret;
+
+       if (HNS_DSAF_IS_DEBUG(dsaf_dev))
+               return -ENOSPC;
+
+       memset(&mac_entry, 0, sizeof(mac_entry));
+       memcpy(mac_entry.addr, addr, sizeof(mac_entry.addr));
+       mac_entry.in_port_num = mac_cb->mac_id;
+       ret = hns_mac_get_inner_port_num(mac_cb, vf_id, &mac_entry.port_num);
+       if (ret)
+               return ret;
+
+       return hns_dsaf_rm_mac_addr(dsaf_dev, &mac_entry);
+}
+
 int hns_mac_set_multi(struct hns_mac_cb *mac_cb,
                      u32 port_num, char *addr, bool enable)
 {
@@ -323,13 +370,24 @@ int hns_mac_del_mac(struct hns_mac_cb *mac_cb, u32 vfn, char *mac)
        return 0;
 }
 
+int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn)
+{
+       struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
+       u8 port_num;
+       int ret = hns_mac_get_inner_port_num(mac_cb, vfn, &port_num);
+
+       if (ret)
+               return ret;
+
+       return hns_dsaf_clr_mac_mc_port(dsaf_dev, mac_cb->mac_id, port_num);
+}
+
 static void hns_mac_param_get(struct mac_params *param,
                              struct hns_mac_cb *mac_cb)
 {
        param->vaddr = (void *)mac_cb->vaddr;
        param->mac_mode = hns_get_enet_interface(mac_cb);
-       memcpy(param->addr, mac_cb->addr_entry_idx[0].addr,
-              MAC_NUM_OCTETS_PER_ADDR);
+       ether_addr_copy(param->addr, mac_cb->addr_entry_idx[0].addr);
        param->mac_id = mac_cb->mac_id;
        param->dev = mac_cb->dev;
 }
@@ -346,8 +404,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb,
 {
        int ret;
        struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
-       u8 addr[MAC_NUM_OCTETS_PER_ADDR]
-               = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+       u8 addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
        struct dsaf_drv_mac_single_dest_entry mac_entry;
 
        /* directy return ok in debug network mode */
@@ -382,8 +439,7 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable)
        int ret;
        struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
        u8 port_num;
-       u8 addr[MAC_NUM_OCTETS_PER_ADDR]
-               = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+       u8 addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
        struct mac_entry_idx *uc_mac_entry;
        struct dsaf_drv_mac_single_dest_entry mac_entry;
 
@@ -861,6 +917,13 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
                }
        }
 
+       if (fwnode_property_read_u8_array(mac_cb->fw_port, "mc-mac-mask",
+                                         mac_cb->mc_mask, ETH_ALEN)) {
+               dev_warn(mac_cb->dev,
+                        "no mc-mac-mask property, set to default value.\n");
+               eth_broadcast_addr(mac_cb->mc_mask);
+       }
+
        return 0;
 }
 
@@ -1074,6 +1137,8 @@ void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en)
 {
        struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb);
 
+       hns_dsaf_set_promisc_tcam(mac_cb->dsaf_dev, mac_cb->mac_id, !!en);
+
        if (mac_ctrl_drv->set_promiscuous)
                mac_ctrl_drv->set_promiscuous(mac_ctrl_drv, en);
 }
index 4cbdf14f5c163e7405dd323327e8d3dc015a6ae5..2bb3d1e93c64a315c92f0e493573add3e5f1e023 100644 (file)
@@ -31,7 +31,7 @@ struct dsaf_device;
 #define MAC_MIN_MTU            68
 #define MAC_MAX_MTU_DBG                MAC_DEFAULT_MTU
 
-#define MAC_DEFAULT_PAUSE_TIME 0xff
+#define MAC_DEFAULT_PAUSE_TIME 0xffff
 
 #define MAC_GMAC_IDX 0
 #define MAC_XGMAC_IDX 1
@@ -56,9 +56,6 @@ struct dsaf_device;
 /*check mac addr multicast*/
 #define MAC_IS_MULTICAST(p)    ((*((u8 *)((p) + 0)) & 0x01) ? (1) : (0))
 
-/**< Number of octets (8-bit bytes) in an ethernet address */
-#define MAC_NUM_OCTETS_PER_ADDR 6
-
 struct mac_priv {
        void *mac;
 };
@@ -189,7 +186,7 @@ struct mac_statistics {
 
 /*mac para struct ,mac get param from nic or dsaf when initialize*/
 struct mac_params {
-       char addr[MAC_NUM_OCTETS_PER_ADDR];
+       char addr[ETH_ALEN];
        void *vaddr; /*virtual address*/
        struct device *dev;
        u8 mac_id;
@@ -214,7 +211,7 @@ struct mac_info {
 };
 
 struct mac_entry_idx {
-       u8 addr[MAC_NUM_OCTETS_PER_ADDR];
+       u8 addr[ETH_ALEN];
        u16 vlan_id:12;
        u16 valid:1;
        u16 qos:3;
@@ -317,6 +314,7 @@ struct hns_mac_cb {
        u8 __iomem *serdes_vaddr;
        struct regmap *serdes_ctrl;
        struct regmap *cpld_ctrl;
+       char mc_mask[ETH_ALEN];
        u32 cpld_ctrl_reg;
        u32 port_rst_off;
        u32 port_mode_off;
@@ -409,7 +407,7 @@ struct mac_driver {
 };
 
 struct mac_stats_string {
-       char desc[64];
+       char desc[ETH_GSTRING_LEN];
        unsigned long offset;
 };
 
@@ -461,5 +459,12 @@ void hns_set_led_opt(struct hns_mac_cb *mac_cb);
 int hns_cpld_led_set_id(struct hns_mac_cb *mac_cb,
                        enum hnae_led_state status);
 void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en);
+int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb,
+                              u8 vmid, u8 *port_num);
+int hns_mac_add_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id,
+                       const unsigned char *addr);
+int hns_mac_rm_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id,
+                      const unsigned char *addr);
+int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn);
 
 #endif /* _HNS_DSAF_MAC_H */
index 8e5b3f51b47b80201d7c447a9f061116b69e141c..90dbda7926144a41120d18c28a2c7d033f245f8c 100644 (file)
@@ -591,6 +591,16 @@ static void hns_dsaf_voq_bp_all_thrd_cfg(struct dsaf_device *dsaf_dev)
        }
 }
 
+static void hns_dsaf_tbl_tcam_match_cfg(
+       struct dsaf_device *dsaf_dev,
+       struct dsaf_tbl_tcam_data *ptbl_tcam_data)
+{
+       dsaf_write_dev(dsaf_dev, DSAF_TBL_TCAM_MATCH_CFG_L_REG,
+                      ptbl_tcam_data->tbl_tcam_data_low);
+       dsaf_write_dev(dsaf_dev, DSAF_TBL_TCAM_MATCH_CFG_H_REG,
+                      ptbl_tcam_data->tbl_tcam_data_high);
+}
+
 /**
  * hns_dsaf_tbl_tcam_data_cfg - tbl
  * @dsaf_id: dsa fabric id
@@ -755,21 +765,11 @@ static void hns_dsaf_tbl_tcam_data_ucast_pul(
 
 void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en)
 {
-       if (!HNS_DSAF_IS_DEBUG(dsaf_dev))
+       if (AE_IS_VER1(dsaf_dev->dsaf_ver) && !HNS_DSAF_IS_DEBUG(dsaf_dev))
                dsaf_set_dev_bit(dsaf_dev, DSAF_CFG_0_REG,
                                 DSAF_CFG_MIX_MODE_S, !!en);
 }
 
-void hns_dsaf_set_inner_lb(struct dsaf_device *dsaf_dev, u32 mac_id, u32 en)
-{
-       if (AE_IS_VER1(dsaf_dev->dsaf_ver) ||
-           dsaf_dev->mac_cb[mac_id]->mac_type == HNAE_PORT_DEBUG)
-               return;
-
-       dsaf_set_dev_bit(dsaf_dev, DSAFV2_SERDES_LBK_0_REG + 4 * mac_id,
-                        DSAFV2_SERDES_LBK_EN_B, !!en);
-}
-
 /**
  * hns_dsaf_tbl_stat_en - tbl
  * @dsaf_id: dsa fabric id
@@ -904,15 +904,16 @@ static void hns_dsaf_tcam_uc_cfg(
 }
 
 /**
- * hns_dsaf_tcam_mc_cfg - INT
- * @dsaf_id: dsa fabric id
- * @address,
- * @ptbl_tcam_data,
- * @ptbl_tcam_mcast,
+ * hns_dsaf_tcam_mc_cfg - cfg the tcam for mc
+ * @dsaf_dev: dsa fabric device struct pointer
+ * @address: tcam index
+ * @ptbl_tcam_data: tcam data struct pointer
+ * @ptbl_tcam_mask: tcam mask struct pointer, it must be null for HNSv1
  */
 static void hns_dsaf_tcam_mc_cfg(
        struct dsaf_device *dsaf_dev, u32 address,
        struct dsaf_tbl_tcam_data *ptbl_tcam_data,
+       struct dsaf_tbl_tcam_data *ptbl_tcam_mask,
        struct dsaf_tbl_tcam_mcast_cfg *ptbl_tcam_mcast)
 {
        spin_lock_bh(&dsaf_dev->tcam_lock);
@@ -923,7 +924,11 @@ static void hns_dsaf_tcam_mc_cfg(
        hns_dsaf_tbl_tcam_data_cfg(dsaf_dev, ptbl_tcam_data);
        /*Write Tcam Mcast*/
        hns_dsaf_tbl_tcam_mcast_cfg(dsaf_dev, ptbl_tcam_mcast);
-       /*Write Plus*/
+       /* Write Match Data */
+       if (ptbl_tcam_mask)
+               hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, ptbl_tcam_mask);
+
+       /* Write Pulse */
        hns_dsaf_tbl_tcam_data_mcast_pul(dsaf_dev);
 
        spin_unlock_bh(&dsaf_dev->tcam_lock);
@@ -954,6 +959,16 @@ static void hns_dsaf_tcam_mc_invld(struct dsaf_device *dsaf_dev, u32 address)
        spin_unlock_bh(&dsaf_dev->tcam_lock);
 }
 
+void hns_dsaf_tcam_addr_get(struct dsaf_drv_tbl_tcam_key *mac_key, u8 *addr)
+{
+       addr[0] = mac_key->high.bits.mac_0;
+       addr[1] = mac_key->high.bits.mac_1;
+       addr[2] = mac_key->high.bits.mac_2;
+       addr[3] = mac_key->high.bits.mac_3;
+       addr[4] = mac_key->low.bits.mac_4;
+       addr[5] = mac_key->low.bits.mac_5;
+}
+
 /**
  * hns_dsaf_tcam_uc_get - INT
  * @dsaf_id: dsa fabric id
@@ -1379,6 +1394,12 @@ static int hns_dsaf_init(struct dsaf_device *dsaf_dev)
        if (HNS_DSAF_IS_DEBUG(dsaf_dev))
                return 0;
 
+       if (AE_IS_VER1(dsaf_dev->dsaf_ver))
+               dsaf_dev->tcam_max_num = DSAF_TCAM_SUM;
+       else
+               dsaf_dev->tcam_max_num =
+                       DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM;
+
        spin_lock_init(&dsaf_dev->tcam_lock);
        ret = hns_dsaf_init_hw(dsaf_dev);
        if (ret)
@@ -1434,7 +1455,7 @@ static u16 hns_dsaf_find_soft_mac_entry(
        u32 i;
 
        soft_mac_entry = priv->soft_mac_tbl;
-       for (i = 0; i < DSAF_TCAM_SUM; i++) {
+       for (i = 0; i < dsaf_dev->tcam_max_num; i++) {
                /* invall tab entry */
                if ((soft_mac_entry->index != DSAF_INVALID_ENTRY_IDX) &&
                    (soft_mac_entry->tcam_key.high.val == mac_key->high.val) &&
@@ -1459,7 +1480,7 @@ static u16 hns_dsaf_find_empty_mac_entry(struct dsaf_device *dsaf_dev)
        u32 i;
 
        soft_mac_entry = priv->soft_mac_tbl;
-       for (i = 0; i < DSAF_TCAM_SUM; i++) {
+       for (i = 0; i < dsaf_dev->tcam_max_num; i++) {
                /* inv all entry */
                if (soft_mac_entry->index == DSAF_INVALID_ENTRY_IDX)
                        /* return find result --soft index */
@@ -1498,8 +1519,12 @@ static void hns_dsaf_set_mac_key(
        mac_key->high.bits.mac_3 = addr[3];
        mac_key->low.bits.mac_4 = addr[4];
        mac_key->low.bits.mac_5 = addr[5];
-       mac_key->low.bits.vlan = vlan_id;
-       mac_key->low.bits.port = port;
+       dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_VLAN_M,
+                      DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id);
+       dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M,
+                      DSAF_TBL_TCAM_KEY_PORT_S, port);
+
+       mac_key->low.bits.port_vlan = le16_to_cpu(mac_key->low.bits.port_vlan);
 }
 
 /**
@@ -1517,6 +1542,7 @@ int hns_dsaf_set_mac_uc_entry(
        struct dsaf_drv_priv *priv =
            (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev);
        struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
+       struct dsaf_tbl_tcam_data tcam_data;
 
        /* mac addr check */
        if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
@@ -1558,9 +1584,10 @@ int hns_dsaf_set_mac_uc_entry(
        /* default config dvc to 0 */
        mac_data.tbl_ucast_dvc = 0;
        mac_data.tbl_ucast_out_port = mac_entry->port_num;
-       hns_dsaf_tcam_uc_cfg(
-               dsaf_dev, entry_index,
-               (struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data);
+       tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
+       tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+
+       hns_dsaf_tcam_uc_cfg(dsaf_dev, entry_index, &tcam_data, &mac_data);
 
        /* config software entry */
        soft_mac_entry += entry_index;
@@ -1571,6 +1598,55 @@ int hns_dsaf_set_mac_uc_entry(
        return 0;
 }
 
+int hns_dsaf_rm_mac_addr(
+       struct dsaf_device *dsaf_dev,
+       struct dsaf_drv_mac_single_dest_entry *mac_entry)
+{
+       u16 entry_index = DSAF_INVALID_ENTRY_IDX;
+       struct dsaf_tbl_tcam_ucast_cfg mac_data;
+       struct dsaf_drv_tbl_tcam_key mac_key;
+
+       /* mac addr check */
+       if (!is_valid_ether_addr(mac_entry->addr)) {
+               dev_err(dsaf_dev->dev, "rm_uc_addr %s Mac %pM err!\n",
+                       dsaf_dev->ae_dev.name, mac_entry->addr);
+               return -EINVAL;
+       }
+
+       /* config key */
+       hns_dsaf_set_mac_key(dsaf_dev, &mac_key, mac_entry->in_vlan_id,
+                            mac_entry->in_port_num, mac_entry->addr);
+
+       entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
+       if (entry_index == DSAF_INVALID_ENTRY_IDX) {
+               /* can not find the tcam entry, return 0 */
+               dev_info(dsaf_dev->dev,
+                        "rm_uc_addr no tcam, %s Mac key(%#x:%#x)\n",
+                        dsaf_dev->ae_dev.name,
+                        mac_key.high.val, mac_key.low.val);
+               return 0;
+       }
+
+       dev_dbg(dsaf_dev->dev,
+               "rm_uc_addr, %s Mac key(%#x:%#x) entry_index%d\n",
+               dsaf_dev->ae_dev.name, mac_key.high.val,
+               mac_key.low.val, entry_index);
+
+       hns_dsaf_tcam_uc_get(
+                       dsaf_dev, entry_index,
+                       (struct dsaf_tbl_tcam_data *)&mac_key,
+                       &mac_data);
+
+       /* a unicast entry not used locally must not be cleared */
+       if (mac_entry->port_num != mac_data.tbl_ucast_out_port)
+               return -EFAULT;
+
+       return hns_dsaf_del_mac_entry(dsaf_dev,
+                                     mac_entry->in_vlan_id,
+                                     mac_entry->in_port_num,
+                                     mac_entry->addr);
+}
+
 /**
  * hns_dsaf_set_mac_mc_entry - set mac mc-entry
  * @dsaf_dev: dsa fabric device struct pointer
@@ -1587,6 +1663,7 @@ int hns_dsaf_set_mac_mc_entry(
            (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev);
        struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
        struct dsaf_drv_tbl_tcam_key tmp_mac_key;
+       struct dsaf_tbl_tcam_data tcam_data;
 
        /* mac addr check */
        if (MAC_IS_ALL_ZEROS(mac_entry->addr)) {
@@ -1619,9 +1696,12 @@ int hns_dsaf_set_mac_mc_entry(
                       0, sizeof(mac_data.tbl_mcast_port_msk));
        } else {
                /* config hardware entry */
-               hns_dsaf_tcam_mc_get(
-                       dsaf_dev, entry_index,
-                       (struct dsaf_tbl_tcam_data *)(&tmp_mac_key), &mac_data);
+               hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data,
+                                    &mac_data);
+
+               tmp_mac_key.high.val =
+                       le32_to_cpu(tcam_data.tbl_tcam_data_high);
+               tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
        }
        mac_data.tbl_mcast_old_en = 0;
        mac_data.tbl_mcast_item_vld = 1;
@@ -1633,9 +1713,11 @@ int hns_dsaf_set_mac_mc_entry(
                dsaf_dev->ae_dev.name, mac_key.high.val,
                mac_key.low.val, entry_index);
 
-       hns_dsaf_tcam_mc_cfg(
-               dsaf_dev, entry_index,
-               (struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data);
+       tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
+       tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+
+       hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, NULL,
+                            &mac_data);
 
        /* config software entry */
        soft_mac_entry += entry_index;
@@ -1646,6 +1728,16 @@ int hns_dsaf_set_mac_mc_entry(
        return 0;
 }
 
+static void hns_dsaf_mc_mask_bit_clear(char *dst, const char *src)
+{
+       u16 *a = (u16 *)dst;
+       const u16 *b = (const u16 *)src;
+
+       a[0] &= b[0];
+       a[1] &= b[1];
+       a[2] &= b[2];
+}
+
 /**
  * hns_dsaf_add_mac_mc_port - add mac mc-port
  * @dsaf_dev: dsa fabric device struct pointer
@@ -1656,11 +1748,15 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
 {
        u16 entry_index = DSAF_INVALID_ENTRY_IDX;
        struct dsaf_drv_tbl_tcam_key mac_key;
+       struct dsaf_drv_tbl_tcam_key mask_key;
+       struct dsaf_tbl_tcam_data *pmask_key = NULL;
        struct dsaf_tbl_tcam_mcast_cfg mac_data;
-       struct dsaf_drv_priv *priv =
-           (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev);
+       struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
        struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
        struct dsaf_drv_tbl_tcam_key tmp_mac_key;
+       struct dsaf_tbl_tcam_data tcam_data;
+       u8 mc_addr[ETH_ALEN];
+       u8 *mc_mask;
        int mskid;
 
        /*chechk mac addr */
@@ -1670,14 +1766,32 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
                return -EINVAL;
        }
 
+       ether_addr_copy(mc_addr, mac_entry->addr);
+       mc_mask = dsaf_dev->mac_cb[mac_entry->in_port_num]->mc_mask;
+       if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) {
+               /* prepare for key data setting */
+               hns_dsaf_mc_mask_bit_clear(mc_addr, mc_mask);
+
+               /* config key mask */
+               hns_dsaf_set_mac_key(dsaf_dev, &mask_key,
+                                    0x0,
+                                    0xff,
+                                    mc_mask);
+
+               mask_key.high.val = le32_to_cpu(mask_key.high.val);
+               mask_key.low.val = le32_to_cpu(mask_key.low.val);
+
+               pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key);
+       }
+
        /*config key */
        hns_dsaf_set_mac_key(
                dsaf_dev, &mac_key, mac_entry->in_vlan_id,
-               mac_entry->in_port_num, mac_entry->addr);
+               mac_entry->in_port_num, mc_addr);
 
        memset(&mac_data, 0, sizeof(struct dsaf_tbl_tcam_mcast_cfg));
 
-       /*check exist? */
+       /* check if the tcam is exist */
        entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
        if (entry_index == DSAF_INVALID_ENTRY_IDX) {
                /*if hasnot , find a empty*/
@@ -1691,11 +1805,15 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
                        return -EINVAL;
                }
        } else {
-               /*if exist, add in */
-               hns_dsaf_tcam_mc_get(
-                       dsaf_dev, entry_index,
-                       (struct dsaf_tbl_tcam_data *)(&tmp_mac_key), &mac_data);
+               /* if exist, add in */
+               hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data,
+                                    &mac_data);
+
+               tmp_mac_key.high.val =
+                       le32_to_cpu(tcam_data.tbl_tcam_data_high);
+               tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
        }
+
        /* config hardware entry */
        if (mac_entry->port_num < DSAF_SERVICE_NW_NUM) {
                mskid = mac_entry->port_num;
@@ -1718,9 +1836,12 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
                dsaf_dev->ae_dev.name, mac_key.high.val,
                mac_key.low.val, entry_index);
 
-       hns_dsaf_tcam_mc_cfg(
-               dsaf_dev, entry_index,
-               (struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data);
+       tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
+       tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+
+       /* config mc entry with mask */
+       hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data,
+                            pmask_key, &mac_data);
 
        /*config software entry */
        soft_mac_entry += entry_index;
@@ -1792,15 +1913,18 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
 {
        u16 entry_index = DSAF_INVALID_ENTRY_IDX;
        struct dsaf_drv_tbl_tcam_key mac_key;
-       struct dsaf_drv_priv *priv =
-           (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev);
+       struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
        struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
        u16 vlan_id;
        u8 in_port_num;
        struct dsaf_tbl_tcam_mcast_cfg mac_data;
-       struct dsaf_drv_tbl_tcam_key tmp_mac_key;
+       struct dsaf_tbl_tcam_data tcam_data;
        int mskid;
        const u8 empty_msk[sizeof(mac_data.tbl_mcast_port_msk)] = {0};
+       struct dsaf_drv_tbl_tcam_key mask_key, tmp_mac_key;
+       struct dsaf_tbl_tcam_data *pmask_key = NULL;
+       u8 mc_addr[ETH_ALEN];
+       u8 *mc_mask;
 
        if (!(void *)mac_entry) {
                dev_err(dsaf_dev->dev,
@@ -1808,10 +1932,6 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
                return -EINVAL;
        }
 
-       /*get key info*/
-       vlan_id = mac_entry->in_vlan_id;
-       in_port_num = mac_entry->in_port_num;
-
        /*check mac addr */
        if (MAC_IS_ALL_ZEROS(mac_entry->addr)) {
                dev_err(dsaf_dev->dev, "del_port failed, addr %pM!\n",
@@ -1819,11 +1939,31 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
                return -EINVAL;
        }
 
-       /*config key */
-       hns_dsaf_set_mac_key(dsaf_dev, &mac_key, vlan_id, in_port_num,
-                            mac_entry->addr);
+       /* always mask vlan_id field */
+       ether_addr_copy(mc_addr, mac_entry->addr);
+       mc_mask = dsaf_dev->mac_cb[mac_entry->in_port_num]->mc_mask;
+
+       if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) {
+               /* prepare for key data setting */
+               hns_dsaf_mc_mask_bit_clear(mc_addr, mc_mask);
 
-       /*check is exist? */
+               /* config key mask */
+               hns_dsaf_set_mac_key(dsaf_dev, &mask_key, 0x00, 0xff, mc_addr);
+
+               mask_key.high.val = le32_to_cpu(mask_key.high.val);
+               mask_key.low.val = le32_to_cpu(mask_key.low.val);
+
+               pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key);
+       }
+
+       /* get key info */
+       vlan_id = mac_entry->in_vlan_id;
+       in_port_num = mac_entry->in_port_num;
+
+       /* config key */
+       hns_dsaf_set_mac_key(dsaf_dev, &mac_key, vlan_id, in_port_num, mc_addr);
+
+       /* check if the tcam entry is exist */
        entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
        if (entry_index == DSAF_INVALID_ENTRY_IDX) {
                /*find none */
@@ -1839,10 +1979,11 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
                dsaf_dev->ae_dev.name, mac_key.high.val,
                mac_key.low.val, entry_index);
 
-       /*read entry*/
-       hns_dsaf_tcam_mc_get(
-               dsaf_dev, entry_index,
-               (struct dsaf_tbl_tcam_data *)(&tmp_mac_key), &mac_data);
+       /* read entry */
+       hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
+
+       tmp_mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
+       tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
 
        /*del the port*/
        if (mac_entry->port_num < DSAF_SERVICE_NW_NUM) {
@@ -1867,15 +2008,87 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
                /* del soft entry */
                soft_mac_entry += entry_index;
                soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX;
-       } else { /* not zer, just del port, updata*/
-               hns_dsaf_tcam_mc_cfg(
-                       dsaf_dev, entry_index,
-                       (struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data);
+       } else { /* not zero, just del port, update */
+               tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
+               tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+
+               hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index,
+                                    &tcam_data,
+                                    pmask_key, &mac_data);
        }
 
        return 0;
 }
 
+/**
+ * hns_dsaf_clr_mac_mc_port - remove a port from the multicast entries of a mac
+ * @dsaf_dev: dsa fabric device struct pointer
+ * @mac_id: input port number; only entries whose key carries it are touched
+ * @port_num: destination port handed to hns_dsaf_del_mac_mc_port()
+ *
+ * Walks the cached software table below the entries reserved for promisc.
+ * For every valid multicast (non-broadcast) key belonging to @mac_id it asks
+ * hns_dsaf_del_mac_mc_port() to drop @port_num and then, if nothing but
+ * @mac_id is left in the hardware port mask, to drop @mac_id as well.
+ *
+ * Return: 0 on success or when HNS_DSAF_IS_DEBUG() is true, -EINVAL if any
+ * hns_dsaf_del_mac_mc_port() call failed.
+ */
+int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, u8 mac_id,
+                            u8 port_num)
+{
+       struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
+       struct dsaf_drv_soft_mac_tbl *soft_mac_entry;
+       struct dsaf_tbl_tcam_mcast_cfg mac_data;
+       int ret = 0, i;
+
+       if (HNS_DSAF_IS_DEBUG(dsaf_dev))
+               return 0;
+
+       for (i = 0; i < DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM; i++) {
+               u8 addr[ETH_ALEN];
+               u8 port;
+
+               soft_mac_entry = priv->soft_mac_tbl + i;
+
+               /* decode address and input port from the cached key */
+               hns_dsaf_tcam_addr_get(&soft_mac_entry->tcam_key, addr);
+               port = dsaf_get_field(
+                               soft_mac_entry->tcam_key.low.bits.port_vlan,
+                               DSAF_TBL_TCAM_KEY_PORT_M,
+                               DSAF_TBL_TCAM_KEY_PORT_S);
+               /* check valid tcam mc entry */
+               if (soft_mac_entry->index != DSAF_INVALID_ENTRY_IDX &&
+                   port == mac_id &&
+                   is_multicast_ether_addr(addr) &&
+                   !is_broadcast_ether_addr(addr)) {
+                       const u32 empty_msk[DSAF_PORT_MSK_NUM] = {0};
+                       struct dsaf_drv_mac_single_dest_entry mac_entry;
+
+                       /* disable receiving of this multicast address for
+                        * the VF.
+                        */
+                       ether_addr_copy(mac_entry.addr, addr);
+                       mac_entry.in_vlan_id = dsaf_get_field(
+                               soft_mac_entry->tcam_key.low.bits.port_vlan,
+                               DSAF_TBL_TCAM_KEY_VLAN_M,
+                               DSAF_TBL_TCAM_KEY_VLAN_S);
+                       mac_entry.in_port_num = mac_id;
+                       mac_entry.port_num = port_num;
+                       if (hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry)) {
+                               ret = -EINVAL;
+                               continue;
+                       }
+
+                       /* disable receiving of this multicast address for
+                        * the mac port if all VF are disable
+                        *
+                        * NOTE(review): the key is read back straight into
+                        * the cached soft entry, without the le32_to_cpu()
+                        * step the other hns_dsaf_tcam_mc_get() callers
+                        * apply - confirm this is intended.
+                        */
+                       hns_dsaf_tcam_mc_get(dsaf_dev, i,
+                                            (struct dsaf_tbl_tcam_data *)
+                                            (&soft_mac_entry->tcam_key),
+                                            &mac_data);
+                       /* the bit is cleared in the local copy only, to test
+                        * whether any port other than mac_id is still set
+                        */
+                       dsaf_set_bit(mac_data.tbl_mcast_port_msk[mac_id / 32],
+                                    mac_id % 32, 0);
+                       if (!memcmp(mac_data.tbl_mcast_port_msk, empty_msk,
+                                   sizeof(u32) * DSAF_PORT_MSK_NUM)) {
+                               mac_entry.port_num = mac_id;
+                               if (hns_dsaf_del_mac_mc_port(dsaf_dev,
+                                                            &mac_entry)) {
+                                       ret = -EINVAL;
+                                       continue;
+                               }
+                       }
+               }
+       }
+
+       return ret;
+}
+
 /**
  * hns_dsaf_get_mac_uc_entry - get mac uc entry
  * @dsaf_dev: dsa fabric device struct pointer
@@ -1888,6 +2101,7 @@ int hns_dsaf_get_mac_uc_entry(struct dsaf_device *dsaf_dev,
        struct dsaf_drv_tbl_tcam_key mac_key;
 
        struct dsaf_tbl_tcam_ucast_cfg mac_data;
+       struct dsaf_tbl_tcam_data tcam_data;
 
        /* check macaddr */
        if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
@@ -1916,9 +2130,12 @@ int hns_dsaf_get_mac_uc_entry(struct dsaf_device *dsaf_dev,
                dsaf_dev->ae_dev.name, mac_key.high.val,
                mac_key.low.val, entry_index);
 
-       /*read entry*/
-       hns_dsaf_tcam_uc_get(dsaf_dev, entry_index,
-                            (struct dsaf_tbl_tcam_data *)&mac_key, &mac_data);
+       /* read entry */
+       hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
+
+       mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
+       mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
+
        mac_entry->port_num = mac_data.tbl_ucast_out_port;
 
        return 0;
@@ -1936,6 +2153,7 @@ int hns_dsaf_get_mac_mc_entry(struct dsaf_device *dsaf_dev,
        struct dsaf_drv_tbl_tcam_key mac_key;
 
        struct dsaf_tbl_tcam_mcast_cfg mac_data;
+       struct dsaf_tbl_tcam_data tcam_data;
 
        /*check mac addr */
        if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
@@ -1965,8 +2183,10 @@ int hns_dsaf_get_mac_mc_entry(struct dsaf_device *dsaf_dev,
                mac_key.low.val, entry_index);
 
        /*read entry */
-       hns_dsaf_tcam_mc_get(dsaf_dev, entry_index,
-                            (struct dsaf_tbl_tcam_data *)&mac_key, &mac_data);
+       hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
+
+       mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
+       mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
 
        mac_entry->port_mask[0] = mac_data.tbl_mcast_port_msk[0] & 0x3F;
        return 0;
@@ -1986,9 +2206,10 @@ int hns_dsaf_get_mac_entry_by_index(
 
        struct dsaf_tbl_tcam_mcast_cfg mac_data;
        struct dsaf_tbl_tcam_ucast_cfg mac_uc_data;
-       char mac_addr[MAC_NUM_OCTETS_PER_ADDR] = {0};
+       struct dsaf_tbl_tcam_data tcam_data;
+       char mac_addr[ETH_ALEN] = {0};
 
-       if (entry_index >= DSAF_TCAM_SUM) {
+       if (entry_index >= dsaf_dev->tcam_max_num) {
                /* find none, del error */
                dev_err(dsaf_dev->dev, "get_uc_entry failed, %s\n",
                        dsaf_dev->ae_dev.name);
@@ -1996,8 +2217,10 @@ int hns_dsaf_get_mac_entry_by_index(
        }
 
        /* mc entry, do read opt */
-       hns_dsaf_tcam_mc_get(dsaf_dev, entry_index,
-                            (struct dsaf_tbl_tcam_data *)&mac_key, &mac_data);
+       hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
+
+       mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
+       mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
 
        mac_entry->port_mask[0] = mac_data.tbl_mcast_port_msk[0] & 0x3F;
 
@@ -2014,9 +2237,12 @@ int hns_dsaf_get_mac_entry_by_index(
                /**mc donot do*/
        } else {
                /*is not mc, just uc... */
-               hns_dsaf_tcam_uc_get(dsaf_dev, entry_index,
-                                    (struct dsaf_tbl_tcam_data *)&mac_key,
+               hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, &tcam_data,
                                     &mac_uc_data);
+
+               mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
+               mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
+
                mac_entry->port_mask[0] = (1 << mac_uc_data.tbl_ucast_out_port);
        }
 
@@ -2680,6 +2906,59 @@ int hns_dsaf_get_regs_count(void)
        return DSAF_DUMP_REGS_NUM;
 }
 
+/* Reserve the last TCAM entry for promisc support */
+#define dsaf_promisc_tcam_entry(port) \
+       (DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM + (port))
+/**
+ * hns_dsaf_set_promisc_tcam - enable or disable the promisc tcam entry of a port
+ * @dsaf_dev: dsa fabric device struct pointer
+ * @port: port whose reserved promisc entry is configured
+ * @enable: true to install the entry, false to mark it invalid
+ *
+ * Does nothing on version 1 hardware or when HNS_DSAF_IS_DEBUG() is true.
+ */
+void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev,
+                              u32 port, bool enable)
+{
+       struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
+       struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
+       u16 entry_index;
+       struct dsaf_drv_tbl_tcam_key tbl_tcam_data, tbl_tcam_mask;
+       struct dsaf_tbl_tcam_mcast_cfg mac_data = {0};
+
+       if ((AE_IS_VER1(dsaf_dev->dsaf_ver)) || HNS_DSAF_IS_DEBUG(dsaf_dev))
+               return;
+
+       /* find the tcam entry index for promisc */
+       entry_index = dsaf_promisc_tcam_entry(port);
+
+       /* Clear key and mask on both paths: they are logged and passed to
+        * hns_dsaf_tcam_mc_cfg() below even when the entry is being
+        * invalidated, so they must never hold stale stack contents.
+        */
+       memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data));
+       memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask));
+
+       /* config key mask */
+       if (enable) {
+               /* match on the port field only */
+               dsaf_set_field(tbl_tcam_data.low.bits.port_vlan,
+                              DSAF_TBL_TCAM_KEY_PORT_M,
+                              DSAF_TBL_TCAM_KEY_PORT_S, port);
+               dsaf_set_field(tbl_tcam_mask.low.bits.port_vlan,
+                              DSAF_TBL_TCAM_KEY_PORT_M,
+                              DSAF_TBL_TCAM_KEY_PORT_S, 0xf);
+
+               /* SUB_QID */
+               dsaf_set_bit(mac_data.tbl_mcast_port_msk[0],
+                            DSAF_SERVICE_NW_NUM, true);
+               mac_data.tbl_mcast_item_vld = true;     /* item_vld bit */
+       } else {
+               mac_data.tbl_mcast_item_vld = false;    /* item_vld bit */
+       }
+
+       dev_dbg(dsaf_dev->dev,
+               "set_promisc_entry, %s Mac key(%#x:%#x) entry_index%d\n",
+               dsaf_dev->ae_dev.name, tbl_tcam_data.high.val,
+               tbl_tcam_data.low.val, entry_index);
+
+       /* config promisc entry with mask */
+       hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index,
+                            (struct dsaf_tbl_tcam_data *)&tbl_tcam_data,
+                            (struct dsaf_tbl_tcam_data *)&tbl_tcam_mask,
+                            &mac_data);
+
+       /* config software entry */
+       soft_mac_entry += entry_index;
+       soft_mac_entry->index = enable ? entry_index : DSAF_INVALID_ENTRY_IDX;
+}
+
 /**
  * dsaf_probe - probo dsaf dev
  * @pdev: dasf platform device
@@ -2761,6 +3040,7 @@ static const struct of_device_id g_dsaf_match[] = {
        {.compatible = "hisilicon,hns-dsaf-v2"},
        {}
 };
+MODULE_DEVICE_TABLE(of, g_dsaf_match);
 
 static struct platform_driver g_dsaf_driver = {
        .probe = hns_dsaf_probe,
index 35df187e66f1909185b010289f4b6009b54c2a10..cef6bf46ae9309bf84c9f5ff466982d59d8bed93 100644 (file)
@@ -35,8 +35,6 @@ struct hns_mac_cb;
 
 #define DSAF_CFG_READ_CNT   30
 
-#define MAC_NUM_OCTETS_PER_ADDR 6
-
 #define DSAF_DUMP_REGS_NUM 504
 #define DSAF_STATIC_NUM 28
 #define DSAF_V2_STATIC_NUM     44
@@ -165,7 +163,7 @@ enum dsaf_mode {
 /*mac entry, mc or uc entry*/
 struct dsaf_drv_mac_single_dest_entry {
        /* mac addr, match the entry*/
-       u8 addr[MAC_NUM_OCTETS_PER_ADDR];
+       u8 addr[ETH_ALEN];
        u16 in_vlan_id; /* value of VlanId */
 
        /* the vld input port num, dsaf-mode fix 0, */
@@ -179,7 +177,7 @@ struct dsaf_drv_mac_single_dest_entry {
 /*only mc entry*/
 struct dsaf_drv_mac_multi_dest_entry {
        /* mac addr, match the entry*/
-       u8 addr[MAC_NUM_OCTETS_PER_ADDR];
+       u8 addr[ETH_ALEN];
        u16 in_vlan_id;
        /* this mac addr output port,*/
        /*      bit0-bit5 means Port0-Port5(1bit is vld)**/
@@ -308,8 +306,6 @@ struct dsaf_misc_op {
        /* reset series function, it will be reset if the dereset is 0 */
        void (*dsaf_reset)(struct dsaf_device *dsaf_dev, bool dereset);
        void (*xge_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset);
-       void (*xge_core_srst)(struct dsaf_device *dsaf_dev, u32 port,
-                             bool dereset);
        void (*ge_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset);
        void (*ppe_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset);
        void (*ppe_comm_srst)(struct dsaf_device *dsaf_dev, bool dereset);
@@ -343,6 +339,7 @@ struct dsaf_device {
        enum hal_dsaf_mode dsaf_en;
        enum hal_dsaf_tc_mode dsaf_tc_mode;
        u32 dsaf_ver;
+       u16 tcam_max_num;       /* max TCAM entry for user except promisc */
 
        struct ppe_common_cb *ppe_common[DSAF_COMM_DEV_NUM];
        struct rcb_common_cb *rcb_common[DSAF_COMM_DEV_NUM];
@@ -360,6 +357,11 @@ static inline void *hns_dsaf_dev_priv(const struct dsaf_device *dsaf_dev)
        return (void *)((u8 *)dsaf_dev + sizeof(*dsaf_dev));
 }
 
+#define DSAF_TBL_TCAM_KEY_PORT_S 0
+#define DSAF_TBL_TCAM_KEY_PORT_M (((1ULL << 4) - 1) << 0)
+#define DSAF_TBL_TCAM_KEY_VLAN_S 4
+#define DSAF_TBL_TCAM_KEY_VLAN_M (((1ULL << 12) - 1) << 4)
+
 struct dsaf_drv_tbl_tcam_key {
        union {
                struct {
@@ -373,11 +375,9 @@ struct dsaf_drv_tbl_tcam_key {
        } high;
        union {
                struct {
-                       u32 port:4; /* port id, */
-                       /* dsaf-mode fixed 0, non-dsaf-mode port id*/
-                       u32 vlan:12; /* vlan id */
-                       u32 mac_5:8;
-                       u32 mac_4:8;
+                       u16 port_vlan;
+                       u8 mac_5;
+                       u8 mac_4;
                } bits;
 
                u32 val;
@@ -461,11 +461,19 @@ void hns_dsaf_get_strings(int stringset, u8 *data, int port,
 void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data);
 int hns_dsaf_get_regs_count(void);
 void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en);
+void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev,
+                              u32 port, bool enable);
 
 void hns_dsaf_get_rx_mac_pause_en(struct dsaf_device *dsaf_dev, int mac_id,
                                  u32 *en);
 int hns_dsaf_set_rx_mac_pause_en(struct dsaf_device *dsaf_dev, int mac_id,
                                 u32 en);
-void hns_dsaf_set_inner_lb(struct dsaf_device *dsaf_dev, u32 mac_id, u32 en);
+int hns_dsaf_rm_mac_addr(
+       struct dsaf_device *dsaf_dev,
+       struct dsaf_drv_mac_single_dest_entry *mac_entry);
+
+int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev,
+                            u8 mac_id, u8 port_num);
+
 
 #endif /* __HNS_DSAF_MAIN_H__ */
index 67accce1d33d098d756938fc650418ffc10aff0c..a2c22d084ce90cb03337ee09e4b4f1b723046ef4 100644 (file)
@@ -23,7 +23,6 @@ enum _dsm_op_index {
 enum _dsm_rst_type {
        HNS_DSAF_RESET_FUNC     = 0x1,
        HNS_PPE_RESET_FUNC      = 0x2,
-       HNS_XGE_CORE_RESET_FUNC = 0x3,
        HNS_XGE_RESET_FUNC      = 0x4,
        HNS_GE_RESET_FUNC       = 0x5,
        HNS_DSAF_CHN_RESET_FUNC = 0x6,
@@ -213,26 +212,6 @@ static void hns_dsaf_xge_srst_by_port_acpi(struct dsaf_device *dsaf_dev,
                                   HNS_XGE_RESET_FUNC, port, dereset);
 }
 
-static void hns_dsaf_xge_core_srst_by_port(struct dsaf_device *dsaf_dev,
-                                          u32 port, bool dereset)
-{
-       u32 reg_val = 0;
-       u32 reg_addr;
-
-       if (port >= DSAF_XGE_NUM)
-               return;
-
-       reg_val |= XGMAC_TRX_CORE_SRST_M
-               << dsaf_dev->mac_cb[port]->port_rst_off;
-
-       if (!dereset)
-               reg_addr = DSAF_SUB_SC_XGE_RESET_REQ_REG;
-       else
-               reg_addr = DSAF_SUB_SC_XGE_RESET_DREQ_REG;
-
-       dsaf_write_sub(dsaf_dev, reg_addr, reg_val);
-}
-
 /**
  * hns_dsaf_srst_chns - reset dsaf channels
  * @dsaf_dev: dsaf device struct pointer
@@ -293,14 +272,6 @@ void hns_dsaf_roce_srst_acpi(struct dsaf_device *dsaf_dev, bool dereset)
                                   HNS_ROCE_RESET_FUNC, 0, dereset);
 }
 
-static void
-hns_dsaf_xge_core_srst_by_port_acpi(struct dsaf_device *dsaf_dev,
-                                   u32 port, bool dereset)
-{
-       hns_dsaf_acpi_srst_by_port(dsaf_dev, HNS_OP_RESET_FUNC,
-                                  HNS_XGE_CORE_RESET_FUNC, port, dereset);
-}
-
 static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port,
                                     bool dereset)
 {
@@ -597,7 +568,6 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev)
 
                misc_op->dsaf_reset = hns_dsaf_rst;
                misc_op->xge_srst = hns_dsaf_xge_srst_by_port;
-               misc_op->xge_core_srst = hns_dsaf_xge_core_srst_by_port;
                misc_op->ge_srst = hns_dsaf_ge_srst_by_port;
                misc_op->ppe_srst = hns_ppe_srst_by_port;
                misc_op->ppe_comm_srst = hns_ppe_com_srst;
@@ -615,7 +585,6 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev)
 
                misc_op->dsaf_reset = hns_dsaf_rst_acpi;
                misc_op->xge_srst = hns_dsaf_xge_srst_by_port_acpi;
-               misc_op->xge_core_srst = hns_dsaf_xge_core_srst_by_port_acpi;
                misc_op->ge_srst = hns_dsaf_ge_srst_by_port_acpi;
                misc_op->ppe_srst = hns_ppe_srst_by_port_acpi;
                misc_op->ppe_comm_srst = hns_ppe_com_srst;
index ef1107777c08d58117110cf2c5da08c2dcec3e23..f0ed80d6ef9cd45a8408c987ab4315646098f438 100644 (file)
@@ -543,6 +543,22 @@ int hns_rcb_set_coalesce_usecs(
                        "error: coalesce_usecs setting supports 0~1023us\n");
                return -EINVAL;
        }
+
+       if (!AE_IS_VER1(rcb_common->dsaf_dev->dsaf_ver)) {
+               if (timeout == 0)
+                       /* set timeout to 0, Disable gap time */
+                       dsaf_set_reg_field(rcb_common->io_base,
+                                          RCB_INT_GAP_TIME_REG + port_idx * 4,
+                                          PPE_INT_GAPTIME_M, PPE_INT_GAPTIME_B,
+                                          0);
+               else
+                       /* set timeout non 0, restore gap time to 1 */
+                       dsaf_set_reg_field(rcb_common->io_base,
+                                          RCB_INT_GAP_TIME_REG + port_idx * 4,
+                                          PPE_INT_GAPTIME_M, PPE_INT_GAPTIME_B,
+                                          1);
+       }
+
        hns_rcb_set_port_timeout(rcb_common, port_idx, timeout);
        return 0;
 }
index 4b8b803822d1b2ae7d15c599aa7778a728cc3883..87226685f74215a2093e59a99bd8042a2e2585c5 100644 (file)
@@ -41,6 +41,9 @@
 #define DSAF_SW_PORT_NUM       8
 #define DSAF_TOTAL_QUEUE_NUM   129
 
+/* reserved a tcam entry for each port to support promisc by fuzzy match */
+#define DSAFV2_MAC_FUZZY_TCAM_NUM    DSAF_MAX_PORT_NUM
+
 #define DSAF_TCAM_SUM          512
 #define DSAF_LINE_SUM          (2048 * 14)
 
 #define DSAF_TBL_LKUP_NUM_I_0_REG              0x50C0
 #define DSAF_TBL_LKUP_NUM_O_0_REG              0x50E0
 #define DSAF_TBL_UCAST_BCAST_MIS_INFO_0_0_REG  0x510C
+#define DSAF_TBL_TCAM_MATCH_CFG_H_REG          0x5130
+#define DSAF_TBL_TCAM_MATCH_CFG_L_REG          0x5134
 
 #define DSAF_INODE_FIFO_WL_0_REG               0x6000
 #define DSAF_ONODE_FIFO_WL_0_REG               0x6020
 #define PPE_COM_INTEN_REG                      0x110
 #define PPE_COM_RINT_REG                       0x114
 #define PPE_COM_INTSTS_REG                     0x118
-#define PPE_COM_COMMON_CNT_CLR_CE_REG          0x1120
 #define PPE_COM_HIS_RX_PKT_QID_DROP_CNT_REG    0x300
 #define PPE_COM_HIS_RX_PKT_QID_OK_CNT_REG      0x600
 #define PPE_COM_HIS_TX_PKT_QID_ERR_CNT_REG     0x900
 #define RCB_CFG_OVERTIME_REG                   0x9300
 #define RCB_CFG_PKTLINE_INT_NUM_REG            0x9304
 #define RCB_CFG_OVERTIME_INT_NUM_REG           0x9308
+#define RCB_INT_GAP_TIME_REG                   0x9400
 #define RCB_PORT_CFG_OVERTIME_REG              0x9430
 
 #define RCB_RING_RX_RING_BASEADDR_L_REG                0x00000
 #define XGMAC_RX_SYMBOLERRPKTS                 0x0210
 #define XGMAC_RX_FCSERRPKTS                    0x0218
 
-#define XGMAC_TRX_CORE_SRST_M                  0x2080
-
 #define DSAF_SRAM_INIT_OVER_M 0xff
 #define DSAFV2_SRAM_INIT_OVER_M 0x3ff
 #define DSAF_SRAM_INIT_OVER_S 0
 #define PPE_CNT_CLR_CE_B       0
 #define PPE_CNT_CLR_SNAP_EN_B  1
 
+#define PPE_INT_GAPTIME_B      0
+#define PPE_INT_GAPTIME_M      0x3ff
+
 #define PPE_COMMON_CNT_CLR_CE_B        0
 #define PPE_COMMON_CNT_CLR_SNAP_EN_B   1
 #define RCB_COM_TSO_MODE_B     0
 #define XGMAC_ENABLE_TX_B              0
 #define XGMAC_ENABLE_RX_B              1
 
+#define XGMAC_UNIDIR_EN_B              0
+#define XGMAC_RF_TX_EN_B               1
+#define XGMAC_LF_RF_INSERT_S           2
+#define XGMAC_LF_RF_INSERT_M           (0x3 << XGMAC_LF_RF_INSERT_S)
+
 #define XGMAC_CTL_TX_FCS_B             0
 #define XGMAC_CTL_TX_PAD_B             1
 #define XGMAC_CTL_TX_PREAMBLE_TRANS_B  3
index 8f4f0e8da984d37fa84c156dbba185efb8c6c9a9..aae830a93050ad5f99ece2b6901dd30531852d87 100644 (file)
@@ -107,6 +107,31 @@ static void hns_xgmac_rx_enable(struct mac_driver *drv, u32 value)
        dsaf_set_dev_bit(drv, XGMAC_MAC_ENABLE_REG, XGMAC_ENABLE_RX_B, !!value);
 }
 
+/**
+ * hns_xgmac_lf_rf_insert - set the lf/rf insert field of the xgmac
+ * @mac_drv: mac driver
+ * @mode: value for the insert field, e.g. HNS_XGMAC_NO_LF_RF_INSERT or
+ *       HNS_XGMAC_LF_INSERT
+ */
+static void hns_xgmac_lf_rf_insert(struct mac_driver *mac_drv, u32 mode)
+{
+       dsaf_set_dev_field(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG,
+                          XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, mode);
+}
+
+/**
+ * hns_xgmac_lf_rf_control_init - initial the lf rf control register
+ * @mac_drv: mac driver
+ *
+ * Writes the whole register: UNIDIR_EN cleared, RF_TX_EN set and the
+ * LF/RF insert field set to 0.
+ */
+static void hns_xgmac_lf_rf_control_init(struct mac_driver *mac_drv)
+{
+       u32 val = 0;
+
+       dsaf_set_bit(val, XGMAC_UNIDIR_EN_B, 0);
+       dsaf_set_bit(val, XGMAC_RF_TX_EN_B, 1);
+       dsaf_set_field(val, XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, 0);
+       dsaf_write_reg(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val);
+}
+
 /**
  *hns_xgmac_enable - enable xgmac port
  *@drv: mac driver
@@ -115,12 +140,8 @@ static void hns_xgmac_rx_enable(struct mac_driver *drv, u32 value)
 static void hns_xgmac_enable(void *mac_drv, enum mac_commom_mode mode)
 {
        struct mac_driver *drv = (struct mac_driver *)mac_drv;
-       struct dsaf_device *dsaf_dev
-               = (struct dsaf_device *)dev_get_drvdata(drv->dev);
-       u32 port = drv->mac_id;
 
-       dsaf_dev->misc_op->xge_core_srst(dsaf_dev, port, 1);
-       mdelay(10);
+       hns_xgmac_lf_rf_insert(drv, HNS_XGMAC_NO_LF_RF_INSERT);
 
        /*enable XGE rX/tX */
        if (mode == MAC_COMM_MODE_TX) {
@@ -143,9 +164,6 @@ static void hns_xgmac_enable(void *mac_drv, enum mac_commom_mode mode)
 static void hns_xgmac_disable(void *mac_drv, enum mac_commom_mode mode)
 {
        struct mac_driver *drv = (struct mac_driver *)mac_drv;
-       struct dsaf_device *dsaf_dev
-               = (struct dsaf_device *)dev_get_drvdata(drv->dev);
-       u32 port = drv->mac_id;
 
        if (mode == MAC_COMM_MODE_TX) {
                hns_xgmac_tx_enable(drv, 0);
@@ -155,9 +173,7 @@ static void hns_xgmac_disable(void *mac_drv, enum mac_commom_mode mode)
                hns_xgmac_tx_enable(drv, 0);
                hns_xgmac_rx_enable(drv, 0);
        }
-
-       mdelay(10);
-       dsaf_dev->misc_op->xge_core_srst(dsaf_dev, port, 0);
+       hns_xgmac_lf_rf_insert(drv, HNS_XGMAC_LF_INSERT);
 }
 
 /**
@@ -203,6 +219,7 @@ static void hns_xgmac_init(void *mac_drv)
        dsaf_dev->misc_op->xge_srst(dsaf_dev, port, 1);
 
        mdelay(100);
+       hns_xgmac_lf_rf_control_init(drv);
        hns_xgmac_exc_irq_en(drv, 0);
 
        hns_xgmac_pma_fec_enable(drv, 0x0, 0x0);
@@ -788,7 +805,7 @@ static int hns_xgmac_get_sset_count(int stringset)
  */
 static int hns_xgmac_get_regs_count(void)
 {
-       return ETH_XGMAC_DUMP_NUM;
+       return HNS_XGMAC_DUMP_NUM;
 }
 
 void *hns_xgmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
index 139f7297c7b4f7be7cd968ed165d6b9f2bfb5d8a..da6c5343d3e139898662b3b60b2a1168d8aaccc3 100644 (file)
@@ -10,6 +10,7 @@
 #ifndef _HNS_XGMAC_H
 #define _HNS_XGMAC_H
 
-#define ETH_XGMAC_DUMP_NUM             (214)
-
+#define HNS_XGMAC_DUMP_NUM             214
+#define HNS_XGMAC_NO_LF_RF_INSERT      0x0
+#define HNS_XGMAC_LF_INSERT            0x2
 #endif
index a7208673116c8b6b1e2be8e42fc884580530d3b3..776d81e785d8c72555d183e7b13100bfc1f9b2a3 100644 (file)
@@ -575,7 +575,6 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
        struct sk_buff *skb;
        struct hnae_desc *desc;
        struct hnae_desc_cb *desc_cb;
-       struct ethhdr *eh;
        unsigned char *va;
        int bnum, length, i;
        int pull_len;
@@ -601,7 +600,6 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
                ring->stats.sw_err_cnt++;
                return -ENOMEM;
        }
-       skb_reset_mac_header(skb);
 
        prefetchw(skb->data);
        length = le16_to_cpu(desc->rx.pkt_len);
@@ -683,14 +681,6 @@ out_bnum_err:
                return -EFAULT;
        }
 
-       /* filter out multicast pkt with the same src mac as this port */
-       eh = eth_hdr(skb);
-       if (unlikely(is_multicast_ether_addr(eh->h_dest) &&
-                    ether_addr_equal(ndev->dev_addr, eh->h_source))) {
-               dev_kfree_skb_any(skb);
-               return -EFAULT;
-       }
-
        ring->stats.rx_pkts++;
        ring->stats.rx_bytes += skb->len;
 
@@ -748,25 +738,37 @@ static void hns_nic_rx_up_pro(struct hns_nic_ring_data *ring_data,
        ndev->last_rx = jiffies;
 }
 
+static int hns_desc_unused(struct hnae_ring *ring)
+{
+       int ntc = ring->next_to_clean;
+       int ntu = ring->next_to_use;
+
+       return ((ntc >= ntu) ? 0 : ring->desc_num) + ntc - ntu;
+}
+
 static int hns_nic_rx_poll_one(struct hns_nic_ring_data *ring_data,
                               int budget, void *v)
 {
        struct hnae_ring *ring = ring_data->ring;
        struct sk_buff *skb;
-       int num, bnum, ex_num;
+       int num, bnum;
 #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16
        int recv_pkts, recv_bds, clean_count, err;
+       int unused_count = hns_desc_unused(ring);
 
        num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
        rmb(); /* make sure num taken effect before the other data is touched */
 
        recv_pkts = 0, recv_bds = 0, clean_count = 0;
-recv:
+       num -= unused_count;
+
        while (recv_pkts < budget && recv_bds < num) {
                /* reuse or realloc buffers */
-               if (clean_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) {
-                       hns_nic_alloc_rx_buffers(ring_data, clean_count);
+               if (clean_count + unused_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) {
+                       hns_nic_alloc_rx_buffers(ring_data,
+                                                clean_count + unused_count);
                        clean_count = 0;
+                       unused_count = hns_desc_unused(ring);
                }
 
                /* poll one pkt */
@@ -787,21 +789,11 @@ recv:
                recv_pkts++;
        }
 
-       /* make all data has been write before submit */
-       if (recv_pkts < budget) {
-               ex_num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
-
-               if (ex_num > clean_count) {
-                       num += ex_num - clean_count;
-                       rmb(); /*complete read rx ring bd number*/
-                       goto recv;
-               }
-       }
-
 out:
        /* make all data has been write before submit */
-       if (clean_count > 0)
-               hns_nic_alloc_rx_buffers(ring_data, clean_count);
+       if (clean_count + unused_count > 0)
+               hns_nic_alloc_rx_buffers(ring_data,
+                                        clean_count + unused_count);
 
        return recv_pkts;
 }
@@ -811,6 +803,8 @@ static void hns_nic_rx_fini_pro(struct hns_nic_ring_data *ring_data)
        struct hnae_ring *ring = ring_data->ring;
        int num = 0;
 
+       ring_data->ring->q->handle->dev->ops->toggle_ring_irq(ring, 0);
+
        /* for hardware bug fixed */
        num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
 
@@ -822,6 +816,20 @@ static void hns_nic_rx_fini_pro(struct hns_nic_ring_data *ring_data)
        }
 }
 
+static void hns_nic_rx_fini_pro_v2(struct hns_nic_ring_data *ring_data)
+{
+       struct hnae_ring *ring = ring_data->ring;
+       int num = 0;
+
+       num = readl_relaxed(ring->io_base + RCB_REG_FBDNUM);
+
+       if (num == 0)
+               ring_data->ring->q->handle->dev->ops->toggle_ring_irq(
+                       ring, 0);
+       else
+               napi_schedule(&ring_data->napi);
+}
+
 static inline void hns_nic_reclaim_one_desc(struct hnae_ring *ring,
                                            int *bytes, int *pkts)
 {
@@ -923,7 +931,11 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
 static void hns_nic_tx_fini_pro(struct hns_nic_ring_data *ring_data)
 {
        struct hnae_ring *ring = ring_data->ring;
-       int head = readl_relaxed(ring->io_base + RCB_REG_HEAD);
+       int head;
+
+       ring_data->ring->q->handle->dev->ops->toggle_ring_irq(ring, 0);
+
+       head = readl_relaxed(ring->io_base + RCB_REG_HEAD);
 
        if (head != ring->next_to_clean) {
                ring_data->ring->q->handle->dev->ops->toggle_ring_irq(
@@ -933,6 +945,18 @@ static void hns_nic_tx_fini_pro(struct hns_nic_ring_data *ring_data)
        }
 }
 
+static void hns_nic_tx_fini_pro_v2(struct hns_nic_ring_data *ring_data)
+{
+       struct hnae_ring *ring = ring_data->ring;
+       int head = readl_relaxed(ring->io_base + RCB_REG_HEAD);
+
+       if (head == ring->next_to_clean)
+               ring_data->ring->q->handle->dev->ops->toggle_ring_irq(
+                       ring, 0);
+       else
+               napi_schedule(&ring_data->napi);
+}
+
 static void hns_nic_tx_clr_all_bufs(struct hns_nic_ring_data *ring_data)
 {
        struct hnae_ring *ring = ring_data->ring;
@@ -964,10 +988,7 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget)
 
        if (clean_complete >= 0 && clean_complete < budget) {
                napi_complete(napi);
-               ring_data->ring->q->handle->dev->ops->toggle_ring_irq(
-                       ring_data->ring, 0);
-               if (ring_data->fini_process)
-                       ring_data->fini_process(ring_data);
+               ring_data->fini_process(ring_data);
                return 0;
        }
 
@@ -1472,6 +1493,29 @@ static netdev_features_t hns_nic_fix_features(
        return features;
 }
 
+static int hns_nic_uc_sync(struct net_device *netdev, const unsigned char *addr)
+{
+       struct hns_nic_priv *priv = netdev_priv(netdev);
+       struct hnae_handle *h = priv->ae_handle;
+
+       if (h->dev->ops->add_uc_addr)
+               return h->dev->ops->add_uc_addr(h, addr);
+
+       return 0;
+}
+
+static int hns_nic_uc_unsync(struct net_device *netdev,
+                            const unsigned char *addr)
+{
+       struct hns_nic_priv *priv = netdev_priv(netdev);
+       struct hnae_handle *h = priv->ae_handle;
+
+       if (h->dev->ops->rm_uc_addr)
+               return h->dev->ops->rm_uc_addr(h, addr);
+
+       return 0;
+}
+
 /**
  * nic_set_multicast_list - set mutl mac address
  * @netdev: net device
@@ -1490,6 +1534,10 @@ void hns_set_multicast_list(struct net_device *ndev)
                return;
        }
 
+       if (h->dev->ops->clr_mc_addr)
+               if (h->dev->ops->clr_mc_addr(h))
+                       netdev_err(ndev, "clear multicast address fail\n");
+
        if (h->dev->ops->set_mc_addr) {
                netdev_for_each_mc_addr(ha, ndev)
                        if (h->dev->ops->set_mc_addr(h, ha->addr))
@@ -1510,6 +1558,9 @@ void hns_nic_set_rx_mode(struct net_device *ndev)
        }
 
        hns_set_multicast_list(ndev);
+
+       if (__dev_uc_sync(ndev, hns_nic_uc_sync, hns_nic_uc_unsync))
+               netdev_err(ndev, "sync uc address fail\n");
 }
 
 struct rtnl_link_stats64 *hns_nic_get_stats64(struct net_device *ndev,
@@ -1559,6 +1610,21 @@ struct rtnl_link_stats64 *hns_nic_get_stats64(struct net_device *ndev,
        return stats;
 }
 
+static u16
+hns_nic_select_queue(struct net_device *ndev, struct sk_buff *skb,
+                    void *accel_priv, select_queue_fallback_t fallback)
+{
+       struct ethhdr *eth_hdr = (struct ethhdr *)skb->data;
+       struct hns_nic_priv *priv = netdev_priv(ndev);
+
+       /* fix hardware broadcast/multicast packets queue loopback */
+       if (!AE_IS_VER1(priv->enet_ver) &&
+           is_multicast_ether_addr(eth_hdr->h_dest))
+               return 0;
+       else
+               return fallback(ndev, skb);
+}
+
 static const struct net_device_ops hns_nic_netdev_ops = {
        .ndo_open = hns_nic_net_open,
        .ndo_stop = hns_nic_net_stop,
@@ -1574,6 +1640,7 @@ static const struct net_device_ops hns_nic_netdev_ops = {
        .ndo_poll_controller = hns_nic_poll_controller,
 #endif
        .ndo_set_rx_mode = hns_nic_set_rx_mode,
+       .ndo_select_queue = hns_nic_select_queue,
 };
 
 static void hns_nic_update_link_status(struct net_device *netdev)
@@ -1735,7 +1802,8 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv)
                rd->queue_index = i;
                rd->ring = &h->qs[i]->tx_ring;
                rd->poll_one = hns_nic_tx_poll_one;
-               rd->fini_process = is_ver1 ? hns_nic_tx_fini_pro : NULL;
+               rd->fini_process = is_ver1 ? hns_nic_tx_fini_pro :
+                       hns_nic_tx_fini_pro_v2;
 
                netif_napi_add(priv->netdev, &rd->napi,
                               hns_nic_common_poll, NIC_TX_CLEAN_MAX_NUM);
@@ -1747,7 +1815,8 @@ static int hns_nic_init_ring_data(struct hns_nic_priv *priv)
                rd->ring = &h->qs[i - h->q_num]->rx_ring;
                rd->poll_one = hns_nic_rx_poll_one;
                rd->ex_process = hns_nic_rx_up_pro;
-               rd->fini_process = is_ver1 ? hns_nic_rx_fini_pro : NULL;
+               rd->fini_process = is_ver1 ? hns_nic_rx_fini_pro :
+                       hns_nic_rx_fini_pro_v2;
 
                netif_napi_add(priv->netdev, &rd->napi,
                               hns_nic_common_poll, NIC_RX_CLEAN_MAX_NUM);
index 47e59bbfd061bfe1d9576b8111f92d361be599e1..3ac2183dbd2119e0746d35510fcfc1e390ab6d9b 100644 (file)
@@ -352,6 +352,13 @@ static int __lb_setup(struct net_device *ndev,
                break;
        }
 
+       if (!ret) {
+               if (loop == MAC_LOOP_NONE)
+                       h->dev->ops->set_promisc_mode(
+                               h, ndev->flags & IFF_PROMISC);
+               else
+                       h->dev->ops->set_promisc_mode(h, 1);
+       }
        return ret;
 }
 
@@ -1171,7 +1178,8 @@ static int hns_nic_nway_reset(struct net_device *netdev)
        struct phy_device *phy = netdev->phydev;
 
        if (netif_running(netdev)) {
-               if (phy)
+               /* if autoneg is disabled, don't restart auto-negotiation */
+               if (phy && phy->autoneg == AUTONEG_ENABLE)
                        ret = genphy_restart_aneg(phy);
        }
 
index 33f4c483af0f46c6b6506bae6786833d4161fbd0..501eb2090ca62bcd118abc136e4c433bdaa38eb6 100644 (file)
@@ -563,6 +563,7 @@ static const struct of_device_id hns_mdio_match[] = {
        {.compatible = "hisilicon,hns-mdio"},
        {}
 };
+MODULE_DEVICE_TABLE(of, hns_mdio_match);
 
 static const struct acpi_device_id hns_mdio_acpi_match[] = {
        { "HISI0141", 0 },
index e9719ba450d7ccf8770eba14ad02773ca5577e7d..702446a93697a42a94cae938ac303d58350461cf 100644 (file)
@@ -2438,6 +2438,8 @@ static int ehea_open(struct net_device *dev)
 
        netif_info(port, ifup, dev, "enabling port\n");
 
+       netif_carrier_off(dev);
+
        ret = ehea_up(dev);
        if (!ret) {
                port_napi_enable(port);
@@ -3042,7 +3044,6 @@ static struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter,
        init_waitqueue_head(&port->swqe_avail_wq);
        init_waitqueue_head(&port->restart_wq);
 
-       memset(&port->stats, 0, sizeof(struct net_device_stats));
        ret = register_netdev(dev);
        if (ret) {
                pr_err("register_netdev failed. ret=%d\n", ret);
index 29c05d0d79a9897894b8c9952411e48bbdfc40e3..4a81c892fc3142af7e271304a5975c64adff2b88 100644 (file)
@@ -1549,7 +1549,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
        }
 
        netdev->min_mtu = IBMVETH_MIN_MTU;
-       netdev->min_mtu = ETH_MAX_MTU;
+       netdev->max_mtu = ETH_MAX_MTU;
 
        memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);
 
index 657206be7ba99b20a31c839ad2a3a23fa966c300..c12596676bbbba5ef426b1e8463e7080bdd2539b 100644 (file)
@@ -74,7 +74,6 @@
 #include <asm/iommu.h>
 #include <linux/uaccess.h>
 #include <asm/firmware.h>
-#include <linux/seq_file.h>
 #include <linux/workqueue.h>
 
 #include "ibmvnic.h"
@@ -1178,7 +1177,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
        if (!scrq)
                return NULL;
 
-       scrq->msgs = (union sub_crq *)__get_free_pages(GFP_KERNEL, 2);
+       scrq->msgs = (union sub_crq *)__get_free_pages(GFP_ATOMIC, 2);
        memset(scrq->msgs, 0, 4 * PAGE_SIZE);
        if (!scrq->msgs) {
                dev_warn(dev, "Couldn't allocate crq queue messages page\n");
@@ -1449,14 +1448,16 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
        return rc;
 
 req_rx_irq_failed:
-       for (j = 0; j < i; j++)
+       for (j = 0; j < i; j++) {
                free_irq(adapter->rx_scrq[j]->irq, adapter->rx_scrq[j]);
                irq_dispose_mapping(adapter->rx_scrq[j]->irq);
+       }
        i = adapter->req_tx_queues;
 req_tx_irq_failed:
-       for (j = 0; j < i; j++)
+       for (j = 0; j < i; j++) {
                free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]);
                irq_dispose_mapping(adapter->rx_scrq[j]->irq);
+       }
        release_sub_crqs_no_irqs(adapter);
        return rc;
 }
@@ -1491,9 +1492,8 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
                    adapter->max_rx_add_entries_per_subcrq > entries_page ?
                    entries_page : adapter->max_rx_add_entries_per_subcrq;
 
-               /* Choosing the maximum number of queues supported by firmware*/
-               adapter->req_tx_queues = adapter->max_tx_queues;
-               adapter->req_rx_queues = adapter->max_rx_queues;
+               adapter->req_tx_queues = adapter->opt_tx_comp_sub_queues;
+               adapter->req_rx_queues = adapter->opt_rx_comp_queues;
                adapter->req_rx_add_queues = adapter->max_rx_add_queues;
 
                adapter->req_mtu = adapter->max_mtu;
@@ -3222,6 +3222,27 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter)
        spin_unlock_irqrestore(&adapter->inflight_lock, flags);
 }
 
+static void ibmvnic_xport_event(struct work_struct *work)
+{
+       struct ibmvnic_adapter *adapter = container_of(work,
+                                                      struct ibmvnic_adapter,
+                                                      ibmvnic_xport);
+       struct device *dev = &adapter->vdev->dev;
+       long rc;
+
+       ibmvnic_free_inflight(adapter);
+       release_sub_crqs(adapter);
+       if (adapter->migrated) {
+               rc = ibmvnic_reenable_crq_queue(adapter);
+               if (rc)
+                       dev_err(dev, "Error after enable rc=%ld\n", rc);
+               adapter->migrated = false;
+               rc = ibmvnic_send_crq_init(adapter);
+               if (rc)
+                       dev_err(dev, "Error sending init rc=%ld\n", rc);
+       }
+}
+
 static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
                               struct ibmvnic_adapter *adapter)
 {
@@ -3257,15 +3278,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
                if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) {
                        dev_info(dev, "Re-enabling adapter\n");
                        adapter->migrated = true;
-                       ibmvnic_free_inflight(adapter);
-                       release_sub_crqs(adapter);
-                       rc = ibmvnic_reenable_crq_queue(adapter);
-                       if (rc)
-                               dev_err(dev, "Error after enable rc=%ld\n", rc);
-                       adapter->migrated = false;
-                       rc = ibmvnic_send_crq_init(adapter);
-                       if (rc)
-                               dev_err(dev, "Error sending init rc=%ld\n", rc);
+                       schedule_work(&adapter->ibmvnic_xport);
                } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) {
                        dev_info(dev, "Backing device failover detected\n");
                        netif_carrier_off(netdev);
@@ -3274,8 +3287,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq,
                        /* The adapter lost the connection */
                        dev_err(dev, "Virtual Adapter failed (rc=%d)\n",
                                gen_crq->cmd);
-                       ibmvnic_free_inflight(adapter);
-                       release_sub_crqs(adapter);
+                       schedule_work(&adapter->ibmvnic_xport);
                }
                return;
        case IBMVNIC_CRQ_CMD_RSP:
@@ -3644,6 +3656,7 @@ static void handle_crq_init_rsp(struct work_struct *work)
                goto task_failed;
 
        netdev->real_num_tx_queues = adapter->req_tx_queues;
+       netdev->mtu = adapter->req_mtu;
        netdev->min_mtu = adapter->min_mtu;
        netdev->max_mtu = adapter->max_mtu;
 
@@ -3683,7 +3696,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
        struct net_device *netdev;
        unsigned char *mac_addr_p;
        struct dentry *ent;
-       char buf[16]; /* debugfs name buf */
+       char buf[17]; /* debugfs name buf */
        int rc;
 
        dev_dbg(&dev->dev, "entering ibmvnic_probe for UA 0x%x\n",
@@ -3717,6 +3730,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
        SET_NETDEV_DEV(netdev, &dev->dev);
 
        INIT_WORK(&adapter->vnic_crq_init, handle_crq_init_rsp);
+       INIT_WORK(&adapter->ibmvnic_xport, ibmvnic_xport_event);
 
        spin_lock_init(&adapter->stats_lock);
 
@@ -3784,6 +3798,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
        }
 
        netdev->real_num_tx_queues = adapter->req_tx_queues;
+       netdev->mtu = adapter->req_mtu;
 
        rc = register_netdev(netdev);
        if (rc) {
@@ -3820,6 +3835,9 @@ static int ibmvnic_remove(struct vio_dev *dev)
        if (adapter->debugfs_dir && !IS_ERR(adapter->debugfs_dir))
                debugfs_remove_recursive(adapter->debugfs_dir);
 
+       dma_unmap_single(&dev->dev, adapter->stats_token,
+                        sizeof(struct ibmvnic_statistics), DMA_FROM_DEVICE);
+
        if (adapter->ras_comps)
                dma_free_coherent(&dev->dev,
                                  adapter->ras_comp_num *
index bfc84c7d0e1146570d955617faf83a2643eff591..dd775d951b739eed4cd27985c055cacef0e56b6b 100644 (file)
@@ -27,7 +27,7 @@
 /**************************************************************************/
 
 #define IBMVNIC_NAME           "ibmvnic"
-#define IBMVNIC_DRIVER_VERSION "1.0"
+#define IBMVNIC_DRIVER_VERSION "1.0.1"
 #define IBMVNIC_INVALID_MAP    -1
 #define IBMVNIC_STATS_TIMEOUT  1
 /* basic structures plus 100 2k buffers */
@@ -1048,5 +1048,6 @@ struct ibmvnic_adapter {
        u8 map_id;
 
        struct work_struct vnic_crq_init;
+       struct work_struct ibmvnic_xport;
        bool failover;
 };
index 2030d7c1dc94ab01cfbb79862ff1cd8da8e73f3d..29c23183a0e011515030de2bf21c5d020efd7236 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/iommu.h>
 #include <linux/slab.h>
 #include <linux/list.h>
+#include <linux/hashtable.h>
 #include <linux/string.h>
 #include <linux/in.h>
 #include <linux/ip.h>
@@ -92,6 +93,7 @@
 #define I40E_AQ_LEN                    256
 #define I40E_AQ_WORK_LIMIT             66 /* max number of VFs + a little */
 #define I40E_MAX_USER_PRIORITY         8
+#define I40E_DEFAULT_TRAFFIC_CLASS     BIT(0)
 #define I40E_DEFAULT_MSG_ENABLE                4
 #define I40E_QUEUE_WAIT_RETRY_LIMIT    10
 #define I40E_INT_NAME_STR_LEN          (IFNAMSIZ + 16)
@@ -427,11 +429,13 @@ struct i40e_pf {
        struct ptp_clock_info ptp_caps;
        struct sk_buff *ptp_tx_skb;
        struct hwtstamp_config tstamp_config;
-       unsigned long last_rx_ptp_check;
-       spinlock_t tmreg_lock; /* Used to protect the device time registers. */
+       struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */
        u64 ptp_base_adj;
        u32 tx_hwtstamp_timeouts;
        u32 rx_hwtstamp_cleared;
+       u32 latch_event_flags;
+       spinlock_t ptp_rx_lock; /* Used to protect Rx timestamp registers. */
+       unsigned long latch_events[4];
        bool ptp_tx;
        bool ptp_rx;
        u16 rss_table_size; /* HW RSS table size */
@@ -444,6 +448,20 @@ struct i40e_pf {
        u16 phy_led_val;
 };
 
+/**
+ * i40e_addr_to_hkey - Convert a 6-byte MAC Address to a u64 hash key
+ * @macaddr: the MAC Address as the base key
+ *
+ * Simply copies the address and returns it as a u64 for hashing
+ **/
+static inline u64 i40e_addr_to_hkey(const u8 *macaddr)
+{
+       u64 key = 0;
+
+       ether_addr_copy((u8 *)&key, macaddr);
+       return key;
+}
+
 enum i40e_filter_state {
        I40E_FILTER_INVALID = 0,        /* Invalid state */
        I40E_FILTER_NEW,                /* New, not sent to FW yet */
@@ -453,13 +471,10 @@ enum i40e_filter_state {
 /* There is no 'removed' state; the filter struct is freed */
 };
 struct i40e_mac_filter {
-       struct list_head list;
+       struct hlist_node hlist;
        u8 macaddr[ETH_ALEN];
 #define I40E_VLAN_ANY -1
        s16 vlan;
-       u8 counter;             /* number of instances of this filter */
-       bool is_vf;             /* filter belongs to a VF */
-       bool is_netdev;         /* filter belongs to a netdev */
        enum i40e_filter_state state;
 };
 
@@ -500,9 +515,11 @@ struct i40e_vsi {
 #define I40E_VSI_FLAG_VEB_OWNER                BIT(1)
        unsigned long flags;
 
-       /* Per VSI lock to protect elements/list (MAC filter) */
-       spinlock_t mac_filter_list_lock;
-       struct list_head mac_filter_list;
+       /* Per VSI lock to protect elements/hash (MAC filter) */
+       spinlock_t mac_filter_hash_lock;
+       /* Fixed size hash table with 2^8 buckets for MAC filters */
+       DECLARE_HASHTABLE(mac_filter_hash, 8);
+       bool has_vlan_filter;
 
        /* VSI stats */
        struct rtnl_link_stats64 net_stats;
@@ -607,6 +624,8 @@ struct i40e_q_vector {
        unsigned long hung_detected; /* Set/Reset for hung_detection logic */
 
        cpumask_t affinity_mask;
+       struct irq_affinity_notify affinity_notify;
+
        struct rcu_head rcu;    /* to avoid race with update stats on free */
        char name[I40E_INT_NAME_STR_LEN];
        bool arm_wb_state;
@@ -704,6 +723,25 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
                       u16 rss_table_size, u16 rss_size);
 struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id);
+/**
+ * i40e_find_vsi_by_type - Find and return the first VSI of the given type
+ * @pf: PF to search for VSI
+ * @type: Value indicating type of VSI we are looking for
+ **/
+static inline struct i40e_vsi *
+i40e_find_vsi_by_type(struct i40e_pf *pf, u16 type)
+{
+       int i;
+
+       for (i = 0; i < pf->num_alloc_vsi; i++) {
+               struct i40e_vsi *vsi = pf->vsi[i];
+
+               if (vsi && vsi->type == type)
+                       return vsi;
+       }
+
+       return NULL;
+}
 void i40e_update_stats(struct i40e_vsi *vsi);
 void i40e_update_eth_stats(struct i40e_vsi *vsi);
 struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi);
@@ -720,16 +758,12 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf);
 bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features);
 void i40e_set_ethtool_ops(struct net_device *netdev);
 struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
-                                       u8 *macaddr, s16 vlan,
-                                       bool is_vf, bool is_netdev);
-void i40e_del_filter(struct i40e_vsi *vsi, u8 *macaddr, s16 vlan,
-                    bool is_vf, bool is_netdev);
+                                       const u8 *macaddr, s16 vlan);
+void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan);
 int i40e_sync_vsi_filters(struct i40e_vsi *vsi);
 struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
                                u16 uplink, u32 param1);
 int i40e_vsi_release(struct i40e_vsi *vsi);
-struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf, enum i40e_vsi_type type,
-                                struct i40e_vsi *start_vsi);
 #ifdef I40E_FCOE
 void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
                              struct i40e_vsi_context *ctxt,
@@ -739,7 +773,8 @@ void i40e_service_event_schedule(struct i40e_pf *pf);
 void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id,
                                  u8 *msg, u16 len);
 
-int i40e_vsi_control_rings(struct i40e_vsi *vsi, bool enable);
+int i40e_vsi_start_rings(struct i40e_vsi *vsi);
+void i40e_vsi_stop_rings(struct i40e_vsi *vsi);
 int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count);
 struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid,
                                u16 downlink_seid, u8 enabled_tc);
@@ -815,14 +850,12 @@ int i40e_close(struct net_device *netdev);
 int i40e_vsi_open(struct i40e_vsi *vsi);
 void i40e_vlan_stripping_disable(struct i40e_vsi *vsi);
 int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid);
-int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid);
-struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr,
-                                            bool is_vf, bool is_netdev);
-int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, u8 *macaddr,
-                         bool is_vf, bool is_netdev);
+void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid);
+struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi,
+                                            const u8 *macaddr);
+int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, const u8 *macaddr);
 bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi);
-struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr,
-                                     bool is_vf, bool is_netdev);
+struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr);
 #ifdef I40E_FCOE
 int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
                    struct tc_to_netdev *tc);
index 738b42a44f20b420eccdb4851baca5215a7100c8..56fb272989369c22be618049e523ac9f24ca850b 100644 (file)
@@ -964,11 +964,11 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw,
        desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc);
        desc_idx = ntc;
 
+       hw->aq.arq_last_status =
+               (enum i40e_admin_queue_err)le16_to_cpu(desc->retval);
        flags = le16_to_cpu(desc->flags);
        if (flags & I40E_AQ_FLAG_ERR) {
                ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
-               hw->aq.arq_last_status =
-                       (enum i40e_admin_queue_err)le16_to_cpu(desc->retval);
                i40e_debug(hw,
                           I40E_DEBUG_AQ_MESSAGE,
                           "AQRX: Event received with error 0x%X.\n",
index 250db0b244b7677107ca1c56df23c23789e80fee..7fe72abc0b4a817afd99d57516b49b573f25bed5 100644 (file)
@@ -287,6 +287,7 @@ void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset)
                        }
                        cdev->client->ops->close(&cdev->lan_info, cdev->client,
                                                 reset);
+                       clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
                        i40e_client_release_qvlist(&cdev->lan_info);
                }
        }
@@ -405,37 +406,6 @@ int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
        return capable;
 }
 
-/**
- * i40e_vsi_lookup - finds a matching VSI from the PF list starting at start_vsi
- * @pf: board private structure
- * @type: vsi type
- * @start_vsi: a VSI pointer from where to start the search
- *
- * Returns non NULL on success or NULL for failure
- **/
-struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf,
-                                enum i40e_vsi_type type,
-                                struct i40e_vsi *start_vsi)
-{
-       struct i40e_vsi *vsi;
-       int i = 0;
-
-       if (start_vsi) {
-               for (i = 0; i < pf->num_alloc_vsi; i++) {
-                       vsi = pf->vsi[i];
-                       if (vsi == start_vsi)
-                               break;
-               }
-       }
-       for (; i < pf->num_alloc_vsi; i++) {
-               vsi = pf->vsi[i];
-               if (vsi && vsi->type == type)
-                       return vsi;
-       }
-
-       return NULL;
-}
-
 /**
  * i40e_client_add_instance - add a client instance struct to the instance list
  * @pf: pointer to the board struct
@@ -565,7 +535,7 @@ void i40e_client_subtask(struct i40e_pf *pf)
                        if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state))
                                continue;
                } else {
-                       dev_warn(&pf->pdev->dev, "This client %s is being instanciated at probe\n",
+                       dev_warn(&pf->pdev->dev, "This client %s is being instantiated at probe\n",
                                 client->name);
                }
 
@@ -575,29 +545,25 @@ void i40e_client_subtask(struct i40e_pf *pf)
                        continue;
 
                if (!existing) {
-                       /* Also up the ref_cnt for no. of instances of this
-                        * client.
-                        */
-                       atomic_inc(&client->ref_cnt);
                        dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n",
                                 client->name, pf->hw.pf_id,
                                 pf->hw.bus.device, pf->hw.bus.func);
                }
 
                mutex_lock(&i40e_client_instance_mutex);
-               /* Send an Open request to the client */
-               atomic_inc(&cdev->ref_cnt);
-               if (client->ops && client->ops->open)
-                       ret = client->ops->open(&cdev->lan_info, client);
-               atomic_dec(&cdev->ref_cnt);
-               if (!ret) {
-                       set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
-               } else {
-                       /* remove client instance */
-                       mutex_unlock(&i40e_client_instance_mutex);
-                       i40e_client_del_instance(pf, client);
-                       atomic_dec(&client->ref_cnt);
-                       continue;
+               if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                             &cdev->state)) {
+                       /* Send an Open request to the client */
+                       if (client->ops && client->ops->open)
+                               ret = client->ops->open(&cdev->lan_info,
+                                                       client);
+                       if (!ret) {
+                               set_bit(__I40E_CLIENT_INSTANCE_OPENED,
+                                       &cdev->state);
+                       } else {
+                               /* remove client instance */
+                               i40e_client_del_instance(pf, client);
+                       }
                }
                mutex_unlock(&i40e_client_instance_mutex);
        }
@@ -694,10 +660,6 @@ static int i40e_client_release(struct i40e_client *client)
                        continue;
                pf = (struct i40e_pf *)cdev->lan_info.pf;
                if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
-                       if (atomic_read(&cdev->ref_cnt) > 0) {
-                               ret = I40E_ERR_NOT_READY;
-                               goto out;
-                       }
                        if (client->ops && client->ops->close)
                                client->ops->close(&cdev->lan_info, client,
                                                   false);
@@ -710,11 +672,9 @@ static int i40e_client_release(struct i40e_client *client)
                }
                /* delete the client instance from the list */
                list_move(&cdev->list, &cdevs_tmp);
-               atomic_dec(&client->ref_cnt);
                dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n",
                         client->name);
        }
-out:
        mutex_unlock(&i40e_client_instance_mutex);
 
        /* free the client device and release its vsi */
@@ -1040,17 +1000,10 @@ int i40e_unregister_client(struct i40e_client *client)
                ret = -ENODEV;
                goto out;
        }
-       if (atomic_read(&client->ref_cnt) == 0) {
-               clear_bit(__I40E_CLIENT_REGISTERED, &client->state);
-               list_del(&client->list);
-               pr_info("i40e: Unregistered client %s with return code %d\n",
-                       client->name, ret);
-       } else {
-               ret = I40E_ERR_NOT_READY;
-               pr_err("i40e: Client %s failed unregister - client has open instances\n",
-                      client->name);
-       }
-
+       clear_bit(__I40E_CLIENT_REGISTERED, &client->state);
+       list_del(&client->list);
+       pr_info("i40e: Unregistered client %s with return code %d\n",
+               client->name, ret);
 out:
        mutex_unlock(&i40e_client_mutex);
        return ret;
index 38a6c36a6a0e3bfeeeb422d08ae6240d6dca3cde..528bd79b05fecc68d981ea08b144d9898c6aaaa0 100644 (file)
@@ -203,8 +203,6 @@ struct i40e_client_instance {
        struct i40e_info lan_info;
        struct i40e_client *client;
        unsigned long  state;
-       /* A count of all the in-progress calls to the client */
-       atomic_t ref_cnt;
 };
 
 struct i40e_client {
index 2154a34c1dd804e10c0905ac63718ce642643dec..98791ba572110cfdf11992f6e5ea24376fdcb0fd 100644 (file)
@@ -1849,7 +1849,7 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
        else
                hw_link_info->crc_enable = false;
 
-       if (resp->command_flags & cpu_to_le16(I40E_AQ_LSE_ENABLE))
+       if (resp->command_flags & cpu_to_le16(I40E_AQ_LSE_IS_ENABLED))
                hw_link_info->lse_enable = true;
        else
                hw_link_info->lse_enable = false;
@@ -2494,7 +2494,10 @@ i40e_status i40e_update_link_info(struct i40e_hw *hw)
        if (status)
                return status;
 
-       if (hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) {
+       /* extra checking needed to ensure link info to user is timely */
+       if ((hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) &&
+           ((hw->phy.link_info.link_info & I40E_AQ_LINK_UP) ||
+            !(hw->phy.link_info_old.link_info & I40E_AQ_LINK_UP))) {
                status = i40e_aq_get_phy_capabilities(hw, false, false,
                                                      &abilities, NULL);
                if (status)
@@ -3310,8 +3313,10 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
        /* partition id is 1-based, and functions are evenly spread
         * across the ports as partitions
         */
-       hw->partition_id = (hw->pf_id / hw->num_ports) + 1;
-       hw->num_partitions = num_functions / hw->num_ports;
+       if (hw->num_ports != 0) {
+               hw->partition_id = (hw->pf_id / hw->num_ports) + 1;
+               hw->num_partitions = num_functions / hw->num_ports;
+       }
 
        /* additional HW specific goodies that might
         * someday be HW version specific
index 0c1875b5b16d2d79839cf6f85d5194741ffb697e..b8a03a05c4e852e618a5baba97c592899196f8af 100644 (file)
@@ -134,7 +134,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
        struct rtnl_link_stats64 *nstat;
        struct i40e_mac_filter *f;
        struct i40e_vsi *vsi;
-       int i;
+       int i, bkt;
 
        vsi = i40e_dbg_find_vsi(pf, seid);
        if (!vsi) {
@@ -166,11 +166,11 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
                         pf->hw.mac.addr,
                         pf->hw.mac.san_addr,
                         pf->hw.mac.port_addr);
-       list_for_each_entry(f, &vsi->mac_filter_list, list) {
+       hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
                dev_info(&pf->pdev->dev,
-                        "    mac_filter_list: %pM vid=%d, is_netdev=%d is_vf=%d counter=%d, state %s\n",
-                        f->macaddr, f->vlan, f->is_netdev, f->is_vf,
-                        f->counter, i40e_filter_state_string[f->state]);
+                        "    mac_filter_hash: %pM vid=%d, state %s\n",
+                        f->macaddr, f->vlan,
+                        i40e_filter_state_string[f->state]);
        }
        dev_info(&pf->pdev->dev, "    active_filters %d, promisc_threshold %d, overflow promisc %s\n",
                 vsi->active_filters, vsi->promisc_threshold,
@@ -867,86 +867,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 
                dev_info(&pf->pdev->dev, "deleting relay %d\n", veb_seid);
                i40e_veb_release(pf->veb[i]);
-
-       } else if (strncmp(cmd_buf, "add macaddr", 11) == 0) {
-               struct i40e_mac_filter *f;
-               int vlan = 0;
-               u8 ma[6];
-               int ret;
-
-               cnt = sscanf(&cmd_buf[11],
-                            "%i %hhx:%hhx:%hhx:%hhx:%hhx:%hhx %i",
-                            &vsi_seid,
-                            &ma[0], &ma[1], &ma[2], &ma[3], &ma[4], &ma[5],
-                            &vlan);
-               if (cnt == 7) {
-                       vlan = 0;
-               } else if (cnt != 8) {
-                       dev_info(&pf->pdev->dev,
-                                "add macaddr: bad command string, cnt=%d\n",
-                                cnt);
-                       goto command_write_done;
-               }
-
-               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
-               if (!vsi) {
-                       dev_info(&pf->pdev->dev,
-                                "add macaddr: VSI %d not found\n", vsi_seid);
-                       goto command_write_done;
-               }
-
-               spin_lock_bh(&vsi->mac_filter_list_lock);
-               f = i40e_add_filter(vsi, ma, vlan, false, false);
-               spin_unlock_bh(&vsi->mac_filter_list_lock);
-               ret = i40e_sync_vsi_filters(vsi);
-               if (f && !ret)
-                       dev_info(&pf->pdev->dev,
-                                "add macaddr: %pM vlan=%d added to VSI %d\n",
-                                ma, vlan, vsi_seid);
-               else
-                       dev_info(&pf->pdev->dev,
-                                "add macaddr: %pM vlan=%d to VSI %d failed, f=%p ret=%d\n",
-                                ma, vlan, vsi_seid, f, ret);
-
-       } else if (strncmp(cmd_buf, "del macaddr", 11) == 0) {
-               int vlan = 0;
-               u8 ma[6];
-               int ret;
-
-               cnt = sscanf(&cmd_buf[11],
-                            "%i %hhx:%hhx:%hhx:%hhx:%hhx:%hhx %i",
-                            &vsi_seid,
-                            &ma[0], &ma[1], &ma[2], &ma[3], &ma[4], &ma[5],
-                            &vlan);
-               if (cnt == 7) {
-                       vlan = 0;
-               } else if (cnt != 8) {
-                       dev_info(&pf->pdev->dev,
-                                "del macaddr: bad command string, cnt=%d\n",
-                                cnt);
-                       goto command_write_done;
-               }
-
-               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
-               if (!vsi) {
-                       dev_info(&pf->pdev->dev,
-                                "del macaddr: VSI %d not found\n", vsi_seid);
-                       goto command_write_done;
-               }
-
-               spin_lock_bh(&vsi->mac_filter_list_lock);
-               i40e_del_filter(vsi, ma, vlan, false, false);
-               spin_unlock_bh(&vsi->mac_filter_list_lock);
-               ret = i40e_sync_vsi_filters(vsi);
-               if (!ret)
-                       dev_info(&pf->pdev->dev,
-                                "del macaddr: %pM vlan=%d removed from VSI %d\n",
-                                ma, vlan, vsi_seid);
-               else
-                       dev_info(&pf->pdev->dev,
-                                "del macaddr: %pM vlan=%d from VSI %d failed, ret=%d\n",
-                                ma, vlan, vsi_seid, ret);
-
        } else if (strncmp(cmd_buf, "add pvid", 8) == 0) {
                i40e_status ret;
                u16 vid;
@@ -1210,24 +1130,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
                        dev_info(&pf->pdev->dev,
                                 "dump debug fwdata <cluster_id> <table_id> <index>\n");
                }
-
-       } else if (strncmp(cmd_buf, "msg_enable", 10) == 0) {
-               u32 level;
-               cnt = sscanf(&cmd_buf[10], "%i", &level);
-               if (cnt) {
-                       if (I40E_DEBUG_USER & level) {
-                               pf->hw.debug_mask = level;
-                               dev_info(&pf->pdev->dev,
-                                        "set hw.debug_mask = 0x%08x\n",
-                                        pf->hw.debug_mask);
-                       }
-                       pf->msg_enable = level;
-                       dev_info(&pf->pdev->dev, "set msg_enable = 0x%08x\n",
-                                pf->msg_enable);
-               } else {
-                       dev_info(&pf->pdev->dev, "msg_enable = 0x%08x\n",
-                                pf->msg_enable);
-               }
        } else if (strncmp(cmd_buf, "pfr", 3) == 0) {
                dev_info(&pf->pdev->dev, "debugfs: forcing PFR\n");
                i40e_do_reset_safe(pf, BIT(__I40E_PF_RESET_REQUESTED));
@@ -1633,8 +1535,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
                dev_info(&pf->pdev->dev, "  del vsi [vsi_seid]\n");
                dev_info(&pf->pdev->dev, "  add relay <uplink_seid> <vsi_seid>\n");
                dev_info(&pf->pdev->dev, "  del relay <relay_seid>\n");
-               dev_info(&pf->pdev->dev, "  add macaddr <vsi_seid> <aa:bb:cc:dd:ee:ff> [vlan]\n");
-               dev_info(&pf->pdev->dev, "  del macaddr <vsi_seid> <aa:bb:cc:dd:ee:ff> [vlan]\n");
                dev_info(&pf->pdev->dev, "  add pvid <vsi_seid> <vid>\n");
                dev_info(&pf->pdev->dev, "  del pvid <vsi_seid>\n");
                dev_info(&pf->pdev->dev, "  dump switch\n");
@@ -1644,7 +1544,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
                dev_info(&pf->pdev->dev, "  dump desc aq\n");
                dev_info(&pf->pdev->dev, "  dump reset stats\n");
                dev_info(&pf->pdev->dev, "  dump debug fwdata <cluster_id> <table_id> <index>\n");
-               dev_info(&pf->pdev->dev, "  msg_enable [level]\n");
                dev_info(&pf->pdev->dev, "  read <reg>\n");
                dev_info(&pf->pdev->dev, "  write <reg> <value>\n");
                dev_info(&pf->pdev->dev, "  clear_stats vsi [seid]\n");
index 92bc8846f1ba991abea8de574efefb340d121e95..b9e1162d927f3a8bbdb965ab40f74d343203d85a 100644 (file)
@@ -104,7 +104,7 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = {
  * The PF_STATs are appended to the netdev stats only when ethtool -S
  * is queried on the base PF netdev, not on the VMDq or FCoE netdev.
  */
-static struct i40e_stats i40e_gstrings_stats[] = {
+static const struct i40e_stats i40e_gstrings_stats[] = {
        I40E_PF_STAT("rx_bytes", stats.eth.rx_bytes),
        I40E_PF_STAT("tx_bytes", stats.eth.tx_bytes),
        I40E_PF_STAT("rx_unicast", stats.eth.rx_unicast),
@@ -216,7 +216,6 @@ enum i40e_ethtool_test_id {
        I40E_ETH_TEST_REG = 0,
        I40E_ETH_TEST_EEPROM,
        I40E_ETH_TEST_INTR,
-       I40E_ETH_TEST_LOOPBACK,
        I40E_ETH_TEST_LINK,
 };
 
@@ -224,7 +223,6 @@ static const char i40e_gstrings_test[][ETH_GSTRING_LEN] = {
        "Register test  (offline)",
        "Eeprom test    (offline)",
        "Interrupt test (offline)",
-       "Loopback test  (offline)",
        "Link test   (on/offline)"
 };
 
@@ -978,6 +976,10 @@ static u32 i40e_get_msglevel(struct net_device *netdev)
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
        struct i40e_pf *pf = np->vsi->back;
+       u32 debug_mask = pf->hw.debug_mask;
+
+       if (debug_mask)
+               netdev_info(netdev, "i40e debug_mask: 0x%08X\n", debug_mask);
 
        return pf->msg_enable;
 }
@@ -989,7 +991,8 @@ static void i40e_set_msglevel(struct net_device *netdev, u32 data)
 
        if (I40E_DEBUG_USER & data)
                pf->hw.debug_mask = data;
-       pf->msg_enable = data;
+       else
+               pf->msg_enable = data;
 }
 
 static int i40e_get_regs_len(struct net_device *netdev)
@@ -1739,17 +1742,6 @@ static int i40e_intr_test(struct net_device *netdev, u64 *data)
        return *data;
 }
 
-static int i40e_loopback_test(struct net_device *netdev, u64 *data)
-{
-       struct i40e_netdev_priv *np = netdev_priv(netdev);
-       struct i40e_pf *pf = np->vsi->back;
-
-       netif_info(pf, hw, netdev, "loopback test not implemented\n");
-       *data = 0;
-
-       return *data;
-}
-
 static inline bool i40e_active_vfs(struct i40e_pf *pf)
 {
        struct i40e_vf *vfs = pf->vf;
@@ -1763,17 +1755,7 @@ static inline bool i40e_active_vfs(struct i40e_pf *pf)
 
 static inline bool i40e_active_vmdqs(struct i40e_pf *pf)
 {
-       struct i40e_vsi **vsi = pf->vsi;
-       int i;
-
-       for (i = 0; i < pf->num_alloc_vsi; i++) {
-               if (!vsi[i])
-                       continue;
-               if (vsi[i]->type == I40E_VSI_VMDQ2)
-                       return true;
-       }
-
-       return false;
+       return !!i40e_find_vsi_by_type(pf, I40E_VSI_VMDQ2);
 }
 
 static void i40e_diag_test(struct net_device *netdev,
@@ -1795,7 +1777,6 @@ static void i40e_diag_test(struct net_device *netdev,
                        data[I40E_ETH_TEST_REG]         = 1;
                        data[I40E_ETH_TEST_EEPROM]      = 1;
                        data[I40E_ETH_TEST_INTR]        = 1;
-                       data[I40E_ETH_TEST_LOOPBACK]    = 1;
                        data[I40E_ETH_TEST_LINK]        = 1;
                        eth_test->flags |= ETH_TEST_FL_FAILED;
                        clear_bit(__I40E_TESTING, &pf->state);
@@ -1823,9 +1804,6 @@ static void i40e_diag_test(struct net_device *netdev,
                if (i40e_intr_test(netdev, &data[I40E_ETH_TEST_INTR]))
                        eth_test->flags |= ETH_TEST_FL_FAILED;
 
-               if (i40e_loopback_test(netdev, &data[I40E_ETH_TEST_LOOPBACK]))
-                       eth_test->flags |= ETH_TEST_FL_FAILED;
-
                /* run reg test last, a reset is required after it */
                if (i40e_reg_test(netdev, &data[I40E_ETH_TEST_REG]))
                        eth_test->flags |= ETH_TEST_FL_FAILED;
@@ -1846,7 +1824,6 @@ static void i40e_diag_test(struct net_device *netdev,
                data[I40E_ETH_TEST_REG] = 0;
                data[I40E_ETH_TEST_EEPROM] = 0;
                data[I40E_ETH_TEST_INTR] = 0;
-               data[I40E_ETH_TEST_LOOPBACK] = 0;
        }
 
 skip_ol_tests:
index 58e6c1570335a21b5470c7eb34c19c22e1de324e..b077ef8b00fa74e8480cc59f443b402e6cf647f5 100644 (file)
@@ -1522,12 +1522,12 @@ void i40e_fcoe_config_netdev(struct net_device *netdev, struct i40e_vsi *vsi)
         * same PCI function.
         */
        netdev->dev_port = 1;
-       spin_lock_bh(&vsi->mac_filter_list_lock);
-       i40e_add_filter(vsi, hw->mac.san_addr, 0, false, false);
-       i40e_add_filter(vsi, (u8[6]) FC_FCOE_FLOGI_MAC, 0, false, false);
-       i40e_add_filter(vsi, FIP_ALL_FCOE_MACS, 0, false, false);
-       i40e_add_filter(vsi, FIP_ALL_ENODE_MACS, 0, false, false);
-       spin_unlock_bh(&vsi->mac_filter_list_lock);
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
+       i40e_add_filter(vsi, hw->mac.san_addr, 0);
+       i40e_add_filter(vsi, (u8[6]) FC_FCOE_FLOGI_MAC, 0);
+       i40e_add_filter(vsi, FIP_ALL_FCOE_MACS, 0);
+       i40e_add_filter(vsi, FIP_ALL_ENODE_MACS, 0);
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
        /* use san mac */
        ether_addr_copy(netdev->dev_addr, hw->mac.san_addr);
index 83edbe8e3618258d3be6f77254bca763d9b77cf3..5c6a5ceb8a913449157402d7ea62848621accae1 100644 (file)
@@ -41,7 +41,7 @@ static const char i40e_driver_string[] =
 
 #define DRV_VERSION_MAJOR 1
 #define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 16
+#define DRV_VERSION_BUILD 21
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
             __stringify(DRV_VERSION_MINOR) "." \
             __stringify(DRV_VERSION_BUILD)    DRV_KERN
@@ -93,8 +93,8 @@ MODULE_DEVICE_TABLE(pci, i40e_pci_tbl);
 
 #define I40E_MAX_VF_COUNT 128
 static int debug = -1;
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+module_param(debug, uint, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX)");
 
 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
 MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver");
@@ -1145,25 +1145,22 @@ void i40e_update_stats(struct i40e_vsi *vsi)
  * @vsi: the VSI to be searched
  * @macaddr: the MAC address
  * @vlan: the vlan
- * @is_vf: make sure its a VF filter, else doesn't matter
- * @is_netdev: make sure its a netdev filter, else doesn't matter
  *
  * Returns ptr to the filter object or NULL
  **/
 static struct i40e_mac_filter *i40e_find_filter(struct i40e_vsi *vsi,
-                                               u8 *macaddr, s16 vlan,
-                                               bool is_vf, bool is_netdev)
+                                               const u8 *macaddr, s16 vlan)
 {
        struct i40e_mac_filter *f;
+       u64 key;
 
        if (!vsi || !macaddr)
                return NULL;
 
-       list_for_each_entry(f, &vsi->mac_filter_list, list) {
+       key = i40e_addr_to_hkey(macaddr);
+       hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) {
                if ((ether_addr_equal(macaddr, f->macaddr)) &&
-                   (vlan == f->vlan)    &&
-                   (!is_vf || f->is_vf) &&
-                   (!is_netdev || f->is_netdev))
+                   (vlan == f->vlan))
                        return f;
        }
        return NULL;
@@ -1173,24 +1170,21 @@ static struct i40e_mac_filter *i40e_find_filter(struct i40e_vsi *vsi,
  * i40e_find_mac - Find a mac addr in the macvlan filters list
  * @vsi: the VSI to be searched
  * @macaddr: the MAC address we are searching for
- * @is_vf: make sure its a VF filter, else doesn't matter
- * @is_netdev: make sure its a netdev filter, else doesn't matter
  *
  * Returns the first filter with the provided MAC address or NULL if
  * MAC address was not found
  **/
-struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr,
-                                     bool is_vf, bool is_netdev)
+struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr)
 {
        struct i40e_mac_filter *f;
+       u64 key;
 
        if (!vsi || !macaddr)
                return NULL;
 
-       list_for_each_entry(f, &vsi->mac_filter_list, list) {
-               if ((ether_addr_equal(macaddr, f->macaddr)) &&
-                   (!is_vf || f->is_vf) &&
-                   (!is_netdev || f->is_netdev))
+       key = i40e_addr_to_hkey(macaddr);
+       hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) {
+               if ((ether_addr_equal(macaddr, f->macaddr)))
                        return f;
        }
        return NULL;
@@ -1204,119 +1198,31 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr,
  **/
 bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi)
 {
-       struct i40e_mac_filter *f;
+       /* If we have a PVID, always operate in VLAN mode */
+       if (vsi->info.pvid)
+               return true;
 
-       /* Only -1 for all the filters denotes not in vlan mode
-        * so we have to go through all the list in order to make sure
+       /* We need to operate in VLAN mode whenever we have any filters with
+        * a VLAN other than I40E_VLAN_ALL. We could check the table each
+        * time, incurring search cost repeatedly. However, we can notice two
+        * things:
+        *
+        * 1) the only place where we can gain a VLAN filter is in
+        *    i40e_add_filter.
+        *
+        * 2) the only place where filters are actually removed is in
+        *    i40e_vsi_sync_filters_subtask.
+        *
+        * Thus, we can simply use a boolean value, has_vlan_filter, which we
+        * will set to true when we add a VLAN filter in i40e_add_filter. Then
+        * we have to perform the full search after deleting filters in
+        * i40e_vsi_sync_filters_subtask, but we already have to search
+        * filters here and can perform the check at the same time. This
+        * results in avoiding embedding a loop for VLAN mode inside another
+        * loop over all the filters, and should maintain correctness as noted
+        * above.
         */
-       list_for_each_entry(f, &vsi->mac_filter_list, list) {
-               if (f->vlan >= 0 || vsi->info.pvid)
-                       return true;
-       }
-
-       return false;
-}
-
-/**
- * i40e_put_mac_in_vlan - Make macvlan filters from macaddrs and vlans
- * @vsi: the VSI to be searched
- * @macaddr: the mac address to be filtered
- * @is_vf: true if it is a VF
- * @is_netdev: true if it is a netdev
- *
- * Goes through all the macvlan filters and adds a
- * macvlan filter for each unique vlan that already exists
- *
- * Returns first filter found on success, else NULL
- **/
-struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr,
-                                            bool is_vf, bool is_netdev)
-{
-       struct i40e_mac_filter *f;
-
-       list_for_each_entry(f, &vsi->mac_filter_list, list) {
-               if (vsi->info.pvid)
-                       f->vlan = le16_to_cpu(vsi->info.pvid);
-               if (!i40e_find_filter(vsi, macaddr, f->vlan,
-                                     is_vf, is_netdev)) {
-                       if (!i40e_add_filter(vsi, macaddr, f->vlan,
-                                            is_vf, is_netdev))
-                               return NULL;
-               }
-       }
-
-       return list_first_entry_or_null(&vsi->mac_filter_list,
-                                       struct i40e_mac_filter, list);
-}
-
-/**
- * i40e_del_mac_all_vlan - Remove a MAC filter from all VLANS
- * @vsi: the VSI to be searched
- * @macaddr: the mac address to be removed
- * @is_vf: true if it is a VF
- * @is_netdev: true if it is a netdev
- *
- * Removes a given MAC address from a VSI, regardless of VLAN
- *
- * Returns 0 for success, or error
- **/
-int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, u8 *macaddr,
-                         bool is_vf, bool is_netdev)
-{
-       struct i40e_mac_filter *f = NULL;
-       int changed = 0;
-
-       WARN(!spin_is_locked(&vsi->mac_filter_list_lock),
-            "Missing mac_filter_list_lock\n");
-       list_for_each_entry(f, &vsi->mac_filter_list, list) {
-               if ((ether_addr_equal(macaddr, f->macaddr)) &&
-                   (is_vf == f->is_vf) &&
-                   (is_netdev == f->is_netdev)) {
-                       f->counter--;
-                       changed = 1;
-                       if (f->counter == 0)
-                               f->state = I40E_FILTER_REMOVE;
-               }
-       }
-       if (changed) {
-               vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
-               vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
-               return 0;
-       }
-       return -ENOENT;
-}
-
-/**
- * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM
- * @vsi: the PF Main VSI - inappropriate for any other VSI
- * @macaddr: the MAC address
- *
- * Remove whatever filter the firmware set up so the driver can manage
- * its own filtering intelligently.
- **/
-static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr)
-{
-       struct i40e_aqc_remove_macvlan_element_data element;
-       struct i40e_pf *pf = vsi->back;
-
-       /* Only appropriate for the PF main VSI */
-       if (vsi->type != I40E_VSI_MAIN)
-               return;
-
-       memset(&element, 0, sizeof(element));
-       ether_addr_copy(element.mac_addr, macaddr);
-       element.vlan_tag = 0;
-       /* Ignore error returns, some firmware does it this way... */
-       element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
-       i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
-
-       memset(&element, 0, sizeof(element));
-       ether_addr_copy(element.mac_addr, macaddr);
-       element.vlan_tag = 0;
-       /* ...and some firmware does it this way. */
-       element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH |
-                       I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
-       i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
+       return vsi->has_vlan_filter;
 }
 
 /**
@@ -1324,20 +1230,17 @@ static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr)
  * @vsi: the VSI to be searched
  * @macaddr: the MAC address
  * @vlan: the vlan
- * @is_vf: make sure its a VF filter, else doesn't matter
- * @is_netdev: make sure its a netdev filter, else doesn't matter
  *
  * Returns ptr to the filter object or NULL when no memory available.
  *
- * NOTE: This function is expected to be called with mac_filter_list_lock
+ * NOTE: This function is expected to be called with mac_filter_hash_lock
  * being held.
  **/
 struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
-                                       u8 *macaddr, s16 vlan,
-                                       bool is_vf, bool is_netdev)
+                                       const u8 *macaddr, s16 vlan)
 {
        struct i40e_mac_filter *f;
-       int changed = false;
+       u64 key;
 
        if (!vsi || !macaddr)
                return NULL;
@@ -1349,11 +1252,17 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
        if (is_broadcast_ether_addr(macaddr))
                return NULL;
 
-       f = i40e_find_filter(vsi, macaddr, vlan, is_vf, is_netdev);
+       f = i40e_find_filter(vsi, macaddr, vlan);
        if (!f) {
                f = kzalloc(sizeof(*f), GFP_ATOMIC);
                if (!f)
-                       goto add_filter_out;
+                       return NULL;
+
+               /* Update the boolean indicating if we need to function in
+                * VLAN mode.
+                */
+               if (vlan >= 0)
+                       vsi->has_vlan_filter = true;
 
                ether_addr_copy(f->macaddr, macaddr);
                f->vlan = vlan;
@@ -1365,100 +1274,148 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
                        f->state = I40E_FILTER_FAILED;
                else
                        f->state = I40E_FILTER_NEW;
-               changed = true;
-               INIT_LIST_HEAD(&f->list);
-               list_add_tail(&f->list, &vsi->mac_filter_list);
-       }
+               INIT_HLIST_NODE(&f->hlist);
 
-       /* increment counter and add a new flag if needed */
-       if (is_vf) {
-               if (!f->is_vf) {
-                       f->is_vf = true;
-                       f->counter++;
-               }
-       } else if (is_netdev) {
-               if (!f->is_netdev) {
-                       f->is_netdev = true;
-                       f->counter++;
-               }
-       } else {
-               f->counter++;
-       }
+               key = i40e_addr_to_hkey(macaddr);
+               hash_add(vsi->mac_filter_hash, &f->hlist, key);
 
-       if (changed) {
                vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
                vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
        }
 
-add_filter_out:
+       /* If we're asked to add a filter that has been marked for removal, it
+        * is safe to simply restore it to active state. __i40e_del_filter
+        * will have simply deleted any filters which were previously marked
+        * NEW or FAILED, so if it is currently marked REMOVE it must have
+        * previously been ACTIVE. Since we haven't yet run the sync filters
+        * task, just restore this filter to the ACTIVE state so that the
+        * sync task leaves it in place
+        */
+       if (f->state == I40E_FILTER_REMOVE)
+               f->state = I40E_FILTER_ACTIVE;
+
        return f;
 }
 
 /**
- * i40e_del_filter - Remove a mac/vlan filter from the VSI
+ * __i40e_del_filter - Remove a specific filter from the VSI
+ * @vsi: VSI to remove from
+ * @f: the filter to remove from the list
+ *
+ * This function should be called instead of i40e_del_filter only if you know
+ * the exact filter you will remove already, such as via i40e_find_filter or
+ * i40e_find_mac.
+ *
+ * NOTE: This function is expected to be called with mac_filter_hash_lock
+ * being held.
+ * ANOTHER NOTE: This function MUST be called from within the context of
+ * the "safe" variants of any hash iterators, e.g. hash_for_each_safe()
+ * instead of hash_for_each().
+ **/
+static void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
+{
+       if (!f)
+               return;
+
+       if ((f->state == I40E_FILTER_FAILED) ||
+           (f->state == I40E_FILTER_NEW)) {
+               /* this one never got added by the FW. Just remove it,
+                * no need to sync anything.
+                */
+               hash_del(&f->hlist);
+               kfree(f);
+       } else {
+               f->state = I40E_FILTER_REMOVE;
+               vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+               vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+       }
+}
+
+/**
+ * i40e_del_filter - Remove a MAC/VLAN filter from the VSI
  * @vsi: the VSI to be searched
  * @macaddr: the MAC address
- * @vlan: the vlan
- * @is_vf: make sure it's a VF filter, else doesn't matter
- * @is_netdev: make sure it's a netdev filter, else doesn't matter
+ * @vlan: the VLAN
  *
- * NOTE: This function is expected to be called with mac_filter_list_lock
+ * NOTE: This function is expected to be called with mac_filter_hash_lock
  * being held.
  * ANOTHER NOTE: This function MUST be called from within the context of
  * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe()
  * instead of list_for_each_entry().
  **/
-void i40e_del_filter(struct i40e_vsi *vsi,
-                    u8 *macaddr, s16 vlan,
-                    bool is_vf, bool is_netdev)
+void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan)
 {
        struct i40e_mac_filter *f;
 
        if (!vsi || !macaddr)
                return;
 
-       f = i40e_find_filter(vsi, macaddr, vlan, is_vf, is_netdev);
-       if (!f || f->counter == 0)
-               return;
+       f = i40e_find_filter(vsi, macaddr, vlan);
+       __i40e_del_filter(vsi, f);
+}
 
-       if (is_vf) {
-               if (f->is_vf) {
-                       f->is_vf = false;
-                       f->counter--;
-               }
-       } else if (is_netdev) {
-               if (f->is_netdev) {
-                       f->is_netdev = false;
-                       f->counter--;
-               }
-       } else {
-               /* make sure we don't remove a filter in use by VF or netdev */
-               int min_f = 0;
+/**
+ * i40e_put_mac_in_vlan - Make macvlan filters from macaddrs and vlans
+ * @vsi: the VSI to be searched
+ * @macaddr: the mac address to be filtered
+ *
+ * Goes through all the macvlan filters and adds a macvlan filter for each
+ * unique vlan that already exists. If a PVID has been assigned, instead only
+ * add the macaddr to that VLAN.
+ *
+ * Returns last filter added on success, else NULL
+ **/
+struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi,
+                                            const u8 *macaddr)
+{
+       struct i40e_mac_filter *f, *add = NULL;
+       struct hlist_node *h;
+       int bkt;
 
-               min_f += (f->is_vf ? 1 : 0);
-               min_f += (f->is_netdev ? 1 : 0);
+       if (vsi->info.pvid)
+               return i40e_add_filter(vsi, macaddr,
+                                      le16_to_cpu(vsi->info.pvid));
 
-               if (f->counter > min_f)
-                       f->counter--;
+       hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+               if (f->state == I40E_FILTER_REMOVE)
+                       continue;
+               add = i40e_add_filter(vsi, macaddr, f->vlan);
+               if (!add)
+                       return NULL;
        }
 
-       /* counter == 0 tells sync_filters_subtask to
-        * remove the filter from the firmware's list
-        */
-       if (f->counter == 0) {
-               if ((f->state == I40E_FILTER_FAILED) ||
-                   (f->state == I40E_FILTER_NEW)) {
-                       /* this one never got added by the FW. Just remove it,
-                        * no need to sync anything.
-                        */
-                       list_del(&f->list);
-                       kfree(f);
-               } else {
-                       f->state = I40E_FILTER_REMOVE;
-                       vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
-                       vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+       return add;
+}
+
+/**
+ * i40e_del_mac_all_vlan - Remove a MAC filter from all VLANS
+ * @vsi: the VSI to be searched
+ * @macaddr: the mac address to be removed
+ *
+ * Removes a given MAC address from a VSI, regardless of VLAN
+ *
+ * Returns 0 for success, or error
+ **/
+int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, const u8 *macaddr)
+{
+       struct i40e_mac_filter *f;
+       struct hlist_node *h;
+       bool found = false;
+       int bkt;
+
+       WARN(!spin_is_locked(&vsi->mac_filter_hash_lock),
+            "Missing mac_filter_hash_lock\n");
+       hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+               if (ether_addr_equal(macaddr, f->macaddr)) {
+                       __i40e_del_filter(vsi, f);
+                       found = true;
                }
        }
+
+       if (found)
+               return 0;
+       else
+               return -ENOENT;
 }
 
 /**
@@ -1499,10 +1456,10 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
        else
                netdev_info(netdev, "set new mac address %pM\n", addr->sa_data);
 
-       spin_lock_bh(&vsi->mac_filter_list_lock);
-       i40e_del_mac_all_vlan(vsi, netdev->dev_addr, false, true);
-       i40e_put_mac_in_vlan(vsi, addr->sa_data, false, true);
-       spin_unlock_bh(&vsi->mac_filter_list_lock);
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
+       i40e_del_mac_all_vlan(vsi, netdev->dev_addr);
+       i40e_put_mac_in_vlan(vsi, addr->sa_data);
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
        ether_addr_copy(netdev->dev_addr, addr->sa_data);
        if (vsi->type == I40E_VSI_MAIN) {
                i40e_status ret;
@@ -1665,6 +1622,52 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
        ctxt->info.valid_sections |= cpu_to_le16(sections);
 }
 
+/**
+ * i40e_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int i40e_addr_sync(struct net_device *netdev, const u8 *addr)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_mac_filter *f;
+
+       if (i40e_is_vsi_in_vlan(vsi))
+               f = i40e_put_mac_in_vlan(vsi, addr);
+       else
+               f = i40e_add_filter(vsi, addr, I40E_VLAN_ANY);
+
+       if (f)
+               return 0;
+       else
+               return -ENOMEM;
+}
+
+/**
+ * i40e_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to remove
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+
+       if (i40e_is_vsi_in_vlan(vsi))
+               i40e_del_mac_all_vlan(vsi, addr);
+       else
+               i40e_del_filter(vsi, addr, I40E_VLAN_ANY);
+
+       return 0;
+}
+
 /**
  * i40e_set_rx_mode - NDO callback to set the netdev filters
  * @netdev: network interface device structure
@@ -1676,62 +1679,14 @@ static void i40e_set_rx_mode(struct net_device *netdev)
 #endif
 {
        struct i40e_netdev_priv *np = netdev_priv(netdev);
-       struct i40e_mac_filter *f, *ftmp;
        struct i40e_vsi *vsi = np->vsi;
-       struct netdev_hw_addr *uca;
-       struct netdev_hw_addr *mca;
-       struct netdev_hw_addr *ha;
-
-       spin_lock_bh(&vsi->mac_filter_list_lock);
-
-       /* add addr if not already in the filter list */
-       netdev_for_each_uc_addr(uca, netdev) {
-               if (!i40e_find_mac(vsi, uca->addr, false, true)) {
-                       if (i40e_is_vsi_in_vlan(vsi))
-                               i40e_put_mac_in_vlan(vsi, uca->addr,
-                                                    false, true);
-                       else
-                               i40e_add_filter(vsi, uca->addr, I40E_VLAN_ANY,
-                                               false, true);
-               }
-       }
 
-       netdev_for_each_mc_addr(mca, netdev) {
-               if (!i40e_find_mac(vsi, mca->addr, false, true)) {
-                       if (i40e_is_vsi_in_vlan(vsi))
-                               i40e_put_mac_in_vlan(vsi, mca->addr,
-                                                    false, true);
-                       else
-                               i40e_add_filter(vsi, mca->addr, I40E_VLAN_ANY,
-                                               false, true);
-               }
-       }
-
-       /* remove filter if not in netdev list */
-       list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
-
-               if (!f->is_netdev)
-                       continue;
-
-               netdev_for_each_mc_addr(mca, netdev)
-                       if (ether_addr_equal(mca->addr, f->macaddr))
-                               goto bottom_of_search_loop;
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
 
-               netdev_for_each_uc_addr(uca, netdev)
-                       if (ether_addr_equal(uca->addr, f->macaddr))
-                               goto bottom_of_search_loop;
+       __dev_uc_sync(netdev, i40e_addr_sync, i40e_addr_unsync);
+       __dev_mc_sync(netdev, i40e_addr_sync, i40e_addr_unsync);
 
-               for_each_dev_addr(netdev, ha)
-                       if (ether_addr_equal(ha->addr, f->macaddr))
-                               goto bottom_of_search_loop;
-
-               /* f->macaddr wasn't found in uc, mc, or ha list so delete it */
-               i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY, false, true);
-
-bottom_of_search_loop:
-               continue;
-       }
-       spin_unlock_bh(&vsi->mac_filter_list_lock);
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
        /* check for other flag changes */
        if (vsi->current_netdev_flags != vsi->netdev->flags) {
@@ -1746,21 +1701,26 @@ bottom_of_search_loop:
 }
 
 /**
- * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries
- * @vsi: pointer to vsi struct
+ * i40e_undo_filter_entries - Undo the changes made to MAC filter entries
+ * @vsi: Pointer to VSI struct
  * @from: Pointer to list which contains MAC filter entries - changes to
  *        those entries needs to be undone.
  *
- * MAC filter entries from list were slated to be removed from device.
+ * MAC filter entries from list were slated to be sent to firmware, either for
+ * addition or deletion.
  **/
-static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi,
-                                        struct list_head *from)
+static void i40e_undo_filter_entries(struct i40e_vsi *vsi,
+                                    struct hlist_head *from)
 {
-       struct i40e_mac_filter *f, *ftmp;
+       struct i40e_mac_filter *f;
+       struct hlist_node *h;
+
+       hlist_for_each_entry_safe(f, h, from, hlist) {
+               u64 key = i40e_addr_to_hkey(f->macaddr);
 
-       list_for_each_entry_safe(f, ftmp, from, list) {
                /* Move the element back into MAC filter list*/
-               list_move_tail(&f->list, &vsi->mac_filter_list);
+               hlist_del(&f->hlist);
+               hash_add(vsi->mac_filter_hash, &f->hlist, key);
        }
 }
 
@@ -1789,7 +1749,9 @@ i40e_update_filter_state(int count,
                /* Everything's good, mark all filters active. */
                for (i = 0; i < count ; i++) {
                        add_head->state = I40E_FILTER_ACTIVE;
-                       add_head = list_next_entry(add_head, list);
+                       add_head = hlist_entry(add_head->hlist.next,
+                                              typeof(struct i40e_mac_filter),
+                                              hlist);
                }
        } else if (aq_err == I40E_AQ_RC_ENOSPC) {
                /* Device ran out of filter space. Check the return value
@@ -1803,19 +1765,97 @@ i40e_update_filter_state(int count,
                                add_head->state = I40E_FILTER_ACTIVE;
                                retval++;
                        }
-                       add_head = list_next_entry(add_head, list);
+                       add_head = hlist_entry(add_head->hlist.next,
+                                              typeof(struct i40e_mac_filter),
+                                              hlist);
                }
        } else {
                /* Some other horrible thing happened, fail all filters */
                retval = 0;
                for (i = 0; i < count ; i++) {
                        add_head->state = I40E_FILTER_FAILED;
-                       add_head = list_next_entry(add_head, list);
+                       add_head = hlist_entry(add_head->hlist.next,
+                                              typeof(struct i40e_mac_filter),
+                                              hlist);
                }
        }
        return retval;
 }
 
+/**
+ * i40e_aqc_del_filters - Request firmware to delete a set of filters
+ * @vsi: ptr to the VSI
+ * @vsi_name: name to display in messages
+ * @list: the list of filters to send to firmware
+ * @num_del: the number of filters to delete
+ * @retval: Set to -EIO on failure to delete
+ *
+ * Send a request to firmware via AdminQ to delete a set of filters. Uses
+ * *retval instead of a return value so that success does not force *retval to
+ * be set to 0. This ensures that a sequence of calls to this function
+ * preserves the previous value of *retval on successful delete.
+ */
+static
+void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
+                         struct i40e_aqc_remove_macvlan_element_data *list,
+                         int num_del, int *retval)
+{
+       struct i40e_hw *hw = &vsi->back->hw;
+       i40e_status aq_ret;
+       int aq_err;
+
+       aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, list, num_del, NULL);
+       aq_err = hw->aq.asq_last_status;
+
+       /* Explicitly ignore and do not report when firmware returns ENOENT */
+       if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) {
+               *retval = -EIO;
+               dev_info(&vsi->back->pdev->dev,
+                        "ignoring delete macvlan error on %s, err %s, aq_err %s\n",
+                        vsi_name, i40e_stat_str(hw, aq_ret),
+                        i40e_aq_str(hw, aq_err));
+       }
+}
+
+/**
+ * i40e_aqc_add_filters - Request firmware to add a set of filters
+ * @vsi: ptr to the VSI
+ * @vsi_name: name to display in messages
+ * @list: the list of filters to send to firmware
+ * @add_head: Position in the add hlist
+ * @num_add: the number of filters to add
+ * @promisc_changed: set to true on exit if promiscuous mode was forced on
+ *
+ * Send a request to firmware via AdminQ to add a chunk of filters. Will set
+ * promisc_changed to true if the firmware has run out of space for more
+ * filters.
+ */
+static
+void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
+                         struct i40e_aqc_add_macvlan_element_data *list,
+                         struct i40e_mac_filter *add_head,
+                         int num_add, bool *promisc_changed)
+{
+       struct i40e_hw *hw = &vsi->back->hw;
+       i40e_status aq_ret;
+       int aq_err, fcnt;
+
+       aq_ret = i40e_aq_add_macvlan(hw, vsi->seid, list, num_add, NULL);
+       aq_err = hw->aq.asq_last_status;
+       fcnt = i40e_update_filter_state(num_add, list, add_head, aq_ret);
+       vsi->active_filters += fcnt;
+
+       if (fcnt != num_add) {
+               *promisc_changed = true;
+               set_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state);
+               vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
+               dev_warn(&vsi->back->pdev->dev,
+                        "Error %s adding RX filters on %s, promiscuous mode forced on\n",
+                        i40e_aq_str(hw, aq_err),
+                        vsi_name);
+       }
+}
+
 /**
  * i40e_sync_vsi_filters - Update the VSI filter list to the HW
  * @vsi: ptr to the VSI
@@ -1826,22 +1866,25 @@ i40e_update_filter_state(int count,
  **/
 int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 {
-       struct i40e_mac_filter *f, *ftmp, *add_head = NULL;
-       struct list_head tmp_add_list, tmp_del_list;
+       struct hlist_head tmp_add_list, tmp_del_list;
+       struct i40e_mac_filter *f, *add_head = NULL;
        struct i40e_hw *hw = &vsi->back->hw;
+       unsigned int vlan_any_filters = 0;
+       unsigned int non_vlan_filters = 0;
+       unsigned int vlan_filters = 0;
        bool promisc_changed = false;
        char vsi_name[16] = "PF";
        int filter_list_len = 0;
-       u32 changed_flags = 0;
        i40e_status aq_ret = 0;
-       int retval = 0;
+       u32 changed_flags = 0;
+       struct hlist_node *h;
        struct i40e_pf *pf;
        int num_add = 0;
        int num_del = 0;
-       int aq_err = 0;
+       int retval = 0;
        u16 cmd_flags;
        int list_size;
-       int fcnt;
+       int bkt;
 
        /* empty array typed pointers, kcalloc later */
        struct i40e_aqc_add_macvlan_element_data *add_list;
@@ -1856,8 +1899,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
                vsi->current_netdev_flags = vsi->netdev->flags;
        }
 
-       INIT_LIST_HEAD(&tmp_add_list);
-       INIT_LIST_HEAD(&tmp_del_list);
+       INIT_HLIST_HEAD(&tmp_add_list);
+       INIT_HLIST_HEAD(&tmp_del_list);
 
        if (vsi->type == I40E_VSI_SRIOV)
                snprintf(vsi_name, sizeof(vsi_name) - 1, "VF %d", vsi->vf_id);
@@ -1867,41 +1910,98 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
        if (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) {
                vsi->flags &= ~I40E_VSI_FLAG_FILTER_CHANGED;
 
-               spin_lock_bh(&vsi->mac_filter_list_lock);
+               spin_lock_bh(&vsi->mac_filter_hash_lock);
                /* Create a list of filters to delete. */
-               list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
+               hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
                        if (f->state == I40E_FILTER_REMOVE) {
-                               WARN_ON(f->counter != 0);
                                /* Move the element into temporary del_list */
-                               list_move_tail(&f->list, &tmp_del_list);
+                               hash_del(&f->hlist);
+                               hlist_add_head(&f->hlist, &tmp_del_list);
                                vsi->active_filters--;
+
+                               /* Avoid counting removed filters */
+                               continue;
                        }
                        if (f->state == I40E_FILTER_NEW) {
-                               WARN_ON(f->counter == 0);
-                               /* Move the element into temporary add_list */
-                               list_move_tail(&f->list, &tmp_add_list);
+                               hash_del(&f->hlist);
+                               hlist_add_head(&f->hlist, &tmp_add_list);
                        }
+
+                       /* Count the number of each type of filter we have
+                        * remaining, ignoring any filters we're about to
+                        * delete.
+                        */
+                       if (f->vlan > 0)
+                               vlan_filters++;
+                       else if (!f->vlan)
+                               non_vlan_filters++;
+                       else
+                               vlan_any_filters++;
                }
-               spin_unlock_bh(&vsi->mac_filter_list_lock);
+
+               /* We should never have VLAN=-1 filters at the same time as we
+                * have either VLAN=0 or VLAN>0 filters, so warn about this
+                * case here to help catch any issues.
+                */
+               WARN_ON(vlan_any_filters && (vlan_filters + non_vlan_filters));
+
+               /* If we only have VLAN=0 filters remaining, and don't have
+                * any other VLAN filters, we need to convert these VLAN=0
+                * filters into VLAN=-1 (I40E_VLAN_ANY) so that we operate
+                * correctly in non-VLAN mode and receive all traffic tagged
+                * or untagged.
+                */
+               if (non_vlan_filters && !vlan_filters) {
+                       hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f,
+                                          hlist) {
+                               /* Only replace VLAN=0 filters */
+                               if (f->vlan)
+                                       continue;
+
+                               /* Allocate a replacement element */
+                               add_head = kzalloc(sizeof(*add_head),
+                                                  GFP_KERNEL);
+                               if (!add_head)
+                                       goto err_no_memory_locked;
+
+                               /* Copy the filter, with new state and VLAN */
+                               *add_head = *f;
+                               add_head->state = I40E_FILTER_NEW;
+                               add_head->vlan = I40E_VLAN_ANY;
+
+                               /* Move the replacement to the add list */
+                               INIT_HLIST_NODE(&add_head->hlist);
+                               hlist_add_head(&add_head->hlist,
+                                              &tmp_add_list);
+
+                               /* Move the original to the delete list */
+                               f->state = I40E_FILTER_REMOVE;
+                               hash_del(&f->hlist);
+                               hlist_add_head(&f->hlist, &tmp_del_list);
+                               vsi->active_filters--;
+                       }
+
+                       /* Also update any filters on the tmp_add list */
+                       hlist_for_each_entry(f, &tmp_add_list, hlist) {
+                               if (!f->vlan)
+                                       f->vlan = I40E_VLAN_ANY;
+                       }
+                       add_head = NULL;
+               }
+               spin_unlock_bh(&vsi->mac_filter_hash_lock);
        }
 
        /* Now process 'del_list' outside the lock */
-       if (!list_empty(&tmp_del_list)) {
+       if (!hlist_empty(&tmp_del_list)) {
                filter_list_len = hw->aq.asq_buf_size /
                            sizeof(struct i40e_aqc_remove_macvlan_element_data);
                list_size = filter_list_len *
                            sizeof(struct i40e_aqc_remove_macvlan_element_data);
                del_list = kzalloc(list_size, GFP_ATOMIC);
-               if (!del_list) {
-                       /* Undo VSI's MAC filter entry element updates */
-                       spin_lock_bh(&vsi->mac_filter_list_lock);
-                       i40e_undo_del_filter_entries(vsi, &tmp_del_list);
-                       spin_unlock_bh(&vsi->mac_filter_list_lock);
-                       retval = -ENOMEM;
-                       goto out;
-               }
+               if (!del_list)
+                       goto err_no_memory;
 
-               list_for_each_entry_safe(f, ftmp, &tmp_del_list, list) {
+               hlist_for_each_entry_safe(f, h, &tmp_del_list, hlist) {
                        cmd_flags = 0;
 
                        /* add to delete list */
@@ -1920,68 +2020,47 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 
                        /* flush a full buffer */
                        if (num_del == filter_list_len) {
-                               aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid,
-                                                               del_list,
-                                                               num_del, NULL);
-                               aq_err = hw->aq.asq_last_status;
-                               num_del = 0;
+                               i40e_aqc_del_filters(vsi, vsi_name, del_list,
+                                                    num_del, &retval);
                                memset(del_list, 0, list_size);
-
-                               /* Explicitly ignore and do not report when
-                                * firmware returns ENOENT.
-                                */
-                               if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) {
-                                       retval = -EIO;
-                                       dev_info(&pf->pdev->dev,
-                                                "ignoring delete macvlan error on %s, err %s, aq_err %s\n",
-                                                vsi_name,
-                                                i40e_stat_str(hw, aq_ret),
-                                                i40e_aq_str(hw, aq_err));
-                               }
+                               num_del = 0;
                        }
                        /* Release memory for MAC filter entries which were
                         * synced up with HW.
                         */
-                       list_del(&f->list);
+                       hlist_del(&f->hlist);
                        kfree(f);
                }
 
                if (num_del) {
-                       aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, del_list,
-                                                       num_del, NULL);
-                       aq_err = hw->aq.asq_last_status;
-                       num_del = 0;
-
-                       /* Explicitly ignore and do not report when firmware
-                        * returns ENOENT.
-                        */
-                       if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) {
-                               retval = -EIO;
-                               dev_info(&pf->pdev->dev,
-                                        "ignoring delete macvlan error on %s, err %s aq_err %s\n",
-                                        vsi_name,
-                                        i40e_stat_str(hw, aq_ret),
-                                        i40e_aq_str(hw, aq_err));
-                       }
+                       i40e_aqc_del_filters(vsi, vsi_name, del_list,
+                                            num_del, &retval);
                }
 
                kfree(del_list);
                del_list = NULL;
        }
 
-       if (!list_empty(&tmp_add_list)) {
+       /* After finishing notifying firmware of the deleted filters, update
+        * the cached value of vsi->has_vlan_filter. Note that we are safe to
+        * use just !!vlan_filters here because if we only have VLAN=0 (that
+        * is, non_vlan_filters) these will all be converted to VLAN=-1 in the
+        * logic above already so this value would still be correct.
+        */
+       vsi->has_vlan_filter = !!vlan_filters;
+
+       if (!hlist_empty(&tmp_add_list)) {
                /* Do all the adds now. */
                filter_list_len = hw->aq.asq_buf_size /
                               sizeof(struct i40e_aqc_add_macvlan_element_data);
                list_size = filter_list_len *
                               sizeof(struct i40e_aqc_add_macvlan_element_data);
                add_list = kzalloc(list_size, GFP_ATOMIC);
-               if (!add_list) {
-                       retval = -ENOMEM;
-                       goto out;
-               }
+               if (!add_list)
+                       goto err_no_memory;
+
                num_add = 0;
-               list_for_each_entry(f, &tmp_add_list, list) {
+               hlist_for_each_entry(f, &tmp_add_list, hlist) {
                        if (test_bit(__I40E_FILTER_OVERFLOW_PROMISC,
                                     &vsi->state)) {
                                f->state = I40E_FILTER_FAILED;
@@ -2006,57 +2085,28 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 
                        /* flush a full buffer */
                        if (num_add == filter_list_len) {
-                               aq_ret = i40e_aq_add_macvlan(hw, vsi->seid,
-                                                            add_list, num_add,
-                                                            NULL);
-                               aq_err = hw->aq.asq_last_status;
-                               fcnt = i40e_update_filter_state(num_add,
-                                                               add_list,
-                                                               add_head,
-                                                               aq_ret);
-                               vsi->active_filters += fcnt;
-
-                               if (fcnt != num_add) {
-                                       promisc_changed = true;
-                                       set_bit(__I40E_FILTER_OVERFLOW_PROMISC,
-                                               &vsi->state);
-                                       vsi->promisc_threshold =
-                                               (vsi->active_filters * 3) / 4;
-                                       dev_warn(&pf->pdev->dev,
-                                                "Error %s adding RX filters on %s, promiscuous mode forced on\n",
-                                                i40e_aq_str(hw, aq_err),
-                                                vsi_name);
-                               }
+                               i40e_aqc_add_filters(vsi, vsi_name, add_list,
+                                                    add_head, num_add,
+                                                    &promisc_changed);
                                memset(add_list, 0, list_size);
                                num_add = 0;
                        }
                }
                if (num_add) {
-                       aq_ret = i40e_aq_add_macvlan(hw, vsi->seid,
-                                                    add_list, num_add, NULL);
-                       aq_err = hw->aq.asq_last_status;
-                       fcnt = i40e_update_filter_state(num_add, add_list,
-                                                       add_head, aq_ret);
-                       vsi->active_filters += fcnt;
-                       if (fcnt != num_add) {
-                               promisc_changed = true;
-                               set_bit(__I40E_FILTER_OVERFLOW_PROMISC,
-                                       &vsi->state);
-                               vsi->promisc_threshold =
-                                               (vsi->active_filters * 3) / 4;
-                               dev_warn(&pf->pdev->dev,
-                                        "Error %s adding RX filters on %s, promiscuous mode forced on\n",
-                                        i40e_aq_str(hw, aq_err), vsi_name);
-                       }
+                       i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head,
+                                            num_add, &promisc_changed);
                }
                /* Now move all of the filters from the temp add list back to
                 * the VSI's list.
                 */
-               spin_lock_bh(&vsi->mac_filter_list_lock);
-               list_for_each_entry_safe(f, ftmp, &tmp_add_list, list) {
-                       list_move_tail(&f->list, &vsi->mac_filter_list);
+               spin_lock_bh(&vsi->mac_filter_hash_lock);
+               hlist_for_each_entry_safe(f, h, &tmp_add_list, hlist) {
+                       u64 key = i40e_addr_to_hkey(f->macaddr);
+
+                       hlist_del(&f->hlist);
+                       hash_add(vsi->mac_filter_hash, &f->hlist, key);
                }
-               spin_unlock_bh(&vsi->mac_filter_list_lock);
+               spin_unlock_bh(&vsi->mac_filter_hash_lock);
                kfree(add_list);
                add_list = NULL;
        }
@@ -2068,12 +2118,12 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
                /* See if we have any failed filters. We can't drop out of
                 * promiscuous until these have all been deleted.
                 */
-               spin_lock_bh(&vsi->mac_filter_list_lock);
-               list_for_each_entry(f, &vsi->mac_filter_list, list) {
+               spin_lock_bh(&vsi->mac_filter_hash_lock);
+               hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
                        if (f->state == I40E_FILTER_FAILED)
                                failed_count++;
                }
-               spin_unlock_bh(&vsi->mac_filter_list_lock);
+               spin_unlock_bh(&vsi->mac_filter_hash_lock);
                if (!failed_count) {
                        dev_info(&pf->pdev->dev,
                                 "filter logjam cleared on %s, leaving overflow promiscuous mode\n",
@@ -2201,6 +2251,18 @@ out:
 
        clear_bit(__I40E_CONFIG_BUSY, &vsi->state);
        return retval;
+
+err_no_memory:
+       /* Restore elements on the temporary add and delete lists */
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
+err_no_memory_locked:
+       i40e_undo_filter_entries(vsi, &tmp_del_list);
+       i40e_undo_filter_entries(vsi, &tmp_add_list);
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+       vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+       clear_bit(__I40E_CONFIG_BUSY, &vsi->state);
+       return -ENOMEM;
 }
 
 /**
@@ -2355,34 +2417,33 @@ static void i40e_vlan_rx_register(struct net_device *netdev, u32 features)
  **/
 int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid)
 {
-       struct i40e_mac_filter *f, *ftmp, *add_f;
-       bool is_netdev, is_vf;
-
-       is_vf = (vsi->type == I40E_VSI_SRIOV);
-       is_netdev = !!(vsi->netdev);
+       struct i40e_mac_filter *f, *add_f, *del_f;
+       struct hlist_node *h;
+       int bkt;
 
        /* Locked once because all functions invoked below iterates list*/
-       spin_lock_bh(&vsi->mac_filter_list_lock);
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
 
-       if (is_netdev) {
-               add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, vid,
-                                       is_vf, is_netdev);
+       if (vsi->netdev) {
+               add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, vid);
                if (!add_f) {
                        dev_info(&vsi->back->pdev->dev,
                                 "Could not add vlan filter %d for %pM\n",
                                 vid, vsi->netdev->dev_addr);
-                       spin_unlock_bh(&vsi->mac_filter_list_lock);
+                       spin_unlock_bh(&vsi->mac_filter_hash_lock);
                        return -ENOMEM;
                }
        }
 
-       list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
-               add_f = i40e_add_filter(vsi, f->macaddr, vid, is_vf, is_netdev);
+       hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+               if (f->state == I40E_FILTER_REMOVE)
+                       continue;
+               add_f = i40e_add_filter(vsi, f->macaddr, vid);
                if (!add_f) {
                        dev_info(&vsi->back->pdev->dev,
                                 "Could not add vlan filter %d for %pM\n",
                                 vid, f->macaddr);
-                       spin_unlock_bh(&vsi->mac_filter_list_lock);
+                       spin_unlock_bh(&vsi->mac_filter_hash_lock);
                        return -ENOMEM;
                }
        }
@@ -2392,19 +2453,17 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid)
         * with 0, so we now accept untagged and specified tagged traffic
         * (and not all tags along with untagged)
         */
-       if (vid > 0) {
-               if (is_netdev && i40e_find_filter(vsi, vsi->netdev->dev_addr,
-                                                 I40E_VLAN_ANY,
-                                                 is_vf, is_netdev)) {
-                       i40e_del_filter(vsi, vsi->netdev->dev_addr,
-                                       I40E_VLAN_ANY, is_vf, is_netdev);
-                       add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, 0,
-                                               is_vf, is_netdev);
+       if (vid > 0 && vsi->netdev) {
+               del_f = i40e_find_filter(vsi, vsi->netdev->dev_addr,
+                                        I40E_VLAN_ANY);
+               if (del_f) {
+                       __i40e_del_filter(vsi, del_f);
+                       add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, 0);
                        if (!add_f) {
                                dev_info(&vsi->back->pdev->dev,
                                         "Could not add filter 0 for %pM\n",
                                         vsi->netdev->dev_addr);
-                               spin_unlock_bh(&vsi->mac_filter_list_lock);
+                               spin_unlock_bh(&vsi->mac_filter_hash_lock);
                                return -ENOMEM;
                        }
                }
@@ -2412,25 +2471,26 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid)
 
        /* Do not assume that I40E_VLAN_ANY should be reset to VLAN 0 */
        if (vid > 0 && !vsi->info.pvid) {
-               list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
-                       if (!i40e_find_filter(vsi, f->macaddr, I40E_VLAN_ANY,
-                                             is_vf, is_netdev))
+               hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+                       if (f->state == I40E_FILTER_REMOVE)
+                               continue;
+                       del_f = i40e_find_filter(vsi, f->macaddr,
+                                                I40E_VLAN_ANY);
+                       if (!del_f)
                                continue;
-                       i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY,
-                                       is_vf, is_netdev);
-                       add_f = i40e_add_filter(vsi, f->macaddr,
-                                               0, is_vf, is_netdev);
+                       __i40e_del_filter(vsi, del_f);
+                       add_f = i40e_add_filter(vsi, f->macaddr, 0);
                        if (!add_f) {
                                dev_info(&vsi->back->pdev->dev,
                                         "Could not add filter 0 for %pM\n",
                                        f->macaddr);
-                               spin_unlock_bh(&vsi->mac_filter_list_lock);
+                               spin_unlock_bh(&vsi->mac_filter_hash_lock);
                                return -ENOMEM;
                        }
                }
        }
 
-       spin_unlock_bh(&vsi->mac_filter_list_lock);
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
        /* schedule our worker thread which will take care of
         * applying the new filter changes
@@ -2443,79 +2503,31 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid)
  * i40e_vsi_kill_vlan - Remove vsi membership for given vlan
  * @vsi: the vsi being configured
  * @vid: vlan id to be removed (0 = untagged only , -1 = any)
- *
- * Return: 0 on success or negative otherwise
  **/
-int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid)
+void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid)
 {
        struct net_device *netdev = vsi->netdev;
-       struct i40e_mac_filter *f, *ftmp, *add_f;
-       bool is_vf, is_netdev;
-       int filter_count = 0;
-
-       is_vf = (vsi->type == I40E_VSI_SRIOV);
-       is_netdev = !!(netdev);
+       struct i40e_mac_filter *f;
+       struct hlist_node *h;
+       int bkt;
 
        /* Locked once because all functions invoked below iterates list */
-       spin_lock_bh(&vsi->mac_filter_list_lock);
-
-       if (is_netdev)
-               i40e_del_filter(vsi, netdev->dev_addr, vid, is_vf, is_netdev);
-
-       list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list)
-               i40e_del_filter(vsi, f->macaddr, vid, is_vf, is_netdev);
-
-       /* go through all the filters for this VSI and if there is only
-        * vid == 0 it means there are no other filters, so vid 0 must
-        * be replaced with -1. This signifies that we should from now
-        * on accept any traffic (with any tag present, or untagged)
-        */
-       list_for_each_entry(f, &vsi->mac_filter_list, list) {
-               if (is_netdev) {
-                       if (f->vlan &&
-                           ether_addr_equal(netdev->dev_addr, f->macaddr))
-                               filter_count++;
-               }
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
 
-               if (f->vlan)
-                       filter_count++;
-       }
+       if (vsi->netdev)
+               i40e_del_filter(vsi, netdev->dev_addr, vid);
 
-       if (!filter_count && is_netdev) {
-               i40e_del_filter(vsi, netdev->dev_addr, 0, is_vf, is_netdev);
-               f = i40e_add_filter(vsi, netdev->dev_addr, I40E_VLAN_ANY,
-                                   is_vf, is_netdev);
-               if (!f) {
-                       dev_info(&vsi->back->pdev->dev,
-                                "Could not add filter %d for %pM\n",
-                                I40E_VLAN_ANY, netdev->dev_addr);
-                       spin_unlock_bh(&vsi->mac_filter_list_lock);
-                       return -ENOMEM;
-               }
+       hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+               if (f->vlan == vid)
+                       __i40e_del_filter(vsi, f);
        }
 
-       if (!filter_count) {
-               list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
-                       i40e_del_filter(vsi, f->macaddr, 0, is_vf, is_netdev);
-                       add_f = i40e_add_filter(vsi, f->macaddr, I40E_VLAN_ANY,
-                                               is_vf, is_netdev);
-                       if (!add_f) {
-                               dev_info(&vsi->back->pdev->dev,
-                                        "Could not add filter %d for %pM\n",
-                                        I40E_VLAN_ANY, f->macaddr);
-                               spin_unlock_bh(&vsi->mac_filter_list_lock);
-                               return -ENOMEM;
-                       }
-               }
-       }
-
-       spin_unlock_bh(&vsi->mac_filter_list_lock);
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
        /* schedule our worker thread which will take care of
         * applying the new filter changes
         */
        i40e_service_event_schedule(vsi->back);
-       return 0;
 }
 
 /**
@@ -3316,6 +3328,33 @@ static irqreturn_t i40e_msix_clean_rings(int irq, void *data)
        return IRQ_HANDLED;
 }
 
+/**
+ * i40e_irq_affinity_notify - Callback for affinity changes
+ * @notify: context as to what irq was changed
+ * @mask: the new affinity mask
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * so that we may register to receive changes to the irq affinity masks.
+ **/
+static void i40e_irq_affinity_notify(struct irq_affinity_notify *notify,
+                                    const cpumask_t *mask)
+{
+       struct i40e_q_vector *q_vector =
+               container_of(notify, struct i40e_q_vector, affinity_notify);
+
+       q_vector->affinity_mask = *mask;
+}
+
+/**
+ * i40e_irq_affinity_release - Callback for affinity notifier release
+ * @ref: internal core kernel usage
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * to inform the current notification subscriber that they will no longer
+ * receive notifications.
+ **/
+static void i40e_irq_affinity_release(struct kref *ref) {}
+
 /**
  * i40e_vsi_request_irq_msix - Initialize MSI-X interrupts
  * @vsi: the VSI being configured
@@ -3331,10 +3370,13 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
        int rx_int_idx = 0;
        int tx_int_idx = 0;
        int vector, err;
+       int irq_num;
 
        for (vector = 0; vector < q_vectors; vector++) {
                struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
 
+               irq_num = pf->msix_entries[base + vector].vector;
+
                if (q_vector->tx.ring && q_vector->rx.ring) {
                        snprintf(q_vector->name, sizeof(q_vector->name) - 1,
                                 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
@@ -3349,7 +3391,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
                        /* skip this unused q_vector */
                        continue;
                }
-               err = request_irq(pf->msix_entries[base + vector].vector,
+               err = request_irq(irq_num,
                                  vsi->irq_handler,
                                  0,
                                  q_vector->name,
@@ -3359,9 +3401,13 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
                                 "MSIX request_irq failed, error: %d\n", err);
                        goto free_queue_irqs;
                }
+
+               /* register for affinity change notifications */
+               q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
+               q_vector->affinity_notify.release = i40e_irq_affinity_release;
+               irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
                /* assign the mask for this irq */
-               irq_set_affinity_hint(pf->msix_entries[base + vector].vector,
-                                     &q_vector->affinity_mask);
+               irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
        }
 
        vsi->irqs_ready = true;
@@ -3370,10 +3416,10 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
 free_queue_irqs:
        while (vector) {
                vector--;
-               irq_set_affinity_hint(pf->msix_entries[base + vector].vector,
-                                     NULL);
-               free_irq(pf->msix_entries[base + vector].vector,
-                        &(vsi->q_vectors[vector]));
+               irq_num = pf->msix_entries[base + vector].vector;
+               irq_set_affinity_notifier(irq_num, NULL);
+               irq_set_affinity_hint(irq_num, NULL);
+               free_irq(irq_num, &vsi->q_vectors[vector]);
        }
        return err;
 }
@@ -3968,29 +4014,35 @@ static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable)
 }
 
 /**
- * i40e_vsi_control_rings - Start or stop a VSI's rings
+ * i40e_vsi_start_rings - Start a VSI's rings
  * @vsi: the VSI being configured
- * @enable: start or stop the rings
  **/
-int i40e_vsi_control_rings(struct i40e_vsi *vsi, bool request)
+int i40e_vsi_start_rings(struct i40e_vsi *vsi)
 {
        int ret = 0;
 
        /* do rx first for enable and last for disable */
-       if (request) {
-               ret = i40e_vsi_control_rx(vsi, request);
-               if (ret)
-                       return ret;
-               ret = i40e_vsi_control_tx(vsi, request);
-       } else {
-               /* Ignore return value, we need to shutdown whatever we can */
-               i40e_vsi_control_tx(vsi, request);
-               i40e_vsi_control_rx(vsi, request);
-       }
+       ret = i40e_vsi_control_rx(vsi, true);
+       if (ret)
+               return ret;
+       ret = i40e_vsi_control_tx(vsi, true);
 
        return ret;
 }
 
+/**
+ * i40e_vsi_stop_rings - Stop a VSI's rings
+ * @vsi: the VSI being configured
+ **/
+void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
+{
+       /* Rx goes first on enable and last on disable, so stop Tx first.
+        * Ignore the return values; we need to shut down whatever we can.
+        */
+       i40e_vsi_control_tx(vsi, false);
+       i40e_vsi_control_rx(vsi, false);
+}
+
 /**
  * i40e_vsi_free_irq - Free the irq association with the OS
  * @vsi: the VSI being configured
@@ -4012,19 +4064,23 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
 
                vsi->irqs_ready = false;
                for (i = 0; i < vsi->num_q_vectors; i++) {
-                       u16 vector = i + base;
+                       int irq_num;
+                       u16 vector;
+
+                       vector = i + base;
+                       irq_num = pf->msix_entries[vector].vector;
 
                        /* free only the irqs that were actually requested */
                        if (!vsi->q_vectors[i] ||
                            !vsi->q_vectors[i]->num_ringpairs)
                                continue;
 
+                       /* clear the affinity notifier in the IRQ descriptor */
+                       irq_set_affinity_notifier(irq_num, NULL);
                        /* clear the affinity_mask in the IRQ descriptor */
-                       irq_set_affinity_hint(pf->msix_entries[vector].vector,
-                                             NULL);
-                       synchronize_irq(pf->msix_entries[vector].vector);
-                       free_irq(pf->msix_entries[vector].vector,
-                                vsi->q_vectors[i]);
+                       irq_set_affinity_hint(irq_num, NULL);
+                       synchronize_irq(irq_num);
+                       free_irq(irq_num, vsi->q_vectors[i]);
 
                        /* Tear down the interrupt queue link list
                         *
@@ -4635,29 +4691,6 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
        return num_tc;
 }
 
-/**
- * i40e_pf_get_default_tc - Get bitmap for first enabled TC
- * @pf: PF being queried
- *
- * Return a bitmap for first enabled traffic class for this PF.
- **/
-static u8 i40e_pf_get_default_tc(struct i40e_pf *pf)
-{
-       u8 enabled_tc = pf->hw.func_caps.enabled_tcmap;
-       u8 i = 0;
-
-       if (!enabled_tc)
-               return 0x1; /* TC0 */
-
-       /* Find the first enabled TC */
-       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
-               if (enabled_tc & BIT(i))
-                       break;
-       }
-
-       return BIT(i);
-}
-
 /**
  * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes
  * @pf: PF being queried
@@ -4668,7 +4701,7 @@ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
 {
        /* If DCB is not enabled for this PF then just return default TC */
        if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
-               return i40e_pf_get_default_tc(pf);
+               return I40E_DEFAULT_TRAFFIC_CLASS;
 
        /* SFP mode we want PF to be enabled for all TCs */
        if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
@@ -4678,7 +4711,7 @@ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
        if (pf->hw.func_caps.iscsi)
                return i40e_get_iscsi_tc_map(pf);
        else
-               return i40e_pf_get_default_tc(pf);
+               return I40E_DEFAULT_TRAFFIC_CLASS;
 }
 
 /**
@@ -5024,7 +5057,7 @@ static void i40e_dcb_reconfigure(struct i40e_pf *pf)
                if (v == pf->lan_vsi)
                        tc_map = i40e_pf_get_tc_map(pf);
                else
-                       tc_map = i40e_pf_get_default_tc(pf);
+                       tc_map = I40E_DEFAULT_TRAFFIC_CLASS;
 #ifdef I40E_FCOE
                if (pf->vsi[v]->type == I40E_VSI_FCOE)
                        tc_map = i40e_get_fcoe_tc_map(pf);
@@ -5208,7 +5241,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi)
                i40e_configure_msi_and_legacy(vsi);
 
        /* start rings */
-       err = i40e_vsi_control_rings(vsi, true);
+       err = i40e_vsi_start_rings(vsi);
        if (err)
                return err;
 
@@ -5305,7 +5338,7 @@ void i40e_down(struct i40e_vsi *vsi)
                netif_tx_disable(vsi->netdev);
        }
        i40e_vsi_disable_irq(vsi);
-       i40e_vsi_control_rings(vsi, false);
+       i40e_vsi_stop_rings(vsi);
        i40e_napi_disable_all(vsi);
 
        for (i = 0; i < vsi->num_queue_pairs; i++) {
@@ -5712,7 +5745,7 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf,
        u8 type;
 
        /* Not DCB capable or capability disabled */
-       if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
+       if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
                return ret;
 
        /* Ignore if event is not for Nearest Bridge */
@@ -6688,7 +6721,6 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi);
 static void i40e_fdir_sb_setup(struct i40e_pf *pf)
 {
        struct i40e_vsi *vsi;
-       int i;
 
        /* quick workaround for an NVM issue that leaves a critical register
         * uninitialized
@@ -6699,6 +6731,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf)
                        0xeacb7d61, 0xaa4f05b6, 0x9c5c89ed, 0xfc425ddb,
                        0xa4654832, 0xfc7461d4, 0x8f827619, 0xf5c63c21,
                        0x95b3a76d};
+               int i;
 
                for (i = 0; i <= I40E_GLQF_HKEY_MAX_INDEX; i++)
                        wr32(&pf->hw, I40E_GLQF_HKEY(i), hkey[i]);
@@ -6708,13 +6741,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf)
                return;
 
        /* find existing VSI and see if it needs configuring */
-       vsi = NULL;
-       for (i = 0; i < pf->num_alloc_vsi; i++) {
-               if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
-                       vsi = pf->vsi[i];
-                       break;
-               }
-       }
+       vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
 
        /* create a new VSI if none exists */
        if (!vsi) {
@@ -6736,15 +6763,12 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf)
  **/
 static void i40e_fdir_teardown(struct i40e_pf *pf)
 {
-       int i;
+       struct i40e_vsi *vsi;
 
        i40e_fdir_filter_exit(pf);
-       for (i = 0; i < pf->num_alloc_vsi; i++) {
-               if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
-                       i40e_vsi_release(pf->vsi[i]);
-                       break;
-               }
-       }
+       vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+       if (vsi)
+               i40e_vsi_release(vsi);
 }
 
 /**
@@ -7372,7 +7396,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
                                pf->rss_table_size : 64;
        vsi->netdev_registered = false;
        vsi->work_limit = I40E_DEFAULT_IRQ_WORK;
-       INIT_LIST_HEAD(&vsi->mac_filter_list);
+       hash_init(vsi->mac_filter_hash);
        vsi->irqs_ready = false;
 
        ret = i40e_set_num_rings_in_vsi(vsi);
@@ -7387,7 +7411,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
        i40e_vsi_setup_irqhandler(vsi, i40e_msix_clean_rings);
 
        /* Initialize VSI lock */
-       spin_lock_init(&vsi->mac_filter_list_lock);
+       spin_lock_init(&vsi->mac_filter_hash_lock);
        pf->vsi[vsi_idx] = vsi;
        ret = vsi_idx;
        goto unlock_pf;
@@ -7702,6 +7726,7 @@ static int i40e_init_msix(struct i40e_pf *pf)
                pf->flags &= ~I40E_FLAG_MSIX_ENABLED;
                kfree(pf->msix_entries);
                pf->msix_entries = NULL;
+               pci_disable_msix(pf->pdev);
                return -ENODEV;
 
        } else if (v_actual == I40E_MIN_MSIX) {
@@ -8362,8 +8387,8 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
 
                i40e_pf_config_rss(pf);
        }
-       dev_info(&pf->pdev->dev, "RSS count/HW max RSS count:  %d/%d\n",
-                pf->alloc_rss_size, pf->rss_size_max);
+       dev_info(&pf->pdev->dev, "User requested queue count/HW max RSS count:  %d/%d\n",
+                vsi->req_queue_pairs, pf->rss_size_max);
        return pf->alloc_rss_size;
 }
 
@@ -8506,15 +8531,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
        int err = 0;
        int size;
 
-       pf->msg_enable = netif_msg_init(I40E_DEFAULT_MSG_ENABLE,
-                               (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK));
-       if (debug != -1 && debug != I40E_DEFAULT_MSG_ENABLE) {
-               if (I40E_DEBUG_USER & debug)
-                       pf->hw.debug_mask = debug;
-               pf->msg_enable = netif_msg_init((debug & ~I40E_DEBUG_USER),
-                                               I40E_DEFAULT_MSG_ENABLE);
-       }
-
        /* Set default capability flags */
        pf->flags = I40E_FLAG_RX_CSUM_ENABLED |
                    I40E_FLAG_MSI_ENABLED     |
@@ -9051,7 +9067,7 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
                return 0;
 
        return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode,
-                                      nlflags, 0, 0, filter_mask, NULL);
+                                      0, 0, nlflags, filter_mask, NULL);
 }
 
 /* Hardware supports L4 tunnel length of 128B (=2^7) which includes
@@ -9180,24 +9196,18 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
        if (vsi->type == I40E_VSI_MAIN) {
                SET_NETDEV_DEV(netdev, &pf->pdev->dev);
                ether_addr_copy(mac_addr, hw->mac.perm_addr);
-               /* The following steps are necessary to prevent reception
-                * of tagged packets - some older NVM configurations load a
-                * default a MAC-VLAN filter that accepts any tagged packet
-                * which must be replaced by a normal filter.
-                */
-               i40e_rm_default_mac_filter(vsi, mac_addr);
-               spin_lock_bh(&vsi->mac_filter_list_lock);
-               i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY, false, true);
-               spin_unlock_bh(&vsi->mac_filter_list_lock);
+               spin_lock_bh(&vsi->mac_filter_hash_lock);
+               i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY);
+               spin_unlock_bh(&vsi->mac_filter_hash_lock);
        } else {
                /* relate the VSI_VMDQ name to the VSI_MAIN name */
                snprintf(netdev->name, IFNAMSIZ, "%sv%%d",
                         pf->vsi[pf->lan_vsi]->netdev->name);
                random_ether_addr(mac_addr);
 
-               spin_lock_bh(&vsi->mac_filter_list_lock);
-               i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY, false, false);
-               spin_unlock_bh(&vsi->mac_filter_list_lock);
+               spin_lock_bh(&vsi->mac_filter_hash_lock);
+               i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY);
+               spin_unlock_bh(&vsi->mac_filter_hash_lock);
        }
 
        ether_addr_copy(netdev->dev_addr, mac_addr);
@@ -9286,7 +9296,9 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
        struct i40e_pf *pf = vsi->back;
        struct i40e_hw *hw = &pf->hw;
        struct i40e_vsi_context ctxt;
-       struct i40e_mac_filter *f, *ftmp;
+       struct i40e_mac_filter *f;
+       struct hlist_node *h;
+       int bkt;
 
        u8 enabled_tc = 0x1; /* TC0 enabled */
        int f_count = 0;
@@ -9485,13 +9497,13 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
 
        vsi->active_filters = 0;
        clear_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state);
-       spin_lock_bh(&vsi->mac_filter_list_lock);
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
        /* If macvlan filters already exist, force them to get loaded */
-       list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
+       hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
                f->state = I40E_FILTER_NEW;
                f_count++;
        }
-       spin_unlock_bh(&vsi->mac_filter_list_lock);
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
        if (f_count) {
                vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
@@ -9521,11 +9533,12 @@ err:
  **/
 int i40e_vsi_release(struct i40e_vsi *vsi)
 {
-       struct i40e_mac_filter *f, *ftmp;
+       struct i40e_mac_filter *f;
+       struct hlist_node *h;
        struct i40e_veb *veb = NULL;
        struct i40e_pf *pf;
        u16 uplink_seid;
-       int i, n;
+       int i, n, bkt;
 
        pf = vsi->back;
 
@@ -9555,11 +9568,19 @@ int i40e_vsi_release(struct i40e_vsi *vsi)
                i40e_vsi_disable_irq(vsi);
        }
 
-       spin_lock_bh(&vsi->mac_filter_list_lock);
-       list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list)
-               i40e_del_filter(vsi, f->macaddr, f->vlan,
-                               f->is_vf, f->is_netdev);
-       spin_unlock_bh(&vsi->mac_filter_list_lock);
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
+
+       /* clear the sync flag on all filters */
+       if (vsi->netdev) {
+               __dev_uc_unsync(vsi->netdev, NULL);
+               __dev_mc_unsync(vsi->netdev, NULL);
+       }
+
+       /* make sure any remaining filters are marked for deletion */
+       hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
+               __i40e_del_filter(vsi, f);
+
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
        i40e_sync_vsi_filters(vsi);
 
@@ -9703,8 +9724,6 @@ static struct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
        pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
        pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
        i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
-       if (vsi->type == I40E_VSI_MAIN)
-               i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr);
 
        /* assign it some queues */
        ret = i40e_alloc_rings(vsi);
@@ -10828,10 +10847,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        mutex_init(&hw->aq.asq_mutex);
        mutex_init(&hw->aq.arq_mutex);
 
-       if (debug != -1) {
-               pf->msg_enable = pf->hw.debug_mask;
-               pf->msg_enable = debug;
-       }
+       pf->msg_enable = netif_msg_init(debug,
+                                       NETIF_MSG_DRV |
+                                       NETIF_MSG_PROBE |
+                                       NETIF_MSG_LINK);
+       if (debug < -1)
+               pf->hw.debug_mask = debug;
 
        /* do a special CORER for clearing PXE mode once at init */
        if (hw->revision_id == 0 &&
@@ -10973,7 +10994,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        err = i40e_init_pf_dcb(pf);
        if (err) {
                dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err);
-               pf->flags &= ~(I40E_FLAG_DCB_CAPABLE & I40E_FLAG_DCB_ENABLED);
+               pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | I40E_FLAG_DCB_ENABLED);
                /* Continue without DCB enabled */
        }
 #endif /* CONFIG_I40E_DCB */
index 954efe3118dbb15fcf436e5a56f0a98499d165a4..38ee18f1112444df1ad753f845d1dd1b1fd6ddcc 100644 (file)
@@ -722,9 +722,20 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
                        *((u16 *)&bytes[2]) = hw->nvm_wait_opcode;
                }
 
+               /* Clear error status on read */
+               if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR)
+                       hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+
                return 0;
        }
 
+       /* Clear the error status even if it was not read, and log it */
+       if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) {
+               i40e_debug(hw, I40E_DEBUG_NVM,
+                          "Clearing I40E_NVMUPD_STATE_ERROR state without reading\n");
+               hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+       }
+
        switch (hw->nvmupd_state) {
        case I40E_NVMUPD_STATE_INIT:
                status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno);
@@ -1074,6 +1085,11 @@ void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode)
                }
                hw->nvm_wait_opcode = 0;
 
+               if (hw->aq.arq_last_status) {
+                       hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR;
+                       return;
+               }
+
                switch (hw->nvmupd_state) {
                case I40E_NVMUPD_STATE_INIT_WAIT:
                        hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
index f1feceab758a5235237df685f2b8666a5a9c0ffd..5e2272c9e717e261e7018a0107bab6523e3339cf 100644 (file)
@@ -159,16 +159,15 @@ static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
        struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
        struct timespec64 now, then;
-       unsigned long flags;
 
        then = ns_to_timespec64(delta);
-       spin_lock_irqsave(&pf->tmreg_lock, flags);
+       mutex_lock(&pf->tmreg_lock);
 
        i40e_ptp_read(pf, &now);
        now = timespec64_add(now, then);
        i40e_ptp_write(pf, (const struct timespec64 *)&now);
 
-       spin_unlock_irqrestore(&pf->tmreg_lock, flags);
+       mutex_unlock(&pf->tmreg_lock);
 
        return 0;
 }
@@ -184,11 +183,10 @@ static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 static int i40e_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 {
        struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
-       unsigned long flags;
 
-       spin_lock_irqsave(&pf->tmreg_lock, flags);
+       mutex_lock(&pf->tmreg_lock);
        i40e_ptp_read(pf, ts);
-       spin_unlock_irqrestore(&pf->tmreg_lock, flags);
+       mutex_unlock(&pf->tmreg_lock);
 
        return 0;
 }
@@ -205,11 +203,10 @@ static int i40e_ptp_settime(struct ptp_clock_info *ptp,
                            const struct timespec64 *ts)
 {
        struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
-       unsigned long flags;
 
-       spin_lock_irqsave(&pf->tmreg_lock, flags);
+       mutex_lock(&pf->tmreg_lock);
        i40e_ptp_write(pf, ts);
-       spin_unlock_irqrestore(&pf->tmreg_lock, flags);
+       mutex_unlock(&pf->tmreg_lock);
 
        return 0;
 }
@@ -229,6 +226,47 @@ static int i40e_ptp_feature_enable(struct ptp_clock_info *ptp,
        return -EOPNOTSUPP;
 }
 
+/**
+ * i40e_ptp_get_rx_events - Read I40E_PRTTSYN_STAT_1 and latch events
+ * @pf: the PF data structure
+ *
+ * This function reads I40E_PRTTSYN_STAT_1 and updates the corresponding timers
+ * for noticed latch events. This allows the driver to keep track of the first
+ * time a latch event was noticed which will be used to help clear out Rx
+ * timestamps for packets that got dropped or lost.
+ *
+ * This function will return the current value of I40E_PRTTSYN_STAT_1 and is
+ * expected to be called only while under the ptp_rx_lock.
+ **/
+static u32 i40e_ptp_get_rx_events(struct i40e_pf *pf)
+{
+       struct i40e_hw *hw = &pf->hw;
+       u32 prttsyn_stat, new_latch_events;
+       int  i;
+
+       prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_1);
+       new_latch_events = prttsyn_stat & ~pf->latch_event_flags;
+
+       /* Update the jiffies time for any newly latched timestamp. This
+        * ensures that we store the time that we first discovered a timestamp
+        * was latched by the hardware. The service task will later determine
+        * if we should free the latch and drop that timestamp should too much
+        * time pass. This flow ensures that we only update jiffies for new
+        * events latched since the last time we checked, and not all events
+        * currently latched, so that the service task accounting remains
+        * accurate.
+        */
+       for (i = 0; i < 4; i++) {
+               if (new_latch_events & BIT(i))
+                       pf->latch_events[i] = jiffies;
+       }
+
+       /* Finally, we store the current status of the Rx timestamp latches */
+       pf->latch_event_flags = prttsyn_stat;
+
+       return prttsyn_stat;
+}
+
 /**
  * i40e_ptp_rx_hang - Detect error case when Rx timestamp registers are hung
  * @vsi: The VSI with the rings relevant to 1588
@@ -242,10 +280,7 @@ void i40e_ptp_rx_hang(struct i40e_vsi *vsi)
 {
        struct i40e_pf *pf = vsi->back;
        struct i40e_hw *hw = &pf->hw;
-       struct i40e_ring *rx_ring;
-       unsigned long rx_event;
-       u32 prttsyn_stat;
-       int n;
+       int i;
 
        /* Since we cannot turn off the Rx timestamp logic if the device is
         * configured for Tx timestamping, we check if Rx timestamping is
@@ -255,42 +290,30 @@ void i40e_ptp_rx_hang(struct i40e_vsi *vsi)
        if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_rx)
                return;
 
-       prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_1);
+       spin_lock_bh(&pf->ptp_rx_lock);
 
-       /* Unless all four receive timestamp registers are latched, we are not
-        * concerned about a possible PTP Rx hang, so just update the timeout
-        * counter and exit.
-        */
-       if (!(prttsyn_stat & ((I40E_PRTTSYN_STAT_1_RXT0_MASK <<
-                              I40E_PRTTSYN_STAT_1_RXT0_SHIFT) |
-                             (I40E_PRTTSYN_STAT_1_RXT1_MASK <<
-                              I40E_PRTTSYN_STAT_1_RXT1_SHIFT) |
-                             (I40E_PRTTSYN_STAT_1_RXT2_MASK <<
-                              I40E_PRTTSYN_STAT_1_RXT2_SHIFT) |
-                             (I40E_PRTTSYN_STAT_1_RXT3_MASK <<
-                              I40E_PRTTSYN_STAT_1_RXT3_SHIFT)))) {
-               pf->last_rx_ptp_check = jiffies;
-               return;
-       }
+       /* Update current latch times for Rx events */
+       i40e_ptp_get_rx_events(pf);
 
-       /* Determine the most recent watchdog or rx_timestamp event. */
-       rx_event = pf->last_rx_ptp_check;
-       for (n = 0; n < vsi->num_queue_pairs; n++) {
-               rx_ring = vsi->rx_rings[n];
-               if (time_after(rx_ring->last_rx_timestamp, rx_event))
-                       rx_event = rx_ring->last_rx_timestamp;
+       /* Check all the currently latched Rx events and see whether they have
+        * been latched for over a second. It is assumed that any timestamp
+        * should have been cleared within this time, or else it was captured
+        * for a dropped frame that the driver never received. Thus, we will
+        * clear any timestamp that has been latched for over 1 second.
+        */
+       for (i = 0; i < 4; i++) {
+               if ((pf->latch_event_flags & BIT(i)) &&
+                   time_is_before_jiffies(pf->latch_events[i] + HZ)) {
+                       rd32(hw, I40E_PRTTSYN_RXTIME_H(i));
+                       pf->latch_event_flags &= ~BIT(i);
+                       pf->rx_hwtstamp_cleared++;
+                       dev_warn(&pf->pdev->dev,
+                                "Clearing a missed Rx timestamp event for RXTIME[%d]\n",
+                                i);
+               }
        }
 
-       /* Only need to read the high RXSTMP register to clear the lock */
-       if (time_is_before_jiffies(rx_event + 5 * HZ)) {
-               rd32(hw, I40E_PRTTSYN_RXTIME_H(0));
-               rd32(hw, I40E_PRTTSYN_RXTIME_H(1));
-               rd32(hw, I40E_PRTTSYN_RXTIME_H(2));
-               rd32(hw, I40E_PRTTSYN_RXTIME_H(3));
-               pf->last_rx_ptp_check = jiffies;
-               pf->rx_hwtstamp_cleared++;
-               WARN_ONCE(1, "Detected Rx timestamp register hang\n");
-       }
+       spin_unlock_bh(&pf->ptp_rx_lock);
 }
 
 /**
@@ -353,14 +376,25 @@ void i40e_ptp_rx_hwtstamp(struct i40e_pf *pf, struct sk_buff *skb, u8 index)
 
        hw = &pf->hw;
 
-       prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_1);
+       spin_lock_bh(&pf->ptp_rx_lock);
 
-       if (!(prttsyn_stat & BIT(index)))
+       /* Get current Rx events and update latch times */
+       prttsyn_stat = i40e_ptp_get_rx_events(pf);
+
+       /* TODO: Should we warn about missing Rx timestamp event? */
+       if (!(prttsyn_stat & BIT(index))) {
+               spin_unlock_bh(&pf->ptp_rx_lock);
                return;
+       }
+
+       /* Clear the latched event since we're about to read its register */
+       pf->latch_event_flags &= ~BIT(index);
 
        lo = rd32(hw, I40E_PRTTSYN_RXTIME_L(index));
        hi = rd32(hw, I40E_PRTTSYN_RXTIME_H(index));
 
+       spin_unlock_bh(&pf->ptp_rx_lock);
+
        ns = (((u64)hi) << 32) | lo;
 
        i40e_ptp_convert_to_hwtstamp(skb_hwtstamps(skb), ns);
@@ -514,12 +548,15 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
        }
 
        /* Clear out all 1588-related registers to clear and unlatch them. */
+       spin_lock_bh(&pf->ptp_rx_lock);
        rd32(hw, I40E_PRTTSYN_STAT_0);
        rd32(hw, I40E_PRTTSYN_TXTIME_H);
        rd32(hw, I40E_PRTTSYN_RXTIME_H(0));
        rd32(hw, I40E_PRTTSYN_RXTIME_H(1));
        rd32(hw, I40E_PRTTSYN_RXTIME_H(2));
        rd32(hw, I40E_PRTTSYN_RXTIME_H(3));
+       pf->latch_event_flags = 0;
+       spin_unlock_bh(&pf->ptp_rx_lock);
 
        /* Enable/disable the Tx timestamp interrupt based on user input. */
        regval = rd32(hw, I40E_PRTTSYN_CTL0);
@@ -658,10 +695,8 @@ void i40e_ptp_init(struct i40e_pf *pf)
                return;
        }
 
-       /* we have to initialize the lock first, since we can't control
-        * when the user will enter the PHC device entry points
-        */
-       spin_lock_init(&pf->tmreg_lock);
+       mutex_init(&pf->tmreg_lock);
+       spin_lock_init(&pf->ptp_rx_lock);
 
        /* ensure we have a clock device */
        err = i40e_ptp_create_clock(pf);
index 6287bf63c43cae5bb5d1c7e8428ad623aafc66b0..5544b509832feb39e12af93010f69d6e6b5b4e86 100644 (file)
@@ -122,14 +122,10 @@ static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
        struct device *dev;
        dma_addr_t dma;
        u32 td_cmd = 0;
-       u16 delay = 0;
        u16 i;
 
        /* find existing FDIR VSI */
-       vsi = NULL;
-       for (i = 0; i < pf->num_alloc_vsi; i++)
-               if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
-                       vsi = pf->vsi[i];
+       vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
        if (!vsi)
                return -ENOENT;
 
@@ -137,15 +133,11 @@ static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
        dev = tx_ring->dev;
 
        /* we need two descriptors to add/del a filter and we can wait */
-       do {
-               if (I40E_DESC_UNUSED(tx_ring) > 1)
-                       break;
+       for (i = I40E_FD_CLEAN_DELAY; I40E_DESC_UNUSED(tx_ring) < 2; i--) {
+               if (!i)
+                       return -EAGAIN;
                msleep_interruptible(1);
-               delay++;
-       } while (delay < I40E_FD_CLEAN_DELAY);
-
-       if (!(I40E_DESC_UNUSED(tx_ring) > 1))
-               return -EAGAIN;
+       }
 
        dma = dma_map_single(dev, raw_packet,
                             I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
@@ -335,22 +327,6 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
        return err ? -EOPNOTSUPP : 0;
 }
 
-/**
- * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
- * a specific flow spec
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required for the FDir descriptor
- * @add: true adds a filter, false removes it
- *
- * Returns 0 if the filters were successfully added or removed
- **/
-static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
-                                   struct i40e_fdir_filter *fd_data,
-                                   bool add)
-{
-       return -EOPNOTSUPP;
-}
-
 #define I40E_IP_DUMMY_PACKET_LEN 34
 /**
  * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
@@ -433,12 +409,6 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
        case UDP_V4_FLOW:
                ret = i40e_add_del_fdir_udpv4(vsi, input, add);
                break;
-       case SCTP_V4_FLOW:
-               ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
-               break;
-       case IPV4_FLOW:
-               ret = i40e_add_del_fdir_ipv4(vsi, input, add);
-               break;
        case IP_USER_FLOW:
                switch (input->ip4_proto) {
                case IPPROTO_TCP:
@@ -447,15 +417,16 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
                case IPPROTO_UDP:
                        ret = i40e_add_del_fdir_udpv4(vsi, input, add);
                        break;
-               case IPPROTO_SCTP:
-                       ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
-                       break;
-               default:
+               case IPPROTO_IP:
                        ret = i40e_add_del_fdir_ipv4(vsi, input, add);
                        break;
+               default:
+                       /* We cannot support masking based on protocol */
+                       goto unsupported_flow;
                }
                break;
        default:
+unsupported_flow:
                dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
                         input->flow_type);
                ret = -EINVAL;
@@ -645,7 +616,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
        return 0;
 }
 
-#define WB_STRIDE 0x3
+#define WB_STRIDE 4
 
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
@@ -761,7 +732,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
                unsigned int j = i40e_get_tx_pending(tx_ring, false);
 
                if (budget &&
-                   ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
+                   ((j / WB_STRIDE) == 0) && (j > 0) &&
                    !test_bit(__I40E_DOWN, &vsi->state) &&
                    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
                        tx_ring->arm_wb = true;
@@ -1246,7 +1217,6 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
                 * because each write-back erases this info.
                 */
                rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
-               rx_desc->read.hdr_addr = 0;
 
                rx_desc++;
                bi++;
@@ -1437,13 +1407,12 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
        u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
        u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
                        I40E_RXD_QW1_STATUS_SHIFT;
-       u32 rsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
+       u32 tsynvalid = rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK;
+       u32 tsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
                   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT;
 
-       if (unlikely(rsyn)) {
-               i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, rsyn);
-               rx_ring->last_rx_timestamp = jiffies;
-       }
+       if (unlikely(tsynvalid))
+               i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn);
 
        i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
 
@@ -1767,7 +1736,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
        while (likely(total_rx_packets < budget)) {
                union i40e_rx_desc *rx_desc;
                struct sk_buff *skb;
-               u32 rx_status;
                u16 vlan_tag;
                u8 rx_ptype;
                u64 qword;
@@ -1781,21 +1749,13 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 
                rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
 
-               qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-               rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
-                          I40E_RXD_QW1_PTYPE_SHIFT;
-               rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
-                           I40E_RXD_QW1_STATUS_SHIFT;
-
-               if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
-                       break;
-
                /* status_error_len will always be zero for unused descriptors
                 * because it's cleared in cleanup, and overlaps with hdr_addr
                 * which is always zero because packet split isn't used, if the
                 * hardware wrote DD then it will be non-zero
                 */
-               if (!rx_desc->wb.qword1.status_error_len)
+               if (!i40e_test_staterr(rx_desc,
+                                      BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
                        break;
 
                /* This memory barrier is needed to keep us from reading
@@ -1829,6 +1789,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
                /* probably a little skewed due to removing CRC */
                total_rx_bytes += skb->len;
 
+               qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+               rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
+                          I40E_RXD_QW1_PTYPE_SHIFT;
+
                /* populate checksum, VLAN, and protocol */
                i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
 
@@ -2025,12 +1989,25 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 
        /* If work not completed, return budget and polling will return */
        if (!clean_complete) {
+               const cpumask_t *aff_mask = &q_vector->affinity_mask;
+               int cpu_id = smp_processor_id();
+
+               /* It is possible that the interrupt affinity has changed but,
+                * if the cpu is pegged at 100%, polling will never exit while
+                * traffic continues and the interrupt will be stuck on this
+                * cpu.  We check to make sure affinity is correct before we
+                * continue to poll, otherwise we must stop polling so the
+                * interrupt can move to the correct cpu.
+                */
+               if (likely(cpumask_test_cpu(cpu_id, aff_mask) ||
+                          !(vsi->back->flags & I40E_FLAG_MSIX_ENABLED))) {
 tx_only:
-               if (arm_wb) {
-                       q_vector->tx.ring[0].tx_stats.tx_force_wb++;
-                       i40e_enable_wb_on_itr(vsi, q_vector);
+                       if (arm_wb) {
+                               q_vector->tx.ring[0].tx_stats.tx_force_wb++;
+                               i40e_enable_wb_on_itr(vsi, q_vector);
+                       }
+                       return budget;
                }
-               return budget;
        }
 
        if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
@@ -2038,11 +2015,18 @@ tx_only:
 
        /* Work is done so exit the polling mode and re-enable the interrupt */
        napi_complete_done(napi, work_done);
-       if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
-               i40e_update_enable_itr(vsi, q_vector);
-       } else { /* Legacy mode */
+
+       /* If we're prematurely stopping polling to fix the interrupt
+        * affinity we want to make sure polling starts back up so we
+        * issue a call to i40e_force_wb which triggers a SW interrupt.
+        */
+       if (!clean_complete)
+               i40e_force_wb(vsi, q_vector);
+       else if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED))
                i40e_irq_dynamic_enable_icr0(vsi->back, false);
-       }
+       else
+               i40e_update_enable_itr(vsi, q_vector);
+
        return 0;
 }
 
@@ -2716,9 +2700,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
        u32 td_tag = 0;
        dma_addr_t dma;
        u16 gso_segs;
-       u16 desc_count = 0;
-       bool tail_bump = true;
-       bool do_rs = false;
+       u16 desc_count = 1;
 
        if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
                td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
@@ -2801,8 +2783,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
                tx_bi = &tx_ring->tx_bi[i];
        }
 
-       /* set next_to_watch value indicating a packet is present */
-       first->next_to_watch = tx_desc;
+       netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
 
        i++;
        if (i == tx_ring->count)
@@ -2810,66 +2791,72 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
        tx_ring->next_to_use = i;
 
-       netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
        i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
+       /* write last descriptor with EOP bit */
+       td_cmd |= I40E_TX_DESC_CMD_EOP;
+
+       /* We can OR these values together as they both are checked against
+        * 4 below and at this point desc_count will be used as a boolean value
+        * after this if/else block.
+        */
+       desc_count |= ++tx_ring->packet_stride;
+
        /* Algorithm to optimize tail and RS bit setting:
-        * if xmit_more is supported
-        *      if xmit_more is true
-        *              do not update tail and do not mark RS bit.
-        *      if xmit_more is false and last xmit_more was false
-        *              if every packet spanned less than 4 desc
-        *                      then set RS bit on 4th packet and update tail
-        *                      on every packet
-        *              else
-        *                      update tail and set RS bit on every packet.
-        *      if xmit_more is false and last_xmit_more was true
-        *              update tail and set RS bit.
+        * if queue is stopped
+        *      mark RS bit
+        *      reset packet counter
+        * else if xmit_more is supported and is true
+        *      advance packet counter to 4
+        *      reset desc_count to 0
         *
-        * Optimization: wmb to be issued only in case of tail update.
-        * Also optimize the Descriptor WB path for RS bit with the same
-        * algorithm.
+        * if desc_count >= 4
+        *      mark RS bit
+        *      reset packet counter
+        * if desc_count > 0
+        *      update tail
         *
-        * Note: If there are less than 4 packets
+        * Note: If there are less than 4 descriptors
         * pending and interrupts were disabled the service task will
         * trigger a force WB.
         */
-       if (skb->xmit_more  &&
-           !netif_xmit_stopped(txring_txq(tx_ring))) {
-               tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
-               tail_bump = false;
-       } else if (!skb->xmit_more &&
-                  !netif_xmit_stopped(txring_txq(tx_ring)) &&
-                  (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
-                  (tx_ring->packet_stride < WB_STRIDE) &&
-                  (desc_count < WB_STRIDE)) {
-               tx_ring->packet_stride++;
-       } else {
+       if (netif_xmit_stopped(txring_txq(tx_ring))) {
+               goto do_rs;
+       } else if (skb->xmit_more) {
+               /* set stride to arm on next packet and reset desc_count */
+               tx_ring->packet_stride = WB_STRIDE;
+               desc_count = 0;
+       } else if (desc_count >= WB_STRIDE) {
+do_rs:
+               /* write last descriptor with RS bit set */
+               td_cmd |= I40E_TX_DESC_CMD_RS;
                tx_ring->packet_stride = 0;
-               tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
-               do_rs = true;
        }
-       if (do_rs)
-               tx_ring->packet_stride = 0;
 
        tx_desc->cmd_type_offset_bsz =
-                       build_ctob(td_cmd, td_offset, size, td_tag) |
-                       cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
-                                                 I40E_TX_DESC_CMD_EOP) <<
-                                                 I40E_TXD_QW1_CMD_SHIFT);
+                       build_ctob(td_cmd, td_offset, size, td_tag);
+
+       /* Force memory writes to complete before letting h/w know there
+        * are new descriptors to fetch.
+        *
+        * We also use this memory barrier to make certain all of the
+        * status bits have been updated before next_to_watch is written.
+        */
+       wmb();
+
+       /* set next_to_watch value indicating a packet is present */
+       first->next_to_watch = tx_desc;
 
        /* notify HW of packet */
-       if (!tail_bump) {
-               prefetchw(tx_desc + 1);
-       } else {
-               /* Force memory writes to complete before letting h/w
-                * know there are new descriptors to fetch.  (Only
-                * applicable for weak-ordered memory model archs,
-                * such as IA-64).
-                */
-               wmb();
+       if (desc_count) {
                writel(i, tx_ring->tail);
+
+               /* we need this if more than one processor can write to our tail
+                * at a time, it synchronizes IO on IA64/Altix systems
+                */
+               mmiowb();
        }
+
        return;
 
 dma_error:
index 508840585645d368e737bcd72c987bb4977b50c6..de8550f4e3a4dd8d53a0ab9e37b0c4f1a8b19caa 100644 (file)
@@ -307,15 +307,12 @@ struct i40e_ring {
        u8 atr_sample_rate;
        u8 atr_count;
 
-       unsigned long last_rx_timestamp;
-
        bool ring_active;               /* is ring online or not */
        bool arm_wb;            /* do something to arm write back */
        u8 packet_stride;
 
        u16 flags;
 #define I40E_TXR_FLAGS_WB_ON_ITR       BIT(0)
-#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2)
 
        /* stats structs */
        struct i40e_queue_stats stats;
index bd5f13bef83c794bb03435308b282831b9300c95..d9a266041bf154416e316562fda82140a9f6bba5 100644 (file)
@@ -366,6 +366,7 @@ enum i40e_nvmupd_state {
        I40E_NVMUPD_STATE_WRITING,
        I40E_NVMUPD_STATE_INIT_WAIT,
        I40E_NVMUPD_STATE_WRITE_WAIT,
+       I40E_NVMUPD_STATE_ERROR
 };
 
 /* nvm_access definition and its masks/shifts need to be accessible to
index f861d3109d1a10ace23242f4d3779328a6a32539..974ba2baf6ea006d2f3dae4e7aa841e7bffb96d3 100644 (file)
@@ -165,6 +165,10 @@ struct i40e_virtchnl_vsi_resource {
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF                0X00080000
 #define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM    0X00100000
 
+#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \
+                                   I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \
+                                   I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF)
+
 struct i40e_virtchnl_vf_resource {
        u16 num_vsis;
        u16 num_queue_pairs;
index 54b8ee2583f14da4e6e2f9951d672c784a8edf8d..53b46553dd8df1c520639d273eea837e84064a20 100644 (file)
@@ -686,17 +686,17 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
                if (vf->port_vlan_id)
                        i40e_vsi_add_pvid(vsi, vf->port_vlan_id);
 
-               spin_lock_bh(&vsi->mac_filter_list_lock);
+               spin_lock_bh(&vsi->mac_filter_hash_lock);
                if (is_valid_ether_addr(vf->default_lan_addr.addr)) {
                        f = i40e_add_filter(vsi, vf->default_lan_addr.addr,
-                                      vf->port_vlan_id ? vf->port_vlan_id : -1,
-                                      true, false);
+                                      vf->port_vlan_id ?
+                                      vf->port_vlan_id : -1);
                        if (!f)
                                dev_info(&pf->pdev->dev,
                                         "Could not add MAC filter %pM for VF %d\n",
                                        vf->default_lan_addr.addr, vf->vf_id);
                }
-               spin_unlock_bh(&vsi->mac_filter_list_lock);
+               spin_unlock_bh(&vsi->mac_filter_hash_lock);
                i40e_write_rx_ctl(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id),
                                  (u32)hena);
                i40e_write_rx_ctl(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id),
@@ -811,6 +811,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
                i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]);
                vf->lan_vsi_idx = 0;
                vf->lan_vsi_id = 0;
+               vf->num_mac = 0;
        }
        msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
 
@@ -990,7 +991,7 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
        if (vf->lan_vsi_idx == 0)
                goto complete_reset;
 
-       i40e_vsi_control_rings(pf->vsi[vf->lan_vsi_idx], false);
+       i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]);
 complete_reset:
        /* reallocate VF resources to reset the VSI state */
        i40e_free_vf_res(vf);
@@ -1031,8 +1032,7 @@ void i40e_free_vfs(struct i40e_pf *pf)
        i40e_notify_client_of_vf_enable(pf, 0);
        for (i = 0; i < pf->num_alloc_vfs; i++)
                if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states))
-                       i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx],
-                                              false);
+                       i40e_vsi_stop_rings(pf->vsi[pf->vf[i].lan_vsi_idx]);
 
        /* Disable IOV before freeing resources. This lets any VF drivers
         * running in the host get themselves cleaned up before we yank
@@ -1449,9 +1449,9 @@ static void i40e_vc_reset_vf_msg(struct i40e_vf *vf)
 static inline int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
 {
        struct i40e_mac_filter *f;
-       int num_vlans = 0;
+       int num_vlans = 0, bkt;
 
-       list_for_each_entry(f, &vsi->mac_filter_list, list) {
+       hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
                if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID)
                        num_vlans++;
        }
@@ -1481,6 +1481,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
        struct i40e_vsi *vsi;
        bool alluni = false;
        int aq_err = 0;
+       int bkt;
 
        vsi = i40e_find_vsi_from_id(pf, info->vsi_id);
        if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
@@ -1507,7 +1508,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
                                                            vf->port_vlan_id,
                                                            NULL);
        } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
-               list_for_each_entry(f, &vsi->mac_filter_list, list) {
+               hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
                        if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID)
                                continue;
                        aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw,
@@ -1557,7 +1558,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
                                                            vf->port_vlan_id,
                                                            NULL);
        } else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
-               list_for_each_entry(f, &vsi->mac_filter_list, list) {
+               hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
                        aq_ret = 0;
                        if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) {
                                aq_ret =
@@ -1757,7 +1758,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
                goto error_param;
        }
 
-       if (i40e_vsi_control_rings(pf->vsi[vf->lan_vsi_idx], true))
+       if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
                aq_ret = I40E_ERR_TIMEOUT;
 error_param:
        /* send the response to the VF */
@@ -1796,8 +1797,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
                goto error_param;
        }
 
-       if (i40e_vsi_control_rings(pf->vsi[vf->lan_vsi_idx], false))
-               aq_ret = I40E_ERR_TIMEOUT;
+       i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]);
 
 error_param:
        /* send the response to the VF */
@@ -1927,20 +1927,18 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
        /* Lock once, because all function inside for loop accesses VSI's
         * MAC filter list which needs to be protected using same lock.
         */
-       spin_lock_bh(&vsi->mac_filter_list_lock);
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
 
        /* add new addresses to the list */
        for (i = 0; i < al->num_elements; i++) {
                struct i40e_mac_filter *f;
 
-               f = i40e_find_mac(vsi, al->list[i].addr, true, false);
+               f = i40e_find_mac(vsi, al->list[i].addr);
                if (!f) {
                        if (i40e_is_vsi_in_vlan(vsi))
-                               f = i40e_put_mac_in_vlan(vsi, al->list[i].addr,
-                                                        true, false);
+                               f = i40e_put_mac_in_vlan(vsi, al->list[i].addr);
                        else
-                               f = i40e_add_filter(vsi, al->list[i].addr, -1,
-                                                   true, false);
+                               f = i40e_add_filter(vsi, al->list[i].addr, -1);
                }
 
                if (!f) {
@@ -1948,13 +1946,13 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
                                "Unable to add MAC filter %pM for VF %d\n",
                                 al->list[i].addr, vf->vf_id);
                        ret = I40E_ERR_PARAM;
-                       spin_unlock_bh(&vsi->mac_filter_list_lock);
+                       spin_unlock_bh(&vsi->mac_filter_hash_lock);
                        goto error_param;
                } else {
                        vf->num_mac++;
                }
        }
-       spin_unlock_bh(&vsi->mac_filter_list_lock);
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
        /* program the updated filter list */
        ret = i40e_sync_vsi_filters(vsi);
@@ -2003,18 +2001,18 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
        }
        vsi = pf->vsi[vf->lan_vsi_idx];
 
-       spin_lock_bh(&vsi->mac_filter_list_lock);
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
        /* delete addresses from the list */
        for (i = 0; i < al->num_elements; i++)
-               if (i40e_del_mac_all_vlan(vsi, al->list[i].addr, true, false)) {
+               if (i40e_del_mac_all_vlan(vsi, al->list[i].addr)) {
                        ret = I40E_ERR_INVALID_MAC_ADDR;
-                       spin_unlock_bh(&vsi->mac_filter_list_lock);
+                       spin_unlock_bh(&vsi->mac_filter_hash_lock);
                        goto error_param;
                } else {
                        vf->num_mac--;
                }
 
-       spin_unlock_bh(&vsi->mac_filter_list_lock);
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
        /* program the updated filter list */
        ret = i40e_sync_vsi_filters(vsi);
@@ -2139,9 +2137,8 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
        }
 
        for (i = 0; i < vfl->num_elements; i++) {
-               int ret = i40e_vsi_kill_vlan(vsi, vfl->vlan_id[i]);
-               if (!ret)
-                       vf->num_vlan--;
+               i40e_vsi_kill_vlan(vsi, vfl->vlan_id[i]);
+               vf->num_vlan--;
 
                if (test_bit(I40E_VF_STAT_UC_PROMISC, &vf->vf_states))
                        i40e_aq_set_vsi_uc_promisc_on_vlan(&pf->hw, vsi->seid,
@@ -2153,11 +2150,6 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
                                                           false,
                                                           vfl->vlan_id[i],
                                                           NULL);
-
-               if (ret)
-                       dev_err(&pf->pdev->dev,
-                               "Unable to delete VLAN filter %d for VF %d, error %d\n",
-                               vfl->vlan_id[i], vf->vf_id, ret);
        }
 
 error_param:
@@ -2689,6 +2681,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
        struct i40e_mac_filter *f;
        struct i40e_vf *vf;
        int ret = 0;
+       int bkt;
 
        /* validate the request */
        if (vf_id >= pf->num_alloc_vfs) {
@@ -2715,23 +2708,22 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
        }
 
        /* Lock once because below invoked function add/del_filter requires
-        * mac_filter_list_lock to be held
+        * mac_filter_hash_lock to be held
         */
-       spin_lock_bh(&vsi->mac_filter_list_lock);
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
 
        /* delete the temporary mac address */
        if (!is_zero_ether_addr(vf->default_lan_addr.addr))
                i40e_del_filter(vsi, vf->default_lan_addr.addr,
-                               vf->port_vlan_id ? vf->port_vlan_id : -1,
-                               true, false);
+                               vf->port_vlan_id ? vf->port_vlan_id : -1);
 
        /* Delete all the filters for this VSI - we're going to kill it
         * anyway.
         */
-       list_for_each_entry(f, &vsi->mac_filter_list, list)
-               i40e_del_filter(vsi, f->macaddr, f->vlan, true, false);
+       hash_for_each(vsi->mac_filter_hash, bkt, f, hlist)
+               i40e_del_filter(vsi, f->macaddr, f->vlan);
 
-       spin_unlock_bh(&vsi->mac_filter_list_lock);
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
        dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id);
        /* program mac filter */
@@ -2803,9 +2795,9 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
                /* duplicate request, so just return success */
                goto error_pvid;
 
-       spin_lock_bh(&vsi->mac_filter_list_lock);
+       spin_lock_bh(&vsi->mac_filter_hash_lock);
        is_vsi_in_vlan = i40e_is_vsi_in_vlan(vsi);
-       spin_unlock_bh(&vsi->mac_filter_list_lock);
+       spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
        if (le16_to_cpu(vsi->info.pvid) == 0 && is_vsi_in_vlan) {
                dev_err(&pf->pdev->dev,
@@ -2835,13 +2827,8 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
 
        if (vsi->info.pvid) {
                /* kill old VLAN */
-               ret = i40e_vsi_kill_vlan(vsi, (le16_to_cpu(vsi->info.pvid) &
-                                              VLAN_VID_MASK));
-               if (ret) {
-                       dev_info(&vsi->back->pdev->dev,
-                                "remove VLAN failed, ret=%d, aq_err=%d\n",
-                                ret, pf->hw.aq.asq_last_status);
-               }
+               i40e_vsi_kill_vlan(vsi, (le16_to_cpu(vsi->info.pvid) &
+                                        VLAN_VID_MASK));
        }
        if (vlan_id || qos)
                ret = i40e_vsi_add_pvid(vsi, vlanprio);
@@ -2940,7 +2927,7 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
        }
 
        if (max_tx_rate > speed) {
-               dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for VF %d.",
+               dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for VF %d.\n",
                        max_tx_rate, vf->vf_id);
                ret = -EINVAL;
                goto error;
index 44f7ed7583dd15d1f11b49ef871628d22d23c144..96385156b82451e652eb95e0b5fbc2459b4836c3 100644 (file)
@@ -912,11 +912,11 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw,
        desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc);
        desc_idx = ntc;
 
+       hw->aq.arq_last_status =
+               (enum i40e_admin_queue_err)le16_to_cpu(desc->retval);
        flags = le16_to_cpu(desc->flags);
        if (flags & I40E_AQ_FLAG_ERR) {
                ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
-               hw->aq.arq_last_status =
-                       (enum i40e_admin_queue_err)le16_to_cpu(desc->retval);
                i40e_debug(hw,
                           I40E_DEBUG_AQ_MESSAGE,
                           "AQRX: Event received with error 0x%X.\n",
index 75f2a2cdd738592795e387473eefab6c81215dc5..c4b174afd253d69ac2cd556e22d6ad2f46c318ad 100644 (file)
@@ -150,7 +150,7 @@ u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw)
        return 0;
 }
 
-#define WB_STRIDE 0x3
+#define WB_STRIDE 4
 
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
@@ -266,7 +266,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
                unsigned int j = i40evf_get_tx_pending(tx_ring, false);
 
                if (budget &&
-                   ((j / (WB_STRIDE + 1)) == 0) && (j > 0) &&
+                   ((j / WB_STRIDE) == 0) && (j > 0) &&
                    !test_bit(__I40E_DOWN, &vsi->state) &&
                    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
                        tx_ring->arm_wb = true;
@@ -705,7 +705,6 @@ bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
                 * because each write-back erases this info.
                 */
                rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
-               rx_desc->read.hdr_addr = 0;
 
                rx_desc++;
                bi++;
@@ -1209,7 +1208,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
        while (likely(total_rx_packets < budget)) {
                union i40e_rx_desc *rx_desc;
                struct sk_buff *skb;
-               u32 rx_status;
                u16 vlan_tag;
                u8 rx_ptype;
                u64 qword;
@@ -1223,21 +1221,13 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 
                rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
 
-               qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-               rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
-                          I40E_RXD_QW1_PTYPE_SHIFT;
-               rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
-                           I40E_RXD_QW1_STATUS_SHIFT;
-
-               if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
-                       break;
-
                /* status_error_len will always be zero for unused descriptors
                 * because it's cleared in cleanup, and overlaps with hdr_addr
                 * which is always zero because packet split isn't used, if the
                 * hardware wrote DD then it will be non-zero
                 */
-               if (!rx_desc->wb.qword1.status_error_len)
+               if (!i40e_test_staterr(rx_desc,
+                                      BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
                        break;
 
                /* This memory barrier is needed to keep us from reading
@@ -1271,6 +1261,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
                /* probably a little skewed due to removing CRC */
                total_rx_bytes += skb->len;
 
+               qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+               rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
+                          I40E_RXD_QW1_PTYPE_SHIFT;
+
                /* populate checksum, VLAN, and protocol */
                i40evf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
 
@@ -1461,12 +1455,24 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget)
 
        /* If work not completed, return budget and polling will return */
        if (!clean_complete) {
+               const cpumask_t *aff_mask = &q_vector->affinity_mask;
+               int cpu_id = smp_processor_id();
+
+               /* It is possible that the interrupt affinity has changed but,
+                * if the cpu is pegged at 100%, polling will never exit while
+                * traffic continues and the interrupt will be stuck on this
+                * cpu.  We check to make sure affinity is correct before we
+                * continue to poll, otherwise we must stop polling so the
+                * interrupt can move to the correct cpu.
+                */
+               if (likely(cpumask_test_cpu(cpu_id, aff_mask))) {
 tx_only:
-               if (arm_wb) {
-                       q_vector->tx.ring[0].tx_stats.tx_force_wb++;
-                       i40e_enable_wb_on_itr(vsi, q_vector);
+                       if (arm_wb) {
+                               q_vector->tx.ring[0].tx_stats.tx_force_wb++;
+                               i40e_enable_wb_on_itr(vsi, q_vector);
+                       }
+                       return budget;
                }
-               return budget;
        }
 
        if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
@@ -1474,7 +1480,16 @@ tx_only:
 
        /* Work is done so exit the polling mode and re-enable the interrupt */
        napi_complete_done(napi, work_done);
-       i40e_update_enable_itr(vsi, q_vector);
+
+       /* If we're prematurely stopping polling to fix the interrupt
+        * affinity we want to make sure polling starts back up so we
+        * issue a call to i40evf_force_wb which triggers a SW interrupt.
+        */
+       if (!clean_complete)
+               i40evf_force_wb(vsi, q_vector);
+       else
+               i40e_update_enable_itr(vsi, q_vector);
+
        return 0;
 }
 
@@ -1935,9 +1950,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
        u32 td_tag = 0;
        dma_addr_t dma;
        u16 gso_segs;
-       u16 desc_count = 0;
-       bool tail_bump = true;
-       bool do_rs = false;
+       u16 desc_count = 1;
 
        if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
                td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
@@ -2020,8 +2033,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
                tx_bi = &tx_ring->tx_bi[i];
        }
 
-       /* set next_to_watch value indicating a packet is present */
-       first->next_to_watch = tx_desc;
+       netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
 
        i++;
        if (i == tx_ring->count)
@@ -2029,66 +2041,72 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
        tx_ring->next_to_use = i;
 
-       netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
        i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
+       /* write last descriptor with EOP bit */
+       td_cmd |= I40E_TX_DESC_CMD_EOP;
+
+       /* We can OR these values together as they both are checked against
+        * 4 below and at this point desc_count will be used as a boolean value
+        * after this if/else block.
+        */
+       desc_count |= ++tx_ring->packet_stride;
+
        /* Algorithm to optimize tail and RS bit setting:
-        * if xmit_more is supported
-        *      if xmit_more is true
-        *              do not update tail and do not mark RS bit.
-        *      if xmit_more is false and last xmit_more was false
-        *              if every packet spanned less than 4 desc
-        *                      then set RS bit on 4th packet and update tail
-        *                      on every packet
-        *              else
-        *                      update tail and set RS bit on every packet.
-        *      if xmit_more is false and last_xmit_more was true
-        *              update tail and set RS bit.
+        * if queue is stopped
+        *      mark RS bit
+        *      reset packet counter
+        * else if xmit_more is supported and is true
+        *      advance packet counter to 4
+        *      reset desc_count to 0
         *
-        * Optimization: wmb to be issued only in case of tail update.
-        * Also optimize the Descriptor WB path for RS bit with the same
-        * algorithm.
+        * if desc_count >= 4
+        *      mark RS bit
+        *      reset packet counter
+        * if desc_count > 0
+        *      update tail
         *
-        * Note: If there are less than 4 packets
+        * Note: If there are less than 4 descriptors
         * pending and interrupts were disabled the service task will
         * trigger a force WB.
         */
-       if (skb->xmit_more  &&
-           !netif_xmit_stopped(txring_txq(tx_ring))) {
-               tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
-               tail_bump = false;
-       } else if (!skb->xmit_more &&
-                  !netif_xmit_stopped(txring_txq(tx_ring)) &&
-                  (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
-                  (tx_ring->packet_stride < WB_STRIDE) &&
-                  (desc_count < WB_STRIDE)) {
-               tx_ring->packet_stride++;
-       } else {
+       if (netif_xmit_stopped(txring_txq(tx_ring))) {
+               goto do_rs;
+       } else if (skb->xmit_more) {
+               /* set stride to arm on next packet and reset desc_count */
+               tx_ring->packet_stride = WB_STRIDE;
+               desc_count = 0;
+       } else if (desc_count >= WB_STRIDE) {
+do_rs:
+               /* write last descriptor with RS bit set */
+               td_cmd |= I40E_TX_DESC_CMD_RS;
                tx_ring->packet_stride = 0;
-               tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
-               do_rs = true;
        }
-       if (do_rs)
-               tx_ring->packet_stride = 0;
 
        tx_desc->cmd_type_offset_bsz =
-                       build_ctob(td_cmd, td_offset, size, td_tag) |
-                       cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
-                                                 I40E_TX_DESC_CMD_EOP) <<
-                                                 I40E_TXD_QW1_CMD_SHIFT);
+                       build_ctob(td_cmd, td_offset, size, td_tag);
+
+       /* Force memory writes to complete before letting h/w know there
+        * are new descriptors to fetch.
+        *
+        * We also use this memory barrier to make certain all of the
+        * status bits have been updated before next_to_watch is written.
+        */
+       wmb();
+
+       /* set next_to_watch value indicating a packet is present */
+       first->next_to_watch = tx_desc;
 
        /* notify HW of packet */
-       if (!tail_bump) {
-               prefetchw(tx_desc + 1);
-       } else {
-               /* Force memory writes to complete before letting h/w
-                * know there are new descriptors to fetch.  (Only
-                * applicable for weak-ordered memory model archs,
-                * such as IA-64).
-                */
-               wmb();
+       if (desc_count) {
                writel(i, tx_ring->tail);
+
+               /* we need this if more than one processor can write to our tail
+                * at a time, it synchronizes IO on IA64/Altix systems
+                */
+               mmiowb();
        }
+
        return;
 
 dma_error:
index abcdecabbc560b471ad857d9f96840d1c7248b54..a586e19cfd1d77b0e7ad25d167760a95d61110c4 100644 (file)
@@ -309,7 +309,6 @@ struct i40e_ring {
        bool ring_active;               /* is ring online or not */
        bool arm_wb;            /* do something to arm write back */
        u8 packet_stride;
-#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2)
 
        u16 flags;
 #define I40E_TXR_FLAGS_WB_ON_ITR       BIT(0)
index 97f96e0d9c4c4000cc8d003129f09c1fb93f3219..ca7afe59c55f7fd8ca0b73cf8b24f0af3da4686c 100644 (file)
@@ -348,6 +348,7 @@ enum i40e_nvmupd_state {
        I40E_NVMUPD_STATE_WRITING,
        I40E_NVMUPD_STATE_INIT_WAIT,
        I40E_NVMUPD_STATE_WRITE_WAIT,
+       I40E_NVMUPD_STATE_ERROR
 };
 
 /* nvm_access definition and its masks/shifts need to be accessible to
index bd691ad86673d21bcd70600226126b36136a42c1..fc374f833aa9a930f2d119c306dd2547209884af 100644 (file)
@@ -162,6 +162,10 @@ struct i40e_virtchnl_vsi_resource {
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF                0X00080000
 #define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM    0X00100000
 
+#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \
+                                   I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \
+                                   I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF)
+
 struct i40e_virtchnl_vf_resource {
        u16 num_vsis;
        u16 num_queue_pairs;
index c5fd724313c7f786bf1912dc200a5174e646eae1..fffe4cf2c20b302df75a830877719dc915254687 100644 (file)
@@ -107,7 +107,8 @@ struct i40e_q_vector {
        int v_idx;      /* vector index in list */
        char name[IFNAMSIZ + 9];
        bool arm_wb_state;
-       cpumask_var_t affinity_mask;
+       cpumask_t affinity_mask;
+       struct irq_affinity_notify affinity_notify;
 };
 
 /* Helper macros to switch between ints/sec and what the register uses.
index 777eb29e4ff75f73211876d4bf2c640f3a68db46..db36744c6691f941085bd97b3df46c8bec412cea 100644 (file)
@@ -38,7 +38,7 @@ static const char i40evf_driver_string[] =
 
 #define DRV_VERSION_MAJOR 1
 #define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 16
+#define DRV_VERSION_BUILD 21
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
             __stringify(DRV_VERSION_MINOR) "." \
             __stringify(DRV_VERSION_BUILD) \
@@ -495,6 +495,33 @@ static void i40evf_netpoll(struct net_device *netdev)
 }
 
 #endif
+/**
+ * i40evf_irq_affinity_notify - Callback for affinity changes
+ * @notify: context as to what irq was changed
+ * @mask: the new affinity mask
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * so that we may register to receive changes to the irq affinity masks.
+ **/
+static void i40evf_irq_affinity_notify(struct irq_affinity_notify *notify,
+                                      const cpumask_t *mask)
+{
+       struct i40e_q_vector *q_vector =
+               container_of(notify, struct i40e_q_vector, affinity_notify);
+
+       q_vector->affinity_mask = *mask;
+}
+
+/**
+ * i40evf_irq_affinity_release - Callback for affinity notifier release
+ * @ref: internal core kernel usage
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * to inform the current notification subscriber that they will no longer
+ * receive notifications.
+ **/
+static void i40evf_irq_affinity_release(struct kref *ref) {}
+
 /**
  * i40evf_request_traffic_irqs - Initialize MSI-X interrupts
  * @adapter: board private structure
@@ -507,6 +534,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
 {
        int vector, err, q_vectors;
        int rx_int_idx = 0, tx_int_idx = 0;
+       int irq_num;
 
        i40evf_irq_disable(adapter);
        /* Decrement for Other and TCP Timer vectors */
@@ -514,6 +542,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
 
        for (vector = 0; vector < q_vectors; vector++) {
                struct i40e_q_vector *q_vector = &adapter->q_vectors[vector];
+               irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
 
                if (q_vector->tx.ring && q_vector->rx.ring) {
                        snprintf(q_vector->name, sizeof(q_vector->name) - 1,
@@ -532,21 +561,23 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
                        /* skip this unused q_vector */
                        continue;
                }
-               err = request_irq(
-                       adapter->msix_entries[vector + NONQ_VECS].vector,
-                       i40evf_msix_clean_rings,
-                       0,
-                       q_vector->name,
-                       q_vector);
+               err = request_irq(irq_num,
+                                 i40evf_msix_clean_rings,
+                                 0,
+                                 q_vector->name,
+                                 q_vector);
                if (err) {
                        dev_info(&adapter->pdev->dev,
                                 "Request_irq failed, error: %d\n", err);
                        goto free_queue_irqs;
                }
+               /* register for affinity change notifications */
+               q_vector->affinity_notify.notify = i40evf_irq_affinity_notify;
+               q_vector->affinity_notify.release =
+                                                  i40evf_irq_affinity_release;
+               irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
                /* assign the mask for this irq */
-               irq_set_affinity_hint(
-                       adapter->msix_entries[vector + NONQ_VECS].vector,
-                       q_vector->affinity_mask);
+               irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
        }
 
        return 0;
@@ -554,11 +585,10 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
 free_queue_irqs:
        while (vector) {
                vector--;
-               irq_set_affinity_hint(
-                       adapter->msix_entries[vector + NONQ_VECS].vector,
-                       NULL);
-               free_irq(adapter->msix_entries[vector + NONQ_VECS].vector,
-                        &adapter->q_vectors[vector]);
+               irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
+               irq_set_affinity_notifier(irq_num, NULL);
+               irq_set_affinity_hint(irq_num, NULL);
+               free_irq(irq_num, &adapter->q_vectors[vector]);
        }
        return err;
 }
@@ -599,16 +629,15 @@ static int i40evf_request_misc_irq(struct i40evf_adapter *adapter)
  **/
 static void i40evf_free_traffic_irqs(struct i40evf_adapter *adapter)
 {
-       int i;
-       int q_vectors;
+       int vector, irq_num, q_vectors;
 
        q_vectors = adapter->num_msix_vectors - NONQ_VECS;
 
-       for (i = 0; i < q_vectors; i++) {
-               irq_set_affinity_hint(adapter->msix_entries[i+1].vector,
-                                     NULL);
-               free_irq(adapter->msix_entries[i+1].vector,
-                        &adapter->q_vectors[i]);
+       for (vector = 0; vector < q_vectors; vector++) {
+               irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
+               irq_set_affinity_notifier(irq_num, NULL);
+               irq_set_affinity_hint(irq_num, NULL);
+               free_irq(irq_num, &adapter->q_vectors[vector]);
        }
 }
 
@@ -1717,15 +1746,17 @@ static void i40evf_reset_task(struct work_struct *work)
 
        /* wait until the reset is complete and the PF is responding to us */
        for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
+               /* sleep first to make sure a minimum wait time is met */
+               msleep(I40EVF_RESET_WAIT_MS);
+
                reg_val = rd32(hw, I40E_VFGEN_RSTAT) &
                          I40E_VFGEN_RSTAT_VFR_STATE_MASK;
                if (reg_val == I40E_VFR_VFACTIVE)
                        break;
-               msleep(I40EVF_RESET_WAIT_MS);
        }
+
        pci_set_master(adapter->pdev);
-       /* extra wait to make sure minimum wait is met */
-       msleep(I40EVF_RESET_WAIT_MS);
+
        if (i == I40EVF_RESET_WAIT_COUNT) {
                struct i40evf_mac_filter *ftmp;
                struct i40evf_vlan_filter *fv, *fvtmp;
index a7895c4cbcc3e09023e8901a33d420427548071b..c30eea8399a7dfb4de23717d33e8169e054cd600 100644 (file)
@@ -226,7 +226,7 @@ static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb)
        return 0;
 }
 
-static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb)
+static int igb_ptp_adjfine_82580(struct ptp_clock_info *ptp, long scaled_ppm)
 {
        struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
                                               ptp_caps);
@@ -235,13 +235,13 @@ static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb)
        u64 rate;
        u32 inca;
 
-       if (ppb < 0) {
+       if (scaled_ppm < 0) {
                neg_adj = 1;
-               ppb = -ppb;
+               scaled_ppm = -scaled_ppm;
        }
-       rate = ppb;
-       rate <<= 26;
-       rate = div_u64(rate, 1953125);
+       rate = scaled_ppm;
+       rate <<= 13;
+       rate = div_u64(rate, 15625);
 
        inca = rate & INCVALUE_MASK;
        if (neg_adj)
@@ -1103,7 +1103,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
                adapter->ptp_caps.max_adj = 62499999;
                adapter->ptp_caps.n_ext_ts = 0;
                adapter->ptp_caps.pps = 0;
-               adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
+               adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
                adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
                adapter->ptp_caps.gettime64 = igb_ptp_gettime_82576;
                adapter->ptp_caps.settime64 = igb_ptp_settime_82576;
@@ -1131,7 +1131,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
                adapter->ptp_caps.n_pins = IGB_N_SDP;
                adapter->ptp_caps.pps = 1;
                adapter->ptp_caps.pin_config = adapter->sdp_config;
-               adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
+               adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
                adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210;
                adapter->ptp_caps.gettime64 = igb_ptp_gettime_i210;
                adapter->ptp_caps.settime64 = igb_ptp_settime_i210;
index b06e32d0d22af4dc0c9bc3cfbc5fc184da2d6c57..ef81c3d8c2952fa305390232bf9e93a901f402f2 100644 (file)
@@ -1027,4 +1027,6 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
                                  struct ixgbe_ring *tx_ring);
 u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter);
 void ixgbe_store_reta(struct ixgbe_adapter *adapter);
+s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
+                      u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
 #endif /* _IXGBE_H_ */
index fb51be74dd4c2f790cd9636439652ad3ad283c55..805ab319e578ef16fc90a046518b54086bd966ce 100644 (file)
@@ -367,7 +367,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw)
        }
 
        /* Negotiate the fc mode to use */
-       ixgbe_fc_autoneg(hw);
+       hw->mac.ops.fc_autoneg(hw);
 
        /* Disable any previous flow control settings */
        fctrl_reg = IXGBE_READ_REG(hw, IXGBE_FCTRL);
@@ -1179,6 +1179,7 @@ static const struct ixgbe_mac_operations mac_ops_82598 = {
        .get_link_capabilities  = &ixgbe_get_link_capabilities_82598,
        .led_on                 = &ixgbe_led_on_generic,
        .led_off                = &ixgbe_led_off_generic,
+       .init_led_link_act      = ixgbe_init_led_link_act_generic,
        .blink_led_start        = &ixgbe_blink_led_start_generic,
        .blink_led_stop         = &ixgbe_blink_led_stop_generic,
        .set_rar                = &ixgbe_set_rar_generic,
@@ -1193,6 +1194,7 @@ static const struct ixgbe_mac_operations mac_ops_82598 = {
        .set_vfta               = &ixgbe_set_vfta_82598,
        .fc_enable              = &ixgbe_fc_enable_82598,
        .setup_fc               = ixgbe_setup_fc_generic,
+       .fc_autoneg             = ixgbe_fc_autoneg,
        .set_fw_drv_ver         = NULL,
        .acquire_swfw_sync      = &ixgbe_acquire_swfw_sync,
        .release_swfw_sync      = &ixgbe_release_swfw_sync,
index 63b25006ac90393d6ccdebab051739b706ccc36c..e00aaeb9182740f84c1a6a1ba48732f38f2df5fd 100644 (file)
@@ -2204,6 +2204,7 @@ static const struct ixgbe_mac_operations mac_ops_82599 = {
        .get_link_capabilities  = &ixgbe_get_link_capabilities_82599,
        .led_on                 = &ixgbe_led_on_generic,
        .led_off                = &ixgbe_led_off_generic,
+       .init_led_link_act      = ixgbe_init_led_link_act_generic,
        .blink_led_start        = &ixgbe_blink_led_start_generic,
        .blink_led_stop         = &ixgbe_blink_led_stop_generic,
        .set_rar                = &ixgbe_set_rar_generic,
@@ -2219,6 +2220,7 @@ static const struct ixgbe_mac_operations mac_ops_82599 = {
        .set_vfta               = &ixgbe_set_vfta_generic,
        .fc_enable              = &ixgbe_fc_enable_generic,
        .setup_fc               = ixgbe_setup_fc_generic,
+       .fc_autoneg             = ixgbe_fc_autoneg,
        .set_fw_drv_ver         = &ixgbe_set_fw_drv_ver_generic,
        .init_uta_tables        = &ixgbe_init_uta_tables_generic,
        .setup_sfp              = &ixgbe_setup_sfp_modules_82599,
index 77d3039283f67ce380e93b660c4dfe264888ab42..8832df3eba255c9b99c2110f8d7bcf6f94111071 100644 (file)
@@ -298,10 +298,12 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
        IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
        IXGBE_WRITE_FLUSH(hw);
 
-       /* Setup flow control */
-       ret_val = hw->mac.ops.setup_fc(hw);
-       if (ret_val)
-               return ret_val;
+       /* Setup flow control if method for doing so */
+       if (hw->mac.ops.setup_fc) {
+               ret_val = hw->mac.ops.setup_fc(hw);
+               if (ret_val)
+                       return ret_val;
+       }
 
        /* Cashe bit indicating need for crosstalk fix */
        switch (hw->mac.type) {
@@ -390,6 +392,9 @@ s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw)
                status = hw->mac.ops.start_hw(hw);
        }
 
+       /* Initialize the LED link active for LED blink support */
+       hw->mac.ops.init_led_link_act(hw);
+
        return status;
 }
 
@@ -772,6 +777,49 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw)
        return ixgbe_disable_pcie_master(hw);
 }
 
+/**
+ *  ixgbe_init_led_link_act_generic - Store the LED index link/activity.
+ *  @hw: pointer to hardware structure
+ *
+ *  Store the index for the link active LED. This will be used to support
+ *  blinking the LED.
+ **/
+s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw)
+{
+       struct ixgbe_mac_info *mac = &hw->mac;
+       u32 led_reg, led_mode;
+       u16 i;
+
+       led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+       /* Get LED link active from the LEDCTL register */
+       for (i = 0; i < 4; i++) {
+               led_mode = led_reg >> IXGBE_LED_MODE_SHIFT(i);
+
+               if ((led_mode & IXGBE_LED_MODE_MASK_BASE) ==
+                   IXGBE_LED_LINK_ACTIVE) {
+                       mac->led_link_act = i;
+                       return 0;
+               }
+       }
+
+       /* If LEDCTL register does not have the LED link active set, then use
+        * known MAC defaults.
+        */
+       switch (hw->mac.type) {
+       case ixgbe_mac_x550em_a:
+               mac->led_link_act = 0;
+               break;
+       case ixgbe_mac_X550EM_x:
+               mac->led_link_act = 1;
+               break;
+       default:
+               mac->led_link_act = 2;
+       }
+
+       return 0;
+}
+
 /**
  *  ixgbe_led_on_generic - Turns on the software controllable LEDs.
  *  @hw: pointer to hardware structure
@@ -2127,7 +2175,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
        }
 
        /* Negotiate the fc mode to use */
-       ixgbe_fc_autoneg(hw);
+       hw->mac.ops.fc_autoneg(hw);
 
        /* Disable any previous flow control settings */
        mflcn_reg = IXGBE_READ_REG(hw, IXGBE_MFLCN);
@@ -2231,8 +2279,8 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
  *  Find the intersection between advertised settings and link partner's
  *  advertised settings
  **/
-static s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
-                             u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm)
+s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
+                      u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm)
 {
        if ((!(adv_reg)) ||  (!(lp_reg)))
                return IXGBE_ERR_FC_NOT_NEGOTIATED;
index 6d4c260d0cbdf9e31a8d09e55b5231323a12430b..5b3e3c65927e58b9e4ca602731796d96af0b1769 100644 (file)
@@ -49,6 +49,7 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw);
 
 s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index);
 s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw);
 
 s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw);
 s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
index f49f80380aa57325ebb6c6c80f1591e7ee23e419..fd192bf29b26bae2a30a564aef486052d7c39a95 100644 (file)
@@ -2225,11 +2225,11 @@ static int ixgbe_set_phys_id(struct net_device *netdev,
                return 2;
 
        case ETHTOOL_ID_ON:
-               hw->mac.ops.led_on(hw, hw->bus.func);
+               hw->mac.ops.led_on(hw, hw->mac.led_link_act);
                break;
 
        case ETHTOOL_ID_OFF:
-               hw->mac.ops.led_off(hw, hw->bus.func);
+               hw->mac.ops.led_off(hw, hw->mac.led_link_act);
                break;
 
        case ETHTOOL_ID_INACTIVE:
index cbd2cfa1b154bb90a90290ba4c3756132473ba79..2436984481ccb5329e6b7d10ec8c5f48c8cd68be 100644 (file)
@@ -54,6 +54,7 @@
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_gact.h>
 #include <net/tc_act/tc_mirred.h>
+#include <net/vxlan.h>
 
 #include "ixgbe.h"
 #include "ixgbe_common.h"
@@ -3070,6 +3071,9 @@ static void ixgbe_free_irq(struct ixgbe_adapter *adapter)
                return;
        }
 
+       if (!adapter->msix_entries)
+               return;
+
        for (vector = 0; vector < adapter->num_q_vectors; vector++) {
                struct ixgbe_q_vector *q_vector = adapter->q_vector[vector];
                struct msix_entry *entry = &adapter->msix_entries[vector];
@@ -5621,7 +5625,8 @@ static void ixgbe_init_dcb(struct ixgbe_adapter *adapter)
  * Fields are initialized based on PCI device information and
  * OS network device settings (MTU size).
  **/
-static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
+static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
+                        const struct ixgbe_info *ii)
 {
        struct ixgbe_hw *hw = &adapter->hw;
        struct pci_dev *pdev = adapter->pdev;
@@ -5637,6 +5642,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
        hw->subsystem_vendor_id = pdev->subsystem_vendor;
        hw->subsystem_device_id = pdev->subsystem_device;
 
+       /* get_invariants needs the device IDs */
+       ii->get_invariants(hw);
+
        /* Set common capability flags and settings */
        rss = min_t(int, ixgbe_max_rss_indices(adapter), num_online_cpus());
        adapter->ring_feature[RING_F_RSS].limit = rss;
@@ -7653,11 +7661,17 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
        /* snag network header to get L4 type and address */
        skb = first->skb;
        hdr.network = skb_network_header(skb);
+       if (unlikely(hdr.network <= skb->data))
+               return;
        if (skb->encapsulation &&
            first->protocol == htons(ETH_P_IP) &&
-           hdr.ipv4->protocol != IPPROTO_UDP) {
+           hdr.ipv4->protocol == IPPROTO_UDP) {
                struct ixgbe_adapter *adapter = q_vector->adapter;
 
+               if (unlikely(skb_tail_pointer(skb) < hdr.network +
+                            VXLAN_HEADROOM))
+                       return;
+
                /* verify the port is recognized as VXLAN */
                if (adapter->vxlan_port &&
                    udp_hdr(skb)->dest == adapter->vxlan_port)
@@ -7668,6 +7682,12 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
                        hdr.network = skb_inner_network_header(skb);
        }
 
+       /* Make sure we have at least [minimum IPv4 header + TCP]
+        * or [IPv6 header] bytes
+        */
+       if (unlikely(skb_tail_pointer(skb) < hdr.network + 40))
+               return;
+
        /* Currently only IPv4/IPv6 with TCP is supported */
        switch (hdr.ipv4->version) {
        case IPVERSION:
@@ -7687,6 +7707,10 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
        if (l4_proto != IPPROTO_TCP)
                return;
 
+       if (unlikely(skb_tail_pointer(skb) < hdr.network +
+                    hlen + sizeof(struct tcphdr)))
+               return;
+
        th = (struct tcphdr *)(hdr.network + hlen);
 
        /* skip this packet since the socket is closing */
@@ -9162,10 +9186,14 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
                goto fwd_add_err;
        fwd_adapter->pool = pool;
        fwd_adapter->real_adapter = adapter;
-       err = ixgbe_fwd_ring_up(vdev, fwd_adapter);
-       if (err)
-               goto fwd_add_err;
-       netif_tx_start_all_queues(vdev);
+
+       if (netif_running(pdev)) {
+               err = ixgbe_fwd_ring_up(vdev, fwd_adapter);
+               if (err)
+                       goto fwd_add_err;
+               netif_tx_start_all_queues(vdev);
+       }
+
        return fwd_adapter;
 fwd_add_err:
        /* unwind counter and free adapter struct */
@@ -9500,6 +9528,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        hw->mac.ops   = *ii->mac_ops;
        hw->mac.type  = ii->mac;
        hw->mvals     = ii->mvals;
+       if (ii->link_ops)
+               hw->link.ops  = *ii->link_ops;
 
        /* EEPROM */
        hw->eeprom.ops = *ii->eeprom_ops;
@@ -9523,10 +9553,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        hw->phy.mdio.mdio_read = ixgbe_mdio_read;
        hw->phy.mdio.mdio_write = ixgbe_mdio_write;
 
-       ii->get_invariants(hw);
-
        /* setup the private structure */
-       err = ixgbe_sw_init(adapter);
+       err = ixgbe_sw_init(adapter, ii);
        if (err)
                goto err_sw_init;
 
index 021ab9b89c71e30cd8262118e051ab9aaf672ec4..3b8362085f57b15dba5b9ff5e8c61bf41bec3713 100644 (file)
@@ -109,8 +109,8 @@ static u8 ixgbe_ones_comp_byte_add(u8 add1, u8 add2)
  *
  *  Returns an error code on error.
  */
-static s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
-                                              u16 reg, u16 *val, bool lock)
+s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
+                                       u16 reg, u16 *val, bool lock)
 {
        u32 swfw_mask = hw->phy.phy_semaphore_mask;
        int max_retry = 10;
@@ -177,36 +177,6 @@ fail:
        return IXGBE_ERR_I2C;
 }
 
-/**
- *  ixgbe_read_i2c_combined_generic - Perform I2C read combined operation
- *  @hw: pointer to the hardware structure
- *  @addr: I2C bus address to read from
- *  @reg: I2C device register to read from
- *  @val: pointer to location to receive read value
- *
- *  Returns an error code on error.
- */
-s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
-                                   u16 reg, u16 *val)
-{
-       return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, true);
-}
-
-/**
- *  ixgbe_read_i2c_combined_generic_unlocked - Unlocked I2C read combined
- *  @hw: pointer to the hardware structure
- *  @addr: I2C bus address to read from
- *  @reg: I2C device register to read from
- *  @val: pointer to location to receive read value
- *
- *  Returns an error code on error.
- */
-s32 ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
-                                            u16 reg, u16 *val)
-{
-       return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, false);
-}
-
 /**
  *  ixgbe_write_i2c_combined_generic_int - Perform I2C write combined operation
  *  @hw: pointer to the hardware structure
@@ -217,8 +187,8 @@ s32 ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
  *
  *  Returns an error code on error.
  */
-static s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
-                                               u16 reg, u16 val, bool lock)
+s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
+                                        u16 reg, u16 val, bool lock)
 {
        u32 swfw_mask = hw->phy.phy_semaphore_mask;
        int max_retry = 1;
@@ -273,33 +243,39 @@ fail:
 }
 
 /**
- *  ixgbe_write_i2c_combined_generic - Perform I2C write combined operation
- *  @hw: pointer to the hardware structure
- *  @addr: I2C bus address to write to
- *  @reg: I2C device register to write to
- *  @val: value to write
+ *  ixgbe_probe_phy - Probe a single address for a PHY
+ *  @hw: pointer to hardware structure
+ *  @phy_addr: PHY address to probe
  *
- *  Returns an error code on error.
- */
-s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw,
-                                    u8 addr, u16 reg, u16 val)
+ *  Returns true if PHY found
+ **/
+static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr)
 {
-       return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, true);
-}
+       u16 ext_ability = 0;
 
-/**
- *  ixgbe_write_i2c_combined_generic_unlocked - Unlocked I2C write combined
- *  @hw: pointer to the hardware structure
- *  @addr: I2C bus address to write to
- *  @reg: I2C device register to write to
- *  @val: value to write
- *
- *  Returns an error code on error.
- */
-s32 ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw,
-                                             u8 addr, u16 reg, u16 val)
-{
-       return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, false);
+       hw->phy.mdio.prtad = phy_addr;
+       if (mdio45_probe(&hw->phy.mdio, phy_addr) != 0)
+               return false;
+
+       if (ixgbe_get_phy_id(hw))
+               return false;
+
+       hw->phy.type = ixgbe_get_phy_type_from_id(hw->phy.id);
+
+       if (hw->phy.type == ixgbe_phy_unknown) {
+               hw->phy.ops.read_reg(hw,
+                                    MDIO_PMA_EXTABLE,
+                                    MDIO_MMD_PMAPMD,
+                                    &ext_ability);
+               if (ext_ability &
+                   (MDIO_PMA_EXTABLE_10GBT |
+                    MDIO_PMA_EXTABLE_1000BT))
+                       hw->phy.type = ixgbe_phy_cu_unknown;
+               else
+                       hw->phy.type = ixgbe_phy_generic;
+       }
+
+       return true;
 }
 
 /**
@@ -311,7 +287,7 @@ s32 ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw,
 s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
 {
        u32 phy_addr;
-       u16 ext_ability = 0;
+       u32 status = IXGBE_ERR_PHY_ADDR_INVALID;
 
        if (!hw->phy.phy_semaphore_mask) {
                if (hw->bus.lan_id)
@@ -320,37 +296,34 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
                        hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM;
        }
 
-       if (hw->phy.type == ixgbe_phy_unknown) {
-               for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) {
-                       hw->phy.mdio.prtad = phy_addr;
-                       if (mdio45_probe(&hw->phy.mdio, phy_addr) == 0) {
-                               ixgbe_get_phy_id(hw);
-                               hw->phy.type =
-                                       ixgbe_get_phy_type_from_id(hw->phy.id);
-
-                               if (hw->phy.type == ixgbe_phy_unknown) {
-                                       hw->phy.ops.read_reg(hw,
-                                                            MDIO_PMA_EXTABLE,
-                                                            MDIO_MMD_PMAPMD,
-                                                            &ext_ability);
-                                       if (ext_ability &
-                                           (MDIO_PMA_EXTABLE_10GBT |
-                                            MDIO_PMA_EXTABLE_1000BT))
-                                               hw->phy.type =
-                                                        ixgbe_phy_cu_unknown;
-                                       else
-                                               hw->phy.type =
-                                                        ixgbe_phy_generic;
-                               }
+       if (hw->phy.type != ixgbe_phy_unknown)
+               return 0;
 
-                               return 0;
-                       }
+       if (hw->phy.nw_mng_if_sel) {
+               phy_addr = (hw->phy.nw_mng_if_sel &
+                           IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
+                          IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
+               if (ixgbe_probe_phy(hw, phy_addr))
+                       return 0;
+               else
+                       return IXGBE_ERR_PHY_ADDR_INVALID;
+       }
+
+       for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) {
+               if (ixgbe_probe_phy(hw, phy_addr)) {
+                       status = 0;
+                       break;
                }
-               /* indicate no PHY found */
-               hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
-               return IXGBE_ERR_PHY_ADDR_INVALID;
        }
-       return 0;
+
+       /* Certain media types do not have a phy so an address will not
+        * be found and the code will take this path.  Caller has to
+        * decide if it is an error or not.
+        */
+       if (status)
+               hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
+
+       return status;
 }
 
 /**
@@ -416,7 +389,8 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
        case TN1010_PHY_ID:
                phy_type = ixgbe_phy_tn;
                break;
-       case X550_PHY_ID:
+       case X550_PHY_ID2:
+       case X550_PHY_ID3:
        case X540_PHY_ID:
                phy_type = ixgbe_phy_aq;
                break;
@@ -427,6 +401,7 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
                phy_type = ixgbe_phy_nl;
                break;
        case X557_PHY_ID:
+       case X557_PHY_ID2:
                phy_type = ixgbe_phy_x550em_ext_t;
                break;
        default:
@@ -477,8 +452,7 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw)
         */
        for (i = 0; i < 30; i++) {
                msleep(100);
-               hw->phy.ops.read_reg(hw, MDIO_CTRL1,
-                                    MDIO_MMD_PHYXS, &ctrl);
+               hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_PHYXS, &ctrl);
                if (!(ctrl & MDIO_CTRL1_RESET)) {
                        udelay(2);
                        break;
@@ -705,53 +679,52 @@ s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
 
        ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg);
 
-       if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
-               /* Set or unset auto-negotiation 10G advertisement */
-               hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL,
-                                    MDIO_MMD_AN,
-                                    &autoneg_reg);
+       /* Set or unset auto-negotiation 10G advertisement */
+       hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL, MDIO_MMD_AN, &autoneg_reg);
 
-               autoneg_reg &= ~MDIO_AN_10GBT_CTRL_ADV10G;
-               if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
-                       autoneg_reg |= MDIO_AN_10GBT_CTRL_ADV10G;
-
-               hw->phy.ops.write_reg(hw, MDIO_AN_10GBT_CTRL,
-                                     MDIO_MMD_AN,
-                                     autoneg_reg);
-       }
+       autoneg_reg &= ~MDIO_AN_10GBT_CTRL_ADV10G;
+       if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) &&
+           (speed & IXGBE_LINK_SPEED_10GB_FULL))
+               autoneg_reg |= MDIO_AN_10GBT_CTRL_ADV10G;
 
-       if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
-               /* Set or unset auto-negotiation 1G advertisement */
-               hw->phy.ops.read_reg(hw,
-                                    IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
-                                    MDIO_MMD_AN,
-                                    &autoneg_reg);
+       hw->phy.ops.write_reg(hw, MDIO_AN_10GBT_CTRL, MDIO_MMD_AN, autoneg_reg);
 
-               autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE;
-               if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL)
-                       autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE;
+       hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+                            MDIO_MMD_AN, &autoneg_reg);
 
-               hw->phy.ops.write_reg(hw,
-                                     IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
-                                     MDIO_MMD_AN,
-                                     autoneg_reg);
+       if (hw->mac.type == ixgbe_mac_X550) {
+               /* Set or unset auto-negotiation 5G advertisement */
+               autoneg_reg &= ~IXGBE_MII_5GBASE_T_ADVERTISE;
+               if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_5GB_FULL) &&
+                   (speed & IXGBE_LINK_SPEED_5GB_FULL))
+                       autoneg_reg |= IXGBE_MII_5GBASE_T_ADVERTISE;
+
+               /* Set or unset auto-negotiation 2.5G advertisement */
+               autoneg_reg &= ~IXGBE_MII_2_5GBASE_T_ADVERTISE;
+               if ((hw->phy.autoneg_advertised &
+                    IXGBE_LINK_SPEED_2_5GB_FULL) &&
+                   (speed & IXGBE_LINK_SPEED_2_5GB_FULL))
+                       autoneg_reg |= IXGBE_MII_2_5GBASE_T_ADVERTISE;
        }
 
-       if (speed & IXGBE_LINK_SPEED_100_FULL) {
-               /* Set or unset auto-negotiation 100M advertisement */
-               hw->phy.ops.read_reg(hw, MDIO_AN_ADVERTISE,
-                                    MDIO_MMD_AN,
-                                    &autoneg_reg);
+       /* Set or unset auto-negotiation 1G advertisement */
+       autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE;
+       if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) &&
+           (speed & IXGBE_LINK_SPEED_1GB_FULL))
+               autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE;
 
-               autoneg_reg &= ~(ADVERTISE_100FULL |
-                                ADVERTISE_100HALF);
-               if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL)
-                       autoneg_reg |= ADVERTISE_100FULL;
+       hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+                             MDIO_MMD_AN, autoneg_reg);
 
-               hw->phy.ops.write_reg(hw, MDIO_AN_ADVERTISE,
-                                     MDIO_MMD_AN,
-                                     autoneg_reg);
-       }
+       /* Set or unset auto-negotiation 100M advertisement */
+       hw->phy.ops.read_reg(hw, MDIO_AN_ADVERTISE, MDIO_MMD_AN, &autoneg_reg);
+
+       autoneg_reg &= ~(ADVERTISE_100FULL | ADVERTISE_100HALF);
+       if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) &&
+           (speed & IXGBE_LINK_SPEED_100_FULL))
+               autoneg_reg |= ADVERTISE_100FULL;
+
+       hw->phy.ops.write_reg(hw, MDIO_AN_ADVERTISE, MDIO_MMD_AN, autoneg_reg);
 
        /* Blocked by MNG FW so don't reset PHY */
        if (ixgbe_check_reset_blocked(hw))
@@ -830,6 +803,7 @@ static s32 ixgbe_get_copper_speeds_supported(struct ixgbe_hw *hw)
                hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL;
                break;
        case ixgbe_mac_X550EM_x:
+       case ixgbe_mac_x550em_a:
                hw->phy.speeds_supported &= ~IXGBE_LINK_SPEED_100_FULL;
                break;
        default:
@@ -2396,9 +2370,7 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on)
        if (!on && ixgbe_mng_present(hw))
                return 0;
 
-       status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL,
-                                     IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
-                                     &reg);
+       status = hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_VEND1, &reg);
        if (status)
                return status;
 
@@ -2410,8 +2382,6 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on)
                reg |= IXGBE_MDIO_PHY_SET_LOW_POWER_MODE;
        }
 
-       status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL,
-                                      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
-                                      reg);
+       status = hw->phy.ops.write_reg(hw, MDIO_CTRL1, MDIO_MMD_VEND1, reg);
        return status;
 }
index cc735ec3e045f74facb025ebdf0a3d147d01f065..ecf05f838fc52a19bdaee01456a6e948af075b5a 100644 (file)
@@ -195,12 +195,8 @@ s32 ixgbe_read_i2c_sff8472_generic(struct ixgbe_hw *hw, u8 byte_offset,
                                   u8 *sff8472_data);
 s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
                                   u8 eeprom_data);
-s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
-                                   u16 reg, u16 *val);
-s32 ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
-                                            u16 reg, u16 *val);
-s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
-                                    u16 reg, u16 val);
-s32 ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
-                                             u16 reg, u16 val);
+s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg,
+                                       u16 *val, bool lock);
+s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg,
+                                        u16 val, bool lock);
 #endif /* _IXGBE_PHY_H_ */
index 31d82e3abac8beb4f13d40576150a884e695288b..cf21273db20197a13157051704180a7107fe36dc 100644 (file)
@@ -874,19 +874,13 @@ struct ixgbe_thermal_sensor_data {
 #define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB  0x4 /* 1Gb/s */
 #define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB 0x6 /* 10Gb/s */
 
-#define IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG   0x20    /* 10G Control Reg */
 #define IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400        /* 1G Provisioning 1 */
 #define IXGBE_MII_AUTONEG_XNP_TX_REG           0x17    /* 1G XNP Transmit */
-#define IXGBE_MII_AUTONEG_ADVERTISE_REG                0x10    /* 100M Advertisement */
-#define IXGBE_MII_10GBASE_T_ADVERTISE          0x1000  /* full duplex, bit:12*/
 #define IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX    0x4000  /* full duplex, bit:14*/
 #define IXGBE_MII_1GBASE_T_ADVERTISE           0x8000  /* full duplex, bit:15*/
 #define IXGBE_MII_2_5GBASE_T_ADVERTISE         0x0400
 #define IXGBE_MII_5GBASE_T_ADVERTISE           0x0800
-#define IXGBE_MII_100BASE_T_ADVERTISE          0x0100  /* full duplex, bit:8 */
-#define IXGBE_MII_100BASE_T_ADVERTISE_HALF     0x0080  /* half duplex, bit:7 */
 #define IXGBE_MII_RESTART                      0x200
-#define IXGBE_MII_AUTONEG_COMPLETE             0x20
 #define IXGBE_MII_AUTONEG_LINK_UP              0x04
 #define IXGBE_MII_AUTONEG_REG                  0x0
 
@@ -1320,30 +1314,20 @@ struct ixgbe_thermal_sensor_data {
 /* MDIO definitions */
 
 #define IXGBE_MDIO_ZERO_DEV_TYPE               0x0
-#define IXGBE_MDIO_PMA_PMD_DEV_TYPE            0x1
 #define IXGBE_MDIO_PCS_DEV_TYPE                0x3
-#define IXGBE_MDIO_PHY_XS_DEV_TYPE             0x4
-#define IXGBE_MDIO_AUTO_NEG_DEV_TYPE           0x7
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE  0x1E   /* Device 30 */
 #define IXGBE_TWINAX_DEV                       1
 
 #define IXGBE_MDIO_COMMAND_TIMEOUT     100 /* PHY Timeout for 1 GB mode */
 
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL      0x0    /* VS1 Control Reg */
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS       0x1    /* VS1 Status Reg */
 #define IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS  0x0008 /* 1 = Link Up */
 #define IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS 0x0010 /* 0 - 10G, 1 - 1G */
 #define IXGBE_MDIO_VENDOR_SPECIFIC_1_10G_SPEED    0x0018
 #define IXGBE_MDIO_VENDOR_SPECIFIC_1_1G_SPEED     0x0010
 
-#define IXGBE_MDIO_AUTO_NEG_CONTROL    0x0 /* AUTO_NEG Control Reg */
-#define IXGBE_MDIO_AUTO_NEG_STATUS     0x1 /* AUTO_NEG Status Reg */
 #define IXGBE_MDIO_AUTO_NEG_VENDOR_STAT        0xC800 /* AUTO_NEG Vendor Status Reg */
 #define IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM  0xCC00 /* AUTO_NEG Vendor TX Reg */
 #define IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM2 0xCC01 /* AUTO_NEG Vendor Tx Reg */
 #define IXGBE_MDIO_AUTO_NEG_VEN_LSC    0x1 /* AUTO_NEG Vendor Tx LSC */
-#define IXGBE_MDIO_AUTO_NEG_ADVT       0x10 /* AUTO_NEG Advt Reg */
-#define IXGBE_MDIO_AUTO_NEG_LP         0x13 /* AUTO_NEG LP Status Reg */
 #define IXGBE_MDIO_AUTO_NEG_EEE_ADVT   0x3C /* AUTO_NEG EEE Advt Reg */
 
 #define IXGBE_MDIO_PHY_SET_LOW_POWER_MODE       0x0800 /* Set low power mode */
@@ -1393,8 +1377,10 @@ struct ixgbe_thermal_sensor_data {
 #define TN1010_PHY_ID    0x00A19410
 #define TNX_FW_REV       0xB
 #define X540_PHY_ID      0x01540200
-#define X550_PHY_ID      0x01540220
+#define X550_PHY_ID2   0x01540223
+#define X550_PHY_ID3   0x01540221
 #define X557_PHY_ID      0x01540240
+#define X557_PHY_ID2   0x01540250
 #define QT2022_PHY_ID    0x0043A400
 #define ATH_PHY_ID       0x03429050
 #define AQ_FW_REV        0x20
@@ -3352,6 +3338,7 @@ struct ixgbe_mac_operations {
        s32 (*led_off)(struct ixgbe_hw *, u32);
        s32 (*blink_led_start)(struct ixgbe_hw *, u32);
        s32 (*blink_led_stop)(struct ixgbe_hw *, u32);
+       s32 (*init_led_link_act)(struct ixgbe_hw *);
 
        /* RAR, Multicast, VLAN */
        s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32);
@@ -3372,6 +3359,7 @@ struct ixgbe_mac_operations {
        /* Flow Control */
        s32 (*fc_enable)(struct ixgbe_hw *);
        s32 (*setup_fc)(struct ixgbe_hw *);
+       void (*fc_autoneg)(struct ixgbe_hw *);
 
        /* Manageability interface */
        s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8);
@@ -3410,16 +3398,28 @@ struct ixgbe_phy_operations {
        s32 (*read_i2c_sff8472)(struct ixgbe_hw *, u8 , u8 *);
        s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *);
        s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8);
-       s32 (*read_i2c_combined)(struct ixgbe_hw *, u8 addr, u16 reg, u16 *val);
-       s32 (*write_i2c_combined)(struct ixgbe_hw *, u8 addr, u16 reg, u16 val);
        s32 (*check_overtemp)(struct ixgbe_hw *);
        s32 (*set_phy_power)(struct ixgbe_hw *, bool on);
        s32 (*enter_lplu)(struct ixgbe_hw *);
        s32 (*handle_lasi)(struct ixgbe_hw *hw);
-       s32 (*read_i2c_combined_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
-                                         u16 *value);
-       s32 (*write_i2c_combined_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
-                                          u16 value);
+       s32 (*read_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr,
+                                     u8 *value);
+       s32 (*write_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr,
+                                      u8 value);
+};
+
+struct ixgbe_link_operations {
+       s32 (*read_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 *val);
+       s32 (*read_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
+                                 u16 *val);
+       s32 (*write_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 val);
+       s32 (*write_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
+                                  u16 val);
+};
+
+struct ixgbe_link_info {
+       struct ixgbe_link_operations ops;
+       u8 addr;
 };
 
 struct ixgbe_eeprom_info {
@@ -3462,6 +3462,7 @@ struct ixgbe_mac_info {
        u8                              san_mac_rar_index;
        struct ixgbe_thermal_sensor_data  thermal_sensor_data;
        bool                            set_lben;
+       u8                              led_link_act;
 };
 
 struct ixgbe_phy_info {
@@ -3523,6 +3524,7 @@ struct ixgbe_hw {
        struct ixgbe_addr_filter_info   addr_ctrl;
        struct ixgbe_fc_info            fc;
        struct ixgbe_phy_info           phy;
+       struct ixgbe_link_info          link;
        struct ixgbe_eeprom_info        eeprom;
        struct ixgbe_bus_info           bus;
        struct ixgbe_mbx_info           mbx;
@@ -3546,6 +3548,7 @@ struct ixgbe_info {
        const struct ixgbe_eeprom_operations    *eeprom_ops;
        const struct ixgbe_phy_operations       *phy_ops;
        const struct ixgbe_mbx_operations       *mbx_ops;
+       const struct ixgbe_link_operations      *link_ops;
        const u32                       *mvals;
 };
 
@@ -3593,17 +3596,35 @@ struct ixgbe_info {
 #define IXGBE_FUSES0_REV_MASK          (3u << 6)
 
 #define IXGBE_KRM_PORT_CAR_GEN_CTRL(P) ((P) ? 0x8010 : 0x4010)
+#define IXGBE_KRM_LINK_S1(P)           ((P) ? 0x8200 : 0x4200)
 #define IXGBE_KRM_LINK_CTRL_1(P)       ((P) ? 0x820C : 0x420C)
 #define IXGBE_KRM_AN_CNTL_1(P)         ((P) ? 0x822C : 0x422C)
 #define IXGBE_KRM_AN_CNTL_8(P)         ((P) ? 0x8248 : 0x4248)
 #define IXGBE_KRM_SGMII_CTRL(P)                ((P) ? 0x82A0 : 0x42A0)
+#define IXGBE_KRM_LP_BASE_PAGE_HIGH(P) ((P) ? 0x836C : 0x436C)
 #define IXGBE_KRM_DSP_TXFFE_STATE_4(P) ((P) ? 0x8634 : 0x4634)
 #define IXGBE_KRM_DSP_TXFFE_STATE_5(P) ((P) ? 0x8638 : 0x4638)
 #define IXGBE_KRM_RX_TRN_LINKUP_CTRL(P)        ((P) ? 0x8B00 : 0x4B00)
 #define IXGBE_KRM_PMD_DFX_BURNIN(P)    ((P) ? 0x8E00 : 0x4E00)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20(P) ((P) ? 0x9054 : 0x5054)
 #define IXGBE_KRM_TX_COEFF_CTRL_1(P)   ((P) ? 0x9520 : 0x5520)
 #define IXGBE_KRM_RX_ANA_CTL(P)                ((P) ? 0x9A00 : 0x5A00)
 
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA         ~(0x3 << 20)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR         BIT(20)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_LR         (0x2 << 20)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN           BIT(25)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN            BIT(26)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN              BIT(27)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10M          ~(0x7 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_100M         BIT(28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G           (0x2 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G          (0x3 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN           (0x4 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_2_5G         (0x7 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK         (0x7 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART      BIT(31)
+
 #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B           BIT(9)
 #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS         BIT(11)
 
@@ -3618,6 +3639,7 @@ struct ixgbe_info {
 #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KR           BIT(18)
 #define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX          BIT(24)
 #define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR          BIT(26)
+#define IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE              BIT(28)
 #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE           BIT(29)
 #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART          BIT(31)
 
@@ -3627,6 +3649,8 @@ struct ixgbe_info {
 #define IXGBE_KRM_AN_CNTL_8_LINEAR                     BIT(0)
 #define IXGBE_KRM_AN_CNTL_8_LIMITING                   BIT(1)
 
+#define IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE          BIT(10)
+#define IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE          BIT(11)
 #define IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_100_D       BIT(12)
 #define IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_10_D                BIT(19)
 
index f2b1d48a16c3ac7234ffd26b3ab1aef87e2988b0..e2ff823ee202f36c1edd0cd5815209765976aaf4 100644 (file)
@@ -851,6 +851,7 @@ static const struct ixgbe_mac_operations mac_ops_X540 = {
        .get_link_capabilities  = &ixgbe_get_copper_link_capabilities_generic,
        .led_on                 = &ixgbe_led_on_generic,
        .led_off                = &ixgbe_led_off_generic,
+       .init_led_link_act      = ixgbe_init_led_link_act_generic,
        .blink_led_start        = &ixgbe_blink_led_start_X540,
        .blink_led_stop         = &ixgbe_blink_led_stop_X540,
        .set_rar                = &ixgbe_set_rar_generic,
@@ -866,6 +867,7 @@ static const struct ixgbe_mac_operations mac_ops_X540 = {
        .set_vfta               = &ixgbe_set_vfta_generic,
        .fc_enable              = &ixgbe_fc_enable_generic,
        .setup_fc               = ixgbe_setup_fc_generic,
+       .fc_autoneg             = ixgbe_fc_autoneg,
        .set_fw_drv_ver         = &ixgbe_set_fw_drv_ver_generic,
        .init_uta_tables        = &ixgbe_init_uta_tables_generic,
        .setup_sfp              = NULL,
index 7e6b9267ca9da3b4562c150a0e1aac35f8df92be..11fb433eb924ef7f1771400db4c6e69c10354c6c 100644 (file)
 
 static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *, ixgbe_link_speed);
 static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *);
+static void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *);
+static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *);
+static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *);
 
 static s32 ixgbe_get_invariants_X550_x(struct ixgbe_hw *hw)
+{
+       struct ixgbe_mac_info *mac = &hw->mac;
+       struct ixgbe_phy_info *phy = &hw->phy;
+       struct ixgbe_link_info *link = &hw->link;
+
+       /* Start with X540 invariants, since they are so similar */
+       ixgbe_get_invariants_X540(hw);
+
+       /* Non-copper media: clear the PHY power-control hook */
+       if (mac->ops.get_media_type(hw) != ixgbe_media_type_copper)
+               phy->ops.set_phy_power = NULL;
+
+       /* Address handed to the link read/write ops via hw->link.addr */
+       link->addr = IXGBE_CS4227;
+
+       return 0;
+}
+
+static s32 ixgbe_get_invariants_X550_a(struct ixgbe_hw *hw)
 {
        struct ixgbe_mac_info *mac = &hw->mac;
        struct ixgbe_phy_info *phy = &hw->phy;
@@ -69,8 +89,7 @@ static void ixgbe_setup_mux_ctl(struct ixgbe_hw *hw)
  */
 static s32 ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value)
 {
-       return hw->phy.ops.read_i2c_combined_unlocked(hw, IXGBE_CS4227, reg,
-                                                     value);
+       return hw->link.ops.read_link_unlocked(hw, hw->link.addr, reg, value);
 }
 
 /**
@@ -83,8 +102,7 @@ static s32 ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value)
  */
 static s32 ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value)
 {
-       return hw->phy.ops.write_i2c_combined_unlocked(hw, IXGBE_CS4227, reg,
-                                                      value);
+       return hw->link.ops.write_link_unlocked(hw, hw->link.addr, reg, value);
 }
 
 /**
@@ -322,6 +340,68 @@ static s32 ixgbe_write_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr,
        return IXGBE_NOT_IMPLEMENTED;
 }
 
+/**
+ * ixgbe_read_i2c_combined_generic - Perform I2C read combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to read from
+ * @reg: I2C device register to read from
+ * @val: pointer to location to receive read value
+ *
+ * Forwards to ixgbe_read_i2c_combined_generic_int() with its last argument
+ * set to true and returns that helper's status (an error code on error).
+ **/
+static s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
+                                          u16 reg, u16 *val)
+{
+       return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, true);
+}
+
+/**
+ * ixgbe_read_i2c_combined_generic_unlocked - Do I2C read combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to read from
+ * @reg: I2C device register to read from
+ * @val: pointer to location to receive read value
+ *
+ * Forwards to ixgbe_read_i2c_combined_generic_int() with its last argument
+ * set to false, where ixgbe_read_i2c_combined_generic() passes true
+ * (presumably the "take the lock" selector - confirm in the helper).
+ * Returns the helper's status (an error code on error).
+ **/
+static s32
+ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
+                                        u16 reg, u16 *val)
+{
+       return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, false);
+}
+
+/**
+ * ixgbe_write_i2c_combined_generic - Perform I2C write combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to write to
+ * @reg: I2C device register to write to
+ * @val: value to write
+ *
+ * Forwards to ixgbe_write_i2c_combined_generic_int() with its last argument
+ * set to true and returns that helper's status (an error code on error).
+ **/
+static s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw,
+                                           u8 addr, u16 reg, u16 val)
+{
+       return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, true);
+}
+
+/**
+ * ixgbe_write_i2c_combined_generic_unlocked - Do I2C write combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to write to
+ * @reg: I2C device register to write to
+ * @val: value to write
+ *
+ * Forwards to ixgbe_write_i2c_combined_generic_int() with its last argument
+ * set to false, where ixgbe_write_i2c_combined_generic() passes true
+ * (presumably the "take the lock" selector - confirm in the helper).
+ * Returns the helper's status (an error code on error).
+ **/
+static s32
+ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw,
+                                         u8 addr, u16 reg, u16 val)
+{
+       return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, false);
+}
+
 /** ixgbe_init_eeprom_params_X550 - Initialize EEPROM params
  *  @hw: pointer to hardware structure
  *
@@ -1128,47 +1208,17 @@ out:
        return ret;
 }
 
-/** ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode.
+/**
+ *  ixgbe_setup_ixfi_x550em_x - MAC specific iXFI configuration
  *  @hw: pointer to hardware structure
- *  @speed: the link speed to force
  *
- *  Configures the integrated KR PHY to use iXFI mode. Used to connect an
- *  internal and external PHY at a specific speed, without autonegotiation.
+ *  iXfI configuration needed for ixgbe_mac_X550EM_x devices.
  **/
-static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
+static s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw)
 {
        s32 status;
        u32 reg_val;
 
-       /* Disable AN and force speed to 10G Serial. */
-       status = ixgbe_read_iosf_sb_reg_x550(hw,
-                                       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                                       IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
-       if (status)
-               return status;
-
-       reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
-       reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK;
-
-       /* Select forced link speed for internal PHY. */
-       switch (*speed) {
-       case IXGBE_LINK_SPEED_10GB_FULL:
-               reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G;
-               break;
-       case IXGBE_LINK_SPEED_1GB_FULL:
-               reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G;
-               break;
-       default:
-               /* Other link speeds are not supported by internal KR PHY. */
-               return IXGBE_ERR_LINK_SETUP;
-       }
-
-       status = ixgbe_write_iosf_sb_reg_x550(hw,
-                               IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                               IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
-       if (status)
-               return status;
-
        /* Disable training protocol FSM. */
        status = ixgbe_read_iosf_sb_reg_x550(hw,
                                IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
@@ -1228,20 +1278,106 @@ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
        status = ixgbe_write_iosf_sb_reg_x550(hw,
                                IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id),
                                IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
-       if (status)
+       return status;
+}
+
+/**
+ *  ixgbe_restart_an_internal_phy_x550em - restart autonegotiation for the
+ *  internal PHY
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets the AN_RESTART bit in KRM_LINK_CTRL_1 and, on x550em_a, also sets
+ *  FW_AN_RESTART in KRM_PMD_FLX_MASK_ST20. Returns 0 on success or the
+ *  status of the first failing IOSF sideband access.
+ **/
+static s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw)
+{
+       s32 status;
+       u32 link_ctrl;
+
+       /* Restart auto-negotiation. */
+       status = hw->mac.ops.read_iosf_sb_reg(hw,
+                               IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &link_ctrl);
+
+       if (status) {
+               hw_dbg(hw, "Auto-negotiation did not complete\n");
                return status;
+       }
 
-       /* Toggle port SW reset by AN reset. */
-       status = ixgbe_read_iosf_sb_reg_x550(hw,
+       link_ctrl |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+       status = hw->mac.ops.write_iosf_sb_reg(hw,
                                IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                               IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, link_ctrl);
+       /* Do not tell FW that AN was restarted if the restart write failed */
+       if (status)
+               return status;
+
+       if (hw->mac.type == ixgbe_mac_x550em_a) {
+               u32 flx_mask_st20;
+
+               /* Indicate to FW that AN restart has been asserted */
+               status = hw->mac.ops.read_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_mask_st20);
+
+               if (status) {
+                       hw_dbg(hw, "Auto-negotiation did not complete\n");
+                       return status;
+               }
+
+               flx_mask_st20 |= IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART;
+               status = hw->mac.ops.write_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, flx_mask_st20);
+       }
+
+       return status;
+}
+
+/** ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode.
+ *  @hw: pointer to hardware structure
+ *  @speed: the link speed to force (10G or 1G full; any other value is
+ *          rejected with IXGBE_ERR_LINK_SETUP)
+ *
+ *  Configures the integrated KR PHY to use iXFI mode. Used to connect an
+ *  internal and external PHY at a specific speed, without autonegotiation.
+ *  Returns 0 on success or the first non-zero status encountered.
+ **/
+static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
+{
+       s32 status;
+       u32 reg_val;
+
+       /* Disable AN and clear the forced-speed field; *speed picks it below. */
+       status = ixgbe_read_iosf_sb_reg_x550(hw,
+                                       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
        if (status)
                return status;
 
-       reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+       reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
+       reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK;
+
+       /* Select forced link speed for internal PHY. */
+       switch (*speed) {
+       case IXGBE_LINK_SPEED_10GB_FULL:
+               reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G;
+               break;
+       case IXGBE_LINK_SPEED_1GB_FULL:
+               reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G;
+               break;
+       default:
+               /* Other link speeds are not supported by internal KR PHY. */
+               return IXGBE_ERR_LINK_SETUP;
+       }
+
        status = ixgbe_write_iosf_sb_reg_x550(hw,
                                IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
                                IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+       if (status)
+               return status;
+
+       /* Additional configuration needed for x550em_x */
+       if (hw->mac.type == ixgbe_mac_X550EM_x) {
+               status = ixgbe_setup_ixfi_x550em_x(hw);
+               if (status)
+                       return status;
+       }
+
+       /* Toggle port SW reset by AN reset. */
+       status = ixgbe_restart_an_internal_phy_x550em(hw);
 
        return status;
 }
@@ -1292,7 +1428,7 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
                                __always_unused bool autoneg_wait_to_complete)
 {
        s32 status;
-       u16 slice, value;
+       u16 reg_slice, reg_val;
        bool setup_linear = false;
 
        /* Check if SFP module is supported and linear */
@@ -1308,71 +1444,68 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
        if (status)
                return status;
 
-       if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
-               /* Configure CS4227 LINE side to 10G SR. */
-               slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12);
-               value = IXGBE_CS4227_SPEED_10G;
-               status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
-                                                         slice, value);
-               if (status)
-                       goto i2c_err;
+       /* Configure internal PHY for KR/KX. */
+       ixgbe_setup_kr_speed_x550em(hw, speed);
 
-               slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12);
-               value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
-               status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
-                                                         slice, value);
-               if (status)
-                       goto i2c_err;
-
-               /* Configure CS4227 for HOST connection rate then type. */
-               slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12);
-               value = speed & IXGBE_LINK_SPEED_10GB_FULL ?
-                       IXGBE_CS4227_SPEED_10G : IXGBE_CS4227_SPEED_1G;
-               status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
-                                                         slice, value);
-               if (status)
-                       goto i2c_err;
+       /* Configure CS4227 LINE side to proper mode. */
+       reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12);
+       if (setup_linear)
+               reg_val = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1;
+       else
+               reg_val = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1;
 
-               slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12);
-               if (setup_linear)
-                       value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
-               else
-                       value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
-               status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
-                                                         slice, value);
-               if (status)
-                       goto i2c_err;
+       status = hw->link.ops.write_link(hw, hw->link.addr, reg_slice,
+                                        reg_val);
 
-               /* Setup XFI internal link. */
-               status = ixgbe_setup_ixfi_x550em(hw, &speed);
-               if (status) {
-                       hw_dbg(hw, "setup_ixfi failed with %d\n", status);
-                       return status;
-               }
-       } else {
-               /* Configure internal PHY for KR/KX. */
-               status = ixgbe_setup_kr_speed_x550em(hw, speed);
-               if (status) {
-                       hw_dbg(hw, "setup_kr_speed failed with %d\n", status);
-                       return status;
-               }
+       return status;
+}
 
-               /* Configure CS4227 LINE side to proper mode. */
-               slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12);
-               if (setup_linear)
-                       value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
-               else
-                       value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
-               status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
-                                                         slice, value);
-               if (status)
-                       goto i2c_err;
+/**
+ * ixgbe_setup_sfi_x550a - Configure the internal PHY for native SFI mode
+ * @hw: pointer to hardware structure
+ * @speed: the link speed to force (10G or 1G full; any other value is
+ *         rejected with IXGBE_ERR_LINK_SETUP)
+ *
+ * Configures the integrated PHY for native SFI mode. Used to connect the
+ * internal PHY directly to an SFP cage, without autonegotiation.
+ * Returns 0 on success or the first non-zero status encountered.
+ **/
+static s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
+{
+       struct ixgbe_mac_info *mac = &hw->mac;
+       s32 status;
+       u32 reg_val;
+
+       /* Disable all AN modes and clear the speed field; *speed picks it. */
+       status = mac->ops.read_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+       if (status)
+               return status;
+
+       reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+       reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+       reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+       reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+
+       /* Select forced link speed for internal PHY. */
+       switch (*speed) {
+       case IXGBE_LINK_SPEED_10GB_FULL:
+               reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G;
+               break;
+       case IXGBE_LINK_SPEED_1GB_FULL:
+               reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G;
+               break;
+       default:
+               /* Other link speeds are not supported by internal PHY. */
+               return IXGBE_ERR_LINK_SETUP;
        }
 
-       return 0;
+       status = mac->ops.write_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+       /* Propagate a failed write instead of masking it with the AN restart */
+       if (status)
+               return status;
+
+       /* Toggle port SW reset by AN reset. */
+       status = ixgbe_restart_an_internal_phy_x550em(hw);
 
-i2c_err:
-       hw_dbg(hw, "combined i2c access failed with %d\n", status);
        return status;
 }
 
@@ -1388,45 +1521,39 @@ ixgbe_setup_mac_link_sfp_n(struct ixgbe_hw *hw, ixgbe_link_speed speed,
 {
        bool setup_linear = false;
        u32 reg_phy_int;
-       s32 rc;
+       s32 ret_val;
 
        /* Check if SFP module is supported and linear */
-       rc = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
+       ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
 
        /* If no SFP module present, then return success. Return success since
         * SFP not present error is not excepted in the setup MAC link flow.
         */
-       if (rc == IXGBE_ERR_SFP_NOT_PRESENT)
+       if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT)
                return 0;
 
-       if (!rc)
-               return rc;
+       if (!ret_val)
+               return ret_val;
 
-       /* Configure internal PHY for native SFI */
-       rc = hw->mac.ops.read_iosf_sb_reg(hw,
-                                         IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id),
-                                         IXGBE_SB_IOSF_TARGET_KR_PHY,
-                                         &reg_phy_int);
-       if (rc)
-               return rc;
+       /* Configure internal PHY for native SFI based on module type */
+       ret_val = hw->mac.ops.read_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_phy_int);
+       if (!ret_val)
+               return ret_val;
 
-       if (setup_linear) {
-               reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LIMITING;
-               reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LINEAR;
-       } else {
-               reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LIMITING;
-               reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LINEAR;
-       }
+       reg_phy_int &= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA;
+       if (!setup_linear)
+               reg_phy_int |= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR;
 
-       rc = hw->mac.ops.write_iosf_sb_reg(hw,
-                                          IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id),
-                                          IXGBE_SB_IOSF_TARGET_KR_PHY,
-                                          reg_phy_int);
-       if (rc)
-               return rc;
+       ret_val = hw->mac.ops.write_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, reg_phy_int);
+       if (!ret_val)
+               return ret_val;
 
-       /* Setup XFI/SFI internal link */
-       return ixgbe_setup_ixfi_x550em(hw, &speed);
+       /* Setup SFI internal link. */
+       return ixgbe_setup_sfi_x550a(hw, &speed);
 }
 
 /**
@@ -1442,19 +1569,19 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed,
        u32 reg_slice, slice_offset;
        bool setup_linear = false;
        u16 reg_phy_ext;
-       s32 rc;
+       s32 ret_val;
 
        /* Check if SFP module is supported and linear */
-       rc = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
+       ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
 
        /* If no SFP module present, then return success. Return success since
         * SFP not present error is not excepted in the setup MAC link flow.
         */
-       if (rc == IXGBE_ERR_SFP_NOT_PRESENT)
+       if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT)
                return 0;
 
-       if (!rc)
-               return rc;
+       if (!ret_val)
+               return ret_val;
 
        /* Configure internal PHY for KR/KX. */
        ixgbe_setup_kr_speed_x550em(hw, speed);
@@ -1463,10 +1590,10 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed,
                return IXGBE_ERR_PHY_ADDR_INVALID;
 
        /* Get external PHY device id */
-       rc = hw->phy.ops.read_reg(hw, IXGBE_CS4227_GLOBAL_ID_MSB,
+       ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_GLOBAL_ID_MSB,
                                  IXGBE_MDIO_ZERO_DEV_TYPE, &reg_phy_ext);
-       if (rc)
-               return rc;
+       if (ret_val)
+               return ret_val;
 
        /* When configuring quad port CS4223, the MAC instance is part
         * of the slice offset.
@@ -1538,7 +1665,7 @@ static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw,
                                     bool link_up_wait_to_complete)
 {
        u32 status;
-       u16 autoneg_status;
+       u16 i, autoneg_status;
 
        if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
                return IXGBE_ERR_CONFIG;
@@ -1550,14 +1677,18 @@ static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw,
        if (status || !(*link_up))
                return status;
 
-        /* MAC link is up, so check external PHY link.
-         * Read this twice back to back to indicate current status.
-         */
-       status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-                                     &autoneg_status);
-       if (status)
-               return status;
+       /* MAC link is up, so check external PHY link.
+        * Link status is latching low, and can only be used to detect link
+        * drop, and not the current status of the link without performing
+        * back-to-back reads.
+        */
+       for (i = 0; i < 2; i++) {
+               status = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN,
+                                             &autoneg_status);
+
+               if (status)
+                       return status;
+       }
 
        /* If external PHY link is not up, then indicate link not up */
        if (!(autoneg_status & IXGBE_MDIO_AUTO_NEG_LINK_STATUS))
@@ -1575,7 +1706,7 @@ ixgbe_setup_sgmii(struct ixgbe_hw *hw, __always_unused ixgbe_link_speed speed,
                  __always_unused bool autoneg_wait_to_complete)
 {
        struct ixgbe_mac_info *mac = &hw->mac;
-       u32 lval, sval;
+       u32 lval, sval, flx_val;
        s32 rc;
 
        rc = mac->ops.read_iosf_sb_reg(hw,
@@ -1609,14 +1740,55 @@ ixgbe_setup_sgmii(struct ixgbe_hw *hw, __always_unused ixgbe_link_speed speed,
        if (rc)
                return rc;
 
-       lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+       rc = mac->ops.read_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val);
+       if (rc)
+               return rc;
+
+       rc = mac->ops.read_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val);
+       if (rc)
+               return rc;
+
+       flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+       flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G;
+       flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+       flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+       flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+
        rc = mac->ops.write_iosf_sb_reg(hw,
-                                       IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-                                       IXGBE_SB_IOSF_TARGET_KR_PHY, lval);
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, flx_val);
+       if (rc)
+               return rc;
 
+       rc = ixgbe_restart_an_internal_phy_x550em(hw);
        return rc;
 }
 
+/** ixgbe_init_mac_link_ops_X550em_a - Init mac link function pointers
+ *  @hw: pointer to hardware structure
+ *
+ *  Overrides the flow-control hooks in hw->mac.ops by media type: fiber
+ *  clears setup_fc and installs the fiber fc_autoneg handler; backplane
+ *  installs the backplane fc_autoneg and setup_fc handlers. Any other
+ *  media type leaves the ops untouched.
+ **/
+static void ixgbe_init_mac_link_ops_X550em_a(struct ixgbe_hw *hw)
+{
+       struct ixgbe_mac_info *mac = &hw->mac;
+
+       switch (mac->ops.get_media_type(hw)) {
+       case ixgbe_media_type_fiber:
+               mac->ops.setup_fc = NULL;
+               mac->ops.fc_autoneg = ixgbe_fc_autoneg_fiber_x550em_a;
+               break;
+       case ixgbe_media_type_backplane:
+               mac->ops.fc_autoneg = ixgbe_fc_autoneg_backplane_x550em_a;
+               mac->ops.setup_fc = ixgbe_setup_fc_backplane_x550em_a;
+               break;
+       default:
+               break;
+       }
+}
+
 /** ixgbe_init_mac_link_ops_X550em - init mac link function pointers
  *  @hw: pointer to hardware structure
  **/
@@ -1664,6 +1836,10 @@ static void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw)
        default:
                break;
        }
+
+       /* Additional modification for X550em_a devices */
+       if (hw->mac.type == ixgbe_mac_x550em_a)
+               ixgbe_init_mac_link_ops_X550em_a(hw);
 }
 
 /** ixgbe_setup_sfp_modules_X550em - Setup SFP module
@@ -1740,7 +1916,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
 
        /* Vendor alarm triggered */
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG,
-                                     IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+                                     MDIO_MMD_VEND1,
                                      &reg);
 
        if (status || !(reg & IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN))
@@ -1748,7 +1924,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
 
        /* Vendor Auto-Neg alarm triggered or Global alarm 1 triggered */
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG,
-                                     IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+                                     MDIO_MMD_VEND1,
                                      &reg);
 
        if (status || !(reg & (IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN |
@@ -1757,7 +1933,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
 
        /* Global alarm triggered */
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_ALARM_1,
-                                     IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+                                     MDIO_MMD_VEND1,
                                      &reg);
 
        if (status)
@@ -1772,7 +1948,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
        if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) {
                /*  device fault alarm triggered */
                status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_FAULT_MSG,
-                                         IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+                                         MDIO_MMD_VEND1,
                                          &reg);
                if (status)
                        return status;
@@ -1787,14 +1963,14 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
 
        /* Vendor alarm 2 triggered */
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg);
+                                     MDIO_MMD_AN, &reg);
 
        if (status || !(reg & IXGBE_MDIO_GLOBAL_STD_ALM2_INT))
                return status;
 
        /* link connect/disconnect event occurred */
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM2,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg);
+                                     MDIO_MMD_AN, &reg);
 
        if (status)
                return status;
@@ -1826,20 +2002,20 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
 
        /* Enable link status change alarm */
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg);
+                                     MDIO_MMD_AN, &reg);
        if (status)
                return status;
 
        reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN;
 
        status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
-                                      IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg);
+                                      MDIO_MMD_AN, reg);
        if (status)
                return status;
 
        /* Enable high temperature failure and global fault alarms */
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
-                                     IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+                                     MDIO_MMD_VEND1,
                                      &reg);
        if (status)
                return status;
@@ -1848,14 +2024,14 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
                IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN);
 
        status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
-                                      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+                                      MDIO_MMD_VEND1,
                                       reg);
        if (status)
                return status;
 
        /* Enable vendor Auto-Neg alarm and Global Interrupt Mask 1 alarm */
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK,
-                                     IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+                                     MDIO_MMD_VEND1,
                                      &reg);
        if (status)
                return status;
@@ -1864,14 +2040,14 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
                IXGBE_MDIO_GLOBAL_ALARM_1_INT);
 
        status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK,
-                                      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+                                      MDIO_MMD_VEND1,
                                       reg);
        if (status)
                return status;
 
        /* Enable chip-wide vendor alarm */
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK,
-                                     IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+                                     MDIO_MMD_VEND1,
                                      &reg);
        if (status)
                return status;
@@ -1879,7 +2055,7 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
        reg |= IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN;
 
        status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK,
-                                      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+                                      MDIO_MMD_VEND1,
                                       reg);
 
        return status;
@@ -1945,13 +2121,31 @@ static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw,
        if (speed & IXGBE_LINK_SPEED_1GB_FULL)
                reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX;
 
-       /* Restart auto-negotiation. */
-       reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
        status = hw->mac.ops.write_iosf_sb_reg(hw,
                                        IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
                                        IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
 
-       return status;
+       if (hw->mac.type == ixgbe_mac_x550em_a) {
+               /* Set lane mode  to KR auto negotiation */
+               status = hw->mac.ops.read_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+
+               if (status)
+                       return status;
+
+               reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+               reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN;
+               reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+               reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+               reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+
+               status = hw->mac.ops.write_iosf_sb_reg(hw,
+                               IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+       }
+
+       return ixgbe_restart_an_internal_phy_x550em(hw);
 }
 
 /** ixgbe_setup_kx4_x550em - Configure the KX4 PHY.
@@ -2020,14 +2214,12 @@ static s32 ixgbe_ext_phy_t_x550em_get_link(struct ixgbe_hw *hw, bool *link_up)
        *link_up = false;
 
        /* read this twice back to back to indicate current status */
-       ret = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
-                                  IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+       ret = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN,
                                   &autoneg_status);
        if (ret)
                return ret;
 
-       ret = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
-                                  IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+       ret = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN,
                                   &autoneg_status);
        if (ret)
                return ret;
@@ -2073,7 +2265,7 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw)
                return 0;
 
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+                                     MDIO_MMD_AN,
                                      &speed);
        if (status)
                return status;
@@ -2134,10 +2326,10 @@ static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
 
        /* To turn on the LED, set mode to ON. */
        hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
-                            IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data);
+                            MDIO_MMD_VEND1, &phy_data);
        phy_data |= IXGBE_X557_LED_MANUAL_SET_MASK;
        hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
-                             IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data);
+                             MDIO_MMD_VEND1, phy_data);
 
        return 0;
 }
@@ -2156,10 +2348,10 @@ static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
 
        /* To turn off the LED, clear the manual-set mode bits. */
        hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
-                            IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data);
+                            MDIO_MMD_VEND1, &phy_data);
        phy_data &= ~IXGBE_X557_LED_MANUAL_SET_MASK;
        hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
-                             IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data);
+                             MDIO_MMD_VEND1, phy_data);
 
        return 0;
 }
@@ -2180,7 +2372,7 @@ static s32 ixgbe_get_lcd_t_x550em(struct ixgbe_hw *hw,
        *lcd_speed = IXGBE_LINK_SPEED_UNKNOWN;
 
        status = hw->phy.ops.read_reg(hw, IXGBE_AUTO_NEG_LP_STATUS,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+                                     MDIO_MMD_AN,
                                      &an_lp_status);
        if (status)
                return status;
@@ -2281,6 +2473,90 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
        return rc;
 }
 
+/**
+ *  ixgbe_fc_autoneg_backplane_x550em_a - Enable flow control IEEE clause 37
+ *  @hw: pointer to hardware structure
+ *
+ *  Sets hw->fc.fc_was_autonegged when negotiation succeeds; on any failure
+ *  clears it and copies hw->fc.requested_mode into hw->fc.current_mode.
+ **/
+static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw)
+{
+       u32 link_s1, lp_an_page_low, an_cntl_1;
+       s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED;
+       ixgbe_link_speed speed;
+       bool link_up;
+
+       /* AN should have completed when the cable was plugged in.
+        * Look for reasons to bail out.  Bail out if:
+        * - FC autoneg is disabled, or if
+        * - link is not up.
+        */
+       if (hw->fc.disable_fc_autoneg) {
+               hw_err(hw, "Flow control autoneg is disabled\n");
+               goto out;
+       }
+
+       hw->mac.ops.check_link(hw, &speed, &link_up, false);
+       if (!link_up) {
+               hw_err(hw, "The link is down\n");
+               goto out;
+       }
+
+       /* Check that auto-negotiation has completed */
+       status = hw->mac.ops.read_iosf_sb_reg(hw,
+                                       IXGBE_KRM_LINK_S1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, &link_s1);
+       if (status || (link_s1 & IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE) == 0) {
+               hw_dbg(hw, "Auto-Negotiation did not complete\n");
+               status = IXGBE_ERR_FC_NOT_NEGOTIATED;
+               goto out;
+       }
+
+       /* Read the 10g AN autoc and LP ability registers and resolve
+        * local flow control settings accordingly
+        */
+       status = hw->mac.ops.read_iosf_sb_reg(hw,
+                               IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl_1);
+       if (status) {
+               hw_dbg(hw, "Auto-Negotiation did not complete\n");
+               goto out;
+       }
+
+       status = hw->mac.ops.read_iosf_sb_reg(hw,
+                               IXGBE_KRM_LP_BASE_PAGE_HIGH(hw->bus.lan_id),
+                               IXGBE_SB_IOSF_TARGET_KR_PHY, &lp_an_page_low);
+       if (status) {
+               hw_dbg(hw, "Auto-Negotiation did not complete\n");
+               goto out;
+       }
+
+       status = ixgbe_negotiate_fc(hw, an_cntl_1, lp_an_page_low,
+                                   IXGBE_KRM_AN_CNTL_1_SYM_PAUSE,
+                                   IXGBE_KRM_AN_CNTL_1_ASM_PAUSE,
+                                   IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE,
+                                   IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE);
+
+out:
+       if (!status) {
+               hw->fc.fc_was_autonegged = true;
+       } else {
+               hw->fc.fc_was_autonegged = false;
+               hw->fc.current_mode = hw->fc.requested_mode;
+       }
+}
+
+/**
+ *  ixgbe_fc_autoneg_fiber_x550em_a - use the requested FC mode unchanged
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *hw)
+{
+       hw->fc.current_mode = hw->fc.requested_mode;
+       hw->fc.fc_was_autonegged = false;
+}
+
 /** ixgbe_enter_lplu_x550em - Transition to low power states
  *  @hw: pointer to hardware structure
  *
@@ -2327,7 +2603,7 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw)
                return ixgbe_set_copper_phy_power(hw, false);
 
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+                                     MDIO_MMD_AN,
                                      &speed);
        if (status)
                return status;
@@ -2349,20 +2625,20 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw)
 
        /* Clear AN completed indication */
        status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+                                     MDIO_MMD_AN,
                                      &autoneg_reg);
        if (status)
                return status;
 
-       status = hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+       status = hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL,
+                                     MDIO_MMD_AN,
                                      &an_10g_cntl_reg);
        if (status)
                return status;
 
        status = hw->phy.ops.read_reg(hw,
                                      IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
-                                     IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+                                     MDIO_MMD_AN,
                                      &autoneg_reg);
        if (status)
                return status;
@@ -2520,7 +2796,7 @@ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw)
 
        status = hw->phy.ops.read_reg(hw,
                                      IXGBE_MDIO_TX_VENDOR_ALARMS_3,
-                                     IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+                                     MDIO_MMD_PMAPMD,
                                      &reg);
        if (status)
                return status;
@@ -2531,7 +2807,7 @@ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw)
        if (reg & IXGBE_MDIO_TX_VENDOR_ALARMS_3_RST_MASK) {
                status = hw->phy.ops.read_reg(hw,
                                        IXGBE_MDIO_GLOBAL_RES_PR_10,
-                                       IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+                                       MDIO_MMD_VEND1,
                                        &reg);
                if (status)
                        return status;
@@ -2540,7 +2816,7 @@ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw)
 
                status = hw->phy.ops.write_reg(hw,
                                        IXGBE_MDIO_GLOBAL_RES_PR_10,
-                                       IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+                                       MDIO_MMD_VEND1,
                                        reg);
                if (status)
                        return status;
@@ -2728,6 +3004,90 @@ static void ixgbe_set_source_address_pruning_X550(struct ixgbe_hw *hw,
        IXGBE_WRITE_REG(hw, IXGBE_PFFLPH, (u32)(pfflp >> 32));
 }
 
+/**
+ *  ixgbe_setup_fc_backplane_x550em_a - Set up flow control
+ *  @hw: pointer to hardware structure
+ *
+ *  Called at init time to set up flow control.
+ *  Returns 0 on success, otherwise the first error status encountered.
+ **/
+static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw)
+{
+       s32 status = 0;
+       u32 an_cntl = 0;
+
+       /* Validate the requested mode */
+       if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
+               hw_err(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n");
+               return IXGBE_ERR_INVALID_LINK_SETTINGS;
+       }
+
+       if (hw->fc.requested_mode == ixgbe_fc_default)
+               hw->fc.requested_mode = ixgbe_fc_full;
+
+       /* Set up the 1G and 10G flow control advertisement registers so the
+        * HW will be able to do FC autoneg once the cable is plugged in.  If
+        * we link at 10G, the 1G advertisement is harmless and vice versa.
+        */
+       status = hw->mac.ops.read_iosf_sb_reg(hw,
+                                       IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl);
+       if (status) {
+               hw_dbg(hw, "Auto-Negotiation did not complete\n");
+               return status;
+       }
+
+       /* The possible values of fc.requested_mode are:
+        * 0: Flow control is completely disabled
+        * 1: Rx flow control is enabled (we can receive pause frames,
+        *    but not send pause frames).
+        * 2: Tx flow control is enabled (we can send pause frames but
+        *    we do not support receiving pause frames).
+        * 3: Both Rx and Tx flow control (symmetric) are enabled.
+        * other: Invalid.
+        */
+       switch (hw->fc.requested_mode) {
+       case ixgbe_fc_none:
+               /* Flow control completely disabled by software override. */
+               an_cntl &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
+                            IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
+               break;
+       case ixgbe_fc_tx_pause:
+               /* Tx Flow control is enabled, and Rx Flow control is
+                * disabled by software override.
+                */
+               an_cntl |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
+               an_cntl &= ~IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
+               break;
+       case ixgbe_fc_rx_pause:
+               /* Rx Flow control is enabled and Tx Flow control is
+                * disabled by software override. Since there really
+                * isn't a way to advertise that we are capable of RX
+                * Pause ONLY, we will advertise that we support both
+                * symmetric and asymmetric Rx PAUSE, as such we fall
+                * through to the fc_full statement.  Later, we will
+                * disable the adapter's ability to send PAUSE frames.
+                */
+       case ixgbe_fc_full:
+               /* Flow control (both Rx and Tx) is enabled by SW override. */
+               an_cntl |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
+                          IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
+               break;
+       default:
+               hw_err(hw, "Flow control param set incorrectly\n");
+               return IXGBE_ERR_CONFIG;
+       }
+
+       status = hw->mac.ops.write_iosf_sb_reg(hw,
+                                       IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+                                       IXGBE_SB_IOSF_TARGET_KR_PHY, an_cntl);
+       if (status)
+               return status;
+
+       /* Restart auto-negotiation. */
+       return ixgbe_restart_an_internal_phy_x550em(hw);
+}
+
 /**
  * ixgbe_set_mux - Set mux for port 1 access with CS4227
  * @hw: pointer to hardware structure
@@ -2934,6 +3294,7 @@ static const struct ixgbe_mac_operations mac_ops_X550 = {
        X550_COMMON_MAC
        .led_on                 = ixgbe_led_on_generic,
        .led_off                = ixgbe_led_off_generic,
+       .init_led_link_act      = ixgbe_init_led_link_act_generic,
        .reset_hw               = &ixgbe_reset_hw_X540,
        .get_media_type         = &ixgbe_get_media_type_X540,
        .get_san_mac_addr       = &ixgbe_get_san_mac_addr_generic,
@@ -2948,12 +3309,14 @@ static const struct ixgbe_mac_operations mac_ops_X550 = {
        .prot_autoc_read        = prot_autoc_read_generic,
        .prot_autoc_write       = prot_autoc_write_generic,
        .setup_fc               = ixgbe_setup_fc_generic,
+       .fc_autoneg             = ixgbe_fc_autoneg,
 };
 
 static const struct ixgbe_mac_operations mac_ops_X550EM_x = {
        X550_COMMON_MAC
        .led_on                 = ixgbe_led_on_t_x550em,
        .led_off                = ixgbe_led_off_t_x550em,
+       .init_led_link_act      = ixgbe_init_led_link_act_generic,
        .reset_hw               = &ixgbe_reset_hw_X550em,
        .get_media_type         = &ixgbe_get_media_type_X550em,
        .get_san_mac_addr       = NULL,
@@ -2966,6 +3329,7 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x = {
        .release_swfw_sync      = &ixgbe_release_swfw_sync_X550em,
        .init_swfw_sync         = &ixgbe_init_swfw_sync_X540,
        .setup_fc               = NULL, /* defined later */
+       .fc_autoneg             = ixgbe_fc_autoneg,
        .read_iosf_sb_reg       = ixgbe_read_iosf_sb_reg_x550,
        .write_iosf_sb_reg      = ixgbe_write_iosf_sb_reg_x550,
 };
@@ -2974,6 +3338,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = {
        X550_COMMON_MAC
        .led_on                 = ixgbe_led_on_t_x550em,
        .led_off                = ixgbe_led_off_t_x550em,
+       .init_led_link_act      = ixgbe_init_led_link_act_generic,
        .reset_hw               = ixgbe_reset_hw_X550em,
        .get_media_type         = ixgbe_get_media_type_X550em,
        .get_san_mac_addr       = NULL,
@@ -2985,6 +3350,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = {
        .acquire_swfw_sync      = ixgbe_acquire_swfw_sync_x550em_a,
        .release_swfw_sync      = ixgbe_release_swfw_sync_x550em_a,
        .setup_fc               = ixgbe_setup_fc_x550em,
+       .fc_autoneg             = ixgbe_fc_autoneg,
        .read_iosf_sb_reg       = ixgbe_read_iosf_sb_reg_x550a,
        .write_iosf_sb_reg      = ixgbe_write_iosf_sb_reg_x550a,
 };
@@ -3036,11 +3402,6 @@ static const struct ixgbe_phy_operations phy_ops_X550EM_x = {
        .identify               = &ixgbe_identify_phy_x550em,
        .read_reg               = &ixgbe_read_phy_reg_generic,
        .write_reg              = &ixgbe_write_phy_reg_generic,
-       .read_i2c_combined      = &ixgbe_read_i2c_combined_generic,
-       .write_i2c_combined     = &ixgbe_write_i2c_combined_generic,
-       .read_i2c_combined_unlocked = &ixgbe_read_i2c_combined_generic_unlocked,
-       .write_i2c_combined_unlocked =
-                                    &ixgbe_write_i2c_combined_generic_unlocked,
 };
 
 static const struct ixgbe_phy_operations phy_ops_x550em_a = {
@@ -3053,6 +3414,13 @@ static const struct ixgbe_phy_operations phy_ops_x550em_a = {
        .write_reg_mdi          = &ixgbe_write_phy_reg_mdi,
 };
 
+static const struct ixgbe_link_operations link_ops_x550em_x = {
+       .read_link              = ixgbe_read_i2c_combined_generic,
+       .write_link             = ixgbe_write_i2c_combined_generic,
+       .read_link_unlocked     = ixgbe_read_i2c_combined_generic_unlocked,
+       .write_link_unlocked    = ixgbe_write_i2c_combined_generic_unlocked,
+};
+
 static const u32 ixgbe_mvals_X550[IXGBE_MVALS_IDX_LIMIT] = {
        IXGBE_MVALS_INIT(X550)
 };
@@ -3083,11 +3451,12 @@ const struct ixgbe_info ixgbe_X550EM_x_info = {
        .phy_ops                = &phy_ops_X550EM_x,
        .mbx_ops                = &mbx_ops_generic,
        .mvals                  = ixgbe_mvals_X550EM_x,
+       .link_ops               = &link_ops_x550em_x,
 };
 
 const struct ixgbe_info ixgbe_x550em_a_info = {
        .mac                    = ixgbe_mac_x550em_a,
-       .get_invariants         = &ixgbe_get_invariants_X550_x,
+       .get_invariants         = &ixgbe_get_invariants_X550_a,
        .mac_ops                = &mac_ops_x550em_a,
        .eeprom_ops             = &eeprom_ops_X550EM_x,
        .phy_ops                = &phy_ops_x550em_a,
index d2775f032f74e7676abbd6b1abdaa4f4fce282ad..d316f503a7279c8e9c608199a6966bfd9a2639b3 100644 (file)
@@ -1498,6 +1498,9 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter)
 {
        int i, q_vectors;
 
+       if (!adapter->msix_entries)
+               return;
+
        q_vectors = adapter->num_msix_vectors;
        i = q_vectors - 1;
 
@@ -2552,6 +2555,9 @@ static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter)
  **/
 static void ixgbevf_reset_interrupt_capability(struct ixgbevf_adapter *adapter)
 {
+       if (!adapter->msix_entries)
+               return;
+
        pci_disable_msix(adapter->pdev);
        kfree(adapter->msix_entries);
        adapter->msix_entries = NULL;
@@ -3794,11 +3800,10 @@ static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state)
                ixgbevf_free_irq(adapter);
                ixgbevf_free_all_tx_resources(adapter);
                ixgbevf_free_all_rx_resources(adapter);
+               ixgbevf_clear_interrupt_scheme(adapter);
                rtnl_unlock();
        }
 
-       ixgbevf_clear_interrupt_scheme(adapter);
-
 #ifdef CONFIG_PM
        retval = pci_save_state(pdev);
        if (retval)
index 1a739d71f1c22ca33eb1d9b490761f5ddaaab070..9d931373a9b9be72bb6b6bbf4103ef461c40a6fe 100644 (file)
@@ -303,15 +303,9 @@ ltq_etop_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
        strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 }
 
-static int
-ltq_etop_nway_reset(struct net_device *dev)
-{
-       return phy_start_aneg(dev->phydev);
-}
-
 static const struct ethtool_ops ltq_etop_ethtool_ops = {
        .get_drvinfo = ltq_etop_get_drvinfo,
-       .nway_reset = ltq_etop_nway_reset,
+       .nway_reset = phy_ethtool_nway_reset,
        .get_link_ksettings = phy_ethtool_get_link_ksettings,
        .set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
index 2664827ddecd969b3e61886c16e7059c74dbf46f..3b8f11fe5e139826f1afb07019f32dc9bbc2efdc 100644 (file)
@@ -5,7 +5,7 @@
 config NET_VENDOR_MARVELL
        bool "Marvell devices"
        default y
-       depends on PCI || CPU_PXA168 || MV64X60 || PPC32 || PLAT_ORION || INET
+       depends on PCI || CPU_PXA168 || MV64X60 || PPC32 || PLAT_ORION || INET || COMPILE_TEST
        ---help---
          If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -18,7 +18,8 @@ if NET_VENDOR_MARVELL
 
 config MV643XX_ETH
        tristate "Marvell Discovery (643XX) and Orion ethernet support"
-       depends on (MV64X60 || PPC32 || PLAT_ORION) && INET
+       depends on (MV64X60 || PPC32 || PLAT_ORION || COMPILE_TEST) && INET
+       depends on HAS_DMA
        select PHYLIB
        select MVMDIO
        ---help---
@@ -43,6 +44,7 @@ config MVMDIO
 config MVNETA_BM_ENABLE
        tristate "Marvell Armada 38x/XP network interface BM support"
        depends on MVNETA
+       depends on !64BIT
        ---help---
          This driver supports auxiliary block of the network
          interface units in the Marvell ARMADA XP and ARMADA 38x SoC
@@ -54,13 +56,15 @@ config MVNETA_BM_ENABLE
          buffer management.
 
 config MVNETA
-       tristate "Marvell Armada 370/38x/XP network interface support"
-       depends on PLAT_ORION
+       tristate "Marvell Armada 370/38x/XP/37xx network interface support"
+       depends on ARCH_MVEBU || COMPILE_TEST
+       depends on HAS_DMA
        select MVMDIO
        select FIXED_PHY
        ---help---
          This driver supports the network interface units in the
-         Marvell ARMADA XP, ARMADA 370 and ARMADA 38x SoC family.
+         Marvell ARMADA XP, ARMADA 370, ARMADA 38x and
+         ARMADA 37xx SoC family.
 
          Note that this driver is distinct from the mv643xx_eth
          driver, which should be used for the older Marvell SoCs
@@ -68,6 +72,7 @@ config MVNETA
 
 config MVNETA_BM
        tristate
+       depends on !64BIT
        default y if MVNETA=y && MVNETA_BM_ENABLE!=n
        default MVNETA_BM_ENABLE
        select HWBM
@@ -77,7 +82,9 @@ config MVNETA_BM
 
 config MVPP2
        tristate "Marvell Armada 375 network interface support"
-       depends on MACH_ARMADA_375
+       depends on MACH_ARMADA_375 || COMPILE_TEST
+       depends on HAS_DMA
+       depends on !64BIT
        select MVMDIO
        ---help---
          This driver supports the network interface units in the
index 68675d83bdc56c83a17f95fa81263624e12c9e6a..5f62c3d70df9d46ff2411220948ab87bc42f4d11 100644 (file)
@@ -1379,6 +1379,7 @@ static unsigned int get_rx_coal(struct mv643xx_eth_private *mp)
                temp = (val & 0x003fff00) >> 8;
 
        temp *= 64000000;
+       temp += mp->t_clk / 2;
        do_div(temp, mp->t_clk);
 
        return (unsigned int)temp;
@@ -1415,6 +1416,7 @@ static unsigned int get_tx_coal(struct mv643xx_eth_private *mp)
 
        temp = (rdlp(mp, TX_FIFO_URGENT_THRESHOLD) & 0x3fff0) >> 4;
        temp *= 64000000;
+       temp += mp->t_clk / 2;
        do_div(temp, mp->t_clk);
 
        return (unsigned int)temp;
@@ -1637,14 +1639,6 @@ static void mv643xx_eth_get_drvinfo(struct net_device *dev,
        strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
 }
 
-static int mv643xx_eth_nway_reset(struct net_device *dev)
-{
-       if (!dev->phydev)
-               return -EINVAL;
-
-       return genphy_restart_aneg(dev->phydev);
-}
-
 static int
 mv643xx_eth_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
@@ -1768,7 +1762,7 @@ static int mv643xx_eth_get_sset_count(struct net_device *dev, int sset)
 
 static const struct ethtool_ops mv643xx_eth_ethtool_ops = {
        .get_drvinfo            = mv643xx_eth_get_drvinfo,
-       .nway_reset             = mv643xx_eth_nway_reset,
+       .nway_reset             = phy_ethtool_nway_reset,
        .get_link               = ethtool_op_get_link,
        .get_coalesce           = mv643xx_eth_get_coalesce,
        .set_coalesce           = mv643xx_eth_set_coalesce,
@@ -2981,6 +2975,22 @@ static void set_params(struct mv643xx_eth_private *mp,
        mp->txq_count = pd->tx_queue_count ? : 1;
 }
 
+static int get_phy_mode(struct mv643xx_eth_private *mp)
+{
+       struct device_node *np = mp->dev->dev.parent->of_node;
+       int mode;
+
+       /* No OF node, or of_get_phy_mode() returned a negative value:
+        * use the historical GMII default. We could also read/write
+        * the interface state in the PSC1
+        */
+       if (!np)
+               return PHY_INTERFACE_MODE_GMII;
+
+       mode = of_get_phy_mode(np);
+       return mode < 0 ? PHY_INTERFACE_MODE_GMII : mode;
+}
+
 static struct phy_device *phy_scan(struct mv643xx_eth_private *mp,
                                   int phy_addr)
 {
@@ -3007,7 +3017,7 @@ static struct phy_device *phy_scan(struct mv643xx_eth_private *mp,
                                "orion-mdio-mii", addr);
 
                phydev = phy_connect(mp->dev, phy_id, mv643xx_eth_adjust_link,
-                               PHY_INTERFACE_MODE_GMII);
+                                    get_phy_mode(mp));
                if (!IS_ERR(phydev)) {
                        phy_addr_set(mp, addr);
                        break;
@@ -3106,6 +3116,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
        if (!dev)
                return -ENOMEM;
 
+       SET_NETDEV_DEV(dev, &pdev->dev);
        mp = netdev_priv(dev);
        platform_set_drvdata(pdev, mp);
 
@@ -3145,7 +3156,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
        if (pd->phy_node) {
                phydev = of_phy_connect(mp->dev, pd->phy_node,
                                        mv643xx_eth_adjust_link, 0,
-                                       PHY_INTERFACE_MODE_GMII);
+                                       get_phy_mode(mp));
                if (!phydev)
                        err = -ENODEV;
                else
@@ -3207,8 +3218,6 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
        dev->min_mtu = 64;
        dev->max_mtu = 9500;
 
-       SET_NETDEV_DEV(dev, &pdev->dev);
-
        if (mp->shared->win_protect)
                wrl(mp, WINDOW_PROTECT(mp->port_num), mp->shared->win_protect);
 
index b85819ea8eea5b429badbb1ab89f3411809289d8..aa87c685847897a088877738ca31d17eeae25ea8 100644 (file)
 /* descriptor aligned size */
 #define MVNETA_DESC_ALIGNED_SIZE       32
 
+/* Number of bytes to be taken into account by HW when putting incoming data
+ * to the buffers. It is needed in case NET_SKB_PAD exceeds maximum packet
+ * offset supported in MVNETA_RXQ_CONFIG_REG(q) registers.
+ */
+#define MVNETA_RX_PKT_OFFSET_CORRECTION                64
+
 #define MVNETA_RX_PKT_SIZE(mtu) \
        ALIGN((mtu) + MVNETA_MH_SIZE + MVNETA_VLAN_TAG_LEN + \
              ETH_HLEN + ETH_FCS_LEN,                        \
@@ -391,6 +397,9 @@ struct mvneta_port {
        spinlock_t lock;
        bool is_stopped;
 
+       u32 cause_rx_tx;
+       struct napi_struct napi;
+
        /* Core clock */
        struct clk *clk;
        /* AXI clock */
@@ -416,6 +425,10 @@ struct mvneta_port {
        u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)];
 
        u32 indir[MVNETA_RSS_LU_TABLE_SIZE];
+
+       /* Flags for special SoC configurations */
+       bool neta_armada3700;
+       u16 rx_offset_correction;
 };
 
 /* The mvneta_tx_desc and mvneta_rx_desc structures describe the
@@ -561,6 +574,9 @@ struct mvneta_rx_queue {
        u32 pkts_coal;
        u32 time_coal;
 
+       /* Virtual address of the RX buffer */
+       void  **buf_virt_addr;
+
        /* Virtual address of the RX DMA descriptors array */
        struct mvneta_rx_desc *descs;
 
@@ -955,14 +971,9 @@ static int mvneta_mbus_io_win_set(struct mvneta_port *pp, u32 base, u32 wsize,
        return 0;
 }
 
-/* Assign and initialize pools for port. In case of fail
- * buffer manager will remain disabled for current port.
- */
-static int mvneta_bm_port_init(struct platform_device *pdev,
-                              struct mvneta_port *pp)
+static  int mvneta_bm_port_mbus_init(struct mvneta_port *pp)
 {
-       struct device_node *dn = pdev->dev.of_node;
-       u32 long_pool_id, short_pool_id, wsize;
+       u32 wsize;
        u8 target, attr;
        int err;
 
@@ -981,6 +992,25 @@ static int mvneta_bm_port_init(struct platform_device *pdev,
                netdev_info(pp->dev, "fail to configure mbus window to BM\n");
                return err;
        }
+       return 0;
+}
+
+/* Assign and initialize pools for port. In case of fail
+ * buffer manager will remain disabled for current port.
+ */
+static int mvneta_bm_port_init(struct platform_device *pdev,
+                              struct mvneta_port *pp)
+{
+       struct device_node *dn = pdev->dev.of_node;
+       u32 long_pool_id, short_pool_id;
+
+       if (!pp->neta_armada3700) {
+               int ret;
+
+               ret = mvneta_bm_port_mbus_init(pp);
+               if (ret)
+                       return ret;
+       }
 
        if (of_property_read_u32(dn, "bm,pool-long", &long_pool_id)) {
                netdev_info(pp->dev, "missing long pool id\n");
@@ -1349,22 +1379,27 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
        for_each_present_cpu(cpu) {
                int rxq_map = 0, txq_map = 0;
                int rxq, txq;
+               if (!pp->neta_armada3700) {
+                       for (rxq = 0; rxq < rxq_number; rxq++)
+                               if ((rxq % max_cpu) == cpu)
+                                       rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
+
+                       for (txq = 0; txq < txq_number; txq++)
+                               if ((txq % max_cpu) == cpu)
+                                       txq_map |= MVNETA_CPU_TXQ_ACCESS(txq);
+
+                       /* With only one TX queue we configure a special case
+                        * which will allow to get all the irq on a single
+                        * CPU
+                        */
+                       if (txq_number == 1)
+                               txq_map = (cpu == pp->rxq_def) ?
+                                       MVNETA_CPU_TXQ_ACCESS(1) : 0;
 
-               for (rxq = 0; rxq < rxq_number; rxq++)
-                       if ((rxq % max_cpu) == cpu)
-                               rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
-
-               for (txq = 0; txq < txq_number; txq++)
-                       if ((txq % max_cpu) == cpu)
-                               txq_map |= MVNETA_CPU_TXQ_ACCESS(txq);
-
-               /* With only one TX queue we configure a special case
-                * which will allow to get all the irq on a single
-                * CPU
-                */
-               if (txq_number == 1)
-                       txq_map = (cpu == pp->rxq_def) ?
-                               MVNETA_CPU_TXQ_ACCESS(1) : 0;
+               } else {
+                       txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
+                       rxq_map = MVNETA_CPU_RXQ_ACCESS_ALL_MASK;
+               }
 
                mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map);
        }
@@ -1573,10 +1608,14 @@ static void mvneta_tx_done_pkts_coal_set(struct mvneta_port *pp,
 
 /* Handle rx descriptor fill by setting buf_cookie and buf_phys_addr */
 static void mvneta_rx_desc_fill(struct mvneta_rx_desc *rx_desc,
-                               u32 phys_addr, u32 cookie)
+                               u32 phys_addr, void *virt_addr,
+                               struct mvneta_rx_queue *rxq)
 {
-       rx_desc->buf_cookie = cookie;
+       int i;
+
        rx_desc->buf_phys_addr = phys_addr;
+       i = rx_desc - rxq->descs;
+       rxq->buf_virt_addr[i] = virt_addr;
 }
 
 /* Decrement sent descriptors counter */
@@ -1781,7 +1820,8 @@ EXPORT_SYMBOL_GPL(mvneta_frag_free);
 
 /* Refill processing for SW buffer management */
 static int mvneta_rx_refill(struct mvneta_port *pp,
-                           struct mvneta_rx_desc *rx_desc)
+                           struct mvneta_rx_desc *rx_desc,
+                           struct mvneta_rx_queue *rxq)
 
 {
        dma_addr_t phys_addr;
@@ -1799,7 +1839,8 @@ static int mvneta_rx_refill(struct mvneta_port *pp,
                return -ENOMEM;
        }
 
-       mvneta_rx_desc_fill(rx_desc, phys_addr, (u32)data);
+       phys_addr += pp->rx_offset_correction;
+       mvneta_rx_desc_fill(rx_desc, phys_addr, data, rxq);
        return 0;
 }
 
@@ -1861,7 +1902,7 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
 
        for (i = 0; i < rxq->size; i++) {
                struct mvneta_rx_desc *rx_desc = rxq->descs + i;
-               void *data = (void *)rx_desc->buf_cookie;
+               void *data = rxq->buf_virt_addr[i];
 
                dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr,
                                 MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
@@ -1894,12 +1935,13 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
                unsigned char *data;
                dma_addr_t phys_addr;
                u32 rx_status, frag_size;
-               int rx_bytes, err;
+               int rx_bytes, err, index;
 
                rx_done++;
                rx_status = rx_desc->status;
                rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
-               data = (unsigned char *)rx_desc->buf_cookie;
+               index = rx_desc - rxq->descs;
+               data = rxq->buf_virt_addr[index];
                phys_addr = rx_desc->buf_phys_addr;
 
                if (!mvneta_rxq_desc_is_first_last(rx_status) ||
@@ -1918,7 +1960,7 @@ err_drop_frame:
                                goto err_drop_frame;
 
                        dma_sync_single_range_for_cpu(dev->dev.parent,
-                                                     rx_desc->buf_phys_addr,
+                                                     phys_addr,
                                                      MVNETA_MH_SIZE + NET_SKB_PAD,
                                                      rx_bytes,
                                                      DMA_FROM_DEVICE);
@@ -1938,7 +1980,7 @@ err_drop_frame:
                }
 
                /* Refill processing */
-               err = mvneta_rx_refill(pp, rx_desc);
+               err = mvneta_rx_refill(pp, rx_desc, rxq);
                if (err) {
                        netdev_err(dev, "Linux processing - Can't refill\n");
                        rxq->missed++;
@@ -2020,7 +2062,7 @@ static int mvneta_rx_hwbm(struct mvneta_port *pp, int rx_todo,
                rx_done++;
                rx_status = rx_desc->status;
                rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
-               data = (unsigned char *)rx_desc->buf_cookie;
+               data = (u8 *)(uintptr_t)rx_desc->buf_cookie;
                phys_addr = rx_desc->buf_phys_addr;
                pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc);
                bm_pool = &pp->bm_priv->bm_pools[pool_id];
@@ -2609,6 +2651,17 @@ static void mvneta_set_rx_mode(struct net_device *dev)
 
 /* Interrupt handling - the callback for request_irq() */
 static irqreturn_t mvneta_isr(int irq, void *dev_id)
+{
+       struct mvneta_port *pp = (struct mvneta_port *)dev_id;
+
+       mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
+       napi_schedule(&pp->napi);
+
+       return IRQ_HANDLED;
+}
+
+/* Interrupt handling - the callback for request_percpu_irq() */
+static irqreturn_t mvneta_percpu_isr(int irq, void *dev_id)
 {
        struct mvneta_pcpu_port *port = (struct mvneta_pcpu_port *)dev_id;
 
@@ -2657,7 +2710,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
        struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
 
        if (!netif_running(pp->dev)) {
-               napi_complete(&port->napi);
+               napi_complete(napi);
                return rx_done;
        }
 
@@ -2686,7 +2739,8 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
         */
        rx_queue = fls(((cause_rx_tx >> 8) & 0xff));
 
-       cause_rx_tx |= port->cause_rx_tx;
+       cause_rx_tx |= pp->neta_armada3700 ? pp->cause_rx_tx :
+               port->cause_rx_tx;
 
        if (rx_queue) {
                rx_queue = rx_queue - 1;
@@ -2700,11 +2754,27 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 
        if (budget > 0) {
                cause_rx_tx = 0;
-               napi_complete(&port->napi);
-               enable_percpu_irq(pp->dev->irq, 0);
+               napi_complete(napi);
+
+               if (pp->neta_armada3700) {
+                       unsigned long flags;
+
+                       local_irq_save(flags);
+                       mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+                                   MVNETA_RX_INTR_MASK(rxq_number) |
+                                   MVNETA_TX_INTR_MASK(txq_number) |
+                                   MVNETA_MISCINTR_INTR_MASK);
+                       local_irq_restore(flags);
+               } else {
+                       enable_percpu_irq(pp->dev->irq, 0);
+               }
        }
 
-       port->cause_rx_tx = cause_rx_tx;
+       if (pp->neta_armada3700)
+               pp->cause_rx_tx = cause_rx_tx;
+       else
+               port->cause_rx_tx = cause_rx_tx;
+
        return rx_done;
 }
 
@@ -2716,7 +2786,7 @@ static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 
        for (i = 0; i < num; i++) {
                memset(rxq->descs + i, 0, sizeof(struct mvneta_rx_desc));
-               if (mvneta_rx_refill(pp, rxq->descs + i) != 0) {
+               if (mvneta_rx_refill(pp, rxq->descs + i, rxq) != 0) {
                        netdev_err(pp->dev, "%s:rxq %d, %d of %d buffs  filled\n",
                                __func__, rxq->id, i, num);
                        break;
@@ -2773,7 +2843,7 @@ static int mvneta_rxq_init(struct mvneta_port *pp,
        mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size);
 
        /* Set Offset */
-       mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD);
+       mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD - pp->rx_offset_correction);
 
        /* Set coalescing pkts and time */
        mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal);
@@ -2784,14 +2854,14 @@ static int mvneta_rxq_init(struct mvneta_port *pp,
                mvneta_rxq_buf_size_set(pp, rxq,
                                        MVNETA_RX_BUF_SIZE(pp->pkt_size));
                mvneta_rxq_bm_disable(pp, rxq);
+               mvneta_rxq_fill(pp, rxq, rxq->size);
        } else {
                mvneta_rxq_bm_enable(pp, rxq);
                mvneta_rxq_long_pool_set(pp, rxq);
                mvneta_rxq_short_pool_set(pp, rxq);
+               mvneta_rxq_non_occup_desc_add(pp, rxq, rxq->size);
        }
 
-       mvneta_rxq_fill(pp, rxq, rxq->size);
-
        return 0;
 }
 
@@ -2974,11 +3044,16 @@ static void mvneta_start_dev(struct mvneta_port *pp)
        /* start the Rx/Tx activity */
        mvneta_port_enable(pp);
 
-       /* Enable polling on the port */
-       for_each_online_cpu(cpu) {
-               struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+       if (!pp->neta_armada3700) {
+               /* Enable polling on the port */
+               for_each_online_cpu(cpu) {
+                       struct mvneta_pcpu_port *port =
+                               per_cpu_ptr(pp->ports, cpu);
 
-               napi_enable(&port->napi);
+                       napi_enable(&port->napi);
+               }
+       } else {
+               napi_enable(&pp->napi);
        }
 
        /* Unmask interrupts. It has to be done from each CPU */
@@ -3000,10 +3075,15 @@ static void mvneta_stop_dev(struct mvneta_port *pp)
 
        phy_stop(ndev->phydev);
 
-       for_each_online_cpu(cpu) {
-               struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+       if (!pp->neta_armada3700) {
+               for_each_online_cpu(cpu) {
+                       struct mvneta_pcpu_port *port =
+                               per_cpu_ptr(pp->ports, cpu);
 
-               napi_disable(&port->napi);
+                       napi_disable(&port->napi);
+               }
+       } else {
+               napi_disable(&pp->napi);
        }
 
        netif_carrier_off(pp->dev);
@@ -3413,31 +3493,37 @@ static int mvneta_open(struct net_device *dev)
                goto err_cleanup_rxqs;
 
        /* Connect to port interrupt line */
-       ret = request_percpu_irq(pp->dev->irq, mvneta_isr,
-                                MVNETA_DRIVER_NAME, pp->ports);
+       if (pp->neta_armada3700)
+               ret = request_irq(pp->dev->irq, mvneta_isr, 0,
+                                 dev->name, pp);
+       else
+               ret = request_percpu_irq(pp->dev->irq, mvneta_percpu_isr,
+                                        dev->name, pp->ports);
        if (ret) {
                netdev_err(pp->dev, "cannot request irq %d\n", pp->dev->irq);
                goto err_cleanup_txqs;
        }
 
-       /* Enable per-CPU interrupt on all the CPU to handle our RX
-        * queue interrupts
-        */
-       on_each_cpu(mvneta_percpu_enable, pp, true);
+       if (!pp->neta_armada3700) {
+               /* Enable per-CPU interrupt on all the CPU to handle our RX
+                * queue interrupts
+                */
+               on_each_cpu(mvneta_percpu_enable, pp, true);
 
-       pp->is_stopped = false;
-       /* Register a CPU notifier to handle the case where our CPU
-        * might be taken offline.
-        */
-       ret = cpuhp_state_add_instance_nocalls(online_hpstate,
-                                              &pp->node_online);
-       if (ret)
-               goto err_free_irq;
+               pp->is_stopped = false;
+               /* Register a CPU notifier to handle the case where our CPU
+                * might be taken offline.
+                */
+               ret = cpuhp_state_add_instance_nocalls(online_hpstate,
+                                                      &pp->node_online);
+               if (ret)
+                       goto err_free_irq;
 
-       ret = cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
-                                              &pp->node_dead);
-       if (ret)
-               goto err_free_online_hp;
+               ret = cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+                                                      &pp->node_dead);
+               if (ret)
+                       goto err_free_online_hp;
+       }
 
        /* In default link is down */
        netif_carrier_off(pp->dev);
@@ -3453,13 +3539,20 @@ static int mvneta_open(struct net_device *dev)
        return 0;
 
 err_free_dead_hp:
-       cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
-                                           &pp->node_dead);
+       if (!pp->neta_armada3700)
+               cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+                                                   &pp->node_dead);
 err_free_online_hp:
-       cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
+       if (!pp->neta_armada3700)
+               cpuhp_state_remove_instance_nocalls(online_hpstate,
+                                                   &pp->node_online);
 err_free_irq:
-       on_each_cpu(mvneta_percpu_disable, pp, true);
-       free_percpu_irq(pp->dev->irq, pp->ports);
+       if (pp->neta_armada3700) {
+               free_irq(pp->dev->irq, pp);
+       } else {
+               on_each_cpu(mvneta_percpu_disable, pp, true);
+               free_percpu_irq(pp->dev->irq, pp->ports);
+       }
 err_cleanup_txqs:
        mvneta_cleanup_txqs(pp);
 err_cleanup_rxqs:
@@ -3472,23 +3565,30 @@ static int mvneta_stop(struct net_device *dev)
 {
        struct mvneta_port *pp = netdev_priv(dev);
 
-       /* Inform that we are stopping so we don't want to setup the
-        * driver for new CPUs in the notifiers. The code of the
-        * notifier for CPU online is protected by the same spinlock,
-        * so when we get the lock, the notifer work is done.
-        */
-       spin_lock(&pp->lock);
-       pp->is_stopped = true;
-       spin_unlock(&pp->lock);
+       if (!pp->neta_armada3700) {
+               /* Inform that we are stopping so we don't want to setup the
+                * driver for new CPUs in the notifiers. The code of the
+                * notifier for CPU online is protected by the same spinlock,
+                * so when we get the lock, the notifer work is done.
+                */
+               spin_lock(&pp->lock);
+               pp->is_stopped = true;
+               spin_unlock(&pp->lock);
 
-       mvneta_stop_dev(pp);
-       mvneta_mdio_remove(pp);
+               mvneta_stop_dev(pp);
+               mvneta_mdio_remove(pp);
 
        cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
        cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
                                            &pp->node_dead);
-       on_each_cpu(mvneta_percpu_disable, pp, true);
-       free_percpu_irq(dev->irq, pp->ports);
+               on_each_cpu(mvneta_percpu_disable, pp, true);
+               free_percpu_irq(dev->irq, pp->ports);
+       } else {
+               mvneta_stop_dev(pp);
+               mvneta_mdio_remove(pp);
+               free_irq(dev->irq, pp);
+       }
+
        mvneta_cleanup_rxqs(pp);
        mvneta_cleanup_txqs(pp);
 
@@ -3767,6 +3867,11 @@ static int mvneta_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
                                   const u8 *key, const u8 hfunc)
 {
        struct mvneta_port *pp = netdev_priv(dev);
+
+       /* Current code for Armada 3700 doesn't support RSS features yet */
+       if (pp->neta_armada3700)
+               return -EOPNOTSUPP;
+
        /* We require at least one supported parameter to be changed
         * and no change in any of the unsupported parameters
         */
@@ -3787,6 +3892,10 @@ static int mvneta_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
 {
        struct mvneta_port *pp = netdev_priv(dev);
 
+       /* Current code for Armada 3700 doesn't support RSS features yet */
+       if (pp->neta_armada3700)
+               return -EOPNOTSUPP;
+
        if (hfunc)
                *hfunc = ETH_RSS_HASH_TOP;
 
@@ -3811,6 +3920,7 @@ static const struct net_device_ops mvneta_netdev_ops = {
 };
 
 const struct ethtool_ops mvneta_eth_tool_ops = {
+       .nway_reset     = phy_ethtool_nway_reset,
        .get_link       = ethtool_op_get_link,
        .set_coalesce   = mvneta_ethtool_set_coalesce,
        .get_coalesce   = mvneta_ethtool_get_coalesce,
@@ -3864,6 +3974,11 @@ static int mvneta_init(struct device *dev, struct mvneta_port *pp)
                rxq->size = pp->rx_ring_size;
                rxq->pkts_coal = MVNETA_RX_COAL_PKTS;
                rxq->time_coal = MVNETA_RX_COAL_USEC;
+               rxq->buf_virt_addr = devm_kmalloc(pp->dev->dev.parent,
+                                                 rxq->size * sizeof(void *),
+                                                 GFP_KERNEL);
+               if (!rxq->buf_virt_addr)
+                       return -ENOMEM;
        }
 
        return 0;
@@ -3888,16 +4003,29 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp,
        win_enable = 0x3f;
        win_protect = 0;
 
-       for (i = 0; i < dram->num_cs; i++) {
-               const struct mbus_dram_window *cs = dram->cs + i;
-               mvreg_write(pp, MVNETA_WIN_BASE(i), (cs->base & 0xffff0000) |
-                           (cs->mbus_attr << 8) | dram->mbus_dram_target_id);
+       if (dram) {
+               for (i = 0; i < dram->num_cs; i++) {
+                       const struct mbus_dram_window *cs = dram->cs + i;
+
+                       mvreg_write(pp, MVNETA_WIN_BASE(i),
+                                   (cs->base & 0xffff0000) |
+                                   (cs->mbus_attr << 8) |
+                                   dram->mbus_dram_target_id);
 
-               mvreg_write(pp, MVNETA_WIN_SIZE(i),
-                           (cs->size - 1) & 0xffff0000);
+                       mvreg_write(pp, MVNETA_WIN_SIZE(i),
+                                   (cs->size - 1) & 0xffff0000);
 
-               win_enable &= ~(1 << i);
-               win_protect |= 3 << (2 * i);
+                       win_enable &= ~(1 << i);
+                       win_protect |= 3 << (2 * i);
+               }
+       } else {
+               /* For Armada3700 open default 4GB Mbus window, leaving
+                * arbitration of target/attribute to a different layer
+                * of configuration.
+                */
+               mvreg_write(pp, MVNETA_WIN_SIZE(0), 0xffff0000);
+               win_enable &= ~BIT(0);
+               win_protect = 3;
        }
 
        mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable);
@@ -4018,8 +4146,19 @@ static int mvneta_probe(struct platform_device *pdev)
 
        pp->rxq_def = rxq_def;
 
+       /* Set RX packet offset correction for platforms, whose
+        * NET_SKB_PAD, exceeds 64B. It should be 64B for 64-bit
+        * platforms and 0B for 32-bit ones.
+        */
+       pp->rx_offset_correction =
+               max(0, NET_SKB_PAD - MVNETA_RX_PKT_OFFSET_CORRECTION);
+
        pp->indir[0] = rxq_def;
 
+       /* Get special SoC configurations */
+       if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
+               pp->neta_armada3700 = true;
+
        pp->clk = devm_clk_get(&pdev->dev, "core");
        if (IS_ERR(pp->clk))
                pp->clk = devm_clk_get(&pdev->dev, NULL);
@@ -4087,7 +4226,11 @@ static int mvneta_probe(struct platform_device *pdev)
        pp->tx_csum_limit = tx_csum_limit;
 
        dram_target_info = mv_mbus_dram_info();
-       if (dram_target_info)
+       /* Armada3700 requires setting default configuration of Mbus
+        * windows, however without using filled mbus_dram_target_info
+        * structure.
+        */
+       if (dram_target_info || pp->neta_armada3700)
                mvneta_conf_mbus_windows(pp, dram_target_info);
 
        pp->tx_ring_size = MVNETA_MAX_TXD;
@@ -4120,17 +4263,26 @@ static int mvneta_probe(struct platform_device *pdev)
                goto err_netdev;
        }
 
-       for_each_present_cpu(cpu) {
-               struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+       /* Armada3700 network controller does not support per-cpu
+        * operation, so only single NAPI should be initialized.
+        */
+       if (pp->neta_armada3700) {
+               netif_napi_add(dev, &pp->napi, mvneta_poll, NAPI_POLL_WEIGHT);
+       } else {
+               for_each_present_cpu(cpu) {
+                       struct mvneta_pcpu_port *port =
+                               per_cpu_ptr(pp->ports, cpu);
 
-               netif_napi_add(dev, &port->napi, mvneta_poll, NAPI_POLL_WEIGHT);
-               port->pp = pp;
+                       netif_napi_add(dev, &port->napi, mvneta_poll,
+                                      NAPI_POLL_WEIGHT);
+                       port->pp = pp;
+               }
        }
 
        dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
        dev->hw_features |= dev->features;
        dev->vlan_features |= dev->features;
-       dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
+       dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
        dev->gso_max_segs = MVNETA_MAX_TSO_SEGS;
 
        /* MTU range: 68 - 9676 */
@@ -4209,6 +4361,7 @@ static int mvneta_remove(struct platform_device *pdev)
 static const struct of_device_id mvneta_match[] = {
        { .compatible = "marvell,armada-370-neta" },
        { .compatible = "marvell,armada-xp-neta" },
+       { .compatible = "marvell,armada-3700-neta" },
        { }
 };
 MODULE_DEVICE_TABLE(of, mvneta_match);
index c8bf155cad94c9155c379bde35562ca0977e1515..dabc5418efcc48287b1e30d95f8a05235755f119 100644 (file)
@@ -3293,7 +3293,7 @@ static void mvpp2_cls_init(struct mvpp2 *priv)
        mvpp2_write(priv, MVPP2_CLS_MODE_REG, MVPP2_CLS_MODE_ACTIVE_MASK);
 
        /* Clear classifier flow table */
-       memset(&fe.data, 0, MVPP2_CLS_FLOWS_TBL_DATA_WORDS);
+       memset(&fe.data, 0, sizeof(fe.data));
        for (index = 0; index < MVPP2_CLS_FLOWS_TBL_SIZE; index++) {
                fe.index = index;
                mvpp2_cls_flow_write(priv, &fe);
@@ -5923,6 +5923,7 @@ static const struct net_device_ops mvpp2_netdev_ops = {
 };
 
 static const struct ethtool_ops mvpp2_eth_tool_ops = {
+       .nway_reset     = phy_ethtool_nway_reset,
        .get_link       = ethtool_op_get_link,
        .set_coalesce   = mvpp2_ethtool_set_coalesce,
        .get_coalesce   = mvpp2_ethtool_get_coalesce,
index b78a838f306c86b21c8c8a29a4c6dbb12fe5aa6c..3af2814ada235c124fd49f1c72f7d0a5c4afbe99 100644 (file)
@@ -1393,6 +1393,7 @@ static void pxa168_get_drvinfo(struct net_device *dev,
 
 static const struct ethtool_ops pxa168_ethtool_ops = {
        .get_drvinfo    = pxa168_get_drvinfo,
+       .nway_reset     = phy_ethtool_nway_reset,
        .get_link       = ethtool_op_get_link,
        .get_ts_info    = ethtool_op_get_ts_info,
        .get_link_ksettings = pxa168_get_link_ksettings,
index aa60f4dcddd84969e84130da51545482afdd9d7d..b60ad0e56a9f1105ce2f93c9f8abb8c31790736c 100644 (file)
@@ -5218,6 +5218,19 @@ static SIMPLE_DEV_PM_OPS(sky2_pm_ops, sky2_suspend, sky2_resume);
 
 static void sky2_shutdown(struct pci_dev *pdev)
 {
+       struct sky2_hw *hw = pci_get_drvdata(pdev);
+       int port;
+
+       for (port = 0; port < hw->ports; port++) {
+               struct net_device *ndev = hw->dev[port];
+
+               rtnl_lock();
+               if (netif_running(ndev)) {
+                       dev_close(ndev);
+                       netif_device_detach(ndev);
+               }
+               rtnl_unlock();
+       }
        sky2_suspend(&pdev->dev);
        pci_wake_from_d3(pdev, device_may_wakeup(&pdev->dev));
        pci_set_power_state(pdev, PCI_D3hot);
index b1cef7a0f7ca62fe982ddf507a8cc4f2900e3243..a49072b4fa520c1ba218a77438e8f9f4c653ef75 100644 (file)
@@ -2469,6 +2469,7 @@ err_comm_admin:
        kfree(priv->mfunc.master.slave_state);
 err_comm:
        iounmap(priv->mfunc.comm);
+       priv->mfunc.comm = NULL;
 err_vhcr:
        dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
                          priv->mfunc.vhcr,
@@ -2537,6 +2538,13 @@ void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev)
        int slave;
        u32 slave_read;
 
+       /* If the comm channel has not yet been initialized,
+        * skip reporting the internal error event to all
+        * the communication channels.
+        */
+       if (!priv->mfunc.comm)
+               return;
+
        /* Report an internal error event to all
         * communication channels.
         */
@@ -2571,6 +2579,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
        }
 
        iounmap(priv->mfunc.comm);
+       priv->mfunc.comm = NULL;
 }
 
 void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask)
@@ -2670,15 +2679,13 @@ struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
        if (!mailbox)
                return ERR_PTR(-ENOMEM);
 
-       mailbox->buf = pci_pool_alloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL,
-                                     &mailbox->dma);
+       mailbox->buf = pci_pool_zalloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL,
+                                      &mailbox->dma);
        if (!mailbox->buf) {
                kfree(mailbox);
                return ERR_PTR(-ENOMEM);
        }
 
-       memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE);
-
        return mailbox;
 }
 EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox);
index 08fc5fc56d43b489ab6a8502012b5cad925a0e7b..a5fc46bbcbe224373b768633f600f3d66d4865d5 100644 (file)
@@ -245,8 +245,11 @@ static u32 freq_to_shift(u16 freq)
 {
        u32 freq_khz = freq * 1000;
        u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC;
+       u64 tmp_rounded =
+               roundup_pow_of_two(max_val_cycles) > max_val_cycles ?
+               roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX;
        u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ?
-               max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1;
+               max_val_cycles : tmp_rounded;
        /* calculate max possible multiplier in order to fit in 64bit */
        u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded);
 
index 132cea655920d636c2e22fd2f110d163a421b8f3..09dd3776db7632ae8818c9638507705176233ea4 100644 (file)
@@ -65,7 +65,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
        cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
 
        cq->ring = ring;
-       cq->is_tx = mode;
+       cq->type = mode;
        cq->vector = mdev->dev->caps.num_comp_vectors;
 
        /* Allocate HW buffers on provided NUMA node.
@@ -104,7 +104,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
        *cq->mcq.arm_db    = 0;
        memset(cq->buf, 0, cq->buf_size);
 
-       if (cq->is_tx == RX) {
+       if (cq->type == RX) {
                if (!mlx4_is_eq_vector_valid(mdev->dev, priv->port,
                                             cq->vector)) {
                        cq->vector = cpumask_first(priv->rx_ring[cq->ring]->affinity_mask);
@@ -133,11 +133,11 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
                cq->vector = rx_cq->vector;
        }
 
-       if (!cq->is_tx)
+       if (cq->type == RX)
                cq->size = priv->rx_ring[cq->ring]->actual_size;
 
-       if ((cq->is_tx && priv->hwtstamp_config.tx_type) ||
-           (!cq->is_tx && priv->hwtstamp_config.rx_filter))
+       if ((cq->type != RX && priv->hwtstamp_config.tx_type) ||
+           (cq->type == RX && priv->hwtstamp_config.rx_filter))
                timestamp_en = 1;
 
        err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt,
@@ -146,10 +146,10 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
        if (err)
                goto free_eq;
 
-       cq->mcq.comp  = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq;
+       cq->mcq.comp  = cq->type != RX ? mlx4_en_tx_irq : mlx4_en_rx_irq;
        cq->mcq.event = mlx4_en_cq_event;
 
-       if (cq->is_tx)
+       if (cq->type != RX)
                netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
                                  NAPI_POLL_WEIGHT);
        else
@@ -173,7 +173,7 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
 
        mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
        if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) &&
-           cq->is_tx == RX)
+           cq->type == RX)
                mlx4_release_eq(priv->mdev->dev, cq->vector);
        cq->vector = 0;
        cq->buf_size = 0;
@@ -185,10 +185,6 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
 void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
 {
        napi_disable(&cq->napi);
-       if (!cq->is_tx) {
-               napi_hash_del(&cq->napi);
-               synchronize_rcu();
-       }
        netif_napi_del(&cq->napi);
 
        mlx4_cq_free(priv->mdev->dev, &cq->mcq);
index bdda17d2ea0f9b9e6d8efd37f2637ef22b873cc3..d9c9f86a30df953fa555934c5406057dcaf28960 100644 (file)
 
 static int mlx4_en_moderation_update(struct mlx4_en_priv *priv)
 {
-       int i;
+       int i, t;
        int err = 0;
 
-       for (i = 0; i < priv->tx_ring_num; i++) {
-               priv->tx_cq[i]->moder_cnt = priv->tx_frames;
-               priv->tx_cq[i]->moder_time = priv->tx_usecs;
-               if (priv->port_up) {
-                       err = mlx4_en_set_cq_moder(priv, priv->tx_cq[i]);
-                       if (err)
-                               return err;
+       for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
+               for (i = 0; i < priv->tx_ring_num[t]; i++) {
+                       priv->tx_cq[t][i]->moder_cnt = priv->tx_frames;
+                       priv->tx_cq[t][i]->moder_time = priv->tx_usecs;
+                       if (priv->port_up) {
+                               err = mlx4_en_set_cq_moder(priv,
+                                                          priv->tx_cq[t][i]);
+                               if (err)
+                                       return err;
+                       }
                }
        }
 
@@ -192,6 +195,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
        "tx_prio_7_packets", "tx_prio_7_bytes",
        "tx_novlan_packets", "tx_novlan_bytes",
 
+       /* xdp statistics */
+       "rx_xdp_drop",
+       "rx_xdp_tx",
+       "rx_xdp_tx_full",
 };
 
 static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
@@ -336,8 +343,8 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
        switch (sset) {
        case ETH_SS_STATS:
                return bitmap_iterator_count(&it) +
-                       (priv->tx_ring_num * 2) +
-                       (priv->rx_ring_num * 3);
+                       (priv->tx_ring_num[TX] * 2) +
+                       (priv->rx_ring_num * (3 + NUM_XDP_STATS));
        case ETH_SS_TEST:
                return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags
                                        & MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2;
@@ -360,6 +367,8 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
 
        spin_lock_bh(&priv->stats_lock);
 
+       mlx4_en_fold_software_stats(dev);
+
        for (i = 0; i < NUM_MAIN_STATS; i++, bitmap_iterator_inc(&it))
                if (bitmap_iterator_test(&it))
                        data[index++] = ((unsigned long *)&dev->stats)[i];
@@ -397,14 +406,21 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
                if (bitmap_iterator_test(&it))
                        data[index++] = ((unsigned long *)&priv->pkstats)[i];
 
-       for (i = 0; i < priv->tx_ring_num; i++) {
-               data[index++] = priv->tx_ring[i]->packets;
-               data[index++] = priv->tx_ring[i]->bytes;
+       for (i = 0; i < NUM_XDP_STATS; i++, bitmap_iterator_inc(&it))
+               if (bitmap_iterator_test(&it))
+                       data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
+
+       for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+               data[index++] = priv->tx_ring[TX][i]->packets;
+               data[index++] = priv->tx_ring[TX][i]->bytes;
        }
        for (i = 0; i < priv->rx_ring_num; i++) {
                data[index++] = priv->rx_ring[i]->packets;
                data[index++] = priv->rx_ring[i]->bytes;
                data[index++] = priv->rx_ring[i]->dropped;
+               data[index++] = priv->rx_ring[i]->xdp_drop;
+               data[index++] = priv->rx_ring[i]->xdp_tx;
+               data[index++] = priv->rx_ring[i]->xdp_tx_full;
        }
        spin_unlock_bh(&priv->stats_lock);
 
@@ -467,7 +483,13 @@ static void mlx4_en_get_strings(struct net_device *dev,
                                strcpy(data + (index++) * ETH_GSTRING_LEN,
                                       main_strings[strings]);
 
-               for (i = 0; i < priv->tx_ring_num; i++) {
+               for (i = 0; i < NUM_XDP_STATS; i++, strings++,
+                    bitmap_iterator_inc(&it))
+                       if (bitmap_iterator_test(&it))
+                               strcpy(data + (index++) * ETH_GSTRING_LEN,
+                                      main_strings[strings]);
+
+               for (i = 0; i < priv->tx_ring_num[TX]; i++) {
                        sprintf(data + (index++) * ETH_GSTRING_LEN,
                                "tx%d_packets", i);
                        sprintf(data + (index++) * ETH_GSTRING_LEN,
@@ -480,6 +502,12 @@ static void mlx4_en_get_strings(struct net_device *dev,
                                "rx%d_bytes", i);
                        sprintf(data + (index++) * ETH_GSTRING_LEN,
                                "rx%d_dropped", i);
+                       sprintf(data + (index++) * ETH_GSTRING_LEN,
+                               "rx%d_xdp_drop", i);
+                       sprintf(data + (index++) * ETH_GSTRING_LEN,
+                               "rx%d_xdp_tx", i);
+                       sprintf(data + (index++) * ETH_GSTRING_LEN,
+                               "rx%d_xdp_tx_full", i);
                }
                break;
        case ETH_SS_PRIV_FLAGS:
@@ -1060,7 +1088,7 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
 
        if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size :
                                        priv->rx_ring[0]->size) &&
-           tx_size == priv->tx_ring[0]->size)
+           tx_size == priv->tx_ring[TX][0]->size)
                return 0;
 
        tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
@@ -1105,7 +1133,7 @@ static void mlx4_en_get_ringparam(struct net_device *dev,
        param->tx_max_pending = MLX4_EN_MAX_TX_SIZE;
        param->rx_pending = priv->port_up ?
                priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size;
-       param->tx_pending = priv->tx_ring[0]->size;
+       param->tx_pending = priv->tx_ring[TX][0]->size;
 }
 
 static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev)
@@ -1710,7 +1738,7 @@ static void mlx4_en_get_channels(struct net_device *dev,
        channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP;
 
        channel->rx_count = priv->rx_ring_num;
-       channel->tx_count = priv->tx_ring_num / MLX4_EN_NUM_UP;
+       channel->tx_count = priv->tx_ring_num[TX] / MLX4_EN_NUM_UP;
 }
 
 static int mlx4_en_set_channels(struct net_device *dev,
@@ -1721,6 +1749,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
        struct mlx4_en_port_profile new_prof;
        struct mlx4_en_priv *tmp;
        int port_up = 0;
+       int xdp_count;
        int err = 0;
 
        if (channel->other_count || channel->combined_count ||
@@ -1729,20 +1758,25 @@ static int mlx4_en_set_channels(struct net_device *dev,
            !channel->tx_count || !channel->rx_count)
                return -EINVAL;
 
-       if (channel->tx_count * MLX4_EN_NUM_UP <= priv->xdp_ring_num) {
-               en_err(priv, "Minimum %d tx channels required with XDP on\n",
-                      priv->xdp_ring_num / MLX4_EN_NUM_UP + 1);
-               return -EINVAL;
-       }
-
        tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
        if (!tmp)
                return -ENOMEM;
 
        mutex_lock(&mdev->state_lock);
+       xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
+       if (channel->tx_count * MLX4_EN_NUM_UP + xdp_count > MAX_TX_RINGS) {
+               err = -EINVAL;
+               en_err(priv,
+                      "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
+                      channel->tx_count * MLX4_EN_NUM_UP + xdp_count,
+                      MAX_TX_RINGS);
+               goto out;
+       }
+
        memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
        new_prof.num_tx_rings_p_up = channel->tx_count;
-       new_prof.tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP;
+       new_prof.tx_ring_num[TX] = channel->tx_count * MLX4_EN_NUM_UP;
+       new_prof.tx_ring_num[TX_XDP] = xdp_count;
        new_prof.rx_ring_num = channel->rx_count;
 
        err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
@@ -1756,14 +1790,13 @@ static int mlx4_en_set_channels(struct net_device *dev,
 
        mlx4_en_safe_replace_resources(priv, tmp);
 
-       netif_set_real_num_tx_queues(dev, priv->tx_ring_num -
-                                                       priv->xdp_ring_num);
+       netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
        netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
 
        if (dev->num_tc)
                mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP);
 
-       en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num);
+       en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]);
        en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num);
 
        if (port_up) {
@@ -1774,8 +1807,8 @@ static int mlx4_en_set_channels(struct net_device *dev,
 
        err = mlx4_en_moderation_update(priv);
 out:
-       kfree(tmp);
        mutex_unlock(&mdev->state_lock);
+       kfree(tmp);
        return err;
 }
 
@@ -1823,11 +1856,15 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags)
        int ret = 0;
 
        if (bf_enabled_new != bf_enabled_old) {
+               int t;
+
                if (bf_enabled_new) {
                        bool bf_supported = true;
 
-                       for (i = 0; i < priv->tx_ring_num; i++)
-                               bf_supported &= priv->tx_ring[i]->bf_alloced;
+                       for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
+                               for (i = 0; i < priv->tx_ring_num[t]; i++)
+                                       bf_supported &=
+                                               priv->tx_ring[t][i]->bf_alloced;
 
                        if (!bf_supported) {
                                en_err(priv, "BlueFlame is not supported\n");
@@ -1839,8 +1876,10 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags)
                        priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME;
                }
 
-               for (i = 0; i < priv->tx_ring_num; i++)
-                       priv->tx_ring[i]->bf_enabled = bf_enabled_new;
+               for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
+                       for (i = 0; i < priv->tx_ring_num[t]; i++)
+                               priv->tx_ring[t][i]->bf_enabled =
+                                       bf_enabled_new;
 
                en_info(priv, "BlueFlame %s\n",
                        bf_enabled_new ?  "Enabled" : "Disabled");
index bf7628db098acd24f9a510ca07862736f0475cfe..36a7a54bbb8282af81e662af24ed583089b690a8 100644 (file)
@@ -169,7 +169,7 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
                params->prof[i].tx_ppp = pfctx;
                params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
                params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
-               params->prof[i].tx_ring_num = params->num_tx_rings_p_up *
+               params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up *
                        MLX4_EN_NUM_UP;
                params->prof[i].rss_rings = 0;
                params->prof[i].inline_thold = inline_thold;
index bf35ac4c1c615ec45c59f4341a62c0b82c99d2e0..091b904262bc7932d3edf99cf850affb23b9ce6e 100644 (file)
@@ -129,6 +129,9 @@ static enum mlx4_net_trans_rule_id mlx4_ip_proto_to_trans_rule_id(u8 ip_proto)
        }
 };
 
+/* Must not acquire state_lock, as its corresponding work_sync
+ * is done under it.
+ */
 static void mlx4_en_filter_work(struct work_struct *work)
 {
        struct mlx4_en_filter *filter = container_of(work,
@@ -1214,8 +1217,8 @@ static void mlx4_en_netpoll(struct net_device *dev)
        struct mlx4_en_cq *cq;
        int i;
 
-       for (i = 0; i < priv->tx_ring_num; i++) {
-               cq = priv->tx_cq[i];
+       for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+               cq = priv->tx_cq[TX][i];
                napi_schedule(&cq->napi);
        }
 }
@@ -1299,12 +1302,14 @@ static void mlx4_en_tx_timeout(struct net_device *dev)
        if (netif_msg_timer(priv))
                en_warn(priv, "Tx timeout called on port:%d\n", priv->port);
 
-       for (i = 0; i < priv->tx_ring_num; i++) {
+       for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+               struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][i];
+
                if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)))
                        continue;
                en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n",
-                       i, priv->tx_ring[i]->qpn, priv->tx_ring[i]->cqn,
-                       priv->tx_ring[i]->cons, priv->tx_ring[i]->prod);
+                       i, tx_ring->qpn, tx_ring->sp_cqn,
+                       tx_ring->cons, tx_ring->prod);
        }
 
        priv->port_stats.tx_timeout++;
@@ -1319,6 +1324,7 @@ mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
        struct mlx4_en_priv *priv = netdev_priv(dev);
 
        spin_lock_bh(&priv->stats_lock);
+       mlx4_en_fold_software_stats(dev);
        netdev_stats_to_stats64(stats, &dev->stats);
        spin_unlock_bh(&priv->stats_lock);
 
@@ -1328,7 +1334,7 @@ mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv)
 {
        struct mlx4_en_cq *cq;
-       int i;
+       int i, t;
 
        /* If we haven't received a specific coalescing setting
         * (module param), we set the moderation parameters as follows:
@@ -1353,10 +1359,12 @@ static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv)
                priv->last_moder_bytes[i] = 0;
        }
 
-       for (i = 0; i < priv->tx_ring_num; i++) {
-               cq = priv->tx_cq[i];
-               cq->moder_cnt = priv->tx_frames;
-               cq->moder_time = priv->tx_usecs;
+       for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
+               for (i = 0; i < priv->tx_ring_num[t]; i++) {
+                       cq = priv->tx_cq[t][i];
+                       cq->moder_cnt = priv->tx_frames;
+                       cq->moder_time = priv->tx_usecs;
+               }
        }
 
        /* Reset auto-moderation params */
@@ -1387,10 +1395,8 @@ static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv)
                return;
 
        for (ring = 0; ring < priv->rx_ring_num; ring++) {
-               spin_lock_bh(&priv->stats_lock);
-               rx_packets = priv->rx_ring[ring]->packets;
-               rx_bytes = priv->rx_ring[ring]->bytes;
-               spin_unlock_bh(&priv->stats_lock);
+               rx_packets = READ_ONCE(priv->rx_ring[ring]->packets);
+               rx_bytes = READ_ONCE(priv->rx_ring[ring]->bytes);
 
                rx_pkt_diff = ((unsigned long) (rx_packets -
                                priv->last_moder_packets[ring]));
@@ -1526,19 +1532,13 @@ static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
 static void mlx4_en_init_recycle_ring(struct mlx4_en_priv *priv,
                                      int tx_ring_idx)
 {
-       struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[tx_ring_idx];
-       int rr_index;
+       struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX_XDP][tx_ring_idx];
+       int rr_index = tx_ring_idx;
 
-       rr_index = (priv->xdp_ring_num - priv->tx_ring_num) + tx_ring_idx;
-       if (rr_index >= 0) {
-               tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc;
-               tx_ring->recycle_ring = priv->rx_ring[rr_index];
-               en_dbg(DRV, priv,
-                      "Set tx_ring[%d]->recycle_ring = rx_ring[%d]\n",
-                      tx_ring_idx, rr_index);
-       } else {
-               tx_ring->recycle_ring = NULL;
-       }
+       tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc;
+       tx_ring->recycle_ring = priv->rx_ring[rr_index];
+       en_dbg(DRV, priv, "Set tx_ring[%d][%d]->recycle_ring = rx_ring[%d]\n",
+              TX_XDP, tx_ring_idx, rr_index);
 }
 
 int mlx4_en_start_port(struct net_device *dev)
@@ -1548,9 +1548,8 @@ int mlx4_en_start_port(struct net_device *dev)
        struct mlx4_en_cq *cq;
        struct mlx4_en_tx_ring *tx_ring;
        int rx_index = 0;
-       int tx_index = 0;
        int err = 0;
-       int i;
+       int i, t;
        int j;
        u8 mc_list[16] = {0};
 
@@ -1635,43 +1634,51 @@ int mlx4_en_start_port(struct net_device *dev)
                goto rss_err;
 
        /* Configure tx cq's and rings */
-       for (i = 0; i < priv->tx_ring_num; i++) {
-               /* Configure cq */
-               cq = priv->tx_cq[i];
-               err = mlx4_en_activate_cq(priv, cq, i);
-               if (err) {
-                       en_err(priv, "Failed allocating Tx CQ\n");
-                       goto tx_err;
-               }
-               err = mlx4_en_set_cq_moder(priv, cq);
-               if (err) {
-                       en_err(priv, "Failed setting cq moderation parameters\n");
-                       mlx4_en_deactivate_cq(priv, cq);
-                       goto tx_err;
-               }
-               en_dbg(DRV, priv, "Resetting index of collapsed CQ:%d to -1\n", i);
-               cq->buf->wqe_index = cpu_to_be16(0xffff);
+       for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
+               u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up : 1;
 
-               /* Configure ring */
-               tx_ring = priv->tx_ring[i];
-               err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn,
-                       i / priv->num_tx_rings_p_up);
-               if (err) {
-                       en_err(priv, "Failed allocating Tx ring\n");
-                       mlx4_en_deactivate_cq(priv, cq);
-                       goto tx_err;
-               }
-               tx_ring->tx_queue = netdev_get_tx_queue(dev, i);
-
-               mlx4_en_init_recycle_ring(priv, i);
+               for (i = 0; i < priv->tx_ring_num[t]; i++) {
+                       /* Configure cq */
+                       cq = priv->tx_cq[t][i];
+                       err = mlx4_en_activate_cq(priv, cq, i);
+                       if (err) {
+                               en_err(priv, "Failed allocating Tx CQ\n");
+                               goto tx_err;
+                       }
+                       err = mlx4_en_set_cq_moder(priv, cq);
+                       if (err) {
+                               en_err(priv, "Failed setting cq moderation parameters\n");
+                               mlx4_en_deactivate_cq(priv, cq);
+                               goto tx_err;
+                       }
+                       en_dbg(DRV, priv,
+                              "Resetting index of collapsed CQ:%d to -1\n", i);
+                       cq->buf->wqe_index = cpu_to_be16(0xffff);
+
+                       /* Configure ring */
+                       tx_ring = priv->tx_ring[t][i];
+                       err = mlx4_en_activate_tx_ring(priv, tx_ring,
+                                                      cq->mcq.cqn,
+                                                      i / num_tx_rings_p_up);
+                       if (err) {
+                               en_err(priv, "Failed allocating Tx ring\n");
+                               mlx4_en_deactivate_cq(priv, cq);
+                               goto tx_err;
+                       }
+                       if (t != TX_XDP) {
+                               tx_ring->tx_queue = netdev_get_tx_queue(dev, i);
+                               tx_ring->recycle_ring = NULL;
+                       } else {
+                               mlx4_en_init_recycle_ring(priv, i);
+                       }
 
-               /* Arm CQ for TX completions */
-               mlx4_en_arm_cq(priv, cq);
+                       /* Arm CQ for TX completions */
+                       mlx4_en_arm_cq(priv, cq);
 
-               /* Set initial ownership of all Tx TXBBs to SW (1) */
-               for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
-                       *((u32 *) (tx_ring->buf + j)) = 0xffffffff;
-               ++tx_index;
+                       /* Set initial ownership of all Tx TXBBs to SW (1) */
+                       for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
+                               *((u32 *)(tx_ring->buf + j)) = 0xffffffff;
+               }
        }
 
        /* Configure port */
@@ -1733,15 +1740,31 @@ int mlx4_en_start_port(struct net_device *dev)
                udp_tunnel_get_rx_info(dev);
 
        priv->port_up = true;
+
+       /* Process any pending completions to prevent
+        * the queues from freezing if they are full
+        */
+       for (i = 0; i < priv->rx_ring_num; i++)
+               napi_schedule(&priv->rx_cq[i]->napi);
+
        netif_tx_start_all_queues(dev);
        netif_device_attach(dev);
 
        return 0;
 
 tx_err:
-       while (tx_index--) {
-               mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[tx_index]);
-               mlx4_en_deactivate_cq(priv, priv->tx_cq[tx_index]);
+       if (t == MLX4_EN_NUM_TX_TYPES) {
+               t--;
+               i = priv->tx_ring_num[t];
+       }
+       while (t >= 0) {
+               while (i--) {
+                       mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[t][i]);
+                       mlx4_en_deactivate_cq(priv, priv->tx_cq[t][i]);
+               }
+               if (!t--)
+                       break;
+               i = priv->tx_ring_num[t];
        }
        mlx4_en_destroy_drop_qp(priv);
 rss_err:
@@ -1766,7 +1789,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
        struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_mc_list *mclist, *tmp;
        struct ethtool_flow_id *flow, *tmp_flow;
-       int i;
+       int i, t;
        u8 mc_list[16] = {0};
 
        if (!priv->port_up) {
@@ -1786,8 +1809,12 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 
        netif_tx_disable(dev);
 
+       spin_lock_bh(&priv->stats_lock);
+       mlx4_en_fold_software_stats(dev);
        /* Set port as not active */
        priv->port_up = false;
+       spin_unlock_bh(&priv->stats_lock);
+
        priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev);
 
        /* Promiscuous mode */
@@ -1852,14 +1879,17 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
        mlx4_en_destroy_drop_qp(priv);
 
        /* Free TX Rings */
-       for (i = 0; i < priv->tx_ring_num; i++) {
-               mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[i]);
-               mlx4_en_deactivate_cq(priv, priv->tx_cq[i]);
+       for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+               for (i = 0; i < priv->tx_ring_num[t]; i++) {
+                       mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[t][i]);
+                       mlx4_en_deactivate_cq(priv, priv->tx_cq[t][i]);
+               }
        }
        msleep(10);
 
-       for (i = 0; i < priv->tx_ring_num; i++)
-               mlx4_en_free_tx_buf(dev, priv->tx_ring[i]);
+       for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
+               for (i = 0; i < priv->tx_ring_num[t]; i++)
+                       mlx4_en_free_tx_buf(dev, priv->tx_ring[t][i]);
 
        if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0)
                mlx4_en_delete_rss_steer_rules(priv);
@@ -1908,10 +1938,12 @@ static void mlx4_en_clear_stats(struct net_device *dev)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
        struct mlx4_en_dev *mdev = priv->mdev;
+       struct mlx4_en_tx_ring **tx_ring;
        int i;
 
-       if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1))
-               en_dbg(HW, priv, "Failed dumping statistics\n");
+       if (!mlx4_is_slave(mdev->dev))
+               if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1))
+                       en_dbg(HW, priv, "Failed dumping statistics\n");
 
        memset(&priv->pstats, 0, sizeof(priv->pstats));
        memset(&priv->pkstats, 0, sizeof(priv->pkstats));
@@ -1924,15 +1956,16 @@ static void mlx4_en_clear_stats(struct net_device *dev)
               sizeof(priv->tx_priority_flowstats));
        memset(&priv->pf_stats, 0, sizeof(priv->pf_stats));
 
-       for (i = 0; i < priv->tx_ring_num; i++) {
-               priv->tx_ring[i]->bytes = 0;
-               priv->tx_ring[i]->packets = 0;
-               priv->tx_ring[i]->tx_csum = 0;
-               priv->tx_ring[i]->tx_dropped = 0;
-               priv->tx_ring[i]->queue_stopped = 0;
-               priv->tx_ring[i]->wake_queue = 0;
-               priv->tx_ring[i]->tso_packets = 0;
-               priv->tx_ring[i]->xmit_more = 0;
+       tx_ring = priv->tx_ring[TX];
+       for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+               tx_ring[i]->bytes = 0;
+               tx_ring[i]->packets = 0;
+               tx_ring[i]->tx_csum = 0;
+               tx_ring[i]->tx_dropped = 0;
+               tx_ring[i]->queue_stopped = 0;
+               tx_ring[i]->wake_queue = 0;
+               tx_ring[i]->tso_packets = 0;
+               tx_ring[i]->xmit_more = 0;
        }
        for (i = 0; i < priv->rx_ring_num; i++) {
                priv->rx_ring[i]->bytes = 0;
@@ -1988,17 +2021,20 @@ static int mlx4_en_close(struct net_device *dev)
 
 static void mlx4_en_free_resources(struct mlx4_en_priv *priv)
 {
-       int i;
+       int i, t;
 
 #ifdef CONFIG_RFS_ACCEL
        priv->dev->rx_cpu_rmap = NULL;
 #endif
 
-       for (i = 0; i < priv->tx_ring_num; i++) {
-               if (priv->tx_ring && priv->tx_ring[i])
-                       mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]);
-               if (priv->tx_cq && priv->tx_cq[i])
-                       mlx4_en_destroy_cq(priv, &priv->tx_cq[i]);
+       for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+               for (i = 0; i < priv->tx_ring_num[t]; i++) {
+                       if (priv->tx_ring[t] && priv->tx_ring[t][i])
+                               mlx4_en_destroy_tx_ring(priv,
+                                                       &priv->tx_ring[t][i]);
+                       if (priv->tx_cq[t] && priv->tx_cq[t][i])
+                               mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]);
+               }
        }
 
        for (i = 0; i < priv->rx_ring_num; i++) {
@@ -2014,20 +2050,22 @@ static void mlx4_en_free_resources(struct mlx4_en_priv *priv)
 static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 {
        struct mlx4_en_port_profile *prof = priv->prof;
-       int i;
+       int i, t;
        int node;
 
        /* Create tx Rings */
-       for (i = 0; i < priv->tx_ring_num; i++) {
-               node = cpu_to_node(i % num_online_cpus());
-               if (mlx4_en_create_cq(priv, &priv->tx_cq[i],
-                                     prof->tx_ring_size, i, TX, node))
-                       goto err;
-
-               if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i],
-                                          prof->tx_ring_size, TXBB_SIZE,
-                                          node, i))
-                       goto err;
+       for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+               for (i = 0; i < priv->tx_ring_num[t]; i++) {
+                       node = cpu_to_node(i % num_online_cpus());
+                       if (mlx4_en_create_cq(priv, &priv->tx_cq[t][i],
+                                             prof->tx_ring_size, i, t, node))
+                               goto err;
+
+                       if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[t][i],
+                                                  prof->tx_ring_size,
+                                                  TXBB_SIZE, node, i))
+                               goto err;
+               }
        }
 
        /* Create rx Rings */
@@ -2059,11 +2097,14 @@ err:
                if (priv->rx_cq[i])
                        mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
        }
-       for (i = 0; i < priv->tx_ring_num; i++) {
-               if (priv->tx_ring[i])
-                       mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]);
-               if (priv->tx_cq[i])
-                       mlx4_en_destroy_cq(priv, &priv->tx_cq[i]);
+       for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+               for (i = 0; i < priv->tx_ring_num[t]; i++) {
+                       if (priv->tx_ring[t][i])
+                               mlx4_en_destroy_tx_ring(priv,
+                                                       &priv->tx_ring[t][i]);
+                       if (priv->tx_cq[t][i])
+                               mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]);
+               }
        }
        return -ENOMEM;
 }
@@ -2080,10 +2121,11 @@ static int mlx4_en_copy_priv(struct mlx4_en_priv *dst,
                             struct mlx4_en_priv *src,
                             struct mlx4_en_port_profile *prof)
 {
+       int t;
+
        memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config,
               sizeof(dst->hwtstamp_config));
        dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up;
-       dst->tx_ring_num = prof->tx_ring_num;
        dst->rx_ring_num = prof->rx_ring_num;
        dst->flags = prof->flags;
        dst->mdev = src->mdev;
@@ -2093,33 +2135,50 @@ static int mlx4_en_copy_priv(struct mlx4_en_priv *dst,
        dst->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
                                         DS_SIZE * MLX4_EN_MAX_RX_FRAGS);
 
-       dst->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS,
-                               GFP_KERNEL);
-       if (!dst->tx_ring)
-               return -ENOMEM;
+       for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+               dst->tx_ring_num[t] = prof->tx_ring_num[t];
+               if (!dst->tx_ring_num[t])
+                       continue;
 
-       dst->tx_cq = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS,
-                             GFP_KERNEL);
-       if (!dst->tx_cq) {
-               kfree(dst->tx_ring);
-               return -ENOMEM;
+               dst->tx_ring[t] = kzalloc(sizeof(struct mlx4_en_tx_ring *) *
+                                         MAX_TX_RINGS, GFP_KERNEL);
+               if (!dst->tx_ring[t])
+                       goto err_free_tx;
+
+               dst->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) *
+                                       MAX_TX_RINGS, GFP_KERNEL);
+               if (!dst->tx_cq[t]) {
+                       kfree(dst->tx_ring[t]);
+                       goto err_free_tx;
+               }
        }
+
        return 0;
+
+err_free_tx:
+       while (t--) {
+               kfree(dst->tx_ring[t]);
+               kfree(dst->tx_cq[t]);
+       }
+       return -ENOMEM;
 }
 
 static void mlx4_en_update_priv(struct mlx4_en_priv *dst,
                                struct mlx4_en_priv *src)
 {
+       int t;
        memcpy(dst->rx_ring, src->rx_ring,
               sizeof(struct mlx4_en_rx_ring *) * src->rx_ring_num);
        memcpy(dst->rx_cq, src->rx_cq,
               sizeof(struct mlx4_en_cq *) * src->rx_ring_num);
        memcpy(&dst->hwtstamp_config, &src->hwtstamp_config,
               sizeof(dst->hwtstamp_config));
-       dst->tx_ring_num = src->tx_ring_num;
+       for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+               dst->tx_ring_num[t] = src->tx_ring_num[t];
+               dst->tx_ring[t] = src->tx_ring[t];
+               dst->tx_cq[t] = src->tx_cq[t];
+       }
        dst->rx_ring_num = src->rx_ring_num;
-       dst->tx_ring = src->tx_ring;
-       dst->tx_cq = src->tx_cq;
        memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile));
 }
 
@@ -2127,14 +2186,18 @@ int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
                                struct mlx4_en_priv *tmp,
                                struct mlx4_en_port_profile *prof)
 {
+       int t;
+
        mlx4_en_copy_priv(tmp, priv, prof);
 
        if (mlx4_en_alloc_resources(tmp)) {
                en_warn(priv,
                        "%s: Resource allocation failed, using previous configuration\n",
                        __func__);
-               kfree(tmp->tx_ring);
-               kfree(tmp->tx_cq);
+               for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+                       kfree(tmp->tx_ring[t]);
+                       kfree(tmp->tx_cq[t]);
+               }
                return -ENOMEM;
        }
        return 0;
@@ -2153,6 +2216,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
        struct mlx4_en_dev *mdev = priv->mdev;
        bool shutdown = mdev->dev->persist->interface_state &
                                            MLX4_INTERFACE_STATE_SHUTDOWN;
+       int t;
 
        en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port);
 
@@ -2181,16 +2245,18 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
        mutex_lock(&mdev->state_lock);
        mdev->pndev[priv->port] = NULL;
        mdev->upper[priv->port] = NULL;
-       mutex_unlock(&mdev->state_lock);
 
 #ifdef CONFIG_RFS_ACCEL
        mlx4_en_cleanup_filters(priv);
 #endif
 
        mlx4_en_free_resources(priv);
+       mutex_unlock(&mdev->state_lock);
 
-       kfree(priv->tx_ring);
-       kfree(priv->tx_cq);
+       for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+               kfree(priv->tx_ring[t]);
+               kfree(priv->tx_cq[t]);
+       }
 
        if (!shutdown)
                free_netdev(dev);
@@ -2205,7 +2271,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
        en_dbg(DRV, priv, "Change MTU called - current:%d new:%d\n",
                 dev->mtu, new_mtu);
 
-       if (priv->xdp_ring_num && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) {
+       if (priv->tx_ring_num[TX_XDP] && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) {
                en_err(priv, "MTU size:%d requires frags but XDP running\n",
                       new_mtu);
                return -EOPNOTSUPP;
@@ -2596,7 +2662,7 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
 static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 maxrate)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
-       struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[queue_index];
+       struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][queue_index];
        struct mlx4_update_qp_params params;
        int err;
 
@@ -2624,18 +2690,21 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
        struct mlx4_en_dev *mdev = priv->mdev;
+       struct mlx4_en_port_profile new_prof;
        struct bpf_prog *old_prog;
+       struct mlx4_en_priv *tmp;
+       int tx_changed = 0;
        int xdp_ring_num;
        int port_up = 0;
        int err;
        int i;
 
-       xdp_ring_num = prog ? ALIGN(priv->rx_ring_num, MLX4_EN_NUM_UP) : 0;
+       xdp_ring_num = prog ? priv->rx_ring_num : 0;
 
        /* No need to reconfigure buffers when simply swapping the
         * program for a new one.
         */
-       if (priv->xdp_ring_num == xdp_ring_num) {
+       if (priv->tx_ring_num[TX_XDP] == xdp_ring_num) {
                if (prog) {
                        prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
                        if (IS_ERR(prog))
@@ -2659,28 +2728,44 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
                return -EOPNOTSUPP;
        }
 
-       if (priv->tx_ring_num < xdp_ring_num + MLX4_EN_NUM_UP) {
-               en_err(priv,
-                      "Minimum %d tx channels required to run XDP\n",
-                      (xdp_ring_num + MLX4_EN_NUM_UP) / MLX4_EN_NUM_UP);
-               return -EINVAL;
-       }
+       tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+       if (!tmp)
+               return -ENOMEM;
 
        if (prog) {
                prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
-               if (IS_ERR(prog))
-                       return PTR_ERR(prog);
+               if (IS_ERR(prog)) {
+                       err = PTR_ERR(prog);
+                       goto out;
+               }
        }
 
        mutex_lock(&mdev->state_lock);
+       memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+       new_prof.tx_ring_num[TX_XDP] = xdp_ring_num;
+
+       if (priv->tx_ring_num[TX] + xdp_ring_num > MAX_TX_RINGS) {
+               tx_changed = 1;
+               new_prof.tx_ring_num[TX] =
+                       MAX_TX_RINGS - ALIGN(xdp_ring_num, MLX4_EN_NUM_UP);
+               en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n");
+       }
+
+       err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+       if (err) {
+               if (prog)
+                       bpf_prog_sub(prog, priv->rx_ring_num - 1);
+               goto unlock_out;
+       }
+
        if (priv->port_up) {
                port_up = 1;
                mlx4_en_stop_port(dev, 1);
        }
 
-       priv->xdp_ring_num = xdp_ring_num;
-       netif_set_real_num_tx_queues(dev, priv->tx_ring_num -
-                                                       priv->xdp_ring_num);
+       mlx4_en_safe_replace_resources(priv, tmp);
+       if (tx_changed)
+               netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
 
        for (i = 0; i < priv->rx_ring_num; i++) {
                old_prog = rcu_dereference_protected(
@@ -2700,15 +2785,18 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
                }
        }
 
+unlock_out:
        mutex_unlock(&mdev->state_lock);
-       return 0;
+out:
+       kfree(tmp);
+       return err;
 }
 
 static bool mlx4_xdp_attached(struct net_device *dev)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
 
-       return !!priv->xdp_ring_num;
+       return !!priv->tx_ring_num[TX_XDP];
 }
 
 static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp)
@@ -3045,6 +3133,10 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
 
        if (!mlx4_is_slave(dev))
                bitmap_set(stats_bitmap->bitmap, last_i, NUM_PKT_STATS);
+       last_i += NUM_PKT_STATS;
+
+       bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS);
+       last_i += NUM_XDP_STATS;
 }
 
 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -3052,7 +3144,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 {
        struct net_device *dev;
        struct mlx4_en_priv *priv;
-       int i;
+       int i, t;
        int err;
 
        dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv),
@@ -3060,7 +3152,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
        if (dev == NULL)
                return -ENOMEM;
 
-       netif_set_real_num_tx_queues(dev, prof->tx_ring_num);
+       netif_set_real_num_tx_queues(dev, prof->tx_ring_num[TX]);
        netif_set_real_num_rx_queues(dev, prof->rx_ring_num);
 
        SET_NETDEV_DEV(dev, &mdev->dev->persist->pdev->dev);
@@ -3097,21 +3189,27 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
        priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
                        MLX4_WQE_CTRL_SOLICITED);
        priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up;
-       priv->tx_ring_num = prof->tx_ring_num;
        priv->tx_work_limit = MLX4_EN_DEFAULT_TX_WORK;
        netdev_rss_key_fill(priv->rss_key, sizeof(priv->rss_key));
 
-       priv->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS,
-                               GFP_KERNEL);
-       if (!priv->tx_ring) {
-               err = -ENOMEM;
-               goto out;
-       }
-       priv->tx_cq = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS,
-                             GFP_KERNEL);
-       if (!priv->tx_cq) {
-               err = -ENOMEM;
-               goto out;
+       for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+               priv->tx_ring_num[t] = prof->tx_ring_num[t];
+               if (!priv->tx_ring_num[t])
+                       continue;
+
+               priv->tx_ring[t] = kzalloc(sizeof(struct mlx4_en_tx_ring *) *
+                                          MAX_TX_RINGS, GFP_KERNEL);
+               if (!priv->tx_ring[t]) {
+                       err = -ENOMEM;
+                       goto err_free_tx;
+               }
+               priv->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) *
+                                        MAX_TX_RINGS, GFP_KERNEL);
+               if (!priv->tx_cq[t]) {
+                       kfree(priv->tx_ring[t]);
+                       err = -ENOMEM;
+                       goto out;
+               }
        }
        priv->rx_ring_num = prof->rx_ring_num;
        priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
@@ -3194,7 +3292,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
        else
                dev->netdev_ops = &mlx4_netdev_ops;
        dev->watchdog_timeo = MLX4_EN_WATCHDOG_TIMEOUT;
-       netif_set_real_num_tx_queues(dev, priv->tx_ring_num);
+       netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
        netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
 
        dev->ethtool_ops = &mlx4_en_ethtool_ops;
@@ -3294,7 +3392,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
        netif_carrier_off(dev);
        mlx4_en_set_default_moderation(priv);
 
-       en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num);
+       en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num[TX]);
        en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
 
        mlx4_en_update_loopback_state(priv->dev, priv->dev->features);
@@ -3354,6 +3452,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 
        return 0;
 
+err_free_tx:
+       while (t--) {
+               kfree(priv->tx_ring[t]);
+               kfree(priv->tx_cq[t]);
+       }
 out:
        mlx4_en_destroy_netdev(dev);
        return err;
index 5aa8b751f4170c782f13a3d0ef6f3b557b21168d..9166d90e732858610b1407fe85cbf6cbe27f5e0b 100644 (file)
@@ -147,6 +147,39 @@ static unsigned long en_stats_adder(__be64 *start, __be64 *next, int num)
        return ret;
 }
 
+void mlx4_en_fold_software_stats(struct net_device *dev)
+{
+       struct mlx4_en_priv *priv = netdev_priv(dev);
+       struct mlx4_en_dev *mdev = priv->mdev;
+       unsigned long packets, bytes;
+       int i;
+
+       if (!priv->port_up || mlx4_is_master(mdev->dev))
+               return;
+
+       packets = 0;
+       bytes = 0;
+       for (i = 0; i < priv->rx_ring_num; i++) {
+               const struct mlx4_en_rx_ring *ring = priv->rx_ring[i];
+
+               packets += READ_ONCE(ring->packets);
+               bytes   += READ_ONCE(ring->bytes);
+       }
+       dev->stats.rx_packets = packets;
+       dev->stats.rx_bytes = bytes;
+
+       packets = 0;
+       bytes = 0;
+       for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+               const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];
+
+               packets += READ_ONCE(ring->packets);
+               bytes   += READ_ONCE(ring->bytes);
+       }
+       dev->stats.tx_packets = packets;
+       dev->stats.tx_bytes = bytes;
+}
+
 int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 {
        struct mlx4_counter tmp_counter_stats;
@@ -159,6 +192,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
        u64 in_mod = reset << 8 | port;
        int err;
        int i, counter_index;
+       unsigned long sw_tx_dropped = 0;
        unsigned long sw_rx_dropped = 0;
 
        mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
@@ -166,7 +200,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
                return PTR_ERR(mailbox);
        err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0,
                           MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
-                          MLX4_CMD_WRAPPED);
+                          MLX4_CMD_NATIVE);
        if (err)
                goto out;
 
@@ -174,40 +208,42 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 
        spin_lock_bh(&priv->stats_lock);
 
-       stats->rx_packets = 0;
-       stats->rx_bytes = 0;
+       mlx4_en_fold_software_stats(dev);
+
        priv->port_stats.rx_chksum_good = 0;
        priv->port_stats.rx_chksum_none = 0;
        priv->port_stats.rx_chksum_complete = 0;
+       priv->xdp_stats.rx_xdp_drop    = 0;
+       priv->xdp_stats.rx_xdp_tx      = 0;
+       priv->xdp_stats.rx_xdp_tx_full = 0;
        for (i = 0; i < priv->rx_ring_num; i++) {
-               stats->rx_packets += priv->rx_ring[i]->packets;
-               stats->rx_bytes += priv->rx_ring[i]->bytes;
-               sw_rx_dropped += priv->rx_ring[i]->dropped;
-               priv->port_stats.rx_chksum_good += priv->rx_ring[i]->csum_ok;
-               priv->port_stats.rx_chksum_none += priv->rx_ring[i]->csum_none;
-               priv->port_stats.rx_chksum_complete += priv->rx_ring[i]->csum_complete;
+               const struct mlx4_en_rx_ring *ring = priv->rx_ring[i];
+
+               sw_rx_dropped                   += READ_ONCE(ring->dropped);
+               priv->port_stats.rx_chksum_good += READ_ONCE(ring->csum_ok);
+               priv->port_stats.rx_chksum_none += READ_ONCE(ring->csum_none);
+               priv->port_stats.rx_chksum_complete += READ_ONCE(ring->csum_complete);
+               priv->xdp_stats.rx_xdp_drop     += READ_ONCE(ring->xdp_drop);
+               priv->xdp_stats.rx_xdp_tx       += READ_ONCE(ring->xdp_tx);
+               priv->xdp_stats.rx_xdp_tx_full  += READ_ONCE(ring->xdp_tx_full);
        }
-       stats->tx_packets = 0;
-       stats->tx_bytes = 0;
-       stats->tx_dropped = 0;
        priv->port_stats.tx_chksum_offload = 0;
        priv->port_stats.queue_stopped = 0;
        priv->port_stats.wake_queue = 0;
        priv->port_stats.tso_packets = 0;
        priv->port_stats.xmit_more = 0;
 
-       for (i = 0; i < priv->tx_ring_num; i++) {
-               const struct mlx4_en_tx_ring *ring = priv->tx_ring[i];
-
-               stats->tx_packets += ring->packets;
-               stats->tx_bytes += ring->bytes;
-               stats->tx_dropped += ring->tx_dropped;
-               priv->port_stats.tx_chksum_offload += ring->tx_csum;
-               priv->port_stats.queue_stopped     += ring->queue_stopped;
-               priv->port_stats.wake_queue        += ring->wake_queue;
-               priv->port_stats.tso_packets       += ring->tso_packets;
-               priv->port_stats.xmit_more         += ring->xmit_more;
+       for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+               const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];
+
+               sw_tx_dropped                      += READ_ONCE(ring->tx_dropped);
+               priv->port_stats.tx_chksum_offload += READ_ONCE(ring->tx_csum);
+               priv->port_stats.queue_stopped     += READ_ONCE(ring->queue_stopped);
+               priv->port_stats.wake_queue        += READ_ONCE(ring->wake_queue);
+               priv->port_stats.tso_packets       += READ_ONCE(ring->tso_packets);
+               priv->port_stats.xmit_more         += READ_ONCE(ring->xmit_more);
        }
+
        if (mlx4_is_master(mdev->dev)) {
                stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
                                                   &mlx4_en_stats->RTOT_prio_1,
@@ -245,7 +281,8 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
        stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength);
        stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC);
        stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
-       stats->tx_dropped += be32_to_cpu(mlx4_en_stats->TDROP);
+       stats->tx_dropped = be32_to_cpu(mlx4_en_stats->TDROP) +
+                           sw_tx_dropped;
 
        /* RX stats */
        priv->pkstats.rx_multicast_packets = stats->multicast;
@@ -322,7 +359,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
                err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma,
                                   in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL,
                                   0, MLX4_CMD_DUMP_ETH_STATS,
-                                  MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
+                                  MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
                if (err)
                        goto out;
        }
index f2e8beddcf44fe58f5904267999e74f12e24bc41..6562f78b07f4370b5c1ea2c5e3a4221d7ebaeba8 100644 (file)
@@ -688,18 +688,23 @@ out_loopback:
        dev_kfree_skb_any(skb);
 }
 
-static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
-                                    struct mlx4_en_rx_ring *ring)
+static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
+                                     struct mlx4_en_rx_ring *ring)
 {
-       int index = ring->prod & ring->size_mask;
+       u32 missing = ring->actual_size - (ring->prod - ring->cons);
 
-       while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
-               if (mlx4_en_prepare_rx_desc(priv, ring, index,
+       /* Try to batch allocations, but not too much. */
+       if (missing < 8)
+               return false;
+       do {
+               if (mlx4_en_prepare_rx_desc(priv, ring,
+                                           ring->prod & ring->size_mask,
                                            GFP_ATOMIC | __GFP_COLD))
                        break;
                ring->prod++;
-               index = ring->prod & ring->size_mask;
-       }
+       } while (--missing);
+
+       return true;
 }
 
 /* When hardware doesn't strip the vlan, we need to calculate the checksum
@@ -788,7 +793,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
        struct bpf_prog *xdp_prog;
        int doorbell_pending;
        struct sk_buff *skb;
-       int tx_index;
        int index;
        int nr;
        unsigned int length;
@@ -808,7 +812,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
        rcu_read_lock();
        xdp_prog = rcu_dereference(ring->xdp_prog);
        doorbell_pending = 0;
-       tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
 
        /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
         * descriptor offset can be deduced from the CQE index instead of
@@ -877,8 +880,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                 */
                length = be32_to_cpu(cqe->byte_cnt);
                length -= ring->fcs_del;
-               ring->bytes += length;
-               ring->packets++;
                l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
                        (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
 
@@ -904,22 +905,26 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                        case XDP_PASS:
                                break;
                        case XDP_TX:
-                               if (likely(!mlx4_en_xmit_frame(frags, dev,
-                                                       length, tx_index,
+                               if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
+                                                       length, cq->ring,
                                                        &doorbell_pending)))
                                        goto consumed;
-                               goto xdp_drop; /* Drop on xmit failure */
+                               goto xdp_drop_no_cnt; /* Drop on xmit failure */
                        default:
                                bpf_warn_invalid_xdp_action(act);
                        case XDP_ABORTED:
                        case XDP_DROP:
-xdp_drop:
+                               ring->xdp_drop++;
+xdp_drop_no_cnt:
                                if (likely(mlx4_en_rx_recycle(ring, frags)))
                                        goto consumed;
                                goto next;
                        }
                }
 
+               ring->bytes += length;
+               ring->packets++;
+
                if (likely(dev->features & NETIF_F_RXCSUM)) {
                        if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
                                                      MLX4_CQE_STATUS_UDP)) {
@@ -1081,15 +1086,20 @@ consumed:
 
 out:
        rcu_read_unlock();
-       if (doorbell_pending)
-               mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]);
 
+       if (polled) {
+               if (doorbell_pending)
+                       mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
+
+               mlx4_cq_set_ci(&cq->mcq);
+               wmb(); /* ensure HW sees CQ consumer before we post new buffers */
+               ring->cons = cq->mcq.cons_index;
+       }
        AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
-       mlx4_cq_set_ci(&cq->mcq);
-       wmb(); /* ensure HW sees CQ consumer before we post new buffers */
-       ring->cons = cq->mcq.cons_index;
-       mlx4_en_refill_rx_buffers(priv, ring);
-       mlx4_en_update_rx_prod_db(ring);
+
+       if (mlx4_en_refill_rx_buffers(priv, ring))
+               mlx4_en_update_rx_prod_db(ring);
+
        return polled;
 }
 
@@ -1131,14 +1141,17 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
                        return budget;
 
                /* Current cpu is not according to smp_irq_affinity -
-                * probably affinity changed. need to stop this NAPI
-                * poll, and restart it on the right CPU
+                * probably affinity changed. Need to stop this NAPI
+                * poll, and restart it on the right CPU.
+                * Try to avoid returning a too small value (like 0),
+                * to not fool net_rx_action() and its netdev_budget
                 */
-               done = 0;
+               if (done)
+                       done--;
        }
        /* Done for now */
-       napi_complete_done(napi, done);
-       mlx4_en_arm_cq(priv, cq);
+       if (napi_complete_done(napi, done))
+               mlx4_en_arm_cq(priv, cq);
        return done;
 }
 
@@ -1162,7 +1175,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
        /* bpf requires buffers to be set up as 1 packet per page.
         * This only works when num_frags == 1.
         */
-       if (priv->xdp_ring_num) {
+       if (priv->tx_ring_num[TX_XDP]) {
                dma_dir = PCI_DMA_BIDIRECTIONAL;
                /* This will gain efficient xdp frame recycling at the expense
                 * of more costly truesize accounting
index b66e03d9711f945fe06827ed480258a78ce26833..c06346a82496876379ff5771e9ae7a03b4ad10a0 100644 (file)
@@ -118,6 +118,29 @@ mlx4_en_test_loopback_exit:
        return !loopback_ok;
 }
 
+static int mlx4_en_test_interrupts(struct mlx4_en_priv *priv)
+{
+       struct mlx4_en_dev *mdev = priv->mdev;
+       int err = 0;
+       int i = 0;
+
+       err = mlx4_test_async(mdev->dev);
+       /* When not in MSI_X or slave, test only async */
+       if (!(mdev->dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(mdev->dev))
+               return err;
+
+       /* A loop over all completion vectors of current port,
+        * for each vector check whether it works by mapping command
+        * completions to that vector and performing a NOP command
+        */
+       for (i = 0; i < priv->rx_ring_num; i++) {
+               err = mlx4_test_interrupt(mdev->dev, priv->rx_cq[i]->vector);
+               if (err)
+                       break;
+       }
+
+       return err;
+}
 
 static int mlx4_en_test_link(struct mlx4_en_priv *priv)
 {
@@ -151,7 +174,6 @@ static int mlx4_en_test_speed(struct mlx4_en_priv *priv)
 void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf)
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
-       struct mlx4_en_dev *mdev = priv->mdev;
        int i, carrier_ok;
 
        memset(buf, 0, sizeof(u64) * MLX4_EN_NUM_SELF_TEST);
@@ -177,7 +199,7 @@ void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf)
                        netif_carrier_on(dev);
 
        }
-       buf[0] = mlx4_test_interrupts(mdev->dev);
+       buf[0] = mlx4_en_test_interrupts(priv);
        buf[1] = mlx4_en_test_link(priv);
        buf[2] = mlx4_en_test_speed(priv);
 
index e2509bba3e7c31dd0db45734a8a274f956cccaa2..4b597dca5c52d114344d638895275ed0d378bd96 100644 (file)
@@ -66,7 +66,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 
        ring->size = size;
        ring->size_mask = size - 1;
-       ring->stride = stride;
+       ring->sp_stride = stride;
        ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS;
 
        tmp = size * sizeof(struct mlx4_en_tx_info);
@@ -90,22 +90,22 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
                        goto err_info;
                }
        }
-       ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);
+       ring->buf_size = ALIGN(size * ring->sp_stride, MLX4_EN_PAGE_SIZE);
 
        /* Allocate HW buffers on provided NUMA node */
        set_dev_node(&mdev->dev->persist->pdev->dev, node);
-       err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
+       err = mlx4_alloc_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
        set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
        if (err) {
                en_err(priv, "Failed allocating hwq resources\n");
                goto err_bounce;
        }
 
-       ring->buf = ring->wqres.buf.direct.buf;
+       ring->buf = ring->sp_wqres.buf.direct.buf;
 
        en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
               ring, ring->buf, ring->size, ring->buf_size,
-              (unsigned long long) ring->wqres.buf.direct.map);
+              (unsigned long long) ring->sp_wqres.buf.direct.map);
 
        err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn,
                                    MLX4_RESERVE_ETH_BF_QP);
@@ -114,12 +114,12 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
                goto err_hwq_res;
        }
 
-       err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
+       err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp, GFP_KERNEL);
        if (err) {
                en_err(priv, "Failed allocating qp %d\n", ring->qpn);
                goto err_reserve;
        }
-       ring->qp.event = mlx4_en_sqp_event;
+       ring->sp_qp.event = mlx4_en_sqp_event;
 
        err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
        if (err) {
@@ -141,7 +141,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
        if (queue_index < priv->num_tx_rings_p_up)
                cpumask_set_cpu(cpumask_local_spread(queue_index,
                                                     priv->mdev->dev->numa_node),
-                               &ring->affinity_mask);
+                               &ring->sp_affinity_mask);
 
        *pring = ring;
        return 0;
@@ -149,7 +149,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 err_reserve:
        mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
 err_hwq_res:
-       mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
+       mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
 err_bounce:
        kfree(ring->bounce_buf);
        ring->bounce_buf = NULL;
@@ -171,10 +171,10 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
 
        if (ring->bf_alloced)
                mlx4_bf_free(mdev->dev, &ring->bf);
-       mlx4_qp_remove(mdev->dev, &ring->qp);
-       mlx4_qp_free(mdev->dev, &ring->qp);
+       mlx4_qp_remove(mdev->dev, &ring->sp_qp);
+       mlx4_qp_free(mdev->dev, &ring->sp_qp);
        mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
-       mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
+       mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
        kfree(ring->bounce_buf);
        ring->bounce_buf = NULL;
        kvfree(ring->tx_info);
@@ -190,7 +190,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
        struct mlx4_en_dev *mdev = priv->mdev;
        int err;
 
-       ring->cqn = cq;
+       ring->sp_cqn = cq;
        ring->prod = 0;
        ring->cons = 0xffffffff;
        ring->last_nr_txbb = 1;
@@ -198,21 +198,21 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
        memset(ring->buf, 0, ring->buf_size);
        ring->free_tx_desc = mlx4_en_free_tx_desc;
 
-       ring->qp_state = MLX4_QP_STATE_RST;
-       ring->doorbell_qpn = cpu_to_be32(ring->qp.qpn << 8);
+       ring->sp_qp_state = MLX4_QP_STATE_RST;
+       ring->doorbell_qpn = cpu_to_be32(ring->sp_qp.qpn << 8);
        ring->mr_key = cpu_to_be32(mdev->mr.key);
 
-       mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
-                               ring->cqn, user_prio, &ring->context);
+       mlx4_en_fill_qp_context(priv, ring->size, ring->sp_stride, 1, 0, ring->qpn,
+                               ring->sp_cqn, user_prio, &ring->sp_context);
        if (ring->bf_alloced)
-               ring->context.usr_page =
+               ring->sp_context.usr_page =
                        cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
                                                         ring->bf.uar->index));
 
-       err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
-                              &ring->qp, &ring->qp_state);
-       if (!cpumask_empty(&ring->affinity_mask))
-               netif_set_xps_queue(priv->dev, &ring->affinity_mask,
+       err = mlx4_qp_to_ready(mdev->dev, &ring->sp_wqres.mtt, &ring->sp_context,
+                              &ring->sp_qp, &ring->sp_qp_state);
+       if (!cpumask_empty(&ring->sp_affinity_mask))
+               netif_set_xps_queue(priv->dev, &ring->sp_affinity_mask,
                                    ring->queue_index);
 
        return err;
@@ -223,8 +223,8 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
 {
        struct mlx4_en_dev *mdev = priv->mdev;
 
-       mlx4_qp_modify(mdev->dev, NULL, ring->qp_state,
-                      MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
+       mlx4_qp_modify(mdev->dev, NULL, ring->sp_qp_state,
+                      MLX4_QP_STATE_RST, NULL, 0, 0, &ring->sp_qp);
 }
 
 static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring)
@@ -392,7 +392,8 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
                cnt++;
        }
 
-       netdev_tx_reset_queue(ring->tx_queue);
+       if (ring->tx_queue)
+               netdev_tx_reset_queue(ring->tx_queue);
 
        if (cnt)
                en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);
@@ -405,7 +406,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 {
        struct mlx4_en_priv *priv = netdev_priv(dev);
        struct mlx4_cq *mcq = &cq->mcq;
-       struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
+       struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
        struct mlx4_cqe *cqe;
        u16 index;
        u16 new_index, ring_index, stamp_index;
@@ -807,7 +808,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
        bool bf_ok;
 
        tx_ind = skb_get_queue_mapping(skb);
-       ring = priv->tx_ring[tx_ind];
+       ring = priv->tx_ring[TX][tx_ind];
 
        if (!priv->port_up)
                goto tx_drop;
@@ -1078,7 +1079,8 @@ tx_drop:
        return NETDEV_TX_OK;
 }
 
-netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
+netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
+                              struct mlx4_en_rx_alloc *frame,
                               struct net_device *dev, unsigned int length,
                               int tx_ind, int *doorbell_pending)
 {
@@ -1101,7 +1103,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
        BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE,
                         "mlx4_en_xmit_frame requires minimum size tx desc");
 
-       ring = priv->tx_ring[tx_ind];
+       ring = priv->tx_ring[TX_XDP][tx_ind];
 
        if (!priv->port_up)
                goto tx_drop;
@@ -1153,8 +1155,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
                ((ring->prod & ring->size) ?
                 cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
 
-       ring->packets++;
-       ring->bytes += tx_info->nr_bytes;
+       rx_ring->xdp_tx++;
        AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length);
 
        ring->prod += nr_txbb;
@@ -1178,7 +1179,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
        return NETDEV_TX_OK;
 
 tx_drop_count:
-       ring->tx_dropped++;
+       rx_ring->xdp_tx_full++;
 tx_drop:
        return NETDEV_TX_BUSY;
 }
index cf8f8a72a80154c19a6ccb9ca807499491ccebd9..cd3638e6fe25b2f8db4ea5e771535df51652faae 100644 (file)
@@ -1361,53 +1361,49 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev)
        kfree(priv->eq_table.uar_map);
 }
 
-/* A test that verifies that we can accept interrupts on all
- * the irq vectors of the device.
+/* A test that verifies that we can accept interrupts
+ * on the vector allocated for asynchronous events
+ */
+int mlx4_test_async(struct mlx4_dev *dev)
+{
+       return mlx4_NOP(dev);
+}
+EXPORT_SYMBOL(mlx4_test_async);
+
+/* A test that verifies that we can accept interrupts
+ * on the given irq vector of the tested port.
  * Interrupts are checked using the NOP command.
  */
-int mlx4_test_interrupts(struct mlx4_dev *dev)
+int mlx4_test_interrupt(struct mlx4_dev *dev, int vector)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
-       int i;
        int err;
 
-       err = mlx4_NOP(dev);
-       /* When not in MSI_X, there is only one irq to check */
-       if (!(dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(dev))
-               return err;
-
-       /* A loop over all completion vectors, for each vector we will check
-        * whether it works by mapping command completions to that vector
-        * and performing a NOP command
-        */
-       for(i = 0; !err && (i < dev->caps.num_comp_vectors); ++i) {
-               /* Make sure request_irq was called */
-               if (!priv->eq_table.eq[i].have_irq)
-                       continue;
-
-               /* Temporary use polling for command completions */
-               mlx4_cmd_use_polling(dev);
-
-               /* Map the new eq to handle all asynchronous events */
-               err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
-                                 priv->eq_table.eq[i].eqn);
-               if (err) {
-                       mlx4_warn(dev, "Failed mapping eq for interrupt test\n");
-                       mlx4_cmd_use_events(dev);
-                       break;
-               }
+       /* Temporary use polling for command completions */
+       mlx4_cmd_use_polling(dev);
 
-               /* Go back to using events */
-               mlx4_cmd_use_events(dev);
-               err = mlx4_NOP(dev);
+       /* Map the new eq to handle all asynchronous events */
+       err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
+                         priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn);
+       if (err) {
+               mlx4_warn(dev, "Failed mapping eq for interrupt test\n");
+               goto out;
        }
 
+       /* Go back to using events */
+       mlx4_cmd_use_events(dev);
+       err = mlx4_NOP(dev);
+
        /* Return to default */
+       mlx4_cmd_use_polling(dev);
+out:
        mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
                    priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
+       mlx4_cmd_use_events(dev);
+
        return err;
 }
-EXPORT_SYMBOL(mlx4_test_interrupts);
+EXPORT_SYMBOL(mlx4_test_interrupt);
 
 bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector)
 {
index c41ab31a39f8c93d3caba280510b8a73413c7485..84bab9f0732ea239bce5adaac7eb52d0298cc751 100644 (file)
@@ -49,9 +49,9 @@ enum {
 extern void __buggy_use_of_MLX4_GET(void);
 extern void __buggy_use_of_MLX4_PUT(void);
 
-static bool enable_qos = true;
+static bool enable_qos;
 module_param(enable_qos, bool, 0444);
-MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: on)");
+MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)");
 
 #define MLX4_GET(dest, source, offset)                               \
        do {                                                          \
index 7183ac4135d2f97dbb1e7b63f2d57db4c95a5d0c..6f4e67bc35382e7a0b8de7c0b52cc24b3d81df91 100644 (file)
@@ -1102,6 +1102,14 @@ static int __set_port_type(struct mlx4_port_info *info,
        int i;
        int err = 0;
 
+       if ((port_type & mdev->caps.supported_type[info->port]) != port_type) {
+               mlx4_err(mdev,
+                        "Requested port type for port %d is not supported on this HCA\n",
+                        info->port);
+               err = -EINVAL;
+               goto err_sup;
+       }
+
        mlx4_stop_sense(mdev);
        mutex_lock(&priv->port_mutex);
        info->tmp_type = port_type;
@@ -1147,7 +1155,7 @@ static int __set_port_type(struct mlx4_port_info *info,
 out:
        mlx4_start_sense(mdev);
        mutex_unlock(&priv->port_mutex);
-
+err_sup:
        return err;
 }
 
index e4878f31e45d7578e4b5f86cfc1cbe4004ffdbac..88ee7d8a59231a47d6b7aca2006f9780dbefa578 100644 (file)
@@ -145,9 +145,10 @@ enum mlx4_resource {
        RES_MTT,
        RES_MAC,
        RES_VLAN,
-       RES_EQ,
+       RES_NPORT_ID,
        RES_COUNTER,
        RES_FS_RULE,
+       RES_EQ,
        MLX4_NUM_OF_RESOURCE_TYPE
 };
 
@@ -1329,8 +1330,6 @@ int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
                               struct mlx4_cmd_info *cmd);
 int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function,
                                     int port, void *buf);
-int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave, u32 in_mod,
-                               struct mlx4_cmd_mailbox *outbox);
 int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
                                   struct mlx4_vhcr *vhcr,
                                   struct mlx4_cmd_mailbox *inbox,
index a3528dd1e72e14602eb6a724fb99414e5906875d..20a936428f4a44c8ca0a7161855da310f9166b50 100644 (file)
@@ -207,8 +207,11 @@ enum {
  */
 
 enum cq_type {
-       RX = 0,
-       TX = 1,
+       /* keep tx types first */
+       TX,
+       TX_XDP,
+#define MLX4_EN_NUM_TX_TYPES (TX_XDP + 1)
+       RX,
 };
 
 
@@ -278,46 +281,50 @@ struct mlx4_en_tx_ring {
        u32                     last_nr_txbb;
        u32                     cons;
        unsigned long           wake_queue;
+       struct netdev_queue     *tx_queue;
+       u32                     (*free_tx_desc)(struct mlx4_en_priv *priv,
+                                               struct mlx4_en_tx_ring *ring,
+                                               int index, u8 owner,
+                                               u64 timestamp, int napi_mode);
+       struct mlx4_en_rx_ring  *recycle_ring;
 
        /* cache line used and dirtied in mlx4_en_xmit() */
        u32                     prod ____cacheline_aligned_in_smp;
+       unsigned int            tx_dropped;
        unsigned long           bytes;
        unsigned long           packets;
        unsigned long           tx_csum;
        unsigned long           tso_packets;
        unsigned long           xmit_more;
-       unsigned int            tx_dropped;
        struct mlx4_bf          bf;
-       unsigned long           queue_stopped;
 
        /* Following part should be mostly read */
-       cpumask_t               affinity_mask;
-       struct mlx4_qp          qp;
-       struct mlx4_hwq_resources wqres;
+       __be32                  doorbell_qpn;
+       __be32                  mr_key;
        u32                     size; /* number of TXBBs */
        u32                     size_mask;
-       u16                     stride;
        u32                     full_size;
-       u16                     cqn;    /* index of port CQ associated with this ring */
        u32                     buf_size;
-       __be32                  doorbell_qpn;
-       __be32                  mr_key;
        void                    *buf;
        struct mlx4_en_tx_info  *tx_info;
-       struct mlx4_en_rx_ring  *recycle_ring;
-       u32                     (*free_tx_desc)(struct mlx4_en_priv *priv,
-                                               struct mlx4_en_tx_ring *ring,
-                                               int index, u8 owner,
-                                               u64 timestamp, int napi_mode);
-       u8                      *bounce_buf;
-       struct mlx4_qp_context  context;
        int                     qpn;
-       enum mlx4_qp_state      qp_state;
        u8                      queue_index;
        bool                    bf_enabled;
        bool                    bf_alloced;
-       struct netdev_queue     *tx_queue;
-       int                     hwtstamp_tx_type;
+       u8                      hwtstamp_tx_type;
+       u8                      *bounce_buf;
+
+       /* Not used in fast path
+        * Only queue_stopped might be used if BQL is not properly working.
+        */
+       unsigned long           queue_stopped;
+       struct mlx4_hwq_resources sp_wqres;
+       struct mlx4_qp          sp_qp;
+       struct mlx4_qp_context  sp_context;
+       cpumask_t               sp_affinity_mask;
+       enum mlx4_qp_state      sp_qp_state;
+       u16                     sp_stride;
+       u16                     sp_cqn; /* index of port CQ associated with this ring */
 } ____cacheline_aligned_in_smp;
 
 struct mlx4_en_rx_desc {
@@ -347,6 +354,9 @@ struct mlx4_en_rx_ring {
        unsigned long csum_ok;
        unsigned long csum_none;
        unsigned long csum_complete;
+       unsigned long xdp_drop;
+       unsigned long xdp_tx;
+       unsigned long xdp_tx_full;
        unsigned long dropped;
        int hwtstamp_rx_filter;
        cpumask_var_t affinity_mask;
@@ -361,7 +371,7 @@ struct mlx4_en_cq {
        int size;
        int buf_size;
        int vector;
-       enum cq_type is_tx;
+       enum cq_type type;
        u16 moder_time;
        u16 moder_cnt;
        struct mlx4_cqe *buf;
@@ -372,7 +382,7 @@ struct mlx4_en_cq {
 
 struct mlx4_en_port_profile {
        u32 flags;
-       u32 tx_ring_num;
+       u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES];
        u32 rx_ring_num;
        u32 tx_ring_size;
        u32 rx_ring_size;
@@ -569,17 +579,16 @@ struct mlx4_en_priv {
        u32 flags;
        u8 num_tx_rings_p_up;
        u32 tx_work_limit;
-       u32 tx_ring_num;
+       u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES];
        u32 rx_ring_num;
        u32 rx_skb_size;
        struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
        u16 num_frags;
        u16 log_rx_info;
-       int xdp_ring_num;
 
-       struct mlx4_en_tx_ring **tx_ring;
+       struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES];
        struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS];
-       struct mlx4_en_cq **tx_cq;
+       struct mlx4_en_cq **tx_cq[MLX4_EN_NUM_TX_TYPES];
        struct mlx4_en_cq *rx_cq[MAX_RX_RINGS];
        struct mlx4_qp drop_qp;
        struct work_struct rx_mode_task;
@@ -597,6 +606,7 @@ struct mlx4_en_priv {
        struct mlx4_en_flow_stats_rx rx_flowstats;
        struct mlx4_en_flow_stats_tx tx_flowstats;
        struct mlx4_en_port_stats port_stats;
+       struct mlx4_en_xdp_stats xdp_stats;
        struct mlx4_en_stats_bitmap stats_bitmap;
        struct list_head mc_list;
        struct list_head curr_list;
@@ -685,7 +695,8 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq);
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
                         void *accel_priv, select_queue_fallback_t fallback);
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
-netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
+netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
+                              struct mlx4_en_rx_alloc *frame,
                               struct net_device *dev, unsigned int length,
                               int tx_ind, int *doorbell_pending);
 void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
@@ -744,6 +755,7 @@ void mlx4_en_rx_irq(struct mlx4_cq *mcq);
 int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
 int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv);
 
+void mlx4_en_fold_software_stats(struct net_device *dev);
 int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset);
 int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port);
 
index 7fd466c0b929b63ede0111b1b7d03b961f992abd..48641cb0367f251a07537b82d0a16bf50d8479ef 100644 (file)
@@ -55,6 +55,13 @@ struct mlx4_en_perf_stats {
 #define NUM_PERF_COUNTERS              6
 };
 
+struct mlx4_en_xdp_stats {
+       unsigned long rx_xdp_drop;
+       unsigned long rx_xdp_tx;
+       unsigned long rx_xdp_tx_full;
+#define NUM_XDP_STATS          3
+};
+
 #define NUM_MAIN_STATS 21
 
 #define MLX4_NUM_PRIORITIES    8
@@ -107,7 +114,8 @@ enum {
 };
 
 #define NUM_ALL_STATS  (NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
-                        NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS)
+                        NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
+                        NUM_XDP_STATS)
 
 #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
                                  sizeof(((struct net_device_stats *)0)->n))
index c5b2064297a19b0dde2640764acb4216343633f1..b656dd5772e5b9ae3412d11dc8791c49fb10a78f 100644 (file)
@@ -1728,24 +1728,13 @@ int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
        return err;
 }
 
-int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave,
-                              u32 in_mod, struct mlx4_cmd_mailbox *outbox)
-{
-       return mlx4_cmd_box(dev, 0, outbox->dma, in_mod, 0,
-                           MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
-                           MLX4_CMD_NATIVE);
-}
-
 int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
                                struct mlx4_vhcr *vhcr,
                                struct mlx4_cmd_mailbox *inbox,
                                struct mlx4_cmd_mailbox *outbox,
                                struct mlx4_cmd_info *cmd)
 {
-       if (slave != dev->caps.function)
-               return 0;
-       return mlx4_common_dump_eth_stats(dev, slave,
-                                         vhcr->in_modifier, outbox);
+       return 0;
 }
 
 int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid,
index 84d7857ccc271415f8caa448b2710a3d22b92a77..c548beaaf9109e376933660dffb39632622dbfc9 100644 (file)
@@ -1605,13 +1605,14 @@ static int eq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
                        r->com.from_state = r->com.state;
                        r->com.to_state = state;
                        r->com.state = RES_EQ_BUSY;
-                       if (eq)
-                               *eq = r;
                }
        }
 
        spin_unlock_irq(mlx4_tlock(dev));
 
+       if (!err && eq)
+               *eq = r;
+
        return err;
 }
 
index 0343725d7f44a84fffd7063df578da8fe1211254..9f43beb86250cd0a800c4ac4cb53492e6a0a4493 100644 (file)
@@ -8,6 +8,6 @@ mlx5_core-y :=  main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
                en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
                en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
-               en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o
+               en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
index 6cb38304669f6e5618edfea860a8c8d5f49e5c54..44791de5afe6e191df22b1fb4940d82dcd5f0a58 100644 (file)
 
 #include "mlx5_core.h"
 
+struct mlx5_db_pgdir {
+       struct list_head        list;
+       unsigned long          *bitmap;
+       __be32                 *db_page;
+       dma_addr_t              db_dma;
+};
+
 /* Handling for queue buffers -- we allocate a bunch of memory and
  * register it in a memory region at HCA virtual address 0.
  */
@@ -99,20 +106,88 @@ void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
 }
 EXPORT_SYMBOL_GPL(mlx5_buf_free);
 
+int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+                            struct mlx5_frag_buf *buf, int node)
+{
+       int i;
+
+       buf->size = size;
+       buf->npages = 1 << get_order(size);
+       buf->page_shift = PAGE_SHIFT;
+       buf->frags = kcalloc(buf->npages, sizeof(struct mlx5_buf_list),
+                            GFP_KERNEL);
+       if (!buf->frags)
+               goto err_out;
+
+       for (i = 0; i < buf->npages; i++) {
+               struct mlx5_buf_list *frag = &buf->frags[i];
+               int frag_sz = min_t(int, size, PAGE_SIZE);
+
+               frag->buf = mlx5_dma_zalloc_coherent_node(dev, frag_sz,
+                                                         &frag->map, node);
+               if (!frag->buf)
+                       goto err_free_buf;
+               if (frag->map & ((1 << buf->page_shift) - 1)) {
+                       dma_free_coherent(&dev->pdev->dev, frag_sz,
+                                         buf->frags[i].buf, buf->frags[i].map);
+                       mlx5_core_warn(dev, "unexpected map alignment: 0x%p, page_shift=%d\n",
+                                      (void *)frag->map, buf->page_shift);
+                       goto err_free_buf;
+               }
+               size -= frag_sz;
+       }
+
+       return 0;
+
+err_free_buf:
+       while (i--)
+               dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, buf->frags[i].buf,
+                                 buf->frags[i].map);
+       kfree(buf->frags);
+err_out:
+       return -ENOMEM;
+}
+
+void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
+{
+       int size = buf->size;
+       int i;
+
+       for (i = 0; i < buf->npages; i++) {
+               int frag_sz = min_t(int, size, PAGE_SIZE);
+
+               dma_free_coherent(&dev->pdev->dev, frag_sz, buf->frags[i].buf,
+                                 buf->frags[i].map);
+               size -= frag_sz;
+       }
+       kfree(buf->frags);
+}
+
 static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
                                                 int node)
 {
+       u32 db_per_page = PAGE_SIZE / cache_line_size();
        struct mlx5_db_pgdir *pgdir;
 
        pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL);
        if (!pgdir)
                return NULL;
 
-       bitmap_fill(pgdir->bitmap, MLX5_DB_PER_PAGE);
+       pgdir->bitmap = kcalloc(BITS_TO_LONGS(db_per_page),
+                               sizeof(unsigned long),
+                               GFP_KERNEL);
+
+       if (!pgdir->bitmap) {
+               kfree(pgdir);
+               return NULL;
+       }
+
+       bitmap_fill(pgdir->bitmap, db_per_page);
 
        pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE,
                                                       &pgdir->db_dma, node);
        if (!pgdir->db_page) {
+               kfree(pgdir->bitmap);
                kfree(pgdir);
                return NULL;
        }
@@ -123,18 +198,19 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
 static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir,
                                    struct mlx5_db *db)
 {
+       u32 db_per_page = PAGE_SIZE / cache_line_size();
        int offset;
        int i;
 
-       i = find_first_bit(pgdir->bitmap, MLX5_DB_PER_PAGE);
-       if (i >= MLX5_DB_PER_PAGE)
+       i = find_first_bit(pgdir->bitmap, db_per_page);
+       if (i >= db_per_page)
                return -ENOMEM;
 
        __clear_bit(i, pgdir->bitmap);
 
        db->u.pgdir = pgdir;
        db->index   = i;
-       offset = db->index * L1_CACHE_BYTES;
+       offset = db->index * cache_line_size();
        db->db      = pgdir->db_page + offset / sizeof(*pgdir->db_page);
        db->dma     = pgdir->db_dma  + offset;
 
@@ -181,14 +257,16 @@ EXPORT_SYMBOL_GPL(mlx5_db_alloc);
 
 void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db)
 {
+       u32 db_per_page = PAGE_SIZE / cache_line_size();
        mutex_lock(&dev->priv.pgdir_mutex);
 
        __set_bit(db->index, db->u.pgdir->bitmap);
 
-       if (bitmap_full(db->u.pgdir->bitmap, MLX5_DB_PER_PAGE)) {
+       if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) {
                dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
                                  db->u.pgdir->db_page, db->u.pgdir->db_dma);
                list_del(&db->u.pgdir->list);
+               kfree(db->u.pgdir->bitmap);
                kfree(db->u.pgdir);
        }
 
@@ -209,3 +287,12 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
        }
 }
 EXPORT_SYMBOL_GPL(mlx5_fill_page_array);
+
+void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas)
+{
+       int i;
+
+       for (i = 0; i < buf->npages; i++)
+               pas[i] = cpu_to_be64(buf->frags[i].map);
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array);
index 1e639f88602165fcb4f4454cf6a54148372e3235..b0448b55fbc7dc454a2c07d259d5e65ad44863e8 100644 (file)
@@ -53,14 +53,6 @@ enum {
        CMD_MODE_EVENTS
 };
 
-enum {
-       NUM_LONG_LISTS    = 2,
-       NUM_MED_LISTS     = 64,
-       LONG_LIST_SIZE    = (2ULL * 1024 * 1024 * 1024 / PAGE_SIZE) * 8 + 16 +
-                               MLX5_CMD_DATA_BLOCK_SIZE,
-       MED_LIST_SIZE     = 16 + MLX5_CMD_DATA_BLOCK_SIZE,
-};
-
 enum {
        MLX5_CMD_DELIVERY_STAT_OK                       = 0x0,
        MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR               = 0x1,
@@ -318,6 +310,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
        case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
        case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
+       case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT:
+       case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT:
                return MLX5_CMD_STAT_OK;
 
        case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -419,11 +413,14 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_QUERY_FLOW_TABLE:
        case MLX5_CMD_OP_CREATE_FLOW_GROUP:
        case MLX5_CMD_OP_QUERY_FLOW_GROUP:
-
        case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
        case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
        case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
        case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+       case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+       case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+       case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+       case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT:
                *status = MLX5_DRIVER_STATUS_ABORTED;
                *synd = MLX5_DRIVER_SYND;
                return -EIO;
@@ -580,6 +577,12 @@ const char *mlx5_command_str(int command)
        MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
        MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
        MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
+       MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
+       MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
+       MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT);
+       MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT);
+       MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT);
+       MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT);
        default: return "unknown command opcode";
        }
 }
@@ -1063,14 +1066,13 @@ static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev,
        if (!mailbox)
                return ERR_PTR(-ENOMEM);
 
-       mailbox->buf = pci_pool_alloc(dev->cmd.pool, flags,
-                                     &mailbox->dma);
+       mailbox->buf = pci_pool_zalloc(dev->cmd.pool, flags,
+                                      &mailbox->dma);
        if (!mailbox->buf) {
                mlx5_core_dbg(dev, "failed allocation\n");
                kfree(mailbox);
                return ERR_PTR(-ENOMEM);
        }
-       memset(mailbox->buf, 0, sizeof(struct mlx5_cmd_prot_block));
        mailbox->next = NULL;
 
        return mailbox;
@@ -1361,10 +1363,10 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
 {
        unsigned long flags;
 
-       if (msg->cache) {
-               spin_lock_irqsave(&msg->cache->lock, flags);
-               list_add_tail(&msg->list, &msg->cache->head);
-               spin_unlock_irqrestore(&msg->cache->lock, flags);
+       if (msg->parent) {
+               spin_lock_irqsave(&msg->parent->lock, flags);
+               list_add_tail(&msg->list, &msg->parent->head);
+               spin_unlock_irqrestore(&msg->parent->lock, flags);
        } else {
                mlx5_free_cmd_msg(dev, msg);
        }
@@ -1461,30 +1463,37 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
                                      gfp_t gfp)
 {
        struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
+       struct cmd_msg_cache *ch = NULL;
        struct mlx5_cmd *cmd = &dev->cmd;
-       struct cache_ent *ent = NULL;
-
-       if (in_size > MED_LIST_SIZE && in_size <= LONG_LIST_SIZE)
-               ent = &cmd->cache.large;
-       else if (in_size > 16 && in_size <= MED_LIST_SIZE)
-               ent = &cmd->cache.med;
-
-       if (ent) {
-               spin_lock_irq(&ent->lock);
-               if (!list_empty(&ent->head)) {
-                       msg = list_entry(ent->head.next, typeof(*msg), list);
-                       /* For cached lists, we must explicitly state what is
-                        * the real size
-                        */
-                       msg->len = in_size;
-                       list_del(&msg->list);
+       int i;
+
+       if (in_size <= 16)
+               goto cache_miss;
+
+       for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+               ch = &cmd->cache[i];
+               if (in_size > ch->max_inbox_size)
+                       continue;
+               spin_lock_irq(&ch->lock);
+               if (list_empty(&ch->head)) {
+                       spin_unlock_irq(&ch->lock);
+                       continue;
                }
-               spin_unlock_irq(&ent->lock);
+               msg = list_entry(ch->head.next, typeof(*msg), list);
+               /* For cached lists, we must explicitly state what is
+                * the real size
+                */
+               msg->len = in_size;
+               list_del(&msg->list);
+               spin_unlock_irq(&ch->lock);
+               break;
        }
 
-       if (IS_ERR(msg))
-               msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
+       if (!IS_ERR(msg))
+               return msg;
 
+cache_miss:
+       msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
        return msg;
 }
 
@@ -1582,58 +1591,56 @@ EXPORT_SYMBOL(mlx5_cmd_exec_cb);
 
 static void destroy_msg_cache(struct mlx5_core_dev *dev)
 {
-       struct mlx5_cmd *cmd = &dev->cmd;
+       struct cmd_msg_cache *ch;
        struct mlx5_cmd_msg *msg;
        struct mlx5_cmd_msg *n;
+       int i;
 
-       list_for_each_entry_safe(msg, n, &cmd->cache.large.head, list) {
-               list_del(&msg->list);
-               mlx5_free_cmd_msg(dev, msg);
-       }
-
-       list_for_each_entry_safe(msg, n, &cmd->cache.med.head, list) {
-               list_del(&msg->list);
-               mlx5_free_cmd_msg(dev, msg);
+       for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+               ch = &dev->cmd.cache[i];
+               list_for_each_entry_safe(msg, n, &ch->head, list) {
+                       list_del(&msg->list);
+                       mlx5_free_cmd_msg(dev, msg);
+               }
        }
 }
 
-static int create_msg_cache(struct mlx5_core_dev *dev)
+static unsigned cmd_cache_num_ent[MLX5_NUM_COMMAND_CACHES] = {
+       512, 32, 16, 8, 2
+};
+
+static unsigned cmd_cache_ent_size[MLX5_NUM_COMMAND_CACHES] = {
+       16 + MLX5_CMD_DATA_BLOCK_SIZE,
+       16 + MLX5_CMD_DATA_BLOCK_SIZE * 2,
+       16 + MLX5_CMD_DATA_BLOCK_SIZE * 16,
+       16 + MLX5_CMD_DATA_BLOCK_SIZE * 256,
+       16 + MLX5_CMD_DATA_BLOCK_SIZE * 512,
+};
+
+static void create_msg_cache(struct mlx5_core_dev *dev)
 {
        struct mlx5_cmd *cmd = &dev->cmd;
+       struct cmd_msg_cache *ch;
        struct mlx5_cmd_msg *msg;
-       int err;
        int i;
-
-       spin_lock_init(&cmd->cache.large.lock);
-       INIT_LIST_HEAD(&cmd->cache.large.head);
-       spin_lock_init(&cmd->cache.med.lock);
-       INIT_LIST_HEAD(&cmd->cache.med.head);
-
-       for (i = 0; i < NUM_LONG_LISTS; i++) {
-               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0);
-               if (IS_ERR(msg)) {
-                       err = PTR_ERR(msg);
-                       goto ex_err;
-               }
-               msg->cache = &cmd->cache.large;
-               list_add_tail(&msg->list, &cmd->cache.large.head);
-       }
-
-       for (i = 0; i < NUM_MED_LISTS; i++) {
-               msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0);
-               if (IS_ERR(msg)) {
-                       err = PTR_ERR(msg);
-                       goto ex_err;
+       int k;
+
+       /* Initialize and fill the caches with initial entries */
+       for (k = 0; k < MLX5_NUM_COMMAND_CACHES; k++) {
+               ch = &cmd->cache[k];
+               spin_lock_init(&ch->lock);
+               INIT_LIST_HEAD(&ch->head);
+               ch->num_ent = cmd_cache_num_ent[k];
+               ch->max_inbox_size = cmd_cache_ent_size[k];
+               for (i = 0; i < ch->num_ent; i++) {
+                       msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL | __GFP_NOWARN,
+                                                ch->max_inbox_size, 0);
+                       if (IS_ERR(msg))
+                               break;
+                       msg->parent = ch;
+                       list_add_tail(&msg->list, &ch->head);
                }
-               msg->cache = &cmd->cache.med;
-               list_add_tail(&msg->list, &cmd->cache.med.head);
        }
-
-       return 0;
-
-ex_err:
-       destroy_msg_cache(dev);
-       return err;
 }
 
 static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
@@ -1756,11 +1763,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 
        cmd->mode = CMD_MODE_POLLING;
 
-       err = create_msg_cache(dev);
-       if (err) {
-               dev_err(&dev->pdev->dev, "failed to create command cache\n");
-               goto err_free_page;
-       }
+       create_msg_cache(dev);
 
        set_wqname(dev);
        cmd->wq = create_singlethread_workqueue(cmd->wq_name);
index 460363b66cb1ca02ad0dc7117f8e3bab868f3d9a..63dd6390b1615ae9fbf720cbbb2f2dd0fa809a89 100644 (file)
                                                 MLX5_MPWRQ_WQE_PAGE_ORDER)
 
 #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
-#define MLX5E_REQUIRED_MTTS(rqs, wqes)\
-       (rqs * wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
-#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) <= U16_MAX)
+#define MLX5E_REQUIRED_MTTS(wqes)              \
+       (wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX)
 
 #define MLX5_UMR_ALIGN                         (2048)
 #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD      (128)
 
 #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ                 (64 * 1024)
+#define MLX5E_DEFAULT_LRO_TIMEOUT                       32
+#define MLX5E_LRO_TIMEOUT_ARR_SIZE                      4
+
 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC      0x10
 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3
 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS      0x20
@@ -147,12 +150,6 @@ static inline int mlx5_max_log_rq_size(int wq_type)
        }
 }
 
-enum {
-       MLX5E_INLINE_MODE_L2,
-       MLX5E_INLINE_MODE_VPORT_CONTEXT,
-       MLX5_INLINE_MODE_NOT_REQUIRED,
-};
-
 struct mlx5e_tx_wqe {
        struct mlx5_wqe_ctrl_seg ctrl;
        struct mlx5_wqe_eth_seg  eth;
@@ -170,22 +167,28 @@ struct mlx5e_umr_wqe {
        struct mlx5_wqe_data_seg       data;
 };
 
+extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
+
 static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
        "rx_cqe_moder",
+       "rx_cqe_compress",
 };
 
 enum mlx5e_priv_flag {
        MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
+       MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 1),
 };
 
-#define MLX5E_SET_PRIV_FLAG(priv, pflag, enable)    \
-       do {                                        \
-               if (enable)                         \
-                       priv->pflags |= pflag;      \
-               else                                \
-                       priv->pflags &= ~pflag;     \
+#define MLX5E_SET_PFLAG(priv, pflag, enable)                   \
+       do {                                                    \
+               if (enable)                                     \
+                       (priv)->params.pflags |= (pflag);       \
+               else                                            \
+                       (priv)->params.pflags &= ~(pflag);      \
        } while (0)
 
+#define MLX5E_GET_PFLAG(priv, pflag) (!!((priv)->params.pflags & (pflag)))
+
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
 #endif
@@ -204,8 +207,7 @@ struct mlx5e_params {
        u16 num_channels;
        u8  num_tc;
        u8  rx_cq_period_mode;
-       bool rx_cqe_compress_admin;
-       bool rx_cqe_compress;
+       bool rx_cqe_compress_def;
        struct mlx5e_cq_moder rx_cq_moderation;
        struct mlx5e_cq_moder tx_cq_moderation;
        u16 min_rx_wqes;
@@ -217,12 +219,35 @@ struct mlx5e_params {
        u8  toeplitz_hash_key[40];
        u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
        bool vlan_strip_disable;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-       struct ieee_ets ets;
-#endif
        bool rx_am_enabled;
+       u32 lro_timeout;
+       u32 pflags;
+};
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+struct mlx5e_cee_config {
+       /* bw pct for priority group */
+       u8                         pg_bw_pct[CEE_DCBX_MAX_PGS];
+       u8                         prio_to_pg_map[CEE_DCBX_MAX_PRIO];
+       bool                       pfc_setting[CEE_DCBX_MAX_PRIO];
+       bool                       pfc_enable;
 };
 
+enum {
+       MLX5_DCB_CHG_RESET,
+       MLX5_DCB_NO_CHG,
+       MLX5_DCB_CHG_NO_RESET,
+};
+
+struct mlx5e_dcbx {
+       enum mlx5_dcbx_oper_mode   mode;
+       struct mlx5e_cee_config    cee_cfg; /* pending configuration */
+
+       /* The only setting that cannot be read from FW */
+       u8                         tc_tsa[IEEE_8021QAZ_MAX_TCS];
+};
+#endif
+
 struct mlx5e_tstamp {
        rwlock_t                   lock;
        struct cyclecounter        cycles;
@@ -261,7 +286,7 @@ struct mlx5e_cq {
        u16                        decmprs_wqe_counter;
 
        /* control */
-       struct mlx5_wq_ctrl        wq_ctrl;
+       struct mlx5_frag_wq_ctrl   wq_ctrl;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_rq;
@@ -322,7 +347,6 @@ struct mlx5e_rq {
                struct {
                        struct mlx5e_mpw_info *info;
                        void                  *mtt_no_align;
-                       u32                    mtt_offset;
                } mpwqe;
        };
        struct {
@@ -357,6 +381,7 @@ struct mlx5e_rq {
        u32                    rqn;
        struct mlx5e_channel  *channel;
        struct mlx5e_priv     *priv;
+       struct mlx5_core_mkey  umr_mkey;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_umr_dma_info {
@@ -520,7 +545,7 @@ struct mlx5e_vxlan_db {
 
 struct mlx5e_l2_rule {
        u8  addr[ETH_ALEN + 2];
-       struct mlx5_flow_rule *rule;
+       struct mlx5_flow_handle *rule;
 };
 
 struct mlx5e_flow_table {
@@ -541,10 +566,10 @@ struct mlx5e_tc_table {
 struct mlx5e_vlan_table {
        struct mlx5e_flow_table         ft;
        unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
-       struct mlx5_flow_rule   *active_vlans_rule[VLAN_N_VID];
-       struct mlx5_flow_rule   *untagged_rule;
-       struct mlx5_flow_rule   *any_vlan_rule;
-       bool          filter_disabled;
+       struct mlx5_flow_handle *active_vlans_rule[VLAN_N_VID];
+       struct mlx5_flow_handle *untagged_rule;
+       struct mlx5_flow_handle *any_vlan_rule;
+       bool            filter_disabled;
 };
 
 struct mlx5e_l2_table {
@@ -562,14 +587,14 @@ struct mlx5e_l2_table {
 /* L3/L4 traffic type classifier */
 struct mlx5e_ttc_table {
        struct mlx5e_flow_table  ft;
-       struct mlx5_flow_rule    *rules[MLX5E_NUM_TT];
+       struct mlx5_flow_handle  *rules[MLX5E_NUM_TT];
 };
 
 #define ARFS_HASH_SHIFT BITS_PER_BYTE
 #define ARFS_HASH_SIZE BIT(BITS_PER_BYTE)
 struct arfs_table {
        struct mlx5e_flow_table  ft;
-       struct mlx5_flow_rule    *default_rule;
+       struct mlx5_flow_handle  *default_rule;
        struct hlist_head        rules_hash[ARFS_HASH_SIZE];
 };
 
@@ -664,7 +689,6 @@ struct mlx5e_priv {
 
        unsigned long              state;
        struct mutex               state_lock; /* Protects Interface state */
-       struct mlx5_core_mkey      umr_mkey;
        struct mlx5e_rq            drop_rq;
 
        struct mlx5e_channel     **channel;
@@ -684,12 +708,15 @@ struct mlx5e_priv {
        struct work_struct         tx_timeout_work;
        struct delayed_work        update_stats_work;
 
-       u32                        pflags;
        struct mlx5_core_dev      *mdev;
        struct net_device         *netdev;
        struct mlx5e_stats         stats;
        struct mlx5e_tstamp        tstamp;
        u16 q_counter;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+       struct mlx5e_dcbx          dcbx;
+#endif
+
        const struct mlx5e_profile *profile;
        void                      *ppriv;
 };
@@ -731,6 +758,9 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
+int mlx5e_self_test_num(struct mlx5e_priv *priv);
+void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
+                    u64 *buf);
 int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info,
                           int location);
 int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
@@ -807,8 +837,7 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
 
 static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
 {
-       return rq->mpwqe.mtt_offset +
-               wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
+       return wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
 }
 
 static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
@@ -821,6 +850,7 @@ extern const struct ethtool_ops mlx5e_ethtool_ops;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
 int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets);
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
 #endif
 
 #ifndef CONFIG_RFS_ACCEL
@@ -856,7 +886,8 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
                       struct mlx5e_tir *tir);
 int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
-int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev);
+int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev,
+                                    bool enable_uc_lb);
 
 struct mlx5_eswitch_rep;
 int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
@@ -870,6 +901,7 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
 void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
 int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
 void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+void mlx5e_update_hw_rep_counters(struct mlx5e_priv *priv);
 
 int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
 void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
@@ -886,7 +918,16 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
 int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
 void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
-struct rtnl_link_stats64 *
-mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
-
+u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
+void mlx5e_add_vxlan_port(struct net_device *netdev,
+                         struct udp_tunnel_info *ti);
+void mlx5e_del_vxlan_port(struct net_device *netdev,
+                         struct udp_tunnel_info *ti);
+
+int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+                           void *sp);
+bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id);
+
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
+bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv);
 #endif /* __MLX5_EN_H__ */
index a8cb38789774c754bebb2f2f451a3379b6363eb9..68419a01db36e33765b1cc366455da8b55420da7 100644 (file)
@@ -56,7 +56,7 @@ struct arfs_tuple {
 struct arfs_rule {
        struct mlx5e_priv       *priv;
        struct work_struct      arfs_work;
-       struct mlx5_flow_rule   *rule;
+       struct mlx5_flow_handle *rule;
        struct hlist_node       hlist;
        int                     rxq;
        /* Flow ID passed to ndo_rx_flow_steer */
@@ -104,7 +104,7 @@ static int arfs_disable(struct mlx5e_priv *priv)
                tt = arfs_get_tt(i);
                /* Modify ttc rules destination to bypass the aRFS tables*/
                err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
-                                                  &dest);
+                                                  &dest, NULL);
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc destination failed\n",
@@ -137,7 +137,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv)
                tt = arfs_get_tt(i);
                /* Modify ttc rules destination to point on the aRFS FTs */
                err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
-                                                  &dest);
+                                                  &dest, NULL);
                if (err) {
                        netdev_err(priv->netdev,
                                   "%s: modify ttc destination failed err=%d\n",
@@ -151,7 +151,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv)
 
 static void arfs_destroy_table(struct arfs_table *arfs_t)
 {
-       mlx5_del_flow_rule(arfs_t->default_rule);
+       mlx5_del_flow_rules(arfs_t->default_rule);
        mlx5e_destroy_flow_table(&arfs_t->ft);
 }
 
@@ -174,6 +174,11 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
                                 enum arfs_type type)
 {
        struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
+       struct mlx5_flow_act flow_act = {
+               .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+               .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+               .encap_id = 0,
+       };
        struct mlx5_flow_destination dest;
        struct mlx5e_tir *tir = priv->indir_tir;
        struct mlx5_flow_spec *spec;
@@ -205,10 +210,9 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
                goto out;
        }
 
-       arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, spec,
-                                                 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-                                                 MLX5_FS_DEFAULT_FLOW_TAG,
-                                                 &dest);
+       arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec,
+                                                  &flow_act,
+                                                  &dest, 1);
        if (IS_ERR(arfs_t->default_rule)) {
                err = PTR_ERR(arfs_t->default_rule);
                arfs_t->default_rule = NULL;
@@ -324,7 +328,7 @@ static int arfs_create_table(struct mlx5e_priv *priv,
        int err;
 
        ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
-                                      MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL);
+                                      MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL, 0);
        if (IS_ERR(ft->t)) {
                err = PTR_ERR(ft->t);
                ft->t = NULL;
@@ -396,7 +400,7 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
        spin_unlock_bh(&priv->fs.arfs.arfs_lock);
        hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
                if (arfs_rule->rule)
-                       mlx5_del_flow_rule(arfs_rule->rule);
+                       mlx5_del_flow_rules(arfs_rule->rule);
                hlist_del(&arfs_rule->hlist);
                kfree(arfs_rule);
        }
@@ -420,7 +424,7 @@ static void arfs_del_rules(struct mlx5e_priv *priv)
        hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
                cancel_work_sync(&rule->arfs_work);
                if (rule->rule)
-                       mlx5_del_flow_rule(rule->rule);
+                       mlx5_del_flow_rules(rule->rule);
                hlist_del(&rule->hlist);
                kfree(rule);
        }
@@ -462,12 +466,17 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
        return NULL;
 }
 
-static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv,
-                                           struct arfs_rule *arfs_rule)
+static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
+                                             struct arfs_rule *arfs_rule)
 {
+       struct mlx5_flow_act flow_act = {
+               .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+               .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+               .encap_id = 0,
+       };
        struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
        struct arfs_tuple *tuple = &arfs_rule->tuple;
-       struct mlx5_flow_rule *rule = NULL;
+       struct mlx5_flow_handle *rule = NULL;
        struct mlx5_flow_destination dest;
        struct arfs_table *arfs_table;
        struct mlx5_flow_spec *spec;
@@ -544,9 +553,7 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv,
        }
        dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
        dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
-       rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-                                 MLX5_FS_DEFAULT_FLOW_TAG,
-                                 &dest);
+       rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n",
@@ -559,14 +566,14 @@ out:
 }
 
 static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
-                               struct mlx5_flow_rule *rule, u16 rxq)
+                               struct mlx5_flow_handle *rule, u16 rxq)
 {
        struct mlx5_flow_destination dst;
        int err = 0;
 
        dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
        dst.tir_num = priv->direct_tir[rxq].tirn;
-       err =  mlx5_modify_rule_destination(rule, &dst);
+       err =  mlx5_modify_rule_destination(rule, &dst, NULL);
        if (err)
                netdev_warn(priv->netdev,
                            "Failed to modfiy aRFS rule destination to rq=%d\n", rxq);
@@ -578,7 +585,7 @@ static void arfs_handle_work(struct work_struct *work)
                                                   struct arfs_rule,
                                                   arfs_work);
        struct mlx5e_priv *priv = arfs_rule->priv;
-       struct mlx5_flow_rule *rule;
+       struct mlx5_flow_handle *rule;
 
        mutex_lock(&priv->state_lock);
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
index 13dc388667b6002c311cac9293d2b8894f9afdf2..2cd8e56a573be122a1568687fe436ead5e75ceda 100644 (file)
@@ -94,7 +94,7 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
        switch (config.rx_filter) {
        case HWTSTAMP_FILTER_NONE:
                /* Reset CQE compression to Admin default */
-               mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_admin);
+               mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_def);
                break;
        case HWTSTAMP_FILTER_ALL:
        case HWTSTAMP_FILTER_SOME:
@@ -111,6 +111,7 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
        case HWTSTAMP_FILTER_PTP_V2_SYNC:
        case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
                /* Disable CQE compression */
+               netdev_warn(dev, "Disabling cqe compression");
                mlx5e_modify_rx_cqe_compression(priv, false);
                config.rx_filter = HWTSTAMP_FILTER_ALL;
                break;
index 029e856f72a0fc7170d8f3a0f554bc924b73df20..f175518ff07aa9163e6e84022a4e9ae4c4c9f9da 100644 (file)
@@ -137,7 +137,8 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
        mlx5_unmap_free_uar(mdev, &res->cq_uar);
 }
 
-int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev)
+int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev,
+                                    bool enable_uc_lb)
 {
        struct mlx5e_tir *tir;
        void *in;
@@ -149,6 +150,10 @@ int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev)
        if (!in)
                return -ENOMEM;
 
+       if (enable_uc_lb)
+               MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
+                        MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
        MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
 
        list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
index 762af16ed021b4815779728eae427d1cf593baf9..7f6c225666c18b193f642ba74cb8637e471ab3fc 100644 (file)
 #define MLX5E_100MB (100000)
 #define MLX5E_1GB   (1000000)
 
+#define MLX5E_CEE_STATE_UP    1
+#define MLX5E_CEE_STATE_DOWN  0
+
+/* Set the DCBX admin version to @mode; set willing_admin for non-host modes.
+ */
+static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv,
+                                    enum mlx5_dcbx_oper_mode mode)
+{
+       struct mlx5_core_dev *mdev = priv->mdev;
+       u32 param[MLX5_ST_SZ_DW(dcbx_param)];
+       int err;
+
+       err = mlx5_query_port_dcbx_param(mdev, param);
+       if (err)
+               return err;
+
+       MLX5_SET(dcbx_param, param, version_admin, mode);
+       if (mode != MLX5E_DCBX_PARAM_VER_OPER_HOST)
+               MLX5_SET(dcbx_param, param, willing_admin, 1);
+
+       return mlx5_set_port_dcbx_param(mdev, param);
+}
+
+static int mlx5e_dcbnl_switch_to_host_mode(struct mlx5e_priv *priv)
+{
+       struct mlx5e_dcbx *dcbx = &priv->dcbx;
+       int err;
+
+       if (!MLX5_CAP_GEN(priv->mdev, dcbx))
+               return 0;
+
+       if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+               return 0;
+
+       err = mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_HOST);
+       if (err)
+               return err;
+
+       dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_HOST;
+       return 0;
+}
+
 static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
                                   struct ieee_ets *ets)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
+       int err = 0;
+       int i;
 
        if (!MLX5_CAP_GEN(priv->mdev, ets))
                return -ENOTSUPP;
 
-       memcpy(ets, &priv->params.ets, sizeof(*ets));
-       return 0;
+       ets->ets_cap = mlx5_max_tc(priv->mdev) + 1;
+       for (i = 0; i < ets->ets_cap; i++) {
+               err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]);
+               if (err)
+                       return err;
+       }
+
+       for (i = 0; i < ets->ets_cap; i++) {
+               err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]);
+               if (err)
+                       return err;
+               if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC)
+                       priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+       }
+
+       memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa));
+
+       return err;
 }
 
 enum {
@@ -110,9 +171,6 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
        int max_tc = mlx5_max_tc(mdev);
        int err;
 
-       if (!MLX5_CAP_GEN(mdev, ets))
-               return -ENOTSUPP;
-
        mlx5e_build_tc_group(ets, tc_group, max_tc);
        mlx5e_build_tc_tx_bw(ets, tc_tx_bw, tc_group, max_tc);
 
@@ -124,7 +182,14 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
        if (err)
                return err;
 
-       return mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
+       err = mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
+
+       if (err)
+               return err;
+
+       memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa));
+
+       return err;
 }
 
 static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
@@ -170,6 +235,9 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev,
        struct mlx5e_priv *priv = netdev_priv(netdev);
        int err;
 
+       if (!MLX5_CAP_GEN(priv->mdev, ets))
+               return -ENOTSUPP;
+
        err = mlx5e_dbcnl_validate_ets(netdev, ets);
        if (err)
                return err;
@@ -178,9 +246,6 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev,
        if (err)
                return err;
 
-       memcpy(&priv->params.ets, ets, sizeof(*ets));
-       priv->params.ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
-
        return 0;
 }
 
@@ -222,13 +287,39 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
 
 static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev)
 {
-       return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5e_dcbx *dcbx = &priv->dcbx;
+       u8 mode = DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_VER_CEE;
+
+       if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+               mode |= DCB_CAP_DCBX_HOST;
+
+       return mode;
 }
 
 static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 {
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5e_dcbx *dcbx = &priv->dcbx;
+
+       if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) {
+               if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO)
+                       return 0;
+
+               /* set dcbx to fw controlled */
+               if (!mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_AUTO)) {
+                       dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO;
+                       return 0;
+               }
+
+               return 1;
+       }
+
+       if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
+               return 1;
+
        if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
-           (mode & DCB_CAP_DCBX_VER_CEE) ||
+           !(mode & DCB_CAP_DCBX_VER_CEE) ||
            !(mode & DCB_CAP_DCBX_VER_IEEE) ||
            !(mode & DCB_CAP_DCBX_HOST))
                return 1;
@@ -304,6 +395,284 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev,
        return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);
 }
 
+static u8 mlx5e_dcbnl_setall(struct net_device *netdev)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+       struct mlx5_core_dev *mdev = priv->mdev;
+       struct ieee_ets ets;
+       struct ieee_pfc pfc;
+       int err = -ENOTSUPP;
+       int i;
+
+       if (!MLX5_CAP_GEN(mdev, ets))
+               goto out;
+
+       memset(&ets, 0, sizeof(ets));
+       memset(&pfc, 0, sizeof(pfc));
+
+       ets.ets_cap = IEEE_8021QAZ_MAX_TCS;
+       for (i = 0; i < CEE_DCBX_MAX_PGS; i++) {
+               ets.tc_tx_bw[i] = cee_cfg->pg_bw_pct[i];
+               ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i];
+               ets.tc_tsa[i]   = IEEE_8021QAZ_TSA_ETS;
+               ets.prio_tc[i]  = cee_cfg->prio_to_pg_map[i];
+       }
+
+       err = mlx5e_dbcnl_validate_ets(netdev, &ets);
+       if (err) {
+               netdev_err(netdev,
+                          "%s, Failed to validate ETS: %d\n", __func__, err);
+               goto out;
+       }
+
+       err = mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+       if (err) {
+               netdev_err(netdev,
+                          "%s, Failed to set ETS: %d\n", __func__, err);
+               goto out;
+       }
+
+       /* Set PFC */
+       pfc.pfc_cap = mlx5_max_tc(mdev) + 1;
+       if (!cee_cfg->pfc_enable)
+               pfc.pfc_en = 0;
+       else
+               for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+                       pfc.pfc_en |= cee_cfg->pfc_setting[i] << i;
+
+       err = mlx5e_dcbnl_ieee_setpfc(netdev, &pfc);
+       if (err) {
+               netdev_err(netdev,
+                          "%s, Failed to set PFC: %d\n", __func__, err);
+               goto out;
+       }
+out:
+       return err ? MLX5_DCB_NO_CHG : MLX5_DCB_CHG_RESET;
+}
+
+static u8 mlx5e_dcbnl_getstate(struct net_device *netdev)
+{
+       return MLX5E_CEE_STATE_UP;
+}
+
+static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev,
+                                     u8 *perm_addr)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+
+       if (!perm_addr)
+               return;
+
+       mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr);
+}
+
+static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev,
+                                    int priority, u8 prio_type,
+                                    u8 pgid, u8 bw_pct, u8 up_map)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+       if (priority >= CEE_DCBX_MAX_PRIO) {
+               netdev_err(netdev,
+                          "%s, priority is out of range\n", __func__);
+               return;
+       }
+
+       if (pgid >= CEE_DCBX_MAX_PGS) {
+               netdev_err(netdev,
+                          "%s, priority group is out of range\n", __func__);
+               return;
+       }
+
+       cee_cfg->prio_to_pg_map[priority] = pgid;
+}
+
+static void mlx5e_dcbnl_setpgbwgcfgtx(struct net_device *netdev,
+                                     int pgid, u8 bw_pct)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+       if (pgid >= CEE_DCBX_MAX_PGS) {
+               netdev_err(netdev,
+                          "%s, priority group is out of range\n", __func__);
+               return;
+       }
+
+       cee_cfg->pg_bw_pct[pgid] = bw_pct;
+}
+
+static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev,
+                                    int priority, u8 *prio_type,
+                                    u8 *pgid, u8 *bw_pct, u8 *up_map)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
+
+       if (priority >= CEE_DCBX_MAX_PRIO) {
+               netdev_err(netdev,
+                          "%s, priority is out of range\n", __func__);
+               return;
+       }
+
+       *prio_type = 0;
+       *bw_pct = 0;
+       *up_map = 0;
+
+       if (mlx5_query_port_prio_tc(mdev, priority, pgid))
+               *pgid = 0;
+}
+
+static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev,
+                                     int pgid, u8 *bw_pct)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
+
+       if (pgid >= CEE_DCBX_MAX_PGS) {
+               netdev_err(netdev,
+                          "%s, priority group is out of range\n", __func__);
+               return;
+       }
+
+       if (mlx5_query_port_tc_bw_alloc(mdev, pgid, bw_pct))
+               *bw_pct = 0;
+}
+
+static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev,
+                                 int priority, u8 setting)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+       if (priority >= CEE_DCBX_MAX_PRIO) {
+               netdev_err(netdev,
+                          "%s, priority is out of range\n", __func__);
+               return;
+       }
+
+       if (setting > 1)
+               return;
+
+       cee_cfg->pfc_setting[priority] = setting;
+}
+
+static int
+mlx5e_dcbnl_get_priority_pfc(struct net_device *netdev,
+                            int priority, u8 *setting)
+{
+       struct ieee_pfc pfc;
+       int err;
+
+       err = mlx5e_dcbnl_ieee_getpfc(netdev, &pfc);
+
+       if (err)
+               *setting = 0;
+       else
+               *setting = (pfc.pfc_en >> priority) & 0x01;
+
+       return err;
+}
+
+static void mlx5e_dcbnl_getpfccfg(struct net_device *netdev,
+                                 int priority, u8 *setting)
+{
+       if (priority >= CEE_DCBX_MAX_PRIO) {
+               netdev_err(netdev,
+                          "%s, priority is out of range\n", __func__);
+               return;
+       }
+
+       if (!setting)
+               return;
+
+       mlx5e_dcbnl_get_priority_pfc(netdev, priority, setting);
+}
+
+static u8 mlx5e_dcbnl_getcap(struct net_device *netdev,
+                            int capid, u8 *cap)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
+       u8 rval = 0;
+
+       switch (capid) {
+       case DCB_CAP_ATTR_PG:
+               *cap = true;
+               break;
+       case DCB_CAP_ATTR_PFC:
+               *cap = true;
+               break;
+       case DCB_CAP_ATTR_UP2TC:
+               *cap = false;
+               break;
+       case DCB_CAP_ATTR_PG_TCS:
+               *cap = 1 << mlx5_max_tc(mdev);
+               break;
+       case DCB_CAP_ATTR_PFC_TCS:
+               *cap = 1 << mlx5_max_tc(mdev);
+               break;
+       case DCB_CAP_ATTR_GSP:
+               *cap = false;
+               break;
+       case DCB_CAP_ATTR_BCN:
+               *cap = false;
+               break;
+       case DCB_CAP_ATTR_DCBX:
+               *cap = (DCB_CAP_DCBX_LLD_MANAGED |
+                       DCB_CAP_DCBX_VER_CEE |
+                       DCB_CAP_DCBX_STATIC);
+               break;
+       default:
+               *cap = 0;
+               rval = 1;
+               break;
+       }
+
+       return rval;
+}
+
+static int mlx5e_dcbnl_getnumtcs(struct net_device *netdev,
+                                int tcs_id, u8 *num)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
+
+       switch (tcs_id) {
+       case DCB_NUMTCS_ATTR_PG:
+       case DCB_NUMTCS_ATTR_PFC:
+               *num = mlx5_max_tc(mdev) + 1;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static u8 mlx5e_dcbnl_getpfcstate(struct net_device *netdev)
+{
+       struct ieee_pfc pfc;
+
+       if (mlx5e_dcbnl_ieee_getpfc(netdev, &pfc))
+               return MLX5E_CEE_STATE_DOWN;
+
+       return pfc.pfc_en ? MLX5E_CEE_STATE_UP : MLX5E_CEE_STATE_DOWN;
+}
+
+static void mlx5e_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+       if ((state != MLX5E_CEE_STATE_UP) && (state != MLX5E_CEE_STATE_DOWN))
+               return;
+
+       cee_cfg->pfc_enable = state;
+}
+
 const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
        .ieee_getets    = mlx5e_dcbnl_ieee_getets,
        .ieee_setets    = mlx5e_dcbnl_ieee_setets,
@@ -313,4 +682,70 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
        .ieee_setpfc    = mlx5e_dcbnl_ieee_setpfc,
        .getdcbx        = mlx5e_dcbnl_getdcbx,
        .setdcbx        = mlx5e_dcbnl_setdcbx,
+
+/* CEE interfaces */
+       .setall         = mlx5e_dcbnl_setall,
+       .getstate       = mlx5e_dcbnl_getstate,
+       .getpermhwaddr  = mlx5e_dcbnl_getpermhwaddr,
+
+       .setpgtccfgtx   = mlx5e_dcbnl_setpgtccfgtx,
+       .setpgbwgcfgtx  = mlx5e_dcbnl_setpgbwgcfgtx,
+       .getpgtccfgtx   = mlx5e_dcbnl_getpgtccfgtx,
+       .getpgbwgcfgtx  = mlx5e_dcbnl_getpgbwgcfgtx,
+
+       .setpfccfg      = mlx5e_dcbnl_setpfccfg,
+       .getpfccfg      = mlx5e_dcbnl_getpfccfg,
+       .getcap         = mlx5e_dcbnl_getcap,
+       .getnumtcs      = mlx5e_dcbnl_getnumtcs,
+       .getpfcstate    = mlx5e_dcbnl_getpfcstate,
+       .setpfcstate    = mlx5e_dcbnl_setpfcstate,
 };
+
+static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv,
+                                       enum mlx5_dcbx_oper_mode *mode)
+{
+       u32 out[MLX5_ST_SZ_DW(dcbx_param)];
+
+       *mode = MLX5E_DCBX_PARAM_VER_OPER_HOST;
+
+       if (!mlx5_query_port_dcbx_param(priv->mdev, out))
+               *mode = MLX5_GET(dcbx_param, out, version_oper);
+
+       /* From driver's point of view, we only care if the mode
+        * is host (HOST) or non-host (AUTO)
+        */
+       if (*mode != MLX5E_DCBX_PARAM_VER_OPER_HOST)
+               *mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO;
+}
+
+static void mlx5e_ets_init(struct mlx5e_priv *priv)
+{
+       int i;
+       struct ieee_ets ets;
+
+       memset(&ets, 0, sizeof(ets));
+       ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
+       for (i = 0; i < ets.ets_cap; i++) {
+               ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+               ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
+               ets.prio_tc[i] = i;
+       }
+
+       memcpy(priv->dcbx.tc_tsa, ets.tc_tsa, sizeof(ets.tc_tsa));
+
+       /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */
+       ets.prio_tc[0] = 1;
+       ets.prio_tc[1] = 0;
+
+       mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+}
+
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv)
+{
+       struct mlx5e_dcbx *dcbx = &priv->dcbx;
+
+       if (MLX5_CAP_GEN(priv->mdev, dcbx))
+               mlx5e_dcbnl_query_dcbx_mode(priv, &dcbx->mode);
+
+       mlx5e_ets_init(priv);
+}
index 27ff401cec201d48a9e66dab934f3dd9f32d9901..352462af8d51aced0ba2c2a0daa729e7aa05f68f 100644 (file)
@@ -171,11 +171,17 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset)
                return NUM_SW_COUNTERS +
                       MLX5E_NUM_Q_CNTRS(priv) +
                       NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS +
+                      NUM_PCIE_COUNTERS +
                       MLX5E_NUM_RQ_STATS(priv) +
                       MLX5E_NUM_SQ_STATS(priv) +
-                      MLX5E_NUM_PFC_COUNTERS(priv);
+                      MLX5E_NUM_PFC_COUNTERS(priv) +
+                      ARRAY_SIZE(mlx5e_pme_status_desc) +
+                      ARRAY_SIZE(mlx5e_pme_error_desc);
+
        case ETH_SS_PRIV_FLAGS:
                return ARRAY_SIZE(mlx5e_priv_flags);
+       case ETH_SS_TEST:
+               return mlx5e_self_test_num(priv);
        /* fallthrough */
        default:
                return -EOPNOTSUPP;
@@ -213,6 +219,14 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
                strcpy(data + (idx++) * ETH_GSTRING_LEN,
                       pport_2819_stats_desc[i].format);
 
+       for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
+               strcpy(data + (idx++) * ETH_GSTRING_LEN,
+                      pcie_perf_stats_desc[i].format);
+
+       for (i = 0; i < NUM_PCIE_TAS_COUNTERS; i++)
+               strcpy(data + (idx++) * ETH_GSTRING_LEN,
+                      pcie_tas_stats_desc[i].format);
+
        for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
                for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
                        sprintf(data + (idx++) * ETH_GSTRING_LEN,
@@ -237,6 +251,13 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
                }
        }
 
+       /* port module event counters */
+       for (i = 0; i < ARRAY_SIZE(mlx5e_pme_status_desc); i++)
+               strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format);
+
+       for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++)
+               strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format);
+
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
                return;
 
@@ -267,6 +288,9 @@ static void mlx5e_get_strings(struct net_device *dev,
                break;
 
        case ETH_SS_TEST:
+               for (i = 0; i < mlx5e_self_test_num(priv); i++)
+                       strcpy(data + i * ETH_GSTRING_LEN,
+                              mlx5e_self_tests[i]);
                break;
 
        case ETH_SS_STATS:
@@ -279,6 +303,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
                                    struct ethtool_stats *stats, u64 *data)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5_priv *mlx5_priv;
        int i, j, tc, prio, idx = 0;
        unsigned long pfc_combined;
 
@@ -314,6 +339,14 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
                data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters,
                                                  pport_2819_stats_desc, i);
 
+       for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
+               data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
+                                                 pcie_perf_stats_desc, i);
+
+       for (i = 0; i < NUM_PCIE_TAS_COUNTERS; i++)
+               data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_tas_counters,
+                                                 pcie_tas_stats_desc, i);
+
        for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
                for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
                        data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
@@ -335,6 +368,16 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
                }
        }
 
+       /* port module event counters */
+       mlx5_priv =  &priv->mdev->priv;
+       for (i = 0; i < ARRAY_SIZE(mlx5e_pme_status_desc); i++)
+               data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
+                                                  mlx5e_pme_status_desc, i);
+
+       for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++)
+               data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
+                                                  mlx5e_pme_error_desc, i);
+
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
                return;
 
@@ -456,8 +499,7 @@ static int mlx5e_set_ringparam(struct net_device *dev,
                return -EINVAL;
        }
 
-       num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels,
-                                      rx_pending_wqes);
+       num_mtts = MLX5E_REQUIRED_MTTS(rx_pending_wqes);
        if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
            !MLX5E_VALID_NUM_MTTS(num_mtts)) {
                netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n",
@@ -522,7 +564,6 @@ static int mlx5e_set_channels(struct net_device *dev,
        unsigned int count = ch->combined_count;
        bool arfs_enabled;
        bool was_opened;
-       u32 num_mtts;
        int err = 0;
 
        if (!count) {
@@ -541,14 +582,6 @@ static int mlx5e_set_channels(struct net_device *dev,
                return -EINVAL;
        }
 
-       num_mtts = MLX5E_REQUIRED_MTTS(count, BIT(priv->params.log_rq_size));
-       if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
-           !MLX5E_VALID_NUM_MTTS(num_mtts)) {
-               netdev_info(dev, "%s: rx count (%d) request can't be satisfied, try to reduce.\n",
-                           __func__, count);
-               return -EINVAL;
-       }
-
        if (priv->params.num_channels == count)
                return 0;
 
@@ -1438,6 +1471,35 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
        return err;
 }
 
+/*
+ * set_pflag_rx_cqe_compress - apply the RX CQE compression private flag.
+ * @netdev: net device whose private flag is being changed
+ * @enable: requested state of MLX5E_PFLAG_RX_CQE_COMPRESS
+ *
+ * Handed to mlx5e_handle_pflag() by mlx5e_set_priv_flags(), which takes
+ * priv->state_lock around that call.  If the interface is currently open
+ * it is closed, the flag and params.rx_cqe_compress_def are updated, and
+ * the interface is opened again.
+ *
+ * Return: 0 on success, -EOPNOTSUPP if the device lacks the
+ * cqe_compression capability, -EINVAL if enabling while an RX
+ * timestamping filter is configured, or the mlx5e_open_locked() error.
+ */
+static int set_pflag_rx_cqe_compress(struct net_device *netdev,
+                                    bool enable)
+{
+       struct mlx5e_priv *priv = netdev_priv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
+       int err = 0;
+       bool reset;
+
+       /*
+        * ENOTSUPP is a kernel-internal code, not a userspace errno;
+        * mlx5e_set_priv_flags() returns this value unchanged, so use
+        * EOPNOTSUPP as the other ethtool paths in this file do.
+        */
+       if (!MLX5_CAP_GEN(mdev, cqe_compression))
+               return -EOPNOTSUPP;
+
+       if (enable && priv->tstamp.hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE) {
+               netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n");
+               return -EINVAL;
+       }
+
+       /* the new setting is applied by closing and reopening the device */
+       reset = test_bit(MLX5E_STATE_OPENED, &priv->state);
+
+       if (reset)
+               mlx5e_close_locked(netdev);
+
+       MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, enable);
+       priv->params.rx_cqe_compress_def = enable;
+
+       if (reset)
+               err = mlx5e_open_locked(netdev);
+       return err;
+}
+
 static int mlx5e_handle_pflag(struct net_device *netdev,
                              u32 wanted_flags,
                              enum mlx5e_priv_flag flag,
@@ -1445,7 +1507,7 @@ static int mlx5e_handle_pflag(struct net_device *netdev,
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
        bool enable = !!(wanted_flags & flag);
-       u32 changes = wanted_flags ^ priv->pflags;
+       u32 changes = wanted_flags ^ priv->params.pflags;
        int err;
 
        if (!(changes & flag))
@@ -1458,7 +1520,7 @@ static int mlx5e_handle_pflag(struct net_device *netdev,
                return err;
        }
 
-       MLX5E_SET_PRIV_FLAG(priv, flag, enable);
+       MLX5E_SET_PFLAG(priv, flag, enable);
        return 0;
 }
 
@@ -1468,20 +1530,26 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
        int err;
 
        mutex_lock(&priv->state_lock);
-
        err = mlx5e_handle_pflag(netdev, pflags,
                                 MLX5E_PFLAG_RX_CQE_BASED_MODER,
                                 set_pflag_rx_cqe_based_moder);
+       if (err)
+               goto out;
+
+       err = mlx5e_handle_pflag(netdev, pflags,
+                                MLX5E_PFLAG_RX_CQE_COMPRESS,
+                                set_pflag_rx_cqe_compress);
 
+out:
        mutex_unlock(&priv->state_lock);
-       return err ? -EINVAL : 0;
+       return err;
 }
 
+/* ethtool .get_priv_flags handler: report the bitmap kept in params.pflags. */
 static u32 mlx5e_get_priv_flags(struct net_device *netdev)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
 
-       return priv->pflags;
+       return priv->params.pflags;
 }
 
 static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
@@ -1535,5 +1603,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
        .get_module_info   = mlx5e_get_module_info,
        .get_module_eeprom = mlx5e_get_module_eeprom,
        .get_priv_flags    = mlx5e_get_priv_flags,
-       .set_priv_flags    = mlx5e_set_priv_flags
+       .set_priv_flags    = mlx5e_set_priv_flags,
+       .self_test         = mlx5e_self_test,
 };
index 36fbc6b21a335dd32b7703152e5ead0f40e6d520..1fe80de5d68f1f3cf09c6e32530cd32114580051 100644 (file)
@@ -158,9 +158,14 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
                                 enum mlx5e_vlan_rule_type rule_type,
                                 u16 vid, struct mlx5_flow_spec *spec)
 {
+       struct mlx5_flow_act flow_act = {
+               .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+               .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+               .encap_id = 0,
+       };
        struct mlx5_flow_table *ft = priv->fs.vlan.ft.t;
        struct mlx5_flow_destination dest;
-       struct mlx5_flow_rule **rule_p;
+       struct mlx5_flow_handle **rule_p;
        int err = 0;
 
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
@@ -187,10 +192,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
                break;
        }
 
-       *rule_p = mlx5_add_flow_rule(ft, spec,
-                                    MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-                                    MLX5_FS_DEFAULT_FLOW_TAG,
-                                    &dest);
+       *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
 
        if (IS_ERR(*rule_p)) {
                err = PTR_ERR(*rule_p);
@@ -229,20 +231,20 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
        switch (rule_type) {
        case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
                if (priv->fs.vlan.untagged_rule) {
-                       mlx5_del_flow_rule(priv->fs.vlan.untagged_rule);
+                       mlx5_del_flow_rules(priv->fs.vlan.untagged_rule);
                        priv->fs.vlan.untagged_rule = NULL;
                }
                break;
        case MLX5E_VLAN_RULE_TYPE_ANY_VID:
                if (priv->fs.vlan.any_vlan_rule) {
-                       mlx5_del_flow_rule(priv->fs.vlan.any_vlan_rule);
+                       mlx5_del_flow_rules(priv->fs.vlan.any_vlan_rule);
                        priv->fs.vlan.any_vlan_rule = NULL;
                }
                break;
        case MLX5E_VLAN_RULE_TYPE_MATCH_VID:
                mlx5e_vport_context_update_vlans(priv);
                if (priv->fs.vlan.active_vlans_rule[vid]) {
-                       mlx5_del_flow_rule(priv->fs.vlan.active_vlans_rule[vid]);
+                       mlx5_del_flow_rules(priv->fs.vlan.active_vlans_rule[vid]);
                        priv->fs.vlan.active_vlans_rule[vid] = NULL;
                }
                mlx5e_vport_context_update_vlans(priv);
@@ -560,7 +562,7 @@ static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc)
 
        for (i = 0; i < MLX5E_NUM_TT; i++) {
                if (!IS_ERR_OR_NULL(ttc->rules[i])) {
-                       mlx5_del_flow_rule(ttc->rules[i]);
+                       mlx5_del_flow_rules(ttc->rules[i]);
                        ttc->rules[i] = NULL;
                }
        }
@@ -616,13 +618,19 @@ static struct {
        },
 };
 
-static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
-                                                     struct mlx5_flow_table *ft,
-                                                     struct mlx5_flow_destination *dest,
-                                                     u16 etype,
-                                                     u8 proto)
+static struct mlx5_flow_handle *
+mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
+                       struct mlx5_flow_table *ft,
+                       struct mlx5_flow_destination *dest,
+                       u16 etype,
+                       u8 proto)
 {
-       struct mlx5_flow_rule *rule;
+       struct mlx5_flow_act flow_act = {
+               .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+               .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+               .encap_id = 0,
+       };
+       struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
        int err = 0;
 
@@ -643,10 +651,7 @@ static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
                MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
        }
 
-       rule = mlx5_add_flow_rule(ft, spec,
-                                 MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-                                 MLX5_FS_DEFAULT_FLOW_TAG,
-                                 dest);
+       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
@@ -660,7 +665,7 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv)
 {
        struct mlx5_flow_destination dest;
        struct mlx5e_ttc_table *ttc;
-       struct mlx5_flow_rule **rules;
+       struct mlx5_flow_handle **rules;
        struct mlx5_flow_table *ft;
        int tt;
        int err;
@@ -776,7 +781,7 @@ static int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
        int err;
 
        ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
-                                      MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL);
+                                      MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL, 0);
        if (IS_ERR(ft->t)) {
                err = PTR_ERR(ft->t);
                ft->t = NULL;
@@ -801,7 +806,7 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
                                   struct mlx5e_l2_rule *ai)
 {
        if (!IS_ERR_OR_NULL(ai->rule)) {
-               mlx5_del_flow_rule(ai->rule);
+               mlx5_del_flow_rules(ai->rule);
                ai->rule = NULL;
        }
 }
@@ -809,6 +814,11 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
 static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
                                  struct mlx5e_l2_rule *ai, int type)
 {
+       struct mlx5_flow_act flow_act = {
+               .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+               .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+               .encap_id = 0,
+       };
        struct mlx5_flow_table *ft = priv->fs.l2.ft.t;
        struct mlx5_flow_destination dest;
        struct mlx5_flow_spec *spec;
@@ -847,9 +857,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
                break;
        }
 
-       ai->rule = mlx5_add_flow_rule(ft, spec,
-                                     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-                                     MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+       ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(ai->rule)) {
                netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n",
                           __func__, mv_dmac);
@@ -947,7 +955,7 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv)
 
        ft->num_groups = 0;
        ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
-                                      MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL);
+                                      MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL, 0);
 
        if (IS_ERR(ft->t)) {
                err = PTR_ERR(ft->t);
@@ -1037,7 +1045,7 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
 
        ft->num_groups = 0;
        ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
-                                      MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL);
+                                      MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL, 0);
 
        if (IS_ERR(ft->t)) {
                err = PTR_ERR(ft->t);
index d17c242279003bcc5a867b464c6f531c2fd6ff7e..3691451c728c0c4731e4fbc7946a8549490edc4d 100644 (file)
@@ -36,7 +36,7 @@
+/*
+ * Per-rule bookkeeping for ethtool flow steering.  @rule holds the value
+ * returned by add_ethtool_flow_rule() (presumably; confirm at the call
+ * site) and is what del_ethtool_rule() passes to mlx5_del_flow_rules().
+ */
 struct mlx5e_ethtool_rule {
        struct list_head             list;
        struct ethtool_rx_flow_spec  flow_spec;
-       struct mlx5_flow_rule        *rule;
+       struct mlx5_flow_handle      *rule;
        struct mlx5e_ethtool_table   *eth_ft;
 };
 
@@ -99,7 +99,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
                           MLX5E_ETHTOOL_NUM_ENTRIES);
        ft = mlx5_create_auto_grouped_flow_table(ns, prio,
                                                 table_size,
-                                                MLX5E_ETHTOOL_NUM_GROUPS, 0);
+                                                MLX5E_ETHTOOL_NUM_GROUPS, 0, 0);
        if (IS_ERR(ft))
                return (void *)ft;
 
@@ -284,15 +284,16 @@ static bool outer_header_zero(u32 *match_criteria)
                                                  size - 1);
 }
 
-static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv,
-                                                   struct mlx5_flow_table *ft,
-                                                   struct ethtool_rx_flow_spec *fs)
+static struct mlx5_flow_handle *
+add_ethtool_flow_rule(struct mlx5e_priv *priv,
+                     struct mlx5_flow_table *ft,
+                     struct ethtool_rx_flow_spec *fs)
 {
        struct mlx5_flow_destination *dst = NULL;
+       struct mlx5_flow_act flow_act = {0};
        struct mlx5_flow_spec *spec;
-       struct mlx5_flow_rule *rule;
+       struct mlx5_flow_handle *rule;
        int err = 0;
-       u32 action;
 
        spec = mlx5_vzalloc(sizeof(*spec));
        if (!spec)
@@ -303,7 +304,7 @@ static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv,
                goto free;
 
        if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
-               action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+               flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
        } else {
                dst = kzalloc(sizeof(*dst), GFP_KERNEL);
                if (!dst) {
@@ -313,12 +314,12 @@ static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv,
 
                dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn;
-               action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+               flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        }
 
        spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
-       rule = mlx5_add_flow_rule(ft, spec, action,
-                                 MLX5_FS_DEFAULT_FLOW_TAG, dst);
+       flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+       rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n",
@@ -335,7 +336,7 @@ static void del_ethtool_rule(struct mlx5e_priv *priv,
                             struct mlx5e_ethtool_rule *eth_rule)
 {
        if (eth_rule->rule)
-               mlx5_del_flow_rule(eth_rule->rule);
+               mlx5_del_flow_rules(eth_rule->rule);
        list_del(&eth_rule->list);
        priv->fs.ethtool.tot_num_rules--;
        put_flow_table(eth_rule->eth_ft);
@@ -475,7 +476,7 @@ int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
 {
        struct mlx5e_ethtool_table *eth_ft;
        struct mlx5e_ethtool_rule *eth_rule;
-       struct mlx5_flow_rule *rule;
+       struct mlx5_flow_handle *rule;
        int num_tuples;
        int err;
 
index 03183eba7003e2901804d05412cd2a12f7c3bab0..9def5cc378a3ae0f955785f0c93f6fa9c2720da7 100644 (file)
@@ -84,7 +84,8 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type)
        switch (priv->params.rq_wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
-               priv->params.mpwqe_log_stride_sz = priv->params.rx_cqe_compress ?
+               priv->params.mpwqe_log_stride_sz =
+                       MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
                        MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
                        MLX5_MPWRQ_LOG_STRIDE_SIZE;
                priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
@@ -101,7 +102,7 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type)
                       priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
                       BIT(priv->params.log_rq_size),
                       BIT(priv->params.mpwqe_log_stride_sz),
-                      priv->params.rx_cqe_compress_admin);
+                      MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
 static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv)
@@ -290,12 +291,36 @@ static void mlx5e_update_q_counter(struct mlx5e_priv *priv)
                                      &qcnt->rx_out_of_buffer);
 }
 
+/*
+ * Refresh priv->stats.pcie by accessing MLX5_REG_MPCNT twice, once per
+ * counter group, reusing a single zero-initialised input buffer.
+ * Returns without updating anything if that buffer cannot be allocated;
+ * the results of mlx5_core_access_reg() are not checked.
+ */
+static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv)
+{
+       struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie;
+       struct mlx5_core_dev *mdev = priv->mdev;
+       int sz = MLX5_ST_SZ_BYTES(mpcnt_reg);
+       void *out;
+       u32 *in;
+
+       in = mlx5_vzalloc(sz);
+       if (!in)
+               return;
+
+       /* performance counters group, written straight into the stats struct */
+       out = pcie_stats->pcie_perf_counters;
+       MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP);
+       mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
+
+       /* same request with only the group field changed */
+       out = pcie_stats->pcie_tas_counters;
+       MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP);
+       mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
+
+       kvfree(in);
+}
+
+/* Run each statistics update helper in turn; none reports a status. */
 void mlx5e_update_stats(struct mlx5e_priv *priv)
 {
        mlx5e_update_q_counter(priv);
        mlx5e_update_vport_counters(priv);
        mlx5e_update_pport_counters(priv);
        mlx5e_update_sw_counters(priv);
+       mlx5e_update_pcie_counters(priv);
 }
 
 void mlx5e_update_stats_work(struct work_struct *work)
@@ -446,14 +471,50 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
        kfree(rq->mpwqe.info);
 }
 
-static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
+/*
+ * Create a UMR-enabled MTT mkey whose length is npages << page_shift.
+ *
+ * Returns -EINVAL when npages fails MLX5E_VALID_NUM_MTTS(), -ENOMEM when
+ * the command buffer cannot be allocated, and otherwise whatever
+ * mlx5_core_create_mkey() returns.
+ */
+static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv,
+                                u64 npages, u8 page_shift,
+                                struct mlx5_core_mkey *umr_mkey)
 {
-       struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+       struct mlx5_core_dev *mdev = priv->mdev;
+       int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+       void *mkc;
+       u32 *in;
+       int err;
+
+       if (!MLX5E_VALID_NUM_MTTS(npages))
+               return -EINVAL;
+
+       in = mlx5_vzalloc(inlen);
+       if (!in)
+               return -ENOMEM;
+
+       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+       /* mkey context: free and umr_en set, local read/write, MTT access */
+       MLX5_SET(mkc, mkc, free, 1);
+       MLX5_SET(mkc, mkc, umr_en, 1);
+       MLX5_SET(mkc, mkc, lw, 1);
+       MLX5_SET(mkc, mkc, lr, 1);
+       MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+
+       MLX5_SET(mkc, mkc, qpn, 0xffffff);
+       MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
+       MLX5_SET64(mkc, mkc, len, npages << page_shift);
+       MLX5_SET(mkc, mkc, translations_octword_size,
+                MLX5_MTT_OCTW(npages));
+       MLX5_SET(mkc, mkc, log_page_size, page_shift);
+
+       err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
+
+       /* the input buffer is released on both success and failure */
+       kvfree(in);
+       return err;
+}
 
-       if (rep && rep->vport != FDB_UPLINK_VPORT)
-               return true;
+/*
+ * Create rq->umr_mkey for this RQ: MLX5E_REQUIRED_MTTS(BIT(log_rq_size))
+ * pages with a page shift of PAGE_SHIFT.
+ */
+static int mlx5e_create_rq_umr_mkey(struct mlx5e_rq *rq)
+{
+       struct mlx5e_priv *priv = rq->priv;
+       u64 num_mtts = MLX5E_REQUIRED_MTTS(BIT(priv->params.log_rq_size));
 
-       return false;
+       return mlx5e_create_umr_mkey(priv, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
 }
 
 static int mlx5e_create_rq(struct mlx5e_channel *c,
@@ -489,7 +550,13 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
        rq->channel = c;
        rq->ix      = c->ix;
        rq->priv    = c->priv;
-       rq->xdp_prog = priv->xdp_prog;
+
+       rq->xdp_prog = priv->xdp_prog ? bpf_prog_inc(priv->xdp_prog) : NULL;
+       if (IS_ERR(rq->xdp_prog)) {
+               err = PTR_ERR(rq->xdp_prog);
+               rq->xdp_prog = NULL;
+               goto err_rq_wq_destroy;
+       }
 
        rq->buff.map_dir = DMA_FROM_DEVICE;
        if (rq->xdp_prog)
@@ -506,18 +573,20 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
                rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
-               rq->mpwqe.mtt_offset = c->ix *
-                       MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size));
-
                rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
                rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
 
                rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
                byte_count = rq->buff.wqe_sz;
-               rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key);
-               err = mlx5e_rq_alloc_mpwqe_info(rq, c);
+
+               err = mlx5e_create_rq_umr_mkey(rq);
                if (err)
                        goto err_rq_wq_destroy;
+               rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
+
+               err = mlx5e_rq_alloc_mpwqe_info(rq, c);
+               if (err)
+                       goto err_destroy_umr_mkey;
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
                rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info),
@@ -566,12 +635,14 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
        rq->page_cache.head = 0;
        rq->page_cache.tail = 0;
 
-       if (rq->xdp_prog)
-               bpf_prog_add(rq->xdp_prog, 1);
-
        return 0;
 
+err_destroy_umr_mkey:
+       mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
+
 err_rq_wq_destroy:
+       if (rq->xdp_prog)
+               bpf_prog_put(rq->xdp_prog);
        mlx5_wq_destroy(&rq->wq_ctrl);
 
        return err;
@@ -587,6 +658,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
        switch (rq->wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                mlx5e_rq_free_mpwqe_info(rq);
+               mlx5_core_destroy_mkey(rq->priv->mdev, &rq->umr_mkey);
                break;
        default: /* MLX5_WQ_TYPE_LINKED_LIST */
                kfree(rq->dma_info);
@@ -938,7 +1010,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
        sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
        sq->max_inline  = param->max_inline;
        sq->min_inline_mode =
-               MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5E_INLINE_MODE_VPORT_CONTEXT ?
+               MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT ?
                param->min_inline_mode : 0;
 
        err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu));
@@ -1181,7 +1253,7 @@ static int mlx5e_create_cq(struct mlx5e_channel *c,
 
+/* Tear down the CQ's wq_ctrl via the CQ-specific work-queue destroy helper. */
 static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
 {
-       mlx5_wq_destroy(&cq->wq_ctrl);
+       mlx5_cqwq_destroy(&cq->wq_ctrl);
 }
 
 static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
@@ -1198,7 +1270,7 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
        int err;
 
        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
-               sizeof(u64) * cq->wq_ctrl.buf.npages;
+               sizeof(u64) * cq->wq_ctrl.frag_buf.npages;
        in = mlx5_vzalloc(inlen);
        if (!in)
                return -ENOMEM;
@@ -1207,15 +1279,15 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 
        memcpy(cqc, param->cqc, sizeof(param->cqc));
 
-       mlx5_fill_page_array(&cq->wq_ctrl.buf,
-                            (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
+       mlx5_fill_page_frag_array(&cq->wq_ctrl.frag_buf,
+                                 (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
 
        mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
 
        MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
        MLX5_SET(cqc,   cqc, c_eqn,         eqn);
        MLX5_SET(cqc,   cqc, uar_page,      mcq->uar->index);
-       MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+       MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.frag_buf.page_shift -
                                            MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(cqc, cqc, dbr_addr,      cq->wq_ctrl.db.dma);
 
@@ -1445,6 +1517,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        c->netdev   = priv->netdev;
        c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
        c->num_tc   = priv->params.num_tc;
+       c->xdp      = !!priv->xdp_prog;
 
        if (priv->params.rx_am_enabled)
                rx_cq_profile = mlx5e_am_get_def_profile(priv->params.rx_cq_period_mode);
@@ -1468,6 +1541,12 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
        if (err)
                goto err_close_tx_cqs;
 
+       /* XDP SQ CQ params are same as normal TXQ sq CQ params */
+       err = c->xdp ? mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq,
+                                    priv->params.tx_cq_moderation) : 0;
+       if (err)
+               goto err_close_rx_cq;
+
        napi_enable(&c->napi);
 
        err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq);
@@ -1488,21 +1567,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
                }
        }
 
-       if (priv->xdp_prog) {
-               /* XDP SQ CQ params are same as normal TXQ sq CQ params */
-               err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq,
-                                   priv->params.tx_cq_moderation);
-               if (err)
-                       goto err_close_sqs;
-
-               err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq);
-               if (err) {
-                       mlx5e_close_cq(&c->xdp_sq.cq);
-                       goto err_close_sqs;
-               }
-       }
+       err = c->xdp ? mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq) : 0;
+       if (err)
+               goto err_close_sqs;
 
-       c->xdp = !!priv->xdp_prog;
        err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
        if (err)
                goto err_close_xdp_sq;
@@ -1512,7 +1580,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 
        return 0;
 err_close_xdp_sq:
-       mlx5e_close_sq(&c->xdp_sq);
+       if (c->xdp)
+               mlx5e_close_sq(&c->xdp_sq);
 
 err_close_sqs:
        mlx5e_close_sqs(c);
@@ -1522,6 +1591,10 @@ err_close_icosq:
 
 err_disable_napi:
        napi_disable(&c->napi);
+       if (c->xdp)
+               mlx5e_close_cq(&c->xdp_sq.cq);
+
+err_close_rx_cq:
        mlx5e_close_cq(&c->rq.cq);
 
 err_close_tx_cqs:
@@ -1532,7 +1605,6 @@ err_close_icosq_cq:
 
 err_napi_del:
        netif_napi_del(&c->napi);
-       napi_hash_del(&c->napi);
        kfree(c);
 
        return err;
@@ -1553,9 +1625,6 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
        mlx5e_close_cq(&c->icosq.cq);
        netif_napi_del(&c->napi);
 
-       napi_hash_del(&c->napi);
-       synchronize_rcu();
-
        kfree(c);
 }
 
@@ -1648,7 +1717,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
        }
 
        MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
-       if (priv->params.rx_cqe_compress) {
+       if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
                MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
                MLX5_SET(cqc, cqc, cqe_comp_en, 1);
        }
@@ -1971,9 +2040,7 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv)
        MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
                 (priv->params.lro_wqe_sz -
                  ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
-       MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
-                MLX5_CAP_ETH(priv->mdev,
-                             lro_timer_supported_periods[2]));
+       MLX5_SET(tirc, tirc, lro_timeout_period_usecs, priv->params.lro_timeout);
 }
 
 void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv)
@@ -2122,7 +2189,7 @@ int mlx5e_open_locked(struct net_device *netdev)
                goto err_clear_state_opened_flag;
        }
 
-       err = mlx5e_refresh_tirs_self_loopback_enable(priv->mdev);
+       err = mlx5e_refresh_tirs_self_loopback(priv->mdev, false);
        if (err) {
                netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n",
                           __func__, err);
@@ -2640,7 +2707,7 @@ mqprio:
        return mlx5e_setup_tc(dev, tc->tc);
 }
 
-struct rtnl_link_stats64 *
+static struct rtnl_link_stats64 *
 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
@@ -2648,13 +2715,20 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
        struct mlx5e_vport_stats *vstats = &priv->stats.vport;
        struct mlx5e_pport_stats *pstats = &priv->stats.pport;
 
-       stats->rx_packets = sstats->rx_packets;
-       stats->rx_bytes   = sstats->rx_bytes;
-       stats->tx_packets = sstats->tx_packets;
-       stats->tx_bytes   = sstats->tx_bytes;
+       if (mlx5e_is_uplink_rep(priv)) {
+               stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
+               stats->rx_bytes   = PPORT_802_3_GET(pstats, a_octets_received_ok);
+               stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
+               stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
+       } else {
+               stats->rx_packets = sstats->rx_packets;
+               stats->rx_bytes   = sstats->rx_bytes;
+               stats->tx_packets = sstats->tx_packets;
+               stats->tx_bytes   = sstats->tx_bytes;
+               stats->tx_dropped = sstats->tx_queue_dropped;
+       }
 
        stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
-       stats->tx_dropped = sstats->tx_queue_dropped;
 
        stats->rx_length_errors =
                PPORT_802_3_GET(pstats, a_in_range_length_errors) +
@@ -2927,6 +3001,20 @@ static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
 
        return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
 }
+
+static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+                            int max_tx_rate)
+{
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5_core_dev *mdev = priv->mdev;
+
+       if (min_tx_rate)
+               return -EOPNOTSUPP;
+
+       return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1,
+                                          max_tx_rate);
+}
+
 static int mlx5_vport_link2ifla(u8 esw_link)
 {
        switch (esw_link) {
@@ -2983,8 +3071,8 @@ static int mlx5e_get_vf_stats(struct net_device *dev,
                                            vf_stats);
 }
 
-static void mlx5e_add_vxlan_port(struct net_device *netdev,
-                                struct udp_tunnel_info *ti)
+void mlx5e_add_vxlan_port(struct net_device *netdev,
+                         struct udp_tunnel_info *ti)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -2997,8 +3085,8 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev,
        mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1);
 }
 
-static void mlx5e_del_vxlan_port(struct net_device *netdev,
-                                struct udp_tunnel_info *ti)
+void mlx5e_del_vxlan_port(struct net_device *netdev,
+                         struct udp_tunnel_info *ti)
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -3106,11 +3194,21 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 
        if (was_opened && reset)
                mlx5e_close_locked(netdev);
+       if (was_opened && !reset) {
+               /* num_channels is invariant here, so we can take the
+                * batched reference right upfront.
+                */
+               prog = bpf_prog_add(prog, priv->params.num_channels);
+               if (IS_ERR(prog)) {
+                       err = PTR_ERR(prog);
+                       goto unlock;
+               }
+       }
 
-       /* exchange programs */
+       /* exchange programs, extra prog reference we got from caller
+        * as long as we don't fail from this point onwards.
+        */
        old_prog = xchg(&priv->xdp_prog, prog);
-       if (prog)
-               bpf_prog_add(prog, 1);
        if (old_prog)
                bpf_prog_put(old_prog);
 
@@ -3126,7 +3224,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
        /* exchanging programs w/o reset, we update ref counts on behalf
         * of the channels RQs here.
         */
-       bpf_prog_add(prog, priv->params.num_channels);
        for (i = 0; i < priv->params.num_channels; i++) {
                struct mlx5e_channel *c = priv->channel[i];
 
@@ -3234,6 +3331,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
        .ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
        .ndo_set_vf_spoofchk     = mlx5e_set_vf_spoofchk,
        .ndo_set_vf_trust        = mlx5e_set_vf_trust,
+       .ndo_set_vf_rate         = mlx5e_set_vf_rate,
        .ndo_get_vf_config       = mlx5e_get_vf_config,
        .ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
        .ndo_get_vf_stats        = mlx5e_get_vf_stats,
@@ -3242,6 +3340,8 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller     = mlx5e_netpoll,
 #endif
+       .ndo_has_offload_stats   = mlx5e_has_offload_stats,
+       .ndo_get_offload_stats   = mlx5e_get_offload_stats,
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -3278,24 +3378,6 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
               2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/;
 }
 
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-static void mlx5e_ets_init(struct mlx5e_priv *priv)
-{
-       int i;
-
-       priv->params.ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
-       for (i = 0; i < priv->params.ets.ets_cap; i++) {
-               priv->params.ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
-               priv->params.ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
-               priv->params.ets.prio_tc[i] = i;
-       }
-
-       /* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */
-       priv->params.ets.prio_tc[0] = 1;
-       priv->params.ets.prio_tc[1] = 0;
-}
-#endif
-
 void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
                                   u32 *indirection_rqt, int len,
                                   int num_channels)
@@ -3370,19 +3452,30 @@ static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev,
                                   u8 *min_inline_mode)
 {
        switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) {
-       case MLX5E_INLINE_MODE_L2:
+       case MLX5_CAP_INLINE_MODE_L2:
                *min_inline_mode = MLX5_INLINE_MODE_L2;
                break;
-       case MLX5E_INLINE_MODE_VPORT_CONTEXT:
-               mlx5_query_nic_vport_min_inline(mdev,
-                                               min_inline_mode);
+       case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+               mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode);
                break;
-       case MLX5_INLINE_MODE_NOT_REQUIRED:
+       case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
                *min_inline_mode = MLX5_INLINE_MODE_NONE;
                break;
        }
 }
 
+u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
+{
+       int i;
+
+       /* The supported periods are organized in ascending order */
+       for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++)
+               if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout)
+                       break;
+
+       return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
+}
+
 static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
                                        struct net_device *netdev,
                                        const struct mlx5e_profile *profile,
@@ -3401,20 +3494,22 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
        priv->profile                      = profile;
        priv->ppriv                        = ppriv;
 
+       priv->params.lro_timeout =
+               mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+
        priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
 
        /* set CQE compression */
-       priv->params.rx_cqe_compress_admin = false;
+       priv->params.rx_cqe_compress_def = false;
        if (MLX5_CAP_GEN(mdev, cqe_compression) &&
            MLX5_CAP_GEN(mdev, vport_group_manager)) {
                mlx5e_get_max_linkspeed(mdev, &link_speed);
                mlx5e_get_pci_bw(mdev, &pci_bw);
                mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n",
                              link_speed, pci_bw);
-               priv->params.rx_cqe_compress_admin =
+               priv->params.rx_cqe_compress_def =
                        cqe_compress_heuristic(link_speed, pci_bw);
        }
-       priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin;
 
        mlx5e_set_rq_priv_params(priv);
        if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
@@ -3445,12 +3540,9 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
        /* Initialize pflags */
-       MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER,
-                           priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
-
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-       mlx5e_ets_init(priv);
-#endif
+       MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER,
+                       priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+       MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, priv->params.rx_cqe_compress_def);
 
        mutex_init(&priv->state_lock);
 
@@ -3488,7 +3580,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
        if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
                netdev->netdev_ops = &mlx5e_netdev_ops_sriov;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
-               netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+               if (MLX5_CAP_GEN(mdev, qos))
+                       netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
 #endif
        } else {
                netdev->netdev_ops = &mlx5e_netdev_ops_basic;
@@ -3584,43 +3677,6 @@ static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv)
        mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
 }
 
-static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
-{
-       struct mlx5_core_dev *mdev = priv->mdev;
-       u64 npages = MLX5E_REQUIRED_MTTS(priv->profile->max_nch(mdev),
-                                        BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW));
-       int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
-       void *mkc;
-       u32 *in;
-       int err;
-
-       in = mlx5_vzalloc(inlen);
-       if (!in)
-               return -ENOMEM;
-
-       mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
-
-       npages = min_t(u32, ALIGN(U16_MAX, 4) * 2, npages);
-
-       MLX5_SET(mkc, mkc, free, 1);
-       MLX5_SET(mkc, mkc, umr_en, 1);
-       MLX5_SET(mkc, mkc, lw, 1);
-       MLX5_SET(mkc, mkc, lr, 1);
-       MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
-
-       MLX5_SET(mkc, mkc, qpn, 0xffffff);
-       MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
-       MLX5_SET64(mkc, mkc, len, npages << PAGE_SHIFT);
-       MLX5_SET(mkc, mkc, translations_octword_size,
-                MLX5_MTT_OCTW(npages));
-       MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
-
-       err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen);
-
-       kvfree(in);
-       return err;
-}
-
 static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
                           struct net_device *netdev,
                           const struct mlx5e_profile *profile,
@@ -3642,6 +3698,9 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 
        if (MLX5_CAP_GEN(mdev, vport_group_manager))
                mlx5_eswitch_unregister_vport_rep(esw, 0);
+
+       if (priv->xdp_prog)
+               bpf_prog_put(priv->xdp_prog);
 }
 
 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
@@ -3724,7 +3783,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
        }
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
-       mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets);
+       mlx5e_dcbnl_initialize(priv);
 #endif
        return 0;
 }
@@ -3752,7 +3811,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
                rep.load = mlx5e_nic_rep_load;
                rep.unload = mlx5e_nic_rep_unload;
                rep.vport = FDB_UPLINK_VPORT;
-               rep.priv_data = priv;
+               rep.netdev = netdev;
                mlx5_eswitch_register_vport_rep(esw, 0, &rep);
        }
 }
@@ -3824,15 +3883,9 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
        profile = priv->profile;
        clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
 
-       err = mlx5e_create_umr_mkey(priv);
-       if (err) {
-               mlx5_core_err(mdev, "create umr mkey failed, %d\n", err);
-               goto out;
-       }
-
        err = profile->init_tx(priv);
        if (err)
-               goto err_destroy_umr_mkey;
+               goto out;
 
        err = mlx5e_open_drop_rq(priv);
        if (err) {
@@ -3872,9 +3925,6 @@ err_close_drop_rq:
 err_cleanup_tx:
        profile->cleanup_tx(priv);
 
-err_destroy_umr_mkey:
-       mlx5_core_destroy_mkey(mdev, &priv->umr_mkey);
-
 out:
        return err;
 }
@@ -3923,7 +3973,6 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
        profile->cleanup_rx(priv);
        mlx5e_close_drop_rq(priv);
        profile->cleanup_tx(priv);
-       mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey);
        cancel_delayed_work_sync(&priv->update_stats_work);
 }
 
@@ -4023,7 +4072,6 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv)
        const struct mlx5e_profile *profile = priv->profile;
        struct net_device *netdev = priv->netdev;
 
-       unregister_netdev(netdev);
        destroy_workqueue(priv->wq);
        if (profile->cleanup)
                profile->cleanup(priv);
@@ -4040,6 +4088,7 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
        for (vport = 1; vport < total_vfs; vport++)
                mlx5_eswitch_unregister_vport_rep(esw, vport);
 
+       unregister_netdev(priv->netdev);
        mlx5e_detach(mdev, vpriv);
        mlx5e_destroy_netdev(mdev, priv);
 }
index 3c97da103d30e8abd0adcd2bf8b5bcb021d517cd..850378893b259c860e15f1a324fe88037b0335ad 100644 (file)
@@ -72,7 +72,29 @@ static void mlx5e_rep_get_strings(struct net_device *dev,
        }
 }
 
-static void mlx5e_update_sw_rep_counters(struct mlx5e_priv *priv)
+static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
+{
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct mlx5_eswitch_rep *rep = priv->ppriv;
+       struct rtnl_link_stats64 *vport_stats;
+       struct ifla_vf_stats vf_stats;
+       int err;
+
+       err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats);
+       if (err) {
+               pr_warn("vport %d error %d reading stats\n", rep->vport, err);
+               return;
+       }
+
+       vport_stats = &priv->stats.vf_vport;
+       /* flip tx/rx as we are reporting the counters for the switch vport */
+       vport_stats->rx_packets = vf_stats.tx_packets;
+       vport_stats->rx_bytes   = vf_stats.tx_bytes;
+       vport_stats->tx_packets = vf_stats.rx_packets;
+       vport_stats->tx_bytes   = vf_stats.rx_bytes;
+}
+
+static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
 {
        struct mlx5e_sw_stats *s = &priv->stats.sw;
        struct mlx5e_rq_stats *rq_stats;
@@ -95,6 +117,12 @@ static void mlx5e_update_sw_rep_counters(struct mlx5e_priv *priv)
        }
 }
 
+static void mlx5e_rep_update_stats(struct mlx5e_priv *priv)
+{
+       mlx5e_rep_update_sw_counters(priv);
+       mlx5e_rep_update_hw_counters(priv);
+}
+
 static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
                                        struct ethtool_stats *stats, u64 *data)
 {
@@ -106,7 +134,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
 
        mutex_lock(&priv->state_lock);
        if (test_bit(MLX5E_STATE_OPENED, &priv->state))
-               mlx5e_update_sw_rep_counters(priv);
+               mlx5e_rep_update_sw_counters(priv);
        mutex_unlock(&priv->state_lock);
 
        for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++)
@@ -180,7 +208,8 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
 
 int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
 {
-       struct mlx5e_priv *priv = rep->priv_data;
+       struct net_device *netdev = rep->netdev;
+       struct mlx5e_priv *priv = netdev_priv(netdev);
 
        if (test_bit(MLX5E_STATE_OPENED, &priv->state))
                return mlx5e_add_sqs_fwd_rules(priv);
@@ -198,7 +227,8 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
 void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
                          struct mlx5_eswitch_rep *rep)
 {
-       struct mlx5e_priv *priv = rep->priv_data;
+       struct net_device *netdev = rep->netdev;
+       struct mlx5e_priv *priv = netdev_priv(netdev);
 
        if (test_bit(MLX5E_STATE_OPENED, &priv->state))
                mlx5e_remove_sqs_fwd_rules(priv);
@@ -208,6 +238,35 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
        mlx5e_tc_init(priv);
 }
 
+static int mlx5e_rep_open(struct net_device *dev)
+{
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5_eswitch_rep *rep = priv->ppriv;
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       int err;
+
+       err = mlx5e_open(dev);
+       if (err)
+               return err;
+
+       err = mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP);
+       if (!err)
+               netif_carrier_on(dev);
+
+       return 0;
+}
+
+static int mlx5e_rep_close(struct net_device *dev)
+{
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5_eswitch_rep *rep = priv->ppriv;
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+       (void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
+
+       return mlx5e_close(dev);
+}
+
 static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
                                        char *buf, size_t len)
 {
@@ -230,6 +289,14 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
        if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
                return -EOPNOTSUPP;
 
+       if (tc->egress_dev) {
+               struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+               struct net_device *uplink_dev = mlx5_eswitch_get_uplink_netdev(esw);
+
+               return uplink_dev->netdev_ops->ndo_setup_tc(uplink_dev, handle,
+                                                           proto, tc);
+       }
+
        switch (tc->type) {
        case TC_SETUP_CLSFLOWER:
                switch (tc->cls_flower->command) {
@@ -245,17 +312,92 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
        }
 }
 
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
+{
+       struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+       if (rep && rep->vport == FDB_UPLINK_VPORT && esw->mode == SRIOV_OFFLOADS)
+               return true;
+
+       return false;
+}
+
+bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
+{
+       struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+
+       if (rep && rep->vport != FDB_UPLINK_VPORT)
+               return true;
+
+       return false;
+}
+
+bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
+{
+       struct mlx5e_priv *priv = netdev_priv(dev);
+
+       switch (attr_id) {
+       case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+               if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv))
+                       return true;
+       }
+
+       return false;
+}
+
+static int
+mlx5e_get_sw_stats64(const struct net_device *dev,
+                    struct rtnl_link_stats64 *stats)
+{
+       struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5e_sw_stats *sstats = &priv->stats.sw;
+
+       stats->rx_packets = sstats->rx_packets;
+       stats->rx_bytes   = sstats->rx_bytes;
+       stats->tx_packets = sstats->tx_packets;
+       stats->tx_bytes   = sstats->tx_bytes;
+
+       stats->tx_dropped = sstats->tx_queue_dropped;
+
+       return 0;
+}
+
+int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+                           void *sp)
+{
+       switch (attr_id) {
+       case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+               return mlx5e_get_sw_stats64(dev, sp);
+       }
+
+       return -EINVAL;
+}
+
+static struct rtnl_link_stats64 *
+mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+       struct mlx5e_priv *priv = netdev_priv(dev);
+
+       memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
+       return stats;
+}
+
 static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
        .switchdev_port_attr_get        = mlx5e_attr_get,
 };
 
 static const struct net_device_ops mlx5e_netdev_ops_rep = {
-       .ndo_open                = mlx5e_open,
-       .ndo_stop                = mlx5e_close,
+       .ndo_open                = mlx5e_rep_open,
+       .ndo_stop                = mlx5e_rep_close,
        .ndo_start_xmit          = mlx5e_xmit,
        .ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
        .ndo_setup_tc            = mlx5e_rep_ndo_setup_tc,
-       .ndo_get_stats64         = mlx5e_get_stats,
+       .ndo_get_stats64         = mlx5e_rep_get_stats,
+       .ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
+       .ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
+       .ndo_has_offload_stats   = mlx5e_has_offload_stats,
+       .ndo_get_offload_stats   = mlx5e_get_offload_stats,
 };
 
 static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev,
@@ -308,7 +450,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
        netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
 #endif
 
-       netdev->features         |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC;
+       netdev->features         |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
        netdev->hw_features      |= NETIF_F_HW_TC;
 
        eth_hw_addr_random(netdev);
@@ -328,7 +470,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_eswitch_rep *rep = priv->ppriv;
        struct mlx5_core_dev *mdev = priv->mdev;
-       struct mlx5_flow_rule *flow_rule;
+       struct mlx5_flow_handle *flow_rule;
        int err;
        int i;
 
@@ -360,7 +502,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
        return 0;
 
 err_del_flow_rule:
-       mlx5_del_flow_rule(rep->vport_rx_rule);
+       mlx5_del_flow_rules(rep->vport_rx_rule);
 err_destroy_direct_tirs:
        mlx5e_destroy_direct_tirs(priv);
 err_destroy_direct_rqts:
@@ -375,7 +517,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
        int i;
 
        mlx5e_tc_cleanup(priv);
-       mlx5_del_flow_rule(rep->vport_rx_rule);
+       mlx5_del_flow_rules(rep->vport_rx_rule);
        mlx5e_destroy_direct_tirs(priv);
        for (i = 0; i < priv->params.num_channels; i++)
                mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
@@ -405,7 +547,7 @@ static struct mlx5e_profile mlx5e_rep_profile = {
        .cleanup_rx             = mlx5e_cleanup_rep_rx,
        .init_tx                = mlx5e_init_rep_tx,
        .cleanup_tx             = mlx5e_cleanup_nic_tx,
-       .update_stats           = mlx5e_update_sw_rep_counters,
+       .update_stats           = mlx5e_rep_update_stats,
        .max_nch                = mlx5e_get_rep_max_num_channels,
        .max_tc                 = 1,
 };
@@ -423,7 +565,7 @@ int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
                return -EINVAL;
        }
 
-       rep->priv_data = netdev_priv(netdev);
+       rep->netdev = netdev;
 
        err = mlx5e_attach_netdev(esw->dev, netdev);
        if (err) {
@@ -445,7 +587,7 @@ err_detach_netdev:
        mlx5e_detach_netdev(esw->dev, netdev);
 
 err_destroy_netdev:
-       mlx5e_destroy_netdev(esw->dev, rep->priv_data);
+       mlx5e_destroy_netdev(esw->dev, netdev_priv(netdev));
 
        return err;
 
@@ -454,9 +596,9 @@ err_destroy_netdev:
 void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
                            struct mlx5_eswitch_rep *rep)
 {
-       struct mlx5e_priv *priv = rep->priv_data;
-       struct net_device *netdev = priv->netdev;
+       struct net_device *netdev = rep->netdev;
 
+       unregister_netdev(netdev);
        mlx5e_detach_netdev(esw->dev, netdev);
-       mlx5e_destroy_netdev(esw->dev, priv);
+       mlx5e_destroy_netdev(esw->dev, netdev_priv(netdev));
 }
index c6de6fba5843e08a2b92e73fc6549369b48096da..42cd687e6608145be1dcbbe030162e73e57b7a52 100644 (file)
@@ -164,14 +164,14 @@ void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val)
 
        mutex_lock(&priv->state_lock);
 
-       if (priv->params.rx_cqe_compress == val)
+       if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) == val)
                goto unlock;
 
        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
        if (was_opened)
                mlx5e_close_locked(priv->netdev);
 
-       priv->params.rx_cqe_compress = val;
+       MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, val);
 
        if (was_opened)
                mlx5e_open_locked(priv->netdev);
@@ -737,10 +737,10 @@ static inline
 struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
                             u16 wqe_counter, u32 cqe_bcnt)
 {
-       struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog);
        struct mlx5e_dma_info *di;
        struct sk_buff *skb;
        void *va, *data;
+       bool consumed;
 
        di             = &rq->dma_info[wqe_counter];
        va             = page_address(di->page);
@@ -759,7 +759,11 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
                return NULL;
        }
 
-       if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt))
+       rcu_read_lock();
+       consumed = mlx5e_xdp_handle(rq, READ_ONCE(rq->xdp_prog), di, data,
+                                   cqe_bcnt);
+       rcu_read_unlock();
+       if (consumed)
                return NULL; /* page/packet was consumed by XDP */
 
        skb = build_skb(va, RQ_PAGE_SIZE(rq));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
new file mode 100644 (file)
index 0000000..65442c3
--- /dev/null
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <net/udp.h>
+#include "en.h"
+
+enum {                         /* self test indices, in the same order as the entries of mlx5e_self_tests[] and mlx5e_st_func[] */
+       MLX5E_ST_LINK_STATE,
+       MLX5E_ST_LINK_SPEED,
+       MLX5E_ST_HEALTH_INFO,
+#ifdef CONFIG_INET
+       MLX5E_ST_LOOPBACK,      /* only present when CONFIG_INET is set */
+#endif
+       MLX5E_ST_NUM,           /* number of self tests; used as the array dimension */
+};
+
+const char mlx5e_self_tests[MLX5E_ST_NUM][ETH_GSTRING_LEN] = {   /* self test names, one per index of the enum above; printed by mlx5e_self_test() */
+       "Link Test",
+       "Speed Test",
+       "Health Test",
+#ifdef CONFIG_INET
+       "Loopback Test",
+#endif
+};
+
+/* Return the number of entries in mlx5e_self_tests[]; @priv is not used. */
+int mlx5e_self_test_num(struct mlx5e_priv *priv)
+{
+       int num_tests = ARRAY_SIZE(mlx5e_self_tests);
+
+       return num_tests;
+}
+
+/* Health self test: returns 1 (failure) when the core device health state
+ * has its 'sick' field set, 0 (success) otherwise.
+ */
+static int mlx5e_test_health_info(struct mlx5e_priv *priv)
+{
+       if (priv->mdev->priv.health.sick)
+               return 1;
+
+       return 0;
+}
+
+/* Link self test: returns 0 only when the netdev carrier is on and the
+ * queried vport state is VPORT_STATE_UP, 1 otherwise.
+ */
+static int mlx5e_test_link_state(struct mlx5e_priv *priv)
+{
+       u8 vport_state;
+
+       if (netif_carrier_ok(priv->netdev)) {
+               vport_state = mlx5_query_vport_state(priv->mdev,
+                                                    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT,
+                                                    0);
+               if (vport_state == VPORT_STATE_UP)
+                       return 0;
+       }
+
+       return 1;
+}
+
+/* Speed self test: returns 0 when the carrier is on, the PTYS register
+ * query succeeds and at least one of the first MLX5E_LINK_MODES_NUMBER
+ * protocol bits is set in eth_proto_oper; returns 1 otherwise.
+ */
+static int mlx5e_test_link_speed(struct mlx5e_priv *priv)
+{
+       u32 ptys[MLX5_ST_SZ_DW(ptys_reg)];
+       u32 oper_modes;
+       int mode;
+
+       if (!netif_carrier_ok(priv->netdev) ||
+           mlx5_query_port_ptys(priv->mdev, ptys, sizeof(ptys), MLX5_PTYS_EN, 1))
+               return 1;
+
+       oper_modes = MLX5_GET(ptys_reg, ptys, eth_proto_oper);
+       for (mode = 0; mode < MLX5E_LINK_MODES_NUMBER; mode++)
+               if (oper_modes & MLX5E_PROT_MASK(mode))
+                       return 0;
+
+       return 1;
+}
+
+#ifdef CONFIG_INET
+/* loopback test */
+#define MLX5E_TEST_PKT_SIZE (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD - NET_IP_ALIGN)
+static const char mlx5e_test_text[ETH_GSTRING_LEN] = "MLX5E SELF TEST";
+#define MLX5E_TEST_MAGIC 0x5AEED15C001ULL
+
+struct mlx5ehdr {                      /* test header placed right after the UDP header of the loopback test packet */
+       __be32 version;                 /* written as 0 by mlx5e_test_get_udp_skb() */
+       __be64 magic;                   /* MLX5E_TEST_MAGIC in big endian; compared by mlx5e_test_loopback_validate() */
+       char   text[ETH_GSTRING_LEN];   /* copy of mlx5e_test_text */
+};
+
+/* Build the frame transmitted by the loopback self test.
+ *
+ * The frame is MLX5E_TEST_PKT_SIZE bytes long: an Ethernet header whose
+ * destination is the netdev's own address, an IPv4 header, a UDP header
+ * (port 9 as both source and destination), a struct mlx5ehdr carrying
+ * MLX5E_TEST_MAGIC, and zero padding up to the full size.
+ *
+ * Returns the skb, or NULL when the allocation fails.
+ */
+static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
+{
+       struct net_device *netdev = priv->netdev;
+       int payload_len, pad_len, ip_len;
+       struct mlx5ehdr *test_hdr;
+       struct sk_buff *skb;
+       struct ethhdr *eth;
+       struct udphdr *udp;
+       struct iphdr *ip;
+
+       /* Number of bytes that follow the UDP header */
+       payload_len = MLX5E_TEST_PKT_SIZE -
+                     (sizeof(*eth) + sizeof(*ip) + sizeof(*udp));
+
+       skb = netdev_alloc_skb(netdev, MLX5E_TEST_PKT_SIZE);
+       if (!skb) {
+               netdev_err(netdev, "\tFailed to alloc loopback skb\n");
+               return NULL;
+       }
+
+       prefetchw(skb->data);
+       skb_reserve(skb, NET_IP_ALIGN);
+
+       /* Lay out the Ethernet, IP and UDP headers back to back and record
+        * where each of them starts.
+        */
+       eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+       skb_reset_mac_header(skb);
+
+       skb_set_network_header(skb, skb->len);
+       ip = (struct iphdr *)skb_put(skb, sizeof(*ip));
+
+       skb_set_transport_header(skb, skb->len);
+       udp = (struct udphdr *)skb_put(skb, sizeof(*udp));
+
+       /* Ethernet: addressed to ourselves, all-zero source */
+       ether_addr_copy(eth->h_dest, netdev->dev_addr);
+       eth_zero_addr(eth->h_source);
+       eth->h_proto = htons(ETH_P_IP);
+
+       /* IPv4: no options, zero addresses, header checksum filled last */
+       ip_len = sizeof(*ip) + sizeof(*udp) + payload_len;
+       ip->version = 4;
+       ip->ihl = 5;
+       ip->tos = 0;
+       ip->tot_len = htons(ip_len);
+       ip->id = 0;
+       ip->frag_off = 0;
+       ip->ttl = 32;
+       ip->protocol = IPPROTO_UDP;
+       ip->saddr = 0;
+       ip->daddr = 0;
+       ip_send_check(ip);
+
+       /* UDP: port 9 is the Discard Protocol */
+       udp->source = htons(9);
+       udp->dest = htons(9);
+       udp->len = htons(payload_len + sizeof(*udp));
+       udp->check = 0;
+
+       /* Test header, then zero padding for the rest of the payload */
+       test_hdr = (struct mlx5ehdr *)skb_put(skb, sizeof(*test_hdr));
+       test_hdr->version = 0;
+       test_hdr->magic = cpu_to_be64(MLX5E_TEST_MAGIC);
+       strlcpy(test_hdr->text, mlx5e_test_text, sizeof(test_hdr->text));
+
+       pad_len = payload_len - sizeof(*test_hdr);
+       memset(skb_put(skb, pad_len), 0, pad_len);
+
+       skb->csum = 0;
+       skb->ip_summed = CHECKSUM_PARTIAL;
+       udp4_hwcsum(skb, ip->saddr, ip->daddr);
+
+       skb->protocol = htons(ETH_P_IP);
+       skb->pkt_type = PACKET_HOST;
+       skb->dev = netdev;
+
+       return skb;
+}
+
+struct mlx5e_lbt_priv {                /* state of one loopback test run */
+       struct packet_type pt;          /* handler registered with dev_add_pack() in mlx5e_test_loopback_setup() */
+       struct completion comp;         /* completed by the handler once a valid test packet is seen */
+       bool loopback_ok;               /* set to true by the handler on a valid test packet */
+};
+
+/* packet_type handler used while the loopback self test is running.
+ *
+ * Peeks at every packet handed to it and, when the packet looks like the
+ * one built by mlx5e_test_get_udp_skb() (IPv4, destination MAC equal to
+ * @orig_ndev's address, UDP destination port 9, MLX5E_TEST_MAGIC in the
+ * test header), marks the test as passed and completes the waiter.
+ *
+ * The skb is always released and 0 is always returned.
+ */
+static int
+mlx5e_test_loopback_validate(struct sk_buff *skb,
+                            struct net_device *ndev,
+                            struct packet_type *pt,
+                            struct net_device *orig_ndev)
+{
+       struct mlx5e_lbt_priv *lbtp = pt->af_packet_priv;
+       struct mlx5ehdr *mlxh;
+       struct ethhdr *ethh;
+       struct udphdr *udph;
+       struct iphdr *iph;
+
+       /* We are only going to peek, no need to clone the SKB */
+       if (skb->protocol != htons(ETH_P_IP))
+               goto out;
+
+       /* Everything inspected below must be in the linear part */
+       if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb))
+               goto out;
+
+       ethh = (struct ethhdr *)skb_mac_header(skb);
+       if (!ether_addr_equal(ethh->h_dest, orig_ndev->dev_addr))
+               goto out;
+
+       iph = ip_hdr(skb);
+       if (iph->protocol != IPPROTO_UDP)
+               goto out;
+
+       /* Don't rely on the transport header offset: on the receive path it
+        * may not point past the IP header yet, so locate the UDP header
+        * from the IP header length instead of using udp_hdr().
+        */
+       udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl);
+       if (udph->dest != htons(9))
+               goto out;
+
+       mlxh = (struct mlx5ehdr *)((char *)udph + sizeof(*udph));
+       if (mlxh->magic != cpu_to_be64(MLX5E_TEST_MAGIC))
+               goto out; /* so close ! */
+
+       /* bingo */
+       lbtp->loopback_ok = true;
+       complete(&lbtp->comp);
+out:
+       kfree_skb(skb);
+       return 0;
+}
+
+/* Prepare a loopback test run: enable self loopback on the TIRs, reset the
+ * result state in @lbtp and register mlx5e_test_loopback_validate() as an
+ * ETH_P_ALL packet handler bound to priv->netdev.
+ *
+ * Returns 0 on success or the error from enabling loopback; in the error
+ * case no handler is registered.
+ */
+static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
+                                    struct mlx5e_lbt_priv *lbtp)
+{
+       struct packet_type *pt = &lbtp->pt;
+       int rc;
+
+       rc = mlx5e_refresh_tirs_self_loopback(priv->mdev, true);
+       if (rc) {
+               netdev_err(priv->netdev,
+                          "\tFailed to enable UC loopback err(%d)\n", rc);
+               return rc;
+       }
+
+       init_completion(&lbtp->comp);
+       lbtp->loopback_ok = false;
+
+       pt->af_packet_priv = lbtp;
+       pt->dev = priv->netdev;
+       pt->func = mlx5e_test_loopback_validate;
+       pt->type = htons(ETH_P_ALL);
+       dev_add_pack(pt);
+
+       return 0;
+}
+
+/* Undo mlx5e_test_loopback_setup(): unregister the packet handler first,
+ * then turn self loopback on the TIRs back off. The result of the latter
+ * is not checked.
+ */
+static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv,
+                                       struct mlx5e_lbt_priv *lbtp)
+{
+       struct packet_type *pt = &lbtp->pt;
+
+       dev_remove_pack(pt);
+       mlx5e_refresh_tirs_self_loopback(priv->mdev, false);
+}
+
+#define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200))
+/* Loopback self test.
+ *
+ * Enables self loopback, transmits one test packet on queue 0 and waits up
+ * to MLX5E_LB_VERIFY_TIMEOUT for mlx5e_test_loopback_validate() to see it.
+ *
+ * Returns 0 when the packet was seen, 1 when it was not, -ENODEV when the
+ * device is not opened, -ENOMEM on allocation failure, or the error
+ * returned by the setup or transmit step.
+ */
+static int mlx5e_test_loopback(struct mlx5e_priv *priv)
+{
+       struct mlx5e_lbt_priv *lbtp;
+       struct sk_buff *skb = NULL;
+       int err;
+
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               netdev_err(priv->netdev,
+                          "\tCan't perform loopback test while device is down\n");
+               return -ENODEV;
+       }
+
+       /* kzalloc() leaves loopback_ok false; setup resets it as well */
+       lbtp = kzalloc(sizeof(*lbtp), GFP_KERNEL);
+       if (!lbtp)
+               return -ENOMEM;
+
+       err = mlx5e_test_loopback_setup(priv, lbtp);
+       if (err)
+               goto out;
+
+       skb = mlx5e_test_get_udp_skb(priv);
+       if (!skb) {
+               err = -ENOMEM;
+               goto cleanup;
+       }
+
+       skb_set_queue_mapping(skb, 0);
+       err = dev_queue_xmit(skb);
+       if (err) {
+               netdev_err(priv->netdev,
+                          "\tFailed to xmit loopback packet err(%d)\n",
+                          err);
+               goto cleanup;
+       }
+
+       /* The verdict comes from loopback_ok, so a timeout simply leaves
+        * it false and the test is reported as failed.
+        */
+       wait_for_completion_timeout(&lbtp->comp, MLX5E_LB_VERIFY_TIMEOUT);
+       err = !lbtp->loopback_ok;
+
+cleanup:
+       mlx5e_test_loopback_cleanup(priv, lbtp);
+out:
+       kfree(lbtp);
+       return err;
+}
+#endif
+
+static int (*mlx5e_st_func[MLX5E_ST_NUM])(struct mlx5e_priv *) = {     /* test handlers, in the same order as mlx5e_self_tests[]; called by mlx5e_self_test() */
+       mlx5e_test_link_state,
+       mlx5e_test_link_speed,
+       mlx5e_test_health_info,
+#ifdef CONFIG_INET
+       mlx5e_test_loopback,
+#endif
+};
+
+/* Run every self test in mlx5e_st_func[] under priv->state_lock.
+ *
+ * @buf receives one result per test (MLX5E_ST_NUM entries, zeroed first);
+ * a non-zero entry means that test failed. ETH_TEST_FL_FAILED is set in
+ * @etest->flags when at least one entry is non-zero.
+ */
+void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
+                    u64 *buf)
+{
+       struct mlx5e_priv *priv = netdev_priv(ndev);
+       bool any_failed = false;
+       int test;
+
+       memset(buf, 0, sizeof(u64) * MLX5E_ST_NUM);
+
+       mutex_lock(&priv->state_lock);
+       netdev_info(ndev, "Self test begin..\n");
+
+       for (test = 0; test < MLX5E_ST_NUM; test++) {
+               netdev_info(ndev, "\t[%d] %s start..\n",
+                           test, mlx5e_self_tests[test]);
+               buf[test] = mlx5e_st_func[test](priv);
+               netdev_info(ndev, "\t[%d] %s end: result(%lld)\n",
+                           test, mlx5e_self_tests[test], buf[test]);
+               if (buf[test])
+                       any_failed = true;
+       }
+
+       mutex_unlock(&priv->state_lock);
+
+       if (any_failed)
+               etest->flags |= ETH_TEST_FL_FAILED;
+
+       netdev_info(ndev, "Self test out: status flags(0x%x)\n",
+                   etest->flags);
+}
index 57452fdc515406b4a6020d81886d5e0f52cafb2a..f202f872f57f6ee0c3e4aad428ffafa7be1a71a2 100644 (file)
@@ -39,7 +39,7 @@
 #define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \
        (*(u32 *)((char *)ptr + dsc[i].offset))
 #define MLX5E_READ_CTR32_BE(ptr, dsc, i) \
-       be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))
+       be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))
 
 #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld)
 #define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld)
@@ -276,6 +276,32 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
        { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
 };
 
+#define PCIE_PERF_OFF(c) \
+       MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c)
+#define PCIE_PERF_GET(pcie_stats, c) \
+       MLX5_GET(mpcnt_reg, pcie_stats->pcie_perf_counters, \
+                counter_set.pcie_perf_cntrs_grp_data_layout.c)
+#define PCIE_TAS_OFF(c) \
+       MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_tas_cntrs_grp_data_layout.c)
+#define PCIE_TAS_GET(pcie_stats, c) \
+       MLX5_GET(mpcnt_reg, pcie_stats->pcie_tas_counters, \
+                counter_set.pcie_tas_cntrs_grp_data_layout.c)
+
+struct mlx5e_pcie_stats {
+       __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)];
+       __be64 pcie_tas_counters[MLX5_ST_SZ_QW(mpcnt_reg)];
+};
+
+static const struct counter_desc pcie_perf_stats_desc[] = {
+       { "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) },
+       { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) },
+};
+
+static const struct counter_desc pcie_tas_stats_desc[] = {
+       { "tx_pci_transport_nonfatal_msg", PCIE_TAS_OFF(non_fatal_err_msg_sent) },
+       { "tx_pci_transport_fatal_msg", PCIE_TAS_OFF(fatal_err_msg_sent) },
+};
+
 struct mlx5e_rq_stats {
        u64 packets;
        u64 bytes;
@@ -360,6 +386,8 @@ static const struct counter_desc sq_stats_desc[] = {
 #define NUM_PPORT_802_3_COUNTERS       ARRAY_SIZE(pport_802_3_stats_desc)
 #define NUM_PPORT_2863_COUNTERS                ARRAY_SIZE(pport_2863_stats_desc)
 #define NUM_PPORT_2819_COUNTERS                ARRAY_SIZE(pport_2819_stats_desc)
+#define NUM_PCIE_PERF_COUNTERS         ARRAY_SIZE(pcie_perf_stats_desc)
+#define NUM_PCIE_TAS_COUNTERS          ARRAY_SIZE(pcie_tas_stats_desc)
 #define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \
        ARRAY_SIZE(pport_per_prio_traffic_stats_desc)
 #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \
@@ -369,6 +397,7 @@ static const struct counter_desc sq_stats_desc[] = {
                                         NUM_PPORT_2819_COUNTERS  + \
                                         NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \
                                         NUM_PPORT_PRIO)
+#define NUM_PCIE_COUNTERS              (NUM_PCIE_PERF_COUNTERS + NUM_PCIE_TAS_COUNTERS)
 #define NUM_RQ_STATS                   ARRAY_SIZE(rq_stats_desc)
 #define NUM_SQ_STATS                   ARRAY_SIZE(sq_stats_desc)
 
@@ -377,6 +406,25 @@ struct mlx5e_stats {
        struct mlx5e_qcounter_stats qcnt;
        struct mlx5e_vport_stats vport;
        struct mlx5e_pport_stats pport;
+       struct mlx5e_pcie_stats pcie;
+       struct rtnl_link_stats64 vf_vport;
+};
+
+static const struct counter_desc mlx5e_pme_status_desc[] = {
+       { "module_plug", 0 },
+       { "module_unplug", 8 },
+};
+
+static const struct counter_desc mlx5e_pme_error_desc[] = {
+       { "module_pwr_budget_exd", 0 },  /* power budget exceed */
+       { "module_long_range", 8 },      /* long range for non MLNX cable */
+       { "module_bus_stuck", 16 },      /* bus stuck (I2C or data shorted) */
+       { "module_no_eeprom", 24 },      /* no eeprom/retry time out */
+       { "module_enforce_part", 32 },   /* enforce part number list */
+       { "module_unknown_id", 40 },     /* unknown identifier */
+       { "module_high_temp", 48 },      /* high temperature */
+       { "module_bad_shorted", 56 },    /* bad or shorted cable/module */
+       { "module_unknown_status", 64 },
 };
 
 #endif /* __MLX5_EN_STATS_H__ */
index 135a95bcc3929d81b236cebd2c8f574275f99040..f07ef8c7da559985112d202cf9b26e2b82d4b3ca 100644 (file)
 #include <net/switchdev.h>
 #include <net/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
+#include <net/vxlan.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
+#include "vxlan.h"
 
 struct mlx5e_tc_flow {
        struct rhash_head       node;
        u64                     cookie;
-       struct mlx5_flow_rule   *rule;
+       struct mlx5_flow_handle *rule;
+       struct list_head        encap; /* flows sharing the same encap */
        struct mlx5_esw_flow_attr *attr;
 };
 
+enum {
+       MLX5_HEADER_TYPE_VXLAN = 0x0,
+       MLX5_HEADER_TYPE_NVGRE = 0x1,
+};
+
 #define MLX5E_TC_TABLE_NUM_ENTRIES 1024
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 
-static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
-                                                   struct mlx5_flow_spec *spec,
-                                                   u32 action, u32 flow_tag)
+static struct mlx5_flow_handle *
+mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
+                     struct mlx5_flow_spec *spec,
+                     u32 action, u32 flow_tag)
 {
        struct mlx5_core_dev *dev = priv->mdev;
        struct mlx5_flow_destination dest = { 0 };
+       struct mlx5_flow_act flow_act = {
+               .action = action,
+               .flow_tag = flow_tag,
+               .encap_id = 0,
+       };
        struct mlx5_fc *counter = NULL;
-       struct mlx5_flow_rule *rule;
+       struct mlx5_flow_handle *rule;
        bool table_created = false;
 
        if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
@@ -82,7 +97,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                                                            MLX5E_TC_PRIO,
                                                            MLX5E_TC_TABLE_NUM_ENTRIES,
                                                            MLX5E_TC_TABLE_NUM_GROUPS,
-                                                           0);
+                                                           0, 0);
                if (IS_ERR(priv->fs.tc.t)) {
                        netdev_err(priv->netdev,
                                   "Failed to create tc offload table\n");
@@ -94,9 +109,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
        }
 
        spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-       rule = mlx5_add_flow_rule(priv->fs.tc.t, spec,
-                                 action, flow_tag,
-                                 &dest);
+       rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1);
 
        if (IS_ERR(rule))
                goto err_add_rule;
@@ -114,9 +127,10 @@ err_create_ft:
        return rule;
 }
 
-static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
-                                                   struct mlx5_flow_spec *spec,
-                                                   struct mlx5_esw_flow_attr *attr)
+static struct mlx5_flow_handle *
+mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+                     struct mlx5_flow_spec *spec,
+                     struct mlx5_esw_flow_attr *attr)
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        int err;
@@ -128,19 +142,39 @@ static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
        return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 }
 
+/* Unlink @flow from the list of flows sharing its encap entry and, when
+ * that leaves the list empty, tear the encap entry down: release the
+ * encap id and the neighbour reference (only if a neighbour is attached),
+ * remove the entry from its hash list and free it.
+ */
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+                              struct mlx5e_tc_flow *flow)
+{
+       struct list_head *following = flow->encap.next;
+       struct mlx5_encap_entry *entry;
+
+       list_del(&flow->encap);
+
+       /* Once the flow is unlinked, its former successor tests as empty
+        * only when it is the list head with nothing else left on it, i.e.
+        * the 'flows' member of the owning encap entry.
+        */
+       if (!list_empty(following))
+               return;
+
+       entry = list_entry(following, struct mlx5_encap_entry, flows);
+       if (entry->n) {
+               mlx5_encap_dealloc(priv->mdev, entry->encap_id);
+               neigh_release(entry->n);
+       }
+       hlist_del_rcu(&entry->encap_hlist);
+       kfree(entry);
+}
+
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
-                             struct mlx5_flow_rule *rule,
-                             struct mlx5_esw_flow_attr *attr)
+                             struct mlx5e_tc_flow *flow)
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_fc *counter = NULL;
 
-       counter = mlx5_flow_rule_counter(rule);
+       counter = mlx5_flow_rule_counter(flow->rule);
 
-       if (esw && esw->mode == SRIOV_OFFLOADS)
-               mlx5_eswitch_del_vlan_action(esw, attr);
+       mlx5_del_flow_rules(flow->rule);
 
-       mlx5_del_flow_rule(rule);
+       if (esw && esw->mode == SRIOV_OFFLOADS) {
+               mlx5_eswitch_del_vlan_action(esw, flow->attr);
+               if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+                       mlx5e_detach_encap(priv, flow);
+       }
 
        mlx5_fc_destroy(priv->mdev, counter);
 
@@ -150,8 +184,125 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
        }
 }
 
-static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
-                           struct tc_cls_flower_offload *f)
+/* Add the VXLAN part of a tunnel match to @spec: always match the outer IP
+ * protocol against UDP and, when the flower rule uses the tunnel key id,
+ * match it (value and mask) as the VXLAN VNI.
+ */
+static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
+                            struct tc_cls_flower_offload *f)
+{
+       struct flow_dissector_key_keyid *vni_key, *vni_mask;
+       void *outer_c, *outer_v, *misc_c, *misc_v;
+
+       outer_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                              outer_headers);
+       outer_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                              outer_headers);
+       misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                             misc_parameters);
+       misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                             misc_parameters);
+
+       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_c, ip_protocol);
+       MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, IPPROTO_UDP);
+
+       if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID))
+               return;
+
+       vni_key = skb_flow_dissector_target(f->dissector,
+                                           FLOW_DISSECTOR_KEY_ENC_KEYID,
+                                           f->key);
+       vni_mask = skb_flow_dissector_target(f->dissector,
+                                            FLOW_DISSECTOR_KEY_ENC_KEYID,
+                                            f->mask);
+
+       MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
+                be32_to_cpu(vni_mask->keyid));
+       MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
+                be32_to_cpu(vni_key->keyid));
+}
+
+/* Translate the tunnel (outer header) part of a flower rule into @spec.
+ *
+ * The rule must carry a fully masked tunnel UDP destination port and no
+ * source port match, the port must be known to mlx5e_vxlan_lookup_port()
+ * and the device must report the vxlan_encap_decap e-switch capability.
+ * Outer IPv4 addresses are matched when given. The outer ethertype (IPv4),
+ * the destination MAC (the netdev's own address) and "not a fragment" are
+ * always matched.
+ *
+ * Returns 0 on success or -EOPNOTSUPP when the rule cannot be offloaded.
+ */
+static int parse_tunnel_attr(struct mlx5e_priv *priv,
+                            struct mlx5_flow_spec *spec,
+                            struct tc_cls_flower_offload *f)
+{
+       void *outer_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                    outer_headers);
+       void *outer_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                    outer_headers);
+       struct flow_dissector_key_ports *port_key, *port_mask;
+
+       /* udp dst port must be given */
+       if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS))
+               return -EOPNOTSUPP;
+
+       port_key = skb_flow_dissector_target(f->dissector,
+                                            FLOW_DISSECTOR_KEY_ENC_PORTS,
+                                            f->key);
+       port_mask = skb_flow_dissector_target(f->dissector,
+                                             FLOW_DISSECTOR_KEY_ENC_PORTS,
+                                             f->mask);
+
+       /* Full udp dst port must be given */
+       if (memchr_inv(&port_mask->dst, 0xff, sizeof(port_mask->dst)))
+               return -EOPNOTSUPP;
+
+       /* udp src port isn't supported */
+       if (memchr_inv(&port_mask->src, 0, sizeof(port_mask->src)))
+               return -EOPNOTSUPP;
+
+       /* Only a known VXLAN port on a capable device can be offloaded */
+       if (!mlx5e_vxlan_lookup_port(priv, be16_to_cpu(port_key->dst)) ||
+           !MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
+               return -EOPNOTSUPP;
+
+       parse_vxlan_attr(spec, f);
+
+       MLX5_SET(fte_match_set_lyr_2_4, outer_c,
+                udp_dport, ntohs(port_mask->dst));
+       MLX5_SET(fte_match_set_lyr_2_4, outer_v,
+                udp_dport, ntohs(port_key->dst));
+
+       if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+               struct flow_dissector_key_ipv4_addrs *ip_key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+                                                 f->key);
+               struct flow_dissector_key_ipv4_addrs *ip_mask =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+                                                 f->mask);
+
+               /* outer source address */
+               MLX5_SET(fte_match_set_lyr_2_4, outer_c,
+                        src_ipv4_src_ipv6.ipv4_layout.ipv4,
+                        ntohl(ip_mask->src));
+               MLX5_SET(fte_match_set_lyr_2_4, outer_v,
+                        src_ipv4_src_ipv6.ipv4_layout.ipv4,
+                        ntohl(ip_key->src));
+
+               /* outer destination address */
+               MLX5_SET(fte_match_set_lyr_2_4, outer_c,
+                        dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+                        ntohl(ip_mask->dst));
+               MLX5_SET(fte_match_set_lyr_2_4, outer_v,
+                        dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+                        ntohl(ip_key->dst));
+       }
+
+       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_c, ethertype);
+       MLX5_SET(fte_match_set_lyr_2_4, outer_v, ethertype, ETH_P_IP);
+
+       /* Enforce DMAC when offloading incoming tunneled flows.
+        * Flow counters require a match on the DMAC.
+        */
+       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_c, dmac_47_16);
+       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_c, dmac_15_0);
+       ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_v,
+                                    dmac_47_16), priv->netdev->dev_addr);
+
+       /* let software handle IP fragments */
+       MLX5_SET(fte_match_set_lyr_2_4, outer_c, frag, 1);
+       MLX5_SET(fte_match_set_lyr_2_4, outer_v, frag, 0);
+
+       return 0;
+}
+
+static int __parse_cls_flower(struct mlx5e_priv *priv,
+                             struct mlx5_flow_spec *spec,
+                             struct tc_cls_flower_offload *f,
+                             u8 *min_inline)
 {
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
@@ -160,6 +311,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
        u16 addr_type = 0;
        u8 ip_proto = 0;
 
+       *min_inline = MLX5_INLINE_MODE_L2;
+
        if (f->dissector->used_keys &
            ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
              BIT(FLOW_DISSECTOR_KEY_BASIC) |
@@ -167,12 +320,44 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
              BIT(FLOW_DISSECTOR_KEY_VLAN) |
              BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
-             BIT(FLOW_DISSECTOR_KEY_PORTS))) {
+             BIT(FLOW_DISSECTOR_KEY_PORTS) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
+             BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL))) {
                netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
                            f->dissector->used_keys);
                return -EOPNOTSUPP;
        }
 
+       if ((dissector_uses_key(f->dissector,
+                               FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
+            dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
+            dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
+           dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+               struct flow_dissector_key_control *key =
+                       skb_flow_dissector_target(f->dissector,
+                                                 FLOW_DISSECTOR_KEY_ENC_CONTROL,
+                                                 f->key);
+               switch (key->addr_type) {
+               case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+                       if (parse_tunnel_attr(priv, spec, f))
+                               return -EOPNOTSUPP;
+                       break;
+               default:
+                       return -EOPNOTSUPP;
+               }
+
+               /* In decap flow, header pointers should point to the inner
+                * headers, outer header were already set by parse_tunnel_attr
+                */
+               headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                                        inner_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                                        inner_headers);
+       }
+
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_dissector_key_control *key =
                        skb_flow_dissector_target(f->dissector,
@@ -201,6 +386,9 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
                         mask->ip_proto);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
                         key->ip_proto);
+
+               if (mask->ip_proto)
+                       *min_inline = MLX5_INLINE_MODE_IP;
        }
 
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
@@ -237,12 +425,15 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_VLAN,
                                                  f->mask);
-               if (mask->vlan_id) {
+               if (mask->vlan_id || mask->vlan_priority) {
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1);
 
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
+
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
+                       MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
                }
        }
 
@@ -268,6 +459,9 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &key->dst, sizeof(key->dst));
+
+               if (mask->src || mask->dst)
+                       *min_inline = MLX5_INLINE_MODE_IP;
        }
 
        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
@@ -293,6 +487,10 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &key->dst, sizeof(key->dst));
+
+               if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
+                   ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
+                       *min_inline = MLX5_INLINE_MODE_IP;
        }
 
        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
@@ -333,11 +531,39 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
                                   "Only UDP and TCP transport are supported\n");
                        return -EINVAL;
                }
+
+               if (mask->src || mask->dst)
+                       *min_inline = MLX5_INLINE_MODE_TCP_UDP;
        }
 
        return 0;
 }
 
+/* Parse the flower match of @f into @spec via __parse_cls_flower() and,
+ * when the e-switch is in SRIOV_OFFLOADS mode and this netdev is not the
+ * uplink representor, refuse flows whose required inline mode exceeds the
+ * inline mode configured on the e-switch.
+ *
+ * Returns 0 on success, the error reported by __parse_cls_flower(), or
+ * -EOPNOTSUPP when the configured inline mode is too small for the match.
+ */
+static int parse_cls_flower(struct mlx5e_priv *priv,
+                           struct mlx5_flow_spec *spec,
+                           struct tc_cls_flower_offload *f)
+{
+       struct mlx5_core_dev *dev = priv->mdev;
+       struct mlx5_eswitch *esw = dev->priv.eswitch;
+       struct mlx5_eswitch_rep *rep = priv->ppriv;
+       u8 min_inline;
+       int err;
+
+       err = __parse_cls_flower(priv, spec, f, &min_inline);
+
+       /* mlx5e_configure_flower() treats the e-switch pointer as optional
+        * (it tests "esw &&" before reading esw->mode); apply the same guard
+        * here so a device without an e-switch does not dereference NULL.
+        */
+       if (!err && esw && esw->mode == SRIOV_OFFLOADS &&
+           rep->vport != FDB_UPLINK_VPORT) {
+               if (min_inline > esw->offloads.inline_mode) {
+                       netdev_warn(priv->netdev,
+                                   "Flow is not offloaded due to min inline setting, required %d actual %d\n",
+                                   min_inline, esw->offloads.inline_mode);
+                       return -EOPNOTSUPP;
+               }
+       }
+
+       return err;
+}
+
 static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                                u32 *action, u32 *flow_tag)
 {
@@ -384,11 +610,243 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
        return 0;
 }
 
+/* Byte-wise comparison of two encap descriptors; returns 0 when every
+ * byte of *a equals the corresponding byte of *b (memcmp() semantics).
+ */
+static inline int cmp_encap_info(struct mlx5_encap_info *a,
+                                struct mlx5_encap_info *b)
+{
+       const size_t len = sizeof(struct mlx5_encap_info);
+
+       return memcmp(a, b, len);
+}
+
+/* Hash the raw bytes of an encap descriptor with jhash(), seed 0. */
+static inline int hash_encap_info(struct mlx5_encap_info *info)
+{
+       const u32 len = sizeof(struct mlx5_encap_info);
+
+       return jhash(info, len, 0);
+}
+
+/* Resolve the IPv4 route described by @fl4 in the namespace of @mirred_dev
+ * and make sure its egress device shares the HW e-switch with @priv.
+ *
+ * On success the following are written:
+ *   @out_dev - egress net_device of the route
+ *   @out_n   - neighbour returned by dst_neigh_lookup() for fl4->daddr;
+ *              releasing it is left to the caller
+ *              (NOTE(review): presumably it carries a reference - confirm)
+ *   @saddr   - source address from @fl4 after the lookup
+ *   @out_ttl - hop limit of the route
+ * None of the outputs are touched on failure.
+ *
+ * Returns 0, -EOPNOTSUPP (no route, CONFIG_INET disabled, or a different
+ * e-switch) or -ENOMEM (no neighbour entry).
+ */
+static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
+                                  struct net_device *mirred_dev,
+                                  struct net_device **out_dev,
+                                  struct flowi4 *fl4,
+                                  struct neighbour **out_n,
+                                  __be32 *saddr,
+                                  int *out_ttl)
+{
+       struct net_device *route_dev;
+       struct rtable *rt;
+       struct neighbour *n = NULL;
+       int ttl;
+
+#if IS_ENABLED(CONFIG_INET)
+       rt = ip_route_output_key(dev_net(mirred_dev), fl4);
+       if (IS_ERR(rt)) {
+               pr_warn("%s: no route to %pI4\n", __func__, &fl4->daddr);
+               return -EOPNOTSUPP;
+       }
+#else
+       return -EOPNOTSUPP;
+#endif
+
+       if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) {
+               pr_warn("%s: Can't offload the flow, netdevices aren't on the same HW e-switch\n",
+                       __func__);
+               ip_rt_put(rt);
+               return -EOPNOTSUPP;
+       }
+
+       /* Fetch everything needed from the route while it is still held;
+        * rt must not be dereferenced once ip_rt_put() has been called.
+        */
+       route_dev = rt->dst.dev;
+       ttl = ip4_dst_hoplimit(&rt->dst);
+       n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
+       ip_rt_put(rt);
+       if (!n)
+               return -ENOMEM;
+
+       *out_n = n;
+       *saddr = fl4->saddr;
+       *out_ttl = ttl;
+       *out_dev = route_dev;
+
+       return 0;
+}
+
+/* Write an Ethernet + IPv4 + UDP + VXLAN header template into @buf.
+ *
+ * The region is zeroed first, then only these fields are filled in:
+ * Ethernet dst (@h_dest), src (@out_dev address) and ethertype; IPv4
+ * version, ihl, ttl, protocol, saddr and daddr; UDP destination port;
+ * VXLAN flags and VNI. Everything else stays zero.
+ *
+ * Returns the number of header bytes written. @buf must be at least that
+ * large; the size is not checked here.
+ */
+static int gen_vxlan_header_ipv4(struct net_device *out_dev,
+                                char buf[],
+                                unsigned char h_dest[ETH_ALEN],
+                                int ttl,
+                                __be32 daddr,
+                                __be32 saddr,
+                                __be16 udp_dst_port,
+                                __be32 vx_vni)
+{
+       int hdr_len = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN;
+       struct ethhdr *l2 = (struct ethhdr *)buf;
+       struct iphdr *l3 = (struct iphdr *)((char *)l2 + sizeof(*l2));
+       struct udphdr *l4 = (struct udphdr *)((char *)l3 + sizeof(*l3));
+       struct vxlanhdr *vx = (struct vxlanhdr *)((char *)l4 + sizeof(*l4));
+
+       memset(buf, 0, hdr_len);
+
+       /* L2 */
+       eth_hdr_fill:
+       ether_addr_copy(l2->h_dest, h_dest);
+       ether_addr_copy(l2->h_source, out_dev->dev_addr);
+       l2->h_proto = htons(ETH_P_IP);
+
+       /* L3 */
+       l3->version = 0x4;
+       l3->ihl = 0x5;
+       l3->ttl = ttl;
+       l3->protocol = IPPROTO_UDP;
+       l3->saddr = saddr;
+       l3->daddr = daddr;
+
+       /* L4 and tunnel header */
+       l4->dest = udp_dst_port;
+       vx->vx_flags = VXLAN_HF_VNI;
+       vx->vx_vni = vxlan_vni_field(vx_vni);
+
+       return hdr_len;
+}
+
+/* Build the IPv4 encapsulation header for encap entry @e and register it
+ * with the device through mlx5_encap_alloc().
+ *
+ * The route and neighbour towards e->tun_info.daddr are resolved first;
+ * the neighbour must be in a NUD_VALID state so that its hardware address
+ * can be used as the outer destination MAC.
+ *
+ * On success e->n, e->out_dev, e->h_dest and e->encap_id are populated and
+ * *out_dev is the egress device; the neighbour stays attached to @e.
+ * On failure the neighbour obtained from the route lookup is released
+ * again and e->n is cleared, so the caller may simply free @e.
+ *
+ * Returns 0 or a negative errno (-ENOMEM, -EOPNOTSUPP, or the error from
+ * the route lookup / mlx5_encap_alloc()).
+ */
+static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
+                                         struct net_device *mirred_dev,
+                                         struct mlx5_encap_entry *e,
+                                         struct net_device **out_dev)
+{
+       int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+       struct flowi4 fl4 = {};
+       struct neighbour *n = NULL;
+       char *encap_header;
+       int encap_size;
+       __be32 saddr;
+       int ttl;
+       int err;
+
+       encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+       if (!encap_header)
+               return -ENOMEM;
+
+       switch (e->tunnel_type) {
+       case MLX5_HEADER_TYPE_VXLAN:
+               fl4.flowi4_proto = IPPROTO_UDP;
+               fl4.fl4_dport = e->tun_info.tp_dst;
+               break;
+       default:
+               err = -EOPNOTSUPP;
+               goto out;
+       }
+       fl4.daddr = e->tun_info.daddr;
+
+       err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev,
+                                     &fl4, &n, &saddr, &ttl);
+       if (err)
+               goto out;
+
+       e->n = n;
+       e->out_dev = *out_dev;
+
+       if (!(n->nud_state & NUD_VALID)) {
+               /* -EOPNOTSUPP, like every other "cannot offload" path here;
+                * -ENOTSUPP is not an errno user space understands.
+                */
+               err = -EOPNOTSUPP;
+               goto out;
+       }
+
+       neigh_ha_snapshot(e->h_dest, n, *out_dev);
+
+       switch (e->tunnel_type) {
+       case MLX5_HEADER_TYPE_VXLAN:
+               encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
+                                                  e->h_dest, ttl,
+                                                  e->tun_info.daddr,
+                                                  saddr, e->tun_info.tp_dst,
+                                                  e->tun_info.tun_id);
+               break;
+       default:
+               err = -EOPNOTSUPP;
+               goto out;
+       }
+
+       err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
+                              encap_size, encap_header, &e->encap_id);
+out:
+       /* Do not leak the neighbour when the entry could not be set up */
+       if (err && n) {
+               neigh_release(n);
+               e->n = NULL;
+       }
+       kfree(encap_header);
+       return err;
+}
+
+/* Attach an encap entry matching @tun_info to @attr.
+ *
+ * An existing entry with identical tunnel parameters in
+ * esw->offloads.encap_tbl is reused; otherwise a new entry is allocated,
+ * its encapsulation header is created for the route via @mirred_dev, and
+ * it is inserted into the table.
+ *
+ * Only VXLAN over IPv4 is handled: the UDP destination port must be
+ * non-zero, known to mlx5e_vxlan_lookup_port() and the device must report
+ * the vxlan_encap_decap capability.
+ *
+ * Returns 0 with attr->encap set, -EOPNOTSUPP for unsupported tunnels,
+ * -ENOMEM, or the error from mlx5e_create_encap_header_ipv4().
+ */
+static int mlx5e_attach_encap(struct mlx5e_priv *priv,
+                             struct ip_tunnel_info *tun_info,
+                             struct net_device *mirred_dev,
+                             struct mlx5_esw_flow_attr *attr)
+{
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       unsigned short family = ip_tunnel_info_af(tun_info);
+       struct ip_tunnel_key *key = &tun_info->key;
+       struct mlx5_encap_info info;
+       struct mlx5_encap_entry *e;
+       struct net_device *out_dev;
+       uintptr_t hash_key;
+       int tunnel_type;
+       int err;
+
+       /* udp dst port must be given */
+       if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
+               return -EOPNOTSUPP;
+
+       /* info is hashed and compared as raw bytes below, so every byte of
+        * it (including any padding or field not assigned here) must have
+        * a defined value.
+        */
+       memset(&info, 0, sizeof(info));
+
+       if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
+           MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
+               info.tp_dst = key->tp_dst;
+               info.tun_id = tunnel_id_to_key32(key->tun_id);
+               tunnel_type = MLX5_HEADER_TYPE_VXLAN;
+       } else {
+               return -EOPNOTSUPP;
+       }
+
+       switch (family) {
+       case AF_INET:
+               info.daddr = key->u.ipv4.dst;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       hash_key = hash_encap_info(&info);
+
+       /* Reuse an already offloaded encap with the same parameters */
+       hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
+                                  encap_hlist, hash_key) {
+               if (!cmp_encap_info(&e->tun_info, &info)) {
+                       attr->encap = e;
+                       return 0;
+               }
+       }
+
+       e = kzalloc(sizeof(*e), GFP_KERNEL);
+       if (!e)
+               return -ENOMEM;
+
+       e->tun_info = info;
+       e->tunnel_type = tunnel_type;
+       INIT_LIST_HEAD(&e->flows);
+
+       err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
+       if (err)
+               goto out_err;
+
+       attr->encap = e;
+       hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+
+       return err;
+
+out_err:
+       kfree(e);
+       return err;
+}
+
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
-                               struct mlx5_esw_flow_attr *attr)
+                               struct mlx5e_tc_flow *flow)
 {
+       struct mlx5_esw_flow_attr *attr = flow->attr;
+       struct ip_tunnel_info *info = NULL;
        const struct tc_action *a;
        LIST_HEAD(actions);
+       bool encap = false;
+       int err;
 
        if (tc_no_actions(exts))
                return -EINVAL;
@@ -411,15 +869,37 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 
                        out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
 
-                       if (!switchdev_port_same_parent_id(priv->netdev, out_dev)) {
+                       if (switchdev_port_same_parent_id(priv->netdev,
+                                                         out_dev)) {
+                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+                                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                               out_priv = netdev_priv(out_dev);
+                               attr->out_rep = out_priv->ppriv;
+                       } else if (encap) {
+                               err = mlx5e_attach_encap(priv, info,
+                                                        out_dev, attr);
+                               if (err)
+                                       return err;
+                               list_add(&flow->encap, &attr->encap->flows);
+                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+                                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+                                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
+                               out_priv = netdev_priv(attr->encap->out_dev);
+                               attr->out_rep = out_priv->ppriv;
+                       } else {
                                pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
                                       priv->netdev->name, out_dev->name);
                                return -EINVAL;
                        }
+                       continue;
+               }
 
-                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-                       out_priv = netdev_priv(out_dev);
-                       attr->out_rep = out_priv->ppriv;
+               if (is_tcf_tunnel_set(a)) {
+                       info = tcf_tunnel_info(a);
+                       if (info)
+                               encap = true;
+                       else
+                               return -EOPNOTSUPP;
                        continue;
                }
 
@@ -436,6 +916,11 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
                        continue;
                }
 
+               if (is_tcf_tunnel_release(a)) {
+                       attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+                       continue;
+               }
+
                return -EINVAL;
        }
        return 0;
@@ -450,25 +935,17 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
        u32 flow_tag, action;
        struct mlx5e_tc_flow *flow;
        struct mlx5_flow_spec *spec;
-       struct mlx5_flow_rule *old = NULL;
-       struct mlx5_esw_flow_attr *old_attr = NULL;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 
        if (esw && esw->mode == SRIOV_OFFLOADS)
                fdb_flow = true;
 
-       flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
-                                     tc->ht_params);
-       if (flow) {
-               old = flow->rule;
-               old_attr = flow->attr;
-       } else {
-               if (fdb_flow)
-                       flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr),
-                                      GFP_KERNEL);
-               else
-                       flow = kzalloc(sizeof(*flow), GFP_KERNEL);
-       }
+       if (fdb_flow)
+               flow = kzalloc(sizeof(*flow) +
+                              sizeof(struct mlx5_esw_flow_attr),
+                              GFP_KERNEL);
+       else
+               flow = kzalloc(sizeof(*flow), GFP_KERNEL);
 
        spec = mlx5_vzalloc(sizeof(*spec));
        if (!spec || !flow) {
@@ -484,7 +961,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 
        if (fdb_flow) {
                flow->attr  = (struct mlx5_esw_flow_attr *)(flow + 1);
-               err = parse_tc_fdb_actions(priv, f->exts, flow->attr);
+               err = parse_tc_fdb_actions(priv, f->exts, flow);
                if (err < 0)
                        goto err_free;
                flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr);
@@ -505,17 +982,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
        if (err)
                goto err_del_rule;
 
-       if (old)
-               mlx5e_tc_del_flow(priv, old, old_attr);
-
        goto out;
 
 err_del_rule:
-       mlx5_del_flow_rule(flow->rule);
+       mlx5_del_flow_rules(flow->rule);
 
 err_free:
-       if (!old)
-               kfree(flow);
+       kfree(flow);
 out:
        kvfree(spec);
        return err;
@@ -534,7 +1007,8 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv,
 
        rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
 
-       mlx5e_tc_del_flow(priv, flow->rule, flow->attr);
+       mlx5e_tc_del_flow(priv, flow);
+
 
        kfree(flow);
 
@@ -591,7 +1065,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg)
        struct mlx5e_tc_flow *flow = ptr;
        struct mlx5e_priv *priv = arg;
 
-       mlx5e_tc_del_flow(priv, flow->rule, flow->attr);
+       mlx5e_tc_del_flow(priv, flow);
        kfree(flow);
 }
 
index aaca09002ca678c9794cb01ba2443f51578ecc29..8ffcc8808e50015c93296b38d0c78037a484d398 100644 (file)
@@ -139,6 +139,8 @@ static const char *eqe_type_str(u8 type)
                return "MLX5_EVENT_TYPE_PORT_CHANGE";
        case MLX5_EVENT_TYPE_GPIO_EVENT:
                return "MLX5_EVENT_TYPE_GPIO_EVENT";
+       case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+               return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
        case MLX5_EVENT_TYPE_REMOTE_CONFIG:
                return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
        case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
@@ -285,6 +287,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
                        mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
                        break;
 #endif
+
+               case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+                       mlx5_port_module_event(dev, eqe);
+                       break;
+
                default:
                        mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
                                       eqe->type, eq->eqn);
@@ -469,7 +476,7 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
 int mlx5_start_eqs(struct mlx5_core_dev *dev)
 {
        struct mlx5_eq_table *table = &dev->priv.eq_table;
-       u32 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+       u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
        int err;
 
        if (MLX5_CAP_GEN(dev, pg))
@@ -480,6 +487,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
            mlx5_core_is_pf(dev))
                async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
 
+       if (MLX5_CAP_GEN(dev, port_module_event))
+               async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
+       else
+               mlx5_core_dbg(dev, "port_module_event is not set\n");
+
        err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
                                 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
                                 "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
index abbf2c369923d02534b1ebfa82e212bcefce0333..d6807c3cc461f001aa01072e395adf2e93095bfd 100644 (file)
@@ -56,7 +56,7 @@ struct esw_uc_addr {
 /* E-Switch MC FDB table hash node */
 struct esw_mc_addr { /* SRIOV only */
        struct l2addr_node     node;
-       struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */
+       struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */
        u32                    refcnt;
 };
 
@@ -65,7 +65,7 @@ struct vport_addr {
        struct l2addr_node     node;
        u8                     action;
        u32                    vport;
-       struct mlx5_flow_rule *flow_rule; /* SRIOV only */
+       struct mlx5_flow_handle *flow_rule; /* SRIOV only */
        /* A flag indicating that mac was added due to mc promiscuous vport */
        bool mc_promisc;
 };
@@ -237,13 +237,14 @@ static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index)
 }
 
 /* E-Switch FDB */
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
                         u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN])
 {
        int match_header = (is_zero_ether_addr(mac_c) ? 0 :
                            MLX5_MATCH_OUTER_HEADERS);
-       struct mlx5_flow_rule *flow_rule = NULL;
+       struct mlx5_flow_handle *flow_rule = NULL;
+       struct mlx5_flow_act flow_act = {0};
        struct mlx5_flow_destination dest;
        struct mlx5_flow_spec *spec;
        void *mv_misc = NULL;
@@ -285,10 +286,10 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
                  "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n",
                  dmac_v, dmac_c, vport);
        spec->match_criteria_enable = match_header;
+       flow_act.action =  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        flow_rule =
-               mlx5_add_flow_rule(esw->fdb_table.fdb, spec,
-                                  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-                                  0, &dest);
+               mlx5_add_flow_rules(esw->fdb_table.fdb, spec,
+                                   &flow_act, &dest, 1);
        if (IS_ERR(flow_rule)) {
                esw_warn(esw->dev,
                         "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
@@ -300,7 +301,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
        return flow_rule;
 }
 
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
 {
        u8 mac_c[ETH_ALEN];
@@ -309,7 +310,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
        return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac);
 }
 
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
 {
        u8 mac_c[ETH_ALEN];
@@ -322,7 +323,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
        return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v);
 }
 
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport)
 {
        u8 mac_c[ETH_ALEN];
@@ -361,7 +362,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports)
        memset(flow_group_in, 0, inlen);
 
        table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
-       fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0);
+       fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0, 0);
        if (IS_ERR(fdb)) {
                err = PTR_ERR(fdb);
                esw_warn(dev, "Failed to create FDB Table err %d\n", err);
@@ -515,7 +516,7 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
        del_l2_table_entry(esw->dev, esw_uc->table_index);
 
        if (vaddr->flow_rule)
-               mlx5_del_flow_rule(vaddr->flow_rule);
+               mlx5_del_flow_rules(vaddr->flow_rule);
        vaddr->flow_rule = NULL;
 
        l2addr_hash_del(esw_uc);
@@ -562,7 +563,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
                case MLX5_ACTION_DEL:
                        if (!iter_vaddr)
                                continue;
-                       mlx5_del_flow_rule(iter_vaddr->flow_rule);
+                       mlx5_del_flow_rules(iter_vaddr->flow_rule);
                        l2addr_hash_del(iter_vaddr);
                        break;
                }
@@ -632,7 +633,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
                  esw_mc->uplink_rule);
 
        if (vaddr->flow_rule)
-               mlx5_del_flow_rule(vaddr->flow_rule);
+               mlx5_del_flow_rules(vaddr->flow_rule);
        vaddr->flow_rule = NULL;
 
        /* If the multicast mac is added as a result of mc promiscuous vport,
@@ -645,7 +646,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
        update_allmulti_vports(esw, vaddr, esw_mc);
 
        if (esw_mc->uplink_rule)
-               mlx5_del_flow_rule(esw_mc->uplink_rule);
+               mlx5_del_flow_rules(esw_mc->uplink_rule);
 
        l2addr_hash_del(esw_mc);
        return 0;
@@ -828,14 +829,14 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num,
                                                                UPLINK_VPORT);
                allmulti_addr->refcnt++;
        } else if (vport->allmulti_rule) {
-               mlx5_del_flow_rule(vport->allmulti_rule);
+               mlx5_del_flow_rules(vport->allmulti_rule);
                vport->allmulti_rule = NULL;
 
                if (--allmulti_addr->refcnt > 0)
                        goto promisc;
 
                if (allmulti_addr->uplink_rule)
-                       mlx5_del_flow_rule(allmulti_addr->uplink_rule);
+                       mlx5_del_flow_rules(allmulti_addr->uplink_rule);
                allmulti_addr->uplink_rule = NULL;
        }
 
@@ -847,7 +848,7 @@ promisc:
                vport->promisc_rule = esw_fdb_set_vport_promisc_rule(esw,
                                                                     vport_num);
        } else if (vport->promisc_rule) {
-               mlx5_del_flow_rule(vport->promisc_rule);
+               mlx5_del_flow_rules(vport->promisc_rule);
                vport->promisc_rule = NULL;
        }
 }
@@ -931,8 +932,8 @@ static void esw_vport_change_handler(struct work_struct *work)
        mutex_unlock(&esw->state_lock);
 }
 
-static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
-                                       struct mlx5_vport *vport)
+static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
+                                      struct mlx5_vport *vport)
 {
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
        struct mlx5_flow_group *vlan_grp = NULL;
@@ -949,9 +950,11 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
        int table_size = 2;
        int err = 0;
 
-       if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support) ||
-           !IS_ERR_OR_NULL(vport->egress.acl))
-               return;
+       if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
+               return -EOPNOTSUPP;
+
+       if (!IS_ERR_OR_NULL(vport->egress.acl))
+               return 0;
 
        esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n",
                  vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
@@ -959,12 +962,12 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
        root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS);
        if (!root_ns) {
                esw_warn(dev, "Failed to get E-Switch egress flow namespace\n");
-               return;
+               return -EIO;
        }
 
        flow_group_in = mlx5_vzalloc(inlen);
        if (!flow_group_in)
-               return;
+               return -ENOMEM;
 
        acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
        if (IS_ERR(acl)) {
@@ -1009,16 +1012,17 @@ out:
                mlx5_destroy_flow_group(vlan_grp);
        if (err && !IS_ERR_OR_NULL(acl))
                mlx5_destroy_flow_table(acl);
+       return err;
 }
 
 static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
                                           struct mlx5_vport *vport)
 {
        if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan))
-               mlx5_del_flow_rule(vport->egress.allowed_vlan);
+               mlx5_del_flow_rules(vport->egress.allowed_vlan);
 
        if (!IS_ERR_OR_NULL(vport->egress.drop_rule))
-               mlx5_del_flow_rule(vport->egress.drop_rule);
+               mlx5_del_flow_rules(vport->egress.drop_rule);
 
        vport->egress.allowed_vlan = NULL;
        vport->egress.drop_rule = NULL;
@@ -1041,8 +1045,8 @@ static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
        vport->egress.acl = NULL;
 }
 
-static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
-                                        struct mlx5_vport *vport)
+static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
+                                       struct mlx5_vport *vport)
 {
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
        struct mlx5_core_dev *dev = esw->dev;
@@ -1063,9 +1067,11 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
        int table_size = 4;
        int err = 0;
 
-       if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support) ||
-           !IS_ERR_OR_NULL(vport->ingress.acl))
-               return;
+       if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
+               return -EOPNOTSUPP;
+
+       if (!IS_ERR_OR_NULL(vport->ingress.acl))
+               return 0;
 
        esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n",
                  vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
@@ -1073,12 +1079,12 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
        root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS);
        if (!root_ns) {
                esw_warn(dev, "Failed to get E-Switch ingress flow namespace\n");
-               return;
+               return -EIO;
        }
 
        flow_group_in = mlx5_vzalloc(inlen);
        if (!flow_group_in)
-               return;
+               return -ENOMEM;
 
        acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
        if (IS_ERR(acl)) {
@@ -1167,16 +1173,17 @@ out:
        }
 
        kvfree(flow_group_in);
+       return err;
 }
 
 static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
                                            struct mlx5_vport *vport)
 {
        if (!IS_ERR_OR_NULL(vport->ingress.drop_rule))
-               mlx5_del_flow_rule(vport->ingress.drop_rule);
+               mlx5_del_flow_rules(vport->ingress.drop_rule);
 
        if (!IS_ERR_OR_NULL(vport->ingress.allow_rule))
-               mlx5_del_flow_rule(vport->ingress.allow_rule);
+               mlx5_del_flow_rules(vport->ingress.allow_rule);
 
        vport->ingress.drop_rule = NULL;
        vport->ingress.allow_rule = NULL;
@@ -1206,6 +1213,7 @@ static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
 static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
                                    struct mlx5_vport *vport)
 {
+       struct mlx5_flow_act flow_act = {0};
        struct mlx5_flow_spec *spec;
        int err = 0;
        u8 *smac_v;
@@ -1225,7 +1233,13 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
                return 0;
        }
 
-       esw_vport_enable_ingress_acl(esw, vport);
+       err = esw_vport_enable_ingress_acl(esw, vport);
+       if (err) {
+               mlx5_core_warn(esw->dev,
+                              "failed to enable ingress acl (%d) on vport[%d]\n",
+                              err, vport->vport);
+               return err;
+       }
 
        esw_debug(esw->dev,
                  "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
@@ -1252,10 +1266,10 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
        }
 
        spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
        vport->ingress.allow_rule =
-               mlx5_add_flow_rule(vport->ingress.acl, spec,
-                                  MLX5_FLOW_CONTEXT_ACTION_ALLOW,
-                                  0, NULL);
+               mlx5_add_flow_rules(vport->ingress.acl, spec,
+                                   &flow_act, NULL, 0);
        if (IS_ERR(vport->ingress.allow_rule)) {
                err = PTR_ERR(vport->ingress.allow_rule);
                esw_warn(esw->dev,
@@ -1266,10 +1280,10 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
        }
 
        memset(spec, 0, sizeof(*spec));
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
        vport->ingress.drop_rule =
-               mlx5_add_flow_rule(vport->ingress.acl, spec,
-                                  MLX5_FLOW_CONTEXT_ACTION_DROP,
-                                  0, NULL);
+               mlx5_add_flow_rules(vport->ingress.acl, spec,
+                                   &flow_act, NULL, 0);
        if (IS_ERR(vport->ingress.drop_rule)) {
                err = PTR_ERR(vport->ingress.drop_rule);
                esw_warn(esw->dev,
@@ -1289,6 +1303,7 @@ out:
 static int esw_vport_egress_config(struct mlx5_eswitch *esw,
                                   struct mlx5_vport *vport)
 {
+       struct mlx5_flow_act flow_act = {0};
        struct mlx5_flow_spec *spec;
        int err = 0;
 
@@ -1299,7 +1314,13 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
                return 0;
        }
 
-       esw_vport_enable_egress_acl(esw, vport);
+       err = esw_vport_enable_egress_acl(esw, vport);
+       if (err) {
+               mlx5_core_warn(esw->dev,
+                              "failed to enable egress acl (%d) on vport[%d]\n",
+                              err, vport->vport);
+               return err;
+       }
 
        esw_debug(esw->dev,
                  "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
@@ -1320,10 +1341,10 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
        MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan);
 
        spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
        vport->egress.allowed_vlan =
-               mlx5_add_flow_rule(vport->egress.acl, spec,
-                                  MLX5_FLOW_CONTEXT_ACTION_ALLOW,
-                                  0, NULL);
+               mlx5_add_flow_rules(vport->egress.acl, spec,
+                                   &flow_act, NULL, 0);
        if (IS_ERR(vport->egress.allowed_vlan)) {
                err = PTR_ERR(vport->egress.allowed_vlan);
                esw_warn(esw->dev,
@@ -1335,10 +1356,10 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 
        /* Drop others rule (star rule) */
        memset(spec, 0, sizeof(*spec));
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
        vport->egress.drop_rule =
-               mlx5_add_flow_rule(vport->egress.acl, spec,
-                                  MLX5_FLOW_CONTEXT_ACTION_DROP,
-                                  0, NULL);
+               mlx5_add_flow_rules(vport->egress.acl, spec,
+                                   &flow_act, NULL, 0);
        if (IS_ERR(vport->egress.drop_rule)) {
                err = PTR_ERR(vport->egress.drop_rule);
                esw_warn(esw->dev,
@@ -1351,6 +1372,147 @@ out:
        return err;
 }
 
+/* Vport QoS management */
+/*
+ * Create the root TSAR scheduling element of the E-Switch hierarchy and
+ * remember its id in esw->qos.root_tsar_id.
+ *
+ * Returns 0 on success, and also (without creating anything) when the
+ * device lacks the qos / esw_scheduling capabilities. Returns -EEXIST if
+ * E-Switch QoS is already enabled, or the error of the create command.
+ */
+static int esw_create_tsar(struct mlx5_eswitch *esw)
+{
+       u32 root_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+       struct mlx5_core_dev *mdev = esw->dev;
+       int ret;
+
+       /* Missing capability is not an error: QoS simply stays disabled */
+       if (!(MLX5_CAP_GEN(mdev, qos) && MLX5_CAP_QOS(mdev, esw_scheduling)))
+               return 0;
+
+       if (esw->qos.enabled)
+               return -EEXIST;
+
+       ret = mlx5_create_scheduling_element_cmd(mdev,
+                                                SCHEDULING_HIERARCHY_E_SWITCH,
+                                                &root_ctx,
+                                                &esw->qos.root_tsar_id);
+       if (!ret) {
+               esw->qos.enabled = true;
+               return 0;
+       }
+
+       esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", ret);
+       return ret;
+}
+
+/*
+ * Destroy the root TSAR scheduling element identified by
+ * esw->qos.root_tsar_id. Does nothing when E-Switch QoS is not enabled.
+ * A failing destroy command is only logged; the enabled flag is cleared
+ * in either case.
+ */
+static void esw_destroy_tsar(struct mlx5_eswitch *esw)
+{
+       int ret;
+
+       if (esw->qos.enabled) {
+               ret = mlx5_destroy_scheduling_element_cmd(esw->dev,
+                                                         SCHEDULING_HIERARCHY_E_SWITCH,
+                                                         esw->qos.root_tsar_id);
+               if (ret)
+                       esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", ret);
+
+               esw->qos.enabled = false;
+       }
+}
+
+/*
+ * Create a vport scheduling element under the E-Switch root TSAR, with
+ * initial_max_rate programmed as its max_average_bw, and store the new
+ * element id in vport->qos.esw_tsar_ix.
+ *
+ * Returns 0 on success, and also (doing nothing) when E-Switch QoS is not
+ * enabled or the device lacks the qos / esw_scheduling capabilities.
+ * Returns -EEXIST if QoS is already enabled on this vport, or the error
+ * of the create command.
+ */
+static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num,
+                               u32 initial_max_rate)
+{
+       u32 elem_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+       struct mlx5_vport *vp = &esw->vports[vport_num];
+       struct mlx5_core_dev *mdev = esw->dev;
+       void *elem_attrs;
+       int ret;
+
+       if (!esw->qos.enabled)
+               return 0;
+       if (!MLX5_CAP_GEN(mdev, qos) || !MLX5_CAP_QOS(mdev, esw_scheduling))
+               return 0;
+
+       if (vp->qos.enabled)
+               return -EEXIST;
+
+       /* Context fields common to any scheduling element */
+       MLX5_SET(scheduling_context, &elem_ctx, element_type,
+                SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+       MLX5_SET(scheduling_context, &elem_ctx, parent_element_id,
+                esw->qos.root_tsar_id);
+       MLX5_SET(scheduling_context, &elem_ctx, max_average_bw,
+                initial_max_rate);
+
+       /* Vport-specific attributes live in the element_attributes area */
+       elem_attrs = MLX5_ADDR_OF(scheduling_context, &elem_ctx,
+                                 element_attributes);
+       MLX5_SET(vport_element, elem_attrs, vport_number, vport_num);
+
+       ret = mlx5_create_scheduling_element_cmd(mdev,
+                                                SCHEDULING_HIERARCHY_E_SWITCH,
+                                                &elem_ctx,
+                                                &vp->qos.esw_tsar_ix);
+       if (!ret) {
+               vp->qos.enabled = true;
+               return 0;
+       }
+
+       esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
+                vport_num, ret);
+       return ret;
+}
+
+/*
+ * Destroy the vport's scheduling element (vport->qos.esw_tsar_ix).
+ * Does nothing when QoS is not enabled on the vport. A failing destroy
+ * command is only logged; the vport's enabled flag is cleared in either
+ * case.
+ */
+static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num)
+{
+       struct mlx5_vport *vp = &esw->vports[vport_num];
+       int ret;
+
+       if (vp->qos.enabled) {
+               ret = mlx5_destroy_scheduling_element_cmd(esw->dev,
+                                                         SCHEDULING_HIERARCHY_E_SWITCH,
+                                                         vp->qos.esw_tsar_ix);
+               if (ret)
+                       esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
+                                vport_num, ret);
+
+               vp->qos.enabled = false;
+       }
+}
+
+/*
+ * Update max_average_bw of an existing vport scheduling element to
+ * max_rate. Only that field is selected in the modify bitmask.
+ *
+ * Returns -EOPNOTSUPP when the device lacks the qos / esw_scheduling
+ * capabilities, -EIO when QoS is not enabled on the vport, otherwise the
+ * result of the modify command (0 on success).
+ */
+static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num,
+                               u32 max_rate)
+{
+       u32 bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+       u32 elem_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+       struct mlx5_vport *vp = &esw->vports[vport_num];
+       struct mlx5_core_dev *mdev = esw->dev;
+       void *elem_attrs;
+       int ret;
+
+       if (!(MLX5_CAP_GEN(mdev, qos) && MLX5_CAP_QOS(mdev, esw_scheduling)))
+               return -EOPNOTSUPP;
+
+       if (!vp->qos.enabled)
+               return -EIO;
+
+       MLX5_SET(scheduling_context, &elem_ctx, element_type,
+                SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+       MLX5_SET(scheduling_context, &elem_ctx, parent_element_id,
+                esw->qos.root_tsar_id);
+       MLX5_SET(scheduling_context, &elem_ctx, max_average_bw,
+                max_rate);
+
+       elem_attrs = MLX5_ADDR_OF(scheduling_context, &elem_ctx,
+                                 element_attributes);
+       MLX5_SET(vport_element, elem_attrs, vport_number, vport_num);
+
+       ret = mlx5_modify_scheduling_element_cmd(mdev,
+                                                SCHEDULING_HIERARCHY_E_SWITCH,
+                                                &elem_ctx,
+                                                vp->qos.esw_tsar_ix,
+                                                bitmask);
+       if (ret)
+               esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
+                        vport_num, ret);
+
+       return ret;
+}
+
 static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
 {
        ((u8 *)node_guid)[7] = mac[0];
@@ -1386,6 +1548,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
                esw_vport_egress_config(esw, vport);
        }
 }
+
 static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
                             int enable_events)
 {
@@ -1399,6 +1562,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
        /* Restore old vport configuration */
        esw_apply_vport_conf(esw, vport);
 
+       /* Attach vport to the eswitch rate limiter */
+       if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate))
+               esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter", vport_num);
+
        /* Sync with current vport context */
        vport->enabled_events = enable_events;
        vport->enabled = true;
@@ -1437,7 +1604,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
         */
        esw_vport_change_handle_locked(vport);
        vport->enabled_events = 0;
-
+       esw_vport_disable_qos(esw, vport_num);
        if (vport_num && esw->mode == SRIOV_LEGACY) {
                mlx5_modify_vport_admin_state(esw->dev,
                                              MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
@@ -1483,6 +1650,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
        if (err)
                goto abort;
 
+       err = esw_create_tsar(esw);
+       if (err)
+               esw_warn(esw->dev, "Failed to create eswitch TSAR");
+
        enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE;
        for (i = 0; i <= nvfs; i++)
                esw_enable_vport(esw, i, enabled_events);
@@ -1517,7 +1688,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
                esw_disable_vport(esw, i);
 
        if (mc_promisc && mc_promisc->uplink_rule)
-               mlx5_del_flow_rule(mc_promisc->uplink_rule);
+               mlx5_del_flow_rules(mc_promisc->uplink_rule);
+
+       esw_destroy_tsar(esw);
 
        if (esw->mode == SRIOV_LEGACY)
                esw_destroy_legacy_fdb_table(esw);
@@ -1609,6 +1782,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
                goto abort;
        }
 
+       hash_init(esw->offloads.encap_tbl);
        mutex_init(&esw->state_lock);
 
        for (vport_num = 0; vport_num < total_vports; vport_num++) {
@@ -1624,6 +1798,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
        esw->total_vports = total_vports;
        esw->enabled_vports = 0;
        esw->mode = SRIOV_NONE;
+       esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
 
        dev->priv.eswitch = esw;
        return 0;
@@ -1777,6 +1952,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
        ivi->qos = evport->info.qos;
        ivi->spoofchk = evport->info.spoofchk;
        ivi->trusted = evport->info.trusted;
+       ivi->max_tx_rate = evport->info.max_rate;
        mutex_unlock(&esw->state_lock);
 
        return 0;
@@ -1870,6 +2046,27 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
        return 0;
 }
 
+/*
+ * Set the maximum rate of a vport.
+ *
+ * The new rate is pushed to the device via esw_vport_qos_config() under
+ * esw->state_lock; info.max_rate is updated only if that succeeds.
+ *
+ * Returns -EPERM when ESW_ALLOWED() fails, -EINVAL when LEGAL_VPORT()
+ * fails, otherwise the result of esw_vport_qos_config().
+ */
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw,
+                               int vport, u32 max_rate)
+{
+       int ret;
+
+       if (!ESW_ALLOWED(esw))
+               return -EPERM;
+       if (!LEGAL_VPORT(esw, vport))
+               return -EINVAL;
+
+       mutex_lock(&esw->state_lock);
+       ret = esw_vport_qos_config(esw, vport, max_rate);
+       if (ret == 0)
+               esw->vports[vport].info.max_rate = max_rate;
+       mutex_unlock(&esw->state_lock);
+
+       return ret;
+}
+
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
                                 int vport,
                                 struct ifla_vf_stats *vf_stats)
index 2e2938e08cdae9838efc7d0f2f48f926e7c21575..8661dd3f542c4cda5d875720eeb8c1042d5621da 100644 (file)
@@ -97,16 +97,16 @@ struct vport_ingress {
        struct mlx5_flow_group *allow_spoofchk_only_grp;
        struct mlx5_flow_group *allow_untagged_only_grp;
        struct mlx5_flow_group *drop_grp;
-       struct mlx5_flow_rule  *allow_rule;
-       struct mlx5_flow_rule  *drop_rule;
+       struct mlx5_flow_handle  *allow_rule;
+       struct mlx5_flow_handle  *drop_rule;
 };
 
 struct vport_egress {
        struct mlx5_flow_table *acl;
        struct mlx5_flow_group *allowed_vlans_grp;
        struct mlx5_flow_group *drop_grp;
-       struct mlx5_flow_rule  *allowed_vlan;
-       struct mlx5_flow_rule  *drop_rule;
+       struct mlx5_flow_handle  *allowed_vlan;
+       struct mlx5_flow_handle  *drop_rule;
 };
 
 struct mlx5_vport_info {
@@ -115,6 +115,7 @@ struct mlx5_vport_info {
        u8                      qos;
        u64                     node_guid;
        int                     link_state;
+       u32                     max_rate;
        bool                    spoofchk;
        bool                    trusted;
 };
@@ -124,8 +125,8 @@ struct mlx5_vport {
        int                     vport;
        struct hlist_head       uc_list[MLX5_L2_ADDR_HASH_SIZE];
        struct hlist_head       mc_list[MLX5_L2_ADDR_HASH_SIZE];
-       struct mlx5_flow_rule   *promisc_rule;
-       struct mlx5_flow_rule   *allmulti_rule;
+       struct mlx5_flow_handle *promisc_rule;
+       struct mlx5_flow_handle *allmulti_rule;
        struct work_struct      vport_change_handler;
 
        struct vport_ingress    ingress;
@@ -133,6 +134,11 @@ struct mlx5_vport {
 
        struct mlx5_vport_info  info;
 
+       struct {
+               bool            enabled;
+               u32             esw_tsar_ix;
+       } qos;
+
        bool                    enabled;
        u16                     enabled_events;
 };
@@ -156,7 +162,7 @@ struct mlx5_eswitch_fdb {
                        struct mlx5_flow_table *fdb;
                        struct mlx5_flow_group *send_to_vport_grp;
                        struct mlx5_flow_group *miss_grp;
-                       struct mlx5_flow_rule  *miss_rule;
+                       struct mlx5_flow_handle *miss_rule;
                        int vlan_push_pop_refcount;
                } offloads;
        };
@@ -169,7 +175,7 @@ enum {
 };
 
 struct mlx5_esw_sq {
-       struct mlx5_flow_rule   *send_to_vport_rule;
+       struct mlx5_flow_handle *send_to_vport_rule;
        struct list_head         list;
 };
 
@@ -180,9 +186,9 @@ struct mlx5_eswitch_rep {
                                         struct mlx5_eswitch_rep *rep);
        u16                    vport;
        u8                     hw_id[ETH_ALEN];
-       void                  *priv_data;
+       struct net_device      *netdev;
 
-       struct mlx5_flow_rule *vport_rx_rule;
+       struct mlx5_flow_handle *vport_rx_rule;
        struct list_head       vport_sqs_list;
        u16                    vlan;
        u32                    vlan_refcount;
@@ -193,6 +199,8 @@ struct mlx5_esw_offload {
        struct mlx5_flow_table *ft_offloads;
        struct mlx5_flow_group *vport_rx_group;
        struct mlx5_eswitch_rep *vport_reps;
+       DECLARE_HASHTABLE(encap_tbl, 8);
+       u8 inline_mode;
 };
 
 struct mlx5_eswitch {
@@ -209,6 +217,12 @@ struct mlx5_eswitch {
         */
        struct mutex            state_lock;
        struct esw_mc_addr      *mc_promisc;
+
+       struct {
+               bool            enabled;
+               u32             root_tsar_id;
+       } qos;
+
        struct mlx5_esw_offload offloads;
        int                     mode;
 };
@@ -234,6 +248,8 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
                                    int vport, bool spoofchk);
 int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
                                 int vport_num, bool setting);
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw,
+                               int vport, u32 max_rate);
 int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
                                  int vport, struct ifla_vf_info *ivi);
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
@@ -243,11 +259,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 struct mlx5_flow_spec;
 struct mlx5_esw_flow_attr;
 
-struct mlx5_flow_rule *
+struct mlx5_flow_handle *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                                struct mlx5_flow_spec *spec,
                                struct mlx5_esw_flow_attr *attr);
-struct mlx5_flow_rule *
+struct mlx5_flow_handle *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
 
 enum {
@@ -258,6 +274,24 @@ enum {
 #define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  0x40
 #define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80
 
+struct mlx5_encap_info { /* embedded as tun_info in struct mlx5_encap_entry */
+       __be32 daddr;   /* big-endian; presumably the tunnel destination IP address - TODO confirm */
+       __be32 tun_id;  /* big-endian; presumably the tunnel id/key - TODO confirm */
+       __be16 tp_dst;  /* big-endian; presumably the transport destination port - TODO confirm */
+};
+
+struct mlx5_encap_entry {
+       struct hlist_node encap_hlist; /* presumably links the entry into mlx5_esw_offload.encap_tbl - TODO confirm */
+       struct list_head flows; /* NOTE(review): looks like the list of flows using this entry - confirm */
+       u32 encap_id; /* copied into flow_act.encap_id by mlx5_eswitch_add_offloaded_rule() */
+       struct neighbour *n;
+       struct mlx5_encap_info tun_info;
+       unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
+
+       struct net_device *out_dev;
+       int tunnel_type;
+};
+
 struct mlx5_esw_flow_attr {
        struct mlx5_eswitch_rep *in_rep;
        struct mlx5_eswitch_rep *out_rep;
@@ -265,6 +299,7 @@ struct mlx5_esw_flow_attr {
        int     action;
        u16     vlan;
        bool    vlan_handled;
+       struct mlx5_encap_entry *encap;
 };
 
 int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
@@ -275,11 +310,15 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw,
 
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode);
+int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
                                     int vport_index,
                                     struct mlx5_eswitch_rep *rep);
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
                                       int vport_index);
+struct net_device *mlx5_eswitch_get_uplink_netdev(struct mlx5_eswitch *esw);
 
 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
                                 struct mlx5_esw_flow_attr *attr);
index c55ad8d00c05714710b36b568ed57ab1026bd8cb..466e161010f759e08ab2253326da4f2a0192aa1d 100644 (file)
@@ -43,32 +43,36 @@ enum {
        FDB_SLOW_PATH
 };
 
-struct mlx5_flow_rule *
+struct mlx5_flow_handle *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
                                struct mlx5_flow_spec *spec,
                                struct mlx5_esw_flow_attr *attr)
 {
-       struct mlx5_flow_destination dest = { 0 };
+       struct mlx5_flow_destination dest[2] = {};
+       struct mlx5_flow_act flow_act = {0};
        struct mlx5_fc *counter = NULL;
-       struct mlx5_flow_rule *rule;
+       struct mlx5_flow_handle *rule;
        void *misc;
-       int action;
+       int i = 0;
 
        if (esw->mode != SRIOV_OFFLOADS)
                return ERR_PTR(-EOPNOTSUPP);
 
-       action = attr->action;
+       /* per flow vlan pop/push is emulated, don't set that into the firmware */
+       flow_act.action = attr->action & ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
 
-       if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
-               dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-               dest.vport_num = attr->out_rep->vport;
-               action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-       } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+       if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+               dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+               dest[i].vport_num = attr->out_rep->vport;
+               i++;
+       }
+       if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(esw->dev, true);
                if (IS_ERR(counter))
                        return ERR_CAST(counter);
-               dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-               dest.counter = counter;
+               dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+               dest[i].counter = counter;
+               i++;
        }
 
        misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
@@ -79,10 +83,14 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 
        spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
                                      MLX5_MATCH_MISC_PARAMETERS;
+       if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+               spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
 
-       rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb,
-                                 spec, action, 0, &dest);
+       if (attr->encap)
+               flow_act.encap_id = attr->encap->encap_id;
 
+       rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb,
+                                  spec, &flow_act, dest, i);
        if (IS_ERR(rule))
                mlx5_fc_destroy(esw->dev, counter);
 
@@ -269,11 +277,12 @@ out:
        return err;
 }
 
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
 {
+       struct mlx5_flow_act flow_act = {0};
        struct mlx5_flow_destination dest;
-       struct mlx5_flow_rule *flow_rule;
+       struct mlx5_flow_handle *flow_rule;
        struct mlx5_flow_spec *spec;
        void *misc;
 
@@ -295,10 +304,10 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
        spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
        dest.vport_num = vport;
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
-       flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
-                                      MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-                                      0, &dest);
+       flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+                                       &flow_act, &dest, 1);
        if (IS_ERR(flow_rule))
                esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
 out:
@@ -315,7 +324,7 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw,
                return;
 
        list_for_each_entry_safe(esw_sq, tmp, &rep->vport_sqs_list, list) {
-               mlx5_del_flow_rule(esw_sq->send_to_vport_rule);
+               mlx5_del_flow_rules(esw_sq->send_to_vport_rule);
                list_del(&esw_sq->list);
                kfree(esw_sq);
        }
@@ -325,7 +334,7 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
                                 struct mlx5_eswitch_rep *rep,
                                 u16 *sqns_array, int sqns_num)
 {
-       struct mlx5_flow_rule *flow_rule;
+       struct mlx5_flow_handle *flow_rule;
        struct mlx5_esw_sq *esw_sq;
        int err;
        int i;
@@ -361,8 +370,9 @@ out_err:
 
 static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 {
+       struct mlx5_flow_act flow_act = {0};
        struct mlx5_flow_destination dest;
-       struct mlx5_flow_rule *flow_rule = NULL;
+       struct mlx5_flow_handle *flow_rule = NULL;
        struct mlx5_flow_spec *spec;
        int err = 0;
 
@@ -375,10 +385,10 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 
        dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
        dest.vport_num = 0;
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
-       flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
-                                      MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-                                      0, &dest);
+       flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+                                       &flow_act, &dest, 1);
        if (IS_ERR(flow_rule)) {
                err = PTR_ERR(flow_rule);
                esw_warn(esw->dev,  "FDB: Failed to add miss flow rule err %d\n", err);
@@ -405,6 +415,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
        u32 *flow_group_in;
        void *match_criteria;
        int table_size, ix, err = 0;
+       u32 flags = 0;
 
        flow_group_in = mlx5_vzalloc(inlen);
        if (!flow_group_in)
@@ -419,9 +430,14 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
        esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n",
                  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
 
+       if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) &&
+           MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
+               flags |= MLX5_FLOW_TABLE_TUNNEL_EN;
+
        fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
                                                  ESW_OFFLOADS_NUM_ENTRIES,
-                                                 ESW_OFFLOADS_NUM_GROUPS, 0);
+                                                 ESW_OFFLOADS_NUM_GROUPS, 0,
+                                                 flags);
        if (IS_ERR(fdb)) {
                err = PTR_ERR(fdb);
                esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err);
@@ -430,7 +446,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
        esw->fdb_table.fdb = fdb;
 
        table_size = nvports + MAX_PF_SQ + 1;
-       fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0);
+       fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0, 0);
        if (IS_ERR(fdb)) {
                err = PTR_ERR(fdb);
                esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
@@ -501,7 +517,7 @@ static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw)
                return;
 
        esw_debug(esw->dev, "Destroy offloads FDB Table\n");
-       mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule);
+       mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
        mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
        mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 
@@ -522,7 +538,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw)
                return -ENOMEM;
        }
 
-       ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0);
+       ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0, 0);
        if (IS_ERR(ft_offloads)) {
                err = PTR_ERR(ft_offloads);
                esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err);
@@ -585,11 +601,12 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
        mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
 }
 
-struct mlx5_flow_rule *
+struct mlx5_flow_handle *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
 {
+       struct mlx5_flow_act flow_act = {0};
        struct mlx5_flow_destination dest;
-       struct mlx5_flow_rule *flow_rule;
+       struct mlx5_flow_handle *flow_rule;
        struct mlx5_flow_spec *spec;
        void *misc;
 
@@ -610,9 +627,9 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
        dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
        dest.tir_num = tirn;
 
-       flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, spec,
-                                      MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-                                      0, &dest);
+       flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+       flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
+                                      &flow_act, &dest, 1);
        if (IS_ERR(flow_rule)) {
                esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
                goto out;
@@ -640,6 +657,14 @@ static int esw_offloads_start(struct mlx5_eswitch *esw)
                if (err1)
                        esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err);
        }
+       if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
+               if (mlx5_eswitch_inline_mode_get(esw,
+                                                num_vfs,
+                                                &esw->offloads.inline_mode)) {
+                       esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
+                       esw_warn(esw->dev, "Inline mode is different between vports\n");
+               }
+       }
        return err;
 }
 
@@ -754,6 +779,50 @@ static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
        return 0;
 }
 
+/*
+ * Translate a DEVLINK_ESWITCH_INLINE_MODE_* value into the matching
+ * MLX5_INLINE_MODE_* value.
+ *
+ * Returns 0 and stores the result in *mlx5_mode, or -EINVAL for an
+ * unknown devlink mode, in which case *mlx5_mode is left untouched.
+ */
+static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode)
+{
+       u8 hw_mode;
+
+       switch (mode) {
+       case DEVLINK_ESWITCH_INLINE_MODE_NONE:
+               hw_mode = MLX5_INLINE_MODE_NONE;
+               break;
+       case DEVLINK_ESWITCH_INLINE_MODE_LINK:
+               hw_mode = MLX5_INLINE_MODE_L2;
+               break;
+       case DEVLINK_ESWITCH_INLINE_MODE_NETWORK:
+               hw_mode = MLX5_INLINE_MODE_IP;
+               break;
+       case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT:
+               hw_mode = MLX5_INLINE_MODE_TCP_UDP;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       *mlx5_mode = hw_mode;
+       return 0;
+}
+
+/*
+ * Translate an MLX5_INLINE_MODE_* value into the matching
+ * DEVLINK_ESWITCH_INLINE_MODE_* value.
+ *
+ * Returns 0 and stores the result in *mode, or -EINVAL for an unknown
+ * mlx5 mode, in which case *mode is left untouched.
+ */
+static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
+{
+       u8 dl_mode;
+
+       switch (mlx5_mode) {
+       case MLX5_INLINE_MODE_NONE:
+               dl_mode = DEVLINK_ESWITCH_INLINE_MODE_NONE;
+               break;
+       case MLX5_INLINE_MODE_L2:
+               dl_mode = DEVLINK_ESWITCH_INLINE_MODE_LINK;
+               break;
+       case MLX5_INLINE_MODE_IP:
+               dl_mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK;
+               break;
+       case MLX5_INLINE_MODE_TCP_UDP:
+               dl_mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       *mode = dl_mode;
+       return 0;
+}
+
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 {
        struct mlx5_core_dev *dev;
@@ -798,6 +867,95 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
        return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
 }
 
+/*
+ * Apply a devlink inline mode to vports 1 .. enabled_vports - 1 via
+ * mlx5_modify_nic_vport_min_inline() and record it in
+ * esw->offloads.inline_mode.
+ *
+ * If programming any vport fails, the vports already modified are set
+ * back to the previously recorded inline mode (best effort, results of
+ * the revert are not checked) and the error is returned.
+ *
+ * Returns 0 on success; -EOPNOTSUPP when the device is not a vport group
+ * manager, the E-Switch mode is SRIOV_NONE, or wqe_inline_mode is not
+ * MLX5_CAP_INLINE_MODE_VPORT_CONTEXT; -EINVAL for an unknown devlink
+ * mode; otherwise the error of the failing modify command.
+ */
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
+{
+       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       struct mlx5_eswitch *esw = dev->priv.eswitch;
+       int num_vports;
+       int err;
+       int vport;
+       u8 mlx5_mode;
+
+       if (!MLX5_CAP_GEN(dev, vport_group_manager))
+               return -EOPNOTSUPP;
+
+       if (esw->mode == SRIOV_NONE)
+               return -EOPNOTSUPP;
+
+       if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
+           MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+               return -EOPNOTSUPP;
+
+       err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
+       if (err)
+               return err;
+
+       /*
+        * Dereference esw only after the capability checks above, as
+        * mlx5_devlink_eswitch_inline_mode_get() does.
+        * NOTE(review): presumably dev->priv.eswitch is not set up on
+        * devices that are not vport group managers - confirm.
+        */
+       num_vports = esw->enabled_vports;
+
+       for (vport = 1; vport < num_vports; vport++) {
+               err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
+               if (err) {
+                       esw_warn(dev, "Failed to set min inline on vport %d\n",
+                                vport);
+                       goto revert_inline_mode;
+               }
+       }
+
+       esw->offloads.inline_mode = mlx5_mode;
+       return 0;
+
+revert_inline_mode:
+       /* Walk back over the vports that were already reprogrammed */
+       while (--vport > 0)
+               mlx5_modify_nic_vport_min_inline(dev,
+                                                vport,
+                                                esw->offloads.inline_mode);
+       return err;
+}
+
+/*
+ * Report the recorded E-Switch inline mode (esw->offloads.inline_mode)
+ * as a devlink inline mode value in *mode.
+ *
+ * Returns -EOPNOTSUPP when the device is not a vport group manager, the
+ * E-Switch mode is SRIOV_NONE, or wqe_inline_mode is not
+ * MLX5_CAP_INLINE_MODE_VPORT_CONTEXT; otherwise the result of
+ * esw_inline_mode_to_devlink().
+ */
+int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
+{
+       struct mlx5_core_dev *dev = devlink_priv(devlink);
+       struct mlx5_eswitch *esw = dev->priv.eswitch;
+
+       /* Evaluated left to right: esw is only touched on a group manager */
+       if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+           esw->mode == SRIOV_NONE ||
+           MLX5_CAP_ETH(dev, wqe_inline_mode) !=
+           MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+               return -EOPNOTSUPP;
+
+       return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
+}
+
+/*
+ * Query the min-inline mode of vports 1 .. nvfs and report it in *mode
+ * if all of them agree.
+ *
+ * With nvfs < 1 no vport is queried and *mode is set to
+ * MLX5_INLINE_MODE_L2.
+ *
+ * Returns 0 on success; -EOPNOTSUPP when the device is not a vport group
+ * manager, the E-Switch mode is SRIOV_NONE, or wqe_inline_mode is not
+ * MLX5_CAP_INLINE_MODE_VPORT_CONTEXT; -EINVAL (leaving *mode untouched)
+ * as soon as a vport's mode differs from that of the vports before it.
+ */
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
+{
+       struct mlx5_core_dev *dev = esw->dev;
+       u8 cur_mode = MLX5_INLINE_MODE_L2;
+       u8 first_mode = 0;
+       int vport;
+
+       if (!MLX5_CAP_GEN(dev, vport_group_manager))
+               return -EOPNOTSUPP;
+
+       if (esw->mode == SRIOV_NONE)
+               return -EOPNOTSUPP;
+
+       if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
+           MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+               return -EOPNOTSUPP;
+
+       for (vport = 1; vport <= nvfs; vport++) {
+               mlx5_query_nic_vport_min_inline(dev, vport, &cur_mode);
+               /*
+                * Any mismatch bails out at once, so comparing against
+                * the first vport equals comparing against the previous.
+                */
+               if (vport == 1)
+                       first_mode = cur_mode;
+               else if (first_mode != cur_mode)
+                       return -EINVAL;
+       }
+
+       *mode = cur_mode;
+       return 0;
+}
+
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
                                     int vport_index,
                                     struct mlx5_eswitch_rep *__rep)
@@ -812,7 +970,7 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
        rep->load   = __rep->load;
        rep->unload = __rep->unload;
        rep->vport  = __rep->vport;
-       rep->priv_data = __rep->priv_data;
+       rep->netdev = __rep->netdev;
        ether_addr_copy(rep->hw_id, __rep->hw_id);
 
        INIT_LIST_HEAD(&rep->vport_sqs_list);
@@ -832,3 +990,13 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 
        rep->valid = false;
 }
+
+/* Return the netdev recorded in the representor at UPLINK_REP_INDEX
+ * (slot 0 of esw->offloads.vport_reps).
+ */
+struct net_device *mlx5_eswitch_get_uplink_netdev(struct mlx5_eswitch *esw)
+{
+#define UPLINK_REP_INDEX 0
+       return esw->offloads.vport_reps[UPLINK_REP_INDEX].netdev;
+}
index 113c32326333b07d85f2185097d54e02f4f4ef73..c4478ecd8056e42de2c359eb7a2abfd9e6400090 100644 (file)
@@ -37,6 +37,7 @@
 #include "fs_core.h"
 #include "fs_cmd.h"
 #include "mlx5_core.h"
+#include "eswitch.h"
 
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
                            struct mlx5_flow_table *ft)
@@ -61,8 +62,9 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
                               enum fs_flow_table_op_mod op_mod,
                               enum fs_flow_table_type type, unsigned int level,
                               unsigned int log_size, struct mlx5_flow_table
-                              *next_ft, unsigned int *table_id)
+                              *next_ft, unsigned int *table_id, u32 flags)
 {
+       int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
        u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
        u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]   = {0};
        int err;
@@ -78,6 +80,9 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
                MLX5_SET(create_flow_table_in, in, other_vport, 1);
        }
 
+       MLX5_SET(create_flow_table_in, in, decap_en, en_encap_decap);
+       MLX5_SET(create_flow_table_in, in, encap_en, en_encap_decap);
+
        switch (op_mod) {
        case FS_FT_OP_MOD_NORMAL:
                if (next_ft) {
@@ -243,6 +248,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
        MLX5_SET(flow_context, in_flow_context, group_id, group_id);
        MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag);
        MLX5_SET(flow_context, in_flow_context, action, fte->action);
+       MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id);
        in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
                                      match_value);
        memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
@@ -453,27 +459,32 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
        *bytes = MLX5_GET64(traffic_counter, stats, octets);
 }
 
-#define MAX_ENCAP_SIZE (128)
-
-int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
-                        int header_type,
-                        size_t size,
-                        void *encap_header,
-                        u32 *encap_id)
+int mlx5_encap_alloc(struct mlx5_core_dev *dev,
+                    int header_type,
+                    size_t size,
+                    void *encap_header,
+                    u32 *encap_id)
 {
+       int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
        u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
-       u32 in[MLX5_ST_SZ_DW(alloc_encap_header_in) +
-             (MAX_ENCAP_SIZE / sizeof(u32))];
-       void *encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in,
-                                            encap_header);
-       void *header = MLX5_ADDR_OF(encap_header_in, encap_header_in,
-                                   encap_header);
-       int inlen = header - (void *)in  + size;
+       void *encap_header_in;
+       void *header;
+       int inlen;
        int err;
+       u32 *in;
 
-       if (size > MAX_ENCAP_SIZE)
+       if (size > MLX5_CAP_ESW(dev, max_encap_header_size))
                return -EINVAL;
 
+       in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + max_encap_size,
+                    GFP_KERNEL);
+       if (!in)
+               return -ENOMEM;
+
+       encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header);
+       header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header);
+       inlen = header - (void *)in  + size;
+
        memset(in, 0, inlen);
        MLX5_SET(alloc_encap_header_in, in, opcode,
                 MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
@@ -485,10 +496,11 @@ int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
        err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 
        *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+       kfree(in);
        return err;
 }
 
-void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id)
+void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id)
 {
        u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
        u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
index c5bc4686c832414c9f8307ec0745df8de36d78fa..8fad806885362dce727791213422b7a9c0d5b897 100644 (file)
@@ -38,7 +38,7 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
                               enum fs_flow_table_op_mod op_mod,
                               enum fs_flow_table_type type, unsigned int level,
                               unsigned int log_size, struct mlx5_flow_table
-                              *next_ft, unsigned int *table_id);
+                              *next_ft, unsigned int *table_id, u32 flags);
 
 int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
                                struct mlx5_flow_table *ft);
@@ -89,11 +89,4 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
                          struct mlx5_cmd_fc_bulk *b, u16 id,
                          u64 *packets, u64 *bytes);
 
-int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
-                        int header_type,
-                        size_t size,
-                        void *encap_header,
-                        u32 *encap_id);
-void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id);
-
 #endif
index 5da2cc878582438cef2caf89884d3065c23aae21..a263d8904a4cf84de718ad9dd4b2d8ddca9a5194 100644 (file)
@@ -153,6 +153,11 @@ static void del_rule(struct fs_node *node);
 static void del_flow_table(struct fs_node *node);
 static void del_flow_group(struct fs_node *node);
 static void del_fte(struct fs_node *node);
+static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
+                               struct mlx5_flow_destination *d2);
+static struct mlx5_flow_rule *
+find_flow_rule(struct fs_fte *fte,
+              struct mlx5_flow_destination *dest);
 
 static void tree_init_node(struct fs_node *node,
                           unsigned int refcount,
@@ -369,6 +374,7 @@ static void del_rule(struct fs_node *node)
        struct mlx5_core_dev *dev = get_dev(node);
        int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
        int err;
+       bool update_fte = false;
 
        match_value = mlx5_vzalloc(match_len);
        if (!match_value) {
@@ -387,13 +393,23 @@ static void del_rule(struct fs_node *node)
                list_del(&rule->next_ft);
                mutex_unlock(&rule->dest_attr.ft->lock);
        }
+
+       if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER  &&
+           --fte->dests_size) {
+               modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+               fte->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
+               update_fte = true;
+               goto out;
+       }
+
        if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
            --fte->dests_size) {
                modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST),
-               err = mlx5_cmd_update_fte(dev, ft,
-                                         fg->id,
-                                         modify_mask,
-                                         fte);
+               update_fte = true;
+       }
+out:
+       if (update_fte && fte->dests_size) {
+               err = mlx5_cmd_update_fte(dev, ft, fg->id, modify_mask, fte);
                if (err)
                        mlx5_core_warn(dev,
                                       "%s can't del rule fg id=%d fte_index=%d\n",
@@ -436,13 +452,15 @@ static void del_flow_group(struct fs_node *node)
        fs_get_obj(ft, fg->node.parent);
        dev = get_dev(&ft->node);
 
+       if (ft->autogroup.active)
+               ft->autogroup.num_groups--;
+
        if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
                mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
                               fg->id, ft->id);
 }
 
-static struct fs_fte *alloc_fte(u8 action,
-                               u32 flow_tag,
+static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act,
                                u32 *match_value,
                                unsigned int index)
 {
@@ -454,9 +472,10 @@ static struct fs_fte *alloc_fte(u8 action,
 
        memcpy(fte->val, match_value, sizeof(fte->val));
        fte->node.type =  FS_TYPE_FLOW_ENTRY;
-       fte->flow_tag = flow_tag;
+       fte->flow_tag = flow_act->flow_tag;
        fte->index = index;
-       fte->action = action;
+       fte->action = flow_act->action;
+       fte->encap_id = flow_act->encap_id;
 
        return fte;
 }
@@ -486,7 +505,8 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in)
 
 static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
                                                enum fs_flow_table_type table_type,
-                                               enum fs_flow_table_op_mod op_mod)
+                                               enum fs_flow_table_op_mod op_mod,
+                                               u32 flags)
 {
        struct mlx5_flow_table *ft;
 
@@ -500,6 +520,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft
        ft->type = table_type;
        ft->vport = vport;
        ft->max_fte = max_fte;
+       ft->flags = flags;
        INIT_LIST_HEAD(&ft->fwd_rules);
        mutex_init(&ft->lock);
 
@@ -638,8 +659,8 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
        return err;
 }
 
-int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
-                                struct mlx5_flow_destination *dest)
+static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
+                                        struct mlx5_flow_destination *dest)
 {
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *fg;
@@ -664,6 +685,28 @@ int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
        return err;
 }
 
+/* Point one rule of @handle at @new_dest.
+ *
+ * When @old_dest is NULL the handle must hold exactly one rule, and that
+ * rule is the one modified. Otherwise the rule whose current destination
+ * matches @old_dest (as decided by mlx5_flow_dests_cmp()) is modified.
+ *
+ * Returns the result of _mlx5_modify_rule_destination() for the selected
+ * rule, or -EINVAL when no rule can be selected.
+ */
+int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle,
+                                struct mlx5_flow_destination *new_dest,
+                                struct mlx5_flow_destination *old_dest)
+{
+       int i;
+
+       if (!old_dest) {
+               if (handle->num_rules != 1)
+                       return -EINVAL;
+               return _mlx5_modify_rule_destination(handle->rule[0],
+                                                    new_dest);
+       }
+
+       /* Select by the destination being replaced, not by the replacement:
+        * a rule that already targets new_dest is not the one to change.
+        */
+       for (i = 0; i < handle->num_rules; i++) {
+               if (mlx5_flow_dests_cmp(old_dest, &handle->rule[i]->dest_attr))
+                       return _mlx5_modify_rule_destination(handle->rule[i],
+                                                            new_dest);
+       }
+
+       return -EINVAL;
+}
+
 /* Modify/set FWD rules that point on old_next_ft to point on new_next_ft  */
 static int connect_fwd_rules(struct mlx5_core_dev *dev,
                             struct mlx5_flow_table *new_next_ft,
@@ -686,7 +729,7 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev,
        list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
        mutex_unlock(&old_next_ft->lock);
        list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
-               err = mlx5_modify_rule_destination(iter, &dest);
+               err = _mlx5_modify_rule_destination(iter, &dest);
                if (err)
                        pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
                               new_next_ft->id);
@@ -736,7 +779,8 @@ static void list_add_flow_table(struct mlx5_flow_table *ft,
 static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
                                                        enum fs_flow_table_op_mod op_mod,
                                                        u16 vport, int prio,
-                                                       int max_fte, u32 level)
+                                                       int max_fte, u32 level,
+                                                       u32 flags)
 {
        struct mlx5_flow_table *next_ft = NULL;
        struct mlx5_flow_table *ft;
@@ -769,7 +813,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
                              vport,
                              max_fte ? roundup_pow_of_two(max_fte) : 0,
                              root->table_type,
-                             op_mod);
+                             op_mod, flags);
        if (!ft) {
                err = -ENOMEM;
                goto unlock_root;
@@ -779,7 +823,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
        log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
        next_ft = find_next_chained_ft(fs_prio);
        err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
-                                        ft->level, log_table_sz, next_ft, &ft->id);
+                                        ft->level, log_table_sz, next_ft, &ft->id,
+                                        ft->flags);
        if (err)
                goto free_ft;
 
@@ -804,10 +849,11 @@ unlock_root:
 
 struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
                                               int prio, int max_fte,
-                                              u32 level)
+                                              u32 level,
+                                              u32 flags)
 {
        return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio,
-                                       max_fte, level);
+                                       max_fte, level, flags);
 }
 
 struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
@@ -815,7 +861,7 @@ struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace
                                                     u32 level, u16 vport)
 {
        return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio,
-                                       max_fte, level);
+                                       max_fte, level, 0);
 }
 
 struct mlx5_flow_table *mlx5_create_lag_demux_flow_table(
@@ -823,7 +869,7 @@ struct mlx5_flow_table *mlx5_create_lag_demux_flow_table(
                                               int prio, u32 level)
 {
        return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0,
-                                       level);
+                                       level, 0);
 }
 EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
 
@@ -831,14 +877,15 @@ struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_nam
                                                            int prio,
                                                            int num_flow_table_entries,
                                                            int max_num_groups,
-                                                           u32 level)
+                                                           u32 level,
+                                                           u32 flags)
 {
        struct mlx5_flow_table *ft;
 
        if (max_num_groups > num_flow_table_entries)
                return ERR_PTR(-EINVAL);
 
-       ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level);
+       ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level, flags);
        if (IS_ERR(ft))
                return ft;
 
@@ -879,7 +926,7 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *
        tree_init_node(&fg->node, !is_auto_fg, del_flow_group);
        tree_add_node(&fg->node, &ft->node);
        /* Add node to group list */
-       list_add(&fg->node.list, ft->node.children.prev);
+       list_add(&fg->node.list, prev_fg);
 
        return fg;
 }
@@ -893,7 +940,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
                return ERR_PTR(-EPERM);
 
        lock_ref_node(&ft->node);
-       fg = create_flow_group_common(ft, fg_in, &ft->node.children, false);
+       fg = create_flow_group_common(ft, fg_in, ft->node.children.prev, false);
        unlock_ref_node(&ft->node);
 
        return fg;
@@ -915,55 +962,133 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
        return rule;
 }
 
-/* fte should not be deleted while calling this function */
-static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte,
-                                          struct mlx5_flow_group *fg,
-                                          struct mlx5_flow_destination *dest)
+static struct mlx5_flow_handle *alloc_handle(int num_rules)
 {
+       struct mlx5_flow_handle *handle;
+
+       handle = kzalloc(sizeof(*handle) + sizeof(handle->rule[0]) *
+                         num_rules, GFP_KERNEL);
+       if (!handle)
+               return NULL;
+
+       handle->num_rules = num_rules;
+
+       return handle;
+}
+
+/* Drop one reference on each of the first @i rules of @handle, walking
+ * from slot i - 1 down to slot 0, then free the handle itself. A rule
+ * whose refcount reaches zero is removed from its list and freed, and
+ * fte->dests_size is decremented for it. @dest is not used.
+ */
+static void destroy_flow_handle(struct fs_fte *fte,
+                               struct mlx5_flow_handle *handle,
+                               struct mlx5_flow_destination *dest,
+                               int i)
+{
+       while (--i >= 0) {
+               struct mlx5_flow_rule *rule = handle->rule[i];
+
+               /* Still referenced by another handle: leave it in place. */
+               if (!atomic_dec_and_test(&rule->node.refcount))
+                       continue;
+
+               fte->dests_size--;
+               list_del(&rule->node.list);
+               kfree(rule);
+       }
+       kfree(handle);
+}
+
+/* Build a handle with one rule slot per destination (a single slot when
+ * dest_num is 0).
+ *
+ * For each slot, a rule already attached to @fte that find_flow_rule()
+ * matches to the destination is reused with its refcount incremented.
+ * Otherwise a new rule is allocated, linked into the fte's child list and
+ * *new_rule is set to true. A new rule with a destination also increments
+ * fte->dests_size and ORs either the flow-counters or the destination-list
+ * modify bit into *modify_mask.
+ *
+ * *new_rule is only ever set to true here, never cleared, so the caller
+ * must initialise it.
+ *
+ * Returns the handle, or ERR_PTR(-ENOMEM) when alloc_handle() or
+ * alloc_rule() fails.
+ */
+static struct mlx5_flow_handle *
+create_flow_handle(struct fs_fte *fte,
+                  struct mlx5_flow_destination *dest,
+                  int dest_num,
+                  int *modify_mask,
+                  bool *new_rule)
+{
+       struct mlx5_flow_handle *handle;
+       struct mlx5_flow_rule *rule = NULL;
+       static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+       static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+       int type;
+       int i = 0;
+
+       /* Always reserve at least one slot, even without destinations. */
+       handle = alloc_handle((dest_num) ? dest_num : 1);
+       if (!handle)
+               return ERR_PTR(-ENOMEM);
+
+       /* do/while: the body runs once even when dest_num is 0. */
+       do {
+               if (dest) {
+                       /* Share an existing rule for this destination. */
+                       rule = find_flow_rule(fte, dest + i);
+                       if (rule) {
+                               atomic_inc(&rule->node.refcount);
+                               goto rule_found;
+                       }
+               }
+
+               *new_rule = true;
+               rule = alloc_rule(dest + i);
+               if (!rule)
+                       goto free_rules;
+
+               /* Add dest to dests list- we need flow tables to be in the
+                * end of the list for forward to next prio rules.
+                */
+               tree_init_node(&rule->node, 1, del_rule);
+               if (dest &&
+                   dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+                       list_add(&rule->node.list, &fte->node.children);
+               else
+                       list_add_tail(&rule->node.list, &fte->node.children);
+               if (dest) {
+                       fte->dests_size++;
+
+                       /* type is used as a flag: non-zero for a counter. */
+                       type = dest[i].type ==
+                               MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+                       *modify_mask |= type ? count : dst;
+               }
+rule_found:
+               handle->rule[i] = rule;
+       } while (++i < dest_num);
+
+       return handle;
+
+free_rules:
+       /* Release the i slots filled so far, then the handle itself. */
+       destroy_flow_handle(fte, handle, dest, i);
+       return ERR_PTR(-ENOMEM);
+}
+
+/* fte should not be deleted while calling this function */
+static struct mlx5_flow_handle *
+add_rule_fte(struct fs_fte *fte,
+            struct mlx5_flow_group *fg,
+            struct mlx5_flow_destination *dest,
+            int dest_num,
+            bool update_action)
+{
+       struct mlx5_flow_handle *handle;
        struct mlx5_flow_table *ft;
-       struct mlx5_flow_rule *rule;
        int modify_mask = 0;
        int err;
+       bool new_rule = false;
 
-       rule = alloc_rule(dest);
-       if (!rule)
-               return ERR_PTR(-ENOMEM);
-
-       fs_get_obj(ft, fg->node.parent);
-       /* Add dest to dests list- we need flow tables to be in the
-        * end of the list for forward to next prio rules.
-        */
-       tree_init_node(&rule->node, 1, del_rule);
-       if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
-               list_add(&rule->node.list, &fte->node.children);
-       else
-               list_add_tail(&rule->node.list, &fte->node.children);
-       if (dest) {
-               fte->dests_size++;
+       handle = create_flow_handle(fte, dest, dest_num, &modify_mask,
+                                   &new_rule);
+       if (IS_ERR(handle) || !new_rule)
+               goto out;
 
-               modify_mask |= dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ?
-                       BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS) :
-                       BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
-       }
+       if (update_action)
+               modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
 
-       if (fte->dests_size == 1 || !dest)
+       fs_get_obj(ft, fg->node.parent);
+       if (!(fte->status & FS_FTE_STATUS_EXISTING))
                err = mlx5_cmd_create_fte(get_dev(&ft->node),
                                          ft, fg->id, fte);
        else
                err = mlx5_cmd_update_fte(get_dev(&ft->node),
                                          ft, fg->id, modify_mask, fte);
        if (err)
-               goto free_rule;
+               goto free_handle;
 
        fte->status |= FS_FTE_STATUS_EXISTING;
 
-       return rule;
+out:
+       return handle;
 
-free_rule:
-       list_del(&rule->node.list);
-       kfree(rule);
-       if (dest)
-               fte->dests_size--;
+free_handle:
+       destroy_flow_handle(fte, handle, dest, handle->num_rules);
        return ERR_PTR(err);
 }
 
@@ -992,15 +1117,14 @@ static unsigned int get_free_fte_index(struct mlx5_flow_group *fg,
 /* prev is output, prev->next = new_fte */
 static struct fs_fte *create_fte(struct mlx5_flow_group *fg,
                                 u32 *match_value,
-                                u8 action,
-                                u32 flow_tag,
+                                struct mlx5_flow_act *flow_act,
                                 struct list_head **prev)
 {
        struct fs_fte *fte;
        int index;
 
        index = get_free_fte_index(fg, prev);
-       fte = alloc_fte(action, flow_tag, match_value, index);
+       fte = alloc_fte(flow_act, match_value, index);
        if (IS_ERR(fte))
                return fte;
 
@@ -1012,7 +1136,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
                                                u32 *match_criteria)
 {
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-       struct list_head *prev = &ft->node.children;
+       struct list_head *prev = ft->node.children.prev;
        unsigned int candidate_index = 0;
        struct mlx5_flow_group *fg;
        void *match_criteria_addr;
@@ -1064,71 +1188,81 @@ out:
        return fg;
 }
 
+/* Tell whether @d1 and @d2 describe the same destination.
+ *
+ * Only VPORT, FLOW_TABLE and TIR destinations can compare equal, each by
+ * its own identifier (vport_num, ft, tir_num). A type mismatch or any
+ * other destination type yields false.
+ */
+static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
+                               struct mlx5_flow_destination *d2)
+{
+       if (d1->type != d2->type)
+               return false;
+
+       switch (d1->type) {
+       case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+               return d1->vport_num == d2->vport_num;
+       case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+               return d1->ft == d2->ft;
+       case MLX5_FLOW_DESTINATION_TYPE_TIR:
+               return d1->tir_num == d2->tir_num;
+       default:
+               return false;
+       }
+}
+
 static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
                                             struct mlx5_flow_destination *dest)
 {
        struct mlx5_flow_rule *rule;
 
        list_for_each_entry(rule, &fte->node.children, node.list) {
-               if (rule->dest_attr.type == dest->type) {
-                       if ((dest->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
-                            dest->vport_num == rule->dest_attr.vport_num) ||
-                           (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
-                            dest->ft == rule->dest_attr.ft) ||
-                           (dest->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
-                            dest->tir_num == rule->dest_attr.tir_num))
-                               return rule;
-               }
+               if (mlx5_flow_dests_cmp(&rule->dest_attr, dest))
+                       return rule;
        }
        return NULL;
 }
 
-static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
-                                         u32 *match_value,
-                                         u8 action,
-                                         u32 flow_tag,
-                                         struct mlx5_flow_destination *dest)
+static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
+                                           u32 *match_value,
+                                           struct mlx5_flow_act *flow_act,
+                                           struct mlx5_flow_destination *dest,
+                                           int dest_num)
 {
-       struct fs_fte *fte;
-       struct mlx5_flow_rule *rule;
+       struct mlx5_flow_handle *handle;
        struct mlx5_flow_table *ft;
        struct list_head *prev;
+       struct fs_fte *fte;
+       int i;
 
        nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT);
        fs_for_each_fte(fte, fg) {
                nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
                if (compare_match_value(&fg->mask, match_value, &fte->val) &&
-                   action == fte->action && flow_tag == fte->flow_tag) {
-                       rule = find_flow_rule(fte, dest);
-                       if (rule) {
-                               atomic_inc(&rule->node.refcount);
-                               unlock_ref_node(&fte->node);
-                               unlock_ref_node(&fg->node);
-                               return rule;
+                   (flow_act->action & fte->action) &&
+                   flow_act->flow_tag == fte->flow_tag) {
+                       int old_action = fte->action;
+
+                       fte->action |= flow_act->action;
+                       handle = add_rule_fte(fte, fg, dest, dest_num,
+                                             old_action != flow_act->action);
+                       if (IS_ERR(handle)) {
+                               fte->action = old_action;
+                               goto unlock_fte;
+                       } else {
+                               goto add_rules;
                        }
-                       rule = add_rule_fte(fte, fg, dest);
-                       unlock_ref_node(&fte->node);
-                       if (IS_ERR(rule))
-                               goto unlock_fg;
-                       else
-                               goto add_rule;
                }
                unlock_ref_node(&fte->node);
        }
        fs_get_obj(ft, fg->node.parent);
        if (fg->num_ftes >= fg->max_ftes) {
-               rule = ERR_PTR(-ENOSPC);
+               handle = ERR_PTR(-ENOSPC);
                goto unlock_fg;
        }
 
-       fte = create_fte(fg, match_value, action, flow_tag, &prev);
+       fte = create_fte(fg, match_value, flow_act, &prev);
        if (IS_ERR(fte)) {
-               rule = (void *)fte;
+               handle = (void *)fte;
                goto unlock_fg;
        }
        tree_init_node(&fte->node, 0, del_fte);
-       rule = add_rule_fte(fte, fg, dest);
-       if (IS_ERR(rule)) {
+       nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
+       handle = add_rule_fte(fte, fg, dest, dest_num, false);
+       if (IS_ERR(handle)) {
                kfree(fte);
                goto unlock_fg;
        }
@@ -1137,19 +1271,24 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
 
        tree_add_node(&fte->node, &fg->node);
        list_add(&fte->node.list, prev);
-add_rule:
-       tree_add_node(&rule->node, &fte->node);
+add_rules:
+       for (i = 0; i < handle->num_rules; i++) {
+               if (atomic_read(&handle->rule[i]->node.refcount) == 1)
+                       tree_add_node(&handle->rule[i]->node, &fte->node);
+       }
+unlock_fte:
+       unlock_ref_node(&fte->node);
 unlock_fg:
        unlock_ref_node(&fg->node);
-       return rule;
+       return handle;
 }
 
-struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule)
+struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle)
 {
        struct mlx5_flow_rule *dst;
        struct fs_fte *fte;
 
-       fs_get_obj(fte, rule->node.parent);
+       fs_get_obj(fte, handle->rule[0]->node.parent);
 
        fs_for_each_dst(dst, fte) {
                if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
@@ -1167,8 +1306,8 @@ static bool counter_is_valid(struct mlx5_fc *counter, u32 action)
        if (!counter)
                return false;
 
-       /* Hardware support counter for a drop action only */
-       return action == (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT);
+       return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
+                         MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
 }
 
 static bool dest_is_valid(struct mlx5_flow_destination *dest,
@@ -1188,18 +1327,22 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
        return true;
 }
 
-static struct mlx5_flow_rule *
-_mlx5_add_flow_rule(struct mlx5_flow_table *ft,
-                  struct mlx5_flow_spec *spec,
-                   u32 action,
-                   u32 flow_tag,
-                   struct mlx5_flow_destination *dest)
+static struct mlx5_flow_handle *
+_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+                    struct mlx5_flow_spec *spec,
+                    struct mlx5_flow_act *flow_act,
+                    struct mlx5_flow_destination *dest,
+                    int dest_num)
+
 {
        struct mlx5_flow_group *g;
-       struct mlx5_flow_rule *rule;
+       struct mlx5_flow_handle *rule;
+       int i;
 
-       if (!dest_is_valid(dest, action, ft))
-               return ERR_PTR(-EINVAL);
+       for (i = 0; i < dest_num; i++) {
+               if (!dest_is_valid(&dest[i], flow_act->action, ft))
+                       return ERR_PTR(-EINVAL);
+       }
 
        nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
        fs_for_each_fg(g, ft)
@@ -1208,7 +1351,7 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft,
                                           g->mask.match_criteria,
                                           spec->match_criteria)) {
                        rule = add_rule_fg(g, spec->match_value,
-                                          action, flow_tag, dest);
+                                          flow_act, dest, dest_num);
                        if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC)
                                goto unlock;
                }
@@ -1220,8 +1363,7 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft,
                goto unlock;
        }
 
-       rule = add_rule_fg(g, spec->match_value,
-                          action, flow_tag, dest);
+       rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num);
        if (IS_ERR(rule)) {
                /* Remove assumes refcount > 0 and autogroup creates a group
                 * with a refcount = 0.
@@ -1242,22 +1384,22 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
                (MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
 }
 
-struct mlx5_flow_rule *
-mlx5_add_flow_rule(struct mlx5_flow_table *ft,
-                  struct mlx5_flow_spec *spec,
-                  u32 action,
-                  u32 flow_tag,
-                  struct mlx5_flow_destination *dest)
+struct mlx5_flow_handle *
+mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+                   struct mlx5_flow_spec *spec,
+                   struct mlx5_flow_act *flow_act,
+                   struct mlx5_flow_destination *dest,
+                   int dest_num)
 {
        struct mlx5_flow_root_namespace *root = find_root(&ft->node);
        struct mlx5_flow_destination gen_dest;
        struct mlx5_flow_table *next_ft = NULL;
-       struct mlx5_flow_rule *rule = NULL;
-       u32 sw_action = action;
+       struct mlx5_flow_handle *handle = NULL;
+       u32 sw_action = flow_act->action;
        struct fs_prio *prio;
 
        fs_get_obj(prio, ft->node.parent);
-       if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+       if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
                if (!fwd_next_prio_supported(ft))
                        return ERR_PTR(-EOPNOTSUPP);
                if (dest)
@@ -1268,34 +1410,40 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
                        gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                        gen_dest.ft = next_ft;
                        dest = &gen_dest;
-                       action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+                       dest_num = 1;
+                       flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
                } else {
                        mutex_unlock(&root->chain_lock);
                        return ERR_PTR(-EOPNOTSUPP);
                }
        }
 
-       rule = _mlx5_add_flow_rule(ft, spec, action, flow_tag, dest);
+       handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, dest_num);
 
        if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
-               if (!IS_ERR_OR_NULL(rule) &&
-                   (list_empty(&rule->next_ft))) {
+               if (!IS_ERR_OR_NULL(handle) &&
+                   (list_empty(&handle->rule[0]->next_ft))) {
                        mutex_lock(&next_ft->lock);
-                       list_add(&rule->next_ft, &next_ft->fwd_rules);
+                       list_add(&handle->rule[0]->next_ft,
+                                &next_ft->fwd_rules);
                        mutex_unlock(&next_ft->lock);
-                       rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+                       handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
                }
                mutex_unlock(&root->chain_lock);
        }
-       return rule;
+       return handle;
 }
-EXPORT_SYMBOL(mlx5_add_flow_rule);
+EXPORT_SYMBOL(mlx5_add_flow_rules);
 
-void mlx5_del_flow_rule(struct mlx5_flow_rule *rule)
+void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
 {
-       tree_remove_node(&rule->node);
+       int i;
+
+       for (i = handle->num_rules - 1; i >= 0; i--)
+               tree_remove_node(&handle->rule[i]->node);
+       kfree(handle);
 }
-EXPORT_SYMBOL(mlx5_del_flow_rule);
+EXPORT_SYMBOL(mlx5_del_flow_rules);
 
 /* Assuming prio->node.children(flow tables) is sorted by level */
 static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
@@ -1675,7 +1823,7 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
        ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
        if (!ns)
                return -EINVAL;
-       ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL);
+       ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL, 0);
        if (IS_ERR(ft)) {
                mlx5_core_err(steering->dev, "Failed to create last anchor flow table");
                return PTR_ERR(ft);
@@ -1687,7 +1835,7 @@ static int init_root_ns(struct mlx5_flow_steering *steering)
 {
 
        steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX);
-       if (IS_ERR_OR_NULL(steering->root_ns))
+       if (!steering->root_ns)
                goto cleanup;
 
        if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node))
index 71ff03bceabb81b3b159f4e6b5f90b7a6df7ad9e..8e668c63f69ec4afefb197f1f4c0a32ca3760179 100644 (file)
@@ -94,6 +94,11 @@ struct mlx5_flow_rule {
        u32                                     sw_action;
 };
 
+/*
+ * Handle for a set of flow rules that were added together.  This is what
+ * mlx5_add_flow_rules() returns and what mlx5_del_flow_rules() consumes.
+ */
+struct mlx5_flow_handle {
+       /* Number of entries in rule[]; mlx5_del_flow_rules() walks them all */
+       int num_rules;
+       /* Trailing array of rule pointers (flexible array member) */
+       struct mlx5_flow_rule *rule[];
+};
+
 /* Type of children is mlx5_flow_group */
 struct mlx5_flow_table {
        struct fs_node                  node;
@@ -112,6 +117,7 @@ struct mlx5_flow_table {
        struct mutex                    lock;
        /* FWD rules that point on this flow table */
        struct list_head                fwd_rules;
+       u32                             flags;
 };
 
 struct mlx5_fc_cache {
@@ -145,6 +151,7 @@ struct fs_fte {
        u32                             flow_tag;
        u32                             index;
        u32                             action;
+       u32                             encap_id;
        enum fs_fte_status              status;
        struct mlx5_fc                  *counter;
 };
index 3a9195b4169dc0b1cbbb60b31ac3d2a2ddadcecb..3b026c151cf24f370137b4655b417d6e024d6dec 100644 (file)
@@ -218,6 +218,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
                goto err_out;
 
        if (aging) {
+               counter->cache.lastuse = jiffies;
                counter->aging = true;
 
                spin_lock(&fc_stats->addlist_lock);
index 1a05fb965c8dd5051cc929f1fb4fbaaf4c4584e4..5bcf93422ee0b28337040138d026c2ab443642a9 100644 (file)
@@ -61,10 +61,15 @@ enum {
 enum {
        MLX5_NIC_IFC_FULL               = 0,
        MLX5_NIC_IFC_DISABLED           = 1,
-       MLX5_NIC_IFC_NO_DRAM_NIC        = 2
+       MLX5_NIC_IFC_NO_DRAM_NIC        = 2,
+       MLX5_NIC_IFC_INVALID            = 3
 };
 
-static u8 get_nic_interface(struct mlx5_core_dev *dev)
+enum {
+       MLX5_DROP_NEW_HEALTH_WORK,
+};
+
+static u8 get_nic_state(struct mlx5_core_dev *dev)
 {
        return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
 }
@@ -97,7 +102,7 @@ static int in_fatal(struct mlx5_core_dev *dev)
        struct mlx5_core_health *health = &dev->priv.health;
        struct health_buffer __iomem *h = health->health;
 
-       if (get_nic_interface(dev) == MLX5_NIC_IFC_DISABLED)
+       if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
                return 1;
 
        if (ioread32be(&h->fw_ver) == 0xffffffff)
@@ -127,7 +132,7 @@ unlock:
 
 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
 {
-       u8 nic_interface = get_nic_interface(dev);
+       u8 nic_interface = get_nic_state(dev);
 
        switch (nic_interface) {
        case MLX5_NIC_IFC_FULL:
@@ -149,8 +154,34 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
        mlx5_disable_device(dev);
 }
 
+/*
+ * Delayed-work handler for health->recover_work: try to bring the device
+ * back with mlx5_recover_device().  Recovery is abandoned, with an error
+ * message, when the NIC state reads MLX5_NIC_IFC_INVALID.
+ */
+static void health_recover(struct work_struct *work)
+{
+       struct delayed_work *dwork = container_of(work, struct delayed_work,
+                                                 work);
+       struct mlx5_core_health *health = container_of(dwork,
+                                                      struct mlx5_core_health,
+                                                      recover_work);
+       struct mlx5_priv *priv = container_of(health, struct mlx5_priv, health);
+       struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
+                                                priv);
+
+       if (get_nic_state(dev) != MLX5_NIC_IFC_INVALID) {
+               dev_err(&dev->pdev->dev, "starting health recovery flow\n");
+               mlx5_recover_device(dev);
+               return;
+       }
+
+       dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n");
+}
+
+/* How much time to wait until health resetting the driver (in msecs) */
+#define MLX5_RECOVERY_DELAY_MSECS 60000
 static void health_care(struct work_struct *work)
 {
+       unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS);
        struct mlx5_core_health *health;
        struct mlx5_core_dev *dev;
        struct mlx5_priv *priv;
@@ -160,6 +191,14 @@ static void health_care(struct work_struct *work)
        dev = container_of(priv, struct mlx5_core_dev, priv);
        mlx5_core_warn(dev, "handling bad device here\n");
        mlx5_handle_bad_state(dev);
+
+       spin_lock(&health->wq_lock);
+       if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+               schedule_delayed_work(&health->recover_work, recover_delay);
+       else
+               dev_err(&dev->pdev->dev,
+                       "new health works are not permitted at this stage\n");
+       spin_unlock(&health->wq_lock);
 }
 
 static const char *hsynd_str(u8 synd)
@@ -272,7 +311,13 @@ static void poll_health(unsigned long data)
        if (in_fatal(dev) && !health->sick) {
                health->sick = true;
                print_health_info(dev);
-               schedule_work(&health->work);
+               spin_lock(&health->wq_lock);
+               if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
+                       queue_work(health->wq, &health->work);
+               else
+                       dev_err(&dev->pdev->dev,
+                               "new health works are not permitted at this stage\n");
+               spin_unlock(&health->wq_lock);
        }
 }
 
@@ -281,6 +326,8 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
        struct mlx5_core_health *health = &dev->priv.health;
 
        init_timer(&health->timer);
+       health->sick = 0;
+       clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
        health->health = &dev->iseg->health;
        health->health_counter = &dev->iseg->health_counter;
 
@@ -297,11 +344,22 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev)
        del_timer_sync(&health->timer);
 }
 
+/*
+ * Forbid queueing of new health work, then wait for the recovery and
+ * health work items that are already pending or running.
+ *
+ * MLX5_DROP_NEW_HEALTH_WORK is set under wq_lock so that it is ordered
+ * against the test_bit() checks made under the same lock before work is
+ * queued.  poll_health() takes that lock as well and has a timer-callback
+ * signature, so the lock is acquired here with interrupts disabled; a
+ * plain spin_lock() could deadlock if the timer fired on this CPU while
+ * the lock was held.
+ */
+void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
+{
+       struct mlx5_core_health *health = &dev->priv.health;
+       unsigned long flags;
+
+       spin_lock_irqsave(&health->wq_lock, flags);
+       set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+       spin_unlock_irqrestore(&health->wq_lock, flags);
+       cancel_delayed_work_sync(&health->recover_work);
+       cancel_work_sync(&health->work);
+}
+
 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
 {
        struct mlx5_core_health *health = &dev->priv.health;
 
-       flush_work(&health->work);
+       destroy_workqueue(health->wq);
 }
 
 int mlx5_health_init(struct mlx5_core_dev *dev)
@@ -316,9 +374,13 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
 
        strcpy(name, "mlx5_health");
        strcat(name, dev_name(&dev->pdev->dev));
+       health->wq = create_singlethread_workqueue(name);
        kfree(name);
-
+       if (!health->wq)
+               return -ENOMEM;
+       spin_lock_init(&health->wq_lock);
        INIT_WORK(&health->work, health_care);
+       INIT_DELAYED_WORK(&health->recover_work, health_recover);
 
        return 0;
 }
index d9c3c70b29e4799f87a0daed286c272be6180d01..2dc28695196c6cbea077b2448162ce0832f7944c 100644 (file)
@@ -46,7 +46,6 @@
 #include <linux/mlx5/srq.h>
 #include <linux/debugfs.h>
 #include <linux/kmod.h>
-#include <linux/delay.h>
 #include <linux/mlx5/mlx5_ifc.h>
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
@@ -175,6 +174,41 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
        return err;
 }
 
+/*
+ * Report the driver identity to the device as the string
+ * "Linux,<DRIVER_NAME>,<DRIVER_VERSION>" using the SET_DRIVER_VERSION
+ * command.  Nothing is sent when the device does not advertise the
+ * driver_version capability.  The command's return value is not checked.
+ */
+static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
+{
+       int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in,
+                                             driver_version);
+       u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {0};
+       u8 out[MLX5_ST_SZ_BYTES(set_driver_version_out)] = {0};
+       char *string;
+
+       if (!MLX5_CAP_GEN(dev, driver_version))
+               return;
+
+       string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version);
+
+       /*
+        * Build the string in one bounded call.  snprintf() writes at most
+        * driver_ver_sz bytes including the terminating NUL, whereas a
+        * strncat() bounded by the remaining size may place its NUL one
+        * byte past the end of the field.
+        */
+       snprintf(string, driver_ver_sz, "Linux,%s,%s",
+                DRIVER_NAME, DRIVER_VERSION);
+
+       /* Send the command */
+       MLX5_SET(set_driver_version_in, in, opcode,
+                MLX5_CMD_OP_SET_DRIVER_VERSION);
+
+       mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
 static int set_dma_caps(struct pci_dev *pdev)
 {
        int err;
@@ -844,12 +878,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
        struct pci_dev *pdev = dev->pdev;
        int err;
 
-       err = mlx5_query_hca_caps(dev);
-       if (err) {
-               dev_err(&pdev->dev, "query hca failed\n");
-               goto out;
-       }
-
        err = mlx5_query_board_id(dev);
        if (err) {
                dev_err(&pdev->dev, "query board id failed\n");
@@ -1021,8 +1049,16 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_pagealloc_stop;
        }
 
+       mlx5_set_driver_version(dev);
+
        mlx5_start_health_poll(dev);
 
+       err = mlx5_query_hca_caps(dev);
+       if (err) {
+               dev_err(&pdev->dev, "query hca failed\n");
+               goto err_stop_poll;
+       }
+
        if (boot && mlx5_init_once(dev, priv)) {
                dev_err(&pdev->dev, "sw objs init failed\n");
                goto err_stop_poll;
@@ -1202,6 +1238,8 @@ static const struct devlink_ops mlx5_devlink_ops = {
 #ifdef CONFIG_MLX5_CORE_EN
        .eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
        .eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
+       .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
+       .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
 #endif
 };
 
@@ -1226,6 +1264,9 @@ static int init_one(struct pci_dev *pdev,
 
        pci_set_drvdata(pdev, dev);
 
+       dev->pdev = pdev;
+       dev->event = mlx5_core_event;
+
        if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) {
                mlx5_core_warn(dev,
                               "selected profile out of range, selecting default (%d)\n",
@@ -1233,8 +1274,6 @@ static int init_one(struct pci_dev *pdev,
                prof_sel = MLX5_DEFAULT_PROF;
        }
        dev->profile = &profile[prof_sel];
-       dev->pdev = pdev;
-       dev->event = mlx5_core_event;
 
        INIT_LIST_HEAD(&priv->ctx_list);
        spin_lock_init(&priv->ctx_lock);
@@ -1313,10 +1352,16 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
        struct mlx5_priv *priv = &dev->priv;
 
        dev_info(&pdev->dev, "%s was called\n", __func__);
+
        mlx5_enter_error_state(dev);
        mlx5_unload_one(dev, priv, false);
-       pci_save_state(pdev);
-       mlx5_pci_disable_device(dev);
+       /* In case of kernel call save the pci state and drain health wq */
+       if (state) {
+               pci_save_state(pdev);
+               mlx5_drain_health_wq(dev);
+               mlx5_pci_disable_device(dev);
+       }
+
        return state == pci_channel_io_perm_failure ?
                PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
 }
@@ -1373,11 +1418,6 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
        return PCI_ERS_RESULT_RECOVERED;
 }
 
-void mlx5_disable_device(struct mlx5_core_dev *dev)
-{
-       mlx5_pci_err_detected(dev->pdev, 0);
-}
-
 static void mlx5_pci_resume(struct pci_dev *pdev)
 {
        struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
@@ -1422,11 +1462,24 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
        { PCI_VDEVICE(MELLANOX, 0x1017) },                      /* ConnectX-5, PCIe 3.0 */
        { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF},   /* ConnectX-5 VF */
        { PCI_VDEVICE(MELLANOX, 0x1019) },                      /* ConnectX-5, PCIe 4.0 */
+       { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF},   /* ConnectX-5, PCIe 4.0 VF */
        { 0, }
 };
 
 MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
 
+/*
+ * Run the PCI error-detected path on the driver's own initiative.
+ * Passing 0 as the channel state makes mlx5_pci_err_detected() skip the
+ * steps it guards with "if (state)": saving the PCI state, draining the
+ * health workqueue and disabling the PCI device.
+ */
+void mlx5_disable_device(struct mlx5_core_dev *dev)
+{
+       mlx5_pci_err_detected(dev->pdev, 0);
+}
+
+/*
+ * Attempt to recover the device: disable it at the PCI level, run the
+ * slot-reset handler and, only if that reports PCI_ERS_RESULT_RECOVERED,
+ * run the resume handler.
+ */
+void mlx5_recover_device(struct mlx5_core_dev *dev)
+{
+       pci_ers_result_t res;
+
+       mlx5_pci_disable_device(dev);
+
+       res = mlx5_pci_slot_reset(dev->pdev);
+       if (res != PCI_ERS_RESULT_RECOVERED)
+               return;
+
+       mlx5_pci_resume(dev->pdev);
+}
+
 static struct pci_driver mlx5_core_driver = {
        .name           = DRIVER_NAME,
        .id_table       = mlx5_core_pci_table,
index 3d0cfb9f18f99154e6f904625a41ff457c4e2508..7e635ebda199cddf89eb397758ff94c981d2b2f0 100644 (file)
@@ -81,8 +81,10 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
 void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
                     unsigned long param);
+void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 void mlx5_enter_error_state(struct mlx5_core_dev *dev);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
+void mlx5_recover_device(struct mlx5_core_dev *dev);
 int mlx5_sriov_init(struct mlx5_core_dev *dev);
 void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
 int mlx5_sriov_attach(struct mlx5_core_dev *dev);
@@ -91,6 +93,13 @@ int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
 bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+                                      void *context, u32 *element_id);
+int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+                                      void *context, u32 element_id,
+                                      u32 modify_bitmask);
+int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+                                       u32 element_id);
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
 cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev);
 u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx);
@@ -113,6 +122,12 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
 void mlx5_dev_list_lock(void);
 void mlx5_dev_list_unlock(void);
 int mlx5_dev_list_trylock(void);
+int mlx5_encap_alloc(struct mlx5_core_dev *dev,
+                    int header_type,
+                    size_t size,
+                    void *encap_header,
+                    u32 *encap_id);
+void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id);
 
 bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
 
index cc4fd61914d30b567d962f24f15bfea3bb1da92c..a57d5a81eb05dbbb30054519ceb543ad4879c6ef 100644 (file)
@@ -209,6 +209,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr)
 static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
 {
        struct page *page;
+       u64 zero_addr = 1;
        u64 addr;
        int err;
        int nid = dev_to_node(&dev->pdev->dev);
@@ -218,26 +219,35 @@ static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
                mlx5_core_warn(dev, "failed to allocate page\n");
                return -ENOMEM;
        }
+map:
        addr = dma_map_page(&dev->pdev->dev, page, 0,
                            PAGE_SIZE, DMA_BIDIRECTIONAL);
        if (dma_mapping_error(&dev->pdev->dev, addr)) {
                mlx5_core_warn(dev, "failed dma mapping page\n");
                err = -ENOMEM;
-               goto out_alloc;
+               goto err_mapping;
        }
+
+       /* Firmware doesn't support page with physical address 0 */
+       if (addr == 0) {
+               zero_addr = addr;
+               goto map;
+       }
+
        err = insert_page(dev, addr, page, func_id);
        if (err) {
                mlx5_core_err(dev, "failed to track allocated page\n");
-               goto out_mapping;
+               dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE,
+                              DMA_BIDIRECTIONAL);
        }
 
-       return 0;
-
-out_mapping:
-       dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+err_mapping:
+       if (err)
+               __free_page(page);
 
-out_alloc:
-       __free_page(page);
+       if (zero_addr == 0)
+               dma_unmap_page(&dev->pdev->dev, zero_addr, PAGE_SIZE,
+                              DMA_BIDIRECTIONAL);
 
        return err;
 }
index 34e7184e23c9bac44a7d06d1e4e2e85990441e2e..d2ec9d232a70727df71d0c733f60c78a55415392 100644 (file)
@@ -548,6 +548,26 @@ int mlx5_max_tc(struct mlx5_core_dev *mdev)
        return num_tc - 1;
 }
 
+/*
+ * Access the DCBX_PARAM register for port 1 and place the result in @out.
+ * The output length passed down is the size of the dcbx_param layout, so
+ * @out must be at least that large.
+ * NOTE(review): the final argument 0 presumably selects a read - confirm
+ * against mlx5_core_access_reg().
+ */
+int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out)
+{
+       u32 req[MLX5_ST_SZ_DW(dcbx_param)] = {0};
+
+       MLX5_SET(dcbx_param, req, port_number, 1);
+
+       return mlx5_core_access_reg(mdev, req, sizeof(req), out, sizeof(req),
+                                   MLX5_REG_DCBX_PARAM, 0, 0);
+}
+
+/*
+ * Access the DCBX_PARAM register for port 1 using the caller's @in buffer.
+ * port_number is written into @in before the access, so the caller's
+ * buffer is modified; it must be at least the size of the dcbx_param
+ * layout.
+ * NOTE(review): the final argument 1 presumably selects a write - confirm
+ * against mlx5_core_access_reg().
+ */
+int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in)
+{
+       u32 reply[MLX5_ST_SZ_DW(dcbx_param)];
+
+       MLX5_SET(dcbx_param, in, port_number, 1);
+
+       return mlx5_core_access_reg(mdev, in, sizeof(reply), reply,
+                                   sizeof(reply), MLX5_REG_DCBX_PARAM, 0, 1);
+}
+
 int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
 {
        u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0};
@@ -572,6 +592,28 @@ int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
 }
 EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc);
 
+/*
+ * Query the QTCT register of port 1 for priority @prio and store the
+ * tclass field of the reply in @tc.
+ *
+ * Returns 0 on success or the error from mlx5_core_access_reg(); @tc is
+ * left untouched on failure.
+ */
+int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev,
+                           u8 prio, u8 *tc)
+{
+       u32 out[MLX5_ST_SZ_DW(qtct_reg)] = {0};
+       u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0};
+       int err;
+
+       MLX5_SET(qtct_reg, in, prio, prio);
+       MLX5_SET(qtct_reg, in, port_number, 1);
+
+       err = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+                                  sizeof(out), MLX5_REG_QTCT, 0, 0);
+       if (err)
+               return err;
+
+       *tc = MLX5_GET(qtct_reg, out, tclass);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc);
+
 static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
                                   int inlen)
 {
@@ -625,6 +667,27 @@ int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw)
 }
 EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc);
 
+/*
+ * Read the QETC register and store the bw_allocation field of traffic
+ * class @tc in @bw_pct.
+ *
+ * Returns 0 on success or the error from mlx5_query_port_qetcr_reg();
+ * @bw_pct is left untouched on failure.
+ * NOTE(review): @tc is used as an index without a range check here -
+ * callers are assumed to pass a valid traffic class.
+ */
+int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev,
+                               u8 tc, u8 *bw_pct)
+{
+       u32 qetc[MLX5_ST_SZ_DW(qetc_reg)];
+       void *tc_conf;
+       int ret;
+
+       ret = mlx5_query_port_qetcr_reg(mdev, qetc, sizeof(qetc));
+       if (!ret) {
+               tc_conf = MLX5_ADDR_OF(qetc_reg, qetc, tc_configuration[tc]);
+               *bw_pct = MLX5_GET(ets_tcn_config_reg, tc_conf, bw_allocation);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_tc_bw_alloc);
+
 int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
                                    u8 *max_bw_value,
                                    u8 *max_bw_units)
@@ -746,3 +809,60 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
        *supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap));
        *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
 }
+
+/*
+ * Human-readable module status names for port module event messages.
+ * mlx5_port_module_event() indexes this table with (module_status - 1),
+ * since the status values start at 1.
+ */
+static const char *mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
+       "Cable plugged",   /* MLX5_MODULE_STATUS_PLUGGED    = 0x1 */
+       "Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED  = 0x2 */
+       "Cable error",     /* MLX5_MODULE_STATUS_ERROR      = 0x3 */
+};
+
+/*
+ * Human-readable module error names, indexed directly by the error type
+ * taken from the event.  mlx5_port_module_event() clamps unrecognised
+ * types to MLX5_MODULE_EVENT_ERROR_UNKNOWN before indexing; the last
+ * entry, "Unknown status", is presumably the one that value selects.
+ */
+static const char *mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
+       "Power budget exceeded",
+       "Long Range for non MLNX cable",
+       "Bus stuck(I2C or data shorted)",
+       "No EEPROM/retry timeout",
+       "Enforce part number list",
+       "Unknown identifier",
+       "High Temperature",
+       "Bad or shorted cable/module",
+       "Unknown status",
+};
+
+/*
+ * Handle a port module event: bump the matching counter in
+ * priv->pme_stats and, subject to printk rate limiting, log the event.
+ *
+ * Plug/unplug statuses are counted in status_counters[status - 1].  Error
+ * statuses are counted in error_counters[error_type], with unrecognised
+ * error types folded into MLX5_MODULE_EVENT_ERROR_UNKNOWN.  Any other
+ * status value, including 0, is neither counted nor logged.
+ */
+void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+       enum port_module_event_status_type module_status;
+       enum port_module_event_error_type error_type;
+       struct mlx5_eqe_port_module *module_event_eqe;
+       struct mlx5_priv *priv = &dev->priv;
+       bool plug_event;
+       u8 module_num;
+
+       module_event_eqe = &eqe->data.port_module;
+       module_num = module_event_eqe->module;
+       module_status = module_event_eqe->module_status &
+                       PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+       error_type = module_event_eqe->error_type &
+                    PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+
+       /*
+        * The status comes straight from the event and is used as
+        * (status - 1) to index arrays, so a value of 0 must be rejected
+        * or it would access element -1.
+        */
+       plug_event = module_status >= MLX5_MODULE_STATUS_PLUGGED &&
+                    module_status < MLX5_MODULE_STATUS_ERROR;
+
+       if (plug_event) {
+               priv->pme_stats.status_counters[module_status - 1]++;
+       } else if (module_status == MLX5_MODULE_STATUS_ERROR) {
+               if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
+                       /* Unknown error type */
+                       error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
+               priv->pme_stats.error_counters[error_type]++;
+       }
+
+       if (!printk_ratelimit())
+               return;
+
+       if (plug_event)
+               mlx5_core_info(dev,
+                              "Port module event: module %u, %s\n",
+                              module_num, mlx5_pme_status[module_status - 1]);
+
+       else if (module_status == MLX5_MODULE_STATUS_ERROR)
+               mlx5_core_info(dev,
+                              "Port module event[error]: module %u, %s, %s\n",
+                              module_num, mlx5_pme_status[module_status - 1],
+                              mlx5_pme_error[error_type]);
+}
index 104902a93a0b577f22ecd43824c6c42b5a9b059d..e651e4c02867740d35c07bfcf485860f26ad6409 100644 (file)
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
+/* Scheduling element fw management */
+/*
+ * Issue CREATE_SCHEDULING_ELEMENT for the given scheduling @hierarchy.
+ *
+ * @ctx must point to a scheduling_context layout; it is copied into the
+ * command.  On success the id reported by the device is stored in
+ * @element_id and 0 is returned; otherwise the error from mlx5_cmd_exec()
+ * is returned and @element_id is left untouched.
+ */
+int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+                                      void *ctx, u32 *element_id)
+{
+       u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)]  = {0};
+       /* The reply is parsed with the _out layout, so size it that way */
+       u32 out[MLX5_ST_SZ_DW(create_scheduling_element_out)] = {0};
+       void *schedc;
+       int err;
+
+       schedc = MLX5_ADDR_OF(create_scheduling_element_in, in,
+                             scheduling_context);
+       MLX5_SET(create_scheduling_element_in, in, opcode,
+                MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT);
+       MLX5_SET(create_scheduling_element_in, in, scheduling_hierarchy,
+                hierarchy);
+       memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
+
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       if (err)
+               return err;
+
+       *element_id = MLX5_GET(create_scheduling_element_out, out,
+                              scheduling_element_id);
+       return 0;
+}
+
+/*
+ * Issue MODIFY_SCHEDULING_ELEMENT for element @element_id in the given
+ * scheduling @hierarchy.  @ctx must point to a scheduling_context layout,
+ * which is copied into the command; @modify_bitmask is passed through in
+ * the command's modify_bitmask field.
+ *
+ * Returns the result of mlx5_cmd_exec().
+ * NOTE(review): the reply buffer is sized with the _in layout; confirm
+ * whether the _out layout was intended.
+ */
+int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+                                      void *ctx, u32 element_id,
+                                      u32 modify_bitmask)
+{
+       u32 out[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0};
+       u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)]  = {0};
+       void *sched_ctx = MLX5_ADDR_OF(modify_scheduling_element_in, in,
+                                      scheduling_context);
+
+       /* Fill in the command fields */
+       MLX5_SET(modify_scheduling_element_in, in, opcode,
+                MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT);
+       MLX5_SET(modify_scheduling_element_in, in, scheduling_hierarchy,
+                hierarchy);
+       MLX5_SET(modify_scheduling_element_in, in, scheduling_element_id,
+                element_id);
+       MLX5_SET(modify_scheduling_element_in, in, modify_bitmask,
+                modify_bitmask);
+
+       /* Copy the caller's scheduling context into the command */
+       memcpy(sched_ctx, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
+
+       return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+/*
+ * Issue DESTROY_SCHEDULING_ELEMENT for element @element_id in the given
+ * scheduling @hierarchy.
+ *
+ * Returns the result of mlx5_cmd_exec().
+ * NOTE(review): the reply buffer is sized with the _in layout; confirm
+ * whether the _out layout was intended.
+ */
+int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+                                       u32 element_id)
+{
+       u32 cmd_out[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0};
+       u32 cmd_in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)]  = {0};
+
+       MLX5_SET(destroy_scheduling_element_in, cmd_in, opcode,
+                MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
+       MLX5_SET(destroy_scheduling_element_in, cmd_in, scheduling_hierarchy,
+                hierarchy);
+       MLX5_SET(destroy_scheduling_element_in, cmd_in, scheduling_element_id,
+                element_id);
+
+       return mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
+                            cmd_out, sizeof(cmd_out));
+}
+
 /* Finds an entry where we can register the given rate
  * If the rate already exists, return the entry where it is registered,
  * otherwise return the first available entry.
index 525f17af108e35ea808e1dfe19345aad967d8c27..269e4401c342d1375e70a40ba9905dddf9b65cef 100644 (file)
@@ -113,15 +113,17 @@ static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in,
        return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
 }
 
-void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
-                                    u8 *min_inline_mode)
+int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+                                   u16 vport, u8 *min_inline)
 {
        u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0};
+       int err;
 
-       mlx5_query_nic_vport_context(mdev, 0, out, sizeof(out));
-
-       *min_inline_mode = MLX5_GET(query_nic_vport_context_out, out,
-                                   nic_vport_context.min_wqe_inline_mode);
+       err = mlx5_query_nic_vport_context(mdev, vport, out, sizeof(out));
+       if (!err)
+               *min_inline = MLX5_GET(query_nic_vport_context_out, out,
+                                      nic_vport_context.min_wqe_inline_mode);
+       return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline);
 
index 821a087c7ae221200f1a79c54ef399910325f644..921673c42bc98b3335ab65ed9b63737fe18f29e1 100644 (file)
@@ -101,13 +101,15 @@ err_db_free:
 
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                     void *cqc, struct mlx5_cqwq *wq,
-                    struct mlx5_wq_ctrl *wq_ctrl)
+                    struct mlx5_frag_wq_ctrl *wq_ctrl)
 {
        int err;
 
-       wq->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
-       wq->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
-       wq->sz_m1 = (1 << wq->log_sz) - 1;
+       wq->log_stride  = 6 + MLX5_GET(cqc, cqc, cqe_sz);
+       wq->log_sz      = MLX5_GET(cqc, cqc, log_cq_size);
+       wq->sz_m1       = (1 << wq->log_sz) - 1;
+       wq->log_frag_strides = PAGE_SHIFT - wq->log_stride;
+       wq->frag_sz_m1  = (1 << wq->log_frag_strides) - 1;
 
        err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
        if (err) {
@@ -115,14 +117,16 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                return err;
        }
 
-       err = mlx5_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
-                                 &wq_ctrl->buf, param->buf_numa_node);
+       err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
+                                      &wq_ctrl->frag_buf,
+                                      param->buf_numa_node);
        if (err) {
-               mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+               mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n",
+                              err);
                goto err_db_free;
        }
 
-       wq->buf = wq_ctrl->buf.direct.buf;
+       wq->frag_buf = wq_ctrl->frag_buf;
        wq->db  = wq_ctrl->db.db;
 
        wq_ctrl->mdev = mdev;
@@ -184,3 +188,9 @@ void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl)
        mlx5_buf_free(wq_ctrl->mdev, &wq_ctrl->buf);
        mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
 }
+
+void mlx5_cqwq_destroy(struct mlx5_frag_wq_ctrl *wq_ctrl)
+{
+       mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->frag_buf);
+       mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
+}
index 6c2a8f95093c6b0ea1212ac8ae9b1e29f421dad0..d8afed898c31d3719d6d44d3d46649e52e9baa10 100644 (file)
@@ -47,6 +47,12 @@ struct mlx5_wq_ctrl {
        struct mlx5_db          db;
 };
 
+struct mlx5_frag_wq_ctrl {
+       struct mlx5_core_dev    *mdev;
+       struct mlx5_frag_buf    frag_buf;
+       struct mlx5_db          db;
+};
+
 struct mlx5_wq_cyc {
        void                    *buf;
        __be32                  *db;
@@ -55,12 +61,14 @@ struct mlx5_wq_cyc {
 };
 
 struct mlx5_cqwq {
-       void                    *buf;
+       struct mlx5_frag_buf    frag_buf;
        __be32                  *db;
        u32                     sz_m1;
+       u32                     frag_sz_m1;
        u32                     cc; /* consumer counter */
        u8                      log_sz;
        u8                      log_stride;
+       u8                      log_frag_strides;
 };
 
 struct mlx5_wq_ll {
@@ -81,7 +89,7 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
 
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                     void *cqc, struct mlx5_cqwq *wq,
-                    struct mlx5_wq_ctrl *wq_ctrl);
+                    struct mlx5_frag_wq_ctrl *wq_ctrl);
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq);
 
 int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
@@ -90,6 +98,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq);
 
 void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
+void mlx5_cqwq_destroy(struct mlx5_frag_wq_ctrl *wq_ctrl);
 
 static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
 {
@@ -116,7 +125,10 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
 
 static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
 {
-       return wq->buf + (ix << wq->log_stride);
+       unsigned int frag = (ix >> wq->log_frag_strides);
+
+       return wq->frag_buf.frags[frag].buf +
+               ((wq->frag_sz_m1 & ix) << wq->log_stride);
 }
 
 static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq)
index 5989f7cb546235850cf3c3740b21d1a26d0417da..16f44b9aa07611cddf35b128d4e75fda20857161 100644 (file)
@@ -19,6 +19,15 @@ config MLXSW_CORE_HWMON
        ---help---
          Say Y here if you want to expose HWMON interface on mlxsw devices.
 
+config MLXSW_CORE_THERMAL
+       bool "Thermal zone support for Mellanox Technologies Switch ASICs"
+       depends on MLXSW_CORE && THERMAL
+       depends on !(MLXSW_CORE=y && THERMAL=m)
+       default y
+       ---help---
+        Say Y here if you want to automatically control fan speed according
+        to the ambient temperature reported by the ASIC.
+
 config MLXSW_PCI
        tristate "PCI bus implementation for Mellanox Technologies Switch ASICs"
        depends on PCI && HAS_DMA && HAS_IOMEM && MLXSW_CORE
@@ -29,9 +38,30 @@ config MLXSW_PCI
          To compile this driver as a module, choose M here: the
          module will be called mlxsw_pci.
 
+config MLXSW_I2C
+       tristate "I2C bus implementation for Mellanox Technologies Switch ASICs"
+       depends on I2C && MLXSW_CORE
+       default m
+       ---help---
+         This is I2C bus implementation for Mellanox Technologies Switch ASICs.
+
+         To compile this driver as a module, choose M here: the
+         module will be called mlxsw_i2c.
+
+config MLXSW_SWITCHIB
+       tristate "Mellanox Technologies SwitchIB and SwitchIB-2 support"
+       depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV
+       default m
+       ---help---
+         This driver supports Mellanox Technologies SwitchIB and SwitchIB-2
+         InfiniBand Switch ASICs.
+
+         To compile this driver as a module, choose M here: the
+         module will be called mlxsw_switchib.
+
 config MLXSW_SWITCHX2
        tristate "Mellanox Technologies SwitchX-2 support"
-       depends on MLXSW_CORE && NET_SWITCHDEV
+       depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV
        default m
        ---help---
          This driver supports Mellanox Technologies SwitchX-2 Ethernet
@@ -42,7 +72,7 @@ config MLXSW_SWITCHX2
 
 config MLXSW_SPECTRUM
        tristate "Mellanox Technologies Spectrum support"
-       depends on MLXSW_CORE && NET_SWITCHDEV && VLAN_8021Q
+       depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q
        default m
        ---help---
          This driver supports Mellanox Technologies Spectrum Ethernet
@@ -58,3 +88,14 @@ config MLXSW_SPECTRUM_DCB
        ---help---
          Say Y here if you want to use Data Center Bridging (DCB) in the
          driver.
+
+config MLXSW_MINIMAL
+       tristate "Mellanox Technologies minimal I2C support"
+       depends on MLXSW_CORE && MLXSW_I2C
+       default m
+       ---help---
+         This driver supports I2C access for Mellanox Technologies Switch
+         ASICs.
+
+         To compile this driver as a module, choose M here: the
+         module will be called mlxsw_minimal.
index d20ae1838a64d2f444c3cadfb0eeb3ca80183108..fe8dadba15abe7dfd9e706364ad3497abbef7b14 100644 (file)
@@ -1,8 +1,13 @@
 obj-$(CONFIG_MLXSW_CORE)       += mlxsw_core.o
 mlxsw_core-objs                        := core.o
 mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o
+mlxsw_core-$(CONFIG_MLXSW_CORE_THERMAL) += core_thermal.o
 obj-$(CONFIG_MLXSW_PCI)                += mlxsw_pci.o
 mlxsw_pci-objs                 := pci.o
+obj-$(CONFIG_MLXSW_I2C)                += mlxsw_i2c.o
+mlxsw_i2c-objs                 := i2c.o
+obj-$(CONFIG_MLXSW_SWITCHIB)   += mlxsw_switchib.o
+mlxsw_switchib-objs            := switchib.o
 obj-$(CONFIG_MLXSW_SWITCHX2)   += mlxsw_switchx2.o
 mlxsw_switchx2-objs            := switchx2.o
 obj-$(CONFIG_MLXSW_SPECTRUM)   += mlxsw_spectrum.o
@@ -10,3 +15,5 @@ mlxsw_spectrum-objs           := spectrum.o spectrum_buffers.o \
                                   spectrum_switchdev.o spectrum_router.o \
                                   spectrum_kvdl.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)    += spectrum_dcb.o
+obj-$(CONFIG_MLXSW_MINIMAL)    += mlxsw_minimal.o
+mlxsw_minimal-objs             := minimal.o
index a37d471728ede96fe8157657d49f3ddb7a99eb36..4dc028bb4a33330cca34c7eaeb5496c136499fca 100644 (file)
@@ -90,6 +90,23 @@ struct mlxsw_core_pcpu_stats {
        u32                     port_rx_invalid;
 };
 
+struct mlxsw_core_port {
+       struct devlink_port devlink_port;
+       void *port_driver_priv;
+       u8 local_port;
+};
+
+void *mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port)
+{
+       return mlxsw_core_port->port_driver_priv;
+}
+EXPORT_SYMBOL(mlxsw_core_port_driver_priv);
+
+static bool mlxsw_core_port_check(struct mlxsw_core_port *mlxsw_core_port)
+{
+       return mlxsw_core_port->port_driver_priv != NULL;
+}
+
 struct mlxsw_core {
        struct mlxsw_driver *driver;
        const struct mlxsw_bus *bus;
@@ -114,6 +131,8 @@ struct mlxsw_core {
        } lag;
        struct mlxsw_res res;
        struct mlxsw_hwmon *hwmon;
+       struct mlxsw_thermal *thermal;
+       struct mlxsw_core_port ports[MLXSW_PORT_MAX_PORTS];
        unsigned long driver_priv[0];
        /* driver_priv has to be always the last item */
 };
@@ -553,33 +572,18 @@ free_skb:
        dev_kfree_skb(skb);
 }
 
-static const struct mlxsw_rx_listener mlxsw_emad_rx_listener = {
-       .func = mlxsw_emad_rx_listener_func,
-       .local_port = MLXSW_PORT_DONT_CARE,
-       .trap_id = MLXSW_TRAP_ID_ETHEMAD,
-};
-
-static int mlxsw_emad_traps_set(struct mlxsw_core *mlxsw_core)
-{
-       char htgt_pl[MLXSW_REG_HTGT_LEN];
-       char hpkt_pl[MLXSW_REG_HPKT_LEN];
-       int err;
-
-       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD);
-       err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
-       if (err)
-               return err;
-
-       mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,
-                           MLXSW_TRAP_ID_ETHEMAD);
-       return mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl);
-}
+static const struct mlxsw_listener mlxsw_emad_rx_listener =
+       MLXSW_RXL(mlxsw_emad_rx_listener_func, ETHEMAD, TRAP_TO_CPU, false,
+                 EMAD, DISCARD);
 
 static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core)
 {
        u64 tid;
        int err;
 
+       if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX))
+               return 0;
+
        /* Set the upper 32 bits of the transaction ID field to a random
         * number. This allows us to discard EMADs addressed to other
         * devices.
@@ -591,39 +595,33 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core)
        INIT_LIST_HEAD(&mlxsw_core->emad.trans_list);
        spin_lock_init(&mlxsw_core->emad.trans_list_lock);
 
-       err = mlxsw_core_rx_listener_register(mlxsw_core,
-                                             &mlxsw_emad_rx_listener,
-                                             mlxsw_core);
+       err = mlxsw_core_trap_register(mlxsw_core, &mlxsw_emad_rx_listener,
+                                      mlxsw_core);
        if (err)
                return err;
 
-       err = mlxsw_emad_traps_set(mlxsw_core);
+       err = mlxsw_core->driver->basic_trap_groups_set(mlxsw_core);
        if (err)
                goto err_emad_trap_set;
-
        mlxsw_core->emad.use_emad = true;
 
        return 0;
 
 err_emad_trap_set:
-       mlxsw_core_rx_listener_unregister(mlxsw_core,
-                                         &mlxsw_emad_rx_listener,
-                                         mlxsw_core);
+       mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener,
+                                  mlxsw_core);
        return err;
 }
 
 static void mlxsw_emad_fini(struct mlxsw_core *mlxsw_core)
 {
-       char hpkt_pl[MLXSW_REG_HPKT_LEN];
 
-       mlxsw_core->emad.use_emad = false;
-       mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD,
-                           MLXSW_TRAP_ID_ETHEMAD);
-       mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl);
+       if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX))
+               return;
 
-       mlxsw_core_rx_listener_unregister(mlxsw_core,
-                                         &mlxsw_emad_rx_listener,
-                                         mlxsw_core);
+       mlxsw_core->emad.use_emad = false;
+       mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener,
+                                  mlxsw_core);
 }
 
 static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core,
@@ -823,17 +821,6 @@ static struct mlxsw_driver *mlxsw_core_driver_get(const char *kind)
 
        spin_lock(&mlxsw_core_driver_list_lock);
        mlxsw_driver = __driver_find(kind);
-       if (!mlxsw_driver) {
-               spin_unlock(&mlxsw_core_driver_list_lock);
-               request_module(MLXSW_MODULE_ALIAS_PREFIX "%s", kind);
-               spin_lock(&mlxsw_core_driver_list_lock);
-               mlxsw_driver = __driver_find(kind);
-       }
-       if (mlxsw_driver) {
-               if (!try_module_get(mlxsw_driver->owner))
-                       mlxsw_driver = NULL;
-       }
-
        spin_unlock(&mlxsw_core_driver_list_lock);
        return mlxsw_driver;
 }
@@ -845,9 +832,6 @@ static void mlxsw_core_driver_put(const char *kind)
        spin_lock(&mlxsw_core_driver_list_lock);
        mlxsw_driver = __driver_find(kind);
        spin_unlock(&mlxsw_core_driver_list_lock);
-       if (!mlxsw_driver)
-               return;
-       module_put(mlxsw_driver->owner);
 }
 
 static int mlxsw_core_debugfs_init(struct mlxsw_core *mlxsw_core)
@@ -934,6 +918,21 @@ static void *__dl_port(struct devlink_port *devlink_port)
        return container_of(devlink_port, struct mlxsw_core_port, devlink_port);
 }
 
+static int mlxsw_devlink_port_type_set(struct devlink_port *devlink_port,
+                                      enum devlink_port_type port_type)
+{
+       struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink);
+       struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
+       struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
+
+       if (!mlxsw_driver->port_type_set)
+               return -EOPNOTSUPP;
+
+       return mlxsw_driver->port_type_set(mlxsw_core,
+                                          mlxsw_core_port->local_port,
+                                          port_type);
+}
+
 static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port,
                                          unsigned int sb_index, u16 pool_index,
                                          u32 *p_threshold)
@@ -942,7 +941,8 @@ static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port,
        struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
        struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
 
-       if (!mlxsw_driver->sb_port_pool_get)
+       if (!mlxsw_driver->sb_port_pool_get ||
+           !mlxsw_core_port_check(mlxsw_core_port))
                return -EOPNOTSUPP;
        return mlxsw_driver->sb_port_pool_get(mlxsw_core_port, sb_index,
                                              pool_index, p_threshold);
@@ -956,7 +956,8 @@ static int mlxsw_devlink_sb_port_pool_set(struct devlink_port *devlink_port,
        struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
        struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
 
-       if (!mlxsw_driver->sb_port_pool_set)
+       if (!mlxsw_driver->sb_port_pool_set ||
+           !mlxsw_core_port_check(mlxsw_core_port))
                return -EOPNOTSUPP;
        return mlxsw_driver->sb_port_pool_set(mlxsw_core_port, sb_index,
                                              pool_index, threshold);
@@ -972,7 +973,8 @@ mlxsw_devlink_sb_tc_pool_bind_get(struct devlink_port *devlink_port,
        struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
        struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
 
-       if (!mlxsw_driver->sb_tc_pool_bind_get)
+       if (!mlxsw_driver->sb_tc_pool_bind_get ||
+           !mlxsw_core_port_check(mlxsw_core_port))
                return -EOPNOTSUPP;
        return mlxsw_driver->sb_tc_pool_bind_get(mlxsw_core_port, sb_index,
                                                 tc_index, pool_type,
@@ -989,7 +991,8 @@ mlxsw_devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
        struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
        struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
 
-       if (!mlxsw_driver->sb_tc_pool_bind_set)
+       if (!mlxsw_driver->sb_tc_pool_bind_set ||
+           !mlxsw_core_port_check(mlxsw_core_port))
                return -EOPNOTSUPP;
        return mlxsw_driver->sb_tc_pool_bind_set(mlxsw_core_port, sb_index,
                                                 tc_index, pool_type,
@@ -1027,7 +1030,8 @@ mlxsw_devlink_sb_occ_port_pool_get(struct devlink_port *devlink_port,
        struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
        struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
 
-       if (!mlxsw_driver->sb_occ_port_pool_get)
+       if (!mlxsw_driver->sb_occ_port_pool_get ||
+           !mlxsw_core_port_check(mlxsw_core_port))
                return -EOPNOTSUPP;
        return mlxsw_driver->sb_occ_port_pool_get(mlxsw_core_port, sb_index,
                                                  pool_index, p_cur, p_max);
@@ -1043,7 +1047,8 @@ mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port,
        struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
        struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
 
-       if (!mlxsw_driver->sb_occ_tc_port_bind_get)
+       if (!mlxsw_driver->sb_occ_tc_port_bind_get ||
+           !mlxsw_core_port_check(mlxsw_core_port))
                return -EOPNOTSUPP;
        return mlxsw_driver->sb_occ_tc_port_bind_get(mlxsw_core_port,
                                                     sb_index, tc_index,
@@ -1051,6 +1056,7 @@ mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port,
 }
 
 static const struct devlink_ops mlxsw_devlink_ops = {
+       .port_type_set                  = mlxsw_devlink_port_type_set,
        .port_split                     = mlxsw_devlink_port_split,
        .port_unsplit                   = mlxsw_devlink_port_unsplit,
        .sb_pool_get                    = mlxsw_devlink_sb_pool_get,
@@ -1130,9 +1136,16 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
        if (err)
                goto err_hwmon_init;
 
-       err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
+       err = mlxsw_thermal_init(mlxsw_core, mlxsw_bus_info,
+                                &mlxsw_core->thermal);
        if (err)
-               goto err_driver_init;
+               goto err_thermal_init;
+
+       if (mlxsw_driver->init) {
+               err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
+               if (err)
+                       goto err_driver_init;
+       }
 
        err = mlxsw_core_debugfs_init(mlxsw_core);
        if (err)
@@ -1141,8 +1154,11 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
        return 0;
 
 err_debugfs_init:
-       mlxsw_core->driver->fini(mlxsw_core);
+       if (mlxsw_core->driver->fini)
+               mlxsw_core->driver->fini(mlxsw_core);
 err_driver_init:
+       mlxsw_thermal_fini(mlxsw_core->thermal);
+err_thermal_init:
 err_hwmon_init:
        devlink_unregister(devlink);
 err_devlink_register:
@@ -1167,11 +1183,13 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core)
        struct devlink *devlink = priv_to_devlink(mlxsw_core);
 
        mlxsw_core_debugfs_fini(mlxsw_core);
-       mlxsw_core->driver->fini(mlxsw_core);
+       if (mlxsw_core->driver->fini)
+               mlxsw_core->driver->fini(mlxsw_core);
+       mlxsw_thermal_fini(mlxsw_core->thermal);
        devlink_unregister(devlink);
        mlxsw_emad_fini(mlxsw_core);
-       mlxsw_core->bus->fini(mlxsw_core->bus_priv);
        kfree(mlxsw_core->lag.mapping);
+       mlxsw_core->bus->fini(mlxsw_core->bus_priv);
        free_percpu(mlxsw_core->pcpu_stats);
        devlink_free(devlink);
        mlxsw_core_driver_put(device_kind);
@@ -1348,6 +1366,75 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core,
 }
 EXPORT_SYMBOL(mlxsw_core_event_listener_unregister);
 
+static int mlxsw_core_listener_register(struct mlxsw_core *mlxsw_core,
+                                       const struct mlxsw_listener *listener,
+                                       void *priv)
+{
+       if (listener->is_event)
+               return mlxsw_core_event_listener_register(mlxsw_core,
+                                               &listener->u.event_listener,
+                                               priv);
+       else
+               return mlxsw_core_rx_listener_register(mlxsw_core,
+                                               &listener->u.rx_listener,
+                                               priv);
+}
+
+static void mlxsw_core_listener_unregister(struct mlxsw_core *mlxsw_core,
+                                     const struct mlxsw_listener *listener,
+                                     void *priv)
+{
+       if (listener->is_event)
+               mlxsw_core_event_listener_unregister(mlxsw_core,
+                                                    &listener->u.event_listener,
+                                                    priv);
+       else
+               mlxsw_core_rx_listener_unregister(mlxsw_core,
+                                                 &listener->u.rx_listener,
+                                                 priv);
+}
+
+int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core,
+                            const struct mlxsw_listener *listener, void *priv)
+{
+       char hpkt_pl[MLXSW_REG_HPKT_LEN];
+       int err;
+
+       err = mlxsw_core_listener_register(mlxsw_core, listener, priv);
+       if (err)
+               return err;
+
+       mlxsw_reg_hpkt_pack(hpkt_pl, listener->action, listener->trap_id,
+                           listener->trap_group, listener->is_ctrl);
+       err = mlxsw_reg_write(mlxsw_core,  MLXSW_REG(hpkt), hpkt_pl);
+       if (err)
+               goto err_trap_set;
+
+       return 0;
+
+err_trap_set:
+       mlxsw_core_listener_unregister(mlxsw_core, listener, priv);
+       return err;
+}
+EXPORT_SYMBOL(mlxsw_core_trap_register);
+
+void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core,
+                               const struct mlxsw_listener *listener,
+                               void *priv)
+{
+       char hpkt_pl[MLXSW_REG_HPKT_LEN];
+
+       if (!listener->is_event) {
+               mlxsw_reg_hpkt_pack(hpkt_pl, listener->unreg_action,
+                                   listener->trap_id, listener->trap_group,
+                                   listener->is_ctrl);
+               mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl);
+       }
+
+       mlxsw_core_listener_unregister(mlxsw_core, listener, priv);
+}
+EXPORT_SYMBOL(mlxsw_core_trap_unregister);
+
 static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core)
 {
        return atomic64_inc_return(&mlxsw_core->emad.tid);
@@ -1670,28 +1757,83 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core,
 }
 EXPORT_SYMBOL(mlxsw_core_res_get);
 
-int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core,
-                        struct mlxsw_core_port *mlxsw_core_port, u8 local_port,
-                        struct net_device *dev, bool split, u32 split_group)
+int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port)
 {
        struct devlink *devlink = priv_to_devlink(mlxsw_core);
+       struct mlxsw_core_port *mlxsw_core_port =
+                                       &mlxsw_core->ports[local_port];
        struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+       int err;
 
-       if (split)
-               devlink_port_split_set(devlink_port, split_group);
-       devlink_port_type_eth_set(devlink_port, dev);
-       return devlink_port_register(devlink, devlink_port, local_port);
+       mlxsw_core_port->local_port = local_port;
+       err = devlink_port_register(devlink, devlink_port, local_port);
+       if (err)
+               memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port));
+       return err;
 }
 EXPORT_SYMBOL(mlxsw_core_port_init);
 
-void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port)
+void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port)
 {
+       struct mlxsw_core_port *mlxsw_core_port =
+                                       &mlxsw_core->ports[local_port];
        struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
 
        devlink_port_unregister(devlink_port);
+       memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port));
 }
 EXPORT_SYMBOL(mlxsw_core_port_fini);
 
+void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+                            void *port_driver_priv, struct net_device *dev,
+                            bool split, u32 split_group)
+{
+       struct mlxsw_core_port *mlxsw_core_port =
+                                       &mlxsw_core->ports[local_port];
+       struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+
+       mlxsw_core_port->port_driver_priv = port_driver_priv;
+       if (split)
+               devlink_port_split_set(devlink_port, split_group);
+       devlink_port_type_eth_set(devlink_port, dev);
+}
+EXPORT_SYMBOL(mlxsw_core_port_eth_set);
+
+void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+                           void *port_driver_priv)
+{
+       struct mlxsw_core_port *mlxsw_core_port =
+                                       &mlxsw_core->ports[local_port];
+       struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+
+       mlxsw_core_port->port_driver_priv = port_driver_priv;
+       devlink_port_type_ib_set(devlink_port, NULL);
+}
+EXPORT_SYMBOL(mlxsw_core_port_ib_set);
+
+void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u8 local_port,
+                          void *port_driver_priv)
+{
+       struct mlxsw_core_port *mlxsw_core_port =
+                                       &mlxsw_core->ports[local_port];
+       struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+
+       mlxsw_core_port->port_driver_priv = port_driver_priv;
+       devlink_port_type_clear(devlink_port);
+}
+EXPORT_SYMBOL(mlxsw_core_port_clear);
+
+enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
+                                               u8 local_port)
+{
+       struct mlxsw_core_port *mlxsw_core_port =
+                                       &mlxsw_core->ports[local_port];
+       struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+
+       return devlink_port->type;
+}
+EXPORT_SYMBOL(mlxsw_core_port_type_get);
+
 static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core,
                                    const char *buf, size_t size)
 {
index 94a846d34c2f2168445e7e0d195f078efa9cb858..e856b49b83deaa86513504853ac8c69e4a9158cb 100644 (file)
 #include "cmd.h"
 #include "resources.h"
 
-#define MLXSW_MODULE_ALIAS_PREFIX "mlxsw-driver-"
-#define MODULE_MLXSW_DRIVER_ALIAS(kind)        \
-       MODULE_ALIAS(MLXSW_MODULE_ALIAS_PREFIX kind)
-
-#define MLXSW_DEVICE_KIND_SWITCHX2 "switchx2"
-#define MLXSW_DEVICE_KIND_SPECTRUM "spectrum"
-
 struct mlxsw_core;
+struct mlxsw_core_port;
 struct mlxsw_driver;
 struct mlxsw_bus;
 struct mlxsw_bus_info;
@@ -96,6 +90,50 @@ struct mlxsw_event_listener {
        enum mlxsw_event_trap_id trap_id;
 };
 
+struct mlxsw_listener {
+       u16 trap_id;
+       union {
+               struct mlxsw_rx_listener rx_listener;
+               struct mlxsw_event_listener event_listener;
+       } u;
+       enum mlxsw_reg_hpkt_action action;
+       enum mlxsw_reg_hpkt_action unreg_action;
+       u8 trap_group;
+       bool is_ctrl; /* should go via control buffer or not */
+       bool is_event;
+};
+
+#define MLXSW_RXL(_func, _trap_id, _action, _is_ctrl, _trap_group,     \
+                 _unreg_action)                                        \
+       {                                                               \
+               .trap_id = MLXSW_TRAP_ID_##_trap_id,                    \
+               .u.rx_listener =                                        \
+               {                                                       \
+                       .func = _func,                                  \
+                       .local_port = MLXSW_PORT_DONT_CARE,             \
+                       .trap_id = MLXSW_TRAP_ID_##_trap_id,            \
+               },                                                      \
+               .action = MLXSW_REG_HPKT_ACTION_##_action,              \
+               .unreg_action = MLXSW_REG_HPKT_ACTION_##_unreg_action,  \
+               .trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group,  \
+               .is_ctrl = _is_ctrl,                                    \
+               .is_event = false,                                      \
+       }
+
+#define MLXSW_EVENTL(_func, _trap_id, _trap_group)                     \
+       {                                                               \
+               .trap_id = MLXSW_TRAP_ID_##_trap_id,                    \
+               .u.event_listener =                                     \
+               {                                                       \
+                       .func = _func,                                  \
+                       .trap_id = MLXSW_TRAP_ID_##_trap_id,            \
+               },                                                      \
+               .action = MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,            \
+               .trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group,  \
+               .is_ctrl = false,                                       \
+               .is_event = true,                                       \
+       }
+
 int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core,
                                    const struct mlxsw_rx_listener *rxl,
                                    void *priv);
@@ -110,6 +148,13 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core,
                                          const struct mlxsw_event_listener *el,
                                          void *priv);
 
+int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core,
+                            const struct mlxsw_listener *listener,
+                            void *priv);
+void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core,
+                               const struct mlxsw_listener *listener,
+                               void *priv);
+
 typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload,
                                  size_t payload_len, unsigned long cb_priv);
 
@@ -148,23 +193,18 @@ u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core,
 void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
                                  u16 lag_id, u8 local_port);
 
-struct mlxsw_core_port {
-       struct devlink_port devlink_port;
-};
-
-static inline void *
-mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port)
-{
-       /* mlxsw_core_port is ensured to always be the first field in driver
-        * port structure.
-        */
-       return mlxsw_core_port;
-}
-
-int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core,
-                        struct mlxsw_core_port *mlxsw_core_port, u8 local_port,
-                        struct net_device *dev, bool split, u32 split_group);
-void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port);
+void *mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port);
+int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port);
+void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port);
+void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+                            void *port_driver_priv, struct net_device *dev,
+                            bool split, u32 split_group);
+void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+                           void *port_driver_priv);
+void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u8 local_port,
+                          void *port_driver_priv);
+enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
+                                               u8 local_port);
 
 int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay);
 
@@ -221,11 +261,13 @@ struct mlxsw_config_profile {
 struct mlxsw_driver {
        struct list_head list;
        const char *kind;
-       struct module *owner;
        size_t priv_size;
        int (*init)(struct mlxsw_core *mlxsw_core,
                    const struct mlxsw_bus_info *mlxsw_bus_info);
        void (*fini)(struct mlxsw_core *mlxsw_core);
+       int (*basic_trap_groups_set)(struct mlxsw_core *mlxsw_core);
+       int (*port_type_set)(struct mlxsw_core *mlxsw_core, u8 local_port,
+                            enum devlink_port_type new_type);
        int (*port_split)(struct mlxsw_core *mlxsw_core, u8 local_port,
                          unsigned int count);
        int (*port_unsplit)(struct mlxsw_core *mlxsw_core, u8 local_port);
@@ -278,6 +320,8 @@ u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core,
 #define MLXSW_CORE_RES_GET(res, short_res_id)                  \
        mlxsw_core_res_get(res, MLXSW_RES_ID_##short_res_id)
 
+#define MLXSW_BUS_F_TXRX       BIT(0)
+
 struct mlxsw_bus {
        const char *kind;
        int (*init)(void *bus_priv, struct mlxsw_core *mlxsw_core,
@@ -293,6 +337,7 @@ struct mlxsw_bus {
                        char *in_mbox, size_t in_mbox_size,
                        char *out_mbox, size_t out_mbox_size,
                        u8 *p_status);
+       u8 features;
 };
 
 struct mlxsw_bus_info {
@@ -328,4 +373,28 @@ static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
 
 #endif
 
+/* Opaque here: this header only passes pointers to the thermal state. */
+struct mlxsw_thermal;
+
+#ifdef CONFIG_MLXSW_CORE_THERMAL
+
+int mlxsw_thermal_init(struct mlxsw_core *mlxsw_core,
+                      const struct mlxsw_bus_info *mlxsw_bus_info,
+                      struct mlxsw_thermal **p_thermal);
+void mlxsw_thermal_fini(struct mlxsw_thermal *thermal);
+
+#else
+
+/* Stubs used when CONFIG_MLXSW_CORE_THERMAL is not set: init reports success
+ * without touching *p_thermal and fini does nothing.
+ */
+static inline int mlxsw_thermal_init(struct mlxsw_core *mlxsw_core,
+                                    const struct mlxsw_bus_info *mlxsw_bus_info,
+                                    struct mlxsw_thermal **p_thermal)
+{
+       return 0;
+}
+
+static inline void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
+{
+}
+
+#endif
+
 #endif
index 1ac8bf187168be0ad2ce41f10a0793078481fda8..ab710e37af99dcbaf514fc0b0433b55e7353c597 100644 (file)
@@ -262,7 +262,7 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
 
 static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon)
 {
-       char mtcap_pl[MLXSW_REG_MTCAP_LEN];
+       char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0};
        char mtmp_pl[MLXSW_REG_MTMP_LEN];
        u8 sensor_count;
        int i;
@@ -295,7 +295,7 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon)
 
 static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon)
 {
-       char mfcr_pl[MLXSW_REG_MFCR_LEN];
+       char mfcr_pl[MLXSW_REG_MFCR_LEN] = {0};
        enum mlxsw_reg_mfcr_pwm_frequency freq;
        unsigned int type_index;
        unsigned int num;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
new file mode 100644 (file)
index 0000000..d866c98
--- /dev/null
@@ -0,0 +1,442 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+ * Copyright (c) 2016 Ivan Vecera <cera@cera.cz>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/thermal.h>
+#include <linux/err.h>
+
+#include "core.h"
+
+#define MLXSW_THERMAL_POLL_INT 1000    /* ms */
+#define MLXSW_THERMAL_MAX_TEMP 110000  /* 110C */
+#define MLXSW_THERMAL_MAX_STATE        10
+#define MLXSW_THERMAL_MAX_DUTY 255
+
+/* One trip point of the thermal zone plus the cooling-state window that is
+ * passed to thermal_zone_bind_cooling_device() for that trip.
+ */
+struct mlxsw_thermal_trip {
+       int     type;           /* THERMAL_TRIP_* value */
+       int     temp;           /* millidegrees Celsius (110000 == 110C) */
+       int     min_state;      /* lower cooling-state limit used on bind */
+       int     max_state;      /* upper cooling-state limit used on bind */
+};
+
+/* Default trip table; mlxsw_thermal_init() copies it into each instance so
+ * the per-instance temperatures can later be changed via set_trip_temp.
+ * Cooling states are expressed relative to MLXSW_THERMAL_MAX_STATE.
+ */
+static const struct mlxsw_thermal_trip default_thermal_trips[] = {
+       {       /* In range - 0-40% PWM */
+               .type           = THERMAL_TRIP_ACTIVE,
+               .temp           = 75000,
+               .min_state      = 0,
+               .max_state      = (4 * MLXSW_THERMAL_MAX_STATE) / 10,
+       },
+       {       /* High - 40-100% PWM */
+               .type           = THERMAL_TRIP_ACTIVE,
+               .temp           = 80000,
+               .min_state      = (4 * MLXSW_THERMAL_MAX_STATE) / 10,
+               .max_state      = MLXSW_THERMAL_MAX_STATE,
+       },
+       {
+               /* Very high - 100% PWM */
+               .type           = THERMAL_TRIP_ACTIVE,
+               .temp           = 85000,
+               .min_state      = MLXSW_THERMAL_MAX_STATE,
+               .max_state      = MLXSW_THERMAL_MAX_STATE,
+       },
+       {       /* Warning */
+               .type           = THERMAL_TRIP_HOT,
+               .temp           = 105000,
+               .min_state      = MLXSW_THERMAL_MAX_STATE,
+               .max_state      = MLXSW_THERMAL_MAX_STATE,
+       },
+       {       /* Critical - soft poweroff */
+               .type           = THERMAL_TRIP_CRITICAL,
+               .temp           = MLXSW_THERMAL_MAX_TEMP,
+               .min_state      = MLXSW_THERMAL_MAX_STATE,
+               .max_state      = MLXSW_THERMAL_MAX_STATE,
+       }
+};
+
+#define MLXSW_THERMAL_NUM_TRIPS        ARRAY_SIZE(default_thermal_trips)
+
+/* Make sure all trips are writable */
+#define MLXSW_THERMAL_TRIP_MASK        (BIT(MLXSW_THERMAL_NUM_TRIPS) - 1)
+
+/* Per-device thermal state, used as devdata of both the thermal zone and the
+ * cooling devices registered in mlxsw_thermal_init().
+ */
+struct mlxsw_thermal {
+       struct mlxsw_core *core;        /* used for register query/write */
+       const struct mlxsw_bus_info *bus_info;  /* provides the struct device */
+       struct thermal_zone_device *tzdev;
+       /* One slot per PWM index; NULL when that PWM is not active */
+       struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
+       /* Instance copy of default_thermal_trips */
+       struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
+       enum thermal_device_mode mode;  /* last mode set via set_mode */
+};
+
+/* Scale a cooling state to a PWM duty value: state * MAX_DUTY / MAX_STATE,
+ * rounded to the closest integer.
+ */
+static inline u8 mlxsw_state_to_duty(int state)
+{
+       int scaled = state * MLXSW_THERMAL_MAX_DUTY;
+
+       return DIV_ROUND_CLOSEST(scaled, MLXSW_THERMAL_MAX_STATE);
+}
+
+/* Scale a PWM duty value back to a cooling state: duty * MAX_STATE / MAX_DUTY,
+ * rounded to the closest integer.
+ */
+static inline int mlxsw_duty_to_state(u8 duty)
+{
+       int scaled = duty * MLXSW_THERMAL_MAX_STATE;
+
+       return DIV_ROUND_CLOSEST(scaled, MLXSW_THERMAL_MAX_DUTY);
+}
+
+/* Look @cdev up in thermal->cdevs[].
+ * Returns its slot index, or -ENODEV when it is not in the array.
+ */
+static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
+                                       struct thermal_cooling_device *cdev)
+{
+       int idx = 0;
+
+       while (idx < MLXSW_MFCR_PWMS_MAX) {
+               if (thermal->cdevs[idx] == cdev)
+                       return idx;
+               idx++;
+       }
+
+       return -ENODEV;
+}
+
+/* Thermal zone .bind callback: bind one of our cooling devices to every trip
+ * point, using the trip's max/min state as the upper/lower limits.
+ * Cooling devices that are not in thermal->cdevs[] are ignored (returns 0).
+ */
+static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
+                             struct thermal_cooling_device *cdev)
+{
+       struct mlxsw_thermal *thermal = tzdev->devdata;
+       struct device *dev = thermal->bus_info->dev;
+       const struct mlxsw_thermal_trip *trip;
+       int trip_idx;
+       int err;
+
+       /* Not one of our cooling devices - nothing to bind */
+       if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
+               return 0;
+
+       for (trip_idx = 0; trip_idx < MLXSW_THERMAL_NUM_TRIPS; trip_idx++) {
+               trip = &thermal->trips[trip_idx];
+               err = thermal_zone_bind_cooling_device(tzdev, trip_idx, cdev,
+                                                      trip->max_state,
+                                                      trip->min_state,
+                                                      THERMAL_WEIGHT_DEFAULT);
+               if (err >= 0)
+                       continue;
+
+               dev_err(dev, "Failed to bind cooling device to trip %d\n",
+                       trip_idx);
+               return err;
+       }
+       return 0;
+}
+
+/* Thermal zone .unbind callback: detach one of our cooling devices from every
+ * trip point. Cooling devices that are not in thermal->cdevs[] are ignored.
+ */
+static int mlxsw_thermal_unbind(struct thermal_zone_device *tzdev,
+                               struct thermal_cooling_device *cdev)
+{
+       struct mlxsw_thermal *thermal = tzdev->devdata;
+       struct device *dev = thermal->bus_info->dev;
+       int trip_idx, err;
+
+       /* Not one of our cooling devices - nothing to unbind */
+       if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
+               return 0;
+
+       for (trip_idx = 0; trip_idx < MLXSW_THERMAL_NUM_TRIPS; trip_idx++) {
+               err = thermal_zone_unbind_cooling_device(tzdev, trip_idx, cdev);
+               if (err >= 0)
+                       continue;
+
+               dev_err(dev, "Failed to unbind cooling device\n");
+               return err;
+       }
+       return 0;
+}
+
+/* Thermal zone .get_mode callback: report the mode stored in our state. */
+static int mlxsw_thermal_get_mode(struct thermal_zone_device *tzdev,
+                                 enum thermal_device_mode *mode)
+{
+       const struct mlxsw_thermal *thermal = tzdev->devdata;
+
+       *mode = thermal->mode;
+       return 0;
+}
+
+/* Thermal zone .set_mode callback: polling runs every MLXSW_THERMAL_POLL_INT
+ * ms when enabled and is switched off (delay 0) otherwise. The new mode is
+ * recorded and a zone update is triggered.
+ */
+static int mlxsw_thermal_set_mode(struct thermal_zone_device *tzdev,
+                                 enum thermal_device_mode mode)
+{
+       struct mlxsw_thermal *thermal = tzdev->devdata;
+
+       /* polling_delay is written under the zone lock */
+       mutex_lock(&tzdev->lock);
+       tzdev->polling_delay = (mode == THERMAL_DEVICE_ENABLED) ?
+                              MLXSW_THERMAL_POLL_INT : 0;
+       mutex_unlock(&tzdev->lock);
+
+       thermal->mode = mode;
+       thermal_zone_device_update(tzdev, THERMAL_EVENT_UNSPECIFIED);
+
+       return 0;
+}
+
+/* Thermal zone .get_temp callback: query the MTMP register for sensor 0 and
+ * return the unpacked temperature through @p_temp.
+ * Returns 0 on success or the register query error.
+ */
+static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
+                                 int *p_temp)
+{
+       struct mlxsw_thermal *thermal = tzdev->devdata;
+       char mtmp_pl[MLXSW_REG_MTMP_LEN];
+       unsigned int temp;
+       int err;
+
+       mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
+       err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+       if (!err) {
+               mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+               *p_temp = (int) temp;
+               return 0;
+       }
+
+       dev_err(thermal->bus_info->dev, "Failed to query temp sensor\n");
+       return err;
+}
+
+/* Thermal zone .get_trip_type callback.
+ * Returns -EINVAL when @trip is outside the trip table.
+ */
+static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev,
+                                      int trip,
+                                      enum thermal_trip_type *p_type)
+{
+       struct mlxsw_thermal *thermal = tzdev->devdata;
+
+       if (trip >= 0 && trip < MLXSW_THERMAL_NUM_TRIPS) {
+               *p_type = thermal->trips[trip].type;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+/* Thermal zone .get_trip_temp callback.
+ * Returns -EINVAL when @trip is outside the trip table.
+ */
+static int mlxsw_thermal_get_trip_temp(struct thermal_zone_device *tzdev,
+                                      int trip, int *p_temp)
+{
+       struct mlxsw_thermal *thermal = tzdev->devdata;
+
+       if (trip >= 0 && trip < MLXSW_THERMAL_NUM_TRIPS) {
+               *p_temp = thermal->trips[trip].temp;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+/* Thermal zone .set_trip_temp callback: update the instance copy of a trip
+ * temperature. Rejects trips outside the table and temperatures above
+ * MLXSW_THERMAL_MAX_TEMP with -EINVAL.
+ */
+static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev,
+                                      int trip, int temp)
+{
+       struct mlxsw_thermal *thermal = tzdev->devdata;
+
+       if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+               return -EINVAL;
+       if (temp > MLXSW_THERMAL_MAX_TEMP)
+               return -EINVAL;
+
+       thermal->trips[trip].temp = temp;
+       return 0;
+}
+
+/* Thermal zone callbacks passed to thermal_zone_device_register() in
+ * mlxsw_thermal_init().
+ */
+static struct thermal_zone_device_ops mlxsw_thermal_ops = {
+       .bind = mlxsw_thermal_bind,
+       .unbind = mlxsw_thermal_unbind,
+       .get_mode = mlxsw_thermal_get_mode,
+       .set_mode = mlxsw_thermal_set_mode,
+       .get_temp = mlxsw_thermal_get_temp,
+       .get_trip_type  = mlxsw_thermal_get_trip_type,
+       .get_trip_temp  = mlxsw_thermal_get_trip_temp,
+       .set_trip_temp  = mlxsw_thermal_set_trip_temp,
+};
+
+/* Cooling device .get_max_state callback: the maximum state is the constant
+ * MLXSW_THERMAL_MAX_STATE for every cooling device.
+ */
+static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
+                                      unsigned long *p_state)
+{
+       *p_state = MLXSW_THERMAL_MAX_STATE;
+
+       return 0;
+}
+
+/* Cooling device .get_cur_state callback: read the PWM duty cycle of the PWM
+ * backing @cdev from the MFSC register and convert it to a cooling state.
+ * Returns 0, -ENODEV for a foreign @cdev, or the register query error.
+ */
+static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
+                                      unsigned long *p_state)
+{
+       struct mlxsw_thermal *thermal = cdev->devdata;
+       char mfsc_pl[MLXSW_REG_MFSC_LEN];
+       int pwm_idx;
+       int err;
+       u8 duty;
+
+       pwm_idx = mlxsw_get_cooling_device_idx(thermal, cdev);
+       if (pwm_idx < 0)
+               return pwm_idx;
+
+       mlxsw_reg_mfsc_pack(mfsc_pl, pwm_idx, 0);
+       err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
+       if (err) {
+               dev_err(thermal->bus_info->dev, "Failed to query PWM duty\n");
+               return err;
+       }
+
+       duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
+       *p_state = mlxsw_duty_to_state(duty);
+       return 0;
+}
+
+/* Cooling device .set_cur_state callback: program the PWM backing @cdev with
+ * the duty cycle that corresponds to @state.
+ *
+ * Returns 0 on success, -EINVAL when @state exceeds MLXSW_THERMAL_MAX_STATE,
+ * -ENODEV when @cdev is not one of our cooling devices, or the error returned
+ * by the MFSC register write.
+ */
+static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
+                                      unsigned long state)
+{
+       struct mlxsw_thermal *thermal = cdev->devdata;
+       struct device *dev = thermal->bus_info->dev;
+       char mfsc_pl[MLXSW_REG_MFSC_LEN];
+       int err, idx;
+
+       /* A state above the maximum would scale past MLXSW_THERMAL_MAX_DUTY
+        * and be truncated to u8, silently programming a wrong duty cycle.
+        */
+       if (state > MLXSW_THERMAL_MAX_STATE)
+               return -EINVAL;
+
+       idx = mlxsw_get_cooling_device_idx(thermal, cdev);
+       if (idx < 0)
+               return idx;
+
+       mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
+       err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
+       if (err) {
+               dev_err(dev, "Failed to write PWM duty\n");
+               return err;
+       }
+       return 0;
+}
+
+/* Cooling device callbacks passed to thermal_cooling_device_register() for
+ * every active PWM in mlxsw_thermal_init().
+ */
+static const struct thermal_cooling_device_ops mlxsw_cooling_ops = {
+       .get_max_state  = mlxsw_thermal_get_max_state,
+       .get_cur_state  = mlxsw_thermal_get_cur_state,
+       .set_cur_state  = mlxsw_thermal_set_cur_state,
+};
+
+/* Set up thermal control for one device.
+ *
+ * Allocates the thermal state, queries MFCR for the active tachometers and
+ * PWMs, clears the minimum RPM of every active tachometer, registers one
+ * cooling device per active PWM and finally registers the thermal zone.
+ *
+ * On success stores the new state in *p_thermal and returns 0. On failure
+ * everything registered so far is unregistered, the state is freed and a
+ * negative error code is returned; *p_thermal is left untouched.
+ */
+int mlxsw_thermal_init(struct mlxsw_core *core,
+                      const struct mlxsw_bus_info *bus_info,
+                      struct mlxsw_thermal **p_thermal)
+{
+       char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 };
+       enum mlxsw_reg_mfcr_pwm_frequency freq;
+       struct device *dev = bus_info->dev;
+       struct mlxsw_thermal *thermal;
+       u16 tacho_active;
+       u8 pwm_active;
+       int err, i;
+
+       thermal = devm_kzalloc(dev, sizeof(*thermal),
+                              GFP_KERNEL);
+       if (!thermal)
+               return -ENOMEM;
+
+       thermal->core = core;
+       thermal->bus_info = bus_info;
+       /* Start from the default trips; temperatures may be changed later */
+       memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips));
+
+       err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl);
+       if (err) {
+               dev_err(dev, "Failed to probe PWMs\n");
+               goto err_free_thermal;
+       }
+       /* tacho_active and pwm_active are bitmasks indexed by tacho/PWM id */
+       mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
+
+       for (i = 0; i < MLXSW_MFCR_TACHOS_MAX; i++) {
+               if (tacho_active & BIT(i)) {
+                       char mfsl_pl[MLXSW_REG_MFSL_LEN];
+
+                       mlxsw_reg_mfsl_pack(mfsl_pl, i, 0, 0);
+
+                       /* We need to query the register to preserve maximum */
+                       err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl),
+                                             mfsl_pl);
+                       if (err)
+                               goto err_free_thermal;
+
+                       /* set the minimal RPMs to 0 */
+                       mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0);
+                       err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl),
+                                             mfsl_pl);
+                       if (err)
+                               goto err_free_thermal;
+               }
+       }
+       /* cdevs[] is indexed by PWM id; inactive PWMs keep a NULL slot */
+       for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
+               if (pwm_active & BIT(i)) {
+                       struct thermal_cooling_device *cdev;
+
+                       cdev = thermal_cooling_device_register("Fan", thermal,
+                                                       &mlxsw_cooling_ops);
+                       if (IS_ERR(cdev)) {
+                               err = PTR_ERR(cdev);
+                               dev_err(dev, "Failed to register cooling device\n");
+                               goto err_unreg_cdevs;
+                       }
+                       thermal->cdevs[i] = cdev;
+               }
+       }
+
+       thermal->tzdev = thermal_zone_device_register("mlxsw",
+                                                     MLXSW_THERMAL_NUM_TRIPS,
+                                                     MLXSW_THERMAL_TRIP_MASK,
+                                                     thermal,
+                                                     &mlxsw_thermal_ops,
+                                                     NULL, 0,
+                                                     MLXSW_THERMAL_POLL_INT);
+       if (IS_ERR(thermal->tzdev)) {
+               err = PTR_ERR(thermal->tzdev);
+               dev_err(dev, "Failed to register thermal zone\n");
+               goto err_unreg_cdevs;
+       }
+
+       thermal->mode = THERMAL_DEVICE_ENABLED;
+       *p_thermal = thermal;
+       return 0;
+err_unreg_cdevs:
+       /* Only slots that were filled above are non-NULL (state is zeroed) */
+       for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
+               if (thermal->cdevs[i])
+                       thermal_cooling_device_unregister(thermal->cdevs[i]);
+err_free_thermal:
+       devm_kfree(dev, thermal);
+       return err;
+}
+
+/* Tear down what mlxsw_thermal_init() set up: unregister the thermal zone,
+ * then every registered cooling device, and free the state.
+ */
+void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
+{
+       struct thermal_cooling_device **slot;
+       int i;
+
+       if (thermal->tzdev) {
+               thermal_zone_device_unregister(thermal->tzdev);
+               thermal->tzdev = NULL;
+       }
+
+       for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
+               slot = &thermal->cdevs[i];
+               if (!*slot)
+                       continue;
+
+               thermal_cooling_device_unregister(*slot);
+               *slot = NULL;
+       }
+
+       devm_kfree(thermal->bus_info->dev, thermal);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
new file mode 100644 (file)
index 0000000..e50c8db
--- /dev/null
@@ -0,0 +1,582 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/i2c.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+
+#include "cmd.h"
+#include "core.h"
+#include "i2c.h"
+
+static const char mlxsw_i2c_driver_name[] = "mlxsw_i2c";
+
+#define MLXSW_I2C_CIR2_BASE            0x72000
+#define MLXSW_I2C_CIR_STATUS_OFF       0x18
+#define MLXSW_I2C_CIR2_OFF_STATUS      (MLXSW_I2C_CIR2_BASE + \
+                                        MLXSW_I2C_CIR_STATUS_OFF)
+#define MLXSW_I2C_OPMOD_SHIFT          12
+#define MLXSW_I2C_GO_BIT_SHIFT         23
+#define MLXSW_I2C_CIR_CTRL_STATUS_SHIFT        24
+#define MLXSW_I2C_GO_BIT               BIT(MLXSW_I2C_GO_BIT_SHIFT)
+#define MLXSW_I2C_GO_OPMODE            BIT(MLXSW_I2C_OPMOD_SHIFT)
+#define MLXSW_I2C_SET_IMM_CMD          (MLXSW_I2C_GO_OPMODE | \
+                                        MLXSW_CMD_OPCODE_QUERY_FW)
+#define MLXSW_I2C_PUSH_IMM_CMD         (MLXSW_I2C_GO_BIT | \
+                                        MLXSW_I2C_SET_IMM_CMD)
+#define MLXSW_I2C_SET_CMD              (MLXSW_CMD_OPCODE_ACCESS_REG)
+#define MLXSW_I2C_PUSH_CMD             (MLXSW_I2C_GO_BIT | MLXSW_I2C_SET_CMD)
+#define MLXSW_I2C_TLV_HDR_SIZE         0x10
+#define MLXSW_I2C_ADDR_WIDTH           4
+#define MLXSW_I2C_PUSH_CMD_SIZE                (MLXSW_I2C_ADDR_WIDTH + 4)
+#define MLXSW_I2C_READ_SEMA_SIZE       4
+#define MLXSW_I2C_PREP_SIZE            (MLXSW_I2C_ADDR_WIDTH + 28)
+#define MLXSW_I2C_MBOX_SIZE            20
+#define MLXSW_I2C_MBOX_OUT_PARAM_OFF   12
+#define MLXSW_I2C_MAX_BUFF_SIZE                32
+#define MLXSW_I2C_MBOX_OFFSET_BITS     20
+#define MLXSW_I2C_MBOX_SIZE_BITS       12
+#define MLXSW_I2C_ADDR_BUF_SIZE                4
+#define MLXSW_I2C_BLK_MAX              32
+#define MLXSW_I2C_RETRY                        5
+#define MLXSW_I2C_TIMEOUT_MSECS                5000
+
+/**
+ * struct mlxsw_i2c - device private data
+ * @cmd: command interface state
+ * @cmd.mb_size_in: input mailbox size (upper 12 bits of the mailbox word)
+ * @cmd.mb_off_in: input mailbox offset in register space (lower 20 bits)
+ * @cmd.mb_size_out: output mailbox size (upper 12 bits of the mailbox word)
+ * @cmd.mb_off_out: output mailbox offset in register space (lower 20 bits)
+ * @cmd.lock: command execution lock
+ * @dev: I2C device
+ * @core: switch core pointer
+ * @bus_info: bus info block
+ */
+struct mlxsw_i2c {
+       struct {
+               u32 mb_size_in;
+               u32 mb_off_in;
+               u32 mb_size_out;
+               u32 mb_off_out;
+               struct mutex lock;
+       } cmd;
+       struct device *dev;
+       struct mlxsw_core *core;
+       struct mlxsw_bus_info bus_info;
+};
+
+/* Initializer for a two-message read: first write the
+ * MLXSW_I2C_ADDR_BUF_SIZE-byte address held in @_addr_buf, then read @_len
+ * bytes into @_buf.
+ */
+#define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) {   \
+       { .addr = (_client)->addr,                              \
+         .buf = (_addr_buf),                                   \
+         .len = MLXSW_I2C_ADDR_BUF_SIZE,                       \
+         .flags = 0 },                                         \
+       { .addr = (_client)->addr,                              \
+         .buf = (_buf),                                        \
+         .len = (_len),                                        \
+         .flags = I2C_M_RD } }
+
+/* Initializer for a single write message of @_len bytes taken from @_buf. */
+#define MLXSW_I2C_WRITE_MSG(_client, _buf, _len)               \
+       { .addr = (_client)->addr,                              \
+         .buf = (u8 *)(_buf),                                  \
+         .len = (_len),                                        \
+         .flags = 0 }
+
+/* Routine decodes the in and out mail box descriptors found in @buf and
+ * stores their offsets and sizes in @mlxsw_i2c.
+ */
+static inline void
+mlxsw_i2c_convert_mbox(struct mlxsw_i2c *mlxsw_i2c, u8 *buf)
+{
+       const u32 off_mask = GENMASK(MLXSW_I2C_MBOX_OFFSET_BITS - 1, 0);
+       u32 in_word, out_word;
+
+       /* Each big-endian word holds the offset in its low 20 bits and the
+        * size in the 12 bits above; the out word follows the in word.
+        */
+       in_word = be32_to_cpup((__be32 *) buf);
+       out_word = be32_to_cpup((__be32 *) (buf + MLXSW_I2C_ADDR_WIDTH));
+
+       mlxsw_i2c->cmd.mb_off_in = in_word & off_mask;
+       mlxsw_i2c->cmd.mb_size_in = in_word >> MLXSW_I2C_MBOX_OFFSET_BITS;
+
+       mlxsw_i2c->cmd.mb_off_out = out_word & off_mask;
+       mlxsw_i2c->cmd.mb_size_out = out_word >> MLXSW_I2C_MBOX_OFFSET_BITS;
+}
+
+/* Routine obtains register size from mail box buffer: the low 11 bits of the
+ * big-endian 16-bit field located right after the TLV header, multiplied by
+ * 4, plus the TLV header size.
+ */
+static inline int mlxsw_i2c_get_reg_size(u8 *in_mbox)
+{
+       __be16 *len_field = (__be16 *) (in_mbox + MLXSW_I2C_TLV_HDR_SIZE);
+       u16 len_units = be16_to_cpup(len_field) & 0x7ff;
+
+       return len_units * 4 + MLXSW_I2C_TLV_HDR_SIZE;
+}
+
+/* Routine stores the device internal offset @off, in big-endian order, in the
+ * first four bytes of the transaction buffer @buf.
+ */
+static inline void mlxsw_i2c_set_slave_addr(u8 *buf, u32 off)
+{
+       *(__be32 *) buf = htonl(off);
+}
+
+/* Routine waits until go bit is cleared.
+ *
+ * Polls the command interface status word until the GO bit is clear. Polling
+ * continues while the MLXSW_I2C_TIMEOUT_MSECS deadline has not passed or
+ * fewer than MLXSW_I2C_RETRY extra attempts were made after it.
+ *
+ * On completion the status byte is stored in @p_status. Returns 0 when the
+ * status is zero, -EIO when it is not, and -ETIMEDOUT when the GO bit was
+ * never seen cleared.
+ */
+static int mlxsw_i2c_wait_go_bit(struct i2c_client *client,
+                                struct mlxsw_i2c *mlxsw_i2c, u8 *p_status)
+{
+       u8 addr_buf[MLXSW_I2C_ADDR_BUF_SIZE];
+       u8 buf[MLXSW_I2C_READ_SEMA_SIZE];
+       int len = MLXSW_I2C_READ_SEMA_SIZE;
+       struct i2c_msg read_sema[] =
+               MLXSW_I2C_READ_MSG(client, addr_buf, buf, len);
+       bool wait_done = false;
+       unsigned long end;
+       int i = 0, err;
+
+       mlxsw_i2c_set_slave_addr(addr_buf, MLXSW_I2C_CIR2_OFF_STATUS);
+
+       end = jiffies + msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
+       do {
+               err = i2c_transfer(client->adapter, read_sema,
+                                  ARRAY_SIZE(read_sema));
+               if (err == ARRAY_SIZE(read_sema)) {
+                       /* buf holds valid data only after a full transfer,
+                        * so decode it here and not before the check.
+                        */
+                       u32 ctrl = be32_to_cpu(*(__be32 *) buf);
+
+                       if (!(ctrl & MLXSW_I2C_GO_BIT)) {
+                               wait_done = true;
+                               *p_status = ctrl >>
+                                           MLXSW_I2C_CIR_CTRL_STATUS_SHIFT;
+                               break;
+                       }
+               }
+               cond_resched();
+       } while ((time_before(jiffies, end)) || (i++ < MLXSW_I2C_RETRY));
+
+       if (wait_done) {
+               if (*p_status)
+                       err = -EIO;
+       } else {
+               return -ETIMEDOUT;
+       }
+
+       /* err still holds the positive message count on success */
+       return err > 0 ? 0 : err;
+}
+
+/* Routine posts a command to ASIC through mail box.
+ *
+ * Two writes are issued: the first fills the Command Interface Register
+ * starting at MLXSW_I2C_CIR2_BASE, the second writes the status word with the
+ * GO bit set to start the transaction. A non-zero @immediate keeps the
+ * *_IMM_CMD words from the initializers; zero replaces them with the
+ * MLXSW_I2C_SET_CMD / MLXSW_I2C_PUSH_CMD words.
+ *
+ * Returns 0 on success, the negative i2c_transfer() error, or -EIO when a
+ * transfer did not complete exactly one message.
+ */
+static int mlxsw_i2c_write_cmd(struct i2c_client *client,
+                              struct mlxsw_i2c *mlxsw_i2c,
+                              int immediate)
+{
+       /* Word 0 of each buffer is the target address, filled in below */
+       __be32 push_cmd_buf[MLXSW_I2C_PUSH_CMD_SIZE / 4] = {
+               0, cpu_to_be32(MLXSW_I2C_PUSH_IMM_CMD)
+       };
+       __be32 prep_cmd_buf[MLXSW_I2C_PREP_SIZE / 4] = {
+               0, 0, 0, 0, 0, 0,
+               cpu_to_be32(client->adapter->nr & 0xffff),
+               cpu_to_be32(MLXSW_I2C_SET_IMM_CMD)
+       };
+       struct i2c_msg push_cmd =
+               MLXSW_I2C_WRITE_MSG(client, push_cmd_buf,
+                                   MLXSW_I2C_PUSH_CMD_SIZE);
+       struct i2c_msg prep_cmd =
+               MLXSW_I2C_WRITE_MSG(client, prep_cmd_buf, MLXSW_I2C_PREP_SIZE);
+       int err;
+
+       if (!immediate) {
+               push_cmd_buf[1] = cpu_to_be32(MLXSW_I2C_PUSH_CMD);
+               prep_cmd_buf[7] = cpu_to_be32(MLXSW_I2C_SET_CMD);
+       }
+       mlxsw_i2c_set_slave_addr((u8 *)prep_cmd_buf,
+                                MLXSW_I2C_CIR2_BASE);
+       mlxsw_i2c_set_slave_addr((u8 *)push_cmd_buf,
+                                MLXSW_I2C_CIR2_OFF_STATUS);
+
+       /* Prepare Command Interface Register for transaction */
+       err = i2c_transfer(client->adapter, &prep_cmd, 1);
+       if (err < 0)
+               return err;
+       else if (err != 1)
+               return -EIO;
+
+       /* Write out Command Interface Register GO bit to push transaction */
+       err = i2c_transfer(client->adapter, &push_cmd, 1);
+       if (err < 0)
+               return err;
+       else if (err != 1)
+               return -EIO;
+
+       return 0;
+}
+
+/* Routine obtains mail box offsets from ASIC register space.
+ *
+ * Reads MLXSW_I2C_MBOX_SIZE bytes starting at MLXSW_I2C_CIR2_BASE and decodes
+ * the in/out mail box descriptors into @mlxsw_i2c.
+ *
+ * Returns 0 on success, the negative i2c_transfer() error, or -EIO when the
+ * transfer completed fewer messages than requested.
+ */
+static int mlxsw_i2c_get_mbox(struct i2c_client *client,
+                             struct mlxsw_i2c *mlxsw_i2c)
+{
+       u8 addr_buf[MLXSW_I2C_ADDR_BUF_SIZE];
+       u8 buf[MLXSW_I2C_MBOX_SIZE];
+       struct i2c_msg mbox_cmd[] =
+               MLXSW_I2C_READ_MSG(client, addr_buf, buf, MLXSW_I2C_MBOX_SIZE);
+       int err;
+
+       /* Read mail boxes offsets. */
+       mlxsw_i2c_set_slave_addr(addr_buf, MLXSW_I2C_CIR2_BASE);
+       err = i2c_transfer(client->adapter, mbox_cmd, ARRAY_SIZE(mbox_cmd));
+       if (err != ARRAY_SIZE(mbox_cmd)) {
+               dev_err(&client->dev, "Could not obtain mail boxes\n");
+               /* A short transfer has no errno of its own; never hand a
+                * non-negative value back as if it were a success.
+                */
+               return err < 0 ? err : -EIO;
+       }
+
+       /* Convert mail boxes. */
+       mlxsw_i2c_convert_mbox(mlxsw_i2c, &buf[MLXSW_I2C_MBOX_OUT_PARAM_OFF]);
+
+       return 0;
+}
+
+/* Routine sends I2C write transaction to ASIC device.
+ *
+ * Copies @in_mbox_size bytes of @in_mbox to the input mail box in @num chunks
+ * of at most MLXSW_I2C_BLK_MAX bytes, then posts the command and waits for the
+ * GO bit to clear. Each chunk write is retried while the timeout has not
+ * passed or fewer than MLXSW_I2C_RETRY extra attempts were made.
+ *
+ * The completion status is stored in @p_status. Returns 0 on success or a
+ * negative error code.
+ */
+static int
+mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num,
+               u8 *p_status)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
+       unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
+       u8 tran_buf[MLXSW_I2C_MAX_BUFF_SIZE + MLXSW_I2C_ADDR_BUF_SIZE];
+       int off = mlxsw_i2c->cmd.mb_off_in, chunk_size, i, j;
+       unsigned long end;
+       struct i2c_msg write_tran =
+               MLXSW_I2C_WRITE_MSG(client, tran_buf, MLXSW_I2C_PUSH_CMD_SIZE);
+       int err;
+
+       for (i = 0; i < num; i++) {
+               chunk_size = (in_mbox_size > MLXSW_I2C_BLK_MAX) ?
+                            MLXSW_I2C_BLK_MAX : in_mbox_size;
+               write_tran.len = MLXSW_I2C_ADDR_WIDTH + chunk_size;
+               mlxsw_i2c_set_slave_addr(tran_buf, off);
+               /* Every chunk before this one was a full block, so the source
+                * offset is MLXSW_I2C_BLK_MAX * i; chunk_size * i would be
+                * wrong for a short final chunk.
+                */
+               memcpy(&tran_buf[MLXSW_I2C_ADDR_BUF_SIZE], in_mbox +
+                      MLXSW_I2C_BLK_MAX * i, chunk_size);
+
+               j = 0;
+               end = jiffies + timeout;
+               do {
+                       err = i2c_transfer(client->adapter, &write_tran, 1);
+                       if (err == 1)
+                               break;
+
+                       cond_resched();
+               } while ((time_before(jiffies, end)) ||
+                        (j++ < MLXSW_I2C_RETRY));
+
+               if (err != 1) {
+                       if (!err)
+                               err = -EIO;
+                       return err;
+               }
+
+               off += chunk_size;
+               in_mbox_size -= chunk_size;
+       }
+
+       /* Prepare and write out Command Interface Register for transaction. */
+       err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 0);
+       if (err) {
+               dev_err(&client->dev, "Could not start transaction\n");
+               return -EIO;
+       }
+
+       /* Wait until go bit is cleared. */
+       err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, p_status);
+       if (err) {
+               dev_err(&client->dev, "HW semaphore is not released\n");
+               return err;
+       }
+
+       /* Validate transaction completion status. */
+       if (*p_status) {
+               dev_err(&client->dev, "Bad transaction completion status %x\n",
+                       *p_status);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+/* Routine executes I2C command.
+ *
+ * The transfer length is the size reported by mlxsw_i2c_get_reg_size() for
+ * @in_mbox; it is split into chunks of at most MLXSW_I2C_BLK_MAX bytes.
+ * @in_mbox_size and @out_mbox_size are only checked for u32 alignment.
+ * The input mailbox is written with mlxsw_i2c_write() and, when @out_mbox is
+ * not NULL, the same number of bytes is read back into it starting at the
+ * output mailbox offset. The whole sequence runs under the command mutex.
+ *
+ * Returns 0 on success, the error of mutex_lock_interruptible() when the lock
+ * could not be taken, the error of the failing write or read transfer, or
+ * -EIO when a read transfer completes fewer messages than requested.
+ */
+static int
+mlxsw_i2c_cmd(struct device *dev, size_t in_mbox_size, u8 *in_mbox,
+             size_t out_mbox_size, u8 *out_mbox, u8 *status)
+{
+       struct i2c_client *client = to_i2c_client(dev);
+       struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
+       unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
+       u8 tran_buf[MLXSW_I2C_ADDR_BUF_SIZE];
+       int num, chunk_size, reg_size, i, j;
+       int off = mlxsw_i2c->cmd.mb_off_out;
+       unsigned long end;
+       struct i2c_msg read_tran[] =
+               MLXSW_I2C_READ_MSG(client, tran_buf, NULL, 0);
+       int err;
+
+       WARN_ON(in_mbox_size % sizeof(u32) || out_mbox_size % sizeof(u32));
+
+       /* Number of chunks, rounded up. */
+       reg_size = mlxsw_i2c_get_reg_size(in_mbox);
+       num = reg_size / MLXSW_I2C_BLK_MAX;
+       if (reg_size % MLXSW_I2C_BLK_MAX)
+               num++;
+
+       /* Hand the real reason (e.g. an interrupting signal) to the caller
+        * instead of masking it as an invalid argument.
+        */
+       err = mutex_lock_interruptible(&mlxsw_i2c->cmd.lock);
+       if (err < 0) {
+               dev_err(&client->dev, "Could not acquire lock\n");
+               return err;
+       }
+
+       err = mlxsw_i2c_write(dev, reg_size, in_mbox, num, status);
+       if (err)
+               goto out_unlock;
+
+       /* No out mailbox in case of write transaction; err is 0 here. */
+       if (!out_mbox)
+               goto out_unlock;
+
+       /* Send read transaction to get output mailbox content. */
+       read_tran[1].buf = out_mbox;
+       for (i = 0; i < num; i++) {
+               chunk_size = (reg_size > MLXSW_I2C_BLK_MAX) ?
+                            MLXSW_I2C_BLK_MAX : reg_size;
+               read_tran[1].len = chunk_size;
+               mlxsw_i2c_set_slave_addr(tran_buf, off);
+
+               /* Retry for as long as the timeout has not expired; once it
+                * has, up to MLXSW_I2C_RETRY further attempts are made.
+                */
+               j = 0;
+               end = jiffies + timeout;
+               do {
+                       err = i2c_transfer(client->adapter, read_tran,
+                                          ARRAY_SIZE(read_tran));
+                       if (err == ARRAY_SIZE(read_tran))
+                               break;
+
+                       cond_resched();
+               } while ((time_before(jiffies, end)) ||
+                        (j++ < MLXSW_I2C_RETRY));
+
+               if (err != ARRAY_SIZE(read_tran)) {
+                       /* A short transfer is not a success either; never
+                        * report a non-negative value as the error.
+                        */
+                       if (err >= 0)
+                               err = -EIO;
+
+                       goto out_unlock;
+               }
+
+               off += chunk_size;
+               reg_size -= chunk_size;
+               read_tran[1].buf += chunk_size;
+       }
+
+       /* err holds the message count of the last transfer at this point. */
+       err = 0;
+
+out_unlock:
+       mutex_unlock(&mlxsw_i2c->cmd.lock);
+       return err;
+}
+
+/* cmd_exec bus operation: forwards the mailboxes and status pointer to
+ * mlxsw_i2c_cmd(). @opcode, @opcode_mod, @in_mod and @out_mbox_direct are not
+ * used by the I2C bus.
+ */
+static int mlxsw_i2c_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
+                             u32 in_mod, bool out_mbox_direct,
+                             char *in_mbox, size_t in_mbox_size,
+                             char *out_mbox, size_t out_mbox_size,
+                             u8 *status)
+{
+       struct mlxsw_i2c *i2c_priv = bus_priv;
+       struct device *dev = i2c_priv->dev;
+       int ret;
+
+       ret = mlxsw_i2c_cmd(dev, in_mbox_size, in_mbox, out_mbox_size,
+                           out_mbox, status);
+
+       return ret;
+}
+
+/* skb_transmit_busy bus operation: unconditionally reports "not busy";
+ * neither argument is examined.
+ */
+static bool mlxsw_i2c_skb_transmit_busy(void *bus_priv,
+                                       const struct mlxsw_tx_info *tx_info)
+{
+       return false;
+}
+
+/* skb_transmit bus operation: stub that reports success without touching
+ * @skb.
+ * NOTE(review): the skb is neither sent nor freed here - confirm the core
+ * never hands packets to a bus of this kind.
+ */
+static int mlxsw_i2c_skb_transmit(void *bus_priv, struct sk_buff *skb,
+                                 const struct mlxsw_tx_info *tx_info)
+{
+       return 0;
+}
+
+/* init bus operation: remembers the core instance in the bus private data.
+ * @profile and @resources are not used. Always returns 0.
+ */
+static int
+mlxsw_i2c_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
+              const struct mlxsw_config_profile *profile,
+              struct mlxsw_res *resources)
+{
+       struct mlxsw_i2c *mlxsw_i2c = bus_priv;
+
+       mlxsw_i2c->core = mlxsw_core;
+
+       return 0;
+}
+
+/* fini bus operation: drops the core pointer stored by mlxsw_i2c_init(). */
+static void mlxsw_i2c_fini(void *bus_priv)
+{
+       struct mlxsw_i2c *mlxsw_i2c = bus_priv;
+
+       mlxsw_i2c->core = NULL;
+}
+
+/* Bus operations of the I2C transport; passed to
+ * mlxsw_core_bus_device_register() by the probe routine.
+ */
+static const struct mlxsw_bus mlxsw_i2c_bus = {
+       .kind                   = "i2c",
+       .init                   = mlxsw_i2c_init,
+       .fini                   = mlxsw_i2c_fini,
+       .skb_transmit_busy      = mlxsw_i2c_skb_transmit_busy,
+       .skb_transmit           = mlxsw_i2c_skb_transmit,
+       .cmd_exec               = mlxsw_i2c_cmd_exec,
+};
+
+/* Probe the ASIC behind @client and register it with the mlxsw core.
+ *
+ * Starts the initial command with mlxsw_i2c_write_cmd(), waits for its
+ * completion, obtains the mailbox offsets with mlxsw_i2c_get_mbox() and
+ * finally calls mlxsw_core_bus_device_register().
+ *
+ * Returns 0 on success or the error of the failing step (-EIO for a non-zero
+ * completion status). On every failure the command mutex is destroyed and the
+ * client data pointer is cleared.
+ */
+static int mlxsw_i2c_probe(struct i2c_client *client,
+                          const struct i2c_device_id *id)
+{
+       struct mlxsw_i2c *mlxsw_i2c;
+       u8 status;
+       int err;
+
+       mlxsw_i2c = devm_kzalloc(&client->dev, sizeof(*mlxsw_i2c), GFP_KERNEL);
+       if (!mlxsw_i2c)
+               return -ENOMEM;
+
+       i2c_set_clientdata(client, mlxsw_i2c);
+       mutex_init(&mlxsw_i2c->cmd.lock);
+
+       /* In order to use mailboxes through the i2c, special area is reserved
+        * on the i2c address space that can be used for input and output
+        * mailboxes. Such mailboxes are called local mailboxes. When using a
+        * local mailbox, software should specify 0 as the Input/Output
+        * parameters. The location of the Local Mailbox addresses on the i2c
+        * space can be retrieved through the QUERY_FW command.
+        * For this purpose QUERY_FW is to be issued with opcode modifier equal
+        * 0x01. For such command the output parameter is an immediate value.
+        * Here QUERY_FW command is invoked for ASIC probing and for getting
+        * local mailboxes addresses from immediate output parameters.
+        */
+
+       /* Prepare and write out Command Interface Register for transaction */
+       err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 1);
+       if (err) {
+               dev_err(&client->dev, "Could not start transaction\n");
+               goto errout;
+       }
+
+       /* Wait until go bit is cleared. */
+       err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, &status);
+       if (err) {
+               dev_err(&client->dev, "HW semaphore is not released\n");
+               goto errout;
+       }
+
+       /* Validate transaction completion status. */
+       if (status) {
+               dev_err(&client->dev, "Bad transaction completion status %x\n",
+                       status);
+               err = -EIO;
+               goto errout;
+       }
+
+       /* Get mailbox offsets. Only negative values are failures here. */
+       err = mlxsw_i2c_get_mbox(client, mlxsw_i2c);
+       if (err < 0) {
+               dev_err(&client->dev, "Fail to get mailboxes\n");
+               goto errout;
+       }
+
+       dev_info(&client->dev, "%s mb size=%x off=0x%08x out mb size=%x off=0x%08x\n",
+                id->name, mlxsw_i2c->cmd.mb_size_in,
+                mlxsw_i2c->cmd.mb_off_in, mlxsw_i2c->cmd.mb_size_out,
+                mlxsw_i2c->cmd.mb_off_out);
+
+       /* Register device bus. */
+       mlxsw_i2c->bus_info.device_kind = id->name;
+       mlxsw_i2c->bus_info.device_name = client->name;
+       mlxsw_i2c->bus_info.dev = &client->dev;
+       mlxsw_i2c->dev = &client->dev;
+
+       err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info,
+                                            &mlxsw_i2c_bus, mlxsw_i2c);
+       if (err) {
+               dev_err(&client->dev, "Fail to register core bus\n");
+               /* Unwind like every other failure instead of returning with
+                * the client data still pointing at the private structure.
+                */
+               goto errout;
+       }
+
+       return 0;
+
+errout:
+       mutex_destroy(&mlxsw_i2c->cmd.lock);
+       i2c_set_clientdata(client, NULL);
+
+       return err;
+}
+
+/* Remove callback: unregisters the core instance stored in the private data
+ * and destroys the command mutex. Always returns 0.
+ */
+static int mlxsw_i2c_remove(struct i2c_client *client)
+{
+       struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
+
+       mlxsw_core_bus_device_unregister(mlxsw_i2c->core);
+       mutex_destroy(&mlxsw_i2c->cmd.lock);
+
+       return 0;
+}
+
+/* Install the common probe/remove callbacks into @i2c_driver (overwriting
+ * whatever the caller had set) and add it with i2c_add_driver(), whose result
+ * is returned.
+ */
+int mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver)
+{
+       i2c_driver->probe = mlxsw_i2c_probe;
+       i2c_driver->remove = mlxsw_i2c_remove;
+       return i2c_add_driver(i2c_driver);
+}
+EXPORT_SYMBOL(mlxsw_i2c_driver_register);
+
+/* Counterpart of mlxsw_i2c_driver_register(): deletes @i2c_driver with
+ * i2c_del_driver().
+ */
+void mlxsw_i2c_driver_unregister(struct i2c_driver *i2c_driver)
+{
+       i2c_del_driver(i2c_driver);
+}
+EXPORT_SYMBOL(mlxsw_i2c_driver_unregister);
+
+MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox switch I2C interface driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.h b/drivers/net/ethernet/mellanox/mlxsw/i2c.h
new file mode 100644 (file)
index 0000000..daa24b2
--- /dev/null
@@ -0,0 +1,60 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/i2c.h
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_I2C_H
+#define _MLXSW_I2C_H
+
+#include <linux/i2c.h>
+
+#if IS_ENABLED(CONFIG_MLXSW_I2C)
+
+int mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver);
+void mlxsw_i2c_driver_unregister(struct i2c_driver *i2c_driver);
+
+#else
+
+/* Stub used when CONFIG_MLXSW_I2C is not enabled: registration always fails
+ * with -ENODEV.
+ */
+static inline int
+mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver)
+{
+       return -ENODEV;
+}
+
+/* Stub used when CONFIG_MLXSW_I2C is not enabled: nothing to unregister. */
+static inline void
+mlxsw_i2c_driver_unregister(struct i2c_driver *i2c_driver)
+{
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/ib.h b/drivers/net/ethernet/mellanox/mlxsw/ib.h
new file mode 100644 (file)
index 0000000..ce313aa
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/ib.h
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Elad Raz <eladr@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _MLXSW_IB_H
+#define _MLXSW_IB_H
+
+#define MLXSW_IB_DEFAULT_MTU 4096
+
+#endif /* _MLXSW_IB_H */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
new file mode 100644 (file)
index 0000000..3dd1626
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/minimal.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/types.h>
+
+#include "core.h"
+#include "i2c.h"
+
+/* Name used as the "kind" of the core driver below. */
+static const char mlxsw_minimal_driver_name[] = "mlxsw_minimal";
+
+/* Configuration profile with no explicit initializer, i.e. all fields zero. */
+static const struct mlxsw_config_profile mlxsw_minimal_config_profile;
+
+/* Core driver description registered by the module init routine.
+ * NOTE(review): priv_size is 1 although no private data is used in this
+ * file - presumably a placeholder; confirm against the core's requirements.
+ */
+static struct mlxsw_driver mlxsw_minimal_driver = {
+       .kind           = mlxsw_minimal_driver_name,
+       .priv_size      = 1,
+       .profile        = &mlxsw_minimal_config_profile,
+};
+
+/* I2C device IDs handled by this driver; the empty entry ends the table.
+ * Exported with MODULE_DEVICE_TABLE() at the end of the file.
+ */
+static const struct i2c_device_id mlxsw_minimal_i2c_id[] = {
+       { "mlxsw_minimal", 0},
+       { },
+};
+
+/* I2C driver handed to mlxsw_i2c_driver_register() by the module init
+ * routine; no probe/remove callbacks are set here.
+ */
+static struct i2c_driver mlxsw_minimal_i2c_driver = {
+       .driver.name = "mlxsw_minimal",
+       .class = I2C_CLASS_HWMON,
+       .id_table = mlxsw_minimal_i2c_id,
+};
+
+/* Module entry point: register the core driver first, then the I2C driver.
+ * If the second step fails the core driver is unregistered again and the
+ * error is returned.
+ */
+static int __init mlxsw_minimal_module_init(void)
+{
+       int ret = mlxsw_core_driver_register(&mlxsw_minimal_driver);
+
+       if (ret)
+               return ret;
+
+       ret = mlxsw_i2c_driver_register(&mlxsw_minimal_i2c_driver);
+       if (ret)
+               mlxsw_core_driver_unregister(&mlxsw_minimal_driver);
+
+       return ret;
+}
+
+/* Module exit point: unregister in the reverse order of registration. */
+static void __exit mlxsw_minimal_module_exit(void)
+{
+       mlxsw_i2c_driver_unregister(&mlxsw_minimal_i2c_driver);
+       mlxsw_core_driver_unregister(&mlxsw_minimal_driver);
+}
+
+module_init(mlxsw_minimal_module_init);
+module_exit(mlxsw_minimal_module_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox minimal driver");
+MODULE_DEVICE_TABLE(i2c, mlxsw_minimal_i2c_id);
index c88f2ab90a3923c7570b8c8eb7b6a49f4b9fadc2..a223c85dfde064eee873eb6ffd6aae818a4f46ba 100644 (file)
@@ -48,6 +48,7 @@
 #include <linux/seq_file.h>
 #include <linux/string.h>
 
+#include "pci_hw.h"
 #include "pci.h"
 #include "core.h"
 #include "cmd.h"
 
 static const char mlxsw_pci_driver_name[] = "mlxsw_pci";
 
-static const struct pci_device_id mlxsw_pci_id_table[] = {
-       {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHX2), 0},
-       {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0},
-       {0, }
-};
-
 static struct dentry *mlxsw_pci_dbg_root;
 
-static const char *mlxsw_pci_device_kind_get(const struct pci_device_id *id)
-{
-       switch (id->device) {
-       case PCI_DEVICE_ID_MELLANOX_SWITCHX2:
-               return MLXSW_DEVICE_KIND_SWITCHX2;
-       case PCI_DEVICE_ID_MELLANOX_SPECTRUM:
-               return MLXSW_DEVICE_KIND_SPECTRUM;
-       default:
-               BUG();
-       }
-}
-
 #define mlxsw_pci_write32(mlxsw_pci, reg, val) \
        iowrite32be(val, (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg))
 #define mlxsw_pci_read32(mlxsw_pci, reg) \
@@ -1553,7 +1536,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
 
        err = request_irq(mlxsw_pci->msix_entry.vector,
                          mlxsw_pci_eq_irq_handler, 0,
-                         mlxsw_pci_driver_name, mlxsw_pci);
+                         mlxsw_pci->bus_info.device_kind, mlxsw_pci);
        if (err) {
                dev_err(&pdev->dev, "IRQ request failed\n");
                goto err_request_eq_irq;
@@ -1772,13 +1755,20 @@ static const struct mlxsw_bus mlxsw_pci_bus = {
        .skb_transmit_busy      = mlxsw_pci_skb_transmit_busy,
        .skb_transmit           = mlxsw_pci_skb_transmit,
        .cmd_exec               = mlxsw_pci_cmd_exec,
+       .features               = MLXSW_BUS_F_TXRX,
 };
 
-static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci)
+static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
+                             const struct pci_device_id *id)
 {
        unsigned long end;
 
        mlxsw_pci_write32(mlxsw_pci, SW_RESET, MLXSW_PCI_SW_RESET_RST_BIT);
+       if (id->device == PCI_DEVICE_ID_MELLANOX_SWITCHX2) {
+               msleep(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
+               return 0;
+       }
+
        wmb(); /* reset needs to be written before we read control register */
        end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS);
        do {
@@ -1793,6 +1783,7 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci)
 
 static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+       const char *driver_name = pdev->driver->name;
        struct mlxsw_pci *mlxsw_pci;
        int err;
 
@@ -1806,7 +1797,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto err_pci_enable_device;
        }
 
-       err = pci_request_regions(pdev, mlxsw_pci_driver_name);
+       err = pci_request_regions(pdev, driver_name);
        if (err) {
                dev_err(&pdev->dev, "pci_request_regions failed\n");
                goto err_pci_request_regions;
@@ -1845,7 +1836,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        mlxsw_pci->pdev = pdev;
        pci_set_drvdata(pdev, mlxsw_pci);
 
-       err = mlxsw_pci_sw_reset(mlxsw_pci);
+       err = mlxsw_pci_sw_reset(mlxsw_pci, id);
        if (err) {
                dev_err(&pdev->dev, "Software reset failed\n");
                goto err_sw_reset;
@@ -1857,7 +1848,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto err_msix_init;
        }
 
-       mlxsw_pci->bus_info.device_kind = mlxsw_pci_device_kind_get(id);
+       mlxsw_pci->bus_info.device_kind = driver_name;
        mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev);
        mlxsw_pci->bus_info.dev = &pdev->dev;
 
@@ -1909,33 +1900,30 @@ static void mlxsw_pci_remove(struct pci_dev *pdev)
        kfree(mlxsw_pci);
 }
 
-static struct pci_driver mlxsw_pci_driver = {
-       .name           = mlxsw_pci_driver_name,
-       .id_table       = mlxsw_pci_id_table,
-       .probe          = mlxsw_pci_probe,
-       .remove         = mlxsw_pci_remove,
-};
+int mlxsw_pci_driver_register(struct pci_driver *pci_driver)
+{
+       pci_driver->probe = mlxsw_pci_probe;
+       pci_driver->remove = mlxsw_pci_remove;
+       return pci_register_driver(pci_driver);
+}
+EXPORT_SYMBOL(mlxsw_pci_driver_register);
 
-static int __init mlxsw_pci_module_init(void)
+void mlxsw_pci_driver_unregister(struct pci_driver *pci_driver)
 {
-       int err;
+       pci_unregister_driver(pci_driver);
+}
+EXPORT_SYMBOL(mlxsw_pci_driver_unregister);
 
+static int __init mlxsw_pci_module_init(void)
+{
        mlxsw_pci_dbg_root = debugfs_create_dir(mlxsw_pci_driver_name, NULL);
        if (!mlxsw_pci_dbg_root)
                return -ENOMEM;
-       err = pci_register_driver(&mlxsw_pci_driver);
-       if (err)
-               goto err_register_driver;
        return 0;
-
-err_register_driver:
-       debugfs_remove_recursive(mlxsw_pci_dbg_root);
-       return err;
 }
 
 static void __exit mlxsw_pci_module_exit(void)
 {
-       pci_unregister_driver(&mlxsw_pci_driver);
        debugfs_remove_recursive(mlxsw_pci_dbg_root);
 }
 
@@ -1945,4 +1933,3 @@ module_exit(mlxsw_pci_module_exit);
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox switch PCI interface driver");
-MODULE_DEVICE_TABLE(pci, mlxsw_pci_id_table);
index d942a3e6fa4151cbc10f4207b25c95ea8aa734c6..d65582325cd54e3334133f11fea13d500a8d711b 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/pci.h
- * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
 #ifndef _MLXSW_PCI_H
 #define _MLXSW_PCI_H
 
-#include <linux/bitops.h>
+#include <linux/pci.h>
 
-#include "item.h"
+#define PCI_DEVICE_ID_MELLANOX_SWITCHX2                0xc738
+#define PCI_DEVICE_ID_MELLANOX_SPECTRUM                0xcb84
+#define PCI_DEVICE_ID_MELLANOX_SWITCHIB                0xcb20
+#define PCI_DEVICE_ID_MELLANOX_SWITCHIB2       0xcf08
 
-#define PCI_DEVICE_ID_MELLANOX_SWITCHX2        0xc738
-#define PCI_DEVICE_ID_MELLANOX_SPECTRUM        0xcb84
-#define MLXSW_PCI_BAR0_SIZE            (1024 * 1024) /* 1MB */
-#define MLXSW_PCI_PAGE_SIZE            4096
+#if IS_ENABLED(CONFIG_MLXSW_PCI)
 
-#define MLXSW_PCI_CIR_BASE                     0x71000
-#define MLXSW_PCI_CIR_IN_PARAM_HI              MLXSW_PCI_CIR_BASE
-#define MLXSW_PCI_CIR_IN_PARAM_LO              (MLXSW_PCI_CIR_BASE + 0x04)
-#define MLXSW_PCI_CIR_IN_MODIFIER              (MLXSW_PCI_CIR_BASE + 0x08)
-#define MLXSW_PCI_CIR_OUT_PARAM_HI             (MLXSW_PCI_CIR_BASE + 0x0C)
-#define MLXSW_PCI_CIR_OUT_PARAM_LO             (MLXSW_PCI_CIR_BASE + 0x10)
-#define MLXSW_PCI_CIR_TOKEN                    (MLXSW_PCI_CIR_BASE + 0x14)
-#define MLXSW_PCI_CIR_CTRL                     (MLXSW_PCI_CIR_BASE + 0x18)
-#define MLXSW_PCI_CIR_CTRL_GO_BIT              BIT(23)
-#define MLXSW_PCI_CIR_CTRL_EVREQ_BIT           BIT(22)
-#define MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT    12
-#define MLXSW_PCI_CIR_CTRL_STATUS_SHIFT                24
-#define MLXSW_PCI_CIR_TIMEOUT_MSECS            1000
+int mlxsw_pci_driver_register(struct pci_driver *pci_driver);
+void mlxsw_pci_driver_unregister(struct pci_driver *pci_driver);
 
-#define MLXSW_PCI_SW_RESET                     0xF0010
-#define MLXSW_PCI_SW_RESET_RST_BIT             BIT(0)
-#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS       5000
-#define MLXSW_PCI_FW_READY                     0xA1844
-#define MLXSW_PCI_FW_READY_MASK                        0xFF
-#define MLXSW_PCI_FW_READY_MAGIC               0x5E
+#else
 
-#define MLXSW_PCI_DOORBELL_SDQ_OFFSET          0x000
-#define MLXSW_PCI_DOORBELL_RDQ_OFFSET          0x200
-#define MLXSW_PCI_DOORBELL_CQ_OFFSET           0x400
-#define MLXSW_PCI_DOORBELL_EQ_OFFSET           0x600
-#define MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET       0x800
-#define MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET       0xA00
+static inline int
+mlxsw_pci_driver_register(struct pci_driver *pci_driver)
+{
+       return 0;
+}
 
-#define MLXSW_PCI_DOORBELL(offset, type_offset, num)   \
-       ((offset) + (type_offset) + (num) * 4)
+static inline void
+mlxsw_pci_driver_unregister(struct pci_driver *pci_driver)
+{
+}
 
-#define MLXSW_PCI_CQS_MAX      96
-#define MLXSW_PCI_EQS_COUNT    2
-#define MLXSW_PCI_EQ_ASYNC_NUM 0
-#define MLXSW_PCI_EQ_COMP_NUM  1
-
-#define MLXSW_PCI_AQ_PAGES     8
-#define MLXSW_PCI_AQ_SIZE      (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES)
-#define MLXSW_PCI_WQE_SIZE     32 /* 32 bytes per element */
-#define MLXSW_PCI_CQE_SIZE     16 /* 16 bytes per element */
-#define MLXSW_PCI_EQE_SIZE     16 /* 16 bytes per element */
-#define MLXSW_PCI_WQE_COUNT    (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE)
-#define MLXSW_PCI_CQE_COUNT    (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE_SIZE)
-#define MLXSW_PCI_EQE_COUNT    (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE)
-#define MLXSW_PCI_EQE_UPDATE_COUNT     0x80
-
-#define MLXSW_PCI_WQE_SG_ENTRIES       3
-#define MLXSW_PCI_WQE_TYPE_ETHERNET    0xA
-
-/* pci_wqe_c
- * If set it indicates that a completion should be reported upon
- * execution of this descriptor.
- */
-MLXSW_ITEM32(pci, wqe, c, 0x00, 31, 1);
-
-/* pci_wqe_lp
- * Local Processing, set if packet should be processed by the local
- * switch hardware:
- * For Ethernet EMAD (Direct Route and non Direct Route) -
- * must be set if packet destination is local device
- * For InfiniBand CTL - must be set if packet destination is local device
- * Otherwise it must be clear
- * Local Process packets must not exceed the size of 2K (including payload
- * and headers).
- */
-MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1);
-
-/* pci_wqe_type
- * Packet type.
- */
-MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4);
-
-/* pci_wqe_byte_count
- * Size of i-th scatter/gather entry, 0 if entry is unused.
- */
-MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false);
-
-/* pci_wqe_address
- * Physical address of i-th scatter/gather entry.
- * Gather Entries must be 2Byte aligned.
- */
-MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false);
-
-/* pci_cqe_lag
- * Packet arrives from a port which is a LAG
- */
-MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1);
-
-/* pci_cqe_system_port/lag_id
- * When lag=0: System port on which the packet was received
- * When lag=1:
- * bits [15:4] LAG ID on which the packet was received
- * bits [3:0] sub_port on which the packet was received
- */
-MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16);
-MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12);
-MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4);
-
-/* pci_cqe_wqe_counter
- * WQE count of the WQEs completed on the associated dqn
- */
-MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16);
-
-/* pci_cqe_byte_count
- * Byte count of received packets including additional two
- * Reserved Bytes that are append to the end of the frame.
- * Reserved for Send CQE.
- */
-MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14);
-
-/* pci_cqe_trap_id
- * Trap ID that captured the packet.
- */
-MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 8);
-
-/* pci_cqe_crc
- * Length include CRC. Indicates the length field includes
- * the packet's CRC.
- */
-MLXSW_ITEM32(pci, cqe, crc, 0x0C, 8, 1);
-
-/* pci_cqe_e
- * CQE with Error.
- */
-MLXSW_ITEM32(pci, cqe, e, 0x0C, 7, 1);
-
-/* pci_cqe_sr
- * 1 - Send Queue
- * 0 - Receive Queue
- */
-MLXSW_ITEM32(pci, cqe, sr, 0x0C, 6, 1);
-
-/* pci_cqe_dqn
- * Descriptor Queue (DQ) Number.
- */
-MLXSW_ITEM32(pci, cqe, dqn, 0x0C, 1, 5);
-
-/* pci_cqe_owner
- * Ownership bit.
- */
-MLXSW_ITEM32(pci, cqe, owner, 0x0C, 0, 1);
-
-/* pci_eqe_event_type
- * Event type.
- */
-MLXSW_ITEM32(pci, eqe, event_type, 0x0C, 24, 8);
-#define MLXSW_PCI_EQE_EVENT_TYPE_COMP  0x00
-#define MLXSW_PCI_EQE_EVENT_TYPE_CMD   0x0A
-
-/* pci_eqe_event_sub_type
- * Event type.
- */
-MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8);
-
-/* pci_eqe_cqn
- * Completion Queue that triggeret this EQE.
- */
-MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7);
-
-/* pci_eqe_owner
- * Ownership bit.
- */
-MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1);
-
-/* pci_eqe_cmd_token
- * Command completion event - token
- */
-MLXSW_ITEM32(pci, eqe, cmd_token, 0x08, 16, 16);
-
-/* pci_eqe_cmd_status
- * Command completion event - status
- */
-MLXSW_ITEM32(pci, eqe, cmd_status, 0x08, 0, 8);
-
-/* pci_eqe_cmd_out_param_h
- * Command completion event - output parameter - higher part
- */
-MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x0C, 0, 32);
-
-/* pci_eqe_cmd_out_param_l
- * Command completion event - output parameter - lower part
- */
-MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x10, 0, 32);
+#endif
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
new file mode 100644 (file)
index 0000000..d147ddd
--- /dev/null
@@ -0,0 +1,229 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+ * Copyright (c) 2015-2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2016 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_PCI_HW_H
+#define _MLXSW_PCI_HW_H
+
+#include <linux/bitops.h>
+
+#include "item.h"
+
+#define MLXSW_PCI_BAR0_SIZE            (1024 * 1024) /* 1MB */
+#define MLXSW_PCI_PAGE_SIZE            4096
+
+#define MLXSW_PCI_CIR_BASE                     0x71000
+#define MLXSW_PCI_CIR_IN_PARAM_HI              MLXSW_PCI_CIR_BASE
+#define MLXSW_PCI_CIR_IN_PARAM_LO              (MLXSW_PCI_CIR_BASE + 0x04)
+#define MLXSW_PCI_CIR_IN_MODIFIER              (MLXSW_PCI_CIR_BASE + 0x08)
+#define MLXSW_PCI_CIR_OUT_PARAM_HI             (MLXSW_PCI_CIR_BASE + 0x0C)
+#define MLXSW_PCI_CIR_OUT_PARAM_LO             (MLXSW_PCI_CIR_BASE + 0x10)
+#define MLXSW_PCI_CIR_TOKEN                    (MLXSW_PCI_CIR_BASE + 0x14)
+#define MLXSW_PCI_CIR_CTRL                     (MLXSW_PCI_CIR_BASE + 0x18)
+#define MLXSW_PCI_CIR_CTRL_GO_BIT              BIT(23)
+#define MLXSW_PCI_CIR_CTRL_EVREQ_BIT           BIT(22)
+#define MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT    12
+#define MLXSW_PCI_CIR_CTRL_STATUS_SHIFT                24
+#define MLXSW_PCI_CIR_TIMEOUT_MSECS            1000
+
+#define MLXSW_PCI_SW_RESET                     0xF0010
+#define MLXSW_PCI_SW_RESET_RST_BIT             BIT(0)
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS       5000
+#define MLXSW_PCI_FW_READY                     0xA1844
+#define MLXSW_PCI_FW_READY_MASK                        0xFFFF
+#define MLXSW_PCI_FW_READY_MAGIC               0x5E
+
+#define MLXSW_PCI_DOORBELL_SDQ_OFFSET          0x000
+#define MLXSW_PCI_DOORBELL_RDQ_OFFSET          0x200
+#define MLXSW_PCI_DOORBELL_CQ_OFFSET           0x400
+#define MLXSW_PCI_DOORBELL_EQ_OFFSET           0x600
+#define MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET       0x800
+#define MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET       0xA00
+
+#define MLXSW_PCI_DOORBELL(offset, type_offset, num)   \
+       ((offset) + (type_offset) + (num) * 4)
+
+#define MLXSW_PCI_CQS_MAX      96
+#define MLXSW_PCI_EQS_COUNT    2
+#define MLXSW_PCI_EQ_ASYNC_NUM 0
+#define MLXSW_PCI_EQ_COMP_NUM  1
+
+#define MLXSW_PCI_AQ_PAGES     8
+#define MLXSW_PCI_AQ_SIZE      (MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES)
+#define MLXSW_PCI_WQE_SIZE     32 /* 32 bytes per element */
+#define MLXSW_PCI_CQE_SIZE     16 /* 16 bytes per element */
+#define MLXSW_PCI_EQE_SIZE     16 /* 16 bytes per element */
+#define MLXSW_PCI_WQE_COUNT    (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE)
+#define MLXSW_PCI_CQE_COUNT    (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE_SIZE)
+#define MLXSW_PCI_EQE_COUNT    (MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE)
+#define MLXSW_PCI_EQE_UPDATE_COUNT     0x80
+
+#define MLXSW_PCI_WQE_SG_ENTRIES       3
+#define MLXSW_PCI_WQE_TYPE_ETHERNET    0xA
+
+/* pci_wqe_c
+ * If set it indicates that a completion should be reported upon
+ * execution of this descriptor.
+ */
+MLXSW_ITEM32(pci, wqe, c, 0x00, 31, 1);
+
+/* pci_wqe_lp
+ * Local Processing, set if packet should be processed by the local
+ * switch hardware:
+ * For Ethernet EMAD (Direct Route and non Direct Route) -
+ * must be set if packet destination is local device
+ * For InfiniBand CTL - must be set if packet destination is local device
+ * Otherwise it must be clear
+ * Local Process packets must not exceed the size of 2K (including payload
+ * and headers).
+ */
+MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1);
+
+/* pci_wqe_type
+ * Packet type.
+ */
+MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4);
+
+/* pci_wqe_byte_count
+ * Size of i-th scatter/gather entry, 0 if entry is unused.
+ */
+MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false);
+
+/* pci_wqe_address
+ * Physical address of i-th scatter/gather entry.
+ * Gather Entries must be 2-byte aligned.
+ */
+MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false);
+
+/* pci_cqe_lag
+ * Packet arrives from a port which is a LAG
+ */
+MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1);
+
+/* pci_cqe_system_port/lag_id
+ * When lag=0: System port on which the packet was received
+ * When lag=1:
+ * bits [15:4] LAG ID on which the packet was received
+ * bits [3:0] sub_port on which the packet was received
+ */
+MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16);
+MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12);
+MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4);
+
+/* pci_cqe_wqe_counter
+ * WQE count of the WQEs completed on the associated dqn
+ */
+MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16);
+
+/* pci_cqe_byte_count
+ * Byte count of received packets including additional two
+ * Reserved Bytes that are appended to the end of the frame.
+ * Reserved for Send CQE.
+ */
+MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14);
+
+/* pci_cqe_trap_id
+ * Trap ID that captured the packet.
+ */
+MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 8);
+
+/* pci_cqe_crc
+ * Length includes CRC. Indicates the length field includes
+ * the packet's CRC.
+ */
+MLXSW_ITEM32(pci, cqe, crc, 0x0C, 8, 1);
+
+/* pci_cqe_e
+ * CQE with Error.
+ */
+MLXSW_ITEM32(pci, cqe, e, 0x0C, 7, 1);
+
+/* pci_cqe_sr
+ * 1 - Send Queue
+ * 0 - Receive Queue
+ */
+MLXSW_ITEM32(pci, cqe, sr, 0x0C, 6, 1);
+
+/* pci_cqe_dqn
+ * Descriptor Queue (DQ) Number.
+ */
+MLXSW_ITEM32(pci, cqe, dqn, 0x0C, 1, 5);
+
+/* pci_cqe_owner
+ * Ownership bit.
+ */
+MLXSW_ITEM32(pci, cqe, owner, 0x0C, 0, 1);
+
+/* pci_eqe_event_type
+ * Event type.
+ */
+MLXSW_ITEM32(pci, eqe, event_type, 0x0C, 24, 8);
+#define MLXSW_PCI_EQE_EVENT_TYPE_COMP  0x00
+#define MLXSW_PCI_EQE_EVENT_TYPE_CMD   0x0A
+
+/* pci_eqe_event_sub_type
+ * Event sub-type.
+ */
+MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8);
+
+/* pci_eqe_cqn
+ * Completion Queue that triggered this EQE.
+ */
+MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7);
+
+/* pci_eqe_owner
+ * Ownership bit.
+ */
+MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1);
+
+/* pci_eqe_cmd_token
+ * Command completion event - token
+ */
+MLXSW_ITEM32(pci, eqe, cmd_token, 0x08, 16, 16);
+
+/* pci_eqe_cmd_status
+ * Command completion event - status
+ */
+MLXSW_ITEM32(pci, eqe, cmd_status, 0x08, 0, 8);
+
+/* pci_eqe_cmd_out_param_h
+ * Command completion event - output parameter - higher part
+ */
+MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x0C, 0, 32);
+
+/* pci_eqe_cmd_out_param_l
+ * Command completion event - output parameter - lower part
+ */
+MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x10, 0, 32);
+
+#endif
index af371a82c35ba82e284d5b687dcf655e1ae84b50..3d42146473b30a786629ec06091eb4364cdfde37 100644 (file)
@@ -44,6 +44,7 @@
 
 #define MLXSW_PORT_SWID_DISABLED_PORT  255
 #define MLXSW_PORT_SWID_ALL_SWIDS      254
+#define MLXSW_PORT_SWID_TYPE_IB                1
 #define MLXSW_PORT_SWID_TYPE_ETH       2
 
 #define MLXSW_PORT_MID                 0xd000
@@ -51,6 +52,9 @@
 #define MLXSW_PORT_MAX_PHY_PORTS       0x40
 #define MLXSW_PORT_MAX_PORTS           (MLXSW_PORT_MAX_PHY_PORTS + 1)
 
+#define MLXSW_PORT_MAX_IB_PHY_PORTS    36
+#define MLXSW_PORT_MAX_IB_PORTS                (MLXSW_PORT_MAX_IB_PHY_PORTS + 1)
+
 #define MLXSW_PORT_DEVID_BITS_OFFSET   10
 #define MLXSW_PORT_PHY_BITS_OFFSET     4
 #define MLXSW_PORT_PHY_BITS_MASK       (MLXSW_PORT_MAX_PHY_PORTS - 1)
index debcf264839844018f43bcff970956cfe3e9b993..1357fe04391bbf89b5604af346b411a329cf6d6b 100644 (file)
@@ -1757,6 +1757,146 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port,
        }
 }
 
+/* QPCR - QoS Policer Configuration Register
+ * -----------------------------------------
+ * The QPCR register is used to create policers - that limit
+ * the rate of bytes or packets via some trap group.
+ */
+#define MLXSW_REG_QPCR_ID 0x4004
+#define MLXSW_REG_QPCR_LEN 0x28
+
+MLXSW_REG_DEFINE(qpcr, MLXSW_REG_QPCR_ID, MLXSW_REG_QPCR_LEN);
+
+enum mlxsw_reg_qpcr_g {
+       MLXSW_REG_QPCR_G_GLOBAL = 2,
+       MLXSW_REG_QPCR_G_STORM_CONTROL = 3,
+};
+
+/* reg_qpcr_g
+ * The policer type.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpcr, g, 0x00, 14, 2);
+
+/* reg_qpcr_pid
+ * Policer ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpcr, pid, 0x00, 0, 14);
+
+/* reg_qpcr_color_aware
+ * Is the policer aware of colors.
+ * Must be 0 (unaware) for cpu port.
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, color_aware, 0x04, 15, 1);
+
+/* reg_qpcr_bytes
+ * Whether the policer limit is in bytes per sec or packets per sec.
+ * 0 - packets
+ * 1 - bytes
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, bytes, 0x04, 14, 1);
+
+enum mlxsw_reg_qpcr_ir_units {
+       MLXSW_REG_QPCR_IR_UNITS_M,
+       MLXSW_REG_QPCR_IR_UNITS_K,
+};
+
+/* reg_qpcr_ir_units
+ * Policer's units for cir and eir fields (for bytes limits only)
+ * 1 - 10^3
+ * 0 - 10^6
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, qpcr, ir_units, 0x04, 12, 1);
+
+enum mlxsw_reg_qpcr_rate_type {
+       MLXSW_REG_QPCR_RATE_TYPE_SINGLE = 1,
+       MLXSW_REG_QPCR_RATE_TYPE_DOUBLE = 2,
+};
+
+/* reg_qpcr_rate_type
+ * Policer can have one limit (single rate) or 2 limits with specific operation
+ * for packets that exceed the lower rate but not the upper one.
+ * (For cpu port must be single rate)
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, rate_type, 0x04, 8, 2);
+
+/* reg_qpcr_cbs
+ * Policer's committed burst size.
+ * The policer is working with time slices of 50 nano sec. By default every
+ * slice is granted the proportionate share of the committed rate. If we want to
+ * allow a slice to exceed that share (while still keeping the rate per sec) we
+ * can allow burst. The burst size is between the default proportionate share
+ * (and no lower than 8) to 32Gb. (Even though giving a number higher than the
+ * committed rate will result in exceeding the rate). The burst size must be a
+ * power of 2 and will be determined by 2^cbs.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpcr, cbs, 0x08, 24, 6);
+
+/* reg_qpcr_cir
+ * Policer's committed rate.
+ * The rate used for single rate, the lower rate for double rate.
+ * For bytes limits, the rate will be this value * the unit from ir_units.
+ * (Resolution error is up to 1%).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpcr, cir, 0x0C, 0, 32);
+
+/* reg_qpcr_eir
+ * Policer's exceed rate.
+ * The higher rate for double rate, reserved for single rate.
+ * Lower rate for double rate policer.
+ * For bytes limits, the rate will be this value * the unit from ir_units.
+ * (Resolution error is up to 1%).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpcr, eir, 0x10, 0, 32);
+
+#define MLXSW_REG_QPCR_DOUBLE_RATE_ACTION 2
+
+/* reg_qpcr_exceed_action.
+ * What to do with packets between the 2 limits for double rate.
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, exceed_action, 0x14, 0, 4);
+
+enum mlxsw_reg_qpcr_action {
+       /* Discard */
+       MLXSW_REG_QPCR_ACTION_DISCARD = 1,
+       /* Forward and set color to red.
+        * If the packet is intended to cpu port, it will be dropped.
+        */
+       MLXSW_REG_QPCR_ACTION_FORWARD = 2,
+};
+
+/* reg_qpcr_violate_action
+ * What to do with packets that cross the cir limit (for single rate) or the eir
+ * limit (for double rate).
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, violate_action, 0x18, 0, 4);
+
+static inline void mlxsw_reg_qpcr_pack(char *payload, u16 pid,
+                                      enum mlxsw_reg_qpcr_ir_units ir_units,
+                                      bool bytes, u32 cir, u16 cbs)
+{
+       MLXSW_REG_ZERO(qpcr, payload);
+       mlxsw_reg_qpcr_pid_set(payload, pid);
+       mlxsw_reg_qpcr_g_set(payload, MLXSW_REG_QPCR_G_GLOBAL);
+       mlxsw_reg_qpcr_rate_type_set(payload, MLXSW_REG_QPCR_RATE_TYPE_SINGLE);
+       mlxsw_reg_qpcr_violate_action_set(payload,
+                                         MLXSW_REG_QPCR_ACTION_DISCARD);
+       mlxsw_reg_qpcr_cir_set(payload, cir);
+       mlxsw_reg_qpcr_ir_units_set(payload, ir_units);
+       mlxsw_reg_qpcr_bytes_set(payload, bytes);
+       mlxsw_reg_qpcr_cbs_set(payload, cbs);
+}
+
 /* QTCT - QoS Switch Traffic Class Table
  * -------------------------------------
  * Configures the mapping between the packet switch priority and the
@@ -2054,6 +2194,7 @@ MLXSW_REG_DEFINE(ptys, MLXSW_REG_PTYS_ID, MLXSW_REG_PTYS_LEN);
  */
 MLXSW_ITEM32(reg, ptys, local_port, 0x00, 16, 8);
 
+#define MLXSW_REG_PTYS_PROTO_MASK_IB   BIT(0)
 #define MLXSW_REG_PTYS_PROTO_MASK_ETH  BIT(2)
 
 /* reg_ptys_proto_mask
@@ -2112,18 +2253,61 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4);
  */
 MLXSW_ITEM32(reg, ptys, eth_proto_cap, 0x0C, 0, 32);
 
+/* reg_ptys_ib_link_width_cap
+ * IB port supported widths.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ib_link_width_cap, 0x10, 16, 16);
+
+#define MLXSW_REG_PTYS_IB_SPEED_SDR    BIT(0)
+#define MLXSW_REG_PTYS_IB_SPEED_DDR    BIT(1)
+#define MLXSW_REG_PTYS_IB_SPEED_QDR    BIT(2)
+#define MLXSW_REG_PTYS_IB_SPEED_FDR10  BIT(3)
+#define MLXSW_REG_PTYS_IB_SPEED_FDR    BIT(4)
+#define MLXSW_REG_PTYS_IB_SPEED_EDR    BIT(5)
+
+/* reg_ptys_ib_proto_cap
+ * IB port supported speeds and protocols.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ib_proto_cap, 0x10, 0, 16);
+
 /* reg_ptys_eth_proto_admin
  * Speed and protocol to set port to.
  * Access: RW
  */
 MLXSW_ITEM32(reg, ptys, eth_proto_admin, 0x18, 0, 32);
 
+/* reg_ptys_ib_link_width_admin
+ * IB width to set port to.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, ib_link_width_admin, 0x1C, 16, 16);
+
+/* reg_ptys_ib_proto_admin
+ * IB speeds and protocols to set port to.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, ib_proto_admin, 0x1C, 0, 16);
+
 /* reg_ptys_eth_proto_oper
  * The current speed and protocol configured for the port.
  * Access: RO
  */
 MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32);
 
+/* reg_ptys_ib_link_width_oper
+ * The current IB link width of the port.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ib_link_width_oper, 0x28, 16, 16);
+
+/* reg_ptys_ib_proto_oper
+ * The current IB speed and protocol.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ib_proto_oper, 0x28, 0, 16);
+
 /* reg_ptys_eth_proto_lp_advertise
  * The protocols that were advertised by the link partner during
  * autonegotiation.
@@ -2131,8 +2315,8 @@ MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32);
  */
 MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32);
 
-static inline void mlxsw_reg_ptys_pack(char *payload, u8 local_port,
-                                      u32 proto_admin)
+static inline void mlxsw_reg_ptys_eth_pack(char *payload, u8 local_port,
+                                          u32 proto_admin)
 {
        MLXSW_REG_ZERO(ptys, payload);
        mlxsw_reg_ptys_local_port_set(payload, local_port);
@@ -2140,9 +2324,10 @@ static inline void mlxsw_reg_ptys_pack(char *payload, u8 local_port,
        mlxsw_reg_ptys_eth_proto_admin_set(payload, proto_admin);
 }
 
-static inline void mlxsw_reg_ptys_unpack(char *payload, u32 *p_eth_proto_cap,
-                                        u32 *p_eth_proto_adm,
-                                        u32 *p_eth_proto_oper)
+static inline void mlxsw_reg_ptys_eth_unpack(char *payload,
+                                            u32 *p_eth_proto_cap,
+                                            u32 *p_eth_proto_adm,
+                                            u32 *p_eth_proto_oper)
 {
        if (p_eth_proto_cap)
                *p_eth_proto_cap = mlxsw_reg_ptys_eth_proto_cap_get(payload);
@@ -2152,6 +2337,33 @@ static inline void mlxsw_reg_ptys_unpack(char *payload, u32 *p_eth_proto_cap,
                *p_eth_proto_oper = mlxsw_reg_ptys_eth_proto_oper_get(payload);
 }
 
+static inline void mlxsw_reg_ptys_ib_pack(char *payload, u8 local_port,
+                                         u16 proto_admin, u16 link_width)
+{
+       MLXSW_REG_ZERO(ptys, payload);
+       mlxsw_reg_ptys_local_port_set(payload, local_port);
+       mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_IB);
+       mlxsw_reg_ptys_ib_proto_admin_set(payload, proto_admin);
+       mlxsw_reg_ptys_ib_link_width_admin_set(payload, link_width);
+}
+
+static inline void mlxsw_reg_ptys_ib_unpack(char *payload, u16 *p_ib_proto_cap,
+                                           u16 *p_ib_link_width_cap,
+                                           u16 *p_ib_proto_oper,
+                                           u16 *p_ib_link_width_oper)
+{
+       if (p_ib_proto_cap)
+               *p_ib_proto_cap = mlxsw_reg_ptys_ib_proto_cap_get(payload);
+       if (p_ib_link_width_cap)
+               *p_ib_link_width_cap =
+                       mlxsw_reg_ptys_ib_link_width_cap_get(payload);
+       if (p_ib_proto_oper)
+               *p_ib_proto_oper = mlxsw_reg_ptys_ib_proto_oper_get(payload);
+       if (p_ib_link_width_oper)
+               *p_ib_link_width_oper =
+                       mlxsw_reg_ptys_ib_link_width_oper_get(payload);
+}
+
 /* PPAD - Port Physical Address Register
  * -------------------------------------
  * The PPAD register configures the per port physical MAC address.
@@ -2676,6 +2888,27 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port,
        mlxsw_reg_ppcnt_prio_tc_set(payload, prio_tc);
 }
 
+/* PLIB - Port Local to InfiniBand Port
+ * ------------------------------------
+ * The PLIB register performs mapping from Local Port into InfiniBand Port.
+ */
+#define MLXSW_REG_PLIB_ID 0x500A
+#define MLXSW_REG_PLIB_LEN 0x10
+
+MLXSW_REG_DEFINE(plib, MLXSW_REG_PLIB_ID, MLXSW_REG_PLIB_LEN);
+
+/* reg_plib_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, plib, local_port, 0x00, 16, 8);
+
+/* reg_plib_ib_port
+ * InfiniBand port remapping for local_port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, plib, ib_port, 0x00, 0, 8);
+
 /* PPTB - Port Prio To Buffer Register
  * -----------------------------------
  * Configures the switch priority to buffer table.
@@ -2941,8 +3174,21 @@ MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4);
 
 enum mlxsw_reg_htgt_trap_group {
        MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
-       MLXSW_REG_HTGT_TRAP_GROUP_RX,
-       MLXSW_REG_HTGT_TRAP_GROUP_CTRL,
+       MLXSW_REG_HTGT_TRAP_GROUP_SX2_RX,
+       MLXSW_REG_HTGT_TRAP_GROUP_SX2_CTRL,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_STP,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
+       MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
 };
 
 /* reg_htgt_trap_group
@@ -2964,6 +3210,8 @@ enum {
  */
 MLXSW_ITEM32(reg, htgt, pide, 0x04, 15, 1);
 
+#define MLXSW_REG_HTGT_INVALID_POLICER 0xff
+
 /* reg_htgt_pid
  * Policer ID for the trap group.
  * Access: RW
@@ -2989,6 +3237,8 @@ MLXSW_ITEM32(reg, htgt, mirror_action, 0x08, 8, 2);
  */
 MLXSW_ITEM32(reg, htgt, mirroring_agent, 0x08, 0, 3);
 
+#define MLXSW_REG_HTGT_DEFAULT_PRIORITY 0
+
 /* reg_htgt_priority
  * Trap group priority.
  * In case a packet matches multiple classification rules, the packet will
@@ -3002,52 +3252,47 @@ MLXSW_ITEM32(reg, htgt, mirroring_agent, 0x08, 0, 3);
  */
 MLXSW_ITEM32(reg, htgt, priority, 0x0C, 0, 4);
 
+#define MLXSW_REG_HTGT_DEFAULT_TC 7
+
 /* reg_htgt_local_path_cpu_tclass
  * CPU ingress traffic class for the trap group.
  * Access: RW
  */
 MLXSW_ITEM32(reg, htgt, local_path_cpu_tclass, 0x10, 16, 6);
 
-#define MLXSW_REG_HTGT_LOCAL_PATH_RDQ_EMAD     0x15
-#define MLXSW_REG_HTGT_LOCAL_PATH_RDQ_RX       0x14
-#define MLXSW_REG_HTGT_LOCAL_PATH_RDQ_CTRL     0x13
-
+enum mlxsw_reg_htgt_local_path_rdq {
+       MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_CTRL = 0x13,
+       MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_RX = 0x14,
+       MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_EMAD = 0x15,
+       MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SIB_EMAD = 0x15,
+};
 /* reg_htgt_local_path_rdq
  * Receive descriptor queue (RDQ) to use for the trap group.
  * Access: RW
  */
 MLXSW_ITEM32(reg, htgt, local_path_rdq, 0x10, 0, 6);
 
-static inline void mlxsw_reg_htgt_pack(char *payload,
-                                      enum mlxsw_reg_htgt_trap_group group)
+static inline void mlxsw_reg_htgt_pack(char *payload, u8 group, u8 policer_id,
+                                      u8 priority, u8 tc)
 {
-       u8 swid, rdq;
-
        MLXSW_REG_ZERO(htgt, payload);
-       switch (group) {
-       case MLXSW_REG_HTGT_TRAP_GROUP_EMAD:
-               swid = MLXSW_PORT_SWID_ALL_SWIDS;
-               rdq = MLXSW_REG_HTGT_LOCAL_PATH_RDQ_EMAD;
-               break;
-       case MLXSW_REG_HTGT_TRAP_GROUP_RX:
-               swid = 0;
-               rdq = MLXSW_REG_HTGT_LOCAL_PATH_RDQ_RX;
-               break;
-       case MLXSW_REG_HTGT_TRAP_GROUP_CTRL:
-               swid = 0;
-               rdq = MLXSW_REG_HTGT_LOCAL_PATH_RDQ_CTRL;
-               break;
+
+       if (policer_id == MLXSW_REG_HTGT_INVALID_POLICER) {
+               mlxsw_reg_htgt_pide_set(payload,
+                                       MLXSW_REG_HTGT_POLICER_DISABLE);
+       } else {
+               mlxsw_reg_htgt_pide_set(payload,
+                                       MLXSW_REG_HTGT_POLICER_ENABLE);
+               mlxsw_reg_htgt_pid_set(payload, policer_id);
        }
-       mlxsw_reg_htgt_swid_set(payload, swid);
+
        mlxsw_reg_htgt_type_set(payload, MLXSW_REG_HTGT_PATH_TYPE_LOCAL);
        mlxsw_reg_htgt_trap_group_set(payload, group);
-       mlxsw_reg_htgt_pide_set(payload, MLXSW_REG_HTGT_POLICER_DISABLE);
-       mlxsw_reg_htgt_pid_set(payload, 0);
        mlxsw_reg_htgt_mirror_action_set(payload, MLXSW_REG_HTGT_TRAP_TO_CPU);
        mlxsw_reg_htgt_mirroring_agent_set(payload, 0);
-       mlxsw_reg_htgt_priority_set(payload, 0);
-       mlxsw_reg_htgt_local_path_cpu_tclass_set(payload, 7);
-       mlxsw_reg_htgt_local_path_rdq_set(payload, rdq);
+       mlxsw_reg_htgt_priority_set(payload, priority);
+       mlxsw_reg_htgt_local_path_cpu_tclass_set(payload, tc);
+       mlxsw_reg_htgt_local_path_rdq_set(payload, group);
 }
 
 /* HPKT - Host Packet Trap
@@ -3121,6 +3366,7 @@ enum {
 
 /* reg_hpkt_ctrl
  * Configure dedicated buffer resources for control packets.
+ * Ignored by SwitchX-2.
  * 0 - Keep factory defaults.
  * 1 - Do not use control buffer for this trap ID.
  * 2 - Use control buffer for this trap ID.
@@ -3128,25 +3374,18 @@ enum {
  */
 MLXSW_ITEM32(reg, hpkt, ctrl, 0x04, 16, 2);
 
-static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id)
+static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id,
+                                      enum mlxsw_reg_htgt_trap_group trap_group,
+                                      bool is_ctrl)
 {
-       enum mlxsw_reg_htgt_trap_group trap_group;
-
        MLXSW_REG_ZERO(hpkt, payload);
        mlxsw_reg_hpkt_ack_set(payload, MLXSW_REG_HPKT_ACK_NOT_REQUIRED);
        mlxsw_reg_hpkt_action_set(payload, action);
-       switch (trap_id) {
-       case MLXSW_TRAP_ID_ETHEMAD:
-       case MLXSW_TRAP_ID_PUDE:
-               trap_group = MLXSW_REG_HTGT_TRAP_GROUP_EMAD;
-               break;
-       default:
-               trap_group = MLXSW_REG_HTGT_TRAP_GROUP_RX;
-               break;
-       }
        mlxsw_reg_hpkt_trap_group_set(payload, trap_group);
        mlxsw_reg_hpkt_trap_id_set(payload, trap_id);
-       mlxsw_reg_hpkt_ctrl_set(payload, MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT);
+       mlxsw_reg_hpkt_ctrl_set(payload, is_ctrl ?
+                               MLXSW_REG_HPKT_CTRL_PACKET_USE_BUFFER :
+                               MLXSW_REG_HPKT_CTRL_PACKET_NO_BUFFER);
 }
 
 /* RGCR - Router General Configuration Register
@@ -4331,7 +4570,7 @@ enum mlxsw_reg_mfcr_pwm_frequency {
  * Controls the frequency of the PWM signal.
  * Access: RW
  */
-MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 6);
+MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 7);
 
 #define MLXSW_MFCR_TACHOS_MAX 10
 
@@ -4425,6 +4664,54 @@ static inline void mlxsw_reg_mfsm_pack(char *payload, u8 tacho)
        mlxsw_reg_mfsm_tacho_set(payload, tacho);
 }
 
+/* MFSL - Management Fan Speed Limit Register
+ * ------------------------------------------
+ * The Fan Speed Limit register is used to configure the fan speed
+ * event / interrupt notification mechanism. Fan speed thresholds are
+ * defined for both under-speed and over-speed.
+ */
+#define MLXSW_REG_MFSL_ID 0x9004
+#define MLXSW_REG_MFSL_LEN 0x0C
+
+MLXSW_REG_DEFINE(mfsl, MLXSW_REG_MFSL_ID, MLXSW_REG_MFSL_LEN);
+
+/* reg_mfsl_tacho
+ * Fan tachometer index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mfsl, tacho, 0x00, 24, 4);
+
+/* reg_mfsl_tach_min
+ * Tachometer minimum value (minimum RPM).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mfsl, tach_min, 0x04, 0, 16);
+
+/* reg_mfsl_tach_max
+ * Tachometer maximum value (maximum RPM).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mfsl, tach_max, 0x08, 0, 16);
+
+static inline void mlxsw_reg_mfsl_pack(char *payload, u8 tacho,
+                                      u16 tach_min, u16 tach_max)
+{
+       MLXSW_REG_ZERO(mfsl, payload);
+       mlxsw_reg_mfsl_tacho_set(payload, tacho);
+       mlxsw_reg_mfsl_tach_min_set(payload, tach_min);
+       mlxsw_reg_mfsl_tach_max_set(payload, tach_max);
+}
+
+static inline void mlxsw_reg_mfsl_unpack(char *payload, u8 tacho,
+                                        u16 *p_tach_min, u16 *p_tach_max)
+{
+       if (p_tach_min)
+               *p_tach_min = mlxsw_reg_mfsl_tach_min_get(payload);
+
+       if (p_tach_max)
+               *p_tach_max = mlxsw_reg_mfsl_tach_max_get(payload);
+}
+
 /* MTCAP - Management Temperature Capabilities
  * -------------------------------------------
  * This register exposes the capabilities of the device and
@@ -5107,6 +5394,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
        MLXSW_REG(svpe),
        MLXSW_REG(sfmr),
        MLXSW_REG(spvmlr),
+       MLXSW_REG(qpcr),
        MLXSW_REG(qtct),
        MLXSW_REG(qeec),
        MLXSW_REG(pmlp),
@@ -5116,6 +5404,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
        MLXSW_REG(paos),
        MLXSW_REG(pfcc),
        MLXSW_REG(ppcnt),
+       MLXSW_REG(plib),
        MLXSW_REG(pptb),
        MLXSW_REG(pbmc),
        MLXSW_REG(pspa),
@@ -5134,6 +5423,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
        MLXSW_REG(mfcr),
        MLXSW_REG(mfsc),
        MLXSW_REG(mfsm),
+       MLXSW_REG(mfsl),
        MLXSW_REG(mtcap),
        MLXSW_REG(mtmp),
        MLXSW_REG(mpat),
index a031e45c8b0634213dde81fb81f78f34391627df..3c2171dbdba4e0a9abc53ecc7cdc80530e5ffc32 100644 (file)
@@ -42,10 +42,13 @@ enum mlxsw_res_id {
        MLXSW_RES_ID_KVD_SIZE,
        MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE,
        MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE,
+       MLXSW_RES_ID_MAX_TRAP_GROUPS,
        MLXSW_RES_ID_MAX_SPAN,
        MLXSW_RES_ID_MAX_SYSTEM_PORT,
        MLXSW_RES_ID_MAX_LAG,
        MLXSW_RES_ID_MAX_LAG_MEMBERS,
+       MLXSW_RES_ID_MAX_BUFFER_SIZE,
+       MLXSW_RES_ID_MAX_CPU_POLICERS,
        MLXSW_RES_ID_MAX_VRS,
        MLXSW_RES_ID_MAX_RIFS,
 
@@ -63,10 +66,13 @@ static u16 mlxsw_res_ids[] = {
        [MLXSW_RES_ID_KVD_SIZE] = 0x1001,
        [MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002,
        [MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003,
+       [MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201,
        [MLXSW_RES_ID_MAX_SPAN] = 0x2420,
        [MLXSW_RES_ID_MAX_SYSTEM_PORT] = 0x2502,
        [MLXSW_RES_ID_MAX_LAG] = 0x2520,
        [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521,
+       [MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802,        /* Bytes */
+       [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13,
        [MLXSW_RES_ID_MAX_VRS] = 0x2C01,
        [MLXSW_RES_ID_MAX_RIFS] = 0x2C02,
 };
index 5d8f1d51a403c31d936945e9ce49645335cb53ce..fece974b4edd7fe8f49a98baf75e462b445b6c04 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/dcbnl.h>
 #include <linux/inetdevice.h>
 #include <net/switchdev.h>
-#include <generated/utsrelease.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
 #include <net/netevent.h>
 
 #include "spectrum.h"
+#include "pci.h"
 #include "core.h"
 #include "reg.h"
 #include "port.h"
@@ -156,7 +157,7 @@ static void mlxsw_sp_txhdr_construct(struct sk_buff *skb,
 
 static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp)
 {
-       char spad_pl[MLXSW_REG_SPAD_LEN];
+       char spad_pl[MLXSW_REG_SPAD_LEN] = {0};
        int err;
 
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(spad), spad_pl);
@@ -230,7 +231,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp_port *port)
 
        span_entry->used = true;
        span_entry->id = index;
-       span_entry->ref_count = 0;
+       span_entry->ref_count = 1;
        span_entry->local_port = local_port;
        return span_entry;
 }
@@ -269,6 +270,7 @@ static struct mlxsw_sp_span_entry
 
        span_entry = mlxsw_sp_span_entry_find(port);
        if (span_entry) {
+               /* Already exists, just take a reference */
                span_entry->ref_count++;
                return span_entry;
        }
@@ -279,6 +281,7 @@ static struct mlxsw_sp_span_entry
 static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_span_entry *span_entry)
 {
+       WARN_ON(!span_entry->ref_count);
        if (--span_entry->ref_count == 0)
                mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
        return 0;
@@ -854,7 +857,7 @@ mlxsw_sp_port_get_sw_stats64(const struct net_device *dev,
        return 0;
 }
 
-static bool mlxsw_sp_port_has_offload_stats(int attr_id)
+static bool mlxsw_sp_port_has_offload_stats(const struct net_device *dev, int attr_id)
 {
        switch (attr_id) {
        case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -2001,12 +2004,12 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev,
        int err;
 
        autoneg = mlxsw_sp_port->link.autoneg;
-       mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
        if (err)
                return err;
-       mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin,
-                             &eth_proto_oper);
+       mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin,
+                                 &eth_proto_oper);
 
        mlxsw_sp_port_get_link_supported(eth_proto_cap, cmd);
 
@@ -2035,11 +2038,11 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
        bool autoneg;
        int err;
 
-       mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
        if (err)
                return err;
-       mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, NULL, NULL);
+       mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, NULL, NULL);
 
        autoneg = cmd->base.autoneg == AUTONEG_ENABLE;
        eth_proto_new = autoneg ?
@@ -2052,7 +2055,8 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
                return -EINVAL;
        }
 
-       mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, eth_proto_new);
+       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
+                               eth_proto_new);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
        if (err)
                return err;
@@ -2090,8 +2094,8 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
        u32 eth_proto_admin;
 
        eth_proto_admin = mlxsw_sp_to_ptys_upper_speed(upper_speed);
-       mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port,
-                           eth_proto_admin);
+       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
+                               eth_proto_admin);
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 }
 
@@ -2209,8 +2213,8 @@ static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port)
        return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
 }
 
-static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-                               bool split, u8 module, u8 width, u8 lane)
+static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+                                 bool split, u8 module, u8 width, u8 lane)
 {
        struct mlxsw_sp_port *mlxsw_sp_port;
        struct net_device *dev;
@@ -2220,6 +2224,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
        dev = alloc_etherdev(sizeof(struct mlxsw_sp_port));
        if (!dev)
                return -ENOMEM;
+       SET_NETDEV_DEV(dev, mlxsw_sp->bus_info->dev);
        mlxsw_sp_port = netdev_priv(dev);
        mlxsw_sp_port->dev = dev;
        mlxsw_sp_port->mlxsw_sp = mlxsw_sp;
@@ -2354,20 +2359,12 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                goto err_register_netdev;
        }
 
-       err = mlxsw_core_port_init(mlxsw_sp->core, &mlxsw_sp_port->core_port,
-                                  mlxsw_sp_port->local_port, dev,
-                                  mlxsw_sp_port->split, module);
-       if (err) {
-               dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n",
-                       mlxsw_sp_port->local_port);
-               goto err_core_port_init;
-       }
-
+       mlxsw_core_port_eth_set(mlxsw_sp->core, mlxsw_sp_port->local_port,
+                               mlxsw_sp_port, dev, mlxsw_sp_port->split,
+                               module);
        mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, 0);
        return 0;
 
-err_core_port_init:
-       unregister_netdev(dev);
 err_register_netdev:
        mlxsw_sp->ports[local_port] = NULL;
        mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
@@ -2396,14 +2393,34 @@ err_port_active_vlans_alloc:
        return err;
 }
 
-static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+/* Create a port: init its core port first, then the port itself.
+ * If the second step fails, the core port is released again.
+ */
+static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+                               bool split, u8 module, u8 width, u8 lane)
+{
+       int err;
+
+       err = mlxsw_core_port_init(mlxsw_sp->core, local_port);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n",
+                       local_port);
+               return err;
+       }
+       /* Forward the caller's split flag so split ports are created as split. */
+       err = __mlxsw_sp_port_create(mlxsw_sp, local_port, split,
+                                    module, width, lane);
+       if (err)
+               mlxsw_core_port_fini(mlxsw_sp->core, local_port);
+       return err;
+}
+
+static void __mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 {
        struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
 
-       if (!mlxsw_sp_port)
-               return;
        cancel_delayed_work_sync(&mlxsw_sp_port->hw_stats.update_dw);
-       mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
+       mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp);
        unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
        mlxsw_sp->ports[local_port] = NULL;
        mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
@@ -2419,12 +2436,24 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
        free_netdev(mlxsw_sp_port->dev);
 }
 
+static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+{ /* Reverse of mlxsw_sp_port_create(): tear the port down, then fini its core port. */
+       __mlxsw_sp_port_remove(mlxsw_sp, local_port); /* dereferences ports[local_port] unchecked; callers test mlxsw_sp_port_created() first */
+       mlxsw_core_port_fini(mlxsw_sp->core, local_port);
+}
+
+static bool mlxsw_sp_port_created(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+{ /* True when the ports[] slot for local_port holds a port object. */
+       return mlxsw_sp->ports[local_port] != NULL;
+}
+
 static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp)
 {
        int i;
 
        for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++)
-               mlxsw_sp_port_remove(mlxsw_sp, i);
+               if (mlxsw_sp_port_created(mlxsw_sp, i))
+                       mlxsw_sp_port_remove(mlxsw_sp, i);
        kfree(mlxsw_sp->ports);
 }
 
@@ -2448,8 +2477,8 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
                if (!width)
                        continue;
                mlxsw_sp->port_to_module[i] = module;
-               err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width,
-                                          lane);
+               err = mlxsw_sp_port_create(mlxsw_sp, i, false,
+                                          module, width, lane);
                if (err)
                        goto err_port_create;
        }
@@ -2458,7 +2487,8 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
 err_port_create:
 err_port_module_info_get:
        for (i--; i >= 1; i--)
-               mlxsw_sp_port_remove(mlxsw_sp, i);
+               if (mlxsw_sp_port_created(mlxsw_sp, i))
+                       mlxsw_sp_port_remove(mlxsw_sp, i);
        kfree(mlxsw_sp->ports);
        return err;
 }
@@ -2500,7 +2530,8 @@ static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port,
 
 err_port_create:
        for (i--; i >= 0; i--)
-               mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+               if (mlxsw_sp_port_created(mlxsw_sp, base_port + i))
+                       mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
        i = count;
 err_port_swid_set:
        for (i--; i >= 0; i--)
@@ -2590,7 +2621,8 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
        }
 
        for (i = 0; i < count; i++)
-               mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+               if (mlxsw_sp_port_created(mlxsw_sp, base_port + i))
+                       mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
 
        err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count);
        if (err) {
@@ -2635,7 +2667,8 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port)
                base_port = base_port + 2;
 
        for (i = 0; i < count; i++)
-               mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+               if (mlxsw_sp_port_created(mlxsw_sp, base_port + i))
+                       mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
 
        mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
 
@@ -2665,54 +2698,8 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg,
        }
 }
 
-static struct mlxsw_event_listener mlxsw_sp_pude_event = {
-       .func = mlxsw_sp_pude_event_func,
-       .trap_id = MLXSW_TRAP_ID_PUDE,
-};
-
-static int mlxsw_sp_event_register(struct mlxsw_sp *mlxsw_sp,
-                                  enum mlxsw_event_trap_id trap_id)
-{
-       struct mlxsw_event_listener *el;
-       char hpkt_pl[MLXSW_REG_HPKT_LEN];
-       int err;
-
-       switch (trap_id) {
-       case MLXSW_TRAP_ID_PUDE:
-               el = &mlxsw_sp_pude_event;
-               break;
-       }
-       err = mlxsw_core_event_listener_register(mlxsw_sp->core, el, mlxsw_sp);
-       if (err)
-               return err;
-
-       mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, trap_id);
-       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
-       if (err)
-               goto err_event_trap_set;
-
-       return 0;
-
-err_event_trap_set:
-       mlxsw_core_event_listener_unregister(mlxsw_sp->core, el, mlxsw_sp);
-       return err;
-}
-
-static void mlxsw_sp_event_unregister(struct mlxsw_sp *mlxsw_sp,
-                                     enum mlxsw_event_trap_id trap_id)
-{
-       struct mlxsw_event_listener *el;
-
-       switch (trap_id) {
-       case MLXSW_TRAP_ID_PUDE:
-               el = &mlxsw_sp_pude_event;
-               break;
-       }
-       mlxsw_core_event_listener_unregister(mlxsw_sp->core, el, mlxsw_sp);
-}
-
-static void mlxsw_sp_rx_listener_func(struct sk_buff *skb, u8 local_port,
-                                     void *priv)
+static void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
+                                             u8 local_port, void *priv)
 {
        struct mlxsw_sp *mlxsw_sp = priv;
        struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
@@ -2740,107 +2727,212 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port,
                                           void *priv)
 {
        skb->offload_fwd_mark = 1;
-       return mlxsw_sp_rx_listener_func(skb, local_port, priv);
+       return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
+}
+
+#define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \
+       MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \
+                 _is_ctrl, SP_##_trap_group, DISCARD)
+
+#define MLXSW_SP_RXL_MARK(_trap_id, _action, _trap_group, _is_ctrl)    \
+       MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action,    \
+               _is_ctrl, SP_##_trap_group, DISCARD)
+
+#define MLXSW_SP_EVENTL(_func, _trap_id)               \
+       MLXSW_EVENTL(_func, _trap_id, SP_EVENT)
+
+static const struct mlxsw_listener mlxsw_sp_listener[] = { /* registered in array order by mlxsw_sp_traps_init() */
+       /* Events */
+       MLXSW_SP_EVENTL(mlxsw_sp_pude_event_func, PUDE),
+       /* L2 traps */
+       MLXSW_SP_RXL_NO_MARK(STP, TRAP_TO_CPU, STP, true),
+       MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true),
+       MLXSW_SP_RXL_NO_MARK(LLDP, TRAP_TO_CPU, LLDP, true),
+       MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false),
+       MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, IGMP, false),
+       MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, IGMP, false),
+       MLXSW_SP_RXL_NO_MARK(IGMP_V2_REPORT, TRAP_TO_CPU, IGMP, false),
+       MLXSW_SP_RXL_NO_MARK(IGMP_V2_LEAVE, TRAP_TO_CPU, IGMP, false),
+       MLXSW_SP_RXL_NO_MARK(IGMP_V3_REPORT, TRAP_TO_CPU, IGMP, false),
+       MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false),
+       MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false),
+       /* L3 traps */
+       MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+       MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+       MLXSW_SP_RXL_NO_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+       MLXSW_SP_RXL_MARK(OSPF, TRAP_TO_CPU, OSPF, false),
+       MLXSW_SP_RXL_NO_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
+       MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
+       MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false),
+       MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false),
+};
+
+static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
+{ /* Write a QPCR policer entry for each policer id that matches a known SP trap group. */
+       char qpcr_pl[MLXSW_REG_QPCR_LEN];
+       enum mlxsw_reg_qpcr_ir_units ir_units;
+       int max_cpu_policers;
+       bool is_bytes;
+       u8 burst_size;
+       u32 rate;
+       int i, err;
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_CPU_POLICERS))
+               return -EIO; /* MAX_CPU_POLICERS resource is not reported as valid */
+
+       max_cpu_policers = MLXSW_CORE_RES_GET(mlxsw_core, MAX_CPU_POLICERS);
+
+       ir_units = MLXSW_REG_QPCR_IR_UNITS_M; /* the same units constant is used for every policer */
+       for (i = 0; i < max_cpu_policers; i++) {
+               is_bytes = false;
+               switch (i) { /* the policer index is matched against trap group ids */
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_STP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
+                       rate = 128;
+                       burst_size = 7;
+                       break;
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
+                       rate = 16 * 1024;
+                       burst_size = 10;
+                       break;
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
+                       rate = 1024;
+                       burst_size = 7;
+                       break;
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
+                       is_bytes = true; /* only IP2ME sets this; presumably byte-based metering - TODO confirm in mlxsw_reg_qpcr_pack() */
+                       rate = 4 * 1024;
+                       burst_size = 4;
+                       break;
+               default:
+                       continue; /* ids without a matching trap group are left unprogrammed */
+               }
+
+               mlxsw_reg_qpcr_pack(qpcr_pl, i, ir_units, is_bytes, rate,
+                                   burst_size);
+               err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(qpcr), qpcr_pl);
+               if (err)
+                       return err; /* stop at the first failed register write */
+       }
+
+       return 0;
 }
 
-#define MLXSW_SP_RXL(_func, _trap_id, _action)                 \
-       {                                                       \
-               .func = _func,                                  \
-               .local_port = MLXSW_PORT_DONT_CARE,             \
-               .trap_id = MLXSW_TRAP_ID_##_trap_id,            \
-               .action = MLXSW_REG_HPKT_ACTION_##_action,      \
+static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+       char htgt_pl[MLXSW_REG_HTGT_LEN];
+       enum mlxsw_reg_htgt_trap_group i;
+       int max_cpu_policers;
+       int max_trap_groups;
+       u8 priority, tc;
+       u16 policer_id;
+       int err;
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_TRAP_GROUPS))
+               return -EIO;
+
+       max_trap_groups = MLXSW_CORE_RES_GET(mlxsw_core, MAX_TRAP_GROUPS);
+       max_cpu_policers = MLXSW_CORE_RES_GET(mlxsw_core, MAX_CPU_POLICERS);
+
+       for (i = 0; i < max_trap_groups; i++) {
+               policer_id = i;
+               switch (i) {
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_STP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
+                       priority = 5;
+                       tc = 5;
+                       break;
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
+                       priority = 4;
+                       tc = 4;
+                       break;
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
+                       priority = 3;
+                       tc = 3;
+                       break;
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
+                       priority = 2;
+                       tc = 2;
+                       break;
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
+                       priority = 1;
+                       tc = 1;
+                       break;
+               case MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT:
+                       priority = MLXSW_REG_HTGT_DEFAULT_PRIORITY;
+                       tc = MLXSW_REG_HTGT_DEFAULT_TC;
+                       policer_id = MLXSW_REG_HTGT_INVALID_POLICER;
+                       break;
+               default:
+                       continue;
+               }
+
+               if (max_cpu_policers <= policer_id &&
+                   policer_id != MLXSW_REG_HTGT_INVALID_POLICER)
+                       return -EIO;
+
+               mlxsw_reg_htgt_pack(htgt_pl, i, policer_id, priority, tc);
+               err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+               if (err)
+                       return err;
        }
 
-static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = {
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, FDB_MC, TRAP_TO_CPU),
-       /* Traps for specific L2 packet types, not trapped as FDB MC */
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, STP, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LACP, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, EAPOL, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LLDP, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MMRP, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MVRP, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RPVST, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, DHCP, MIRROR_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, IGMP_QUERY, MIRROR_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V1_REPORT, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_REPORT, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_LEAVE, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V3_REPORT, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPBC, MIRROR_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPUC, MIRROR_TO_CPU),
-       /* L3 traps */
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MTUERROR, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, TTLERROR, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LBERROR, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, OSPF, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IP2ME, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RTR_INGRESS0, TRAP_TO_CPU),
-       MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, HOST_MISS_IPV4, TRAP_TO_CPU),
-};
+       return 0;
+}
 
 static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
 {
-       char htgt_pl[MLXSW_REG_HTGT_LEN];
-       char hpkt_pl[MLXSW_REG_HPKT_LEN];
        int i;
        int err;
 
-       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_RX);
-       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl);
+       err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core);
        if (err)
                return err;
 
-       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_CTRL);
-       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl);
+       err = mlxsw_sp_trap_groups_set(mlxsw_sp->core);
        if (err)
                return err;
 
-       for (i = 0; i < ARRAY_SIZE(mlxsw_sp_rx_listener); i++) {
-               err = mlxsw_core_rx_listener_register(mlxsw_sp->core,
-                                                     &mlxsw_sp_rx_listener[i],
-                                                     mlxsw_sp);
+       for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+               err = mlxsw_core_trap_register(mlxsw_sp->core,
+                                              &mlxsw_sp_listener[i],
+                                              mlxsw_sp);
                if (err)
-                       goto err_rx_listener_register;
+                       goto err_listener_register;
 
-               mlxsw_reg_hpkt_pack(hpkt_pl, mlxsw_sp_rx_listener[i].action,
-                                   mlxsw_sp_rx_listener[i].trap_id);
-               err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
-               if (err)
-                       goto err_rx_trap_set;
        }
        return 0;
 
-err_rx_trap_set:
-       mlxsw_core_rx_listener_unregister(mlxsw_sp->core,
-                                         &mlxsw_sp_rx_listener[i],
-                                         mlxsw_sp);
-err_rx_listener_register:
+err_listener_register:
        for (i--; i >= 0; i--) {
-               mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD,
-                                   mlxsw_sp_rx_listener[i].trap_id);
-               mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
-
-               mlxsw_core_rx_listener_unregister(mlxsw_sp->core,
-                                                 &mlxsw_sp_rx_listener[i],
-                                                 mlxsw_sp);
+               mlxsw_core_trap_unregister(mlxsw_sp->core,
+                                          &mlxsw_sp_listener[i],
+                                          mlxsw_sp);
        }
        return err;
 }
 
 static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
 {
-       char hpkt_pl[MLXSW_REG_HPKT_LEN];
        int i;
 
-       for (i = 0; i < ARRAY_SIZE(mlxsw_sp_rx_listener); i++) {
-               mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD,
-                                   mlxsw_sp_rx_listener[i].trap_id);
-               mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
-
-               mlxsw_core_rx_listener_unregister(mlxsw_sp->core,
-                                                 &mlxsw_sp_rx_listener[i],
-                                                 mlxsw_sp);
+       for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+               mlxsw_core_trap_unregister(mlxsw_sp->core,
+                                          &mlxsw_sp_listener[i],
+                                          mlxsw_sp);
        }
 }
 
@@ -2925,6 +3017,17 @@ static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp)
        kfree(mlxsw_sp->lags);
 }
 
+static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{ /* Program the EMAD trap group with the INVALID_POLICER id and default priority/TC. */
+       char htgt_pl[MLXSW_REG_HTGT_LEN];
+
+       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+                           MLXSW_REG_HTGT_INVALID_POLICER,
+                           MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+                           MLXSW_REG_HTGT_DEFAULT_TC);
+       return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); /* result of the HTGT register write */
+}
+
 static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                         const struct mlxsw_bus_info *mlxsw_bus_info)
 {
@@ -2943,16 +3046,10 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                return err;
        }
 
-       err = mlxsw_sp_event_register(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
-       if (err) {
-               dev_err(mlxsw_sp->bus_info->dev, "Failed to register for PUDE events\n");
-               return err;
-       }
-
        err = mlxsw_sp_traps_init(mlxsw_sp);
        if (err) {
-               dev_err(mlxsw_sp->bus_info->dev, "Failed to set traps for RX\n");
-               goto err_rx_listener_register;
+               dev_err(mlxsw_sp->bus_info->dev, "Failed to set traps\n");
+               return err;
        }
 
        err = mlxsw_sp_flood_init(mlxsw_sp);
@@ -3012,8 +3109,6 @@ err_lag_init:
 err_buffers_init:
 err_flood_init:
        mlxsw_sp_traps_fini(mlxsw_sp);
-err_rx_listener_register:
-       mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
        return err;
 }
 
@@ -3028,7 +3123,6 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
        mlxsw_sp_lag_fini(mlxsw_sp);
        mlxsw_sp_buffers_fini(mlxsw_sp);
        mlxsw_sp_traps_fini(mlxsw_sp);
-       mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
        WARN_ON(!list_empty(&mlxsw_sp->vfids.list));
        WARN_ON(!list_empty(&mlxsw_sp->fids));
 }
@@ -3066,11 +3160,11 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = {
 };
 
 static struct mlxsw_driver mlxsw_sp_driver = {
-       .kind                           = MLXSW_DEVICE_KIND_SPECTRUM,
-       .owner                          = THIS_MODULE,
+       .kind                           = mlxsw_sp_driver_name,
        .priv_size                      = sizeof(struct mlxsw_sp),
        .init                           = mlxsw_sp_init,
        .fini                           = mlxsw_sp_fini,
+       .basic_trap_groups_set          = mlxsw_sp_basic_trap_groups_set,
        .port_split                     = mlxsw_sp_port_split,
        .port_unsplit                   = mlxsw_sp_port_unsplit,
        .sb_pool_get                    = mlxsw_sp_sb_pool_get,
@@ -4662,6 +4756,16 @@ static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
        .notifier_call = mlxsw_sp_router_netevent_event,
 };
 
+static const struct pci_device_id mlxsw_sp_pci_id_table[] = { /* also exported through MODULE_DEVICE_TABLE(pci, ...) */
+       {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0},
+       {0, }, /* all-zero last entry (conventional table terminator) */
+};
+
+static struct pci_driver mlxsw_sp_pci_driver = { /* only name and id_table are set here */
+       .name = mlxsw_sp_driver_name,
+       .id_table = mlxsw_sp_pci_id_table,
+}; /* passed to mlxsw_pci_driver_register() in mlxsw_sp_module_init() */
+
 static int __init mlxsw_sp_module_init(void)
 {
        int err;
@@ -4673,8 +4777,15 @@ static int __init mlxsw_sp_module_init(void)
        err = mlxsw_core_driver_register(&mlxsw_sp_driver);
        if (err)
                goto err_core_driver_register;
+
+       err = mlxsw_pci_driver_register(&mlxsw_sp_pci_driver);
+       if (err)
+               goto err_pci_driver_register;
+
        return 0;
 
+err_pci_driver_register:
+       mlxsw_core_driver_unregister(&mlxsw_sp_driver);
 err_core_driver_register:
        unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
        unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
@@ -4684,6 +4795,7 @@ err_core_driver_register:
 
 static void __exit mlxsw_sp_module_exit(void)
 {
+       mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver);
        mlxsw_core_driver_unregister(&mlxsw_sp_driver);
        unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
        unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
@@ -4696,4 +4808,4 @@ module_exit(mlxsw_sp_module_exit);
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox Spectrum driver");
-MODULE_MLXSW_DRIVER_ALIAS(MLXSW_DEVICE_KIND_SPECTRUM);
+MODULE_DEVICE_TABLE(pci, mlxsw_sp_pci_id_table);
index cc5462556a83260a6ff6d062f90a8f19f59a575e..cc1af19d699afc1b7b7ddfbc4ec9e7b1b24715fe 100644 (file)
@@ -115,7 +115,7 @@ struct mlxsw_sp_rif {
 struct mlxsw_sp_mid {
        struct list_head list;
        unsigned char addr[ETH_ALEN];
-       u16 vid;
+       u16 fid;
        u16 mid;
        unsigned int ref_count;
 };
@@ -316,7 +316,6 @@ struct mlxsw_sp_port_pcpu_stats {
 };
 
 struct mlxsw_sp_port {
-       struct mlxsw_core_port core_port; /* must be first */
        struct net_device *dev;
        struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats;
        struct mlxsw_sp *mlxsw_sp;
index bcaed8a38037b0461de3e534158f5196e970c992..a7468262f118979c8914e8acea042361b838f69e 100644 (file)
@@ -611,6 +611,9 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
        u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size);
        enum mlxsw_reg_sbpr_mode mode;
 
+       if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE))
+               return -EINVAL;
+
        mode = (enum mlxsw_reg_sbpr_mode) threshold_type;
        return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size);
 }
index 113f667e59dfa859024f7bbc7ff4a58c2f15ce3f..683f0454170c713df33227e8ccb2f023aa9487c1 100644 (file)
@@ -320,6 +320,8 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
                                                lpm_tree);
        if (err)
                goto err_left_struct_set;
+       memcpy(&lpm_tree->prefix_usage, prefix_usage,
+              sizeof(lpm_tree->prefix_usage));
        return lpm_tree;
 
 err_left_struct_set:
@@ -343,7 +345,8 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
 
        for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
                lpm_tree = &mlxsw_sp->router.lpm_trees[i];
-               if (lpm_tree->proto == proto &&
+               if (lpm_tree->ref_count != 0 &&
+                   lpm_tree->proto == proto &&
                    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
                                             prefix_usage))
                        goto inc_ref_count;
@@ -586,21 +589,22 @@ static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
        return 0;
 }
 
+static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
+
 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
 {
+       mlxsw_sp_router_fib_flush(mlxsw_sp);
        kfree(mlxsw_sp->router.vrs);
 }
 
 struct mlxsw_sp_neigh_key {
-       unsigned char addr[sizeof(struct in6_addr)];
-       struct net_device *dev;
+       struct neighbour *n;
 };
 
 struct mlxsw_sp_neigh_entry {
        struct rhash_head ht_node;
        struct mlxsw_sp_neigh_key key;
        u16 rif;
-       struct neighbour *n;
        bool offloaded;
        struct delayed_work dw;
        struct mlxsw_sp_port *mlxsw_sp_port;
@@ -638,19 +642,15 @@ mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
 static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work);
 
 static struct mlxsw_sp_neigh_entry *
-mlxsw_sp_neigh_entry_create(const void *addr, size_t addr_len,
-                           struct net_device *dev, u16 rif,
-                           struct neighbour *n)
+mlxsw_sp_neigh_entry_create(struct neighbour *n, u16 rif)
 {
        struct mlxsw_sp_neigh_entry *neigh_entry;
 
        neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_ATOMIC);
        if (!neigh_entry)
                return NULL;
-       memcpy(neigh_entry->key.addr, addr, addr_len);
-       neigh_entry->key.dev = dev;
+       neigh_entry->key.n = n;
        neigh_entry->rif = rif;
-       neigh_entry->n = n;
        INIT_DELAYED_WORK(&neigh_entry->dw, mlxsw_sp_router_neigh_update_hw);
        INIT_LIST_HEAD(&neigh_entry->nexthop_list);
        return neigh_entry;
@@ -663,13 +663,11 @@ mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp_neigh_entry *neigh_entry)
 }
 
 static struct mlxsw_sp_neigh_entry *
-mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, const void *addr,
-                           size_t addr_len, struct net_device *dev)
+mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
 {
-       struct mlxsw_sp_neigh_key key = {{ 0 } };
+       struct mlxsw_sp_neigh_key key;
 
-       memcpy(key.addr, addr, addr_len);
-       key.dev = dev;
+       key.n = n;
        return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
                                      &key, mlxsw_sp_neigh_ht_params);
 }
@@ -681,26 +679,20 @@ int mlxsw_sp_router_neigh_construct(struct net_device *dev,
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        struct mlxsw_sp_neigh_entry *neigh_entry;
        struct mlxsw_sp_rif *r;
-       u32 dip;
        int err;
 
        if (n->tbl != &arp_tbl)
                return 0;
 
-       dip = ntohl(*((__be32 *) n->primary_key));
-       neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
-                                                 n->dev);
-       if (neigh_entry) {
-               WARN_ON(neigh_entry->n != n);
+       neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
+       if (neigh_entry)
                return 0;
-       }
 
        r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
        if (WARN_ON(!r))
                return -EINVAL;
 
-       neigh_entry = mlxsw_sp_neigh_entry_create(&dip, sizeof(dip), n->dev,
-                                                 r->rif, n);
+       neigh_entry = mlxsw_sp_neigh_entry_create(n, r->rif);
        if (!neigh_entry)
                return -ENOMEM;
        err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
@@ -719,14 +711,11 @@ void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        struct mlxsw_sp_neigh_entry *neigh_entry;
-       u32 dip;
 
        if (n->tbl != &arp_tbl)
                return;
 
-       dip = ntohl(*((__be32 *) n->primary_key));
-       neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &dip, sizeof(dip),
-                                                 n->dev);
+       neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
        if (!neigh_entry)
                return;
        mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
@@ -809,6 +798,26 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
        }
 }
 
+static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
+{
+       u8 num_rec, last_rec_index, num_entries;
+
+       num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
+       last_rec_index = num_rec - 1;
+
+       if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
+               return false;
+       if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
+           MLXSW_REG_RAUHTD_TYPE_IPV6)
+               return true;
+
+       num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
+                                                               last_rec_index);
+       if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
+               return true;
+       return false;
+}
+
 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
 {
        char *rauhtd_pl;
@@ -835,7 +844,7 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
                for (i = 0; i < num_rec; i++)
                        mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
                                                          i);
-       } while (num_rec);
+       } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
        rtnl_unlock();
 
        kfree(rauhtd_pl);
@@ -854,7 +863,7 @@ static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
                 * is active regardless of the traffic.
                 */
                if (!list_empty(&neigh_entry->nexthop_list))
-                       neigh_event_send(neigh_entry->n, NULL);
+                       neigh_event_send(neigh_entry->key.n, NULL);
        }
        rtnl_unlock();
 }
@@ -900,9 +909,9 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
        rtnl_lock();
        list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
                            nexthop_neighs_list_node) {
-               if (!(neigh_entry->n->nud_state & NUD_VALID) &&
+               if (!(neigh_entry->key.n->nud_state & NUD_VALID) &&
                    !list_empty(&neigh_entry->nexthop_list))
-                       neigh_event_send(neigh_entry->n, NULL);
+                       neigh_event_send(neigh_entry->key.n, NULL);
        }
        rtnl_unlock();
 
@@ -919,7 +928,7 @@ static void mlxsw_sp_router_neigh_update_hw(struct work_struct *work)
 {
        struct mlxsw_sp_neigh_entry *neigh_entry =
                container_of(work, struct mlxsw_sp_neigh_entry, dw.work);
-       struct neighbour *n = neigh_entry->n;
+       struct neighbour *n = neigh_entry->key.n;
        struct mlxsw_sp_port *mlxsw_sp_port = neigh_entry->mlxsw_sp_port;
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
        char rauht_pl[MLXSW_REG_RAUHT_LEN];
@@ -1022,11 +1031,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
 
                mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
                dip = ntohl(*((__be32 *) n->primary_key));
-               neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp,
-                                                         &dip,
-                                                         sizeof(__be32),
-                                                         dev);
-               if (WARN_ON(!neigh_entry) || WARN_ON(neigh_entry->n != n)) {
+               neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
+               if (WARN_ON(!neigh_entry)) {
                        mlxsw_sp_port_dev_put(mlxsw_sp_port);
                        return NOTIFY_DONE;
                }
@@ -1335,33 +1341,26 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
                                 struct fib_nh *fib_nh)
 {
        struct mlxsw_sp_neigh_entry *neigh_entry;
-       u32 gwip = ntohl(fib_nh->nh_gw);
        struct net_device *dev = fib_nh->nh_dev;
        struct neighbour *n;
        u8 nud_state;
 
-       neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
-                                                 sizeof(gwip), dev);
-       if (!neigh_entry) {
-               __be32 gwipn = htonl(gwip);
-
-               n = neigh_create(&arp_tbl, &gwipn, dev);
+       /* Take a reference of neigh here ensuring that neigh would
+        * not be destructed before the nexthop entry is finished.
+        * The reference is taken either in neigh_lookup() or
+        * in neigh_create() in case n is not found.
+        */
+       n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, dev);
+       if (!n) {
+               n = neigh_create(&arp_tbl, &fib_nh->nh_gw, dev);
                if (IS_ERR(n))
                        return PTR_ERR(n);
                neigh_event_send(n, NULL);
-               neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, &gwip,
-                                                         sizeof(gwip), dev);
-               if (!neigh_entry) {
-                       neigh_release(n);
-                       return -EINVAL;
-               }
-       } else {
-               /* Take a reference of neigh here ensuring that neigh would
-                * not be detructed before the nexthop entry is finished.
-                * The second branch takes the reference in neith_create()
-                */
-               n = neigh_entry->n;
-               neigh_clone(n);
+       }
+       neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
+       if (!neigh_entry) {
+               neigh_release(n);
+               return -EINVAL;
        }
 
        /* If that is the first nexthop connected to that neigh, add to
@@ -1395,7 +1394,7 @@ static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
        if (list_empty(&nh->neigh_entry->nexthop_list))
                list_del(&nh->neigh_entry->nexthop_neighs_list_node);
 
-       neigh_release(neigh_entry->n);
+       neigh_release(neigh_entry->key.n);
 }
 
 static struct mlxsw_sp_nexthop_group *
@@ -1455,11 +1454,11 @@ static bool mlxsw_sp_nexthop_match(struct mlxsw_sp_nexthop *nh,
 
        for (i = 0; i < fi->fib_nhs; i++) {
                struct fib_nh *fib_nh = &fi->fib_nh[i];
-               u32 gwip = ntohl(fib_nh->nh_gw);
+               struct neighbour *n = nh->neigh_entry->key.n;
 
-               if (memcmp(nh->neigh_entry->key.addr,
-                          &gwip, sizeof(u32)) == 0 &&
-                   nh->neigh_entry->key.dev == fib_nh->nh_dev)
+               if (memcmp(n->primary_key, &fib_nh->nh_gw,
+                          sizeof(fib_nh->nh_gw)) == 0 &&
+                   n->dev == fib_nh->nh_dev)
                        return true;
        }
        return false;
@@ -1815,19 +1814,17 @@ err_fib_entry_insert:
        return err;
 }
 
-static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
-                                   struct fib_entry_notifier_info *fen_info)
+static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
+                                    struct fib_entry_notifier_info *fen_info)
 {
        struct mlxsw_sp_fib_entry *fib_entry;
 
        if (mlxsw_sp->router.aborted)
-               return 0;
+               return;
 
        fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info);
-       if (!fib_entry) {
-               dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n");
-               return -ENOENT;
-       }
+       if (!fib_entry)
+               return;
 
        if (fib_entry->ref_count == 1) {
                mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
@@ -1835,7 +1832,6 @@ static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
        }
 
        mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
-       return 0;
 }
 
 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
@@ -1857,7 +1853,8 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
        if (err)
                return err;
 
-       mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4, 0);
+       mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
+                            MLXSW_SP_LPM_TREE_MIN);
        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
        if (err)
                return err;
@@ -1868,16 +1865,16 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
 }
 
-static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
+static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
 {
        struct mlxsw_sp_fib_entry *fib_entry;
        struct mlxsw_sp_fib_entry *tmp;
        struct mlxsw_sp_vr *vr;
        int i;
-       int err;
 
        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
                vr = &mlxsw_sp->router.vrs[i];
+
                if (!vr->used)
                        continue;
 
@@ -1893,6 +1890,16 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
                                break;
                }
        }
+}
+
+static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
+{
+       int err;
+
+       if (mlxsw_sp->router.aborted)
+               return;
+       dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
+       mlxsw_sp_router_fib_flush(mlxsw_sp);
        mlxsw_sp->router.aborted = true;
        err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
        if (err)
@@ -1948,6 +1955,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
        struct fib_entry_notifier_info *fen_info = ptr;
        int err;
 
+       if (!net_eq(fen_info->info.net, &init_net))
+               return NOTIFY_DONE;
+
        switch (event) {
        case FIB_EVENT_ENTRY_ADD:
                err = mlxsw_sp_router_fib4_add(mlxsw_sp, fen_info);
@@ -1980,7 +1990,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
        if (err)
                goto err_vrs_init;
 
-       err =  mlxsw_sp_neigh_init(mlxsw_sp);
+       err = mlxsw_sp_neigh_init(mlxsw_sp);
        if (err)
                goto err_neigh_init;
 
index b19552a7277882f92466c3f02fb1b875f59f7a1e..b87ba7d36bc4af98b7464b14c536b9626abf15fe 100644 (file)
@@ -929,12 +929,12 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid,
 
 static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp,
                                              const unsigned char *addr,
-                                             u16 vid)
+                                             u16 fid)
 {
        struct mlxsw_sp_mid *mid;
 
        list_for_each_entry(mid, &mlxsw_sp->br_mids.list, list) {
-               if (ether_addr_equal(mid->addr, addr) && mid->vid == vid)
+               if (ether_addr_equal(mid->addr, addr) && mid->fid == fid)
                        return mid;
        }
        return NULL;
@@ -942,7 +942,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp,
 
 static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp,
                                                const unsigned char *addr,
-                                               u16 vid)
+                                               u16 fid)
 {
        struct mlxsw_sp_mid *mid;
        u16 mid_idx;
@@ -958,7 +958,7 @@ static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp,
 
        set_bit(mid_idx, mlxsw_sp->br_mids.mapped);
        ether_addr_copy(mid->addr, addr);
-       mid->vid = vid;
+       mid->fid = fid;
        mid->mid = mid_idx;
        mid->ref_count = 0;
        list_add_tail(&mid->list, &mlxsw_sp->br_mids.list);
@@ -991,9 +991,9 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port,
        if (switchdev_trans_ph_prepare(trans))
                return 0;
 
-       mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, mdb->vid);
+       mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid);
        if (!mid) {
-               mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, mdb->vid);
+               mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, fid);
                if (!mid) {
                        netdev_err(dev, "Unable to allocate MC group\n");
                        return -ENOMEM;
@@ -1137,7 +1137,7 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
        u16 mid_idx;
        int err = 0;
 
-       mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, mdb->vid);
+       mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid);
        if (!mid) {
                netdev_err(dev, "Unable to remove port from MC DB\n");
                return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
new file mode 100644 (file)
index 0000000..74341fe
--- /dev/null
@@ -0,0 +1,605 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/switchib.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Elad Raz <eladr@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#include <net/switchdev.h>
+
+#include "pci.h"
+#include "core.h"
+#include "reg.h"
+#include "port.h"
+#include "trap.h"
+#include "txheader.h"
+#include "ib.h"
+
+static const char mlxsw_sib_driver_name[] = "mlxsw_switchib";
+static const char mlxsw_sib2_driver_name[] = "mlxsw_switchib2";
+
+struct mlxsw_sib_port;
+
+struct mlxsw_sib {
+       struct mlxsw_sib_port **ports;
+       struct mlxsw_core *core;
+       const struct mlxsw_bus_info *bus_info;
+};
+
+struct mlxsw_sib_port {
+       struct mlxsw_sib *mlxsw_sib;
+       u8 local_port;
+       struct {
+               u8 module;
+       } mapping;
+};
+
+/* tx_v1_hdr_version
+ * Tx header version.
+ * Must be set to 1.
+ */
+MLXSW_ITEM32(tx_v1, hdr, version, 0x00, 28, 4);
+
+/* tx_v1_hdr_ctl
+ * Packet control type.
+ * 0 - Ethernet control (e.g. EMADs, LACP)
+ * 1 - Ethernet data
+ */
+MLXSW_ITEM32(tx_v1, hdr, ctl, 0x00, 26, 2);
+
+/* tx_v1_hdr_proto
+ * Packet protocol type. Must be set to 1 (Ethernet).
+ */
+MLXSW_ITEM32(tx_v1, hdr, proto, 0x00, 21, 3);
+
+/* tx_v1_hdr_swid
+ * Switch partition ID. Must be set to 0.
+ */
+MLXSW_ITEM32(tx_v1, hdr, swid, 0x00, 12, 3);
+
+/* tx_v1_hdr_control_tclass
+ * Indicates if the packet should use the control TClass and not one
+ * of the data TClasses.
+ */
+MLXSW_ITEM32(tx_v1, hdr, control_tclass, 0x00, 6, 1);
+
+/* tx_v1_hdr_port_mid
+ * Destination local port for unicast packets.
+ * Destination multicast ID for multicast packets.
+ *
+ * Control packets are directed to a specific egress port, while data
+ * packets are transmitted through the CPU port (0) into the switch partition,
+ * where forwarding rules are applied.
+ */
+MLXSW_ITEM32(tx_v1, hdr, port_mid, 0x04, 16, 16);
+
+/* tx_v1_hdr_type
+ * 0 - Data packets
+ * 6 - Control packets
+ */
+MLXSW_ITEM32(tx_v1, hdr, type, 0x0C, 0, 4);
+
+static void
+mlxsw_sib_tx_v1_hdr_construct(struct sk_buff *skb,
+                             const struct mlxsw_tx_info *tx_info)
+{
+       char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN);
+
+       memset(txhdr, 0, MLXSW_TXHDR_LEN);
+
+       mlxsw_tx_v1_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1);
+       mlxsw_tx_v1_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL);
+       mlxsw_tx_v1_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH);
+       mlxsw_tx_v1_hdr_swid_set(txhdr, 0);
+       mlxsw_tx_v1_hdr_control_tclass_set(txhdr, 1);
+       mlxsw_tx_v1_hdr_port_mid_set(txhdr, tx_info->local_port);
+       mlxsw_tx_v1_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
+}
+
+static int
+mlxsw_sib_port_admin_status_set(struct mlxsw_sib_port *mlxsw_sib_port,
+                               bool is_up)
+{
+       struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+       char paos_pl[MLXSW_REG_PAOS_LEN];
+
+       mlxsw_reg_paos_pack(paos_pl, mlxsw_sib_port->local_port,
+                           is_up ? MLXSW_PORT_ADMIN_STATUS_UP :
+                           MLXSW_PORT_ADMIN_STATUS_DOWN);
+       return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(paos), paos_pl);
+}
+
+static int mlxsw_sib_port_mtu_set(struct mlxsw_sib_port *mlxsw_sib_port,
+                                 u16 mtu)
+{
+       struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+       char pmtu_pl[MLXSW_REG_PMTU_LEN];
+       int max_mtu;
+       int err;
+
+       mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sib_port->local_port, 0);
+       err = mlxsw_reg_query(mlxsw_sib->core, MLXSW_REG(pmtu), pmtu_pl);
+       if (err)
+               return err;
+       max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl);
+
+       if (mtu > max_mtu)
+               return -EINVAL;
+
+       mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sib_port->local_port, mtu);
+       return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(pmtu), pmtu_pl);
+}
+
+static int mlxsw_sib_port_set(struct mlxsw_sib_port *mlxsw_sib_port, u8 port)
+{
+       struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+       char plib_pl[MLXSW_REG_PLIB_LEN] = {0};
+       int err;
+
+       mlxsw_reg_plib_local_port_set(plib_pl, mlxsw_sib_port->local_port);
+       mlxsw_reg_plib_ib_port_set(plib_pl, port);
+       err = mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(plib), plib_pl);
+       return err;
+}
+
+static int mlxsw_sib_port_swid_set(struct mlxsw_sib_port *mlxsw_sib_port,
+                                  u8 swid)
+{
+       struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+       char pspa_pl[MLXSW_REG_PSPA_LEN];
+
+       mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sib_port->local_port);
+       return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(pspa), pspa_pl);
+}
+
+static int mlxsw_sib_port_module_info_get(struct mlxsw_sib *mlxsw_sib,
+                                         u8 local_port, u8 *p_module,
+                                         u8 *p_width)
+{
+       char pmlp_pl[MLXSW_REG_PMLP_LEN];
+       int err;
+
+       mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
+       err = mlxsw_reg_query(mlxsw_sib->core, MLXSW_REG(pmlp), pmlp_pl);
+       if (err)
+               return err;
+       *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
+       *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+       return 0;
+}
+
+static int mlxsw_sib_port_speed_set(struct mlxsw_sib_port *mlxsw_sib_port,
+                                   u16 speed, u16 width)
+{
+       struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+       char ptys_pl[MLXSW_REG_PTYS_LEN];
+
+       mlxsw_reg_ptys_ib_pack(ptys_pl, mlxsw_sib_port->local_port, speed,
+                              width);
+       return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(ptys), ptys_pl);
+}
+
+static bool mlxsw_sib_port_created(struct mlxsw_sib *mlxsw_sib, u8 local_port)
+{
+       return mlxsw_sib->ports[local_port] != NULL;
+}
+
+static int __mlxsw_sib_port_create(struct mlxsw_sib *mlxsw_sib, u8 local_port,
+                                  u8 module, u8 width)
+{
+       struct mlxsw_sib_port *mlxsw_sib_port;
+       int err;
+
+       mlxsw_sib_port = kzalloc(sizeof(*mlxsw_sib_port), GFP_KERNEL);
+       if (!mlxsw_sib_port)
+               return -ENOMEM;
+       mlxsw_sib_port->mlxsw_sib = mlxsw_sib;
+       mlxsw_sib_port->local_port = local_port;
+       mlxsw_sib_port->mapping.module = module;
+
+       err = mlxsw_sib_port_swid_set(mlxsw_sib_port, 0);
+       if (err) {
+               dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set SWID\n",
+                       mlxsw_sib_port->local_port);
+               goto err_port_swid_set;
+       }
+
+       /* Expose the IB port number as its front panel name */
+       err = mlxsw_sib_port_set(mlxsw_sib_port, module + 1);
+       if (err) {
+               dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set IB port\n",
+                       mlxsw_sib_port->local_port);
+               goto err_port_ib_set;
+       }
+
+       /* Supports all speeds from SDR to FDR (bitmask) and bus widths
+        * of 1x, 2x and 4x (3-bit bitmask)
+        */
+       err = mlxsw_sib_port_speed_set(mlxsw_sib_port,
+                                      MLXSW_REG_PTYS_IB_SPEED_EDR - 1,
+                                      BIT(3) - 1);
+       if (err) {
+               dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set speed\n",
+                       mlxsw_sib_port->local_port);
+               goto err_port_speed_set;
+       }
+
+       /* Change to the maximum MTU the device supports, the SMA will take
+        * care of the active MTU
+        */
+       err = mlxsw_sib_port_mtu_set(mlxsw_sib_port, MLXSW_IB_DEFAULT_MTU);
+       if (err) {
+               dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set MTU\n",
+                       mlxsw_sib_port->local_port);
+               goto err_port_mtu_set;
+       }
+
+       err = mlxsw_sib_port_admin_status_set(mlxsw_sib_port, true);
+       if (err) {
+               dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to change admin state to UP\n",
+                       mlxsw_sib_port->local_port);
+               goto err_port_admin_set;
+       }
+
+       mlxsw_core_port_ib_set(mlxsw_sib->core, mlxsw_sib_port->local_port,
+                              mlxsw_sib_port);
+       mlxsw_sib->ports[local_port] = mlxsw_sib_port;
+       return 0;
+
+err_port_admin_set:
+err_port_mtu_set:
+err_port_speed_set:
+err_port_ib_set:
+       mlxsw_sib_port_swid_set(mlxsw_sib_port, MLXSW_PORT_SWID_DISABLED_PORT);
+err_port_swid_set:
+       kfree(mlxsw_sib_port);
+       return err;
+}
+
+static int mlxsw_sib_port_create(struct mlxsw_sib *mlxsw_sib, u8 local_port,
+                                u8 module, u8 width)
+{
+       int err;
+
+       err = mlxsw_core_port_init(mlxsw_sib->core, local_port);
+       if (err) {
+               dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to init core port\n",
+                       local_port);
+               return err;
+       }
+       err = __mlxsw_sib_port_create(mlxsw_sib, local_port, module, width);
+       if (err)
+               goto err_port_create;
+
+       return 0;
+
+err_port_create:
+       mlxsw_core_port_fini(mlxsw_sib->core, local_port);
+       return err;
+}
+
+static void __mlxsw_sib_port_remove(struct mlxsw_sib *mlxsw_sib, u8 local_port)
+{
+       struct mlxsw_sib_port *mlxsw_sib_port = mlxsw_sib->ports[local_port];
+
+       mlxsw_core_port_clear(mlxsw_sib->core, local_port, mlxsw_sib);
+       mlxsw_sib->ports[local_port] = NULL;
+       mlxsw_sib_port_admin_status_set(mlxsw_sib_port, false);
+       mlxsw_sib_port_swid_set(mlxsw_sib_port, MLXSW_PORT_SWID_DISABLED_PORT);
+       kfree(mlxsw_sib_port);
+}
+
+static void mlxsw_sib_port_remove(struct mlxsw_sib *mlxsw_sib, u8 local_port)
+{
+       __mlxsw_sib_port_remove(mlxsw_sib, local_port);
+       mlxsw_core_port_fini(mlxsw_sib->core, local_port);
+}
+
+static void mlxsw_sib_ports_remove(struct mlxsw_sib *mlxsw_sib)
+{
+       int i;
+
+       for (i = 1; i < MLXSW_PORT_MAX_IB_PORTS; i++)
+               if (mlxsw_sib_port_created(mlxsw_sib, i))
+                       mlxsw_sib_port_remove(mlxsw_sib, i);
+       kfree(mlxsw_sib->ports);
+}
+
+static int mlxsw_sib_ports_create(struct mlxsw_sib *mlxsw_sib)
+{
+       size_t alloc_size;
+       u8 module, width;
+       int i;
+       int err;
+
+       alloc_size = sizeof(struct mlxsw_sib_port *) * MLXSW_PORT_MAX_IB_PORTS;
+       mlxsw_sib->ports = kzalloc(alloc_size, GFP_KERNEL);
+       if (!mlxsw_sib->ports)
+               return -ENOMEM;
+
+       for (i = 1; i < MLXSW_PORT_MAX_IB_PORTS; i++) {
+               err = mlxsw_sib_port_module_info_get(mlxsw_sib, i, &module,
+                                                    &width);
+               if (err)
+                       goto err_port_module_info_get;
+               if (!width)
+                       continue;
+               err = mlxsw_sib_port_create(mlxsw_sib, i, module, width);
+               if (err)
+                       goto err_port_create;
+       }
+       return 0;
+
+err_port_create:
+err_port_module_info_get:
+       for (i--; i >= 1; i--)
+               if (mlxsw_sib_port_created(mlxsw_sib, i))
+                       mlxsw_sib_port_remove(mlxsw_sib, i);
+       kfree(mlxsw_sib->ports);
+       return err;
+}
+
+static void
+mlxsw_sib_pude_ib_event_func(struct mlxsw_sib_port *mlxsw_sib_port,
+                            enum mlxsw_reg_pude_oper_status status)
+{
+       if (status == MLXSW_PORT_OPER_STATUS_UP)
+               pr_info("ib link for port %d - up\n",
+                       mlxsw_sib_port->mapping.module + 1);
+       else
+               pr_info("ib link for port %d - down\n",
+                       mlxsw_sib_port->mapping.module + 1);
+}
+
+static void mlxsw_sib_pude_event_func(const struct mlxsw_reg_info *reg,
+                                     char *pude_pl, void *priv)
+{
+       struct mlxsw_sib *mlxsw_sib = priv;
+       struct mlxsw_sib_port *mlxsw_sib_port;
+       enum mlxsw_reg_pude_oper_status status;
+       u8 local_port;
+
+       local_port = mlxsw_reg_pude_local_port_get(pude_pl);
+       mlxsw_sib_port = mlxsw_sib->ports[local_port];
+       if (!mlxsw_sib_port) {
+               dev_warn(mlxsw_sib->bus_info->dev, "Port %d: Link event received for non-existent port\n",
+                        local_port);
+               return;
+       }
+
+       status = mlxsw_reg_pude_oper_status_get(pude_pl);
+       mlxsw_sib_pude_ib_event_func(mlxsw_sib_port, status);
+}
+
+static const struct mlxsw_listener mlxsw_sib_listener[] = {
+       MLXSW_EVENTL(mlxsw_sib_pude_event_func, PUDE, EMAD),
+};
+
+static int mlxsw_sib_taps_init(struct mlxsw_sib *mlxsw_sib)
+{
+       int i;
+       int err;
+
+       for (i = 0; i < ARRAY_SIZE(mlxsw_sib_listener); i++) {
+               err = mlxsw_core_trap_register(mlxsw_sib->core,
+                                              &mlxsw_sib_listener[i],
+                                              mlxsw_sib);
+               if (err)
+                       goto err_rx_listener_register;
+       }
+
+       return 0;
+
+err_rx_listener_register:
+       for (i--; i >= 0; i--) {
+               mlxsw_core_trap_unregister(mlxsw_sib->core,
+                                          &mlxsw_sib_listener[i],
+                                          mlxsw_sib);
+       }
+
+       return err;
+}
+
+static void mlxsw_sib_traps_fini(struct mlxsw_sib *mlxsw_sib)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(mlxsw_sib_listener); i++) {
+               mlxsw_core_trap_unregister(mlxsw_sib->core,
+                                          &mlxsw_sib_listener[i], mlxsw_sib);
+       }
+}
+
+static int mlxsw_sib_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+       char htgt_pl[MLXSW_REG_HTGT_LEN];
+
+       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+                           MLXSW_REG_HTGT_INVALID_POLICER,
+                           MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+                           MLXSW_REG_HTGT_DEFAULT_TC);
+       mlxsw_reg_htgt_swid_set(htgt_pl, MLXSW_PORT_SWID_ALL_SWIDS);
+       mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
+                                       MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SIB_EMAD);
+       return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+}
+
+static int mlxsw_sib_init(struct mlxsw_core *mlxsw_core,
+                         const struct mlxsw_bus_info *mlxsw_bus_info)
+{
+       struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core);
+       int err;
+
+       mlxsw_sib->core = mlxsw_core;
+       mlxsw_sib->bus_info = mlxsw_bus_info;
+
+       err = mlxsw_sib_ports_create(mlxsw_sib);
+       if (err) {
+               dev_err(mlxsw_sib->bus_info->dev, "Failed to create ports\n");
+               return err;
+       }
+
+       err = mlxsw_sib_taps_init(mlxsw_sib);
+       if (err) {
+               dev_err(mlxsw_sib->bus_info->dev, "Failed to set traps\n");
+               goto err_traps_init_err;
+       }
+
+       return 0;
+
+err_traps_init_err:
+       mlxsw_sib_ports_remove(mlxsw_sib);
+       return err;
+}
+
+static void mlxsw_sib_fini(struct mlxsw_core *mlxsw_core)
+{
+       struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core);
+
+       mlxsw_sib_traps_fini(mlxsw_sib);
+       mlxsw_sib_ports_remove(mlxsw_sib);
+}
+
+static struct mlxsw_config_profile mlxsw_sib_config_profile = {
+       .used_max_system_port           = 1,
+       .max_system_port                = 48000,
+       .used_max_ib_mc                 = 1,
+       .max_ib_mc                      = 27,
+       .used_max_pkey                  = 1,
+       .max_pkey                       = 32,
+       .swid_config                    = {
+               {
+                       .used_type      = 1,
+                       .type           = MLXSW_PORT_SWID_TYPE_IB,
+               }
+       },
+       .resource_query_enable          = 0,
+};
+
+static struct mlxsw_driver mlxsw_sib_driver = {
+       .kind                   = mlxsw_sib_driver_name,
+       .priv_size              = sizeof(struct mlxsw_sib),
+       .init                   = mlxsw_sib_init,
+       .fini                   = mlxsw_sib_fini,
+       .basic_trap_groups_set  = mlxsw_sib_basic_trap_groups_set,
+       .txhdr_construct        = mlxsw_sib_tx_v1_hdr_construct,
+       .txhdr_len              = MLXSW_TXHDR_LEN,
+       .profile                = &mlxsw_sib_config_profile,
+};
+
+static struct mlxsw_driver mlxsw_sib2_driver = {
+       .kind                   = mlxsw_sib2_driver_name,
+       .priv_size              = sizeof(struct mlxsw_sib),
+       .init                   = mlxsw_sib_init,
+       .fini                   = mlxsw_sib_fini,
+       .basic_trap_groups_set  = mlxsw_sib_basic_trap_groups_set,
+       .txhdr_construct        = mlxsw_sib_tx_v1_hdr_construct,
+       .txhdr_len              = MLXSW_TXHDR_LEN,
+       .profile                = &mlxsw_sib_config_profile,
+};
+
+static const struct pci_device_id mlxsw_sib_pci_id_table[] = {
+       {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHIB), 0},
+       {0, },
+};
+
+static struct pci_driver mlxsw_sib_pci_driver = {
+       .name = mlxsw_sib_driver_name,
+       .id_table = mlxsw_sib_pci_id_table,
+};
+
+static const struct pci_device_id mlxsw_sib2_pci_id_table[] = {
+       {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHIB2), 0},
+       {0, },
+};
+
+static struct pci_driver mlxsw_sib2_pci_driver = {
+       .name = mlxsw_sib2_driver_name,
+       .id_table = mlxsw_sib2_pci_id_table,
+};
+
+static int __init mlxsw_sib_module_init(void)
+{
+       int err;
+
+       err = mlxsw_core_driver_register(&mlxsw_sib_driver);
+       if (err)
+               return err;
+
+       err = mlxsw_core_driver_register(&mlxsw_sib2_driver);
+       if (err)
+               goto err_sib2_driver_register;
+
+       err = mlxsw_pci_driver_register(&mlxsw_sib_pci_driver);
+       if (err)
+               goto err_sib_pci_driver_register;
+
+       err = mlxsw_pci_driver_register(&mlxsw_sib2_pci_driver);
+       if (err)
+               goto err_sib2_pci_driver_register;
+
+       return 0;
+
+err_sib2_pci_driver_register:
+       mlxsw_pci_driver_unregister(&mlxsw_sib_pci_driver);
+err_sib_pci_driver_register:
+       mlxsw_core_driver_unregister(&mlxsw_sib2_driver);
+err_sib2_driver_register:
+       mlxsw_core_driver_unregister(&mlxsw_sib_driver);
+       return err;
+}
+
+static void __exit mlxsw_sib_module_exit(void)
+{
+       mlxsw_pci_driver_unregister(&mlxsw_sib2_pci_driver);
+       mlxsw_pci_driver_unregister(&mlxsw_sib_pci_driver);
+       mlxsw_core_driver_unregister(&mlxsw_sib2_driver);
+       mlxsw_core_driver_unregister(&mlxsw_sib_driver);
+}
+
+module_init(mlxsw_sib_module_init);
+module_exit(mlxsw_sib_module_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Elad Raz <eladr@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox SwitchIB and SwitchIB-2 driver");
+MODULE_ALIAS("mlxsw_switchib2");
+MODULE_DEVICE_TABLE(pci, mlxsw_sib_pci_id_table);
+MODULE_DEVICE_TABLE(pci, mlxsw_sib2_pci_id_table);
index 963618da81a9d59adffd6fc7b3ce960578b948dc..150ccf5192a9895d8e6dc1d41c3510c207d870e5 100644 (file)
@@ -3,7 +3,7 @@
  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com>
- * Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
+ * Copyright (c) 2015-2016 Elad Raz <eladr@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -37,6 +37,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/slab.h>
 #include <linux/skbuff.h>
 #include <linux/if_vlan.h>
 #include <net/switchdev.h>
-#include <generated/utsrelease.h>
 
+#include "pci.h"
 #include "core.h"
 #include "reg.h"
 #include "port.h"
 #include "trap.h"
 #include "txheader.h"
+#include "ib.h"
 
 static const char mlxsw_sx_driver_name[] = "mlxsw_switchx2";
 static const char mlxsw_sx_driver_version[] = "1.0";
@@ -74,11 +76,13 @@ struct mlxsw_sx_port_pcpu_stats {
 };
 
 struct mlxsw_sx_port {
-       struct mlxsw_core_port core_port; /* must be first */
        struct net_device *dev;
        struct mlxsw_sx_port_pcpu_stats __percpu *pcpu_stats;
        struct mlxsw_sx *mlxsw_sx;
        u8 local_port;
+       struct {
+               u8 module;
+       } mapping;
 };
 
 /* tx_hdr_version
@@ -214,14 +218,14 @@ static int mlxsw_sx_port_oper_status_get(struct mlxsw_sx_port *mlxsw_sx_port,
        return 0;
 }
 
-static int mlxsw_sx_port_mtu_set(struct mlxsw_sx_port *mlxsw_sx_port, u16 mtu)
+static int __mlxsw_sx_port_mtu_set(struct mlxsw_sx_port *mlxsw_sx_port,
+                                  u16 mtu)
 {
        struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
        char pmtu_pl[MLXSW_REG_PMTU_LEN];
        int max_mtu;
        int err;
 
-       mtu += MLXSW_TXHDR_LEN + ETH_HLEN;
        mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sx_port->local_port, 0);
        err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(pmtu), pmtu_pl);
        if (err)
@@ -235,6 +239,32 @@ static int mlxsw_sx_port_mtu_set(struct mlxsw_sx_port *mlxsw_sx_port, u16 mtu)
        return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(pmtu), pmtu_pl);
 }
 
+static int mlxsw_sx_port_mtu_eth_set(struct mlxsw_sx_port *mlxsw_sx_port,
+                                    u16 mtu)
+{
+       mtu += MLXSW_TXHDR_LEN + ETH_HLEN;
+       return __mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu);
+}
+
+static int mlxsw_sx_port_mtu_ib_set(struct mlxsw_sx_port *mlxsw_sx_port,
+                                   u16 mtu)
+{
+       return __mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu);
+}
+
+static int mlxsw_sx_port_ib_port_set(struct mlxsw_sx_port *mlxsw_sx_port,
+                                    u8 ib_port)
+{
+       struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+       char plib_pl[MLXSW_REG_PLIB_LEN] = {0};
+       int err;
+
+       mlxsw_reg_plib_local_port_set(plib_pl, mlxsw_sx_port->local_port);
+       mlxsw_reg_plib_ib_port_set(plib_pl, ib_port);
+       err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(plib), plib_pl);
+       return err;
+}
+
 static int mlxsw_sx_port_swid_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 swid)
 {
        struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
@@ -254,18 +284,19 @@ mlxsw_sx_port_system_port_mapping_set(struct mlxsw_sx_port *mlxsw_sx_port)
        return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sspr), sspr_pl);
 }
 
-static int mlxsw_sx_port_module_check(struct mlxsw_sx_port *mlxsw_sx_port,
-                                     bool *p_usable)
+static int mlxsw_sx_port_module_info_get(struct mlxsw_sx *mlxsw_sx,
+                                        u8 local_port, u8 *p_module,
+                                        u8 *p_width)
 {
-       struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
        char pmlp_pl[MLXSW_REG_PMLP_LEN];
        int err;
 
-       mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sx_port->local_port);
+       mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
        err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(pmlp), pmlp_pl);
        if (err)
                return err;
-       *p_usable = mlxsw_reg_pmlp_width_get(pmlp_pl) ? true : false;
+       *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
+       *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
        return 0;
 }
 
@@ -343,7 +374,7 @@ static int mlxsw_sx_port_change_mtu(struct net_device *dev, int mtu)
        struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
        int err;
 
-       err = mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu);
+       err = mlxsw_sx_port_mtu_eth_set(mlxsw_sx_port, mtu);
        if (err)
                return err;
        dev->mtu = mtu;
@@ -382,12 +413,26 @@ mlxsw_sx_port_get_stats64(struct net_device *dev,
        return stats;
 }
 
+static int mlxsw_sx_port_get_phys_port_name(struct net_device *dev, char *name,
+                                           size_t len)
+{
+       struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+       int err;
+
+       err = snprintf(name, len, "p%d", mlxsw_sx_port->mapping.module + 1);
+       if (err >= len)
+               return -EINVAL;
+
+       return 0;
+}
+
 static const struct net_device_ops mlxsw_sx_port_netdev_ops = {
        .ndo_open               = mlxsw_sx_port_open,
        .ndo_stop               = mlxsw_sx_port_stop,
        .ndo_start_xmit         = mlxsw_sx_port_xmit,
        .ndo_change_mtu         = mlxsw_sx_port_change_mtu,
        .ndo_get_stats64        = mlxsw_sx_port_get_stats64,
+       .ndo_get_phys_port_name = mlxsw_sx_port_get_phys_port_name,
 };
 
 static void mlxsw_sx_port_get_drvinfo(struct net_device *dev,
@@ -642,6 +687,7 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = {
 };
 
 #define MLXSW_SX_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sx_port_link_mode)
+#define MLXSW_SX_PORT_BASE_SPEED 10000 /* Mb/s */
 
 static u32 mlxsw_sx_from_ptys_supported_port(u32 ptys_eth_proto)
 {
@@ -741,14 +787,14 @@ static int mlxsw_sx_port_get_settings(struct net_device *dev,
        u32 eth_proto_oper;
        int err;
 
-       mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
+       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
        err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
        if (err) {
                netdev_err(dev, "Failed to get proto");
                return err;
        }
-       mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap,
-                             &eth_proto_admin, &eth_proto_oper);
+       mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap,
+                                 &eth_proto_admin, &eth_proto_oper);
 
        cmd->supported = mlxsw_sx_from_ptys_supported_port(eth_proto_cap) |
                         mlxsw_sx_from_ptys_supported_link(eth_proto_cap) |
@@ -789,6 +835,18 @@ static u32 mlxsw_sx_to_ptys_speed(u32 speed)
        return ptys_proto;
 }
 
+static u32 mlxsw_sx_to_ptys_upper_speed(u32 upper_speed)
+{
+       u32 ptys_proto = 0;
+       int i;
+
+       for (i = 0; i < MLXSW_SX_PORT_LINK_MODE_LEN; i++) {
+               if (mlxsw_sx_port_link_mode[i].speed <= upper_speed)
+                       ptys_proto |= mlxsw_sx_port_link_mode[i].mask;
+       }
+       return ptys_proto;
+}
+
 static int mlxsw_sx_port_set_settings(struct net_device *dev,
                                      struct ethtool_cmd *cmd)
 {
@@ -808,13 +866,14 @@ static int mlxsw_sx_port_set_settings(struct net_device *dev,
                mlxsw_sx_to_ptys_advert_link(cmd->advertising) :
                mlxsw_sx_to_ptys_speed(speed);
 
-       mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
+       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
        err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
        if (err) {
                netdev_err(dev, "Failed to get proto");
                return err;
        }
-       mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin, NULL);
+       mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin,
+                                 NULL);
 
        eth_proto_new = eth_proto_new & eth_proto_cap;
        if (!eth_proto_new) {
@@ -824,7 +883,8 @@ static int mlxsw_sx_port_set_settings(struct net_device *dev,
        if (eth_proto_new == eth_proto_admin)
                return 0;
 
-       mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, eth_proto_new);
+       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
+                               eth_proto_new);
        err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
        if (err) {
                netdev_err(dev, "Failed to set proto admin");
@@ -888,7 +948,7 @@ static const struct switchdev_ops mlxsw_sx_port_switchdev_ops = {
 
 static int mlxsw_sx_hw_id_get(struct mlxsw_sx *mlxsw_sx)
 {
-       char spad_pl[MLXSW_REG_SPAD_LEN];
+       char spad_pl[MLXSW_REG_SPAD_LEN] = {0};
        int err;
 
        err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(spad), spad_pl);
@@ -935,13 +995,28 @@ static int mlxsw_sx_port_stp_state_set(struct mlxsw_sx_port *mlxsw_sx_port,
        return err;
 }
 
-static int mlxsw_sx_port_speed_set(struct mlxsw_sx_port *mlxsw_sx_port,
-                                  u32 speed)
+static int mlxsw_sx_port_ib_speed_set(struct mlxsw_sx_port *mlxsw_sx_port,
+                                     u16 speed, u16 width)
 {
        struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
        char ptys_pl[MLXSW_REG_PTYS_LEN];
 
-       mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, speed);
+       mlxsw_reg_ptys_ib_pack(ptys_pl, mlxsw_sx_port->local_port, speed,
+                              width);
+       return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
+}
+
+static int
+mlxsw_sx_port_speed_by_width_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 width)
+{
+       struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+       u32 upper_speed = MLXSW_SX_PORT_BASE_SPEED * width;
+       char ptys_pl[MLXSW_REG_PTYS_LEN];
+       u32 eth_proto_admin;
+
+       eth_proto_admin = mlxsw_sx_to_ptys_upper_speed(upper_speed);
+       mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
+                               eth_proto_admin);
        return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
 }
 
@@ -956,20 +1031,22 @@ mlxsw_sx_port_mac_learning_mode_set(struct mlxsw_sx_port *mlxsw_sx_port,
        return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(spmlr), spmlr_pl);
 }
 
-static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
+                                     u8 module, u8 width)
 {
        struct mlxsw_sx_port *mlxsw_sx_port;
        struct net_device *dev;
-       bool usable;
        int err;
 
        dev = alloc_etherdev(sizeof(struct mlxsw_sx_port));
        if (!dev)
                return -ENOMEM;
+       SET_NETDEV_DEV(dev, mlxsw_sx->bus_info->dev);
        mlxsw_sx_port = netdev_priv(dev);
        mlxsw_sx_port->dev = dev;
        mlxsw_sx_port->mlxsw_sx = mlxsw_sx;
        mlxsw_sx_port->local_port = local_port;
+       mlxsw_sx_port->mapping.module = module;
 
        mlxsw_sx_port->pcpu_stats =
                netdev_alloc_pcpu_stats(struct mlxsw_sx_port_pcpu_stats);
@@ -1002,19 +1079,6 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port)
         */
        dev->needed_headroom = MLXSW_TXHDR_LEN;
 
-       err = mlxsw_sx_port_module_check(mlxsw_sx_port, &usable);
-       if (err) {
-               dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to check module\n",
-                       mlxsw_sx_port->local_port);
-               goto err_port_module_check;
-       }
-
-       if (!usable) {
-               dev_dbg(mlxsw_sx->bus_info->dev, "Port %d: Not usable, skipping initialization\n",
-                       mlxsw_sx_port->local_port);
-               goto port_not_usable;
-       }
-
        err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port);
        if (err) {
                dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n",
@@ -1029,15 +1093,14 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port)
                goto err_port_swid_set;
        }
 
-       err = mlxsw_sx_port_speed_set(mlxsw_sx_port,
-                                     MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4);
+       err = mlxsw_sx_port_speed_by_width_set(mlxsw_sx_port, width);
        if (err) {
                dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set speed\n",
                        mlxsw_sx_port->local_port);
                goto err_port_speed_set;
        }
 
-       err = mlxsw_sx_port_mtu_set(mlxsw_sx_port, ETH_DATA_LEN);
+       err = mlxsw_sx_port_mtu_eth_set(mlxsw_sx_port, ETH_DATA_LEN);
        if (err) {
                dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MTU\n",
                        mlxsw_sx_port->local_port);
@@ -1072,29 +1135,20 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port)
                goto err_register_netdev;
        }
 
-       err = mlxsw_core_port_init(mlxsw_sx->core, &mlxsw_sx_port->core_port,
-                                  mlxsw_sx_port->local_port, dev, false, 0);
-       if (err) {
-               dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to init core port\n",
-                       mlxsw_sx_port->local_port);
-               goto err_core_port_init;
-       }
-
+       mlxsw_core_port_eth_set(mlxsw_sx->core, mlxsw_sx_port->local_port,
+                               mlxsw_sx_port, dev, false, 0);
        mlxsw_sx->ports[local_port] = mlxsw_sx_port;
        return 0;
 
-err_core_port_init:
-       unregister_netdev(dev);
 err_register_netdev:
 err_port_mac_learning_mode_set:
 err_port_stp_state_set:
 err_port_admin_status_set:
 err_port_mtu_set:
 err_port_speed_set:
+       mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
 err_port_swid_set:
 err_port_system_port_mapping_set:
-port_not_usable:
-err_port_module_check:
 err_dev_addr_get:
        free_percpu(mlxsw_sx_port->pcpu_stats);
 err_alloc_stats:
@@ -1102,31 +1156,168 @@ err_alloc_stats:
        return err;
 }
 
-static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+static int mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
+                                   u8 module, u8 width)
+{
+       int err;
+
+       err = mlxsw_core_port_init(mlxsw_sx->core, local_port);
+       if (err) {
+               dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to init core port\n",
+                       local_port);
+               return err;
+       }
+       err = __mlxsw_sx_port_eth_create(mlxsw_sx, local_port, module, width);
+       if (err)
+               goto err_port_create;
+
+       return 0;
+
+err_port_create:
+       mlxsw_core_port_fini(mlxsw_sx->core, local_port);
+       return err;
+}
+
+static void __mlxsw_sx_port_eth_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
 {
        struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port];
 
-       if (!mlxsw_sx_port)
-               return;
-       mlxsw_core_port_fini(&mlxsw_sx_port->core_port);
+       mlxsw_core_port_clear(mlxsw_sx->core, local_port, mlxsw_sx);
        unregister_netdev(mlxsw_sx_port->dev); /* This calls ndo_stop */
+       mlxsw_sx->ports[local_port] = NULL;
        mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
        free_percpu(mlxsw_sx_port->pcpu_stats);
        free_netdev(mlxsw_sx_port->dev);
 }
 
+static bool mlxsw_sx_port_created(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+       return mlxsw_sx->ports[local_port] != NULL;
+}
+
+static int __mlxsw_sx_port_ib_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
+                                    u8 module, u8 width)
+{
+       struct mlxsw_sx_port *mlxsw_sx_port;
+       int err;
+
+       mlxsw_sx_port = kzalloc(sizeof(*mlxsw_sx_port), GFP_KERNEL);
+       if (!mlxsw_sx_port)
+               return -ENOMEM;
+       mlxsw_sx_port->mlxsw_sx = mlxsw_sx;
+       mlxsw_sx_port->local_port = local_port;
+       mlxsw_sx_port->mapping.module = module;
+
+       err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port);
+       if (err) {
+               dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n",
+                       mlxsw_sx_port->local_port);
+               goto err_port_system_port_mapping_set;
+       }
+
+       /* Add the port to the InfiniBand swid (1) */
+       err = mlxsw_sx_port_swid_set(mlxsw_sx_port, 1);
+       if (err) {
+               dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set SWID\n",
+                       mlxsw_sx_port->local_port);
+               goto err_port_swid_set;
+       }
+
+       /* Expose the IB port number as its front panel name (module + 1) */
+       err = mlxsw_sx_port_ib_port_set(mlxsw_sx_port, module + 1);
+       if (err) {
+               dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set IB port\n",
+                       mlxsw_sx_port->local_port);
+               goto err_port_ib_set;
+       }
+
+       /* Support all speeds from SDR to FDR (bitmask) and bus widths of
+        * 1x, 2x and 4x (3-bit bitmask); the 'width' argument is not used
+        */
+       err = mlxsw_sx_port_ib_speed_set(mlxsw_sx_port,
+                                        MLXSW_REG_PTYS_IB_SPEED_EDR - 1,
+                                        BIT(3) - 1);
+       if (err) {
+               dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set speed\n",
+                       mlxsw_sx_port->local_port);
+               goto err_port_speed_set;
+       }
+
+       /* Change to the maximum MTU the device supports; the SMA will take
+        * care of the active MTU
+        */
+       err = mlxsw_sx_port_mtu_ib_set(mlxsw_sx_port, MLXSW_IB_DEFAULT_MTU);
+       if (err) {
+               dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MTU\n",
+                       mlxsw_sx_port->local_port);
+               goto err_port_mtu_set;
+       }
+
+       err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, true);
+       if (err) {
+               dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to change admin state to UP\n",
+                       mlxsw_sx_port->local_port);
+               goto err_port_admin_set;
+       }
+
+       mlxsw_core_port_ib_set(mlxsw_sx->core, mlxsw_sx_port->local_port,
+                              mlxsw_sx_port);
+       mlxsw_sx->ports[local_port] = mlxsw_sx_port;
+       return 0;
+
+err_port_admin_set:
+err_port_mtu_set:
+err_port_speed_set:
+err_port_ib_set:
+       mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
+err_port_swid_set:
+err_port_system_port_mapping_set:
+       kfree(mlxsw_sx_port);
+       return err;
+}
+
+static void __mlxsw_sx_port_ib_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+       struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port];
+
+       mlxsw_core_port_clear(mlxsw_sx->core, local_port, mlxsw_sx);
+       mlxsw_sx->ports[local_port] = NULL;
+       mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false);
+       mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
+       kfree(mlxsw_sx_port);
+}
+
+static void __mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+       enum devlink_port_type port_type =
+               mlxsw_core_port_type_get(mlxsw_sx->core, local_port);
+
+       if (port_type == DEVLINK_PORT_TYPE_ETH)
+               __mlxsw_sx_port_eth_remove(mlxsw_sx, local_port);
+       else if (port_type == DEVLINK_PORT_TYPE_IB)
+               __mlxsw_sx_port_ib_remove(mlxsw_sx, local_port);
+}
+
+static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+       __mlxsw_sx_port_remove(mlxsw_sx, local_port);
+       mlxsw_core_port_fini(mlxsw_sx->core, local_port);
+}
+
 static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx)
 {
        int i;
 
        for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++)
-               mlxsw_sx_port_remove(mlxsw_sx, i);
+               if (mlxsw_sx_port_created(mlxsw_sx, i))
+                       mlxsw_sx_port_remove(mlxsw_sx, i);
        kfree(mlxsw_sx->ports);
 }
 
 static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx)
 {
        size_t alloc_size;
+       u8 module, width;
        int i;
        int err;
 
@@ -1136,25 +1327,57 @@ static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx)
                return -ENOMEM;
 
        for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) {
-               err = mlxsw_sx_port_create(mlxsw_sx, i);
+               err = mlxsw_sx_port_module_info_get(mlxsw_sx, i, &module,
+                                                   &width);
+               if (err)
+                       goto err_port_module_info_get;
+               if (!width)
+                       continue;
+               err = mlxsw_sx_port_eth_create(mlxsw_sx, i, module, width);
                if (err)
                        goto err_port_create;
        }
        return 0;
 
 err_port_create:
+err_port_module_info_get:
        for (i--; i >= 1; i--)
-               mlxsw_sx_port_remove(mlxsw_sx, i);
+               if (mlxsw_sx_port_created(mlxsw_sx, i))
+                       mlxsw_sx_port_remove(mlxsw_sx, i);
        kfree(mlxsw_sx->ports);
        return err;
 }
 
+static void mlxsw_sx_pude_eth_event_func(struct mlxsw_sx_port *mlxsw_sx_port,
+                                        enum mlxsw_reg_pude_oper_status status)
+{
+       if (status == MLXSW_PORT_OPER_STATUS_UP) {
+               netdev_info(mlxsw_sx_port->dev, "link up\n");
+               netif_carrier_on(mlxsw_sx_port->dev);
+       } else {
+               netdev_info(mlxsw_sx_port->dev, "link down\n");
+               netif_carrier_off(mlxsw_sx_port->dev);
+       }
+}
+
+static void mlxsw_sx_pude_ib_event_func(struct mlxsw_sx_port *mlxsw_sx_port,
+                                       enum mlxsw_reg_pude_oper_status status)
+{
+       if (status == MLXSW_PORT_OPER_STATUS_UP)
+               pr_info("ib link for port %d - up\n",
+                       mlxsw_sx_port->mapping.module + 1);
+       else
+               pr_info("ib link for port %d - down\n",
+                       mlxsw_sx_port->mapping.module + 1);
+}
+
 static void mlxsw_sx_pude_event_func(const struct mlxsw_reg_info *reg,
                                     char *pude_pl, void *priv)
 {
        struct mlxsw_sx *mlxsw_sx = priv;
        struct mlxsw_sx_port *mlxsw_sx_port;
        enum mlxsw_reg_pude_oper_status status;
+       enum devlink_port_type port_type;
        u8 local_port;
 
        local_port = mlxsw_reg_pude_local_port_get(pude_pl);
@@ -1166,59 +1389,11 @@ static void mlxsw_sx_pude_event_func(const struct mlxsw_reg_info *reg,
        }
 
        status = mlxsw_reg_pude_oper_status_get(pude_pl);
-       if (status == MLXSW_PORT_OPER_STATUS_UP) {
-               netdev_info(mlxsw_sx_port->dev, "link up\n");
-               netif_carrier_on(mlxsw_sx_port->dev);
-       } else {
-               netdev_info(mlxsw_sx_port->dev, "link down\n");
-               netif_carrier_off(mlxsw_sx_port->dev);
-       }
-}
-
-static struct mlxsw_event_listener mlxsw_sx_pude_event = {
-       .func = mlxsw_sx_pude_event_func,
-       .trap_id = MLXSW_TRAP_ID_PUDE,
-};
-
-static int mlxsw_sx_event_register(struct mlxsw_sx *mlxsw_sx,
-                                  enum mlxsw_event_trap_id trap_id)
-{
-       struct mlxsw_event_listener *el;
-       char hpkt_pl[MLXSW_REG_HPKT_LEN];
-       int err;
-
-       switch (trap_id) {
-       case MLXSW_TRAP_ID_PUDE:
-               el = &mlxsw_sx_pude_event;
-               break;
-       }
-       err = mlxsw_core_event_listener_register(mlxsw_sx->core, el, mlxsw_sx);
-       if (err)
-               return err;
-
-       mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, trap_id);
-       err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(hpkt), hpkt_pl);
-       if (err)
-               goto err_event_trap_set;
-
-       return 0;
-
-err_event_trap_set:
-       mlxsw_core_event_listener_unregister(mlxsw_sx->core, el, mlxsw_sx);
-       return err;
-}
-
-static void mlxsw_sx_event_unregister(struct mlxsw_sx *mlxsw_sx,
-                                     enum mlxsw_event_trap_id trap_id)
-{
-       struct mlxsw_event_listener *el;
-
-       switch (trap_id) {
-       case MLXSW_TRAP_ID_PUDE:
-               el = &mlxsw_sx_pude_event;
-               break;
-       }
-       mlxsw_core_event_listener_unregister(mlxsw_sx->core, el, mlxsw_sx);
+       port_type = mlxsw_core_port_type_get(mlxsw_sx->core, local_port);
+       if (port_type == DEVLINK_PORT_TYPE_ETH)
+               mlxsw_sx_pude_eth_event_func(mlxsw_sx_port, status);
+       else if (port_type == DEVLINK_PORT_TYPE_IB)
+               mlxsw_sx_pude_ib_event_func(mlxsw_sx_port, status);
 }
 
 static void mlxsw_sx_rx_listener_func(struct sk_buff *skb, u8 local_port,
@@ -1246,142 +1421,110 @@ static void mlxsw_sx_rx_listener_func(struct sk_buff *skb, u8 local_port,
        netif_receive_skb(skb);
 }
 
-static const struct mlxsw_rx_listener mlxsw_sx_rx_listener[] = {
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_FDB_MC,
-       },
-       /* Traps for specific L2 packet types, not trapped as FDB MC */
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_STP,
-       },
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_LACP,
-       },
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_EAPOL,
-       },
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_LLDP,
-       },
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_MMRP,
-       },
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_MVRP,
-       },
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_RPVST,
-       },
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_DHCP,
-       },
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_IGMP_QUERY,
-       },
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_IGMP_V1_REPORT,
-       },
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_IGMP_V2_REPORT,
-       },
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_IGMP_V2_LEAVE,
-       },
-       {
-               .func = mlxsw_sx_rx_listener_func,
-               .local_port = MLXSW_PORT_DONT_CARE,
-               .trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT,
-       },
+static int mlxsw_sx_port_type_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+                                 enum devlink_port_type new_type)
+{
+       struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
+       u8 module, width;
+       int err;
+
+       if (new_type == DEVLINK_PORT_TYPE_AUTO)
+               return -EOPNOTSUPP;
+
+       __mlxsw_sx_port_remove(mlxsw_sx, local_port);
+       err = mlxsw_sx_port_module_info_get(mlxsw_sx, local_port, &module,
+                                           &width);
+       if (err)
+               goto err_port_module_info_get;
+
+       if (new_type == DEVLINK_PORT_TYPE_ETH)
+               err = __mlxsw_sx_port_eth_create(mlxsw_sx, local_port, module,
+                                                width);
+       else if (new_type == DEVLINK_PORT_TYPE_IB)
+               err = __mlxsw_sx_port_ib_create(mlxsw_sx, local_port, module,
+                                               width);
+
+err_port_module_info_get:
+       return err;
+}
+
+#define MLXSW_SX_RXL(_trap_id) \
+       MLXSW_RXL(mlxsw_sx_rx_listener_func, _trap_id, TRAP_TO_CPU,     \
+                 false, SX2_RX, FORWARD)
+
+static const struct mlxsw_listener mlxsw_sx_listener[] = {
+       MLXSW_EVENTL(mlxsw_sx_pude_event_func, PUDE, EMAD),
+       MLXSW_SX_RXL(FDB_MC),
+       MLXSW_SX_RXL(STP),
+       MLXSW_SX_RXL(LACP),
+       MLXSW_SX_RXL(EAPOL),
+       MLXSW_SX_RXL(LLDP),
+       MLXSW_SX_RXL(MMRP),
+       MLXSW_SX_RXL(MVRP),
+       MLXSW_SX_RXL(RPVST),
+       MLXSW_SX_RXL(DHCP),
+       MLXSW_SX_RXL(IGMP_QUERY),
+       MLXSW_SX_RXL(IGMP_V1_REPORT),
+       MLXSW_SX_RXL(IGMP_V2_REPORT),
+       MLXSW_SX_RXL(IGMP_V2_LEAVE),
+       MLXSW_SX_RXL(IGMP_V3_REPORT),
 };
 
 static int mlxsw_sx_traps_init(struct mlxsw_sx *mlxsw_sx)
 {
        char htgt_pl[MLXSW_REG_HTGT_LEN];
-       char hpkt_pl[MLXSW_REG_HPKT_LEN];
        int i;
        int err;
 
-       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_RX);
+       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_SX2_RX,
+                           MLXSW_REG_HTGT_INVALID_POLICER,
+                           MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+                           MLXSW_REG_HTGT_DEFAULT_TC);
+       mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
+                                         MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_RX);
+
        err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(htgt), htgt_pl);
        if (err)
                return err;
 
-       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_CTRL);
+       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_SX2_CTRL,
+                           MLXSW_REG_HTGT_INVALID_POLICER,
+                           MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+                           MLXSW_REG_HTGT_DEFAULT_TC);
+       mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
+                                       MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_CTRL);
+
        err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(htgt), htgt_pl);
        if (err)
                return err;
 
-       for (i = 0; i < ARRAY_SIZE(mlxsw_sx_rx_listener); i++) {
-               err = mlxsw_core_rx_listener_register(mlxsw_sx->core,
-                                                     &mlxsw_sx_rx_listener[i],
-                                                     mlxsw_sx);
+       for (i = 0; i < ARRAY_SIZE(mlxsw_sx_listener); i++) {
+               err = mlxsw_core_trap_register(mlxsw_sx->core,
+                                              &mlxsw_sx_listener[i],
+                                              mlxsw_sx);
                if (err)
-                       goto err_rx_listener_register;
+                       goto err_listener_register;
 
-               mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,
-                                   mlxsw_sx_rx_listener[i].trap_id);
-               err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(hpkt), hpkt_pl);
-               if (err)
-                       goto err_rx_trap_set;
        }
        return 0;
 
-err_rx_trap_set:
-       mlxsw_core_rx_listener_unregister(mlxsw_sx->core,
-                                         &mlxsw_sx_rx_listener[i],
-                                         mlxsw_sx);
-err_rx_listener_register:
+err_listener_register:
        for (i--; i >= 0; i--) {
-               mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD,
-                                   mlxsw_sx_rx_listener[i].trap_id);
-               mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(hpkt), hpkt_pl);
-
-               mlxsw_core_rx_listener_unregister(mlxsw_sx->core,
-                                                 &mlxsw_sx_rx_listener[i],
-                                                 mlxsw_sx);
+               mlxsw_core_trap_unregister(mlxsw_sx->core,
+                                          &mlxsw_sx_listener[i],
+                                          mlxsw_sx);
        }
        return err;
 }
 
 static void mlxsw_sx_traps_fini(struct mlxsw_sx *mlxsw_sx)
 {
-       char hpkt_pl[MLXSW_REG_HPKT_LEN];
        int i;
 
-       for (i = 0; i < ARRAY_SIZE(mlxsw_sx_rx_listener); i++) {
-               mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD,
-                                   mlxsw_sx_rx_listener[i].trap_id);
-               mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(hpkt), hpkt_pl);
-
-               mlxsw_core_rx_listener_unregister(mlxsw_sx->core,
-                                                 &mlxsw_sx_rx_listener[i],
-                                                 mlxsw_sx);
+       for (i = 0; i < ARRAY_SIZE(mlxsw_sx_listener); i++) {
+               mlxsw_core_trap_unregister(mlxsw_sx->core,
+                                          &mlxsw_sx_listener[i],
+                                          mlxsw_sx);
        }
 }
 
@@ -1453,6 +1596,20 @@ static int mlxsw_sx_flood_init(struct mlxsw_sx *mlxsw_sx)
        return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sgcr), sgcr_pl);
 }
 
+static int mlxsw_sx_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+       char htgt_pl[MLXSW_REG_HTGT_LEN];
+
+       mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+                           MLXSW_REG_HTGT_INVALID_POLICER,
+                           MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+                           MLXSW_REG_HTGT_DEFAULT_TC);
+       mlxsw_reg_htgt_swid_set(htgt_pl, MLXSW_PORT_SWID_ALL_SWIDS);
+       mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
+                                       MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_EMAD);
+       return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+}
+
 static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core,
                         const struct mlxsw_bus_info *mlxsw_bus_info)
 {
@@ -1474,16 +1631,10 @@ static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core,
                return err;
        }
 
-       err = mlxsw_sx_event_register(mlxsw_sx, MLXSW_TRAP_ID_PUDE);
-       if (err) {
-               dev_err(mlxsw_sx->bus_info->dev, "Failed to register for PUDE events\n");
-               goto err_event_register;
-       }
-
        err = mlxsw_sx_traps_init(mlxsw_sx);
        if (err) {
-               dev_err(mlxsw_sx->bus_info->dev, "Failed to set traps for RX\n");
-               goto err_rx_listener_register;
+               dev_err(mlxsw_sx->bus_info->dev, "Failed to set traps\n");
+               goto err_listener_register;
        }
 
        err = mlxsw_sx_flood_init(mlxsw_sx);
@@ -1496,9 +1647,7 @@ static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core,
 
 err_flood_init:
        mlxsw_sx_traps_fini(mlxsw_sx);
-err_rx_listener_register:
-       mlxsw_sx_event_unregister(mlxsw_sx, MLXSW_TRAP_ID_PUDE);
-err_event_register:
+err_listener_register:
        mlxsw_sx_ports_remove(mlxsw_sx);
        return err;
 }
@@ -1508,7 +1657,6 @@ static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core)
        struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
 
        mlxsw_sx_traps_fini(mlxsw_sx);
-       mlxsw_sx_event_unregister(mlxsw_sx, MLXSW_TRAP_ID_PUDE);
        mlxsw_sx_ports_remove(mlxsw_sx);
 }
 
@@ -1531,36 +1679,66 @@ static struct mlxsw_config_profile mlxsw_sx_config_profile = {
        .used_flood_mode                = 1,
        .flood_mode                     = 3,
        .used_max_ib_mc                 = 1,
-       .max_ib_mc                      = 0,
+       .max_ib_mc                      = 6,
        .used_max_pkey                  = 1,
        .max_pkey                       = 0,
        .swid_config                    = {
                {
                        .used_type      = 1,
                        .type           = MLXSW_PORT_SWID_TYPE_ETH,
+               },
+               {
+                       .used_type      = 1,
+                       .type           = MLXSW_PORT_SWID_TYPE_IB,
                }
        },
        .resource_query_enable          = 0,
 };
 
 static struct mlxsw_driver mlxsw_sx_driver = {
-       .kind                   = MLXSW_DEVICE_KIND_SWITCHX2,
-       .owner                  = THIS_MODULE,
+       .kind                   = mlxsw_sx_driver_name,
        .priv_size              = sizeof(struct mlxsw_sx),
        .init                   = mlxsw_sx_init,
        .fini                   = mlxsw_sx_fini,
+       .basic_trap_groups_set  = mlxsw_sx_basic_trap_groups_set,
        .txhdr_construct        = mlxsw_sx_txhdr_construct,
        .txhdr_len              = MLXSW_TXHDR_LEN,
        .profile                = &mlxsw_sx_config_profile,
+       .port_type_set          = mlxsw_sx_port_type_set,
+};
+
+static const struct pci_device_id mlxsw_sx_pci_id_table[] = {
+       {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHX2), 0},
+       {0, },
+};
+
+static struct pci_driver mlxsw_sx_pci_driver = {
+       .name = mlxsw_sx_driver_name,
+       .id_table = mlxsw_sx_pci_id_table,
 };
 
 static int __init mlxsw_sx_module_init(void)
 {
-       return mlxsw_core_driver_register(&mlxsw_sx_driver);
+       int err;
+
+       err = mlxsw_core_driver_register(&mlxsw_sx_driver);
+       if (err)
+               return err;
+
+       err = mlxsw_pci_driver_register(&mlxsw_sx_pci_driver);
+       if (err)
+               goto err_pci_driver_register;
+
+       return 0;
+
+err_pci_driver_register:
+       mlxsw_core_driver_unregister(&mlxsw_sx_driver);
+       return err;
 }
 
 static void __exit mlxsw_sx_module_exit(void)
 {
+       mlxsw_pci_driver_unregister(&mlxsw_sx_pci_driver);
        mlxsw_core_driver_unregister(&mlxsw_sx_driver);
 }
 
@@ -1570,4 +1748,4 @@ module_exit(mlxsw_sx_module_exit);
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox SwitchX-2 driver");
-MODULE_MLXSW_DRIVER_ALIAS(MLXSW_DEVICE_KIND_SWITCHX2);
+MODULE_DEVICE_TABLE(pci, mlxsw_sx_pci_id_table);
index ed8e301864004f8092bcf82caf50edc730a4b7d4..7ab275deacacbc51165eed9d5ec3f54281533566 100644 (file)
@@ -62,6 +62,7 @@ enum {
        MLXSW_TRAP_ID_OSPF = 0x55,
        MLXSW_TRAP_ID_IP2ME = 0x5F,
        MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
+       MLXSW_TRAP_ID_BGP_IPV4 = 0x88,
        MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
 
        MLXSW_TRAP_ID_MAX = 0x1FF
index 87aa8a3e9112f613da68e741fe9d791470b9c708..76a19f1796af71a338e0f47636ef70dcaa6ae8ba 100644 (file)
@@ -62,6 +62,7 @@ enum nfp_bpf_action_type {
        NN_ACT_TC_DROP,
        NN_ACT_TC_REDIR,
        NN_ACT_DIRECT,
+       NN_ACT_XDP,
 };
 
 /* Software register representation, hardware encoding in asm.h */
index f8df5300f49c1182e271d444c3b42714f30951e6..335beb8b8b45c75193a604550257be821fd2a1b5 100644 (file)
@@ -1126,7 +1126,7 @@ static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
                                     meta->insn.src_reg * 2, true, 4);
 }
 
-static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
        if (meta->insn.off == offsetof(struct sk_buff, len))
                emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2),
@@ -1134,12 +1134,42 @@ static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        else
                return -ENOTSUPP;
 
-       wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+       return 0;
+}
+
+static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       u32 dst = reg_both(meta->insn.dst_reg * 2);
+
+       if (meta->insn.off != offsetof(struct xdp_md, data) &&
+           meta->insn.off != offsetof(struct xdp_md, data_end))
+               return -ENOTSUPP;
+
+       emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT);
+
+       if (meta->insn.off == offsetof(struct xdp_md, data))
+               return 0;
+
+       emit_alu(nfp_prog, dst, dst, ALU_OP_ADD, NFP_BPF_ABI_LEN);
 
        return 0;
 }
 
-static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       int ret;
+
+       if (nfp_prog->act == NN_ACT_XDP)
+               ret = mem_ldx4_xdp(nfp_prog, meta);
+       else
+               ret = mem_ldx4_skb(nfp_prog, meta);
+
+       wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+
+       return ret;
+}
+
+static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
        if (meta->insn.off == offsetof(struct sk_buff, mark))
                return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2);
@@ -1147,6 +1177,18 @@ static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
        return -ENOTSUPP;
 }
 
+static int mem_stx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       return -ENOTSUPP;
+}
+
+static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+       if (nfp_prog->act == NN_ACT_XDP)
+               return mem_stx4_xdp(nfp_prog, meta);
+       return mem_stx4_skb(nfp_prog, meta);
+}
+
 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
        if (meta->insn.off < 0) /* TODO */
@@ -1530,6 +1572,47 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
        emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
 }
 
+static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
+{
+       /* XDP return codes:
+        *   0 aborted  0x82 -> drop,  count as stat3
+        *   1    drop  0x22 -> drop,  count as stat1
+        *   2    pass  0x11 -> pass,  count as stat0
+        *   3      tx  0x44 -> redir, count as stat2
+        *   * unknown  0x82 -> drop,  count as stat3
+        */
+       /* Target for aborts */
+       nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
+
+       emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
+
+       emit_alu(nfp_prog, reg_a(0),
+                reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+       emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
+
+       /* Target for normal exits */
+       nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
+
+       /* if R0 > 3 jump to abort */
+       emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
+       emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
+
+       wrp_immed(nfp_prog, reg_b(2), 0x44112282);
+
+       emit_shf(nfp_prog, reg_a(1),
+                reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
+
+       emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
+       emit_shf(nfp_prog, reg_b(2),
+                reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
+
+       emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
+
+       emit_alu(nfp_prog, reg_a(0),
+                reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+       emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
+}
+
 static void nfp_outro(struct nfp_prog *nfp_prog)
 {
        switch (nfp_prog->act) {
@@ -1540,6 +1623,9 @@ static void nfp_outro(struct nfp_prog *nfp_prog)
        case NN_ACT_TC_REDIR:
                nfp_outro_tc_legacy(nfp_prog);
                break;
+       case NN_ACT_XDP:
+               nfp_outro_xdp(nfp_prog);
+               break;
        }
 }
 
index 144cae87f63a19b441f385661f2705b37d826a71..b3361f9b8e5c156c9ce58b156126993e5258fa68 100644 (file)
@@ -80,6 +80,9 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
 {
        const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
 
+       if (nfp_prog->act == NN_ACT_XDP)
+               return 0;
+
        if (reg0->type != CONST_IMM) {
                pr_info("unsupported exit state: %d, imm: %llx\n",
                        reg0->type, reg0->imm);
index ed824e11a1e3b5127138656dbbeaba2efcc3013b..2115f446031ef46c3e2c6c586d58f31e064efa38 100644 (file)
@@ -75,7 +75,6 @@
 
 /* Default size for MTU and freelist buffer sizes */
 #define NFP_NET_DEFAULT_MTU            1500
-#define NFP_NET_DEFAULT_RX_BUFSZ       2048
 
 /* Maximum number of bytes prepended to a packet */
 #define NFP_NET_MAX_PREPEND            64
@@ -88,6 +87,9 @@
 /* Queue/Ring definitions */
 #define NFP_NET_MAX_TX_RINGS   64      /* Max. # of Tx rings per device */
 #define NFP_NET_MAX_RX_RINGS   64      /* Max. # of Rx rings per device */
+#define NFP_NET_MAX_R_VECS     (NFP_NET_MAX_TX_RINGS > NFP_NET_MAX_RX_RINGS ? \
+                                NFP_NET_MAX_TX_RINGS : NFP_NET_MAX_RX_RINGS)
+#define NFP_NET_MAX_IRQS       (NFP_NET_NON_Q_VECTORS + NFP_NET_MAX_R_VECS)
 
 #define NFP_NET_MIN_TX_DESCS   256     /* Min. # of Tx descs per ring */
 #define NFP_NET_MIN_RX_DESCS   256     /* Min. # of Rx descs per ring */
 /* Offload definitions */
 #define NFP_NET_N_VXLAN_PORTS  (NFP_NET_CFG_VXLAN_SZ / sizeof(__be16))
 
+#define NFP_NET_RX_BUF_HEADROOM        (NET_SKB_PAD + NET_IP_ALIGN)
+#define NFP_NET_RX_BUF_NON_DATA        (NFP_NET_RX_BUF_HEADROOM +              \
+                                SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
 /* Forward declarations */
 struct nfp_net;
 struct nfp_net_r_vector;
@@ -165,7 +171,10 @@ struct nfp_net_tx_desc {
  *             on the head's buffer). Equal to skb->len for non-TSO packets.
  */
 struct nfp_net_tx_buf {
-       struct sk_buff *skb;
+       union {
+               struct sk_buff *skb;
+               void *frag;
+       };
        dma_addr_t dma_addr;
        short int fidx;
        u16 pkt_cnt;
@@ -278,11 +287,11 @@ struct nfp_net_rx_hash {
 
 /**
  * struct nfp_net_rx_buf - software RX buffer descriptor
- * @skb:       sk_buff associated with this buffer
+ * @frag:      page fragment buffer
  * @dma_addr:  DMA mapping address of the buffer
  */
 struct nfp_net_rx_buf {
-       struct sk_buff *skb;
+       void *frag;
        dma_addr_t dma_addr;
 };
 
@@ -335,6 +344,7 @@ struct nfp_net_rx_ring {
  * @napi:           NAPI structure for this ring vec
  * @tx_ring:        Pointer to TX ring
  * @rx_ring:        Pointer to RX ring
+ * @xdp_ring:      Pointer to an extra TX ring for XDP
  * @irq_idx:        Index into MSI-X table
  * @rx_sync:       Seqlock for atomic updates of RX stats
  * @rx_pkts:        Number of received packets
@@ -378,6 +388,8 @@ struct nfp_net_r_vector {
        u64 hw_csum_rx_inner_ok;
        u64 hw_csum_rx_error;
 
+       struct nfp_net_tx_ring *xdp_ring;
+
        struct u64_stats_sync tx_sync;
        u64 tx_pkts;
        u64 tx_bytes;
@@ -421,12 +433,13 @@ struct nfp_stat_pair {
  * @netdev:             Backpointer to net_device structure
  * @nfp_fallback:       Is the driver used in fallback mode?
  * @is_vf:              Is the driver attached to a VF?
- * @is_nfp3200:         Is the driver for a NFP-3200 card?
  * @fw_loaded:          Is the firmware loaded?
  * @bpf_offload_skip_sw:  Offloaded BPF program will not be rerun by cls_bpf
+ * @bpf_offload_xdp:   Offloaded BPF program is XDP
  * @ctrl:               Local copy of the control register/word.
  * @fl_bufsz:           Currently configured size of the freelist buffers
  * @rx_offset:         Offset in the RX buffers where packet data starts
+ * @xdp_prog:          Installed XDP program
  * @cpp:                Pointer to the CPP handle
  * @nfp_dev_cpp:        Pointer to the NFP Device handle
  * @ctrl_area:          Pointer to the CPP area for the control BAR
@@ -446,12 +459,13 @@ struct nfp_stat_pair {
  * @max_tx_rings:       Maximum number of TX rings supported by the Firmware
  * @max_rx_rings:       Maximum number of RX rings supported by the Firmware
  * @num_tx_rings:       Currently configured number of TX rings
+ * @num_stack_tx_rings:        Number of TX rings used by the stack (not XDP)
  * @num_rx_rings:       Currently configured number of RX rings
  * @txd_cnt:            Size of the TX ring in number of descriptors
  * @rxd_cnt:            Size of the RX ring in number of descriptors
  * @tx_rings:           Array of pre-allocated TX ring structures
  * @rx_rings:           Array of pre-allocated RX ring structures
- * @num_irqs:          Number of allocated interrupt vectors
+ * @max_r_vecs:                Number of allocated interrupt vectors for RX/TX
  * @num_r_vecs:         Number of used ring vectors
  * @r_vecs:             Pre-allocated array of ring vectors
  * @irq_entries:        Pre-allocated array of MSI-X entries
@@ -487,15 +501,17 @@ struct nfp_net {
 
        unsigned nfp_fallback:1;
        unsigned is_vf:1;
-       unsigned is_nfp3200:1;
        unsigned fw_loaded:1;
        unsigned bpf_offload_skip_sw:1;
+       unsigned bpf_offload_xdp:1;
 
        u32 ctrl;
        u32 fl_bufsz;
 
        u32 rx_offset;
 
+       struct bpf_prog *xdp_prog;
+
        struct nfp_net_tx_ring *tx_rings;
        struct nfp_net_rx_ring *rx_rings;
 
@@ -524,11 +540,12 @@ struct nfp_net {
        struct timer_list rx_filter_stats_timer;
        spinlock_t rx_filter_lock;
 
-       int max_tx_rings;
-       int max_rx_rings;
+       unsigned int max_tx_rings;
+       unsigned int max_rx_rings;
 
-       int num_tx_rings;
-       int num_rx_rings;
+       unsigned int num_tx_rings;
+       unsigned int num_stack_tx_rings;
+       unsigned int num_rx_rings;
 
        int stride_tx;
        int stride_rx;
@@ -536,11 +553,10 @@ struct nfp_net {
        int txd_cnt;
        int rxd_cnt;
 
-       u8 num_irqs;
-       u8 num_r_vecs;
-       struct nfp_net_r_vector r_vecs[NFP_NET_MAX_TX_RINGS];
-       struct msix_entry irq_entries[NFP_NET_NON_Q_VECTORS +
-                                     NFP_NET_MAX_TX_RINGS];
+       unsigned int max_r_vecs;
+       unsigned int num_r_vecs;
+       struct nfp_net_r_vector r_vecs[NFP_NET_MAX_R_VECS];
+       struct msix_entry irq_entries[NFP_NET_MAX_IRQS];
 
        irq_handler_t lsc_handler;
        char lsc_name[IFNAMSIZ + 8];
@@ -580,6 +596,13 @@ struct nfp_net {
        struct dentry *debugfs_dir;
 };
 
+struct nfp_net_ring_set {
+       unsigned int n_rings;
+       unsigned int mtu;
+       unsigned int dcnt;
+       void *rings;
+};
+
 /* Functions to read/write from/to a BAR
  * Performs any endian conversion necessary.
  */
@@ -593,16 +616,13 @@ static inline void nn_writeb(struct nfp_net *nn, int off, u8 val)
        writeb(val, nn->ctrl_bar + off);
 }
 
-/* NFP-3200 can't handle 16-bit accesses too well */
 static inline u16 nn_readw(struct nfp_net *nn, int off)
 {
-       WARN_ON_ONCE(nn->is_nfp3200);
        return readw(nn->ctrl_bar + off);
 }
 
 static inline void nn_writew(struct nfp_net *nn, int off, u16 val)
 {
-       WARN_ON_ONCE(nn->is_nfp3200);
        writew(val, nn->ctrl_bar + off);
 }
 
@@ -650,7 +670,7 @@ static inline void nn_pci_flush(struct nfp_net *nn)
 #define NFP_QCP_QUEUE_STS_HI                   0x000c
 #define NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask     0x3ffff
 
-/* The offset of a QCP queues in the PCIe Target (same on NFP3200 and NFP6000 */
+/* The offset of a QCP queues in the PCIe Target */
 #define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff)))
 
 /* nfp_qcp_ptr - Read or Write Pointer of a queue */
@@ -757,8 +777,9 @@ extern const char nfp_net_driver_version[];
 void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
                            void __iomem *ctrl_bar);
 
-struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
-                                    int max_tx_rings, int max_rx_rings);
+struct nfp_net *
+nfp_net_netdev_alloc(struct pci_dev *pdev,
+                    unsigned int max_tx_rings, unsigned int max_rx_rings);
 void nfp_net_netdev_free(struct nfp_net *nn);
 int nfp_net_netdev_init(struct net_device *netdev);
 void nfp_net_netdev_clean(struct net_device *netdev);
@@ -770,7 +791,9 @@ void nfp_net_rss_write_key(struct nfp_net *nn);
 void nfp_net_coalesce_write_cfg(struct nfp_net *nn);
 int nfp_net_irqs_alloc(struct nfp_net *nn);
 void nfp_net_irqs_disable(struct nfp_net *nn);
-int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt);
+int
+nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog,
+                     struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx);
 
 #ifdef CONFIG_NFP_NET_DEBUG
 void nfp_net_debugfs_create(void);
@@ -796,8 +819,6 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn)
 #endif /* CONFIG_NFP_NET_DEBUG */
 
 void nfp_net_filter_stats_timer(unsigned long data);
-int
-nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
-                   struct tc_cls_bpf_offload *cls_bpf);
+int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf);
 
 #endif /* _NFP_NET_H_ */
index d365760fa75b7e9f299b66c0e9d6ecdf14640ab2..00d9a03be31df518b986eb5a14c1fb6377c1e1f8 100644 (file)
@@ -41,6 +41,7 @@
  *          Chris Telfer <chris.telfer@netronome.com>
  */
 
+#include <linux/bpf.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -50,6 +51,7 @@
 #include <linux/interrupt.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/page_ref.h>
 #include <linux/pci.h>
 #include <linux/pci_regs.h>
 #include <linux/msi.h>
@@ -80,6 +82,22 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
        put_unaligned_le32(reg, fw_ver);
 }
 
+static dma_addr_t
+nfp_net_dma_map_rx(struct nfp_net *nn, void *frag, unsigned int bufsz,
+                  int direction)
+{
+       return dma_map_single(&nn->pdev->dev, frag + NFP_NET_RX_BUF_HEADROOM,
+                             bufsz - NFP_NET_RX_BUF_NON_DATA, direction);
+}
+
+static void
+nfp_net_dma_unmap_rx(struct nfp_net *nn, dma_addr_t dma_addr,
+                    unsigned int bufsz, int direction)
+{
+       dma_unmap_single(&nn->pdev->dev, dma_addr,
+                        bufsz - NFP_NET_RX_BUF_NON_DATA, direction);
+}
+
 /* Firmware reconfig
  *
  * Firmware reconfig may take a while so we have two versions of it -
@@ -248,44 +266,15 @@ int nfp_net_reconfig(struct nfp_net *nn, u32 update)
 /* Interrupt configuration and handling
  */
 
-/**
- * nfp_net_irq_unmask_msix() - Unmask MSI-X after automasking
- * @nn:       NFP Network structure
- * @entry_nr: MSI-X table entry
- *
- * Clear the MSI-X table mask bit for the given entry bypassing Linux irq
- * handling subsystem.  Use *only* to reenable automasked vectors.
- */
-static void nfp_net_irq_unmask_msix(struct nfp_net *nn, unsigned int entry_nr)
-{
-       struct list_head *msi_head = &nn->pdev->dev.msi_list;
-       struct msi_desc *entry;
-       u32 off;
-
-       /* All MSI-Xs have the same mask_base */
-       entry = list_first_entry(msi_head, struct msi_desc, list);
-
-       off = (PCI_MSIX_ENTRY_SIZE * entry_nr) +
-               PCI_MSIX_ENTRY_VECTOR_CTRL;
-       writel(0, entry->mask_base + off);
-       readl(entry->mask_base);
-}
-
 /**
  * nfp_net_irq_unmask() - Unmask automasked interrupt
  * @nn:       NFP Network structure
  * @entry_nr: MSI-X table entry
  *
- * If MSI-X auto-masking is enabled clear the mask bit, otherwise
- * clear the ICR for the entry.
+ * Clear the ICR for the IRQ entry.
  */
 static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr)
 {
-       if (nn->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) {
-               nfp_net_irq_unmask_msix(nn, entry_nr);
-               return;
-       }
-
        nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED);
        nn_pci_flush(nn);
 }
@@ -319,28 +308,6 @@ static int nfp_net_msix_alloc(struct nfp_net *nn, int nr_vecs)
        return nvecs;
 }
 
-/**
- * nfp_net_irqs_wanted() - Work out how many interrupt vectors we want
- * @nn:       NFP Network structure
- *
- * We want a vector per CPU (or ring), whatever is smaller plus
- * NFP_NET_NON_Q_VECTORS for LSC etc.
- *
- * Return: Number of interrupts wanted
- */
-static int nfp_net_irqs_wanted(struct nfp_net *nn)
-{
-       int ncpus;
-       int vecs;
-
-       ncpus = num_online_cpus();
-
-       vecs = max_t(int, nn->num_tx_rings, nn->num_rx_rings);
-       vecs = min_t(int, vecs, ncpus);
-
-       return vecs + NFP_NET_NON_Q_VECTORS;
-}
-
 /**
  * nfp_net_irqs_alloc() - allocates MSI-X irqs
  * @nn:       NFP Network structure
@@ -350,22 +317,24 @@ static int nfp_net_irqs_wanted(struct nfp_net *nn)
 int nfp_net_irqs_alloc(struct nfp_net *nn)
 {
        int wanted_irqs;
+       unsigned int n;
 
-       wanted_irqs = nfp_net_irqs_wanted(nn);
+       wanted_irqs = nn->num_r_vecs + NFP_NET_NON_Q_VECTORS;
 
-       nn->num_irqs = nfp_net_msix_alloc(nn, wanted_irqs);
-       if (nn->num_irqs == 0) {
+       n = nfp_net_msix_alloc(nn, wanted_irqs);
+       if (n == 0) {
                nn_err(nn, "Failed to allocate MSI-X IRQs\n");
                return 0;
        }
 
-       nn->num_r_vecs = nn->num_irqs - NFP_NET_NON_Q_VECTORS;
+       nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS;
+       nn->num_r_vecs = nn->max_r_vecs;
 
-       if (nn->num_irqs < wanted_irqs)
+       if (n < wanted_irqs)
                nn_warn(nn, "Unable to allocate %d vectors. Got %d instead\n",
-                       wanted_irqs, nn->num_irqs);
+                       wanted_irqs, n);
 
-       return nn->num_irqs;
+       return n;
 }
 
 /**
@@ -515,18 +484,19 @@ static void nfp_net_irqs_assign(struct net_device *netdev)
        struct nfp_net_r_vector *r_vec;
        int r;
 
-       /* Assumes nn->num_tx_rings == nn->num_rx_rings */
-       if (nn->num_tx_rings > nn->num_r_vecs) {
-               nn_warn(nn, "More rings (%d) than vectors (%d).\n",
-                       nn->num_tx_rings, nn->num_r_vecs);
-               nn->num_tx_rings = nn->num_r_vecs;
-               nn->num_rx_rings = nn->num_r_vecs;
-       }
+       if (nn->num_rx_rings > nn->num_r_vecs ||
+           nn->num_tx_rings > nn->num_r_vecs)
+               nn_warn(nn, "More rings (%d,%d) than vectors (%d).\n",
+                       nn->num_rx_rings, nn->num_tx_rings, nn->num_r_vecs);
+
+       nn->num_rx_rings = min(nn->num_r_vecs, nn->num_rx_rings);
+       nn->num_tx_rings = min(nn->num_r_vecs, nn->num_tx_rings);
+       nn->num_stack_tx_rings = nn->num_tx_rings;
 
        nn->lsc_handler = nfp_net_irq_lsc;
        nn->exn_handler = nfp_net_irq_exn;
 
-       for (r = 0; r < nn->num_r_vecs; r++) {
+       for (r = 0; r < nn->max_r_vecs; r++) {
                r_vec = &nn->r_vecs[r];
                r_vec->nfp_net = nn;
                r_vec->handler = nfp_net_irq_rxtx;
@@ -605,7 +575,7 @@ static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
  *
  * Return: True if the ring is full.
  */
-static inline int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt)
+static int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt)
 {
        return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt);
 }
@@ -745,6 +715,13 @@ static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
        u64_stats_update_end(&r_vec->tx_sync);
 }
 
+static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
+{
+       wmb();
+       nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
+       tx_ring->wr_ptr_add = 0;
+}
+
 /**
  * nfp_net_tx() - Main transmit entry point
  * @skb:    SKB to transmit
@@ -790,7 +767,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
        if (dma_mapping_error(&nn->pdev->dev, dma_addr))
                goto err_free;
 
-       wr_idx = tx_ring->wr_p % tx_ring->cnt;
+       wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1);
 
        /* Stash the soft descriptor of the head then initialize it */
        txbuf = &tx_ring->txbufs[wr_idx];
@@ -834,7 +811,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
                        if (dma_mapping_error(&nn->pdev->dev, dma_addr))
                                goto err_unmap;
 
-                       wr_idx = (wr_idx + 1) % tx_ring->cnt;
+                       wr_idx = (wr_idx + 1) & (tx_ring->cnt - 1);
                        tx_ring->txbufs[wr_idx].skb = skb;
                        tx_ring->txbufs[wr_idx].dma_addr = dma_addr;
                        tx_ring->txbufs[wr_idx].fidx = f;
@@ -859,12 +836,8 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
                nfp_net_tx_ring_stop(nd_q, tx_ring);
 
        tx_ring->wr_ptr_add += nr_frags + 1;
-       if (!skb->xmit_more || netif_xmit_stopped(nd_q)) {
-               /* force memory write before we let HW know */
-               wmb();
-               nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
-               tx_ring->wr_ptr_add = 0;
-       }
+       if (!skb->xmit_more || netif_xmit_stopped(nd_q))
+               nfp_net_tx_xmit_more_flush(tx_ring);
 
        skb_tx_timestamp(skb);
 
@@ -929,7 +902,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
                todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p;
 
        while (todo--) {
-               idx = tx_ring->rd_p % tx_ring->cnt;
+               idx = tx_ring->rd_p & (tx_ring->cnt - 1);
                tx_ring->rd_p++;
 
                skb = tx_ring->txbufs[idx].skb;
@@ -986,6 +959,56 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
                  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
 }
 
+static void nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
+{
+       struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
+       struct nfp_net *nn = r_vec->nfp_net;
+       u32 done_pkts = 0, done_bytes = 0;
+       int idx, todo;
+       u32 qcp_rd_p;
+
+       /* Work out how many descriptors have been transmitted */
+       qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
+
+       if (qcp_rd_p == tx_ring->qcp_rd_p)
+               return;
+
+       if (qcp_rd_p > tx_ring->qcp_rd_p)
+               todo = qcp_rd_p - tx_ring->qcp_rd_p;
+       else
+               todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p;
+
+       while (todo--) {
+               idx = tx_ring->rd_p & (tx_ring->cnt - 1);
+               tx_ring->rd_p++;
+
+               if (!tx_ring->txbufs[idx].frag)
+                       continue;
+
+               nfp_net_dma_unmap_rx(nn, tx_ring->txbufs[idx].dma_addr,
+                                    nn->fl_bufsz, DMA_BIDIRECTIONAL);
+               __free_page(virt_to_page(tx_ring->txbufs[idx].frag));
+
+               done_pkts++;
+               done_bytes += tx_ring->txbufs[idx].real_len;
+
+               tx_ring->txbufs[idx].dma_addr = 0;
+               tx_ring->txbufs[idx].frag = NULL;
+               tx_ring->txbufs[idx].fidx = -2;
+       }
+
+       tx_ring->qcp_rd_p = qcp_rd_p;
+
+       u64_stats_update_begin(&r_vec->tx_sync);
+       r_vec->tx_bytes += done_bytes;
+       r_vec->tx_pkts += done_pkts;
+       u64_stats_update_end(&r_vec->tx_sync);
+
+       WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
+                 "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
+                 tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
+}
+
 /**
  * nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers
  * @nn:                NFP Net device
@@ -996,39 +1019,47 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
 static void
 nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring)
 {
+       struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
        const struct skb_frag_struct *frag;
-       struct netdev_queue *nd_q;
        struct pci_dev *pdev = nn->pdev;
+       struct netdev_queue *nd_q;
 
        while (tx_ring->rd_p != tx_ring->wr_p) {
-               int nr_frags, fidx, idx;
-               struct sk_buff *skb;
+               struct nfp_net_tx_buf *tx_buf;
+               int idx;
 
-               idx = tx_ring->rd_p % tx_ring->cnt;
-               skb = tx_ring->txbufs[idx].skb;
-               nr_frags = skb_shinfo(skb)->nr_frags;
-               fidx = tx_ring->txbufs[idx].fidx;
+               idx = tx_ring->rd_p & (tx_ring->cnt - 1);
+               tx_buf = &tx_ring->txbufs[idx];
 
-               if (fidx == -1) {
-                       /* unmap head */
-                       dma_unmap_single(&pdev->dev,
-                                        tx_ring->txbufs[idx].dma_addr,
-                                        skb_headlen(skb), DMA_TO_DEVICE);
+               if (tx_ring == r_vec->xdp_ring) {
+                       nfp_net_dma_unmap_rx(nn, tx_buf->dma_addr,
+                                            nn->fl_bufsz, DMA_BIDIRECTIONAL);
+                       __free_page(virt_to_page(tx_ring->txbufs[idx].frag));
                } else {
-                       /* unmap fragment */
-                       frag = &skb_shinfo(skb)->frags[fidx];
-                       dma_unmap_page(&pdev->dev,
-                                      tx_ring->txbufs[idx].dma_addr,
-                                      skb_frag_size(frag), DMA_TO_DEVICE);
-               }
+                       struct sk_buff *skb = tx_ring->txbufs[idx].skb;
+                       int nr_frags = skb_shinfo(skb)->nr_frags;
+
+                       if (tx_buf->fidx == -1) {
+                               /* unmap head */
+                               dma_unmap_single(&pdev->dev, tx_buf->dma_addr,
+                                                skb_headlen(skb),
+                                                DMA_TO_DEVICE);
+                       } else {
+                               /* unmap fragment */
+                               frag = &skb_shinfo(skb)->frags[tx_buf->fidx];
+                               dma_unmap_page(&pdev->dev, tx_buf->dma_addr,
+                                              skb_frag_size(frag),
+                                              DMA_TO_DEVICE);
+                       }
 
-               /* check for last gather fragment */
-               if (fidx == nr_frags - 1)
-                       dev_kfree_skb_any(skb);
+                       /* check for last gather fragment */
+                       if (tx_buf->fidx == nr_frags - 1)
+                               dev_kfree_skb_any(skb);
+               }
 
-               tx_ring->txbufs[idx].dma_addr = 0;
-               tx_ring->txbufs[idx].skb = NULL;
-               tx_ring->txbufs[idx].fidx = -2;
+               tx_buf->dma_addr = 0;
+               tx_buf->skb = NULL;
+               tx_buf->fidx = -2;
 
                tx_ring->qcp_rd_p++;
                tx_ring->rd_p++;
@@ -1040,6 +1071,9 @@ nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring)
        tx_ring->qcp_rd_p = 0;
        tx_ring->wr_ptr_add = 0;
 
+       if (tx_ring == r_vec->xdp_ring)
+               return;
+
        nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
        netdev_tx_reset_queue(nd_q);
 }
@@ -1049,7 +1083,7 @@ static void nfp_net_tx_timeout(struct net_device *netdev)
        struct nfp_net *nn = netdev_priv(netdev);
        int i;
 
-       for (i = 0; i < nn->num_tx_rings; i++) {
+       for (i = 0; i < nn->netdev->real_num_tx_queues; i++) {
                if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i)))
                        continue;
                nn_warn(nn, "TX timeout on ring: %d\n", i);
@@ -1059,69 +1093,112 @@ static void nfp_net_tx_timeout(struct net_device *netdev)
 
 /* Receive processing
  */
+static unsigned int
+nfp_net_calc_fl_bufsz(struct nfp_net *nn, unsigned int mtu)
+{
+       unsigned int fl_bufsz;
 
-/**
- * nfp_net_rx_space() - return the number of free slots on the RX ring
- * @rx_ring:   RX ring structure
- *
- * Make sure we leave at least one slot free.
- *
- * Return: True if there is space on the RX ring
- */
-static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring)
+       fl_bufsz = NFP_NET_RX_BUF_HEADROOM;
+       if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
+               fl_bufsz += NFP_NET_MAX_PREPEND;
+       else
+               fl_bufsz += nn->rx_offset;
+       fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + mtu;
+
+       fl_bufsz = SKB_DATA_ALIGN(fl_bufsz);
+       fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+       return fl_bufsz;
+}
+
+static void
+nfp_net_free_frag(void *frag, bool xdp)
 {
-       return (rx_ring->cnt - 1) - (rx_ring->wr_p - rx_ring->rd_p);
+       if (!xdp)
+               skb_free_frag(frag);
+       else
+               __free_page(virt_to_page(frag));
 }
 
 /**
- * nfp_net_rx_alloc_one() - Allocate and map skb for RX
+ * nfp_net_rx_alloc_one() - Allocate and map page frag for RX
  * @rx_ring:   RX ring structure of the skb
  * @dma_addr:  Pointer to storage for DMA address (output param)
  * @fl_bufsz:  size of freelist buffers
+ * @xdp:       Whether XDP is enabled
  *
- * This function will allcate a new skb, map it for DMA.
+ * This function will allocate a new page frag and map it for DMA.
  *
- * Return: allocated skb or NULL on failure.
+ * Return: allocated page frag or NULL on failure.
  */
-static struct sk_buff *
+static void *
 nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr,
-                    unsigned int fl_bufsz)
+                    unsigned int fl_bufsz, bool xdp)
 {
        struct nfp_net *nn = rx_ring->r_vec->nfp_net;
-       struct sk_buff *skb;
+       int direction;
+       void *frag;
 
-       skb = netdev_alloc_skb(nn->netdev, fl_bufsz);
-       if (!skb) {
-               nn_warn_ratelimit(nn, "Failed to alloc receive SKB\n");
+       if (!xdp)
+               frag = netdev_alloc_frag(fl_bufsz);
+       else
+               frag = page_address(alloc_page(GFP_KERNEL | __GFP_COLD));
+       if (!frag) {
+               nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n");
                return NULL;
        }
 
-       *dma_addr = dma_map_single(&nn->pdev->dev, skb->data,
-                                  fl_bufsz, DMA_FROM_DEVICE);
+       direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+
+       *dma_addr = nfp_net_dma_map_rx(nn, frag, fl_bufsz, direction);
        if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
-               dev_kfree_skb_any(skb);
+               nfp_net_free_frag(frag, xdp);
+               nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
+               return NULL;
+       }
+
+       return frag;
+}
+
+static void *
+nfp_net_napi_alloc_one(struct nfp_net *nn, int direction, dma_addr_t *dma_addr)
+{
+       void *frag;
+
+       if (!nn->xdp_prog)
+               frag = napi_alloc_frag(nn->fl_bufsz);
+       else
+               frag = page_address(alloc_page(GFP_ATOMIC | __GFP_COLD));
+       if (!frag) {
+               nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n");
+               return NULL;
+       }
+
+       *dma_addr = nfp_net_dma_map_rx(nn, frag, nn->fl_bufsz, direction);
+       if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
+               nfp_net_free_frag(frag, nn->xdp_prog);
                nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
                return NULL;
        }
 
-       return skb;
+       return frag;
 }
 
 /**
  * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings
  * @rx_ring:   RX ring structure
- * @skb:       Skb to put on rings
+ * @frag:      page fragment buffer
  * @dma_addr:  DMA address of skb mapping
  */
 static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring,
-                               struct sk_buff *skb, dma_addr_t dma_addr)
+                               void *frag, dma_addr_t dma_addr)
 {
        unsigned int wr_idx;
 
-       wr_idx = rx_ring->wr_p % rx_ring->cnt;
+       wr_idx = rx_ring->wr_p & (rx_ring->cnt - 1);
 
        /* Stash SKB and DMA address away */
-       rx_ring->rxbufs[wr_idx].skb = skb;
+       rx_ring->rxbufs[wr_idx].frag = frag;
        rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;
 
        /* Fill freelist descriptor */
@@ -1153,12 +1230,12 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
        unsigned int wr_idx, last_idx;
 
        /* Move the empty entry to the end of the list */
-       wr_idx = rx_ring->wr_p % rx_ring->cnt;
+       wr_idx = rx_ring->wr_p & (rx_ring->cnt - 1);
        last_idx = rx_ring->cnt - 1;
        rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr;
-       rx_ring->rxbufs[wr_idx].skb = rx_ring->rxbufs[last_idx].skb;
+       rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag;
        rx_ring->rxbufs[last_idx].dma_addr = 0;
-       rx_ring->rxbufs[last_idx].skb = NULL;
+       rx_ring->rxbufs[last_idx].frag = NULL;
 
        memset(rx_ring->rxds, 0, sizeof(*rx_ring->rxds) * rx_ring->cnt);
        rx_ring->wr_p = 0;
@@ -1170,15 +1247,17 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
  * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring
  * @nn:                NFP Net device
  * @rx_ring:   RX ring to remove buffers from
+ * @xdp:       Whether XDP is enabled
  *
  * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1)
  * entries.  After device is disabled nfp_net_rx_ring_reset() must be called
  * to restore required ring geometry.
  */
 static void
-nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
+nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
+                         bool xdp)
 {
-       struct pci_dev *pdev = nn->pdev;
+       int direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
        unsigned int i;
 
        for (i = 0; i < rx_ring->cnt - 1; i++) {
@@ -1186,14 +1265,14 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
                 * fails to allocate enough buffers and calls here to free
                 * already allocated ones.
                 */
-               if (!rx_ring->rxbufs[i].skb)
+               if (!rx_ring->rxbufs[i].frag)
                        continue;
 
-               dma_unmap_single(&pdev->dev, rx_ring->rxbufs[i].dma_addr,
-                                rx_ring->bufsz, DMA_FROM_DEVICE);
-               dev_kfree_skb_any(rx_ring->rxbufs[i].skb);
+               nfp_net_dma_unmap_rx(nn, rx_ring->rxbufs[i].dma_addr,
+                                    rx_ring->bufsz, direction);
+               nfp_net_free_frag(rx_ring->rxbufs[i].frag, xdp);
                rx_ring->rxbufs[i].dma_addr = 0;
-               rx_ring->rxbufs[i].skb = NULL;
+               rx_ring->rxbufs[i].frag = NULL;
        }
 }
 
@@ -1201,9 +1280,11 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
  * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW)
  * @nn:                NFP Net device
  * @rx_ring:   RX ring to remove buffers from
+ * @xdp:       Whether XDP is enabled
  */
 static int
-nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
+nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
+                          bool xdp)
 {
        struct nfp_net_rx_buf *rxbufs;
        unsigned int i;
@@ -1211,11 +1292,11 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
        rxbufs = rx_ring->rxbufs;
 
        for (i = 0; i < rx_ring->cnt - 1; i++) {
-               rxbufs[i].skb =
+               rxbufs[i].frag =
                        nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr,
-                                            rx_ring->bufsz);
-               if (!rxbufs[i].skb) {
-                       nfp_net_rx_ring_bufs_free(nn, rx_ring);
+                                            rx_ring->bufsz, xdp);
+               if (!rxbufs[i].frag) {
+                       nfp_net_rx_ring_bufs_free(nn, rx_ring, xdp);
                        return -ENOMEM;
                }
        }
@@ -1232,7 +1313,7 @@ static void nfp_net_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring)
        unsigned int i;
 
        for (i = 0; i < rx_ring->cnt - 1; i++)
-               nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].skb,
+               nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].frag,
                                    rx_ring->rxbufs[i].dma_addr);
 }
 
@@ -1359,6 +1440,87 @@ nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
        return data;
 }
 
+static void
+nfp_net_rx_drop(struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring,
+               struct nfp_net_rx_buf *rxbuf, struct sk_buff *skb)
+{
+       u64_stats_update_begin(&r_vec->rx_sync);
+       r_vec->rx_drops++;
+       u64_stats_update_end(&r_vec->rx_sync);
+
+       /* skb is built on top of the frag, so freeing the skb would also free
+        * the frag; to be able to reuse it we need an extra ref.
+        */
+       if (skb && rxbuf && skb->head == rxbuf->frag)
+               page_ref_inc(virt_to_head_page(rxbuf->frag));
+       if (rxbuf)
+               nfp_net_rx_give_one(rx_ring, rxbuf->frag, rxbuf->dma_addr);
+       if (skb)
+               dev_kfree_skb_any(skb);
+}
+
+static void
+nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
+                  struct nfp_net_tx_ring *tx_ring,
+                  struct nfp_net_rx_buf *rxbuf, unsigned int pkt_off,
+                  unsigned int pkt_len)
+{
+       struct nfp_net_tx_buf *txbuf;
+       struct nfp_net_tx_desc *txd;
+       dma_addr_t new_dma_addr;
+       void *new_frag;
+       int wr_idx;
+
+       if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
+               nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL);
+               return;
+       }
+
+       new_frag = nfp_net_napi_alloc_one(nn, DMA_BIDIRECTIONAL, &new_dma_addr);
+       if (unlikely(!new_frag)) {
+               nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL);
+               return;
+       }
+       nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr);
+
+       wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1);
+
+       /* Stash the soft descriptor of the head then initialize it */
+       txbuf = &tx_ring->txbufs[wr_idx];
+       txbuf->frag = rxbuf->frag;
+       txbuf->dma_addr = rxbuf->dma_addr;
+       txbuf->fidx = -1;
+       txbuf->pkt_cnt = 1;
+       txbuf->real_len = pkt_len;
+
+       dma_sync_single_for_device(&nn->pdev->dev, rxbuf->dma_addr + pkt_off,
+                                  pkt_len, DMA_TO_DEVICE);
+
+       /* Build TX descriptor */
+       txd = &tx_ring->txds[wr_idx];
+       txd->offset_eop = PCIE_DESC_TX_EOP;
+       txd->dma_len = cpu_to_le16(pkt_len);
+       nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + pkt_off);
+       txd->data_len = cpu_to_le16(pkt_len);
+
+       txd->flags = 0;
+       txd->mss = 0;
+       txd->l4_offset = 0;
+
+       tx_ring->wr_p++;
+       tx_ring->wr_ptr_add++;
+}
+
+static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, unsigned int len)
+{
+       struct xdp_buff xdp;
+
+       xdp.data = data;
+       xdp.data_end = data + len;
+
+       return bpf_prog_run_xdp(prog, &xdp);
+}
+
 /**
  * nfp_net_rx() - receive up to @budget packets on @rx_ring
  * @rx_ring:   RX ring to receive from
@@ -1368,62 +1530,39 @@ nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
  * more cleanly separate packet receive code from other bookkeeping
  * functions performed in the napi poll function.
  *
- * There are differences between the NFP-3200 firmware and the
- * NFP-6000 firmware.  The NFP-3200 firmware uses a dedicated RX queue
- * to indicate that new packets have arrived.  The NFP-6000 does not
- * have this queue and uses the DD bit in the RX descriptor. This
- * method cannot be used on the NFP-3200 as it causes a race
- * condition: The RX ring write pointer on the NFP-3200 is updated
- * after packets (and descriptors) have been DMAed.  If the DD bit is
- * used and subsequently the read pointer is updated this may lead to
- * the RX queue to underflow (if the firmware has not yet update the
- * write pointer).  Therefore we use slightly ugly conditional code
- * below to handle the differences.  We may, in the future update the
- * NFP-3200 firmware to behave the same as the firmware on the
- * NFP-6000.
- *
  * Return: Number of packets received.
  */
 static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 {
        struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
        struct nfp_net *nn = r_vec->nfp_net;
-       unsigned int data_len, meta_len;
-       int avail = 0, pkts_polled = 0;
-       struct sk_buff *skb, *new_skb;
-       struct nfp_net_rx_desc *rxd;
-       dma_addr_t new_dma_addr;
-       u32 qcp_wr_p;
+       struct nfp_net_tx_ring *tx_ring;
+       struct bpf_prog *xdp_prog;
+       unsigned int true_bufsz;
+       struct sk_buff *skb;
+       int pkts_polled = 0;
+       int rx_dma_map_dir;
        int idx;
 
-       if (nn->is_nfp3200) {
-               /* Work out how many packets arrived */
-               qcp_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx);
-               idx = rx_ring->rd_p % rx_ring->cnt;
+       rcu_read_lock();
+       xdp_prog = READ_ONCE(nn->xdp_prog);
+       rx_dma_map_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+       true_bufsz = xdp_prog ? PAGE_SIZE : nn->fl_bufsz;
+       tx_ring = r_vec->xdp_ring;
 
-               if (qcp_wr_p == idx)
-                       /* No new packets */
-                       return 0;
+       while (pkts_polled < budget) {
+               unsigned int meta_len, data_len, data_off, pkt_len, pkt_off;
+               struct nfp_net_rx_buf *rxbuf;
+               struct nfp_net_rx_desc *rxd;
+               dma_addr_t new_dma_addr;
+               void *new_frag;
 
-               if (qcp_wr_p > idx)
-                       avail = qcp_wr_p - idx;
-               else
-                       avail = qcp_wr_p + rx_ring->cnt - idx;
-       } else {
-               avail = budget + 1;
-       }
-
-       while (avail > 0 && pkts_polled < budget) {
-               idx = rx_ring->rd_p % rx_ring->cnt;
+               idx = rx_ring->rd_p & (rx_ring->cnt - 1);
 
                rxd = &rx_ring->rxds[idx];
-               if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) {
-                       if (nn->is_nfp3200)
-                               nn_dbg(nn, "RX descriptor not valid (DD)%d:%u rxd[0]=%#x rxd[1]=%#x\n",
-                                      rx_ring->idx, idx,
-                                      rxd->vals[0], rxd->vals[1]);
+               if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
                        break;
-               }
+
                /* Memory barrier to ensure that we won't do other reads
                 * before the DD bit.
                 */
@@ -1431,27 +1570,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
                rx_ring->rd_p++;
                pkts_polled++;
-               avail--;
-
-               skb = rx_ring->rxbufs[idx].skb;
-
-               new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr,
-                                              nn->fl_bufsz);
-               if (!new_skb) {
-                       nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[idx].skb,
-                                           rx_ring->rxbufs[idx].dma_addr);
-                       u64_stats_update_begin(&r_vec->rx_sync);
-                       r_vec->rx_drops++;
-                       u64_stats_update_end(&r_vec->rx_sync);
-                       continue;
-               }
-
-               dma_unmap_single(&nn->pdev->dev,
-                                rx_ring->rxbufs[idx].dma_addr,
-                                nn->fl_bufsz, DMA_FROM_DEVICE);
-
-               nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr);
 
+               rxbuf = &rx_ring->rxbufs[idx];
                /*         < meta_len >
                 *  <-- [rx_offset] -->
                 *  ---------------------------------------------------------
@@ -1466,19 +1586,66 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
                 */
                meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
                data_len = le16_to_cpu(rxd->rxd.data_len);
+               pkt_len = data_len - meta_len;
 
                if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
-                       skb_reserve(skb, meta_len);
+                       pkt_off = meta_len;
                else
-                       skb_reserve(skb, nn->rx_offset);
-               skb_put(skb, data_len - meta_len);
+                       pkt_off = nn->rx_offset;
+               data_off = NFP_NET_RX_BUF_HEADROOM + pkt_off;
 
                /* Stats update */
                u64_stats_update_begin(&r_vec->rx_sync);
                r_vec->rx_pkts++;
-               r_vec->rx_bytes += skb->len;
+               r_vec->rx_bytes += pkt_len;
                u64_stats_update_end(&r_vec->rx_sync);
 
+               if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
+                                 nn->bpf_offload_xdp)) {
+                       int act;
+
+                       dma_sync_single_for_cpu(&nn->pdev->dev,
+                                               rxbuf->dma_addr + pkt_off,
+                                               pkt_len, DMA_FROM_DEVICE);
+                       act = nfp_net_run_xdp(xdp_prog, rxbuf->frag + data_off,
+                                             pkt_len);
+                       switch (act) {
+                       case XDP_PASS:
+                               break;
+                       case XDP_TX:
+                               nfp_net_tx_xdp_buf(nn, rx_ring, tx_ring, rxbuf,
+                                                  pkt_off, pkt_len);
+                               continue;
+                       default:
+                               bpf_warn_invalid_xdp_action(act);
+                       case XDP_ABORTED:
+                       case XDP_DROP:
+                               nfp_net_rx_give_one(rx_ring, rxbuf->frag,
+                                                   rxbuf->dma_addr);
+                               continue;
+                       }
+               }
+
+               skb = build_skb(rxbuf->frag, true_bufsz);
+               if (unlikely(!skb)) {
+                       nfp_net_rx_drop(r_vec, rx_ring, rxbuf, NULL);
+                       continue;
+               }
+               new_frag = nfp_net_napi_alloc_one(nn, rx_dma_map_dir,
+                                                 &new_dma_addr);
+               if (unlikely(!new_frag)) {
+                       nfp_net_rx_drop(r_vec, rx_ring, rxbuf, skb);
+                       continue;
+               }
+
+               nfp_net_dma_unmap_rx(nn, rxbuf->dma_addr, nn->fl_bufsz,
+                                    rx_dma_map_dir);
+
+               nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr);
+
+               skb_reserve(skb, data_off);
+               skb_put(skb, pkt_len);
+
                if (nn->fw_ver.major <= 3) {
                        nfp_net_set_hash_desc(nn->netdev, skb, rxd);
                } else if (meta_len) {
@@ -1486,12 +1653,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
                        end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
                        if (unlikely(end != skb->data)) {
-                               u64_stats_update_begin(&r_vec->rx_sync);
-                               r_vec->rx_drops++;
-                               u64_stats_update_end(&r_vec->rx_sync);
-
-                               dev_kfree_skb_any(skb);
                                nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
+                               nfp_net_rx_drop(r_vec, rx_ring, NULL, skb);
                                continue;
                        }
                }
@@ -1508,8 +1671,9 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
                napi_gro_receive(&rx_ring->r_vec->napi, skb);
        }
 
-       if (nn->is_nfp3200)
-               nfp_qcp_rd_ptr_add(rx_ring->qcp_rx, pkts_polled);
+       if (xdp_prog && tx_ring->wr_ptr_add)
+               nfp_net_tx_xmit_more_flush(tx_ring);
+       rcu_read_unlock();
 
        return pkts_polled;
 }
@@ -1525,21 +1689,19 @@ static int nfp_net_poll(struct napi_struct *napi, int budget)
 {
        struct nfp_net_r_vector *r_vec =
                container_of(napi, struct nfp_net_r_vector, napi);
-       struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
-       struct nfp_net_tx_ring *tx_ring = r_vec->tx_ring;
-       struct nfp_net *nn = r_vec->nfp_net;
-       struct netdev_queue *txq;
-       unsigned int pkts_polled;
-
-       tx_ring = &nn->tx_rings[rx_ring->idx];
-       txq = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
-       nfp_net_tx_complete(tx_ring);
+       unsigned int pkts_polled = 0;
 
-       pkts_polled = nfp_net_rx(rx_ring, budget);
+       if (r_vec->tx_ring)
+               nfp_net_tx_complete(r_vec->tx_ring);
+       if (r_vec->rx_ring) {
+               pkts_polled = nfp_net_rx(r_vec->rx_ring, budget);
+               if (r_vec->xdp_ring)
+                       nfp_net_xdp_complete(r_vec->xdp_ring);
+       }
 
        if (pkts_polled < budget) {
                napi_complete_done(napi, pkts_polled);
-               nfp_net_irq_unmask(nn, r_vec->irq_idx);
+               nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_idx);
        }
 
        return pkts_polled;
@@ -1575,10 +1737,12 @@ static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
  * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring
  * @tx_ring:   TX Ring structure to allocate
  * @cnt:       Ring buffer count
+ * @is_xdp:    True if ring will be used for XDP
  *
  * Return: 0 on success, negative errno otherwise.
  */
-static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt)
+static int
+nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt, bool is_xdp)
 {
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
        struct nfp_net *nn = r_vec->nfp_net;
@@ -1598,11 +1762,14 @@ static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt)
        if (!tx_ring->txbufs)
                goto err_alloc;
 
-       netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask, tx_ring->idx);
+       if (!is_xdp)
+               netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask,
+                                   tx_ring->idx);
 
-       nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p\n",
+       nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p %s\n",
               tx_ring->idx, tx_ring->qcidx,
-              tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds);
+              tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds,
+              is_xdp ? "XDP" : "");
 
        return 0;
 
@@ -1612,23 +1779,29 @@ err_alloc:
 }
 
 static struct nfp_net_tx_ring *
-nfp_net_shadow_tx_rings_prepare(struct nfp_net *nn, u32 buf_cnt)
+nfp_net_tx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s,
+                           unsigned int num_stack_tx_rings)
 {
        struct nfp_net_tx_ring *rings;
        unsigned int r;
 
-       rings = kcalloc(nn->num_tx_rings, sizeof(*rings), GFP_KERNEL);
+       rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL);
        if (!rings)
                return NULL;
 
-       for (r = 0; r < nn->num_tx_rings; r++) {
-               nfp_net_tx_ring_init(&rings[r], nn->tx_rings[r].r_vec, r);
+       for (r = 0; r < s->n_rings; r++) {
+               int bias = 0;
+
+               if (r >= num_stack_tx_rings)
+                       bias = num_stack_tx_rings;
 
-               if (nfp_net_tx_ring_alloc(&rings[r], buf_cnt))
+               nfp_net_tx_ring_init(&rings[r], &nn->r_vecs[r - bias], r);
+
+               if (nfp_net_tx_ring_alloc(&rings[r], s->dcnt, bias))
                        goto err_free_prev;
        }
 
-       return rings;
+       return s->rings = rings;
 
 err_free_prev:
        while (r--)
@@ -1637,28 +1810,27 @@ err_free_prev:
        return NULL;
 }
 
-static struct nfp_net_tx_ring *
-nfp_net_shadow_tx_rings_swap(struct nfp_net *nn, struct nfp_net_tx_ring *rings)
+static void
+nfp_net_tx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s)
 {
-       struct nfp_net_tx_ring *old = nn->tx_rings;
-       unsigned int r;
+       struct nfp_net_ring_set new = *s;
 
-       for (r = 0; r < nn->num_tx_rings; r++)
-               old[r].r_vec->tx_ring = &rings[r];
+       s->dcnt = nn->txd_cnt;
+       s->rings = nn->tx_rings;
+       s->n_rings = nn->num_tx_rings;
 
-       nn->tx_rings = rings;
-       return old;
+       nn->txd_cnt = new.dcnt;
+       nn->tx_rings = new.rings;
+       nn->num_tx_rings = new.n_rings;
 }
 
 static void
-nfp_net_shadow_tx_rings_free(struct nfp_net *nn, struct nfp_net_tx_ring *rings)
+nfp_net_tx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s)
 {
+       struct nfp_net_tx_ring *rings = s->rings;
        unsigned int r;
 
-       if (!rings)
-               return;
-
-       for (r = 0; r < nn->num_tx_rings; r++)
+       for (r = 0; r < s->n_rings; r++)
                nfp_net_tx_ring_free(&rings[r]);
 
        kfree(rings);
@@ -1730,31 +1902,32 @@ err_alloc:
 }
 
 static struct nfp_net_rx_ring *
-nfp_net_shadow_rx_rings_prepare(struct nfp_net *nn, unsigned int fl_bufsz,
-                               u32 buf_cnt)
+nfp_net_rx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s,
+                           bool xdp)
 {
+       unsigned int fl_bufsz = nfp_net_calc_fl_bufsz(nn, s->mtu);
        struct nfp_net_rx_ring *rings;
        unsigned int r;
 
-       rings = kcalloc(nn->num_rx_rings, sizeof(*rings), GFP_KERNEL);
+       rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL);
        if (!rings)
                return NULL;
 
-       for (r = 0; r < nn->num_rx_rings; r++) {
-               nfp_net_rx_ring_init(&rings[r], nn->rx_rings[r].r_vec, r);
+       for (r = 0; r < s->n_rings; r++) {
+               nfp_net_rx_ring_init(&rings[r], &nn->r_vecs[r], r);
 
-               if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, buf_cnt))
+               if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, s->dcnt))
                        goto err_free_prev;
 
-               if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r]))
+               if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r], xdp))
                        goto err_free_ring;
        }
 
-       return rings;
+       return s->rings = rings;
 
 err_free_prev:
        while (r--) {
-               nfp_net_rx_ring_bufs_free(nn, &rings[r]);
+               nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp);
 err_free_ring:
                nfp_net_rx_ring_free(&rings[r]);
        }
@@ -1762,35 +1935,50 @@ err_free_ring:
        return NULL;
 }
 
-static struct nfp_net_rx_ring *
-nfp_net_shadow_rx_rings_swap(struct nfp_net *nn, struct nfp_net_rx_ring *rings)
+static void
+nfp_net_rx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s)
 {
-       struct nfp_net_rx_ring *old = nn->rx_rings;
-       unsigned int r;
+       struct nfp_net_ring_set new = *s;
 
-       for (r = 0; r < nn->num_rx_rings; r++)
-               old[r].r_vec->rx_ring = &rings[r];
+       s->mtu = nn->netdev->mtu;
+       s->dcnt = nn->rxd_cnt;
+       s->rings = nn->rx_rings;
+       s->n_rings = nn->num_rx_rings;
 
-       nn->rx_rings = rings;
-       return old;
+       nn->netdev->mtu = new.mtu;
+       nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, new.mtu);
+       nn->rxd_cnt = new.dcnt;
+       nn->rx_rings = new.rings;
+       nn->num_rx_rings = new.n_rings;
 }
 
 static void
-nfp_net_shadow_rx_rings_free(struct nfp_net *nn, struct nfp_net_rx_ring *rings)
+nfp_net_rx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s,
+                        bool xdp)
 {
+       struct nfp_net_rx_ring *rings = s->rings;
        unsigned int r;
 
-       if (!rings)
-               return;
-
-       for (r = 0; r < nn->num_r_vecs; r++) {
-               nfp_net_rx_ring_bufs_free(nn, &rings[r]);
+       for (r = 0; r < s->n_rings; r++) {
+               nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp);
                nfp_net_rx_ring_free(&rings[r]);
        }
 
        kfree(rings);
 }
 
+static void
+nfp_net_vector_assign_rings(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+                           int idx)
+{
+       r_vec->rx_ring = idx < nn->num_rx_rings ? &nn->rx_rings[idx] : NULL;
+       r_vec->tx_ring =
+               idx < nn->num_stack_tx_rings ? &nn->tx_rings[idx] : NULL;
+
+       r_vec->xdp_ring = idx < nn->num_tx_rings - nn->num_stack_tx_rings ?
+               &nn->tx_rings[nn->num_stack_tx_rings + idx] : NULL;
+}
+
 static int
 nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
                       int idx)
@@ -1798,25 +1986,20 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
        struct msix_entry *entry = &nn->irq_entries[r_vec->irq_idx];
        int err;
 
-       r_vec->tx_ring = &nn->tx_rings[idx];
-       nfp_net_tx_ring_init(r_vec->tx_ring, r_vec, idx);
-
-       r_vec->rx_ring = &nn->rx_rings[idx];
-       nfp_net_rx_ring_init(r_vec->rx_ring, r_vec, idx);
+       /* Setup NAPI */
+       netif_napi_add(nn->netdev, &r_vec->napi,
+                      nfp_net_poll, NAPI_POLL_WEIGHT);
 
        snprintf(r_vec->name, sizeof(r_vec->name),
                 "%s-rxtx-%d", nn->netdev->name, idx);
        err = request_irq(entry->vector, r_vec->handler, 0, r_vec->name, r_vec);
        if (err) {
+               netif_napi_del(&r_vec->napi);
                nn_err(nn, "Error requesting IRQ %d\n", entry->vector);
                return err;
        }
        disable_irq(entry->vector);
 
-       /* Setup NAPI */
-       netif_napi_add(nn->netdev, &r_vec->napi,
-                      nfp_net_poll, NAPI_POLL_WEIGHT);
-
        irq_set_affinity_hint(entry->vector, &r_vec->affinity_mask);
 
        nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", idx, entry->vector, entry->entry);
@@ -1879,13 +2062,13 @@ void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
        /* copy RX interrupt coalesce parameters */
        value = (nn->rx_coalesce_max_frames << 16) |
                (factor * nn->rx_coalesce_usecs);
-       for (i = 0; i < nn->num_r_vecs; i++)
+       for (i = 0; i < nn->num_rx_rings; i++)
                nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value);
 
        /* copy TX interrupt coalesce parameters */
        value = (nn->tx_coalesce_max_frames << 16) |
                (factor * nn->tx_coalesce_usecs);
-       for (i = 0; i < nn->num_r_vecs; i++)
+       for (i = 0; i < nn->num_tx_rings; i++)
                nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value);
 }
 
@@ -1901,9 +2084,8 @@ static void nfp_net_write_mac_addr(struct nfp_net *nn)
 {
        nn_writel(nn, NFP_NET_CFG_MACADDR + 0,
                  get_unaligned_be32(nn->netdev->dev_addr));
-       /* We can't do writew for NFP-3200 compatibility */
-       nn_writel(nn, NFP_NET_CFG_MACADDR + 4,
-                 get_unaligned_be16(nn->netdev->dev_addr + 4) << 16);
+       nn_writew(nn, NFP_NET_CFG_MACADDR + 6,
+                 get_unaligned_be16(nn->netdev->dev_addr + 4));
 }
 
 static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
@@ -1944,27 +2126,33 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
        if (err)
                nn_err(nn, "Could not disable device: %d\n", err);
 
-       for (r = 0; r < nn->num_r_vecs; r++) {
-               nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring);
-               nfp_net_tx_ring_reset(nn, nn->r_vecs[r].tx_ring);
+       for (r = 0; r < nn->num_rx_rings; r++)
+               nfp_net_rx_ring_reset(&nn->rx_rings[r]);
+       for (r = 0; r < nn->num_tx_rings; r++)
+               nfp_net_tx_ring_reset(nn, &nn->tx_rings[r]);
+       for (r = 0; r < nn->num_r_vecs; r++)
                nfp_net_vec_clear_ring_data(nn, r);
-       }
 
        nn->ctrl = new_ctrl;
 }
 
 static void
-nfp_net_vec_write_ring_data(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
-                           unsigned int idx)
+nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn,
+                            struct nfp_net_rx_ring *rx_ring, unsigned int idx)
 {
        /* Write the DMA address, size and MSI-X info to the device */
-       nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), r_vec->rx_ring->dma);
-       nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(r_vec->rx_ring->cnt));
-       nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), r_vec->irq_idx);
+       nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma);
+       nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt));
+       nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_idx);
+}
 
-       nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), r_vec->tx_ring->dma);
-       nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(r_vec->tx_ring->cnt));
-       nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), r_vec->irq_idx);
+static void
+nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
+                            struct nfp_net_tx_ring *tx_ring, unsigned int idx)
+{
+       nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma);
+       nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt));
+       nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_idx);
 }
 
 static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
@@ -1989,8 +2177,10 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
                update |= NFP_NET_CFG_UPDATE_IRQMOD;
        }
 
-       for (r = 0; r < nn->num_r_vecs; r++)
-               nfp_net_vec_write_ring_data(nn, &nn->r_vecs[r], r);
+       for (r = 0; r < nn->num_tx_rings; r++)
+               nfp_net_tx_ring_hw_cfg_write(nn, &nn->tx_rings[r], r);
+       for (r = 0; r < nn->num_rx_rings; r++)
+               nfp_net_rx_ring_hw_cfg_write(nn, &nn->rx_rings[r], r);
 
        nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ?
                  0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1);
@@ -2016,8 +2206,8 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
 
        nn->ctrl = new_ctrl;
 
-       for (r = 0; r < nn->num_r_vecs; r++)
-               nfp_net_rx_ring_fill_freelist(nn->r_vecs[r].rx_ring);
+       for (r = 0; r < nn->num_rx_rings; r++)
+               nfp_net_rx_ring_fill_freelist(&nn->rx_rings[r]);
 
        /* Since reconfiguration requests while NFP is down are ignored we
         * have to wipe the entire VXLAN configuration and reinitialize it.
@@ -2068,6 +2258,15 @@ static void nfp_net_open_stack(struct nfp_net *nn)
 static int nfp_net_netdev_open(struct net_device *netdev)
 {
        struct nfp_net *nn = netdev_priv(netdev);
+       struct nfp_net_ring_set rx = {
+               .n_rings = nn->num_rx_rings,
+               .mtu = nn->netdev->mtu,
+               .dcnt = nn->rxd_cnt,
+       };
+       struct nfp_net_ring_set tx = {
+               .n_rings = nn->num_tx_rings,
+               .dcnt = nn->txd_cnt,
+       };
        int err, r;
 
        if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) {
@@ -2092,39 +2291,29 @@ static int nfp_net_netdev_open(struct net_device *netdev)
                goto err_free_exn;
        disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
 
-       nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings),
-                              GFP_KERNEL);
+       for (r = 0; r < nn->num_r_vecs; r++) {
+               err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
+               if (err)
+                       goto err_cleanup_vec_p;
+       }
+
+       nn->rx_rings = nfp_net_rx_ring_set_prepare(nn, &rx, nn->xdp_prog);
        if (!nn->rx_rings) {
                err = -ENOMEM;
-               goto err_free_lsc;
+               goto err_cleanup_vec;
        }
-       nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings),
-                              GFP_KERNEL);
+
+       nn->tx_rings = nfp_net_tx_ring_set_prepare(nn, &tx,
+                                                  nn->num_stack_tx_rings);
        if (!nn->tx_rings) {
                err = -ENOMEM;
                goto err_free_rx_rings;
        }
 
-       for (r = 0; r < nn->num_r_vecs; r++) {
-               err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
-               if (err)
-                       goto err_free_prev_vecs;
-
-               err = nfp_net_tx_ring_alloc(nn->r_vecs[r].tx_ring, nn->txd_cnt);
-               if (err)
-                       goto err_cleanup_vec_p;
-
-               err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring,
-                                           nn->fl_bufsz, nn->rxd_cnt);
-               if (err)
-                       goto err_free_tx_ring_p;
-
-               err = nfp_net_rx_ring_bufs_alloc(nn, nn->r_vecs[r].rx_ring);
-               if (err)
-                       goto err_flush_rx_ring_p;
-       }
+       for (r = 0; r < nn->max_r_vecs; r++)
+               nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r);
 
-       err = netif_set_real_num_tx_queues(netdev, nn->num_tx_rings);
+       err = netif_set_real_num_tx_queues(netdev, nn->num_stack_tx_rings);
        if (err)
                goto err_free_rings;
 
@@ -2154,21 +2343,14 @@ static int nfp_net_netdev_open(struct net_device *netdev)
        return 0;
 
 err_free_rings:
+       nfp_net_tx_ring_set_free(nn, &tx);
+err_free_rx_rings:
+       nfp_net_rx_ring_set_free(nn, &rx, nn->xdp_prog);
+err_cleanup_vec:
        r = nn->num_r_vecs;
-err_free_prev_vecs:
-       while (r--) {
-               nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring);
-err_flush_rx_ring_p:
-               nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring);
-err_free_tx_ring_p:
-               nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring);
 err_cleanup_vec_p:
+       while (r--)
                nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
-       }
-       kfree(nn->tx_rings);
-err_free_rx_rings:
-       kfree(nn->rx_rings);
-err_free_lsc:
        nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
 err_free_exn:
        nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
@@ -2203,12 +2385,14 @@ static void nfp_net_close_free_all(struct nfp_net *nn)
 {
        unsigned int r;
 
-       for (r = 0; r < nn->num_r_vecs; r++) {
-               nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring);
-               nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring);
-               nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring);
-               nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
+       for (r = 0; r < nn->num_rx_rings; r++) {
+               nfp_net_rx_ring_bufs_free(nn, &nn->rx_rings[r], nn->xdp_prog);
+               nfp_net_rx_ring_free(&nn->rx_rings[r]);
        }
+       for (r = 0; r < nn->num_tx_rings; r++)
+               nfp_net_tx_ring_free(&nn->tx_rings[r]);
+       for (r = 0; r < nn->num_r_vecs; r++)
+               nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
 
        kfree(nn->rx_rings);
        kfree(nn->tx_rings);
@@ -2271,89 +2455,135 @@ static void nfp_net_set_rx_mode(struct net_device *netdev)
        nn->ctrl = new_ctrl;
 }
 
-static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
+static void nfp_net_rss_init_itbl(struct nfp_net *nn)
 {
-       unsigned int old_mtu, old_fl_bufsz, new_fl_bufsz;
-       struct nfp_net *nn = netdev_priv(netdev);
-       struct nfp_net_rx_ring *tmp_rings;
-       int err;
-
-       old_mtu = netdev->mtu;
-       old_fl_bufsz = nn->fl_bufsz;
-       new_fl_bufsz = NFP_NET_MAX_PREPEND + ETH_HLEN + VLAN_HLEN * 2 + new_mtu;
+       int i;
 
-       if (!netif_running(netdev)) {
-               netdev->mtu = new_mtu;
-               nn->fl_bufsz = new_fl_bufsz;
-               return 0;
-       }
+       for (i = 0; i < sizeof(nn->rss_itbl); i++)
+               nn->rss_itbl[i] =
+                       ethtool_rxfh_indir_default(i, nn->num_rx_rings);
+}
 
-       /* Prepare new rings */
-       tmp_rings = nfp_net_shadow_rx_rings_prepare(nn, new_fl_bufsz,
-                                                   nn->rxd_cnt);
-       if (!tmp_rings)
-               return -ENOMEM;
+static int
+nfp_net_ring_swap_enable(struct nfp_net *nn, unsigned int *num_vecs,
+                        unsigned int *stack_tx_rings,
+                        struct bpf_prog **xdp_prog,
+                        struct nfp_net_ring_set *rx,
+                        struct nfp_net_ring_set *tx)
+{
+       unsigned int r;
+       int err;
 
-       /* Stop device, swap in new rings, try to start the firmware */
-       nfp_net_close_stack(nn);
-       nfp_net_clear_config_and_disable(nn);
+       if (rx)
+               nfp_net_rx_ring_set_swap(nn, rx);
+       if (tx)
+               nfp_net_tx_ring_set_swap(nn, tx);
 
-       tmp_rings = nfp_net_shadow_rx_rings_swap(nn, tmp_rings);
+       swap(*num_vecs, nn->num_r_vecs);
+       swap(*stack_tx_rings, nn->num_stack_tx_rings);
+       *xdp_prog = xchg(&nn->xdp_prog, *xdp_prog);
 
-       netdev->mtu = new_mtu;
-       nn->fl_bufsz = new_fl_bufsz;
+       for (r = 0; r < nn->max_r_vecs; r++)
+               nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r);
 
-       err = nfp_net_set_config_and_enable(nn);
-       if (err) {
-               const int err_new = err;
+       if (!netif_is_rxfh_configured(nn->netdev))
+               nfp_net_rss_init_itbl(nn);
 
-               /* Try with old configuration and old rings */
-               tmp_rings = nfp_net_shadow_rx_rings_swap(nn, tmp_rings);
-
-               netdev->mtu = old_mtu;
-               nn->fl_bufsz = old_fl_bufsz;
+       err = netif_set_real_num_rx_queues(nn->netdev,
+                                          nn->num_rx_rings);
+       if (err)
+               return err;
 
-               err = __nfp_net_set_config_and_enable(nn);
+       if (nn->netdev->real_num_tx_queues != nn->num_stack_tx_rings) {
+               err = netif_set_real_num_tx_queues(nn->netdev,
+                                                  nn->num_stack_tx_rings);
                if (err)
-                       nn_err(nn, "Can't restore MTU - FW communication failed (%d,%d)\n",
-                              err_new, err);
+                       return err;
        }
 
-       nfp_net_shadow_rx_rings_free(nn, tmp_rings);
+       return __nfp_net_set_config_and_enable(nn);
+}
 
-       nfp_net_open_stack(nn);
+static int
+nfp_net_check_config(struct nfp_net *nn, struct bpf_prog *xdp_prog,
+                    struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx)
+{
+       /* XDP-enabled tests */
+       if (!xdp_prog)
+               return 0;
+       if (rx && nfp_net_calc_fl_bufsz(nn, rx->mtu) > PAGE_SIZE) {
+               nn_warn(nn, "MTU too large w/ XDP enabled\n");
+               return -EINVAL;
+       }
+       if (tx && tx->n_rings > nn->max_tx_rings) {
+               nn_warn(nn, "Insufficient number of TX rings w/ XDP enabled\n");
+               return -EINVAL;
+       }
 
-       return err;
+       return 0;
 }
 
-int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt)
-{
-       struct nfp_net_tx_ring *tx_rings = NULL;
-       struct nfp_net_rx_ring *rx_rings = NULL;
-       u32 old_rxd_cnt, old_txd_cnt;
+static void
+nfp_net_ring_reconfig_down(struct nfp_net *nn, struct bpf_prog **xdp_prog,
+                          struct nfp_net_ring_set *rx,
+                          struct nfp_net_ring_set *tx,
+                          unsigned int stack_tx_rings, unsigned int num_vecs)
+{
+       nn->netdev->mtu = rx ? rx->mtu : nn->netdev->mtu;
+       nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, nn->netdev->mtu);
+       nn->rxd_cnt = rx ? rx->dcnt : nn->rxd_cnt;
+       nn->txd_cnt = tx ? tx->dcnt : nn->txd_cnt;
+       nn->num_rx_rings = rx ? rx->n_rings : nn->num_rx_rings;
+       nn->num_tx_rings = tx ? tx->n_rings : nn->num_tx_rings;
+       nn->num_stack_tx_rings = stack_tx_rings;
+       nn->num_r_vecs = num_vecs;
+       *xdp_prog = xchg(&nn->xdp_prog, *xdp_prog);
+
+       if (!netif_is_rxfh_configured(nn->netdev))
+               nfp_net_rss_init_itbl(nn);
+}
+
+int
+nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog,
+                     struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx)
+{
+       unsigned int stack_tx_rings, num_vecs, r;
        int err;
 
+       stack_tx_rings = tx ? tx->n_rings : nn->num_tx_rings;
+       if (*xdp_prog)
+               stack_tx_rings -= rx ? rx->n_rings : nn->num_rx_rings;
+
+       num_vecs = max(rx ? rx->n_rings : nn->num_rx_rings, stack_tx_rings);
+
+       err = nfp_net_check_config(nn, *xdp_prog, rx, tx);
+       if (err)
+               return err;
+
        if (!netif_running(nn->netdev)) {
-               nn->rxd_cnt = rxd_cnt;
-               nn->txd_cnt = txd_cnt;
+               nfp_net_ring_reconfig_down(nn, xdp_prog, rx, tx,
+                                          stack_tx_rings, num_vecs);
                return 0;
        }
 
-       old_rxd_cnt = nn->rxd_cnt;
-       old_txd_cnt = nn->txd_cnt;
-
        /* Prepare new rings */
-       if (nn->rxd_cnt != rxd_cnt) {
-               rx_rings = nfp_net_shadow_rx_rings_prepare(nn, nn->fl_bufsz,
-                                                          rxd_cnt);
-               if (!rx_rings)
-                       return -ENOMEM;
+       for (r = nn->num_r_vecs; r < num_vecs; r++) {
+               err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
+               if (err) {
+                       num_vecs = r;
+                       goto err_cleanup_vecs;
+               }
        }
-       if (nn->txd_cnt != txd_cnt) {
-               tx_rings = nfp_net_shadow_tx_rings_prepare(nn, txd_cnt);
-               if (!tx_rings) {
-                       nfp_net_shadow_rx_rings_free(nn, rx_rings);
-                       return -ENOMEM;
+       if (rx) {
+               if (!nfp_net_rx_ring_set_prepare(nn, rx, *xdp_prog)) {
+                       err = -ENOMEM;
+                       goto err_cleanup_vecs;
+               }
+       }
+       if (tx) {
+               if (!nfp_net_tx_ring_set_prepare(nn, tx, stack_tx_rings)) {
+                       err = -ENOMEM;
+                       goto err_free_rx;
                }
        }
 
@@ -2361,39 +2591,51 @@ int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt)
        nfp_net_close_stack(nn);
        nfp_net_clear_config_and_disable(nn);
 
-       if (rx_rings)
-               rx_rings = nfp_net_shadow_rx_rings_swap(nn, rx_rings);
-       if (tx_rings)
-               tx_rings = nfp_net_shadow_tx_rings_swap(nn, tx_rings);
-
-       nn->rxd_cnt = rxd_cnt;
-       nn->txd_cnt = txd_cnt;
-
-       err = nfp_net_set_config_and_enable(nn);
+       err = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings,
+                                      xdp_prog, rx, tx);
        if (err) {
-               const int err_new = err;
+               int err2;
 
-               /* Try with old configuration and old rings */
-               if (rx_rings)
-                       rx_rings = nfp_net_shadow_rx_rings_swap(nn, rx_rings);
-               if (tx_rings)
-                       tx_rings = nfp_net_shadow_tx_rings_swap(nn, tx_rings);
-
-               nn->rxd_cnt = old_rxd_cnt;
-               nn->txd_cnt = old_txd_cnt;
+               nfp_net_clear_config_and_disable(nn);
 
-               err = __nfp_net_set_config_and_enable(nn);
-               if (err)
+               /* Try with old configuration and old rings */
+               err2 = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings,
+                                               xdp_prog, rx, tx);
+               if (err2)
                        nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n",
-                              err_new, err);
+                              err, err2);
        }
+       for (r = num_vecs - 1; r >= nn->num_r_vecs; r--)
+               nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
 
-       nfp_net_shadow_rx_rings_free(nn, rx_rings);
-       nfp_net_shadow_tx_rings_free(nn, tx_rings);
+       if (rx)
+               nfp_net_rx_ring_set_free(nn, rx, *xdp_prog);
+       if (tx)
+               nfp_net_tx_ring_set_free(nn, tx);
 
        nfp_net_open_stack(nn);
 
        return err;
+
+err_free_rx:
+       if (rx)
+               nfp_net_rx_ring_set_free(nn, rx, *xdp_prog);
+err_cleanup_vecs:
+       for (r = num_vecs - 1; r >= nn->num_r_vecs; r--)
+               nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
+       return err;
+}
+
+static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       struct nfp_net *nn = netdev_priv(netdev);
+       struct nfp_net_ring_set rx = {
+               .n_rings = nn->num_rx_rings,
+               .mtu = new_mtu,
+               .dcnt = nn->rxd_cnt,
+       };
+
+       return nfp_net_ring_reconfig(nn, &nn->xdp_prog, &rx, NULL);
 }
 
 static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev,
@@ -2450,8 +2692,12 @@ nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
        if (proto != htons(ETH_P_ALL))
                return -ENOTSUPP;
 
-       if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn))
-               return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf);
+       if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) {
+               if (!nn->bpf_offload_xdp)
+                       return nfp_net_bpf_offload(nn, tc->cls_bpf);
+               else
+                       return -EBUSY;
+       }
 
        return -EINVAL;
 }
@@ -2659,6 +2905,87 @@ static void nfp_net_del_vxlan_port(struct net_device *netdev,
                nfp_net_set_vxlan_port(nn, idx, 0);
 }
 
+static int nfp_net_xdp_offload(struct nfp_net *nn, struct bpf_prog *prog)
+{
+       struct tc_cls_bpf_offload cmd = {
+               .prog = prog,
+       };
+       int ret;
+
+       if (!nfp_net_ebpf_capable(nn))
+               return -EINVAL;
+
+       if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
+               if (!nn->bpf_offload_xdp)
+                       return prog ? -EBUSY : 0;
+               cmd.command = prog ? TC_CLSBPF_REPLACE : TC_CLSBPF_DESTROY;
+       } else {
+               if (!prog)
+                       return 0;
+               cmd.command = TC_CLSBPF_ADD;
+       }
+
+       ret = nfp_net_bpf_offload(nn, &cmd);
+       /* Stop offload if replace not possible */
+       if (ret && cmd.command == TC_CLSBPF_REPLACE)
+               nfp_net_xdp_offload(nn, NULL);
+       nn->bpf_offload_xdp = prog && !ret;
+       return ret;
+}
+
+static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog)
+{
+       struct nfp_net_ring_set rx = {
+               .n_rings = nn->num_rx_rings,
+               .mtu = nn->netdev->mtu,
+               .dcnt = nn->rxd_cnt,
+       };
+       struct nfp_net_ring_set tx = {
+               .n_rings = nn->num_tx_rings,
+               .dcnt = nn->txd_cnt,
+       };
+       int err;
+
+       if (!prog && !nn->xdp_prog)
+               return 0;
+       if (prog && nn->xdp_prog) {
+               prog = xchg(&nn->xdp_prog, prog);
+               bpf_prog_put(prog);
+               nfp_net_xdp_offload(nn, nn->xdp_prog);
+               return 0;
+       }
+
+       tx.n_rings += prog ? nn->num_rx_rings : -nn->num_rx_rings;
+
+       /* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */
+       err = nfp_net_ring_reconfig(nn, &prog, &rx, &tx);
+       if (err)
+               return err;
+
+       /* @prog got swapped and is now the old one */
+       if (prog)
+               bpf_prog_put(prog);
+
+       nfp_net_xdp_offload(nn, nn->xdp_prog);
+
+       return 0;
+}
+
+static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
+{
+       struct nfp_net *nn = netdev_priv(netdev);
+
+       switch (xdp->command) {
+       case XDP_SETUP_PROG:
+               return nfp_net_xdp_setup(nn, xdp->prog);
+       case XDP_QUERY_PROG:
+               xdp->prog_attached = !!nn->xdp_prog;
+               return 0;
+       default:
+               return -EINVAL;
+       }
+}
+
 static const struct net_device_ops nfp_net_netdev_ops = {
        .ndo_open               = nfp_net_netdev_open,
        .ndo_stop               = nfp_net_netdev_close,
@@ -2673,6 +3000,7 @@ static const struct net_device_ops nfp_net_netdev_ops = {
        .ndo_features_check     = nfp_net_features_check,
        .ndo_udp_tunnel_add     = nfp_net_add_vxlan_port,
        .ndo_udp_tunnel_del     = nfp_net_del_vxlan_port,
+       .ndo_xdp                = nfp_net_xdp,
 };
 
 /**
@@ -2681,8 +3009,7 @@ static const struct net_device_ops nfp_net_netdev_ops = {
  */
 void nfp_net_info(struct nfp_net *nn)
 {
-       nn_info(nn, "Netronome %s %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
-               nn->is_nfp3200 ? "NFP-32xx" : "NFP-6xxx",
+       nn_info(nn, "Netronome NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
                nn->is_vf ? "VF " : "",
                nn->num_tx_rings, nn->max_tx_rings,
                nn->num_rx_rings, nn->max_rx_rings);
@@ -2723,11 +3050,11 @@ void nfp_net_info(struct nfp_net *nn)
  * Return: NFP Net device structure, or ERR_PTR on error.
  */
 struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
-                                    int max_tx_rings, int max_rx_rings)
+                                    unsigned int max_tx_rings,
+                                    unsigned int max_rx_rings)
 {
        struct net_device *netdev;
        struct nfp_net *nn;
-       int nqs;
 
        netdev = alloc_etherdev_mqs(sizeof(struct nfp_net),
                                    max_tx_rings, max_rx_rings);
@@ -2743,9 +3070,12 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
        nn->max_tx_rings = max_tx_rings;
        nn->max_rx_rings = max_rx_rings;
 
-       nqs = netif_get_num_default_rss_queues();
-       nn->num_tx_rings = min_t(int, nqs, max_tx_rings);
-       nn->num_rx_rings = min_t(int, nqs, max_rx_rings);
+       nn->num_tx_rings = min_t(unsigned int, max_tx_rings, num_online_cpus());
+       nn->num_rx_rings = min_t(unsigned int, max_rx_rings,
+                                netif_get_num_default_rss_queues());
+
+       nn->num_r_vecs = max(nn->num_tx_rings, nn->num_rx_rings);
+       nn->num_r_vecs = min_t(unsigned int, nn->num_r_vecs, num_online_cpus());
 
        nn->txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
        nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
@@ -2777,13 +3107,9 @@ void nfp_net_netdev_free(struct nfp_net *nn)
  */
 static void nfp_net_rss_init(struct nfp_net *nn)
 {
-       int i;
-
        netdev_rss_key_fill(nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ);
 
-       for (i = 0; i < sizeof(nn->rss_itbl); i++)
-               nn->rss_itbl[i] =
-                       ethtool_rxfh_indir_default(i, nn->num_rx_rings);
+       nfp_net_rss_init_itbl(nn);
 
        /* Enable IPv4/IPv6 TCP by default */
        nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP |
@@ -2821,12 +3147,18 @@ int nfp_net_netdev_init(struct net_device *netdev)
 
        nfp_net_write_mac_addr(nn);
 
+       /* Determine RX packet/metadata boundary offset */
+       if (nn->fw_ver.major >= 2)
+               nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
+       else
+               nn->rx_offset = NFP_NET_RX_OFFSET;
+
        /* Set default MTU and Freelist buffer size */
        if (nn->max_mtu < NFP_NET_DEFAULT_MTU)
                netdev->mtu = nn->max_mtu;
        else
                netdev->mtu = NFP_NET_DEFAULT_MTU;
-       nn->fl_bufsz = NFP_NET_DEFAULT_RX_BUFSZ;
+       nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, netdev->mtu);
 
        /* Advertise/enable offloads based on capabilities
         *
@@ -2897,18 +3229,6 @@ int nfp_net_netdev_init(struct net_device *netdev)
                nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
        }
 
-       /* On NFP-3200 enable MSI-X auto-masking, if supported and the
-        * interrupts are not shared.
-        */
-       if (nn->is_nfp3200 && nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO)
-               nn->ctrl |= NFP_NET_CFG_CTRL_MSIXAUTO;
-
-       /* On NFP4000/NFP6000, determine RX packet/metadata boundary offset */
-       if (nn->fw_ver.major >= 2)
-               nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
-       else
-               nn->rx_offset = NFP_NET_RX_OFFSET;
-
        /* Stash the re-configuration queue away.  First odd queue in TX Bar */
        nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
 
@@ -2922,7 +3242,6 @@ int nfp_net_netdev_init(struct net_device *netdev)
                return err;
 
        /* Finalise the netdev setup */
-       ether_setup(netdev);
        netdev->netdev_ops = &nfp_net_netdev_ops;
        netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
 
@@ -2944,5 +3263,11 @@ int nfp_net_netdev_init(struct net_device *netdev)
  */
 void nfp_net_netdev_clean(struct net_device *netdev)
 {
-       unregister_netdev(netdev);
+       struct nfp_net *nn = netdev_priv(netdev);
+
+       if (nn->xdp_prog)
+               bpf_prog_put(nn->xdp_prog);
+       if (nn->bpf_offload_xdp)
+               nfp_net_xdp_offload(nn, NULL);
+       unregister_netdev(nn->netdev);
 }
index 93b10b441acbfc98c78429d0d6cd31035e3b9c39..385ba355c965c35cf81ecd09f25e3c70c29b76e7 100644 (file)
@@ -50,7 +50,7 @@
 /**
  * Configuration BAR size.
  *
- * The configuration BAR is 8K in size, but on the NFP6000, due to
+ * The configuration BAR is 8K in size, but due to
  * THB-350, 32k needs to be reserved.
  */
 #define NFP_NET_CFG_BAR_SZ              (32 * 1024)
 #define NFP_NET_CFG_START_RXQ           0x004c
 
 /**
- * NFP-3200 workaround (0x0050 - 0x0058)
- * @NFP_NET_CFG_SPARE_ADDR:  DMA address for ME code to use (e.g. YDS-155 fix)
- */
-#define NFP_NET_CFG_SPARE_ADDR          0x0050
-/**
- * NFP6000/NFP4000 - Prepend configuration
+ * Prepend configuration
  */
 #define NFP_NET_CFG_RX_OFFSET          0x0050
 #define NFP_NET_CFG_RX_OFFSET_DYNAMIC          0       /* Prepend mode */
 
 /**
- * NFP6000/NFP4000 - VXLAN/UDP encap configuration
+ * VXLAN/UDP encap configuration
  * @NFP_NET_CFG_VXLAN_PORT:    Base address of table of tunnels' UDP dst ports
  * @NFP_NET_CFG_VXLAN_SZ:      Size of the UDP port table in bytes
  */
 #define NFP_NET_CFG_VXLAN_SZ             0x0008
 
 /**
- * NFP6000 - BPF section
+ * BPF section
  * @NFP_NET_CFG_BPF_ABI:       BPF ABI version
  * @NFP_NET_CFG_BPF_CAP:       BPF capabilities
  * @NFP_NET_CFG_BPF_MAX_LEN:   Maximum size of JITed BPF code in bytes
index f7c9a5bc4aa334ee0f35f99b6b590795ce612149..c66f3f954aa8816b6817f37a49e25dedcb99a8de 100644 (file)
@@ -44,8 +44,8 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
        struct nfp_net_r_vector *r_vec = file->private;
        struct nfp_net_rx_ring *rx_ring;
        struct nfp_net_rx_desc *rxd;
-       struct sk_buff *skb;
        struct nfp_net *nn;
+       void *frag;
        int i;
 
        rtnl_lock();
@@ -73,10 +73,9 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
                seq_printf(file, "%04d: 0x%08x 0x%08x", i,
                           rxd->vals[0], rxd->vals[1]);
 
-               skb = READ_ONCE(rx_ring->rxbufs[i].skb);
-               if (skb)
-                       seq_printf(file, " skb->head=%p skb->data=%p",
-                                  skb->head, skb->data);
+               frag = READ_ONCE(rx_ring->rxbufs[i].frag);
+               if (frag)
+                       seq_printf(file, " frag=%p", frag);
 
                if (rx_ring->rxbufs[i].dma_addr)
                        seq_printf(file, " dma_addr=%pad",
@@ -115,6 +114,16 @@ static const struct file_operations nfp_rx_q_fops = {
        .llseek = seq_lseek
 };
 
+static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f);
+
+static const struct file_operations nfp_tx_q_fops = {
+       .owner = THIS_MODULE,
+       .open = nfp_net_debugfs_tx_q_open,
+       .release = single_release,
+       .read = seq_read,
+       .llseek = seq_lseek
+};
+
 static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
 {
        struct nfp_net_r_vector *r_vec = file->private;
@@ -127,10 +136,13 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
 
        rtnl_lock();
 
-       if (!r_vec->nfp_net || !r_vec->tx_ring)
+       if (debugfs_real_fops(file->file) == &nfp_tx_q_fops)
+               tx_ring = r_vec->tx_ring;
+       else
+               tx_ring = r_vec->xdp_ring;
+       if (!r_vec->nfp_net || !tx_ring)
                goto out;
        nn = r_vec->nfp_net;
-       tx_ring = r_vec->tx_ring;
        if (!netif_running(nn->netdev))
                goto out;
 
@@ -149,9 +161,14 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
                           txd->vals[2], txd->vals[3]);
 
                skb = READ_ONCE(tx_ring->txbufs[i].skb);
-               if (skb)
-                       seq_printf(file, " skb->head=%p skb->data=%p",
-                                  skb->head, skb->data);
+               if (skb) {
+                       if (tx_ring == r_vec->tx_ring)
+                               seq_printf(file, " skb->head=%p skb->data=%p",
+                                          skb->head, skb->data);
+                       else
+                               seq_printf(file, " frag=%p", skb);
+               }
+
                if (tx_ring->txbufs[i].dma_addr)
                        seq_printf(file, " dma_addr=%pad",
                                   &tx_ring->txbufs[i].dma_addr);
@@ -177,7 +194,7 @@ static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f)
        return single_open(f, nfp_net_debugfs_tx_q_read, inode->i_private);
 }
 
-static const struct file_operations nfp_tx_q_fops = {
+static const struct file_operations nfp_xdp_q_fops = {
        .owner = THIS_MODULE,
        .open = nfp_net_debugfs_tx_q_open,
        .release = single_release,
@@ -187,7 +204,7 @@ static const struct file_operations nfp_tx_q_fops = {
 
 void nfp_net_debugfs_adapter_add(struct nfp_net *nn)
 {
-       struct dentry *queues, *tx, *rx;
+       struct dentry *queues, *tx, *rx, *xdp;
        char int_name[16];
        int i;
 
@@ -205,16 +222,19 @@ void nfp_net_debugfs_adapter_add(struct nfp_net *nn)
 
        rx = debugfs_create_dir("rx", queues);
        tx = debugfs_create_dir("tx", queues);
-       if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx))
+       xdp = debugfs_create_dir("xdp", queues);
+       if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx) || IS_ERR_OR_NULL(xdp))
                return;
 
-       for (i = 0; i < nn->num_rx_rings; i++) {
+       for (i = 0; i < min(nn->max_rx_rings, nn->max_r_vecs); i++) {
                sprintf(int_name, "%d", i);
                debugfs_create_file(int_name, S_IRUSR, rx,
                                    &nn->r_vecs[i], &nfp_rx_q_fops);
+               debugfs_create_file(int_name, S_IRUSR, xdp,
+                                   &nn->r_vecs[i], &nfp_xdp_q_fops);
        }
 
-       for (i = 0; i < nn->num_tx_rings; i++) {
+       for (i = 0; i < min(nn->max_tx_rings, nn->max_r_vecs); i++) {
                sprintf(int_name, "%d", i);
                debugfs_create_file(int_name, S_IRUSR, tx,
                                    &nn->r_vecs[i], &nfp_tx_q_fops);
index 3418f2277e9d6f808665b5f3ce54a4fdffd95e0c..1b26e964657421fe9608b791a204a48b3e0e4b5f 100644 (file)
@@ -158,6 +158,28 @@ static void nfp_net_get_ringparam(struct net_device *netdev,
        ring->tx_pending = nn->txd_cnt;
 }
 
+static int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt)
+{
+       struct nfp_net_ring_set *reconfig_rx = NULL, *reconfig_tx = NULL;
+       struct nfp_net_ring_set rx = {
+               .n_rings = nn->num_rx_rings,
+               .mtu = nn->netdev->mtu,
+               .dcnt = rxd_cnt,
+       };
+       struct nfp_net_ring_set tx = {
+               .n_rings = nn->num_tx_rings,
+               .dcnt = txd_cnt,
+       };
+
+       if (nn->rxd_cnt != rxd_cnt)
+               reconfig_rx = &rx;
+       if (nn->txd_cnt != txd_cnt)
+               reconfig_tx = &tx;
+
+       return nfp_net_ring_reconfig(nn, &nn->xdp_prog,
+                                    reconfig_rx, reconfig_tx);
+}
+
 static int nfp_net_set_ringparam(struct net_device *netdev,
                                 struct ethtool_ringparam *ring)
 {
@@ -614,6 +636,76 @@ static int nfp_net_set_coalesce(struct net_device *netdev,
        return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD);
 }
 
+static void nfp_net_get_channels(struct net_device *netdev,
+                                struct ethtool_channels *channel)
+{
+       struct nfp_net *nn = netdev_priv(netdev);
+       unsigned int num_tx_rings;
+
+       num_tx_rings = nn->num_tx_rings;
+       if (nn->xdp_prog)
+               num_tx_rings -= nn->num_rx_rings;
+
+       channel->max_rx = min(nn->max_rx_rings, nn->max_r_vecs);
+       channel->max_tx = min(nn->max_tx_rings, nn->max_r_vecs);
+       channel->max_combined = min(channel->max_rx, channel->max_tx);
+       channel->max_other = NFP_NET_NON_Q_VECTORS;
+       channel->combined_count = min(nn->num_rx_rings, num_tx_rings);
+       channel->rx_count = nn->num_rx_rings - channel->combined_count;
+       channel->tx_count = num_tx_rings - channel->combined_count;
+       channel->other_count = NFP_NET_NON_Q_VECTORS;
+}
+
+static int nfp_net_set_num_rings(struct nfp_net *nn, unsigned int total_rx,
+                                unsigned int total_tx)
+{
+       struct nfp_net_ring_set *reconfig_rx = NULL, *reconfig_tx = NULL;
+       struct nfp_net_ring_set rx = {
+               .n_rings = total_rx,
+               .mtu = nn->netdev->mtu,
+               .dcnt = nn->rxd_cnt,
+       };
+       struct nfp_net_ring_set tx = {
+               .n_rings = total_tx,
+               .dcnt = nn->txd_cnt,
+       };
+
+       if (nn->num_rx_rings != total_rx)
+               reconfig_rx = &rx;
+       if (nn->num_stack_tx_rings != total_tx ||
+           (nn->xdp_prog && reconfig_rx))
+               reconfig_tx = &tx;
+
+       /* nfp_net_check_config() will catch tx.n_rings > nn->max_tx_rings */
+       if (nn->xdp_prog)
+               tx.n_rings += total_rx;
+
+       return nfp_net_ring_reconfig(nn, &nn->xdp_prog,
+                                    reconfig_rx, reconfig_tx);
+}
+
+static int nfp_net_set_channels(struct net_device *netdev,
+                               struct ethtool_channels *channel)
+{
+       struct nfp_net *nn = netdev_priv(netdev);
+       unsigned int total_rx, total_tx;
+
+       /* Reject unsupported */
+       if (!channel->combined_count ||
+           channel->other_count != NFP_NET_NON_Q_VECTORS ||
+           (channel->rx_count && channel->tx_count))
+               return -EINVAL;
+
+       total_rx = channel->combined_count + channel->rx_count;
+       total_tx = channel->combined_count + channel->tx_count;
+
+       if (total_rx > min(nn->max_rx_rings, nn->max_r_vecs) ||
+           total_tx > min(nn->max_tx_rings, nn->max_r_vecs))
+               return -EINVAL;
+
+       return nfp_net_set_num_rings(nn, total_rx, total_tx);
+}
+
 static const struct ethtool_ops nfp_net_ethtool_ops = {
        .get_drvinfo            = nfp_net_get_drvinfo,
        .get_link               = ethtool_op_get_link,
@@ -632,6 +724,8 @@ static const struct ethtool_ops nfp_net_ethtool_ops = {
        .get_regs               = nfp_net_get_regs,
        .get_coalesce           = nfp_net_get_coalesce,
        .set_coalesce           = nfp_net_set_coalesce,
+       .get_channels           = nfp_net_get_channels,
+       .set_channels           = nfp_net_set_channels,
 };
 
 void nfp_net_set_ethtool_ops(struct net_device *netdev)
index cfed40c0e310aaacb524202141a808db944caa48..18a851eb35084397dd6fa003b3def76ed42a6960 100644 (file)
@@ -111,6 +111,9 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
        const struct tc_action *a;
        LIST_HEAD(actions);
 
+       if (!cls_bpf->exts)
+               return NN_ACT_XDP;
+
        /* TC direct action */
        if (cls_bpf->exts_integrated) {
                if (tc_no_actions(cls_bpf->exts))
@@ -233,9 +236,7 @@ static int nfp_net_bpf_stop(struct nfp_net *nn)
        return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
 }
 
-int
-nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
-                   struct tc_cls_bpf_offload *cls_bpf)
+int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
 {
        struct nfp_bpf_result res;
        dma_addr_t dma_addr;
index 2800bbf65a891e14dcfc06d9a432f66c1add0640..d065235034d484e8e33160eb1cbf8f3b6b5d3a04 100644 (file)
@@ -63,9 +63,7 @@ static void nfp_netvf_get_mac_addr(struct nfp_net *nn)
        u8 mac_addr[ETH_ALEN];
 
        put_unaligned_be32(nn_readl(nn, NFP_NET_CFG_MACADDR + 0), &mac_addr[0]);
-       /* We can't do readw for NFP-3200 compatibility */
-       put_unaligned_be16(nn_readl(nn, NFP_NET_CFG_MACADDR + 4) >> 16,
-                          &mac_addr[4]);
+       put_unaligned_be16(nn_readw(nn, NFP_NET_CFG_MACADDR + 6), &mac_addr[4]);
 
        if (!is_valid_ether_addr(mac_addr)) {
                eth_hw_addr_random(nn->netdev);
@@ -86,7 +84,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
        int tx_bar_no, rx_bar_no;
        u8 __iomem *ctrl_bar;
        struct nfp_net *nn;
-       int is_nfp3200;
        u32 startq;
        int stride;
        int err;
@@ -101,15 +98,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
                goto err_pci_disable;
        }
 
-       switch (pdev->device) {
-       case PCI_DEVICE_NFP6000VF:
-               is_nfp3200 = 0;
-               break;
-       default:
-               err = -ENODEV;
-               goto err_pci_regions;
-       }
-
        pci_set_master(pdev);
 
        err = dma_set_mask_and_coherent(&pdev->dev,
@@ -149,15 +137,9 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
        } else {
                switch (fw_ver.major) {
                case 1 ... 4:
-                       if (is_nfp3200) {
-                               stride = 2;
-                               tx_bar_no = NFP_NET_Q0_BAR;
-                               rx_bar_no = NFP_NET_Q1_BAR;
-                       } else {
-                               stride = 4;
-                               tx_bar_no = NFP_NET_Q0_BAR;
-                               rx_bar_no = tx_bar_no;
-                       }
+                       stride = 4;
+                       tx_bar_no = NFP_NET_Q0_BAR;
+                       rx_bar_no = tx_bar_no;
                        break;
                default:
                        dev_err(&pdev->dev, "Unsupported Firmware ABI %d.%d.%d.%d\n",
@@ -189,20 +171,10 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
                max_rx_rings = (rx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / 2;
        }
 
-       /* XXX Implement a workaround for THB-350 here.  Ideally, we
-        * have a different PCI ID for A rev VFs.
-        */
-       switch (pdev->device) {
-       case PCI_DEVICE_NFP6000VF:
-               startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ);
-               tx_bar_off = NFP_PCIE_QUEUE(startq);
-               startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ);
-               rx_bar_off = NFP_PCIE_QUEUE(startq);
-               break;
-       default:
-               err = -ENODEV;
-               goto err_ctrl_unmap;
-       }
+       startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ);
+       tx_bar_off = NFP_PCIE_QUEUE(startq);
+       startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ);
+       rx_bar_off = NFP_PCIE_QUEUE(startq);
 
        /* Allocate and initialise the netdev */
        nn = nfp_net_netdev_alloc(pdev, max_tx_rings, max_rx_rings);
@@ -214,7 +186,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
        nn->fw_ver = fw_ver;
        nn->ctrl_bar = ctrl_bar;
        nn->is_vf = 1;
-       nn->is_nfp3200 = is_nfp3200;
        nn->stride_tx = stride;
        nn->stride_rx = stride;
 
index 0df1391f9663b257a170e5bbf6503268207b3d2d..3cfd105034463f5d503b8748a6dec5f720ab902b 100644 (file)
@@ -107,15 +107,10 @@ config QEDE
        ---help---
          This enables the support for ...
 
-config INFINIBAND_QEDR
-       tristate "QLogic qede RoCE sources [debug]"
-       depends on QEDE && 64BIT
-       select QED_LL2
-       default n
-       ---help---
-         This provides a temporary node that allows the compilation
-         and logical testing of the InfiniBand over Ethernet support
-         for QLogic QED. This would be replaced by the 'real' option
-         once the QEDR driver is added [+relocated].
+config QED_RDMA
+       bool
+
+config QED_ISCSI
+       bool
 
 endif # NET_VENDOR_QLOGIC
index cda0af7fbc20dae2bdaeef710faa3e42486f9c64..729e43768e99d48ca57561df3233bb9d3a33b5df 100644 (file)
@@ -5,4 +5,5 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
         qed_selftest.o qed_dcbx.o qed_debug.o
 qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
 qed-$(CONFIG_QED_LL2) += qed_ll2.o
-qed-$(CONFIG_INFINIBAND_QEDR) += qed_roce.o
+qed-$(CONFIG_QED_RDMA) += qed_roce.o
+qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed_ooo.o
index 653bb5735f0c63b979b36004f2e0616289c7c9c9..44c184ebe3b0da5f9438fc5568c1bdd105e17f81 100644 (file)
@@ -35,6 +35,7 @@ extern const struct qed_common_ops qed_common_ops_pass;
 
 #define QED_WFQ_UNIT   100
 
+#define ISCSI_BDQ_ID(_port_id) (_port_id)
 #define QED_WID_SIZE            (1024)
 #define QED_PF_DEMS_SIZE        (4)
 
@@ -154,7 +155,10 @@ struct qed_qm_iids {
        u32 tids;
 };
 
-enum QED_RESOURCES {
+/* HW / FW resources, output of features supported below, most information
+ * is received from MFW.
+ */
+enum qed_resources {
        QED_SB,
        QED_L2_QUEUE,
        QED_VPORT,
@@ -166,6 +170,7 @@ enum QED_RESOURCES {
        QED_RDMA_CNQ_RAM,
        QED_ILT,
        QED_LL2_QUEUE,
+       QED_CMDQS_CQS,
        QED_RDMA_STATS_QUEUE,
        QED_MAX_RESC,
 };
@@ -174,6 +179,7 @@ enum QED_FEATURE {
        QED_PF_L2_QUE,
        QED_VF,
        QED_RDMA_CNQ,
+       QED_VF_L2_QUE,
        QED_MAX_FEATURES,
 };
 
@@ -195,6 +201,11 @@ enum qed_dev_cap {
        QED_DEV_CAP_ROCE,
 };
 
+enum qed_wol_support {
+       QED_WOL_SUPPORT_NONE,
+       QED_WOL_SUPPORT_PME,
+};
+
 struct qed_hw_info {
        /* PCI personality */
        enum qed_pci_personality        personality;
@@ -226,15 +237,9 @@ struct qed_hw_info {
        u32                             port_mode;
        u32                             hw_mode;
        unsigned long           device_capabilities;
-};
+       u16                             mtu;
 
-struct qed_hw_cid_data {
-       u32     cid;
-       bool    b_cid_allocated;
-
-       /* Additional identifiers */
-       u16     opaque_fid;
-       u8      vport_id;
+       enum qed_wol_support b_wol_support;
 };
 
 /* maximun size of read/write commands (HW limit) */
@@ -378,7 +383,9 @@ struct qed_hwfn {
        /* Protocol related */
        bool                            using_ll2;
        struct qed_ll2_info             *p_ll2_info;
+       struct qed_ooo_info             *p_ooo_info;
        struct qed_rdma_info            *p_rdma_info;
+       struct qed_iscsi_info           *p_iscsi_info;
        struct qed_pf_params            pf_params;
 
        bool b_rdma_enabled_in_prs;
@@ -403,9 +410,6 @@ struct qed_hwfn {
 
        struct qed_dcbx_info            *p_dcbx_info;
 
-       struct qed_hw_cid_data          *p_tx_cids;
-       struct qed_hw_cid_data          *p_rx_cids;
-
        struct qed_dmae_info            dmae_info;
 
        /* QM init */
@@ -538,7 +542,9 @@ struct qed_dev {
        u8                              mcp_rev;
        u8                              boot_mode;
 
-       u8                              wol;
+       /* WoL related configurations */
+       u8 wol_config;
+       u8 wol_mac[ETH_ALEN];
 
        u32                             int_mode;
        enum qed_coalescing_mode        int_coalescing_mode;
@@ -578,6 +584,8 @@ struct qed_dev {
        /* Linux specific here */
        struct  qede_dev                *edev;
        struct  pci_dev                 *pdev;
+       u32 flags;
+#define QED_FLAG_STORAGE_STARTED       (BIT(0))
        int                             msg_enable;
 
        struct pci_params               pci_params;
@@ -591,6 +599,7 @@ struct qed_dev {
        union {
                struct qed_common_cb_ops        *common;
                struct qed_eth_cb_ops           *eth;
+               struct qed_iscsi_cb_ops         *iscsi;
        } protocol_ops;
        void                            *ops_cookie;
 
@@ -600,7 +609,7 @@ struct qed_dev {
        struct qed_cb_ll2_info          *ll2;
        u8                              ll2_mac_address[ETH_ALEN];
 #endif
-
+       DECLARE_HASHTABLE(connections, 10);
        const struct firmware           *firmware;
 
        u32 rdma_max_sge;
index 82370a1a59ad9e3a4e316c223d3c83d2641a80c5..0c42c240b5cfdff66dc8ef021b2401812a0fec17 100644 (file)
 #define TM_ALIGN        BIT(TM_SHIFT)
 #define TM_ELEM_SIZE    4
 
-/* ILT constants */
-#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
 /* For RoCE we configure to 64K to cover for RoCE max tasks 256K purpose. */
-#define ILT_DEFAULT_HW_P_SIZE          4
-#else
-#define ILT_DEFAULT_HW_P_SIZE          3
-#endif
+#define ILT_DEFAULT_HW_P_SIZE  (IS_ENABLED(CONFIG_QED_RDMA) ? 4 : 3)
 
 #define ILT_PAGE_IN_BYTES(hw_p_size)   (1U << ((hw_p_size) + 12))
 #define ILT_CFG_REG(cli, reg)  PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET
@@ -349,14 +344,14 @@ static struct qed_tid_seg *qed_cxt_tid_seg_info(struct qed_hwfn *p_hwfn,
        return NULL;
 }
 
-void qed_cxt_set_srq_count(struct qed_hwfn *p_hwfn, u32 num_srqs)
+static void qed_cxt_set_srq_count(struct qed_hwfn *p_hwfn, u32 num_srqs)
 {
        struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr;
 
        p_mgr->srq_count = num_srqs;
 }
 
-u32 qed_cxt_get_srq_count(struct qed_hwfn *p_hwfn)
+static u32 qed_cxt_get_srq_count(struct qed_hwfn *p_hwfn)
 {
        struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr;
 
@@ -1804,8 +1799,8 @@ int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, struct qed_cxt_info *p_info)
        return 0;
 }
 
-void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn,
-                           struct qed_rdma_pf_params *p_params)
+static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn,
+                                  struct qed_rdma_pf_params *p_params)
 {
        u32 num_cons, num_tasks, num_qps, num_mrs, num_srqs;
        enum protocol_type proto;
index 130da1c0490be6ff482e563c088aee66d23db131..a4789a93b69267cd749b92962355083cd007afa4 100644 (file)
@@ -1190,6 +1190,7 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn,
        if (!dcbx_info)
                return -ENOMEM;
 
+       memset(dcbx_info, 0, sizeof(*dcbx_info));
        rc = qed_dcbx_query_params(p_hwfn, dcbx_info, QED_DCBX_OPERATIONAL_MIB);
        if (rc) {
                kfree(dcbx_info);
@@ -1225,6 +1226,7 @@ static struct qed_dcbx_get *qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn,
        if (!dcbx_info)
                return NULL;
 
+       memset(dcbx_info, 0, sizeof(*dcbx_info));
        if (qed_dcbx_query_params(hwfn, dcbx_info, type)) {
                kfree(dcbx_info);
                return NULL;
index 88e7d5bef9098462fa06ca1f0851a396ac8386d8..68f19ca57f965b13d6fbf32c85e86d65e500b881 100644 (file)
@@ -405,7 +405,7 @@ struct phy_defs {
 /***************************** Constant Arrays *******************************/
 
 /* Debug arrays */
-static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {0} };
+static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {NULL} };
 
 /* Chip constant definitions array */
 static struct chip_defs s_chip_defs[MAX_CHIP_IDS] = {
@@ -4028,10 +4028,10 @@ static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn,
 }
 
 /* Dump MCP Trace */
-enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
-                                  struct qed_ptt *p_ptt,
-                                  u32 *dump_buf,
-                                  bool dump, u32 *num_dumped_dwords)
+static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+                                         struct qed_ptt *p_ptt,
+                                         u32 *dump_buf,
+                                         bool dump, u32 *num_dumped_dwords)
 {
        u32 trace_data_grc_addr, trace_data_size_bytes, trace_data_size_dwords;
        u32 trace_meta_size_dwords, running_bundle_id, offset = 0;
@@ -4130,10 +4130,10 @@ enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 }
 
 /* Dump GRC FIFO */
-enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn,
-                                 struct qed_ptt *p_ptt,
-                                 u32 *dump_buf,
-                                 bool dump, u32 *num_dumped_dwords)
+static enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+                                        struct qed_ptt *p_ptt,
+                                        u32 *dump_buf,
+                                        bool dump, u32 *num_dumped_dwords)
 {
        u32 offset = 0, dwords_read, size_param_offset;
        bool fifo_has_data;
@@ -4192,10 +4192,10 @@ enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 }
 
 /* Dump IGU FIFO */
-enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn,
-                                 struct qed_ptt *p_ptt,
-                                 u32 *dump_buf,
-                                 bool dump, u32 *num_dumped_dwords)
+static enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn,
+                                        struct qed_ptt *p_ptt,
+                                        u32 *dump_buf,
+                                        bool dump, u32 *num_dumped_dwords)
 {
        u32 offset = 0, dwords_read, size_param_offset;
        bool fifo_has_data;
@@ -4255,10 +4255,11 @@ enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 }
 
 /* Protection Override dump */
-enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn,
-                                            struct qed_ptt *p_ptt,
-                                            u32 *dump_buf,
-                                            bool dump, u32 *num_dumped_dwords)
+static enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn,
+                                                   struct qed_ptt *p_ptt,
+                                                   u32 *dump_buf,
+                                                   bool dump,
+                                                   u32 *num_dumped_dwords)
 {
        u32 offset = 0, size_param_offset, override_window_dwords;
 
@@ -6339,10 +6340,11 @@ enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn,
 }
 
 /* Wrapper for unifying the idle_chk and mcp_trace api */
-enum dbg_status qed_print_idle_chk_results_wrapper(struct qed_hwfn *p_hwfn,
-                                                  u32 *dump_buf,
-                                                  u32 num_dumped_dwords,
-                                                  char *results_buf)
+static enum dbg_status
+qed_print_idle_chk_results_wrapper(struct qed_hwfn *p_hwfn,
+                                  u32 *dump_buf,
+                                  u32 num_dumped_dwords,
+                                  char *results_buf)
 {
        u32 num_errors, num_warnnings;
 
@@ -6413,8 +6415,8 @@ static void qed_dbg_print_feature(u8 *p_text_buf, u32 text_size)
 
 #define QED_RESULTS_BUF_MIN_SIZE 16
 /* Generic function for decoding debug feature info */
-enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
-                              enum qed_dbg_features feature_idx)
+static enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
+                                     enum qed_dbg_features feature_idx)
 {
        struct qed_dbg_feature *feature =
            &p_hwfn->cdev->dbg_params.features[feature_idx];
@@ -6480,8 +6482,9 @@ enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
 }
 
 /* Generic function for performing the dump of a debug feature. */
-enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-                            enum qed_dbg_features feature_idx)
+static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn,
+                                   struct qed_ptt *p_ptt,
+                                   enum qed_dbg_features feature_idx)
 {
        struct qed_dbg_feature *feature =
            &p_hwfn->cdev->dbg_params.features[feature_idx];
index 754f6a908858dda8eec6f529a6281c7ba42c9ab2..3b2250021c5f29813eb3e62257f0daa1d33ab393 100644 (file)
 #include "qed_hw.h"
 #include "qed_init_ops.h"
 #include "qed_int.h"
+#include "qed_iscsi.h"
 #include "qed_ll2.h"
 #include "qed_mcp.h"
+#include "qed_ooo.h"
 #include "qed_reg_addr.h"
 #include "qed_sp.h"
 #include "qed_sriov.h"
@@ -134,15 +136,6 @@ void qed_resc_free(struct qed_dev *cdev)
 
        kfree(cdev->reset_stats);
 
-       for_each_hwfn(cdev, i) {
-               struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
-
-               kfree(p_hwfn->p_tx_cids);
-               p_hwfn->p_tx_cids = NULL;
-               kfree(p_hwfn->p_rx_cids);
-               p_hwfn->p_rx_cids = NULL;
-       }
-
        for_each_hwfn(cdev, i) {
                struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
@@ -155,6 +148,10 @@ void qed_resc_free(struct qed_dev *cdev)
 #ifdef CONFIG_QED_LL2
                qed_ll2_free(p_hwfn, p_hwfn->p_ll2_info);
 #endif
+               if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
+                       qed_iscsi_free(p_hwfn, p_hwfn->p_iscsi_info);
+                       qed_ooo_free(p_hwfn, p_hwfn->p_ooo_info);
+               }
                qed_iov_free(p_hwfn);
                qed_dmae_info_free(p_hwfn);
                qed_dcbx_info_free(p_hwfn, p_hwfn->p_dcbx_info);
@@ -411,6 +408,8 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 
 int qed_resc_alloc(struct qed_dev *cdev)
 {
+       struct qed_iscsi_info *p_iscsi_info;
+       struct qed_ooo_info *p_ooo_info;
 #ifdef CONFIG_QED_LL2
        struct qed_ll2_info *p_ll2_info;
 #endif
@@ -425,23 +424,6 @@ int qed_resc_alloc(struct qed_dev *cdev)
        if (!cdev->fw_data)
                return -ENOMEM;
 
-       /* Allocate Memory for the Queue->CID mapping */
-       for_each_hwfn(cdev, i) {
-               struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
-               int tx_size = sizeof(struct qed_hw_cid_data) *
-                                    RESC_NUM(p_hwfn, QED_L2_QUEUE);
-               int rx_size = sizeof(struct qed_hw_cid_data) *
-                                    RESC_NUM(p_hwfn, QED_L2_QUEUE);
-
-               p_hwfn->p_tx_cids = kzalloc(tx_size, GFP_KERNEL);
-               if (!p_hwfn->p_tx_cids)
-                       goto alloc_no_mem;
-
-               p_hwfn->p_rx_cids = kzalloc(rx_size, GFP_KERNEL);
-               if (!p_hwfn->p_rx_cids)
-                       goto alloc_no_mem;
-       }
-
        for_each_hwfn(cdev, i) {
                struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
                u32 n_eqes, num_cons;
@@ -497,12 +479,13 @@ int qed_resc_alloc(struct qed_dev *cdev)
                if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
                        num_cons = qed_cxt_get_proto_cid_count(p_hwfn,
                                                               PROTOCOLID_ROCE,
-                                                              0) * 2;
+                                                              NULL) * 2;
                        n_eqes += num_cons + 2 * MAX_NUM_VFS_BB;
                } else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
                        num_cons =
                            qed_cxt_get_proto_cid_count(p_hwfn,
-                                                       PROTOCOLID_ISCSI, 0);
+                                                       PROTOCOLID_ISCSI,
+                                                       NULL);
                        n_eqes += 2 * num_cons;
                }
 
@@ -532,6 +515,16 @@ int qed_resc_alloc(struct qed_dev *cdev)
                        p_hwfn->p_ll2_info = p_ll2_info;
                }
 #endif
+               if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
+                       p_iscsi_info = qed_iscsi_alloc(p_hwfn);
+                       if (!p_iscsi_info)
+                               goto alloc_no_mem;
+                       p_hwfn->p_iscsi_info = p_iscsi_info;
+                       p_ooo_info = qed_ooo_alloc(p_hwfn);
+                       if (!p_ooo_info)
+                               goto alloc_no_mem;
+                       p_hwfn->p_ooo_info = p_ooo_info;
+               }
 
                /* DMA info initialization */
                rc = qed_dmae_info_alloc(p_hwfn);
@@ -585,6 +578,10 @@ void qed_resc_setup(struct qed_dev *cdev)
                if (p_hwfn->using_ll2)
                        qed_ll2_setup(p_hwfn, p_hwfn->p_ll2_info);
 #endif
+               if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
+                       qed_iscsi_setup(p_hwfn, p_hwfn->p_iscsi_info);
+                       qed_ooo_setup(p_hwfn, p_hwfn->p_ooo_info);
+               }
        }
 }
 
@@ -1056,8 +1053,10 @@ int qed_hw_init(struct qed_dev *cdev,
                bool allow_npar_tx_switch,
                const u8 *bin_fw_data)
 {
-       u32 load_code, param;
-       int rc, mfw_rc, i;
+       u32 load_code, param, drv_mb_param;
+       bool b_default_mtu = true;
+       struct qed_hwfn *p_hwfn;
+       int rc = 0, mfw_rc, i;
 
        if ((int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) {
                DP_NOTICE(cdev, "MSI mode is not supported for CMT devices\n");
@@ -1073,6 +1072,12 @@ int qed_hw_init(struct qed_dev *cdev,
        for_each_hwfn(cdev, i) {
                struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
+               /* If management didn't provide a default, set one of our own */
+               if (!p_hwfn->hw_info.mtu) {
+                       p_hwfn->hw_info.mtu = 1500;
+                       b_default_mtu = false;
+               }
+
                if (IS_VF(cdev)) {
                        p_hwfn->b_int_enabled = 1;
                        continue;
@@ -1156,6 +1161,38 @@ int qed_hw_init(struct qed_dev *cdev,
                p_hwfn->hw_init_done = true;
        }
 
+       if (IS_PF(cdev)) {
+               p_hwfn = QED_LEADING_HWFN(cdev);
+               drv_mb_param = (FW_MAJOR_VERSION << 24) |
+                              (FW_MINOR_VERSION << 16) |
+                              (FW_REVISION_VERSION << 8) |
+                              (FW_ENGINEERING_VERSION);
+               rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
+                                DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER,
+                                drv_mb_param, &load_code, &param);
+               if (rc)
+                       DP_INFO(p_hwfn, "Failed to update firmware version\n");
+
+               if (!b_default_mtu) {
+                       rc = qed_mcp_ov_update_mtu(p_hwfn, p_hwfn->p_main_ptt,
+                                                  p_hwfn->hw_info.mtu);
+                       if (rc)
+                               DP_INFO(p_hwfn,
+                                       "Failed to update default mtu\n");
+               }
+
+               rc = qed_mcp_ov_update_driver_state(p_hwfn,
+                                                   p_hwfn->p_main_ptt,
+                                                 QED_OV_DRIVER_STATE_DISABLED);
+               if (rc)
+                       DP_INFO(p_hwfn, "Failed to update driver state\n");
+
+               rc = qed_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt,
+                                              QED_OV_ESWITCH_VEB);
+               if (rc)
+                       DP_INFO(p_hwfn, "Failed to update eswitch mode\n");
+       }
+
        return 0;
 }
 
@@ -1323,8 +1360,24 @@ int qed_hw_reset(struct qed_dev *cdev)
 {
        int rc = 0;
        u32 unload_resp, unload_param;
+       u32 wol_param;
        int i;
 
+       switch (cdev->wol_config) {
+       case QED_OV_WOL_DISABLED:
+               wol_param = DRV_MB_PARAM_UNLOAD_WOL_DISABLED;
+               break;
+       case QED_OV_WOL_ENABLED:
+               wol_param = DRV_MB_PARAM_UNLOAD_WOL_ENABLED;
+               break;
+       default:
+               DP_NOTICE(cdev,
+                         "Unknown WoL configuration %02x\n", cdev->wol_config);
+               /* Fallthrough */
+       case QED_OV_WOL_DEFAULT:
+               wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP;
+       }
+
        for_each_hwfn(cdev, i) {
                struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
@@ -1353,8 +1406,7 @@ int qed_hw_reset(struct qed_dev *cdev)
 
                /* Send unload command to MCP */
                rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
-                                DRV_MSG_CODE_UNLOAD_REQ,
-                                DRV_MB_PARAM_UNLOAD_WOL_MCP,
+                                DRV_MSG_CODE_UNLOAD_REQ, wol_param,
                                 &unload_resp, &unload_param);
                if (rc) {
                        DP_NOTICE(p_hwfn, "qed_hw_reset: UNLOAD_REQ failed\n");
@@ -1420,71 +1472,276 @@ static void get_function_id(struct qed_hwfn *p_hwfn)
 static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 {
        u32 *feat_num = p_hwfn->hw_info.feat_num;
+       struct qed_sb_cnt_info sb_cnt_info;
        int num_features = 1;
 
-#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
-       /* Roce CNQ each requires: 1 status block + 1 CNQ. We divide the
-        * status blocks equally between L2 / RoCE but with consideration as
-        * to how many l2 queues / cnqs we have
-        */
-       if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
+       if (IS_ENABLED(CONFIG_QED_RDMA) &&
+           p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
+               /* Roce CNQ each requires: 1 status block + 1 CNQ. We divide
+                * the status blocks equally between L2 / RoCE but with
+                * consideration as to how many l2 queues / cnqs we have.
+                */
                num_features++;
 
                feat_num[QED_RDMA_CNQ] =
                        min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features,
                              RESC_NUM(p_hwfn, QED_RDMA_CNQ_RAM));
        }
-#endif
+
        feat_num[QED_PF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) /
                                                num_features,
                                        RESC_NUM(p_hwfn, QED_L2_QUEUE));
-       DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE,
-                  "#PF_L2_QUEUES=%d #SBS=%d num_features=%d\n",
-                  feat_num[QED_PF_L2_QUE], RESC_NUM(p_hwfn, QED_SB),
-                  num_features);
+
+       memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
+       qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
+       feat_num[QED_VF_L2_QUE] =
+           min_t(u32,
+                 RESC_NUM(p_hwfn, QED_L2_QUEUE) -
+                 FEAT_NUM(p_hwfn, QED_PF_L2_QUE), sb_cnt_info.sb_iov_cnt);
+
+       DP_VERBOSE(p_hwfn,
+                  NETIF_MSG_PROBE,
+                  "#PF_L2_QUEUES=%d VF_L2_QUEUES=%d #ROCE_CNQ=%d #SBS=%d num_features=%d\n",
+                  (int)FEAT_NUM(p_hwfn, QED_PF_L2_QUE),
+                  (int)FEAT_NUM(p_hwfn, QED_VF_L2_QUE),
+                  (int)FEAT_NUM(p_hwfn, QED_RDMA_CNQ),
+                  RESC_NUM(p_hwfn, QED_SB), num_features);
 }
 
-static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
+static enum resource_id_enum qed_hw_get_mfw_res_id(enum qed_resources res_id)
+{
+       enum resource_id_enum mfw_res_id = RESOURCE_NUM_INVALID;
+
+       switch (res_id) {
+       case QED_SB:
+               mfw_res_id = RESOURCE_NUM_SB_E;
+               break;
+       case QED_L2_QUEUE:
+               mfw_res_id = RESOURCE_NUM_L2_QUEUE_E;
+               break;
+       case QED_VPORT:
+               mfw_res_id = RESOURCE_NUM_VPORT_E;
+               break;
+       case QED_RSS_ENG:
+               mfw_res_id = RESOURCE_NUM_RSS_ENGINES_E;
+               break;
+       case QED_PQ:
+               mfw_res_id = RESOURCE_NUM_PQ_E;
+               break;
+       case QED_RL:
+               mfw_res_id = RESOURCE_NUM_RL_E;
+               break;
+       case QED_MAC:
+       case QED_VLAN:
+               /* Each VFC resource can accommodate both a MAC and a VLAN */
+               mfw_res_id = RESOURCE_VFC_FILTER_E;
+               break;
+       case QED_ILT:
+               mfw_res_id = RESOURCE_ILT_E;
+               break;
+       case QED_LL2_QUEUE:
+               mfw_res_id = RESOURCE_LL2_QUEUE_E;
+               break;
+       case QED_RDMA_CNQ_RAM:
+       case QED_CMDQS_CQS:
+               /* CNQ/CMDQS are the same resource */
+               mfw_res_id = RESOURCE_CQS_E;
+               break;
+       case QED_RDMA_STATS_QUEUE:
+               mfw_res_id = RESOURCE_RDMA_STATS_QUEUE_E;
+               break;
+       default:
+               break;
+       }
+
+       return mfw_res_id;
+}
+
+static u32 qed_hw_get_dflt_resc_num(struct qed_hwfn *p_hwfn,
+                                   enum qed_resources res_id)
 {
-       u8 enabled_func_idx = p_hwfn->enabled_func_idx;
-       u32 *resc_start = p_hwfn->hw_info.resc_start;
        u8 num_funcs = p_hwfn->num_funcs_on_engine;
-       u32 *resc_num = p_hwfn->hw_info.resc_num;
        struct qed_sb_cnt_info sb_cnt_info;
-       int i, max_vf_vlan_filters;
+       u32 dflt_resc_num = 0;
 
-       memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
+       switch (res_id) {
+       case QED_SB:
+               memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
+               qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
+               dflt_resc_num = sb_cnt_info.sb_cnt;
+               break;
+       case QED_L2_QUEUE:
+               dflt_resc_num = MAX_NUM_L2_QUEUES_BB / num_funcs;
+               break;
+       case QED_VPORT:
+               dflt_resc_num = MAX_NUM_VPORTS_BB / num_funcs;
+               break;
+       case QED_RSS_ENG:
+               dflt_resc_num = ETH_RSS_ENGINE_NUM_BB / num_funcs;
+               break;
+       case QED_PQ:
+               /* The granularity of the PQs is 8 */
+               dflt_resc_num = MAX_QM_TX_QUEUES_BB / num_funcs;
+               dflt_resc_num &= ~0x7;
+               break;
+       case QED_RL:
+               dflt_resc_num = MAX_QM_GLOBAL_RLS / num_funcs;
+               break;
+       case QED_MAC:
+       case QED_VLAN:
+               /* Each VFC resource can accommodate both a MAC and a VLAN */
+               dflt_resc_num = ETH_NUM_MAC_FILTERS / num_funcs;
+               break;
+       case QED_ILT:
+               dflt_resc_num = PXP_NUM_ILT_RECORDS_BB / num_funcs;
+               break;
+       case QED_LL2_QUEUE:
+               dflt_resc_num = MAX_NUM_LL2_RX_QUEUES / num_funcs;
+               break;
+       case QED_RDMA_CNQ_RAM:
+       case QED_CMDQS_CQS:
+               /* CNQ/CMDQS are the same resource */
+               dflt_resc_num = NUM_OF_CMDQS_CQS / num_funcs;
+               break;
+       case QED_RDMA_STATS_QUEUE:
+               dflt_resc_num = RDMA_NUM_STATISTIC_COUNTERS_BB / num_funcs;
+               break;
+       default:
+               break;
+       }
 
-#ifdef CONFIG_QED_SRIOV
-       max_vf_vlan_filters = QED_ETH_MAX_VF_NUM_VLAN_FILTERS;
-#else
-       max_vf_vlan_filters = 0;
-#endif
+       return dflt_resc_num;
+}
+
+static const char *qed_hw_get_resc_name(enum qed_resources res_id)
+{
+       switch (res_id) {
+       case QED_SB:
+               return "SB";
+       case QED_L2_QUEUE:
+               return "L2_QUEUE";
+       case QED_VPORT:
+               return "VPORT";
+       case QED_RSS_ENG:
+               return "RSS_ENG";
+       case QED_PQ:
+               return "PQ";
+       case QED_RL:
+               return "RL";
+       case QED_MAC:
+               return "MAC";
+       case QED_VLAN:
+               return "VLAN";
+       case QED_RDMA_CNQ_RAM:
+               return "RDMA_CNQ_RAM";
+       case QED_ILT:
+               return "ILT";
+       case QED_LL2_QUEUE:
+               return "LL2_QUEUE";
+       case QED_CMDQS_CQS:
+               return "CMDQS_CQS";
+       case QED_RDMA_STATS_QUEUE:
+               return "RDMA_STATS_QUEUE";
+       default:
+               return "UNKNOWN_RESOURCE";
+       }
+}
 
-       qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
+static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn,
+                               enum qed_resources res_id)
+{
+       u32 dflt_resc_num = 0, dflt_resc_start = 0, mcp_resp, mcp_param;
+       u32 *p_resc_num, *p_resc_start;
+       struct resource_info resc_info;
+       int rc;
+
+       p_resc_num = &RESC_NUM(p_hwfn, res_id);
+       p_resc_start = &RESC_START(p_hwfn, res_id);
+
+       /* Default values assume that each function received an equal share */
+       dflt_resc_num = qed_hw_get_dflt_resc_num(p_hwfn, res_id);
+       if (!dflt_resc_num) {
+               DP_ERR(p_hwfn,
+                      "Failed to get default amount for resource %d [%s]\n",
+                      res_id, qed_hw_get_resc_name(res_id));
+               return -EINVAL;
+       }
+       dflt_resc_start = dflt_resc_num * p_hwfn->enabled_func_idx;
+
+       memset(&resc_info, 0, sizeof(resc_info));
+       resc_info.res_id = qed_hw_get_mfw_res_id(res_id);
+       if (resc_info.res_id == RESOURCE_NUM_INVALID) {
+               DP_ERR(p_hwfn,
+                      "Failed to match resource %d [%s] with the MFW resources\n",
+                      res_id, qed_hw_get_resc_name(res_id));
+               return -EINVAL;
+       }
 
-       resc_num[QED_SB] = min_t(u32,
-                                (MAX_SB_PER_PATH_BB / num_funcs),
-                                sb_cnt_info.sb_cnt);
-       resc_num[QED_L2_QUEUE] = MAX_NUM_L2_QUEUES_BB / num_funcs;
-       resc_num[QED_VPORT] = MAX_NUM_VPORTS_BB / num_funcs;
-       resc_num[QED_RSS_ENG] = ETH_RSS_ENGINE_NUM_BB / num_funcs;
-       resc_num[QED_PQ] = MAX_QM_TX_QUEUES_BB / num_funcs;
-       resc_num[QED_RL] = min_t(u32, 64, resc_num[QED_VPORT]);
-       resc_num[QED_MAC] = ETH_NUM_MAC_FILTERS / num_funcs;
-       resc_num[QED_VLAN] = (ETH_NUM_VLAN_FILTERS - 1 /*For vlan0*/) /
-                            num_funcs;
-       resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs;
-       resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs;
-       resc_num[QED_RDMA_CNQ_RAM] = NUM_OF_CMDQS_CQS / num_funcs;
-       resc_num[QED_RDMA_STATS_QUEUE] = RDMA_NUM_STATISTIC_COUNTERS_BB /
-                                        num_funcs;
-
-       for (i = 0; i < QED_MAX_RESC; i++)
-               resc_start[i] = resc_num[i] * enabled_func_idx;
+       rc = qed_mcp_get_resc_info(p_hwfn, p_hwfn->p_main_ptt, &resc_info,
+                                  &mcp_resp, &mcp_param);
+       if (rc) {
+               DP_NOTICE(p_hwfn,
+                         "MFW response failure for an allocation request for resource %d [%s]\n",
+                         res_id, qed_hw_get_resc_name(res_id));
+               return rc;
+       }
+
+       /* Default driver values are applied in the following cases:
+        * - The resource allocation MB command is not supported by the MFW
+        * - There is an internal error in the MFW while processing the request
+        * - The resource ID is unknown to the MFW
+        */
+       if (mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_OK &&
+           mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_DEPRECATED) {
+               DP_NOTICE(p_hwfn,
+                         "Resource %d [%s]: No allocation info was received [mcp_resp 0x%x]. Applying default values [num %d, start %d].\n",
+                         res_id,
+                         qed_hw_get_resc_name(res_id),
+                         mcp_resp, dflt_resc_num, dflt_resc_start);
+               *p_resc_num = dflt_resc_num;
+               *p_resc_start = dflt_resc_start;
+               goto out;
+       }
+
+       /* Special handling for status blocks; Would be revised in future */
+       if (res_id == QED_SB) {
+               resc_info.size -= 1;
+               resc_info.offset -= p_hwfn->enabled_func_idx;
+       }
+
+       *p_resc_num = resc_info.size;
+       *p_resc_start = resc_info.offset;
+
+out:
+       /* PQs have to divide by 8 [that's the HW granularity].
+        * Reduce number so it would fit.
+        */
+       if ((res_id == QED_PQ) && ((*p_resc_num % 8) || (*p_resc_start % 8))) {
+               DP_INFO(p_hwfn,
+                       "PQs need to align by 8; Number %08x --> %08x, Start %08x --> %08x\n",
+                       *p_resc_num,
+                       (*p_resc_num) & ~0x7,
+                       *p_resc_start, (*p_resc_start) & ~0x7);
+               *p_resc_num &= ~0x7;
+               *p_resc_start &= ~0x7;
+       }
+
+       return 0;
+}
+
+static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
+{
+       u8 res_id;
+       int rc;
+
+       for (res_id = 0; res_id < QED_MAX_RESC; res_id++) {
+               rc = qed_hw_set_resc_info(p_hwfn, res_id);
+               if (rc)
+                       return rc;
+       }
 
        /* Sanity for ILT */
-       if (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB) {
+       if ((RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB)) {
                DP_NOTICE(p_hwfn, "Can't assign ILT pages [%08x,...,%08x]\n",
                          RESC_START(p_hwfn, QED_ILT),
                          RESC_END(p_hwfn, QED_ILT) - 1);
@@ -1494,34 +1751,12 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
        qed_hw_set_feat(p_hwfn);
 
        DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE,
-                  "The numbers for each resource are:\n"
-                  "SB = %d start = %d\n"
-                  "L2_QUEUE = %d start = %d\n"
-                  "VPORT = %d start = %d\n"
-                  "PQ = %d start = %d\n"
-                  "RL = %d start = %d\n"
-                  "MAC = %d start = %d\n"
-                  "VLAN = %d start = %d\n"
-                  "ILT = %d start = %d\n"
-                  "LL2_QUEUE = %d start = %d\n",
-                  p_hwfn->hw_info.resc_num[QED_SB],
-                  p_hwfn->hw_info.resc_start[QED_SB],
-                  p_hwfn->hw_info.resc_num[QED_L2_QUEUE],
-                  p_hwfn->hw_info.resc_start[QED_L2_QUEUE],
-                  p_hwfn->hw_info.resc_num[QED_VPORT],
-                  p_hwfn->hw_info.resc_start[QED_VPORT],
-                  p_hwfn->hw_info.resc_num[QED_PQ],
-                  p_hwfn->hw_info.resc_start[QED_PQ],
-                  p_hwfn->hw_info.resc_num[QED_RL],
-                  p_hwfn->hw_info.resc_start[QED_RL],
-                  p_hwfn->hw_info.resc_num[QED_MAC],
-                  p_hwfn->hw_info.resc_start[QED_MAC],
-                  p_hwfn->hw_info.resc_num[QED_VLAN],
-                  p_hwfn->hw_info.resc_start[QED_VLAN],
-                  p_hwfn->hw_info.resc_num[QED_ILT],
-                  p_hwfn->hw_info.resc_start[QED_ILT],
-                  RESC_NUM(p_hwfn, QED_LL2_QUEUE),
-                  RESC_START(p_hwfn, QED_LL2_QUEUE));
+                  "The numbers for each resource are:\n");
+       for (res_id = 0; res_id < QED_MAX_RESC; res_id++)
+               DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, "%s = %d start = %d\n",
+                          qed_hw_get_resc_name(res_id),
+                          RESC_NUM(p_hwfn, res_id),
+                          RESC_START(p_hwfn, res_id));
 
        return 0;
 }
@@ -1800,6 +2035,9 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn,
 
        qed_get_num_funcs(p_hwfn, p_ptt);
 
+       if (qed_mcp_is_init(p_hwfn))
+               p_hwfn->hw_info.mtu = p_hwfn->mcp_info->func_info.mtu;
+
        return qed_hw_get_resc(p_hwfn);
 }
 
@@ -1974,8 +2212,13 @@ int qed_hw_prepare(struct qed_dev *cdev,
 
 void qed_hw_remove(struct qed_dev *cdev)
 {
+       struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
        int i;
 
+       if (IS_PF(cdev))
+               qed_mcp_ov_update_driver_state(p_hwfn, p_hwfn->p_main_ptt,
+                                              QED_OV_DRIVER_STATE_NOT_LOADED);
+
        for_each_hwfn(cdev, i) {
                struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
@@ -2036,12 +2279,12 @@ static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
 {
        void **pp_virt_addr_tbl = p_chain->pbl.pp_virt_addr_tbl;
        u32 page_cnt = p_chain->page_cnt, i, pbl_size;
-       u8 *p_pbl_virt = p_chain->pbl.p_virt_table;
+       u8 *p_pbl_virt = p_chain->pbl_sp.p_virt_table;
 
        if (!pp_virt_addr_tbl)
                return;
 
-       if (!p_chain->pbl.p_virt_table)
+       if (!p_pbl_virt)
                goto out;
 
        for (i = 0; i < page_cnt; i++) {
@@ -2059,7 +2302,8 @@ static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
        pbl_size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE;
        dma_free_coherent(&cdev->pdev->dev,
                          pbl_size,
-                         p_chain->pbl.p_virt_table, p_chain->pbl.p_phys_table);
+                         p_chain->pbl_sp.p_virt_table,
+                         p_chain->pbl_sp.p_phys_table);
 out:
        vfree(p_chain->pbl.pp_virt_addr_tbl);
 }
index 72eee29c677f153e6ed65fbf0d57436abbe87f84..785ab03683ebc4e89d2d3fc349aa2bd8112c6b3e 100644 (file)
@@ -727,9 +727,6 @@ struct core_tx_bd_flags {
 #define CORE_TX_BD_FLAGS_L4_PROTOCOL_SHIFT     6
 #define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_MASK      0x1
 #define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_SHIFT 7
-#define CORE_TX_BD_FLAGS_ROCE_FLAV_MASK                0x1
-#define CORE_TX_BD_FLAGS_ROCE_FLAV_SHIFT       12
-
 };
 
 struct core_tx_bd {
@@ -8529,6 +8526,41 @@ struct mdump_config_stc {
        u32 valid_logs;
 };
 
+enum resource_id_enum {
+       RESOURCE_NUM_SB_E = 0,
+       RESOURCE_NUM_L2_QUEUE_E = 1,
+       RESOURCE_NUM_VPORT_E = 2,
+       RESOURCE_NUM_VMQ_E = 3,
+       RESOURCE_FACTOR_NUM_RSS_PF_E = 4,
+       RESOURCE_FACTOR_RSS_PER_VF_E = 5,
+       RESOURCE_NUM_RL_E = 6,
+       RESOURCE_NUM_PQ_E = 7,
+       RESOURCE_NUM_VF_E = 8,
+       RESOURCE_VFC_FILTER_E = 9,
+       RESOURCE_ILT_E = 10,
+       RESOURCE_CQS_E = 11,
+       RESOURCE_GFT_PROFILES_E = 12,
+       RESOURCE_NUM_TC_E = 13,
+       RESOURCE_NUM_RSS_ENGINES_E = 14,
+       RESOURCE_LL2_QUEUE_E = 15,
+       RESOURCE_RDMA_STATS_QUEUE_E = 16,
+       RESOURCE_MAX_NUM,
+       RESOURCE_NUM_INVALID = 0xFFFFFFFF
+};
+
+/* Resource ID is to be filled by the driver in the MB request
+ * Size, offset & flags to be filled by the MFW in the MB response
+ */
+struct resource_info {
+       enum resource_id_enum res_id;
+       u32 size;               /* number of allocated resources */
+       u32 offset;             /* Offset of the 1st resource */
+       u32 vf_size;
+       u32 vf_offset;
+       u32 flags;
+#define RESOURCE_ELEMENT_STRICT (1 << 0)
+};
+
 union drv_union_data {
        u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD];
        struct mcp_mac wol_mac;
@@ -8546,9 +8578,9 @@ union drv_union_data {
        struct drv_version_stc drv_version;
 
        struct lan_stats_stc lan_stats;
-       u64 reserved_stats[11];
        struct ocbb_data_stc ocbb_info;
        struct temperature_status_stc temp_info;
+       struct resource_info resource;
        struct bist_nvm_image_att nvm_image_att;
        struct mdump_config_stc mdump_config;
 };
@@ -8564,9 +8596,19 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_INIT_PHY                  0x22000000
 #define DRV_MSG_CODE_LINK_RESET                        0x23000000
 #define DRV_MSG_CODE_SET_DCBX                  0x25000000
+#define DRV_MSG_CODE_OV_UPDATE_CURR_CFG         0x26000000
+#define DRV_MSG_CODE_OV_UPDATE_BUS_NUM          0x27000000
+#define DRV_MSG_CODE_OV_UPDATE_BOOT_PROGRESS    0x28000000
+#define DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER     0x29000000
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE     0x31000000
+#define DRV_MSG_CODE_BW_UPDATE_ACK              0x32000000
+#define DRV_MSG_CODE_OV_UPDATE_MTU              0x33000000
+#define DRV_MSG_CODE_OV_UPDATE_WOL              0x38000000
+#define DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE     0x39000000
 
 #define DRV_MSG_CODE_BW_UPDATE_ACK             0x32000000
 #define DRV_MSG_CODE_NIG_DRAIN                 0x30000000
+#define DRV_MSG_GET_RESOURCE_ALLOC_MSG          0x34000000
 #define DRV_MSG_CODE_VF_DISABLED_DONE          0xc0000000
 #define DRV_MSG_CODE_CFG_VF_MSIX               0xc0010000
 #define DRV_MSG_CODE_NVM_GET_FILE_ATT          0x00030000
@@ -8574,6 +8616,13 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_MCP_RESET                 0x00090000
 #define DRV_MSG_CODE_SET_VERSION               0x000f0000
 #define DRV_MSG_CODE_MCP_HALT                   0x00100000
+#define DRV_MSG_CODE_SET_VMAC                   0x00110000
+#define DRV_MSG_CODE_GET_VMAC                   0x00120000
+#define DRV_MSG_CODE_VMAC_TYPE_SHIFT            4
+#define DRV_MSG_CODE_VMAC_TYPE_MASK             0x30
+#define DRV_MSG_CODE_VMAC_TYPE_MAC              1
+#define DRV_MSG_CODE_VMAC_TYPE_WWNN             2
+#define DRV_MSG_CODE_VMAC_TYPE_WWPN             3
 
 #define DRV_MSG_CODE_GET_STATS                  0x00130000
 #define DRV_MSG_CODE_STATS_TYPE_LAN             1
@@ -8585,11 +8634,16 @@ struct public_drv_mb {
 
 #define DRV_MSG_CODE_BIST_TEST                 0x001e0000
 #define DRV_MSG_CODE_SET_LED_MODE              0x00200000
+#define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL      0x002b0000
+#define DRV_MSG_CODE_OS_WOL                    0x002e0000
 
 #define DRV_MSG_SEQ_NUMBER_MASK                        0x0000ffff
 
        u32 drv_mb_param;
-#define DRV_MB_PARAM_UNLOAD_WOL_MCP            0x00000001
+#define DRV_MB_PARAM_UNLOAD_WOL_UNKNOWN         0x00000000
+#define DRV_MB_PARAM_UNLOAD_WOL_MCP             0x00000001
+#define DRV_MB_PARAM_UNLOAD_WOL_DISABLED        0x00000002
+#define DRV_MB_PARAM_UNLOAD_WOL_ENABLED         0x00000003
 #define DRV_MB_PARAM_DCBX_NOTIFY_MASK          0x000000FF
 #define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT         3
 
@@ -8602,13 +8656,59 @@ struct public_drv_mb {
 #define DRV_MB_PARAM_LLDP_SEND_MASK            0x00000001
 #define DRV_MB_PARAM_LLDP_SEND_SHIFT           0
 
+#define DRV_MB_PARAM_OV_CURR_CFG_SHIFT         0
+#define DRV_MB_PARAM_OV_CURR_CFG_MASK          0x0000000F
+#define DRV_MB_PARAM_OV_CURR_CFG_NONE          0
+#define DRV_MB_PARAM_OV_CURR_CFG_OS            1
+#define DRV_MB_PARAM_OV_CURR_CFG_VENDOR_SPEC   2
+#define DRV_MB_PARAM_OV_CURR_CFG_OTHER         3
+
+#define DRV_MB_PARAM_OV_STORM_FW_VER_SHIFT     0
+#define DRV_MB_PARAM_OV_STORM_FW_VER_MASK      0xFFFFFFFF
+#define DRV_MB_PARAM_OV_STORM_FW_VER_MAJOR_MASK        0xFF000000
+#define DRV_MB_PARAM_OV_STORM_FW_VER_MINOR_MASK        0x00FF0000
+#define DRV_MB_PARAM_OV_STORM_FW_VER_BUILD_MASK        0x0000FF00
+#define DRV_MB_PARAM_OV_STORM_FW_VER_DROP_MASK 0x000000FF
+
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_SHIFT      0
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_MASK       0xF
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_UNKNOWN    0x1
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_NOT_LOADED 0x2
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_LOADING    0x3
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_DISABLED   0x4
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_ACTIVE     0x5
+
+#define DRV_MB_PARAM_OV_MTU_SIZE_SHIFT 0
+#define DRV_MB_PARAM_OV_MTU_SIZE_MASK  0xFFFFFFFF
+
+#define DRV_MB_PARAM_WOL_MASK  (DRV_MB_PARAM_WOL_DEFAULT | \
+                                DRV_MB_PARAM_WOL_DISABLED | \
+                                DRV_MB_PARAM_WOL_ENABLED)
+#define DRV_MB_PARAM_WOL_DEFAULT       DRV_MB_PARAM_UNLOAD_WOL_MCP
+#define DRV_MB_PARAM_WOL_DISABLED      DRV_MB_PARAM_UNLOAD_WOL_DISABLED
+#define DRV_MB_PARAM_WOL_ENABLED       DRV_MB_PARAM_UNLOAD_WOL_ENABLED
+
+#define DRV_MB_PARAM_ESWITCH_MODE_MASK (DRV_MB_PARAM_ESWITCH_MODE_NONE | \
+                                        DRV_MB_PARAM_ESWITCH_MODE_VEB | \
+                                        DRV_MB_PARAM_ESWITCH_MODE_VEPA)
+#define DRV_MB_PARAM_ESWITCH_MODE_NONE 0x0
+#define DRV_MB_PARAM_ESWITCH_MODE_VEB  0x1
+#define DRV_MB_PARAM_ESWITCH_MODE_VEPA 0x2
 
 #define DRV_MB_PARAM_SET_LED_MODE_OPER         0x0
 #define DRV_MB_PARAM_SET_LED_MODE_ON           0x1
 #define DRV_MB_PARAM_SET_LED_MODE_OFF          0x2
 
+       /* Resource Allocation params - Driver version support */
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK 0xFFFF0000
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT        16
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_MASK 0x0000FFFF
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT        0
+
 #define DRV_MB_PARAM_BIST_REGISTER_TEST                1
 #define DRV_MB_PARAM_BIST_CLOCK_TEST           2
+#define DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES  3
+#define DRV_MB_PARAM_BIST_NVM_TEST_IMAGE_BY_INDEX      4
 
 #define DRV_MB_PARAM_BIST_RC_UNKNOWN           0
 #define DRV_MB_PARAM_BIST_RC_PASSED            1
@@ -8617,6 +8717,8 @@ struct public_drv_mb {
 
 #define DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT     0
 #define DRV_MB_PARAM_BIST_TEST_INDEX_MASK      0x000000FF
+#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT       8
+#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_MASK                0x0000FF00
 
        u32 fw_mb_header;
 #define FW_MSG_CODE_MASK                       0xffff0000
@@ -8631,15 +8733,27 @@ struct public_drv_mb {
 #define FW_MSG_CODE_DRV_UNLOAD_PORT            0x20120000
 #define FW_MSG_CODE_DRV_UNLOAD_FUNCTION                0x20130000
 #define FW_MSG_CODE_DRV_UNLOAD_DONE            0x21100000
+#define FW_MSG_CODE_RESOURCE_ALLOC_OK           0x34000000
+#define FW_MSG_CODE_RESOURCE_ALLOC_UNKNOWN      0x35000000
+#define FW_MSG_CODE_RESOURCE_ALLOC_DEPRECATED   0x36000000
 #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE       0xb0010000
 
 #define FW_MSG_CODE_NVM_OK                     0x00010000
 #define FW_MSG_CODE_OK                         0x00160000
 
+#define FW_MSG_CODE_OS_WOL_SUPPORTED            0x00800000
+#define FW_MSG_CODE_OS_WOL_NOT_SUPPORTED        0x00810000
+
 #define FW_MSG_SEQ_NUMBER_MASK                 0x0000ffff
 
        u32 fw_mb_param;
 
+       /* get pf rdma protocol command response */
+#define FW_MB_PARAM_GET_PF_RDMA_NONE           0x0
+#define FW_MB_PARAM_GET_PF_RDMA_ROCE           0x1
+#define FW_MB_PARAM_GET_PF_RDMA_IWARP          0x2
+#define FW_MB_PARAM_GET_PF_RDMA_BOTH           0x3
+
        u32 drv_pulse_mb;
 #define DRV_PULSE_SEQ_MASK                     0x00007fff
 #define DRV_PULSE_SYSTEM_TIME_MASK             0xffff0000
index 2adedc6fb6cf49915aaa7b37a6e2bff9d1397e50..bb74e1c10ffe6a58b7b2b6714f6935397a74b4db 100644 (file)
@@ -3030,6 +3030,31 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
                        }
                }
        }
+
+       /* There's a possibility the igu_sb_cnt_iov doesn't properly reflect
+        * the number of VF SBs [especially for first VF on engine, as we can't
+        * differentiate between empty entries and its entries].
+        * Since we don't really support more SBs than VFs today, prevent any
+        * such configuration by sanitizing the number of SBs to equal the
+        * number of VFs.
+        */
+       if (IS_PF_SRIOV(p_hwfn)) {
+               u16 total_vfs = p_hwfn->cdev->p_iov_info->total_vfs;
+
+               if (total_vfs < p_igu_info->free_blks) {
+                       DP_VERBOSE(p_hwfn,
+                                  (NETIF_MSG_INTR | QED_MSG_IOV),
+                                  "Limiting number of SBs for IOV - %04x --> %04x\n",
+                                  p_igu_info->free_blks,
+                                  p_hwfn->cdev->p_iov_info->total_vfs);
+                       p_igu_info->free_blks = total_vfs;
+               } else if (total_vfs > p_igu_info->free_blks) {
+                       DP_NOTICE(p_hwfn,
+                                 "IGU has only %04x SBs for VFs while the device has %04x VFs\n",
+                                 p_igu_info->free_blks, total_vfs);
+                       return -EINVAL;
+               }
+       }
        p_igu_info->igu_sb_cnt_iov = p_igu_info->free_blks;
 
        DP_VERBOSE(
@@ -3163,7 +3188,12 @@ u16 qed_int_queue_id_from_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id)
                return sb_id - p_info->igu_base_sb;
        } else if ((sb_id >= p_info->igu_base_sb_iov) &&
                   (sb_id < p_info->igu_base_sb_iov + p_info->igu_sb_cnt_iov)) {
-               return sb_id - p_info->igu_base_sb_iov + p_info->igu_sb_cnt;
+               /* We want the first VF queue to be adjacent to the
+                * last PF queue. Since L2 queues can be partial to
+                * SBs, we'll use the feature instead.
+                */
+               return sb_id - p_info->igu_base_sb_iov +
+                      FEAT_NUM(p_hwfn, QED_PF_L2_QUE);
        } else {
                DP_NOTICE(p_hwfn, "SB %d not in range for function\n", sb_id);
                return 0;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
new file mode 100644 (file)
index 0000000..00efb1c
--- /dev/null
@@ -0,0 +1,1277 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <asm/param.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/version.h>
+#include <linux/workqueue.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/qed/qed_iscsi_if.h>
+#include "qed.h"
+#include "qed_cxt.h"
+#include "qed_dev_api.h"
+#include "qed_hsi.h"
+#include "qed_hw.h"
+#include "qed_int.h"
+#include "qed_iscsi.h"
+#include "qed_ll2.h"
+#include "qed_mcp.h"
+#include "qed_sp.h"
+#include "qed_sriov.h"
+#include "qed_reg_addr.h"
+
+struct qed_iscsi_conn {
+       struct list_head list_entry;
+       bool free_on_delete;
+
+       u16 conn_id;
+       u32 icid;
+       u32 fw_cid;
+
+       u8 layer_code;
+       u8 offl_flags;
+       u8 connect_mode;
+       u32 initial_ack;
+       dma_addr_t sq_pbl_addr;
+       struct qed_chain r2tq;
+       struct qed_chain xhq;
+       struct qed_chain uhq;
+
+       struct tcp_upload_params *tcp_upload_params_virt_addr;
+       dma_addr_t tcp_upload_params_phys_addr;
+       struct scsi_terminate_extra_params *queue_cnts_virt_addr;
+       dma_addr_t queue_cnts_phys_addr;
+       dma_addr_t syn_phy_addr;
+
+       u16 syn_ip_payload_length;
+       u8 local_mac[6];
+       u8 remote_mac[6];
+       u16 vlan_id;
+       u8 tcp_flags;
+       u8 ip_version;
+       u32 remote_ip[4];
+       u32 local_ip[4];
+       u8 ka_max_probe_cnt;
+       u8 dup_ack_theshold;
+       u32 rcv_next;
+       u32 snd_una;
+       u32 snd_next;
+       u32 snd_max;
+       u32 snd_wnd;
+       u32 rcv_wnd;
+       u32 snd_wl1;
+       u32 cwnd;
+       u32 ss_thresh;
+       u16 srtt;
+       u16 rtt_var;
+       u32 ts_time;
+       u32 ts_recent;
+       u32 ts_recent_age;
+       u32 total_rt;
+       u32 ka_timeout_delta;
+       u32 rt_timeout_delta;
+       u8 dup_ack_cnt;
+       u8 snd_wnd_probe_cnt;
+       u8 ka_probe_cnt;
+       u8 rt_cnt;
+       u32 flow_label;
+       u32 ka_timeout;
+       u32 ka_interval;
+       u32 max_rt_time;
+       u32 initial_rcv_wnd;
+       u8 ttl;
+       u8 tos_or_tc;
+       u16 remote_port;
+       u16 local_port;
+       u16 mss;
+       u8 snd_wnd_scale;
+       u8 rcv_wnd_scale;
+       u32 ts_ticks_per_second;
+       u16 da_timeout_value;
+       u8 ack_frequency;
+
+       u8 update_flag;
+       u8 default_cq;
+       u32 max_seq_size;
+       u32 max_recv_pdu_length;
+       u32 max_send_pdu_length;
+       u32 first_seq_length;
+       u32 exp_stat_sn;
+       u32 stat_sn;
+       u16 physical_q0;
+       u16 physical_q1;
+       u8 abortive_dsconnect;
+};
+
+static int
+qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn,
+                       enum spq_mode comp_mode,
+                       struct qed_spq_comp_cb *p_comp_addr,
+                       void *event_context, iscsi_event_cb_t async_event_cb)
+{
+       struct iscsi_init_ramrod_params *p_ramrod = NULL;
+       struct scsi_init_func_queues *p_queue = NULL;
+       struct qed_iscsi_pf_params *p_params = NULL;
+       struct iscsi_spe_func_init *p_init = NULL;
+       struct qed_spq_entry *p_ent = NULL;
+       struct qed_sp_init_data init_data;
+       int rc = 0;
+       u32 dval;
+       u16 val;
+       u8 i;
+
+       /* Get SPQ entry */
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.cid = qed_spq_get_cid(p_hwfn);
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+       init_data.comp_mode = comp_mode;
+       init_data.p_comp_data = p_comp_addr;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                ISCSI_RAMROD_CMD_ID_INIT_FUNC,
+                                PROTOCOLID_ISCSI, &init_data);
+       if (rc)
+               return rc;
+
+       p_ramrod = &p_ent->ramrod.iscsi_init;
+       p_init = &p_ramrod->iscsi_init_spe;
+       p_params = &p_hwfn->pf_params.iscsi_pf_params;
+       p_queue = &p_init->q_params;
+
+       SET_FIELD(p_init->hdr.flags,
+                 ISCSI_SLOW_PATH_HDR_LAYER_CODE, ISCSI_SLOW_PATH_LAYER_CODE);
+       p_init->hdr.op_code = ISCSI_RAMROD_CMD_ID_INIT_FUNC;
+
+       val = p_params->half_way_close_timeout;
+       p_init->half_way_close_timeout = cpu_to_le16(val);
+       p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring;
+       p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring;
+       p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring;
+       p_init->func_params.log_page_size = p_params->log_page_size;
+       val = p_params->num_tasks;
+       p_init->func_params.num_tasks = cpu_to_le16(val);
+       p_init->debug_mode.flags = p_params->debug_mode;
+
+       DMA_REGPAIR_LE(p_queue->glbl_q_params_addr,
+                      p_params->glbl_q_params_addr);
+
+       val = p_params->cq_num_entries;
+       p_queue->cq_num_entries = cpu_to_le16(val);
+       val = p_params->cmdq_num_entries;
+       p_queue->cmdq_num_entries = cpu_to_le16(val);
+       p_queue->num_queues = p_params->num_queues;
+       dval = (u8)p_hwfn->hw_info.resc_start[QED_CMDQS_CQS];
+       p_queue->queue_relative_offset = (u8)dval;
+       p_queue->cq_sb_pi = p_params->gl_rq_pi;
+       p_queue->cmdq_sb_pi = p_params->gl_cmd_pi;
+
+       for (i = 0; i < p_params->num_queues; i++) {
+               val = p_hwfn->sbs_info[i]->igu_sb_id;
+               p_queue->cq_cmdq_sb_num_arr[i] = cpu_to_le16(val);
+       }
+
+       p_queue->bdq_resource_id = ISCSI_BDQ_ID(p_hwfn->port_id);
+
+       DMA_REGPAIR_LE(p_queue->bdq_pbl_base_address[BDQ_ID_RQ],
+                      p_params->bdq_pbl_base_addr[BDQ_ID_RQ]);
+       p_queue->bdq_pbl_num_entries[BDQ_ID_RQ] =
+           p_params->bdq_pbl_num_entries[BDQ_ID_RQ];
+       val = p_params->bdq_xoff_threshold[BDQ_ID_RQ];
+       p_queue->bdq_xoff_threshold[BDQ_ID_RQ] = cpu_to_le16(val);
+       val = p_params->bdq_xon_threshold[BDQ_ID_RQ];
+       p_queue->bdq_xon_threshold[BDQ_ID_RQ] = cpu_to_le16(val);
+
+       DMA_REGPAIR_LE(p_queue->bdq_pbl_base_address[BDQ_ID_IMM_DATA],
+                      p_params->bdq_pbl_base_addr[BDQ_ID_IMM_DATA]);
+       p_queue->bdq_pbl_num_entries[BDQ_ID_IMM_DATA] =
+           p_params->bdq_pbl_num_entries[BDQ_ID_IMM_DATA];
+       val = p_params->bdq_xoff_threshold[BDQ_ID_IMM_DATA];
+       p_queue->bdq_xoff_threshold[BDQ_ID_IMM_DATA] = cpu_to_le16(val);
+       val = p_params->bdq_xon_threshold[BDQ_ID_IMM_DATA];
+       p_queue->bdq_xon_threshold[BDQ_ID_IMM_DATA] = cpu_to_le16(val);
+       val = p_params->rq_buffer_size;
+       p_queue->rq_buffer_size = cpu_to_le16(val);
+       if (p_params->is_target) {
+               SET_FIELD(p_queue->q_validity,
+                         SCSI_INIT_FUNC_QUEUES_RQ_VALID, 1);
+               if (p_queue->bdq_pbl_num_entries[BDQ_ID_IMM_DATA])
+                       SET_FIELD(p_queue->q_validity,
+                                 SCSI_INIT_FUNC_QUEUES_IMM_DATA_VALID, 1);
+               SET_FIELD(p_queue->q_validity,
+                         SCSI_INIT_FUNC_QUEUES_CMD_VALID, 1);
+       } else {
+               SET_FIELD(p_queue->q_validity,
+                         SCSI_INIT_FUNC_QUEUES_RQ_VALID, 1);
+       }
+       p_ramrod->tcp_init.two_msl_timer = cpu_to_le32(p_params->two_msl_timer);
+       val = p_params->tx_sws_timer;
+       p_ramrod->tcp_init.tx_sws_timer = cpu_to_le16(val);
+       p_ramrod->tcp_init.maxfinrt = p_params->max_fin_rt;
+
+       p_hwfn->p_iscsi_info->event_context = event_context;
+       p_hwfn->p_iscsi_info->event_cb = async_event_cb;
+
+       return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_conn_offload(struct qed_hwfn *p_hwfn,
+                                    struct qed_iscsi_conn *p_conn,
+                                    enum spq_mode comp_mode,
+                                    struct qed_spq_comp_cb *p_comp_addr)
+{
+       struct iscsi_spe_conn_offload *p_ramrod = NULL;
+       struct tcp_offload_params_opt2 *p_tcp2 = NULL;
+       struct tcp_offload_params *p_tcp = NULL;
+       struct qed_spq_entry *p_ent = NULL;
+       struct qed_sp_init_data init_data;
+       union qed_qm_pq_params pq_params;
+       u16 pq0_id = 0, pq1_id = 0;
+       dma_addr_t r2tq_pbl_addr;
+       dma_addr_t xhq_pbl_addr;
+       dma_addr_t uhq_pbl_addr;
+       int rc = 0;
+       u32 dval;
+       u16 wval;
+       u8 i;
+       u16 *p;
+
+       /* Get SPQ entry */
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.cid = p_conn->icid;
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+       init_data.comp_mode = comp_mode;
+       init_data.p_comp_data = p_comp_addr;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                ISCSI_RAMROD_CMD_ID_OFFLOAD_CONN,
+                                PROTOCOLID_ISCSI, &init_data);
+       if (rc)
+               return rc;
+
+       p_ramrod = &p_ent->ramrod.iscsi_conn_offload;
+
+       /* Transmission PQ is the first of the PF */
+       memset(&pq_params, 0, sizeof(pq_params));
+       pq0_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ISCSI, &pq_params);
+       p_conn->physical_q0 = cpu_to_le16(pq0_id);
+       p_ramrod->iscsi.physical_q0 = cpu_to_le16(pq0_id);
+
+       /* iSCSI Pure-ACK PQ */
+       pq_params.iscsi.q_idx = 1;
+       pq1_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ISCSI, &pq_params);
+       p_conn->physical_q1 = cpu_to_le16(pq1_id);
+       p_ramrod->iscsi.physical_q1 = cpu_to_le16(pq1_id);
+
+       p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_OFFLOAD_CONN;
+       SET_FIELD(p_ramrod->hdr.flags, ISCSI_SLOW_PATH_HDR_LAYER_CODE,
+                 p_conn->layer_code);
+
+       p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+       p_ramrod->fw_cid = cpu_to_le32(p_conn->icid);
+
+       DMA_REGPAIR_LE(p_ramrod->iscsi.sq_pbl_addr, p_conn->sq_pbl_addr);
+
+       r2tq_pbl_addr = qed_chain_get_pbl_phys(&p_conn->r2tq);
+       DMA_REGPAIR_LE(p_ramrod->iscsi.r2tq_pbl_addr, r2tq_pbl_addr);
+
+       xhq_pbl_addr = qed_chain_get_pbl_phys(&p_conn->xhq);
+       DMA_REGPAIR_LE(p_ramrod->iscsi.xhq_pbl_addr, xhq_pbl_addr);
+
+       uhq_pbl_addr = qed_chain_get_pbl_phys(&p_conn->uhq);
+       DMA_REGPAIR_LE(p_ramrod->iscsi.uhq_pbl_addr, uhq_pbl_addr);
+
+       p_ramrod->iscsi.initial_ack = cpu_to_le32(p_conn->initial_ack);
+       p_ramrod->iscsi.flags = p_conn->offl_flags;
+       p_ramrod->iscsi.default_cq = p_conn->default_cq;
+       p_ramrod->iscsi.stat_sn = cpu_to_le32(p_conn->stat_sn);
+
+       if (!GET_FIELD(p_ramrod->iscsi.flags,
+                      ISCSI_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B)) {
+               p_tcp = &p_ramrod->tcp;
+
+               p = (u16 *)p_conn->local_mac;
+               p_tcp->local_mac_addr_hi = swab16(get_unaligned(p));
+               p_tcp->local_mac_addr_mid = swab16(get_unaligned(p + 1));
+               p_tcp->local_mac_addr_lo = swab16(get_unaligned(p + 2));
+
+               p = (u16 *)p_conn->remote_mac;
+               p_tcp->remote_mac_addr_hi = swab16(get_unaligned(p));
+               p_tcp->remote_mac_addr_mid = swab16(get_unaligned(p + 1));
+               p_tcp->remote_mac_addr_lo = swab16(get_unaligned(p + 2));
+
+               p_tcp->vlan_id = cpu_to_le16(p_conn->vlan_id);
+
+               p_tcp->flags = p_conn->tcp_flags;
+               p_tcp->ip_version = p_conn->ip_version;
+               for (i = 0; i < 4; i++) {
+                       dval = p_conn->remote_ip[i];
+                       p_tcp->remote_ip[i] = cpu_to_le32(dval);
+                       dval = p_conn->local_ip[i];
+                       p_tcp->local_ip[i] = cpu_to_le32(dval);
+               }
+               p_tcp->ka_max_probe_cnt = p_conn->ka_max_probe_cnt;
+               p_tcp->dup_ack_theshold = p_conn->dup_ack_theshold;
+
+               p_tcp->rcv_next = cpu_to_le32(p_conn->rcv_next);
+               p_tcp->snd_una = cpu_to_le32(p_conn->snd_una);
+               p_tcp->snd_next = cpu_to_le32(p_conn->snd_next);
+               p_tcp->snd_max = cpu_to_le32(p_conn->snd_max);
+               p_tcp->snd_wnd = cpu_to_le32(p_conn->snd_wnd);
+               p_tcp->rcv_wnd = cpu_to_le32(p_conn->rcv_wnd);
+               p_tcp->snd_wl1 = cpu_to_le32(p_conn->snd_wl1);
+               p_tcp->cwnd = cpu_to_le32(p_conn->cwnd);
+               p_tcp->ss_thresh = cpu_to_le32(p_conn->ss_thresh);
+               p_tcp->srtt = cpu_to_le16(p_conn->srtt);
+               p_tcp->rtt_var = cpu_to_le16(p_conn->rtt_var);
+               p_tcp->ts_time = cpu_to_le32(p_conn->ts_time);
+               p_tcp->ts_recent = cpu_to_le32(p_conn->ts_recent);
+               p_tcp->ts_recent_age = cpu_to_le32(p_conn->ts_recent_age);
+               p_tcp->total_rt = cpu_to_le32(p_conn->total_rt);
+               dval = p_conn->ka_timeout_delta;
+               p_tcp->ka_timeout_delta = cpu_to_le32(dval);
+               dval = p_conn->rt_timeout_delta;
+               p_tcp->rt_timeout_delta = cpu_to_le32(dval);
+               p_tcp->dup_ack_cnt = p_conn->dup_ack_cnt;
+               p_tcp->snd_wnd_probe_cnt = p_conn->snd_wnd_probe_cnt;
+               p_tcp->ka_probe_cnt = p_conn->ka_probe_cnt;
+               p_tcp->rt_cnt = p_conn->rt_cnt;
+               p_tcp->flow_label = cpu_to_le32(p_conn->flow_label);
+               p_tcp->ka_timeout = cpu_to_le32(p_conn->ka_timeout);
+               p_tcp->ka_interval = cpu_to_le32(p_conn->ka_interval);
+               p_tcp->max_rt_time = cpu_to_le32(p_conn->max_rt_time);
+               dval = p_conn->initial_rcv_wnd;
+               p_tcp->initial_rcv_wnd = cpu_to_le32(dval);
+               p_tcp->ttl = p_conn->ttl;
+               p_tcp->tos_or_tc = p_conn->tos_or_tc;
+               p_tcp->remote_port = cpu_to_le16(p_conn->remote_port);
+               p_tcp->local_port = cpu_to_le16(p_conn->local_port);
+               p_tcp->mss = cpu_to_le16(p_conn->mss);
+               p_tcp->snd_wnd_scale = p_conn->snd_wnd_scale;
+               p_tcp->rcv_wnd_scale = p_conn->rcv_wnd_scale;
+               dval = p_conn->ts_ticks_per_second;
+               p_tcp->ts_ticks_per_second = cpu_to_le32(dval);
+               wval = p_conn->da_timeout_value;
+               p_tcp->da_timeout_value = cpu_to_le16(wval);
+               p_tcp->ack_frequency = p_conn->ack_frequency;
+               p_tcp->connect_mode = p_conn->connect_mode;
+       } else {
+               p_tcp2 =
+                   &((struct iscsi_spe_conn_offload_option2 *)p_ramrod)->tcp;
+
+               p = (u16 *)p_conn->local_mac;
+               p_tcp2->local_mac_addr_hi = swab16(get_unaligned(p));
+               p_tcp2->local_mac_addr_mid = swab16(get_unaligned(p + 1));
+               p_tcp2->local_mac_addr_lo = swab16(get_unaligned(p + 2));
+
+               p = (u16 *)p_conn->remote_mac;
+               p_tcp2->remote_mac_addr_hi = swab16(get_unaligned(p));
+               p_tcp2->remote_mac_addr_mid = swab16(get_unaligned(p + 1));
+               p_tcp2->remote_mac_addr_lo = swab16(get_unaligned(p + 2));
+
+               p_tcp2->vlan_id = cpu_to_le16(p_conn->vlan_id);
+               p_tcp2->flags = p_conn->tcp_flags;
+
+               p_tcp2->ip_version = p_conn->ip_version;
+               for (i = 0; i < 4; i++) {
+                       dval = p_conn->remote_ip[i];
+                       p_tcp2->remote_ip[i] = cpu_to_le32(dval);
+                       dval = p_conn->local_ip[i];
+                       p_tcp2->local_ip[i] = cpu_to_le32(dval);
+               }
+
+               p_tcp2->flow_label = cpu_to_le32(p_conn->flow_label);
+               p_tcp2->ttl = p_conn->ttl;
+               p_tcp2->tos_or_tc = p_conn->tos_or_tc;
+               p_tcp2->remote_port = cpu_to_le16(p_conn->remote_port);
+               p_tcp2->local_port = cpu_to_le16(p_conn->local_port);
+               p_tcp2->mss = cpu_to_le16(p_conn->mss);
+               p_tcp2->rcv_wnd_scale = p_conn->rcv_wnd_scale;
+               p_tcp2->connect_mode = p_conn->connect_mode;
+               wval = p_conn->syn_ip_payload_length;
+               p_tcp2->syn_ip_payload_length = cpu_to_le16(wval);
+               p_tcp2->syn_phy_addr_lo = DMA_LO_LE(p_conn->syn_phy_addr);
+               p_tcp2->syn_phy_addr_hi = DMA_HI_LE(p_conn->syn_phy_addr);
+       }
+
+       return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_conn_update(struct qed_hwfn *p_hwfn,
+                                   struct qed_iscsi_conn *p_conn,
+                                   enum spq_mode comp_mode,
+                                   struct qed_spq_comp_cb *p_comp_addr)
+{
+       struct iscsi_conn_update_ramrod_params *p_ramrod = NULL;
+       struct qed_spq_entry *p_ent = NULL;
+       struct qed_sp_init_data init_data;
+       int rc = -EINVAL;
+       u32 dval;
+
+       /* Get SPQ entry */
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.cid = p_conn->icid;
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+       init_data.comp_mode = comp_mode;
+       init_data.p_comp_data = p_comp_addr;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                ISCSI_RAMROD_CMD_ID_UPDATE_CONN,
+                                PROTOCOLID_ISCSI, &init_data);
+       if (rc)
+               return rc;
+
+       p_ramrod = &p_ent->ramrod.iscsi_conn_update;
+       p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_UPDATE_CONN;
+       SET_FIELD(p_ramrod->hdr.flags,
+                 ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code);
+
+       p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+       p_ramrod->fw_cid = cpu_to_le32(p_conn->icid);
+       p_ramrod->flags = p_conn->update_flag;
+       p_ramrod->max_seq_size = cpu_to_le32(p_conn->max_seq_size);
+       dval = p_conn->max_recv_pdu_length;
+       p_ramrod->max_recv_pdu_length = cpu_to_le32(dval);
+       dval = p_conn->max_send_pdu_length;
+       p_ramrod->max_send_pdu_length = cpu_to_le32(dval);
+       dval = p_conn->first_seq_length;
+       p_ramrod->first_seq_length = cpu_to_le32(dval);
+       p_ramrod->exp_stat_sn = cpu_to_le32(p_conn->exp_stat_sn);
+
+       return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_conn_terminate(struct qed_hwfn *p_hwfn,
+                                      struct qed_iscsi_conn *p_conn,
+                                      enum spq_mode comp_mode,
+                                      struct qed_spq_comp_cb *p_comp_addr)
+{
+       struct iscsi_spe_conn_termination *p_ramrod = NULL;
+       struct qed_spq_entry *p_ent = NULL;
+       struct qed_sp_init_data init_data;
+       int rc = -EINVAL;
+
+       /* Get SPQ entry */
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.cid = p_conn->icid;
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+       init_data.comp_mode = comp_mode;
+       init_data.p_comp_data = p_comp_addr;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                ISCSI_RAMROD_CMD_ID_TERMINATION_CONN,
+                                PROTOCOLID_ISCSI, &init_data);
+       if (rc)
+               return rc;
+
+       p_ramrod = &p_ent->ramrod.iscsi_conn_terminate;
+       p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_TERMINATION_CONN;
+       SET_FIELD(p_ramrod->hdr.flags,
+                 ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code);
+
+       p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+       p_ramrod->fw_cid = cpu_to_le32(p_conn->icid);
+       p_ramrod->abortive = p_conn->abortive_dsconnect;
+
+       DMA_REGPAIR_LE(p_ramrod->query_params_addr,
+                      p_conn->tcp_upload_params_phys_addr);
+       DMA_REGPAIR_LE(p_ramrod->queue_cnts_addr, p_conn->queue_cnts_phys_addr);
+
+       return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_conn_clear_sq(struct qed_hwfn *p_hwfn,
+                                     struct qed_iscsi_conn *p_conn,
+                                     enum spq_mode comp_mode,
+                                     struct qed_spq_comp_cb *p_comp_addr)
+{
+       struct iscsi_slow_path_hdr *p_ramrod = NULL;
+       struct qed_spq_entry *p_ent = NULL;
+       struct qed_sp_init_data init_data;
+       int rc = -EINVAL;
+
+       /* Get SPQ entry */
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.cid = p_conn->icid;
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+       init_data.comp_mode = comp_mode;
+       init_data.p_comp_data = p_comp_addr;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                ISCSI_RAMROD_CMD_ID_CLEAR_SQ,
+                                PROTOCOLID_ISCSI, &init_data);
+       if (rc)
+               return rc;
+
+       p_ramrod = &p_ent->ramrod.iscsi_empty;
+       p_ramrod->op_code = ISCSI_RAMROD_CMD_ID_CLEAR_SQ;
+       SET_FIELD(p_ramrod->flags,
+                 ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code);
+
+       return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_func_stop(struct qed_hwfn *p_hwfn,
+                                 enum spq_mode comp_mode,
+                                 struct qed_spq_comp_cb *p_comp_addr)
+{
+       struct iscsi_spe_func_dstry *p_ramrod = NULL;
+       struct qed_spq_entry *p_ent = NULL;
+       struct qed_sp_init_data init_data;
+       int rc = 0;
+
+       /* Get SPQ entry */
+       memset(&init_data, 0, sizeof(init_data));
+       init_data.cid = qed_spq_get_cid(p_hwfn);
+       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+       init_data.comp_mode = comp_mode;
+       init_data.p_comp_data = p_comp_addr;
+
+       rc = qed_sp_init_request(p_hwfn, &p_ent,
+                                ISCSI_RAMROD_CMD_ID_DESTROY_FUNC,
+                                PROTOCOLID_ISCSI, &init_data);
+       if (rc)
+               return rc;
+
+       p_ramrod = &p_ent->ramrod.iscsi_destroy;
+       p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_DESTROY_FUNC;
+
+       return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static void __iomem *qed_iscsi_get_db_addr(struct qed_hwfn *p_hwfn, u32 cid)
+{
+       return (u8 __iomem *)p_hwfn->doorbells +
+                            qed_db_addr(cid, DQ_DEMS_LEGACY);
+}
+
+static void __iomem *qed_iscsi_get_primary_bdq_prod(struct qed_hwfn *p_hwfn,
+                                                   u8 bdq_id)
+{
+       u8 bdq_function_id = ISCSI_BDQ_ID(p_hwfn->port_id);
+
+       return (u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_MSDM_RAM +
+                            MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(bdq_function_id,
+                                                            bdq_id);
+}
+
+static void __iomem *qed_iscsi_get_secondary_bdq_prod(struct qed_hwfn *p_hwfn,
+                                                     u8 bdq_id)
+{
+       u8 bdq_function_id = ISCSI_BDQ_ID(p_hwfn->port_id);
+
+       return (u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_TSDM_RAM +
+                            TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(bdq_function_id,
+                                                            bdq_id);
+}
+
+static int qed_iscsi_setup_connection(struct qed_hwfn *p_hwfn,
+                                     struct qed_iscsi_conn *p_conn)
+{
+       if (!p_conn->queue_cnts_virt_addr)
+               goto nomem;
+       memset(p_conn->queue_cnts_virt_addr, 0,
+              sizeof(*p_conn->queue_cnts_virt_addr));
+
+       if (!p_conn->tcp_upload_params_virt_addr)
+               goto nomem;
+       memset(p_conn->tcp_upload_params_virt_addr, 0,
+              sizeof(*p_conn->tcp_upload_params_virt_addr));
+
+       if (!p_conn->r2tq.p_virt_addr)
+               goto nomem;
+       qed_chain_pbl_zero_mem(&p_conn->r2tq);
+
+       if (!p_conn->uhq.p_virt_addr)
+               goto nomem;
+       qed_chain_pbl_zero_mem(&p_conn->uhq);
+
+       if (!p_conn->xhq.p_virt_addr)
+               goto nomem;
+       qed_chain_pbl_zero_mem(&p_conn->xhq);
+
+       return 0;
+nomem:
+       return -ENOMEM;
+}
+
+static int qed_iscsi_allocate_connection(struct qed_hwfn *p_hwfn,
+                                        struct qed_iscsi_conn **p_out_conn)
+{
+       u16 uhq_num_elements = 0, xhq_num_elements = 0, r2tq_num_elements = 0;
+       struct scsi_terminate_extra_params *p_q_cnts = NULL;
+       struct qed_iscsi_pf_params *p_params = NULL;
+       struct tcp_upload_params *p_tcp = NULL;
+       struct qed_iscsi_conn *p_conn = NULL;
+       int rc = 0;
+
+       /* Try finding a free connection that can be used */
+       spin_lock_bh(&p_hwfn->p_iscsi_info->lock);
+       if (!list_empty(&p_hwfn->p_iscsi_info->free_list))
+               p_conn = list_first_entry(&p_hwfn->p_iscsi_info->free_list,
+                                         struct qed_iscsi_conn, list_entry);
+       if (p_conn) {
+               list_del(&p_conn->list_entry);
+               spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
+               *p_out_conn = p_conn;
+               return 0;
+       }
+       spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
+
+       /* Need to allocate a new connection */
+       p_params = &p_hwfn->pf_params.iscsi_pf_params;
+
+       p_conn = kzalloc(sizeof(*p_conn), GFP_KERNEL);
+       if (!p_conn)
+               return -ENOMEM;
+
+       p_q_cnts = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+                                     sizeof(*p_q_cnts),
+                                     &p_conn->queue_cnts_phys_addr,
+                                     GFP_KERNEL);
+       if (!p_q_cnts)
+               goto nomem_queue_cnts_param;
+       p_conn->queue_cnts_virt_addr = p_q_cnts;
+
+       p_tcp = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+                                  sizeof(*p_tcp),
+                                  &p_conn->tcp_upload_params_phys_addr,
+                                  GFP_KERNEL);
+       if (!p_tcp)
+               goto nomem_upload_param;
+       p_conn->tcp_upload_params_virt_addr = p_tcp;
+
+       r2tq_num_elements = p_params->num_r2tq_pages_in_ring *
+                           QED_CHAIN_PAGE_SIZE / 0x80;
+       rc = qed_chain_alloc(p_hwfn->cdev,
+                            QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+                            QED_CHAIN_MODE_PBL,
+                            QED_CHAIN_CNT_TYPE_U16,
+                            r2tq_num_elements, 0x80, &p_conn->r2tq);
+       if (rc)
+               goto nomem_r2tq;
+
+       uhq_num_elements = p_params->num_uhq_pages_in_ring *
+                          QED_CHAIN_PAGE_SIZE / sizeof(struct iscsi_uhqe);
+       rc = qed_chain_alloc(p_hwfn->cdev,
+                            QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+                            QED_CHAIN_MODE_PBL,
+                            QED_CHAIN_CNT_TYPE_U16,
+                            uhq_num_elements,
+                            sizeof(struct iscsi_uhqe), &p_conn->uhq);
+       if (rc)
+               goto nomem_uhq;
+
+       xhq_num_elements = uhq_num_elements;
+       rc = qed_chain_alloc(p_hwfn->cdev,
+                            QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+                            QED_CHAIN_MODE_PBL,
+                            QED_CHAIN_CNT_TYPE_U16,
+                            xhq_num_elements,
+                            sizeof(struct iscsi_xhqe), &p_conn->xhq);
+       if (rc)
+               goto nomem;
+
+       p_conn->free_on_delete = true;
+       *p_out_conn = p_conn;
+       return 0;
+
+nomem:
+       qed_chain_free(p_hwfn->cdev, &p_conn->uhq);
+nomem_uhq:
+       qed_chain_free(p_hwfn->cdev, &p_conn->r2tq);
+nomem_r2tq:
+       dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                         sizeof(struct tcp_upload_params),
+                         p_conn->tcp_upload_params_virt_addr,
+                         p_conn->tcp_upload_params_phys_addr);
+nomem_upload_param:
+       dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                         sizeof(struct scsi_terminate_extra_params),
+                         p_conn->queue_cnts_virt_addr,
+                         p_conn->queue_cnts_phys_addr);
+nomem_queue_cnts_param:
+       kfree(p_conn);
+
+       return -ENOMEM;
+}
+
+/* Acquire an iSCSI CID and bind it to a connection object.
+ *
+ * When @p_in_conn is NULL a new connection is allocated, otherwise the
+ * supplied one is used. On success the connection's icid, conn_id and fw_cid
+ * are filled in and the connection is handed back through @p_out_conn.
+ * On failure the CID is released again and the error code is returned.
+ *
+ * NOTE(review): if the connection was allocated here and
+ * qed_iscsi_setup_connection() then fails, nothing on this path frees it -
+ * confirm whether setup can actually fail.
+ */
+static int qed_iscsi_acquire_connection(struct qed_hwfn *p_hwfn,
+                                       struct qed_iscsi_conn *p_in_conn,
+                                       struct qed_iscsi_conn **p_out_conn)
+{
+       struct qed_iscsi_conn *p_new = p_in_conn;
+       u32 cid;
+       int status;
+
+       /* CID acquire/release is serialized by the iSCSI info lock */
+       spin_lock_bh(&p_hwfn->p_iscsi_info->lock);
+       status = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ISCSI, &cid);
+       spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
+       if (status)
+               return status;
+
+       /* Only allocate when the caller did not supply a connection */
+       if (!p_new)
+               status = qed_iscsi_allocate_connection(p_hwfn, &p_new);
+       if (status)
+               goto err_release_cid;
+
+       status = qed_iscsi_setup_connection(p_hwfn, p_new);
+       if (status)
+               goto err_release_cid;
+
+       p_new->icid = cid;
+       p_new->conn_id = (u16)cid;
+       p_new->fw_cid = (p_hwfn->hw_info.opaque_fid << 16) | cid;
+
+       *p_out_conn = p_new;
+
+       return 0;
+
+err_release_cid:
+       spin_lock_bh(&p_hwfn->p_iscsi_info->lock);
+       qed_cxt_release_cid(p_hwfn, cid);
+       spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
+       return status;
+}
+
+/* Queue @p_conn on the iSCSI free list and release its CID; both steps run
+ * under the iSCSI info lock.
+ */
+static void qed_iscsi_release_connection(struct qed_hwfn *p_hwfn,
+                                        struct qed_iscsi_conn *p_conn)
+{
+       struct qed_iscsi_info *p_info = p_hwfn->p_iscsi_info;
+
+       spin_lock_bh(&p_info->lock);
+       list_add_tail(&p_conn->list_entry, &p_info->free_list);
+       qed_cxt_release_cid(p_hwfn, p_conn->icid);
+       spin_unlock_bh(&p_info->lock);
+}
+
+/* Allocate a zeroed qed_iscsi_info with an empty free list.
+ * Returns NULL when the allocation fails. The lock is not initialized here;
+ * qed_iscsi_setup() does that.
+ */
+struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
+{
+       struct qed_iscsi_info *p_info;
+
+       p_info = kzalloc(sizeof(*p_info), GFP_KERNEL);
+       if (p_info)
+               INIT_LIST_HEAD(&p_info->free_list);
+
+       return p_info;
+}
+
+/* Initialize the spinlock of @p_iscsi_info. @p_hwfn is unused here. */
+void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
+                    struct qed_iscsi_info *p_iscsi_info)
+{
+       spin_lock_init(&p_iscsi_info->lock);
+}
+
+/* Free the qed_iscsi_info structure itself. @p_hwfn is unused here.
+ *
+ * NOTE(review): connections queued on free_list by
+ * qed_iscsi_release_connection() are not freed here - confirm they are
+ * reclaimed elsewhere.
+ */
+void qed_iscsi_free(struct qed_hwfn *p_hwfn,
+                   struct qed_iscsi_info *p_iscsi_info)
+{
+       kfree(p_iscsi_info);
+}
+
+/* Copy this PF's Tstorm iSCSI RX statistics out of TSDM RAM and store the
+ * byte/packet counters and the three threshold counters in @p_stats.
+ */
+static void _qed_iscsi_get_tstats(struct qed_hwfn *p_hwfn,
+                                 struct qed_ptt *p_ptt,
+                                 struct qed_iscsi_stats *p_stats)
+{
+       struct tstorm_iscsi_stats_drv fw_stats;
+       u32 ram_addr = BAR0_MAP_REG_TSDM_RAM +
+                      TSTORM_ISCSI_RX_STATS_OFFSET(p_hwfn->rel_pf_id);
+
+       memset(&fw_stats, 0, sizeof(fw_stats));
+       qed_memcpy_from(p_hwfn, p_ptt, &fw_stats, ram_addr, sizeof(fw_stats));
+
+       /* 64-bit counters are stored as hi/lo register pairs */
+       p_stats->iscsi_rx_bytes_cnt =
+               HILO_64_REGPAIR(fw_stats.iscsi_rx_bytes_cnt);
+       p_stats->iscsi_rx_packet_cnt =
+               HILO_64_REGPAIR(fw_stats.iscsi_rx_packet_cnt);
+
+       /* Threshold counters are 32-bit little-endian values */
+       p_stats->iscsi_cmdq_threshold_cnt =
+               le32_to_cpu(fw_stats.iscsi_cmdq_threshold_cnt);
+       p_stats->iscsi_rq_threshold_cnt =
+               le32_to_cpu(fw_stats.iscsi_rq_threshold_cnt);
+       p_stats->iscsi_immq_threshold_cnt =
+               le32_to_cpu(fw_stats.iscsi_immq_threshold_cnt);
+}
+
+/* Copy this PF's Mstorm iSCSI RX statistics out of MSDM RAM and store the
+ * "dropped PDUs, task not valid" counter in @p_stats.
+ */
+static void _qed_iscsi_get_mstats(struct qed_hwfn *p_hwfn,
+                                 struct qed_ptt *p_ptt,
+                                 struct qed_iscsi_stats *p_stats)
+{
+       struct mstorm_iscsi_stats_drv fw_stats;
+       u32 ram_addr = BAR0_MAP_REG_MSDM_RAM +
+                      MSTORM_ISCSI_RX_STATS_OFFSET(p_hwfn->rel_pf_id);
+
+       memset(&fw_stats, 0, sizeof(fw_stats));
+       qed_memcpy_from(p_hwfn, p_ptt, &fw_stats, ram_addr, sizeof(fw_stats));
+
+       p_stats->iscsi_rx_dropped_pdus_task_not_valid =
+               HILO_64_REGPAIR(fw_stats.iscsi_rx_dropped_pdus_task_not_valid);
+}
+
+/* Copy this PF's Ustorm iSCSI RX statistics out of USDM RAM and store the
+ * data/R2T/total PDU counters in @p_stats.
+ */
+static void _qed_iscsi_get_ustats(struct qed_hwfn *p_hwfn,
+                                 struct qed_ptt *p_ptt,
+                                 struct qed_iscsi_stats *p_stats)
+{
+       struct ustorm_iscsi_stats_drv fw_stats;
+       u32 ram_addr = BAR0_MAP_REG_USDM_RAM +
+                      USTORM_ISCSI_RX_STATS_OFFSET(p_hwfn->rel_pf_id);
+
+       memset(&fw_stats, 0, sizeof(fw_stats));
+       qed_memcpy_from(p_hwfn, p_ptt, &fw_stats, ram_addr, sizeof(fw_stats));
+
+       p_stats->iscsi_rx_data_pdu_cnt =
+               HILO_64_REGPAIR(fw_stats.iscsi_rx_data_pdu_cnt);
+       p_stats->iscsi_rx_r2t_pdu_cnt =
+               HILO_64_REGPAIR(fw_stats.iscsi_rx_r2t_pdu_cnt);
+       p_stats->iscsi_rx_total_pdu_cnt =
+               HILO_64_REGPAIR(fw_stats.iscsi_rx_total_pdu_cnt);
+}
+
+/* Copy this PF's Xstorm iSCSI TX statistics out of XSDM RAM and store the
+ * slow-start and fast-retransmit event counters in @p_stats.
+ */
+static void _qed_iscsi_get_xstats(struct qed_hwfn *p_hwfn,
+                                 struct qed_ptt *p_ptt,
+                                 struct qed_iscsi_stats *p_stats)
+{
+       struct xstorm_iscsi_stats_drv fw_stats;
+       u32 ram_addr = BAR0_MAP_REG_XSDM_RAM +
+                      XSTORM_ISCSI_TX_STATS_OFFSET(p_hwfn->rel_pf_id);
+
+       memset(&fw_stats, 0, sizeof(fw_stats));
+       qed_memcpy_from(p_hwfn, p_ptt, &fw_stats, ram_addr, sizeof(fw_stats));
+
+       p_stats->iscsi_tx_go_to_slow_start_event_cnt =
+               HILO_64_REGPAIR(fw_stats.iscsi_tx_go_to_slow_start_event_cnt);
+       p_stats->iscsi_tx_fast_retransmit_event_cnt =
+               HILO_64_REGPAIR(fw_stats.iscsi_tx_fast_retransmit_event_cnt);
+}
+
+/* Copy this PF's Ystorm iSCSI TX statistics out of YSDM RAM and store the
+ * data/R2T/total PDU counters in @p_stats.
+ */
+static void _qed_iscsi_get_ystats(struct qed_hwfn *p_hwfn,
+                                 struct qed_ptt *p_ptt,
+                                 struct qed_iscsi_stats *p_stats)
+{
+       struct ystorm_iscsi_stats_drv fw_stats;
+       u32 ram_addr = BAR0_MAP_REG_YSDM_RAM +
+                      YSTORM_ISCSI_TX_STATS_OFFSET(p_hwfn->rel_pf_id);
+
+       memset(&fw_stats, 0, sizeof(fw_stats));
+       qed_memcpy_from(p_hwfn, p_ptt, &fw_stats, ram_addr, sizeof(fw_stats));
+
+       p_stats->iscsi_tx_data_pdu_cnt =
+               HILO_64_REGPAIR(fw_stats.iscsi_tx_data_pdu_cnt);
+       p_stats->iscsi_tx_r2t_pdu_cnt =
+               HILO_64_REGPAIR(fw_stats.iscsi_tx_r2t_pdu_cnt);
+       p_stats->iscsi_tx_total_pdu_cnt =
+               HILO_64_REGPAIR(fw_stats.iscsi_tx_total_pdu_cnt);
+}
+
+/* Copy this PF's Pstorm iSCSI TX statistics out of PSDM RAM and store the
+ * byte and packet counters in @p_stats.
+ */
+static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn,
+                                 struct qed_ptt *p_ptt,
+                                 struct qed_iscsi_stats *p_stats)
+{
+       struct pstorm_iscsi_stats_drv fw_stats;
+       u32 ram_addr = BAR0_MAP_REG_PSDM_RAM +
+                      PSTORM_ISCSI_TX_STATS_OFFSET(p_hwfn->rel_pf_id);
+
+       memset(&fw_stats, 0, sizeof(fw_stats));
+       qed_memcpy_from(p_hwfn, p_ptt, &fw_stats, ram_addr, sizeof(fw_stats));
+
+       p_stats->iscsi_tx_bytes_cnt =
+               HILO_64_REGPAIR(fw_stats.iscsi_tx_bytes_cnt);
+       p_stats->iscsi_tx_packet_cnt =
+               HILO_64_REGPAIR(fw_stats.iscsi_tx_packet_cnt);
+}
+
+/* Zero @stats and fill it from all six storm statistics blocks.
+ * Returns 0 on success, or -EAGAIN (with @stats left zeroed) when no PTT
+ * window could be acquired.
+ */
+static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn,
+                              struct qed_iscsi_stats *stats)
+{
+       struct qed_ptt *ptt;
+
+       memset(stats, 0, sizeof(*stats));
+
+       ptt = qed_ptt_acquire(p_hwfn);
+       if (!ptt) {
+               DP_ERR(p_hwfn, "Failed to acquire ptt\n");
+               return -EAGAIN;
+       }
+
+       /* RX side counters */
+       _qed_iscsi_get_tstats(p_hwfn, ptt, stats);
+       _qed_iscsi_get_mstats(p_hwfn, ptt, stats);
+       _qed_iscsi_get_ustats(p_hwfn, ptt, stats);
+
+       /* TX side counters */
+       _qed_iscsi_get_xstats(p_hwfn, ptt, stats);
+       _qed_iscsi_get_ystats(p_hwfn, ptt, stats);
+       _qed_iscsi_get_pstats(p_hwfn, ptt, stats);
+
+       qed_ptt_release(p_hwfn, ptt);
+
+       return 0;
+}
+
+/* Entry of the cdev->connections hash table; entries are keyed by the
+ * connection's icid (see qed_iscsi_acquire_conn()).
+ */
+struct qed_hash_iscsi_con {
+       struct hlist_node node;         /* hash table linkage */
+       struct qed_iscsi_conn *con;     /* the connection this entry refers to */
+};
+
+/* Zero @info, fill its common part through qed_fill_dev_info() and record the
+ * primary/secondary BDQ producer addresses of the RQ for the leading hwfn.
+ * Returns whatever qed_fill_dev_info() returned; the BDQ addresses are
+ * written regardless of that result.
+ */
+static int qed_fill_iscsi_dev_info(struct qed_dev *cdev,
+                                  struct qed_dev_iscsi_info *info)
+{
+       struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+       int status;
+
+       memset(info, 0, sizeof(*info));
+
+       status = qed_fill_dev_info(cdev, &info->common);
+
+       info->primary_dbq_rq_addr =
+               qed_iscsi_get_primary_bdq_prod(p_hwfn, BDQ_ID_RQ);
+       info->secondary_bdq_rq_addr =
+               qed_iscsi_get_secondary_bdq_prod(p_hwfn, BDQ_ID_RQ);
+
+       return status;
+}
+
+/* Record the protocol driver's iSCSI callback table and its opaque cookie
+ * on @cdev.
+ */
+static void qed_register_iscsi_ops(struct qed_dev *cdev,
+                                  struct qed_iscsi_cb_ops *ops, void *cookie)
+{
+       cdev->protocol_ops.iscsi = ops;
+       cdev->ops_cookie = cookie;
+}
+
+/* Look up the hash entry whose connection icid equals @handle.
+ * Returns NULL when storage has not been started or no entry matches.
+ */
+static struct qed_hash_iscsi_con *qed_iscsi_get_hash(struct qed_dev *cdev,
+                                                    u32 handle)
+{
+       struct qed_hash_iscsi_con *entry;
+
+       if (!(cdev->flags & QED_FLAG_STORAGE_STARTED))
+               return NULL;
+
+       /* Several handles may share a bucket, so compare the icid itself */
+       hash_for_each_possible(cdev->connections, entry, node, handle) {
+               if (entry->con->icid == handle)
+                       return entry;
+       }
+
+       return NULL;
+}
+
+/* Stop the iSCSI function on the leading hwfn.
+ *
+ * Returns 0 if iSCSI was not started, -EINVAL while connections are still
+ * present in the hash table, otherwise the result of the stop ramrod. The
+ * STORAGE_STARTED flag is cleared once the ramrod has been issued, whatever
+ * its result.
+ */
+static int qed_iscsi_stop(struct qed_dev *cdev)
+{
+       int status;
+
+       if (!(cdev->flags & QED_FLAG_STORAGE_STARTED)) {
+               DP_NOTICE(cdev, "iscsi already stopped\n");
+               return 0;
+       }
+
+       if (!hash_empty(cdev->connections)) {
+               DP_NOTICE(cdev,
+                         "Can't stop iscsi - not all connections were returned\n");
+               return -EINVAL;
+       }
+
+       status = qed_sp_iscsi_func_stop(QED_LEADING_HWFN(cdev),
+                                       QED_SPQ_MODE_EBLOCK, NULL);
+
+       cdev->flags &= ~QED_FLAG_STORAGE_STARTED;
+
+       return status;
+}
+
+/* Start the iSCSI function on the leading hwfn and, when @tasks is non-NULL,
+ * report the task (TID) memory layout through it.
+ *
+ * Returns 0 on success or if iSCSI is already started. If gathering the task
+ * information fails after the function was started, iSCSI is stopped again
+ * and the error is returned.
+ */
+static int qed_iscsi_start(struct qed_dev *cdev,
+                          struct qed_iscsi_tid *tasks,
+                          void *event_context,
+                          iscsi_event_cb_t async_event_cb)
+{
+       struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+       struct qed_tid_mem *tid_info = NULL;
+       int rc;
+
+       if (cdev->flags & QED_FLAG_STORAGE_STARTED) {
+               DP_NOTICE(cdev, "iscsi already started;\n");
+               return 0;
+       }
+
+       rc = qed_sp_iscsi_func_start(p_hwfn, QED_SPQ_MODE_EBLOCK, NULL,
+                                    event_context, async_event_cb);
+       if (rc) {
+               DP_NOTICE(cdev, "Failed to start iscsi\n");
+               return rc;
+       }
+
+       cdev->flags |= QED_FLAG_STORAGE_STARTED;
+       hash_init(cdev->connections);
+
+       /* Caller is not interested in the task layout */
+       if (!tasks)
+               return 0;
+
+       tid_info = kzalloc(sizeof(*tid_info), GFP_KERNEL);
+       if (!tid_info) {
+               rc = -ENOMEM;
+               goto err_stop;
+       }
+
+       rc = qed_cxt_get_tid_mem_info(p_hwfn, tid_info);
+       if (rc) {
+               DP_NOTICE(cdev, "Failed to gather task information\n");
+               goto err_stop;
+       }
+
+       /* Fill task information */
+       tasks->size = tid_info->tid_size;
+       tasks->num_tids_per_block = tid_info->num_tids_per_block;
+       memcpy(tasks->blocks, tid_info->blocks,
+              MAX_TID_BLOCKS_ISCSI * sizeof(u8 *));
+
+       kfree(tid_info);
+
+       return 0;
+
+err_stop:
+       /* Undo the start; kfree(NULL) is a no-op on the allocation failure */
+       qed_iscsi_stop(cdev);
+       kfree(tid_info);
+       return rc;
+}
+
+/* Acquire a new connection on the leading hwfn and add it to the
+ * cdev->connections hash table, keyed by its icid.
+ *
+ * On success *@handle receives the icid, *@fw_cid the firmware CID and, when
+ * @p_doorbell is non-NULL, *@p_doorbell the connection's doorbell address.
+ * Returns 0, -ENOMEM if the hash entry cannot be allocated, or the error
+ * from qed_iscsi_acquire_connection().
+ */
+static int qed_iscsi_acquire_conn(struct qed_dev *cdev,
+                                 u32 *handle,
+                                 u32 *fw_cid, void __iomem **p_doorbell)
+{
+       struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+       struct qed_hash_iscsi_con *entry;
+       int status;
+
+       entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+       if (!entry)
+               return -ENOMEM;
+
+       status = qed_iscsi_acquire_connection(p_hwfn, NULL, &entry->con);
+       if (status) {
+               DP_NOTICE(cdev, "Failed to acquire Connection\n");
+               kfree(entry);
+               return status;
+       }
+
+       /* Publish the identifiers, then make the entry findable by handle */
+       *handle = entry->con->icid;
+       *fw_cid = entry->con->fw_cid;
+       hash_add(cdev->connections, &entry->node, *handle);
+
+       if (p_doorbell)
+               *p_doorbell = qed_iscsi_get_db_addr(p_hwfn, *handle);
+
+       return 0;
+}
+
+/* Remove the connection identified by @handle from the hash table, release
+ * it on the leading hwfn and free its hash entry.
+ * Returns 0, or -EINVAL if no connection matches @handle.
+ */
+static int qed_iscsi_release_conn(struct qed_dev *cdev, u32 handle)
+{
+       struct qed_hash_iscsi_con *entry = qed_iscsi_get_hash(cdev, handle);
+
+       if (!entry) {
+               DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+                         handle);
+               return -EINVAL;
+       }
+
+       hlist_del(&entry->node);
+       qed_iscsi_release_connection(QED_LEADING_HWFN(cdev), entry->con);
+       kfree(entry);
+
+       return 0;
+}
+
+/* Copy the offload parameters in @conn_info into the connection identified
+ * by @handle and post the connection-offload ramrod on the leading hwfn.
+ * Returns -EINVAL if no connection matches @handle, otherwise the result of
+ * qed_sp_iscsi_conn_offload().
+ */
+static int qed_iscsi_offload_conn(struct qed_dev *cdev,
+                                 u32 handle,
+                                 struct qed_iscsi_params_offload *conn_info)
+{
+       struct qed_hash_iscsi_con *hash_con;
+       struct qed_iscsi_conn *con;
+
+       hash_con = qed_iscsi_get_hash(cdev, handle);
+       if (!hash_con) {
+               DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+                         handle);
+               return -EINVAL;
+       }
+
+       /* Update the connection with information from the params */
+       con = hash_con->con;
+
+       /* Addressing: MACs, IPs and ports of both endpoints */
+       ether_addr_copy(con->local_mac, conn_info->src.mac);
+       ether_addr_copy(con->remote_mac, conn_info->dst.mac);
+       memcpy(con->local_ip, conn_info->src.ip, sizeof(con->local_ip));
+       memcpy(con->remote_ip, conn_info->dst.ip, sizeof(con->remote_ip));
+       con->local_port = conn_info->src.port;
+       con->remote_port = conn_info->dst.port;
+
+       con->layer_code = conn_info->layer_code;
+       con->sq_pbl_addr = conn_info->sq_pbl_addr;
+       con->initial_ack = conn_info->initial_ack;
+       con->vlan_id = conn_info->vlan_id;
+       con->tcp_flags = conn_info->tcp_flags;
+       con->ip_version = conn_info->ip_version;
+       con->default_cq = conn_info->default_cq;
+       con->ka_max_probe_cnt = conn_info->ka_max_probe_cnt;
+       con->dup_ack_theshold = conn_info->dup_ack_theshold;
+       con->rcv_next = conn_info->rcv_next;
+       con->snd_una = conn_info->snd_una;
+       con->snd_next = conn_info->snd_next;
+       con->snd_max = conn_info->snd_max;
+       con->snd_wnd = conn_info->snd_wnd;
+       con->rcv_wnd = conn_info->rcv_wnd;
+       con->snd_wl1 = conn_info->snd_wl1;
+       con->cwnd = conn_info->cwnd;
+       con->ss_thresh = conn_info->ss_thresh;
+       con->srtt = conn_info->srtt;
+       con->rtt_var = conn_info->rtt_var;
+       con->ts_time = conn_info->ts_time;
+       con->ts_recent = conn_info->ts_recent;
+       con->ts_recent_age = conn_info->ts_recent_age;
+       con->total_rt = conn_info->total_rt;
+       con->ka_timeout_delta = conn_info->ka_timeout_delta;
+       con->rt_timeout_delta = conn_info->rt_timeout_delta;
+       con->dup_ack_cnt = conn_info->dup_ack_cnt;
+       con->snd_wnd_probe_cnt = conn_info->snd_wnd_probe_cnt;
+       con->ka_probe_cnt = conn_info->ka_probe_cnt;
+       con->rt_cnt = conn_info->rt_cnt;
+       con->flow_label = conn_info->flow_label;
+       con->ka_timeout = conn_info->ka_timeout;
+       con->ka_interval = conn_info->ka_interval;
+       con->max_rt_time = conn_info->max_rt_time;
+       con->initial_rcv_wnd = conn_info->initial_rcv_wnd;
+       con->ttl = conn_info->ttl;
+       con->tos_or_tc = conn_info->tos_or_tc;
+       /* NOTE(review): these two overwrite the ports copied from
+        * src.port/dst.port above, so the top-level fields win - confirm
+        * callers always fill both sets consistently.
+        */
+       con->remote_port = conn_info->remote_port;
+       con->local_port = conn_info->local_port;
+       con->mss = conn_info->mss;
+       con->snd_wnd_scale = conn_info->snd_wnd_scale;
+       con->rcv_wnd_scale = conn_info->rcv_wnd_scale;
+       con->ts_ticks_per_second = conn_info->ts_ticks_per_second;
+       con->da_timeout_value = conn_info->da_timeout_value;
+       con->ack_frequency = conn_info->ack_frequency;
+
+       /* Set default values on other connection fields */
+       con->offl_flags = 0x1;
+
+       return qed_sp_iscsi_conn_offload(QED_LEADING_HWFN(cdev), con,
+                                        QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+/* Copy the update parameters in @conn_info into the connection identified by
+ * @handle and post the connection-update ramrod on the leading hwfn.
+ * Returns -EINVAL if no connection matches @handle, otherwise the result of
+ * qed_sp_iscsi_conn_update().
+ */
+static int qed_iscsi_update_conn(struct qed_dev *cdev,
+                                u32 handle,
+                                struct qed_iscsi_params_update *conn_info)
+{
+       struct qed_hash_iscsi_con *entry = qed_iscsi_get_hash(cdev, handle);
+       struct qed_iscsi_conn *p_conn;
+
+       if (!entry) {
+               DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+                         handle);
+               return -EINVAL;
+       }
+
+       p_conn = entry->con;
+
+       p_conn->update_flag = conn_info->update_flag;
+       p_conn->max_seq_size = conn_info->max_seq_size;
+       p_conn->max_recv_pdu_length = conn_info->max_recv_pdu_length;
+       p_conn->max_send_pdu_length = conn_info->max_send_pdu_length;
+       p_conn->first_seq_length = conn_info->first_seq_length;
+       p_conn->exp_stat_sn = conn_info->exp_stat_sn;
+
+       return qed_sp_iscsi_conn_update(QED_LEADING_HWFN(cdev), p_conn,
+                                       QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+/* Post the clear-SQ ramrod for the connection identified by @handle.
+ * Returns -EINVAL if no connection matches @handle, otherwise the result of
+ * qed_sp_iscsi_conn_clear_sq().
+ */
+static int qed_iscsi_clear_conn_sq(struct qed_dev *cdev, u32 handle)
+{
+       struct qed_hash_iscsi_con *entry = qed_iscsi_get_hash(cdev, handle);
+
+       if (!entry) {
+               DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+                         handle);
+               return -EINVAL;
+       }
+
+       return qed_sp_iscsi_conn_clear_sq(QED_LEADING_HWFN(cdev), entry->con,
+                                         QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+/* Record @abrt_conn in the connection identified by @handle and post the
+ * connection-terminate ramrod on the leading hwfn.
+ * Returns -EINVAL if no connection matches @handle, otherwise the result of
+ * qed_sp_iscsi_conn_terminate().
+ */
+static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
+                                 u32 handle, u8 abrt_conn)
+{
+       struct qed_hash_iscsi_con *entry = qed_iscsi_get_hash(cdev, handle);
+       struct qed_iscsi_conn *p_conn;
+
+       if (!entry) {
+               DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+                         handle);
+               return -EINVAL;
+       }
+
+       p_conn = entry->con;
+       p_conn->abortive_dsconnect = abrt_conn;
+
+       return qed_sp_iscsi_conn_terminate(QED_LEADING_HWFN(cdev), p_conn,
+                                          QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+/* ops-table wrapper: collect iSCSI statistics from the leading hwfn. */
+static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
+{
+       return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats);
+}
+
+/* iSCSI ops table returned by qed_get_iscsi_ops(). */
+static const struct qed_iscsi_ops qed_iscsi_ops_pass = {
+       /* Shared sub-tables */
+       .common = &qed_common_ops_pass,
+       .ll2 = &qed_ll2_ops_pass,
+
+       /* Device-level entry points */
+       .fill_dev_info = qed_fill_iscsi_dev_info,
+       .register_ops = qed_register_iscsi_ops,
+       .start = qed_iscsi_start,
+       .stop = qed_iscsi_stop,
+
+       /* Per-connection entry points */
+       .acquire_conn = qed_iscsi_acquire_conn,
+       .release_conn = qed_iscsi_release_conn,
+       .offload_conn = qed_iscsi_offload_conn,
+       .update_conn = qed_iscsi_update_conn,
+       .destroy_conn = qed_iscsi_destroy_conn,
+       .clear_sq = qed_iscsi_clear_conn_sq,
+
+       .get_stats = qed_iscsi_stats,
+};
+
+/* Return the iSCSI ops table.
+ *
+ * The parameter list is spelled (void): in C an empty "()" is an old-style
+ * definition that provides no prototype and trips -Wstrict-prototypes.
+ */
+const struct qed_iscsi_ops *qed_get_iscsi_ops(void)
+{
+       return &qed_iscsi_ops_pass;
+}
+EXPORT_SYMBOL(qed_get_iscsi_ops);
+
+/* Counterpart of qed_get_iscsi_ops(); intentionally empty - the ops table is
+ * a static object, so there is nothing to release.
+ */
+void qed_put_iscsi_ops(void)
+{
+}
+EXPORT_SYMBOL(qed_put_iscsi_ops);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
new file mode 100644 (file)
index 0000000..67c25f3
--- /dev/null
@@ -0,0 +1,52 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_ISCSI_H
+#define _QED_ISCSI_H
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/qed/tcp_common.h>
+#include <linux/qed/qed_iscsi_if.h>
+#include <linux/qed/qed_chain.h>
+#include "qed.h"
+#include "qed_hsi.h"
+#include "qed_mcp.h"
+#include "qed_sp.h"
+
+/* iSCSI state reached through p_hwfn->p_iscsi_info.
+ * Allocated by qed_iscsi_alloc(), freed by qed_iscsi_free().
+ */
+struct qed_iscsi_info {
+       spinlock_t lock; /* Connection resources. */
+       struct list_head free_list;     /* initialized empty by qed_iscsi_alloc() */
+       u16 max_num_outstanding_tasks;
+       /* NOTE(review): presumably the async event callback and its context
+        * as passed to the iSCSI start path - confirm against qed_iscsi.c.
+        */
+       void *event_context;
+       iscsi_event_cb_t event_cb;
+};
+
+#ifdef CONFIG_QED_LL2
+extern const struct qed_ll2_ops qed_ll2_ops_pass;
+#endif
+
+/* Lifecycle helpers for struct qed_iscsi_info. When CONFIG_QED_ISCSI is not
+ * enabled they collapse to inline stubs: alloc returns NULL, setup and free
+ * do nothing.
+ */
+#if IS_ENABLED(CONFIG_QED_ISCSI)
+struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn);
+
+void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
+                    struct qed_iscsi_info *p_iscsi_info);
+
+void qed_iscsi_free(struct qed_hwfn *p_hwfn,
+                   struct qed_iscsi_info *p_iscsi_info);
+#else /* IS_ENABLED(CONFIG_QED_ISCSI) */
+static inline struct qed_iscsi_info *qed_iscsi_alloc(
+               struct qed_hwfn *p_hwfn) { return NULL; }
+static inline void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
+                                  struct qed_iscsi_info *p_iscsi_info) {}
+static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn,
+                                 struct qed_iscsi_info *p_iscsi_info) {}
+#endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
+
+#endif
index 6b0e22d9fe4cf1c5b40df81606ec36d6f45364c2..6a3727c4c0c61a9a17aa1d5851c263f7a5eb2296 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 #include <linux/bug.h>
+#include <linux/vmalloc.h>
 #include "qed.h"
 #include <linux/qed/qed_chain.h>
 #include "qed_cxt.h"
 #define QED_MAX_SGES_NUM 16
 #define CRC32_POLY 0x1edc6f41
 
+/* Free a queue-cid object, first returning its CID to the context pool when
+ * this is a PF and the cid is not marked as belonging to a VF.
+ */
+void qed_eth_queue_cid_release(struct qed_hwfn *p_hwfn,
+                              struct qed_queue_cid *p_cid)
+{
+       /* VFs' CIDs are 0-based in PF-view, and uninitialized on VF */
+       bool b_release = !p_cid->is_vf && IS_PF(p_hwfn->cdev);
+
+       if (b_release)
+               qed_cxt_release_cid(p_hwfn, p_cid->cid);
+
+       vfree(p_cid);
+}
+
+/* Build a queue-cid object describing one L2 queue.
+ *
+ * This internal variant is only meant to be called directly by PFs
+ * initializing CIDs for their VFs; other callers go through
+ * qed_eth_queue_to_cid().
+ *
+ * The relative indices from @p_params are stored in ->rel and, on a PF,
+ * translated into engine-absolute indices in ->abs. On a VF ->abs is a plain
+ * copy of ->rel. Returns the new object, or NULL if the allocation or any
+ * index translation fails. Release it with qed_eth_queue_cid_release().
+ */
+struct qed_queue_cid *
+_qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
+                     u16 opaque_fid,
+                     u32 cid,
+                     u8 vf_qid,
+                     struct qed_queue_start_common_params *p_params)
+{
+       bool b_is_same = (p_hwfn->hw_info.opaque_fid == opaque_fid);
+       struct qed_queue_cid *p_cid;
+       int rc;
+
+       /* vzalloc() returns zeroed memory, so no separate memset is needed */
+       p_cid = vzalloc(sizeof(*p_cid));
+       if (!p_cid)
+               return NULL;
+
+       p_cid->opaque_fid = opaque_fid;
+       p_cid->cid = cid;
+       p_cid->vf_qid = vf_qid;
+       p_cid->rel = *p_params;
+
+       /* Don't try calculating the absolute indices for VFs */
+       if (IS_VF(p_hwfn->cdev)) {
+               p_cid->abs = p_cid->rel;
+               goto out;
+       }
+
+       /* Calculate the engine-absolute indices of the resources.
+        * This would guarantee they're valid later on.
+        * In some cases [SBs] we already have the right values.
+        */
+       rc = qed_fw_vport(p_hwfn, p_cid->rel.vport_id, &p_cid->abs.vport_id);
+       if (rc)
+               goto fail;
+
+       rc = qed_fw_l2_queue(p_hwfn, p_cid->rel.queue_id, &p_cid->abs.queue_id);
+       if (rc)
+               goto fail;
+
+       /* In case of a PF configuring its VF's queues, the stats-id is already
+        * absolute [since there's a single index that's suitable per-VF].
+        */
+       if (b_is_same) {
+               rc = qed_fw_vport(p_hwfn, p_cid->rel.stats_id,
+                                 &p_cid->abs.stats_id);
+               if (rc)
+                       goto fail;
+       } else {
+               p_cid->abs.stats_id = p_cid->rel.stats_id;
+       }
+
+       /* SBs relevant information was already provided as absolute */
+       p_cid->abs.sb = p_cid->rel.sb;
+       p_cid->abs.sb_idx = p_cid->rel.sb_idx;
+
+       /* This is tricky - we're actually interested in whether this is a PF
+        * entry meant for the VF.
+        */
+       if (!b_is_same)
+               p_cid->is_vf = true;
+out:
+       DP_VERBOSE(p_hwfn,
+                  QED_MSG_SP,
+                  "opaque_fid: %04x CID %08x vport %02x [%02x] qzone %04x [%04x] stats %02x [%02x] SB %04x PI %02x\n",
+                  p_cid->opaque_fid,
+                  p_cid->cid,
+                  p_cid->rel.vport_id,
+                  p_cid->abs.vport_id,
+                  p_cid->rel.queue_id,
+                  p_cid->abs.queue_id,
+                  p_cid->rel.stats_id,
+                  p_cid->abs.stats_id, p_cid->abs.sb, p_cid->abs.sb_idx);
+
+       return p_cid;
+
+fail:
+       vfree(p_cid);
+       return NULL;
+}
+
+/* Create a queue-cid object for a queue of this function.
+ *
+ * A PF first acquires a unique firmware CID for the queue; a VF passes cid 0
+ * because the PF performs the queue configuration. If building the object
+ * fails on a PF, the acquired CID is released again. Returns the new object
+ * or NULL.
+ */
+static struct qed_queue_cid *
+qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
+                    u16 opaque_fid,
+                    struct qed_queue_start_common_params *p_params)
+{
+       struct qed_queue_cid *p_new;
+       u32 fw_cid = 0;
+
+       if (IS_PF(p_hwfn->cdev) &&
+           qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &fw_cid)) {
+               DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
+               return NULL;
+       }
+
+       p_new = _qed_eth_queue_to_cid(p_hwfn, opaque_fid, fw_cid, 0, p_params);
+       if (p_new)
+               return p_new;
+
+       /* Construction failed - give the CID back if one was taken */
+       if (IS_PF(p_hwfn->cdev))
+               qed_cxt_release_cid(p_hwfn, fw_cid);
+
+       return NULL;
+}
+
 int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
                           struct qed_sp_vport_start_params *p_params)
 {
@@ -496,61 +615,26 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev,
        return 0;
 }
 
-static int qed_sp_release_queue_cid(
-       struct qed_hwfn *p_hwfn,
-       struct qed_hw_cid_data *p_cid_data)
-{
-       if (!p_cid_data->b_cid_allocated)
-               return 0;
-
-       qed_cxt_release_cid(p_hwfn, p_cid_data->cid);
-
-       p_cid_data->b_cid_allocated = false;
-
-       return 0;
-}
-
-int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
-                               u16 opaque_fid,
-                               u32 cid,
-                               struct qed_queue_start_common_params *p_params,
-                               u8 stats_id,
-                               u16 bd_max_bytes,
-                               dma_addr_t bd_chain_phys_addr,
-                               dma_addr_t cqe_pbl_addr,
-                               u16 cqe_pbl_size, bool b_use_zone_a_prod)
+int qed_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
+                            struct qed_queue_cid *p_cid,
+                            u16 bd_max_bytes,
+                            dma_addr_t bd_chain_phys_addr,
+                            dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size)
 {
        struct rx_queue_start_ramrod_data *p_ramrod = NULL;
        struct qed_spq_entry *p_ent = NULL;
        struct qed_sp_init_data init_data;
-       struct qed_hw_cid_data *p_rx_cid;
-       u16 abs_rx_q_id = 0;
-       u8 abs_vport_id = 0;
        int rc = -EINVAL;
 
-       /* Store information for the stop */
-       p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id];
-       p_rx_cid->cid = cid;
-       p_rx_cid->opaque_fid = opaque_fid;
-       p_rx_cid->vport_id = p_params->vport_id;
-
-       rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
-       if (rc)
-               return rc;
-
-       rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_rx_q_id);
-       if (rc)
-               return rc;
-
        DP_VERBOSE(p_hwfn, QED_MSG_SP,
-                  "opaque_fid=0x%x, cid=0x%x, rx_qid=0x%x, vport_id=0x%x, sb_id=0x%x\n",
-                  opaque_fid,
-                  cid, p_params->queue_id, p_params->vport_id, p_params->sb);
+                  "opaque_fid=0x%x, cid=0x%x, rx_qzone=0x%x, vport_id=0x%x, sb_id=0x%x\n",
+                  p_cid->opaque_fid, p_cid->cid,
+                  p_cid->abs.queue_id, p_cid->abs.vport_id, p_cid->abs.sb);
 
        /* Get SPQ entry */
        memset(&init_data, 0, sizeof(init_data));
-       init_data.cid = cid;
-       init_data.opaque_fid = opaque_fid;
+       init_data.cid = p_cid->cid;
+       init_data.opaque_fid = p_cid->opaque_fid;
        init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
        rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -561,11 +645,11 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
 
        p_ramrod = &p_ent->ramrod.rx_queue_start;
 
-       p_ramrod->sb_id = cpu_to_le16(p_params->sb);
-       p_ramrod->sb_index = p_params->sb_idx;
-       p_ramrod->vport_id = abs_vport_id;
-       p_ramrod->stats_counter_id = stats_id;
-       p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id);
+       p_ramrod->sb_id = cpu_to_le16(p_cid->abs.sb);
+       p_ramrod->sb_index = p_cid->abs.sb_idx;
+       p_ramrod->vport_id = p_cid->abs.vport_id;
+       p_ramrod->stats_counter_id = p_cid->abs.stats_id;
+       p_ramrod->rx_queue_id = cpu_to_le16(p_cid->abs.queue_id);
        p_ramrod->complete_cqe_flg = 0;
        p_ramrod->complete_event_flg = 1;
 
@@ -575,85 +659,85 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
        p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size);
        DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr);
 
-       if (p_params->vf_qid || b_use_zone_a_prod) {
-               p_ramrod->vf_rx_prod_index = p_params->vf_qid;
+       if (p_cid->is_vf) {
+               p_ramrod->vf_rx_prod_index = p_cid->vf_qid;
                DP_VERBOSE(p_hwfn, QED_MSG_SP,
                           "Queue%s is meant for VF rxq[%02x]\n",
-                          b_use_zone_a_prod ? " [legacy]" : "",
-                          p_params->vf_qid);
-               p_ramrod->vf_rx_prod_use_zone_a = b_use_zone_a_prod;
+                          !!p_cid->b_legacy_vf ? " [legacy]" : "",
+                          p_cid->vf_qid);
+               p_ramrod->vf_rx_prod_use_zone_a = !!p_cid->b_legacy_vf;
        }
 
        return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
 static int
-qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
-                         u16 opaque_fid,
-                         struct qed_queue_start_common_params *p_params,
+qed_eth_pf_rx_queue_start(struct qed_hwfn *p_hwfn,
+                         struct qed_queue_cid *p_cid,
                          u16 bd_max_bytes,
                          dma_addr_t bd_chain_phys_addr,
                          dma_addr_t cqe_pbl_addr,
                          u16 cqe_pbl_size, void __iomem **pp_prod)
 {
-       struct qed_hw_cid_data *p_rx_cid;
        u32 init_prod_val = 0;
-       u16 abs_l2_queue = 0;
-       u8 abs_stats_id = 0;
-       int rc;
 
-       if (IS_VF(p_hwfn->cdev)) {
-               return qed_vf_pf_rxq_start(p_hwfn,
-                                          p_params->queue_id,
-                                          p_params->sb,
-                                          (u8)p_params->sb_idx,
-                                          bd_max_bytes,
-                                          bd_chain_phys_addr,
-                                          cqe_pbl_addr, cqe_pbl_size, pp_prod);
-       }
-
-       rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_l2_queue);
-       if (rc)
-               return rc;
-
-       rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_stats_id);
-       if (rc)
-               return rc;
-
-       *pp_prod = (u8 __iomem *)p_hwfn->regview +
-                                GTT_BAR0_MAP_REG_MSDM_RAM +
-                                MSTORM_ETH_PF_PRODS_OFFSET(abs_l2_queue);
+       *pp_prod = p_hwfn->regview +
+                  GTT_BAR0_MAP_REG_MSDM_RAM +
+                   MSTORM_ETH_PF_PRODS_OFFSET(p_cid->abs.queue_id);
 
        /* Init the rcq, rx bd and rx sge (if valid) producers to 0 */
        __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
                          (u32 *)(&init_prod_val));
 
+       return qed_eth_rxq_start_ramrod(p_hwfn, p_cid,
+                                       bd_max_bytes,
+                                       bd_chain_phys_addr,
+                                       cqe_pbl_addr, cqe_pbl_size);
+}
+
+static int
+qed_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
+                      u16 opaque_fid,
+                      struct qed_queue_start_common_params *p_params,
+                      u16 bd_max_bytes,
+                      dma_addr_t bd_chain_phys_addr,
+                      dma_addr_t cqe_pbl_addr,
+                      u16 cqe_pbl_size,
+                      struct qed_rxq_start_ret_params *p_ret_params)
+{
+       struct qed_queue_cid *p_cid;
+       int rc;
+
        /* Allocate a CID for the queue */
-       p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id];
-       rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_rx_cid->cid);
-       if (rc) {
-               DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
-               return rc;
-       }
-       p_rx_cid->b_cid_allocated = true;
+       p_cid = qed_eth_queue_to_cid(p_hwfn, opaque_fid, p_params);
+       if (!p_cid)
+               return -ENOMEM;
 
-       rc = qed_sp_eth_rxq_start_ramrod(p_hwfn,
-                                        opaque_fid,
-                                        p_rx_cid->cid,
-                                        p_params,
-                                        abs_stats_id,
+       if (IS_PF(p_hwfn->cdev)) {
+               rc = qed_eth_pf_rx_queue_start(p_hwfn, p_cid,
+                                              bd_max_bytes,
+                                              bd_chain_phys_addr,
+                                              cqe_pbl_addr, cqe_pbl_size,
+                                              &p_ret_params->p_prod);
+       } else {
+               rc = qed_vf_pf_rxq_start(p_hwfn, p_cid,
                                         bd_max_bytes,
                                         bd_chain_phys_addr,
-                                        cqe_pbl_addr, cqe_pbl_size, false);
+                                        cqe_pbl_addr,
+                                        cqe_pbl_size, &p_ret_params->p_prod);
+       }
 
+       /* Provide the caller with a reference to use as a handle */
        if (rc)
-               qed_sp_release_queue_cid(p_hwfn, p_rx_cid);
+               qed_eth_queue_cid_release(p_hwfn, p_cid);
+       else
+               p_ret_params->p_handle = (void *)p_cid;
 
        return rc;
 }
 
 int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
-                               u16 rx_queue_id,
+                               void **pp_rxq_handles,
                                u8 num_rxqs,
                                u8 complete_cqe_flg,
                                u8 complete_event_flg,
@@ -663,8 +747,7 @@ int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
        struct rx_queue_update_ramrod_data *p_ramrod = NULL;
        struct qed_spq_entry *p_ent = NULL;
        struct qed_sp_init_data init_data;
-       struct qed_hw_cid_data *p_rx_cid;
-       u16 qid, abs_rx_q_id = 0;
+       struct qed_queue_cid *p_cid;
        int rc = -EINVAL;
        u8 i;
 
@@ -673,12 +756,11 @@ int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
        init_data.p_comp_data = p_comp_data;
 
        for (i = 0; i < num_rxqs; i++) {
-               qid = rx_queue_id + i;
-               p_rx_cid = &p_hwfn->p_rx_cids[qid];
+               p_cid = ((struct qed_queue_cid **)pp_rxq_handles)[i];
 
                /* Get SPQ entry */
-               init_data.cid = p_rx_cid->cid;
-               init_data.opaque_fid = p_rx_cid->opaque_fid;
+               init_data.cid = p_cid->cid;
+               init_data.opaque_fid = p_cid->opaque_fid;
 
                rc = qed_sp_init_request(p_hwfn, &p_ent,
                                         ETH_RAMROD_RX_QUEUE_UPDATE,
@@ -687,10 +769,9 @@ int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
                        return rc;
 
                p_ramrod = &p_ent->ramrod.rx_queue_update;
+               p_ramrod->vport_id = p_cid->abs.vport_id;
 
-               qed_fw_vport(p_hwfn, p_rx_cid->vport_id, &p_ramrod->vport_id);
-               qed_fw_l2_queue(p_hwfn, qid, &abs_rx_q_id);
-               p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id);
+               p_ramrod->rx_queue_id = cpu_to_le16(p_cid->abs.queue_id);
                p_ramrod->complete_cqe_flg = complete_cqe_flg;
                p_ramrod->complete_event_flg = complete_event_flg;
 
@@ -702,24 +783,19 @@ int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
-int qed_sp_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
-                            u16 rx_queue_id,
-                            bool eq_completion_only, bool cqe_completion)
+static int
+qed_eth_pf_rx_queue_stop(struct qed_hwfn *p_hwfn,
+                        struct qed_queue_cid *p_cid,
+                        bool b_eq_completion_only, bool b_cqe_completion)
 {
-       struct qed_hw_cid_data *p_rx_cid = &p_hwfn->p_rx_cids[rx_queue_id];
        struct rx_queue_stop_ramrod_data *p_ramrod = NULL;
        struct qed_spq_entry *p_ent = NULL;
        struct qed_sp_init_data init_data;
-       u16 abs_rx_q_id = 0;
-       int rc = -EINVAL;
-
-       if (IS_VF(p_hwfn->cdev))
-               return qed_vf_pf_rxq_stop(p_hwfn, rx_queue_id, cqe_completion);
+       int rc;
 
-       /* Get SPQ entry */
        memset(&init_data, 0, sizeof(init_data));
-       init_data.cid = p_rx_cid->cid;
-       init_data.opaque_fid = p_rx_cid->opaque_fid;
+       init_data.cid = p_cid->cid;
+       init_data.opaque_fid = p_cid->opaque_fid;
        init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
        rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -729,62 +805,53 @@ int qed_sp_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
                return rc;
 
        p_ramrod = &p_ent->ramrod.rx_queue_stop;
-
-       qed_fw_vport(p_hwfn, p_rx_cid->vport_id, &p_ramrod->vport_id);
-       qed_fw_l2_queue(p_hwfn, rx_queue_id, &abs_rx_q_id);
-       p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id);
+       p_ramrod->vport_id = p_cid->abs.vport_id;
+       p_ramrod->rx_queue_id = cpu_to_le16(p_cid->abs.queue_id);
 
        /* Cleaning the queue requires the completion to arrive there.
         * In addition, VFs require the answer to come as eqe to PF.
         */
-       p_ramrod->complete_cqe_flg =
-               (!!(p_rx_cid->opaque_fid == p_hwfn->hw_info.opaque_fid) &&
-                !eq_completion_only) || cqe_completion;
-       p_ramrod->complete_event_flg =
-               !(p_rx_cid->opaque_fid == p_hwfn->hw_info.opaque_fid) ||
-               eq_completion_only;
+       p_ramrod->complete_cqe_flg = (!p_cid->is_vf &&
+                                     !b_eq_completion_only) ||
+                                    b_cqe_completion;
+       p_ramrod->complete_event_flg = p_cid->is_vf || b_eq_completion_only;
 
-       rc = qed_spq_post(p_hwfn, p_ent, NULL);
-       if (rc)
-               return rc;
+       return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+int qed_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
+                         void *p_rxq,
+                         bool eq_completion_only, bool cqe_completion)
+{
+       struct qed_queue_cid *p_cid = (struct qed_queue_cid *)p_rxq;
+       int rc = -EINVAL;
 
-       return qed_sp_release_queue_cid(p_hwfn, p_rx_cid);
+       if (IS_PF(p_hwfn->cdev))
+               rc = qed_eth_pf_rx_queue_stop(p_hwfn, p_cid,
+                                             eq_completion_only,
+                                             cqe_completion);
+       else
+               rc = qed_vf_pf_rxq_stop(p_hwfn, p_cid, cqe_completion);
+
+       if (!rc)
+               qed_eth_queue_cid_release(p_hwfn, p_cid);
+       return rc;
 }
 
-int qed_sp_eth_txq_start_ramrod(struct qed_hwfn  *p_hwfn,
-                               u16  opaque_fid,
-                               u32  cid,
-                               struct qed_queue_start_common_params *p_params,
-                               u8  stats_id,
-                               dma_addr_t pbl_addr,
-                               u16 pbl_size,
-                               union qed_qm_pq_params *p_pq_params)
+int
+qed_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn,
+                        struct qed_queue_cid *p_cid,
+                        dma_addr_t pbl_addr, u16 pbl_size, u16 pq_id)
 {
        struct tx_queue_start_ramrod_data *p_ramrod = NULL;
        struct qed_spq_entry *p_ent = NULL;
        struct qed_sp_init_data init_data;
-       struct qed_hw_cid_data *p_tx_cid;
-       u16 pq_id, abs_tx_q_id = 0;
        int rc = -EINVAL;
-       u8 abs_vport_id;
-
-       /* Store information for the stop */
-       p_tx_cid = &p_hwfn->p_tx_cids[p_params->queue_id];
-       p_tx_cid->cid           = cid;
-       p_tx_cid->opaque_fid    = opaque_fid;
-
-       rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
-       if (rc)
-               return rc;
-
-       rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_tx_q_id);
-       if (rc)
-               return rc;
 
        /* Get SPQ entry */
        memset(&init_data, 0, sizeof(init_data));
-       init_data.cid = cid;
-       init_data.opaque_fid = opaque_fid;
+       init_data.cid = p_cid->cid;
+       init_data.opaque_fid = p_cid->opaque_fid;
        init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
        rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -794,96 +861,92 @@ int qed_sp_eth_txq_start_ramrod(struct qed_hwfn  *p_hwfn,
                return rc;
 
        p_ramrod = &p_ent->ramrod.tx_queue_start;
-       p_ramrod->vport_id = abs_vport_id;
+       p_ramrod->vport_id = p_cid->abs.vport_id;
 
-       p_ramrod->sb_id = cpu_to_le16(p_params->sb);
-       p_ramrod->sb_index = p_params->sb_idx;
-       p_ramrod->stats_counter_id = stats_id;
+       p_ramrod->sb_id = cpu_to_le16(p_cid->abs.sb);
+       p_ramrod->sb_index = p_cid->abs.sb_idx;
+       p_ramrod->stats_counter_id = p_cid->abs.stats_id;
 
-       p_ramrod->queue_zone_id = cpu_to_le16(abs_tx_q_id);
+       p_ramrod->queue_zone_id = cpu_to_le16(p_cid->abs.queue_id);
+       p_ramrod->same_as_last_id = cpu_to_le16(p_cid->abs.queue_id);
 
        p_ramrod->pbl_size = cpu_to_le16(pbl_size);
        DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, pbl_addr);
 
-       pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, p_pq_params);
        p_ramrod->qm_pq_id = cpu_to_le16(pq_id);
 
        return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
 static int
-qed_sp_eth_tx_queue_start(struct qed_hwfn *p_hwfn,
-                         u16 opaque_fid,
-                         struct qed_queue_start_common_params *p_params,
+qed_eth_pf_tx_queue_start(struct qed_hwfn *p_hwfn,
+                         struct qed_queue_cid *p_cid,
+                         u8 tc,
                          dma_addr_t pbl_addr,
                          u16 pbl_size, void __iomem **pp_doorbell)
 {
-       struct qed_hw_cid_data *p_tx_cid;
        union qed_qm_pq_params pq_params;
-       u8 abs_stats_id = 0;
        int rc;
 
-       if (IS_VF(p_hwfn->cdev)) {
-               return qed_vf_pf_txq_start(p_hwfn,
-                                          p_params->queue_id,
-                                          p_params->sb,
-                                          p_params->sb_idx,
-                                          pbl_addr, pbl_size, pp_doorbell);
-       }
+       memset(&pq_params, 0, sizeof(pq_params));
 
-       rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_stats_id);
+       rc = qed_eth_txq_start_ramrod(p_hwfn, p_cid,
+                                     pbl_addr, pbl_size,
+                                     qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH,
+                                                   &pq_params));
        if (rc)
                return rc;
 
-       p_tx_cid = &p_hwfn->p_tx_cids[p_params->queue_id];
-       memset(p_tx_cid, 0, sizeof(*p_tx_cid));
-       memset(&pq_params, 0, sizeof(pq_params));
+       /* Provide the caller with the necessary return values */
+       *pp_doorbell = p_hwfn->doorbells +
+                      qed_db_addr(p_cid->cid, DQ_DEMS_LEGACY);
 
-       /* Allocate a CID for the queue */
-       rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_tx_cid->cid);
-       if (rc) {
-               DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
-               return rc;
-       }
-       p_tx_cid->b_cid_allocated = true;
+       return 0;
+}
 
-       DP_VERBOSE(p_hwfn, QED_MSG_SP,
-                  "opaque_fid=0x%x, cid=0x%x, tx_qid=0x%x, vport_id=0x%x, sb_id=0x%x\n",
-                  opaque_fid, p_tx_cid->cid,
-                  p_params->queue_id, p_params->vport_id, p_params->sb);
-
-       rc = qed_sp_eth_txq_start_ramrod(p_hwfn,
-                                        opaque_fid,
-                                        p_tx_cid->cid,
-                                        p_params,
-                                        abs_stats_id,
-                                        pbl_addr,
-                                        pbl_size,
-                                        &pq_params);
-
-       *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
-                                    qed_db_addr(p_tx_cid->cid, DQ_DEMS_LEGACY);
+static int
+qed_eth_tx_queue_start(struct qed_hwfn *p_hwfn,
+                      u16 opaque_fid,
+                      struct qed_queue_start_common_params *p_params,
+                      u8 tc,
+                      dma_addr_t pbl_addr,
+                      u16 pbl_size,
+                      struct qed_txq_start_ret_params *p_ret_params)
+{
+       struct qed_queue_cid *p_cid;
+       int rc;
+
+       p_cid = qed_eth_queue_to_cid(p_hwfn, opaque_fid, p_params);
+       if (!p_cid)
+               return -EINVAL;
+
+       if (IS_PF(p_hwfn->cdev))
+               rc = qed_eth_pf_tx_queue_start(p_hwfn, p_cid, tc,
+                                              pbl_addr, pbl_size,
+                                              &p_ret_params->p_doorbell);
+       else
+               rc = qed_vf_pf_txq_start(p_hwfn, p_cid,
+                                        pbl_addr, pbl_size,
+                                        &p_ret_params->p_doorbell);
 
        if (rc)
-               qed_sp_release_queue_cid(p_hwfn, p_tx_cid);
+               qed_eth_queue_cid_release(p_hwfn, p_cid);
+       else
+               p_ret_params->p_handle = (void *)p_cid;
 
        return rc;
 }
 
-int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id)
+static int
+qed_eth_pf_tx_queue_stop(struct qed_hwfn *p_hwfn, struct qed_queue_cid *p_cid)
 {
-       struct qed_hw_cid_data *p_tx_cid = &p_hwfn->p_tx_cids[tx_queue_id];
        struct qed_spq_entry *p_ent = NULL;
        struct qed_sp_init_data init_data;
-       int rc = -EINVAL;
-
-       if (IS_VF(p_hwfn->cdev))
-               return qed_vf_pf_txq_stop(p_hwfn, tx_queue_id);
+       int rc;
 
-       /* Get SPQ entry */
        memset(&init_data, 0, sizeof(init_data));
-       init_data.cid = p_tx_cid->cid;
-       init_data.opaque_fid = p_tx_cid->opaque_fid;
+       init_data.cid = p_cid->cid;
+       init_data.opaque_fid = p_cid->opaque_fid;
        init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
        rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -892,11 +955,22 @@ int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id)
        if (rc)
                return rc;
 
-       rc = qed_spq_post(p_hwfn, p_ent, NULL);
-       if (rc)
-               return rc;
+       return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+int qed_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, void *p_handle)
+{
+       struct qed_queue_cid *p_cid = (struct qed_queue_cid *)p_handle;
+       int rc;
+
+       if (IS_PF(p_hwfn->cdev))
+               rc = qed_eth_pf_tx_queue_stop(p_hwfn, p_cid);
+       else
+               rc = qed_vf_pf_txq_stop(p_hwfn, p_cid);
 
-       return qed_sp_release_queue_cid(p_hwfn, p_tx_cid);
+       if (!rc)
+               qed_eth_queue_cid_release(p_hwfn, p_cid);
+       return rc;
 }
 
 static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode)
@@ -1691,7 +1765,9 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
                }
 
                qed_vf_get_num_vlan_filters(&cdev->hwfns[0],
-                                           &info->num_vlan_filters);
+                                           (u8 *)&info->num_vlan_filters);
+               qed_vf_get_num_mac_filters(&cdev->hwfns[0],
+                                          (u8 *)&info->num_mac_filters);
                qed_vf_get_port_mac(&cdev->hwfns[0], info->port_mac);
 
                info->is_legacy = !!cdev->hwfns[0].vf_iov_info->b_pre_fp_hsi;
@@ -1878,58 +1954,53 @@ static int qed_update_vport(struct qed_dev *cdev,
 }
 
 static int qed_start_rxq(struct qed_dev *cdev,
-                        struct qed_queue_start_common_params *params,
+                        u8 rss_num,
+                        struct qed_queue_start_common_params *p_params,
                         u16 bd_max_bytes,
                         dma_addr_t bd_chain_phys_addr,
                         dma_addr_t cqe_pbl_addr,
                         u16 cqe_pbl_size,
-                        void __iomem **pp_prod)
+                        struct qed_rxq_start_ret_params *ret_params)
 {
        struct qed_hwfn *p_hwfn;
        int rc, hwfn_index;
 
-       hwfn_index = params->rss_id % cdev->num_hwfns;
+       hwfn_index = rss_num % cdev->num_hwfns;
        p_hwfn = &cdev->hwfns[hwfn_index];
 
-       /* Fix queue ID in 100g mode */
-       params->queue_id /= cdev->num_hwfns;
-
-       rc = qed_sp_eth_rx_queue_start(p_hwfn,
-                                      p_hwfn->hw_info.opaque_fid,
-                                      params,
-                                      bd_max_bytes,
-                                      bd_chain_phys_addr,
-                                      cqe_pbl_addr,
-                                      cqe_pbl_size,
-                                      pp_prod);
+       p_params->queue_id = p_params->queue_id / cdev->num_hwfns;
+       p_params->stats_id = p_params->vport_id;
 
+       rc = qed_eth_rx_queue_start(p_hwfn,
+                                   p_hwfn->hw_info.opaque_fid,
+                                   p_params,
+                                   bd_max_bytes,
+                                   bd_chain_phys_addr,
+                                   cqe_pbl_addr, cqe_pbl_size, ret_params);
        if (rc) {
-               DP_ERR(cdev, "Failed to start RXQ#%d\n", params->queue_id);
+               DP_ERR(cdev, "Failed to start RXQ#%d\n", p_params->queue_id);
                return rc;
        }
 
        DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP),
-                  "Started RX-Q %d [rss %d] on V-PORT %d and SB %d\n",
-                  params->queue_id, params->rss_id, params->vport_id,
-                  params->sb);
+                  "Started RX-Q %d [rss_num %d] on V-PORT %d and SB %d\n",
+                  p_params->queue_id, rss_num, p_params->vport_id,
+                  p_params->sb);
 
        return 0;
 }
 
-static int qed_stop_rxq(struct qed_dev *cdev,
-                       struct qed_stop_rxq_params *params)
+static int qed_stop_rxq(struct qed_dev *cdev, u8 rss_id, void *handle)
 {
        int rc, hwfn_index;
        struct qed_hwfn *p_hwfn;
 
-       hwfn_index      = params->rss_id % cdev->num_hwfns;
-       p_hwfn          = &cdev->hwfns[hwfn_index];
+       hwfn_index = rss_id % cdev->num_hwfns;
+       p_hwfn = &cdev->hwfns[hwfn_index];
 
-       rc = qed_sp_eth_rx_queue_stop(p_hwfn,
-                                     params->rx_queue_id / cdev->num_hwfns,
-                                     params->eq_completion_only, false);
+       rc = qed_eth_rx_queue_stop(p_hwfn, handle, false, false);
        if (rc) {
-               DP_ERR(cdev, "Failed to stop RXQ#%d\n", params->rx_queue_id);
+               DP_ERR(cdev, "Failed to stop RXQ#%02x\n", rss_id);
                return rc;
        }
 
@@ -1937,26 +2008,24 @@ static int qed_stop_rxq(struct qed_dev *cdev,
 }
 
 static int qed_start_txq(struct qed_dev *cdev,
+                        u8 rss_num,
                         struct qed_queue_start_common_params *p_params,
                         dma_addr_t pbl_addr,
                         u16 pbl_size,
-                        void __iomem **pp_doorbell)
+                        struct qed_txq_start_ret_params *ret_params)
 {
        struct qed_hwfn *p_hwfn;
        int rc, hwfn_index;
 
-       hwfn_index      = p_params->rss_id % cdev->num_hwfns;
-       p_hwfn          = &cdev->hwfns[hwfn_index];
-
-       /* Fix queue ID in 100g mode */
-       p_params->queue_id /= cdev->num_hwfns;
+       hwfn_index = rss_num % cdev->num_hwfns;
+       p_hwfn = &cdev->hwfns[hwfn_index];
+       p_params->queue_id = p_params->queue_id / cdev->num_hwfns;
+       p_params->stats_id = p_params->vport_id;
 
-       rc = qed_sp_eth_tx_queue_start(p_hwfn,
-                                      p_hwfn->hw_info.opaque_fid,
-                                      p_params,
-                                      pbl_addr,
-                                      pbl_size,
-                                      pp_doorbell);
+       rc = qed_eth_tx_queue_start(p_hwfn,
+                                   p_hwfn->hw_info.opaque_fid,
+                                   p_params, 0,
+                                   pbl_addr, pbl_size, ret_params);
 
        if (rc) {
                DP_ERR(cdev, "Failed to start TXQ#%d\n", p_params->queue_id);
@@ -1964,8 +2033,8 @@ static int qed_start_txq(struct qed_dev *cdev,
        }
 
        DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP),
-                  "Started TX-Q %d [rss %d] on V-PORT %d and SB %d\n",
-                  p_params->queue_id, p_params->rss_id, p_params->vport_id,
+                  "Started TX-Q %d [rss_num %d] on V-PORT %d and SB %d\n",
+                  p_params->queue_id, rss_num, p_params->vport_id,
                   p_params->sb);
 
        return 0;
@@ -1979,19 +2048,17 @@ static int qed_fastpath_stop(struct qed_dev *cdev)
        return 0;
 }
 
-static int qed_stop_txq(struct qed_dev *cdev,
-                       struct qed_stop_txq_params *params)
+static int qed_stop_txq(struct qed_dev *cdev, u8 rss_id, void *handle)
 {
        struct qed_hwfn *p_hwfn;
        int rc, hwfn_index;
 
-       hwfn_index      = params->rss_id % cdev->num_hwfns;
-       p_hwfn          = &cdev->hwfns[hwfn_index];
+       hwfn_index = rss_id % cdev->num_hwfns;
+       p_hwfn = &cdev->hwfns[hwfn_index];
 
-       rc = qed_sp_eth_tx_queue_stop(p_hwfn,
-                                     params->tx_queue_id / cdev->num_hwfns);
+       rc = qed_eth_tx_queue_stop(p_hwfn, handle);
        if (rc) {
-               DP_ERR(cdev, "Failed to stop TXQ#%d\n", params->tx_queue_id);
+               DP_ERR(cdev, "Failed to stop TXQ#%02x\n", rss_id);
                return rc;
        }
 
index e495d62fcc0375922a687e4913bd2ec03f677382..48c9bfc2814082ecb68b07884b1c772ca8a3bf33 100644 (file)
@@ -78,11 +78,34 @@ struct qed_filter_mcast {
        unsigned char mac[QED_MAX_MC_ADDRS][ETH_ALEN];
 };
 
-int qed_sp_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
-                            u16 rx_queue_id,
-                            bool eq_completion_only, bool cqe_completion);
+/**
+ * @brief qed_eth_rx_queue_stop - This ramrod closes an Rx queue
+ *
+ * @param p_hwfn
+ * @param p_rxq                        Handler of queue to close
+ * @param eq_completion_only   If True completion will be on
+ *                             EQe, if False completion will be
+ *                             on EQe if p_hwfn opaque
+ *                             different from the RXQ opaque
+ *                             otherwise on CQe.
+ * @param cqe_completion       If True completion will be
+ *                             received on CQe.
+ * @return int
+ */
+int
+qed_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
+                     void *p_rxq,
+                     bool eq_completion_only, bool cqe_completion);
 
-int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id);
+/**
+ * @brief qed_eth_tx_queue_stop - closes a Tx queue
+ *
+ * @param p_hwfn
+ * @param p_txq - handle to Tx queue needed to be closed
+ *
+ * @return int
+ */
+int qed_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, void *p_txq);
 
 enum qed_tpa_mode {
        QED_TPA_MODE_NONE,
@@ -196,19 +219,19 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn,
  * @note At the moment - only used by non-linux VFs.
  *
  * @param p_hwfn
- * @param rx_queue_id          RX Queue ID
- * @param num_rxqs             Allow to update multiple rx
- *                             queues, from rx_queue_id to
- *                             (rx_queue_id + num_rxqs)
+ * @param pp_rxq_handlers      An array of queue handlers to be updated.
+ * @param num_rxqs              number of queues to update.
  * @param complete_cqe_flg     Post completion to the CQE Ring if set
  * @param complete_event_flg   Post completion to the Event Ring if set
+ * @param comp_mode
+ * @param p_comp_data
  *
  * @return int
  */
 
 int
 qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
-                           u16 rx_queue_id,
+                           void **pp_rxq_handlers,
                            u8 num_rxqs,
                            u8 complete_cqe_flg,
                            u8 complete_event_flg,
@@ -217,27 +240,79 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
 
 void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
 
-int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
-                          struct qed_sp_vport_start_params *p_params);
+void qed_reset_vport_stats(struct qed_dev *cdev);
+
+struct qed_queue_cid {
+       /* 'Relative' is a relative term ;-). Usually the indices [not counting
+        * SBs] would be PF-relative, but there are some cases where that isn't
+        * the case - specifically for a PF configuring its VF indices it's
+        * possible some fields [E.g., stats-id] in 'rel' would already be abs.
+        */
+       struct qed_queue_start_common_params rel;
+       struct qed_queue_start_common_params abs;
+       u32 cid;
+       u16 opaque_fid;
+
+       /* VFs queues are mapped differently, so we need to know the
+        * relative queue associated with them [0-based].
+        * Notice this is relevant on the *PF* queue-cid of its VF's queues,
+        * and not on the VF itself.
+        */
+       bool is_vf;
+       u8 vf_qid;
+
+       /* Legacy VFs might have Rx producer located elsewhere */
+       bool b_legacy_vf;
+};
 
-int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
-                               u16 opaque_fid,
-                               u32 cid,
-                               struct qed_queue_start_common_params *params,
-                               u8 stats_id,
-                               u16 bd_max_bytes,
-                               dma_addr_t bd_chain_phys_addr,
-                               dma_addr_t cqe_pbl_addr,
-                               u16 cqe_pbl_size, bool b_use_zone_a_prod);
-
-int qed_sp_eth_txq_start_ramrod(struct qed_hwfn  *p_hwfn,
-                               u16  opaque_fid,
-                               u32  cid,
-                               struct qed_queue_start_common_params *p_params,
-                               u8  stats_id,
-                               dma_addr_t pbl_addr,
-                               u16 pbl_size,
-                               union qed_qm_pq_params *p_pq_params);
+void qed_eth_queue_cid_release(struct qed_hwfn *p_hwfn,
+                              struct qed_queue_cid *p_cid);
+
+struct qed_queue_cid *_qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
+                                           u16 opaque_fid,
+                                           u32 cid,
+                                           u8 vf_qid,
+                                           struct qed_queue_start_common_params
+                                           *p_params);
+
+int
+qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
+                      struct qed_sp_vport_start_params *p_params);
+
+/**
+ * @brief - Starts an Rx queue, when queue_cid is already prepared
+ *
+ * @param p_hwfn
+ * @param p_cid
+ * @param bd_max_bytes
+ * @param bd_chain_phys_addr
+ * @param cqe_pbl_addr
+ * @param cqe_pbl_size
+ *
+ * @return int
+ */
+int
+qed_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
+                        struct qed_queue_cid *p_cid,
+                        u16 bd_max_bytes,
+                        dma_addr_t bd_chain_phys_addr,
+                        dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size);
+
+/**
+ * @brief - Starts a Tx queue, where queue_cid is already prepared
+ *
+ * @param p_hwfn
+ * @param p_cid
+ * @param pbl_addr
+ * @param pbl_size
+ * @param pq_id - ID of the PQ to use for this Tx queue
+ *
+ * @return int
+ */
+int
+qed_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn,
+                        struct qed_queue_cid *p_cid,
+                        dma_addr_t pbl_addr, u16 pbl_size, u16 pq_id);
 
 u8 qed_mcast_bin_from_mac(u8 *mac);
 
index 02a8be2faed7fd9fdb7c66442d613f2d7c61e565..de4e2a240d88e54f45274a9c2769b5b2be6826e2 100644 (file)
 #include "qed_int.h"
 #include "qed_ll2.h"
 #include "qed_mcp.h"
+#include "qed_ooo.h"
 #include "qed_reg_addr.h"
 #include "qed_sp.h"
+#include "qed_roce.h"
 
 #define QED_LL2_RX_REGISTERED(ll2)     ((ll2)->rx_queue.b_cb_registred)
 #define QED_LL2_TX_REGISTERED(ll2)     ((ll2)->tx_queue.b_cb_registred)
@@ -140,11 +142,11 @@ static void qed_ll2_kill_buffers(struct qed_dev *cdev)
                qed_ll2_dealloc_buffer(cdev, buffer);
 }
 
-void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn,
-                                u8 connection_handle,
-                                struct qed_ll2_rx_packet *p_pkt,
-                                struct core_rx_fast_path_cqe *p_cqe,
-                                bool b_last_packet)
+static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn,
+                                       u8 connection_handle,
+                                       struct qed_ll2_rx_packet *p_pkt,
+                                       struct core_rx_fast_path_cqe *p_cqe,
+                                       bool b_last_packet)
 {
        u16 packet_length = le16_to_cpu(p_cqe->packet_length);
        struct qed_ll2_buffer *buffer = p_pkt->cookie;
@@ -295,25 +297,34 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
                list_del(&p_pkt->list_entry);
                b_last_packet = list_empty(&p_tx->active_descq);
                list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
-               p_tx->cur_completing_packet = *p_pkt;
-               p_tx->cur_completing_bd_idx = 1;
-               b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used;
-               tx_frag = p_pkt->bds_set[0].tx_frag;
-               if (p_ll2_conn->gsi_enable)
-                       qed_ll2b_release_tx_gsi_packet(p_hwfn,
-                                                      p_ll2_conn->my_id,
-                                                      p_pkt->cookie,
-                                                      tx_frag,
-                                                      b_last_frag,
-                                                      b_last_packet);
-               else
-                       qed_ll2b_complete_tx_packet(p_hwfn,
-                                                   p_ll2_conn->my_id,
-                                                   p_pkt->cookie,
-                                                   tx_frag,
-                                                   b_last_frag,
-                                                   b_last_packet);
+               if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+                       struct qed_ooo_buffer *p_buffer;
 
+                       p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
+                       qed_ooo_put_free_buffer(p_hwfn, p_hwfn->p_ooo_info,
+                                               p_buffer);
+               } else {
+                       p_tx->cur_completing_packet = *p_pkt;
+                       p_tx->cur_completing_bd_idx = 1;
+                       b_last_frag =
+                               p_tx->cur_completing_bd_idx == p_pkt->bd_used;
+                       tx_frag = p_pkt->bds_set[0].tx_frag;
+                       if (p_ll2_conn->gsi_enable)
+                               qed_ll2b_release_tx_gsi_packet(p_hwfn,
+                                                              p_ll2_conn->
+                                                              my_id,
+                                                              p_pkt->cookie,
+                                                              tx_frag,
+                                                              b_last_frag,
+                                                              b_last_packet);
+                       else
+                               qed_ll2b_complete_tx_packet(p_hwfn,
+                                                           p_ll2_conn->my_id,
+                                                           p_pkt->cookie,
+                                                           tx_frag,
+                                                           b_last_frag,
+                                                           b_last_packet);
+               }
        }
 }
 
@@ -515,7 +526,7 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
        return rc;
 }
 
-void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
+static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 {
        struct qed_ll2_info *p_ll2_conn = NULL;
        struct qed_ll2_rx_packet *p_pkt = NULL;
@@ -537,16 +548,460 @@ void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
                if (!p_pkt)
                        break;
 
+               list_move_tail(&p_pkt->list_entry, &p_rx->free_descq);
+
+               if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+                       struct qed_ooo_buffer *p_buffer;
+
+                       p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
+                       qed_ooo_put_free_buffer(p_hwfn, p_hwfn->p_ooo_info,
+                                               p_buffer);
+               } else {
+                       rx_buf_addr = p_pkt->rx_buf_addr;
+                       cookie = p_pkt->cookie;
+
+                       b_last = list_empty(&p_rx->active_descq);
+               }
+       }
+}
+
+#if IS_ENABLED(CONFIG_QED_ISCSI)
+static u8 qed_ll2_convert_rx_parse_to_tx_flags(u16 parse_flags)
+{
+       u8 bd_flags = 0;
+
+       if (GET_FIELD(parse_flags, PARSING_AND_ERR_FLAGS_TAG8021QEXIST))
+               SET_FIELD(bd_flags, CORE_TX_BD_FLAGS_VLAN_INSERTION, 1);
+
+       return bd_flags;
+}
+
+static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn,
+                                 struct qed_ll2_info *p_ll2_conn)
+{
+       struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
+       u16 packet_length = 0, parse_flags = 0, vlan = 0;
+       struct qed_ll2_rx_packet *p_pkt = NULL;
+       u32 num_ooo_add_to_peninsula = 0, cid;
+       union core_rx_cqe_union *cqe = NULL;
+       u16 cq_new_idx = 0, cq_old_idx = 0;
+       struct qed_ooo_buffer *p_buffer;
+       struct ooo_opaque *iscsi_ooo;
+       u8 placement_offset = 0;
+       u8 cqe_type;
+
+       cq_new_idx = le16_to_cpu(*p_rx->p_fw_cons);
+       cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain);
+       if (cq_new_idx == cq_old_idx)
+               return 0;
+
+       while (cq_new_idx != cq_old_idx) {
+               struct core_rx_fast_path_cqe *p_cqe_fp;
+
+               cqe = qed_chain_consume(&p_rx->rcq_chain);
+               cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain);
+               cqe_type = cqe->rx_cqe_sp.type;
+
+               if (cqe_type != CORE_RX_CQE_TYPE_REGULAR) {
+                       DP_NOTICE(p_hwfn,
+                                 "Got a non-regular LB LL2 completion [type 0x%02x]\n",
+                                 cqe_type);
+                       return -EINVAL;
+               }
+               p_cqe_fp = &cqe->rx_cqe_fp;
+
+               placement_offset = p_cqe_fp->placement_offset;
+               parse_flags = le16_to_cpu(p_cqe_fp->parse_flags.flags);
+               packet_length = le16_to_cpu(p_cqe_fp->packet_length);
+               vlan = le16_to_cpu(p_cqe_fp->vlan);
+               iscsi_ooo = (struct ooo_opaque *)&p_cqe_fp->opaque_data;
+               qed_ooo_save_history_entry(p_hwfn, p_hwfn->p_ooo_info,
+                                          iscsi_ooo);
+               cid = le32_to_cpu(iscsi_ooo->cid);
+
+               /* Process delete isle first */
+               if (iscsi_ooo->drop_size)
+                       qed_ooo_delete_isles(p_hwfn, p_hwfn->p_ooo_info, cid,
+                                            iscsi_ooo->drop_isle,
+                                            iscsi_ooo->drop_size);
+
+               if (iscsi_ooo->ooo_opcode == TCP_EVENT_NOP)
+                       continue;
+
+               /* Now process create/add/join isles */
+               if (list_empty(&p_rx->active_descq)) {
+                       DP_NOTICE(p_hwfn,
+                                 "LL2 OOO RX chain has no submitted buffers\n"
+                                 );
+                       return -EIO;
+               }
+
+               p_pkt = list_first_entry(&p_rx->active_descq,
+                                        struct qed_ll2_rx_packet, list_entry);
+
+               if ((iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_NEW_ISLE) ||
+                   (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_RIGHT) ||
+                   (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_LEFT) ||
+                   (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_PEN) ||
+                   (iscsi_ooo->ooo_opcode == TCP_EVENT_JOIN)) {
+                       if (!p_pkt) {
+                               DP_NOTICE(p_hwfn,
+                                         "LL2 OOO RX packet is not valid\n");
+                               return -EIO;
+                       }
+                       list_del(&p_pkt->list_entry);
+                       p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
+                       p_buffer->packet_length = packet_length;
+                       p_buffer->parse_flags = parse_flags;
+                       p_buffer->vlan = vlan;
+                       p_buffer->placement_offset = placement_offset;
+                       qed_chain_consume(&p_rx->rxq_chain);
+                       list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
+
+                       switch (iscsi_ooo->ooo_opcode) {
+                       case TCP_EVENT_ADD_NEW_ISLE:
+                               qed_ooo_add_new_isle(p_hwfn,
+                                                    p_hwfn->p_ooo_info,
+                                                    cid,
+                                                    iscsi_ooo->ooo_isle,
+                                                    p_buffer);
+                               break;
+                       case TCP_EVENT_ADD_ISLE_RIGHT:
+                               qed_ooo_add_new_buffer(p_hwfn,
+                                                      p_hwfn->p_ooo_info,
+                                                      cid,
+                                                      iscsi_ooo->ooo_isle,
+                                                      p_buffer,
+                                                      QED_OOO_RIGHT_BUF);
+                               break;
+                       case TCP_EVENT_ADD_ISLE_LEFT:
+                               qed_ooo_add_new_buffer(p_hwfn,
+                                                      p_hwfn->p_ooo_info,
+                                                      cid,
+                                                      iscsi_ooo->ooo_isle,
+                                                      p_buffer,
+                                                      QED_OOO_LEFT_BUF);
+                               break;
+                       case TCP_EVENT_JOIN:
+                               qed_ooo_add_new_buffer(p_hwfn,
+                                                      p_hwfn->p_ooo_info,
+                                                      cid,
+                                                      iscsi_ooo->ooo_isle +
+                                                      1,
+                                                      p_buffer,
+                                                      QED_OOO_LEFT_BUF);
+                               qed_ooo_join_isles(p_hwfn,
+                                                  p_hwfn->p_ooo_info,
+                                                  cid, iscsi_ooo->ooo_isle);
+                               break;
+                       case TCP_EVENT_ADD_PEN:
+                               num_ooo_add_to_peninsula++;
+                               qed_ooo_put_ready_buffer(p_hwfn,
+                                                        p_hwfn->p_ooo_info,
+                                                        p_buffer, true);
+                               break;
+                       }
+               } else {
+                       DP_NOTICE(p_hwfn,
+                                 "Unexpected event (%d) TX OOO completion\n",
+                                 iscsi_ooo->ooo_opcode);
+               }
+       }
+
+       return 0;
+}
+
+static void
+qed_ooo_submit_tx_buffers(struct qed_hwfn *p_hwfn,
+                         struct qed_ll2_info *p_ll2_conn)
+{
+       struct qed_ooo_buffer *p_buffer;
+       int rc;
+       u16 l4_hdr_offset_w;
+       dma_addr_t first_frag;
+       u16 parse_flags;
+       u8 bd_flags;
+
+       /* Submit Tx buffers here */
+       while ((p_buffer = qed_ooo_get_ready_buffer(p_hwfn,
+                                                   p_hwfn->p_ooo_info))) {
+               l4_hdr_offset_w = 0;
+               bd_flags = 0;
+
+               first_frag = p_buffer->rx_buffer_phys_addr +
+                            p_buffer->placement_offset;
+               parse_flags = p_buffer->parse_flags;
+               bd_flags = qed_ll2_convert_rx_parse_to_tx_flags(parse_flags);
+               SET_FIELD(bd_flags, CORE_TX_BD_FLAGS_FORCE_VLAN_MODE, 1);
+               SET_FIELD(bd_flags, CORE_TX_BD_FLAGS_L4_PROTOCOL, 1);
+
+               rc = qed_ll2_prepare_tx_packet(p_hwfn, p_ll2_conn->my_id, 1,
+                                              p_buffer->vlan, bd_flags,
+                                              l4_hdr_offset_w,
+                                              p_ll2_conn->tx_dest, 0,
+                                              first_frag,
+                                              p_buffer->packet_length,
+                                              p_buffer, true);
+               if (rc) {
+                       qed_ooo_put_ready_buffer(p_hwfn, p_hwfn->p_ooo_info,
+                                                p_buffer, false);
+                       break;
+               }
+       }
+}
+
+static void
+qed_ooo_submit_rx_buffers(struct qed_hwfn *p_hwfn,
+                         struct qed_ll2_info *p_ll2_conn)
+{
+       struct qed_ooo_buffer *p_buffer;
+       int rc;
+
+       while ((p_buffer = qed_ooo_get_free_buffer(p_hwfn,
+                                                  p_hwfn->p_ooo_info))) {
+               rc = qed_ll2_post_rx_buffer(p_hwfn,
+                                           p_ll2_conn->my_id,
+                                           p_buffer->rx_buffer_phys_addr,
+                                           0, p_buffer, true);
+               if (rc) {
+                       qed_ooo_put_free_buffer(p_hwfn,
+                                               p_hwfn->p_ooo_info, p_buffer);
+                       break;
+               }
+       }
+}
+
+static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
+{
+       struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie;
+       int rc;
+
+       rc = qed_ll2_lb_rxq_handler(p_hwfn, p_ll2_conn);
+       if (rc)
+               return rc;
+
+       qed_ooo_submit_rx_buffers(p_hwfn, p_ll2_conn);
+       qed_ooo_submit_tx_buffers(p_hwfn, p_ll2_conn);
+
+       return 0;
+}
+
+static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
+{
+       struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie;
+       struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue;
+       struct qed_ll2_tx_packet *p_pkt = NULL;
+       struct qed_ooo_buffer *p_buffer;
+       bool b_dont_submit_rx = false;
+       u16 new_idx = 0, num_bds = 0;
+       int rc;
+
+       new_idx = le16_to_cpu(*p_tx->p_fw_cons);
+       num_bds = ((s16)new_idx - (s16)p_tx->bds_idx);
+
+       if (!num_bds)
+               return 0;
+
+       while (num_bds) {
+               if (list_empty(&p_tx->active_descq))
+                       return -EINVAL;
+
+               p_pkt = list_first_entry(&p_tx->active_descq,
+                                        struct qed_ll2_tx_packet, list_entry);
+               if (!p_pkt)
+                       return -EINVAL;
+
+               if (p_pkt->bd_used != 1) {
+                       DP_NOTICE(p_hwfn,
+                                 "Unexpectedly many BDs(%d) in TX OOO completion\n",
+                                 p_pkt->bd_used);
+                       return -EINVAL;
+               }
+
                list_del(&p_pkt->list_entry);
-               list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
 
-               rx_buf_addr = p_pkt->rx_buf_addr;
-               cookie = p_pkt->cookie;
+               num_bds--;
+               p_tx->bds_idx++;
+               qed_chain_consume(&p_tx->txq_chain);
+
+               p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
+               list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
 
-               b_last = list_empty(&p_rx->active_descq);
+               if (b_dont_submit_rx) {
+                       qed_ooo_put_free_buffer(p_hwfn, p_hwfn->p_ooo_info,
+                                               p_buffer);
+                       continue;
+               }
+
+               rc = qed_ll2_post_rx_buffer(p_hwfn, p_ll2_conn->my_id,
+                                           p_buffer->rx_buffer_phys_addr, 0,
+                                           p_buffer, true);
+               if (rc != 0) {
+                       qed_ooo_put_free_buffer(p_hwfn,
+                                               p_hwfn->p_ooo_info, p_buffer);
+                       b_dont_submit_rx = true;
+               }
        }
+
+       qed_ooo_submit_tx_buffers(p_hwfn, p_ll2_conn);
+
+       return 0;
 }
 
+static int
+qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn,
+                              struct qed_ll2_info *p_ll2_info,
+                              u16 rx_num_ooo_buffers, u16 mtu)
+{
+       struct qed_ooo_buffer *p_buf = NULL;
+       void *p_virt;
+       u16 buf_idx;
+       int rc = 0;
+
+       if (p_ll2_info->conn_type != QED_LL2_TYPE_ISCSI_OOO)
+               return rc;
+
+       if (!rx_num_ooo_buffers)
+               return -EINVAL;
+
+       for (buf_idx = 0; buf_idx < rx_num_ooo_buffers; buf_idx++) {
+               p_buf = kzalloc(sizeof(*p_buf), GFP_KERNEL);
+               if (!p_buf) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+
+               p_buf->rx_buffer_size = mtu + 26 + ETH_CACHE_LINE_SIZE;
+               p_buf->rx_buffer_size = (p_buf->rx_buffer_size +
+                                        ETH_CACHE_LINE_SIZE - 1) &
+                                       ~(ETH_CACHE_LINE_SIZE - 1);
+               p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+                                           p_buf->rx_buffer_size,
+                                           &p_buf->rx_buffer_phys_addr,
+                                           GFP_KERNEL);
+               if (!p_virt) {
+                       kfree(p_buf);
+                       rc = -ENOMEM;
+                       goto out;
+               }
+
+               p_buf->rx_buffer_virt_addr = p_virt;
+               qed_ooo_put_free_buffer(p_hwfn, p_hwfn->p_ooo_info, p_buf);
+       }
+
+       DP_VERBOSE(p_hwfn, QED_MSG_LL2,
+                  "Allocated [%04x] LL2 OOO buffers [each of size 0x%08x]\n",
+                  rx_num_ooo_buffers, p_buf->rx_buffer_size);
+
+out:
+       return rc;
+}
+
+static void
+qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn,
+                                struct qed_ll2_info *p_ll2_conn)
+{
+       if (p_ll2_conn->conn_type != QED_LL2_TYPE_ISCSI_OOO)
+               return;
+
+       qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
+       qed_ooo_submit_rx_buffers(p_hwfn, p_ll2_conn);
+}
+
+static void qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn,
+                                          struct qed_ll2_info *p_ll2_conn)
+{
+       struct qed_ooo_buffer *p_buffer;
+
+       if (p_ll2_conn->conn_type != QED_LL2_TYPE_ISCSI_OOO)
+               return;
+
+       qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
+       while ((p_buffer = qed_ooo_get_free_buffer(p_hwfn,
+                                                  p_hwfn->p_ooo_info))) {
+               dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                                 p_buffer->rx_buffer_size,
+                                 p_buffer->rx_buffer_virt_addr,
+                                 p_buffer->rx_buffer_phys_addr);
+               kfree(p_buffer);
+       }
+}
+
+static void qed_ll2_stop_ooo(struct qed_dev *cdev)
+{
+       struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+       u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
+
+       DP_VERBOSE(cdev, QED_MSG_STORAGE, "Stopping LL2 OOO queue [%02x]\n",
+                  *handle);
+
+       qed_ll2_terminate_connection(hwfn, *handle);
+       qed_ll2_release_connection(hwfn, *handle);
+       *handle = QED_LL2_UNUSED_HANDLE;
+}
+
+static int qed_ll2_start_ooo(struct qed_dev *cdev,
+                            struct qed_ll2_params *params)
+{
+       struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+       u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
+       struct qed_ll2_info *ll2_info;
+       int rc;
+
+       ll2_info = kzalloc(sizeof(*ll2_info), GFP_KERNEL);
+       if (!ll2_info)
+               return -ENOMEM;
+       ll2_info->conn_type = QED_LL2_TYPE_ISCSI_OOO;
+       ll2_info->mtu = params->mtu;
+       ll2_info->rx_drop_ttl0_flg = params->drop_ttl0_packets;
+       ll2_info->rx_vlan_removal_en = params->rx_vlan_stripping;
+       ll2_info->tx_tc = OOO_LB_TC;
+       ll2_info->tx_dest = CORE_TX_DEST_LB;
+
+       rc = qed_ll2_acquire_connection(hwfn, ll2_info,
+                                       QED_LL2_RX_SIZE, QED_LL2_TX_SIZE,
+                                       handle);
+       kfree(ll2_info);
+       if (rc) {
+               DP_INFO(cdev, "Failed to acquire LL2 OOO connection\n");
+               goto out;
+       }
+
+       rc = qed_ll2_establish_connection(hwfn, *handle);
+       if (rc) {
+               DP_INFO(cdev, "Failed to establist LL2 OOO connection\n");
+               goto fail;
+       }
+
+       return 0;
+
+fail:
+       qed_ll2_release_connection(hwfn, *handle);
+out:
+       *handle = QED_LL2_UNUSED_HANDLE;
+       return rc;
+}
+#else /* IS_ENABLED(CONFIG_QED_ISCSI) */
+static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn,
+                                    void *p_cookie) { return -EINVAL; }
+static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn,
+                                    void *p_cookie) { return -EINVAL; }
+static inline int
+qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn,
+                              struct qed_ll2_info *p_ll2_info,
+                              u16 rx_num_ooo_buffers, u16 mtu) { return 0; }
+static inline void
+qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn,
+                                struct qed_ll2_info *p_ll2_conn) { return; }
+static inline void
+qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn,
+                              struct qed_ll2_info *p_ll2_conn) { return; }
+static inline void qed_ll2_stop_ooo(struct qed_dev *cdev) { return; }
+static inline int qed_ll2_start_ooo(struct qed_dev *cdev,
+                                   struct qed_ll2_params *params)
+                                   { return -EINVAL; }
+#endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
+
 static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
                                     struct qed_ll2_info *p_ll2_conn,
                                     u8 action_on_error)
@@ -588,7 +1043,8 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
        p_ramrod->drop_ttl0_flg = p_ll2_conn->rx_drop_ttl0_flg;
        p_ramrod->inner_vlan_removal_en = p_ll2_conn->rx_vlan_removal_en;
        p_ramrod->queue_id = p_ll2_conn->queue_id;
-       p_ramrod->main_func_queue = 1;
+       p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_ISCSI_OOO) ? 0
+                                                                         : 1;
 
        if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
            p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE)) {
@@ -619,6 +1075,11 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
        if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
                return 0;
 
+       if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO)
+               p_ll2_conn->tx_stats_en = 0;
+       else
+               p_ll2_conn->tx_stats_en = 1;
+
        /* Get SPQ entry */
        memset(&init_data, 0, sizeof(init_data));
        init_data.cid = p_ll2_conn->cid;
@@ -636,7 +1097,6 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
        p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn));
        p_ramrod->sb_index = p_tx->tx_sb_index;
        p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu);
-       p_ll2_conn->tx_stats_en = 1;
        p_ramrod->stats_en = p_ll2_conn->tx_stats_en;
        p_ramrod->stats_id = p_ll2_conn->tx_stats_id;
 
@@ -860,9 +1320,19 @@ int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
        if (rc)
                goto q_allocate_fail;
 
+       rc = qed_ll2_acquire_connection_ooo(p_hwfn, p_ll2_info,
+                                           rx_num_desc * 2, p_params->mtu);
+       if (rc)
+               goto q_allocate_fail;
+
        /* Register callbacks for the Rx/Tx queues */
-       comp_rx_cb = qed_ll2_rxq_completion;
-       comp_tx_cb = qed_ll2_txq_completion;
+       if (p_params->conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+               comp_rx_cb = qed_ll2_lb_rxq_completion;
+               comp_tx_cb = qed_ll2_lb_txq_completion;
+       } else {
+               comp_rx_cb = qed_ll2_rxq_completion;
+               comp_tx_cb = qed_ll2_txq_completion;
+       }
 
        if (rx_num_desc) {
                qed_int_register_cb(p_hwfn, comp_rx_cb,
@@ -975,6 +1445,8 @@ int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
        if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
                qed_wr(p_hwfn, p_hwfn->p_main_ptt, PRS_REG_USE_LIGHT_L2, 1);
 
+       qed_ll2_establish_connection_ooo(p_hwfn, p_ll2_conn);
+
        return rc;
 }
 
@@ -992,9 +1464,8 @@ static void qed_ll2_post_rx_buffer_notify_fw(struct qed_hwfn *p_hwfn,
                p_posting_packet = list_first_entry(&p_rx->posting_descq,
                                                    struct qed_ll2_rx_packet,
                                                    list_entry);
-               list_del(&p_posting_packet->list_entry);
-               list_add_tail(&p_posting_packet->list_entry,
-                             &p_rx->active_descq);
+               list_move_tail(&p_posting_packet->list_entry,
+                              &p_rx->active_descq);
                b_notify_fw = true;
        }
 
@@ -1120,12 +1591,10 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
        start_bd->bd_flags.as_bitfield |= CORE_TX_BD_FLAGS_START_BD_MASK <<
            CORE_TX_BD_FLAGS_START_BD_SHIFT;
        SET_FIELD(start_bd->bitfield0, CORE_TX_BD_NBDS, num_of_bds);
+       SET_FIELD(start_bd->bitfield0, CORE_TX_BD_ROCE_FLAV, type);
        DMA_REGPAIR_LE(start_bd->addr, first_frag);
        start_bd->nbytes = cpu_to_le16(first_frag_len);
 
-       SET_FIELD(start_bd->bd_flags.as_bitfield, CORE_TX_BD_FLAGS_ROCE_FLAV,
-                 type);
-
        DP_VERBOSE(p_hwfn,
                   (NETIF_MSG_TX_QUEUED | QED_MSG_LL2),
                   "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n",
@@ -1188,8 +1657,7 @@ static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn,
                if (!p_pkt)
                        break;
 
-               list_del(&p_pkt->list_entry);
-               list_add_tail(&p_pkt->list_entry, &p_tx->active_descq);
+               list_move_tail(&p_pkt->list_entry, &p_tx->active_descq);
        }
 
        SET_FIELD(db_msg.params, CORE_DB_DATA_DEST, DB_DEST_XCM);
@@ -1217,6 +1685,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
                              u16 vlan,
                              u8 bd_flags,
                              u16 l4_hdr_offset_w,
+                             enum qed_ll2_tx_dest e_tx_dest,
                              enum qed_ll2_roce_flavor_type qed_roce_flavor,
                              dma_addr_t first_frag,
                              u16 first_frag_len, void *cookie, u8 notify_fw)
@@ -1226,6 +1695,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
        enum core_roce_flavor_type roce_flavor;
        struct qed_ll2_tx_queue *p_tx;
        struct qed_chain *p_tx_chain;
+       enum core_tx_dest tx_dest;
        unsigned long flags;
        int rc = 0;
 
@@ -1256,6 +1726,8 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
                goto out;
        }
 
+       tx_dest = e_tx_dest == QED_LL2_TX_DEST_NW ? CORE_TX_DEST_NW :
+                                                   CORE_TX_DEST_LB;
        if (qed_roce_flavor == QED_LL2_ROCE) {
                roce_flavor = CORE_ROCE;
        } else if (qed_roce_flavor == QED_LL2_RROCE) {
@@ -1270,7 +1742,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
                                      num_of_bds, first_frag,
                                      first_frag_len, cookie, notify_fw);
        qed_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp,
-                                        num_of_bds, CORE_TX_DEST_NW,
+                                        num_of_bds, tx_dest,
                                         vlan, bd_flags, l4_hdr_offset_w,
                                         roce_flavor,
                                         first_frag, first_frag_len);
@@ -1345,6 +1817,9 @@ int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
                qed_ll2_rxq_flush(p_hwfn, connection_handle);
        }
 
+       if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO)
+               qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
+
        return rc;
 }
 
@@ -1375,6 +1850,8 @@ void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
 
        qed_cxt_release_cid(p_hwfn, p_ll2_conn->cid);
 
+       qed_ll2_release_connection_ooo(p_hwfn, p_ll2_conn);
+
        mutex_lock(&p_ll2_conn->mutex);
        p_ll2_conn->b_active = false;
        mutex_unlock(&p_ll2_conn->mutex);
@@ -1521,6 +1998,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
        enum qed_ll2_conn_type conn_type;
        struct qed_ptt *p_ptt;
        int rc, i;
+       u8 gsi_enable = 1;
 
        /* Initialize LL2 locks & lists */
        INIT_LIST_HEAD(&cdev->ll2->list);
@@ -1552,6 +2030,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
        switch (QED_LEADING_HWFN(cdev)->hw_info.personality) {
        case QED_PCI_ISCSI:
                conn_type = QED_LL2_TYPE_ISCSI;
+               gsi_enable = 0;
                break;
        case QED_PCI_ETH_ROCE:
                conn_type = QED_LL2_TYPE_ROCE;
@@ -1568,7 +2047,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
        ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping;
        ll2_info.tx_tc = 0;
        ll2_info.tx_dest = CORE_TX_DEST_NW;
-       ll2_info.gsi_enable = 1;
+       ll2_info.gsi_enable = gsi_enable;
 
        rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_info,
                                        QED_LL2_RX_SIZE, QED_LL2_TX_SIZE,
@@ -1615,6 +2094,17 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
                goto release_terminate;
        }
 
+       if (cdev->hwfns[0].hw_info.personality == QED_PCI_ISCSI &&
+           cdev->hwfns[0].pf_params.iscsi_pf_params.ooo_enable) {
+               DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n");
+               rc = qed_ll2_start_ooo(cdev, params);
+               if (rc) {
+                       DP_INFO(cdev,
+                               "Failed to initialize the OOO LL2 queue\n");
+                       goto release_terminate;
+               }
+       }
+
        p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
        if (!p_ptt) {
                DP_INFO(cdev, "Failed to acquire PTT\n");
@@ -1664,6 +2154,10 @@ static int qed_ll2_stop(struct qed_dev *cdev)
        qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
        eth_zero_addr(cdev->ll2_mac_address);
 
+       if (cdev->hwfns[0].hw_info.personality == QED_PCI_ISCSI &&
+           cdev->hwfns[0].pf_params.iscsi_pf_params.ooo_enable)
+               qed_ll2_stop_ooo(cdev);
+
        rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev),
                                          cdev->ll2->handle);
        if (rc)
@@ -1718,7 +2212,8 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
        rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev),
                                       cdev->ll2->handle,
                                       1 + skb_shinfo(skb)->nr_frags,
-                                      vlan, flags, 0, 0 /* RoCE FLAVOR */,
+                                      vlan, flags, 0, QED_LL2_TX_DEST_NW,
+                                      0 /* RoCE FLAVOR */,
                                       mapping, skb->len, skb, 1);
        if (rc)
                goto err;
index 80a5dc2d652d3f9b364dee97182d16bd987661d3..6625a3ae5a335bcace6f99b0e3914e5472bdc06d 100644 (file)
@@ -41,6 +41,12 @@ enum qed_ll2_conn_type {
        MAX_QED_LL2_RX_CONN_TYPE
 };
 
+enum qed_ll2_tx_dest {
+       QED_LL2_TX_DEST_NW, /* Light L2 TX Destination to the Network */
+       QED_LL2_TX_DEST_LB, /* Light L2 TX Destination to the Loopback */
+       QED_LL2_TX_DEST_MAX /* Number of destinations listed above */
+};
+
 struct qed_ll2_rx_packet {
        struct list_head list_entry;
        struct core_rx_bd_with_buff_len *rxq_bd;
@@ -192,6 +198,8 @@ int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn,
  * @param l4_hdr_offset_w      L4 Header Offset from start of packet
  *                             (in words). This is needed if both l4_csum
  *                             and ipv6_ext are set
+ * @param e_tx_dest             indicates if the packet is to be transmitted via
+ *                              loopback or to the network
  * @param first_frag
  * @param first_frag_len
  * @param cookie
@@ -206,6 +214,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
                              u16 vlan,
                              u8 bd_flags,
                              u16 l4_hdr_offset_w,
+                             enum qed_ll2_tx_dest e_tx_dest,
                              enum qed_ll2_roce_flavor_type qed_roce_flavor,
                              dma_addr_t first_frag,
                              u16 first_frag_len, void *cookie, u8 notify_fw);
@@ -293,24 +302,4 @@ void qed_ll2_setup(struct qed_hwfn *p_hwfn,
  */
 void qed_ll2_free(struct qed_hwfn *p_hwfn,
                  struct qed_ll2_info *p_ll2_connections);
-void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn,
-                                    u8 connection_handle,
-                                    void *cookie,
-                                    dma_addr_t rx_buf_addr,
-                                    u16 data_length,
-                                    u8 data_length_error,
-                                    u16 parse_flags,
-                                    u16 vlan,
-                                    u32 src_mac_addr_hi,
-                                    u16 src_mac_addr_lo, bool b_last_packet);
-void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn,
-                                    u8 connection_handle,
-                                    void *cookie,
-                                    dma_addr_t first_frag_addr,
-                                    bool b_last_fragment, bool b_last_packet);
-void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn,
-                                   u8 connection_handle,
-                                   void *cookie,
-                                   dma_addr_t first_frag_addr,
-                                   bool b_last_fragment, bool b_last_packet);
 #endif
index 4ee3151e80c244036ac5fb44bb537c15a9fb24b6..aeb98d8c56264c53109bbad46b8d9b42add1d36d 100644 (file)
 #include "qed_hw.h"
 #include "qed_selftest.h"
 
-#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
 #define QED_ROCE_QPS                   (8192)
 #define QED_ROCE_DPIS                  (8)
-#endif
 
 static char version[] =
        "QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n";
@@ -223,6 +221,10 @@ int qed_fill_dev_info(struct qed_dev *cdev,
                dev_info->fw_eng = FW_ENGINEERING_VERSION;
                dev_info->mf_mode = cdev->mf_mode;
                dev_info->tx_switching = true;
+
+               if (QED_LEADING_HWFN(cdev)->hw_info.b_wol_support ==
+                   QED_WOL_SUPPORT_PME)
+                       dev_info->wol_support = true;
        } else {
                qed_vf_get_fw_version(&cdev->hwfns[0], &dev_info->fw_major,
                                      &dev_info->fw_minor, &dev_info->fw_rev,
@@ -245,6 +247,8 @@ int qed_fill_dev_info(struct qed_dev *cdev,
                                    &dev_info->mfw_rev, NULL);
        }
 
+       dev_info->mtu = QED_LEADING_HWFN(cdev)->hw_info.mtu;
+
        return 0;
 }
 
@@ -682,9 +686,7 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
                                  enum qed_int_mode int_mode)
 {
        struct qed_sb_cnt_info sb_cnt_info;
-#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
-       int num_l2_queues;
-#endif
+       int num_l2_queues = 0;
        int rc;
        int i;
 
@@ -715,8 +717,9 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
        cdev->int_params.fp_msix_cnt = cdev->int_params.out.num_vectors -
                                       cdev->num_hwfns;
 
-#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
-       num_l2_queues = 0;
+       if (!IS_ENABLED(CONFIG_QED_RDMA))
+               return 0;
+
        for_each_hwfn(cdev, i)
                num_l2_queues += FEAT_NUM(&cdev->hwfns[i], QED_PF_L2_QUE);
 
@@ -738,7 +741,6 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
        DP_VERBOSE(cdev, QED_MSG_RDMA, "roce_msix_cnt=%d roce_msix_base=%d\n",
                   cdev->int_params.rdma_msix_cnt,
                   cdev->int_params.rdma_msix_base);
-#endif
 
        return 0;
 }
@@ -843,13 +845,14 @@ static void qed_update_pf_params(struct qed_dev *cdev,
 {
        int i;
 
-#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
-       params->rdma_pf_params.num_qps = QED_ROCE_QPS;
-       params->rdma_pf_params.min_dpis = QED_ROCE_DPIS;
-       /* divide by 3 the MRs to avoid MF ILT overflow */
-       params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS;
-       params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX;
-#endif
+       if (IS_ENABLED(CONFIG_QED_RDMA)) {
+               params->rdma_pf_params.num_qps = QED_ROCE_QPS;
+               params->rdma_pf_params.min_dpis = QED_ROCE_DPIS;
+               /* divide by 3 the MRs to avoid MF ILT overflow */
+               params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS;
+               params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX;
+       }
+
        for (i = 0; i < cdev->num_hwfns; i++) {
                struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
@@ -880,6 +883,7 @@ static int qed_slowpath_start(struct qed_dev *cdev,
                }
        }
 
+       cdev->rx_coalesce_usecs = QED_DEFAULT_RX_USECS;
        rc = qed_nic_setup(cdev);
        if (rc)
                goto err;
@@ -1432,11 +1436,106 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
        return status;
 }
 
-struct qed_selftest_ops qed_selftest_ops_pass = {
+/* Enable or disable Wake-on-LAN via the leading hwfn and, when that
+ * succeeds, notify the MFW of the configuration change on behalf of
+ * QED_OV_CLIENT_DRV. VFs are a no-op and report success.
+ *
+ * Returns 0 on success, -EAGAIN if no PTT could be acquired, otherwise the
+ * error of the failing MFW call.
+ */
+static int qed_update_wol(struct qed_dev *cdev, bool enabled)
+{
+       struct qed_hwfn *p_lead = QED_LEADING_HWFN(cdev);
+       enum qed_ov_wol wol_mode;
+       struct qed_ptt *p_ptt;
+       int status;
+
+       if (IS_VF(cdev))
+               return 0;
+
+       p_ptt = qed_ptt_acquire(p_lead);
+       if (!p_ptt)
+               return -EAGAIN;
+
+       if (enabled)
+               wol_mode = QED_OV_WOL_ENABLED;
+       else
+               wol_mode = QED_OV_WOL_DISABLED;
+
+       /* The config-change notification is only sent if the WoL update
+        * itself went through.
+        */
+       status = qed_mcp_ov_update_wol(p_lead, p_ptt, wol_mode);
+       if (!status)
+               status = qed_mcp_ov_update_current_config(p_lead, p_ptt,
+                                                         QED_OV_CLIENT_DRV);
+
+       qed_ptt_release(p_lead, p_ptt);
+       return status;
+}
+
+/* Report the driver state (active or disabled) to the MFW through the
+ * leading hwfn. VFs are a no-op and report success.
+ *
+ * Returns 0 on success, -EAGAIN if no PTT could be acquired, otherwise the
+ * result of qed_mcp_ov_update_driver_state().
+ */
+static int qed_update_drv_state(struct qed_dev *cdev, bool active)
+{
+       struct qed_hwfn *p_lead = QED_LEADING_HWFN(cdev);
+       enum qed_ov_driver_state drv_state;
+       struct qed_ptt *p_ptt;
+       int rc;
+
+       if (IS_VF(cdev))
+               return 0;
+
+       p_ptt = qed_ptt_acquire(p_lead);
+       if (!p_ptt)
+               return -EAGAIN;
+
+       if (active)
+               drv_state = QED_OV_DRIVER_STATE_ACTIVE;
+       else
+               drv_state = QED_OV_DRIVER_STATE_DISABLED;
+
+       rc = qed_mcp_ov_update_driver_state(p_lead, p_ptt, drv_state);
+
+       qed_ptt_release(p_lead, p_ptt);
+
+       return rc;
+}
+
+/* Push a new MAC address to the MFW through the leading hwfn and, when that
+ * succeeds, notify the MFW of the configuration change on behalf of
+ * QED_OV_CLIENT_DRV. VFs are a no-op and report success.
+ *
+ * Returns 0 on success, -EAGAIN if no PTT could be acquired, otherwise the
+ * error of the failing MFW call.
+ */
+static int qed_update_mac(struct qed_dev *cdev, u8 *mac)
+{
+       struct qed_hwfn *p_lead = QED_LEADING_HWFN(cdev);
+       struct qed_ptt *p_ptt;
+       int rc;
+
+       if (IS_VF(cdev))
+               return 0;
+
+       p_ptt = qed_ptt_acquire(p_lead);
+       if (!p_ptt)
+               return -EAGAIN;
+
+       rc = qed_mcp_ov_update_mac(p_lead, p_ptt, mac);
+       if (!rc)
+               rc = qed_mcp_ov_update_current_config(p_lead, p_ptt,
+                                                     QED_OV_CLIENT_DRV);
+
+       qed_ptt_release(p_lead, p_ptt);
+       return rc;
+}
+
+/* Push a new MTU to the MFW through the leading hwfn and, when that
+ * succeeds, notify the MFW of the configuration change on behalf of
+ * QED_OV_CLIENT_DRV. VFs are a no-op and report success.
+ *
+ * Returns 0 on success, -EAGAIN if no PTT could be acquired, otherwise the
+ * error of the failing MFW call.
+ */
+static int qed_update_mtu(struct qed_dev *cdev, u16 mtu)
+{
+       struct qed_hwfn *p_lead = QED_LEADING_HWFN(cdev);
+       struct qed_ptt *p_ptt;
+       int rc;
+
+       if (IS_VF(cdev))
+               return 0;
+
+       p_ptt = qed_ptt_acquire(p_lead);
+       if (!p_ptt)
+               return -EAGAIN;
+
+       rc = qed_mcp_ov_update_mtu(p_lead, p_ptt, mtu);
+       if (!rc)
+               rc = qed_mcp_ov_update_current_config(p_lead, p_ptt,
+                                                     QED_OV_CLIENT_DRV);
+
+       qed_ptt_release(p_lead, p_ptt);
+       return rc;
+}
+
+static struct qed_selftest_ops qed_selftest_ops_pass = {
        .selftest_memory = &qed_selftest_memory,
        .selftest_interrupt = &qed_selftest_interrupt,
        .selftest_register = &qed_selftest_register,
        .selftest_clock = &qed_selftest_clock,
+       .selftest_nvram = &qed_selftest_nvram,
 };
 
 const struct qed_common_ops qed_common_ops_pass = {
@@ -1466,6 +1565,10 @@ const struct qed_common_ops qed_common_ops_pass = {
        .get_coalesce = &qed_get_coalesce,
        .set_coalesce = &qed_set_coalesce,
        .set_led = &qed_set_led,
+       .update_drv_state = &qed_update_drv_state,
+       .update_mac = &qed_update_mac,
+       .update_mtu = &qed_update_mtu,
+       .update_wol = &qed_update_wol,
 };
 
 void qed_get_protocol_stats(struct qed_dev *cdev,
index bdc9ba92f6d4569a0d391a127245f290ae083924..6dd3ce443484b5f75cb0a1b7d3cfcdece593fc96 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
+#include <linux/etherdevice.h>
 #include "qed.h"
 #include "qed_dcbx.h"
 #include "qed_hsi.h"
@@ -329,6 +330,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
                                 struct qed_mcp_mb_params *p_mb_params)
 {
        u32 union_data_addr;
+
        int rc;
 
        /* MCP not initialized */
@@ -374,11 +376,32 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
                u32 *o_mcp_param)
 {
        struct qed_mcp_mb_params mb_params;
+       union drv_union_data data_src;
        int rc;
 
        memset(&mb_params, 0, sizeof(mb_params));
+       memset(&data_src, 0, sizeof(data_src));
        mb_params.cmd = cmd;
        mb_params.param = param;
+
+       /* In case of UNLOAD_DONE, set the primary MAC */
+       if ((cmd == DRV_MSG_CODE_UNLOAD_DONE) &&
+           (p_hwfn->cdev->wol_config == QED_OV_WOL_ENABLED)) {
+               u8 *p_mac = p_hwfn->cdev->wol_mac;
+
+               data_src.wol_mac.mac_upper = p_mac[0] << 8 | p_mac[1];
+               data_src.wol_mac.mac_lower = p_mac[2] << 24 | p_mac[3] << 16 |
+                                            p_mac[4] << 8 | p_mac[5];
+
+               DP_VERBOSE(p_hwfn,
+                          (QED_MSG_SP | NETIF_MSG_IFDOWN),
+                          "Setting WoL MAC: %pM --> [%08x,%08x]\n",
+                          p_mac, data_src.wol_mac.mac_upper,
+                          data_src.wol_mac.mac_lower);
+
+               mb_params.p_data_src = &data_src;
+       }
+
        rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
        if (rc)
                return rc;
@@ -1001,28 +1024,89 @@ int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type)
        return 0;
 }
 
+/* Fallback for old MFW, which only has one global RDMA setting for all PFs:
+ * derive the personality from the device capability bits.
+ */
+static void
+qed_mcp_get_shmem_proto_legacy(struct qed_hwfn *p_hwfn,
+                              enum qed_pci_personality *p_proto)
+{
+       bool b_roce_cap;
+
+       /* No legacy MFW ever published iwarp, so the only choice left here
+        * is between plain l2 and RoCE.
+        */
+       b_roce_cap = test_bit(QED_DEV_CAP_ROCE,
+                             &p_hwfn->hw_info.device_capabilities);
+       *p_proto = b_roce_cap ? QED_PCI_ETH_ROCE : QED_PCI_ETH;
+
+       DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP,
+                  "According to Legacy capabilities, L2 personality is %08x\n",
+                  (u32) *p_proto);
+}
+
+/* Ask the MFW (DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL) which RDMA protocol this
+ * PF supports and translate the answer into a personality.
+ *
+ * Returns 0 and fills *p_proto on success; the qed_mcp_cmd() error if the
+ * command could not be sent; -EINVAL if the MFW does not answer
+ * FW_MSG_CODE_OK or reports iWARP-only / an unknown value.
+ */
+static int
+qed_mcp_get_shmem_proto_mfw(struct qed_hwfn *p_hwfn,
+                           struct qed_ptt *p_ptt,
+                           enum qed_pci_personality *p_proto)
+{
+       u32 mcp_resp = 0, rdma_proto = 0;
+       int rc;
+
+       rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL,
+                        0, &mcp_resp, &rdma_proto);
+       if (rc)
+               return rc;
+
+       if (mcp_resp != FW_MSG_CODE_OK) {
+               DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP,
+                          "MFW lacks support for command; Returns %08x\n",
+                          mcp_resp);
+               return -EINVAL;
+       }
+
+       if (rdma_proto == FW_MB_PARAM_GET_PF_RDMA_NONE) {
+               *p_proto = QED_PCI_ETH;
+       } else if (rdma_proto == FW_MB_PARAM_GET_PF_RDMA_ROCE) {
+               *p_proto = QED_PCI_ETH_ROCE;
+       } else if (rdma_proto == FW_MB_PARAM_GET_PF_RDMA_BOTH) {
+               DP_NOTICE(p_hwfn,
+                         "Current day drivers don't support RoCE & iWARP. Default to RoCE-only\n");
+               *p_proto = QED_PCI_ETH_ROCE;
+       } else {
+               /* FW_MB_PARAM_GET_PF_RDMA_IWARP and anything unknown */
+               DP_NOTICE(p_hwfn,
+                         "MFW answers GET_PF_RDMA_PROTOCOL but param is %08x\n",
+                         rdma_proto);
+               return -EINVAL;
+       }
+
+       DP_VERBOSE(p_hwfn,
+                  NETIF_MSG_IFUP,
+                  "According to capabilities, L2 personality is %08x [resp %08x param %08x]\n",
+                  (u32) *p_proto, mcp_resp, rdma_proto);
+       return 0;
+}
+
 static int
 qed_mcp_get_shmem_proto(struct qed_hwfn *p_hwfn,
                        struct public_func *p_info,
+                       struct qed_ptt *p_ptt,
                        enum qed_pci_personality *p_proto)
 {
        int rc = 0;
 
        switch (p_info->config & FUNC_MF_CFG_PROTOCOL_MASK) {
        case FUNC_MF_CFG_PROTOCOL_ETHERNET:
-               if (test_bit(QED_DEV_CAP_ROCE,
-                            &p_hwfn->hw_info.device_capabilities))
-                       *p_proto = QED_PCI_ETH_ROCE;
-               else
-                       *p_proto = QED_PCI_ETH;
+               if (qed_mcp_get_shmem_proto_mfw(p_hwfn, p_ptt, p_proto))
+                       qed_mcp_get_shmem_proto_legacy(p_hwfn, p_proto);
                break;
        case FUNC_MF_CFG_PROTOCOL_ISCSI:
                *p_proto = QED_PCI_ISCSI;
                break;
        case FUNC_MF_CFG_PROTOCOL_ROCE:
                DP_NOTICE(p_hwfn, "RoCE personality is not a valid value!\n");
-               rc = -EINVAL;
-               break;
+       /* Fallthrough */
        default:
                rc = -EINVAL;
        }
@@ -1042,7 +1126,8 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
        info->pause_on_host = (shmem_info.config &
                               FUNC_MF_CFG_PAUSE_ON_HOST_RING) ? 1 : 0;
 
-       if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, &info->protocol)) {
+       if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, p_ptt,
+                                   &info->protocol)) {
                DP_ERR(p_hwfn, "Unknown personality %08x\n",
                       (u32)(shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK));
                return -EINVAL;
@@ -1057,6 +1142,9 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
                info->mac[3] = (u8)(shmem_info.mac_lower >> 16);
                info->mac[4] = (u8)(shmem_info.mac_lower >> 8);
                info->mac[5] = (u8)(shmem_info.mac_lower);
+
+               /* Store primary MAC for later possible WoL */
+               memcpy(&p_hwfn->cdev->wol_mac, info->mac, ETH_ALEN);
        } else {
                DP_NOTICE(p_hwfn, "MAC is 0 in shmem\n");
        }
@@ -1068,13 +1156,30 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
 
        info->ovlan = (u16)(shmem_info.ovlan_stag & FUNC_MF_CFG_OV_STAG_MASK);
 
+       info->mtu = (u16)shmem_info.mtu_size;
+
+       p_hwfn->hw_info.b_wol_support = QED_WOL_SUPPORT_NONE;
+       p_hwfn->cdev->wol_config = (u8)QED_OV_WOL_DEFAULT;
+       if (qed_mcp_is_init(p_hwfn)) {
+               u32 resp = 0, param = 0;
+               int rc;
+
+               rc = qed_mcp_cmd(p_hwfn, p_ptt,
+                                DRV_MSG_CODE_OS_WOL, 0, &resp, &param);
+               if (rc)
+                       return rc;
+               if (resp == FW_MSG_CODE_OS_WOL_SUPPORTED)
+                       p_hwfn->hw_info.b_wol_support = QED_WOL_SUPPORT_PME;
+       }
+
        DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_IFUP),
-                  "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x\n",
+                  "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x wol %02x\n",
                info->pause_on_host, info->protocol,
                info->bandwidth_min, info->bandwidth_max,
                info->mac[0], info->mac[1], info->mac[2],
                info->mac[3], info->mac[4], info->mac[5],
-               info->wwn_port, info->wwn_node, info->ovlan);
+               info->wwn_port, info->wwn_node,
+               info->ovlan, (u8)p_hwfn->hw_info.b_wol_support);
 
        return 0;
 }
@@ -1223,6 +1328,178 @@ int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0;
 }
 
+/* Send DRV_MSG_CODE_OV_UPDATE_CURR_CFG to the MFW, with the mailbox
+ * parameter selected by the given client type.
+ *
+ * Returns -EINVAL for an unknown client, otherwise the qed_mcp_cmd() result.
+ */
+int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn,
+                                    struct qed_ptt *p_ptt,
+                                    enum qed_ov_client client)
+{
+       u32 mcp_resp = 0, mcp_param = 0;
+       u32 cfg_param;
+       int rc;
+
+       if (client == QED_OV_CLIENT_DRV) {
+               cfg_param = DRV_MB_PARAM_OV_CURR_CFG_OS;
+       } else if (client == QED_OV_CLIENT_USER) {
+               cfg_param = DRV_MB_PARAM_OV_CURR_CFG_OTHER;
+       } else if (client == QED_OV_CLIENT_VENDOR_SPEC) {
+               cfg_param = DRV_MB_PARAM_OV_CURR_CFG_VENDOR_SPEC;
+       } else {
+               DP_NOTICE(p_hwfn, "Invalid client type %d\n", client);
+               return -EINVAL;
+       }
+
+       rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_CURR_CFG,
+                        cfg_param, &mcp_resp, &mcp_param);
+       if (rc)
+               DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+
+       return rc;
+}
+
+/* Send DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE to the MFW, with the mailbox
+ * parameter selected by the given driver state.
+ *
+ * Returns -EINVAL for an unknown state, otherwise the qed_mcp_cmd() result.
+ */
+int qed_mcp_ov_update_driver_state(struct qed_hwfn *p_hwfn,
+                                  struct qed_ptt *p_ptt,
+                                  enum qed_ov_driver_state drv_state)
+{
+       u32 mcp_resp = 0, mcp_param = 0;
+       u32 state_param;
+       int rc;
+
+       if (drv_state == QED_OV_DRIVER_STATE_NOT_LOADED) {
+               state_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_NOT_LOADED;
+       } else if (drv_state == QED_OV_DRIVER_STATE_DISABLED) {
+               state_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_DISABLED;
+       } else if (drv_state == QED_OV_DRIVER_STATE_ACTIVE) {
+               state_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_ACTIVE;
+       } else {
+               DP_NOTICE(p_hwfn, "Invalid driver state %d\n", drv_state);
+               return -EINVAL;
+       }
+
+       rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE,
+                        state_param, &mcp_resp, &mcp_param);
+       if (rc)
+               DP_ERR(p_hwfn, "Failed to send driver state\n");
+
+       return rc;
+}
+
+/* Send the MTU to the MFW with DRV_MSG_CODE_OV_UPDATE_MTU; the value is
+ * placed in the mailbox parameter at DRV_MB_PARAM_OV_MTU_SIZE_SHIFT.
+ *
+ * Returns the qed_mcp_cmd() result.
+ */
+int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn,
+                         struct qed_ptt *p_ptt, u16 mtu)
+{
+       u32 mtu_param = (u32)mtu << DRV_MB_PARAM_OV_MTU_SIZE_SHIFT;
+       u32 mcp_resp = 0, mcp_param = 0;
+       int rc;
+
+       rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_MTU,
+                        mtu_param, &mcp_resp, &mcp_param);
+       if (rc)
+               DP_ERR(p_hwfn, "Failed to send mtu value, rc = %d\n", rc);
+
+       return rc;
+}
+
+/* Send the primary MAC address to the MFW (DRV_MSG_CODE_SET_VMAC, type
+ * DRV_MSG_CODE_VMAC_TYPE_MAC, for this PF) and cache it in cdev->wol_mac
+ * for a later possible WoL configuration.
+ *
+ * Returns the qed_mcp_cmd_and_union() result; the cached MAC is updated
+ * regardless of that result.
+ */
+int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
+                         struct qed_ptt *p_ptt, u8 *mac)
+{
+       struct qed_mcp_mb_params mb_params;
+       union drv_union_data union_data;
+       int rc;
+
+       memset(&mb_params, 0, sizeof(mb_params));
+       /* Only the first ETH_ALEN bytes are filled in below; clear the whole
+        * union so no uninitialized stack bytes are handed to the mailbox.
+        */
+       memset(&union_data, 0, sizeof(union_data));
+       mb_params.cmd = DRV_MSG_CODE_SET_VMAC;
+       mb_params.param = DRV_MSG_CODE_VMAC_TYPE_MAC <<
+                         DRV_MSG_CODE_VMAC_TYPE_SHIFT;
+       mb_params.param |= MCP_PF_ID(p_hwfn);
+       ether_addr_copy(&union_data.raw_data[0], mac);
+       mb_params.p_data_src = &union_data;
+       rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+       if (rc)
+               DP_ERR(p_hwfn, "Failed to send mac address, rc = %d\n", rc);
+
+       /* Store primary MAC for later possible WoL */
+       memcpy(p_hwfn->cdev->wol_mac, mac, ETH_ALEN);
+
+       return rc;
+}
+
+/* Send the requested WoL mode to the MFW (DRV_MSG_CODE_OV_UPDATE_WOL) and
+ * remember it in cdev->wol_config for a future unload.
+ *
+ * Returns -EINVAL if WoL is not supported or the mode is unknown, otherwise
+ * the qed_mcp_cmd() result. The mode is remembered even if the command
+ * failed.
+ */
+int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn,
+                         struct qed_ptt *p_ptt, enum qed_ov_wol wol)
+{
+       u32 mcp_resp = 0, mcp_param = 0;
+       u32 wol_param;
+       int rc;
+
+       if (p_hwfn->hw_info.b_wol_support == QED_WOL_SUPPORT_NONE) {
+               DP_VERBOSE(p_hwfn, QED_MSG_SP,
+                          "Can't change WoL configuration when WoL isn't supported\n");
+               return -EINVAL;
+       }
+
+       if (wol == QED_OV_WOL_DEFAULT) {
+               wol_param = DRV_MB_PARAM_WOL_DEFAULT;
+       } else if (wol == QED_OV_WOL_DISABLED) {
+               wol_param = DRV_MB_PARAM_WOL_DISABLED;
+       } else if (wol == QED_OV_WOL_ENABLED) {
+               wol_param = DRV_MB_PARAM_WOL_ENABLED;
+       } else {
+               DP_ERR(p_hwfn, "Invalid wol state %d\n", wol);
+               return -EINVAL;
+       }
+
+       rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_WOL,
+                        wol_param, &mcp_resp, &mcp_param);
+       if (rc)
+               DP_ERR(p_hwfn, "Failed to send wol mode, rc = %d\n", rc);
+
+       /* Keep the requested mode around; it is consulted on unload */
+       p_hwfn->cdev->wol_config = (u8)wol;
+
+       return rc;
+}
+
+/* Send DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE to the MFW, with the mailbox
+ * parameter selected by the given eswitch mode.
+ *
+ * Returns -EINVAL for an unknown mode, otherwise the qed_mcp_cmd() result.
+ */
+int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn,
+                             struct qed_ptt *p_ptt,
+                             enum qed_ov_eswitch eswitch)
+{
+       u32 mcp_resp = 0, mcp_param = 0;
+       u32 mode_param;
+       int rc;
+
+       if (eswitch == QED_OV_ESWITCH_NONE) {
+               mode_param = DRV_MB_PARAM_ESWITCH_MODE_NONE;
+       } else if (eswitch == QED_OV_ESWITCH_VEB) {
+               mode_param = DRV_MB_PARAM_ESWITCH_MODE_VEB;
+       } else if (eswitch == QED_OV_ESWITCH_VEPA) {
+               mode_param = DRV_MB_PARAM_ESWITCH_MODE_VEPA;
+       } else {
+               DP_ERR(p_hwfn, "Invalid eswitch mode %d\n", eswitch);
+               return -EINVAL;
+       }
+
+       rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE,
+                        mode_param, &mcp_resp, &mcp_param);
+       if (rc)
+               DP_ERR(p_hwfn, "Failed to send eswitch mode, rc = %d\n", rc);
+
+       return rc;
+}
+
 int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
                    struct qed_ptt *p_ptt, enum qed_led_mode mode)
 {
@@ -1271,6 +1548,52 @@ int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
+/* Read len bytes of NVRAM starting at addr into p_buf, in chunks of at most
+ * MCP_DRV_NVM_BUF_LEN bytes, using the leading hwfn. The last MFW response
+ * code is stored in cdev->mcp_nvm_resp.
+ *
+ * Returns 0 on success, -EBUSY if no PTT could be acquired, the
+ * qed_mcp_nvm_rd_cmd() error if a command could not be sent, -EINVAL if the
+ * MFW answered with anything other than FW_MSG_CODE_NVM_OK, or -EIO if the
+ * MFW reported a chunk length that cannot be right.
+ */
+int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len)
+{
+       u32 bytes_left = len, offset = 0, bytes_to_copy, read_len = 0;
+       struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+       u32 resp = 0, resp_param = 0;
+       struct qed_ptt *p_ptt;
+       int rc = 0;
+
+       p_ptt = qed_ptt_acquire(p_hwfn);
+       if (!p_ptt)
+               return -EBUSY;
+
+       while (bytes_left > 0) {
+               bytes_to_copy = min_t(u32, bytes_left, MCP_DRV_NVM_BUF_LEN);
+
+               rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+                                       DRV_MSG_CODE_NVM_READ_NVRAM,
+                                       addr + offset +
+                                       (bytes_to_copy <<
+                                        DRV_MB_PARAM_NVM_LEN_SHIFT),
+                                       &resp, &resp_param,
+                                       &read_len,
+                                       (u32 *)(p_buf + offset));
+               if (rc) {
+                       DP_NOTICE(cdev, "MCP command rc = %d\n", rc);
+                       break;
+               }
+
+               /* The mailbox exchange itself worked, but the MFW refused the
+                * read. Report that as a failure instead of returning 0 with
+                * a partially filled buffer.
+                */
+               if (resp != FW_MSG_CODE_NVM_OK) {
+                       DP_NOTICE(cdev, "MCP NVM read failed, resp = 0x%08x\n",
+                                 resp);
+                       rc = -EINVAL;
+                       break;
+               }
+
+               /* A zero length would spin here forever, and a length beyond
+                * what is left would wrap bytes_left around.
+                */
+               if (!read_len || read_len > bytes_left) {
+                       DP_NOTICE(cdev, "MCP NVM read returned bad length %u\n",
+                                 read_len);
+                       rc = -EIO;
+                       break;
+               }
+
+               /* This can be a lengthy process, and it's possible scheduler
+                * isn't preemptable. Sleep a bit to prevent CPU hogging.
+                */
+               if (bytes_left % 0x1000 <
+                   (bytes_left - read_len) % 0x1000)
+                       usleep_range(1000, 2000);
+
+               offset += read_len;
+               bytes_left -= read_len;
+       }
+
+       cdev->mcp_nvm_resp = resp;
+       qed_ptt_release(p_hwfn, p_ptt);
+
+       return rc;
+}
+
 int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
        u32 drv_mb_param = 0, rsp, param;
@@ -1312,3 +1635,101 @@ int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 
        return rc;
 }
+
+/* Ask the MFW (DRV_MSG_CODE_BIST_TEST, NVM test "number of images") how many
+ * NVM images there are; the MFW's answer parameter is written to
+ * *num_images.
+ *
+ * Returns the qed_mcp_cmd() error if the command could not be sent, -EINVAL
+ * if the MFW response code is not FW_MSG_CODE_OK, 0 otherwise.
+ */
+int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn,
+                                        struct qed_ptt *p_ptt,
+                                        u32 *num_images)
+{
+       u32 test_param, mcp_resp;
+       int rc;
+
+       test_param = DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES <<
+                    DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT;
+
+       rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BIST_TEST,
+                        test_param, &mcp_resp, num_images);
+       if (rc)
+               return rc;
+
+       if ((mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK)
+               return -EINVAL;
+
+       return 0;
+}
+
+/* Fetch the attributes of the NVM image with the given index from the MFW
+ * (DRV_MSG_CODE_BIST_TEST, NVM test "image by index") into *p_image_att.
+ *
+ * Returns the qed_mcp_nvm_rd_cmd() error if the command could not be sent,
+ * -EINVAL if the MFW response code is not FW_MSG_CODE_OK or the returned
+ * attributes carry a return_code other than 1, 0 otherwise.
+ */
+int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
+                                       struct qed_ptt *p_ptt,
+                                       struct bist_nvm_image_att *p_image_att,
+                                       u32 image_index)
+{
+       u32 mcp_resp = 0, mcp_param = 0, att_len = 0;
+       u32 test_param;
+       int rc;
+
+       test_param = (DRV_MB_PARAM_BIST_NVM_TEST_IMAGE_BY_INDEX <<
+                     DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT) |
+                    (image_index << DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT);
+
+       rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+                               DRV_MSG_CODE_BIST_TEST, test_param,
+                               &mcp_resp, &mcp_param,
+                               &att_len,
+                               (u32 *)p_image_att);
+       if (rc)
+               return rc;
+
+       if ((mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK)
+               return -EINVAL;
+
+       if (p_image_att->return_code != 1)
+               return -EINVAL;
+
+       return 0;
+}
+
+#define QED_RESC_ALLOC_VERSION_MAJOR    1
+#define QED_RESC_ALLOC_VERSION_MINOR    0
+#define QED_RESC_ALLOC_VERSION                              \
+       ((QED_RESC_ALLOC_VERSION_MAJOR <<                    \
+         DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT) | \
+        (QED_RESC_ALLOC_VERSION_MINOR <<                    \
+         DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT))
+/* Exchange a resource_info descriptor with the MFW using
+ * DRV_MSG_GET_RESOURCE_ALLOC_MSG (parameter QED_RESC_ALLOC_VERSION).
+ * *p_resc_info is sent as the request and overwritten with the MFW's answer;
+ * the mailbox response and parameter go to *p_mcp_resp / *p_mcp_param.
+ *
+ * Returns the qed_mcp_cmd_and_union() error, in which case no output is
+ * touched, or 0.
+ */
+int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
+                         struct qed_ptt *p_ptt,
+                         struct resource_info *p_resc_info,
+                         u32 *p_mcp_resp, u32 *p_mcp_param)
+{
+       union drv_union_data mb_data;
+       struct qed_mcp_mb_params mb;
+       int rc;
+
+       /* The mailbox helper memcpy's from and to a full union, so stage the
+        * (smaller) resource_info inside a zeroed, sufficiently large one.
+        */
+       memset(&mb_data, 0, sizeof(mb_data));
+       memcpy(&mb_data.resource, p_resc_info, sizeof(*p_resc_info));
+
+       memset(&mb, 0, sizeof(mb));
+       mb.cmd = DRV_MSG_GET_RESOURCE_ALLOC_MSG;
+       mb.param = QED_RESC_ALLOC_VERSION;
+       mb.p_data_src = &mb_data;
+       mb.p_data_dst = &mb_data;
+
+       rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb);
+       if (rc)
+               return rc;
+
+       /* Hand the MFW's answer back to the caller */
+       memcpy(p_resc_info, &mb_data.resource, sizeof(*p_resc_info));
+       *p_mcp_resp = mb.mcp_resp;
+       *p_mcp_param = mb.mcp_param;
+
+       DP_VERBOSE(p_hwfn,
+                  QED_MSG_SP,
+                  "MFW resource_info: version 0x%x, res_id 0x%x, size 0x%x, offset 0x%x, vf_size 0x%x, vf_offset 0x%x, flags 0x%x\n",
+                  *p_mcp_param,
+                  p_resc_info->res_id,
+                  p_resc_info->size,
+                  p_resc_info->offset,
+                  p_resc_info->vf_size,
+                  p_resc_info->vf_offset, p_resc_info->flags);
+
+       return 0;
+}
index dff520ed069bf61d951076bee5b82adf89f95c72..407a2c1830fb6f480a2523942c9730b4d1b50f58 100644 (file)
@@ -92,6 +92,8 @@ struct qed_mcp_function_info {
 
 #define QED_MCP_VLAN_UNSET              (0xffff)
        u16                             ovlan;
+
+       u16                             mtu;
 };
 
 struct qed_mcp_nvm_common {
@@ -147,6 +149,30 @@ union qed_mcp_protocol_stats {
        struct qed_mcp_rdma_stats rdma_stats;
 };
 
+enum qed_ov_eswitch { /* eswitch mode argument of qed_mcp_ov_update_eswitch() */
+       QED_OV_ESWITCH_NONE, /* sent as DRV_MB_PARAM_ESWITCH_MODE_NONE */
+       QED_OV_ESWITCH_VEB, /* sent as DRV_MB_PARAM_ESWITCH_MODE_VEB */
+       QED_OV_ESWITCH_VEPA /* sent as DRV_MB_PARAM_ESWITCH_MODE_VEPA */
+};
+
+enum qed_ov_client { /* client argument of qed_mcp_ov_update_current_config() */
+       QED_OV_CLIENT_DRV, /* sent as DRV_MB_PARAM_OV_CURR_CFG_OS */
+       QED_OV_CLIENT_USER, /* sent as DRV_MB_PARAM_OV_CURR_CFG_OTHER */
+       QED_OV_CLIENT_VENDOR_SPEC /* sent as DRV_MB_PARAM_OV_CURR_CFG_VENDOR_SPEC */
+};
+
+enum qed_ov_driver_state { /* state argument of qed_mcp_ov_update_driver_state() */
+       QED_OV_DRIVER_STATE_NOT_LOADED, /* sent as DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_NOT_LOADED */
+       QED_OV_DRIVER_STATE_DISABLED, /* sent as DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_DISABLED */
+       QED_OV_DRIVER_STATE_ACTIVE /* sent as DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_ACTIVE */
+};
+
+enum qed_ov_wol { /* WoL mode argument of qed_mcp_ov_update_wol(); also cached in cdev->wol_config */
+       QED_OV_WOL_DEFAULT, /* sent as DRV_MB_PARAM_WOL_DEFAULT */
+       QED_OV_WOL_DISABLED, /* sent as DRV_MB_PARAM_WOL_DISABLED */
+       QED_OV_WOL_ENABLED /* sent as DRV_MB_PARAM_WOL_ENABLED; makes qed_mcp_cmd() attach the WoL MAC to UNLOAD_DONE */
+};
+
 /**
  * @brief - returns the link params of the hw function
  *
@@ -277,6 +303,69 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
                         struct qed_ptt *p_ptt,
                         struct qed_mcp_drv_version *p_ver);
 
+/**
+ * @brief Notify MFW about the change in base device properties
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param client - qed client type
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn,
+                                    struct qed_ptt *p_ptt,
+                                    enum qed_ov_client client);
+
+/**
+ * @brief Notify MFW about the driver state
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param drv_state - Driver state
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_driver_state(struct qed_hwfn *p_hwfn,
+                                  struct qed_ptt *p_ptt,
+                                  enum qed_ov_driver_state drv_state);
+
+/**
+ * @brief Send MTU size to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param mtu - MTU size
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn,
+                         struct qed_ptt *p_ptt, u16 mtu);
+
+/**
+ * @brief Send MAC address to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param mac - MAC address
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
+                         struct qed_ptt *p_ptt, u8 *mac);
+
+/**
+ * @brief Send WOL mode to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param wol - WOL mode
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn,
+                         struct qed_ptt *p_ptt,
+                         enum qed_ov_wol wol);
+
 /**
  * @brief Set LED status
  *
@@ -290,6 +379,18 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
                    struct qed_ptt *p_ptt,
                    enum qed_led_mode mode);
 
+/**
+ * @brief Read from nvm
+ *
+ *  @param cdev
+ *  @param addr - nvm offset
+ *  @param p_buf - nvm read buffer
+ *  @param len - buffer len
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len);
+
 /**
  * @brief Bist register test
  *
@@ -312,6 +413,35 @@ int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn,
 int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn,
                            struct qed_ptt *p_ptt);
 
+/**
+ * @brief Bist nvm test - get number of images
+ *
+ *  @param p_hwfn       - hw function
+ *  @param p_ptt        - PTT required for register access
+ *  @param num_images   - number of images if operation was
+ *                       successful. 0 if not.
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn,
+                                        struct qed_ptt *p_ptt,
+                                        u32 *num_images);
+
+/**
+ * @brief Bist nvm test - get image attributes by index
+ *
+ *  @param p_hwfn      - hw function
+ *  @param p_ptt       - PTT required for register access
+ *  @param p_image_att - Attributes of image
+ *  @param image_index - Index of image to get information for
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
+                                       struct qed_ptt *p_ptt,
+                                       struct bist_nvm_image_att *p_image_att,
+                                       u32 image_index);
+
 /* Using hwfn number (and not pf_num) is required since in CMT mode,
  * same pf_num may be used by two different hwfn
  * TODO - this shouldn't really be in .h file, but until all fields
@@ -546,4 +676,32 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
 int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
                          struct qed_ptt *p_ptt, u32 mask_parities);
 
+/**
+ * @brief Send eswitch mode to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param eswitch - eswitch mode
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn,
+                             struct qed_ptt *p_ptt,
+                             enum qed_ov_eswitch eswitch);
+
+/**
+ * @brief - Gets the MFW allocation info for the given resource
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param p_resc_info - descriptor of requested resource
+ *  @param p_mcp_resp
+ *  @param p_mcp_param
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
+                         struct qed_ptt *p_ptt,
+                         struct resource_info *p_resc_info,
+                         u32 *p_mcp_resp, u32 *p_mcp_param);
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
new file mode 100644 (file)
index 0000000..155abcb
--- /dev/null
@@ -0,0 +1,501 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include "qed.h"
+#include "qed_iscsi.h"
+#include "qed_ll2.h"
+#include "qed_ooo.h"
+
+/* Look up the archipelago currently bound to connection @cid.
+ *
+ * Walks p_ooo_info->archipelagos_list and returns the first entry whose
+ * cid matches, or NULL when no entry matches.
+ */
+static struct qed_ooo_archipelago
+*qed_ooo_seek_archipelago(struct qed_hwfn *p_hwfn,
+                         struct qed_ooo_info
+                         *p_ooo_info,
+                         u32 cid)
+{
+       struct qed_ooo_archipelago *p_match = NULL;
+       struct qed_ooo_archipelago *p_cur;
+
+       list_for_each_entry(p_cur,
+                           &p_ooo_info->archipelagos_list, list_entry) {
+               if (p_cur->cid != cid)
+                       continue;
+
+               p_match = p_cur;
+               break;
+       }
+
+       return p_match;
+}
+
+/* Return the isle at 1-based position @isle inside the archipelago bound
+ * to connection @cid.
+ *
+ * Returns NULL (after logging a notice) when the connection has no
+ * archipelago, and NULL when the archipelago holds fewer than @isle isles.
+ * @isle == 0 never matches.
+ */
+static struct qed_ooo_isle *qed_ooo_seek_isle(struct qed_hwfn *p_hwfn,
+                                             struct qed_ooo_info *p_ooo_info,
+                                             u32 cid, u8 isle)
+{
+       struct qed_ooo_archipelago *p_archipelago = NULL;
+       struct qed_ooo_isle *p_isle = NULL;
+       /* Wider than @isle on purpose: a u8 counter would wrap to 0 after
+        * 255 entries and could then spuriously match a request for isle 0.
+        */
+       u32 the_num_of_isle = 1;
+
+       p_archipelago = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid);
+       if (!p_archipelago) {
+               DP_NOTICE(p_hwfn,
+                         "Connection %d is not found in OOO list\n", cid);
+               return NULL;
+       }
+
+       list_for_each_entry(p_isle, &p_archipelago->isles_list, list_entry) {
+               if (the_num_of_isle == isle)
+                       return p_isle;
+               the_num_of_isle++;
+       }
+
+       return NULL;
+}
+
+/* Record a copy of @p_cqe in the circular OOO history buffer.
+ *
+ * The entry is stored at head_idx, which then advances by one. head_idx is
+ * reset to 0 whenever it is at or beyond num_of_cqes, so a stale or
+ * zero num_of_cqes can never let the index run away past the start of the
+ * array.
+ */
+void qed_ooo_save_history_entry(struct qed_hwfn *p_hwfn,
+                               struct qed_ooo_info *p_ooo_info,
+                               struct ooo_opaque *p_cqe)
+{
+       struct qed_ooo_history *p_history = &p_ooo_info->ooo_history;
+
+       /* '>=' rather than '==': with '==' an index that ever got past
+        * num_of_cqes would keep growing and write out of bounds.
+        */
+       if (p_history->head_idx >= p_history->num_of_cqes)
+               p_history->head_idx = 0;
+       p_history->p_cqes[p_history->head_idx] = *p_cqe;
+       p_history->head_idx++;
+}
+
+/* Allocate and initialise the OOO bookkeeping for an iSCSI function.
+ *
+ * Allocates the qed_ooo_info itself, a pool of isles
+ * (QED_MAX_NUM_ISLES + number of iSCSI connections), a pool of
+ * archipelagos (one per iSCSI connection) and the CQE history array
+ * (QED_MAX_NUM_OOO_HISTORY_ENTRIES entries). All isles and archipelagos
+ * start on their respective free lists.
+ *
+ * Returns the new object, or NULL when the personality is not iSCSI, the
+ * connection count is zero, or any allocation fails (everything already
+ * allocated is released).
+ */
+struct qed_ooo_info *qed_ooo_alloc(struct qed_hwfn *p_hwfn)
+{
+       struct qed_ooo_info *p_ooo_info;
+       u16 max_num_archipelagos = 0;
+       /* u32: QED_MAX_NUM_ISLES + a u16 connection count may exceed 0xffff */
+       u32 max_num_isles = 0;
+       u32 i;
+
+       if (p_hwfn->hw_info.personality != QED_PCI_ISCSI) {
+               DP_NOTICE(p_hwfn,
+                         "Failed to allocate qed_ooo_info: unknown personality\n");
+               return NULL;
+       }
+
+       max_num_archipelagos = p_hwfn->pf_params.iscsi_pf_params.num_cons;
+       max_num_isles = QED_MAX_NUM_ISLES + max_num_archipelagos;
+
+       if (!max_num_archipelagos) {
+               DP_NOTICE(p_hwfn,
+                         "Failed to allocate qed_ooo_info: unknown amount of connections\n");
+               return NULL;
+       }
+
+       p_ooo_info = kzalloc(sizeof(*p_ooo_info), GFP_KERNEL);
+       if (!p_ooo_info)
+               return NULL;
+
+       INIT_LIST_HEAD(&p_ooo_info->free_buffers_list);
+       INIT_LIST_HEAD(&p_ooo_info->ready_buffers_list);
+       INIT_LIST_HEAD(&p_ooo_info->free_isles_list);
+       INIT_LIST_HEAD(&p_ooo_info->free_archipelagos_list);
+       INIT_LIST_HEAD(&p_ooo_info->archipelagos_list);
+
+       p_ooo_info->p_isles_mem = kcalloc(max_num_isles,
+                                         sizeof(struct qed_ooo_isle),
+                                         GFP_KERNEL);
+       if (!p_ooo_info->p_isles_mem)
+               goto no_isles_mem;
+
+       for (i = 0; i < max_num_isles; i++) {
+               INIT_LIST_HEAD(&p_ooo_info->p_isles_mem[i].buffers_list);
+               list_add_tail(&p_ooo_info->p_isles_mem[i].list_entry,
+                             &p_ooo_info->free_isles_list);
+       }
+
+       p_ooo_info->p_archipelagos_mem =
+                               kcalloc(max_num_archipelagos,
+                                       sizeof(struct qed_ooo_archipelago),
+                                       GFP_KERNEL);
+       if (!p_ooo_info->p_archipelagos_mem)
+               goto no_archipelagos_mem;
+
+       for (i = 0; i < max_num_archipelagos; i++) {
+               INIT_LIST_HEAD(&p_ooo_info->p_archipelagos_mem[i].isles_list);
+               list_add_tail(&p_ooo_info->p_archipelagos_mem[i].list_entry,
+                             &p_ooo_info->free_archipelagos_list);
+       }
+
+       p_ooo_info->ooo_history.p_cqes =
+                               kcalloc(QED_MAX_NUM_OOO_HISTORY_ENTRIES,
+                                       sizeof(struct ooo_opaque),
+                                       GFP_KERNEL);
+       if (!p_ooo_info->ooo_history.p_cqes)
+               goto no_history_mem;
+
+       /* Record the ring size: qed_ooo_save_history_entry() wraps head_idx
+        * on it and qed_ooo_setup() uses it to size the memset. Left at the
+        * kzalloc'ed 0, the ring would never wrap and would be written past
+        * its end.
+        */
+       p_ooo_info->ooo_history.num_of_cqes = QED_MAX_NUM_OOO_HISTORY_ENTRIES;
+
+       return p_ooo_info;
+
+no_history_mem:
+       kfree(p_ooo_info->p_archipelagos_mem);
+no_archipelagos_mem:
+       kfree(p_ooo_info->p_isles_mem);
+no_isles_mem:
+       kfree(p_ooo_info);
+       return NULL;
+}
+
+/* Recycle everything held for connection @cid.
+ *
+ * The connection's archipelago is taken off the active list; the buffers
+ * of each of its isles are appended to free_buffers_list, the isles are
+ * appended to free_isles_list and finally the archipelago is appended to
+ * free_archipelagos_list. Nothing happens when @cid has no archipelago.
+ */
+void qed_ooo_release_connection_isles(struct qed_hwfn *p_hwfn,
+                                     struct qed_ooo_info *p_ooo_info, u32 cid)
+{
+       struct qed_ooo_archipelago *p_arch;
+       struct qed_ooo_isle *p_isle;
+
+       p_arch = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid);
+       if (!p_arch)
+               return;
+
+       list_del(&p_arch->list_entry);
+
+       /* Isle by isle, hand the buffers back in their current order */
+       list_for_each_entry(p_isle, &p_arch->isles_list, list_entry)
+               list_splice_tail_init(&p_isle->buffers_list,
+                                     &p_ooo_info->free_buffers_list);
+
+       /* The now empty isles go back to the pool in one go */
+       list_splice_tail_init(&p_arch->isles_list,
+                             &p_ooo_info->free_isles_list);
+
+       list_add_tail(&p_arch->list_entry,
+                     &p_ooo_info->free_archipelagos_list);
+}
+
+/* Recycle the OOO state of every connection.
+ *
+ * For each active archipelago the buffers of all its isles are appended to
+ * free_buffers_list, its isles to free_isles_list and the archipelago
+ * itself to free_archipelagos_list. Buffers still waiting on
+ * ready_buffers_list are appended to free_buffers_list last.
+ */
+void qed_ooo_release_all_isles(struct qed_hwfn *p_hwfn,
+                              struct qed_ooo_info *p_ooo_info)
+{
+       struct qed_ooo_archipelago *p_arch, *p_next_arch;
+       struct qed_ooo_isle *p_isle;
+
+       list_for_each_entry_safe(p_arch, p_next_arch,
+                                &p_ooo_info->archipelagos_list, list_entry) {
+               list_for_each_entry(p_isle, &p_arch->isles_list, list_entry)
+                       list_splice_tail_init(&p_isle->buffers_list,
+                                             &p_ooo_info->free_buffers_list);
+
+               list_splice_tail_init(&p_arch->isles_list,
+                                     &p_ooo_info->free_isles_list);
+
+               list_move_tail(&p_arch->list_entry,
+                              &p_ooo_info->free_archipelagos_list);
+       }
+
+       list_splice_tail_init(&p_ooo_info->ready_buffers_list,
+                             &p_ooo_info->free_buffers_list);
+}
+
+/* Reset the OOO state: recycle all isles and buffers, zero num_of_cqes
+ * entries of the CQE history and rewind its head index.
+ */
+void qed_ooo_setup(struct qed_hwfn *p_hwfn, struct qed_ooo_info *p_ooo_info)
+{
+       struct qed_ooo_history *p_history = &p_ooo_info->ooo_history;
+
+       qed_ooo_release_all_isles(p_hwfn, p_ooo_info);
+
+       memset(p_history->p_cqes, 0,
+              p_history->num_of_cqes * sizeof(*p_history->p_cqes));
+       p_history->head_idx = 0;
+}
+
+/* Tear down an object created by qed_ooo_alloc().
+ *
+ * All buffers are first gathered on free_buffers_list, then each one has
+ * its DMA memory released and its descriptor freed. The isle pool, the
+ * archipelago pool, the CQE history and @p_ooo_info itself are freed last.
+ */
+void qed_ooo_free(struct qed_hwfn *p_hwfn, struct qed_ooo_info *p_ooo_info)
+{
+       struct qed_ooo_buffer *p_buffer, *p_next_buffer;
+
+       qed_ooo_release_all_isles(p_hwfn, p_ooo_info);
+
+       list_for_each_entry_safe(p_buffer, p_next_buffer,
+                                &p_ooo_info->free_buffers_list, list_entry) {
+               list_del(&p_buffer->list_entry);
+               dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+                                 p_buffer->rx_buffer_size,
+                                 p_buffer->rx_buffer_virt_addr,
+                                 p_buffer->rx_buffer_phys_addr);
+               kfree(p_buffer);
+       }
+
+       kfree(p_ooo_info->p_isles_mem);
+       kfree(p_ooo_info->p_archipelagos_mem);
+       kfree(p_ooo_info->ooo_history.p_cqes);
+       kfree(p_ooo_info);
+}
+
+/* Append @p_buffer to the tail of the free buffers list. */
+void qed_ooo_put_free_buffer(struct qed_hwfn *p_hwfn,
+                            struct qed_ooo_info *p_ooo_info,
+                            struct qed_ooo_buffer *p_buffer)
+{
+       struct list_head *p_free_list = &p_ooo_info->free_buffers_list;
+
+       list_add_tail(&p_buffer->list_entry, p_free_list);
+}
+
+/* Detach and return the first buffer of the free buffers list, or NULL
+ * when that list is empty.
+ */
+struct qed_ooo_buffer *qed_ooo_get_free_buffer(struct qed_hwfn *p_hwfn,
+                                              struct qed_ooo_info *p_ooo_info)
+{
+       struct qed_ooo_buffer *p_first;
+
+       p_first = list_first_entry_or_null(&p_ooo_info->free_buffers_list,
+                                          struct qed_ooo_buffer, list_entry);
+       if (p_first)
+               list_del(&p_first->list_entry);
+
+       return p_first;
+}
+
+/* Queue @p_buffer on the ready buffers list: at the tail when @on_tail is
+ * non-zero, at the head otherwise.
+ */
+void qed_ooo_put_ready_buffer(struct qed_hwfn *p_hwfn,
+                             struct qed_ooo_info *p_ooo_info,
+                             struct qed_ooo_buffer *p_buffer, u8 on_tail)
+{
+       struct list_head *p_ready_list = &p_ooo_info->ready_buffers_list;
+
+       if (!on_tail) {
+               list_add(&p_buffer->list_entry, p_ready_list);
+               return;
+       }
+
+       list_add_tail(&p_buffer->list_entry, p_ready_list);
+}
+
+/* Detach and return the first buffer of the ready buffers list, or NULL
+ * when that list is empty.
+ */
+struct qed_ooo_buffer *qed_ooo_get_ready_buffer(struct qed_hwfn *p_hwfn,
+                                               struct qed_ooo_info *p_ooo_info)
+{
+       struct list_head *p_ready_list = &p_ooo_info->ready_buffers_list;
+       struct qed_ooo_buffer *p_first;
+
+       if (list_empty(p_ready_list))
+               return NULL;
+
+       p_first = list_first_entry(p_ready_list,
+                                  struct qed_ooo_buffer, list_entry);
+       list_del(&p_first->list_entry);
+
+       return p_first;
+}
+
+/* Drop @drop_size consecutive isles of connection @cid, starting at the
+ * 1-based position @drop_isle.
+ *
+ * The same position is looked up on every iteration because removing an
+ * isle shifts its successors down by one. The buffers of each dropped isle
+ * are appended to free_buffers_list and the isle is pushed onto
+ * free_isles_list. If the archipelago ends up without isles it is moved to
+ * free_archipelagos_list. The function stops at the first isle that cannot
+ * be found.
+ */
+void qed_ooo_delete_isles(struct qed_hwfn *p_hwfn,
+                         struct qed_ooo_info *p_ooo_info,
+                         u32 cid, u8 drop_isle, u8 drop_size)
+{
+       struct qed_ooo_archipelago *p_archipelago = NULL;
+       struct qed_ooo_isle *p_isle = NULL;
+       u8 isle_idx;
+
+       p_archipelago = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid);
+       for (isle_idx = 0; isle_idx < drop_size; isle_idx++) {
+               p_isle = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid, drop_isle);
+               if (!p_isle) {
+                       DP_NOTICE(p_hwfn,
+                                 "Isle %d is not found(cid %d)\n",
+                                 drop_isle, cid);
+                       return;
+               }
+               if (list_empty(&p_isle->buffers_list))
+                       DP_NOTICE(p_hwfn,
+                                 "Isle %d is empty(cid %d)\n", drop_isle, cid);
+               else
+                       list_splice_tail_init(&p_isle->buffers_list,
+                                             &p_ooo_info->free_buffers_list);
+
+               list_del(&p_isle->list_entry);
+               p_ooo_info->cur_isles_number--;
+               list_add(&p_isle->list_entry, &p_ooo_info->free_isles_list);
+       }
+
+       /* The loop is skipped entirely when drop_size is 0, so an unknown
+        * cid can reach this point with no archipelago; do not dereference
+        * it in that case.
+        */
+       if (p_archipelago && list_empty(&p_archipelago->isles_list)) {
+               list_del(&p_archipelago->list_entry);
+               list_add(&p_archipelago->list_entry,
+                        &p_ooo_info->free_archipelagos_list);
+       }
+}
+
+void qed_ooo_add_new_isle(struct qed_hwfn *p_hwfn,
+                         struct qed_ooo_info *p_ooo_info,
+                         u32 cid, u8 ooo_isle,
+                         struct qed_ooo_buffer *p_buffer)
+{      /* Create a new isle for connection cid holding p_buffer as its only
+        * buffer. The isle is taken from free_isles_list and linked right
+        * after isle (ooo_isle - 1) when ooo_isle > 1, otherwise at the head
+        * of the archipelago's isles list. When cid has no archipelago yet
+        * and ooo_isle == 1, one is taken from free_archipelagos_list and
+        * bound to cid. The cur/gen/max isle counters are updated on success.
+        */
+       struct qed_ooo_archipelago *p_archipelago = NULL;
+       struct qed_ooo_isle *p_prev_isle = NULL;
+       struct qed_ooo_isle *p_isle = NULL;
+
+       if (ooo_isle > 1) {
+               p_prev_isle = qed_ooo_seek_isle(p_hwfn,
+                                               p_ooo_info, cid, ooo_isle - 1);
+               if (!p_prev_isle) {
+                       DP_NOTICE(p_hwfn,
+                                 "Isle %d is not found(cid %d)\n",
+                                 ooo_isle - 1, cid);
+                       return; /* NOTE(review): p_buffer is left in no list here - confirm the caller recovers it */
+               }
+       }
+       p_archipelago = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid);
+       if (!p_archipelago && (ooo_isle != 1)) {
+               DP_NOTICE(p_hwfn,
+                         "Connection %d is not found in OOO list\n", cid);
+               return; /* NOTE(review): p_buffer is left in no list here as well */
+       }
+
+       if (!list_empty(&p_ooo_info->free_isles_list)) {
+               p_isle = list_first_entry(&p_ooo_info->free_isles_list,
+                                         struct qed_ooo_isle, list_entry);
+
+               list_del(&p_isle->list_entry);
+               if (!list_empty(&p_isle->buffers_list)) {
+                       DP_NOTICE(p_hwfn, "Free isle is not empty\n");
+                       INIT_LIST_HEAD(&p_isle->buffers_list); /* forget the stale buffers; they are not moved to any list */
+               }
+       } else {
+               DP_NOTICE(p_hwfn, "No more free isles\n");
+               return; /* NOTE(review): unlike the "No more free OOO connections" path below, p_buffer is not put on free_buffers_list */
+       }
+
+       if (!p_archipelago &&
+           !list_empty(&p_ooo_info->free_archipelagos_list)) {
+               p_archipelago =
+                   list_first_entry(&p_ooo_info->free_archipelagos_list,
+                                    struct qed_ooo_archipelago, list_entry);
+
+               list_del(&p_archipelago->list_entry);
+               if (!list_empty(&p_archipelago->isles_list)) {
+                       DP_NOTICE(p_hwfn,
+                                 "Free OOO connection is not empty\n");
+                       INIT_LIST_HEAD(&p_archipelago->isles_list);
+               }
+               p_archipelago->cid = cid;
+               list_add(&p_archipelago->list_entry,
+                        &p_ooo_info->archipelagos_list);
+       } else if (!p_archipelago) {
+               DP_NOTICE(p_hwfn, "No more free OOO connections\n");
+               list_add(&p_isle->list_entry,
+                        &p_ooo_info->free_isles_list); /* undo: give the isle back */
+               list_add(&p_buffer->list_entry,
+                        &p_ooo_info->free_buffers_list); /* and recycle the buffer */
+               return;
+       }
+
+       list_add(&p_buffer->list_entry, &p_isle->buffers_list);
+       p_ooo_info->cur_isles_number++;
+       p_ooo_info->gen_isles_number++;
+
+       if (p_ooo_info->cur_isles_number > p_ooo_info->max_isles_number)
+               p_ooo_info->max_isles_number = p_ooo_info->cur_isles_number; /* track the high-water mark */
+
+       if (!p_prev_isle)
+               list_add(&p_isle->list_entry, &p_archipelago->isles_list); /* becomes the first isle */
+       else
+               list_add(&p_isle->list_entry, &p_prev_isle->list_entry); /* goes right after the previous isle */
+}
+
+/* Attach @p_buffer to isle @ooo_isle of connection @cid.
+ *
+ * The buffer goes to the head of the isle's buffers list when
+ * @buffer_side is QED_OOO_LEFT_BUF and to its tail otherwise. A notice is
+ * logged and nothing is changed when the isle cannot be found.
+ */
+void qed_ooo_add_new_buffer(struct qed_hwfn *p_hwfn,
+                           struct qed_ooo_info *p_ooo_info,
+                           u32 cid,
+                           u8 ooo_isle,
+                           struct qed_ooo_buffer *p_buffer, u8 buffer_side)
+{
+       struct qed_ooo_isle *p_target;
+
+       p_target = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid, ooo_isle);
+       if (!p_target) {
+               DP_NOTICE(p_hwfn,
+                         "Isle %d is not found(cid %d)\n", ooo_isle, cid);
+               return;
+       }
+
+       if (buffer_side != QED_OOO_LEFT_BUF)
+               list_add_tail(&p_buffer->list_entry, &p_target->buffers_list);
+       else
+               list_add(&p_buffer->list_entry, &p_target->buffers_list);
+}
+
+/* Merge isle (@left_isle + 1) of connection @cid into its left neighbour.
+ *
+ * With @left_isle != 0 the buffers of the right isle are appended to the
+ * buffers of isle @left_isle. With @left_isle == 0 they are appended to
+ * ready_buffers_list, and the archipelago is moved to
+ * free_archipelagos_list if it has no isles left. In both cases the right
+ * isle is returned to free_isles_list and cur_isles_number is decremented.
+ *
+ * Both isles are resolved before any list is modified, so a failed lookup
+ * leaves all lists and counters untouched.
+ */
+void qed_ooo_join_isles(struct qed_hwfn *p_hwfn,
+                       struct qed_ooo_info *p_ooo_info, u32 cid, u8 left_isle)
+{
+       struct qed_ooo_archipelago *p_archipelago = NULL;
+       struct qed_ooo_isle *p_right_isle = NULL;
+       struct qed_ooo_isle *p_left_isle = NULL;
+
+       p_right_isle = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid,
+                                        left_isle + 1);
+       if (!p_right_isle) {
+               DP_NOTICE(p_hwfn,
+                         "Right isle %d is not found(cid %d)\n",
+                         left_isle + 1, cid);
+               return;
+       }
+
+       /* Look the left isle up while the right one is still linked:
+        * bailing out after the unlink would drop the right isle and its
+        * buffers from every list.
+        */
+       if (left_isle) {
+               p_left_isle = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid,
+                                               left_isle);
+               if (!p_left_isle) {
+                       DP_NOTICE(p_hwfn,
+                                 "Left isle %d is not found(cid %d)\n",
+                                 left_isle, cid);
+                       return;
+               }
+       }
+
+       p_archipelago = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid);
+       list_del(&p_right_isle->list_entry);
+       p_ooo_info->cur_isles_number--;
+       if (p_left_isle) {
+               list_splice_tail_init(&p_right_isle->buffers_list,
+                                     &p_left_isle->buffers_list);
+       } else {
+               list_splice_tail_init(&p_right_isle->buffers_list,
+                                     &p_ooo_info->ready_buffers_list);
+               if (list_empty(&p_archipelago->isles_list)) {
+                       list_del(&p_archipelago->list_entry);
+                       list_add(&p_archipelago->list_entry,
+                                &p_ooo_info->free_archipelagos_list);
+               }
+       }
+       list_add_tail(&p_right_isle->list_entry, &p_ooo_info->free_isles_list);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.h b/drivers/net/ethernet/qlogic/qed/qed_ooo.h
new file mode 100644 (file)
index 0000000..7a0670a
--- /dev/null
@@ -0,0 +1,176 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_OOO_H
+#define _QED_OOO_H
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include "qed.h"
+
+#define QED_MAX_NUM_ISLES      256
+#define QED_MAX_NUM_OOO_HISTORY_ENTRIES        512
+
+#define QED_OOO_LEFT_BUF       0
+#define QED_OOO_RIGHT_BUF      1
+
+struct qed_ooo_buffer {        /* descriptor of one RX buffer tracked by the OOO lists */
+       struct list_head list_entry;    /* link in the free, ready or an isle's buffers list */
+       void *rx_buffer_virt_addr;      /* CPU address, released with dma_free_coherent() */
+       dma_addr_t rx_buffer_phys_addr; /* DMA handle given to dma_free_coherent() */
+       u32 rx_buffer_size;             /* size given to dma_free_coherent() */
+       u16 packet_length;
+       u16 parse_flags;
+       u16 vlan;
+       u8 placement_offset;
+};
+
+struct qed_ooo_isle {  /* ordered group of buffers inside an archipelago */
+       struct list_head list_entry;    /* link in an archipelago's isles_list or in free_isles_list */
+       struct list_head buffers_list;  /* qed_ooo_buffer entries held by this isle */
+};
+
+struct qed_ooo_archipelago {   /* per-connection list of isles */
+       struct list_head list_entry;    /* link in archipelagos_list or free_archipelagos_list */
+       struct list_head isles_list;    /* qed_ooo_isle entries, addressed by 1-based position */
+       u32 cid;                        /* connection id this archipelago is bound to */
+};
+
+struct qed_ooo_history {       /* circular log of CQEs, see qed_ooo_save_history_entry() */
+       struct ooo_opaque *p_cqes;      /* array of saved entries */
+       u32 head_idx;                   /* slot the next entry is written to */
+       u32 num_of_cqes;                /* index at which head_idx wraps back to 0 */
+};
+
+struct qed_ooo_info {  /* all OOO bookkeeping of one hw function, built by qed_ooo_alloc() */
+       struct list_head free_buffers_list;     /* buffers not held by any isle or the ready list */
+       struct list_head ready_buffers_list;    /* buffers queued by join_isles()/put_ready_buffer() */
+       struct list_head free_isles_list;       /* unused isles */
+       struct list_head free_archipelagos_list; /* unused archipelagos */
+       struct list_head archipelagos_list;     /* archipelagos currently bound to a cid */
+       struct qed_ooo_archipelago *p_archipelagos_mem; /* backing array of the archipelago pool */
+       struct qed_ooo_isle *p_isles_mem;       /* backing array of the isle pool */
+       struct qed_ooo_history ooo_history;
+       u32 cur_isles_number;   /* isles currently in use */
+       u32 max_isles_number;   /* highest value cur_isles_number has reached */
+       u32 gen_isles_number;   /* incremented for every isle created */
+};
+
+#if IS_ENABLED(CONFIG_QED_ISCSI)
+void qed_ooo_save_history_entry(struct qed_hwfn *p_hwfn,
+                               struct qed_ooo_info *p_ooo_info,
+                               struct ooo_opaque *p_cqe);
+
+struct qed_ooo_info *qed_ooo_alloc(struct qed_hwfn *p_hwfn);
+
+void qed_ooo_release_connection_isles(struct qed_hwfn *p_hwfn,
+                                     struct qed_ooo_info *p_ooo_info,
+                                     u32 cid);
+
+void qed_ooo_release_all_isles(struct qed_hwfn *p_hwfn,
+                              struct qed_ooo_info *p_ooo_info);
+
+void qed_ooo_setup(struct qed_hwfn *p_hwfn, struct qed_ooo_info *p_ooo_info);
+
+void qed_ooo_free(struct qed_hwfn *p_hwfn, struct qed_ooo_info *p_ooo_info);
+
+void qed_ooo_put_free_buffer(struct qed_hwfn *p_hwfn,
+                            struct qed_ooo_info *p_ooo_info,
+                            struct qed_ooo_buffer *p_buffer);
+
+struct qed_ooo_buffer *
+qed_ooo_get_free_buffer(struct qed_hwfn *p_hwfn,
+                       struct qed_ooo_info *p_ooo_info);
+
+void qed_ooo_put_ready_buffer(struct qed_hwfn *p_hwfn,
+                             struct qed_ooo_info *p_ooo_info,
+                             struct qed_ooo_buffer *p_buffer, u8 on_tail);
+
+struct qed_ooo_buffer *
+qed_ooo_get_ready_buffer(struct qed_hwfn *p_hwfn,
+                        struct qed_ooo_info *p_ooo_info);
+
+void qed_ooo_delete_isles(struct qed_hwfn *p_hwfn,
+                         struct qed_ooo_info *p_ooo_info,
+                         u32 cid, u8 drop_isle, u8 drop_size);
+
+void qed_ooo_add_new_isle(struct qed_hwfn *p_hwfn,
+                         struct qed_ooo_info *p_ooo_info,
+                         u32 cid,
+                         u8 ooo_isle, struct qed_ooo_buffer *p_buffer);
+
+void qed_ooo_add_new_buffer(struct qed_hwfn *p_hwfn,
+                           struct qed_ooo_info *p_ooo_info,
+                           u32 cid,
+                           u8 ooo_isle,
+                           struct qed_ooo_buffer *p_buffer, u8 buffer_side);
+
+void qed_ooo_join_isles(struct qed_hwfn *p_hwfn,
+                       struct qed_ooo_info *p_ooo_info, u32 cid,
+                       u8 left_isle);
+#else /* !IS_ENABLED(CONFIG_QED_ISCSI): empty inline stubs, pointer-returning ones yield NULL */
+static inline void qed_ooo_save_history_entry(struct qed_hwfn *p_hwfn,
+                                             struct qed_ooo_info *p_ooo_info,
+                                             struct ooo_opaque *p_cqe) {}
+
+static inline struct qed_ooo_info *qed_ooo_alloc(
+                               struct qed_hwfn *p_hwfn) { return NULL; }
+
+static inline void
+qed_ooo_release_connection_isles(struct qed_hwfn *p_hwfn,
+                                struct qed_ooo_info *p_ooo_info,
+                                u32 cid) {}
+
+static inline void qed_ooo_release_all_isles(struct qed_hwfn *p_hwfn,
+                                            struct qed_ooo_info *p_ooo_info)
+                                            {}
+
+static inline void qed_ooo_setup(struct qed_hwfn *p_hwfn,
+                                struct qed_ooo_info *p_ooo_info) {}
+
+static inline void qed_ooo_free(struct qed_hwfn *p_hwfn,
+                               struct qed_ooo_info *p_ooo_info) {}
+
+static inline void qed_ooo_put_free_buffer(struct qed_hwfn *p_hwfn,
+                                          struct qed_ooo_info *p_ooo_info,
+                                          struct qed_ooo_buffer *p_buffer) {}
+
+static inline struct qed_ooo_buffer *
+qed_ooo_get_free_buffer(struct qed_hwfn *p_hwfn,
+                       struct qed_ooo_info *p_ooo_info) { return NULL; }
+
+static inline void qed_ooo_put_ready_buffer(struct qed_hwfn *p_hwfn,
+                                           struct qed_ooo_info *p_ooo_info,
+                                           struct qed_ooo_buffer *p_buffer,
+                                           u8 on_tail) {}
+
+static inline struct qed_ooo_buffer *
+qed_ooo_get_ready_buffer(struct qed_hwfn *p_hwfn,
+                        struct qed_ooo_info *p_ooo_info) { return NULL; }
+
+static inline void qed_ooo_delete_isles(struct qed_hwfn *p_hwfn,
+                                       struct qed_ooo_info *p_ooo_info,
+                                       u32 cid, u8 drop_isle, u8 drop_size) {}
+
+static inline void qed_ooo_add_new_isle(struct qed_hwfn *p_hwfn,
+                                       struct qed_ooo_info *p_ooo_info,
+                                       u32 cid, u8 ooo_isle,
+                                       struct qed_ooo_buffer *p_buffer) {}
+
+static inline void qed_ooo_add_new_buffer(struct qed_hwfn *p_hwfn,
+                                         struct qed_ooo_info *p_ooo_info,
+                                         u32 cid, u8 ooo_isle,
+                                         struct qed_ooo_buffer *p_buffer,
+                                         u8 buffer_side) {}
+
+static inline void qed_ooo_join_isles(struct qed_hwfn *p_hwfn,
+                                     struct qed_ooo_info *p_ooo_info, u32 cid,
+                                     u8 left_isle) {}
+#endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
+
+#endif
index b414a05421775fc3448960d9a6ec85f4327d1d11..97544205a8c193540a2e8a40b640cfad4034e68a 100644 (file)
@@ -82,6 +82,8 @@
        0x1c80000UL
 #define BAR0_MAP_REG_XSDM_RAM \
        0x1e00000UL
+#define BAR0_MAP_REG_YSDM_RAM \
+       0x1e80000UL
 #define  NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF \
        0x5011f4UL
 #define  PRS_REG_SEARCH_TCP \
index b11beb559981d908ab427b804a8f4e9cbaa18321..2a16547c89661f1a35b8a575283cc39a5eaf7f83 100644 (file)
@@ -129,17 +129,12 @@ static void qed_bmap_release_id(struct qed_hwfn *p_hwfn,
        }
 }
 
-u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id)
+static u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id)
 {
        /* First sb id for RoCE is after all the l2 sb */
        return FEAT_NUM((struct qed_hwfn *)p_hwfn, QED_PF_L2_QUE) + rel_sb_id;
 }
 
-u32 qed_rdma_query_cau_timer_res(void *rdma_cxt)
-{
-       return QED_CAU_DEF_RX_TIMER_RES;
-}
-
 static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
                          struct qed_ptt *p_ptt,
                          struct qed_rdma_start_in_params *params)
@@ -162,7 +157,8 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
        p_hwfn->p_rdma_info = p_rdma_info;
        p_rdma_info->proto = PROTOCOLID_ROCE;
 
-       num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, 0);
+       num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto,
+                                              NULL);
 
        p_rdma_info->num_qps = num_cons / 2;
 
@@ -275,7 +271,7 @@ free_rdma_info:
        return rc;
 }
 
-void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
+static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
 {
        struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
 
@@ -527,6 +523,26 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
        return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
+/* Take an id from the RDMA tid_map bitmap while holding the rdma_info lock
+ * and, only if that succeeded, call qed_cxt_dynamic_ilt_alloc() for the
+ * QED_ELEM_TASK element identified by *itid.
+ *
+ * Returns 0 on success, otherwise the rc of whichever step failed.
+ */
+static int qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid)
+{
+       struct qed_hwfn *p_hwfn = rdma_cxt;
+       struct qed_rdma_info *p_info = p_hwfn->p_rdma_info;
+       int status;
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID\n");
+
+       spin_lock_bh(&p_info->lock);
+       status = qed_rdma_bmap_alloc_id(p_hwfn, &p_info->tid_map, itid);
+       spin_unlock_bh(&p_info->lock);
+
+       /* The ILT step is attempted only once an id has been obtained */
+       if (!status)
+               status = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_TASK,
+                                                  *itid);
+
+       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID - done, rc = %d\n",
+                  status);
+       return status;
+}
+
 static int qed_rdma_reserve_lkey(struct qed_hwfn *p_hwfn)
 {
        struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev;
@@ -573,7 +589,7 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn,
        return qed_rdma_start_fw(p_hwfn, params, p_ptt);
 }
 
-int qed_rdma_stop(void *rdma_cxt)
+static int qed_rdma_stop(void *rdma_cxt)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        struct rdma_close_func_ramrod_data *p_ramrod;
@@ -629,8 +645,8 @@ out:
        return rc;
 }
 
-int qed_rdma_add_user(void *rdma_cxt,
-                     struct qed_rdma_add_user_out_params *out_params)
+static int qed_rdma_add_user(void *rdma_cxt,
+                            struct qed_rdma_add_user_out_params *out_params)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        u32 dpi_start_offset;
@@ -664,7 +680,7 @@ int qed_rdma_add_user(void *rdma_cxt,
        return rc;
 }
 
-struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt)
+static struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port;
@@ -680,7 +696,7 @@ struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt)
        return p_port;
 }
 
-struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt)
+static struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
 
@@ -690,7 +706,7 @@ struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt)
        return p_hwfn->p_rdma_info->dev;
 }
 
-void qed_rdma_free_tid(void *rdma_cxt, u32 itid)
+static void qed_rdma_free_tid(void *rdma_cxt, u32 itid)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
 
@@ -701,27 +717,7 @@ void qed_rdma_free_tid(void *rdma_cxt, u32 itid)
        spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
 }
 
-int qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid)
-{
-       struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
-       int rc;
-
-       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID\n");
-
-       spin_lock_bh(&p_hwfn->p_rdma_info->lock);
-       rc = qed_rdma_bmap_alloc_id(p_hwfn,
-                                   &p_hwfn->p_rdma_info->tid_map, itid);
-       spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
-       if (rc)
-               goto out;
-
-       rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_TASK, *itid);
-out:
-       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID - done, rc = %d\n", rc);
-       return rc;
-}
-
-void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod)
+static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod)
 {
        struct qed_hwfn *p_hwfn;
        u16 qz_num;
@@ -816,7 +812,7 @@ static int qed_rdma_get_int(struct qed_dev *cdev, struct qed_int_info *info)
        return 0;
 }
 
-int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd)
+static int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        u32 returned_id;
@@ -836,7 +832,7 @@ int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd)
        return rc;
 }
 
-void qed_rdma_free_pd(void *rdma_cxt, u16 pd)
+static void qed_rdma_free_pd(void *rdma_cxt, u16 pd)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
 
@@ -873,8 +869,9 @@ qed_rdma_toggle_bit_create_resize_cq(struct qed_hwfn *p_hwfn, u16 icid)
        return toggle_bit;
 }
 
-int qed_rdma_create_cq(void *rdma_cxt,
-                      struct qed_rdma_create_cq_in_params *params, u16 *icid)
+static int qed_rdma_create_cq(void *rdma_cxt,
+                             struct qed_rdma_create_cq_in_params *params,
+                             u16 *icid)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        struct qed_rdma_info *p_info = p_hwfn->p_rdma_info;
@@ -957,98 +954,10 @@ err:
        return rc;
 }
 
-int qed_rdma_resize_cq(void *rdma_cxt,
-                      struct qed_rdma_resize_cq_in_params *in_params,
-                      struct qed_rdma_resize_cq_out_params *out_params)
-{
-       struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
-       struct rdma_resize_cq_output_params *p_ramrod_res;
-       struct rdma_resize_cq_ramrod_data *p_ramrod;
-       enum qed_rdma_toggle_bit toggle_bit;
-       struct qed_sp_init_data init_data;
-       struct qed_spq_entry *p_ent;
-       dma_addr_t ramrod_res_phys;
-       u8 fw_return_code;
-       int rc = -ENOMEM;
-
-       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid);
-
-       p_ramrod_res =
-           (struct rdma_resize_cq_output_params *)
-           dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
-                              sizeof(struct rdma_resize_cq_output_params),
-                              &ramrod_res_phys, GFP_KERNEL);
-       if (!p_ramrod_res) {
-               DP_NOTICE(p_hwfn,
-                         "qed resize cq failed: cannot allocate memory (ramrod)\n");
-               return rc;
-       }
-
-       /* Get SPQ entry */
-       memset(&init_data, 0, sizeof(init_data));
-       init_data.cid = in_params->icid;
-       init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
-       init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
-
-       rc = qed_sp_init_request(p_hwfn, &p_ent,
-                                RDMA_RAMROD_RESIZE_CQ,
-                                p_hwfn->p_rdma_info->proto, &init_data);
-       if (rc)
-               goto err;
-
-       p_ramrod = &p_ent->ramrod.rdma_resize_cq;
-
-       p_ramrod->flags = 0;
-
-       /* toggle the bit for every resize or create cq for a given icid */
-       toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn,
-                                                         in_params->icid);
-
-       SET_FIELD(p_ramrod->flags,
-                 RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT, toggle_bit);
-
-       SET_FIELD(p_ramrod->flags,
-                 RDMA_RESIZE_CQ_RAMROD_DATA_IS_TWO_LEVEL_PBL,
-                 in_params->pbl_two_level);
-
-       p_ramrod->pbl_log_page_size = in_params->pbl_page_size_log - 12;
-       p_ramrod->pbl_num_pages = cpu_to_le16(in_params->pbl_num_pages);
-       p_ramrod->max_cqes = cpu_to_le32(in_params->cq_size);
-       DMA_REGPAIR_LE(p_ramrod->pbl_addr, in_params->pbl_ptr);
-       DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys);
-
-       rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code);
-       if (rc)
-               goto err;
-
-       if (fw_return_code != RDMA_RETURN_OK) {
-               DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code);
-               rc = -EINVAL;
-               goto err;
-       }
-
-       out_params->prod = le32_to_cpu(p_ramrod_res->old_cq_prod);
-       out_params->cons = le32_to_cpu(p_ramrod_res->old_cq_cons);
-
-       dma_free_coherent(&p_hwfn->cdev->pdev->dev,
-                         sizeof(struct rdma_resize_cq_output_params),
-                         p_ramrod_res, ramrod_res_phys);
-
-       DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Resized CQ, rc = %d\n", rc);
-
-       return rc;
-
-err:   dma_free_coherent(&p_hwfn->cdev->pdev->dev,
-                         sizeof(struct rdma_resize_cq_output_params),
-                         p_ramrod_res, ramrod_res_phys);
-       DP_NOTICE(p_hwfn, "Resized CQ, Failed - rc = %d\n", rc);
-
-       return rc;
-}
-
-int qed_rdma_destroy_cq(void *rdma_cxt,
-                       struct qed_rdma_destroy_cq_in_params *in_params,
-                       struct qed_rdma_destroy_cq_out_params *out_params)
+static int
+qed_rdma_destroy_cq(void *rdma_cxt,
+                   struct qed_rdma_destroy_cq_in_params *in_params,
+                   struct qed_rdma_destroy_cq_out_params *out_params)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        struct rdma_destroy_cq_output_params *p_ramrod_res;
@@ -1169,7 +1078,7 @@ static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode)
        return flavor;
 }
 
-int qed_roce_alloc_cid(struct qed_hwfn *p_hwfn, u16 *cid)
+static int qed_roce_alloc_cid(struct qed_hwfn *p_hwfn, u16 *cid)
 {
        struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
        u32 responder_icid;
@@ -1793,9 +1702,9 @@ err:
        return rc;
 }
 
-int qed_roce_query_qp(struct qed_hwfn *p_hwfn,
-                     struct qed_rdma_qp *qp,
-                     struct qed_rdma_query_qp_out_params *out_params)
+static int qed_roce_query_qp(struct qed_hwfn *p_hwfn,
+                            struct qed_rdma_qp *qp,
+                            struct qed_rdma_query_qp_out_params *out_params)
 {
        struct roce_query_qp_resp_output_params *p_resp_ramrod_res;
        struct roce_query_qp_req_output_params *p_req_ramrod_res;
@@ -1936,7 +1845,7 @@ err_resp:
        return rc;
 }
 
-int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+static int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
 {
        u32 num_invalidated_mw = 0;
        u32 num_bound_mw = 0;
@@ -1985,9 +1894,9 @@ int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
        return 0;
 }
 
-int qed_rdma_query_qp(void *rdma_cxt,
-                     struct qed_rdma_qp *qp,
-                     struct qed_rdma_query_qp_out_params *out_params)
+static int qed_rdma_query_qp(void *rdma_cxt,
+                            struct qed_rdma_qp *qp,
+                            struct qed_rdma_query_qp_out_params *out_params)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        int rc;
@@ -2022,7 +1931,7 @@ int qed_rdma_query_qp(void *rdma_cxt,
        return rc;
 }
 
-int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp)
+static int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        int rc = 0;
@@ -2038,7 +1947,7 @@ int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp)
        return rc;
 }
 
-struct qed_rdma_qp *
+static struct qed_rdma_qp *
 qed_rdma_create_qp(void *rdma_cxt,
                   struct qed_rdma_create_qp_in_params *in_params,
                   struct qed_rdma_create_qp_out_params *out_params)
@@ -2215,9 +2124,9 @@ static int qed_roce_modify_qp(struct qed_hwfn *p_hwfn,
        return rc;
 }
 
-int qed_rdma_modify_qp(void *rdma_cxt,
-                      struct qed_rdma_qp *qp,
-                      struct qed_rdma_modify_qp_in_params *params)
+static int qed_rdma_modify_qp(void *rdma_cxt,
+                             struct qed_rdma_qp *qp,
+                             struct qed_rdma_modify_qp_in_params *params)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        enum qed_roce_qp_state prev_state;
@@ -2312,8 +2221,9 @@ int qed_rdma_modify_qp(void *rdma_cxt,
        return rc;
 }
 
-int qed_rdma_register_tid(void *rdma_cxt,
-                         struct qed_rdma_register_tid_in_params *params)
+static int
+qed_rdma_register_tid(void *rdma_cxt,
+                     struct qed_rdma_register_tid_in_params *params)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        struct rdma_register_tid_ramrod_data *p_ramrod;
@@ -2450,7 +2360,7 @@ int qed_rdma_register_tid(void *rdma_cxt,
        return rc;
 }
 
-int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid)
+static int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        struct rdma_deregister_tid_ramrod_data *p_ramrod;
@@ -2561,7 +2471,8 @@ void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
        qed_rdma_dpm_conf(p_hwfn, p_ptt);
 }
 
-int qed_rdma_start(void *rdma_cxt, struct qed_rdma_start_in_params *params)
+static int qed_rdma_start(void *rdma_cxt,
+                         struct qed_rdma_start_in_params *params)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
        struct qed_ptt *p_ptt;
@@ -2601,7 +2512,7 @@ static int qed_rdma_init(struct qed_dev *cdev,
        return qed_rdma_start(QED_LEADING_HWFN(cdev), params);
 }
 
-void qed_rdma_remove_user(void *rdma_cxt, u16 dpi)
+static void qed_rdma_remove_user(void *rdma_cxt, u16 dpi)
 {
        struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
 
@@ -2808,11 +2719,6 @@ static int qed_roce_ll2_stop(struct qed_dev *cdev)
        struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2;
        int rc;
 
-       if (!cdev) {
-               DP_ERR(cdev, "qed roce ll2 stop: invalid cdev\n");
-               return -EINVAL;
-       }
-
        if (roce_ll2->handle == QED_LL2_UNUSED_HANDLE) {
                DP_ERR(cdev, "qed roce ll2 stop: cannot stop an unused LL2\n");
                return -EINVAL;
@@ -2849,7 +2755,7 @@ static int qed_roce_ll2_tx(struct qed_dev *cdev,
        int rc;
        int i;
 
-       if (!cdev || !pkt || !params) {
+       if (!pkt || !params) {
                DP_ERR(cdev,
                       "roce ll2 tx: failed tx because one of the following is NULL - drv=%p, pkt=%p, params=%p\n",
                       cdev, pkt, params);
@@ -2865,6 +2771,7 @@ static int qed_roce_ll2_tx(struct qed_dev *cdev,
        /* Tx header */
        rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), roce_ll2->handle,
                                       1 + pkt->n_seg, 0, flags, 0,
+                                      QED_LL2_TX_DEST_NW,
                                       qed_roce_flavor, pkt->header.baddr,
                                       pkt->header.len, pkt, 1);
        if (rc) {
index 2f091e8a0f40b7bb266e24ab183da88a5135c56e..279f342af8db1c91272fa89bf80fbde565f5e135 100644 (file)
@@ -95,26 +95,6 @@ struct qed_rdma_info {
        enum protocol_type proto;
 };
 
-struct qed_rdma_resize_cq_in_params {
-       u16 icid;
-       u32 cq_size;
-       bool pbl_two_level;
-       u64 pbl_ptr;
-       u16 pbl_num_pages;
-       u8 pbl_page_size_log;
-};
-
-struct qed_rdma_resize_cq_out_params {
-       u32 prod;
-       u32 cons;
-};
-
-struct qed_rdma_resize_cnq_in_params {
-       u32 cnq_id;
-       u32 pbl_page_size_log;
-       u64 pbl_ptr;
-};
-
 struct qed_rdma_qp {
        struct regpair qp_handle;
        struct regpair qp_handle_async;
@@ -181,36 +161,55 @@ struct qed_rdma_qp {
        dma_addr_t shared_queue_phys_addr;
 };
 
-int
-qed_rdma_add_user(void *rdma_cxt,
-                 struct qed_rdma_add_user_out_params *out_params);
-int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd);
-int qed_rdma_alloc_tid(void *rdma_cxt, u32 *tid);
-int qed_rdma_deregister_tid(void *rdma_cxt, u32 tid);
-void qed_rdma_free_tid(void *rdma_cxt, u32 tid);
-struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt);
-struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt);
-int
-qed_rdma_register_tid(void *rdma_cxt,
-                     struct qed_rdma_register_tid_in_params *params);
-void qed_rdma_remove_user(void *rdma_cxt, u16 dpi);
-int qed_rdma_start(void *p_hwfn, struct qed_rdma_start_in_params *params);
-int qed_rdma_stop(void *rdma_cxt);
-u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id);
-u32 qed_rdma_query_cau_timer_res(void *p_hwfn);
-void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 cnq_index, u16 prod);
-void qed_rdma_resc_free(struct qed_hwfn *p_hwfn);
+#if IS_ENABLED(CONFIG_QED_RDMA)
+void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 void qed_async_roce_event(struct qed_hwfn *p_hwfn,
                          struct event_ring_entry *p_eqe);
-int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp);
-int qed_rdma_modify_qp(void *rdma_cxt, struct qed_rdma_qp *qp,
-                      struct qed_rdma_modify_qp_in_params *params);
-int qed_rdma_query_qp(void *rdma_cxt, struct qed_rdma_qp *qp,
-                     struct qed_rdma_query_qp_out_params *out_params);
-
-#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
-void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn,
+                                    u8 connection_handle,
+                                    void *cookie,
+                                    dma_addr_t first_frag_addr,
+                                    bool b_last_fragment, bool b_last_packet);
+void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn,
+                                   u8 connection_handle,
+                                   void *cookie,
+                                   dma_addr_t first_frag_addr,
+                                   bool b_last_fragment, bool b_last_packet);
+void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn,
+                                    u8 connection_handle,
+                                    void *cookie,
+                                    dma_addr_t rx_buf_addr,
+                                    u16 data_length,
+                                    u8 data_length_error,
+                                    u16 parse_flags,
+                                    u16 vlan,
+                                    u32 src_mac_addr_hi,
+                                    u16 src_mac_addr_lo, bool b_last_packet);
 #else
-void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {}
+/* No-op when CONFIG_QED_RDMA is not enabled */
+static inline void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn,
+                                   struct qed_ptt *p_ptt)
+{
+}
+/* No-op when CONFIG_QED_RDMA is not enabled */
+static inline void qed_async_roce_event(struct qed_hwfn *p_hwfn,
+                                       struct event_ring_entry *p_eqe)
+{
+}
+/* No-op when CONFIG_QED_RDMA is not enabled */
+static inline void
+qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn,
+                               u8 connection_handle, void *cookie,
+                               dma_addr_t first_frag_addr,
+                               bool b_last_fragment, bool b_last_packet)
+{
+}
+/* No-op when CONFIG_QED_RDMA is not enabled */
+static inline void
+qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn,
+                              u8 connection_handle, void *cookie,
+                              dma_addr_t first_frag_addr,
+                              bool b_last_fragment, bool b_last_packet)
+{
+}
+/* No-op when CONFIG_QED_RDMA is not enabled */
+static inline void
+qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn,
+                               u8 connection_handle, void *cookie,
+                               dma_addr_t rx_buf_addr, u16 data_length,
+                               u8 data_length_error, u16 parse_flags,
+                               u16 vlan, u32 src_mac_addr_hi,
+                               u16 src_mac_addr_lo, bool b_last_packet)
+{
+}
 #endif
 #endif
index 9b7678f26909a5c014cf86d23be87ed77c29418f..48bfaecaf6dca30fc3113c92d739dfd8187f3ca8 100644 (file)
@@ -1,3 +1,4 @@
+#include <linux/crc32.h>
 #include "qed.h"
 #include "qed_dev_api.h"
 #include "qed_mcp.h"
@@ -75,3 +76,103 @@ int qed_selftest_clock(struct qed_dev *cdev)
 
        return rc;
 }
+
+/*
+ * qed_selftest_nvram - check the CRC of the images reported by the MFW
+ *
+ * Queries the MFW for the number of NVM images, then for every image other
+ * than NVM_TYPE_MDUMP: reads it into a temporary buffer, byte-swaps the
+ * payload, recomputes its CRC32 and compares the result with the value held
+ * in the image's last 4 bytes.
+ *
+ * Once the PTT has been acquired it is released on every exit path.
+ *
+ * Returns 0 when all checked images match. Returns -EBUSY when no PTT could
+ * be acquired, -EINVAL when the image count cannot be obtained or is zero,
+ * when an image is too short to hold its CRC or when a CRC mismatches,
+ * -ENOMEM when the image buffer cannot be allocated, and otherwise the
+ * error returned by the attribute query or the NVM read.
+ */
+int qed_selftest_nvram(struct qed_dev *cdev)
+{
+       struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+       struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+       u32 num_images, i, j, nvm_crc, calc_crc;
+       struct bist_nvm_image_att image_att;
+       u8 *buf = NULL;
+       __be32 val;
+       int rc;
+
+       if (!p_ptt) {
+               DP_ERR(p_hwfn, "failed to acquire ptt\n");
+               return -EBUSY;
+       }
+
+       /* Acquire from MFW the amount of available images */
+       rc = qed_mcp_bist_nvm_test_get_num_images(p_hwfn, p_ptt, &num_images);
+       if (rc || !num_images) {
+               DP_ERR(p_hwfn, "Failed getting number of images\n");
+               /* Exit through err0 so the PTT acquired above is released */
+               rc = -EINVAL;
+               goto err0;
+       }
+
+       /* Iterate over images and validate CRC */
+       for (i = 0; i < num_images; i++) {
+               /* This mailbox returns information about the image required for
+                * reading it.
+                */
+               rc = qed_mcp_bist_nvm_test_get_image_att(p_hwfn, p_ptt,
+                                                        &image_att, i);
+               if (rc) {
+                       DP_ERR(p_hwfn,
+                              "Failed getting image index %d attributes\n",
+                              i);
+                       goto err0;
+               }
+
+               /* After MFW crash dump is collected - the image's CRC stops
+                * being valid.
+                */
+               if (image_att.image_type == NVM_TYPE_MDUMP)
+                       continue;
+
+               DP_VERBOSE(p_hwfn, QED_MSG_SP, "image index %d, size %x\n",
+                          i, image_att.len);
+
+               /* The image must at least hold its 4-byte CRC trailer;
+                * otherwise the "len - 4" computations below would run
+                * outside of the buffer.
+                */
+               if (image_att.len < 4) {
+                       DP_ERR(p_hwfn,
+                              "Image index %d is too short (size %x) to hold a CRC\n",
+                              i, image_att.len);
+                       rc = -EINVAL;
+                       goto err0;
+               }
+
+               /* Allocate a buffer for holding the nvram image */
+               buf = kzalloc(image_att.len, GFP_KERNEL);
+               if (!buf) {
+                       rc = -ENOMEM;
+                       goto err0;
+               }
+
+               /* Read image into buffer */
+               rc = qed_mcp_nvm_read(p_hwfn->cdev, image_att.nvm_start_addr,
+                                     buf, image_att.len);
+               if (rc) {
+                       DP_ERR(p_hwfn,
+                              "Failed reading image index %d from nvm.\n", i);
+                       goto err1;
+               }
+
+               /* Convert the buffer into big-endian format (excluding the
+                * closing 4 bytes of CRC).
+                */
+               for (j = 0; j < image_att.len - 4; j += 4) {
+                       val = cpu_to_be32(*(u32 *)&buf[j]);
+                       *(u32 *)&buf[j] = (__force u32)val;
+               }
+
+               /* Calc CRC for the "actual" image buffer, i.e. not including
+                * the last 4 CRC bytes.
+                */
+               nvm_crc = *(u32 *)(buf + image_att.len - 4);
+               calc_crc = crc32(0xffffffff, buf, image_att.len - 4);
+               calc_crc = (__force u32)~cpu_to_be32(calc_crc);
+               DP_VERBOSE(p_hwfn, QED_MSG_SP,
+                          "nvm crc 0x%x, calc_crc 0x%x\n", nvm_crc, calc_crc);
+
+               if (calc_crc != nvm_crc) {
+                       rc = -EINVAL;
+                       goto err1;
+               }
+
+               /* Done with this image; Free to prevent double release
+                * on subsequent failure.
+                */
+               kfree(buf);
+               buf = NULL;
+       }
+
+       qed_ptt_release(p_hwfn, p_ptt);
+       return 0;
+
+err1:
+       kfree(buf);
+err0:
+       qed_ptt_release(p_hwfn, p_ptt);
+       return rc;
+}
index 50eb0b49950f69471a388b6568197653d1d73a40..739ddb73096794e5ede557723aefe19f72646491 100644 (file)
@@ -37,4 +37,14 @@ int qed_selftest_register(struct qed_dev *cdev);
  * @return int
  */
 int qed_selftest_clock(struct qed_dev *cdev);
+
+/**
+ * @brief qed_selftest_nvram - Perform nvram test
+ *
+ * @param cdev
+ *
+ * @return int
+ */
+int qed_selftest_nvram(struct qed_dev *cdev);
+
 #endif
index 27c450fd219327e5b291eead936dc3e87d9c9286..9c897bc68d05545a88cfddf582ef19616042ce89 100644 (file)
@@ -80,7 +80,6 @@ union ramrod_data {
        struct roce_destroy_qp_resp_ramrod_data roce_destroy_qp_resp;
        struct roce_destroy_qp_req_ramrod_data roce_destroy_qp_req;
        struct rdma_create_cq_ramrod_data rdma_create_cq;
-       struct rdma_resize_cq_ramrod_data rdma_resize_cq;
        struct rdma_destroy_cq_ramrod_data rdma_destroy_cq;
        struct rdma_srq_create_ramrod_data rdma_create_srq;
        struct rdma_srq_destroy_ramrod_data rdma_destroy_srq;
index 2888eb0628f815446609aa035adf655d3d143720..d0a58282f2a8d3c93747fc892d6e011513d3e420 100644 (file)
@@ -347,11 +347,11 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 
        /* Place EQ address in RAMROD */
        DMA_REGPAIR_LE(p_ramrod->event_ring_pbl_addr,
-                      p_hwfn->p_eq->chain.pbl.p_phys_table);
+                      p_hwfn->p_eq->chain.pbl_sp.p_phys_table);
        page_cnt = (u8)qed_chain_get_page_cnt(&p_hwfn->p_eq->chain);
        p_ramrod->event_ring_num_pages = page_cnt;
        DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr,
-                      p_hwfn->p_consq->chain.pbl.p_phys_table);
+                      p_hwfn->p_consq->chain.pbl_sp.p_phys_table);
 
        qed_tunn_set_pf_start_params(p_hwfn, p_tunn, &p_ramrod->tunnel_config);
 
index 6c05402ea4dca24efdaad579fb78938935f17680..f022469bdcf82bdfce2a9b21fe6566daad384949 100644 (file)
 #include "qed_hsi.h"
 #include "qed_hw.h"
 #include "qed_int.h"
+#include "qed_iscsi.h"
 #include "qed_mcp.h"
+#include "qed_ooo.h"
 #include "qed_reg_addr.h"
 #include "qed_sp.h"
 #include "qed_sriov.h"
-#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
 #include "qed_roce.h"
-#endif
 
 /***************************************************************************
 * Structures & Definitions
@@ -272,15 +272,35 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn,
                           struct event_ring_entry *p_eqe)
 {
        switch (p_eqe->protocol_id) {
-#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
        case PROTOCOLID_ROCE:
                qed_async_roce_event(p_hwfn, p_eqe);
                return 0;
-#endif
        case PROTOCOLID_COMMON:
                return qed_sriov_eqe_event(p_hwfn,
                                           p_eqe->opcode,
                                           p_eqe->echo, &p_eqe->data);
+       case PROTOCOLID_ISCSI:
+               if (!IS_ENABLED(CONFIG_QED_ISCSI))
+                       return -EINVAL;
+               if (p_eqe->opcode == ISCSI_EVENT_TYPE_ASYN_DELETE_OOO_ISLES) {
+                       u32 cid = le32_to_cpu(p_eqe->data.iscsi_info.cid);
+
+                       qed_ooo_release_connection_isles(p_hwfn,
+                                                        p_hwfn->p_ooo_info,
+                                                        cid);
+                       return 0;
+               }
+
+               if (p_hwfn->p_iscsi_info->event_cb) {
+                       struct qed_iscsi_info *p_iscsi = p_hwfn->p_iscsi_info;
+
+                       return p_iscsi->event_cb(p_iscsi->event_context,
+                                                p_eqe->opcode, &p_eqe->data);
+               } else {
+                       DP_NOTICE(p_hwfn,
+                                 "iSCSI async completion is not set\n");
+                       return -EINVAL;
+               }
        default:
                DP_NOTICE(p_hwfn,
                          "Unknown Async completion for protocol: %d\n",
index 6f029f91e4dee76494c3bfd0a6170b591a91654a..85b09dd1787acaa78ca72c1013eb466c6e1f70b7 100644 (file)
@@ -808,37 +808,70 @@ static void qed_iov_free_vf_igu_sbs(struct qed_hwfn *p_hwfn,
 
 static int qed_iov_init_hw_for_vf(struct qed_hwfn *p_hwfn,
                                  struct qed_ptt *p_ptt,
-                                 u16 rel_vf_id, u16 num_rx_queues)
+                                 struct qed_iov_vf_init_params *p_params)
 {
        u8 num_of_vf_avaiable_chains = 0;
        struct qed_vf_info *vf = NULL;
+       u16 qid, num_irqs;
        int rc = 0;
        u32 cids;
        u8 i;
 
-       vf = qed_iov_get_vf_info(p_hwfn, rel_vf_id, false);
+       vf = qed_iov_get_vf_info(p_hwfn, p_params->rel_vf_id, false);
        if (!vf) {
                DP_ERR(p_hwfn, "qed_iov_init_hw_for_vf : vf is NULL\n");
                return -EINVAL;
        }
 
        if (vf->b_init) {
-               DP_NOTICE(p_hwfn, "VF[%d] is already active.\n", rel_vf_id);
+               DP_NOTICE(p_hwfn, "VF[%d] is already active.\n",
+                         p_params->rel_vf_id);
                return -EINVAL;
        }
 
+       /* Perform sanity checking on the requested queue_id */
+       for (i = 0; i < p_params->num_queues; i++) {
+               u16 min_vf_qzone = FEAT_NUM(p_hwfn, QED_PF_L2_QUE);
+               u16 max_vf_qzone = min_vf_qzone +
+                   FEAT_NUM(p_hwfn, QED_VF_L2_QUE) - 1;
+
+               qid = p_params->req_rx_queue[i];
+               if (qid < min_vf_qzone || qid > max_vf_qzone) {
+                       DP_NOTICE(p_hwfn,
+                                 "Can't enable Rx qid [%04x] for VF[%d]: qids [0x%04x,...,0x%04x] available\n",
+                                 qid,
+                                 p_params->rel_vf_id,
+                                 min_vf_qzone, max_vf_qzone);
+                       return -EINVAL;
+               }
+
+               qid = p_params->req_tx_queue[i];
+               if (qid > max_vf_qzone) {
+                       DP_NOTICE(p_hwfn,
+                                 "Can't enable Tx qid [%04x] for VF[%d]: max qid 0x%04x\n",
+                                 qid, p_params->rel_vf_id, max_vf_qzone);
+                       return -EINVAL;
+               }
+
+               /* If client *really* wants, Tx qid can be shared with PF */
+               if (qid < min_vf_qzone)
+                       DP_VERBOSE(p_hwfn,
+                                  QED_MSG_IOV,
+                                  "VF[%d] is using PF qid [0x%04x] for Txq[0x%02x]\n",
+                                  p_params->rel_vf_id, qid, i);
+       }
+
        /* Limit number of queues according to number of CIDs */
        qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_ETH, &cids);
        DP_VERBOSE(p_hwfn,
                   QED_MSG_IOV,
                   "VF[%d] - requesting to initialize for 0x%04x queues [0x%04x CIDs available]\n",
-                  vf->relative_vf_id, num_rx_queues, (u16) cids);
-       num_rx_queues = min_t(u16, num_rx_queues, ((u16) cids));
+                  vf->relative_vf_id, p_params->num_queues, (u16)cids);
+       num_irqs = min_t(u16, p_params->num_queues, ((u16)cids));
 
        num_of_vf_avaiable_chains = qed_iov_alloc_vf_igu_sbs(p_hwfn,
                                                             p_ptt,
-                                                            vf,
-                                                            num_rx_queues);
+                                                            vf, num_irqs);
        if (!num_of_vf_avaiable_chains) {
                DP_ERR(p_hwfn, "no available igu sbs\n");
                return -ENOMEM;
@@ -849,25 +882,22 @@ static int qed_iov_init_hw_for_vf(struct qed_hwfn *p_hwfn,
        vf->num_txqs = num_of_vf_avaiable_chains;
 
        for (i = 0; i < vf->num_rxqs; i++) {
-               u16 queue_id = qed_int_queue_id_from_sb_id(p_hwfn,
-                                                          vf->igu_sbs[i]);
+               struct qed_vf_q_info *p_queue = &vf->vf_queues[i];
 
-               if (queue_id > RESC_NUM(p_hwfn, QED_L2_QUEUE)) {
-                       DP_NOTICE(p_hwfn,
-                                 "VF[%d] will require utilizing of out-of-bounds queues - %04x\n",
-                                 vf->relative_vf_id, queue_id);
-                       return -EINVAL;
-               }
+               p_queue->fw_rx_qid = p_params->req_rx_queue[i];
+               p_queue->fw_tx_qid = p_params->req_tx_queue[i];
 
                /* CIDs are per-VF, so no problem having them 0-based. */
-               vf->vf_queues[i].fw_rx_qid = queue_id;
-               vf->vf_queues[i].fw_tx_qid = queue_id;
-               vf->vf_queues[i].fw_cid = i;
+               p_queue->fw_cid = i;
 
                DP_VERBOSE(p_hwfn, QED_MSG_IOV,
-                          "VF[%d] - [%d] SB %04x, Tx/Rx queue %04x CID %04x\n",
-                          vf->relative_vf_id, i, vf->igu_sbs[i], queue_id, i);
+                          "VF[%d] - Q[%d] SB %04x, qid [Rx %04x Tx %04x]  CID %04x\n",
+                          vf->relative_vf_id,
+                          i, vf->igu_sbs[i],
+                          p_queue->fw_rx_qid,
+                          p_queue->fw_tx_qid, p_queue->fw_cid);
        }
+
        rc = qed_iov_enable_vf_access(p_hwfn, p_ptt, vf);
        if (!rc) {
                vf->b_init = true;
@@ -1187,8 +1217,19 @@ static void qed_iov_vf_cleanup(struct qed_hwfn *p_hwfn,
 
        p_vf->num_active_rxqs = 0;
 
-       for (i = 0; i < QED_MAX_VF_CHAINS_PER_PF; i++)
-               p_vf->vf_queues[i].rxq_active = 0;
+       for (i = 0; i < QED_MAX_VF_CHAINS_PER_PF; i++) {
+               struct qed_vf_q_info *p_queue = &p_vf->vf_queues[i];
+
+               if (p_queue->p_rx_cid) {
+                       qed_eth_queue_cid_release(p_hwfn, p_queue->p_rx_cid);
+                       p_queue->p_rx_cid = NULL;
+               }
+
+               if (p_queue->p_tx_cid) {
+                       qed_eth_queue_cid_release(p_hwfn, p_queue->p_tx_cid);
+                       p_queue->p_tx_cid = NULL;
+               }
+       }
 
        memset(&p_vf->shadow_config, 0, sizeof(p_vf->shadow_config));
        memset(&p_vf->acquire, 0, sizeof(p_vf->acquire));
@@ -1594,21 +1635,21 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn,
 
                /* Update all the Rx queues */
                for (i = 0; i < QED_MAX_VF_CHAINS_PER_PF; i++) {
-                       u16 qid;
+                       struct qed_queue_cid *p_cid;
 
-                       if (!p_vf->vf_queues[i].rxq_active)
+                       p_cid = p_vf->vf_queues[i].p_rx_cid;
+                       if (!p_cid)
                                continue;
 
-                       qid = p_vf->vf_queues[i].fw_rx_qid;
-
-                       rc = qed_sp_eth_rx_queues_update(p_hwfn, qid,
+                       rc = qed_sp_eth_rx_queues_update(p_hwfn,
+                                                        (void **)&p_cid,
                                                         1, 0, 1,
                                                         QED_SPQ_MODE_EBLOCK,
                                                         NULL);
                        if (rc) {
                                DP_NOTICE(p_hwfn,
                                          "Failed to send Rx update fo queue[0x%04x]\n",
-                                         qid);
+                                         p_cid->rel.queue_id);
                                return rc;
                        }
                }
@@ -1782,23 +1823,34 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
        struct qed_queue_start_common_params params;
        struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
        u8 status = PFVF_STATUS_NO_RESOURCE;
+       struct qed_vf_q_info *p_queue;
        struct vfpf_start_rxq_tlv *req;
        bool b_legacy_vf = false;
        int rc;
 
-       memset(&params, 0, sizeof(params));
        req = &mbx->req_virt->start_rxq;
 
        if (!qed_iov_validate_rxq(p_hwfn, vf, req->rx_qid) ||
            !qed_iov_validate_sb(p_hwfn, vf, req->hw_sb))
                goto out;
 
-       params.queue_id =  vf->vf_queues[req->rx_qid].fw_rx_qid;
-       params.vf_qid = req->rx_qid;
+       /* Acquire a new queue-cid */
+       p_queue = &vf->vf_queues[req->rx_qid];
+
+       memset(&params, 0, sizeof(params));
+       params.queue_id = p_queue->fw_rx_qid;
        params.vport_id = vf->vport_id;
+       params.stats_id = vf->abs_vf_id + 0x10;
        params.sb = req->hw_sb;
        params.sb_idx = req->sb_index;
 
+       p_queue->p_rx_cid = _qed_eth_queue_to_cid(p_hwfn,
+                                                 vf->opaque_fid,
+                                                 p_queue->fw_cid,
+                                                 req->rx_qid, &params);
+       if (!p_queue->p_rx_cid)
+               goto out;
+
        /* Legacy VFs have their Producers in a different location, which they
         * calculate on their own and clean the producer prior to this.
         */
@@ -1811,21 +1863,19 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
                       MSTORM_ETH_VF_PRODS_OFFSET(vf->abs_vf_id, req->rx_qid),
                       0);
        }
+       p_queue->p_rx_cid->b_legacy_vf = b_legacy_vf;
 
-       rc = qed_sp_eth_rxq_start_ramrod(p_hwfn, vf->opaque_fid,
-                                        vf->vf_queues[req->rx_qid].fw_cid,
-                                        &params,
-                                        vf->abs_vf_id + 0x10,
-                                        req->bd_max_bytes,
-                                        req->rxq_addr,
-                                        req->cqe_pbl_addr, req->cqe_pbl_size,
-                                        b_legacy_vf);
-
+       rc = qed_eth_rxq_start_ramrod(p_hwfn,
+                                     p_queue->p_rx_cid,
+                                     req->bd_max_bytes,
+                                     req->rxq_addr,
+                                     req->cqe_pbl_addr, req->cqe_pbl_size);
        if (rc) {
                status = PFVF_STATUS_FAILURE;
+               qed_eth_queue_cid_release(p_hwfn, p_queue->p_rx_cid);
+               p_queue->p_rx_cid = NULL;
        } else {
                status = PFVF_STATUS_SUCCESS;
-               vf->vf_queues[req->rx_qid].rxq_active = true;
                vf->num_active_rxqs++;
        }
 
@@ -1882,7 +1932,9 @@ static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
        u8 status = PFVF_STATUS_NO_RESOURCE;
        union qed_qm_pq_params pq_params;
        struct vfpf_start_txq_tlv *req;
+       struct qed_vf_q_info *p_queue;
        int rc;
+       u16 pq;
 
        /* Prepare the parameters which would choose the right PQ */
        memset(&pq_params, 0, sizeof(pq_params));
@@ -1896,24 +1948,31 @@ static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
            !qed_iov_validate_sb(p_hwfn, vf, req->hw_sb))
                goto out;
 
-       params.queue_id =  vf->vf_queues[req->tx_qid].fw_tx_qid;
+       /* Acquire a new queue-cid */
+       p_queue = &vf->vf_queues[req->tx_qid];
+
+       params.queue_id = p_queue->fw_tx_qid;
        params.vport_id = vf->vport_id;
+       params.stats_id = vf->abs_vf_id + 0x10;
        params.sb = req->hw_sb;
        params.sb_idx = req->sb_index;
 
-       rc = qed_sp_eth_txq_start_ramrod(p_hwfn,
-                                        vf->opaque_fid,
-                                        vf->vf_queues[req->tx_qid].fw_cid,
-                                        &params,
-                                        vf->abs_vf_id + 0x10,
-                                        req->pbl_addr,
-                                        req->pbl_size, &pq_params);
+       p_queue->p_tx_cid = _qed_eth_queue_to_cid(p_hwfn,
+                                                 vf->opaque_fid,
+                                                 p_queue->fw_cid,
+                                                 req->tx_qid, &params);
+       if (!p_queue->p_tx_cid)
+               goto out;
 
+       pq = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, &pq_params);
+       rc = qed_eth_txq_start_ramrod(p_hwfn, p_queue->p_tx_cid,
+                                     req->pbl_addr, req->pbl_size, pq);
        if (rc) {
                status = PFVF_STATUS_FAILURE;
+               qed_eth_queue_cid_release(p_hwfn, p_queue->p_tx_cid);
+               p_queue->p_tx_cid = NULL;
        } else {
                status = PFVF_STATUS_SUCCESS;
-               vf->vf_queues[req->tx_qid].txq_active = true;
        }
 
 out:
@@ -1924,6 +1983,7 @@ static int qed_iov_vf_stop_rxqs(struct qed_hwfn *p_hwfn,
                                struct qed_vf_info *vf,
                                u16 rxq_id, u8 num_rxqs, bool cqe_completion)
 {
+       struct qed_vf_q_info *p_queue;
        int rc = 0;
        int qid;
 
@@ -1931,16 +1991,18 @@ static int qed_iov_vf_stop_rxqs(struct qed_hwfn *p_hwfn,
                return -EINVAL;
 
        for (qid = rxq_id; qid < rxq_id + num_rxqs; qid++) {
-               if (vf->vf_queues[qid].rxq_active) {
-                       rc = qed_sp_eth_rx_queue_stop(p_hwfn,
-                                                     vf->vf_queues[qid].
-                                                     fw_rx_qid, false,
-                                                     cqe_completion);
+               p_queue = &vf->vf_queues[qid];
 
-                       if (rc)
-                               return rc;
-               }
-               vf->vf_queues[qid].rxq_active = false;
+               if (!p_queue->p_rx_cid)
+                       continue;
+
+               rc = qed_eth_rx_queue_stop(p_hwfn,
+                                          p_queue->p_rx_cid,
+                                          false, cqe_completion);
+               if (rc)
+                       return rc;
+
+               vf->vf_queues[qid].p_rx_cid = NULL;
                vf->num_active_rxqs--;
        }
 
@@ -1951,22 +2013,24 @@ static int qed_iov_vf_stop_txqs(struct qed_hwfn *p_hwfn,
                                struct qed_vf_info *vf, u16 txq_id, u8 num_txqs)
 {
        int rc = 0;
+       struct qed_vf_q_info *p_queue;
        int qid;
 
        if (txq_id + num_txqs > ARRAY_SIZE(vf->vf_queues))
                return -EINVAL;
 
        for (qid = txq_id; qid < txq_id + num_txqs; qid++) {
-               if (vf->vf_queues[qid].txq_active) {
-                       rc = qed_sp_eth_tx_queue_stop(p_hwfn,
-                                                     vf->vf_queues[qid].
-                                                     fw_tx_qid);
+               p_queue = &vf->vf_queues[qid];
+               if (!p_queue->p_tx_cid)
+                       continue;
 
-                       if (rc)
-                               return rc;
-               }
-               vf->vf_queues[qid].txq_active = false;
+               rc = qed_eth_tx_queue_stop(p_hwfn, p_queue->p_tx_cid);
+               if (rc)
+                       return rc;
+
+               p_queue->p_tx_cid = NULL;
        }
+
        return rc;
 }
 
@@ -2021,10 +2085,11 @@ static void qed_iov_vf_mbx_update_rxqs(struct qed_hwfn *p_hwfn,
                                       struct qed_ptt *p_ptt,
                                       struct qed_vf_info *vf)
 {
+       struct qed_queue_cid *handlers[QED_MAX_VF_CHAINS_PER_PF];
        u16 length = sizeof(struct pfvf_def_resp_tlv);
        struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
        struct vfpf_update_rxq_tlv *req;
-       u8 status = PFVF_STATUS_SUCCESS;
+       u8 status = PFVF_STATUS_FAILURE;
        u8 complete_event_flg;
        u8 complete_cqe_flg;
        u16 qid;
@@ -2035,29 +2100,36 @@ static void qed_iov_vf_mbx_update_rxqs(struct qed_hwfn *p_hwfn,
        complete_cqe_flg = !!(req->flags & VFPF_RXQ_UPD_COMPLETE_CQE_FLAG);
        complete_event_flg = !!(req->flags & VFPF_RXQ_UPD_COMPLETE_EVENT_FLAG);
 
+       /* Validate inputs */
+       if (req->num_rxqs + req->rx_qid > QED_MAX_VF_CHAINS_PER_PF ||
+           !qed_iov_validate_rxq(p_hwfn, vf, req->rx_qid)) {
+               DP_INFO(p_hwfn, "VF[%d]: Incorrect Rxqs [%04x, %02x]\n",
+                       vf->relative_vf_id, req->rx_qid, req->num_rxqs);
+               goto out;
+       }
+
        for (i = 0; i < req->num_rxqs; i++) {
                qid = req->rx_qid + i;
-
-               if (!vf->vf_queues[qid].rxq_active) {
-                       DP_NOTICE(p_hwfn, "VF rx_qid = %d isn`t active!\n",
-                                 qid);
-                       status = PFVF_STATUS_FAILURE;
-                       break;
+               if (!vf->vf_queues[qid].p_rx_cid) {
+                       DP_INFO(p_hwfn,
+                               "VF[%d] rx_qid = %d isn`t active!\n",
+                               vf->relative_vf_id, qid);
+                       goto out;
                }
 
-               rc = qed_sp_eth_rx_queues_update(p_hwfn,
-                                                vf->vf_queues[qid].fw_rx_qid,
-                                                1,
-                                                complete_cqe_flg,
-                                                complete_event_flg,
-                                                QED_SPQ_MODE_EBLOCK, NULL);
-
-               if (rc) {
-                       status = PFVF_STATUS_FAILURE;
-                       break;
-               }
+               handlers[i] = vf->vf_queues[qid].p_rx_cid;
        }
 
+       rc = qed_sp_eth_rx_queues_update(p_hwfn, (void **)&handlers,
+                                        req->num_rxqs,
+                                        complete_cqe_flg,
+                                        complete_event_flg,
+                                        QED_SPQ_MODE_EBLOCK, NULL);
+       if (rc)
+               goto out;
+
+       status = PFVF_STATUS_SUCCESS;
+out:
        qed_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_UPDATE_RXQ,
                             length, status);
 }
@@ -2268,7 +2340,7 @@ qed_iov_vp_update_rss_param(struct qed_hwfn *p_hwfn,
                        DP_NOTICE(p_hwfn,
                                  "rss_ind_table[%d] = %d, rxq is out of range\n",
                                  i, q_idx);
-               else if (!vf->vf_queues[q_idx].rxq_active)
+               else if (!vf->vf_queues[q_idx].p_rx_cid)
                        DP_NOTICE(p_hwfn,
                                  "rss_ind_table[%d] = %d, rxq is not active\n",
                                  i, q_idx);
@@ -3468,9 +3540,28 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
        return 0;
 }
 
+static void qed_sriov_enable_qid_config(struct qed_hwfn *hwfn,
+                                       u16 vfid,
+                                       struct qed_iov_vf_init_params *params)
+{
+       u16 base, i;
+
+       /* Since we have an equal resource distribution per-VF, and we assume
+        * PF has acquired the first QED_PF_L2_QUE queues, we start setting
+        * sequentially from there.
+        */
+       base = FEAT_NUM(hwfn, QED_PF_L2_QUE) + vfid * params->num_queues;
+
+       params->rel_vf_id = vfid;
+       for (i = 0; i < params->num_queues; i++) {
+               params->req_rx_queue[i] = base + i;
+               params->req_tx_queue[i] = base + i;
+       }
+}
+
 static int qed_sriov_enable(struct qed_dev *cdev, int num)
 {
-       struct qed_sb_cnt_info sb_cnt_info;
+       struct qed_iov_vf_init_params params;
        int i, j, rc;
 
        if (num >= RESC_NUM(&cdev->hwfns[0], QED_VPORT)) {
@@ -3479,11 +3570,17 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num)
                return -EINVAL;
        }
 
+       memset(&params, 0, sizeof(params));
+
        /* Initialize HW for VF access */
        for_each_hwfn(cdev, j) {
                struct qed_hwfn *hwfn = &cdev->hwfns[j];
                struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
-               int num_sbs = 0, limit = 16;
+
+               /* Make sure not to use more than 16 queues per VF */
+               params.num_queues = min_t(int,
+                                         FEAT_NUM(hwfn, QED_VF_L2_QUE) / num,
+                                         16);
 
                if (!ptt) {
                        DP_ERR(hwfn, "Failed to acquire ptt\n");
@@ -3491,19 +3588,12 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num)
                        goto err;
                }
 
-               if (IS_MF_DEFAULT(hwfn))
-                       limit = MAX_NUM_VFS_BB / hwfn->num_funcs_on_engine;
-
-               memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
-               qed_int_get_num_sbs(hwfn, &sb_cnt_info);
-               num_sbs = min_t(int, sb_cnt_info.sb_free_blk, limit);
-
                for (i = 0; i < num; i++) {
                        if (!qed_iov_is_valid_vfid(hwfn, i, false, true))
                                continue;
 
-                       rc = qed_iov_init_hw_for_vf(hwfn,
-                                                   ptt, i, num_sbs / num);
+                       qed_sriov_enable_qid_config(hwfn, i, &params);
+                       rc = qed_iov_init_hw_for_vf(hwfn, ptt, &params);
                        if (rc) {
                                DP_ERR(cdev, "Failed to enable VF[%d]\n", i);
                                qed_ptt_release(hwfn, ptt);
index 3cf515b1b4278fe5697923958b1e4eb7aaf54913..509c02b4772e087f3a655111f3ea8c687e375faf 100644 (file)
@@ -58,6 +58,23 @@ struct qed_public_vf_info {
        int tx_rate;
 };
 
+struct qed_iov_vf_init_params {
+       u16 rel_vf_id;
+
+       /* Number of requested Queues; Currently, don't support different
+        * number of Rx/Tx queues.
+        */
+
+       u16 num_queues;
+
+       /* Allow the client to choose which qzones to use for Rx/Tx,
+        * and which queue_base to use for Tx queues on a per-queue basis.
+        * Notice values should be relative to the PF resources.
+        */
+       u16 req_rx_queue[QED_MAX_VF_CHAINS_PER_PF];
+       u16 req_tx_queue[QED_MAX_VF_CHAINS_PER_PF];
+};
+
 /* This struct is part of qed_dev and contains data relevant to all hwfns;
  * Initialized only if SR-IOV cpabability is exposed in PCIe config space.
  */
@@ -99,10 +116,10 @@ struct qed_iov_vf_mbx {
 
 struct qed_vf_q_info {
        u16 fw_rx_qid;
+       struct qed_queue_cid *p_rx_cid;
        u16 fw_tx_qid;
+       struct qed_queue_cid *p_tx_cid;
        u8 fw_cid;
-       u8 rxq_active;
-       u8 txq_active;
 };
 
 enum vf_state {
index f580bf4c97f0950b53c1b960f2a4cb3223723e79..60b31a8ede73f81c76e9091362b92e2b800d71f5 100644 (file)
@@ -388,18 +388,18 @@ free_p_iov:
 #define MSTORM_QZONE_START(dev)   (TSTORM_QZONE_START +        \
                                   (TSTORM_QZONE_SIZE * NUM_OF_L2_QUEUES(dev)))
 
-int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
-                       u8 rx_qid,
-                       u16 sb,
-                       u8 sb_index,
-                       u16 bd_max_bytes,
-                       dma_addr_t bd_chain_phys_addr,
-                       dma_addr_t cqe_pbl_addr,
-                       u16 cqe_pbl_size, void __iomem **pp_prod)
+int
+qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
+                   struct qed_queue_cid *p_cid,
+                   u16 bd_max_bytes,
+                   dma_addr_t bd_chain_phys_addr,
+                   dma_addr_t cqe_pbl_addr,
+                   u16 cqe_pbl_size, void __iomem **pp_prod)
 {
        struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
        struct pfvf_start_queue_resp_tlv *resp;
        struct vfpf_start_rxq_tlv *req;
+       u8 rx_qid = p_cid->rel.queue_id;
        int rc;
 
        /* clear mailbox and prep first tlv */
@@ -409,21 +409,22 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
        req->cqe_pbl_addr = cqe_pbl_addr;
        req->cqe_pbl_size = cqe_pbl_size;
        req->rxq_addr = bd_chain_phys_addr;
-       req->hw_sb = sb;
-       req->sb_index = sb_index;
+       req->hw_sb = p_cid->rel.sb;
+       req->sb_index = p_cid->rel.sb_idx;
        req->bd_max_bytes = bd_max_bytes;
        req->stat_id = -1;
 
        /* If PF is legacy, we'll need to calculate producers ourselves
         * as well as clean them.
         */
-       if (pp_prod && p_iov->b_pre_fp_hsi) {
+       if (p_iov->b_pre_fp_hsi) {
                u8 hw_qid = p_iov->acquire_resp.resc.hw_qid[rx_qid];
                u32 init_prod_val = 0;
 
-               *pp_prod = (u8 __iomem *)p_hwfn->regview +
-                                        MSTORM_QZONE_START(p_hwfn->cdev) +
-                                        hw_qid * MSTORM_QZONE_SIZE;
+               *pp_prod = (u8 __iomem *)
+                   p_hwfn->regview +
+                   MSTORM_QZONE_START(p_hwfn->cdev) +
+                   hw_qid * MSTORM_QZONE_SIZE;
 
                /* Init the rcq, rx bd and rx sge (if valid) producers to 0 */
                __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
@@ -444,7 +445,7 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
        }
 
        /* Learn the address of the producer from the response */
-       if (pp_prod && !p_iov->b_pre_fp_hsi) {
+       if (!p_iov->b_pre_fp_hsi) {
                u32 init_prod_val = 0;
 
                *pp_prod = (u8 __iomem *)p_hwfn->regview + resp->offset;
@@ -462,7 +463,8 @@ exit:
        return rc;
 }
 
-int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn, u16 rx_qid, bool cqe_completion)
+int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
+                      struct qed_queue_cid *p_cid, bool cqe_completion)
 {
        struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
        struct vfpf_stop_rxqs_tlv *req;
@@ -472,7 +474,7 @@ int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn, u16 rx_qid, bool cqe_completion)
        /* clear mailbox and prep first tlv */
        req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_STOP_RXQS, sizeof(*req));
 
-       req->rx_qid = rx_qid;
+       req->rx_qid = p_cid->rel.queue_id;
        req->num_rxqs = 1;
        req->cqe_completion = cqe_completion;
 
@@ -496,28 +498,28 @@ exit:
        return rc;
 }
 
-int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
-                       u16 tx_queue_id,
-                       u16 sb,
-                       u8 sb_index,
-                       dma_addr_t pbl_addr,
-                       u16 pbl_size, void __iomem **pp_doorbell)
+int
+qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
+                   struct qed_queue_cid *p_cid,
+                   dma_addr_t pbl_addr,
+                   u16 pbl_size, void __iomem **pp_doorbell)
 {
        struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
        struct pfvf_start_queue_resp_tlv *resp;
        struct vfpf_start_txq_tlv *req;
+       u16 qid = p_cid->rel.queue_id;
        int rc;
 
        /* clear mailbox and prep first tlv */
        req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_START_TXQ, sizeof(*req));
 
-       req->tx_qid = tx_queue_id;
+       req->tx_qid = qid;
 
        /* Tx */
        req->pbl_addr = pbl_addr;
        req->pbl_size = pbl_size;
-       req->hw_sb = sb;
-       req->sb_index = sb_index;
+       req->hw_sb = p_cid->rel.sb;
+       req->sb_index = p_cid->rel.sb_idx;
 
        /* add list termination tlv */
        qed_add_tlv(p_hwfn, &p_iov->offset,
@@ -533,33 +535,29 @@ int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
                goto exit;
        }
 
-       if (pp_doorbell) {
-               /* Modern PFs provide the actual offsets, while legacy
-                * provided only the queue id.
-                */
-               if (!p_iov->b_pre_fp_hsi) {
-                       *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
-                                                    resp->offset;
-               } else {
-                       u8 cid = p_iov->acquire_resp.resc.cid[tx_queue_id];
-                       u32 db_addr;
-
-                       db_addr = qed_db_addr_vf(cid, DQ_DEMS_LEGACY);
-                       *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
-                                                    db_addr;
-               }
+       /* Modern PFs provide the actual offsets, while legacy
+        * provided only the queue id.
+        */
+       if (!p_iov->b_pre_fp_hsi) {
+               *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + resp->offset;
+       } else {
+               u8 cid = p_iov->acquire_resp.resc.cid[qid];
 
-               DP_VERBOSE(p_hwfn, QED_MSG_IOV,
-                          "Txq[0x%02x]: doorbell at %p [offset 0x%08x]\n",
-                          tx_queue_id, *pp_doorbell, resp->offset);
+               *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
+                                            qed_db_addr_vf(cid,
+                                                           DQ_DEMS_LEGACY);
        }
+
+       DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+                  "Txq[0x%02x]: doorbell at %p [offset 0x%08x]\n",
+                  qid, *pp_doorbell, resp->offset);
 exit:
        qed_vf_pf_req_end(p_hwfn, rc);
 
        return rc;
 }
 
-int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid)
+int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, struct qed_queue_cid *p_cid)
 {
        struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
        struct vfpf_stop_txqs_tlv *req;
@@ -569,7 +567,7 @@ int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid)
        /* clear mailbox and prep first tlv */
        req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_STOP_TXQS, sizeof(*req));
 
-       req->tx_qid = tx_qid;
+       req->tx_qid = p_cid->rel.queue_id;
        req->num_txqs = 1;
 
        /* add list termination tlv */
@@ -1171,6 +1169,13 @@ void qed_vf_get_num_vlan_filters(struct qed_hwfn *p_hwfn, u8 *num_vlan_filters)
        *num_vlan_filters = p_vf->acquire_resp.resc.num_vlan_filters;
 }
 
+void qed_vf_get_num_mac_filters(struct qed_hwfn *p_hwfn, u8 *num_mac_filters)
+{
+       struct qed_vf_iov *p_vf = p_hwfn->vf_iov_info;
+
+       *num_mac_filters = p_vf->acquire_resp.resc.num_mac_filters;
+}
+
 bool qed_vf_check_mac(struct qed_hwfn *p_hwfn, u8 *mac)
 {
        struct qed_bulletin_content *bulletin;
index 944745b7c4c0f107d8aea0c618421af1668a2728..11eb3854e6f293ee0de6305dc46b94e9d23b9b36 100644 (file)
@@ -622,6 +622,14 @@ void qed_vf_get_port_mac(struct qed_hwfn *p_hwfn, u8 *port_mac);
 void qed_vf_get_num_vlan_filters(struct qed_hwfn *p_hwfn,
                                 u8 *num_vlan_filters);
 
+/**
+ * @brief Get number of MAC filters allocated for VF by qed
+ *
+ *  @param p_hwfn
+ *  @param num_mac_filters - allocated MAC filters
+ */
+void qed_vf_get_num_mac_filters(struct qed_hwfn *p_hwfn, u8 *num_mac_filters);
+
 /**
  * @brief Check if VF can set a MAC address
  *
@@ -658,10 +666,7 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn);
 /**
  * @brief VF - start the RX Queue by sending a message to the PF
  * @param p_hwfn
- * @param cid                   - zero based within the VF
- * @param rx_queue_id           - zero based within the VF
- * @param sb                    - VF status block for this queue
- * @param sb_index              - Index within the status block
+ * @param p_cid                        - Only relative fields are relevant
  * @param bd_max_bytes          - maximum number of bytes per bd
  * @param bd_chain_phys_addr    - physical address of bd chain
  * @param cqe_pbl_addr          - physical address of pbl
@@ -672,9 +677,7 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn);
  * @return int
  */
 int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
-                       u8 rx_queue_id,
-                       u16 sb,
-                       u8 sb_index,
+                       struct qed_queue_cid *p_cid,
                        u16 bd_max_bytes,
                        dma_addr_t bd_chain_phys_addr,
                        dma_addr_t cqe_pbl_addr,
@@ -694,24 +697,23 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
  *
  * @return int
  */
-int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
-                       u16 tx_queue_id,
-                       u16 sb,
-                       u8 sb_index,
-                       dma_addr_t pbl_addr,
-                       u16 pbl_size, void __iomem **pp_doorbell);
+int
+qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
+                   struct qed_queue_cid *p_cid,
+                   dma_addr_t pbl_addr,
+                   u16 pbl_size, void __iomem **pp_doorbell);
 
 /**
  * @brief VF - stop the RX queue by sending a message to the PF
  *
  * @param p_hwfn
- * @param rx_qid
+ * @param p_cid
  * @param cqe_completion
  *
  * @return int
  */
 int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
-                      u16 rx_qid, bool cqe_completion);
+                      struct qed_queue_cid *p_cid, bool cqe_completion);
 
 /**
  * @brief VF - stop the TX queue by sending a message to the PF
@@ -721,7 +723,7 @@ int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
  *
  * @return int
  */
-int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid);
+int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, struct qed_queue_cid *p_cid);
 
 /**
  * @brief VF - send a vport update command
@@ -872,6 +874,11 @@ static inline void qed_vf_get_num_vlan_filters(struct qed_hwfn *p_hwfn,
 {
 }
 
+static inline void qed_vf_get_num_mac_filters(struct qed_hwfn *p_hwfn,
+                                             u8 *num_mac_filters)
+{
+}
+
 static inline bool qed_vf_check_mac(struct qed_hwfn *p_hwfn, u8 *mac)
 {
        return false;
@@ -889,9 +896,7 @@ static inline int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn)
 }
 
 static inline int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
-                                     u8 rx_queue_id,
-                                     u16 sb,
-                                     u8 sb_index,
+                                     struct qed_queue_cid *p_cid,
                                      u16 bd_max_bytes,
                                      dma_addr_t bd_chain_phys_adr,
                                      dma_addr_t cqe_pbl_addr,
@@ -901,9 +906,7 @@ static inline int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 }
 
 static inline int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
-                                     u16 tx_queue_id,
-                                     u16 sb,
-                                     u8 sb_index,
+                                     struct qed_queue_cid *p_cid,
                                      dma_addr_t pbl_addr,
                                      u16 pbl_size, void __iomem **pp_doorbell)
 {
@@ -911,12 +914,14 @@ static inline int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
 }
 
 static inline int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
-                                    u16 rx_qid, bool cqe_completion)
+                                    struct qed_queue_cid *p_cid,
+                                    bool cqe_completion)
 {
        return -EINVAL;
 }
 
-static inline int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid)
+static inline int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn,
+                                    struct qed_queue_cid *p_cid)
 {
        return -EINVAL;
 }
index 28dc58919c851f008aebbfca750dbbe962b97d48..048a230c3ce0c5bc807bc52a7484f6b53a2438b2 100644 (file)
@@ -2,4 +2,4 @@ obj-$(CONFIG_QEDE) := qede.o
 
 qede-y := qede_main.o qede_ethtool.o
 qede-$(CONFIG_DCB) += qede_dcbnl.o
-qede-$(CONFIG_INFINIBAND_QEDR) += qede_roce.o
+qede-$(CONFIG_QED_RDMA) += qede_roce.o
index 9135b9d37dfaa46a506ca7d28be1fb6584b98a81..c79dc78746fcbf27e25530c4c94fe0e4f588eef8 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/bitmap.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
+#include <linux/bpf.h>
 #include <linux/io.h>
 #include <linux/qed/common_hsi.h>
 #include <linux/qed/eth_common.h>
@@ -127,10 +128,9 @@ struct qede_dev {
 
        const struct qed_eth_ops        *ops;
 
-       struct qed_dev_eth_info dev_info;
+       struct qed_dev_eth_info dev_info;
 #define QEDE_MAX_RSS_CNT(edev) ((edev)->dev_info.num_queues)
-#define QEDE_MAX_TSS_CNT(edev) ((edev)->dev_info.num_queues * \
-                                (edev)->dev_info.num_tc)
+#define QEDE_MAX_TSS_CNT(edev) ((edev)->dev_info.num_queues)
 
        struct qede_fastpath            *fp_array;
        u8                              req_num_tx;
@@ -139,17 +139,9 @@ struct qede_dev {
        u8                              fp_num_rx;
        u16                             req_queues;
        u16                             num_queues;
-       u8                              num_tc;
 #define QEDE_QUEUE_CNT(edev)   ((edev)->num_queues)
 #define QEDE_RSS_COUNT(edev)   ((edev)->num_queues - (edev)->fp_num_tx)
-#define QEDE_TSS_COUNT(edev)   (((edev)->num_queues - (edev)->fp_num_rx) * \
-                                (edev)->num_tc)
-#define QEDE_TX_IDX(edev, txqidx)      ((edev)->fp_num_rx + (txqidx) % \
-                                        QEDE_TSS_COUNT(edev))
-#define QEDE_TC_IDX(edev, txqidx)      ((txqidx) / QEDE_TSS_COUNT(edev))
-#define QEDE_TX_QUEUE(edev, txqidx)    \
-       (&(edev)->fp_array[QEDE_TX_IDX((edev), (txqidx))].txqs[QEDE_TC_IDX(\
-                                                       (edev), (txqidx))])
+#define QEDE_TSS_COUNT(edev)   ((edev)->num_queues - (edev)->fp_num_rx)
 
        struct qed_int_info             int_info;
        unsigned char                   primary_mac[ETH_ALEN];
@@ -193,7 +185,11 @@ struct qede_dev {
        u16                             vxlan_dst_port;
        u16                             geneve_dst_port;
 
+       bool wol_enabled;
+
        struct qede_rdma_dev            rdma_info;
+
+       struct bpf_prog *xdp_prog;
 };
 
 enum QEDE_STATE {
@@ -223,39 +219,67 @@ enum qede_agg_state {
 };
 
 struct qede_agg_info {
-       struct sw_rx_data replace_buf;
-       dma_addr_t replace_buf_mapping;
-       struct sw_rx_data start_buf;
-       dma_addr_t start_buf_mapping;
-       struct eth_fast_path_rx_tpa_start_cqe start_cqe;
-       enum qede_agg_state agg_state;
+       /* rx_buf is a data buffer that can be placed / consumed from rx bd
+        * chain. It has two purposes: We will preallocate the data buffer
+        * for each aggregation when we open the interface and will place this
+        * buffer on the rx-bd-ring when we receive TPA_START. We don't want
+        * to be in a state where allocation fails, as we can't reuse the
+        * consumer buffer in the rx-chain since FW may still be writing to it
+        * (since header needs to be modified for TPA).
+        * The second purpose is to keep a pointer to the bd buffer during
+        * aggregation.
+        */
+       struct sw_rx_data buffer;
+       dma_addr_t buffer_mapping;
+
        struct sk_buff *skb;
-       int frag_id;
+
+       /* We need some structs from the start cookie until termination */
        u16 vlan_tag;
+       u16 start_cqe_bd_len;
+       u8 start_cqe_placement_offset;
+
+       u8 state;
+       u8 frag_id;
+
+       u8 tunnel_type;
 };
 
 struct qede_rx_queue {
-       __le16                  *hw_cons_ptr;
-       struct sw_rx_data       *sw_rx_ring;
-       u16                     sw_rx_cons;
-       u16                     sw_rx_prod;
-       struct qed_chain        rx_bd_ring;
-       struct qed_chain        rx_comp_ring;
-       void __iomem            *hw_rxq_prod_addr;
+       __le16 *hw_cons_ptr;
+       void __iomem *hw_rxq_prod_addr;
+
+       /* Required for the allocation of replacement buffers */
+       struct device *dev;
+
+       struct bpf_prog *xdp_prog;
+
+       u16 sw_rx_cons;
+       u16 sw_rx_prod;
+
+       u16 num_rx_buffers; /* Slowpath */
+       u8 data_direction;
+       u8 rxq_id;
+
+       u32 rx_buf_size;
+       u32 rx_buf_seg_size;
+
+       u64 rcv_pkts;
+
+       struct sw_rx_data *sw_rx_ring;
+       struct qed_chain rx_bd_ring;
+       struct qed_chain rx_comp_ring ____cacheline_aligned;
 
        /* GRO */
-       struct qede_agg_info    tpa_info[ETH_TPA_MAX_AGGS_NUM];
+       struct qede_agg_info tpa_info[ETH_TPA_MAX_AGGS_NUM];
 
-       int                     rx_buf_size;
-       unsigned int            rx_buf_seg_size;
+       u64 rx_hw_errors;
+       u64 rx_alloc_errors;
+       u64 rx_ip_frags;
 
-       u16                     num_rx_buffers;
-       u16                     rxq_id;
+       u64 xdp_no_pass;
 
-       u64                     rcv_pkts;
-       u64                     rx_hw_errors;
-       u64                     rx_alloc_errors;
-       u64                     rx_ip_frags;
+       void *handle;
 };
 
 union db_prod {
@@ -271,20 +295,39 @@ struct sw_tx_bd {
 };
 
 struct qede_tx_queue {
-       int                     index; /* Queue index */
-       __le16                  *hw_cons_ptr;
-       struct sw_tx_bd         *sw_tx_ring;
-       u16                     sw_tx_cons;
-       u16                     sw_tx_prod;
-       struct qed_chain        tx_pbl;
-       void __iomem            *doorbell_addr;
-       union db_prod           tx_db;
-
-       u16                     num_tx_buffers;
-       u64                     xmit_pkts;
-       u64                     stopped_cnt;
-
-       bool                    is_legacy;
+       u8 is_xdp;
+       bool is_legacy;
+       u16 sw_tx_cons;
+       u16 sw_tx_prod;
+       u16 num_tx_buffers; /* Slowpath only */
+
+       u64 xmit_pkts;
+       u64 stopped_cnt;
+
+       __le16 *hw_cons_ptr;
+
+       /* Needed for the mapping of packets */
+       struct device *dev;
+
+       void __iomem *doorbell_addr;
+       union db_prod tx_db;
+       int index; /* Slowpath only */
+#define QEDE_TXQ_XDP_TO_IDX(edev, txq) ((txq)->index - \
+                                        QEDE_MAX_TSS_CNT(edev))
+#define QEDE_TXQ_IDX_TO_XDP(edev, idx) ((idx) + QEDE_MAX_TSS_CNT(edev))
+
+       /* Regular Tx requires skb + metadata for release purpose,
+        * while XDP requires only the pages themselves.
+        */
+       union {
+               struct sw_tx_bd *skbs;
+               struct page **pages;
+       } sw_tx_ring;
+
+       struct qed_chain tx_pbl;
+
+       /* Slowpath; Should be kept in end [unless missing padding] */
+       void *handle;
 };
 
 #define BD_UNMAP_ADDR(bd)              HILO_U64(le32_to_cpu((bd)->addr.hi), \
@@ -301,13 +344,16 @@ struct qede_fastpath {
        struct qede_dev *edev;
 #define QEDE_FASTPATH_TX       BIT(0)
 #define QEDE_FASTPATH_RX       BIT(1)
+#define QEDE_FASTPATH_XDP      BIT(2)
 #define QEDE_FASTPATH_COMBINED (QEDE_FASTPATH_TX | QEDE_FASTPATH_RX)
        u8                      type;
        u8                      id;
+       u8                      xdp_xmit;
        struct napi_struct      napi;
        struct qed_sb_info      *sb_info;
        struct qede_rx_queue    *rxq;
-       struct qede_tx_queue    *txqs;
+       struct qede_tx_queue    *txq;
+       struct qede_tx_queue    *xdp_tx;
 
 #define VEC_NAME_SIZE  (sizeof(((struct net_device *)0)->name) + 8)
        char    name[VEC_NAME_SIZE];
@@ -330,8 +376,13 @@ struct qede_fastpath {
 #define QEDE_SP_VXLAN_PORT_CONFIG      2
 #define QEDE_SP_GENEVE_PORT_CONFIG     3
 
-union qede_reload_args {
-       u16 mtu;
+struct qede_reload_args {
+       void (*func)(struct qede_dev *edev, struct qede_reload_args *args);
+       union {
+               netdev_features_t features;
+               struct bpf_prog *new_prog;
+               u16 mtu;
+       } u;
 };
 
 #ifdef CONFIG_DCB
@@ -340,21 +391,21 @@ void qede_set_dcbnl_ops(struct net_device *ndev);
 void qede_config_debug(uint debug, u32 *p_dp_module, u8 *p_dp_level);
 void qede_set_ethtool_ops(struct net_device *netdev);
 void qede_reload(struct qede_dev *edev,
-                void (*func)(struct qede_dev *edev,
-                             union qede_reload_args *args),
-                union qede_reload_args *args);
+                struct qede_reload_args *args, bool is_locked);
 int qede_change_mtu(struct net_device *dev, int new_mtu);
 void qede_fill_by_demand_stats(struct qede_dev *edev);
+void __qede_lock(struct qede_dev *edev);
+void __qede_unlock(struct qede_dev *edev);
 bool qede_has_rx_work(struct qede_rx_queue *rxq);
 int qede_txq_has_work(struct qede_tx_queue *txq);
-void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, struct qede_dev *edev,
-                            u8 count);
+void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, u8 count);
+void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq);
 
 #define RX_RING_SIZE_POW       13
 #define RX_RING_SIZE           ((u16)BIT(RX_RING_SIZE_POW))
 #define NUM_RX_BDS_MAX         (RX_RING_SIZE - 1)
 #define NUM_RX_BDS_MIN         128
-#define NUM_RX_BDS_DEF         NUM_RX_BDS_MAX
+#define NUM_RX_BDS_DEF         ((u16)BIT(10) - 1)
 
 #define TX_RING_SIZE_POW       13
 #define TX_RING_SIZE           ((u16)BIT(TX_RING_SIZE_POW))
index b7dbb4493a645724636c225a79d951eb88225e94..1c48f445c93bd2b10f261e3cf05ac90a96cf2bba 100644 (file)
 #include <linux/capability.h>
 #include "qede.h"
 
-#define QEDE_STAT_OFFSET(stat_name) (offsetof(struct qede_stats, stat_name))
-#define QEDE_STAT_STRING(stat_name) (#stat_name)
-#define _QEDE_STAT(stat_name, pf_only) \
-        {QEDE_STAT_OFFSET(stat_name), QEDE_STAT_STRING(stat_name), pf_only}
-#define QEDE_PF_STAT(stat_name)                _QEDE_STAT(stat_name, true)
-#define QEDE_STAT(stat_name)           _QEDE_STAT(stat_name, false)
-
 #define QEDE_RQSTAT_OFFSET(stat_name) \
         (offsetof(struct qede_rx_queue, stat_name))
 #define QEDE_RQSTAT_STRING(stat_name) (#stat_name)
@@ -39,12 +32,10 @@ static const struct {
        QEDE_RQSTAT(rx_hw_errors),
        QEDE_RQSTAT(rx_alloc_errors),
        QEDE_RQSTAT(rx_ip_frags),
+       QEDE_RQSTAT(xdp_no_pass),
 };
 
 #define QEDE_NUM_RQSTATS ARRAY_SIZE(qede_rqstats_arr)
-#define QEDE_RQSTATS_DATA(dev, sindex, rqindex) \
-       (*((u64 *)(((char *)(dev->fp_array[(rqindex)].rxq)) +\
-                   qede_rqstats_arr[(sindex)].offset)))
 #define QEDE_TQSTAT_OFFSET(stat_name) \
        (offsetof(struct qede_tx_queue, stat_name))
 #define QEDE_TQSTAT_STRING(stat_name) (#stat_name)
@@ -59,10 +50,12 @@ static const struct {
        QEDE_TQSTAT(stopped_cnt),
 };
 
-#define QEDE_TQSTATS_DATA(dev, sindex, tssid, tcid) \
-       (*((u64 *)(((void *)(&dev->fp_array[tssid].txqs[tcid])) +\
-                  qede_tqstats_arr[(sindex)].offset)))
-
+#define QEDE_STAT_OFFSET(stat_name) (offsetof(struct qede_stats, stat_name))
+#define QEDE_STAT_STRING(stat_name) (#stat_name)
+#define _QEDE_STAT(stat_name, pf_only) \
+        {QEDE_STAT_OFFSET(stat_name), QEDE_STAT_STRING(stat_name), pf_only}
+#define QEDE_PF_STAT(stat_name)        _QEDE_STAT(stat_name, true)
+#define QEDE_STAT(stat_name)   _QEDE_STAT(stat_name, false)
 static const struct {
        u64 offset;
        char string[ETH_GSTRING_LEN];
@@ -136,10 +129,6 @@ static const struct {
        QEDE_STAT(coalesced_bytes),
 };
 
-#define QEDE_STATS_DATA(dev, index) \
-       (*((u64 *)(((char *)(dev)) + offsetof(struct qede_dev, stats) \
-                       + qede_stats_arr[(index)].offset)))
-
 #define QEDE_NUM_STATS ARRAY_SIZE(qede_stats_arr)
 
 enum {
@@ -157,6 +146,7 @@ enum qede_ethtool_tests {
        QEDE_ETHTOOL_MEMORY_TEST,
        QEDE_ETHTOOL_REGISTER_TEST,
        QEDE_ETHTOOL_CLOCK_TEST,
+       QEDE_ETHTOOL_NVRAM_TEST,
        QEDE_ETHTOOL_TEST_MAX
 };
 
@@ -166,34 +156,63 @@ static const char qede_tests_str_arr[QEDE_ETHTOOL_TEST_MAX][ETH_GSTRING_LEN] = {
        "Memory (online)\t\t",
        "Register (online)\t",
        "Clock (online)\t\t",
+       "Nvram (online)\t\t",
 };
 
+static void qede_get_strings_stats_txq(struct qede_dev *edev,
+                                      struct qede_tx_queue *txq, u8 **buf)
+{
+       int i;
+
+       for (i = 0; i < QEDE_NUM_TQSTATS; i++) {
+               if (txq->is_xdp)
+                       sprintf(*buf, "%d [XDP]: %s",
+                               QEDE_TXQ_XDP_TO_IDX(edev, txq),
+                               qede_tqstats_arr[i].string);
+               else
+                       sprintf(*buf, "%d: %s", txq->index,
+                               qede_tqstats_arr[i].string);
+               *buf += ETH_GSTRING_LEN;
+       }
+}
+
+static void qede_get_strings_stats_rxq(struct qede_dev *edev,
+                                      struct qede_rx_queue *rxq, u8 **buf)
+{
+       int i;
+
+       for (i = 0; i < QEDE_NUM_RQSTATS; i++) {
+               sprintf(*buf, "%d: %s", rxq->rxq_id,
+                       qede_rqstats_arr[i].string);
+               *buf += ETH_GSTRING_LEN;
+       }
+}
+
 static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf)
 {
-       int i, j, k;
-
-       for (i = 0, k = 0; i < QEDE_QUEUE_CNT(edev); i++) {
-               int tc;
-
-               for (j = 0; j < QEDE_NUM_RQSTATS; j++)
-                       sprintf(buf + (k + j) * ETH_GSTRING_LEN,
-                               "%d:   %s", i, qede_rqstats_arr[j].string);
-               k += QEDE_NUM_RQSTATS;
-               for (tc = 0; tc < edev->num_tc; tc++) {
-                       for (j = 0; j < QEDE_NUM_TQSTATS; j++)
-                               sprintf(buf + (k + j) * ETH_GSTRING_LEN,
-                                       "%d.%d: %s", i, tc,
-                                       qede_tqstats_arr[j].string);
-                       k += QEDE_NUM_TQSTATS;
-               }
+       struct qede_fastpath *fp;
+       int i;
+
+       /* Account for queue statistics */
+       for (i = 0; i < QEDE_QUEUE_CNT(edev); i++) {
+               fp = &edev->fp_array[i];
+
+               if (fp->type & QEDE_FASTPATH_RX)
+                       qede_get_strings_stats_rxq(edev, fp->rxq, &buf);
+
+               if (fp->type & QEDE_FASTPATH_XDP)
+                       qede_get_strings_stats_txq(edev, fp->xdp_tx, &buf);
+
+               if (fp->type & QEDE_FASTPATH_TX)
+                       qede_get_strings_stats_txq(edev, fp->txq, &buf);
        }
 
-       for (i = 0, j = 0; i < QEDE_NUM_STATS; i++) {
+       /* Account for non-queue statistics */
+       for (i = 0; i < QEDE_NUM_STATS; i++) {
                if (IS_VF(edev) && qede_stats_arr[i].pf_only)
                        continue;
-               strcpy(buf + (k + j) * ETH_GSTRING_LEN,
-                      qede_stats_arr[i].string);
-               j++;
+               strcpy(buf, qede_stats_arr[i].string);
+               buf += ETH_GSTRING_LEN;
        }
 }
 
@@ -219,42 +238,61 @@ static void qede_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
        }
 }
 
+static void qede_get_ethtool_stats_txq(struct qede_tx_queue *txq, u64 **buf)
+{
+       int i;
+
+       for (i = 0; i < QEDE_NUM_TQSTATS; i++) {
+               **buf = *((u64 *)(((void *)txq) + qede_tqstats_arr[i].offset));
+               (*buf)++;
+       }
+}
+
+static void qede_get_ethtool_stats_rxq(struct qede_rx_queue *rxq, u64 **buf)
+{
+       int i;
+
+       for (i = 0; i < QEDE_NUM_RQSTATS; i++) {
+               **buf = *((u64 *)(((void *)rxq) + qede_rqstats_arr[i].offset));
+               (*buf)++;
+       }
+}
+
 static void qede_get_ethtool_stats(struct net_device *dev,
                                   struct ethtool_stats *stats, u64 *buf)
 {
        struct qede_dev *edev = netdev_priv(dev);
-       int sidx, cnt = 0;
-       int qid;
+       struct qede_fastpath *fp;
+       int i;
 
        qede_fill_by_demand_stats(edev);
 
-       mutex_lock(&edev->qede_lock);
+       /* Need to protect the access to the fastpath array */
+       __qede_lock(edev);
 
-       for (qid = 0; qid < QEDE_QUEUE_CNT(edev); qid++) {
-               int tc;
+       for (i = 0; i < QEDE_QUEUE_CNT(edev); i++) {
+               fp = &edev->fp_array[i];
 
-               if (edev->fp_array[qid].type & QEDE_FASTPATH_RX) {
-                       for (sidx = 0; sidx < QEDE_NUM_RQSTATS; sidx++)
-                               buf[cnt++] = QEDE_RQSTATS_DATA(edev, sidx, qid);
-               }
+               if (fp->type & QEDE_FASTPATH_RX)
+                       qede_get_ethtool_stats_rxq(fp->rxq, &buf);
 
-               if (edev->fp_array[qid].type & QEDE_FASTPATH_TX) {
-                       for (tc = 0; tc < edev->num_tc; tc++) {
-                               for (sidx = 0; sidx < QEDE_NUM_TQSTATS; sidx++)
-                                       buf[cnt++] = QEDE_TQSTATS_DATA(edev,
-                                                                      sidx,
-                                                                      qid, tc);
-                       }
-               }
+               if (fp->type & QEDE_FASTPATH_XDP)
+                       qede_get_ethtool_stats_txq(fp->xdp_tx, &buf);
+
+               if (fp->type & QEDE_FASTPATH_TX)
+                       qede_get_ethtool_stats_txq(fp->txq, &buf);
        }
 
-       for (sidx = 0; sidx < QEDE_NUM_STATS; sidx++) {
-               if (IS_VF(edev) && qede_stats_arr[sidx].pf_only)
+       for (i = 0; i < QEDE_NUM_STATS; i++) {
+               if (IS_VF(edev) && qede_stats_arr[i].pf_only)
                        continue;
-               buf[cnt++] = QEDE_STATS_DATA(edev, sidx);
+               *buf = *((u64 *)(((void *)&edev->stats) +
+                                qede_stats_arr[i].offset));
+
+               buf++;
        }
 
-       mutex_unlock(&edev->qede_lock);
+       __qede_unlock(edev);
 }
 
 static int qede_get_sset_count(struct net_device *dev, int stringset)
@@ -271,8 +309,18 @@ static int qede_get_sset_count(struct net_device *dev, int stringset)
                                if (qede_stats_arr[i].pf_only)
                                        num_stats--;
                }
-               return num_stats + QEDE_RSS_COUNT(edev) * QEDE_NUM_RQSTATS +
-                      QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS * edev->num_tc;
+
+               /* Account for the Regular Tx statistics */
+               num_stats += QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS;
+
+               /* Account for the Regular Rx statistics */
+               num_stats += QEDE_RSS_COUNT(edev) * QEDE_NUM_RQSTATS;
+
+               /* Account for XDP statistics [if needed] */
+               if (edev->xdp_prog)
+                       num_stats += QEDE_RSS_COUNT(edev) * QEDE_NUM_TQSTATS;
+               return num_stats;
+
        case ETH_SS_PRIV_FLAGS:
                return QEDE_PRI_FLAG_LEN;
        case ETH_SS_TEST:
@@ -318,7 +366,7 @@ static const struct qede_link_mode_mapping qed_lm_map[] = {
 {                                                              \
        int i;                                                  \
                                                                \
-       for (i = 0; i < QED_LM_COUNT; i++) {                    \
+       for (i = 0; i < ARRAY_SIZE(qed_lm_map); i++) {          \
                if ((caps) & (qed_lm_map[i].qed_link_mode))     \
                        __set_bit(qed_lm_map[i].ethtool_link_mode,\
                                  lk_ksettings->link_modes.name); \
@@ -329,7 +377,7 @@ static const struct qede_link_mode_mapping qed_lm_map[] = {
 {                                                              \
        int i;                                                  \
                                                                \
-       for (i = 0; i < QED_LM_COUNT; i++) {                    \
+       for (i = 0; i < ARRAY_SIZE(qed_lm_map); i++) {          \
                if (test_bit(qed_lm_map[i].ethtool_link_mode,   \
                             lk_ksettings->link_modes.name))    \
                        caps |= qed_lm_map[i].qed_link_mode;    \
@@ -343,6 +391,8 @@ static int qede_get_link_ksettings(struct net_device *dev,
        struct qede_dev *edev = netdev_priv(dev);
        struct qed_link_output current_link;
 
+       __qede_lock(edev);
+
        memset(&current_link, 0, sizeof(current_link));
        edev->ops->common->get_link(edev->cdev, &current_link);
 
@@ -362,6 +412,9 @@ static int qede_get_link_ksettings(struct net_device *dev,
                base->speed = SPEED_UNKNOWN;
                base->duplex = DUPLEX_UNKNOWN;
        }
+
+       __qede_unlock(edev);
+
        base->port = current_link.port;
        base->autoneg = (current_link.autoneg) ? AUTONEG_ENABLE :
                        AUTONEG_DISABLE;
@@ -481,6 +534,45 @@ static void qede_get_drvinfo(struct net_device *ndev,
        strlcpy(info->bus_info, pci_name(edev->pdev), sizeof(info->bus_info));
 }
 
+static void qede_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+       struct qede_dev *edev = netdev_priv(ndev);
+
+       if (edev->dev_info.common.wol_support) {
+               wol->supported = WAKE_MAGIC;
+               wol->wolopts = edev->wol_enabled ? WAKE_MAGIC : 0;
+       }
+}
+
+static int qede_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+       struct qede_dev *edev = netdev_priv(ndev);
+       bool wol_requested;
+       int rc;
+
+       if (wol->wolopts & ~WAKE_MAGIC) {
+               DP_INFO(edev,
+                       "Can't support WoL options other than magic-packet\n");
+               return -EINVAL;
+       }
+
+       wol_requested = !!(wol->wolopts & WAKE_MAGIC);
+       if (wol_requested == edev->wol_enabled)
+               return 0;
+
+       /* Need to actually change configuration */
+       if (!edev->dev_info.common.wol_support) {
+               DP_INFO(edev, "Device doesn't support WoL\n");
+               return -EINVAL;
+       }
+
+       rc = edev->ops->common->update_wol(edev->cdev, wol_requested);
+       if (!rc)
+               edev->wol_enabled = wol_requested;
+
+       return rc;
+}
+
 static u32 qede_get_msglevel(struct net_device *ndev)
 {
        struct qede_dev *edev = netdev_priv(ndev);
@@ -631,8 +723,7 @@ static int qede_set_ringparam(struct net_device *dev,
        edev->q_num_rx_buffers = ering->rx_pending;
        edev->q_num_tx_buffers = ering->tx_pending;
 
-       if (netif_running(edev->ndev))
-               qede_reload(edev, NULL, NULL);
+       qede_reload(edev, NULL, false);
 
        return 0;
 }
@@ -717,27 +808,27 @@ static int qede_get_regs_len(struct net_device *ndev)
                return -EINVAL;
 }
 
-static void qede_update_mtu(struct qede_dev *edev, union qede_reload_args *args)
+static void qede_update_mtu(struct qede_dev *edev,
+                           struct qede_reload_args *args)
 {
-       edev->ndev->mtu = args->mtu;
+       edev->ndev->mtu = args->u.mtu;
 }
 
 /* Netdevice NDOs */
 int qede_change_mtu(struct net_device *ndev, int new_mtu)
 {
        struct qede_dev *edev = netdev_priv(ndev);
-       union qede_reload_args args;
+       struct qede_reload_args args;
 
        DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
                   "Configuring MTU size of %d\n", new_mtu);
 
-       /* Set the mtu field and re-start the interface if needed*/
-       args.mtu = new_mtu;
-
-       if (netif_running(edev->ndev))
-               qede_reload(edev, &qede_update_mtu, &args);
+       /* Set the mtu field and re-start the interface if needed */
+       args.u.mtu = new_mtu;
+       args.func = &qede_update_mtu;
+       qede_reload(edev, &args, false);
 
-       qede_update_mtu(edev, &args);
+       edev->ops->common->update_mtu(edev->cdev, new_mtu);
 
        return 0;
 }
@@ -748,6 +839,8 @@ static void qede_get_channels(struct net_device *dev,
        struct qede_dev *edev = netdev_priv(dev);
 
        channels->max_combined = QEDE_MAX_RSS_CNT(edev);
+       channels->max_rx = QEDE_MAX_RSS_CNT(edev);
+       channels->max_tx = QEDE_MAX_RSS_CNT(edev);
        channels->combined_count = QEDE_QUEUE_CNT(edev) - edev->fp_num_tx -
                                        edev->fp_num_rx;
        channels->tx_count = edev->fp_num_tx;
@@ -812,8 +905,14 @@ static int qede_set_channels(struct net_device *dev,
        edev->req_queues = count;
        edev->req_num_tx = channels->tx_count;
        edev->req_num_rx = channels->rx_count;
-       if (netif_running(dev))
-               qede_reload(edev, NULL, NULL);
+       /* Reset the indirection table if rx queue count is updated */
+       if ((edev->req_queues - edev->req_num_tx) != QEDE_RSS_COUNT(edev)) {
+               edev->rss_params_inited &= ~QEDE_RSS_INDIR_INITED;
+               memset(&edev->rss_params.rss_ind_table, 0,
+                      sizeof(edev->rss_params.rss_ind_table));
+       }
+
+       qede_reload(edev, NULL, false);
 
        return 0;
 }
@@ -1045,6 +1144,12 @@ static int qede_set_rxfh(struct net_device *dev, const u32 *indir,
        struct qede_dev *edev = netdev_priv(dev);
        int i;
 
+       if (edev->dev_info.common.num_hwfns > 1) {
+               DP_INFO(edev,
+                       "RSS configuration is not supported for 100G devices\n");
+               return -EOPNOTSUPP;
+       }
+
        if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
                return -EOPNOTSUPP;
 
@@ -1113,7 +1218,7 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev,
 
        for_each_queue(i) {
                if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
-                       txq = edev->fp_array[i].txqs;
+                       txq = edev->fp_array[i].txq;
                        break;
                }
        }
@@ -1125,7 +1230,7 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev,
 
        /* Fill the entry in the SW ring and the BDs in the FW ring */
        idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
-       txq->sw_tx_ring[idx].skb = skb;
+       txq->sw_tx_ring.skbs[idx].skb = skb;
        first_bd = qed_chain_produce(&txq->tx_pbl);
        memset(first_bd, 0, sizeof(*first_bd));
        val = 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
@@ -1176,10 +1281,10 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev,
        }
 
        first_bd = (struct eth_tx_1st_bd *)qed_chain_consume(&txq->tx_pbl);
-       dma_unmap_page(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd),
-                      BD_UNMAP_LEN(first_bd), DMA_TO_DEVICE);
+       dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd),
+                        BD_UNMAP_LEN(first_bd), DMA_TO_DEVICE);
        txq->sw_tx_cons++;
-       txq->sw_tx_ring[idx].skb = NULL;
+       txq->sw_tx_ring.skbs[idx].skb = NULL;
 
        return 0;
 }
@@ -1191,8 +1296,8 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev)
        struct qede_rx_queue *rxq = NULL;
        struct sw_rx_data *sw_rx_data;
        union eth_rx_cqe *cqe;
+       int i, rc = 0;
        u8 *data_ptr;
-       int i;
 
        for_each_queue(i) {
                if (edev->fp_array[i].type & QEDE_FASTPATH_RX) {
@@ -1211,46 +1316,60 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev)
         * queue and that the loopback traffic is not IP.
         */
        for (i = 0; i < QEDE_SELFTEST_POLL_COUNT; i++) {
-               if (qede_has_rx_work(rxq))
+               if (!qede_has_rx_work(rxq)) {
+                       usleep_range(100, 200);
+                       continue;
+               }
+
+               hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr);
+               sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
+
+               /* Memory barrier to prevent the CPU from doing speculative
+                * reads of CQE/BD before reading hw_comp_cons. If the CQE is
+                * read before it is written by FW, then FW writes CQE and SB,
+                * and then the CPU reads the hw_comp_cons, it will use an old
+                * CQE.
+                */
+               rmb();
+
+               /* Get the CQE from the completion ring */
+               cqe = (union eth_rx_cqe *)qed_chain_consume(&rxq->rx_comp_ring);
+
+               /* Get the data from the SW ring */
+               sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
+               sw_rx_data = &rxq->sw_rx_ring[sw_rx_index];
+               fp_cqe = &cqe->fast_path_regular;
+               len =  le16_to_cpu(fp_cqe->len_on_first_bd);
+               data_ptr = (u8 *)(page_address(sw_rx_data->data) +
+                                 fp_cqe->placement_offset +
+                                 sw_rx_data->page_offset);
+               if (ether_addr_equal(data_ptr,  edev->ndev->dev_addr) &&
+                   ether_addr_equal(data_ptr + ETH_ALEN,
+                                    edev->ndev->dev_addr)) {
+                       for (i = ETH_HLEN; i < len; i++)
+                               if (data_ptr[i] != (unsigned char)(i & 0xff)) {
+                                       rc = -1;
+                                       break;
+                               }
+
+                       qede_recycle_rx_bd_ring(rxq, 1);
+                       qed_chain_recycle_consumed(&rxq->rx_comp_ring);
                        break;
-               usleep_range(100, 200);
+               }
+
+               DP_INFO(edev, "Not the transmitted packet\n");
+               qede_recycle_rx_bd_ring(rxq, 1);
+               qed_chain_recycle_consumed(&rxq->rx_comp_ring);
        }
 
-       if (!qede_has_rx_work(rxq)) {
+       if (i == QEDE_SELFTEST_POLL_COUNT) {
                DP_NOTICE(edev, "Failed to receive the traffic\n");
                return -1;
        }
 
-       hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr);
-       sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
-
-       /* Memory barrier to prevent the CPU from doing speculative reads of CQE
-        * / BD before reading hw_comp_cons. If the CQE is read before it is
-        * written by FW, then FW writes CQE and SB, and then the CPU reads the
-        * hw_comp_cons, it will use an old CQE.
-        */
-       rmb();
-
-       /* Get the CQE from the completion ring */
-       cqe = (union eth_rx_cqe *)qed_chain_consume(&rxq->rx_comp_ring);
-
-       /* Get the data from the SW ring */
-       sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
-       sw_rx_data = &rxq->sw_rx_ring[sw_rx_index];
-       fp_cqe = &cqe->fast_path_regular;
-       len =  le16_to_cpu(fp_cqe->len_on_first_bd);
-       data_ptr = (u8 *)(page_address(sw_rx_data->data) +
-                    fp_cqe->placement_offset + sw_rx_data->page_offset);
-       for (i = ETH_HLEN; i < len; i++)
-               if (data_ptr[i] != (unsigned char)(i & 0xff)) {
-                       DP_NOTICE(edev, "Loopback test failed\n");
-                       qede_recycle_rx_bd_ring(rxq, edev, 1);
-                       return -1;
-               }
-
-       qede_recycle_rx_bd_ring(rxq, edev, 1);
+       qede_update_rx_prod(edev, rxq);
 
-       return 0;
+       return rc;
 }
 
 static int qede_selftest_run_loopback(struct qede_dev *edev, u32 loopback_mode)
@@ -1361,6 +1480,11 @@ static void qede_self_test(struct net_device *dev,
                buf[QEDE_ETHTOOL_CLOCK_TEST] = 1;
                etest->flags |= ETH_TEST_FL_FAILED;
        }
+
+       if (edev->ops->common->selftest->selftest_nvram(edev->cdev)) {
+               buf[QEDE_ETHTOOL_NVRAM_TEST] = 1;
+               etest->flags |= ETH_TEST_FL_FAILED;
+       }
 }
 
 static int qede_set_tunable(struct net_device *dev,
@@ -1411,6 +1535,8 @@ static const struct ethtool_ops qede_ethtool_ops = {
        .get_drvinfo = qede_get_drvinfo,
        .get_regs_len = qede_get_regs_len,
        .get_regs = qede_get_regs,
+       .get_wol = qede_get_wol,
+       .set_wol = qede_set_wol,
        .get_msglevel = qede_get_msglevel,
        .set_msglevel = qede_set_msglevel,
        .nway_reset = qede_nway_reset,
index 4f298656bf47409e966a0a7cf5377ff089c0efd5..faeaa9f3b197b58422f4d0308230f95e328ea0fb 100644 (file)
@@ -94,11 +94,26 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
 
 #define TX_TIMEOUT             (5 * HZ)
 
+/* Utilize last protocol index for XDP */
+#define XDP_PI 11
+
 static void qede_remove(struct pci_dev *pdev);
-static int qede_alloc_rx_buffer(struct qede_dev *edev,
-                               struct qede_rx_queue *rxq);
+static void qede_shutdown(struct pci_dev *pdev);
 static void qede_link_update(void *dev, struct qed_link_output *link);
 
+/* The qede lock protects driver state changes and driver flows that are
+ * not reentrant.
+ */
+void __qede_lock(struct qede_dev *edev)
+{
+       mutex_lock(&edev->qede_lock);
+}
+
+void __qede_unlock(struct qede_dev *edev)
+{
+       mutex_unlock(&edev->qede_lock);
+}
+
 #ifdef CONFIG_QED_SRIOV
 static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
                            __be16 vlan_proto)
@@ -166,6 +181,7 @@ static struct pci_driver qede_pci_driver = {
        .id_table = qede_pci_tbl,
        .probe = qede_probe,
        .remove = qede_remove,
+       .shutdown = qede_shutdown,
 #ifdef CONFIG_QED_SRIOV
        .sriov_configure = qede_sriov_configure,
 #endif
@@ -288,12 +304,12 @@ static int qede_free_tx_pkt(struct qede_dev *edev,
                            struct qede_tx_queue *txq, int *len)
 {
        u16 idx = txq->sw_tx_cons & NUM_TX_BDS_MAX;
-       struct sk_buff *skb = txq->sw_tx_ring[idx].skb;
+       struct sk_buff *skb = txq->sw_tx_ring.skbs[idx].skb;
        struct eth_tx_1st_bd *first_bd;
        struct eth_tx_bd *tx_data_bd;
        int bds_consumed = 0;
        int nbds;
-       bool data_split = txq->sw_tx_ring[idx].flags & QEDE_TSO_SPLIT_BD;
+       bool data_split = txq->sw_tx_ring.skbs[idx].flags & QEDE_TSO_SPLIT_BD;
        int i, split_bd_len = 0;
 
        if (unlikely(!skb)) {
@@ -317,8 +333,8 @@ static int qede_free_tx_pkt(struct qede_dev *edev,
                split_bd_len = BD_UNMAP_LEN(split);
                bds_consumed++;
        }
-       dma_unmap_page(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd),
-                      BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE);
+       dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd),
+                        BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE);
 
        /* Unmap the data of the skb frags */
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, bds_consumed++) {
@@ -333,20 +349,19 @@ static int qede_free_tx_pkt(struct qede_dev *edev,
 
        /* Free skb */
        dev_kfree_skb_any(skb);
-       txq->sw_tx_ring[idx].skb = NULL;
-       txq->sw_tx_ring[idx].flags = 0;
+       txq->sw_tx_ring.skbs[idx].skb = NULL;
+       txq->sw_tx_ring.skbs[idx].flags = 0;
 
        return 0;
 }
 
 /* Unmap the data and free skb when mapping failed during start_xmit */
-static void qede_free_failed_tx_pkt(struct qede_dev *edev,
-                                   struct qede_tx_queue *txq,
+static void qede_free_failed_tx_pkt(struct qede_tx_queue *txq,
                                    struct eth_tx_1st_bd *first_bd,
                                    int nbd, bool data_split)
 {
        u16 idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
-       struct sk_buff *skb = txq->sw_tx_ring[idx].skb;
+       struct sk_buff *skb = txq->sw_tx_ring.skbs[idx].skb;
        struct eth_tx_bd *tx_data_bd;
        int i, split_bd_len = 0;
 
@@ -363,15 +378,15 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev,
                nbd--;
        }
 
-       dma_unmap_page(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd),
-                      BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE);
+       dma_unmap_single(txq->dev, BD_UNMAP_ADDR(first_bd),
+                        BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE);
 
        /* Unmap the data of the skb frags */
        for (i = 0; i < nbd; i++) {
                tx_data_bd = (struct eth_tx_bd *)
                        qed_chain_produce(&txq->tx_pbl);
                if (tx_data_bd->nbytes)
-                       dma_unmap_page(&edev->pdev->dev,
+                       dma_unmap_page(txq->dev,
                                       BD_UNMAP_ADDR(tx_data_bd),
                                       BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
        }
@@ -382,12 +397,11 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev,
 
        /* Free skb */
        dev_kfree_skb_any(skb);
-       txq->sw_tx_ring[idx].skb = NULL;
-       txq->sw_tx_ring[idx].flags = 0;
+       txq->sw_tx_ring.skbs[idx].skb = NULL;
+       txq->sw_tx_ring.skbs[idx].flags = 0;
 }
 
-static u32 qede_xmit_type(struct qede_dev *edev,
-                         struct sk_buff *skb, int *ipv6_ext)
+static u32 qede_xmit_type(struct sk_buff *skb, int *ipv6_ext)
 {
        u32 rc = XMIT_L4_CSUM;
        __be16 l3_proto;
@@ -454,18 +468,16 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb,
        second_bd->data.bitfields2 = cpu_to_le16(bd2_bits2);
 }
 
-static int map_frag_to_bd(struct qede_dev *edev,
+static int map_frag_to_bd(struct qede_tx_queue *txq,
                          skb_frag_t *frag, struct eth_tx_bd *bd)
 {
        dma_addr_t mapping;
 
        /* Map skb non-linear frag data for DMA */
-       mapping = skb_frag_dma_map(&edev->pdev->dev, frag, 0,
+       mapping = skb_frag_dma_map(txq->dev, frag, 0,
                                   skb_frag_size(frag), DMA_TO_DEVICE);
-       if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
-               DP_NOTICE(edev, "Unable to map frag - dropping packet\n");
+       if (unlikely(dma_mapping_error(txq->dev, mapping)))
                return -ENOMEM;
-       }
 
        /* Setup the data pointer of the frag data */
        BD_SET_UNMAP_ADDR_LEN(bd, mapping, skb_frag_size(frag));
@@ -485,8 +497,7 @@ static u16 qede_get_skb_hlen(struct sk_buff *skb, bool is_encap_pkt)
 
 /* +2 for 1st BD for headers and 2nd BD for headlen (if required) */
 #if ((MAX_SKB_FRAGS + 2) > ETH_TX_MAX_BDS_PER_NON_LSO_PACKET)
-static bool qede_pkt_req_lin(struct qede_dev *edev, struct sk_buff *skb,
-                            u8 xmit_type)
+static bool qede_pkt_req_lin(struct sk_buff *skb, u8 xmit_type)
 {
        int allowed_frags = ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1;
 
@@ -522,6 +533,47 @@ static inline void qede_update_tx_producer(struct qede_tx_queue *txq)
        mmiowb();
 }
 
+static int qede_xdp_xmit(struct qede_dev *edev, struct qede_fastpath *fp,
+                        struct sw_rx_data *metadata, u16 padding, u16 length)
+{
+       struct qede_tx_queue *txq = fp->xdp_tx;
+       u16 idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
+       struct eth_tx_1st_bd *first_bd;
+
+       if (!qed_chain_get_elem_left(&txq->tx_pbl)) {
+               txq->stopped_cnt++;
+               return -ENOMEM;
+       }
+
+       first_bd = (struct eth_tx_1st_bd *)qed_chain_produce(&txq->tx_pbl);
+
+       memset(first_bd, 0, sizeof(*first_bd));
+       first_bd->data.bd_flags.bitfields =
+           BIT(ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);
+       first_bd->data.bitfields |=
+           (length & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) <<
+           ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
+       first_bd->data.nbds = 1;
+
+       /* We can safely ignore the offset, as it's 0 for XDP */
+       BD_SET_UNMAP_ADDR_LEN(first_bd, metadata->mapping + padding, length);
+
+       /* Synchronize the buffer back to the device, as the XDP program
+        * has [probably] changed it.
+        */
+       dma_sync_single_for_device(&edev->pdev->dev,
+                                  metadata->mapping + padding,
+                                  length, PCI_DMA_TODEVICE);
+
+       txq->sw_tx_ring.pages[idx] = metadata->data;
+       txq->sw_tx_prod++;
+
+       /* Mark the fastpath for future XDP doorbell */
+       fp->xdp_xmit = 1;
+
+       return 0;
+}
+
 /* Main transmit function */
 static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
                                   struct net_device *ndev)
@@ -545,15 +597,15 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
        /* Get tx-queue context and netdev index */
        txq_index = skb_get_queue_mapping(skb);
        WARN_ON(txq_index >= QEDE_TSS_COUNT(edev));
-       txq = QEDE_TX_QUEUE(edev, txq_index);
+       txq = edev->fp_array[edev->fp_num_rx + txq_index].txq;
        netdev_txq = netdev_get_tx_queue(ndev, txq_index);
 
        WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1));
 
-       xmit_type = qede_xmit_type(edev, skb, &ipv6_ext);
+       xmit_type = qede_xmit_type(skb, &ipv6_ext);
 
 #if ((MAX_SKB_FRAGS + 2) > ETH_TX_MAX_BDS_PER_NON_LSO_PACKET)
-       if (qede_pkt_req_lin(edev, skb, xmit_type)) {
+       if (qede_pkt_req_lin(skb, xmit_type)) {
                if (skb_linearize(skb)) {
                        DP_NOTICE(edev,
                                  "SKB linearization failed - silently dropping this SKB\n");
@@ -565,7 +617,7 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 
        /* Fill the entry in the SW ring and the BDs in the FW ring */
        idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
-       txq->sw_tx_ring[idx].skb = skb;
+       txq->sw_tx_ring.skbs[idx].skb = skb;
        first_bd = (struct eth_tx_1st_bd *)
                   qed_chain_produce(&txq->tx_pbl);
        memset(first_bd, 0, sizeof(*first_bd));
@@ -573,11 +625,11 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
                1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
 
        /* Map skb linear data for DMA and set in the first BD */
-       mapping = dma_map_single(&edev->pdev->dev, skb->data,
+       mapping = dma_map_single(txq->dev, skb->data,
                                 skb_headlen(skb), DMA_TO_DEVICE);
-       if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
+       if (unlikely(dma_mapping_error(txq->dev, mapping))) {
                DP_NOTICE(edev, "SKB mapping failed\n");
-               qede_free_failed_tx_pkt(edev, txq, first_bd, 0, false);
+               qede_free_failed_tx_pkt(txq, first_bd, 0, false);
                qede_update_tx_producer(txq);
                return NETDEV_TX_OK;
        }
@@ -685,7 +737,7 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
                        /* this marks the BD as one that has no
                         * individual mapping
                         */
-                       txq->sw_tx_ring[idx].flags |= QEDE_TSO_SPLIT_BD;
+                       txq->sw_tx_ring.skbs[idx].flags |= QEDE_TSO_SPLIT_BD;
 
                        first_bd->nbytes = cpu_to_le16(hlen);
 
@@ -701,12 +753,11 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
        /* Handle fragmented skb */
        /* special handle for frags inside 2nd and 3rd bds.. */
        while (tx_data_bd && frag_idx < skb_shinfo(skb)->nr_frags) {
-               rc = map_frag_to_bd(edev,
+               rc = map_frag_to_bd(txq,
                                    &skb_shinfo(skb)->frags[frag_idx],
                                    tx_data_bd);
                if (rc) {
-                       qede_free_failed_tx_pkt(edev, txq, first_bd, nbd,
-                                               data_split);
+                       qede_free_failed_tx_pkt(txq, first_bd, nbd, data_split);
                        qede_update_tx_producer(txq);
                        return NETDEV_TX_OK;
                }
@@ -726,12 +777,11 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 
                memset(tx_data_bd, 0, sizeof(*tx_data_bd));
 
-               rc = map_frag_to_bd(edev,
+               rc = map_frag_to_bd(txq,
                                    &skb_shinfo(skb)->frags[frag_idx],
                                    tx_data_bd);
                if (rc) {
-                       qede_free_failed_tx_pkt(edev, txq, first_bd, nbd,
-                                               data_split);
+                       qede_free_failed_tx_pkt(txq, first_bd, nbd, data_split);
                        qede_update_tx_producer(txq);
                        return NETDEV_TX_OK;
                }
@@ -796,6 +846,27 @@ int qede_txq_has_work(struct qede_tx_queue *txq)
        return hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl);
 }
 
+static void qede_xdp_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
+{
+       struct eth_tx_1st_bd *bd;
+       u16 hw_bd_cons;
+
+       hw_bd_cons = le16_to_cpu(*txq->hw_cons_ptr);
+       barrier();
+
+       while (hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl)) {
+               bd = (struct eth_tx_1st_bd *)qed_chain_consume(&txq->tx_pbl);
+
+               dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(bd),
+                                PAGE_SIZE, DMA_BIDIRECTIONAL);
+               __free_page(txq->sw_tx_ring.pages[txq->sw_tx_cons &
+                                                 NUM_TX_BDS_MAX]);
+
+               txq->sw_tx_cons++;
+               txq->xmit_pkts++;
+       }
+}
+
 static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
        struct netdev_queue *netdev_txq;
@@ -879,16 +950,6 @@ bool qede_has_rx_work(struct qede_rx_queue *rxq)
        return hw_comp_cons != sw_comp_cons;
 }
 
-static bool qede_has_tx_work(struct qede_fastpath *fp)
-{
-       u8 tc;
-
-       for (tc = 0; tc < fp->edev->num_tc; tc++)
-               if (qede_txq_has_work(&fp->txqs[tc]))
-                       return true;
-       return false;
-}
-
 static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq)
 {
        qed_chain_consume(&rxq->rx_bd_ring);
@@ -898,8 +959,7 @@ static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq)
 /* This function reuses the buffer(from an offset) from
  * consumer index to producer index in the bd ring
  */
-static inline void qede_reuse_page(struct qede_dev *edev,
-                                  struct qede_rx_queue *rxq,
+static inline void qede_reuse_page(struct qede_rx_queue *rxq,
                                   struct sw_rx_data *curr_cons)
 {
        struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring);
@@ -921,27 +981,62 @@ static inline void qede_reuse_page(struct qede_dev *edev,
 /* In case of allocation failures reuse buffers
  * from consumer index to produce buffers for firmware
  */
-void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq,
-                            struct qede_dev *edev, u8 count)
+void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, u8 count)
 {
        struct sw_rx_data *curr_cons;
 
        for (; count > 0; count--) {
                curr_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX];
-               qede_reuse_page(edev, rxq, curr_cons);
+               qede_reuse_page(rxq, curr_cons);
                qede_rx_bd_ring_consume(rxq);
        }
 }
 
-static inline int qede_realloc_rx_buffer(struct qede_dev *edev,
-                                        struct qede_rx_queue *rxq,
+static int qede_alloc_rx_buffer(struct qede_rx_queue *rxq)
+{
+       struct sw_rx_data *sw_rx_data;
+       struct eth_rx_bd *rx_bd;
+       dma_addr_t mapping;
+       struct page *data;
+
+       data = alloc_pages(GFP_ATOMIC, 0);
+       if (unlikely(!data))
+               return -ENOMEM;
+
+       /* Map the entire page, since a single page may be split into
+        * multiple RX buffer segments.
+        */
+       mapping = dma_map_page(rxq->dev, data, 0,
+                              PAGE_SIZE, rxq->data_direction);
+       if (unlikely(dma_mapping_error(rxq->dev, mapping))) {
+               __free_page(data);
+               return -ENOMEM;
+       }
+
+       sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX];
+       sw_rx_data->page_offset = 0;
+       sw_rx_data->data = data;
+       sw_rx_data->mapping = mapping;
+
+       /* Advance PROD and get BD pointer */
+       rx_bd = (struct eth_rx_bd *)qed_chain_produce(&rxq->rx_bd_ring);
+       WARN_ON(!rx_bd);
+       rx_bd->addr.hi = cpu_to_le32(upper_32_bits(mapping));
+       rx_bd->addr.lo = cpu_to_le32(lower_32_bits(mapping));
+
+       rxq->sw_rx_prod++;
+
+       return 0;
+}
+
+static inline int qede_realloc_rx_buffer(struct qede_rx_queue *rxq,
                                         struct sw_rx_data *curr_cons)
 {
        /* Move to the next segment in the page */
        curr_cons->page_offset += rxq->rx_buf_seg_size;
 
        if (curr_cons->page_offset == PAGE_SIZE) {
-               if (unlikely(qede_alloc_rx_buffer(edev, rxq))) {
+               if (unlikely(qede_alloc_rx_buffer(rxq))) {
                        /* Since we failed to allocate new buffer
                         * current buffer can be used again.
                         */
@@ -950,22 +1045,21 @@ static inline int qede_realloc_rx_buffer(struct qede_dev *edev,
                        return -ENOMEM;
                }
 
-               dma_unmap_page(&edev->pdev->dev, curr_cons->mapping,
-                              PAGE_SIZE, DMA_FROM_DEVICE);
+               dma_unmap_page(rxq->dev, curr_cons->mapping,
+                              PAGE_SIZE, rxq->data_direction);
        } else {
                /* Increment refcount of the page as we don't want
                 * network stack to take the ownership of the page
                 * which can be recycled multiple times by the driver.
                 */
                page_ref_inc(curr_cons->data);
-               qede_reuse_page(edev, rxq, curr_cons);
+               qede_reuse_page(rxq, curr_cons);
        }
 
        return 0;
 }
 
-static inline void qede_update_rx_prod(struct qede_dev *edev,
-                                      struct qede_rx_queue *rxq)
+void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
        u16 bd_prod = qed_chain_get_prod_idx(&rxq->rx_bd_ring);
        u16 cqe_prod = qed_chain_get_prod_idx(&rxq->rx_comp_ring);
@@ -993,22 +1087,20 @@ static inline void qede_update_rx_prod(struct qede_dev *edev,
        mmiowb();
 }
 
-static u32 qede_get_rxhash(struct qede_dev *edev,
-                          u8 bitfields,
-                          __le32 rss_hash, enum pkt_hash_types *rxhash_type)
+static void qede_get_rxhash(struct sk_buff *skb, u8 bitfields, __le32 rss_hash)
 {
+       enum pkt_hash_types hash_type = PKT_HASH_TYPE_NONE;
        enum rss_hash_type htype;
+       u32 hash = 0;
 
        htype = GET_FIELD(bitfields, ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE);
-
-       if ((edev->ndev->features & NETIF_F_RXHASH) && htype) {
-               *rxhash_type = ((htype == RSS_HASH_TYPE_IPV4) ||
-                               (htype == RSS_HASH_TYPE_IPV6)) ?
-                               PKT_HASH_TYPE_L3 : PKT_HASH_TYPE_L4;
-               return le32_to_cpu(rss_hash);
+       if (htype) {
+               hash_type = ((htype == RSS_HASH_TYPE_IPV4) ||
+                            (htype == RSS_HASH_TYPE_IPV6)) ?
+                           PKT_HASH_TYPE_L3 : PKT_HASH_TYPE_L4;
+               hash = le32_to_cpu(rss_hash);
        }
-       *rxhash_type = PKT_HASH_TYPE_NONE;
-       return 0;
+       skb_set_hash(skb, hash, hash_type);
 }
 
 static void qede_set_skb_csum(struct sk_buff *skb, u8 csum_flag)
@@ -1024,12 +1116,14 @@ static void qede_set_skb_csum(struct sk_buff *skb, u8 csum_flag)
 
 static inline void qede_skb_receive(struct qede_dev *edev,
                                    struct qede_fastpath *fp,
+                                   struct qede_rx_queue *rxq,
                                    struct sk_buff *skb, u16 vlan_tag)
 {
        if (vlan_tag)
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
 
        napi_gro_receive(&fp->napi, skb);
+       fp->rxq->rcv_pkts++;
 }
 
 static void qede_set_gro_params(struct qede_dev *edev,
@@ -1057,7 +1151,7 @@ static int qede_fill_frag_skb(struct qede_dev *edev,
        struct qede_agg_info *tpa_info = &rxq->tpa_info[tpa_agg_index];
        struct sk_buff *skb = tpa_info->skb;
 
-       if (unlikely(tpa_info->agg_state != QEDE_AGG_STATE_START))
+       if (unlikely(tpa_info->state != QEDE_AGG_STATE_START))
                goto out;
 
        /* Add one frag and update the appropriate fields in the skb */
@@ -1065,7 +1159,7 @@ static int qede_fill_frag_skb(struct qede_dev *edev,
                           current_bd->data, current_bd->page_offset,
                           len_on_bd);
 
-       if (unlikely(qede_realloc_rx_buffer(edev, rxq, current_bd))) {
+       if (unlikely(qede_realloc_rx_buffer(rxq, current_bd))) {
                /* Incr page ref count to reuse on allocation failure
                 * so that it doesn't get freed while freeing SKB.
                 */
@@ -1083,8 +1177,9 @@ static int qede_fill_frag_skb(struct qede_dev *edev,
        return 0;
 
 out:
-       tpa_info->agg_state = QEDE_AGG_STATE_ERROR;
-       qede_recycle_rx_bd_ring(rxq, edev, 1);
+       tpa_info->state = QEDE_AGG_STATE_ERROR;
+       qede_recycle_rx_bd_ring(rxq, 1);
+
        return -ENOMEM;
 }
 
@@ -1095,12 +1190,10 @@ static void qede_tpa_start(struct qede_dev *edev,
        struct qede_agg_info *tpa_info = &rxq->tpa_info[cqe->tpa_agg_index];
        struct eth_rx_bd *rx_bd_cons = qed_chain_consume(&rxq->rx_bd_ring);
        struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring);
-       struct sw_rx_data *replace_buf = &tpa_info->replace_buf;
-       dma_addr_t mapping = tpa_info->replace_buf_mapping;
+       struct sw_rx_data *replace_buf = &tpa_info->buffer;
+       dma_addr_t mapping = tpa_info->buffer_mapping;
        struct sw_rx_data *sw_rx_data_cons;
        struct sw_rx_data *sw_rx_data_prod;
-       enum pkt_hash_types rxhash_type;
-       u32 rxhash;
 
        sw_rx_data_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX];
        sw_rx_data_prod = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX];
@@ -1121,11 +1214,11 @@ static void qede_tpa_start(struct qede_dev *edev,
        /* move partial skb from cons to pool (don't unmap yet)
         * save mapping, incase we drop the packet later on.
         */
-       tpa_info->start_buf = *sw_rx_data_cons;
+       tpa_info->buffer = *sw_rx_data_cons;
        mapping = HILO_U64(le32_to_cpu(rx_bd_cons->addr.hi),
                           le32_to_cpu(rx_bd_cons->addr.lo));
 
-       tpa_info->start_buf_mapping = mapping;
+       tpa_info->buffer_mapping = mapping;
        rxq->sw_rx_cons++;
 
        /* set tpa state to start only if we are able to allocate skb
@@ -1136,27 +1229,27 @@ static void qede_tpa_start(struct qede_dev *edev,
                                         le16_to_cpu(cqe->len_on_first_bd));
        if (unlikely(!tpa_info->skb)) {
                DP_NOTICE(edev, "Failed to allocate SKB for gro\n");
-               tpa_info->agg_state = QEDE_AGG_STATE_ERROR;
+               tpa_info->state = QEDE_AGG_STATE_ERROR;
                goto cons_buf;
        }
 
-       skb_put(tpa_info->skb, le16_to_cpu(cqe->len_on_first_bd));
-       memcpy(&tpa_info->start_cqe, cqe, sizeof(tpa_info->start_cqe));
-
        /* Start filling in the aggregation info */
+       skb_put(tpa_info->skb, le16_to_cpu(cqe->len_on_first_bd));
        tpa_info->frag_id = 0;
-       tpa_info->agg_state = QEDE_AGG_STATE_START;
+       tpa_info->state = QEDE_AGG_STATE_START;
 
-       rxhash = qede_get_rxhash(edev, cqe->bitfields,
-                                cqe->rss_hash, &rxhash_type);
-       skb_set_hash(tpa_info->skb, rxhash, rxhash_type);
+       /* Store some information from first CQE */
+       tpa_info->start_cqe_placement_offset = cqe->placement_offset;
+       tpa_info->start_cqe_bd_len = le16_to_cpu(cqe->len_on_first_bd);
        if ((le16_to_cpu(cqe->pars_flags.flags) >>
             PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT) &
-                   PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK)
+           PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK)
                tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag);
        else
                tpa_info->vlan_tag = 0;
 
+       qede_get_rxhash(tpa_info->skb, cqe->bitfields, cqe->rss_hash);
+
        /* This is needed in order to enable forwarding support */
        qede_set_gro_params(edev, tpa_info->skb, cqe);
 
@@ -1168,7 +1261,7 @@ cons_buf: /* We still need to handle bd_len_list to consume buffers */
        if (unlikely(cqe->ext_bd_len_list[1])) {
                DP_ERR(edev,
                       "Unlikely - got a TPA aggregation with more than one ext_bd_len_list entry in the TPA start\n");
-               tpa_info->agg_state = QEDE_AGG_STATE_ERROR;
+               tpa_info->state = QEDE_AGG_STATE_ERROR;
        }
 }
 
@@ -1238,7 +1331,7 @@ static void qede_gro_receive(struct qede_dev *edev,
 
 send_skb:
        skb_record_rx_queue(skb, fp->rxq->rxq_id);
-       qede_skb_receive(edev, fp, skb, vlan_tag);
+       qede_skb_receive(edev, fp, fp->rxq, skb, vlan_tag);
 }
 
 static inline void qede_tpa_cont(struct qede_dev *edev,
@@ -1275,7 +1368,7 @@ static void qede_tpa_end(struct qede_dev *edev,
                DP_ERR(edev,
                       "Strange - TPA emd with more than a single len_list entry\n");
 
-       if (unlikely(tpa_info->agg_state != QEDE_AGG_STATE_START))
+       if (unlikely(tpa_info->state != QEDE_AGG_STATE_START))
                goto err;
 
        /* Sanity */
@@ -1289,14 +1382,9 @@ static void qede_tpa_end(struct qede_dev *edev,
                       le16_to_cpu(cqe->total_packet_len), skb->len);
 
        memcpy(skb->data,
-              page_address(tpa_info->start_buf.data) +
-               tpa_info->start_cqe.placement_offset +
-               tpa_info->start_buf.page_offset,
-              le16_to_cpu(tpa_info->start_cqe.len_on_first_bd));
-
-       /* Recycle [mapped] start buffer for the next replacement */
-       tpa_info->replace_buf = tpa_info->start_buf;
-       tpa_info->replace_buf_mapping = tpa_info->start_buf_mapping;
+              page_address(tpa_info->buffer.data) +
+              tpa_info->start_cqe_placement_offset +
+              tpa_info->buffer.page_offset, tpa_info->start_cqe_bd_len);
 
        /* Finalize the SKB */
        skb->protocol = eth_type_trans(skb, edev->ndev);
@@ -1309,18 +1397,11 @@ static void qede_tpa_end(struct qede_dev *edev,
 
        qede_gro_receive(edev, fp, skb, tpa_info->vlan_tag);
 
-       tpa_info->agg_state = QEDE_AGG_STATE_NONE;
+       tpa_info->state = QEDE_AGG_STATE_NONE;
 
        return;
 err:
-       /* The BD starting the aggregation is still mapped; Re-use it for
-        * future aggregations [as replacement buffer]
-        */
-       memcpy(&tpa_info->replace_buf, &tpa_info->start_buf,
-              sizeof(struct sw_rx_data));
-       tpa_info->replace_buf_mapping = tpa_info->start_buf_mapping;
-       tpa_info->start_buf.data = NULL;
-       tpa_info->agg_state = QEDE_AGG_STATE_NONE;
+       tpa_info->state = QEDE_AGG_STATE_NONE;
        dev_kfree_skb_any(tpa_info->skb);
        tpa_info->skb = NULL;
 }
@@ -1402,238 +1483,364 @@ static bool qede_pkt_is_ip_fragmented(struct eth_fast_path_rx_reg_cqe *cqe,
        return false;
 }
 
-static int qede_rx_int(struct qede_fastpath *fp, int budget)
+/* Return true iff packet is to be passed to stack */
+static bool qede_rx_xdp(struct qede_dev *edev,
+                       struct qede_fastpath *fp,
+                       struct qede_rx_queue *rxq,
+                       struct bpf_prog *prog,
+                       struct sw_rx_data *bd,
+                       struct eth_fast_path_rx_reg_cqe *cqe)
 {
-       struct qede_dev *edev = fp->edev;
-       struct qede_rx_queue *rxq = fp->rxq;
+       u16 len = le16_to_cpu(cqe->len_on_first_bd);
+       struct xdp_buff xdp;
+       enum xdp_action act;
 
-       u16 hw_comp_cons, sw_comp_cons, sw_rx_index, parse_flag;
-       int rx_pkt = 0;
-       u8 csum_flag;
+       xdp.data = page_address(bd->data) + cqe->placement_offset;
+       xdp.data_end = xdp.data + len;
 
-       hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr);
-       sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
-
-       /* Memory barrier to prevent the CPU from doing speculative reads of CQE
-        * / BD in the while-loop before reading hw_comp_cons. If the CQE is
-        * read before it is written by FW, then FW writes CQE and SB, and then
-        * the CPU reads the hw_comp_cons, it will use an old CQE.
+       /* Queues always have a full reset currently, so for the time
+        * being, until there's an atomic program replace, just mark the
+        * read side for map helpers.
         */
-       rmb();
+       rcu_read_lock();
+       act = bpf_prog_run_xdp(prog, &xdp);
+       rcu_read_unlock();
 
-       /* Loop to complete all indicated BDs */
-       while (sw_comp_cons != hw_comp_cons) {
-               struct eth_fast_path_rx_reg_cqe *fp_cqe;
-               enum pkt_hash_types rxhash_type;
-               enum eth_rx_cqe_type cqe_type;
-               struct sw_rx_data *sw_rx_data;
-               union eth_rx_cqe *cqe;
-               struct sk_buff *skb;
-               struct page *data;
-               __le16 flags;
-               u16 len, pad;
-               u32 rx_hash;
-
-               /* Get the CQE from the completion ring */
-               cqe = (union eth_rx_cqe *)
-                       qed_chain_consume(&rxq->rx_comp_ring);
-               cqe_type = cqe->fast_path_regular.type;
-
-               if (unlikely(cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH)) {
-                       edev->ops->eth_cqe_completion(
-                                       edev->cdev, fp->id,
-                                       (struct eth_slow_path_rx_cqe *)cqe);
-                       goto next_cqe;
+       if (act == XDP_PASS)
+               return true;
+
+       /* Count number of packets not to be passed to stack */
+       rxq->xdp_no_pass++;
+
+       switch (act) {
+       case XDP_TX:
+               /* We need the replacement buffer before transmit. */
+               if (qede_alloc_rx_buffer(rxq)) {
+                       qede_recycle_rx_bd_ring(rxq, 1);
+                       return false;
                }
 
-               if (cqe_type != ETH_RX_CQE_TYPE_REGULAR) {
-                       switch (cqe_type) {
-                       case ETH_RX_CQE_TYPE_TPA_START:
-                               qede_tpa_start(edev, rxq,
-                                              &cqe->fast_path_tpa_start);
-                               goto next_cqe;
-                       case ETH_RX_CQE_TYPE_TPA_CONT:
-                               qede_tpa_cont(edev, rxq,
-                                             &cqe->fast_path_tpa_cont);
-                               goto next_cqe;
-                       case ETH_RX_CQE_TYPE_TPA_END:
-                               qede_tpa_end(edev, fp,
-                                            &cqe->fast_path_tpa_end);
-                               goto next_rx_only;
-                       default:
-                               break;
-                       }
+               /* Now if there's a transmission problem, we'd still have to
+                * throw current buffer, as replacement was already allocated.
+                */
+               if (qede_xdp_xmit(edev, fp, bd, cqe->placement_offset, len)) {
+                       dma_unmap_page(rxq->dev, bd->mapping,
+                                      PAGE_SIZE, DMA_BIDIRECTIONAL);
+                       __free_page(bd->data);
                }
 
-               /* Get the data from the SW ring */
-               sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
-               sw_rx_data = &rxq->sw_rx_ring[sw_rx_index];
-               data = sw_rx_data->data;
-
-               fp_cqe = &cqe->fast_path_regular;
-               len =  le16_to_cpu(fp_cqe->len_on_first_bd);
-               pad = fp_cqe->placement_offset;
-               flags = cqe->fast_path_regular.pars_flags.flags;
-
-               /* If this is an error packet then drop it */
-               parse_flag = le16_to_cpu(flags);
-
-               csum_flag = qede_check_csum(parse_flag);
-               if (unlikely(csum_flag == QEDE_CSUM_ERROR)) {
-                       if (qede_pkt_is_ip_fragmented(&cqe->fast_path_regular,
-                                                     parse_flag)) {
-                               rxq->rx_ip_frags++;
-                               goto alloc_skb;
-                       }
+               /* Regardless, we've consumed an Rx BD */
+               qede_rx_bd_ring_consume(rxq);
+               return false;
 
-                       DP_NOTICE(edev,
-                                 "CQE in CONS = %u has error, flags = %x, dropping incoming packet\n",
-                                 sw_comp_cons, parse_flag);
-                       rxq->rx_hw_errors++;
-                       qede_recycle_rx_bd_ring(rxq, edev, fp_cqe->bd_num);
-                       goto next_cqe;
-               }
+       default:
+               bpf_warn_invalid_xdp_action(act);
+       case XDP_ABORTED:
+       case XDP_DROP:
+               qede_recycle_rx_bd_ring(rxq, cqe->bd_num);
+       }
 
-alloc_skb:
-               skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE);
-               if (unlikely(!skb)) {
-                       DP_NOTICE(edev,
-                                 "skb allocation failed, dropping incoming packet\n");
-                       qede_recycle_rx_bd_ring(rxq, edev, fp_cqe->bd_num);
-                       rxq->rx_alloc_errors++;
-                       goto next_cqe;
+       return false;
+}
+
+static struct sk_buff *qede_rx_allocate_skb(struct qede_dev *edev,
+                                           struct qede_rx_queue *rxq,
+                                           struct sw_rx_data *bd, u16 len,
+                                           u16 pad)
+{
+       unsigned int offset = bd->page_offset;
+       struct skb_frag_struct *frag;
+       struct page *page = bd->data;
+       unsigned int pull_len;
+       struct sk_buff *skb;
+       unsigned char *va;
+
+       /* Allocate a new SKB with a sufficiently large header len */
+       skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE);
+       if (unlikely(!skb))
+               return NULL;
+
+       /* Copy data into SKB - if it's small, we can simply copy it and
+        * re-use the already allocated & mapped memory.
+        */
+       if (len + pad <= edev->rx_copybreak) {
+               memcpy(skb_put(skb, len),
+                      page_address(page) + pad + offset, len);
+               qede_reuse_page(rxq, bd);
+               goto out;
+       }
+
+       frag = &skb_shinfo(skb)->frags[0];
+
+       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+                       page, pad + offset, len, rxq->rx_buf_seg_size);
+
+       va = skb_frag_address(frag);
+       pull_len = eth_get_headlen(va, QEDE_RX_HDR_SIZE);
+
+       /* Align the pull_len to optimize memcpy */
+       memcpy(skb->data, va, ALIGN(pull_len, sizeof(long)));
+
+       /* Correct the skb & frag sizes offset after the pull */
+       skb_frag_size_sub(frag, pull_len);
+       frag->page_offset += pull_len;
+       skb->data_len -= pull_len;
+       skb->tail += pull_len;
+
+       if (unlikely(qede_realloc_rx_buffer(rxq, bd))) {
+               /* Incr page ref count to reuse on allocation failure so
+                * that it doesn't get freed while freeing SKB [as it's
+                * already mapped there].
+                */
+               page_ref_inc(page);
+               dev_kfree_skb_any(skb);
+               return NULL;
+       }
+
+out:
+       /* We've consumed the first BD and prepared an SKB */
+       qede_rx_bd_ring_consume(rxq);
+       return skb;
+}
+
+static int qede_rx_build_jumbo(struct qede_dev *edev,
+                              struct qede_rx_queue *rxq,
+                              struct sk_buff *skb,
+                              struct eth_fast_path_rx_reg_cqe *cqe,
+                              u16 first_bd_len)
+{
+       u16 pkt_len = le16_to_cpu(cqe->pkt_len);
+       struct sw_rx_data *bd;
+       u16 bd_cons_idx;
+       u8 num_frags;
+
+       pkt_len -= first_bd_len;
+
+       /* We've already used one BD for the SKB. Now take care of the rest */
+       for (num_frags = cqe->bd_num - 1; num_frags > 0; num_frags--) {
+               u16 cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size :
+                   pkt_len;
+
+               if (unlikely(!cur_size)) {
+                       DP_ERR(edev,
+                              "Still got %d BDs for mapping jumbo, but length became 0\n",
+                              num_frags);
+                       goto out;
                }
 
-               /* Copy data into SKB */
-               if (len + pad <= edev->rx_copybreak) {
-                       memcpy(skb_put(skb, len),
-                              page_address(data) + pad +
-                               sw_rx_data->page_offset, len);
-                       qede_reuse_page(edev, rxq, sw_rx_data);
+               /* We need a replacement buffer for each BD */
+               if (unlikely(qede_alloc_rx_buffer(rxq)))
+                       goto out;
+
+               /* Now that we've allocated the replacement buffer,
+                * we can safely consume the next BD and map it to the SKB.
+                */
+               bd_cons_idx = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
+               bd = &rxq->sw_rx_ring[bd_cons_idx];
+               qede_rx_bd_ring_consume(rxq);
+
+               dma_unmap_page(rxq->dev, bd->mapping,
+                              PAGE_SIZE, DMA_FROM_DEVICE);
+
+               skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
+                                  bd->data, 0, cur_size);
+
+               skb->truesize += PAGE_SIZE;
+               skb->data_len += cur_size;
+               skb->len += cur_size;
+               pkt_len -= cur_size;
+       }
+
+       if (unlikely(pkt_len))
+               DP_ERR(edev,
+                      "Mapped all BDs of jumbo, but still have %d bytes\n",
+                      pkt_len);
+
+out:
+       return num_frags;
+}
+
+static int qede_rx_process_tpa_cqe(struct qede_dev *edev,
+                                  struct qede_fastpath *fp,
+                                  struct qede_rx_queue *rxq,
+                                  union eth_rx_cqe *cqe,
+                                  enum eth_rx_cqe_type type)
+{
+       switch (type) {
+       case ETH_RX_CQE_TYPE_TPA_START:
+               qede_tpa_start(edev, rxq, &cqe->fast_path_tpa_start);
+               return 0;
+       case ETH_RX_CQE_TYPE_TPA_CONT:
+               qede_tpa_cont(edev, rxq, &cqe->fast_path_tpa_cont);
+               return 0;
+       case ETH_RX_CQE_TYPE_TPA_END:
+               qede_tpa_end(edev, fp, &cqe->fast_path_tpa_end);
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+static int qede_rx_process_cqe(struct qede_dev *edev,
+                              struct qede_fastpath *fp,
+                              struct qede_rx_queue *rxq)
+{
+       struct bpf_prog *xdp_prog = READ_ONCE(rxq->xdp_prog);
+       struct eth_fast_path_rx_reg_cqe *fp_cqe;
+       u16 len, pad, bd_cons_idx, parse_flag;
+       enum eth_rx_cqe_type cqe_type;
+       union eth_rx_cqe *cqe;
+       struct sw_rx_data *bd;
+       struct sk_buff *skb;
+       __le16 flags;
+       u8 csum_flag;
+
+       /* Get the CQE from the completion ring */
+       cqe = (union eth_rx_cqe *)qed_chain_consume(&rxq->rx_comp_ring);
+       cqe_type = cqe->fast_path_regular.type;
+
+       /* Process an unlikely slowpath event */
+       if (unlikely(cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH)) {
+               struct eth_slow_path_rx_cqe *sp_cqe;
+
+               sp_cqe = (struct eth_slow_path_rx_cqe *)cqe;
+               edev->ops->eth_cqe_completion(edev->cdev, fp->id, sp_cqe);
+               return 0;
+       }
+
+       /* Handle TPA cqes */
+       if (cqe_type != ETH_RX_CQE_TYPE_REGULAR)
+               return qede_rx_process_tpa_cqe(edev, fp, rxq, cqe, cqe_type);
+
+       /* Get the data from the SW ring; Consume it only after it's evident
+        * we wouldn't recycle it.
+        */
+       bd_cons_idx = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
+       bd = &rxq->sw_rx_ring[bd_cons_idx];
+
+       fp_cqe = &cqe->fast_path_regular;
+       len = le16_to_cpu(fp_cqe->len_on_first_bd);
+       pad = fp_cqe->placement_offset;
+
+       /* Run eBPF program if one is attached */
+       if (xdp_prog)
+               if (!qede_rx_xdp(edev, fp, rxq, xdp_prog, bd, fp_cqe))
+                       return 1;
+
+       /* If this is an error packet then drop it */
+       flags = cqe->fast_path_regular.pars_flags.flags;
+       parse_flag = le16_to_cpu(flags);
+
+       csum_flag = qede_check_csum(parse_flag);
+       if (unlikely(csum_flag == QEDE_CSUM_ERROR)) {
+               if (qede_pkt_is_ip_fragmented(fp_cqe, parse_flag)) {
+                       rxq->rx_ip_frags++;
                } else {
-                       struct skb_frag_struct *frag;
-                       unsigned int pull_len;
-                       unsigned char *va;
-
-                       frag = &skb_shinfo(skb)->frags[0];
-
-                       skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, data,
-                                       pad + sw_rx_data->page_offset,
-                                       len, rxq->rx_buf_seg_size);
-
-                       va = skb_frag_address(frag);
-                       pull_len = eth_get_headlen(va, QEDE_RX_HDR_SIZE);
-
-                       /* Align the pull_len to optimize memcpy */
-                       memcpy(skb->data, va, ALIGN(pull_len, sizeof(long)));
-
-                       skb_frag_size_sub(frag, pull_len);
-                       frag->page_offset += pull_len;
-                       skb->data_len -= pull_len;
-                       skb->tail += pull_len;
-
-                       if (unlikely(qede_realloc_rx_buffer(edev, rxq,
-                                                           sw_rx_data))) {
-                               DP_ERR(edev, "Failed to allocate rx buffer\n");
-                               /* Incr page ref count to reuse on allocation
-                                * failure so that it doesn't get freed while
-                                * freeing SKB.
-                                */
-
-                               page_ref_inc(sw_rx_data->data);
-                               rxq->rx_alloc_errors++;
-                               qede_recycle_rx_bd_ring(rxq, edev,
-                                                       fp_cqe->bd_num);
-                               dev_kfree_skb_any(skb);
-                               goto next_cqe;
-                       }
+                       DP_NOTICE(edev,
+                                 "CQE has error, flags = %x, dropping incoming packet\n",
+                                 parse_flag);
+                       rxq->rx_hw_errors++;
+                       qede_recycle_rx_bd_ring(rxq, fp_cqe->bd_num);
+                       return 0;
                }
+       }
 
-               qede_rx_bd_ring_consume(rxq);
+       /* Basic validation passed; Need to prepare an SKB. This would also
+        * guarantee to finally consume the first BD upon success.
+        */
+       skb = qede_rx_allocate_skb(edev, rxq, bd, len, pad);
+       if (!skb) {
+               rxq->rx_alloc_errors++;
+               qede_recycle_rx_bd_ring(rxq, fp_cqe->bd_num);
+               return 0;
+       }
 
-               if (fp_cqe->bd_num != 1) {
-                       u16 pkt_len = le16_to_cpu(fp_cqe->pkt_len);
-                       u8 num_frags;
-
-                       pkt_len -= len;
-
-                       for (num_frags = fp_cqe->bd_num - 1; num_frags > 0;
-                            num_frags--) {
-                               u16 cur_size = pkt_len > rxq->rx_buf_size ?
-                                               rxq->rx_buf_size : pkt_len;
-                               if (unlikely(!cur_size)) {
-                                       DP_ERR(edev,
-                                              "Still got %d BDs for mapping jumbo, but length became 0\n",
-                                              num_frags);
-                                       qede_recycle_rx_bd_ring(rxq, edev,
-                                                               num_frags);
-                                       dev_kfree_skb_any(skb);
-                                       goto next_cqe;
-                               }
-
-                               if (unlikely(qede_alloc_rx_buffer(edev, rxq))) {
-                                       qede_recycle_rx_bd_ring(rxq, edev,
-                                                               num_frags);
-                                       dev_kfree_skb_any(skb);
-                                       goto next_cqe;
-                               }
-
-                               sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
-                               sw_rx_data = &rxq->sw_rx_ring[sw_rx_index];
-                               qede_rx_bd_ring_consume(rxq);
-
-                               dma_unmap_page(&edev->pdev->dev,
-                                              sw_rx_data->mapping,
-                                              PAGE_SIZE, DMA_FROM_DEVICE);
-
-                               skb_fill_page_desc(skb,
-                                                  skb_shinfo(skb)->nr_frags++,
-                                                  sw_rx_data->data, 0,
-                                                  cur_size);
-
-                               skb->truesize += PAGE_SIZE;
-                               skb->data_len += cur_size;
-                               skb->len += cur_size;
-                               pkt_len -= cur_size;
-                       }
+       /* In case of a Jumbo packet, several PAGE_SIZEd buffers will be
+        * pointed to by a single cqe.
+        */
+       if (fp_cqe->bd_num > 1) {
+               u16 unmapped_frags = qede_rx_build_jumbo(edev, rxq, skb,
+                                                        fp_cqe, len);
 
-                       if (unlikely(pkt_len))
-                               DP_ERR(edev,
-                                      "Mapped all BDs of jumbo, but still have %d bytes\n",
-                                      pkt_len);
+               if (unlikely(unmapped_frags > 0)) {
+                       qede_recycle_rx_bd_ring(rxq, unmapped_frags);
+                       dev_kfree_skb_any(skb);
+                       return 0;
                }
+       }
 
-               skb->protocol = eth_type_trans(skb, edev->ndev);
+       /* The SKB contains all the data. Now prepare meta-magic */
+       skb->protocol = eth_type_trans(skb, edev->ndev);
+       qede_get_rxhash(skb, fp_cqe->bitfields, fp_cqe->rss_hash);
+       qede_set_skb_csum(skb, csum_flag);
+       skb_record_rx_queue(skb, rxq->rxq_id);
 
-               rx_hash = qede_get_rxhash(edev, fp_cqe->bitfields,
-                                         fp_cqe->rss_hash, &rxhash_type);
+       /* SKB is prepared - pass it to stack */
+       qede_skb_receive(edev, fp, rxq, skb, le16_to_cpu(fp_cqe->vlan_tag));
 
-               skb_set_hash(skb, rx_hash, rxhash_type);
+       return 1;
+}
 
-               qede_set_skb_csum(skb, csum_flag);
+static int qede_rx_int(struct qede_fastpath *fp, int budget)
+{
+       struct qede_rx_queue *rxq = fp->rxq;
+       struct qede_dev *edev = fp->edev;
+       u16 hw_comp_cons, sw_comp_cons;
+       int work_done = 0;
 
-               skb_record_rx_queue(skb, fp->rxq->rxq_id);
+       hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr);
+       sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
 
-               qede_skb_receive(edev, fp, skb, le16_to_cpu(fp_cqe->vlan_tag));
-next_rx_only:
-               rx_pkt++;
+       /* Memory barrier to prevent the CPU from doing speculative reads of CQE
+        * / BD in the while-loop before reading hw_comp_cons. If the CQE is
+        * read before it is written by FW, then FW writes CQE and SB, and then
+        * the CPU reads the hw_comp_cons, it will use an old CQE.
+        */
+       rmb();
 
-next_cqe: /* don't consume bd rx buffer */
+       /* Loop to complete all indicated BDs */
+       while ((sw_comp_cons != hw_comp_cons) && (work_done < budget)) {
+               qede_rx_process_cqe(edev, fp, rxq);
                qed_chain_recycle_consumed(&rxq->rx_comp_ring);
                sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
-               /* CR TPA - revisit how to handle budget in TPA perhaps
-                * increase on "end"
-                */
-               if (rx_pkt == budget)
-                       break;
-       } /* repeat while sw_comp_cons != hw_comp_cons... */
+               work_done++;
+       }
 
        /* Update producers */
        qede_update_rx_prod(edev, rxq);
 
-       rxq->rcv_pkts += rx_pkt;
+       return work_done;
+}
+
+static bool qede_poll_is_more_work(struct qede_fastpath *fp)
+{
+       qed_sb_update_sb_idx(fp->sb_info);
+
+       /* *_has_*_work() reads the status block, thus we need to ensure that
+        * status block indices have been actually read (qed_sb_update_sb_idx)
+        * prior to this check (*_has_*_work) so that we won't write the
+        * "newer" value of the status block to HW (if there was a DMA right
+        * after qede_has_rx_work and if there is no rmb, the memory reading
+        * (qed_sb_update_sb_idx) may be postponed to right before *_ack_sb).
+        * In this case there will never be another interrupt until there is
+        * another update of the status block, while there is still unhandled
+        * work.
+        */
+       rmb();
+
+       if (likely(fp->type & QEDE_FASTPATH_RX))
+               if (qede_has_rx_work(fp->rxq))
+                       return true;
+
+       if (fp->type & QEDE_FASTPATH_XDP)
+               if (qede_txq_has_work(fp->xdp_tx))
+                       return true;
+
+       if (likely(fp->type & QEDE_FASTPATH_TX))
+               if (qede_txq_has_work(fp->txq))
+                       return true;
 
-       return rx_pkt;
+       return false;
 }
 
 static int qede_poll(struct napi_struct *napi, int budget)
@@ -1642,48 +1849,35 @@ static int qede_poll(struct napi_struct *napi, int budget)
                                                napi);
        struct qede_dev *edev = fp->edev;
        int rx_work_done = 0;
-       u8 tc;
 
-       for (tc = 0; tc < edev->num_tc; tc++)
-               if (likely(fp->type & QEDE_FASTPATH_TX) &&
-                   qede_txq_has_work(&fp->txqs[tc]))
-                       qede_tx_int(edev, &fp->txqs[tc]);
+       if (likely(fp->type & QEDE_FASTPATH_TX) && qede_txq_has_work(fp->txq))
+               qede_tx_int(edev, fp->txq);
+
+       if ((fp->type & QEDE_FASTPATH_XDP) && qede_txq_has_work(fp->xdp_tx))
+               qede_xdp_tx_int(edev, fp->xdp_tx);
 
        rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) &&
                        qede_has_rx_work(fp->rxq)) ?
                        qede_rx_int(fp, budget) : 0;
        if (rx_work_done < budget) {
-               qed_sb_update_sb_idx(fp->sb_info);
-               /* *_has_*_work() reads the status block,
-                * thus we need to ensure that status block indices
-                * have been actually read (qed_sb_update_sb_idx)
-                * prior to this check (*_has_*_work) so that
-                * we won't write the "newer" value of the status block
-                * to HW (if there was a DMA right after
-                * qede_has_rx_work and if there is no rmb, the memory
-                * reading (qed_sb_update_sb_idx) may be postponed
-                * to right before *_ack_sb). In this case there
-                * will never be another interrupt until there is
-                * another update of the status block, while there
-                * is still unhandled work.
-                */
-               rmb();
-
-               /* Fall out from the NAPI loop if needed */
-               if (!((likely(fp->type & QEDE_FASTPATH_RX) &&
-                      qede_has_rx_work(fp->rxq)) ||
-                     (likely(fp->type & QEDE_FASTPATH_TX) &&
-                      qede_has_tx_work(fp)))) {
+               if (!qede_poll_is_more_work(fp)) {
                        napi_complete(napi);
 
                        /* Update and reenable interrupts */
-                       qed_sb_ack(fp->sb_info, IGU_INT_ENABLE,
-                                  1 /*update*/);
+                       qed_sb_ack(fp->sb_info, IGU_INT_ENABLE, 1);
                } else {
                        rx_work_done = budget;
                }
        }
 
+       if (fp->xdp_xmit) {
+               u16 xdp_prod = qed_chain_get_prod_idx(&fp->xdp_tx->tx_pbl);
+
+               fp->xdp_xmit = 0;
+               fp->xdp_tx->tx_db.data.bd_prod = cpu_to_le16(xdp_prod);
+               qede_update_tx_producer(fp->xdp_tx);
+       }
+
        return rx_work_done;
 }
 
@@ -1934,7 +2128,7 @@ static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
        struct qede_dev *edev = netdev_priv(dev);
        struct qede_vlan *vlan, *tmp;
-       int rc;
+       int rc = 0;
 
        DP_VERBOSE(edev, NETIF_MSG_IFUP, "Adding vlan 0x%04x\n", vid);
 
@@ -1958,6 +2152,7 @@ static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
        }
 
        /* If interface is down, cache this VLAN ID and return */
+       __qede_lock(edev);
        if (edev->state != QEDE_STATE_OPEN) {
                DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
                           "Interface is down, VLAN %d will be configured when interface is up\n",
@@ -1965,8 +2160,7 @@ static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
                if (vid != 0)
                        edev->non_configured_vlans++;
                list_add(&vlan->list, &edev->vlan_list);
-
-               return 0;
+               goto out;
        }
 
        /* Check for the filter limit.
@@ -1982,7 +2176,7 @@ static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
                        DP_ERR(edev, "Failed to configure VLAN %d\n",
                               vlan->vid);
                        kfree(vlan);
-                       return -EINVAL;
+                       goto out;
                }
                vlan->configured = true;
 
@@ -1999,7 +2193,9 @@ static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 
        list_add(&vlan->list, &edev->vlan_list);
 
-       return 0;
+out:
+       __qede_unlock(edev);
+       return rc;
 }
 
 static void qede_del_vlan_from_list(struct qede_dev *edev,
@@ -2076,11 +2272,12 @@ static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
        struct qede_dev *edev = netdev_priv(dev);
        struct qede_vlan *vlan = NULL;
-       int rc;
+       int rc = 0;
 
        DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid);
 
        /* Find whether entry exists */
+       __qede_lock(edev);
        list_for_each_entry(vlan, &edev->vlan_list, list)
                if (vlan->vid == vid)
                        break;
@@ -2088,7 +2285,7 @@ static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
        if (!vlan || (vlan->vid != vid)) {
                DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
                           "Vlan isn't configured\n");
-               return 0;
+               goto out;
        }
 
        if (edev->state != QEDE_STATE_OPEN) {
@@ -2098,7 +2295,7 @@ static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
                DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
                           "Interface is down, removing VLAN from list only\n");
                qede_del_vlan_from_list(edev, vlan);
-               return 0;
+               goto out;
        }
 
        /* Remove vlan */
@@ -2107,7 +2304,7 @@ static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
                                            vid);
                if (rc) {
                        DP_ERR(edev, "Failed to remove VLAN %d\n", vid);
-                       return -EINVAL;
+                       goto out;
                }
        }
 
@@ -2118,6 +2315,8 @@ static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
         */
        rc = qede_configure_vlan_filters(edev);
 
+out:
+       __qede_unlock(edev);
        return rc;
 }
 
@@ -2147,7 +2346,13 @@ static void qede_vlan_mark_nonconfigured(struct qede_dev *edev)
        edev->accept_any_vlan = false;
 }
 
-static int qede_set_features(struct net_device *dev, netdev_features_t features)
+static void qede_set_features_reload(struct qede_dev *edev,
+                                    struct qede_reload_args *args)
+{
+       edev->ndev->features = args->u.features;
+}
+
+int qede_set_features(struct net_device *dev, netdev_features_t features)
 {
        struct qede_dev *edev = netdev_priv(dev);
        netdev_features_t changes = features ^ dev->features;
@@ -2161,9 +2366,23 @@ static int qede_set_features(struct net_device *dev, netdev_features_t features)
                        need_reload = edev->gro_disable;
        }
 
-       if (need_reload && netif_running(edev->ndev)) {
-               dev->features = features;
-               qede_reload(edev, NULL, NULL);
+       if (need_reload) {
+               struct qede_reload_args args;
+
+               args.u.features = features;
+               args.func = &qede_set_features_reload;
+
+               /* Make sure that we definitely need to reload.
+                * In case of an eBPF attached program, there will be no FW
+                * aggregations, so no need to actually reload.
+                */
+               __qede_lock(edev);
+               if (edev->xdp_prog)
+                       args.func(edev, &args);
+               else
+                       qede_reload(edev, &args, true);
+               __qede_unlock(edev);
+
                return 1;
        }
 
@@ -2274,6 +2493,43 @@ static netdev_features_t qede_features_check(struct sk_buff *skb,
        return features;
 }
 
+static void qede_xdp_reload_func(struct qede_dev *edev,
+                                struct qede_reload_args *args)
+{
+       struct bpf_prog *old;
+
+       old = xchg(&edev->xdp_prog, args->u.new_prog);
+       if (old)
+               bpf_prog_put(old);
+}
+
+static int qede_xdp_set(struct qede_dev *edev, struct bpf_prog *prog)
+{
+       struct qede_reload_args args;
+
+       /* If we're called, there was already a bpf reference increment */
+       args.func = &qede_xdp_reload_func;
+       args.u.new_prog = prog;
+       qede_reload(edev, &args, false);
+
+       return 0;
+}
+
+static int qede_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+       struct qede_dev *edev = netdev_priv(dev);
+
+       switch (xdp->command) {
+       case XDP_SETUP_PROG:
+               return qede_xdp_set(edev, xdp->prog);
+       case XDP_QUERY_PROG:
+               xdp->prog_attached = !!edev->xdp_prog;
+               return 0;
+       default:
+               return -EINVAL;
+       }
+}
+
 static const struct net_device_ops qede_netdev_ops = {
        .ndo_open = qede_open,
        .ndo_stop = qede_close,
@@ -2299,6 +2555,7 @@ static const struct net_device_ops qede_netdev_ops = {
        .ndo_udp_tunnel_add = qede_udp_tunnel_add,
        .ndo_udp_tunnel_del = qede_udp_tunnel_del,
        .ndo_features_check = qede_features_check,
+       .ndo_xdp = qede_xdp,
 };
 
 /* -------------------------------------------------------------------------
@@ -2339,8 +2596,6 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
        memset(&edev->stats, 0, sizeof(edev->stats));
        memcpy(&edev->dev_info, info, sizeof(*info));
 
-       edev->num_tc = edev->dev_info.num_tc;
-
        INIT_LIST_HEAD(&edev->vlan_list);
 
        return edev;
@@ -2365,7 +2620,7 @@ static void qede_init_ndev(struct qede_dev *edev)
 
        qede_set_ethtool_ops(ndev);
 
-       ndev->priv_flags = IFF_UNICAST_FLT;
+       ndev->priv_flags |= IFF_UNICAST_FLT;
 
        /* user-changeble features */
        hw_features = NETIF_F_GRO | NETIF_F_SG |
@@ -2397,6 +2652,8 @@ static void qede_init_ndev(struct qede_dev *edev)
 
        /* Set network device HW mac */
        ether_addr_copy(edev->ndev->dev_addr, edev->dev_info.common.hw_mac);
+
+       ndev->mtu = edev->dev_info.common.mtu;
 }
 
 /* This function converts from 32b param to two params of level and module
@@ -2436,7 +2693,8 @@ static void qede_free_fp_array(struct qede_dev *edev)
 
                        kfree(fp->sb_info);
                        kfree(fp->rxq);
-                       kfree(fp->txqs);
+                       kfree(fp->xdp_tx);
+                       kfree(fp->txq);
                }
                kfree(edev->fp_array);
        }
@@ -2469,7 +2727,7 @@ static int qede_alloc_fp_array(struct qede_dev *edev)
        for_each_queue(i) {
                fp = &edev->fp_array[i];
 
-               fp->sb_info = kcalloc(1, sizeof(*fp->sb_info), GFP_KERNEL);
+               fp->sb_info = kzalloc(sizeof(*fp->sb_info), GFP_KERNEL);
                if (!fp->sb_info) {
                        DP_NOTICE(edev, "sb info struct allocation failed\n");
                        goto err;
@@ -2486,21 +2744,22 @@ static int qede_alloc_fp_array(struct qede_dev *edev)
                }
 
                if (fp->type & QEDE_FASTPATH_TX) {
-                       fp->txqs = kcalloc(edev->num_tc, sizeof(*fp->txqs),
-                                          GFP_KERNEL);
-                       if (!fp->txqs) {
-                               DP_NOTICE(edev,
-                                         "TXQ array allocation failed\n");
+                       fp->txq = kzalloc(sizeof(*fp->txq), GFP_KERNEL);
+                       if (!fp->txq)
                                goto err;
-                       }
                }
 
                if (fp->type & QEDE_FASTPATH_RX) {
-                       fp->rxq = kcalloc(1, sizeof(*fp->rxq), GFP_KERNEL);
-                       if (!fp->rxq) {
-                               DP_NOTICE(edev,
-                                         "RXQ struct allocation failed\n");
+                       fp->rxq = kzalloc(sizeof(*fp->rxq), GFP_KERNEL);
+                       if (!fp->rxq)
                                goto err;
+
+                       if (edev->xdp_prog) {
+                               fp->xdp_tx = kzalloc(sizeof(*fp->xdp_tx),
+                                                    GFP_KERNEL);
+                               if (!fp->xdp_tx)
+                                       goto err;
+                               fp->type |= QEDE_FASTPATH_XDP;
                        }
                }
        }
@@ -2517,12 +2776,11 @@ static void qede_sp_task(struct work_struct *work)
                                             sp_task.work);
        struct qed_dev *cdev = edev->cdev;
 
-       mutex_lock(&edev->qede_lock);
+       __qede_lock(edev);
 
-       if (edev->state == QEDE_STATE_OPEN) {
-               if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
+       if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
+               if (edev->state == QEDE_STATE_OPEN)
                        qede_config_rx_mode(edev->ndev);
-       }
 
        if (test_and_clear_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags)) {
                struct qed_tunn_params tunn_params;
@@ -2542,16 +2800,16 @@ static void qede_sp_task(struct work_struct *work)
                qed_ops->tunn_config(cdev, &tunn_params);
        }
 
-       mutex_unlock(&edev->qede_lock);
+       __qede_unlock(edev);
 }
 
 static void qede_update_pf_params(struct qed_dev *cdev)
 {
        struct qed_pf_params pf_params;
 
-       /* 64 rx + 64 tx */
+       /* 64 rx + 64 tx + 64 XDP */
        memset(&pf_params, 0, sizeof(struct qed_pf_params));
-       pf_params.eth_pf_params.num_cons = 128;
+       pf_params.eth_pf_params.num_cons = 192;
        qed_ops->common->update_pf_params(cdev, &pf_params);
 }
 
@@ -2700,10 +2958,16 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 
        pci_set_drvdata(pdev, NULL);
 
+       /* Release edev's reference to XDP's bpf if such exist */
+       if (edev->xdp_prog)
+               bpf_prog_put(edev->xdp_prog);
+
        free_netdev(ndev);
 
        /* Use global ops since we've freed edev */
        qed_ops->common->slowpath_stop(cdev);
+       if (system_state == SYSTEM_POWER_OFF)
+               return;
        qed_ops->common->remove(cdev);
 
        dev_info(&pdev->dev, "Ending qede_remove successfully\n");
@@ -2714,6 +2978,11 @@ static void qede_remove(struct pci_dev *pdev)
        __qede_remove(pdev, QEDE_REMOVE_NORMAL);
 }
 
+static void qede_shutdown(struct pci_dev *pdev)
+{
+       __qede_remove(pdev, QEDE_REMOVE_NORMAL);
+}
+
 /* -------------------------------------------------------------------------
  * START OF LOAD / UNLOAD
  * -------------------------------------------------------------------------
@@ -2797,7 +3066,7 @@ static void qede_free_rx_buffers(struct qede_dev *edev,
                data = rx_buf->data;
 
                dma_unmap_page(&edev->pdev->dev,
-                              rx_buf->mapping, PAGE_SIZE, DMA_FROM_DEVICE);
+                              rx_buf->mapping, PAGE_SIZE, rxq->data_direction);
 
                rx_buf->data = NULL;
                __free_page(data);
@@ -2813,7 +3082,7 @@ static void qede_free_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
 
        for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) {
                struct qede_agg_info *tpa_info = &rxq->tpa_info[i];
-               struct sw_rx_data *replace_buf = &tpa_info->replace_buf;
+               struct sw_rx_data *replace_buf = &tpa_info->buffer;
 
                if (replace_buf->data) {
                        dma_unmap_page(&edev->pdev->dev,
@@ -2839,52 +3108,15 @@ static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
        edev->ops->common->chain_free(edev->cdev, &rxq->rx_comp_ring);
 }
 
-static int qede_alloc_rx_buffer(struct qede_dev *edev,
-                               struct qede_rx_queue *rxq)
-{
-       struct sw_rx_data *sw_rx_data;
-       struct eth_rx_bd *rx_bd;
-       dma_addr_t mapping;
-       struct page *data;
-
-       data = alloc_pages(GFP_ATOMIC, 0);
-       if (unlikely(!data)) {
-               DP_NOTICE(edev, "Failed to allocate Rx data [page]\n");
-               return -ENOMEM;
-       }
-
-       /* Map the entire page as it would be used
-        * for multiple RX buffer segment size mapping.
-        */
-       mapping = dma_map_page(&edev->pdev->dev, data, 0,
-                              PAGE_SIZE, DMA_FROM_DEVICE);
-       if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
-               __free_page(data);
-               DP_NOTICE(edev, "Failed to map Rx buffer\n");
-               return -ENOMEM;
-       }
-
-       sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX];
-       sw_rx_data->page_offset = 0;
-       sw_rx_data->data = data;
-       sw_rx_data->mapping = mapping;
-
-       /* Advance PROD and get BD pointer */
-       rx_bd = (struct eth_rx_bd *)qed_chain_produce(&rxq->rx_bd_ring);
-       WARN_ON(!rx_bd);
-       rx_bd->addr.hi = cpu_to_le32(upper_32_bits(mapping));
-       rx_bd->addr.lo = cpu_to_le32(lower_32_bits(mapping));
-
-       rxq->sw_rx_prod++;
-
-       return 0;
-}
-
 static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
        dma_addr_t mapping;
        int i;
 
+       /* Don't perform FW aggregations in case of XDP */
+       if (edev->xdp_prog)
+               edev->gro_disable = 1;
+
        if (edev->gro_disable)
                return 0;
 
@@ -2895,7 +3127,7 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
 
        for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) {
                struct qede_agg_info *tpa_info = &rxq->tpa_info[i];
-               struct sw_rx_data *replace_buf = &tpa_info->replace_buf;
+               struct sw_rx_data *replace_buf = &tpa_info->buffer;
 
                replace_buf->data = alloc_pages(GFP_ATOMIC, 0);
                if (unlikely(!replace_buf->data)) {
@@ -2905,7 +3137,7 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
                }
 
                mapping = dma_map_page(&edev->pdev->dev, replace_buf->data, 0,
-                                      rxq->rx_buf_size, DMA_FROM_DEVICE);
+                                      PAGE_SIZE, DMA_FROM_DEVICE);
                if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
                        DP_NOTICE(edev,
                                  "Failed to map TPA replacement buffer\n");
@@ -2913,10 +3145,9 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
                }
 
                replace_buf->mapping = mapping;
-               tpa_info->replace_buf.page_offset = 0;
-
-               tpa_info->replace_buf_mapping = mapping;
-               tpa_info->agg_state = QEDE_AGG_STATE_NONE;
+               tpa_info->buffer.page_offset = 0;
+               tpa_info->buffer_mapping = mapping;
+               tpa_info->state = QEDE_AGG_STATE_NONE;
        }
 
        return 0;
@@ -2938,8 +3169,13 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
        if (rxq->rx_buf_size > PAGE_SIZE)
                rxq->rx_buf_size = PAGE_SIZE;
 
-       /* Segment size to spilt a page in multiple equal parts */
-       rxq->rx_buf_seg_size = roundup_pow_of_two(rxq->rx_buf_size);
+       /* Segment size to spilt a page in multiple equal parts,
+        * unless XDP is used in which case we'd use the entire page.
+        */
+       if (!edev->xdp_prog)
+               rxq->rx_buf_seg_size = roundup_pow_of_two(rxq->rx_buf_size);
+       else
+               rxq->rx_buf_seg_size = PAGE_SIZE;
 
        /* Allocate the parallel driver ring for Rx buffers */
        size = sizeof(*rxq->sw_rx_ring) * RX_RING_SIZE;
@@ -2975,7 +3211,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 
        /* Allocate buffers for the Rx ring */
        for (i = 0; i < rxq->num_rx_buffers; i++) {
-               rc = qede_alloc_rx_buffer(edev, rxq);
+               rc = qede_alloc_rx_buffer(rxq);
                if (rc) {
                        DP_ERR(edev,
                               "Rx buffers allocation failed at index %d\n", i);
@@ -2991,7 +3227,10 @@ err:
 static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
        /* Free the parallel SW ring */
-       kfree(txq->sw_tx_ring);
+       if (txq->is_xdp)
+               kfree(txq->sw_tx_ring.pages);
+       else
+               kfree(txq->sw_tx_ring.skbs);
 
        /* Free the real RQ ring used by FW */
        edev->ops->common->chain_free(edev->cdev, &txq->tx_pbl);
@@ -3000,24 +3239,29 @@ static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 /* This function allocates all memory needed per Tx queue */
 static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
-       int size, rc;
        union eth_tx_bd_types *p_virt;
+       int size, rc;
 
        txq->num_tx_buffers = edev->q_num_tx_buffers;
 
        /* Allocate the parallel driver ring for Tx buffers */
-       size = sizeof(*txq->sw_tx_ring) * NUM_TX_BDS_MAX;
-       txq->sw_tx_ring = kzalloc(size, GFP_KERNEL);
-       if (!txq->sw_tx_ring) {
-               DP_NOTICE(edev, "Tx buffers ring allocation failed\n");
-               goto err;
+       if (txq->is_xdp) {
+               size = sizeof(*txq->sw_tx_ring.pages) * TX_RING_SIZE;
+               txq->sw_tx_ring.pages = kzalloc(size, GFP_KERNEL);
+               if (!txq->sw_tx_ring.pages)
+                       goto err;
+       } else {
+               size = sizeof(*txq->sw_tx_ring.skbs) * TX_RING_SIZE;
+               txq->sw_tx_ring.skbs = kzalloc(size, GFP_KERNEL);
+               if (!txq->sw_tx_ring.skbs)
+                       goto err;
        }
 
        rc = edev->ops->common->chain_alloc(edev->cdev,
                                            QED_CHAIN_USE_TO_CONSUME_PRODUCE,
                                            QED_CHAIN_MODE_PBL,
                                            QED_CHAIN_CNT_TYPE_U16,
-                                           NUM_TX_BDS_MAX,
+                                           TX_RING_SIZE,
                                            sizeof(*p_virt), &txq->tx_pbl);
        if (rc)
                goto err;
@@ -3032,16 +3276,13 @@ err:
 /* This function frees all memory of a single fp */
 static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 {
-       int tc;
-
        qede_free_mem_sb(edev, fp->sb_info);
 
        if (fp->type & QEDE_FASTPATH_RX)
                qede_free_mem_rxq(edev, fp->rxq);
 
        if (fp->type & QEDE_FASTPATH_TX)
-               for (tc = 0; tc < edev->num_tc; tc++)
-                       qede_free_mem_txq(edev, &fp->txqs[tc]);
+               qede_free_mem_txq(edev, fp->txq);
 }
 
 /* This function allocates all memory needed for a single fp (i.e. an entity
@@ -3049,28 +3290,31 @@ static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
  */
 static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 {
-       int rc, tc;
+       int rc = 0;
 
        rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->id);
        if (rc)
-               goto err;
+               goto out;
 
        if (fp->type & QEDE_FASTPATH_RX) {
                rc = qede_alloc_mem_rxq(edev, fp->rxq);
                if (rc)
-                       goto err;
+                       goto out;
+       }
+
+       if (fp->type & QEDE_FASTPATH_XDP) {
+               rc = qede_alloc_mem_txq(edev, fp->xdp_tx);
+               if (rc)
+                       goto out;
        }
 
        if (fp->type & QEDE_FASTPATH_TX) {
-               for (tc = 0; tc < edev->num_tc; tc++) {
-                       rc = qede_alloc_mem_txq(edev, &fp->txqs[tc]);
-                       if (rc)
-                               goto err;
-               }
+               rc = qede_alloc_mem_txq(edev, fp->txq);
+               if (rc)
+                       goto out;
        }
 
-       return 0;
-err:
+out:
        return rc;
 }
 
@@ -3109,7 +3353,7 @@ static int qede_alloc_mem_load(struct qede_dev *edev)
 /* This function inits fp content and resets the SB, RXQ and TXQ structures */
 static void qede_init_fp(struct qede_dev *edev)
 {
-       int queue_id, rxq_index = 0, txq_index = 0, tc;
+       int queue_id, rxq_index = 0, txq_index = 0;
        struct qede_fastpath *fp;
 
        for_each_queue(queue_id) {
@@ -3118,25 +3362,28 @@ static void qede_init_fp(struct qede_dev *edev)
                fp->edev = edev;
                fp->id = queue_id;
 
-               memset((void *)&fp->napi, 0, sizeof(fp->napi));
-
-               memset((void *)fp->sb_info, 0, sizeof(*fp->sb_info));
+               if (fp->type & QEDE_FASTPATH_XDP) {
+                       fp->xdp_tx->index = QEDE_TXQ_IDX_TO_XDP(edev,
+                                                               rxq_index);
+                       fp->xdp_tx->is_xdp = 1;
+               }
 
                if (fp->type & QEDE_FASTPATH_RX) {
-                       memset((void *)fp->rxq, 0, sizeof(*fp->rxq));
                        fp->rxq->rxq_id = rxq_index++;
+
+                       /* Determine how to map buffers for this queue */
+                       if (fp->type & QEDE_FASTPATH_XDP)
+                               fp->rxq->data_direction = DMA_BIDIRECTIONAL;
+                       else
+                               fp->rxq->data_direction = DMA_FROM_DEVICE;
+                       fp->rxq->dev = &edev->pdev->dev;
                }
 
                if (fp->type & QEDE_FASTPATH_TX) {
-                       memset((void *)fp->txqs, 0,
-                              (edev->num_tc * sizeof(*fp->txqs)));
-                       for (tc = 0; tc < edev->num_tc; tc++) {
-                               fp->txqs[tc].index = txq_index +
-                                   tc * QEDE_TSS_COUNT(edev);
-                               if (edev->dev_info.is_legacy)
-                                       fp->txqs[tc].is_legacy = true;
-                       }
-                       txq_index++;
+                       fp->txq->index = txq_index++;
+                       if (edev->dev_info.is_legacy)
+                               fp->txq->is_legacy = 1;
+                       fp->txq->dev = &edev->pdev->dev;
                }
 
                snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
@@ -3304,11 +3551,18 @@ static int qede_drain_txq(struct qede_dev *edev,
        return 0;
 }
 
+static int qede_stop_txq(struct qede_dev *edev,
+                        struct qede_tx_queue *txq, int rss_id)
+{
+       return edev->ops->q_tx_stop(edev->cdev, rss_id, txq->handle);
+}
+
 static int qede_stop_queues(struct qede_dev *edev)
 {
        struct qed_update_vport_params vport_update_params;
        struct qed_dev *cdev = edev->cdev;
-       int rc, tc, i;
+       struct qede_fastpath *fp;
+       int rc, i;
 
        /* Disable the vport */
        memset(&vport_update_params, 0, sizeof(vport_update_params));
@@ -3325,53 +3579,49 @@ static int qede_stop_queues(struct qede_dev *edev)
 
        /* Flush Tx queues. If needed, request drain from MCP */
        for_each_queue(i) {
-               struct qede_fastpath *fp = &edev->fp_array[i];
+               fp = &edev->fp_array[i];
 
                if (fp->type & QEDE_FASTPATH_TX) {
-                       for (tc = 0; tc < edev->num_tc; tc++) {
-                               struct qede_tx_queue *txq = &fp->txqs[tc];
+                       rc = qede_drain_txq(edev, fp->txq, true);
+                       if (rc)
+                               return rc;
+               }
 
-                               rc = qede_drain_txq(edev, txq, true);
-                               if (rc)
-                                       return rc;
-                       }
+               if (fp->type & QEDE_FASTPATH_XDP) {
+                       rc = qede_drain_txq(edev, fp->xdp_tx, true);
+                       if (rc)
+                               return rc;
                }
        }
 
        /* Stop all Queues in reverse order */
        for (i = QEDE_QUEUE_CNT(edev) - 1; i >= 0; i--) {
-               struct qed_stop_rxq_params rx_params;
+               fp = &edev->fp_array[i];
 
                /* Stop the Tx Queue(s) */
-               if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
-                       for (tc = 0; tc < edev->num_tc; tc++) {
-                               struct qed_stop_txq_params tx_params;
-                               u8 val;
-
-                               tx_params.rss_id = i;
-                               val = edev->fp_array[i].txqs[tc].index;
-                               tx_params.tx_queue_id = val;
-                               rc = edev->ops->q_tx_stop(cdev, &tx_params);
-                               if (rc) {
-                                       DP_ERR(edev, "Failed to stop TXQ #%d\n",
-                                              tx_params.tx_queue_id);
-                                       return rc;
-                               }
-                       }
+               if (fp->type & QEDE_FASTPATH_TX) {
+                       rc = qede_stop_txq(edev, fp->txq, i);
+                       if (rc)
+                               return rc;
                }
 
                /* Stop the Rx Queue */
-               if (edev->fp_array[i].type & QEDE_FASTPATH_RX) {
-                       memset(&rx_params, 0, sizeof(rx_params));
-                       rx_params.rss_id = i;
-                       rx_params.rx_queue_id = edev->fp_array[i].rxq->rxq_id;
-
-                       rc = edev->ops->q_rx_stop(cdev, &rx_params);
+               if (fp->type & QEDE_FASTPATH_RX) {
+                       rc = edev->ops->q_rx_stop(cdev, i, fp->rxq->handle);
                        if (rc) {
                                DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
                                return rc;
                        }
                }
+
+               /* Stop the XDP forwarding queue */
+               if (fp->type & QEDE_FASTPATH_XDP) {
+                       rc = qede_stop_txq(edev, fp->xdp_tx, i);
+                       if (rc)
+                               return rc;
+
+                       bpf_prog_put(fp->rxq->xdp_prog);
+               }
        }
 
        /* Stop the vport */
@@ -3382,9 +3632,55 @@ static int qede_stop_queues(struct qede_dev *edev)
        return rc;
 }
 
+static int qede_start_txq(struct qede_dev *edev,
+                         struct qede_fastpath *fp,
+                         struct qede_tx_queue *txq, u8 rss_id, u16 sb_idx)
+{
+       dma_addr_t phys_table = qed_chain_get_pbl_phys(&txq->tx_pbl);
+       u32 page_cnt = qed_chain_get_page_cnt(&txq->tx_pbl);
+       struct qed_queue_start_common_params params;
+       struct qed_txq_start_ret_params ret_params;
+       int rc;
+
+       memset(&params, 0, sizeof(params));
+       memset(&ret_params, 0, sizeof(ret_params));
+
+       /* Let the XDP queue share the queue-zone with one of the regular txq.
+        * We don't really care about its coalescing.
+        */
+       if (txq->is_xdp)
+               params.queue_id = QEDE_TXQ_XDP_TO_IDX(edev, txq);
+       else
+               params.queue_id = txq->index;
+
+       params.sb = fp->sb_info->igu_sb_id;
+       params.sb_idx = sb_idx;
+
+       rc = edev->ops->q_tx_start(edev->cdev, rss_id, &params, phys_table,
+                                  page_cnt, &ret_params);
+       if (rc) {
+               DP_ERR(edev, "Start TXQ #%d failed %d\n", txq->index, rc);
+               return rc;
+       }
+
+       txq->doorbell_addr = ret_params.p_doorbell;
+       txq->handle = ret_params.p_handle;
+
+       /* Determine the FW consumer address associated */
+       txq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[sb_idx];
+
+       /* Prepare the doorbell parameters */
+       SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_DEST, DB_DEST_XCM);
+       SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD, DB_AGG_CMD_SET);
+       SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_VAL_SEL,
+                 DQ_XCM_ETH_TX_BD_PROD_CMD);
+       txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
+
+       return rc;
+}
+
 static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 {
-       int rc, tc, i;
        int vlan_removal_en = 1;
        struct qed_dev *cdev = edev->cdev;
        struct qed_update_vport_params vport_update_params;
@@ -3392,6 +3688,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
        struct qed_dev_info *qed_info = &edev->dev_info.common;
        struct qed_start_vport_params start = {0};
        bool reset_rss_indir = false;
+       int rc, i;
 
        if (!edev->num_queues) {
                DP_ERR(edev,
@@ -3423,11 +3720,12 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
                u32 page_cnt;
 
                if (fp->type & QEDE_FASTPATH_RX) {
+                       struct qed_rxq_start_ret_params ret_params;
                        struct qede_rx_queue *rxq = fp->rxq;
                        __le16 *val;
 
+                       memset(&ret_params, 0, sizeof(ret_params));
                        memset(&q_params, 0, sizeof(q_params));
-                       q_params.rss_id = i;
                        q_params.queue_id = rxq->rxq_id;
                        q_params.vport_id = 0;
                        q_params.sb = fp->sb_info->igu_sb_id;
@@ -3437,60 +3735,44 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
                            qed_chain_get_pbl_phys(&rxq->rx_comp_ring);
                        page_cnt = qed_chain_get_page_cnt(&rxq->rx_comp_ring);
 
-                       rc = edev->ops->q_rx_start(cdev, &q_params,
+                       rc = edev->ops->q_rx_start(cdev, i, &q_params,
                                                   rxq->rx_buf_size,
                                                   rxq->rx_bd_ring.p_phys_addr,
                                                   p_phys_table,
-                                                  page_cnt,
-                                                  &rxq->hw_rxq_prod_addr);
+                                                  page_cnt, &ret_params);
                        if (rc) {
                                DP_ERR(edev, "Start RXQ #%d failed %d\n", i,
                                       rc);
                                return rc;
                        }
 
+                       /* Use the return parameters */
+                       rxq->hw_rxq_prod_addr = ret_params.p_prod;
+                       rxq->handle = ret_params.p_handle;
+
                        val = &fp->sb_info->sb_virt->pi_array[RX_PI];
                        rxq->hw_cons_ptr = val;
 
                        qede_update_rx_prod(edev, rxq);
                }
 
-               if (!(fp->type & QEDE_FASTPATH_TX))
-                       continue;
-
-               for (tc = 0; tc < edev->num_tc; tc++) {
-                       struct qede_tx_queue *txq = &fp->txqs[tc];
-
-                       p_phys_table = qed_chain_get_pbl_phys(&txq->tx_pbl);
-                       page_cnt = qed_chain_get_page_cnt(&txq->tx_pbl);
-
-                       memset(&q_params, 0, sizeof(q_params));
-                       q_params.rss_id = i;
-                       q_params.queue_id = txq->index;
-                       q_params.vport_id = 0;
-                       q_params.sb = fp->sb_info->igu_sb_id;
-                       q_params.sb_idx = TX_PI(tc);
+               if (fp->type & QEDE_FASTPATH_XDP) {
+                       rc = qede_start_txq(edev, fp, fp->xdp_tx, i, XDP_PI);
+                       if (rc)
+                               return rc;
 
-                       rc = edev->ops->q_tx_start(cdev, &q_params,
-                                                  p_phys_table, page_cnt,
-                                                  &txq->doorbell_addr);
-                       if (rc) {
-                               DP_ERR(edev, "Start TXQ #%d failed %d\n",
-                                      txq->index, rc);
+                       fp->rxq->xdp_prog = bpf_prog_add(edev->xdp_prog, 1);
+                       if (IS_ERR(fp->rxq->xdp_prog)) {
+                               rc = PTR_ERR(fp->rxq->xdp_prog);
+                               fp->rxq->xdp_prog = NULL;
                                return rc;
                        }
+               }
 
-                       txq->hw_cons_ptr =
-                               &fp->sb_info->sb_virt->pi_array[TX_PI(tc)];
-                       SET_FIELD(txq->tx_db.data.params,
-                                 ETH_DB_DATA_DEST, DB_DEST_XCM);
-                       SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD,
-                                 DB_AGG_CMD_SET);
-                       SET_FIELD(txq->tx_db.data.params,
-                                 ETH_DB_DATA_AGG_VAL_SEL,
-                                 DQ_XCM_ETH_TX_BD_PROD_CMD);
-
-                       txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
+               if (fp->type & QEDE_FASTPATH_TX) {
+                       rc = qede_start_txq(edev, fp, fp->txq, i, TX_PI(0));
+                       if (rc)
+                               return rc;
                }
        }
 
@@ -3585,15 +3867,18 @@ enum qede_unload_mode {
        QEDE_UNLOAD_NORMAL,
 };
 
-static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode)
+static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
+                       bool is_locked)
 {
        struct qed_link_params link_params;
        int rc;
 
        DP_INFO(edev, "Starting qede unload\n");
 
+       if (!is_locked)
+               __qede_lock(edev);
+
        qede_roce_dev_event_close(edev);
-       mutex_lock(&edev->qede_lock);
        edev->state = QEDE_STATE_CLOSED;
 
        /* Close OS Tx */
@@ -3625,7 +3910,8 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode)
        qede_free_fp_array(edev);
 
 out:
-       mutex_unlock(&edev->qede_lock);
+       if (!is_locked)
+               __qede_unlock(edev);
        DP_INFO(edev, "Ending qede unload\n");
 }
 
@@ -3634,7 +3920,8 @@ enum qede_load_mode {
        QEDE_LOAD_RELOAD,
 };
 
-static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
+static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
+                    bool is_locked)
 {
        struct qed_link_params link_params;
        struct qed_link_output link_output;
@@ -3642,21 +3929,24 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
 
        DP_INFO(edev, "Starting qede load\n");
 
+       if (!is_locked)
+               __qede_lock(edev);
+
        rc = qede_set_num_queues(edev);
        if (rc)
-               goto err0;
+               goto out;
 
        rc = qede_alloc_fp_array(edev);
        if (rc)
-               goto err0;
+               goto out;
 
        qede_init_fp(edev);
 
        rc = qede_alloc_mem_load(edev);
        if (rc)
                goto err1;
-       DP_INFO(edev, "Allocated %d RSS queues on %d TC/s\n",
-               QEDE_QUEUE_CNT(edev), edev->num_tc);
+       DP_INFO(edev, "Allocated %d Rx, %d Tx queues\n",
+               QEDE_RSS_COUNT(edev), QEDE_TSS_COUNT(edev));
 
        rc = qede_set_real_num_queues(edev);
        if (rc)
@@ -3678,10 +3968,6 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
        /* Add primary mac and set Rx filters */
        ether_addr_copy(edev->primary_mac, edev->ndev->dev_addr);
 
-       mutex_lock(&edev->qede_lock);
-       edev->state = QEDE_STATE_OPEN;
-       mutex_unlock(&edev->qede_lock);
-
        /* Program un-configured VLANs */
        qede_configure_vlan_filters(edev);
 
@@ -3696,10 +3982,12 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
        qede_roce_dev_event_open(edev);
        qede_link_update(edev, &link_output);
 
+       edev->state = QEDE_STATE_OPEN;
+
        DP_INFO(edev, "Ending successfully qede load\n");
 
-       return 0;
 
+       goto out;
 err4:
        qede_sync_free_irqs(edev);
        memset(&edev->int_info.msix_cnt, 0, sizeof(struct qed_int_info));
@@ -3713,26 +4001,40 @@ err1:
        edev->num_queues = 0;
        edev->fp_num_tx = 0;
        edev->fp_num_rx = 0;
-err0:
+out:
+       if (!is_locked)
+               __qede_unlock(edev);
+
        return rc;
 }
 
+/* 'func' should be able to run between unload and reload assuming interface
+ * is actually running, or afterwards in case it's currently DOWN.
+ */
 void qede_reload(struct qede_dev *edev,
-                void (*func)(struct qede_dev *, union qede_reload_args *),
-                union qede_reload_args *args)
+                struct qede_reload_args *args, bool is_locked)
 {
-       qede_unload(edev, QEDE_UNLOAD_NORMAL);
-       /* Call function handler to update parameters
-        * needed for function load.
-        */
-       if (func)
-               func(edev, args);
+       if (!is_locked)
+               __qede_lock(edev);
 
-       qede_load(edev, QEDE_LOAD_RELOAD);
+       /* Since qede_lock is held, internal state wouldn't change even
+        * if netdev state would start transitioning. Check whether current
+        * internal configuration indicates device is up, then reload.
+        */
+       if (edev->state == QEDE_STATE_OPEN) {
+               qede_unload(edev, QEDE_UNLOAD_NORMAL, true);
+               if (args)
+                       args->func(edev, args);
+               qede_load(edev, QEDE_LOAD_RELOAD, true);
+
+               /* Since no one is going to do it for us, re-configure */
+               qede_config_rx_mode(edev->ndev);
+       } else if (args) {
+               args->func(edev, args);
+       }
 
-       mutex_lock(&edev->qede_lock);
-       qede_config_rx_mode(edev->ndev);
-       mutex_unlock(&edev->qede_lock);
+       if (!is_locked)
+               __qede_unlock(edev);
 }
 
 /* called with rtnl_lock */
@@ -3745,13 +4047,14 @@ static int qede_open(struct net_device *ndev)
 
        edev->ops->common->set_power_state(edev->cdev, PCI_D0);
 
-       rc = qede_load(edev, QEDE_LOAD_NORMAL);
-
+       rc = qede_load(edev, QEDE_LOAD_NORMAL, false);
        if (rc)
                return rc;
 
        udp_tunnel_get_rx_info(ndev);
 
+       edev->ops->common->update_drv_state(edev->cdev, true);
+
        return 0;
 }
 
@@ -3759,7 +4062,9 @@ static int qede_close(struct net_device *ndev)
 {
        struct qede_dev *edev = netdev_priv(ndev);
 
-       qede_unload(edev, QEDE_UNLOAD_NORMAL);
+       qede_unload(edev, QEDE_UNLOAD_NORMAL, false);
+
+       edev->ops->common->update_drv_state(edev->cdev, false);
 
        return 0;
 }
@@ -3821,6 +4126,8 @@ static int qede_set_mac_addr(struct net_device *ndev, void *p)
        if (rc)
                return rc;
 
+       edev->ops->common->update_mac(edev->cdev, addr->sa_data);
+
        /* Add MAC filter according to the new unicast HW MAC address */
        ether_addr_copy(edev->primary_mac, ndev->dev_addr);
        return qede_set_ucast_rx_mac(edev, QED_FILTER_XCAST_TYPE_ADD,
@@ -3887,15 +4194,8 @@ static void qede_set_rx_mode(struct net_device *ndev)
 {
        struct qede_dev *edev = netdev_priv(ndev);
 
-       DP_INFO(edev, "qede_set_rx_mode called\n");
-
-       if (edev->state != QEDE_STATE_OPEN) {
-               DP_INFO(edev,
-                       "qede_set_rx_mode called while interface is down\n");
-       } else {
-               set_bit(QEDE_SP_RX_MODE, &edev->sp_flags);
-               schedule_delayed_work(&edev->sp_task, 0);
-       }
+       set_bit(QEDE_SP_RX_MODE, &edev->sp_flags);
+       schedule_delayed_work(&edev->sp_task, 0);
 }
 
 /* Must be called with qede_lock held */
index e97968ed4b8f7294cf6869ab4351711b8d9f55e5..0b4deb31e742fc4d48a2f91e865d7d750e0023c2 100644 (file)
@@ -575,10 +575,11 @@ void emac_mac_start(struct emac_adapter *adpt)
 
        mac |= TXEN | RXEN;     /* enable RX/TX */
 
-       /* We don't have ethtool support yet, so force flow-control mode
-        * to 'full' always.
-        */
-       mac |= TXFC | RXFC;
+       /* Configure MAC flow control to match the PHY's settings. */
+       if (phydev->pause)
+               mac |= RXFC;
+       if (phydev->pause != phydev->asym_pause)
+               mac |= TXFC;
 
        /* setup link speed */
        mac &= ~SPEED_MASK;
@@ -1003,6 +1004,12 @@ int emac_mac_up(struct emac_adapter *adpt)
        writel((u32)~DIS_INT, adpt->base + EMAC_INT_STATUS);
        writel(adpt->irq.mask, adpt->base + EMAC_INT_MASK);
 
+       /* Enable pause frames.  Without this feature, the EMAC has been shown
+        * to receive (and drop) frames with FCS errors at gigabit connections.
+        */
+       adpt->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+       adpt->phydev->advertising |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+
        adpt->phydev->irq = PHY_IGNORE_INTERRUPT;
        phy_start(adpt->phydev);
 
@@ -1021,14 +1028,18 @@ void emac_mac_down(struct emac_adapter *adpt)
        napi_disable(&adpt->rx_q.napi);
 
        phy_stop(adpt->phydev);
-       phy_disconnect(adpt->phydev);
 
-       /* disable mac irq */
+       /* Interrupts must be disabled before the PHY is disconnected, to
+        * avoid a race condition where adjust_link is null when we get
+        * an interrupt.
+        */
        writel(DIS_INT, adpt->base + EMAC_INT_STATUS);
        writel(0, adpt->base + EMAC_INT_MASK);
        synchronize_irq(adpt->irq.irq);
        free_irq(adpt->irq.irq, &adpt->irq);
 
+       phy_disconnect(adpt->phydev);
+
        emac_mac_reset(adpt);
 
        emac_tx_q_descs_free(adpt);
index 75c1b530e39e8118f1788d0647988b97df260d23..72fe343c7a368d23de6c19f922add549aa1043f7 100644 (file)
@@ -421,7 +421,7 @@ static const struct emac_reg_write sgmii_v2_laned[] = {
        /* CDR Settings */
        {EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0,
                UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)},
-       {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(6)},
+       {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(0)},
        {EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)},
 
        /* TX/RX Settings */
index e4e1925d18a48475a8dbaa4bc37bc2eafa65d4a8..8be526af659a5d81a91a31bfdf464ed780cd5114 100644 (file)
@@ -568,6 +568,7 @@ static const struct of_device_id emac_dt_match[] = {
        },
        {}
 };
+MODULE_DEVICE_TABLE(of, emac_dt_match);
 
 #if IS_ENABLED(CONFIG_ACPI)
 static const struct acpi_device_id emac_acpi_match[] = {
index b698ea544bfc55cd197ef9e047b501c9d991c54a..2830190aaacec38444344348f232bf555e2f963a 100644 (file)
@@ -8269,7 +8269,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        if ((sizeof(dma_addr_t) > 4) &&
            (use_dac == 1 || (use_dac == -1 && pci_is_pcie(pdev) &&
                              tp->mac_version >= RTL_GIGA_MAC_VER_18)) &&
-           !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+           !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) &&
+           !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
 
                /* CPlusCmd Dual Access Cycle is only needed for non-PCIe */
                if (!pci_is_pcie(pdev))
index 2e81b702a927b979abc8007f6de04fd4d056c177..67df4cf93362eaa1a7e9eeda9d3aab3dcc333f75 100644 (file)
@@ -1471,7 +1471,7 @@ static int rocker_world_check_init(struct rocker_port *rocker_port)
        if (rocker->wops) {
                if (rocker->wops->mode != mode) {
                        dev_err(&rocker->pdev->dev, "hardware has ports in different worlds, which is not supported\n");
-                       return err;
+                       return -EINVAL;
                }
                return 0;
        }
@@ -2534,7 +2534,7 @@ static void rocker_port_dev_addr_init(struct rocker_port *rocker_port)
 #define ROCKER_PORT_MAX_MTU    9000
 static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
 {
-       const struct pci_dev *pdev = rocker->pdev;
+       struct pci_dev *pdev = rocker->pdev;
        struct rocker_port *rocker_port;
        struct net_device *dev;
        int err;
@@ -2542,6 +2542,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
        dev = alloc_etherdev(sizeof(struct rocker_port));
        if (!dev)
                return -ENOMEM;
+       SET_NETDEV_DEV(dev, &pdev->dev);
        rocker_port = netdev_priv(dev);
        rocker_port->dev = dev;
        rocker_port->rocker = rocker;
index 431a608042727f7bb9cedf404a9429fa665e3143..4ca461322d6089553f05a623823f6e92cbe57455 100644 (file)
@@ -1493,8 +1493,6 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port,
        spin_lock_irqsave(&ofdpa->neigh_tbl_lock, lock_flags);
 
        found = ofdpa_neigh_tbl_find(ofdpa, ip_addr);
-       if (found)
-               *index = found->index;
 
        updating = found && adding;
        removing = found && !adding;
@@ -1508,9 +1506,11 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port,
                resolved = false;
        } else if (removing) {
                ofdpa_neigh_del(trans, found);
+               *index = found->index;
        } else if (updating) {
                ofdpa_neigh_update(found, trans, NULL, false);
                resolved = !is_zero_ether_addr(found->eth_dst);
+               *index = found->index;
        } else {
                err = -ENOENT;
        }
index 4dd92b7b80f41d2e2c0591f9d3930a24ed6325be..605ebc73b2b2cc26452ecdd4c950c21cfb8a3b0f 100644 (file)
@@ -1,5 +1,5 @@
 config SFC
-       tristate "Solarflare SFC4000/SFC9000/SFC9100-family support"
+       tristate "Solarflare SFC9000/SFC9100-family support"
        depends on PCI
        select MDIO
        select CRC32
@@ -8,13 +8,12 @@ config SFC
        select PTP_1588_CLOCK
        ---help---
          This driver supports 10/40-gigabit Ethernet cards based on
-         the Solarflare SFC4000, SFC9000-family and SFC9100-family
-         controllers.
+         the Solarflare SFC9000-family and SFC9100-family controllers.
 
          To compile this driver as a module, choose M here.  The module
          will be called sfc.
 config SFC_MTD
-       bool "Solarflare SFC4000/SFC9000/SFC9100-family MTD support"
+       bool "Solarflare SFC9000/SFC9100-family MTD support"
        depends on SFC && MTD && !(SFC=y && MTD=m)
        default y
        ---help---
index ce8470fe79d5524f8ac0b1536f70d04e5ede2007..520cfcc17785348ce5198e55031af0755d9e1f3d 100644 (file)
@@ -1,7 +1,6 @@
-sfc-y                  += efx.o nic.o farch.o falcon.o siena.o ef10.o tx.o \
-                          rx.o selftest.o ethtool.o qt202x_phy.o mdio_10g.o \
-                          tenxpress.o txc43128_phy.o falcon_boards.o \
-                          mcdi.o mcdi_port.o mcdi_mon.o ptp.o
+sfc-y                  += efx.o nic.o farch.o siena.o ef10.o tx.o rx.o \
+                          selftest.o ethtool.o ptp.o tx_tso.o \
+                          mcdi.o mcdi_port.o mcdi_mon.o
 sfc-$(CONFIG_SFC_MTD)  += mtd.o
 sfc-$(CONFIG_SFC_SRIOV)        += sriov.o siena_sriov.o ef10_sriov.o
 
index 00279da6a1e8b4adfd40df76da7d41da325ea6d2..0f58ea8147d47b3741713777d953e813a51f7814 100644 (file)
@@ -2086,6 +2086,92 @@ static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue,
                        ER_DZ_TX_DESC_UPD, tx_queue->queue);
 }
 
+/* Add Firmware-Assisted TSO v2 option descriptors to a queue.
+ */
+static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
+                               struct sk_buff *skb,
+                               bool *data_mapped)
+{
+       struct efx_tx_buffer *buffer;
+       struct tcphdr *tcp;
+       struct iphdr *ip;
+
+       u16 ipv4_id;
+       u32 seqnum;
+       u32 mss;
+
+       EFX_BUG_ON_PARANOID(tx_queue->tso_version != 2);
+
+       mss = skb_shinfo(skb)->gso_size;
+
+       if (unlikely(mss < 4)) {
+               WARN_ONCE(1, "MSS of %u is too small for TSO v2\n", mss);
+               return -EINVAL;
+       }
+
+       ip = ip_hdr(skb);
+       if (ip->version == 4) {
+               /* Modify IPv4 header if needed. */
+               ip->tot_len = 0;
+               ip->check = 0;
+               ipv4_id = ip->id;
+       } else {
+               /* Modify IPv6 header if needed. */
+               struct ipv6hdr *ipv6 = ipv6_hdr(skb);
+
+               ipv6->payload_len = 0;
+               ipv4_id = 0;
+       }
+
+       tcp = tcp_hdr(skb);
+       seqnum = ntohl(tcp->seq);
+
+       buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+
+       buffer->flags = EFX_TX_BUF_OPTION;
+       buffer->len = 0;
+       buffer->unmap_len = 0;
+       EFX_POPULATE_QWORD_5(buffer->option,
+                       ESF_DZ_TX_DESC_IS_OPT, 1,
+                       ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
+                       ESF_DZ_TX_TSO_OPTION_TYPE,
+                       ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A,
+                       ESF_DZ_TX_TSO_IP_ID, ipv4_id,
+                       ESF_DZ_TX_TSO_TCP_SEQNO, seqnum
+                       );
+       ++tx_queue->insert_count;
+
+       buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+
+       buffer->flags = EFX_TX_BUF_OPTION;
+       buffer->len = 0;
+       buffer->unmap_len = 0;
+       EFX_POPULATE_QWORD_4(buffer->option,
+                       ESF_DZ_TX_DESC_IS_OPT, 1,
+                       ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
+                       ESF_DZ_TX_TSO_OPTION_TYPE,
+                       ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B,
+                       ESF_DZ_TX_TSO_TCP_MSS, mss
+                       );
+       ++tx_queue->insert_count;
+
+       return 0;
+}
+
+static u32 efx_ef10_tso_versions(struct efx_nic *efx)
+{
+       struct efx_ef10_nic_data *nic_data = efx->nic_data;
+       u32 tso_versions = 0;
+
+       if (nic_data->datapath_caps &
+           (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))
+               tso_versions |= BIT(1);
+       if (nic_data->datapath_caps2 &
+           (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))
+               tso_versions |= BIT(2);
+       return tso_versions;
+}
+
 static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 {
        MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
@@ -2095,6 +2181,7 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
        struct efx_channel *channel = tx_queue->channel;
        struct efx_nic *efx = tx_queue->efx;
        struct efx_ef10_nic_data *nic_data = efx->nic_data;
+       bool tso_v2 = false;
        size_t inlen;
        dma_addr_t dma_addr;
        efx_qword_t *txd;
@@ -2102,13 +2189,21 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
        int i;
        BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);
 
+       /* TSOv2 is a limited resource that can only be configured on a limited
+        * number of queues. TSO without checksum offload is not really a thing,
+        * so we only enable it for those queues.
+        */
+       if (csum_offload && (nic_data->datapath_caps2 &
+                       (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))) {
+               tso_v2 = true;
+               netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
+                               channel->channel);
+       }
+
        MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1);
        MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel);
        MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue);
        MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue);
-       MCDI_POPULATE_DWORD_2(inbuf, INIT_TXQ_IN_FLAGS,
-                             INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
-                             INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload);
        MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0);
        MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id);
 
@@ -2124,10 +2219,30 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 
        inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
 
-       rc = efx_mcdi_rpc(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
-                         NULL, 0, NULL);
-       if (rc)
-               goto fail;
+       do {
+               MCDI_POPULATE_DWORD_3(inbuf, INIT_TXQ_IN_FLAGS,
+                               /* This flag was removed from mcdi_pcol.h for
+                                * the non-_EXT version of INIT_TXQ.  However,
+                                * firmware still honours it.
+                                */
+                               INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2,
+                               INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
+                               INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload);
+
+               rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
+                                       NULL, 0, NULL);
+               if (rc == -ENOSPC && tso_v2) {
+                       /* Retry without TSOv2 if we're short on contexts. */
+                       tso_v2 = false;
+                       netif_warn(efx, probe, efx->net_dev,
+                                  "TSOv2 context not available to segment in hardware. TCP performance may be reduced.\n");
+               } else if (rc) {
+                       efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ,
+                                              MC_CMD_INIT_TXQ_EXT_IN_LEN,
+                                              NULL, 0, rc);
+                       goto fail;
+               }
+       } while (rc);
 
        /* A previous user of this TX queue might have set us up the
         * bomb by writing a descriptor to the TX push collector but
@@ -2146,8 +2261,11 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
                             ESF_DZ_TX_OPTION_IP_CSUM, csum_offload);
        tx_queue->write_count = 1;
 
-       if (nic_data->datapath_caps &
-           (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
+       if (tso_v2) {
+               tx_queue->handle_tso = efx_ef10_tx_tso_desc;
+               tx_queue->tso_version = 2;
+       } else if (nic_data->datapath_caps &
+                       (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
                tx_queue->tso_version = 1;
        }
 
@@ -2202,6 +2320,25 @@ static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue)
                        ER_DZ_TX_DESC_UPD_DWORD, tx_queue->queue);
 }
 
+#define EFX_EF10_MAX_TX_DESCRIPTOR_LEN 0x3fff
+
+static unsigned int efx_ef10_tx_limit_len(struct efx_tx_queue *tx_queue,
+                                         dma_addr_t dma_addr, unsigned int len)
+{
+       if (len > EFX_EF10_MAX_TX_DESCRIPTOR_LEN) {
+               /* If we need to break across multiple descriptors we should
+                * stop at a page boundary. This assumes the length limit is
+                * greater than the page size.
+                */
+               dma_addr_t end = dma_addr + EFX_EF10_MAX_TX_DESCRIPTOR_LEN;
+
+               BUILD_BUG_ON(EFX_EF10_MAX_TX_DESCRIPTOR_LEN < EFX_PAGE_SIZE);
+               len = (end & (~(EFX_PAGE_SIZE - 1))) - dma_addr;
+       }
+
+       return len;
+}
+
 static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
 {
        unsigned int old_write_count = tx_queue->write_count;
@@ -2245,6 +2382,86 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
        }
 }
 
+#define RSS_MODE_HASH_ADDRS    (1 << RSS_MODE_HASH_SRC_ADDR_LBN |\
+                                1 << RSS_MODE_HASH_DST_ADDR_LBN)
+#define RSS_MODE_HASH_PORTS    (1 << RSS_MODE_HASH_SRC_PORT_LBN |\
+                                1 << RSS_MODE_HASH_DST_PORT_LBN)
+#define RSS_CONTEXT_FLAGS_DEFAULT      (1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV4_EN_LBN |\
+                                        1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV4_EN_LBN |\
+                                        1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV6_EN_LBN |\
+                                        1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV6_EN_LBN |\
+                                        (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV4_RSS_MODE_LBN |\
+                                        RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN |\
+                                        RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV4_RSS_MODE_LBN |\
+                                        (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV6_RSS_MODE_LBN |\
+                                        RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\
+                                        RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN)
+
+static int efx_ef10_get_rss_flags(struct efx_nic *efx, u32 context, u32 *flags)
+{
+       /* Firmware had a bug (sfc bug 61952) where it would not actually
+        * fill in the flags field in the response to MC_CMD_RSS_CONTEXT_GET_FLAGS.
+        * This meant that it would always contain whatever was previously
+        * in the MCDI buffer.  Fortunately, all firmware versions with
+        * this bug have the same default flags value for a newly-allocated
+        * RSS context, and the only time we want to get the flags is just
+        * after allocating.  Moreover, the response has a 32-bit hole
+        * where the context ID would be in the request, so we can use an
+        * overlength buffer in the request and pre-fill the flags field
+        * with what we believe the default to be.  Thus if the firmware
+        * has the bug, it will leave our pre-filled value in the flags
+        * field of the response, and we will get the right answer.
+        *
+        * However, this does mean that this function should NOT be used if
+        * the RSS context flags might not be their defaults - it is ONLY
+        * reliably correct for a newly-allocated RSS context.
+        */
+       MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
+       MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
+       size_t outlen;
+       int rc;
+
+       /* Check we have a hole for the context ID */
+       BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN != MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_FLAGS_OFST);
+       MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_IN_RSS_CONTEXT_ID, context);
+       MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS,
+                      RSS_CONTEXT_FLAGS_DEFAULT);
+       rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_FLAGS, inbuf,
+                         sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
+       if (rc == 0) {
+               if (outlen < MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN)
+                       rc = -EIO;
+               else
+                       *flags = MCDI_DWORD(outbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS);
+       }
+       return rc;
+}
+
+/* Attempt to enable 4-tuple UDP hashing on the specified RSS context.
+ * If we fail, we just leave the RSS context at its default hash settings,
+ * which is safe but may slightly reduce performance.
+ * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we
+ * just need to set the UDP ports flags (for both IP versions).
+ */
+static void efx_ef10_set_rss_flags(struct efx_nic *efx, u32 context)
+{
+       MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN);
+       u32 flags;
+
+       BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0);
+
+       if (efx_ef10_get_rss_flags(efx, context, &flags) != 0)
+               return;
+       MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID, context);
+       flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN;
+       flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN;
+       MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags);
+       if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf),
+                         NULL, 0, NULL))
+               /* Succeeded, so UDP 4-tuple is now enabled */
+               efx->rx_hash_udp_4tuple = true;
+}
+
 static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
                                      bool exclusive, unsigned *context_size)
 {
@@ -2290,6 +2507,10 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
        if (context_size)
                *context_size = rss_spread;
 
+       if (nic_data->datapath_caps &
+           1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN)
+               efx_ef10_set_rss_flags(efx, *context);
+
        return 0;
 }
 
@@ -5385,6 +5606,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
        .tx_init = efx_ef10_tx_init,
        .tx_remove = efx_ef10_tx_remove,
        .tx_write = efx_ef10_tx_write,
+       .tx_limit_len = efx_ef10_tx_limit_len,
        .rx_push_rss_config = efx_ef10_vf_rx_push_rss_config,
        .rx_probe = efx_ef10_rx_probe,
        .rx_init = efx_ef10_rx_init,
@@ -5491,6 +5713,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
        .tx_init = efx_ef10_tx_init,
        .tx_remove = efx_ef10_tx_remove,
        .tx_write = efx_ef10_tx_write,
+       .tx_limit_len = efx_ef10_tx_limit_len,
        .rx_push_rss_config = efx_ef10_pf_rx_push_rss_config,
        .rx_probe = efx_ef10_rx_probe,
        .rx_init = efx_ef10_rx_init,
@@ -5550,6 +5773,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
 #endif
        .get_mac_address = efx_ef10_get_mac_address_pf,
        .set_mac_address = efx_ef10_set_mac_address,
+       .tso_versions = efx_ef10_tso_versions,
 
        .revision = EFX_REV_HUNT_A0,
        .max_dma_mask = DMA_BIT_MASK(ESF_DZ_TX_KER_BUF_ADDR_WIDTH),
index 62a55dde61d570ff78604962808cb2eff21ea273..2c4bf9476c37656dfec3334fc95eb1e8680fe668 100644 (file)
@@ -1,6 +1,6 @@
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
- * Copyright 2012-2013 Solarflare Communications Inc.
+ * Copyright 2012-2015 Solarflare Communications Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
 #define        ESF_DZ_RX_OVERRIDE_HOLDOFF_WIDTH 1
 #define        ESF_DZ_RX_DROP_EVENT_LBN 58
 #define        ESF_DZ_RX_DROP_EVENT_WIDTH 1
-#define        ESF_DZ_RX_EV_RSVD2_LBN 54
-#define        ESF_DZ_RX_EV_RSVD2_WIDTH 4
+#define        ESF_DD_RX_EV_RSVD2_LBN 54
+#define        ESF_DD_RX_EV_RSVD2_WIDTH 4
+#define        ESF_EZ_RX_TCP_UDP_INNER_CHKSUM_ERR_LBN 57
+#define        ESF_EZ_RX_TCP_UDP_INNER_CHKSUM_ERR_WIDTH 1
+#define        ESF_EZ_RX_IP_INNER_CHKSUM_ERR_LBN 56
+#define        ESF_EZ_RX_IP_INNER_CHKSUM_ERR_WIDTH 1
+#define        ESF_EZ_RX_EV_RSVD2_LBN 54
+#define        ESF_EZ_RX_EV_RSVD2_WIDTH 2
 #define        ESF_DZ_RX_EV_SOFT2_LBN 52
 #define        ESF_DZ_RX_EV_SOFT2_WIDTH 2
 #define        ESF_DZ_RX_DSC_PTR_LBITS_LBN 48
 #define        ESF_DZ_RX_MAC_CLASS_WIDTH 1
 #define        ESE_DZ_MAC_CLASS_MCAST 1
 #define        ESE_DZ_MAC_CLASS_UCAST 0
-#define        ESF_DZ_RX_EV_SOFT1_LBN 32
-#define        ESF_DZ_RX_EV_SOFT1_WIDTH 3
-#define        ESF_DZ_RX_EV_RSVD1_LBN 31
-#define        ESF_DZ_RX_EV_RSVD1_WIDTH 1
-#define        ESF_DZ_RX_ABORT_LBN 30
-#define        ESF_DZ_RX_ABORT_WIDTH 1
+#define        ESF_DD_RX_EV_SOFT1_LBN 32
+#define        ESF_DD_RX_EV_SOFT1_WIDTH 3
+#define        ESF_EZ_RX_EV_SOFT1_LBN 34
+#define        ESF_EZ_RX_EV_SOFT1_WIDTH 1
+#define        ESF_EZ_RX_ENCAP_HDR_LBN 32
+#define        ESF_EZ_RX_ENCAP_HDR_WIDTH 2
+#define        ESE_EZ_ENCAP_HDR_GRE 2
+#define        ESE_EZ_ENCAP_HDR_VXLAN 1
+#define        ESE_EZ_ENCAP_HDR_NONE 0
+#define        ESF_DD_RX_EV_RSVD1_LBN 30
+#define        ESF_DD_RX_EV_RSVD1_WIDTH 2
+#define        ESF_EZ_RX_EV_RSVD1_LBN 31
+#define        ESF_EZ_RX_EV_RSVD1_WIDTH 1
+#define        ESF_EZ_RX_ABORT_LBN 30
+#define        ESF_EZ_RX_ABORT_WIDTH 1
 #define        ESF_DZ_RX_ECC_ERR_LBN 29
 #define        ESF_DZ_RX_ECC_ERR_WIDTH 1
 #define        ESF_DZ_RX_CRC1_ERR_LBN 28
 #define        ESE_DZ_TX_OPTION_DESC_TSO 7
 #define        ESE_DZ_TX_OPTION_DESC_VLAN 6
 #define        ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0
+#define        ESF_DZ_TX_OPTION_TS_AT_TXDP_LBN 8
+#define        ESF_DZ_TX_OPTION_TS_AT_TXDP_WIDTH 1
+#define        ESF_DZ_TX_OPTION_INNER_UDP_TCP_CSUM_LBN 7
+#define        ESF_DZ_TX_OPTION_INNER_UDP_TCP_CSUM_WIDTH 1
+#define        ESF_DZ_TX_OPTION_INNER_IP_CSUM_LBN 6
+#define        ESF_DZ_TX_OPTION_INNER_IP_CSUM_WIDTH 1
 #define        ESF_DZ_TX_TIMESTAMP_LBN 5
 #define        ESF_DZ_TX_TIMESTAMP_WIDTH 1
 #define        ESF_DZ_TX_OPTION_CRC_MODE_LBN 2
 #define        ESF_DZ_TX_OVERRIDE_HOLDOFF_WIDTH 1
 #define        ESF_DZ_TX_DROP_EVENT_LBN 58
 #define        ESF_DZ_TX_DROP_EVENT_WIDTH 1
-#define        ESF_DZ_TX_EV_RSVD_LBN 48
-#define        ESF_DZ_TX_EV_RSVD_WIDTH 10
+#define        ESF_DD_TX_EV_RSVD_LBN 48
+#define        ESF_DD_TX_EV_RSVD_WIDTH 10
+#define        ESF_EZ_TCP_UDP_INNER_CHKSUM_ERR_LBN 57
+#define        ESF_EZ_TCP_UDP_INNER_CHKSUM_ERR_WIDTH 1
+#define        ESF_EZ_IP_INNER_CHKSUM_ERR_LBN 56
+#define        ESF_EZ_IP_INNER_CHKSUM_ERR_WIDTH 1
+#define        ESF_EZ_TX_EV_RSVD_LBN 48
+#define        ESF_EZ_TX_EV_RSVD_WIDTH 8
 #define        ESF_DZ_TX_SOFT2_LBN 32
 #define        ESF_DZ_TX_SOFT2_WIDTH 16
-#define        ESF_DZ_TX_CAN_MERGE_LBN 31
-#define        ESF_DZ_TX_CAN_MERGE_WIDTH 1
-#define        ESF_DZ_TX_SOFT1_LBN 24
-#define        ESF_DZ_TX_SOFT1_WIDTH 7
+#define        ESF_DD_TX_SOFT1_LBN 24
+#define        ESF_DD_TX_SOFT1_WIDTH 8
+#define        ESF_EZ_TX_CAN_MERGE_LBN 31
+#define        ESF_EZ_TX_CAN_MERGE_WIDTH 1
+#define        ESF_EZ_TX_SOFT1_LBN 24
+#define        ESF_EZ_TX_SOFT1_WIDTH 7
 #define        ESF_DZ_TX_QLABEL_LBN 16
 #define        ESF_DZ_TX_QLABEL_WIDTH 5
 #define        ESF_DZ_TX_DESCR_INDX_LBN 0
 #define        ESE_DZ_TX_OPTION_DESC_TSO 7
 #define        ESE_DZ_TX_OPTION_DESC_VLAN 6
 #define        ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0
+#define        ESF_DZ_TX_TSO_OPTION_TYPE_LBN 56
+#define        ESF_DZ_TX_TSO_OPTION_TYPE_WIDTH 4
+#define        ESE_DZ_TX_TSO_OPTION_DESC_ENCAP 1
+#define        ESE_DZ_TX_TSO_OPTION_DESC_NORMAL 0
 #define        ESF_DZ_TX_TSO_TCP_FLAGS_LBN 48
 #define        ESF_DZ_TX_TSO_TCP_FLAGS_WIDTH 8
 #define        ESF_DZ_TX_TSO_IP_ID_LBN 32
 #define        ESF_DZ_TX_TSO_TCP_SEQNO_LBN 0
 #define        ESF_DZ_TX_TSO_TCP_SEQNO_WIDTH 32
 
+/* TX_TSO_FATSO2A_DESC */
+#define        ESF_DZ_TX_DESC_IS_OPT_LBN 63
+#define        ESF_DZ_TX_DESC_IS_OPT_WIDTH 1
+#define        ESF_DZ_TX_OPTION_TYPE_LBN 60
+#define        ESF_DZ_TX_OPTION_TYPE_WIDTH 3
+#define        ESE_DZ_TX_OPTION_DESC_TSO 7
+#define        ESE_DZ_TX_OPTION_DESC_VLAN 6
+#define        ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0
+#define        ESF_DZ_TX_TSO_OPTION_TYPE_LBN 56
+#define        ESF_DZ_TX_TSO_OPTION_TYPE_WIDTH 4
+#define        ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B 3
+#define        ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A 2
+#define        ESE_DZ_TX_TSO_OPTION_DESC_ENCAP 1
+#define        ESE_DZ_TX_TSO_OPTION_DESC_NORMAL 0
+#define        ESF_DZ_TX_TSO_IP_ID_LBN 32
+#define        ESF_DZ_TX_TSO_IP_ID_WIDTH 16
+#define        ESF_DZ_TX_TSO_TCP_SEQNO_LBN 0
+#define        ESF_DZ_TX_TSO_TCP_SEQNO_WIDTH 32
+
+
+/* TX_TSO_FATSO2B_DESC */
+#define        ESF_DZ_TX_DESC_IS_OPT_LBN 63
+#define        ESF_DZ_TX_DESC_IS_OPT_WIDTH 1
+#define        ESF_DZ_TX_OPTION_TYPE_LBN 60
+#define        ESF_DZ_TX_OPTION_TYPE_WIDTH 3
+#define        ESE_DZ_TX_OPTION_DESC_TSO 7
+#define        ESE_DZ_TX_OPTION_DESC_VLAN 6
+#define        ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0
+#define        ESF_DZ_TX_TSO_OPTION_TYPE_LBN 56
+#define        ESF_DZ_TX_TSO_OPTION_TYPE_WIDTH 4
+#define        ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B 3
+#define        ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A 2
+#define        ESE_DZ_TX_TSO_OPTION_DESC_ENCAP 1
+#define        ESE_DZ_TX_TSO_OPTION_DESC_NORMAL 0
+#define        ESF_DZ_TX_TSO_OUTER_IP_ID_LBN 0
+#define        ESF_DZ_TX_TSO_OUTER_IP_ID_WIDTH 16
+#define        ESF_DZ_TX_TSO_TCP_MSS_LBN 32
+#define        ESF_DZ_TX_TSO_TCP_MSS_WIDTH 16
+
+
 /*************************************************************************/
 
 /* TX_DESC_UPD_REG: Transmit descriptor update register.
index b626da6e80a59a1a8fd673554b66e38dee4219e9..da7028df3fb6332912b3712dc8945812ea6b44d5 100644 (file)
@@ -82,7 +82,6 @@ const char *const efx_reset_type_names[] = {
        [RESET_TYPE_DISABLE]            = "DISABLE",
        [RESET_TYPE_TX_WATCHDOG]        = "TX_WATCHDOG",
        [RESET_TYPE_INT_ERROR]          = "INT_ERROR",
-       [RESET_TYPE_RX_RECOVERY]        = "RX_RECOVERY",
        [RESET_TYPE_DMA_ERROR]          = "DMA_ERROR",
        [RESET_TYPE_TX_SKIP]            = "TX_SKIP",
        [RESET_TYPE_MC_FAILURE]         = "MC_FAILURE",
@@ -485,6 +484,9 @@ efx_copy_channel(const struct efx_channel *old_channel)
        *channel = *old_channel;
 
        channel->napi_dev = NULL;
+       INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
+       channel->napi_str.napi_id = 0;
+       channel->napi_str.state = 0;
        memset(&channel->eventq, 0, sizeof(channel->eventq));
 
        for (j = 0; j < EFX_TXQ_TYPES; j++) {
@@ -730,16 +732,7 @@ static void efx_stop_datapath(struct efx_nic *efx)
        }
 
        rc = efx->type->fini_dmaq(efx);
-       if (rc && EFX_WORKAROUND_7803(efx)) {
-               /* Schedule a reset to recover from the flush failure. The
-                * descriptor caches reference memory we're about to free,
-                * but falcon_reconfigure_mac_wrapper() won't reconnect
-                * the MACs because of the pending reset.
-                */
-               netif_err(efx, drv, efx->net_dev,
-                         "Resetting to recover from flush failure\n");
-               efx_schedule_reset(efx, RESET_TYPE_ALL);
-       } else if (rc) {
+       if (rc) {
                netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
        } else {
                netif_dbg(efx, drv, efx->net_dev,
@@ -1889,15 +1882,13 @@ static void efx_start_all(struct efx_nic *efx)
                queue_delayed_work(efx->workqueue, &efx->monitor_work,
                                   efx_monitor_interval);
 
-       /* If link state detection is normally event-driven, we have
+       /* Link state detection is normally event-driven; we have
         * to poll now because we could have missed a change
         */
-       if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) {
-               mutex_lock(&efx->mac_lock);
-               if (efx->phy_op->poll(efx))
-                       efx_link_status_changed(efx);
-               mutex_unlock(&efx->mac_lock);
-       }
+       mutex_lock(&efx->mac_lock);
+       if (efx->phy_op->poll(efx))
+               efx_link_status_changed(efx);
+       mutex_unlock(&efx->mac_lock);
 
        efx->type->start_stats(efx);
        efx->type->pull_stats(efx);
@@ -2110,10 +2101,9 @@ static void efx_init_napi(struct efx_nic *efx)
 
 static void efx_fini_napi_channel(struct efx_channel *channel)
 {
-       if (channel->napi_dev) {
+       if (channel->napi_dev)
                netif_napi_del(&channel->napi_str);
-               napi_hash_del(&channel->napi_str);
-       }
+
        channel->napi_dev = NULL;
 }
 
@@ -2840,12 +2830,6 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
 
 /* PCI device ID table */
 static const struct pci_device_id efx_pci_table[] = {
-       {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
-                   PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0),
-        .driver_data = (unsigned long) &falcon_a1_nic_type},
-       {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
-                   PCI_DEVICE_ID_SOLARFLARE_SFC4000B),
-        .driver_data = (unsigned long) &falcon_b0_nic_type},
        {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803),  /* SFC9020 */
         .driver_data = (unsigned long) &siena_a0_nic_type},
        {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813),  /* SFL9021 */
@@ -3198,23 +3182,6 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
        efx = netdev_priv(net_dev);
        efx->type = (const struct efx_nic_type *) entry->driver_data;
        efx->fixed_features |= NETIF_F_HIGHDMA;
-       net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
-                             NETIF_F_TSO | NETIF_F_RXCSUM);
-       if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
-               net_dev->features |= NETIF_F_TSO6;
-       /* Mask for features that also apply to VLAN devices */
-       net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
-                                  NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
-                                  NETIF_F_RXCSUM);
-
-       net_dev->hw_features = net_dev->features & ~efx->fixed_features;
-
-       /* Disable VLAN filtering by default.  It may be enforced if
-        * the feature is fixed (i.e. VLAN filters are required to
-        * receive VLAN tagged packets due to vPort restrictions).
-        */
-       net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
-       net_dev->features |= efx->fixed_features;
 
        pci_set_drvdata(pci_dev, efx);
        SET_NETDEV_DEV(net_dev, &pci_dev->dev);
@@ -3237,6 +3204,27 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
        if (rc)
                goto fail3;
 
+       net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
+                             NETIF_F_TSO | NETIF_F_RXCSUM);
+       if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
+               net_dev->features |= NETIF_F_TSO6;
+       /* Check whether device supports TSO */
+       if (!efx->type->tso_versions || !efx->type->tso_versions(efx))
+               net_dev->features &= ~NETIF_F_ALL_TSO;
+       /* Mask for features that also apply to VLAN devices */
+       net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
+                                  NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
+                                  NETIF_F_RXCSUM);
+
+       net_dev->hw_features = net_dev->features & ~efx->fixed_features;
+
+       /* Disable VLAN filtering by default.  It may be enforced if
+        * the feature is fixed (i.e. VLAN filters are required to
+        * receive VLAN tagged packets due to vPort restrictions).
+        */
+       net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+       net_dev->features |= efx->fixed_features;
+
        rc = efx_register_netdev(efx);
        if (rc)
                goto fail4;
index c94f56271dd451eba1990720034ccc217cdf5ed2..6fa824211d914f0fa6673f215f98ed056e59d37d 100644 (file)
@@ -148,7 +148,6 @@ enum efx_loopback_mode {
  * @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled
  * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog
  * @RESET_TYPE_INT_ERROR: reset due to internal error
- * @RESET_TYPE_RX_RECOVERY: reset to recover from RX datapath errors
  * @RESET_TYPE_DMA_ERROR: DMA error
  * @RESET_TYPE_TX_SKIP: hardware completed empty tx descriptors
  * @RESET_TYPE_MC_FAILURE: MC reboot/assertion
@@ -166,15 +165,13 @@ enum reset_type {
        RESET_TYPE_MAX_METHOD,
        RESET_TYPE_TX_WATCHDOG,
        RESET_TYPE_INT_ERROR,
-       RESET_TYPE_RX_RECOVERY,
        RESET_TYPE_DMA_ERROR,
        RESET_TYPE_TX_SKIP,
        RESET_TYPE_MC_FAILURE,
        /* RESET_TYPE_MCDI_TIMEOUT is actually a method, not just a reason, but
         * it doesn't fit the scope hierarchy (not well-ordered by inclusion).
         * We encode this by having its enum value be greater than
-        * RESET_TYPE_MAX_METHOD. This also prevents issuing it with
-        * efx_ioctl_reset.
+        * RESET_TYPE_MAX_METHOD.
         */
        RESET_TYPE_MCDI_TIMEOUT,
        RESET_TYPE_MAX,
index 445ccdb6bc6734123179c3155b6f7f5d89a50915..ca29d3d255a8920c0eb2cbe021cde010c8eb6c92 100644 (file)
@@ -69,8 +69,10 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
        EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts),
        EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers),
        EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets),
+       EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks),
        EFX_ETHTOOL_UINT_TXQ_STAT(pushes),
        EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets),
+       EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets),
        EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset),
        EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
        EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),
@@ -167,9 +169,8 @@ static void efx_ethtool_get_drvinfo(struct net_device *net_dev,
 
        strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
        strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version));
-       if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0)
-               efx_mcdi_print_fwver(efx, info->fw_version,
-                                    sizeof(info->fw_version));
+       efx_mcdi_print_fwver(efx, info->fw_version,
+                            sizeof(info->fw_version));
        strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info));
 }
 
@@ -964,35 +965,33 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
                return 0;
 
        case ETHTOOL_GRXFH: {
-               unsigned min_revision = 0;
-
                info->data = 0;
                switch (info->flow_type) {
+               case UDP_V4_FLOW:
+                       if (efx->rx_hash_udp_4tuple)
+                               /* fall through */
                case TCP_V4_FLOW:
-                       info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+                               info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
                        /* fall through */
-               case UDP_V4_FLOW:
                case SCTP_V4_FLOW:
                case AH_ESP_V4_FLOW:
                case IPV4_FLOW:
                        info->data |= RXH_IP_SRC | RXH_IP_DST;
-                       min_revision = EFX_REV_FALCON_B0;
                        break;
+               case UDP_V6_FLOW:
+                       if (efx->rx_hash_udp_4tuple)
+                               /* fall through */
                case TCP_V6_FLOW:
-                       info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+                               info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
                        /* fall through */
-               case UDP_V6_FLOW:
                case SCTP_V6_FLOW:
                case AH_ESP_V6_FLOW:
                case IPV6_FLOW:
                        info->data |= RXH_IP_SRC | RXH_IP_DST;
-                       min_revision = EFX_REV_SIENA_A0;
                        break;
                default:
                        break;
                }
-               if (efx_nic_rev(efx) < min_revision)
-                       info->data = 0;
                return 0;
        }
 
@@ -1265,9 +1264,7 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
 {
        struct efx_nic *efx = netdev_priv(net_dev);
 
-       return ((efx_nic_rev(efx) < EFX_REV_FALCON_B0 ||
-                efx->n_rx_channels == 1) ?
-               0 : ARRAY_SIZE(efx->rx_indir_table));
+       return (efx->n_rx_channels == 1) ? 0 : ARRAY_SIZE(efx->rx_indir_table);
 }
 
 static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key,
diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c
deleted file mode 100644 (file)
index 1a70926..0000000
+++ /dev/null
@@ -1,2905 +0,0 @@
-/****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2005-2006 Fen Systems Ltd.
- * Copyright 2006-2013 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
- */
-
-#include <linux/bitops.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <linux/seq_file.h>
-#include <linux/i2c.h>
-#include <linux/mii.h>
-#include <linux/slab.h>
-#include "net_driver.h"
-#include "bitfield.h"
-#include "efx.h"
-#include "nic.h"
-#include "farch_regs.h"
-#include "io.h"
-#include "phy.h"
-#include "workarounds.h"
-#include "selftest.h"
-#include "mdio_10g.h"
-
-/* Hardware control for SFC4000 (aka Falcon). */
-
-/**************************************************************************
- *
- * NIC stats
- *
- **************************************************************************
- */
-
-#define FALCON_MAC_STATS_SIZE 0x100
-
-#define XgRxOctets_offset 0x0
-#define XgRxOctets_WIDTH 48
-#define XgRxOctetsOK_offset 0x8
-#define XgRxOctetsOK_WIDTH 48
-#define XgRxPkts_offset 0x10
-#define XgRxPkts_WIDTH 32
-#define XgRxPktsOK_offset 0x14
-#define XgRxPktsOK_WIDTH 32
-#define XgRxBroadcastPkts_offset 0x18
-#define XgRxBroadcastPkts_WIDTH 32
-#define XgRxMulticastPkts_offset 0x1C
-#define XgRxMulticastPkts_WIDTH 32
-#define XgRxUnicastPkts_offset 0x20
-#define XgRxUnicastPkts_WIDTH 32
-#define XgRxUndersizePkts_offset 0x24
-#define XgRxUndersizePkts_WIDTH 32
-#define XgRxOversizePkts_offset 0x28
-#define XgRxOversizePkts_WIDTH 32
-#define XgRxJabberPkts_offset 0x2C
-#define XgRxJabberPkts_WIDTH 32
-#define XgRxUndersizeFCSerrorPkts_offset 0x30
-#define XgRxUndersizeFCSerrorPkts_WIDTH 32
-#define XgRxDropEvents_offset 0x34
-#define XgRxDropEvents_WIDTH 32
-#define XgRxFCSerrorPkts_offset 0x38
-#define XgRxFCSerrorPkts_WIDTH 32
-#define XgRxAlignError_offset 0x3C
-#define XgRxAlignError_WIDTH 32
-#define XgRxSymbolError_offset 0x40
-#define XgRxSymbolError_WIDTH 32
-#define XgRxInternalMACError_offset 0x44
-#define XgRxInternalMACError_WIDTH 32
-#define XgRxControlPkts_offset 0x48
-#define XgRxControlPkts_WIDTH 32
-#define XgRxPausePkts_offset 0x4C
-#define XgRxPausePkts_WIDTH 32
-#define XgRxPkts64Octets_offset 0x50
-#define XgRxPkts64Octets_WIDTH 32
-#define XgRxPkts65to127Octets_offset 0x54
-#define XgRxPkts65to127Octets_WIDTH 32
-#define XgRxPkts128to255Octets_offset 0x58
-#define XgRxPkts128to255Octets_WIDTH 32
-#define XgRxPkts256to511Octets_offset 0x5C
-#define XgRxPkts256to511Octets_WIDTH 32
-#define XgRxPkts512to1023Octets_offset 0x60
-#define XgRxPkts512to1023Octets_WIDTH 32
-#define XgRxPkts1024to15xxOctets_offset 0x64
-#define XgRxPkts1024to15xxOctets_WIDTH 32
-#define XgRxPkts15xxtoMaxOctets_offset 0x68
-#define XgRxPkts15xxtoMaxOctets_WIDTH 32
-#define XgRxLengthError_offset 0x6C
-#define XgRxLengthError_WIDTH 32
-#define XgTxPkts_offset 0x80
-#define XgTxPkts_WIDTH 32
-#define XgTxOctets_offset 0x88
-#define XgTxOctets_WIDTH 48
-#define XgTxMulticastPkts_offset 0x90
-#define XgTxMulticastPkts_WIDTH 32
-#define XgTxBroadcastPkts_offset 0x94
-#define XgTxBroadcastPkts_WIDTH 32
-#define XgTxUnicastPkts_offset 0x98
-#define XgTxUnicastPkts_WIDTH 32
-#define XgTxControlPkts_offset 0x9C
-#define XgTxControlPkts_WIDTH 32
-#define XgTxPausePkts_offset 0xA0
-#define XgTxPausePkts_WIDTH 32
-#define XgTxPkts64Octets_offset 0xA4
-#define XgTxPkts64Octets_WIDTH 32
-#define XgTxPkts65to127Octets_offset 0xA8
-#define XgTxPkts65to127Octets_WIDTH 32
-#define XgTxPkts128to255Octets_offset 0xAC
-#define XgTxPkts128to255Octets_WIDTH 32
-#define XgTxPkts256to511Octets_offset 0xB0
-#define XgTxPkts256to511Octets_WIDTH 32
-#define XgTxPkts512to1023Octets_offset 0xB4
-#define XgTxPkts512to1023Octets_WIDTH 32
-#define XgTxPkts1024to15xxOctets_offset 0xB8
-#define XgTxPkts1024to15xxOctets_WIDTH 32
-#define XgTxPkts1519toMaxOctets_offset 0xBC
-#define XgTxPkts1519toMaxOctets_WIDTH 32
-#define XgTxUndersizePkts_offset 0xC0
-#define XgTxUndersizePkts_WIDTH 32
-#define XgTxOversizePkts_offset 0xC4
-#define XgTxOversizePkts_WIDTH 32
-#define XgTxNonTcpUdpPkt_offset 0xC8
-#define XgTxNonTcpUdpPkt_WIDTH 16
-#define XgTxMacSrcErrPkt_offset 0xCC
-#define XgTxMacSrcErrPkt_WIDTH 16
-#define XgTxIpSrcErrPkt_offset 0xD0
-#define XgTxIpSrcErrPkt_WIDTH 16
-#define XgDmaDone_offset 0xD4
-#define XgDmaDone_WIDTH 32
-
-#define FALCON_XMAC_STATS_DMA_FLAG(efx)                                \
-       (*(u32 *)((efx)->stats_buffer.addr + XgDmaDone_offset))
-
-#define FALCON_DMA_STAT(ext_name, hw_name)                             \
-       [FALCON_STAT_ ## ext_name] =                                    \
-       { #ext_name,                                                    \
-         /* 48-bit stats are zero-padded to 64 on DMA */               \
-         hw_name ## _ ## WIDTH == 48 ? 64 : hw_name ## _ ## WIDTH,     \
-         hw_name ## _ ## offset }
-#define FALCON_OTHER_STAT(ext_name)                                    \
-       [FALCON_STAT_ ## ext_name] = { #ext_name, 0, 0 }
-#define GENERIC_SW_STAT(ext_name)                              \
-       [GENERIC_STAT_ ## ext_name] = { #ext_name, 0, 0 }
-
-static const struct efx_hw_stat_desc falcon_stat_desc[FALCON_STAT_COUNT] = {
-       FALCON_DMA_STAT(tx_bytes, XgTxOctets),
-       FALCON_DMA_STAT(tx_packets, XgTxPkts),
-       FALCON_DMA_STAT(tx_pause, XgTxPausePkts),
-       FALCON_DMA_STAT(tx_control, XgTxControlPkts),
-       FALCON_DMA_STAT(tx_unicast, XgTxUnicastPkts),
-       FALCON_DMA_STAT(tx_multicast, XgTxMulticastPkts),
-       FALCON_DMA_STAT(tx_broadcast, XgTxBroadcastPkts),
-       FALCON_DMA_STAT(tx_lt64, XgTxUndersizePkts),
-       FALCON_DMA_STAT(tx_64, XgTxPkts64Octets),
-       FALCON_DMA_STAT(tx_65_to_127, XgTxPkts65to127Octets),
-       FALCON_DMA_STAT(tx_128_to_255, XgTxPkts128to255Octets),
-       FALCON_DMA_STAT(tx_256_to_511, XgTxPkts256to511Octets),
-       FALCON_DMA_STAT(tx_512_to_1023, XgTxPkts512to1023Octets),
-       FALCON_DMA_STAT(tx_1024_to_15xx, XgTxPkts1024to15xxOctets),
-       FALCON_DMA_STAT(tx_15xx_to_jumbo, XgTxPkts1519toMaxOctets),
-       FALCON_DMA_STAT(tx_gtjumbo, XgTxOversizePkts),
-       FALCON_DMA_STAT(tx_non_tcpudp, XgTxNonTcpUdpPkt),
-       FALCON_DMA_STAT(tx_mac_src_error, XgTxMacSrcErrPkt),
-       FALCON_DMA_STAT(tx_ip_src_error, XgTxIpSrcErrPkt),
-       FALCON_DMA_STAT(rx_bytes, XgRxOctets),
-       FALCON_DMA_STAT(rx_good_bytes, XgRxOctetsOK),
-       FALCON_OTHER_STAT(rx_bad_bytes),
-       FALCON_DMA_STAT(rx_packets, XgRxPkts),
-       FALCON_DMA_STAT(rx_good, XgRxPktsOK),
-       FALCON_DMA_STAT(rx_bad, XgRxFCSerrorPkts),
-       FALCON_DMA_STAT(rx_pause, XgRxPausePkts),
-       FALCON_DMA_STAT(rx_control, XgRxControlPkts),
-       FALCON_DMA_STAT(rx_unicast, XgRxUnicastPkts),
-       FALCON_DMA_STAT(rx_multicast, XgRxMulticastPkts),
-       FALCON_DMA_STAT(rx_broadcast, XgRxBroadcastPkts),
-       FALCON_DMA_STAT(rx_lt64, XgRxUndersizePkts),
-       FALCON_DMA_STAT(rx_64, XgRxPkts64Octets),
-       FALCON_DMA_STAT(rx_65_to_127, XgRxPkts65to127Octets),
-       FALCON_DMA_STAT(rx_128_to_255, XgRxPkts128to255Octets),
-       FALCON_DMA_STAT(rx_256_to_511, XgRxPkts256to511Octets),
-       FALCON_DMA_STAT(rx_512_to_1023, XgRxPkts512to1023Octets),
-       FALCON_DMA_STAT(rx_1024_to_15xx, XgRxPkts1024to15xxOctets),
-       FALCON_DMA_STAT(rx_15xx_to_jumbo, XgRxPkts15xxtoMaxOctets),
-       FALCON_DMA_STAT(rx_gtjumbo, XgRxOversizePkts),
-       FALCON_DMA_STAT(rx_bad_lt64, XgRxUndersizeFCSerrorPkts),
-       FALCON_DMA_STAT(rx_bad_gtjumbo, XgRxJabberPkts),
-       FALCON_DMA_STAT(rx_overflow, XgRxDropEvents),
-       FALCON_DMA_STAT(rx_symbol_error, XgRxSymbolError),
-       FALCON_DMA_STAT(rx_align_error, XgRxAlignError),
-       FALCON_DMA_STAT(rx_length_error, XgRxLengthError),
-       FALCON_DMA_STAT(rx_internal_error, XgRxInternalMACError),
-       FALCON_OTHER_STAT(rx_nodesc_drop_cnt),
-       GENERIC_SW_STAT(rx_nodesc_trunc),
-       GENERIC_SW_STAT(rx_noskb_drops),
-};
-static const unsigned long falcon_stat_mask[] = {
-       [0 ... BITS_TO_LONGS(FALCON_STAT_COUNT) - 1] = ~0UL,
-};
-
-/**************************************************************************
- *
- * Basic SPI command set and bit definitions
- *
- *************************************************************************/
-
-#define SPI_WRSR 0x01          /* Write status register */
-#define SPI_WRITE 0x02         /* Write data to memory array */
-#define SPI_READ 0x03          /* Read data from memory array */
-#define SPI_WRDI 0x04          /* Reset write enable latch */
-#define SPI_RDSR 0x05          /* Read status register */
-#define SPI_WREN 0x06          /* Set write enable latch */
-#define SPI_SST_EWSR 0x50      /* SST: Enable write to status register */
-
-#define SPI_STATUS_WPEN 0x80   /* Write-protect pin enabled */
-#define SPI_STATUS_BP2 0x10    /* Block protection bit 2 */
-#define SPI_STATUS_BP1 0x08    /* Block protection bit 1 */
-#define SPI_STATUS_BP0 0x04    /* Block protection bit 0 */
-#define SPI_STATUS_WEN 0x02    /* State of the write enable latch */
-#define SPI_STATUS_NRDY 0x01   /* Device busy flag */
-
-/**************************************************************************
- *
- * Non-volatile memory layout
- *
- **************************************************************************
- */
-
-/* SFC4000 flash is partitioned into:
- *     0-0x400       chip and board config (see struct falcon_nvconfig)
- *     0x400-0x8000  unused (or may contain VPD if EEPROM not present)
- *     0x8000-end    boot code (mapped to PCI expansion ROM)
- * SFC4000 small EEPROM (size < 0x400) is used for VPD only.
- * SFC4000 large EEPROM (size >= 0x400) is partitioned into:
- *     0-0x400       chip and board config
- *     configurable  VPD
- *     0x800-0x1800  boot config
- * Aside from the chip and board config, all of these are optional and may
- * be absent or truncated depending on the devices used.
- */
-#define FALCON_NVCONFIG_END 0x400U
-#define FALCON_FLASH_BOOTCODE_START 0x8000U
-#define FALCON_EEPROM_BOOTCONFIG_START 0x800U
-#define FALCON_EEPROM_BOOTCONFIG_END 0x1800U
-
-/* Board configuration v2 (v1 is obsolete; later versions are compatible) */
-struct falcon_nvconfig_board_v2 {
-       __le16 nports;
-       u8 port0_phy_addr;
-       u8 port0_phy_type;
-       u8 port1_phy_addr;
-       u8 port1_phy_type;
-       __le16 asic_sub_revision;
-       __le16 board_revision;
-} __packed;
-
-/* Board configuration v3 extra information */
-struct falcon_nvconfig_board_v3 {
-       __le32 spi_device_type[2];
-} __packed;
-
-/* Bit numbers for spi_device_type */
-#define SPI_DEV_TYPE_SIZE_LBN 0
-#define SPI_DEV_TYPE_SIZE_WIDTH 5
-#define SPI_DEV_TYPE_ADDR_LEN_LBN 6
-#define SPI_DEV_TYPE_ADDR_LEN_WIDTH 2
-#define SPI_DEV_TYPE_ERASE_CMD_LBN 8
-#define SPI_DEV_TYPE_ERASE_CMD_WIDTH 8
-#define SPI_DEV_TYPE_ERASE_SIZE_LBN 16
-#define SPI_DEV_TYPE_ERASE_SIZE_WIDTH 5
-#define SPI_DEV_TYPE_BLOCK_SIZE_LBN 24
-#define SPI_DEV_TYPE_BLOCK_SIZE_WIDTH 5
-#define SPI_DEV_TYPE_FIELD(type, field)                                        \
-       (((type) >> EFX_LOW_BIT(field)) & EFX_MASK32(EFX_WIDTH(field)))
-
-#define FALCON_NVCONFIG_OFFSET 0x300
-
-#define FALCON_NVCONFIG_BOARD_MAGIC_NUM 0xFA1C
-struct falcon_nvconfig {
-       efx_oword_t ee_vpd_cfg_reg;                     /* 0x300 */
-       u8 mac_address[2][8];                   /* 0x310 */
-       efx_oword_t pcie_sd_ctl0123_reg;                /* 0x320 */
-       efx_oword_t pcie_sd_ctl45_reg;                  /* 0x330 */
-       efx_oword_t pcie_pcs_ctl_stat_reg;              /* 0x340 */
-       efx_oword_t hw_init_reg;                        /* 0x350 */
-       efx_oword_t nic_stat_reg;                       /* 0x360 */
-       efx_oword_t glb_ctl_reg;                        /* 0x370 */
-       efx_oword_t srm_cfg_reg;                        /* 0x380 */
-       efx_oword_t spare_reg;                          /* 0x390 */
-       __le16 board_magic_num;                 /* 0x3A0 */
-       __le16 board_struct_ver;
-       __le16 board_checksum;
-       struct falcon_nvconfig_board_v2 board_v2;
-       efx_oword_t ee_base_page_reg;                   /* 0x3B0 */
-       struct falcon_nvconfig_board_v3 board_v3;       /* 0x3C0 */
-} __packed;
-
-/*************************************************************************/
-
-static int falcon_reset_hw(struct efx_nic *efx, enum reset_type method);
-static void falcon_reconfigure_mac_wrapper(struct efx_nic *efx);
-
-static const unsigned int
-/* "Large" EEPROM device: Atmel AT25640 or similar
- * 8 KB, 16-bit address, 32 B write block */
-large_eeprom_type = ((13 << SPI_DEV_TYPE_SIZE_LBN)
-                    | (2 << SPI_DEV_TYPE_ADDR_LEN_LBN)
-                    | (5 << SPI_DEV_TYPE_BLOCK_SIZE_LBN)),
-/* Default flash device: Atmel AT25F1024
- * 128 KB, 24-bit address, 32 KB erase block, 256 B write block */
-default_flash_type = ((17 << SPI_DEV_TYPE_SIZE_LBN)
-                     | (3 << SPI_DEV_TYPE_ADDR_LEN_LBN)
-                     | (0x52 << SPI_DEV_TYPE_ERASE_CMD_LBN)
-                     | (15 << SPI_DEV_TYPE_ERASE_SIZE_LBN)
-                     | (8 << SPI_DEV_TYPE_BLOCK_SIZE_LBN));
-
-/**************************************************************************
- *
- * I2C bus - this is a bit-bashing interface using GPIO pins
- * Note that it uses the output enables to tristate the outputs
- * SDA is the data pin and SCL is the clock
- *
- **************************************************************************
- */
-static void falcon_setsda(void *data, int state)
-{
-       struct efx_nic *efx = (struct efx_nic *)data;
-       efx_oword_t reg;
-
-       efx_reado(efx, &reg, FR_AB_GPIO_CTL);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_GPIO3_OEN, !state);
-       efx_writeo(efx, &reg, FR_AB_GPIO_CTL);
-}
-
-static void falcon_setscl(void *data, int state)
-{
-       struct efx_nic *efx = (struct efx_nic *)data;
-       efx_oword_t reg;
-
-       efx_reado(efx, &reg, FR_AB_GPIO_CTL);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_GPIO0_OEN, !state);
-       efx_writeo(efx, &reg, FR_AB_GPIO_CTL);
-}
-
-static int falcon_getsda(void *data)
-{
-       struct efx_nic *efx = (struct efx_nic *)data;
-       efx_oword_t reg;
-
-       efx_reado(efx, &reg, FR_AB_GPIO_CTL);
-       return EFX_OWORD_FIELD(reg, FRF_AB_GPIO3_IN);
-}
-
-static int falcon_getscl(void *data)
-{
-       struct efx_nic *efx = (struct efx_nic *)data;
-       efx_oword_t reg;
-
-       efx_reado(efx, &reg, FR_AB_GPIO_CTL);
-       return EFX_OWORD_FIELD(reg, FRF_AB_GPIO0_IN);
-}
-
-static const struct i2c_algo_bit_data falcon_i2c_bit_operations = {
-       .setsda         = falcon_setsda,
-       .setscl         = falcon_setscl,
-       .getsda         = falcon_getsda,
-       .getscl         = falcon_getscl,
-       .udelay         = 5,
-       /* Wait up to 50 ms for slave to let us pull SCL high */
-       .timeout        = DIV_ROUND_UP(HZ, 20),
-};
-
-static void falcon_push_irq_moderation(struct efx_channel *channel)
-{
-       efx_dword_t timer_cmd;
-       struct efx_nic *efx = channel->efx;
-
-       /* Set timer register */
-       if (channel->irq_moderation_us) {
-               unsigned int ticks;
-
-               ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us);
-               EFX_POPULATE_DWORD_2(timer_cmd,
-                                    FRF_AB_TC_TIMER_MODE,
-                                    FFE_BB_TIMER_MODE_INT_HLDOFF,
-                                    FRF_AB_TC_TIMER_VAL,
-                                    ticks - 1);
-       } else {
-               EFX_POPULATE_DWORD_2(timer_cmd,
-                                    FRF_AB_TC_TIMER_MODE,
-                                    FFE_BB_TIMER_MODE_DIS,
-                                    FRF_AB_TC_TIMER_VAL, 0);
-       }
-       BUILD_BUG_ON(FR_AA_TIMER_COMMAND_KER != FR_BZ_TIMER_COMMAND_P0);
-       efx_writed_page_locked(efx, &timer_cmd, FR_BZ_TIMER_COMMAND_P0,
-                              channel->channel);
-}
-
-static void falcon_deconfigure_mac_wrapper(struct efx_nic *efx);
-
-static void falcon_prepare_flush(struct efx_nic *efx)
-{
-       falcon_deconfigure_mac_wrapper(efx);
-
-       /* Wait for the tx and rx fifo's to get to the next packet boundary
-        * (~1ms without back-pressure), then to drain the remainder of the
-        * fifo's at data path speeds (negligible), with a healthy margin. */
-       msleep(10);
-}
-
-/* Acknowledge a legacy interrupt from Falcon
- *
- * This acknowledges a legacy (not MSI) interrupt via INT_ACK_KER_REG.
- *
- * Due to SFC bug 3706 (silicon revision <=A1) reads can be duplicated in the
- * BIU. Interrupt acknowledge is read sensitive so must write instead
- * (then read to ensure the BIU collector is flushed)
- *
- * NB most hardware supports MSI interrupts
- */
-static inline void falcon_irq_ack_a1(struct efx_nic *efx)
-{
-       efx_dword_t reg;
-
-       EFX_POPULATE_DWORD_1(reg, FRF_AA_INT_ACK_KER_FIELD, 0xb7eb7e);
-       efx_writed(efx, &reg, FR_AA_INT_ACK_KER);
-       efx_readd(efx, &reg, FR_AA_WORK_AROUND_BROKEN_PCI_READS);
-}
-
-static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id)
-{
-       struct efx_nic *efx = dev_id;
-       efx_oword_t *int_ker = efx->irq_status.addr;
-       int syserr;
-       int queues;
-
-       /* Check to see if this is our interrupt.  If it isn't, we
-        * exit without having touched the hardware.
-        */
-       if (unlikely(EFX_OWORD_IS_ZERO(*int_ker))) {
-               netif_vdbg(efx, intr, efx->net_dev,
-                          "IRQ %d on CPU %d not for me\n", irq,
-                          raw_smp_processor_id());
-               return IRQ_NONE;
-       }
-       efx->last_irq_cpu = raw_smp_processor_id();
-       netif_vdbg(efx, intr, efx->net_dev,
-                  "IRQ %d on CPU %d status " EFX_OWORD_FMT "\n",
-                  irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker));
-
-       if (!likely(ACCESS_ONCE(efx->irq_soft_enabled)))
-               return IRQ_HANDLED;
-
-       /* Check to see if we have a serious error condition */
-       syserr = EFX_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
-       if (unlikely(syserr))
-               return efx_farch_fatal_interrupt(efx);
-
-       /* Determine interrupting queues, clear interrupt status
-        * register and acknowledge the device interrupt.
-        */
-       BUILD_BUG_ON(FSF_AZ_NET_IVEC_INT_Q_WIDTH > EFX_MAX_CHANNELS);
-       queues = EFX_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_INT_Q);
-       EFX_ZERO_OWORD(*int_ker);
-       wmb(); /* Ensure the vector is cleared before interrupt ack */
-       falcon_irq_ack_a1(efx);
-
-       if (queues & 1)
-               efx_schedule_channel_irq(efx_get_channel(efx, 0));
-       if (queues & 2)
-               efx_schedule_channel_irq(efx_get_channel(efx, 1));
-       return IRQ_HANDLED;
-}
-
-/**************************************************************************
- *
- * RSS
- *
- **************************************************************************
- */
-static int dummy_rx_push_rss_config(struct efx_nic *efx, bool user,
-                                   const u32 *rx_indir_table)
-{
-       (void) efx;
-       (void) user;
-       (void) rx_indir_table;
-       return -ENOSYS;
-}
-
-static int falcon_b0_rx_push_rss_config(struct efx_nic *efx, bool user,
-                                       const u32 *rx_indir_table)
-{
-       efx_oword_t temp;
-
-       (void) user;
-       /* Set hash key for IPv4 */
-       memcpy(&temp, efx->rx_hash_key, sizeof(temp));
-       efx_writeo(efx, &temp, FR_BZ_RX_RSS_TKEY);
-
-       memcpy(efx->rx_indir_table, rx_indir_table,
-              sizeof(efx->rx_indir_table));
-       efx_farch_rx_push_indir_table(efx);
-       return 0;
-}
-
-/**************************************************************************
- *
- * EEPROM/flash
- *
- **************************************************************************
- */
-
-#define FALCON_SPI_MAX_LEN sizeof(efx_oword_t)
-
-static int falcon_spi_poll(struct efx_nic *efx)
-{
-       efx_oword_t reg;
-       efx_reado(efx, &reg, FR_AB_EE_SPI_HCMD);
-       return EFX_OWORD_FIELD(reg, FRF_AB_EE_SPI_HCMD_CMD_EN) ? -EBUSY : 0;
-}
-
-/* Wait for SPI command completion */
-static int falcon_spi_wait(struct efx_nic *efx)
-{
-       /* Most commands will finish quickly, so we start polling at
-        * very short intervals.  Sometimes the command may have to
-        * wait for VPD or expansion ROM access outside of our
-        * control, so we allow up to 100 ms. */
-       unsigned long timeout = jiffies + 1 + DIV_ROUND_UP(HZ, 10);
-       int i;
-
-       for (i = 0; i < 10; i++) {
-               if (!falcon_spi_poll(efx))
-                       return 0;
-               udelay(10);
-       }
-
-       for (;;) {
-               if (!falcon_spi_poll(efx))
-                       return 0;
-               if (time_after_eq(jiffies, timeout)) {
-                       netif_err(efx, hw, efx->net_dev,
-                                 "timed out waiting for SPI\n");
-                       return -ETIMEDOUT;
-               }
-               schedule_timeout_uninterruptible(1);
-       }
-}
-
-static int
-falcon_spi_cmd(struct efx_nic *efx, const struct falcon_spi_device *spi,
-              unsigned int command, int address,
-              const void *in, void *out, size_t len)
-{
-       bool addressed = (address >= 0);
-       bool reading = (out != NULL);
-       efx_oword_t reg;
-       int rc;
-
-       /* Input validation */
-       if (len > FALCON_SPI_MAX_LEN)
-               return -EINVAL;
-
-       /* Check that previous command is not still running */
-       rc = falcon_spi_poll(efx);
-       if (rc)
-               return rc;
-
-       /* Program address register, if we have an address */
-       if (addressed) {
-               EFX_POPULATE_OWORD_1(reg, FRF_AB_EE_SPI_HADR_ADR, address);
-               efx_writeo(efx, &reg, FR_AB_EE_SPI_HADR);
-       }
-
-       /* Program data register, if we have data */
-       if (in != NULL) {
-               memcpy(&reg, in, len);
-               efx_writeo(efx, &reg, FR_AB_EE_SPI_HDATA);
-       }
-
-       /* Issue read/write command */
-       EFX_POPULATE_OWORD_7(reg,
-                            FRF_AB_EE_SPI_HCMD_CMD_EN, 1,
-                            FRF_AB_EE_SPI_HCMD_SF_SEL, spi->device_id,
-                            FRF_AB_EE_SPI_HCMD_DABCNT, len,
-                            FRF_AB_EE_SPI_HCMD_READ, reading,
-                            FRF_AB_EE_SPI_HCMD_DUBCNT, 0,
-                            FRF_AB_EE_SPI_HCMD_ADBCNT,
-                            (addressed ? spi->addr_len : 0),
-                            FRF_AB_EE_SPI_HCMD_ENC, command);
-       efx_writeo(efx, &reg, FR_AB_EE_SPI_HCMD);
-
-       /* Wait for read/write to complete */
-       rc = falcon_spi_wait(efx);
-       if (rc)
-               return rc;
-
-       /* Read data */
-       if (out != NULL) {
-               efx_reado(efx, &reg, FR_AB_EE_SPI_HDATA);
-               memcpy(out, &reg, len);
-       }
-
-       return 0;
-}
-
-static inline u8
-falcon_spi_munge_command(const struct falcon_spi_device *spi,
-                        const u8 command, const unsigned int address)
-{
-       return command | (((address >> 8) & spi->munge_address) << 3);
-}
-
-static int
-falcon_spi_read(struct efx_nic *efx, const struct falcon_spi_device *spi,
-               loff_t start, size_t len, size_t *retlen, u8 *buffer)
-{
-       size_t block_len, pos = 0;
-       unsigned int command;
-       int rc = 0;
-
-       while (pos < len) {
-               block_len = min(len - pos, FALCON_SPI_MAX_LEN);
-
-               command = falcon_spi_munge_command(spi, SPI_READ, start + pos);
-               rc = falcon_spi_cmd(efx, spi, command, start + pos, NULL,
-                                   buffer + pos, block_len);
-               if (rc)
-                       break;
-               pos += block_len;
-
-               /* Avoid locking up the system */
-               cond_resched();
-               if (signal_pending(current)) {
-                       rc = -EINTR;
-                       break;
-               }
-       }
-
-       if (retlen)
-               *retlen = pos;
-       return rc;
-}
-
-#ifdef CONFIG_SFC_MTD
-
-struct falcon_mtd_partition {
-       struct efx_mtd_partition common;
-       const struct falcon_spi_device *spi;
-       size_t offset;
-};
-
-#define to_falcon_mtd_partition(mtd)                           \
-       container_of(mtd, struct falcon_mtd_partition, common.mtd)
-
-static size_t
-falcon_spi_write_limit(const struct falcon_spi_device *spi, size_t start)
-{
-       return min(FALCON_SPI_MAX_LEN,
-                  (spi->block_size - (start & (spi->block_size - 1))));
-}
-
-/* Wait up to 10 ms for buffered write completion */
-static int
-falcon_spi_wait_write(struct efx_nic *efx, const struct falcon_spi_device *spi)
-{
-       unsigned long timeout = jiffies + 1 + DIV_ROUND_UP(HZ, 100);
-       u8 status;
-       int rc;
-
-       for (;;) {
-               rc = falcon_spi_cmd(efx, spi, SPI_RDSR, -1, NULL,
-                                   &status, sizeof(status));
-               if (rc)
-                       return rc;
-               if (!(status & SPI_STATUS_NRDY))
-                       return 0;
-               if (time_after_eq(jiffies, timeout)) {
-                       netif_err(efx, hw, efx->net_dev,
-                                 "SPI write timeout on device %d"
-                                 " last status=0x%02x\n",
-                                 spi->device_id, status);
-                       return -ETIMEDOUT;
-               }
-               schedule_timeout_uninterruptible(1);
-       }
-}
-
-static int
-falcon_spi_write(struct efx_nic *efx, const struct falcon_spi_device *spi,
-                loff_t start, size_t len, size_t *retlen, const u8 *buffer)
-{
-       u8 verify_buffer[FALCON_SPI_MAX_LEN];
-       size_t block_len, pos = 0;
-       unsigned int command;
-       int rc = 0;
-
-       while (pos < len) {
-               rc = falcon_spi_cmd(efx, spi, SPI_WREN, -1, NULL, NULL, 0);
-               if (rc)
-                       break;
-
-               block_len = min(len - pos,
-                               falcon_spi_write_limit(spi, start + pos));
-               command = falcon_spi_munge_command(spi, SPI_WRITE, start + pos);
-               rc = falcon_spi_cmd(efx, spi, command, start + pos,
-                                   buffer + pos, NULL, block_len);
-               if (rc)
-                       break;
-
-               rc = falcon_spi_wait_write(efx, spi);
-               if (rc)
-                       break;
-
-               command = falcon_spi_munge_command(spi, SPI_READ, start + pos);
-               rc = falcon_spi_cmd(efx, spi, command, start + pos,
-                                   NULL, verify_buffer, block_len);
-               if (memcmp(verify_buffer, buffer + pos, block_len)) {
-                       rc = -EIO;
-                       break;
-               }
-
-               pos += block_len;
-
-               /* Avoid locking up the system */
-               cond_resched();
-               if (signal_pending(current)) {
-                       rc = -EINTR;
-                       break;
-               }
-       }
-
-       if (retlen)
-               *retlen = pos;
-       return rc;
-}
-
-static int
-falcon_spi_slow_wait(struct falcon_mtd_partition *part, bool uninterruptible)
-{
-       const struct falcon_spi_device *spi = part->spi;
-       struct efx_nic *efx = part->common.mtd.priv;
-       u8 status;
-       int rc, i;
-
-       /* Wait up to 4s for flash/EEPROM to finish a slow operation. */
-       for (i = 0; i < 40; i++) {
-               __set_current_state(uninterruptible ?
-                                   TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
-               schedule_timeout(HZ / 10);
-               rc = falcon_spi_cmd(efx, spi, SPI_RDSR, -1, NULL,
-                                   &status, sizeof(status));
-               if (rc)
-                       return rc;
-               if (!(status & SPI_STATUS_NRDY))
-                       return 0;
-               if (signal_pending(current))
-                       return -EINTR;
-       }
-       pr_err("%s: timed out waiting for %s\n",
-              part->common.name, part->common.dev_type_name);
-       return -ETIMEDOUT;
-}
-
-static int
-falcon_spi_unlock(struct efx_nic *efx, const struct falcon_spi_device *spi)
-{
-       const u8 unlock_mask = (SPI_STATUS_BP2 | SPI_STATUS_BP1 |
-                               SPI_STATUS_BP0);
-       u8 status;
-       int rc;
-
-       rc = falcon_spi_cmd(efx, spi, SPI_RDSR, -1, NULL,
-                           &status, sizeof(status));
-       if (rc)
-               return rc;
-
-       if (!(status & unlock_mask))
-               return 0; /* already unlocked */
-
-       rc = falcon_spi_cmd(efx, spi, SPI_WREN, -1, NULL, NULL, 0);
-       if (rc)
-               return rc;
-       rc = falcon_spi_cmd(efx, spi, SPI_SST_EWSR, -1, NULL, NULL, 0);
-       if (rc)
-               return rc;
-
-       status &= ~unlock_mask;
-       rc = falcon_spi_cmd(efx, spi, SPI_WRSR, -1, &status,
-                           NULL, sizeof(status));
-       if (rc)
-               return rc;
-       rc = falcon_spi_wait_write(efx, spi);
-       if (rc)
-               return rc;
-
-       return 0;
-}
-
-#define FALCON_SPI_VERIFY_BUF_LEN 16
-
-static int
-falcon_spi_erase(struct falcon_mtd_partition *part, loff_t start, size_t len)
-{
-       const struct falcon_spi_device *spi = part->spi;
-       struct efx_nic *efx = part->common.mtd.priv;
-       unsigned pos, block_len;
-       u8 empty[FALCON_SPI_VERIFY_BUF_LEN];
-       u8 buffer[FALCON_SPI_VERIFY_BUF_LEN];
-       int rc;
-
-       if (len != spi->erase_size)
-               return -EINVAL;
-
-       if (spi->erase_command == 0)
-               return -EOPNOTSUPP;
-
-       rc = falcon_spi_unlock(efx, spi);
-       if (rc)
-               return rc;
-       rc = falcon_spi_cmd(efx, spi, SPI_WREN, -1, NULL, NULL, 0);
-       if (rc)
-               return rc;
-       rc = falcon_spi_cmd(efx, spi, spi->erase_command, start, NULL,
-                           NULL, 0);
-       if (rc)
-               return rc;
-       rc = falcon_spi_slow_wait(part, false);
-
-       /* Verify the entire region has been wiped */
-       memset(empty, 0xff, sizeof(empty));
-       for (pos = 0; pos < len; pos += block_len) {
-               block_len = min(len - pos, sizeof(buffer));
-               rc = falcon_spi_read(efx, spi, start + pos, block_len,
-                                    NULL, buffer);
-               if (rc)
-                       return rc;
-               if (memcmp(empty, buffer, block_len))
-                       return -EIO;
-
-               /* Avoid locking up the system */
-               cond_resched();
-               if (signal_pending(current))
-                       return -EINTR;
-       }
-
-       return rc;
-}
-
-static void falcon_mtd_rename(struct efx_mtd_partition *part)
-{
-       struct efx_nic *efx = part->mtd.priv;
-
-       snprintf(part->name, sizeof(part->name), "%s %s",
-                efx->name, part->type_name);
-}
-
-static int falcon_mtd_read(struct mtd_info *mtd, loff_t start,
-                          size_t len, size_t *retlen, u8 *buffer)
-{
-       struct falcon_mtd_partition *part = to_falcon_mtd_partition(mtd);
-       struct efx_nic *efx = mtd->priv;
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       int rc;
-
-       rc = mutex_lock_interruptible(&nic_data->spi_lock);
-       if (rc)
-               return rc;
-       rc = falcon_spi_read(efx, part->spi, part->offset + start,
-                            len, retlen, buffer);
-       mutex_unlock(&nic_data->spi_lock);
-       return rc;
-}
-
-static int falcon_mtd_erase(struct mtd_info *mtd, loff_t start, size_t len)
-{
-       struct falcon_mtd_partition *part = to_falcon_mtd_partition(mtd);
-       struct efx_nic *efx = mtd->priv;
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       int rc;
-
-       rc = mutex_lock_interruptible(&nic_data->spi_lock);
-       if (rc)
-               return rc;
-       rc = falcon_spi_erase(part, part->offset + start, len);
-       mutex_unlock(&nic_data->spi_lock);
-       return rc;
-}
-
-static int falcon_mtd_write(struct mtd_info *mtd, loff_t start,
-                           size_t len, size_t *retlen, const u8 *buffer)
-{
-       struct falcon_mtd_partition *part = to_falcon_mtd_partition(mtd);
-       struct efx_nic *efx = mtd->priv;
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       int rc;
-
-       rc = mutex_lock_interruptible(&nic_data->spi_lock);
-       if (rc)
-               return rc;
-       rc = falcon_spi_write(efx, part->spi, part->offset + start,
-                             len, retlen, buffer);
-       mutex_unlock(&nic_data->spi_lock);
-       return rc;
-}
-
-static int falcon_mtd_sync(struct mtd_info *mtd)
-{
-       struct falcon_mtd_partition *part = to_falcon_mtd_partition(mtd);
-       struct efx_nic *efx = mtd->priv;
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       int rc;
-
-       mutex_lock(&nic_data->spi_lock);
-       rc = falcon_spi_slow_wait(part, true);
-       mutex_unlock(&nic_data->spi_lock);
-       return rc;
-}
-
-static int falcon_mtd_probe(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       struct falcon_mtd_partition *parts;
-       struct falcon_spi_device *spi;
-       size_t n_parts;
-       int rc = -ENODEV;
-
-       ASSERT_RTNL();
-
-       /* Allocate space for maximum number of partitions */
-       parts = kcalloc(2, sizeof(*parts), GFP_KERNEL);
-       if (!parts)
-               return -ENOMEM;
-       n_parts = 0;
-
-       spi = &nic_data->spi_flash;
-       if (falcon_spi_present(spi) && spi->size > FALCON_FLASH_BOOTCODE_START) {
-               parts[n_parts].spi = spi;
-               parts[n_parts].offset = FALCON_FLASH_BOOTCODE_START;
-               parts[n_parts].common.dev_type_name = "flash";
-               parts[n_parts].common.type_name = "sfc_flash_bootrom";
-               parts[n_parts].common.mtd.type = MTD_NORFLASH;
-               parts[n_parts].common.mtd.flags = MTD_CAP_NORFLASH;
-               parts[n_parts].common.mtd.size = spi->size - FALCON_FLASH_BOOTCODE_START;
-               parts[n_parts].common.mtd.erasesize = spi->erase_size;
-               n_parts++;
-       }
-
-       spi = &nic_data->spi_eeprom;
-       if (falcon_spi_present(spi) && spi->size > FALCON_EEPROM_BOOTCONFIG_START) {
-               parts[n_parts].spi = spi;
-               parts[n_parts].offset = FALCON_EEPROM_BOOTCONFIG_START;
-               parts[n_parts].common.dev_type_name = "EEPROM";
-               parts[n_parts].common.type_name = "sfc_bootconfig";
-               parts[n_parts].common.mtd.type = MTD_RAM;
-               parts[n_parts].common.mtd.flags = MTD_CAP_RAM;
-               parts[n_parts].common.mtd.size =
-                       min(spi->size, FALCON_EEPROM_BOOTCONFIG_END) -
-                       FALCON_EEPROM_BOOTCONFIG_START;
-               parts[n_parts].common.mtd.erasesize = spi->erase_size;
-               n_parts++;
-       }
-
-       rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts));
-       if (rc)
-               kfree(parts);
-       return rc;
-}
-
-#endif /* CONFIG_SFC_MTD */
-
-/**************************************************************************
- *
- * XMAC operations
- *
- **************************************************************************
- */
-
-/* Configure the XAUI driver that is an output from Falcon */
-static void falcon_setup_xaui(struct efx_nic *efx)
-{
-       efx_oword_t sdctl, txdrv;
-
-       /* Move the XAUI into low power, unless there is no PHY, in
-        * which case the XAUI will have to drive a cable. */
-       if (efx->phy_type == PHY_TYPE_NONE)
-               return;
-
-       efx_reado(efx, &sdctl, FR_AB_XX_SD_CTL);
-       EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_HIDRVD, FFE_AB_XX_SD_CTL_DRV_DEF);
-       EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_LODRVD, FFE_AB_XX_SD_CTL_DRV_DEF);
-       EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_HIDRVC, FFE_AB_XX_SD_CTL_DRV_DEF);
-       EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_LODRVC, FFE_AB_XX_SD_CTL_DRV_DEF);
-       EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_HIDRVB, FFE_AB_XX_SD_CTL_DRV_DEF);
-       EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_LODRVB, FFE_AB_XX_SD_CTL_DRV_DEF);
-       EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_HIDRVA, FFE_AB_XX_SD_CTL_DRV_DEF);
-       EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_LODRVA, FFE_AB_XX_SD_CTL_DRV_DEF);
-       efx_writeo(efx, &sdctl, FR_AB_XX_SD_CTL);
-
-       EFX_POPULATE_OWORD_8(txdrv,
-                            FRF_AB_XX_DEQD, FFE_AB_XX_TXDRV_DEQ_DEF,
-                            FRF_AB_XX_DEQC, FFE_AB_XX_TXDRV_DEQ_DEF,
-                            FRF_AB_XX_DEQB, FFE_AB_XX_TXDRV_DEQ_DEF,
-                            FRF_AB_XX_DEQA, FFE_AB_XX_TXDRV_DEQ_DEF,
-                            FRF_AB_XX_DTXD, FFE_AB_XX_TXDRV_DTX_DEF,
-                            FRF_AB_XX_DTXC, FFE_AB_XX_TXDRV_DTX_DEF,
-                            FRF_AB_XX_DTXB, FFE_AB_XX_TXDRV_DTX_DEF,
-                            FRF_AB_XX_DTXA, FFE_AB_XX_TXDRV_DTX_DEF);
-       efx_writeo(efx, &txdrv, FR_AB_XX_TXDRV_CTL);
-}
-
-int falcon_reset_xaui(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       efx_oword_t reg;
-       int count;
-
-       /* Don't fetch MAC statistics over an XMAC reset */
-       WARN_ON(nic_data->stats_disable_count == 0);
-
-       /* Start reset sequence */
-       EFX_POPULATE_OWORD_1(reg, FRF_AB_XX_RST_XX_EN, 1);
-       efx_writeo(efx, &reg, FR_AB_XX_PWR_RST);
-
-       /* Wait up to 10 ms for completion, then reinitialise */
-       for (count = 0; count < 1000; count++) {
-               efx_reado(efx, &reg, FR_AB_XX_PWR_RST);
-               if (EFX_OWORD_FIELD(reg, FRF_AB_XX_RST_XX_EN) == 0 &&
-                   EFX_OWORD_FIELD(reg, FRF_AB_XX_SD_RST_ACT) == 0) {
-                       falcon_setup_xaui(efx);
-                       return 0;
-               }
-               udelay(10);
-       }
-       netif_err(efx, hw, efx->net_dev,
-                 "timed out waiting for XAUI/XGXS reset\n");
-       return -ETIMEDOUT;
-}
-
-static void falcon_ack_status_intr(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       efx_oword_t reg;
-
-       if ((efx_nic_rev(efx) != EFX_REV_FALCON_B0) || LOOPBACK_INTERNAL(efx))
-               return;
-
-       /* We expect xgmii faults if the wireside link is down */
-       if (!efx->link_state.up)
-               return;
-
-       /* We can only use this interrupt to signal the negative edge of
-        * xaui_align [we have to poll the positive edge]. */
-       if (nic_data->xmac_poll_required)
-               return;
-
-       efx_reado(efx, &reg, FR_AB_XM_MGT_INT_MSK);
-}
-
-static bool falcon_xgxs_link_ok(struct efx_nic *efx)
-{
-       efx_oword_t reg;
-       bool align_done, link_ok = false;
-       int sync_status;
-
-       /* Read link status */
-       efx_reado(efx, &reg, FR_AB_XX_CORE_STAT);
-
-       align_done = EFX_OWORD_FIELD(reg, FRF_AB_XX_ALIGN_DONE);
-       sync_status = EFX_OWORD_FIELD(reg, FRF_AB_XX_SYNC_STAT);
-       if (align_done && (sync_status == FFE_AB_XX_STAT_ALL_LANES))
-               link_ok = true;
-
-       /* Clear link status ready for next read */
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_COMMA_DET, FFE_AB_XX_STAT_ALL_LANES);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_CHAR_ERR, FFE_AB_XX_STAT_ALL_LANES);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_DISPERR, FFE_AB_XX_STAT_ALL_LANES);
-       efx_writeo(efx, &reg, FR_AB_XX_CORE_STAT);
-
-       return link_ok;
-}
-
-static bool falcon_xmac_link_ok(struct efx_nic *efx)
-{
-       /*
-        * Check MAC's XGXS link status except when using XGMII loopback
-        * which bypasses the XGXS block.
-        * If possible, check PHY's XGXS link status except when using
-        * MAC loopback.
-        */
-       return (efx->loopback_mode == LOOPBACK_XGMII ||
-               falcon_xgxs_link_ok(efx)) &&
-               (!(efx->mdio.mmds & (1 << MDIO_MMD_PHYXS)) ||
-                LOOPBACK_INTERNAL(efx) ||
-                efx_mdio_phyxgxs_lane_sync(efx));
-}
-
-static void falcon_reconfigure_xmac_core(struct efx_nic *efx)
-{
-       unsigned int max_frame_len;
-       efx_oword_t reg;
-       bool rx_fc = !!(efx->link_state.fc & EFX_FC_RX);
-       bool tx_fc = !!(efx->link_state.fc & EFX_FC_TX);
-
-       /* Configure MAC  - cut-thru mode is hard wired on */
-       EFX_POPULATE_OWORD_3(reg,
-                            FRF_AB_XM_RX_JUMBO_MODE, 1,
-                            FRF_AB_XM_TX_STAT_EN, 1,
-                            FRF_AB_XM_RX_STAT_EN, 1);
-       efx_writeo(efx, &reg, FR_AB_XM_GLB_CFG);
-
-       /* Configure TX */
-       EFX_POPULATE_OWORD_6(reg,
-                            FRF_AB_XM_TXEN, 1,
-                            FRF_AB_XM_TX_PRMBL, 1,
-                            FRF_AB_XM_AUTO_PAD, 1,
-                            FRF_AB_XM_TXCRC, 1,
-                            FRF_AB_XM_FCNTL, tx_fc,
-                            FRF_AB_XM_IPG, 0x3);
-       efx_writeo(efx, &reg, FR_AB_XM_TX_CFG);
-
-       /* Configure RX */
-       EFX_POPULATE_OWORD_5(reg,
-                            FRF_AB_XM_RXEN, 1,
-                            FRF_AB_XM_AUTO_DEPAD, 0,
-                            FRF_AB_XM_ACPT_ALL_MCAST, 1,
-                            FRF_AB_XM_ACPT_ALL_UCAST, !efx->unicast_filter,
-                            FRF_AB_XM_PASS_CRC_ERR, 1);
-       efx_writeo(efx, &reg, FR_AB_XM_RX_CFG);
-
-       /* Set frame length */
-       max_frame_len = EFX_MAX_FRAME_LEN(efx->net_dev->mtu);
-       EFX_POPULATE_OWORD_1(reg, FRF_AB_XM_MAX_RX_FRM_SIZE, max_frame_len);
-       efx_writeo(efx, &reg, FR_AB_XM_RX_PARAM);
-       EFX_POPULATE_OWORD_2(reg,
-                            FRF_AB_XM_MAX_TX_FRM_SIZE, max_frame_len,
-                            FRF_AB_XM_TX_JUMBO_MODE, 1);
-       efx_writeo(efx, &reg, FR_AB_XM_TX_PARAM);
-
-       EFX_POPULATE_OWORD_2(reg,
-                            FRF_AB_XM_PAUSE_TIME, 0xfffe, /* MAX PAUSE TIME */
-                            FRF_AB_XM_DIS_FCNTL, !rx_fc);
-       efx_writeo(efx, &reg, FR_AB_XM_FC);
-
-       /* Set MAC address */
-       memcpy(&reg, &efx->net_dev->dev_addr[0], 4);
-       efx_writeo(efx, &reg, FR_AB_XM_ADR_LO);
-       memcpy(&reg, &efx->net_dev->dev_addr[4], 2);
-       efx_writeo(efx, &reg, FR_AB_XM_ADR_HI);
-}
-
-static void falcon_reconfigure_xgxs_core(struct efx_nic *efx)
-{
-       efx_oword_t reg;
-       bool xgxs_loopback = (efx->loopback_mode == LOOPBACK_XGXS);
-       bool xaui_loopback = (efx->loopback_mode == LOOPBACK_XAUI);
-       bool xgmii_loopback = (efx->loopback_mode == LOOPBACK_XGMII);
-       bool old_xgmii_loopback, old_xgxs_loopback, old_xaui_loopback;
-
-       /* XGXS block is flaky and will need to be reset if moving
-        * into our out of XGMII, XGXS or XAUI loopbacks. */
-       efx_reado(efx, &reg, FR_AB_XX_CORE_STAT);
-       old_xgxs_loopback = EFX_OWORD_FIELD(reg, FRF_AB_XX_XGXS_LB_EN);
-       old_xgmii_loopback = EFX_OWORD_FIELD(reg, FRF_AB_XX_XGMII_LB_EN);
-
-       efx_reado(efx, &reg, FR_AB_XX_SD_CTL);
-       old_xaui_loopback = EFX_OWORD_FIELD(reg, FRF_AB_XX_LPBKA);
-
-       /* The PHY driver may have turned XAUI off */
-       if ((xgxs_loopback != old_xgxs_loopback) ||
-           (xaui_loopback != old_xaui_loopback) ||
-           (xgmii_loopback != old_xgmii_loopback))
-               falcon_reset_xaui(efx);
-
-       efx_reado(efx, &reg, FR_AB_XX_CORE_STAT);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_FORCE_SIG,
-                           (xgxs_loopback || xaui_loopback) ?
-                           FFE_AB_XX_FORCE_SIG_ALL_LANES : 0);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_XGXS_LB_EN, xgxs_loopback);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_XGMII_LB_EN, xgmii_loopback);
-       efx_writeo(efx, &reg, FR_AB_XX_CORE_STAT);
-
-       efx_reado(efx, &reg, FR_AB_XX_SD_CTL);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_LPBKD, xaui_loopback);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_LPBKC, xaui_loopback);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_LPBKB, xaui_loopback);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_LPBKA, xaui_loopback);
-       efx_writeo(efx, &reg, FR_AB_XX_SD_CTL);
-}
-
-
-/* Try to bring up the Falcon side of the Falcon-Phy XAUI link */
-static bool falcon_xmac_link_ok_retry(struct efx_nic *efx, int tries)
-{
-       bool mac_up = falcon_xmac_link_ok(efx);
-
-       if (LOOPBACK_MASK(efx) & LOOPBACKS_EXTERNAL(efx) & LOOPBACKS_WS ||
-           efx_phy_mode_disabled(efx->phy_mode))
-               /* XAUI link is expected to be down */
-               return mac_up;
-
-       falcon_stop_nic_stats(efx);
-
-       while (!mac_up && tries) {
-               netif_dbg(efx, hw, efx->net_dev, "bashing xaui\n");
-               falcon_reset_xaui(efx);
-               udelay(200);
-
-               mac_up = falcon_xmac_link_ok(efx);
-               --tries;
-       }
-
-       falcon_start_nic_stats(efx);
-
-       return mac_up;
-}
-
-static bool falcon_xmac_check_fault(struct efx_nic *efx)
-{
-       return !falcon_xmac_link_ok_retry(efx, 5);
-}
-
-static int falcon_reconfigure_xmac(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-
-       efx_farch_filter_sync_rx_mode(efx);
-
-       falcon_reconfigure_xgxs_core(efx);
-       falcon_reconfigure_xmac_core(efx);
-
-       falcon_reconfigure_mac_wrapper(efx);
-
-       nic_data->xmac_poll_required = !falcon_xmac_link_ok_retry(efx, 5);
-       falcon_ack_status_intr(efx);
-
-       return 0;
-}
-
-static void falcon_poll_xmac(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-
-       /* We expect xgmii faults if the wireside link is down */
-       if (!efx->link_state.up || !nic_data->xmac_poll_required)
-               return;
-
-       nic_data->xmac_poll_required = !falcon_xmac_link_ok_retry(efx, 1);
-       falcon_ack_status_intr(efx);
-}
-
-/**************************************************************************
- *
- * MAC wrapper
- *
- **************************************************************************
- */
-
-static void falcon_push_multicast_hash(struct efx_nic *efx)
-{
-       union efx_multicast_hash *mc_hash = &efx->multicast_hash;
-
-       WARN_ON(!mutex_is_locked(&efx->mac_lock));
-
-       efx_writeo(efx, &mc_hash->oword[0], FR_AB_MAC_MC_HASH_REG0);
-       efx_writeo(efx, &mc_hash->oword[1], FR_AB_MAC_MC_HASH_REG1);
-}
-
-static void falcon_reset_macs(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       efx_oword_t reg, mac_ctrl;
-       int count;
-
-       if (efx_nic_rev(efx) < EFX_REV_FALCON_B0) {
-               /* It's not safe to use GLB_CTL_REG to reset the
-                * macs, so instead use the internal MAC resets
-                */
-               EFX_POPULATE_OWORD_1(reg, FRF_AB_XM_CORE_RST, 1);
-               efx_writeo(efx, &reg, FR_AB_XM_GLB_CFG);
-
-               for (count = 0; count < 10000; count++) {
-                       efx_reado(efx, &reg, FR_AB_XM_GLB_CFG);
-                       if (EFX_OWORD_FIELD(reg, FRF_AB_XM_CORE_RST) ==
-                           0)
-                               return;
-                       udelay(10);
-               }
-
-               netif_err(efx, hw, efx->net_dev,
-                         "timed out waiting for XMAC core reset\n");
-       }
-
-       /* Mac stats will fail whist the TX fifo is draining */
-       WARN_ON(nic_data->stats_disable_count == 0);
-
-       efx_reado(efx, &mac_ctrl, FR_AB_MAC_CTRL);
-       EFX_SET_OWORD_FIELD(mac_ctrl, FRF_BB_TXFIFO_DRAIN_EN, 1);
-       efx_writeo(efx, &mac_ctrl, FR_AB_MAC_CTRL);
-
-       efx_reado(efx, &reg, FR_AB_GLB_CTL);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_RST_XGTX, 1);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_RST_XGRX, 1);
-       EFX_SET_OWORD_FIELD(reg, FRF_AB_RST_EM, 1);
-       efx_writeo(efx, &reg, FR_AB_GLB_CTL);
-
-       count = 0;
-       while (1) {
-               efx_reado(efx, &reg, FR_AB_GLB_CTL);
-               if (!EFX_OWORD_FIELD(reg, FRF_AB_RST_XGTX) &&
-                   !EFX_OWORD_FIELD(reg, FRF_AB_RST_XGRX) &&
-                   !EFX_OWORD_FIELD(reg, FRF_AB_RST_EM)) {
-                       netif_dbg(efx, hw, efx->net_dev,
-                                 "Completed MAC reset after %d loops\n",
-                                 count);
-                       break;
-               }
-               if (count > 20) {
-                       netif_err(efx, hw, efx->net_dev, "MAC reset failed\n");
-                       break;
-               }
-               count++;
-               udelay(10);
-       }
-
-       /* Ensure the correct MAC is selected before statistics
-        * are re-enabled by the caller */
-       efx_writeo(efx, &mac_ctrl, FR_AB_MAC_CTRL);
-
-       falcon_setup_xaui(efx);
-}
-
-static void falcon_drain_tx_fifo(struct efx_nic *efx)
-{
-       efx_oword_t reg;
-
-       if ((efx_nic_rev(efx) < EFX_REV_FALCON_B0) ||
-           (efx->loopback_mode != LOOPBACK_NONE))
-               return;
-
-       efx_reado(efx, &reg, FR_AB_MAC_CTRL);
-       /* There is no point in draining more than once */
-       if (EFX_OWORD_FIELD(reg, FRF_BB_TXFIFO_DRAIN_EN))
-               return;
-
-       falcon_reset_macs(efx);
-}
-
-static void falcon_deconfigure_mac_wrapper(struct efx_nic *efx)
-{
-       efx_oword_t reg;
-
-       if (efx_nic_rev(efx) < EFX_REV_FALCON_B0)
-               return;
-
-       /* Isolate the MAC -> RX */
-       efx_reado(efx, &reg, FR_AZ_RX_CFG);
-       EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_INGR_EN, 0);
-       efx_writeo(efx, &reg, FR_AZ_RX_CFG);
-
-       /* Isolate TX -> MAC */
-       falcon_drain_tx_fifo(efx);
-}
-
-static void falcon_reconfigure_mac_wrapper(struct efx_nic *efx)
-{
-       struct efx_link_state *link_state = &efx->link_state;
-       efx_oword_t reg;
-       int link_speed, isolate;
-
-       isolate = !!ACCESS_ONCE(efx->reset_pending);
-
-       switch (link_state->speed) {
-       case 10000: link_speed = 3; break;
-       case 1000:  link_speed = 2; break;
-       case 100:   link_speed = 1; break;
-       default:    link_speed = 0; break;
-       }
-
-       /* MAC_LINK_STATUS controls MAC backpressure but doesn't work
-        * as advertised.  Disable to ensure packets are not
-        * indefinitely held and TX queue can be flushed at any point
-        * while the link is down. */
-       EFX_POPULATE_OWORD_5(reg,
-                            FRF_AB_MAC_XOFF_VAL, 0xffff /* max pause time */,
-                            FRF_AB_MAC_BCAD_ACPT, 1,
-                            FRF_AB_MAC_UC_PROM, !efx->unicast_filter,
-                            FRF_AB_MAC_LINK_STATUS, 1, /* always set */
-                            FRF_AB_MAC_SPEED, link_speed);
-       /* On B0, MAC backpressure can be disabled and packets get
-        * discarded. */
-       if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
-               EFX_SET_OWORD_FIELD(reg, FRF_BB_TXFIFO_DRAIN_EN,
-                                   !link_state->up || isolate);
-       }
-
-       efx_writeo(efx, &reg, FR_AB_MAC_CTRL);
-
-       /* Restore the multicast hash registers. */
-       falcon_push_multicast_hash(efx);
-
-       efx_reado(efx, &reg, FR_AZ_RX_CFG);
-       /* Enable XOFF signal from RX FIFO (we enabled it during NIC
-        * initialisation but it may read back as 0) */
-       EFX_SET_OWORD_FIELD(reg, FRF_AZ_RX_XOFF_MAC_EN, 1);
-       /* Unisolate the MAC -> RX */
-       if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0)
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_INGR_EN, !isolate);
-       efx_writeo(efx, &reg, FR_AZ_RX_CFG);
-}
-
-static void falcon_stats_request(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       efx_oword_t reg;
-
-       WARN_ON(nic_data->stats_pending);
-       WARN_ON(nic_data->stats_disable_count);
-
-       FALCON_XMAC_STATS_DMA_FLAG(efx) = 0;
-       nic_data->stats_pending = true;
-       wmb(); /* ensure done flag is clear */
-
-       /* Initiate DMA transfer of stats */
-       EFX_POPULATE_OWORD_2(reg,
-                            FRF_AB_MAC_STAT_DMA_CMD, 1,
-                            FRF_AB_MAC_STAT_DMA_ADR,
-                            efx->stats_buffer.dma_addr);
-       efx_writeo(efx, &reg, FR_AB_MAC_STAT_DMA);
-
-       mod_timer(&nic_data->stats_timer, round_jiffies_up(jiffies + HZ / 2));
-}
-
-static void falcon_stats_complete(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-
-       if (!nic_data->stats_pending)
-               return;
-
-       nic_data->stats_pending = false;
-       if (FALCON_XMAC_STATS_DMA_FLAG(efx)) {
-               rmb(); /* read the done flag before the stats */
-               efx_nic_update_stats(falcon_stat_desc, FALCON_STAT_COUNT,
-                                    falcon_stat_mask, nic_data->stats,
-                                    efx->stats_buffer.addr, true);
-       } else {
-               netif_err(efx, hw, efx->net_dev,
-                         "timed out waiting for statistics\n");
-       }
-}
-
-static void falcon_stats_timer_func(unsigned long context)
-{
-       struct efx_nic *efx = (struct efx_nic *)context;
-       struct falcon_nic_data *nic_data = efx->nic_data;
-
-       spin_lock(&efx->stats_lock);
-
-       falcon_stats_complete(efx);
-       if (nic_data->stats_disable_count == 0)
-               falcon_stats_request(efx);
-
-       spin_unlock(&efx->stats_lock);
-}
-
-static bool falcon_loopback_link_poll(struct efx_nic *efx)
-{
-       struct efx_link_state old_state = efx->link_state;
-
-       WARN_ON(!mutex_is_locked(&efx->mac_lock));
-       WARN_ON(!LOOPBACK_INTERNAL(efx));
-
-       efx->link_state.fd = true;
-       efx->link_state.fc = efx->wanted_fc;
-       efx->link_state.up = true;
-       efx->link_state.speed = 10000;
-
-       return !efx_link_state_equal(&efx->link_state, &old_state);
-}
-
-static int falcon_reconfigure_port(struct efx_nic *efx)
-{
-       int rc;
-
-       WARN_ON(efx_nic_rev(efx) > EFX_REV_FALCON_B0);
-
-       /* Poll the PHY link state *before* reconfiguring it. This means we
-        * will pick up the correct speed (in loopback) to select the correct
-        * MAC.
-        */
-       if (LOOPBACK_INTERNAL(efx))
-               falcon_loopback_link_poll(efx);
-       else
-               efx->phy_op->poll(efx);
-
-       falcon_stop_nic_stats(efx);
-       falcon_deconfigure_mac_wrapper(efx);
-
-       falcon_reset_macs(efx);
-
-       efx->phy_op->reconfigure(efx);
-       rc = falcon_reconfigure_xmac(efx);
-       BUG_ON(rc);
-
-       falcon_start_nic_stats(efx);
-
-       /* Synchronise efx->link_state with the kernel */
-       efx_link_status_changed(efx);
-
-       return 0;
-}
-
-/* TX flow control may automatically turn itself off if the link
- * partner (intermittently) stops responding to pause frames. There
- * isn't any indication that this has happened, so the best we do is
- * leave it up to the user to spot this and fix it by cycling transmit
- * flow control on this end.
- */
-
-static void falcon_a1_prepare_enable_fc_tx(struct efx_nic *efx)
-{
-       /* Schedule a reset to recover */
-       efx_schedule_reset(efx, RESET_TYPE_INVISIBLE);
-}
-
-static void falcon_b0_prepare_enable_fc_tx(struct efx_nic *efx)
-{
-       /* Recover by resetting the EM block */
-       falcon_stop_nic_stats(efx);
-       falcon_drain_tx_fifo(efx);
-       falcon_reconfigure_xmac(efx);
-       falcon_start_nic_stats(efx);
-}
-
-/**************************************************************************
- *
- * PHY access via GMII
- *
- **************************************************************************
- */
-
-/* Wait for GMII access to complete */
-static int falcon_gmii_wait(struct efx_nic *efx)
-{
-       efx_oword_t md_stat;
-       int count;
-
-       /* wait up to 50ms - taken max from datasheet */
-       for (count = 0; count < 5000; count++) {
-               efx_reado(efx, &md_stat, FR_AB_MD_STAT);
-               if (EFX_OWORD_FIELD(md_stat, FRF_AB_MD_BSY) == 0) {
-                       if (EFX_OWORD_FIELD(md_stat, FRF_AB_MD_LNFL) != 0 ||
-                           EFX_OWORD_FIELD(md_stat, FRF_AB_MD_BSERR) != 0) {
-                               netif_err(efx, hw, efx->net_dev,
-                                         "error from GMII access "
-                                         EFX_OWORD_FMT"\n",
-                                         EFX_OWORD_VAL(md_stat));
-                               return -EIO;
-                       }
-                       return 0;
-               }
-               udelay(10);
-       }
-       netif_err(efx, hw, efx->net_dev, "timed out waiting for GMII\n");
-       return -ETIMEDOUT;
-}
-
-/* Write an MDIO register of a PHY connected to Falcon. */
-static int falcon_mdio_write(struct net_device *net_dev,
-                            int prtad, int devad, u16 addr, u16 value)
-{
-       struct efx_nic *efx = netdev_priv(net_dev);
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       efx_oword_t reg;
-       int rc;
-
-       netif_vdbg(efx, hw, efx->net_dev,
-                  "writing MDIO %d register %d.%d with 0x%04x\n",
-                   prtad, devad, addr, value);
-
-       mutex_lock(&nic_data->mdio_lock);
-
-       /* Check MDIO not currently being accessed */
-       rc = falcon_gmii_wait(efx);
-       if (rc)
-               goto out;
-
-       /* Write the address/ID register */
-       EFX_POPULATE_OWORD_1(reg, FRF_AB_MD_PHY_ADR, addr);
-       efx_writeo(efx, &reg, FR_AB_MD_PHY_ADR);
-
-       EFX_POPULATE_OWORD_2(reg, FRF_AB_MD_PRT_ADR, prtad,
-                            FRF_AB_MD_DEV_ADR, devad);
-       efx_writeo(efx, &reg, FR_AB_MD_ID);
-
-       /* Write data */
-       EFX_POPULATE_OWORD_1(reg, FRF_AB_MD_TXD, value);
-       efx_writeo(efx, &reg, FR_AB_MD_TXD);
-
-       EFX_POPULATE_OWORD_2(reg,
-                            FRF_AB_MD_WRC, 1,
-                            FRF_AB_MD_GC, 0);
-       efx_writeo(efx, &reg, FR_AB_MD_CS);
-
-       /* Wait for data to be written */
-       rc = falcon_gmii_wait(efx);
-       if (rc) {
-               /* Abort the write operation */
-               EFX_POPULATE_OWORD_2(reg,
-                                    FRF_AB_MD_WRC, 0,
-                                    FRF_AB_MD_GC, 1);
-               efx_writeo(efx, &reg, FR_AB_MD_CS);
-               udelay(10);
-       }
-
-out:
-       mutex_unlock(&nic_data->mdio_lock);
-       return rc;
-}
-
-/* Read an MDIO register of a PHY connected to Falcon. */
-static int falcon_mdio_read(struct net_device *net_dev,
-                           int prtad, int devad, u16 addr)
-{
-       struct efx_nic *efx = netdev_priv(net_dev);
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       efx_oword_t reg;
-       int rc;
-
-       mutex_lock(&nic_data->mdio_lock);
-
-       /* Check MDIO not currently being accessed */
-       rc = falcon_gmii_wait(efx);
-       if (rc)
-               goto out;
-
-       EFX_POPULATE_OWORD_1(reg, FRF_AB_MD_PHY_ADR, addr);
-       efx_writeo(efx, &reg, FR_AB_MD_PHY_ADR);
-
-       EFX_POPULATE_OWORD_2(reg, FRF_AB_MD_PRT_ADR, prtad,
-                            FRF_AB_MD_DEV_ADR, devad);
-       efx_writeo(efx, &reg, FR_AB_MD_ID);
-
-       /* Request data to be read */
-       EFX_POPULATE_OWORD_2(reg, FRF_AB_MD_RDC, 1, FRF_AB_MD_GC, 0);
-       efx_writeo(efx, &reg, FR_AB_MD_CS);
-
-       /* Wait for data to become available */
-       rc = falcon_gmii_wait(efx);
-       if (rc == 0) {
-               efx_reado(efx, &reg, FR_AB_MD_RXD);
-               rc = EFX_OWORD_FIELD(reg, FRF_AB_MD_RXD);
-               netif_vdbg(efx, hw, efx->net_dev,
-                          "read from MDIO %d register %d.%d, got %04x\n",
-                          prtad, devad, addr, rc);
-       } else {
-               /* Abort the read operation */
-               EFX_POPULATE_OWORD_2(reg,
-                                    FRF_AB_MD_RIC, 0,
-                                    FRF_AB_MD_GC, 1);
-               efx_writeo(efx, &reg, FR_AB_MD_CS);
-
-               netif_dbg(efx, hw, efx->net_dev,
-                         "read from MDIO %d register %d.%d, got error %d\n",
-                         prtad, devad, addr, rc);
-       }
-
-out:
-       mutex_unlock(&nic_data->mdio_lock);
-       return rc;
-}
-
-/* This call is responsible for hooking in the MAC and PHY operations */
-static int falcon_probe_port(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       int rc;
-
-       switch (efx->phy_type) {
-       case PHY_TYPE_SFX7101:
-               efx->phy_op = &falcon_sfx7101_phy_ops;
-               break;
-       case PHY_TYPE_QT2022C2:
-       case PHY_TYPE_QT2025C:
-               efx->phy_op = &falcon_qt202x_phy_ops;
-               break;
-       case PHY_TYPE_TXC43128:
-               efx->phy_op = &falcon_txc_phy_ops;
-               break;
-       default:
-               netif_err(efx, probe, efx->net_dev, "Unknown PHY type %d\n",
-                         efx->phy_type);
-               return -ENODEV;
-       }
-
-       /* Fill out MDIO structure and loopback modes */
-       mutex_init(&nic_data->mdio_lock);
-       efx->mdio.mdio_read = falcon_mdio_read;
-       efx->mdio.mdio_write = falcon_mdio_write;
-       rc = efx->phy_op->probe(efx);
-       if (rc != 0)
-               return rc;
-
-       /* Initial assumption */
-       efx->link_state.speed = 10000;
-       efx->link_state.fd = true;
-
-       /* Hardware flow ctrl. FalconA RX FIFO too small for pause generation */
-       if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0)
-               efx->wanted_fc = EFX_FC_RX | EFX_FC_TX;
-       else
-               efx->wanted_fc = EFX_FC_RX;
-       if (efx->mdio.mmds & MDIO_DEVS_AN)
-               efx->wanted_fc |= EFX_FC_AUTO;
-
-       /* Allocate buffer for stats */
-       rc = efx_nic_alloc_buffer(efx, &efx->stats_buffer,
-                                 FALCON_MAC_STATS_SIZE, GFP_KERNEL);
-       if (rc)
-               return rc;
-       netif_dbg(efx, probe, efx->net_dev,
-                 "stats buffer at %llx (virt %p phys %llx)\n",
-                 (u64)efx->stats_buffer.dma_addr,
-                 efx->stats_buffer.addr,
-                 (u64)virt_to_phys(efx->stats_buffer.addr));
-
-       return 0;
-}
-
-static void falcon_remove_port(struct efx_nic *efx)
-{
-       efx->phy_op->remove(efx);
-       efx_nic_free_buffer(efx, &efx->stats_buffer);
-}
-
-/* Global events are basically PHY events */
-static bool
-falcon_handle_global_event(struct efx_channel *channel, efx_qword_t *event)
-{
-       struct efx_nic *efx = channel->efx;
-       struct falcon_nic_data *nic_data = efx->nic_data;
-
-       if (EFX_QWORD_FIELD(*event, FSF_AB_GLB_EV_G_PHY0_INTR) ||
-           EFX_QWORD_FIELD(*event, FSF_AB_GLB_EV_XG_PHY0_INTR) ||
-           EFX_QWORD_FIELD(*event, FSF_AB_GLB_EV_XFP_PHY0_INTR))
-               /* Ignored */
-               return true;
-
-       if ((efx_nic_rev(efx) == EFX_REV_FALCON_B0) &&
-           EFX_QWORD_FIELD(*event, FSF_BB_GLB_EV_XG_MGT_INTR)) {
-               nic_data->xmac_poll_required = true;
-               return true;
-       }
-
-       if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1 ?
-           EFX_QWORD_FIELD(*event, FSF_AA_GLB_EV_RX_RECOVERY) :
-           EFX_QWORD_FIELD(*event, FSF_BB_GLB_EV_RX_RECOVERY)) {
-               netif_err(efx, rx_err, efx->net_dev,
-                         "channel %d seen global RX_RESET event. Resetting.\n",
-                         channel->channel);
-
-               atomic_inc(&efx->rx_reset);
-               efx_schedule_reset(efx, EFX_WORKAROUND_6555(efx) ?
-                                  RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE);
-               return true;
-       }
-
-       return false;
-}
-
-/**************************************************************************
- *
- * Falcon test code
- *
- **************************************************************************/
-
-static int
-falcon_read_nvram(struct efx_nic *efx, struct falcon_nvconfig *nvconfig_out)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       struct falcon_nvconfig *nvconfig;
-       struct falcon_spi_device *spi;
-       void *region;
-       int rc, magic_num, struct_ver;
-       __le16 *word, *limit;
-       u32 csum;
-
-       if (falcon_spi_present(&nic_data->spi_flash))
-               spi = &nic_data->spi_flash;
-       else if (falcon_spi_present(&nic_data->spi_eeprom))
-               spi = &nic_data->spi_eeprom;
-       else
-               return -EINVAL;
-
-       region = kmalloc(FALCON_NVCONFIG_END, GFP_KERNEL);
-       if (!region)
-               return -ENOMEM;
-       nvconfig = region + FALCON_NVCONFIG_OFFSET;
-
-       mutex_lock(&nic_data->spi_lock);
-       rc = falcon_spi_read(efx, spi, 0, FALCON_NVCONFIG_END, NULL, region);
-       mutex_unlock(&nic_data->spi_lock);
-       if (rc) {
-               netif_err(efx, hw, efx->net_dev, "Failed to read %s\n",
-                         falcon_spi_present(&nic_data->spi_flash) ?
-                         "flash" : "EEPROM");
-               rc = -EIO;
-               goto out;
-       }
-
-       magic_num = le16_to_cpu(nvconfig->board_magic_num);
-       struct_ver = le16_to_cpu(nvconfig->board_struct_ver);
-
-       rc = -EINVAL;
-       if (magic_num != FALCON_NVCONFIG_BOARD_MAGIC_NUM) {
-               netif_err(efx, hw, efx->net_dev,
-                         "NVRAM bad magic 0x%x\n", magic_num);
-               goto out;
-       }
-       if (struct_ver < 2) {
-               netif_err(efx, hw, efx->net_dev,
-                         "NVRAM has ancient version 0x%x\n", struct_ver);
-               goto out;
-       } else if (struct_ver < 4) {
-               word = &nvconfig->board_magic_num;
-               limit = (__le16 *) (nvconfig + 1);
-       } else {
-               word = region;
-               limit = region + FALCON_NVCONFIG_END;
-       }
-       for (csum = 0; word < limit; ++word)
-               csum += le16_to_cpu(*word);
-
-       if (~csum & 0xffff) {
-               netif_err(efx, hw, efx->net_dev,
-                         "NVRAM has incorrect checksum\n");
-               goto out;
-       }
-
-       rc = 0;
-       if (nvconfig_out)
-               memcpy(nvconfig_out, nvconfig, sizeof(*nvconfig));
-
- out:
-       kfree(region);
-       return rc;
-}
-
-static int falcon_test_nvram(struct efx_nic *efx)
-{
-       return falcon_read_nvram(efx, NULL);
-}
-
-static const struct efx_farch_register_test falcon_b0_register_tests[] = {
-       { FR_AZ_ADR_REGION,
-         EFX_OWORD32(0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF) },
-       { FR_AZ_RX_CFG,
-         EFX_OWORD32(0xFFFFFFFE, 0x00017FFF, 0x00000000, 0x00000000) },
-       { FR_AZ_TX_CFG,
-         EFX_OWORD32(0x7FFF0037, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AZ_TX_RESERVED,
-         EFX_OWORD32(0xFFFEFE80, 0x1FFFFFFF, 0x020000FE, 0x007FFFFF) },
-       { FR_AB_MAC_CTRL,
-         EFX_OWORD32(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AZ_SRM_TX_DC_CFG,
-         EFX_OWORD32(0x001FFFFF, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AZ_RX_DC_CFG,
-         EFX_OWORD32(0x0000000F, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AZ_RX_DC_PF_WM,
-         EFX_OWORD32(0x000003FF, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_BZ_DP_CTRL,
-         EFX_OWORD32(0x00000FFF, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AB_GM_CFG2,
-         EFX_OWORD32(0x00007337, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AB_GMF_CFG0,
-         EFX_OWORD32(0x00001F1F, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AB_XM_GLB_CFG,
-         EFX_OWORD32(0x00000C68, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AB_XM_TX_CFG,
-         EFX_OWORD32(0x00080164, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AB_XM_RX_CFG,
-         EFX_OWORD32(0x07100A0C, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AB_XM_RX_PARAM,
-         EFX_OWORD32(0x00001FF8, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AB_XM_FC,
-         EFX_OWORD32(0xFFFF0001, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AB_XM_ADR_LO,
-         EFX_OWORD32(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000) },
-       { FR_AB_XX_SD_CTL,
-         EFX_OWORD32(0x0003FF0F, 0x00000000, 0x00000000, 0x00000000) },
-};
-
-static int
-falcon_b0_test_chip(struct efx_nic *efx, struct efx_self_tests *tests)
-{
-       enum reset_type reset_method = RESET_TYPE_INVISIBLE;
-       int rc, rc2;
-
-       mutex_lock(&efx->mac_lock);
-       if (efx->loopback_modes) {
-               /* We need the 312 clock from the PHY to test the XMAC
-                * registers, so move into XGMII loopback if available */
-               if (efx->loopback_modes & (1 << LOOPBACK_XGMII))
-                       efx->loopback_mode = LOOPBACK_XGMII;
-               else
-                       efx->loopback_mode = __ffs(efx->loopback_modes);
-       }
-       __efx_reconfigure_port(efx);
-       mutex_unlock(&efx->mac_lock);
-
-       efx_reset_down(efx, reset_method);
-
-       tests->registers =
-               efx_farch_test_registers(efx, falcon_b0_register_tests,
-                                        ARRAY_SIZE(falcon_b0_register_tests))
-               ? -1 : 1;
-
-       rc = falcon_reset_hw(efx, reset_method);
-       rc2 = efx_reset_up(efx, reset_method, rc == 0);
-       return rc ? rc : rc2;
-}
-
-/**************************************************************************
- *
- * Device reset
- *
- **************************************************************************
- */
-
-static enum reset_type falcon_map_reset_reason(enum reset_type reason)
-{
-       switch (reason) {
-       case RESET_TYPE_RX_RECOVERY:
-       case RESET_TYPE_DMA_ERROR:
-       case RESET_TYPE_TX_SKIP:
-               /* These can occasionally occur due to hardware bugs.
-                * We try to reset without disrupting the link.
-                */
-               return RESET_TYPE_INVISIBLE;
-       default:
-               return RESET_TYPE_ALL;
-       }
-}
-
-static int falcon_map_reset_flags(u32 *flags)
-{
-       enum {
-               FALCON_RESET_INVISIBLE = (ETH_RESET_DMA | ETH_RESET_FILTER |
-                                         ETH_RESET_OFFLOAD | ETH_RESET_MAC),
-               FALCON_RESET_ALL = FALCON_RESET_INVISIBLE | ETH_RESET_PHY,
-               FALCON_RESET_WORLD = FALCON_RESET_ALL | ETH_RESET_IRQ,
-       };
-
-       if ((*flags & FALCON_RESET_WORLD) == FALCON_RESET_WORLD) {
-               *flags &= ~FALCON_RESET_WORLD;
-               return RESET_TYPE_WORLD;
-       }
-
-       if ((*flags & FALCON_RESET_ALL) == FALCON_RESET_ALL) {
-               *flags &= ~FALCON_RESET_ALL;
-               return RESET_TYPE_ALL;
-       }
-
-       if ((*flags & FALCON_RESET_INVISIBLE) == FALCON_RESET_INVISIBLE) {
-               *flags &= ~FALCON_RESET_INVISIBLE;
-               return RESET_TYPE_INVISIBLE;
-       }
-
-       return -EINVAL;
-}
-
-/* Resets NIC to known state.  This routine must be called in process
- * context and is allowed to sleep. */
-static int __falcon_reset_hw(struct efx_nic *efx, enum reset_type method)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       efx_oword_t glb_ctl_reg_ker;
-       int rc;
-
-       netif_dbg(efx, hw, efx->net_dev, "performing %s hardware reset\n",
-                 RESET_TYPE(method));
-
-       /* Initiate device reset */
-       if (method == RESET_TYPE_WORLD) {
-               rc = pci_save_state(efx->pci_dev);
-               if (rc) {
-                       netif_err(efx, drv, efx->net_dev,
-                                 "failed to backup PCI state of primary "
-                                 "function prior to hardware reset\n");
-                       goto fail1;
-               }
-               if (efx_nic_is_dual_func(efx)) {
-                       rc = pci_save_state(nic_data->pci_dev2);
-                       if (rc) {
-                               netif_err(efx, drv, efx->net_dev,
-                                         "failed to backup PCI state of "
-                                         "secondary function prior to "
-                                         "hardware reset\n");
-                               goto fail2;
-                       }
-               }
-
-               EFX_POPULATE_OWORD_2(glb_ctl_reg_ker,
-                                    FRF_AB_EXT_PHY_RST_DUR,
-                                    FFE_AB_EXT_PHY_RST_DUR_10240US,
-                                    FRF_AB_SWRST, 1);
-       } else {
-               EFX_POPULATE_OWORD_7(glb_ctl_reg_ker,
-                                    /* exclude PHY from "invisible" reset */
-                                    FRF_AB_EXT_PHY_RST_CTL,
-                                    method == RESET_TYPE_INVISIBLE,
-                                    /* exclude EEPROM/flash and PCIe */
-                                    FRF_AB_PCIE_CORE_RST_CTL, 1,
-                                    FRF_AB_PCIE_NSTKY_RST_CTL, 1,
-                                    FRF_AB_PCIE_SD_RST_CTL, 1,
-                                    FRF_AB_EE_RST_CTL, 1,
-                                    FRF_AB_EXT_PHY_RST_DUR,
-                                    FFE_AB_EXT_PHY_RST_DUR_10240US,
-                                    FRF_AB_SWRST, 1);
-       }
-       efx_writeo(efx, &glb_ctl_reg_ker, FR_AB_GLB_CTL);
-
-       netif_dbg(efx, hw, efx->net_dev, "waiting for hardware reset\n");
-       schedule_timeout_uninterruptible(HZ / 20);
-
-       /* Restore PCI configuration if needed */
-       if (method == RESET_TYPE_WORLD) {
-               if (efx_nic_is_dual_func(efx))
-                       pci_restore_state(nic_data->pci_dev2);
-               pci_restore_state(efx->pci_dev);
-               netif_dbg(efx, drv, efx->net_dev,
-                         "successfully restored PCI config\n");
-       }
-
-       /* Assert that reset complete */
-       efx_reado(efx, &glb_ctl_reg_ker, FR_AB_GLB_CTL);
-       if (EFX_OWORD_FIELD(glb_ctl_reg_ker, FRF_AB_SWRST) != 0) {
-               rc = -ETIMEDOUT;
-               netif_err(efx, hw, efx->net_dev,
-                         "timed out waiting for hardware reset\n");
-               goto fail3;
-       }
-       netif_dbg(efx, hw, efx->net_dev, "hardware reset complete\n");
-
-       return 0;
-
-       /* pci_save_state() and pci_restore_state() MUST be called in pairs */
-fail2:
-       pci_restore_state(efx->pci_dev);
-fail1:
-fail3:
-       return rc;
-}
-
-static int falcon_reset_hw(struct efx_nic *efx, enum reset_type method)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       int rc;
-
-       mutex_lock(&nic_data->spi_lock);
-       rc = __falcon_reset_hw(efx, method);
-       mutex_unlock(&nic_data->spi_lock);
-
-       return rc;
-}
-
-static void falcon_monitor(struct efx_nic *efx)
-{
-       bool link_changed;
-       int rc;
-
-       BUG_ON(!mutex_is_locked(&efx->mac_lock));
-
-       rc = falcon_board(efx)->type->monitor(efx);
-       if (rc) {
-               netif_err(efx, hw, efx->net_dev,
-                         "Board sensor %s; shutting down PHY\n",
-                         (rc == -ERANGE) ? "reported fault" : "failed");
-               efx->phy_mode |= PHY_MODE_LOW_POWER;
-               rc = __efx_reconfigure_port(efx);
-               WARN_ON(rc);
-       }
-
-       if (LOOPBACK_INTERNAL(efx))
-               link_changed = falcon_loopback_link_poll(efx);
-       else
-               link_changed = efx->phy_op->poll(efx);
-
-       if (link_changed) {
-               falcon_stop_nic_stats(efx);
-               falcon_deconfigure_mac_wrapper(efx);
-
-               falcon_reset_macs(efx);
-               rc = falcon_reconfigure_xmac(efx);
-               BUG_ON(rc);
-
-               falcon_start_nic_stats(efx);
-
-               efx_link_status_changed(efx);
-       }
-
-       falcon_poll_xmac(efx);
-}
-
-/* Zeroes out the SRAM contents.  This routine must be called in
- * process context and is allowed to sleep.
- */
-static int falcon_reset_sram(struct efx_nic *efx)
-{
-       efx_oword_t srm_cfg_reg_ker, gpio_cfg_reg_ker;
-       int count;
-
-       /* Set the SRAM wake/sleep GPIO appropriately. */
-       efx_reado(efx, &gpio_cfg_reg_ker, FR_AB_GPIO_CTL);
-       EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, FRF_AB_GPIO1_OEN, 1);
-       EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, FRF_AB_GPIO1_OUT, 1);
-       efx_writeo(efx, &gpio_cfg_reg_ker, FR_AB_GPIO_CTL);
-
-       /* Initiate SRAM reset */
-       EFX_POPULATE_OWORD_2(srm_cfg_reg_ker,
-                            FRF_AZ_SRM_INIT_EN, 1,
-                            FRF_AZ_SRM_NB_SZ, 0);
-       efx_writeo(efx, &srm_cfg_reg_ker, FR_AZ_SRM_CFG);
-
-       /* Wait for SRAM reset to complete */
-       count = 0;
-       do {
-               netif_dbg(efx, hw, efx->net_dev,
-                         "waiting for SRAM reset (attempt %d)...\n", count);
-
-               /* SRAM reset is slow; expect around 16ms */
-               schedule_timeout_uninterruptible(HZ / 50);
-
-               /* Check for reset complete */
-               efx_reado(efx, &srm_cfg_reg_ker, FR_AZ_SRM_CFG);
-               if (!EFX_OWORD_FIELD(srm_cfg_reg_ker, FRF_AZ_SRM_INIT_EN)) {
-                       netif_dbg(efx, hw, efx->net_dev,
-                                 "SRAM reset complete\n");
-
-                       return 0;
-               }
-       } while (++count < 20); /* wait up to 0.4 sec */
-
-       netif_err(efx, hw, efx->net_dev, "timed out waiting for SRAM reset\n");
-       return -ETIMEDOUT;
-}
-
-static void falcon_spi_device_init(struct efx_nic *efx,
-                                 struct falcon_spi_device *spi_device,
-                                 unsigned int device_id, u32 device_type)
-{
-       if (device_type != 0) {
-               spi_device->device_id = device_id;
-               spi_device->size =
-                       1 << SPI_DEV_TYPE_FIELD(device_type, SPI_DEV_TYPE_SIZE);
-               spi_device->addr_len =
-                       SPI_DEV_TYPE_FIELD(device_type, SPI_DEV_TYPE_ADDR_LEN);
-               spi_device->munge_address = (spi_device->size == 1 << 9 &&
-                                            spi_device->addr_len == 1);
-               spi_device->erase_command =
-                       SPI_DEV_TYPE_FIELD(device_type, SPI_DEV_TYPE_ERASE_CMD);
-               spi_device->erase_size =
-                       1 << SPI_DEV_TYPE_FIELD(device_type,
-                                               SPI_DEV_TYPE_ERASE_SIZE);
-               spi_device->block_size =
-                       1 << SPI_DEV_TYPE_FIELD(device_type,
-                                               SPI_DEV_TYPE_BLOCK_SIZE);
-       } else {
-               spi_device->size = 0;
-       }
-}
-
-/* Extract non-volatile configuration */
-static int falcon_probe_nvconfig(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       struct falcon_nvconfig *nvconfig;
-       int rc;
-
-       nvconfig = kmalloc(sizeof(*nvconfig), GFP_KERNEL);
-       if (!nvconfig)
-               return -ENOMEM;
-
-       rc = falcon_read_nvram(efx, nvconfig);
-       if (rc)
-               goto out;
-
-       efx->phy_type = nvconfig->board_v2.port0_phy_type;
-       efx->mdio.prtad = nvconfig->board_v2.port0_phy_addr;
-
-       if (le16_to_cpu(nvconfig->board_struct_ver) >= 3) {
-               falcon_spi_device_init(
-                       efx, &nic_data->spi_flash, FFE_AB_SPI_DEVICE_FLASH,
-                       le32_to_cpu(nvconfig->board_v3
-                                   .spi_device_type[FFE_AB_SPI_DEVICE_FLASH]));
-               falcon_spi_device_init(
-                       efx, &nic_data->spi_eeprom, FFE_AB_SPI_DEVICE_EEPROM,
-                       le32_to_cpu(nvconfig->board_v3
-                                   .spi_device_type[FFE_AB_SPI_DEVICE_EEPROM]));
-       }
-
-       /* Read the MAC addresses */
-       ether_addr_copy(efx->net_dev->perm_addr, nvconfig->mac_address[0]);
-
-       netif_dbg(efx, probe, efx->net_dev, "PHY is %d phy_id %d\n",
-                 efx->phy_type, efx->mdio.prtad);
-
-       rc = falcon_probe_board(efx,
-                               le16_to_cpu(nvconfig->board_v2.board_revision));
-out:
-       kfree(nvconfig);
-       return rc;
-}
-
-static int falcon_dimension_resources(struct efx_nic *efx)
-{
-       efx->rx_dc_base = 0x20000;
-       efx->tx_dc_base = 0x26000;
-       return 0;
-}
-
-/* Probe all SPI devices on the NIC */
-static void falcon_probe_spi_devices(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       efx_oword_t nic_stat, gpio_ctl, ee_vpd_cfg;
-       int boot_dev;
-
-       efx_reado(efx, &gpio_ctl, FR_AB_GPIO_CTL);
-       efx_reado(efx, &nic_stat, FR_AB_NIC_STAT);
-       efx_reado(efx, &ee_vpd_cfg, FR_AB_EE_VPD_CFG0);
-
-       if (EFX_OWORD_FIELD(gpio_ctl, FRF_AB_GPIO3_PWRUP_VALUE)) {
-               boot_dev = (EFX_OWORD_FIELD(nic_stat, FRF_AB_SF_PRST) ?
-                           FFE_AB_SPI_DEVICE_FLASH : FFE_AB_SPI_DEVICE_EEPROM);
-               netif_dbg(efx, probe, efx->net_dev, "Booted from %s\n",
-                         boot_dev == FFE_AB_SPI_DEVICE_FLASH ?
-                         "flash" : "EEPROM");
-       } else {
-               /* Disable VPD and set clock dividers to safe
-                * values for initial programming. */
-               boot_dev = -1;
-               netif_dbg(efx, probe, efx->net_dev,
-                         "Booted from internal ASIC settings;"
-                         " setting SPI config\n");
-               EFX_POPULATE_OWORD_3(ee_vpd_cfg, FRF_AB_EE_VPD_EN, 0,
-                                    /* 125 MHz / 7 ~= 20 MHz */
-                                    FRF_AB_EE_SF_CLOCK_DIV, 7,
-                                    /* 125 MHz / 63 ~= 2 MHz */
-                                    FRF_AB_EE_EE_CLOCK_DIV, 63);
-               efx_writeo(efx, &ee_vpd_cfg, FR_AB_EE_VPD_CFG0);
-       }
-
-       mutex_init(&nic_data->spi_lock);
-
-       if (boot_dev == FFE_AB_SPI_DEVICE_FLASH)
-               falcon_spi_device_init(efx, &nic_data->spi_flash,
-                                      FFE_AB_SPI_DEVICE_FLASH,
-                                      default_flash_type);
-       if (boot_dev == FFE_AB_SPI_DEVICE_EEPROM)
-               falcon_spi_device_init(efx, &nic_data->spi_eeprom,
-                                      FFE_AB_SPI_DEVICE_EEPROM,
-                                      large_eeprom_type);
-}
-
-static unsigned int falcon_a1_mem_map_size(struct efx_nic *efx)
-{
-       return 0x20000;
-}
-
-static unsigned int falcon_b0_mem_map_size(struct efx_nic *efx)
-{
-       /* Map everything up to and including the RSS indirection table.
-        * The PCI core takes care of mapping the MSI-X tables.
-        */
-       return FR_BZ_RX_INDIRECTION_TBL +
-               FR_BZ_RX_INDIRECTION_TBL_STEP * FR_BZ_RX_INDIRECTION_TBL_ROWS;
-}
-
-static int falcon_probe_nic(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data;
-       struct falcon_board *board;
-       int rc;
-
-       efx->primary = efx; /* only one usable function per controller */
-
-       /* Allocate storage for hardware specific data */
-       nic_data = kzalloc(sizeof(*nic_data), GFP_KERNEL);
-       if (!nic_data)
-               return -ENOMEM;
-       efx->nic_data = nic_data;
-
-       rc = -ENODEV;
-
-       if (efx_farch_fpga_ver(efx) != 0) {
-               netif_err(efx, probe, efx->net_dev,
-                         "Falcon FPGA not supported\n");
-               goto fail1;
-       }
-
-       if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1) {
-               efx_oword_t nic_stat;
-               struct pci_dev *dev;
-               u8 pci_rev = efx->pci_dev->revision;
-
-               if ((pci_rev == 0xff) || (pci_rev == 0)) {
-                       netif_err(efx, probe, efx->net_dev,
-                                 "Falcon rev A0 not supported\n");
-                       goto fail1;
-               }
-               efx_reado(efx, &nic_stat, FR_AB_NIC_STAT);
-               if (EFX_OWORD_FIELD(nic_stat, FRF_AB_STRAP_10G) == 0) {
-                       netif_err(efx, probe, efx->net_dev,
-                                 "Falcon rev A1 1G not supported\n");
-                       goto fail1;
-               }
-               if (EFX_OWORD_FIELD(nic_stat, FRF_AA_STRAP_PCIE) == 0) {
-                       netif_err(efx, probe, efx->net_dev,
-                                 "Falcon rev A1 PCI-X not supported\n");
-                       goto fail1;
-               }
-
-               dev = pci_dev_get(efx->pci_dev);
-               while ((dev = pci_get_device(PCI_VENDOR_ID_SOLARFLARE,
-                                            PCI_DEVICE_ID_SOLARFLARE_SFC4000A_1,
-                                            dev))) {
-                       if (dev->bus == efx->pci_dev->bus &&
-                           dev->devfn == efx->pci_dev->devfn + 1) {
-                               nic_data->pci_dev2 = dev;
-                               break;
-                       }
-               }
-               if (!nic_data->pci_dev2) {
-                       netif_err(efx, probe, efx->net_dev,
-                                 "failed to find secondary function\n");
-                       rc = -ENODEV;
-                       goto fail2;
-               }
-       }
-
-       /* Now we can reset the NIC */
-       rc = __falcon_reset_hw(efx, RESET_TYPE_ALL);
-       if (rc) {
-               netif_err(efx, probe, efx->net_dev, "failed to reset NIC\n");
-               goto fail3;
-       }
-
-       /* Allocate memory for INT_KER */
-       rc = efx_nic_alloc_buffer(efx, &efx->irq_status, sizeof(efx_oword_t),
-                                 GFP_KERNEL);
-       if (rc)
-               goto fail4;
-       BUG_ON(efx->irq_status.dma_addr & 0x0f);
-
-       netif_dbg(efx, probe, efx->net_dev,
-                 "INT_KER at %llx (virt %p phys %llx)\n",
-                 (u64)efx->irq_status.dma_addr,
-                 efx->irq_status.addr,
-                 (u64)virt_to_phys(efx->irq_status.addr));
-
-       falcon_probe_spi_devices(efx);
-
-       /* Read in the non-volatile configuration */
-       rc = falcon_probe_nvconfig(efx);
-       if (rc) {
-               if (rc == -EINVAL)
-                       netif_err(efx, probe, efx->net_dev, "NVRAM is invalid\n");
-               goto fail5;
-       }
-
-       efx->max_channels = (efx_nic_rev(efx) <= EFX_REV_FALCON_A1 ? 4 :
-                            EFX_MAX_CHANNELS);
-       efx->max_tx_channels = efx->max_channels;
-       efx->timer_quantum_ns = 4968; /* 621 cycles */
-       efx->timer_max_ns = efx->type->timer_period_max *
-                           efx->timer_quantum_ns;
-
-       /* Initialise I2C adapter */
-       board = falcon_board(efx);
-       board->i2c_adap.owner = THIS_MODULE;
-       board->i2c_data = falcon_i2c_bit_operations;
-       board->i2c_data.data = efx;
-       board->i2c_adap.algo_data = &board->i2c_data;
-       board->i2c_adap.dev.parent = &efx->pci_dev->dev;
-       strlcpy(board->i2c_adap.name, "SFC4000 GPIO",
-               sizeof(board->i2c_adap.name));
-       rc = i2c_bit_add_bus(&board->i2c_adap);
-       if (rc)
-               goto fail5;
-
-       rc = falcon_board(efx)->type->init(efx);
-       if (rc) {
-               netif_err(efx, probe, efx->net_dev,
-                         "failed to initialise board\n");
-               goto fail6;
-       }
-
-       nic_data->stats_disable_count = 1;
-       setup_timer(&nic_data->stats_timer, &falcon_stats_timer_func,
-                   (unsigned long)efx);
-
-       return 0;
-
- fail6:
-       i2c_del_adapter(&board->i2c_adap);
-       memset(&board->i2c_adap, 0, sizeof(board->i2c_adap));
- fail5:
-       efx_nic_free_buffer(efx, &efx->irq_status);
- fail4:
- fail3:
-       if (nic_data->pci_dev2) {
-               pci_dev_put(nic_data->pci_dev2);
-               nic_data->pci_dev2 = NULL;
-       }
- fail2:
- fail1:
-       kfree(efx->nic_data);
-       return rc;
-}
-
-static void falcon_init_rx_cfg(struct efx_nic *efx)
-{
-       /* RX control FIFO thresholds (32 entries) */
-       const unsigned ctrl_xon_thr = 20;
-       const unsigned ctrl_xoff_thr = 25;
-       efx_oword_t reg;
-
-       efx_reado(efx, &reg, FR_AZ_RX_CFG);
-       if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1) {
-               /* Data FIFO size is 5.5K.  The RX DMA engine only
-                * supports scattering for user-mode queues, but will
-                * split DMA writes at intervals of RX_USR_BUF_SIZE
-                * (32-byte units) even for kernel-mode queues.  We
-                * set it to be so large that that never happens.
-                */
-               EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_DESC_PUSH_EN, 0);
-               EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_USR_BUF_SIZE,
-                                   (3 * 4096) >> 5);
-               EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_MAC_TH, 512 >> 8);
-               EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_MAC_TH, 2048 >> 8);
-               EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_TX_TH, ctrl_xon_thr);
-               EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_TX_TH, ctrl_xoff_thr);
-       } else {
-               /* Data FIFO size is 80K; register fields moved */
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_DESC_PUSH_EN, 0);
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_USR_BUF_SIZE,
-                                   EFX_RX_USR_BUF_SIZE >> 5);
-               /* Send XON and XOFF at ~3 * max MTU away from empty/full */
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_MAC_TH, 27648 >> 8);
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_MAC_TH, 54272 >> 8);
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_TX_TH, ctrl_xon_thr);
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_TX_TH, ctrl_xoff_thr);
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_INGR_EN, 1);
-
-               /* Enable hash insertion. This is broken for the
-                * 'Falcon' hash so also select Toeplitz TCP/IPv4 and
-                * IPv4 hashes. */
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_HASH_INSRT_HDR, 1);
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_HASH_ALG, 1);
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_IP_HASH, 1);
-       }
-       /* Always enable XOFF signal from RX FIFO.  We enable
-        * or disable transmission of pause frames at the MAC. */
-       EFX_SET_OWORD_FIELD(reg, FRF_AZ_RX_XOFF_MAC_EN, 1);
-       efx_writeo(efx, &reg, FR_AZ_RX_CFG);
-}
-
-/* This call performs hardware-specific global initialisation, such as
- * defining the descriptor cache sizes and number of RSS channels.
- * It does not set up any buffers, descriptor rings or event queues.
- */
-static int falcon_init_nic(struct efx_nic *efx)
-{
-       efx_oword_t temp;
-       int rc;
-
-       /* Use on-chip SRAM */
-       efx_reado(efx, &temp, FR_AB_NIC_STAT);
-       EFX_SET_OWORD_FIELD(temp, FRF_AB_ONCHIP_SRAM, 1);
-       efx_writeo(efx, &temp, FR_AB_NIC_STAT);
-
-       rc = falcon_reset_sram(efx);
-       if (rc)
-               return rc;
-
-       /* Clear the parity enables on the TX data fifos as
-        * they produce false parity errors because of timing issues
-        */
-       if (EFX_WORKAROUND_5129(efx)) {
-               efx_reado(efx, &temp, FR_AZ_CSR_SPARE);
-               EFX_SET_OWORD_FIELD(temp, FRF_AB_MEM_PERR_EN_TX_DATA, 0);
-               efx_writeo(efx, &temp, FR_AZ_CSR_SPARE);
-       }
-
-       if (EFX_WORKAROUND_7244(efx)) {
-               efx_reado(efx, &temp, FR_BZ_RX_FILTER_CTL);
-               EFX_SET_OWORD_FIELD(temp, FRF_BZ_UDP_FULL_SRCH_LIMIT, 8);
-               EFX_SET_OWORD_FIELD(temp, FRF_BZ_UDP_WILD_SRCH_LIMIT, 8);
-               EFX_SET_OWORD_FIELD(temp, FRF_BZ_TCP_FULL_SRCH_LIMIT, 8);
-               EFX_SET_OWORD_FIELD(temp, FRF_BZ_TCP_WILD_SRCH_LIMIT, 8);
-               efx_writeo(efx, &temp, FR_BZ_RX_FILTER_CTL);
-       }
-
-       /* XXX This is documented only for Falcon A0/A1 */
-       /* Setup RX.  Wait for descriptor is broken and must
-        * be disabled.  RXDP recovery shouldn't be needed, but is.
-        */
-       efx_reado(efx, &temp, FR_AA_RX_SELF_RST);
-       EFX_SET_OWORD_FIELD(temp, FRF_AA_RX_NODESC_WAIT_DIS, 1);
-       EFX_SET_OWORD_FIELD(temp, FRF_AA_RX_SELF_RST_EN, 1);
-       if (EFX_WORKAROUND_5583(efx))
-               EFX_SET_OWORD_FIELD(temp, FRF_AA_RX_ISCSI_DIS, 1);
-       efx_writeo(efx, &temp, FR_AA_RX_SELF_RST);
-
-       /* Do not enable TX_NO_EOP_DISC_EN, since it limits packets to 16
-        * descriptors (which is bad).
-        */
-       efx_reado(efx, &temp, FR_AZ_TX_CFG);
-       EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_NO_EOP_DISC_EN, 0);
-       efx_writeo(efx, &temp, FR_AZ_TX_CFG);
-
-       falcon_init_rx_cfg(efx);
-
-       if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
-               falcon_b0_rx_push_rss_config(efx, false, efx->rx_indir_table);
-
-               /* Set destination of both TX and RX Flush events */
-               EFX_POPULATE_OWORD_1(temp, FRF_BZ_FLS_EVQ_ID, 0);
-               efx_writeo(efx, &temp, FR_BZ_DP_CTRL);
-       }
-
-       efx_farch_init_common(efx);
-
-       return 0;
-}
-
-static void falcon_remove_nic(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       struct falcon_board *board = falcon_board(efx);
-
-       board->type->fini(efx);
-
-       /* Remove I2C adapter and clear it in preparation for a retry */
-       i2c_del_adapter(&board->i2c_adap);
-       memset(&board->i2c_adap, 0, sizeof(board->i2c_adap));
-
-       efx_nic_free_buffer(efx, &efx->irq_status);
-
-       __falcon_reset_hw(efx, RESET_TYPE_ALL);
-
-       /* Release the second function after the reset */
-       if (nic_data->pci_dev2) {
-               pci_dev_put(nic_data->pci_dev2);
-               nic_data->pci_dev2 = NULL;
-       }
-
-       /* Tear down the private nic state */
-       kfree(efx->nic_data);
-       efx->nic_data = NULL;
-}
-
-static size_t falcon_describe_nic_stats(struct efx_nic *efx, u8 *names)
-{
-       return efx_nic_describe_stats(falcon_stat_desc, FALCON_STAT_COUNT,
-                                     falcon_stat_mask, names);
-}
-
-static size_t falcon_update_nic_stats(struct efx_nic *efx, u64 *full_stats,
-                                     struct rtnl_link_stats64 *core_stats)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       u64 *stats = nic_data->stats;
-       efx_oword_t cnt;
-
-       if (!nic_data->stats_disable_count) {
-               efx_reado(efx, &cnt, FR_AZ_RX_NODESC_DROP);
-               stats[FALCON_STAT_rx_nodesc_drop_cnt] +=
-                       EFX_OWORD_FIELD(cnt, FRF_AB_RX_NODESC_DROP_CNT);
-
-               if (nic_data->stats_pending &&
-                   FALCON_XMAC_STATS_DMA_FLAG(efx)) {
-                       nic_data->stats_pending = false;
-                       rmb(); /* read the done flag before the stats */
-                       efx_nic_update_stats(
-                               falcon_stat_desc, FALCON_STAT_COUNT,
-                               falcon_stat_mask,
-                               stats, efx->stats_buffer.addr, true);
-               }
-
-               /* Update derived statistic */
-               efx_update_diff_stat(&stats[FALCON_STAT_rx_bad_bytes],
-                                    stats[FALCON_STAT_rx_bytes] -
-                                    stats[FALCON_STAT_rx_good_bytes] -
-                                    stats[FALCON_STAT_rx_control] * 64);
-               efx_update_sw_stats(efx, stats);
-       }
-
-       if (full_stats)
-               memcpy(full_stats, stats, sizeof(u64) * FALCON_STAT_COUNT);
-
-       if (core_stats) {
-               core_stats->rx_packets = stats[FALCON_STAT_rx_packets];
-               core_stats->tx_packets = stats[FALCON_STAT_tx_packets];
-               core_stats->rx_bytes = stats[FALCON_STAT_rx_bytes];
-               core_stats->tx_bytes = stats[FALCON_STAT_tx_bytes];
-               core_stats->rx_dropped = stats[FALCON_STAT_rx_nodesc_drop_cnt] +
-                                        stats[GENERIC_STAT_rx_nodesc_trunc] +
-                                        stats[GENERIC_STAT_rx_noskb_drops];
-               core_stats->multicast = stats[FALCON_STAT_rx_multicast];
-               core_stats->rx_length_errors =
-                       stats[FALCON_STAT_rx_gtjumbo] +
-                       stats[FALCON_STAT_rx_length_error];
-               core_stats->rx_crc_errors = stats[FALCON_STAT_rx_bad];
-               core_stats->rx_frame_errors = stats[FALCON_STAT_rx_align_error];
-               core_stats->rx_fifo_errors = stats[FALCON_STAT_rx_overflow];
-
-               core_stats->rx_errors = (core_stats->rx_length_errors +
-                                        core_stats->rx_crc_errors +
-                                        core_stats->rx_frame_errors +
-                                        stats[FALCON_STAT_rx_symbol_error]);
-       }
-
-       return FALCON_STAT_COUNT;
-}
-
-void falcon_start_nic_stats(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-
-       spin_lock_bh(&efx->stats_lock);
-       if (--nic_data->stats_disable_count == 0)
-               falcon_stats_request(efx);
-       spin_unlock_bh(&efx->stats_lock);
-}
-
-/* We don't acutally pull stats on falcon. Wait 10ms so that
- * they arrive when we call this just after start_stats
- */
-static void falcon_pull_nic_stats(struct efx_nic *efx)
-{
-       msleep(10);
-}
-
-void falcon_stop_nic_stats(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       int i;
-
-       might_sleep();
-
-       spin_lock_bh(&efx->stats_lock);
-       ++nic_data->stats_disable_count;
-       spin_unlock_bh(&efx->stats_lock);
-
-       del_timer_sync(&nic_data->stats_timer);
-
-       /* Wait enough time for the most recent transfer to
-        * complete. */
-       for (i = 0; i < 4 && nic_data->stats_pending; i++) {
-               if (FALCON_XMAC_STATS_DMA_FLAG(efx))
-                       break;
-               msleep(1);
-       }
-
-       spin_lock_bh(&efx->stats_lock);
-       falcon_stats_complete(efx);
-       spin_unlock_bh(&efx->stats_lock);
-}
-
-static void falcon_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
-{
-       falcon_board(efx)->type->set_id_led(efx, mode);
-}
-
-/**************************************************************************
- *
- * Wake on LAN
- *
- **************************************************************************
- */
-
-static void falcon_get_wol(struct efx_nic *efx, struct ethtool_wolinfo *wol)
-{
-       wol->supported = 0;
-       wol->wolopts = 0;
-       memset(&wol->sopass, 0, sizeof(wol->sopass));
-}
-
-static int falcon_set_wol(struct efx_nic *efx, u32 type)
-{
-       if (type != 0)
-               return -EINVAL;
-       return 0;
-}
-
-/**************************************************************************
- *
- * Revision-dependent attributes used by efx.c and nic.c
- *
- **************************************************************************
- */
-
-const struct efx_nic_type falcon_a1_nic_type = {
-       .is_vf = false,
-       .mem_bar = EFX_MEM_BAR,
-       .mem_map_size = falcon_a1_mem_map_size,
-       .probe = falcon_probe_nic,
-       .remove = falcon_remove_nic,
-       .init = falcon_init_nic,
-       .dimension_resources = falcon_dimension_resources,
-       .fini = falcon_irq_ack_a1,
-       .monitor = falcon_monitor,
-       .map_reset_reason = falcon_map_reset_reason,
-       .map_reset_flags = falcon_map_reset_flags,
-       .reset = falcon_reset_hw,
-       .probe_port = falcon_probe_port,
-       .remove_port = falcon_remove_port,
-       .handle_global_event = falcon_handle_global_event,
-       .fini_dmaq = efx_farch_fini_dmaq,
-       .prepare_flush = falcon_prepare_flush,
-       .finish_flush = efx_port_dummy_op_void,
-       .prepare_flr = efx_port_dummy_op_void,
-       .finish_flr = efx_farch_finish_flr,
-       .describe_stats = falcon_describe_nic_stats,
-       .update_stats = falcon_update_nic_stats,
-       .start_stats = falcon_start_nic_stats,
-       .pull_stats = falcon_pull_nic_stats,
-       .stop_stats = falcon_stop_nic_stats,
-       .set_id_led = falcon_set_id_led,
-       .push_irq_moderation = falcon_push_irq_moderation,
-       .reconfigure_port = falcon_reconfigure_port,
-       .prepare_enable_fc_tx = falcon_a1_prepare_enable_fc_tx,
-       .reconfigure_mac = falcon_reconfigure_xmac,
-       .check_mac_fault = falcon_xmac_check_fault,
-       .get_wol = falcon_get_wol,
-       .set_wol = falcon_set_wol,
-       .resume_wol = efx_port_dummy_op_void,
-       .test_nvram = falcon_test_nvram,
-       .irq_enable_master = efx_farch_irq_enable_master,
-       .irq_test_generate = efx_farch_irq_test_generate,
-       .irq_disable_non_ev = efx_farch_irq_disable_master,
-       .irq_handle_msi = efx_farch_msi_interrupt,
-       .irq_handle_legacy = falcon_legacy_interrupt_a1,
-       .tx_probe = efx_farch_tx_probe,
-       .tx_init = efx_farch_tx_init,
-       .tx_remove = efx_farch_tx_remove,
-       .tx_write = efx_farch_tx_write,
-       .rx_push_rss_config = dummy_rx_push_rss_config,
-       .rx_probe = efx_farch_rx_probe,
-       .rx_init = efx_farch_rx_init,
-       .rx_remove = efx_farch_rx_remove,
-       .rx_write = efx_farch_rx_write,
-       .rx_defer_refill = efx_farch_rx_defer_refill,
-       .ev_probe = efx_farch_ev_probe,
-       .ev_init = efx_farch_ev_init,
-       .ev_fini = efx_farch_ev_fini,
-       .ev_remove = efx_farch_ev_remove,
-       .ev_process = efx_farch_ev_process,
-       .ev_read_ack = efx_farch_ev_read_ack,
-       .ev_test_generate = efx_farch_ev_test_generate,
-
-       /* We don't expose the filter table on Falcon A1 as it is not
-        * mapped into function 0, but these implementations still
-        * work with a degenerate case of all tables set to size 0.
-        */
-       .filter_table_probe = efx_farch_filter_table_probe,
-       .filter_table_restore = efx_farch_filter_table_restore,
-       .filter_table_remove = efx_farch_filter_table_remove,
-       .filter_insert = efx_farch_filter_insert,
-       .filter_remove_safe = efx_farch_filter_remove_safe,
-       .filter_get_safe = efx_farch_filter_get_safe,
-       .filter_clear_rx = efx_farch_filter_clear_rx,
-       .filter_count_rx_used = efx_farch_filter_count_rx_used,
-       .filter_get_rx_id_limit = efx_farch_filter_get_rx_id_limit,
-       .filter_get_rx_ids = efx_farch_filter_get_rx_ids,
-
-#ifdef CONFIG_SFC_MTD
-       .mtd_probe = falcon_mtd_probe,
-       .mtd_rename = falcon_mtd_rename,
-       .mtd_read = falcon_mtd_read,
-       .mtd_erase = falcon_mtd_erase,
-       .mtd_write = falcon_mtd_write,
-       .mtd_sync = falcon_mtd_sync,
-#endif
-
-       .revision = EFX_REV_FALCON_A1,
-       .txd_ptr_tbl_base = FR_AA_TX_DESC_PTR_TBL_KER,
-       .rxd_ptr_tbl_base = FR_AA_RX_DESC_PTR_TBL_KER,
-       .buf_tbl_base = FR_AA_BUF_FULL_TBL_KER,
-       .evq_ptr_tbl_base = FR_AA_EVQ_PTR_TBL_KER,
-       .evq_rptr_tbl_base = FR_AA_EVQ_RPTR_KER,
-       .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
-       .rx_buffer_padding = 0x24,
-       .can_rx_scatter = false,
-       .max_interrupt_mode = EFX_INT_MODE_MSI,
-       .timer_period_max =  1 << FRF_AB_TC_TIMER_VAL_WIDTH,
-       .offload_features = NETIF_F_IP_CSUM,
-       .mcdi_max_ver = -1,
-};
-
-const struct efx_nic_type falcon_b0_nic_type = {
-       .is_vf = false,
-       .mem_bar = EFX_MEM_BAR,
-       .mem_map_size = falcon_b0_mem_map_size,
-       .probe = falcon_probe_nic,
-       .remove = falcon_remove_nic,
-       .init = falcon_init_nic,
-       .dimension_resources = falcon_dimension_resources,
-       .fini = efx_port_dummy_op_void,
-       .monitor = falcon_monitor,
-       .map_reset_reason = falcon_map_reset_reason,
-       .map_reset_flags = falcon_map_reset_flags,
-       .reset = falcon_reset_hw,
-       .probe_port = falcon_probe_port,
-       .remove_port = falcon_remove_port,
-       .handle_global_event = falcon_handle_global_event,
-       .fini_dmaq = efx_farch_fini_dmaq,
-       .prepare_flush = falcon_prepare_flush,
-       .finish_flush = efx_port_dummy_op_void,
-       .prepare_flr = efx_port_dummy_op_void,
-       .finish_flr = efx_farch_finish_flr,
-       .describe_stats = falcon_describe_nic_stats,
-       .update_stats = falcon_update_nic_stats,
-       .start_stats = falcon_start_nic_stats,
-       .pull_stats = falcon_pull_nic_stats,
-       .stop_stats = falcon_stop_nic_stats,
-       .set_id_led = falcon_set_id_led,
-       .push_irq_moderation = falcon_push_irq_moderation,
-       .reconfigure_port = falcon_reconfigure_port,
-       .prepare_enable_fc_tx = falcon_b0_prepare_enable_fc_tx,
-       .reconfigure_mac = falcon_reconfigure_xmac,
-       .check_mac_fault = falcon_xmac_check_fault,
-       .get_wol = falcon_get_wol,
-       .set_wol = falcon_set_wol,
-       .resume_wol = efx_port_dummy_op_void,
-       .test_chip = falcon_b0_test_chip,
-       .test_nvram = falcon_test_nvram,
-       .irq_enable_master = efx_farch_irq_enable_master,
-       .irq_test_generate = efx_farch_irq_test_generate,
-       .irq_disable_non_ev = efx_farch_irq_disable_master,
-       .irq_handle_msi = efx_farch_msi_interrupt,
-       .irq_handle_legacy = efx_farch_legacy_interrupt,
-       .tx_probe = efx_farch_tx_probe,
-       .tx_init = efx_farch_tx_init,
-       .tx_remove = efx_farch_tx_remove,
-       .tx_write = efx_farch_tx_write,
-       .rx_push_rss_config = falcon_b0_rx_push_rss_config,
-       .rx_probe = efx_farch_rx_probe,
-       .rx_init = efx_farch_rx_init,
-       .rx_remove = efx_farch_rx_remove,
-       .rx_write = efx_farch_rx_write,
-       .rx_defer_refill = efx_farch_rx_defer_refill,
-       .ev_probe = efx_farch_ev_probe,
-       .ev_init = efx_farch_ev_init,
-       .ev_fini = efx_farch_ev_fini,
-       .ev_remove = efx_farch_ev_remove,
-       .ev_process = efx_farch_ev_process,
-       .ev_read_ack = efx_farch_ev_read_ack,
-       .ev_test_generate = efx_farch_ev_test_generate,
-       .filter_table_probe = efx_farch_filter_table_probe,
-       .filter_table_restore = efx_farch_filter_table_restore,
-       .filter_table_remove = efx_farch_filter_table_remove,
-       .filter_update_rx_scatter = efx_farch_filter_update_rx_scatter,
-       .filter_insert = efx_farch_filter_insert,
-       .filter_remove_safe = efx_farch_filter_remove_safe,
-       .filter_get_safe = efx_farch_filter_get_safe,
-       .filter_clear_rx = efx_farch_filter_clear_rx,
-       .filter_count_rx_used = efx_farch_filter_count_rx_used,
-       .filter_get_rx_id_limit = efx_farch_filter_get_rx_id_limit,
-       .filter_get_rx_ids = efx_farch_filter_get_rx_ids,
-#ifdef CONFIG_RFS_ACCEL
-       .filter_rfs_insert = efx_farch_filter_rfs_insert,
-       .filter_rfs_expire_one = efx_farch_filter_rfs_expire_one,
-#endif
-#ifdef CONFIG_SFC_MTD
-       .mtd_probe = falcon_mtd_probe,
-       .mtd_rename = falcon_mtd_rename,
-       .mtd_read = falcon_mtd_read,
-       .mtd_erase = falcon_mtd_erase,
-       .mtd_write = falcon_mtd_write,
-       .mtd_sync = falcon_mtd_sync,
-#endif
-
-       .revision = EFX_REV_FALCON_B0,
-       .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL,
-       .rxd_ptr_tbl_base = FR_BZ_RX_DESC_PTR_TBL,
-       .buf_tbl_base = FR_BZ_BUF_FULL_TBL,
-       .evq_ptr_tbl_base = FR_BZ_EVQ_PTR_TBL,
-       .evq_rptr_tbl_base = FR_BZ_EVQ_RPTR,
-       .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
-       .rx_prefix_size = FS_BZ_RX_PREFIX_SIZE,
-       .rx_hash_offset = FS_BZ_RX_PREFIX_HASH_OFST,
-       .rx_buffer_padding = 0,
-       .can_rx_scatter = true,
-       .max_interrupt_mode = EFX_INT_MODE_MSIX,
-       .timer_period_max =  1 << FRF_AB_TC_TIMER_VAL_WIDTH,
-       .offload_features = NETIF_F_IP_CSUM | NETIF_F_RXHASH | NETIF_F_NTUPLE,
-       .mcdi_max_ver = -1,
-       .max_rx_ip_filters = FR_BZ_RX_FILTER_TBL0_ROWS,
-};
diff --git a/drivers/net/ethernet/sfc/falcon/Kconfig b/drivers/net/ethernet/sfc/falcon/Kconfig
new file mode 100644 (file)
index 0000000..6248e96
--- /dev/null
@@ -0,0 +1,21 @@
+config SFC_FALCON
+       tristate "Solarflare SFC4000 support"
+       depends on PCI
+       select MDIO
+       select CRC32
+       select I2C
+       select I2C_ALGOBIT
+       ---help---
+         This driver supports 10-gigabit Ethernet cards based on
+         the Solarflare SFC4000 controller.
+
+         To compile this driver as a module, choose M here.  The module
+         will be called sfc-falcon.
+config SFC_FALCON_MTD
+       bool "Solarflare SFC4000 MTD support"
+       depends on SFC_FALCON && MTD && !(SFC_FALCON=y && MTD=m)
+       default y
+       ---help---
+         This exposes the on-board flash and/or EEPROM as MTD devices
+         (e.g. /dev/mtd1).  This is required to update the boot
+         configuration under Linux.
diff --git a/drivers/net/ethernet/sfc/falcon/Makefile b/drivers/net/ethernet/sfc/falcon/Makefile
new file mode 100644 (file)
index 0000000..aa1b459
--- /dev/null
@@ -0,0 +1,6 @@
+sfc-falcon-y           += efx.o nic.o farch.o falcon.o tx.o rx.o selftest.o \
+                          ethtool.o qt202x_phy.o mdio_10g.o tenxpress.o \
+                          txc43128_phy.o falcon_boards.o
+
+sfc-falcon-$(CONFIG_SFC_FALCON_MTD)    += mtd.o
+obj-$(CONFIG_SFC_FALCON)               += sfc-falcon.o
diff --git a/drivers/net/ethernet/sfc/falcon/bitfield.h b/drivers/net/ethernet/sfc/falcon/bitfield.h
new file mode 100644 (file)
index 0000000..230fd77
--- /dev/null
@@ -0,0 +1,542 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_BITFIELD_H
+#define EF4_BITFIELD_H
+
+/*
+ * Efx bitfield access
+ *
+ * Efx NICs make extensive use of bitfields up to 128 bits
+ * wide.  Since there is no native 128-bit datatype on most systems,
+ * and since 64-bit datatypes are inefficient on 32-bit systems and
+ * vice versa, we wrap accesses in a way that uses the most efficient
+ * datatype.
+ *
+ * The NICs are PCI devices and therefore little-endian.  Since most
+ * of the quantities that we deal with are DMAed to/from host memory,
+ * we define our datatypes (ef4_oword_t, ef4_qword_t and
+ * ef4_dword_t) to be little-endian.
+ */
+
+/* Lowest bit numbers and widths */
+#define EF4_DUMMY_FIELD_LBN 0
+#define EF4_DUMMY_FIELD_WIDTH 0
+#define EF4_WORD_0_LBN 0
+#define EF4_WORD_0_WIDTH 16
+#define EF4_WORD_1_LBN 16
+#define EF4_WORD_1_WIDTH 16
+#define EF4_DWORD_0_LBN 0
+#define EF4_DWORD_0_WIDTH 32
+#define EF4_DWORD_1_LBN 32
+#define EF4_DWORD_1_WIDTH 32
+#define EF4_DWORD_2_LBN 64
+#define EF4_DWORD_2_WIDTH 32
+#define EF4_DWORD_3_LBN 96
+#define EF4_DWORD_3_WIDTH 32
+#define EF4_QWORD_0_LBN 0
+#define EF4_QWORD_0_WIDTH 64
+
+/* Specified attribute (e.g. LBN) of the specified field */
+#define EF4_VAL(field, attribute) field ## _ ## attribute
+/* Low bit number of the specified field */
+#define EF4_LOW_BIT(field) EF4_VAL(field, LBN)
+/* Bit width of the specified field */
+#define EF4_WIDTH(field) EF4_VAL(field, WIDTH)
+/* High bit number of the specified field */
+#define EF4_HIGH_BIT(field) (EF4_LOW_BIT(field) + EF4_WIDTH(field) - 1)
+/* Mask equal in width to the specified field.
+ *
+ * For example, a field with width 5 would have a mask of 0x1f.
+ *
+ * The maximum width mask that can be generated is 64 bits.
+ */
+#define EF4_MASK64(width)                      \
+       ((width) == 64 ? ~((u64) 0) :           \
+        (((((u64) 1) << (width))) - 1))
+
+/* Mask equal in width to the specified field.
+ *
+ * For example, a field with width 5 would have a mask of 0x1f.
+ *
+ * The maximum width mask that can be generated is 32 bits.  Use
+ * EF4_MASK64 for higher width fields.
+ */
+#define EF4_MASK32(width)                      \
+       ((width) == 32 ? ~((u32) 0) :           \
+        (((((u32) 1) << (width))) - 1))
+
+/* A doubleword (i.e. 4 byte) datatype - little-endian in HW */
+typedef union ef4_dword {
+       __le32 u32[1];
+} ef4_dword_t;
+
+/* A quadword (i.e. 8 byte) datatype - little-endian in HW */
+typedef union ef4_qword {
+       __le64 u64[1];
+       __le32 u32[2];
+       ef4_dword_t dword[2];
+} ef4_qword_t;
+
+/* An octword (eight-word, i.e. 16 byte) datatype - little-endian in HW */
+typedef union ef4_oword {
+       __le64 u64[2];
+       ef4_qword_t qword[2];
+       __le32 u32[4];
+       ef4_dword_t dword[4];
+} ef4_oword_t;
+
+/* Format string and value expanders for printk */
+#define EF4_DWORD_FMT "%08x"
+#define EF4_QWORD_FMT "%08x:%08x"
+#define EF4_OWORD_FMT "%08x:%08x:%08x:%08x"
+#define EF4_DWORD_VAL(dword)                           \
+       ((unsigned int) le32_to_cpu((dword).u32[0]))
+#define EF4_QWORD_VAL(qword)                           \
+       ((unsigned int) le32_to_cpu((qword).u32[1])),   \
+       ((unsigned int) le32_to_cpu((qword).u32[0]))
+#define EF4_OWORD_VAL(oword)                           \
+       ((unsigned int) le32_to_cpu((oword).u32[3])),   \
+       ((unsigned int) le32_to_cpu((oword).u32[2])),   \
+       ((unsigned int) le32_to_cpu((oword).u32[1])),   \
+       ((unsigned int) le32_to_cpu((oword).u32[0]))
+
+/*
+ * Extract bit field portion [low,high] from the native-endian element
+ * which contains bits [min,max].  All four bounds are inclusive.
+ *
+ * For example, suppose "element" represents the high 32 bits of a
+ * 64-bit value, and we wish to extract the bits belonging to the bit
+ * field occupying bits 28-45 of this 64-bit value.
+ *
+ * Then EF4_EXTRACT_NATIVE ( element, 32, 63, 28, 45 ) would give
+ *
+ *   ( element ) << 4
+ *
+ * The result will contain the relevant bits in the range
+ * [0,high-low], with garbage in bits [high-low+1,...).
+ */
+#define EF4_EXTRACT_NATIVE(native_element, min, max, low, high)                \
+       ((low) > (max) || (high) < (min) ? 0 :                          \
+        (low) > (min) ?                                                \
+        (native_element) >> ((low) - (min)) :                          \
+        (native_element) << ((min) - (low)))
+
+/*
+ * Extract bit field portion [low,high] from the 64-bit little-endian
+ * element which contains bits [min,max] (bounds inclusive)
+ */
+#define EF4_EXTRACT64(element, min, max, low, high)                    \
+       EF4_EXTRACT_NATIVE(le64_to_cpu(element), min, max, low, high)
+
+/*
+ * Extract bit field portion [low,high] from the 32-bit little-endian
+ * element which contains bits [min,max] (bounds inclusive)
+ */
+#define EF4_EXTRACT32(element, min, max, low, high)                    \
+       EF4_EXTRACT_NATIVE(le32_to_cpu(element), min, max, low, high)
+
+#define EF4_EXTRACT_OWORD64(oword, low, high)                          \
+       ((EF4_EXTRACT64((oword).u64[0], 0, 63, low, high) |             \
+         EF4_EXTRACT64((oword).u64[1], 64, 127, low, high)) &          \
+        EF4_MASK64((high) + 1 - (low)))
+
+#define EF4_EXTRACT_QWORD64(qword, low, high)                          \
+       (EF4_EXTRACT64((qword).u64[0], 0, 63, low, high) &              \
+        EF4_MASK64((high) + 1 - (low)))
+
+#define EF4_EXTRACT_OWORD32(oword, low, high)                          \
+       ((EF4_EXTRACT32((oword).u32[0], 0, 31, low, high) |             \
+         EF4_EXTRACT32((oword).u32[1], 32, 63, low, high) |            \
+         EF4_EXTRACT32((oword).u32[2], 64, 95, low, high) |            \
+         EF4_EXTRACT32((oword).u32[3], 96, 127, low, high)) &          \
+        EF4_MASK32((high) + 1 - (low)))
+
+#define EF4_EXTRACT_QWORD32(qword, low, high)                          \
+       ((EF4_EXTRACT32((qword).u32[0], 0, 31, low, high) |             \
+         EF4_EXTRACT32((qword).u32[1], 32, 63, low, high)) &           \
+        EF4_MASK32((high) + 1 - (low)))
+
+#define EF4_EXTRACT_DWORD(dword, low, high)                    \
+       (EF4_EXTRACT32((dword).u32[0], 0, 31, low, high) &      \
+        EF4_MASK32((high) + 1 - (low)))
+
+#define EF4_OWORD_FIELD64(oword, field)                                \
+       EF4_EXTRACT_OWORD64(oword, EF4_LOW_BIT(field),          \
+                           EF4_HIGH_BIT(field))
+
+#define EF4_QWORD_FIELD64(qword, field)                                \
+       EF4_EXTRACT_QWORD64(qword, EF4_LOW_BIT(field),          \
+                           EF4_HIGH_BIT(field))
+
+#define EF4_OWORD_FIELD32(oword, field)                                \
+       EF4_EXTRACT_OWORD32(oword, EF4_LOW_BIT(field),          \
+                           EF4_HIGH_BIT(field))
+
+#define EF4_QWORD_FIELD32(qword, field)                                \
+       EF4_EXTRACT_QWORD32(qword, EF4_LOW_BIT(field),          \
+                           EF4_HIGH_BIT(field))
+
+#define EF4_DWORD_FIELD(dword, field)                          \
+       EF4_EXTRACT_DWORD(dword, EF4_LOW_BIT(field),            \
+                         EF4_HIGH_BIT(field))
+
+#define EF4_OWORD_IS_ZERO64(oword)                                     \
+       (((oword).u64[0] | (oword).u64[1]) == (__force __le64) 0)
+
+#define EF4_QWORD_IS_ZERO64(qword)                                     \
+       (((qword).u64[0]) == (__force __le64) 0)
+
+#define EF4_OWORD_IS_ZERO32(oword)                                          \
+       (((oword).u32[0] | (oword).u32[1] | (oword).u32[2] | (oword).u32[3]) \
+        == (__force __le32) 0)
+
+#define EF4_QWORD_IS_ZERO32(qword)                                     \
+       (((qword).u32[0] | (qword).u32[1]) == (__force __le32) 0)
+
+#define EF4_DWORD_IS_ZERO(dword)                                       \
+       (((dword).u32[0]) == (__force __le32) 0)
+
+#define EF4_OWORD_IS_ALL_ONES64(oword)                                 \
+       (((oword).u64[0] & (oword).u64[1]) == ~((__force __le64) 0))
+
+#define EF4_QWORD_IS_ALL_ONES64(qword)                                 \
+       ((qword).u64[0] == ~((__force __le64) 0))
+
+#define EF4_OWORD_IS_ALL_ONES32(oword)                                 \
+       (((oword).u32[0] & (oword).u32[1] & (oword).u32[2] & (oword).u32[3]) \
+        == ~((__force __le32) 0))
+
+#define EF4_QWORD_IS_ALL_ONES32(qword)                                 \
+       (((qword).u32[0] & (qword).u32[1]) == ~((__force __le32) 0))
+
+#define EF4_DWORD_IS_ALL_ONES(dword)                                   \
+       ((dword).u32[0] == ~((__force __le32) 0))
+
+#if BITS_PER_LONG == 64
+#define EF4_OWORD_FIELD                EF4_OWORD_FIELD64
+#define EF4_QWORD_FIELD                EF4_QWORD_FIELD64
+#define EF4_OWORD_IS_ZERO      EF4_OWORD_IS_ZERO64
+#define EF4_QWORD_IS_ZERO      EF4_QWORD_IS_ZERO64
+#define EF4_OWORD_IS_ALL_ONES  EF4_OWORD_IS_ALL_ONES64
+#define EF4_QWORD_IS_ALL_ONES  EF4_QWORD_IS_ALL_ONES64
+#else
+#define EF4_OWORD_FIELD                EF4_OWORD_FIELD32
+#define EF4_QWORD_FIELD                EF4_QWORD_FIELD32
+#define EF4_OWORD_IS_ZERO      EF4_OWORD_IS_ZERO32
+#define EF4_QWORD_IS_ZERO      EF4_QWORD_IS_ZERO32
+#define EF4_OWORD_IS_ALL_ONES  EF4_OWORD_IS_ALL_ONES32
+#define EF4_QWORD_IS_ALL_ONES  EF4_QWORD_IS_ALL_ONES32
+#endif
+
+/*
+ * Construct bit field portion
+ *
+ * Creates the portion of the bit field [low,high] that lies within
+ * the range [min,max] (bounds inclusive).
+ */
+#define EF4_INSERT_NATIVE64(min, max, low, high, value)                \
+       (((low > max) || (high < min)) ? 0 :                    \
+        ((low > min) ?                                         \
+         (((u64) (value)) << (low - min)) :            \
+         (((u64) (value)) >> (min - low))))
+
+#define EF4_INSERT_NATIVE32(min, max, low, high, value)                \
+       (((low > max) || (high < min)) ? 0 :                    \
+        ((low > min) ?                                         \
+         (((u32) (value)) << (low - min)) :            \
+         (((u32) (value)) >> (min - low))))
+
+#define EF4_INSERT_NATIVE(min, max, low, high, value)          \
+       ((((max - min) >= 32) || ((high - low) >= 32)) ?        \
+        EF4_INSERT_NATIVE64(min, max, low, high, value) :      \
+        EF4_INSERT_NATIVE32(min, max, low, high, value))
+
+/*
+ * Construct bit field portion
+ *
+ * Creates the portion of the named bit field that lies within the
+ * range [min,max] (bounds inclusive).
+ */
+#define EF4_INSERT_FIELD_NATIVE(min, max, field, value)                \
+       EF4_INSERT_NATIVE(min, max, EF4_LOW_BIT(field),         \
+                         EF4_HIGH_BIT(field), value)
+
+/*
+ * Construct bit field
+ *
+ * Creates the portion of the named bit fields that lie within the
+ * range [min,max] (bounds inclusive).
+ */
+#define EF4_INSERT_FIELDS_NATIVE(min, max,                             \
+                                field1, value1,                        \
+                                field2, value2,                        \
+                                field3, value3,                        \
+                                field4, value4,                        \
+                                field5, value5,                        \
+                                field6, value6,                        \
+                                field7, value7,                        \
+                                field8, value8,                        \
+                                field9, value9,                        \
+                                field10, value10)                      \
+       (EF4_INSERT_FIELD_NATIVE((min), (max), field1, (value1)) |      \
+        EF4_INSERT_FIELD_NATIVE((min), (max), field2, (value2)) |      \
+        EF4_INSERT_FIELD_NATIVE((min), (max), field3, (value3)) |      \
+        EF4_INSERT_FIELD_NATIVE((min), (max), field4, (value4)) |      \
+        EF4_INSERT_FIELD_NATIVE((min), (max), field5, (value5)) |      \
+        EF4_INSERT_FIELD_NATIVE((min), (max), field6, (value6)) |      \
+        EF4_INSERT_FIELD_NATIVE((min), (max), field7, (value7)) |      \
+        EF4_INSERT_FIELD_NATIVE((min), (max), field8, (value8)) |      \
+        EF4_INSERT_FIELD_NATIVE((min), (max), field9, (value9)) |      \
+        EF4_INSERT_FIELD_NATIVE((min), (max), field10, (value10)))
+
+#define EF4_INSERT_FIELDS64(...)                               \
+       cpu_to_le64(EF4_INSERT_FIELDS_NATIVE(__VA_ARGS__))
+
+#define EF4_INSERT_FIELDS32(...)                               \
+       cpu_to_le32(EF4_INSERT_FIELDS_NATIVE(__VA_ARGS__))
+
+#define EF4_POPULATE_OWORD64(oword, ...) do {                          \
+       (oword).u64[0] = EF4_INSERT_FIELDS64(0, 63, __VA_ARGS__);       \
+       (oword).u64[1] = EF4_INSERT_FIELDS64(64, 127, __VA_ARGS__);     \
+       } while (0)
+
+#define EF4_POPULATE_QWORD64(qword, ...) do {                          \
+       (qword).u64[0] = EF4_INSERT_FIELDS64(0, 63, __VA_ARGS__);       \
+       } while (0)
+
+#define EF4_POPULATE_OWORD32(oword, ...) do {                          \
+       (oword).u32[0] = EF4_INSERT_FIELDS32(0, 31, __VA_ARGS__);       \
+       (oword).u32[1] = EF4_INSERT_FIELDS32(32, 63, __VA_ARGS__);      \
+       (oword).u32[2] = EF4_INSERT_FIELDS32(64, 95, __VA_ARGS__);      \
+       (oword).u32[3] = EF4_INSERT_FIELDS32(96, 127, __VA_ARGS__);     \
+       } while (0)
+
+#define EF4_POPULATE_QWORD32(qword, ...) do {                          \
+       (qword).u32[0] = EF4_INSERT_FIELDS32(0, 31, __VA_ARGS__);       \
+       (qword).u32[1] = EF4_INSERT_FIELDS32(32, 63, __VA_ARGS__);      \
+       } while (0)
+
+#define EF4_POPULATE_DWORD(dword, ...) do {                            \
+       (dword).u32[0] = EF4_INSERT_FIELDS32(0, 31, __VA_ARGS__);       \
+       } while (0)
+
+#if BITS_PER_LONG == 64
+#define EF4_POPULATE_OWORD EF4_POPULATE_OWORD64
+#define EF4_POPULATE_QWORD EF4_POPULATE_QWORD64
+#else
+#define EF4_POPULATE_OWORD EF4_POPULATE_OWORD32
+#define EF4_POPULATE_QWORD EF4_POPULATE_QWORD32
+#endif
+
+/* Populate an octword field with various numbers of arguments */
+#define EF4_POPULATE_OWORD_10 EF4_POPULATE_OWORD
+#define EF4_POPULATE_OWORD_9(oword, ...) \
+       EF4_POPULATE_OWORD_10(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_8(oword, ...) \
+       EF4_POPULATE_OWORD_9(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_7(oword, ...) \
+       EF4_POPULATE_OWORD_8(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_6(oword, ...) \
+       EF4_POPULATE_OWORD_7(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_5(oword, ...) \
+       EF4_POPULATE_OWORD_6(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_4(oword, ...) \
+       EF4_POPULATE_OWORD_5(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_3(oword, ...) \
+       EF4_POPULATE_OWORD_4(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_2(oword, ...) \
+       EF4_POPULATE_OWORD_3(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_1(oword, ...) \
+       EF4_POPULATE_OWORD_2(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_ZERO_OWORD(oword) \
+       EF4_POPULATE_OWORD_1(oword, EF4_DUMMY_FIELD, 0)
+#define EF4_SET_OWORD(oword) \
+       EF4_POPULATE_OWORD_4(oword, \
+                            EF4_DWORD_0, 0xffffffff, \
+                            EF4_DWORD_1, 0xffffffff, \
+                            EF4_DWORD_2, 0xffffffff, \
+                            EF4_DWORD_3, 0xffffffff)
+
+/* Populate a quadword field with various numbers of arguments */
+#define EF4_POPULATE_QWORD_10 EF4_POPULATE_QWORD
+#define EF4_POPULATE_QWORD_9(qword, ...) \
+       EF4_POPULATE_QWORD_10(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_8(qword, ...) \
+       EF4_POPULATE_QWORD_9(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_7(qword, ...) \
+       EF4_POPULATE_QWORD_8(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_6(qword, ...) \
+       EF4_POPULATE_QWORD_7(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_5(qword, ...) \
+       EF4_POPULATE_QWORD_6(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_4(qword, ...) \
+       EF4_POPULATE_QWORD_5(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_3(qword, ...) \
+       EF4_POPULATE_QWORD_4(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_2(qword, ...) \
+       EF4_POPULATE_QWORD_3(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_1(qword, ...) \
+       EF4_POPULATE_QWORD_2(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_ZERO_QWORD(qword) \
+       EF4_POPULATE_QWORD_1(qword, EF4_DUMMY_FIELD, 0)
+#define EF4_SET_QWORD(qword) \
+       EF4_POPULATE_QWORD_2(qword, \
+                            EF4_DWORD_0, 0xffffffff, \
+                            EF4_DWORD_1, 0xffffffff)
+
+/* Populate a dword field with various numbers of arguments */
+#define EF4_POPULATE_DWORD_10 EF4_POPULATE_DWORD
+#define EF4_POPULATE_DWORD_9(dword, ...) \
+       EF4_POPULATE_DWORD_10(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_8(dword, ...) \
+       EF4_POPULATE_DWORD_9(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_7(dword, ...) \
+       EF4_POPULATE_DWORD_8(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_6(dword, ...) \
+       EF4_POPULATE_DWORD_7(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_5(dword, ...) \
+       EF4_POPULATE_DWORD_6(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_4(dword, ...) \
+       EF4_POPULATE_DWORD_5(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_3(dword, ...) \
+       EF4_POPULATE_DWORD_4(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_2(dword, ...) \
+       EF4_POPULATE_DWORD_3(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_1(dword, ...) \
+       EF4_POPULATE_DWORD_2(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_ZERO_DWORD(dword) \
+       EF4_POPULATE_DWORD_1(dword, EF4_DUMMY_FIELD, 0)
+#define EF4_SET_DWORD(dword) \
+       EF4_POPULATE_DWORD_1(dword, EF4_DWORD_0, 0xffffffff)
+
+/*
+ * Whole-oword invert/AND/OR helpers, followed by macros that modify a
+ * named field within an already-populated structure.  Used for
+ * read-modify-write operations.
+ */
+#define EF4_INVERT_OWORD(oword) do {           \
+       (oword).u64[0] = ~((oword).u64[0]);     \
+       (oword).u64[1] = ~((oword).u64[1]);     \
+       } while (0)
+
+#define EF4_AND_OWORD(oword, from, mask)                       \
+       do {                                                    \
+               (oword).u64[0] = (from).u64[0] & (mask).u64[0]; \
+               (oword).u64[1] = (from).u64[1] & (mask).u64[1]; \
+       } while (0)
+
+#define EF4_OR_OWORD(oword, from, mask)                                \
+       do {                                                    \
+               (oword).u64[0] = (from).u64[0] | (mask).u64[0]; \
+               (oword).u64[1] = (from).u64[1] | (mask).u64[1]; \
+       } while (0)
+
+#define EF4_INSERT64(min, max, low, high, value)                       \
+       cpu_to_le64(EF4_INSERT_NATIVE(min, max, low, high, value))
+
+#define EF4_INSERT32(min, max, low, high, value)                       \
+       cpu_to_le32(EF4_INSERT_NATIVE(min, max, low, high, value))
+
+#define EF4_INPLACE_MASK64(min, max, low, high)                                \
+       EF4_INSERT64(min, max, low, high, EF4_MASK64((high) + 1 - (low)))
+
+#define EF4_INPLACE_MASK32(min, max, low, high)                                \
+       EF4_INSERT32(min, max, low, high, EF4_MASK32((high) + 1 - (low)))
+
+#define EF4_SET_OWORD64(oword, low, high, value) do {                  \
+       (oword).u64[0] = (((oword).u64[0]                               \
+                          & ~EF4_INPLACE_MASK64(0,  63, low, high))    \
+                         | EF4_INSERT64(0,  63, low, high, value));    \
+       (oword).u64[1] = (((oword).u64[1]                               \
+                          & ~EF4_INPLACE_MASK64(64, 127, low, high))   \
+                         | EF4_INSERT64(64, 127, low, high, value));   \
+       } while (0)
+
+#define EF4_SET_QWORD64(qword, low, high, value) do {                  \
+       (qword).u64[0] = (((qword).u64[0]                               \
+                          & ~EF4_INPLACE_MASK64(0, 63, low, high))     \
+                         | EF4_INSERT64(0, 63, low, high, value));     \
+       } while (0)
+
+#define EF4_SET_OWORD32(oword, low, high, value) do {                  \
+       (oword).u32[0] = (((oword).u32[0]                               \
+                          & ~EF4_INPLACE_MASK32(0, 31, low, high))     \
+                         | EF4_INSERT32(0, 31, low, high, value));     \
+       (oword).u32[1] = (((oword).u32[1]                               \
+                          & ~EF4_INPLACE_MASK32(32, 63, low, high))    \
+                         | EF4_INSERT32(32, 63, low, high, value));    \
+       (oword).u32[2] = (((oword).u32[2]                               \
+                          & ~EF4_INPLACE_MASK32(64, 95, low, high))    \
+                         | EF4_INSERT32(64, 95, low, high, value));    \
+       (oword).u32[3] = (((oword).u32[3]                               \
+                          & ~EF4_INPLACE_MASK32(96, 127, low, high))   \
+                         | EF4_INSERT32(96, 127, low, high, value));   \
+       } while (0)
+
+#define EF4_SET_QWORD32(qword, low, high, value) do {                  \
+       (qword).u32[0] = (((qword).u32[0]                               \
+                          & ~EF4_INPLACE_MASK32(0, 31, low, high))     \
+                         | EF4_INSERT32(0, 31, low, high, value));     \
+       (qword).u32[1] = (((qword).u32[1]                               \
+                          & ~EF4_INPLACE_MASK32(32, 63, low, high))    \
+                         | EF4_INSERT32(32, 63, low, high, value));    \
+       } while (0)
+
+#define EF4_SET_DWORD32(dword, low, high, value) do {                  \
+       (dword).u32[0] = (((dword).u32[0]                               \
+                          & ~EF4_INPLACE_MASK32(0, 31, low, high))     \
+                         | EF4_INSERT32(0, 31, low, high, value));     \
+       } while (0)
+
+#define EF4_SET_OWORD_FIELD64(oword, field, value)                     \
+       EF4_SET_OWORD64(oword, EF4_LOW_BIT(field),                      \
+                        EF4_HIGH_BIT(field), value)
+
+#define EF4_SET_QWORD_FIELD64(qword, field, value)                     \
+       EF4_SET_QWORD64(qword, EF4_LOW_BIT(field),                      \
+                        EF4_HIGH_BIT(field), value)
+
+#define EF4_SET_OWORD_FIELD32(oword, field, value)                     \
+       EF4_SET_OWORD32(oword, EF4_LOW_BIT(field),                      \
+                        EF4_HIGH_BIT(field), value)
+
+#define EF4_SET_QWORD_FIELD32(qword, field, value)                     \
+       EF4_SET_QWORD32(qword, EF4_LOW_BIT(field),                      \
+                        EF4_HIGH_BIT(field), value)
+
+#define EF4_SET_DWORD_FIELD(dword, field, value)                       \
+       EF4_SET_DWORD32(dword, EF4_LOW_BIT(field),                      \
+                        EF4_HIGH_BIT(field), value)
+
+
+
+#if BITS_PER_LONG == 64
+#define EF4_SET_OWORD_FIELD EF4_SET_OWORD_FIELD64
+#define EF4_SET_QWORD_FIELD EF4_SET_QWORD_FIELD64
+#else
+#define EF4_SET_OWORD_FIELD EF4_SET_OWORD_FIELD32
+#define EF4_SET_QWORD_FIELD EF4_SET_QWORD_FIELD32
+#endif
+
+/* Used to avoid compiler warnings about shift range exceeding width
+ * of the data types when dma_addr_t is only 32 bits wide.
+ */
+#define DMA_ADDR_T_WIDTH       (8 * sizeof(dma_addr_t))
+#define EF4_DMA_TYPE_WIDTH(width) \
+       (((width) < DMA_ADDR_T_WIDTH) ? (width) : DMA_ADDR_T_WIDTH)
+
+
+/* Static initialiser */
+#define EF4_OWORD32(a, b, c, d)                                \
+       { .u32 = { cpu_to_le32(a), cpu_to_le32(b),      \
+                  cpu_to_le32(c), cpu_to_le32(d) } }
+
+#endif /* EF4_BITFIELD_H */
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
new file mode 100644 (file)
index 0000000..5c5cb3c
--- /dev/null
@@ -0,0 +1,3350 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/notifier.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/in.h>
+#include <linux/ethtool.h>
+#include <linux/topology.h>
+#include <linux/gfp.h>
+#include <linux/aer.h>
+#include <linux/interrupt.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "nic.h"
+#include "selftest.h"
+
+#include "workarounds.h"
+
+/**************************************************************************
+ *
+ * Type name strings
+ *
+ **************************************************************************
+ */
+
+/* Loopback mode names (see LOOPBACK_MODE()) */
+const unsigned int ef4_loopback_mode_max = LOOPBACK_MAX;
+const char *const ef4_loopback_mode_names[] = {
+       [LOOPBACK_NONE]         = "NONE",
+       [LOOPBACK_DATA]         = "DATAPATH",
+       [LOOPBACK_GMAC]         = "GMAC",
+       [LOOPBACK_XGMII]        = "XGMII",
+       [LOOPBACK_XGXS]         = "XGXS",
+       [LOOPBACK_XAUI]         = "XAUI",
+       [LOOPBACK_GMII]         = "GMII",
+       [LOOPBACK_SGMII]        = "SGMII",
+       [LOOPBACK_XGBR]         = "XGBR",
+       [LOOPBACK_XFI]          = "XFI",
+       [LOOPBACK_XAUI_FAR]     = "XAUI_FAR",
+       [LOOPBACK_GMII_FAR]     = "GMII_FAR",
+       [LOOPBACK_SGMII_FAR]    = "SGMII_FAR",
+       [LOOPBACK_XFI_FAR]      = "XFI_FAR",
+       [LOOPBACK_GPHY]         = "GPHY",
+       [LOOPBACK_PHYXS]        = "PHYXS",
+       [LOOPBACK_PCS]          = "PCS",
+       [LOOPBACK_PMAPMD]       = "PMA/PMD",
+       [LOOPBACK_XPORT]        = "XPORT",
+       [LOOPBACK_XGMII_WS]     = "XGMII_WS",
+       [LOOPBACK_XAUI_WS]      = "XAUI_WS",
+       [LOOPBACK_XAUI_WS_FAR]  = "XAUI_WS_FAR",
+       [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
+       [LOOPBACK_GMII_WS]      = "GMII_WS",
+       [LOOPBACK_XFI_WS]       = "XFI_WS",
+       [LOOPBACK_XFI_WS_FAR]   = "XFI_WS_FAR",
+       [LOOPBACK_PHYXS_WS]     = "PHYXS_WS",
+};
+
+const unsigned int ef4_reset_type_max = RESET_TYPE_MAX;
+const char *const ef4_reset_type_names[] = {
+       [RESET_TYPE_INVISIBLE]          = "INVISIBLE",
+       [RESET_TYPE_ALL]                = "ALL",
+       [RESET_TYPE_RECOVER_OR_ALL]     = "RECOVER_OR_ALL",
+       [RESET_TYPE_WORLD]              = "WORLD",
+       [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
+       [RESET_TYPE_DATAPATH]           = "DATAPATH",
+       [RESET_TYPE_DISABLE]            = "DISABLE",
+       [RESET_TYPE_TX_WATCHDOG]        = "TX_WATCHDOG",
+       [RESET_TYPE_INT_ERROR]          = "INT_ERROR",
+       [RESET_TYPE_RX_RECOVERY]        = "RX_RECOVERY",
+       [RESET_TYPE_DMA_ERROR]          = "DMA_ERROR",
+       [RESET_TYPE_TX_SKIP]            = "TX_SKIP",
+};
+
+/* Reset workqueue. If any NIC has a hardware failure then a reset will be
+ * queued onto this work queue. This is not a per-nic work queue, because
+ * ef4_reset_work() acquires the rtnl lock, so resets are naturally serialised.
+ */
+static struct workqueue_struct *reset_workqueue;
+
+/* How often and how many times to poll for a reset while waiting for a
+ * BIST that another function started to complete.
+ */
+#define BIST_WAIT_DELAY_MS     100
+#define BIST_WAIT_DELAY_COUNT  100
+
+/**************************************************************************
+ *
+ * Configurable values
+ *
+ *************************************************************************/
+
+/*
+ * Use separate channels for TX and RX events
+ *
+ * Set this to 1 to use separate channels for TX and RX. It allows us
+ * to control interrupt affinity separately for TX and RX.
+ *
+ * This is only used in MSI-X interrupt mode
+ */
+bool ef4_separate_tx_channels;
+module_param(ef4_separate_tx_channels, bool, 0444);
+MODULE_PARM_DESC(ef4_separate_tx_channels,
+                "Use separate channels for TX and RX");
+
+/* This is the weight assigned to each of the (per-channel) virtual
+ * NAPI devices.
+ */
+static int napi_weight = 64;
+
+/* This is the time (in jiffies) between invocations of the hardware
+ * monitor.
+ * On Falcon-based NICs, this will:
+ * - Check the on-board hardware monitor;
+ * - Poll the link state and reconfigure the hardware as necessary.
+ * On Siena-based NICs for power systems with EEH support, this will give EEH a
+ * chance to start.
+ */
+static unsigned int ef4_monitor_interval = 1 * HZ;
+
+/* Initial interrupt moderation settings.  They can be modified after
+ * module load with ethtool.
+ *
+ * The default for RX should strike a balance between increasing the
+ * round-trip latency and reducing overhead.
+ */
+static unsigned int rx_irq_mod_usec = 60;
+
+/* Initial interrupt moderation settings.  They can be modified after
+ * module load with ethtool.
+ *
+ * This default is chosen to ensure that a 10G link does not go idle
+ * while a TX queue is stopped after it has become full.  A queue is
+ * restarted when it drops below half full.  The time this takes (assuming
+ * worst case 3 descriptors per packet and 1024 descriptors) is
+ *   512 / 3 * 1.2 = 205 usec.
+ */
+static unsigned int tx_irq_mod_usec = 150;
+
+/* This is the first interrupt mode to try out of:
+ * 0 => MSI-X
+ * 1 => MSI
+ * 2 => legacy
+ */
+static unsigned int interrupt_mode;
+
+/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
+ * i.e. the number of CPUs among which we may distribute simultaneous
+ * interrupt handling.
+ *
+ * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
+ * The default (0) means to assign an interrupt to each core.
+ */
+static unsigned int rss_cpus;
+module_param(rss_cpus, uint, 0444);
+MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
+
+/* When set, ef4_probe_port() selects PHY_MODE_SPECIAL before probing
+ * the port.
+ */
+static bool phy_flash_cfg;
+module_param(phy_flash_cfg, bool, 0644);
+MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");
+
+/* ef4_update_irq_mod() shortens a channel's IRQ moderation interval
+ * when the channel's irq_mod_score is below this value.
+ */
+static unsigned irq_adapt_low_thresh = 8000;
+module_param(irq_adapt_low_thresh, uint, 0644);
+MODULE_PARM_DESC(irq_adapt_low_thresh,
+                "Threshold score for reducing IRQ moderation");
+
+/* ef4_update_irq_mod() lengthens a channel's IRQ moderation interval
+ * when the channel's irq_mod_score is above this value.
+ */
+static unsigned irq_adapt_high_thresh = 16000;
+module_param(irq_adapt_high_thresh, uint, 0644);
+MODULE_PARM_DESC(irq_adapt_high_thresh,
+                "Threshold score for increasing IRQ moderation");
+
+/* Default netif message classes enabled for logging.  The parameter is
+ * registered with permission 0.
+ */
+static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
+                        NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
+                        NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
+                        NETIF_MSG_TX_ERR | NETIF_MSG_HW);
+module_param(debug, uint, 0);
+MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
+
+/**************************************************************************
+ *
+ * Utility functions and prototypes
+ *
+ *************************************************************************/
+
+static int ef4_soft_enable_interrupts(struct ef4_nic *efx);
+static void ef4_soft_disable_interrupts(struct ef4_nic *efx);
+static void ef4_remove_channel(struct ef4_channel *channel);
+static void ef4_remove_channels(struct ef4_nic *efx);
+static const struct ef4_channel_type ef4_default_channel_type;
+static void ef4_remove_port(struct ef4_nic *efx);
+static void ef4_init_napi_channel(struct ef4_channel *channel);
+static void ef4_fini_napi(struct ef4_nic *efx);
+static void ef4_fini_napi_channel(struct ef4_channel *channel);
+static void ef4_fini_struct(struct ef4_nic *efx);
+static void ef4_start_all(struct ef4_nic *efx);
+static void ef4_stop_all(struct ef4_nic *efx);
+
+/* Invoke ASSERT_RTNL() when the NIC is in STATE_READY, STATE_RECOVERY
+ * or STATE_DISABLED; in any other state the macro does nothing.
+ *
+ * The argument is parenthesised so that any pointer-valued expression
+ * may be passed.  Note that it is evaluated more than once, so it must
+ * not have side effects.
+ */
+#define EF4_ASSERT_RESET_SERIALISED(efx)               \
+       do {                                            \
+               if (((efx)->state == STATE_READY) ||    \
+                   ((efx)->state == STATE_RECOVERY) || \
+                   ((efx)->state == STATE_DISABLED))   \
+                       ASSERT_RTNL();                  \
+       } while (0)
+
+/* Check whether the NIC is usable.
+ *
+ * Returns 0 unless the NIC state is STATE_DISABLED or STATE_RECOVERY,
+ * in which case an error is logged and -EIO is returned.
+ */
+static int ef4_check_disabled(struct ef4_nic *efx)
+{
+       if (efx->state != STATE_DISABLED && efx->state != STATE_RECOVERY)
+               return 0;
+
+       netif_err(efx, drv, efx->net_dev,
+                 "device is disabled due to earlier errors\n");
+       return -EIO;
+}
+
+/**************************************************************************
+ *
+ * Event queue processing
+ *
+ *************************************************************************/
+
+/* Process channel's event queue
+ *
+ * This function is responsible for processing the event queue of a
+ * single channel.  The caller must guarantee that this function will
+ * never be concurrently called more than once on the same channel,
+ * though different channels may be being processed concurrently.
+ *
+ * Returns the value reported by ef4_nic_process_eventq() for @budget,
+ * or 0 without touching the queue if the channel is not enabled.
+ */
+static int ef4_process_channel(struct ef4_channel *channel, int budget)
+{
+       struct ef4_tx_queue *tx_queue;
+       int spent;
+
+       if (unlikely(!channel->enabled))
+               return 0;
+
+       /* Clear the per-call TX completion counters that are reported
+        * to BQL at the end of this function.
+        */
+       ef4_for_each_channel_tx_queue(tx_queue, channel) {
+               tx_queue->pkts_compl = 0;
+               tx_queue->bytes_compl = 0;
+       }
+
+       spent = ef4_nic_process_eventq(channel, budget);
+       /* Only flush RX and push new descriptors if some work was done */
+       if (spent && ef4_channel_has_rx_queue(channel)) {
+               struct ef4_rx_queue *rx_queue =
+                       ef4_channel_get_rx_queue(channel);
+
+               ef4_rx_flush_packet(channel);
+               ef4_fast_push_rx_descriptors(rx_queue, true);
+       }
+
+       /* Update BQL */
+       ef4_for_each_channel_tx_queue(tx_queue, channel) {
+               if (tx_queue->bytes_compl) {
+                       netdev_tx_completed_queue(tx_queue->core_txq,
+                               tx_queue->pkts_compl, tx_queue->bytes_compl);
+               }
+       }
+
+       return spent;
+}
+
+/* Adjust a channel's interrupt moderation from its accumulated score.
+ *
+ * If the score is below irq_adapt_low_thresh, the moderation interval
+ * is reduced by one step, provided it is currently larger than one
+ * step.  If the score is above irq_adapt_high_thresh, the interval is
+ * increased by one step, provided it is currently below
+ * efx->irq_rx_moderation_us.  Any change is pushed to the NIC through
+ * efx->type->push_irq_moderation().  The channel's irq_count and
+ * irq_mod_score are reset in every case.
+ */
+static void ef4_update_irq_mod(struct ef4_nic *efx, struct ef4_channel *channel)
+{
+       int step = efx->irq_mod_step_us;
+
+       if (channel->irq_mod_score < irq_adapt_low_thresh) {
+               if (channel->irq_moderation_us > step) {
+                       channel->irq_moderation_us -= step;
+                       efx->type->push_irq_moderation(channel);
+               }
+       } else if (channel->irq_mod_score > irq_adapt_high_thresh) {
+               if (channel->irq_moderation_us <
+                   efx->irq_rx_moderation_us) {
+                       channel->irq_moderation_us += step;
+                       efx->type->push_irq_moderation(channel);
+               }
+       }
+
+       channel->irq_count = 0;
+       channel->irq_mod_score = 0;
+}
+
+/* NAPI poll handler
+ *
+ * NAPI guarantees serialisation of polls of the same device, which
+ * provides the guarantee required by ef4_process_channel().
+ *
+ * Returns @budget if the channel's NAPI lock cannot be taken, otherwise
+ * the amount of work reported by ef4_process_channel().
+ */
+static int ef4_poll(struct napi_struct *napi, int budget)
+{
+       struct ef4_channel *channel =
+               container_of(napi, struct ef4_channel, napi_str);
+       struct ef4_nic *efx = channel->efx;
+       int spent;
+
+       if (!ef4_channel_lock_napi(channel))
+               return budget;
+
+       netif_vdbg(efx, intr, efx->net_dev,
+                  "channel %d NAPI poll executing on CPU %d\n",
+                  channel->channel, raw_smp_processor_id());
+
+       spent = ef4_process_channel(channel, budget);
+
+       if (spent < budget) {
+               /* On RX channels with adaptive moderation enabled,
+                * re-evaluate the moderation setting on every 1000th
+                * poll that completes under budget.
+                */
+               if (ef4_channel_has_rx_queue(channel) &&
+                   efx->irq_rx_adaptive &&
+                   unlikely(++channel->irq_count == 1000)) {
+                       ef4_update_irq_mod(efx, channel);
+               }
+
+               ef4_filter_rfs_expire(channel);
+
+               /* There is no race here; although napi_disable() will
+                * only wait for napi_complete(), this isn't a problem
+                * since ef4_nic_eventq_read_ack() will have no effect if
+                * interrupts have already been disabled.
+                */
+               napi_complete(napi);
+               ef4_nic_eventq_read_ack(channel);
+       }
+
+       ef4_channel_unlock_napi(channel);
+       return spent;
+}
+
+/* Create event queue
+ * Event queue memory allocations are done only once.  If the channel
+ * is reset, the memory buffer will be reused; this guards against
+ * errors during channel reset and also simplifies interrupt handling.
+ *
+ * Sets channel->eventq_mask and returns the result of
+ * ef4_nic_probe_eventq().
+ */
+static int ef4_probe_eventq(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+       unsigned long entries;
+
+       netif_dbg(efx, probe, efx->net_dev,
+                 "chan %d create event queue\n", channel->channel);
+
+       /* Build an event queue with room for one event per tx and rx buffer,
+        * plus some extra for link state events and MCDI completions. */
+       entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
+       EF4_BUG_ON_PARANOID(entries > EF4_MAX_EVQ_SIZE);
+       /* The mask is the queue size (at least EF4_MIN_EVQ_SIZE) minus one */
+       channel->eventq_mask = max(entries, EF4_MIN_EVQ_SIZE) - 1;
+
+       return ef4_nic_probe_eventq(channel);
+}
+
+/* Prepare channel's event queue
+ *
+ * On success the IRQ moderation setting is pushed, the read pointer is
+ * reset to 0 and the queue is marked initialised.  On failure the error
+ * from ef4_nic_init_eventq() is returned and the channel is unchanged.
+ */
+static int ef4_init_eventq(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+       int rc;
+
+       EF4_WARN_ON_PARANOID(channel->eventq_init);
+
+       netif_dbg(efx, drv, efx->net_dev,
+                 "chan %d init event queue\n", channel->channel);
+
+       rc = ef4_nic_init_eventq(channel);
+       if (rc)
+               return rc;
+
+       efx->type->push_irq_moderation(channel);
+       channel->eventq_read_ptr = 0;
+       channel->eventq_init = true;
+       return 0;
+}
+
+/* Enable event queue processing and NAPI
+ *
+ * Sets the channel's enabled flag, then enables the channel and its
+ * NAPI context and acknowledges the event queue read pointer.
+ */
+void ef4_start_eventq(struct ef4_channel *channel)
+{
+       netif_dbg(channel->efx, ifup, channel->efx->net_dev,
+                 "chan %d start event queue\n", channel->channel);
+
+       /* Make sure the NAPI handler sees the enabled flag set */
+       channel->enabled = true;
+       smp_wmb();
+
+       ef4_channel_enable(channel);
+       napi_enable(&channel->napi_str);
+       ef4_nic_eventq_read_ack(channel);
+}
+
+/* Disable event queue processing and NAPI
+ *
+ * Does nothing if the channel is not enabled.  Otherwise NAPI is
+ * disabled first, and the enabled flag is cleared only once
+ * ef4_channel_disable() has succeeded.
+ */
+void ef4_stop_eventq(struct ef4_channel *channel)
+{
+       if (!channel->enabled)
+               return;
+
+       napi_disable(&channel->napi_str);
+       /* Retry, sleeping between attempts, until the disable succeeds */
+       while (!ef4_channel_disable(channel))
+               usleep_range(1000, 20000);
+       channel->enabled = false;
+}
+
+/* Shut down a channel's event queue if it was initialised, and clear
+ * the eventq_init flag.  Does nothing otherwise.
+ */
+static void ef4_fini_eventq(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+
+       if (channel->eventq_init) {
+               netif_dbg(efx, drv, efx->net_dev,
+                         "chan %d fini event queue\n", channel->channel);
+
+               ef4_nic_fini_eventq(channel);
+               channel->eventq_init = false;
+       }
+}
+
+/* Log and hand the channel's event queue to ef4_nic_remove_eventq(). */
+static void ef4_remove_eventq(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+
+       netif_dbg(efx, drv, efx->net_dev,
+                 "chan %d remove event queue\n", channel->channel);
+
+       ef4_nic_remove_eventq(channel);
+}
+
+/**************************************************************************
+ *
+ * Channel handling
+ *
+ *************************************************************************/
+
+/* Allocate and initialise a channel structure.
+ *
+ * @efx: owning NIC, stored in the channel and in each of its queues
+ * @i: channel number; also determines the TX queue numbers
+ * @old_channel: not used by this function
+ *
+ * Returns the new zero-initialised channel using the default channel
+ * type, or NULL if the allocation fails.
+ */
+static struct ef4_channel *
+ef4_alloc_channel(struct ef4_nic *efx, int i, struct ef4_channel *old_channel)
+{
+       struct ef4_channel *channel;
+       struct ef4_rx_queue *rx_queue;
+       struct ef4_tx_queue *tx_queue;
+       int j;
+
+       channel = kzalloc(sizeof(*channel), GFP_KERNEL);
+       if (!channel)
+               return NULL;
+
+       channel->efx = efx;
+       channel->channel = i;
+       channel->type = &ef4_default_channel_type;
+
+       /* TX queues are numbered consecutively across channels */
+       for (j = 0; j < EF4_TXQ_TYPES; j++) {
+               tx_queue = &channel->tx_queue[j];
+               tx_queue->efx = efx;
+               tx_queue->queue = i * EF4_TXQ_TYPES + j;
+               tx_queue->channel = channel;
+       }
+
+       rx_queue = &channel->rx_queue;
+       rx_queue->efx = efx;
+       setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill,
+                   (unsigned long)rx_queue);
+
+       return channel;
+}
+
+/* Allocate and initialise a channel structure, copying parameters
+ * (but not resources) from an old channel structure.
+ *
+ * Returns the new channel, or NULL if the allocation fails.
+ */
+static struct ef4_channel *
+ef4_copy_channel(const struct ef4_channel *old_channel)
+{
+       struct ef4_channel *channel;
+       struct ef4_rx_queue *rx_queue;
+       struct ef4_tx_queue *tx_queue;
+       int j;
+
+       channel = kmalloc(sizeof(*channel), GFP_KERNEL);
+       if (!channel)
+               return NULL;
+
+       /* Start from a bitwise copy, then reset everything that must not
+        * be shared with the old channel.
+        */
+       *channel = *old_channel;
+
+       channel->napi_dev = NULL;
+       INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
+       channel->napi_str.napi_id = 0;
+       channel->napi_str.state = 0;
+       memset(&channel->eventq, 0, sizeof(channel->eventq));
+
+       for (j = 0; j < EF4_TXQ_TYPES; j++) {
+               tx_queue = &channel->tx_queue[j];
+               /* Re-point back-references at the new channel */
+               if (tx_queue->channel)
+                       tx_queue->channel = channel;
+               tx_queue->buffer = NULL;
+               memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
+       }
+
+       rx_queue = &channel->rx_queue;
+       rx_queue->buffer = NULL;
+       memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
+       /* The copied timer referred to the old RX queue; set it up afresh */
+       setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill,
+                   (unsigned long)rx_queue);
+
+       return channel;
+}
+
+/* Create the resources of one channel: run the channel type's
+ * pre_probe hook, then probe the event queue, the TX queues and the
+ * RX queues in that order.
+ *
+ * Returns 0 on success.  On the first failure ef4_remove_channel() is
+ * called for the channel and the error code is returned.
+ */
+static int ef4_probe_channel(struct ef4_channel *channel)
+{
+       struct ef4_tx_queue *tx_queue;
+       struct ef4_rx_queue *rx_queue;
+       int rc;
+
+       netif_dbg(channel->efx, probe, channel->efx->net_dev,
+                 "creating channel %d\n", channel->channel);
+
+       rc = channel->type->pre_probe(channel);
+       if (rc)
+               goto fail;
+
+       rc = ef4_probe_eventq(channel);
+       if (rc)
+               goto fail;
+
+       ef4_for_each_channel_tx_queue(tx_queue, channel) {
+               rc = ef4_probe_tx_queue(tx_queue);
+               if (rc)
+                       goto fail;
+       }
+
+       ef4_for_each_channel_rx_queue(rx_queue, channel) {
+               rc = ef4_probe_rx_queue(rx_queue);
+               if (rc)
+                       goto fail;
+       }
+
+       return 0;
+
+fail:
+       ef4_remove_channel(channel);
+       return rc;
+}
+
+/* Format the channel's name into @buf (at most @len bytes) as
+ * "<nic name><type>-<number>".
+ *
+ * When efx->tx_channel_offset is 0 the type is empty.  Otherwise
+ * channels below the offset are "-rx", and the remaining channels are
+ * "-tx" and are numbered relative to the offset.
+ */
+static void
+ef4_get_channel_name(struct ef4_channel *channel, char *buf, size_t len)
+{
+       struct ef4_nic *efx = channel->efx;
+       const char *suffix = "";
+       int index = channel->channel;
+
+       if (efx->tx_channel_offset != 0) {
+               if (channel->channel < efx->tx_channel_offset) {
+                       suffix = "-rx";
+               } else {
+                       suffix = "-tx";
+                       index -= efx->tx_channel_offset;
+               }
+       }
+
+       snprintf(buf, len, "%s%s-%d", efx->name, suffix, index);
+}
+
+/* Ask each channel's type to write the channel name into the
+ * efx->msi_context[] entry indexed by the channel number.
+ */
+static void ef4_set_channel_names(struct ef4_nic *efx)
+{
+       struct ef4_channel *channel;
+
+       ef4_for_each_channel(channel, efx)
+               channel->type->get_name(channel,
+                                       efx->msi_context[channel->channel].name,
+                                       sizeof(efx->msi_context[0].name));
+}
+
+/* Probe every channel of the NIC and then set the channel names.
+ *
+ * Returns 0 on success.  If any channel fails to probe, the failure is
+ * logged, ef4_remove_channels() is called and the error is returned.
+ */
+static int ef4_probe_channels(struct ef4_nic *efx)
+{
+       struct ef4_channel *channel;
+       int rc;
+
+       /* Restart special buffer allocation */
+       efx->next_buffer_table = 0;
+
+       /* Probe channels in reverse, so that any 'extra' channels
+        * use the start of the buffer table. This allows the traffic
+        * channels to be resized without moving them or wasting the
+        * entries before them.
+        */
+       ef4_for_each_channel_rev(channel, efx) {
+               rc = ef4_probe_channel(channel);
+               if (rc) {
+                       netif_err(efx, probe, efx->net_dev,
+                                 "failed to create channel %d\n",
+                                 channel->channel);
+                       goto fail;
+               }
+       }
+       ef4_set_channel_names(efx);
+
+       return 0;
+
+fail:
+       ef4_remove_channels(efx);
+       return rc;
+}
+
+/* Channels are shutdown and reinitialised whilst the NIC is running
+ * to propagate configuration changes (mtu, checksum offload), or
+ * to clear hardware error conditions
+ *
+ * This function derives the RX buffer layout from the current MTU,
+ * refreshes the net device feature flags, computes the TX stop/wake
+ * thresholds and then initialises every TX and RX queue.
+ */
+static void ef4_start_datapath(struct ef4_nic *efx)
+{
+       netdev_features_t old_features = efx->net_dev->features;
+       bool old_rx_scatter = efx->rx_scatter;
+       struct ef4_tx_queue *tx_queue;
+       struct ef4_rx_queue *rx_queue;
+       struct ef4_channel *channel;
+       size_t rx_buf_len;
+
+       /* Calculate the rx buffer allocation parameters required to
+        * support the current MTU, including padding for header
+        * alignment and overruns.
+        */
+       efx->rx_dma_len = (efx->rx_prefix_size +
+                          EF4_MAX_FRAME_LEN(efx->net_dev->mtu) +
+                          efx->type->rx_buffer_padding);
+       rx_buf_len = (sizeof(struct ef4_rx_page_state) +
+                     efx->rx_ip_align + efx->rx_dma_len);
+       if (rx_buf_len <= PAGE_SIZE) {
+               /* A whole frame fits in one page */
+               efx->rx_scatter = efx->type->always_rx_scatter;
+               efx->rx_buffer_order = 0;
+       } else if (efx->type->can_rx_scatter) {
+               /* Too big for one page: scatter across fixed-size buffers */
+               BUILD_BUG_ON(EF4_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
+               BUILD_BUG_ON(sizeof(struct ef4_rx_page_state) +
+                            2 * ALIGN(NET_IP_ALIGN + EF4_RX_USR_BUF_SIZE,
+                                      EF4_RX_BUF_ALIGNMENT) >
+                            PAGE_SIZE);
+               efx->rx_scatter = true;
+               efx->rx_dma_len = EF4_RX_USR_BUF_SIZE;
+               efx->rx_buffer_order = 0;
+       } else {
+               /* No scatter support: fall back to higher-order pages */
+               efx->rx_scatter = false;
+               efx->rx_buffer_order = get_order(rx_buf_len);
+       }
+
+       ef4_rx_config_page_split(efx);
+       if (efx->rx_buffer_order)
+               netif_dbg(efx, drv, efx->net_dev,
+                         "RX buf len=%u; page order=%u batch=%u\n",
+                         efx->rx_dma_len, efx->rx_buffer_order,
+                         efx->rx_pages_per_batch);
+       else
+               netif_dbg(efx, drv, efx->net_dev,
+                         "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
+                         efx->rx_dma_len, efx->rx_page_buf_step,
+                         efx->rx_bufs_per_page, efx->rx_pages_per_batch);
+
+       /* Restore previously fixed features in hw_features and remove
+        * features which are fixed now
+        */
+       efx->net_dev->hw_features |= efx->net_dev->features;
+       efx->net_dev->hw_features &= ~efx->fixed_features;
+       efx->net_dev->features |= efx->fixed_features;
+       if (efx->net_dev->features != old_features)
+               netdev_features_change(efx->net_dev);
+
+       /* RX filters may also have scatter-enabled flags */
+       if (efx->rx_scatter != old_rx_scatter)
+               efx->type->filter_update_rx_scatter(efx);
+
+       /* We must keep at least one descriptor in a TX ring empty.
+        * We could avoid this when the queue size does not exactly
+        * match the hardware ring size, but it's not that important.
+        * Therefore we stop the queue when one more skb might fill
+        * the ring completely.  We wake it when half way back to
+        * empty.
+        */
+       efx->txq_stop_thresh = efx->txq_entries - ef4_tx_max_skb_descs(efx);
+       efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
+
+       /* Initialise the channels */
+       ef4_for_each_channel(channel, efx) {
+               ef4_for_each_channel_tx_queue(tx_queue, channel) {
+                       ef4_init_tx_queue(tx_queue);
+                       atomic_inc(&efx->active_queues);
+               }
+
+               ef4_for_each_channel_rx_queue(rx_queue, channel) {
+                       ef4_init_rx_queue(rx_queue);
+                       atomic_inc(&efx->active_queues);
+                       /* Event processing is stopped around the initial
+                        * descriptor push and restarted afterwards.
+                        */
+                       ef4_stop_eventq(channel);
+                       ef4_fast_push_rx_descriptors(rx_queue, false);
+                       ef4_start_eventq(channel);
+               }
+
+               WARN_ON(channel->rx_pkt_n_frags);
+       }
+
+       if (netif_device_present(efx->net_dev))
+               netif_tx_wake_all_queues(efx->net_dev);
+}
+
+/* Stop the datapath: disable RX refill, wait for in-progress NAPI
+ * polls on RX channels, flush the DMA queues through
+ * efx->type->fini_dmaq() and then shut down all RX and TX queues.
+ *
+ * The port must already be disabled (BUG_ON otherwise).  A flush
+ * failure is logged; where EF4_WORKAROUND_7803 applies it also
+ * schedules a RESET_TYPE_ALL reset.
+ */
+static void ef4_stop_datapath(struct ef4_nic *efx)
+{
+       struct ef4_channel *channel;
+       struct ef4_tx_queue *tx_queue;
+       struct ef4_rx_queue *rx_queue;
+       int rc;
+
+       EF4_ASSERT_RESET_SERIALISED(efx);
+       BUG_ON(efx->port_enabled);
+
+       /* Stop RX refill */
+       ef4_for_each_channel(channel, efx) {
+               ef4_for_each_channel_rx_queue(rx_queue, channel)
+                       rx_queue->refill_enabled = false;
+       }
+
+       ef4_for_each_channel(channel, efx) {
+               /* RX packet processing is pipelined, so wait for the
+                * NAPI handler to complete.  At least event queue 0
+                * might be kept active by non-data events, so don't
+                * use napi_synchronize() but actually disable NAPI
+                * temporarily.
+                */
+               if (ef4_channel_has_rx_queue(channel)) {
+                       ef4_stop_eventq(channel);
+                       ef4_start_eventq(channel);
+               }
+       }
+
+       rc = efx->type->fini_dmaq(efx);
+       if (rc && EF4_WORKAROUND_7803(efx)) {
+               /* Schedule a reset to recover from the flush failure. The
+                * descriptor caches reference memory we're about to free,
+                * but falcon_reconfigure_mac_wrapper() won't reconnect
+                * the MACs because of the pending reset.
+                */
+               netif_err(efx, drv, efx->net_dev,
+                         "Resetting to recover from flush failure\n");
+               ef4_schedule_reset(efx, RESET_TYPE_ALL);
+       } else if (rc) {
+               netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
+       } else {
+               netif_dbg(efx, drv, efx->net_dev,
+                         "successfully flushed all queues\n");
+       }
+
+       /* The queues are shut down regardless of the flush result */
+       ef4_for_each_channel(channel, efx) {
+               ef4_for_each_channel_rx_queue(rx_queue, channel)
+                       ef4_fini_rx_queue(rx_queue);
+               ef4_for_each_possible_channel_tx_queue(tx_queue, channel)
+                       ef4_fini_tx_queue(tx_queue);
+       }
+}
+
+/* Release the resources of one channel: remove its RX queues, its
+ * possible TX queues and its event queue, then run the channel type's
+ * post_remove hook.  The channel structure itself is not freed here.
+ */
+static void ef4_remove_channel(struct ef4_channel *channel)
+{
+       struct ef4_tx_queue *tx_queue;
+       struct ef4_rx_queue *rx_queue;
+
+       netif_dbg(channel->efx, drv, channel->efx->net_dev,
+                 "destroy chan %d\n", channel->channel);
+
+       ef4_for_each_channel_rx_queue(rx_queue, channel)
+               ef4_remove_rx_queue(rx_queue);
+       ef4_for_each_possible_channel_tx_queue(tx_queue, channel)
+               ef4_remove_tx_queue(tx_queue);
+       ef4_remove_eventq(channel);
+       channel->type->post_remove(channel);
+}
+
+/* Call ef4_remove_channel() on every channel of the NIC. */
+static void ef4_remove_channels(struct ef4_nic *efx)
+{
+       struct ef4_channel *channel;
+
+       ef4_for_each_channel(channel, efx)
+               ef4_remove_channel(channel);
+}
+
+/* Re-create the copyable channels with new RX/TX ring sizes.
+ *
+ * @rxq_entries: new value for efx->rxq_entries
+ * @txq_entries: new value for efx->txq_entries
+ *
+ * The device is detached and stopped, each channel whose type provides
+ * a copy operation is cloned, and the clones are probed with the new
+ * sizes.  If probing fails the old channels and sizes are swapped back.
+ * In every case the channel structures that end up unused are
+ * destroyed and the device is restarted.
+ *
+ * Returns 0 on success, the error from ef4_check_disabled(), -ENOMEM
+ * if a channel cannot be cloned, the probe error, or (if nothing else
+ * failed) the error from ef4_soft_enable_interrupts().
+ */
+int
+ef4_realloc_channels(struct ef4_nic *efx, u32 rxq_entries, u32 txq_entries)
+{
+       struct ef4_channel *other_channel[EF4_MAX_CHANNELS], *channel;
+       u32 old_rxq_entries, old_txq_entries;
+       unsigned i, next_buffer_table = 0;
+       int rc, rc2;
+
+       rc = ef4_check_disabled(efx);
+       if (rc)
+               return rc;
+
+       /* Not all channels should be reallocated. We must avoid
+        * reallocating their buffer table entries.
+        */
+       ef4_for_each_channel(channel, efx) {
+               struct ef4_rx_queue *rx_queue;
+               struct ef4_tx_queue *tx_queue;
+
+               /* Only channels that will be kept constrain the table */
+               if (channel->type->copy)
+                       continue;
+               next_buffer_table = max(next_buffer_table,
+                                       channel->eventq.index +
+                                       channel->eventq.entries);
+               ef4_for_each_channel_rx_queue(rx_queue, channel)
+                       next_buffer_table = max(next_buffer_table,
+                                               rx_queue->rxd.index +
+                                               rx_queue->rxd.entries);
+               ef4_for_each_channel_tx_queue(tx_queue, channel)
+                       next_buffer_table = max(next_buffer_table,
+                                               tx_queue->txd.index +
+                                               tx_queue->txd.entries);
+       }
+
+       ef4_device_detach_sync(efx);
+       ef4_stop_all(efx);
+       ef4_soft_disable_interrupts(efx);
+
+       /* Clone channels (where possible) */
+       memset(other_channel, 0, sizeof(other_channel));
+       for (i = 0; i < efx->n_channels; i++) {
+               channel = efx->channel[i];
+               if (channel->type->copy)
+                       channel = channel->type->copy(channel);
+               if (!channel) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+               other_channel[i] = channel;
+       }
+
+       /* Swap entry counts and channel pointers */
+       old_rxq_entries = efx->rxq_entries;
+       old_txq_entries = efx->txq_entries;
+       efx->rxq_entries = rxq_entries;
+       efx->txq_entries = txq_entries;
+       for (i = 0; i < efx->n_channels; i++) {
+               channel = efx->channel[i];
+               efx->channel[i] = other_channel[i];
+               other_channel[i] = channel;
+       }
+
+       /* Restart buffer table allocation */
+       efx->next_buffer_table = next_buffer_table;
+
+       /* Probe only the cloned channels; the others were kept as-is */
+       for (i = 0; i < efx->n_channels; i++) {
+               channel = efx->channel[i];
+               if (!channel->type->copy)
+                       continue;
+               rc = ef4_probe_channel(channel);
+               if (rc)
+                       goto rollback;
+               ef4_init_napi_channel(efx->channel[i]);
+       }
+
+out:
+       /* Destroy unused channel structures */
+       for (i = 0; i < efx->n_channels; i++) {
+               channel = other_channel[i];
+               if (channel && channel->type->copy) {
+                       ef4_fini_napi_channel(channel);
+                       ef4_remove_channel(channel);
+                       kfree(channel);
+               }
+       }
+
+       rc2 = ef4_soft_enable_interrupts(efx);
+       if (rc2) {
+               /* Keep the first error if one was already recorded */
+               rc = rc ? rc : rc2;
+               netif_err(efx, drv, efx->net_dev,
+                         "unable to restart interrupts on channel reallocation\n");
+               ef4_schedule_reset(efx, RESET_TYPE_DISABLE);
+       } else {
+               ef4_start_all(efx);
+               netif_device_attach(efx->net_dev);
+       }
+       return rc;
+
+rollback:
+       /* Swap back */
+       efx->rxq_entries = old_rxq_entries;
+       efx->txq_entries = old_txq_entries;
+       for (i = 0; i < efx->n_channels; i++) {
+               channel = efx->channel[i];
+               efx->channel[i] = other_channel[i];
+               other_channel[i] = channel;
+       }
+       goto out;
+}
+
+/* (Re)arm the RX queue's slow_fill timer to expire 100 ms from now. */
+void ef4_schedule_slow_fill(struct ef4_rx_queue *rx_queue)
+{
+       mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100));
+}
+
+/* Channel type assigned by ef4_alloc_channel(): no-op pre_probe and
+ * post_remove hooks, the standard channel naming and copy operations.
+ */
+static const struct ef4_channel_type ef4_default_channel_type = {
+       .pre_probe              = ef4_channel_dummy_op_int,
+       .post_remove            = ef4_channel_dummy_op_void,
+       .get_name               = ef4_get_channel_name,
+       .copy                   = ef4_copy_channel,
+       .keep_eventq            = false,
+};
+
+/* No-op channel operation that always reports success (returns 0). */
+int ef4_channel_dummy_op_int(struct ef4_channel *channel)
+{
+       return 0;
+}
+
+/* No-op channel operation with no return value. */
+void ef4_channel_dummy_op_void(struct ef4_channel *channel)
+{
+}
+
+/**************************************************************************
+ *
+ * Port handling
+ *
+ **************************************************************************/
+
+/* This ensures that the kernel is kept informed (via
+ * netif_carrier_on/off) of the link status, and also maintains the
+ * link status's stop on the port's TX queue.
+ *
+ * Does nothing while the net device is not running.  Otherwise the
+ * carrier state is synchronised with efx->link_state.up, counting each
+ * change in n_link_state_changes, and the current link status is
+ * logged.
+ */
+void ef4_link_status_changed(struct ef4_nic *efx)
+{
+       struct ef4_link_state *link_state = &efx->link_state;
+
+       /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
+        * that no events are triggered between unregister_netdev() and the
+        * driver unloading. A more general condition is that NETDEV_CHANGE
+        * can only be generated between NETDEV_UP and NETDEV_DOWN */
+       if (!netif_running(efx->net_dev))
+               return;
+
+       /* Only touch the carrier when it disagrees with the link state */
+       if (link_state->up != netif_carrier_ok(efx->net_dev)) {
+               efx->n_link_state_changes++;
+
+               if (link_state->up)
+                       netif_carrier_on(efx->net_dev);
+               else
+                       netif_carrier_off(efx->net_dev);
+       }
+
+       /* Status message for kernel log */
+       if (link_state->up)
+               netif_info(efx, link, efx->net_dev,
+                          "link up at %uMbps %s-duplex (MTU %d)\n",
+                          link_state->speed, link_state->fd ? "full" : "half",
+                          efx->net_dev->mtu);
+       else
+               netif_info(efx, link, efx->net_dev, "link down\n");
+}
+
+/* Record the advertised link modes and, when @advertising is non-zero,
+ * derive efx->wanted_fc from its pause bits: ADVERTISED_Pause selects
+ * both EF4_FC_TX and EF4_FC_RX (otherwise both are cleared), and
+ * ADVERTISED_Asym_Pause then toggles EF4_FC_TX.
+ */
+void ef4_link_set_advertising(struct ef4_nic *efx, u32 advertising)
+{
+       efx->link_advertising = advertising;
+       if (!advertising)
+               return;
+
+       if (advertising & ADVERTISED_Pause)
+               efx->wanted_fc |= (EF4_FC_TX | EF4_FC_RX);
+       else
+               efx->wanted_fc &= ~(EF4_FC_TX | EF4_FC_RX);
+
+       if (advertising & ADVERTISED_Asym_Pause)
+               efx->wanted_fc ^= EF4_FC_TX;
+}
+
+/* Record the wanted flow control and, when efx->link_advertising is
+ * non-zero, update its pause bits to match: EF4_FC_RX sets both
+ * ADVERTISED_Pause and ADVERTISED_Asym_Pause (otherwise both are
+ * cleared), and EF4_FC_TX then toggles ADVERTISED_Asym_Pause.
+ */
+void ef4_link_set_wanted_fc(struct ef4_nic *efx, u8 wanted_fc)
+{
+       efx->wanted_fc = wanted_fc;
+       if (!efx->link_advertising)
+               return;
+
+       if (wanted_fc & EF4_FC_RX)
+               efx->link_advertising |= (ADVERTISED_Pause |
+                                         ADVERTISED_Asym_Pause);
+       else
+               efx->link_advertising &= ~(ADVERTISED_Pause |
+                                          ADVERTISED_Asym_Pause);
+
+       if (wanted_fc & EF4_FC_TX)
+               efx->link_advertising ^= ADVERTISED_Asym_Pause;
+}
+
+static void ef4_fini_port(struct ef4_nic *efx);
+
+/* We assume that efx->type->reconfigure_mac will always try to sync RX
+ * filters and therefore needs to read-lock the filter table against freeing
+ *
+ * Calls efx->type->reconfigure_mac() with efx->filter_sem held for
+ * reading.
+ */
+void ef4_mac_reconfigure(struct ef4_nic *efx)
+{
+       down_read(&efx->filter_sem);
+       efx->type->reconfigure_mac(efx);
+       up_read(&efx->filter_sem);
+}
+
+/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
+ * the MAC appropriately. All other PHY configuration changes are pushed
+ * through phy_op->set_settings(), and pushed asynchronously to the MAC
+ * through ef4_monitor().
+ *
+ * Callers must hold the mac_lock
+ *
+ * Returns the result of efx->type->reconfigure_port().  On failure
+ * efx->phy_mode is restored to its value on entry.
+ */
+int __ef4_reconfigure_port(struct ef4_nic *efx)
+{
+       enum ef4_phy_mode phy_mode;
+       int rc;
+
+       WARN_ON(!mutex_is_locked(&efx->mac_lock));
+
+       /* Disable PHY transmit in mac level loopbacks */
+       phy_mode = efx->phy_mode;
+       if (LOOPBACK_INTERNAL(efx))
+               efx->phy_mode |= PHY_MODE_TX_DISABLED;
+       else
+               efx->phy_mode &= ~PHY_MODE_TX_DISABLED;
+
+       rc = efx->type->reconfigure_port(efx);
+
+       /* Roll back the PHY mode change if the reconfiguration failed */
+       if (rc)
+               efx->phy_mode = phy_mode;
+
+       return rc;
+}
+
+/* Reinitialise the MAC to pick up new PHY settings, even if the port is
+ * disabled.
+ *
+ * Takes the mac_lock around __ef4_reconfigure_port() and returns its
+ * result.
+ */
+int ef4_reconfigure_port(struct ef4_nic *efx)
+{
+       int rc;
+
+       EF4_ASSERT_RESET_SERIALISED(efx);
+
+       mutex_lock(&efx->mac_lock);
+       rc = __ef4_reconfigure_port(efx);
+       mutex_unlock(&efx->mac_lock);
+
+       return rc;
+}
+
+/* Asynchronous work item for changing MAC promiscuity and multicast
+ * hash.  Avoid a drain/rx_ingress enable by reconfiguring the current
+ * MAC directly.
+ *
+ * The MAC is reconfigured under the mac_lock, and only while the port
+ * is enabled.
+ */
+static void ef4_mac_work(struct work_struct *data)
+{
+       struct ef4_nic *efx = container_of(data, struct ef4_nic, mac_work);
+
+       mutex_lock(&efx->mac_lock);
+       if (efx->port_enabled)
+               ef4_mac_reconfigure(efx);
+       mutex_unlock(&efx->mac_lock);
+}
+
+/* Probe the port through efx->type->probe_port() and, on success, set
+ * the net device's MAC address to its permanent address.
+ *
+ * Returns 0 on success or the error from probe_port().
+ */
+static int ef4_probe_port(struct ef4_nic *efx)
+{
+       int rc;
+
+       netif_dbg(efx, probe, efx->net_dev, "create port\n");
+
+       /* Honour the phy_flash_cfg module parameter */
+       if (phy_flash_cfg)
+               efx->phy_mode = PHY_MODE_SPECIAL;
+
+       /* Connect up MAC/PHY operations table */
+       rc = efx->type->probe_port(efx);
+       if (rc)
+               return rc;
+
+       /* Initialise MAC address to permanent address */
+       ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);
+
+       return 0;
+}
+
+/* Initialise the PHY, reconfigure the MAC and then reconfigure the
+ * PHY, all under the mac_lock.
+ *
+ * Returns 0 on success (an -EPERM result from phy_op->reconfigure() is
+ * also treated as success), otherwise the error from phy_op->init() or
+ * phy_op->reconfigure().
+ */
+static int ef4_init_port(struct ef4_nic *efx)
+{
+       int rc;
+
+       netif_dbg(efx, drv, efx->net_dev, "init port\n");
+
+       mutex_lock(&efx->mac_lock);
+
+       rc = efx->phy_op->init(efx);
+       if (rc)
+               goto fail1;
+
+       efx->port_initialized = true;
+
+       /* Reconfigure the MAC before creating dma queues (required for
+        * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */
+       ef4_mac_reconfigure(efx);
+
+       /* Ensure the PHY advertises the correct flow control settings */
+       rc = efx->phy_op->reconfigure(efx);
+       if (rc && rc != -EPERM)
+               goto fail2;
+
+       mutex_unlock(&efx->mac_lock);
+       return 0;
+
+fail2:
+       /* NOTE(review): port_initialized is left set on this path even
+        * though the PHY has been shut down, so a later ef4_fini_port()
+        * would call phy_op->fini() again - confirm callers never do so.
+        */
+       efx->phy_op->fini(efx);
+fail1:
+       mutex_unlock(&efx->mac_lock);
+       return rc;
+}
+
+/* Mark the port enabled and reconfigure the MAC, under the mac_lock.
+ * The port must not already be enabled (BUG_ON otherwise).
+ */
+static void ef4_start_port(struct ef4_nic *efx)
+{
+       netif_dbg(efx, ifup, efx->net_dev, "start port\n");
+       BUG_ON(efx->port_enabled);
+
+       mutex_lock(&efx->mac_lock);
+       efx->port_enabled = true;
+
+       /* Ensure MAC ingress/egress is enabled */
+       ef4_mac_reconfigure(efx);
+
+       mutex_unlock(&efx->mac_lock);
+}
+
+/* Cancel work for MAC reconfiguration, periodic hardware monitoring
+ * and the async self-test, wait for them to finish and prevent them
+ * being scheduled again.  This doesn't cover online resets, which
+ * should only be cancelled when removing the device.
+ */
+static void ef4_stop_port(struct ef4_nic *efx)
+{
+       netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
+
+       EF4_ASSERT_RESET_SERIALISED(efx);
+
+       /* Clear the flag under the lock so that ef4_mac_work() sees it */
+       mutex_lock(&efx->mac_lock);
+       efx->port_enabled = false;
+       mutex_unlock(&efx->mac_lock);
+
+       /* Serialise against ef4_set_multicast_list() */
+       netif_addr_lock_bh(efx->net_dev);
+       netif_addr_unlock_bh(efx->net_dev);
+
+       cancel_delayed_work_sync(&efx->monitor_work);
+       ef4_selftest_async_cancel(efx);
+       cancel_work_sync(&efx->mac_work);
+}
+
+/* Shut the PHY down and report the link as down.  Does nothing beyond
+ * the debug message if the port was never initialised.
+ */
+static void ef4_fini_port(struct ef4_nic *efx)
+{
+       netif_dbg(efx, drv, efx->net_dev, "shut down port\n");
+
+       if (efx->port_initialized) {
+               efx->phy_op->fini(efx);
+               efx->port_initialized = false;
+
+               /* With the PHY gone the link cannot be up any more */
+               efx->link_state.up = false;
+               ef4_link_status_changed(efx);
+       }
+}
+
+/* Counterpart of ef4_probe_port(): hand the port back to the NIC type's
+ * remove_port() hook.
+ */
+static void ef4_remove_port(struct ef4_nic *efx)
+{
+       netif_dbg(efx, drv, efx->net_dev, "destroying port\n");
+       efx->type->remove_port(efx);
+}
+
+/**************************************************************************
+ *
+ * NIC handling
+ *
+ **************************************************************************/
+
+/* Functions that are their own primary (efx->primary == efx); filled in
+ * by ef4_associate().
+ */
+static LIST_HEAD(ef4_primary_list);
+/* Secondary functions for which no matching primary is known (yet), and
+ * secondaries orphaned by ef4_dissociate().
+ */
+static LIST_HEAD(ef4_unassociated_list);
+
+/* Two functions are considered part of the same controller when they
+ * share the NIC type and both carry an identical, non-NULL vpd_sn string.
+ */
+static bool ef4_same_controller(struct ef4_nic *left, struct ef4_nic *right)
+{
+       if (left->type != right->type)
+               return false;
+
+       /* Without a vpd_sn on both sides there is nothing to compare */
+       if (!left->vpd_sn || !right->vpd_sn)
+               return false;
+
+       return strcmp(left->vpd_sn, right->vpd_sn) == 0;
+}
+
+/* Link a newly probed function into the primary/secondary bookkeeping.
+ *
+ * A primary (efx->primary == efx) goes onto ef4_primary_list and adopts
+ * every matching function currently waiting on ef4_unassociated_list.
+ * A secondary is attached to the first matching primary, or parked on
+ * ef4_unassociated_list if there is none.
+ */
+static void ef4_associate(struct ef4_nic *efx)
+{
+       struct ef4_nic *cur, *tmp;
+
+       if (efx->primary != efx) {
+               /* Secondary function: try to find its primary */
+               list_for_each_entry(cur, &ef4_primary_list, node) {
+                       if (!ef4_same_controller(efx, cur))
+                               continue;
+
+                       netif_dbg(efx, probe, efx->net_dev,
+                                 "adding to secondary list of %s %s\n",
+                                 pci_name(cur->pci_dev),
+                                 cur->net_dev->name);
+                       list_add_tail(&efx->node, &cur->secondary_list);
+                       efx->primary = cur;
+                       return;
+               }
+
+               /* No primary around yet; wait for one to show up */
+               netif_dbg(efx, probe, efx->net_dev,
+                         "adding to unassociated list\n");
+               list_add_tail(&efx->node, &ef4_unassociated_list);
+               return;
+       }
+
+       /* Primary function: register it, then collect waiting secondaries */
+       netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n");
+       list_add_tail(&efx->node, &ef4_primary_list);
+
+       list_for_each_entry_safe(cur, tmp, &ef4_unassociated_list, node) {
+               if (!ef4_same_controller(efx, cur))
+                       continue;
+
+               list_del(&cur->node);
+               netif_dbg(cur, probe, cur->net_dev,
+                         "moving to secondary list of %s %s\n",
+                         pci_name(efx->pci_dev),
+                         efx->net_dev->name);
+               list_add_tail(&cur->node, &efx->secondary_list);
+               cur->primary = efx;
+       }
+}
+
+/* Undo ef4_associate(): unlink this function from whichever list it is
+ * on and return all of its secondaries to ef4_unassociated_list with
+ * their primary pointer cleared.
+ */
+static void ef4_dissociate(struct ef4_nic *efx)
+{
+       struct ef4_nic *sec, *tmp;
+
+       list_del(&efx->node);
+       efx->primary = NULL;
+
+       /* _safe iteration: every entry is removed from the list we walk */
+       list_for_each_entry_safe(sec, tmp, &efx->secondary_list, node) {
+               list_del(&sec->node);
+               netif_dbg(sec, probe, sec->net_dev,
+                         "moving to unassociated list\n");
+               list_add_tail(&sec->node, &ef4_unassociated_list);
+               sec->primary = NULL;
+       }
+}
+
+/* This configures the PCI device to enable I/O and DMA.
+ *
+ * Steps: enable the device, turn on bus mastering, negotiate a DMA mask,
+ * claim the memory BAR given by the NIC type and map it into
+ * efx->membase.  Returns 0 on success or a negative error code; on
+ * failure everything done so far is undone via the fail labels.
+ */
+static int ef4_init_io(struct ef4_nic *efx)
+{
+       struct pci_dev *pci_dev = efx->pci_dev;
+       dma_addr_t dma_mask = efx->type->max_dma_mask;
+       unsigned int mem_map_size = efx->type->mem_map_size(efx);
+       int rc, bar;
+
+       netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
+
+       bar = efx->type->mem_bar;
+
+       rc = pci_enable_device(pci_dev);
+       if (rc) {
+               netif_err(efx, probe, efx->net_dev,
+                         "failed to enable PCI device\n");
+               goto fail1;
+       }
+
+       pci_set_master(pci_dev);
+
+       /* Set the PCI DMA mask.  Try all possibilities from our
+        * genuine mask down to 32 bits, because some architectures
+        * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
+        * masks even though they reject 46 bit masks.
+        */
+       while (dma_mask > 0x7fffffffUL) {
+               rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
+               if (rc == 0)
+                       break;
+               dma_mask >>= 1;
+       }
+       /* rc still holds the last dma_set_mask_and_coherent() result here */
+       if (rc) {
+               netif_err(efx, probe, efx->net_dev,
+                         "could not find a suitable DMA mask\n");
+               goto fail2;
+       }
+       netif_dbg(efx, probe, efx->net_dev,
+                 "using DMA mask %llx\n", (unsigned long long) dma_mask);
+
+       /* A non-zero membase_phys later tells ef4_fini_io() that the
+        * region needs releasing; fail3 resets it if the request fails.
+        */
+       efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
+       rc = pci_request_region(pci_dev, bar, "sfc");
+       if (rc) {
+               netif_err(efx, probe, efx->net_dev,
+                         "request for memory BAR failed\n");
+               /* The specific error is replaced by -EIO */
+               rc = -EIO;
+               goto fail3;
+       }
+       efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size);
+       if (!efx->membase) {
+               netif_err(efx, probe, efx->net_dev,
+                         "could not map memory BAR at %llx+%x\n",
+                         (unsigned long long)efx->membase_phys, mem_map_size);
+               rc = -ENOMEM;
+               goto fail4;
+       }
+       netif_dbg(efx, probe, efx->net_dev,
+                 "memory BAR at %llx+%x (virtual %p)\n",
+                 (unsigned long long)efx->membase_phys, mem_map_size,
+                 efx->membase);
+
+       return 0;
+
+       /* Error unwinding, in reverse order of acquisition */
+ fail4:
+       pci_release_region(efx->pci_dev, bar);
+ fail3:
+       efx->membase_phys = 0;
+ fail2:
+       pci_disable_device(efx->pci_dev);
+ fail1:
+       return rc;
+}
+
+/* Reverse ef4_init_io(): unmap the register window, give the memory BAR
+ * back and disable the PCI device.  membase and membase_phys double as
+ * "was this step done" markers, so the function copes with a partially
+ * initialised device.
+ */
+static void ef4_fini_io(struct ef4_nic *efx)
+{
+       netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
+
+       if (efx->membase) {
+               iounmap(efx->membase);
+               efx->membase = NULL;
+       }
+
+       if (efx->membase_phys) {
+               pci_release_region(efx->pci_dev, efx->type->mem_bar);
+               efx->membase_phys = 0;
+       }
+
+       /* Leave the device enabled while VFs are still assigned, so that
+        * bus-mastering is not taken away from them.
+        */
+       if (!pci_vfs_assigned(efx->pci_dev))
+               pci_disable_device(efx->pci_dev);
+}
+
+/* Fill every slot of the RX indirection table with the default value that
+ * ethtool_rxfh_indir_default() yields for that slot and efx->rss_spread.
+ */
+void ef4_set_default_rx_indir_table(struct ef4_nic *efx)
+{
+       size_t slot = 0;
+
+       while (slot < ARRAY_SIZE(efx->rx_indir_table)) {
+               efx->rx_indir_table[slot] =
+                       ethtool_rxfh_indir_default(slot, efx->rss_spread);
+               slot++;
+       }
+}
+
+/* Work out how many channels we would like to spread RX work over.
+ *
+ * A non-zero rss_cpus is used as is.  Otherwise the online CPUs are
+ * counted with each set of topology siblings counted once.  If the
+ * scratch cpumask cannot be allocated the answer is 1 (no spreading).
+ */
+static unsigned int ef4_wanted_parallelism(struct ef4_nic *efx)
+{
+       cpumask_var_t seen;
+       unsigned int n_groups = 0;
+       int cpu;
+
+       if (rss_cpus)
+               return rss_cpus;
+
+       if (unlikely(!zalloc_cpumask_var(&seen, GFP_KERNEL))) {
+               netif_warn(efx, probe, efx->net_dev,
+                          "RSS disabled due to allocation failure\n");
+               return 1;
+       }
+
+       for_each_online_cpu(cpu) {
+               /* Already covered as a sibling of an earlier CPU? */
+               if (cpumask_test_cpu(cpu, seen))
+                       continue;
+
+               ++n_groups;
+               cpumask_or(seen, seen, topology_sibling_cpumask(cpu));
+       }
+
+       free_cpumask_var(seen);
+
+       return n_groups;
+}
+
+/* Probe the number and type of interrupts we are able to obtain, and
+ * the resulting numbers of channels and RX queues.
+ *
+ * The modes are tried in the order MSI-X, MSI, legacy, starting from
+ * efx->interrupt_mode; each failure downgrades interrupt_mode so the
+ * next section picks it up.  Sets n_channels, n_rx_channels,
+ * n_tx_channels, the per-channel irq (or legacy_irq) and rss_spread.
+ * As written this always returns 0.
+ */
+static int ef4_probe_interrupts(struct ef4_nic *efx)
+{
+       unsigned int extra_channels = 0;
+       unsigned int i, j;
+       int rc;
+
+       /* Count the extra channel types that are actually registered */
+       for (i = 0; i < EF4_MAX_EXTRA_CHANNELS; i++)
+               if (efx->extra_channel_type[i])
+                       ++extra_channels;
+
+       if (efx->interrupt_mode == EF4_INT_MODE_MSIX) {
+               struct msix_entry xentries[EF4_MAX_CHANNELS];
+               unsigned int n_channels;
+
+               /* Wanted: one channel per unit of parallelism, doubled if
+                * TX gets its own channels, plus the extra channels, all
+                * capped at max_channels.
+                */
+               n_channels = ef4_wanted_parallelism(efx);
+               if (ef4_separate_tx_channels)
+                       n_channels *= 2;
+               n_channels += extra_channels;
+               n_channels = min(n_channels, efx->max_channels);
+
+               for (i = 0; i < n_channels; i++)
+                       xentries[i].entry = i;
+               /* Accept anything between 1 and n_channels vectors */
+               rc = pci_enable_msix_range(efx->pci_dev,
+                                          xentries, 1, n_channels);
+               if (rc < 0) {
+                       /* Fall back to single channel MSI */
+                       efx->interrupt_mode = EF4_INT_MODE_MSI;
+                       netif_err(efx, drv, efx->net_dev,
+                                 "could not enable MSI-X\n");
+               } else if (rc < n_channels) {
+                       netif_err(efx, drv, efx->net_dev,
+                                 "WARNING: Insufficient MSI-X vectors"
+                                 " available (%d < %u).\n", rc, n_channels);
+                       netif_err(efx, drv, efx->net_dev,
+                                 "WARNING: Performance may be reduced.\n");
+                       n_channels = rc;
+               }
+
+               /* rc > 0: MSI-X is on and n_channels vectors were granted */
+               if (rc > 0) {
+                       efx->n_channels = n_channels;
+                       /* From here on n_channels counts only the channels
+                        * left for RX/TX once the extra ones are set aside.
+                        */
+                       if (n_channels > extra_channels)
+                               n_channels -= extra_channels;
+                       if (ef4_separate_tx_channels) {
+                               efx->n_tx_channels = min(max(n_channels / 2,
+                                                            1U),
+                                                        efx->max_tx_channels);
+                               efx->n_rx_channels = max(n_channels -
+                                                        efx->n_tx_channels,
+                                                        1U);
+                       } else {
+                               efx->n_tx_channels = min(n_channels,
+                                                        efx->max_tx_channels);
+                               efx->n_rx_channels = n_channels;
+                       }
+                       for (i = 0; i < efx->n_channels; i++)
+                               ef4_get_channel(efx, i)->irq =
+                                       xentries[i].vector;
+               }
+       }
+
+       /* Try single interrupt MSI */
+       if (efx->interrupt_mode == EF4_INT_MODE_MSI) {
+               efx->n_channels = 1;
+               efx->n_rx_channels = 1;
+               efx->n_tx_channels = 1;
+               rc = pci_enable_msi(efx->pci_dev);
+               if (rc == 0) {
+                       ef4_get_channel(efx, 0)->irq = efx->pci_dev->irq;
+               } else {
+                       netif_err(efx, drv, efx->net_dev,
+                                 "could not enable MSI\n");
+                       efx->interrupt_mode = EF4_INT_MODE_LEGACY;
+               }
+       }
+
+       /* Assume legacy interrupts */
+       if (efx->interrupt_mode == EF4_INT_MODE_LEGACY) {
+               efx->n_channels = 1 + (ef4_separate_tx_channels ? 1 : 0);
+               efx->n_rx_channels = 1;
+               efx->n_tx_channels = 1;
+               efx->legacy_irq = efx->pci_dev->irq;
+       }
+
+       /* Assign extra channels if possible.  They take the highest
+        * channel numbers, counting down from n_channels; without MSI-X,
+        * or with too few channels, each type's handle_no_channel() is
+        * called instead.
+        */
+       j = efx->n_channels;
+       for (i = 0; i < EF4_MAX_EXTRA_CHANNELS; i++) {
+               if (!efx->extra_channel_type[i])
+                       continue;
+               if (efx->interrupt_mode != EF4_INT_MODE_MSIX ||
+                   efx->n_channels <= extra_channels) {
+                       efx->extra_channel_type[i]->handle_no_channel(efx);
+               } else {
+                       --j;
+                       ef4_get_channel(efx, j)->type =
+                               efx->extra_channel_type[i];
+               }
+       }
+
+       efx->rss_spread = efx->n_rx_channels;
+
+       return 0;
+}
+
+/* Set the software interrupt-enable flag and start every channel's event
+ * queue.  Event queues of channels whose type has keep_eventq set are
+ * only started here, not initialised; the others are initialised first.
+ *
+ * Returns 0 on success.  If ef4_init_eventq() fails, the channels that
+ * were already started are stopped (and finalised where this function
+ * initialised them) and the error is returned.
+ */
+static int ef4_soft_enable_interrupts(struct ef4_nic *efx)
+{
+       struct ef4_channel *channel, *end_channel;
+       int rc;
+
+       BUG_ON(efx->state == STATE_DISABLED);
+
+       /* Publish the flag before any event queue is started */
+       efx->irq_soft_enabled = true;
+       smp_wmb();
+
+       ef4_for_each_channel(channel, efx) {
+               if (!channel->type->keep_eventq) {
+                       rc = ef4_init_eventq(channel);
+                       if (rc)
+                               goto fail;
+               }
+               ef4_start_eventq(channel);
+       }
+
+       return 0;
+fail:
+       /* Unwind every channel before the one that failed */
+       end_channel = channel;
+       ef4_for_each_channel(channel, efx) {
+               if (channel == end_channel)
+                       break;
+               ef4_stop_eventq(channel);
+               if (!channel->type->keep_eventq)
+                       ef4_fini_eventq(channel);
+       }
+
+       return rc;
+}
+
+/* Counterpart of ef4_soft_enable_interrupts(): clear the software
+ * interrupt-enable flag, wait for interrupt handlers that are still
+ * running, then stop every event queue and finalise those that are not
+ * marked keep_eventq.  Does nothing when the NIC is in STATE_DISABLED.
+ */
+static void ef4_soft_disable_interrupts(struct ef4_nic *efx)
+{
+       struct ef4_channel *channel;
+
+       if (efx->state == STATE_DISABLED)
+               return;
+
+       /* Make the cleared flag visible before waiting for handlers */
+       efx->irq_soft_enabled = false;
+       smp_wmb();
+
+       if (efx->legacy_irq)
+               synchronize_irq(efx->legacy_irq);
+
+       ef4_for_each_channel(channel, efx) {
+               /* irq is 0 for channels without their own vector */
+               if (channel->irq)
+                       synchronize_irq(channel->irq);
+
+               ef4_stop_eventq(channel);
+               if (!channel->type->keep_eventq)
+                       ef4_fini_eventq(channel);
+       }
+}
+
+/* Fully enable interrupts: re-enable a legacy IRQ that was disabled for
+ * EEH, switch on the master interrupt enable, initialise the event queues
+ * of keep_eventq channels and finally run ef4_soft_enable_interrupts().
+ *
+ * Returns 0 on success or the first error encountered; on error the
+ * keep_eventq queues initialised here are finalised again and
+ * irq_disable_non_ev() is called.
+ */
+static int ef4_enable_interrupts(struct ef4_nic *efx)
+{
+       struct ef4_channel *channel, *end_channel;
+       int rc;
+
+       BUG_ON(efx->state == STATE_DISABLED);
+
+       if (efx->eeh_disabled_legacy_irq) {
+               enable_irq(efx->legacy_irq);
+               efx->eeh_disabled_legacy_irq = false;
+       }
+
+       efx->type->irq_enable_master(efx);
+
+       /* Only the keep_eventq channels are handled here; the remaining
+        * ones are initialised by ef4_soft_enable_interrupts().
+        */
+       ef4_for_each_channel(channel, efx) {
+               if (channel->type->keep_eventq) {
+                       rc = ef4_init_eventq(channel);
+                       if (rc)
+                               goto fail;
+               }
+       }
+
+       rc = ef4_soft_enable_interrupts(efx);
+       if (rc)
+               goto fail;
+
+       return 0;
+
+fail:
+       /* Unwind up to, but not including, the channel we stopped at.
+        * NOTE(review): when we get here from the soft-enable failure the
+        * loop above has run to completion; presumably channel is then
+        * NULL so that all keep_eventq queues are finalised - confirm
+        * against the ef4_for_each_channel() definition.
+        */
+       end_channel = channel;
+       ef4_for_each_channel(channel, efx) {
+               if (channel == end_channel)
+                       break;
+               if (channel->type->keep_eventq)
+                       ef4_fini_eventq(channel);
+       }
+
+       efx->type->irq_disable_non_ev(efx);
+
+       return rc;
+}
+
+/* Counterpart of ef4_enable_interrupts(): do the soft disable first, then
+ * finalise the keep_eventq event queues (which the soft disable leaves
+ * alone) and call the NIC type's irq_disable_non_ev() hook.
+ */
+static void ef4_disable_interrupts(struct ef4_nic *efx)
+{
+       struct ef4_channel *ch;
+
+       ef4_soft_disable_interrupts(efx);
+
+       ef4_for_each_channel(ch, efx)
+               if (ch->type->keep_eventq)
+                       ef4_fini_eventq(ch);
+
+       efx->type->irq_disable_non_ev(efx);
+}
+
+/* Forget all interrupt assignments made by ef4_probe_interrupts(): clear
+ * the per-channel and legacy IRQ numbers and disable both MSI and MSI-X
+ * on the PCI device, whichever of them was in use.
+ */
+static void ef4_remove_interrupts(struct ef4_nic *efx)
+{
+       struct ef4_channel *ch;
+
+       /* Per-channel vectors (MSI/MSI-X) */
+       ef4_for_each_channel(ch, efx)
+               ch->irq = 0;
+
+       pci_disable_msi(efx->pci_dev);
+       pci_disable_msix(efx->pci_dev);
+
+       /* Legacy line interrupt */
+       efx->legacy_irq = 0;
+}
+
+/* Derive the per-channel queue layout from the channel counts.
+ *
+ * tx_channel_offset is the index of the first TX channel: 0 when channels
+ * are shared, otherwise the TX channels are the last n_tx_channels ones.
+ * Channels numbered below n_rx_channels get an RX queue (core_index set to
+ * the channel number), all others are marked as having none (-1).  Each
+ * TX queue number is shifted down by tx_channel_offset * EF4_TXQ_TYPES.
+ */
+static void ef4_set_channels(struct ef4_nic *efx)
+{
+       struct ef4_tx_queue *txq;
+       struct ef4_channel *ch;
+
+       if (ef4_separate_tx_channels)
+               efx->tx_channel_offset = efx->n_channels - efx->n_tx_channels;
+       else
+               efx->tx_channel_offset = 0;
+
+       ef4_for_each_channel(ch, efx) {
+               ch->rx_queue.core_index =
+                       (ch->channel < efx->n_rx_channels) ? ch->channel : -1;
+
+               ef4_for_each_channel_tx_queue(txq, ch)
+                       txq->queue -= (efx->tx_channel_offset *
+                                      EF4_TXQ_TYPES);
+       }
+}
+
+/* Probe the NIC itself: run the type-specific probe, settle on interrupt
+ * mode and channel counts, size the hardware resources, and set up RSS
+ * defaults, real queue counts and interrupt moderation.
+ *
+ * Returns 0 on success or a negative error code; on failure the
+ * interrupts (where already probed) and the type-specific state are
+ * removed again.
+ */
+static int ef4_probe_nic(struct ef4_nic *efx)
+{
+       int rc;
+
+       netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");
+
+       /* Carry out hardware-type specific initialisation */
+       rc = efx->type->probe(efx);
+       if (rc)
+               return rc;
+
+       /* Repeat for as long as dimension_resources() asks for a retry */
+       do {
+               if (!efx->max_channels || !efx->max_tx_channels) {
+                       netif_err(efx, drv, efx->net_dev,
+                                 "Insufficient resources to allocate"
+                                 " any channels\n");
+                       rc = -ENOSPC;
+                       goto fail1;
+               }
+
+               /* Determine the number of channels and queues by trying
+                * to hook in MSI-X interrupts.
+                */
+               rc = ef4_probe_interrupts(efx);
+               if (rc)
+                       goto fail1;
+
+               ef4_set_channels(efx);
+
+               /* dimension_resources can fail with EAGAIN */
+               rc = efx->type->dimension_resources(efx);
+               if (rc != 0 && rc != -EAGAIN)
+                       goto fail2;
+
+               if (rc == -EAGAIN)
+                       /* try again with new max_channels */
+                       ef4_remove_interrupts(efx);
+
+       } while (rc == -EAGAIN);
+
+       /* A hash key is only filled in when there is more than one channel */
+       if (efx->n_channels > 1)
+               netdev_rss_key_fill(&efx->rx_hash_key,
+                                   sizeof(efx->rx_hash_key));
+       ef4_set_default_rx_indir_table(efx);
+
+       netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
+       netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
+
+       /* Initialise the interrupt moderation settings */
+       efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
+       ef4_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
+                               true);
+
+       return 0;
+
+fail2:
+       ef4_remove_interrupts(efx);
+fail1:
+       efx->type->remove(efx);
+       return rc;
+}
+
+/* Counterpart of ef4_probe_nic(): drop the interrupt assignments, then
+ * let the NIC type tear down its own state.
+ */
+static void ef4_remove_nic(struct ef4_nic *efx)
+{
+       netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");
+       ef4_remove_interrupts(efx);
+       efx->type->remove(efx);
+}
+
+/* Set up the hardware filter table and, when CONFIG_RFS_ACCEL is enabled
+ * and the NIC type offers NETIF_F_NTUPLE, one rps_flow_id array per
+ * channel with every entry preset to RPS_FLOW_ID_INVALID.
+ *
+ * The work is done under mac_lock with filter_sem held for writing.
+ * Returns 0 on success, the error from filter_table_probe(), or -ENOMEM
+ * when an rps_flow_id array could not be allocated.  In the latter case
+ * all arrays are freed, the pointers are reset to NULL and the filter
+ * table is removed again.
+ */
+static int ef4_probe_filters(struct ef4_nic *efx)
+{
+       int rc;
+
+       spin_lock_init(&efx->filter_lock);
+       init_rwsem(&efx->filter_sem);
+       mutex_lock(&efx->mac_lock);
+       down_write(&efx->filter_sem);
+       rc = efx->type->filter_table_probe(efx);
+       if (rc)
+               goto out_unlock;
+
+#ifdef CONFIG_RFS_ACCEL
+       if (efx->type->offload_features & NETIF_F_NTUPLE) {
+               struct ef4_channel *channel;
+               int i, success = 1;
+
+               /* Try every channel even after a failure, so that each
+                * rps_flow_id pointer ends up either valid or NULL.
+                */
+               ef4_for_each_channel(channel, efx) {
+                       channel->rps_flow_id =
+                               kcalloc(efx->type->max_rx_ip_filters,
+                                       sizeof(*channel->rps_flow_id),
+                                       GFP_KERNEL);
+                       if (!channel->rps_flow_id)
+                               success = 0;
+                       else
+                               for (i = 0;
+                                    i < efx->type->max_rx_ip_filters;
+                                    ++i)
+                                       channel->rps_flow_id[i] =
+                                               RPS_FLOW_ID_INVALID;
+               }
+
+               if (!success) {
+                       ef4_for_each_channel(channel, efx) {
+                               kfree(channel->rps_flow_id);
+                               /* Don't leave a dangling pointer behind
+                                * in the channel structure.
+                                */
+                               channel->rps_flow_id = NULL;
+                       }
+                       efx->type->filter_table_remove(efx);
+                       rc = -ENOMEM;
+                       goto out_unlock;
+               }
+
+               efx->rps_expire_index = efx->rps_expire_channel = 0;
+       }
+#endif
+out_unlock:
+       up_write(&efx->filter_sem);
+       mutex_unlock(&efx->mac_lock);
+       return rc;
+}
+
+/* Counterpart of ef4_probe_filters(): free the per-channel rps_flow_id
+ * arrays (CONFIG_RFS_ACCEL only) and remove the hardware filter table
+ * with filter_sem held for writing.
+ */
+static void ef4_remove_filters(struct ef4_nic *efx)
+{
+#ifdef CONFIG_RFS_ACCEL
+       struct ef4_channel *channel;
+
+       ef4_for_each_channel(channel, efx) {
+               kfree(channel->rps_flow_id);
+               /* Reset the pointer so a stale value can neither be used
+                * nor freed a second time.
+                */
+               channel->rps_flow_id = NULL;
+       }
+#endif
+       down_write(&efx->filter_sem);
+       efx->type->filter_table_remove(efx);
+       up_write(&efx->filter_sem);
+}
+
+/* Ask the NIC type to restore its filter table.  filter_sem is only
+ * taken for reading here, unlike in probe and remove.
+ */
+static void ef4_restore_filters(struct ef4_nic *efx)
+{
+       struct rw_semaphore *sem = &efx->filter_sem;
+
+       down_read(sem);
+       efx->type->filter_table_restore(efx);
+       up_read(sem);
+}
+
+/**************************************************************************
+ *
+ * NIC startup/shutdown
+ *
+ *************************************************************************/
+
+/* Probe everything in order: NIC, port, filter tables, channels.  The
+ * RX/TX queue sizes are set to EF4_DEFAULT_DMAQ_SIZE on the way.
+ *
+ * Returns 0 on success or a negative error code; on failure the parts
+ * that were already probed are removed again in reverse order.
+ */
+static int ef4_probe_all(struct ef4_nic *efx)
+{
+       int rc;
+
+       rc = ef4_probe_nic(efx);
+       if (rc) {
+               netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
+               goto fail1;
+       }
+
+       rc = ef4_probe_port(efx);
+       if (rc) {
+               netif_err(efx, probe, efx->net_dev, "failed to create port\n");
+               goto fail2;
+       }
+
+       /* The default queue size must satisfy both minimum sizes: the RX
+        * one is checked at build time, the TX one depends on efx and is
+        * checked at run time.
+        */
+       BUILD_BUG_ON(EF4_DEFAULT_DMAQ_SIZE < EF4_RXQ_MIN_ENT);
+       if (WARN_ON(EF4_DEFAULT_DMAQ_SIZE < EF4_TXQ_MIN_ENT(efx))) {
+               rc = -EINVAL;
+               goto fail3;
+       }
+       efx->rxq_entries = efx->txq_entries = EF4_DEFAULT_DMAQ_SIZE;
+
+       rc = ef4_probe_filters(efx);
+       if (rc) {
+               netif_err(efx, probe, efx->net_dev,
+                         "failed to create filter tables\n");
+               goto fail4;
+       }
+
+       rc = ef4_probe_channels(efx);
+       if (rc)
+               goto fail5;
+
+       return 0;
+
+ fail5:
+       ef4_remove_filters(efx);
+       /* fail4 and fail3 share the remaining unwinding: neither jump
+        * calls ef4_remove_filters(), both continue with the port.
+        */
+ fail4:
+ fail3:
+       ef4_remove_port(efx);
+ fail2:
+       ef4_remove_nic(efx);
+ fail1:
+       return rc;
+}
+
+/* If the interface is supposed to be running but is not, start
+ * the hardware and software data path, regular activity for the port
+ * (MAC statistics, link polling, etc.) and schedule the port to be
+ * reconfigured.  Interrupts must already be enabled.  This function
+ * is safe to call multiple times, so long as the NIC is not disabled.
+ * Requires the RTNL lock.
+ */
+static void ef4_start_all(struct ef4_nic *efx)
+{
+       EF4_ASSERT_RESET_SERIALISED(efx);
+       BUG_ON(efx->state == STATE_DISABLED);
+
+       /* Check that it is appropriate to restart the interface. All
+        * of these flags are safe to read under just the rtnl lock */
+       if (efx->port_enabled || !netif_running(efx->net_dev) ||
+           efx->reset_pending)
+               return;
+
+       ef4_start_port(efx);
+       ef4_start_datapath(efx);
+
+       /* Start the hardware monitor if there is one */
+       if (efx->type->monitor != NULL)
+               queue_delayed_work(efx->workqueue, &efx->monitor_work,
+                                  ef4_monitor_interval);
+
+       /* Start statistics collection and take a first sample.  The
+        * update is done under stats_lock and both output pointers are
+        * NULL, so no values are copied out to the caller.
+        */
+       efx->type->start_stats(efx);
+       efx->type->pull_stats(efx);
+       spin_lock_bh(&efx->stats_lock);
+       efx->type->update_stats(efx, NULL, NULL);
+       spin_unlock_bh(&efx->stats_lock);
+}
+
+/* Quiesce the hardware and software data path, and regular activity
+ * for the port without bringing the link down.  Safe to call multiple
+ * times with the NIC in almost any state, but interrupts should be
+ * enabled.  Requires the RTNL lock.
+ *
+ * Order: final statistics update, stop statistics, stop the port,
+ * disable the kernel TX path, stop the datapath.
+ */
+static void ef4_stop_all(struct ef4_nic *efx)
+{
+       EF4_ASSERT_RESET_SERIALISED(efx);
+
+       /* port_enabled can be read safely under the rtnl lock */
+       if (!efx->port_enabled)
+               return;
+
+       /* update stats before we go down so we can accurately count
+        * rx_nodesc_drops
+        */
+       efx->type->pull_stats(efx);
+       spin_lock_bh(&efx->stats_lock);
+       efx->type->update_stats(efx, NULL, NULL);
+       spin_unlock_bh(&efx->stats_lock);
+       efx->type->stop_stats(efx);
+       ef4_stop_port(efx);
+
+       /* Stop the kernel transmit interface.  This is only valid if
+        * the device is stopped or detached; otherwise the watchdog
+        * may fire immediately.
+        */
+       WARN_ON(netif_running(efx->net_dev) &&
+               netif_device_present(efx->net_dev));
+       netif_tx_disable(efx->net_dev);
+
+       ef4_stop_datapath(efx);
+}
+
+/* Tear down everything ef4_probe_all() created, in the reverse of the
+ * order used there: channels, filters, port, NIC.
+ */
+static void ef4_remove_all(struct ef4_nic *efx)
+{
+       ef4_remove_channels(efx);
+       ef4_remove_filters(efx);
+
+       ef4_remove_port(efx);
+       ef4_remove_nic(efx);
+}
+
+/**************************************************************************
+ *
+ * Interrupt moderation
+ *
+ **************************************************************************/
+/* Convert a time in microseconds to timer ticks of timer_quantum_ns each,
+ * rounding down, except that a non-zero time never becomes 0 ticks.
+ * NOTE(review): usecs * 1000 is computed in unsigned int arithmetic, so
+ * very large usecs values wrap - callers presumably keep usecs small.
+ */
+unsigned int ef4_usecs_to_ticks(struct ef4_nic *efx, unsigned int usecs)
+{
+       unsigned int nsecs;
+
+       if (!usecs)
+               return 0;
+
+       nsecs = usecs * 1000;
+
+       /* Anything shorter than one quantum still counts as one tick */
+       return nsecs < efx->timer_quantum_ns ?
+               1 : nsecs / efx->timer_quantum_ns;
+}
+
+/* Convert timer ticks back to microseconds.  The result is rounded up to
+ * compensate for ef4_usecs_to_ticks() rounding down.
+ */
+unsigned int ef4_ticks_to_usecs(struct ef4_nic *efx, unsigned int ticks)
+{
+       return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000);
+}
+
+/* Store new interrupt moderation settings and apply them per channel.
+ *
+ * Both times are limited to timer_max_ns converted to microseconds.
+ * When channels are shared (tx_channel_offset == 0) the TX and RX values
+ * have to match unless rx_may_override_tx is set.  Channels with an RX
+ * queue get rx_usecs; channels that only have TX queues get tx_usecs.
+ *
+ * Returns 0 on success or -EINVAL if the request is rejected.
+ */
+int ef4_init_irq_moderation(struct ef4_nic *efx, unsigned int tx_usecs,
+                           unsigned int rx_usecs, bool rx_adaptive,
+                           bool rx_may_override_tx)
+{
+       struct ef4_channel *ch;
+       unsigned int limit_us;
+
+       EF4_ASSERT_RESET_SERIALISED(efx);
+
+       limit_us = efx->timer_max_ns / 1000;
+
+       if (tx_usecs > limit_us)
+               return -EINVAL;
+       if (rx_usecs > limit_us)
+               return -EINVAL;
+
+       if (!rx_may_override_tx && efx->tx_channel_offset == 0 &&
+           tx_usecs != rx_usecs) {
+               netif_err(efx, drv, efx->net_dev, "Channels are shared. "
+                         "RX and TX IRQ moderation must be equal\n");
+               return -EINVAL;
+       }
+
+       efx->irq_rx_adaptive = rx_adaptive;
+       efx->irq_rx_moderation_us = rx_usecs;
+
+       ef4_for_each_channel(ch, efx) {
+               if (ef4_channel_has_rx_queue(ch))
+                       ch->irq_moderation_us = rx_usecs;
+               else if (ef4_channel_has_tx_queues(ch))
+                       ch->irq_moderation_us = tx_usecs;
+       }
+
+       return 0;
+}
+
+/* Report the current interrupt moderation settings through the three
+ * output pointers.
+ */
+void ef4_get_irq_moderation(struct ef4_nic *efx, unsigned int *tx_usecs,
+                           unsigned int *rx_usecs, bool *rx_adaptive)
+{
+       *rx_adaptive = efx->irq_rx_adaptive;
+       *rx_usecs = efx->irq_rx_moderation_us;
+
+       /* Shared channels share their moderation too, so TX simply
+        * mirrors RX.
+        */
+       if (efx->tx_channel_offset == 0) {
+               *tx_usecs = *rx_usecs;
+               return;
+       }
+
+       /* With separate TX channels the moderation is the same for all of
+        * them and is not adaptive; read it from the first TX channel.
+        */
+       *tx_usecs = efx->channel[efx->tx_channel_offset]->irq_moderation_us;
+}
+
+/**************************************************************************
+ *
+ * Hardware monitor
+ *
+ **************************************************************************/
+
+/* Periodic hardware monitor work item.  It runs on efx->workqueue and
+ * re-queues itself there with a delay of ef4_monitor_interval at the end
+ * of every run; ef4_stop_port() cancels it.
+ */
+static void ef4_monitor(struct work_struct *data)
+{
+       struct ef4_nic *efx = container_of(data, struct ef4_nic,
+                                          monitor_work.work);
+
+       netif_vdbg(efx, timer, efx->net_dev,
+                  "hardware monitor executing on CPU %d\n",
+                  raw_smp_processor_id());
+       BUG_ON(efx->type->monitor == NULL);
+
+       /* If the mac_lock is already held then it is likely a port
+        * reconfiguration is already in place, which will likely do
+        * most of the work of monitor() anyway. */
+       if (mutex_trylock(&efx->mac_lock)) {
+               /* Leave the hardware alone once the port is disabled */
+               if (efx->port_enabled)
+                       efx->type->monitor(efx);
+               mutex_unlock(&efx->mac_lock);
+       }
+
+       queue_delayed_work(efx->workqueue, &efx->monitor_work,
+                          ef4_monitor_interval);
+}
+
+/**************************************************************************
+ *
+ * ioctls
+ *
+ *************************************************************************/
+
+/* Net device ioctl: everything is passed on to the MDIO layer.
+ * Context: process, rtnl_lock() held.
+ */
+static int ef4_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       struct mii_ioctl_data *data = if_mii(ifr);
+       bool reg_access = (cmd == SIOCGMIIREG || cmd == SIOCSMIIREG);
+
+       /* For register reads and writes, a phy_id in the older PRTAD/DEVAD
+        * format is converted before it is handed on.
+        */
+       if (reg_access && (data->phy_id & 0xfc00) == 0x0400)
+               data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400;
+
+       return mdio_mii_ioctl(&efx->mdio, data, cmd);
+}
+
+/**************************************************************************
+ *
+ * NAPI interface
+ *
+ **************************************************************************/
+
+/* Register one channel's NAPI context with the net device, using
+ * ef4_poll as poll function and napi_weight as weight, then set up its
+ * busy-poll state.
+ */
+static void ef4_init_napi_channel(struct ef4_channel *channel)
+{
+       struct net_device *net_dev = channel->efx->net_dev;
+
+       channel->napi_dev = net_dev;
+       netif_napi_add(net_dev, &channel->napi_str, ef4_poll, napi_weight);
+       ef4_channel_busy_poll_init(channel);
+}
+
+/* Set up NAPI for every channel of the NIC. */
+static void ef4_init_napi(struct ef4_nic *efx)
+{
+       struct ef4_channel *ch;
+
+       ef4_for_each_channel(ch, efx) {
+               ef4_init_napi_channel(ch);
+       }
+}
+
+/* Unregister a channel's NAPI context.  napi_dev doubles as the
+ * "registered" marker, so calling this again afterwards is harmless.
+ */
+static void ef4_fini_napi_channel(struct ef4_channel *channel)
+{
+       if (!channel->napi_dev)
+               return;
+
+       netif_napi_del(&channel->napi_str);
+       channel->napi_dev = NULL;
+}
+
+/* Tear down NAPI for every channel of the NIC. */
+static void ef4_fini_napi(struct ef4_nic *efx)
+{
+       struct ef4_channel *ch;
+
+       ef4_for_each_channel(ch, efx) {
+               ef4_fini_napi_channel(ch);
+       }
+}
+
+/**************************************************************************
+ *
+ * Kernel netpoll interface
+ *
+ *************************************************************************/
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+
+/* Netpoll hook: schedule every channel.
+ *
+ * Interrupts are usually disabled when this is called, but that is not
+ * guaranteed.  No locking is needed either way because all the actual
+ * work is done inside the NAPI callback.
+ */
+static void ef4_netpoll(struct net_device *net_dev)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       struct ef4_channel *ch;
+
+       ef4_for_each_channel(ch, efx) {
+               ef4_schedule_channel(ch);
+       }
+}
+
+#endif
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+/* Busy-poll handler for one channel.
+ *
+ * Returns LL_FLUSH_FAILED if the interface is not running, LL_FLUSH_BUSY
+ * if the channel's poll lock could not be taken, and otherwise the
+ * number of RX packets counted while processing the channel with a
+ * budget of 4.
+ */
+static int ef4_busy_poll(struct napi_struct *napi)
+{
+       struct ef4_channel *channel =
+               container_of(napi, struct ef4_channel, napi_str);
+       int pkts_before, pkts_done;
+
+       if (!netif_running(channel->efx->net_dev))
+               return LL_FLUSH_FAILED;
+
+       if (!ef4_channel_try_lock_poll(channel))
+               return LL_FLUSH_BUSY;
+
+       pkts_before = channel->rx_queue.rx_packets;
+       ef4_process_channel(channel, 4);
+       pkts_done = channel->rx_queue.rx_packets - pkts_before;
+
+       /* Dropping the lock here does not race with NAPI: if NAPI had to
+        * yield to us because it could not get the lock, it returned its
+        * full budget and is therefore rescheduled automatically.
+        */
+       ef4_channel_unlock_poll(channel);
+
+       return pkts_done;
+}
+#endif
+
+/**************************************************************************
+ *
+ * Kernel net device interface
+ *
+ *************************************************************************/
+
+/* Bring the interface up.
+ * Context: process, rtnl_lock() held.
+ *
+ * Returns the error from ef4_check_disabled() if that fails, -EBUSY if
+ * the PHY is in a special mode, and 0 otherwise.
+ */
+int ef4_net_open(struct net_device *net_dev)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       int err;
+
+       netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
+                 raw_smp_processor_id());
+
+       err = ef4_check_disabled(efx);
+       if (err)
+               return err;
+
+       if (efx->phy_mode & PHY_MODE_SPECIAL)
+               return -EBUSY;
+
+       /* The link state was polled during driver load; tell the kernel
+        * about it before the monitor starts running.
+        */
+       ef4_link_status_changed(efx);
+
+       ef4_start_all(efx);
+       ef4_selftest_async_start(efx);
+
+       return 0;
+}
+
+/* Take the interface down.
+ * Context: process, rtnl_lock() held.
+ * The kernel ignores the return code of this method (it should really
+ * be a void), so 0 is always returned.
+ */
+int ef4_net_stop(struct net_device *net_dev)
+{
+       struct ef4_nic *efx;
+
+       efx = netdev_priv(net_dev);
+       netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
+                 raw_smp_processor_id());
+
+       /* Stops the device and flushes all the channels */
+       ef4_stop_all(efx);
+
+       return 0;
+}
+
+/* Fill @stats through the NIC type's update_stats() method, with
+ * stats_lock held, and return @stats.
+ * Context: process, dev_base_lock or RTNL held, non-blocking.
+ */
+static struct rtnl_link_stats64 *ef4_net_stats(struct net_device *net_dev,
+                                              struct rtnl_link_stats64 *stats)
+{
+       struct ef4_nic *efx;
+
+       efx = netdev_priv(net_dev);
+
+       spin_lock_bh(&efx->stats_lock);
+       efx->type->update_stats(efx, NULL, stats);
+       spin_unlock_bh(&efx->stats_lock);
+
+       return stats;
+}
+
+/* TX timeout handler: log the stall and schedule a TX-watchdog reset.
+ * Context: netif_tx_lock held, BHs disabled.
+ */
+static void ef4_watchdog(struct net_device *net_dev)
+{
+       struct ef4_nic *efx;
+
+       efx = netdev_priv(net_dev);
+       netif_err(efx, tx_err, efx->net_dev,
+                 "TX stuck with port_enabled=%d: resetting channels\n",
+                 efx->port_enabled);
+
+       ef4_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
+}
+
+
+/* Change the MTU: detach and stop the device, store the new MTU and
+ * reconfigure the MAC under mac_lock, then start and re-attach.
+ * Context: process, rtnl_lock() held.
+ *
+ * Returns the error from ef4_check_disabled() if that fails, else 0.
+ */
+static int ef4_change_mtu(struct net_device *net_dev, int new_mtu)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       int err = ef4_check_disabled(efx);
+
+       if (err)
+               return err;
+
+       netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
+
+       /* Quiesce the datapath before touching the MAC configuration */
+       ef4_device_detach_sync(efx);
+       ef4_stop_all(efx);
+
+       mutex_lock(&efx->mac_lock);
+       net_dev->mtu = new_mtu;
+       ef4_mac_reconfigure(efx);
+       mutex_unlock(&efx->mac_lock);
+
+       ef4_start_all(efx);
+       netif_device_attach(efx->net_dev);
+
+       return 0;
+}
+
+/* Set a new MAC address, passed as a struct sockaddr in @data.
+ *
+ * Returns -EADDRNOTAVAIL if the address is not a valid Ethernet
+ * address.  If the NIC type provides set_mac_address() and it fails,
+ * the previous address is restored and that error is returned.
+ * Otherwise the MAC is reconfigured under mac_lock and 0 is returned.
+ */
+static int ef4_set_mac_address(struct net_device *net_dev, void *data)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       struct sockaddr *sa = data;
+       u8 *wanted = sa->sa_data;
+       u8 previous[6];
+       int err;
+
+       if (!is_valid_ether_addr(wanted)) {
+               netif_err(efx, drv, efx->net_dev,
+                         "invalid ethernet MAC address requested: %pM\n",
+                         wanted);
+               return -EADDRNOTAVAIL;
+       }
+
+       /* Install the new address, keeping the old one for rollback */
+       ether_addr_copy(previous, net_dev->dev_addr);
+       ether_addr_copy(net_dev->dev_addr, wanted);
+
+       err = efx->type->set_mac_address ?
+               efx->type->set_mac_address(efx) : 0;
+       if (err) {
+               ether_addr_copy(net_dev->dev_addr, previous);
+               return err;
+       }
+
+       /* Reconfigure the MAC */
+       mutex_lock(&efx->mac_lock);
+       ef4_mac_reconfigure(efx);
+       mutex_unlock(&efx->mac_lock);
+
+       return 0;
+}
+
+/* Queue the MAC work item if the port is enabled.
+ * Context: netif_addr_lock held, BHs disabled.
+ */
+static void ef4_set_rx_mode(struct net_device *net_dev)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+
+       /* When the port is not enabled, ef4_start_port() will do this */
+       if (!efx->port_enabled)
+               return;
+
+       queue_work(efx->workqueue, &efx->mac_work);
+}
+
+/* React to a change of the net device feature flags to @data.
+ *
+ * Returns the error from filter_clear_rx() if clearing the manual RX
+ * filters fails, else 0.
+ */
+static int ef4_set_features(struct net_device *net_dev, netdev_features_t data)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       int err;
+
+       /* If disabling RX n-tuple filtering, clear existing filters */
+       if ((net_dev->features & NETIF_F_NTUPLE) && !(data & NETIF_F_NTUPLE)) {
+               err = efx->type->filter_clear_rx(efx, EF4_FILTER_PRI_MANUAL);
+               if (err)
+                       return err;
+       }
+
+       /* Nothing more to do unless the Rx VLAN filter setting changed */
+       if (!((net_dev->features ^ data) & NETIF_F_HW_VLAN_CTAG_FILTER))
+               return 0;
+
+       /* Update filters via mac_reconfigure: ef4_set_rx_mode() will
+        * schedule MAC work to update filters when the new features are
+        * finally set in net_dev.
+        */
+       ef4_set_rx_mode(net_dev);
+
+       return 0;
+}
+
+static const struct net_device_ops ef4_netdev_ops = {
+       .ndo_open               = ef4_net_open,
+       .ndo_stop               = ef4_net_stop,
+       .ndo_get_stats64        = ef4_net_stats,
+       .ndo_tx_timeout         = ef4_watchdog,
+       .ndo_start_xmit         = ef4_hard_start_xmit,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_do_ioctl           = ef4_ioctl,
+       .ndo_change_mtu         = ef4_change_mtu,
+       .ndo_set_mac_address    = ef4_set_mac_address,
+       .ndo_set_rx_mode        = ef4_set_rx_mode,
+       .ndo_set_features       = ef4_set_features,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller = ef4_netpoll,
+#endif
+       .ndo_setup_tc           = ef4_setup_tc,
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       .ndo_busy_poll          = ef4_busy_poll,
+#endif
+#ifdef CONFIG_RFS_ACCEL
+       .ndo_rx_flow_steer      = ef4_filter_rfs,
+#endif
+};
+
+/* Copy the net device's current name into efx->name and then refresh
+ * the MTD and channel names (presumably these are derived from
+ * efx->name - see ef4_mtd_rename() and ef4_set_channel_names()).
+ */
+static void ef4_update_name(struct ef4_nic *efx)
+{
+       /* Bounded copy, as for every other write to efx->name in this file */
+       strlcpy(efx->name, efx->net_dev->name, sizeof(efx->name));
+       ef4_mtd_rename(efx);
+       ef4_set_channel_names(efx);
+}
+
+/* Netdevice notifier callback.  On NETDEV_CHANGENAME for a device that
+ * uses ef4_netdev_ops, refresh the names held by the driver.  Always
+ * returns NOTIFY_DONE.
+ */
+static int ef4_netdev_event(struct notifier_block *this,
+                           unsigned long event, void *ptr)
+{
+       struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
+       bool ours = net_dev->netdev_ops == &ef4_netdev_ops;
+
+       if (ours && event == NETDEV_CHANGENAME)
+               ef4_update_name(netdev_priv(net_dev));
+
+       return NOTIFY_DONE;
+}
+/*
+ * Notifier block that routes netdevice events to ef4_netdev_event().
+ */
+static struct notifier_block ef4_netdev_notifier = {
+       .notifier_call = ef4_netdev_event,
+};
+
+/* sysfs show handler for the read-only (0444) "phy_type" attribute of
+ * the PCI device: prints efx->phy_type as a decimal integer followed by
+ * a newline.
+ */
+static ssize_t show_phy_type(struct device *dev,
+                            struct device_attribute *attr, char *buf)
+{
+       struct pci_dev *pci_dev = to_pci_dev(dev);
+       struct ef4_nic *efx = pci_get_drvdata(pci_dev);
+
+       return sprintf(buf, "%d\n", efx->phy_type);
+}
+static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
+/*
+ * Configure and register the net device that belongs to @efx.
+ *
+ * Under the RTNL lock: moves the NIC to STATE_READY, allocates the
+ * interface name, registers the net device, initialises the core TX
+ * queue of every TX queue on every channel and associates the NIC.
+ * After dropping the lock, creates the "phy_type" attribute on the PCI
+ * device.
+ *
+ * Returns 0 on success.  On failure the state is put back to
+ * STATE_UNINIT and the error of the failing step is returned (-EIO when
+ * a reset was already pending).
+ */
+static int ef4_register_netdev(struct ef4_nic *efx)
+{
+       struct net_device *net_dev = efx->net_dev;
+       struct ef4_channel *channel;
+       int rc;
+
+       net_dev->watchdog_timeo = 5 * HZ;
+       net_dev->irq = efx->pci_dev->irq;
+       net_dev->netdev_ops = &ef4_netdev_ops;
+       net_dev->ethtool_ops = &ef4_ethtool_ops;
+       net_dev->gso_max_segs = EF4_TSO_MAX_SEGS;
+       net_dev->min_mtu = EF4_MIN_MTU;
+       net_dev->max_mtu = EF4_MAX_MTU;
+
+       rtnl_lock();
+
+       /* Enable resets to be scheduled and check whether any were
+        * already requested.  If so, the NIC is probably hosed so we
+        * abort.
+        */
+       efx->state = STATE_READY;
+       smp_mb(); /* ensure we change state before checking reset_pending */
+       if (efx->reset_pending) {
+               netif_err(efx, probe, efx->net_dev,
+                         "aborting probe due to scheduled reset\n");
+               rc = -EIO;
+               goto fail_locked;
+       }
+
+       rc = dev_alloc_name(net_dev, net_dev->name);
+       if (rc < 0)
+               goto fail_locked;
+       ef4_update_name(efx);
+
+       /* Always start with carrier off; PHY events will detect the link */
+       netif_carrier_off(net_dev);
+
+       rc = register_netdevice(net_dev);
+       if (rc)
+               goto fail_locked;
+
+       ef4_for_each_channel(channel, efx) {
+               struct ef4_tx_queue *tx_queue;
+               ef4_for_each_channel_tx_queue(tx_queue, channel)
+                       ef4_init_tx_queue_core_txq(tx_queue);
+       }
+
+       ef4_associate(efx);
+
+       rtnl_unlock();
+
+       rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
+       if (rc) {
+               netif_err(efx, drv, efx->net_dev,
+                         "failed to init net dev attributes\n");
+               goto fail_registered;
+       }
+       return 0;
+
+fail_registered:       /* RTNL was dropped above, so take it again */
+       rtnl_lock();
+       ef4_dissociate(efx);
+       unregister_netdevice(net_dev);
+fail_locked:           /* entered with RTNL held */
+       efx->state = STATE_UNINIT;
+       rtnl_unlock();
+       netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
+       return rc;
+}
+
+/* Undo ef4_register_netdev().  Does nothing if @efx has no net device
+ * or if the net device is not registered; otherwise efx->name is reset
+ * to the PCI device name, the "phy_type" attribute is removed and the
+ * net device is unregistered.
+ */
+static void ef4_unregister_netdev(struct ef4_nic *efx)
+{
+       struct net_device *net_dev = efx->net_dev;
+
+       if (!net_dev)
+               return;
+
+       BUG_ON(netdev_priv(net_dev) != efx);
+
+       if (!ef4_dev_registered(efx))
+               return;
+
+       strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
+       device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
+       unregister_netdev(efx->net_dev);
+}
+
+/**************************************************************************
+ *
+ * Device reset and suspend
+ *
+ **************************************************************************/
+
+/* Tears down the entire software state and most of the hardware state
+ * before reset.
+ *
+ * Stops all activity and disables interrupts, then takes mac_lock and
+ * shuts down the NIC.  The PHY is shut down as well when the port has
+ * been initialised and @method is neither RESET_TYPE_INVISIBLE nor
+ * RESET_TYPE_DATAPATH.
+ *
+ * mac_lock is still held when this function returns.
+ */
+void ef4_reset_down(struct ef4_nic *efx, enum reset_type method)
+{
+       EF4_ASSERT_RESET_SERIALISED(efx);
+
+       ef4_stop_all(efx);
+       ef4_disable_interrupts(efx);
+
+       mutex_lock(&efx->mac_lock);
+       if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
+           method != RESET_TYPE_DATAPATH)
+               efx->phy_op->fini(efx);
+       efx->type->fini(efx);
+}
+
+/* This function will always ensure that the locks acquired in
+ * ef4_reset_down() are released. A failure return code indicates
+ * that we were unable to reinitialise the hardware, and the
+ * driver should be disabled. If ok is false, then the rx and tx
+ * engines are not restarted, pending a RESET_DISABLE.
+ *
+ * The PHY is re-initialised only when the port has been initialised and
+ * @method is neither RESET_TYPE_INVISIBLE nor RESET_TYPE_DATAPATH.  A
+ * failing PHY reconfigure is only logged (and not even that for -EPERM);
+ * it does not abort the function.
+ *
+ * Every path through the "fail" label clears port_initialized,
+ * including the @ok == false path, which returns 0.
+ */
+int ef4_reset_up(struct ef4_nic *efx, enum reset_type method, bool ok)
+{
+       int rc;
+
+       EF4_ASSERT_RESET_SERIALISED(efx);
+
+       /* Ensure that SRAM is initialised even if we're disabling the device */
+       rc = efx->type->init(efx);
+       if (rc) {
+               netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
+               goto fail;
+       }
+
+       if (!ok)
+               goto fail;      /* rc is 0 here: stay stopped, report success */
+
+       if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
+           method != RESET_TYPE_DATAPATH) {
+               rc = efx->phy_op->init(efx);
+               if (rc)
+                       goto fail;
+               rc = efx->phy_op->reconfigure(efx);
+               if (rc && rc != -EPERM)
+                       netif_err(efx, drv, efx->net_dev,
+                                 "could not restore PHY settings\n");
+       }
+
+       rc = ef4_enable_interrupts(efx);
+       if (rc)
+               goto fail;
+
+       down_read(&efx->filter_sem);
+       ef4_restore_filters(efx);
+       up_read(&efx->filter_sem);
+
+       mutex_unlock(&efx->mac_lock);
+
+       ef4_start_all(efx);
+
+       return 0;
+
+fail:
+       efx->port_initialized = false;
+
+       mutex_unlock(&efx->mac_lock);
+
+       return rc;
+}
+
+/* Reset the NIC using the specified method.  Note that the reset may
+ * fail, in which case the card will be left in an unusable state.
+ *
+ * Caller must hold the rtnl_lock.
+ *
+ * Sequence: detach the net device, ef4_reset_down(), the NIC type's
+ * reset() method, then ef4_reset_up().  The device is left disabled
+ * (closed, STATE_DISABLED) if the hardware reset or ef4_reset_up()
+ * failed, or if @method is RESET_TYPE_DISABLE or
+ * RESET_TYPE_RECOVER_OR_DISABLE; otherwise it is re-attached.
+ *
+ * Returns 0 on success, else the first error seen (the hardware reset
+ * error takes precedence over the ef4_reset_up() error).
+ */
+int ef4_reset(struct ef4_nic *efx, enum reset_type method)
+{
+       int rc, rc2;
+       bool disabled;
+
+       netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
+                  RESET_TYPE(method));
+
+       ef4_device_detach_sync(efx);
+       ef4_reset_down(efx, method);
+
+       rc = efx->type->reset(efx, method);
+       if (rc) {
+               netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
+               goto out;
+       }
+
+       /* Clear flags for the scopes we covered.  We assume the NIC and
+        * driver are now quiescent so that there is no race here.
+        */
+       if (method < RESET_TYPE_MAX_METHOD)
+               efx->reset_pending &= -(1 << (method + 1)); /* clears bits 0..method */
+       else /* it doesn't fit into the well-ordered scope hierarchy */
+               __clear_bit(method, &efx->reset_pending);
+
+       /* Reinitialise bus-mastering, which may have been turned off before
+        * the reset was scheduled. This is still appropriate, even in the
+        * RESET_TYPE_DISABLE since this driver generally assumes the hardware
+        * can respond to requests. */
+       pci_set_master(efx->pci_dev);
+
+out:
+       /* Leave device stopped if necessary */
+       disabled = rc ||
+               method == RESET_TYPE_DISABLE ||
+               method == RESET_TYPE_RECOVER_OR_DISABLE;
+       rc2 = ef4_reset_up(efx, method, !disabled);
+       if (rc2) {
+               disabled = true;
+               if (!rc)
+                       rc = rc2;
+       }
+
+       if (disabled) {
+               dev_close(efx->net_dev);
+               netif_err(efx, drv, efx->net_dev, "has been disabled\n");
+               efx->state = STATE_DISABLED;
+       } else {
+               netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
+               netif_device_attach(efx->net_dev);
+       }
+       return rc;
+}
+
+/* Try recovery mechanisms.
+ * For now only EEH is supported.
+ * Returns 0 if the recovery mechanisms are unsuccessful (always the case
+ * when CONFIG_EEH is not set).
+ * Returns a non-zero value otherwise.
+ */
+int ef4_try_recovery(struct ef4_nic *efx)
+{
+#ifdef CONFIG_EEH
+       /* A PCI error can go unnoticed by EEH when nothing happens on the
+        * PCI bus.  The driver may then fail and schedule a 'recover or
+        * reset', which leads here, so run the EEH failure check by hand.
+        * If it reports a failure, the EEH mechanisms will handle the
+        * error and reset the device if necessary.
+        */
+       if (eeh_dev_check_failure(pci_dev_to_eeh_dev(efx->pci_dev)))
+               return 1;
+#endif
+       return 0;
+}
+
+/* The worker thread exists so that code that cannot sleep can
+ * schedule a reset for later.
+ *
+ * Takes a snapshot of reset_pending and derives the reset method from
+ * its highest set bit.  For the two RECOVER_OR_* methods the recovery
+ * mechanisms are tried first, and nothing more is done here if they
+ * report success.  Otherwise, if anything is pending and the NIC is
+ * still STATE_READY once the RTNL lock is held, the reset is performed;
+ * its result is deliberately ignored.
+ */
+static void ef4_reset_work(struct work_struct *data)
+{
+       struct ef4_nic *efx = container_of(data, struct ef4_nic, reset_work);
+       unsigned long pending;
+       enum reset_type method;
+
+       pending = ACCESS_ONCE(efx->reset_pending);
+       method = fls(pending) - 1;      /* index of the highest pending bit */
+
+       if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
+            method == RESET_TYPE_RECOVER_OR_ALL) &&
+           ef4_try_recovery(efx))
+               return;
+
+       if (!pending)
+               return;
+
+       rtnl_lock();
+
+       /* We checked the state in ef4_schedule_reset() but it may
+        * have changed by now.  Now that we have the RTNL lock,
+        * it cannot change again.
+        */
+       if (efx->state == STATE_READY)
+               (void)ef4_reset(efx, method);
+
+       rtnl_unlock();
+}
+/*
+ * Request a reset of the given @type, to be carried out later by
+ * ef4_reset_work().
+ *
+ * Nothing is scheduled while the NIC is in STATE_RECOVERY.  The types
+ * listed in the switch are used as the reset method directly; any other
+ * type is a reason and is translated by the NIC type's
+ * map_reset_reason().  The method's bit is set in reset_pending, and
+ * the work item is queued only if the NIC is STATE_READY.
+ */
+void ef4_schedule_reset(struct ef4_nic *efx, enum reset_type type)
+{
+       enum reset_type method;
+
+       if (efx->state == STATE_RECOVERY) {
+               netif_dbg(efx, drv, efx->net_dev,
+                         "recovering: skip scheduling %s reset\n",
+                         RESET_TYPE(type));
+               return;
+       }
+
+       switch (type) {
+       case RESET_TYPE_INVISIBLE:
+       case RESET_TYPE_ALL:
+       case RESET_TYPE_RECOVER_OR_ALL:
+       case RESET_TYPE_WORLD:
+       case RESET_TYPE_DISABLE:
+       case RESET_TYPE_RECOVER_OR_DISABLE:
+       case RESET_TYPE_DATAPATH:
+               method = type;
+               netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
+                         RESET_TYPE(method));
+               break;
+       default:
+               method = efx->type->map_reset_reason(type);
+               netif_dbg(efx, drv, efx->net_dev,
+                         "scheduling %s reset for %s\n",
+                         RESET_TYPE(method), RESET_TYPE(type));
+               break;
+       }
+
+       set_bit(method, &efx->reset_pending);
+       smp_mb(); /* ensure we change reset_pending before checking state */
+
+       /* If we're not READY then just leave the flags set as the cue
+        * to abort probing or reschedule the reset later.
+        */
+       if (ACCESS_ONCE(efx->state) != STATE_READY)
+               return;
+
+       queue_work(reset_workqueue, &efx->reset_work);
+}
+
+/**************************************************************************
+ *
+ * List of NICs we support
+ *
+ **************************************************************************/
+
+/* PCI device ID table
+ *
+ * driver_data of each entry carries a pointer to the NIC type
+ * description for that device; ef4_pci_probe() casts it back to a
+ * struct ef4_nic_type pointer.
+ */
+static const struct pci_device_id ef4_pci_table[] = {
+       {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
+                   PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0),
+        .driver_data = (unsigned long) &falcon_a1_nic_type},
+       {PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
+                   PCI_DEVICE_ID_SOLARFLARE_SFC4000B),
+        .driver_data = (unsigned long) &falcon_b0_nic_type},
+       {0}                     /* end of list */
+};
+
+/**************************************************************************
+ *
+ * Dummy PHY/MAC operations
+ *
+ * Can be used for some unimplemented operations
+ * Needed so all function pointers are valid and do not have to be tested
+ * before use
+ *
+ **************************************************************************/
+/* Dummy operation for int-returning hooks: does nothing, returns 0. */
+int ef4_port_dummy_op_int(struct ef4_nic *efx)
+{
+       return 0;
+}
+/* Dummy operation for void hooks: does nothing. */
+void ef4_port_dummy_op_void(struct ef4_nic *efx)
+{
+}
+
+/* Dummy poll operation: does nothing and always returns false. */
+static bool ef4_port_dummy_op_poll(struct ef4_nic *efx)
+{
+       return false;
+}
+/*
+ * PHY operations built only from the dummy hooks above.
+ * ef4_init_struct() installs this table as the initial efx->phy_op.
+ */
+static const struct ef4_phy_operations ef4_dummy_phy_operations = {
+       .init            = ef4_port_dummy_op_int,
+       .reconfigure     = ef4_port_dummy_op_int,
+       .poll            = ef4_port_dummy_op_poll,
+       .fini            = ef4_port_dummy_op_void,
+};
+
+/**************************************************************************
+ *
+ * Data housekeeping
+ *
+ **************************************************************************/
+
+/* This zeroes out and then fills in the invariants in a struct
+ * ef4_nic (including all sub-structures).
+ *
+ * NOTE(review): no zeroing is visible in this function; presumably the
+ * structure is already zeroed by its allocator - confirm.
+ *
+ * Also allocates all EF4_MAX_CHANNELS channels and creates the per-NIC
+ * single-threaded workqueue.  efx->type is read here, so the caller
+ * must have set it beforehand.
+ *
+ * Returns 0 on success.  On any failure ef4_fini_struct() is called and
+ * -ENOMEM is returned.
+ */
+static int ef4_init_struct(struct ef4_nic *efx,
+                          struct pci_dev *pci_dev, struct net_device *net_dev)
+{
+       int i;
+
+       /* Initialise common structures */
+       INIT_LIST_HEAD(&efx->node);
+       INIT_LIST_HEAD(&efx->secondary_list);
+       spin_lock_init(&efx->biu_lock);
+#ifdef CONFIG_SFC_FALCON_MTD
+       INIT_LIST_HEAD(&efx->mtd_list);
+#endif
+       INIT_WORK(&efx->reset_work, ef4_reset_work);
+       INIT_DELAYED_WORK(&efx->monitor_work, ef4_monitor);
+       INIT_DELAYED_WORK(&efx->selftest_work, ef4_selftest_async_work);
+       efx->pci_dev = pci_dev;
+       efx->msg_enable = debug;
+       efx->state = STATE_UNINIT;
+       strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
+
+       efx->net_dev = net_dev;
+       efx->rx_prefix_size = efx->type->rx_prefix_size;
+       efx->rx_ip_align =
+               NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
+       efx->rx_packet_hash_offset =
+               efx->type->rx_hash_offset - efx->type->rx_prefix_size;
+       efx->rx_packet_ts_offset =
+               efx->type->rx_ts_offset - efx->type->rx_prefix_size;
+       spin_lock_init(&efx->stats_lock);
+       mutex_init(&efx->mac_lock);
+       efx->phy_op = &ef4_dummy_phy_operations;
+       efx->mdio.dev = net_dev;
+       INIT_WORK(&efx->mac_work, ef4_mac_work);
+       init_waitqueue_head(&efx->flush_wq);
+
+       for (i = 0; i < EF4_MAX_CHANNELS; i++) {
+               efx->channel[i] = ef4_alloc_channel(efx, i, NULL);
+               if (!efx->channel[i])
+                       goto fail;
+               efx->msi_context[i].efx = efx;
+               efx->msi_context[i].index = i;
+       }
+
+       /* Higher numbered interrupt modes are less capable! */
+       efx->interrupt_mode = max(efx->type->max_interrupt_mode,
+                                 interrupt_mode);
+
+       /* Would be good to use the net_dev name, but we're too early */
+       snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
+                pci_name(pci_dev));
+       efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
+       if (!efx->workqueue)
+               goto fail;
+
+       return 0;
+
+fail:
+       ef4_fini_struct(efx);
+       return -ENOMEM;
+}
+
+/* Release what ef4_init_struct() and the VPD probe allocated: every
+ * channel, the VPD serial number string and, if it exists, the
+ * workqueue (whose pointer is then cleared).
+ */
+static void ef4_fini_struct(struct ef4_nic *efx)
+{
+       int idx;
+
+       for (idx = 0; idx < EF4_MAX_CHANNELS; idx++) {
+               kfree(efx->channel[idx]);
+       }
+
+       kfree(efx->vpd_sn);
+
+       if (!efx->workqueue)
+               return;
+
+       destroy_workqueue(efx->workqueue);
+       efx->workqueue = NULL;
+}
+
+/* Store the software-maintained counters into @stats: the sum of
+ * n_rx_nodesc_trunc over all channels, and the NIC-wide
+ * n_rx_noskb_drops counter.
+ */
+void ef4_update_sw_stats(struct ef4_nic *efx, u64 *stats)
+{
+       struct ef4_channel *chan;
+       u64 trunc_total = 0;
+
+       ef4_for_each_channel(chan, efx) {
+               trunc_total += chan->n_rx_nodesc_trunc;
+       }
+
+       stats[GENERIC_STAT_rx_nodesc_trunc] = trunc_total;
+       stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
+}
+
+/**************************************************************************
+ *
+ * PCI interface
+ *
+ **************************************************************************/
+
+/* Main body of final NIC shutdown code
+ * This is called only at module unload (or hotplug removal).
+ *
+ * It is also used by ef4_pci_probe() to unwind a failed probe.  The
+ * NIC must not be in STATE_READY when this is called.  Teardown runs in
+ * the reverse order of ef4_pci_probe_main(): interrupts, port, NIC,
+ * NAPI, then everything set up by ef4_probe_all().
+ */
+static void ef4_pci_remove_main(struct ef4_nic *efx)
+{
+       /* Flush reset_work. It can no longer be scheduled since we
+        * are not READY.
+        */
+       BUG_ON(efx->state == STATE_READY);
+       cancel_work_sync(&efx->reset_work);
+
+       ef4_disable_interrupts(efx);
+       ef4_nic_fini_interrupt(efx);
+       ef4_fini_port(efx);
+       efx->type->fini(efx);
+       ef4_fini_napi(efx);
+       ef4_remove_all(efx);
+}
+
+/* Final NIC shutdown
+ * This is called only at module unload (or hotplug removal).  A PF can call
+ * this on its VFs to ensure they are unbound first.
+ *
+ * Does nothing if no driver data is attached to @pci_dev.  Otherwise the
+ * interface is closed and the state set to STATE_UNINIT under the RTNL
+ * lock, after which the net device is unregistered and the remaining
+ * software and hardware state is torn down and freed.
+ */
+static void ef4_pci_remove(struct pci_dev *pci_dev)
+{
+       struct ef4_nic *efx;
+
+       efx = pci_get_drvdata(pci_dev);
+       if (!efx)
+               return;
+
+       /* Mark the NIC as fini, then stop the interface */
+       rtnl_lock();
+       ef4_dissociate(efx);
+       dev_close(efx->net_dev);
+       ef4_disable_interrupts(efx);
+       efx->state = STATE_UNINIT;
+       rtnl_unlock();
+
+       ef4_unregister_netdev(efx);
+
+       ef4_mtd_remove(efx);
+
+       ef4_pci_remove_main(efx);
+
+       ef4_fini_io(efx);
+       netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");
+
+       ef4_fini_struct(efx);
+       /* efx is the private area of this net device (see the BUG_ON in
+        * ef4_unregister_netdev()), so it must not be used after this.
+        */
+       free_netdev(efx->net_dev);
+
+       pci_disable_pcie_error_reporting(pci_dev);
+}
+
+/* NIC VPD information
+ * Called during probe to display the part number of the
+ * installed NIC and to record its serial number in efx->vpd_sn.
+ * VPD is potentially very large but this should
+ * always appear within the first 512 bytes.
+ *
+ * Every failure is non-fatal: the function logs a message (except on
+ * allocation failure) and returns, leaving efx->vpd_sn untouched.
+ */
+#define SFC_VPD_LEN 512
+static void ef4_probe_vpd_strings(struct ef4_nic *efx)
+{
+       struct pci_dev *dev = efx->pci_dev;
+       char vpd_data[SFC_VPD_LEN];
+       ssize_t vpd_size;
+       int ro_start, ro_size, i, j;
+
+       /* Get the vpd data from the device */
+       vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
+       if (vpd_size <= 0) {
+               netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n");
+               return;
+       }
+
+       /* Get the Read only section */
+       ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA);
+       if (ro_start < 0) {
+               netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n");
+               return;
+       }
+
+       /* Clamp the read-only section to the data actually read.  The
+        * clamped size is used for BOTH keyword searches below, so that
+        * neither can look beyond vpd_size.
+        */
+       ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
+       i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
+       if (i + ro_size > vpd_size)
+               ro_size = vpd_size - i;
+
+       /* Get the Part number */
+       i = pci_vpd_find_info_keyword(vpd_data, i, ro_size, "PN");
+       if (i < 0) {
+               netif_err(efx, drv, efx->net_dev, "Part number not found\n");
+               return;
+       }
+
+       j = pci_vpd_info_field_size(&vpd_data[i]);
+       i += PCI_VPD_INFO_FLD_HDR_SIZE;
+       if (i + j > vpd_size) {
+               netif_err(efx, drv, efx->net_dev, "Incomplete part number\n");
+               return;
+       }
+
+       netif_info(efx, drv, efx->net_dev,
+                  "Part Number : %.*s\n", j, &vpd_data[i]);
+
+       /* Get the Serial number, restarting at the top of the section */
+       i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
+       i = pci_vpd_find_info_keyword(vpd_data, i, ro_size, "SN");
+       if (i < 0) {
+               netif_err(efx, drv, efx->net_dev, "Serial number not found\n");
+               return;
+       }
+
+       j = pci_vpd_info_field_size(&vpd_data[i]);
+       i += PCI_VPD_INFO_FLD_HDR_SIZE;
+       if (i + j > vpd_size) {
+               netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n");
+               return;
+       }
+
+       efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL);
+       if (!efx->vpd_sn)
+               return;
+
+       /* The VPD field is not NUL-terminated: bound the read to j bytes */
+       snprintf(efx->vpd_sn, j + 1, "%.*s", j, &vpd_data[i]);
+}
+
+
+/* Main body of NIC initialisation
+ * This is called at module load (or hotplug insertion, theoretically).
+ *
+ * Steps, in order: ef4_probe_all(), NAPI setup, NIC type init(), port
+ * init, interrupt setup and interrupt enable.  On failure the steps
+ * already completed are undone in reverse order (there is no "fail2"
+ * label) and the error code of the failing step is returned.
+ * Returns 0 on success.
+ */
+static int ef4_pci_probe_main(struct ef4_nic *efx)
+{
+       int rc;
+
+       /* Do start-of-day initialisation */
+       rc = ef4_probe_all(efx);
+       if (rc)
+               goto fail1;
+
+       ef4_init_napi(efx);
+
+       rc = efx->type->init(efx);
+       if (rc) {
+               netif_err(efx, probe, efx->net_dev,
+                         "failed to initialise NIC\n");
+               goto fail3;
+       }
+
+       rc = ef4_init_port(efx);
+       if (rc) {
+               netif_err(efx, probe, efx->net_dev,
+                         "failed to initialise port\n");
+               goto fail4;
+       }
+
+       rc = ef4_nic_init_interrupt(efx);
+       if (rc)
+               goto fail5;
+       rc = ef4_enable_interrupts(efx);
+       if (rc)
+               goto fail6;
+
+       return 0;
+
+ fail6:
+       ef4_nic_fini_interrupt(efx);
+ fail5:
+       ef4_fini_port(efx);
+ fail4:
+       efx->type->fini(efx);
+ fail3:
+       ef4_fini_napi(efx);
+       ef4_remove_all(efx);
+ fail1:
+       return rc;
+}
+
+/* NIC initialisation
+ *
+ * This is called at module load (or hotplug insertion,
+ * theoretically).  It sets up PCI mappings, resets the NIC,
+ * sets up and registers the network devices with the kernel and hooks
+ * the interrupt service routine.  It does not prepare the device for
+ * transmission; this is left to the first time one of the network
+ * interfaces is brought up (i.e. ef4_net_open).
+ *
+ * The struct ef4_nic is the private area of the allocated net device,
+ * and its NIC type comes from @entry->driver_data.
+ *
+ * Failure to create MTDs or to enable PCIe error reporting is only
+ * logged and does not fail the probe.
+ *
+ * Returns 0 on success, -ENOMEM if the net device cannot be allocated,
+ * or the error of the failing initialisation step, after undoing the
+ * steps already completed and freeing the net device.
+ */
+static int ef4_pci_probe(struct pci_dev *pci_dev,
+                        const struct pci_device_id *entry)
+{
+       struct net_device *net_dev;
+       struct ef4_nic *efx;
+       int rc;
+
+       /* Allocate and initialise a struct net_device and struct ef4_nic */
+       net_dev = alloc_etherdev_mqs(sizeof(*efx), EF4_MAX_CORE_TX_QUEUES,
+                                    EF4_MAX_RX_QUEUES);
+       if (!net_dev)
+               return -ENOMEM;
+       efx = netdev_priv(net_dev);
+       efx->type = (const struct ef4_nic_type *) entry->driver_data;
+       efx->fixed_features |= NETIF_F_HIGHDMA;
+
+       pci_set_drvdata(pci_dev, efx);
+       SET_NETDEV_DEV(net_dev, &pci_dev->dev);
+       rc = ef4_init_struct(efx, pci_dev, net_dev);
+       if (rc)
+               goto fail1;
+
+       netif_info(efx, probe, efx->net_dev,
+                  "Solarflare NIC detected\n");
+
+       ef4_probe_vpd_strings(efx);
+
+       /* Set up basic I/O (BAR mappings etc) */
+       rc = ef4_init_io(efx);
+       if (rc)
+               goto fail2;
+
+       rc = ef4_pci_probe_main(efx);
+       if (rc)
+               goto fail3;
+
+       net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
+                             NETIF_F_RXCSUM);
+       /* Mask for features that also apply to VLAN devices */
+       net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
+                                  NETIF_F_HIGHDMA | NETIF_F_RXCSUM);
+
+       net_dev->hw_features = net_dev->features & ~efx->fixed_features;
+
+       /* Disable VLAN filtering by default.  It may be enforced if
+        * the feature is fixed (i.e. VLAN filters are required to
+        * receive VLAN tagged packets due to vPort restrictions).
+        */
+       net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+       net_dev->features |= efx->fixed_features;
+
+       rc = ef4_register_netdev(efx);
+       if (rc)
+               goto fail4;
+
+       netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
+
+       /* Try to create MTDs, but allow this to fail */
+       rtnl_lock();
+       rc = ef4_mtd_probe(efx);
+       rtnl_unlock();
+       if (rc && rc != -EPERM)
+               netif_warn(efx, probe, efx->net_dev,
+                          "failed to create MTDs (%d)\n", rc);
+
+       rc = pci_enable_pcie_error_reporting(pci_dev);
+       if (rc && rc != -EINVAL)
+               netif_notice(efx, probe, efx->net_dev,
+                            "PCIE error reporting unavailable (%d).\n",
+                            rc);
+
+       return 0;
+
+ fail4:
+       ef4_pci_remove_main(efx);
+ fail3:
+       ef4_fini_io(efx);
+ fail2:
+       ef4_fini_struct(efx);
+ fail1:
+       WARN_ON(rc > 0);
+       netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc);
+       free_netdev(net_dev);
+       return rc;
+}
+
+/* PM freeze: unless the NIC has been disabled, mark it STATE_UNINIT,
+ * detach the net device, stop everything and disable interrupts, all
+ * under the RTNL lock.  Always returns 0.
+ */
+static int ef4_pm_freeze(struct device *dev)
+{
+       struct pci_dev *pci_dev = to_pci_dev(dev);
+       struct ef4_nic *efx = pci_get_drvdata(pci_dev);
+       bool active;
+
+       rtnl_lock();
+
+       active = efx->state != STATE_DISABLED;
+       if (active) {
+               efx->state = STATE_UNINIT;
+
+               ef4_device_detach_sync(efx);
+
+               ef4_stop_all(efx);
+               ef4_disable_interrupts(efx);
+       }
+
+       rtnl_unlock();
+
+       return 0;
+}
+/*
+ * PM thaw (also the last step of ef4_pm_resume()): unless the NIC is
+ * STATE_DISABLED, re-enable interrupts, reconfigure the PHY under
+ * mac_lock, restart everything, re-attach the net device, return to
+ * STATE_READY and call the NIC type's resume_wol().  The reset work
+ * item is then queued regardless of the state.
+ *
+ * Returns 0 on success, or the error from ef4_enable_interrupts(), in
+ * which case the state is left unchanged and no reset work is queued.
+ */
+static int ef4_pm_thaw(struct device *dev)
+{
+       int rc;
+       struct ef4_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+
+       rtnl_lock();
+
+       if (efx->state != STATE_DISABLED) {
+               rc = ef4_enable_interrupts(efx);
+               if (rc)
+                       goto fail;
+
+               mutex_lock(&efx->mac_lock);
+               efx->phy_op->reconfigure(efx);
+               mutex_unlock(&efx->mac_lock);
+
+               ef4_start_all(efx);
+
+               netif_device_attach(efx->net_dev);
+
+               efx->state = STATE_READY;
+
+               efx->type->resume_wol(efx);
+       }
+
+       rtnl_unlock();
+
+       /* Reschedule any quenched resets scheduled during ef4_pm_freeze() */
+       queue_work(reset_workqueue, &efx->reset_work);
+
+       return 0;
+
+fail:
+       rtnl_unlock();
+
+       return rc;
+}
+
+/* Run the NIC type's fini hook, drop any pending reset request, save the
+ * PCI state and put the device into D3hot.  Returns the result of
+ * pci_set_power_state().
+ */
+static int ef4_pm_poweroff(struct device *dev)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct ef4_nic *efx = pci_get_drvdata(pdev);
+       int rc;
+
+       efx->type->fini(efx);
+       efx->reset_pending = 0;
+
+       pci_save_state(pdev);
+       rc = pci_set_power_state(pdev, PCI_D3hot);
+
+       return rc;
+}
+
+/* Used for both resume and restore.
+ *
+ * Brings the PCI device back to D0, restores its saved state, re-enables
+ * it as a bus master, then resets and re-initialises the NIC before
+ * handing over to ef4_pm_thaw().  The first failing step's error code is
+ * returned and the remaining steps are skipped.
+ */
+static int ef4_pm_resume(struct device *dev)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct ef4_nic *efx = pci_get_drvdata(pdev);
+       int rc;
+
+       rc = pci_set_power_state(pdev, PCI_D0);
+       if (rc)
+               goto out;
+
+       pci_restore_state(pdev);
+
+       rc = pci_enable_device(pdev);
+       if (rc)
+               goto out;
+
+       pci_set_master(efx->pci_dev);
+
+       rc = efx->type->reset(efx, RESET_TYPE_ALL);
+       if (rc)
+               goto out;
+
+       rc = efx->type->init(efx);
+       if (rc)
+               goto out;
+
+       rc = ef4_pm_thaw(dev);
+out:
+       return rc;
+}
+
+/* Suspend = freeze followed by poweroff.  If powering off fails, try to
+ * bring the device back with ef4_pm_resume() and report the poweroff error.
+ */
+static int ef4_pm_suspend(struct device *dev)
+{
+       int rc;
+
+       ef4_pm_freeze(dev);
+
+       rc = ef4_pm_poweroff(dev);
+       if (!rc)
+               return 0;
+
+       /* Roll back; the result of the resume attempt is ignored */
+       ef4_pm_resume(dev);
+       return rc;
+}
+
+/* Power-management callbacks; .restore shares the .resume handler */
+static const struct dev_pm_ops ef4_pm_ops = {
+       .freeze         = ef4_pm_freeze,
+       .thaw           = ef4_pm_thaw,
+       .poweroff       = ef4_pm_poweroff,
+       .restore        = ef4_pm_resume,
+       .suspend        = ef4_pm_suspend,
+       .resume         = ef4_pm_resume,
+};
+
+/* A PCI error affecting this device was detected.
+ * At this point MMIO and DMA may be disabled.
+ * Stop the software path and request a slot reset.
+ */
+static pci_ers_result_t ef4_io_error_detected(struct pci_dev *pdev,
+                                             enum pci_channel_state state)
+{
+       struct ef4_nic *efx = pci_get_drvdata(pdev);
+       pci_ers_result_t result;
+
+       /* Permanent failure: nothing to recover */
+       if (state == pci_channel_io_perm_failure)
+               return PCI_ERS_RESULT_DISCONNECT;
+
+       rtnl_lock();
+
+       if (efx->state == STATE_DISABLED) {
+               /* If the interface is disabled we don't want to do anything
+                * with it.
+                */
+               result = PCI_ERS_RESULT_RECOVERED;
+       } else {
+               efx->state = STATE_RECOVERY;
+               efx->reset_pending = 0;
+
+               ef4_device_detach_sync(efx);
+               ef4_stop_all(efx);
+               ef4_disable_interrupts(efx);
+
+               result = PCI_ERS_RESULT_NEED_RESET;
+       }
+
+       rtnl_unlock();
+
+       /* The PCI device is disabled in both cases */
+       pci_disable_device(pdev);
+
+       return result;
+}
+
+/* Fake a successful reset, which will be performed later in ef4_io_resume. */
+static pci_ers_result_t ef4_io_slot_reset(struct pci_dev *pdev)
+{
+       struct ef4_nic *efx = pci_get_drvdata(pdev);
+       pci_ers_result_t result;
+       int rc;
+
+       rc = pci_enable_device(pdev);
+       if (rc) {
+               netif_err(efx, hw, efx->net_dev,
+                         "Cannot re-enable PCI device after reset.\n");
+               result = PCI_ERS_RESULT_DISCONNECT;
+       } else {
+               result = PCI_ERS_RESULT_RECOVERED;
+       }
+
+       /* Failing to clear the AER status is logged but does not change
+        * the reported result.
+        */
+       rc = pci_cleanup_aer_uncorrect_error_status(pdev);
+       if (rc)
+               netif_err(efx, hw, efx->net_dev,
+               "pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc);
+
+       return result;
+}
+
+/* Perform the actual reset and resume I/O operations. */
+static void ef4_io_resume(struct pci_dev *pdev)
+{
+       struct ef4_nic *efx = pci_get_drvdata(pdev);
+       int rc;
+
+       rtnl_lock();
+
+       /* A disabled NIC is left alone */
+       if (efx->state != STATE_DISABLED) {
+               rc = ef4_reset(efx, RESET_TYPE_ALL);
+               if (!rc) {
+                       efx->state = STATE_READY;
+                       netif_dbg(efx, hw, efx->net_dev,
+                                 "Done resetting and resuming IO after PCI error.\n");
+               } else {
+                       netif_err(efx, hw, efx->net_dev,
+                                 "ef4_reset failed after PCI error (%d)\n", rc);
+               }
+       }
+
+       rtnl_unlock();
+}
+
+/* For simplicity and reliability, we always require a slot reset and try to
+ * reset the hardware when a pci error affecting the device is detected.
+ * We leave both the link_reset and mmio_enabled callback unimplemented:
+ * with our request for slot reset the mmio_enabled callback will never be
+ * called, and the link_reset callback is not used by AER or EEH mechanisms.
+ */
+static const struct pci_error_handlers ef4_err_handlers = {
+       .error_detected = ef4_io_error_detected, /* stop software path */
+       .slot_reset = ef4_io_slot_reset,         /* re-enable the device */
+       .resume = ef4_io_resume,                 /* reset NIC, resume I/O */
+};
+
+static struct pci_driver ef4_pci_driver = {
+       .name           = KBUILD_MODNAME,
+       .id_table       = ef4_pci_table,
+       .probe          = ef4_pci_probe,
+       .remove         = ef4_pci_remove,
+       .driver.pm      = &ef4_pm_ops,
+       .err_handler    = &ef4_err_handlers,
+};
+
+/**************************************************************************
+ *
+ * Kernel module interface
+ *
+ *************************************************************************/
+
+module_param(interrupt_mode, uint, 0444);
+MODULE_PARM_DESC(interrupt_mode,
+                "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");
+
+/* Module load: register the netdevice notifier, create the reset
+ * workqueue and register the PCI driver, in that order.  A failure undoes
+ * the steps already completed and returns the error code.
+ */
+static int __init ef4_init_module(void)
+{
+       int rc;
+
+       printk(KERN_INFO "Solarflare Falcon driver v" EF4_DRIVER_VERSION "\n");
+
+       rc = register_netdevice_notifier(&ef4_netdev_notifier);
+       if (rc)
+               return rc;
+
+       reset_workqueue = create_singlethread_workqueue("sfc_reset");
+       if (!reset_workqueue) {
+               rc = -ENOMEM;
+               goto err_unregister;
+       }
+
+       rc = pci_register_driver(&ef4_pci_driver);
+       if (rc < 0)
+               goto err_destroy_wq;
+
+       return 0;
+
+ err_destroy_wq:
+       destroy_workqueue(reset_workqueue);
+ err_unregister:
+       unregister_netdevice_notifier(&ef4_netdev_notifier);
+       return rc;
+}
+
+/* Module unload: tear down in the reverse order of ef4_init_module() */
+static void __exit ef4_exit_module(void)
+{
+       printk(KERN_INFO "Solarflare Falcon driver unloading\n");
+
+       pci_unregister_driver(&ef4_pci_driver);
+       destroy_workqueue(reset_workqueue);
+       unregister_netdevice_notifier(&ef4_netdev_notifier);
+}
+
+module_init(ef4_init_module);
+module_exit(ef4_exit_module);
+
+MODULE_AUTHOR("Solarflare Communications and "
+             "Michael Brown <mbrown@fensystems.co.uk>");
+MODULE_DESCRIPTION("Solarflare Falcon network driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, ef4_pci_table);
diff --git a/drivers/net/ethernet/sfc/falcon/efx.h b/drivers/net/ethernet/sfc/falcon/efx.h
new file mode 100644 (file)
index 0000000..c89456f
--- /dev/null
@@ -0,0 +1,277 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_EFX_H
+#define EF4_EFX_H
+
+#include "net_driver.h"
+#include "filter.h"
+
+/* All controllers use BAR 0 for I/O space and BAR 2(&3) for memory */
+/* All VFs use BAR 0/1 for memory */
+#define EF4_MEM_BAR 2
+#define EF4_MEM_VF_BAR 0
+
+int ef4_net_open(struct net_device *net_dev);
+int ef4_net_stop(struct net_device *net_dev);
+
+/* TX */
+int ef4_probe_tx_queue(struct ef4_tx_queue *tx_queue);
+void ef4_remove_tx_queue(struct ef4_tx_queue *tx_queue);
+void ef4_init_tx_queue(struct ef4_tx_queue *tx_queue);
+void ef4_init_tx_queue_core_txq(struct ef4_tx_queue *tx_queue);
+void ef4_fini_tx_queue(struct ef4_tx_queue *tx_queue);
+netdev_tx_t ef4_hard_start_xmit(struct sk_buff *skb,
+                               struct net_device *net_dev);
+netdev_tx_t ef4_enqueue_skb(struct ef4_tx_queue *tx_queue, struct sk_buff *skb);
+void ef4_xmit_done(struct ef4_tx_queue *tx_queue, unsigned int index);
+int ef4_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
+                struct tc_to_netdev *tc);
+unsigned int ef4_tx_max_skb_descs(struct ef4_nic *efx);
+extern bool ef4_separate_tx_channels;
+
+/* RX */
+void ef4_set_default_rx_indir_table(struct ef4_nic *efx);
+void ef4_rx_config_page_split(struct ef4_nic *efx);
+int ef4_probe_rx_queue(struct ef4_rx_queue *rx_queue);
+void ef4_remove_rx_queue(struct ef4_rx_queue *rx_queue);
+void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue);
+void ef4_fini_rx_queue(struct ef4_rx_queue *rx_queue);
+void ef4_fast_push_rx_descriptors(struct ef4_rx_queue *rx_queue, bool atomic);
+void ef4_rx_slow_fill(unsigned long context);
+void __ef4_rx_packet(struct ef4_channel *channel);
+void ef4_rx_packet(struct ef4_rx_queue *rx_queue, unsigned int index,
+                  unsigned int n_frags, unsigned int len, u16 flags);
+/* Hand over the channel's pending RX packet, if it has any fragments */
+static inline void ef4_rx_flush_packet(struct ef4_channel *channel)
+{
+       if (!channel->rx_pkt_n_frags)
+               return;
+
+       __ef4_rx_packet(channel);
+}
+void ef4_schedule_slow_fill(struct ef4_rx_queue *rx_queue);
+
+#define EF4_MAX_DMAQ_SIZE 4096UL
+#define EF4_DEFAULT_DMAQ_SIZE 1024UL
+#define EF4_MIN_DMAQ_SIZE 512UL
+
+#define EF4_MAX_EVQ_SIZE 16384UL
+#define EF4_MIN_EVQ_SIZE 512UL
+
+/* Maximum number of TCP segments we support for soft-TSO */
+#define EF4_TSO_MAX_SEGS       100
+
+/* The smallest [rt]xq_entries that the driver supports.  RX minimum
+ * is a bit arbitrary.  For TX, we must have space for at least 2
+ * TSO skbs.
+ */
+#define EF4_RXQ_MIN_ENT                128U
+#define EF4_TXQ_MIN_ENT(efx)   (2 * ef4_tx_max_skb_descs(efx))
+
+/* RSS counts as enabled when rss_spread is greater than one */
+static inline bool ef4_rss_enabled(struct ef4_nic *efx)
+{
+       bool enabled = efx->rss_spread > 1;
+
+       return enabled;
+}
+
+/* Filters */
+
+void ef4_mac_reconfigure(struct ef4_nic *efx);
+
+/**
+ * ef4_filter_insert_filter - add or replace a filter
+ * @efx: NIC in which to insert the filter
+ * @spec: Specification for the filter
+ * @replace_equal: Flag for whether the specified filter may replace an
+ *     existing filter with equal priority
+ *
+ * On success, return the filter ID.
+ * On failure, return a negative error code.
+ *
+ * If existing filters have equal match values to the new filter spec,
+ * then the new filter might replace them or the function might fail,
+ * as follows.
+ *
+ * 1. If the existing filters have lower priority, or @replace_equal
+ *    is set and they have equal priority, replace them.
+ *
+ * 2. If the existing filters have higher priority, return -%EPERM.
+ *
+ * 3. If !ef4_filter_is_mc_recipient(@spec), or the NIC does not
+ *    support delivery to multiple recipients, return -%EEXIST.
+ *
+ * This implies that filters for multiple multicast recipients must
+ * all be inserted with the same priority and @replace_equal = %false.
+ */
+static inline s32 ef4_filter_insert_filter(struct ef4_nic *efx,
+                                          struct ef4_filter_spec *spec,
+                                          bool replace_equal)
+{
+       /* Delegates to the NIC type's filter_insert operation */
+       s32 rc = efx->type->filter_insert(efx, spec, replace_equal);
+
+       return rc;
+}
+
+/**
+ * ef4_filter_remove_id_safe - remove a filter by ID, carefully
+ * @efx: NIC from which to remove the filter
+ * @priority: Priority of filter, as passed to @ef4_filter_insert_filter
+ * @filter_id: ID of filter, as returned by @ef4_filter_insert_filter
+ *
+ * This function will range-check @filter_id, so it is safe to call
+ * with a value passed from userland.
+ */
+static inline int ef4_filter_remove_id_safe(struct ef4_nic *efx,
+                                           enum ef4_filter_priority priority,
+                                           u32 filter_id)
+{
+       /* Delegates to the NIC type's filter_remove_safe operation */
+       int rc = efx->type->filter_remove_safe(efx, priority, filter_id);
+
+       return rc;
+}
+
+/**
+ * ef4_filter_get_filter_safe - retrieve a filter by ID, carefully
+ * @efx: NIC from which to retrieve the filter
+ * @priority: Priority of filter, as passed to @ef4_filter_insert_filter
+ * @filter_id: ID of filter, as returned by @ef4_filter_insert_filter
+ * @spec: Buffer in which to store filter specification
+ *
+ * This function will range-check @filter_id, so it is safe to call
+ * with a value passed from userland.
+ */
+static inline int
+ef4_filter_get_filter_safe(struct ef4_nic *efx,
+                          enum ef4_filter_priority priority,
+                          u32 filter_id, struct ef4_filter_spec *spec)
+{
+       /* Delegates to the NIC type's filter_get_safe operation */
+       int rc = efx->type->filter_get_safe(efx, priority, filter_id, spec);
+
+       return rc;
+}
+
+/* Thin wrapper around the NIC type's filter_count_rx_used operation */
+static inline u32 ef4_filter_count_rx_used(struct ef4_nic *efx,
+                                          enum ef4_filter_priority priority)
+{
+       u32 n_used = efx->type->filter_count_rx_used(efx, priority);
+
+       return n_used;
+}
+/* Thin wrapper around the NIC type's filter_get_rx_id_limit operation */
+static inline u32 ef4_filter_get_rx_id_limit(struct ef4_nic *efx)
+{
+       u32 limit = efx->type->filter_get_rx_id_limit(efx);
+
+       return limit;
+}
+/* Thin wrapper around the NIC type's filter_get_rx_ids operation;
+ * @buf and @size are passed through unchanged.
+ */
+static inline s32 ef4_filter_get_rx_ids(struct ef4_nic *efx,
+                                       enum ef4_filter_priority priority,
+                                       u32 *buf, u32 size)
+{
+       s32 rc = efx->type->filter_get_rx_ids(efx, priority, buf, size);
+
+       return rc;
+}
+#ifdef CONFIG_RFS_ACCEL
+int ef4_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
+                  u16 rxq_index, u32 flow_id);
+bool __ef4_filter_rfs_expire(struct ef4_nic *efx, unsigned quota);
+/* Once at least 60 RFS filters have been added on this channel, run an
+ * expiry pass with a quota of 100; if that pass reports success, take 60
+ * off the channel's added-filter count.
+ */
+static inline void ef4_filter_rfs_expire(struct ef4_channel *channel)
+{
+       if (channel->rfs_filters_added < 60)
+               return;
+
+       if (__ef4_filter_rfs_expire(channel->efx, 100))
+               channel->rfs_filters_added -= 60;
+}
+#define ef4_filter_rfs_enabled() 1
+#else
+static inline void ef4_filter_rfs_expire(struct ef4_channel *channel) {}
+#define ef4_filter_rfs_enabled() 0
+#endif
+bool ef4_filter_is_mc_recipient(const struct ef4_filter_spec *spec);
+
+/* Channels */
+int ef4_channel_dummy_op_int(struct ef4_channel *channel);
+void ef4_channel_dummy_op_void(struct ef4_channel *channel);
+int ef4_realloc_channels(struct ef4_nic *efx, u32 rxq_entries, u32 txq_entries);
+
+/* Ports */
+int ef4_reconfigure_port(struct ef4_nic *efx);
+int __ef4_reconfigure_port(struct ef4_nic *efx);
+
+/* Ethtool support */
+extern const struct ethtool_ops ef4_ethtool_ops;
+
+/* Reset handling */
+int ef4_reset(struct ef4_nic *efx, enum reset_type method);
+void ef4_reset_down(struct ef4_nic *efx, enum reset_type method);
+int ef4_reset_up(struct ef4_nic *efx, enum reset_type method, bool ok);
+int ef4_try_recovery(struct ef4_nic *efx);
+
+/* Global */
+void ef4_schedule_reset(struct ef4_nic *efx, enum reset_type type);
+unsigned int ef4_usecs_to_ticks(struct ef4_nic *efx, unsigned int usecs);
+unsigned int ef4_ticks_to_usecs(struct ef4_nic *efx, unsigned int ticks);
+int ef4_init_irq_moderation(struct ef4_nic *efx, unsigned int tx_usecs,
+                           unsigned int rx_usecs, bool rx_adaptive,
+                           bool rx_may_override_tx);
+void ef4_get_irq_moderation(struct ef4_nic *efx, unsigned int *tx_usecs,
+                           unsigned int *rx_usecs, bool *rx_adaptive);
+void ef4_stop_eventq(struct ef4_channel *channel);
+void ef4_start_eventq(struct ef4_channel *channel);
+
+/* Dummy PHY ops for PHY drivers */
+int ef4_port_dummy_op_int(struct ef4_nic *efx);
+void ef4_port_dummy_op_void(struct ef4_nic *efx);
+
+/* Update the generic software stats in the passed stats array */
+void ef4_update_sw_stats(struct ef4_nic *efx, u64 *stats);
+
+/* MTD */
+#ifdef CONFIG_SFC_FALCON_MTD
+int ef4_mtd_add(struct ef4_nic *efx, struct ef4_mtd_partition *parts,
+               size_t n_parts, size_t sizeof_part);
+/* Thin wrapper around the NIC type's mtd_probe operation */
+static inline int ef4_mtd_probe(struct ef4_nic *efx)
+{
+       int rc = efx->type->mtd_probe(efx);
+
+       return rc;
+}
+void ef4_mtd_rename(struct ef4_nic *efx);
+void ef4_mtd_remove(struct ef4_nic *efx);
+#else
+static inline int ef4_mtd_probe(struct ef4_nic *efx) { return 0; }
+static inline void ef4_mtd_rename(struct ef4_nic *efx) {}
+static inline void ef4_mtd_remove(struct ef4_nic *efx) {}
+#endif
+
+/* Log (at verbose-debug level) and schedule NAPI polling for a channel */
+static inline void ef4_schedule_channel(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+
+       netif_vdbg(efx, intr, efx->net_dev,
+                  "channel %d scheduling NAPI poll on CPU%d\n",
+                  channel->channel, raw_smp_processor_id());
+
+       napi_schedule(&channel->napi_str);
+}
+
+/* As ef4_schedule_channel(), but first record the current CPU in the
+ * channel's event_test_cpu field.
+ */
+static inline void ef4_schedule_channel_irq(struct ef4_channel *channel)
+{
+       int cpu = raw_smp_processor_id();
+
+       channel->event_test_cpu = cpu;
+       ef4_schedule_channel(channel);
+}
+
+void ef4_link_status_changed(struct ef4_nic *efx);
+void ef4_link_set_advertising(struct ef4_nic *efx, u32);
+void ef4_link_set_wanted_fc(struct ef4_nic *efx, u8);
+
+/* Detach the net device while holding the TX lock */
+static inline void ef4_device_detach_sync(struct ef4_nic *efx)
+{
+       struct net_device *net_dev = efx->net_dev;
+
+       /* Taking the TX lock around the detach freezes all TX queues, so
+        * the TX scheduler is known to be stopped by the time we return
+        * and before netif_device_present() becomes false.
+        */
+       netif_tx_lock_bh(net_dev);
+       netif_device_detach(net_dev);
+       netif_tx_unlock_bh(net_dev);
+}
+
+/* Check that @sem is write-locked by attempting a read lock.
+ *
+ * If the read lock cannot be taken, return true.  If it can, the
+ * semaphore was not write-locked: warn, release the read lock just taken
+ * and return false.
+ */
+static inline bool ef4_rwsem_assert_write_locked(struct rw_semaphore *sem)
+{
+       int got_read = down_read_trylock(sem);
+
+       if (!WARN_ON(got_read))
+               return true;
+
+       up_read(sem);
+       return false;
+}
+
+#endif /* EF4_EFX_H */
diff --git a/drivers/net/ethernet/sfc/falcon/enum.h b/drivers/net/ethernet/sfc/falcon/enum.h
new file mode 100644 (file)
index 0000000..30a1136
--- /dev/null
@@ -0,0 +1,171 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2007-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_ENUM_H
+#define EF4_ENUM_H
+
+/**
+ * enum ef4_loopback_mode - loopback modes
+ * @LOOPBACK_NONE: no loopback
+ * @LOOPBACK_DATA: data path loopback
+ * @LOOPBACK_GMAC: loopback within GMAC
+ * @LOOPBACK_XGMII: loopback after XMAC
+ * @LOOPBACK_XGXS: loopback within BPX after XGXS
+ * @LOOPBACK_XAUI: loopback within BPX before XAUI serdes
+ * @LOOPBACK_GMII: loopback within BPX after GMAC
+ * @LOOPBACK_SGMII: loopback within BPX within SGMII
+ * @LOOPBACK_XGBR: loopback within BPX within XGBR
+ * @LOOPBACK_XFI: loopback within BPX before XFI serdes
+ * @LOOPBACK_XAUI_FAR: loopback within BPX after XAUI serdes
+ * @LOOPBACK_GMII_FAR: loopback within BPX before SGMII
+ * @LOOPBACK_SGMII_FAR: loopback within BPX after SGMII
+ * @LOOPBACK_XFI_FAR: loopback after XFI serdes
+ * @LOOPBACK_GPHY: loopback within 1G PHY at unspecified level
+ * @LOOPBACK_PHYXS: loopback within 10G PHY at PHYXS level
+ * @LOOPBACK_PCS: loopback within 10G PHY at PCS level
+ * @LOOPBACK_PMAPMD: loopback within 10G PHY at PMAPMD level
+ * @LOOPBACK_XPORT: cross port loopback
+ * @LOOPBACK_XGMII_WS: wireside loopback excluding XMAC
+ * @LOOPBACK_XAUI_WS: wireside loopback within BPX within XAUI serdes
+ * @LOOPBACK_XAUI_WS_FAR: wireside loopback within BPX including XAUI serdes
+ * @LOOPBACK_XAUI_WS_NEAR: wireside loopback within BPX excluding XAUI serdes
+ * @LOOPBACK_GMII_WS: wireside loopback excluding GMAC
+ * @LOOPBACK_XFI_WS: wireside loopback excluding XFI serdes
+ * @LOOPBACK_XFI_WS_FAR: wireside loopback including XFI serdes
+ * @LOOPBACK_PHYXS_WS: wireside loopback within 10G PHY at PHYXS level
+ */
+/* Please keep up-to-date w.r.t the following two #defines */
+enum ef4_loopback_mode {
+       LOOPBACK_NONE = 0,
+       LOOPBACK_DATA = 1,
+       LOOPBACK_GMAC = 2,
+       LOOPBACK_XGMII = 3,
+       LOOPBACK_XGXS = 4,
+       LOOPBACK_XAUI = 5,
+       LOOPBACK_GMII = 6,
+       LOOPBACK_SGMII = 7,
+       LOOPBACK_XGBR = 8,
+       LOOPBACK_XFI = 9,
+       LOOPBACK_XAUI_FAR = 10,
+       LOOPBACK_GMII_FAR = 11,
+       LOOPBACK_SGMII_FAR = 12,
+       LOOPBACK_XFI_FAR = 13,
+       LOOPBACK_GPHY = 14,
+       LOOPBACK_PHYXS = 15,
+       LOOPBACK_PCS = 16,
+       LOOPBACK_PMAPMD = 17,
+       LOOPBACK_XPORT = 18,
+       LOOPBACK_XGMII_WS = 19,
+       LOOPBACK_XAUI_WS = 20,
+       LOOPBACK_XAUI_WS_FAR = 21,
+       LOOPBACK_XAUI_WS_NEAR = 22,
+       LOOPBACK_GMII_WS = 23,
+       LOOPBACK_XFI_WS = 24,
+       LOOPBACK_XFI_WS_FAR = 25,
+       LOOPBACK_PHYXS_WS = 26,
+       LOOPBACK_MAX    /* one past the last mode above; not a mode itself */
+};
+#define LOOPBACK_TEST_MAX LOOPBACK_PMAPMD
+
+/* Mask of the loopback modes that occur within the controller.
+ * Bit N stands for enum ef4_loopback_mode value N, so the mask can be
+ * tested directly against LOOPBACK_MASK().
+ */
+#define LOOPBACKS_INTERNAL ((1 << LOOPBACK_DATA) |             \
+                           (1 << LOOPBACK_GMAC) |              \
+                           (1 << LOOPBACK_XGMII)|              \
+                           (1 << LOOPBACK_XGXS) |              \
+                           (1 << LOOPBACK_XAUI) |              \
+                           (1 << LOOPBACK_GMII) |              \
+                           (1 << LOOPBACK_SGMII) |             \
+                           (1 << LOOPBACK_XGBR) |              \
+                           (1 << LOOPBACK_XFI) |               \
+                           (1 << LOOPBACK_XAUI_FAR) |          \
+                           (1 << LOOPBACK_GMII_FAR) |          \
+                           (1 << LOOPBACK_SGMII_FAR) |         \
+                           (1 << LOOPBACK_XFI_FAR) |           \
+                           (1 << LOOPBACK_XGMII_WS) |          \
+                           (1 << LOOPBACK_XAUI_WS) |           \
+                           (1 << LOOPBACK_XAUI_WS_FAR) |       \
+                           (1 << LOOPBACK_XAUI_WS_NEAR) |      \
+                           (1 << LOOPBACK_GMII_WS) |           \
+                           (1 << LOOPBACK_XFI_WS) |            \
+                           (1 << LOOPBACK_XFI_WS_FAR))
+
+/* Mask of the wireside (*_WS) loopback modes, one bit per mode */
+#define LOOPBACKS_WS                                           \
+       ((1 << LOOPBACK_XGMII_WS) |                             \
+        (1 << LOOPBACK_XAUI_WS) |                              \
+        (1 << LOOPBACK_XAUI_WS_FAR) |                          \
+        (1 << LOOPBACK_XAUI_WS_NEAR) |                         \
+        (1 << LOOPBACK_GMII_WS) |                              \
+        (1 << LOOPBACK_XFI_WS) |                               \
+        (1 << LOOPBACK_XFI_WS_FAR) |                           \
+        (1 << LOOPBACK_PHYXS_WS))
+
+/* Modes in (_efx)->loopback_modes that are neither internal nor NONE */
+#define LOOPBACKS_EXTERNAL(_efx)                                       \
+       ((_efx)->loopback_modes & ~LOOPBACKS_INTERNAL &                 \
+        ~(1 << LOOPBACK_NONE))
+
+/* Single-bit mask for the currently selected loopback mode */
+#define LOOPBACK_MASK(_efx) (1 << (_efx)->loopback_mode)
+
+/* Non-zero if the current mode is one of the internal loopbacks */
+#define LOOPBACK_INTERNAL(_efx)                                \
+       (!!(LOOPBACKS_INTERNAL & LOOPBACK_MASK(_efx)))
+
+/* Non-zero if the current mode is one of the external loopbacks */
+#define LOOPBACK_EXTERNAL(_efx)                                \
+       (!!(LOOPBACK_MASK(_efx) & LOOPBACKS_EXTERNAL(_efx)))
+
+/* Non-zero if the modes of _from and _to differ within _mask.  Both
+ * arguments are dereferenced for a loopback_mode member.
+ */
+#define LOOPBACK_CHANGED(_from, _to, _mask)                            \
+       (!!((LOOPBACK_MASK(_from) ^ LOOPBACK_MASK(_to)) & (_mask)))
+
+/* Non-zero if _from's mode is in _mask while _to's mode is not */
+#define LOOPBACK_OUT_OF(_from, _to, _mask)                             \
+       ((LOOPBACK_MASK(_from) & (_mask)) && !(LOOPBACK_MASK(_to) & (_mask)))
+
+/*****************************************************************************/
+
+/**
+ * enum reset_type - reset types
+ *
+ * %RESET_TYPE_INVISIBLE, %RESET_TYPE_ALL, %RESET_TYPE_WORLD and
+ * %RESET_TYPE_DISABLE specify the method/scope of the reset.  The
+ * other values specify reasons, which ef4_schedule_reset() will choose
+ * a method for.
+ *
+ * Reset methods are numbered in order of increasing scope.
+ *
+ * @RESET_TYPE_INVISIBLE: Reset datapath and MAC
+ * @RESET_TYPE_RECOVER_OR_ALL: Try to recover. Apply RESET_TYPE_ALL
+ * if unsuccessful.
+ * @RESET_TYPE_ALL: Reset datapath, MAC and PHY
+ * @RESET_TYPE_WORLD: Reset as much as possible
+ * @RESET_TYPE_RECOVER_OR_DISABLE: Try to recover. Apply RESET_TYPE_DISABLE if
+ * unsuccessful.
+ * @RESET_TYPE_DATAPATH: Reset datapath only.
+ * @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled
+ * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog
+ * @RESET_TYPE_INT_ERROR: reset due to internal error
+ * @RESET_TYPE_RX_RECOVERY: reset to recover from RX datapath errors
+ * @RESET_TYPE_DMA_ERROR: DMA error
+ * @RESET_TYPE_TX_SKIP: hardware completed empty tx descriptors
+ */
+enum reset_type {
+       RESET_TYPE_INVISIBLE,
+       RESET_TYPE_RECOVER_OR_ALL,
+       RESET_TYPE_ALL,
+       RESET_TYPE_WORLD,
+       RESET_TYPE_RECOVER_OR_DISABLE,
+       RESET_TYPE_DATAPATH,
+       RESET_TYPE_DISABLE,
+       RESET_TYPE_MAX_METHOD,  /* NOTE(review): looks like the boundary between methods (above) and reasons (below) - confirm */
+       RESET_TYPE_TX_WATCHDOG,
+       RESET_TYPE_INT_ERROR,
+       RESET_TYPE_RX_RECOVERY,
+       RESET_TYPE_DMA_ERROR,
+       RESET_TYPE_TX_SKIP,
+       RESET_TYPE_MAX,         /* number of values above; not a reset type itself */
+};
+
+#endif /* EF4_ENUM_H */
diff --git a/drivers/net/ethernet/sfc/falcon/ethtool.c b/drivers/net/ethernet/sfc/falcon/ethtool.c
new file mode 100644 (file)
index 0000000..8e1929b
--- /dev/null
@@ -0,0 +1,1343 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/rtnetlink.h>
+#include <linux/in.h>
+#include "net_driver.h"
+#include "workarounds.h"
+#include "selftest.h"
+#include "efx.h"
+#include "filter.h"
+#include "nic.h"
+
+/* Describes one software statistic: its name, which kind of object holds
+ * it, where the field sits inside that object, and how to read it.
+ */
+struct ef4_sw_stat_desc {
+       /* Statistic name */
+       const char *name;
+       /* Kind of object the field belongs to */
+       enum {
+               EF4_ETHTOOL_STAT_SOURCE_nic,
+               EF4_ETHTOOL_STAT_SOURCE_channel,
+               EF4_ETHTOOL_STAT_SOURCE_tx_queue
+       } source;
+       /* offsetof() the field within the source object's struct */
+       unsigned int offset;
+       /* Reader function: converts the field at @field to a u64 */
+       u64 (*get_stat)(void *field);
+};
+
+/* Initialiser for a struct ef4_sw_stat_desc with type-checking.
+ *
+ * Both arms of the conditional in .offset evaluate to the same offsetof()
+ * value.  The condition exists only to compare a (field_type *) with the
+ * address of the real field - presumably so the compiler diagnoses a
+ * field_type that does not match the field's declared type.
+ */
+#define EF4_ETHTOOL_STAT(stat_name, source_name, field, field_type, \
+                               get_stat_function) {                    \
+       .name = #stat_name,                                             \
+       .source = EF4_ETHTOOL_STAT_SOURCE_##source_name,                \
+       .offset = ((((field_type *) 0) ==                               \
+                     &((struct ef4_##source_name *)0)->field) ?        \
+                   offsetof(struct ef4_##source_name, field) :         \
+                   offsetof(struct ef4_##source_name, field)),         \
+       .get_stat = get_stat_function,                                  \
+}
+
+/* Reader for statistics stored as a plain unsigned int */
+static u64 ef4_get_uint_stat(void *field)
+{
+       const unsigned int *counter = field;
+
+       return *counter;
+}
+
+/* Reader for statistics stored as an atomic_t */
+static u64 ef4_get_atomic_stat(void *field)
+{
+       atomic_t *counter = field;
+
+       return atomic_read(counter);
+}
+
+#define EF4_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field)               \
+       EF4_ETHTOOL_STAT(field, nic, field,                     \
+                        atomic_t, ef4_get_atomic_stat)
+
+#define EF4_ETHTOOL_UINT_CHANNEL_STAT(field)                   \
+       EF4_ETHTOOL_STAT(field, channel, n_##field,             \
+                        unsigned int, ef4_get_uint_stat)
+
+#define EF4_ETHTOOL_UINT_TXQ_STAT(field)                       \
+       EF4_ETHTOOL_STAT(tx_##field, tx_queue, field,           \
+                        unsigned int, ef4_get_uint_stat)
+
+static const struct ef4_sw_stat_desc ef4_sw_stat_desc[] = {
+       EF4_ETHTOOL_UINT_TXQ_STAT(merge_events),
+       EF4_ETHTOOL_UINT_TXQ_STAT(pushes),
+       EF4_ETHTOOL_UINT_TXQ_STAT(cb_packets),
+       EF4_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset),
+       EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
+       EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),
+       EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err),
+       EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch),
+       EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
+       EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events),
+       EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets),
+};
+
+#define EF4_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(ef4_sw_stat_desc)
+
+#define EF4_ETHTOOL_EEPROM_MAGIC 0xEFAB
+
+/**************************************************************************
+ *
+ * Ethtool operations
+ *
+ **************************************************************************
+ */
+
+/* Identify device by flashing LEDs */
+static int ef4_ethtool_phys_id(struct net_device *net_dev,
+                              enum ethtool_phys_id_state state)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       /* Any state not handled below leaves the LED in default mode */
+       enum ef4_led_mode led_mode = EF4_LED_DEFAULT;
+
+       switch (state) {
+       case ETHTOOL_ID_ACTIVE:
+               return 1;       /* cycle on/off once per second */
+       case ETHTOOL_ID_INACTIVE:
+               led_mode = EF4_LED_DEFAULT;
+               break;
+       case ETHTOOL_ID_OFF:
+               led_mode = EF4_LED_OFF;
+               break;
+       case ETHTOOL_ID_ON:
+               led_mode = EF4_LED_ON;
+               break;
+       }
+
+       efx->type->set_id_led(efx, led_mode);
+       return 0;
+}
+
+/* This must be called with rtnl_lock held. */
+static int ef4_ethtool_get_settings(struct net_device *net_dev,
+                                   struct ethtool_cmd *ecmd)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       struct ef4_link_state *link = &efx->link_state;
+
+       /* The PHY operation fills in @ecmd with mac_lock held */
+       mutex_lock(&efx->mac_lock);
+       efx->phy_op->get_settings(efx, ecmd);
+       mutex_unlock(&efx->mac_lock);
+
+       /* Both MACs support pause frames (bidirectional and respond-only) */
+       ecmd->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+
+       if (!LOOPBACK_INTERNAL(efx))
+               return 0;
+
+       /* In an internal loopback mode, report speed and duplex from the
+        * driver's own link state, overriding what the PHY op filled in.
+        */
+       ethtool_cmd_speed_set(ecmd, link->speed);
+       if (link->fd)
+               ecmd->duplex = DUPLEX_FULL;
+       else
+               ecmd->duplex = DUPLEX_HALF;
+
+       return 0;
+}
+
+/* This must be called with rtnl_lock held. */
+static int ef4_ethtool_set_settings(struct net_device *net_dev,
+                                   struct ethtool_cmd *ecmd)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       bool gigabit = ethtool_cmd_speed(ecmd) == SPEED_1000;
+       int rc;
+
+       /* GMAC does not support 1000Mbps HD */
+       if (gigabit && ecmd->duplex != DUPLEX_FULL) {
+               netif_dbg(efx, drv, efx->net_dev,
+                         "rejecting unsupported 1000Mbps HD setting\n");
+               return -EINVAL;
+       }
+
+       /* The PHY operation applies the settings with mac_lock held */
+       mutex_lock(&efx->mac_lock);
+       rc = efx->phy_op->set_settings(efx, ecmd);
+       mutex_unlock(&efx->mac_lock);
+
+       return rc;
+}
+
+/* Fill in the driver name, driver version and PCI bus address */
+static void ef4_ethtool_get_drvinfo(struct net_device *net_dev,
+                                   struct ethtool_drvinfo *info)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       const char *bus_addr = pci_name(efx->pci_dev);
+
+       strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+       strlcpy(info->version, EF4_DRIVER_VERSION, sizeof(info->version));
+       strlcpy(info->bus_info, bus_addr, sizeof(info->bus_info));
+}
+
+/* Report the register dump length, as given by ef4_nic_get_regs_len() */
+static int ef4_ethtool_get_regs_len(struct net_device *net_dev)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+
+       return ef4_nic_get_regs_len(efx);
+}
+
+/* Dump the NIC registers into @buf and tag the dump with the NIC revision. */
+static void ef4_ethtool_get_regs(struct net_device *net_dev,
+                                struct ethtool_regs *regs, void *buf)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+
+       ef4_nic_get_regs(efx, buf);
+       regs->version = efx->type->revision;
+}
+
+/* Return the driver's current message-enable bitmask. */
+static u32 ef4_ethtool_get_msglevel(struct net_device *net_dev)
+{
+       const struct ef4_nic *efx = netdev_priv(net_dev);
+
+       return efx->msg_enable;
+}
+
+/* Replace the driver's message-enable bitmask with @msg_enable. */
+static void ef4_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable)
+{
+       struct ef4_nic *efx;
+
+       efx = netdev_priv(net_dev);
+       efx->msg_enable = msg_enable;
+}
+
+/**
+ * ef4_fill_test - fill in an individual self-test entry
+ * @test_index:        Index of the test
+ * @strings:           Ethtool strings, or %NULL
+ * @data:              Ethtool test results, or %NULL
+ * @test:              Pointer to test result (used only if data != %NULL)
+ * @unit_format:       Unit name format (e.g. "chan\%d")
+ * @unit_id:           Unit id (e.g. 0 for "chan0")
+ * @test_format:       Test name format (e.g. "loopback.\%s.tx_sent")
+ * @test_id:           Test id (e.g. "PHYXS" for "loopback.PHYXS.tx_sent")
+ *
+ * Fill in an individual self-test entry.  The result is stored at
+ * @data[@test_index] and the name, formatted as "<unit> <test>", at
+ * offset @test_index * ETH_GSTRING_LEN in @strings.  Either output may
+ * be skipped by passing %NULL.
+ */
+static void ef4_fill_test(unsigned int test_index, u8 *strings, u64 *data,
+                         int *test, const char *unit_format, int unit_id,
+                         const char *test_format, const char *test_id)
+{
+       char unit_str[ETH_GSTRING_LEN], test_str[ETH_GSTRING_LEN];
+
+       /* Fill data value, if applicable */
+       if (data)
+               data[test_index] = *test;
+
+       /* Fill string, if applicable */
+       if (strings) {
+               /* Only run the unit name through snprintf() when it
+                * actually contains a conversion; otherwise copy it
+                * verbatim, bounded by the size of the local buffer.
+                */
+               if (strchr(unit_format, '%'))
+                       snprintf(unit_str, sizeof(unit_str),
+                                unit_format, unit_id);
+               else
+                       strlcpy(unit_str, unit_format, sizeof(unit_str));
+               snprintf(test_str, sizeof(test_str), test_format, test_id);
+               snprintf(strings + test_index * ETH_GSTRING_LEN,
+                        ETH_GSTRING_LEN,
+                        "%-6s %-24s", unit_str, test_str);
+       }
+}
+
+/* Each of these expands to TWO consecutive arguments (a format string and
+ * the value it consumes), matching the unit_format/unit_id and
+ * test_format/test_id parameter pairs of ef4_fill_test().  Pointer
+ * arguments are parenthesised so that any expression may be passed.
+ */
+#define EF4_CHANNEL_NAME(_channel) "chan%d", (_channel)->channel
+#define EF4_TX_QUEUE_NAME(_tx_queue) "txq%d", (_tx_queue)->queue
+#define EF4_RX_QUEUE_NAME(_rx_queue) "rxq%d", (_rx_queue)->queue
+#define EF4_LOOPBACK_NAME(_mode, _counter)                     \
+       "loopback.%s." _counter, STRING_TABLE_LOOKUP(_mode, ef4_loopback_mode)
+
+/**
+ * ef4_fill_loopback_test - fill in a block of loopback self-test entries
+ * @efx:               Efx NIC
+ * @lb_tests:          Efx loopback self-test results structure
+ * @mode:              Loopback test mode
+ * @test_index:        Starting index of the test
+ * @strings:           Ethtool strings, or %NULL
+ * @data:              Ethtool test results, or %NULL
+ *
+ * Emits a tx_sent/tx_done pair for every TX queue of the channel at
+ * efx->tx_channel_offset, followed by rx_good and rx_bad.  Returns the
+ * index following the last entry written.
+ */
+static int ef4_fill_loopback_test(struct ef4_nic *efx,
+                                 struct ef4_loopback_self_tests *lb_tests,
+                                 enum ef4_loopback_mode mode,
+                                 unsigned int test_index,
+                                 u8 *strings, u64 *data)
+{
+       struct ef4_tx_queue *txq;
+       struct ef4_channel *tx_channel;
+       unsigned int next = test_index;
+
+       tx_channel = ef4_get_channel(efx, efx->tx_channel_offset);
+
+       /* Two entries per TX queue: frames sent, then completions seen */
+       ef4_for_each_channel_tx_queue(txq, tx_channel) {
+               ef4_fill_test(next++, strings, data,
+                             &lb_tests->tx_sent[txq->queue],
+                             EF4_TX_QUEUE_NAME(txq),
+                             EF4_LOOPBACK_NAME(mode, "tx_sent"));
+               ef4_fill_test(next++, strings, data,
+                             &lb_tests->tx_done[txq->queue],
+                             EF4_TX_QUEUE_NAME(txq),
+                             EF4_LOOPBACK_NAME(mode, "tx_done"));
+       }
+
+       /* Receive-side counters are not per queue */
+       ef4_fill_test(next++, strings, data,
+                     &lb_tests->rx_good,
+                     "rx", 0,
+                     EF4_LOOPBACK_NAME(mode, "rx_good"));
+       ef4_fill_test(next++, strings, data,
+                     &lb_tests->rx_bad,
+                     "rx", 0,
+                     EF4_LOOPBACK_NAME(mode, "rx_bad"));
+
+       return next;
+}
+
+/**
+ * ef4_ethtool_fill_self_tests - get self-test details
+ * @efx:               Efx NIC
+ * @tests:             Efx self-test results structure, or %NULL
+ * @strings:           Ethtool strings, or %NULL
+ * @data:              Ethtool test results, or %NULL
+ *
+ * Get self-test number of strings, strings, and/or test results.
+ * Return number of strings (== number of test results).
+ *
+ * The reason for merging these three functions is to make sure that
+ * they can never be inconsistent.
+ *
+ * Entries are emitted in a fixed order: phy alive, nvram, interrupt,
+ * two event-queue entries per channel, memory, registers, any extended
+ * PHY tests, then one block per enabled loopback mode.  When @tests is
+ * %NULL only addresses of its members are formed here; ef4_fill_test()
+ * reads through them only if @data is non-%NULL.
+ */
+static int ef4_ethtool_fill_self_tests(struct ef4_nic *efx,
+                                      struct ef4_self_tests *tests,
+                                      u8 *strings, u64 *data)
+{
+       struct ef4_channel *channel;
+       unsigned int n = 0, i;
+       enum ef4_loopback_mode mode;
+
+       /* Fixed entries that exist on every NIC */
+       ef4_fill_test(n++, strings, data, &tests->phy_alive,
+                     "phy", 0, "alive", NULL);
+       ef4_fill_test(n++, strings, data, &tests->nvram,
+                     "core", 0, "nvram", NULL);
+       ef4_fill_test(n++, strings, data, &tests->interrupt,
+                     "core", 0, "interrupt", NULL);
+
+       /* Event queues */
+       ef4_for_each_channel(channel, efx) {
+               ef4_fill_test(n++, strings, data,
+                             &tests->eventq_dma[channel->channel],
+                             EF4_CHANNEL_NAME(channel),
+                             "eventq.dma", NULL);
+               ef4_fill_test(n++, strings, data,
+                             &tests->eventq_int[channel->channel],
+                             EF4_CHANNEL_NAME(channel),
+                             "eventq.int", NULL);
+       }
+
+       ef4_fill_test(n++, strings, data, &tests->memory,
+                     "core", 0, "memory", NULL);
+       ef4_fill_test(n++, strings, data, &tests->registers,
+                     "core", 0, "registers", NULL);
+
+       /* Extended PHY tests, present only if the PHY can run tests.
+        * Names are queried by index until test_name() returns NULL.
+        */
+       if (efx->phy_op->run_tests != NULL) {
+               EF4_BUG_ON_PARANOID(efx->phy_op->test_name == NULL);
+
+               for (i = 0; true; ++i) {
+                       const char *name;
+
+                       EF4_BUG_ON_PARANOID(i >= EF4_MAX_PHY_TESTS);
+                       name = efx->phy_op->test_name(efx, i);
+                       if (name == NULL)
+                               break;
+
+                       ef4_fill_test(n++, strings, data, &tests->phy_ext[i],
+                                     "phy", 0, name, NULL);
+               }
+       }
+
+       /* Loopback tests: one block per mode whose bit is set in
+        * efx->loopback_modes.
+        */
+       for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) {
+               if (!(efx->loopback_modes & (1 << mode)))
+                       continue;
+               n = ef4_fill_loopback_test(efx,
+                                          &tests->loopback[mode], mode, n,
+                                          strings, data);
+       }
+
+       return n;
+}
+
+/* Count the per-queue statistics (one per channel with TX queues, then one
+ * per channel with an RX queue) and, if @strings is non-NULL, write their
+ * names as consecutive ETH_GSTRING_LEN-sized entries.
+ */
+static size_t ef4_describe_per_queue_stats(struct ef4_nic *efx, u8 *strings)
+{
+       struct ef4_channel *channel;
+       size_t count = 0;
+       u8 *name = strings;
+
+       /* TX entries come first ... */
+       ef4_for_each_channel(channel, efx) {
+               if (!ef4_channel_has_tx_queues(channel))
+                       continue;
+               count++;
+               if (name == NULL)
+                       continue;
+               snprintf(name, ETH_GSTRING_LEN,
+                        "tx-%u.tx_packets",
+                        channel->tx_queue[0].queue /
+                        EF4_TXQ_TYPES);
+               name += ETH_GSTRING_LEN;
+       }
+
+       /* ... followed by the RX entries */
+       ef4_for_each_channel(channel, efx) {
+               if (!ef4_channel_has_rx_queue(channel))
+                       continue;
+               count++;
+               if (name == NULL)
+                       continue;
+               snprintf(name, ETH_GSTRING_LEN,
+                        "rx-%d.rx_packets", channel->channel);
+               name += ETH_GSTRING_LEN;
+       }
+
+       return count;
+}
+
+/* Return the number of strings in @string_set, or -EINVAL for a string
+ * set other than ETH_SS_STATS and ETH_SS_TEST.
+ */
+static int ef4_ethtool_get_sset_count(struct net_device *net_dev,
+                                     int string_set)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+
+       if (string_set == ETH_SS_TEST)
+               return ef4_ethtool_fill_self_tests(efx, NULL, NULL, NULL);
+
+       if (string_set != ETH_SS_STATS)
+               return -EINVAL;
+
+       /* NIC statistics + software statistics + per-queue statistics */
+       return efx->type->describe_stats(efx, NULL) +
+              EF4_ETHTOOL_SW_STAT_COUNT +
+              ef4_describe_per_queue_stats(efx, NULL);
+}
+
+/* Write the names for @string_set into @strings, one ETH_GSTRING_LEN-sized
+ * entry per name.  Unknown string sets are ignored.
+ */
+static void ef4_ethtool_get_strings(struct net_device *net_dev,
+                                   u32 string_set, u8 *strings)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       int i;
+
+       if (string_set == ETH_SS_TEST) {
+               ef4_ethtool_fill_self_tests(efx, NULL, strings, NULL);
+               return;
+       }
+
+       /* No other string sets */
+       if (string_set != ETH_SS_STATS)
+               return;
+
+       /* NIC statistics; the hook returns how many names it described */
+       strings += (efx->type->describe_stats(efx, strings) *
+                   ETH_GSTRING_LEN);
+
+       /* Software statistics */
+       for (i = 0; i < EF4_ETHTOOL_SW_STAT_COUNT; i++) {
+               strlcpy(strings, ef4_sw_stat_desc[i].name, ETH_GSTRING_LEN);
+               strings += ETH_GSTRING_LEN;
+       }
+
+       /* Per-queue statistics are last, so the cursor is not advanced */
+       ef4_describe_per_queue_stats(efx, strings);
+}
+
+/* Copy all statistics into @data: NIC statistics, then software
+ * statistics, then one TX packet count per channel with TX queues and one
+ * RX packet count per channel with an RX queue.
+ */
+static void ef4_ethtool_get_stats(struct net_device *net_dev,
+                                 struct ethtool_stats *stats,
+                                 u64 *data)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       struct ef4_channel *channel;
+       struct ef4_tx_queue *tx_queue;
+       struct ef4_rx_queue *rx_queue;
+       int i;
+
+       spin_lock_bh(&efx->stats_lock);
+
+       /* Get NIC statistics; the return value advances the cursor */
+       data += efx->type->update_stats(efx, data, NULL);
+
+       /* Get software statistics, one output slot per table entry */
+       for (i = 0; i < EF4_ETHTOOL_SW_STAT_COUNT; i++) {
+               const struct ef4_sw_stat_desc *desc = &ef4_sw_stat_desc[i];
+               u64 *slot = &data[i];
+
+               switch (desc->source) {
+               case EF4_ETHTOOL_STAT_SOURCE_nic:
+                       *slot = desc->get_stat((void *)efx + desc->offset);
+                       break;
+               case EF4_ETHTOOL_STAT_SOURCE_channel:
+                       /* Sum of the field over all channels */
+                       *slot = 0;
+                       ef4_for_each_channel(channel, efx)
+                               *slot += desc->get_stat((void *)channel +
+                                                       desc->offset);
+                       break;
+               case EF4_ETHTOOL_STAT_SOURCE_tx_queue:
+                       /* Sum of the field over all TX queues */
+                       *slot = 0;
+                       ef4_for_each_channel(channel, efx) {
+                               ef4_for_each_channel_tx_queue(tx_queue, channel)
+                                       *slot +=
+                                               desc->get_stat((void *)tx_queue
+                                                              + desc->offset);
+                       }
+                       break;
+               }
+       }
+       data += EF4_ETHTOOL_SW_STAT_COUNT;
+
+       spin_unlock_bh(&efx->stats_lock);
+
+       /* Per-channel packet counts are gathered after dropping the lock */
+       ef4_for_each_channel(channel, efx) {
+               u64 tx_total = 0;
+
+               if (!ef4_channel_has_tx_queues(channel))
+                       continue;
+               ef4_for_each_channel_tx_queue(tx_queue, channel) {
+                       tx_total += tx_queue->tx_packets;
+               }
+               *data++ = tx_total;
+       }
+       ef4_for_each_channel(channel, efx) {
+               u64 rx_total = 0;
+
+               if (!ef4_channel_has_rx_queue(channel))
+                       continue;
+               ef4_for_each_channel_rx_queue(rx_queue, channel) {
+                       rx_total += rx_queue->rx_packets;
+               }
+               *data++ = rx_total;
+       }
+}
+
+/* ethtool self-test handler.
+ *
+ * Runs ef4_selftest() with the requested flags and copies the per-test
+ * results into @data.  The interface is opened for the duration of the
+ * test if it was not already up, and closed again afterwards.
+ * ETH_TEST_FL_FAILED is set in @test->flags on any non-zero result,
+ * including allocation failure and the NIC not being in STATE_READY.
+ */
+static void ef4_ethtool_self_test(struct net_device *net_dev,
+                                 struct ethtool_test *test, u64 *data)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       struct ef4_self_tests *ef4_tests;
+       bool already_up;
+       int rc = -ENOMEM;
+
+       /* Zero-initialised, so entries for tests that never run read as 0 */
+       ef4_tests = kzalloc(sizeof(*ef4_tests), GFP_KERNEL);
+       if (!ef4_tests)
+               goto fail;
+
+       if (efx->state != STATE_READY) {
+               rc = -EBUSY;
+               goto out;
+       }
+
+       netif_info(efx, drv, efx->net_dev, "starting %sline testing\n",
+                  (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on");
+
+       /* We need rx buffers and interrupts. */
+       already_up = (efx->net_dev->flags & IFF_UP);
+       if (!already_up) {
+               rc = dev_open(efx->net_dev);
+               if (rc) {
+                       netif_err(efx, drv, efx->net_dev,
+                                 "failed opening device.\n");
+                       goto out;
+               }
+       }
+
+       rc = ef4_selftest(efx, ef4_tests, test->flags);
+
+       /* Restore the interface to the state we found it in */
+       if (!already_up)
+               dev_close(efx->net_dev);
+
+       netif_info(efx, drv, efx->net_dev, "%s %sline self-tests\n",
+                  rc == 0 ? "passed" : "failed",
+                  (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on");
+
+out:
+       /* Results are copied out on the early-exit paths as well */
+       ef4_ethtool_fill_self_tests(efx, ef4_tests, NULL, data);
+       kfree(ef4_tests);
+fail:
+       /* Reached directly only when the allocation failed; @data is not
+        * written in that case.
+        */
+       if (rc)
+               test->flags |= ETH_TEST_FL_FAILED;
+}
+
+/* Restart autonegotiation through the clause-45 MDIO helper */
+static int ef4_ethtool_nway_reset(struct net_device *net_dev)
+{
+       struct ef4_nic *efx;
+
+       efx = netdev_priv(net_dev);
+       return mdio45_nway_restart(&efx->mdio);
+}
+
+/*
+ * Each channel has a single IRQ and moderation timer, started by any
+ * completion (or other event).  Unless the module parameter
+ * separate_tx_channels is set, IRQs and moderation are therefore
+ * shared between RX and TX completions.  In this case, when RX IRQ
+ * moderation is explicitly changed then TX IRQ moderation is
+ * automatically changed too, but otherwise we fail if the two values
+ * are requested to be different.
+ *
+ * The hardware does not support a limit on the number of completions
+ * before an IRQ, so we do not use the max_frames fields.  We should
+ * report and require that max_frames == (usecs != 0), but this would
+ * invalidate existing user documentation.
+ *
+ * The hardware does not have distinct settings for interrupt
+ * moderation while the previous IRQ is being handled, so we should
+ * not use the 'irq' fields.  However, an earlier developer
+ * misunderstood the meaning of the 'irq' fields and the driver did
+ * not support the standard fields.  To avoid invalidating existing
+ * user documentation, we report and accept changes through either the
+ * standard or 'irq' fields.  If both are changed at the same time, we
+ * prefer the standard field.
+ *
+ * We implement adaptive IRQ moderation, but use a different algorithm
+ * from that assumed in the definition of struct ethtool_coalesce.
+ * Therefore we do not use any of the adaptive moderation parameters
+ * in it.
+ */
+
+/* Report the current IRQ moderation.  Each value is reported through both
+ * the standard and the 'irq' field.
+ */
+static int ef4_ethtool_get_coalesce(struct net_device *net_dev,
+                                   struct ethtool_coalesce *coalesce)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       unsigned int tx, rx;
+       bool adaptive;
+
+       ef4_get_irq_moderation(efx, &tx, &rx, &adaptive);
+
+       coalesce->rx_coalesce_usecs = rx;
+       coalesce->rx_coalesce_usecs_irq = rx;
+       coalesce->tx_coalesce_usecs = tx;
+       coalesce->tx_coalesce_usecs_irq = tx;
+       coalesce->use_adaptive_rx_coalesce = adaptive;
+
+       return 0;
+}
+
+/* Change the IRQ moderation.  For each direction the standard field is
+ * used if it differs from the current value, otherwise the 'irq' field.
+ * Returns -EINVAL if adaptive TX coalescing is requested, or the error
+ * from ef4_init_irq_moderation().
+ */
+static int ef4_ethtool_set_coalesce(struct net_device *net_dev,
+                                   struct ethtool_coalesce *coalesce)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       unsigned int cur_tx, cur_rx, new_tx, new_rx;
+       struct ef4_channel *channel;
+       bool rx_adaptive, tx_unchanged;
+       int rc;
+
+       if (coalesce->use_adaptive_tx_coalesce)
+               return -EINVAL;
+
+       ef4_get_irq_moderation(efx, &cur_tx, &cur_rx, &rx_adaptive);
+
+       new_rx = (coalesce->rx_coalesce_usecs != cur_rx) ?
+               coalesce->rx_coalesce_usecs :
+               coalesce->rx_coalesce_usecs_irq;
+       new_tx = (coalesce->tx_coalesce_usecs != cur_tx) ?
+               coalesce->tx_coalesce_usecs :
+               coalesce->tx_coalesce_usecs_irq;
+
+       rx_adaptive = coalesce->use_adaptive_rx_coalesce;
+
+       /* If channels are shared, TX IRQ moderation can be quietly
+        * overridden unless it is changed from its old value.
+        */
+       tx_unchanged = (coalesce->tx_coalesce_usecs == cur_tx &&
+                       coalesce->tx_coalesce_usecs_irq == cur_tx);
+
+       rc = ef4_init_irq_moderation(efx, new_tx, new_rx, rx_adaptive,
+                                    tx_unchanged);
+       if (rc)
+               return rc;
+
+       /* Push the new settings out to every channel */
+       ef4_for_each_channel(channel, efx)
+               efx->type->push_irq_moderation(channel);
+
+       return 0;
+}
+
+/* Report the current and maximum RX/TX ring sizes. */
+static void ef4_ethtool_get_ringparam(struct net_device *net_dev,
+                                     struct ethtool_ringparam *ring)
+{
+       const struct ef4_nic *efx = netdev_priv(net_dev);
+
+       /* Current sizes */
+       ring->rx_pending = efx->rxq_entries;
+       ring->tx_pending = efx->txq_entries;
+
+       /* Both directions share the same upper limit */
+       ring->rx_max_pending = EF4_MAX_DMAQ_SIZE;
+       ring->tx_max_pending = EF4_MAX_DMAQ_SIZE;
+}
+
+/* Resize the RX/TX rings.  Mini and jumbo rings are not supported, and
+ * sizes above EF4_MAX_DMAQ_SIZE or RX sizes below EF4_RXQ_MIN_ENT are
+ * rejected with -EINVAL.  A TX size below the minimum is raised to it,
+ * with a warning.
+ */
+static int ef4_ethtool_set_ringparam(struct net_device *net_dev,
+                                    struct ethtool_ringparam *ring)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       u32 txq_entries;
+
+       if (ring->rx_mini_pending || ring->rx_jumbo_pending)
+               return -EINVAL;
+       if (ring->rx_pending > EF4_MAX_DMAQ_SIZE ||
+           ring->tx_pending > EF4_MAX_DMAQ_SIZE)
+               return -EINVAL;
+
+       if (ring->rx_pending < EF4_RXQ_MIN_ENT) {
+               netif_err(efx, drv, efx->net_dev,
+                         "RX queues cannot be smaller than %u\n",
+                         EF4_RXQ_MIN_ENT);
+               return -EINVAL;
+       }
+
+       /* Clamp the TX ring up to the minimum and say so if we did */
+       txq_entries = max(ring->tx_pending, EF4_TXQ_MIN_ENT(efx));
+       if (ring->tx_pending != txq_entries)
+               netif_warn(efx, drv, efx->net_dev,
+                          "increasing TX queue size to minimum of %u\n",
+                          txq_entries);
+
+       return ef4_realloc_channels(efx, ring->rx_pending, txq_entries);
+}
+
+/* Set the requested flow-control (pause) mode.
+ *
+ * Rejects TX-only pause, and autonegotiated pause when link_advertising
+ * is zero.  Otherwise records the wanted mode, reconfigures the PHY if the
+ * advertising mask or the autoneg bit changed, and then reconfigures the
+ * MAC.  All of this runs under mac_lock.  Returns 0 or a negative error
+ * code.
+ */
+static int ef4_ethtool_set_pauseparam(struct net_device *net_dev,
+                                     struct ethtool_pauseparam *pause)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       u8 wanted_fc, old_fc;
+       u32 old_adv;
+       int rc = 0;
+
+       mutex_lock(&efx->mac_lock);
+
+       /* Translate the ethtool request into the driver's EF4_FC_* bits */
+       wanted_fc = ((pause->rx_pause ? EF4_FC_RX : 0) |
+                    (pause->tx_pause ? EF4_FC_TX : 0) |
+                    (pause->autoneg ? EF4_FC_AUTO : 0));
+
+       if ((wanted_fc & EF4_FC_TX) && !(wanted_fc & EF4_FC_RX)) {
+               netif_dbg(efx, drv, efx->net_dev,
+                         "Flow control unsupported: tx ON rx OFF\n");
+               rc = -EINVAL;
+               goto out;
+       }
+
+       if ((wanted_fc & EF4_FC_AUTO) && !efx->link_advertising) {
+               netif_dbg(efx, drv, efx->net_dev,
+                         "Autonegotiation is disabled\n");
+               rc = -EINVAL;
+               goto out;
+       }
+
+       /* Hook for Falcon bug 11482 workaround; only invoked when TX pause
+        * is being switched on from off.
+        */
+       if (efx->type->prepare_enable_fc_tx &&
+           (wanted_fc & EF4_FC_TX) && !(efx->wanted_fc & EF4_FC_TX))
+               efx->type->prepare_enable_fc_tx(efx);
+
+       /* Snapshot the old state so that changes made by
+        * ef4_link_set_wanted_fc() can be detected below.
+        */
+       old_adv = efx->link_advertising;
+       old_fc = efx->wanted_fc;
+       ef4_link_set_wanted_fc(efx, wanted_fc);
+       if (efx->link_advertising != old_adv ||
+           (efx->wanted_fc ^ old_fc) & EF4_FC_AUTO) {
+               rc = efx->phy_op->reconfigure(efx);
+               if (rc) {
+                       netif_err(efx, drv, efx->net_dev,
+                                 "Unable to advertise requested flow "
+                                 "control setting\n");
+                       goto out;
+               }
+       }
+
+       /* Reconfigure the MAC. The PHY *may* generate a link state change event
+        * if the user just changed the advertised capabilities, but there's no
+        * harm doing this twice */
+       ef4_mac_reconfigure(efx);
+
+out:
+       mutex_unlock(&efx->mac_lock);
+
+       return rc;
+}
+
+/* Report the wanted flow-control mode as separate RX/TX/autoneg flags. */
+static void ef4_ethtool_get_pauseparam(struct net_device *net_dev,
+                                      struct ethtool_pauseparam *pause)
+{
+       const struct ef4_nic *efx = netdev_priv(net_dev);
+
+       pause->autoneg = (efx->wanted_fc & EF4_FC_AUTO) != 0;
+       pause->rx_pause = (efx->wanted_fc & EF4_FC_RX) != 0;
+       pause->tx_pause = (efx->wanted_fc & EF4_FC_TX) != 0;
+}
+
+/* Report Wake-on-LAN settings by delegating to the NIC-type hook.
+ * The hook's result is not propagated: this function returns void, so it
+ * must not use "return <expression>;".
+ */
+static void ef4_ethtool_get_wol(struct net_device *net_dev,
+                               struct ethtool_wolinfo *wol)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+
+       efx->type->get_wol(efx, wol);
+}
+
+
+/* Pass the requested Wake-on-LAN option mask to the NIC-type hook and
+ * return its result.
+ */
+static int ef4_ethtool_set_wol(struct net_device *net_dev,
+                              struct ethtool_wolinfo *wol)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       u32 wanted = wol->wolopts;
+
+       return efx->type->set_wol(efx, wanted);
+}
+
+/* Map the ethtool reset flags to a reset method via the NIC-type hook and
+ * perform that reset.  A negative result from the mapping is returned
+ * unchanged.
+ */
+static int ef4_ethtool_reset(struct net_device *net_dev, u32 *flags)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       int method = efx->type->map_reset_flags(flags);
+
+       if (method < 0)
+               return method;
+
+       return ef4_reset(efx, method);
+}
+
+/* MAC address mask including only I/G bit (bit 0 of the first octet) */
+static const u8 mac_addr_ig_mask[ETH_ALEN] __aligned(2) = {0x01, 0, 0, 0, 0, 0};
+
+/* All-ones masks for individual flow-spec fields.  The __force casts keep
+ * the endian-annotated types without a byte swap, which would be a no-op
+ * on an all-ones value anyway.
+ */
+#define IP4_ADDR_FULL_MASK     ((__force __be32)~0)
+#define IP_PROTO_FULL_MASK     0xFF
+#define PORT_FULL_MASK         ((__force __be16)~0)
+#define ETHER_TYPE_FULL_MASK   ((__force __be16)~0)
+
+/* Set all four 32-bit words of an IPv6 address mask to all-ones. */
+static inline void ip6_fill_mask(__be32 *mask)
+{
+       int word;
+
+       for (word = 0; word < 4; word++)
+               mask[word] = ~(__be32)0;
+}
+
+/* Translate the manually inserted filter at @rule->location into an
+ * ethtool flow spec.
+ *
+ * The filter is classified by its match flags as, in order of preference:
+ * TCP/UDP over IPv4, TCP/UDP over IPv6, an Ethernet-level match, an IPv4
+ * user flow or an IPv6 user flow.  Only fields the filter actually matches
+ * on are written, each with an all-ones mask (or the I/G-bit mask for a
+ * LOC_MAC_IG match).  An outer VLAN match is reported through FLOW_EXT.
+ *
+ * Returns 0 on success, the error from ef4_filter_get_filter_safe(), or
+ * -EINVAL (with a warning) if the filter fits none of the above.
+ */
+static int ef4_ethtool_get_class_rule(struct ef4_nic *efx,
+                                     struct ethtool_rx_flow_spec *rule)
+{
+       /* Typed views of the same h_u (value) / m_u (mask) unions */
+       struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
+       struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
+       struct ethtool_usrip4_spec *uip_entry = &rule->h_u.usr_ip4_spec;
+       struct ethtool_usrip4_spec *uip_mask = &rule->m_u.usr_ip4_spec;
+       struct ethtool_tcpip6_spec *ip6_entry = &rule->h_u.tcp_ip6_spec;
+       struct ethtool_tcpip6_spec *ip6_mask = &rule->m_u.tcp_ip6_spec;
+       struct ethtool_usrip6_spec *uip6_entry = &rule->h_u.usr_ip6_spec;
+       struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec;
+       struct ethhdr *mac_entry = &rule->h_u.ether_spec;
+       struct ethhdr *mac_mask = &rule->m_u.ether_spec;
+       struct ef4_filter_spec spec;
+       int rc;
+
+       rc = ef4_filter_get_filter_safe(efx, EF4_FILTER_PRI_MANUAL,
+                                       rule->location, &spec);
+       if (rc)
+               return rc;
+
+       /* A drop filter is reported with the special discard cookie */
+       if (spec.dmaq_id == EF4_FILTER_RX_DMAQ_ID_DROP)
+               rule->ring_cookie = RX_CLS_FLOW_DISC;
+       else
+               rule->ring_cookie = spec.dmaq_id;
+
+       /* TCP or UDP over IPv4 */
+       if ((spec.match_flags & EF4_FILTER_MATCH_ETHER_TYPE) &&
+           spec.ether_type == htons(ETH_P_IP) &&
+           (spec.match_flags & EF4_FILTER_MATCH_IP_PROTO) &&
+           (spec.ip_proto == IPPROTO_TCP || spec.ip_proto == IPPROTO_UDP) &&
+           !(spec.match_flags &
+             ~(EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_OUTER_VID |
+               EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_REM_HOST |
+               EF4_FILTER_MATCH_IP_PROTO |
+               EF4_FILTER_MATCH_LOC_PORT | EF4_FILTER_MATCH_REM_PORT))) {
+               rule->flow_type = ((spec.ip_proto == IPPROTO_TCP) ?
+                                  TCP_V4_FLOW : UDP_V4_FLOW);
+               if (spec.match_flags & EF4_FILTER_MATCH_LOC_HOST) {
+                       ip_entry->ip4dst = spec.loc_host[0];
+                       ip_mask->ip4dst = IP4_ADDR_FULL_MASK;
+               }
+               if (spec.match_flags & EF4_FILTER_MATCH_REM_HOST) {
+                       ip_entry->ip4src = spec.rem_host[0];
+                       ip_mask->ip4src = IP4_ADDR_FULL_MASK;
+               }
+               if (spec.match_flags & EF4_FILTER_MATCH_LOC_PORT) {
+                       ip_entry->pdst = spec.loc_port;
+                       ip_mask->pdst = PORT_FULL_MASK;
+               }
+               if (spec.match_flags & EF4_FILTER_MATCH_REM_PORT) {
+                       ip_entry->psrc = spec.rem_port;
+                       ip_mask->psrc = PORT_FULL_MASK;
+               }
+       /* TCP or UDP over IPv6 */
+       } else if ((spec.match_flags & EF4_FILTER_MATCH_ETHER_TYPE) &&
+           spec.ether_type == htons(ETH_P_IPV6) &&
+           (spec.match_flags & EF4_FILTER_MATCH_IP_PROTO) &&
+           (spec.ip_proto == IPPROTO_TCP || spec.ip_proto == IPPROTO_UDP) &&
+           !(spec.match_flags &
+             ~(EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_OUTER_VID |
+               EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_REM_HOST |
+               EF4_FILTER_MATCH_IP_PROTO |
+               EF4_FILTER_MATCH_LOC_PORT | EF4_FILTER_MATCH_REM_PORT))) {
+               rule->flow_type = ((spec.ip_proto == IPPROTO_TCP) ?
+                                  TCP_V6_FLOW : UDP_V6_FLOW);
+               if (spec.match_flags & EF4_FILTER_MATCH_LOC_HOST) {
+                       memcpy(ip6_entry->ip6dst, spec.loc_host,
+                              sizeof(ip6_entry->ip6dst));
+                       ip6_fill_mask(ip6_mask->ip6dst);
+               }
+               if (spec.match_flags & EF4_FILTER_MATCH_REM_HOST) {
+                       memcpy(ip6_entry->ip6src, spec.rem_host,
+                              sizeof(ip6_entry->ip6src));
+                       ip6_fill_mask(ip6_mask->ip6src);
+               }
+               if (spec.match_flags & EF4_FILTER_MATCH_LOC_PORT) {
+                       ip6_entry->pdst = spec.loc_port;
+                       ip6_mask->pdst = PORT_FULL_MASK;
+               }
+               if (spec.match_flags & EF4_FILTER_MATCH_REM_PORT) {
+                       ip6_entry->psrc = spec.rem_port;
+                       ip6_mask->psrc = PORT_FULL_MASK;
+               }
+       /* Ethernet-level match: MAC addresses, EtherType and/or VLAN only */
+       } else if (!(spec.match_flags &
+                    ~(EF4_FILTER_MATCH_LOC_MAC | EF4_FILTER_MATCH_LOC_MAC_IG |
+                      EF4_FILTER_MATCH_REM_MAC | EF4_FILTER_MATCH_ETHER_TYPE |
+                      EF4_FILTER_MATCH_OUTER_VID))) {
+               rule->flow_type = ETHER_FLOW;
+               if (spec.match_flags &
+                   (EF4_FILTER_MATCH_LOC_MAC | EF4_FILTER_MATCH_LOC_MAC_IG)) {
+                       ether_addr_copy(mac_entry->h_dest, spec.loc_mac);
+                       /* Full-address match gets an all-ones mask; an
+                        * I/G-only match gets the single-bit mask.
+                        */
+                       if (spec.match_flags & EF4_FILTER_MATCH_LOC_MAC)
+                               eth_broadcast_addr(mac_mask->h_dest);
+                       else
+                               ether_addr_copy(mac_mask->h_dest,
+                                               mac_addr_ig_mask);
+               }
+               if (spec.match_flags & EF4_FILTER_MATCH_REM_MAC) {
+                       ether_addr_copy(mac_entry->h_source, spec.rem_mac);
+                       eth_broadcast_addr(mac_mask->h_source);
+               }
+               if (spec.match_flags & EF4_FILTER_MATCH_ETHER_TYPE) {
+                       mac_entry->h_proto = spec.ether_type;
+                       mac_mask->h_proto = ETHER_TYPE_FULL_MASK;
+               }
+       /* IPv4 user flow: addresses and optionally the IP protocol */
+       } else if (spec.match_flags & EF4_FILTER_MATCH_ETHER_TYPE &&
+                  spec.ether_type == htons(ETH_P_IP) &&
+                  !(spec.match_flags &
+                    ~(EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_OUTER_VID |
+                      EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_REM_HOST |
+                      EF4_FILTER_MATCH_IP_PROTO))) {
+               rule->flow_type = IPV4_USER_FLOW;
+               uip_entry->ip_ver = ETH_RX_NFC_IP4;
+               if (spec.match_flags & EF4_FILTER_MATCH_IP_PROTO) {
+                       uip_mask->proto = IP_PROTO_FULL_MASK;
+                       uip_entry->proto = spec.ip_proto;
+               }
+               if (spec.match_flags & EF4_FILTER_MATCH_LOC_HOST) {
+                       uip_entry->ip4dst = spec.loc_host[0];
+                       uip_mask->ip4dst = IP4_ADDR_FULL_MASK;
+               }
+               if (spec.match_flags & EF4_FILTER_MATCH_REM_HOST) {
+                       uip_entry->ip4src = spec.rem_host[0];
+                       uip_mask->ip4src = IP4_ADDR_FULL_MASK;
+               }
+       /* IPv6 user flow: addresses and optionally the L4 protocol */
+       } else if (spec.match_flags & EF4_FILTER_MATCH_ETHER_TYPE &&
+                  spec.ether_type == htons(ETH_P_IPV6) &&
+                  !(spec.match_flags &
+                    ~(EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_OUTER_VID |
+                      EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_REM_HOST |
+                      EF4_FILTER_MATCH_IP_PROTO))) {
+               rule->flow_type = IPV6_USER_FLOW;
+               if (spec.match_flags & EF4_FILTER_MATCH_IP_PROTO) {
+                       uip6_mask->l4_proto = IP_PROTO_FULL_MASK;
+                       uip6_entry->l4_proto = spec.ip_proto;
+               }
+               if (spec.match_flags & EF4_FILTER_MATCH_LOC_HOST) {
+                       memcpy(uip6_entry->ip6dst, spec.loc_host,
+                              sizeof(uip6_entry->ip6dst));
+                       ip6_fill_mask(uip6_mask->ip6dst);
+               }
+               if (spec.match_flags & EF4_FILTER_MATCH_REM_HOST) {
+                       memcpy(uip6_entry->ip6src, spec.rem_host,
+                              sizeof(uip6_entry->ip6src));
+                       ip6_fill_mask(uip6_mask->ip6src);
+               }
+       } else {
+               /* The above should handle all filters that we insert */
+               WARN_ON(1);
+               return -EINVAL;
+       }
+
+       /* VLAN match is orthogonal to the flow type; the mask covers the
+        * low 12 bits of the TCI.
+        */
+       if (spec.match_flags & EF4_FILTER_MATCH_OUTER_VID) {
+               rule->flow_type |= FLOW_EXT;
+               rule->h_ext.vlan_tci = spec.outer_vid;
+               rule->m_ext.vlan_tci = htons(0xfff);
+       }
+
+       /* rc is 0 here: any error was returned right after the lookup */
+       return rc;
+}
+
+/* ethtool RX network flow classification queries.
+ *
+ * Handles ETHTOOL_GRXRINGS, ETHTOOL_GRXFH, ETHTOOL_GRXCLSRLCNT,
+ * ETHTOOL_GRXCLSRULE and ETHTOOL_GRXCLSRLALL.  @rule_locs is used only by
+ * ETHTOOL_GRXCLSRLALL, where it receives the IDs of the manually inserted
+ * filters.  Returns 0 on success or a negative error code; unknown
+ * commands, and filter commands when the filter ID limit is 0, yield
+ * -EOPNOTSUPP.
+ */
+static int
+ef4_ethtool_get_rxnfc(struct net_device *net_dev,
+                     struct ethtool_rxnfc *info, u32 *rule_locs)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+
+       switch (info->cmd) {
+       case ETHTOOL_GRXRINGS:
+               info->data = efx->n_rx_channels;
+               return 0;
+
+       case ETHTOOL_GRXFH: {
+               unsigned int min_revision = 0;
+
+               info->data = 0;
+               switch (info->flow_type) {
+               case TCP_V4_FLOW:
+                       /* TCP adds the L4 ports to the address hash */
+                       info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+                       /* fall through */
+               case UDP_V4_FLOW:
+               case SCTP_V4_FLOW:
+               case AH_ESP_V4_FLOW:
+               case IPV4_FLOW:
+                       info->data |= RXH_IP_SRC | RXH_IP_DST;
+                       min_revision = EF4_REV_FALCON_B0;
+                       break;
+               default:
+                       break;
+               }
+               /* Report no hash fields on revisions older than required */
+               if (ef4_nic_rev(efx) < min_revision)
+                       info->data = 0;
+               return 0;
+       }
+
+       case ETHTOOL_GRXCLSRLCNT:
+               info->data = ef4_filter_get_rx_id_limit(efx);
+               if (info->data == 0)
+                       return -EOPNOTSUPP;
+               info->data |= RX_CLS_LOC_SPECIAL;
+               info->rule_cnt =
+                       ef4_filter_count_rx_used(efx, EF4_FILTER_PRI_MANUAL);
+               return 0;
+
+       case ETHTOOL_GRXCLSRULE:
+               if (ef4_filter_get_rx_id_limit(efx) == 0)
+                       return -EOPNOTSUPP;
+               return ef4_ethtool_get_class_rule(efx, &info->fs);
+
+       case ETHTOOL_GRXCLSRLALL: {
+               s32 rc;
+
+               info->data = ef4_filter_get_rx_id_limit(efx);
+               if (info->data == 0)
+                       return -EOPNOTSUPP;
+               /* On success rc is the number of IDs stored in rule_locs */
+               rc = ef4_filter_get_rx_ids(efx, EF4_FILTER_PRI_MANUAL,
+                                          rule_locs, info->rule_cnt);
+               if (rc < 0)
+                       return rc;
+               info->rule_cnt = rc;
+               return 0;
+       }
+
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+/* True iff every bit in all four 32-bit words of @mask is set. */
+static inline bool ip6_mask_is_full(__be32 mask[4])
+{
+       __be32 common = mask[0];
+       int i;
+
+       /* AND the words together: a clear bit anywhere survives the fold */
+       for (i = 1; i < 4; i++)
+               common &= mask[i];
+
+       return ~common == 0;
+}
+
+/* True iff no bit in any of the four 32-bit words of @mask is set. */
+static inline bool ip6_mask_is_empty(__be32 mask[4])
+{
+       __be32 any = mask[0];
+       int i;
+
+       /* OR the words together: a set bit anywhere survives the fold */
+       for (i = 1; i < 4; i++)
+               any |= mask[i];
+
+       return any == 0;
+}
+
+/* Translate an ethtool RX flow classification rule into an ef4_filter_spec
+ * and insert it as an EF4_FILTER_PRI_MANUAL filter.
+ *
+ * Accepted rules:
+ *  - rule->location must be RX_CLS_LOC_ANY (the driver picks the slot);
+ *  - rule->ring_cookie must be below efx->n_rx_channels or be
+ *    RX_CLS_FLOW_DISC;
+ *  - with FLOW_EXT, the vlan_etype and data[] masks must be zero;
+ *  - flow types TCP/UDP over IPv4/IPv6, IPV4_USER_FLOW, IPV6_USER_FLOW and
+ *    ETHER_FLOW;
+ *  - each matched field's mask is either all-zeroes (field ignored) or
+ *    all-ones (exact match).  The exceptions visible below are the
+ *    destination MAC, which may also use mac_addr_ig_mask, and the VLAN
+ *    TCI, whose only accepted non-zero mask is htons(0xfff);
+ *  - masks on tos/tclass, l4_4_bytes and ip_ver must be zero.
+ *
+ * Returns 0 on success, with the non-negative value returned by
+ * ef4_filter_insert_filter() stored in rule->location; -EINVAL for a rule
+ * that fails the checks above; or the negative value returned by
+ * ef4_filter_insert_filter().
+ */
+static int ef4_ethtool_set_class_rule(struct ef4_nic *efx,
+                                     struct ethtool_rx_flow_spec *rule)
+{
+       struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
+       struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
+       struct ethtool_usrip4_spec *uip_entry = &rule->h_u.usr_ip4_spec;
+       struct ethtool_usrip4_spec *uip_mask = &rule->m_u.usr_ip4_spec;
+       struct ethtool_tcpip6_spec *ip6_entry = &rule->h_u.tcp_ip6_spec;
+       struct ethtool_tcpip6_spec *ip6_mask = &rule->m_u.tcp_ip6_spec;
+       struct ethtool_usrip6_spec *uip6_entry = &rule->h_u.usr_ip6_spec;
+       struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec;
+       struct ethhdr *mac_entry = &rule->h_u.ether_spec;
+       struct ethhdr *mac_mask = &rule->m_u.ether_spec;
+       struct ef4_filter_spec spec;
+       int rc;
+
+       /* Check that user wants us to choose the location */
+       if (rule->location != RX_CLS_LOC_ANY)
+               return -EINVAL;
+
+       /* Range-check ring_cookie */
+       if (rule->ring_cookie >= efx->n_rx_channels &&
+           rule->ring_cookie != RX_CLS_FLOW_DISC)
+               return -EINVAL;
+
+       /* Check for unsupported extensions */
+       if ((rule->flow_type & FLOW_EXT) &&
+           (rule->m_ext.vlan_etype || rule->m_ext.data[0] ||
+            rule->m_ext.data[1]))
+               return -EINVAL;
+
+       /* RX_CLS_FLOW_DISC is mapped to the drop queue ID; any other
+        * ring_cookie is used directly as the destination RX queue.
+        */
+       ef4_filter_init_rx(&spec, EF4_FILTER_PRI_MANUAL,
+                          efx->rx_scatter ? EF4_FILTER_FLAG_RX_SCATTER : 0,
+                          (rule->ring_cookie == RX_CLS_FLOW_DISC) ?
+                          EF4_FILTER_RX_DMAQ_ID_DROP : rule->ring_cookie);
+
+       switch (rule->flow_type & ~FLOW_EXT) {
+       case TCP_V4_FLOW:
+       case UDP_V4_FLOW:
+               spec.match_flags = (EF4_FILTER_MATCH_ETHER_TYPE |
+                                   EF4_FILTER_MATCH_IP_PROTO);
+               spec.ether_type = htons(ETH_P_IP);
+               spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V4_FLOW ?
+                                IPPROTO_TCP : IPPROTO_UDP);
+               if (ip_mask->ip4dst) {
+                       if (ip_mask->ip4dst != IP4_ADDR_FULL_MASK)
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_LOC_HOST;
+                       spec.loc_host[0] = ip_entry->ip4dst;
+               }
+               if (ip_mask->ip4src) {
+                       if (ip_mask->ip4src != IP4_ADDR_FULL_MASK)
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_REM_HOST;
+                       spec.rem_host[0] = ip_entry->ip4src;
+               }
+               if (ip_mask->pdst) {
+                       if (ip_mask->pdst != PORT_FULL_MASK)
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_LOC_PORT;
+                       spec.loc_port = ip_entry->pdst;
+               }
+               if (ip_mask->psrc) {
+                       if (ip_mask->psrc != PORT_FULL_MASK)
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_REM_PORT;
+                       spec.rem_port = ip_entry->psrc;
+               }
+               if (ip_mask->tos)
+                       return -EINVAL;
+               break;
+
+       case TCP_V6_FLOW:
+       case UDP_V6_FLOW:
+               spec.match_flags = (EF4_FILTER_MATCH_ETHER_TYPE |
+                                   EF4_FILTER_MATCH_IP_PROTO);
+               spec.ether_type = htons(ETH_P_IPV6);
+               spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V6_FLOW ?
+                                IPPROTO_TCP : IPPROTO_UDP);
+               if (!ip6_mask_is_empty(ip6_mask->ip6dst)) {
+                       if (!ip6_mask_is_full(ip6_mask->ip6dst))
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_LOC_HOST;
+                       memcpy(spec.loc_host, ip6_entry->ip6dst, sizeof(spec.loc_host));
+               }
+               if (!ip6_mask_is_empty(ip6_mask->ip6src)) {
+                       if (!ip6_mask_is_full(ip6_mask->ip6src))
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_REM_HOST;
+                       memcpy(spec.rem_host, ip6_entry->ip6src, sizeof(spec.rem_host));
+               }
+               if (ip6_mask->pdst) {
+                       if (ip6_mask->pdst != PORT_FULL_MASK)
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_LOC_PORT;
+                       spec.loc_port = ip6_entry->pdst;
+               }
+               if (ip6_mask->psrc) {
+                       if (ip6_mask->psrc != PORT_FULL_MASK)
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_REM_PORT;
+                       spec.rem_port = ip6_entry->psrc;
+               }
+               if (ip6_mask->tclass)
+                       return -EINVAL;
+               break;
+
+       case IPV4_USER_FLOW:
+               if (uip_mask->l4_4_bytes || uip_mask->tos || uip_mask->ip_ver ||
+                   uip_entry->ip_ver != ETH_RX_NFC_IP4)
+                       return -EINVAL;
+               spec.match_flags = EF4_FILTER_MATCH_ETHER_TYPE;
+               spec.ether_type = htons(ETH_P_IP);
+               if (uip_mask->ip4dst) {
+                       if (uip_mask->ip4dst != IP4_ADDR_FULL_MASK)
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_LOC_HOST;
+                       spec.loc_host[0] = uip_entry->ip4dst;
+               }
+               if (uip_mask->ip4src) {
+                       if (uip_mask->ip4src != IP4_ADDR_FULL_MASK)
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_REM_HOST;
+                       spec.rem_host[0] = uip_entry->ip4src;
+               }
+               if (uip_mask->proto) {
+                       if (uip_mask->proto != IP_PROTO_FULL_MASK)
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_IP_PROTO;
+                       spec.ip_proto = uip_entry->proto;
+               }
+               break;
+
+       case IPV6_USER_FLOW:
+               if (uip6_mask->l4_4_bytes || uip6_mask->tclass)
+                       return -EINVAL;
+               spec.match_flags = EF4_FILTER_MATCH_ETHER_TYPE;
+               spec.ether_type = htons(ETH_P_IPV6);
+               if (!ip6_mask_is_empty(uip6_mask->ip6dst)) {
+                       if (!ip6_mask_is_full(uip6_mask->ip6dst))
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_LOC_HOST;
+                       memcpy(spec.loc_host, uip6_entry->ip6dst, sizeof(spec.loc_host));
+               }
+               if (!ip6_mask_is_empty(uip6_mask->ip6src)) {
+                       if (!ip6_mask_is_full(uip6_mask->ip6src))
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_REM_HOST;
+                       memcpy(spec.rem_host, uip6_entry->ip6src, sizeof(spec.rem_host));
+               }
+               if (uip6_mask->l4_proto) {
+                       if (uip6_mask->l4_proto != IP_PROTO_FULL_MASK)
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_IP_PROTO;
+                       spec.ip_proto = uip6_entry->l4_proto;
+               }
+               break;
+
+       case ETHER_FLOW:
+               /* Destination MAC: mac_addr_ig_mask selects the LOC_MAC_IG
+                * match, an all-ones mask selects a full LOC_MAC match.
+                */
+               if (!is_zero_ether_addr(mac_mask->h_dest)) {
+                       if (ether_addr_equal(mac_mask->h_dest,
+                                            mac_addr_ig_mask))
+                               spec.match_flags |= EF4_FILTER_MATCH_LOC_MAC_IG;
+                       else if (is_broadcast_ether_addr(mac_mask->h_dest))
+                               spec.match_flags |= EF4_FILTER_MATCH_LOC_MAC;
+                       else
+                               return -EINVAL;
+                       ether_addr_copy(spec.loc_mac, mac_entry->h_dest);
+               }
+               if (!is_zero_ether_addr(mac_mask->h_source)) {
+                       if (!is_broadcast_ether_addr(mac_mask->h_source))
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_REM_MAC;
+                       ether_addr_copy(spec.rem_mac, mac_entry->h_source);
+               }
+               if (mac_mask->h_proto) {
+                       if (mac_mask->h_proto != ETHER_TYPE_FULL_MASK)
+                               return -EINVAL;
+                       spec.match_flags |= EF4_FILTER_MATCH_ETHER_TYPE;
+                       spec.ether_type = mac_entry->h_proto;
+               }
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       /* The only accepted non-zero VLAN TCI mask is htons(0xfff) */
+       if ((rule->flow_type & FLOW_EXT) && rule->m_ext.vlan_tci) {
+               if (rule->m_ext.vlan_tci != htons(0xfff))
+                       return -EINVAL;
+               spec.match_flags |= EF4_FILTER_MATCH_OUTER_VID;
+               spec.outer_vid = rule->h_ext.vlan_tci;
+       }
+
+       rc = ef4_filter_insert_filter(efx, &spec, true);
+       if (rc < 0)
+               return rc;
+
+       /* Report the value chosen by the filter code back to the caller */
+       rule->location = rc;
+       return 0;
+}
+
+/* ethtool set_rxnfc handler: inserts or deletes a manual RX classification
+ * rule.  Returns -EOPNOTSUPP when the filter ID limit is reported as 0 or
+ * the command is not an insert/delete; otherwise returns whatever the
+ * insert/remove helper returns.
+ */
+static int ef4_ethtool_set_rxnfc(struct net_device *net_dev,
+                                struct ethtool_rxnfc *info)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+
+       if (!ef4_filter_get_rx_id_limit(efx))
+               return -EOPNOTSUPP;
+
+       if (info->cmd == ETHTOOL_SRXCLSRLINS)
+               return ef4_ethtool_set_class_rule(efx, &info->fs);
+
+       if (info->cmd == ETHTOOL_SRXCLSRLDEL)
+               return ef4_filter_remove_id_safe(efx, EF4_FILTER_PRI_MANUAL,
+                                                info->fs.location);
+
+       return -EOPNOTSUPP;
+}
+
+/* Report the number of entries in the RX indirection table, or 0 when the
+ * NIC revision is older than Falcon B0 or only one RX channel is in use.
+ */
+static u32 ef4_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+
+       if (ef4_nic_rev(efx) < EF4_REV_FALCON_B0)
+               return 0;
+       if (efx->n_rx_channels == 1)
+               return 0;
+
+       return ARRAY_SIZE(efx->rx_indir_table);
+}
+
+/* ethtool get_rxfh handler.  Reports ETH_RSS_HASH_TOP through @hfunc and
+ * copies the whole RX indirection table to @indir; either output pointer
+ * may be NULL to skip it.  @key is never written.  Always returns 0.
+ */
+static int ef4_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key,
+                               u8 *hfunc)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+
+       if (hfunc != NULL)
+               *hfunc = ETH_RSS_HASH_TOP;
+
+       if (indir != NULL)
+               memcpy(indir, efx->rx_indir_table,
+                      sizeof(efx->rx_indir_table));
+
+       return 0;
+}
+
+/* ethtool set_rxfh handler.  Only the indirection table can be changed:
+ * a non-NULL @key, or an @hfunc other than ETH_RSS_HASH_NO_CHANGE or
+ * ETH_RSS_HASH_TOP, yields -EOPNOTSUPP.  With no @indir there is nothing
+ * to do and 0 is returned; otherwise the result of the NIC type's
+ * rx_push_rss_config() is returned.
+ */
+static int ef4_ethtool_set_rxfh(struct net_device *net_dev, const u32 *indir,
+                               const u8 *key, const u8 hfunc)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+
+       /* We do not allow change in unsupported parameters */
+       if (key)
+               return -EOPNOTSUPP;
+       if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+               return -EOPNOTSUPP;
+
+       if (indir == NULL)
+               return 0;
+
+       return efx->type->rx_push_rss_config(efx, true, indir);
+}
+
+/* ethtool get_module_eeprom handler: forwards to the PHY's
+ * get_module_eeprom operation while holding efx->mac_lock.
+ * Returns -EOPNOTSUPP when no PHY operations or no such operation exist.
+ */
+static int ef4_ethtool_get_module_eeprom(struct net_device *net_dev,
+                                        struct ethtool_eeprom *ee,
+                                        u8 *data)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       int rc;
+
+       if (!efx->phy_op)
+               return -EOPNOTSUPP;
+       if (!efx->phy_op->get_module_eeprom)
+               return -EOPNOTSUPP;
+
+       mutex_lock(&efx->mac_lock);
+       rc = efx->phy_op->get_module_eeprom(efx, ee, data);
+       mutex_unlock(&efx->mac_lock);
+
+       return rc;
+}
+
+/* ethtool get_module_info handler: forwards to the PHY's get_module_info
+ * operation while holding efx->mac_lock.
+ * Returns -EOPNOTSUPP when no PHY operations or no such operation exist.
+ */
+static int ef4_ethtool_get_module_info(struct net_device *net_dev,
+                                      struct ethtool_modinfo *modinfo)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       int rc;
+
+       if (!efx->phy_op)
+               return -EOPNOTSUPP;
+       if (!efx->phy_op->get_module_info)
+               return -EOPNOTSUPP;
+
+       mutex_lock(&efx->mac_lock);
+       rc = efx->phy_op->get_module_info(efx, modinfo);
+       mutex_unlock(&efx->mac_lock);
+
+       return rc;
+}
+
+/* ethtool operations table for this driver.  Every callback is one of the
+ * ef4_ethtool_* handlers except .get_link, which uses the generic
+ * ethtool_op_get_link().  Operations not named here are left
+ * zero-initialised.
+ */
+const struct ethtool_ops ef4_ethtool_ops = {
+       .get_settings           = ef4_ethtool_get_settings,
+       .set_settings           = ef4_ethtool_set_settings,
+       .get_drvinfo            = ef4_ethtool_get_drvinfo,
+       .get_regs_len           = ef4_ethtool_get_regs_len,
+       .get_regs               = ef4_ethtool_get_regs,
+       .get_msglevel           = ef4_ethtool_get_msglevel,
+       .set_msglevel           = ef4_ethtool_set_msglevel,
+       .nway_reset             = ef4_ethtool_nway_reset,
+       .get_link               = ethtool_op_get_link,
+       .get_coalesce           = ef4_ethtool_get_coalesce,
+       .set_coalesce           = ef4_ethtool_set_coalesce,
+       .get_ringparam          = ef4_ethtool_get_ringparam,
+       .set_ringparam          = ef4_ethtool_set_ringparam,
+       .get_pauseparam         = ef4_ethtool_get_pauseparam,
+       .set_pauseparam         = ef4_ethtool_set_pauseparam,
+       .get_sset_count         = ef4_ethtool_get_sset_count,
+       .self_test              = ef4_ethtool_self_test,
+       .get_strings            = ef4_ethtool_get_strings,
+       .set_phys_id            = ef4_ethtool_phys_id,
+       .get_ethtool_stats      = ef4_ethtool_get_stats,
+       .get_wol                = ef4_ethtool_get_wol,
+       .set_wol                = ef4_ethtool_set_wol,
+       .reset                  = ef4_ethtool_reset,
+       .get_rxnfc              = ef4_ethtool_get_rxnfc,
+       .set_rxnfc              = ef4_ethtool_set_rxnfc,
+       .get_rxfh_indir_size    = ef4_ethtool_get_rxfh_indir_size,
+       .get_rxfh               = ef4_ethtool_get_rxfh,
+       .set_rxfh               = ef4_ethtool_set_rxfh,
+       .get_module_info        = ef4_ethtool_get_module_info,
+       .get_module_eeprom      = ef4_ethtool_get_module_eeprom,
+};
diff --git a/drivers/net/ethernet/sfc/falcon/falcon.c b/drivers/net/ethernet/sfc/falcon/falcon.c
new file mode 100644 (file)
index 0000000..c6ff0cc
--- /dev/null
@@ -0,0 +1,2903 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/i2c.h>
+#include <linux/mii.h>
+#include <linux/slab.h>
+#include "net_driver.h"
+#include "bitfield.h"
+#include "efx.h"
+#include "nic.h"
+#include "farch_regs.h"
+#include "io.h"
+#include "phy.h"
+#include "workarounds.h"
+#include "selftest.h"
+#include "mdio_10g.h"
+
+/* Hardware control for SFC4000 (aka Falcon). */
+
+/**************************************************************************
+ *
+ * NIC stats
+ *
+ **************************************************************************
+ */
+
+#define FALCON_MAC_STATS_SIZE 0x100
+
+/* Each MAC statistic in this table is described by a pair of macros:
+ * <name>_offset is its byte offset from the start of the statistics DMA
+ * buffer and <name>_WIDTH is its width in bits.  FALCON_DMA_STAT() pastes
+ * these suffixes onto the hardware name to build falcon_stat_desc[].
+ */
+#define XgRxOctets_offset 0x0
+#define XgRxOctets_WIDTH 48
+#define XgRxOctetsOK_offset 0x8
+#define XgRxOctetsOK_WIDTH 48
+#define XgRxPkts_offset 0x10
+#define XgRxPkts_WIDTH 32
+#define XgRxPktsOK_offset 0x14
+#define XgRxPktsOK_WIDTH 32
+#define XgRxBroadcastPkts_offset 0x18
+#define XgRxBroadcastPkts_WIDTH 32
+#define XgRxMulticastPkts_offset 0x1C
+#define XgRxMulticastPkts_WIDTH 32
+#define XgRxUnicastPkts_offset 0x20
+#define XgRxUnicastPkts_WIDTH 32
+#define XgRxUndersizePkts_offset 0x24
+#define XgRxUndersizePkts_WIDTH 32
+#define XgRxOversizePkts_offset 0x28
+#define XgRxOversizePkts_WIDTH 32
+#define XgRxJabberPkts_offset 0x2C
+#define XgRxJabberPkts_WIDTH 32
+#define XgRxUndersizeFCSerrorPkts_offset 0x30
+#define XgRxUndersizeFCSerrorPkts_WIDTH 32
+#define XgRxDropEvents_offset 0x34
+#define XgRxDropEvents_WIDTH 32
+#define XgRxFCSerrorPkts_offset 0x38
+#define XgRxFCSerrorPkts_WIDTH 32
+#define XgRxAlignError_offset 0x3C
+#define XgRxAlignError_WIDTH 32
+#define XgRxSymbolError_offset 0x40
+#define XgRxSymbolError_WIDTH 32
+#define XgRxInternalMACError_offset 0x44
+#define XgRxInternalMACError_WIDTH 32
+#define XgRxControlPkts_offset 0x48
+#define XgRxControlPkts_WIDTH 32
+#define XgRxPausePkts_offset 0x4C
+#define XgRxPausePkts_WIDTH 32
+#define XgRxPkts64Octets_offset 0x50
+#define XgRxPkts64Octets_WIDTH 32
+#define XgRxPkts65to127Octets_offset 0x54
+#define XgRxPkts65to127Octets_WIDTH 32
+#define XgRxPkts128to255Octets_offset 0x58
+#define XgRxPkts128to255Octets_WIDTH 32
+#define XgRxPkts256to511Octets_offset 0x5C
+#define XgRxPkts256to511Octets_WIDTH 32
+#define XgRxPkts512to1023Octets_offset 0x60
+#define XgRxPkts512to1023Octets_WIDTH 32
+#define XgRxPkts1024to15xxOctets_offset 0x64
+#define XgRxPkts1024to15xxOctets_WIDTH 32
+#define XgRxPkts15xxtoMaxOctets_offset 0x68
+#define XgRxPkts15xxtoMaxOctets_WIDTH 32
+#define XgRxLengthError_offset 0x6C
+#define XgRxLengthError_WIDTH 32
+#define XgTxPkts_offset 0x80
+#define XgTxPkts_WIDTH 32
+#define XgTxOctets_offset 0x88
+#define XgTxOctets_WIDTH 48
+#define XgTxMulticastPkts_offset 0x90
+#define XgTxMulticastPkts_WIDTH 32
+#define XgTxBroadcastPkts_offset 0x94
+#define XgTxBroadcastPkts_WIDTH 32
+#define XgTxUnicastPkts_offset 0x98
+#define XgTxUnicastPkts_WIDTH 32
+#define XgTxControlPkts_offset 0x9C
+#define XgTxControlPkts_WIDTH 32
+#define XgTxPausePkts_offset 0xA0
+#define XgTxPausePkts_WIDTH 32
+#define XgTxPkts64Octets_offset 0xA4
+#define XgTxPkts64Octets_WIDTH 32
+#define XgTxPkts65to127Octets_offset 0xA8
+#define XgTxPkts65to127Octets_WIDTH 32
+#define XgTxPkts128to255Octets_offset 0xAC
+#define XgTxPkts128to255Octets_WIDTH 32
+#define XgTxPkts256to511Octets_offset 0xB0
+#define XgTxPkts256to511Octets_WIDTH 32
+#define XgTxPkts512to1023Octets_offset 0xB4
+#define XgTxPkts512to1023Octets_WIDTH 32
+#define XgTxPkts1024to15xxOctets_offset 0xB8
+#define XgTxPkts1024to15xxOctets_WIDTH 32
+#define XgTxPkts1519toMaxOctets_offset 0xBC
+#define XgTxPkts1519toMaxOctets_WIDTH 32
+#define XgTxUndersizePkts_offset 0xC0
+#define XgTxUndersizePkts_WIDTH 32
+#define XgTxOversizePkts_offset 0xC4
+#define XgTxOversizePkts_WIDTH 32
+#define XgTxNonTcpUdpPkt_offset 0xC8
+#define XgTxNonTcpUdpPkt_WIDTH 16
+#define XgTxMacSrcErrPkt_offset 0xCC
+#define XgTxMacSrcErrPkt_WIDTH 16
+#define XgTxIpSrcErrPkt_offset 0xD0
+#define XgTxIpSrcErrPkt_WIDTH 16
+#define XgDmaDone_offset 0xD4
+#define XgDmaDone_WIDTH 32
+
+/* Lvalue for the 32-bit word at XgDmaDone_offset within efx's statistics
+ * buffer.  NOTE(review): presumably written by hardware to signal that a
+ * stats DMA has completed - confirm against the users of this macro.
+ */
+#define FALCON_XMAC_STATS_DMA_FLAG(efx)                                \
+       (*(u32 *)((efx)->stats_buffer.addr + XgDmaDone_offset))
+
+/* Designated initialiser for the falcon_stat_desc[] entry
+ * FALCON_STAT_<ext_name>: the stringified name, the DMA width (taken from
+ * <hw_name>_WIDTH, with 48 reported as 64) and the DMA offset
+ * (<hw_name>_offset).
+ */
+#define FALCON_DMA_STAT(ext_name, hw_name)                             \
+       [FALCON_STAT_ ## ext_name] =                                    \
+       { #ext_name,                                                    \
+         /* 48-bit stats are zero-padded to 64 on DMA */               \
+         hw_name ## _ ## WIDTH == 48 ? 64 : hw_name ## _ ## WIDTH,     \
+         hw_name ## _ ## offset }
+/* Entry for FALCON_STAT_<ext_name> with width and offset both 0 */
+#define FALCON_OTHER_STAT(ext_name)                                    \
+       [FALCON_STAT_ ## ext_name] = { #ext_name, 0, 0 }
+/* Entry for GENERIC_STAT_<ext_name> with width and offset both 0 */
+#define GENERIC_SW_STAT(ext_name)                              \
+       [GENERIC_STAT_ ## ext_name] = { #ext_name, 0, 0 }
+
+/* Statistics descriptor table, indexed by FALCON_STAT_* / GENERIC_STAT_*.
+ * FALCON_DMA_STAT entries give the name plus the width and offset of the
+ * matching Xg* counter in the statistics DMA buffer.  FALCON_OTHER_STAT and
+ * GENERIC_SW_STAT entries carry only a name, with width and offset 0.
+ * NOTE(review): the zero-width entries are presumably filled in by
+ * software rather than read from the DMA buffer - confirm in the stats
+ * update code.
+ */
+static const struct ef4_hw_stat_desc falcon_stat_desc[FALCON_STAT_COUNT] = {
+       FALCON_DMA_STAT(tx_bytes, XgTxOctets),
+       FALCON_DMA_STAT(tx_packets, XgTxPkts),
+       FALCON_DMA_STAT(tx_pause, XgTxPausePkts),
+       FALCON_DMA_STAT(tx_control, XgTxControlPkts),
+       FALCON_DMA_STAT(tx_unicast, XgTxUnicastPkts),
+       FALCON_DMA_STAT(tx_multicast, XgTxMulticastPkts),
+       FALCON_DMA_STAT(tx_broadcast, XgTxBroadcastPkts),
+       FALCON_DMA_STAT(tx_lt64, XgTxUndersizePkts),
+       FALCON_DMA_STAT(tx_64, XgTxPkts64Octets),
+       FALCON_DMA_STAT(tx_65_to_127, XgTxPkts65to127Octets),
+       FALCON_DMA_STAT(tx_128_to_255, XgTxPkts128to255Octets),
+       FALCON_DMA_STAT(tx_256_to_511, XgTxPkts256to511Octets),
+       FALCON_DMA_STAT(tx_512_to_1023, XgTxPkts512to1023Octets),
+       FALCON_DMA_STAT(tx_1024_to_15xx, XgTxPkts1024to15xxOctets),
+       FALCON_DMA_STAT(tx_15xx_to_jumbo, XgTxPkts1519toMaxOctets),
+       FALCON_DMA_STAT(tx_gtjumbo, XgTxOversizePkts),
+       FALCON_DMA_STAT(tx_non_tcpudp, XgTxNonTcpUdpPkt),
+       FALCON_DMA_STAT(tx_mac_src_error, XgTxMacSrcErrPkt),
+       FALCON_DMA_STAT(tx_ip_src_error, XgTxIpSrcErrPkt),
+       FALCON_DMA_STAT(rx_bytes, XgRxOctets),
+       FALCON_DMA_STAT(rx_good_bytes, XgRxOctetsOK),
+       FALCON_OTHER_STAT(rx_bad_bytes),
+       FALCON_DMA_STAT(rx_packets, XgRxPkts),
+       FALCON_DMA_STAT(rx_good, XgRxPktsOK),
+       FALCON_DMA_STAT(rx_bad, XgRxFCSerrorPkts),
+       FALCON_DMA_STAT(rx_pause, XgRxPausePkts),
+       FALCON_DMA_STAT(rx_control, XgRxControlPkts),
+       FALCON_DMA_STAT(rx_unicast, XgRxUnicastPkts),
+       FALCON_DMA_STAT(rx_multicast, XgRxMulticastPkts),
+       FALCON_DMA_STAT(rx_broadcast, XgRxBroadcastPkts),
+       FALCON_DMA_STAT(rx_lt64, XgRxUndersizePkts),
+       FALCON_DMA_STAT(rx_64, XgRxPkts64Octets),
+       FALCON_DMA_STAT(rx_65_to_127, XgRxPkts65to127Octets),
+       FALCON_DMA_STAT(rx_128_to_255, XgRxPkts128to255Octets),
+       FALCON_DMA_STAT(rx_256_to_511, XgRxPkts256to511Octets),
+       FALCON_DMA_STAT(rx_512_to_1023, XgRxPkts512to1023Octets),
+       FALCON_DMA_STAT(rx_1024_to_15xx, XgRxPkts1024to15xxOctets),
+       FALCON_DMA_STAT(rx_15xx_to_jumbo, XgRxPkts15xxtoMaxOctets),
+       FALCON_DMA_STAT(rx_gtjumbo, XgRxOversizePkts),
+       FALCON_DMA_STAT(rx_bad_lt64, XgRxUndersizeFCSerrorPkts),
+       FALCON_DMA_STAT(rx_bad_gtjumbo, XgRxJabberPkts),
+       FALCON_DMA_STAT(rx_overflow, XgRxDropEvents),
+       FALCON_DMA_STAT(rx_symbol_error, XgRxSymbolError),
+       FALCON_DMA_STAT(rx_align_error, XgRxAlignError),
+       FALCON_DMA_STAT(rx_length_error, XgRxLengthError),
+       FALCON_DMA_STAT(rx_internal_error, XgRxInternalMACError),
+       FALCON_OTHER_STAT(rx_nodesc_drop_cnt),
+       GENERIC_SW_STAT(rx_nodesc_trunc),
+       GENERIC_SW_STAT(rx_noskb_drops),
+};
+/* Bitmap sized to hold FALCON_STAT_COUNT bits, with every word set to
+ * all-ones.  NOTE(review): presumably marks every entry of
+ * falcon_stat_desc[] as present - confirm against the users of the mask.
+ */
+static const unsigned long falcon_stat_mask[] = {
+       [0 ... BITS_TO_LONGS(FALCON_STAT_COUNT) - 1] = ~0UL,
+};
+
+/**************************************************************************
+ *
+ * Basic SPI command set and bit definitions
+ *
+ *************************************************************************/
+
+#define SPI_WRSR 0x01          /* Write status register */
+#define SPI_WRITE 0x02         /* Write data to memory array */
+#define SPI_READ 0x03          /* Read data from memory array */
+#define SPI_WRDI 0x04          /* Reset write enable latch */
+#define SPI_RDSR 0x05          /* Read status register */
+#define SPI_WREN 0x06          /* Set write enable latch */
+#define SPI_SST_EWSR 0x50      /* SST: Enable write to status register */
+
+#define SPI_STATUS_WPEN 0x80   /* Write-protect pin enabled */
+#define SPI_STATUS_BP2 0x10    /* Block protection bit 2 */
+#define SPI_STATUS_BP1 0x08    /* Block protection bit 1 */
+#define SPI_STATUS_BP0 0x04    /* Block protection bit 0 */
+#define SPI_STATUS_WEN 0x02    /* State of the write enable latch */
+#define SPI_STATUS_NRDY 0x01   /* Device busy flag */
+
+/**************************************************************************
+ *
+ * Non-volatile memory layout
+ *
+ **************************************************************************
+ */
+
+/* SFC4000 flash is partitioned into:
+ *     0-0x400       chip and board config (see struct falcon_nvconfig)
+ *     0x400-0x8000  unused (or may contain VPD if EEPROM not present)
+ *     0x8000-end    boot code (mapped to PCI expansion ROM)
+ * SFC4000 small EEPROM (size < 0x400) is used for VPD only.
+ * SFC4000 large EEPROM (size >= 0x400) is partitioned into:
+ *     0-0x400       chip and board config
+ *     configurable  VPD
+ *     0x800-0x1800  boot config
+ * Aside from the chip and board config, all of these are optional and may
+ * be absent or truncated depending on the devices used.
+ */
+#define FALCON_NVCONFIG_END 0x400U
+#define FALCON_FLASH_BOOTCODE_START 0x8000U
+#define FALCON_EEPROM_BOOTCONFIG_START 0x800U
+#define FALCON_EEPROM_BOOTCONFIG_END 0x1800U
+
+/* Board configuration v2 (v1 is obsolete; later versions are compatible) */
+/* Packed (no padding) board description embedded in struct falcon_nvconfig;
+ * the multi-byte fields are little-endian (__le16).
+ */
+struct falcon_nvconfig_board_v2 {
+       __le16 nports;
+       u8 port0_phy_addr;
+       u8 port0_phy_type;
+       u8 port1_phy_addr;
+       u8 port1_phy_type;
+       __le16 asic_sub_revision;
+       __le16 board_revision;
+} __packed;
+
+/* Board configuration v3 extra information */
+/* Two little-endian 32-bit SPI device type words; their bit fields are
+ * laid out by the SPI_DEV_TYPE_* definitions.
+ * NOTE(review): which index describes which device is not visible here.
+ */
+struct falcon_nvconfig_board_v3 {
+       __le32 spi_device_type[2];
+} __packed;
+
+/* Bit numbers for spi_device_type */
+#define SPI_DEV_TYPE_SIZE_LBN 0
+#define SPI_DEV_TYPE_SIZE_WIDTH 5
+#define SPI_DEV_TYPE_ADDR_LEN_LBN 6
+#define SPI_DEV_TYPE_ADDR_LEN_WIDTH 2
+#define SPI_DEV_TYPE_ERASE_CMD_LBN 8
+#define SPI_DEV_TYPE_ERASE_CMD_WIDTH 8
+#define SPI_DEV_TYPE_ERASE_SIZE_LBN 16
+#define SPI_DEV_TYPE_ERASE_SIZE_WIDTH 5
+#define SPI_DEV_TYPE_BLOCK_SIZE_LBN 24
+#define SPI_DEV_TYPE_BLOCK_SIZE_WIDTH 5
+#define SPI_DEV_TYPE_FIELD(type, field)                                        \
+       (((type) >> EF4_LOW_BIT(field)) & EF4_MASK32(EF4_WIDTH(field)))
+
+#define FALCON_NVCONFIG_OFFSET 0x300
+
+#define FALCON_NVCONFIG_BOARD_MAGIC_NUM 0xFA1C
+/* Packed layout of the chip and board configuration in non-volatile
+ * memory.  The trailing comments give each member's absolute offset; the
+ * structure starts at 0x300 (FALCON_NVCONFIG_OFFSET).  The board_* members
+ * beginning at 0x3A0 fill one 16-byte slot: three __le16 words followed by
+ * the 10-byte struct falcon_nvconfig_board_v2.
+ */
+struct falcon_nvconfig {
+       ef4_oword_t ee_vpd_cfg_reg;                     /* 0x300 */
+       u8 mac_address[2][8];                   /* 0x310 */
+       ef4_oword_t pcie_sd_ctl0123_reg;                /* 0x320 */
+       ef4_oword_t pcie_sd_ctl45_reg;                  /* 0x330 */
+       ef4_oword_t pcie_pcs_ctl_stat_reg;              /* 0x340 */
+       ef4_oword_t hw_init_reg;                        /* 0x350 */
+       ef4_oword_t nic_stat_reg;                       /* 0x360 */
+       ef4_oword_t glb_ctl_reg;                        /* 0x370 */
+       ef4_oword_t srm_cfg_reg;                        /* 0x380 */
+       ef4_oword_t spare_reg;                          /* 0x390 */
+       __le16 board_magic_num;                 /* 0x3A0 */
+       __le16 board_struct_ver;
+       __le16 board_checksum;
+       struct falcon_nvconfig_board_v2 board_v2;
+       ef4_oword_t ee_base_page_reg;                   /* 0x3B0 */
+       struct falcon_nvconfig_board_v3 board_v3;       /* 0x3C0 */
+} __packed;
+
+/*************************************************************************/
+
+static int falcon_reset_hw(struct ef4_nic *efx, enum reset_type method);
+static void falcon_reconfigure_mac_wrapper(struct ef4_nic *efx);
+
+/* Built-in SPI device type words, packed with the SPI_DEV_TYPE_*_LBN bit
+ * positions.  NOTE(review): judging by the values and the per-device
+ * comments (13 <-> 8 KB, 5 <-> 32 B, 17 <-> 128 KB), the size fields look
+ * like log2 of a byte count and ADDR_LEN like a count of address bytes -
+ * confirm against the code that decodes these words.
+ */
+static const unsigned int
+/* "Large" EEPROM device: Atmel AT25640 or similar
+ * 8 KB, 16-bit address, 32 B write block */
+large_eeprom_type = ((13 << SPI_DEV_TYPE_SIZE_LBN)
+                    | (2 << SPI_DEV_TYPE_ADDR_LEN_LBN)
+                    | (5 << SPI_DEV_TYPE_BLOCK_SIZE_LBN)),
+/* Default flash device: Atmel AT25F1024
+ * 128 KB, 24-bit address, 32 KB erase block, 256 B write block */
+default_flash_type = ((17 << SPI_DEV_TYPE_SIZE_LBN)
+                     | (3 << SPI_DEV_TYPE_ADDR_LEN_LBN)
+                     | (0x52 << SPI_DEV_TYPE_ERASE_CMD_LBN)
+                     | (15 << SPI_DEV_TYPE_ERASE_SIZE_LBN)
+                     | (8 << SPI_DEV_TYPE_BLOCK_SIZE_LBN));
+
+/**************************************************************************
+ *
+ * I2C bus - this is a bit-bashing interface using GPIO pins
+ * Note that it uses the output enables to tristate the outputs
+ * SDA is the data pin and SCL is the clock
+ *
+ **************************************************************************
+ */
+/* I2C bit-banging callback for SDA: read-modify-write of FR_AB_GPIO_CTL
+ * that sets the GPIO3 output enable to the inverse of @state, so the
+ * output is enabled for state 0 and disabled (tristated) otherwise.
+ */
+static void falcon_setsda(void *data, int state)
+{
+       struct ef4_nic *efx = data;
+       ef4_oword_t gpio_ctl;
+
+       ef4_reado(efx, &gpio_ctl, FR_AB_GPIO_CTL);
+       EF4_SET_OWORD_FIELD(gpio_ctl, FRF_AB_GPIO3_OEN, state ? 0 : 1);
+       ef4_writeo(efx, &gpio_ctl, FR_AB_GPIO_CTL);
+}
+
+/* I2C bit-banging callback for SCL: read-modify-write of FR_AB_GPIO_CTL
+ * that sets the GPIO0 output enable to the inverse of @state, so the
+ * output is enabled for state 0 and disabled (tristated) otherwise.
+ */
+static void falcon_setscl(void *data, int state)
+{
+       struct ef4_nic *efx = data;
+       ef4_oword_t gpio_ctl;
+
+       ef4_reado(efx, &gpio_ctl, FR_AB_GPIO_CTL);
+       EF4_SET_OWORD_FIELD(gpio_ctl, FRF_AB_GPIO0_OEN, state ? 0 : 1);
+       ef4_writeo(efx, &gpio_ctl, FR_AB_GPIO_CTL);
+}
+
+/* I2C bit-banging callback: returns the GPIO3 input field of
+ * FR_AB_GPIO_CTL, i.e. the current level seen on SDA.
+ */
+static int falcon_getsda(void *data)
+{
+       struct ef4_nic *efx = data;
+       ef4_oword_t gpio_ctl;
+
+       ef4_reado(efx, &gpio_ctl, FR_AB_GPIO_CTL);
+
+       return EF4_OWORD_FIELD(gpio_ctl, FRF_AB_GPIO3_IN);
+}
+
+/* I2C bit-banging callback: returns the GPIO0 input field of
+ * FR_AB_GPIO_CTL, i.e. the current level seen on SCL.
+ */
+static int falcon_getscl(void *data)
+{
+       struct ef4_nic *efx = data;
+       ef4_oword_t gpio_ctl;
+
+       ef4_reado(efx, &gpio_ctl, FR_AB_GPIO_CTL);
+
+       return EF4_OWORD_FIELD(gpio_ctl, FRF_AB_GPIO0_IN);
+}
+
+/* Template of bit-banging callbacks for the GPIO-based I2C bus: the four
+ * line accessors defined in this file plus timing parameters.
+ * NOTE(review): .udelay is presumably in microseconds as defined by the
+ * i2c-algo-bit API - confirm there.  .timeout is in jiffies (HZ / 20).
+ */
+static const struct i2c_algo_bit_data falcon_i2c_bit_operations = {
+       .setsda         = falcon_setsda,
+       .setscl         = falcon_setscl,
+       .getsda         = falcon_getsda,
+       .getscl         = falcon_getscl,
+       .udelay         = 5,
+       /* Wait up to 50 ms for slave to let us pull SCL high */
+       .timeout        = DIV_ROUND_UP(HZ, 20),
+};
+
+/* Write @channel's interrupt moderation setting to its timer command
+ * register: timer disabled when irq_moderation_us is 0, otherwise
+ * interrupt hold-off mode with the period converted to timer ticks.
+ */
+static void falcon_push_irq_moderation(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+       ef4_dword_t timer_cmd;
+
+       if (!channel->irq_moderation_us) {
+               /* No moderation requested: disable the timer */
+               EF4_POPULATE_DWORD_2(timer_cmd,
+                                    FRF_AB_TC_TIMER_MODE,
+                                    FFE_BB_TIMER_MODE_DIS,
+                                    FRF_AB_TC_TIMER_VAL, 0);
+       } else {
+               unsigned int holdoff_ticks =
+                       ef4_usecs_to_ticks(efx, channel->irq_moderation_us);
+
+               /* The register field takes the tick count minus one */
+               EF4_POPULATE_DWORD_2(timer_cmd,
+                                    FRF_AB_TC_TIMER_MODE,
+                                    FFE_BB_TIMER_MODE_INT_HLDOFF,
+                                    FRF_AB_TC_TIMER_VAL,
+                                    holdoff_ticks - 1);
+       }
+
+       /* The A-series and B/Z-series register addresses must coincide */
+       BUILD_BUG_ON(FR_AA_TIMER_COMMAND_KER != FR_BZ_TIMER_COMMAND_P0);
+       ef4_writed_page_locked(efx, &timer_cmd, FR_BZ_TIMER_COMMAND_P0,
+                              channel->channel);
+}
+
+static void falcon_deconfigure_mac_wrapper(struct ef4_nic *efx);
+
+/* Deconfigure the MAC wrapper, then sleep so that the TX and RX FIFOs can
+ * drain.
+ */
+static void falcon_prepare_flush(struct ef4_nic *efx)
+{
+       falcon_deconfigure_mac_wrapper(efx);
+
+       /* The FIFOs need ~1ms (without back-pressure) to reach the next
+        * packet boundary; draining what remains at data path speed is
+        * negligible.  10ms gives a healthy margin.
+        */
+       msleep(10);
+}
+
+/* Acknowledge a legacy (not MSI) interrupt from Falcon via INT_ACK_KER_REG.
+ *
+ * SFC bug 3706 (silicon revision <=A1) can duplicate reads in the BIU, and
+ * the interrupt acknowledge is read sensitive.  The acknowledge is
+ * therefore issued as a write, followed by a read that ensures the BIU
+ * collector is flushed.
+ *
+ * NB most hardware supports MSI interrupts
+ */
+static inline void falcon_irq_ack_a1(struct ef4_nic *efx)
+{
+       ef4_dword_t ack;
+
+       EF4_POPULATE_DWORD_1(ack, FRF_AA_INT_ACK_KER_FIELD, 0xb7eb7e);
+       ef4_writed(efx, &ack, FR_AA_INT_ACK_KER);
+       /* Flushing read; the value read back is not used */
+       ef4_readd(efx, &ack, FR_AA_WORK_AROUND_BROKEN_PCI_READS);
+}
+
+/* Legacy interrupt handler for Falcon A1.
+ *
+ * Examines the interrupt status vector at efx->irq_status.addr.  Returns
+ * IRQ_NONE when the vector is all zero; hands a fatal indication to
+ * ef4_farch_fatal_interrupt(); otherwise clears the vector, acknowledges
+ * the interrupt and schedules channels 0 and/or 1 according to the two low
+ * bits of the queue field.
+ */
+static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id)
+{
+       struct ef4_nic *efx = dev_id;
+       ef4_oword_t *status = efx->irq_status.addr;
+       int fatal;
+       int pending;
+
+       /* A zero vector means this interrupt is not ours, so leave
+        * without having touched the hardware.
+        */
+       if (unlikely(EF4_OWORD_IS_ZERO(*status))) {
+               netif_vdbg(efx, intr, efx->net_dev,
+                          "IRQ %d on CPU %d not for me\n", irq,
+                          raw_smp_processor_id());
+               return IRQ_NONE;
+       }
+
+       efx->last_irq_cpu = raw_smp_processor_id();
+       netif_vdbg(efx, intr, efx->net_dev,
+                  "IRQ %d on CPU %d status " EF4_OWORD_FMT "\n",
+                  irq, raw_smp_processor_id(), EF4_OWORD_VAL(*status));
+
+       /* Interrupts are soft-disabled: claim it but do nothing more */
+       if (unlikely(!ACCESS_ONCE(efx->irq_soft_enabled)))
+               return IRQ_HANDLED;
+
+       /* A serious error condition takes priority over queue events */
+       fatal = EF4_OWORD_FIELD(*status, FSF_AZ_NET_IVEC_FATAL_INT);
+       if (unlikely(fatal))
+               return ef4_farch_fatal_interrupt(efx);
+
+       /* Latch the interrupting queues, clear the status vector and
+        * acknowledge the device interrupt.
+        */
+       BUILD_BUG_ON(FSF_AZ_NET_IVEC_INT_Q_WIDTH > EF4_MAX_CHANNELS);
+       pending = EF4_OWORD_FIELD(*status, FSF_AZ_NET_IVEC_INT_Q);
+       EF4_ZERO_OWORD(*status);
+       wmb(); /* Ensure the vector is cleared before interrupt ack */
+       falcon_irq_ack_a1(efx);
+
+       if (pending & 0x1)
+               ef4_schedule_channel_irq(ef4_get_channel(efx, 0));
+       if (pending & 0x2)
+               ef4_schedule_channel_irq(ef4_get_channel(efx, 1));
+
+       return IRQ_HANDLED;
+}
+
+/**************************************************************************
+ *
+ * RSS
+ *
+ **************************************************************************
+ */
+/* Placeholder RSS configuration hook: ignores every argument and always
+ * returns -ENOSYS.
+ */
+static int dummy_rx_push_rss_config(struct ef4_nic *efx, bool user,
+                                   const u32 *rx_indir_table)
+{
+       /* None of the parameters are used */
+       (void) rx_indir_table;
+       (void) user;
+       (void) efx;
+
+       return -ENOSYS;
+}
+
+/* Push the RSS configuration to the NIC: write the hash key from
+ * efx->rx_hash_key to RX_RSS_TKEY, copy @rx_indir_table into
+ * efx->rx_indir_table and push that table.  @user is ignored.
+ * Always returns 0.
+ */
+static int falcon_b0_rx_push_rss_config(struct ef4_nic *efx, bool user,
+                                       const u32 *rx_indir_table)
+{
+       ef4_oword_t key;
+
+       (void) user;
+
+       /* Set hash key for IPv4 */
+       memcpy(&key, efx->rx_hash_key, sizeof(key));
+       ef4_writeo(efx, &key, FR_BZ_RX_RSS_TKEY);
+
+       /* Adopt the caller's indirection table and push it */
+       memcpy(efx->rx_indir_table, rx_indir_table,
+              sizeof(efx->rx_indir_table));
+       ef4_farch_rx_push_indir_table(efx);
+
+       return 0;
+}
+
+/**************************************************************************
+ *
+ * EEPROM/flash
+ *
+ **************************************************************************
+ */
+
+#define FALCON_SPI_MAX_LEN sizeof(ef4_oword_t)
+
+/* Report whether an SPI host command is still in flight.
+ *
+ * Returns -EBUSY while the CMD_EN field of EE_SPI_HCMD is set, else 0.
+ */
+static int falcon_spi_poll(struct ef4_nic *efx)
+{
+       ef4_oword_t hcmd;
+
+       ef4_reado(efx, &hcmd, FR_AB_EE_SPI_HCMD);
+       if (EF4_OWORD_FIELD(hcmd, FRF_AB_EE_SPI_HCMD_CMD_EN))
+               return -EBUSY;
+       return 0;
+}
+
+/* Wait for SPI command completion.
+ *
+ * Returns 0 once falcon_spi_poll() reports idle, or -ETIMEDOUT.
+ */
+static int falcon_spi_wait(struct ef4_nic *efx)
+{
+       /* Allow up to 100 ms in total: a command may have to wait for
+        * VPD or expansion ROM access that is outside of our control.
+        */
+       unsigned long deadline = jiffies + 1 + DIV_ROUND_UP(HZ, 10);
+       int attempt = 0;
+
+       /* Most commands finish quickly, so begin by polling at very
+        * short intervals.
+        */
+       while (attempt++ < 10) {
+               if (falcon_spi_poll(efx) == 0)
+                       return 0;
+               udelay(10);
+       }
+
+       /* Slow path: sleep for a jiffy between polls */
+       while (falcon_spi_poll(efx) != 0) {
+               if (time_after_eq(jiffies, deadline)) {
+                       netif_err(efx, hw, efx->net_dev,
+                                 "timed out waiting for SPI\n");
+                       return -ETIMEDOUT;
+               }
+               schedule_timeout_uninterruptible(1);
+       }
+
+       return 0;
+}
+
+/* Issue one SPI host command and optionally transfer data.
+ *
+ * @efx: NIC whose SPI host interface is used
+ * @spi: target device; supplies the device select and address length
+ * @command: command value encoded into the host command register
+ * @address: device address, or a negative value for an unaddressed command
+ * @in: data to send to the device, or NULL
+ * @out: buffer that receives data read from the device, or NULL
+ * @len: number of data bytes, at most FALCON_SPI_MAX_LEN
+ *
+ * Returns 0 on success, -EINVAL if @len is too large, -EBUSY if a previous
+ * command is still running, or the error returned by falcon_spi_wait().
+ */
+static int
+falcon_spi_cmd(struct ef4_nic *efx, const struct falcon_spi_device *spi,
+              unsigned int command, int address,
+              const void *in, void *out, size_t len)
+{
+       bool addressed = (address >= 0);
+       bool reading = (out != NULL);
+       ef4_oword_t reg;
+       int rc;
+
+       /* Input validation */
+       if (len > FALCON_SPI_MAX_LEN)
+               return -EINVAL;
+
+       /* Check that previous command is not still running */
+       rc = falcon_spi_poll(efx);
+       if (rc)
+               return rc;
+
+       /* Program address register, if we have an address */
+       if (addressed) {
+               EF4_POPULATE_OWORD_1(reg, FRF_AB_EE_SPI_HADR_ADR, address);
+               ef4_writeo(efx, &reg, FR_AB_EE_SPI_HADR);
+       }
+
+       /* Program data register, if we have data.  Clear the register
+        * image first so that the bytes beyond @len are zero rather than
+        * uninitialised stack or a left-over address value.
+        */
+       if (in != NULL) {
+               EF4_ZERO_OWORD(reg);
+               memcpy(&reg, in, len);
+               ef4_writeo(efx, &reg, FR_AB_EE_SPI_HDATA);
+       }
+
+       /* Issue read/write command */
+       EF4_POPULATE_OWORD_7(reg,
+                            FRF_AB_EE_SPI_HCMD_CMD_EN, 1,
+                            FRF_AB_EE_SPI_HCMD_SF_SEL, spi->device_id,
+                            FRF_AB_EE_SPI_HCMD_DABCNT, len,
+                            FRF_AB_EE_SPI_HCMD_READ, reading,
+                            FRF_AB_EE_SPI_HCMD_DUBCNT, 0,
+                            FRF_AB_EE_SPI_HCMD_ADBCNT,
+                            (addressed ? spi->addr_len : 0),
+                            FRF_AB_EE_SPI_HCMD_ENC, command);
+       ef4_writeo(efx, &reg, FR_AB_EE_SPI_HCMD);
+
+       /* Wait for read/write to complete */
+       rc = falcon_spi_wait(efx);
+       if (rc)
+               return rc;
+
+       /* Read data */
+       if (reading) {
+               ef4_reado(efx, &reg, FR_AB_EE_SPI_HDATA);
+               memcpy(out, &reg, len);
+       }
+
+       return 0;
+}
+
+/* Combine @command with bit 8 of @address, masked by spi->munge_address
+ * and placed at bit 3 of the result.
+ */
+static inline u8
+falcon_spi_munge_command(const struct falcon_spi_device *spi,
+                        const u8 command, const unsigned int address)
+{
+       unsigned int extra = (address >> 8) & spi->munge_address;
+
+       return command | (extra << 3);
+}
+
+/* Read @len bytes starting at @start from @spi into @buffer, in chunks of
+ * at most FALCON_SPI_MAX_LEN bytes.
+ *
+ * If @retlen is not NULL it receives the number of bytes read.  Returns 0,
+ * the error from falcon_spi_cmd(), or -EINTR if a signal became pending
+ * after a chunk was read.
+ */
+static int
+falcon_spi_read(struct ef4_nic *efx, const struct falcon_spi_device *spi,
+               loff_t start, size_t len, size_t *retlen, u8 *buffer)
+{
+       size_t done = 0;
+       int rc = 0;
+
+       while (done < len) {
+               size_t chunk = min(len - done, FALCON_SPI_MAX_LEN);
+               unsigned int opcode =
+                       falcon_spi_munge_command(spi, SPI_READ, start + done);
+
+               rc = falcon_spi_cmd(efx, spi, opcode, start + done, NULL,
+                                   buffer + done, chunk);
+               if (rc)
+                       break;
+               done += chunk;
+
+               /* Avoid locking up the system */
+               cond_resched();
+               if (signal_pending(current)) {
+                       rc = -EINTR;
+                       break;
+               }
+       }
+
+       if (retlen)
+               *retlen = done;
+       return rc;
+}
+
+#ifdef CONFIG_SFC_FALCON_MTD
+
+struct falcon_mtd_partition { /* Falcon-specific wrapper around a generic MTD partition */
+       struct ef4_mtd_partition common; /* generic part; located via common.mtd by the macro below */
+       const struct falcon_spi_device *spi; /* SPI device that holds this partition */
+       size_t offset; /* added to the MTD-relative start in the read/write/erase callbacks */
+};
+
+#define to_falcon_mtd_partition(mtd)                           \
+       container_of(mtd, struct falcon_mtd_partition, common.mtd)
+
+/* Largest write permitted at @start: the smaller of FALCON_SPI_MAX_LEN and
+ * the distance from @start to the end of the block_size-aligned block that
+ * contains it.
+ */
+static size_t
+falcon_spi_write_limit(const struct falcon_spi_device *spi, size_t start)
+{
+       size_t to_block_end =
+               spi->block_size - (start & (spi->block_size - 1));
+
+       return min(FALCON_SPI_MAX_LEN, to_block_end);
+}
+
+/* Wait up to 10 ms for buffered write completion.
+ *
+ * Polls the device status with SPI_RDSR until SPI_STATUS_NRDY clears.
+ * Returns 0 when ready, the error from falcon_spi_cmd(), or -ETIMEDOUT.
+ */
+static int
+falcon_spi_wait_write(struct ef4_nic *efx, const struct falcon_spi_device *spi)
+{
+       unsigned long deadline = jiffies + 1 + DIV_ROUND_UP(HZ, 100);
+       u8 status;
+
+       while (true) {
+               int rc = falcon_spi_cmd(efx, spi, SPI_RDSR, -1, NULL,
+                                       &status, sizeof(status));
+
+               if (rc)
+                       return rc;
+               if ((status & SPI_STATUS_NRDY) == 0)
+                       return 0;
+               if (time_after_eq(jiffies, deadline))
+                       break;
+               schedule_timeout_uninterruptible(1);
+       }
+
+       netif_err(efx, hw, efx->net_dev,
+                 "SPI write timeout on device %d"
+                 " last status=0x%02x\n",
+                 spi->device_id, status);
+       return -ETIMEDOUT;
+}
+
+/* Write @len bytes from @buffer to @spi starting at @start, verifying each
+ * chunk by reading it back.
+ *
+ * Each chunk is limited by falcon_spi_write_limit() and is preceded by a
+ * write-enable command.  If @retlen is not NULL it receives the number of
+ * bytes written and verified.
+ *
+ * Returns 0 on success, the error from any SPI command or from
+ * falcon_spi_wait_write(), -EIO if the data read back differs from what was
+ * written, or -EINTR if a signal became pending after a chunk completed.
+ */
+static int
+falcon_spi_write(struct ef4_nic *efx, const struct falcon_spi_device *spi,
+                loff_t start, size_t len, size_t *retlen, const u8 *buffer)
+{
+       u8 verify_buffer[FALCON_SPI_MAX_LEN];
+       size_t block_len, pos = 0;
+       unsigned int command;
+       int rc = 0;
+
+       while (pos < len) {
+               rc = falcon_spi_cmd(efx, spi, SPI_WREN, -1, NULL, NULL, 0);
+               if (rc)
+                       break;
+
+               block_len = min(len - pos,
+                               falcon_spi_write_limit(spi, start + pos));
+               command = falcon_spi_munge_command(spi, SPI_WRITE, start + pos);
+               rc = falcon_spi_cmd(efx, spi, command, start + pos,
+                                   buffer + pos, NULL, block_len);
+               if (rc)
+                       break;
+
+               rc = falcon_spi_wait_write(efx, spi);
+               if (rc)
+                       break;
+
+               command = falcon_spi_munge_command(spi, SPI_READ, start + pos);
+               rc = falcon_spi_cmd(efx, spi, command, start + pos,
+                                   NULL, verify_buffer, block_len);
+               /* A failed read leaves verify_buffer unfilled (or holding
+                * the previous chunk), so report the read error instead of
+                * comparing against it.
+                */
+               if (rc)
+                       break;
+               if (memcmp(verify_buffer, buffer + pos, block_len)) {
+                       rc = -EIO;
+                       break;
+               }
+
+               pos += block_len;
+
+               /* Avoid locking up the system */
+               cond_resched();
+               if (signal_pending(current)) {
+                       rc = -EINTR;
+                       break;
+               }
+       }
+
+       if (retlen)
+               *retlen = pos;
+       return rc;
+}
+
+/* Wait for the flash/EEPROM behind @part to finish a slow operation.
+ *
+ * Sleeps for HZ / 10 jiffies before each status poll, for at most 40 polls
+ * (about 4s).  @uninterruptible selects the task state used for the sleep.
+ *
+ * Returns 0 once SPI_STATUS_NRDY is clear, the error from falcon_spi_cmd(),
+ * -EINTR if a signal is pending after a poll, or -ETIMEDOUT.
+ */
+static int
+falcon_spi_slow_wait(struct falcon_mtd_partition *part, bool uninterruptible)
+{
+       struct ef4_nic *efx = part->common.mtd.priv;
+       const struct falcon_spi_device *spi = part->spi;
+       int polls_left = 40;
+       u8 status;
+
+       while (polls_left--) {
+               int rc;
+
+               __set_current_state(uninterruptible ?
+                                   TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
+               schedule_timeout(HZ / 10);
+
+               rc = falcon_spi_cmd(efx, spi, SPI_RDSR, -1, NULL,
+                                   &status, sizeof(status));
+               if (rc)
+                       return rc;
+               if ((status & SPI_STATUS_NRDY) == 0)
+                       return 0;
+               if (signal_pending(current))
+                       return -EINTR;
+       }
+
+       pr_err("%s: timed out waiting for %s\n",
+              part->common.name, part->common.dev_type_name);
+       return -ETIMEDOUT;
+}
+
+/* Clear the BP0-BP2 bits in the device status register if any are set.
+ *
+ * Returns 0 when the bits were already clear or have been cleared, else the
+ * error from the failing SPI command or from falcon_spi_wait_write().
+ */
+static int
+falcon_spi_unlock(struct ef4_nic *efx, const struct falcon_spi_device *spi)
+{
+       const u8 bp_bits = (SPI_STATUS_BP2 | SPI_STATUS_BP1 |
+                           SPI_STATUS_BP0);
+       u8 status;
+       int rc;
+
+       rc = falcon_spi_cmd(efx, spi, SPI_RDSR, -1, NULL,
+                           &status, sizeof(status));
+       if (rc)
+               return rc;
+
+       /* Nothing to do when the device is already unlocked */
+       if ((status & bp_bits) == 0)
+               return 0;
+
+       rc = falcon_spi_cmd(efx, spi, SPI_WREN, -1, NULL, NULL, 0);
+       if (rc)
+               return rc;
+       rc = falcon_spi_cmd(efx, spi, SPI_SST_EWSR, -1, NULL, NULL, 0);
+       if (rc)
+               return rc;
+
+       /* Write the status back with the BP bits cleared */
+       status &= ~bp_bits;
+       rc = falcon_spi_cmd(efx, spi, SPI_WRSR, -1, &status,
+                           NULL, sizeof(status));
+       if (rc)
+               return rc;
+
+       return falcon_spi_wait_write(efx, spi);
+}
+
+#define FALCON_SPI_VERIFY_BUF_LEN 16
+
+/* Erase one erase block of @part's SPI device and verify that it reads
+ * back as all 0xff.
+ *
+ * @part: partition whose SPI device is erased
+ * @start: device offset of the block to erase
+ * @len: must equal the device's erase_size
+ *
+ * Returns 0 on success, -EINVAL if @len is not the erase size, -EOPNOTSUPP
+ * if the device has no erase command, -EIO if the region does not read
+ * back as erased, -EINTR if a signal is pending, or the error from an SPI
+ * command or from falcon_spi_slow_wait().
+ */
+static int
+falcon_spi_erase(struct falcon_mtd_partition *part, loff_t start, size_t len)
+{
+       const struct falcon_spi_device *spi = part->spi;
+       struct ef4_nic *efx = part->common.mtd.priv;
+       unsigned pos, block_len;
+       u8 empty[FALCON_SPI_VERIFY_BUF_LEN];
+       u8 buffer[FALCON_SPI_VERIFY_BUF_LEN];
+       int rc;
+
+       if (len != spi->erase_size)
+               return -EINVAL;
+
+       if (spi->erase_command == 0)
+               return -EOPNOTSUPP;
+
+       rc = falcon_spi_unlock(efx, spi);
+       if (rc)
+               return rc;
+       rc = falcon_spi_cmd(efx, spi, SPI_WREN, -1, NULL, NULL, 0);
+       if (rc)
+               return rc;
+       rc = falcon_spi_cmd(efx, spi, spi->erase_command, start, NULL,
+                           NULL, 0);
+       if (rc)
+               return rc;
+       /* Do not verify if the erase did not complete: report the wait
+        * failure itself rather than letting the loop below overwrite it.
+        */
+       rc = falcon_spi_slow_wait(part, false);
+       if (rc)
+               return rc;
+
+       /* Verify the entire region has been wiped */
+       memset(empty, 0xff, sizeof(empty));
+       for (pos = 0; pos < len; pos += block_len) {
+               block_len = min(len - pos, sizeof(buffer));
+               rc = falcon_spi_read(efx, spi, start + pos, block_len,
+                                    NULL, buffer);
+               if (rc)
+                       return rc;
+               if (memcmp(empty, buffer, block_len))
+                       return -EIO;
+
+               /* Avoid locking up the system */
+               cond_resched();
+               if (signal_pending(current))
+                       return -EINTR;
+       }
+
+       return rc;
+}
+
+/* Rebuild the partition name as "<efx->name> <type_name>". */
+static void falcon_mtd_rename(struct ef4_mtd_partition *part)
+{
+       const struct ef4_nic *nic = part->mtd.priv;
+
+       snprintf(part->name, sizeof(part->name), "%s %s",
+                nic->name, part->type_name);
+}
+
+/* MTD read: take the SPI lock interruptibly and read @len bytes at the
+ * partition-relative offset @start.  Returns the error from taking the
+ * lock, or the result of falcon_spi_read().
+ */
+static int falcon_mtd_read(struct mtd_info *mtd, loff_t start,
+                          size_t len, size_t *retlen, u8 *buffer)
+{
+       struct falcon_mtd_partition *part = to_falcon_mtd_partition(mtd);
+       struct ef4_nic *efx = mtd->priv;
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       struct mutex *lock = &nic_data->spi_lock;
+       int rc = mutex_lock_interruptible(lock);
+
+       if (rc)
+               return rc;
+
+       rc = falcon_spi_read(efx, part->spi, part->offset + start,
+                            len, retlen, buffer);
+
+       mutex_unlock(lock);
+       return rc;
+}
+
+/* MTD erase: take the SPI lock interruptibly and erase @len bytes at the
+ * partition-relative offset @start.  Returns the error from taking the
+ * lock, or the result of falcon_spi_erase().
+ */
+static int falcon_mtd_erase(struct mtd_info *mtd, loff_t start, size_t len)
+{
+       struct falcon_mtd_partition *part = to_falcon_mtd_partition(mtd);
+       struct ef4_nic *efx = mtd->priv;
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       struct mutex *lock = &nic_data->spi_lock;
+       int rc = mutex_lock_interruptible(lock);
+
+       if (rc)
+               return rc;
+
+       rc = falcon_spi_erase(part, part->offset + start, len);
+
+       mutex_unlock(lock);
+       return rc;
+}
+
+/* MTD write: take the SPI lock interruptibly and write @len bytes at the
+ * partition-relative offset @start.  Returns the error from taking the
+ * lock, or the result of falcon_spi_write().
+ */
+static int falcon_mtd_write(struct mtd_info *mtd, loff_t start,
+                           size_t len, size_t *retlen, const u8 *buffer)
+{
+       struct falcon_mtd_partition *part = to_falcon_mtd_partition(mtd);
+       struct ef4_nic *efx = mtd->priv;
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       struct mutex *lock = &nic_data->spi_lock;
+       int rc = mutex_lock_interruptible(lock);
+
+       if (rc)
+               return rc;
+
+       rc = falcon_spi_write(efx, part->spi, part->offset + start,
+                             len, retlen, buffer);
+
+       mutex_unlock(lock);
+       return rc;
+}
+
+/* MTD sync: with the SPI lock held, wait uninterruptibly for the device to
+ * finish any slow operation.  Returns the result of falcon_spi_slow_wait().
+ */
+static int falcon_mtd_sync(struct mtd_info *mtd)
+{
+       struct falcon_mtd_partition *part = to_falcon_mtd_partition(mtd);
+       struct ef4_nic *efx = mtd->priv;
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       struct mutex *lock = &nic_data->spi_lock;
+       int rc;
+
+       mutex_lock(lock);
+       rc = falcon_spi_slow_wait(part, true);
+       mutex_unlock(lock);
+
+       return rc;
+}
+
+/* Describe up to two MTD partitions (the flash boot ROM area and the
+ * EEPROM boot configuration area) and register them with ef4_mtd_add().
+ *
+ * Returns -ENOMEM if the partition array cannot be allocated, otherwise the
+ * result of ef4_mtd_add(); the array is freed when that call fails.
+ */
+static int falcon_mtd_probe(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       struct falcon_mtd_partition *parts, *part;
+       struct falcon_spi_device *dev;
+       size_t count = 0;
+       int rc;
+
+       ASSERT_RTNL();
+
+       /* Allocate space for maximum number of partitions */
+       parts = kcalloc(2, sizeof(*parts), GFP_KERNEL);
+       if (!parts)
+               return -ENOMEM;
+
+       /* Flash: everything from the boot code start onwards */
+       dev = &nic_data->spi_flash;
+       if (falcon_spi_present(dev) && dev->size > FALCON_FLASH_BOOTCODE_START) {
+               part = &parts[count++];
+               part->spi = dev;
+               part->offset = FALCON_FLASH_BOOTCODE_START;
+               part->common.dev_type_name = "flash";
+               part->common.type_name = "sfc_flash_bootrom";
+               part->common.mtd.type = MTD_NORFLASH;
+               part->common.mtd.flags = MTD_CAP_NORFLASH;
+               part->common.mtd.size = dev->size - FALCON_FLASH_BOOTCODE_START;
+               part->common.mtd.erasesize = dev->erase_size;
+       }
+
+       /* EEPROM: the boot configuration window, clipped to device size */
+       dev = &nic_data->spi_eeprom;
+       if (falcon_spi_present(dev) && dev->size > FALCON_EEPROM_BOOTCONFIG_START) {
+               part = &parts[count++];
+               part->spi = dev;
+               part->offset = FALCON_EEPROM_BOOTCONFIG_START;
+               part->common.dev_type_name = "EEPROM";
+               part->common.type_name = "sfc_bootconfig";
+               part->common.mtd.type = MTD_RAM;
+               part->common.mtd.flags = MTD_CAP_RAM;
+               part->common.mtd.size =
+                       min(dev->size, FALCON_EEPROM_BOOTCONFIG_END) -
+                       FALCON_EEPROM_BOOTCONFIG_START;
+               part->common.mtd.erasesize = dev->erase_size;
+       }
+
+       rc = ef4_mtd_add(efx, &parts[0].common, count, sizeof(*parts));
+       if (rc)
+               kfree(parts);
+       return rc;
+}
+
+#endif /* CONFIG_SFC_FALCON_MTD */
+
+/**************************************************************************
+ *
+ * XMAC operations
+ *
+ **************************************************************************
+ */
+
+/* Configure the XAUI driver that is an output from Falcon.
+ *
+ * Sets the high and low drive fields of lanes A-D in XX_SD_CTL and the
+ * DEQ/DTX fields of lanes A-D in XX_TXDRV_CTL to their default values.
+ * Does nothing when there is no PHY.
+ */
+static void falcon_setup_xaui(struct ef4_nic *efx)
+{
+       ef4_oword_t sd_ctl, tx_drv;
+
+       /* Without a PHY the XAUI has to drive a cable, so it must not be
+        * moved into low power.
+        */
+       if (efx->phy_type == PHY_TYPE_NONE)
+               return;
+
+       /* Read-modify-write the SerDes control register */
+       ef4_reado(efx, &sd_ctl, FR_AB_XX_SD_CTL);
+       EF4_SET_OWORD_FIELD(sd_ctl, FRF_AB_XX_HIDRVD, FFE_AB_XX_SD_CTL_DRV_DEF);
+       EF4_SET_OWORD_FIELD(sd_ctl, FRF_AB_XX_LODRVD, FFE_AB_XX_SD_CTL_DRV_DEF);
+       EF4_SET_OWORD_FIELD(sd_ctl, FRF_AB_XX_HIDRVC, FFE_AB_XX_SD_CTL_DRV_DEF);
+       EF4_SET_OWORD_FIELD(sd_ctl, FRF_AB_XX_LODRVC, FFE_AB_XX_SD_CTL_DRV_DEF);
+       EF4_SET_OWORD_FIELD(sd_ctl, FRF_AB_XX_HIDRVB, FFE_AB_XX_SD_CTL_DRV_DEF);
+       EF4_SET_OWORD_FIELD(sd_ctl, FRF_AB_XX_LODRVB, FFE_AB_XX_SD_CTL_DRV_DEF);
+       EF4_SET_OWORD_FIELD(sd_ctl, FRF_AB_XX_HIDRVA, FFE_AB_XX_SD_CTL_DRV_DEF);
+       EF4_SET_OWORD_FIELD(sd_ctl, FRF_AB_XX_LODRVA, FFE_AB_XX_SD_CTL_DRV_DEF);
+       ef4_writeo(efx, &sd_ctl, FR_AB_XX_SD_CTL);
+
+       /* The TX driver control register is written in full */
+       EF4_POPULATE_OWORD_8(tx_drv,
+                            FRF_AB_XX_DEQD, FFE_AB_XX_TXDRV_DEQ_DEF,
+                            FRF_AB_XX_DEQC, FFE_AB_XX_TXDRV_DEQ_DEF,
+                            FRF_AB_XX_DEQB, FFE_AB_XX_TXDRV_DEQ_DEF,
+                            FRF_AB_XX_DEQA, FFE_AB_XX_TXDRV_DEQ_DEF,
+                            FRF_AB_XX_DTXD, FFE_AB_XX_TXDRV_DTX_DEF,
+                            FRF_AB_XX_DTXC, FFE_AB_XX_TXDRV_DTX_DEF,
+                            FRF_AB_XX_DTXB, FFE_AB_XX_TXDRV_DTX_DEF,
+                            FRF_AB_XX_DTXA, FFE_AB_XX_TXDRV_DTX_DEF);
+       ef4_writeo(efx, &tx_drv, FR_AB_XX_TXDRV_CTL);
+}
+
+/* Reset the XAUI/XGXS block and reapply the XAUI settings.
+ *
+ * Returns 0 once the reset has completed, or -ETIMEDOUT if it has not
+ * completed after 1000 polls spaced 10us apart.
+ */
+int falcon_reset_xaui(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       ef4_oword_t pwr_rst;
+       int poll;
+
+       /* Don't fetch MAC statistics over an XMAC reset */
+       WARN_ON(nic_data->stats_disable_count == 0);
+
+       /* Start reset sequence */
+       EF4_POPULATE_OWORD_1(pwr_rst, FRF_AB_XX_RST_XX_EN, 1);
+       ef4_writeo(efx, &pwr_rst, FR_AB_XX_PWR_RST);
+
+       /* Wait up to 10 ms for completion, then reinitialise */
+       for (poll = 0; poll < 1000; poll++) {
+               ef4_reado(efx, &pwr_rst, FR_AB_XX_PWR_RST);
+               if (EF4_OWORD_FIELD(pwr_rst, FRF_AB_XX_RST_XX_EN) != 0 ||
+                   EF4_OWORD_FIELD(pwr_rst, FRF_AB_XX_SD_RST_ACT) != 0) {
+                       /* Still resetting */
+                       udelay(10);
+                       continue;
+               }
+
+               falcon_setup_xaui(efx);
+               return 0;
+       }
+
+       netif_err(efx, hw, efx->net_dev,
+                 "timed out waiting for XAUI/XGXS reset\n");
+       return -ETIMEDOUT;
+}
+
+/* Read XM_MGT_INT_MSK when running on Falcon B0 outside internal loopback,
+ * with the link up and no XMAC polling outstanding.
+ * NOTE(review): presumably the read itself acknowledges the status
+ * interrupt; the value read is not used - confirm.
+ */
+static void falcon_ack_status_intr(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       ef4_oword_t scratch;
+
+       if (ef4_nic_rev(efx) != EF4_REV_FALCON_B0)
+               return;
+       if (LOOPBACK_INTERNAL(efx))
+               return;
+
+       /* We expect xgmii faults if the wireside link is down */
+       if (!efx->link_state.up)
+               return;
+
+       /* This interrupt can only signal the negative edge of xaui_align;
+        * the positive edge has to be polled for.
+        */
+       if (nic_data->xmac_poll_required)
+               return;
+
+       ef4_reado(efx, &scratch, FR_AB_XM_MGT_INT_MSK);
+}
+
+/* Sample the XGXS link state from XX_CORE_STAT and then write the register
+ * back with the COMMA_DET, CHAR_ERR and DISPERR fields set for all lanes.
+ *
+ * Returns true when alignment is done and all lanes report sync.
+ */
+static bool falcon_xgxs_link_ok(struct ef4_nic *efx)
+{
+       ef4_oword_t core_stat;
+       bool aligned;
+       int sync;
+
+       /* Read link status */
+       ef4_reado(efx, &core_stat, FR_AB_XX_CORE_STAT);
+       aligned = EF4_OWORD_FIELD(core_stat, FRF_AB_XX_ALIGN_DONE);
+       sync = EF4_OWORD_FIELD(core_stat, FRF_AB_XX_SYNC_STAT);
+
+       /* Clear link status ready for next read */
+       EF4_SET_OWORD_FIELD(core_stat, FRF_AB_XX_COMMA_DET,
+                           FFE_AB_XX_STAT_ALL_LANES);
+       EF4_SET_OWORD_FIELD(core_stat, FRF_AB_XX_CHAR_ERR,
+                           FFE_AB_XX_STAT_ALL_LANES);
+       EF4_SET_OWORD_FIELD(core_stat, FRF_AB_XX_DISPERR,
+                           FFE_AB_XX_STAT_ALL_LANES);
+       ef4_writeo(efx, &core_stat, FR_AB_XX_CORE_STAT);
+
+       return aligned && (sync == FFE_AB_XX_STAT_ALL_LANES);
+}
+
+/* Report whether the XMAC link is up, taking both the MAC-side and the
+ * PHY-side XGXS state into account where they apply.
+ */
+static bool falcon_xmac_link_ok(struct ef4_nic *efx)
+{
+       /* XGMII loopback bypasses the XGXS block, so the MAC's XGXS link
+        * status is only checked in the other modes.
+        */
+       if (efx->loopback_mode != LOOPBACK_XGMII &&
+           !falcon_xgxs_link_ok(efx))
+               return false;
+
+       /* The PHY's XGXS status is checked only when the PHY has a PHY XS
+        * MMD and we are not in an internal (MAC) loopback.
+        */
+       if (!(efx->mdio.mmds & (1 << MDIO_MMD_PHYXS)))
+               return true;
+       if (LOOPBACK_INTERNAL(efx))
+               return true;
+
+       return ef4_mdio_phyxgxs_lane_sync(efx);
+}
+
+/* Program the XMAC core registers: global, TX and RX configuration, the
+ * maximum RX/TX frame sizes derived from the MTU, the flow control
+ * settings from efx->link_state.fc, and the MAC address.
+ */
+static void falcon_reconfigure_xmac_core(struct ef4_nic *efx)
+{
+       const bool fc_rx = (efx->link_state.fc & EF4_FC_RX) != 0;
+       const bool fc_tx = (efx->link_state.fc & EF4_FC_TX) != 0;
+       unsigned int frame_len;
+       ef4_oword_t cfg;
+
+       /* Configure MAC  - cut-thru mode is hard wired on */
+       EF4_POPULATE_OWORD_3(cfg,
+                            FRF_AB_XM_RX_JUMBO_MODE, 1,
+                            FRF_AB_XM_TX_STAT_EN, 1,
+                            FRF_AB_XM_RX_STAT_EN, 1);
+       ef4_writeo(efx, &cfg, FR_AB_XM_GLB_CFG);
+
+       /* Configure TX */
+       EF4_POPULATE_OWORD_6(cfg,
+                            FRF_AB_XM_TXEN, 1,
+                            FRF_AB_XM_TX_PRMBL, 1,
+                            FRF_AB_XM_AUTO_PAD, 1,
+                            FRF_AB_XM_TXCRC, 1,
+                            FRF_AB_XM_FCNTL, fc_tx,
+                            FRF_AB_XM_IPG, 0x3);
+       ef4_writeo(efx, &cfg, FR_AB_XM_TX_CFG);
+
+       /* Configure RX */
+       EF4_POPULATE_OWORD_5(cfg,
+                            FRF_AB_XM_RXEN, 1,
+                            FRF_AB_XM_AUTO_DEPAD, 0,
+                            FRF_AB_XM_ACPT_ALL_MCAST, 1,
+                            FRF_AB_XM_ACPT_ALL_UCAST, !efx->unicast_filter,
+                            FRF_AB_XM_PASS_CRC_ERR, 1);
+       ef4_writeo(efx, &cfg, FR_AB_XM_RX_CFG);
+
+       /* Set frame length */
+       frame_len = EF4_MAX_FRAME_LEN(efx->net_dev->mtu);
+       EF4_POPULATE_OWORD_1(cfg, FRF_AB_XM_MAX_RX_FRM_SIZE, frame_len);
+       ef4_writeo(efx, &cfg, FR_AB_XM_RX_PARAM);
+       EF4_POPULATE_OWORD_2(cfg,
+                            FRF_AB_XM_MAX_TX_FRM_SIZE, frame_len,
+                            FRF_AB_XM_TX_JUMBO_MODE, 1);
+       ef4_writeo(efx, &cfg, FR_AB_XM_TX_PARAM);
+
+       /* Flow control: RX flow control is disabled unless requested */
+       EF4_POPULATE_OWORD_2(cfg,
+                            FRF_AB_XM_PAUSE_TIME, 0xfffe, /* MAX PAUSE TIME */
+                            FRF_AB_XM_DIS_FCNTL, !fc_rx);
+       ef4_writeo(efx, &cfg, FR_AB_XM_FC);
+
+       /* Set MAC address: first four bytes low, last two bytes high */
+       memcpy(&cfg, &efx->net_dev->dev_addr[0], 4);
+       ef4_writeo(efx, &cfg, FR_AB_XM_ADR_LO);
+       memcpy(&cfg, &efx->net_dev->dev_addr[4], 2);
+       ef4_writeo(efx, &cfg, FR_AB_XM_ADR_HI);
+}
+
+/* Apply the requested loopback mode to the XGXS core and the XAUI SerDes,
+ * resetting the XAUI first if any of the XGXS, XAUI or XGMII loopback
+ * settings differs from what the hardware currently has.
+ */
+static void falcon_reconfigure_xgxs_core(struct ef4_nic *efx)
+{
+       const bool want_xgxs = (efx->loopback_mode == LOOPBACK_XGXS);
+       const bool want_xaui = (efx->loopback_mode == LOOPBACK_XAUI);
+       const bool want_xgmii = (efx->loopback_mode == LOOPBACK_XGMII);
+       bool have_xgxs, have_xaui, have_xgmii;
+       ef4_oword_t val;
+
+       /* XGXS block is flaky and will need to be reset if moving
+        * into or out of XGMII, XGXS or XAUI loopbacks.
+        */
+       ef4_reado(efx, &val, FR_AB_XX_CORE_STAT);
+       have_xgxs = EF4_OWORD_FIELD(val, FRF_AB_XX_XGXS_LB_EN);
+       have_xgmii = EF4_OWORD_FIELD(val, FRF_AB_XX_XGMII_LB_EN);
+
+       ef4_reado(efx, &val, FR_AB_XX_SD_CTL);
+       have_xaui = EF4_OWORD_FIELD(val, FRF_AB_XX_LPBKA);
+
+       /* The PHY driver may have turned XAUI off */
+       if ((want_xgxs != have_xgxs) ||
+           (want_xaui != have_xaui) ||
+           (want_xgmii != have_xgmii))
+               falcon_reset_xaui(efx);
+
+       /* Core loopback enables; force signal in XGXS or XAUI loopback */
+       ef4_reado(efx, &val, FR_AB_XX_CORE_STAT);
+       EF4_SET_OWORD_FIELD(val, FRF_AB_XX_FORCE_SIG,
+                           (want_xgxs || want_xaui) ?
+                           FFE_AB_XX_FORCE_SIG_ALL_LANES : 0);
+       EF4_SET_OWORD_FIELD(val, FRF_AB_XX_XGXS_LB_EN, want_xgxs);
+       EF4_SET_OWORD_FIELD(val, FRF_AB_XX_XGMII_LB_EN, want_xgmii);
+       ef4_writeo(efx, &val, FR_AB_XX_CORE_STAT);
+
+       /* Per-lane XAUI loopback in the SerDes control register */
+       ef4_reado(efx, &val, FR_AB_XX_SD_CTL);
+       EF4_SET_OWORD_FIELD(val, FRF_AB_XX_LPBKD, want_xaui);
+       EF4_SET_OWORD_FIELD(val, FRF_AB_XX_LPBKC, want_xaui);
+       EF4_SET_OWORD_FIELD(val, FRF_AB_XX_LPBKB, want_xaui);
+       EF4_SET_OWORD_FIELD(val, FRF_AB_XX_LPBKA, want_xaui);
+       ef4_writeo(efx, &val, FR_AB_XX_SD_CTL);
+}
+
+
+/* Try to bring up the Falcon side of the Falcon-Phy XAUI link.
+ *
+ * Checks the link and, while it is down, resets the XAUI and rechecks up to
+ * @tries times, with NIC statistics stopped for the duration.  Returns the
+ * last observed link state.
+ */
+static bool falcon_xmac_link_ok_retry(struct ef4_nic *efx, int tries)
+{
+       bool link_up = falcon_xmac_link_ok(efx);
+
+       /* XAUI link is expected to be down in these cases, so just
+        * report what was seen.
+        */
+       if (LOOPBACK_MASK(efx) & LOOPBACKS_EXTERNAL(efx) & LOOPBACKS_WS ||
+           ef4_phy_mode_disabled(efx->phy_mode))
+               return link_up;
+
+       falcon_stop_nic_stats(efx);
+
+       for (; !link_up && tries; --tries) {
+               netif_dbg(efx, hw, efx->net_dev, "bashing xaui\n");
+               falcon_reset_xaui(efx);
+               udelay(200);
+
+               link_up = falcon_xmac_link_ok(efx);
+       }
+
+       falcon_start_nic_stats(efx);
+
+       return link_up;
+}
+
+/* Returns true if the XMAC link is still down after up to 5 XAUI resets. */
+static bool falcon_xmac_check_fault(struct ef4_nic *efx)
+{
+       bool link_up = falcon_xmac_link_ok_retry(efx, 5);
+
+       return !link_up;
+}
+
+/* Reconfigure the XMAC: sync the RX filter mode, reprogram the XGXS core,
+ * the XMAC core and the MAC wrapper, then record whether link polling is
+ * still required and acknowledge the status interrupt.  Always returns 0.
+ */
+static int falcon_reconfigure_xmac(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       bool link_up;
+
+       ef4_farch_filter_sync_rx_mode(efx);
+
+       falcon_reconfigure_xgxs_core(efx);
+       falcon_reconfigure_xmac_core(efx);
+       falcon_reconfigure_mac_wrapper(efx);
+
+       /* Keep polling for as long as the link has not come up */
+       link_up = falcon_xmac_link_ok_retry(efx, 5);
+       nic_data->xmac_poll_required = !link_up;
+       falcon_ack_status_intr(efx);
+
+       return 0;
+}
+
+/* Poll the XMAC link once when polling is outstanding and the link is
+ * reported up, then update the polling flag and acknowledge the status
+ * interrupt.
+ */
+static void falcon_poll_xmac(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       bool link_up;
+
+       /* We expect xgmii faults if the wireside link is down */
+       if (!(efx->link_state.up && nic_data->xmac_poll_required))
+               return;
+
+       link_up = falcon_xmac_link_ok_retry(efx, 1);
+       nic_data->xmac_poll_required = !link_up;
+       falcon_ack_status_intr(efx);
+}
+
+/**************************************************************************
+ *
+ * MAC wrapper
+ *
+ **************************************************************************
+ */
+
+/* Write both halves of efx->multicast_hash to the MAC multicast hash
+ * registers.  Warns if efx->mac_lock is not held.
+ */
+static void falcon_push_multicast_hash(struct ef4_nic *efx)
+{
+       union ef4_multicast_hash *hash = &efx->multicast_hash;
+
+       WARN_ON(!mutex_is_locked(&efx->mac_lock));
+
+       ef4_writeo(efx, &hash->oword[0], FR_AB_MAC_MC_HASH_REG0);
+       ef4_writeo(efx, &hash->oword[1], FR_AB_MAC_MC_HASH_REG1);
+}
+
+/* Reset the MACs.
+ *
+ * Before revision B0 the XMAC core's own reset bit is set and polled.  On
+ * later revisions the TX FIFO drain is enabled, the XGTX, XGRX and EM
+ * resets in GLB_CTL are asserted and polled, MAC_CTRL is rewritten and the
+ * XAUI settings are reapplied.
+ *
+ * NOTE(review): if the pre-B0 core reset times out, execution continues
+ * into the GLB_CTL sequence below - confirm that this is intended.
+ */
+static void falcon_reset_macs(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       ef4_oword_t rst, mac_ctrl;
+       int polls;
+
+       if (ef4_nic_rev(efx) < EF4_REV_FALCON_B0) {
+               /* It's not safe to use GLB_CTL_REG to reset the
+                * macs, so instead use the internal MAC resets
+                */
+               EF4_POPULATE_OWORD_1(rst, FRF_AB_XM_CORE_RST, 1);
+               ef4_writeo(efx, &rst, FR_AB_XM_GLB_CFG);
+
+               for (polls = 0; polls < 10000; polls++) {
+                       ef4_reado(efx, &rst, FR_AB_XM_GLB_CFG);
+                       if (EF4_OWORD_FIELD(rst, FRF_AB_XM_CORE_RST) == 0)
+                               return;
+                       udelay(10);
+               }
+
+               netif_err(efx, hw, efx->net_dev,
+                         "timed out waiting for XMAC core reset\n");
+       }
+
+       /* Mac stats will fail whilst the TX fifo is draining */
+       WARN_ON(nic_data->stats_disable_count == 0);
+
+       ef4_reado(efx, &mac_ctrl, FR_AB_MAC_CTRL);
+       EF4_SET_OWORD_FIELD(mac_ctrl, FRF_BB_TXFIFO_DRAIN_EN, 1);
+       ef4_writeo(efx, &mac_ctrl, FR_AB_MAC_CTRL);
+
+       /* Assert the three MAC-related resets in the global control reg */
+       ef4_reado(efx, &rst, FR_AB_GLB_CTL);
+       EF4_SET_OWORD_FIELD(rst, FRF_AB_RST_XGTX, 1);
+       EF4_SET_OWORD_FIELD(rst, FRF_AB_RST_XGRX, 1);
+       EF4_SET_OWORD_FIELD(rst, FRF_AB_RST_EM, 1);
+       ef4_writeo(efx, &rst, FR_AB_GLB_CTL);
+
+       /* Poll until all three reset bits have cleared */
+       polls = 0;
+       while (1) {
+               ef4_reado(efx, &rst, FR_AB_GLB_CTL);
+               if (!EF4_OWORD_FIELD(rst, FRF_AB_RST_XGTX) &&
+                   !EF4_OWORD_FIELD(rst, FRF_AB_RST_XGRX) &&
+                   !EF4_OWORD_FIELD(rst, FRF_AB_RST_EM)) {
+                       netif_dbg(efx, hw, efx->net_dev,
+                                 "Completed MAC reset after %d loops\n",
+                                 polls);
+                       break;
+               }
+               if (polls > 20) {
+                       netif_err(efx, hw, efx->net_dev, "MAC reset failed\n");
+                       break;
+               }
+               polls++;
+               udelay(10);
+       }
+
+       /* Ensure the correct MAC is selected before statistics
+        * are re-enabled by the caller */
+       ef4_writeo(efx, &mac_ctrl, FR_AB_MAC_CTRL);
+
+       falcon_setup_xaui(efx);
+}
+
+/* Start draining the TX FIFO by resetting the MACs.
+ *
+ * No-op before Falcon B0, when any loopback mode is selected, or when
+ * TXFIFO_DRAIN_EN is already set in MAC_CTRL.
+ */
+static void falcon_drain_tx_fifo(struct ef4_nic *efx)
+{
+       ef4_oword_t mac_ctrl;
+
+       if (ef4_nic_rev(efx) < EF4_REV_FALCON_B0)
+               return;
+       if (efx->loopback_mode != LOOPBACK_NONE)
+               return;
+
+       /* There is no point in draining more than once */
+       ef4_reado(efx, &mac_ctrl, FR_AB_MAC_CTRL);
+       if (!EF4_OWORD_FIELD(mac_ctrl, FRF_BB_TXFIFO_DRAIN_EN))
+               falcon_reset_macs(efx);
+}
+
+/* Isolate the MAC from the datapath (no-op before Falcon B0):
+ * clear RX_INGR_EN in RX_CFG, then drain the TX FIFO.
+ */
+static void falcon_deconfigure_mac_wrapper(struct ef4_nic *efx)
+{
+       ef4_oword_t reg;
+
+       if (ef4_nic_rev(efx) < EF4_REV_FALCON_B0)
+               return;
+
+       /* Isolate the MAC -> RX */
+       ef4_reado(efx, &reg, FR_AZ_RX_CFG);
+       EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_INGR_EN, 0);
+       ef4_writeo(efx, &reg, FR_AZ_RX_CFG);
+
+       /* Isolate TX -> MAC */
+       falcon_drain_tx_fifo(efx);
+}
+
+/* Program MAC_CTRL from the current link state, restore the multicast
+ * hash registers and update RX_CFG.
+ *
+ * On Falcon B0 and later, TX FIFO drain is enabled while the link is
+ * down or a reset is pending (efx->reset_pending non-zero), and RX
+ * ingress is disabled while a reset is pending.
+ */
+static void falcon_reconfigure_mac_wrapper(struct ef4_nic *efx)
+{
+       struct ef4_link_state *link_state = &efx->link_state;
+       ef4_oword_t reg;
+       int link_speed, isolate;
+
+       isolate = !!ACCESS_ONCE(efx->reset_pending);
+
+       /* Encode the link speed for the MAC_SPEED field; anything other
+        * than 10000/1000/100 maps to 0 */
+       switch (link_state->speed) {
+       case 10000: link_speed = 3; break;
+       case 1000:  link_speed = 2; break;
+       case 100:   link_speed = 1; break;
+       default:    link_speed = 0; break;
+       }
+
+       /* MAC_LINK_STATUS controls MAC backpressure but doesn't work
+        * as advertised.  Disable to ensure packets are not
+        * indefinitely held and TX queue can be flushed at any point
+        * while the link is down. */
+       EF4_POPULATE_OWORD_5(reg,
+                            FRF_AB_MAC_XOFF_VAL, 0xffff /* max pause time */,
+                            FRF_AB_MAC_BCAD_ACPT, 1,
+                            FRF_AB_MAC_UC_PROM, !efx->unicast_filter,
+                            FRF_AB_MAC_LINK_STATUS, 1, /* always set */
+                            FRF_AB_MAC_SPEED, link_speed);
+       /* On B0, MAC backpressure can be disabled and packets get
+        * discarded. */
+       if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) {
+               EF4_SET_OWORD_FIELD(reg, FRF_BB_TXFIFO_DRAIN_EN,
+                                   !link_state->up || isolate);
+       }
+
+       ef4_writeo(efx, &reg, FR_AB_MAC_CTRL);
+
+       /* Restore the multicast hash registers. */
+       falcon_push_multicast_hash(efx);
+
+       ef4_reado(efx, &reg, FR_AZ_RX_CFG);
+       /* Enable XOFF signal from RX FIFO (we enabled it during NIC
+        * initialisation but it may read back as 0) */
+       EF4_SET_OWORD_FIELD(reg, FRF_AZ_RX_XOFF_MAC_EN, 1);
+       /* Unisolate the MAC -> RX */
+       if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0)
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_INGR_EN, !isolate);
+       ef4_writeo(efx, &reg, FR_AZ_RX_CFG);
+}
+
+/* Start a statistics DMA from the MAC to efx->stats_buffer and re-arm
+ * nic_data->stats_timer.
+ *
+ * Clears the DMA done flag and marks a request as pending before the
+ * DMA command is written.  Warns if a request is already pending or if
+ * statistics are currently disabled.
+ */
+static void falcon_stats_request(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       ef4_oword_t reg;
+
+       WARN_ON(nic_data->stats_pending);
+       WARN_ON(nic_data->stats_disable_count);
+
+       FALCON_XMAC_STATS_DMA_FLAG(efx) = 0;
+       nic_data->stats_pending = true;
+       wmb(); /* ensure done flag is clear */
+
+       /* Initiate DMA transfer of stats */
+       EF4_POPULATE_OWORD_2(reg,
+                            FRF_AB_MAC_STAT_DMA_CMD, 1,
+                            FRF_AB_MAC_STAT_DMA_ADR,
+                            efx->stats_buffer.dma_addr);
+       ef4_writeo(efx, &reg, FR_AB_MAC_STAT_DMA);
+
+       /* Fire the timer roughly half a second from now */
+       mod_timer(&nic_data->stats_timer, round_jiffies_up(jiffies + HZ / 2));
+}
+
+/* Finish a pending statistics request, if there is one.
+ *
+ * Clears stats_pending, then either folds the DMAed counters into
+ * nic_data->stats (when the DMA done flag is set) or logs a timeout.
+ */
+static void falcon_stats_complete(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+
+       if (!nic_data->stats_pending)
+               return;
+       nic_data->stats_pending = false;
+
+       if (!FALCON_XMAC_STATS_DMA_FLAG(efx)) {
+               netif_err(efx, hw, efx->net_dev,
+                         "timed out waiting for statistics\n");
+               return;
+       }
+
+       rmb(); /* read the done flag before the stats */
+       ef4_nic_update_stats(falcon_stat_desc, FALCON_STAT_COUNT,
+                            falcon_stat_mask, nic_data->stats,
+                            efx->stats_buffer.addr, true);
+}
+
+/* Statistics timer callback.
+ *
+ * @context is the struct ef4_nic pointer cast to unsigned long.  Under
+ * efx->stats_lock, completes any pending statistics request and, unless
+ * statistics are disabled, issues the next one.
+ */
+static void falcon_stats_timer_func(unsigned long context)
+{
+       struct ef4_nic *efx = (struct ef4_nic *)context;
+       struct falcon_nic_data *nic_data = efx->nic_data;
+
+       spin_lock(&efx->stats_lock);
+
+       falcon_stats_complete(efx);
+       if (nic_data->stats_disable_count == 0)
+               falcon_stats_request(efx);
+
+       spin_unlock(&efx->stats_lock);
+}
+
+/* Synthesise the link state used in internal loopback: link up, full
+ * duplex, speed 10000, flow control as requested in efx->wanted_fc.
+ *
+ * Warns if efx->mac_lock is not held or the NIC is not in an internal
+ * loopback mode.  Returns true if the link state changed.
+ */
+static bool falcon_loopback_link_poll(struct ef4_nic *efx)
+{
+       struct ef4_link_state *cur = &efx->link_state;
+       struct ef4_link_state prev = *cur;
+
+       WARN_ON(!mutex_is_locked(&efx->mac_lock));
+       WARN_ON(!LOOPBACK_INTERNAL(efx));
+
+       cur->speed = 10000;
+       cur->up = true;
+       cur->fc = efx->wanted_fc;
+       cur->fd = true;
+
+       return !ef4_link_state_equal(cur, &prev);
+}
+
+/* Reconfigure the port.
+ *
+ * Polls the link, stops statistics, isolates and resets the MACs,
+ * reconfigures the PHY and the XMAC, restarts statistics and reports the
+ * resulting link state.  Warns on NIC revisions newer than Falcon B0.
+ *
+ * Always returns 0; a failure of falcon_reconfigure_xmac() triggers
+ * BUG_ON().
+ */
+static int falcon_reconfigure_port(struct ef4_nic *efx)
+{
+       int rc;
+
+       WARN_ON(ef4_nic_rev(efx) > EF4_REV_FALCON_B0);
+
+       /* Poll the PHY link state *before* reconfiguring it. This means we
+        * will pick up the correct speed (in loopback) to select the correct
+        * MAC.
+        */
+       if (LOOPBACK_INTERNAL(efx))
+               falcon_loopback_link_poll(efx);
+       else
+               efx->phy_op->poll(efx);
+
+       falcon_stop_nic_stats(efx);
+       falcon_deconfigure_mac_wrapper(efx);
+
+       falcon_reset_macs(efx);
+
+       efx->phy_op->reconfigure(efx);
+       rc = falcon_reconfigure_xmac(efx);
+       BUG_ON(rc);
+
+       falcon_start_nic_stats(efx);
+
+       /* Synchronise efx->link_state with the kernel */
+       ef4_link_status_changed(efx);
+
+       return 0;
+}
+
+/* TX flow control may automatically turn itself off if the link
+ * partner (intermittently) stops responding to pause frames. There
+ * isn't any indication that this has happened, so the best we can do is
+ * leave it up to the user to spot this and fix it by cycling transmit
+ * flow control on this end.
+ */
+
+/* Prepare to enable TX flow control (A1 variant): schedules a
+ * RESET_TYPE_INVISIBLE reset rather than touching the MAC directly.
+ */
+static void falcon_a1_prepare_enable_fc_tx(struct ef4_nic *efx)
+{
+       /* Schedule a reset to recover */
+       ef4_schedule_reset(efx, RESET_TYPE_INVISIBLE);
+}
+
+/* Prepare to enable TX flow control (B0 variant): with statistics
+ * stopped, drain the TX FIFO and reconfigure the XMAC.  The return value
+ * of falcon_reconfigure_xmac() is not checked here.
+ */
+static void falcon_b0_prepare_enable_fc_tx(struct ef4_nic *efx)
+{
+       /* Recover by resetting the EM block */
+       falcon_stop_nic_stats(efx);
+       falcon_drain_tx_fifo(efx);
+       falcon_reconfigure_xmac(efx);
+       falcon_start_nic_stats(efx);
+}
+
+/**************************************************************************
+ *
+ * PHY access via GMII
+ *
+ **************************************************************************
+ */
+
+/* Wait for the current GMII/MDIO access to finish.
+ *
+ * Polls MD_STAT every 10us until MD_BSY clears.  Returns 0 on success,
+ * -EIO if the access completed with MD_LNFL or MD_BSERR set, and
+ * -ETIMEDOUT if the interface is still busy after all polls.
+ */
+static int falcon_gmii_wait(struct ef4_nic *efx)
+{
+       ef4_oword_t md_stat;
+       int count;
+
+       /* wait up to 50ms - taken max from datasheet */
+       for (count = 0; count < 5000; count++) {
+               ef4_reado(efx, &md_stat, FR_AB_MD_STAT);
+
+               /* Still busy: back off and poll again */
+               if (EF4_OWORD_FIELD(md_stat, FRF_AB_MD_BSY) != 0) {
+                       udelay(10);
+                       continue;
+               }
+
+               if (EF4_OWORD_FIELD(md_stat, FRF_AB_MD_LNFL) == 0 &&
+                   EF4_OWORD_FIELD(md_stat, FRF_AB_MD_BSERR) == 0)
+                       return 0;
+
+               netif_err(efx, hw, efx->net_dev,
+                         "error from GMII access "
+                         EF4_OWORD_FMT"\n",
+                         EF4_OWORD_VAL(md_stat));
+               return -EIO;
+       }
+
+       netif_err(efx, hw, efx->net_dev, "timed out waiting for GMII\n");
+       return -ETIMEDOUT;
+}
+
+/* Write an MDIO register of a PHY connected to Falcon.
+ *
+ * @prtad and @devad are written to MD_ID as the port and device address,
+ * @addr to MD_PHY_ADR and @value to MD_TXD.  The whole access is
+ * serialised by nic_data->mdio_lock.
+ *
+ * Returns 0 on success or the negative error from falcon_gmii_wait();
+ * if the wait after issuing the write fails, the operation is aborted.
+ */
+static int falcon_mdio_write(struct net_device *net_dev,
+                            int prtad, int devad, u16 addr, u16 value)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       ef4_oword_t reg;
+       int rc;
+
+       netif_vdbg(efx, hw, efx->net_dev,
+                  "writing MDIO %d register %d.%d with 0x%04x\n",
+                   prtad, devad, addr, value);
+
+       mutex_lock(&nic_data->mdio_lock);
+
+       /* Check MDIO not currently being accessed */
+       rc = falcon_gmii_wait(efx);
+       if (rc)
+               goto out;
+
+       /* Write the address/ID register */
+       EF4_POPULATE_OWORD_1(reg, FRF_AB_MD_PHY_ADR, addr);
+       ef4_writeo(efx, &reg, FR_AB_MD_PHY_ADR);
+
+       EF4_POPULATE_OWORD_2(reg, FRF_AB_MD_PRT_ADR, prtad,
+                            FRF_AB_MD_DEV_ADR, devad);
+       ef4_writeo(efx, &reg, FR_AB_MD_ID);
+
+       /* Write data */
+       EF4_POPULATE_OWORD_1(reg, FRF_AB_MD_TXD, value);
+       ef4_writeo(efx, &reg, FR_AB_MD_TXD);
+
+       EF4_POPULATE_OWORD_2(reg,
+                            FRF_AB_MD_WRC, 1,
+                            FRF_AB_MD_GC, 0);
+       ef4_writeo(efx, &reg, FR_AB_MD_CS);
+
+       /* Wait for data to be written */
+       rc = falcon_gmii_wait(efx);
+       if (rc) {
+               /* Abort the write operation */
+               EF4_POPULATE_OWORD_2(reg,
+                                    FRF_AB_MD_WRC, 0,
+                                    FRF_AB_MD_GC, 1);
+               ef4_writeo(efx, &reg, FR_AB_MD_CS);
+               udelay(10);
+       }
+
+out:
+       mutex_unlock(&nic_data->mdio_lock);
+       return rc;
+}
+
+/* Read an MDIO register of a PHY connected to Falcon.
+ *
+ * @prtad and @devad are written to MD_ID as the port and device address
+ * and @addr to MD_PHY_ADR.  The whole access is serialised by
+ * nic_data->mdio_lock.
+ *
+ * Returns the MD_RXD field value on success, or the negative error from
+ * falcon_gmii_wait(); if the wait after issuing the read fails, the
+ * operation is aborted.
+ */
+static int falcon_mdio_read(struct net_device *net_dev,
+                           int prtad, int devad, u16 addr)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       ef4_oword_t reg;
+       int rc;
+
+       mutex_lock(&nic_data->mdio_lock);
+
+       /* Check MDIO not currently being accessed */
+       rc = falcon_gmii_wait(efx);
+       if (rc)
+               goto out;
+
+       EF4_POPULATE_OWORD_1(reg, FRF_AB_MD_PHY_ADR, addr);
+       ef4_writeo(efx, &reg, FR_AB_MD_PHY_ADR);
+
+       EF4_POPULATE_OWORD_2(reg, FRF_AB_MD_PRT_ADR, prtad,
+                            FRF_AB_MD_DEV_ADR, devad);
+       ef4_writeo(efx, &reg, FR_AB_MD_ID);
+
+       /* Request data to be read */
+       EF4_POPULATE_OWORD_2(reg, FRF_AB_MD_RDC, 1, FRF_AB_MD_GC, 0);
+       ef4_writeo(efx, &reg, FR_AB_MD_CS);
+
+       /* Wait for data to become available */
+       rc = falcon_gmii_wait(efx);
+       if (rc == 0) {
+               ef4_reado(efx, &reg, FR_AB_MD_RXD);
+               rc = EF4_OWORD_FIELD(reg, FRF_AB_MD_RXD);
+               netif_vdbg(efx, hw, efx->net_dev,
+                          "read from MDIO %d register %d.%d, got %04x\n",
+                          prtad, devad, addr, rc);
+       } else {
+               /* Abort the read operation */
+               EF4_POPULATE_OWORD_2(reg,
+                                    FRF_AB_MD_RIC, 0,
+                                    FRF_AB_MD_GC, 1);
+               ef4_writeo(efx, &reg, FR_AB_MD_CS);
+
+               netif_dbg(efx, hw, efx->net_dev,
+                         "read from MDIO %d register %d.%d, got error %d\n",
+                         prtad, devad, addr, rc);
+       }
+
+out:
+       mutex_unlock(&nic_data->mdio_lock);
+       return rc;
+}
+
+/* This call is responsible for hooking in the MAC and PHY operations.
+ *
+ * Selects efx->phy_op from efx->phy_type, installs the MDIO accessors,
+ * probes the PHY, sets the initial link assumptions and wanted flow
+ * control, and allocates the statistics DMA buffer.
+ *
+ * Returns 0 on success, -ENODEV for an unknown PHY type, or the error
+ * from the PHY probe or the buffer allocation.  If the buffer allocation
+ * fails the PHY probe is undone with phy_op->remove(), mirroring
+ * falcon_remove_port(), so nothing is left behind on failure.
+ */
+static int falcon_probe_port(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       int rc;
+
+       switch (efx->phy_type) {
+       case PHY_TYPE_SFX7101:
+               efx->phy_op = &falcon_sfx7101_phy_ops;
+               break;
+       case PHY_TYPE_QT2022C2:
+       case PHY_TYPE_QT2025C:
+               efx->phy_op = &falcon_qt202x_phy_ops;
+               break;
+       case PHY_TYPE_TXC43128:
+               efx->phy_op = &falcon_txc_phy_ops;
+               break;
+       default:
+               netif_err(efx, probe, efx->net_dev, "Unknown PHY type %d\n",
+                         efx->phy_type);
+               return -ENODEV;
+       }
+
+       /* Fill out MDIO structure and loopback modes */
+       mutex_init(&nic_data->mdio_lock);
+       efx->mdio.mdio_read = falcon_mdio_read;
+       efx->mdio.mdio_write = falcon_mdio_write;
+       rc = efx->phy_op->probe(efx);
+       if (rc != 0)
+               return rc;
+
+       /* Initial assumption */
+       efx->link_state.speed = 10000;
+       efx->link_state.fd = true;
+
+       /* Hardware flow ctrl. FalconA RX FIFO too small for pause generation */
+       if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0)
+               efx->wanted_fc = EF4_FC_RX | EF4_FC_TX;
+       else
+               efx->wanted_fc = EF4_FC_RX;
+       if (efx->mdio.mmds & MDIO_DEVS_AN)
+               efx->wanted_fc |= EF4_FC_AUTO;
+
+       /* Allocate buffer for stats */
+       rc = ef4_nic_alloc_buffer(efx, &efx->stats_buffer,
+                                 FALCON_MAC_STATS_SIZE, GFP_KERNEL);
+       if (rc)
+               goto fail_phy;
+       netif_dbg(efx, probe, efx->net_dev,
+                 "stats buffer at %llx (virt %p phys %llx)\n",
+                 (u64)efx->stats_buffer.dma_addr,
+                 efx->stats_buffer.addr,
+                 (u64)virt_to_phys(efx->stats_buffer.addr));
+
+       return 0;
+
+fail_phy:
+       /* Undo the successful PHY probe so its resources are not leaked */
+       efx->phy_op->remove(efx);
+       return rc;
+}
+
+/* Tear down what falcon_probe_port() set up: remove the PHY and free the
+ * statistics buffer.
+ */
+static void falcon_remove_port(struct ef4_nic *efx)
+{
+       efx->phy_op->remove(efx);
+       ef4_nic_free_buffer(efx, &efx->stats_buffer);
+}
+
+/* Global events are basically PHY events.
+ *
+ * PHY interrupt events are ignored.  On Falcon B0 an XG_MGT interrupt
+ * requests an XMAC poll.  An RX_RECOVERY event (bit position depends on
+ * the NIC revision) bumps efx->rx_reset and schedules a reset.
+ *
+ * Returns true if the event was recognised, false otherwise.
+ */
+static bool
+falcon_handle_global_event(struct ef4_channel *channel, ef4_qword_t *event)
+{
+       struct ef4_nic *efx = channel->efx;
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       bool rx_recovery;
+
+       if (EF4_QWORD_FIELD(*event, FSF_AB_GLB_EV_G_PHY0_INTR) ||
+           EF4_QWORD_FIELD(*event, FSF_AB_GLB_EV_XG_PHY0_INTR) ||
+           EF4_QWORD_FIELD(*event, FSF_AB_GLB_EV_XFP_PHY0_INTR)) {
+               /* Ignored */
+               return true;
+       }
+
+       if ((ef4_nic_rev(efx) == EF4_REV_FALCON_B0) &&
+           EF4_QWORD_FIELD(*event, FSF_BB_GLB_EV_XG_MGT_INTR)) {
+               nic_data->xmac_poll_required = true;
+               return true;
+       }
+
+       /* The RX_RECOVERY flag lives in a revision-specific field */
+       if (ef4_nic_rev(efx) <= EF4_REV_FALCON_A1)
+               rx_recovery = EF4_QWORD_FIELD(*event,
+                                             FSF_AA_GLB_EV_RX_RECOVERY);
+       else
+               rx_recovery = EF4_QWORD_FIELD(*event,
+                                             FSF_BB_GLB_EV_RX_RECOVERY);
+       if (!rx_recovery)
+               return false;
+
+       netif_err(efx, rx_err, efx->net_dev,
+                 "channel %d seen global RX_RESET event. Resetting.\n",
+                 channel->channel);
+
+       atomic_inc(&efx->rx_reset);
+       ef4_schedule_reset(efx, EF4_WORKAROUND_6555(efx) ?
+                          RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE);
+       return true;
+}
+
+/**************************************************************************
+ *
+ * Falcon test code
+ *
+ **************************************************************************/
+
+/* Read and validate the board configuration held in NVRAM.
+ *
+ * Reads the first FALCON_NVCONFIG_END bytes from the SPI flash if
+ * present, otherwise from the SPI EEPROM, then checks the magic number,
+ * the structure version and a 16-bit checksum.  On success the
+ * configuration is copied to @nvconfig_out when that is non-NULL.
+ *
+ * Returns 0 on success, -EINVAL if neither SPI device is present or
+ * validation fails, -ENOMEM if the bounce buffer cannot be allocated and
+ * -EIO if the SPI read fails.
+ */
+static int
+falcon_read_nvram(struct ef4_nic *efx, struct falcon_nvconfig *nvconfig_out)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       struct falcon_nvconfig *nvconfig;
+       struct falcon_spi_device *spi;
+       void *region;
+       int rc, magic_num, struct_ver;
+       __le16 *word, *limit;
+       u32 csum;
+
+       if (falcon_spi_present(&nic_data->spi_flash))
+               spi = &nic_data->spi_flash;
+       else if (falcon_spi_present(&nic_data->spi_eeprom))
+               spi = &nic_data->spi_eeprom;
+       else
+               return -EINVAL;
+
+       region = kmalloc(FALCON_NVCONFIG_END, GFP_KERNEL);
+       if (!region)
+               return -ENOMEM;
+       /* The config structure sits at a fixed offset inside the region */
+       nvconfig = region + FALCON_NVCONFIG_OFFSET;
+
+       mutex_lock(&nic_data->spi_lock);
+       rc = falcon_spi_read(efx, spi, 0, FALCON_NVCONFIG_END, NULL, region);
+       mutex_unlock(&nic_data->spi_lock);
+       if (rc) {
+               netif_err(efx, hw, efx->net_dev, "Failed to read %s\n",
+                         falcon_spi_present(&nic_data->spi_flash) ?
+                         "flash" : "EEPROM");
+               rc = -EIO;
+               goto out;
+       }
+
+       magic_num = le16_to_cpu(nvconfig->board_magic_num);
+       struct_ver = le16_to_cpu(nvconfig->board_struct_ver);
+
+       /* Every validation failure below reports -EINVAL */
+       rc = -EINVAL;
+       if (magic_num != FALCON_NVCONFIG_BOARD_MAGIC_NUM) {
+               netif_err(efx, hw, efx->net_dev,
+                         "NVRAM bad magic 0x%x\n", magic_num);
+               goto out;
+       }
+       /* Versions 2 and 3 checksum only the config structure (from
+        * board_magic_num to its end); version 4 and later checksum the
+        * whole region */
+       if (struct_ver < 2) {
+               netif_err(efx, hw, efx->net_dev,
+                         "NVRAM has ancient version 0x%x\n", struct_ver);
+               goto out;
+       } else if (struct_ver < 4) {
+               word = &nvconfig->board_magic_num;
+               limit = (__le16 *) (nvconfig + 1);
+       } else {
+               word = region;
+               limit = region + FALCON_NVCONFIG_END;
+       }
+       for (csum = 0; word < limit; ++word)
+               csum += le16_to_cpu(*word);
+
+       /* The low 16 bits of the sum must all be ones */
+       if (~csum & 0xffff) {
+               netif_err(efx, hw, efx->net_dev,
+                         "NVRAM has incorrect checksum\n");
+               goto out;
+       }
+
+       rc = 0;
+       if (nvconfig_out)
+               memcpy(nvconfig_out, nvconfig, sizeof(*nvconfig));
+
+ out:
+       kfree(region);
+       return rc;
+}
+
+/* NVRAM self-test: read and validate the NVRAM configuration without
+ * keeping a copy.  Returns the result of falcon_read_nvram().
+ */
+static int falcon_test_nvram(struct ef4_nic *efx)
+{
+       return falcon_read_nvram(efx, NULL);
+}
+
+/* Register table passed to ef4_farch_test_registers() by
+ * falcon_b0_test_chip().  Each entry pairs a register address with an
+ * oword built from four 32-bit words - presumably the mask of bits the
+ * test may exercise; confirm against ef4_farch_test_registers().
+ */
+static const struct ef4_farch_register_test falcon_b0_register_tests[] = {
+       { FR_AZ_ADR_REGION,
+         EF4_OWORD32(0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF) },
+       { FR_AZ_RX_CFG,
+         EF4_OWORD32(0xFFFFFFFE, 0x00017FFF, 0x00000000, 0x00000000) },
+       { FR_AZ_TX_CFG,
+         EF4_OWORD32(0x7FFF0037, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AZ_TX_RESERVED,
+         EF4_OWORD32(0xFFFEFE80, 0x1FFFFFFF, 0x020000FE, 0x007FFFFF) },
+       { FR_AB_MAC_CTRL,
+         EF4_OWORD32(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AZ_SRM_TX_DC_CFG,
+         EF4_OWORD32(0x001FFFFF, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AZ_RX_DC_CFG,
+         EF4_OWORD32(0x0000000F, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AZ_RX_DC_PF_WM,
+         EF4_OWORD32(0x000003FF, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_BZ_DP_CTRL,
+         EF4_OWORD32(0x00000FFF, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AB_GM_CFG2,
+         EF4_OWORD32(0x00007337, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AB_GMF_CFG0,
+         EF4_OWORD32(0x00001F1F, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AB_XM_GLB_CFG,
+         EF4_OWORD32(0x00000C68, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AB_XM_TX_CFG,
+         EF4_OWORD32(0x00080164, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AB_XM_RX_CFG,
+         EF4_OWORD32(0x07100A0C, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AB_XM_RX_PARAM,
+         EF4_OWORD32(0x00001FF8, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AB_XM_FC,
+         EF4_OWORD32(0xFFFF0001, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AB_XM_ADR_LO,
+         EF4_OWORD32(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000) },
+       { FR_AB_XX_SD_CTL,
+         EF4_OWORD32(0x0003FF0F, 0x00000000, 0x00000000, 0x00000000) },
+};
+
+/* Chip self-test for Falcon B0.
+ *
+ * Under efx->mac_lock, selects a loopback mode if any is supported
+ * (XGMII preferred, otherwise the lowest-numbered supported mode) and
+ * reconfigures the port.  Then takes the NIC down, runs the register
+ * tests (tests->registers is set to 1 on pass and -1 on failure), resets
+ * the hardware and brings the NIC back up.
+ *
+ * Returns the error from falcon_reset_hw() if it failed, otherwise the
+ * result of ef4_reset_up().
+ */
+static int
+falcon_b0_test_chip(struct ef4_nic *efx, struct ef4_self_tests *tests)
+{
+       enum reset_type reset_method = RESET_TYPE_INVISIBLE;
+       int rc, rc2;
+
+       mutex_lock(&efx->mac_lock);
+       if (efx->loopback_modes) {
+               /* We need the 312 clock from the PHY to test the XMAC
+                * registers, so move into XGMII loopback if available */
+               if (efx->loopback_modes & (1 << LOOPBACK_XGMII))
+                       efx->loopback_mode = LOOPBACK_XGMII;
+               else
+                       efx->loopback_mode = __ffs(efx->loopback_modes);
+       }
+       __ef4_reconfigure_port(efx);
+       mutex_unlock(&efx->mac_lock);
+
+       ef4_reset_down(efx, reset_method);
+
+       tests->registers =
+               ef4_farch_test_registers(efx, falcon_b0_register_tests,
+                                        ARRAY_SIZE(falcon_b0_register_tests))
+               ? -1 : 1;
+
+       rc = falcon_reset_hw(efx, reset_method);
+       rc2 = ef4_reset_up(efx, reset_method, rc == 0);
+       return rc ? rc : rc2;
+}
+
+/**************************************************************************
+ *
+ * Device reset
+ *
+ **************************************************************************
+ */
+
+/* Map a reset reason to the reset scope to perform: RX recovery, DMA
+ * error and TX skip map to RESET_TYPE_INVISIBLE, everything else to
+ * RESET_TYPE_ALL.
+ */
+static enum reset_type falcon_map_reset_reason(enum reset_type reason)
+{
+       /* These can occasionally occur due to hardware bugs.
+        * We try to reset without disrupting the link.
+        */
+       if (reason == RESET_TYPE_RX_RECOVERY ||
+           reason == RESET_TYPE_DMA_ERROR ||
+           reason == RESET_TYPE_TX_SKIP)
+               return RESET_TYPE_INVISIBLE;
+
+       return RESET_TYPE_ALL;
+}
+
+/* Translate ethtool reset flags into the widest reset scope they fully
+ * cover.
+ *
+ * Scopes are tried from widest (WORLD) to narrowest (INVISIBLE).  The
+ * flags of the chosen scope are cleared from *@flags and the matching
+ * RESET_TYPE_* value is returned; -EINVAL is returned, with *@flags
+ * untouched, if not even the INVISIBLE set is fully requested.
+ */
+static int falcon_map_reset_flags(u32 *flags)
+{
+       enum {
+               FALCON_RESET_INVISIBLE = (ETH_RESET_DMA | ETH_RESET_FILTER |
+                                         ETH_RESET_OFFLOAD | ETH_RESET_MAC),
+               FALCON_RESET_ALL = FALCON_RESET_INVISIBLE | ETH_RESET_PHY,
+               FALCON_RESET_WORLD = FALCON_RESET_ALL | ETH_RESET_IRQ,
+       };
+       static const struct {
+               u32 mask;
+               int type;
+       } scopes[] = {
+               { FALCON_RESET_WORLD, RESET_TYPE_WORLD },
+               { FALCON_RESET_ALL, RESET_TYPE_ALL },
+               { FALCON_RESET_INVISIBLE, RESET_TYPE_INVISIBLE },
+       };
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(scopes); i++) {
+               if ((*flags & scopes[i].mask) != scopes[i].mask)
+                       continue;
+
+               *flags &= ~scopes[i].mask;
+               return scopes[i].type;
+       }
+
+       return -EINVAL;
+}
+
+/* Resets NIC to known state.  This routine must be called in process
+ * context and is allowed to sleep.
+ *
+ * For RESET_TYPE_WORLD the PCI state of the primary function (and of the
+ * secondary function on dual-function NICs) is saved before and restored
+ * after the reset.  For other methods the PCIe and EEPROM/flash blocks
+ * are excluded from the reset, and the external PHY is excluded as well
+ * when @method is RESET_TYPE_INVISIBLE.
+ *
+ * Returns 0 on success, the pci_save_state() error if saving fails, or
+ * -ETIMEDOUT if SWRST is still set after the wait.
+ */
+static int __falcon_reset_hw(struct ef4_nic *efx, enum reset_type method)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       ef4_oword_t glb_ctl_reg_ker;
+       int rc;
+
+       netif_dbg(efx, hw, efx->net_dev, "performing %s hardware reset\n",
+                 RESET_TYPE(method));
+
+       /* Initiate device reset */
+       if (method == RESET_TYPE_WORLD) {
+               rc = pci_save_state(efx->pci_dev);
+               if (rc) {
+                       netif_err(efx, drv, efx->net_dev,
+                                 "failed to backup PCI state of primary "
+                                 "function prior to hardware reset\n");
+                       goto fail1;
+               }
+               if (ef4_nic_is_dual_func(efx)) {
+                       rc = pci_save_state(nic_data->pci_dev2);
+                       if (rc) {
+                               netif_err(efx, drv, efx->net_dev,
+                                         "failed to backup PCI state of "
+                                         "secondary function prior to "
+                                         "hardware reset\n");
+                               goto fail2;
+                       }
+               }
+
+               EF4_POPULATE_OWORD_2(glb_ctl_reg_ker,
+                                    FRF_AB_EXT_PHY_RST_DUR,
+                                    FFE_AB_EXT_PHY_RST_DUR_10240US,
+                                    FRF_AB_SWRST, 1);
+       } else {
+               EF4_POPULATE_OWORD_7(glb_ctl_reg_ker,
+                                    /* exclude PHY from "invisible" reset */
+                                    FRF_AB_EXT_PHY_RST_CTL,
+                                    method == RESET_TYPE_INVISIBLE,
+                                    /* exclude EEPROM/flash and PCIe */
+                                    FRF_AB_PCIE_CORE_RST_CTL, 1,
+                                    FRF_AB_PCIE_NSTKY_RST_CTL, 1,
+                                    FRF_AB_PCIE_SD_RST_CTL, 1,
+                                    FRF_AB_EE_RST_CTL, 1,
+                                    FRF_AB_EXT_PHY_RST_DUR,
+                                    FFE_AB_EXT_PHY_RST_DUR_10240US,
+                                    FRF_AB_SWRST, 1);
+       }
+       ef4_writeo(efx, &glb_ctl_reg_ker, FR_AB_GLB_CTL);
+
+       netif_dbg(efx, hw, efx->net_dev, "waiting for hardware reset\n");
+       /* Sleep for 1/20 s (50ms) to let the reset complete */
+       schedule_timeout_uninterruptible(HZ / 20);
+
+       /* Restore PCI configuration if needed */
+       if (method == RESET_TYPE_WORLD) {
+               if (ef4_nic_is_dual_func(efx))
+                       pci_restore_state(nic_data->pci_dev2);
+               pci_restore_state(efx->pci_dev);
+               netif_dbg(efx, drv, efx->net_dev,
+                         "successfully restored PCI config\n");
+       }
+
+       /* Assert that reset complete */
+       ef4_reado(efx, &glb_ctl_reg_ker, FR_AB_GLB_CTL);
+       if (EF4_OWORD_FIELD(glb_ctl_reg_ker, FRF_AB_SWRST) != 0) {
+               rc = -ETIMEDOUT;
+               netif_err(efx, hw, efx->net_dev,
+                         "timed out waiting for hardware reset\n");
+               goto fail3;
+       }
+       netif_dbg(efx, hw, efx->net_dev, "hardware reset complete\n");
+
+       return 0;
+
+       /* pci_save_state() and pci_restore_state() MUST be called in pairs */
+fail2:
+       pci_restore_state(efx->pci_dev);
+fail1:
+fail3:
+       return rc;
+}
+
+/* Reset the hardware with nic_data->spi_lock held for the duration of
+ * the reset.  Returns the result of __falcon_reset_hw().
+ */
+static int falcon_reset_hw(struct ef4_nic *efx, enum reset_type method)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       int rc;
+
+       mutex_lock(&nic_data->spi_lock);
+       rc = __falcon_reset_hw(efx, method);
+       mutex_unlock(&nic_data->spi_lock);
+
+       return rc;
+}
+
+/* Periodic monitor; efx->mac_lock must be held (enforced by BUG_ON).
+ *
+ * Runs the board's monitor hook and, if it reports an error, puts the
+ * PHY into low-power mode and reconfigures the port.  Then polls the
+ * link (synthesised when in internal loopback); on a change the MACs are
+ * reset and the XMAC reconfigured with statistics stopped, and the new
+ * link state is reported.  Finally polls the XMAC.
+ */
+static void falcon_monitor(struct ef4_nic *efx)
+{
+       bool link_changed;
+       int rc;
+
+       BUG_ON(!mutex_is_locked(&efx->mac_lock));
+
+       rc = falcon_board(efx)->type->monitor(efx);
+       if (rc) {
+               netif_err(efx, hw, efx->net_dev,
+                         "Board sensor %s; shutting down PHY\n",
+                         (rc == -ERANGE) ? "reported fault" : "failed");
+               efx->phy_mode |= PHY_MODE_LOW_POWER;
+               rc = __ef4_reconfigure_port(efx);
+               WARN_ON(rc);
+       }
+
+       if (LOOPBACK_INTERNAL(efx))
+               link_changed = falcon_loopback_link_poll(efx);
+       else
+               link_changed = efx->phy_op->poll(efx);
+
+       if (link_changed) {
+               falcon_stop_nic_stats(efx);
+               falcon_deconfigure_mac_wrapper(efx);
+
+               falcon_reset_macs(efx);
+               rc = falcon_reconfigure_xmac(efx);
+               BUG_ON(rc);
+
+               falcon_start_nic_stats(efx);
+
+               ef4_link_status_changed(efx);
+       }
+
+       falcon_poll_xmac(efx);
+}
+
+/* Zeroes out the SRAM contents.  This routine must be called in
+ * process context and is allowed to sleep.
+ *
+ * Drives GPIO1 high as an output, sets SRM_INIT_EN in SRM_CFG and then
+ * polls up to 20 times, sleeping HZ / 50 between polls, for the bit to
+ * clear.  Returns 0 on success or -ETIMEDOUT.
+ */
+static int falcon_reset_sram(struct ef4_nic *efx)
+{
+       ef4_oword_t srm_cfg_reg_ker, gpio_cfg_reg_ker;
+       int count;
+
+       /* Set the SRAM wake/sleep GPIO appropriately. */
+       ef4_reado(efx, &gpio_cfg_reg_ker, FR_AB_GPIO_CTL);
+       EF4_SET_OWORD_FIELD(gpio_cfg_reg_ker, FRF_AB_GPIO1_OEN, 1);
+       EF4_SET_OWORD_FIELD(gpio_cfg_reg_ker, FRF_AB_GPIO1_OUT, 1);
+       ef4_writeo(efx, &gpio_cfg_reg_ker, FR_AB_GPIO_CTL);
+
+       /* Initiate SRAM reset */
+       EF4_POPULATE_OWORD_2(srm_cfg_reg_ker,
+                            FRF_AZ_SRM_INIT_EN, 1,
+                            FRF_AZ_SRM_NB_SZ, 0);
+       ef4_writeo(efx, &srm_cfg_reg_ker, FR_AZ_SRM_CFG);
+
+       /* Wait for SRAM reset to complete */
+       count = 0;
+       do {
+               netif_dbg(efx, hw, efx->net_dev,
+                         "waiting for SRAM reset (attempt %d)...\n", count);
+
+               /* SRAM reset is slow; expect around 16ms */
+               schedule_timeout_uninterruptible(HZ / 50);
+
+               /* Check for reset complete */
+               ef4_reado(efx, &srm_cfg_reg_ker, FR_AZ_SRM_CFG);
+               if (!EF4_OWORD_FIELD(srm_cfg_reg_ker, FRF_AZ_SRM_INIT_EN)) {
+                       netif_dbg(efx, hw, efx->net_dev,
+                                 "SRAM reset complete\n");
+
+                       return 0;
+               }
+       } while (++count < 20); /* wait up to 0.4 sec */
+
+       netif_err(efx, hw, efx->net_dev, "timed out waiting for SRAM reset\n");
+       return -ETIMEDOUT;
+}
+
+/* Fill in @spi_device from a packed @device_type word.
+ *
+ * A @device_type of 0 only records a size of 0.  Otherwise the device
+ * ID, address length, erase command and the power-of-two size, erase
+ * size and block size are decoded from the type word; munge_address is
+ * set for devices of size 1 << 9 with a one-byte address.
+ */
+static void falcon_spi_device_init(struct ef4_nic *efx,
+                                 struct falcon_spi_device *spi_device,
+                                 unsigned int device_id, u32 device_type)
+{
+       if (device_type == 0) {
+               spi_device->size = 0;
+               return;
+       }
+
+       spi_device->device_id = device_id;
+       spi_device->addr_len =
+               SPI_DEV_TYPE_FIELD(device_type, SPI_DEV_TYPE_ADDR_LEN);
+       spi_device->size =
+               1 << SPI_DEV_TYPE_FIELD(device_type, SPI_DEV_TYPE_SIZE);
+       spi_device->munge_address = (spi_device->addr_len == 1 &&
+                                    spi_device->size == 1 << 9);
+
+       spi_device->erase_command =
+               SPI_DEV_TYPE_FIELD(device_type, SPI_DEV_TYPE_ERASE_CMD);
+       spi_device->erase_size =
+               1 << SPI_DEV_TYPE_FIELD(device_type,
+                                       SPI_DEV_TYPE_ERASE_SIZE);
+       spi_device->block_size =
+               1 << SPI_DEV_TYPE_FIELD(device_type,
+                                       SPI_DEV_TYPE_BLOCK_SIZE);
+}
+
+/* Extract non-volatile configuration.
+ *
+ * Reads and validates the NVRAM configuration, then takes from it the
+ * PHY type and MDIO port address, the SPI device descriptions (structure
+ * version 3 and later only) and the permanent MAC address, and finally
+ * probes the board using the stored board revision.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error from
+ * falcon_read_nvram() or falcon_probe_board().
+ */
+static int falcon_probe_nvconfig(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       struct falcon_nvconfig *nvconfig;
+       int rc;
+
+       nvconfig = kmalloc(sizeof(*nvconfig), GFP_KERNEL);
+       if (!nvconfig)
+               return -ENOMEM;
+
+       rc = falcon_read_nvram(efx, nvconfig);
+       if (rc)
+               goto out;
+
+       efx->phy_type = nvconfig->board_v2.port0_phy_type;
+       efx->mdio.prtad = nvconfig->board_v2.port0_phy_addr;
+
+       if (le16_to_cpu(nvconfig->board_struct_ver) >= 3) {
+               falcon_spi_device_init(
+                       efx, &nic_data->spi_flash, FFE_AB_SPI_DEVICE_FLASH,
+                       le32_to_cpu(nvconfig->board_v3
+                                   .spi_device_type[FFE_AB_SPI_DEVICE_FLASH]));
+               falcon_spi_device_init(
+                       efx, &nic_data->spi_eeprom, FFE_AB_SPI_DEVICE_EEPROM,
+                       le32_to_cpu(nvconfig->board_v3
+                                   .spi_device_type[FFE_AB_SPI_DEVICE_EEPROM]));
+       }
+
+       /* Read the MAC addresses */
+       ether_addr_copy(efx->net_dev->perm_addr, nvconfig->mac_address[0]);
+
+       netif_dbg(efx, probe, efx->net_dev, "PHY is %d phy_id %d\n",
+                 efx->phy_type, efx->mdio.prtad);
+
+       rc = falcon_probe_board(efx,
+                               le16_to_cpu(nvconfig->board_v2.board_revision));
+out:
+       kfree(nvconfig);
+       return rc;
+}
+
+/* Set the fixed RX and TX descriptor cache base addresses.
+ * Always returns 0.
+ */
+static int falcon_dimension_resources(struct ef4_nic *efx)
+{
+       efx->tx_dc_base = 0x26000;
+       efx->rx_dc_base = 0x20000;
+
+       return 0;
+}
+
+/* Probe all SPI devices on the NIC
+ *
+ * Reads the GPIO control, NIC status and EE/VPD configuration registers
+ * to decide which SPI device (flash or EEPROM) the NIC booted from,
+ * initialises the SPI lock, and describes the boot device using a
+ * default device type.  If the NIC booted from its internal settings,
+ * VPD is disabled and the SPI clock dividers are reprogrammed instead.
+ */
+static void falcon_probe_spi_devices(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       ef4_oword_t nic_stat, gpio_ctl, ee_vpd_cfg;
+       int boot_dev;
+
+       ef4_reado(efx, &gpio_ctl, FR_AB_GPIO_CTL);
+       ef4_reado(efx, &nic_stat, FR_AB_NIC_STAT);
+       ef4_reado(efx, &ee_vpd_cfg, FR_AB_EE_VPD_CFG0);
+
+       if (EF4_OWORD_FIELD(gpio_ctl, FRF_AB_GPIO3_PWRUP_VALUE)) {
+               /* SF_PRST selects flash; otherwise the EEPROM was used */
+               boot_dev = (EF4_OWORD_FIELD(nic_stat, FRF_AB_SF_PRST) ?
+                           FFE_AB_SPI_DEVICE_FLASH : FFE_AB_SPI_DEVICE_EEPROM);
+               netif_dbg(efx, probe, efx->net_dev, "Booted from %s\n",
+                         boot_dev == FFE_AB_SPI_DEVICE_FLASH ?
+                         "flash" : "EEPROM");
+       } else {
+               /* Disable VPD and set clock dividers to safe
+                * values for initial programming. */
+               /* NOTE(review): -1 is presumably chosen to match neither
+                * device ID tested below - confirm against the FFE_AB_*
+                * definitions. */
+               boot_dev = -1;
+               netif_dbg(efx, probe, efx->net_dev,
+                         "Booted from internal ASIC settings;"
+                         " setting SPI config\n");
+               EF4_POPULATE_OWORD_3(ee_vpd_cfg, FRF_AB_EE_VPD_EN, 0,
+                                    /* 125 MHz / 7 ~= 20 MHz */
+                                    FRF_AB_EE_SF_CLOCK_DIV, 7,
+                                    /* 125 MHz / 63 ~= 2 MHz */
+                                    FRF_AB_EE_EE_CLOCK_DIV, 63);
+               ef4_writeo(efx, &ee_vpd_cfg, FR_AB_EE_VPD_CFG0);
+       }
+
+       mutex_init(&nic_data->spi_lock);
+
+       /* Only the device we booted from gets a default description here */
+       if (boot_dev == FFE_AB_SPI_DEVICE_FLASH)
+               falcon_spi_device_init(efx, &nic_data->spi_flash,
+                                      FFE_AB_SPI_DEVICE_FLASH,
+                                      default_flash_type);
+       if (boot_dev == FFE_AB_SPI_DEVICE_EEPROM)
+               falcon_spi_device_init(efx, &nic_data->spi_eeprom,
+                                      FFE_AB_SPI_DEVICE_EEPROM,
+                                      large_eeprom_type);
+}
+
+/* Size in bytes of the register window to map on Falcon A1.
+ * This is a constant; @efx is not consulted.
+ */
+static unsigned int falcon_a1_mem_map_size(struct ef4_nic *efx)
+{
+       const unsigned int map_bytes = 0x20000;
+
+       return map_bytes;
+}
+
+/* Size in bytes of the register window to map on Falcon B0.
+ * @efx is not consulted.
+ */
+static unsigned int falcon_b0_mem_map_size(struct ef4_nic *efx)
+{
+       /* The window extends up to and including the whole RSS
+        * indirection table.  The MSI-X tables are mapped by the
+        * PCI core, so they are not counted here.
+        */
+       return FR_BZ_RX_INDIRECTION_TBL_ROWS * FR_BZ_RX_INDIRECTION_TBL_STEP +
+               FR_BZ_RX_INDIRECTION_TBL;
+}
+
+/* Probe the Falcon NIC: allocate the private NIC state, reject
+ * unsupported hardware, locate the secondary PCI function (A1 only),
+ * reset the NIC, allocate the interrupt status buffer, read the
+ * non-volatile configuration and bring up the board's I2C adapter.
+ *
+ * Returns 0 on success or a negative error code.  On failure every
+ * resource acquired here is released again and efx->nic_data is reset
+ * to NULL, matching what falcon_remove_nic() does, so that no stale
+ * pointer to freed memory is left behind.
+ */
+static int falcon_probe_nic(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data;
+       struct falcon_board *board;
+       int rc;
+
+       efx->primary = efx; /* only one usable function per controller */
+
+       /* Allocate storage for hardware specific data */
+       nic_data = kzalloc(sizeof(*nic_data), GFP_KERNEL);
+       if (!nic_data)
+               return -ENOMEM;
+       efx->nic_data = nic_data;
+
+       /* Default error for the hardware-support checks below */
+       rc = -ENODEV;
+
+       if (ef4_farch_fpga_ver(efx) != 0) {
+               netif_err(efx, probe, efx->net_dev,
+                         "Falcon FPGA not supported\n");
+               goto fail1;
+       }
+
+       if (ef4_nic_rev(efx) <= EF4_REV_FALCON_A1) {
+               ef4_oword_t nic_stat;
+               struct pci_dev *dev;
+               u8 pci_rev = efx->pci_dev->revision;
+
+               if ((pci_rev == 0xff) || (pci_rev == 0)) {
+                       netif_err(efx, probe, efx->net_dev,
+                                 "Falcon rev A0 not supported\n");
+                       goto fail1;
+               }
+               ef4_reado(efx, &nic_stat, FR_AB_NIC_STAT);
+               if (EF4_OWORD_FIELD(nic_stat, FRF_AB_STRAP_10G) == 0) {
+                       netif_err(efx, probe, efx->net_dev,
+                                 "Falcon rev A1 1G not supported\n");
+                       goto fail1;
+               }
+               if (EF4_OWORD_FIELD(nic_stat, FRF_AA_STRAP_PCIE) == 0) {
+                       netif_err(efx, probe, efx->net_dev,
+                                 "Falcon rev A1 PCI-X not supported\n");
+                       goto fail1;
+               }
+
+               /* Search onwards from our own device for the function
+                * with the next devfn on the same bus.  The reference
+                * held on a match is kept in nic_data->pci_dev2 and
+                * dropped on failure or in falcon_remove_nic().
+                */
+               dev = pci_dev_get(efx->pci_dev);
+               while ((dev = pci_get_device(PCI_VENDOR_ID_SOLARFLARE,
+                                            PCI_DEVICE_ID_SOLARFLARE_SFC4000A_1,
+                                            dev))) {
+                       if (dev->bus == efx->pci_dev->bus &&
+                           dev->devfn == efx->pci_dev->devfn + 1) {
+                               nic_data->pci_dev2 = dev;
+                               break;
+                       }
+               }
+               if (!nic_data->pci_dev2) {
+                       netif_err(efx, probe, efx->net_dev,
+                                 "failed to find secondary function\n");
+                       rc = -ENODEV;
+                       goto fail2;
+               }
+       }
+
+       /* Now we can reset the NIC */
+       rc = __falcon_reset_hw(efx, RESET_TYPE_ALL);
+       if (rc) {
+               netif_err(efx, probe, efx->net_dev, "failed to reset NIC\n");
+               goto fail3;
+       }
+
+       /* Allocate memory for INT_KER */
+       rc = ef4_nic_alloc_buffer(efx, &efx->irq_status, sizeof(ef4_oword_t),
+                                 GFP_KERNEL);
+       if (rc)
+               goto fail4;
+       /* The buffer's DMA address must be 16-byte aligned */
+       BUG_ON(efx->irq_status.dma_addr & 0x0f);
+
+       netif_dbg(efx, probe, efx->net_dev,
+                 "INT_KER at %llx (virt %p phys %llx)\n",
+                 (u64)efx->irq_status.dma_addr,
+                 efx->irq_status.addr,
+                 (u64)virt_to_phys(efx->irq_status.addr));
+
+       falcon_probe_spi_devices(efx);
+
+       /* Read in the non-volatile configuration */
+       rc = falcon_probe_nvconfig(efx);
+       if (rc) {
+               if (rc == -EINVAL)
+                       netif_err(efx, probe, efx->net_dev, "NVRAM is invalid\n");
+               goto fail5;
+       }
+
+       efx->max_channels = (ef4_nic_rev(efx) <= EF4_REV_FALCON_A1 ? 4 :
+                            EF4_MAX_CHANNELS);
+       efx->max_tx_channels = efx->max_channels;
+       efx->timer_quantum_ns = 4968; /* 621 cycles */
+       efx->timer_max_ns = efx->type->timer_period_max *
+                           efx->timer_quantum_ns;
+
+       /* Initialise I2C adapter */
+       board = falcon_board(efx);
+       board->i2c_adap.owner = THIS_MODULE;
+       board->i2c_data = falcon_i2c_bit_operations;
+       board->i2c_data.data = efx;
+       board->i2c_adap.algo_data = &board->i2c_data;
+       board->i2c_adap.dev.parent = &efx->pci_dev->dev;
+       strlcpy(board->i2c_adap.name, "SFC4000 GPIO",
+               sizeof(board->i2c_adap.name));
+       rc = i2c_bit_add_bus(&board->i2c_adap);
+       if (rc)
+               goto fail5;
+
+       rc = falcon_board(efx)->type->init(efx);
+       if (rc) {
+               netif_err(efx, probe, efx->net_dev,
+                         "failed to initialise board\n");
+               goto fail6;
+       }
+
+       /* Statistics start out disabled (count of 1) */
+       nic_data->stats_disable_count = 1;
+       setup_timer(&nic_data->stats_timer, &falcon_stats_timer_func,
+                   (unsigned long)efx);
+
+       return 0;
+
+       /* Error unwinding: release resources in reverse order */
+ fail6:
+       i2c_del_adapter(&board->i2c_adap);
+       memset(&board->i2c_adap, 0, sizeof(board->i2c_adap));
+ fail5:
+       ef4_nic_free_buffer(efx, &efx->irq_status);
+ fail4:
+ fail3:
+       if (nic_data->pci_dev2) {
+               pci_dev_put(nic_data->pci_dev2);
+               nic_data->pci_dev2 = NULL;
+       }
+ fail2:
+ fail1:
+       kfree(efx->nic_data);
+       /* Do not leave a dangling pointer to the freed state */
+       efx->nic_data = NULL;
+       return rc;
+}
+
+/* Program the RX configuration register (FR_AZ_RX_CFG).
+ *
+ * This is a read-modify-write: the register is read, the descriptor
+ * push, user buffer size and XON/XOFF threshold fields are set using
+ * the field layout for the NIC revision (A1 and earlier versus later
+ * revisions), and the result is written back.  Later revisions also
+ * get ingress enabled and RX hash insertion configured.
+ */
+static void falcon_init_rx_cfg(struct ef4_nic *efx)
+{
+       /* RX control FIFO thresholds (32 entries) */
+       const unsigned ctrl_xon_thr = 20;
+       const unsigned ctrl_xoff_thr = 25;
+       ef4_oword_t reg;
+
+       ef4_reado(efx, &reg, FR_AZ_RX_CFG);
+       if (ef4_nic_rev(efx) <= EF4_REV_FALCON_A1) {
+               /* Data FIFO size is 5.5K.  The RX DMA engine only
+                * supports scattering for user-mode queues, but will
+                * split DMA writes at intervals of RX_USR_BUF_SIZE
+                * (32-byte units) even for kernel-mode queues.  We
+                * set it to be so large that that never happens.
+                */
+               EF4_SET_OWORD_FIELD(reg, FRF_AA_RX_DESC_PUSH_EN, 0);
+               EF4_SET_OWORD_FIELD(reg, FRF_AA_RX_USR_BUF_SIZE,
+                                   (3 * 4096) >> 5);
+               /* MAC thresholds are written as byte counts shifted by 8 */
+               EF4_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_MAC_TH, 512 >> 8);
+               EF4_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_MAC_TH, 2048 >> 8);
+               EF4_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_TX_TH, ctrl_xon_thr);
+               EF4_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_TX_TH, ctrl_xoff_thr);
+       } else {
+               /* Data FIFO size is 80K; register fields moved */
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_DESC_PUSH_EN, 0);
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_USR_BUF_SIZE,
+                                   EF4_RX_USR_BUF_SIZE >> 5);
+               /* Send XON and XOFF at ~3 * max MTU away from empty/full */
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_MAC_TH, 27648 >> 8);
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_MAC_TH, 54272 >> 8);
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_TX_TH, ctrl_xon_thr);
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_TX_TH, ctrl_xoff_thr);
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_INGR_EN, 1);
+
+               /* Enable hash insertion. This is broken for the
+                * 'Falcon' hash so also select Toeplitz TCP/IPv4 and
+                * IPv4 hashes. */
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_HASH_INSRT_HDR, 1);
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_HASH_ALG, 1);
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_IP_HASH, 1);
+       }
+       /* Always enable XOFF signal from RX FIFO.  We enable
+        * or disable transmission of pause frames at the MAC. */
+       EF4_SET_OWORD_FIELD(reg, FRF_AZ_RX_XOFF_MAC_EN, 1);
+       ef4_writeo(efx, &reg, FR_AZ_RX_CFG);
+}
+
+/* This call performs hardware-specific global initialisation, such as
+ * defining the descriptor cache sizes and number of RSS channels.
+ * It does not set up any buffers, descriptor rings or event queues.
+ *
+ * Returns 0 on success, or the error from falcon_reset_sram() if the
+ * SRAM reset fails (in which case no further registers are touched).
+ */
+static int falcon_init_nic(struct ef4_nic *efx)
+{
+       ef4_oword_t temp;
+       int rc;
+
+       /* Use on-chip SRAM */
+       ef4_reado(efx, &temp, FR_AB_NIC_STAT);
+       EF4_SET_OWORD_FIELD(temp, FRF_AB_ONCHIP_SRAM, 1);
+       ef4_writeo(efx, &temp, FR_AB_NIC_STAT);
+
+       rc = falcon_reset_sram(efx);
+       if (rc)
+               return rc;
+
+       /* Clear the parity enables on the TX data fifos as
+        * they produce false parity errors because of timing issues
+        */
+       if (EF4_WORKAROUND_5129(efx)) {
+               ef4_reado(efx, &temp, FR_AZ_CSR_SPARE);
+               EF4_SET_OWORD_FIELD(temp, FRF_AB_MEM_PERR_EN_TX_DATA, 0);
+               ef4_writeo(efx, &temp, FR_AZ_CSR_SPARE);
+       }
+
+       /* Workaround 7244: set all four RX filter search limits to 8 */
+       if (EF4_WORKAROUND_7244(efx)) {
+               ef4_reado(efx, &temp, FR_BZ_RX_FILTER_CTL);
+               EF4_SET_OWORD_FIELD(temp, FRF_BZ_UDP_FULL_SRCH_LIMIT, 8);
+               EF4_SET_OWORD_FIELD(temp, FRF_BZ_UDP_WILD_SRCH_LIMIT, 8);
+               EF4_SET_OWORD_FIELD(temp, FRF_BZ_TCP_FULL_SRCH_LIMIT, 8);
+               EF4_SET_OWORD_FIELD(temp, FRF_BZ_TCP_WILD_SRCH_LIMIT, 8);
+               ef4_writeo(efx, &temp, FR_BZ_RX_FILTER_CTL);
+       }
+
+       /* XXX This is documented only for Falcon A0/A1 */
+       /* Setup RX.  Wait for descriptor is broken and must
+        * be disabled.  RXDP recovery shouldn't be needed, but is.
+        */
+       ef4_reado(efx, &temp, FR_AA_RX_SELF_RST);
+       EF4_SET_OWORD_FIELD(temp, FRF_AA_RX_NODESC_WAIT_DIS, 1);
+       EF4_SET_OWORD_FIELD(temp, FRF_AA_RX_SELF_RST_EN, 1);
+       if (EF4_WORKAROUND_5583(efx))
+               EF4_SET_OWORD_FIELD(temp, FRF_AA_RX_ISCSI_DIS, 1);
+       ef4_writeo(efx, &temp, FR_AA_RX_SELF_RST);
+
+       /* Do not enable TX_NO_EOP_DISC_EN, since it limits packets to 16
+        * descriptors (which is bad).
+        */
+       ef4_reado(efx, &temp, FR_AZ_TX_CFG);
+       EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_NO_EOP_DISC_EN, 0);
+       ef4_writeo(efx, &temp, FR_AZ_TX_CFG);
+
+       falcon_init_rx_cfg(efx);
+
+       /* RSS configuration and flush event routing: B0 and later only */
+       if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) {
+               falcon_b0_rx_push_rss_config(efx, false, efx->rx_indir_table);
+
+               /* Set destination of both TX and RX Flush events */
+               EF4_POPULATE_OWORD_1(temp, FRF_BZ_FLS_EVQ_ID, 0);
+               ef4_writeo(efx, &temp, FR_BZ_DP_CTRL);
+       }
+
+       ef4_farch_init_common(efx);
+
+       return 0;
+}
+
+/* Tear down the NIC state: shut down the board, remove the I2C
+ * adapter, free the interrupt status buffer, reset the hardware,
+ * release the secondary PCI function (if one is held) and free the
+ * private NIC data, leaving efx->nic_data NULL.
+ */
+static void falcon_remove_nic(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       struct falcon_board *board = falcon_board(efx);
+
+       board->type->fini(efx);
+
+       /* Remove I2C adapter and clear it in preparation for a retry */
+       i2c_del_adapter(&board->i2c_adap);
+       memset(&board->i2c_adap, 0, sizeof(board->i2c_adap));
+
+       ef4_nic_free_buffer(efx, &efx->irq_status);
+
+       /* The result of the reset is deliberately not checked here */
+       __falcon_reset_hw(efx, RESET_TYPE_ALL);
+
+       /* Release the second function after the reset */
+       if (nic_data->pci_dev2) {
+               pci_dev_put(nic_data->pci_dev2);
+               nic_data->pci_dev2 = NULL;
+       }
+
+       /* Tear down the private nic state */
+       kfree(efx->nic_data);
+       efx->nic_data = NULL;
+}
+
+/* Describe the Falcon statistics: hands the Falcon descriptor table and
+ * mask to ef4_nic_describe_stats() together with @names, and returns
+ * whatever that helper returns.
+ */
+static size_t falcon_describe_nic_stats(struct ef4_nic *efx, u8 *names)
+{
+       size_t n_stats;
+
+       n_stats = ef4_nic_describe_stats(falcon_stat_desc, FALCON_STAT_COUNT,
+                                        falcon_stat_mask, names);
+       return n_stats;
+}
+
+/* Refresh the driver's statistics and optionally export them.
+ *
+ * While statistics are enabled (stats_disable_count is zero) this adds
+ * the RX no-descriptor drop count read from the hardware, folds in a
+ * completed statistics DMA if one was pending, and recomputes the
+ * derived and software statistics.
+ *
+ * @full_stats: if non-NULL, receives a copy of all FALCON_STAT_COUNT
+ *     64-bit statistics
+ * @core_stats: if non-NULL, has its packet, byte, drop and error
+ *     counters filled in from the driver statistics
+ *
+ * Returns FALCON_STAT_COUNT.
+ */
+static size_t falcon_update_nic_stats(struct ef4_nic *efx, u64 *full_stats,
+                                     struct rtnl_link_stats64 *core_stats)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       u64 *stats = nic_data->stats;
+       ef4_oword_t cnt;
+
+       if (!nic_data->stats_disable_count) {
+               /* The register value is accumulated, not assigned */
+               ef4_reado(efx, &cnt, FR_AZ_RX_NODESC_DROP);
+               stats[FALCON_STAT_rx_nodesc_drop_cnt] +=
+                       EF4_OWORD_FIELD(cnt, FRF_AB_RX_NODESC_DROP_CNT);
+
+               /* Consume the DMA buffer only once the done flag is set */
+               if (nic_data->stats_pending &&
+                   FALCON_XMAC_STATS_DMA_FLAG(efx)) {
+                       nic_data->stats_pending = false;
+                       rmb(); /* read the done flag before the stats */
+                       ef4_nic_update_stats(
+                               falcon_stat_desc, FALCON_STAT_COUNT,
+                               falcon_stat_mask,
+                               stats, efx->stats_buffer.addr, true);
+               }
+
+               /* Update derived statistic */
+               ef4_update_diff_stat(&stats[FALCON_STAT_rx_bad_bytes],
+                                    stats[FALCON_STAT_rx_bytes] -
+                                    stats[FALCON_STAT_rx_good_bytes] -
+                                    stats[FALCON_STAT_rx_control] * 64);
+               ef4_update_sw_stats(efx, stats);
+       }
+
+       if (full_stats)
+               memcpy(full_stats, stats, sizeof(u64) * FALCON_STAT_COUNT);
+
+       if (core_stats) {
+               core_stats->rx_packets = stats[FALCON_STAT_rx_packets];
+               core_stats->tx_packets = stats[FALCON_STAT_tx_packets];
+               core_stats->rx_bytes = stats[FALCON_STAT_rx_bytes];
+               core_stats->tx_bytes = stats[FALCON_STAT_tx_bytes];
+               core_stats->rx_dropped = stats[FALCON_STAT_rx_nodesc_drop_cnt] +
+                                        stats[GENERIC_STAT_rx_nodesc_trunc] +
+                                        stats[GENERIC_STAT_rx_noskb_drops];
+               core_stats->multicast = stats[FALCON_STAT_rx_multicast];
+               core_stats->rx_length_errors =
+                       stats[FALCON_STAT_rx_gtjumbo] +
+                       stats[FALCON_STAT_rx_length_error];
+               core_stats->rx_crc_errors = stats[FALCON_STAT_rx_bad];
+               core_stats->rx_frame_errors = stats[FALCON_STAT_rx_align_error];
+               core_stats->rx_fifo_errors = stats[FALCON_STAT_rx_overflow];
+
+               /* rx_fifo_errors is not included in the rx_errors total */
+               core_stats->rx_errors = (core_stats->rx_length_errors +
+                                        core_stats->rx_crc_errors +
+                                        core_stats->rx_frame_errors +
+                                        stats[FALCON_STAT_rx_symbol_error]);
+       }
+
+       return FALCON_STAT_COUNT;
+}
+
+/* Drop one statistics-disable reference under the stats lock.  When the
+ * count reaches zero a new statistics request is issued.
+ */
+void falcon_start_nic_stats(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+
+       spin_lock_bh(&efx->stats_lock);
+
+       nic_data->stats_disable_count--;
+       if (!nic_data->stats_disable_count)
+               falcon_stats_request(efx);
+
+       spin_unlock_bh(&efx->stats_lock);
+}
+
+/* We don't actually pull stats on Falcon.  Instead, wait 10ms so that
+ * they have arrived when this is called just after start_stats.
+ */
+static void falcon_pull_nic_stats(struct ef4_nic *efx)
+{
+       const unsigned int settle_ms = 10;
+
+       msleep(settle_ms);
+}
+
+/* Stop statistics collection.  May sleep.
+ *
+ * Takes one statistics-disable reference under the stats lock, stops
+ * the statistics timer, gives a pending transfer up to four 1ms waits
+ * to raise its DMA done flag, and then calls falcon_stats_complete()
+ * under the stats lock.
+ */
+void falcon_stop_nic_stats(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       int i;
+
+       might_sleep();
+
+       spin_lock_bh(&efx->stats_lock);
+       ++nic_data->stats_disable_count;
+       spin_unlock_bh(&efx->stats_lock);
+
+       /* Done outside the lock: del_timer_sync() waits for the handler */
+       del_timer_sync(&nic_data->stats_timer);
+
+       /* Wait enough time for the most recent transfer to
+        * complete. */
+       for (i = 0; i < 4 && nic_data->stats_pending; i++) {
+               if (FALCON_XMAC_STATS_DMA_FLAG(efx))
+                       break;
+               msleep(1);
+       }
+
+       spin_lock_bh(&efx->stats_lock);
+       falcon_stats_complete(efx);
+       spin_unlock_bh(&efx->stats_lock);
+}
+
+/* Forward the identification LED request to the board's set_id_led
+ * operation.
+ */
+static void falcon_set_id_led(struct ef4_nic *efx, enum ef4_led_mode mode)
+{
+       struct falcon_board *board = falcon_board(efx);
+
+       board->type->set_id_led(efx, mode);
+}
+
+/**************************************************************************
+ *
+ * Wake on LAN
+ *
+ **************************************************************************
+ */
+
+/* Report Wake-on-LAN state: nothing is supported and nothing is
+ * enabled, and the password field is cleared.
+ */
+static void falcon_get_wol(struct ef4_nic *efx, struct ethtool_wolinfo *wol)
+{
+       memset(&wol->sopass, 0, sizeof(wol->sopass));
+       wol->wolopts = 0;
+       wol->supported = 0;
+}
+
+/* Set Wake-on-LAN options.  Only @type == 0 is accepted; any other
+ * value is rejected with -EINVAL.  Returns 0 on success.
+ */
+static int falcon_set_wol(struct ef4_nic *efx, u32 type)
+{
+       return type == 0 ? 0 : -EINVAL;
+}
+
+/**************************************************************************
+ *
+ * Revision-dependent attributes used by efx.c and nic.c
+ *
+ **************************************************************************
+ */
+
+/* NIC type description for Falcon A1 (revision EF4_REV_FALCON_A1):
+ * the operation callbacks followed by the revision-specific register
+ * table bases and capability values.
+ */
+const struct ef4_nic_type falcon_a1_nic_type = {
+       /* Probe, initialisation, reset and port operations */
+       .mem_bar = EF4_MEM_BAR,
+       .mem_map_size = falcon_a1_mem_map_size,
+       .probe = falcon_probe_nic,
+       .remove = falcon_remove_nic,
+       .init = falcon_init_nic,
+       .dimension_resources = falcon_dimension_resources,
+       .fini = falcon_irq_ack_a1,
+       .monitor = falcon_monitor,
+       .map_reset_reason = falcon_map_reset_reason,
+       .map_reset_flags = falcon_map_reset_flags,
+       .reset = falcon_reset_hw,
+       .probe_port = falcon_probe_port,
+       .remove_port = falcon_remove_port,
+       .handle_global_event = falcon_handle_global_event,
+       .fini_dmaq = ef4_farch_fini_dmaq,
+       .prepare_flush = falcon_prepare_flush,
+       .finish_flush = ef4_port_dummy_op_void,
+       .prepare_flr = ef4_port_dummy_op_void,
+       .finish_flr = ef4_farch_finish_flr,
+       /* Statistics */
+       .describe_stats = falcon_describe_nic_stats,
+       .update_stats = falcon_update_nic_stats,
+       .start_stats = falcon_start_nic_stats,
+       .pull_stats = falcon_pull_nic_stats,
+       .stop_stats = falcon_stop_nic_stats,
+       .set_id_led = falcon_set_id_led,
+       .push_irq_moderation = falcon_push_irq_moderation,
+       .reconfigure_port = falcon_reconfigure_port,
+       .prepare_enable_fc_tx = falcon_a1_prepare_enable_fc_tx,
+       .reconfigure_mac = falcon_reconfigure_xmac,
+       .check_mac_fault = falcon_xmac_check_fault,
+       .get_wol = falcon_get_wol,
+       .set_wol = falcon_set_wol,
+       .resume_wol = ef4_port_dummy_op_void,
+       .test_nvram = falcon_test_nvram,
+       /* Interrupt handling; A1 has its own legacy interrupt handler */
+       .irq_enable_master = ef4_farch_irq_enable_master,
+       .irq_test_generate = ef4_farch_irq_test_generate,
+       .irq_disable_non_ev = ef4_farch_irq_disable_master,
+       .irq_handle_msi = ef4_farch_msi_interrupt,
+       .irq_handle_legacy = falcon_legacy_interrupt_a1,
+       /* TX, RX and event queue operations */
+       .tx_probe = ef4_farch_tx_probe,
+       .tx_init = ef4_farch_tx_init,
+       .tx_remove = ef4_farch_tx_remove,
+       .tx_write = ef4_farch_tx_write,
+       .tx_limit_len = ef4_farch_tx_limit_len,
+       .rx_push_rss_config = dummy_rx_push_rss_config,
+       .rx_probe = ef4_farch_rx_probe,
+       .rx_init = ef4_farch_rx_init,
+       .rx_remove = ef4_farch_rx_remove,
+       .rx_write = ef4_farch_rx_write,
+       .rx_defer_refill = ef4_farch_rx_defer_refill,
+       .ev_probe = ef4_farch_ev_probe,
+       .ev_init = ef4_farch_ev_init,
+       .ev_fini = ef4_farch_ev_fini,
+       .ev_remove = ef4_farch_ev_remove,
+       .ev_process = ef4_farch_ev_process,
+       .ev_read_ack = ef4_farch_ev_read_ack,
+       .ev_test_generate = ef4_farch_ev_test_generate,
+
+       /* We don't expose the filter table on Falcon A1 as it is not
+        * mapped into function 0, but these implementations still
+        * work with a degenerate case of all tables set to size 0.
+        */
+       .filter_table_probe = ef4_farch_filter_table_probe,
+       .filter_table_restore = ef4_farch_filter_table_restore,
+       .filter_table_remove = ef4_farch_filter_table_remove,
+       .filter_insert = ef4_farch_filter_insert,
+       .filter_remove_safe = ef4_farch_filter_remove_safe,
+       .filter_get_safe = ef4_farch_filter_get_safe,
+       .filter_clear_rx = ef4_farch_filter_clear_rx,
+       .filter_count_rx_used = ef4_farch_filter_count_rx_used,
+       .filter_get_rx_id_limit = ef4_farch_filter_get_rx_id_limit,
+       .filter_get_rx_ids = ef4_farch_filter_get_rx_ids,
+
+       /* MTD operations are only built with CONFIG_SFC_FALCON_MTD */
+#ifdef CONFIG_SFC_FALCON_MTD
+       .mtd_probe = falcon_mtd_probe,
+       .mtd_rename = falcon_mtd_rename,
+       .mtd_read = falcon_mtd_read,
+       .mtd_erase = falcon_mtd_erase,
+       .mtd_write = falcon_mtd_write,
+       .mtd_sync = falcon_mtd_sync,
+#endif
+
+       /* Revision-specific constants: A1 uses the FR_AA_*_KER tables */
+       .revision = EF4_REV_FALCON_A1,
+       .txd_ptr_tbl_base = FR_AA_TX_DESC_PTR_TBL_KER,
+       .rxd_ptr_tbl_base = FR_AA_RX_DESC_PTR_TBL_KER,
+       .buf_tbl_base = FR_AA_BUF_FULL_TBL_KER,
+       .evq_ptr_tbl_base = FR_AA_EVQ_PTR_TBL_KER,
+       .evq_rptr_tbl_base = FR_AA_EVQ_RPTR_KER,
+       .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
+       .rx_buffer_padding = 0x24,
+       .can_rx_scatter = false,
+       .max_interrupt_mode = EF4_INT_MODE_MSI,
+       .timer_period_max =  1 << FRF_AB_TC_TIMER_VAL_WIDTH,
+       .offload_features = NETIF_F_IP_CSUM,
+};
+
+/* NIC type description for Falcon B0 (revision EF4_REV_FALCON_B0):
+ * the operation callbacks followed by the revision-specific register
+ * table bases and capability values.
+ */
+const struct ef4_nic_type falcon_b0_nic_type = {
+       /* Probe, initialisation, reset and port operations */
+       .mem_bar = EF4_MEM_BAR,
+       .mem_map_size = falcon_b0_mem_map_size,
+       .probe = falcon_probe_nic,
+       .remove = falcon_remove_nic,
+       .init = falcon_init_nic,
+       .dimension_resources = falcon_dimension_resources,
+       .fini = ef4_port_dummy_op_void,
+       .monitor = falcon_monitor,
+       .map_reset_reason = falcon_map_reset_reason,
+       .map_reset_flags = falcon_map_reset_flags,
+       .reset = falcon_reset_hw,
+       .probe_port = falcon_probe_port,
+       .remove_port = falcon_remove_port,
+       .handle_global_event = falcon_handle_global_event,
+       .fini_dmaq = ef4_farch_fini_dmaq,
+       .prepare_flush = falcon_prepare_flush,
+       .finish_flush = ef4_port_dummy_op_void,
+       .prepare_flr = ef4_port_dummy_op_void,
+       .finish_flr = ef4_farch_finish_flr,
+       /* Statistics */
+       .describe_stats = falcon_describe_nic_stats,
+       .update_stats = falcon_update_nic_stats,
+       .start_stats = falcon_start_nic_stats,
+       .pull_stats = falcon_pull_nic_stats,
+       .stop_stats = falcon_stop_nic_stats,
+       .set_id_led = falcon_set_id_led,
+       .push_irq_moderation = falcon_push_irq_moderation,
+       .reconfigure_port = falcon_reconfigure_port,
+       .prepare_enable_fc_tx = falcon_b0_prepare_enable_fc_tx,
+       .reconfigure_mac = falcon_reconfigure_xmac,
+       .check_mac_fault = falcon_xmac_check_fault,
+       .get_wol = falcon_get_wol,
+       .set_wol = falcon_set_wol,
+       .resume_wol = ef4_port_dummy_op_void,
+       .test_chip = falcon_b0_test_chip,
+       .test_nvram = falcon_test_nvram,
+       /* Interrupt handling */
+       .irq_enable_master = ef4_farch_irq_enable_master,
+       .irq_test_generate = ef4_farch_irq_test_generate,
+       .irq_disable_non_ev = ef4_farch_irq_disable_master,
+       .irq_handle_msi = ef4_farch_msi_interrupt,
+       .irq_handle_legacy = ef4_farch_legacy_interrupt,
+       /* TX, RX and event queue operations */
+       .tx_probe = ef4_farch_tx_probe,
+       .tx_init = ef4_farch_tx_init,
+       .tx_remove = ef4_farch_tx_remove,
+       .tx_write = ef4_farch_tx_write,
+       .tx_limit_len = ef4_farch_tx_limit_len,
+       .rx_push_rss_config = falcon_b0_rx_push_rss_config,
+       .rx_probe = ef4_farch_rx_probe,
+       .rx_init = ef4_farch_rx_init,
+       .rx_remove = ef4_farch_rx_remove,
+       .rx_write = ef4_farch_rx_write,
+       .rx_defer_refill = ef4_farch_rx_defer_refill,
+       .ev_probe = ef4_farch_ev_probe,
+       .ev_init = ef4_farch_ev_init,
+       .ev_fini = ef4_farch_ev_fini,
+       .ev_remove = ef4_farch_ev_remove,
+       .ev_process = ef4_farch_ev_process,
+       .ev_read_ack = ef4_farch_ev_read_ack,
+       .ev_test_generate = ef4_farch_ev_test_generate,
+       /* RX filter table operations */
+       .filter_table_probe = ef4_farch_filter_table_probe,
+       .filter_table_restore = ef4_farch_filter_table_restore,
+       .filter_table_remove = ef4_farch_filter_table_remove,
+       .filter_update_rx_scatter = ef4_farch_filter_update_rx_scatter,
+       .filter_insert = ef4_farch_filter_insert,
+       .filter_remove_safe = ef4_farch_filter_remove_safe,
+       .filter_get_safe = ef4_farch_filter_get_safe,
+       .filter_clear_rx = ef4_farch_filter_clear_rx,
+       .filter_count_rx_used = ef4_farch_filter_count_rx_used,
+       .filter_get_rx_id_limit = ef4_farch_filter_get_rx_id_limit,
+       .filter_get_rx_ids = ef4_farch_filter_get_rx_ids,
+       /* RFS filter operations are only built with CONFIG_RFS_ACCEL */
+#ifdef CONFIG_RFS_ACCEL
+       .filter_rfs_insert = ef4_farch_filter_rfs_insert,
+       .filter_rfs_expire_one = ef4_farch_filter_rfs_expire_one,
+#endif
+       /* MTD operations are only built with CONFIG_SFC_FALCON_MTD */
+#ifdef CONFIG_SFC_FALCON_MTD
+       .mtd_probe = falcon_mtd_probe,
+       .mtd_rename = falcon_mtd_rename,
+       .mtd_read = falcon_mtd_read,
+       .mtd_erase = falcon_mtd_erase,
+       .mtd_write = falcon_mtd_write,
+       .mtd_sync = falcon_mtd_sync,
+#endif
+
+       /* Revision-specific constants: B0 uses the FR_BZ_* tables */
+       .revision = EF4_REV_FALCON_B0,
+       .txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL,
+       .rxd_ptr_tbl_base = FR_BZ_RX_DESC_PTR_TBL,
+       .buf_tbl_base = FR_BZ_BUF_FULL_TBL,
+       .evq_ptr_tbl_base = FR_BZ_EVQ_PTR_TBL,
+       .evq_rptr_tbl_base = FR_BZ_EVQ_RPTR,
+       .max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
+       .rx_prefix_size = FS_BZ_RX_PREFIX_SIZE,
+       .rx_hash_offset = FS_BZ_RX_PREFIX_HASH_OFST,
+       .rx_buffer_padding = 0,
+       .can_rx_scatter = true,
+       .max_interrupt_mode = EF4_INT_MODE_MSIX,
+       .timer_period_max =  1 << FRF_AB_TC_TIMER_VAL_WIDTH,
+       .offload_features = NETIF_F_IP_CSUM | NETIF_F_RXHASH | NETIF_F_NTUPLE,
+       .max_rx_ip_filters = FR_BZ_RX_FILTER_TBL0_ROWS,
+};
diff --git a/drivers/net/ethernet/sfc/falcon/falcon_boards.c b/drivers/net/ethernet/sfc/falcon/falcon_boards.c
new file mode 100644 (file)
index 0000000..dec83a2
--- /dev/null
@@ -0,0 +1,764 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2007-2012 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/rtnetlink.h>
+
+#include "net_driver.h"
+#include "phy.h"
+#include "efx.h"
+#include "nic.h"
+#include "workarounds.h"
+
+/* Macros for unpacking the board revision: the type is in bits 8 and
+ * above, the major revision in bits 7:4 and the minor revision in
+ * bits 3:0.  The revision info is in host byte order.
+ * The argument is parenthesized so that an expression argument
+ * (e.g. a | b) is unpacked as a whole. */
+#define FALCON_BOARD_TYPE(_rev) ((_rev) >> 8)
+#define FALCON_BOARD_MAJOR(_rev) (((_rev) >> 4) & 0xf)
+#define FALCON_BOARD_MINOR(_rev) ((_rev) & 0xf)
+
+/* Board types */
+#define FALCON_BOARD_SFE4001 0x01
+#define FALCON_BOARD_SFE4002 0x02
+#define FALCON_BOARD_SFE4003 0x03
+#define FALCON_BOARD_SFN4112F 0x52
+
+/* Board temperature is about 15°C above ambient when air flow is
+ * limited.  The maximum acceptable ambient temperature varies
+ * depending on the PHY specifications but the critical temperature
+ * above which we should shut down to avoid damage is 80°C. */
+#define FALCON_BOARD_TEMP_BIAS 15
+#define FALCON_BOARD_TEMP_CRIT (80 + FALCON_BOARD_TEMP_BIAS)
+
+/* SFC4000 datasheet says: 'The maximum permitted junction temperature
+ * is 125°C; the thermal design of the environment for the SFC4000
+ * should aim to keep this well below 100°C.' */
+#define FALCON_JUNC_TEMP_MIN   0
+#define FALCON_JUNC_TEMP_MAX   90
+#define FALCON_JUNC_TEMP_CRIT  125
+
+/*****************************************************************************
+ * Support for LM87 sensor chip used on several boards
+ */
+#define LM87_REG_TEMP_HW_INT_LOCK      0x13
+#define LM87_REG_TEMP_HW_EXT_LOCK      0x14
+#define LM87_REG_TEMP_HW_INT           0x17
+#define LM87_REG_TEMP_HW_EXT           0x18
+#define LM87_REG_TEMP_EXT1             0x26
+#define LM87_REG_TEMP_INT              0x27
+#define LM87_REG_ALARMS1               0x41
+#define LM87_REG_ALARMS2               0x42
+/* Each *_LIMITS macro expands to four comma-separated initializer
+ * values: a register address followed by _max, then a second register
+ * address followed by _min.  That is two (register, value) pairs in
+ * the layout consumed by ef4_poke_lm87(). */
+#define LM87_IN_LIMITS(nr, _min, _max)                 \
+       0x2B + (nr) * 2, _max, 0x2C + (nr) * 2, _min
+#define LM87_AIN_LIMITS(nr, _min, _max)                        \
+       0x3B + (nr), _max, 0x1A + (nr), _min
+#define LM87_TEMP_INT_LIMITS(_min, _max)               \
+       0x39, _max, 0x3A, _min
+#define LM87_TEMP_EXT1_LIMITS(_min, _max)              \
+       0x37, _max, 0x38, _min
+
+#define LM87_ALARM_TEMP_INT            0x10
+#define LM87_ALARM_TEMP_EXT1           0x20
+
+#if IS_ENABLED(CONFIG_SENSORS_LM87)
+
+/* Write a zero-terminated list of (register, value) byte pairs to the
+ * LM87 at @client.  Stops at the first failing SMBus write and returns
+ * its error code; returns 0 once the terminating zero register is
+ * reached. */
+static int ef4_poke_lm87(struct i2c_client *client, const u8 *reg_values)
+{
+       const u8 *pair;
+
+       for (pair = reg_values; pair[0] != 0; pair += 2) {
+               int err = i2c_smbus_write_byte_data(client, pair[0], pair[1]);
+
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/* (register, value) pairs written to every LM87 by ef4_init_lm87()
+ * after the board-specific table: the critical temperature thresholds
+ * for the internal (board) and external (controller junction) sensors
+ * and the external sensor's min/max limits.  The trailing 0 terminates
+ * the list for ef4_poke_lm87(). */
+static const u8 falcon_lm87_common_regs[] = {
+       LM87_REG_TEMP_HW_INT_LOCK, FALCON_BOARD_TEMP_CRIT,
+       LM87_REG_TEMP_HW_INT, FALCON_BOARD_TEMP_CRIT,
+       LM87_TEMP_EXT1_LIMITS(FALCON_JUNC_TEMP_MIN, FALCON_JUNC_TEMP_MAX),
+       LM87_REG_TEMP_HW_EXT_LOCK, FALCON_JUNC_TEMP_CRIT,
+       LM87_REG_TEMP_HW_EXT, FALCON_JUNC_TEMP_CRIT,
+       0
+};
+
+/* Register the LM87 described by @info on the board's I2C adapter and
+ * program its limit registers.
+ *
+ * Both alarm registers are read once first (read-to-clear), then the
+ * board-specific @reg_values pairs are written, followed by
+ * falcon_lm87_common_regs.  On success the client is stored in
+ * board->hwmon_client.
+ *
+ * Returns 0 on success, -EIO if the I2C device could not be created,
+ * or the error from a failed register write (in which case the client
+ * is unregistered again).
+ */
+static int ef4_init_lm87(struct ef4_nic *efx, const struct i2c_board_info *info,
+                        const u8 *reg_values)
+{
+       struct falcon_board *board = falcon_board(efx);
+       struct i2c_client *client = i2c_new_device(&board->i2c_adap, info);
+       int rc;
+
+       if (!client)
+               return -EIO;
+
+       /* Read-to-clear alarm/interrupt status; the values read (and any
+        * read error) are deliberately ignored here */
+       i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
+       i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2);
+
+       rc = ef4_poke_lm87(client, reg_values);
+       if (rc)
+               goto err;
+       rc = ef4_poke_lm87(client, falcon_lm87_common_regs);
+       if (rc)
+               goto err;
+
+       board->hwmon_client = client;
+       return 0;
+
+err:
+       i2c_unregister_device(client);
+       return rc;
+}
+
+/* Unregister the hwmon I2C client stored in the board state by
+ * ef4_init_lm87(). */
+static void ef4_fini_lm87(struct ef4_nic *efx)
+{
+       i2c_unregister_device(falcon_board(efx)->hwmon_client);
+}
+
+/* Poll the LM87 alarm registers and report any hardware problem.
+ *
+ * @mask selects which alarm bits are considered; ALARMS1 occupies bits
+ * 7:0 and ALARMS2 bits 15:8 of the combined status word.
+ *
+ * Returns 0 if no masked-in alarm amounts to a failure (a temperature
+ * alarm below the critical threshold is logged but still returns 0),
+ * -ERANGE on an electrical fault or a temperature reading above the
+ * critical threshold, or the negative error code of a failed I2C read.
+ */
+static int ef4_check_lm87(struct ef4_nic *efx, unsigned mask)
+{
+       struct i2c_client *client = falcon_board(efx)->hwmon_client;
+       bool temp_crit, elec_fault, is_failure;
+       u16 alarms;
+       s32 reg;
+
+       /* If workaround 7884 applies and the link is up then do not
+        * monitor at all */
+       if (EF4_WORKAROUND_7884(efx) && efx->link_state.up)
+               return 0;
+
+       reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
+       if (reg < 0)
+               return reg;
+       alarms = reg;
+       reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2);
+       if (reg < 0)
+               return reg;
+       alarms |= reg << 8;
+       alarms &= mask;
+
+       /* A temperature alarm only counts as critical if the current
+        * reading is above the corresponding critical threshold */
+       temp_crit = false;
+       if (alarms & LM87_ALARM_TEMP_INT) {
+               reg = i2c_smbus_read_byte_data(client, LM87_REG_TEMP_INT);
+               if (reg < 0)
+                       return reg;
+               if (reg > FALCON_BOARD_TEMP_CRIT)
+                       temp_crit = true;
+       }
+       if (alarms & LM87_ALARM_TEMP_EXT1) {
+               reg = i2c_smbus_read_byte_data(client, LM87_REG_TEMP_EXT1);
+               if (reg < 0)
+                       return reg;
+               if (reg > FALCON_JUNC_TEMP_CRIT)
+                       temp_crit = true;
+       }
+       /* Any remaining alarm bit other than the two temperature bits is
+        * treated as an electrical fault */
+       elec_fault = alarms & ~(LM87_ALARM_TEMP_INT | LM87_ALARM_TEMP_EXT1);
+       is_failure = temp_crit || elec_fault;
+
+       if (alarms)
+               netif_err(efx, hw, efx->net_dev,
+                         "LM87 detected a hardware %s (status %02x:%02x)"
+                         "%s%s%s%s\n",
+                         is_failure ? "failure" : "problem",
+                         alarms & 0xff, alarms >> 8,
+                         (alarms & LM87_ALARM_TEMP_INT) ?
+                         "; board is overheating" : "",
+                         (alarms & LM87_ALARM_TEMP_EXT1) ?
+                         "; controller is overheating" : "",
+                         temp_crit ? "; reached critical temperature" : "",
+                         elec_fault ? "; electrical fault" : "");
+
+       return is_failure ? -ERANGE : 0;
+}
+
+#else /* !CONFIG_SENSORS_LM87 */
+
+/* No-op replacements used when CONFIG_SENSORS_LM87 is not enabled:
+ * initialisation and monitoring report success without touching the
+ * hardware, and finalisation does nothing. */
+static inline int
+ef4_init_lm87(struct ef4_nic *efx, const struct i2c_board_info *info,
+             const u8 *reg_values)
+{
+       return 0;
+}
+static inline void ef4_fini_lm87(struct ef4_nic *efx)
+{
+}
+static inline int ef4_check_lm87(struct ef4_nic *efx, unsigned mask)
+{
+       return 0;
+}
+
+#endif /* CONFIG_SENSORS_LM87 */
+
+/*****************************************************************************
+ * Support for the SFE4001 NIC.
+ *
+ * The SFE4001 does not power-up fully at reset due to its high power
+ * consumption.  We control its power via a PCA9539 I/O expander.
+ * It also has a MAX6647 temperature monitor which we expose to
+ * the lm90 driver.
+ *
+ * This also provides minimal support for reflashing the PHY, which is
+ * initiated by resetting it with the FLASH_CFG_1 pin pulled down.
+ * On SFE4001 rev A2 and later this is connected to the 3V3X output of
+ * the IO-expander.
+ * We represent reflash mode as PHY_MODE_SPECIAL and make it mutually
+ * exclusive with the network device being open.
+ */
+
+/**************************************************************************
+ * Support for I2C IO Expander device on SFE4001
+ */
+#define        PCA9539 0x74
+
+#define        P0_IN 0x00
+#define        P0_OUT 0x02
+#define        P0_INVERT 0x04
+#define        P0_CONFIG 0x06
+
+#define        P0_EN_1V0X_LBN 0
+#define        P0_EN_1V0X_WIDTH 1
+#define        P0_EN_1V2_LBN 1
+#define        P0_EN_1V2_WIDTH 1
+#define        P0_EN_2V5_LBN 2
+#define        P0_EN_2V5_WIDTH 1
+#define        P0_EN_3V3X_LBN 3
+#define        P0_EN_3V3X_WIDTH 1
+#define        P0_EN_5V_LBN 4
+#define        P0_EN_5V_WIDTH 1
+#define        P0_SHORTEN_JTAG_LBN 5
+#define        P0_SHORTEN_JTAG_WIDTH 1
+#define        P0_X_TRST_LBN 6
+#define        P0_X_TRST_WIDTH 1
+#define        P0_DSP_RESET_LBN 7
+#define        P0_DSP_RESET_WIDTH 1
+
+#define        P1_IN 0x01
+#define        P1_OUT 0x03
+#define        P1_INVERT 0x05
+#define        P1_CONFIG 0x07
+
+#define        P1_AFE_PWD_LBN 0
+#define        P1_AFE_PWD_WIDTH 1
+#define        P1_DSP_PWD25_LBN 1
+#define        P1_DSP_PWD25_WIDTH 1
+#define        P1_RESERVED_LBN 2
+#define        P1_RESERVED_WIDTH 2
+#define        P1_SPARE_LBN 4
+#define        P1_SPARE_WIDTH 4
+
+/* Temperature Sensor */
+#define MAX664X_REG_RSL                0x02
+#define MAX664X_REG_WLHO       0x0B
+
+/* Power the SFE4001 PHY down via the IO expander and clear any latched
+ * over-temperature alert.  Best effort: the results of the I2C
+ * transfers are not checked. */
+static void sfe4001_poweroff(struct ef4_nic *efx)
+{
+       struct i2c_client *ioexp_client = falcon_board(efx)->ioexp_client;
+       struct i2c_client *hwmon_client = falcon_board(efx)->hwmon_client;
+
+       /* Turn off all power rails and disable outputs */
+       i2c_smbus_write_byte_data(ioexp_client, P0_OUT, 0xff);
+       i2c_smbus_write_byte_data(ioexp_client, P1_CONFIG, 0xff);
+       i2c_smbus_write_byte_data(ioexp_client, P0_CONFIG, 0xff);
+
+       /* Clear any over-temperature alert */
+       i2c_smbus_read_byte_data(hwmon_client, MAX664X_REG_RSL);
+}
+
+/* Power up the SFE4001 PHY through the PCA9539 IO expander.
+ *
+ * Clears any latched over-temperature alert, enables the expander
+ * outputs, power-cycles the PHY if the rails are not already all off,
+ * then makes up to 20 attempts to switch the rails on and see the DSP
+ * assert the AFE power line (polled 10 times at 100 ms intervals per
+ * attempt).  The enable bits are active-low: writing 0xff turns
+ * everything off and clearing a bit turns that rail on.  In
+ * PHY_MODE_SPECIAL (flash configuration) the 3V3X enable bit is left
+ * set and the function returns after a fixed one-second wait instead
+ * of polling.
+ *
+ * Returns 0 on success, -ETIMEDOUT if the DSP never booted, or the
+ * error from a failed I2C transfer.  Once the expander's port 0 has
+ * been configured, every failure path powers the board off again.
+ */
+static int sfe4001_poweron(struct ef4_nic *efx)
+{
+       struct i2c_client *ioexp_client = falcon_board(efx)->ioexp_client;
+       struct i2c_client *hwmon_client = falcon_board(efx)->hwmon_client;
+       unsigned int i, j;
+       int rc;
+       u8 out;
+
+       /* Clear any previous over-temperature alert */
+       rc = i2c_smbus_read_byte_data(hwmon_client, MAX664X_REG_RSL);
+       if (rc < 0)
+               return rc;
+
+       /* Enable port 0 and port 1 outputs on IO expander */
+       rc = i2c_smbus_write_byte_data(ioexp_client, P0_CONFIG, 0x00);
+       if (rc)
+               return rc;
+       rc = i2c_smbus_write_byte_data(ioexp_client, P1_CONFIG,
+                                      0xff & ~(1 << P1_SPARE_LBN));
+       if (rc)
+               goto fail_on;
+
+       /* If PHY power is on, turn it all off and wait 1 second to
+        * ensure a full reset.
+        */
+       rc = i2c_smbus_read_byte_data(ioexp_client, P0_OUT);
+       if (rc < 0)
+               goto fail_on;
+       /* All enable bits left set: evaluates to 0xff (everything off) */
+       out = 0xff & ~((0 << P0_EN_1V2_LBN) | (0 << P0_EN_2V5_LBN) |
+                      (0 << P0_EN_3V3X_LBN) | (0 << P0_EN_5V_LBN) |
+                      (0 << P0_EN_1V0X_LBN));
+       if (rc != out) {
+               netif_info(efx, hw, efx->net_dev, "power-cycling PHY\n");
+               rc = i2c_smbus_write_byte_data(ioexp_client, P0_OUT, out);
+               if (rc)
+                       goto fail_on;
+               schedule_timeout_uninterruptible(HZ);
+       }
+
+       for (i = 0; i < 20; ++i) {
+               /* Turn on 1.2V, 2.5V, 3.3V and 5V power rails */
+               out = 0xff & ~((1 << P0_EN_1V2_LBN) | (1 << P0_EN_2V5_LBN) |
+                              (1 << P0_EN_3V3X_LBN) | (1 << P0_EN_5V_LBN) |
+                              (1 << P0_X_TRST_LBN));
+               if (efx->phy_mode & PHY_MODE_SPECIAL)
+                       out |= 1 << P0_EN_3V3X_LBN;
+
+               rc = i2c_smbus_write_byte_data(ioexp_client, P0_OUT, out);
+               if (rc)
+                       goto fail_on;
+               msleep(10);
+
+               /* Turn on 1V power rail */
+               out &= ~(1 << P0_EN_1V0X_LBN);
+               rc = i2c_smbus_write_byte_data(ioexp_client, P0_OUT, out);
+               if (rc)
+                       goto fail_on;
+
+               /* i is unsigned, so print it with %u */
+               netif_info(efx, hw, efx->net_dev,
+                          "waiting for DSP boot (attempt %u)...\n", i);
+
+               /* In flash config mode, DSP does not turn on AFE, so
+                * just wait 1 second.
+                */
+               if (efx->phy_mode & PHY_MODE_SPECIAL) {
+                       schedule_timeout_uninterruptible(HZ);
+                       return 0;
+               }
+
+               for (j = 0; j < 10; ++j) {
+                       msleep(100);
+
+                       /* Check DSP has asserted AFE power line */
+                       rc = i2c_smbus_read_byte_data(ioexp_client, P1_IN);
+                       if (rc < 0)
+                               goto fail_on;
+                       if (rc & (1 << P1_AFE_PWD_LBN))
+                               return 0;
+               }
+       }
+
+       netif_info(efx, hw, efx->net_dev, "timed out waiting for DSP boot\n");
+       rc = -ETIMEDOUT;
+fail_on:
+       sfe4001_poweroff(efx);
+       return rc;
+}
+
+/* sysfs show handler for phy_flash_cfg: prints "1" if the PHY is in
+ * PHY_MODE_SPECIAL (flash configuration mode) and "0" otherwise. */
+static ssize_t show_phy_flash_cfg(struct device *dev,
+                                 struct device_attribute *attr, char *buf)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct ef4_nic *efx = pci_get_drvdata(pdev);
+       int in_flash_cfg = (efx->phy_mode & PHY_MODE_SPECIAL) ? 1 : 0;
+
+       return sprintf(buf, "%d\n", in_flash_cfg);
+}
+
+/* sysfs store handler for phy_flash_cfg.
+ *
+ * Empty input or input starting with '0' clears PHY_MODE_SPECIAL; any
+ * other input requests it.  If that does not change the flag nothing
+ * is done.  Otherwise the NIC must be in STATE_READY with the net
+ * device not running (else -EBUSY); the PHY is then power-cycled via
+ * sfe4001_poweron() and the port reconfigured.  The whole operation
+ * runs under the rtnl lock.
+ *
+ * Returns @count on success or a negative error code.
+ */
+static ssize_t set_phy_flash_cfg(struct device *dev,
+                                struct device_attribute *attr,
+                                const char *buf, size_t count)
+{
+       struct ef4_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+       enum ef4_phy_mode old_mode, new_mode;
+       int err;
+
+       rtnl_lock();
+       old_mode = efx->phy_mode;
+       /* Note that enabling replaces the whole mode with
+        * PHY_MODE_SPECIAL, whereas disabling only clears that flag */
+       if (count == 0 || *buf == '0')
+               new_mode = old_mode & ~PHY_MODE_SPECIAL;
+       else
+               new_mode = PHY_MODE_SPECIAL;
+       if (!((old_mode ^ new_mode) & PHY_MODE_SPECIAL)) {
+               err = 0;
+       } else if (efx->state != STATE_READY || netif_running(efx->net_dev)) {
+               err = -EBUSY;
+       } else {
+               /* Reset the PHY, reconfigure the MAC and enable/disable
+                * MAC stats accordingly. */
+               efx->phy_mode = new_mode;
+               if (new_mode & PHY_MODE_SPECIAL)
+                       falcon_stop_nic_stats(efx);
+               err = sfe4001_poweron(efx);
+               if (!err)
+                       err = ef4_reconfigure_port(efx);
+               if (!(new_mode & PHY_MODE_SPECIAL))
+                       falcon_start_nic_stats(efx);
+       }
+       rtnl_unlock();
+
+       return err ? err : count;
+}
+
+static DEVICE_ATTR(phy_flash_cfg, 0644, show_phy_flash_cfg, set_phy_flash_cfg);
+
+/* Tear down SFE4001 board support: remove the phy_flash_cfg sysfs
+ * attribute, power the PHY off, and unregister the IO expander and
+ * hwmon I2C clients. */
+static void sfe4001_fini(struct ef4_nic *efx)
+{
+       struct falcon_board *board = falcon_board(efx);
+
+       netif_info(efx, drv, efx->net_dev, "%s\n", __func__);
+
+       device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_flash_cfg);
+       sfe4001_poweroff(efx);
+       i2c_unregister_device(board->ioexp_client);
+       i2c_unregister_device(board->hwmon_client);
+}
+
+/* Hardware monitor for the SFE4001.
+ *
+ * Returns 0 if the PHY still reports power (or if monitoring is
+ * skipped), -EIO if the IO expander could not be read, or -ERANGE if
+ * the PHY has lost power.  In both failure cases the board is powered
+ * off and the PHY mode is set to PHY_MODE_OFF.
+ */
+static int sfe4001_check_hw(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       s32 status;
+
+       /* If XAUI link is up then do not monitor */
+       if (EF4_WORKAROUND_7884(efx) && !nic_data->xmac_poll_required)
+               return 0;
+
+       /* Check the powered status of the PHY. Lack of power implies that
+        * the MAX6647 has shut down power to it, probably due to a temp.
+        * alarm. Reading the power status rather than the MAX6647 status
+        * directly because the latter is read-to-clear and would thus
+        * start to power up the PHY again when polled, causing us to blip
+        * the power undesirably.
+        * We know we can read from the IO expander because we did
+        * it during power-on. Assume failure now is bad news. */
+       status = i2c_smbus_read_byte_data(falcon_board(efx)->ioexp_client, P1_IN);
+       if (status >= 0 &&
+           (status & ((1 << P1_AFE_PWD_LBN) | (1 << P1_DSP_PWD25_LBN))) != 0)
+               return 0;
+
+       /* Use board power control, not PHY power control */
+       sfe4001_poweroff(efx);
+       efx->phy_mode = PHY_MODE_OFF;
+
+       return (status < 0) ? -EIO : -ERANGE;
+}
+
+static const struct i2c_board_info sfe4001_hwmon_info = {
+       I2C_BOARD_INFO("max6647", 0x4e),
+};
+
+/* This board uses an I2C expander to provide power to the PHY, which needs to
+ * be turned on before the PHY can be used.
+ *
+ * Creates the hwmon and IO expander I2C clients, raises the temperature
+ * sensor's high limit, powers the PHY on and creates the phy_flash_cfg
+ * sysfs attribute.  Returns 0 on success or a negative error code, in
+ * which case everything set up so far is undone.
+ * Context: Process context, rtnl lock held
+ */
+static int sfe4001_init(struct ef4_nic *efx)
+{
+       struct falcon_board *board = falcon_board(efx);
+       int rc;
+
+       /* With the lm90 driver enabled, register the sensor as a real
+        * device; otherwise just claim its address with a dummy client */
+#if IS_ENABLED(CONFIG_SENSORS_LM90)
+       board->hwmon_client =
+               i2c_new_device(&board->i2c_adap, &sfe4001_hwmon_info);
+#else
+       board->hwmon_client =
+               i2c_new_dummy(&board->i2c_adap, sfe4001_hwmon_info.addr);
+#endif
+       if (!board->hwmon_client)
+               return -EIO;
+
+       /* Raise board/PHY high limit from 85 to 90 degrees Celsius */
+       rc = i2c_smbus_write_byte_data(board->hwmon_client,
+                                      MAX664X_REG_WLHO, 90);
+       if (rc)
+               goto fail_hwmon;
+
+       board->ioexp_client = i2c_new_dummy(&board->i2c_adap, PCA9539);
+       if (!board->ioexp_client) {
+               rc = -EIO;
+               goto fail_hwmon;
+       }
+
+       if (efx->phy_mode & PHY_MODE_SPECIAL) {
+               /* PHY won't generate a 156.25 MHz clock and MAC stats fetch
+                * will fail. */
+               falcon_stop_nic_stats(efx);
+       }
+       rc = sfe4001_poweron(efx);
+       if (rc)
+               goto fail_ioexp;
+
+       rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_flash_cfg);
+       if (rc)
+               goto fail_on;
+
+       netif_info(efx, hw, efx->net_dev, "PHY is powered on\n");
+       return 0;
+
+fail_on:
+       sfe4001_poweroff(efx);
+fail_ioexp:
+       i2c_unregister_device(board->ioexp_client);
+fail_hwmon:
+       i2c_unregister_device(board->hwmon_client);
+       return rc;
+}
+
+/*****************************************************************************
+ * Support for the SFE4002
+ *
+ */
+static u8 sfe4002_lm87_channel = 0x03; /* use AIN not FAN inputs */
+
+/* Zero-terminated (register, value) limit settings for the SFE4002's
+ * LM87, passed to ef4_init_lm87().  In the per-line comments the label
+ * before the colon appears to be the LM87 input name and the text after
+ * it the rail actually monitored - TODO confirm against the board
+ * schematic. */
+static const u8 sfe4002_lm87_regs[] = {
+       LM87_IN_LIMITS(0, 0x7c, 0x99),          /* 2.5V:  1.8V +/- 10% */
+       LM87_IN_LIMITS(1, 0x4c, 0x5e),          /* Vccp1: 1.2V +/- 10% */
+       LM87_IN_LIMITS(2, 0xac, 0xd4),          /* 3.3V:  3.3V +/- 10% */
+       LM87_IN_LIMITS(3, 0xac, 0xd4),          /* 5V:    5.0V +/- 10% */
+       LM87_IN_LIMITS(4, 0xac, 0xe0),          /* 12V:   10.8-14V */
+       LM87_IN_LIMITS(5, 0x3f, 0x4f),          /* Vccp2: 1.0V +/- 10% */
+       LM87_AIN_LIMITS(0, 0x98, 0xbb),         /* AIN1:  1.66V +/- 10% */
+       LM87_AIN_LIMITS(1, 0x8a, 0xa9),         /* AIN2:  1.5V +/- 10% */
+       LM87_TEMP_INT_LIMITS(0, 80 + FALCON_BOARD_TEMP_BIAS),
+       LM87_TEMP_EXT1_LIMITS(0, FALCON_JUNC_TEMP_MAX),
+       0
+};
+
+static const struct i2c_board_info sfe4002_hwmon_info = {
+       I2C_BOARD_INFO("lm87", 0x2e),
+       .platform_data  = &sfe4002_lm87_channel,
+};
+
+/****************************************************************************/
+/* LED allocations. Note that on rev A0 boards the schematic and the reality
+ * differ: red and green are swapped. Below is the fixed (A1) layout (there
+ * are only 3 A0 boards in existence, so no real reason to make this
+ * conditional).
+ */
+#define SFE4002_FAULT_LED (2)  /* Red */
+#define SFE4002_RX_LED    (0)  /* Green */
+#define SFE4002_TX_LED    (1)  /* Amber */
+
+/* PHY initialisation hook for the SFE4002: programs the three QT202x
+ * LEDs. */
+static void sfe4002_init_phy(struct ef4_nic *efx)
+{
+       /* Set the TX and RX LEDs to reflect status and activity, and the
+        * fault LED off */
+       falcon_qt202x_set_led(efx, SFE4002_TX_LED,
+                             QUAKE_LED_TXLINK | QUAKE_LED_LINK_ACTSTAT);
+       falcon_qt202x_set_led(efx, SFE4002_RX_LED,
+                             QUAKE_LED_RXLINK | QUAKE_LED_LINK_ACTSTAT);
+       falcon_qt202x_set_led(efx, SFE4002_FAULT_LED, QUAKE_LED_OFF);
+}
+
+/* Identification LED hook for the SFE4002: the fault LED is switched
+ * on for EF4_LED_ON and off for every other mode. */
+static void sfe4002_set_id_led(struct ef4_nic *efx, enum ef4_led_mode mode)
+{
+       int led_state = QUAKE_LED_OFF;
+
+       if (mode == EF4_LED_ON)
+               led_state = QUAKE_LED_ON;
+
+       falcon_qt202x_set_led(efx, SFE4002_FAULT_LED, led_state);
+}
+
+/* Hardware monitor for the SFE4002: checks the LM87 alarms and returns
+ * the result of ef4_check_lm87(). */
+static int sfe4002_check_hw(struct ef4_nic *efx)
+{
+       struct falcon_board *board = falcon_board(efx);
+       unsigned alarm_mask = ~0;
+
+       /* Revision A0 boards have a bad sensor that reports a temperature
+        * fault permanently, so that alarm is ignored on them. */
+       if (board->major == 0 && board->minor == 0)
+               alarm_mask = ~LM87_ALARM_TEMP_EXT1;
+
+       return ef4_check_lm87(efx, alarm_mask);
+}
+
+/* Board initialisation hook for the SFE4002: sets up the LM87 with the
+ * board's limit table and returns the result of ef4_init_lm87(). */
+static int sfe4002_init(struct ef4_nic *efx)
+{
+       return ef4_init_lm87(efx, &sfe4002_hwmon_info, sfe4002_lm87_regs);
+}
+
+/*****************************************************************************
+ * Support for the SFN4112F
+ *
+ */
+static u8 sfn4112f_lm87_channel = 0x03; /* use AIN not FAN inputs */
+
+/* Zero-terminated (register, value) limit settings for the SFN4112F's
+ * LM87, passed to ef4_init_lm87().  No limits are programmed for
+ * input 3 or AIN1 on this board. */
+static const u8 sfn4112f_lm87_regs[] = {
+       LM87_IN_LIMITS(0, 0x7c, 0x99),          /* 2.5V:  1.8V +/- 10% */
+       LM87_IN_LIMITS(1, 0x4c, 0x5e),          /* Vccp1: 1.2V +/- 10% */
+       LM87_IN_LIMITS(2, 0xac, 0xd4),          /* 3.3V:  3.3V +/- 10% */
+       LM87_IN_LIMITS(4, 0xac, 0xe0),          /* 12V:   10.8-14V */
+       LM87_IN_LIMITS(5, 0x3f, 0x4f),          /* Vccp2: 1.0V +/- 10% */
+       LM87_AIN_LIMITS(1, 0x8a, 0xa9),         /* AIN2:  1.5V +/- 10% */
+       LM87_TEMP_INT_LIMITS(0, 60 + FALCON_BOARD_TEMP_BIAS),
+       LM87_TEMP_EXT1_LIMITS(0, FALCON_JUNC_TEMP_MAX),
+       0
+};
+
+static const struct i2c_board_info sfn4112f_hwmon_info = {
+       I2C_BOARD_INFO("lm87", 0x2e),
+       .platform_data  = &sfn4112f_lm87_channel,
+};
+
+#define SFN4112F_ACT_LED       0
+#define SFN4112F_LINK_LED      1
+
+/* PHY initialisation hook for the SFN4112F: programs the activity and
+ * link LEDs of the QT202x PHY. */
+static void sfn4112f_init_phy(struct ef4_nic *efx)
+{
+       falcon_qt202x_set_led(efx, SFN4112F_ACT_LED,
+                             QUAKE_LED_RXLINK | QUAKE_LED_LINK_ACT);
+       falcon_qt202x_set_led(efx, SFN4112F_LINK_LED,
+                             QUAKE_LED_RXLINK | QUAKE_LED_LINK_STAT);
+}
+
+/* Identification LED hook for the SFN4112F.  The link LED is forced
+ * off or on for EF4_LED_OFF / EF4_LED_ON; any other mode restores the
+ * setting that sfn4112f_init_phy() programs for that LED. */
+static void sfn4112f_set_id_led(struct ef4_nic *efx, enum ef4_led_mode mode)
+{
+       int led_cfg;
+
+       if (mode == EF4_LED_OFF)
+               led_cfg = QUAKE_LED_OFF;
+       else if (mode == EF4_LED_ON)
+               led_cfg = QUAKE_LED_ON;
+       else
+               led_cfg = QUAKE_LED_RXLINK | QUAKE_LED_LINK_STAT;
+
+       falcon_qt202x_set_led(efx, SFN4112F_LINK_LED, led_cfg);
+}
+
+/* Hardware monitor for the SFN4112F: checks the LM87 alarms and returns
+ * the result of ef4_check_lm87(). */
+static int sfn4112f_check_hw(struct ef4_nic *efx)
+{
+       /* Mask out unused sensors: ~0x48 drops bits 3 and 6 of the low
+        * (ALARMS1) byte of the combined alarm status */
+       return ef4_check_lm87(efx, ~0x48);
+}
+
+/* Board initialisation hook for the SFN4112F: sets up the LM87 with the
+ * board's limit table and returns the result of ef4_init_lm87(). */
+static int sfn4112f_init(struct ef4_nic *efx)
+{
+       return ef4_init_lm87(efx, &sfn4112f_hwmon_info, sfn4112f_lm87_regs);
+}
+
+/*****************************************************************************
+ * Support for the SFE4003
+ *
+ */
+static u8 sfe4003_lm87_channel = 0x03; /* use AIN not FAN inputs */
+
+/* Zero-terminated (register, value) limit settings for the SFE4003's
+ * LM87, passed to ef4_init_lm87().  This table sets no board-specific
+ * limits for the external temperature sensor; those come only from the
+ * common table that ef4_init_lm87() writes afterwards. */
+static const u8 sfe4003_lm87_regs[] = {
+       LM87_IN_LIMITS(0, 0x67, 0x7f),          /* 2.5V:  1.5V +/- 10% */
+       LM87_IN_LIMITS(1, 0x4c, 0x5e),          /* Vccp1: 1.2V +/- 10% */
+       LM87_IN_LIMITS(2, 0xac, 0xd4),          /* 3.3V:  3.3V +/- 10% */
+       LM87_IN_LIMITS(4, 0xac, 0xe0),          /* 12V:   10.8-14V */
+       LM87_IN_LIMITS(5, 0x3f, 0x4f),          /* Vccp2: 1.0V +/- 10% */
+       LM87_TEMP_INT_LIMITS(0, 70 + FALCON_BOARD_TEMP_BIAS),
+       0
+};
+
+static const struct i2c_board_info sfe4003_hwmon_info = {
+       I2C_BOARD_INFO("lm87", 0x2e),
+       .platform_data  = &sfe4003_lm87_channel,
+};
+
+/* Board-specific LED info. */
+#define SFE4003_RED_LED_GPIO   11
+#define SFE4003_LED_ON         1
+#define SFE4003_LED_OFF                0
+
+/* Identification LED hook for the SFE4003: drives the red LED GPIO on
+ * for EF4_LED_ON and off for every other mode.  Does nothing on board
+ * revisions with major 0 and minor below 3. */
+static void sfe4003_set_id_led(struct ef4_nic *efx, enum ef4_led_mode mode)
+{
+       struct falcon_board *board = falcon_board(efx);
+
+       /* The LEDs were not wired to GPIOs before A3 */
+       if (board->minor < 3 && board->major == 0)
+               return;
+
+       falcon_txc_set_gpio_val(
+               efx, SFE4003_RED_LED_GPIO,
+               (mode == EF4_LED_ON) ? SFE4003_LED_ON : SFE4003_LED_OFF);
+}
+
+/* PHY initialisation hook for the SFE4003: configures the red LED GPIO
+ * as an output and switches the LED off.  Does nothing on board
+ * revisions with major 0 and minor below 3. */
+static void sfe4003_init_phy(struct ef4_nic *efx)
+{
+       struct falcon_board *board = falcon_board(efx);
+
+       /* The LEDs were not wired to GPIOs before A3 */
+       if (board->minor < 3 && board->major == 0)
+               return;
+
+       falcon_txc_set_gpio_dir(efx, SFE4003_RED_LED_GPIO, TXC_GPIO_DIR_OUTPUT);
+       falcon_txc_set_gpio_val(efx, SFE4003_RED_LED_GPIO, SFE4003_LED_OFF);
+}
+
+/* Hardware monitor for the SFE4003: checks the LM87 alarms and returns
+ * the result of ef4_check_lm87(). */
+static int sfe4003_check_hw(struct ef4_nic *efx)
+{
+       struct falcon_board *board = falcon_board(efx);
+       unsigned alarm_mask = ~0;
+
+       /* Revision A0/A1/A2 boards have a bad sensor that reports a
+        * temperature fault permanently, so that alarm is ignored on
+        * them. */
+       if (board->major == 0 && board->minor <= 2)
+               alarm_mask = ~LM87_ALARM_TEMP_EXT1;
+
+       return ef4_check_lm87(efx, alarm_mask);
+}
+
+/* Board initialisation hook for the SFE4003: sets up the LM87 with the
+ * board's limit table and returns the result of ef4_init_lm87(). */
+static int sfe4003_init(struct ef4_nic *efx)
+{
+       return ef4_init_lm87(efx, &sfe4003_hwmon_info, sfe4003_lm87_regs);
+}
+
+/* Table of supported boards and their operations.  falcon_probe_board()
+ * selects the entry whose .id matches the type field of the board
+ * revision info. */
+static const struct falcon_board_type board_types[] = {
+       {
+               .id             = FALCON_BOARD_SFE4001,
+               .init           = sfe4001_init,
+               .init_phy       = ef4_port_dummy_op_void,
+               .fini           = sfe4001_fini,
+               .set_id_led     = tenxpress_set_id_led,
+               .monitor        = sfe4001_check_hw,
+       },
+       {
+               .id             = FALCON_BOARD_SFE4002,
+               .init           = sfe4002_init,
+               .init_phy       = sfe4002_init_phy,
+               .fini           = ef4_fini_lm87,
+               .set_id_led     = sfe4002_set_id_led,
+               .monitor        = sfe4002_check_hw,
+       },
+       {
+               .id             = FALCON_BOARD_SFE4003,
+               .init           = sfe4003_init,
+               .init_phy       = sfe4003_init_phy,
+               .fini           = ef4_fini_lm87,
+               .set_id_led     = sfe4003_set_id_led,
+               .monitor        = sfe4003_check_hw,
+       },
+       {
+               .id             = FALCON_BOARD_SFN4112F,
+               .init           = sfn4112f_init,
+               .init_phy       = sfn4112f_init_phy,
+               .fini           = ef4_fini_lm87,
+               .set_id_led     = sfn4112f_set_id_led,
+               .monitor        = sfn4112f_check_hw,
+       },
+};
+
+/* Identify the board from @revision_info and select its operations.
+ *
+ * Stores the major and minor revision in the board state and points
+ * board->type at the board_types[] entry with a matching type ID.
+ * Returns 0 if a board type is set afterwards; otherwise logs an error
+ * and returns -ENODEV. */
+int falcon_probe_board(struct ef4_nic *efx, u16 revision_info)
+{
+       struct falcon_board *board = falcon_board(efx);
+       u8 type_id = FALCON_BOARD_TYPE(revision_info);
+       int idx;
+
+       board->major = FALCON_BOARD_MAJOR(revision_info);
+       board->minor = FALCON_BOARD_MINOR(revision_info);
+
+       for (idx = 0; idx < ARRAY_SIZE(board_types); idx++) {
+               if (board_types[idx].id == type_id)
+                       board->type = &board_types[idx];
+       }
+
+       if (!board->type) {
+               netif_err(efx, probe, efx->net_dev, "unknown board type %d\n",
+                         type_id);
+               return -ENODEV;
+       }
+
+       return 0;
+}
diff --git a/drivers/net/ethernet/sfc/falcon/farch.c b/drivers/net/ethernet/sfc/falcon/farch.c
new file mode 100644 (file)
index 0000000..05916c7
--- /dev/null
@@ -0,0 +1,2892 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/crc32.h>
+#include "net_driver.h"
+#include "bitfield.h"
+#include "efx.h"
+#include "nic.h"
+#include "farch_regs.h"
+#include "io.h"
+#include "workarounds.h"
+
+/* Falcon-architecture (SFC4000) support */
+
+/**************************************************************************
+ *
+ * Configurable values
+ *
+ **************************************************************************
+ */
+
+/* This is set to 16 for a good reason.  In summary, if larger than
+ * 16, the descriptor cache holds more than a default socket
+ * buffer's worth of packets (for UDP we can only have at most one
+ * socket buffer's worth outstanding).  This combined with the fact
+ * that we only get 1 TX event per descriptor cache means the NIC
+ * goes idle.
+ *
+ * NOTE(review): each *_ORDER value looks like the encoding of the
+ * matching entry count as 8 << order (16 = 8 << 1, 64 = 8 << 3);
+ * confirm against the register definitions before changing either.
+ */
+#define TX_DC_ENTRIES 16
+#define TX_DC_ENTRIES_ORDER 1
+
+#define RX_DC_ENTRIES 64
+#define RX_DC_ENTRIES_ORDER 3
+
+/* If EF4_MAX_INT_ERRORS internal errors occur within
+ * EF4_INT_ERROR_EXPIRE seconds, we consider the NIC broken and
+ * disable it.
+ */
+#define EF4_INT_ERROR_EXPIRE 3600
+#define EF4_MAX_INT_ERRORS 5
+
+/* Depth of RX flush request fifo */
+#define EF4_RX_FLUSH_COUNT 4
+
+/* Driver generated events
+ *
+ * A magic value packs an event code above a data field:
+ * _EF4_CHANNEL_MAGIC() shifts the code left by 8 bits and ORs in the
+ * data, and _EF4_CHANNEL_MAGIC_CODE() recovers the code by shifting
+ * right by 8.  The data is not masked, so it has to fit in the low
+ * 8 bits for the code to be recovered intact.
+ *
+ * The EF4_CHANNEL_MAGIC_*() wrappers supply the channel number, RX queue
+ * index or TX queue number as the data.
+ */
+#define _EF4_CHANNEL_MAGIC_TEST                0x000101
+#define _EF4_CHANNEL_MAGIC_FILL                0x000102
+#define _EF4_CHANNEL_MAGIC_RX_DRAIN    0x000103
+#define _EF4_CHANNEL_MAGIC_TX_DRAIN    0x000104
+
+#define _EF4_CHANNEL_MAGIC(_code, _data)       ((_code) << 8 | (_data))
+#define _EF4_CHANNEL_MAGIC_CODE(_magic)                ((_magic) >> 8)
+
+#define EF4_CHANNEL_MAGIC_TEST(_channel)                               \
+       _EF4_CHANNEL_MAGIC(_EF4_CHANNEL_MAGIC_TEST, (_channel)->channel)
+#define EF4_CHANNEL_MAGIC_FILL(_rx_queue)                              \
+       _EF4_CHANNEL_MAGIC(_EF4_CHANNEL_MAGIC_FILL,                     \
+                          ef4_rx_queue_index(_rx_queue))
+#define EF4_CHANNEL_MAGIC_RX_DRAIN(_rx_queue)                          \
+       _EF4_CHANNEL_MAGIC(_EF4_CHANNEL_MAGIC_RX_DRAIN,                 \
+                          ef4_rx_queue_index(_rx_queue))
+#define EF4_CHANNEL_MAGIC_TX_DRAIN(_tx_queue)                          \
+       _EF4_CHANNEL_MAGIC(_EF4_CHANNEL_MAGIC_TX_DRAIN,                 \
+                          (_tx_queue)->queue)
+
+static void ef4_farch_magic_event(struct ef4_channel *channel, u32 magic);
+
+/**************************************************************************
+ *
+ * Hardware access
+ *
+ **************************************************************************/
+
+/* Write @value to entry @index of the buffer table, which starts at offset
+ * efx->type->buf_tbl_base from efx->membase.
+ */
+static inline void ef4_write_buf_tbl(struct ef4_nic *efx, ef4_qword_t *value,
+                                    unsigned int index)
+{
+       ef4_sram_writeq(efx,
+                       efx->membase + efx->type->buf_tbl_base,
+                       value,
+                       index);
+}
+
+/* Return true if @a and @b differ in any bit that is set in @mask */
+static bool ef4_masked_compare_oword(const ef4_oword_t *a, const ef4_oword_t *b,
+                                    const ef4_oword_t *mask)
+{
+       /* First 64-bit half */
+       if ((a->u64[0] ^ b->u64[0]) & mask->u64[0])
+               return true;
+
+       /* Second 64-bit half */
+       return ((a->u64[1] ^ b->u64[1]) & mask->u64[1]) != 0;
+}
+
+/* Bit-sweep self-test of a table of 128-bit registers.
+ *
+ * For each of the @n_regs entries in @regs, every bit set in the entry's
+ * mask is individually set and then cleared; the register is read back
+ * after each write and compared with what was written, under the mask.
+ * The value read before the sweep is written back afterwards, including
+ * when the test fails part-way through a register.
+ *
+ * Returns 0 if every testable bit behaved, or -EIO (after logging the
+ * offending write/read pair) on the first mismatch.
+ */
+int ef4_farch_test_registers(struct ef4_nic *efx,
+                            const struct ef4_farch_register_test *regs,
+                            size_t n_regs)
+{
+       unsigned address = 0;
+       size_t i;
+       int j;
+       ef4_oword_t mask, original, reg, buf;
+
+       for (i = 0; i < n_regs; ++i) {
+               address = regs[i].address;
+               mask = regs[i].mask;
+
+               ef4_reado(efx, &original, address);
+
+               /* bit sweep on and off */
+               for (j = 0; j < 128; j++) {
+                       if (!EF4_EXTRACT_OWORD32(mask, j, j))
+                               continue;
+
+                       /* Test this testable bit can be set in isolation */
+                       EF4_AND_OWORD(reg, original, mask);
+                       EF4_SET_OWORD32(reg, j, j, 1);
+
+                       ef4_writeo(efx, &reg, address);
+                       ef4_reado(efx, &buf, address);
+
+                       if (ef4_masked_compare_oword(&reg, &buf, &mask))
+                               goto fail;
+
+                       /* Test this testable bit can be cleared in isolation */
+                       EF4_OR_OWORD(reg, original, mask);
+                       EF4_SET_OWORD32(reg, j, j, 0);
+
+                       ef4_writeo(efx, &reg, address);
+                       ef4_reado(efx, &buf, address);
+
+                       if (ef4_masked_compare_oword(&reg, &buf, &mask))
+                               goto fail;
+               }
+
+               ef4_writeo(efx, &original, address);
+       }
+
+       return 0;
+
+fail:
+       /* Only reached from inside the sweep, so 'original' is valid here:
+        * put it back rather than leaving the test pattern in the register.
+        */
+       ef4_writeo(efx, &original, address);
+
+       netif_err(efx, hw, efx->net_dev,
+                 "wrote "EF4_OWORD_FMT" read "EF4_OWORD_FMT
+                 " at address 0x%x mask "EF4_OWORD_FMT"\n", EF4_OWORD_VAL(reg),
+                 EF4_OWORD_VAL(buf), address, EF4_OWORD_VAL(mask));
+       return -EIO;
+}
+
+/**************************************************************************
+ *
+ * Special buffer handling
+ * Special buffers are used for event queues and the TX and RX
+ * descriptor rings.
+ *
+ *************************************************************************/
+
+/*
+ * Program the buffer table for a special buffer
+ *
+ * The buffer must already have been set up by ef4_alloc_special_buffer().
+ * One buffer table entry is written per EF4_BUF_SIZE chunk of the DMA
+ * area, starting at buffer->index, so that the buffer can be used for
+ * event queues, descriptor rings etc.
+ */
+static void
+ef4_init_special_buffer(struct ef4_nic *efx, struct ef4_special_buffer *buffer)
+{
+       ef4_qword_t buf_desc;
+       unsigned int tbl_index;
+       dma_addr_t chunk_addr;
+       int n;
+
+       EF4_BUG_ON_PARANOID(!buffer->buf.addr);
+
+       /* Write buffer descriptors to NIC */
+       for (n = 0; n < buffer->entries; n++) {
+               tbl_index = buffer->index + n;
+               chunk_addr = buffer->buf.dma_addr + (n * EF4_BUF_SIZE);
+               netif_dbg(efx, probe, efx->net_dev,
+                         "mapping special buffer %d at %llx\n",
+                         tbl_index, (unsigned long long)chunk_addr);
+               /* The address field takes the DMA address shifted right
+                * by 12 bits
+                */
+               EF4_POPULATE_QWORD_3(buf_desc,
+                                    FRF_AZ_BUF_ADR_REGION, 0,
+                                    FRF_AZ_BUF_ADR_FBUF, chunk_addr >> 12,
+                                    FRF_AZ_BUF_OWNER_ID_FBUF, 0);
+               ef4_write_buf_tbl(efx, &buf_desc, tbl_index);
+       }
+}
+
+/* Clear the buffer table entries belonging to a special buffer.  A buffer
+ * with no entries is left alone.
+ */
+static void
+ef4_fini_special_buffer(struct ef4_nic *efx, struct ef4_special_buffer *buffer)
+{
+       ef4_oword_t clr_cmd;
+       unsigned int first, last;
+
+       if (!buffer->entries)
+               return;
+
+       /* Inclusive range of buffer table IDs owned by this buffer */
+       first = buffer->index;
+       last = buffer->index + buffer->entries - 1;
+
+       netif_dbg(efx, hw, efx->net_dev, "unmapping special buffers %d-%d\n",
+                 first, last);
+
+       EF4_POPULATE_OWORD_4(clr_cmd,
+                            FRF_AZ_BUF_UPD_CMD, 0,
+                            FRF_AZ_BUF_CLR_CMD, 1,
+                            FRF_AZ_BUF_CLR_END_ID, last,
+                            FRF_AZ_BUF_CLR_START_ID, first);
+       ef4_writeo(efx, &clr_cmd, FR_AZ_BUF_TBL_UPD);
+}
+
+/*
+ * Allocate a new special buffer
+ *
+ * This allocates memory for a new buffer, clears it and allocates a
+ * new buffer ID range.  It does not write into the buffer table.
+ *
+ * This call will allocate 4KB buffers, since 8KB buffers can't be
+ * used for event queues and descriptor rings.
+ *
+ * @len is rounded up to a multiple of EF4_BUF_SIZE.  The buffer ID range
+ * starts at efx->next_buffer_table, which is then advanced by the number
+ * of entries taken.
+ *
+ * Returns 0 on success, or -ENOMEM if ef4_nic_alloc_buffer() fails.
+ */
+static int ef4_alloc_special_buffer(struct ef4_nic *efx,
+                                   struct ef4_special_buffer *buffer,
+                                   unsigned int len)
+{
+       len = ALIGN(len, EF4_BUF_SIZE);
+
+       if (ef4_nic_alloc_buffer(efx, &buffer->buf, len, GFP_KERNEL))
+               return -ENOMEM;
+       buffer->entries = len / EF4_BUF_SIZE;
+       BUG_ON(buffer->buf.dma_addr & (EF4_BUF_SIZE - 1));
+
+       /* Select new buffer ID */
+       buffer->index = efx->next_buffer_table;
+       efx->next_buffer_table += buffer->entries;
+
+       netif_dbg(efx, probe, efx->net_dev,
+                 "allocating special buffers %d-%d at %llx+%x "
+                 "(virt %p phys %llx)\n", buffer->index,
+                 buffer->index + buffer->entries - 1,
+                 (u64)buffer->buf.dma_addr, len,
+                 buffer->buf.addr, (u64)virt_to_phys(buffer->buf.addr));
+
+       return 0;
+}
+
+/* Release the memory behind a special buffer.  Does nothing if the buffer
+ * has no CPU address; otherwise frees it and resets the entry count to
+ * zero.
+ */
+static void
+ef4_free_special_buffer(struct ef4_nic *efx, struct ef4_special_buffer *buffer)
+{
+       if (buffer->buf.addr) {
+               netif_dbg(efx, hw, efx->net_dev,
+                         "deallocating special buffers %d-%d at %llx+%x "
+                         "(virt %p phys %llx)\n", buffer->index,
+                         buffer->index + buffer->entries - 1,
+                         (u64)buffer->buf.dma_addr, buffer->buf.len,
+                         buffer->buf.addr,
+                         (u64)virt_to_phys(buffer->buf.addr));
+
+               ef4_nic_free_buffer(efx, &buffer->buf);
+               buffer->entries = 0;
+       }
+}
+
+/**************************************************************************
+ *
+ * TX path
+ *
+ **************************************************************************/
+
+/* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring
+ *
+ * The pointer written is write_count masked with ptr_mask, and the
+ * register instance is selected by tx_queue->queue.
+ */
+static inline void ef4_farch_notify_tx_desc(struct ef4_tx_queue *tx_queue)
+{
+       unsigned write_ptr;
+       ef4_dword_t reg;
+
+       write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
+       EF4_POPULATE_DWORD_1(reg, FRF_AZ_TX_DESC_WPTR_DWORD, write_ptr);
+       ef4_writed_page(tx_queue->efx, &reg,
+                       FR_AZ_TX_DESC_UPD_DWORD_P0, tx_queue->queue);
+}
+
+/* Write pointer and first descriptor for TX descriptor ring
+ *
+ * The register value is built with the push command and write pointer,
+ * and its first qword is then overwritten with the descriptor *txd.
+ * The first BUILD_BUG_ON() checks that the descriptor field starts at
+ * bit 0, which this overlay relies on.
+ */
+static inline void ef4_farch_push_tx_desc(struct ef4_tx_queue *tx_queue,
+                                         const ef4_qword_t *txd)
+{
+       unsigned write_ptr;
+       ef4_oword_t reg;
+
+       BUILD_BUG_ON(FRF_AZ_TX_DESC_LBN != 0);
+       BUILD_BUG_ON(FR_AA_TX_DESC_UPD_KER != FR_BZ_TX_DESC_UPD_P0);
+
+       write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
+       EF4_POPULATE_OWORD_2(reg, FRF_AZ_TX_DESC_PUSH_CMD, true,
+                            FRF_AZ_TX_DESC_WPTR, write_ptr);
+       reg.qword[0] = *txd;
+       ef4_writeo_page(tx_queue->efx, &reg,
+                       FR_BZ_TX_DESC_UPD_P0, tx_queue->queue);
+}
+
+
+/* For each entry inserted into the software descriptor ring, create a
+ * descriptor in the hardware TX descriptor ring (in host memory), and
+ * write a doorbell.
+ *
+ * xmit_more_available is always cleared.  If write_count has already
+ * caught up with insert_count nothing else is done.  Otherwise the
+ * doorbell is either a descriptor push or a plain write pointer update,
+ * as decided by ef4_nic_may_push_tx_desc(); pushes are counted in
+ * tx_queue->pushes.
+ */
+void ef4_farch_tx_write(struct ef4_tx_queue *tx_queue)
+{
+       struct ef4_tx_buffer *buffer;
+       ef4_qword_t *txd;
+       unsigned write_ptr;
+       unsigned old_write_count = tx_queue->write_count;
+
+       tx_queue->xmit_more_available = false;
+       if (unlikely(tx_queue->write_count == tx_queue->insert_count))
+               return;
+
+       do {
+               write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
+               buffer = &tx_queue->buffer[write_ptr];
+               txd = ef4_tx_desc(tx_queue, write_ptr);
+               ++tx_queue->write_count;
+
+               EF4_BUG_ON_PARANOID(buffer->flags & EF4_TX_BUF_OPTION);
+
+               /* Create TX descriptor ring entry */
+               BUILD_BUG_ON(EF4_TX_BUF_CONT != 1);
+               EF4_POPULATE_QWORD_4(*txd,
+                                    FSF_AZ_TX_KER_CONT,
+                                    buffer->flags & EF4_TX_BUF_CONT,
+                                    FSF_AZ_TX_KER_BYTE_COUNT, buffer->len,
+                                    FSF_AZ_TX_KER_BUF_REGION, 0,
+                                    FSF_AZ_TX_KER_BUF_ADDR, buffer->dma_addr);
+       } while (tx_queue->write_count != tx_queue->insert_count);
+
+       wmb(); /* Ensure descriptors are written before they are fetched */
+
+       if (ef4_nic_may_push_tx_desc(tx_queue, old_write_count)) {
+               txd = ef4_tx_desc(tx_queue,
+                                 old_write_count & tx_queue->ptr_mask);
+               ef4_farch_push_tx_desc(tx_queue, txd);
+               ++tx_queue->pushes;
+       } else {
+               ef4_farch_notify_tx_desc(tx_queue);
+       }
+}
+
+/* Clamp the length of a TX fragment starting at @dma_addr.
+ *
+ * The result never runs past the next EF4_PAGE_SIZE boundary.  When
+ * workaround 5391 applies and the address is not 16-byte aligned, it is
+ * further limited to 512 bytes less the misalignment.
+ */
+unsigned int ef4_farch_tx_limit_len(struct ef4_tx_queue *tx_queue,
+                                   dma_addr_t dma_addr, unsigned int len)
+{
+       /* Don't cross 4K boundaries with descriptors. */
+       unsigned int to_boundary = (~dma_addr & (EF4_PAGE_SIZE - 1)) + 1;
+       unsigned int misalign = dma_addr & 0xf;
+
+       if (to_boundary < len)
+               len = to_boundary;
+
+       if (EF4_WORKAROUND_5391(tx_queue->efx) && misalign)
+               len = min_t(unsigned int, len, 512 - misalign);
+
+       return len;
+}
+
+
+/* Allocate hardware resources for a TX queue: a special buffer holding
+ * ptr_mask + 1 descriptors of one qword each.  Returns the result of
+ * ef4_alloc_special_buffer().
+ */
+int ef4_farch_tx_probe(struct ef4_tx_queue *tx_queue)
+{
+       unsigned int ring_bytes =
+               (tx_queue->ptr_mask + 1) * sizeof(ef4_qword_t);
+
+       return ef4_alloc_special_buffer(tx_queue->efx, &tx_queue->txd,
+                                       ring_bytes);
+}
+
+/* Bring up a TX queue in hardware: write the buffer table entries for its
+ * descriptor ring, program its descriptor pointer table entry, and then
+ * apply the revision-specific checksum and pacing configuration.
+ */
+void ef4_farch_tx_init(struct ef4_tx_queue *tx_queue)
+{
+       struct ef4_nic *efx = tx_queue->efx;
+       bool is_b0 = ef4_nic_rev(efx) >= EF4_REV_FALCON_B0;
+       bool csum_offload = tx_queue->queue & EF4_TXQ_TYPE_OFFLOAD;
+       ef4_oword_t reg;
+
+       /* Pin TX descriptor ring */
+       ef4_init_special_buffer(efx, &tx_queue->txd);
+
+       /* Push TX descriptor ring to card */
+       EF4_POPULATE_OWORD_10(reg,
+                             FRF_AZ_TX_DESCQ_EN, 1,
+                             FRF_AZ_TX_ISCSI_DDIG_EN, 0,
+                             FRF_AZ_TX_ISCSI_HDIG_EN, 0,
+                             FRF_AZ_TX_DESCQ_BUF_BASE_ID, tx_queue->txd.index,
+                             FRF_AZ_TX_DESCQ_EVQ_ID,
+                             tx_queue->channel->channel,
+                             FRF_AZ_TX_DESCQ_OWNER_ID, 0,
+                             FRF_AZ_TX_DESCQ_LABEL, tx_queue->queue,
+                             FRF_AZ_TX_DESCQ_SIZE,
+                             __ffs(tx_queue->txd.entries),
+                             FRF_AZ_TX_DESCQ_TYPE, 0,
+                             FRF_BZ_TX_NON_IP_DROP_DIS, 1);
+
+       /* From B0 on, checksum generation is disabled per queue in the
+        * descriptor pointer table entry itself
+        */
+       if (is_b0) {
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_TX_IP_CHKSM_DIS,
+                                   !csum_offload);
+               EF4_SET_OWORD_FIELD(reg, FRF_BZ_TX_TCP_CHKSM_DIS,
+                                   !csum_offload);
+       }
+
+       ef4_writeo_table(efx, &reg, efx->type->txd_ptr_tbl_base,
+                        tx_queue->queue);
+
+       if (!is_b0) {
+               /* Only 128 bits in this register */
+               BUILD_BUG_ON(EF4_MAX_TX_QUEUES > 128);
+
+               /* Before B0 there is one bit per queue in a shared
+                * register: set means checksum offload is disabled
+                */
+               ef4_reado(efx, &reg, FR_AA_TX_CHKSM_CFG);
+               if (csum_offload)
+                       __clear_bit_le(tx_queue->queue, &reg);
+               else
+                       __set_bit_le(tx_queue->queue, &reg);
+               ef4_writeo(efx, &reg, FR_AA_TX_CHKSM_CFG);
+       } else {
+               EF4_POPULATE_OWORD_1(reg,
+                                    FRF_BZ_TX_PACE,
+                                    (tx_queue->queue & EF4_TXQ_TYPE_HIGHPRI) ?
+                                    FFE_BZ_TX_PACE_OFF :
+                                    FFE_BZ_TX_PACE_RESERVED);
+               ef4_writeo_table(efx, &reg, FR_BZ_TX_PACE_TBL,
+                                tx_queue->queue);
+       }
+}
+
+/* Issue a flush command for one TX queue and mark the flush as
+ * outstanding.  Warns if a flush was already outstanding on the queue.
+ */
+static void ef4_farch_flush_tx_queue(struct ef4_tx_queue *tx_queue)
+{
+       ef4_oword_t flush_cmd;
+
+       WARN_ON(atomic_read(&tx_queue->flush_outstanding));
+       atomic_set(&tx_queue->flush_outstanding, 1);
+
+       EF4_POPULATE_OWORD_2(flush_cmd,
+                            FRF_AZ_TX_FLUSH_DESCQ_CMD, 1,
+                            FRF_AZ_TX_FLUSH_DESCQ, tx_queue->queue);
+       ef4_writeo(tx_queue->efx, &flush_cmd, FR_AZ_TX_FLUSH_DESCQ);
+}
+
+/* Detach a TX queue from the hardware: zero its descriptor pointer table
+ * entry, then clear the buffer table entries of its descriptor ring.
+ */
+void ef4_farch_tx_fini(struct ef4_tx_queue *tx_queue)
+{
+       struct ef4_nic *efx = tx_queue->efx;
+       ef4_oword_t cleared_entry;
+
+       /* Remove TX descriptor ring from card */
+       EF4_ZERO_OWORD(cleared_entry);
+       ef4_writeo_table(efx, &cleared_entry, efx->type->txd_ptr_tbl_base,
+                        tx_queue->queue);
+
+       /* Unpin TX descriptor ring */
+       ef4_fini_special_buffer(efx, &tx_queue->txd);
+}
+
+/* Free buffers backing TX queue, i.e. the descriptor ring special buffer
+ * that ef4_farch_tx_probe() allocated in tx_queue->txd
+ */
+void ef4_farch_tx_remove(struct ef4_tx_queue *tx_queue)
+{
+       ef4_free_special_buffer(tx_queue->efx, &tx_queue->txd);
+}
+
+/**************************************************************************
+ *
+ * RX path
+ *
+ **************************************************************************/
+
+/* This creates an entry in the RX descriptor queue
+ *
+ * The descriptor at @index is filled in from the RX buffer at the same
+ * index.  The size written is the buffer length less the NIC type's
+ * rx_buffer_padding.
+ */
+static inline void
+ef4_farch_build_rx_desc(struct ef4_rx_queue *rx_queue, unsigned index)
+{
+       struct ef4_rx_buffer *rx_buf;
+       ef4_qword_t *rxd;
+
+       rxd = ef4_rx_desc(rx_queue, index);
+       rx_buf = ef4_rx_buffer(rx_queue, index);
+       EF4_POPULATE_QWORD_3(*rxd,
+                            FSF_AZ_RX_KER_BUF_SIZE,
+                            rx_buf->len -
+                            rx_queue->efx->type->rx_buffer_padding,
+                            FSF_AZ_RX_KER_BUF_REGION, 0,
+                            FSF_AZ_RX_KER_BUF_ADDR, rx_buf->dma_addr);
+}
+
+/* This writes to the RX_DESC_WPTR register for the specified receive
+ * descriptor ring.
+ *
+ * Descriptors are first built for every buffer added since the last
+ * call (notified_count is advanced until it equals added_count).  The
+ * write pointer is then set to added_count masked with ptr_mask.
+ */
+void ef4_farch_rx_write(struct ef4_rx_queue *rx_queue)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+       ef4_dword_t reg;
+       unsigned write_ptr;
+
+       while (rx_queue->notified_count != rx_queue->added_count) {
+               ef4_farch_build_rx_desc(
+                       rx_queue,
+                       rx_queue->notified_count & rx_queue->ptr_mask);
+               ++rx_queue->notified_count;
+       }
+
+       wmb();
+       write_ptr = rx_queue->added_count & rx_queue->ptr_mask;
+       EF4_POPULATE_DWORD_1(reg, FRF_AZ_RX_DESC_WPTR_DWORD, write_ptr);
+       ef4_writed_page(efx, &reg, FR_AZ_RX_DESC_UPD_DWORD_P0,
+                       ef4_rx_queue_index(rx_queue));
+}
+
+/* Allocate hardware resources for an RX queue: a special buffer holding
+ * ptr_mask + 1 descriptors of one qword each.  Returns the result of
+ * ef4_alloc_special_buffer().
+ */
+int ef4_farch_rx_probe(struct ef4_rx_queue *rx_queue)
+{
+       unsigned int ring_bytes =
+               (rx_queue->ptr_mask + 1) * sizeof(ef4_qword_t);
+
+       return ef4_alloc_special_buffer(rx_queue->efx, &rx_queue->rxd,
+                                       ring_bytes);
+}
+
+/* Bring up an RX queue in hardware: reset its scatter state, write the
+ * buffer table entries for its descriptor ring and program its descriptor
+ * pointer table entry.
+ */
+void ef4_farch_rx_init(struct ef4_rx_queue *rx_queue)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+       bool is_b0 = ef4_nic_rev(efx) >= EF4_REV_FALCON_B0;
+       /* For kernel-mode queues in Falcon A1, the JUMBO flag enables
+        * DMA to continue after a PCIe page boundary (and scattering
+        * is not possible).  In Falcon B0 and Siena, it enables
+        * scatter.
+        */
+       bool jumbo_en = !is_b0 || efx->rx_scatter;
+       ef4_oword_t desc_ptr_entry;
+
+       netif_dbg(efx, hw, efx->net_dev,
+                 "RX queue %d ring in special buffers %d-%d\n",
+                 ef4_rx_queue_index(rx_queue), rx_queue->rxd.index,
+                 rx_queue->rxd.index + rx_queue->rxd.entries - 1);
+
+       rx_queue->scatter_n = 0;
+
+       /* Pin RX descriptor ring */
+       ef4_init_special_buffer(efx, &rx_queue->rxd);
+
+       /* Push RX descriptor ring to card; both iSCSI digest enables
+        * simply follow the B0-or-later check
+        */
+       EF4_POPULATE_OWORD_10(desc_ptr_entry,
+                             FRF_AZ_RX_ISCSI_DDIG_EN, is_b0,
+                             FRF_AZ_RX_ISCSI_HDIG_EN, is_b0,
+                             FRF_AZ_RX_DESCQ_BUF_BASE_ID, rx_queue->rxd.index,
+                             FRF_AZ_RX_DESCQ_EVQ_ID,
+                             ef4_rx_queue_channel(rx_queue)->channel,
+                             FRF_AZ_RX_DESCQ_OWNER_ID, 0,
+                             FRF_AZ_RX_DESCQ_LABEL,
+                             ef4_rx_queue_index(rx_queue),
+                             FRF_AZ_RX_DESCQ_SIZE,
+                             __ffs(rx_queue->rxd.entries),
+                             FRF_AZ_RX_DESCQ_TYPE, 0 /* kernel queue */ ,
+                             FRF_AZ_RX_DESCQ_JUMBO, jumbo_en,
+                             FRF_AZ_RX_DESCQ_EN, 1);
+       ef4_writeo_table(efx, &desc_ptr_entry, efx->type->rxd_ptr_tbl_base,
+                        ef4_rx_queue_index(rx_queue));
+}
+
+/* Issue a flush command for one RX queue.  The flush accounting counters
+ * are not touched here.
+ */
+static void ef4_farch_flush_rx_queue(struct ef4_rx_queue *rx_queue)
+{
+       ef4_oword_t flush_cmd;
+
+       EF4_POPULATE_OWORD_2(flush_cmd,
+                            FRF_AZ_RX_FLUSH_DESCQ_CMD, 1,
+                            FRF_AZ_RX_FLUSH_DESCQ,
+                            ef4_rx_queue_index(rx_queue));
+       ef4_writeo(rx_queue->efx, &flush_cmd, FR_AZ_RX_FLUSH_DESCQ);
+}
+
+/* Detach an RX queue from the hardware: zero its descriptor pointer table
+ * entry, then clear the buffer table entries of its descriptor ring.
+ */
+void ef4_farch_rx_fini(struct ef4_rx_queue *rx_queue)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+       ef4_oword_t cleared_entry;
+
+       /* Remove RX descriptor ring from card */
+       EF4_ZERO_OWORD(cleared_entry);
+       ef4_writeo_table(efx, &cleared_entry, efx->type->rxd_ptr_tbl_base,
+                        ef4_rx_queue_index(rx_queue));
+
+       /* Unpin RX descriptor ring */
+       ef4_fini_special_buffer(efx, &rx_queue->rxd);
+}
+
+/* Free buffers backing RX queue, i.e. the descriptor ring special buffer
+ * that ef4_farch_rx_probe() allocated in rx_queue->rxd
+ */
+void ef4_farch_rx_remove(struct ef4_rx_queue *rx_queue)
+{
+       ef4_free_special_buffer(rx_queue->efx, &rx_queue->rxd);
+}
+
+/**************************************************************************
+ *
+ * Flush handling
+ *
+ **************************************************************************/
+
+/* Wake-up condition for the wait in ef4_farch_do_flush(): true when all
+ * flushes are completed, or when more RX flushes can be kicked off.
+ */
+static bool ef4_farch_flush_wake(struct ef4_nic *efx)
+{
+       /* Ensure that all updates are visible to ef4_farch_do_flush() */
+       smp_mb();
+
+       /* Every queue has finished */
+       if (atomic_read(&efx->active_queues) == 0)
+               return true;
+
+       /* A flush slot is free and an RX queue is still waiting for one */
+       return atomic_read(&efx->rxq_flush_outstanding) < EF4_RX_FLUSH_COUNT &&
+              atomic_read(&efx->rxq_flush_pending) > 0;
+}
+
+/* Check the descriptor pointer table entry of every TX queue to see
+ * whether its flush has finished.
+ *
+ * A queue whose entry still has the FLUSH or EN field set is reported as
+ * not flushed.  A queue that is flushed but still marked as having a
+ * flush outstanding gets a TX_DRAIN magic event generated for it.
+ *
+ * Returns true only if no queue was found unflushed.
+ */
+static bool ef4_check_tx_flush_complete(struct ef4_nic *efx)
+{
+       bool all_flushed = true;
+       ef4_oword_t ptr_tbl_entry;
+       struct ef4_channel *channel;
+       struct ef4_tx_queue *tx_queue;
+
+       ef4_for_each_channel(channel, efx) {
+               ef4_for_each_channel_tx_queue(tx_queue, channel) {
+                       ef4_reado_table(efx, &ptr_tbl_entry,
+                                       FR_BZ_TX_DESC_PTR_TBL, tx_queue->queue);
+                       if (EF4_OWORD_FIELD(ptr_tbl_entry,
+                                           FRF_AZ_TX_DESCQ_FLUSH) ||
+                           EF4_OWORD_FIELD(ptr_tbl_entry,
+                                           FRF_AZ_TX_DESCQ_EN)) {
+                               netif_dbg(efx, hw, efx->net_dev,
+                                         "flush did not complete on TXQ %d\n",
+                                         tx_queue->queue);
+                               all_flushed = false;
+                       } else if (atomic_cmpxchg(&tx_queue->flush_outstanding,
+                                                 1, 0)) {
+                               /* The flush is complete, but we didn't
+                                * receive a flush completion event
+                                */
+                               netif_dbg(efx, hw, efx->net_dev,
+                                         "flush complete on TXQ %d, so drain "
+                                         "the queue\n", tx_queue->queue);
+                               /* Don't need to increment active_queues as it
+                                * has already been incremented for the queues
+                                * which did not drain
+                                */
+                               ef4_farch_magic_event(channel,
+                                                     EF4_CHANNEL_MAGIC_TX_DRAIN(
+                                                             tx_queue));
+                       }
+               }
+       }
+
+       return all_flushed;
+}
+
+/* Flush all the transmit queues, and continue flushing receive queues until
+ * they're all flushed. Wait for the DRAIN events to be received so that there
+ * are no more RX and TX events left on any channel.
+ *
+ * RX flushes are only started while fewer than EF4_RX_FLUSH_COUNT are
+ * outstanding; the rest stay pending until ef4_farch_flush_wake() lets the
+ * wait return.  The whole operation shares a single timeout.
+ *
+ * Returns 0 on success.  If queues are still active when the wait loop ends
+ * and ef4_check_tx_flush_complete() does not report every TX queue flushed,
+ * the flush accounting counters are reset and -ETIMEDOUT is returned.
+ */
+static int ef4_farch_do_flush(struct ef4_nic *efx)
+{
+       unsigned timeout = msecs_to_jiffies(5000); /* 5s for all flushes and drains */
+       struct ef4_channel *channel;
+       struct ef4_rx_queue *rx_queue;
+       struct ef4_tx_queue *tx_queue;
+       int rc = 0;
+
+       ef4_for_each_channel(channel, efx) {
+               ef4_for_each_channel_tx_queue(tx_queue, channel) {
+                       ef4_farch_flush_tx_queue(tx_queue);
+               }
+               ef4_for_each_channel_rx_queue(rx_queue, channel) {
+                       rx_queue->flush_pending = true;
+                       atomic_inc(&efx->rxq_flush_pending);
+               }
+       }
+
+       while (timeout && atomic_read(&efx->active_queues) > 0) {
+               /* The hardware supports four concurrent rx flushes, each of
+                * which may need to be retried if there is an outstanding
+                * descriptor fetch
+                */
+               ef4_for_each_channel(channel, efx) {
+                       ef4_for_each_channel_rx_queue(rx_queue, channel) {
+                               if (atomic_read(&efx->rxq_flush_outstanding) >=
+                                   EF4_RX_FLUSH_COUNT)
+                                       break;
+
+                               if (rx_queue->flush_pending) {
+                                       rx_queue->flush_pending = false;
+                                       atomic_dec(&efx->rxq_flush_pending);
+                                       atomic_inc(&efx->rxq_flush_outstanding);
+                                       ef4_farch_flush_rx_queue(rx_queue);
+                               }
+                       }
+               }
+
+               timeout = wait_event_timeout(efx->flush_wq,
+                                            ef4_farch_flush_wake(efx),
+                                            timeout);
+       }
+
+       if (atomic_read(&efx->active_queues) &&
+           !ef4_check_tx_flush_complete(efx)) {
+               netif_err(efx, hw, efx->net_dev, "failed to flush %d queues "
+                         "(rx %d+%d)\n", atomic_read(&efx->active_queues),
+                         atomic_read(&efx->rxq_flush_outstanding),
+                         atomic_read(&efx->rxq_flush_pending));
+               rc = -ETIMEDOUT;
+
+               atomic_set(&efx->active_queues, 0);
+               atomic_set(&efx->rxq_flush_pending, 0);
+               atomic_set(&efx->rxq_flush_outstanding, 0);
+       }
+
+       return rc;
+}
+
+/* Flush and tear down all DMA queues.
+ *
+ * Nothing is done in STATE_RECOVERY.  Otherwise the queues are flushed
+ * (only if the PCI device is a bus master), and then every RX and TX
+ * queue is finalised whether or not the flush succeeded.
+ *
+ * Returns 0, or the error from ef4_farch_do_flush().
+ */
+int ef4_farch_fini_dmaq(struct ef4_nic *efx)
+{
+       struct ef4_channel *channel;
+       struct ef4_tx_queue *tx_queue;
+       struct ef4_rx_queue *rx_queue;
+       int rc = 0;
+
+       /* Do not attempt to write to the NIC during EEH recovery */
+       if (efx->state == STATE_RECOVERY)
+               return 0;
+
+       /* Only perform flush if DMA is enabled */
+       if (efx->pci_dev->is_busmaster) {
+               efx->type->prepare_flush(efx);
+               rc = ef4_farch_do_flush(efx);
+               efx->type->finish_flush(efx);
+       }
+
+       ef4_for_each_channel(channel, efx) {
+               ef4_for_each_channel_rx_queue(rx_queue, channel)
+                       ef4_farch_rx_fini(rx_queue);
+               ef4_for_each_channel_tx_queue(tx_queue, channel)
+                       ef4_farch_tx_fini(tx_queue);
+       }
+
+       return rc;
+}
+
+/* Reset queue and flush accounting after FLR
+ *
+ * One possible cause of FLR recovery is that DMA may be failing (e.g. if bus
+ * mastering was disabled), in which case we don't receive (RXQ) flush
+ * completion events.  This means that efx->rxq_flush_outstanding remained at 4
+ * after the FLR; also, efx->active_queues was non-zero (as no flush completion
+ * events were received, and we didn't go through ef4_check_tx_flush_complete()).
+ * If we don't fix this up, on the next call to ef4_realloc_channels() we won't
+ * flush any RX queues because efx->rxq_flush_outstanding is at the limit of 4
+ * for batched flush requests; and the efx->active_queues gets messed up because
+ * we keep incrementing for the newly initialised queues, but it never went to
+ * zero previously.  Then we get a timeout every time we try to restart the
+ * queues, as it doesn't go back to zero when we should be flushing the queues.
+ *
+ * All three counters are simply set back to zero here.
+ */
+void ef4_farch_finish_flr(struct ef4_nic *efx)
+{
+       atomic_set(&efx->rxq_flush_pending, 0);
+       atomic_set(&efx->rxq_flush_outstanding, 0);
+       atomic_set(&efx->active_queues, 0);
+}
+
+
+/**************************************************************************
+ *
+ * Event queue processing
+ * Event queues are processed by per-channel tasklets.
+ *
+ **************************************************************************/
+
+/* Update a channel's event queue's read pointer (RPTR) register
+ *
+ * This writes the EVQ_RPTR_REG register for the specified channel's
+ * event queue.  The value written is eventq_read_ptr masked with
+ * eventq_mask, and the register for a channel is found at
+ * evq_rptr_tbl_base plus FR_BZ_EVQ_RPTR_STEP times the channel number.
+ */
+void ef4_farch_ev_read_ack(struct ef4_channel *channel)
+{
+       ef4_dword_t reg;
+       struct ef4_nic *efx = channel->efx;
+
+       EF4_POPULATE_DWORD_1(reg, FRF_AZ_EVQ_RPTR,
+                            channel->eventq_read_ptr & channel->eventq_mask);
+
+       /* For Falcon A1, EVQ_RPTR_KER is documented as having a step size
+        * of 4 bytes, but it is really 16 bytes just like later revisions.
+        */
+       ef4_writed(efx, &reg,
+                  efx->type->evq_rptr_tbl_base +
+                  FR_BZ_EVQ_RPTR_STEP * channel->channel);
+}
+
+/* Use HW to insert a SW defined event
+ *
+ * The 64-bit @event is copied into the first two dwords of the register
+ * value and the remaining two dwords are zeroed.  The target event queue
+ * @evq is then placed in the QID field and the result is written to
+ * FR_AZ_DRV_EV.  The BUILD_BUG_ON() checks that the event data field is
+ * 64 bits wide starting at bit 0, which the raw copy relies on.
+ */
+void ef4_farch_generate_event(struct ef4_nic *efx, unsigned int evq,
+                             ef4_qword_t *event)
+{
+       ef4_oword_t drv_ev_reg;
+
+       BUILD_BUG_ON(FRF_AZ_DRV_EV_DATA_LBN != 0 ||
+                    FRF_AZ_DRV_EV_DATA_WIDTH != 64);
+       drv_ev_reg.u32[0] = event->u32[0];
+       drv_ev_reg.u32[1] = event->u32[1];
+       drv_ev_reg.u32[2] = 0;
+       drv_ev_reg.u32[3] = 0;
+       EF4_SET_OWORD_FIELD(drv_ev_reg, FRF_AZ_DRV_EV_QID, evq);
+       ef4_writeo(efx, &drv_ev_reg, FR_AZ_DRV_EV);
+}
+
+/* Generate a driver event carrying @magic on the event queue whose number
+ * is @channel's channel number.
+ */
+static void ef4_farch_magic_event(struct ef4_channel *channel, u32 magic)
+{
+       ef4_qword_t drv_gen_ev;
+
+       EF4_POPULATE_QWORD_2(drv_gen_ev,
+                            FSF_AZ_EV_CODE, FSE_AZ_EV_CODE_DRV_GEN_EV,
+                            FSF_AZ_DRV_GEN_EV_MAGIC, magic);
+       ef4_farch_generate_event(channel->efx, channel->channel, &drv_gen_ev);
+}
+
+/* Handle a transmit completion event
+ *
+ * The NIC batches TX completion events; the message we receive is of
+ * the form "complete all TX events up to this index".
+ *
+ * Besides completions this also handles the WQ_FF_FULL event (by
+ * rewriting the write pointer under the netdev TX lock) and the PKT_ERR
+ * event (by scheduling a RESET_TYPE_DMA_ERROR reset); any other TX event
+ * is logged as unexpected.  All events are ignored while a reset is
+ * pending.
+ *
+ * For a completion, returns the distance from the queue's read_count to
+ * the completed descriptor pointer, masked with ptr_mask.  Returns 0 in
+ * every other case.
+ */
+static int
+ef4_farch_handle_tx_event(struct ef4_channel *channel, ef4_qword_t *event)
+{
+       unsigned int tx_ev_desc_ptr;
+       unsigned int tx_ev_q_label;
+       struct ef4_tx_queue *tx_queue;
+       struct ef4_nic *efx = channel->efx;
+       int tx_packets = 0;
+
+       if (unlikely(ACCESS_ONCE(efx->reset_pending)))
+               return 0;
+
+       if (likely(EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) {
+               /* Transmit completion */
+               tx_ev_desc_ptr = EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_DESC_PTR);
+               tx_ev_q_label = EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
+               tx_queue = ef4_channel_get_tx_queue(
+                       channel, tx_ev_q_label % EF4_TXQ_TYPES);
+               tx_packets = ((tx_ev_desc_ptr - tx_queue->read_count) &
+                             tx_queue->ptr_mask);
+               ef4_xmit_done(tx_queue, tx_ev_desc_ptr);
+       } else if (EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) {
+               /* Rewrite the FIFO write pointer */
+               tx_ev_q_label = EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
+               tx_queue = ef4_channel_get_tx_queue(
+                       channel, tx_ev_q_label % EF4_TXQ_TYPES);
+
+               netif_tx_lock(efx->net_dev);
+               ef4_farch_notify_tx_desc(tx_queue);
+               netif_tx_unlock(efx->net_dev);
+       } else if (EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_PKT_ERR)) {
+               ef4_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
+       } else {
+               netif_err(efx, tx_err, efx->net_dev,
+                         "channel %d unexpected TX event "
+                         EF4_QWORD_FMT"\n", channel->channel,
+                         EF4_QWORD_VAL(*event));
+       }
+
+       return tx_packets;
+}
+
+/* Detect errors included in the rx_evt_pkt_ok bit.
+ *
+ * Decodes the individual error flags of an RX event that was not marked
+ * OK, increments the matching per-channel counter and, in DEBUG builds,
+ * emits a rate-limited message for the unexpected ones.
+ *
+ * Returns EF4_RX_PKT_DISCARD if the frame must be dropped (CRC error,
+ * truncation, dribble nibble, to-be-discarded or pause frame error),
+ * otherwise 0.
+ */
+static u16 ef4_farch_handle_rx_not_ok(struct ef4_rx_queue *rx_queue,
+                                     const ef4_qword_t *event)
+{
+       struct ef4_channel *channel = ef4_rx_queue_channel(rx_queue);
+       struct ef4_nic *efx = rx_queue->efx;
+       bool rx_ev_buf_owner_id_err, rx_ev_ip_hdr_chksum_err;
+       bool rx_ev_tcp_udp_chksum_err, rx_ev_eth_crc_err;
+       bool rx_ev_frm_trunc, rx_ev_drib_nib, rx_ev_tobe_disc;
+       bool rx_ev_other_err, rx_ev_pause_frm;
+
+       rx_ev_tobe_disc = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_TOBE_DISC);
+       rx_ev_buf_owner_id_err = EF4_QWORD_FIELD(*event,
+                                                FSF_AZ_RX_EV_BUF_OWNER_ID_ERR);
+       rx_ev_ip_hdr_chksum_err = EF4_QWORD_FIELD(*event,
+                                                 FSF_AZ_RX_EV_IP_HDR_CHKSUM_ERR);
+       rx_ev_tcp_udp_chksum_err = EF4_QWORD_FIELD(*event,
+                                                  FSF_AZ_RX_EV_TCP_UDP_CHKSUM_ERR);
+       rx_ev_eth_crc_err = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_ETH_CRC_ERR);
+       rx_ev_frm_trunc = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_FRM_TRUNC);
+       /* The dribble-nibble flag is only consulted before Falcon B0 */
+       rx_ev_drib_nib = ((ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) ?
+                         0 : EF4_QWORD_FIELD(*event, FSF_AA_RX_EV_DRIB_NIB));
+       rx_ev_pause_frm = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_PAUSE_FRM_ERR);
+
+       /* Every error apart from tobe_disc and pause_frm.  This is only
+        * consumed by the DEBUG logging below.
+        */
+       rx_ev_other_err = (rx_ev_drib_nib | rx_ev_tcp_udp_chksum_err |
+                          rx_ev_buf_owner_id_err | rx_ev_eth_crc_err |
+                          rx_ev_frm_trunc | rx_ev_ip_hdr_chksum_err);
+
+       /* Count errors that are not in MAC stats.  Ignore expected
+        * checksum errors during self-test. */
+       if (rx_ev_frm_trunc)
+               ++channel->n_rx_frm_trunc;
+       else if (rx_ev_tobe_disc)
+               ++channel->n_rx_tobe_disc;
+       else if (!efx->loopback_selftest) {
+               if (rx_ev_ip_hdr_chksum_err)
+                       ++channel->n_rx_ip_hdr_chksum_err;
+               else if (rx_ev_tcp_udp_chksum_err)
+                       ++channel->n_rx_tcp_udp_chksum_err;
+       }
+
+       /* TOBE_DISC is expected on unicast mismatches; don't print out an
+        * error message.  FRM_TRUNC indicates RXDP dropped the packet due
+        * to a FIFO overflow.
+        */
+#ifdef DEBUG
+       if (rx_ev_other_err && net_ratelimit()) {
+               netif_dbg(efx, rx_err, efx->net_dev,
+                         " RX queue %d unexpected RX event "
+                         EF4_QWORD_FMT "%s%s%s%s%s%s%s%s\n",
+                         ef4_rx_queue_index(rx_queue), EF4_QWORD_VAL(*event),
+                         rx_ev_buf_owner_id_err ? " [OWNER_ID_ERR]" : "",
+                         rx_ev_ip_hdr_chksum_err ?
+                         " [IP_HDR_CHKSUM_ERR]" : "",
+                         rx_ev_tcp_udp_chksum_err ?
+                         " [TCP_UDP_CHKSUM_ERR]" : "",
+                         rx_ev_eth_crc_err ? " [ETH_CRC_ERR]" : "",
+                         rx_ev_frm_trunc ? " [FRM_TRUNC]" : "",
+                         rx_ev_drib_nib ? " [DRIB_NIB]" : "",
+                         rx_ev_tobe_disc ? " [TOBE_DISC]" : "",
+                         rx_ev_pause_frm ? " [PAUSE]" : "");
+       }
+#endif
+
+       /* The frame must be discarded if any of these are true. */
+       return (rx_ev_eth_crc_err | rx_ev_frm_trunc | rx_ev_drib_nib |
+               rx_ev_tobe_disc | rx_ev_pause_frm) ?
+               EF4_RX_PKT_DISCARD : 0;
+}
+
+/* Deal with an RX event whose descriptor index is out of sequence
+ *
+ * Returns true when the event can be handled as a partial packet
+ * discard; returns false, after logging and scheduling a reset, when
+ * it is more serious.
+ */
+static bool
+ef4_farch_handle_rx_bad_index(struct ef4_rx_queue *rx_queue, unsigned index)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+       struct ef4_channel *channel = ef4_rx_queue_channel(rx_queue);
+       unsigned expected;
+       unsigned dropped;
+
+       /* A scatter run is in progress and @index is the slot just
+        * before the one we expected next.
+        */
+       if (rx_queue->scatter_n &&
+           index == ((rx_queue->removed_count + rx_queue->scatter_n - 1) &
+                     rx_queue->ptr_mask)) {
+               ++channel->n_rx_nodesc_trunc;
+               return true;
+       }
+
+       expected = rx_queue->removed_count & rx_queue->ptr_mask;
+       dropped = (index - expected) & rx_queue->ptr_mask;
+       netif_info(efx, rx_err, efx->net_dev,
+                  "dropped %d events (index=%d expected=%d)\n",
+                  dropped, index, expected);
+
+       if (EF4_WORKAROUND_5676(efx))
+               ef4_schedule_reset(efx, RESET_TYPE_RX_RECOVERY);
+       else
+               ef4_schedule_reset(efx, RESET_TYPE_DISABLE);
+       return false;
+}
+
+/* Handle a packet received event
+ *
+ * The NIC gives a "discard" flag if it's a unicast packet with the
+ * wrong destination address.
+ * Also "is multicast" and "matches multicast filter" flags can be used to
+ * discard non-matching multicast packets.
+ *
+ * Each fragment of a scattered packet arrives as its own event.  Fragments
+ * are counted in rx_queue->scatter_n until an event without the JUMBO_CONT
+ * flag is seen; the whole run is then passed to ef4_rx_packet() and
+ * rx_queue->removed_count is advanced past it.  Events are ignored while
+ * a reset is pending.
+ */
+static void
+ef4_farch_handle_rx_event(struct ef4_channel *channel, const ef4_qword_t *event)
+{
+       unsigned int rx_ev_desc_ptr, rx_ev_byte_cnt;
+       unsigned int rx_ev_hdr_type, rx_ev_mcast_pkt;
+       unsigned expected_ptr;
+       bool rx_ev_pkt_ok, rx_ev_sop, rx_ev_cont;
+       u16 flags;
+       struct ef4_rx_queue *rx_queue;
+       struct ef4_nic *efx = channel->efx;
+
+       if (unlikely(ACCESS_ONCE(efx->reset_pending)))
+               return;
+
+       rx_ev_cont = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT);
+       rx_ev_sop = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP);
+       WARN_ON(EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_Q_LABEL) !=
+               channel->channel);
+
+       rx_queue = ef4_channel_get_rx_queue(channel);
+
+       rx_ev_desc_ptr = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_DESC_PTR);
+       expected_ptr = ((rx_queue->removed_count + rx_queue->scatter_n) &
+                       rx_queue->ptr_mask);
+
+       /* Check for partial drops and other errors: the event is out of
+        * sequence if its descriptor index is not the expected one, or if
+        * its SOP flag disagrees with whether a scatter run is in progress.
+        */
+       if (unlikely(rx_ev_desc_ptr != expected_ptr) ||
+           unlikely(rx_ev_sop != (rx_queue->scatter_n == 0))) {
+               if (rx_ev_desc_ptr != expected_ptr &&
+                   !ef4_farch_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr))
+                       return;
+
+               /* Discard all pending fragments */
+               if (rx_queue->scatter_n) {
+                       ef4_rx_packet(
+                               rx_queue,
+                               rx_queue->removed_count & rx_queue->ptr_mask,
+                               rx_queue->scatter_n, 0, EF4_RX_PKT_DISCARD);
+                       rx_queue->removed_count += rx_queue->scatter_n;
+                       rx_queue->scatter_n = 0;
+               }
+
+               /* Return if there is no new fragment */
+               if (rx_ev_desc_ptr != expected_ptr)
+                       return;
+
+               /* Discard new fragment if not SOP */
+               if (!rx_ev_sop) {
+                       ef4_rx_packet(
+                               rx_queue,
+                               rx_queue->removed_count & rx_queue->ptr_mask,
+                               1, 0, EF4_RX_PKT_DISCARD);
+                       ++rx_queue->removed_count;
+                       return;
+               }
+       }
+
+       ++rx_queue->scatter_n;
+       if (rx_ev_cont)
+               return;
+
+       rx_ev_byte_cnt = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT);
+       rx_ev_pkt_ok = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK);
+       rx_ev_hdr_type = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
+
+       if (likely(rx_ev_pkt_ok)) {
+               /* If packet is marked as OK then we can rely on the
+                * hardware checksum and classification.  TCP gets both
+                * the TCP and CSUMMED flags, UDP gets CSUMMED only, and
+                * any other header type gets no flags.
+                */
+               flags = 0;
+               switch (rx_ev_hdr_type) {
+               case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_TCP:
+                       flags |= EF4_RX_PKT_TCP;
+                       /* fall through */
+               case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_UDP:
+                       flags |= EF4_RX_PKT_CSUMMED;
+                       /* fall through */
+               case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_OTHER:
+               case FSE_AZ_RX_EV_HDR_TYPE_OTHER:
+                       break;
+               }
+       } else {
+               flags = ef4_farch_handle_rx_not_ok(rx_queue, event);
+       }
+
+       /* Detect multicast packets that didn't match the filter; these
+        * are counted and marked for discard.
+        */
+       rx_ev_mcast_pkt = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_PKT);
+       if (rx_ev_mcast_pkt) {
+               unsigned int rx_ev_mcast_hash_match =
+                       EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_HASH_MATCH);
+
+               if (unlikely(!rx_ev_mcast_hash_match)) {
+                       ++channel->n_rx_mcast_mismatch;
+                       flags |= EF4_RX_PKT_DISCARD;
+               }
+       }
+
+       channel->irq_mod_score += 2;
+
+       /* Handle received packet: pass the completed run of scatter_n
+        * fragments up, then advance past it and start a new run.
+        */
+       ef4_rx_packet(rx_queue,
+                     rx_queue->removed_count & rx_queue->ptr_mask,
+                     rx_queue->scatter_n, rx_ev_byte_cnt, flags);
+       rx_queue->removed_count += rx_queue->scatter_n;
+       rx_queue->scatter_n = 0;
+}
+
+/* Handle a TX descriptor queue "flush done" driver event
+ *
+ * When the queue ID carried by the event belongs to one of our
+ * &struct ef4_tx_queue and its flush_outstanding flag was set, send an
+ * %EF4_CHANNEL_MAGIC_TX_DRAIN event so that the event queue is drained
+ * of all transmit completions.
+ */
+static void
+ef4_farch_handle_tx_flush_done(struct ef4_nic *efx, ef4_qword_t *event)
+{
+       struct ef4_tx_queue *tx_queue;
+       int qid = EF4_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
+
+       /* Queue ID outside the range of our TX queues: ignore */
+       if (qid >= EF4_TXQ_TYPES * efx->n_tx_channels)
+               return;
+
+       tx_queue = ef4_get_tx_queue(efx, qid / EF4_TXQ_TYPES,
+                                   qid % EF4_TXQ_TYPES);
+
+       /* Nothing to do unless flush_outstanding held a non-zero value */
+       if (!atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0))
+               return;
+
+       ef4_farch_magic_event(tx_queue->channel,
+                             EF4_CHANNEL_MAGIC_TX_DRAIN(tx_queue));
+}
+
+/* Handle an RX descriptor queue "flush done" driver event
+ *
+ * If the event refers to one of our &struct ef4_rx_queue: on success
+ * send an %EF4_CHANNEL_MAGIC_RX_DRAIN event, on failure mark the queue
+ * as needing another flush.  In both cases the outstanding-flush count
+ * is dropped and the flush waiter is woken if appropriate.
+ */
+static void
+ef4_farch_handle_rx_flush_done(struct ef4_nic *efx, ef4_qword_t *event)
+{
+       struct ef4_rx_queue *rx_queue;
+       struct ef4_channel *channel;
+       bool failed;
+       int qid;
+
+       qid = EF4_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID);
+       failed = EF4_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL);
+
+       /* Ignore queue IDs that do not map to a channel with an RX queue */
+       if (qid >= efx->n_channels)
+               return;
+       channel = ef4_get_channel(efx, qid);
+       if (!ef4_channel_has_rx_queue(channel))
+               return;
+       rx_queue = ef4_channel_get_rx_queue(channel);
+
+       if (!failed) {
+               ef4_farch_magic_event(ef4_rx_queue_channel(rx_queue),
+                                     EF4_CHANNEL_MAGIC_RX_DRAIN(rx_queue));
+       } else {
+               netif_info(efx, hw, efx->net_dev,
+                          "RXQ %d flush retry\n", qid);
+               rx_queue->flush_pending = true;
+               atomic_inc(&efx->rxq_flush_pending);
+       }
+
+       atomic_dec(&efx->rxq_flush_outstanding);
+       if (ef4_farch_flush_wake(efx))
+               wake_up(&efx->flush_wq);
+}
+
+/* Handle a queue drain event: decrement the NIC's active queue count
+ * and wake the flush waiter when ef4_farch_flush_wake() allows it.
+ */
+static void
+ef4_farch_handle_drain_event(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+
+       /* A drain event is not expected when no queue is active */
+       WARN_ON(!atomic_read(&efx->active_queues));
+       atomic_dec(&efx->active_queues);
+
+       if (!ef4_farch_flush_wake(efx))
+               return;
+       wake_up(&efx->flush_wq);
+}
+
+/* Dispatch a driver-generated event according to its magic value
+ *
+ * Recognised values are the channel's test magic, the RX queue's refill
+ * and drain magics (only when the channel has an RX queue) and any magic
+ * whose code is TX drain.  Anything else is logged at debug level.
+ */
+static void ef4_farch_handle_generated_event(struct ef4_channel *channel,
+                                            ef4_qword_t *event)
+{
+       struct ef4_nic *efx = channel->efx;
+       struct ef4_rx_queue *rx_queue = NULL;
+       unsigned magic, code;
+
+       if (ef4_channel_has_rx_queue(channel))
+               rx_queue = ef4_channel_get_rx_queue(channel);
+
+       magic = EF4_QWORD_FIELD(*event, FSF_AZ_DRV_GEN_EV_MAGIC);
+       code = _EF4_CHANNEL_MAGIC_CODE(magic);
+
+       if (magic == EF4_CHANNEL_MAGIC_TEST(channel)) {
+               /* Record which CPU saw the test event */
+               channel->event_test_cpu = raw_smp_processor_id();
+               return;
+       }
+
+       if (rx_queue && magic == EF4_CHANNEL_MAGIC_FILL(rx_queue)) {
+               /* The queue must be empty, so we won't receive any rx
+                * events, so ef4_process_channel() won't refill the
+                * queue. Refill it here */
+               ef4_fast_push_rx_descriptors(rx_queue, true);
+               return;
+       }
+
+       /* RX and TX drain events are handled identically */
+       if ((rx_queue && magic == EF4_CHANNEL_MAGIC_RX_DRAIN(rx_queue)) ||
+           code == _EF4_CHANNEL_MAGIC_TX_DRAIN) {
+               ef4_farch_handle_drain_event(channel);
+               return;
+       }
+
+       netif_dbg(efx, hw, efx->net_dev, "channel %d received "
+                 "generated event "EF4_QWORD_FMT"\n",
+                 channel->channel, EF4_QWORD_VAL(*event));
+}
+
+/* Handle a driver event, dispatching on its subcode
+ *
+ * Flush completions are forwarded to their handlers, RX recovery and
+ * descriptor fetch errors schedule a reset, and the remaining subcodes
+ * (including unknown ones) are only logged.
+ */
+static void
+ef4_farch_handle_driver_event(struct ef4_channel *channel, ef4_qword_t *event)
+{
+       struct ef4_nic *efx = channel->efx;
+       unsigned int subcode =
+               EF4_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBCODE);
+       unsigned int subdata =
+               EF4_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
+
+       switch (subcode) {
+       case FSE_AZ_TX_DESCQ_FLS_DONE_EV:
+               netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n",
+                          channel->channel, subdata);
+               ef4_farch_handle_tx_flush_done(efx, event);
+               break;
+
+       case FSE_AZ_RX_DESCQ_FLS_DONE_EV:
+               netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n",
+                          channel->channel, subdata);
+               ef4_farch_handle_rx_flush_done(efx, event);
+               break;
+
+       case FSE_AZ_EVQ_INIT_DONE_EV:
+               netif_dbg(efx, hw, efx->net_dev,
+                         "channel %d EVQ %d initialised\n",
+                         channel->channel, subdata);
+               break;
+
+       case FSE_AZ_SRM_UPD_DONE_EV:
+               netif_vdbg(efx, hw, efx->net_dev,
+                          "channel %d SRAM update done\n", channel->channel);
+               break;
+
+       case FSE_AZ_WAKE_UP_EV:
+               netif_vdbg(efx, hw, efx->net_dev,
+                          "channel %d RXQ %d wakeup event\n",
+                          channel->channel, subdata);
+               break;
+
+       case FSE_AZ_TIMER_EV:
+               netif_vdbg(efx, hw, efx->net_dev,
+                          "channel %d RX queue %d timer expired\n",
+                          channel->channel, subdata);
+               break;
+
+       case FSE_AA_RX_RECOVER_EV:
+               netif_err(efx, rx_err, efx->net_dev,
+                         "channel %d seen DRIVER RX_RESET event. "
+                         "Resetting.\n", channel->channel);
+               atomic_inc(&efx->rx_reset);
+               if (EF4_WORKAROUND_6555(efx))
+                       ef4_schedule_reset(efx, RESET_TYPE_RX_RECOVERY);
+               else
+                       ef4_schedule_reset(efx, RESET_TYPE_DISABLE);
+               break;
+
+       case FSE_BZ_RX_DSC_ERROR_EV:
+               netif_err(efx, rx_err, efx->net_dev,
+                         "RX DMA Q %d reports descriptor fetch error."
+                         " RX Q %d is disabled.\n", subdata,
+                         subdata);
+               ef4_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
+               break;
+
+       case FSE_BZ_TX_DSC_ERROR_EV:
+               netif_err(efx, tx_err, efx->net_dev,
+                         "TX DMA Q %d reports descriptor fetch error."
+                         " TX Q %d is disabled.\n", subdata,
+                         subdata);
+               ef4_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
+               break;
+
+       default:
+               netif_vdbg(efx, hw, efx->net_dev,
+                          "channel %d unknown driver event code %d "
+                          "data %04x\n", channel->channel, subcode,
+                          subdata);
+               break;
+       }
+}
+
+/* Process pending events on a channel's event queue
+ *
+ * Walks the queue from the channel's read pointer until an entry with
+ * no event present is found, dispatching each event by its code.  Only
+ * RX events are charged against @budget.  The walk also stops early,
+ * reporting the whole budget as spent, once the accumulated TX
+ * completion count exceeds efx->txq_entries.
+ *
+ * Returns the amount of @budget spent.  Unless @budget is not positive,
+ * the channel's read pointer is updated to the first entry not consumed.
+ */
+int ef4_farch_ev_process(struct ef4_channel *channel, int budget)
+{
+       struct ef4_nic *efx = channel->efx;
+       ef4_qword_t *slot, event;
+       unsigned int read_ptr;
+       int tx_packets = 0;
+       int spent = 0;
+       int ev_code;
+
+       if (budget <= 0)
+               return 0;
+
+       read_ptr = channel->eventq_read_ptr;
+
+       while (true) {
+               /* Take a private copy of the queue entry */
+               slot = ef4_event(channel, read_ptr);
+               event = *slot;
+
+               /* End of events */
+               if (!ef4_event_present(&event))
+                       break;
+
+               netif_vdbg(efx, intr, efx->net_dev,
+                          "channel %d event is "EF4_QWORD_FMT"\n",
+                          channel->channel, EF4_QWORD_VAL(event));
+
+               /* Clear this event by marking it all ones */
+               EF4_SET_QWORD(*slot);
+
+               ++read_ptr;
+
+               ev_code = EF4_QWORD_FIELD(event, FSF_AZ_EV_CODE);
+
+               switch (ev_code) {
+               case FSE_AZ_EV_CODE_RX_EV:
+                       ef4_farch_handle_rx_event(channel, &event);
+                       ++spent;
+                       if (spent == budget)
+                               goto out;
+                       break;
+               case FSE_AZ_EV_CODE_TX_EV:
+                       tx_packets += ef4_farch_handle_tx_event(channel,
+                                                               &event);
+                       if (tx_packets > efx->txq_entries) {
+                               /* Report the budget as used up */
+                               spent = budget;
+                               goto out;
+                       }
+                       break;
+               case FSE_AZ_EV_CODE_DRV_GEN_EV:
+                       ef4_farch_handle_generated_event(channel, &event);
+                       break;
+               case FSE_AZ_EV_CODE_DRIVER_EV:
+                       ef4_farch_handle_driver_event(channel, &event);
+                       break;
+               case FSE_AZ_EV_CODE_GLOBAL_EV:
+                       if (efx->type->handle_global_event &&
+                           efx->type->handle_global_event(channel, &event))
+                               break;
+                       /* else fall through */
+               default:
+                       netif_err(efx, hw, efx->net_dev,
+                                 "channel %d unknown event type %d (data "
+                                 EF4_QWORD_FMT ")\n", channel->channel,
+                                 ev_code, EF4_QWORD_VAL(event));
+               }
+       }
+
+out:
+       channel->eventq_read_ptr = read_ptr;
+       return spent;
+}
+
+/* Allocate the special buffer backing a channel's event queue
+ *
+ * The queue holds eventq_mask + 1 entries of one ef4_qword_t each.
+ * Returns the result of ef4_alloc_special_buffer().
+ */
+int ef4_farch_ev_probe(struct ef4_channel *channel)
+{
+       unsigned n_events = channel->eventq_mask + 1;
+
+       return ef4_alloc_special_buffer(channel->efx, &channel->eventq,
+                                       n_events * sizeof(ef4_qword_t));
+}
+
+/* Initialise a channel's event queue and enable it on the card
+ *
+ * Pins the queue's special buffer, marks every entry as empty and
+ * writes the queue's entry in the event queue pointer table.
+ * Always returns 0.
+ */
+int ef4_farch_ev_init(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+       ef4_oword_t evq_ptr;
+
+       netif_dbg(efx, hw, efx->net_dev,
+                 "channel %d event queue in special buffers %d-%d\n",
+                 channel->channel, channel->eventq.index,
+                 channel->eventq.index + channel->eventq.entries - 1);
+
+       /* Pin event queue buffer */
+       ef4_init_special_buffer(efx, &channel->eventq);
+
+       /* An all-ones entry is an empty event, so fill the queue with 0xff */
+       memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
+
+       /* Push event queue to card */
+       EF4_POPULATE_OWORD_3(evq_ptr,
+                            FRF_AZ_EVQ_EN, 1,
+                            FRF_AZ_EVQ_SIZE, __ffs(channel->eventq.entries),
+                            FRF_AZ_EVQ_BUF_BASE_ID, channel->eventq.index);
+       ef4_writeo_table(efx, &evq_ptr, efx->type->evq_ptr_tbl_base,
+                        channel->channel);
+
+       return 0;
+}
+
+/* Shut down a channel's event queue: clear its entry in the event
+ * queue pointer table, then unpin the special buffer behind it.
+ */
+void ef4_farch_ev_fini(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+       ef4_oword_t evq_ptr;
+
+       /* Remove event queue from card */
+       EF4_ZERO_OWORD(evq_ptr);
+       ef4_writeo_table(efx, &evq_ptr, efx->type->evq_ptr_tbl_base,
+                        channel->channel);
+
+       /* Unpin event queue */
+       ef4_fini_special_buffer(efx, &channel->eventq);
+}
+
+/* Release the special buffer that backs the channel's event queue */
+void ef4_farch_ev_remove(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+
+       ef4_free_special_buffer(efx, &channel->eventq);
+}
+
+
+/* Post the channel's test magic event to its own event queue */
+void ef4_farch_ev_test_generate(struct ef4_channel *channel)
+{
+       u32 magic = EF4_CHANNEL_MAGIC_TEST(channel);
+
+       ef4_farch_magic_event(channel, magic);
+}
+
+/* Post the RX queue's refill magic event to the event queue of the
+ * channel that owns @rx_queue.
+ */
+void ef4_farch_rx_defer_refill(struct ef4_rx_queue *rx_queue)
+{
+       struct ef4_channel *channel = ef4_rx_queue_channel(rx_queue);
+
+       ef4_farch_magic_event(channel, EF4_CHANNEL_MAGIC_FILL(rx_queue));
+}
+
+/**************************************************************************
+ *
+ * Hardware interrupts
+ * The hardware interrupt handler does very little work; all the event
+ * queue processing is carried out by per-channel tasklets.
+ *
+ **************************************************************************/
+
+/* Enable, disable or generate interrupts
+ *
+ * Writes INT_EN_KER with the interrupt level taken from efx->irq_level,
+ * @force in the KER_INT_KER field and @enabled in the DRV_INT_EN_KER
+ * field.
+ */
+static inline void ef4_farch_interrupts(struct ef4_nic *efx,
+                                     bool enabled, bool force)
+{
+       ef4_oword_t int_en;
+
+       EF4_POPULATE_OWORD_3(int_en,
+                            FRF_AZ_KER_INT_LEVE_SEL, efx->irq_level,
+                            FRF_AZ_KER_INT_KER, force,
+                            FRF_AZ_DRV_INT_EN_KER, enabled);
+       ef4_writeo(efx, &int_en, FR_AZ_INT_EN_KER);
+}
+
+/* Clear the interrupt status vector, then enable interrupts */
+void ef4_farch_irq_enable_master(struct ef4_nic *efx)
+{
+       ef4_oword_t *int_ker = efx->irq_status.addr;
+
+       EF4_ZERO_OWORD(*int_ker);
+       wmb(); /* Ensure interrupt vector is clear before interrupts enabled */
+
+       ef4_farch_interrupts(efx, true, false);
+}
+
+/* Disable interrupts: clears the enable bit without forcing one */
+void ef4_farch_irq_disable_master(struct ef4_nic *efx)
+{
+       ef4_farch_interrupts(efx, false, false);
+}
+
+/* Generate a test interrupt
+ *
+ * Interrupts must already have been enabled, otherwise nasty things
+ * may happen.  Always returns 0.
+ */
+int ef4_farch_irq_test_generate(struct ef4_nic *efx)
+{
+       /* Keep interrupts enabled and force one */
+       ef4_farch_interrupts(efx, true, true);
+
+       return 0;
+}
+
+/* Process a fatal interrupt
+ * Disable bus mastering ASAP and schedule a reset
+ *
+ * Logs the fatal interrupt status (plus the memory status register when
+ * a memory or SRAM parity error is flagged), clears PCI bus mastering on
+ * the device function(s) and disables interrupts.  Errors are counted
+ * within a window of EF4_INT_ERROR_EXPIRE seconds: below
+ * EF4_MAX_INT_ERRORS a normal reset is scheduled, otherwise the NIC is
+ * disabled.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+irqreturn_t ef4_farch_fatal_interrupt(struct ef4_nic *efx)
+{
+       struct falcon_nic_data *nic_data = efx->nic_data;
+       ef4_oword_t *int_ker = efx->irq_status.addr;
+       ef4_oword_t fatal_intr;
+       int error, mem_perr;
+
+       ef4_reado(efx, &fatal_intr, FR_AZ_FATAL_INTR_KER);
+       error = EF4_OWORD_FIELD(fatal_intr, FRF_AZ_FATAL_INTR);
+
+       netif_err(efx, hw, efx->net_dev, "SYSTEM ERROR "EF4_OWORD_FMT" status "
+                 EF4_OWORD_FMT ": %s\n", EF4_OWORD_VAL(*int_ker),
+                 EF4_OWORD_VAL(fatal_intr),
+                 error ? "disabling bus mastering" : "no recognised error");
+
+       /* If this is a memory parity error dump which blocks are offending */
+       mem_perr = (EF4_OWORD_FIELD(fatal_intr, FRF_AZ_MEM_PERR_INT_KER) ||
+                   EF4_OWORD_FIELD(fatal_intr, FRF_AZ_SRM_PERR_INT_KER));
+       if (mem_perr) {
+               ef4_oword_t reg;
+               ef4_reado(efx, &reg, FR_AZ_MEM_STAT);
+               netif_err(efx, hw, efx->net_dev,
+                         "SYSTEM ERROR: memory parity error "EF4_OWORD_FMT"\n",
+                         EF4_OWORD_VAL(reg));
+       }
+
+       /* Disable both devices */
+       pci_clear_master(efx->pci_dev);
+       if (ef4_nic_is_dual_func(efx))
+               pci_clear_master(nic_data->pci_dev2);
+       ef4_farch_irq_disable_master(efx);
+
+       /* Count errors and reset or disable the NIC accordingly.  The
+        * count restarts when this is the first error or the previous
+        * counting window has expired.
+        */
+       if (efx->int_error_count == 0 ||
+           time_after(jiffies, efx->int_error_expire)) {
+               efx->int_error_count = 0;
+               efx->int_error_expire =
+                       jiffies + EF4_INT_ERROR_EXPIRE * HZ;
+       }
+       if (++efx->int_error_count < EF4_MAX_INT_ERRORS) {
+               netif_err(efx, hw, efx->net_dev,
+                         "SYSTEM ERROR - reset scheduled\n");
+               ef4_schedule_reset(efx, RESET_TYPE_INT_ERROR);
+       } else {
+               /* Note the space after "seen." so the two sentences do
+                * not run together in the log.
+                */
+               netif_err(efx, hw, efx->net_dev,
+                         "SYSTEM ERROR - max number of errors seen. "
+                         "NIC will be disabled\n");
+               ef4_schedule_reset(efx, RESET_TYPE_DISABLE);
+       }
+
+       return IRQ_HANDLED;
+}
+
+/* Handle a legacy interrupt
+ * Acknowledges the interrupt and schedule event queue processing.
+ *
+ * @irq is only used for logging; @dev_id is the &struct ef4_nic.
+ *
+ * Returns IRQ_HANDLED when the ISR reports at least one interrupting
+ * queue, or on the first zero ISR read in a row; otherwise IRQ_NONE.
+ * If a fatal interrupt is flagged, the result of
+ * ef4_farch_fatal_interrupt() is returned instead.
+ */
+irqreturn_t ef4_farch_legacy_interrupt(int irq, void *dev_id)
+{
+       struct ef4_nic *efx = dev_id;
+       bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled);
+       ef4_oword_t *int_ker = efx->irq_status.addr;
+       irqreturn_t result = IRQ_NONE;
+       struct ef4_channel *channel;
+       ef4_dword_t reg;
+       u32 queues;
+       int syserr;
+
+       /* Read the ISR which also ACKs the interrupts */
+       ef4_readd(efx, &reg, FR_BZ_INT_ISR0);
+       queues = EF4_EXTRACT_DWORD(reg, 0, 31);
+
+       /* Legacy interrupts are disabled too late by the EEH kernel
+        * code. Disable them earlier.
+        * If an EEH error occurred, the read will have returned all ones.
+        * eeh_disabled_legacy_irq ensures the IRQ is disabled only once.
+        */
+       if (EF4_DWORD_IS_ALL_ONES(reg) && ef4_try_recovery(efx) &&
+           !efx->eeh_disabled_legacy_irq) {
+               disable_irq_nosync(efx->legacy_irq);
+               efx->eeh_disabled_legacy_irq = true;
+       }
+
+       /* Handle non-event-queue sources: these are signalled on the
+        * ISR bit numbered efx->irq_level.
+        */
+       if (queues & (1U << efx->irq_level) && soft_enabled) {
+               syserr = EF4_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
+               if (unlikely(syserr))
+                       return ef4_farch_fatal_interrupt(efx);
+               efx->last_irq_cpu = raw_smp_processor_id();
+       }
+
+       if (queues != 0) {
+               efx->irq_zero_count = 0;
+
+               /* Schedule processing of any interrupting queues; bit N
+                * of the ISR value corresponds to the Nth channel visited.
+                */
+               if (likely(soft_enabled)) {
+                       ef4_for_each_channel(channel, efx) {
+                               if (queues & 1)
+                                       ef4_schedule_channel_irq(channel);
+                               queues >>= 1;
+                       }
+               }
+               result = IRQ_HANDLED;
+
+       } else {
+               ef4_qword_t *event;
+
+               /* Legacy ISR read can return zero once (SF bug 15783) */
+
+               /* We can't return IRQ_HANDLED more than once on seeing ISR=0
+                * because this might be a shared interrupt. */
+               if (efx->irq_zero_count++ == 0)
+                       result = IRQ_HANDLED;
+
+               /* Ensure we schedule or rearm all event queues: a channel
+                * with an event present is scheduled, any other has its
+                * read pointer re-written.
+                */
+               if (likely(soft_enabled)) {
+                       ef4_for_each_channel(channel, efx) {
+                               event = ef4_event(channel,
+                                                 channel->eventq_read_ptr);
+                               if (ef4_event_present(event))
+                                       ef4_schedule_channel_irq(channel);
+                               else
+                                       ef4_farch_ev_read_ack(channel);
+                       }
+               }
+       }
+
+       if (result == IRQ_HANDLED)
+               netif_vdbg(efx, intr, efx->net_dev,
+                          "IRQ %d on CPU %d status " EF4_DWORD_FMT "\n",
+                          irq, raw_smp_processor_id(), EF4_DWORD_VAL(reg));
+
+       return result;
+}
+
+/* Handle an MSI interrupt
+ *
+ * Schedules event queue processing for the channel selected by the MSI
+ * context.  No interrupt acknowledgement cycle is necessary, and since
+ * MSI interrupts cannot be shared there is no need to check that the
+ * interrupt is for us.
+ *
+ * Returns IRQ_HANDLED, or the result of ef4_farch_fatal_interrupt() if
+ * a fatal interrupt is flagged.
+ */
+irqreturn_t ef4_farch_msi_interrupt(int irq, void *dev_id)
+{
+       struct ef4_msi_context *msi = dev_id;
+       struct ef4_nic *efx = msi->efx;
+       ef4_oword_t *int_ker = efx->irq_status.addr;
+       int fatal;
+
+       netif_vdbg(efx, intr, efx->net_dev,
+                  "IRQ %d on CPU %d status " EF4_OWORD_FMT "\n",
+                  irq, raw_smp_processor_id(), EF4_OWORD_VAL(*int_ker));
+
+       /* Interrupts are soft-disabled: nothing more to do */
+       if (!likely(ACCESS_ONCE(efx->irq_soft_enabled)))
+               return IRQ_HANDLED;
+
+       /* Handle non-event-queue sources */
+       if (msi->index == efx->irq_level) {
+               fatal = EF4_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
+               if (unlikely(fatal))
+                       return ef4_farch_fatal_interrupt(efx);
+               efx->last_irq_cpu = raw_smp_processor_id();
+       }
+
+       /* Schedule processing of the channel */
+       ef4_schedule_channel_irq(efx->channel[msi->index]);
+
+       return IRQ_HANDLED;
+}
+
+/* Write the RSS indirection table to the NIC
+ *
+ * The table maps from the hash value of a packet to an RXQ.  Each row
+ * of efx->rx_indir_table is written to the matching hardware row.
+ * Must not be called on revisions before Falcon B0.
+ */
+void ef4_farch_rx_push_indir_table(struct ef4_nic *efx)
+{
+       ef4_dword_t entry;
+       size_t row;
+
+       BUG_ON(ef4_nic_rev(efx) < EF4_REV_FALCON_B0);
+
+       /* The software table must have exactly one slot per hardware row */
+       BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+                    FR_BZ_RX_INDIRECTION_TBL_ROWS);
+
+       for (row = 0; row < FR_BZ_RX_INDIRECTION_TBL_ROWS; row++) {
+               EF4_POPULATE_DWORD_1(entry, FRF_BZ_IT_QUEUE,
+                                    efx->rx_indir_table[row]);
+               ef4_writed(efx, &entry,
+                          FR_BZ_RX_INDIRECTION_TBL +
+                          FR_BZ_RX_INDIRECTION_TBL_STEP * row);
+       }
+}
+
+/* Looks at available SRAM resources and works out where the descriptor
+ * caches should live.
+ *
+ * SRAM is split up as follows:
+ * 0                          buftbl entries for channels
+ * efx->vf_buftbl_base        buftbl entries for SR-IOV
+ * efx->rx_dc_base            RX descriptor caches
+ * efx->tx_dc_base            TX descriptor caches
+ *
+ * Only efx->rx_dc_base and efx->tx_dc_base are computed here.  The
+ * caches are packed against @sram_lim_qw: the TX caches end at the
+ * limit and the RX caches sit immediately below them.
+ */
+void ef4_farch_dimension_resources(struct ef4_nic *efx, unsigned sram_lim_qw)
+{
+       unsigned vi_count;
+
+       /* Reserve descriptor cache space for enough VIs to cover every
+        * channel and every TX queue.
+        */
+       vi_count = max(efx->n_channels, efx->n_tx_channels * EF4_TXQ_TYPES);
+
+       efx->tx_dc_base = sram_lim_qw - vi_count * TX_DC_ENTRIES;
+       efx->rx_dc_base = efx->tx_dc_base - vi_count * RX_DC_ENTRIES;
+}
+
+/* Read FR_AZ_ALTERA_BUILD and return its build version field */
+u32 ef4_farch_fpga_ver(struct ef4_nic *efx)
+{
+       ef4_oword_t build_reg;
+
+       ef4_reado(efx, &build_reg, FR_AZ_ALTERA_BUILD);
+
+       return EF4_OWORD_FIELD(build_reg, FRF_AZ_ALTERA_BUILD_VER);
+}
+
+/* Program the chip-wide settings: where the TX/RX descriptor caches
+ * live in SRAM and how large they are, the DMA address of the interrupt
+ * status buffer, which fatal interrupts are enabled, and the TX
+ * datapath configuration.  Also resets efx->irq_level to 0.
+ *
+ * Relies on efx->tx_dc_base, efx->rx_dc_base and efx->irq_status.dma_addr
+ * having been set up beforehand.
+ */
+void ef4_farch_init_common(struct ef4_nic *efx)
+{
+       ef4_oword_t temp;
+
+       /* Set positions of descriptor caches in SRAM. */
+       EF4_POPULATE_OWORD_1(temp, FRF_AZ_SRM_TX_DC_BASE_ADR, efx->tx_dc_base);
+       ef4_writeo(efx, &temp, FR_AZ_SRM_TX_DC_CFG);
+       EF4_POPULATE_OWORD_1(temp, FRF_AZ_SRM_RX_DC_BASE_ADR, efx->rx_dc_base);
+       ef4_writeo(efx, &temp, FR_AZ_SRM_RX_DC_CFG);
+
+       /* Set TX descriptor cache size.  The register takes the order,
+        * where entries == 8 << order (checked at build time).
+        */
+       BUILD_BUG_ON(TX_DC_ENTRIES != (8 << TX_DC_ENTRIES_ORDER));
+       EF4_POPULATE_OWORD_1(temp, FRF_AZ_TX_DC_SIZE, TX_DC_ENTRIES_ORDER);
+       ef4_writeo(efx, &temp, FR_AZ_TX_DC_CFG);
+
+       /* Set RX descriptor cache size.  Set low watermark to size-8, as
+        * this allows most efficient prefetching.
+        */
+       BUILD_BUG_ON(RX_DC_ENTRIES != (8 << RX_DC_ENTRIES_ORDER));
+       EF4_POPULATE_OWORD_1(temp, FRF_AZ_RX_DC_SIZE, RX_DC_ENTRIES_ORDER);
+       ef4_writeo(efx, &temp, FR_AZ_RX_DC_CFG);
+       EF4_POPULATE_OWORD_1(temp, FRF_AZ_RX_DC_PF_LWM, RX_DC_ENTRIES - 8);
+       ef4_writeo(efx, &temp, FR_AZ_RX_DC_PF_WM);
+
+       /* Program INT_KER address */
+       EF4_POPULATE_OWORD_2(temp,
+                            FRF_AZ_NORM_INT_VEC_DIS_KER,
+                            EF4_INT_MODE_USE_MSI(efx),
+                            FRF_AZ_INT_ADR_KER, efx->irq_status.dma_addr);
+       ef4_writeo(efx, &temp, FR_AZ_INT_ADR_KER);
+
+       /* Use a valid MSI-X vector */
+       efx->irq_level = 0;
+
+       /* Enable all the genuinely fatal interrupts.  (They are still
+        * masked by the overall interrupt mask, controlled by
+        * falcon_interrupts()).
+        *
+        * Note: All other fatal interrupts are enabled
+        *
+        * The three fields populated below are the exceptions: the value
+        * goes through EF4_INVERT_OWORD() before it is written, so
+        * (presumably) these three end up cleared and every other bit set.
+        */
+       EF4_POPULATE_OWORD_3(temp,
+                            FRF_AZ_ILL_ADR_INT_KER_EN, 1,
+                            FRF_AZ_RBUF_OWN_INT_KER_EN, 1,
+                            FRF_AZ_TBUF_OWN_INT_KER_EN, 1);
+       EF4_INVERT_OWORD(temp);
+       ef4_writeo(efx, &temp, FR_AZ_FATAL_INTR_KER);
+
+       /* Disable the ugly timer-based TX DMA backoff and allow TX DMA to be
+        * controlled by the RX FIFO fill level. Set arbitration to one pkt/Q.
+        * This is a read-modify-write: fields not set here keep their
+        * current values.
+        */
+       ef4_reado(efx, &temp, FR_AZ_TX_RESERVED);
+       EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_RX_SPACER, 0xfe);
+       EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_RX_SPACER_EN, 1);
+       EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_ONE_PKT_PER_Q, 1);
+       EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_PUSH_EN, 1);
+       EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_DIS_NON_IP_EV, 1);
+       /* Enable SW_EV to inherit in char driver - assume harmless here */
+       EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_SOFT_EVT_EN, 1);
+       /* Prefetch threshold 2 => fetch when descriptor cache half empty */
+       EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_PREF_THRESHOLD, 2);
+       /* Disable hardware watchdog which can misfire */
+       EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_PREF_WD_TMR, 0x3fffff);
+       /* Squash TX of packets of 16 bytes or less */
+       if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0)
+               EF4_SET_OWORD_FIELD(temp, FRF_BZ_TX_FLUSH_MIN_LEN_EN, 1);
+       ef4_writeo(efx, &temp, FR_AZ_TX_RESERVED);
+
+       /* TX pacing is only programmed on Falcon B0 and later */
+       if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) {
+               EF4_POPULATE_OWORD_4(temp,
+                                    /* Default values */
+                                    FRF_BZ_TX_PACE_SB_NOT_AF, 0x15,
+                                    FRF_BZ_TX_PACE_SB_AF, 0xb,
+                                    FRF_BZ_TX_PACE_FB_BASE, 0,
+                                    /* Allow large pace values in the
+                                     * fast bin. */
+                                    FRF_BZ_TX_PACE_BIN_TH,
+                                    FFE_BZ_TX_PACE_RESERVED);
+               ef4_writeo(efx, &temp, FR_BZ_TX_PACE);
+       }
+}
+
+/**************************************************************************
+ *
+ * Filter tables
+ *
+ **************************************************************************
+ */
+
+/* "Fudge factors" - difference between programmed value and actual depth.
+ * Due to pipelined implementation we need to program H/W with a value that
+ * is larger than the hop limit we want.
+ */
+#define EF4_FARCH_FILTER_CTL_SRCH_FUDGE_WILD 3
+#define EF4_FARCH_FILTER_CTL_SRCH_FUDGE_FULL 1
+
+/* Hard maximum search limit.  Hardware will time-out beyond 200-something.
+ * We also need to avoid infinite loops in ef4_farch_filter_search() when the
+ * table is full.
+ */
+#define EF4_FARCH_FILTER_CTL_SRCH_MAX 200
+
+/* Don't try very hard to find space for performance hints, as this is
+ * counter-productive. */
+#define EF4_FARCH_FILTER_CTL_SRCH_HINT_MAX 5
+
+/* Filter types.  The numeric values matter: the BUILD_BUG_ON()s in
+ * ef4_farch_filter_spec_table_id() require (type >> 2) to equal the
+ * RX table ID for the IP and MAC types, hence the explicit assignments
+ * that start each group.  The value is stored in a 4-bit field of
+ * struct ef4_farch_filter_spec.
+ */
+enum ef4_farch_filter_type {
+       EF4_FARCH_FILTER_TCP_FULL = 0,
+       EF4_FARCH_FILTER_TCP_WILD,
+       EF4_FARCH_FILTER_UDP_FULL,
+       EF4_FARCH_FILTER_UDP_WILD,
+       EF4_FARCH_FILTER_MAC_FULL = 4,
+       EF4_FARCH_FILTER_MAC_WILD,
+       EF4_FARCH_FILTER_UC_DEF = 8,
+       EF4_FARCH_FILTER_MC_DEF,
+       EF4_FARCH_FILTER_TYPE_COUNT,            /* number of specific types */
+};
+
+/* Filter tables.  The order is relied upon by
+ * ef4_farch_filter_spec_table_id(): RX_IP and RX_MAC must equal
+ * (filter type >> 2) and TX_MAC must be RX_MAC + 2.
+ * EF4_FARCH_FILTER_TABLE_COUNT doubles as the "invalid" value returned
+ * by ef4_farch_filter_id_table_id().
+ */
+enum ef4_farch_filter_table_id {
+       EF4_FARCH_FILTER_TABLE_RX_IP = 0,
+       EF4_FARCH_FILTER_TABLE_RX_MAC,
+       EF4_FARCH_FILTER_TABLE_RX_DEF,
+       EF4_FARCH_FILTER_TABLE_TX_MAC,
+       EF4_FARCH_FILTER_TABLE_COUNT,
+};
+
+/* Entry indices within the RX_DEF table: one default filter for
+ * unicast and one for multicast.  EF4_FARCH_FILTER_SIZE_RX_DEF is
+ * presumably used as that table's entry count.
+ */
+enum ef4_farch_filter_index {
+       EF4_FARCH_FILTER_INDEX_UC_DEF,
+       EF4_FARCH_FILTER_INDEX_MC_DEF,
+       EF4_FARCH_FILTER_SIZE_RX_DEF,
+};
+
+/* Compact, hardware-oriented form of a filter, as stored per table entry */
+struct ef4_farch_filter_spec {
+       u8      type:4;         /* enum ef4_farch_filter_type */
+       u8      priority:4;     /* copied from the generic spec's priority */
+       u8      flags;          /* EF4_FILTER_FLAG_* bits */
+       u16     dmaq_id;        /* DMA queue the filter directs to */
+       u32     data[3];        /* match words; layout depends on type */
+};
+
+/* Software state for one hardware filter table */
+struct ef4_farch_filter_table {
+       enum ef4_farch_filter_table_id id;
+       u32             offset;         /* address of table relative to BAR */
+       unsigned        size;           /* number of entries */
+       unsigned        step;           /* step between entries */
+       unsigned        used;           /* number currently used */
+       unsigned long   *used_bitmap;   /* bit set for each occupied entry */
+       struct ef4_farch_filter_spec *spec;     /* spec for each entry */
+       /* Current search depth per filter type; pushed to the hardware
+        * search-limit fields with a fudge factor added.
+        */
+       unsigned        search_limit[EF4_FARCH_FILTER_TYPE_COUNT];
+};
+
+/* All filter tables of one NIC, indexed by enum ef4_farch_filter_table_id;
+ * reached through efx->filter_state.
+ */
+struct ef4_farch_filter_state {
+       struct ef4_farch_filter_table table[EF4_FARCH_FILTER_TABLE_COUNT];
+};
+
+static void
+ef4_farch_filter_table_clear_entry(struct ef4_nic *efx,
+                                  struct ef4_farch_filter_table *table,
+                                  unsigned int filter_idx);
+
+/* The filter hash function is LFSR polynomial x^16 + x^3 + 1 of a 32-bit
+ * key derived from the n-tuple.  The initial LFSR state is documented
+ * as 0xffff.
+ * NOTE(review): the first step below seeds with 0x1fff, not 0xffff -
+ * presumably the state after folding some rounds; confirm before
+ * touching the constant.
+ */
+static u16 ef4_farch_filter_hash(u32 key)
+{
+       u16 lfsr;
+
+       /* First 16 rounds: consume the upper half of the key */
+       lfsr = 0x1fff ^ (key >> 16);
+       lfsr ^= (lfsr >> 3) ^ (lfsr >> 6);
+       lfsr ^= lfsr >> 9;
+
+       /* Last 16 rounds: consume the lower half of the key */
+       lfsr ^= (lfsr << 13) ^ key;
+       lfsr ^= (lfsr >> 3) ^ (lfsr >> 6);
+       lfsr ^= lfsr >> 9;
+
+       return lfsr;
+}
+
+/* To allow for hash collisions, filter search continues at these
+ * increments from the first possible entry selected by the hash.
+ * The result is 2 * key - 1 truncated to 16 bits, so it is always odd.
+ */
+static u16 ef4_farch_filter_increment(u32 key)
+{
+       u32 doubled = key << 1;
+
+       return doubled - 1;
+}
+
+/* Map a filter spec to the table it belongs in.
+ *
+ * The RX table is encoded in the filter type itself (type >> 2); a spec
+ * with EF4_FILTER_FLAG_TX set is moved two tables up, which turns
+ * RX_MAC into TX_MAC.  The build-time checks pin the enum layouts that
+ * this arithmetic depends on.
+ */
+static enum ef4_farch_filter_table_id
+ef4_farch_filter_spec_table_id(const struct ef4_farch_filter_spec *spec)
+{
+       unsigned int table_id = spec->type >> 2;
+
+       BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_RX_IP !=
+                    (EF4_FARCH_FILTER_TCP_FULL >> 2));
+       BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_RX_IP !=
+                    (EF4_FARCH_FILTER_TCP_WILD >> 2));
+       BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_RX_IP !=
+                    (EF4_FARCH_FILTER_UDP_FULL >> 2));
+       BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_RX_IP !=
+                    (EF4_FARCH_FILTER_UDP_WILD >> 2));
+       BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_RX_MAC !=
+                    (EF4_FARCH_FILTER_MAC_FULL >> 2));
+       BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_RX_MAC !=
+                    (EF4_FARCH_FILTER_MAC_WILD >> 2));
+       BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_TX_MAC !=
+                    EF4_FARCH_FILTER_TABLE_RX_MAC + 2);
+
+       if (spec->flags & EF4_FILTER_FLAG_TX)
+               table_id += 2;
+
+       return table_id;
+}
+
+/* Refresh FR_BZ_RX_FILTER_CTL from the software filter state: the
+ * per-type search limits of the RX IP and RX MAC tables, and the
+ * handling of packets that match no filter.  The register is
+ * read-modify-written, so fields not touched here are preserved.
+ */
+static void ef4_farch_filter_push_rx_config(struct ef4_nic *efx)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       const struct ef4_farch_filter_table *ip_tbl =
+               &state->table[EF4_FARCH_FILTER_TABLE_RX_IP];
+       const struct ef4_farch_filter_table *mac_tbl =
+               &state->table[EF4_FARCH_FILTER_TABLE_RX_MAC];
+       const struct ef4_farch_filter_table *def_tbl =
+               &state->table[EF4_FARCH_FILTER_TABLE_RX_DEF];
+       ef4_oword_t filter_ctl;
+
+       ef4_reado(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
+
+       /* IP table limits are programmed unconditionally */
+       EF4_SET_OWORD_FIELD(filter_ctl, FRF_BZ_TCP_FULL_SRCH_LIMIT,
+                           ip_tbl->search_limit[EF4_FARCH_FILTER_TCP_FULL] +
+                           EF4_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
+       EF4_SET_OWORD_FIELD(filter_ctl, FRF_BZ_TCP_WILD_SRCH_LIMIT,
+                           ip_tbl->search_limit[EF4_FARCH_FILTER_TCP_WILD] +
+                           EF4_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
+       EF4_SET_OWORD_FIELD(filter_ctl, FRF_BZ_UDP_FULL_SRCH_LIMIT,
+                           ip_tbl->search_limit[EF4_FARCH_FILTER_UDP_FULL] +
+                           EF4_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
+       EF4_SET_OWORD_FIELD(filter_ctl, FRF_BZ_UDP_WILD_SRCH_LIMIT,
+                           ip_tbl->search_limit[EF4_FARCH_FILTER_UDP_WILD] +
+                           EF4_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
+
+       /* MAC table limits only when that table exists */
+       if (mac_tbl->size) {
+               EF4_SET_OWORD_FIELD(
+                       filter_ctl, FRF_CZ_ETHERNET_FULL_SEARCH_LIMIT,
+                       mac_tbl->search_limit[EF4_FARCH_FILTER_MAC_FULL] +
+                       EF4_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
+               EF4_SET_OWORD_FIELD(
+                       filter_ctl, FRF_CZ_ETHERNET_WILDCARD_SEARCH_LIMIT,
+                       mac_tbl->search_limit[EF4_FARCH_FILTER_MAC_WILD] +
+                       EF4_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
+       }
+
+       if (def_tbl->size) {
+               const struct ef4_farch_filter_spec *uc_def =
+                       &def_tbl->spec[EF4_FARCH_FILTER_INDEX_UC_DEF];
+               const struct ef4_farch_filter_spec *mc_def =
+                       &def_tbl->spec[EF4_FARCH_FILTER_INDEX_MC_DEF];
+
+               EF4_SET_OWORD_FIELD(
+                       filter_ctl, FRF_CZ_UNICAST_NOMATCH_Q_ID,
+                       uc_def->dmaq_id);
+               EF4_SET_OWORD_FIELD(
+                       filter_ctl, FRF_CZ_UNICAST_NOMATCH_RSS_ENABLED,
+                       !!(uc_def->flags & EF4_FILTER_FLAG_RX_RSS));
+               EF4_SET_OWORD_FIELD(
+                       filter_ctl, FRF_CZ_MULTICAST_NOMATCH_Q_ID,
+                       mc_def->dmaq_id);
+               EF4_SET_OWORD_FIELD(
+                       filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED,
+                       !!(mc_def->flags & EF4_FILTER_FLAG_RX_RSS));
+
+               /* RX scatter for unmatched packets is controlled by a
+                * single bit, so it is only set when both default
+                * filter specs ask for scatter.
+                */
+               EF4_SET_OWORD_FIELD(
+                       filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
+                       !!(uc_def->flags & mc_def->flags &
+                          EF4_FILTER_FLAG_RX_SCATTER));
+       } else if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) {
+               /* No 'default' filters are exposed here because
+                * unmatched packets always go to the queue number found
+                * in the RSS table, but the RX scatter bit must still
+                * be programmed.
+                */
+               EF4_SET_OWORD_FIELD(
+                       filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
+                       efx->rx_scatter);
+       }
+
+       ef4_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
+}
+
+/* Refresh the TX MAC filter search ranges in FR_AZ_TX_CFG from the
+ * software state.  The register is always read back and rewritten;
+ * the range fields are only changed when the TX MAC table exists.
+ */
+static void ef4_farch_filter_push_tx_limits(struct ef4_nic *efx)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       const struct ef4_farch_filter_table *tx_mac =
+               &state->table[EF4_FARCH_FILTER_TABLE_TX_MAC];
+       ef4_oword_t tx_cfg;
+
+       ef4_reado(efx, &tx_cfg, FR_AZ_TX_CFG);
+
+       if (tx_mac->size) {
+               EF4_SET_OWORD_FIELD(
+                       tx_cfg, FRF_CZ_TX_ETH_FILTER_FULL_SEARCH_RANGE,
+                       tx_mac->search_limit[EF4_FARCH_FILTER_MAC_FULL] +
+                       EF4_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
+               EF4_SET_OWORD_FIELD(
+                       tx_cfg, FRF_CZ_TX_ETH_FILTER_WILD_SEARCH_RANGE,
+                       tx_mac->search_limit[EF4_FARCH_FILTER_MAC_WILD] +
+                       EF4_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
+       }
+
+       ef4_writeo(efx, &tx_cfg, FR_AZ_TX_CFG);
+}
+
+/* Convert a generic filter spec into the compact farch form.
+ *
+ * Returns 0 on success, or:
+ *   -EINVAL          RSS requested with a non-default RSS context
+ *   -EPROTONOSUPPORT unsupported match_flags combination, ether_type
+ *                    other than IPv4, or ip_proto other than TCP/UDP
+ *   -EADDRNOTAVAIL   local port is 0, or remote port is 0 in a full match
+ *
+ * priority, flags and dmaq_id are copied before the match is validated,
+ * so *spec may be partially written when an error is returned.
+ */
+static int
+ef4_farch_filter_from_gen_spec(struct ef4_farch_filter_spec *spec,
+                              const struct ef4_filter_spec *gen_spec)
+{
+       bool is_full = false;
+
+       if ((gen_spec->flags & EF4_FILTER_FLAG_RX_RSS) &&
+           gen_spec->rss_context != EF4_FILTER_RSS_CONTEXT_DEFAULT)
+               return -EINVAL;
+
+       spec->priority = gen_spec->priority;
+       spec->flags = gen_spec->flags;
+       spec->dmaq_id = gen_spec->dmaq_id;
+
+       /* Only exact match_flags combinations are accepted.  Each "full"
+        * case sets is_full and falls through to its wildcard sibling.
+        */
+       switch (gen_spec->match_flags) {
+       case (EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_IP_PROTO |
+             EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_LOC_PORT |
+             EF4_FILTER_MATCH_REM_HOST | EF4_FILTER_MATCH_REM_PORT):
+               is_full = true;
+               /* fall through */
+       case (EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_IP_PROTO |
+             EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_LOC_PORT): {
+               __be32 rhost, host1, host2;
+               __be16 rport, port1, port2;
+
+               EF4_BUG_ON_PARANOID(!(gen_spec->flags & EF4_FILTER_FLAG_RX));
+
+               if (gen_spec->ether_type != htons(ETH_P_IP))
+                       return -EPROTONOSUPPORT;
+               if (gen_spec->loc_port == 0 ||
+                   (is_full && gen_spec->rem_port == 0))
+                       return -EADDRNOTAVAIL;
+               switch (gen_spec->ip_proto) {
+               case IPPROTO_TCP:
+                       spec->type = (is_full ? EF4_FARCH_FILTER_TCP_FULL :
+                                     EF4_FARCH_FILTER_TCP_WILD);
+                       break;
+               case IPPROTO_UDP:
+                       spec->type = (is_full ? EF4_FARCH_FILTER_UDP_FULL :
+                                     EF4_FARCH_FILTER_UDP_WILD);
+                       break;
+               default:
+                       return -EPROTONOSUPPORT;
+               }
+
+               /* Filter is constructed in terms of source and destination,
+                * with the odd wrinkle that the ports are swapped in a UDP
+                * wildcard filter.  We need to convert from local and remote
+                * (= zero for wildcard) addresses.
+                */
+               rhost = is_full ? gen_spec->rem_host[0] : 0;
+               rport = is_full ? gen_spec->rem_port : 0;
+               host1 = rhost;
+               host2 = gen_spec->loc_host[0];
+               if (!is_full && gen_spec->ip_proto == IPPROTO_UDP) {
+                       port1 = gen_spec->loc_port;
+                       port2 = rport;
+               } else {
+                       port1 = rport;
+                       port2 = gen_spec->loc_port;
+               }
+               /* Pack, in host byte order, as a 96-bit value:
+                * data[0] = host1[15:0] : port1
+                * data[1] = port2 : host1[31:16]
+                * data[2] = host2
+                */
+               spec->data[0] = ntohl(host1) << 16 | ntohs(port1);
+               spec->data[1] = ntohs(port2) << 16 | ntohl(host1) >> 16;
+               spec->data[2] = ntohl(host2);
+
+               break;
+       }
+
+       case EF4_FILTER_MATCH_LOC_MAC | EF4_FILTER_MATCH_OUTER_VID:
+               is_full = true;
+               /* fall through */
+       case EF4_FILTER_MATCH_LOC_MAC:
+               spec->type = (is_full ? EF4_FARCH_FILTER_MAC_FULL :
+                             EF4_FARCH_FILTER_MAC_WILD);
+               /* data[0] = VLAN ID (0 for wildcard), data[1] = low four
+                * MAC bytes, data[2] = high two MAC bytes.
+                * NOTE(review): loc_mac[2] << 24 shifts a promoted int if
+                * loc_mac[] is a u8 array - confirm this is acceptable.
+                */
+               spec->data[0] = is_full ? ntohs(gen_spec->outer_vid) : 0;
+               spec->data[1] = (gen_spec->loc_mac[2] << 24 |
+                                gen_spec->loc_mac[3] << 16 |
+                                gen_spec->loc_mac[4] << 8 |
+                                gen_spec->loc_mac[5]);
+               spec->data[2] = (gen_spec->loc_mac[0] << 8 |
+                                gen_spec->loc_mac[1]);
+               break;
+
+       case EF4_FILTER_MATCH_LOC_MAC_IG:
+               /* Default filter: only the multicast bit of the address
+                * selects between the MC and UC default types.
+                */
+               spec->type = (is_multicast_ether_addr(gen_spec->loc_mac) ?
+                             EF4_FARCH_FILTER_MC_DEF :
+                             EF4_FARCH_FILTER_UC_DEF);
+               memset(spec->data, 0, sizeof(spec->data)); /* ensure equality */
+               break;
+
+       default:
+               return -EPROTONOSUPPORT;
+       }
+
+       return 0;
+}
+
+/* Expand a compact farch filter spec back into a generic filter spec.
+ *
+ * *gen_spec is zeroed first and then filled in from @spec.  An
+ * unrecognised spec->type triggers a WARN_ON and leaves only priority,
+ * flags and dmaq_id set.
+ */
+static void
+ef4_farch_filter_to_gen_spec(struct ef4_filter_spec *gen_spec,
+                            const struct ef4_farch_filter_spec *spec)
+{
+       bool is_full = false;
+
+       /* *gen_spec should be completely initialised, to be consistent
+        * with ef4_filter_init_{rx,tx}() and in case we want to copy
+        * it back to userland.
+        */
+       memset(gen_spec, 0, sizeof(*gen_spec));
+
+       gen_spec->priority = spec->priority;
+       gen_spec->flags = spec->flags;
+       gen_spec->dmaq_id = spec->dmaq_id;
+
+       /* Each "full" case sets is_full and falls through to the
+        * handling shared with its wildcard sibling.
+        */
+       switch (spec->type) {
+       case EF4_FARCH_FILTER_TCP_FULL:
+       case EF4_FARCH_FILTER_UDP_FULL:
+               is_full = true;
+               /* fall through */
+       case EF4_FARCH_FILTER_TCP_WILD:
+       case EF4_FARCH_FILTER_UDP_WILD: {
+               __be32 host1, host2;
+               __be16 port1, port2;
+
+               gen_spec->match_flags =
+                       EF4_FILTER_MATCH_ETHER_TYPE |
+                       EF4_FILTER_MATCH_IP_PROTO |
+                       EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_LOC_PORT;
+               if (is_full)
+                       gen_spec->match_flags |= (EF4_FILTER_MATCH_REM_HOST |
+                                                 EF4_FILTER_MATCH_REM_PORT);
+               gen_spec->ether_type = htons(ETH_P_IP);
+               gen_spec->ip_proto =
+                       (spec->type == EF4_FARCH_FILTER_TCP_FULL ||
+                        spec->type == EF4_FARCH_FILTER_TCP_WILD) ?
+                       IPPROTO_TCP : IPPROTO_UDP;
+
+               /* Unpack the words written by
+                * ef4_farch_filter_from_gen_spec(): host1 straddles
+                * data[0] and data[1]; values go back to network order.
+                */
+               host1 = htonl(spec->data[0] >> 16 | spec->data[1] << 16);
+               port1 = htons(spec->data[0]);
+               host2 = htonl(spec->data[2]);
+               port2 = htons(spec->data[1] >> 16);
+               /* Which of host1/host2 is local depends on direction */
+               if (spec->flags & EF4_FILTER_FLAG_TX) {
+                       gen_spec->loc_host[0] = host1;
+                       gen_spec->rem_host[0] = host2;
+               } else {
+                       gen_spec->loc_host[0] = host2;
+                       gen_spec->rem_host[0] = host1;
+               }
+               /* Ports follow the hosts, except that a UDP wildcard
+                * filter stores them swapped - hence the XOR.
+                */
+               if (!!(gen_spec->flags & EF4_FILTER_FLAG_TX) ^
+                   (!is_full && gen_spec->ip_proto == IPPROTO_UDP)) {
+                       gen_spec->loc_port = port1;
+                       gen_spec->rem_port = port2;
+               } else {
+                       gen_spec->loc_port = port2;
+                       gen_spec->rem_port = port1;
+               }
+
+               break;
+       }
+
+       case EF4_FARCH_FILTER_MAC_FULL:
+               is_full = true;
+               /* fall through */
+       case EF4_FARCH_FILTER_MAC_WILD:
+               gen_spec->match_flags = EF4_FILTER_MATCH_LOC_MAC;
+               if (is_full)
+                       gen_spec->match_flags |= EF4_FILTER_MATCH_OUTER_VID;
+               /* data[2] holds MAC bytes 0-1, data[1] bytes 2-5; each
+                * assignment keeps only the low byte of the shifted word.
+                */
+               gen_spec->loc_mac[0] = spec->data[2] >> 8;
+               gen_spec->loc_mac[1] = spec->data[2];
+               gen_spec->loc_mac[2] = spec->data[1] >> 24;
+               gen_spec->loc_mac[3] = spec->data[1] >> 16;
+               gen_spec->loc_mac[4] = spec->data[1] >> 8;
+               gen_spec->loc_mac[5] = spec->data[1];
+               gen_spec->outer_vid = htons(spec->data[0]);
+               break;
+
+       case EF4_FARCH_FILTER_UC_DEF:
+       case EF4_FARCH_FILTER_MC_DEF:
+               gen_spec->match_flags = EF4_FILTER_MATCH_LOC_MAC_IG;
+               /* Byte 0 becomes 1 for the multicast default, else 0 */
+               gen_spec->loc_mac[0] = spec->type == EF4_FARCH_FILTER_MC_DEF;
+               break;
+
+       default:
+               WARN_ON(1);
+               break;
+       }
+}
+
+/* Initialise @spec as an automatic-priority RX filter steering to
+ * queue 0, with RSS and scatter following the NIC's current settings.
+ */
+static void
+ef4_farch_filter_init_rx_auto(struct ef4_nic *efx,
+                             struct ef4_farch_filter_spec *spec)
+{
+       u8 flags = EF4_FILTER_FLAG_RX;
+
+       /* If there's only one channel then RSS is disabled for non VF
+        * traffic, thereby allowing VFs to use RSS when the PF can't.
+        */
+       if (ef4_rss_enabled(efx))
+               flags |= EF4_FILTER_FLAG_RX_RSS;
+       if (efx->rx_scatter)
+               flags |= EF4_FILTER_FLAG_RX_SCATTER;
+
+       spec->priority = EF4_FILTER_PRI_AUTO;
+       spec->flags = flags;
+       spec->dmaq_id = 0;
+}
+
+/* Build a filter entry and return its n-tuple key.
+ *
+ * Fills *filter with the hardware table entry for @spec, using the
+ * field layout of the table the spec belongs to.  The returned key is
+ * the XOR of the three match words and a table-specific fourth value
+ * (data3).  Specs that map to any other table (including RX_DEF) hit
+ * BUG().
+ */
+static u32 ef4_farch_filter_build(ef4_oword_t *filter,
+                                 struct ef4_farch_filter_spec *spec)
+{
+       u32 data3;
+
+       switch (ef4_farch_filter_spec_table_id(spec)) {
+       case EF4_FARCH_FILTER_TABLE_RX_IP: {
+               bool is_udp = (spec->type == EF4_FARCH_FILTER_UDP_FULL ||
+                              spec->type == EF4_FARCH_FILTER_UDP_WILD);
+               EF4_POPULATE_OWORD_7(
+                       *filter,
+                       FRF_BZ_RSS_EN,
+                       !!(spec->flags & EF4_FILTER_FLAG_RX_RSS),
+                       FRF_BZ_SCATTER_EN,
+                       !!(spec->flags & EF4_FILTER_FLAG_RX_SCATTER),
+                       FRF_BZ_TCP_UDP, is_udp,
+                       FRF_BZ_RXQ_ID, spec->dmaq_id,
+                       EF4_DWORD_2, spec->data[2],
+                       EF4_DWORD_1, spec->data[1],
+                       EF4_DWORD_0, spec->data[0]);
+               data3 = is_udp;
+               break;
+       }
+
+       case EF4_FARCH_FILTER_TABLE_RX_MAC: {
+               bool is_wild = spec->type == EF4_FARCH_FILTER_MAC_WILD;
+               EF4_POPULATE_OWORD_7(
+                       *filter,
+                       FRF_CZ_RMFT_RSS_EN,
+                       !!(spec->flags & EF4_FILTER_FLAG_RX_RSS),
+                       FRF_CZ_RMFT_SCATTER_EN,
+                       !!(spec->flags & EF4_FILTER_FLAG_RX_SCATTER),
+                       FRF_CZ_RMFT_RXQ_ID, spec->dmaq_id,
+                       FRF_CZ_RMFT_WILDCARD_MATCH, is_wild,
+                       FRF_CZ_RMFT_DEST_MAC_HI, spec->data[2],
+                       FRF_CZ_RMFT_DEST_MAC_LO, spec->data[1],
+                       FRF_CZ_RMFT_VLAN_ID, spec->data[0]);
+               data3 = is_wild;
+               break;
+       }
+
+       case EF4_FARCH_FILTER_TABLE_TX_MAC: {
+               bool is_wild = spec->type == EF4_FARCH_FILTER_MAC_WILD;
+               EF4_POPULATE_OWORD_5(*filter,
+                                    FRF_CZ_TMFT_TXQ_ID, spec->dmaq_id,
+                                    FRF_CZ_TMFT_WILDCARD_MATCH, is_wild,
+                                    FRF_CZ_TMFT_SRC_MAC_HI, spec->data[2],
+                                    FRF_CZ_TMFT_SRC_MAC_LO, spec->data[1],
+                                    FRF_CZ_TMFT_VLAN_ID, spec->data[0]);
+               /* Unlike the RX tables, the queue ID is part of the key */
+               data3 = is_wild | spec->dmaq_id << 1;
+               break;
+       }
+
+       default:
+               BUG();
+       }
+
+       return spec->data[0] ^ spec->data[1] ^ spec->data[2] ^ data3;
+}
+
+/* Decide whether two filter specs match the same traffic.
+ *
+ * Type and match data must be identical.  The queue ID is additionally
+ * compared only when @left is a TX filter; priority and the remaining
+ * flags are never compared.
+ */
+static bool ef4_farch_filter_equal(const struct ef4_farch_filter_spec *left,
+                                  const struct ef4_farch_filter_spec *right)
+{
+       if (left->type != right->type)
+               return false;
+
+       if (memcmp(left->data, right->data, sizeof(left->data)) != 0)
+               return false;
+
+       if (!(left->flags & EF4_FILTER_FLAG_TX))
+               return true;
+
+       return left->dmaq_id == right->dmaq_id;
+}
+
+/*
+ * Construct/deconstruct external filter IDs.  At least the RX filter
+ * IDs must be ordered by matching priority, for RX NFC semantics.
+ *
+ * Deconstruction needs to be robust against invalid IDs so that
+ * ef4_filter_remove_id_safe() and ef4_filter_get_filter_safe() can
+ * accept user-provided IDs.
+ */
+
+#define EF4_FARCH_FILTER_MATCH_PRI_COUNT       5
+
+/* Match priority of each filter type; a lower number means a more
+ * specific match.  Used as the "range" part of an external filter ID
+ * by ef4_farch_filter_make_id().
+ */
+static const u8 ef4_farch_filter_type_match_pri[EF4_FARCH_FILTER_TYPE_COUNT] = {
+       [EF4_FARCH_FILTER_TCP_FULL]     = 0,
+       [EF4_FARCH_FILTER_UDP_FULL]     = 0,
+       [EF4_FARCH_FILTER_TCP_WILD]     = 1,
+       [EF4_FARCH_FILTER_UDP_WILD]     = 1,
+       [EF4_FARCH_FILTER_MAC_FULL]     = 2,
+       [EF4_FARCH_FILTER_MAC_WILD]     = 3,
+       [EF4_FARCH_FILTER_UC_DEF]       = 4,
+       [EF4_FARCH_FILTER_MC_DEF]       = 4,
+};
+
+/* Maps the "range" part of an external filter ID back to the table
+ * holding the filter; see ef4_farch_filter_id_table_id().
+ *
+ * NOTE(review): ef4_farch_filter_make_id() gives TX MAC_FULL/MAC_WILD
+ * filters ranges 7 and 8 (match pri 2/3 plus
+ * EF4_FARCH_FILTER_MATCH_PRI_COUNT), which lie beyond the 7 entries
+ * here - confirm whether TX filter IDs are ever decoded.
+ */
+static const enum ef4_farch_filter_table_id ef4_farch_filter_range_table[] = {
+       EF4_FARCH_FILTER_TABLE_RX_IP,   /* RX match pri 0 */
+       EF4_FARCH_FILTER_TABLE_RX_IP,   /* RX match pri 1 */
+       EF4_FARCH_FILTER_TABLE_RX_MAC,  /* RX match pri 2 */
+       EF4_FARCH_FILTER_TABLE_RX_MAC,  /* RX match pri 3 */
+       EF4_FARCH_FILTER_TABLE_RX_DEF,  /* RX match pri 4 */
+       EF4_FARCH_FILTER_TABLE_TX_MAC,  /* TX match pri 0 */
+       EF4_FARCH_FILTER_TABLE_TX_MAC,  /* TX match pri 1 */
+};
+
+#define EF4_FARCH_FILTER_INDEX_WIDTH 13
+#define EF4_FARCH_FILTER_INDEX_MASK ((1 << EF4_FARCH_FILTER_INDEX_WIDTH) - 1)
+
+/* Compose an external filter ID: the range number in the bits above
+ * EF4_FARCH_FILTER_INDEX_WIDTH, the table index in the bits below.
+ */
+static inline u32
+ef4_farch_filter_make_id(const struct ef4_farch_filter_spec *spec,
+                        unsigned int index)
+{
+       unsigned int range = ef4_farch_filter_type_match_pri[spec->type];
+
+       /* Filters without the RX flag use the ranges after the RX ones */
+       if ((spec->flags & EF4_FILTER_FLAG_RX) == 0)
+               range += EF4_FARCH_FILTER_MATCH_PRI_COUNT;
+
+       return (range << EF4_FARCH_FILTER_INDEX_WIDTH) | index;
+}
+
+/* Extract the table ID from an external filter ID.  Returns
+ * EF4_FARCH_FILTER_TABLE_COUNT if the range part of @id is out of
+ * bounds, so arbitrary IDs can be decoded safely.
+ */
+static inline enum ef4_farch_filter_table_id
+ef4_farch_filter_id_table_id(u32 id)
+{
+       unsigned int range = id >> EF4_FARCH_FILTER_INDEX_WIDTH;
+
+       if (range >= ARRAY_SIZE(ef4_farch_filter_range_table))
+               return EF4_FARCH_FILTER_TABLE_COUNT; /* invalid */
+
+       return ef4_farch_filter_range_table[range];
+}
+
+/* Extract the table index (low EF4_FARCH_FILTER_INDEX_WIDTH bits) from
+ * an external filter ID.
+ */
+static inline unsigned int ef4_farch_filter_id_index(u32 id)
+{
+       unsigned int index = id & EF4_FARCH_FILTER_INDEX_MASK;
+
+       return index;
+}
+
+/* Return the upper limit of RX filter IDs.
+ *
+ * Walks the RX ranges from the highest match priority number down to 0
+ * and, for the first one whose table is non-empty, returns that range
+ * combined with the table size.  Returns 0 if every RX table is empty.
+ */
+u32 ef4_farch_filter_get_rx_id_limit(struct ef4_nic *efx)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       unsigned int range;
+
+       for (range = EF4_FARCH_FILTER_MATCH_PRI_COUNT; range-- > 0; ) {
+               const struct ef4_farch_filter_table *table =
+                       &state->table[ef4_farch_filter_range_table[range]];
+
+               if (table->size != 0)
+                       return range << EF4_FARCH_FILTER_INDEX_WIDTH |
+                               table->size;
+       }
+
+       return 0;
+}
+
+s32 ef4_farch_filter_insert(struct ef4_nic *efx,
+                           struct ef4_filter_spec *gen_spec,
+                           bool replace_equal)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       struct ef4_farch_filter_table *table;
+       struct ef4_farch_filter_spec spec;
+       ef4_oword_t filter;
+       int rep_index, ins_index;
+       unsigned int depth = 0;
+       int rc;
+
+       rc = ef4_farch_filter_from_gen_spec(&spec, gen_spec);
+       if (rc)
+               return rc;
+
+       table = &state->table[ef4_farch_filter_spec_table_id(&spec)];
+       if (table->size == 0)
+               return -EINVAL;
+
+       netif_vdbg(efx, hw, efx->net_dev,
+                  "%s: type %d search_limit=%d", __func__, spec.type,
+                  table->search_limit[spec.type]);
+
+       if (table->id == EF4_FARCH_FILTER_TABLE_RX_DEF) {
+               /* One filter spec per type */
+               BUILD_BUG_ON(EF4_FARCH_FILTER_INDEX_UC_DEF != 0);
+               BUILD_BUG_ON(EF4_FARCH_FILTER_INDEX_MC_DEF !=
+                            EF4_FARCH_FILTER_MC_DEF - EF4_FARCH_FILTER_UC_DEF);
+               rep_index = spec.type - EF4_FARCH_FILTER_UC_DEF;
+               ins_index = rep_index;
+
+               spin_lock_bh(&efx->filter_lock);
+       } else {
+               /* Search concurrently for
+                * (1) a filter to be replaced (rep_index): any filter
+                *     with the same match values, up to the current
+                *     search depth for this type, and
+                * (2) the insertion point (ins_index): (1) or any
+                *     free slot before it or up to the maximum search
+                *     depth for this priority
+                * We fail if we cannot find (2).
+                *
+                * We can stop once either
+                * (a) we find (1), in which case we have definitely
+                *     found (2) as well; or
+                * (b) we have searched exhaustively for (1), and have
+                *     either found (2) or searched exhaustively for it
+                */
+               u32 key = ef4_farch_filter_build(&filter, &spec);
+               unsigned int hash = ef4_farch_filter_hash(key);
+               unsigned int incr = ef4_farch_filter_increment(key);
+               unsigned int max_rep_depth = table->search_limit[spec.type];
+               unsigned int max_ins_depth =
+                       spec.priority <= EF4_FILTER_PRI_HINT ?
+                       EF4_FARCH_FILTER_CTL_SRCH_HINT_MAX :
+                       EF4_FARCH_FILTER_CTL_SRCH_MAX;
+               unsigned int i = hash & (table->size - 1);
+
+               ins_index = -1;
+               depth = 1;
+
+               spin_lock_bh(&efx->filter_lock);
+
+               for (;;) {
+                       if (!test_bit(i, table->used_bitmap)) {
+                               if (ins_index < 0)
+                                       ins_index = i;
+                       } else if (ef4_farch_filter_equal(&spec,
+                                                         &table->spec[i])) {
+                               /* Case (a) */
+                               if (ins_index < 0)
+                                       ins_index = i;
+                               rep_index = i;
+                               break;
+                       }
+
+                       if (depth >= max_rep_depth &&
+                           (ins_index >= 0 || depth >= max_ins_depth)) {
+                               /* Case (b) */
+                               if (ins_index < 0) {
+                                       rc = -EBUSY;
+                                       goto out;
+                               }
+                               rep_index = -1;
+                               break;
+                       }
+
+                       i = (i + incr) & (table->size - 1);
+                       ++depth;
+               }
+       }
+
+       /* If we found a filter to be replaced, check whether we
+        * should do so
+        */
+       if (rep_index >= 0) {
+               struct ef4_farch_filter_spec *saved_spec =
+                       &table->spec[rep_index];
+
+               if (spec.priority == saved_spec->priority && !replace_equal) {
+                       rc = -EEXIST;
+                       goto out;
+               }
+               if (spec.priority < saved_spec->priority) {
+                       rc = -EPERM;
+                       goto out;
+               }
+               if (saved_spec->priority == EF4_FILTER_PRI_AUTO ||
+                   saved_spec->flags & EF4_FILTER_FLAG_RX_OVER_AUTO)
+                       spec.flags |= EF4_FILTER_FLAG_RX_OVER_AUTO;
+       }
+
+       /* Insert the filter */
+       if (ins_index != rep_index) {
+               __set_bit(ins_index, table->used_bitmap);
+               ++table->used;
+       }
+       table->spec[ins_index] = spec;
+
+       if (table->id == EF4_FARCH_FILTER_TABLE_RX_DEF) {
+               ef4_farch_filter_push_rx_config(efx);
+       } else {
+               if (table->search_limit[spec.type] < depth) {
+                       table->search_limit[spec.type] = depth;
+                       if (spec.flags & EF4_FILTER_FLAG_TX)
+                               ef4_farch_filter_push_tx_limits(efx);
+                       else
+                               ef4_farch_filter_push_rx_config(efx);
+               }
+
+               ef4_writeo(efx, &filter,
+                          table->offset + table->step * ins_index);
+
+               /* If we were able to replace a filter by inserting
+                * at a lower depth, clear the replaced filter
+                */
+               if (ins_index != rep_index && rep_index >= 0)
+                       ef4_farch_filter_table_clear_entry(efx, table,
+                                                          rep_index);
+       }
+
+       netif_vdbg(efx, hw, efx->net_dev,
+                  "%s: filter type %d index %d rxq %u set",
+                  __func__, spec.type, ins_index, spec.dmaq_id);
+       rc = ef4_farch_filter_make_id(&spec, ins_index);
+
+out:
+       spin_unlock_bh(&efx->filter_lock);
+       return rc;
+}
+
+/* Remove the filter at @filter_idx from @table.
+ *
+ * Clears the entry's bit in the used bitmap, decrements the used count,
+ * zeroes the saved spec and writes a blank filter word to the entry's
+ * register (table->offset + table->step * filter_idx).
+ *
+ * No locking is done here.  NOTE(review): callers presumably hold
+ * efx->filter_lock - confirm for ef4_farch_filter_rfs_expire_one(),
+ * which does not take it itself.
+ */
+static void
+ef4_farch_filter_table_clear_entry(struct ef4_nic *efx,
+                                  struct ef4_farch_filter_table *table,
+                                  unsigned int filter_idx)
+{
+       /* Static storage, so zero-initialised; never assigned in this
+        * function and used as the blank entry written out below
+        */
+       static ef4_oword_t filter;
+
+       EF4_WARN_ON_PARANOID(!test_bit(filter_idx, table->used_bitmap));
+       BUG_ON(table->offset == 0); /* can't clear MAC default filters */
+
+       /* Drop the software state first, then blank the register */
+       __clear_bit(filter_idx, table->used_bitmap);
+       --table->used;
+       memset(&table->spec[filter_idx], 0, sizeof(table->spec[0]));
+
+       ef4_writeo(efx, &filter, table->offset + table->step * filter_idx);
+
+       /* If this filter required a greater search depth than
+        * any other, the search limit for its type can now be
+        * decreased.  However, it is hard to determine that
+        * unless the table has become completely empty - in
+        * which case, all its search limits can be set to 0.
+        */
+       if (unlikely(table->used == 0)) {
+               memset(table->search_limit, 0, sizeof(table->search_limit));
+               /* Only the TX MAC table uses the TX limits push; every
+                * other table goes through the RX config push
+                */
+               if (table->id == EF4_FARCH_FILTER_TABLE_TX_MAC)
+                       ef4_farch_filter_push_tx_limits(efx);
+               else
+                       ef4_farch_filter_push_rx_config(efx);
+       }
+}
+
+/* Remove the filter at @filter_idx from @table if it is in use and its
+ * priority equals @priority.
+ *
+ * A filter flagged EF4_FILTER_FLAG_RX_OVER_AUTO is not cleared; its spec
+ * is re-initialised with ef4_farch_filter_init_rx_auto() and the RX
+ * config is pushed again.  Any other filter is cleared from the table.
+ *
+ * Returns 0 on success or -ENOENT if there is no matching filter.
+ */
+static int ef4_farch_filter_remove(struct ef4_nic *efx,
+                                  struct ef4_farch_filter_table *table,
+                                  unsigned int filter_idx,
+                                  enum ef4_filter_priority priority)
+{
+       struct ef4_farch_filter_spec *entry = &table->spec[filter_idx];
+
+       if (!test_bit(filter_idx, table->used_bitmap))
+               return -ENOENT;
+       if (entry->priority != priority)
+               return -ENOENT;
+
+       if (!(entry->flags & EF4_FILTER_FLAG_RX_OVER_AUTO)) {
+               ef4_farch_filter_table_clear_entry(efx, table, filter_idx);
+               return 0;
+       }
+
+       /* Fall back to the automatic filter this one was layered over */
+       ef4_farch_filter_init_rx_auto(efx, entry);
+       ef4_farch_filter_push_rx_config(efx);
+       return 0;
+}
+
+/* Remove the filter identified by @filter_id, provided it is in use and
+ * has priority @priority.
+ *
+ * The table ID and index encoded in @filter_id are range-checked before
+ * use.  The removal itself runs under efx->filter_lock.
+ *
+ * Returns 0 on success, or -ENOENT if the ID is out of range or
+ * ef4_farch_filter_remove() finds no matching filter.
+ */
+int ef4_farch_filter_remove_safe(struct ef4_nic *efx,
+                                enum ef4_filter_priority priority,
+                                u32 filter_id)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       enum ef4_farch_filter_table_id table_id;
+       struct ef4_farch_filter_table *table;
+       unsigned int filter_idx;
+       int rc;
+
+       table_id = ef4_farch_filter_id_table_id(filter_id);
+       if ((unsigned int)table_id >= EF4_FARCH_FILTER_TABLE_COUNT)
+               return -ENOENT;
+       table = &state->table[table_id];
+
+       filter_idx = ef4_farch_filter_id_index(filter_id);
+       if (filter_idx >= table->size)
+               return -ENOENT;
+
+       /* ef4_farch_filter_remove() looks the spec up itself, so no
+        * spec pointer is needed here
+        */
+       spin_lock_bh(&efx->filter_lock);
+       rc = ef4_farch_filter_remove(efx, table, filter_idx, priority);
+       spin_unlock_bh(&efx->filter_lock);
+
+       return rc;
+}
+
+/* Look up the filter identified by @filter_id and, if it is in use with
+ * priority @priority, convert its saved spec into @spec_buf.
+ *
+ * Returns 0 on success, or -ENOENT if the ID is out of range or no
+ * matching filter is installed.
+ */
+int ef4_farch_filter_get_safe(struct ef4_nic *efx,
+                             enum ef4_filter_priority priority,
+                             u32 filter_id, struct ef4_filter_spec *spec_buf)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       enum ef4_farch_filter_table_id table_id;
+       struct ef4_farch_filter_table *table;
+       struct ef4_farch_filter_spec *entry;
+       unsigned int idx;
+       int rc = -ENOENT;
+
+       table_id = ef4_farch_filter_id_table_id(filter_id);
+       if ((unsigned int)table_id >= EF4_FARCH_FILTER_TABLE_COUNT)
+               return -ENOENT;
+       table = &state->table[table_id];
+
+       idx = ef4_farch_filter_id_index(filter_id);
+       if (idx >= table->size)
+               return -ENOENT;
+       entry = &table->spec[idx];
+
+       /* Inspect and copy the entry under the filter lock */
+       spin_lock_bh(&efx->filter_lock);
+
+       if (test_bit(idx, table->used_bitmap) &&
+           entry->priority == priority) {
+               ef4_farch_filter_to_gen_spec(spec_buf, entry);
+               rc = 0;
+       }
+
+       spin_unlock_bh(&efx->filter_lock);
+
+       return rc;
+}
+
+/* Remove every filter of priority @priority from table @table_id.
+ *
+ * Entries whose saved priority is EF4_FILTER_PRI_AUTO are never passed
+ * to ef4_farch_filter_remove().  Its return value is ignored, so unused
+ * entries and entries of another priority are simply left alone.
+ */
+static void
+ef4_farch_filter_table_clear(struct ef4_nic *efx,
+                            enum ef4_farch_filter_table_id table_id,
+                            enum ef4_filter_priority priority)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       struct ef4_farch_filter_table *table = &state->table[table_id];
+       unsigned int idx;
+
+       spin_lock_bh(&efx->filter_lock);
+
+       for (idx = 0; idx < table->size; ++idx) {
+               if (table->spec[idx].priority == EF4_FILTER_PRI_AUTO)
+                       continue;
+
+               ef4_farch_filter_remove(efx, table, idx, priority);
+       }
+
+       spin_unlock_bh(&efx->filter_lock);
+}
+
+/* Remove the filters of priority @priority from the RX IP, RX MAC and
+ * RX default tables, in that order.  Always returns 0.
+ */
+int ef4_farch_filter_clear_rx(struct ef4_nic *efx,
+                              enum ef4_filter_priority priority)
+{
+       static const enum ef4_farch_filter_table_id rx_tables[] = {
+               EF4_FARCH_FILTER_TABLE_RX_IP,
+               EF4_FARCH_FILTER_TABLE_RX_MAC,
+               EF4_FARCH_FILTER_TABLE_RX_DEF,
+       };
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(rx_tables); i++)
+               ef4_farch_filter_table_clear(efx, rx_tables[i], priority);
+
+       return 0;
+}
+
+/* Count the in-use filters of priority @priority across the tables from
+ * EF4_FARCH_FILTER_TABLE_RX_IP to EF4_FARCH_FILTER_TABLE_RX_DEF
+ * inclusive.  The scan runs under efx->filter_lock.
+ */
+u32 ef4_farch_filter_count_rx_used(struct ef4_nic *efx,
+                                  enum ef4_filter_priority priority)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       enum ef4_farch_filter_table_id table_id;
+       struct ef4_farch_filter_table *table;
+       unsigned int idx;
+       u32 total = 0;
+
+       spin_lock_bh(&efx->filter_lock);
+
+       for (table_id = EF4_FARCH_FILTER_TABLE_RX_IP;
+            table_id <= EF4_FARCH_FILTER_TABLE_RX_DEF;
+            table_id++) {
+               table = &state->table[table_id];
+
+               for (idx = 0; idx < table->size; idx++) {
+                       if (!test_bit(idx, table->used_bitmap))
+                               continue;
+                       if (table->spec[idx].priority != priority)
+                               continue;
+                       ++total;
+               }
+       }
+
+       spin_unlock_bh(&efx->filter_lock);
+
+       return total;
+}
+
+/* Store the IDs of all in-use RX filters of priority @priority in @buf,
+ * which has room for @size entries.
+ *
+ * Returns the number of IDs stored, or -EMSGSIZE if more than @size
+ * filters match (in which case @buf has been filled with the first
+ * @size IDs found).
+ */
+s32 ef4_farch_filter_get_rx_ids(struct ef4_nic *efx,
+                               enum ef4_filter_priority priority,
+                               u32 *buf, u32 size)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       enum ef4_farch_filter_table_id table_id;
+       struct ef4_farch_filter_table *table;
+       unsigned int idx;
+       s32 count = 0;
+
+       spin_lock_bh(&efx->filter_lock);
+
+       for (table_id = EF4_FARCH_FILTER_TABLE_RX_IP;
+            table_id <= EF4_FARCH_FILTER_TABLE_RX_DEF;
+            table_id++) {
+               table = &state->table[table_id];
+
+               for (idx = 0; idx < table->size; idx++) {
+                       if (!test_bit(idx, table->used_bitmap) ||
+                           table->spec[idx].priority != priority)
+                               continue;
+
+                       /* Buffer already full: report the overflow */
+                       if (count == size) {
+                               count = -EMSGSIZE;
+                               goto unlock;
+                       }
+
+                       buf[count] = ef4_farch_filter_make_id(
+                               &table->spec[idx], idx);
+                       count++;
+               }
+       }
+unlock:
+       spin_unlock_bh(&efx->filter_lock);
+
+       return count;
+}
+
+/* Restore filter state after reset: rebuild and rewrite the register of
+ * every in-use filter from its saved spec, then push the RX config and
+ * the TX limits.  Everything runs under efx->filter_lock.
+ */
+void ef4_farch_filter_table_restore(struct ef4_nic *efx)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       enum ef4_farch_filter_table_id table_id;
+       struct ef4_farch_filter_table *table;
+       unsigned int idx;
+       ef4_oword_t filter;
+
+       spin_lock_bh(&efx->filter_lock);
+
+       for (table_id = 0; table_id < EF4_FARCH_FILTER_TABLE_COUNT; table_id++) {
+               table = &state->table[table_id];
+
+               /* Only tables with a non-zero register step are
+                * rewritten entry by entry
+                */
+               if (table->step == 0)
+                       continue;
+
+               for (idx = 0; idx < table->size; idx++) {
+                       if (test_bit(idx, table->used_bitmap)) {
+                               ef4_farch_filter_build(&filter,
+                                                      &table->spec[idx]);
+                               ef4_writeo(efx, &filter,
+                                          table->offset + table->step * idx);
+                       }
+               }
+       }
+
+       ef4_farch_filter_push_rx_config(efx);
+       ef4_farch_filter_push_tx_limits(efx);
+
+       spin_unlock_bh(&efx->filter_lock);
+}
+
+/* Free the software filter state: each table's used bitmap and spec
+ * array, then the state structure itself.
+ */
+void ef4_farch_filter_table_remove(struct ef4_nic *efx)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       struct ef4_farch_filter_table *table;
+       enum ef4_farch_filter_table_id id;
+
+       for (id = 0; id < EF4_FARCH_FILTER_TABLE_COUNT; id++) {
+               table = &state->table[id];
+               kfree(table->used_bitmap);
+               vfree(table->spec);
+       }
+
+       kfree(state);
+}
+
+/* Allocate and initialise the software filter state and store it in
+ * efx->filter_state.
+ *
+ * Returns 0 on success or -ENOMEM if any allocation fails.  On a
+ * failure after the state structure has been allocated, everything is
+ * released through ef4_farch_filter_table_remove().
+ *
+ * NOTE(review): efx->filter_state is not reset on that failure path, so
+ * it is left pointing at freed memory - confirm callers never use it
+ * after a failed probe.
+ */
+int ef4_farch_filter_table_probe(struct ef4_nic *efx)
+{
+       struct ef4_farch_filter_state *state;
+       struct ef4_farch_filter_table *table;
+       unsigned table_id;
+
+       state = kzalloc(sizeof(struct ef4_farch_filter_state), GFP_KERNEL);
+       if (!state)
+               return -ENOMEM;
+       efx->filter_state = state;
+
+       /* Only the RX IP table is given a size here, and only on
+        * Falcon B0 or later; tables left at size 0 are skipped below
+        */
+       if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) {
+               table = &state->table[EF4_FARCH_FILTER_TABLE_RX_IP];
+               table->id = EF4_FARCH_FILTER_TABLE_RX_IP;
+               table->offset = FR_BZ_RX_FILTER_TBL0;
+               table->size = FR_BZ_RX_FILTER_TBL0_ROWS;
+               table->step = FR_BZ_RX_FILTER_TBL0_STEP;
+       }
+
+       /* state came from kzalloc(), so the pointers of tables not yet
+        * reached are still NULL if we bail out to the fail path
+        */
+       for (table_id = 0; table_id < EF4_FARCH_FILTER_TABLE_COUNT; table_id++) {
+               table = &state->table[table_id];
+               if (table->size == 0)
+                       continue;
+               table->used_bitmap = kcalloc(BITS_TO_LONGS(table->size),
+                                            sizeof(unsigned long),
+                                            GFP_KERNEL);
+               if (!table->used_bitmap)
+                       goto fail;
+               table->spec = vzalloc(table->size * sizeof(*table->spec));
+               if (!table->spec)
+                       goto fail;
+       }
+
+       table = &state->table[EF4_FARCH_FILTER_TABLE_RX_DEF];
+       if (table->size) {
+               /* RX default filters must always exist */
+               struct ef4_farch_filter_spec *spec;
+               unsigned i;
+
+               /* Entry i gets type EF4_FARCH_FILTER_UC_DEF + i and is
+                * marked as in use
+                */
+               for (i = 0; i < EF4_FARCH_FILTER_SIZE_RX_DEF; i++) {
+                       spec = &table->spec[i];
+                       spec->type = EF4_FARCH_FILTER_UC_DEF + i;
+                       ef4_farch_filter_init_rx_auto(efx, spec);
+                       __set_bit(i, table->used_bitmap);
+               }
+       }
+
+       ef4_farch_filter_push_rx_config(efx);
+
+       return 0;
+
+fail:
+       ef4_farch_filter_table_remove(efx);
+       return -ENOMEM;
+}
+
+/* Update scatter enable flags for filters pointing to our own RX queues
+ *
+ * For every in-use filter in the tables from RX_IP to RX_DEF whose
+ * dmaq_id is below efx->n_rx_channels, set or clear
+ * EF4_FILTER_FLAG_RX_SCATTER to match efx->rx_scatter and rewrite the
+ * entry's register.  Entries in the RX_DEF table are not written
+ * individually; they rely on the final
+ * ef4_farch_filter_push_rx_config() call.
+ */
+void ef4_farch_filter_update_rx_scatter(struct ef4_nic *efx)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       enum ef4_farch_filter_table_id table_id;
+       struct ef4_farch_filter_table *table;
+       ef4_oword_t filter;
+       unsigned int filter_idx;
+
+       spin_lock_bh(&efx->filter_lock);
+
+       for (table_id = EF4_FARCH_FILTER_TABLE_RX_IP;
+            table_id <= EF4_FARCH_FILTER_TABLE_RX_DEF;
+            table_id++) {
+               table = &state->table[table_id];
+
+               for (filter_idx = 0; filter_idx < table->size; filter_idx++) {
+                       /* Skip unused entries and entries whose queue
+                        * ID is not one of our RX channels
+                        */
+                       if (!test_bit(filter_idx, table->used_bitmap) ||
+                           table->spec[filter_idx].dmaq_id >=
+                           efx->n_rx_channels)
+                               continue;
+
+                       if (efx->rx_scatter)
+                               table->spec[filter_idx].flags |=
+                                       EF4_FILTER_FLAG_RX_SCATTER;
+                       else
+                               table->spec[filter_idx].flags &=
+                                       ~EF4_FILTER_FLAG_RX_SCATTER;
+
+                       if (table_id == EF4_FARCH_FILTER_TABLE_RX_DEF)
+                               /* Pushed by ef4_farch_filter_push_rx_config() */
+                               continue;
+
+                       /* Rebuild the hardware word from the updated
+                        * spec and write it to the entry's register
+                        */
+                       ef4_farch_filter_build(&filter, &table->spec[filter_idx]);
+                       ef4_writeo(efx, &filter,
+                                  table->offset + table->step * filter_idx);
+               }
+       }
+
+       ef4_farch_filter_push_rx_config(efx);
+
+       spin_unlock_bh(&efx->filter_lock);
+}
+
+#ifdef CONFIG_RFS_ACCEL
+
+/* Insert the RFS filter described by @gen_spec.
+ *
+ * Thin wrapper: forwards to ef4_farch_filter_insert() with its third
+ * argument set to true and returns its result unchanged.
+ * NOTE(review): the third argument looks like replace_equal (replace an
+ * existing equal-priority filter instead of failing with -EEXIST) -
+ * confirm against the ef4_farch_filter_insert() prototype.
+ */
+s32 ef4_farch_filter_rfs_insert(struct ef4_nic *efx,
+                               struct ef4_filter_spec *gen_spec)
+{
+       return ef4_farch_filter_insert(efx, gen_spec, true);
+}
+
+/* Expire the RX IP filter at @index if it is in use, has priority
+ * EF4_FILTER_PRI_HINT and rps_may_expire_flow() agrees for @flow_id.
+ *
+ * Returns true if the filter was cleared, false otherwise.  No lock is
+ * taken here.
+ */
+bool ef4_farch_filter_rfs_expire_one(struct ef4_nic *efx, u32 flow_id,
+                                    unsigned int index)
+{
+       struct ef4_farch_filter_state *state = efx->filter_state;
+       struct ef4_farch_filter_table *table =
+               &state->table[EF4_FARCH_FILTER_TABLE_RX_IP];
+
+       if (!test_bit(index, table->used_bitmap))
+               return false;
+
+       if (table->spec[index].priority != EF4_FILTER_PRI_HINT)
+               return false;
+
+       if (!rps_may_expire_flow(efx->net_dev, table->spec[index].dmaq_id,
+                                flow_id, index))
+               return false;
+
+       ef4_farch_filter_table_clear_entry(efx, table, index);
+       return true;
+}
+
+#endif /* CONFIG_RFS_ACCEL */
+
+/* Recompute efx->unicast_filter and efx->multicast_hash from the net
+ * device's flags and multicast address list.
+ *
+ * Does nothing if the device is not registered.  The update runs under
+ * the net device's address lock.
+ */
+void ef4_farch_filter_sync_rx_mode(struct ef4_nic *efx)
+{
+       struct net_device *net_dev = efx->net_dev;
+       union ef4_multicast_hash *mc_hash = &efx->multicast_hash;
+       struct netdev_hw_addr *ha;
+
+       if (!ef4_dev_registered(efx))
+               return;
+
+       netif_addr_lock_bh(net_dev);
+
+       /* Unicast filtering is on unless the device is promiscuous */
+       efx->unicast_filter = !(net_dev->flags & IFF_PROMISC);
+
+       /* Build multicast hash table */
+       if (!(net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI))) {
+               memset(mc_hash, 0x00, sizeof(*mc_hash));
+
+               netdev_for_each_mc_addr(ha, net_dev) {
+                       u32 crc = ether_crc_le(ETH_ALEN, ha->addr);
+                       int bit = crc & (EF4_MCAST_HASH_ENTRIES - 1);
+
+                       __set_bit_le(bit, mc_hash);
+               }
+
+               /* Broadcast packets go through the multicast hash filter.
+                * ether_crc_le() of the broadcast address is 0xbe2612ff
+                * so we always add bit 0xff to the mask.
+                */
+               __set_bit_le(0xff, mc_hash);
+       } else {
+               /* Promiscuous or all-multicast: accept every hash bucket */
+               memset(mc_hash, 0xff, sizeof(*mc_hash));
+       }
+
+       netif_addr_unlock_bh(net_dev);
+}
diff --git a/drivers/net/ethernet/sfc/falcon/farch_regs.h b/drivers/net/ethernet/sfc/falcon/farch_regs.h
new file mode 100644 (file)
index 0000000..8095f27
--- /dev/null
@@ -0,0 +1,2932 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2012 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_FARCH_REGS_H
+#define EF4_FARCH_REGS_H
+
+/*
+ * Falcon hardware architecture definitions have a name prefix following
+ * the format:
+ *
+ *     F<type>_<min-rev><max-rev>_
+ *
+ * The following <type> strings are used:
+ *
+ *             MMIO register  MC register  Host memory structure
+ * -------------------------------------------------------------
+ * Address     R              MCR
+ * Bitfield    RF             MCRF         SF
+ * Enumerator  FE             MCFE         SE
+ *
+ * <min-rev> is the first revision to which the definition applies:
+ *
+ *     A: Falcon A1 (SFC4000AB)
+ *     B: Falcon B0 (SFC4000BA)
+ *     C: Siena A0 (SFL9021AA)
+ *
+ * If the definition has been changed or removed in later revisions
+ * then <max-rev> is the last revision to which the definition applies;
+ * otherwise it is "Z".
+ */
+
+/**************************************************************************
+ *
+ * Falcon/Siena registers and descriptors
+ *
+ **************************************************************************
+ */
+
+/* ADR_REGION_REG: Address region register */
+#define        FR_AZ_ADR_REGION 0x00000000
+#define        FRF_AZ_ADR_REGION3_LBN 96
+#define        FRF_AZ_ADR_REGION3_WIDTH 18
+#define        FRF_AZ_ADR_REGION2_LBN 64
+#define        FRF_AZ_ADR_REGION2_WIDTH 18
+#define        FRF_AZ_ADR_REGION1_LBN 32
+#define        FRF_AZ_ADR_REGION1_WIDTH 18
+#define        FRF_AZ_ADR_REGION0_LBN 0
+#define        FRF_AZ_ADR_REGION0_WIDTH 18
+
+/* INT_EN_REG_KER: Kernel driver Interrupt enable register */
+#define        FR_AZ_INT_EN_KER 0x00000010
+#define        FRF_AZ_KER_INT_LEVE_SEL_LBN 8
+#define        FRF_AZ_KER_INT_LEVE_SEL_WIDTH 6
+#define        FRF_AZ_KER_INT_CHAR_LBN 4
+#define        FRF_AZ_KER_INT_CHAR_WIDTH 1
+#define        FRF_AZ_KER_INT_KER_LBN 3
+#define        FRF_AZ_KER_INT_KER_WIDTH 1
+#define        FRF_AZ_DRV_INT_EN_KER_LBN 0
+#define        FRF_AZ_DRV_INT_EN_KER_WIDTH 1
+
+/* INT_EN_REG_CHAR: Char Driver interrupt enable register */
+#define        FR_BZ_INT_EN_CHAR 0x00000020
+#define        FRF_BZ_CHAR_INT_LEVE_SEL_LBN 8
+#define        FRF_BZ_CHAR_INT_LEVE_SEL_WIDTH 6
+#define        FRF_BZ_CHAR_INT_CHAR_LBN 4
+#define        FRF_BZ_CHAR_INT_CHAR_WIDTH 1
+#define        FRF_BZ_CHAR_INT_KER_LBN 3
+#define        FRF_BZ_CHAR_INT_KER_WIDTH 1
+#define        FRF_BZ_DRV_INT_EN_CHAR_LBN 0
+#define        FRF_BZ_DRV_INT_EN_CHAR_WIDTH 1
+
+/* INT_ADR_REG_KER: Interrupt host address for Kernel driver */
+#define        FR_AZ_INT_ADR_KER 0x00000030
+#define        FRF_AZ_NORM_INT_VEC_DIS_KER_LBN 64
+#define        FRF_AZ_NORM_INT_VEC_DIS_KER_WIDTH 1
+#define        FRF_AZ_INT_ADR_KER_LBN 0
+#define        FRF_AZ_INT_ADR_KER_WIDTH 64
+
+/* INT_ADR_REG_CHAR: Interrupt host address for Char driver */
+#define        FR_BZ_INT_ADR_CHAR 0x00000040
+#define        FRF_BZ_NORM_INT_VEC_DIS_CHAR_LBN 64
+#define        FRF_BZ_NORM_INT_VEC_DIS_CHAR_WIDTH 1
+#define        FRF_BZ_INT_ADR_CHAR_LBN 0
+#define        FRF_BZ_INT_ADR_CHAR_WIDTH 64
+
+/* INT_ACK_KER: Kernel interrupt acknowledge register */
+#define        FR_AA_INT_ACK_KER 0x00000050
+#define        FRF_AA_INT_ACK_KER_FIELD_LBN 0
+#define        FRF_AA_INT_ACK_KER_FIELD_WIDTH 32
+
+/* INT_ISR0_REG: Function 0 Interrupt Acknowledge Status register */
+#define        FR_BZ_INT_ISR0 0x00000090
+#define        FRF_BZ_INT_ISR_REG_LBN 0
+#define        FRF_BZ_INT_ISR_REG_WIDTH 64
+
+/* HW_INIT_REG: Hardware initialization register */
+#define        FR_AZ_HW_INIT 0x000000c0
+#define        FRF_BB_BDMRD_CPLF_FULL_LBN 124
+#define        FRF_BB_BDMRD_CPLF_FULL_WIDTH 1
+#define        FRF_BB_PCIE_CPL_TIMEOUT_CTRL_LBN 121
+#define        FRF_BB_PCIE_CPL_TIMEOUT_CTRL_WIDTH 3
+#define        FRF_CZ_TX_MRG_TAGS_LBN 120
+#define        FRF_CZ_TX_MRG_TAGS_WIDTH 1
+#define        FRF_AB_TRGT_MASK_ALL_LBN 100
+#define        FRF_AB_TRGT_MASK_ALL_WIDTH 1
+#define        FRF_AZ_DOORBELL_DROP_LBN 92
+#define        FRF_AZ_DOORBELL_DROP_WIDTH 8
+#define        FRF_AB_TX_RREQ_MASK_EN_LBN 76
+#define        FRF_AB_TX_RREQ_MASK_EN_WIDTH 1
+#define        FRF_AB_PE_EIDLE_DIS_LBN 75
+#define        FRF_AB_PE_EIDLE_DIS_WIDTH 1
+/* Bits 44 and 45 trade places between Falcon A1 (AA) and later
+ * revisions: FC_BLOCKING_EN is bit 45 on AA but bit 44 on BB, while
+ * B2B_REQ_EN is bit 44 on AA but bit 45 on BZ.
+ */
+#define        FRF_AA_FC_BLOCKING_EN_LBN 45
+#define        FRF_AA_FC_BLOCKING_EN_WIDTH 1
+#define        FRF_BZ_B2B_REQ_EN_LBN 45
+#define        FRF_BZ_B2B_REQ_EN_WIDTH 1
+#define        FRF_AA_B2B_REQ_EN_LBN 44
+#define        FRF_AA_B2B_REQ_EN_WIDTH 1
+#define        FRF_BB_FC_BLOCKING_EN_LBN 44
+#define        FRF_BB_FC_BLOCKING_EN_WIDTH 1
+#define        FRF_AZ_POST_WR_MASK_LBN 40
+#define        FRF_AZ_POST_WR_MASK_WIDTH 4
+#define        FRF_AZ_TLP_TC_LBN 34
+#define        FRF_AZ_TLP_TC_WIDTH 3
+#define        FRF_AZ_TLP_ATTR_LBN 32
+#define        FRF_AZ_TLP_ATTR_WIDTH 2
+#define        FRF_AB_INTB_VEC_LBN 24
+#define        FRF_AB_INTB_VEC_WIDTH 5
+#define        FRF_AB_INTA_VEC_LBN 16
+#define        FRF_AB_INTA_VEC_WIDTH 5
+#define        FRF_AZ_WD_TIMER_LBN 8
+#define        FRF_AZ_WD_TIMER_WIDTH 8
+#define        FRF_AZ_US_DISABLE_LBN 5
+#define        FRF_AZ_US_DISABLE_WIDTH 1
+#define        FRF_AZ_TLP_EP_LBN 4
+#define        FRF_AZ_TLP_EP_WIDTH 1
+#define        FRF_AZ_ATTR_SEL_LBN 3
+#define        FRF_AZ_ATTR_SEL_WIDTH 1
+#define        FRF_AZ_TD_SEL_LBN 1
+#define        FRF_AZ_TD_SEL_WIDTH 1
+#define        FRF_AZ_TLP_TD_LBN 0
+#define        FRF_AZ_TLP_TD_WIDTH 1
+
+/* EE_SPI_HCMD_REG: SPI host command register */
+#define        FR_AB_EE_SPI_HCMD 0x00000100
+#define        FRF_AB_EE_SPI_HCMD_CMD_EN_LBN 31
+#define        FRF_AB_EE_SPI_HCMD_CMD_EN_WIDTH 1
+#define        FRF_AB_EE_WR_TIMER_ACTIVE_LBN 28
+#define        FRF_AB_EE_WR_TIMER_ACTIVE_WIDTH 1
+#define        FRF_AB_EE_SPI_HCMD_SF_SEL_LBN 24
+#define        FRF_AB_EE_SPI_HCMD_SF_SEL_WIDTH 1
+#define        FRF_AB_EE_SPI_HCMD_DABCNT_LBN 16
+#define        FRF_AB_EE_SPI_HCMD_DABCNT_WIDTH 5
+#define        FRF_AB_EE_SPI_HCMD_READ_LBN 15
+#define        FRF_AB_EE_SPI_HCMD_READ_WIDTH 1
+#define        FRF_AB_EE_SPI_HCMD_DUBCNT_LBN 12
+#define        FRF_AB_EE_SPI_HCMD_DUBCNT_WIDTH 2
+#define        FRF_AB_EE_SPI_HCMD_ADBCNT_LBN 8
+#define        FRF_AB_EE_SPI_HCMD_ADBCNT_WIDTH 2
+#define        FRF_AB_EE_SPI_HCMD_ENC_LBN 0
+#define        FRF_AB_EE_SPI_HCMD_ENC_WIDTH 8
+
+/* USR_EV_CFG: User Level Event Configuration register
+ * (offset 0x100 is also used by FR_AB_EE_SPI_HCMD; the AB and CZ
+ * revision ranges in the two names do not overlap)
+ */
+#define        FR_CZ_USR_EV_CFG 0x00000100
+#define        FRF_CZ_USREV_DIS_LBN 16
+#define        FRF_CZ_USREV_DIS_WIDTH 1
+#define        FRF_CZ_DFLT_EVQ_LBN 0
+#define        FRF_CZ_DFLT_EVQ_WIDTH 10
+
+/* EE_SPI_HADR_REG: SPI host address register */
+#define        FR_AB_EE_SPI_HADR 0x00000110
+#define        FRF_AB_EE_SPI_HADR_DUBYTE_LBN 24
+#define        FRF_AB_EE_SPI_HADR_DUBYTE_WIDTH 8
+#define        FRF_AB_EE_SPI_HADR_ADR_LBN 0
+#define        FRF_AB_EE_SPI_HADR_ADR_WIDTH 24
+
+/* EE_SPI_HDATA_REG: SPI host data register */
+#define        FR_AB_EE_SPI_HDATA 0x00000120
+#define        FRF_AB_EE_SPI_HDATA3_LBN 96
+#define        FRF_AB_EE_SPI_HDATA3_WIDTH 32
+#define        FRF_AB_EE_SPI_HDATA2_LBN 64
+#define        FRF_AB_EE_SPI_HDATA2_WIDTH 32
+#define        FRF_AB_EE_SPI_HDATA1_LBN 32
+#define        FRF_AB_EE_SPI_HDATA1_WIDTH 32
+#define        FRF_AB_EE_SPI_HDATA0_LBN 0
+#define        FRF_AB_EE_SPI_HDATA0_WIDTH 32
+
+/* EE_BASE_PAGE_REG: Expansion ROM base mirror register */
+#define        FR_AB_EE_BASE_PAGE 0x00000130
+#define        FRF_AB_EE_EXPROM_MASK_LBN 16
+#define        FRF_AB_EE_EXPROM_MASK_WIDTH 13
+#define        FRF_AB_EE_EXP_ROM_WINDOW_BASE_LBN 0
+#define        FRF_AB_EE_EXP_ROM_WINDOW_BASE_WIDTH 13
+
+/* EE_VPD_CFG0_REG: SPI/VPD configuration register 0 */
+#define        FR_AB_EE_VPD_CFG0 0x00000140
+#define        FRF_AB_EE_SF_FASTRD_EN_LBN 127
+#define        FRF_AB_EE_SF_FASTRD_EN_WIDTH 1
+#define        FRF_AB_EE_SF_CLOCK_DIV_LBN 120
+#define        FRF_AB_EE_SF_CLOCK_DIV_WIDTH 7
+#define        FRF_AB_EE_VPD_WIP_POLL_LBN 119
+#define        FRF_AB_EE_VPD_WIP_POLL_WIDTH 1
+#define        FRF_AB_EE_EE_CLOCK_DIV_LBN 112
+#define        FRF_AB_EE_EE_CLOCK_DIV_WIDTH 7
+#define        FRF_AB_EE_EE_WR_TMR_VALUE_LBN 96
+#define        FRF_AB_EE_EE_WR_TMR_VALUE_WIDTH 16
+#define        FRF_AB_EE_VPDW_LENGTH_LBN 80
+#define        FRF_AB_EE_VPDW_LENGTH_WIDTH 15
+#define        FRF_AB_EE_VPDW_BASE_LBN 64
+#define        FRF_AB_EE_VPDW_BASE_WIDTH 15
+#define        FRF_AB_EE_VPD_WR_CMD_EN_LBN 56
+#define        FRF_AB_EE_VPD_WR_CMD_EN_WIDTH 8
+#define        FRF_AB_EE_VPD_BASE_LBN 32
+#define        FRF_AB_EE_VPD_BASE_WIDTH 24
+#define        FRF_AB_EE_VPD_LENGTH_LBN 16
+#define        FRF_AB_EE_VPD_LENGTH_WIDTH 15
+#define        FRF_AB_EE_VPD_AD_SIZE_LBN 8
+#define        FRF_AB_EE_VPD_AD_SIZE_WIDTH 5
+#define        FRF_AB_EE_VPD_ACCESS_ON_LBN 5
+#define        FRF_AB_EE_VPD_ACCESS_ON_WIDTH 1
+#define        FRF_AB_EE_VPD_ACCESS_BLOCK_LBN 4
+#define        FRF_AB_EE_VPD_ACCESS_BLOCK_WIDTH 1
+#define        FRF_AB_EE_VPD_DEV_SF_SEL_LBN 2
+#define        FRF_AB_EE_VPD_DEV_SF_SEL_WIDTH 1
+#define        FRF_AB_EE_VPD_EN_AD9_MODE_LBN 1
+#define        FRF_AB_EE_VPD_EN_AD9_MODE_WIDTH 1
+#define        FRF_AB_EE_VPD_EN_LBN 0
+#define        FRF_AB_EE_VPD_EN_WIDTH 1
+
+/* EE_VPD_SW_CNTL_REG: VPD access SW control register */
+#define        FR_AB_EE_VPD_SW_CNTL 0x00000150
+#define        FRF_AB_EE_VPD_CYCLE_PENDING_LBN 31
+#define        FRF_AB_EE_VPD_CYCLE_PENDING_WIDTH 1
+#define        FRF_AB_EE_VPD_CYC_WRITE_LBN 28
+#define        FRF_AB_EE_VPD_CYC_WRITE_WIDTH 1
+#define        FRF_AB_EE_VPD_CYC_ADR_LBN 0
+#define        FRF_AB_EE_VPD_CYC_ADR_WIDTH 15
+
+/* EE_VPD_SW_DATA_REG: VPD access SW data register */
+#define        FR_AB_EE_VPD_SW_DATA 0x00000160
+#define        FRF_AB_EE_VPD_CYC_DAT_LBN 0
+#define        FRF_AB_EE_VPD_CYC_DAT_WIDTH 32
+
+/* PBMX_DBG_IADDR_REG: Capture Module address register */
+#define        FR_CZ_PBMX_DBG_IADDR 0x000001f0
+#define        FRF_CZ_PBMX_DBG_IADDR_LBN 0
+#define        FRF_CZ_PBMX_DBG_IADDR_WIDTH 32
+
+/* PCIE_CORE_INDIRECT_REG: Indirect Access to PCIE Core registers
+ * (offset 0x1f0 is also used by FR_CZ_PBMX_DBG_IADDR; the BB and CZ
+ * revision ranges in the two names do not overlap)
+ */
+#define        FR_BB_PCIE_CORE_INDIRECT 0x000001f0
+#define        FRF_BB_PCIE_CORE_TARGET_DATA_LBN 32
+#define        FRF_BB_PCIE_CORE_TARGET_DATA_WIDTH 32
+#define        FRF_BB_PCIE_CORE_INDIRECT_ACCESS_DIR_LBN 15
+#define        FRF_BB_PCIE_CORE_INDIRECT_ACCESS_DIR_WIDTH 1
+#define        FRF_BB_PCIE_CORE_TARGET_REG_ADRS_LBN 0
+#define        FRF_BB_PCIE_CORE_TARGET_REG_ADRS_WIDTH 12
+
+/* PBMX_DBG_IDATA_REG: Capture Module data register */
+#define        FR_CZ_PBMX_DBG_IDATA 0x000001f8
+#define        FRF_CZ_PBMX_DBG_IDATA_LBN 0
+#define        FRF_CZ_PBMX_DBG_IDATA_WIDTH 64
+
+/* NIC_STAT_REG: NIC status register */
+#define        FR_AB_NIC_STAT 0x00000200
+#define        FRF_BB_AER_DIS_LBN 34
+#define        FRF_BB_AER_DIS_WIDTH 1
+#define        FRF_BB_EE_STRAP_EN_LBN 31
+#define        FRF_BB_EE_STRAP_EN_WIDTH 1
+#define        FRF_BB_EE_STRAP_LBN 24
+#define        FRF_BB_EE_STRAP_WIDTH 4
+#define        FRF_BB_REVISION_ID_LBN 17
+#define        FRF_BB_REVISION_ID_WIDTH 7
+#define        FRF_AB_ONCHIP_SRAM_LBN 16
+#define        FRF_AB_ONCHIP_SRAM_WIDTH 1
+#define        FRF_AB_SF_PRST_LBN 9
+#define        FRF_AB_SF_PRST_WIDTH 1
+#define        FRF_AB_EE_PRST_LBN 8
+#define        FRF_AB_EE_PRST_WIDTH 1
+#define        FRF_AB_ATE_MODE_LBN 3
+#define        FRF_AB_ATE_MODE_WIDTH 1
+#define        FRF_AB_STRAP_PINS_LBN 0
+#define        FRF_AB_STRAP_PINS_WIDTH 3
+
+/* GPIO_CTL_REG: GPIO control register
+ *
+ * Fields are given as _LBN/_WIDTH pairs.  Adjacent fields tile exactly
+ * (e.g. GPIO_IN3: 104 + 8 = 112 = GPIO_OUT3_LBN), which is consistent
+ * with _LBN being the lowest bit of a field and _WIDTH its size in bits.
+ * The definitions below cover bits 0-127.
+ *
+ * Layout as defined here:
+ *   bits  0-31: per-pin PWRUP_VALUE (0-7), IN (8-15) and OUT (16-23) for
+ *               GPIO0-7, plus OEN (24-29) for GPIO0-5 only.  Bits 30 and
+ *               31 are USE_NIC_CLK and CLK156_OUT_EN, so no OEN field is
+ *               defined for GPIO6 or GPIO7.
+ *   bits 32-63: the same four groups for GPIO8-15 (all eight OEN bits).
+ *   bits 64-95 and 96-127: grouped PWRUP_VALUE/IN/OUT fields with
+ *               suffix 2 and 3 (8, 8 and 16 bits wide respectively).
+ */
+#define        FR_AB_GPIO_CTL 0x00000210
+#define        FRF_AB_GPIO_OUT3_LBN 112
+#define        FRF_AB_GPIO_OUT3_WIDTH 16
+#define        FRF_AB_GPIO_IN3_LBN 104
+#define        FRF_AB_GPIO_IN3_WIDTH 8
+#define        FRF_AB_GPIO_PWRUP_VALUE3_LBN 96
+#define        FRF_AB_GPIO_PWRUP_VALUE3_WIDTH 8
+#define        FRF_AB_GPIO_OUT2_LBN 80
+#define        FRF_AB_GPIO_OUT2_WIDTH 16
+#define        FRF_AB_GPIO_IN2_LBN 72
+#define        FRF_AB_GPIO_IN2_WIDTH 8
+#define        FRF_AB_GPIO_PWRUP_VALUE2_LBN 64
+#define        FRF_AB_GPIO_PWRUP_VALUE2_WIDTH 8
+#define        FRF_AB_GPIO15_OEN_LBN 63
+#define        FRF_AB_GPIO15_OEN_WIDTH 1
+#define        FRF_AB_GPIO14_OEN_LBN 62
+#define        FRF_AB_GPIO14_OEN_WIDTH 1
+#define        FRF_AB_GPIO13_OEN_LBN 61
+#define        FRF_AB_GPIO13_OEN_WIDTH 1
+#define        FRF_AB_GPIO12_OEN_LBN 60
+#define        FRF_AB_GPIO12_OEN_WIDTH 1
+#define        FRF_AB_GPIO11_OEN_LBN 59
+#define        FRF_AB_GPIO11_OEN_WIDTH 1
+#define        FRF_AB_GPIO10_OEN_LBN 58
+#define        FRF_AB_GPIO10_OEN_WIDTH 1
+#define        FRF_AB_GPIO9_OEN_LBN 57
+#define        FRF_AB_GPIO9_OEN_WIDTH 1
+#define        FRF_AB_GPIO8_OEN_LBN 56
+#define        FRF_AB_GPIO8_OEN_WIDTH 1
+#define        FRF_AB_GPIO15_OUT_LBN 55
+#define        FRF_AB_GPIO15_OUT_WIDTH 1
+#define        FRF_AB_GPIO14_OUT_LBN 54
+#define        FRF_AB_GPIO14_OUT_WIDTH 1
+#define        FRF_AB_GPIO13_OUT_LBN 53
+#define        FRF_AB_GPIO13_OUT_WIDTH 1
+#define        FRF_AB_GPIO12_OUT_LBN 52
+#define        FRF_AB_GPIO12_OUT_WIDTH 1
+#define        FRF_AB_GPIO11_OUT_LBN 51
+#define        FRF_AB_GPIO11_OUT_WIDTH 1
+#define        FRF_AB_GPIO10_OUT_LBN 50
+#define        FRF_AB_GPIO10_OUT_WIDTH 1
+#define        FRF_AB_GPIO9_OUT_LBN 49
+#define        FRF_AB_GPIO9_OUT_WIDTH 1
+#define        FRF_AB_GPIO8_OUT_LBN 48
+#define        FRF_AB_GPIO8_OUT_WIDTH 1
+#define        FRF_AB_GPIO15_IN_LBN 47
+#define        FRF_AB_GPIO15_IN_WIDTH 1
+#define        FRF_AB_GPIO14_IN_LBN 46
+#define        FRF_AB_GPIO14_IN_WIDTH 1
+#define        FRF_AB_GPIO13_IN_LBN 45
+#define        FRF_AB_GPIO13_IN_WIDTH 1
+#define        FRF_AB_GPIO12_IN_LBN 44
+#define        FRF_AB_GPIO12_IN_WIDTH 1
+#define        FRF_AB_GPIO11_IN_LBN 43
+#define        FRF_AB_GPIO11_IN_WIDTH 1
+#define        FRF_AB_GPIO10_IN_LBN 42
+#define        FRF_AB_GPIO10_IN_WIDTH 1
+#define        FRF_AB_GPIO9_IN_LBN 41
+#define        FRF_AB_GPIO9_IN_WIDTH 1
+#define        FRF_AB_GPIO8_IN_LBN 40
+#define        FRF_AB_GPIO8_IN_WIDTH 1
+#define        FRF_AB_GPIO15_PWRUP_VALUE_LBN 39
+#define        FRF_AB_GPIO15_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO14_PWRUP_VALUE_LBN 38
+#define        FRF_AB_GPIO14_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO13_PWRUP_VALUE_LBN 37
+#define        FRF_AB_GPIO13_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO12_PWRUP_VALUE_LBN 36
+#define        FRF_AB_GPIO12_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO11_PWRUP_VALUE_LBN 35
+#define        FRF_AB_GPIO11_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO10_PWRUP_VALUE_LBN 34
+#define        FRF_AB_GPIO10_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO9_PWRUP_VALUE_LBN 33
+#define        FRF_AB_GPIO9_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO8_PWRUP_VALUE_LBN 32
+#define        FRF_AB_GPIO8_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_CLK156_OUT_EN_LBN 31
+#define        FRF_AB_CLK156_OUT_EN_WIDTH 1
+#define        FRF_AB_USE_NIC_CLK_LBN 30
+#define        FRF_AB_USE_NIC_CLK_WIDTH 1
+#define        FRF_AB_GPIO5_OEN_LBN 29
+#define        FRF_AB_GPIO5_OEN_WIDTH 1
+#define        FRF_AB_GPIO4_OEN_LBN 28
+#define        FRF_AB_GPIO4_OEN_WIDTH 1
+#define        FRF_AB_GPIO3_OEN_LBN 27
+#define        FRF_AB_GPIO3_OEN_WIDTH 1
+#define        FRF_AB_GPIO2_OEN_LBN 26
+#define        FRF_AB_GPIO2_OEN_WIDTH 1
+#define        FRF_AB_GPIO1_OEN_LBN 25
+#define        FRF_AB_GPIO1_OEN_WIDTH 1
+#define        FRF_AB_GPIO0_OEN_LBN 24
+#define        FRF_AB_GPIO0_OEN_WIDTH 1
+#define        FRF_AB_GPIO7_OUT_LBN 23
+#define        FRF_AB_GPIO7_OUT_WIDTH 1
+#define        FRF_AB_GPIO6_OUT_LBN 22
+#define        FRF_AB_GPIO6_OUT_WIDTH 1
+#define        FRF_AB_GPIO5_OUT_LBN 21
+#define        FRF_AB_GPIO5_OUT_WIDTH 1
+#define        FRF_AB_GPIO4_OUT_LBN 20
+#define        FRF_AB_GPIO4_OUT_WIDTH 1
+#define        FRF_AB_GPIO3_OUT_LBN 19
+#define        FRF_AB_GPIO3_OUT_WIDTH 1
+#define        FRF_AB_GPIO2_OUT_LBN 18
+#define        FRF_AB_GPIO2_OUT_WIDTH 1
+#define        FRF_AB_GPIO1_OUT_LBN 17
+#define        FRF_AB_GPIO1_OUT_WIDTH 1
+#define        FRF_AB_GPIO0_OUT_LBN 16
+#define        FRF_AB_GPIO0_OUT_WIDTH 1
+#define        FRF_AB_GPIO7_IN_LBN 15
+#define        FRF_AB_GPIO7_IN_WIDTH 1
+#define        FRF_AB_GPIO6_IN_LBN 14
+#define        FRF_AB_GPIO6_IN_WIDTH 1
+#define        FRF_AB_GPIO5_IN_LBN 13
+#define        FRF_AB_GPIO5_IN_WIDTH 1
+#define        FRF_AB_GPIO4_IN_LBN 12
+#define        FRF_AB_GPIO4_IN_WIDTH 1
+#define        FRF_AB_GPIO3_IN_LBN 11
+#define        FRF_AB_GPIO3_IN_WIDTH 1
+#define        FRF_AB_GPIO2_IN_LBN 10
+#define        FRF_AB_GPIO2_IN_WIDTH 1
+#define        FRF_AB_GPIO1_IN_LBN 9
+#define        FRF_AB_GPIO1_IN_WIDTH 1
+#define        FRF_AB_GPIO0_IN_LBN 8
+#define        FRF_AB_GPIO0_IN_WIDTH 1
+#define        FRF_AB_GPIO7_PWRUP_VALUE_LBN 7
+#define        FRF_AB_GPIO7_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO6_PWRUP_VALUE_LBN 6
+#define        FRF_AB_GPIO6_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO5_PWRUP_VALUE_LBN 5
+#define        FRF_AB_GPIO5_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO4_PWRUP_VALUE_LBN 4
+#define        FRF_AB_GPIO4_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO3_PWRUP_VALUE_LBN 3
+#define        FRF_AB_GPIO3_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO2_PWRUP_VALUE_LBN 2
+#define        FRF_AB_GPIO2_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO1_PWRUP_VALUE_LBN 1
+#define        FRF_AB_GPIO1_PWRUP_VALUE_WIDTH 1
+#define        FRF_AB_GPIO0_PWRUP_VALUE_LBN 0
+#define        FRF_AB_GPIO0_PWRUP_VALUE_WIDTH 1
+
+/* GLB_CTL_REG: Global control register
+ *
+ * Every ..._RST_CTL field in bits 48-63 sits exactly 32 bits above an
+ * RST_... field in bits 16-31 (e.g. EXT_PHY_RST_CTL at 63, RST_EXT_PHY
+ * at 31).  The one naming mismatch is bit 49/17, where EE_RST_CTL lines
+ * up with RST_SF.  Bit 60/28 carries two names, an FRF_AA PCIX one and
+ * an FRF_BB BIU one; the AA/BB infix presumably selects the hardware
+ * revision the name applies to - confirm.
+ *
+ * The FFE_AB_EXT_PHY_RST_DUR_ constants enumerate all eight values of
+ * the 3-bit EXT_PHY_RST_DUR field; going by their names each step
+ * doubles the duration, from 80us (0) up to 10240us (7).
+ */
+#define        FR_AB_GLB_CTL 0x00000220
+#define        FRF_AB_EXT_PHY_RST_CTL_LBN 63
+#define        FRF_AB_EXT_PHY_RST_CTL_WIDTH 1
+#define        FRF_AB_XAUI_SD_RST_CTL_LBN 62
+#define        FRF_AB_XAUI_SD_RST_CTL_WIDTH 1
+#define        FRF_AB_PCIE_SD_RST_CTL_LBN 61
+#define        FRF_AB_PCIE_SD_RST_CTL_WIDTH 1
+#define        FRF_AA_PCIX_RST_CTL_LBN 60
+#define        FRF_AA_PCIX_RST_CTL_WIDTH 1
+#define        FRF_BB_BIU_RST_CTL_LBN 60
+#define        FRF_BB_BIU_RST_CTL_WIDTH 1
+#define        FRF_AB_PCIE_STKY_RST_CTL_LBN 59
+#define        FRF_AB_PCIE_STKY_RST_CTL_WIDTH 1
+#define        FRF_AB_PCIE_NSTKY_RST_CTL_LBN 58
+#define        FRF_AB_PCIE_NSTKY_RST_CTL_WIDTH 1
+#define        FRF_AB_PCIE_CORE_RST_CTL_LBN 57
+#define        FRF_AB_PCIE_CORE_RST_CTL_WIDTH 1
+#define        FRF_AB_XGRX_RST_CTL_LBN 56
+#define        FRF_AB_XGRX_RST_CTL_WIDTH 1
+#define        FRF_AB_XGTX_RST_CTL_LBN 55
+#define        FRF_AB_XGTX_RST_CTL_WIDTH 1
+#define        FRF_AB_EM_RST_CTL_LBN 54
+#define        FRF_AB_EM_RST_CTL_WIDTH 1
+#define        FRF_AB_EV_RST_CTL_LBN 53
+#define        FRF_AB_EV_RST_CTL_WIDTH 1
+#define        FRF_AB_SR_RST_CTL_LBN 52
+#define        FRF_AB_SR_RST_CTL_WIDTH 1
+#define        FRF_AB_RX_RST_CTL_LBN 51
+#define        FRF_AB_RX_RST_CTL_WIDTH 1
+#define        FRF_AB_TX_RST_CTL_LBN 50
+#define        FRF_AB_TX_RST_CTL_WIDTH 1
+#define        FRF_AB_EE_RST_CTL_LBN 49
+#define        FRF_AB_EE_RST_CTL_WIDTH 1
+#define        FRF_AB_CS_RST_CTL_LBN 48
+#define        FRF_AB_CS_RST_CTL_WIDTH 1
+#define        FRF_AB_HOT_RST_CTL_LBN 40
+#define        FRF_AB_HOT_RST_CTL_WIDTH 2
+#define        FRF_AB_RST_EXT_PHY_LBN 31
+#define        FRF_AB_RST_EXT_PHY_WIDTH 1
+#define        FRF_AB_RST_XAUI_SD_LBN 30
+#define        FRF_AB_RST_XAUI_SD_WIDTH 1
+#define        FRF_AB_RST_PCIE_SD_LBN 29
+#define        FRF_AB_RST_PCIE_SD_WIDTH 1
+#define        FRF_AA_RST_PCIX_LBN 28
+#define        FRF_AA_RST_PCIX_WIDTH 1
+#define        FRF_BB_RST_BIU_LBN 28
+#define        FRF_BB_RST_BIU_WIDTH 1
+#define        FRF_AB_RST_PCIE_STKY_LBN 27
+#define        FRF_AB_RST_PCIE_STKY_WIDTH 1
+#define        FRF_AB_RST_PCIE_NSTKY_LBN 26
+#define        FRF_AB_RST_PCIE_NSTKY_WIDTH 1
+#define        FRF_AB_RST_PCIE_CORE_LBN 25
+#define        FRF_AB_RST_PCIE_CORE_WIDTH 1
+#define        FRF_AB_RST_XGRX_LBN 24
+#define        FRF_AB_RST_XGRX_WIDTH 1
+#define        FRF_AB_RST_XGTX_LBN 23
+#define        FRF_AB_RST_XGTX_WIDTH 1
+#define        FRF_AB_RST_EM_LBN 22
+#define        FRF_AB_RST_EM_WIDTH 1
+#define        FRF_AB_RST_EV_LBN 21
+#define        FRF_AB_RST_EV_WIDTH 1
+#define        FRF_AB_RST_SR_LBN 20
+#define        FRF_AB_RST_SR_WIDTH 1
+#define        FRF_AB_RST_RX_LBN 19
+#define        FRF_AB_RST_RX_WIDTH 1
+#define        FRF_AB_RST_TX_LBN 18
+#define        FRF_AB_RST_TX_WIDTH 1
+#define        FRF_AB_RST_SF_LBN 17
+#define        FRF_AB_RST_SF_WIDTH 1
+#define        FRF_AB_RST_CS_LBN 16
+#define        FRF_AB_RST_CS_WIDTH 1
+#define        FRF_AB_INT_RST_DUR_LBN 4
+#define        FRF_AB_INT_RST_DUR_WIDTH 3
+#define        FRF_AB_EXT_PHY_RST_DUR_LBN 1
+#define        FRF_AB_EXT_PHY_RST_DUR_WIDTH 3
+#define        FFE_AB_EXT_PHY_RST_DUR_10240US 7
+#define        FFE_AB_EXT_PHY_RST_DUR_5120US 6
+#define        FFE_AB_EXT_PHY_RST_DUR_2560US 5
+#define        FFE_AB_EXT_PHY_RST_DUR_1280US 4
+#define        FFE_AB_EXT_PHY_RST_DUR_640US 3
+#define        FFE_AB_EXT_PHY_RST_DUR_320US 2
+#define        FFE_AB_EXT_PHY_RST_DUR_160US 1
+#define        FFE_AB_EXT_PHY_RST_DUR_80US 0
+#define        FRF_AB_SWRST_LBN 0
+#define        FRF_AB_SWRST_WIDTH 1
+
+/* FATAL_INTR_REG_KER: Fatal interrupt register for Kernel
+ *
+ * Bits 32-44 hold the ..._EN fields and bits 0-12 hold the matching
+ * fields without _EN, in the same order and exactly 32 bits lower.
+ * Bit 43/11 has two names (an FRF_AB PCI_BUSERR one and an FRF_CZ
+ * MBU_PERR one); the infix presumably selects the hardware revision.
+ *
+ * NOTE(review): bit 41 is named BUFID_OOB_INT_KER_EN while its
+ * counterpart at bit 9 is BUFID_DC_OOB_INT_KER; the two names are not
+ * symmetric - confirm whether that is intended.
+ */
+#define        FR_AZ_FATAL_INTR_KER 0x00000230
+#define        FRF_CZ_SRAM_PERR_INT_P_KER_EN_LBN 44
+#define        FRF_CZ_SRAM_PERR_INT_P_KER_EN_WIDTH 1
+#define        FRF_AB_PCI_BUSERR_INT_KER_EN_LBN 43
+#define        FRF_AB_PCI_BUSERR_INT_KER_EN_WIDTH 1
+#define        FRF_CZ_MBU_PERR_INT_KER_EN_LBN 43
+#define        FRF_CZ_MBU_PERR_INT_KER_EN_WIDTH 1
+#define        FRF_AZ_SRAM_OOB_INT_KER_EN_LBN 42
+#define        FRF_AZ_SRAM_OOB_INT_KER_EN_WIDTH 1
+#define        FRF_AZ_BUFID_OOB_INT_KER_EN_LBN 41
+#define        FRF_AZ_BUFID_OOB_INT_KER_EN_WIDTH 1
+#define        FRF_AZ_MEM_PERR_INT_KER_EN_LBN 40
+#define        FRF_AZ_MEM_PERR_INT_KER_EN_WIDTH 1
+#define        FRF_AZ_RBUF_OWN_INT_KER_EN_LBN 39
+#define        FRF_AZ_RBUF_OWN_INT_KER_EN_WIDTH 1
+#define        FRF_AZ_TBUF_OWN_INT_KER_EN_LBN 38
+#define        FRF_AZ_TBUF_OWN_INT_KER_EN_WIDTH 1
+#define        FRF_AZ_RDESCQ_OWN_INT_KER_EN_LBN 37
+#define        FRF_AZ_RDESCQ_OWN_INT_KER_EN_WIDTH 1
+#define        FRF_AZ_TDESCQ_OWN_INT_KER_EN_LBN 36
+#define        FRF_AZ_TDESCQ_OWN_INT_KER_EN_WIDTH 1
+#define        FRF_AZ_EVQ_OWN_INT_KER_EN_LBN 35
+#define        FRF_AZ_EVQ_OWN_INT_KER_EN_WIDTH 1
+#define        FRF_AZ_EVF_OFLO_INT_KER_EN_LBN 34
+#define        FRF_AZ_EVF_OFLO_INT_KER_EN_WIDTH 1
+#define        FRF_AZ_ILL_ADR_INT_KER_EN_LBN 33
+#define        FRF_AZ_ILL_ADR_INT_KER_EN_WIDTH 1
+#define        FRF_AZ_SRM_PERR_INT_KER_EN_LBN 32
+#define        FRF_AZ_SRM_PERR_INT_KER_EN_WIDTH 1
+#define        FRF_CZ_SRAM_PERR_INT_P_KER_LBN 12
+#define        FRF_CZ_SRAM_PERR_INT_P_KER_WIDTH 1
+#define        FRF_AB_PCI_BUSERR_INT_KER_LBN 11
+#define        FRF_AB_PCI_BUSERR_INT_KER_WIDTH 1
+#define        FRF_CZ_MBU_PERR_INT_KER_LBN 11
+#define        FRF_CZ_MBU_PERR_INT_KER_WIDTH 1
+#define        FRF_AZ_SRAM_OOB_INT_KER_LBN 10
+#define        FRF_AZ_SRAM_OOB_INT_KER_WIDTH 1
+#define        FRF_AZ_BUFID_DC_OOB_INT_KER_LBN 9
+#define        FRF_AZ_BUFID_DC_OOB_INT_KER_WIDTH 1
+#define        FRF_AZ_MEM_PERR_INT_KER_LBN 8
+#define        FRF_AZ_MEM_PERR_INT_KER_WIDTH 1
+#define        FRF_AZ_RBUF_OWN_INT_KER_LBN 7
+#define        FRF_AZ_RBUF_OWN_INT_KER_WIDTH 1
+#define        FRF_AZ_TBUF_OWN_INT_KER_LBN 6
+#define        FRF_AZ_TBUF_OWN_INT_KER_WIDTH 1
+#define        FRF_AZ_RDESCQ_OWN_INT_KER_LBN 5
+#define        FRF_AZ_RDESCQ_OWN_INT_KER_WIDTH 1
+#define        FRF_AZ_TDESCQ_OWN_INT_KER_LBN 4
+#define        FRF_AZ_TDESCQ_OWN_INT_KER_WIDTH 1
+#define        FRF_AZ_EVQ_OWN_INT_KER_LBN 3
+#define        FRF_AZ_EVQ_OWN_INT_KER_WIDTH 1
+#define        FRF_AZ_EVF_OFLO_INT_KER_LBN 2
+#define        FRF_AZ_EVF_OFLO_INT_KER_WIDTH 1
+#define        FRF_AZ_ILL_ADR_INT_KER_LBN 1
+#define        FRF_AZ_ILL_ADR_INT_KER_WIDTH 1
+#define        FRF_AZ_SRM_PERR_INT_KER_LBN 0
+#define        FRF_AZ_SRM_PERR_INT_KER_WIDTH 1
+
+/* FATAL_INTR_REG_CHAR: Fatal interrupt register for Char
+ *
+ * Bits 32-44 hold the ..._EN fields and bits 0-12 hold the matching
+ * fields without _EN, in the same order and exactly 32 bits lower.
+ * Bit 43/11 has two names (an FRF_BB PCI_BUSERR one and an FRF_CZ
+ * MBU_PERR one); the infix presumably selects the hardware revision.
+ *
+ * NOTE(review): bit 41 is named BUFID_OOB_INT_CHAR_EN while its
+ * counterpart at bit 9 is BUFID_DC_OOB_INT_CHAR; the two names are not
+ * symmetric - confirm whether that is intended.
+ */
+#define        FR_BZ_FATAL_INTR_CHAR 0x00000240
+#define        FRF_CZ_SRAM_PERR_INT_P_CHAR_EN_LBN 44
+#define        FRF_CZ_SRAM_PERR_INT_P_CHAR_EN_WIDTH 1
+#define        FRF_BB_PCI_BUSERR_INT_CHAR_EN_LBN 43
+#define        FRF_BB_PCI_BUSERR_INT_CHAR_EN_WIDTH 1
+#define        FRF_CZ_MBU_PERR_INT_CHAR_EN_LBN 43
+#define        FRF_CZ_MBU_PERR_INT_CHAR_EN_WIDTH 1
+#define        FRF_BZ_SRAM_OOB_INT_CHAR_EN_LBN 42
+#define        FRF_BZ_SRAM_OOB_INT_CHAR_EN_WIDTH 1
+#define        FRF_BZ_BUFID_OOB_INT_CHAR_EN_LBN 41
+#define        FRF_BZ_BUFID_OOB_INT_CHAR_EN_WIDTH 1
+#define        FRF_BZ_MEM_PERR_INT_CHAR_EN_LBN 40
+#define        FRF_BZ_MEM_PERR_INT_CHAR_EN_WIDTH 1
+#define        FRF_BZ_RBUF_OWN_INT_CHAR_EN_LBN 39
+#define        FRF_BZ_RBUF_OWN_INT_CHAR_EN_WIDTH 1
+#define        FRF_BZ_TBUF_OWN_INT_CHAR_EN_LBN 38
+#define        FRF_BZ_TBUF_OWN_INT_CHAR_EN_WIDTH 1
+#define        FRF_BZ_RDESCQ_OWN_INT_CHAR_EN_LBN 37
+#define        FRF_BZ_RDESCQ_OWN_INT_CHAR_EN_WIDTH 1
+#define        FRF_BZ_TDESCQ_OWN_INT_CHAR_EN_LBN 36
+#define        FRF_BZ_TDESCQ_OWN_INT_CHAR_EN_WIDTH 1
+#define        FRF_BZ_EVQ_OWN_INT_CHAR_EN_LBN 35
+#define        FRF_BZ_EVQ_OWN_INT_CHAR_EN_WIDTH 1
+#define        FRF_BZ_EVF_OFLO_INT_CHAR_EN_LBN 34
+#define        FRF_BZ_EVF_OFLO_INT_CHAR_EN_WIDTH 1
+#define        FRF_BZ_ILL_ADR_INT_CHAR_EN_LBN 33
+#define        FRF_BZ_ILL_ADR_INT_CHAR_EN_WIDTH 1
+#define        FRF_BZ_SRM_PERR_INT_CHAR_EN_LBN 32
+#define        FRF_BZ_SRM_PERR_INT_CHAR_EN_WIDTH 1
+#define        FRF_CZ_SRAM_PERR_INT_P_CHAR_LBN 12
+#define        FRF_CZ_SRAM_PERR_INT_P_CHAR_WIDTH 1
+#define        FRF_BB_PCI_BUSERR_INT_CHAR_LBN 11
+#define        FRF_BB_PCI_BUSERR_INT_CHAR_WIDTH 1
+#define        FRF_CZ_MBU_PERR_INT_CHAR_LBN 11
+#define        FRF_CZ_MBU_PERR_INT_CHAR_WIDTH 1
+#define        FRF_BZ_SRAM_OOB_INT_CHAR_LBN 10
+#define        FRF_BZ_SRAM_OOB_INT_CHAR_WIDTH 1
+#define        FRF_BZ_BUFID_DC_OOB_INT_CHAR_LBN 9
+#define        FRF_BZ_BUFID_DC_OOB_INT_CHAR_WIDTH 1
+#define        FRF_BZ_MEM_PERR_INT_CHAR_LBN 8
+#define        FRF_BZ_MEM_PERR_INT_CHAR_WIDTH 1
+#define        FRF_BZ_RBUF_OWN_INT_CHAR_LBN 7
+#define        FRF_BZ_RBUF_OWN_INT_CHAR_WIDTH 1
+#define        FRF_BZ_TBUF_OWN_INT_CHAR_LBN 6
+#define        FRF_BZ_TBUF_OWN_INT_CHAR_WIDTH 1
+#define        FRF_BZ_RDESCQ_OWN_INT_CHAR_LBN 5
+#define        FRF_BZ_RDESCQ_OWN_INT_CHAR_WIDTH 1
+#define        FRF_BZ_TDESCQ_OWN_INT_CHAR_LBN 4
+#define        FRF_BZ_TDESCQ_OWN_INT_CHAR_WIDTH 1
+#define        FRF_BZ_EVQ_OWN_INT_CHAR_LBN 3
+#define        FRF_BZ_EVQ_OWN_INT_CHAR_WIDTH 1
+#define        FRF_BZ_EVF_OFLO_INT_CHAR_LBN 2
+#define        FRF_BZ_EVF_OFLO_INT_CHAR_WIDTH 1
+#define        FRF_BZ_ILL_ADR_INT_CHAR_LBN 1
+#define        FRF_BZ_ILL_ADR_INT_CHAR_WIDTH 1
+#define        FRF_BZ_SRM_PERR_INT_CHAR_LBN 0
+#define        FRF_BZ_SRM_PERR_INT_CHAR_WIDTH 1
+
+/* DP_CTRL_REG: Datapath control register
+ *
+ * Only one field is defined here: FLS_EVQ_ID, starting at bit 0 with a
+ * width of 12.
+ */
+#define        FR_BZ_DP_CTRL 0x00000250
+#define        FRF_BZ_FLS_EVQ_ID_LBN 0
+#define        FRF_BZ_FLS_EVQ_ID_WIDTH 12
+
+/* MEM_STAT_REG: Memory status register
+ *
+ * The FRF_AB fields and the single FRF_CZ field both start at bit 0, so
+ * they look like alternative layouts of the same register (presumably
+ * per hardware revision - confirm).
+ *
+ * NOTE(review): as defined, FRF_AB_MBIST_ERR (LBN 0, WIDTH 40) reaches
+ * bit 39 and therefore overlaps FRF_AB_MBIST_CORR, which starts at bit
+ * 38.  MBIST_CORR itself ends exactly where MEM_PERR_VEC begins
+ * (38 + 15 = 53).  Confirm the MBIST_ERR width against the hardware
+ * documentation before relying on it.
+ */
+#define        FR_AZ_MEM_STAT 0x00000260
+#define        FRF_AB_MEM_PERR_VEC_LBN 53
+#define        FRF_AB_MEM_PERR_VEC_WIDTH 38
+#define        FRF_AB_MBIST_CORR_LBN 38
+#define        FRF_AB_MBIST_CORR_WIDTH 15
+#define        FRF_AB_MBIST_ERR_LBN 0
+#define        FRF_AB_MBIST_ERR_WIDTH 40
+#define        FRF_CZ_MEM_PERR_VEC_LBN 0
+#define        FRF_CZ_MEM_PERR_VEC_WIDTH 35
+
+/* CS_DEBUG_REG: Debug register
+ *
+ * CS_DEBUG_EN (bit 0) is the only FRF_AZ field.  The FRF_AB fields tile
+ * bits 1-52 without gaps: eight 5-bit ..._DEBUG_ADDR fields (1-40)
+ * followed by four 3-bit select fields (41-52).  The two FRF_CZ fields
+ * (CS_PORT_FPE at bits 1-35, CS_PORT_NUM at bits 40-41) occupy bit
+ * ranges that the AB fields also use, so AB and CZ appear to be
+ * alternative layouts - presumably per hardware revision, confirm.
+ */
+#define        FR_AZ_CS_DEBUG 0x00000270
+#define        FRF_AB_GLB_DEBUG2_SEL_LBN 50
+#define        FRF_AB_GLB_DEBUG2_SEL_WIDTH 3
+#define        FRF_AB_DEBUG_BLK_SEL2_LBN 47
+#define        FRF_AB_DEBUG_BLK_SEL2_WIDTH 3
+#define        FRF_AB_DEBUG_BLK_SEL1_LBN 44
+#define        FRF_AB_DEBUG_BLK_SEL1_WIDTH 3
+#define        FRF_AB_DEBUG_BLK_SEL0_LBN 41
+#define        FRF_AB_DEBUG_BLK_SEL0_WIDTH 3
+#define        FRF_CZ_CS_PORT_NUM_LBN 40
+#define        FRF_CZ_CS_PORT_NUM_WIDTH 2
+#define        FRF_AB_MISC_DEBUG_ADDR_LBN 36
+#define        FRF_AB_MISC_DEBUG_ADDR_WIDTH 5
+#define        FRF_AB_SERDES_DEBUG_ADDR_LBN 31
+#define        FRF_AB_SERDES_DEBUG_ADDR_WIDTH 5
+#define        FRF_CZ_CS_PORT_FPE_LBN 1
+#define        FRF_CZ_CS_PORT_FPE_WIDTH 35
+#define        FRF_AB_EM_DEBUG_ADDR_LBN 26
+#define        FRF_AB_EM_DEBUG_ADDR_WIDTH 5
+#define        FRF_AB_SR_DEBUG_ADDR_LBN 21
+#define        FRF_AB_SR_DEBUG_ADDR_WIDTH 5
+#define        FRF_AB_EV_DEBUG_ADDR_LBN 16
+#define        FRF_AB_EV_DEBUG_ADDR_WIDTH 5
+#define        FRF_AB_RX_DEBUG_ADDR_LBN 11
+#define        FRF_AB_RX_DEBUG_ADDR_WIDTH 5
+#define        FRF_AB_TX_DEBUG_ADDR_LBN 6
+#define        FRF_AB_TX_DEBUG_ADDR_WIDTH 5
+#define        FRF_AB_CS_BIU_DEBUG_ADDR_LBN 1
+#define        FRF_AB_CS_BIU_DEBUG_ADDR_WIDTH 5
+#define        FRF_AZ_CS_DEBUG_EN_LBN 0
+#define        FRF_AZ_CS_DEBUG_EN_WIDTH 1
+
+/* DRIVER_REG: Driver scratch register [0-7]
+ *
+ * _ROWS is 8, matching the [0-7] range in the title; _STEP (16) is
+ * presumably the address stride between consecutive rows - confirm.
+ * A single 32-bit field, DRIVER_DW0, is defined at bit 0.
+ */
+#define        FR_AZ_DRIVER 0x00000280
+#define        FR_AZ_DRIVER_STEP 16
+#define        FR_AZ_DRIVER_ROWS 8
+#define        FRF_AZ_DRIVER_DW0_LBN 0
+#define        FRF_AZ_DRIVER_DW0_WIDTH 32
+
+/* ALTERA_BUILD_REG: Altera build register
+ *
+ * One field only: ALTERA_BUILD_VER, 32 bits wide starting at bit 0.
+ */
+#define        FR_AZ_ALTERA_BUILD 0x00000300
+#define        FRF_AZ_ALTERA_BUILD_VER_LBN 0
+#define        FRF_AZ_ALTERA_BUILD_VER_WIDTH 32
+
+/* CSR_SPARE_REG: Spare register
+ *
+ * CSR_SPARE_BITS occupies bits 0-31.  MEM_PERR_EN has an FRF_AB
+ * definition (38 bits) and an FRF_CZ definition (35 bits), both
+ * starting at bit 64.  FRF_AB_MEM_PERR_EN_TX_DATA (bits 72-73) lies
+ * inside the AB MEM_PERR_EN range, so it names a sub-range of that
+ * field rather than a separate one.
+ */
+#define        FR_AZ_CSR_SPARE 0x00000310
+#define        FRF_AB_MEM_PERR_EN_LBN 64
+#define        FRF_AB_MEM_PERR_EN_WIDTH 38
+#define        FRF_CZ_MEM_PERR_EN_LBN 64
+#define        FRF_CZ_MEM_PERR_EN_WIDTH 35
+#define        FRF_AB_MEM_PERR_EN_TX_DATA_LBN 72
+#define        FRF_AB_MEM_PERR_EN_TX_DATA_WIDTH 2
+#define        FRF_AZ_CSR_SPARE_BITS_LBN 0
+#define        FRF_AZ_CSR_SPARE_BITS_WIDTH 32
+
+/* PCIE_SD_CTL0123_REG: PCIE SerDes control register 0 to 3
+ *
+ * Most fields come in ..._H / ..._L pairs at adjacent bit positions.
+ * The FFE_AB_ constants listed after RXTERMADJ, TXTERMADJ and RXEQCTL
+ * enumerate the four values (0-3) of those 2-bit fields; their names
+ * carry no _H/_L suffix, so they presumably apply to both halves of the
+ * pair - confirm.  No field is defined here for bits 20-23.
+ */
+#define        FR_AB_PCIE_SD_CTL0123 0x00000320
+#define        FRF_AB_PCIE_TESTSIG_H_LBN 96
+#define        FRF_AB_PCIE_TESTSIG_H_WIDTH 19
+#define        FRF_AB_PCIE_TESTSIG_L_LBN 64
+#define        FRF_AB_PCIE_TESTSIG_L_WIDTH 19
+#define        FRF_AB_PCIE_OFFSET_LBN 56
+#define        FRF_AB_PCIE_OFFSET_WIDTH 8
+#define        FRF_AB_PCIE_OFFSETEN_H_LBN 55
+#define        FRF_AB_PCIE_OFFSETEN_H_WIDTH 1
+#define        FRF_AB_PCIE_OFFSETEN_L_LBN 54
+#define        FRF_AB_PCIE_OFFSETEN_L_WIDTH 1
+#define        FRF_AB_PCIE_HIVMODE_H_LBN 53
+#define        FRF_AB_PCIE_HIVMODE_H_WIDTH 1
+#define        FRF_AB_PCIE_HIVMODE_L_LBN 52
+#define        FRF_AB_PCIE_HIVMODE_L_WIDTH 1
+#define        FRF_AB_PCIE_PARRESET_H_LBN 51
+#define        FRF_AB_PCIE_PARRESET_H_WIDTH 1
+#define        FRF_AB_PCIE_PARRESET_L_LBN 50
+#define        FRF_AB_PCIE_PARRESET_L_WIDTH 1
+#define        FRF_AB_PCIE_LPBKWDRV_H_LBN 49
+#define        FRF_AB_PCIE_LPBKWDRV_H_WIDTH 1
+#define        FRF_AB_PCIE_LPBKWDRV_L_LBN 48
+#define        FRF_AB_PCIE_LPBKWDRV_L_WIDTH 1
+#define        FRF_AB_PCIE_LPBK_LBN 40
+#define        FRF_AB_PCIE_LPBK_WIDTH 8
+#define        FRF_AB_PCIE_PARLPBK_LBN 32
+#define        FRF_AB_PCIE_PARLPBK_WIDTH 8
+#define        FRF_AB_PCIE_RXTERMADJ_H_LBN 30
+#define        FRF_AB_PCIE_RXTERMADJ_H_WIDTH 2
+#define        FRF_AB_PCIE_RXTERMADJ_L_LBN 28
+#define        FRF_AB_PCIE_RXTERMADJ_L_WIDTH 2
+#define        FFE_AB_PCIE_RXTERMADJ_MIN15PCNT 3
+#define        FFE_AB_PCIE_RXTERMADJ_PL10PCNT 2
+#define        FFE_AB_PCIE_RXTERMADJ_MIN17PCNT 1
+#define        FFE_AB_PCIE_RXTERMADJ_NOMNL 0
+#define        FRF_AB_PCIE_TXTERMADJ_H_LBN 26
+#define        FRF_AB_PCIE_TXTERMADJ_H_WIDTH 2
+#define        FRF_AB_PCIE_TXTERMADJ_L_LBN 24
+#define        FRF_AB_PCIE_TXTERMADJ_L_WIDTH 2
+#define        FFE_AB_PCIE_TXTERMADJ_MIN15PCNT 3
+#define        FFE_AB_PCIE_TXTERMADJ_PL10PCNT 2
+#define        FFE_AB_PCIE_TXTERMADJ_MIN17PCNT 1
+#define        FFE_AB_PCIE_TXTERMADJ_NOMNL 0
+#define        FRF_AB_PCIE_RXEQCTL_H_LBN 18
+#define        FRF_AB_PCIE_RXEQCTL_H_WIDTH 2
+#define        FRF_AB_PCIE_RXEQCTL_L_LBN 16
+#define        FRF_AB_PCIE_RXEQCTL_L_WIDTH 2
+#define        FFE_AB_PCIE_RXEQCTL_OFF_ALT 3
+#define        FFE_AB_PCIE_RXEQCTL_OFF 2
+#define        FFE_AB_PCIE_RXEQCTL_MIN 1
+#define        FFE_AB_PCIE_RXEQCTL_MAX 0
+#define        FRF_AB_PCIE_HIDRV_LBN 8
+#define        FRF_AB_PCIE_HIDRV_WIDTH 8
+#define        FRF_AB_PCIE_LODRV_LBN 0
+#define        FRF_AB_PCIE_LODRV_WIDTH 8
+
+/* PCIE_SD_CTL45_REG: PCIE SerDes control register 4 and 5
+ *
+ * Sixteen 4-bit fields packed without gaps: DEQ0-DEQ7 in bits 0-31 and
+ * DTX0-DTX7 in bits 32-63, each group in ascending index order.
+ */
+#define        FR_AB_PCIE_SD_CTL45 0x00000330
+#define        FRF_AB_PCIE_DTX7_LBN 60
+#define        FRF_AB_PCIE_DTX7_WIDTH 4
+#define        FRF_AB_PCIE_DTX6_LBN 56
+#define        FRF_AB_PCIE_DTX6_WIDTH 4
+#define        FRF_AB_PCIE_DTX5_LBN 52
+#define        FRF_AB_PCIE_DTX5_WIDTH 4
+#define        FRF_AB_PCIE_DTX4_LBN 48
+#define        FRF_AB_PCIE_DTX4_WIDTH 4
+#define        FRF_AB_PCIE_DTX3_LBN 44
+#define        FRF_AB_PCIE_DTX3_WIDTH 4
+#define        FRF_AB_PCIE_DTX2_LBN 40
+#define        FRF_AB_PCIE_DTX2_WIDTH 4
+#define        FRF_AB_PCIE_DTX1_LBN 36
+#define        FRF_AB_PCIE_DTX1_WIDTH 4
+#define        FRF_AB_PCIE_DTX0_LBN 32
+#define        FRF_AB_PCIE_DTX0_WIDTH 4
+#define        FRF_AB_PCIE_DEQ7_LBN 28
+#define        FRF_AB_PCIE_DEQ7_WIDTH 4
+#define        FRF_AB_PCIE_DEQ6_LBN 24
+#define        FRF_AB_PCIE_DEQ6_WIDTH 4
+#define        FRF_AB_PCIE_DEQ5_LBN 20
+#define        FRF_AB_PCIE_DEQ5_WIDTH 4
+#define        FRF_AB_PCIE_DEQ4_LBN 16
+#define        FRF_AB_PCIE_DEQ4_WIDTH 4
+#define        FRF_AB_PCIE_DEQ3_LBN 12
+#define        FRF_AB_PCIE_DEQ3_WIDTH 4
+#define        FRF_AB_PCIE_DEQ2_LBN 8
+#define        FRF_AB_PCIE_DEQ2_WIDTH 4
+#define        FRF_AB_PCIE_DEQ1_LBN 4
+#define        FRF_AB_PCIE_DEQ1_WIDTH 4
+#define        FRF_AB_PCIE_DEQ0_LBN 0
+#define        FRF_AB_PCIE_DEQ0_WIDTH 4
+
+/* PCIE_PCS_CTL_STAT_REG: PCIE PCS control and status register
+ *
+ * Bits 0-15: PRBSSEL (8 bits) followed by four _L/_H pairs of 1-bit
+ * fields.  Bits 32-55: PRBSERRH0 and PRBSERR (8 bits each) followed by
+ * the two 4-bit PRBSERRCOUNT0 halves.  No field is defined here for
+ * bits 16-31.
+ */
+#define        FR_AB_PCIE_PCS_CTL_STAT 0x00000340
+#define        FRF_AB_PCIE_PRBSERRCOUNT0_H_LBN 52
+#define        FRF_AB_PCIE_PRBSERRCOUNT0_H_WIDTH 4
+#define        FRF_AB_PCIE_PRBSERRCOUNT0_L_LBN 48
+#define        FRF_AB_PCIE_PRBSERRCOUNT0_L_WIDTH 4
+#define        FRF_AB_PCIE_PRBSERR_LBN 40
+#define        FRF_AB_PCIE_PRBSERR_WIDTH 8
+#define        FRF_AB_PCIE_PRBSERRH0_LBN 32
+#define        FRF_AB_PCIE_PRBSERRH0_WIDTH 8
+#define        FRF_AB_PCIE_FASTINIT_H_LBN 15
+#define        FRF_AB_PCIE_FASTINIT_H_WIDTH 1
+#define        FRF_AB_PCIE_FASTINIT_L_LBN 14
+#define        FRF_AB_PCIE_FASTINIT_L_WIDTH 1
+#define        FRF_AB_PCIE_CTCDISABLE_H_LBN 13
+#define        FRF_AB_PCIE_CTCDISABLE_H_WIDTH 1
+#define        FRF_AB_PCIE_CTCDISABLE_L_LBN 12
+#define        FRF_AB_PCIE_CTCDISABLE_L_WIDTH 1
+#define        FRF_AB_PCIE_PRBSSYNC_H_LBN 11
+#define        FRF_AB_PCIE_PRBSSYNC_H_WIDTH 1
+#define        FRF_AB_PCIE_PRBSSYNC_L_LBN 10
+#define        FRF_AB_PCIE_PRBSSYNC_L_WIDTH 1
+#define        FRF_AB_PCIE_PRBSERRACK_H_LBN 9
+#define        FRF_AB_PCIE_PRBSERRACK_H_WIDTH 1
+#define        FRF_AB_PCIE_PRBSERRACK_L_LBN 8
+#define        FRF_AB_PCIE_PRBSERRACK_L_WIDTH 1
+#define        FRF_AB_PCIE_PRBSSEL_LBN 0
+#define        FRF_AB_PCIE_PRBSSEL_WIDTH 8
+
+/* DEBUG_DATA_OUT_REG: Live Debug and Debug 2 out ports
+ *
+ * Two adjacent fields: DEBUG1_PORT in bits 0-24 and DEBUG2_PORT in bits
+ * 25-39.
+ */
+#define        FR_BB_DEBUG_DATA_OUT 0x00000350
+#define        FRF_BB_DEBUG2_PORT_LBN 25
+#define        FRF_BB_DEBUG2_PORT_WIDTH 15
+#define        FRF_BB_DEBUG1_PORT_LBN 0
+#define        FRF_BB_DEBUG1_PORT_WIDTH 25
+
+/* EVQ_RPTR_REGP0: Event queue read pointer register
+ *
+ * This group defines four address variants of the read pointer (P0,
+ * KER, the plain EVQ_RPTR table and P123), each with its own base
+ * value, _STEP and _ROWS; the plain table has separate _ROWS for BB
+ * (4096) and CZ (1024).  The only field definitions are the two FRF_AZ
+ * ones at the end of the group - EVQ_RPTR in bits 0-14 and EVQ_RPTR_VLD
+ * at bit 15 - which are presumably shared by all four variants; confirm.
+ */
+#define        FR_BZ_EVQ_RPTR_P0 0x00000400
+#define        FR_BZ_EVQ_RPTR_P0_STEP 8192
+#define        FR_BZ_EVQ_RPTR_P0_ROWS 1024
+/* EVQ_RPTR_REG_KER: Event queue read pointer register */
+#define        FR_AA_EVQ_RPTR_KER 0x00011b00
+#define        FR_AA_EVQ_RPTR_KER_STEP 4
+#define        FR_AA_EVQ_RPTR_KER_ROWS 4
+/* EVQ_RPTR_REG: Event queue read pointer register */
+#define        FR_BZ_EVQ_RPTR 0x00fa0000
+#define        FR_BZ_EVQ_RPTR_STEP 16
+#define        FR_BB_EVQ_RPTR_ROWS 4096
+#define        FR_CZ_EVQ_RPTR_ROWS 1024
+/* EVQ_RPTR_REGP123: Event queue read pointer register */
+#define        FR_BB_EVQ_RPTR_P123 0x01000400
+#define        FR_BB_EVQ_RPTR_P123_STEP 8192
+#define        FR_BB_EVQ_RPTR_P123_ROWS 3072
+#define        FRF_AZ_EVQ_RPTR_VLD_LBN 15
+#define        FRF_AZ_EVQ_RPTR_VLD_WIDTH 1
+#define        FRF_AZ_EVQ_RPTR_LBN 0
+#define        FRF_AZ_EVQ_RPTR_WIDTH 15
+
+/* TIMER_COMMAND_REGP0: Timer Command Registers
+ *
+ * Three address variants are defined.  P0 (BZ) and KER (AA) share the
+ * same base value (0x420) and _STEP (8192) and differ only in _ROWS
+ * (1024 vs 4); P123 (BB) has 3072 rows.
+ *
+ * The field layout has two versions: FRF_CZ uses a 14-bit TC_TIMER_VAL
+ * with the 2-bit TC_TIMER_MODE at bit 14, FRF_AB a 12-bit value with
+ * the mode at bit 12.
+ */
+#define        FR_BZ_TIMER_COMMAND_P0 0x00000420
+#define        FR_BZ_TIMER_COMMAND_P0_STEP 8192
+#define        FR_BZ_TIMER_COMMAND_P0_ROWS 1024
+/* TIMER_COMMAND_REG_KER: Timer Command Registers */
+#define        FR_AA_TIMER_COMMAND_KER 0x00000420
+#define        FR_AA_TIMER_COMMAND_KER_STEP 8192
+#define        FR_AA_TIMER_COMMAND_KER_ROWS 4
+/* TIMER_COMMAND_REGP123: Timer Command Registers */
+#define        FR_BB_TIMER_COMMAND_P123 0x01000420
+#define        FR_BB_TIMER_COMMAND_P123_STEP 8192
+#define        FR_BB_TIMER_COMMAND_P123_ROWS 3072
+#define        FRF_CZ_TC_TIMER_MODE_LBN 14
+#define        FRF_CZ_TC_TIMER_MODE_WIDTH 2
+#define        FRF_AB_TC_TIMER_MODE_LBN 12
+#define        FRF_AB_TC_TIMER_MODE_WIDTH 2
+#define        FRF_CZ_TC_TIMER_VAL_LBN 0
+#define        FRF_CZ_TC_TIMER_VAL_WIDTH 14
+#define        FRF_AB_TC_TIMER_VAL_LBN 0
+#define        FRF_AB_TC_TIMER_VAL_WIDTH 12
+
+/* DRV_EV_REG: Driver generated event register
+ *
+ * DRV_EV_DATA fills bits 0-63; DRV_EV_QID is the 12-bit field directly
+ * above it, starting at bit 64.
+ */
+#define        FR_AZ_DRV_EV 0x00000440
+#define        FRF_AZ_DRV_EV_QID_LBN 64
+#define        FRF_AZ_DRV_EV_QID_WIDTH 12
+#define        FRF_AZ_DRV_EV_DATA_LBN 0
+#define        FRF_AZ_DRV_EV_DATA_WIDTH 64
+
+/* EVQ_CTL_REG: Event queue control register
+ *
+ * The FRF_AZ fields tile bits 0-14 (two 7-bit FIFO thresholds, then
+ * EVQ_OWNERR_CTL at bit 14).  RX_EVQ_WAKEUP_MASK starts at bit 15 in
+ * both its FRF_CZ and FRF_BB definitions but is 10 vs 6 bits wide.
+ */
+#define        FR_AZ_EVQ_CTL 0x00000450
+#define        FRF_CZ_RX_EVQ_WAKEUP_MASK_LBN 15
+#define        FRF_CZ_RX_EVQ_WAKEUP_MASK_WIDTH 10
+#define        FRF_BB_RX_EVQ_WAKEUP_MASK_LBN 15
+#define        FRF_BB_RX_EVQ_WAKEUP_MASK_WIDTH 6
+#define        FRF_AZ_EVQ_OWNERR_CTL_LBN 14
+#define        FRF_AZ_EVQ_OWNERR_CTL_WIDTH 1
+#define        FRF_AZ_EVQ_FIFO_AF_TH_LBN 7
+#define        FRF_AZ_EVQ_FIFO_AF_TH_WIDTH 7
+#define        FRF_AZ_EVQ_FIFO_NOTAF_TH_LBN 0
+#define        FRF_AZ_EVQ_FIFO_NOTAF_TH_WIDTH 7
+
+/* EVQ_CNT1_REG: Event counter 1 register
+ *
+ * Six 20-bit fields packed back to back in bits 0-119, followed by the
+ * 7-bit EVQ_CNT_PRE_FIFO field at bit 120.
+ */
+#define        FR_AZ_EVQ_CNT1 0x00000460
+#define        FRF_AZ_EVQ_CNT_PRE_FIFO_LBN 120
+#define        FRF_AZ_EVQ_CNT_PRE_FIFO_WIDTH 7
+#define        FRF_AZ_EVQ_CNT_TOBIU_LBN 100
+#define        FRF_AZ_EVQ_CNT_TOBIU_WIDTH 20
+#define        FRF_AZ_EVQ_TX_REQ_CNT_LBN 80
+#define        FRF_AZ_EVQ_TX_REQ_CNT_WIDTH 20
+#define        FRF_AZ_EVQ_RX_REQ_CNT_LBN 60
+#define        FRF_AZ_EVQ_RX_REQ_CNT_WIDTH 20
+#define        FRF_AZ_EVQ_EM_REQ_CNT_LBN 40
+#define        FRF_AZ_EVQ_EM_REQ_CNT_WIDTH 20
+#define        FRF_AZ_EVQ_CSR_REQ_CNT_LBN 20
+#define        FRF_AZ_EVQ_CSR_REQ_CNT_WIDTH 20
+#define        FRF_AZ_EVQ_ERR_REQ_CNT_LBN 0
+#define        FRF_AZ_EVQ_ERR_REQ_CNT_WIDTH 20
+
+/* EVQ_CNT2_REG: Event counter 2 register
+ *
+ * Four 20-bit fields in bits 0-79, the 4-bit EVQ_RDY_CNT at bits 80-83,
+ * then two more 20-bit fields in bits 84-123, with no gaps in between.
+ */
+#define        FR_AZ_EVQ_CNT2 0x00000470
+#define        FRF_AZ_EVQ_UPD_REQ_CNT_LBN 104
+#define        FRF_AZ_EVQ_UPD_REQ_CNT_WIDTH 20
+#define        FRF_AZ_EVQ_CLR_REQ_CNT_LBN 84
+#define        FRF_AZ_EVQ_CLR_REQ_CNT_WIDTH 20
+#define        FRF_AZ_EVQ_RDY_CNT_LBN 80
+#define        FRF_AZ_EVQ_RDY_CNT_WIDTH 4
+#define        FRF_AZ_EVQ_WU_REQ_CNT_LBN 60
+#define        FRF_AZ_EVQ_WU_REQ_CNT_WIDTH 20
+#define        FRF_AZ_EVQ_WET_REQ_CNT_LBN 40
+#define        FRF_AZ_EVQ_WET_REQ_CNT_WIDTH 20
+#define        FRF_AZ_EVQ_INIT_REQ_CNT_LBN 20
+#define        FRF_AZ_EVQ_INIT_REQ_CNT_WIDTH 20
+#define        FRF_AZ_EVQ_TM_REQ_CNT_LBN 0
+#define        FRF_AZ_EVQ_TM_REQ_CNT_WIDTH 20
+
+/* USR_EV_REG: Event mailbox register
+ *
+ * Defined with _ROWS 1024 and _STEP 8192 (presumably row count and
+ * address stride - confirm); one 32-bit USR_EV_DATA field at bit 0.
+ */
+#define        FR_CZ_USR_EV 0x00000540
+#define        FR_CZ_USR_EV_STEP 8192
+#define        FR_CZ_USR_EV_ROWS 1024
+#define        FRF_CZ_USR_EV_DATA_LBN 0
+#define        FRF_CZ_USR_EV_DATA_WIDTH 32
+
+/* BUF_TBL_CFG_REG: Buffer table configuration register
+ *
+ * Only BUF_TBL_MODE is defined: a single bit at position 3.
+ */
+#define        FR_AZ_BUF_TBL_CFG 0x00000600
+#define        FRF_AZ_BUF_TBL_MODE_LBN 3
+#define        FRF_AZ_BUF_TBL_MODE_WIDTH 1
+
+/* SRM_RX_DC_CFG_REG: SRAM receive descriptor cache configuration register
+ *
+ * SRM_RX_DC_BASE_ADR occupies bits 0-20; SRM_CLK_TMP_EN is the single
+ * bit directly above it (bit 21).
+ */
+#define        FR_AZ_SRM_RX_DC_CFG 0x00000610
+#define        FRF_AZ_SRM_CLK_TMP_EN_LBN 21
+#define        FRF_AZ_SRM_CLK_TMP_EN_WIDTH 1
+#define        FRF_AZ_SRM_RX_DC_BASE_ADR_LBN 0
+#define        FRF_AZ_SRM_RX_DC_BASE_ADR_WIDTH 21
+
+/* SRM_TX_DC_CFG_REG: SRAM transmit descriptor cache configuration register
+ *
+ * One field only: SRM_TX_DC_BASE_ADR, 21 bits wide starting at bit 0.
+ */
+#define        FR_AZ_SRM_TX_DC_CFG 0x00000620
+#define        FRF_AZ_SRM_TX_DC_BASE_ADR_LBN 0
+#define        FRF_AZ_SRM_TX_DC_BASE_ADR_WIDTH 21
+
+/* SRM_CFG_REG: SRAM configuration register
+ *
+ * Five fields tiling bits 0-5: SRM_BANK_SIZE (bits 0-1), then the
+ * single-bit SRM_NUM_BANK, SRM_INIT_EN, SRM_OOB_BUF_INTEN and
+ * SRM_OOB_ADR_INTEN at bits 2, 3, 4 and 5.
+ */
+#define        FR_AZ_SRM_CFG 0x00000630
+#define        FRF_AZ_SRM_OOB_ADR_INTEN_LBN 5
+#define        FRF_AZ_SRM_OOB_ADR_INTEN_WIDTH 1
+#define        FRF_AZ_SRM_OOB_BUF_INTEN_LBN 4
+#define        FRF_AZ_SRM_OOB_BUF_INTEN_WIDTH 1
+#define        FRF_AZ_SRM_INIT_EN_LBN 3
+#define        FRF_AZ_SRM_INIT_EN_WIDTH 1
+#define        FRF_AZ_SRM_NUM_BANK_LBN 2
+#define        FRF_AZ_SRM_NUM_BANK_WIDTH 1
+#define        FRF_AZ_SRM_BANK_SIZE_LBN 0
+#define        FRF_AZ_SRM_BANK_SIZE_WIDTH 2
+
+/* BUF_TBL_UPD_REG: Buffer table update register
+ *
+ * Two 20-bit ID fields (BUF_CLR_START_ID at bit 0, BUF_CLR_END_ID at
+ * bit 32) and two single-bit command fields at the top of the low 64
+ * bits (BUF_CLR_CMD at bit 62, BUF_UPD_CMD at bit 63).
+ */
+#define        FR_AZ_BUF_TBL_UPD 0x00000650
+#define        FRF_AZ_BUF_UPD_CMD_LBN 63
+#define        FRF_AZ_BUF_UPD_CMD_WIDTH 1
+#define        FRF_AZ_BUF_CLR_CMD_LBN 62
+#define        FRF_AZ_BUF_CLR_CMD_WIDTH 1
+#define        FRF_AZ_BUF_CLR_END_ID_LBN 32
+#define        FRF_AZ_BUF_CLR_END_ID_WIDTH 20
+#define        FRF_AZ_BUF_CLR_START_ID_LBN 0
+#define        FRF_AZ_BUF_CLR_START_ID_WIDTH 20
+
+/* SRM_UPD_EVQ_REG: Buffer table update register
+ *
+ * One field only: SRM_UPD_EVQ_ID, 12 bits wide starting at bit 0.
+ *
+ * NOTE(review): the title text is identical to the one used for
+ * BUF_TBL_UPD_REG, while this register's name and its single field
+ * refer to an event queue ID; the description may have been copied -
+ * confirm against the hardware documentation.
+ */
+#define        FR_AZ_SRM_UPD_EVQ 0x00000660
+#define        FRF_AZ_SRM_UPD_EVQ_ID_LBN 0
+#define        FRF_AZ_SRM_UPD_EVQ_ID_WIDTH 12
+
+/* SRAM_PARITY_REG: SRAM parity register.
+ *
+ * Four single-bit positions (0-3) are defined.  Bit 0 has two names,
+ * FRF_AB_FORCE_SRAM_PERR and FRF_CZ_FORCE_SRAM_SINGLE_ERR; bits 1-3 have
+ * FRF_CZ definitions only.  The AB/CZ infix presumably selects the
+ * hardware revision - confirm.
+ */
+#define        FR_AZ_SRAM_PARITY 0x00000670
+#define        FRF_CZ_BYPASS_ECC_LBN 3
+#define        FRF_CZ_BYPASS_ECC_WIDTH 1
+#define        FRF_CZ_SEC_INT_LBN 2
+#define        FRF_CZ_SEC_INT_WIDTH 1
+#define        FRF_CZ_FORCE_SRAM_DOUBLE_ERR_LBN 1
+#define        FRF_CZ_FORCE_SRAM_DOUBLE_ERR_WIDTH 1
+#define        FRF_AB_FORCE_SRAM_PERR_LBN 0
+#define        FRF_AB_FORCE_SRAM_PERR_WIDTH 1
+#define        FRF_CZ_FORCE_SRAM_SINGLE_ERR_LBN 0
+#define        FRF_CZ_FORCE_SRAM_SINGLE_ERR_WIDTH 1
+
+/* RX_CFG_REG: Receive configuration register
+ *
+ * Two layouts are interleaved in this list, ordered by descending _LBN:
+ *   - FRF_AA fields tile bits 1-35 (XOFF_MAC_TH at 1 up to
+ *     RX_DESC_PUSH_EN at 35).
+ *   - FRF_BZ/FRF_BB/FRF_CZ fields tile bits 1-85 (XOFF_MAC_TH at 1 up
+ *     to RX_MIN_KBUF_SIZE ending at bit 85).
+ * Fields that exist in both layouts keep the same name but differ in
+ * position and sometimes width (e.g. RX_XOFF_MAC_TH is 5 bits wide in
+ * AA and 9 bits wide in BZ, both starting at bit 1).  RX_XOFF_MAC_EN
+ * (bit 0) is the only FRF_AZ field.  The infix presumably selects the
+ * hardware revision a definition applies to - confirm.
+ */
+#define        FR_AZ_RX_CFG 0x00000800
+#define        FRF_CZ_RX_MIN_KBUF_SIZE_LBN 72
+#define        FRF_CZ_RX_MIN_KBUF_SIZE_WIDTH 14
+#define        FRF_CZ_RX_HDR_SPLIT_EN_LBN 71
+#define        FRF_CZ_RX_HDR_SPLIT_EN_WIDTH 1
+#define        FRF_CZ_RX_HDR_SPLIT_PLD_BUF_SIZE_LBN 62
+#define        FRF_CZ_RX_HDR_SPLIT_PLD_BUF_SIZE_WIDTH 9
+#define        FRF_CZ_RX_HDR_SPLIT_HDR_BUF_SIZE_LBN 53
+#define        FRF_CZ_RX_HDR_SPLIT_HDR_BUF_SIZE_WIDTH 9
+#define        FRF_CZ_RX_PRE_RFF_IPG_LBN 49
+#define        FRF_CZ_RX_PRE_RFF_IPG_WIDTH 4
+#define        FRF_BZ_RX_TCP_SUP_LBN 48
+#define        FRF_BZ_RX_TCP_SUP_WIDTH 1
+#define        FRF_BZ_RX_INGR_EN_LBN 47
+#define        FRF_BZ_RX_INGR_EN_WIDTH 1
+#define        FRF_BZ_RX_IP_HASH_LBN 46
+#define        FRF_BZ_RX_IP_HASH_WIDTH 1
+#define        FRF_BZ_RX_HASH_ALG_LBN 45
+#define        FRF_BZ_RX_HASH_ALG_WIDTH 1
+#define        FRF_BZ_RX_HASH_INSRT_HDR_LBN 44
+#define        FRF_BZ_RX_HASH_INSRT_HDR_WIDTH 1
+#define        FRF_BZ_RX_DESC_PUSH_EN_LBN 43
+#define        FRF_BZ_RX_DESC_PUSH_EN_WIDTH 1
+#define        FRF_BZ_RX_RDW_PATCH_EN_LBN 42
+#define        FRF_BZ_RX_RDW_PATCH_EN_WIDTH 1
+#define        FRF_BB_RX_PCI_BURST_SIZE_LBN 39
+#define        FRF_BB_RX_PCI_BURST_SIZE_WIDTH 3
+#define        FRF_BZ_RX_OWNERR_CTL_LBN 38
+#define        FRF_BZ_RX_OWNERR_CTL_WIDTH 1
+#define        FRF_BZ_RX_XON_TX_TH_LBN 33
+#define        FRF_BZ_RX_XON_TX_TH_WIDTH 5
+#define        FRF_AA_RX_DESC_PUSH_EN_LBN 35
+#define        FRF_AA_RX_DESC_PUSH_EN_WIDTH 1
+#define        FRF_AA_RX_RDW_PATCH_EN_LBN 34
+#define        FRF_AA_RX_RDW_PATCH_EN_WIDTH 1
+#define        FRF_AA_RX_PCI_BURST_SIZE_LBN 31
+#define        FRF_AA_RX_PCI_BURST_SIZE_WIDTH 3
+#define        FRF_BZ_RX_XOFF_TX_TH_LBN 28
+#define        FRF_BZ_RX_XOFF_TX_TH_WIDTH 5
+#define        FRF_AA_RX_OWNERR_CTL_LBN 30
+#define        FRF_AA_RX_OWNERR_CTL_WIDTH 1
+#define        FRF_AA_RX_XON_TX_TH_LBN 25
+#define        FRF_AA_RX_XON_TX_TH_WIDTH 5
+#define        FRF_BZ_RX_USR_BUF_SIZE_LBN 19
+#define        FRF_BZ_RX_USR_BUF_SIZE_WIDTH 9
+#define        FRF_AA_RX_XOFF_TX_TH_LBN 20
+#define        FRF_AA_RX_XOFF_TX_TH_WIDTH 5
+#define        FRF_AA_RX_USR_BUF_SIZE_LBN 11
+#define        FRF_AA_RX_USR_BUF_SIZE_WIDTH 9
+#define        FRF_BZ_RX_XON_MAC_TH_LBN 10
+#define        FRF_BZ_RX_XON_MAC_TH_WIDTH 9
+#define        FRF_AA_RX_XON_MAC_TH_LBN 6
+#define        FRF_AA_RX_XON_MAC_TH_WIDTH 5
+#define        FRF_BZ_RX_XOFF_MAC_TH_LBN 1
+#define        FRF_BZ_RX_XOFF_MAC_TH_WIDTH 9
+#define        FRF_AA_RX_XOFF_MAC_TH_LBN 1
+#define        FRF_AA_RX_XOFF_MAC_TH_WIDTH 5
+#define        FRF_AZ_RX_XOFF_MAC_EN_LBN 0
+#define        FRF_AZ_RX_XOFF_MAC_EN_WIDTH 1
+
+/*
+ * RX_FILTER_CTL_REG: Receive filter control register
+ * Fields are listed from the highest LBN (94) down to 0. Every field with
+ * an LBN of 41 or above carries the FRF_CZ_ prefix; the fields at LBN 40
+ * and below carry FRF_BZ_.
+ */
+#define        FR_BZ_RX_FILTER_CTL 0x00000810
+#define        FRF_CZ_ETHERNET_WILDCARD_SEARCH_LIMIT_LBN 94
+#define        FRF_CZ_ETHERNET_WILDCARD_SEARCH_LIMIT_WIDTH 8
+#define        FRF_CZ_ETHERNET_FULL_SEARCH_LIMIT_LBN 86
+#define        FRF_CZ_ETHERNET_FULL_SEARCH_LIMIT_WIDTH 8
+#define        FRF_CZ_RX_FILTER_ALL_VLAN_ETHERTYPES_LBN 85
+#define        FRF_CZ_RX_FILTER_ALL_VLAN_ETHERTYPES_WIDTH 1
+#define        FRF_CZ_RX_VLAN_MATCH_ETHERTYPE_LBN 69
+#define        FRF_CZ_RX_VLAN_MATCH_ETHERTYPE_WIDTH 16
+#define        FRF_CZ_MULTICAST_NOMATCH_Q_ID_LBN 57
+#define        FRF_CZ_MULTICAST_NOMATCH_Q_ID_WIDTH 12
+#define        FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED_LBN 56
+#define        FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED_WIDTH 1
+#define        FRF_CZ_MULTICAST_NOMATCH_IP_OVERRIDE_LBN 55
+#define        FRF_CZ_MULTICAST_NOMATCH_IP_OVERRIDE_WIDTH 1
+#define        FRF_CZ_UNICAST_NOMATCH_Q_ID_LBN 43
+#define        FRF_CZ_UNICAST_NOMATCH_Q_ID_WIDTH 12
+#define        FRF_CZ_UNICAST_NOMATCH_RSS_ENABLED_LBN 42
+#define        FRF_CZ_UNICAST_NOMATCH_RSS_ENABLED_WIDTH 1
+#define        FRF_CZ_UNICAST_NOMATCH_IP_OVERRIDE_LBN 41
+#define        FRF_CZ_UNICAST_NOMATCH_IP_OVERRIDE_WIDTH 1
+#define        FRF_BZ_SCATTER_ENBL_NO_MATCH_Q_LBN 40
+#define        FRF_BZ_SCATTER_ENBL_NO_MATCH_Q_WIDTH 1
+#define        FRF_BZ_UDP_FULL_SRCH_LIMIT_LBN 32
+#define        FRF_BZ_UDP_FULL_SRCH_LIMIT_WIDTH 8
+#define        FRF_BZ_NUM_KER_LBN 24
+#define        FRF_BZ_NUM_KER_WIDTH 2
+#define        FRF_BZ_UDP_WILD_SRCH_LIMIT_LBN 16
+#define        FRF_BZ_UDP_WILD_SRCH_LIMIT_WIDTH 8
+#define        FRF_BZ_TCP_WILD_SRCH_LIMIT_LBN 8
+#define        FRF_BZ_TCP_WILD_SRCH_LIMIT_WIDTH 8
+#define        FRF_BZ_TCP_FULL_SRCH_LIMIT_LBN 0
+#define        FRF_BZ_TCP_FULL_SRCH_LIMIT_WIDTH 8
+
+/*
+ * RX_FLUSH_DESCQ_REG: Receive flush descriptor queue register
+ * A 1-bit CMD field at LBN 24 sits above the 12-bit RX_FLUSH_DESCQ field
+ * at LBN 0 (presumably the number of the queue to flush - confirm).
+ */
+#define        FR_AZ_RX_FLUSH_DESCQ 0x00000820
+#define        FRF_AZ_RX_FLUSH_DESCQ_CMD_LBN 24
+#define        FRF_AZ_RX_FLUSH_DESCQ_CMD_WIDTH 1
+#define        FRF_AZ_RX_FLUSH_DESCQ_LBN 0
+#define        FRF_AZ_RX_FLUSH_DESCQ_WIDTH 12
+
+/*
+ * RX_DESC_UPD_REGP0: Receive descriptor update register.
+ * This group defines three register names: FR_BZ_RX_DESC_UPD_P0 and
+ * FR_AA_RX_DESC_UPD_KER share offset 0x00000830 but differ in ROWS
+ * (1024 vs 4), while FR_BB_RX_DESC_UPD_P123 is at 0x01000830 with 3072
+ * rows. All three use STEP 8192. The FRF_AZ_RX_DESC_* fields are defined
+ * once, after the third name; presumably they apply to all three - confirm.
+ */
+#define        FR_BZ_RX_DESC_UPD_P0 0x00000830
+#define        FR_BZ_RX_DESC_UPD_P0_STEP 8192
+#define        FR_BZ_RX_DESC_UPD_P0_ROWS 1024
+/* RX_DESC_UPD_REG_KER: Receive descriptor update register. */
+#define        FR_AA_RX_DESC_UPD_KER 0x00000830
+#define        FR_AA_RX_DESC_UPD_KER_STEP 8192
+#define        FR_AA_RX_DESC_UPD_KER_ROWS 4
+/* RX_DESC_UPD_REGP123: Receive descriptor update register. */
+#define        FR_BB_RX_DESC_UPD_P123 0x01000830
+#define        FR_BB_RX_DESC_UPD_P123_STEP 8192
+#define        FR_BB_RX_DESC_UPD_P123_ROWS 3072
+#define        FRF_AZ_RX_DESC_WPTR_LBN 96
+#define        FRF_AZ_RX_DESC_WPTR_WIDTH 12
+#define        FRF_AZ_RX_DESC_PUSH_CMD_LBN 95
+#define        FRF_AZ_RX_DESC_PUSH_CMD_WIDTH 1
+#define        FRF_AZ_RX_DESC_LBN 0
+#define        FRF_AZ_RX_DESC_WIDTH 64
+
+/*
+ * RX_DC_CFG_REG: Receive descriptor cache configuration register
+ * The FFE_AZ_RX_DC_SIZE_* constants (0-3, named for sizes 8/16/32/64) fit
+ * the 2-bit RX_DC_SIZE field and are presumably its encodings - confirm.
+ */
+#define        FR_AZ_RX_DC_CFG 0x00000840
+#define        FRF_AB_RX_MAX_PF_LBN 2
+#define        FRF_AB_RX_MAX_PF_WIDTH 2
+#define        FRF_AZ_RX_DC_SIZE_LBN 0
+#define        FRF_AZ_RX_DC_SIZE_WIDTH 2
+#define        FFE_AZ_RX_DC_SIZE_64 3
+#define        FFE_AZ_RX_DC_SIZE_32 2
+#define        FFE_AZ_RX_DC_SIZE_16 1
+#define        FFE_AZ_RX_DC_SIZE_8 0
+
+/*
+ * RX_DC_PF_WM_REG: Receive descriptor cache pre-fetch watermark register
+ * Two adjacent 6-bit fields: LWM at LBN 0 and HWM at LBN 6.
+ */
+#define        FR_AZ_RX_DC_PF_WM 0x00000850
+#define        FRF_AZ_RX_DC_PF_HWM_LBN 6
+#define        FRF_AZ_RX_DC_PF_HWM_WIDTH 6
+#define        FRF_AZ_RX_DC_PF_LWM_LBN 0
+#define        FRF_AZ_RX_DC_PF_LWM_WIDTH 6
+
+/*
+ * RX_RSS_TKEY_REG: RSS Toeplitz hash key
+ * The key is exposed as two 64-bit fields, LO at LBN 0 and HI at LBN 64.
+ */
+#define        FR_BZ_RX_RSS_TKEY 0x00000860
+#define        FRF_BZ_RX_RSS_TKEY_HI_LBN 64
+#define        FRF_BZ_RX_RSS_TKEY_HI_WIDTH 64
+#define        FRF_BZ_RX_RSS_TKEY_LO_LBN 0
+#define        FRF_BZ_RX_RSS_TKEY_LO_WIDTH 64
+
+/*
+ * RX_NODESC_DROP_REG: Receive dropped packet counter register
+ * The counter field at LBN 0 is defined twice with different widths:
+ * 32 bits under the FRF_CZ_ prefix and 16 bits under FRF_AB_.
+ */
+#define        FR_AZ_RX_NODESC_DROP 0x00000880
+#define        FRF_CZ_RX_NODESC_DROP_CNT_LBN 0
+#define        FRF_CZ_RX_NODESC_DROP_CNT_WIDTH 32
+#define        FRF_AB_RX_NODESC_DROP_CNT_LBN 0
+#define        FRF_AB_RX_NODESC_DROP_CNT_WIDTH 16
+
+/*
+ * RX_SELF_RST_REG: Receive self reset register
+ * The register and all of its fields carry the AA prefix. Two 4-bit
+ * fields (MAX_LU_LAT at LBN 0, MAX_PF_LAT at LBN 4) are followed by
+ * single-bit fields at LBN 8, 9, 16 and 17.
+ */
+#define        FR_AA_RX_SELF_RST 0x00000890
+#define        FRF_AA_RX_ISCSI_DIS_LBN 17
+#define        FRF_AA_RX_ISCSI_DIS_WIDTH 1
+#define        FRF_AA_RX_SW_RST_REG_LBN 16
+#define        FRF_AA_RX_SW_RST_REG_WIDTH 1
+#define FRF_AA_RX_NODESC_WAIT_DIS_LBN 9
+#define FRF_AA_RX_NODESC_WAIT_DIS_WIDTH 1
+#define        FRF_AA_RX_SELF_RST_EN_LBN 8
+#define        FRF_AA_RX_SELF_RST_EN_WIDTH 1
+#define        FRF_AA_RX_MAX_PF_LAT_LBN 4
+#define        FRF_AA_RX_MAX_PF_LAT_WIDTH 4
+#define        FRF_AA_RX_MAX_LU_LAT_LBN 0
+#define        FRF_AA_RX_MAX_LU_LAT_WIDTH 4
+
+/* RX_DEBUG_REG: undocumented register (a single 64-bit field at LBN 0) */
+#define        FR_AZ_RX_DEBUG 0x000008a0
+#define        FRF_AZ_RX_DEBUG_LBN 0
+#define        FRF_AZ_RX_DEBUG_WIDTH 64
+
+/*
+ * RX_PUSH_DROP_REG: Receive descriptor push dropped counter register
+ * Holds a single 32-bit count field at LBN 0.
+ */
+#define        FR_AZ_RX_PUSH_DROP 0x000008b0
+#define        FRF_AZ_RX_PUSH_DROP_CNT_LBN 0
+#define        FRF_AZ_RX_PUSH_DROP_CNT_WIDTH 32
+
+/*
+ * RX_RSS_IPV6_REG1: IPv6 RSS Toeplitz hash key low bytes
+ * The IPv6 key is spread over three registers: a 128-bit TKEY_LO field
+ * here, a 128-bit TKEY_MID field in REG2 and a 64-bit TKEY_HI field in
+ * REG3 (320 bits of key in total).
+ */
+#define        FR_CZ_RX_RSS_IPV6_REG1 0x000008d0
+#define        FRF_CZ_RX_RSS_IPV6_TKEY_LO_LBN 0
+#define        FRF_CZ_RX_RSS_IPV6_TKEY_LO_WIDTH 128
+
+/* RX_RSS_IPV6_REG2: IPv6 RSS Toeplitz hash key middle bytes */
+#define        FR_CZ_RX_RSS_IPV6_REG2 0x000008e0
+#define        FRF_CZ_RX_RSS_IPV6_TKEY_MID_LBN 0
+#define        FRF_CZ_RX_RSS_IPV6_TKEY_MID_WIDTH 128
+
+/*
+ * RX_RSS_IPV6_REG3: IPv6 RSS Toeplitz hash key upper bytes and IPv6 RSS settings
+ * The 64-bit TKEY_HI field at LBN 0 is followed by three single-bit
+ * settings fields at LBN 64, 65 and 66.
+ */
+#define        FR_CZ_RX_RSS_IPV6_REG3 0x000008f0
+#define        FRF_CZ_RX_RSS_IPV6_THASH_ENABLE_LBN 66
+#define        FRF_CZ_RX_RSS_IPV6_THASH_ENABLE_WIDTH 1
+#define        FRF_CZ_RX_RSS_IPV6_IP_THASH_ENABLE_LBN 65
+#define        FRF_CZ_RX_RSS_IPV6_IP_THASH_ENABLE_WIDTH 1
+#define        FRF_CZ_RX_RSS_IPV6_TCP_SUPPRESS_LBN 64
+#define        FRF_CZ_RX_RSS_IPV6_TCP_SUPPRESS_WIDTH 1
+#define        FRF_CZ_RX_RSS_IPV6_TKEY_HI_LBN 0
+#define        FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH 64
+
+/*
+ * TX_FLUSH_DESCQ_REG: Transmit flush descriptor queue register
+ * A 1-bit CMD field at LBN 12 sits directly above the 12-bit
+ * TX_FLUSH_DESCQ field at LBN 0 (presumably the number of the queue to
+ * flush - confirm).
+ */
+#define        FR_AZ_TX_FLUSH_DESCQ 0x00000a00
+#define        FRF_AZ_TX_FLUSH_DESCQ_CMD_LBN 12
+#define        FRF_AZ_TX_FLUSH_DESCQ_CMD_WIDTH 1
+#define        FRF_AZ_TX_FLUSH_DESCQ_LBN 0
+#define        FRF_AZ_TX_FLUSH_DESCQ_WIDTH 12
+
+/*
+ * TX_DESC_UPD_REGP0: Transmit descriptor update register.
+ * This group defines three register names: FR_BZ_TX_DESC_UPD_P0 and
+ * FR_AA_TX_DESC_UPD_KER share offset 0x00000a10 but differ in ROWS
+ * (1024 vs 8), while FR_BB_TX_DESC_UPD_P123 is at 0x01000a10 with 3072
+ * rows. All three use STEP 8192. The FRF_AZ_TX_DESC_* fields are defined
+ * once, after the third name; presumably they apply to all three - confirm.
+ */
+#define        FR_BZ_TX_DESC_UPD_P0 0x00000a10
+#define        FR_BZ_TX_DESC_UPD_P0_STEP 8192
+#define        FR_BZ_TX_DESC_UPD_P0_ROWS 1024
+/* TX_DESC_UPD_REG_KER: Transmit descriptor update register. */
+#define        FR_AA_TX_DESC_UPD_KER 0x00000a10
+#define        FR_AA_TX_DESC_UPD_KER_STEP 8192
+#define        FR_AA_TX_DESC_UPD_KER_ROWS 8
+/* TX_DESC_UPD_REGP123: Transmit descriptor update register. */
+#define        FR_BB_TX_DESC_UPD_P123 0x01000a10
+#define        FR_BB_TX_DESC_UPD_P123_STEP 8192
+#define        FR_BB_TX_DESC_UPD_P123_ROWS 3072
+#define        FRF_AZ_TX_DESC_WPTR_LBN 96
+#define        FRF_AZ_TX_DESC_WPTR_WIDTH 12
+#define        FRF_AZ_TX_DESC_PUSH_CMD_LBN 95
+#define        FRF_AZ_TX_DESC_PUSH_CMD_WIDTH 1
+#define        FRF_AZ_TX_DESC_LBN 0
+#define        FRF_AZ_TX_DESC_WIDTH 95
+
+/*
+ * TX_DC_CFG_REG: Transmit descriptor cache configuration register
+ * The FFE_AZ_TX_DC_SIZE_* constants (0-2, named for sizes 8/16/32) follow
+ * the 2-bit TX_DC_SIZE field and are presumably its encodings - confirm.
+ * No constant is defined here for the value 3.
+ */
+#define        FR_AZ_TX_DC_CFG 0x00000a20
+#define        FRF_AZ_TX_DC_SIZE_LBN 0
+#define        FRF_AZ_TX_DC_SIZE_WIDTH 2
+#define        FFE_AZ_TX_DC_SIZE_32 2
+#define        FFE_AZ_TX_DC_SIZE_16 1
+#define        FFE_AZ_TX_DC_SIZE_8 0
+
+/*
+ * TX_CHKSM_CFG_REG: Transmit checksum configuration register
+ * Four 32-bit fields at LBN 0, 32, 64 and 96; each field name carries the
+ * index range it covers (0_31 ... 96_127), presumably one bit per TX
+ * queue - confirm.
+ */
+#define        FR_AA_TX_CHKSM_CFG 0x00000a30
+#define        FRF_AA_TX_Q_CHKSM_DIS_96_127_LBN 96
+#define        FRF_AA_TX_Q_CHKSM_DIS_96_127_WIDTH 32
+#define        FRF_AA_TX_Q_CHKSM_DIS_64_95_LBN 64
+#define        FRF_AA_TX_Q_CHKSM_DIS_64_95_WIDTH 32
+#define        FRF_AA_TX_Q_CHKSM_DIS_32_63_LBN 32
+#define        FRF_AA_TX_Q_CHKSM_DIS_32_63_WIDTH 32
+#define        FRF_AA_TX_Q_CHKSM_DIS_0_31_LBN 0
+#define        FRF_AA_TX_Q_CHKSM_DIS_0_31_WIDTH 32
+
+/*
+ * TX_CFG_REG: Transmit configuration register
+ * Every field with an LBN of 47 or above carries the FRF_CZ_ prefix; the
+ * fields at LBN 16 and below carry FRF_AZ_, except NON_IP_DROP_DIS at
+ * LBN 1, which is FRF_AA_.
+ */
+#define        FR_AZ_TX_CFG 0x00000a50
+#define        FRF_CZ_TX_CONT_LOOKUP_THRESH_RANGE_LBN 114
+#define        FRF_CZ_TX_CONT_LOOKUP_THRESH_RANGE_WIDTH 8
+#define        FRF_CZ_TX_FILTER_TEST_MODE_BIT_LBN 113
+#define        FRF_CZ_TX_FILTER_TEST_MODE_BIT_WIDTH 1
+#define        FRF_CZ_TX_ETH_FILTER_WILD_SEARCH_RANGE_LBN 105
+#define        FRF_CZ_TX_ETH_FILTER_WILD_SEARCH_RANGE_WIDTH 8
+#define        FRF_CZ_TX_ETH_FILTER_FULL_SEARCH_RANGE_LBN 97
+#define        FRF_CZ_TX_ETH_FILTER_FULL_SEARCH_RANGE_WIDTH 8
+#define        FRF_CZ_TX_UDPIP_FILTER_WILD_SEARCH_RANGE_LBN 89
+#define        FRF_CZ_TX_UDPIP_FILTER_WILD_SEARCH_RANGE_WIDTH 8
+#define        FRF_CZ_TX_UDPIP_FILTER_FULL_SEARCH_RANGE_LBN 81
+#define        FRF_CZ_TX_UDPIP_FILTER_FULL_SEARCH_RANGE_WIDTH 8
+#define        FRF_CZ_TX_TCPIP_FILTER_WILD_SEARCH_RANGE_LBN 73
+#define        FRF_CZ_TX_TCPIP_FILTER_WILD_SEARCH_RANGE_WIDTH 8
+#define        FRF_CZ_TX_TCPIP_FILTER_FULL_SEARCH_RANGE_LBN 65
+#define        FRF_CZ_TX_TCPIP_FILTER_FULL_SEARCH_RANGE_WIDTH 8
+#define        FRF_CZ_TX_FILTER_ALL_VLAN_ETHERTYPES_BIT_LBN 64
+#define        FRF_CZ_TX_FILTER_ALL_VLAN_ETHERTYPES_BIT_WIDTH 1
+#define        FRF_CZ_TX_VLAN_MATCH_ETHERTYPE_RANGE_LBN 48
+#define        FRF_CZ_TX_VLAN_MATCH_ETHERTYPE_RANGE_WIDTH 16
+#define        FRF_CZ_TX_FILTER_EN_BIT_LBN 47
+#define        FRF_CZ_TX_FILTER_EN_BIT_WIDTH 1
+#define        FRF_AZ_TX_IP_ID_P0_OFS_LBN 16
+#define        FRF_AZ_TX_IP_ID_P0_OFS_WIDTH 15
+#define        FRF_AZ_TX_NO_EOP_DISC_EN_LBN 5
+#define        FRF_AZ_TX_NO_EOP_DISC_EN_WIDTH 1
+#define        FRF_AZ_TX_P1_PRI_EN_LBN 4
+#define        FRF_AZ_TX_P1_PRI_EN_WIDTH 1
+#define        FRF_AZ_TX_OWNERR_CTL_LBN 2
+#define        FRF_AZ_TX_OWNERR_CTL_WIDTH 1
+#define        FRF_AA_TX_NON_IP_DROP_DIS_LBN 1
+#define        FRF_AA_TX_NON_IP_DROP_DIS_WIDTH 1
+#define        FRF_AZ_TX_IP_ID_REP_EN_LBN 0
+#define        FRF_AZ_TX_IP_ID_REP_EN_WIDTH 1
+
+/*
+ * TX_PUSH_DROP_REG: Transmit push dropped register
+ * Holds a single 32-bit count field at LBN 0.
+ */
+#define        FR_AZ_TX_PUSH_DROP 0x00000a60
+#define        FRF_AZ_TX_PUSH_DROP_CNT_LBN 0
+#define        FRF_AZ_TX_PUSH_DROP_CNT_WIDTH 32
+
+/*
+ * TX_RESERVED_REG: Transmit configuration register
+ * NOTE(review): this description is word-for-word the one given for
+ * TX_CFG_REG - confirm it is intended for this register too.
+ * FRF_AA_TX_TCP_DIS and FRF_BZ_TX_FLUSH_MIN_LEN_EN are both defined at
+ * LBN 7 under different prefixes. The FFE_AZ_TX_MAX_CPL_* and
+ * FFE_AZ_TX_MAX_PREF_* constants (0-3) follow the 2-bit TX_MAX_CPL and
+ * TX_MAX_PREF fields and are presumably their encodings - confirm.
+ */
+#define        FR_AZ_TX_RESERVED 0x00000a80
+#define        FRF_AZ_TX_EVT_CNT_LBN 121
+#define        FRF_AZ_TX_EVT_CNT_WIDTH 7
+#define        FRF_AZ_TX_PREF_AGE_CNT_LBN 119
+#define        FRF_AZ_TX_PREF_AGE_CNT_WIDTH 2
+#define        FRF_AZ_TX_RD_COMP_TMR_LBN 96
+#define        FRF_AZ_TX_RD_COMP_TMR_WIDTH 23
+#define        FRF_AZ_TX_PUSH_EN_LBN 89
+#define        FRF_AZ_TX_PUSH_EN_WIDTH 1
+#define        FRF_AZ_TX_PUSH_CHK_DIS_LBN 88
+#define        FRF_AZ_TX_PUSH_CHK_DIS_WIDTH 1
+#define        FRF_AZ_TX_D_FF_FULL_P0_LBN 85
+#define        FRF_AZ_TX_D_FF_FULL_P0_WIDTH 1
+#define        FRF_AZ_TX_DMAR_ST_P0_LBN 81
+#define        FRF_AZ_TX_DMAR_ST_P0_WIDTH 1
+#define        FRF_AZ_TX_DMAQ_ST_LBN 78
+#define        FRF_AZ_TX_DMAQ_ST_WIDTH 1
+#define        FRF_AZ_TX_RX_SPACER_LBN 64
+#define        FRF_AZ_TX_RX_SPACER_WIDTH 8
+#define        FRF_AZ_TX_DROP_ABORT_EN_LBN 60
+#define        FRF_AZ_TX_DROP_ABORT_EN_WIDTH 1
+#define        FRF_AZ_TX_SOFT_EVT_EN_LBN 59
+#define        FRF_AZ_TX_SOFT_EVT_EN_WIDTH 1
+#define        FRF_AZ_TX_PS_EVT_DIS_LBN 58
+#define        FRF_AZ_TX_PS_EVT_DIS_WIDTH 1
+#define        FRF_AZ_TX_RX_SPACER_EN_LBN 57
+#define        FRF_AZ_TX_RX_SPACER_EN_WIDTH 1
+#define        FRF_AZ_TX_XP_TIMER_LBN 52
+#define        FRF_AZ_TX_XP_TIMER_WIDTH 5
+#define        FRF_AZ_TX_PREF_SPACER_LBN 44
+#define        FRF_AZ_TX_PREF_SPACER_WIDTH 8
+#define        FRF_AZ_TX_PREF_WD_TMR_LBN 22
+#define        FRF_AZ_TX_PREF_WD_TMR_WIDTH 22
+#define        FRF_AZ_TX_ONLY1TAG_LBN 21
+#define        FRF_AZ_TX_ONLY1TAG_WIDTH 1
+#define        FRF_AZ_TX_PREF_THRESHOLD_LBN 19
+#define        FRF_AZ_TX_PREF_THRESHOLD_WIDTH 2
+#define        FRF_AZ_TX_ONE_PKT_PER_Q_LBN 18
+#define        FRF_AZ_TX_ONE_PKT_PER_Q_WIDTH 1
+#define        FRF_AZ_TX_DIS_NON_IP_EV_LBN 17
+#define        FRF_AZ_TX_DIS_NON_IP_EV_WIDTH 1
+#define        FRF_AA_TX_DMA_FF_THR_LBN 16
+#define        FRF_AA_TX_DMA_FF_THR_WIDTH 1
+#define        FRF_AZ_TX_DMA_SPACER_LBN 8
+#define        FRF_AZ_TX_DMA_SPACER_WIDTH 8
+#define        FRF_AA_TX_TCP_DIS_LBN 7
+#define        FRF_AA_TX_TCP_DIS_WIDTH 1
+#define        FRF_BZ_TX_FLUSH_MIN_LEN_EN_LBN 7
+#define        FRF_BZ_TX_FLUSH_MIN_LEN_EN_WIDTH 1
+#define        FRF_AA_TX_IP_DIS_LBN 6
+#define        FRF_AA_TX_IP_DIS_WIDTH 1
+#define        FRF_AZ_TX_MAX_CPL_LBN 2
+#define        FRF_AZ_TX_MAX_CPL_WIDTH 2
+#define        FFE_AZ_TX_MAX_CPL_16 3
+#define        FFE_AZ_TX_MAX_CPL_8 2
+#define        FFE_AZ_TX_MAX_CPL_4 1
+#define        FFE_AZ_TX_MAX_CPL_NOLIMIT 0
+#define        FRF_AZ_TX_MAX_PREF_LBN 0
+#define        FRF_AZ_TX_MAX_PREF_WIDTH 2
+#define        FFE_AZ_TX_MAX_PREF_32 3
+#define        FFE_AZ_TX_MAX_PREF_16 2
+#define        FFE_AZ_TX_MAX_PREF_8 1
+#define        FFE_AZ_TX_MAX_PREF_OFF 0
+
+/*
+ * TX_PACE_REG: Transmit pace control register
+ * Four back-to-back fields: BIN_TH (5 bits at LBN 0), FB_BASE (4 bits at
+ * LBN 5), SB_AF (10 bits at LBN 9) and SB_NOT_AF (10 bits at LBN 19).
+ */
+#define        FR_BZ_TX_PACE 0x00000a90
+#define        FRF_BZ_TX_PACE_SB_NOT_AF_LBN 19
+#define        FRF_BZ_TX_PACE_SB_NOT_AF_WIDTH 10
+#define        FRF_BZ_TX_PACE_SB_AF_LBN 9
+#define        FRF_BZ_TX_PACE_SB_AF_WIDTH 10
+#define        FRF_BZ_TX_PACE_FB_BASE_LBN 5
+#define        FRF_BZ_TX_PACE_FB_BASE_WIDTH 4
+#define        FRF_BZ_TX_PACE_BIN_TH_LBN 0
+#define        FRF_BZ_TX_PACE_BIN_TH_WIDTH 5
+
+/* TX_PACE_DROP_QID_REG: PACE Drop QID Counter (a single 16-bit count field at LBN 0) */
+#define        FR_BZ_TX_PACE_DROP_QID 0x00000aa0
+#define        FRF_BZ_TX_PACE_QID_DRP_CNT_LBN 0
+#define        FRF_BZ_TX_PACE_QID_DRP_CNT_WIDTH 16
+
+/*
+ * TX_VLAN_REG: Transmit VLAN tag register
+ * Eight slots, VLAN0-VLAN7, are laid out at a pitch of 16: slot n has a
+ * 12-bit VLANn field at LBN 16*n, VLANn_PORT0_EN at LBN 16*n + 12 and
+ * VLANn_PORT1_EN at LBN 16*n + 13. TX_VLAN_EN is a separate single-bit
+ * field at LBN 127.
+ */
+#define        FR_BB_TX_VLAN 0x00000ae0
+#define        FRF_BB_TX_VLAN_EN_LBN 127
+#define        FRF_BB_TX_VLAN_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN7_PORT1_EN_LBN 125
+#define        FRF_BB_TX_VLAN7_PORT1_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN7_PORT0_EN_LBN 124
+#define        FRF_BB_TX_VLAN7_PORT0_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN7_LBN 112
+#define        FRF_BB_TX_VLAN7_WIDTH 12
+#define        FRF_BB_TX_VLAN6_PORT1_EN_LBN 109
+#define        FRF_BB_TX_VLAN6_PORT1_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN6_PORT0_EN_LBN 108
+#define        FRF_BB_TX_VLAN6_PORT0_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN6_LBN 96
+#define        FRF_BB_TX_VLAN6_WIDTH 12
+#define        FRF_BB_TX_VLAN5_PORT1_EN_LBN 93
+#define        FRF_BB_TX_VLAN5_PORT1_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN5_PORT0_EN_LBN 92
+#define        FRF_BB_TX_VLAN5_PORT0_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN5_LBN 80
+#define        FRF_BB_TX_VLAN5_WIDTH 12
+#define        FRF_BB_TX_VLAN4_PORT1_EN_LBN 77
+#define        FRF_BB_TX_VLAN4_PORT1_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN4_PORT0_EN_LBN 76
+#define        FRF_BB_TX_VLAN4_PORT0_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN4_LBN 64
+#define        FRF_BB_TX_VLAN4_WIDTH 12
+#define        FRF_BB_TX_VLAN3_PORT1_EN_LBN 61
+#define        FRF_BB_TX_VLAN3_PORT1_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN3_PORT0_EN_LBN 60
+#define        FRF_BB_TX_VLAN3_PORT0_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN3_LBN 48
+#define        FRF_BB_TX_VLAN3_WIDTH 12
+#define        FRF_BB_TX_VLAN2_PORT1_EN_LBN 45
+#define        FRF_BB_TX_VLAN2_PORT1_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN2_PORT0_EN_LBN 44
+#define        FRF_BB_TX_VLAN2_PORT0_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN2_LBN 32
+#define        FRF_BB_TX_VLAN2_WIDTH 12
+#define        FRF_BB_TX_VLAN1_PORT1_EN_LBN 29
+#define        FRF_BB_TX_VLAN1_PORT1_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN1_PORT0_EN_LBN 28
+#define        FRF_BB_TX_VLAN1_PORT0_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN1_LBN 16
+#define        FRF_BB_TX_VLAN1_WIDTH 12
+#define        FRF_BB_TX_VLAN0_PORT1_EN_LBN 13
+#define        FRF_BB_TX_VLAN0_PORT1_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN0_PORT0_EN_LBN 12
+#define        FRF_BB_TX_VLAN0_PORT0_EN_WIDTH 1
+#define        FRF_BB_TX_VLAN0_LBN 0
+#define        FRF_BB_TX_VLAN0_WIDTH 12
+
+/*
+ * TX_IPFIL_PORTEN_REG: Transmit filter control register
+ * IPFILn_PORT_EN (n = 0..31) is a single-bit field at LBN 2*n, so only
+ * the even LBNs 0-62 are named here. TX_MADR0_FIL_EN is a single-bit
+ * field at LBN 64 and is the only field with the FRF_BZ_ prefix; the
+ * IPFILn fields are FRF_BB_.
+ */
+#define        FR_BZ_TX_IPFIL_PORTEN 0x00000af0
+#define        FRF_BZ_TX_MADR0_FIL_EN_LBN 64
+#define        FRF_BZ_TX_MADR0_FIL_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL31_PORT_EN_LBN 62
+#define        FRF_BB_TX_IPFIL31_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL30_PORT_EN_LBN 60
+#define        FRF_BB_TX_IPFIL30_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL29_PORT_EN_LBN 58
+#define        FRF_BB_TX_IPFIL29_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL28_PORT_EN_LBN 56
+#define        FRF_BB_TX_IPFIL28_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL27_PORT_EN_LBN 54
+#define        FRF_BB_TX_IPFIL27_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL26_PORT_EN_LBN 52
+#define        FRF_BB_TX_IPFIL26_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL25_PORT_EN_LBN 50
+#define        FRF_BB_TX_IPFIL25_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL24_PORT_EN_LBN 48
+#define        FRF_BB_TX_IPFIL24_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL23_PORT_EN_LBN 46
+#define        FRF_BB_TX_IPFIL23_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL22_PORT_EN_LBN 44
+#define        FRF_BB_TX_IPFIL22_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL21_PORT_EN_LBN 42
+#define        FRF_BB_TX_IPFIL21_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL20_PORT_EN_LBN 40
+#define        FRF_BB_TX_IPFIL20_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL19_PORT_EN_LBN 38
+#define        FRF_BB_TX_IPFIL19_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL18_PORT_EN_LBN 36
+#define        FRF_BB_TX_IPFIL18_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL17_PORT_EN_LBN 34
+#define        FRF_BB_TX_IPFIL17_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL16_PORT_EN_LBN 32
+#define        FRF_BB_TX_IPFIL16_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL15_PORT_EN_LBN 30
+#define        FRF_BB_TX_IPFIL15_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL14_PORT_EN_LBN 28
+#define        FRF_BB_TX_IPFIL14_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL13_PORT_EN_LBN 26
+#define        FRF_BB_TX_IPFIL13_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL12_PORT_EN_LBN 24
+#define        FRF_BB_TX_IPFIL12_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL11_PORT_EN_LBN 22
+#define        FRF_BB_TX_IPFIL11_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL10_PORT_EN_LBN 20
+#define        FRF_BB_TX_IPFIL10_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL9_PORT_EN_LBN 18
+#define        FRF_BB_TX_IPFIL9_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL8_PORT_EN_LBN 16
+#define        FRF_BB_TX_IPFIL8_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL7_PORT_EN_LBN 14
+#define        FRF_BB_TX_IPFIL7_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL6_PORT_EN_LBN 12
+#define        FRF_BB_TX_IPFIL6_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL5_PORT_EN_LBN 10
+#define        FRF_BB_TX_IPFIL5_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL4_PORT_EN_LBN 8
+#define        FRF_BB_TX_IPFIL4_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL3_PORT_EN_LBN 6
+#define        FRF_BB_TX_IPFIL3_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL2_PORT_EN_LBN 4
+#define        FRF_BB_TX_IPFIL2_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL1_PORT_EN_LBN 2
+#define        FRF_BB_TX_IPFIL1_PORT_EN_WIDTH 1
+#define        FRF_BB_TX_IPFIL0_PORT_EN_LBN 0
+#define        FRF_BB_TX_IPFIL0_PORT_EN_WIDTH 1
+
+/*
+ * TX_IPFIL_TBL: Transmit IP source address filter table
+ * A table with STEP 16 and ROWS 16. Each row defines two address/mask
+ * pairs of 32-bit fields: IP_SRC_ADR_0 / IPFIL_MASK_0 at LBN 0 / 32 and
+ * IP_SRC_ADR_1 / IPFIL_MASK_1 at LBN 64 / 96.
+ */
+#define        FR_BB_TX_IPFIL_TBL 0x00000b00
+#define        FR_BB_TX_IPFIL_TBL_STEP 16
+#define        FR_BB_TX_IPFIL_TBL_ROWS 16
+#define        FRF_BB_TX_IPFIL_MASK_1_LBN 96
+#define        FRF_BB_TX_IPFIL_MASK_1_WIDTH 32
+#define        FRF_BB_TX_IP_SRC_ADR_1_LBN 64
+#define        FRF_BB_TX_IP_SRC_ADR_1_WIDTH 32
+#define        FRF_BB_TX_IPFIL_MASK_0_LBN 32
+#define        FRF_BB_TX_IPFIL_MASK_0_WIDTH 32
+#define        FRF_BB_TX_IP_SRC_ADR_0_LBN 0
+#define        FRF_BB_TX_IP_SRC_ADR_0_WIDTH 32
+
+/* MD_TXD_REG: PHY management transmit data register (one 16-bit field at LBN 0) */
+#define        FR_AB_MD_TXD 0x00000c00
+#define        FRF_AB_MD_TXD_LBN 0
+#define        FRF_AB_MD_TXD_WIDTH 16
+
+/* MD_RXD_REG: PHY management receive data register (one 16-bit field at LBN 0) */
+#define        FR_AB_MD_RXD 0x00000c10
+#define        FRF_AB_MD_RXD_LBN 0
+#define        FRF_AB_MD_RXD_WIDTH 16
+
+/*
+ * MD_CS_REG: PHY management configuration & status register
+ * Every field is a single bit except MD_PT, which is 3 bits wide at
+ * LBN 7.
+ */
+#define        FR_AB_MD_CS 0x00000c20
+#define        FRF_AB_MD_RD_EN_CMD_LBN 15
+#define        FRF_AB_MD_RD_EN_CMD_WIDTH 1
+#define        FRF_AB_MD_WR_EN_CMD_LBN 14
+#define        FRF_AB_MD_WR_EN_CMD_WIDTH 1
+#define        FRF_AB_MD_ADDR_CMD_LBN 13
+#define        FRF_AB_MD_ADDR_CMD_WIDTH 1
+#define        FRF_AB_MD_PT_LBN 7
+#define        FRF_AB_MD_PT_WIDTH 3
+#define        FRF_AB_MD_PL_LBN 6
+#define        FRF_AB_MD_PL_WIDTH 1
+#define        FRF_AB_MD_INT_CLR_LBN 5
+#define        FRF_AB_MD_INT_CLR_WIDTH 1
+#define        FRF_AB_MD_GC_LBN 4
+#define        FRF_AB_MD_GC_WIDTH 1
+#define        FRF_AB_MD_PRSP_LBN 3
+#define        FRF_AB_MD_PRSP_WIDTH 1
+#define        FRF_AB_MD_RIC_LBN 2
+#define        FRF_AB_MD_RIC_WIDTH 1
+#define        FRF_AB_MD_RDC_LBN 1
+#define        FRF_AB_MD_RDC_WIDTH 1
+#define        FRF_AB_MD_WRC_LBN 0
+#define        FRF_AB_MD_WRC_WIDTH 1
+
+/* MD_PHY_ADR_REG: PHY management PHY address register (one 16-bit field at LBN 0) */
+#define        FR_AB_MD_PHY_ADR 0x00000c30
+#define        FRF_AB_MD_PHY_ADR_LBN 0
+#define        FRF_AB_MD_PHY_ADR_WIDTH 16
+
+/*
+ * MD_ID_REG: PHY management ID register
+ * Two adjacent 5-bit fields: DEV_ADR at LBN 6 and PRT_ADR at LBN 11.
+ * No field is defined here below LBN 6.
+ */
+#define        FR_AB_MD_ID 0x00000c40
+#define        FRF_AB_MD_PRT_ADR_LBN 11
+#define        FRF_AB_MD_PRT_ADR_WIDTH 5
+#define        FRF_AB_MD_DEV_ADR_LBN 6
+#define        FRF_AB_MD_DEV_ADR_WIDTH 5
+
+/*
+ * MD_STAT_REG: PHY management status & mask register
+ * Five single-bit fields at LBN 0 through 4.
+ */
+#define        FR_AB_MD_STAT 0x00000c50
+#define        FRF_AB_MD_PINT_LBN 4
+#define        FRF_AB_MD_PINT_WIDTH 1
+#define        FRF_AB_MD_DONE_LBN 3
+#define        FRF_AB_MD_DONE_WIDTH 1
+#define        FRF_AB_MD_BSERR_LBN 2
+#define        FRF_AB_MD_BSERR_WIDTH 1
+#define        FRF_AB_MD_LNFL_LBN 1
+#define        FRF_AB_MD_LNFL_WIDTH 1
+#define        FRF_AB_MD_BSY_LBN 0
+#define        FRF_AB_MD_BSY_WIDTH 1
+
+/*
+ * MAC_STAT_DMA_REG: Port MAC statistical counter DMA register
+ * A 48-bit ADR field at LBN 0 with a single-bit CMD field directly above
+ * it at LBN 48.
+ */
+#define        FR_AB_MAC_STAT_DMA 0x00000c60
+#define        FRF_AB_MAC_STAT_DMA_CMD_LBN 48
+#define        FRF_AB_MAC_STAT_DMA_CMD_WIDTH 1
+#define        FRF_AB_MAC_STAT_DMA_ADR_LBN 0
+#define        FRF_AB_MAC_STAT_DMA_ADR_WIDTH 48
+
+/*
+ * MAC_CTRL_REG: Port MAC control register
+ * The FFE_AB_MAC_SPEED_* constants (0-3, named 10M/100M/1G/10G) follow the
+ * 2-bit MAC_SPEED field and are presumably its encodings - confirm. Only
+ * TXFIFO_DRAIN_EN uses the FRF_BB_ prefix; the other fields are FRF_AB_.
+ */
+#define        FR_AB_MAC_CTRL 0x00000c80
+#define        FRF_AB_MAC_XOFF_VAL_LBN 16
+#define        FRF_AB_MAC_XOFF_VAL_WIDTH 16
+#define        FRF_BB_TXFIFO_DRAIN_EN_LBN 7
+#define        FRF_BB_TXFIFO_DRAIN_EN_WIDTH 1
+#define        FRF_AB_MAC_XG_DISTXCRC_LBN 5
+#define        FRF_AB_MAC_XG_DISTXCRC_WIDTH 1
+#define        FRF_AB_MAC_BCAD_ACPT_LBN 4
+#define        FRF_AB_MAC_BCAD_ACPT_WIDTH 1
+#define        FRF_AB_MAC_UC_PROM_LBN 3
+#define        FRF_AB_MAC_UC_PROM_WIDTH 1
+#define        FRF_AB_MAC_LINK_STATUS_LBN 2
+#define        FRF_AB_MAC_LINK_STATUS_WIDTH 1
+#define        FRF_AB_MAC_SPEED_LBN 0
+#define        FRF_AB_MAC_SPEED_WIDTH 2
+#define        FFE_AB_MAC_SPEED_10G 3
+#define        FFE_AB_MAC_SPEED_1G 2
+#define        FFE_AB_MAC_SPEED_100M 1
+#define        FFE_AB_MAC_SPEED_10M 0
+
+/*
+ * GEN_MODE_REG: General Purpose mode register (external interrupt mask)
+ * Four single-bit fields: INT_MASK for XG_PHY and XFP_PHY at LBN 0 and 1,
+ * and INT_POL_SEL for XG_PHY and XFP_PHY at LBN 2 and 3.
+ */
+#define        FR_BB_GEN_MODE 0x00000c90
+#define        FRF_BB_XFP_PHY_INT_POL_SEL_LBN 3
+#define        FRF_BB_XFP_PHY_INT_POL_SEL_WIDTH 1
+#define        FRF_BB_XG_PHY_INT_POL_SEL_LBN 2
+#define        FRF_BB_XG_PHY_INT_POL_SEL_WIDTH 1
+#define        FRF_BB_XFP_PHY_INT_MASK_LBN 1
+#define        FRF_BB_XFP_PHY_INT_MASK_WIDTH 1
+#define        FRF_BB_XG_PHY_INT_MASK_LBN 0
+#define        FRF_BB_XG_PHY_INT_MASK_WIDTH 1
+
+/*
+ * MAC_MC_HASH_REG0: Multicast address hash table
+ * REG0 and REG1 each hold one 128-bit field (MCAST_HASH0 / MCAST_HASH1),
+ * giving 256 hash-table bits across the two registers.
+ */
+#define        FR_AB_MAC_MC_HASH_REG0 0x00000ca0
+#define        FRF_AB_MAC_MCAST_HASH0_LBN 0
+#define        FRF_AB_MAC_MCAST_HASH0_WIDTH 128
+
+/* MAC_MC_HASH_REG1: Multicast address hash table */
+#define        FR_AB_MAC_MC_HASH_REG1 0x00000cb0
+#define        FRF_AB_MAC_MCAST_HASH1_LBN 0
+#define        FRF_AB_MAC_MCAST_HASH1_WIDTH 128
+
+/*
+ * GM_CFG1_REG: GMAC configuration register 1
+ * Every field defined for this register is a single bit (WIDTH 1).
+ */
+#define        FR_AB_GM_CFG1 0x00000e00
+#define        FRF_AB_GM_SW_RST_LBN 31
+#define        FRF_AB_GM_SW_RST_WIDTH 1
+#define        FRF_AB_GM_SIM_RST_LBN 30
+#define        FRF_AB_GM_SIM_RST_WIDTH 1
+#define        FRF_AB_GM_RST_RX_MAC_CTL_LBN 19
+#define        FRF_AB_GM_RST_RX_MAC_CTL_WIDTH 1
+#define        FRF_AB_GM_RST_TX_MAC_CTL_LBN 18
+#define        FRF_AB_GM_RST_TX_MAC_CTL_WIDTH 1
+#define        FRF_AB_GM_RST_RX_FUNC_LBN 17
+#define        FRF_AB_GM_RST_RX_FUNC_WIDTH 1
+#define        FRF_AB_GM_RST_TX_FUNC_LBN 16
+#define        FRF_AB_GM_RST_TX_FUNC_WIDTH 1
+#define        FRF_AB_GM_LOOP_LBN 8
+#define        FRF_AB_GM_LOOP_WIDTH 1
+#define        FRF_AB_GM_RX_FC_EN_LBN 5
+#define        FRF_AB_GM_RX_FC_EN_WIDTH 1
+#define        FRF_AB_GM_TX_FC_EN_LBN 4
+#define        FRF_AB_GM_TX_FC_EN_WIDTH 1
+#define        FRF_AB_GM_SYNC_RXEN_LBN 3
+#define        FRF_AB_GM_SYNC_RXEN_WIDTH 1
+#define        FRF_AB_GM_RX_EN_LBN 2
+#define        FRF_AB_GM_RX_EN_WIDTH 1
+#define        FRF_AB_GM_SYNC_TXEN_LBN 1
+#define        FRF_AB_GM_SYNC_TXEN_WIDTH 1
+#define        FRF_AB_GM_TX_EN_LBN 0
+#define        FRF_AB_GM_TX_EN_WIDTH 1
+
+/*
+ * GM_CFG2_REG: GMAC configuration register 2
+ * FFE_AB_IF_MODE_BYTE_MODE (2) and FFE_AB_IF_MODE_NIBBLE_MODE (1) follow
+ * the 2-bit GM_IF_MODE field and are presumably its encodings - confirm.
+ * No constants are defined here for the values 0 and 3.
+ */
+#define        FR_AB_GM_CFG2 0x00000e10
+#define        FRF_AB_GM_PAMBL_LEN_LBN 12
+#define        FRF_AB_GM_PAMBL_LEN_WIDTH 4
+#define        FRF_AB_GM_IF_MODE_LBN 8
+#define        FRF_AB_GM_IF_MODE_WIDTH 2
+#define        FFE_AB_IF_MODE_BYTE_MODE 2
+#define        FFE_AB_IF_MODE_NIBBLE_MODE 1
+#define        FRF_AB_GM_HUGE_FRM_EN_LBN 5
+#define        FRF_AB_GM_HUGE_FRM_EN_WIDTH 1
+#define        FRF_AB_GM_LEN_CHK_LBN 4
+#define        FRF_AB_GM_LEN_CHK_WIDTH 1
+#define        FRF_AB_GM_PAD_CRC_EN_LBN 2
+#define        FRF_AB_GM_PAD_CRC_EN_WIDTH 1
+#define        FRF_AB_GM_CRC_EN_LBN 1
+#define        FRF_AB_GM_CRC_EN_WIDTH 1
+#define        FRF_AB_GM_FD_LBN 0
+#define        FRF_AB_GM_FD_WIDTH 1
+
+/*
+ * GM_IPG_REG: GMAC IPG register
+ * One field starts at each multiple of 8: B2B_IPG (7 bits at LBN 0),
+ * MIN_IPG_ENF (8 bits at LBN 8), NONB2B_IPG2 (7 bits at LBN 16) and
+ * NONB2B_IPG1 (7 bits at LBN 24).
+ */
+#define        FR_AB_GM_IPG 0x00000e20
+#define        FRF_AB_GM_NONB2B_IPG1_LBN 24
+#define        FRF_AB_GM_NONB2B_IPG1_WIDTH 7
+#define        FRF_AB_GM_NONB2B_IPG2_LBN 16
+#define        FRF_AB_GM_NONB2B_IPG2_WIDTH 7
+#define        FRF_AB_GM_MIN_IPG_ENF_LBN 8
+#define        FRF_AB_GM_MIN_IPG_ENF_WIDTH 8
+#define        FRF_AB_GM_B2B_IPG_LBN 0
+#define        FRF_AB_GM_B2B_IPG_WIDTH 7
+
+/*
+ * GM_HD_REG: GMAC half duplex register
+ * COL_WIN is 10 bits at LBN 0 and RTRY_LIMIT is 4 bits at LBN 12; four
+ * single-bit fields occupy LBN 16-19 and the 4-bit ALT_BOFF_VAL field
+ * starts at LBN 20.
+ */
+#define        FR_AB_GM_HD 0x00000e30
+#define        FRF_AB_GM_ALT_BOFF_VAL_LBN 20
+#define        FRF_AB_GM_ALT_BOFF_VAL_WIDTH 4
+#define        FRF_AB_GM_ALT_BOFF_EN_LBN 19
+#define        FRF_AB_GM_ALT_BOFF_EN_WIDTH 1
+#define        FRF_AB_GM_BP_NO_BOFF_LBN 18
+#define        FRF_AB_GM_BP_NO_BOFF_WIDTH 1
+#define        FRF_AB_GM_DIS_BOFF_LBN 17
+#define        FRF_AB_GM_DIS_BOFF_WIDTH 1
+#define        FRF_AB_GM_EXDEF_TX_EN_LBN 16
+#define        FRF_AB_GM_EXDEF_TX_EN_WIDTH 1
+#define        FRF_AB_GM_RTRY_LIMIT_LBN 12
+#define        FRF_AB_GM_RTRY_LIMIT_WIDTH 4
+#define        FRF_AB_GM_COL_WIN_LBN 0
+#define        FRF_AB_GM_COL_WIN_WIDTH 10
+
+/* GM_MAX_FLEN_REG: GMAC maximum frame length register (one 16-bit field at LBN 0) */
+#define        FR_AB_GM_MAX_FLEN 0x00000e40
+#define        FRF_AB_GM_MAX_FLEN_LBN 0
+#define        FRF_AB_GM_MAX_FLEN_WIDTH 16
+
+/*
+ * GM_TEST_REG: GMAC test register
+ * Four single-bit fields at LBN 0 through 3.
+ */
+#define        FR_AB_GM_TEST 0x00000e70
+#define        FRF_AB_GM_MAX_BOFF_LBN 3
+#define        FRF_AB_GM_MAX_BOFF_WIDTH 1
+#define        FRF_AB_GM_REG_TX_FLOW_EN_LBN 2
+#define        FRF_AB_GM_REG_TX_FLOW_EN_WIDTH 1
+#define        FRF_AB_GM_TEST_PAUSE_LBN 1
+#define        FRF_AB_GM_TEST_PAUSE_WIDTH 1
+#define        FRF_AB_GM_SHORT_SLOT_LBN 0
+#define        FRF_AB_GM_SHORT_SLOT_WIDTH 1
+
+/*
+ * GM_ADR1_REG: GMAC station address register 1
+ * The station address is split into six 8-bit fields, ADR_B0-ADR_B5.
+ * B0-B3 live in this register at LBN 24, 16, 8 and 0 (B0 in the highest
+ * position); B4 and B5 are in GM_ADR2_REG.
+ */
+#define        FR_AB_GM_ADR1 0x00000f00
+#define        FRF_AB_GM_ADR_B0_LBN 24
+#define        FRF_AB_GM_ADR_B0_WIDTH 8
+#define        FRF_AB_GM_ADR_B1_LBN 16
+#define        FRF_AB_GM_ADR_B1_WIDTH 8
+#define        FRF_AB_GM_ADR_B2_LBN 8
+#define        FRF_AB_GM_ADR_B2_WIDTH 8
+#define        FRF_AB_GM_ADR_B3_LBN 0
+#define        FRF_AB_GM_ADR_B3_WIDTH 8
+
+/*
+ * GM_ADR2_REG: GMAC station address register 2
+ * Holds ADR_B4 at LBN 24 and ADR_B5 at LBN 16; no field is defined here
+ * below LBN 16.
+ */
+#define        FR_AB_GM_ADR2 0x00000f10
+#define        FRF_AB_GM_ADR_B4_LBN 24
+#define        FRF_AB_GM_ADR_B4_WIDTH 8
+#define        FRF_AB_GM_ADR_B5_LBN 16
+#define        FRF_AB_GM_ADR_B5_WIDTH 8
+
+/*
+ * GMF_CFG0_REG: GMAC FIFO configuration register 0
+ * Fifteen single-bit fields in three groups of five: HSTRST* at LBN 0-4,
+ * *ENREQ at LBN 8-12 and *ENRPLY at LBN 16-20. Within each group the
+ * order from low to high LBN is WT(M), SR(F), FR(F), ST(F), FT(F).
+ */
+#define        FR_AB_GMF_CFG0 0x00000f20
+#define        FRF_AB_GMF_FTFENRPLY_LBN 20
+#define        FRF_AB_GMF_FTFENRPLY_WIDTH 1
+#define        FRF_AB_GMF_STFENRPLY_LBN 19
+#define        FRF_AB_GMF_STFENRPLY_WIDTH 1
+#define        FRF_AB_GMF_FRFENRPLY_LBN 18
+#define        FRF_AB_GMF_FRFENRPLY_WIDTH 1
+#define        FRF_AB_GMF_SRFENRPLY_LBN 17
+#define        FRF_AB_GMF_SRFENRPLY_WIDTH 1
+#define        FRF_AB_GMF_WTMENRPLY_LBN 16
+#define        FRF_AB_GMF_WTMENRPLY_WIDTH 1
+#define        FRF_AB_GMF_FTFENREQ_LBN 12
+#define        FRF_AB_GMF_FTFENREQ_WIDTH 1
+#define        FRF_AB_GMF_STFENREQ_LBN 11
+#define        FRF_AB_GMF_STFENREQ_WIDTH 1
+#define        FRF_AB_GMF_FRFENREQ_LBN 10
+#define        FRF_AB_GMF_FRFENREQ_WIDTH 1
+#define        FRF_AB_GMF_SRFENREQ_LBN 9
+#define        FRF_AB_GMF_SRFENREQ_WIDTH 1
+#define        FRF_AB_GMF_WTMENREQ_LBN 8
+#define        FRF_AB_GMF_WTMENREQ_WIDTH 1
+#define        FRF_AB_GMF_HSTRSTFT_LBN 4
+#define        FRF_AB_GMF_HSTRSTFT_WIDTH 1
+#define        FRF_AB_GMF_HSTRSTST_LBN 3
+#define        FRF_AB_GMF_HSTRSTST_WIDTH 1
+#define        FRF_AB_GMF_HSTRSTFR_LBN 2
+#define        FRF_AB_GMF_HSTRSTFR_WIDTH 1
+#define        FRF_AB_GMF_HSTRSTSR_LBN 1
+#define        FRF_AB_GMF_HSTRSTSR_WIDTH 1
+#define        FRF_AB_GMF_HSTRSTWT_LBN 0
+#define        FRF_AB_GMF_HSTRSTWT_WIDTH 1
+
+/*
+ * GMF_CFG1_REG: GMAC FIFO configuration register 1
+ * CFGXOFFRTX is 16 bits at LBN 0; CFGFRTH is 5 bits at LBN 16.
+ */
+#define        FR_AB_GMF_CFG1 0x00000f30
+#define        FRF_AB_GMF_CFGFRTH_LBN 16
+#define        FRF_AB_GMF_CFGFRTH_WIDTH 5
+#define        FRF_AB_GMF_CFGXOFFRTX_LBN 0
+#define        FRF_AB_GMF_CFGXOFFRTX_WIDTH 16
+
+/*
+ * GMF_CFG2_REG: GMAC FIFO configuration register 2
+ * Two 6-bit fields: CFGLWM at LBN 0 and CFGHWM at LBN 16.
+ */
+#define        FR_AB_GMF_CFG2 0x00000f40
+#define        FRF_AB_GMF_CFGHWM_LBN 16
+#define        FRF_AB_GMF_CFGHWM_WIDTH 6
+#define        FRF_AB_GMF_CFGLWM_LBN 0
+#define        FRF_AB_GMF_CFGLWM_WIDTH 6
+
+/*
+ * GMF_CFG3_REG: GMAC FIFO configuration register 3
+ * Two 6-bit fields: CFGFTTH at LBN 0 and CFGHWMFT at LBN 16.
+ */
+#define        FR_AB_GMF_CFG3 0x00000f50
+#define        FRF_AB_GMF_CFGHWMFT_LBN 16
+#define        FRF_AB_GMF_CFGHWMFT_WIDTH 6
+#define        FRF_AB_GMF_CFGFTTH_LBN 0
+#define        FRF_AB_GMF_CFGFTTH_WIDTH 6
+
+/* GMF_CFG4_REG: GMAC FIFO configuration register 4 (one 18-bit field at LBN 0) */
+#define        FR_AB_GMF_CFG4 0x00000f60
+#define        FRF_AB_GMF_HSTFLTRFRM_LBN 0
+#define        FRF_AB_GMF_HSTFLTRFRM_WIDTH 18
+
+/*
+ * GMF_CFG5_REG: GMAC FIFO configuration register 5
+ * The 18-bit HSTFLTRFRMDC field at LBN 0 is followed by five single-bit
+ * fields at LBN 18 through 22.
+ */
+#define        FR_AB_GMF_CFG5 0x00000f70
+#define        FRF_AB_GMF_CFGHDPLX_LBN 22
+#define        FRF_AB_GMF_CFGHDPLX_WIDTH 1
+#define        FRF_AB_GMF_SRFULL_LBN 21
+#define        FRF_AB_GMF_SRFULL_WIDTH 1
+#define        FRF_AB_GMF_HSTSRFULLCLR_LBN 20
+#define        FRF_AB_GMF_HSTSRFULLCLR_WIDTH 1
+#define        FRF_AB_GMF_CFGBYTMODE_LBN 19
+#define        FRF_AB_GMF_CFGBYTMODE_WIDTH 1
+#define        FRF_AB_GMF_HSTDRPLT64_LBN 18
+#define        FRF_AB_GMF_HSTDRPLT64_WIDTH 1
+#define        FRF_AB_GMF_HSTFLTRFRMDC_LBN 0
+#define        FRF_AB_GMF_HSTFLTRFRMDC_WIDTH 18
+
+/* TX_SRC_MAC_TBL: Transmit MAC source address filter table */
+#define        FR_BB_TX_SRC_MAC_TBL 0x00001000
+#define        FR_BB_TX_SRC_MAC_TBL_STEP 16
+#define        FR_BB_TX_SRC_MAC_TBL_ROWS 16
+#define        FRF_BB_TX_SRC_MAC_ADR_1_LBN 64
+#define        FRF_BB_TX_SRC_MAC_ADR_1_WIDTH 48
+#define        FRF_BB_TX_SRC_MAC_ADR_0_LBN 0
+#define        FRF_BB_TX_SRC_MAC_ADR_0_WIDTH 48
+
+/* TX_SRC_MAC_CTL_REG: Transmit MAC source address filter control */
+#define        FR_BB_TX_SRC_MAC_CTL 0x00001100
+#define        FRF_BB_TX_SRC_DROP_CTR_LBN 16
+#define        FRF_BB_TX_SRC_DROP_CTR_WIDTH 16
+#define        FRF_BB_TX_SRC_FLTR_EN_LBN 15
+#define        FRF_BB_TX_SRC_FLTR_EN_WIDTH 1
+#define        FRF_BB_TX_DROP_CTR_CLR_LBN 12
+#define        FRF_BB_TX_DROP_CTR_CLR_WIDTH 1
+#define        FRF_BB_TX_MAC_QID_SEL_LBN 0
+#define        FRF_BB_TX_MAC_QID_SEL_WIDTH 3
+
+/* XM_ADR_LO_REG: XGMAC address register low */
+#define        FR_AB_XM_ADR_LO 0x00001200
+#define        FRF_AB_XM_ADR_LO_LBN 0
+#define        FRF_AB_XM_ADR_LO_WIDTH 32
+
+/* XM_ADR_HI_REG: XGMAC address register high */
+#define        FR_AB_XM_ADR_HI 0x00001210
+#define        FRF_AB_XM_ADR_HI_LBN 0
+#define        FRF_AB_XM_ADR_HI_WIDTH 16
+
+/* XM_GLB_CFG_REG: XGMAC global configuration */
+#define        FR_AB_XM_GLB_CFG 0x00001220
+#define        FRF_AB_XM_RMTFLT_GEN_LBN 17
+#define        FRF_AB_XM_RMTFLT_GEN_WIDTH 1
+#define        FRF_AB_XM_DEBUG_MODE_LBN 16
+#define        FRF_AB_XM_DEBUG_MODE_WIDTH 1
+#define        FRF_AB_XM_RX_STAT_EN_LBN 11
+#define        FRF_AB_XM_RX_STAT_EN_WIDTH 1
+#define        FRF_AB_XM_TX_STAT_EN_LBN 10
+#define        FRF_AB_XM_TX_STAT_EN_WIDTH 1
+#define        FRF_AB_XM_RX_JUMBO_MODE_LBN 6
+#define        FRF_AB_XM_RX_JUMBO_MODE_WIDTH 1
+#define        FRF_AB_XM_WAN_MODE_LBN 5
+#define        FRF_AB_XM_WAN_MODE_WIDTH 1
+#define        FRF_AB_XM_INTCLR_MODE_LBN 3
+#define        FRF_AB_XM_INTCLR_MODE_WIDTH 1
+#define        FRF_AB_XM_CORE_RST_LBN 0
+#define        FRF_AB_XM_CORE_RST_WIDTH 1
+
+/* XM_TX_CFG_REG: XGMAC transmit configuration */
+#define        FR_AB_XM_TX_CFG 0x00001230
+#define        FRF_AB_XM_TX_PROG_LBN 24
+#define        FRF_AB_XM_TX_PROG_WIDTH 1
+#define        FRF_AB_XM_IPG_LBN 16
+#define        FRF_AB_XM_IPG_WIDTH 4
+#define        FRF_AB_XM_FCNTL_LBN 10
+#define        FRF_AB_XM_FCNTL_WIDTH 1
+#define        FRF_AB_XM_TXCRC_LBN 8
+#define        FRF_AB_XM_TXCRC_WIDTH 1
+#define        FRF_AB_XM_EDRC_LBN 6
+#define        FRF_AB_XM_EDRC_WIDTH 1
+#define        FRF_AB_XM_AUTO_PAD_LBN 5
+#define        FRF_AB_XM_AUTO_PAD_WIDTH 1
+#define        FRF_AB_XM_TX_PRMBL_LBN 2
+#define        FRF_AB_XM_TX_PRMBL_WIDTH 1
+#define        FRF_AB_XM_TXEN_LBN 1
+#define        FRF_AB_XM_TXEN_WIDTH 1
+#define        FRF_AB_XM_TX_RST_LBN 0
+#define        FRF_AB_XM_TX_RST_WIDTH 1
+
+/* XM_RX_CFG_REG: XGMAC receive configuration */
+#define        FR_AB_XM_RX_CFG 0x00001240
+#define        FRF_AB_XM_PASS_LENERR_LBN 26
+#define        FRF_AB_XM_PASS_LENERR_WIDTH 1
+#define        FRF_AB_XM_PASS_CRC_ERR_LBN 25
+#define        FRF_AB_XM_PASS_CRC_ERR_WIDTH 1
+#define        FRF_AB_XM_PASS_PRMBLE_ERR_LBN 24
+#define        FRF_AB_XM_PASS_PRMBLE_ERR_WIDTH 1
+#define        FRF_AB_XM_REJ_BCAST_LBN 20
+#define        FRF_AB_XM_REJ_BCAST_WIDTH 1
+#define        FRF_AB_XM_ACPT_ALL_MCAST_LBN 11
+#define        FRF_AB_XM_ACPT_ALL_MCAST_WIDTH 1
+#define        FRF_AB_XM_ACPT_ALL_UCAST_LBN 9
+#define        FRF_AB_XM_ACPT_ALL_UCAST_WIDTH 1
+#define        FRF_AB_XM_AUTO_DEPAD_LBN 8
+#define        FRF_AB_XM_AUTO_DEPAD_WIDTH 1
+#define        FRF_AB_XM_RXCRC_LBN 3
+#define        FRF_AB_XM_RXCRC_WIDTH 1
+#define        FRF_AB_XM_RX_PRMBL_LBN 2
+#define        FRF_AB_XM_RX_PRMBL_WIDTH 1
+#define        FRF_AB_XM_RXEN_LBN 1
+#define        FRF_AB_XM_RXEN_WIDTH 1
+#define        FRF_AB_XM_RX_RST_LBN 0
+#define        FRF_AB_XM_RX_RST_WIDTH 1
+
+/* XM_MGT_INT_MASK: documentation to be written for sum_XM_MGT_INT_MASK */
+#define        FR_AB_XM_MGT_INT_MASK 0x00001250
+#define        FRF_AB_XM_MSK_STA_INTR_LBN 16
+#define        FRF_AB_XM_MSK_STA_INTR_WIDTH 1
+#define        FRF_AB_XM_MSK_STAT_CNTR_HF_LBN 9
+#define        FRF_AB_XM_MSK_STAT_CNTR_HF_WIDTH 1
+#define        FRF_AB_XM_MSK_STAT_CNTR_OF_LBN 8
+#define        FRF_AB_XM_MSK_STAT_CNTR_OF_WIDTH 1
+#define        FRF_AB_XM_MSK_PRMBLE_ERR_LBN 2
+#define        FRF_AB_XM_MSK_PRMBLE_ERR_WIDTH 1
+#define        FRF_AB_XM_MSK_RMTFLT_LBN 1
+#define        FRF_AB_XM_MSK_RMTFLT_WIDTH 1
+#define        FRF_AB_XM_MSK_LCLFLT_LBN 0
+#define        FRF_AB_XM_MSK_LCLFLT_WIDTH 1
+
+/* XM_FC_REG: XGMAC flow control register */
+#define        FR_AB_XM_FC 0x00001270
+#define        FRF_AB_XM_PAUSE_TIME_LBN 16
+#define        FRF_AB_XM_PAUSE_TIME_WIDTH 16
+#define        FRF_AB_XM_RX_MAC_STAT_LBN 11
+#define        FRF_AB_XM_RX_MAC_STAT_WIDTH 1
+#define        FRF_AB_XM_TX_MAC_STAT_LBN 10
+#define        FRF_AB_XM_TX_MAC_STAT_WIDTH 1
+#define        FRF_AB_XM_MCNTL_PASS_LBN 8
+#define        FRF_AB_XM_MCNTL_PASS_WIDTH 2
+#define        FRF_AB_XM_REJ_CNTL_UCAST_LBN 6
+#define        FRF_AB_XM_REJ_CNTL_UCAST_WIDTH 1
+#define        FRF_AB_XM_REJ_CNTL_MCAST_LBN 5
+#define        FRF_AB_XM_REJ_CNTL_MCAST_WIDTH 1
+#define        FRF_AB_XM_ZPAUSE_LBN 2
+#define        FRF_AB_XM_ZPAUSE_WIDTH 1
+#define        FRF_AB_XM_XMIT_PAUSE_LBN 1
+#define        FRF_AB_XM_XMIT_PAUSE_WIDTH 1
+#define        FRF_AB_XM_DIS_FCNTL_LBN 0
+#define        FRF_AB_XM_DIS_FCNTL_WIDTH 1
+
+/* XM_PAUSE_TIME_REG: XGMAC pause time register */
+#define        FR_AB_XM_PAUSE_TIME 0x00001290
+#define        FRF_AB_XM_TX_PAUSE_CNT_LBN 16
+#define        FRF_AB_XM_TX_PAUSE_CNT_WIDTH 16
+#define        FRF_AB_XM_RX_PAUSE_CNT_LBN 0
+#define        FRF_AB_XM_RX_PAUSE_CNT_WIDTH 16
+
+/* XM_TX_PARAM_REG: XGMAC transmit parameter register */
+#define        FR_AB_XM_TX_PARAM 0x000012d0
+#define        FRF_AB_XM_TX_JUMBO_MODE_LBN 31
+#define        FRF_AB_XM_TX_JUMBO_MODE_WIDTH 1
+#define        FRF_AB_XM_MAX_TX_FRM_SIZE_HI_LBN 19
+#define        FRF_AB_XM_MAX_TX_FRM_SIZE_HI_WIDTH 11
+#define        FRF_AB_XM_MAX_TX_FRM_SIZE_LO_LBN 16
+#define        FRF_AB_XM_MAX_TX_FRM_SIZE_LO_WIDTH 3
+#define        FRF_AB_XM_PAD_CHAR_LBN 0
+#define        FRF_AB_XM_PAD_CHAR_WIDTH 8
+
+/* XM_RX_PARAM_REG: XGMAC receive parameter register */
+#define        FR_AB_XM_RX_PARAM 0x000012e0
+#define        FRF_AB_XM_MAX_RX_FRM_SIZE_HI_LBN 3
+#define        FRF_AB_XM_MAX_RX_FRM_SIZE_HI_WIDTH 11
+#define        FRF_AB_XM_MAX_RX_FRM_SIZE_LO_LBN 0
+#define        FRF_AB_XM_MAX_RX_FRM_SIZE_LO_WIDTH 3
+
+/* XM_MGT_INT_MSK_REG: XGMAC management interrupt mask register */
+#define        FR_AB_XM_MGT_INT_MSK 0x000012f0
+#define        FRF_AB_XM_STAT_CNTR_OF_LBN 9 /* NOTE(review): OF=9/HF=8 here, but XM_MSK_STAT_CNTR_HF=9/OF=8 in XM_MGT_INT_MASK -- confirm bit order */
+#define        FRF_AB_XM_STAT_CNTR_OF_WIDTH 1
+#define        FRF_AB_XM_STAT_CNTR_HF_LBN 8
+#define        FRF_AB_XM_STAT_CNTR_HF_WIDTH 1
+#define        FRF_AB_XM_PRMBLE_ERR_LBN 2
+#define        FRF_AB_XM_PRMBLE_ERR_WIDTH 1
+#define        FRF_AB_XM_RMTFLT_LBN 1
+#define        FRF_AB_XM_RMTFLT_WIDTH 1
+#define        FRF_AB_XM_LCLFLT_LBN 0
+#define        FRF_AB_XM_LCLFLT_WIDTH 1
+
+/* XX_PWR_RST_REG: XGXS/XAUI powerdown/reset register */
+#define        FR_AB_XX_PWR_RST 0x00001300
+#define        FRF_AB_XX_PWRDND_SIG_LBN 31
+#define        FRF_AB_XX_PWRDND_SIG_WIDTH 1
+#define        FRF_AB_XX_PWRDNC_SIG_LBN 30
+#define        FRF_AB_XX_PWRDNC_SIG_WIDTH 1
+#define        FRF_AB_XX_PWRDNB_SIG_LBN 29
+#define        FRF_AB_XX_PWRDNB_SIG_WIDTH 1
+#define        FRF_AB_XX_PWRDNA_SIG_LBN 28
+#define        FRF_AB_XX_PWRDNA_SIG_WIDTH 1
+#define        FRF_AB_XX_SIM_MODE_LBN 27
+#define        FRF_AB_XX_SIM_MODE_WIDTH 1
+#define        FRF_AB_XX_RSTPLLCD_SIG_LBN 25
+#define        FRF_AB_XX_RSTPLLCD_SIG_WIDTH 1
+#define        FRF_AB_XX_RSTPLLAB_SIG_LBN 24
+#define        FRF_AB_XX_RSTPLLAB_SIG_WIDTH 1
+#define        FRF_AB_XX_RESETD_SIG_LBN 23
+#define        FRF_AB_XX_RESETD_SIG_WIDTH 1
+#define        FRF_AB_XX_RESETC_SIG_LBN 22
+#define        FRF_AB_XX_RESETC_SIG_WIDTH 1
+#define        FRF_AB_XX_RESETB_SIG_LBN 21
+#define        FRF_AB_XX_RESETB_SIG_WIDTH 1
+#define        FRF_AB_XX_RESETA_SIG_LBN 20
+#define        FRF_AB_XX_RESETA_SIG_WIDTH 1
+#define        FRF_AB_XX_RSTXGXSRX_SIG_LBN 18
+#define        FRF_AB_XX_RSTXGXSRX_SIG_WIDTH 1
+#define        FRF_AB_XX_RSTXGXSTX_SIG_LBN 17
+#define        FRF_AB_XX_RSTXGXSTX_SIG_WIDTH 1
+#define        FRF_AB_XX_SD_RST_ACT_LBN 16
+#define        FRF_AB_XX_SD_RST_ACT_WIDTH 1
+#define        FRF_AB_XX_PWRDND_EN_LBN 15
+#define        FRF_AB_XX_PWRDND_EN_WIDTH 1
+#define        FRF_AB_XX_PWRDNC_EN_LBN 14
+#define        FRF_AB_XX_PWRDNC_EN_WIDTH 1
+#define        FRF_AB_XX_PWRDNB_EN_LBN 13
+#define        FRF_AB_XX_PWRDNB_EN_WIDTH 1
+#define        FRF_AB_XX_PWRDNA_EN_LBN 12
+#define        FRF_AB_XX_PWRDNA_EN_WIDTH 1
+#define        FRF_AB_XX_RSTPLLCD_EN_LBN 9
+#define        FRF_AB_XX_RSTPLLCD_EN_WIDTH 1
+#define        FRF_AB_XX_RSTPLLAB_EN_LBN 8
+#define        FRF_AB_XX_RSTPLLAB_EN_WIDTH 1
+#define        FRF_AB_XX_RESETD_EN_LBN 7
+#define        FRF_AB_XX_RESETD_EN_WIDTH 1
+#define        FRF_AB_XX_RESETC_EN_LBN 6
+#define        FRF_AB_XX_RESETC_EN_WIDTH 1
+#define        FRF_AB_XX_RESETB_EN_LBN 5
+#define        FRF_AB_XX_RESETB_EN_WIDTH 1
+#define        FRF_AB_XX_RESETA_EN_LBN 4
+#define        FRF_AB_XX_RESETA_EN_WIDTH 1
+#define        FRF_AB_XX_RSTXGXSRX_EN_LBN 2
+#define        FRF_AB_XX_RSTXGXSRX_EN_WIDTH 1
+#define        FRF_AB_XX_RSTXGXSTX_EN_LBN 1
+#define        FRF_AB_XX_RSTXGXSTX_EN_WIDTH 1
+#define        FRF_AB_XX_RST_XX_EN_LBN 0
+#define        FRF_AB_XX_RST_XX_EN_WIDTH 1
+
+/* XX_SD_CTL_REG: XGXS/XAUI SerDes control register (termination adjust, drive level, loopback) */
+#define        FR_AB_XX_SD_CTL 0x00001310
+#define        FRF_AB_XX_TERMADJ1_LBN 17
+#define        FRF_AB_XX_TERMADJ1_WIDTH 1
+#define        FRF_AB_XX_TERMADJ0_LBN 16
+#define        FRF_AB_XX_TERMADJ0_WIDTH 1
+#define        FRF_AB_XX_HIDRVD_LBN 15
+#define        FRF_AB_XX_HIDRVD_WIDTH 1
+#define        FRF_AB_XX_LODRVD_LBN 14
+#define        FRF_AB_XX_LODRVD_WIDTH 1
+#define        FRF_AB_XX_HIDRVC_LBN 13
+#define        FRF_AB_XX_HIDRVC_WIDTH 1
+#define        FRF_AB_XX_LODRVC_LBN 12
+#define        FRF_AB_XX_LODRVC_WIDTH 1
+#define        FRF_AB_XX_HIDRVB_LBN 11
+#define        FRF_AB_XX_HIDRVB_WIDTH 1
+#define        FRF_AB_XX_LODRVB_LBN 10
+#define        FRF_AB_XX_LODRVB_WIDTH 1
+#define        FRF_AB_XX_HIDRVA_LBN 9
+#define        FRF_AB_XX_HIDRVA_WIDTH 1
+#define        FRF_AB_XX_LODRVA_LBN 8
+#define        FRF_AB_XX_LODRVA_WIDTH 1
+#define        FRF_AB_XX_LPBKD_LBN 3
+#define        FRF_AB_XX_LPBKD_WIDTH 1
+#define        FRF_AB_XX_LPBKC_LBN 2
+#define        FRF_AB_XX_LPBKC_WIDTH 1
+#define        FRF_AB_XX_LPBKB_LBN 1
+#define        FRF_AB_XX_LPBKB_WIDTH 1
+#define        FRF_AB_XX_LPBKA_LBN 0
+#define        FRF_AB_XX_LPBKA_WIDTH 1
+
+/* XX_TXDRV_CTL_REG: XAUI SerDes transmit drive control register */
+#define        FR_AB_XX_TXDRV_CTL 0x00001320
+#define        FRF_AB_XX_DEQD_LBN 28
+#define        FRF_AB_XX_DEQD_WIDTH 4
+#define        FRF_AB_XX_DEQC_LBN 24
+#define        FRF_AB_XX_DEQC_WIDTH 4
+#define        FRF_AB_XX_DEQB_LBN 20
+#define        FRF_AB_XX_DEQB_WIDTH 4
+#define        FRF_AB_XX_DEQA_LBN 16
+#define        FRF_AB_XX_DEQA_WIDTH 4
+#define        FRF_AB_XX_DTXD_LBN 12
+#define        FRF_AB_XX_DTXD_WIDTH 4
+#define        FRF_AB_XX_DTXC_LBN 8
+#define        FRF_AB_XX_DTXC_WIDTH 4
+#define        FRF_AB_XX_DTXB_LBN 4
+#define        FRF_AB_XX_DTXB_WIDTH 4
+#define        FRF_AB_XX_DTXA_LBN 0
+#define        FRF_AB_XX_DTXA_WIDTH 4
+
+/* XX_PRBS_CTL_REG: documentation to be written for sum_XX_PRBS_CTL_REG */
+#define        FR_AB_XX_PRBS_CTL 0x00001330
+#define        FRF_AB_XX_CH3_RX_PRBS_SEL_LBN 30
+#define        FRF_AB_XX_CH3_RX_PRBS_SEL_WIDTH 2
+#define        FRF_AB_XX_CH3_RX_PRBS_INV_LBN 29
+#define        FRF_AB_XX_CH3_RX_PRBS_INV_WIDTH 1
+#define        FRF_AB_XX_CH3_RX_PRBS_CHKEN_LBN 28
+#define        FRF_AB_XX_CH3_RX_PRBS_CHKEN_WIDTH 1
+#define        FRF_AB_XX_CH2_RX_PRBS_SEL_LBN 26
+#define        FRF_AB_XX_CH2_RX_PRBS_SEL_WIDTH 2
+#define        FRF_AB_XX_CH2_RX_PRBS_INV_LBN 25
+#define        FRF_AB_XX_CH2_RX_PRBS_INV_WIDTH 1
+#define        FRF_AB_XX_CH2_RX_PRBS_CHKEN_LBN 24
+#define        FRF_AB_XX_CH2_RX_PRBS_CHKEN_WIDTH 1
+#define        FRF_AB_XX_CH1_RX_PRBS_SEL_LBN 22
+#define        FRF_AB_XX_CH1_RX_PRBS_SEL_WIDTH 2
+#define        FRF_AB_XX_CH1_RX_PRBS_INV_LBN 21
+#define        FRF_AB_XX_CH1_RX_PRBS_INV_WIDTH 1
+#define        FRF_AB_XX_CH1_RX_PRBS_CHKEN_LBN 20
+#define        FRF_AB_XX_CH1_RX_PRBS_CHKEN_WIDTH 1
+#define        FRF_AB_XX_CH0_RX_PRBS_SEL_LBN 18
+#define        FRF_AB_XX_CH0_RX_PRBS_SEL_WIDTH 2
+#define        FRF_AB_XX_CH0_RX_PRBS_INV_LBN 17
+#define        FRF_AB_XX_CH0_RX_PRBS_INV_WIDTH 1
+#define        FRF_AB_XX_CH0_RX_PRBS_CHKEN_LBN 16
+#define        FRF_AB_XX_CH0_RX_PRBS_CHKEN_WIDTH 1
+#define        FRF_AB_XX_CH3_TX_PRBS_SEL_LBN 14
+#define        FRF_AB_XX_CH3_TX_PRBS_SEL_WIDTH 2
+#define        FRF_AB_XX_CH3_TX_PRBS_INV_LBN 13
+#define        FRF_AB_XX_CH3_TX_PRBS_INV_WIDTH 1
+#define        FRF_AB_XX_CH3_TX_PRBS_CHKEN_LBN 12
+#define        FRF_AB_XX_CH3_TX_PRBS_CHKEN_WIDTH 1
+#define        FRF_AB_XX_CH2_TX_PRBS_SEL_LBN 10
+#define        FRF_AB_XX_CH2_TX_PRBS_SEL_WIDTH 2
+#define        FRF_AB_XX_CH2_TX_PRBS_INV_LBN 9
+#define        FRF_AB_XX_CH2_TX_PRBS_INV_WIDTH 1
+#define        FRF_AB_XX_CH2_TX_PRBS_CHKEN_LBN 8
+#define        FRF_AB_XX_CH2_TX_PRBS_CHKEN_WIDTH 1
+#define        FRF_AB_XX_CH1_TX_PRBS_SEL_LBN 6
+#define        FRF_AB_XX_CH1_TX_PRBS_SEL_WIDTH 2
+#define        FRF_AB_XX_CH1_TX_PRBS_INV_LBN 5
+#define        FRF_AB_XX_CH1_TX_PRBS_INV_WIDTH 1
+#define        FRF_AB_XX_CH1_TX_PRBS_CHKEN_LBN 4
+#define        FRF_AB_XX_CH1_TX_PRBS_CHKEN_WIDTH 1
+#define        FRF_AB_XX_CH0_TX_PRBS_SEL_LBN 2
+#define        FRF_AB_XX_CH0_TX_PRBS_SEL_WIDTH 2
+#define        FRF_AB_XX_CH0_TX_PRBS_INV_LBN 1
+#define        FRF_AB_XX_CH0_TX_PRBS_INV_WIDTH 1
+#define        FRF_AB_XX_CH0_TX_PRBS_CHKEN_LBN 0
+#define        FRF_AB_XX_CH0_TX_PRBS_CHKEN_WIDTH 1
+
+/* XX_PRBS_CHK_REG: documentation to be written for sum_XX_PRBS_CHK_REG */
+#define        FR_AB_XX_PRBS_CHK 0x00001340
+#define        FRF_AB_XX_REV_LB_EN_LBN 16
+#define        FRF_AB_XX_REV_LB_EN_WIDTH 1
+#define        FRF_AB_XX_CH3_DEG_DET_LBN 15
+#define        FRF_AB_XX_CH3_DEG_DET_WIDTH 1
+#define        FRF_AB_XX_CH3_LFSR_LOCK_IND_LBN 14
+#define        FRF_AB_XX_CH3_LFSR_LOCK_IND_WIDTH 1
+#define        FRF_AB_XX_CH3_PRBS_FRUN_LBN 13
+#define        FRF_AB_XX_CH3_PRBS_FRUN_WIDTH 1
+#define        FRF_AB_XX_CH3_ERR_CHK_LBN 12
+#define        FRF_AB_XX_CH3_ERR_CHK_WIDTH 1
+#define        FRF_AB_XX_CH2_DEG_DET_LBN 11
+#define        FRF_AB_XX_CH2_DEG_DET_WIDTH 1
+#define        FRF_AB_XX_CH2_LFSR_LOCK_IND_LBN 10
+#define        FRF_AB_XX_CH2_LFSR_LOCK_IND_WIDTH 1
+#define        FRF_AB_XX_CH2_PRBS_FRUN_LBN 9
+#define        FRF_AB_XX_CH2_PRBS_FRUN_WIDTH 1
+#define        FRF_AB_XX_CH2_ERR_CHK_LBN 8
+#define        FRF_AB_XX_CH2_ERR_CHK_WIDTH 1
+#define        FRF_AB_XX_CH1_DEG_DET_LBN 7
+#define        FRF_AB_XX_CH1_DEG_DET_WIDTH 1
+#define        FRF_AB_XX_CH1_LFSR_LOCK_IND_LBN 6
+#define        FRF_AB_XX_CH1_LFSR_LOCK_IND_WIDTH 1
+#define        FRF_AB_XX_CH1_PRBS_FRUN_LBN 5
+#define        FRF_AB_XX_CH1_PRBS_FRUN_WIDTH 1
+#define        FRF_AB_XX_CH1_ERR_CHK_LBN 4
+#define        FRF_AB_XX_CH1_ERR_CHK_WIDTH 1
+#define        FRF_AB_XX_CH0_DEG_DET_LBN 3
+#define        FRF_AB_XX_CH0_DEG_DET_WIDTH 1
+#define        FRF_AB_XX_CH0_LFSR_LOCK_IND_LBN 2
+#define        FRF_AB_XX_CH0_LFSR_LOCK_IND_WIDTH 1
+#define        FRF_AB_XX_CH0_PRBS_FRUN_LBN 1
+#define        FRF_AB_XX_CH0_PRBS_FRUN_WIDTH 1
+#define        FRF_AB_XX_CH0_ERR_CHK_LBN 0
+#define        FRF_AB_XX_CH0_ERR_CHK_WIDTH 1
+
+/* XX_PRBS_ERR_REG: documentation to be written for sum_XX_PRBS_ERR_REG */
+#define        FR_AB_XX_PRBS_ERR 0x00001350
+#define        FRF_AB_XX_CH3_PRBS_ERR_CNT_LBN 24
+#define        FRF_AB_XX_CH3_PRBS_ERR_CNT_WIDTH 8
+#define        FRF_AB_XX_CH2_PRBS_ERR_CNT_LBN 16
+#define        FRF_AB_XX_CH2_PRBS_ERR_CNT_WIDTH 8
+#define        FRF_AB_XX_CH1_PRBS_ERR_CNT_LBN 8
+#define        FRF_AB_XX_CH1_PRBS_ERR_CNT_WIDTH 8
+#define        FRF_AB_XX_CH0_PRBS_ERR_CNT_LBN 0
+#define        FRF_AB_XX_CH0_PRBS_ERR_CNT_WIDTH 8
+
+/* XX_CORE_STAT_REG: XAUI XGXS core status register */
+#define        FR_AB_XX_CORE_STAT 0x00001360
+#define        FRF_AB_XX_FORCE_SIG3_LBN 31
+#define        FRF_AB_XX_FORCE_SIG3_WIDTH 1
+#define        FRF_AB_XX_FORCE_SIG3_VAL_LBN 30
+#define        FRF_AB_XX_FORCE_SIG3_VAL_WIDTH 1
+#define        FRF_AB_XX_FORCE_SIG2_LBN 29
+#define        FRF_AB_XX_FORCE_SIG2_WIDTH 1
+#define        FRF_AB_XX_FORCE_SIG2_VAL_LBN 28
+#define        FRF_AB_XX_FORCE_SIG2_VAL_WIDTH 1
+#define        FRF_AB_XX_FORCE_SIG1_LBN 27
+#define        FRF_AB_XX_FORCE_SIG1_WIDTH 1
+#define        FRF_AB_XX_FORCE_SIG1_VAL_LBN 26
+#define        FRF_AB_XX_FORCE_SIG1_VAL_WIDTH 1
+#define        FRF_AB_XX_FORCE_SIG0_LBN 25
+#define        FRF_AB_XX_FORCE_SIG0_WIDTH 1
+#define        FRF_AB_XX_FORCE_SIG0_VAL_LBN 24
+#define        FRF_AB_XX_FORCE_SIG0_VAL_WIDTH 1
+#define        FRF_AB_XX_XGXS_LB_EN_LBN 23
+#define        FRF_AB_XX_XGXS_LB_EN_WIDTH 1
+#define        FRF_AB_XX_XGMII_LB_EN_LBN 22
+#define        FRF_AB_XX_XGMII_LB_EN_WIDTH 1
+#define        FRF_AB_XX_MATCH_FAULT_LBN 21
+#define        FRF_AB_XX_MATCH_FAULT_WIDTH 1
+#define        FRF_AB_XX_ALIGN_DONE_LBN 20
+#define        FRF_AB_XX_ALIGN_DONE_WIDTH 1
+#define        FRF_AB_XX_SYNC_STAT3_LBN 19
+#define        FRF_AB_XX_SYNC_STAT3_WIDTH 1
+#define        FRF_AB_XX_SYNC_STAT2_LBN 18
+#define        FRF_AB_XX_SYNC_STAT2_WIDTH 1
+#define        FRF_AB_XX_SYNC_STAT1_LBN 17
+#define        FRF_AB_XX_SYNC_STAT1_WIDTH 1
+#define        FRF_AB_XX_SYNC_STAT0_LBN 16
+#define        FRF_AB_XX_SYNC_STAT0_WIDTH 1
+#define        FRF_AB_XX_COMMA_DET_CH3_LBN 15
+#define        FRF_AB_XX_COMMA_DET_CH3_WIDTH 1
+#define        FRF_AB_XX_COMMA_DET_CH2_LBN 14
+#define        FRF_AB_XX_COMMA_DET_CH2_WIDTH 1
+#define        FRF_AB_XX_COMMA_DET_CH1_LBN 13
+#define        FRF_AB_XX_COMMA_DET_CH1_WIDTH 1
+#define        FRF_AB_XX_COMMA_DET_CH0_LBN 12
+#define        FRF_AB_XX_COMMA_DET_CH0_WIDTH 1
+#define        FRF_AB_XX_CGRP_ALIGN_CH3_LBN 11
+#define        FRF_AB_XX_CGRP_ALIGN_CH3_WIDTH 1
+#define        FRF_AB_XX_CGRP_ALIGN_CH2_LBN 10
+#define        FRF_AB_XX_CGRP_ALIGN_CH2_WIDTH 1
+#define        FRF_AB_XX_CGRP_ALIGN_CH1_LBN 9
+#define        FRF_AB_XX_CGRP_ALIGN_CH1_WIDTH 1
+#define        FRF_AB_XX_CGRP_ALIGN_CH0_LBN 8
+#define        FRF_AB_XX_CGRP_ALIGN_CH0_WIDTH 1
+#define        FRF_AB_XX_CHAR_ERR_CH3_LBN 7
+#define        FRF_AB_XX_CHAR_ERR_CH3_WIDTH 1
+#define        FRF_AB_XX_CHAR_ERR_CH2_LBN 6
+#define        FRF_AB_XX_CHAR_ERR_CH2_WIDTH 1
+#define        FRF_AB_XX_CHAR_ERR_CH1_LBN 5
+#define        FRF_AB_XX_CHAR_ERR_CH1_WIDTH 1
+#define        FRF_AB_XX_CHAR_ERR_CH0_LBN 4
+#define        FRF_AB_XX_CHAR_ERR_CH0_WIDTH 1
+#define        FRF_AB_XX_DISPERR_CH3_LBN 3
+#define        FRF_AB_XX_DISPERR_CH3_WIDTH 1
+#define        FRF_AB_XX_DISPERR_CH2_LBN 2
+#define        FRF_AB_XX_DISPERR_CH2_WIDTH 1
+#define        FRF_AB_XX_DISPERR_CH1_LBN 1
+#define        FRF_AB_XX_DISPERR_CH1_WIDTH 1
+#define        FRF_AB_XX_DISPERR_CH0_LBN 0
+#define        FRF_AB_XX_DISPERR_CH0_WIDTH 1
+
+/* RX_DESC_PTR_TBL_KER: Receive descriptor pointer table */
+#define        FR_AA_RX_DESC_PTR_TBL_KER 0x00011800
+#define        FR_AA_RX_DESC_PTR_TBL_KER_STEP 16
+#define        FR_AA_RX_DESC_PTR_TBL_KER_ROWS 4
+/* RX_DESC_PTR_TBL: Receive descriptor pointer table */
+#define        FR_BZ_RX_DESC_PTR_TBL 0x00f40000
+#define        FR_BZ_RX_DESC_PTR_TBL_STEP 16
+#define        FR_BB_RX_DESC_PTR_TBL_ROWS 4096
+#define        FR_CZ_RX_DESC_PTR_TBL_ROWS 1024
+#define        FRF_CZ_RX_HDR_SPLIT_LBN 90
+#define        FRF_CZ_RX_HDR_SPLIT_WIDTH 1
+#define        FRF_AA_RX_RESET_LBN 89
+#define        FRF_AA_RX_RESET_WIDTH 1
+#define        FRF_AZ_RX_ISCSI_DDIG_EN_LBN 88
+#define        FRF_AZ_RX_ISCSI_DDIG_EN_WIDTH 1
+#define        FRF_AZ_RX_ISCSI_HDIG_EN_LBN 87
+#define        FRF_AZ_RX_ISCSI_HDIG_EN_WIDTH 1
+#define        FRF_AZ_RX_DESC_PREF_ACT_LBN 86
+#define        FRF_AZ_RX_DESC_PREF_ACT_WIDTH 1
+#define        FRF_AZ_RX_DC_HW_RPTR_LBN 80
+#define        FRF_AZ_RX_DC_HW_RPTR_WIDTH 6
+#define        FRF_AZ_RX_DESCQ_HW_RPTR_LBN 68
+#define        FRF_AZ_RX_DESCQ_HW_RPTR_WIDTH 12
+#define        FRF_AZ_RX_DESCQ_SW_WPTR_LBN 56
+#define        FRF_AZ_RX_DESCQ_SW_WPTR_WIDTH 12
+#define        FRF_AZ_RX_DESCQ_BUF_BASE_ID_LBN 36
+#define        FRF_AZ_RX_DESCQ_BUF_BASE_ID_WIDTH 20
+#define        FRF_AZ_RX_DESCQ_EVQ_ID_LBN 24
+#define        FRF_AZ_RX_DESCQ_EVQ_ID_WIDTH 12
+#define        FRF_AZ_RX_DESCQ_OWNER_ID_LBN 10
+#define        FRF_AZ_RX_DESCQ_OWNER_ID_WIDTH 14
+#define        FRF_AZ_RX_DESCQ_LABEL_LBN 5
+#define        FRF_AZ_RX_DESCQ_LABEL_WIDTH 5
+#define        FRF_AZ_RX_DESCQ_SIZE_LBN 3
+#define        FRF_AZ_RX_DESCQ_SIZE_WIDTH 2
+#define        FFE_AZ_RX_DESCQ_SIZE_4K 3
+#define        FFE_AZ_RX_DESCQ_SIZE_2K 2
+#define        FFE_AZ_RX_DESCQ_SIZE_1K 1
+#define        FFE_AZ_RX_DESCQ_SIZE_512 0
+#define        FRF_AZ_RX_DESCQ_TYPE_LBN 2
+#define        FRF_AZ_RX_DESCQ_TYPE_WIDTH 1
+#define        FRF_AZ_RX_DESCQ_JUMBO_LBN 1
+#define        FRF_AZ_RX_DESCQ_JUMBO_WIDTH 1
+#define        FRF_AZ_RX_DESCQ_EN_LBN 0
+#define        FRF_AZ_RX_DESCQ_EN_WIDTH 1
+
+/* TX_DESC_PTR_TBL_KER: Transmit descriptor pointer */
+#define        FR_AA_TX_DESC_PTR_TBL_KER 0x00011900
+#define        FR_AA_TX_DESC_PTR_TBL_KER_STEP 16
+#define        FR_AA_TX_DESC_PTR_TBL_KER_ROWS 8
+/* TX_DESC_PTR_TBL: Transmit descriptor pointer */
+#define        FR_BZ_TX_DESC_PTR_TBL 0x00f50000
+#define        FR_BZ_TX_DESC_PTR_TBL_STEP 16
+#define        FR_BB_TX_DESC_PTR_TBL_ROWS 4096
+#define        FR_CZ_TX_DESC_PTR_TBL_ROWS 1024
+#define        FRF_CZ_TX_DPT_Q_MASK_WIDTH_LBN 94
+#define        FRF_CZ_TX_DPT_Q_MASK_WIDTH_WIDTH 2
+#define        FRF_CZ_TX_DPT_ETH_FILT_EN_LBN 93
+#define        FRF_CZ_TX_DPT_ETH_FILT_EN_WIDTH 1
+#define        FRF_CZ_TX_DPT_IP_FILT_EN_LBN 92
+#define        FRF_CZ_TX_DPT_IP_FILT_EN_WIDTH 1
+#define        FRF_BZ_TX_NON_IP_DROP_DIS_LBN 91
+#define        FRF_BZ_TX_NON_IP_DROP_DIS_WIDTH 1
+#define        FRF_BZ_TX_IP_CHKSM_DIS_LBN 90
+#define        FRF_BZ_TX_IP_CHKSM_DIS_WIDTH 1
+#define        FRF_BZ_TX_TCP_CHKSM_DIS_LBN 89
+#define        FRF_BZ_TX_TCP_CHKSM_DIS_WIDTH 1
+#define        FRF_AZ_TX_DESCQ_EN_LBN 88
+#define        FRF_AZ_TX_DESCQ_EN_WIDTH 1
+#define        FRF_AZ_TX_ISCSI_DDIG_EN_LBN 87
+#define        FRF_AZ_TX_ISCSI_DDIG_EN_WIDTH 1
+#define        FRF_AZ_TX_ISCSI_HDIG_EN_LBN 86
+#define        FRF_AZ_TX_ISCSI_HDIG_EN_WIDTH 1
+#define        FRF_AZ_TX_DC_HW_RPTR_LBN 80
+#define        FRF_AZ_TX_DC_HW_RPTR_WIDTH 6
+#define        FRF_AZ_TX_DESCQ_HW_RPTR_LBN 68
+#define        FRF_AZ_TX_DESCQ_HW_RPTR_WIDTH 12
+#define        FRF_AZ_TX_DESCQ_SW_WPTR_LBN 56
+#define        FRF_AZ_TX_DESCQ_SW_WPTR_WIDTH 12
+#define        FRF_AZ_TX_DESCQ_BUF_BASE_ID_LBN 36
+#define        FRF_AZ_TX_DESCQ_BUF_BASE_ID_WIDTH 20
+#define        FRF_AZ_TX_DESCQ_EVQ_ID_LBN 24
+#define        FRF_AZ_TX_DESCQ_EVQ_ID_WIDTH 12
+#define        FRF_AZ_TX_DESCQ_OWNER_ID_LBN 10
+#define        FRF_AZ_TX_DESCQ_OWNER_ID_WIDTH 14
+#define        FRF_AZ_TX_DESCQ_LABEL_LBN 5
+#define        FRF_AZ_TX_DESCQ_LABEL_WIDTH 5
+#define        FRF_AZ_TX_DESCQ_SIZE_LBN 3
+#define        FRF_AZ_TX_DESCQ_SIZE_WIDTH 2
+#define        FFE_AZ_TX_DESCQ_SIZE_4K 3
+#define        FFE_AZ_TX_DESCQ_SIZE_2K 2
+#define        FFE_AZ_TX_DESCQ_SIZE_1K 1
+#define        FFE_AZ_TX_DESCQ_SIZE_512 0
+#define        FRF_AZ_TX_DESCQ_TYPE_LBN 1
+#define        FRF_AZ_TX_DESCQ_TYPE_WIDTH 2
+#define        FRF_AZ_TX_DESCQ_FLUSH_LBN 0
+#define        FRF_AZ_TX_DESCQ_FLUSH_WIDTH 1
+
+/* EVQ_PTR_TBL_KER: Event queue pointer table */
+#define        FR_AA_EVQ_PTR_TBL_KER 0x00011a00
+#define        FR_AA_EVQ_PTR_TBL_KER_STEP 16
+#define        FR_AA_EVQ_PTR_TBL_KER_ROWS 4
+/* EVQ_PTR_TBL: Event queue pointer table */
+#define        FR_BZ_EVQ_PTR_TBL 0x00f60000
+#define        FR_BZ_EVQ_PTR_TBL_STEP 16
+#define        FR_CZ_EVQ_PTR_TBL_ROWS 1024
+#define        FR_BB_EVQ_PTR_TBL_ROWS 4096
+#define        FRF_BZ_EVQ_RPTR_IGN_LBN 40
+#define        FRF_BZ_EVQ_RPTR_IGN_WIDTH 1
+#define        FRF_AB_EVQ_WKUP_OR_INT_EN_LBN 39
+#define        FRF_AB_EVQ_WKUP_OR_INT_EN_WIDTH 1
+#define        FRF_CZ_EVQ_DOS_PROTECT_EN_LBN 39
+#define        FRF_CZ_EVQ_DOS_PROTECT_EN_WIDTH 1
+#define        FRF_AZ_EVQ_NXT_WPTR_LBN 24
+#define        FRF_AZ_EVQ_NXT_WPTR_WIDTH 15
+#define        FRF_AZ_EVQ_EN_LBN 23
+#define        FRF_AZ_EVQ_EN_WIDTH 1
+#define        FRF_AZ_EVQ_SIZE_LBN 20
+#define        FRF_AZ_EVQ_SIZE_WIDTH 3
+#define        FFE_AZ_EVQ_SIZE_32K 6
+#define        FFE_AZ_EVQ_SIZE_16K 5
+#define        FFE_AZ_EVQ_SIZE_8K 4
+#define        FFE_AZ_EVQ_SIZE_4K 3
+#define        FFE_AZ_EVQ_SIZE_2K 2
+#define        FFE_AZ_EVQ_SIZE_1K 1
+#define        FFE_AZ_EVQ_SIZE_512 0
+#define        FRF_AZ_EVQ_BUF_BASE_ID_LBN 0
+#define        FRF_AZ_EVQ_BUF_BASE_ID_WIDTH 20
+
+/* BUF_HALF_TBL_KER: Buffer table in half buffer table mode direct access by driver */
+#define        FR_AA_BUF_HALF_TBL_KER 0x00018000
+#define        FR_AA_BUF_HALF_TBL_KER_STEP 8
+#define        FR_AA_BUF_HALF_TBL_KER_ROWS 4096
+/* BUF_HALF_TBL: Buffer table in half buffer table mode direct access by driver */
+#define        FR_BZ_BUF_HALF_TBL 0x00800000
+#define        FR_BZ_BUF_HALF_TBL_STEP 8
+#define        FR_CZ_BUF_HALF_TBL_ROWS 147456
+#define        FR_BB_BUF_HALF_TBL_ROWS 524288
+#define        FRF_AZ_BUF_ADR_HBUF_ODD_LBN 44
+#define        FRF_AZ_BUF_ADR_HBUF_ODD_WIDTH 20
+#define        FRF_AZ_BUF_OWNER_ID_HBUF_ODD_LBN 32
+#define        FRF_AZ_BUF_OWNER_ID_HBUF_ODD_WIDTH 12
+#define        FRF_AZ_BUF_ADR_HBUF_EVEN_LBN 12
+#define        FRF_AZ_BUF_ADR_HBUF_EVEN_WIDTH 20
+#define        FRF_AZ_BUF_OWNER_ID_HBUF_EVEN_LBN 0
+#define        FRF_AZ_BUF_OWNER_ID_HBUF_EVEN_WIDTH 12
+
+/* BUF_FULL_TBL_KER: Buffer table in full buffer table mode direct access by driver */
+#define        FR_AA_BUF_FULL_TBL_KER 0x00018000
+#define        FR_AA_BUF_FULL_TBL_KER_STEP 8
+#define        FR_AA_BUF_FULL_TBL_KER_ROWS 4096
+/* BUF_FULL_TBL: Buffer table in full buffer table mode direct access by driver */
+#define        FR_BZ_BUF_FULL_TBL 0x00800000
+#define        FR_BZ_BUF_FULL_TBL_STEP 8
+#define        FR_CZ_BUF_FULL_TBL_ROWS 147456
+#define        FR_BB_BUF_FULL_TBL_ROWS 917504
+#define        FRF_AZ_BUF_FULL_UNUSED_LBN 51
+#define        FRF_AZ_BUF_FULL_UNUSED_WIDTH 13
+#define        FRF_AZ_IP_DAT_BUF_SIZE_LBN 50
+#define        FRF_AZ_IP_DAT_BUF_SIZE_WIDTH 1
+#define        FRF_AZ_BUF_ADR_REGION_LBN 48
+#define        FRF_AZ_BUF_ADR_REGION_WIDTH 2
+#define        FFE_AZ_BUF_ADR_REGN3 3
+#define        FFE_AZ_BUF_ADR_REGN2 2
+#define        FFE_AZ_BUF_ADR_REGN1 1
+#define        FFE_AZ_BUF_ADR_REGN0 0
+#define        FRF_AZ_BUF_ADR_FBUF_LBN 14
+#define        FRF_AZ_BUF_ADR_FBUF_WIDTH 34
+#define        FRF_AZ_BUF_OWNER_ID_FBUF_LBN 0
+#define        FRF_AZ_BUF_OWNER_ID_FBUF_WIDTH 14
+
+/* RX_FILTER_TBL0: TCP/IPv4 Receive filter table */
+#define        FR_BZ_RX_FILTER_TBL0 0x00f00000
+#define        FR_BZ_RX_FILTER_TBL0_STEP 32
+#define        FR_BZ_RX_FILTER_TBL0_ROWS 8192
+/* RX_FILTER_TBL1: TCP/IPv4 Receive filter table */
+#define        FR_BB_RX_FILTER_TBL1 0x00f00010
+#define        FR_BB_RX_FILTER_TBL1_STEP 32
+#define        FR_BB_RX_FILTER_TBL1_ROWS 8192
+#define        FRF_BZ_RSS_EN_LBN 110
+#define        FRF_BZ_RSS_EN_WIDTH 1
+#define        FRF_BZ_SCATTER_EN_LBN 109
+#define        FRF_BZ_SCATTER_EN_WIDTH 1
+#define        FRF_BZ_TCP_UDP_LBN 108
+#define        FRF_BZ_TCP_UDP_WIDTH 1
+#define        FRF_BZ_RXQ_ID_LBN 96
+#define        FRF_BZ_RXQ_ID_WIDTH 12
+#define        FRF_BZ_DEST_IP_LBN 64
+#define        FRF_BZ_DEST_IP_WIDTH 32
+#define        FRF_BZ_DEST_PORT_TCP_LBN 48
+#define        FRF_BZ_DEST_PORT_TCP_WIDTH 16
+#define        FRF_BZ_SRC_IP_LBN 16
+#define        FRF_BZ_SRC_IP_WIDTH 32
+#define        FRF_BZ_SRC_TCP_DEST_UDP_LBN 0
+#define        FRF_BZ_SRC_TCP_DEST_UDP_WIDTH 16
+
+/* RX_MAC_FILTER_TBL0: Receive Ethernet filter table */
+#define        FR_CZ_RX_MAC_FILTER_TBL0 0x00f00010
+#define        FR_CZ_RX_MAC_FILTER_TBL0_STEP 32
+#define        FR_CZ_RX_MAC_FILTER_TBL0_ROWS 512
+#define        FRF_CZ_RMFT_RSS_EN_LBN 75
+#define        FRF_CZ_RMFT_RSS_EN_WIDTH 1
+#define        FRF_CZ_RMFT_SCATTER_EN_LBN 74
+#define        FRF_CZ_RMFT_SCATTER_EN_WIDTH 1
+#define        FRF_CZ_RMFT_IP_OVERRIDE_LBN 73
+#define        FRF_CZ_RMFT_IP_OVERRIDE_WIDTH 1
+#define        FRF_CZ_RMFT_RXQ_ID_LBN 61
+#define        FRF_CZ_RMFT_RXQ_ID_WIDTH 12
+#define        FRF_CZ_RMFT_WILDCARD_MATCH_LBN 60
+#define        FRF_CZ_RMFT_WILDCARD_MATCH_WIDTH 1
+#define        FRF_CZ_RMFT_DEST_MAC_LBN 12
+#define        FRF_CZ_RMFT_DEST_MAC_WIDTH 48
+#define        FRF_CZ_RMFT_VLAN_ID_LBN 0
+#define        FRF_CZ_RMFT_VLAN_ID_WIDTH 12
+
+/* TIMER_TBL: Timer table */
+#define        FR_BZ_TIMER_TBL 0x00f70000
+#define        FR_BZ_TIMER_TBL_STEP 16
+#define        FR_CZ_TIMER_TBL_ROWS 1024
+#define        FR_BB_TIMER_TBL_ROWS 4096
+#define        FRF_CZ_TIMER_Q_EN_LBN 33
+#define        FRF_CZ_TIMER_Q_EN_WIDTH 1
+#define        FRF_CZ_INT_ARMD_LBN 32
+#define        FRF_CZ_INT_ARMD_WIDTH 1
+#define        FRF_CZ_INT_PEND_LBN 31
+#define        FRF_CZ_INT_PEND_WIDTH 1
+#define        FRF_CZ_HOST_NOTIFY_MODE_LBN 30
+#define        FRF_CZ_HOST_NOTIFY_MODE_WIDTH 1
+#define        FRF_CZ_RELOAD_TIMER_VAL_LBN 16
+#define        FRF_CZ_RELOAD_TIMER_VAL_WIDTH 14
+#define        FRF_CZ_TIMER_MODE_LBN 14
+#define        FRF_CZ_TIMER_MODE_WIDTH 2
+#define        FFE_CZ_TIMER_MODE_INT_HLDOFF 3
+#define        FFE_CZ_TIMER_MODE_TRIG_START 2
+#define        FFE_CZ_TIMER_MODE_IMMED_START 1
+#define        FFE_CZ_TIMER_MODE_DIS 0
+#define        FRF_BB_TIMER_MODE_LBN 12
+#define        FRF_BB_TIMER_MODE_WIDTH 2
+#define        FFE_BB_TIMER_MODE_INT_HLDOFF 2 /* NOTE(review): same encoding as FFE_BB_TIMER_MODE_TRIG_START below (CZ uses 3 vs 2) -- confirm */
+#define        FFE_BB_TIMER_MODE_TRIG_START 2
+#define        FFE_BB_TIMER_MODE_IMMED_START 1
+#define        FFE_BB_TIMER_MODE_DIS 0
+#define        FRF_CZ_TIMER_VAL_LBN 0
+#define        FRF_CZ_TIMER_VAL_WIDTH 14
+#define        FRF_BB_TIMER_VAL_LBN 0
+#define        FRF_BB_TIMER_VAL_WIDTH 12
+
+/* TX_PACE_TBL: Transmit pacing table */
+#define        FR_BZ_TX_PACE_TBL 0x00f80000
+#define        FR_BZ_TX_PACE_TBL_STEP 16
+#define        FR_CZ_TX_PACE_TBL_ROWS 1024
+#define        FR_BB_TX_PACE_TBL_ROWS 4096
+#define        FRF_BZ_TX_PACE_LBN 0
+#define        FRF_BZ_TX_PACE_WIDTH 5
+
+/* RX_INDIRECTION_TBL: RX Indirection Table */
+#define        FR_BZ_RX_INDIRECTION_TBL 0x00fb0000
+#define        FR_BZ_RX_INDIRECTION_TBL_STEP 16
+#define        FR_BZ_RX_INDIRECTION_TBL_ROWS 128
+#define        FRF_BZ_IT_QUEUE_LBN 0
+#define        FRF_BZ_IT_QUEUE_WIDTH 6
+
+/* TX_FILTER_TBL0: TCP/IPv4 Transmit filter table */
+#define        FR_CZ_TX_FILTER_TBL0 0x00fc0000
+#define        FR_CZ_TX_FILTER_TBL0_STEP 16
+#define        FR_CZ_TX_FILTER_TBL0_ROWS 8192
+#define        FRF_CZ_TIFT_TCP_UDP_LBN 108
+#define        FRF_CZ_TIFT_TCP_UDP_WIDTH 1
+#define        FRF_CZ_TIFT_TXQ_ID_LBN 96
+#define        FRF_CZ_TIFT_TXQ_ID_WIDTH 12
+#define        FRF_CZ_TIFT_DEST_IP_LBN 64
+#define        FRF_CZ_TIFT_DEST_IP_WIDTH 32
+#define        FRF_CZ_TIFT_DEST_PORT_TCP_LBN 48
+#define        FRF_CZ_TIFT_DEST_PORT_TCP_WIDTH 16
+#define        FRF_CZ_TIFT_SRC_IP_LBN 16
+#define        FRF_CZ_TIFT_SRC_IP_WIDTH 32
+#define        FRF_CZ_TIFT_SRC_TCP_DEST_UDP_LBN 0
+#define        FRF_CZ_TIFT_SRC_TCP_DEST_UDP_WIDTH 16
+
+/* TX_MAC_FILTER_TBL0: Transmit Ethernet filter table */
+#define        FR_CZ_TX_MAC_FILTER_TBL0 0x00fe0000
+#define        FR_CZ_TX_MAC_FILTER_TBL0_STEP 16
+#define        FR_CZ_TX_MAC_FILTER_TBL0_ROWS 512
+#define        FRF_CZ_TMFT_TXQ_ID_LBN 61
+#define        FRF_CZ_TMFT_TXQ_ID_WIDTH 12
+#define        FRF_CZ_TMFT_WILDCARD_MATCH_LBN 60
+#define        FRF_CZ_TMFT_WILDCARD_MATCH_WIDTH 1
+#define        FRF_CZ_TMFT_SRC_MAC_LBN 12
+#define        FRF_CZ_TMFT_SRC_MAC_WIDTH 48
+#define        FRF_CZ_TMFT_VLAN_ID_LBN 0
+#define        FRF_CZ_TMFT_VLAN_ID_WIDTH 12
+
+/* MC_TREG_SMEM: MC Shared Memory */
+#define        FR_CZ_MC_TREG_SMEM 0x00ff0000
+#define        FR_CZ_MC_TREG_SMEM_STEP 4
+#define        FR_CZ_MC_TREG_SMEM_ROWS 512
+#define        FRF_CZ_MC_TREG_SMEM_ROW_LBN 0
+#define        FRF_CZ_MC_TREG_SMEM_ROW_WIDTH 32
+
+/* MSIX_VECTOR_TABLE: MSIX Vector Table */
+#define        FR_BB_MSIX_VECTOR_TABLE 0x00ff0000
+#define        FR_BZ_MSIX_VECTOR_TABLE_STEP 16
+#define        FR_BB_MSIX_VECTOR_TABLE_ROWS 64
+/* MSIX_VECTOR_TABLE: MSIX Vector Table */
+#define        FR_CZ_MSIX_VECTOR_TABLE 0x00000000
+/* FR_BZ_MSIX_VECTOR_TABLE_STEP 16 */
+#define        FR_CZ_MSIX_VECTOR_TABLE_ROWS 1024
+#define        FRF_BZ_MSIX_VECTOR_RESERVED_LBN 97
+#define        FRF_BZ_MSIX_VECTOR_RESERVED_WIDTH 31
+#define        FRF_BZ_MSIX_VECTOR_MASK_LBN 96
+#define        FRF_BZ_MSIX_VECTOR_MASK_WIDTH 1
+#define        FRF_BZ_MSIX_MESSAGE_DATA_LBN 64
+#define        FRF_BZ_MSIX_MESSAGE_DATA_WIDTH 32
+#define        FRF_BZ_MSIX_MESSAGE_ADDRESS_HI_LBN 32
+#define        FRF_BZ_MSIX_MESSAGE_ADDRESS_HI_WIDTH 32
+#define        FRF_BZ_MSIX_MESSAGE_ADDRESS_LO_LBN 0
+#define        FRF_BZ_MSIX_MESSAGE_ADDRESS_LO_WIDTH 32
+
+/* MSIX_PBA_TABLE: MSIX Pending Bit Array */
+#define        FR_BB_MSIX_PBA_TABLE 0x00ff2000
+#define        FR_BZ_MSIX_PBA_TABLE_STEP 4
+#define        FR_BB_MSIX_PBA_TABLE_ROWS 2
+/* MSIX_PBA_TABLE: MSIX Pending Bit Array */
+#define        FR_CZ_MSIX_PBA_TABLE 0x00008000
+/* FR_BZ_MSIX_PBA_TABLE_STEP 4 */
+#define        FR_CZ_MSIX_PBA_TABLE_ROWS 32
+#define        FRF_BZ_MSIX_PBA_PEND_DWORD_LBN 0
+#define        FRF_BZ_MSIX_PBA_PEND_DWORD_WIDTH 32
+
+/* SRM_DBG_REG: SRAM debug access */
+#define        FR_BZ_SRM_DBG 0x03000000
+#define        FR_BZ_SRM_DBG_STEP 8
+#define        FR_CZ_SRM_DBG_ROWS 262144
+#define        FR_BB_SRM_DBG_ROWS 2097152
+#define        FRF_BZ_SRM_DBG_LBN 0
+#define        FRF_BZ_SRM_DBG_WIDTH 64
+
+/* TB_MSIX_PBA_TABLE: MSIX Pending Bit Array */
+#define        FR_CZ_TB_MSIX_PBA_TABLE 0x00008000
+#define        FR_CZ_TB_MSIX_PBA_TABLE_STEP 4
+#define        FR_CZ_TB_MSIX_PBA_TABLE_ROWS 1024
+#define        FRF_CZ_TB_MSIX_PBA_PEND_DWORD_LBN 0
+#define        FRF_CZ_TB_MSIX_PBA_PEND_DWORD_WIDTH 32
+
+/* DRIVER_EV */
+#define        FSF_AZ_DRIVER_EV_SUBCODE_LBN 56
+#define        FSF_AZ_DRIVER_EV_SUBCODE_WIDTH 4
+#define        FSE_BZ_TX_DSC_ERROR_EV 15
+#define        FSE_BZ_RX_DSC_ERROR_EV 14
+#define        FSE_AA_RX_RECOVER_EV 11
+#define        FSE_AZ_TIMER_EV 10
+#define        FSE_AZ_TX_PKT_NON_TCP_UDP 9
+#define        FSE_AZ_WAKE_UP_EV 6
+#define        FSE_AZ_SRM_UPD_DONE_EV 5
+#define        FSE_AB_EVQ_NOT_EN_EV 3
+#define        FSE_AZ_EVQ_INIT_DONE_EV 2
+#define        FSE_AZ_RX_DESCQ_FLS_DONE_EV 1
+#define        FSE_AZ_TX_DESCQ_FLS_DONE_EV 0
+#define        FSF_AZ_DRIVER_EV_SUBDATA_LBN 0
+#define        FSF_AZ_DRIVER_EV_SUBDATA_WIDTH 14
+
+/* EVENT_ENTRY */
+#define        FSF_AZ_EV_CODE_LBN 60
+#define        FSF_AZ_EV_CODE_WIDTH 4
+#define        FSE_CZ_EV_CODE_MCDI_EV 12
+#define        FSE_CZ_EV_CODE_USER_EV 8
+#define        FSE_AZ_EV_CODE_DRV_GEN_EV 7
+#define        FSE_AZ_EV_CODE_GLOBAL_EV 6
+#define        FSE_AZ_EV_CODE_DRIVER_EV 5
+#define        FSE_AZ_EV_CODE_TX_EV 2
+#define        FSE_AZ_EV_CODE_RX_EV 0
+#define        FSF_AZ_EV_DATA_LBN 0
+#define        FSF_AZ_EV_DATA_WIDTH 60
+
+/* GLOBAL_EV */
+#define        FSF_BB_GLB_EV_RX_RECOVERY_LBN 12
+#define        FSF_BB_GLB_EV_RX_RECOVERY_WIDTH 1
+#define        FSF_AA_GLB_EV_RX_RECOVERY_LBN 11
+#define        FSF_AA_GLB_EV_RX_RECOVERY_WIDTH 1
+#define        FSF_BB_GLB_EV_XG_MGT_INTR_LBN 11
+#define        FSF_BB_GLB_EV_XG_MGT_INTR_WIDTH 1
+#define        FSF_AB_GLB_EV_XFP_PHY0_INTR_LBN 10
+#define        FSF_AB_GLB_EV_XFP_PHY0_INTR_WIDTH 1
+#define        FSF_AB_GLB_EV_XG_PHY0_INTR_LBN 9
+#define        FSF_AB_GLB_EV_XG_PHY0_INTR_WIDTH 1
+#define        FSF_AB_GLB_EV_G_PHY0_INTR_LBN 7
+#define        FSF_AB_GLB_EV_G_PHY0_INTR_WIDTH 1
+
+/* LEGACY_INT_VEC */
+#define        FSF_AZ_NET_IVEC_FATAL_INT_LBN 64
+#define        FSF_AZ_NET_IVEC_FATAL_INT_WIDTH 1
+#define        FSF_AZ_NET_IVEC_INT_Q_LBN 40
+#define        FSF_AZ_NET_IVEC_INT_Q_WIDTH 4
+#define        FSF_AZ_NET_IVEC_INT_FLAG_LBN 32
+#define        FSF_AZ_NET_IVEC_INT_FLAG_WIDTH 1
+#define        FSF_AZ_NET_IVEC_EVQ_FIFO_HF_LBN 1
+#define        FSF_AZ_NET_IVEC_EVQ_FIFO_HF_WIDTH 1
+#define        FSF_AZ_NET_IVEC_EVQ_FIFO_AF_LBN 0
+#define        FSF_AZ_NET_IVEC_EVQ_FIFO_AF_WIDTH 1
+
+/* MC_XGMAC_FLTR_RULE_DEF */
+#define        FSF_CZ_MC_XFRC_MODE_LBN 416
+#define        FSF_CZ_MC_XFRC_MODE_WIDTH 1
+#define        FSE_CZ_MC_XFRC_MODE_LAYERED 1
+#define        FSE_CZ_MC_XFRC_MODE_SIMPLE 0
+#define        FSF_CZ_MC_XFRC_HASH_LBN 384
+#define        FSF_CZ_MC_XFRC_HASH_WIDTH 32
+#define        FSF_CZ_MC_XFRC_LAYER4_BYTE_MASK_LBN 256
+#define        FSF_CZ_MC_XFRC_LAYER4_BYTE_MASK_WIDTH 128
+#define        FSF_CZ_MC_XFRC_LAYER3_BYTE_MASK_LBN 128
+#define        FSF_CZ_MC_XFRC_LAYER3_BYTE_MASK_WIDTH 128
+#define        FSF_CZ_MC_XFRC_LAYER2_OR_SIMPLE_BYTE_MASK_LBN 0
+#define        FSF_CZ_MC_XFRC_LAYER2_OR_SIMPLE_BYTE_MASK_WIDTH 128
+
+/* RX_EV */
+#define        FSF_CZ_RX_EV_PKT_NOT_PARSED_LBN 58
+#define        FSF_CZ_RX_EV_PKT_NOT_PARSED_WIDTH 1
+#define        FSF_CZ_RX_EV_IPV6_PKT_LBN 57
+#define        FSF_CZ_RX_EV_IPV6_PKT_WIDTH 1
+#define        FSF_AZ_RX_EV_PKT_OK_LBN 56
+#define        FSF_AZ_RX_EV_PKT_OK_WIDTH 1
+#define        FSF_AZ_RX_EV_PAUSE_FRM_ERR_LBN 55
+#define        FSF_AZ_RX_EV_PAUSE_FRM_ERR_WIDTH 1
+#define        FSF_AZ_RX_EV_BUF_OWNER_ID_ERR_LBN 54
+#define        FSF_AZ_RX_EV_BUF_OWNER_ID_ERR_WIDTH 1
+#define        FSF_AZ_RX_EV_IP_FRAG_ERR_LBN 53
+#define        FSF_AZ_RX_EV_IP_FRAG_ERR_WIDTH 1
+#define        FSF_AZ_RX_EV_IP_HDR_CHKSUM_ERR_LBN 52
+#define        FSF_AZ_RX_EV_IP_HDR_CHKSUM_ERR_WIDTH 1
+#define        FSF_AZ_RX_EV_TCP_UDP_CHKSUM_ERR_LBN 51
+#define        FSF_AZ_RX_EV_TCP_UDP_CHKSUM_ERR_WIDTH 1
+#define        FSF_AZ_RX_EV_ETH_CRC_ERR_LBN 50
+#define        FSF_AZ_RX_EV_ETH_CRC_ERR_WIDTH 1
+#define        FSF_AZ_RX_EV_FRM_TRUNC_LBN 49
+#define        FSF_AZ_RX_EV_FRM_TRUNC_WIDTH 1
+#define        FSF_AA_RX_EV_DRIB_NIB_LBN 49
+#define        FSF_AA_RX_EV_DRIB_NIB_WIDTH 1
+#define        FSF_AZ_RX_EV_TOBE_DISC_LBN 47
+#define        FSF_AZ_RX_EV_TOBE_DISC_WIDTH 1
+#define        FSF_AZ_RX_EV_PKT_TYPE_LBN 44
+#define        FSF_AZ_RX_EV_PKT_TYPE_WIDTH 3
+#define        FSE_AZ_RX_EV_PKT_TYPE_VLAN_JUMBO 5
+#define        FSE_AZ_RX_EV_PKT_TYPE_VLAN_LLC 4
+#define        FSE_AZ_RX_EV_PKT_TYPE_VLAN 3
+#define        FSE_AZ_RX_EV_PKT_TYPE_JUMBO 2
+#define        FSE_AZ_RX_EV_PKT_TYPE_LLC 1
+#define        FSE_AZ_RX_EV_PKT_TYPE_ETH 0
+#define        FSF_AZ_RX_EV_HDR_TYPE_LBN 42
+#define        FSF_AZ_RX_EV_HDR_TYPE_WIDTH 2
+#define        FSE_AZ_RX_EV_HDR_TYPE_OTHER 3
+#define        FSE_AB_RX_EV_HDR_TYPE_IPV4_OTHER 2
+#define        FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_OTHER 2
+#define        FSE_AB_RX_EV_HDR_TYPE_IPV4_UDP 1
+#define        FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_UDP 1
+#define        FSE_AB_RX_EV_HDR_TYPE_IPV4_TCP 0
+#define        FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_TCP 0
+#define        FSF_AZ_RX_EV_DESC_Q_EMPTY_LBN 41
+#define        FSF_AZ_RX_EV_DESC_Q_EMPTY_WIDTH 1
+#define        FSF_AZ_RX_EV_MCAST_HASH_MATCH_LBN 40
+#define        FSF_AZ_RX_EV_MCAST_HASH_MATCH_WIDTH 1
+#define        FSF_AZ_RX_EV_MCAST_PKT_LBN 39
+#define        FSF_AZ_RX_EV_MCAST_PKT_WIDTH 1
+#define        FSF_AA_RX_EV_RECOVERY_FLAG_LBN 37
+#define        FSF_AA_RX_EV_RECOVERY_FLAG_WIDTH 1
+#define        FSF_AZ_RX_EV_Q_LABEL_LBN 32
+#define        FSF_AZ_RX_EV_Q_LABEL_WIDTH 5
+#define        FSF_AZ_RX_EV_JUMBO_CONT_LBN 31
+#define        FSF_AZ_RX_EV_JUMBO_CONT_WIDTH 1
+#define        FSF_AZ_RX_EV_PORT_LBN 30
+#define        FSF_AZ_RX_EV_PORT_WIDTH 1
+#define        FSF_AZ_RX_EV_BYTE_CNT_LBN 16
+#define        FSF_AZ_RX_EV_BYTE_CNT_WIDTH 14
+#define        FSF_AZ_RX_EV_SOP_LBN 15
+#define        FSF_AZ_RX_EV_SOP_WIDTH 1
+#define        FSF_AZ_RX_EV_ISCSI_PKT_OK_LBN 14
+#define        FSF_AZ_RX_EV_ISCSI_PKT_OK_WIDTH 1
+#define        FSF_AZ_RX_EV_ISCSI_DDIG_ERR_LBN 13
+#define        FSF_AZ_RX_EV_ISCSI_DDIG_ERR_WIDTH 1
+#define        FSF_AZ_RX_EV_ISCSI_HDIG_ERR_LBN 12
+#define        FSF_AZ_RX_EV_ISCSI_HDIG_ERR_WIDTH 1
+#define        FSF_AZ_RX_EV_DESC_PTR_LBN 0
+#define        FSF_AZ_RX_EV_DESC_PTR_WIDTH 12
+
+/* RX_KER_DESC */
+#define        FSF_AZ_RX_KER_BUF_SIZE_LBN 48
+#define        FSF_AZ_RX_KER_BUF_SIZE_WIDTH 14
+#define        FSF_AZ_RX_KER_BUF_REGION_LBN 46
+#define        FSF_AZ_RX_KER_BUF_REGION_WIDTH 2
+#define        FSF_AZ_RX_KER_BUF_ADDR_LBN 0
+#define        FSF_AZ_RX_KER_BUF_ADDR_WIDTH 46
+
+/* RX_USER_DESC */
+#define        FSF_AZ_RX_USER_2BYTE_OFFSET_LBN 20
+#define        FSF_AZ_RX_USER_2BYTE_OFFSET_WIDTH 12
+#define        FSF_AZ_RX_USER_BUF_ID_LBN 0
+#define        FSF_AZ_RX_USER_BUF_ID_WIDTH 20
+
+/* TX_EV */
+#define        FSF_AZ_TX_EV_PKT_ERR_LBN 38
+#define        FSF_AZ_TX_EV_PKT_ERR_WIDTH 1
+#define        FSF_AZ_TX_EV_PKT_TOO_BIG_LBN 37
+#define        FSF_AZ_TX_EV_PKT_TOO_BIG_WIDTH 1
+#define        FSF_AZ_TX_EV_Q_LABEL_LBN 32
+#define        FSF_AZ_TX_EV_Q_LABEL_WIDTH 5
+#define        FSF_AZ_TX_EV_PORT_LBN 16
+#define        FSF_AZ_TX_EV_PORT_WIDTH 1
+#define        FSF_AZ_TX_EV_WQ_FF_FULL_LBN 15
+#define        FSF_AZ_TX_EV_WQ_FF_FULL_WIDTH 1
+#define        FSF_AZ_TX_EV_BUF_OWNER_ID_ERR_LBN 14
+#define        FSF_AZ_TX_EV_BUF_OWNER_ID_ERR_WIDTH 1
+#define        FSF_AZ_TX_EV_COMP_LBN 12
+#define        FSF_AZ_TX_EV_COMP_WIDTH 1
+#define        FSF_AZ_TX_EV_DESC_PTR_LBN 0
+#define        FSF_AZ_TX_EV_DESC_PTR_WIDTH 12
+
+/* TX_KER_DESC */
+#define        FSF_AZ_TX_KER_CONT_LBN 62
+#define        FSF_AZ_TX_KER_CONT_WIDTH 1
+#define        FSF_AZ_TX_KER_BYTE_COUNT_LBN 48
+#define        FSF_AZ_TX_KER_BYTE_COUNT_WIDTH 14
+#define        FSF_AZ_TX_KER_BUF_REGION_LBN 46
+#define        FSF_AZ_TX_KER_BUF_REGION_WIDTH 2
+#define        FSF_AZ_TX_KER_BUF_ADDR_LBN 0
+#define        FSF_AZ_TX_KER_BUF_ADDR_WIDTH 46
+
+/* TX_USER_DESC */
+#define        FSF_AZ_TX_USER_SW_EV_EN_LBN 48
+#define        FSF_AZ_TX_USER_SW_EV_EN_WIDTH 1
+#define        FSF_AZ_TX_USER_CONT_LBN 46
+#define        FSF_AZ_TX_USER_CONT_WIDTH 1
+#define        FSF_AZ_TX_USER_BYTE_CNT_LBN 33
+#define        FSF_AZ_TX_USER_BYTE_CNT_WIDTH 13
+#define        FSF_AZ_TX_USER_BUF_ID_LBN 13
+#define        FSF_AZ_TX_USER_BUF_ID_WIDTH 20
+#define        FSF_AZ_TX_USER_BYTE_OFS_LBN 0
+#define        FSF_AZ_TX_USER_BYTE_OFS_WIDTH 13
+
+/* USER_EV */
+#define        FSF_CZ_USER_QID_LBN 32
+#define        FSF_CZ_USER_QID_WIDTH 10
+#define        FSF_CZ_USER_EV_REG_VALUE_LBN 0
+#define        FSF_CZ_USER_EV_REG_VALUE_WIDTH 32
+
+/**************************************************************************
+ *
+ * Falcon B0 PCIe core indirect registers
+ *
+ **************************************************************************
+ */
+
+#define FPCR_BB_PCIE_DEVICE_CTRL_STAT 0x68
+
+#define FPCR_BB_PCIE_LINK_CTRL_STAT 0x70
+
+#define FPCR_BB_ACK_RPL_TIMER 0x700
+#define FPCRF_BB_ACK_TL_LBN 0
+#define FPCRF_BB_ACK_TL_WIDTH 16
+#define FPCRF_BB_RPL_TL_LBN 16
+#define FPCRF_BB_RPL_TL_WIDTH 16
+
+#define FPCR_BB_ACK_FREQ 0x70C
+#define FPCRF_BB_ACK_FREQ_LBN 0
+#define FPCRF_BB_ACK_FREQ_WIDTH 7
+
+/**************************************************************************
+ *
+ * Pseudo-registers and fields
+ *
+ **************************************************************************
+ */
+
+/* Interrupt acknowledge work-around register (A0/A1 only) */
+#define FR_AA_WORK_AROUND_BROKEN_PCI_READS 0x0070
+
+/* EE_SPI_HCMD_REG: SPI host command register */
+/* Values for the EE_SPI_HCMD_SF_SEL register field */
+#define FFE_AB_SPI_DEVICE_EEPROM 0
+#define FFE_AB_SPI_DEVICE_FLASH 1
+
+/* NIC_STAT_REG: NIC status register */
+#define FRF_AB_STRAP_10G_LBN 2
+#define FRF_AB_STRAP_10G_WIDTH 1
+#define FRF_AA_STRAP_PCIE_LBN 0
+#define FRF_AA_STRAP_PCIE_WIDTH 1
+
+/* FATAL_INTR_REG_KER: Fatal interrupt register for Kernel */
+#define FRF_AZ_FATAL_INTR_LBN 0
+#define FRF_AZ_FATAL_INTR_WIDTH 12
+
+/* SRM_CFG_REG: SRAM configuration register */
+/* We treat the number of SRAM banks and bank size as a single field */
+#define        FRF_AZ_SRM_NB_SZ_LBN FRF_AZ_SRM_BANK_SIZE_LBN
+#define        FRF_AZ_SRM_NB_SZ_WIDTH \
+       (FRF_AZ_SRM_BANK_SIZE_WIDTH + FRF_AZ_SRM_NUM_BANK_WIDTH)
+#define FFE_AB_SRM_NB1_SZ2M 0
+#define FFE_AB_SRM_NB1_SZ4M 1
+#define FFE_AB_SRM_NB1_SZ8M 2
+#define FFE_AB_SRM_NB_SZ_DEF 3
+#define FFE_AB_SRM_NB2_SZ4M 4
+#define FFE_AB_SRM_NB2_SZ8M 5
+#define FFE_AB_SRM_NB2_SZ16M 6
+#define FFE_AB_SRM_NB_SZ_RES 7
+
+/* RX_DESC_UPD_REGP0: Receive descriptor update register. */
+/* We write just the last dword of these registers, so the address macro
+ * adds 3 * 4 bytes to the register base and the write-pointer field's bit
+ * position is rebased by 3 * 32 bits to be relative to that dword.  The
+ * BUILD_BUG_ON_ZERO() term adds nothing to the address; it is there to
+ * fail the build if FR_AA_RX_DESC_UPD_KER and FR_BZ_RX_DESC_UPD_P0 ever
+ * differ, since a single macro is used for both.
+ */
+#define        FR_AZ_RX_DESC_UPD_DWORD_P0 \
+       (BUILD_BUG_ON_ZERO(FR_AA_RX_DESC_UPD_KER != FR_BZ_RX_DESC_UPD_P0) + \
+        FR_BZ_RX_DESC_UPD_P0 + 3 * 4)
+#define        FRF_AZ_RX_DESC_WPTR_DWORD_LBN (FRF_AZ_RX_DESC_WPTR_LBN - 3 * 32)
+#define        FRF_AZ_RX_DESC_WPTR_DWORD_WIDTH FRF_AZ_RX_DESC_WPTR_WIDTH
+
+/* TX_DESC_UPD_REGP0: Transmit descriptor update register.
+ * Only the last dword is addressed: the address macro adds 3 * 4 bytes to
+ * the register base and the write-pointer field's bit position is rebased
+ * by 3 * 32 bits to be relative to that dword.  The BUILD_BUG_ON_ZERO()
+ * term adds nothing to the address; it is there to fail the build if
+ * FR_AA_TX_DESC_UPD_KER and FR_BZ_TX_DESC_UPD_P0 ever differ.
+ */
+#define FR_AZ_TX_DESC_UPD_DWORD_P0 \
+       (BUILD_BUG_ON_ZERO(FR_AA_TX_DESC_UPD_KER != FR_BZ_TX_DESC_UPD_P0) + \
+        FR_BZ_TX_DESC_UPD_P0 + 3 * 4)
+#define        FRF_AZ_TX_DESC_WPTR_DWORD_LBN (FRF_AZ_TX_DESC_WPTR_LBN - 3 * 32)
+#define        FRF_AZ_TX_DESC_WPTR_DWORD_WIDTH FRF_AZ_TX_DESC_WPTR_WIDTH
+
+/* GMF_CFG4_REG: GMAC FIFO configuration register 4 */
+#define FRF_AB_GMF_HSTFLTRFRM_PAUSE_LBN 12
+#define FRF_AB_GMF_HSTFLTRFRM_PAUSE_WIDTH 1
+
+/* GMF_CFG5_REG: GMAC FIFO configuration register 5 */
+#define FRF_AB_GMF_HSTFLTRFRMDC_PAUSE_LBN 12
+#define FRF_AB_GMF_HSTFLTRFRMDC_PAUSE_WIDTH 1
+
+/* XM_TX_PARAM_REG: XGMAC transmit parameter register */
+#define        FRF_AB_XM_MAX_TX_FRM_SIZE_LBN FRF_AB_XM_MAX_TX_FRM_SIZE_LO_LBN
+#define        FRF_AB_XM_MAX_TX_FRM_SIZE_WIDTH (FRF_AB_XM_MAX_TX_FRM_SIZE_HI_WIDTH + \
+                                        FRF_AB_XM_MAX_TX_FRM_SIZE_LO_WIDTH)
+
+/* XM_RX_PARAM_REG: XGMAC receive parameter register */
+#define        FRF_AB_XM_MAX_RX_FRM_SIZE_LBN FRF_AB_XM_MAX_RX_FRM_SIZE_LO_LBN
+#define        FRF_AB_XM_MAX_RX_FRM_SIZE_WIDTH (FRF_AB_XM_MAX_RX_FRM_SIZE_HI_WIDTH + \
+                                        FRF_AB_XM_MAX_RX_FRM_SIZE_LO_WIDTH)
+
+/* XX_TXDRV_CTL_REG: XAUI SerDes transmit drive control register */
+/* Default values */
+#define FFE_AB_XX_TXDRV_DEQ_DEF 0xe /* deq=.6 */
+#define FFE_AB_XX_TXDRV_DTX_DEF 0x5 /* 1.25 */
+#define FFE_AB_XX_SD_CTL_DRV_DEF 0  /* 20mA */
+
+/* XX_CORE_STAT_REG: XAUI XGXS core status register */
+/* XGXS all-lanes status fields */
+#define        FRF_AB_XX_SYNC_STAT_LBN FRF_AB_XX_SYNC_STAT0_LBN
+#define        FRF_AB_XX_SYNC_STAT_WIDTH 4
+#define        FRF_AB_XX_COMMA_DET_LBN FRF_AB_XX_COMMA_DET_CH0_LBN
+#define        FRF_AB_XX_COMMA_DET_WIDTH 4
+#define        FRF_AB_XX_CHAR_ERR_LBN FRF_AB_XX_CHAR_ERR_CH0_LBN
+#define        FRF_AB_XX_CHAR_ERR_WIDTH 4
+#define        FRF_AB_XX_DISPERR_LBN FRF_AB_XX_DISPERR_CH0_LBN
+#define        FRF_AB_XX_DISPERR_WIDTH 4
+#define        FFE_AB_XX_STAT_ALL_LANES 0xf
+#define        FRF_AB_XX_FORCE_SIG_LBN FRF_AB_XX_FORCE_SIG0_VAL_LBN
+#define        FRF_AB_XX_FORCE_SIG_WIDTH 8
+#define        FFE_AB_XX_FORCE_SIG_ALL_LANES 0xff
+
+/* RX_MAC_FILTER_TBL0 */
+/* RMFT_DEST_MAC is wider than 32 bits */
+#define FRF_CZ_RMFT_DEST_MAC_LO_LBN FRF_CZ_RMFT_DEST_MAC_LBN
+#define FRF_CZ_RMFT_DEST_MAC_LO_WIDTH 32
+#define FRF_CZ_RMFT_DEST_MAC_HI_LBN (FRF_CZ_RMFT_DEST_MAC_LBN + 32)
+#define FRF_CZ_RMFT_DEST_MAC_HI_WIDTH (FRF_CZ_RMFT_DEST_MAC_WIDTH - 32)
+
+/* TX_MAC_FILTER_TBL0 */
+/* TMFT_SRC_MAC is wider than 32 bits */
+#define FRF_CZ_TMFT_SRC_MAC_LO_LBN FRF_CZ_TMFT_SRC_MAC_LBN
+#define FRF_CZ_TMFT_SRC_MAC_LO_WIDTH 32
+#define FRF_CZ_TMFT_SRC_MAC_HI_LBN (FRF_CZ_TMFT_SRC_MAC_LBN + 32)
+#define FRF_CZ_TMFT_SRC_MAC_HI_WIDTH (FRF_CZ_TMFT_SRC_MAC_WIDTH - 32)
+
+/* TX_PACE_TBL */
+/* Values >20 are documented as reserved, but will result in a queue going
+ * into the fast bin with a pace value of zero. */
+#define FFE_BZ_TX_PACE_OFF 0
+#define FFE_BZ_TX_PACE_RESERVED 21
+
+/* DRIVER_EV */
+/* Sub-fields of an RX flush completion event */
+#define FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL_LBN 12
+#define FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL_WIDTH 1
+#define FSF_AZ_DRIVER_EV_RX_DESCQ_ID_LBN 0
+#define FSF_AZ_DRIVER_EV_RX_DESCQ_ID_WIDTH 12
+
+/* EVENT_ENTRY */
+/* Magic number field for event test */
+#define FSF_AZ_DRV_GEN_EV_MAGIC_LBN 0
+#define FSF_AZ_DRV_GEN_EV_MAGIC_WIDTH 32
+
+/* RX packet prefix */
+#define FS_BZ_RX_PREFIX_HASH_OFST 12
+#define FS_BZ_RX_PREFIX_SIZE 16
+
+#endif /* EF4_FARCH_REGS_H */
diff --git a/drivers/net/ethernet/sfc/falcon/filter.h b/drivers/net/ethernet/sfc/falcon/filter.h
new file mode 100644 (file)
index 0000000..647f6b2
--- /dev/null
@@ -0,0 +1,272 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_FILTER_H
+#define EF4_FILTER_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <asm/byteorder.h>
+
+/**
+ * enum ef4_filter_match_flags - Flags for hardware filter match type
+ * @EF4_FILTER_MATCH_REM_HOST: Match by remote IP host address
+ * @EF4_FILTER_MATCH_LOC_HOST: Match by local IP host address
+ * @EF4_FILTER_MATCH_REM_MAC: Match by remote MAC address
+ * @EF4_FILTER_MATCH_REM_PORT: Match by remote TCP/UDP port
+ * @EF4_FILTER_MATCH_LOC_MAC: Match by local MAC address
+ * @EF4_FILTER_MATCH_LOC_PORT: Match by local TCP/UDP port
+ * @EF4_FILTER_MATCH_ETHER_TYPE: Match by Ether-type
+ * @EF4_FILTER_MATCH_INNER_VID: Match by inner VLAN ID
+ * @EF4_FILTER_MATCH_OUTER_VID: Match by outer VLAN ID
+ * @EF4_FILTER_MATCH_IP_PROTO: Match by IP transport protocol
+ * @EF4_FILTER_MATCH_LOC_MAC_IG: Match by local MAC address I/G bit.
+ *     Used for RX default unicast and multicast/broadcast filters.
+ *
+ * Each flag is a distinct single bit, so a match type is built by OR-ing
+ * flags together; the combination is kept in the @match_flags member of
+ * &struct ef4_filter_spec.
+ *
+ * Only some combinations are supported, depending on NIC type:
+ *
+ * - Falcon supports RX filters matching by {TCP,UDP}/IPv4 4-tuple or
+ *   local 2-tuple (only implemented for Falcon B0)
+ *
+ * - Siena supports RX and TX filters matching by {TCP,UDP}/IPv4 4-tuple
+ *   or local 2-tuple, or local MAC with or without outer VID, and RX
+ *   default filters
+ *
+ * - Huntington supports filter matching controlled by firmware, potentially
+ *   using {TCP,UDP}/IPv{4,6} 4-tuple or local 2-tuple, local MAC or I/G bit,
+ *   with or without outer and inner VID
+ */
+enum ef4_filter_match_flags {
+       EF4_FILTER_MATCH_REM_HOST =     0x0001,
+       EF4_FILTER_MATCH_LOC_HOST =     0x0002,
+       EF4_FILTER_MATCH_REM_MAC =      0x0004,
+       EF4_FILTER_MATCH_REM_PORT =     0x0008,
+       EF4_FILTER_MATCH_LOC_MAC =      0x0010,
+       EF4_FILTER_MATCH_LOC_PORT =     0x0020,
+       EF4_FILTER_MATCH_ETHER_TYPE =   0x0040,
+       EF4_FILTER_MATCH_INNER_VID =    0x0080,
+       EF4_FILTER_MATCH_OUTER_VID =    0x0100,
+       EF4_FILTER_MATCH_IP_PROTO =     0x0200,
+       EF4_FILTER_MATCH_LOC_MAC_IG =   0x0400,
+};
+
+/**
+ * enum ef4_filter_priority - priority of a hardware filter specification
+ * @EF4_FILTER_PRI_HINT: Performance hint
+ * @EF4_FILTER_PRI_AUTO: Automatic filter based on device address list
+ *     or hardware requirements.  This may only be used by the filter
+ *     implementation for each NIC type.
+ * @EF4_FILTER_PRI_MANUAL: Manually configured filter
+ * @EF4_FILTER_PRI_REQUIRED: Required for correct behaviour (user-level
+ *     networking and SR-IOV)
+ *
+ * The enumerators are numbered consecutively from 0
+ * (%EF4_FILTER_PRI_HINT) to 3 (%EF4_FILTER_PRI_REQUIRED), so every value
+ * fits the two-bit @priority member of &struct ef4_filter_spec.
+ */
+enum ef4_filter_priority {
+       EF4_FILTER_PRI_HINT = 0,
+       EF4_FILTER_PRI_AUTO,
+       EF4_FILTER_PRI_MANUAL,
+       EF4_FILTER_PRI_REQUIRED,
+};
+
+/**
+ * enum ef4_filter_flags - flags for hardware filter specifications
+ * @EF4_FILTER_FLAG_RX_RSS: Use RSS to spread across multiple queues.
+ *     By default, matching packets will be delivered only to the
+ *     specified queue. If this flag is set, they will be delivered
+ *     to a range of queues offset from the specified queue number
+ *     according to the indirection table.
+ * @EF4_FILTER_FLAG_RX_SCATTER: Enable DMA scatter on the receiving
+ *     queue.
+ * @EF4_FILTER_FLAG_RX_OVER_AUTO: Indicates a filter that is
+ *     overriding an automatic filter (priority
+ *     %EF4_FILTER_PRI_AUTO).  This may only be set by the filter
+ *     implementation for each NIC type.  A removal request will restore
+ *     the automatic filter in its place.
+ * @EF4_FILTER_FLAG_RX: Filter is for RX
+ * @EF4_FILTER_FLAG_TX: Filter is for TX
+ *
+ * Each flag is a distinct single bit so that flags can be OR-ed together
+ * (ef4_filter_init_rx() combines %EF4_FILTER_FLAG_RX with the caller's
+ * flags this way); the largest value, 0x10, fits the six-bit @flags
+ * member of &struct ef4_filter_spec.
+ */
+enum ef4_filter_flags {
+       EF4_FILTER_FLAG_RX_RSS = 0x01,
+       EF4_FILTER_FLAG_RX_SCATTER = 0x02,
+       EF4_FILTER_FLAG_RX_OVER_AUTO = 0x04,
+       EF4_FILTER_FLAG_RX = 0x08,
+       EF4_FILTER_FLAG_TX = 0x10,
+};
+
+/**
+ * struct ef4_filter_spec - specification for a hardware filter
+ * @match_flags: Match type flags, from &enum ef4_filter_match_flags
+ * @priority: Priority of the filter, from &enum ef4_filter_priority
+ * @flags: Miscellaneous flags, from &enum ef4_filter_flags
+ * @rss_context: RSS context to use, if %EF4_FILTER_FLAG_RX_RSS is set
+ * @dmaq_id: Source/target queue index, or %EF4_FILTER_RX_DMAQ_ID_DROP for
+ *     an RX drop filter
+ * @outer_vid: Outer VLAN ID to match, if %EF4_FILTER_MATCH_OUTER_VID is set
+ * @inner_vid: Inner VLAN ID to match, if %EF4_FILTER_MATCH_INNER_VID is set
+ * @loc_mac: Local MAC address to match, if %EF4_FILTER_MATCH_LOC_MAC or
+ *     %EF4_FILTER_MATCH_LOC_MAC_IG is set
+ * @rem_mac: Remote MAC address to match, if %EF4_FILTER_MATCH_REM_MAC is set
+ * @ether_type: Ether-type to match, if %EF4_FILTER_MATCH_ETHER_TYPE is set
+ * @ip_proto: IP transport protocol to match, if %EF4_FILTER_MATCH_IP_PROTO
+ *     is set
+ * @loc_host: Local IP host to match, if %EF4_FILTER_MATCH_LOC_HOST is set
+ * @rem_host: Remote IP host to match, if %EF4_FILTER_MATCH_REM_HOST is set
+ * @loc_port: Local TCP/UDP port to match, if %EF4_FILTER_MATCH_LOC_PORT is set
+ * @rem_port: Remote TCP/UDP port to match, if %EF4_FILTER_MATCH_REM_PORT is set
+ *
+ * The ef4_filter_init_rx() or ef4_filter_init_tx() function *must* be
+ * used to initialise the structure.  The ef4_filter_set_*() functions
+ * may then be used to set @rss_context, @match_flags and related
+ * fields.
+ *
+ * The @priority field is used by software to determine whether a new
+ * filter may replace an old one.  The hardware priority of a filter
+ * depends on which fields are matched.
+ *
+ * The four leading bit-fields add up to 32 bits (12 + 2 + 6 + 12).  The
+ * match values start at @outer_vid, which is explicitly aligned to 4
+ * bytes; VLAN IDs, Ether-type, addresses and ports are held in the
+ * big-endian (__be16/__be32) types declared below.
+ */
+struct ef4_filter_spec {
+       u32     match_flags:12;
+       u32     priority:2;
+       u32     flags:6;
+       u32     dmaq_id:12;
+       u32     rss_context;
+       __be16  outer_vid __aligned(4); /* allow jhash2() of match values */
+       __be16  inner_vid;
+       u8      loc_mac[ETH_ALEN];
+       u8      rem_mac[ETH_ALEN];
+       __be16  ether_type;
+       u8      ip_proto;
+       __be32  loc_host[4];
+       __be32  rem_host[4];
+       __be16  loc_port;
+       __be16  rem_port;
+       /* total 64 bytes */
+};
+
+enum { /* special values for ef4_filter_spec.rss_context and .dmaq_id */
+       EF4_FILTER_RSS_CONTEXT_DEFAULT = 0xffffffff, /* set by ef4_filter_init_rx() */
+       EF4_FILTER_RX_DMAQ_ID_DROP = 0xfff /* largest value of 12-bit dmaq_id */
+};
+
+static inline void ef4_filter_init_rx(struct ef4_filter_spec *spec,
+                                     enum ef4_filter_priority priority,
+                                     enum ef4_filter_flags flags,
+                                     unsigned rxq_id)
+{ /* Reset @spec and fill in priority, flags and target queue for RX */
+       memset(spec, 0, sizeof(*spec)); /* leaves match_flags empty */
+       spec->priority = priority;
+       spec->flags = EF4_FILTER_FLAG_RX | flags; /* RX flag is always set */
+       spec->rss_context = EF4_FILTER_RSS_CONTEXT_DEFAULT;
+       spec->dmaq_id = rxq_id; /* stored in a 12-bit field */
+}
+
+static inline void ef4_filter_init_tx(struct ef4_filter_spec *spec,
+                                     unsigned txq_id)
+{ /* Reset @spec and set it up as a TX filter for queue @txq_id */
+       memset(spec, 0, sizeof(*spec)); /* leaves match_flags empty */
+       spec->priority = EF4_FILTER_PRI_REQUIRED; /* fixed, not caller-chosen */
+       spec->flags = EF4_FILTER_FLAG_TX;
+       spec->dmaq_id = txq_id; /* stored in a 12-bit field */
+}
+
+/**
+ * ef4_filter_set_ipv4_local - specify IPv4 host, transport protocol and port
+ * @spec: Specification to initialise
+ * @proto: Transport layer protocol number
+ * @host: Local host address (network byte order)
+ * @port: Local port (network byte order)
+ *
+ * ORs the Ether-type, IP-protocol, local-host and local-port match flags
+ * into @spec->match_flags, sets the Ether-type to %ETH_P_IP and stores
+ * @host in the first word of @spec->loc_host; the other three words are
+ * not written here.
+ *
+ * Return: 0; this function has no failure path.
+ */
+static inline int
+ef4_filter_set_ipv4_local(struct ef4_filter_spec *spec, u8 proto,
+                         __be32 host, __be16 port)
+{
+       spec->match_flags |=
+               EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_IP_PROTO |
+               EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_LOC_PORT;
+       spec->ether_type = htons(ETH_P_IP);
+       spec->ip_proto = proto;
+       spec->loc_host[0] = host;
+       spec->loc_port = port;
+       return 0;
+}
+
+/**
+ * ef4_filter_set_ipv4_full - specify IPv4 hosts, transport protocol and ports
+ * @spec: Specification to initialise
+ * @proto: Transport layer protocol number
+ * @lhost: Local host address (network byte order)
+ * @lport: Local port (network byte order)
+ * @rhost: Remote host address (network byte order)
+ * @rport: Remote port (network byte order)
+ *
+ * ORs the Ether-type, IP-protocol and local/remote host and port match
+ * flags into @spec->match_flags, sets the Ether-type to %ETH_P_IP and
+ * stores @lhost and @rhost in the first word of @spec->loc_host and
+ * @spec->rem_host; the other three words of each are not written here.
+ *
+ * Return: 0; this function has no failure path.
+ */
+static inline int
+ef4_filter_set_ipv4_full(struct ef4_filter_spec *spec, u8 proto,
+                        __be32 lhost, __be16 lport,
+                        __be32 rhost, __be16 rport)
+{
+       spec->match_flags |=
+               EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_IP_PROTO |
+               EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_LOC_PORT |
+               EF4_FILTER_MATCH_REM_HOST | EF4_FILTER_MATCH_REM_PORT;
+       spec->ether_type = htons(ETH_P_IP);
+       spec->ip_proto = proto;
+       spec->loc_host[0] = lhost;
+       spec->loc_port = lport;
+       spec->rem_host[0] = rhost;
+       spec->rem_port = rport;
+       return 0;
+}
+
+enum {
+       EF4_FILTER_VID_UNSPEC = 0xffff, /* "no VLAN ID" for ef4_filter_set_eth_local() */
+};
+
+/**
+ * ef4_filter_set_eth_local - specify local Ethernet address and/or VID
+ * @spec: Specification to initialise
+ * @vid: Outer VLAN ID to match, or %EF4_FILTER_VID_UNSPEC
+ * @addr: Local Ethernet MAC address, or %NULL
+ *
+ * At least one of @vid and @addr must be specified.  A specified @vid is
+ * converted with htons() and stored in @spec->outer_vid; a specified
+ * @addr is copied into @spec->loc_mac.  The corresponding match flag is
+ * ORed into @spec->match_flags for each criterion that is given.
+ *
+ * Return: 0 on success, or -EINVAL if @vid is %EF4_FILTER_VID_UNSPEC and
+ * @addr is %NULL (in which case @spec is not modified).
+ */
+static inline int ef4_filter_set_eth_local(struct ef4_filter_spec *spec,
+                                          u16 vid, const u8 *addr)
+{
+       if (vid == EF4_FILTER_VID_UNSPEC && addr == NULL)
+               return -EINVAL;
+
+       if (vid != EF4_FILTER_VID_UNSPEC) {
+               spec->match_flags |= EF4_FILTER_MATCH_OUTER_VID;
+               spec->outer_vid = htons(vid);
+       }
+       if (addr != NULL) {
+               spec->match_flags |= EF4_FILTER_MATCH_LOC_MAC;
+               ether_addr_copy(spec->loc_mac, addr);
+       }
+       return 0;
+}
+
+/**
+ * ef4_filter_set_uc_def - specify matching otherwise-unmatched unicast
+ * @spec: Specification to initialise
+ *
+ * Only ORs %EF4_FILTER_MATCH_LOC_MAC_IG into @spec->match_flags;
+ * @spec->loc_mac is not written.  NOTE(review): this presumably relies on
+ * loc_mac[0] still being zero from ef4_filter_init_rx()/_tx() - confirm.
+ *
+ * Return: 0; this function has no failure path.
+ */
+static inline int ef4_filter_set_uc_def(struct ef4_filter_spec *spec)
+{
+       spec->match_flags |= EF4_FILTER_MATCH_LOC_MAC_IG;
+       return 0;
+}
+
+/**
+ * ef4_filter_set_mc_def - specify matching otherwise-unmatched multicast
+ * @spec: Specification to initialise
+ *
+ * ORs %EF4_FILTER_MATCH_LOC_MAC_IG into @spec->match_flags and stores 1
+ * in the first byte of @spec->loc_mac, which is what distinguishes this
+ * from ef4_filter_set_uc_def().
+ *
+ * Return: 0; this function has no failure path.
+ */
+static inline int ef4_filter_set_mc_def(struct ef4_filter_spec *spec)
+{
+       spec->match_flags |= EF4_FILTER_MATCH_LOC_MAC_IG;
+       spec->loc_mac[0] = 1;
+       return 0;
+}
+
+#endif /* EF4_FILTER_H */
diff --git a/drivers/net/ethernet/sfc/falcon/io.h b/drivers/net/ethernet/sfc/falcon/io.h
new file mode 100644 (file)
index 0000000..7085ee1
--- /dev/null
@@ -0,0 +1,290 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_IO_H
+#define EF4_IO_H
+
+#include <linux/io.h>
+#include <linux/spinlock.h>
+
+/**************************************************************************
+ *
+ * NIC register I/O
+ *
+ **************************************************************************
+ *
+ * Notes on locking strategy for the Falcon architecture:
+ *
+ * Many CSRs are very wide and cannot be read or written atomically.
+ * Writes from the host are buffered by the Bus Interface Unit (BIU)
+ * up to 128 bits.  Whenever the host writes part of such a register,
+ * the BIU collects the written value and does not write to the
+ * underlying register until all 4 dwords have been written.  A
+ * similar buffering scheme applies to host access to the NIC's 64-bit
+ * SRAM.
+ *
+ * Writes to different CSRs and 64-bit SRAM words must be serialised,
+ * since interleaved access can result in lost writes.  We use
+ * ef4_nic::biu_lock for this.
+ *
+ * We also serialise reads from 128-bit CSRs and SRAM with the same
+ * spinlock.  This may not be necessary, but it doesn't really matter
+ * as there are no such reads on the fast path.
+ *
+ * The DMA descriptor pointers (RX_DESC_UPD and TX_DESC_UPD) are
+ * 128-bit but are special-cased in the BIU to avoid the need for
+ * locking in the host:
+ *
+ * - They are write-only.
+ * - The semantics of writing to these registers are such that
+ *   replacing the low 96 bits with zero does not affect functionality.
+ * - If the host writes to the last dword address of such a register
+ *   (i.e. the high 32 bits) the underlying register will always be
+ *   written.  If the collector and the current write together do not
+ *   provide values for all 128 bits of the register, the low 96 bits
+ *   will be written as zero.
+ * - If the host writes to the address of any other part of such a
+ *   register while the collector already holds values for some other
+ *   register, the write is discarded and the collector maintains its
+ *   current state.
+ *
+ * The EF10 architecture exposes very few registers to the host and
+ * most of them are only 32 bits wide.  The only exceptions are the MC
+ * doorbell register pair, which has its own latching, and
+ * TX_DESC_UPD, which works in a similar way to the Falcon
+ * architecture.
+ */
+
+#if BITS_PER_LONG == 64
+#define EF4_USE_QWORD_IO 1 /* 64-bit build: enables the 64-bit MMIO accessors */
+#endif
+
+#ifdef EF4_USE_QWORD_IO
+/* Raw 64-bit write of @value at byte offset @reg from the register mapping */
+static inline void _ef4_writeq(struct ef4_nic *efx, __le64 value,
+                                 unsigned int reg)
+{
+       void __iomem *addr = efx->membase + reg;
+
+       __raw_writeq((__force u64)value, addr);
+}
+
+/* Raw 64-bit read at byte offset @reg from the register mapping */
+static inline __le64 _ef4_readq(struct ef4_nic *efx, unsigned int reg)
+{
+       void __iomem *addr = efx->membase + reg;
+
+       return (__force __le64)__raw_readq(addr);
+}
+#endif
+
+/* Raw 32-bit write of @value at byte offset @reg from the register mapping */
+static inline void _ef4_writed(struct ef4_nic *efx, __le32 value,
+                                 unsigned int reg)
+{
+       void __iomem *addr = efx->membase + reg;
+
+       __raw_writel((__force u32)value, addr);
+}
+
+/* Raw 32-bit read at byte offset @reg from the register mapping */
+static inline __le32 _ef4_readd(struct ef4_nic *efx, unsigned int reg)
+{
+       void __iomem *addr = efx->membase + reg;
+
+       return (__force __le32)__raw_readl(addr);
+}
+
+/* Write a normal 128-bit CSR, locking as appropriate.
+ *
+ * @value is written starting at byte offset @reg, in ascending address
+ * order, as two 64-bit writes when EF4_USE_QWORD_IO is defined and as four
+ * 32-bit writes otherwise.  The whole sequence, plus the trailing mmiowb(),
+ * runs under efx->biu_lock taken with spin_lock_irqsave().
+ */
+static inline void ef4_writeo(struct ef4_nic *efx, const ef4_oword_t *value,
+                             unsigned int reg)
+{
+       unsigned long flags __attribute__ ((unused));
+
+       netif_vdbg(efx, hw, efx->net_dev,
+                  "writing register %x with " EF4_OWORD_FMT "\n", reg,
+                  EF4_OWORD_VAL(*value));
+
+       spin_lock_irqsave(&efx->biu_lock, flags);
+#ifdef EF4_USE_QWORD_IO
+       _ef4_writeq(efx, value->u64[0], reg + 0);
+       _ef4_writeq(efx, value->u64[1], reg + 8);
+#else
+       _ef4_writed(efx, value->u32[0], reg + 0);
+       _ef4_writed(efx, value->u32[1], reg + 4);
+       _ef4_writed(efx, value->u32[2], reg + 8);
+       _ef4_writed(efx, value->u32[3], reg + 12);
+#endif
+       mmiowb();
+       spin_unlock_irqrestore(&efx->biu_lock, flags);
+}
+
+/* Write 64-bit SRAM through the supplied mapping, locking as appropriate.
+ *
+ * @index counts 64-bit words: the byte offset into @membase is
+ * @index * sizeof(*value).  The word is written as one 64-bit write when
+ * EF4_USE_QWORD_IO is defined and as two 32-bit writes (low dword first)
+ * otherwise, under efx->biu_lock and followed by mmiowb().
+ */
+static inline void ef4_sram_writeq(struct ef4_nic *efx, void __iomem *membase,
+                                  const ef4_qword_t *value, unsigned int index)
+{
+       unsigned int addr = index * sizeof(*value);
+       unsigned long flags __attribute__ ((unused));
+
+       netif_vdbg(efx, hw, efx->net_dev,
+                  "writing SRAM address %x with " EF4_QWORD_FMT "\n",
+                  addr, EF4_QWORD_VAL(*value));
+
+       spin_lock_irqsave(&efx->biu_lock, flags);
+#ifdef EF4_USE_QWORD_IO
+       __raw_writeq((__force u64)value->u64[0], membase + addr);
+#else
+       __raw_writel((__force u32)value->u32[0], membase + addr);
+       __raw_writel((__force u32)value->u32[1], membase + addr + 4);
+#endif
+       mmiowb();
+       spin_unlock_irqrestore(&efx->biu_lock, flags);
+}
+
+/* Write a 32-bit CSR or the last dword of a special 128-bit CSR.
+ *
+ * Logs the access at vdbg level, then performs a single raw 32-bit write of
+ * @value at byte offset @reg.  No lock is taken here.
+ */
+static inline void ef4_writed(struct ef4_nic *efx, const ef4_dword_t *value,
+                             unsigned int reg)
+{
+       netif_vdbg(efx, hw, efx->net_dev,
+                  "writing register %x with "EF4_DWORD_FMT"\n",
+                  reg, EF4_DWORD_VAL(*value));
+
+       /* No lock required */
+       _ef4_writed(efx, value->u32[0], reg);
+}
+
+/* Read a 128-bit CSR, locking as appropriate.
+ *
+ * The register at byte offset @reg is always read as four 32-bit reads in
+ * ascending address order, regardless of EF4_USE_QWORD_IO, with
+ * efx->biu_lock held.  The result is stored in @value and logged at vdbg
+ * level after the lock has been dropped.
+ */
+static inline void ef4_reado(struct ef4_nic *efx, ef4_oword_t *value,
+                            unsigned int reg)
+{
+       unsigned long flags __attribute__ ((unused));
+
+       spin_lock_irqsave(&efx->biu_lock, flags);
+       value->u32[0] = _ef4_readd(efx, reg + 0);
+       value->u32[1] = _ef4_readd(efx, reg + 4);
+       value->u32[2] = _ef4_readd(efx, reg + 8);
+       value->u32[3] = _ef4_readd(efx, reg + 12);
+       spin_unlock_irqrestore(&efx->biu_lock, flags);
+
+       netif_vdbg(efx, hw, efx->net_dev,
+                  "read from register %x, got " EF4_OWORD_FMT "\n", reg,
+                  EF4_OWORD_VAL(*value));
+}
+
+/* Read 64-bit SRAM through the supplied mapping, locking as appropriate.
+ *
+ * @index counts 64-bit words: the byte offset into @membase is
+ * @index * sizeof(*value).  The word is read as one 64-bit read when
+ * EF4_USE_QWORD_IO is defined and as two 32-bit reads (low dword first)
+ * otherwise, with efx->biu_lock held.  The result is logged at vdbg level
+ * after the lock has been dropped.
+ */
+static inline void ef4_sram_readq(struct ef4_nic *efx, void __iomem *membase,
+                                 ef4_qword_t *value, unsigned int index)
+{
+       unsigned int addr = index * sizeof(*value);
+       unsigned long flags __attribute__ ((unused));
+
+       spin_lock_irqsave(&efx->biu_lock, flags);
+#ifdef EF4_USE_QWORD_IO
+       value->u64[0] = (__force __le64)__raw_readq(membase + addr);
+#else
+       value->u32[0] = (__force __le32)__raw_readl(membase + addr);
+       value->u32[1] = (__force __le32)__raw_readl(membase + addr + 4);
+#endif
+       spin_unlock_irqrestore(&efx->biu_lock, flags);
+
+       netif_vdbg(efx, hw, efx->net_dev,
+                  "read from SRAM address %x, got "EF4_QWORD_FMT"\n",
+                  addr, EF4_QWORD_VAL(*value));
+}
+
+/* Read a 32-bit CSR or SRAM.
+ *
+ * Performs a single raw 32-bit read at byte offset @reg, stores it in
+ * @value and logs it at vdbg level.  No lock is taken here.
+ */
+static inline void ef4_readd(struct ef4_nic *efx, ef4_dword_t *value,
+                               unsigned int reg)
+{
+       value->u32[0] = _ef4_readd(efx, reg);
+       netif_vdbg(efx, hw, efx->net_dev,
+                  "read from register %x, got "EF4_DWORD_FMT"\n",
+                  reg, EF4_DWORD_VAL(*value));
+}
+
+/* Write entry @index of a table of 128-bit CSRs whose first entry is at
+ * byte offset @reg.
+ */
+static inline void
+ef4_writeo_table(struct ef4_nic *efx, const ef4_oword_t *value,
+                unsigned int reg, unsigned int index)
+{
+       const unsigned int entry = reg + index * sizeof(ef4_oword_t);
+
+       ef4_writeo(efx, value, entry);
+}
+
+/* Read entry @index of a table of 128-bit CSRs whose first entry is at
+ * byte offset @reg.
+ */
+static inline void ef4_reado_table(struct ef4_nic *efx, ef4_oword_t *value,
+                                    unsigned int reg, unsigned int index)
+{
+       const unsigned int entry = reg + index * sizeof(ef4_oword_t);
+
+       ef4_reado(efx, value, entry);
+}
+
+/* Page size, in bytes, used as step between per-VI registers */
+#define EF4_VI_PAGE_SIZE 0x2000
+
+/* Calculate offset to page-mapped register: @page whole pages of
+ * EF4_VI_PAGE_SIZE bytes plus the in-page offset @reg.
+ */
+#define EF4_PAGED_REG(page, reg) \
+       ((page) * EF4_VI_PAGE_SIZE + (reg))
+
+/* Write the whole of RX_DESC_UPD or TX_DESC_UPD */
+static inline void _ef4_writeo_page(struct ef4_nic *efx, ef4_oword_t *value,
+                                   unsigned int reg, unsigned int page)
+{
+       reg = EF4_PAGED_REG(page, reg);
+
+       netif_vdbg(efx, hw, efx->net_dev,
+                  "writing register %x with " EF4_OWORD_FMT "\n", reg,
+                  EF4_OWORD_VAL(*value));
+
+#ifdef EF4_USE_QWORD_IO
+       _ef4_writeq(efx, value->u64[0], reg + 0);
+       _ef4_writeq(efx, value->u64[1], reg + 8);
+#else
+       _ef4_writed(efx, value->u32[0], reg + 0);
+       _ef4_writed(efx, value->u32[1], reg + 4);
+       _ef4_writed(efx, value->u32[2], reg + 8);
+       _ef4_writed(efx, value->u32[3], reg + 12);
+#endif
+}
+#define ef4_writeo_page(efx, value, reg, page)                         \
+       _ef4_writeo_page(efx, value,                                    \
+                        reg +                                          \
+                        BUILD_BUG_ON_ZERO((reg) != 0x830 && (reg) != 0xa10), \
+                        page)
+
+/* Write a page-mapped 32-bit CSR (EVQ_RPTR, EVQ_TMR (EF10), or the
+ * high bits of RX_DESC_UPD or TX_DESC_UPD)
+ *
+ * @reg is the in-page register offset and @page selects the register page;
+ * the write itself is done by ef4_writed() without locking.  Use the
+ * ef4_writed_page() wrapper, which rejects at build time any @reg outside
+ * the list of offsets it checks for.
+ */
+static inline void
+_ef4_writed_page(struct ef4_nic *efx, const ef4_dword_t *value,
+                unsigned int reg, unsigned int page)
+{
+       ef4_writed(efx, value, EF4_PAGED_REG(page, reg));
+}
+#define ef4_writed_page(efx, value, reg, page)                         \
+       _ef4_writed_page(efx, value,                                    \
+                        reg +                                          \
+                        BUILD_BUG_ON_ZERO((reg) != 0x400 &&            \
+                                          (reg) != 0x420 &&            \
+                                          (reg) != 0x830 &&            \
+                                          (reg) != 0x83c &&            \
+                                          (reg) != 0xa18 &&            \
+                                          (reg) != 0xa1c),             \
+                        page)
+
+/* Write TIMER_COMMAND.  This is a page-mapped 32-bit CSR, but a bug
+ * in the BIU means that writes to TIMER_COMMAND[0] invalidate the
+ * collector register.
+ *
+ * For that reason the write for page 0 is done under efx->biu_lock; writes
+ * for any other page are done without locking.  Use the
+ * ef4_writed_page_locked() wrapper, which rejects at build time any @reg
+ * other than 0x420.
+ */
+static inline void _ef4_writed_page_locked(struct ef4_nic *efx,
+                                          const ef4_dword_t *value,
+                                          unsigned int reg,
+                                          unsigned int page)
+{
+       unsigned long flags __attribute__ ((unused));
+
+       if (page == 0) {
+               spin_lock_irqsave(&efx->biu_lock, flags);
+               ef4_writed(efx, value, EF4_PAGED_REG(page, reg));
+               spin_unlock_irqrestore(&efx->biu_lock, flags);
+       } else {
+               ef4_writed(efx, value, EF4_PAGED_REG(page, reg));
+       }
+}
+#define ef4_writed_page_locked(efx, value, reg, page)                  \
+       _ef4_writed_page_locked(efx, value,                             \
+                               reg + BUILD_BUG_ON_ZERO((reg) != 0x420), \
+                               page)
+
+#endif /* EF4_IO_H */
diff --git a/drivers/net/ethernet/sfc/falcon/mdio_10g.c b/drivers/net/ethernet/sfc/falcon/mdio_10g.c
new file mode 100644 (file)
index 0000000..e7d7c09
--- /dev/null
@@ -0,0 +1,323 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2006-2011 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+/*
+ * Useful functions for working with MDIO clause 45 PHYs
+ */
+#include <linux/types.h>
+#include <linux/ethtool.h>
+#include <linux/delay.h>
+#include "net_driver.h"
+#include "mdio_10g.h"
+#include "workarounds.h"
+
+/* Extract the OUI from a 32-bit PHY identifier.
+ *
+ * Bits 10..31 of @id are copied to bit positions (i ^ 7) of the result,
+ * for i = 0..21, which reverses the bit order within each group of eight.
+ */
+unsigned ef4_mdio_id_oui(u32 id)
+{
+       unsigned oui = 0;
+       int i;
+
+       /* The bits of the OUI are designated a..x, with a=0 and b variable.
+        * In the id register c is the MSB but the OUI is conventionally
+        * written as bytes h..a, p..i, x..q.  Reorder the bits accordingly. */
+       for (i = 0; i < 22; ++i)
+               /* Unsigned constants: i + 10 reaches 31, and 1 << 31 is not
+                * representable in a signed int.
+                */
+               if (id & (1U << (i + 10)))
+                       oui |= 1U << (i ^ 7);
+
+       return oui;
+}
+
+/* Reset MMD @mmd on @port and poll MDIO_CTRL1 until the reset bit clears.
+ *
+ * Sleeps @spintime ms before each poll and polls at most @spins times (at
+ * least once).  Returns a positive value on success - the number of polls
+ * left, or 1 if the reset cleared on the very last poll - and -ETIMEDOUT if
+ * the reset bit is still set after the final poll.
+ */
+int ef4_mdio_reset_mmd(struct ef4_nic *port, int mmd,
+                           int spins, int spintime)
+{
+       u32 ctrl;
+
+       /* Catch callers passing values in the wrong units (or just silly) */
+       EF4_BUG_ON_PARANOID(spins * spintime >= 5000);
+
+       ef4_mdio_write(port, mmd, MDIO_CTRL1, MDIO_CTRL1_RESET);
+       /* Wait for the reset bit to clear. */
+       do {
+               msleep(spintime);
+               ctrl = ef4_mdio_read(port, mmd, MDIO_CTRL1);
+               spins--;
+
+       } while (spins > 0 && (ctrl & MDIO_CTRL1_RESET));
+
+       /* Judge the outcome by the last value read, not by the spin
+        * counter: a reset that clears on the final poll is a success.
+        */
+       if (ctrl & MDIO_CTRL1_RESET)
+               return -ETIMEDOUT;
+
+       return spins > 0 ? spins : 1;
+}
+
+/* Check that MMD @mmd responds.  Returns 0 if it does (or if @mmd is the
+ * AN MMD, which is not probed), -EIO otherwise.
+ */
+static int ef4_mdio_check_mmd(struct ef4_nic *efx, int mmd)
+{
+       int status;
+
+       if (mmd == MDIO_MMD_AN)
+               return 0;
+
+       /* Read MMD STATUS2 to check it is responding. */
+       status = ef4_mdio_read(efx, mmd, MDIO_STAT2);
+       if ((status & MDIO_STAT2_DEVPRST) == MDIO_STAT2_DEVPRST_VAL)
+               return 0;
+
+       netif_err(efx, hw, efx->net_dev,
+                 "PHY MMD %d not responding.\n", mmd);
+       return -EIO;
+}
+
+/* This ought to be ridiculous overkill. We expect it to fail rarely */
+#define MDIO45_RESET_TIME      1000 /* ms */
+#define MDIO45_RESET_ITERS     100
+
+/* Wait for every MMD selected in @mmd_mask (bit n selects MMD n) to leave
+ * reset.
+ *
+ * Polls MDIO_CTRL1 of each selected MMD up to MDIO45_RESET_ITERS times,
+ * sleeping between rounds.  Returns 0 once no selected MMD reports the
+ * reset bit, -EIO if a register read fails, or -ETIMEDOUT if some MMD is
+ * still in reset after the last round.
+ */
+int ef4_mdio_wait_reset_mmds(struct ef4_nic *efx, unsigned int mmd_mask)
+{
+       const int spintime = MDIO45_RESET_TIME / MDIO45_RESET_ITERS;
+       int tries = MDIO45_RESET_ITERS;
+       unsigned int in_reset = 0;
+
+       while (tries) {
+               /* Walk the mask as an unsigned value: with a signed copy a
+                * mask with bit 31 set would be sign-extended by every right
+                * shift and never reach zero.
+                */
+               unsigned int mask = mmd_mask;
+               int mmd = 0;
+               int stat;
+
+               in_reset = 0;
+               while (mask) {
+                       if (mask & 1) {
+                               stat = ef4_mdio_read(efx, mmd, MDIO_CTRL1);
+                               if (stat < 0) {
+                                       netif_err(efx, hw, efx->net_dev,
+                                                 "failed to read status of"
+                                                 " MMD %d\n", mmd);
+                                       return -EIO;
+                               }
+                               if (stat & MDIO_CTRL1_RESET)
+                                       in_reset |= 1U << mmd;
+                       }
+                       mask >>= 1;
+                       mmd++;
+               }
+               if (!in_reset)
+                       break;
+               tries--;
+               msleep(spintime);
+       }
+       if (in_reset) {
+               netif_err(efx, hw, efx->net_dev,
+                         "not all MMDs came out of reset in time."
+                         " MMDs still in reset: %x\n", in_reset);
+               return -ETIMEDOUT;
+       }
+       return 0;
+}
+
+/* Check that every MMD selected in @mmd_mask (bit n selects MMD n) is
+ * listed as present and is responding.
+ *
+ * Returns 0 on success (trivially so for an empty mask), -EIO if the
+ * devices-present registers cannot be read or an MMD does not respond, or
+ * -ENODEV if a required MMD is not listed as present.
+ */
+int ef4_mdio_check_mmds(struct ef4_nic *efx, unsigned int mmd_mask)
+{
+       int mmd = 0, probe_mmd, devs1, devs2;
+       u32 devices;
+
+       /* Nothing required, nothing to check.  This also keeps __ffs(),
+        * whose result is undefined for 0, away from an empty mask.
+        */
+       if (!mmd_mask)
+               return 0;
+
+       /* Historically we have probed the PHYXS to find out what devices are
+        * present, but that doesn't work so well if the PHYXS isn't expected
+        * to exist; if so, just use the first item in the list supplied. */
+       probe_mmd = (mmd_mask & MDIO_DEVS_PHYXS) ? MDIO_MMD_PHYXS :
+           __ffs(mmd_mask);
+
+       /* Check all the expected MMDs are present */
+       devs1 = ef4_mdio_read(efx, probe_mmd, MDIO_DEVS1);
+       devs2 = ef4_mdio_read(efx, probe_mmd, MDIO_DEVS2);
+       if (devs1 < 0 || devs2 < 0) {
+               netif_err(efx, hw, efx->net_dev,
+                         "failed to read devices present\n");
+               return -EIO;
+       }
+       /* Widen before shifting so that bit 15 of devs2 does not end up in
+        * the sign bit of a signed int.
+        */
+       devices = devs1 | ((u32)devs2 << 16);
+       if ((devices & mmd_mask) != mmd_mask) {
+               netif_err(efx, hw, efx->net_dev,
+                         "required MMDs not present: got %x, wanted %x\n",
+                         devices, mmd_mask);
+               return -ENODEV;
+       }
+       netif_vdbg(efx, hw, efx->net_dev, "Devices present: %x\n", devices);
+
+       /* Check all required MMDs are responding and happy. */
+       while (mmd_mask) {
+               if ((mmd_mask & 1) && ef4_mdio_check_mmd(efx, mmd))
+                       return -EIO;
+               mmd_mask = mmd_mask >> 1;
+               mmd++;
+       }
+
+       return 0;
+}
+
+/* Report whether the links of the MMDs selected in @mmd_mask are up,
+ * taking the current loopback and PHY modes into account.
+ */
+bool ef4_mdio_links_ok(struct ef4_nic *efx, unsigned int mmd_mask)
+{
+       /* Cases that are decided without querying any MMD */
+       if (LOOPBACK_INTERNAL(efx))
+               return true;
+       if (LOOPBACK_MASK(efx) & LOOPBACKS_WS)
+               return false;
+       if (ef4_phy_mode_disabled(efx->phy_mode))
+               return false;
+
+       /* If the port is in loopback, then we should only consider a subset
+        * of mmd's */
+       switch (efx->loopback_mode) {
+       case LOOPBACK_PHYXS:
+               mmd_mask &= ~(MDIO_DEVS_PHYXS |
+                             MDIO_DEVS_PCS |
+                             MDIO_DEVS_PMAPMD |
+                             MDIO_DEVS_AN);
+               break;
+       case LOOPBACK_PCS:
+               mmd_mask &= ~(MDIO_DEVS_PCS |
+                             MDIO_DEVS_PMAPMD |
+                             MDIO_DEVS_AN);
+               break;
+       case LOOPBACK_PMAPMD:
+               mmd_mask &= ~(MDIO_DEVS_PMAPMD |
+                             MDIO_DEVS_AN);
+               break;
+       default:
+               break;
+       }
+
+       return mdio45_links_ok(&efx->mdio, mmd_mask);
+}
+
+/* Make the PMA/PMD global transmit-disable flag follow the
+ * PHY_MODE_TX_DISABLED bit of efx->phy_mode.
+ */
+void ef4_mdio_transmit_disable(struct ef4_nic *efx)
+{
+       const bool tx_off = efx->phy_mode & PHY_MODE_TX_DISABLED;
+
+       ef4_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_PMA_TXDIS,
+                         MDIO_PMD_TXDIS_GLOBAL, tx_off);
+}
+
+/* Set or clear the CTRL1 loopback flag of the PMA/PMD, PCS and PHY XS MMDs,
+ * in that order, according to efx->loopback_mode.
+ */
+void ef4_mdio_phy_reconfigure(struct ef4_nic *efx)
+{
+       const int mode = efx->loopback_mode;
+
+       ef4_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_CTRL1,
+                         MDIO_PMA_CTRL1_LOOPBACK, mode == LOOPBACK_PMAPMD);
+       ef4_mdio_set_flag(efx, MDIO_MMD_PCS, MDIO_CTRL1,
+                         MDIO_PCS_CTRL1_LOOPBACK, mode == LOOPBACK_PCS);
+       ef4_mdio_set_flag(efx, MDIO_MMD_PHYXS, MDIO_CTRL1,
+                         MDIO_PHYXS_CTRL1_LOOPBACK,
+                         mode == LOOPBACK_PHYXS_WS);
+}
+
+/* Set or clear the CTRL1 low-power flag of MMD @mmd, but only if its STAT1
+ * register advertises low-power ability.
+ */
+static void ef4_mdio_set_mmd_lpower(struct ef4_nic *efx,
+                                   int lpower, int mmd)
+{
+       int stat = ef4_mdio_read(efx, mmd, MDIO_STAT1);
+
+       netif_vdbg(efx, drv, efx->net_dev, "Setting low power mode for MMD %d to %d\n",
+                 mmd, lpower);
+
+       if (!(stat & MDIO_STAT1_LPOWERABLE))
+               return;
+
+       ef4_mdio_set_flag(efx, mmd, MDIO_CTRL1, MDIO_CTRL1_LPOWER, lpower);
+}
+
+/* Apply @low_power to every MMD selected in @mmd_mask (bit n selects MMD n),
+ * except the AN MMD, which is always skipped.
+ */
+void ef4_mdio_set_mmds_lpower(struct ef4_nic *efx,
+                             int low_power, unsigned int mmd_mask)
+{
+       int mmd;
+
+       mmd_mask &= ~MDIO_DEVS_AN;
+
+       for (mmd = 0; mmd_mask; mmd++, mmd_mask >>= 1)
+               if (mmd_mask & 1)
+                       ef4_mdio_set_mmd_lpower(efx, low_power, mmd);
+}
+
+/**
+ * ef4_mdio_set_settings - Set (some of) the PHY settings over MDIO.
+ * @efx:               Efx NIC
+ * @ecmd:              New settings
+ *
+ * The current settings are fetched through efx->phy_op->get_settings().
+ * If @ecmd matches them in advertising, speed, duplex, port and autoneg,
+ * nothing is done.  Otherwise the requested advertising mask (with
+ * autonegotiation forced on) is installed and autonegotiation restarted.
+ *
+ * Return: 0 if the settings were unchanged or have been applied; -EINVAL
+ * if the current or requested port is not %PORT_TP, if @ecmd disables
+ * autonegotiation, or if @ecmd advertises a mode outside the currently
+ * supported set.
+ */
+int ef4_mdio_set_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd)
+{
+       struct ethtool_cmd prev = { .cmd = ETHTOOL_GSET };
+
+       efx->phy_op->get_settings(efx, &prev);
+
+       if (ecmd->advertising == prev.advertising &&
+           ethtool_cmd_speed(ecmd) == ethtool_cmd_speed(&prev) &&
+           ecmd->duplex == prev.duplex &&
+           ecmd->port == prev.port &&
+           ecmd->autoneg == prev.autoneg)
+               return 0;
+
+       /* We can only change these settings for -T PHYs */
+       if (prev.port != PORT_TP || ecmd->port != PORT_TP)
+               return -EINVAL;
+
+       /* Check that PHY supports these settings */
+       if (!ecmd->autoneg ||
+           (ecmd->advertising | SUPPORTED_Autoneg) & ~prev.supported)
+               return -EINVAL;
+
+       ef4_link_set_advertising(efx, ecmd->advertising | ADVERTISED_Autoneg);
+       ef4_mdio_an_reconfigure(efx);
+       return 0;
+}
+
+/**
+ * ef4_mdio_an_reconfigure - Push advertising flags and restart autonegotiation
+ * @efx:               Efx NIC
+ *
+ * Writes the base page (CSMA and reserved bits, plus the pause bits taken
+ * from efx->link_advertising) to the AN advertisement register, lets
+ * efx->phy_op->set_npage_adv() program the next page, then sets the enable,
+ * restart and extended-next-page bits in the AN CTRL1 register with a
+ * read-modify-write.  Warns if the AN MMD is not listed in efx->mdio.mmds.
+ */
+void ef4_mdio_an_reconfigure(struct ef4_nic *efx)
+{
+       int reg;
+
+       WARN_ON(!(efx->mdio.mmds & MDIO_DEVS_AN));
+
+       /* Set up the base page */
+       reg = ADVERTISE_CSMA | ADVERTISE_RESV;
+       if (efx->link_advertising & ADVERTISED_Pause)
+               reg |= ADVERTISE_PAUSE_CAP;
+       if (efx->link_advertising & ADVERTISED_Asym_Pause)
+               reg |= ADVERTISE_PAUSE_ASYM;
+       ef4_mdio_write(efx, MDIO_MMD_AN, MDIO_AN_ADVERTISE, reg);
+
+       /* Set up the (extended) next page */
+       efx->phy_op->set_npage_adv(efx, efx->link_advertising);
+
+       /* Enable and restart AN */
+       reg = ef4_mdio_read(efx, MDIO_MMD_AN, MDIO_CTRL1);
+       reg |= MDIO_AN_CTRL1_ENABLE | MDIO_AN_CTRL1_RESTART | MDIO_AN_CTRL1_XNP;
+       ef4_mdio_write(efx, MDIO_MMD_AN, MDIO_CTRL1, reg);
+}
+
+/* Return the flow control in effect: efx->wanted_fc as is when
+ * EF4_FC_AUTO is not set in it, otherwise the result of resolving our
+ * advertisement against the link partner ability read from the AN MMD.
+ */
+u8 ef4_mdio_get_pause(struct ef4_nic *efx)
+{
+       BUILD_BUG_ON(EF4_FC_AUTO & (EF4_FC_RX | EF4_FC_TX));
+
+       if (efx->wanted_fc & EF4_FC_AUTO) {
+               int lpa;
+
+               WARN_ON(!(efx->mdio.mmds & MDIO_DEVS_AN));
+
+               lpa = ef4_mdio_read(efx, MDIO_MMD_AN, MDIO_AN_LPA);
+               return mii_resolve_flowctrl_fdx(
+                       mii_advertise_flowctrl(efx->wanted_fc), lpa);
+       }
+
+       return efx->wanted_fc;
+}
+
+/* Liveness self-test: with efx->mac_lock held, read the two device ID
+ * registers of the lowest-numbered MMD in efx->mdio.mmds.  If either reads
+ * as all-zeros or all-ones return -EINVAL, otherwise return the result of
+ * ef4_mdio_check_mmds() on all of efx->mdio.mmds.
+ */
+int ef4_mdio_test_alive(struct ef4_nic *efx)
+{
+       const int devad = __ffs(efx->mdio.mmds);
+       u16 physid1, physid2;
+       int rc;
+
+       mutex_lock(&efx->mac_lock);
+
+       physid1 = ef4_mdio_read(efx, devad, MDIO_DEVID1);
+       physid2 = ef4_mdio_read(efx, devad, MDIO_DEVID2);
+
+       if (physid1 != 0x0000 && physid1 != 0xffff &&
+           physid2 != 0x0000 && physid2 != 0xffff) {
+               rc = ef4_mdio_check_mmds(efx, efx->mdio.mmds);
+       } else {
+               netif_err(efx, hw, efx->net_dev,
+                         "no MDIO PHY present with ID %d\n", efx->mdio.prtad);
+               rc = -EINVAL;
+       }
+
+       mutex_unlock(&efx->mac_lock);
+       return rc;
+}
diff --git a/drivers/net/ethernet/sfc/falcon/mdio_10g.h b/drivers/net/ethernet/sfc/falcon/mdio_10g.h
new file mode 100644 (file)
index 0000000..885cf7a
--- /dev/null
@@ -0,0 +1,110 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2006-2011 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_MDIO_10G_H
+#define EF4_MDIO_10G_H
+
+#include <linux/mdio.h>
+
+/*
+ * Helper functions for doing 10G MDIO as specified in IEEE 802.3 clause 45.
+ */
+
+#include "efx.h"
+
+/* Revision field: bits 3:0 of the PHY identifier */
+static inline unsigned ef4_mdio_id_rev(u32 id)
+{
+       return id & 0xf;
+}
+
+/* Model field: bits 9:4 of the PHY identifier */
+static inline unsigned ef4_mdio_id_model(u32 id)
+{
+       return (id & 0x3f0) >> 4;
+}
+unsigned ef4_mdio_id_oui(u32 id);
+
+/* Read register @addr of MMD @devad on this NIC's PHY, via the mdio_read
+ * hook of efx->mdio.  The hook's return value is passed through unchanged.
+ */
+static inline int ef4_mdio_read(struct ef4_nic *efx, int devad, int addr)
+{
+       const struct mdio_if_info *mdio = &efx->mdio;
+
+       return mdio->mdio_read(efx->net_dev, mdio->prtad, devad, addr);
+}
+
+/* Write @value to register @addr of MMD @devad on this NIC's PHY, via the
+ * mdio_write hook of efx->mdio.  The hook's return value is discarded.
+ */
+static inline void
+ef4_mdio_write(struct ef4_nic *efx, int devad, int addr, int value)
+{
+       const struct mdio_if_info *mdio = &efx->mdio;
+
+       mdio->mdio_write(efx->net_dev, mdio->prtad, devad, addr, value);
+}
+
+/* Read the 32-bit device identifier of MMD @mmd: MDIO_DEVID1 supplies the
+ * high 16 bits and MDIO_DEVID2 the low 16 bits.
+ */
+static inline u32 ef4_mdio_read_id(struct ef4_nic *efx, int mmd)
+{
+       u16 id_low = ef4_mdio_read(efx, mmd, MDIO_DEVID2);
+       u16 id_hi = ef4_mdio_read(efx, mmd, MDIO_DEVID1);
+
+       /* Widen before shifting: a u16 promotes to signed int, and shifting
+        * a value with bit 15 set left by 16 would overflow it.
+        */
+       return ((u32)id_hi << 16) | id_low;
+}
+
+/* Report whether the PHY XS lane status register shows the alignment bit
+ * set; the raw status is logged at debug level when it does not.
+ */
+static inline bool ef4_mdio_phyxgxs_lane_sync(struct ef4_nic *efx)
+{
+       int lane_status;
+
+       /* Two back-to-back reads; only the second result is used */
+       ef4_mdio_read(efx, MDIO_MMD_PHYXS, MDIO_PHYXS_LNSTAT);
+       lane_status = ef4_mdio_read(efx, MDIO_MMD_PHYXS, MDIO_PHYXS_LNSTAT);
+
+       if (lane_status & MDIO_PHYXS_LNSTAT_ALIGN)
+               return true;
+
+       netif_dbg(efx, hw, efx->net_dev, "XGXS lane status: %x\n",
+                 lane_status);
+       return false;
+}
+
+const char *ef4_mdio_mmd_name(int mmd);
+
+/*
+ * Reset a specific MMD and wait for reset to clear.
+ * Return number of spins left (>0) on success, -%ETIMEDOUT on failure.
+ *
+ * This function will sleep
+ */
+int ef4_mdio_reset_mmd(struct ef4_nic *efx, int mmd, int spins, int spintime);
+
+/* As ef4_mdio_check_mmd but for multiple MMDs */
+int ef4_mdio_check_mmds(struct ef4_nic *efx, unsigned int mmd_mask);
+
+/* Check the link status of specified mmds in bit mask */
+bool ef4_mdio_links_ok(struct ef4_nic *efx, unsigned int mmd_mask);
+
+/* Generic transmit disable support through PMAPMD */
+void ef4_mdio_transmit_disable(struct ef4_nic *efx);
+
+/* Generic part of reconfigure: set/clear loopback bits */
+void ef4_mdio_phy_reconfigure(struct ef4_nic *efx);
+
+/* Set the power state of the specified MMDs */
+void ef4_mdio_set_mmds_lpower(struct ef4_nic *efx, int low_power,
+                             unsigned int mmd_mask);
+
+/* Set (some of) the PHY settings over MDIO */
+int ef4_mdio_set_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd);
+
+/* Push advertising flags and restart autonegotiation */
+void ef4_mdio_an_reconfigure(struct ef4_nic *efx);
+
+/* Get pause parameters from AN if available (otherwise return
+ * requested pause parameters)
+ */
+u8 ef4_mdio_get_pause(struct ef4_nic *efx);
+
+/* Wait for specified MMDs to exit reset within a timeout */
+int ef4_mdio_wait_reset_mmds(struct ef4_nic *efx, unsigned int mmd_mask);
+
+/* Set or clear flag, debouncing
+ *
+ * Thin wrapper that forwards to mdio_set_flag() with this NIC's MDIO
+ * interface (efx->mdio) and port address (efx->mdio.prtad): the bits in
+ * @mask of register @addr in MMD @devad are set if @state is true and
+ * cleared otherwise.
+ */
+static inline void
+ef4_mdio_set_flag(struct ef4_nic *efx, int devad, int addr,
+                 int mask, bool state)
+{
+       mdio_set_flag(&efx->mdio, efx->mdio.prtad, devad, addr, mask, state);
+}
+
+/* Liveness self-test for MDIO PHYs */
+int ef4_mdio_test_alive(struct ef4_nic *efx);
+
+#endif /* EF4_MDIO_10G_H */
diff --git a/drivers/net/ethernet/sfc/falcon/mtd.c b/drivers/net/ethernet/sfc/falcon/mtd.c
new file mode 100644 (file)
index 0000000..cde593c
--- /dev/null
@@ -0,0 +1,133 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/slab.h>
+#include <linux/rtnetlink.h>
+
+#include "net_driver.h"
+#include "efx.h"
+
+#define to_ef4_mtd_partition(mtd)                              \
+       container_of(mtd, struct ef4_mtd_partition, mtd) /* mtd_info -> enclosing partition */
+
+/* MTD interface */
+
+/* MTD erase hook: delegate to the NIC type's mtd_erase(), record the outcome
+ * in @erase, run the erase callback and return the delegate's result.
+ */
+static int ef4_mtd_erase(struct mtd_info *mtd, struct erase_info *erase)
+{
+       struct ef4_nic *efx = mtd->priv;
+       int rc = efx->type->mtd_erase(mtd, erase->addr, erase->len);
+
+       if (rc) {
+               erase->state = MTD_ERASE_FAILED;
+               erase->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
+       } else {
+               erase->state = MTD_ERASE_DONE;
+       }
+
+       mtd_erase_callback(erase);
+       return rc;
+}
+
+/* MTD sync hook: delegate to the NIC type's mtd_sync() and log any failure,
+ * since this hook has no way to return one.
+ */
+static void ef4_mtd_sync(struct mtd_info *mtd)
+{
+       struct ef4_mtd_partition *part = to_ef4_mtd_partition(mtd);
+       struct ef4_nic *efx = mtd->priv;
+       int rc = efx->type->mtd_sync(mtd);
+
+       if (!rc)
+               return;
+
+       pr_err("%s: %s sync failed (%d)\n",
+              part->name, part->dev_type_name, rc);
+}
+
+/* Unregister @part from the MTD core, retrying once a second for as long as
+ * the core reports -EBUSY, then unlink it from its list.  Any other failure
+ * triggers a warning.
+ */
+static void ef4_mtd_remove_partition(struct ef4_mtd_partition *part)
+{
+       int rc;
+
+       while ((rc = mtd_device_unregister(&part->mtd)) == -EBUSY)
+               ssleep(1);
+
+       WARN_ON(rc);
+       list_del(&part->node);
+}
+
+/* Locate entry @i of an array of partitions whose elements are @sizeof_part
+ * bytes apart.
+ */
+static struct ef4_mtd_partition *
+ef4_mtd_part_at(struct ef4_mtd_partition *parts, size_t i, size_t sizeof_part)
+{
+       return (struct ef4_mtd_partition *)((char *)parts + i * sizeof_part);
+}
+
+/* Initialise and register the @n_parts partitions in the array @parts,
+ * whose elements are @sizeof_part bytes each, and append them to
+ * efx->mtd_list.  On a registration failure the partitions registered so
+ * far are removed again, last first, and -ENOMEM is returned; otherwise
+ * returns 0.
+ */
+int ef4_mtd_add(struct ef4_nic *efx, struct ef4_mtd_partition *parts,
+               size_t n_parts, size_t sizeof_part)
+{
+       struct ef4_mtd_partition *part;
+       size_t i;
+
+       for (i = 0; i < n_parts; i++) {
+               part = ef4_mtd_part_at(parts, i, sizeof_part);
+
+               part->mtd.writesize = 1;
+               part->mtd.owner = THIS_MODULE;
+               part->mtd.priv = efx;
+               part->mtd.name = part->name;
+
+               part->mtd._erase = ef4_mtd_erase;
+               part->mtd._read = efx->type->mtd_read;
+               part->mtd._write = efx->type->mtd_write;
+               part->mtd._sync = ef4_mtd_sync;
+
+               efx->type->mtd_rename(part);
+
+               if (mtd_device_register(&part->mtd, NULL, 0))
+                       goto fail;
+
+               /* Add to list in order - ef4_mtd_remove() depends on this */
+               list_add_tail(&part->node, &efx->mtd_list);
+       }
+
+       return 0;
+
+fail:
+       /* Unwind only the partitions that were successfully registered */
+       while (i--)
+               ef4_mtd_remove_partition(ef4_mtd_part_at(parts, i,
+                                                        sizeof_part));
+
+       /* Failure is unlikely here, but probably means we're out of memory */
+       return -ENOMEM;
+}
+
+/* Remove every partition on efx->mtd_list, then free the first list entry.
+ * Warns if the net device is still registered.
+ */
+void ef4_mtd_remove(struct ef4_nic *efx)
+{
+       struct ef4_mtd_partition *first, *cur, *tmp;
+
+       WARN_ON(ef4_dev_registered(efx));
+
+       if (list_empty(&efx->mtd_list))
+               return;
+
+       /* Remember the head entry before the list is torn down; it is the
+        * pointer that gets freed below.
+        */
+       first = list_first_entry(&efx->mtd_list, struct ef4_mtd_partition,
+                                node);
+
+       list_for_each_entry_safe(cur, tmp, &efx->mtd_list, node)
+               ef4_mtd_remove_partition(cur);
+
+       kfree(first);
+}
+
+/* Re-run the NIC type's mtd_rename() hook on every partition on
+ * efx->mtd_list.  The caller must hold the RTNL lock.
+ */
+void ef4_mtd_rename(struct ef4_nic *efx)
+{
+       struct ef4_mtd_partition *cur;
+
+       ASSERT_RTNL();
+
+       list_for_each_entry(cur, &efx->mtd_list, node)
+               efx->type->mtd_rename(cur);
+}
diff --git a/drivers/net/ethernet/sfc/falcon/net_driver.h b/drivers/net/ethernet/sfc/falcon/net_driver.h
new file mode 100644 (file)
index 0000000..210b28f
--- /dev/null
@@ -0,0 +1,1464 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+/* Common definitions for all Efx net driver code */
+
+#ifndef EF4_NET_DRIVER_H
+#define EF4_NET_DRIVER_H
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/timer.h>
+#include <linux/mdio.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/highmem.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
+#include <linux/i2c.h>
+#include <linux/mtd/mtd.h>
+#include <net/busy_poll.h>
+
+#include "enum.h"
+#include "bitfield.h"
+#include "filter.h"
+
+/**************************************************************************
+ *
+ * Build definitions
+ *
+ **************************************************************************/
+
+#define EF4_DRIVER_VERSION     "4.1"
+
+#ifdef DEBUG
+#define EF4_BUG_ON_PARANOID(x) BUG_ON(x)
+#define EF4_WARN_ON_PARANOID(x) WARN_ON(x)
+#else
+#define EF4_BUG_ON_PARANOID(x) do {} while (0)
+#define EF4_WARN_ON_PARANOID(x) do {} while (0)
+#endif
+
+/**************************************************************************
+ *
+ * Efx data structures
+ *
+ **************************************************************************/
+
+#define EF4_MAX_CHANNELS 32U
+#define EF4_MAX_RX_QUEUES EF4_MAX_CHANNELS
+#define EF4_EXTRA_CHANNEL_IOV  0
+#define EF4_EXTRA_CHANNEL_PTP  1
+#define EF4_MAX_EXTRA_CHANNELS 2U
+
+/* Checksum generation is a per-queue option in hardware, so each
+ * queue visible to the networking core is backed by two hardware TX
+ * queues. */
+#define EF4_MAX_TX_TC          2
+#define EF4_MAX_CORE_TX_QUEUES (EF4_MAX_TX_TC * EF4_MAX_CHANNELS)
+#define EF4_TXQ_TYPE_OFFLOAD   1       /* flag */
+#define EF4_TXQ_TYPE_HIGHPRI   2       /* flag */
+#define EF4_TXQ_TYPES          4
+#define EF4_MAX_TX_QUEUES      (EF4_TXQ_TYPES * EF4_MAX_CHANNELS)
+
+/* Maximum possible MTU the driver supports */
+#define EF4_MAX_MTU (9 * 1024)
+
+/* Minimum MTU, from RFC791 (IP) */
+#define EF4_MIN_MTU 68
+
+/* Size of an RX scatter buffer.  Small enough to pack 2 into a 4K page,
+ * and should be a multiple of the cache line size.
+ */
+#define EF4_RX_USR_BUF_SIZE    (2048 - 256)
+
+/* If possible, we should ensure cache line alignment at start and end
+ * of every buffer.  Otherwise, we just need to ensure 4-byte
+ * alignment of the network header.
+ */
+#if NET_IP_ALIGN == 0
+#define EF4_RX_BUF_ALIGNMENT   L1_CACHE_BYTES
+#else
+#define EF4_RX_BUF_ALIGNMENT   4
+#endif
+
+struct ef4_self_tests;
+
+/**
+ * struct ef4_buffer - A general-purpose DMA buffer
+ * @addr: host base address of the buffer
+ * @dma_addr: DMA base address of the buffer
+ * @len: Buffer length, in bytes
+ *
+ * The NIC uses these buffers for its interrupt status registers and
+ * MAC stats dumps.
+ */
+struct ef4_buffer {
+       void *addr;
+       dma_addr_t dma_addr;
+       unsigned int len;
+};
+
+/**
+ * struct ef4_special_buffer - DMA buffer entered into buffer table
+ * @buf: Standard &struct ef4_buffer
+ * @index: Buffer index within controller's buffer table
+ * @entries: Number of buffer table entries
+ *
+ * The NIC has a buffer table that maps buffers of size %EF4_BUF_SIZE.
+ * Event and descriptor rings are addressed via one or more buffer
+ * table entries (and so can be physically non-contiguous, although we
+ * currently do not take advantage of that).  On Falcon and Siena we
+ * have to take care of allocating and initialising the entries
+ * ourselves.  On later hardware this is managed by the firmware and
+ * @index and @entries are left as 0.
+ */
+struct ef4_special_buffer {
+       struct ef4_buffer buf;
+       unsigned int index;
+       unsigned int entries;
+};
+
+/**
+ * struct ef4_tx_buffer - buffer state for a TX descriptor
+ * @skb: When @flags & %EF4_TX_BUF_SKB, the associated socket buffer to be
+ *     freed when descriptor completes
+ * @option: When @flags & %EF4_TX_BUF_OPTION, a NIC-specific option descriptor.
+ * @dma_addr: DMA address of the fragment.
+ * @flags: Flags for allocation and DMA mapping type
+ * @len: Length of this fragment.
+ *     This field is zero when the queue slot is empty.
+ * @unmap_len: Length of this fragment to unmap
+ * @dma_offset: Offset of @dma_addr from the address of the backing DMA mapping.
+ * Only valid if @unmap_len != 0.
+ */
+struct ef4_tx_buffer {
+       const struct sk_buff *skb;
+       union {
+               ef4_qword_t option;
+               dma_addr_t dma_addr;
+       };
+       unsigned short flags;
+       unsigned short len;
+       unsigned short unmap_len;
+       unsigned short dma_offset;
+};
+#define EF4_TX_BUF_CONT                1       /* not last descriptor of packet */
+#define EF4_TX_BUF_SKB         2       /* buffer is last part of skb */
+#define EF4_TX_BUF_MAP_SINGLE  8       /* buffer was mapped with dma_map_single() */
+#define EF4_TX_BUF_OPTION      0x10    /* empty buffer for option descriptor */
+
+/**
+ * struct ef4_tx_queue - An Efx TX queue
+ *
+ * This is a ring buffer of TX fragments.
+ * Since the TX completion path always executes on the same
+ * CPU and the xmit path can operate on different CPUs,
+ * performance is increased by ensuring that the completion
+ * path and the xmit path operate on different cache lines.
+ * This is particularly important if the xmit path is always
+ * executing on one CPU which is different from the completion
+ * path.  There is also a cache line for members which are
+ * read but not written on the fast path.
+ *
+ * @efx: The associated Efx NIC
+ * @queue: DMA queue number
+ * @channel: The associated channel
+ * @core_txq: The networking core TX queue structure
+ * @buffer: The software buffer ring
+ * @cb_page: Array of pages of copy buffers.  Carved up according to
+ *     %EF4_TX_CB_ORDER into %EF4_TX_CB_SIZE-sized chunks.
+ * @txd: The hardware descriptor ring
+ * @ptr_mask: The size of the ring minus 1.
+ * @initialised: Has hardware queue been initialised?
+ * @tx_min_size: Minimum transmit size for this queue. Depends on HW.
+ * @read_count: Current read pointer.
+ *     This is the number of buffers that have been removed from both rings.
+ * @old_write_count: The value of @write_count when last checked.
+ *     This is here for performance reasons.  The xmit path will
+ *     only get the up-to-date value of @write_count if this
+ *     variable indicates that the queue is empty.  This is to
+ *     avoid cache-line ping-pong between the xmit path and the
+ *     completion path.
+ * @merge_events: Number of TX merged completion events
+ * @insert_count: Current insert pointer
+ *     This is the number of buffers that have been added to the
+ *     software ring.
+ * @write_count: Current write pointer
+ *     This is the number of buffers that have been added to the
+ *     hardware ring.
+ * @old_read_count: The value of read_count when last checked.
+ *     This is here for performance reasons.  The xmit path will
+ *     only get the up-to-date value of read_count if this
+ *     variable indicates that the queue is full.  This is to
+ *     avoid cache-line ping-pong between the xmit path and the
+ *     completion path.
+ * @pushes: Number of times the TX push feature has been used
+ * @xmit_more_available: Are any packets waiting to be pushed to the NIC
+ * @cb_packets: Number of times the TX copybreak feature has been used
+ * @empty_read_count: If the completion path has seen the queue as empty
+ *     and the transmission path has not yet checked this, the value of
+ *     @read_count bitwise-added to %EF4_EMPTY_COUNT_VALID; otherwise 0.
+ */
+struct ef4_tx_queue {
+       /* Members which don't change on the fast path */
+       struct ef4_nic *efx ____cacheline_aligned_in_smp;
+       unsigned queue;
+       struct ef4_channel *channel;
+       struct netdev_queue *core_txq;
+       struct ef4_tx_buffer *buffer;
+       struct ef4_buffer *cb_page;
+       struct ef4_special_buffer txd;
+       unsigned int ptr_mask;
+       bool initialised;
+       unsigned int tx_min_size;
+
+       /* Function pointers used in the fast path. */
+       int (*handle_tso)(struct ef4_tx_queue*, struct sk_buff*, bool *);
+
+       /* Members used mainly on the completion path */
+       unsigned int read_count ____cacheline_aligned_in_smp;
+       unsigned int old_write_count;
+       unsigned int merge_events;
+       unsigned int bytes_compl;
+       unsigned int pkts_compl;
+
+       /* Members used only on the xmit path */
+       unsigned int insert_count ____cacheline_aligned_in_smp;
+       unsigned int write_count;
+       unsigned int old_read_count;
+       unsigned int pushes;
+       bool xmit_more_available;
+       unsigned int cb_packets;
+       /* Statistics to supplement MAC stats */
+       unsigned long tx_packets;
+
+       /* Members shared between paths and sometimes updated */
+       unsigned int empty_read_count ____cacheline_aligned_in_smp;
+#define EF4_EMPTY_COUNT_VALID 0x80000000
+       atomic_t flush_outstanding;
+};
+
+#define EF4_TX_CB_ORDER        7       /* cb_page chunking order */
+#define EF4_TX_CB_SIZE ((1 << EF4_TX_CB_ORDER) - NET_IP_ALIGN)
+
+/**
+ * struct ef4_rx_buffer - An Efx RX data buffer
+ * @dma_addr: DMA base address of the buffer
+ * @page: The associated page buffer.
+ *     Will be %NULL if the buffer slot is currently free.
+ * @page_offset: If pending: offset in @page of DMA base address.
+ *     If completed: offset in @page of Ethernet header.
+ * @len: If pending: length for DMA descriptor.
+ *     If completed: received length, excluding hash prefix.
+ * @flags: Flags for buffer and packet state.  These are only set on the
+ *     first buffer of a scattered packet.
+ */
+struct ef4_rx_buffer {
+       dma_addr_t dma_addr;
+       struct page *page;
+       u16 page_offset;
+       u16 len;
+       u16 flags;
+};
+#define EF4_RX_BUF_LAST_IN_PAGE        0x0001
+#define EF4_RX_PKT_CSUMMED     0x0002
+#define EF4_RX_PKT_DISCARD     0x0004
+#define EF4_RX_PKT_TCP         0x0040
+#define EF4_RX_PKT_PREFIX_LEN  0x0080  /* length is in prefix only */
+
+/**
+ * struct ef4_rx_page_state - Page-based rx buffer state
+ *
+ * Inserted at the start of every page allocated for receive buffers.
+ * Used to facilitate sharing dma mappings between recycled rx buffers
+ * and those passed up to the kernel.
+ *
+ * @dma_addr: The dma address of this page.
+ */
+struct ef4_rx_page_state {
+       dma_addr_t dma_addr;
+
+       unsigned int __pad[0] ____cacheline_aligned;
+};
+
+/**
+ * struct ef4_rx_queue - An Efx RX queue
+ * @efx: The associated Efx NIC
+ * @core_index:  Index of network core RX queue.  Will be >= 0 iff this
+ *     is associated with a real RX queue.
+ * @buffer: The software buffer ring
+ * @rxd: The hardware descriptor ring
+ * @ptr_mask: The size of the ring minus 1.
+ * @refill_enabled: Enable refill whenever fill level is low
+ * @flush_pending: Set when a RX flush is pending. Has the same lifetime as
+ *     @rxq_flush_pending.
+ * @added_count: Number of buffers added to the receive queue.
+ * @notified_count: Number of buffers given to NIC (<= @added_count).
+ * @removed_count: Number of buffers removed from the receive queue.
+ * @scatter_n: Used by NIC specific receive code.
+ * @scatter_len: Used by NIC specific receive code.
+ * @page_ring: The ring to store DMA mapped pages for reuse.
+ * @page_add: Counter to calculate the write pointer for the recycle ring.
+ * @page_remove: Counter to calculate the read pointer for the recycle ring.
+ * @page_recycle_count: The number of pages that have been recycled.
+ * @page_recycle_failed: The number of pages that couldn't be recycled because
+ *      the kernel still held a reference to them.
+ * @page_recycle_full: The number of pages that were released because the
+ *      recycle ring was full.
+ * @page_ptr_mask: The number of pages in the RX recycle ring minus 1.
+ * @max_fill: RX descriptor maximum fill level (<= ring size)
+ * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill
+ *     (<= @max_fill)
+ * @min_fill: RX descriptor minimum non-zero fill level.
+ *     This records the minimum fill level observed when a ring
+ *     refill was triggered.
+ * @recycle_count: RX buffer recycle counter.
+ * @slow_fill: Timer used to defer ef4_nic_generate_fill_event().
+ */
+struct ef4_rx_queue {
+       struct ef4_nic *efx;
+       int core_index;
+       struct ef4_rx_buffer *buffer;
+       struct ef4_special_buffer rxd;
+       unsigned int ptr_mask;
+       bool refill_enabled;
+       bool flush_pending;
+
+       unsigned int added_count;
+       unsigned int notified_count;
+       unsigned int removed_count;
+       unsigned int scatter_n;
+       unsigned int scatter_len;
+       struct page **page_ring;
+       unsigned int page_add;
+       unsigned int page_remove;
+       unsigned int page_recycle_count;
+       unsigned int page_recycle_failed;
+       unsigned int page_recycle_full;
+       unsigned int page_ptr_mask;
+       unsigned int max_fill;
+       unsigned int fast_fill_trigger;
+       unsigned int min_fill;
+       unsigned int min_overfill;
+       unsigned int recycle_count;
+       struct timer_list slow_fill;
+       unsigned int slow_fill_count;
+       /* Statistics to supplement MAC stats */
+       unsigned long rx_packets;
+};
+
+/**
+ * struct ef4_channel - An Efx channel
+ *
+ * A channel comprises an event queue, at least one TX queue, at least
+ * one RX queue, and an associated tasklet for processing the event
+ * queue.
+ *
+ * @efx: Associated Efx NIC
+ * @channel: Channel instance number
+ * @type: Channel type definition
+ * @eventq_init: Event queue initialised flag
+ * @enabled: Channel enabled indicator
+ * @irq: IRQ number (MSI and MSI-X only)
+ * @irq_moderation_us: IRQ moderation value (in microseconds)
+ * @napi_dev: Net device used with NAPI
+ * @napi_str: NAPI control structure
+ * @busy_poll_state: State for NAPI vs busy polling; only present when
+ *     CONFIG_NET_RX_BUSY_POLL is enabled
+ * @eventq: Event queue buffer
+ * @eventq_mask: Event queue pointer mask
+ * @eventq_read_ptr: Event queue read pointer
+ * @event_test_cpu: Last CPU to handle interrupt or test event for this channel
+ * @irq_count: Number of IRQs since last adaptive moderation decision
+ * @irq_mod_score: IRQ moderation score
+ * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS,
+ *      indexed by filter ID
+ * @n_rx_tobe_disc: Count of RX_TOBE_DISC errors
+ * @n_rx_ip_hdr_chksum_err: Count of RX IP header checksum errors
+ * @n_rx_tcp_udp_chksum_err: Count of RX TCP and UDP checksum errors
+ * @n_rx_mcast_mismatch: Count of unmatched multicast frames
+ * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors
+ * @n_rx_overlength: Count of RX_OVERLENGTH errors
+ * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun
+ * @n_rx_nodesc_trunc: Number of RX packets truncated and then dropped due to
+ *     lack of descriptors
+ * @n_rx_merge_events: Number of RX merged completion events
+ * @n_rx_merge_packets: Number of RX packets completed by merged events
+ * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by
+ *     __ef4_rx_packet(), or zero if there is none
+ * @rx_pkt_index: Ring index of first buffer for next packet to be delivered
+ *     by __ef4_rx_packet(), if @rx_pkt_n_frags != 0
+ * @rx_queue: RX queue for this channel
+ * @tx_queue: TX queues for this channel
+ */
+struct ef4_channel {
+       struct ef4_nic *efx;
+       int channel;
+       const struct ef4_channel_type *type;
+       bool eventq_init;
+       bool enabled;
+       int irq;
+       unsigned int irq_moderation_us;
+       struct net_device *napi_dev;
+       struct napi_struct napi_str;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       unsigned long busy_poll_state;
+#endif
+       struct ef4_special_buffer eventq;
+       unsigned int eventq_mask;
+       unsigned int eventq_read_ptr;
+       int event_test_cpu;
+
+       unsigned int irq_count;
+       unsigned int irq_mod_score;
+#ifdef CONFIG_RFS_ACCEL
+       unsigned int rfs_filters_added;
+#define RPS_FLOW_ID_INVALID 0xFFFFFFFF
+       u32 *rps_flow_id;
+#endif
+
+       unsigned n_rx_tobe_disc;
+       unsigned n_rx_ip_hdr_chksum_err;
+       unsigned n_rx_tcp_udp_chksum_err;
+       unsigned n_rx_mcast_mismatch;
+       unsigned n_rx_frm_trunc;
+       unsigned n_rx_overlength;
+       unsigned n_skbuff_leaks;
+       unsigned int n_rx_nodesc_trunc;
+       unsigned int n_rx_merge_events;
+       unsigned int n_rx_merge_packets;
+
+       unsigned int rx_pkt_n_frags;
+       unsigned int rx_pkt_index;
+
+       struct ef4_rx_queue rx_queue;
+       struct ef4_tx_queue tx_queue[EF4_TXQ_TYPES];
+};
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+enum ef4_channel_busy_poll_state {
+       EF4_CHANNEL_STATE_IDLE = 0,
+       EF4_CHANNEL_STATE_NAPI = BIT(0),
+       EF4_CHANNEL_STATE_NAPI_REQ_BIT = 1,
+       EF4_CHANNEL_STATE_NAPI_REQ = BIT(1),
+       EF4_CHANNEL_STATE_POLL_BIT = 2,
+       EF4_CHANNEL_STATE_POLL = BIT(2),
+       EF4_CHANNEL_STATE_DISABLE_BIT = 3,
+};
+
+static inline void ef4_channel_busy_poll_init(struct ef4_channel *channel)
+{
+       WRITE_ONCE(channel->busy_poll_state, EF4_CHANNEL_STATE_IDLE);
+}
+
+/* Called from the device poll routine to get ownership of a channel. */
+static inline bool ef4_channel_lock_napi(struct ef4_channel *channel)
+{
+       unsigned long prev, old = READ_ONCE(channel->busy_poll_state);
+
+       while (1) {
+               switch (old) {
+               case EF4_CHANNEL_STATE_POLL:
+                       /* Ensure ef4_channel_try_lock_poll() won't starve us */
+                       set_bit(EF4_CHANNEL_STATE_NAPI_REQ_BIT,
+                               &channel->busy_poll_state);
+                       /* fallthrough */
+               case EF4_CHANNEL_STATE_POLL | EF4_CHANNEL_STATE_NAPI_REQ:
+                       return false;   /* state has POLL set: not acquired */
+               default:
+                       break;
+               }
+               prev = cmpxchg(&channel->busy_poll_state, old,
+                              EF4_CHANNEL_STATE_NAPI);
+               if (unlikely(prev != old)) {
+                       /* This is likely to mean we've just entered polling
+                        * state. Go back round to set the REQ bit.
+                        */
+                       old = prev;
+                       continue;
+               }
+               return true;    /* cmpxchg stored EF4_CHANNEL_STATE_NAPI */
+       }
+}
+
+static inline void ef4_channel_unlock_napi(struct ef4_channel *channel)
+{
+       /* Make sure write has completed from ef4_channel_lock_napi() */
+       smp_wmb();
+       WRITE_ONCE(channel->busy_poll_state, EF4_CHANNEL_STATE_IDLE);
+}
+
+/* Called from ef4_busy_poll(). */
+static inline bool ef4_channel_try_lock_poll(struct ef4_channel *channel)
+{
+       return cmpxchg(&channel->busy_poll_state, EF4_CHANNEL_STATE_IDLE,
+                       EF4_CHANNEL_STATE_POLL) == EF4_CHANNEL_STATE_IDLE;
+}
+
+static inline void ef4_channel_unlock_poll(struct ef4_channel *channel)
+{
+       clear_bit_unlock(EF4_CHANNEL_STATE_POLL_BIT, &channel->busy_poll_state);
+}
+
+static inline bool ef4_channel_busy_polling(struct ef4_channel *channel)
+{
+       return test_bit(EF4_CHANNEL_STATE_POLL_BIT, &channel->busy_poll_state);
+}
+
+static inline void ef4_channel_enable(struct ef4_channel *channel)
+{
+       clear_bit_unlock(EF4_CHANNEL_STATE_DISABLE_BIT,
+                        &channel->busy_poll_state);
+}
+
+/* Stop further polling or napi access.
+ * Returns false if the channel is currently busy polling.
+ */
+static inline bool ef4_channel_disable(struct ef4_channel *channel)
+{
+       set_bit(EF4_CHANNEL_STATE_DISABLE_BIT, &channel->busy_poll_state);
+       /* Implicit barrier in ef4_channel_busy_polling() */
+       return !ef4_channel_busy_polling(channel);
+}
+
+#else /* CONFIG_NET_RX_BUSY_POLL */
+
+static inline void ef4_channel_busy_poll_init(struct ef4_channel *channel)
+{
+}
+
+static inline bool ef4_channel_lock_napi(struct ef4_channel *channel)
+{
+       return true;
+}
+
+static inline void ef4_channel_unlock_napi(struct ef4_channel *channel)
+{
+}
+
+static inline bool ef4_channel_try_lock_poll(struct ef4_channel *channel)
+{
+       return false;
+}
+
+static inline void ef4_channel_unlock_poll(struct ef4_channel *channel)
+{
+}
+
+static inline bool ef4_channel_busy_polling(struct ef4_channel *channel)
+{
+       return false;
+}
+
+static inline void ef4_channel_enable(struct ef4_channel *channel)
+{
+}
+
+static inline bool ef4_channel_disable(struct ef4_channel *channel)
+{
+       return true;
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+/**
+ * struct ef4_msi_context - Context for each MSI
+ * @efx: The associated NIC
+ * @index: Index of the channel/IRQ
+ * @name: Name of the channel/IRQ
+ *
+ * Unlike &struct ef4_channel, this is never reallocated and is always
+ * safe for the IRQ handler to access.
+ */
+struct ef4_msi_context {
+       struct ef4_nic *efx;
+       unsigned int index;
+       char name[IFNAMSIZ + 6];
+};
+
+/**
+ * struct ef4_channel_type - distinguishes traffic and extra channels
+ * @handle_no_channel: Handle failure to allocate an extra channel
+ * @pre_probe: Set up extra state prior to initialisation
+ * @post_remove: Tear down extra state after finalisation, if allocated.
+ *     May be called on channels that have not been probed.
+ * @get_name: Generate the channel's name (used for its IRQ handler)
+ * @copy: Copy the channel state prior to reallocation.  May be %NULL if
+ *     reallocation is not supported.
+ * @receive_skb: Handle an skb ready to be passed to netif_receive_skb()
+ * @keep_eventq: Flag for whether event queue should be kept initialised
+ *     while the device is stopped
+ */
+struct ef4_channel_type {
+       void (*handle_no_channel)(struct ef4_nic *);
+       int (*pre_probe)(struct ef4_channel *);
+       void (*post_remove)(struct ef4_channel *);
+       void (*get_name)(struct ef4_channel *, char *buf, size_t len);
+       struct ef4_channel *(*copy)(const struct ef4_channel *);
+       bool (*receive_skb)(struct ef4_channel *, struct sk_buff *);
+       bool keep_eventq;
+};
+
+enum ef4_led_mode {
+       EF4_LED_OFF     = 0,
+       EF4_LED_ON      = 1,
+       EF4_LED_DEFAULT = 2
+};
+
+#define STRING_TABLE_LOOKUP(val, member) \
+       (((val) < member ## _max) ? member ## _names[val] : "(invalid)")
+
+extern const char *const ef4_loopback_mode_names[];
+extern const unsigned int ef4_loopback_mode_max;
+#define LOOPBACK_MODE(efx) \
+       STRING_TABLE_LOOKUP((efx)->loopback_mode, ef4_loopback_mode)
+
+extern const char *const ef4_reset_type_names[];
+extern const unsigned int ef4_reset_type_max;
+#define RESET_TYPE(type) \
+       STRING_TABLE_LOOKUP(type, ef4_reset_type)
+
+enum ef4_int_mode {
+       /* EF4_INT_MODE_USE_MSI() below depends on these values */
+       EF4_INT_MODE_MSIX = 0,
+       EF4_INT_MODE_MSI = 1,
+       EF4_INT_MODE_LEGACY = 2,
+       EF4_INT_MODE_MAX        /* Insert any new items before this */
+};
+#define EF4_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EF4_INT_MODE_MSI)
+
+enum nic_state {
+       STATE_UNINIT = 0,       /* device being probed/removed or is frozen */
+       STATE_READY = 1,        /* hardware ready and netdev registered */
+       STATE_DISABLED = 2,     /* device disabled due to hardware errors */
+       STATE_RECOVERY = 3,     /* device recovering from PCI error */
+};
+
+/* Forward declaration */
+struct ef4_nic;
+
+/* Pseudo bit-mask flow control field */
+#define EF4_FC_RX      FLOW_CTRL_RX
+#define EF4_FC_TX      FLOW_CTRL_TX
+#define EF4_FC_AUTO    4
+
+/**
+ * struct ef4_link_state - Current state of the link
+ * @up: Link is up
+ * @fd: Link is full-duplex
+ * @fc: Actual flow control flags
+ * @speed: Link speed (Mbps)
+ */
+struct ef4_link_state {
+       bool up;
+       bool fd;
+       u8 fc;
+       unsigned int speed;
+};
+
+static inline bool ef4_link_state_equal(const struct ef4_link_state *left,
+                                       const struct ef4_link_state *right)
+{
+       return left->up == right->up && left->fd == right->fd &&
+               left->fc == right->fc && left->speed == right->speed;
+}
+
+/**
+ * struct ef4_phy_operations - Efx PHY operations table
+ * @probe: Probe PHY and initialise efx->mdio.mode_support, efx->mdio.mmds,
+ *     efx->loopback_modes.
+ * @init: Initialise PHY
+ * @fini: Shut down PHY
+ * @reconfigure: Reconfigure PHY (e.g. for new link parameters)
+ * @poll: Update @link_state and report whether it changed.
+ *     Serialised by the mac_lock.
+ * @get_settings: Get ethtool settings. Serialised by the mac_lock.
+ * @set_settings: Set ethtool settings. Serialised by the mac_lock.
+ * @set_npage_adv: Set abilities advertised in (Extended) Next Page
+ *     (only needed where AN bit is set in mmds)
+ * @test_alive: Test that PHY is 'alive' (online)
+ * @test_name: Get the name of a PHY-specific test/result
+ * @run_tests: Run tests and record results as appropriate (offline).
+ *     Flags are the ethtool tests flags.
+ */
+struct ef4_phy_operations {
+       int (*probe) (struct ef4_nic *efx);
+       int (*init) (struct ef4_nic *efx);
+       void (*fini) (struct ef4_nic *efx);
+       void (*remove) (struct ef4_nic *efx);
+       int (*reconfigure) (struct ef4_nic *efx);
+       bool (*poll) (struct ef4_nic *efx);
+       void (*get_settings) (struct ef4_nic *efx,
+                             struct ethtool_cmd *ecmd);
+       int (*set_settings) (struct ef4_nic *efx,
+                            struct ethtool_cmd *ecmd);
+       void (*set_npage_adv) (struct ef4_nic *efx, u32);
+       int (*test_alive) (struct ef4_nic *efx);
+       const char *(*test_name) (struct ef4_nic *efx, unsigned int index);
+       int (*run_tests) (struct ef4_nic *efx, int *results, unsigned flags);
+       int (*get_module_eeprom) (struct ef4_nic *efx,
+                              struct ethtool_eeprom *ee,
+                              u8 *data);
+       int (*get_module_info) (struct ef4_nic *efx,
+                               struct ethtool_modinfo *modinfo);
+};
+
+/**
+ * enum ef4_phy_mode - PHY operating mode flags
+ * @PHY_MODE_NORMAL: on and should pass traffic
+ * @PHY_MODE_TX_DISABLED: on with TX disabled
+ * @PHY_MODE_LOW_POWER: set to low power through MDIO
+ * @PHY_MODE_OFF: switched off through external control
+ * @PHY_MODE_SPECIAL: on but will not pass traffic
+ */
+enum ef4_phy_mode {
+       PHY_MODE_NORMAL         = 0,
+       PHY_MODE_TX_DISABLED    = 1,
+       PHY_MODE_LOW_POWER      = 2,
+       PHY_MODE_OFF            = 4,
+       PHY_MODE_SPECIAL        = 8,
+};
+
+static inline bool ef4_phy_mode_disabled(enum ef4_phy_mode mode)
+{
+       return !!(mode & ~PHY_MODE_TX_DISABLED);
+}
+
+/**
+ * struct ef4_hw_stat_desc - Description of a hardware statistic
+ * @name: Name of the statistic as visible through ethtool, or %NULL if
+ *     it should not be exposed
+ * @dma_width: Width in bits (0 for non-DMA statistics)
+ * @offset: Offset within stats (ignored for non-DMA statistics)
+ */
+struct ef4_hw_stat_desc {
+       const char *name;
+       u16 dma_width;
+       u16 offset;
+};
+
+/* Number of bits used in a multicast filter hash address */
+#define EF4_MCAST_HASH_BITS 8
+
+/* Number of (single-bit) entries in a multicast filter hash */
+#define EF4_MCAST_HASH_ENTRIES (1 << EF4_MCAST_HASH_BITS)
+
+/* An Efx multicast filter hash */
+union ef4_multicast_hash {
+       u8 byte[EF4_MCAST_HASH_ENTRIES / 8];
+       ef4_oword_t oword[EF4_MCAST_HASH_ENTRIES / sizeof(ef4_oword_t) / 8];
+};
+
+/**
+ * struct ef4_nic - an Efx NIC
+ * @name: Device name (net device name or bus id before net device registered)
+ * @pci_dev: The PCI device
+ * @node: List node for maintaining primary/secondary function lists
+ * @primary: &struct ef4_nic instance for the primary function of this
+ *     controller.  May be the same structure, and may be %NULL if no
+ *     primary function is bound.  Serialised by rtnl_lock.
+ * @secondary_list: List of &struct ef4_nic instances for the secondary PCI
+ *     functions of the controller, if this is for the primary function.
+ *     Serialised by rtnl_lock.
+ * @type: Controller type attributes
+ * @legacy_irq: IRQ number
+ * @workqueue: Workqueue for port reconfigures and the HW monitor.
+ *     Work items do not hold and must not acquire RTNL.
+ * @workqueue_name: Name of workqueue
+ * @reset_work: Scheduled reset workitem
+ * @membase_phys: Memory BAR value as physical address
+ * @membase: Memory BAR value
+ * @interrupt_mode: Interrupt mode
+ * @timer_quantum_ns: Interrupt timer quantum, in nanoseconds
+ * @timer_max_ns: Interrupt timer maximum value, in nanoseconds
+ * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues
+ * @irq_rx_mod_step_us: Step size for IRQ moderation for RX event queues
+ * @irq_rx_moderation_us: IRQ moderation time for RX event queues
+ * @msg_enable: Log message enable flags
+ * @state: Device state number (%STATE_*). Serialised by the rtnl_lock.
+ * @reset_pending: Bitmask for pending resets
+ * @tx_queue: TX DMA queues
+ * @rx_queue: RX DMA queues
+ * @channel: Channels
+ * @msi_context: Context for each MSI
+ * @extra_channel_types: Types of extra (non-traffic) channels that
+ *     should be allocated for this NIC
+ * @rxq_entries: Size of receive queues requested by user.
+ * @txq_entries: Size of transmit queues requested by user.
+ * @txq_stop_thresh: TX queue fill level at or above which we stop it.
+ * @txq_wake_thresh: TX queue fill level at or below which we wake it.
+ * @tx_dc_base: Base qword address in SRAM of TX queue descriptor caches
+ * @rx_dc_base: Base qword address in SRAM of RX queue descriptor caches
+ * @sram_lim_qw: Qword address limit of SRAM
+ * @next_buffer_table: First available buffer table id
+ * @n_channels: Number of channels in use
+ * @n_rx_channels: Number of channels used for RX (= number of RX queues)
+ * @n_tx_channels: Number of channels used for TX
+ * @rx_ip_align: RX DMA address offset to have IP header aligned
+ *     in accordance with NET_IP_ALIGN
+ * @rx_dma_len: Current maximum RX DMA length
+ * @rx_buffer_order: Order (log2) of number of pages for each RX buffer
+ * @rx_buffer_truesize: Amortised allocation size of an RX buffer,
+ *     for use in sk_buff::truesize
+ * @rx_prefix_size: Size of RX prefix before packet data
+ * @rx_packet_hash_offset: Offset of RX flow hash from start of packet data
+ *     (valid only if @rx_prefix_size != 0; always negative)
+ * @rx_packet_len_offset: Offset of RX packet length from start of packet data
+ *     (valid only for NICs that set %EF4_RX_PKT_PREFIX_LEN; always negative)
+ * @rx_packet_ts_offset: Offset of timestamp from start of packet data
+ *     (valid only if channel->sync_timestamps_enabled; always negative)
+ * @rx_hash_key: Toeplitz hash key for RSS
+ * @rx_indir_table: Indirection table for RSS
+ * @rx_scatter: Scatter mode enabled for receives
+ * @int_error_count: Number of internal errors seen recently
+ * @int_error_expire: Time at which error count will be expired
+ * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
+ *     acknowledge but do nothing else.
+ * @irq_status: Interrupt status buffer
+ * @irq_zero_count: Number of legacy IRQs seen with queue flags == 0
+ * @irq_level: IRQ level/index for IRQs not triggered by an event queue
+ * @selftest_work: Work item for asynchronous self-test
+ * @mtd_list: List of MTDs attached to the NIC
+ * @nic_data: Hardware dependent state
+ * @mac_lock: MAC access lock. Protects @port_enabled, @phy_mode,
+ *     ef4_monitor() and ef4_reconfigure_port()
+ * @port_enabled: Port enabled indicator.
+ *     Serialises ef4_stop_all(), ef4_start_all(), ef4_monitor() and
+ *     ef4_mac_work() with kernel interfaces. Safe to read under any
+ *     one of the rtnl_lock, mac_lock, or netif_tx_lock, but all three must
+ *     be held to modify it.
+ * @port_initialized: Port initialized?
+ * @net_dev: Operating system network device. Consider holding the rtnl lock
+ * @fixed_features: Features which cannot be turned off
+ * @stats_buffer: DMA buffer for statistics
+ * @phy_type: PHY type
+ * @phy_op: PHY interface
+ * @phy_data: PHY private data (including PHY-specific stats)
+ * @mdio: PHY MDIO interface
+ * @phy_mode: PHY operating mode. Serialised by @mac_lock.
+ * @link_advertising: Autonegotiation advertising flags
+ * @link_state: Current state of the link
+ * @n_link_state_changes: Number of times the link has changed state
+ * @unicast_filter: Flag for Falcon-arch simple unicast filter.
+ *     Protected by @mac_lock.
+ * @multicast_hash: Multicast hash table for Falcon-arch.
+ *     Protected by @mac_lock.
+ * @wanted_fc: Wanted flow control flags
+ * @fc_disable: When non-zero flow control is disabled. Typically used to
+ *     ensure that network back pressure doesn't delay dma queue flushes.
+ *     Serialised by the rtnl lock.
+ * @mac_work: Work item for changing MAC promiscuity and multicast hash
+ * @loopback_mode: Loopback status
+ * @loopback_modes: Supported loopback mode bitmask
+ * @loopback_selftest: Offline self-test private state
+ * @filter_sem: Filter table rw_semaphore, for freeing the table
+ * @filter_lock: Filter table lock, for mere content changes
+ * @filter_state: Architecture-dependent filter table state
+ * @rps_expire_channel: Next channel to check for expiry
+ * @rps_expire_index: Next index to check for expiry in
+ *     @rps_expire_channel's @rps_flow_id
+ * @active_queues: Count of RX and TX queues that haven't been flushed and drained.
+ * @rxq_flush_pending: Count of number of receive queues that need to be flushed.
+ *     Decremented when ef4_flush_rx_queue() is called.
+ * @rxq_flush_outstanding: Count of number of RX flushes started but not yet
+ *     completed (either success or failure). Not used when MCDI is used to
+ *     flush receive queues.
+ * @flush_wq: wait queue used by ef4_nic_flush_queues() to wait for flush completions.
+ * @vpd_sn: Serial number read from VPD
+ * @monitor_work: Hardware monitor workitem
+ * @biu_lock: BIU (bus interface unit) lock
+ * @last_irq_cpu: Last CPU to handle a possible test interrupt.  This
+ *     field is used by ef4_test_interrupts() to verify that an
+ *     interrupt has occurred.
+ * @stats_lock: Statistics update lock. Must be held when calling
+ *     ef4_nic_type::{update,start,stop}_stats.
+ * @n_rx_noskb_drops: Count of RX packets dropped due to failure to allocate an skb
+ *
+ * This is stored in the private area of the &struct net_device.
+ */
+struct ef4_nic {
+       /* The following fields should be written very rarely */
+
+       char name[IFNAMSIZ];
+       struct list_head node;
+       struct ef4_nic *primary;
+       struct list_head secondary_list;
+       struct pci_dev *pci_dev;
+       unsigned int port_num;
+       const struct ef4_nic_type *type;
+       int legacy_irq;
+       bool eeh_disabled_legacy_irq;
+       struct workqueue_struct *workqueue;
+       char workqueue_name[16];
+       struct work_struct reset_work;
+       resource_size_t membase_phys;
+       void __iomem *membase;
+
+       enum ef4_int_mode interrupt_mode;
+       unsigned int timer_quantum_ns;
+       unsigned int timer_max_ns;
+       bool irq_rx_adaptive;
+       unsigned int irq_mod_step_us;
+       unsigned int irq_rx_moderation_us;
+       u32 msg_enable;
+
+       enum nic_state state;
+       unsigned long reset_pending;
+
+       struct ef4_channel *channel[EF4_MAX_CHANNELS];
+       struct ef4_msi_context msi_context[EF4_MAX_CHANNELS];
+       const struct ef4_channel_type *
+       extra_channel_type[EF4_MAX_EXTRA_CHANNELS];
+
+       unsigned rxq_entries;
+       unsigned txq_entries;
+       unsigned int txq_stop_thresh;
+       unsigned int txq_wake_thresh;
+
+       unsigned tx_dc_base;
+       unsigned rx_dc_base;
+       unsigned sram_lim_qw;
+       unsigned next_buffer_table;
+
+       unsigned int max_channels;
+       unsigned int max_tx_channels;
+       unsigned n_channels;
+       unsigned n_rx_channels;
+       unsigned rss_spread;
+       unsigned tx_channel_offset;
+       unsigned n_tx_channels;
+       unsigned int rx_ip_align;
+       unsigned int rx_dma_len;
+       unsigned int rx_buffer_order;
+       unsigned int rx_buffer_truesize;
+       unsigned int rx_page_buf_step;
+       unsigned int rx_bufs_per_page;
+       unsigned int rx_pages_per_batch;
+       unsigned int rx_prefix_size;
+       int rx_packet_hash_offset;
+       int rx_packet_len_offset;
+       int rx_packet_ts_offset;
+       u8 rx_hash_key[40];
+       u32 rx_indir_table[128];
+       bool rx_scatter;
+
+       unsigned int_error_count;
+       unsigned long int_error_expire;
+
+       bool irq_soft_enabled;
+       struct ef4_buffer irq_status;
+       unsigned irq_zero_count;
+       unsigned irq_level;
+       struct delayed_work selftest_work;
+
+#ifdef CONFIG_SFC_FALCON_MTD
+       struct list_head mtd_list;
+#endif
+
+       void *nic_data;
+
+       struct mutex mac_lock;
+       struct work_struct mac_work;
+       bool port_enabled;
+
+       bool mc_bist_for_other_fn;
+       bool port_initialized;
+       struct net_device *net_dev;
+
+       netdev_features_t fixed_features;
+
+       struct ef4_buffer stats_buffer;
+       u64 rx_nodesc_drops_total;
+       u64 rx_nodesc_drops_while_down;
+       bool rx_nodesc_drops_prev_state;
+
+       unsigned int phy_type;
+       const struct ef4_phy_operations *phy_op;
+       void *phy_data;
+       struct mdio_if_info mdio;
+       enum ef4_phy_mode phy_mode;
+
+       u32 link_advertising;
+       struct ef4_link_state link_state;
+       unsigned int n_link_state_changes;
+
+       bool unicast_filter;
+       union ef4_multicast_hash multicast_hash;
+       u8 wanted_fc;
+       unsigned fc_disable;
+
+       atomic_t rx_reset;
+       enum ef4_loopback_mode loopback_mode;
+       u64 loopback_modes;
+
+       void *loopback_selftest;
+
+       struct rw_semaphore filter_sem;
+       spinlock_t filter_lock;
+       void *filter_state;
+#ifdef CONFIG_RFS_ACCEL
+       unsigned int rps_expire_channel;
+       unsigned int rps_expire_index;
+#endif
+
+       atomic_t active_queues;
+       atomic_t rxq_flush_pending;
+       atomic_t rxq_flush_outstanding;
+       wait_queue_head_t flush_wq;
+
+       char *vpd_sn;
+
+       /* The following fields may be written more often */
+
+       struct delayed_work monitor_work ____cacheline_aligned_in_smp;
+       spinlock_t biu_lock;
+       int last_irq_cpu;
+       spinlock_t stats_lock;
+       atomic_t n_rx_noskb_drops;
+};
+
+static inline int ef4_dev_registered(struct ef4_nic *efx)
+{
+       return efx->net_dev->reg_state == NETREG_REGISTERED;
+}
+
+static inline unsigned int ef4_port_num(struct ef4_nic *efx)
+{
+       return efx->port_num;
+}
+
+struct ef4_mtd_partition {
+       struct list_head node;
+       struct mtd_info mtd;
+       const char *dev_type_name;
+       const char *type_name;
+       char name[IFNAMSIZ + 20];
+};
+
+/**
+ * struct ef4_nic_type - Efx device type definition
+ * @mem_bar: Get the memory BAR
+ * @mem_map_size: Get memory BAR mapped size
+ * @probe: Probe the controller
+ * @remove: Free resources allocated by probe()
+ * @init: Initialise the controller
+ * @dimension_resources: Dimension controller resources (buffer table,
+ *     and VIs once the available interrupt resources are clear)
+ * @fini: Shut down the controller
+ * @monitor: Periodic function for polling link state and hardware monitor
+ * @map_reset_reason: Map ethtool reset reason to a reset method
+ * @map_reset_flags: Map ethtool reset flags to a reset method, if possible
+ * @reset: Reset the controller hardware and possibly the PHY.  This will
+ *     be called while the controller is uninitialised.
+ * @probe_port: Probe the MAC and PHY
+ * @remove_port: Free resources allocated by probe_port()
+ * @handle_global_event: Handle a "global" event (may be %NULL)
+ * @fini_dmaq: Flush and finalise DMA queues (RX and TX queues)
+ * @prepare_flush: Prepare the hardware for flushing the DMA queues
+ *     (for Falcon architecture)
+ * @finish_flush: Clean up after flushing the DMA queues (for Falcon
+ *     architecture)
+ * @prepare_flr: Prepare for an FLR
+ * @finish_flr: Clean up after an FLR
+ * @describe_stats: Describe statistics for ethtool
+ * @update_stats: Update statistics not provided by event handling.
+ *     Either argument may be %NULL.
+ * @start_stats: Start the regular fetching of statistics
+ * @pull_stats: Pull stats from the NIC and wait until they arrive.
+ * @stop_stats: Stop the regular fetching of statistics
+ * @set_id_led: Set state of identifying LED or revert to automatic function
+ * @push_irq_moderation: Apply interrupt moderation value
+ * @reconfigure_port: Push loopback/power/txdis changes to the MAC and PHY
+ * @prepare_enable_fc_tx: Prepare MAC to enable pause frame TX (may be %NULL)
+ * @reconfigure_mac: Push MAC address, MTU, flow control and filter settings
+ *     to the hardware.  Serialised by the mac_lock.
+ * @check_mac_fault: Check MAC fault state. True if fault present.
+ * @get_wol: Get WoL configuration from driver state
+ * @set_wol: Push WoL configuration to the NIC
+ * @resume_wol: Synchronise WoL state between driver and MC (e.g. after resume)
+ * @test_chip: Test registers.  May use ef4_farch_test_registers(), and is
+ *     expected to reset the NIC.
+ * @test_nvram: Test validity of NVRAM contents
+ * @irq_enable_master: Enable IRQs on the NIC.  Each event queue must
+ *     be separately enabled after this.
+ * @irq_test_generate: Generate a test IRQ
+ * @irq_disable_non_ev: Disable non-event IRQs on the NIC.  Each event
+ *     queue must be separately disabled before this.
+ * @irq_handle_msi: Handle MSI for a channel.  The @dev_id argument is
+ *     a pointer to the &struct ef4_msi_context for the channel.
+ * @irq_handle_legacy: Handle legacy interrupt.  The @dev_id argument
+ *     is a pointer to the &struct ef4_nic.
+ * @tx_probe: Allocate resources for TX queue
+ * @tx_init: Initialise TX queue on the NIC
+ * @tx_remove: Free resources for TX queue
+ * @tx_write: Write TX descriptors and doorbell
+ * @rx_push_rss_config: Write RSS hash key and indirection table to the NIC
+ * @rx_probe: Allocate resources for RX queue
+ * @rx_init: Initialise RX queue on the NIC
+ * @rx_remove: Free resources for RX queue
+ * @rx_write: Write RX descriptors and doorbell
+ * @rx_defer_refill: Generate a refill reminder event
+ * @ev_probe: Allocate resources for event queue
+ * @ev_init: Initialise event queue on the NIC
+ * @ev_fini: Deinitialise event queue on the NIC
+ * @ev_remove: Free resources for event queue
+ * @ev_process: Process events for a queue, up to the given NAPI quota
+ * @ev_read_ack: Acknowledge read events on a queue, rearming its IRQ
+ * @ev_test_generate: Generate a test event
+ * @filter_table_probe: Probe filter capabilities and set up filter software state
+ * @filter_table_restore: Restore filters removed from hardware
+ * @filter_table_remove: Remove filters from hardware and tear down software state
+ * @filter_update_rx_scatter: Update filters after change to rx scatter setting
+ * @filter_insert: add or replace a filter
+ * @filter_remove_safe: remove a filter by ID, carefully
+ * @filter_get_safe: retrieve a filter by ID, carefully
+ * @filter_clear_rx: Remove all RX filters whose priority is less than or
+ *     equal to the given priority and is not %EF4_FILTER_PRI_AUTO
+ * @filter_count_rx_used: Get the number of filters in use at a given priority
+ * @filter_get_rx_id_limit: Get maximum value of a filter id, plus 1
+ * @filter_get_rx_ids: Get list of RX filters at a given priority
+ * @filter_rfs_insert: Add or replace a filter for RFS.  This must be
+ *     atomic.  The hardware change may be asynchronous but should
+ *     not be delayed for long.  It may fail if this can't be done
+ *     atomically.
+ * @filter_rfs_expire_one: Consider expiring a filter inserted for RFS.
+ *     This must check whether the specified table entry is used by RFS
+ *     and that rps_may_expire_flow() returns true for it.
+ * @mtd_probe: Probe and add MTD partitions associated with this net device,
+ *      using ef4_mtd_add()
+ * @mtd_rename: Set an MTD partition name using the net device name
+ * @mtd_read: Read from an MTD partition
+ * @mtd_erase: Erase part of an MTD partition
+ * @mtd_write: Write to an MTD partition
+ * @mtd_sync: Wait for write-back to complete on MTD partition.  This
+ *     also notifies the driver that a writer has finished using this
+ *     partition.
+ * @set_mac_address: Set the MAC address of the device
+ * @revision: Hardware architecture revision
+ * @txd_ptr_tbl_base: TX descriptor ring base address
+ * @rxd_ptr_tbl_base: RX descriptor ring base address
+ * @buf_tbl_base: Buffer table base address
+ * @evq_ptr_tbl_base: Event queue pointer table base address
+ * @evq_rptr_tbl_base: Event queue read-pointer table base address
+ * @max_dma_mask: Maximum possible DMA mask
+ * @rx_prefix_size: Size of RX prefix before packet data
+ * @rx_hash_offset: Offset of RX flow hash within prefix
+ * @rx_ts_offset: Offset of timestamp within prefix
+ * @rx_buffer_padding: Size of padding at end of RX packet
+ * @can_rx_scatter: NIC is able to scatter packets to multiple buffers
+ * @always_rx_scatter: NIC will always scatter packets to multiple buffers
+ * @max_interrupt_mode: Highest capability interrupt mode supported
+ *     from &enum ef4_int_mode.
+ * @timer_period_max: Maximum period of interrupt timer (in ticks)
+ * @offload_features: net_device feature flags for protocol offload
+ *     features implemented in hardware
+ */
+struct ef4_nic_type {
+       unsigned int mem_bar;
+       unsigned int (*mem_map_size)(struct ef4_nic *efx);
+       int (*probe)(struct ef4_nic *efx);
+       void (*remove)(struct ef4_nic *efx);
+       int (*init)(struct ef4_nic *efx);
+       int (*dimension_resources)(struct ef4_nic *efx);
+       void (*fini)(struct ef4_nic *efx);
+       void (*monitor)(struct ef4_nic *efx);
+       enum reset_type (*map_reset_reason)(enum reset_type reason);
+       int (*map_reset_flags)(u32 *flags);
+       int (*reset)(struct ef4_nic *efx, enum reset_type method);
+       int (*probe_port)(struct ef4_nic *efx);
+       void (*remove_port)(struct ef4_nic *efx);
+       bool (*handle_global_event)(struct ef4_channel *channel, ef4_qword_t *);
+       int (*fini_dmaq)(struct ef4_nic *efx);
+       void (*prepare_flush)(struct ef4_nic *efx);
+       void (*finish_flush)(struct ef4_nic *efx);
+       void (*prepare_flr)(struct ef4_nic *efx);
+       void (*finish_flr)(struct ef4_nic *efx);
+       size_t (*describe_stats)(struct ef4_nic *efx, u8 *names);
+       size_t (*update_stats)(struct ef4_nic *efx, u64 *full_stats,
+                              struct rtnl_link_stats64 *core_stats);
+       void (*start_stats)(struct ef4_nic *efx);
+       void (*pull_stats)(struct ef4_nic *efx);
+       void (*stop_stats)(struct ef4_nic *efx);
+       void (*set_id_led)(struct ef4_nic *efx, enum ef4_led_mode mode);
+       void (*push_irq_moderation)(struct ef4_channel *channel);
+       int (*reconfigure_port)(struct ef4_nic *efx);
+       void (*prepare_enable_fc_tx)(struct ef4_nic *efx);
+       int (*reconfigure_mac)(struct ef4_nic *efx);
+       bool (*check_mac_fault)(struct ef4_nic *efx);
+       void (*get_wol)(struct ef4_nic *efx, struct ethtool_wolinfo *wol);
+       int (*set_wol)(struct ef4_nic *efx, u32 type);
+       void (*resume_wol)(struct ef4_nic *efx);
+       int (*test_chip)(struct ef4_nic *efx, struct ef4_self_tests *tests);
+       int (*test_nvram)(struct ef4_nic *efx);
+       void (*irq_enable_master)(struct ef4_nic *efx);
+       int (*irq_test_generate)(struct ef4_nic *efx);
+       void (*irq_disable_non_ev)(struct ef4_nic *efx);
+       irqreturn_t (*irq_handle_msi)(int irq, void *dev_id);
+       irqreturn_t (*irq_handle_legacy)(int irq, void *dev_id);
+       int (*tx_probe)(struct ef4_tx_queue *tx_queue);
+       void (*tx_init)(struct ef4_tx_queue *tx_queue);
+       void (*tx_remove)(struct ef4_tx_queue *tx_queue);
+       void (*tx_write)(struct ef4_tx_queue *tx_queue);
+       unsigned int (*tx_limit_len)(struct ef4_tx_queue *tx_queue,
+                                    dma_addr_t dma_addr, unsigned int len);
+       int (*rx_push_rss_config)(struct ef4_nic *efx, bool user,
+                                 const u32 *rx_indir_table);
+       int (*rx_probe)(struct ef4_rx_queue *rx_queue);
+       void (*rx_init)(struct ef4_rx_queue *rx_queue);
+       void (*rx_remove)(struct ef4_rx_queue *rx_queue);
+       void (*rx_write)(struct ef4_rx_queue *rx_queue);
+       void (*rx_defer_refill)(struct ef4_rx_queue *rx_queue);
+       int (*ev_probe)(struct ef4_channel *channel);
+       int (*ev_init)(struct ef4_channel *channel);
+       void (*ev_fini)(struct ef4_channel *channel);
+       void (*ev_remove)(struct ef4_channel *channel);
+       int (*ev_process)(struct ef4_channel *channel, int quota);
+       void (*ev_read_ack)(struct ef4_channel *channel);
+       void (*ev_test_generate)(struct ef4_channel *channel);
+       int (*filter_table_probe)(struct ef4_nic *efx);
+       void (*filter_table_restore)(struct ef4_nic *efx);
+       void (*filter_table_remove)(struct ef4_nic *efx);
+       void (*filter_update_rx_scatter)(struct ef4_nic *efx);
+       s32 (*filter_insert)(struct ef4_nic *efx,
+                            struct ef4_filter_spec *spec, bool replace);
+       int (*filter_remove_safe)(struct ef4_nic *efx,
+                                 enum ef4_filter_priority priority,
+                                 u32 filter_id);
+       int (*filter_get_safe)(struct ef4_nic *efx,
+                              enum ef4_filter_priority priority,
+                              u32 filter_id, struct ef4_filter_spec *);
+       int (*filter_clear_rx)(struct ef4_nic *efx,
+                              enum ef4_filter_priority priority);
+       u32 (*filter_count_rx_used)(struct ef4_nic *efx,
+                                   enum ef4_filter_priority priority);
+       u32 (*filter_get_rx_id_limit)(struct ef4_nic *efx);
+       s32 (*filter_get_rx_ids)(struct ef4_nic *efx,
+                                enum ef4_filter_priority priority,
+                                u32 *buf, u32 size);
+#ifdef CONFIG_RFS_ACCEL
+       s32 (*filter_rfs_insert)(struct ef4_nic *efx,
+                                struct ef4_filter_spec *spec);
+       bool (*filter_rfs_expire_one)(struct ef4_nic *efx, u32 flow_id,
+                                     unsigned int index);
+#endif
+#ifdef CONFIG_SFC_FALCON_MTD
+       int (*mtd_probe)(struct ef4_nic *efx);
+       void (*mtd_rename)(struct ef4_mtd_partition *part);
+       int (*mtd_read)(struct mtd_info *mtd, loff_t start, size_t len,
+                       size_t *retlen, u8 *buffer);
+       int (*mtd_erase)(struct mtd_info *mtd, loff_t start, size_t len);
+       int (*mtd_write)(struct mtd_info *mtd, loff_t start, size_t len,
+                        size_t *retlen, const u8 *buffer);
+       int (*mtd_sync)(struct mtd_info *mtd);
+#endif
+       int (*get_mac_address)(struct ef4_nic *efx, unsigned char *perm_addr);
+       int (*set_mac_address)(struct ef4_nic *efx);
+
+       int revision;
+       unsigned int txd_ptr_tbl_base;
+       unsigned int rxd_ptr_tbl_base;
+       unsigned int buf_tbl_base;
+       unsigned int evq_ptr_tbl_base;
+       unsigned int evq_rptr_tbl_base;
+       u64 max_dma_mask;
+       unsigned int rx_prefix_size;
+       unsigned int rx_hash_offset;
+       unsigned int rx_ts_offset;
+       unsigned int rx_buffer_padding;
+       bool can_rx_scatter;
+       bool always_rx_scatter;
+       unsigned int max_interrupt_mode;
+       unsigned int timer_period_max;
+       netdev_features_t offload_features;
+       unsigned int max_rx_ip_filters;
+};
+
+/**************************************************************************
+ *
+ * Prototypes and inline functions
+ *
+ *************************************************************************/
+
+static inline struct ef4_channel *
+ef4_get_channel(struct ef4_nic *efx, unsigned index)
+{
+       EF4_BUG_ON_PARANOID(index >= efx->n_channels);
+       return efx->channel[index];
+}
+
+/* Iterate over all used channels */
+#define ef4_for_each_channel(_channel, _efx)                           \
+       for (_channel = (_efx)->channel[0];                             \
+            _channel;                                                  \
+            _channel = (_channel->channel + 1 < (_efx)->n_channels) ?  \
+                    (_efx)->channel[_channel->channel + 1] : NULL)
+
+/* Iterate over all used channels in reverse */
+#define ef4_for_each_channel_rev(_channel, _efx)                       \
+       for (_channel = (_efx)->channel[(_efx)->n_channels - 1];        \
+            _channel;                                                  \
+            _channel = _channel->channel ?                             \
+                    (_efx)->channel[_channel->channel - 1] : NULL)
+
+static inline struct ef4_tx_queue *
+ef4_get_tx_queue(struct ef4_nic *efx, unsigned index, unsigned type)
+{
+       EF4_BUG_ON_PARANOID(index >= efx->n_tx_channels ||
+                           type >= EF4_TXQ_TYPES);
+       return &efx->channel[efx->tx_channel_offset + index]->tx_queue[type];
+}
+
+static inline bool ef4_channel_has_tx_queues(struct ef4_channel *channel)
+{
+       return channel->channel - channel->efx->tx_channel_offset <
+               channel->efx->n_tx_channels;
+}
+
+static inline struct ef4_tx_queue *
+ef4_channel_get_tx_queue(struct ef4_channel *channel, unsigned type)
+{
+       EF4_BUG_ON_PARANOID(!ef4_channel_has_tx_queues(channel) ||
+                           type >= EF4_TXQ_TYPES);
+       return &channel->tx_queue[type];
+}
+
+static inline bool ef4_tx_queue_used(struct ef4_tx_queue *tx_queue)
+{
+       return !(tx_queue->efx->net_dev->num_tc < 2 &&
+                tx_queue->queue & EF4_TXQ_TYPE_HIGHPRI);
+}
+
+/* Iterate over all TX queues belonging to a channel */
+#define ef4_for_each_channel_tx_queue(_tx_queue, _channel)             \
+       if (!ef4_channel_has_tx_queues(_channel))                       \
+               ;                                                       \
+       else                                                            \
+               for (_tx_queue = (_channel)->tx_queue;                  \
+                    _tx_queue < (_channel)->tx_queue + EF4_TXQ_TYPES && \
+                            ef4_tx_queue_used(_tx_queue);              \
+                    _tx_queue++)
+
+/* Iterate over all possible TX queues belonging to a channel */
+#define ef4_for_each_possible_channel_tx_queue(_tx_queue, _channel)    \
+       if (!ef4_channel_has_tx_queues(_channel))                       \
+               ;                                                       \
+       else                                                            \
+               for (_tx_queue = (_channel)->tx_queue;                  \
+                    _tx_queue < (_channel)->tx_queue + EF4_TXQ_TYPES;  \
+                    _tx_queue++)
+
+static inline bool ef4_channel_has_rx_queue(struct ef4_channel *channel)
+{
+       return channel->rx_queue.core_index >= 0;
+}
+
+static inline struct ef4_rx_queue *
+ef4_channel_get_rx_queue(struct ef4_channel *channel)
+{
+       EF4_BUG_ON_PARANOID(!ef4_channel_has_rx_queue(channel));
+       return &channel->rx_queue;
+}
+
+/* Iterate over all RX queues belonging to a channel */
+#define ef4_for_each_channel_rx_queue(_rx_queue, _channel)             \
+       if (!ef4_channel_has_rx_queue(_channel))                        \
+               ;                                                       \
+       else                                                            \
+               for (_rx_queue = &(_channel)->rx_queue;                 \
+                    _rx_queue;                                         \
+                    _rx_queue = NULL)
+
+static inline struct ef4_channel *
+ef4_rx_queue_channel(struct ef4_rx_queue *rx_queue)
+{
+       return container_of(rx_queue, struct ef4_channel, rx_queue);
+}
+
+static inline int ef4_rx_queue_index(struct ef4_rx_queue *rx_queue)
+{
+       return ef4_rx_queue_channel(rx_queue)->channel;
+}
+
+/* Returns a pointer to the specified receive buffer in the RX
+ * descriptor queue.
+ */
+static inline struct ef4_rx_buffer *ef4_rx_buffer(struct ef4_rx_queue *rx_queue,
+                                                 unsigned int index)
+{
+       return &rx_queue->buffer[index];
+}
+
+/**
+ * EF4_MAX_FRAME_LEN - calculate maximum frame length
+ *
+ * This calculates the maximum frame length that will be used for a
+ * given MTU.  The frame length will be equal to the MTU plus a
+ * constant amount of header space and padding.  This is the quantity
+ * that the net driver will program into the MAC as the maximum frame
+ * length.
+ *
+ * The 10G MAC requires 8-byte alignment on the frame
+ * length, so we round up to the next multiple of 8.
+ *
+ * Re-clocking by the XGXS on RX can reduce an IPG to 32 bits (half an
+ * XGMII cycle).  If the frame length reaches the maximum value in the
+ * same cycle, the XMAC can miss the IPG altogether.  We work around
+ * this by adding a further 16 bytes (%EF4_FRAME_PAD).
+ */
+#define EF4_FRAME_PAD  16
+#define EF4_MAX_FRAME_LEN(mtu) \
+       (ALIGN(((mtu) + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN + EF4_FRAME_PAD), 8))
+
+/* Get all supported features (net_dev->features | net_dev->hw_features).
+ * If a feature is not fixed, it is present in hw_features.
+ * If a feature is fixed, it is not present in hw_features, but is
+ * always present in features.
+ */
+static inline netdev_features_t ef4_supported_features(const struct ef4_nic *efx)
+{
+       const struct net_device *net_dev = efx->net_dev;
+
+       return net_dev->features | net_dev->hw_features;
+}
+
+/* Get the current TX queue insert index. */
+static inline unsigned int
+ef4_tx_queue_get_insert_index(const struct ef4_tx_queue *tx_queue)
+{
+       return tx_queue->insert_count & tx_queue->ptr_mask;
+}
+
+/* Get a TX buffer. */
+static inline struct ef4_tx_buffer *
+__ef4_tx_queue_get_insert_buffer(const struct ef4_tx_queue *tx_queue)
+{
+       return &tx_queue->buffer[ef4_tx_queue_get_insert_index(tx_queue)];
+}
+
+/* Get a TX buffer, checking it's not currently in use. */
+static inline struct ef4_tx_buffer *
+ef4_tx_queue_get_insert_buffer(const struct ef4_tx_queue *tx_queue)
+{
+       struct ef4_tx_buffer *buffer =
+               __ef4_tx_queue_get_insert_buffer(tx_queue);
+
+       EF4_BUG_ON_PARANOID(buffer->len);
+       EF4_BUG_ON_PARANOID(buffer->flags);
+       EF4_BUG_ON_PARANOID(buffer->unmap_len);
+
+       return buffer;
+}
+
+#endif /* EF4_NET_DRIVER_H */
diff --git a/drivers/net/ethernet/sfc/falcon/nic.c b/drivers/net/ethernet/sfc/falcon/nic.c
new file mode 100644 (file)
index 0000000..a8ecb33
--- /dev/null
@@ -0,0 +1,527 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/cpu_rmap.h>
+#include "net_driver.h"
+#include "bitfield.h"
+#include "efx.h"
+#include "nic.h"
+#include "farch_regs.h"
+#include "io.h"
+#include "workarounds.h"
+
+/**************************************************************************
+ *
+ * Generic buffer handling
+ * These buffers are used for interrupt status, MAC stats, etc.
+ *
+ **************************************************************************/
+
+/* Allocate a zeroed coherent DMA buffer of @len bytes against @efx's PCI
+ * device.  The CPU address, DMA address and length are stored in @buffer.
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+int ef4_nic_alloc_buffer(struct ef4_nic *efx, struct ef4_buffer *buffer,
+                        unsigned int len, gfp_t gfp_flags)
+{
+       struct device *dma_dev = &efx->pci_dev->dev;
+
+       buffer->addr = dma_zalloc_coherent(dma_dev, len, &buffer->dma_addr,
+                                          gfp_flags);
+       if (buffer->addr == NULL)
+               return -ENOMEM;
+
+       buffer->len = len;
+       return 0;
+}
+
+/* Release the coherent DMA memory described by @buffer.  A buffer whose
+ * addr is NULL is left alone; after freeing, addr is reset to NULL.
+ */
+void ef4_nic_free_buffer(struct ef4_nic *efx, struct ef4_buffer *buffer)
+{
+       if (!buffer->addr)
+               return;
+
+       dma_free_coherent(&efx->pci_dev->dev, buffer->len,
+                         buffer->addr, buffer->dma_addr);
+       buffer->addr = NULL;
+}
+
+/* Report whether the event queue entry at the channel's current read
+ * pointer holds an event.  Only useful for self-test.
+ */
+bool ef4_nic_event_present(struct ef4_channel *channel)
+{
+       ef4_qword_t *event = ef4_event(channel, channel->eventq_read_ptr);
+
+       return ef4_event_present(event);
+}
+
+/* Start an event test on @channel: reset event_test_cpu to -1, then ask
+ * the NIC type to generate a test event for the channel.
+ */
+void ef4_nic_event_test_start(struct ef4_channel *channel)
+{
+       channel->event_test_cpu = -1;
+       /* Write barrier between the reset above and the request below */
+       smp_wmb();
+       channel->efx->type->ev_test_generate(channel);
+}
+
+/* Start an interrupt test: reset last_irq_cpu to -1, then ask the NIC
+ * type to generate a test interrupt.  Returns that method's result.
+ */
+int ef4_nic_irq_test_start(struct ef4_nic *efx)
+{
+       efx->last_irq_cpu = -1;
+       /* Write barrier between the reset above and the request below */
+       smp_wmb();
+       return efx->type->irq_test_generate(efx);
+}
+
+/* Hook interrupt handler(s)
+ * Try MSI and then legacy interrupts.
+ */
+int ef4_nic_init_interrupt(struct ef4_nic *efx)
+{
+       struct ef4_channel *channel;
+       unsigned int n_irqs;
+       int rc;
+
+       if (!EF4_INT_MODE_USE_MSI(efx)) {
+               rc = request_irq(efx->legacy_irq,
+                                efx->type->irq_handle_legacy, IRQF_SHARED,
+                                efx->name, efx);
+               if (rc) {
+                       netif_err(efx, drv, efx->net_dev,
+                                 "failed to hook legacy IRQ %d\n",
+                                 efx->pci_dev->irq);
+                       goto fail1;
+               }
+               return 0;
+       }
+
+#ifdef CONFIG_RFS_ACCEL
+       if (efx->interrupt_mode == EF4_INT_MODE_MSIX) {
+               efx->net_dev->rx_cpu_rmap =
+                       alloc_irq_cpu_rmap(efx->n_rx_channels);
+               if (!efx->net_dev->rx_cpu_rmap) {
+                       rc = -ENOMEM;
+                       goto fail1;
+               }
+       }
+#endif
+
+       /* Hook MSI or MSI-X interrupt */
+       n_irqs = 0;
+       ef4_for_each_channel(channel, efx) {
+               rc = request_irq(channel->irq, efx->type->irq_handle_msi,
+                                IRQF_PROBE_SHARED, /* Not shared */
+                                efx->msi_context[channel->channel].name,
+                                &efx->msi_context[channel->channel]);
+               if (rc) {
+                       netif_err(efx, drv, efx->net_dev,
+                                 "failed to hook IRQ %d\n", channel->irq);
+                       goto fail2;
+               }
+               ++n_irqs;
+
+#ifdef CONFIG_RFS_ACCEL
+               if (efx->interrupt_mode == EF4_INT_MODE_MSIX &&
+                   channel->channel < efx->n_rx_channels) {
+                       rc = irq_cpu_rmap_add(efx->net_dev->rx_cpu_rmap,
+                                             channel->irq);
+                       if (rc)
+                               goto fail2;
+               }
+#endif
+       }
+
+       return 0;
+
+ fail2:
+#ifdef CONFIG_RFS_ACCEL
+       free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
+       efx->net_dev->rx_cpu_rmap = NULL;
+#endif
+       ef4_for_each_channel(channel, efx) {
+               if (n_irqs-- == 0)
+                       break;
+               free_irq(channel->irq, &efx->msi_context[channel->channel]);
+       }
+ fail1:
+       return rc;
+}
+
+/* Release the interrupt handlers: drop the RX CPU rmap when
+ * CONFIG_RFS_ACCEL is enabled, then free either the legacy IRQ or the
+ * IRQ of every channel.
+ */
+void ef4_nic_fini_interrupt(struct ef4_nic *efx)
+{
+       struct ef4_channel *channel;
+
+#ifdef CONFIG_RFS_ACCEL
+       free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
+       efx->net_dev->rx_cpu_rmap = NULL;
+#endif
+
+       if (!EF4_INT_MODE_USE_MSI(efx)) {
+               /* Disable legacy interrupt */
+               free_irq(efx->legacy_irq, efx);
+               return;
+       }
+
+       /* Disable MSI/MSI-X interrupts */
+       ef4_for_each_channel(channel, efx) {
+               free_irq(channel->irq, &efx->msi_context[channel->channel]);
+       }
+}
+
+/* Register dump */
+
+#define REGISTER_REVISION_FA   1
+#define REGISTER_REVISION_FB   2
+#define REGISTER_REVISION_FC   3
+#define REGISTER_REVISION_FZ   3       /* last Falcon arch revision */
+#define REGISTER_REVISION_ED   4
+#define REGISTER_REVISION_EZ   4       /* latest EF10 revision */
+
+/**
+ * struct ef4_nic_reg - one register included in the register dump
+ * @offset: Register offset, as passed to ef4_reado()
+ * @min_revision: Lowest NIC type revision for which the register is dumped
+ * @max_revision: Highest NIC type revision for which the register is dumped
+ */
+struct ef4_nic_reg {
+       u32 offset:24;
+       u32 min_revision:3, max_revision:3;
+};
+
+#define REGISTER(name, arch, min_rev, max_rev) {                       \
+       arch ## R_ ## min_rev ## max_rev ## _ ## name,                  \
+       REGISTER_REVISION_ ## arch ## min_rev,                          \
+       REGISTER_REVISION_ ## arch ## max_rev                           \
+}
+#define REGISTER_AA(name) REGISTER(name, F, A, A)
+#define REGISTER_AB(name) REGISTER(name, F, A, B)
+#define REGISTER_AZ(name) REGISTER(name, F, A, Z)
+#define REGISTER_BB(name) REGISTER(name, F, B, B)
+#define REGISTER_BZ(name) REGISTER(name, F, B, Z)
+#define REGISTER_CZ(name) REGISTER(name, F, C, Z)
+
+static const struct ef4_nic_reg ef4_nic_regs[] = {
+       REGISTER_AZ(ADR_REGION),
+       REGISTER_AZ(INT_EN_KER),
+       REGISTER_BZ(INT_EN_CHAR),
+       REGISTER_AZ(INT_ADR_KER),
+       REGISTER_BZ(INT_ADR_CHAR),
+       /* INT_ACK_KER is WO */
+       /* INT_ISR0 is RC */
+       REGISTER_AZ(HW_INIT),
+       REGISTER_CZ(USR_EV_CFG),
+       REGISTER_AB(EE_SPI_HCMD),
+       REGISTER_AB(EE_SPI_HADR),
+       REGISTER_AB(EE_SPI_HDATA),
+       REGISTER_AB(EE_BASE_PAGE),
+       REGISTER_AB(EE_VPD_CFG0),
+       /* EE_VPD_SW_CNTL and EE_VPD_SW_DATA are not used */
+       /* PMBX_DBG_IADDR and PMBX_DBG_IDATA are indirect */
+       /* PCIE_CORE_INDIRECT is indirect */
+       REGISTER_AB(NIC_STAT),
+       REGISTER_AB(GPIO_CTL),
+       REGISTER_AB(GLB_CTL),
+       /* FATAL_INTR_KER and FATAL_INTR_CHAR are partly RC */
+       REGISTER_BZ(DP_CTRL),
+       REGISTER_AZ(MEM_STAT),
+       REGISTER_AZ(CS_DEBUG),
+       REGISTER_AZ(ALTERA_BUILD),
+       REGISTER_AZ(CSR_SPARE),
+       REGISTER_AB(PCIE_SD_CTL0123),
+       REGISTER_AB(PCIE_SD_CTL45),
+       REGISTER_AB(PCIE_PCS_CTL_STAT),
+       /* DEBUG_DATA_OUT is not used */
+       /* DRV_EV is WO */
+       REGISTER_AZ(EVQ_CTL),
+       REGISTER_AZ(EVQ_CNT1),
+       REGISTER_AZ(EVQ_CNT2),
+       REGISTER_AZ(BUF_TBL_CFG),
+       REGISTER_AZ(SRM_RX_DC_CFG),
+       REGISTER_AZ(SRM_TX_DC_CFG),
+       REGISTER_AZ(SRM_CFG),
+       /* BUF_TBL_UPD is WO */
+       REGISTER_AZ(SRM_UPD_EVQ),
+       REGISTER_AZ(SRAM_PARITY),
+       REGISTER_AZ(RX_CFG),
+       REGISTER_BZ(RX_FILTER_CTL),
+       /* RX_FLUSH_DESCQ is WO */
+       REGISTER_AZ(RX_DC_CFG),
+       REGISTER_AZ(RX_DC_PF_WM),
+       REGISTER_BZ(RX_RSS_TKEY),
+       /* RX_NODESC_DROP is RC */
+       REGISTER_AA(RX_SELF_RST),
+       /* RX_DEBUG, RX_PUSH_DROP are not used */
+       REGISTER_CZ(RX_RSS_IPV6_REG1),
+       REGISTER_CZ(RX_RSS_IPV6_REG2),
+       REGISTER_CZ(RX_RSS_IPV6_REG3),
+       /* TX_FLUSH_DESCQ is WO */
+       REGISTER_AZ(TX_DC_CFG),
+       REGISTER_AA(TX_CHKSM_CFG),
+       REGISTER_AZ(TX_CFG),
+       /* TX_PUSH_DROP is not used */
+       REGISTER_AZ(TX_RESERVED),
+       REGISTER_BZ(TX_PACE),
+       /* TX_PACE_DROP_QID is RC */
+       REGISTER_BB(TX_VLAN),
+       REGISTER_BZ(TX_IPFIL_PORTEN),
+       REGISTER_AB(MD_TXD),
+       REGISTER_AB(MD_RXD),
+       REGISTER_AB(MD_CS),
+       REGISTER_AB(MD_PHY_ADR),
+       REGISTER_AB(MD_ID),
+       /* MD_STAT is RC */
+       REGISTER_AB(MAC_STAT_DMA),
+       REGISTER_AB(MAC_CTRL),
+       REGISTER_BB(GEN_MODE),
+       REGISTER_AB(MAC_MC_HASH_REG0),
+       REGISTER_AB(MAC_MC_HASH_REG1),
+       REGISTER_AB(GM_CFG1),
+       REGISTER_AB(GM_CFG2),
+       /* GM_IPG and GM_HD are not used */
+       REGISTER_AB(GM_MAX_FLEN),
+       /* GM_TEST is not used */
+       REGISTER_AB(GM_ADR1),
+       REGISTER_AB(GM_ADR2),
+       REGISTER_AB(GMF_CFG0),
+       REGISTER_AB(GMF_CFG1),
+       REGISTER_AB(GMF_CFG2),
+       REGISTER_AB(GMF_CFG3),
+       REGISTER_AB(GMF_CFG4),
+       REGISTER_AB(GMF_CFG5),
+       REGISTER_BB(TX_SRC_MAC_CTL),
+       REGISTER_AB(XM_ADR_LO),
+       REGISTER_AB(XM_ADR_HI),
+       REGISTER_AB(XM_GLB_CFG),
+       REGISTER_AB(XM_TX_CFG),
+       REGISTER_AB(XM_RX_CFG),
+       REGISTER_AB(XM_MGT_INT_MASK),
+       REGISTER_AB(XM_FC),
+       REGISTER_AB(XM_PAUSE_TIME),
+       REGISTER_AB(XM_TX_PARAM),
+       REGISTER_AB(XM_RX_PARAM),
+       /* XM_MGT_INT_MSK (note no 'A') is RC */
+       REGISTER_AB(XX_PWR_RST),
+       REGISTER_AB(XX_SD_CTL),
+       REGISTER_AB(XX_TXDRV_CTL),
+       /* XX_PRBS_CTL, XX_PRBS_CHK and XX_PRBS_ERR are not used */
+       /* XX_CORE_STAT is partly RC */
+};
+
+/**
+ * struct ef4_nic_reg_table - one register table included in the dump
+ * @offset: Offset of the table
+ * @min_revision: Lowest NIC type revision for which the table is dumped
+ * @max_revision: Highest NIC type revision for which the table is dumped
+ * @step: Row step.  ef4_nic_get_regs() selects the read method from this
+ *     (4, 8, 16 or 32) and dumps at most 16 bytes of each row.
+ * @rows: Number of rows dumped
+ */
+struct ef4_nic_reg_table {
+       u32 offset:24;
+       u32 min_revision:3, max_revision:3;
+       u32 step:6, rows:21;
+};
+
+#define REGISTER_TABLE_DIMENSIONS(_, offset, arch, min_rev, max_rev, step, rows) { \
+       offset,                                                         \
+       REGISTER_REVISION_ ## arch ## min_rev,                          \
+       REGISTER_REVISION_ ## arch ## max_rev,                          \
+       step, rows                                                      \
+}
+#define REGISTER_TABLE(name, arch, min_rev, max_rev)                   \
+       REGISTER_TABLE_DIMENSIONS(                                      \
+               name, arch ## R_ ## min_rev ## max_rev ## _ ## name,    \
+               arch, min_rev, max_rev,                                 \
+               arch ## R_ ## min_rev ## max_rev ## _ ## name ## _STEP, \
+               arch ## R_ ## min_rev ## max_rev ## _ ## name ## _ROWS)
+#define REGISTER_TABLE_AA(name) REGISTER_TABLE(name, F, A, A)
+#define REGISTER_TABLE_AZ(name) REGISTER_TABLE(name, F, A, Z)
+#define REGISTER_TABLE_BB(name) REGISTER_TABLE(name, F, B, B)
+#define REGISTER_TABLE_BZ(name) REGISTER_TABLE(name, F, B, Z)
+#define REGISTER_TABLE_BB_CZ(name)                                     \
+       REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, F, B, B,        \
+                                 FR_BZ_ ## name ## _STEP,              \
+                                 FR_BB_ ## name ## _ROWS),             \
+       REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, F, C, Z,        \
+                                 FR_BZ_ ## name ## _STEP,              \
+                                 FR_CZ_ ## name ## _ROWS)
+#define REGISTER_TABLE_CZ(name) REGISTER_TABLE(name, F, C, Z)
+
+static const struct ef4_nic_reg_table ef4_nic_reg_tables[] = {
+       /* DRIVER is not used */
+       /* EVQ_RPTR, TIMER_COMMAND, USR_EV and {RX,TX}_DESC_UPD are WO */
+       REGISTER_TABLE_BB(TX_IPFIL_TBL),
+       REGISTER_TABLE_BB(TX_SRC_MAC_TBL),
+       REGISTER_TABLE_AA(RX_DESC_PTR_TBL_KER),
+       REGISTER_TABLE_BB_CZ(RX_DESC_PTR_TBL),
+       REGISTER_TABLE_AA(TX_DESC_PTR_TBL_KER),
+       REGISTER_TABLE_BB_CZ(TX_DESC_PTR_TBL),
+       REGISTER_TABLE_AA(EVQ_PTR_TBL_KER),
+       REGISTER_TABLE_BB_CZ(EVQ_PTR_TBL),
+       /* We can't reasonably read all of the buffer table (up to 8MB!).
+        * However this driver will only use a few entries.  Reading
+        * 1K entries allows for some expansion of queue count and
+        * size before we need to change the version. */
+       REGISTER_TABLE_DIMENSIONS(BUF_FULL_TBL_KER, FR_AA_BUF_FULL_TBL_KER,
+                                 F, A, A, 8, 1024),
+       REGISTER_TABLE_DIMENSIONS(BUF_FULL_TBL, FR_BZ_BUF_FULL_TBL,
+                                 F, B, Z, 8, 1024),
+       REGISTER_TABLE_CZ(RX_MAC_FILTER_TBL0),
+       REGISTER_TABLE_BB_CZ(TIMER_TBL),
+       REGISTER_TABLE_BB_CZ(TX_PACE_TBL),
+       REGISTER_TABLE_BZ(RX_INDIRECTION_TBL),
+       /* TX_FILTER_TBL0 is huge and not used by this driver */
+       REGISTER_TABLE_CZ(TX_MAC_FILTER_TBL0),
+       REGISTER_TABLE_CZ(MC_TREG_SMEM),
+       /* MSIX_PBA_TABLE is not mapped */
+       /* SRM_DBG is not mapped (and is redundant with BUF_FULL_TBL) */
+       REGISTER_TABLE_BZ(RX_FILTER_TBL0),
+};
+
+/* Compute the size in bytes of the dump written by ef4_nic_get_regs():
+ * one ef4_oword_t per register that applies to this NIC's revision plus,
+ * for each applicable table, min(step, 16) bytes per row.
+ */
+size_t ef4_nic_get_regs_len(struct ef4_nic *efx)
+{
+       size_t total = 0;
+       size_t i;
+
+       for (i = 0; i < ARRAY_SIZE(ef4_nic_regs); i++) {
+               const struct ef4_nic_reg *reg = &ef4_nic_regs[i];
+
+               if (efx->type->revision < reg->min_revision ||
+                   efx->type->revision > reg->max_revision)
+                       continue;
+
+               total += sizeof(ef4_oword_t);
+       }
+
+       for (i = 0; i < ARRAY_SIZE(ef4_nic_reg_tables); i++) {
+               const struct ef4_nic_reg_table *table = &ef4_nic_reg_tables[i];
+
+               if (efx->type->revision < table->min_revision ||
+                   efx->type->revision > table->max_revision)
+                       continue;
+
+               total += table->rows * min_t(size_t, table->step, 16);
+       }
+
+       return total;
+}
+
+/* Dump every register and register table that applies to this NIC's
+ * revision into @buf, in table order.  Each register contributes one
+ * ef4_oword_t; each table row contributes min(step, 16) bytes.  A table
+ * with an unexpected step triggers a warning and ends the dump early.
+ */
+void ef4_nic_get_regs(struct ef4_nic *efx, void *buf)
+{
+       size_t r, t;
+
+       for (r = 0; r < ARRAY_SIZE(ef4_nic_regs); r++) {
+               const struct ef4_nic_reg *reg = &ef4_nic_regs[r];
+
+               if (efx->type->revision < reg->min_revision ||
+                   efx->type->revision > reg->max_revision)
+                       continue;
+
+               ef4_reado(efx, (ef4_oword_t *)buf, reg->offset);
+               buf += sizeof(ef4_oword_t);
+       }
+
+       for (t = 0; t < ARRAY_SIZE(ef4_nic_reg_tables); t++) {
+               const struct ef4_nic_reg_table *table = &ef4_nic_reg_tables[t];
+               size_t row_bytes, i;
+
+               if (efx->type->revision < table->min_revision ||
+                   efx->type->revision > table->max_revision)
+                       continue;
+
+               row_bytes = min_t(size_t, table->step, 16);
+
+               for (i = 0; i < table->rows; i++) {
+                       switch (table->step) {
+                       case 4: /* 32-bit SRAM */
+                               ef4_readd(efx, buf, table->offset + 4 * i);
+                               break;
+                       case 8: /* 64-bit SRAM */
+                               ef4_sram_readq(efx,
+                                              efx->membase + table->offset,
+                                              buf, i);
+                               break;
+                       case 16: /* 128-bit-readable register */
+                               ef4_reado_table(efx, buf, table->offset, i);
+                               break;
+                       case 32: /* 128-bit register, interleaved */
+                               ef4_reado_table(efx, buf, table->offset, 2 * i);
+                               break;
+                       default:
+                               WARN_ON(1);
+                               return;
+                       }
+                       buf += row_bytes;
+               }
+       }
+}
+
+/**
+ * ef4_nic_describe_stats - Describe supported statistics for ethtool
+ * @desc: Array of &struct ef4_hw_stat_desc describing the statistics
+ * @count: Length of the @desc array
+ * @mask: Bitmask of which elements of @desc are enabled
+ * @names: Buffer to copy names to, or %NULL.  Each name is copied into
+ *     its own slot of %ETH_GSTRING_LEN bytes.
+ *
+ * Returns the number of visible statistics, i.e. the number of bits set
+ * in the first @count bits of @mask whose descriptor has a name.
+ */
+size_t ef4_nic_describe_stats(const struct ef4_hw_stat_desc *desc, size_t count,
+                             const unsigned long *mask, u8 *names)
+{
+       size_t n_visible = 0;
+       size_t bit;
+
+       for_each_set_bit(bit, mask, count) {
+               /* Enabled but unnamed statistics are not visible */
+               if (!desc[bit].name)
+                       continue;
+
+               if (names) {
+                       strlcpy(names, desc[bit].name, ETH_GSTRING_LEN);
+                       names += ETH_GSTRING_LEN;
+               }
+               n_visible++;
+       }
+
+       return n_visible;
+}
+
+/**
+ * ef4_nic_update_stats - Convert statistics DMA buffer to array of u64
+ * @desc: Array of &struct ef4_hw_stat_desc describing the DMA buffer
+ *     layout.  DMA widths of 0, 16, 32 and 64 are supported; an element
+ *     whose width is 0 leaves the corresponding element of @stats
+ *     untouched.
+ * @count: Length of the @desc array
+ * @mask: Bitmask of which elements of @desc are enabled
+ * @stats: Buffer to update with the converted statistics.  The length
+ *     of this array must be at least @count.
+ * @dma_buf: DMA buffer containing hardware statistics
+ * @accumulate: If set, the converted values are added to the
+ *     corresponding elements of @stats instead of replacing them
+ *
+ * Values are read from @dma_buf as little-endian.  Any other width
+ * triggers a warning and is treated as a value of 0.
+ */
+void ef4_nic_update_stats(const struct ef4_hw_stat_desc *desc, size_t count,
+                         const unsigned long *mask,
+                         u64 *stats, const void *dma_buf, bool accumulate)
+{
+       size_t bit;
+
+       for_each_set_bit(bit, mask, count) {
+               const void *src;
+               u64 sample;
+
+               if (!desc[bit].dma_width)
+                       continue;
+
+               src = dma_buf + desc[bit].offset;
+
+               switch (desc[bit].dma_width) {
+               case 16:
+                       sample = le16_to_cpup((__le16 *)src);
+                       break;
+               case 32:
+                       sample = le32_to_cpup((__le32 *)src);
+                       break;
+               case 64:
+                       sample = le64_to_cpup((__le64 *)src);
+                       break;
+               default:
+                       WARN_ON(1);
+                       sample = 0;
+                       break;
+               }
+
+               stats[bit] = accumulate ? stats[bit] + sample : sample;
+       }
+}
+
+/* Adjust *@rx_nodesc_drops to exclude drops that were counted while the
+ * interface was down.  The raw total and the previous IFF_UP state are
+ * remembered in @efx for the next call.
+ */
+void ef4_nic_fix_nodesc_drop_stat(struct ef4_nic *efx, u64 *rx_nodesc_drops)
+{
+       bool is_up = !!(efx->net_dev->flags & IFF_UP);
+       u64 raw_total = *rx_nodesc_drops;
+
+       /* if down, or this is the first update after coming up */
+       if (!is_up || !efx->rx_nodesc_drops_prev_state)
+               efx->rx_nodesc_drops_while_down +=
+                       raw_total - efx->rx_nodesc_drops_total;
+
+       efx->rx_nodesc_drops_total = raw_total;
+       efx->rx_nodesc_drops_prev_state = is_up;
+       *rx_nodesc_drops = raw_total - efx->rx_nodesc_drops_while_down;
+}
diff --git a/drivers/net/ethernet/sfc/falcon/nic.h b/drivers/net/ethernet/sfc/falcon/nic.h
new file mode 100644 (file)
index 0000000..a4c4592
--- /dev/null
@@ -0,0 +1,513 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_NIC_H
+#define EF4_NIC_H
+
+#include <linux/net_tstamp.h>
+#include <linux/i2c-algo-bit.h>
+#include "net_driver.h"
+#include "efx.h"
+
+/* Falcon revision numbers, for comparison with ef4_nic_rev() */
+enum {
+       EF4_REV_FALCON_A0 = 0,
+       EF4_REV_FALCON_A1 = 1,
+       EF4_REV_FALCON_B0 = 2,
+};
+
+/* Return the revision number recorded in this NIC's type structure */
+static inline int ef4_nic_rev(struct ef4_nic *efx)
+{
+       return efx->type->revision;
+}
+
+u32 ef4_farch_fpga_ver(struct ef4_nic *efx);
+
+/* True for revisions before Falcon B0, where the NIC has two
+ * interlinked PCI functions for the same port.
+ */
+static inline bool ef4_nic_is_dual_func(struct ef4_nic *efx)
+{
+       int rev = ef4_nic_rev(efx);
+
+       return rev < EF4_REV_FALCON_B0;
+}
+
+/* Return the event queue entry selected by @index, after masking the
+ * index with the channel's eventq_mask.
+ */
+static inline ef4_qword_t *ef4_event(struct ef4_channel *channel,
+                                    unsigned int index)
+{
+       ef4_qword_t *ring = (ef4_qword_t *)channel->eventq.buf.addr;
+       unsigned int slot = index & channel->eventq_mask;
+
+       return &ring[slot];
+}
+
+/* See if an event is present
+ *
+ * An event is present only when neither its low nor its high dword is
+ * all ones.  All ones is what we wrote when we cleared the event, and
+ * no valid event can have all ones in either dword, so testing each
+ * dword separately is robust against reordering.
+ *
+ * Note that using a single 64-bit comparison is incorrect; even
+ * though the CPU read will be atomic, the DMA write may not be.
+ */
+static inline int ef4_event_present(ef4_qword_t *event)
+{
+       /* Bitwise AND: both dwords are always tested */
+       return !(EF4_DWORD_IS_ALL_ONES(event->dword[0])) &
+              !(EF4_DWORD_IS_ALL_ONES(event->dword[1]));
+}
+
+/* Return a pointer to descriptor @index in the TX descriptor queue
+ * of @tx_queue.  The index is used as given, without masking.
+ */
+static inline ef4_qword_t *
+ef4_tx_desc(struct ef4_tx_queue *tx_queue, unsigned int index)
+{
+       ef4_qword_t *ring = (ef4_qword_t *)tx_queue->txd.buf.addr;
+
+       return &ring[index];
+}
+
+/* Get partner of a TX queue, seen as part of the same net core queue.
+ * The partner sits EF4_TXQ_TYPE_OFFLOAD entries before a queue that has
+ * the offload bit set in its queue number, and the same distance after
+ * one that does not.
+ */
+static inline struct ef4_tx_queue *ef4_tx_queue_partner(struct ef4_tx_queue *tx_queue)
+{
+       bool is_offload = tx_queue->queue & EF4_TXQ_TYPE_OFFLOAD;
+
+       return is_offload ? tx_queue - EF4_TXQ_TYPE_OFFLOAD
+                         : tx_queue + EF4_TXQ_TYPE_OFFLOAD;
+}
+
+/* Report whether this TX queue would be empty for the given write_count.
+ * A recorded empty_read_count of 0 means "not known to be empty";
+ * otherwise the recorded count is compared with @write_count, ignoring
+ * the EF4_EMPTY_COUNT_VALID bit.  May return false negative.
+ */
+static inline bool __ef4_nic_tx_is_empty(struct ef4_tx_queue *tx_queue,
+                                        unsigned int write_count)
+{
+       unsigned int snapshot = ACCESS_ONCE(tx_queue->empty_read_count);
+       unsigned int mismatch;
+
+       if (!snapshot)
+               return false;
+
+       mismatch = (snapshot ^ write_count) & ~EF4_EMPTY_COUNT_VALID;
+       return !mismatch;
+}
+
+/* Decide whether to push a TX descriptor to the NIC vs merely writing
+ * the doorbell.  Pushing can reduce latency when a single descriptor is
+ * being added to an empty queue, and is pointless otherwise.  Further,
+ * Falcon and Siena have hardware bugs (SF bug 33851) that may be
+ * triggered if we don't check this.
+ * @write_count is the value used for the last doorbell push, which
+ * gives the NIC's view of the tx queue.  The recorded empty_read_count
+ * is always cleared once it has been examined.
+ */
+static inline bool ef4_nic_may_push_tx_desc(struct ef4_tx_queue *tx_queue,
+                                           unsigned int write_count)
+{
+       bool was_empty = __ef4_nic_tx_is_empty(tx_queue, write_count);
+
+       tx_queue->empty_read_count = 0;
+       if (!was_empty)
+               return false;
+
+       return tx_queue->write_count - write_count == 1;
+}
+
+/* Return a pointer to descriptor @index in the RX descriptor queue.
+ * The index is used as given, without masking.
+ */
+static inline ef4_qword_t *
+ef4_rx_desc(struct ef4_rx_queue *rx_queue, unsigned int index)
+{
+       ef4_qword_t *ring = (ef4_qword_t *)rx_queue->rxd.buf.addr;
+
+       return &ring[index];
+}
+
+enum {
+       PHY_TYPE_NONE = 0,
+       PHY_TYPE_TXC43128 = 1,
+       PHY_TYPE_88E1111 = 2,
+       PHY_TYPE_SFX7101 = 3,
+       PHY_TYPE_QT2022C2 = 4,
+       PHY_TYPE_PM8358 = 6,
+       PHY_TYPE_SFT9001A = 8,
+       PHY_TYPE_QT2025C = 9,
+       PHY_TYPE_SFT9001B = 10,
+};
+
+#define FALCON_XMAC_LOOPBACKS                  \
+       ((1 << LOOPBACK_XGMII) |                \
+        (1 << LOOPBACK_XGXS) |                 \
+        (1 << LOOPBACK_XAUI))
+
+/* Alignment of PCIe DMA boundaries (4KB) */
+#define EF4_PAGE_SIZE  4096
+/* Size and alignment of buffer table entries (same) */
+#define EF4_BUF_SIZE   EF4_PAGE_SIZE
+
+/* NIC-generic software stats */
+enum {
+       GENERIC_STAT_rx_noskb_drops,
+       GENERIC_STAT_rx_nodesc_trunc,
+       GENERIC_STAT_COUNT
+};
+
+/**
+ * struct falcon_board_type - board operations and type information
+ * @id: Board type id, as found in NVRAM
+ * @init: Allocate resources and initialise peripheral hardware
+ * @init_phy: Do board-specific PHY initialisation
+ * @fini: Shut down hardware and free resources
+ * @set_id_led: Set state of identifying LED or revert to automatic function
+ * @monitor: Board-specific health check function
+ */
+struct falcon_board_type {
+       u8 id;
+       int (*init) (struct ef4_nic *nic);
+       void (*init_phy) (struct ef4_nic *efx);
+       void (*fini) (struct ef4_nic *nic);
+       void (*set_id_led) (struct ef4_nic *efx, enum ef4_led_mode mode);
+       int (*monitor) (struct ef4_nic *nic);
+};
+
+/**
+ * struct falcon_board - board information
+ * @type: Type of board
+ * @major: Major rev. ('A', 'B' ...)
+ * @minor: Minor rev. (0, 1, ...)
+ * @i2c_adap: I2C adapter for on-board peripherals
+ * @i2c_data: Data for bit-banging algorithm
+ * @hwmon_client: I2C client for hardware monitor
+ * @ioexp_client: I2C client for power/port control
+ */
+struct falcon_board {
+       const struct falcon_board_type *type;
+       int major;
+       int minor;
+       struct i2c_adapter i2c_adap;
+       struct i2c_algo_bit_data i2c_data;
+       struct i2c_client *hwmon_client, *ioexp_client;
+};
+
+/**
+ * struct falcon_spi_device - a Falcon SPI (Serial Peripheral Interface) device
+ * @device_id:         Controller's id for the device
+ * @size:              Size (in bytes)
+ * @addr_len:          Number of address bytes in read/write commands
+ * @munge_address:     Flag whether addresses should be munged.
+ *     Some devices with 9-bit addresses (e.g. AT25040A EEPROM)
+ *     use bit 3 of the command byte as address bit A8, rather
+ *     than having a two-byte address.  If this flag is set, then
+ *     commands should be munged in this way.
+ * @erase_command:     Erase command (or 0 if sector erase not needed).
+ * @erase_size:                Erase sector size (in bytes)
+ *     Erase commands affect sectors with this size and alignment.
+ *     This must be a power of two.
+ * @block_size:                Write block size (in bytes).
+ *     Write commands are limited to blocks with this size and alignment.
+ */
+struct falcon_spi_device {
+       int device_id;
+       unsigned int size;
+       unsigned int addr_len;
+       unsigned int munge_address:1;
+       u8 erase_command;
+       unsigned int erase_size;
+       unsigned int block_size;
+};
+
+/* An SPI device is treated as present when its size is non-zero */
+static inline bool falcon_spi_present(const struct falcon_spi_device *spi)
+{
+       return spi->size > 0;
+}
+
+enum {
+       FALCON_STAT_tx_bytes = GENERIC_STAT_COUNT,
+       FALCON_STAT_tx_packets,
+       FALCON_STAT_tx_pause,
+       FALCON_STAT_tx_control,
+       FALCON_STAT_tx_unicast,
+       FALCON_STAT_tx_multicast,
+       FALCON_STAT_tx_broadcast,
+       FALCON_STAT_tx_lt64,
+       FALCON_STAT_tx_64,
+       FALCON_STAT_tx_65_to_127,
+       FALCON_STAT_tx_128_to_255,
+       FALCON_STAT_tx_256_to_511,
+       FALCON_STAT_tx_512_to_1023,
+       FALCON_STAT_tx_1024_to_15xx,
+       FALCON_STAT_tx_15xx_to_jumbo,
+       FALCON_STAT_tx_gtjumbo,
+       FALCON_STAT_tx_non_tcpudp,
+       FALCON_STAT_tx_mac_src_error,
+       FALCON_STAT_tx_ip_src_error,
+       FALCON_STAT_rx_bytes,
+       FALCON_STAT_rx_good_bytes,
+       FALCON_STAT_rx_bad_bytes,
+       FALCON_STAT_rx_packets,
+       FALCON_STAT_rx_good,
+       FALCON_STAT_rx_bad,
+       FALCON_STAT_rx_pause,
+       FALCON_STAT_rx_control,
+       FALCON_STAT_rx_unicast,
+       FALCON_STAT_rx_multicast,
+       FALCON_STAT_rx_broadcast,
+       FALCON_STAT_rx_lt64,
+       FALCON_STAT_rx_64,
+       FALCON_STAT_rx_65_to_127,
+       FALCON_STAT_rx_128_to_255,
+       FALCON_STAT_rx_256_to_511,
+       FALCON_STAT_rx_512_to_1023,
+       FALCON_STAT_rx_1024_to_15xx,
+       FALCON_STAT_rx_15xx_to_jumbo,
+       FALCON_STAT_rx_gtjumbo,
+       FALCON_STAT_rx_bad_lt64,
+       FALCON_STAT_rx_bad_gtjumbo,
+       FALCON_STAT_rx_overflow,
+       FALCON_STAT_rx_symbol_error,
+       FALCON_STAT_rx_align_error,
+       FALCON_STAT_rx_length_error,
+       FALCON_STAT_rx_internal_error,
+       FALCON_STAT_rx_nodesc_drop_cnt,
+       FALCON_STAT_COUNT
+};
+
+/**
+ * struct falcon_nic_data - Falcon NIC state
+ * @pci_dev2: Secondary function of Falcon A
+ * @board: Board state and functions
+ * @stats: Hardware statistics
+ * @stats_disable_count: Nest count for disabling statistics fetches
+ * @stats_pending: Is there a pending DMA of MAC statistics.
+ * @stats_timer: A timer for regularly fetching MAC statistics.
+ * @spi_flash: SPI flash device
+ * @spi_eeprom: SPI EEPROM device
+ * @spi_lock: SPI bus lock
+ * @mdio_lock: MDIO bus lock
+ * @xmac_poll_required: XMAC link state needs polling
+ */
+struct falcon_nic_data {
+       struct pci_dev *pci_dev2;
+       struct falcon_board board;
+       u64 stats[FALCON_STAT_COUNT];
+       unsigned int stats_disable_count;
+       bool stats_pending;
+       struct timer_list stats_timer;
+       struct falcon_spi_device spi_flash;
+       struct falcon_spi_device spi_eeprom;
+       struct mutex spi_lock;
+       struct mutex mdio_lock;
+       bool xmac_poll_required;
+};
+
+/* Return the board state embedded in the NIC's falcon_nic_data */
+static inline struct falcon_board *falcon_board(struct ef4_nic *efx)
+{
+       return &((struct falcon_nic_data *)efx->nic_data)->board;
+}
+
+struct ethtool_ts_info;
+
+extern const struct ef4_nic_type falcon_a1_nic_type;
+extern const struct ef4_nic_type falcon_b0_nic_type;
+
+/**************************************************************************
+ *
+ * Externs
+ *
+ **************************************************************************
+ */
+
+int falcon_probe_board(struct ef4_nic *efx, u16 revision_info);
+
+/* TX data path */
+/* Call the NIC type's tx_probe method for this queue; returns its result */
+static inline int ef4_nic_probe_tx(struct ef4_tx_queue *tx_queue)
+{
+       return tx_queue->efx->type->tx_probe(tx_queue);
+}
+/* Call the NIC type's tx_init method for this queue */
+static inline void ef4_nic_init_tx(struct ef4_tx_queue *tx_queue)
+{
+       tx_queue->efx->type->tx_init(tx_queue);
+}
+/* Call the NIC type's tx_remove method for this queue */
+static inline void ef4_nic_remove_tx(struct ef4_tx_queue *tx_queue)
+{
+       tx_queue->efx->type->tx_remove(tx_queue);
+}
+/* Call the NIC type's tx_write method for this queue */
+static inline void ef4_nic_push_buffers(struct ef4_tx_queue *tx_queue)
+{
+       tx_queue->efx->type->tx_write(tx_queue);
+}
+
+/* RX data path */
+static inline int ef4_nic_probe_rx(struct ef4_rx_queue *rx_queue)
+{
+       return rx_queue->efx->type->rx_probe(rx_queue);
+}
+static inline void ef4_nic_init_rx(struct ef4_rx_queue *rx_queue)
+{
+       rx_queue->efx->type->rx_init(rx_queue);
+}
+static inline void ef4_nic_remove_rx(struct ef4_rx_queue *rx_queue)
+{
+       rx_queue->efx->type->rx_remove(rx_queue);
+}
+/* Invoke the NIC type's rx_write hook for an RX queue. */
+static inline void ef4_nic_notify_rx_desc(struct ef4_rx_queue *rx_queue)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+
+       efx->type->rx_write(rx_queue);
+}
+/* Invoke the NIC type's rx_defer_refill hook for an RX queue. */
+static inline void ef4_nic_generate_fill_event(struct ef4_rx_queue *rx_queue)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+
+       efx->type->rx_defer_refill(rx_queue);
+}
+
+/* Event data path */
+/* Probe a channel's event queue through the NIC type's ev_probe hook;
+ * the hook's return value is passed back unchanged.
+ */
+static inline int ef4_nic_probe_eventq(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+
+       return efx->type->ev_probe(channel);
+}
+/* Initialise a channel's event queue through the NIC type's ev_init
+ * hook; the hook's return value is passed back unchanged.
+ */
+static inline int ef4_nic_init_eventq(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+
+       return efx->type->ev_init(channel);
+}
+/* Invoke the NIC type's ev_fini hook for a channel's event queue. */
+static inline void ef4_nic_fini_eventq(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+
+       efx->type->ev_fini(channel);
+}
+/* Remove a channel's event queue through the NIC type's ev_remove hook. */
+static inline void ef4_nic_remove_eventq(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+
+       efx->type->ev_remove(channel);
+}
+/* Process a channel's event queue through the NIC type's ev_process
+ * hook, forwarding @quota and returning the hook's result unchanged.
+ */
+static inline int
+ef4_nic_process_eventq(struct ef4_channel *channel, int quota)
+{
+       struct ef4_nic *efx = channel->efx;
+
+       return efx->type->ev_process(channel, quota);
+}
+/* Invoke the NIC type's ev_read_ack hook for a channel's event queue. */
+static inline void ef4_nic_eventq_read_ack(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+
+       efx->type->ev_read_ack(channel);
+}
+void ef4_nic_event_test_start(struct ef4_channel *channel);
+
+/* queue operations */
+int ef4_farch_tx_probe(struct ef4_tx_queue *tx_queue);
+void ef4_farch_tx_init(struct ef4_tx_queue *tx_queue);
+void ef4_farch_tx_fini(struct ef4_tx_queue *tx_queue);
+void ef4_farch_tx_remove(struct ef4_tx_queue *tx_queue);
+void ef4_farch_tx_write(struct ef4_tx_queue *tx_queue);
+unsigned int ef4_farch_tx_limit_len(struct ef4_tx_queue *tx_queue,
+                                   dma_addr_t dma_addr, unsigned int len);
+int ef4_farch_rx_probe(struct ef4_rx_queue *rx_queue);
+void ef4_farch_rx_init(struct ef4_rx_queue *rx_queue);
+void ef4_farch_rx_fini(struct ef4_rx_queue *rx_queue);
+void ef4_farch_rx_remove(struct ef4_rx_queue *rx_queue);
+void ef4_farch_rx_write(struct ef4_rx_queue *rx_queue);
+void ef4_farch_rx_defer_refill(struct ef4_rx_queue *rx_queue);
+int ef4_farch_ev_probe(struct ef4_channel *channel);
+int ef4_farch_ev_init(struct ef4_channel *channel);
+void ef4_farch_ev_fini(struct ef4_channel *channel);
+void ef4_farch_ev_remove(struct ef4_channel *channel);
+int ef4_farch_ev_process(struct ef4_channel *channel, int quota);
+void ef4_farch_ev_read_ack(struct ef4_channel *channel);
+void ef4_farch_ev_test_generate(struct ef4_channel *channel);
+
+/* filter operations */
+int ef4_farch_filter_table_probe(struct ef4_nic *efx);
+void ef4_farch_filter_table_restore(struct ef4_nic *efx);
+void ef4_farch_filter_table_remove(struct ef4_nic *efx);
+void ef4_farch_filter_update_rx_scatter(struct ef4_nic *efx);
+s32 ef4_farch_filter_insert(struct ef4_nic *efx, struct ef4_filter_spec *spec,
+                           bool replace);
+int ef4_farch_filter_remove_safe(struct ef4_nic *efx,
+                                enum ef4_filter_priority priority,
+                                u32 filter_id);
+int ef4_farch_filter_get_safe(struct ef4_nic *efx,
+                             enum ef4_filter_priority priority, u32 filter_id,
+                             struct ef4_filter_spec *);
+int ef4_farch_filter_clear_rx(struct ef4_nic *efx,
+                             enum ef4_filter_priority priority);
+u32 ef4_farch_filter_count_rx_used(struct ef4_nic *efx,
+                                  enum ef4_filter_priority priority);
+u32 ef4_farch_filter_get_rx_id_limit(struct ef4_nic *efx);
+s32 ef4_farch_filter_get_rx_ids(struct ef4_nic *efx,
+                               enum ef4_filter_priority priority, u32 *buf,
+                               u32 size);
+#ifdef CONFIG_RFS_ACCEL
+s32 ef4_farch_filter_rfs_insert(struct ef4_nic *efx,
+                               struct ef4_filter_spec *spec);
+bool ef4_farch_filter_rfs_expire_one(struct ef4_nic *efx, u32 flow_id,
+                                    unsigned int index);
+#endif
+void ef4_farch_filter_sync_rx_mode(struct ef4_nic *efx);
+
+bool ef4_nic_event_present(struct ef4_channel *channel);
+
+/* Some statistics are derived as A - B, where A and B each grow
+ * linearly with one or more hardware counters that are read
+ * asynchronously.  When the counters feeding B are always read after
+ * those feeding A, the computed difference can fall short of the true
+ * value by a varying amount, and so can go down from one computation
+ * to the next.
+ *
+ * A reported statistic must never decrease and must never exceed the
+ * true value.  Because the computed difference never exceeds the true
+ * value, storing it only when it is larger than the stored one
+ * satisfies both requirements.
+ */
+static inline void ef4_update_diff_stat(u64 *stat, u64 diff)
+{
+       /* The comparison is done on the signed 64-bit difference, so
+        * "larger" is judged modulo 2^64.
+        */
+       if ((s64)(diff - *stat) <= 0)
+               return;
+
+       *stat = diff;
+}
+
+/* Interrupts */
+int ef4_nic_init_interrupt(struct ef4_nic *efx);
+int ef4_nic_irq_test_start(struct ef4_nic *efx);
+void ef4_nic_fini_interrupt(struct ef4_nic *efx);
+void ef4_farch_irq_enable_master(struct ef4_nic *efx);
+int ef4_farch_irq_test_generate(struct ef4_nic *efx);
+void ef4_farch_irq_disable_master(struct ef4_nic *efx);
+irqreturn_t ef4_farch_msi_interrupt(int irq, void *dev_id);
+irqreturn_t ef4_farch_legacy_interrupt(int irq, void *dev_id);
+irqreturn_t ef4_farch_fatal_interrupt(struct ef4_nic *efx);
+
+/* Return a single ACCESS_ONCE() snapshot of channel->event_test_cpu. */
+static inline int ef4_nic_event_test_irq_cpu(struct ef4_channel *channel)
+{
+       int cpu = ACCESS_ONCE(channel->event_test_cpu);
+
+       return cpu;
+}
+/* Return a single ACCESS_ONCE() snapshot of efx->last_irq_cpu. */
+static inline int ef4_nic_irq_test_irq_cpu(struct ef4_nic *efx)
+{
+       int cpu = ACCESS_ONCE(efx->last_irq_cpu);
+
+       return cpu;
+}
+
+/* Global Resources */
+int ef4_nic_flush_queues(struct ef4_nic *efx);
+int ef4_farch_fini_dmaq(struct ef4_nic *efx);
+void ef4_farch_finish_flr(struct ef4_nic *efx);
+void falcon_start_nic_stats(struct ef4_nic *efx);
+void falcon_stop_nic_stats(struct ef4_nic *efx);
+int falcon_reset_xaui(struct ef4_nic *efx);
+void ef4_farch_dimension_resources(struct ef4_nic *efx, unsigned sram_lim_qw);
+void ef4_farch_init_common(struct ef4_nic *efx);
+void ef4_farch_rx_push_indir_table(struct ef4_nic *efx);
+
+int ef4_nic_alloc_buffer(struct ef4_nic *efx, struct ef4_buffer *buffer,
+                        unsigned int len, gfp_t gfp_flags);
+void ef4_nic_free_buffer(struct ef4_nic *efx, struct ef4_buffer *buffer);
+
+/* Tests */
+/* One entry of the register table passed to ef4_farch_test_registers(). */
+struct ef4_farch_register_test {
+       unsigned address;       /* presumably the register address - confirm */
+       ef4_oword_t mask;       /* presumably the bits under test - confirm */
+};
+int ef4_farch_test_registers(struct ef4_nic *efx,
+                            const struct ef4_farch_register_test *regs,
+                            size_t n_regs);
+
+size_t ef4_nic_get_regs_len(struct ef4_nic *efx);
+void ef4_nic_get_regs(struct ef4_nic *efx, void *buf);
+
+size_t ef4_nic_describe_stats(const struct ef4_hw_stat_desc *desc, size_t count,
+                             const unsigned long *mask, u8 *names);
+void ef4_nic_update_stats(const struct ef4_hw_stat_desc *desc, size_t count,
+                         const unsigned long *mask, u64 *stats,
+                         const void *dma_buf, bool accumulate);
+void ef4_nic_fix_nodesc_drop_stat(struct ef4_nic *efx, u64 *stat);
+
+#define EF4_MAX_FLUSH_TIME 5000
+
+void ef4_farch_generate_event(struct ef4_nic *efx, unsigned int evq,
+                             ef4_qword_t *event);
+
+#endif /* EF4_NIC_H */
diff --git a/drivers/net/ethernet/sfc/falcon/phy.h b/drivers/net/ethernet/sfc/falcon/phy.h
new file mode 100644 (file)
index 0000000..362141c
--- /dev/null
@@ -0,0 +1,50 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2007-2010 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_PHY_H
+#define EF4_PHY_H
+
+/****************************************************************************
+ * 10Xpress (SFX7101) PHY
+ */
+extern const struct ef4_phy_operations falcon_sfx7101_phy_ops;
+
+void tenxpress_set_id_led(struct ef4_nic *efx, enum ef4_led_mode mode);
+
+/****************************************************************************
+ * AMCC/Quake QT202x PHYs
+ */
+extern const struct ef4_phy_operations falcon_qt202x_phy_ops;
+
+/* These PHYs provide various H/W control states for LEDs */
+#define QUAKE_LED_LINK_INVAL   (0)
+#define QUAKE_LED_LINK_STAT    (1)
+#define QUAKE_LED_LINK_ACT     (2)
+#define QUAKE_LED_LINK_ACTSTAT (3)
+#define QUAKE_LED_OFF          (4)
+#define QUAKE_LED_ON           (5)
+#define QUAKE_LED_LINK_INPUT   (6)     /* Pin is an input. */
+/* What link the LED tracks */
+#define QUAKE_LED_TXLINK       (0)
+#define QUAKE_LED_RXLINK       (8)
+
+void falcon_qt202x_set_led(struct ef4_nic *p, int led, int state);
+
+/****************************************************************************
+ * Transwitch CX4 retimer
+ */
+extern const struct ef4_phy_operations falcon_txc_phy_ops;
+
+#define TXC_GPIO_DIR_INPUT     0
+#define TXC_GPIO_DIR_OUTPUT    1
+
+void falcon_txc_set_gpio_dir(struct ef4_nic *efx, int pin, int dir);
+void falcon_txc_set_gpio_val(struct ef4_nic *efx, int pin, int val);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/falcon/qt202x_phy.c b/drivers/net/ethernet/sfc/falcon/qt202x_phy.c
new file mode 100644 (file)
index 0000000..d293316
--- /dev/null
@@ -0,0 +1,495 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2006-2012 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+/*
+ * Driver for AMCC QT202x SFP+ and XFP adapters; see www.amcc.com for details
+ */
+
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include "efx.h"
+#include "mdio_10g.h"
+#include "phy.h"
+#include "nic.h"
+
+#define QT202X_REQUIRED_DEVS (MDIO_DEVS_PCS |          \
+                             MDIO_DEVS_PMAPMD |        \
+                             MDIO_DEVS_PHYXS)
+
+#define QT202X_LOOPBACKS ((1 << LOOPBACK_PCS) |                \
+                         (1 << LOOPBACK_PMAPMD) |      \
+                         (1 << LOOPBACK_PHYXS_WS))
+
+/****************************************************************************/
+/* Quake-specific MDIO registers */
+#define MDIO_QUAKE_LED0_REG    (0xD006)
+
+/* QT2025C only */
+#define PCS_FW_HEARTBEAT_REG   0xd7ee
+#define PCS_FW_HEARTB_LBN      0
+#define PCS_FW_HEARTB_WIDTH    8
+#define PCS_FW_PRODUCT_CODE_1  0xd7f0
+#define PCS_FW_VERSION_1       0xd7f3
+#define PCS_FW_BUILD_1         0xd7f6
+#define PCS_UC8051_STATUS_REG  0xd7fd
+#define PCS_UC_STATUS_LBN      0
+#define PCS_UC_STATUS_WIDTH    8
+#define PCS_UC_STATUS_FW_SAVE  0x20
+#define PMA_PMD_MODE_REG       0xc301
+#define PMA_PMD_RXIN_SEL_LBN   6
+#define PMA_PMD_FTX_CTRL2_REG  0xc309
+#define PMA_PMD_FTX_STATIC_LBN 13
+#define PMA_PMD_VEND1_REG      0xc001
+#define PMA_PMD_VEND1_LBTXD_LBN        15
+#define PCS_VEND1_REG          0xc000
+#define PCS_VEND1_LBTXD_LBN    5
+
+/* Write @mode to the LED control register for LED number @led.  The
+ * register is addressed in the PMA/PMD MMD at MDIO_QUAKE_LED0_REG plus
+ * the LED index.
+ */
+void falcon_qt202x_set_led(struct ef4_nic *p, int led, int mode)
+{
+       ef4_mdio_write(p, MDIO_MMD_PMAPMD, MDIO_QUAKE_LED0_REG + led, mode);
+}
+
+/* Private PHY state, allocated in qt202x_phy_probe() and stored in
+ * efx->phy_data until qt202x_phy_remove() frees it.
+ */
+struct qt202x_phy_data {
+       /* Copy of efx->phy_mode taken at probe and after each reconfigure */
+       enum ef4_phy_mode phy_mode;
+       /* Set while the bug 17190 workaround sees the link down although
+        * the PMA/PMD and PHY XS layers report up */
+       bool bug17190_in_bad_state;
+       /* jiffies value from which the bug 17190 workaround may act */
+       unsigned long bug17190_timer;
+       /* QT2025C firmware version as packed by qt2025c_firmware_id() */
+       u32 firmware_ver;
+};
+
+#define QT2022C2_MAX_RESET_TIME 500
+#define QT2022C2_RESET_WAIT 10
+
+#define QT2025C_MAX_HEARTB_TIME (5 * HZ)
+#define QT2025C_HEARTB_WAIT 100
+#define QT2025C_MAX_FWSTART_TIME (25 * HZ / 10)
+#define QT2025C_FWSTART_WAIT 100
+
+#define BUG17190_INTERVAL (2 * HZ)
+
+/* Poll the QT2025C firmware heartbeat counter until its value is seen
+ * to change.
+ *
+ * Returns 0 once a change is observed, the negative value returned by a
+ * failed MDIO read, or -ETIMEDOUT if no change is seen before
+ * QT2025C_MAX_HEARTB_TIME has elapsed.
+ */
+static int qt2025c_wait_heartbeat(struct ef4_nic *efx)
+{
+       const int heartb_mask = (1 << PCS_FW_HEARTB_WIDTH) - 1;
+       unsigned long deadline = jiffies + QT2025C_MAX_HEARTB_TIME;
+       int last_count = 0;
+
+       /* Wait for firmware heartbeat to start */
+       while (1) {
+               int count;
+               int val = ef4_mdio_read(efx, MDIO_MMD_PCS,
+                                       PCS_FW_HEARTBEAT_REG);
+
+               if (val < 0)
+                       return val;
+
+               count = (val >> PCS_FW_HEARTB_LBN) & heartb_mask;
+
+               /* A stored count of zero means that no reference value
+                * has been captured yet. */
+               if (last_count == 0)
+                       last_count = count;
+               else if (count != last_count)
+                       return 0;
+
+               if (time_after(jiffies, deadline)) {
+                       /* Some cables have EEPROMs that conflict with the
+                        * PHY's on-board EEPROM so it cannot load firmware */
+                       netif_err(efx, hw, efx->net_dev,
+                                 "If an SFP+ direct attach cable is"
+                                 " connected, please check that it complies"
+                                 " with the SFP+ specification\n");
+                       return -ETIMEDOUT;
+               }
+
+               msleep(QT2025C_HEARTB_WAIT);
+       }
+}
+
+/* Poll the QT2025C microcontroller status register until the status
+ * field reaches at least PCS_UC_STATUS_FW_SAVE.
+ *
+ * Returns 0 on success, the negative value returned by a failed MDIO
+ * read, or -ETIMEDOUT if the status has not been reached before
+ * QT2025C_MAX_FWSTART_TIME has elapsed.
+ */
+static int qt2025c_wait_fw_status_good(struct ef4_nic *efx)
+{
+       unsigned long timeout = jiffies + QT2025C_MAX_FWSTART_TIME;
+       int reg, status;
+
+       /* Wait for firmware status to look good */
+       for (;;) {
+               reg = ef4_mdio_read(efx, MDIO_MMD_PCS, PCS_UC8051_STATUS_REG);
+               if (reg < 0)
+                       return reg;
+               /* Extract the field before comparing: PCS_UC_STATUS_FW_SAVE
+                * is a field value, not a value in register position. */
+               status = (reg >> PCS_UC_STATUS_LBN) &
+                        ((1 << PCS_UC_STATUS_WIDTH) - 1);
+               if (status >= PCS_UC_STATUS_FW_SAVE)
+                       break;
+               if (time_after(jiffies, timeout))
+                       return -ETIMEDOUT;
+               msleep(QT2025C_FWSTART_WAIT);
+       }
+
+       return 0;
+}
+
+/* Restart the QT2025C microcontroller with two successive writes to PCS
+ * register 0xe854, then sleep for 50ms.
+ */
+static void qt2025c_restart_firmware(struct ef4_nic *efx)
+{
+       /* Restart microcontroller execution of firmware from RAM */
+       ef4_mdio_write(efx, MDIO_MMD_PCS, 0xe854, 0x00c0);
+       ef4_mdio_write(efx, MDIO_MMD_PCS, 0xe854, 0x0040);
+
+       msleep(50);
+}
+
+/* Wait for the QT2025C firmware to come up: first for the heartbeat,
+ * then for a good firmware status.  If only the status wait times out,
+ * the firmware is restarted and both waits are retried once.
+ *
+ * Returns 0 on success or the negative error from the failing wait.
+ */
+static int qt2025c_wait_reset(struct ef4_nic *efx)
+{
+       int rc = qt2025c_wait_heartbeat(efx);
+
+       if (rc)
+               return rc;
+
+       rc = qt2025c_wait_fw_status_good(efx);
+       if (rc != -ETIMEDOUT)
+               return rc;
+
+       /* Bug 17689: occasionally heartbeat starts but firmware status
+        * code never progresses beyond 0x00.  Try again, once, after
+        * restarting execution of the firmware image. */
+       netif_dbg(efx, hw, efx->net_dev,
+                 "bashing QT2025C microcontroller\n");
+       qt2025c_restart_firmware(efx);
+
+       rc = qt2025c_wait_heartbeat(efx);
+       if (rc)
+               return rc;
+
+       return qt2025c_wait_fw_status_good(efx);
+}
+
+/* Read the nine QT2025C firmware identification registers starting at
+ * PCS_FW_PRODUCT_CODE_1, log them, and record the firmware version in
+ * the PHY private data.
+ *
+ * The version is packed as: high nibble of byte 3 in bits 27..24, low
+ * nibble of byte 3 in bits 19..16, byte 4 in bits 15..8 and byte 5 in
+ * bits 7..0.
+ *
+ * If any register read fails, an error is logged and firmware_ver is
+ * left unchanged rather than being derived from a truncated error code.
+ */
+static void qt2025c_firmware_id(struct ef4_nic *efx)
+{
+       struct qt202x_phy_data *phy_data = efx->phy_data;
+       u8 firmware_id[9];
+       size_t i;
+
+       for (i = 0; i < sizeof(firmware_id); i++) {
+               int reg = ef4_mdio_read(efx, MDIO_MMD_PCS,
+                                       PCS_FW_PRODUCT_CODE_1 + i);
+
+               if (reg < 0) {
+                       netif_err(efx, hw, efx->net_dev,
+                                 "failed to read QT2025C firmware ID: %d\n",
+                                 reg);
+                       return;
+               }
+               firmware_id[i] = reg;
+       }
+       netif_info(efx, probe, efx->net_dev,
+                  "QT2025C firmware %xr%d v%d.%d.%d.%d [20%02d-%02d-%02d]\n",
+                  (firmware_id[0] << 8) | firmware_id[1], firmware_id[2],
+                  firmware_id[3] >> 4, firmware_id[3] & 0xf,
+                  firmware_id[4], firmware_id[5],
+                  firmware_id[6], firmware_id[7], firmware_id[8]);
+       phy_data->firmware_ver = ((firmware_id[3] & 0xf0) << 20) |
+                                ((firmware_id[3] & 0x0f) << 16) |
+                                (firmware_id[4] << 8) | firmware_id[5];
+}
+
+/* Detect and recover from the QT2025C "PCS stuck down" state; called
+ * from qt202x_phy_poll() after the link state has been refreshed.
+ */
+static void qt2025c_bug17190_workaround(struct ef4_nic *efx)
+{
+       struct qt202x_phy_data *phy_data = efx->phy_data;
+       bool stuck;
+
+       /* The PHY can get stuck in a state where it reports PHY_XS and PMA/PMD
+        * layers up, but PCS down (no block_lock).  If we notice this state
+        * persisting for a couple of seconds, we switch PMA/PMD loopback
+        * briefly on and then off again, which is normally sufficient to
+        * recover it.
+        */
+       stuck = !efx->link_state.up &&
+               ef4_mdio_links_ok(efx, MDIO_DEVS_PMAPMD | MDIO_DEVS_PHYXS);
+       if (!stuck) {
+               phy_data->bug17190_in_bad_state = false;
+               return;
+       }
+
+       /* First sighting of the bad state: just start the timer */
+       if (!phy_data->bug17190_in_bad_state) {
+               phy_data->bug17190_in_bad_state = true;
+               phy_data->bug17190_timer = jiffies + BUG17190_INTERVAL;
+               return;
+       }
+
+       if (!time_after_eq(jiffies, phy_data->bug17190_timer))
+               return;
+
+       netif_dbg(efx, hw, efx->net_dev, "bashing QT2025C PMA/PMD\n");
+       ef4_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_CTRL1,
+                         MDIO_PMA_CTRL1_LOOPBACK, true);
+       msleep(100);
+       ef4_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_CTRL1,
+                         MDIO_PMA_CTRL1_LOOPBACK, false);
+       phy_data->bug17190_timer = jiffies + BUG17190_INTERVAL;
+}
+
+/* Switch the QT2025C operating mode to match the current loopback
+ * setting: 0x0038 when efx->loopback_mode is LOOPBACK_NONE, otherwise
+ * 0x0020.
+ *
+ * Nothing is done if the firmware is older than 2.0.1.0 or if the PHY
+ * is already in the wanted mode.  Otherwise the configuration registers
+ * are rewritten, the microcontroller is restarted and the function
+ * waits for it to become ready again.
+ *
+ * Returns 0 on success or a negative error code if reading the current
+ * mode fails or the microcontroller does not come back.
+ */
+static int qt2025c_select_phy_mode(struct ef4_nic *efx)
+{
+       struct qt202x_phy_data *phy_data = efx->phy_data;
+       struct falcon_board *board = falcon_board(efx);
+       int reg, rc, i;
+       uint16_t phy_op_mode;
+
+       /* Only 2.0.1.0+ PHY firmware supports the more optimal SFP+
+        * Self-Configure mode.  Don't attempt any switching if we encounter
+        * older firmware. */
+       if (phy_data->firmware_ver < 0x02000100)
+               return 0;
+
+       /* In general we will get optimal behaviour in "SFP+ Self-Configure"
+        * mode; however, that powers down most of the PHY when no module is
+        * present, so we must use a different mode (any fixed mode will do)
+        * to be sure that loopbacks will work. */
+       phy_op_mode = (efx->loopback_mode == LOOPBACK_NONE) ? 0x0038 : 0x0020;
+
+       /* Only change mode if really necessary */
+       reg = ef4_mdio_read(efx, 1, 0xc319);
+       /* A failed read returns a negative value whose low bits must not
+        * be mistaken for the current mode. */
+       if (reg < 0)
+               return reg;
+       if ((reg & 0x0038) == phy_op_mode)
+               return 0;
+       netif_dbg(efx, hw, efx->net_dev, "Switching PHY to mode 0x%04x\n",
+                 phy_op_mode);
+
+       /* This sequence replicates the register writes configured in the boot
+        * EEPROM (including the differences between board revisions), except
+        * that the operating mode is changed, and the PHY is prevented from
+        * unnecessarily reloading the main firmware image again. */
+       ef4_mdio_write(efx, 1, 0xc300, 0x0000);
+       /* (Note: this portion of the boot EEPROM sequence, which bit-bashes 9
+        * STOPs onto the firmware/module I2C bus to reset it, varies across
+        * board revisions, as the bus is connected to different GPIO/LED
+        * outputs on the PHY.) */
+       if (board->major == 0 && board->minor < 2) {
+               ef4_mdio_write(efx, 1, 0xc303, 0x4498);
+               for (i = 0; i < 9; i++) {
+                       ef4_mdio_write(efx, 1, 0xc303, 0x4488);
+                       ef4_mdio_write(efx, 1, 0xc303, 0x4480);
+                       ef4_mdio_write(efx, 1, 0xc303, 0x4490);
+                       ef4_mdio_write(efx, 1, 0xc303, 0x4498);
+               }
+       } else {
+               ef4_mdio_write(efx, 1, 0xc303, 0x0920);
+               ef4_mdio_write(efx, 1, 0xd008, 0x0004);
+               for (i = 0; i < 9; i++) {
+                       ef4_mdio_write(efx, 1, 0xc303, 0x0900);
+                       ef4_mdio_write(efx, 1, 0xd008, 0x0005);
+                       ef4_mdio_write(efx, 1, 0xc303, 0x0920);
+                       ef4_mdio_write(efx, 1, 0xd008, 0x0004);
+               }
+               ef4_mdio_write(efx, 1, 0xc303, 0x4900);
+       }
+       ef4_mdio_write(efx, 1, 0xc303, 0x4900);
+       ef4_mdio_write(efx, 1, 0xc302, 0x0004);
+       ef4_mdio_write(efx, 1, 0xc316, 0x0013);
+       ef4_mdio_write(efx, 1, 0xc318, 0x0054);
+       ef4_mdio_write(efx, 1, 0xc319, phy_op_mode);
+       ef4_mdio_write(efx, 1, 0xc31a, 0x0098);
+       ef4_mdio_write(efx, 3, 0x0026, 0x0e00);
+       ef4_mdio_write(efx, 3, 0x0027, 0x0013);
+       ef4_mdio_write(efx, 3, 0x0028, 0xa528);
+       ef4_mdio_write(efx, 1, 0xd006, 0x000a);
+       ef4_mdio_write(efx, 1, 0xd007, 0x0009);
+       ef4_mdio_write(efx, 1, 0xd008, 0x0004);
+       /* This additional write is not present in the boot EEPROM.  It
+        * prevents the PHY's internal boot ROM doing another pointless (and
+        * slow) reload of the firmware image (the microcontroller's code
+        * memory is not affected by the microcontroller reset). */
+       ef4_mdio_write(efx, 1, 0xc317, 0x00ff);
+       /* PMA/PMD loopback sets RXIN to inverse polarity and the firmware
+        * restart doesn't reset it. We need to do that ourselves. */
+       ef4_mdio_set_flag(efx, 1, PMA_PMD_MODE_REG,
+                         1 << PMA_PMD_RXIN_SEL_LBN, false);
+       ef4_mdio_write(efx, 1, 0xc300, 0x0002);
+       msleep(20);
+
+       /* Restart microcontroller execution of firmware from RAM */
+       qt2025c_restart_firmware(efx);
+
+       /* Wait for the microcontroller to be ready again */
+       rc = qt2025c_wait_reset(efx);
+       if (rc < 0) {
+               netif_err(efx, hw, efx->net_dev,
+                         "PHY microcontroller reset during mode switch "
+                         "timed out\n");
+               return rc;
+       }
+
+       return 0;
+}
+
+/* Bring the PHY out of reset, wait 250ms, then run the board's init_phy
+ * hook.
+ *
+ * Returns 0 on success, or the negative error from the reset wait after
+ * logging "PHY reset timed out".
+ */
+static int qt202x_reset_phy(struct ef4_nic *efx)
+{
+       int rc;
+
+       if (efx->phy_type == PHY_TYPE_QT2025C) {
+               /* Wait for the reset triggered by falcon_reset_hw()
+                * to complete */
+               rc = qt2025c_wait_reset(efx);
+       } else {
+               /* Reset the PHYXS MMD. This is documented as doing
+                * a complete soft reset. */
+               rc = ef4_mdio_reset_mmd(efx, MDIO_MMD_PHYXS,
+                                       QT2022C2_MAX_RESET_TIME /
+                                       QT2022C2_RESET_WAIT,
+                                       QT2022C2_RESET_WAIT);
+       }
+
+       if (rc < 0) {
+               netif_err(efx, hw, efx->net_dev, "PHY reset timed out\n");
+               return rc;
+       }
+
+       /* Wait 250ms for the PHY to complete bootup */
+       msleep(250);
+
+       falcon_board(efx)->type->init_phy(efx);
+
+       return 0;
+}
+
+/* Allocate the PHY private data and set up the MDIO and loopback
+ * capabilities advertised for QT202x PHYs.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int qt202x_phy_probe(struct ef4_nic *efx)
+{
+       struct qt202x_phy_data *phy_data;
+
+       phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL);
+       if (phy_data == NULL)
+               return -ENOMEM;
+
+       phy_data->phy_mode = efx->phy_mode;
+       phy_data->bug17190_in_bad_state = false;
+       phy_data->bug17190_timer = 0;
+       efx->phy_data = phy_data;
+
+       efx->mdio.mmds = QT202X_REQUIRED_DEVS;
+       efx->mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
+       efx->loopback_modes = QT202X_LOOPBACKS | FALCON_XMAC_LOOPBACKS;
+
+       return 0;
+}
+
+/* Reset the PHY, log its PHY XS device identifier and, for a QT2025C,
+ * read and record the firmware version.
+ *
+ * Returns 0 on success or the non-zero result of qt202x_reset_phy().
+ */
+static int qt202x_phy_init(struct ef4_nic *efx)
+{
+       int rc = qt202x_reset_phy(efx);
+       u32 devid;
+
+       if (rc) {
+               netif_err(efx, probe, efx->net_dev, "PHY init failed\n");
+               return rc;
+       }
+
+       devid = ef4_mdio_read_id(efx, MDIO_MMD_PHYXS);
+       netif_info(efx, probe, efx->net_dev,
+                  "PHY ID reg %x (OUI %06x model %02x revision %x)\n",
+                  devid, ef4_mdio_id_oui(devid), ef4_mdio_id_model(devid),
+                  ef4_mdio_id_rev(devid));
+
+       if (efx->phy_type != PHY_TYPE_QT2025C)
+               return 0;
+
+       qt2025c_firmware_id(efx);
+       return 0;
+}
+
+/* Return the result of ef4_mdio_links_ok() for QT202X_REQUIRED_DEVS. */
+static int qt202x_link_ok(struct ef4_nic *efx)
+{
+       int ok = ef4_mdio_links_ok(efx, QT202X_REQUIRED_DEVS);
+
+       return ok;
+}
+
+/* Refresh efx->link_state from the PHY and, on a QT2025C, run the bug
+ * 17190 workaround.
+ *
+ * Returns true if the link up/down state changed during this poll.
+ */
+static bool qt202x_phy_poll(struct ef4_nic *efx)
+{
+       bool prev_up = efx->link_state.up;
+
+       /* Speed, duplex and flow control are set to fixed values */
+       efx->link_state.speed = 10000;
+       efx->link_state.fd = true;
+       efx->link_state.fc = efx->wanted_fc;
+       efx->link_state.up = qt202x_link_ok(efx);
+
+       if (efx->phy_type == PHY_TYPE_QT2025C)
+               qt2025c_bug17190_workaround(efx);
+
+       return prev_up != efx->link_state.up;
+}
+
+/* Apply the current efx->phy_mode and efx->loopback_mode to the PHY and
+ * remember the applied PHY mode in the private data.
+ *
+ * Returns 0 on success, or the non-zero result of
+ * qt2025c_select_phy_mode() on a QT2025C.
+ */
+static int qt202x_phy_reconfigure(struct ef4_nic *efx)
+{
+       struct qt202x_phy_data *phy_data = efx->phy_data;
+
+       if (efx->phy_type != PHY_TYPE_QT2025C) {
+               /* Reset the PHY when moving from tx off to tx on */
+               if ((phy_data->phy_mode & PHY_MODE_TX_DISABLED) &&
+                   !(efx->phy_mode & PHY_MODE_TX_DISABLED))
+                       qt202x_reset_phy(efx);
+
+               ef4_mdio_transmit_disable(efx);
+       } else {
+               bool tx_static;
+               int rc = qt2025c_select_phy_mode(efx);
+
+               if (rc)
+                       return rc;
+
+               /* There are several different register bits which can
+                * disable TX (and save power) on direct-attach cables
+                * or optical transceivers, varying somewhat between
+                * firmware versions.  Only 'static mode' appears to
+                * cover everything. */
+               tx_static = (efx->phy_mode & PHY_MODE_TX_DISABLED) ||
+                           (efx->phy_mode & PHY_MODE_LOW_POWER) ||
+                           efx->loopback_mode == LOOPBACK_PCS ||
+                           efx->loopback_mode == LOOPBACK_PMAPMD;
+               mdio_set_flag(&efx->mdio, efx->mdio.prtad, MDIO_MMD_PMAPMD,
+                             PMA_PMD_FTX_CTRL2_REG,
+                             1 << PMA_PMD_FTX_STATIC_LBN, tx_static);
+       }
+
+       ef4_mdio_phy_reconfigure(efx);
+
+       phy_data->phy_mode = efx->phy_mode;
+
+       return 0;
+}
+
+/* Fill @ecmd by delegating to mdio45_ethtool_gset() on efx->mdio. */
+static void qt202x_phy_get_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd)
+{
+       const struct mdio_if_info *mdio = &efx->mdio;
+
+       mdio45_ethtool_gset(mdio, ecmd);
+}
+
+/* Free the PHY private data and clear efx->phy_data. */
+static void qt202x_phy_remove(struct ef4_nic *efx)
+{
+       struct qt202x_phy_data *phy_data = efx->phy_data;
+
+       /* Free the context block */
+       kfree(phy_data);
+       efx->phy_data = NULL;
+}
+
+/* Report the module EEPROM as SFF-8079 with the matching length.
+ * Always returns 0.
+ */
+static int qt202x_phy_get_module_info(struct ef4_nic *efx,
+                                     struct ethtool_modinfo *modinfo)
+{
+       modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+       modinfo->type = ETH_MODULE_SFF_8079;
+
+       return 0;
+}
+
+/* Copy ee->len bytes of module EEPROM, starting at ee->offset, into
+ * @data with one MDIO register read per byte.  The MMD and base register
+ * depend on the PHY type.
+ *
+ * Returns 0 on success or the negative value of the first failed read.
+ */
+static int qt202x_phy_get_module_eeprom(struct ef4_nic *efx,
+                                       struct ethtool_eeprom *ee, u8 *data)
+{
+       const bool is_qt2025c = efx->phy_type == PHY_TYPE_QT2025C;
+       int mmd = is_qt2025c ? MDIO_MMD_PCS : MDIO_MMD_PMAPMD;
+       int reg_base = is_qt2025c ? 0xd000 : 0x8007;
+       int i;
+
+       for (i = 0; i < ee->len; i++) {
+               int val = ef4_mdio_read(efx, mmd, reg_base + ee->offset + i);
+
+               if (val < 0)
+                       return val;
+               data[i] = val;
+       }
+
+       return 0;
+}
+
+/* PHY operations table for the QT202x PHYs (declared in phy.h).  Most
+ * hooks are the qt202x_* functions defined in this file; .fini,
+ * .set_settings and .test_alive use helpers defined elsewhere.
+ */
+const struct ef4_phy_operations falcon_qt202x_phy_ops = {
+       .probe           = qt202x_phy_probe,
+       .init            = qt202x_phy_init,
+       .reconfigure     = qt202x_phy_reconfigure,
+       .poll            = qt202x_phy_poll,
+       .fini            = ef4_port_dummy_op_void,
+       .remove          = qt202x_phy_remove,
+       .get_settings    = qt202x_phy_get_settings,
+       .set_settings    = ef4_mdio_set_settings,
+       .test_alive      = ef4_mdio_test_alive,
+       .get_module_eeprom = qt202x_phy_get_module_eeprom,
+       .get_module_info = qt202x_phy_get_module_info,
+};
diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c
new file mode 100644 (file)
index 0000000..250458c
--- /dev/null
@@ -0,0 +1,974 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/prefetch.h>
+#include <linux/moduleparam.h>
+#include <linux/iommu.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "filter.h"
+#include "nic.h"
+#include "selftest.h"
+#include "workarounds.h"
+
+/* Preferred number of descriptors to fill at once */
+#define EF4_RX_PREFERRED_BATCH 8U
+
+/* Number of RX buffers to recycle pages for.  When creating the RX page recycle
+ * ring, this number is divided by the number of buffers per page to calculate
+ * the number of pages to store in the RX page recycle ring.  The IOMMU size
+ * is used when an IOMMU is present on the PCI bus, and always on PPC64.
+ */
+#define EF4_RECYCLE_RING_SIZE_IOMMU 4096
+#define EF4_RECYCLE_RING_SIZE_NOIOMMU (2 * EF4_RX_PREFERRED_BATCH)
+
+/* Size of buffer allocated for skb header area. */
+#define EF4_SKB_HEADERS  128u
+
+/* This is the percentage fill level below which new RX descriptors
+ * will be added to the RX descriptor ring.  A value of 0 selects the
+ * largest trigger level the ring allows; values above 100 are treated
+ * as 100.
+ */
+static unsigned int rx_refill_threshold;
+
+/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
+#define EF4_RX_MAX_FRAGS DIV_ROUND_UP(EF4_MAX_FRAME_LEN(EF4_MAX_MTU), \
+                                     EF4_RX_USR_BUF_SIZE)
+
+/*
+ * RX maximum head room required.
+ *
+ * This must be at least 1 to prevent overflow, plus one packet-worth
+ * to allow pipelined receives.
+ */
+#define EF4_RXD_HEAD_ROOM (1 + EF4_RX_MAX_FRAGS)
+
+/* Return the kernel virtual address of the data in @buf: the address of its
+ * page plus the buffer's offset within that page.
+ */
+static inline u8 *ef4_rx_buf_va(struct ef4_rx_buffer *buf)
+{
+       return page_address(buf->page) + buf->page_offset;
+}
+
+/* Read the 32-bit little-endian hash value located at
+ * efx->rx_packet_hash_offset relative to @eh.
+ */
+static inline u32 ef4_rx_buf_hash(struct ef4_nic *efx, const u8 *eh)
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+       return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset));
+#else
+       /* Assemble the value byte by byte to avoid an unaligned 32-bit load */
+       const u8 *data = eh + efx->rx_packet_hash_offset;
+       return (u32)data[0]       |
+              (u32)data[1] << 8  |
+              (u32)data[2] << 16 |
+              (u32)data[3] << 24;
+#endif
+}
+
+/* Return the buffer that follows @rx_buf in the RX buffer ring, wrapping
+ * from the final entry (index ptr_mask) back to entry 0.
+ */
+static inline struct ef4_rx_buffer *
+ef4_rx_buf_next(struct ef4_rx_queue *rx_queue, struct ef4_rx_buffer *rx_buf)
+{
+       struct ef4_rx_buffer *final_entry =
+               ef4_rx_buffer(rx_queue, rx_queue->ptr_mask);
+
+       if (likely(rx_buf != final_entry))
+               return rx_buf + 1;
+
+       return ef4_rx_buffer(rx_queue, 0);
+}
+
+/* Make the first @len bytes of @rx_buf's DMA region visible to the CPU
+ * (device-to-CPU direction) before the driver reads the received data.
+ */
+static inline void ef4_sync_rx_buffer(struct ef4_nic *efx,
+                                     struct ef4_rx_buffer *rx_buf,
+                                     unsigned int len)
+{
+       dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len,
+                               DMA_FROM_DEVICE);
+}
+
+/* Derive how RX pages are split into buffers from rx_dma_len, rx_ip_align
+ * and rx_buffer_order.  The assignments are order-dependent: each later
+ * field is computed from the ones set before it.
+ */
+void ef4_rx_config_page_split(struct ef4_nic *efx)
+{
+       /* Distance between consecutive buffers within a page */
+       efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align,
+                                     EF4_RX_BUF_ALIGNMENT);
+       /* Multi-page allocations hold exactly one buffer; otherwise fit as
+        * many as possible after the per-page state header.
+        */
+       efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
+               ((PAGE_SIZE - sizeof(struct ef4_rx_page_state)) /
+                efx->rx_page_buf_step);
+       /* Share of the allocation charged to each buffer */
+       efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
+               efx->rx_bufs_per_page;
+       /* Pages needed to provide the preferred batch of buffers */
+       efx->rx_pages_per_batch = DIV_ROUND_UP(EF4_RX_PREFERRED_BATCH,
+                                              efx->rx_bufs_per_page);
+}
+
+/* Check the RX page recycle ring for a page that can be reused.
+ *
+ * Returns a page that is still DMA-mapped and referenced only by the ring,
+ * or NULL if the ring is absent, the next slot is empty, or the page is
+ * still in use elsewhere (in which case it is unmapped and released).
+ */
+static struct page *ef4_reuse_page(struct ef4_rx_queue *rx_queue)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+       struct page *page;
+       struct ef4_rx_page_state *state;
+       unsigned index;
+
+       /* The ring comes from an unchecked kcalloc(); if that failed there
+        * is nothing to reuse and indexing it would dereference NULL.
+        */
+       if (unlikely(!rx_queue->page_ring))
+               return NULL;
+
+       index = rx_queue->page_remove & rx_queue->page_ptr_mask;
+       page = rx_queue->page_ring[index];
+       if (page == NULL)
+               return NULL;
+
+       rx_queue->page_ring[index] = NULL;
+       /* page_remove cannot exceed page_add. */
+       if (rx_queue->page_remove != rx_queue->page_add)
+               ++rx_queue->page_remove;
+
+       /* If page_count is 1 then we hold the only reference to this page. */
+       if (page_count(page) == 1) {
+               ++rx_queue->page_recycle_count;
+               return page;
+       } else {
+               /* Someone else still holds the page: give up our mapping
+                * and reference instead of handing it back to the NIC.
+                */
+               state = page_address(page);
+               dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+                              PAGE_SIZE << efx->rx_buffer_order,
+                              DMA_FROM_DEVICE);
+               put_page(page);
+               ++rx_queue->page_recycle_failed;
+       }
+
+       return NULL;
+}
+
+/**
+ * ef4_init_rx_buffers - create a batch of page-based RX buffers
+ *
+ * @rx_queue:          Efx RX queue
+ * @atomic:            allocate pages with GFP_ATOMIC instead of GFP_KERNEL
+ *
+ * This allocates a batch of pages, maps them for DMA, and populates
+ * struct ef4_rx_buffers for each one. Return a negative error code or
+ * 0 on success. If a single page can be used for multiple buffers,
+ * then the page will either be inserted fully, or not at all.
+ *
+ * Pages are taken from the recycle ring when possible.  Returns -ENOMEM
+ * if a page cannot be allocated and -EIO if it cannot be DMA-mapped;
+ * buffers added for earlier pages of the batch are left in place.
+ */
+static int ef4_init_rx_buffers(struct ef4_rx_queue *rx_queue, bool atomic)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+       struct ef4_rx_buffer *rx_buf;
+       struct page *page;
+       unsigned int page_offset;
+       struct ef4_rx_page_state *state;
+       dma_addr_t dma_addr;
+       unsigned index, count;
+
+       count = 0;
+       do {
+               page = ef4_reuse_page(rx_queue);
+               if (page == NULL) {
+                       page = alloc_pages(__GFP_COLD | __GFP_COMP |
+                                          (atomic ? GFP_ATOMIC : GFP_KERNEL),
+                                          efx->rx_buffer_order);
+                       if (unlikely(page == NULL))
+                               return -ENOMEM;
+                       dma_addr =
+                               dma_map_page(&efx->pci_dev->dev, page, 0,
+                                            PAGE_SIZE << efx->rx_buffer_order,
+                                            DMA_FROM_DEVICE);
+                       if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
+                                                      dma_addr))) {
+                               __free_pages(page, efx->rx_buffer_order);
+                               return -EIO;
+                       }
+                       /* The DMA address is remembered at the start of the
+                        * page itself so it can be unmapped later.
+                        */
+                       state = page_address(page);
+                       state->dma_addr = dma_addr;
+               } else {
+                       /* Recycled pages are still mapped */
+                       state = page_address(page);
+                       dma_addr = state->dma_addr;
+               }
+
+               /* Buffers start after the per-page state header */
+               dma_addr += sizeof(struct ef4_rx_page_state);
+               page_offset = sizeof(struct ef4_rx_page_state);
+
+               do {
+                       index = rx_queue->added_count & rx_queue->ptr_mask;
+                       rx_buf = ef4_rx_buffer(rx_queue, index);
+                       rx_buf->dma_addr = dma_addr + efx->rx_ip_align;
+                       rx_buf->page = page;
+                       rx_buf->page_offset = page_offset + efx->rx_ip_align;
+                       rx_buf->len = efx->rx_dma_len;
+                       rx_buf->flags = 0;
+                       ++rx_queue->added_count;
+                       /* Each buffer holds its own reference on the page */
+                       get_page(page);
+                       dma_addr += efx->rx_page_buf_step;
+                       page_offset += efx->rx_page_buf_step;
+               } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
+
+               /* Mark the final buffer carved from this page */
+               rx_buf->flags = EF4_RX_BUF_LAST_IN_PAGE;
+       } while (++count < efx->rx_pages_per_batch);
+
+       return 0;
+}
+
+/* Tear down the DMA mapping of the page backing @rx_buf, using the DMA
+ * address recorded in the state header at the start of the page.  Does
+ * nothing if the buffer no longer has a page.  This function is only
+ * called for the final RX buffer in a page.
+ */
+static void ef4_unmap_rx_buffer(struct ef4_nic *efx,
+                               struct ef4_rx_buffer *rx_buf)
+{
+       struct ef4_rx_page_state *page_state;
+
+       if (!rx_buf->page)
+               return;
+
+       page_state = page_address(rx_buf->page);
+       dma_unmap_page(&efx->pci_dev->dev, page_state->dma_addr,
+                      PAGE_SIZE << efx->rx_buffer_order, DMA_FROM_DEVICE);
+}
+
+/* Drop the page reference held by each of @num_bufs consecutive ring
+ * entries starting at @rx_buf, clearing the page pointer as we go.
+ * Entries without a page are skipped.  At least one entry is always
+ * visited.
+ */
+static void ef4_free_rx_buffers(struct ef4_rx_queue *rx_queue,
+                               struct ef4_rx_buffer *rx_buf,
+                               unsigned int num_bufs)
+{
+       for (;;) {
+               struct page *held = rx_buf->page;
+
+               if (held) {
+                       put_page(held);
+                       rx_buf->page = NULL;
+               }
+
+               rx_buf = ef4_rx_buf_next(rx_queue, rx_buf);
+               if (--num_bufs == 0)
+                       break;
+       }
+}
+
+/* Attempt to recycle the page if there is an RX recycle ring; the page can
+ * only be added if this is the final RX buffer, to prevent pages being used in
+ * the descriptor ring and appearing in the recycle ring simultaneously.
+ *
+ * If the ring is absent or its next slot is occupied, the page is unmapped
+ * and the reference that would have moved to the ring is dropped instead.
+ */
+static void ef4_recycle_rx_page(struct ef4_channel *channel,
+                               struct ef4_rx_buffer *rx_buf)
+{
+       struct page *page = rx_buf->page;
+       struct ef4_rx_queue *rx_queue = ef4_channel_get_rx_queue(channel);
+       struct ef4_nic *efx = rx_queue->efx;
+       unsigned index;
+
+       /* Only recycle the page after processing the final buffer. */
+       if (!(rx_buf->flags & EF4_RX_BUF_LAST_IN_PAGE))
+               return;
+
+       /* The ring comes from an unchecked kcalloc(); when it is missing,
+        * fall through and handle the page exactly as for a full ring
+        * rather than dereferencing a NULL array.
+        */
+       if (likely(rx_queue->page_ring)) {
+               index = rx_queue->page_add & rx_queue->page_ptr_mask;
+               if (rx_queue->page_ring[index] == NULL) {
+                       unsigned read_index = rx_queue->page_remove &
+                               rx_queue->page_ptr_mask;
+
+                       /* The next slot in the recycle ring is available, but
+                        * increment page_remove if the read pointer currently
+                        * points here.
+                        */
+                       if (read_index == index)
+                               ++rx_queue->page_remove;
+                       rx_queue->page_ring[index] = page;
+                       ++rx_queue->page_add;
+                       return;
+               }
+       }
+       ++rx_queue->page_recycle_full;
+       ef4_unmap_rx_buffer(efx, rx_buf);
+       put_page(rx_buf->page);
+}
+
+/* Release one RX buffer during queue shutdown.  The buffer's own page
+ * reference is always dropped; for the last buffer in a page the page is
+ * also unmapped and a second reference is dropped via
+ * ef4_free_rx_buffers().
+ */
+static void ef4_fini_rx_buffer(struct ef4_rx_queue *rx_queue,
+                              struct ef4_rx_buffer *rx_buf)
+{
+       /* Release the page reference we hold for the buffer. */
+       if (rx_buf->page)
+               put_page(rx_buf->page);
+
+       /* If this is the last buffer in a page, unmap and free it. */
+       if (rx_buf->flags & EF4_RX_BUF_LAST_IN_PAGE) {
+               ef4_unmap_rx_buffer(rx_queue->efx, rx_buf);
+               ef4_free_rx_buffers(rx_queue, rx_buf, 1);
+       }
+       rx_buf->page = NULL;
+}
+
+/* Offer the pages behind @n_frags consecutive received buffers, starting
+ * at @rx_buf, to the page recycle ring.  At least one buffer is always
+ * visited.
+ */
+static void ef4_recycle_rx_pages(struct ef4_channel *channel,
+                                struct ef4_rx_buffer *rx_buf,
+                                unsigned int n_frags)
+{
+       struct ef4_rx_queue *queue = ef4_channel_get_rx_queue(channel);
+
+       for (;;) {
+               ef4_recycle_rx_page(channel, rx_buf);
+               rx_buf = ef4_rx_buf_next(queue, rx_buf);
+
+               if (--n_frags == 0)
+                       break;
+       }
+}
+
+/* Drop a received packet of @n_frags buffers starting at @rx_buf: first
+ * offer its pages to the recycle ring, then release the per-buffer page
+ * references.
+ */
+static void ef4_discard_rx_packet(struct ef4_channel *channel,
+                                 struct ef4_rx_buffer *rx_buf,
+                                 unsigned int n_frags)
+{
+       struct ef4_rx_queue *rx_queue = ef4_channel_get_rx_queue(channel);
+
+       ef4_recycle_rx_pages(channel, rx_buf, n_frags);
+
+       ef4_free_rx_buffers(rx_queue, rx_buf, n_frags);
+}
+
+/**
+ * ef4_fast_push_rx_descriptors - push new RX descriptors quickly
+ * @rx_queue:          RX descriptor queue
+ * @atomic:            passed to ef4_init_rx_buffers() to select atomic
+ *                     page allocation
+ *
+ * This will aim to fill the RX descriptor queue up to
+ * @rx_queue->@max_fill. If a batch cannot be allocated and the queue
+ * would otherwise be left empty, a slow fill will be scheduled.
+ *
+ * The caller must provide serialisation (none is used here). In practice,
+ * this means this function must run from the NAPI handler, or be called
+ * when NAPI is disabled.
+ */
+void ef4_fast_push_rx_descriptors(struct ef4_rx_queue *rx_queue, bool atomic)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+       unsigned int fill_level, batch_size;
+       int space, rc = 0;
+
+       if (!rx_queue->refill_enabled)
+               return;
+
+       /* Calculate current fill level, and exit if we don't need to fill */
+       fill_level = (rx_queue->added_count - rx_queue->removed_count);
+       EF4_BUG_ON_PARANOID(fill_level > rx_queue->efx->rxq_entries);
+       if (fill_level >= rx_queue->fast_fill_trigger)
+               goto out;
+
+       /* Record minimum fill level */
+       if (unlikely(fill_level < rx_queue->min_fill)) {
+               if (fill_level)
+                       rx_queue->min_fill = fill_level;
+       }
+
+       batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
+       space = rx_queue->max_fill - fill_level;
+       EF4_BUG_ON_PARANOID(space < batch_size);
+
+       netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
+                  "RX queue %d fast-filling descriptor ring from"
+                  " level %d to level %d\n",
+                  ef4_rx_queue_index(rx_queue), fill_level,
+                  rx_queue->max_fill);
+
+
+       /* Add whole batches while at least one more batch fits */
+       do {
+               rc = ef4_init_rx_buffers(rx_queue, atomic);
+               if (unlikely(rc)) {
+                       /* Ensure that we don't leave the rx queue empty */
+                       if (rx_queue->added_count == rx_queue->removed_count)
+                               ef4_schedule_slow_fill(rx_queue);
+                       goto out;
+               }
+       } while ((space -= batch_size) >= batch_size);
+
+       netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
+                  "RX queue %d fast-filled descriptor ring "
+                  "to level %d\n", ef4_rx_queue_index(rx_queue),
+                  rx_queue->added_count - rx_queue->removed_count);
+
+ out:
+       /* Tell the NIC about any descriptors added since the last notify */
+       if (rx_queue->notified_count != rx_queue->added_count)
+               ef4_nic_notify_rx_desc(rx_queue);
+}
+
+/* Slow-fill callback.  @context is the RX queue pointer cast to unsigned
+ * long.  Generates a fill event for the queue and counts the invocation.
+ */
+void ef4_rx_slow_fill(unsigned long context)
+{
+       struct ef4_rx_queue *rx_queue = (struct ef4_rx_queue *)context;
+
+       /* Post an event to cause NAPI to run and refill the queue */
+       ef4_nic_generate_fill_event(rx_queue);
+       ++rx_queue->slow_fill_count;
+}
+
+/* Validate the reported length of a single-buffer packet.  If @len exceeds
+ * the usable part of the buffer (buffer length minus the NIC type's
+ * rx_buffer_padding), the buffer is flagged for discard, the event is
+ * logged (rate-limited) and the channel's overlength counter is bumped.
+ */
+static void ef4_rx_packet__check_len(struct ef4_rx_queue *rx_queue,
+                                    struct ef4_rx_buffer *rx_buf,
+                                    int len)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+       unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding;
+
+       if (likely(len <= max_len))
+               return;
+
+       /* The packet must be discarded.  It is only treated as serious,
+        * triggering an RX recovery reset, when the length exceeds the
+        * whole buffer and workaround 8071 applies.
+        */
+       rx_buf->flags |= EF4_RX_PKT_DISCARD;
+
+       if ((len > rx_buf->len) && EF4_WORKAROUND_8071(efx)) {
+               if (net_ratelimit())
+                       netif_err(efx, rx_err, efx->net_dev,
+                                 " RX queue %d seriously overlength "
+                                 "RX event (0x%x > 0x%x+0x%x). Leaking\n",
+                                 ef4_rx_queue_index(rx_queue), len, max_len,
+                                 efx->type->rx_buffer_padding);
+               ef4_schedule_reset(efx, RESET_TYPE_RX_RECOVERY);
+       } else {
+               if (net_ratelimit())
+                       netif_err(efx, rx_err, efx->net_dev,
+                                 " RX queue %d overlength RX event "
+                                 "(0x%x > 0x%x)\n",
+                                 ef4_rx_queue_index(rx_queue), len, max_len);
+       }
+
+       ef4_rx_queue_channel(rx_queue)->n_rx_overlength++;
+}
+
+/* Pass a received packet up through GRO.  GRO can handle pages
+ * regardless of checksum state and skbs with a good checksum.
+ *
+ * The @n_frags buffers starting at @rx_buf are attached to the NAPI
+ * frags skb as page fragments; ownership of each page reference moves
+ * to the skb.  If no skb is available the buffers are released instead.
+ */
+static void
+ef4_rx_packet_gro(struct ef4_channel *channel, struct ef4_rx_buffer *rx_buf,
+                 unsigned int n_frags, u8 *eh)
+{
+       struct napi_struct *napi = &channel->napi_str;
+       gro_result_t gro_result;
+       struct ef4_nic *efx = channel->efx;
+       struct sk_buff *skb;
+
+       skb = napi_get_frags(napi);
+       if (unlikely(!skb)) {
+               struct ef4_rx_queue *rx_queue;
+
+               rx_queue = ef4_channel_get_rx_queue(channel);
+               ef4_free_rx_buffers(rx_queue, rx_buf, n_frags);
+               return;
+       }
+
+       if (efx->net_dev->features & NETIF_F_RXHASH)
+               skb_set_hash(skb, ef4_rx_buf_hash(efx, eh),
+                            PKT_HASH_TYPE_L3);
+       skb->ip_summed = ((rx_buf->flags & EF4_RX_PKT_CSUMMED) ?
+                         CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
+
+       /* One page fragment per RX buffer, in ring order */
+       for (;;) {
+               skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+                                  rx_buf->page, rx_buf->page_offset,
+                                  rx_buf->len);
+               /* The skb now owns the buffer's page reference */
+               rx_buf->page = NULL;
+               skb->len += rx_buf->len;
+               if (skb_shinfo(skb)->nr_frags == n_frags)
+                       break;
+
+               rx_buf = ef4_rx_buf_next(&channel->rx_queue, rx_buf);
+       }
+
+       skb->data_len = skb->len;
+       skb->truesize += n_frags * efx->rx_buffer_truesize;
+
+       skb_record_rx_queue(skb, channel->rx_queue.core_index);
+
+       gro_result = napi_gro_frags(napi);
+       if (gro_result != GRO_DROP)
+               channel->irq_mod_score += 2;
+}
+
+/* Allocate and construct an SKB around page fragments.
+ *
+ * The first @hdr_len bytes at @eh (plus the RX prefix that precedes them)
+ * are copied into the skb's linear area; any remaining data is attached as
+ * page fragments taken from the @n_frags buffers starting at @rx_buf.
+ * Returns the skb, or NULL (after counting the drop) if allocation fails;
+ * in that case the buffers are left untouched for the caller to release.
+ */
+static struct sk_buff *ef4_rx_mk_skb(struct ef4_channel *channel,
+                                    struct ef4_rx_buffer *rx_buf,
+                                    unsigned int n_frags,
+                                    u8 *eh, int hdr_len)
+{
+       struct ef4_nic *efx = channel->efx;
+       struct sk_buff *skb;
+
+       /* Allocate an SKB to store the headers */
+       skb = netdev_alloc_skb(efx->net_dev,
+                              efx->rx_ip_align + efx->rx_prefix_size +
+                              hdr_len);
+       if (unlikely(skb == NULL)) {
+               atomic_inc(&efx->n_rx_noskb_drops);
+               return NULL;
+       }
+
+       EF4_BUG_ON_PARANOID(rx_buf->len < hdr_len);
+
+       /* Copy prefix and headers, then reserve so that skb->data points
+        * at the headers rather than at the prefix.
+        */
+       memcpy(skb->data + efx->rx_ip_align, eh - efx->rx_prefix_size,
+              efx->rx_prefix_size + hdr_len);
+       skb_reserve(skb, efx->rx_ip_align + efx->rx_prefix_size);
+       __skb_put(skb, hdr_len);
+
+       /* Append the remaining page(s) onto the frag list */
+       if (rx_buf->len > hdr_len) {
+               /* Skip the part of the first buffer already copied */
+               rx_buf->page_offset += hdr_len;
+               rx_buf->len -= hdr_len;
+
+               for (;;) {
+                       skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+                                          rx_buf->page, rx_buf->page_offset,
+                                          rx_buf->len);
+                       /* The skb now owns the buffer's page reference */
+                       rx_buf->page = NULL;
+                       skb->len += rx_buf->len;
+                       skb->data_len += rx_buf->len;
+                       if (skb_shinfo(skb)->nr_frags == n_frags)
+                               break;
+
+                       rx_buf = ef4_rx_buf_next(&channel->rx_queue, rx_buf);
+               }
+       } else {
+               /* Everything fitted in the linear area: the page is not
+                * attached to the skb, so release it here.
+                */
+               __free_pages(rx_buf->page, efx->rx_buffer_order);
+               rx_buf->page = NULL;
+               n_frags = 0;
+       }
+
+       skb->truesize += n_frags * efx->rx_buffer_truesize;
+
+       /* Move past the ethernet header */
+       skb->protocol = eth_type_trans(skb, efx->net_dev);
+
+       skb_mark_napi_id(skb, &channel->napi_str);
+
+       return skb;
+}
+
+/* Handle a received packet.  First half: validates the fragment count and
+ * length, discards bad packets, syncs the DMA buffers, recycles pages and
+ * records the packet (index and fragment count) on the channel so that
+ * __ef4_rx_packet() can deliver it later.
+ *
+ * @index is the ring index of the first buffer, @n_frags the number of
+ * buffers used, @len the completed length and @flags the EF4_RX_PKT_*
+ * flags to merge into the first buffer.
+ */
+void ef4_rx_packet(struct ef4_rx_queue *rx_queue, unsigned int index,
+                  unsigned int n_frags, unsigned int len, u16 flags)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+       struct ef4_channel *channel = ef4_rx_queue_channel(rx_queue);
+       struct ef4_rx_buffer *rx_buf;
+
+       rx_queue->rx_packets++;
+
+       rx_buf = ef4_rx_buffer(rx_queue, index);
+       rx_buf->flags |= flags;
+
+       /* Validate the number of fragments and completed length */
+       if (n_frags == 1) {
+               if (!(flags & EF4_RX_PKT_PREFIX_LEN))
+                       ef4_rx_packet__check_len(rx_queue, rx_buf, len);
+       } else if (unlikely(n_frags > EF4_RX_MAX_FRAGS) ||
+                  unlikely(len <= (n_frags - 1) * efx->rx_dma_len) ||
+                  unlikely(len > n_frags * efx->rx_dma_len) ||
+                  unlikely(!efx->rx_scatter)) {
+               /* If this isn't an explicit discard request, either
+                * the hardware or the driver is broken.
+                */
+               WARN_ON(!(len == 0 && rx_buf->flags & EF4_RX_PKT_DISCARD));
+               rx_buf->flags |= EF4_RX_PKT_DISCARD;
+       }
+
+       netif_vdbg(efx, rx_status, efx->net_dev,
+                  "RX queue %d received ids %x-%x len %d %s%s\n",
+                  ef4_rx_queue_index(rx_queue), index,
+                  (index + n_frags - 1) & rx_queue->ptr_mask, len,
+                  (rx_buf->flags & EF4_RX_PKT_CSUMMED) ? " [SUMMED]" : "",
+                  (rx_buf->flags & EF4_RX_PKT_DISCARD) ? " [DISCARD]" : "");
+
+       /* Discard packet, if instructed to do so.  Process the
+        * previous receive first.
+        */
+       if (unlikely(rx_buf->flags & EF4_RX_PKT_DISCARD)) {
+               ef4_rx_flush_packet(channel);
+               ef4_discard_rx_packet(channel, rx_buf, n_frags);
+               return;
+       }
+
+       /* With a prefix length the real length is read later, in
+        * __ef4_rx_packet(); otherwise record it now.
+        */
+       if (n_frags == 1 && !(flags & EF4_RX_PKT_PREFIX_LEN))
+               rx_buf->len = len;
+
+       /* Release and/or sync the DMA mapping - assumes all RX buffers
+        * consumed in-order per RX queue.
+        */
+       ef4_sync_rx_buffer(efx, rx_buf, rx_buf->len);
+
+       /* Prefetch nice and early so data will (hopefully) be in cache by
+        * the time we look at it.
+        */
+       prefetch(ef4_rx_buf_va(rx_buf));
+
+       /* Step over the RX prefix in the first buffer */
+       rx_buf->page_offset += efx->rx_prefix_size;
+       rx_buf->len -= efx->rx_prefix_size;
+
+       if (n_frags > 1) {
+               /* Release/sync DMA mapping for additional fragments.
+                * Fix length for last fragment.
+                */
+               unsigned int tail_frags = n_frags - 1;
+
+               for (;;) {
+                       rx_buf = ef4_rx_buf_next(rx_queue, rx_buf);
+                       if (--tail_frags == 0)
+                               break;
+                       ef4_sync_rx_buffer(efx, rx_buf, efx->rx_dma_len);
+               }
+               rx_buf->len = len - (n_frags - 1) * efx->rx_dma_len;
+               ef4_sync_rx_buffer(efx, rx_buf, rx_buf->len);
+       }
+
+       /* All fragments have been DMA-synced, so recycle pages. */
+       rx_buf = ef4_rx_buffer(rx_queue, index);
+       ef4_recycle_rx_pages(channel, rx_buf, n_frags);
+
+       /* Pipeline receives so that we give time for packet headers to be
+        * prefetched into cache.
+        */
+       ef4_rx_flush_packet(channel);
+       channel->rx_pkt_n_frags = n_frags;
+       channel->rx_pkt_index = index;
+}
+
+/* Deliver a packet outside of GRO: wrap the buffers in an skb, set its
+ * checksum state, give the channel type's receive_skb hook (if any) the
+ * chance to consume it, and otherwise hand it to the network stack.  If no
+ * skb can be built the buffers are released.
+ */
+static void ef4_rx_deliver(struct ef4_channel *channel, u8 *eh,
+                          struct ef4_rx_buffer *rx_buf,
+                          unsigned int n_frags)
+{
+       u16 hdr_len = min_t(u16, rx_buf->len, EF4_SKB_HEADERS);
+       struct sk_buff *skb = ef4_rx_mk_skb(channel, rx_buf, n_frags, eh,
+                                           hdr_len);
+
+       if (unlikely(!skb)) {
+               ef4_free_rx_buffers(ef4_channel_get_rx_queue(channel),
+                                   rx_buf, n_frags);
+               return;
+       }
+
+       skb_record_rx_queue(skb, channel->rx_queue.core_index);
+
+       /* Checksum state: none unless the NIC validated it */
+       skb_checksum_none_assert(skb);
+       if (likely(rx_buf->flags & EF4_RX_PKT_CSUMMED))
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+       /* A non-zero return from the hook means the skb was consumed */
+       if (channel->type->receive_skb &&
+           channel->type->receive_skb(channel, skb))
+               return;
+
+       /* Hand the packet to the stack */
+       netif_receive_skb(skb);
+}
+
+/* Handle a received packet.  Second half: Touches packet payload.
+ *
+ * Operates on the packet recorded on @channel (rx_pkt_index and
+ * rx_pkt_n_frags) and clears rx_pkt_n_frags when done.
+ */
+void __ef4_rx_packet(struct ef4_channel *channel)
+{
+       struct ef4_nic *efx = channel->efx;
+       struct ef4_rx_buffer *rx_buf =
+               ef4_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
+       u8 *eh = ef4_rx_buf_va(rx_buf);
+
+       /* Read length from the prefix if necessary.  This already
+        * excludes the length of the prefix itself.
+        */
+       if (rx_buf->flags & EF4_RX_PKT_PREFIX_LEN)
+               rx_buf->len = le16_to_cpup((__le16 *)
+                                          (eh + efx->rx_packet_len_offset));
+
+       /* If we're in loopback test, then pass the packet directly to the
+        * loopback layer, and free the rx_buf here
+        */
+       if (unlikely(efx->loopback_selftest)) {
+               struct ef4_rx_queue *rx_queue;
+
+               ef4_loopback_rx_packet(efx, eh, rx_buf->len);
+               rx_queue = ef4_channel_get_rx_queue(channel);
+               ef4_free_rx_buffers(rx_queue, rx_buf,
+                                   channel->rx_pkt_n_frags);
+               goto out;
+       }
+
+       /* Ignore the hardware checksum result if RX checksum offload is off */
+       if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
+               rx_buf->flags &= ~EF4_RX_PKT_CSUMMED;
+
+       /* GRO is used only for TCP packets, when the channel type has no
+        * receive_skb hook and the channel is not busy polling.
+        */
+       if ((rx_buf->flags & EF4_RX_PKT_TCP) && !channel->type->receive_skb &&
+           !ef4_channel_busy_polling(channel))
+               ef4_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
+       else
+               ef4_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
+out:
+       channel->rx_pkt_n_frags = 0;
+}
+
+/* Allocate the software state for an RX queue: size the ring, allocate the
+ * zeroed buffer table and probe the hardware ring.  Returns 0 on success,
+ * -ENOMEM if the buffer table cannot be allocated, or the error from
+ * ef4_nic_probe_rx() (in which case the buffer table is freed again).
+ */
+int ef4_probe_rx_queue(struct ef4_rx_queue *rx_queue)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+       unsigned int ring_size;
+       int err;
+
+       /* Create the smallest power-of-two aligned ring */
+       ring_size = max(roundup_pow_of_two(efx->rxq_entries), EF4_MIN_DMAQ_SIZE);
+       EF4_BUG_ON_PARANOID(ring_size > EF4_MAX_DMAQ_SIZE);
+       rx_queue->ptr_mask = ring_size - 1;
+
+       netif_dbg(efx, probe, efx->net_dev,
+                 "creating RX queue %d size %#x mask %#x\n",
+                 ef4_rx_queue_index(rx_queue), efx->rxq_entries,
+                 rx_queue->ptr_mask);
+
+       /* One software descriptor per ring entry */
+       rx_queue->buffer = kcalloc(ring_size, sizeof(*rx_queue->buffer),
+                                  GFP_KERNEL);
+       if (rx_queue->buffer == NULL)
+               return -ENOMEM;
+
+       err = ef4_nic_probe_rx(rx_queue);
+       if (!err)
+               return 0;
+
+       /* Hardware probe failed: undo the allocation */
+       kfree(rx_queue->buffer);
+       rx_queue->buffer = NULL;
+       return err;
+}
+
+/* Allocate the RX page recycle ring for @rx_queue.
+ *
+ * The ring is sized for EF4_RECYCLE_RING_SIZE_IOMMU buffers on PPC64 or
+ * when an IOMMU is present on the PCI bus, and for
+ * EF4_RECYCLE_RING_SIZE_NOIOMMU buffers otherwise, converted to pages and
+ * rounded up to a power of two.  If the allocation fails, page_ring is
+ * left NULL, page_ptr_mask is set to 0 and a warning is logged.
+ */
+static void ef4_init_rx_recycle_ring(struct ef4_nic *efx,
+                                    struct ef4_rx_queue *rx_queue)
+{
+       unsigned int bufs_in_recycle_ring, page_ring_size;
+
+       /* Set the RX recycle ring size */
+#ifdef CONFIG_PPC64
+       bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_IOMMU;
+#else
+       if (iommu_present(&pci_bus_type))
+               bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_IOMMU;
+       else
+               bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_NOIOMMU;
+#endif /* CONFIG_PPC64 */
+
+       page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
+                                           efx->rx_bufs_per_page);
+       rx_queue->page_ring = kcalloc(page_ring_size,
+                                     sizeof(*rx_queue->page_ring), GFP_KERNEL);
+       if (!rx_queue->page_ring) {
+               /* Do not advertise a ring size that was never allocated */
+               netif_warn(efx, drv, efx->net_dev,
+                          "RX queue %d failed to allocate page recycle ring\n",
+                          ef4_rx_queue_index(rx_queue));
+               rx_queue->page_ptr_mask = 0;
+       } else {
+               rx_queue->page_ptr_mask = page_ring_size - 1;
+       }
+}
+
+/* Reset an RX queue's software state ready for use: zero the ring
+ * counters, create the page recycle ring, compute the fill limits from
+ * rxq_entries and the rx_refill_threshold module parameter, enable
+ * refilling and initialise the hardware descriptor ring.
+ */
+void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue)
+{
+       struct ef4_nic *efx = rx_queue->efx;
+       unsigned int max_fill, trigger, max_trigger;
+
+       netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+                 "initialising RX queue %d\n", ef4_rx_queue_index(rx_queue));
+
+       /* Initialise ptr fields */
+       rx_queue->added_count = 0;
+       rx_queue->notified_count = 0;
+       rx_queue->removed_count = 0;
+       rx_queue->min_fill = -1U;
+       ef4_init_rx_recycle_ring(efx, rx_queue);
+
+       /* page_add starts one full ring ahead of page_remove */
+       rx_queue->page_remove = 0;
+       rx_queue->page_add = rx_queue->page_ptr_mask + 1;
+       rx_queue->page_recycle_count = 0;
+       rx_queue->page_recycle_failed = 0;
+       rx_queue->page_recycle_full = 0;
+
+       /* Initialise limit fields */
+       max_fill = efx->rxq_entries - EF4_RXD_HEAD_ROOM;
+       /* The trigger must leave room for at least one whole batch */
+       max_trigger =
+               max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
+       if (rx_refill_threshold != 0) {
+               /* Percentage of max_fill, capped at 100% and at max_trigger */
+               trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
+               if (trigger > max_trigger)
+                       trigger = max_trigger;
+       } else {
+               trigger = max_trigger;
+       }
+
+       rx_queue->max_fill = max_fill;
+       rx_queue->fast_fill_trigger = trigger;
+       rx_queue->refill_enabled = true;
+
+       /* Set up RX descriptor ring */
+       ef4_nic_init_rx(rx_queue);
+}
+
+/* Shut down an RX queue: stop the slow-fill timer, release every buffer
+ * still posted to the ring, then unmap and release the pages parked in the
+ * recycle ring and free the ring itself.  Safe to call when the recycle
+ * ring was never allocated.
+ */
+void ef4_fini_rx_queue(struct ef4_rx_queue *rx_queue)
+{
+       int i;
+       struct ef4_nic *efx = rx_queue->efx;
+       struct ef4_rx_buffer *rx_buf;
+
+       netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+                 "shutting down RX queue %d\n", ef4_rx_queue_index(rx_queue));
+
+       del_timer_sync(&rx_queue->slow_fill);
+
+       /* Release RX buffers from the current read ptr to the write ptr */
+       if (rx_queue->buffer) {
+               for (i = rx_queue->removed_count; i < rx_queue->added_count;
+                    i++) {
+                       unsigned index = i & rx_queue->ptr_mask;
+                       rx_buf = ef4_rx_buffer(rx_queue, index);
+                       ef4_fini_rx_buffer(rx_queue, rx_buf);
+               }
+       }
+
+       /* Unmap and release the pages in the recycle ring. Remove the ring.
+        * The ring comes from an unchecked kcalloc(), so it may be NULL;
+        * walking it unconditionally would dereference page_ring[0].
+        */
+       if (rx_queue->page_ring) {
+               for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
+                       struct page *page = rx_queue->page_ring[i];
+                       struct ef4_rx_page_state *state;
+
+                       if (page == NULL)
+                               continue;
+
+                       state = page_address(page);
+                       dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+                                      PAGE_SIZE << efx->rx_buffer_order,
+                                      DMA_FROM_DEVICE);
+                       put_page(page);
+               }
+       }
+       kfree(rx_queue->page_ring);
+       rx_queue->page_ring = NULL;
+}
+
+/* Destroy an RX queue: remove the hardware ring state, then free the
+ * software buffer table and clear the pointer.
+ */
+void ef4_remove_rx_queue(struct ef4_rx_queue *rx_queue)
+{
+       netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+                 "destroying RX queue %d\n", ef4_rx_queue_index(rx_queue));
+
+       ef4_nic_remove_rx(rx_queue);
+
+       kfree(rx_queue->buffer);
+       rx_queue->buffer = NULL;
+}
+
+
+/* Expose rx_refill_threshold as an unsigned module parameter (mode 0444) */
+module_param(rx_refill_threshold, uint, 0444);
+MODULE_PARM_DESC(rx_refill_threshold,
+                "RX descriptor ring refill threshold (%)");
+
+#ifdef CONFIG_RFS_ACCEL
+
+/* Insert an accelerated-RFS hint filter that steers the flow of @skb to
+ * RX queue @rxq_index.
+ *
+ * Only unfragmented IPv4/IPv6 flows that the flow dissector can parse are
+ * accepted; anything else yields -EPROTONOSUPPORT, and an invalid
+ * @flow_id yields -EINVAL. The filter matches on ethertype, IP protocol
+ * and the full host/port tuple, where "remote" is the packet's source and
+ * "local" is its destination.
+ *
+ * Returns the value from the NIC type's filter_rfs_insert(): negative
+ * values are passed back unchanged as errors, any other value is also
+ * used as the slot in the channel's rps_flow_id[] table where @flow_id
+ * is remembered for later expiry checks.
+ */
+int ef4_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
+                  u16 rxq_index, u32 flow_id)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       struct ef4_channel *channel;
+       struct ef4_filter_spec spec;
+       struct flow_keys fk;
+       int rc;
+
+       if (flow_id == RPS_FLOW_ID_INVALID)
+               return -EINVAL;
+
+       if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+               return -EPROTONOSUPPORT;
+
+       if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6))
+               return -EPROTONOSUPPORT;
+       if (fk.control.flags & FLOW_DIS_IS_FRAGMENT)
+               return -EPROTONOSUPPORT;
+
+       ef4_filter_init_rx(&spec, EF4_FILTER_PRI_HINT,
+                          efx->rx_scatter ? EF4_FILTER_FLAG_RX_SCATTER : 0,
+                          rxq_index);
+       spec.match_flags =
+               EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_IP_PROTO |
+               EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_LOC_PORT |
+               EF4_FILTER_MATCH_REM_HOST | EF4_FILTER_MATCH_REM_PORT;
+       spec.ether_type = fk.basic.n_proto;
+       spec.ip_proto = fk.basic.ip_proto;
+
+       /* The packet was received by us, so its source is the remote end */
+       if (fk.basic.n_proto == htons(ETH_P_IP)) {
+               spec.rem_host[0] = fk.addrs.v4addrs.src;
+               spec.loc_host[0] = fk.addrs.v4addrs.dst;
+       } else {
+               memcpy(spec.rem_host, &fk.addrs.v6addrs.src, sizeof(struct in6_addr));
+               memcpy(spec.loc_host, &fk.addrs.v6addrs.dst, sizeof(struct in6_addr));
+       }
+
+       spec.rem_port = fk.ports.src;
+       spec.loc_port = fk.ports.dst;
+
+       rc = efx->type->filter_rfs_insert(efx, &spec);
+       if (rc < 0)
+               return rc;
+
+       /* Remember this so we can check whether to expire the filter later */
+       channel = ef4_get_channel(efx, rxq_index);
+       channel->rps_flow_id[rc] = flow_id;
+       ++channel->rfs_filters_added;
+
+       /* NOTE(review): ip_proto is not restricted to TCP/UDP in this
+        * function, so the messages below print "UDP" for any non-TCP
+        * protocol - confirm whether filter_rfs_insert() rejects those.
+        */
+       if (spec.ether_type == htons(ETH_P_IP))
+               netif_info(efx, rx_status, efx->net_dev,
+                          "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
+                          (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+                          spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
+                          ntohs(spec.loc_port), rxq_index, flow_id, rc);
+       else
+               netif_info(efx, rx_status, efx->net_dev,
+                          "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
+                          (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+                          spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
+                          ntohs(spec.loc_port), rxq_index, flow_id, rc);
+
+       return rc;
+}
+
+/* Examine up to @quota RFS filter slots for expiry, resuming from where
+ * the previous call stopped (efx->rps_expire_channel/rps_expire_index)
+ * and wrapping round over all channels.
+ *
+ * Returns false without doing anything if the filter lock is contended,
+ * true once the quota has been worked through.
+ */
+bool __ef4_filter_rfs_expire(struct ef4_nic *efx, unsigned int quota)
+{
+       bool (*expire_one)(struct ef4_nic *efx, u32 flow_id, unsigned int index);
+       unsigned int chan, slot, n_slots;
+
+       if (!spin_trylock_bh(&efx->filter_lock))
+               return false;
+
+       expire_one = efx->type->filter_rfs_expire_one;
+       n_slots = efx->type->max_rx_ip_filters;
+       chan = efx->rps_expire_channel;
+       slot = efx->rps_expire_index;
+
+       for (; quota != 0; quota--) {
+               struct ef4_channel *channel = ef4_get_channel(efx, chan);
+               u32 flow_id = channel->rps_flow_id[slot];
+
+               if (flow_id != RPS_FLOW_ID_INVALID &&
+                   expire_one(efx, flow_id, slot)) {
+                       netif_info(efx, rx_status, efx->net_dev,
+                                  "expired filter %d [queue %u flow %u]\n",
+                                  slot, chan, flow_id);
+                       channel->rps_flow_id[slot] = RPS_FLOW_ID_INVALID;
+               }
+
+               slot++;
+               if (slot == n_slots) {
+                       /* End of this channel's table: go to the next one */
+                       slot = 0;
+                       chan++;
+                       if (chan == efx->n_channels)
+                               chan = 0;
+               }
+       }
+
+       /* Save the cursor for the next call */
+       efx->rps_expire_channel = chan;
+       efx->rps_expire_index = slot;
+
+       spin_unlock_bh(&efx->filter_lock);
+       return true;
+}
+
+#endif /* CONFIG_RFS_ACCEL */
+
+/**
+ * ef4_filter_is_mc_recipient - test whether spec is a multicast recipient
+ * @spec: Specification to test
+ *
+ * Return: %true if @spec is an RX filter that does not drop and that
+ * either matches a local MAC address with the I/G bit set, or matches a
+ * local IPv4/IPv6 address lying in that family's multicast range.
+ * %false in every other case.
+ */
+bool ef4_filter_is_mc_recipient(const struct ef4_filter_spec *spec)
+{
+       /* Only RX filters that actually deliver somewhere can qualify */
+       if (!(spec->flags & EF4_FILTER_FLAG_RX))
+               return false;
+       if (spec->dmaq_id == EF4_FILTER_RX_DMAQ_ID_DROP)
+               return false;
+
+       /* Multicast by local MAC address */
+       if ((spec->match_flags &
+            (EF4_FILTER_MATCH_LOC_MAC | EF4_FILTER_MATCH_LOC_MAC_IG)) &&
+           is_multicast_ether_addr(spec->loc_mac))
+               return true;
+
+       /* The IP tests need both the ethertype and the local host matched */
+       if ((spec->match_flags &
+            (EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_LOC_HOST)) !=
+           (EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_LOC_HOST))
+               return false;
+
+       if (spec->ether_type == htons(ETH_P_IP))
+               return ipv4_is_multicast(spec->loc_host[0]);
+
+       /* IPv6 multicast addresses start with the byte 0xff */
+       if (spec->ether_type == htons(ETH_P_IPV6))
+               return ((const u8 *)spec->loc_host)[0] == 0xff;
+
+       return false;
+}
diff --git a/drivers/net/ethernet/sfc/falcon/selftest.c b/drivers/net/ethernet/sfc/falcon/selftest.c
new file mode 100644 (file)
index 0000000..92bc34c
--- /dev/null
@@ -0,0 +1,808 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2012 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/kernel_stat.h>
+#include <linux/pci.h>
+#include <linux/ethtool.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "nic.h"
+#include "selftest.h"
+#include "workarounds.h"
+
+/* IRQ latency can be enormous because:
+ * - All IRQs may be disabled on a CPU for a *long* time by e.g. a
+ *   slow serial console or an old IDE driver doing error recovery
+ * - The PREEMPT_RT patches mostly deal with this, but also allow a
+ *   tasklet or normal task to be given higher priority than our IRQ
+ *   threads
+ * Try to avoid blaming the hardware for this.
+ *
+ * The value is in jiffies: it is added to the current jiffies count to
+ * form a deadline and passed as a delay to schedule_delayed_work().
+ */
+#define IRQ_TIMEOUT HZ
+
+/*
+ * Loopback test packet structure
+ *
+ * The self-test should stress every RSS vector, and unfortunately
+ * Falcon only performs RSS on TCP/UDP packets.
+ *
+ * The structure is the complete on-wire frame (hence __packed): it is
+ * copied verbatim into each transmitted skb and compared field by field
+ * against each received frame.
+ */
+struct ef4_loopback_payload {
+       struct ethhdr header;
+       struct iphdr ip;
+       struct udphdr udp;
+       /* Incremented by one for every test iteration */
+       __be16 iteration;
+       /* Holds payload_msg; checked in full, padding included, on receive */
+       char msg[64];
+} __packed;
+
+/* Loopback test source MAC address */
+static const u8 payload_source[ETH_ALEN] __aligned(2) = {
+       0x00, 0x0f, 0x53, 0x1b, 0x1b, 0x1b,
+};
+
+/* Text copied into the msg field of every loopback test packet */
+static const char payload_msg[] =
+       "Hello world! This is an Efx loopback test in progress!";
+
+/* Interrupt mode names
+ * The _max/_names pair follows the naming that STRING_TABLE_LOOKUP is
+ * given below (prefix ef4_interrupt_mode).
+ */
+static const unsigned int ef4_interrupt_mode_max = EF4_INT_MODE_MAX;
+static const char *const ef4_interrupt_mode_names[] = {
+       [EF4_INT_MODE_MSIX]   = "MSI-X",
+       [EF4_INT_MODE_MSI]    = "MSI",
+       [EF4_INT_MODE_LEGACY] = "legacy",
+};
+/* Printable name of the interrupt mode currently used by @efx */
+#define INT_MODE(efx) \
+       STRING_TABLE_LOOKUP(efx->interrupt_mode, ef4_interrupt_mode)
+
+/**
+ * struct ef4_loopback_state - persistent state during a loopback selftest
+ * @flush:             Drop all packets in ef4_loopback_rx_packet
+ * @packet_count:      Number of packets being used in this test
+ * @skbs:              An array of skbs transmitted
+ * @offload_csum:      Checksums are being offloaded
+ * @rx_good:           RX good packet count
+ * @rx_bad:            RX bad packet count
+ * @payload:           Payload used in tests
+ */
+struct ef4_loopback_state {
+       bool flush;
+       int packet_count;
+       struct sk_buff **skbs;
+       bool offload_csum;
+       atomic_t rx_good;
+       atomic_t rx_bad;
+       struct ef4_loopback_payload payload;
+};
+
+/* How long to wait for all the packets to arrive (in ms) */
+#define LOOPBACK_TIMEOUT_MS 1000
+
+/**************************************************************************
+ *
+ * MII, NVRAM and register tests
+ *
+ **************************************************************************/
+
+/* Run the PHY's test_alive hook, if it has one, and record the outcome in
+ * @tests->phy_alive (1 = passed, -1 = failed, left untouched when the
+ * hook is absent). Returns the hook's result, or 0 if it was not run.
+ */
+static int ef4_test_phy_alive(struct ef4_nic *efx, struct ef4_self_tests *tests)
+{
+       int rc;
+
+       if (!efx->phy_op->test_alive)
+               return 0;
+
+       rc = efx->phy_op->test_alive(efx);
+       tests->phy_alive = rc ? -1 : 1;
+       return rc;
+}
+
+/* Run the NIC type's test_nvram hook, if it has one. A result of -EPERM
+ * is treated as "not run": 0 is returned and @tests->nvram is left
+ * alone. Any other result is recorded as 1 (passed) or -1 (failed) and
+ * returned to the caller.
+ */
+static int ef4_test_nvram(struct ef4_nic *efx, struct ef4_self_tests *tests)
+{
+       int rc;
+
+       if (!efx->type->test_nvram)
+               return 0;
+
+       rc = efx->type->test_nvram(efx);
+       if (rc == -EPERM)
+               return 0;
+
+       tests->nvram = rc ? -1 : 1;
+       return rc;
+}
+
+/**************************************************************************
+ *
+ * Interrupt and event queue testing
+ *
+ **************************************************************************/
+
+/* Test generation and receipt of interrupts
+ *
+ * Starts the NIC's interrupt test and polls for the result, sleeping
+ * 1, 2, 4, ... jiffies between checks, until a CPU is reported or
+ * IRQ_TIMEOUT has elapsed. @tests->interrupt ends up as 1 on success,
+ * 0 if the test is not supported and -1 on timeout.
+ *
+ * NOTE(review): only -ENOTSUPP from ef4_nic_irq_test_start() is acted
+ * on; any other return value is ignored - confirm that is intended.
+ */
+static int ef4_test_interrupts(struct ef4_nic *efx,
+                              struct ef4_self_tests *tests)
+{
+       unsigned long deadline, delay;
+       int irq_cpu;
+
+       netif_dbg(efx, drv, efx->net_dev, "testing interrupts\n");
+       tests->interrupt = -1;
+
+       if (ef4_nic_irq_test_start(efx) == -ENOTSUPP) {
+               netif_dbg(efx, drv, efx->net_dev,
+                         "direct interrupt testing not supported\n");
+               tests->interrupt = 0;
+               return 0;
+       }
+
+       deadline = jiffies + IRQ_TIMEOUT;
+       delay = 1;
+
+       /* Wait for arrival of test interrupt. */
+       netif_dbg(efx, drv, efx->net_dev, "waiting for test interrupt\n");
+       do {
+               schedule_timeout_uninterruptible(delay);
+               irq_cpu = ef4_nic_irq_test_irq_cpu(efx);
+               if (irq_cpu >= 0) {
+                       netif_dbg(efx, drv, efx->net_dev,
+                                 "%s test interrupt seen on CPU%d\n",
+                                 INT_MODE(efx), irq_cpu);
+                       tests->interrupt = 1;
+                       return 0;
+               }
+               delay *= 2;
+       } while (time_before(jiffies, deadline));
+
+       netif_err(efx, drv, efx->net_dev, "timed out waiting for interrupt\n");
+       return -ETIMEDOUT;
+}
+
+/* Test generation and receipt of interrupting events
+ *
+ * A test event is started on every channel, then all channels are polled
+ * (with the sleep doubling each round) until each one has shown both the
+ * event itself and its interrupt, or until IRQ_TIMEOUT has elapsed.
+ * Per-channel results go to @tests->eventq_dma[] and @tests->eventq_int[]
+ * as 1 (seen) or -1 (not seen).
+ *
+ * Returns 0 if every channel saw both, -ETIMEDOUT otherwise.
+ */
+static int ef4_test_eventq_irq(struct ef4_nic *efx,
+                              struct ef4_self_tests *tests)
+{
+       struct ef4_channel *channel;
+       unsigned int read_ptr[EF4_MAX_CHANNELS];
+       /* One bit per channel in each of these masks */
+       unsigned long napi_ran = 0, dma_pend = 0, int_pend = 0;
+       unsigned long timeout, wait;
+
+       /* The masks above are single longs, so every channel must fit */
+       BUILD_BUG_ON(EF4_MAX_CHANNELS > BITS_PER_LONG);
+
+       /* Snapshot each read pointer, mark both results pending, fire test */
+       ef4_for_each_channel(channel, efx) {
+               read_ptr[channel->channel] = channel->eventq_read_ptr;
+               set_bit(channel->channel, &dma_pend);
+               set_bit(channel->channel, &int_pend);
+               ef4_nic_event_test_start(channel);
+       }
+
+       timeout = jiffies + IRQ_TIMEOUT;
+       wait = 1;
+
+       /* Wait for arrival of interrupts.  NAPI processing may or may
+        * not complete in time, but we can cope in any case.
+        */
+       do {
+               schedule_timeout_uninterruptible(wait);
+
+               ef4_for_each_channel(channel, efx) {
+                       ef4_stop_eventq(channel);
+                       if (channel->eventq_read_ptr !=
+                           read_ptr[channel->channel]) {
+                               /* Read pointer moved: NAPI processed the
+                                * event, so count both DMA and interrupt
+                                * as seen.
+                                */
+                               set_bit(channel->channel, &napi_ran);
+                               clear_bit(channel->channel, &dma_pend);
+                               clear_bit(channel->channel, &int_pend);
+                       } else {
+                               /* NAPI has not run; check each half
+                                * separately.
+                                */
+                               if (ef4_nic_event_present(channel))
+                                       clear_bit(channel->channel, &dma_pend);
+                               if (ef4_nic_event_test_irq_cpu(channel) >= 0)
+                                       clear_bit(channel->channel, &int_pend);
+                       }
+                       ef4_start_eventq(channel);
+               }
+
+               wait *= 2;
+       } while ((dma_pend || int_pend) && time_before(jiffies, timeout));
+
+       /* Record and report the per-channel outcome */
+       ef4_for_each_channel(channel, efx) {
+               bool dma_seen = !test_bit(channel->channel, &dma_pend);
+               bool int_seen = !test_bit(channel->channel, &int_pend);
+
+               tests->eventq_dma[channel->channel] = dma_seen ? 1 : -1;
+               tests->eventq_int[channel->channel] = int_seen ? 1 : -1;
+
+               if (dma_seen && int_seen) {
+                       netif_dbg(efx, drv, efx->net_dev,
+                                 "channel %d event queue passed (with%s NAPI)\n",
+                                 channel->channel,
+                                 test_bit(channel->channel, &napi_ran) ?
+                                 "" : "out");
+               } else {
+                       /* Report failure and whether either interrupt or DMA
+                        * worked
+                        */
+                       netif_err(efx, drv, efx->net_dev,
+                                 "channel %d timed out waiting for event queue\n",
+                                 channel->channel);
+                       if (int_seen)
+                               netif_err(efx, drv, efx->net_dev,
+                                         "channel %d saw interrupt "
+                                         "during event queue test\n",
+                                         channel->channel);
+                       if (dma_seen)
+                               netif_err(efx, drv, efx->net_dev,
+                                         "channel %d event was generated, but "
+                                         "failed to trigger an interrupt\n",
+                                         channel->channel);
+               }
+       }
+
+       /* Any bit still pending means at least one channel failed */
+       return (dma_pend || int_pend) ? -ETIMEDOUT : 0;
+}
+
+/* Run the PHY's own self-tests, if it provides any, under the MAC lock.
+ * Individual results are written by the PHY code into @tests->phy_ext.
+ * A result of -EPERM is mapped to 0 without logging; any other result is
+ * logged as passed/failed and returned unchanged.
+ */
+static int ef4_test_phy(struct ef4_nic *efx, struct ef4_self_tests *tests,
+                       unsigned flags)
+{
+       int rc;
+
+       if (efx->phy_op->run_tests == NULL)
+               return 0;
+
+       mutex_lock(&efx->mac_lock);
+       rc = efx->phy_op->run_tests(efx, tests->phy_ext, flags);
+       mutex_unlock(&efx->mac_lock);
+
+       if (rc == -EPERM)
+               return 0;
+
+       netif_info(efx, drv, efx->net_dev,
+                  "%s phy selftest\n", rc ? "Failed" : "Passed");
+       return rc;
+}
+
+/**************************************************************************
+ *
+ * Loopback testing
+ * NB Only one loopback test can be executing concurrently.
+ *
+ **************************************************************************/
+
+/* Loopback test RX callback
+ * This is called for each received packet during loopback testing.
+ */
+void ef4_loopback_rx_packet(struct ef4_nic *efx,
+                           const char *buf_ptr, int pkt_len)
+{
+       struct ef4_loopback_state *state = efx->loopback_selftest;
+       struct ef4_loopback_payload *received;
+       struct ef4_loopback_payload *payload;
+
+       BUG_ON(!buf_ptr);
+
+       /* If we are just flushing, then drop the packet */
+       if ((state == NULL) || state->flush)
+               return;
+
+       payload = &state->payload;
+
+       received = (struct ef4_loopback_payload *) buf_ptr;
+       received->ip.saddr = payload->ip.saddr;
+       if (state->offload_csum)
+               received->ip.check = payload->ip.check;
+
+       /* Check that header exists */
+       if (pkt_len < sizeof(received->header)) {
+               netif_err(efx, drv, efx->net_dev,
+                         "saw runt RX packet (length %d) in %s loopback "
+                         "test\n", pkt_len, LOOPBACK_MODE(efx));
+               goto err;
+       }
+
+       /* Check that the ethernet header exists */
+       if (memcmp(&received->header, &payload->header, ETH_HLEN) != 0) {
+               netif_err(efx, drv, efx->net_dev,
+                         "saw non-loopback RX packet in %s loopback test\n",
+                         LOOPBACK_MODE(efx));
+               goto err;
+       }
+
+       /* Check packet length */
+       if (pkt_len != sizeof(*payload)) {
+               netif_err(efx, drv, efx->net_dev,
+                         "saw incorrect RX packet length %d (wanted %d) in "
+                         "%s loopback test\n", pkt_len, (int)sizeof(*payload),
+                         LOOPBACK_MODE(efx));
+               goto err;
+       }
+
+       /* Check that IP header matches */
+       if (memcmp(&received->ip, &payload->ip, sizeof(payload->ip)) != 0) {
+               netif_err(efx, drv, efx->net_dev,
+                         "saw corrupted IP header in %s loopback test\n",
+                         LOOPBACK_MODE(efx));
+               goto err;
+       }
+
+       /* Check that msg and padding matches */
+       if (memcmp(&received->msg, &payload->msg, sizeof(received->msg)) != 0) {
+               netif_err(efx, drv, efx->net_dev,
+                         "saw corrupted RX packet in %s loopback test\n",
+                         LOOPBACK_MODE(efx));
+               goto err;
+       }
+
+       /* Check that iteration matches */
+       if (received->iteration != payload->iteration) {
+               netif_err(efx, drv, efx->net_dev,
+                         "saw RX packet from iteration %d (wanted %d) in "
+                         "%s loopback test\n", ntohs(received->iteration),
+                         ntohs(payload->iteration), LOOPBACK_MODE(efx));
+               goto err;
+       }
+
+       /* Increase correct RX count */
+       netif_vdbg(efx, drv, efx->net_dev,
+                  "got loopback RX in %s loopback test\n", LOOPBACK_MODE(efx));
+
+       atomic_inc(&state->rx_good);
+       return;
+
+ err:
+#ifdef DEBUG
+       if (atomic_read(&state->rx_bad) == 0) {
+               netif_err(efx, drv, efx->net_dev, "received packet:\n");
+               print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
+                              buf_ptr, pkt_len, 0);
+               netif_err(efx, drv, efx->net_dev, "expected packet:\n");
+               print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
+                              &state->payload, sizeof(state->payload), 0);
+       }
+#endif
+       atomic_inc(&state->rx_bad);
+}
+
+/* Prepare the loopback state for a new iteration: rebuild the template
+ * packet, bump its iteration number and zero the RX counters.
+ */
+static void ef4_iterate_state(struct ef4_nic *efx)
+{
+       struct ef4_loopback_state *state = efx->loopback_selftest;
+       struct ef4_loopback_payload *pkt = &state->payload;
+       /* Lengths of everything after the ethernet and IP headers */
+       const size_t ip_len = sizeof(*pkt) - sizeof(struct ethhdr);
+       const size_t udp_len = ip_len - sizeof(struct iphdr);
+
+       /* Ethernet header: sent to our own address from the test MAC */
+       ether_addr_copy((u8 *)&pkt->header.h_dest, efx->net_dev->dev_addr);
+       ether_addr_copy((u8 *)&pkt->header.h_source, payload_source);
+       pkt->header.h_proto = htons(ETH_P_IP);
+
+       /* IP header; saddr set later and used as incrementing count */
+       pkt->ip.version = IPVERSION;
+       pkt->ip.ihl = 5;
+       pkt->ip.protocol = IPPROTO_UDP;
+       pkt->ip.tot_len = htons(ip_len);
+       pkt->ip.daddr = htonl(INADDR_LOOPBACK);
+       pkt->ip.check = (__force __sum16) htons(0xdead);
+
+       /* UDP header */
+       pkt->udp.source = 0;
+       pkt->udp.len = htons(udp_len);
+       pkt->udp.check = 0; /* checksum ignored */
+
+       /* Test data */
+       pkt->iteration = htons(ntohs(pkt->iteration) + 1);
+       memcpy(&pkt->msg, payload_msg, sizeof(payload_msg));
+
+       /* Start counting afresh, and publish all of the above */
+       atomic_set(&state->rx_good, 0);
+       atomic_set(&state->rx_bad, 0);
+       smp_wmb();
+}
+
+/* Transmit state->packet_count copies of the test payload on @tx_queue.
+ *
+ * Each skb is stored in state->skbs[] with one extra reference taken, so
+ * that ef4_end_loopback() can later tell from skb_shared() whether the
+ * other reference has been dropped.
+ *
+ * Returns 0 on success, -ENOMEM if an skb cannot be allocated, or -EPIPE
+ * if ef4_enqueue_skb() does not return NETDEV_TX_OK. On failure the skbs
+ * already recorded in state->skbs[] are left for the caller to release.
+ */
+static int ef4_begin_loopback(struct ef4_tx_queue *tx_queue)
+{
+       struct ef4_nic *efx = tx_queue->efx;
+       struct ef4_loopback_state *state = efx->loopback_selftest;
+       struct ef4_loopback_payload *payload;
+       struct sk_buff *skb;
+       int i;
+       netdev_tx_t rc;
+
+       /* Transmit N copies of buffer */
+       for (i = 0; i < state->packet_count; i++) {
+               /* Allocate an skb, holding an extra reference for
+                * transmit completion counting */
+               skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
+               if (!skb)
+                       return -ENOMEM;
+               state->skbs[i] = skb;
+               skb_get(skb);
+
+               /* Copy the payload in, incrementing the source address to
+                * exercise the rss vectors */
+               payload = ((struct ef4_loopback_payload *)
+                          skb_put(skb, sizeof(state->payload)));
+               memcpy(payload, &state->payload, sizeof(state->payload));
+               payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2));
+
+               /* Ensure everything we've written is visible to the
+                * interrupt handler. */
+               smp_wmb();
+
+               netif_tx_lock_bh(efx->net_dev);
+               rc = ef4_enqueue_skb(tx_queue, skb);
+               netif_tx_unlock_bh(efx->net_dev);
+
+               if (rc != NETDEV_TX_OK) {
+                       netif_err(efx, drv, efx->net_dev,
+                                 "TX queue %d could not transmit packet %d of "
+                                 "%d in %s loopback test\n", tx_queue->queue,
+                                 i + 1, state->packet_count,
+                                 LOOPBACK_MODE(efx));
+
+                       /* Defer cleaning up the other skbs for the caller.
+                        * This drops one reference on the current skb; it
+                        * stays recorded in state->skbs[i].
+                        */
+                       kfree_skb(skb);
+                       return -EPIPE;
+               }
+       }
+
+       return 0;
+}
+
+/* Returns non-zero once the number of good packets received equals the
+ * number sent in the current iteration.
+ */
+static int ef4_poll_loopback(struct ef4_nic *efx)
+{
+       struct ef4_loopback_state *state = efx->loopback_selftest;
+       int good = atomic_read(&state->rx_good);
+
+       return good == state->packet_count;
+}
+
+/* Finish one loopback iteration on @tx_queue.
+ *
+ * Counts how many of the recorded skbs are no longer shared (taken as
+ * "TX completed"), drops the test's reference on each, and compares the
+ * TX-completion and good-RX counts with the number of packets sent. The
+ * totals are accumulated into @lb_tests.
+ *
+ * Returns 0 if both counts match state->packet_count, -ETIMEDOUT
+ * otherwise.
+ */
+static int ef4_end_loopback(struct ef4_tx_queue *tx_queue,
+                           struct ef4_loopback_self_tests *lb_tests)
+{
+       struct ef4_nic *efx = tx_queue->efx;
+       struct ef4_loopback_state *state = efx->loopback_selftest;
+       struct sk_buff *skb;
+       int tx_done = 0, rx_good, rx_bad;
+       int i, rc = 0;
+
+       netif_tx_lock_bh(efx->net_dev);
+
+       /* Count the number of tx completions, and decrement the refcnt. Any
+        * skbs not already completed will be free'd when the queue is flushed */
+       for (i = 0; i < state->packet_count; i++) {
+               /* Entries may be NULL if ef4_begin_loopback() stopped early */
+               skb = state->skbs[i];
+               if (skb && !skb_shared(skb))
+                       ++tx_done;
+               dev_kfree_skb(skb);
+       }
+
+       netif_tx_unlock_bh(efx->net_dev);
+
+       /* Check TX completion and received packet counts */
+       rx_good = atomic_read(&state->rx_good);
+       rx_bad = atomic_read(&state->rx_bad);
+       if (tx_done != state->packet_count) {
+               /* Don't free the skbs; they will be picked up on TX
+                * overflow or channel teardown.
+                */
+               netif_err(efx, drv, efx->net_dev,
+                         "TX queue %d saw only %d out of an expected %d "
+                         "TX completion events in %s loopback test\n",
+                         tx_queue->queue, tx_done, state->packet_count,
+                         LOOPBACK_MODE(efx));
+               rc = -ETIMEDOUT;
+               /* Allow to fall through so we see the RX errors as well */
+       }
+
+       /* We may always be up to a flush away from our desired packet total */
+       if (rx_good != state->packet_count) {
+               netif_dbg(efx, drv, efx->net_dev,
+                         "TX queue %d saw only %d out of an expected %d "
+                         "received packets in %s loopback test\n",
+                         tx_queue->queue, rx_good, state->packet_count,
+                         LOOPBACK_MODE(efx));
+               rc = -ETIMEDOUT;
+               /* Fall through */
+       }
+
+       /* Update loopback test structure */
+       lb_tests->tx_sent[tx_queue->queue] += state->packet_count;
+       lb_tests->tx_done[tx_queue->queue] += tx_done;
+       lb_tests->rx_good += rx_good;
+       lb_tests->rx_bad += rx_bad;
+
+       return rc;
+}
+
+/* Run the loopback test on one TX queue as three bursts of growing size
+ * (1, 16 and 256 packets, each capped at a third of the TX ring).
+ * Stops at the first burst that fails and returns its error, preferring
+ * the transmit-side error over the completion-side one; returns 0 when
+ * all three bursts pass and -ENOMEM if the skb array cannot be allocated.
+ */
+static int
+ef4_test_loopback(struct ef4_tx_queue *tx_queue,
+                 struct ef4_loopback_self_tests *lb_tests)
+{
+       struct ef4_nic *efx = tx_queue->efx;
+       struct ef4_loopback_state *state = efx->loopback_selftest;
+       int round, begin_rc, end_rc;
+
+       for (round = 0; round < 3; round++) {
+               /* Determine how many packets to send */
+               int ring_share = efx->txq_entries / 3;
+
+               state->packet_count = min(1 << (round << 2), ring_share);
+               state->skbs = kcalloc(state->packet_count,
+                                     sizeof(state->skbs[0]), GFP_KERNEL);
+               if (state->skbs == NULL)
+                       return -ENOMEM;
+               state->flush = false;
+
+               netif_dbg(efx, drv, efx->net_dev,
+                         "TX queue %d testing %s loopback with %d packets\n",
+                         tx_queue->queue, LOOPBACK_MODE(efx),
+                         state->packet_count);
+
+               ef4_iterate_state(efx);
+               begin_rc = ef4_begin_loopback(tx_queue);
+
+               /* Usually everything is back within a millisecond; if
+                * not, give it the full timeout before collecting results.
+                */
+               msleep(1);
+               if (!ef4_poll_loopback(efx)) {
+                       msleep(LOOPBACK_TIMEOUT_MS);
+                       ef4_poll_loopback(efx);
+               }
+
+               end_rc = ef4_end_loopback(tx_queue, lb_tests);
+               kfree(state->skbs);
+
+               if (!begin_rc && !end_rc)
+                       continue;
+
+               /* Failure: pause so that stray packets can drain away
+                * before the caller carries on.
+                */
+               schedule_timeout_uninterruptible(HZ / 10);
+               return begin_rc ? begin_rc : end_rc;
+       }
+
+       netif_dbg(efx, drv, efx->net_dev,
+                 "TX queue %d passed %s loopback test with a burst length "
+                 "of %d packets\n", tx_queue->queue, LOOPBACK_MODE(efx),
+                 state->packet_count);
+
+       return 0;
+}
+
+/* Wait for link up. On Falcon, we would prefer to rely on ef4_monitor, but
+ * any contention on the mac lock (via e.g. ef4_mac_mcast_work) causes it
+ * to delay and retry. Therefore, it's safer to just poll directly.
+ *
+ * Polls up to 40 times at HZ/10 intervals and returns 0 as soon as two
+ * consecutive polls find the link up with no MAC fault; returns
+ * -ETIMEDOUT if that never happens.
+ */
+static int ef4_wait_for_link(struct ef4_nic *efx)
+{
+       struct ef4_link_state *link_state = &efx->link_state;
+       int attempt, consecutive_up = 0;
+
+       for (attempt = 0; attempt < 40; attempt++) {
+               bool usable;
+
+               schedule_timeout_uninterruptible(HZ / 10);
+
+               /* Let the NIC type refresh its link state, if it can */
+               if (efx->type->monitor != NULL) {
+                       mutex_lock(&efx->mac_lock);
+                       efx->type->monitor(efx);
+                       mutex_unlock(&efx->mac_lock);
+               }
+
+               /* The MAC fault check only runs when the link is up */
+               mutex_lock(&efx->mac_lock);
+               usable = link_state->up && !efx->type->check_mac_fault(efx);
+               mutex_unlock(&efx->mac_lock);
+
+               if (!usable)
+                       consecutive_up = 0;
+               else if (++consecutive_up == 2)
+                       return 0;
+       }
+
+       return -ETIMEDOUT;
+}
+
+/* Run the loopback test in every mode set in @loopback_modes (a bitmask
+ * indexed by enum ef4_loopback_mode), on every TX queue of the channel at
+ * efx->tx_channel_offset. Results are accumulated per mode in
+ * @tests->loopback[].
+ *
+ * Stops at the first failure. Returns 0 on success, -ENOMEM if the test
+ * state cannot be allocated, otherwise the first error, except that
+ * -EPERM is reported as 0.
+ */
+static int ef4_test_loopbacks(struct ef4_nic *efx, struct ef4_self_tests *tests,
+                             unsigned int loopback_modes)
+{
+       enum ef4_loopback_mode mode;
+       struct ef4_loopback_state *state;
+       struct ef4_channel *channel =
+               ef4_get_channel(efx, efx->tx_channel_offset);
+       struct ef4_tx_queue *tx_queue;
+       int rc = 0;
+
+       /* Set the port loopback_selftest member. From this point on
+        * all received packets will be dropped. Mark the state as
+        * "flushing" so all inflight packets are dropped */
+       state = kzalloc(sizeof(*state), GFP_KERNEL);
+       if (state == NULL)
+               return -ENOMEM;
+       BUG_ON(efx->loopback_selftest);
+       state->flush = true;
+       efx->loopback_selftest = state;
+
+       /* Test all supported loopback modes */
+       for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) {
+               if (!(loopback_modes & (1 << mode)))
+                       continue;
+
+               /* Move the port into the specified loopback mode. */
+               state->flush = true;
+               mutex_lock(&efx->mac_lock);
+               efx->loopback_mode = mode;
+               rc = __ef4_reconfigure_port(efx);
+               mutex_unlock(&efx->mac_lock);
+               if (rc) {
+                       netif_err(efx, drv, efx->net_dev,
+                                 "unable to move into %s loopback\n",
+                                 LOOPBACK_MODE(efx));
+                       goto out;
+               }
+
+               rc = ef4_wait_for_link(efx);
+               if (rc) {
+                       netif_err(efx, drv, efx->net_dev,
+                                 "loopback %s never came up\n",
+                                 LOOPBACK_MODE(efx));
+                       goto out;
+               }
+
+               /* Test all enabled types of TX queue */
+               ef4_for_each_channel_tx_queue(tx_queue, channel) {
+                       state->offload_csum = (tx_queue->queue &
+                                              EF4_TXQ_TYPE_OFFLOAD);
+                       rc = ef4_test_loopback(tx_queue,
+                                              &tests->loopback[mode]);
+                       if (rc)
+                               goto out;
+               }
+       }
+
+ out:
+       /* Go back to dropping packets (flush is set, not cleared), then
+        * detach and free the state. The caller will remove the loopback
+        * setting */
+       state->flush = true;
+       efx->loopback_selftest = NULL;
+       wmb();
+       kfree(state);
+
+       if (rc == -EPERM)
+               rc = 0;
+
+       return rc;
+}
+
+/**************************************************************************
+ *
+ * Entry point
+ *
+ *************************************************************************/
+
+/* Run the self-tests and fill in @tests.
+ *
+ * The online tests (PHY alive, NVRAM, interrupts, event queues) always
+ * run; if any fails, the first error is returned at once. Without
+ * ETH_TEST_FL_OFFLINE in @flags only the PHY tests follow. With it, the
+ * device is detached and the chip, PHY and loopback tests are run, after
+ * which the saved PHY and loopback modes are restored and the device is
+ * reattached.
+ *
+ * Returns 0 if everything passed, otherwise the first error seen. If the
+ * chip test itself returns an error, a RESET_TYPE_DISABLE reset is
+ * scheduled and that error is returned without reattaching the device.
+ */
+int ef4_selftest(struct ef4_nic *efx, struct ef4_self_tests *tests,
+                unsigned flags)
+{
+       /* Saved so they can be restored after the offline tests */
+       enum ef4_loopback_mode loopback_mode = efx->loopback_mode;
+       int phy_mode = efx->phy_mode;
+       int rc_test = 0, rc_reset, rc;
+
+       ef4_selftest_async_cancel(efx);
+
+       /* Online (i.e. non-disruptive) testing
+        * This checks interrupt generation, event delivery and PHY presence. */
+
+       /* Every online test runs; rc_test keeps only the first error */
+       rc = ef4_test_phy_alive(efx, tests);
+       if (rc && !rc_test)
+               rc_test = rc;
+
+       rc = ef4_test_nvram(efx, tests);
+       if (rc && !rc_test)
+               rc_test = rc;
+
+       rc = ef4_test_interrupts(efx, tests);
+       if (rc && !rc_test)
+               rc_test = rc;
+
+       rc = ef4_test_eventq_irq(efx, tests);
+       if (rc && !rc_test)
+               rc_test = rc;
+
+       if (rc_test)
+               return rc_test;
+
+       if (!(flags & ETH_TEST_FL_OFFLINE))
+               return ef4_test_phy(efx, tests, flags);
+
+       /* Offline (i.e. disruptive) testing
+        * This checks MAC and PHY loopback on the specified port. */
+
+       /* Detach the device so the kernel doesn't transmit during the
+        * loopback test and the watchdog timeout doesn't fire.
+        */
+       ef4_device_detach_sync(efx);
+
+       if (efx->type->test_chip) {
+               rc_reset = efx->type->test_chip(efx, tests);
+               if (rc_reset) {
+                       netif_err(efx, hw, efx->net_dev,
+                                 "Unable to recover from chip test\n");
+                       ef4_schedule_reset(efx, RESET_TYPE_DISABLE);
+                       return rc_reset;
+               }
+
+               if ((tests->memory < 0 || tests->registers < 0) && !rc_test)
+                       rc_test = -EIO;
+       }
+
+       /* Ensure that the phy is powered and out of loopback
+        * for the bist and loopback tests */
+       mutex_lock(&efx->mac_lock);
+       efx->phy_mode &= ~PHY_MODE_LOW_POWER;
+       efx->loopback_mode = LOOPBACK_NONE;
+       __ef4_reconfigure_port(efx);
+       mutex_unlock(&efx->mac_lock);
+
+       rc = ef4_test_phy(efx, tests, flags);
+       if (rc && !rc_test)
+               rc_test = rc;
+
+       rc = ef4_test_loopbacks(efx, tests, efx->loopback_modes);
+       if (rc && !rc_test)
+               rc_test = rc;
+
+       /* restore the PHY to the previous state */
+       mutex_lock(&efx->mac_lock);
+       efx->phy_mode = phy_mode;
+       efx->loopback_mode = loopback_mode;
+       __ef4_reconfigure_port(efx);
+       mutex_unlock(&efx->mac_lock);
+
+       netif_device_attach(efx->net_dev);
+
+       return rc_test;
+}
+
+/* Start an event test on every channel and schedule
+ * efx->selftest_work to run IRQ_TIMEOUT jiffies from now.
+ */
+void ef4_selftest_async_start(struct ef4_nic *efx)
+{
+       struct ef4_channel *channel;
+
+       ef4_for_each_channel(channel, efx)
+               ef4_nic_event_test_start(channel);
+       schedule_delayed_work(&efx->selftest_work, IRQ_TIMEOUT);
+}
+
+/* Cancel the delayed selftest work, waiting for it to finish if it is
+ * already running (cancel_delayed_work_sync).
+ */
+void ef4_selftest_async_cancel(struct ef4_nic *efx)
+{
+       cancel_delayed_work_sync(&efx->selftest_work);
+}
+
+/* Delayed-work handler for efx->selftest_work: for each channel, log
+ * whether the event test has recorded an interrupt and on which CPU.
+ */
+void ef4_selftest_async_work(struct work_struct *data)
+{
+       struct ef4_nic *efx =
+               container_of(data, struct ef4_nic, selftest_work.work);
+       struct ef4_channel *channel;
+
+       ef4_for_each_channel(channel, efx) {
+               int irq_cpu = ef4_nic_event_test_irq_cpu(channel);
+
+               if (irq_cpu >= 0)
+                       netif_dbg(efx, ifup, efx->net_dev,
+                                 "channel %d triggered interrupt on CPU %d\n",
+                                 channel->channel, irq_cpu);
+               else
+                       netif_err(efx, ifup, efx->net_dev,
+                                 "channel %d failed to trigger an interrupt\n",
+                                 channel->channel);
+       }
+}
diff --git a/drivers/net/ethernet/sfc/falcon/selftest.h b/drivers/net/ethernet/sfc/falcon/selftest.h
new file mode 100644 (file)
index 0000000..be52a49
--- /dev/null
@@ -0,0 +1,55 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2012 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_SELFTEST_H
+#define EF4_SELFTEST_H
+
+#include "net_driver.h"
+
+/*
+ * Self tests
+ */
+
+/* Results recorded for one loopback mode; struct ef4_self_tests holds an
+ * array of these.  The tx_* arrays have one entry per TX queue type.
+ * NOTE(review): the field meanings (packets sent / completed / received
+ * good / received bad) are inferred from the names; confirm against the
+ * loopback test code.
+ */
+struct ef4_loopback_self_tests {
+       int tx_sent[EF4_TXQ_TYPES];
+       int tx_done[EF4_TXQ_TYPES];
+       int rx_good;
+       int rx_bad;
+};
+
+#define EF4_MAX_PHY_TESTS 20
+
+/* Efx self test results
+ * For fields which are not counters, 1 indicates success and -1
+ * indicates failure; 0 indicates test could not be run.
+ *
+ * eventq_dma[] and eventq_int[] have one slot per possible channel
+ * (EF4_MAX_CHANNELS), phy_ext[] one slot per PHY-specific test (at most
+ * EF4_MAX_PHY_TESTS), and loopback[] has LOOPBACK_TEST_MAX + 1 slots.
+ * NOTE(review): loopback[] is presumably indexed by loopback mode - confirm.
+ */
+struct ef4_self_tests {
+       /* online tests */
+       int phy_alive;
+       int nvram;
+       int interrupt;
+       int eventq_dma[EF4_MAX_CHANNELS];
+       int eventq_int[EF4_MAX_CHANNELS];
+       /* offline tests */
+       int memory;
+       int registers;
+       int phy_ext[EF4_MAX_PHY_TESTS];
+       struct ef4_loopback_self_tests loopback[LOOPBACK_TEST_MAX + 1];
+};
+
+void ef4_loopback_rx_packet(struct ef4_nic *efx, const char *buf_ptr,
+                           int pkt_len);
+int ef4_selftest(struct ef4_nic *efx, struct ef4_self_tests *tests,
+                unsigned flags);
+void ef4_selftest_async_start(struct ef4_nic *efx);
+void ef4_selftest_async_cancel(struct ef4_nic *efx);
+void ef4_selftest_async_work(struct work_struct *data);
+
+#endif /* EF4_SELFTEST_H */
diff --git a/drivers/net/ethernet/sfc/falcon/tenxpress.c b/drivers/net/ethernet/sfc/falcon/tenxpress.c
new file mode 100644 (file)
index 0000000..acc548a
--- /dev/null
@@ -0,0 +1,494 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2007-2011 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/delay.h>
+#include <linux/rtnetlink.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include "efx.h"
+#include "mdio_10g.h"
+#include "nic.h"
+#include "phy.h"
+#include "workarounds.h"
+
+/* We expect these MMDs to be in the package. */
+#define TENXPRESS_REQUIRED_DEVS (MDIO_DEVS_PMAPMD      | \
+                                MDIO_DEVS_PCS          | \
+                                MDIO_DEVS_PHYXS        | \
+                                MDIO_DEVS_AN)
+
+#define SFX7101_LOOPBACKS ((1 << LOOPBACK_PHYXS) |     \
+                          (1 << LOOPBACK_PCS) |        \
+                          (1 << LOOPBACK_PMAPMD) |     \
+                          (1 << LOOPBACK_PHYXS_WS))
+
+/* We complain if we fail to see the link partner as 10G capable this many
+ * times in a row (must be > 1 as sampling the autoneg. registers is racy)
+ */
+#define MAX_BAD_LP_TRIES       (5)
+
+/* Extended control register */
+#define PMA_PMD_XCONTROL_REG   49152
+#define PMA_PMD_EXT_GMII_EN_LBN        1
+#define PMA_PMD_EXT_GMII_EN_WIDTH 1
+#define PMA_PMD_EXT_CLK_OUT_LBN        2
+#define PMA_PMD_EXT_CLK_OUT_WIDTH 1
+#define PMA_PMD_LNPGA_POWERDOWN_LBN 8
+#define PMA_PMD_LNPGA_POWERDOWN_WIDTH 1
+#define PMA_PMD_EXT_CLK312_WIDTH 1
+#define PMA_PMD_EXT_LPOWER_LBN  12
+#define PMA_PMD_EXT_LPOWER_WIDTH 1
+#define PMA_PMD_EXT_ROBUST_LBN 14
+#define PMA_PMD_EXT_ROBUST_WIDTH 1
+#define PMA_PMD_EXT_SSR_LBN    15
+#define PMA_PMD_EXT_SSR_WIDTH  1
+
+/* extended status register */
+#define PMA_PMD_XSTATUS_REG    49153
+#define PMA_PMD_XSTAT_MDIX_LBN 14
+#define PMA_PMD_XSTAT_FLP_LBN   (12)
+
+/* LED control register */
+#define PMA_PMD_LED_CTRL_REG   49159
+#define PMA_PMA_LED_ACTIVITY_LBN       (3)
+
+/* LED function override register */
+#define PMA_PMD_LED_OVERR_REG  49161
+/* Bit positions for different LEDs (there are more but not wired on SFE4001)*/
+#define PMA_PMD_LED_LINK_LBN   (0)
+#define PMA_PMD_LED_SPEED_LBN  (2)
+#define PMA_PMD_LED_TX_LBN     (4)
+#define PMA_PMD_LED_RX_LBN     (6)
+/* Override settings */
+#define        PMA_PMD_LED_AUTO        (0)     /* H/W control */
+#define        PMA_PMD_LED_ON          (1)
+#define        PMA_PMD_LED_OFF         (2)
+#define PMA_PMD_LED_FLASH      (3)
+#define PMA_PMD_LED_MASK       3
+/* Default override: Green and Amber LEDs under hardware control
+ * (PMA_PMD_LED_AUTO), Red (RX) LED forced off */
+#define SFX7101_PMA_PMD_LED_DEFAULT (PMA_PMD_LED_OFF << PMA_PMD_LED_RX_LBN)
+
+#define PMA_PMD_SPEED_ENABLE_REG 49192
+#define PMA_PMD_100TX_ADV_LBN    1
+#define PMA_PMD_100TX_ADV_WIDTH  1
+#define PMA_PMD_1000T_ADV_LBN    2
+#define PMA_PMD_1000T_ADV_WIDTH  1
+#define PMA_PMD_10000T_ADV_LBN   3
+#define PMA_PMD_10000T_ADV_WIDTH 1
+#define PMA_PMD_SPEED_LBN        4
+#define PMA_PMD_SPEED_WIDTH      4
+
+/* Misc register defines */
+#define PCS_CLOCK_CTRL_REG     55297
+#define PLL312_RST_N_LBN 2
+
+#define PCS_SOFT_RST2_REG      55302
+#define SERDES_RST_N_LBN 13
+#define XGXS_RST_N_LBN 12
+
+#define        PCS_TEST_SELECT_REG     55303   /* PRM 10.5.8 */
+#define        CLK312_EN_LBN 3
+
+/* PHYXS registers */
+#define PHYXS_XCONTROL_REG     49152
+#define PHYXS_RESET_LBN                15
+#define PHYXS_RESET_WIDTH      1
+
+#define PHYXS_TEST1         (49162)
+#define LOOPBACK_NEAR_LBN   (8)
+#define LOOPBACK_NEAR_WIDTH (1)
+
+/* Boot status register */
+#define PCS_BOOT_STATUS_REG            53248
+#define PCS_BOOT_FATAL_ERROR_LBN       0
+#define PCS_BOOT_PROGRESS_LBN          1
+#define PCS_BOOT_PROGRESS_WIDTH                2
+#define PCS_BOOT_PROGRESS_INIT         0
+#define PCS_BOOT_PROGRESS_WAIT_MDIO    1
+#define PCS_BOOT_PROGRESS_CHECKSUM     2
+#define PCS_BOOT_PROGRESS_JUMP         3
+#define PCS_BOOT_DOWNLOAD_WAIT_LBN     3
+#define PCS_BOOT_CODE_STARTED_LBN      4
+
+/* 100M/1G PHY registers */
+#define GPHY_XCONTROL_REG      49152
+#define GPHY_ISOLATE_LBN       10
+#define GPHY_ISOLATE_WIDTH     1
+#define GPHY_DUPLEX_LBN                8
+#define GPHY_DUPLEX_WIDTH      1
+#define GPHY_LOOPBACK_NEAR_LBN 14
+#define GPHY_LOOPBACK_NEAR_WIDTH 1
+
+#define C22EXT_STATUS_REG       49153
+#define C22EXT_STATUS_LINK_LBN  2
+#define C22EXT_STATUS_LINK_WIDTH 1
+
+#define C22EXT_MSTSLV_CTRL                     49161
+#define C22EXT_MSTSLV_CTRL_ADV_1000_HD_LBN     8
+#define C22EXT_MSTSLV_CTRL_ADV_1000_FD_LBN     9
+
+#define C22EXT_MSTSLV_STATUS                   49162
+#define C22EXT_MSTSLV_STATUS_LP_1000_HD_LBN    10
+#define C22EXT_MSTSLV_STATUS_LP_1000_FD_LBN    11
+
+/* Time to wait between powering down the LNPGA and turning off the power
+ * rails */
+#define LNPGA_PDOWN_WAIT       (HZ / 5)
+
+/* Per-NIC private PHY state, allocated in tenxpress_phy_probe() and stored
+ * in efx->phy_data.
+ * @loopback_mode: loopback mode last applied by tenxpress_phy_reconfigure()
+ * @phy_mode: PHY mode last seen by probe/reconfigure; compared with
+ *     efx->phy_mode to detect a return to PHY_MODE_NORMAL
+ * @bad_lp_tries: bad-link-partner counter maintained by
+ *     sfx7101_check_bad_lp()
+ */
+struct tenxpress_phy_data {
+       enum ef4_loopback_mode loopback_mode;
+       enum ef4_phy_mode phy_mode;
+       int bad_lp_tries;
+};
+
+/* Apply the baseline PHY register setup: enable the 312.5 MHz clock and
+ * program the LED control and LED override registers.  Always returns 0.
+ */
+static int tenxpress_init(struct ef4_nic *efx)
+{
+       const int clk312_enable = 1 << CLK312_EN_LBN;
+       const int led_activity = 1 << PMA_PMA_LED_ACTIVITY_LBN;
+
+       /* Enable 312.5 MHz clock */
+       ef4_mdio_write(efx, MDIO_MMD_PCS, PCS_TEST_SELECT_REG, clk312_enable);
+
+       /* LED scheme: Green = Link, Amber = Link/Act, Red = Off */
+       ef4_mdio_set_flag(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_CTRL_REG,
+                         led_activity, true);
+       ef4_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_OVERR_REG,
+                      SFX7101_PMA_PMD_LED_DEFAULT);
+
+       return 0;
+}
+
+/* Allocate the private PHY state and publish the PHY's capabilities
+ * (required MMDs, clause 45 support, loopback modes, advertised link
+ * modes) on @efx.  Returns 0, or -ENOMEM if the allocation fails.
+ */
+static int tenxpress_phy_probe(struct ef4_nic *efx)
+{
+       struct tenxpress_phy_data *pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+
+       if (!pd)
+               return -ENOMEM;
+
+       /* Remember the PHY mode in force at probe time */
+       pd->phy_mode = efx->phy_mode;
+       efx->phy_data = pd;
+
+       efx->mdio.mode_support = MDIO_SUPPORTS_C45;
+       efx->mdio.mmds = TENXPRESS_REQUIRED_DEVS;
+
+       efx->loopback_modes = SFX7101_LOOPBACKS | FALCON_XMAC_LOOPBACKS;
+       efx->link_advertising = ADVERTISED_TP | ADVERTISED_Autoneg |
+                               ADVERTISED_10000baseT_Full;
+
+       return 0;
+}
+
+/* Bring the PHY up: run the board's PHY init hook, verify the required
+ * MMDs (unless the PHY is in a special mode), apply the baseline register
+ * setup, reprogram flow control/autonegotiation and finally reset the
+ * XAUI side.  Returns 0 on success or the first negative error code.
+ */
+static int tenxpress_phy_init(struct ef4_nic *efx)
+{
+       int rc = 0;
+
+       falcon_board(efx)->type->init_phy(efx);
+
+       /* The MMD checks are skipped while the PHY is in a special mode */
+       if (!(efx->phy_mode & PHY_MODE_SPECIAL)) {
+               rc = ef4_mdio_wait_reset_mmds(efx, TENXPRESS_REQUIRED_DEVS);
+               if (rc >= 0)
+                       rc = ef4_mdio_check_mmds(efx, TENXPRESS_REQUIRED_DEVS);
+       }
+       if (rc >= 0)
+               rc = tenxpress_init(efx);
+       if (rc < 0)
+               return rc;
+
+       /* Reinitialise flow control settings */
+       ef4_link_set_wanted_fc(efx, efx->wanted_fc);
+       ef4_mdio_an_reconfigure(efx);
+
+       /* 200ms pause */
+       schedule_timeout_uninterruptible(HZ / 5);
+
+       /* Let XGXS and SerDes out of reset */
+       falcon_reset_xaui(efx);
+
+       return 0;
+}
+
+/* Perform a "special software reset" on the PHY. The caller is
+ * responsible for saving and restoring the PHY hardware registers
+ * properly, and masking/unmasking LASI.
+ *
+ * NIC statistics are stopped for the duration of the reset and always
+ * restarted before returning.  Returns 0 on success or a negative error
+ * code from the MMD reset wait or from tenxpress_init(). */
+static int tenxpress_special_reset(struct ef4_nic *efx)
+{
+       int xcontrol;
+       int rc;
+
+       /* The XGMAC clock is driven from the SFX7101 312MHz clock, so
+        * a special software reset can glitch the XGMAC sufficiently for stats
+        * requests to fail. */
+       falcon_stop_nic_stats(efx);
+
+       /* Initiate reset by setting the SSR bit */
+       xcontrol = ef4_mdio_read(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG);
+       xcontrol |= (1 << PMA_PMD_EXT_SSR_LBN);
+       ef4_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG, xcontrol);
+
+       mdelay(200);
+
+       /* Wait for the blocks to come out of reset */
+       rc = ef4_mdio_wait_reset_mmds(efx, TENXPRESS_REQUIRED_DEVS);
+
+       /* Try and reconfigure the device */
+       if (rc >= 0)
+               rc = tenxpress_init(efx);
+
+       /* Wait for the XGXS state machine to churn */
+       if (rc >= 0)
+               mdelay(10);
+
+       falcon_start_nic_stats(efx);
+       return rc;
+}
+
+/* Track link partners that start autonegotiation but never complete it.
+ *
+ * While the link is down and the partner is AN-capable but AN has not
+ * completed, bad_lp_tries is incremented.  When it reaches
+ * MAX_BAD_LP_TRIES the RX (red) LED is set flashing, an error is logged
+ * and the counter restarts from 1.  Once the situation clears after a
+ * non-zero count, the RX LED override is set back to off and the counter
+ * is zeroed.
+ */
+static void sfx7101_check_bad_lp(struct ef4_nic *efx, bool link_ok)
+{
+       struct tenxpress_phy_data *phy_data = efx->phy_data;
+       bool bad_lp = false;
+       int led;
+
+       if (!link_ok) {
+               int an_stat = ef4_mdio_read(efx, MDIO_MMD_AN, MDIO_STAT1);
+
+               if (!(an_stat & MDIO_AN_STAT1_LPABLE))
+                       return; /* LP status is unknown */
+
+               /* AN has started but not completed */
+               if (!(an_stat & MDIO_AN_STAT1_COMPLETE)) {
+                       bad_lp = true;
+                       phy_data->bad_lp_tries++;
+               }
+       }
+
+       /* Nothing to do if all is well and was previously so. */
+       if (phy_data->bad_lp_tries == 0)
+               return;
+
+       /* Still bad, but not yet (or no longer exactly) at the threshold */
+       if (bad_lp && phy_data->bad_lp_tries != MAX_BAD_LP_TRIES)
+               return;
+
+       /* Use the RX (red) LED as an error indicator once we've seen AN
+        * failure several times in a row, and also log a message. */
+       led = ef4_mdio_read(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_OVERR_REG);
+       led &= ~(PMA_PMD_LED_MASK << PMA_PMD_LED_RX_LBN);
+       if (bad_lp) {
+               led |= PMA_PMD_LED_FLASH << PMA_PMD_LED_RX_LBN;
+               netif_err(efx, link, efx->net_dev,
+                         "appears to be plugged into a port"
+                         " that is not 10GBASE-T capable. The PHY"
+                         " supports 10GBASE-T ONLY, so no link can"
+                         " be established\n");
+       } else {
+               led |= PMA_PMD_LED_OFF << PMA_PMD_LED_RX_LBN;
+       }
+       ef4_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_OVERR_REG, led);
+
+       /* Restart the count: 1 if still bad, 0 if recovered */
+       phy_data->bad_lp_tries = bad_lp;
+}
+
+/* Report link state as seen by the PMA/PMD, PCS and PHY XS MMDs. */
+static bool sfx7101_link_ok(struct ef4_nic *efx)
+{
+       const int link_mmds = MDIO_DEVS_PMAPMD | MDIO_DEVS_PCS |
+                             MDIO_DEVS_PHYXS;
+
+       return ef4_mdio_links_ok(efx, link_mmds);
+}
+
+/* Set the PHY XS near-loopback bit when LOOPBACK_PHYXS is selected and
+ * clear it for every other loopback mode.
+ */
+static void tenxpress_ext_loopback(struct ef4_nic *efx)
+{
+       bool phyxs_loopback = (efx->loopback_mode == LOOPBACK_PHYXS);
+
+       ef4_mdio_set_flag(efx, MDIO_MMD_PHYXS, PHYXS_TEST1,
+                         1 << LOOPBACK_NEAR_LBN, phyxs_loopback);
+}
+
+/* Make the low-power state of the required MMDs follow the
+ * PHY_MODE_LOW_POWER bit of efx->phy_mode.
+ */
+static void tenxpress_low_power(struct ef4_nic *efx)
+{
+       int low_power = (efx->phy_mode & PHY_MODE_LOW_POWER) ? 1 : 0;
+
+       ef4_mdio_set_mmds_lpower(efx, low_power, TENXPRESS_REQUIRED_DEVS);
+}
+
+/* Apply efx->phy_mode and efx->loopback_mode to the PHY.
+ *
+ * When the PHY is off or in a special mode only the cached phy_mode is
+ * updated.  Otherwise the PHY is special-reset first if it is returning
+ * to PHY_MODE_NORMAL or if the loopback macros below report a relevant
+ * loopback change, then low-power, transmit-disable, loopback and
+ * autonegotiation settings are reprogrammed and the applied modes are
+ * cached in phy_data.  Always returns 0.
+ */
+static int tenxpress_phy_reconfigure(struct ef4_nic *efx)
+{
+       struct tenxpress_phy_data *phy_data = efx->phy_data;
+       bool phy_mode_change, loop_reset;
+
+       if (efx->phy_mode & (PHY_MODE_OFF | PHY_MODE_SPECIAL)) {
+               phy_data->phy_mode = efx->phy_mode;
+               return 0;
+       }
+
+       /* True only on a transition from a non-normal mode back to normal */
+       phy_mode_change = (efx->phy_mode == PHY_MODE_NORMAL &&
+                          phy_data->phy_mode != PHY_MODE_NORMAL);
+       loop_reset = (LOOPBACK_OUT_OF(phy_data, efx, LOOPBACKS_EXTERNAL(efx)) ||
+                     LOOPBACK_CHANGED(phy_data, efx, 1 << LOOPBACK_GPHY));
+
+       if (loop_reset || phy_mode_change) {
+               /* NOTE(review): the return value of the special reset is
+                * ignored here - confirm that best-effort is intended. */
+               tenxpress_special_reset(efx);
+               falcon_reset_xaui(efx);
+       }
+
+       tenxpress_low_power(efx);
+       ef4_mdio_transmit_disable(efx);
+       ef4_mdio_phy_reconfigure(efx);
+       tenxpress_ext_loopback(efx);
+       ef4_mdio_an_reconfigure(efx);
+
+       /* Cache what was applied, for comparison on the next call */
+       phy_data->loopback_mode = efx->loopback_mode;
+       phy_data->phy_mode = efx->phy_mode;
+
+       return 0;
+}
+
+static void
+tenxpress_get_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd);
+
+/* Poll for link state changes.
+ *
+ * Refreshes efx->link_state (speed is fixed at 10000, full duplex), runs
+ * the bad-link-partner check, and returns true if the link state differs
+ * from what it was on entry.
+ */
+static bool tenxpress_phy_poll(struct ef4_nic *efx)
+{
+       struct ef4_link_state prev = efx->link_state;
+       struct ef4_link_state *cur = &efx->link_state;
+
+       cur->up = sfx7101_link_ok(efx);
+       cur->speed = 10000;
+       cur->fd = true;
+       cur->fc = ef4_mdio_get_pause(efx);
+
+       sfx7101_check_bad_lp(efx, cur->up);
+
+       return !ef4_link_state_equal(cur, &prev);
+}
+
+/* Shut the PHY down: request LNPGA power-down, then sleep for
+ * LNPGA_PDOWN_WAIT so the power-down can finish before the caller goes on.
+ */
+static void sfx7101_phy_fini(struct ef4_nic *efx)
+{
+       /* Power down the LNPGA */
+       ef4_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG,
+                      1 << PMA_PMD_LNPGA_POWERDOWN_LBN);
+
+       /* Waiting here ensures that the board fini, which can turn
+        * off the power to the PHY, won't get run until the LNPGA
+        * powerdown has been given long enough to complete. */
+       schedule_timeout_uninterruptible(LNPGA_PDOWN_WAIT); /* 200 ms */
+}
+
+/* Free the private PHY state allocated by tenxpress_phy_probe() and
+ * clear the pointer to it.
+ */
+static void tenxpress_phy_remove(struct ef4_nic *efx)
+{
+       struct tenxpress_phy_data *pd = efx->phy_data;
+
+       efx->phy_data = NULL;
+       kfree(pd);
+}
+
+
+/* Override the RX, TX and link LEDs.
+ *
+ * EF4_LED_OFF and EF4_LED_ON force all three LEDs off or on; any other
+ * mode restores the default override value.
+ */
+void tenxpress_set_id_led(struct ef4_nic *efx, enum ef4_led_mode mode)
+{
+       int reg = SFX7101_PMA_PMD_LED_DEFAULT;
+       int force = -1;
+
+       if (mode == EF4_LED_OFF)
+               force = PMA_PMD_LED_OFF;
+       else if (mode == EF4_LED_ON)
+               force = PMA_PMD_LED_ON;
+
+       /* Apply the same override setting to the TX, RX and link fields */
+       if (force >= 0)
+               reg = (force << PMA_PMD_LED_TX_LBN) |
+                     (force << PMA_PMD_LED_RX_LBN) |
+                     (force << PMA_PMD_LED_LINK_LBN);
+
+       ef4_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_OVERR_REG, reg);
+}
+
+/* Names of the PHY-specific self-tests, indexed by test number */
+static const char *const sfx7101_test_names[] = {
+       "bist"
+};
+
+/* Return the name of PHY self-test @index, or NULL when @index is past
+ * the end of the table.
+ */
+static const char *sfx7101_test_name(struct ef4_nic *efx, unsigned int index)
+{
+       if (index >= ARRAY_SIZE(sfx7101_test_names))
+               return NULL;
+
+       return sfx7101_test_names[index];
+}
+
+/* Run the PHY self-tests.  Nothing is run unless ETH_TEST_FL_OFFLINE is
+ * set in @flags.  Otherwise a special software reset is performed,
+ * results[0] is set to 1 on success or -1 on failure, autonegotiation is
+ * reconfigured, and the reset's return code is returned.
+ */
+static int
+sfx7101_run_tests(struct ef4_nic *efx, int *results, unsigned flags)
+{
+       int rc = 0;
+
+       if (flags & ETH_TEST_FL_OFFLINE) {
+               /* BIST is automatically run after a special software reset */
+               rc = tenxpress_special_reset(efx);
+               if (rc)
+                       results[0] = -1;
+               else
+                       results[0] = 1;
+
+               ef4_mdio_an_reconfigure(efx);
+       }
+
+       return rc;
+}
+
+/* Fill in @ecmd for ethtool.  The 10GBASE-T advertisement bits for the
+ * local end and the link partner are read from the AN MMD and passed to
+ * the generic clause 45 helper; in external loopback the reported speed
+ * is forced to 10G.
+ */
+static void
+tenxpress_get_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd)
+{
+       u32 adv, lpa;
+       int an_reg;
+
+       an_reg = ef4_mdio_read(efx, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL);
+       adv = (an_reg & MDIO_AN_10GBT_CTRL_ADV10G) ?
+               ADVERTISED_10000baseT_Full : 0;
+
+       an_reg = ef4_mdio_read(efx, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
+       lpa = (an_reg & MDIO_AN_10GBT_STAT_LP10G) ?
+               ADVERTISED_10000baseT_Full : 0;
+
+       mdio45_ethtool_gset_npage(&efx->mdio, ecmd, adv, lpa);
+
+       /* In loopback, the PHY automatically brings up the correct interface,
+        * but doesn't advertise the correct speed. So override it */
+       if (LOOPBACK_EXTERNAL(efx))
+               ethtool_cmd_speed_set(ecmd, SPEED_10000);
+}
+
+/* Apply ethtool settings.  Requests with autonegotiation disabled are
+ * rejected with -EINVAL; everything else goes to the generic MDIO helper.
+ */
+static int tenxpress_set_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd)
+{
+       if (ecmd->autoneg)
+               return ef4_mdio_set_settings(efx, ecmd);
+
+       return -EINVAL;
+}
+
+/* Set or clear the 10GBASE-T advertisement bit in the AN MMD according
+ * to whether @advertising includes ADVERTISED_10000baseT_Full.
+ */
+static void sfx7101_set_npage_adv(struct ef4_nic *efx, u32 advertising)
+{
+       u32 want_10g = advertising & ADVERTISED_10000baseT_Full;
+
+       ef4_mdio_set_flag(efx, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL,
+                         MDIO_AN_10GBT_CTRL_ADV10G, want_10g);
+}
+
+/* PHY operations table for the SFX7101, wiring the handlers defined in
+ * this file (plus the generic ef4_mdio_test_alive) into the ef4 PHY
+ * interface.
+ */
+const struct ef4_phy_operations falcon_sfx7101_phy_ops = {
+       .probe            = tenxpress_phy_probe,
+       .init             = tenxpress_phy_init,
+       .reconfigure      = tenxpress_phy_reconfigure,
+       .poll             = tenxpress_phy_poll,
+       .fini             = sfx7101_phy_fini,
+       .remove           = tenxpress_phy_remove,
+       .get_settings     = tenxpress_get_settings,
+       .set_settings     = tenxpress_set_settings,
+       .set_npage_adv    = sfx7101_set_npage_adv,
+       .test_alive       = ef4_mdio_test_alive,
+       .test_name        = sfx7101_test_name,
+       .run_tests        = sfx7101_run_tests,
+};
diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c
new file mode 100644 (file)
index 0000000..104fb15
--- /dev/null
@@ -0,0 +1,649 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/pci.h>
+#include <linux/tcp.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <net/ipv6.h>
+#include <linux/if_ether.h>
+#include <linux/highmem.h>
+#include <linux/cache.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "io.h"
+#include "nic.h"
+#include "tx.h"
+#include "workarounds.h"
+
+/* Return the copy-buffer slot belonging to the queue's current insert
+ * index, allocating the backing page on first use.  On success
+ * @buffer->dma_addr points at the slot and @buffer->unmap_len is zeroed;
+ * returns NULL if the page allocation fails.
+ */
+static inline u8 *ef4_tx_get_copy_buffer(struct ef4_tx_queue *tx_queue,
+                                        struct ef4_tx_buffer *buffer)
+{
+       unsigned int insert = ef4_tx_queue_get_insert_index(tx_queue);
+       unsigned int page_idx = insert >> (PAGE_SHIFT - EF4_TX_CB_ORDER);
+       unsigned int offset = (insert << EF4_TX_CB_ORDER) + NET_IP_ALIGN;
+       struct ef4_buffer *page_buf = &tx_queue->cb_page[page_idx];
+
+       /* Slot offset within its page */
+       offset &= PAGE_SIZE - 1;
+
+       if (unlikely(!page_buf->addr)) {
+               if (ef4_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE,
+                                        GFP_ATOMIC))
+                       return NULL;
+       }
+
+       buffer->unmap_len = 0;
+       buffer->dma_addr = page_buf->dma_addr + offset;
+       return (u8 *)page_buf->addr + offset;
+}
+
+/* As ef4_tx_get_copy_buffer(), but returns NULL without touching the
+ * queue when @len does not fit in a copy-buffer slot (EF4_TX_CB_SIZE).
+ */
+u8 *ef4_tx_get_copy_buffer_limited(struct ef4_tx_queue *tx_queue,
+                                  struct ef4_tx_buffer *buffer, size_t len)
+{
+       if (len <= EF4_TX_CB_SIZE)
+               return ef4_tx_get_copy_buffer(tx_queue, buffer);
+
+       return NULL;
+}
+
+/* Release one TX buffer: undo its DMA mapping if it owns one and, if it
+ * carries the skb, add the packet to *pkts_compl / *bytes_compl and
+ * consume the skb.  The buffer is left with len and flags cleared.
+ * The counter pointers are only dereferenced for buffers flagged
+ * EF4_TX_BUF_SKB.
+ */
+static void ef4_dequeue_buffer(struct ef4_tx_queue *tx_queue,
+                              struct ef4_tx_buffer *buffer,
+                              unsigned int *pkts_compl,
+                              unsigned int *bytes_compl)
+{
+       struct ef4_nic *efx = tx_queue->efx;
+
+       if (buffer->unmap_len) {
+               struct device *dev = &efx->pci_dev->dev;
+               /* Start of the mapping this descriptor is responsible for */
+               dma_addr_t map_start = buffer->dma_addr - buffer->dma_offset;
+
+               if (!(buffer->flags & EF4_TX_BUF_MAP_SINGLE))
+                       dma_unmap_page(dev, map_start, buffer->unmap_len,
+                                      DMA_TO_DEVICE);
+               else
+                       dma_unmap_single(dev, map_start, buffer->unmap_len,
+                                        DMA_TO_DEVICE);
+               buffer->unmap_len = 0;
+       }
+
+       if (buffer->flags & EF4_TX_BUF_SKB) {
+               struct sk_buff *skb = (struct sk_buff *)buffer->skb;
+
+               *pkts_compl += 1;
+               *bytes_compl += skb->len;
+               dev_consume_skb_any(skb);
+               netif_vdbg(efx, tx_done, efx->net_dev,
+                          "TX queue %d transmission id %x complete\n",
+                          tx_queue->queue, tx_queue->read_count);
+       }
+
+       buffer->flags = 0;
+       buffer->len = 0;
+}
+
+/* Upper bound on the number of TX descriptors a single skb may need. */
+unsigned int ef4_tx_max_skb_descs(struct ef4_nic *efx)
+{
+       unsigned int descs;
+
+       /* This is probably too much since we don't have any TSO support;
+        * it's a left-over from when we had Software TSO.  But it's safer
+        * to leave it as-is than try to determine a new bound.
+        *
+        * Header and payload descriptor for each output segment, plus
+        * one for every input fragment boundary within a segment
+        */
+       descs = MAX_SKB_FRAGS + EF4_TSO_MAX_SEGS * 2;
+
+       /* Possibly one more per segment for the alignment workaround,
+        * or for option descriptors
+        */
+       if (EF4_WORKAROUND_5391(efx))
+               descs += EF4_TSO_MAX_SEGS;
+
+       /* Possibly more for PCIe page boundaries within input fragments */
+       if (PAGE_SIZE > EF4_PAGE_SIZE) {
+               unsigned int crossings =
+                       DIV_ROUND_UP(GSO_MAX_SIZE, EF4_PAGE_SIZE);
+
+               descs += max_t(unsigned int, MAX_SKB_FRAGS, crossings);
+       }
+
+       return descs;
+}
+
+/* Stop the core TX queue behind @txq1 if it, or its partner queue, has
+ * filled up to efx->txq_stop_thresh.
+ *
+ * A cheap check against the cached old_read_count runs first; only when
+ * that suggests the queue is full is the queue stopped and the fill level
+ * recomputed from the live read_count.  The queue is restarted at once if
+ * the fresh value is below the threshold and no loopback self-test is in
+ * progress.
+ */
+static void ef4_tx_maybe_stop_queue(struct ef4_tx_queue *txq1)
+{
+       /* We need to consider both queues that the net core sees as one */
+       struct ef4_tx_queue *txq2 = ef4_tx_queue_partner(txq1);
+       struct ef4_nic *efx = txq1->efx;
+       unsigned int fill_level;
+
+       fill_level = max(txq1->insert_count - txq1->old_read_count,
+                        txq2->insert_count - txq2->old_read_count);
+       if (likely(fill_level < efx->txq_stop_thresh))
+               return;
+
+       /* We used the stale old_read_count above, which gives us a
+        * pessimistic estimate of the fill level (which may even
+        * validly be >= efx->txq_entries).  Now try again using
+        * read_count (more likely to be a cache miss).
+        *
+        * If we read read_count and then conditionally stop the
+        * queue, it is possible for the completion path to race with
+        * us and complete all outstanding descriptors in the middle,
+        * after which there will be no more completions to wake it.
+        * Therefore we stop the queue first, then read read_count
+        * (with a memory barrier to ensure the ordering), then
+        * restart the queue if the fill level turns out to be low
+        * enough.
+        */
+       netif_tx_stop_queue(txq1->core_txq);
+       smp_mb();
+       txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
+       txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
+
+       /* Recompute with the refreshed read counts */
+       fill_level = max(txq1->insert_count - txq1->old_read_count,
+                        txq2->insert_count - txq2->old_read_count);
+       EF4_BUG_ON_PARANOID(fill_level >= efx->txq_entries);
+       if (likely(fill_level < efx->txq_stop_thresh)) {
+               smp_mb();
+               /* The queue is left stopped while a loopback self-test runs */
+               if (likely(!efx->loopback_selftest))
+                       netif_tx_start_queue(txq1->core_txq);
+       }
+}
+
+/* Copy a whole skb into the copy buffer for the current insert slot and
+ * queue it as a single descriptor, zero-padding it up to the queue's
+ * minimum TX size if it is shorter.
+ *
+ * Returns 0 on success, -ENOMEM if no copy buffer could be obtained, or
+ * the error from skb_copy_bits().  On any failure the queue is left
+ * untouched: no descriptor is committed and the skb is not attached, so
+ * the caller still owns the skb and its unwind path never sees a buffer
+ * flagged EF4_TX_BUF_SKB.
+ */
+static int ef4_enqueue_skb_copy(struct ef4_tx_queue *tx_queue,
+                               struct sk_buff *skb)
+{
+       unsigned int min_len = tx_queue->tx_min_size;
+       unsigned int copy_len = skb->len;
+       struct ef4_tx_buffer *buffer;
+       u8 *copy_buffer;
+       int rc;
+
+       EF4_BUG_ON_PARANOID(copy_len > EF4_TX_CB_SIZE);
+
+       buffer = ef4_tx_queue_get_insert_buffer(tx_queue);
+
+       copy_buffer = ef4_tx_get_copy_buffer(tx_queue, buffer);
+       if (unlikely(!copy_buffer))
+               return -ENOMEM;
+
+       rc = skb_copy_bits(skb, 0, copy_buffer, copy_len);
+       EF4_WARN_ON_PARANOID(rc);
+       /* Bail out before committing anything: the caller frees the skb
+        * on error, so it must not also be reachable from the queue.
+        */
+       if (unlikely(rc))
+               return rc;
+
+       if (unlikely(copy_len < min_len)) {
+               memset(copy_buffer + copy_len, 0, min_len - copy_len);
+               buffer->len = min_len;
+       } else {
+               buffer->len = copy_len;
+       }
+
+       buffer->skb = skb;
+       buffer->flags = EF4_TX_BUF_SKB;
+
+       ++tx_queue->insert_count;
+       return 0;
+}
+
+/* Queue descriptors covering @len bytes of already-mapped DMA memory at
+ * @dma_addr, splitting at whatever limit the NIC type's tx_limit_len()
+ * imposes.  At least one descriptor is always queued.  Every descriptor
+ * is flagged EF4_TX_BUF_CONT; the last one queued is returned.
+ */
+static struct ef4_tx_buffer *ef4_tx_map_chunk(struct ef4_tx_queue *tx_queue,
+                                             dma_addr_t dma_addr,
+                                             size_t len)
+{
+       const struct ef4_nic_type *type = tx_queue->efx->type;
+
+       /* Map the fragment taking account of NIC-dependent DMA limits. */
+       for (;;) {
+               struct ef4_tx_buffer *buffer;
+               unsigned int chunk;
+
+               buffer = ef4_tx_queue_get_insert_buffer(tx_queue);
+               chunk = type->tx_limit_len(tx_queue, dma_addr, len);
+
+               buffer->dma_addr = dma_addr;
+               buffer->len = chunk;
+               buffer->flags = EF4_TX_BUF_CONT;
+               ++tx_queue->insert_count;
+
+               dma_addr += chunk;
+               len -= chunk;
+               if (!len)
+                       return buffer;
+       }
+}
+
+/* Map all data from an SKB for DMA and create descriptors on the queue.
+ *
+ * The linear header is mapped with dma_map_single() and each page
+ * fragment with skb_frag_dma_map().  The last descriptor of each mapping
+ * records how to unmap it; the very last descriptor also carries the skb.
+ *
+ * Returns 0 on success or -EIO if a DMA mapping fails.  On failure the
+ * descriptors already queued for this skb are left on the queue (none of
+ * them has the skb attached); the caller is expected to unwind them.
+ */
+static int ef4_tx_map_data(struct ef4_tx_queue *tx_queue, struct sk_buff *skb)
+{
+       struct ef4_nic *efx = tx_queue->efx;
+       struct device *dma_dev = &efx->pci_dev->dev;
+       unsigned int frag_index, nr_frags;
+       dma_addr_t dma_addr, unmap_addr;
+       unsigned short dma_flags;
+       size_t len, unmap_len;
+
+       nr_frags = skb_shinfo(skb)->nr_frags;
+       frag_index = 0;
+
+       /* Map header data. */
+       len = skb_headlen(skb);
+       dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
+       dma_flags = EF4_TX_BUF_MAP_SINGLE;
+       unmap_len = len;
+       unmap_addr = dma_addr;
+
+       if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+               return -EIO;
+
+       /* Add descriptors for each fragment. */
+       do {
+               struct ef4_tx_buffer *buffer;
+               skb_frag_t *fragment;
+
+               buffer = ef4_tx_map_chunk(tx_queue, dma_addr, len);
+
+               /* The final descriptor for a fragment is responsible for
+                * unmapping the whole fragment.
+                */
+               buffer->flags = EF4_TX_BUF_CONT | dma_flags;
+               buffer->unmap_len = unmap_len;
+               /* Distance from the start of the mapping to this descriptor */
+               buffer->dma_offset = buffer->dma_addr - unmap_addr;
+
+               if (frag_index >= nr_frags) {
+                       /* Store SKB details with the final buffer for
+                        * the completion.
+                        */
+                       buffer->skb = skb;
+                       buffer->flags = EF4_TX_BUF_SKB | dma_flags;
+                       return 0;
+               }
+
+               /* Move on to the next fragment. */
+               fragment = &skb_shinfo(skb)->frags[frag_index++];
+               len = skb_frag_size(fragment);
+               dma_addr = skb_frag_dma_map(dma_dev, fragment,
+                               0, len, DMA_TO_DEVICE);
+               /* Page fragments are unmapped with dma_unmap_page() */
+               dma_flags = 0;
+               unmap_len = len;
+               unmap_addr = dma_addr;
+
+               if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+                       return -EIO;
+       } while (1);
+}
+
+/* Remove buffers from the tail of a tx_queue until its insert pointer is
+ * back at @insert_count.  None of the buffers removed must have an skb
+ * attached.
+ */
+static void ef4_enqueue_unwind_to(struct ef4_tx_queue *tx_queue,
+                                 unsigned int insert_count)
+{
+       struct ef4_tx_buffer *buffer;
+
+       /* Work backwards until we hit the requested insert pointer value */
+       while (tx_queue->insert_count != insert_count) {
+               --tx_queue->insert_count;
+               buffer = __ef4_tx_queue_get_insert_buffer(tx_queue);
+               ef4_dequeue_buffer(tx_queue, buffer, NULL, NULL);
+       }
+}
+
+/* Remove buffers put into a tx_queue.  None of the buffers must have
+ * an skb attached.
+ *
+ * This unwinds everything not yet pushed to hardware, which is only safe
+ * when no earlier skb is still waiting for a deferred (xmit_more) push.
+ * Kept for any caller outside ef4_enqueue_skb().
+ */
+static void __maybe_unused ef4_enqueue_unwind(struct ef4_tx_queue *tx_queue)
+{
+       ef4_enqueue_unwind_to(tx_queue, tx_queue->write_count);
+}
+
+/*
+ * Add a socket buffer to a TX queue
+ *
+ * This maps all fragments of a socket buffer for DMA and adds them to
+ * the TX queue.  The queue's insert pointer will be incremented by
+ * the number of fragments in the socket buffer.
+ *
+ * If any DMA mapping fails, any mapped fragments will be unmapped,
+ * the queue's insert pointer will be restored to its value on entry
+ * and the skb is freed.  Descriptors belonging to earlier skbs whose
+ * push was deferred by xmit_more are left in place.
+ *
+ * This function is split out from ef4_hard_start_xmit to allow the
+ * loopback test to direct packets via specific TX queues.
+ *
+ * Returns NETDEV_TX_OK.
+ * You must hold netif_tx_lock() to call this function.
+ */
+netdev_tx_t ef4_enqueue_skb(struct ef4_tx_queue *tx_queue, struct sk_buff *skb)
+{
+       /* Remember where this skb starts so a failure unwinds only it */
+       unsigned int old_insert_count = tx_queue->insert_count;
+       /* Sampled up front: the skb is freed on the error path */
+       bool xmit_more = skb->xmit_more;
+       bool data_mapped = false;
+       unsigned int skb_len;
+
+       skb_len = skb->len;
+       EF4_WARN_ON_PARANOID(skb_is_gso(skb));
+
+       if (skb_len < tx_queue->tx_min_size ||
+                       (skb->data_len && skb_len <= EF4_TX_CB_SIZE)) {
+               /* Pad short packets or coalesce short fragmented packets. */
+               if (ef4_enqueue_skb_copy(tx_queue, skb))
+                       goto err;
+               tx_queue->cb_packets++;
+               data_mapped = true;
+       }
+
+       /* Map for DMA and create descriptors if we haven't done so already. */
+       if (!data_mapped && (ef4_tx_map_data(tx_queue, skb)))
+               goto err;
+
+       /* Update BQL */
+       netdev_tx_sent_queue(tx_queue->core_txq, skb_len);
+
+       /* Pass off to hardware */
+       if (!xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
+               struct ef4_tx_queue *txq2 = ef4_tx_queue_partner(tx_queue);
+
+               /* There could be packets left on the partner queue if those
+                * SKBs had skb->xmit_more set. If we do not push those they
+                * could be left for a long time and cause a netdev watchdog.
+                */
+               if (txq2->xmit_more_available)
+                       ef4_nic_push_buffers(txq2);
+
+               ef4_nic_push_buffers(tx_queue);
+       } else {
+               tx_queue->xmit_more_available = xmit_more;
+       }
+
+       tx_queue->tx_packets++;
+
+       ef4_tx_maybe_stop_queue(tx_queue);
+
+       return NETDEV_TX_OK;
+
+
+err:
+       /* Only remove this skb's descriptors.  Unwinding all the way to
+        * write_count would also tear down earlier skbs that are queued
+        * but not yet pushed; those buffers carry EF4_TX_BUF_SKB and must
+        * not be dequeued with NULL completion counters.
+        */
+       ef4_enqueue_unwind_to(tx_queue, old_insert_count);
+       dev_kfree_skb_any(skb);
+
+       /* If we're not expecting another transmit, push whatever earlier
+        * packets were deferred on this queue or its partner so they are
+        * not left stranded.
+        */
+       if (!xmit_more) {
+               struct ef4_tx_queue *txq2 = ef4_tx_queue_partner(tx_queue);
+
+               if (txq2->xmit_more_available)
+                       ef4_nic_push_buffers(txq2);
+               if (tx_queue->xmit_more_available)
+                       ef4_nic_push_buffers(tx_queue);
+       }
+
+       return NETDEV_TX_OK;
+}
+
+/* Remove packets from the TX queue
+ *
+ * This removes packets from the TX queue, up to and including the
+ * specified index, accumulating completed packets and bytes into
+ * *pkts_compl and *bytes_compl.  If a non-option descriptor of zero
+ * length is met, the completion is treated as spurious: an error is
+ * logged, a RESET_TYPE_TX_SKIP reset is scheduled and processing stops.
+ */
+static void ef4_dequeue_buffers(struct ef4_tx_queue *tx_queue,
+                               unsigned int index,
+                               unsigned int *pkts_compl,
+                               unsigned int *bytes_compl)
+{
+       struct ef4_nic *efx = tx_queue->efx;
+       unsigned int end_ptr = (index + 1) & tx_queue->ptr_mask;
+       unsigned int cur_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+
+       while (cur_ptr != end_ptr) {
+               struct ef4_tx_buffer *buffer = &tx_queue->buffer[cur_ptr];
+               bool is_option = buffer->flags & EF4_TX_BUF_OPTION;
+
+               if (!is_option && unlikely(buffer->len == 0)) {
+                       netif_err(efx, tx_err, efx->net_dev,
+                                 "TX queue %d spurious TX completion id %x\n",
+                                 tx_queue->queue, cur_ptr);
+                       ef4_schedule_reset(efx, RESET_TYPE_TX_SKIP);
+                       return;
+               }
+
+               ef4_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
+
+               tx_queue->read_count++;
+               cur_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+       }
+}
+
+/* Start transmission of a packet.
+ *
+ * One channel is used per CPU (shared when there are more CPUs than
+ * channels).  On Falcon, the TX completion events are directed back to
+ * the CPU that transmitted the packet, which should be cache-efficient.
+ *
+ * The skb's queue mapping selects the TX queue: mappings at or above
+ * n_tx_channels refer to the high-priority queue set, and skbs needing
+ * checksum offload (CHECKSUM_PARTIAL) use the offload queue type.
+ *
+ * Context: non-blocking.
+ * Note that returning anything other than NETDEV_TX_OK will cause the
+ * OS to free the skb.
+ */
+netdev_tx_t ef4_hard_start_xmit(struct sk_buff *skb,
+                               struct net_device *net_dev)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       unsigned int queue_idx;
+       unsigned int queue_type = 0;
+
+       EF4_WARN_ON_PARANOID(!netif_device_present(net_dev));
+
+       queue_idx = skb_get_queue_mapping(skb);
+
+       if (skb->ip_summed == CHECKSUM_PARTIAL)
+               queue_type = EF4_TXQ_TYPE_OFFLOAD;
+
+       if (queue_idx >= efx->n_tx_channels) {
+               queue_idx -= efx->n_tx_channels;
+               queue_type |= EF4_TXQ_TYPE_HIGHPRI;
+       }
+
+       return ef4_enqueue_skb(ef4_get_tx_queue(efx, queue_idx, queue_type),
+                              skb);
+}
+
+/* Bind a driver TX queue to its core (netdev) TX queue */
+void ef4_init_tx_queue_core_txq(struct ef4_tx_queue *tx_queue)
+{
+       struct ef4_nic *efx = tx_queue->efx;
+       unsigned int core_index;
+
+       /* Must be inverse of queue lookup in ef4_hard_start_xmit() */
+       core_index = tx_queue->queue / EF4_TXQ_TYPES;
+       if (tx_queue->queue & EF4_TXQ_TYPE_HIGHPRI)
+               core_index += efx->n_tx_channels;
+
+       tx_queue->core_txq = netdev_get_tx_queue(efx->net_dev, core_index);
+}
+
+/* Change the number of traffic classes on the device.
+ *
+ * Only TC_SETUP_MQPRIO requests are handled; @handle and @proto are not
+ * used.  Each class is given its own block of n_tx_channels core TX
+ * queues.
+ *
+ * Returns 0 on success (including when the class count is unchanged),
+ * -EINVAL for an unsupported request type, a NIC revision older than
+ * Falcon B0 or more than EF4_MAX_TX_TC classes, or the error returned by
+ * ef4_probe_tx_queue() / netif_set_real_num_tx_queues().
+ */
+int ef4_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
+                struct tc_to_netdev *ntc)
+{
+       struct ef4_nic *efx = netdev_priv(net_dev);
+       struct ef4_channel *channel;
+       struct ef4_tx_queue *tx_queue;
+       unsigned tc, num_tc;
+       int rc;
+
+       if (ntc->type != TC_SETUP_MQPRIO)
+               return -EINVAL;
+
+       num_tc = ntc->tc;
+
+       if (ef4_nic_rev(efx) < EF4_REV_FALCON_B0 || num_tc > EF4_MAX_TX_TC)
+               return -EINVAL;
+
+       if (num_tc == net_dev->num_tc)
+               return 0;
+
+       /* Class tc maps onto core queues [tc * n, tc * n + n) where n is
+        * n_tx_channels.
+        * NOTE(review): this mapping is written before the fallible steps
+        * below, so it stays modified if one of them returns an error.
+        */
+       for (tc = 0; tc < num_tc; tc++) {
+               net_dev->tc_to_txq[tc].offset = tc * efx->n_tx_channels;
+               net_dev->tc_to_txq[tc].count = efx->n_tx_channels;
+       }
+
+       if (num_tc > net_dev->num_tc) {
+               /* Initialise high-priority queues as necessary */
+               ef4_for_each_channel(channel, efx) {
+                       ef4_for_each_possible_channel_tx_queue(tx_queue,
+                                                              channel) {
+                               if (!(tx_queue->queue & EF4_TXQ_TYPE_HIGHPRI))
+                                       continue;
+                               /* Probe only queues that have no software
+                                * ring yet.
+                                */
+                               if (!tx_queue->buffer) {
+                                       rc = ef4_probe_tx_queue(tx_queue);
+                                       if (rc)
+                                               return rc;
+                               }
+                               if (!tx_queue->initialised)
+                                       ef4_init_tx_queue(tx_queue);
+                               ef4_init_tx_queue_core_txq(tx_queue);
+                       }
+               }
+       } else {
+               /* Reduce number of classes before number of queues */
+               net_dev->num_tc = num_tc;
+       }
+
+       /* With zero classes the device still exposes one set of
+        * n_tx_channels queues.
+        */
+       rc = netif_set_real_num_tx_queues(net_dev,
+                                         max_t(int, num_tc, 1) *
+                                         efx->n_tx_channels);
+       if (rc)
+               return rc;
+
+       /* Do not destroy high-priority queues when they become
+        * unused.  We would have to flush them first, and it is
+        * fairly difficult to flush a subset of TX queues.  Leave
+        * it to ef4_fini_channels().
+        */
+
+       net_dev->num_tc = num_tc;
+       return 0;
+}
+
+/* Process a TX completion for descriptors up to and including @index.
+ *
+ * Releases the completed buffers, accumulates the per-queue completion
+ * counters, wakes the core TX queue if it was stopped and the fill level
+ * has dropped to the wake threshold, and records when the hardware queue
+ * has been observed empty.
+ */
+void ef4_xmit_done(struct ef4_tx_queue *tx_queue, unsigned int index)
+{
+       unsigned fill_level;
+       struct ef4_nic *efx = tx_queue->efx;
+       struct ef4_tx_queue *txq2;
+       unsigned int pkts_compl = 0, bytes_compl = 0;
+
+       EF4_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
+
+       ef4_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
+       tx_queue->pkts_compl += pkts_compl;
+       tx_queue->bytes_compl += bytes_compl;
+
+       /* More than one packet completed by a single event */
+       if (pkts_compl > 1)
+               ++tx_queue->merge_events;
+
+       /* See if we need to restart the netif queue.  This memory
+        * barrier ensures that we write read_count (inside
+        * ef4_dequeue_buffers()) before reading the queue status.
+        */
+       smp_mb();
+       if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
+           likely(efx->port_enabled) &&
+           likely(netif_device_present(efx->net_dev))) {
+               /* Wake only when the fuller of this queue and its partner
+                * is at or below the wake threshold.
+                */
+               txq2 = ef4_tx_queue_partner(tx_queue);
+               fill_level = max(tx_queue->insert_count - tx_queue->read_count,
+                                txq2->insert_count - txq2->read_count);
+               if (fill_level <= efx->txq_wake_thresh)
+                       netif_tx_wake_queue(tx_queue->core_txq);
+       }
+
+       /* Check whether the hardware queue is now empty */
+       /* The signed difference keeps the comparison correct across
+        * counter wrap-around: proceed only once read_count has caught up
+        * with the last write_count snapshot, then take a fresh snapshot.
+        */
+       if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
+               tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count);
+               if (tx_queue->read_count == tx_queue->old_write_count) {
+                       smp_mb();
+                       /* Publish the read_count at which the queue was
+                        * seen empty, tagged with EF4_EMPTY_COUNT_VALID.
+                        */
+                       tx_queue->empty_read_count =
+                               tx_queue->read_count | EF4_EMPTY_COUNT_VALID;
+               }
+       }
+}
+
+/* Number of entries in tx_queue->cb_page[]: the ring size (ptr_mask + 1)
+ * divided by (PAGE_SIZE >> EF4_TX_CB_ORDER), rounded up.
+ * NOTE(review): presumably PAGE_SIZE >> EF4_TX_CB_ORDER is the number of
+ * copy buffers that fit in one page - confirm against EF4_TX_CB_ORDER.
+ */
+static unsigned int ef4_tx_cb_page_count(struct ef4_tx_queue *tx_queue)
+{
+       return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EF4_TX_CB_ORDER);
+}
+
+/* Allocate the resources backing a TX queue.
+ *
+ * Sizes the ring (a power of two, at least EF4_MIN_DMAQ_SIZE), allocates
+ * the software buffer array and the cb_page array, then asks the NIC
+ * code to allocate the hardware ring.
+ *
+ * Returns 0 on success, -ENOMEM if either array cannot be allocated, or
+ * the error from ef4_nic_probe_tx().  On failure anything allocated here
+ * is freed again and the corresponding pointers are reset to NULL.
+ */
+int ef4_probe_tx_queue(struct ef4_tx_queue *tx_queue)
+{
+       struct ef4_nic *efx = tx_queue->efx;
+       unsigned int ring_size;
+       int rc;
+
+       /* Create the smallest power-of-two aligned ring */
+       ring_size = max(roundup_pow_of_two(efx->txq_entries),
+                       EF4_MIN_DMAQ_SIZE);
+       EF4_BUG_ON_PARANOID(ring_size > EF4_MAX_DMAQ_SIZE);
+       tx_queue->ptr_mask = ring_size - 1;
+
+       netif_dbg(efx, probe, efx->net_dev,
+                 "creating TX queue %d size %#x mask %#x\n",
+                 tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);
+
+       /* Software ring */
+       tx_queue->buffer = kcalloc(ring_size, sizeof(*tx_queue->buffer),
+                                  GFP_KERNEL);
+       if (!tx_queue->buffer)
+               return -ENOMEM;
+
+       /* Copy-buffer page table */
+       tx_queue->cb_page = kcalloc(ef4_tx_cb_page_count(tx_queue),
+                                   sizeof(tx_queue->cb_page[0]), GFP_KERNEL);
+       if (!tx_queue->cb_page) {
+               rc = -ENOMEM;
+               goto err_free_buffer;
+       }
+
+       /* Hardware ring */
+       rc = ef4_nic_probe_tx(tx_queue);
+       if (!rc)
+               return 0;
+
+       kfree(tx_queue->cb_page);
+       tx_queue->cb_page = NULL;
+err_free_buffer:
+       kfree(tx_queue->buffer);
+       tx_queue->buffer = NULL;
+       return rc;
+}
+
+/* Reset a TX queue's software counters and initialise its descriptor
+ * ring; the queue is marked initialised on return.
+ */
+void ef4_init_tx_queue(struct ef4_tx_queue *tx_queue)
+{
+       struct ef4_nic *efx = tx_queue->efx;
+
+       netif_dbg(efx, drv, efx->net_dev,
+                 "initialising TX queue %d\n", tx_queue->queue);
+
+       /* Insert/write side */
+       tx_queue->insert_count = 0;
+       tx_queue->write_count = 0;
+       tx_queue->old_write_count = 0;
+
+       /* Read side; the queue starts out flagged as empty */
+       tx_queue->read_count = 0;
+       tx_queue->old_read_count = 0;
+       tx_queue->empty_read_count = EF4_EMPTY_COUNT_VALID;
+
+       tx_queue->xmit_more_available = false;
+
+       /* Some older hardware requires Tx writes larger than 32. */
+       if (EF4_WORKAROUND_15592(efx))
+               tx_queue->tx_min_size = 33;
+       else
+               tx_queue->tx_min_size = 0;
+
+       /* Set up TX descriptor ring */
+       ef4_nic_init_tx(tx_queue);
+
+       tx_queue->initialised = true;
+}
+
+/* Shut down a TX queue: release every buffer still in the ring, clear
+ * the deferred-push flag and reset the core queue's accounting.  Does
+ * nothing (beyond the debug message) if the queue was never probed.
+ */
+void ef4_fini_tx_queue(struct ef4_tx_queue *tx_queue)
+{
+       netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+                 "shutting down TX queue %d\n", tx_queue->queue);
+
+       if (!tx_queue->buffer)
+               return;
+
+       /* Free any buffers left in the ring */
+       for (; tx_queue->read_count != tx_queue->write_count;
+            ++tx_queue->read_count) {
+               unsigned int slot = tx_queue->read_count & tx_queue->ptr_mask;
+               unsigned int pkts = 0, bytes = 0;
+
+               /* The completion counts are not needed here */
+               ef4_dequeue_buffer(tx_queue, &tx_queue->buffer[slot],
+                                  &pkts, &bytes);
+       }
+
+       tx_queue->xmit_more_available = false;
+       netdev_tx_reset_queue(tx_queue->core_txq);
+}
+
+/* Release everything set up by ef4_probe_tx_queue(): the hardware ring,
+ * each cb_page buffer and the cb_page array, and the software ring.
+ * A queue with no software ring is left untouched.
+ */
+void ef4_remove_tx_queue(struct ef4_tx_queue *tx_queue)
+{
+       unsigned int page, n_pages;
+
+       if (!tx_queue->buffer)
+               return;
+
+       netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+                 "destroying TX queue %d\n", tx_queue->queue);
+       ef4_nic_remove_tx(tx_queue);
+
+       if (tx_queue->cb_page) {
+               n_pages = ef4_tx_cb_page_count(tx_queue);
+               for (page = 0; page < n_pages; page++)
+                       ef4_nic_free_buffer(tx_queue->efx,
+                                           &tx_queue->cb_page[page]);
+               kfree(tx_queue->cb_page);
+               tx_queue->cb_page = NULL;
+       }
+
+       kfree(tx_queue->buffer);
+       tx_queue->buffer = NULL;
+}
diff --git a/drivers/net/ethernet/sfc/falcon/tx.h b/drivers/net/ethernet/sfc/falcon/tx.h
new file mode 100644 (file)
index 0000000..a607eb0
--- /dev/null
@@ -0,0 +1,27 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2015 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_TX_H
+#define EF4_TX_H
+
+#include <linux/types.h>
+
+/* Forward declarations.  Without these, a struct tag first seen inside a
+ * prototype's parameter list has prototype scope only, so this header
+ * would only work when the includer had already declared these types.
+ */
+struct ef4_tx_queue;
+struct ef4_tx_buffer;
+struct sk_buff;
+
+/* Driver internal tx-path related declarations. */
+
+unsigned int ef4_tx_limit_len(struct ef4_tx_queue *tx_queue,
+                             dma_addr_t dma_addr, unsigned int len);
+
+u8 *ef4_tx_get_copy_buffer_limited(struct ef4_tx_queue *tx_queue,
+                                  struct ef4_tx_buffer *buffer, size_t len);
+
+int ef4_enqueue_skb_tso(struct ef4_tx_queue *tx_queue, struct sk_buff *skb,
+                       bool *data_mapped);
+
+#endif /* EF4_TX_H */
diff --git a/drivers/net/ethernet/sfc/falcon/txc43128_phy.c b/drivers/net/ethernet/sfc/falcon/txc43128_phy.c
new file mode 100644 (file)
index 0000000..18421f5
--- /dev/null
@@ -0,0 +1,560 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2006-2011 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+/*
+ * Driver for Transwitch/Mysticom CX4 retimer
+ * see www.transwitch.com, part is TXC-43128
+ */
+
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include "efx.h"
+#include "mdio_10g.h"
+#include "phy.h"
+#include "nic.h"
+
+/* We expect these MMDs to be in the package */
+#define TXC_REQUIRED_DEVS (MDIO_DEVS_PCS |     \
+                          MDIO_DEVS_PMAPMD |   \
+                          MDIO_DEVS_PHYXS)
+
+#define TXC_LOOPBACKS ((1 << LOOPBACK_PCS) |   \
+                      (1 << LOOPBACK_PMAPMD) | \
+                      (1 << LOOPBACK_PHYXS_WS))
+
+/**************************************************************************
+ *
+ * Compile-time config
+ *
+ **************************************************************************
+ */
+#define TXCNAME "TXC43128"
+/* Total length of time we'll wait for the PHY to come out of reset (ms) */
+#define TXC_MAX_RESET_TIME     500
+/* Interval between checks (ms) */
+#define TXC_RESET_WAIT         10
+/* How long to run BIST (us) */
+#define TXC_BIST_DURATION      50
+
+/**************************************************************************
+ *
+ * Register definitions
+ *
+ **************************************************************************
+ */
+
+/* Command register */
+#define TXC_GLRGS_GLCMD                0xc004
+/* Useful bits in command register */
+/* Lane power-down */
+#define TXC_GLCMD_L01PD_LBN    5
+#define TXC_GLCMD_L23PD_LBN    6
+/* Limited SW reset: preserves configuration but
+ * initiates a logic reset. Self-clearing */
+#define TXC_GLCMD_LMTSWRST_LBN 14
+
+/* Signal Quality Control */
+#define TXC_GLRGS_GSGQLCTL     0xc01a
+/* Enable bit */
+#define TXC_GSGQLCT_SGQLEN_LBN 15
+/* Lane selection */
+#define TXC_GSGQLCT_LNSL_LBN   13
+#define TXC_GSGQLCT_LNSL_WIDTH 2
+
+/* Analog TX control */
+#define TXC_ALRGS_ATXCTL       0xc040
+/* Lane power-down */
+#define TXC_ATXCTL_TXPD3_LBN   15
+#define TXC_ATXCTL_TXPD2_LBN   14
+#define TXC_ATXCTL_TXPD1_LBN   13
+#define TXC_ATXCTL_TXPD0_LBN   12
+
+/* Amplitude on lanes 0, 1 */
+#define TXC_ALRGS_ATXAMP0      0xc041
+/* Amplitude on lanes 2, 3 */
+#define TXC_ALRGS_ATXAMP1      0xc042
+/* Bit position of value for lane 0 (or 2) */
+#define TXC_ATXAMP_LANE02_LBN  3
+/* Bit position of value for lane 1 (or 3) */
+#define TXC_ATXAMP_LANE13_LBN  11
+
+#define TXC_ATXAMP_1280_mV     0
+#define TXC_ATXAMP_1200_mV     8
+#define TXC_ATXAMP_1120_mV     12
+#define TXC_ATXAMP_1060_mV     14
+#define TXC_ATXAMP_0820_mV     25
+#define TXC_ATXAMP_0720_mV     26
+#define TXC_ATXAMP_0580_mV     27
+#define TXC_ATXAMP_0440_mV     28
+
+#define TXC_ATXAMP_0820_BOTH                                   \
+       ((TXC_ATXAMP_0820_mV << TXC_ATXAMP_LANE02_LBN)          \
+        | (TXC_ATXAMP_0820_mV << TXC_ATXAMP_LANE13_LBN))
+
+#define TXC_ATXAMP_DEFAULT     0x6060 /* From databook */
+
+/* Preemphasis on lanes 0, 1 */
+#define TXC_ALRGS_ATXPRE0      0xc043
+/* Preemphasis on lanes 2, 3 */
+#define TXC_ALRGS_ATXPRE1      0xc044
+
+#define TXC_ATXPRE_NONE 0
+#define TXC_ATXPRE_DEFAULT     0x1010 /* From databook */
+
+#define TXC_ALRGS_ARXCTL       0xc045
+/* Lane power-down */
+#define TXC_ARXCTL_RXPD3_LBN   15
+#define TXC_ARXCTL_RXPD2_LBN   14
+#define TXC_ARXCTL_RXPD1_LBN   13
+#define TXC_ARXCTL_RXPD0_LBN   12
+
+/* Main control */
+#define TXC_MRGS_CTL           0xc340
+/* Bits in main control */
+#define TXC_MCTL_RESET_LBN     15      /* Self clear */
+#define TXC_MCTL_TXLED_LBN     14      /* 1 to show align status */
+#define TXC_MCTL_RXLED_LBN     13      /* 1 to show align status */
+
+/* GPIO output */
+#define TXC_GPIO_OUTPUT                0xc346
+#define TXC_GPIO_DIR           0xc348
+
+/* Vendor-specific BIST registers */
+#define TXC_BIST_CTL           0xc280
+#define TXC_BIST_TXFRMCNT      0xc281
+#define TXC_BIST_RX0FRMCNT     0xc282
+#define TXC_BIST_RX1FRMCNT     0xc283
+#define TXC_BIST_RX2FRMCNT     0xc284
+#define TXC_BIST_RX3FRMCNT     0xc285
+#define TXC_BIST_RX0ERRCNT     0xc286
+#define TXC_BIST_RX1ERRCNT     0xc287
+#define TXC_BIST_RX2ERRCNT     0xc288
+#define TXC_BIST_RX3ERRCNT     0xc289
+
+/* BIST type (controls bit pattern in test) */
+#define TXC_BIST_CTRL_TYPE_LBN 10
+#define TXC_BIST_CTRL_TYPE_TSD 0       /* TranSwitch Deterministic */
+#define TXC_BIST_CTRL_TYPE_CRP 1       /* CRPAT standard */
+#define TXC_BIST_CTRL_TYPE_CJP 2       /* CJPAT standard */
+#define TXC_BIST_CTRL_TYPE_TSR 3       /* TranSwitch pseudo-random */
+/* Set this to 1 for 10 bit and 0 for 8 bit */
+#define TXC_BIST_CTRL_B10EN_LBN        12
+/* Enable BIST (write 0 to disable) */
+#define TXC_BIST_CTRL_ENAB_LBN 13
+/* Stop BIST (self-clears when stop complete) */
+#define TXC_BIST_CTRL_STOP_LBN 14
+/* Start BIST (cleared by writing 1 to STOP) */
+#define TXC_BIST_CTRL_STRT_LBN 15
+
+/* Mt. Diablo test configuration */
+#define TXC_MTDIABLO_CTRL      0xc34f
+#define TXC_MTDIABLO_CTRL_PMA_LOOP_LBN 10
+
+/* Per-PHY private state, allocated in txc43128_phy_probe() and stored in
+ * efx->phy_data.
+ */
+struct txc43128_data {
+       /* jiffies value refreshed by txc43128_phy_poll() whenever the link
+        * is up, loopback is active, or a periodic logic reset was issued
+        */
+       unsigned long bug10934_timer;
+       /* PHY mode last applied by txc43128_phy_reconfigure() */
+       enum ef4_phy_mode phy_mode;
+       /* Loopback mode last applied by txc43128_phy_reconfigure() */
+       enum ef4_loopback_mode loopback_mode;
+};
+
+/* The PHY sometimes needs a reset to bring the link back up.  So long as
+ * it reports link down, we reset it every 5 seconds.
+ */
+#define BUG10934_RESET_INTERVAL (5 * HZ)
+
+/* Perform a reset that doesn't clear configuration changes */
+static void txc_reset_logic(struct ef4_nic *efx);
+
+/* Set the output level of GPIO @pin through the PHY XS GPIO output
+ * register.
+ */
+void falcon_txc_set_gpio_val(struct ef4_nic *efx, int pin, int on)
+{
+       int pin_mask = 1 << pin;
+
+       ef4_mdio_set_flag(efx, MDIO_MMD_PHYXS, TXC_GPIO_OUTPUT, pin_mask, on);
+}
+
+/* Set the direction of GPIO @pin through the PHY XS GPIO direction
+ * register.
+ */
+void falcon_txc_set_gpio_dir(struct ef4_nic *efx, int pin, int dir)
+{
+       int pin_mask = 1 << pin;
+
+       ef4_mdio_set_flag(efx, MDIO_MMD_PHYXS, TXC_GPIO_DIR, pin_mask, dir);
+}
+
+/* Reset the PHY via the PMA/PMD MMD.  The documentation is explicit that
+ * this does a global reset (it's less clear what reset of other MMDs
+ * does).  After the reset, verify that all required MMDs respond.
+ *
+ * Returns 0 on success or the negative error code from the failing step.
+ */
+static int txc_reset_phy(struct ef4_nic *efx)
+{
+       int rc;
+
+       rc = ef4_mdio_reset_mmd(efx, MDIO_MMD_PMAPMD,
+                               TXC_MAX_RESET_TIME / TXC_RESET_WAIT,
+                               TXC_RESET_WAIT);
+
+       /* Check that all the MMDs we expect are present and responding. */
+       if (rc >= 0)
+               rc = ef4_mdio_check_mmds(efx, TXC_REQUIRED_DEVS);
+
+       if (rc >= 0)
+               return 0;
+
+       netif_err(efx, hw, efx->net_dev, TXCNAME ": reset timed out!\n");
+       return rc;
+}
+
+/* Run a single BIST on one MMD */
+static int txc_bist_one(struct ef4_nic *efx, int mmd, int test)
+{
+       int ctrl, bctl;
+       int lane;
+       int rc = 0;
+
+       /* Set PMA to test into loopback using Mt Diablo reg as per app note */
+       ctrl = ef4_mdio_read(efx, MDIO_MMD_PCS, TXC_MTDIABLO_CTRL);
+       ctrl |= (1 << TXC_MTDIABLO_CTRL_PMA_LOOP_LBN);
+       ef4_mdio_write(efx, MDIO_MMD_PCS, TXC_MTDIABLO_CTRL, ctrl);
+
+       /* The BIST app. note lists these  as 3 distinct steps. */
+       /* Set the BIST type */
+       bctl = (test << TXC_BIST_CTRL_TYPE_LBN);
+       ef4_mdio_write(efx, mmd, TXC_BIST_CTL, bctl);
+
+       /* Set the BSTEN bit in the BIST Control register to enable */
+       bctl |= (1 << TXC_BIST_CTRL_ENAB_LBN);
+       ef4_mdio_write(efx, mmd, TXC_BIST_CTL, bctl);
+
+       /* Set the BSTRT bit in the BIST Control register */
+       ef4_mdio_write(efx, mmd, TXC_BIST_CTL,
+                      bctl | (1 << TXC_BIST_CTRL_STRT_LBN));
+
+       /* Wait. */
+       udelay(TXC_BIST_DURATION);
+
+       /* Set the BSTOP bit in the BIST Control register */
+       bctl |= (1 << TXC_BIST_CTRL_STOP_LBN);
+       ef4_mdio_write(efx, mmd, TXC_BIST_CTL, bctl);
+
+       /* The STOP bit should go off when things have stopped */
+       while (bctl & (1 << TXC_BIST_CTRL_STOP_LBN))
+               bctl = ef4_mdio_read(efx, mmd, TXC_BIST_CTL);
+
+       /* Check all the error counts are 0 and all the frame counts are
+          non-zero */
+       for (lane = 0; lane < 4; lane++) {
+               int count = ef4_mdio_read(efx, mmd, TXC_BIST_RX0ERRCNT + lane);
+               if (count != 0) {
+                       netif_err(efx, hw, efx->net_dev, TXCNAME": BIST error. "
+                                 "Lane %d had %d errs\n", lane, count);
+                       rc = -EIO;
+               }
+               count = ef4_mdio_read(efx, mmd, TXC_BIST_RX0FRMCNT + lane);
+               if (count == 0) {
+                       netif_err(efx, hw, efx->net_dev, TXCNAME": BIST error. "
+                                 "Lane %d got 0 frames\n", lane);
+                       rc = -EIO;
+               }
+       }
+
+       if (rc == 0)
+               netif_info(efx, hw, efx->net_dev, TXCNAME": BIST pass\n");
+
+       /* Disable BIST */
+       ef4_mdio_write(efx, mmd, TXC_BIST_CTL, 0);
+
+       /* Turn off loopback */
+       ctrl &= ~(1 << TXC_MTDIABLO_CTRL_PMA_LOOP_LBN);
+       ef4_mdio_write(efx, MDIO_MMD_PCS, TXC_MTDIABLO_CTRL, ctrl);
+
+       return rc;
+}
+
+/* Run the PHY self-test: one TranSwitch Deterministic (TSD) pattern BIST
+ * on the PCS MMD.  Returns the result of txc_bist_one().
+ */
+static int txc_bist(struct ef4_nic *efx)
+{
+       return txc_bist_one(efx, MDIO_MMD_PCS, TXC_BIST_CTRL_TYPE_TSD);
+}
+
+/* Push the non-configurable defaults into the PHY. This must be
+ * done after every full reset.
+ *
+ * Programs host-side and line-side amplitude/preemphasis, restores the
+ * LED modes, performs a logic reset and finally calls the board's
+ * init_phy hook. */
+static void txc_apply_defaults(struct ef4_nic *efx)
+{
+       int mctrl;
+
+       /* Turn amplitude down and preemphasis off on the host side
+        * (PHY<->MAC) as this is believed less likely to upset Falcon
+        * and no adverse effects have been noted. It probably also
+        * saves a picowatt or two */
+
+       /* Turn off preemphasis */
+       ef4_mdio_write(efx, MDIO_MMD_PHYXS, TXC_ALRGS_ATXPRE0, TXC_ATXPRE_NONE);
+       ef4_mdio_write(efx, MDIO_MMD_PHYXS, TXC_ALRGS_ATXPRE1, TXC_ATXPRE_NONE);
+
+       /* Turn down the amplitude */
+       ef4_mdio_write(efx, MDIO_MMD_PHYXS,
+                      TXC_ALRGS_ATXAMP0, TXC_ATXAMP_0820_BOTH);
+       ef4_mdio_write(efx, MDIO_MMD_PHYXS,
+                      TXC_ALRGS_ATXAMP1, TXC_ATXAMP_0820_BOTH);
+
+       /* Set the line side amplitude and preemphasis to the databook
+        * defaults as an erratum causes them to be 0 on at least some
+        * PHY rev.s */
+       ef4_mdio_write(efx, MDIO_MMD_PMAPMD,
+                      TXC_ALRGS_ATXPRE0, TXC_ATXPRE_DEFAULT);
+       ef4_mdio_write(efx, MDIO_MMD_PMAPMD,
+                      TXC_ALRGS_ATXPRE1, TXC_ATXPRE_DEFAULT);
+       ef4_mdio_write(efx, MDIO_MMD_PMAPMD,
+                      TXC_ALRGS_ATXAMP0, TXC_ATXAMP_DEFAULT);
+       ef4_mdio_write(efx, MDIO_MMD_PMAPMD,
+                      TXC_ALRGS_ATXAMP1, TXC_ATXAMP_DEFAULT);
+
+       /* Set up the LEDs  */
+       mctrl = ef4_mdio_read(efx, MDIO_MMD_PHYXS, TXC_MRGS_CTL);
+
+       /* Set the Green and Red LEDs to their default modes */
+       /* (a set TXLED/RXLED bit would make the LED show align status) */
+       mctrl &= ~((1 << TXC_MCTL_TXLED_LBN) | (1 << TXC_MCTL_RXLED_LBN));
+       ef4_mdio_write(efx, MDIO_MMD_PHYXS, TXC_MRGS_CTL, mctrl);
+
+       /* Databook recommends doing this after configuration changes */
+       txc_reset_logic(efx);
+
+       /* Hand over to the board-specific PHY initialisation hook */
+       falcon_board(efx)->type->init_phy(efx);
+}
+
+/* Probe entry point: allocate the PHY private data and advertise the
+ * MMDs, MDIO mode support and loopback modes of this PHY.
+ *
+ * Returns 0 on success or -ENOMEM if the private data cannot be
+ * allocated.
+ */
+static int txc43128_phy_probe(struct ef4_nic *efx)
+{
+       struct txc43128_data *priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+
+       /* Allocate phy private storage */
+       if (!priv)
+               return -ENOMEM;
+
+       priv->phy_mode = efx->phy_mode;
+       efx->phy_data = priv;
+
+       efx->mdio.mmds = TXC_REQUIRED_DEVS;
+       efx->mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
+
+       efx->loopback_modes = TXC_LOOPBACKS | FALCON_XMAC_LOOPBACKS;
+
+       return 0;
+}
+
+/* Initialisation entry point for this PHY driver: reset the PHY, run the
+ * BIST and, if both succeed, apply the default configuration.
+ *
+ * Returns 0 on success or the negative error code of the failing step.
+ */
+static int txc43128_phy_init(struct ef4_nic *efx)
+{
+       int rc = txc_reset_phy(efx);
+
+       if (rc >= 0)
+               rc = txc_bist(efx);
+       if (rc < 0)
+               return rc;
+
+       txc_apply_defaults(efx);
+
+       return 0;
+}
+
+/* Update the lane power-down bits in the global command register of
+ * @mmd to match the PHY_MODE_LOW_POWER flag in efx->phy_mode.
+ */
+static void txc_glrgs_lane_power(struct ef4_nic *efx, int mmd)
+{
+       const int lane_pd = (1 << TXC_GLCMD_L01PD_LBN) |
+                           (1 << TXC_GLCMD_L23PD_LBN);
+       int cmd = ef4_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
+
+       if (efx->phy_mode & PHY_MODE_LOW_POWER)
+               cmd |= lane_pd;
+       else
+               cmd &= ~lane_pd;
+
+       ef4_mdio_write(efx, mmd, TXC_GLRGS_GLCMD, cmd);
+}
+
+/* Update the per-lane TX and RX power-down bits in the analog control
+ * registers of @mmd to match the PHY_MODE_LOW_POWER flag in
+ * efx->phy_mode.
+ */
+static void txc_analog_lane_power(struct ef4_nic *efx, int mmd)
+{
+       const int tx_pd_bits = (1 << TXC_ATXCTL_TXPD3_LBN) |
+                              (1 << TXC_ATXCTL_TXPD2_LBN) |
+                              (1 << TXC_ATXCTL_TXPD1_LBN) |
+                              (1 << TXC_ATXCTL_TXPD0_LBN);
+       const int rx_pd_bits = (1 << TXC_ARXCTL_RXPD3_LBN) |
+                              (1 << TXC_ARXCTL_RXPD2_LBN) |
+                              (1 << TXC_ARXCTL_RXPD1_LBN) |
+                              (1 << TXC_ARXCTL_RXPD0_LBN);
+       int tx_ctl = ef4_mdio_read(efx, mmd, TXC_ALRGS_ATXCTL);
+       int rx_ctl = ef4_mdio_read(efx, mmd, TXC_ALRGS_ARXCTL);
+
+       if (efx->phy_mode & PHY_MODE_LOW_POWER) {
+               tx_ctl |= tx_pd_bits;
+               rx_ctl |= rx_pd_bits;
+       } else {
+               tx_ctl &= ~tx_pd_bits;
+               rx_ctl &= ~rx_pd_bits;
+       }
+
+       ef4_mdio_write(efx, mmd, TXC_ALRGS_ATXCTL, tx_ctl);
+       ef4_mdio_write(efx, mmd, TXC_ALRGS_ARXCTL, rx_ctl);
+}
+
+/* Apply the low-power setting from efx->phy_mode to the MMDs and to the
+ * global and analog lane power-down registers.
+ */
+static void txc_set_power(struct ef4_nic *efx)
+{
+       int low_power = !!(efx->phy_mode & PHY_MODE_LOW_POWER);
+
+       /* According to the data book, all the MMDs can do low power */
+       ef4_mdio_set_mmds_lpower(efx, low_power, TXC_REQUIRED_DEVS);
+
+       /* The global register bank lives in the PCS and the PHY XS; these
+        * control the host side and line side settings respectively. */
+       txc_glrgs_lane_power(efx, MDIO_MMD_PCS);
+       txc_glrgs_lane_power(efx, MDIO_MMD_PHYXS);
+
+       /* The analog register bank lives in the PMA/PMD and the PHY XS */
+       txc_analog_lane_power(efx, MDIO_MMD_PMAPMD);
+       txc_analog_lane_power(efx, MDIO_MMD_PHYXS);
+}
+
+/* Trigger a limited software (logic) reset through the global command
+ * register of @mmd and wait for the self-clearing reset bit to drop.
+ * A timeout is only logged; nothing is returned to the caller.
+ */
+static void txc_reset_logic_mmd(struct ef4_nic *efx, int mmd)
+{
+       int val = ef4_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
+       int tries = 50;
+
+       val |= (1 << TXC_GLCMD_LMTSWRST_LBN);
+       ef4_mdio_write(efx, mmd, TXC_GLRGS_GLCMD, val);
+       /* The pre-decrement means at most 49 polls, 1us apart; tries only
+        * reaches zero if the bit never cleared.
+        */
+       while (--tries) {
+               val = ef4_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
+               if (!(val & (1 << TXC_GLCMD_LMTSWRST_LBN)))
+                       break;
+               udelay(1);
+       }
+       if (!tries)
+               netif_info(efx, hw, efx->net_dev,
+                          TXCNAME " Logic reset timed out!\n");
+}
+
+/* Perform a logic reset. This preserves the configuration registers
+ * and is needed for some configuration changes to take effect */
+static void txc_reset_logic(struct ef4_nic *efx)
+{
+       /* The data sheet claims we can do the logic reset on either the
+        * PCS or the PHYXS and the result is a reset of both host- and
+        * line-side logic. */
+       /* Hence only the PCS MMD is reset here. */
+       txc_reset_logic_mmd(efx, MDIO_MMD_PCS);
+}
+
+/* Report link state: the result of ef4_mdio_links_ok() over all of the
+ * required MMDs.
+ */
+static bool txc43128_phy_read_link(struct ef4_nic *efx)
+{
+       return ef4_mdio_links_ok(efx, TXC_REQUIRED_DEVS);
+}
+
+/* Apply the PHY mode and loopback mode currently requested in @efx,
+ * then record them in the private data as the last applied state.
+ * Always returns 0.
+ */
+static int txc43128_phy_reconfigure(struct ef4_nic *efx)
+{
+       struct txc43128_data *phy_data = efx->phy_data;
+       /* Mode bits that differ from what was applied last time */
+       enum ef4_phy_mode mode_change = efx->phy_mode ^ phy_data->phy_mode;
+       bool loop_change = LOOPBACK_CHANGED(phy_data, efx, TXC_LOOPBACKS);
+
+       /* TX_DISABLED has just been set: do a full reset and reapply the
+        * defaults, then drop the flag from mode_change so that it does
+        * not also trigger the logic reset below.
+        * NOTE(review): the return value of txc_reset_phy() is ignored
+        * here.
+        */
+       if (efx->phy_mode & mode_change & PHY_MODE_TX_DISABLED) {
+               txc_reset_phy(efx);
+               txc_apply_defaults(efx);
+               falcon_reset_xaui(efx);
+               mode_change &= ~PHY_MODE_TX_DISABLED;
+       }
+
+       ef4_mdio_transmit_disable(efx);
+       ef4_mdio_phy_reconfigure(efx);
+       if (mode_change & PHY_MODE_LOW_POWER)
+               txc_set_power(efx);
+
+       /* The data sheet claims this is required after every reconfiguration
+        * (note at end of 7.1), but we mustn't do it when nothing changes as
+        * it glitches the link, and reconfigure gets called on link change,
+        * so we get an IRQ storm on link up. */
+       if (loop_change || mode_change)
+               txc_reset_logic(efx);
+
+       phy_data->phy_mode = efx->phy_mode;
+       phy_data->loopback_mode = efx->loopback_mode;
+
+       return 0;
+}
+
+/* Shutdown entry point: writes 0 to the PMA/PMD LASI control register */
+static void txc43128_phy_fini(struct ef4_nic *efx)
+{
+       /* Disable link events */
+       ef4_mdio_write(efx, MDIO_MMD_PMAPMD, MDIO_PMA_LASI_CTRL, 0);
+}
+
+/* Free the private data allocated by txc43128_phy_probe() */
+static void txc43128_phy_remove(struct ef4_nic *efx)
+{
+       kfree(efx->phy_data);
+       /* Clear the pointer so it is not left dangling */
+       efx->phy_data = NULL;
+}
+
+/* Periodic callback: this exists mainly to poll link status as we
+ * don't use LASI interrupts.
+ *
+ * Refreshes efx->link_state and, while the link stays down with no
+ * loopback active, issues a logic reset every BUG10934_RESET_INTERVAL.
+ *
+ * Returns true if the link up/down state changed. */
+static bool txc43128_phy_poll(struct ef4_nic *efx)
+{
+       struct txc43128_data *phy_data = efx->phy_data;
+       const bool prev_up = efx->link_state.up;
+       bool link_down_idle;
+
+       efx->link_state.up = txc43128_phy_read_link(efx);
+       efx->link_state.speed = 10000;
+       efx->link_state.fd = true;
+       efx->link_state.fc = efx->wanted_fc;
+
+       link_down_idle = !efx->link_state.up &&
+                        efx->loopback_mode == LOOPBACK_NONE;
+
+       if (!link_down_idle) {
+               /* Link is fine (or in loopback): restart the interval */
+               phy_data->bug10934_timer = jiffies;
+       } else if (time_after_eq(jiffies, (phy_data->bug10934_timer +
+                                          BUG10934_RESET_INTERVAL))) {
+               phy_data->bug10934_timer = jiffies;
+               txc_reset_logic(efx);
+       }
+
+       return prev_up != efx->link_state.up;
+}
+
+/* Names of the PHY self-tests, indexed by test number */
+static const char *const txc43128_test_names[] = {
+       "bist"
+};
+
+/* Return the name of self-test @index, or NULL if there is no such test */
+static const char *txc43128_test_name(struct ef4_nic *efx, unsigned int index)
+{
+       if (index >= ARRAY_SIZE(txc43128_test_names))
+               return NULL;
+
+       return txc43128_test_names[index];
+}
+
+/* Run the PHY self-tests.
+ *
+ * Nothing is done unless ETH_TEST_FL_OFFLINE is set in @flags; in that
+ * case 0 is returned and @results is left untouched.  Otherwise the PHY
+ * is reset, the BIST is run and the defaults are reapplied whatever the
+ * BIST result.  results[0] is set to 1 on pass and -1 on failure; it is
+ * not written if the initial reset fails.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int txc43128_run_tests(struct ef4_nic *efx, int *results, unsigned flags)
+{
+       int rc;
+
+       if (!(flags & ETH_TEST_FL_OFFLINE))
+               return 0;
+
+       rc = txc_reset_phy(efx);
+       if (rc < 0)
+               return rc;
+
+       rc = txc_bist(efx);
+       txc_apply_defaults(efx);
+       results[0] = rc ? -1 : 1;
+       return rc;
+}
+
+/* Fill @ecmd by delegating to mdio45_ethtool_gset() on efx->mdio */
+static void txc43128_get_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd)
+{
+       mdio45_ethtool_gset(&efx->mdio, ecmd);
+}
+
+/* PHY operations table for the TXC43128.  set_settings and test_alive
+ * use the shared ef4_mdio_* helpers; the rest are defined in this file.
+ */
+const struct ef4_phy_operations falcon_txc_phy_ops = {
+       .probe          = txc43128_phy_probe,
+       .init           = txc43128_phy_init,
+       .reconfigure    = txc43128_phy_reconfigure,
+       .poll           = txc43128_phy_poll,
+       .fini           = txc43128_phy_fini,
+       .remove         = txc43128_phy_remove,
+       .get_settings   = txc43128_get_settings,
+       .set_settings   = ef4_mdio_set_settings,
+       .test_alive     = ef4_mdio_test_alive,
+       .run_tests      = txc43128_run_tests,
+       .test_name      = txc43128_test_name,
+};
diff --git a/drivers/net/ethernet/sfc/falcon/workarounds.h b/drivers/net/ethernet/sfc/falcon/workarounds.h
new file mode 100644 (file)
index 0000000..6af800b
--- /dev/null
@@ -0,0 +1,44 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_WORKAROUNDS_H
+#define EF4_WORKAROUNDS_H
+
+/*
+ * Hardware workarounds.
+ * Bug numbers are from Solarflare's Bugzilla.
+ */
+
+#define EF4_WORKAROUND_FALCON_A(efx) (ef4_nic_rev(efx) <= EF4_REV_FALCON_A1)
+#define EF4_WORKAROUND_FALCON_AB(efx) (ef4_nic_rev(efx) <= EF4_REV_FALCON_B0)
+#define EF4_WORKAROUND_10G(efx) 1
+
+/* Bit-bashed I2C reads cause performance drop */
+#define EF4_WORKAROUND_7884 EF4_WORKAROUND_10G
+/* Truncated IPv4 packets can confuse the TX packet parser */
+#define EF4_WORKAROUND_15592 EF4_WORKAROUND_FALCON_AB
+
+/* Spurious parity errors in TSORT buffers */
+#define EF4_WORKAROUND_5129 EF4_WORKAROUND_FALCON_A
+/* Unaligned read request >512 bytes after aligning may break TSORT */
+#define EF4_WORKAROUND_5391 EF4_WORKAROUND_FALCON_A
+/* iSCSI parsing errors */
+#define EF4_WORKAROUND_5583 EF4_WORKAROUND_FALCON_A
+/* RX events go missing */
+#define EF4_WORKAROUND_5676 EF4_WORKAROUND_FALCON_A
+/* RX_RESET on A1 */
+#define EF4_WORKAROUND_6555 EF4_WORKAROUND_FALCON_A
+/* Increase filter depth to avoid RX_RESET */
+#define EF4_WORKAROUND_7244 EF4_WORKAROUND_FALCON_A
+/* Flushes may never complete */
+#define EF4_WORKAROUND_7803 EF4_WORKAROUND_FALCON_AB
+/* Leak overlength packets rather than free */
+#define EF4_WORKAROUND_8071 EF4_WORKAROUND_FALCON_A
+
+#endif /* EF4_WORKAROUNDS_H */
diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon_boards.c
deleted file mode 100644 (file)
index f6883b2..0000000
+++ /dev/null
@@ -1,764 +0,0 @@
-/****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2007-2012 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
- */
-
-#include <linux/rtnetlink.h>
-
-#include "net_driver.h"
-#include "phy.h"
-#include "efx.h"
-#include "nic.h"
-#include "workarounds.h"
-
-/* Macros for unpacking the board revision */
-/* The revision info is in host byte order. */
-#define FALCON_BOARD_TYPE(_rev) (_rev >> 8)
-#define FALCON_BOARD_MAJOR(_rev) ((_rev >> 4) & 0xf)
-#define FALCON_BOARD_MINOR(_rev) (_rev & 0xf)
-
-/* Board types */
-#define FALCON_BOARD_SFE4001 0x01
-#define FALCON_BOARD_SFE4002 0x02
-#define FALCON_BOARD_SFE4003 0x03
-#define FALCON_BOARD_SFN4112F 0x52
-
-/* Board temperature is about 15°C above ambient when air flow is
- * limited.  The maximum acceptable ambient temperature varies
- * depending on the PHY specifications but the critical temperature
- * above which we should shut down to avoid damage is 80°C. */
-#define FALCON_BOARD_TEMP_BIAS 15
-#define FALCON_BOARD_TEMP_CRIT (80 + FALCON_BOARD_TEMP_BIAS)
-
-/* SFC4000 datasheet says: 'The maximum permitted junction temperature
- * is 125°C; the thermal design of the environment for the SFC4000
- * should aim to keep this well below 100°C.' */
-#define FALCON_JUNC_TEMP_MIN   0
-#define FALCON_JUNC_TEMP_MAX   90
-#define FALCON_JUNC_TEMP_CRIT  125
-
-/*****************************************************************************
- * Support for LM87 sensor chip used on several boards
- */
-#define LM87_REG_TEMP_HW_INT_LOCK      0x13
-#define LM87_REG_TEMP_HW_EXT_LOCK      0x14
-#define LM87_REG_TEMP_HW_INT           0x17
-#define LM87_REG_TEMP_HW_EXT           0x18
-#define LM87_REG_TEMP_EXT1             0x26
-#define LM87_REG_TEMP_INT              0x27
-#define LM87_REG_ALARMS1               0x41
-#define LM87_REG_ALARMS2               0x42
-#define LM87_IN_LIMITS(nr, _min, _max)                 \
-       0x2B + (nr) * 2, _max, 0x2C + (nr) * 2, _min
-#define LM87_AIN_LIMITS(nr, _min, _max)                        \
-       0x3B + (nr), _max, 0x1A + (nr), _min
-#define LM87_TEMP_INT_LIMITS(_min, _max)               \
-       0x39, _max, 0x3A, _min
-#define LM87_TEMP_EXT1_LIMITS(_min, _max)              \
-       0x37, _max, 0x38, _min
-
-#define LM87_ALARM_TEMP_INT            0x10
-#define LM87_ALARM_TEMP_EXT1           0x20
-
-#if IS_ENABLED(CONFIG_SENSORS_LM87)
-
-static int efx_poke_lm87(struct i2c_client *client, const u8 *reg_values)
-{
-       while (*reg_values) {
-               u8 reg = *reg_values++;
-               u8 value = *reg_values++;
-               int rc = i2c_smbus_write_byte_data(client, reg, value);
-               if (rc)
-                       return rc;
-       }
-       return 0;
-}
-
-static const u8 falcon_lm87_common_regs[] = {
-       LM87_REG_TEMP_HW_INT_LOCK, FALCON_BOARD_TEMP_CRIT,
-       LM87_REG_TEMP_HW_INT, FALCON_BOARD_TEMP_CRIT,
-       LM87_TEMP_EXT1_LIMITS(FALCON_JUNC_TEMP_MIN, FALCON_JUNC_TEMP_MAX),
-       LM87_REG_TEMP_HW_EXT_LOCK, FALCON_JUNC_TEMP_CRIT,
-       LM87_REG_TEMP_HW_EXT, FALCON_JUNC_TEMP_CRIT,
-       0
-};
-
-static int efx_init_lm87(struct efx_nic *efx, const struct i2c_board_info *info,
-                        const u8 *reg_values)
-{
-       struct falcon_board *board = falcon_board(efx);
-       struct i2c_client *client = i2c_new_device(&board->i2c_adap, info);
-       int rc;
-
-       if (!client)
-               return -EIO;
-
-       /* Read-to-clear alarm/interrupt status */
-       i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
-       i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2);
-
-       rc = efx_poke_lm87(client, reg_values);
-       if (rc)
-               goto err;
-       rc = efx_poke_lm87(client, falcon_lm87_common_regs);
-       if (rc)
-               goto err;
-
-       board->hwmon_client = client;
-       return 0;
-
-err:
-       i2c_unregister_device(client);
-       return rc;
-}
-
-static void efx_fini_lm87(struct efx_nic *efx)
-{
-       i2c_unregister_device(falcon_board(efx)->hwmon_client);
-}
-
-static int efx_check_lm87(struct efx_nic *efx, unsigned mask)
-{
-       struct i2c_client *client = falcon_board(efx)->hwmon_client;
-       bool temp_crit, elec_fault, is_failure;
-       u16 alarms;
-       s32 reg;
-
-       /* If link is up then do not monitor temperature */
-       if (EFX_WORKAROUND_7884(efx) && efx->link_state.up)
-               return 0;
-
-       reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
-       if (reg < 0)
-               return reg;
-       alarms = reg;
-       reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2);
-       if (reg < 0)
-               return reg;
-       alarms |= reg << 8;
-       alarms &= mask;
-
-       temp_crit = false;
-       if (alarms & LM87_ALARM_TEMP_INT) {
-               reg = i2c_smbus_read_byte_data(client, LM87_REG_TEMP_INT);
-               if (reg < 0)
-                       return reg;
-               if (reg > FALCON_BOARD_TEMP_CRIT)
-                       temp_crit = true;
-       }
-       if (alarms & LM87_ALARM_TEMP_EXT1) {
-               reg = i2c_smbus_read_byte_data(client, LM87_REG_TEMP_EXT1);
-               if (reg < 0)
-                       return reg;
-               if (reg > FALCON_JUNC_TEMP_CRIT)
-                       temp_crit = true;
-       }
-       elec_fault = alarms & ~(LM87_ALARM_TEMP_INT | LM87_ALARM_TEMP_EXT1);
-       is_failure = temp_crit || elec_fault;
-
-       if (alarms)
-               netif_err(efx, hw, efx->net_dev,
-                         "LM87 detected a hardware %s (status %02x:%02x)"
-                         "%s%s%s%s\n",
-                         is_failure ? "failure" : "problem",
-                         alarms & 0xff, alarms >> 8,
-                         (alarms & LM87_ALARM_TEMP_INT) ?
-                         "; board is overheating" : "",
-                         (alarms & LM87_ALARM_TEMP_EXT1) ?
-                         "; controller is overheating" : "",
-                         temp_crit ? "; reached critical temperature" : "",
-                         elec_fault ? "; electrical fault" : "");
-
-       return is_failure ? -ERANGE : 0;
-}
-
-#else /* !CONFIG_SENSORS_LM87 */
-
-static inline int
-efx_init_lm87(struct efx_nic *efx, const struct i2c_board_info *info,
-             const u8 *reg_values)
-{
-       return 0;
-}
-static inline void efx_fini_lm87(struct efx_nic *efx)
-{
-}
-static inline int efx_check_lm87(struct efx_nic *efx, unsigned mask)
-{
-       return 0;
-}
-
-#endif /* CONFIG_SENSORS_LM87 */
-
-/*****************************************************************************
- * Support for the SFE4001 NIC.
- *
- * The SFE4001 does not power-up fully at reset due to its high power
- * consumption.  We control its power via a PCA9539 I/O expander.
- * It also has a MAX6647 temperature monitor which we expose to
- * the lm90 driver.
- *
- * This also provides minimal support for reflashing the PHY, which is
- * initiated by resetting it with the FLASH_CFG_1 pin pulled down.
- * On SFE4001 rev A2 and later this is connected to the 3V3X output of
- * the IO-expander.
- * We represent reflash mode as PHY_MODE_SPECIAL and make it mutually
- * exclusive with the network device being open.
- */
-
-/**************************************************************************
- * Support for I2C IO Expander device on SFE4001
- */
-#define        PCA9539 0x74
-
-#define        P0_IN 0x00
-#define        P0_OUT 0x02
-#define        P0_INVERT 0x04
-#define        P0_CONFIG 0x06
-
-#define        P0_EN_1V0X_LBN 0
-#define        P0_EN_1V0X_WIDTH 1
-#define        P0_EN_1V2_LBN 1
-#define        P0_EN_1V2_WIDTH 1
-#define        P0_EN_2V5_LBN 2
-#define        P0_EN_2V5_WIDTH 1
-#define        P0_EN_3V3X_LBN 3
-#define        P0_EN_3V3X_WIDTH 1
-#define        P0_EN_5V_LBN 4
-#define        P0_EN_5V_WIDTH 1
-#define        P0_SHORTEN_JTAG_LBN 5
-#define        P0_SHORTEN_JTAG_WIDTH 1
-#define        P0_X_TRST_LBN 6
-#define        P0_X_TRST_WIDTH 1
-#define        P0_DSP_RESET_LBN 7
-#define        P0_DSP_RESET_WIDTH 1
-
-#define        P1_IN 0x01
-#define        P1_OUT 0x03
-#define        P1_INVERT 0x05
-#define        P1_CONFIG 0x07
-
-#define        P1_AFE_PWD_LBN 0
-#define        P1_AFE_PWD_WIDTH 1
-#define        P1_DSP_PWD25_LBN 1
-#define        P1_DSP_PWD25_WIDTH 1
-#define        P1_RESERVED_LBN 2
-#define        P1_RESERVED_WIDTH 2
-#define        P1_SPARE_LBN 4
-#define        P1_SPARE_WIDTH 4
-
-/* Temperature Sensor */
-#define MAX664X_REG_RSL                0x02
-#define MAX664X_REG_WLHO       0x0B
-
-static void sfe4001_poweroff(struct efx_nic *efx)
-{
-       struct i2c_client *ioexp_client = falcon_board(efx)->ioexp_client;
-       struct i2c_client *hwmon_client = falcon_board(efx)->hwmon_client;
-
-       /* Turn off all power rails and disable outputs */
-       i2c_smbus_write_byte_data(ioexp_client, P0_OUT, 0xff);
-       i2c_smbus_write_byte_data(ioexp_client, P1_CONFIG, 0xff);
-       i2c_smbus_write_byte_data(ioexp_client, P0_CONFIG, 0xff);
-
-       /* Clear any over-temperature alert */
-       i2c_smbus_read_byte_data(hwmon_client, MAX664X_REG_RSL);
-}
-
-static int sfe4001_poweron(struct efx_nic *efx)
-{
-       struct i2c_client *ioexp_client = falcon_board(efx)->ioexp_client;
-       struct i2c_client *hwmon_client = falcon_board(efx)->hwmon_client;
-       unsigned int i, j;
-       int rc;
-       u8 out;
-
-       /* Clear any previous over-temperature alert */
-       rc = i2c_smbus_read_byte_data(hwmon_client, MAX664X_REG_RSL);
-       if (rc < 0)
-               return rc;
-
-       /* Enable port 0 and port 1 outputs on IO expander */
-       rc = i2c_smbus_write_byte_data(ioexp_client, P0_CONFIG, 0x00);
-       if (rc)
-               return rc;
-       rc = i2c_smbus_write_byte_data(ioexp_client, P1_CONFIG,
-                                      0xff & ~(1 << P1_SPARE_LBN));
-       if (rc)
-               goto fail_on;
-
-       /* If PHY power is on, turn it all off and wait 1 second to
-        * ensure a full reset.
-        */
-       rc = i2c_smbus_read_byte_data(ioexp_client, P0_OUT);
-       if (rc < 0)
-               goto fail_on;
-       out = 0xff & ~((0 << P0_EN_1V2_LBN) | (0 << P0_EN_2V5_LBN) |
-                      (0 << P0_EN_3V3X_LBN) | (0 << P0_EN_5V_LBN) |
-                      (0 << P0_EN_1V0X_LBN));
-       if (rc != out) {
-               netif_info(efx, hw, efx->net_dev, "power-cycling PHY\n");
-               rc = i2c_smbus_write_byte_data(ioexp_client, P0_OUT, out);
-               if (rc)
-                       goto fail_on;
-               schedule_timeout_uninterruptible(HZ);
-       }
-
-       for (i = 0; i < 20; ++i) {
-               /* Turn on 1.2V, 2.5V, 3.3V and 5V power rails */
-               out = 0xff & ~((1 << P0_EN_1V2_LBN) | (1 << P0_EN_2V5_LBN) |
-                              (1 << P0_EN_3V3X_LBN) | (1 << P0_EN_5V_LBN) |
-                              (1 << P0_X_TRST_LBN));
-               if (efx->phy_mode & PHY_MODE_SPECIAL)
-                       out |= 1 << P0_EN_3V3X_LBN;
-
-               rc = i2c_smbus_write_byte_data(ioexp_client, P0_OUT, out);
-               if (rc)
-                       goto fail_on;
-               msleep(10);
-
-               /* Turn on 1V power rail */
-               out &= ~(1 << P0_EN_1V0X_LBN);
-               rc = i2c_smbus_write_byte_data(ioexp_client, P0_OUT, out);
-               if (rc)
-                       goto fail_on;
-
-               netif_info(efx, hw, efx->net_dev,
-                          "waiting for DSP boot (attempt %d)...\n", i);
-
-               /* In flash config mode, DSP does not turn on AFE, so
-                * just wait 1 second.
-                */
-               if (efx->phy_mode & PHY_MODE_SPECIAL) {
-                       schedule_timeout_uninterruptible(HZ);
-                       return 0;
-               }
-
-               for (j = 0; j < 10; ++j) {
-                       msleep(100);
-
-                       /* Check DSP has asserted AFE power line */
-                       rc = i2c_smbus_read_byte_data(ioexp_client, P1_IN);
-                       if (rc < 0)
-                               goto fail_on;
-                       if (rc & (1 << P1_AFE_PWD_LBN))
-                               return 0;
-               }
-       }
-
-       netif_info(efx, hw, efx->net_dev, "timed out waiting for DSP boot\n");
-       rc = -ETIMEDOUT;
-fail_on:
-       sfe4001_poweroff(efx);
-       return rc;
-}
-
-static ssize_t show_phy_flash_cfg(struct device *dev,
-                                 struct device_attribute *attr, char *buf)
-{
-       struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
-       return sprintf(buf, "%d\n", !!(efx->phy_mode & PHY_MODE_SPECIAL));
-}
-
-static ssize_t set_phy_flash_cfg(struct device *dev,
-                                struct device_attribute *attr,
-                                const char *buf, size_t count)
-{
-       struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
-       enum efx_phy_mode old_mode, new_mode;
-       int err;
-
-       rtnl_lock();
-       old_mode = efx->phy_mode;
-       if (count == 0 || *buf == '0')
-               new_mode = old_mode & ~PHY_MODE_SPECIAL;
-       else
-               new_mode = PHY_MODE_SPECIAL;
-       if (!((old_mode ^ new_mode) & PHY_MODE_SPECIAL)) {
-               err = 0;
-       } else if (efx->state != STATE_READY || netif_running(efx->net_dev)) {
-               err = -EBUSY;
-       } else {
-               /* Reset the PHY, reconfigure the MAC and enable/disable
-                * MAC stats accordingly. */
-               efx->phy_mode = new_mode;
-               if (new_mode & PHY_MODE_SPECIAL)
-                       falcon_stop_nic_stats(efx);
-               err = sfe4001_poweron(efx);
-               if (!err)
-                       err = efx_reconfigure_port(efx);
-               if (!(new_mode & PHY_MODE_SPECIAL))
-                       falcon_start_nic_stats(efx);
-       }
-       rtnl_unlock();
-
-       return err ? err : count;
-}
-
-static DEVICE_ATTR(phy_flash_cfg, 0644, show_phy_flash_cfg, set_phy_flash_cfg);
-
-static void sfe4001_fini(struct efx_nic *efx)
-{
-       struct falcon_board *board = falcon_board(efx);
-
-       netif_info(efx, drv, efx->net_dev, "%s\n", __func__);
-
-       device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_flash_cfg);
-       sfe4001_poweroff(efx);
-       i2c_unregister_device(board->ioexp_client);
-       i2c_unregister_device(board->hwmon_client);
-}
-
-static int sfe4001_check_hw(struct efx_nic *efx)
-{
-       struct falcon_nic_data *nic_data = efx->nic_data;
-       s32 status;
-
-       /* If XAUI link is up then do not monitor */
-       if (EFX_WORKAROUND_7884(efx) && !nic_data->xmac_poll_required)
-               return 0;
-
-       /* Check the powered status of the PHY. Lack of power implies that
-        * the MAX6647 has shut down power to it, probably due to a temp.
-        * alarm. Reading the power status rather than the MAX6647 status
-        * directly because the later is read-to-clear and would thus
-        * start to power up the PHY again when polled, causing us to blip
-        * the power undesirably.
-        * We know we can read from the IO expander because we did
-        * it during power-on. Assume failure now is bad news. */
-       status = i2c_smbus_read_byte_data(falcon_board(efx)->ioexp_client, P1_IN);
-       if (status >= 0 &&
-           (status & ((1 << P1_AFE_PWD_LBN) | (1 << P1_DSP_PWD25_LBN))) != 0)
-               return 0;
-
-       /* Use board power control, not PHY power control */
-       sfe4001_poweroff(efx);
-       efx->phy_mode = PHY_MODE_OFF;
-
-       return (status < 0) ? -EIO : -ERANGE;
-}
-
-static const struct i2c_board_info sfe4001_hwmon_info = {
-       I2C_BOARD_INFO("max6647", 0x4e),
-};
-
-/* This board uses an I2C expander to provider power to the PHY, which needs to
- * be turned on before the PHY can be used.
- * Context: Process context, rtnl lock held
- */
-static int sfe4001_init(struct efx_nic *efx)
-{
-       struct falcon_board *board = falcon_board(efx);
-       int rc;
-
-#if IS_ENABLED(CONFIG_SENSORS_LM90)
-       board->hwmon_client =
-               i2c_new_device(&board->i2c_adap, &sfe4001_hwmon_info);
-#else
-       board->hwmon_client =
-               i2c_new_dummy(&board->i2c_adap, sfe4001_hwmon_info.addr);
-#endif
-       if (!board->hwmon_client)
-               return -EIO;
-
-       /* Raise board/PHY high limit from 85 to 90 degrees Celsius */
-       rc = i2c_smbus_write_byte_data(board->hwmon_client,
-                                      MAX664X_REG_WLHO, 90);
-       if (rc)
-               goto fail_hwmon;
-
-       board->ioexp_client = i2c_new_dummy(&board->i2c_adap, PCA9539);
-       if (!board->ioexp_client) {
-               rc = -EIO;
-               goto fail_hwmon;
-       }
-
-       if (efx->phy_mode & PHY_MODE_SPECIAL) {
-               /* PHY won't generate a 156.25 MHz clock and MAC stats fetch
-                * will fail. */
-               falcon_stop_nic_stats(efx);
-       }
-       rc = sfe4001_poweron(efx);
-       if (rc)
-               goto fail_ioexp;
-
-       rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_flash_cfg);
-       if (rc)
-               goto fail_on;
-
-       netif_info(efx, hw, efx->net_dev, "PHY is powered on\n");
-       return 0;
-
-fail_on:
-       sfe4001_poweroff(efx);
-fail_ioexp:
-       i2c_unregister_device(board->ioexp_client);
-fail_hwmon:
-       i2c_unregister_device(board->hwmon_client);
-       return rc;
-}
-
-/*****************************************************************************
- * Support for the SFE4002
- *
- */
-static u8 sfe4002_lm87_channel = 0x03; /* use AIN not FAN inputs */
-
-static const u8 sfe4002_lm87_regs[] = {
-       LM87_IN_LIMITS(0, 0x7c, 0x99),          /* 2.5V:  1.8V +/- 10% */
-       LM87_IN_LIMITS(1, 0x4c, 0x5e),          /* Vccp1: 1.2V +/- 10% */
-       LM87_IN_LIMITS(2, 0xac, 0xd4),          /* 3.3V:  3.3V +/- 10% */
-       LM87_IN_LIMITS(3, 0xac, 0xd4),          /* 5V:    5.0V +/- 10% */
-       LM87_IN_LIMITS(4, 0xac, 0xe0),          /* 12V:   10.8-14V */
-       LM87_IN_LIMITS(5, 0x3f, 0x4f),          /* Vccp2: 1.0V +/- 10% */
-       LM87_AIN_LIMITS(0, 0x98, 0xbb),         /* AIN1:  1.66V +/- 10% */
-       LM87_AIN_LIMITS(1, 0x8a, 0xa9),         /* AIN2:  1.5V +/- 10% */
-       LM87_TEMP_INT_LIMITS(0, 80 + FALCON_BOARD_TEMP_BIAS),
-       LM87_TEMP_EXT1_LIMITS(0, FALCON_JUNC_TEMP_MAX),
-       0
-};
-
-static const struct i2c_board_info sfe4002_hwmon_info = {
-       I2C_BOARD_INFO("lm87", 0x2e),
-       .platform_data  = &sfe4002_lm87_channel,
-};
-
-/****************************************************************************/
-/* LED allocations. Note that on rev A0 boards the schematic and the reality
- * differ: red and green are swapped. Below is the fixed (A1) layout (there
- * are only 3 A0 boards in existence, so no real reason to make this
- * conditional).
- */
-#define SFE4002_FAULT_LED (2)  /* Red */
-#define SFE4002_RX_LED    (0)  /* Green */
-#define SFE4002_TX_LED    (1)  /* Amber */
-
-static void sfe4002_init_phy(struct efx_nic *efx)
-{
-       /* Set the TX and RX LEDs to reflect status and activity, and the
-        * fault LED off */
-       falcon_qt202x_set_led(efx, SFE4002_TX_LED,
-                             QUAKE_LED_TXLINK | QUAKE_LED_LINK_ACTSTAT);
-       falcon_qt202x_set_led(efx, SFE4002_RX_LED,
-                             QUAKE_LED_RXLINK | QUAKE_LED_LINK_ACTSTAT);
-       falcon_qt202x_set_led(efx, SFE4002_FAULT_LED, QUAKE_LED_OFF);
-}
-
-static void sfe4002_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
-{
-       falcon_qt202x_set_led(
-               efx, SFE4002_FAULT_LED,
-               (mode == EFX_LED_ON) ? QUAKE_LED_ON : QUAKE_LED_OFF);
-}
-
-static int sfe4002_check_hw(struct efx_nic *efx)
-{
-       struct falcon_board *board = falcon_board(efx);
-
-       /* A0 board rev. 4002s report a temperature fault the whole time
-        * (bad sensor) so we mask it out. */
-       unsigned alarm_mask =
-               (board->major == 0 && board->minor == 0) ?
-               ~LM87_ALARM_TEMP_EXT1 : ~0;
-
-       return efx_check_lm87(efx, alarm_mask);
-}
-
-static int sfe4002_init(struct efx_nic *efx)
-{
-       return efx_init_lm87(efx, &sfe4002_hwmon_info, sfe4002_lm87_regs);
-}
-
-/*****************************************************************************
- * Support for the SFN4112F
- *
- */
-static u8 sfn4112f_lm87_channel = 0x03; /* use AIN not FAN inputs */
-
-static const u8 sfn4112f_lm87_regs[] = {
-       LM87_IN_LIMITS(0, 0x7c, 0x99),          /* 2.5V:  1.8V +/- 10% */
-       LM87_IN_LIMITS(1, 0x4c, 0x5e),          /* Vccp1: 1.2V +/- 10% */
-       LM87_IN_LIMITS(2, 0xac, 0xd4),          /* 3.3V:  3.3V +/- 10% */
-       LM87_IN_LIMITS(4, 0xac, 0xe0),          /* 12V:   10.8-14V */
-       LM87_IN_LIMITS(5, 0x3f, 0x4f),          /* Vccp2: 1.0V +/- 10% */
-       LM87_AIN_LIMITS(1, 0x8a, 0xa9),         /* AIN2:  1.5V +/- 10% */
-       LM87_TEMP_INT_LIMITS(0, 60 + FALCON_BOARD_TEMP_BIAS),
-       LM87_TEMP_EXT1_LIMITS(0, FALCON_JUNC_TEMP_MAX),
-       0
-};
-
-static const struct i2c_board_info sfn4112f_hwmon_info = {
-       I2C_BOARD_INFO("lm87", 0x2e),
-       .platform_data  = &sfn4112f_lm87_channel,
-};
-
-#define SFN4112F_ACT_LED       0
-#define SFN4112F_LINK_LED      1
-
-static void sfn4112f_init_phy(struct efx_nic *efx)
-{
-       falcon_qt202x_set_led(efx, SFN4112F_ACT_LED,
-                             QUAKE_LED_RXLINK | QUAKE_LED_LINK_ACT);
-       falcon_qt202x_set_led(efx, SFN4112F_LINK_LED,
-                             QUAKE_LED_RXLINK | QUAKE_LED_LINK_STAT);
-}
-
-static void sfn4112f_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
-{
-       int reg;
-
-       switch (mode) {
-       case EFX_LED_OFF:
-               reg = QUAKE_LED_OFF;
-               break;
-       case EFX_LED_ON:
-               reg = QUAKE_LED_ON;
-               break;
-       default:
-               reg = QUAKE_LED_RXLINK | QUAKE_LED_LINK_STAT;
-               break;
-       }
-
-       falcon_qt202x_set_led(efx, SFN4112F_LINK_LED, reg);
-}
-
-static int sfn4112f_check_hw(struct efx_nic *efx)
-{
-       /* Mask out unused sensors */
-       return efx_check_lm87(efx, ~0x48);
-}
-
-static int sfn4112f_init(struct efx_nic *efx)
-{
-       return efx_init_lm87(efx, &sfn4112f_hwmon_info, sfn4112f_lm87_regs);
-}
-
-/*****************************************************************************
- * Support for the SFE4003
- *
- */
-static u8 sfe4003_lm87_channel = 0x03; /* use AIN not FAN inputs */
-
-static const u8 sfe4003_lm87_regs[] = {
-       LM87_IN_LIMITS(0, 0x67, 0x7f),          /* 2.5V:  1.5V +/- 10% */
-       LM87_IN_LIMITS(1, 0x4c, 0x5e),          /* Vccp1: 1.2V +/- 10% */
-       LM87_IN_LIMITS(2, 0xac, 0xd4),          /* 3.3V:  3.3V +/- 10% */
-       LM87_IN_LIMITS(4, 0xac, 0xe0),          /* 12V:   10.8-14V */
-       LM87_IN_LIMITS(5, 0x3f, 0x4f),          /* Vccp2: 1.0V +/- 10% */
-       LM87_TEMP_INT_LIMITS(0, 70 + FALCON_BOARD_TEMP_BIAS),
-       0
-};
-
-static const struct i2c_board_info sfe4003_hwmon_info = {
-       I2C_BOARD_INFO("lm87", 0x2e),
-       .platform_data  = &sfe4003_lm87_channel,
-};
-
-/* Board-specific LED info. */
-#define SFE4003_RED_LED_GPIO   11
-#define SFE4003_LED_ON         1
-#define SFE4003_LED_OFF                0
-
-static void sfe4003_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
-{
-       struct falcon_board *board = falcon_board(efx);
-
-       /* The LEDs were not wired to GPIOs before A3 */
-       if (board->minor < 3 && board->major == 0)
-               return;
-
-       falcon_txc_set_gpio_val(
-               efx, SFE4003_RED_LED_GPIO,
-               (mode == EFX_LED_ON) ? SFE4003_LED_ON : SFE4003_LED_OFF);
-}
-
-static void sfe4003_init_phy(struct efx_nic *efx)
-{
-       struct falcon_board *board = falcon_board(efx);
-
-       /* The LEDs were not wired to GPIOs before A3 */
-       if (board->minor < 3 && board->major == 0)
-               return;
-
-       falcon_txc_set_gpio_dir(efx, SFE4003_RED_LED_GPIO, TXC_GPIO_DIR_OUTPUT);
-       falcon_txc_set_gpio_val(efx, SFE4003_RED_LED_GPIO, SFE4003_LED_OFF);
-}
-
-static int sfe4003_check_hw(struct efx_nic *efx)
-{
-       struct falcon_board *board = falcon_board(efx);
-
-       /* A0/A1/A2 board rev. 4003s  report a temperature fault the whole time
-        * (bad sensor) so we mask it out. */
-       unsigned alarm_mask =
-               (board->major == 0 && board->minor <= 2) ?
-               ~LM87_ALARM_TEMP_EXT1 : ~0;
-
-       return efx_check_lm87(efx, alarm_mask);
-}
-
-static int sfe4003_init(struct efx_nic *efx)
-{
-       return efx_init_lm87(efx, &sfe4003_hwmon_info, sfe4003_lm87_regs);
-}
-
-static const struct falcon_board_type board_types[] = {
-       {
-               .id             = FALCON_BOARD_SFE4001,
-               .init           = sfe4001_init,
-               .init_phy       = efx_port_dummy_op_void,
-               .fini           = sfe4001_fini,
-               .set_id_led     = tenxpress_set_id_led,
-               .monitor        = sfe4001_check_hw,
-       },
-       {
-               .id             = FALCON_BOARD_SFE4002,
-               .init           = sfe4002_init,
-               .init_phy       = sfe4002_init_phy,
-               .fini           = efx_fini_lm87,
-               .set_id_led     = sfe4002_set_id_led,
-               .monitor        = sfe4002_check_hw,
-       },
-       {
-               .id             = FALCON_BOARD_SFE4003,
-               .init           = sfe4003_init,
-               .init_phy       = sfe4003_init_phy,
-               .fini           = efx_fini_lm87,
-               .set_id_led     = sfe4003_set_id_led,
-               .monitor        = sfe4003_check_hw,
-       },
-       {
-               .id             = FALCON_BOARD_SFN4112F,
-               .init           = sfn4112f_init,
-               .init_phy       = sfn4112f_init_phy,
-               .fini           = efx_fini_lm87,
-               .set_id_led     = sfn4112f_set_id_led,
-               .monitor        = sfn4112f_check_hw,
-       },
-};
-
-int falcon_probe_board(struct efx_nic *efx, u16 revision_info)
-{
-       struct falcon_board *board = falcon_board(efx);
-       u8 type_id = FALCON_BOARD_TYPE(revision_info);
-       int i;
-
-       board->major = FALCON_BOARD_MAJOR(revision_info);
-       board->minor = FALCON_BOARD_MINOR(revision_info);
-
-       for (i = 0; i < ARRAY_SIZE(board_types); i++)
-               if (board_types[i].id == type_id)
-                       board->type = &board_types[i];
-
-       if (board->type) {
-               return 0;
-       } else {
-               netif_err(efx, probe, efx->net_dev, "unknown board type %d\n",
-                         type_id);
-               return -ENODEV;
-       }
-}
index 4762ec444cb8ee985cbc09985d07fade1027208b..91aa3ec77e054c13df4ac0e9f518adaf7f8dddd8 100644 (file)
@@ -25,7 +25,7 @@
 #include "io.h"
 #include "workarounds.h"
 
-/* Falcon-architecture (SFC4000 and SFC9000-family) support */
+/* Falcon-architecture (SFC9000-family) support */
 
 /**************************************************************************
  *
@@ -356,6 +356,18 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue)
        }
 }
 
+unsigned int efx_farch_tx_limit_len(struct efx_tx_queue *tx_queue,
+                                   dma_addr_t dma_addr, unsigned int len)
+{
+       /* Don't cross 4K boundaries with descriptors. */
+       unsigned int limit = (~dma_addr & (EFX_PAGE_SIZE - 1)) + 1;
+
+       len = min(limit, len);
+
+       return len;
+}
+
+
 /* Allocate hardware resources for a TX queue */
 int efx_farch_tx_probe(struct efx_tx_queue *tx_queue)
 {
@@ -369,6 +381,7 @@ int efx_farch_tx_probe(struct efx_tx_queue *tx_queue)
 
 void efx_farch_tx_init(struct efx_tx_queue *tx_queue)
 {
+       int csum = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD;
        struct efx_nic *efx = tx_queue->efx;
        efx_oword_t reg;
 
@@ -390,37 +403,18 @@ void efx_farch_tx_init(struct efx_tx_queue *tx_queue)
                              FRF_AZ_TX_DESCQ_TYPE, 0,
                              FRF_BZ_TX_NON_IP_DROP_DIS, 1);
 
-       if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
-               int csum = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD;
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_IP_CHKSM_DIS, !csum);
-               EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_TCP_CHKSM_DIS,
-                                   !csum);
-       }
+       EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_IP_CHKSM_DIS, !csum);
+       EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_TCP_CHKSM_DIS, !csum);
 
        efx_writeo_table(efx, &reg, efx->type->txd_ptr_tbl_base,
                         tx_queue->queue);
 
-       if (efx_nic_rev(efx) < EFX_REV_FALCON_B0) {
-               /* Only 128 bits in this register */
-               BUILD_BUG_ON(EFX_MAX_TX_QUEUES > 128);
-
-               efx_reado(efx, &reg, FR_AA_TX_CHKSM_CFG);
-               if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD)
-                       __clear_bit_le(tx_queue->queue, &reg);
-               else
-                       __set_bit_le(tx_queue->queue, &reg);
-               efx_writeo(efx, &reg, FR_AA_TX_CHKSM_CFG);
-       }
-
-       if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
-               EFX_POPULATE_OWORD_1(reg,
-                                    FRF_BZ_TX_PACE,
-                                    (tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI) ?
-                                    FFE_BZ_TX_PACE_OFF :
-                                    FFE_BZ_TX_PACE_RESERVED);
-               efx_writeo_table(efx, &reg, FR_BZ_TX_PACE_TBL,
-                                tx_queue->queue);
-       }
+       EFX_POPULATE_OWORD_1(reg,
+                            FRF_BZ_TX_PACE,
+                            (tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI) ?
+                            FFE_BZ_TX_PACE_OFF :
+                            FFE_BZ_TX_PACE_RESERVED);
+       efx_writeo_table(efx, &reg, FR_BZ_TX_PACE_TBL, tx_queue->queue);
 }
 
 static void efx_farch_flush_tx_queue(struct efx_tx_queue *tx_queue)
@@ -517,16 +511,10 @@ void efx_farch_rx_init(struct efx_rx_queue *rx_queue)
 {
        efx_oword_t rx_desc_ptr;
        struct efx_nic *efx = rx_queue->efx;
-       bool is_b0 = efx_nic_rev(efx) >= EFX_REV_FALCON_B0;
-       bool iscsi_digest_en = is_b0;
        bool jumbo_en;
 
-       /* For kernel-mode queues in Falcon A1, the JUMBO flag enables
-        * DMA to continue after a PCIe page boundary (and scattering
-        * is not possible).  In Falcon B0 and Siena, it enables
-        * scatter.
-        */
-       jumbo_en = !is_b0 || efx->rx_scatter;
+       /* For kernel-mode queues in Siena, the JUMBO flag enables scatter. */
+       jumbo_en = efx->rx_scatter;
 
        netif_dbg(efx, hw, efx->net_dev,
                  "RX queue %d ring in special buffers %d-%d\n",
@@ -540,8 +528,8 @@ void efx_farch_rx_init(struct efx_rx_queue *rx_queue)
 
        /* Push RX descriptor ring to card */
        EFX_POPULATE_OWORD_10(rx_desc_ptr,
-                             FRF_AZ_RX_ISCSI_DDIG_EN, iscsi_digest_en,
-                             FRF_AZ_RX_ISCSI_HDIG_EN, iscsi_digest_en,
+                             FRF_AZ_RX_ISCSI_DDIG_EN, true,
+                             FRF_AZ_RX_ISCSI_HDIG_EN, true,
                              FRF_AZ_RX_DESCQ_BUF_BASE_ID, rx_queue->rxd.index,
                              FRF_AZ_RX_DESCQ_EVQ_ID,
                              efx_rx_queue_channel(rx_queue)->channel,
@@ -880,7 +868,7 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
        struct efx_nic *efx = rx_queue->efx;
        bool rx_ev_buf_owner_id_err, rx_ev_ip_hdr_chksum_err;
        bool rx_ev_tcp_udp_chksum_err, rx_ev_eth_crc_err;
-       bool rx_ev_frm_trunc, rx_ev_drib_nib, rx_ev_tobe_disc;
+       bool rx_ev_frm_trunc, rx_ev_tobe_disc;
        bool rx_ev_other_err, rx_ev_pause_frm;
        bool rx_ev_hdr_type, rx_ev_mcast_pkt;
        unsigned rx_ev_pkt_type;
@@ -897,12 +885,10 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
                                                   FSF_AZ_RX_EV_TCP_UDP_CHKSUM_ERR);
        rx_ev_eth_crc_err = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_ETH_CRC_ERR);
        rx_ev_frm_trunc = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_FRM_TRUNC);
-       rx_ev_drib_nib = ((efx_nic_rev(efx) >= EFX_REV_FALCON_B0) ?
-                         0 : EFX_QWORD_FIELD(*event, FSF_AA_RX_EV_DRIB_NIB));
        rx_ev_pause_frm = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PAUSE_FRM_ERR);
 
        /* Every error apart from tobe_disc and pause_frm */
-       rx_ev_other_err = (rx_ev_drib_nib | rx_ev_tcp_udp_chksum_err |
+       rx_ev_other_err = (rx_ev_tcp_udp_chksum_err |
                           rx_ev_buf_owner_id_err | rx_ev_eth_crc_err |
                           rx_ev_frm_trunc | rx_ev_ip_hdr_chksum_err);
 
@@ -927,7 +913,7 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
        if (rx_ev_other_err && net_ratelimit()) {
                netif_dbg(efx, rx_err, efx->net_dev,
                          " RX queue %d unexpected RX event "
-                         EFX_QWORD_FMT "%s%s%s%s%s%s%s%s\n",
+                         EFX_QWORD_FMT "%s%s%s%s%s%s%s\n",
                          efx_rx_queue_index(rx_queue), EFX_QWORD_VAL(*event),
                          rx_ev_buf_owner_id_err ? " [OWNER_ID_ERR]" : "",
                          rx_ev_ip_hdr_chksum_err ?
@@ -936,14 +922,13 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
                          " [TCP_UDP_CHKSUM_ERR]" : "",
                          rx_ev_eth_crc_err ? " [ETH_CRC_ERR]" : "",
                          rx_ev_frm_trunc ? " [FRM_TRUNC]" : "",
-                         rx_ev_drib_nib ? " [DRIB_NIB]" : "",
                          rx_ev_tobe_disc ? " [TOBE_DISC]" : "",
                          rx_ev_pause_frm ? " [PAUSE]" : "");
        }
 #endif
 
        /* The frame must be discarded if any of these are true. */
-       return (rx_ev_eth_crc_err | rx_ev_frm_trunc | rx_ev_drib_nib |
+       return (rx_ev_eth_crc_err | rx_ev_frm_trunc |
                rx_ev_tobe_disc | rx_ev_pause_frm) ?
                EFX_RX_PKT_DISCARD : 0;
 }
@@ -972,8 +957,7 @@ efx_farch_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index)
                   "dropped %d events (index=%d expected=%d)\n",
                   dropped, index, expected);
 
-       efx_schedule_reset(efx, EFX_WORKAROUND_5676(efx) ?
-                          RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE);
+       efx_schedule_reset(efx, RESET_TYPE_DISABLE);
        return false;
 }
 
@@ -1239,10 +1223,7 @@ efx_farch_handle_driver_event(struct efx_channel *channel, efx_qword_t *event)
                          "channel %d seen DRIVER RX_RESET event. "
                        "Resetting.\n", channel->channel);
                atomic_inc(&efx->rx_reset);
-               efx_schedule_reset(efx,
-                                  EFX_WORKAROUND_6555(efx) ?
-                                  RESET_TYPE_RX_RECOVERY :
-                                  RESET_TYPE_DISABLE);
+               efx_schedule_reset(efx, RESET_TYPE_DISABLE);
                break;
        case FSE_BZ_RX_DSC_ERROR_EV:
                if (ev_sub_data < EFX_VI_BASE) {
@@ -1379,13 +1360,11 @@ int efx_farch_ev_init(struct efx_channel *channel)
                  channel->channel, channel->eventq.index,
                  channel->eventq.index + channel->eventq.entries - 1);
 
-       if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) {
-               EFX_POPULATE_OWORD_3(reg,
-                                    FRF_CZ_TIMER_Q_EN, 1,
-                                    FRF_CZ_HOST_NOTIFY_MODE, 0,
-                                    FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
-               efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, channel->channel);
-       }
+       EFX_POPULATE_OWORD_3(reg,
+                            FRF_CZ_TIMER_Q_EN, 1,
+                            FRF_CZ_HOST_NOTIFY_MODE, 0,
+                            FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
+       efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, channel->channel);
 
        /* Pin event queue buffer */
        efx_init_special_buffer(efx, &channel->eventq);
@@ -1413,8 +1392,7 @@ void efx_farch_ev_fini(struct efx_channel *channel)
        EFX_ZERO_OWORD(reg);
        efx_writeo_table(efx, &reg, efx->type->evq_ptr_tbl_base,
                         channel->channel);
-       if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0)
-               efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, channel->channel);
+       efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, channel->channel);
 
        /* Unpin event queue */
        efx_fini_special_buffer(efx, &channel->eventq);
@@ -1488,7 +1466,6 @@ int efx_farch_irq_test_generate(struct efx_nic *efx)
  */
 irqreturn_t efx_farch_fatal_interrupt(struct efx_nic *efx)
 {
-       struct falcon_nic_data *nic_data = efx->nic_data;
        efx_oword_t *int_ker = efx->irq_status.addr;
        efx_oword_t fatal_intr;
        int error, mem_perr;
@@ -1514,8 +1491,6 @@ irqreturn_t efx_farch_fatal_interrupt(struct efx_nic *efx)
 
        /* Disable both devices */
        pci_clear_master(efx->pci_dev);
-       if (efx_nic_is_dual_func(efx))
-               pci_clear_master(nic_data->pci_dev2);
        efx_farch_irq_disable_master(efx);
 
        /* Count errors and reset or disable the NIC accordingly */
@@ -1662,8 +1637,6 @@ void efx_farch_rx_push_indir_table(struct efx_nic *efx)
        size_t i = 0;
        efx_dword_t dword;
 
-       BUG_ON(efx_nic_rev(efx) < EFX_REV_FALCON_B0);
-
        BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
                     FR_BZ_RX_INDIRECTION_TBL_ROWS);
 
@@ -1791,8 +1764,7 @@ void efx_farch_init_common(struct efx_nic *efx)
                             FRF_AZ_ILL_ADR_INT_KER_EN, 1,
                             FRF_AZ_RBUF_OWN_INT_KER_EN, 1,
                             FRF_AZ_TBUF_OWN_INT_KER_EN, 1);
-       if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0)
-               EFX_SET_OWORD_FIELD(temp, FRF_CZ_SRAM_PERR_INT_P_KER_EN, 1);
+       EFX_SET_OWORD_FIELD(temp, FRF_CZ_SRAM_PERR_INT_P_KER_EN, 1);
        EFX_INVERT_OWORD(temp);
        efx_writeo(efx, &temp, FR_AZ_FATAL_INTR_KER);
 
@@ -1812,22 +1784,18 @@ void efx_farch_init_common(struct efx_nic *efx)
        /* Disable hardware watchdog which can misfire */
        EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PREF_WD_TMR, 0x3fffff);
        /* Squash TX of packets of 16 bytes or less */
-       if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0)
-               EFX_SET_OWORD_FIELD(temp, FRF_BZ_TX_FLUSH_MIN_LEN_EN, 1);
+       EFX_SET_OWORD_FIELD(temp, FRF_BZ_TX_FLUSH_MIN_LEN_EN, 1);
        efx_writeo(efx, &temp, FR_AZ_TX_RESERVED);
 
-       if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
-               EFX_POPULATE_OWORD_4(temp,
-                                    /* Default values */
-                                    FRF_BZ_TX_PACE_SB_NOT_AF, 0x15,
-                                    FRF_BZ_TX_PACE_SB_AF, 0xb,
-                                    FRF_BZ_TX_PACE_FB_BASE, 0,
-                                    /* Allow large pace values in the
-                                     * fast bin. */
-                                    FRF_BZ_TX_PACE_BIN_TH,
-                                    FFE_BZ_TX_PACE_RESERVED);
-               efx_writeo(efx, &temp, FR_BZ_TX_PACE);
-       }
+       EFX_POPULATE_OWORD_4(temp,
+                            /* Default values */
+                            FRF_BZ_TX_PACE_SB_NOT_AF, 0x15,
+                            FRF_BZ_TX_PACE_SB_AF, 0xb,
+                            FRF_BZ_TX_PACE_FB_BASE, 0,
+                            /* Allow large pace values in the fast bin. */
+                            FRF_BZ_TX_PACE_BIN_TH,
+                            FFE_BZ_TX_PACE_RESERVED);
+       efx_writeo(efx, &temp, FR_BZ_TX_PACE);
 }
 
 /**************************************************************************
@@ -2011,7 +1979,7 @@ static void efx_farch_filter_push_rx_config(struct efx_nic *efx)
                        !!(table->spec[EFX_FARCH_FILTER_INDEX_UC_DEF].flags &
                           table->spec[EFX_FARCH_FILTER_INDEX_MC_DEF].flags &
                           EFX_FILTER_FLAG_RX_SCATTER));
-       } else if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
+       } else {
                /* We don't expose 'default' filters because unmatched
                 * packets always go to the queue number found in the
                 * RSS table.  But we still need to set the RX scatter
@@ -2819,31 +2787,27 @@ int efx_farch_filter_table_probe(struct efx_nic *efx)
                return -ENOMEM;
        efx->filter_state = state;
 
-       if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
-               table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
-               table->id = EFX_FARCH_FILTER_TABLE_RX_IP;
-               table->offset = FR_BZ_RX_FILTER_TBL0;
-               table->size = FR_BZ_RX_FILTER_TBL0_ROWS;
-               table->step = FR_BZ_RX_FILTER_TBL0_STEP;
-       }
+       table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
+       table->id = EFX_FARCH_FILTER_TABLE_RX_IP;
+       table->offset = FR_BZ_RX_FILTER_TBL0;
+       table->size = FR_BZ_RX_FILTER_TBL0_ROWS;
+       table->step = FR_BZ_RX_FILTER_TBL0_STEP;
 
-       if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) {
-               table = &state->table[EFX_FARCH_FILTER_TABLE_RX_MAC];
-               table->id = EFX_FARCH_FILTER_TABLE_RX_MAC;
-               table->offset = FR_CZ_RX_MAC_FILTER_TBL0;
-               table->size = FR_CZ_RX_MAC_FILTER_TBL0_ROWS;
-               table->step = FR_CZ_RX_MAC_FILTER_TBL0_STEP;
-
-               table = &state->table[EFX_FARCH_FILTER_TABLE_RX_DEF];
-               table->id = EFX_FARCH_FILTER_TABLE_RX_DEF;
-               table->size = EFX_FARCH_FILTER_SIZE_RX_DEF;
-
-               table = &state->table[EFX_FARCH_FILTER_TABLE_TX_MAC];
-               table->id = EFX_FARCH_FILTER_TABLE_TX_MAC;
-               table->offset = FR_CZ_TX_MAC_FILTER_TBL0;
-               table->size = FR_CZ_TX_MAC_FILTER_TBL0_ROWS;
-               table->step = FR_CZ_TX_MAC_FILTER_TBL0_STEP;
-       }
+       table = &state->table[EFX_FARCH_FILTER_TABLE_RX_MAC];
+       table->id = EFX_FARCH_FILTER_TABLE_RX_MAC;
+       table->offset = FR_CZ_RX_MAC_FILTER_TBL0;
+       table->size = FR_CZ_RX_MAC_FILTER_TBL0_ROWS;
+       table->step = FR_CZ_RX_MAC_FILTER_TBL0_STEP;
+
+       table = &state->table[EFX_FARCH_FILTER_TABLE_RX_DEF];
+       table->id = EFX_FARCH_FILTER_TABLE_RX_DEF;
+       table->size = EFX_FARCH_FILTER_SIZE_RX_DEF;
+
+       table = &state->table[EFX_FARCH_FILTER_TABLE_TX_MAC];
+       table->id = EFX_FARCH_FILTER_TABLE_TX_MAC;
+       table->offset = FR_CZ_TX_MAC_FILTER_TBL0;
+       table->size = FR_CZ_TX_MAC_FILTER_TBL0_ROWS;
+       table->step = FR_CZ_TX_MAC_FILTER_TBL0_STEP;
 
        for (table_id = 0; table_id < EFX_FARCH_FILTER_TABLE_COUNT; table_id++) {
                table = &state->table[table_id];
index 241520943adaebf353aaf7eb53d7fc8d5c450310..995651341b94d658f6f50f01c80356cd029ee600 100644 (file)
@@ -15,7 +15,6 @@
 #include "io.h"
 #include "farch_regs.h"
 #include "mcdi_pcol.h"
-#include "phy.h"
 
 /**************************************************************************
  *
index ccceafc1589601d6b6efd2808c425cd7024bf29c..35cc3d4fa5f692a3ae58503ecd94c995b260fc0e 100644 (file)
 /* The clock whose frequency you've attempted to set
  * doesn't exist on this NIC */
 #define MC_CMD_ERR_NO_CLOCK 0x1015
+/* Returned by MC_CMD_TESTASSERT if the action that should
+ * have caused an assertion failed to do so.  */
+#define MC_CMD_ERR_UNREACHABLE 0x1016
 
 #define MC_CMD_ERR_CODE_OFST 0
 
 #define        MC_CMD_COPYCODE_IN_BOOT_MAGIC_SKIP_BOOT_ICORE_SYNC_WIDTH 1
 #define        MC_CMD_COPYCODE_IN_BOOT_MAGIC_FORCE_STANDALONE_LBN 5
 #define        MC_CMD_COPYCODE_IN_BOOT_MAGIC_FORCE_STANDALONE_WIDTH 1
+#define        MC_CMD_COPYCODE_IN_BOOT_MAGIC_DISABLE_XIP_LBN 6
+#define        MC_CMD_COPYCODE_IN_BOOT_MAGIC_DISABLE_XIP_WIDTH 1
 /* Destination address */
 #define       MC_CMD_COPYCODE_IN_DEST_ADDR_OFST 4
 #define       MC_CMD_COPYCODE_IN_NUMWORDS_OFST 8
 #define       MC_CMD_PTP_OUT_GET_ATTRIBUTES_CAPABILITIES_OFST 8
 #define        MC_CMD_PTP_OUT_GET_ATTRIBUTES_REPORT_SYNC_STATUS_LBN 0
 #define        MC_CMD_PTP_OUT_GET_ATTRIBUTES_REPORT_SYNC_STATUS_WIDTH 1
+#define        MC_CMD_PTP_OUT_GET_ATTRIBUTES_RX_TSTAMP_OOB_LBN 1
+#define        MC_CMD_PTP_OUT_GET_ATTRIBUTES_RX_TSTAMP_OOB_WIDTH 1
 #define       MC_CMD_PTP_OUT_GET_ATTRIBUTES_RESERVED0_OFST 12
 #define       MC_CMD_PTP_OUT_GET_ATTRIBUTES_RESERVED1_OFST 16
 #define       MC_CMD_PTP_OUT_GET_ATTRIBUTES_RESERVED2_OFST 20
 #define          MC_CMD_FW_HIGH_TX_RATE 0x3
 /* enum: Reserved value */
 #define          MC_CMD_FW_PACKED_STREAM_HASH_MODE_1 0x4
+/* enum: Prefer to use firmware with additional "rules engine" filtering
+ * support
+ */
+#define          MC_CMD_FW_RULES_ENGINE 0x5
 /* enum: Only this option is allowed for non-admin functions */
 #define          MC_CMD_FW_DONT_CARE  0xffffffff
 
 
 #define MC_CMD_0x38_PRIVILEGE_CTG SRIOV_CTG_ADMIN
 
-/* MC_CMD_NVRAM_UPDATE_START_IN msgrequest */
+/* MC_CMD_NVRAM_UPDATE_START_IN msgrequest: Legacy NVRAM_UPDATE_START request.
+ * Use NVRAM_UPDATE_START_V2_IN in new code
+ */
 #define    MC_CMD_NVRAM_UPDATE_START_IN_LEN 4
 #define       MC_CMD_NVRAM_UPDATE_START_IN_TYPE_OFST 0
 /*            Enum values, see field(s): */
 /*               MC_CMD_NVRAM_TYPES/MC_CMD_NVRAM_TYPES_OUT/TYPES */
 
+/* MC_CMD_NVRAM_UPDATE_START_V2_IN msgrequest: Extended NVRAM_UPDATE_START
+ * request with additional flags indicating version of command in use. See
+ * MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT for details of extended functionality. Use
+ * paired up with NVRAM_UPDATE_FINISH_V2_IN.
+ */
+#define    MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN 8
+#define       MC_CMD_NVRAM_UPDATE_START_V2_IN_TYPE_OFST 0
+/*            Enum values, see field(s): */
+/*               MC_CMD_NVRAM_TYPES/MC_CMD_NVRAM_TYPES_OUT/TYPES */
+#define       MC_CMD_NVRAM_UPDATE_START_V2_IN_FLAGS_OFST 4
+#define        MC_CMD_NVRAM_UPDATE_START_V2_IN_FLAG_REPORT_VERIFY_RESULT_LBN 0
+#define        MC_CMD_NVRAM_UPDATE_START_V2_IN_FLAG_REPORT_VERIFY_RESULT_WIDTH 1
+
 /* MC_CMD_NVRAM_UPDATE_START_OUT msgresponse */
 #define    MC_CMD_NVRAM_UPDATE_START_OUT_LEN 0
 
 
 #define MC_CMD_0x3c_PRIVILEGE_CTG SRIOV_CTG_ADMIN
 
-/* MC_CMD_NVRAM_UPDATE_FINISH_IN msgrequest */
+/* MC_CMD_NVRAM_UPDATE_FINISH_IN msgrequest: Legacy NVRAM_UPDATE_FINISH
+ * request. Use NVRAM_UPDATE_FINISH_V2_IN in new code
+ */
 #define    MC_CMD_NVRAM_UPDATE_FINISH_IN_LEN 8
 #define       MC_CMD_NVRAM_UPDATE_FINISH_IN_TYPE_OFST 0
 /*            Enum values, see field(s): */
 /*               MC_CMD_NVRAM_TYPES/MC_CMD_NVRAM_TYPES_OUT/TYPES */
 #define       MC_CMD_NVRAM_UPDATE_FINISH_IN_REBOOT_OFST 4
 
-/* MC_CMD_NVRAM_UPDATE_FINISH_OUT msgresponse */
+/* MC_CMD_NVRAM_UPDATE_FINISH_V2_IN msgrequest: Extended NVRAM_UPDATE_FINISH
+ * request with additional flags indicating version of NVRAM_UPDATE commands in
+ * use. See MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT for details of extended
+ * functionality. Use paired up with NVRAM_UPDATE_START_V2_IN.
+ */
+#define    MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN 12
+#define       MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_TYPE_OFST 0
+/*            Enum values, see field(s): */
+/*               MC_CMD_NVRAM_TYPES/MC_CMD_NVRAM_TYPES_OUT/TYPES */
+#define       MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_REBOOT_OFST 4
+#define       MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_FLAGS_OFST 8
+#define        MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT_LBN 0
+#define        MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT_WIDTH 1
+
+/* MC_CMD_NVRAM_UPDATE_FINISH_OUT msgresponse: Legacy NVRAM_UPDATE_FINISH
+ * response. Use NVRAM_UPDATE_FINISH_V2_OUT in new code
+ */
 #define    MC_CMD_NVRAM_UPDATE_FINISH_OUT_LEN 0
 
+/* MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT msgresponse:
+ *
+ * Extended NVRAM_UPDATE_FINISH response that communicates the result of secure
+ * firmware validation where applicable back to the host.
+ *
+ * Medford only: For signed firmware images, such as those for medford, the MC
+ * firmware verifies the signature before marking the firmware image as valid.
+ * This process takes a few seconds to complete, so it is likely to take longer
+ * than the MCDI timeout. Hence signature verification is initiated when
+ * MC_CMD_NVRAM_UPDATE_FINISH_V2_IN is received by the firmware; however, the
+ * MCDI command returns immediately with error code EAGAIN. Subsequent
+ * NVRAM_UPDATE_FINISH_V2_IN requests also return EAGAIN if the verification is
+ * in progress. Once the verification has completed, this response payload
+ * includes the results of the signature verification. Note that the nvram lock
+ * in firmware is only released after the verification has completed and the
+ * host has read back the result code from firmware.
+ */
+#define    MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN 4
+/* Result of nvram update completion processing */
+#define       MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_RESULT_CODE_OFST 0
+/* enum: Verify succeeded without any errors. */
+#define          MC_CMD_NVRAM_VERIFY_RC_SUCCESS 0x1
+/* enum: CMS format verification failed due to an internal error. */
+#define          MC_CMD_NVRAM_VERIFY_RC_CMS_CHECK_FAILED 0x2
+/* enum: Invalid CMS format in image metadata. */
+#define          MC_CMD_NVRAM_VERIFY_RC_INVALID_CMS_FORMAT 0x3
+/* enum: Message digest verification failed due to an internal error. */
+#define          MC_CMD_NVRAM_VERIFY_RC_MESSAGE_DIGEST_CHECK_FAILED 0x4
+/* enum: Error in message digest calculated over the reflash-header, payload
+ * and reflash-trailer.
+ */
+#define          MC_CMD_NVRAM_VERIFY_RC_BAD_MESSAGE_DIGEST 0x5
+/* enum: Signature verification failed due to an internal error. */
+#define          MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHECK_FAILED 0x6
+/* enum: There are no valid signatures in the image. */
+#define          MC_CMD_NVRAM_VERIFY_RC_NO_VALID_SIGNATURES 0x7
+/* enum: Trusted approvers verification failed due to an internal error. */
+#define          MC_CMD_NVRAM_VERIFY_RC_TRUSTED_APPROVERS_CHECK_FAILED 0x8
+/* enum: The Trusted approver's list is empty. */
+#define          MC_CMD_NVRAM_VERIFY_RC_NO_TRUSTED_APPROVERS 0x9
+/* enum: Signature chain verification failed due to an internal error. */
+#define          MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHAIN_CHECK_FAILED 0xa
+/* enum: The signers of the signatures in the image are not listed in the
+ * Trusted approver's list.
+ */
+#define          MC_CMD_NVRAM_VERIFY_RC_NO_SIGNATURE_MATCH 0xb
+
 
 /***********************************/
 /* MC_CMD_REBOOT
 /* MC_CMD_TESTASSERT_OUT msgresponse */
 #define    MC_CMD_TESTASSERT_OUT_LEN 0
 
+/* MC_CMD_TESTASSERT_V2_IN msgrequest */
+#define    MC_CMD_TESTASSERT_V2_IN_LEN 4
+/* How to provoke the assertion */
+#define       MC_CMD_TESTASSERT_V2_IN_TYPE_OFST 0
+/* enum: Assert using the FAIL_ASSERTION_WITH_USEFUL_VALUES macro. Unless
+ * you're testing firmware, this is what you want.
+ */
+#define          MC_CMD_TESTASSERT_V2_IN_FAIL_ASSERTION_WITH_USEFUL_VALUES  0x0
+/* enum: Assert using assert(0); */
+#define          MC_CMD_TESTASSERT_V2_IN_ASSERT_FALSE  0x1
+/* enum: Deliberately trigger a watchdog */
+#define          MC_CMD_TESTASSERT_V2_IN_WATCHDOG  0x2
+/* enum: Deliberately trigger a trap by loading from an invalid address */
+#define          MC_CMD_TESTASSERT_V2_IN_LOAD_TRAP  0x3
+/* enum: Deliberately trigger a trap by storing to an invalid address */
+#define          MC_CMD_TESTASSERT_V2_IN_STORE_TRAP  0x4
+/* enum: Jump to an invalid address */
+#define          MC_CMD_TESTASSERT_V2_IN_JUMP_TRAP  0x5
+
+/* MC_CMD_TESTASSERT_V2_OUT msgresponse */
+#define    MC_CMD_TESTASSERT_V2_OUT_LEN 0
+
 
 /***********************************/
 /* MC_CMD_WORKAROUND
  * (GET_PHY_CFG_OUT_MEDIA_TYPE); the valid 'page number' input values, and the
  * output data, are interpreted on a per-type basis. For SFP+: PAGE=0 or 1
  * returns a 128-byte block read from module I2C address 0xA0 offset 0 or 0x80.
+ * Anything else: currently undefined. Locks required: None. Return code: 0.
  */
 #define MC_CMD_GET_PHY_MEDIA_INFO 0x4b
 
 #define          NVRAM_PARTITION_TYPE_EXPANSION_UEFI       0xd00
 /* enum: Spare partition 0 */
 #define          NVRAM_PARTITION_TYPE_SPARE_0              0x1000
-/* enum: Spare partition 1 */
-#define          NVRAM_PARTITION_TYPE_SPARE_1              0x1100
+/* enum: Used for XIP code of shmbooted images */
+#define          NVRAM_PARTITION_TYPE_XIP_SCRATCH          0x1100
 /* enum: Spare partition 2 */
 #define          NVRAM_PARTITION_TYPE_SPARE_2              0x1200
-/* enum: Spare partition 3 */
-#define          NVRAM_PARTITION_TYPE_SPARE_3              0x1300
+/* enum: Manufacturing partition. Used during manufacture to pass information
+ * between XJTAG and Manftest.
+ */
+#define          NVRAM_PARTITION_TYPE_MANUFACTURING        0x1300
 /* enum: Spare partition 4 */
 #define          NVRAM_PARTITION_TYPE_SPARE_4              0x1400
 /* enum: Spare partition 5 */
 #define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM     0x40
 /* enum: Network Access Control */
 #define          LICENSED_APP_ID_NETWORK_ACCESS_CONTROL  0x80
+/* enum: TCP Direct */
+#define          LICENSED_APP_ID_TCP_DIRECT              0x100
+/* enum: Low Latency */
+#define          LICENSED_APP_ID_LOW_LATENCY             0x200
+/* enum: SolarCapture Tap */
+#define          LICENSED_APP_ID_SOLARCAPTURE_TAP        0x400
+/* enum: Capture SolarSystem 40G */
+#define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_40G 0x800
 #define       LICENSED_APP_ID_ID_LBN 0
 #define       LICENSED_APP_ID_ID_WIDTH 32
 
 #define        LICENSED_V3_APPS_CAPTURE_SOLARSYSTEM_WIDTH 1
 #define        LICENSED_V3_APPS_NETWORK_ACCESS_CONTROL_LBN 7
 #define        LICENSED_V3_APPS_NETWORK_ACCESS_CONTROL_WIDTH 1
+#define        LICENSED_V3_APPS_TCP_DIRECT_LBN 8
+#define        LICENSED_V3_APPS_TCP_DIRECT_WIDTH 1
+#define        LICENSED_V3_APPS_LOW_LATENCY_LBN 9
+#define        LICENSED_V3_APPS_LOW_LATENCY_WIDTH 1
+#define        LICENSED_V3_APPS_SOLARCAPTURE_TAP_LBN 10
+#define        LICENSED_V3_APPS_SOLARCAPTURE_TAP_WIDTH 1
+#define        LICENSED_V3_APPS_CAPTURE_SOLARSYSTEM_40G_LBN 11
+#define        LICENSED_V3_APPS_CAPTURE_SOLARSYSTEM_40G_WIDTH 1
 #define       LICENSED_V3_APPS_MASK_LBN 0
 #define       LICENSED_V3_APPS_MASK_WIDTH 64
 
 #define        MC_CMD_INIT_TXQ_EXT_IN_FLAG_INNER_TCP_CSUM_EN_WIDTH 1
 #define        MC_CMD_INIT_TXQ_EXT_IN_FLAG_TSOV2_EN_LBN 12
 #define        MC_CMD_INIT_TXQ_EXT_IN_FLAG_TSOV2_EN_WIDTH 1
+#define        MC_CMD_INIT_TXQ_EXT_IN_FLAG_CTPIO_LBN 13
+#define        MC_CMD_INIT_TXQ_EXT_IN_FLAG_CTPIO_WIDTH 1
 /* Owner ID to use if in buffer mode (zero if physical) */
 #define       MC_CMD_INIT_TXQ_EXT_IN_OWNER_ID_OFST 20
 /* The port ID associated with the v-adaptor which should contain this DMAQ. */
  * tests (Medford development only)
  */
 #define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
 #define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
 /* enum: RX PD firmware parsing but not filtering network overlay tunnel
  * tests (Medford development only)
  */
 #define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
 #define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
 /* Hardware capabilities of NIC */
  * tests (Medford development only)
  */
 #define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
 #define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
 /* enum: RX PD firmware parsing but not filtering network overlay tunnel
  * tests (Medford development only)
  */
 #define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
 #define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
 /* Hardware capabilities of NIC */
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_LBN 11
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_NVRAM_UPDATE_REPORT_VERIFY_RESULT_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_NVRAM_UPDATE_REPORT_VERIFY_RESULT_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_LBN 11
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_NVRAM_UPDATE_REPORT_VERIFY_RESULT_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_NVRAM_UPDATE_REPORT_VERIFY_RESULT_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_LEN 2
 /* On chips later than Medford the amount of address space assigned to each VI
  * is configurable. This is a global setting that the driver must query to
- * discover the VI to address mapping. Cut-through PIO (CTPIO) in not available
+ * discover the VI to address mapping. Cut-through PIO (CTPIO) is not available
  * with 8k VI windows.
  */
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_OFST 72
  * more data is returned.
  */
 #define          MC_CMD_PCIE_TUNE_IN_POLL_EYE_PLOT  0x6
+/* enum: Enable the SERDES BIST and set it to generate a 200MHz square wave */
+#define          MC_CMD_PCIE_TUNE_IN_BIST_SQUARE_WAVE  0x7
 /* Align the arguments to 32 bits */
 #define       MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_OFST 1
 #define       MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_LEN 3
 #define       MC_CMD_PCIE_TUNE_POLL_EYE_PLOT_OUT_SAMPLES_MINNUM 0
 #define       MC_CMD_PCIE_TUNE_POLL_EYE_PLOT_OUT_SAMPLES_MAXNUM 126
 
+/* MC_CMD_PCIE_TUNE_BIST_SQUARE_WAVE_IN msgrequest */
+#define    MC_CMD_PCIE_TUNE_BIST_SQUARE_WAVE_IN_LEN 0
+
+/* MC_CMD_PCIE_TUNE_BIST_SQUARE_WAVE_OUT msgresponse */
+#define    MC_CMD_PCIE_TUNE_BIST_SQUARE_WAVE_OUT_LEN 0
+
 
 /***********************************/
 /* MC_CMD_LICENSING
 #define MC_CMD_0xd4_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_LICENSED_V3_VALIDATE_APP_IN msgrequest */
-#define    MC_CMD_LICENSED_V3_VALIDATE_APP_IN_LEN 72
+#define    MC_CMD_LICENSED_V3_VALIDATE_APP_IN_LEN 56
+/* challenge for validation (384 bits) */
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_CHALLENGE_OFST 0
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_CHALLENGE_LEN 48
 /* application ID expressed as a single bit mask */
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_OFST 0
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_OFST 48
 #define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_LEN 8
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_LO_OFST 0
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_HI_OFST 4
-/* challenge for validation */
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_CHALLENGE_OFST 8
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_CHALLENGE_LEN 64
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_LO_OFST 48
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_HI_OFST 52
 
 /* MC_CMD_LICENSED_V3_VALIDATE_APP_OUT msgresponse */
-#define    MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_LEN 72
+#define    MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_LEN 116
+/* validation response to challenge in the form of ECDSA signature consisting
+ * of two 384-bit integers, r and s, in big-endian order. The signature signs a
+ * SHA-384 digest of a message constructed from the concatenation of the input
+ * message and the remaining fields of this output message, e.g. challenge[48
+ * bytes] ... expiry_time[4 bytes] ...
+ */
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_RESPONSE_OFST 0
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_RESPONSE_LEN 96
 /* application expiry time */
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_TIME_OFST 0
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_TIME_OFST 96
 /* application expiry units */
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_OFST 4
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_OFST 100
 /* enum: expiry units are accounting units */
 #define          MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_ACC  0x0
 /* enum: expiry units are calendar days */
 #define          MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_DAYS  0x1
-/* validation response to challenge */
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_RESPONSE_OFST 8
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_RESPONSE_LEN 64
+/* base MAC address of the NIC stored in NVRAM (note that this is a constant
+ * value for a given NIC regardless which function is calling, effectively this
+ * is PF0 base MAC address)
+ */
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_BASE_MACADDR_OFST 104
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_BASE_MACADDR_LEN 6
+/* MAC address of v-adaptor associated with the client. If no such v-adaptor
+ * exists, then the field is filled with 0xFF.
+ */
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_VADAPTOR_MACADDR_OFST 110
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_VADAPTOR_MACADDR_LEN 6
 
 
 /***********************************/
 #define    MC_CMD_LICENSED_V3_MASK_FEATURES_OUT_LEN 0
 
 
+/***********************************/
+/* MC_CMD_LICENSING_V3_TEMPORARY
+ * Perform operations to support installation of a single temporary license in
+ * the adapter, in addition to those found in the licensing partition. See
+ * SF-116124-SW for an overview of how this could be used. The license is
+ * stored in MC persistent data and so will survive a MC reboot, but will be
+ * erased when the adapter is power cycled
+ */
+#define MC_CMD_LICENSING_V3_TEMPORARY 0xd6
+
+#define MC_CMD_0xd6_PRIVILEGE_CTG SRIOV_CTG_GENERAL
+
+/* MC_CMD_LICENSING_V3_TEMPORARY_IN msgrequest */
+#define    MC_CMD_LICENSING_V3_TEMPORARY_IN_LEN 4
+/* operation code */
+#define       MC_CMD_LICENSING_V3_TEMPORARY_IN_OP_OFST 0
+/* enum: install a new license, overwriting any existing temporary license.
+ * This is an asynchronous operation owing to the time taken to validate an
+ * ECDSA license
+ */
+#define          MC_CMD_LICENSING_V3_TEMPORARY_SET  0x0
+/* enum: clear the license immediately rather than waiting for the next power
+ * cycle
+ */
+#define          MC_CMD_LICENSING_V3_TEMPORARY_CLEAR  0x1
+/* enum: get the status of the asynchronous MC_CMD_LICENSING_V3_TEMPORARY_SET
+ * operation
+ */
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS  0x2
+
+/* MC_CMD_LICENSING_V3_TEMPORARY_IN_SET msgrequest */
+#define    MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LEN 164
+#define       MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_OP_OFST 0
+/* ECDSA license and signature */
+#define       MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LICENSE_OFST 4
+#define       MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LICENSE_LEN 160
+
+/* MC_CMD_LICENSING_V3_TEMPORARY_IN_CLEAR msgrequest */
+#define    MC_CMD_LICENSING_V3_TEMPORARY_IN_CLEAR_LEN 4
+#define       MC_CMD_LICENSING_V3_TEMPORARY_IN_CLEAR_OP_OFST 0
+
+/* MC_CMD_LICENSING_V3_TEMPORARY_IN_STATUS msgrequest */
+#define    MC_CMD_LICENSING_V3_TEMPORARY_IN_STATUS_LEN 4
+#define       MC_CMD_LICENSING_V3_TEMPORARY_IN_STATUS_OP_OFST 0
+
+/* MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS msgresponse */
+#define    MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LEN 12
+/* status code */
+#define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_STATUS_OFST 0
+/* enum: finished validating and installing license */
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_OK  0x0
+/* enum: license validation and installation in progress */
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_IN_PROGRESS  0x1
+/* enum: licensing error. More specific error messages are not provided to
+ * avoid exposing details of the licensing system to the client
+ */
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_ERROR  0x2
+/* bitmask of licensed features */
+#define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_OFST 4
+#define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_LEN 8
+#define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_LO_OFST 4
+#define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_HI_OFST 8
+
+
 /***********************************/
 /* MC_CMD_SET_PORT_SNIFF_CONFIG
  * Configure RX port sniffing for the physical port associated with the calling
 /* MC_CMD_RX_BALANCING_OUT msgresponse */
 #define    MC_CMD_RX_BALANCING_OUT_LEN 0
 
+
+/***********************************/
+/* MC_CMD_NVRAM_PRIVATE_APPEND
+ * Append a single TLV to the MC_USAGE_TLV partition. Returns MC_CMD_ERR_EEXIST
+ * if the tag is already present.
+ */
+#define MC_CMD_NVRAM_PRIVATE_APPEND 0x11c
+
+#define MC_CMD_0x11c_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_NVRAM_PRIVATE_APPEND_IN msgrequest */
+#define    MC_CMD_NVRAM_PRIVATE_APPEND_IN_LENMIN 9
+#define    MC_CMD_NVRAM_PRIVATE_APPEND_IN_LENMAX 252
+#define    MC_CMD_NVRAM_PRIVATE_APPEND_IN_LEN(num) (8+1*(num))
+/* The tag to be appended */
+#define       MC_CMD_NVRAM_PRIVATE_APPEND_IN_TAG_OFST 0
+/* The length of the data */
+#define       MC_CMD_NVRAM_PRIVATE_APPEND_IN_LENGTH_OFST 4
+/* The data to be contained in the TLV structure */
+#define       MC_CMD_NVRAM_PRIVATE_APPEND_IN_DATA_BUFFER_OFST 8
+#define       MC_CMD_NVRAM_PRIVATE_APPEND_IN_DATA_BUFFER_LEN 1
+#define       MC_CMD_NVRAM_PRIVATE_APPEND_IN_DATA_BUFFER_MINNUM 1
+#define       MC_CMD_NVRAM_PRIVATE_APPEND_IN_DATA_BUFFER_MAXNUM 244
+
+/* MC_CMD_NVRAM_PRIVATE_APPEND_OUT msgresponse */
+#define    MC_CMD_NVRAM_PRIVATE_APPEND_OUT_LEN 0
+
+
+/***********************************/
+/* MC_CMD_XPM_VERIFY_CONTENTS
+ * Verify that the contents of the XPM memory are correct (Medford only). This
+ * is used during manufacture to check that the XPM memory has been programmed
+ * correctly at ATE.
+ */
+#define MC_CMD_XPM_VERIFY_CONTENTS 0x11b
+
+#define MC_CMD_0x11b_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_XPM_VERIFY_CONTENTS_IN msgrequest */
+#define    MC_CMD_XPM_VERIFY_CONTENTS_IN_LEN 4
+/* Data type to be checked */
+#define       MC_CMD_XPM_VERIFY_CONTENTS_IN_DATA_TYPE_OFST 0
+
+/* MC_CMD_XPM_VERIFY_CONTENTS_OUT msgresponse */
+#define    MC_CMD_XPM_VERIFY_CONTENTS_OUT_LENMIN 12
+#define    MC_CMD_XPM_VERIFY_CONTENTS_OUT_LENMAX 252
+#define    MC_CMD_XPM_VERIFY_CONTENTS_OUT_LEN(num) (12+1*(num))
+/* Number of sectors found (test builds only) */
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_NUM_SECTORS_OFST 0
+/* Number of bytes found (test builds only) */
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_NUM_BYTES_OFST 4
+/* Length of signature */
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIG_LENGTH_OFST 8
+/* Signature */
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIGNATURE_OFST 12
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIGNATURE_LEN 1
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIGNATURE_MINNUM 0
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIGNATURE_MAXNUM 240
+
+
 /***********************************/
 /* MC_CMD_SET_EVQ_TMR
  * Update the timer load, timer reload and timer mode values for a given EVQ.
  */
 #define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_STEP_OFST 32
 
+
+/***********************************/
+/* MC_CMD_ALLOCATE_TX_VFIFO_CP
+ * When we use the TX_vFIFO_ULL mode, we can allocate common pools using the
+ * unused switch buffers.
+ */
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP 0x11d
+
+#define MC_CMD_0x11d_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_ALLOCATE_TX_VFIFO_CP_IN msgrequest */
+#define    MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_LEN 20
+/* Desired instance. Must be set to a specific instance, which is a function
+ * local queue index.
+ */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INSTANCE_OFST 0
+/* Will the common pool be used as TX_vFIFO_ULL (1) */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_MODE_OFST 4
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_ENABLED       0x1 /* enum */
+/* enum: Using this interface without TX_vFIFO_ULL is not supported for now */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_DISABLED      0x0
+/* Number of buffers to reserve for the common pool */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_SIZE_OFST 8
+/* TX datapath to which the Common Pool is connected. */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INGRESS_OFST 12
+/* enum: Extracts information from function */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE          -0x1
+/* Network port or RX Engine to which the common pool connects. */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_EGRESS_OFST 16
+/* enum: Extracts information from function */
+/*               MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE          -0x1 */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT0          0x0 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT1          0x1 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT2          0x2 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT3          0x3 /* enum */
+/* enum: To enable Switch loopback with Rx engine 0 */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE0     0x4
+/* enum: To enable Switch loopback with Rx engine 1 */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE1     0x5
+
+/* MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT msgresponse */
+#define    MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT_LEN 4
+/* ID of the common pool allocated */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT_CP_ID_OFST 0
+
+
+/***********************************/
+/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO
+ * When we use the TX_vFIFO_ULL mode, we can allocate vFIFOs using the
+ * previously allocated common pools.
+ */
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO 0x11e
+
+#define MC_CMD_0x11e_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN msgrequest */
+#define    MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LEN 20
+/* Common pool previously allocated to which the new vFIFO will be associated
+ */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_CP_OFST 0
+/* Port or RX engine to associate the vFIFO egress */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_EGRESS_OFST 4
+/* enum: Extracts information from common pool */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_USE_CP_VALUE   -0x1
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT0          0x0 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT1          0x1 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT2          0x2 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT3          0x3 /* enum */
+/* enum: To enable Switch loopback with Rx engine 0 */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE0     0x4
+/* enum: To enable Switch loopback with Rx engine 1 */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE1     0x5
+/* Minimum number of buffers that the pool must have */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_SIZE_OFST 8
+/* enum: Do not check the space available */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_NO_MINIMUM     0x0
+/* Will the vFIFO be used as TX_vFIFO_ULL */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_MODE_OFST 12
+/* Network priority of the vFIFO, if applicable */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PRIORITY_OFST 16
+/* enum: Search for the lowest unused priority */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LOWEST_AVAILABLE  -0x1
+
+/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT msgresponse */
+#define    MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_LEN 8
+/* Short vFIFO ID */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_VID_OFST 0
+/* Network priority of the vFIFO */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_PRIORITY_OFST 4
+
+
+/***********************************/
+/* MC_CMD_TEARDOWN_TX_VFIFO_VF
+ * This interface clears the configuration of the given vFIFO and leaves it
+ * ready to be re-used.
+ */
+#define MC_CMD_TEARDOWN_TX_VFIFO_VF 0x11f
+
+#define MC_CMD_0x11f_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_TEARDOWN_TX_VFIFO_VF_IN msgrequest */
+#define    MC_CMD_TEARDOWN_TX_VFIFO_VF_IN_LEN 4
+/* Short vFIFO ID */
+#define       MC_CMD_TEARDOWN_TX_VFIFO_VF_IN_VFIFO_OFST 0
+
+/* MC_CMD_TEARDOWN_TX_VFIFO_VF_OUT msgresponse */
+#define    MC_CMD_TEARDOWN_TX_VFIFO_VF_OUT_LEN 0
+
+
+/***********************************/
+/* MC_CMD_DEALLOCATE_TX_VFIFO_CP
+ * This interface clears the configuration of the given common pool and leaves
+ * it ready to be re-used.
+ */
+#define MC_CMD_DEALLOCATE_TX_VFIFO_CP 0x121
+
+#define MC_CMD_0x121_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN msgrequest */
+#define    MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN_LEN 4
+/* Common pool ID given when pool allocated */
+#define       MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN_POOL_ID_OFST 0
+
+/* MC_CMD_DEALLOCATE_TX_VFIFO_CP_OUT msgresponse */
+#define    MC_CMD_DEALLOCATE_TX_VFIFO_CP_OUT_LEN 0
+
+
+/***********************************/
+/* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS
+ * This interface allows the host to find out how many common pool buffers are
+ * not yet assigned.
+ */
+#define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS 0x124
+
+#define MC_CMD_0x124_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN msgrequest */
+#define    MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN_LEN 0
+
+/* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_OUT msgresponse */
+#define    MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_OUT_LEN 8
+/* Available buffers for the ENG to NET vFIFOs. */
+#define       MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_OUT_NET_OFST 0
+/* Available buffers for the ENG to ENG and NET to ENG vFIFOs. */
+#define       MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_OUT_ENG_OFST 4
+
+
 #endif /* MCDI_PCOL_H */
index 2a9228a6e4a08cea22ba5bed9e1eae24cc25f591..0f0eb271fa73bf24da7d75f3eeebb08b626a3830 100644 (file)
@@ -13,7 +13,6 @@
 
 #include <linux/slab.h>
 #include "efx.h"
-#include "phy.h"
 #include "mcdi.h"
 #include "mcdi_pcol.h"
 #include "nic.h"
diff --git a/drivers/net/ethernet/sfc/mdio_10g.c b/drivers/net/ethernet/sfc/mdio_10g.c
deleted file mode 100644 (file)
index 8ff954c..0000000
+++ /dev/null
@@ -1,323 +0,0 @@
-/****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2006-2011 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
- */
-/*
- * Useful functions for working with MDIO clause 45 PHYs
- */
-#include <linux/types.h>
-#include <linux/ethtool.h>
-#include <linux/delay.h>
-#include "net_driver.h"
-#include "mdio_10g.h"
-#include "workarounds.h"
-
-unsigned efx_mdio_id_oui(u32 id)
-{
-       unsigned oui = 0;
-       int i;
-
-       /* The bits of the OUI are designated a..x, with a=0 and b variable.
-        * In the id register c is the MSB but the OUI is conventionally
-        * written as bytes h..a, p..i, x..q.  Reorder the bits accordingly. */
-       for (i = 0; i < 22; ++i)
-               if (id & (1 << (i + 10)))
-                       oui |= 1 << (i ^ 7);
-
-       return oui;
-}
-
-int efx_mdio_reset_mmd(struct efx_nic *port, int mmd,
-                           int spins, int spintime)
-{
-       u32 ctrl;
-
-       /* Catch callers passing values in the wrong units (or just silly) */
-       EFX_BUG_ON_PARANOID(spins * spintime >= 5000);
-
-       efx_mdio_write(port, mmd, MDIO_CTRL1, MDIO_CTRL1_RESET);
-       /* Wait for the reset bit to clear. */
-       do {
-               msleep(spintime);
-               ctrl = efx_mdio_read(port, mmd, MDIO_CTRL1);
-               spins--;
-
-       } while (spins && (ctrl & MDIO_CTRL1_RESET));
-
-       return spins ? spins : -ETIMEDOUT;
-}
-
-static int efx_mdio_check_mmd(struct efx_nic *efx, int mmd)
-{
-       int status;
-
-       if (mmd != MDIO_MMD_AN) {
-               /* Read MMD STATUS2 to check it is responding. */
-               status = efx_mdio_read(efx, mmd, MDIO_STAT2);
-               if ((status & MDIO_STAT2_DEVPRST) != MDIO_STAT2_DEVPRST_VAL) {
-                       netif_err(efx, hw, efx->net_dev,
-                                 "PHY MMD %d not responding.\n", mmd);
-                       return -EIO;
-               }
-       }
-
-       return 0;
-}
-
-/* This ought to be ridiculous overkill. We expect it to fail rarely */
-#define MDIO45_RESET_TIME      1000 /* ms */
-#define MDIO45_RESET_ITERS     100
-
-int efx_mdio_wait_reset_mmds(struct efx_nic *efx, unsigned int mmd_mask)
-{
-       const int spintime = MDIO45_RESET_TIME / MDIO45_RESET_ITERS;
-       int tries = MDIO45_RESET_ITERS;
-       int rc = 0;
-       int in_reset;
-
-       while (tries) {
-               int mask = mmd_mask;
-               int mmd = 0;
-               int stat;
-               in_reset = 0;
-               while (mask) {
-                       if (mask & 1) {
-                               stat = efx_mdio_read(efx, mmd, MDIO_CTRL1);
-                               if (stat < 0) {
-                                       netif_err(efx, hw, efx->net_dev,
-                                                 "failed to read status of"
-                                                 " MMD %d\n", mmd);
-                                       return -EIO;
-                               }
-                               if (stat & MDIO_CTRL1_RESET)
-                                       in_reset |= (1 << mmd);
-                       }
-                       mask = mask >> 1;
-                       mmd++;
-               }
-               if (!in_reset)
-                       break;
-               tries--;
-               msleep(spintime);
-       }
-       if (in_reset != 0) {
-               netif_err(efx, hw, efx->net_dev,
-                         "not all MMDs came out of reset in time."
-                         " MMDs still in reset: %x\n", in_reset);
-               rc = -ETIMEDOUT;
-       }
-       return rc;
-}
-
-int efx_mdio_check_mmds(struct efx_nic *efx, unsigned int mmd_mask)
-{
-       int mmd = 0, probe_mmd, devs1, devs2;
-       u32 devices;
-
-       /* Historically we have probed the PHYXS to find out what devices are
-        * present,but that doesn't work so well if the PHYXS isn't expected
-        * to exist, if so just find the first item in the list supplied. */
-       probe_mmd = (mmd_mask & MDIO_DEVS_PHYXS) ? MDIO_MMD_PHYXS :
-           __ffs(mmd_mask);
-
-       /* Check all the expected MMDs are present */
-       devs1 = efx_mdio_read(efx, probe_mmd, MDIO_DEVS1);
-       devs2 = efx_mdio_read(efx, probe_mmd, MDIO_DEVS2);
-       if (devs1 < 0 || devs2 < 0) {
-               netif_err(efx, hw, efx->net_dev,
-                         "failed to read devices present\n");
-               return -EIO;
-       }
-       devices = devs1 | (devs2 << 16);
-       if ((devices & mmd_mask) != mmd_mask) {
-               netif_err(efx, hw, efx->net_dev,
-                         "required MMDs not present: got %x, wanted %x\n",
-                         devices, mmd_mask);
-               return -ENODEV;
-       }
-       netif_vdbg(efx, hw, efx->net_dev, "Devices present: %x\n", devices);
-
-       /* Check all required MMDs are responding and happy. */
-       while (mmd_mask) {
-               if ((mmd_mask & 1) && efx_mdio_check_mmd(efx, mmd))
-                       return -EIO;
-               mmd_mask = mmd_mask >> 1;
-               mmd++;
-       }
-
-       return 0;
-}
-
-bool efx_mdio_links_ok(struct efx_nic *efx, unsigned int mmd_mask)
-{
-       /* If the port is in loopback, then we should only consider a subset
-        * of mmd's */
-       if (LOOPBACK_INTERNAL(efx))
-               return true;
-       else if (LOOPBACK_MASK(efx) & LOOPBACKS_WS)
-               return false;
-       else if (efx_phy_mode_disabled(efx->phy_mode))
-               return false;
-       else if (efx->loopback_mode == LOOPBACK_PHYXS)
-               mmd_mask &= ~(MDIO_DEVS_PHYXS |
-                             MDIO_DEVS_PCS |
-                             MDIO_DEVS_PMAPMD |
-                             MDIO_DEVS_AN);
-       else if (efx->loopback_mode == LOOPBACK_PCS)
-               mmd_mask &= ~(MDIO_DEVS_PCS |
-                             MDIO_DEVS_PMAPMD |
-                             MDIO_DEVS_AN);
-       else if (efx->loopback_mode == LOOPBACK_PMAPMD)
-               mmd_mask &= ~(MDIO_DEVS_PMAPMD |
-                             MDIO_DEVS_AN);
-
-       return mdio45_links_ok(&efx->mdio, mmd_mask);
-}
-
-void efx_mdio_transmit_disable(struct efx_nic *efx)
-{
-       efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD,
-                         MDIO_PMA_TXDIS, MDIO_PMD_TXDIS_GLOBAL,
-                         efx->phy_mode & PHY_MODE_TX_DISABLED);
-}
-
-void efx_mdio_phy_reconfigure(struct efx_nic *efx)
-{
-       efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD,
-                         MDIO_CTRL1, MDIO_PMA_CTRL1_LOOPBACK,
-                         efx->loopback_mode == LOOPBACK_PMAPMD);
-       efx_mdio_set_flag(efx, MDIO_MMD_PCS,
-                         MDIO_CTRL1, MDIO_PCS_CTRL1_LOOPBACK,
-                         efx->loopback_mode == LOOPBACK_PCS);
-       efx_mdio_set_flag(efx, MDIO_MMD_PHYXS,
-                         MDIO_CTRL1, MDIO_PHYXS_CTRL1_LOOPBACK,
-                         efx->loopback_mode == LOOPBACK_PHYXS_WS);
-}
-
-static void efx_mdio_set_mmd_lpower(struct efx_nic *efx,
-                                   int lpower, int mmd)
-{
-       int stat = efx_mdio_read(efx, mmd, MDIO_STAT1);
-
-       netif_vdbg(efx, drv, efx->net_dev, "Setting low power mode for MMD %d to %d\n",
-                 mmd, lpower);
-
-       if (stat & MDIO_STAT1_LPOWERABLE) {
-               efx_mdio_set_flag(efx, mmd, MDIO_CTRL1,
-                                 MDIO_CTRL1_LPOWER, lpower);
-       }
-}
-
-void efx_mdio_set_mmds_lpower(struct efx_nic *efx,
-                             int low_power, unsigned int mmd_mask)
-{
-       int mmd = 0;
-       mmd_mask &= ~MDIO_DEVS_AN;
-       while (mmd_mask) {
-               if (mmd_mask & 1)
-                       efx_mdio_set_mmd_lpower(efx, low_power, mmd);
-               mmd_mask = (mmd_mask >> 1);
-               mmd++;
-       }
-}
-
-/**
- * efx_mdio_set_settings - Set (some of) the PHY settings over MDIO.
- * @efx:               Efx NIC
- * @ecmd:              New settings
- */
-int efx_mdio_set_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
-{
-       struct ethtool_cmd prev = { .cmd = ETHTOOL_GSET };
-
-       efx->phy_op->get_settings(efx, &prev);
-
-       if (ecmd->advertising == prev.advertising &&
-           ethtool_cmd_speed(ecmd) == ethtool_cmd_speed(&prev) &&
-           ecmd->duplex == prev.duplex &&
-           ecmd->port == prev.port &&
-           ecmd->autoneg == prev.autoneg)
-               return 0;
-
-       /* We can only change these settings for -T PHYs */
-       if (prev.port != PORT_TP || ecmd->port != PORT_TP)
-               return -EINVAL;
-
-       /* Check that PHY supports these settings */
-       if (!ecmd->autoneg ||
-           (ecmd->advertising | SUPPORTED_Autoneg) & ~prev.supported)
-               return -EINVAL;
-
-       efx_link_set_advertising(efx, ecmd->advertising | ADVERTISED_Autoneg);
-       efx_mdio_an_reconfigure(efx);
-       return 0;
-}
-
-/**
- * efx_mdio_an_reconfigure - Push advertising flags and restart autonegotiation
- * @efx:               Efx NIC
- */
-void efx_mdio_an_reconfigure(struct efx_nic *efx)
-{
-       int reg;
-
-       WARN_ON(!(efx->mdio.mmds & MDIO_DEVS_AN));
-
-       /* Set up the base page */
-       reg = ADVERTISE_CSMA | ADVERTISE_RESV;
-       if (efx->link_advertising & ADVERTISED_Pause)
-               reg |= ADVERTISE_PAUSE_CAP;
-       if (efx->link_advertising & ADVERTISED_Asym_Pause)
-               reg |= ADVERTISE_PAUSE_ASYM;
-       efx_mdio_write(efx, MDIO_MMD_AN, MDIO_AN_ADVERTISE, reg);
-
-       /* Set up the (extended) next page */
-       efx->phy_op->set_npage_adv(efx, efx->link_advertising);
-
-       /* Enable and restart AN */
-       reg = efx_mdio_read(efx, MDIO_MMD_AN, MDIO_CTRL1);
-       reg |= MDIO_AN_CTRL1_ENABLE | MDIO_AN_CTRL1_RESTART | MDIO_AN_CTRL1_XNP;
-       efx_mdio_write(efx, MDIO_MMD_AN, MDIO_CTRL1, reg);
-}
-
-u8 efx_mdio_get_pause(struct efx_nic *efx)
-{
-       BUILD_BUG_ON(EFX_FC_AUTO & (EFX_FC_RX | EFX_FC_TX));
-
-       if (!(efx->wanted_fc & EFX_FC_AUTO))
-               return efx->wanted_fc;
-
-       WARN_ON(!(efx->mdio.mmds & MDIO_DEVS_AN));
-
-       return mii_resolve_flowctrl_fdx(
-               mii_advertise_flowctrl(efx->wanted_fc),
-               efx_mdio_read(efx, MDIO_MMD_AN, MDIO_AN_LPA));
-}
-
-int efx_mdio_test_alive(struct efx_nic *efx)
-{
-       int rc;
-       int devad = __ffs(efx->mdio.mmds);
-       u16 physid1, physid2;
-
-       mutex_lock(&efx->mac_lock);
-
-       physid1 = efx_mdio_read(efx, devad, MDIO_DEVID1);
-       physid2 = efx_mdio_read(efx, devad, MDIO_DEVID2);
-
-       if ((physid1 == 0x0000) || (physid1 == 0xffff) ||
-           (physid2 == 0x0000) || (physid2 == 0xffff)) {
-               netif_err(efx, hw, efx->net_dev,
-                         "no MDIO PHY present with ID %d\n", efx->mdio.prtad);
-               rc = -EINVAL;
-       } else {
-               rc = efx_mdio_check_mmds(efx, efx->mdio.mmds);
-       }
-
-       mutex_unlock(&efx->mac_lock);
-       return rc;
-}
diff --git a/drivers/net/ethernet/sfc/mdio_10g.h b/drivers/net/ethernet/sfc/mdio_10g.h
deleted file mode 100644 (file)
index 4a2dc4c..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-/****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2006-2011 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
- */
-
-#ifndef EFX_MDIO_10G_H
-#define EFX_MDIO_10G_H
-
-#include <linux/mdio.h>
-
-/*
- * Helper functions for doing 10G MDIO as specified in IEEE 802.3 clause 45.
- */
-
-#include "efx.h"
-
-static inline unsigned efx_mdio_id_rev(u32 id) { return id & 0xf; }
-static inline unsigned efx_mdio_id_model(u32 id) { return (id >> 4) & 0x3f; }
-unsigned efx_mdio_id_oui(u32 id);
-
-static inline int efx_mdio_read(struct efx_nic *efx, int devad, int addr)
-{
-       return efx->mdio.mdio_read(efx->net_dev, efx->mdio.prtad, devad, addr);
-}
-
-static inline void
-efx_mdio_write(struct efx_nic *efx, int devad, int addr, int value)
-{
-       efx->mdio.mdio_write(efx->net_dev, efx->mdio.prtad, devad, addr, value);
-}
-
-static inline u32 efx_mdio_read_id(struct efx_nic *efx, int mmd)
-{
-       u16 id_low = efx_mdio_read(efx, mmd, MDIO_DEVID2);
-       u16 id_hi = efx_mdio_read(efx, mmd, MDIO_DEVID1);
-       return (id_hi << 16) | (id_low);
-}
-
-static inline bool efx_mdio_phyxgxs_lane_sync(struct efx_nic *efx)
-{
-       int i, lane_status;
-       bool sync;
-
-       for (i = 0; i < 2; ++i)
-               lane_status = efx_mdio_read(efx, MDIO_MMD_PHYXS,
-                                           MDIO_PHYXS_LNSTAT);
-
-       sync = !!(lane_status & MDIO_PHYXS_LNSTAT_ALIGN);
-       if (!sync)
-               netif_dbg(efx, hw, efx->net_dev, "XGXS lane status: %x\n",
-                         lane_status);
-       return sync;
-}
-
-const char *efx_mdio_mmd_name(int mmd);
-
-/*
- * Reset a specific MMD and wait for reset to clear.
- * Return number of spins left (>0) on success, -%ETIMEDOUT on failure.
- *
- * This function will sleep
- */
-int efx_mdio_reset_mmd(struct efx_nic *efx, int mmd, int spins, int spintime);
-
-/* As efx_mdio_check_mmd but for multiple MMDs */
-int efx_mdio_check_mmds(struct efx_nic *efx, unsigned int mmd_mask);
-
-/* Check the link status of specified mmds in bit mask */
-bool efx_mdio_links_ok(struct efx_nic *efx, unsigned int mmd_mask);
-
-/* Generic transmit disable support though PMAPMD */
-void efx_mdio_transmit_disable(struct efx_nic *efx);
-
-/* Generic part of reconfigure: set/clear loopback bits */
-void efx_mdio_phy_reconfigure(struct efx_nic *efx);
-
-/* Set the power state of the specified MMDs */
-void efx_mdio_set_mmds_lpower(struct efx_nic *efx, int low_power,
-                             unsigned int mmd_mask);
-
-/* Set (some of) the PHY settings over MDIO */
-int efx_mdio_set_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd);
-
-/* Push advertising flags and restart autonegotiation */
-void efx_mdio_an_reconfigure(struct efx_nic *efx);
-
-/* Get pause parameters from AN if available (otherwise return
- * requested pause parameters)
- */
-u8 efx_mdio_get_pause(struct efx_nic *efx);
-
-/* Wait for specified MMDs to exit reset within a timeout */
-int efx_mdio_wait_reset_mmds(struct efx_nic *efx, unsigned int mmd_mask);
-
-/* Set or clear flag, debouncing */
-static inline void
-efx_mdio_set_flag(struct efx_nic *efx, int devad, int addr,
-                 int mask, bool state)
-{
-       mdio_set_flag(&efx->mdio, efx->mdio.prtad, devad, addr, mask, state);
-}
-
-/* Liveness self-test for MDIO PHYs */
-int efx_mdio_test_alive(struct efx_nic *efx);
-
-#endif /* EFX_MDIO_10G_H */
index 99d8c82124bb5e2541085f8e92521b9c01722354..0bbd7e252a036358edcd6611446f2c0533fae6c6 100644 (file)
@@ -41,7 +41,7 @@
  *
  **************************************************************************/
 
-#define EFX_DRIVER_VERSION     "4.0"
+#define EFX_DRIVER_VERSION     "4.1"
 
 #ifdef DEBUG
 #define EFX_BUG_ON_PARANOID(x) BUG_ON(x)
@@ -139,8 +139,6 @@ struct efx_special_buffer {
  * struct efx_tx_buffer - buffer state for a TX descriptor
  * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be
  *     freed when descriptor completes
- * @heap_buf: When @flags & %EFX_TX_BUF_HEAP, the associated heap buffer to be
- *     freed when descriptor completes.
  * @option: When @flags & %EFX_TX_BUF_OPTION, a NIC-specific option descriptor.
  * @dma_addr: DMA address of the fragment.
  * @flags: Flags for allocation and DMA mapping type
@@ -151,10 +149,7 @@ struct efx_special_buffer {
  * Only valid if @unmap_len != 0.
  */
 struct efx_tx_buffer {
-       union {
-               const struct sk_buff *skb;
-               void *heap_buf;
-       };
+       const struct sk_buff *skb;
        union {
                efx_qword_t option;
                dma_addr_t dma_addr;
@@ -166,7 +161,6 @@ struct efx_tx_buffer {
 };
 #define EFX_TX_BUF_CONT                1       /* not last descriptor of packet */
 #define EFX_TX_BUF_SKB         2       /* buffer is last part of skb */
-#define EFX_TX_BUF_HEAP                4       /* buffer was allocated with kmalloc() */
 #define EFX_TX_BUF_MAP_SINGLE  8       /* buffer was mapped with dma_map_single() */
 #define EFX_TX_BUF_OPTION      0x10    /* empty buffer for option descriptor */
 
@@ -189,13 +183,16 @@ struct efx_tx_buffer {
  * @channel: The associated channel
  * @core_txq: The networking core TX queue structure
  * @buffer: The software buffer ring
- * @tsoh_page: Array of pages of TSO header buffers
+ * @cb_page: Array of pages of copy buffers.  Carved up according to
+ *     %EFX_TX_CB_ORDER into %EFX_TX_CB_SIZE-sized chunks.
  * @txd: The hardware descriptor ring
  * @ptr_mask: The size of the ring minus 1.
  * @piobuf: PIO buffer region for this TX queue (shared with its partner).
  *     Size of the region is efx_piobuf_size.
  * @piobuf_offset: Buffer offset to be specified in PIO descriptors
  * @initialised: Has hardware queue been initialised?
+ * @handle_tso: TSO xmit preparation handler.  Sets up the TSO metadata and
+ *     may also map tx data, depending on the nature of the TSO implementation.
  * @read_count: Current read pointer.
  *     This is the number of buffers that have been removed from both rings.
  * @old_write_count: The value of @write_count when last checked.
@@ -221,9 +218,11 @@ struct efx_tx_buffer {
  * @tso_long_headers: Number of packets with headers too long for standard
  *     blocks
  * @tso_packets: Number of packets via the TSO xmit path
+ * @tso_fallbacks: Number of times TSO fallback used
  * @pushes: Number of times the TX push feature has been used
  * @pio_packets: Number of times the TX PIO feature has been used
  * @xmit_more_available: Are any packets waiting to be pushed to the NIC
+ * @cb_packets: Number of times the TX copybreak feature has been used
  * @empty_read_count: If the completion path has seen the queue as empty
  *     and the transmission path has not yet checked this, the value of
  *     @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0.
@@ -236,13 +235,16 @@ struct efx_tx_queue {
        struct efx_channel *channel;
        struct netdev_queue *core_txq;
        struct efx_tx_buffer *buffer;
-       struct efx_buffer *tsoh_page;
+       struct efx_buffer *cb_page;
        struct efx_special_buffer txd;
        unsigned int ptr_mask;
        void __iomem *piobuf;
        unsigned int piobuf_offset;
        bool initialised;
 
+       /* Function pointers used in the fast path. */
+       int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *);
+
        /* Members used mainly on the completion path */
        unsigned int read_count ____cacheline_aligned_in_smp;
        unsigned int old_write_count;
@@ -257,9 +259,11 @@ struct efx_tx_queue {
        unsigned int tso_bursts;
        unsigned int tso_long_headers;
        unsigned int tso_packets;
+       unsigned int tso_fallbacks;
        unsigned int pushes;
        unsigned int pio_packets;
        bool xmit_more_available;
+       unsigned int cb_packets;
        /* Statistics to supplement MAC stats */
        unsigned long tx_packets;
 
@@ -269,6 +273,9 @@ struct efx_tx_queue {
        atomic_t flush_outstanding;
 };
 
+#define EFX_TX_CB_ORDER        7
+#define EFX_TX_CB_SIZE ((1 << EFX_TX_CB_ORDER) - NET_IP_ALIGN)
+
 /**
  * struct efx_rx_buffer - An Efx RX data buffer
  * @dma_addr: DMA base address of the buffer
@@ -853,6 +860,7 @@ struct vfdi_status;
  * @rx_hash_key: Toeplitz hash key for RSS
  * @rx_indir_table: Indirection table for RSS
  * @rx_scatter: Scatter mode enabled for receives
+ * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
  * @int_error_count: Number of internal errors seen recently
  * @int_error_expire: Time at which error count will be expired
  * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
@@ -990,6 +998,7 @@ struct efx_nic {
        u8 rx_hash_key[40];
        u32 rx_indir_table[128];
        bool rx_scatter;
+       bool rx_hash_udp_4tuple;
 
        unsigned int_error_count;
        unsigned long int_error_expire;
@@ -1210,6 +1219,8 @@ struct efx_mtd_partition {
  *     and tx_type will already have been validated but this operation
  *     must validate and update rx_filter.
  * @set_mac_address: Set the MAC address of the device
+ * @tso_versions: Returns mask of firmware-assisted TSO versions supported.
+ *     If %NULL, then device does not support any TSO version.
  * @revision: Hardware architecture revision
  * @txd_ptr_tbl_base: TX descriptor ring base address
  * @rxd_ptr_tbl_base: RX descriptor ring base address
@@ -1286,6 +1297,8 @@ struct efx_nic_type {
        void (*tx_init)(struct efx_tx_queue *tx_queue);
        void (*tx_remove)(struct efx_tx_queue *tx_queue);
        void (*tx_write)(struct efx_tx_queue *tx_queue);
+       unsigned int (*tx_limit_len)(struct efx_tx_queue *tx_queue,
+                                    dma_addr_t dma_addr, unsigned int len);
        int (*rx_push_rss_config)(struct efx_nic *efx, bool user,
                                  const u32 *rx_indir_table);
        int (*rx_probe)(struct efx_rx_queue *rx_queue);
@@ -1364,6 +1377,7 @@ struct efx_nic_type {
        void (*vswitching_remove)(struct efx_nic *efx);
        int (*get_mac_address)(struct efx_nic *efx, unsigned char *perm_addr);
        int (*set_mac_address)(struct efx_nic *efx);
+       u32 (*tso_versions)(struct efx_nic *efx);
 
        int revision;
        unsigned int txd_ptr_tbl_base;
@@ -1543,4 +1557,32 @@ static inline netdev_features_t efx_supported_features(const struct efx_nic *efx
        return net_dev->features | net_dev->hw_features;
 }
 
+/* Get the TX ring index for the next insert: insert_count masked by ptr_mask. */
+static inline unsigned int
+efx_tx_queue_get_insert_index(const struct efx_tx_queue *tx_queue)
+{
+       return tx_queue->insert_count & tx_queue->ptr_mask;
+}
+
+/* Get the TX buffer at the current insert index; no in-use check is made. */
+static inline struct efx_tx_buffer *
+__efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue)
+{
+       return &tx_queue->buffer[efx_tx_queue_get_insert_index(tx_queue)];
+}
+
+/* Get the insert buffer, checking its len, flags and unmap_len are not set. */
+static inline struct efx_tx_buffer *
+efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue)
+{
+       struct efx_tx_buffer *buffer =
+               __efx_tx_queue_get_insert_buffer(tx_queue);
+
+       EFX_BUG_ON_PARANOID(buffer->len);
+       EFX_BUG_ON_PARANOID(buffer->flags);
+       EFX_BUG_ON_PARANOID(buffer->unmap_len);
+
+       return buffer;
+}
+
 #endif /* EFX_NET_DRIVER_H */
index 73bee7ea332a9178ae663fa040ec518345af22a6..223774635cbabf23a439cb90a983427b0f0fef38 100644 (file)
 #include "mcdi.h"
 
 enum {
-       EFX_REV_FALCON_A0 = 0,
-       EFX_REV_FALCON_A1 = 1,
-       EFX_REV_FALCON_B0 = 2,
-       EFX_REV_SIENA_A0 = 3,
-       EFX_REV_HUNT_A0 = 4,
+       EFX_REV_SIENA_A0 = 0,
+       EFX_REV_HUNT_A0 = 1,
 };
 
 static inline int efx_nic_rev(struct efx_nic *efx)
@@ -32,12 +29,6 @@ static inline int efx_nic_rev(struct efx_nic *efx)
 
 u32 efx_farch_fpga_ver(struct efx_nic *efx);
 
-/* NIC has two interlinked PCI functions for the same port. */
-static inline bool efx_nic_is_dual_func(struct efx_nic *efx)
-{
-       return efx_nic_rev(efx) < EFX_REV_FALCON_B0;
-}
-
 /* Read the current event from the event queue */
 static inline efx_qword_t *efx_event(struct efx_channel *channel,
                                     unsigned int index)
@@ -144,11 +135,6 @@ enum {
        PHY_TYPE_SFT9001B = 10,
 };
 
-#define FALCON_XMAC_LOOPBACKS                  \
-       ((1 << LOOPBACK_XGMII) |                \
-        (1 << LOOPBACK_XGXS) |                 \
-        (1 << LOOPBACK_XAUI))
-
 /* Alignment of PCIe DMA boundaries (4KB) */
 #define EFX_PAGE_SIZE  4096
 /* Size and alignment of buffer table entries (same) */
@@ -161,160 +147,6 @@ enum {
        GENERIC_STAT_COUNT
 };
 
-/**
- * struct falcon_board_type - board operations and type information
- * @id: Board type id, as found in NVRAM
- * @init: Allocate resources and initialise peripheral hardware
- * @init_phy: Do board-specific PHY initialisation
- * @fini: Shut down hardware and free resources
- * @set_id_led: Set state of identifying LED or revert to automatic function
- * @monitor: Board-specific health check function
- */
-struct falcon_board_type {
-       u8 id;
-       int (*init) (struct efx_nic *nic);
-       void (*init_phy) (struct efx_nic *efx);
-       void (*fini) (struct efx_nic *nic);
-       void (*set_id_led) (struct efx_nic *efx, enum efx_led_mode mode);
-       int (*monitor) (struct efx_nic *nic);
-};
-
-/**
- * struct falcon_board - board information
- * @type: Type of board
- * @major: Major rev. ('A', 'B' ...)
- * @minor: Minor rev. (0, 1, ...)
- * @i2c_adap: I2C adapter for on-board peripherals
- * @i2c_data: Data for bit-banging algorithm
- * @hwmon_client: I2C client for hardware monitor
- * @ioexp_client: I2C client for power/port control
- */
-struct falcon_board {
-       const struct falcon_board_type *type;
-       int major;
-       int minor;
-       struct i2c_adapter i2c_adap;
-       struct i2c_algo_bit_data i2c_data;
-       struct i2c_client *hwmon_client, *ioexp_client;
-};
-
-/**
- * struct falcon_spi_device - a Falcon SPI (Serial Peripheral Interface) device
- * @device_id:         Controller's id for the device
- * @size:              Size (in bytes)
- * @addr_len:          Number of address bytes in read/write commands
- * @munge_address:     Flag whether addresses should be munged.
- *     Some devices with 9-bit addresses (e.g. AT25040A EEPROM)
- *     use bit 3 of the command byte as address bit A8, rather
- *     than having a two-byte address.  If this flag is set, then
- *     commands should be munged in this way.
- * @erase_command:     Erase command (or 0 if sector erase not needed).
- * @erase_size:                Erase sector size (in bytes)
- *     Erase commands affect sectors with this size and alignment.
- *     This must be a power of two.
- * @block_size:                Write block size (in bytes).
- *     Write commands are limited to blocks with this size and alignment.
- */
-struct falcon_spi_device {
-       int device_id;
-       unsigned int size;
-       unsigned int addr_len;
-       unsigned int munge_address:1;
-       u8 erase_command;
-       unsigned int erase_size;
-       unsigned int block_size;
-};
-
-static inline bool falcon_spi_present(const struct falcon_spi_device *spi)
-{
-       return spi->size != 0;
-}
-
-enum {
-       FALCON_STAT_tx_bytes = GENERIC_STAT_COUNT,
-       FALCON_STAT_tx_packets,
-       FALCON_STAT_tx_pause,
-       FALCON_STAT_tx_control,
-       FALCON_STAT_tx_unicast,
-       FALCON_STAT_tx_multicast,
-       FALCON_STAT_tx_broadcast,
-       FALCON_STAT_tx_lt64,
-       FALCON_STAT_tx_64,
-       FALCON_STAT_tx_65_to_127,
-       FALCON_STAT_tx_128_to_255,
-       FALCON_STAT_tx_256_to_511,
-       FALCON_STAT_tx_512_to_1023,
-       FALCON_STAT_tx_1024_to_15xx,
-       FALCON_STAT_tx_15xx_to_jumbo,
-       FALCON_STAT_tx_gtjumbo,
-       FALCON_STAT_tx_non_tcpudp,
-       FALCON_STAT_tx_mac_src_error,
-       FALCON_STAT_tx_ip_src_error,
-       FALCON_STAT_rx_bytes,
-       FALCON_STAT_rx_good_bytes,
-       FALCON_STAT_rx_bad_bytes,
-       FALCON_STAT_rx_packets,
-       FALCON_STAT_rx_good,
-       FALCON_STAT_rx_bad,
-       FALCON_STAT_rx_pause,
-       FALCON_STAT_rx_control,
-       FALCON_STAT_rx_unicast,
-       FALCON_STAT_rx_multicast,
-       FALCON_STAT_rx_broadcast,
-       FALCON_STAT_rx_lt64,
-       FALCON_STAT_rx_64,
-       FALCON_STAT_rx_65_to_127,
-       FALCON_STAT_rx_128_to_255,
-       FALCON_STAT_rx_256_to_511,
-       FALCON_STAT_rx_512_to_1023,
-       FALCON_STAT_rx_1024_to_15xx,
-       FALCON_STAT_rx_15xx_to_jumbo,
-       FALCON_STAT_rx_gtjumbo,
-       FALCON_STAT_rx_bad_lt64,
-       FALCON_STAT_rx_bad_gtjumbo,
-       FALCON_STAT_rx_overflow,
-       FALCON_STAT_rx_symbol_error,
-       FALCON_STAT_rx_align_error,
-       FALCON_STAT_rx_length_error,
-       FALCON_STAT_rx_internal_error,
-       FALCON_STAT_rx_nodesc_drop_cnt,
-       FALCON_STAT_COUNT
-};
-
-/**
- * struct falcon_nic_data - Falcon NIC state
- * @pci_dev2: Secondary function of Falcon A
- * @board: Board state and functions
- * @stats: Hardware statistics
- * @stats_disable_count: Nest count for disabling statistics fetches
- * @stats_pending: Is there a pending DMA of MAC statistics.
- * @stats_timer: A timer for regularly fetching MAC statistics.
- * @spi_flash: SPI flash device
- * @spi_eeprom: SPI EEPROM device
- * @spi_lock: SPI bus lock
- * @mdio_lock: MDIO bus lock
- * @xmac_poll_required: XMAC link state needs polling
- */
-struct falcon_nic_data {
-       struct pci_dev *pci_dev2;
-       struct falcon_board board;
-       u64 stats[FALCON_STAT_COUNT];
-       unsigned int stats_disable_count;
-       bool stats_pending;
-       struct timer_list stats_timer;
-       struct falcon_spi_device spi_flash;
-       struct falcon_spi_device spi_eeprom;
-       struct mutex spi_lock;
-       struct mutex mdio_lock;
-       bool xmac_poll_required;
-};
-
-static inline struct falcon_board *falcon_board(struct efx_nic *efx)
-{
-       struct falcon_nic_data *data = efx->nic_data;
-       return &data->board;
-}
-
 enum {
        SIENA_STAT_tx_bytes = GENERIC_STAT_COUNT,
        SIENA_STAT_tx_good_bytes,
@@ -681,6 +513,8 @@ void efx_farch_tx_init(struct efx_tx_queue *tx_queue);
 void efx_farch_tx_fini(struct efx_tx_queue *tx_queue);
 void efx_farch_tx_remove(struct efx_tx_queue *tx_queue);
 void efx_farch_tx_write(struct efx_tx_queue *tx_queue);
+unsigned int efx_farch_tx_limit_len(struct efx_tx_queue *tx_queue,
+                                   dma_addr_t dma_addr, unsigned int len);
 int efx_farch_rx_probe(struct efx_rx_queue *rx_queue);
 void efx_farch_rx_init(struct efx_rx_queue *rx_queue);
 void efx_farch_rx_fini(struct efx_rx_queue *rx_queue);
diff --git a/drivers/net/ethernet/sfc/phy.h b/drivers/net/ethernet/sfc/phy.h
deleted file mode 100644 (file)
index 803bf44..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-/****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2007-2010 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
- */
-
-#ifndef EFX_PHY_H
-#define EFX_PHY_H
-
-/****************************************************************************
- * 10Xpress (SFX7101) PHY
- */
-extern const struct efx_phy_operations falcon_sfx7101_phy_ops;
-
-void tenxpress_set_id_led(struct efx_nic *efx, enum efx_led_mode mode);
-
-/****************************************************************************
- * AMCC/Quake QT202x PHYs
- */
-extern const struct efx_phy_operations falcon_qt202x_phy_ops;
-
-/* These PHYs provide various H/W control states for LEDs */
-#define QUAKE_LED_LINK_INVAL   (0)
-#define QUAKE_LED_LINK_STAT    (1)
-#define QUAKE_LED_LINK_ACT     (2)
-#define QUAKE_LED_LINK_ACTSTAT (3)
-#define QUAKE_LED_OFF          (4)
-#define QUAKE_LED_ON           (5)
-#define QUAKE_LED_LINK_INPUT   (6)     /* Pin is an input. */
-/* What link the LED tracks */
-#define QUAKE_LED_TXLINK       (0)
-#define QUAKE_LED_RXLINK       (8)
-
-void falcon_qt202x_set_led(struct efx_nic *p, int led, int state);
-
-/****************************************************************************
-* Transwitch CX4 retimer
-*/
-extern const struct efx_phy_operations falcon_txc_phy_ops;
-
-#define TXC_GPIO_DIR_INPUT     0
-#define TXC_GPIO_DIR_OUTPUT    1
-
-void falcon_txc_set_gpio_dir(struct efx_nic *efx, int pin, int dir);
-void falcon_txc_set_gpio_val(struct efx_nic *efx, int pin, int val);
-
-#endif
diff --git a/drivers/net/ethernet/sfc/qt202x_phy.c b/drivers/net/ethernet/sfc/qt202x_phy.c
deleted file mode 100644 (file)
index efa3612..0000000
+++ /dev/null
@@ -1,495 +0,0 @@
-/****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2006-2012 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
- */
-/*
- * Driver for AMCC QT202x SFP+ and XFP adapters; see www.amcc.com for details
- */
-
-#include <linux/slab.h>
-#include <linux/timer.h>
-#include <linux/delay.h>
-#include "efx.h"
-#include "mdio_10g.h"
-#include "phy.h"
-#include "nic.h"
-
-#define QT202X_REQUIRED_DEVS (MDIO_DEVS_PCS |          \
-                             MDIO_DEVS_PMAPMD |        \
-                             MDIO_DEVS_PHYXS)
-
-#define QT202X_LOOPBACKS ((1 << LOOPBACK_PCS) |                \
-                         (1 << LOOPBACK_PMAPMD) |      \
-                         (1 << LOOPBACK_PHYXS_WS))
-
-/****************************************************************************/
-/* Quake-specific MDIO registers */
-#define MDIO_QUAKE_LED0_REG    (0xD006)
-
-/* QT2025C only */
-#define PCS_FW_HEARTBEAT_REG   0xd7ee
-#define PCS_FW_HEARTB_LBN      0
-#define PCS_FW_HEARTB_WIDTH    8
-#define PCS_FW_PRODUCT_CODE_1  0xd7f0
-#define PCS_FW_VERSION_1       0xd7f3
-#define PCS_FW_BUILD_1         0xd7f6
-#define PCS_UC8051_STATUS_REG  0xd7fd
-#define PCS_UC_STATUS_LBN      0
-#define PCS_UC_STATUS_WIDTH    8
-#define PCS_UC_STATUS_FW_SAVE  0x20
-#define PMA_PMD_MODE_REG       0xc301
-#define PMA_PMD_RXIN_SEL_LBN   6
-#define PMA_PMD_FTX_CTRL2_REG  0xc309
-#define PMA_PMD_FTX_STATIC_LBN 13
-#define PMA_PMD_VEND1_REG      0xc001
-#define PMA_PMD_VEND1_LBTXD_LBN        15
-#define PCS_VEND1_REG          0xc000
-#define PCS_VEND1_LBTXD_LBN    5
-
-void falcon_qt202x_set_led(struct efx_nic *p, int led, int mode)
-{
-       int addr = MDIO_QUAKE_LED0_REG + led;
-       efx_mdio_write(p, MDIO_MMD_PMAPMD, addr, mode);
-}
-
-struct qt202x_phy_data {
-       enum efx_phy_mode phy_mode;
-       bool bug17190_in_bad_state;
-       unsigned long bug17190_timer;
-       u32 firmware_ver;
-};
-
-#define QT2022C2_MAX_RESET_TIME 500
-#define QT2022C2_RESET_WAIT 10
-
-#define QT2025C_MAX_HEARTB_TIME (5 * HZ)
-#define QT2025C_HEARTB_WAIT 100
-#define QT2025C_MAX_FWSTART_TIME (25 * HZ / 10)
-#define QT2025C_FWSTART_WAIT 100
-
-#define BUG17190_INTERVAL (2 * HZ)
-
-static int qt2025c_wait_heartbeat(struct efx_nic *efx)
-{
-       unsigned long timeout = jiffies + QT2025C_MAX_HEARTB_TIME;
-       int reg, old_counter = 0;
-
-       /* Wait for firmware heartbeat to start */
-       for (;;) {
-               int counter;
-               reg = efx_mdio_read(efx, MDIO_MMD_PCS, PCS_FW_HEARTBEAT_REG);
-               if (reg < 0)
-                       return reg;
-               counter = ((reg >> PCS_FW_HEARTB_LBN) &
-                           ((1 << PCS_FW_HEARTB_WIDTH) - 1));
-               if (old_counter == 0)
-                       old_counter = counter;
-               else if (counter != old_counter)
-                       break;
-               if (time_after(jiffies, timeout)) {
-                       /* Some cables have EEPROMs that conflict with the
-                        * PHY's on-board EEPROM so it cannot load firmware */
-                       netif_err(efx, hw, efx->net_dev,
-                                 "If an SFP+ direct attach cable is"
-                                 " connected, please check that it complies"
-                                 " with the SFP+ specification\n");
-                       return -ETIMEDOUT;
-               }
-               msleep(QT2025C_HEARTB_WAIT);
-       }
-
-       return 0;
-}
-
-static int qt2025c_wait_fw_status_good(struct efx_nic *efx)
-{
-       unsigned long timeout = jiffies + QT2025C_MAX_FWSTART_TIME;
-       int reg;
-
-       /* Wait for firmware status to look good */
-       for (;;) {
-               reg = efx_mdio_read(efx, MDIO_MMD_PCS, PCS_UC8051_STATUS_REG);
-               if (reg < 0)
-                       return reg;
-               if ((reg &
-                    ((1 << PCS_UC_STATUS_WIDTH) - 1) << PCS_UC_STATUS_LBN) >=
-                   PCS_UC_STATUS_FW_SAVE)
-                       break;
-               if (time_after(jiffies, timeout))
-                       return -ETIMEDOUT;
-               msleep(QT2025C_FWSTART_WAIT);
-       }
-
-       return 0;
-}
-
-static void qt2025c_restart_firmware(struct efx_nic *efx)
-{
-       /* Restart microcontroller execution of firmware from RAM */
-       efx_mdio_write(efx, 3, 0xe854, 0x00c0);
-       efx_mdio_write(efx, 3, 0xe854, 0x0040);
-       msleep(50);
-}
-
-static int qt2025c_wait_reset(struct efx_nic *efx)
-{
-       int rc;
-
-       rc = qt2025c_wait_heartbeat(efx);
-       if (rc != 0)
-               return rc;
-
-       rc = qt2025c_wait_fw_status_good(efx);
-       if (rc == -ETIMEDOUT) {
-               /* Bug 17689: occasionally heartbeat starts but firmware status
-                * code never progresses beyond 0x00.  Try again, once, after
-                * restarting execution of the firmware image. */
-               netif_dbg(efx, hw, efx->net_dev,
-                         "bashing QT2025C microcontroller\n");
-               qt2025c_restart_firmware(efx);
-               rc = qt2025c_wait_heartbeat(efx);
-               if (rc != 0)
-                       return rc;
-               rc = qt2025c_wait_fw_status_good(efx);
-       }
-
-       return rc;
-}
-
-static void qt2025c_firmware_id(struct efx_nic *efx)
-{
-       struct qt202x_phy_data *phy_data = efx->phy_data;
-       u8 firmware_id[9];
-       size_t i;
-
-       for (i = 0; i < sizeof(firmware_id); i++)
-               firmware_id[i] = efx_mdio_read(efx, MDIO_MMD_PCS,
-                                              PCS_FW_PRODUCT_CODE_1 + i);
-       netif_info(efx, probe, efx->net_dev,
-                  "QT2025C firmware %xr%d v%d.%d.%d.%d [20%02d-%02d-%02d]\n",
-                  (firmware_id[0] << 8) | firmware_id[1], firmware_id[2],
-                  firmware_id[3] >> 4, firmware_id[3] & 0xf,
-                  firmware_id[4], firmware_id[5],
-                  firmware_id[6], firmware_id[7], firmware_id[8]);
-       phy_data->firmware_ver = ((firmware_id[3] & 0xf0) << 20) |
-                                ((firmware_id[3] & 0x0f) << 16) |
-                                (firmware_id[4] << 8) | firmware_id[5];
-}
-
-static void qt2025c_bug17190_workaround(struct efx_nic *efx)
-{
-       struct qt202x_phy_data *phy_data = efx->phy_data;
-
-       /* The PHY can get stuck in a state where it reports PHY_XS and PMA/PMD
-        * layers up, but PCS down (no block_lock).  If we notice this state
-        * persisting for a couple of seconds, we switch PMA/PMD loopback
-        * briefly on and then off again, which is normally sufficient to
-        * recover it.
-        */
-       if (efx->link_state.up ||
-           !efx_mdio_links_ok(efx, MDIO_DEVS_PMAPMD | MDIO_DEVS_PHYXS)) {
-               phy_data->bug17190_in_bad_state = false;
-               return;
-       }
-
-       if (!phy_data->bug17190_in_bad_state) {
-               phy_data->bug17190_in_bad_state = true;
-               phy_data->bug17190_timer = jiffies + BUG17190_INTERVAL;
-               return;
-       }
-
-       if (time_after_eq(jiffies, phy_data->bug17190_timer)) {
-               netif_dbg(efx, hw, efx->net_dev, "bashing QT2025C PMA/PMD\n");
-               efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_CTRL1,
-                                 MDIO_PMA_CTRL1_LOOPBACK, true);
-               msleep(100);
-               efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_CTRL1,
-                                 MDIO_PMA_CTRL1_LOOPBACK, false);
-               phy_data->bug17190_timer = jiffies + BUG17190_INTERVAL;
-       }
-}
-
-static int qt2025c_select_phy_mode(struct efx_nic *efx)
-{
-       struct qt202x_phy_data *phy_data = efx->phy_data;
-       struct falcon_board *board = falcon_board(efx);
-       int reg, rc, i;
-       uint16_t phy_op_mode;
-
-       /* Only 2.0.1.0+ PHY firmware supports the more optimal SFP+
-        * Self-Configure mode.  Don't attempt any switching if we encounter
-        * older firmware. */
-       if (phy_data->firmware_ver < 0x02000100)
-               return 0;
-
-       /* In general we will get optimal behaviour in "SFP+ Self-Configure"
-        * mode; however, that powers down most of the PHY when no module is
-        * present, so we must use a different mode (any fixed mode will do)
-        * to be sure that loopbacks will work. */
-       phy_op_mode = (efx->loopback_mode == LOOPBACK_NONE) ? 0x0038 : 0x0020;
-
-       /* Only change mode if really necessary */
-       reg = efx_mdio_read(efx, 1, 0xc319);
-       if ((reg & 0x0038) == phy_op_mode)
-               return 0;
-       netif_dbg(efx, hw, efx->net_dev, "Switching PHY to mode 0x%04x\n",
-                 phy_op_mode);
-
-       /* This sequence replicates the register writes configured in the boot
-        * EEPROM (including the differences between board revisions), except
-        * that the operating mode is changed, and the PHY is prevented from
-        * unnecessarily reloading the main firmware image again. */
-       efx_mdio_write(efx, 1, 0xc300, 0x0000);
-       /* (Note: this portion of the boot EEPROM sequence, which bit-bashes 9
-        * STOPs onto the firmware/module I2C bus to reset it, varies across
-        * board revisions, as the bus is connected to different GPIO/LED
-        * outputs on the PHY.) */
-       if (board->major == 0 && board->minor < 2) {
-               efx_mdio_write(efx, 1, 0xc303, 0x4498);
-               for (i = 0; i < 9; i++) {
-                       efx_mdio_write(efx, 1, 0xc303, 0x4488);
-                       efx_mdio_write(efx, 1, 0xc303, 0x4480);
-                       efx_mdio_write(efx, 1, 0xc303, 0x4490);
-                       efx_mdio_write(efx, 1, 0xc303, 0x4498);
-               }
-       } else {
-               efx_mdio_write(efx, 1, 0xc303, 0x0920);
-               efx_mdio_write(efx, 1, 0xd008, 0x0004);
-               for (i = 0; i < 9; i++) {
-                       efx_mdio_write(efx, 1, 0xc303, 0x0900);
-                       efx_mdio_write(efx, 1, 0xd008, 0x0005);
-                       efx_mdio_write(efx, 1, 0xc303, 0x0920);
-                       efx_mdio_write(efx, 1, 0xd008, 0x0004);
-               }
-               efx_mdio_write(efx, 1, 0xc303, 0x4900);
-       }
-       efx_mdio_write(efx, 1, 0xc303, 0x4900);
-       efx_mdio_write(efx, 1, 0xc302, 0x0004);
-       efx_mdio_write(efx, 1, 0xc316, 0x0013);
-       efx_mdio_write(efx, 1, 0xc318, 0x0054);
-       efx_mdio_write(efx, 1, 0xc319, phy_op_mode);
-       efx_mdio_write(efx, 1, 0xc31a, 0x0098);
-       efx_mdio_write(efx, 3, 0x0026, 0x0e00);
-       efx_mdio_write(efx, 3, 0x0027, 0x0013);
-       efx_mdio_write(efx, 3, 0x0028, 0xa528);
-       efx_mdio_write(efx, 1, 0xd006, 0x000a);
-       efx_mdio_write(efx, 1, 0xd007, 0x0009);
-       efx_mdio_write(efx, 1, 0xd008, 0x0004);
-       /* This additional write is not present in the boot EEPROM.  It
-        * prevents the PHY's internal boot ROM doing another pointless (and
-        * slow) reload of the firmware image (the microcontroller's code
-        * memory is not affected by the microcontroller reset). */
-       efx_mdio_write(efx, 1, 0xc317, 0x00ff);
-       /* PMA/PMD loopback sets RXIN to inverse polarity and the firmware
-        * restart doesn't reset it. We need to do that ourselves. */
-       efx_mdio_set_flag(efx, 1, PMA_PMD_MODE_REG,
-                         1 << PMA_PMD_RXIN_SEL_LBN, false);
-       efx_mdio_write(efx, 1, 0xc300, 0x0002);
-       msleep(20);
-
-       /* Restart microcontroller execution of firmware from RAM */
-       qt2025c_restart_firmware(efx);
-
-       /* Wait for the microcontroller to be ready again */
-       rc = qt2025c_wait_reset(efx);
-       if (rc < 0) {
-               netif_err(efx, hw, efx->net_dev,
-                         "PHY microcontroller reset during mode switch "
-                         "timed out\n");
-               return rc;
-       }
-
-       return 0;
-}
-
-static int qt202x_reset_phy(struct efx_nic *efx)
-{
-       int rc;
-
-       if (efx->phy_type == PHY_TYPE_QT2025C) {
-               /* Wait for the reset triggered by falcon_reset_hw()
-                * to complete */
-               rc = qt2025c_wait_reset(efx);
-               if (rc < 0)
-                       goto fail;
-       } else {
-               /* Reset the PHYXS MMD. This is documented as doing
-                * a complete soft reset. */
-               rc = efx_mdio_reset_mmd(efx, MDIO_MMD_PHYXS,
-                                       QT2022C2_MAX_RESET_TIME /
-                                       QT2022C2_RESET_WAIT,
-                                       QT2022C2_RESET_WAIT);
-               if (rc < 0)
-                       goto fail;
-       }
-
-       /* Wait 250ms for the PHY to complete bootup */
-       msleep(250);
-
-       falcon_board(efx)->type->init_phy(efx);
-
-       return 0;
-
- fail:
-       netif_err(efx, hw, efx->net_dev, "PHY reset timed out\n");
-       return rc;
-}
-
-static int qt202x_phy_probe(struct efx_nic *efx)
-{
-       struct qt202x_phy_data *phy_data;
-
-       phy_data = kzalloc(sizeof(struct qt202x_phy_data), GFP_KERNEL);
-       if (!phy_data)
-               return -ENOMEM;
-       efx->phy_data = phy_data;
-       phy_data->phy_mode = efx->phy_mode;
-       phy_data->bug17190_in_bad_state = false;
-       phy_data->bug17190_timer = 0;
-
-       efx->mdio.mmds = QT202X_REQUIRED_DEVS;
-       efx->mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
-       efx->loopback_modes = QT202X_LOOPBACKS | FALCON_XMAC_LOOPBACKS;
-       return 0;
-}
-
-static int qt202x_phy_init(struct efx_nic *efx)
-{
-       u32 devid;
-       int rc;
-
-       rc = qt202x_reset_phy(efx);
-       if (rc) {
-               netif_err(efx, probe, efx->net_dev, "PHY init failed\n");
-               return rc;
-       }
-
-       devid = efx_mdio_read_id(efx, MDIO_MMD_PHYXS);
-       netif_info(efx, probe, efx->net_dev,
-                  "PHY ID reg %x (OUI %06x model %02x revision %x)\n",
-                  devid, efx_mdio_id_oui(devid), efx_mdio_id_model(devid),
-                  efx_mdio_id_rev(devid));
-
-       if (efx->phy_type == PHY_TYPE_QT2025C)
-               qt2025c_firmware_id(efx);
-
-       return 0;
-}
-
-static int qt202x_link_ok(struct efx_nic *efx)
-{
-       return efx_mdio_links_ok(efx, QT202X_REQUIRED_DEVS);
-}
-
-static bool qt202x_phy_poll(struct efx_nic *efx)
-{
-       bool was_up = efx->link_state.up;
-
-       efx->link_state.up = qt202x_link_ok(efx);
-       efx->link_state.speed = 10000;
-       efx->link_state.fd = true;
-       efx->link_state.fc = efx->wanted_fc;
-
-       if (efx->phy_type == PHY_TYPE_QT2025C)
-               qt2025c_bug17190_workaround(efx);
-
-       return efx->link_state.up != was_up;
-}
-
-static int qt202x_phy_reconfigure(struct efx_nic *efx)
-{
-       struct qt202x_phy_data *phy_data = efx->phy_data;
-
-       if (efx->phy_type == PHY_TYPE_QT2025C) {
-               int rc = qt2025c_select_phy_mode(efx);
-               if (rc)
-                       return rc;
-
-               /* There are several different register bits which can
-                * disable TX (and save power) on direct-attach cables
-                * or optical transceivers, varying somewhat between
-                * firmware versions.  Only 'static mode' appears to
-                * cover everything. */
-               mdio_set_flag(
-                       &efx->mdio, efx->mdio.prtad, MDIO_MMD_PMAPMD,
-                       PMA_PMD_FTX_CTRL2_REG, 1 << PMA_PMD_FTX_STATIC_LBN,
-                       efx->phy_mode & PHY_MODE_TX_DISABLED ||
-                       efx->phy_mode & PHY_MODE_LOW_POWER ||
-                       efx->loopback_mode == LOOPBACK_PCS ||
-                       efx->loopback_mode == LOOPBACK_PMAPMD);
-       } else {
-               /* Reset the PHY when moving from tx off to tx on */
-               if (!(efx->phy_mode & PHY_MODE_TX_DISABLED) &&
-                   (phy_data->phy_mode & PHY_MODE_TX_DISABLED))
-                       qt202x_reset_phy(efx);
-
-               efx_mdio_transmit_disable(efx);
-       }
-
-       efx_mdio_phy_reconfigure(efx);
-
-       phy_data->phy_mode = efx->phy_mode;
-
-       return 0;
-}
-
-static void qt202x_phy_get_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
-{
-       mdio45_ethtool_gset(&efx->mdio, ecmd);
-}
-
-static void qt202x_phy_remove(struct efx_nic *efx)
-{
-       /* Free the context block */
-       kfree(efx->phy_data);
-       efx->phy_data = NULL;
-}
-
-static int qt202x_phy_get_module_info(struct efx_nic *efx,
-                                     struct ethtool_modinfo *modinfo)
-{
-       modinfo->type = ETH_MODULE_SFF_8079;
-       modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
-       return 0;
-}
-
-static int qt202x_phy_get_module_eeprom(struct efx_nic *efx,
-                                       struct ethtool_eeprom *ee, u8 *data)
-{
-       int mmd, reg_base, rc, i;               
-
-       if (efx->phy_type == PHY_TYPE_QT2025C) {
-               mmd = MDIO_MMD_PCS;
-               reg_base = 0xd000;
-       } else {
-               mmd = MDIO_MMD_PMAPMD;
-               reg_base = 0x8007;
-       }
-
-       for (i = 0; i < ee->len; i++) {
-               rc = efx_mdio_read(efx, mmd, reg_base + ee->offset + i);
-               if (rc < 0)
-                       return rc;
-               data[i] = rc;
-       }
-
-       return 0;
-}
-
-const struct efx_phy_operations falcon_qt202x_phy_ops = {
-       .probe           = qt202x_phy_probe,
-       .init            = qt202x_phy_init,
-       .reconfigure     = qt202x_phy_reconfigure,
-       .poll            = qt202x_phy_poll,
-       .fini            = efx_port_dummy_op_void,
-       .remove          = qt202x_phy_remove,
-       .get_settings    = qt202x_phy_get_settings,
-       .set_settings    = efx_mdio_set_settings,
-       .test_alive      = efx_mdio_test_alive,
-       .get_module_eeprom = qt202x_phy_get_module_eeprom,
-       .get_module_info = qt202x_phy_get_module_info,
-};
index 02b0b5272c14f6148c1999a06a8b71baacf54edc..7893a739ea414065d58df32d95e4d81a9df424fb 100644 (file)
@@ -400,21 +400,10 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
         */
        rx_buf->flags |= EFX_RX_PKT_DISCARD;
 
-       if ((len > rx_buf->len) && EFX_WORKAROUND_8071(efx)) {
-               if (net_ratelimit())
-                       netif_err(efx, rx_err, efx->net_dev,
-                                 " RX queue %d seriously overlength "
-                                 "RX event (0x%x > 0x%x+0x%x). Leaking\n",
-                                 efx_rx_queue_index(rx_queue), len, max_len,
-                                 efx->type->rx_buffer_padding);
-               efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY);
-       } else {
-               if (net_ratelimit())
-                       netif_err(efx, rx_err, efx->net_dev,
-                                 " RX queue %d overlength RX event "
-                                 "(0x%x > 0x%x)\n",
-                                 efx_rx_queue_index(rx_queue), len, max_len);
-       }
+       if (net_ratelimit())
+               netif_err(efx, rx_err, efx->net_dev,
+                         "RX queue %d overlength RX event (%#x > %#x)\n",
+                         efx_rx_queue_index(rx_queue), len, max_len);
 
        efx_rx_queue_channel(rx_queue)->n_rx_overlength++;
 }
index 04ed1b4c7cd98ac424d83e54dda2f4f6aee76af7..0c4a8dd56094cc775bd62b8a47d66ec6717f3d3f 100644 (file)
@@ -20,7 +20,6 @@
 #include "nic.h"
 #include "farch_regs.h"
 #include "io.h"
-#include "phy.h"
 #include "workarounds.h"
 #include "mcdi.h"
 #include "mcdi_pcol.h"
@@ -977,6 +976,7 @@ const struct efx_nic_type siena_a0_nic_type = {
        .tx_init = efx_farch_tx_init,
        .tx_remove = efx_farch_tx_remove,
        .tx_write = efx_farch_tx_write,
+       .tx_limit_len = efx_farch_tx_limit_len,
        .rx_push_rss_config = siena_rx_push_rss_config,
        .rx_probe = efx_farch_rx_probe,
        .rx_init = efx_farch_rx_init,
diff --git a/drivers/net/ethernet/sfc/tenxpress.c b/drivers/net/ethernet/sfc/tenxpress.c
deleted file mode 100644 (file)
index 2c90e6b..0000000
+++ /dev/null
@@ -1,494 +0,0 @@
-/****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2007-2011 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
- */
-
-#include <linux/delay.h>
-#include <linux/rtnetlink.h>
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include "efx.h"
-#include "mdio_10g.h"
-#include "nic.h"
-#include "phy.h"
-#include "workarounds.h"
-
-/* We expect these MMDs to be in the package. */
-#define TENXPRESS_REQUIRED_DEVS (MDIO_DEVS_PMAPMD      | \
-                                MDIO_DEVS_PCS          | \
-                                MDIO_DEVS_PHYXS        | \
-                                MDIO_DEVS_AN)
-
-#define SFX7101_LOOPBACKS ((1 << LOOPBACK_PHYXS) |     \
-                          (1 << LOOPBACK_PCS) |        \
-                          (1 << LOOPBACK_PMAPMD) |     \
-                          (1 << LOOPBACK_PHYXS_WS))
-
-/* We complain if we fail to see the link partner as 10G capable this many
- * times in a row (must be > 1 as sampling the autoneg. registers is racy)
- */
-#define MAX_BAD_LP_TRIES       (5)
-
-/* Extended control register */
-#define PMA_PMD_XCONTROL_REG   49152
-#define PMA_PMD_EXT_GMII_EN_LBN        1
-#define PMA_PMD_EXT_GMII_EN_WIDTH 1
-#define PMA_PMD_EXT_CLK_OUT_LBN        2
-#define PMA_PMD_EXT_CLK_OUT_WIDTH 1
-#define PMA_PMD_LNPGA_POWERDOWN_LBN 8
-#define PMA_PMD_LNPGA_POWERDOWN_WIDTH 1
-#define PMA_PMD_EXT_CLK312_WIDTH 1
-#define PMA_PMD_EXT_LPOWER_LBN  12
-#define PMA_PMD_EXT_LPOWER_WIDTH 1
-#define PMA_PMD_EXT_ROBUST_LBN 14
-#define PMA_PMD_EXT_ROBUST_WIDTH 1
-#define PMA_PMD_EXT_SSR_LBN    15
-#define PMA_PMD_EXT_SSR_WIDTH  1
-
-/* extended status register */
-#define PMA_PMD_XSTATUS_REG    49153
-#define PMA_PMD_XSTAT_MDIX_LBN 14
-#define PMA_PMD_XSTAT_FLP_LBN   (12)
-
-/* LED control register */
-#define PMA_PMD_LED_CTRL_REG   49159
-#define PMA_PMA_LED_ACTIVITY_LBN       (3)
-
-/* LED function override register */
-#define PMA_PMD_LED_OVERR_REG  49161
-/* Bit positions for different LEDs (there are more but not wired on SFE4001)*/
-#define PMA_PMD_LED_LINK_LBN   (0)
-#define PMA_PMD_LED_SPEED_LBN  (2)
-#define PMA_PMD_LED_TX_LBN     (4)
-#define PMA_PMD_LED_RX_LBN     (6)
-/* Override settings */
-#define        PMA_PMD_LED_AUTO        (0)     /* H/W control */
-#define        PMA_PMD_LED_ON          (1)
-#define        PMA_PMD_LED_OFF         (2)
-#define PMA_PMD_LED_FLASH      (3)
-#define PMA_PMD_LED_MASK       3
-/* All LEDs under hardware control */
-/* Green and Amber under hardware control, Red off */
-#define SFX7101_PMA_PMD_LED_DEFAULT (PMA_PMD_LED_OFF << PMA_PMD_LED_RX_LBN)
-
-#define PMA_PMD_SPEED_ENABLE_REG 49192
-#define PMA_PMD_100TX_ADV_LBN    1
-#define PMA_PMD_100TX_ADV_WIDTH  1
-#define PMA_PMD_1000T_ADV_LBN    2
-#define PMA_PMD_1000T_ADV_WIDTH  1
-#define PMA_PMD_10000T_ADV_LBN   3
-#define PMA_PMD_10000T_ADV_WIDTH 1
-#define PMA_PMD_SPEED_LBN        4
-#define PMA_PMD_SPEED_WIDTH      4
-
-/* Misc register defines */
-#define PCS_CLOCK_CTRL_REG     55297
-#define PLL312_RST_N_LBN 2
-
-#define PCS_SOFT_RST2_REG      55302
-#define SERDES_RST_N_LBN 13
-#define XGXS_RST_N_LBN 12
-
-#define        PCS_TEST_SELECT_REG     55303   /* PRM 10.5.8 */
-#define        CLK312_EN_LBN 3
-
-/* PHYXS registers */
-#define PHYXS_XCONTROL_REG     49152
-#define PHYXS_RESET_LBN                15
-#define PHYXS_RESET_WIDTH      1
-
-#define PHYXS_TEST1         (49162)
-#define LOOPBACK_NEAR_LBN   (8)
-#define LOOPBACK_NEAR_WIDTH (1)
-
-/* Boot status register */
-#define PCS_BOOT_STATUS_REG            53248
-#define PCS_BOOT_FATAL_ERROR_LBN       0
-#define PCS_BOOT_PROGRESS_LBN          1
-#define PCS_BOOT_PROGRESS_WIDTH                2
-#define PCS_BOOT_PROGRESS_INIT         0
-#define PCS_BOOT_PROGRESS_WAIT_MDIO    1
-#define PCS_BOOT_PROGRESS_CHECKSUM     2
-#define PCS_BOOT_PROGRESS_JUMP         3
-#define PCS_BOOT_DOWNLOAD_WAIT_LBN     3
-#define PCS_BOOT_CODE_STARTED_LBN      4
-
-/* 100M/1G PHY registers */
-#define GPHY_XCONTROL_REG      49152
-#define GPHY_ISOLATE_LBN       10
-#define GPHY_ISOLATE_WIDTH     1
-#define GPHY_DUPLEX_LBN                8
-#define GPHY_DUPLEX_WIDTH      1
-#define GPHY_LOOPBACK_NEAR_LBN 14
-#define GPHY_LOOPBACK_NEAR_WIDTH 1
-
-#define C22EXT_STATUS_REG       49153
-#define C22EXT_STATUS_LINK_LBN  2
-#define C22EXT_STATUS_LINK_WIDTH 1
-
-#define C22EXT_MSTSLV_CTRL                     49161
-#define C22EXT_MSTSLV_CTRL_ADV_1000_HD_LBN     8
-#define C22EXT_MSTSLV_CTRL_ADV_1000_FD_LBN     9
-
-#define C22EXT_MSTSLV_STATUS                   49162
-#define C22EXT_MSTSLV_STATUS_LP_1000_HD_LBN    10
-#define C22EXT_MSTSLV_STATUS_LP_1000_FD_LBN    11
-
-/* Time to wait between powering down the LNPGA and turning off the power
- * rails */
-#define LNPGA_PDOWN_WAIT       (HZ / 5)
-
-struct tenxpress_phy_data {
-       enum efx_loopback_mode loopback_mode;
-       enum efx_phy_mode phy_mode;
-       int bad_lp_tries;
-};
-
-static int tenxpress_init(struct efx_nic *efx)
-{
-       /* Enable 312.5 MHz clock */
-       efx_mdio_write(efx, MDIO_MMD_PCS, PCS_TEST_SELECT_REG,
-                      1 << CLK312_EN_LBN);
-
-       /* Set the LEDs up as: Green = Link, Amber = Link/Act, Red = Off */
-       efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_CTRL_REG,
-                         1 << PMA_PMA_LED_ACTIVITY_LBN, true);
-       efx_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_OVERR_REG,
-                      SFX7101_PMA_PMD_LED_DEFAULT);
-
-       return 0;
-}
-
-static int tenxpress_phy_probe(struct efx_nic *efx)
-{
-       struct tenxpress_phy_data *phy_data;
-
-       /* Allocate phy private storage */
-       phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL);
-       if (!phy_data)
-               return -ENOMEM;
-       efx->phy_data = phy_data;
-       phy_data->phy_mode = efx->phy_mode;
-
-       efx->mdio.mmds = TENXPRESS_REQUIRED_DEVS;
-       efx->mdio.mode_support = MDIO_SUPPORTS_C45;
-
-       efx->loopback_modes = SFX7101_LOOPBACKS | FALCON_XMAC_LOOPBACKS;
-
-       efx->link_advertising = (ADVERTISED_TP | ADVERTISED_Autoneg |
-                                ADVERTISED_10000baseT_Full);
-
-       return 0;
-}
-
-static int tenxpress_phy_init(struct efx_nic *efx)
-{
-       int rc;
-
-       falcon_board(efx)->type->init_phy(efx);
-
-       if (!(efx->phy_mode & PHY_MODE_SPECIAL)) {
-               rc = efx_mdio_wait_reset_mmds(efx, TENXPRESS_REQUIRED_DEVS);
-               if (rc < 0)
-                       return rc;
-
-               rc = efx_mdio_check_mmds(efx, TENXPRESS_REQUIRED_DEVS);
-               if (rc < 0)
-                       return rc;
-       }
-
-       rc = tenxpress_init(efx);
-       if (rc < 0)
-               return rc;
-
-       /* Reinitialise flow control settings */
-       efx_link_set_wanted_fc(efx, efx->wanted_fc);
-       efx_mdio_an_reconfigure(efx);
-
-       schedule_timeout_uninterruptible(HZ / 5); /* 200ms */
-
-       /* Let XGXS and SerDes out of reset */
-       falcon_reset_xaui(efx);
-
-       return 0;
-}
-
-/* Perform a "special software reset" on the PHY. The caller is
- * responsible for saving and restoring the PHY hardware registers
- * properly, and masking/unmasking LASI */
-static int tenxpress_special_reset(struct efx_nic *efx)
-{
-       int rc, reg;
-
-       /* The XGMAC clock is driven from the SFX7101 312MHz clock, so
-        * a special software reset can glitch the XGMAC sufficiently for stats
-        * requests to fail. */
-       falcon_stop_nic_stats(efx);
-
-       /* Initiate reset */
-       reg = efx_mdio_read(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG);
-       reg |= (1 << PMA_PMD_EXT_SSR_LBN);
-       efx_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG, reg);
-
-       mdelay(200);
-
-       /* Wait for the blocks to come out of reset */
-       rc = efx_mdio_wait_reset_mmds(efx, TENXPRESS_REQUIRED_DEVS);
-       if (rc < 0)
-               goto out;
-
-       /* Try and reconfigure the device */
-       rc = tenxpress_init(efx);
-       if (rc < 0)
-               goto out;
-
-       /* Wait for the XGXS state machine to churn */
-       mdelay(10);
-out:
-       falcon_start_nic_stats(efx);
-       return rc;
-}
-
-static void sfx7101_check_bad_lp(struct efx_nic *efx, bool link_ok)
-{
-       struct tenxpress_phy_data *pd = efx->phy_data;
-       bool bad_lp;
-       int reg;
-
-       if (link_ok) {
-               bad_lp = false;
-       } else {
-               /* Check that AN has started but not completed. */
-               reg = efx_mdio_read(efx, MDIO_MMD_AN, MDIO_STAT1);
-               if (!(reg & MDIO_AN_STAT1_LPABLE))
-                       return; /* LP status is unknown */
-               bad_lp = !(reg & MDIO_AN_STAT1_COMPLETE);
-               if (bad_lp)
-                       pd->bad_lp_tries++;
-       }
-
-       /* Nothing to do if all is well and was previously so. */
-       if (!pd->bad_lp_tries)
-               return;
-
-       /* Use the RX (red) LED as an error indicator once we've seen AN
-        * failure several times in a row, and also log a message. */
-       if (!bad_lp || pd->bad_lp_tries == MAX_BAD_LP_TRIES) {
-               reg = efx_mdio_read(efx, MDIO_MMD_PMAPMD,
-                                   PMA_PMD_LED_OVERR_REG);
-               reg &= ~(PMA_PMD_LED_MASK << PMA_PMD_LED_RX_LBN);
-               if (!bad_lp) {
-                       reg |= PMA_PMD_LED_OFF << PMA_PMD_LED_RX_LBN;
-               } else {
-                       reg |= PMA_PMD_LED_FLASH << PMA_PMD_LED_RX_LBN;
-                       netif_err(efx, link, efx->net_dev,
-                                 "appears to be plugged into a port"
-                                 " that is not 10GBASE-T capable. The PHY"
-                                 " supports 10GBASE-T ONLY, so no link can"
-                                 " be established\n");
-               }
-               efx_mdio_write(efx, MDIO_MMD_PMAPMD,
-                              PMA_PMD_LED_OVERR_REG, reg);
-               pd->bad_lp_tries = bad_lp;
-       }
-}
-
-static bool sfx7101_link_ok(struct efx_nic *efx)
-{
-       return efx_mdio_links_ok(efx,
-                                MDIO_DEVS_PMAPMD |
-                                MDIO_DEVS_PCS |
-                                MDIO_DEVS_PHYXS);
-}
-
-static void tenxpress_ext_loopback(struct efx_nic *efx)
-{
-       efx_mdio_set_flag(efx, MDIO_MMD_PHYXS, PHYXS_TEST1,
-                         1 << LOOPBACK_NEAR_LBN,
-                         efx->loopback_mode == LOOPBACK_PHYXS);
-}
-
-static void tenxpress_low_power(struct efx_nic *efx)
-{
-       efx_mdio_set_mmds_lpower(
-               efx, !!(efx->phy_mode & PHY_MODE_LOW_POWER),
-               TENXPRESS_REQUIRED_DEVS);
-}
-
-static int tenxpress_phy_reconfigure(struct efx_nic *efx)
-{
-       struct tenxpress_phy_data *phy_data = efx->phy_data;
-       bool phy_mode_change, loop_reset;
-
-       if (efx->phy_mode & (PHY_MODE_OFF | PHY_MODE_SPECIAL)) {
-               phy_data->phy_mode = efx->phy_mode;
-               return 0;
-       }
-
-       phy_mode_change = (efx->phy_mode == PHY_MODE_NORMAL &&
-                          phy_data->phy_mode != PHY_MODE_NORMAL);
-       loop_reset = (LOOPBACK_OUT_OF(phy_data, efx, LOOPBACKS_EXTERNAL(efx)) ||
-                     LOOPBACK_CHANGED(phy_data, efx, 1 << LOOPBACK_GPHY));
-
-       if (loop_reset || phy_mode_change) {
-               tenxpress_special_reset(efx);
-               falcon_reset_xaui(efx);
-       }
-
-       tenxpress_low_power(efx);
-       efx_mdio_transmit_disable(efx);
-       efx_mdio_phy_reconfigure(efx);
-       tenxpress_ext_loopback(efx);
-       efx_mdio_an_reconfigure(efx);
-
-       phy_data->loopback_mode = efx->loopback_mode;
-       phy_data->phy_mode = efx->phy_mode;
-
-       return 0;
-}
-
-static void
-tenxpress_get_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd);
-
-/* Poll for link state changes */
-static bool tenxpress_phy_poll(struct efx_nic *efx)
-{
-       struct efx_link_state old_state = efx->link_state;
-
-       efx->link_state.up = sfx7101_link_ok(efx);
-       efx->link_state.speed = 10000;
-       efx->link_state.fd = true;
-       efx->link_state.fc = efx_mdio_get_pause(efx);
-
-       sfx7101_check_bad_lp(efx, efx->link_state.up);
-
-       return !efx_link_state_equal(&efx->link_state, &old_state);
-}
-
-static void sfx7101_phy_fini(struct efx_nic *efx)
-{
-       int reg;
-
-       /* Power down the LNPGA */
-       reg = (1 << PMA_PMD_LNPGA_POWERDOWN_LBN);
-       efx_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG, reg);
-
-       /* Waiting here ensures that the board fini, which can turn
-        * off the power to the PHY, won't get run until the LNPGA
-        * powerdown has been given long enough to complete. */
-       schedule_timeout_uninterruptible(LNPGA_PDOWN_WAIT); /* 200 ms */
-}
-
-static void tenxpress_phy_remove(struct efx_nic *efx)
-{
-       kfree(efx->phy_data);
-       efx->phy_data = NULL;
-}
-
-
-/* Override the RX, TX and link LEDs */
-void tenxpress_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
-{
-       int reg;
-
-       switch (mode) {
-       case EFX_LED_OFF:
-               reg = (PMA_PMD_LED_OFF << PMA_PMD_LED_TX_LBN) |
-                       (PMA_PMD_LED_OFF << PMA_PMD_LED_RX_LBN) |
-                       (PMA_PMD_LED_OFF << PMA_PMD_LED_LINK_LBN);
-               break;
-       case EFX_LED_ON:
-               reg = (PMA_PMD_LED_ON << PMA_PMD_LED_TX_LBN) |
-                       (PMA_PMD_LED_ON << PMA_PMD_LED_RX_LBN) |
-                       (PMA_PMD_LED_ON << PMA_PMD_LED_LINK_LBN);
-               break;
-       default:
-               reg = SFX7101_PMA_PMD_LED_DEFAULT;
-               break;
-       }
-
-       efx_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_OVERR_REG, reg);
-}
-
-static const char *const sfx7101_test_names[] = {
-       "bist"
-};
-
-static const char *sfx7101_test_name(struct efx_nic *efx, unsigned int index)
-{
-       if (index < ARRAY_SIZE(sfx7101_test_names))
-               return sfx7101_test_names[index];
-       return NULL;
-}
-
-static int
-sfx7101_run_tests(struct efx_nic *efx, int *results, unsigned flags)
-{
-       int rc;
-
-       if (!(flags & ETH_TEST_FL_OFFLINE))
-               return 0;
-
-       /* BIST is automatically run after a special software reset */
-       rc = tenxpress_special_reset(efx);
-       results[0] = rc ? -1 : 1;
-
-       efx_mdio_an_reconfigure(efx);
-
-       return rc;
-}
-
-static void
-tenxpress_get_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
-{
-       u32 adv = 0, lpa = 0;
-       int reg;
-
-       reg = efx_mdio_read(efx, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL);
-       if (reg & MDIO_AN_10GBT_CTRL_ADV10G)
-               adv |= ADVERTISED_10000baseT_Full;
-       reg = efx_mdio_read(efx, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
-       if (reg & MDIO_AN_10GBT_STAT_LP10G)
-               lpa |= ADVERTISED_10000baseT_Full;
-
-       mdio45_ethtool_gset_npage(&efx->mdio, ecmd, adv, lpa);
-
-       /* In loopback, the PHY automatically brings up the correct interface,
-        * but doesn't advertise the correct speed. So override it */
-       if (LOOPBACK_EXTERNAL(efx))
-               ethtool_cmd_speed_set(ecmd, SPEED_10000);
-}
-
-static int tenxpress_set_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
-{
-       if (!ecmd->autoneg)
-               return -EINVAL;
-
-       return efx_mdio_set_settings(efx, ecmd);
-}
-
-static void sfx7101_set_npage_adv(struct efx_nic *efx, u32 advertising)
-{
-       efx_mdio_set_flag(efx, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL,
-                         MDIO_AN_10GBT_CTRL_ADV10G,
-                         advertising & ADVERTISED_10000baseT_Full);
-}
-
-const struct efx_phy_operations falcon_sfx7101_phy_ops = {
-       .probe            = tenxpress_phy_probe,
-       .init             = tenxpress_phy_init,
-       .reconfigure      = tenxpress_phy_reconfigure,
-       .poll             = tenxpress_phy_poll,
-       .fini             = sfx7101_phy_fini,
-       .remove           = tenxpress_phy_remove,
-       .get_settings     = tenxpress_get_settings,
-       .set_settings     = tenxpress_set_settings,
-       .set_npage_adv    = sfx7101_set_npage_adv,
-       .test_alive       = efx_mdio_test_alive,
-       .test_name        = sfx7101_test_name,
-       .run_tests        = sfx7101_run_tests,
-};
index 2337789115579972ae4608445872af0b933b3759..f11a36ac74074f884eabe87508e1f6cbe93a3639 100644 (file)
@@ -22,6 +22,7 @@
 #include "efx.h"
 #include "io.h"
 #include "nic.h"
+#include "tx.h"
 #include "workarounds.h"
 #include "ef10_regs.h"
 
@@ -33,29 +34,30 @@ unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF;
 
 #endif /* EFX_USE_PIO */
 
-static inline unsigned int
-efx_tx_queue_get_insert_index(const struct efx_tx_queue *tx_queue)
+static inline u8 *efx_tx_get_copy_buffer(struct efx_tx_queue *tx_queue,
+                                        struct efx_tx_buffer *buffer)
 {
-       return tx_queue->insert_count & tx_queue->ptr_mask;
-}
+       unsigned int index = efx_tx_queue_get_insert_index(tx_queue);
+       struct efx_buffer *page_buf =
+               &tx_queue->cb_page[index >> (PAGE_SHIFT - EFX_TX_CB_ORDER)];
+       unsigned int offset =
+               ((index << EFX_TX_CB_ORDER) + NET_IP_ALIGN) & (PAGE_SIZE - 1);
 
-static inline struct efx_tx_buffer *
-__efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue)
-{
-       return &tx_queue->buffer[efx_tx_queue_get_insert_index(tx_queue)];
+       if (unlikely(!page_buf->addr) &&
+           efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE,
+                                GFP_ATOMIC))
+               return NULL;
+       buffer->dma_addr = page_buf->dma_addr + offset;
+       buffer->unmap_len = 0;
+       return (u8 *)page_buf->addr + offset;
 }
 
-static inline struct efx_tx_buffer *
-efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue)
+u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue,
+                                  struct efx_tx_buffer *buffer, size_t len)
 {
-       struct efx_tx_buffer *buffer =
-               __efx_tx_queue_get_insert_buffer(tx_queue);
-
-       EFX_BUG_ON_PARANOID(buffer->len);
-       EFX_BUG_ON_PARANOID(buffer->flags);
-       EFX_BUG_ON_PARANOID(buffer->unmap_len);
-
-       return buffer;
+       if (len > EFX_TX_CB_SIZE)
+               return NULL;
+       return efx_tx_get_copy_buffer(tx_queue, buffer);
 }
 
 static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
@@ -82,35 +84,12 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
                netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
                           "TX queue %d transmission id %x complete\n",
                           tx_queue->queue, tx_queue->read_count);
-       } else if (buffer->flags & EFX_TX_BUF_HEAP) {
-               kfree(buffer->heap_buf);
        }
 
        buffer->len = 0;
        buffer->flags = 0;
 }
 
-static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
-                              struct sk_buff *skb);
-
-static inline unsigned
-efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
-{
-       /* Depending on the NIC revision, we can use descriptor
-        * lengths up to 8K or 8K-1.  However, since PCI Express
-        * devices must split read requests at 4K boundaries, there is
-        * little benefit from using descriptors that cross those
-        * boundaries and we keep things simple by not doing so.
-        */
-       unsigned len = (~dma_addr & (EFX_PAGE_SIZE - 1)) + 1;
-
-       /* Work around hardware bug for unaligned buffers. */
-       if (EFX_WORKAROUND_5391(efx) && (dma_addr & 0xf))
-               len = min_t(unsigned, len, 512 - (dma_addr & 0xf));
-
-       return len;
-}
-
 unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
 {
        /* Header and payload descriptor for each output segment, plus
@@ -118,10 +97,8 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
         */
        unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;
 
-       /* Possibly one more per segment for the alignment workaround,
-        * or for option descriptors
-        */
-       if (EFX_WORKAROUND_5391(efx) || efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
+       /* Possibly one more per segment for option descriptors */
+       if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
                max_descs += EFX_TSO_MAX_SEGS;
 
        /* Possibly more for PCIe page boundaries within input fragments */
@@ -173,6 +150,33 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
        }
 }
 
+static int efx_enqueue_skb_copy(struct efx_tx_queue *tx_queue,
+                               struct sk_buff *skb)
+{
+       unsigned int copy_len = skb->len;
+       struct efx_tx_buffer *buffer;
+       u8 *copy_buffer;
+       int rc;
+
+       EFX_BUG_ON_PARANOID(copy_len > EFX_TX_CB_SIZE);
+
+       buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+
+       copy_buffer = efx_tx_get_copy_buffer(tx_queue, buffer);
+       if (unlikely(!copy_buffer))
+               return -ENOMEM;
+
+       rc = skb_copy_bits(skb, 0, copy_buffer, copy_len);
+       EFX_WARN_ON_PARANOID(rc);
+       buffer->len = copy_len;
+
+       buffer->skb = skb;
+       buffer->flags = EFX_TX_BUF_SKB;
+
+       ++tx_queue->insert_count;
+       return rc;
+}
+
 #ifdef EFX_USE_PIO
 
 struct efx_short_copy_buffer {
@@ -267,8 +271,8 @@ static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb,
        EFX_BUG_ON_PARANOID(skb_shinfo(skb)->frag_list);
 }
 
-static struct efx_tx_buffer *
-efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue,
+                              struct sk_buff *skb)
 {
        struct efx_tx_buffer *buffer =
                efx_tx_queue_get_insert_buffer(tx_queue);
@@ -292,7 +296,7 @@ efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
                efx_flush_copy_buffer(tx_queue->efx, piobuf, &copy_buf);
        } else {
                /* Pad the write to the size of a cache line.
-                * We can do this because we know the skb_shared_info sruct is
+                * We can do this because we know the skb_shared_info struct is
                 * after the source, and the destination buffer is big enough.
                 */
                BUILD_BUG_ON(L1_CACHE_BYTES >
@@ -301,6 +305,9 @@ efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
                                 ALIGN(skb->len, L1_CACHE_BYTES) >> 3);
        }
 
+       buffer->skb = skb;
+       buffer->flags = EFX_TX_BUF_SKB | EFX_TX_BUF_OPTION;
+
        EFX_POPULATE_QWORD_5(buffer->option,
                             ESF_DZ_TX_DESC_IS_OPT, 1,
                             ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO,
@@ -308,127 +315,227 @@ efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
                             ESF_DZ_TX_PIO_BYTE_CNT, skb->len,
                             ESF_DZ_TX_PIO_BUF_ADDR,
                             tx_queue->piobuf_offset);
-       ++tx_queue->pio_packets;
        ++tx_queue->insert_count;
-       return buffer;
+       return 0;
 }
 #endif /* EFX_USE_PIO */
 
-/*
- * Add a socket buffer to a TX queue
- *
- * This maps all fragments of a socket buffer for DMA and adds them to
- * the TX queue.  The queue's insert pointer will be incremented by
- * the number of fragments in the socket buffer.
- *
- * If any DMA mapping fails, any mapped fragments will be unmapped,
- * the queue's insert pointer will be restored to its original value.
- *
- * This function is split out from efx_hard_start_xmit to allow the
- * loopback test to direct packets via specific TX queues.
- *
- * Returns NETDEV_TX_OK.
- * You must hold netif_tx_lock() to call this function.
+static struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
+                                             dma_addr_t dma_addr,
+                                             size_t len)
+{
+       const struct efx_nic_type *nic_type = tx_queue->efx->type;
+       struct efx_tx_buffer *buffer;
+       unsigned int dma_len;
+
+       /* Map the fragment taking account of NIC-dependent DMA limits. */
+       do {
+               buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+               dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len);
+
+               buffer->len = dma_len;
+               buffer->dma_addr = dma_addr;
+               buffer->flags = EFX_TX_BUF_CONT;
+               len -= dma_len;
+               dma_addr += dma_len;
+               ++tx_queue->insert_count;
+       } while (len);
+
+       return buffer;
+}
+
+/* Map all data from an SKB for DMA and create descriptors on the queue.
  */
-netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+                          unsigned int segment_count)
 {
        struct efx_nic *efx = tx_queue->efx;
        struct device *dma_dev = &efx->pci_dev->dev;
-       struct efx_tx_buffer *buffer;
-       unsigned int old_insert_count = tx_queue->insert_count;
-       skb_frag_t *fragment;
-       unsigned int len, unmap_len = 0;
-       dma_addr_t dma_addr, unmap_addr = 0;
-       unsigned int dma_len;
+       unsigned int frag_index, nr_frags;
+       dma_addr_t dma_addr, unmap_addr;
        unsigned short dma_flags;
-       int i = 0;
+       size_t len, unmap_len;
 
-       if (skb_shinfo(skb)->gso_size)
-               return efx_enqueue_skb_tso(tx_queue, skb);
+       nr_frags = skb_shinfo(skb)->nr_frags;
+       frag_index = 0;
 
-       /* Get size of the initial fragment */
+       /* Map header data. */
        len = skb_headlen(skb);
+       dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
+       dma_flags = EFX_TX_BUF_MAP_SINGLE;
+       unmap_len = len;
+       unmap_addr = dma_addr;
 
-       /* Pad if necessary */
-       if (EFX_WORKAROUND_15592(efx) && skb->len <= 32) {
-               EFX_BUG_ON_PARANOID(skb->data_len);
-               len = 32 + 1;
-               if (skb_pad(skb, len - skb->len))
-                       return NETDEV_TX_OK;
-       }
+       if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+               return -EIO;
 
-       /* Consider using PIO for short packets */
-#ifdef EFX_USE_PIO
-       if (skb->len <= efx_piobuf_size && !skb->xmit_more &&
-           efx_nic_may_tx_pio(tx_queue)) {
-               buffer = efx_enqueue_skb_pio(tx_queue, skb);
-               dma_flags = EFX_TX_BUF_OPTION;
-               goto finish_packet;
+       if (segment_count) {
+               /* For TSO we need to put the header in to a separate
+                * descriptor. Map this separately if necessary.
+                */
+               size_t header_len = skb_transport_header(skb) - skb->data +
+                               (tcp_hdr(skb)->doff << 2u);
+
+               if (header_len != len) {
+                       tx_queue->tso_long_headers++;
+                       efx_tx_map_chunk(tx_queue, dma_addr, header_len);
+                       len -= header_len;
+                       dma_addr += header_len;
+               }
        }
-#endif
 
-       /* Map for DMA.  Use dma_map_single rather than dma_map_page
-        * since this is more efficient on machines with sparse
-        * memory.
-        */
-       dma_flags = EFX_TX_BUF_MAP_SINGLE;
-       dma_addr = dma_map_single(dma_dev, skb->data, len, PCI_DMA_TODEVICE);
+       /* Add descriptors for each fragment. */
+       do {
+               struct efx_tx_buffer *buffer;
+               skb_frag_t *fragment;
 
-       /* Process all fragments */
-       while (1) {
-               if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
-                       goto dma_err;
+               buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
+
+               /* The final descriptor for a fragment is responsible for
+                * unmapping the whole fragment.
+                */
+               buffer->flags = EFX_TX_BUF_CONT | dma_flags;
+               buffer->unmap_len = unmap_len;
+               buffer->dma_offset = buffer->dma_addr - unmap_addr;
+
+               if (frag_index >= nr_frags) {
+                       /* Store SKB details with the final buffer for
+                        * the completion.
+                        */
+                       buffer->skb = skb;
+                       buffer->flags = EFX_TX_BUF_SKB | dma_flags;
+                       return 0;
+               }
 
-               /* Store fields for marking in the per-fragment final
-                * descriptor */
+               /* Move on to the next fragment. */
+               fragment = &skb_shinfo(skb)->frags[frag_index++];
+               len = skb_frag_size(fragment);
+               dma_addr = skb_frag_dma_map(dma_dev, fragment,
+                               0, len, DMA_TO_DEVICE);
+               dma_flags = 0;
                unmap_len = len;
                unmap_addr = dma_addr;
 
-               /* Add to TX queue, splitting across DMA boundaries */
-               do {
-                       buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+               if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+                       return -EIO;
+       } while (1);
+}
+
+/* Remove buffers put into a tx_queue.  None of the buffers must have
+ * an skb attached.
+ */
+static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
+{
+       struct efx_tx_buffer *buffer;
+
+       /* Work backwards until we hit the original insert pointer value */
+       while (tx_queue->insert_count != tx_queue->write_count) {
+               --tx_queue->insert_count;
+               buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
+               efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
+       }
+}
+
+/*
+ * Fall back to software TSO.
+ *
+ * This is used if we are unable to send a GSO packet through hardware TSO.
+ * This should only ever happen due to per-queue restrictions - unsupported
+ * packets should first be filtered by the feature flags.
+ *
+ * Returns 0 on success, error code otherwise.
+ */
+static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue,
+                              struct sk_buff *skb)
+{
+       struct sk_buff *segments, *next;
 
-                       dma_len = efx_max_tx_len(efx, dma_addr);
-                       if (likely(dma_len >= len))
-                               dma_len = len;
+       segments = skb_gso_segment(skb, 0);
+       if (IS_ERR(segments))
+               return PTR_ERR(segments);
 
-                       /* Fill out per descriptor fields */
-                       buffer->len = dma_len;
-                       buffer->dma_addr = dma_addr;
-                       buffer->flags = EFX_TX_BUF_CONT;
-                       len -= dma_len;
-                       dma_addr += dma_len;
-                       ++tx_queue->insert_count;
-               } while (len);
+       dev_kfree_skb_any(skb);
+       skb = segments;
 
-               /* Transfer ownership of the unmapping to the final buffer */
-               buffer->flags = EFX_TX_BUF_CONT | dma_flags;
-               buffer->unmap_len = unmap_len;
-               buffer->dma_offset = buffer->dma_addr - unmap_addr;
-               unmap_len = 0;
+       while (skb) {
+               next = skb->next;
+               skb->next = NULL;
 
-               /* Get address and size of next fragment */
-               if (i >= skb_shinfo(skb)->nr_frags)
-                       break;
-               fragment = &skb_shinfo(skb)->frags[i];
-               len = skb_frag_size(fragment);
-               i++;
-               /* Map for DMA */
-               dma_flags = 0;
-               dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
-                                           DMA_TO_DEVICE);
+               if (next)
+                       skb->xmit_more = true;
+               efx_enqueue_skb(tx_queue, skb);
+               skb = next;
        }
 
-       /* Transfer ownership of the skb to the final buffer */
+       return 0;
+}
+
+/*
+ * Add a socket buffer to a TX queue
+ *
+ * This maps all fragments of a socket buffer for DMA and adds them to
+ * the TX queue.  The queue's insert pointer will be incremented by
+ * the number of fragments in the socket buffer.
+ *
+ * If any DMA mapping fails, any mapped fragments will be unmapped and
+ * the queue's insert pointer will be restored to its original value.
+ *
+ * This function is split out from efx_hard_start_xmit to allow the
+ * loopback test to direct packets via specific TX queues.
+ *
+ * Returns NETDEV_TX_OK.
+ * You must hold netif_tx_lock() to call this function.
+ */
+netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+{
+       bool data_mapped = false;
+       unsigned int segments;
+       unsigned int skb_len;
+       int rc;
+
+       skb_len = skb->len;
+       segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0;
+       if (segments == 1)
+               segments = 0; /* Don't use TSO for a single segment. */
+
+       /* Handle TSO first - it's *possible* (although unlikely) that we might
+        * be passed a packet to segment that's smaller than the copybreak/PIO
+        * size limit.
+        */
+       if (segments) {
+               EFX_BUG_ON_PARANOID(!tx_queue->handle_tso);
+               rc = tx_queue->handle_tso(tx_queue, skb, &data_mapped);
+               if (rc == -EINVAL) {
+                       rc = efx_tx_tso_fallback(tx_queue, skb);
+                       tx_queue->tso_fallbacks++;
+                       if (rc == 0)
+                               return 0;
+               }
+               if (rc)
+                       goto err;
 #ifdef EFX_USE_PIO
-finish_packet:
+       } else if (skb_len <= efx_piobuf_size && !skb->xmit_more &&
+                  efx_nic_may_tx_pio(tx_queue)) {
+               /* Use PIO for short packets with an empty queue. */
+               if (efx_enqueue_skb_pio(tx_queue, skb))
+                       goto err;
+               tx_queue->pio_packets++;
+               data_mapped = true;
 #endif
-       buffer->skb = skb;
-       buffer->flags = EFX_TX_BUF_SKB | dma_flags;
+       } else if (skb->data_len && skb_len <= EFX_TX_CB_SIZE) {
+               /* Pad short packets or coalesce short fragmented packets. */
+               if (efx_enqueue_skb_copy(tx_queue, skb))
+                       goto err;
+               tx_queue->cb_packets++;
+               data_mapped = true;
+       }
 
-       netdev_tx_sent_queue(tx_queue->core_txq, skb->len);
+       /* Map for DMA and create descriptors if we haven't done so already. */
+       if (!data_mapped && (efx_tx_map_data(tx_queue, skb, segments)))
+               goto err;
 
-       efx_tx_maybe_stop_queue(tx_queue);
+       /* Update BQL */
+       netdev_tx_sent_queue(tx_queue->core_txq, skb_len);
 
        /* Pass off to hardware */
        if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
@@ -446,37 +553,22 @@ finish_packet:
                tx_queue->xmit_more_available = skb->xmit_more;
        }
 
-       tx_queue->tx_packets++;
+       if (segments) {
+               tx_queue->tso_bursts++;
+               tx_queue->tso_packets += segments;
+               tx_queue->tx_packets  += segments;
+       } else {
+               tx_queue->tx_packets++;
+       }
+
+       efx_tx_maybe_stop_queue(tx_queue);
 
        return NETDEV_TX_OK;
 
- dma_err:
-       netif_err(efx, tx_err, efx->net_dev,
-                 " TX queue %d could not map skb with %d bytes %d "
-                 "fragments for DMA\n", tx_queue->queue, skb->len,
-                 skb_shinfo(skb)->nr_frags + 1);
 
-       /* Mark the packet as transmitted, and free the SKB ourselves */
+err:
+       efx_enqueue_unwind(tx_queue);
        dev_kfree_skb_any(skb);
-
-       /* Work backwards until we hit the original insert pointer value */
-       while (tx_queue->insert_count != old_insert_count) {
-               unsigned int pkts_compl = 0, bytes_compl = 0;
-               --tx_queue->insert_count;
-               buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
-               efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
-       }
-
-       /* Free the fragment we were mid-way through pushing */
-       if (unmap_len) {
-               if (dma_flags & EFX_TX_BUF_MAP_SINGLE)
-                       dma_unmap_single(dma_dev, unmap_addr, unmap_len,
-                                        DMA_TO_DEVICE);
-               else
-                       dma_unmap_page(dma_dev, unmap_addr, unmap_len,
-                                      DMA_TO_DEVICE);
-       }
-
        return NETDEV_TX_OK;
 }
 
@@ -576,7 +668,7 @@ int efx_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
 
        num_tc = ntc->tc;
 
-       if (efx_nic_rev(efx) < EFX_REV_FALCON_B0 || num_tc > EFX_MAX_TX_TC)
+       if (num_tc > EFX_MAX_TX_TC)
                return -EINVAL;
 
        if (num_tc == net_dev->num_tc)
@@ -667,19 +759,9 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
        }
 }
 
-/* Size of page-based TSO header buffers.  Larger blocks must be
- * allocated from the heap.
- */
-#define TSOH_STD_SIZE  128
-#define TSOH_PER_PAGE  (PAGE_SIZE / TSOH_STD_SIZE)
-
-/* At most half the descriptors in the queue at any time will refer to
- * a TSO header buffer, since they must always be followed by a
- * payload descriptor referring to an skb.
- */
-static unsigned int efx_tsoh_page_count(struct efx_tx_queue *tx_queue)
+static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
 {
-       return DIV_ROUND_UP(tx_queue->ptr_mask + 1, 2 * TSOH_PER_PAGE);
+       return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EFX_TX_CB_ORDER);
 }
 
 int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
@@ -703,14 +785,11 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
        if (!tx_queue->buffer)
                return -ENOMEM;
 
-       if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) {
-               tx_queue->tsoh_page =
-                       kcalloc(efx_tsoh_page_count(tx_queue),
-                               sizeof(tx_queue->tsoh_page[0]), GFP_KERNEL);
-               if (!tx_queue->tsoh_page) {
-                       rc = -ENOMEM;
-                       goto fail1;
-               }
+       tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue),
+                                   sizeof(tx_queue->cb_page[0]), GFP_KERNEL);
+       if (!tx_queue->cb_page) {
+               rc = -ENOMEM;
+               goto fail1;
        }
 
        /* Allocate hardware ring */
@@ -721,8 +800,8 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
        return 0;
 
 fail2:
-       kfree(tx_queue->tsoh_page);
-       tx_queue->tsoh_page = NULL;
+       kfree(tx_queue->cb_page);
+       tx_queue->cb_page = NULL;
 fail1:
        kfree(tx_queue->buffer);
        tx_queue->buffer = NULL;
@@ -731,7 +810,9 @@ fail1:
 
 void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
 {
-       netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+       struct efx_nic *efx = tx_queue->efx;
+
+       netif_dbg(efx, drv, efx->net_dev,
                  "initialising TX queue %d\n", tx_queue->queue);
 
        tx_queue->insert_count = 0;
@@ -742,6 +823,11 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
        tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
        tx_queue->xmit_more_available = false;
 
+       /* Set up default function pointers. These may get replaced by
+        * efx_nic_init_tx() based on NIC/queue capabilities.
+        */
+       tx_queue->handle_tso = efx_enqueue_skb_tso;
+
        /* Set up TX descriptor ring */
        efx_nic_init_tx(tx_queue);
 
@@ -781,589 +867,14 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
                  "destroying TX queue %d\n", tx_queue->queue);
        efx_nic_remove_tx(tx_queue);
 
-       if (tx_queue->tsoh_page) {
-               for (i = 0; i < efx_tsoh_page_count(tx_queue); i++)
+       if (tx_queue->cb_page) {
+               for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++)
                        efx_nic_free_buffer(tx_queue->efx,
-                                           &tx_queue->tsoh_page[i]);
-               kfree(tx_queue->tsoh_page);
-               tx_queue->tsoh_page = NULL;
+                                           &tx_queue->cb_page[i]);
+               kfree(tx_queue->cb_page);
+               tx_queue->cb_page = NULL;
        }
 
        kfree(tx_queue->buffer);
        tx_queue->buffer = NULL;
 }
-
-
-/* Efx TCP segmentation acceleration.
- *
- * Why?  Because by doing it here in the driver we can go significantly
- * faster than the GSO.
- *
- * Requires TX checksum offload support.
- */
-
-#define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
-
-/**
- * struct tso_state - TSO state for an SKB
- * @out_len: Remaining length in current segment
- * @seqnum: Current sequence number
- * @ipv4_id: Current IPv4 ID, host endian
- * @packet_space: Remaining space in current packet
- * @dma_addr: DMA address of current position
- * @in_len: Remaining length in current SKB fragment
- * @unmap_len: Length of SKB fragment
- * @unmap_addr: DMA address of SKB fragment
- * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0
- * @protocol: Network protocol (after any VLAN header)
- * @ip_off: Offset of IP header
- * @tcp_off: Offset of TCP header
- * @header_len: Number of bytes of header
- * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload
- * @header_dma_addr: Header DMA address, when using option descriptors
- * @header_unmap_len: Header DMA mapped length, or 0 if not using option
- *     descriptors
- *
- * The state used during segmentation.  It is put into this data structure
- * just to make it easy to pass into inline functions.
- */
-struct tso_state {
-       /* Output position */
-       unsigned out_len;
-       unsigned seqnum;
-       u16 ipv4_id;
-       unsigned packet_space;
-
-       /* Input position */
-       dma_addr_t dma_addr;
-       unsigned in_len;
-       unsigned unmap_len;
-       dma_addr_t unmap_addr;
-       unsigned short dma_flags;
-
-       __be16 protocol;
-       unsigned int ip_off;
-       unsigned int tcp_off;
-       unsigned header_len;
-       unsigned int ip_base_len;
-       dma_addr_t header_dma_addr;
-       unsigned int header_unmap_len;
-};
-
-
-/*
- * Verify that our various assumptions about sk_buffs and the conditions
- * under which TSO will be attempted hold true.  Return the protocol number.
- */
-static __be16 efx_tso_check_protocol(struct sk_buff *skb)
-{
-       __be16 protocol = skb->protocol;
-
-       EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
-                           protocol);
-       if (protocol == htons(ETH_P_8021Q)) {
-               struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-               protocol = veh->h_vlan_encapsulated_proto;
-       }
-
-       if (protocol == htons(ETH_P_IP)) {
-               EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
-       } else {
-               EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IPV6));
-               EFX_BUG_ON_PARANOID(ipv6_hdr(skb)->nexthdr != NEXTHDR_TCP);
-       }
-       EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
-                            + (tcp_hdr(skb)->doff << 2u)) >
-                           skb_headlen(skb));
-
-       return protocol;
-}
-
-static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue,
-                              struct efx_tx_buffer *buffer, unsigned int len)
-{
-       u8 *result;
-
-       EFX_BUG_ON_PARANOID(buffer->len);
-       EFX_BUG_ON_PARANOID(buffer->flags);
-       EFX_BUG_ON_PARANOID(buffer->unmap_len);
-
-       if (likely(len <= TSOH_STD_SIZE - NET_IP_ALIGN)) {
-               unsigned index =
-                       (tx_queue->insert_count & tx_queue->ptr_mask) / 2;
-               struct efx_buffer *page_buf =
-                       &tx_queue->tsoh_page[index / TSOH_PER_PAGE];
-               unsigned offset =
-                       TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + NET_IP_ALIGN;
-
-               if (unlikely(!page_buf->addr) &&
-                   efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE,
-                                        GFP_ATOMIC))
-                       return NULL;
-
-               result = (u8 *)page_buf->addr + offset;
-               buffer->dma_addr = page_buf->dma_addr + offset;
-               buffer->flags = EFX_TX_BUF_CONT;
-       } else {
-               tx_queue->tso_long_headers++;
-
-               buffer->heap_buf = kmalloc(NET_IP_ALIGN + len, GFP_ATOMIC);
-               if (unlikely(!buffer->heap_buf))
-                       return NULL;
-               result = (u8 *)buffer->heap_buf + NET_IP_ALIGN;
-               buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP;
-       }
-
-       buffer->len = len;
-
-       return result;
-}
-
-/**
- * efx_tx_queue_insert - push descriptors onto the TX queue
- * @tx_queue:          Efx TX queue
- * @dma_addr:          DMA address of fragment
- * @len:               Length of fragment
- * @final_buffer:      The final buffer inserted into the queue
- *
- * Push descriptors onto the TX queue.
- */
-static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
-                               dma_addr_t dma_addr, unsigned len,
-                               struct efx_tx_buffer **final_buffer)
-{
-       struct efx_tx_buffer *buffer;
-       struct efx_nic *efx = tx_queue->efx;
-       unsigned dma_len;
-
-       EFX_BUG_ON_PARANOID(len <= 0);
-
-       while (1) {
-               buffer = efx_tx_queue_get_insert_buffer(tx_queue);
-               ++tx_queue->insert_count;
-
-               EFX_BUG_ON_PARANOID(tx_queue->insert_count -
-                                   tx_queue->read_count >=
-                                   efx->txq_entries);
-
-               buffer->dma_addr = dma_addr;
-
-               dma_len = efx_max_tx_len(efx, dma_addr);
-
-               /* If there is enough space to send then do so */
-               if (dma_len >= len)
-                       break;
-
-               buffer->len = dma_len;
-               buffer->flags = EFX_TX_BUF_CONT;
-               dma_addr += dma_len;
-               len -= dma_len;
-       }
-
-       EFX_BUG_ON_PARANOID(!len);
-       buffer->len = len;
-       *final_buffer = buffer;
-}
-
-
-/*
- * Put a TSO header into the TX queue.
- *
- * This is special-cased because we know that it is small enough to fit in
- * a single fragment, and we know it doesn't cross a page boundary.  It
- * also allows us to not worry about end-of-packet etc.
- */
-static int efx_tso_put_header(struct efx_tx_queue *tx_queue,
-                             struct efx_tx_buffer *buffer, u8 *header)
-{
-       if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) {
-               buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
-                                                 header, buffer->len,
-                                                 DMA_TO_DEVICE);
-               if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
-                                              buffer->dma_addr))) {
-                       kfree(buffer->heap_buf);
-                       buffer->len = 0;
-                       buffer->flags = 0;
-                       return -ENOMEM;
-               }
-               buffer->unmap_len = buffer->len;
-               buffer->dma_offset = 0;
-               buffer->flags |= EFX_TX_BUF_MAP_SINGLE;
-       }
-
-       ++tx_queue->insert_count;
-       return 0;
-}
-
-
-/* Remove buffers put into a tx_queue.  None of the buffers must have
- * an skb attached.
- */
-static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
-                              unsigned int insert_count)
-{
-       struct efx_tx_buffer *buffer;
-
-       /* Work backwards until we hit the original insert pointer value */
-       while (tx_queue->insert_count != insert_count) {
-               --tx_queue->insert_count;
-               buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
-               efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
-       }
-}
-
-
-/* Parse the SKB header and initialise state. */
-static int tso_start(struct tso_state *st, struct efx_nic *efx,
-                    struct efx_tx_queue *tx_queue,
-                    const struct sk_buff *skb)
-{
-       struct device *dma_dev = &efx->pci_dev->dev;
-       unsigned int header_len, in_len;
-       bool use_opt_desc = false;
-       dma_addr_t dma_addr;
-
-       if (tx_queue->tso_version == 1)
-               use_opt_desc = true;
-
-       st->ip_off = skb_network_header(skb) - skb->data;
-       st->tcp_off = skb_transport_header(skb) - skb->data;
-       header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
-       in_len = skb_headlen(skb) - header_len;
-       st->header_len = header_len;
-       st->in_len = in_len;
-       if (st->protocol == htons(ETH_P_IP)) {
-               st->ip_base_len = st->header_len - st->ip_off;
-               st->ipv4_id = ntohs(ip_hdr(skb)->id);
-       } else {
-               st->ip_base_len = st->header_len - st->tcp_off;
-               st->ipv4_id = 0;
-       }
-       st->seqnum = ntohl(tcp_hdr(skb)->seq);
-
-       EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
-       EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
-       EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);
-
-       st->out_len = skb->len - header_len;
-
-       if (!use_opt_desc) {
-               st->header_unmap_len = 0;
-
-               if (likely(in_len == 0)) {
-                       st->dma_flags = 0;
-                       st->unmap_len = 0;
-                       return 0;
-               }
-
-               dma_addr = dma_map_single(dma_dev, skb->data + header_len,
-                                         in_len, DMA_TO_DEVICE);
-               st->dma_flags = EFX_TX_BUF_MAP_SINGLE;
-               st->dma_addr = dma_addr;
-               st->unmap_addr = dma_addr;
-               st->unmap_len = in_len;
-       } else {
-               dma_addr = dma_map_single(dma_dev, skb->data,
-                                         skb_headlen(skb), DMA_TO_DEVICE);
-               st->header_dma_addr = dma_addr;
-               st->header_unmap_len = skb_headlen(skb);
-               st->dma_flags = 0;
-               st->dma_addr = dma_addr + header_len;
-               st->unmap_len = 0;
-       }
-
-       return unlikely(dma_mapping_error(dma_dev, dma_addr)) ? -ENOMEM : 0;
-}
-
-static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
-                           skb_frag_t *frag)
-{
-       st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0,
-                                         skb_frag_size(frag), DMA_TO_DEVICE);
-       if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) {
-               st->dma_flags = 0;
-               st->unmap_len = skb_frag_size(frag);
-               st->in_len = skb_frag_size(frag);
-               st->dma_addr = st->unmap_addr;
-               return 0;
-       }
-       return -ENOMEM;
-}
-
-
-/**
- * tso_fill_packet_with_fragment - form descriptors for the current fragment
- * @tx_queue:          Efx TX queue
- * @skb:               Socket buffer
- * @st:                        TSO state
- *
- * Form descriptors for the current fragment, until we reach the end
- * of fragment or end-of-packet.
- */
-static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
-                                         const struct sk_buff *skb,
-                                         struct tso_state *st)
-{
-       struct efx_tx_buffer *buffer;
-       int n;
-
-       if (st->in_len == 0)
-               return;
-       if (st->packet_space == 0)
-               return;
-
-       EFX_BUG_ON_PARANOID(st->in_len <= 0);
-       EFX_BUG_ON_PARANOID(st->packet_space <= 0);
-
-       n = min(st->in_len, st->packet_space);
-
-       st->packet_space -= n;
-       st->out_len -= n;
-       st->in_len -= n;
-
-       efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
-
-       if (st->out_len == 0) {
-               /* Transfer ownership of the skb */
-               buffer->skb = skb;
-               buffer->flags = EFX_TX_BUF_SKB;
-       } else if (st->packet_space != 0) {
-               buffer->flags = EFX_TX_BUF_CONT;
-       }
-
-       if (st->in_len == 0) {
-               /* Transfer ownership of the DMA mapping */
-               buffer->unmap_len = st->unmap_len;
-               buffer->dma_offset = buffer->unmap_len - buffer->len;
-               buffer->flags |= st->dma_flags;
-               st->unmap_len = 0;
-       }
-
-       st->dma_addr += n;
-}
-
-
-/**
- * tso_start_new_packet - generate a new header and prepare for the new packet
- * @tx_queue:          Efx TX queue
- * @skb:               Socket buffer
- * @st:                        TSO state
- *
- * Generate a new header and prepare for the new packet.  Return 0 on
- * success, or -%ENOMEM if failed to alloc header.
- */
-static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
-                               const struct sk_buff *skb,
-                               struct tso_state *st)
-{
-       struct efx_tx_buffer *buffer =
-               efx_tx_queue_get_insert_buffer(tx_queue);
-       bool is_last = st->out_len <= skb_shinfo(skb)->gso_size;
-       u8 tcp_flags_clear;
-
-       if (!is_last) {
-               st->packet_space = skb_shinfo(skb)->gso_size;
-               tcp_flags_clear = 0x09; /* mask out FIN and PSH */
-       } else {
-               st->packet_space = st->out_len;
-               tcp_flags_clear = 0x00;
-       }
-
-       if (!st->header_unmap_len) {
-               /* Allocate and insert a DMA-mapped header buffer. */
-               struct tcphdr *tsoh_th;
-               unsigned ip_length;
-               u8 *header;
-               int rc;
-
-               header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len);
-               if (!header)
-                       return -ENOMEM;
-
-               tsoh_th = (struct tcphdr *)(header + st->tcp_off);
-
-               /* Copy and update the headers. */
-               memcpy(header, skb->data, st->header_len);
-
-               tsoh_th->seq = htonl(st->seqnum);
-               ((u8 *)tsoh_th)[13] &= ~tcp_flags_clear;
-
-               ip_length = st->ip_base_len + st->packet_space;
-
-               if (st->protocol == htons(ETH_P_IP)) {
-                       struct iphdr *tsoh_iph =
-                               (struct iphdr *)(header + st->ip_off);
-
-                       tsoh_iph->tot_len = htons(ip_length);
-                       tsoh_iph->id = htons(st->ipv4_id);
-               } else {
-                       struct ipv6hdr *tsoh_iph =
-                               (struct ipv6hdr *)(header + st->ip_off);
-
-                       tsoh_iph->payload_len = htons(ip_length);
-               }
-
-               rc = efx_tso_put_header(tx_queue, buffer, header);
-               if (unlikely(rc))
-                       return rc;
-       } else {
-               /* Send the original headers with a TSO option descriptor
-                * in front
-                */
-               u8 tcp_flags = ((u8 *)tcp_hdr(skb))[13] & ~tcp_flags_clear;
-
-               buffer->flags = EFX_TX_BUF_OPTION;
-               buffer->len = 0;
-               buffer->unmap_len = 0;
-               EFX_POPULATE_QWORD_5(buffer->option,
-                                    ESF_DZ_TX_DESC_IS_OPT, 1,
-                                    ESF_DZ_TX_OPTION_TYPE,
-                                    ESE_DZ_TX_OPTION_DESC_TSO,
-                                    ESF_DZ_TX_TSO_TCP_FLAGS, tcp_flags,
-                                    ESF_DZ_TX_TSO_IP_ID, st->ipv4_id,
-                                    ESF_DZ_TX_TSO_TCP_SEQNO, st->seqnum);
-               ++tx_queue->insert_count;
-
-               /* We mapped the headers in tso_start().  Unmap them
-                * when the last segment is completed.
-                */
-               buffer = efx_tx_queue_get_insert_buffer(tx_queue);
-               buffer->dma_addr = st->header_dma_addr;
-               buffer->len = st->header_len;
-               if (is_last) {
-                       buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_MAP_SINGLE;
-                       buffer->unmap_len = st->header_unmap_len;
-                       buffer->dma_offset = 0;
-                       /* Ensure we only unmap them once in case of a
-                        * later DMA mapping error and rollback
-                        */
-                       st->header_unmap_len = 0;
-               } else {
-                       buffer->flags = EFX_TX_BUF_CONT;
-                       buffer->unmap_len = 0;
-               }
-               ++tx_queue->insert_count;
-       }
-
-       st->seqnum += skb_shinfo(skb)->gso_size;
-
-       /* Linux leaves suitable gaps in the IP ID space for us to fill. */
-       ++st->ipv4_id;
-
-       ++tx_queue->tso_packets;
-
-       ++tx_queue->tx_packets;
-
-       return 0;
-}
-
-
-/**
- * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
- * @tx_queue:          Efx TX queue
- * @skb:               Socket buffer
- *
- * Context: You must hold netif_tx_lock() to call this function.
- *
- * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
- * @skb was not enqueued.  In all cases @skb is consumed.  Return
- * %NETDEV_TX_OK.
- */
-static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
-                              struct sk_buff *skb)
-{
-       struct efx_nic *efx = tx_queue->efx;
-       unsigned int old_insert_count = tx_queue->insert_count;
-       int frag_i, rc;
-       struct tso_state state;
-
-       /* Find the packet protocol and sanity-check it */
-       state.protocol = efx_tso_check_protocol(skb);
-
-       rc = tso_start(&state, efx, tx_queue, skb);
-       if (rc)
-               goto mem_err;
-
-       if (likely(state.in_len == 0)) {
-               /* Grab the first payload fragment. */
-               EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
-               frag_i = 0;
-               rc = tso_get_fragment(&state, efx,
-                                     skb_shinfo(skb)->frags + frag_i);
-               if (rc)
-                       goto mem_err;
-       } else {
-               /* Payload starts in the header area. */
-               frag_i = -1;
-       }
-
-       if (tso_start_new_packet(tx_queue, skb, &state) < 0)
-               goto mem_err;
-
-       while (1) {
-               tso_fill_packet_with_fragment(tx_queue, skb, &state);
-
-               /* Move onto the next fragment? */
-               if (state.in_len == 0) {
-                       if (++frag_i >= skb_shinfo(skb)->nr_frags)
-                               /* End of payload reached. */
-                               break;
-                       rc = tso_get_fragment(&state, efx,
-                                             skb_shinfo(skb)->frags + frag_i);
-                       if (rc)
-                               goto mem_err;
-               }
-
-               /* Start at new packet? */
-               if (state.packet_space == 0 &&
-                   tso_start_new_packet(tx_queue, skb, &state) < 0)
-                       goto mem_err;
-       }
-
-       netdev_tx_sent_queue(tx_queue->core_txq, skb->len);
-
-       efx_tx_maybe_stop_queue(tx_queue);
-
-       /* Pass off to hardware */
-       if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
-               struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue);
-
-               /* There could be packets left on the partner queue if those
-                * SKBs had skb->xmit_more set. If we do not push those they
-                * could be left for a long time and cause a netdev watchdog.
-                */
-               if (txq2->xmit_more_available)
-                       efx_nic_push_buffers(txq2);
-
-               efx_nic_push_buffers(tx_queue);
-       } else {
-               tx_queue->xmit_more_available = skb->xmit_more;
-       }
-
-       tx_queue->tso_bursts++;
-       return NETDEV_TX_OK;
-
- mem_err:
-       netif_err(efx, tx_err, efx->net_dev,
-                 "Out of memory for TSO headers, or DMA mapping error\n");
-       dev_kfree_skb_any(skb);
-
-       /* Free the DMA mapping we were in the process of writing out */
-       if (state.unmap_len) {
-               if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE)
-                       dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr,
-                                        state.unmap_len, DMA_TO_DEVICE);
-               else
-                       dma_unmap_page(&efx->pci_dev->dev, state.unmap_addr,
-                                      state.unmap_len, DMA_TO_DEVICE);
-       }
-
-       /* Free the header DMA mapping, if using option descriptors */
-       if (state.header_unmap_len)
-               dma_unmap_single(&efx->pci_dev->dev, state.header_dma_addr,
-                                state.header_unmap_len, DMA_TO_DEVICE);
-
-       efx_enqueue_unwind(tx_queue, old_insert_count);
-       return NETDEV_TX_OK;
-}
diff --git a/drivers/net/ethernet/sfc/tx.h b/drivers/net/ethernet/sfc/tx.h
new file mode 100644 (file)
index 0000000..1cccc97
--- /dev/null
@@ -0,0 +1,27 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2015 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_TX_H
+#define EFX_TX_H
+
+#include <linux/types.h>
+
+/* Forward declarations so that this header can be included on its own.
+ * Without them the struct tags below would first appear inside a
+ * prototype's parameter list and so would not refer to the driver's
+ * real types.
+ */
+struct efx_tx_queue;
+struct efx_tx_buffer;
+struct sk_buff;
+
+/* Driver internal tx-path related declarations. */
+
+unsigned int efx_tx_limit_len(struct efx_tx_queue *tx_queue,
+                             dma_addr_t dma_addr, unsigned int len);
+
+u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue,
+                                  struct efx_tx_buffer *buffer, size_t len);
+
+int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+                       bool *data_mapped);
+
+#endif /* EFX_TX_H */
diff --git a/drivers/net/ethernet/sfc/tx_tso.c b/drivers/net/ethernet/sfc/tx_tso.c
new file mode 100644 (file)
index 0000000..6032887
--- /dev/null
@@ -0,0 +1,452 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2015 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/pci.h>
+#include <linux/tcp.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <net/ipv6.h>
+#include <linux/if_ether.h>
+#include <linux/highmem.h>
+#include <linux/moduleparam.h>
+#include <linux/cache.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "io.h"
+#include "nic.h"
+#include "tx.h"
+#include "workarounds.h"
+#include "ef10_regs.h"
+
+/* Efx legacy TCP segmentation acceleration.
+ *
+ * Utilises firmware support to go faster than GSO (but not as fast as TSOv2).
+ *
+ * Requires TX checksum offload support.
+ */
+
+/* Distance in bytes from p2 to p1, with both viewed as u8 pointers. */
+#define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
+
+/**
+ * struct tso_state - TSO state for an SKB
+ * @out_len: Payload bytes of the SKB that have not yet been placed in a
+ *     descriptor; starts at skb->len minus the header length
+ * @seqnum: TCP sequence number for the next packet, host endian
+ * @ipv4_id: IPv4 ID for the next packet, host endian (0 for IPv6)
+ * @packet_space: Remaining space in current packet
+ * @dma_addr: DMA address of current position
+ * @in_len: Remaining length in current SKB fragment
+ * @unmap_len: Length of the SKB fragment mapping still owned by this
+ *     state; reset to 0 once ownership has moved to a TX buffer
+ * @unmap_addr: DMA address of SKB fragment
+ * @protocol: Network protocol (after any VLAN header)
+ * @ip_off: Offset of IP header
+ * @tcp_off: Offset of TCP header
+ * @header_len: Number of bytes of header
+ * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload
+ * @header_dma_addr: Header DMA address
+ * @header_unmap_len: Header DMA mapped length; reset to 0 once the
+ *     mapping has been handed to the last packet's header buffer
+ *
+ * The state used during segmentation.  It is put into this data structure
+ * just to make it easy to pass into inline functions.
+ */
+struct tso_state {
+       /* Output position */
+       unsigned int out_len;
+       unsigned int seqnum;
+       u16 ipv4_id;
+       unsigned int packet_space;
+
+       /* Input position */
+       dma_addr_t dma_addr;
+       unsigned int in_len;
+       unsigned int unmap_len;
+       dma_addr_t unmap_addr;
+
+       __be16 protocol;
+       unsigned int ip_off;
+       unsigned int tcp_off;
+       unsigned int header_len;
+       unsigned int ip_base_len;
+       dma_addr_t header_dma_addr;
+       unsigned int header_unmap_len;
+};
+
+/* Prefetch the entries at the current insert index in both the software
+ * buffer array and the descriptor ring, together with the location 0x80
+ * bytes past each of them.
+ */
+static inline void prefetch_ptr(struct efx_tx_queue *tx_queue)
+{
+       unsigned int slot = efx_tx_queue_get_insert_index(tx_queue);
+       efx_qword_t *txd_ring = (efx_qword_t *)tx_queue->txd.buf.addr;
+       char *sw_entry = (char *)&tx_queue->buffer[slot];
+       char *hw_entry = (char *)&txd_ring[slot];
+
+       prefetch(sw_entry);
+       prefetch(sw_entry + 0x80);
+
+       prefetch(hw_entry);
+       prefetch(hw_entry + 0x80);
+}
+
+/**
+ * efx_tx_queue_insert - push descriptors onto the TX queue
+ * @tx_queue:          Efx TX queue
+ * @dma_addr:          DMA address of fragment
+ * @len:               Length of fragment
+ * @final_buffer:      The final buffer inserted into the queue
+ *
+ * Claim as many consecutive TX buffers as are needed to cover @len bytes
+ * starting at @dma_addr, asking the NIC type's tx_limit_len() how much
+ * each buffer may carry.  Every buffer except the last is marked
+ * EFX_TX_BUF_CONT; the last one is returned through @final_buffer with
+ * only its DMA address and length filled in.
+ */
+static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
+                               dma_addr_t dma_addr, unsigned int len,
+                               struct efx_tx_buffer **final_buffer)
+{
+       struct efx_nic *efx = tx_queue->efx;
+       struct efx_tx_buffer *slot;
+       unsigned int chunk;
+
+       EFX_BUG_ON_PARANOID(len <= 0);
+
+       for (;;) {
+               slot = efx_tx_queue_get_insert_buffer(tx_queue);
+               tx_queue->insert_count++;
+
+               EFX_BUG_ON_PARANOID(tx_queue->insert_count -
+                                   tx_queue->read_count >=
+                                   efx->txq_entries);
+
+               slot->dma_addr = dma_addr;
+               chunk = efx->type->tx_limit_len(tx_queue, dma_addr, len);
+
+               if (chunk >= len) {
+                       /* Everything left fits: this is the final buffer. */
+                       EFX_BUG_ON_PARANOID(!len);
+                       slot->len = len;
+                       *final_buffer = slot;
+                       return;
+               }
+
+               /* Partial buffer: record it and step past what it covers. */
+               slot->len = chunk;
+               slot->flags = EFX_TX_BUF_CONT;
+               dma_addr += chunk;
+               len -= chunk;
+       }
+}
+
+/*
+ * Verify that our various assumptions about sk_buffs and the conditions
+ * under which TSO will be attempted hold true.  Return the protocol number.
+ *
+ * The returned value is skb->protocol, or the encapsulated protocol taken
+ * from the VLAN header when skb->protocol is ETH_P_8021Q.  All checks are
+ * made with EFX_BUG_ON_PARANOID; nothing is reported to the caller.
+ */
+static __be16 efx_tso_check_protocol(struct sk_buff *skb)
+{
+       __be16 protocol = skb->protocol;
+
+       /* The Ethernet header at skb->data must agree with skb->protocol */
+       EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
+                           protocol);
+       if (protocol == htons(ETH_P_8021Q)) {
+               struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+
+               /* Look through the VLAN tag to the real network protocol */
+               protocol = veh->h_vlan_encapsulated_proto;
+       }
+
+       /* Only TCP over IPv4 or IPv6 is expected here */
+       if (protocol == htons(ETH_P_IP)) {
+               EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
+       } else {
+               EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IPV6));
+               EFX_BUG_ON_PARANOID(ipv6_hdr(skb)->nexthdr != NEXTHDR_TCP);
+       }
+       /* All headers, up to the end of the TCP header (doff is in 32-bit
+        * words), must lie within the linear area of the skb.
+        */
+       EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
+                            + (tcp_hdr(skb)->doff << 2u)) >
+                           skb_headlen(skb));
+
+       return protocol;
+}
+
+
+/**
+ * tso_start - parse the SKB headers and initialise the TSO state
+ * @st:                        TSO state; @st->protocol must already be set
+ * @efx:               Efx NIC, used for its DMA device
+ * @tx_queue:          Efx TX queue (not used here)
+ * @skb:               Socket buffer
+ *
+ * Record the header offsets and lengths, the starting TCP sequence number
+ * and IPv4 ID, then DMA-map the linear area of @skb.
+ *
+ * Return 0 on success, or -%ENOMEM if the DMA mapping failed.  On failure
+ * both @st->unmap_len and @st->header_unmap_len are zero, so an error path
+ * keyed on those lengths will not try to unmap a mapping that was never
+ * established.
+ */
+static int tso_start(struct tso_state *st, struct efx_nic *efx,
+                    struct efx_tx_queue *tx_queue,
+                    const struct sk_buff *skb)
+{
+       struct device *dma_dev = &efx->pci_dev->dev;
+       unsigned int header_len, in_len;
+       dma_addr_t dma_addr;
+
+       st->ip_off = skb_network_header(skb) - skb->data;
+       st->tcp_off = skb_transport_header(skb) - skb->data;
+       /* doff counts 32-bit words, so shift to get the TCP header bytes */
+       header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
+       /* Payload bytes that share the linear area with the headers */
+       in_len = skb_headlen(skb) - header_len;
+       st->header_len = header_len;
+       st->in_len = in_len;
+       if (st->protocol == htons(ETH_P_IP)) {
+               st->ip_base_len = st->header_len - st->ip_off;
+               st->ipv4_id = ntohs(ip_hdr(skb)->id);
+       } else {
+               st->ip_base_len = st->header_len - st->tcp_off;
+               st->ipv4_id = 0;
+       }
+       st->seqnum = ntohl(tcp_hdr(skb)->seq);
+
+       EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
+       EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
+       EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);
+
+       st->out_len = skb->len - header_len;
+
+       /* No fragment mapping is held yet, whatever happens below */
+       st->unmap_len = 0;
+
+       dma_addr = dma_map_single(dma_dev, skb->data,
+                                 skb_headlen(skb), DMA_TO_DEVICE);
+       if (unlikely(dma_mapping_error(dma_dev, dma_addr))) {
+               /* Nothing was mapped.  Leave the header unmap length at
+                * zero so that the caller's error path does not pass an
+                * invalid handle to dma_unmap_single().
+                */
+               st->header_unmap_len = 0;
+               return -ENOMEM;
+       }
+
+       st->header_dma_addr = dma_addr;
+       st->header_unmap_len = skb_headlen(skb);
+       st->dma_addr = dma_addr + header_len;
+
+       return 0;
+}
+
+/* DMA-map the whole of @frag and make it the current input fragment in
+ * @st.  Return 0 on success or -ENOMEM if the mapping failed, in which
+ * case only st->unmap_addr has been written.
+ */
+static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
+                           skb_frag_t *frag)
+{
+       struct device *dma_dev = &efx->pci_dev->dev;
+       unsigned int frag_len = skb_frag_size(frag);
+
+       st->unmap_addr = skb_frag_dma_map(dma_dev, frag, 0, frag_len,
+                                         DMA_TO_DEVICE);
+       if (unlikely(dma_mapping_error(dma_dev, st->unmap_addr)))
+               return -ENOMEM;
+
+       st->unmap_len = frag_len;
+       st->in_len = frag_len;
+       st->dma_addr = st->unmap_addr;
+       return 0;
+}
+
+
+/**
+ * tso_fill_packet_with_fragment - form descriptors for the current fragment
+ * @tx_queue:          Efx TX queue
+ * @skb:               Socket buffer
+ * @st:                        TSO state
+ *
+ * Form descriptors for the current fragment, until we reach the end
+ * of fragment or end-of-packet.
+ *
+ * Does nothing if either the current fragment or the current packet has
+ * no room left.  Otherwise the smaller of the two amounts is consumed
+ * from @st and inserted into the queue.
+ */
+static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
+                                         const struct sk_buff *skb,
+                                         struct tso_state *st)
+{
+       struct efx_tx_buffer *buffer;
+       int n;
+
+       if (st->in_len == 0)
+               return;
+       if (st->packet_space == 0)
+               return;
+
+       EFX_BUG_ON_PARANOID(st->in_len <= 0);
+       EFX_BUG_ON_PARANOID(st->packet_space <= 0);
+
+       /* Take as much as both the fragment and the packet allow */
+       n = min(st->in_len, st->packet_space);
+
+       st->packet_space -= n;
+       st->out_len -= n;
+       st->in_len -= n;
+
+       /* buffer is set to the last TX buffer used for these n bytes */
+       efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
+
+       if (st->out_len == 0) {
+               /* Transfer ownership of the skb */
+               buffer->skb = skb;
+               buffer->flags = EFX_TX_BUF_SKB;
+       } else if (st->packet_space != 0) {
+               /* More data will follow in this same packet */
+               buffer->flags = EFX_TX_BUF_CONT;
+       }
+
+       if (st->in_len == 0) {
+               /* Transfer ownership of the DMA mapping */
+               buffer->unmap_len = st->unmap_len;
+               /* This buffer holds the tail of the mapping, so its data
+                * starts (unmap_len - len) bytes into it.
+                */
+               buffer->dma_offset = buffer->unmap_len - buffer->len;
+               st->unmap_len = 0;
+       }
+
+       st->dma_addr += n;
+}
+
+
+/* Byte offset within the TCP header of the flags byte that
+ * tso_start_new_packet() copies into the TSO option descriptor.
+ */
+#define TCP_FLAGS_OFFSET 13
+
+/**
+ * tso_start_new_packet - generate a new header and prepare for the new packet
+ * @tx_queue:          Efx TX queue
+ * @skb:               Socket buffer
+ * @st:                        TSO state
+ *
+ * Queue a TSO option descriptor followed by a descriptor for the original
+ * headers, then advance the sequence number and IPv4 ID held in @st ready
+ * for the packet after this one.
+ *
+ * Return 0 on success, or -%EINVAL (after a WARN) if the header DMA
+ * mapping is no longer held in @st.
+ */
+static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
+                               const struct sk_buff *skb,
+                               struct tso_state *st)
+{
+       struct efx_tx_buffer *buffer =
+               efx_tx_queue_get_insert_buffer(tx_queue);
+       /* Last packet if what remains fits within a single gso_size */
+       bool is_last = st->out_len <= skb_shinfo(skb)->gso_size;
+       u8 tcp_flags_mask, tcp_flags;
+
+       if (!is_last) {
+               st->packet_space = skb_shinfo(skb)->gso_size;
+               tcp_flags_mask = 0x09; /* mask out FIN and PSH */
+       } else {
+               st->packet_space = st->out_len;
+               tcp_flags_mask = 0x00;
+       }
+
+       /* header_unmap_len is zeroed below once the last packet has taken
+        * the header mapping, so no further packet may be started.
+        */
+       if (WARN_ON(!st->header_unmap_len))
+               return -EINVAL;
+       /* Send the original headers with a TSO option descriptor
+        * in front
+        */
+       tcp_flags = ((u8 *)tcp_hdr(skb))[TCP_FLAGS_OFFSET] & ~tcp_flags_mask;
+
+       buffer->flags = EFX_TX_BUF_OPTION;
+       buffer->len = 0;
+       buffer->unmap_len = 0;
+       EFX_POPULATE_QWORD_5(buffer->option,
+                            ESF_DZ_TX_DESC_IS_OPT, 1,
+                            ESF_DZ_TX_OPTION_TYPE,
+                            ESE_DZ_TX_OPTION_DESC_TSO,
+                            ESF_DZ_TX_TSO_TCP_FLAGS, tcp_flags,
+                            ESF_DZ_TX_TSO_IP_ID, st->ipv4_id,
+                            ESF_DZ_TX_TSO_TCP_SEQNO, st->seqnum);
+       ++tx_queue->insert_count;
+
+       /* We mapped the headers in tso_start().  Unmap them
+        * when the last segment is completed.
+        */
+       buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+       buffer->dma_addr = st->header_dma_addr;
+       buffer->len = st->header_len;
+       if (is_last) {
+               buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_MAP_SINGLE;
+               buffer->unmap_len = st->header_unmap_len;
+               buffer->dma_offset = 0;
+               /* Ensure we only unmap them once in case of a
+                * later DMA mapping error and rollback
+                */
+               st->header_unmap_len = 0;
+       } else {
+               /* Not the owner of the mapping: nothing to unmap here */
+               buffer->flags = EFX_TX_BUF_CONT;
+               buffer->unmap_len = 0;
+       }
+       ++tx_queue->insert_count;
+
+       /* The next packet's payload starts one gso_size further on */
+       st->seqnum += skb_shinfo(skb)->gso_size;
+
+       /* Linux leaves suitable gaps in the IP ID space for us to fill. */
+       ++st->ipv4_id;
+
+       return 0;
+}
+
+/**
+ * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
+ * @tx_queue:          Efx TX queue
+ * @skb:               Socket buffer
+ * @data_mapped:        Did we map the data? Always set to true
+ *                      by this on success.
+ *
+ * Context: You must hold netif_tx_lock() to call this function.
+ *
+ * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
+ * @skb was not enqueued.  Returns -%EINVAL without touching @skb if the
+ * queue's TSO version is not 1, -%ENOMEM on a DMA mapping error, or
+ * another negative error from tso_start_new_packet().
+ *
+ * NOTE(review): no failure path in this function frees @skb, so consuming
+ * it on error is presumably left to the caller -- confirm.
+ */
+int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
+                       struct sk_buff *skb,
+                       bool *data_mapped)
+{
+       struct efx_nic *efx = tx_queue->efx;
+       int frag_i, rc;
+       struct tso_state state;
+
+       if (tx_queue->tso_version != 1)
+               return -EINVAL;
+
+       prefetch(skb->data);
+
+       /* Find the packet protocol and sanity-check it */
+       state.protocol = efx_tso_check_protocol(skb);
+
+       EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
+
+       /* Sets up every field of state that the fail path looks at */
+       rc = tso_start(&state, efx, tx_queue, skb);
+       if (rc)
+               goto fail;
+
+       if (likely(state.in_len == 0)) {
+               /* Grab the first payload fragment. */
+               EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
+               frag_i = 0;
+               rc = tso_get_fragment(&state, efx,
+                                     skb_shinfo(skb)->frags + frag_i);
+               if (rc)
+                       goto fail;
+       } else {
+               /* Payload starts in the header area. */
+               frag_i = -1;
+       }
+
+       rc = tso_start_new_packet(tx_queue, skb, &state);
+       if (rc)
+               goto fail;
+
+       prefetch_ptr(tx_queue);
+
+       /* Alternate between consuming input fragments and starting new
+        * output packets until the last fragment has been used up.
+        */
+       while (1) {
+               tso_fill_packet_with_fragment(tx_queue, skb, &state);
+
+               /* Move onto the next fragment? */
+               if (state.in_len == 0) {
+                       if (++frag_i >= skb_shinfo(skb)->nr_frags)
+                               /* End of payload reached. */
+                               break;
+                       rc = tso_get_fragment(&state, efx,
+                                             skb_shinfo(skb)->frags + frag_i);
+                       if (rc)
+                               goto fail;
+               }
+
+               /* Start at new packet? */
+               if (state.packet_space == 0) {
+                       rc = tso_start_new_packet(tx_queue, skb, &state);
+                       if (rc)
+                               goto fail;
+               }
+       }
+
+       *data_mapped = true;
+
+       return 0;
+
+fail:
+       if (rc == -ENOMEM)
+               netif_err(efx, tx_err, efx->net_dev,
+                         "Out of memory for TSO headers, or DMA mapping error\n");
+       else
+               netif_err(efx, tx_err, efx->net_dev, "TSO failed, rc = %d\n", rc);
+
+       /* Free the DMA mapping we were in the process of writing out */
+       if (state.unmap_len) {
+               dma_unmap_page(&efx->pci_dev->dev, state.unmap_addr,
+                              state.unmap_len, DMA_TO_DEVICE);
+       }
+
+       /* Free the header DMA mapping; the length is non-zero only while
+        * the mapping has not yet been handed to a TX buffer.
+        */
+       if (state.header_unmap_len)
+               dma_unmap_single(&efx->pci_dev->dev, state.header_dma_addr,
+                                state.header_unmap_len, DMA_TO_DEVICE);
+
+       return rc;
+}
diff --git a/drivers/net/ethernet/sfc/txc43128_phy.c b/drivers/net/ethernet/sfc/txc43128_phy.c
deleted file mode 100644 (file)
index 194f67d..0000000
+++ /dev/null
@@ -1,560 +0,0 @@
-/****************************************************************************
- * Driver for Solarflare network controllers and boards
- * Copyright 2006-2011 Solarflare Communications Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation, incorporated herein by reference.
- */
-
-/*
- * Driver for Transwitch/Mysticom CX4 retimer
- * see www.transwitch.com, part is TXC-43128
- */
-
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include "efx.h"
-#include "mdio_10g.h"
-#include "phy.h"
-#include "nic.h"
-
-/* We expect these MMDs to be in the package */
-#define TXC_REQUIRED_DEVS (MDIO_DEVS_PCS |     \
-                          MDIO_DEVS_PMAPMD |   \
-                          MDIO_DEVS_PHYXS)
-
-#define TXC_LOOPBACKS ((1 << LOOPBACK_PCS) |   \
-                      (1 << LOOPBACK_PMAPMD) | \
-                      (1 << LOOPBACK_PHYXS_WS))
-
-/**************************************************************************
- *
- * Compile-time config
- *
- **************************************************************************
- */
-#define TXCNAME "TXC43128"
-/* Total length of time we'll wait for the PHY to come out of reset (ms) */
-#define TXC_MAX_RESET_TIME     500
-/* Interval between checks (ms) */
-#define TXC_RESET_WAIT         10
-/* How long to run BIST (us) */
-#define TXC_BIST_DURATION      50
-
-/**************************************************************************
- *
- * Register definitions
- *
- **************************************************************************
- */
-
-/* Command register */
-#define TXC_GLRGS_GLCMD                0xc004
-/* Useful bits in command register */
-/* Lane power-down */
-#define TXC_GLCMD_L01PD_LBN    5
-#define TXC_GLCMD_L23PD_LBN    6
-/* Limited SW reset: preserves configuration but
- * initiates a logic reset. Self-clearing */
-#define TXC_GLCMD_LMTSWRST_LBN 14
-
-/* Signal Quality Control */
-#define TXC_GLRGS_GSGQLCTL     0xc01a
-/* Enable bit */
-#define TXC_GSGQLCT_SGQLEN_LBN 15
-/* Lane selection */
-#define TXC_GSGQLCT_LNSL_LBN   13
-#define TXC_GSGQLCT_LNSL_WIDTH 2
-
-/* Analog TX control */
-#define TXC_ALRGS_ATXCTL       0xc040
-/* Lane power-down */
-#define TXC_ATXCTL_TXPD3_LBN   15
-#define TXC_ATXCTL_TXPD2_LBN   14
-#define TXC_ATXCTL_TXPD1_LBN   13
-#define TXC_ATXCTL_TXPD0_LBN   12
-
-/* Amplitude on lanes 0, 1 */
-#define TXC_ALRGS_ATXAMP0      0xc041
-/* Amplitude on lanes 2, 3 */
-#define TXC_ALRGS_ATXAMP1      0xc042
-/* Bit position of value for lane 0 (or 2) */
-#define TXC_ATXAMP_LANE02_LBN  3
-/* Bit position of value for lane 1 (or 3) */
-#define TXC_ATXAMP_LANE13_LBN  11
-
-#define TXC_ATXAMP_1280_mV     0
-#define TXC_ATXAMP_1200_mV     8
-#define TXC_ATXAMP_1120_mV     12
-#define TXC_ATXAMP_1060_mV     14
-#define TXC_ATXAMP_0820_mV     25
-#define TXC_ATXAMP_0720_mV     26
-#define TXC_ATXAMP_0580_mV     27
-#define TXC_ATXAMP_0440_mV     28
-
-#define TXC_ATXAMP_0820_BOTH                                   \
-       ((TXC_ATXAMP_0820_mV << TXC_ATXAMP_LANE02_LBN)          \
-        | (TXC_ATXAMP_0820_mV << TXC_ATXAMP_LANE13_LBN))
-
-#define TXC_ATXAMP_DEFAULT     0x6060 /* From databook */
-
-/* Preemphasis on lanes 0, 1 */
-#define TXC_ALRGS_ATXPRE0      0xc043
-/* Preemphasis on lanes 2, 3 */
-#define TXC_ALRGS_ATXPRE1      0xc044
-
-#define TXC_ATXPRE_NONE 0
-#define TXC_ATXPRE_DEFAULT     0x1010 /* From databook */
-
-#define TXC_ALRGS_ARXCTL       0xc045
-/* Lane power-down */
-#define TXC_ARXCTL_RXPD3_LBN   15
-#define TXC_ARXCTL_RXPD2_LBN   14
-#define TXC_ARXCTL_RXPD1_LBN   13
-#define TXC_ARXCTL_RXPD0_LBN   12
-
-/* Main control */
-#define TXC_MRGS_CTL           0xc340
-/* Bits in main control */
-#define TXC_MCTL_RESET_LBN     15      /* Self clear */
-#define TXC_MCTL_TXLED_LBN     14      /* 1 to show align status */
-#define TXC_MCTL_RXLED_LBN     13      /* 1 to show align status */
-
-/* GPIO output */
-#define TXC_GPIO_OUTPUT                0xc346
-#define TXC_GPIO_DIR           0xc348
-
-/* Vendor-specific BIST registers */
-#define TXC_BIST_CTL           0xc280
-#define TXC_BIST_TXFRMCNT      0xc281
-#define TXC_BIST_RX0FRMCNT     0xc282
-#define TXC_BIST_RX1FRMCNT     0xc283
-#define TXC_BIST_RX2FRMCNT     0xc284
-#define TXC_BIST_RX3FRMCNT     0xc285
-#define TXC_BIST_RX0ERRCNT     0xc286
-#define TXC_BIST_RX1ERRCNT     0xc287
-#define TXC_BIST_RX2ERRCNT     0xc288
-#define TXC_BIST_RX3ERRCNT     0xc289
-
-/* BIST type (controls bit patter in test) */
-#define TXC_BIST_CTRL_TYPE_LBN 10
-#define TXC_BIST_CTRL_TYPE_TSD 0       /* TranSwitch Deterministic */
-#define TXC_BIST_CTRL_TYPE_CRP 1       /* CRPAT standard */
-#define TXC_BIST_CTRL_TYPE_CJP 2       /* CJPAT standard */
-#define TXC_BIST_CTRL_TYPE_TSR 3       /* TranSwitch pseudo-random */
-/* Set this to 1 for 10 bit and 0 for 8 bit */
-#define TXC_BIST_CTRL_B10EN_LBN        12
-/* Enable BIST (write 0 to disable) */
-#define TXC_BIST_CTRL_ENAB_LBN 13
-/* Stop BIST (self-clears when stop complete) */
-#define TXC_BIST_CTRL_STOP_LBN 14
-/* Start BIST (cleared by writing 1 to STOP) */
-#define TXC_BIST_CTRL_STRT_LBN 15
-
-/* Mt. Diablo test configuration */
-#define TXC_MTDIABLO_CTRL      0xc34f
-#define TXC_MTDIABLO_CTRL_PMA_LOOP_LBN 10
-
-struct txc43128_data {
-       unsigned long bug10934_timer;
-       enum efx_phy_mode phy_mode;
-       enum efx_loopback_mode loopback_mode;
-};
-
-/* The PHY sometimes needs a reset to bring the link back up.  So long as
- * it reports link down, we reset it every 5 seconds.
- */
-#define BUG10934_RESET_INTERVAL (5 * HZ)
-
-/* Perform a reset that doesn't clear configuration changes */
-static void txc_reset_logic(struct efx_nic *efx);
-
-/* Set the output value of a gpio */
-void falcon_txc_set_gpio_val(struct efx_nic *efx, int pin, int on)
-{
-       efx_mdio_set_flag(efx, MDIO_MMD_PHYXS, TXC_GPIO_OUTPUT, 1 << pin, on);
-}
-
-/* Set up the GPIO direction register */
-void falcon_txc_set_gpio_dir(struct efx_nic *efx, int pin, int dir)
-{
-       efx_mdio_set_flag(efx, MDIO_MMD_PHYXS, TXC_GPIO_DIR, 1 << pin, dir);
-}
-
-/* Reset the PMA/PMD MMD. The documentation is explicit that this does a
- * global reset (it's less clear what reset of other MMDs does).*/
-static int txc_reset_phy(struct efx_nic *efx)
-{
-       int rc = efx_mdio_reset_mmd(efx, MDIO_MMD_PMAPMD,
-                                   TXC_MAX_RESET_TIME / TXC_RESET_WAIT,
-                                   TXC_RESET_WAIT);
-       if (rc < 0)
-               goto fail;
-
-       /* Check that all the MMDs we expect are present and responding. */
-       rc = efx_mdio_check_mmds(efx, TXC_REQUIRED_DEVS);
-       if (rc < 0)
-               goto fail;
-
-       return 0;
-
-fail:
-       netif_err(efx, hw, efx->net_dev, TXCNAME ": reset timed out!\n");
-       return rc;
-}
-
-/* Run a single BIST on one MMD */
-static int txc_bist_one(struct efx_nic *efx, int mmd, int test)
-{
-       int ctrl, bctl;
-       int lane;
-       int rc = 0;
-
-       /* Set PMA to test into loopback using Mt Diablo reg as per app note */
-       ctrl = efx_mdio_read(efx, MDIO_MMD_PCS, TXC_MTDIABLO_CTRL);
-       ctrl |= (1 << TXC_MTDIABLO_CTRL_PMA_LOOP_LBN);
-       efx_mdio_write(efx, MDIO_MMD_PCS, TXC_MTDIABLO_CTRL, ctrl);
-
-       /* The BIST app. note lists these  as 3 distinct steps. */
-       /* Set the BIST type */
-       bctl = (test << TXC_BIST_CTRL_TYPE_LBN);
-       efx_mdio_write(efx, mmd, TXC_BIST_CTL, bctl);
-
-       /* Set the BSTEN bit in the BIST Control register to enable */
-       bctl |= (1 << TXC_BIST_CTRL_ENAB_LBN);
-       efx_mdio_write(efx, mmd, TXC_BIST_CTL, bctl);
-
-       /* Set the BSTRT bit in the BIST Control register */
-       efx_mdio_write(efx, mmd, TXC_BIST_CTL,
-                      bctl | (1 << TXC_BIST_CTRL_STRT_LBN));
-
-       /* Wait. */
-       udelay(TXC_BIST_DURATION);
-
-       /* Set the BSTOP bit in the BIST Control register */
-       bctl |= (1 << TXC_BIST_CTRL_STOP_LBN);
-       efx_mdio_write(efx, mmd, TXC_BIST_CTL, bctl);
-
-       /* The STOP bit should go off when things have stopped */
-       while (bctl & (1 << TXC_BIST_CTRL_STOP_LBN))
-               bctl = efx_mdio_read(efx, mmd, TXC_BIST_CTL);
-
-       /* Check all the error counts are 0 and all the frame counts are
-          non-zero */
-       for (lane = 0; lane < 4; lane++) {
-               int count = efx_mdio_read(efx, mmd, TXC_BIST_RX0ERRCNT + lane);
-               if (count != 0) {
-                       netif_err(efx, hw, efx->net_dev, TXCNAME": BIST error. "
-                                 "Lane %d had %d errs\n", lane, count);
-                       rc = -EIO;
-               }
-               count = efx_mdio_read(efx, mmd, TXC_BIST_RX0FRMCNT + lane);
-               if (count == 0) {
-                       netif_err(efx, hw, efx->net_dev, TXCNAME": BIST error. "
-                                 "Lane %d got 0 frames\n", lane);
-                       rc = -EIO;
-               }
-       }
-
-       if (rc == 0)
-               netif_info(efx, hw, efx->net_dev, TXCNAME": BIST pass\n");
-
-       /* Disable BIST */
-       efx_mdio_write(efx, mmd, TXC_BIST_CTL, 0);
-
-       /* Turn off loopback */
-       ctrl &= ~(1 << TXC_MTDIABLO_CTRL_PMA_LOOP_LBN);
-       efx_mdio_write(efx, MDIO_MMD_PCS, TXC_MTDIABLO_CTRL, ctrl);
-
-       return rc;
-}
-
-static int txc_bist(struct efx_nic *efx)
-{
-       return txc_bist_one(efx, MDIO_MMD_PCS, TXC_BIST_CTRL_TYPE_TSD);
-}
-
-/* Push the non-configurable defaults into the PHY. This must be
- * done after every full reset */
-static void txc_apply_defaults(struct efx_nic *efx)
-{
-       int mctrl;
-
-       /* Turn amplitude down and preemphasis off on the host side
-        * (PHY<->MAC) as this is believed less likely to upset Falcon
-        * and no adverse effects have been noted. It probably also
-        * saves a picowatt or two */
-
-       /* Turn off preemphasis */
-       efx_mdio_write(efx, MDIO_MMD_PHYXS, TXC_ALRGS_ATXPRE0, TXC_ATXPRE_NONE);
-       efx_mdio_write(efx, MDIO_MMD_PHYXS, TXC_ALRGS_ATXPRE1, TXC_ATXPRE_NONE);
-
-       /* Turn down the amplitude */
-       efx_mdio_write(efx, MDIO_MMD_PHYXS,
-                      TXC_ALRGS_ATXAMP0, TXC_ATXAMP_0820_BOTH);
-       efx_mdio_write(efx, MDIO_MMD_PHYXS,
-                      TXC_ALRGS_ATXAMP1, TXC_ATXAMP_0820_BOTH);
-
-       /* Set the line side amplitude and preemphasis to the databook
-        * defaults as an erratum causes them to be 0 on at least some
-        * PHY rev.s */
-       efx_mdio_write(efx, MDIO_MMD_PMAPMD,
-                      TXC_ALRGS_ATXPRE0, TXC_ATXPRE_DEFAULT);
-       efx_mdio_write(efx, MDIO_MMD_PMAPMD,
-                      TXC_ALRGS_ATXPRE1, TXC_ATXPRE_DEFAULT);
-       efx_mdio_write(efx, MDIO_MMD_PMAPMD,
-                      TXC_ALRGS_ATXAMP0, TXC_ATXAMP_DEFAULT);
-       efx_mdio_write(efx, MDIO_MMD_PMAPMD,
-                      TXC_ALRGS_ATXAMP1, TXC_ATXAMP_DEFAULT);
-
-       /* Set up the LEDs  */
-       mctrl = efx_mdio_read(efx, MDIO_MMD_PHYXS, TXC_MRGS_CTL);
-
-       /* Set the Green and Red LEDs to their default modes */
-       mctrl &= ~((1 << TXC_MCTL_TXLED_LBN) | (1 << TXC_MCTL_RXLED_LBN));
-       efx_mdio_write(efx, MDIO_MMD_PHYXS, TXC_MRGS_CTL, mctrl);
-
-       /* Databook recommends doing this after configuration changes */
-       txc_reset_logic(efx);
-
-       falcon_board(efx)->type->init_phy(efx);
-}
-
-static int txc43128_phy_probe(struct efx_nic *efx)
-{
-       struct txc43128_data *phy_data;
-
-       /* Allocate phy private storage */
-       phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL);
-       if (!phy_data)
-               return -ENOMEM;
-       efx->phy_data = phy_data;
-       phy_data->phy_mode = efx->phy_mode;
-
-       efx->mdio.mmds = TXC_REQUIRED_DEVS;
-       efx->mdio.mode_support = MDIO_SUPPORTS_C45 | MDIO_EMULATE_C22;
-
-       efx->loopback_modes = TXC_LOOPBACKS | FALCON_XMAC_LOOPBACKS;
-
-       return 0;
-}
-
-/* Initialisation entry point for this PHY driver */
-static int txc43128_phy_init(struct efx_nic *efx)
-{
-       int rc;
-
-       rc = txc_reset_phy(efx);
-       if (rc < 0)
-               return rc;
-
-       rc = txc_bist(efx);
-       if (rc < 0)
-               return rc;
-
-       txc_apply_defaults(efx);
-
-       return 0;
-}
-
-/* Set the lane power down state in the global registers */
-static void txc_glrgs_lane_power(struct efx_nic *efx, int mmd)
-{
-       int pd = (1 << TXC_GLCMD_L01PD_LBN) | (1 << TXC_GLCMD_L23PD_LBN);
-       int ctl = efx_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
-
-       if (!(efx->phy_mode & PHY_MODE_LOW_POWER))
-               ctl &= ~pd;
-       else
-               ctl |= pd;
-
-       efx_mdio_write(efx, mmd, TXC_GLRGS_GLCMD, ctl);
-}
-
-/* Set the lane power down state in the analog control registers */
-static void txc_analog_lane_power(struct efx_nic *efx, int mmd)
-{
-       int txpd = (1 << TXC_ATXCTL_TXPD3_LBN) | (1 << TXC_ATXCTL_TXPD2_LBN)
-               | (1 << TXC_ATXCTL_TXPD1_LBN) | (1 << TXC_ATXCTL_TXPD0_LBN);
-       int rxpd = (1 << TXC_ARXCTL_RXPD3_LBN) | (1 << TXC_ARXCTL_RXPD2_LBN)
-               | (1 << TXC_ARXCTL_RXPD1_LBN) | (1 << TXC_ARXCTL_RXPD0_LBN);
-       int txctl = efx_mdio_read(efx, mmd, TXC_ALRGS_ATXCTL);
-       int rxctl = efx_mdio_read(efx, mmd, TXC_ALRGS_ARXCTL);
-
-       if (!(efx->phy_mode & PHY_MODE_LOW_POWER)) {
-               txctl &= ~txpd;
-               rxctl &= ~rxpd;
-       } else {
-               txctl |= txpd;
-               rxctl |= rxpd;
-       }
-
-       efx_mdio_write(efx, mmd, TXC_ALRGS_ATXCTL, txctl);
-       efx_mdio_write(efx, mmd, TXC_ALRGS_ARXCTL, rxctl);
-}
-
-static void txc_set_power(struct efx_nic *efx)
-{
-       /* According to the data book, all the MMDs can do low power */
-       efx_mdio_set_mmds_lpower(efx,
-                                !!(efx->phy_mode & PHY_MODE_LOW_POWER),
-                                TXC_REQUIRED_DEVS);
-
-       /* Global register bank is in PCS, PHY XS. These control the host
-        * side and line side settings respectively. */
-       txc_glrgs_lane_power(efx, MDIO_MMD_PCS);
-       txc_glrgs_lane_power(efx, MDIO_MMD_PHYXS);
-
-       /* Analog register bank in PMA/PMD, PHY XS */
-       txc_analog_lane_power(efx, MDIO_MMD_PMAPMD);
-       txc_analog_lane_power(efx, MDIO_MMD_PHYXS);
-}
-
-static void txc_reset_logic_mmd(struct efx_nic *efx, int mmd)
-{
-       int val = efx_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
-       int tries = 50;
-
-       val |= (1 << TXC_GLCMD_LMTSWRST_LBN);
-       efx_mdio_write(efx, mmd, TXC_GLRGS_GLCMD, val);
-       while (--tries) {
-               val = efx_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
-               if (!(val & (1 << TXC_GLCMD_LMTSWRST_LBN)))
-                       break;
-               udelay(1);
-       }
-       if (!tries)
-               netif_info(efx, hw, efx->net_dev,
-                          TXCNAME " Logic reset timed out!\n");
-}
-
-/* Perform a logic reset. This preserves the configuration registers
- * and is needed for some configuration changes to take effect */
-static void txc_reset_logic(struct efx_nic *efx)
-{
-       /* The data sheet claims we can do the logic reset on either the
-        * PCS or the PHYXS and the result is a reset of both host- and
-        * line-side logic. */
-       txc_reset_logic_mmd(efx, MDIO_MMD_PCS);
-}
-
-static bool txc43128_phy_read_link(struct efx_nic *efx)
-{
-       return efx_mdio_links_ok(efx, TXC_REQUIRED_DEVS);
-}
-
-static int txc43128_phy_reconfigure(struct efx_nic *efx)
-{
-       struct txc43128_data *phy_data = efx->phy_data;
-       enum efx_phy_mode mode_change = efx->phy_mode ^ phy_data->phy_mode;
-       bool loop_change = LOOPBACK_CHANGED(phy_data, efx, TXC_LOOPBACKS);
-
-       if (efx->phy_mode & mode_change & PHY_MODE_TX_DISABLED) {
-               txc_reset_phy(efx);
-               txc_apply_defaults(efx);
-               falcon_reset_xaui(efx);
-               mode_change &= ~PHY_MODE_TX_DISABLED;
-       }
-
-       efx_mdio_transmit_disable(efx);
-       efx_mdio_phy_reconfigure(efx);
-       if (mode_change & PHY_MODE_LOW_POWER)
-               txc_set_power(efx);
-
-       /* The data sheet claims this is required after every reconfiguration
-        * (note at end of 7.1), but we mustn't do it when nothing changes as
-        * it glitches the link, and reconfigure gets called on link change,
-        * so we get an IRQ storm on link up. */
-       if (loop_change || mode_change)
-               txc_reset_logic(efx);
-
-       phy_data->phy_mode = efx->phy_mode;
-       phy_data->loopback_mode = efx->loopback_mode;
-
-       return 0;
-}
-
-static void txc43128_phy_fini(struct efx_nic *efx)
-{
-       /* Disable link events */
-       efx_mdio_write(efx, MDIO_MMD_PMAPMD, MDIO_PMA_LASI_CTRL, 0);
-}
-
-static void txc43128_phy_remove(struct efx_nic *efx)
-{
-       kfree(efx->phy_data);
-       efx->phy_data = NULL;
-}
-
-/* Periodic callback: this exists mainly to poll link status as we
- * don't use LASI interrupts */
-static bool txc43128_phy_poll(struct efx_nic *efx)
-{
-       struct txc43128_data *data = efx->phy_data;
-       bool was_up = efx->link_state.up;
-
-       efx->link_state.up = txc43128_phy_read_link(efx);
-       efx->link_state.speed = 10000;
-       efx->link_state.fd = true;
-       efx->link_state.fc = efx->wanted_fc;
-
-       if (efx->link_state.up || (efx->loopback_mode != LOOPBACK_NONE)) {
-               data->bug10934_timer = jiffies;
-       } else {
-               if (time_after_eq(jiffies, (data->bug10934_timer +
-                                           BUG10934_RESET_INTERVAL))) {
-                       data->bug10934_timer = jiffies;
-                       txc_reset_logic(efx);
-               }
-       }
-
-       return efx->link_state.up != was_up;
-}
-
-static const char *const txc43128_test_names[] = {
-       "bist"
-};
-
-static const char *txc43128_test_name(struct efx_nic *efx, unsigned int index)
-{
-       if (index < ARRAY_SIZE(txc43128_test_names))
-               return txc43128_test_names[index];
-       return NULL;
-}
-
-static int txc43128_run_tests(struct efx_nic *efx, int *results, unsigned flags)
-{
-       int rc;
-
-       if (!(flags & ETH_TEST_FL_OFFLINE))
-               return 0;
-
-       rc = txc_reset_phy(efx);
-       if (rc < 0)
-               return rc;
-
-       rc = txc_bist(efx);
-       txc_apply_defaults(efx);
-       results[0] = rc ? -1 : 1;
-       return rc;
-}
-
-static void txc43128_get_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
-{
-       mdio45_ethtool_gset(&efx->mdio, ecmd);
-}
-
-const struct efx_phy_operations falcon_txc_phy_ops = {
-       .probe          = txc43128_phy_probe,
-       .init           = txc43128_phy_init,
-       .reconfigure    = txc43128_phy_reconfigure,
-       .poll           = txc43128_phy_poll,
-       .fini           = txc43128_phy_fini,
-       .remove         = txc43128_phy_remove,
-       .get_settings   = txc43128_get_settings,
-       .set_settings   = efx_mdio_set_settings,
-       .test_alive     = efx_mdio_test_alive,
-       .run_tests      = txc43128_run_tests,
-       .test_name      = txc43128_test_name,
-};
index 351cd14cb9f952429f969c117b0c8faf3ae27995..103f827a16231e058160ea8e283adc51823a1928 100644 (file)
  * Bug numbers are from Solarflare's Bugzilla.
  */
 
-#define EFX_WORKAROUND_FALCON_A(efx) (efx_nic_rev(efx) <= EFX_REV_FALCON_A1)
-#define EFX_WORKAROUND_FALCON_AB(efx) (efx_nic_rev(efx) <= EFX_REV_FALCON_B0)
 #define EFX_WORKAROUND_SIENA(efx) (efx_nic_rev(efx) == EFX_REV_SIENA_A0)
 #define EFX_WORKAROUND_10G(efx) 1
 
 /* Bit-bashed I2C reads cause performance drop */
 #define EFX_WORKAROUND_7884 EFX_WORKAROUND_10G
-/* Truncated IPv4 packets can confuse the TX packet parser */
-#define EFX_WORKAROUND_15592 EFX_WORKAROUND_FALCON_AB
 /* Legacy interrupt storm when interrupt fifo fills */
 #define EFX_WORKAROUND_17213 EFX_WORKAROUND_SIENA
 
-/* Spurious parity errors in TSORT buffers */
-#define EFX_WORKAROUND_5129 EFX_WORKAROUND_FALCON_A
-/* Unaligned read request >512 bytes after aligning may break TSORT */
-#define EFX_WORKAROUND_5391 EFX_WORKAROUND_FALCON_A
-/* iSCSI parsing errors */
-#define EFX_WORKAROUND_5583 EFX_WORKAROUND_FALCON_A
-/* RX events go missing */
-#define EFX_WORKAROUND_5676 EFX_WORKAROUND_FALCON_A
-/* RX_RESET on A1 */
-#define EFX_WORKAROUND_6555 EFX_WORKAROUND_FALCON_A
-/* Increase filter depth to avoid RX_RESET */
-#define EFX_WORKAROUND_7244 EFX_WORKAROUND_FALCON_A
-/* Flushes may never complete */
-#define EFX_WORKAROUND_7803 EFX_WORKAROUND_FALCON_AB
-/* Leak overlength packets rather than free */
-#define EFX_WORKAROUND_8071 EFX_WORKAROUND_FALCON_A
-
 /* Lockup when writing event block registers at gen2/gen3 */
 #define EFX_EF10_WORKAROUND_35388(efx)                                 \
        (((struct efx_ef10_nic_data *)efx->nic_data)->workaround_35388)
index cdb343f0c6e0a92178d94f8078f1398a659c5218..be09573c6cedb9c31c66600db1ea5b54a0eadd48 100644 (file)
@@ -1956,11 +1956,6 @@ static void smsc911x_ethtool_getdrvinfo(struct net_device *dev,
                sizeof(info->bus_info));
 }
 
-static int smsc911x_ethtool_nwayreset(struct net_device *dev)
-{
-       return phy_start_aneg(dev->phydev);
-}
-
 static u32 smsc911x_ethtool_getmsglevel(struct net_device *dev)
 {
        struct smsc911x_data *pdata = netdev_priv(dev);
@@ -2132,7 +2127,7 @@ static int smsc911x_ethtool_set_eeprom(struct net_device *dev,
 static const struct ethtool_ops smsc911x_ethtool_ops = {
        .get_link = ethtool_op_get_link,
        .get_drvinfo = smsc911x_ethtool_getdrvinfo,
-       .nway_reset = smsc911x_ethtool_nwayreset,
+       .nway_reset = phy_ethtool_nway_reset,
        .get_msglevel = smsc911x_ethtool_getmsglevel,
        .set_msglevel = smsc911x_ethtool_setmsglevel,
        .get_regs_len = smsc911x_ethtool_getregslen,
index b7bfed4bc96bb4670f997f8ab7b93067892d7ad5..3174aebb322fe98e00a93466840aae2de7db33f7 100644 (file)
@@ -254,14 +254,6 @@ static void smsc9420_ethtool_set_msglevel(struct net_device *netdev, u32 data)
        pd->msg_enable = data;
 }
 
-static int smsc9420_ethtool_nway_reset(struct net_device *netdev)
-{
-       if (!netdev->phydev)
-               return -ENODEV;
-
-       return phy_start_aneg(netdev->phydev);
-}
-
 static int smsc9420_ethtool_getregslen(struct net_device *dev)
 {
        /* all smsc9420 registers plus all phy registers */
@@ -417,7 +409,7 @@ static const struct ethtool_ops smsc9420_ethtool_ops = {
        .get_drvinfo = smsc9420_ethtool_get_drvinfo,
        .get_msglevel = smsc9420_ethtool_get_msglevel,
        .set_msglevel = smsc9420_ethtool_set_msglevel,
-       .nway_reset = smsc9420_ethtool_nway_reset,
+       .nway_reset = phy_ethtool_nway_reset,
        .get_link = ethtool_op_get_link,
        .get_eeprom_len = smsc9420_ethtool_get_eeprom_len,
        .get_eeprom = smsc9420_ethtool_get_eeprom,
index 3818c5e06ebac5099f8051c813222349a2f6a6a5..d37e32d55ca9cb7b679ddf22cd2e2e33ae3180f8 100644 (file)
@@ -69,6 +69,17 @@ config DWMAC_MESON
          the stmmac device driver. This driver is used for Meson6,
          Meson8, Meson8b and GXBB SoCs.
 
+config DWMAC_OXNAS
+       tristate "Oxford Semiconductor OXNAS dwmac support"
+       default ARCH_OXNAS
+       depends on OF && COMMON_CLK && (ARCH_OXNAS || COMPILE_TEST)
+       select MFD_SYSCON
+       help
+         Support for Ethernet controller on Oxford Semiconductor OXNAS SoCs.
+
+         This selects the Oxford Semiconductor OXNAS SoC glue layer support for
+         the stmmac device driver. This driver is used for OX820.
+
 config DWMAC_ROCKCHIP
        tristate "Rockchip dwmac support"
        default ARCH_ROCKCHIP
@@ -107,7 +118,7 @@ config DWMAC_STI
 config DWMAC_STM32
        tristate "STM32 DWMAC support"
        default ARCH_STM32
-       depends on OF && HAS_IOMEM
+       depends on OF && HAS_IOMEM && (ARCH_STM32 || COMPILE_TEST)
        select MFD_SYSCON
        ---help---
          Support for ethernet controller on STM32 SOCs.
index 5d6ece5919b3e85639eceed1f3e84fe06dc3b972..8f83a86ba13c69e052a5077f834ebc8f21302d10 100644 (file)
@@ -10,6 +10,7 @@ obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o
 obj-$(CONFIG_DWMAC_IPQ806X)    += dwmac-ipq806x.o
 obj-$(CONFIG_DWMAC_LPC18XX)    += dwmac-lpc18xx.o
 obj-$(CONFIG_DWMAC_MESON)      += dwmac-meson.o dwmac-meson8b.o
+obj-$(CONFIG_DWMAC_OXNAS)      += dwmac-oxnas.o
 obj-$(CONFIG_DWMAC_ROCKCHIP)   += dwmac-rk.o
 obj-$(CONFIG_DWMAC_SOCFPGA)    += dwmac-altr-socfpga.o
 obj-$(CONFIG_DWMAC_STI)                += dwmac-sti.o
index 2920e2ee38647095afa97653c3d4dd9901f77d23..489ef146201e61c629c17010f672a621642e94b3 100644 (file)
@@ -63,8 +63,8 @@
 #define TSE_PCS_SGMII_LINK_TIMER_0                     0x0D40
 #define TSE_PCS_SGMII_LINK_TIMER_1                     0x0003
 #define TSE_PCS_SW_RESET_TIMEOUT                       100
-#define TSE_PCS_USE_SGMII_AN_MASK                      BIT(2)
-#define TSE_PCS_USE_SGMII_ENA                          BIT(1)
+#define TSE_PCS_USE_SGMII_AN_MASK                      BIT(1)
+#define TSE_PCS_USE_SGMII_ENA                          BIT(0)
 
 #define SGMII_ADAPTER_CTRL_REG                         0x00
 #define SGMII_ADAPTER_DISABLE                          0x0001
index b3e669af30055e234a8910e95f232ba2c47443d1..026e8e9cb9429bf60c775a531917322994946035 100644 (file)
@@ -34,7 +34,7 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
        unsigned int entry = priv->cur_tx;
        struct dma_desc *desc = priv->dma_tx + entry;
        unsigned int nopaged_len = skb_headlen(skb);
-       unsigned int bmax;
+       unsigned int bmax, des2;
        unsigned int i = 1, len;
 
        if (priv->plat->enh_desc)
@@ -44,11 +44,12 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 
        len = nopaged_len - bmax;
 
-       desc->des2 = dma_map_single(priv->device, skb->data,
-                                   bmax, DMA_TO_DEVICE);
-       if (dma_mapping_error(priv->device, desc->des2))
+       des2 = dma_map_single(priv->device, skb->data,
+                             bmax, DMA_TO_DEVICE);
+       desc->des2 = cpu_to_le32(des2);
+       if (dma_mapping_error(priv->device, des2))
                return -1;
-       priv->tx_skbuff_dma[entry].buf = desc->des2;
+       priv->tx_skbuff_dma[entry].buf = des2;
        priv->tx_skbuff_dma[entry].len = bmax;
        /* do not close the descriptor and do not set own bit */
        priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum, STMMAC_CHAIN_MODE,
@@ -60,12 +61,13 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
                desc = priv->dma_tx + entry;
 
                if (len > bmax) {
-                       desc->des2 = dma_map_single(priv->device,
-                                                   (skb->data + bmax * i),
-                                                   bmax, DMA_TO_DEVICE);
-                       if (dma_mapping_error(priv->device, desc->des2))
+                       des2 = dma_map_single(priv->device,
+                                             (skb->data + bmax * i),
+                                             bmax, DMA_TO_DEVICE);
+                       desc->des2 = cpu_to_le32(des2);
+                       if (dma_mapping_error(priv->device, des2))
                                return -1;
-                       priv->tx_skbuff_dma[entry].buf = desc->des2;
+                       priv->tx_skbuff_dma[entry].buf = des2;
                        priv->tx_skbuff_dma[entry].len = bmax;
                        priv->hw->desc->prepare_tx_desc(desc, 0, bmax, csum,
                                                        STMMAC_CHAIN_MODE, 1,
@@ -73,12 +75,13 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
                        len -= bmax;
                        i++;
                } else {
-                       desc->des2 = dma_map_single(priv->device,
-                                                   (skb->data + bmax * i), len,
-                                                   DMA_TO_DEVICE);
-                       if (dma_mapping_error(priv->device, desc->des2))
+                       des2 = dma_map_single(priv->device,
+                                             (skb->data + bmax * i), len,
+                                             DMA_TO_DEVICE);
+                       desc->des2 = cpu_to_le32(des2);
+                       if (dma_mapping_error(priv->device, des2))
                                return -1;
-                       priv->tx_skbuff_dma[entry].buf = desc->des2;
+                       priv->tx_skbuff_dma[entry].buf = des2;
                        priv->tx_skbuff_dma[entry].len = len;
                        /* last descriptor can be set now */
                        priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
@@ -119,19 +122,19 @@ static void stmmac_init_dma_chain(void *des, dma_addr_t phy_addr,
                struct dma_extended_desc *p = (struct dma_extended_desc *)des;
                for (i = 0; i < (size - 1); i++) {
                        dma_phy += sizeof(struct dma_extended_desc);
-                       p->basic.des3 = (unsigned int)dma_phy;
+                       p->basic.des3 = cpu_to_le32((unsigned int)dma_phy);
                        p++;
                }
-               p->basic.des3 = (unsigned int)phy_addr;
+               p->basic.des3 = cpu_to_le32((unsigned int)phy_addr);
 
        } else {
                struct dma_desc *p = (struct dma_desc *)des;
                for (i = 0; i < (size - 1); i++) {
                        dma_phy += sizeof(struct dma_desc);
-                       p->des3 = (unsigned int)dma_phy;
+                       p->des3 = cpu_to_le32((unsigned int)dma_phy);
                        p++;
                }
-               p->des3 = (unsigned int)phy_addr;
+               p->des3 = cpu_to_le32((unsigned int)phy_addr);
        }
 }
 
@@ -144,10 +147,10 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
                 * 1588-2002 time stamping is enabled, hence reinitialize it
                 * to keep explicit chaining in the descriptor.
                 */
-               p->des3 = (unsigned int)(priv->dma_rx_phy +
-                                        (((priv->dirty_rx) + 1) %
-                                         DMA_RX_SIZE) *
-                                        sizeof(struct dma_desc));
+               p->des3 = cpu_to_le32((unsigned int)(priv->dma_rx_phy +
+                                     (((priv->dirty_rx) + 1) %
+                                      DMA_RX_SIZE) *
+                                     sizeof(struct dma_desc)));
 }
 
 static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
@@ -161,9 +164,9 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
                 * 1588-2002 time stamping is enabled, hence reinitialize it
                 * to keep explicit chaining in the descriptor.
                 */
-               p->des3 = (unsigned int)((priv->dma_tx_phy +
-                                         ((priv->dirty_tx + 1) % DMA_TX_SIZE))
-                                         * sizeof(struct dma_desc));
+               p->des3 = cpu_to_le32((unsigned int)((priv->dma_tx_phy +
+                                     ((priv->dirty_tx + 1) % DMA_TX_SIZE))
+                                     * sizeof(struct dma_desc)));
 }
 
 const struct stmmac_mode_ops chain_mode_ops = {
index d3292c4a6eda3f6f9f17fecfc18e86880ebc4d61..5bd4c0549a73b2800e72fa00dc9501ae3fc3a168 100644 (file)
@@ -44,6 +44,7 @@
 #define        DWMAC_CORE_4_00 0x40
 #define STMMAC_CHAN0   0       /* Always supported and default for all chips */
 
+/* These need to be power of two, and >= 4 */
 #define DMA_TX_SIZE 512
 #define DMA_RX_SIZE 512
 #define STMMAC_GET_ENTRY(x, size)      ((x + 1) & (size - 1))
@@ -120,14 +121,17 @@ struct stmmac_extra_stats {
        unsigned long ip_csum_bypassed;
        unsigned long ipv4_pkt_rcvd;
        unsigned long ipv6_pkt_rcvd;
-       unsigned long rx_msg_type_ext_no_ptp;
-       unsigned long rx_msg_type_sync;
-       unsigned long rx_msg_type_follow_up;
-       unsigned long rx_msg_type_delay_req;
-       unsigned long rx_msg_type_delay_resp;
-       unsigned long rx_msg_type_pdelay_req;
-       unsigned long rx_msg_type_pdelay_resp;
-       unsigned long rx_msg_type_pdelay_follow_up;
+       unsigned long no_ptp_rx_msg_type_ext;
+       unsigned long ptp_rx_msg_type_sync;
+       unsigned long ptp_rx_msg_type_follow_up;
+       unsigned long ptp_rx_msg_type_delay_req;
+       unsigned long ptp_rx_msg_type_delay_resp;
+       unsigned long ptp_rx_msg_type_pdelay_req;
+       unsigned long ptp_rx_msg_type_pdelay_resp;
+       unsigned long ptp_rx_msg_type_pdelay_follow_up;
+       unsigned long ptp_rx_msg_type_announce;
+       unsigned long ptp_rx_msg_type_management;
+       unsigned long ptp_rx_msg_pkt_reserved_type;
        unsigned long ptp_frame_type;
        unsigned long ptp_ver;
        unsigned long timestamp_dropped;
@@ -482,11 +486,12 @@ struct stmmac_ops {
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
        void (*config_hw_tstamping) (void __iomem *ioaddr, u32 data);
-       u32 (*config_sub_second_increment) (void __iomem *ioaddr, u32 clk_rate);
+       u32 (*config_sub_second_increment)(void __iomem *ioaddr, u32 ptp_clock,
+                                          int gmac4);
        int (*init_systime) (void __iomem *ioaddr, u32 sec, u32 nsec);
        int (*config_addend) (void __iomem *ioaddr, u32 addend);
        int (*adjust_systime) (void __iomem *ioaddr, u32 sec, u32 nsec,
-                              int add_sub);
+                              int add_sub, int gmac4);
         u64(*get_systime) (void __iomem *ioaddr);
 };
 
index 2e4c171a2b4146f6276855ab14a3ba83e85680ad..faeeef75d7f17edbb69f483609d3b9c8e483fad3 100644 (file)
@@ -87,7 +87,7 @@
 #define        TDES0_ERROR_SUMMARY             BIT(15)
 #define        TDES0_IP_HEADER_ERROR           BIT(16)
 #define        TDES0_TIME_STAMP_STATUS         BIT(17)
-#define        TDES0_OWN                       BIT(31)
+#define        TDES0_OWN                       ((u32)BIT(31))  /* silence sparse */
 /* TDES1 */
 #define        TDES1_BUFFER1_SIZE_MASK         GENMASK(10, 0)
 #define        TDES1_BUFFER2_SIZE_MASK         GENMASK(21, 11)
 #define        ETDES0_FIRST_SEGMENT            BIT(28)
 #define        ETDES0_LAST_SEGMENT             BIT(29)
 #define        ETDES0_INTERRUPT                BIT(30)
-#define        ETDES0_OWN                      BIT(31)
+#define        ETDES0_OWN                      ((u32)BIT(31))  /* silence sparse */
 /* TDES1 */
 #define        ETDES1_BUFFER1_SIZE_MASK        GENMASK(12, 0)
 #define        ETDES1_BUFFER2_SIZE_MASK        GENMASK(28, 16)
 #define        ERDES4_L3_L4_FILT_NO_MATCH_MASK GENMASK(27, 26)
 
 /* Extended RDES4 message type definitions */
-#define RDES_EXT_NO_PTP                        0
-#define RDES_EXT_SYNC                  1
-#define RDES_EXT_FOLLOW_UP             2
-#define RDES_EXT_DELAY_REQ             3
-#define RDES_EXT_DELAY_RESP            4
-#define RDES_EXT_PDELAY_REQ            5
-#define RDES_EXT_PDELAY_RESP           6
-#define RDES_EXT_PDELAY_FOLLOW_UP      7
+#define RDES_EXT_NO_PTP                        0x0
+#define RDES_EXT_SYNC                  0x1
+#define RDES_EXT_FOLLOW_UP             0x2
+#define RDES_EXT_DELAY_REQ             0x3
+#define RDES_EXT_DELAY_RESP            0x4
+#define RDES_EXT_PDELAY_REQ            0x5
+#define RDES_EXT_PDELAY_RESP           0x6
+#define RDES_EXT_PDELAY_FOLLOW_UP      0x7
+#define RDES_PTP_ANNOUNCE              0x8
+#define RDES_PTP_MANAGEMENT            0x9
+#define RDES_PTP_SIGNALING             0xa
+#define RDES_PTP_PKT_RESERVED_TYPE     0xf
 
 /* Basic descriptor structure for normal and alternate descriptors */
 struct dma_desc {
-       unsigned int des0;
-       unsigned int des1;
-       unsigned int des2;
-       unsigned int des3;
+       __le32 des0;
+       __le32 des1;
+       __le32 des2;
+       __le32 des3;
 };
 
 /* Extended descriptor structure (e.g. >= databook 3.50a) */
 struct dma_extended_desc {
        struct dma_desc basic;  /* Basic descriptors */
-       unsigned int des4;      /* Extended Status */
-       unsigned int des5;      /* Reserved */
-       unsigned int des6;      /* Tx/Rx Timestamp Low */
-       unsigned int des7;      /* Tx/Rx Timestamp High */
+       __le32 des4;    /* Extended Status */
+       __le32 des5;    /* Reserved */
+       __le32 des6;    /* Tx/Rx Timestamp Low */
+       __le32 des7;    /* Tx/Rx Timestamp High */
 };
 
 /* Transmit checksum insertion control */
index 7635a464ce41c536b796665e8ad913c1426299c3..1d181e205d6ecbb49c5d173fcb6f3ea5bfe6fe40 100644 (file)
 /* Enhanced descriptors */
 static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end)
 {
-       p->des1 |= ((BUF_SIZE_8KiB - 1) << ERDES1_BUFFER2_SIZE_SHIFT)
-                  & ERDES1_BUFFER2_SIZE_MASK;
+       p->des1 |= cpu_to_le32(((BUF_SIZE_8KiB - 1)
+                       << ERDES1_BUFFER2_SIZE_SHIFT)
+                  & ERDES1_BUFFER2_SIZE_MASK);
 
        if (end)
-               p->des1 |= ERDES1_END_RING;
+               p->des1 |= cpu_to_le32(ERDES1_END_RING);
 }
 
 static inline void enh_desc_end_tx_desc_on_ring(struct dma_desc *p, int end)
 {
        if (end)
-               p->des0 |= ETDES0_END_RING;
+               p->des0 |= cpu_to_le32(ETDES0_END_RING);
        else
-               p->des0 &= ~ETDES0_END_RING;
+               p->des0 &= cpu_to_le32(~ETDES0_END_RING);
 }
 
 static inline void enh_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
 {
        if (unlikely(len > BUF_SIZE_4KiB)) {
-               p->des1 |= (((len - BUF_SIZE_4KiB) << ETDES1_BUFFER2_SIZE_SHIFT)
+               p->des1 |= cpu_to_le32((((len - BUF_SIZE_4KiB)
+                                       << ETDES1_BUFFER2_SIZE_SHIFT)
                            & ETDES1_BUFFER2_SIZE_MASK) | (BUF_SIZE_4KiB
-                           & ETDES1_BUFFER1_SIZE_MASK);
+                           & ETDES1_BUFFER1_SIZE_MASK));
        } else
-               p->des1 |= (len & ETDES1_BUFFER1_SIZE_MASK);
+               p->des1 |= cpu_to_le32((len & ETDES1_BUFFER1_SIZE_MASK));
 }
 
 /* Normal descriptors */
 static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end)
 {
-       p->des1 |= ((BUF_SIZE_2KiB - 1) << RDES1_BUFFER2_SIZE_SHIFT)
-                   & RDES1_BUFFER2_SIZE_MASK;
+       p->des1 |= cpu_to_le32(((BUF_SIZE_2KiB - 1)
+                               << RDES1_BUFFER2_SIZE_SHIFT)
+                   & RDES1_BUFFER2_SIZE_MASK);
 
        if (end)
-               p->des1 |= RDES1_END_RING;
+               p->des1 |= cpu_to_le32(RDES1_END_RING);
 }
 
 static inline void ndesc_end_tx_desc_on_ring(struct dma_desc *p, int end)
 {
        if (end)
-               p->des1 |= TDES1_END_RING;
+               p->des1 |= cpu_to_le32(TDES1_END_RING);
        else
-               p->des1 &= ~TDES1_END_RING;
+               p->des1 &= cpu_to_le32(~TDES1_END_RING);
 }
 
 static inline void norm_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
@@ -83,10 +86,11 @@ static inline void norm_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
        if (unlikely(len > BUF_SIZE_2KiB)) {
                unsigned int buffer1 = (BUF_SIZE_2KiB - 1)
                                        & TDES1_BUFFER1_SIZE_MASK;
-               p->des1 |= ((((len - buffer1) << TDES1_BUFFER2_SIZE_SHIFT)
-                           & TDES1_BUFFER2_SIZE_MASK) | buffer1);
+               p->des1 |= cpu_to_le32((((len - buffer1)
+                                       << TDES1_BUFFER2_SIZE_SHIFT)
+                               & TDES1_BUFFER2_SIZE_MASK) | buffer1);
        } else
-               p->des1 |= (len & TDES1_BUFFER1_SIZE_MASK);
+               p->des1 |= cpu_to_le32((len & TDES1_BUFFER1_SIZE_MASK));
 }
 
 /* Specific functions used for Chain mode */
@@ -94,32 +98,32 @@ static inline void norm_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
 /* Enhanced descriptors */
 static inline void ehn_desc_rx_set_on_chain(struct dma_desc *p)
 {
-       p->des1 |= ERDES1_SECOND_ADDRESS_CHAINED;
+       p->des1 |= cpu_to_le32(ERDES1_SECOND_ADDRESS_CHAINED);
 }
 
 static inline void enh_desc_end_tx_desc_on_chain(struct dma_desc *p)
 {
-       p->des0 |= ETDES0_SECOND_ADDRESS_CHAINED;
+       p->des0 |= cpu_to_le32(ETDES0_SECOND_ADDRESS_CHAINED);
 }
 
 static inline void enh_set_tx_desc_len_on_chain(struct dma_desc *p, int len)
 {
-       p->des1 |= (len & ETDES1_BUFFER1_SIZE_MASK);
+       p->des1 |= cpu_to_le32(len & ETDES1_BUFFER1_SIZE_MASK);
 }
 
 /* Normal descriptors */
 static inline void ndesc_rx_set_on_chain(struct dma_desc *p, int end)
 {
-       p->des1 |= RDES1_SECOND_ADDRESS_CHAINED;
+       p->des1 |= cpu_to_le32(RDES1_SECOND_ADDRESS_CHAINED);
 }
 
 static inline void ndesc_tx_set_on_chain(struct dma_desc *p)
 {
-       p->des1 |= TDES1_SECOND_ADDRESS_CHAINED;
+       p->des1 |= cpu_to_le32(TDES1_SECOND_ADDRESS_CHAINED);
 }
 
 static inline void norm_set_tx_desc_len_on_chain(struct dma_desc *p, int len)
 {
-       p->des1 |= len & TDES1_BUFFER1_SIZE_MASK;
+       p->des1 |= cpu_to_le32(len & TDES1_BUFFER1_SIZE_MASK);
 }
 #endif /* __DESC_COM_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
new file mode 100644 (file)
index 0000000..c355975
--- /dev/null
@@ -0,0 +1,217 @@
+/*
+ * Oxford Semiconductor OXNAS DWMAC glue layer
+ *
+ * Copyright (C) 2016 Neil Armstrong <narmstrong@baylibre.com>
+ * Copyright (C) 2014 Daniel Golle <daniel@makrotopia.org>
+ * Copyright (C) 2013 Ma Haijun <mahaijuns@gmail.com>
+ * Copyright (C) 2012 John Crispin <blogic@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+#include <linux/stmmac.h>
+
+#include "stmmac_platform.h"
+
+/* System Control regmap offsets */
+#define OXNAS_DWMAC_CTRL_REGOFFSET     0x78
+#define OXNAS_DWMAC_DELAY_REGOFFSET    0x100
+
+/* Control Register */
+#define DWMAC_CKEN_RX_IN        14
+#define DWMAC_CKEN_RXN_OUT      13
+#define DWMAC_CKEN_RX_OUT       12
+#define DWMAC_CKEN_TX_IN        10
+#define DWMAC_CKEN_TXN_OUT      9
+#define DWMAC_CKEN_TX_OUT       8
+#define DWMAC_RX_SOURCE         7
+#define DWMAC_TX_SOURCE         6
+#define DWMAC_LOW_TX_SOURCE     4
+#define DWMAC_AUTO_TX_SOURCE    3
+#define DWMAC_RGMII             2
+#define DWMAC_SIMPLE_MUX        1
+#define DWMAC_CKEN_GTX          0
+
+/* Delay register */
+#define DWMAC_TX_VARDELAY_SHIFT                0
+#define DWMAC_TXN_VARDELAY_SHIFT       8
+#define DWMAC_RX_VARDELAY_SHIFT                16
+#define DWMAC_RXN_VARDELAY_SHIFT       24
+#define DWMAC_TX_VARDELAY(d)           ((d) << DWMAC_TX_VARDELAY_SHIFT)
+#define DWMAC_TXN_VARDELAY(d)          ((d) << DWMAC_TXN_VARDELAY_SHIFT)
+#define DWMAC_RX_VARDELAY(d)           ((d) << DWMAC_RX_VARDELAY_SHIFT)
+#define DWMAC_RXN_VARDELAY(d)          ((d) << DWMAC_RXN_VARDELAY_SHIFT)
+
+struct oxnas_dwmac {   /* per-device glue state, stored in plat_dat->bsp_priv */
+       struct device   *dev;   /* &pdev->dev; passed to device_reset() */
+       struct clk      *clk;   /* "gmac" clock, enabled in oxnas_dwmac_init() */
+       struct regmap   *regmap;        /* sys-ctrl syscon: CTRL and DELAY registers */
+};
+
+/*
+ * Reset the MAC, enable its clock and program the sys-ctrl glue registers:
+ * first the clock-enable / tx clock source bits in the control register,
+ * then the tx/rx variable delays in the delay register.
+ *
+ * Returns 0 on success or a negative errno.  If a regmap access fails after
+ * the clock has been enabled, the clock is disabled again before returning,
+ * so the caller never has to undo a failed init.
+ */
+static int oxnas_dwmac_init(struct oxnas_dwmac *dwmac)
+{
+       unsigned int value;
+       int ret;
+
+       /* Reset HW here before changing the glue configuration */
+       ret = device_reset(dwmac->dev);
+       if (ret)
+               return ret;
+
+       ret = clk_prepare_enable(dwmac->clk);
+       if (ret)
+               return ret;
+
+       ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value);
+       if (ret < 0)
+               goto err_disable_clk;
+
+       /* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */
+       value |= BIT(DWMAC_CKEN_GTX)            |
+                /* Use simple mux for 25/125 MHz clock switching */
+                BIT(DWMAC_SIMPLE_MUX)          |
+                /* set auto switch tx clock source */
+                BIT(DWMAC_AUTO_TX_SOURCE)      |
+                /* enable tx & rx vardelay */
+                BIT(DWMAC_CKEN_TX_OUT)         |
+                BIT(DWMAC_CKEN_TXN_OUT)        |
+                BIT(DWMAC_CKEN_TX_IN)          |
+                BIT(DWMAC_CKEN_RX_OUT)         |
+                BIT(DWMAC_CKEN_RXN_OUT)        |
+                BIT(DWMAC_CKEN_RX_IN);
+       /* Writes are checked like the read above instead of being ignored */
+       ret = regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value);
+       if (ret < 0)
+               goto err_disable_clk;
+
+       /* set tx & rx vardelay */
+       value = DWMAC_TX_VARDELAY(4)    |
+               DWMAC_TXN_VARDELAY(2)   |
+               DWMAC_RX_VARDELAY(10)   |
+               DWMAC_RXN_VARDELAY(8);
+       ret = regmap_write(dwmac->regmap, OXNAS_DWMAC_DELAY_REGOFFSET, value);
+       if (ret < 0)
+               goto err_disable_clk;
+
+       return 0;
+
+err_disable_clk:
+       clk_disable_unprepare(dwmac->clk);
+       return ret;
+}
+
+/*
+ * Probe the OX820 glue: resolve the sys-ctrl syscon regmap and the "gmac"
+ * clock, program the glue registers through oxnas_dwmac_init() and hand the
+ * device over to the stmmac core.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int oxnas_dwmac_probe(struct platform_device *pdev)
+{
+       struct plat_stmmacenet_data *plat_dat;
+       struct stmmac_resources stmmac_res;
+       struct device_node *sysctrl;
+       struct oxnas_dwmac *dwmac;
+       struct regmap *regmap;
+       int ret;
+
+       sysctrl = of_parse_phandle(pdev->dev.of_node, "oxsemi,sys-ctrl", 0);
+       if (!sysctrl) {
+               dev_err(&pdev->dev, "failed to get sys-ctrl node\n");
+               return -EINVAL;
+       }
+
+       /*
+        * of_parse_phandle() hands back the node with its refcount raised.
+        * Resolve the regmap right away and drop the reference, so neither
+        * the success path nor any error return below can leak it.
+        */
+       regmap = syscon_node_to_regmap(sysctrl);
+       of_node_put(sysctrl);
+       if (IS_ERR(regmap)) {
+               dev_err(&pdev->dev, "failed to have sysctrl regmap\n");
+               return PTR_ERR(regmap);
+       }
+
+       ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+       if (ret)
+               return ret;
+
+       plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+       if (IS_ERR(plat_dat))
+               return PTR_ERR(plat_dat);
+
+       /*
+        * NOTE(review): the error returns below do not undo
+        * stmmac_probe_config_dt() - confirm whether the platform helper
+        * needs an explicit teardown on these paths.
+        */
+       dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
+       if (!dwmac)
+               return -ENOMEM;
+
+       dwmac->dev = &pdev->dev;
+       dwmac->regmap = regmap;
+       plat_dat->bsp_priv = dwmac;
+
+       dwmac->clk = devm_clk_get(&pdev->dev, "gmac");
+       if (IS_ERR(dwmac->clk))
+               return PTR_ERR(dwmac->clk);
+
+       /* On failure oxnas_dwmac_init() returns without the clock kept on */
+       ret = oxnas_dwmac_init(dwmac);
+       if (ret)
+               return ret;
+
+       ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+       if (ret)
+               clk_disable_unprepare(dwmac->clk);
+
+       return ret;
+}
+
+/*
+ * Unbind: tear down the stmmac core, then gate the "gmac" clock.
+ * Returns whatever stmmac_dvr_remove() returned.
+ */
+static int oxnas_dwmac_remove(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct oxnas_dwmac *glue;
+       int status;
+
+       /* look up the glue state before the core is removed */
+       glue = get_stmmac_bsp_priv(dev);
+
+       status = stmmac_dvr_remove(dev);
+       clk_disable_unprepare(glue->clk);
+
+       return status;
+}
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * System-sleep suspend hook: suspend the stmmac core, then gate the "gmac"
+ * clock.
+ *
+ * The clock is only disabled once stmmac_suspend() has succeeded: when the
+ * core refuses to suspend, the device stays active and must keep its clock.
+ *
+ * Returns 0 on success or the error from stmmac_suspend().
+ */
+static int oxnas_dwmac_suspend(struct device *dev)
+{
+       struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev);
+       int ret;
+
+       ret = stmmac_suspend(dev);
+       if (ret)
+               return ret;
+
+       clk_disable_unprepare(dwmac->clk);
+
+       return 0;
+}
+
+/*
+ * System-sleep resume hook: redo the glue setup through oxnas_dwmac_init()
+ * and, only if that succeeded, resume the stmmac core.
+ *
+ * Returns the first error encountered, or the result of stmmac_resume().
+ */
+static int oxnas_dwmac_resume(struct device *dev)
+{
+       int err = oxnas_dwmac_init(get_stmmac_bsp_priv(dev));
+
+       return err ? err : stmmac_resume(dev);
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(oxnas_dwmac_pm_ops,   /* referenced by oxnas_dwmac_driver.driver.pm */
+       oxnas_dwmac_suspend, oxnas_dwmac_resume);       /* both defined under CONFIG_PM_SLEEP */
+
+static const struct of_device_id oxnas_dwmac_match[] = {       /* DT compatibles bound by this driver */
+       { .compatible = "oxsemi,ox820-dwmac" },
+       { }     /* sentinel: empty entry ends the table */
+};
+MODULE_DEVICE_TABLE(of, oxnas_dwmac_match);
+
+static struct platform_driver oxnas_dwmac_driver = {   /* platform driver for the OXNAS dwmac glue */
+       .probe  = oxnas_dwmac_probe,
+       .remove = oxnas_dwmac_remove,
+       .driver = {
+               .name           = "oxnas-dwmac",
+               .pm             = &oxnas_dwmac_pm_ops,  /* suspend/resume hooks */
+               .of_match_table = oxnas_dwmac_match,    /* "oxsemi,ox820-dwmac" */
+       },
+};
+module_platform_driver(oxnas_dwmac_driver);
+
+MODULE_AUTHOR("Neil Armstrong <narmstrong@baylibre.com>");
+MODULE_DESCRIPTION("Oxford Semiconductor OXNAS DWMAC glue layer");
+MODULE_LICENSE("GPL v2");
index 3740a4417fa0297c2631a08785dd9345e7cb0371..6b787d73b32ab94c84fb967a824577c06cc873a6 100644 (file)
@@ -901,44 +901,6 @@ static void rk_gmac_powerdown(struct rk_priv_data *gmac)
        gmac_clk_enable(gmac, false);
 }
 
-static int rk_gmac_init(struct platform_device *pdev, void *priv)
-{
-       struct rk_priv_data *bsp_priv = priv;
-
-       return rk_gmac_powerup(bsp_priv);
-}
-
-static void rk_gmac_exit(struct platform_device *pdev, void *priv)
-{
-       struct rk_priv_data *bsp_priv = priv;
-
-       rk_gmac_powerdown(bsp_priv);
-}
-
-static void rk_gmac_suspend(struct platform_device *pdev, void *priv)
-{
-       struct rk_priv_data *bsp_priv = priv;
-
-       /* Keep the PHY up if we use Wake-on-Lan. */
-       if (device_may_wakeup(&pdev->dev))
-               return;
-
-       rk_gmac_powerdown(bsp_priv);
-       bsp_priv->suspended = true;
-}
-
-static void rk_gmac_resume(struct platform_device *pdev, void *priv)
-{
-       struct rk_priv_data *bsp_priv = priv;
-
-       /* The PHY was up for Wake-on-Lan. */
-       if (!bsp_priv->suspended)
-               return;
-
-       rk_gmac_powerup(bsp_priv);
-       bsp_priv->suspended = false;
-}
-
 static void rk_fix_speed(void *priv, unsigned int speed)
 {
        struct rk_priv_data *bsp_priv = priv;
@@ -974,23 +936,60 @@ static int rk_gmac_probe(struct platform_device *pdev)
                return PTR_ERR(plat_dat);
 
        plat_dat->has_gmac = true;
-       plat_dat->init = rk_gmac_init;
-       plat_dat->exit = rk_gmac_exit;
        plat_dat->fix_mac_speed = rk_fix_speed;
-       plat_dat->suspend = rk_gmac_suspend;
-       plat_dat->resume = rk_gmac_resume;
 
        plat_dat->bsp_priv = rk_gmac_setup(pdev, data);
        if (IS_ERR(plat_dat->bsp_priv))
                return PTR_ERR(plat_dat->bsp_priv);
 
-       ret = rk_gmac_init(pdev, plat_dat->bsp_priv);
+       ret = rk_gmac_powerup(plat_dat->bsp_priv);
        if (ret)
                return ret;
 
        return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 }
 
+/*
+ * Platform remove callback: take down the stmmac core first, then power
+ * the GMAC down.  The status of stmmac_dvr_remove() is passed back.
+ */
+static int rk_gmac_remove(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       /* Private data is looked up before stmmac_dvr_remove() runs. */
+       struct rk_priv_data *gmac = get_stmmac_bsp_priv(dev);
+       int err;
+
+       err = stmmac_dvr_remove(dev);
+       rk_gmac_powerdown(gmac);
+
+       return err;
+}
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * System suspend: suspend the stmmac core and, unless the device is a
+ * wakeup source, power the GMAC down and remember that we did so.
+ * Returns the status reported by stmmac_suspend().
+ */
+static int rk_gmac_suspend(struct device *dev)
+{
+       struct rk_priv_data *gmac = get_stmmac_bsp_priv(dev);
+       int err = stmmac_suspend(dev);
+
+       /* Keep the PHY up if we use Wake-on-Lan. */
+       if (device_may_wakeup(dev))
+               return err;
+
+       rk_gmac_powerdown(gmac);
+       gmac->suspended = true;
+
+       return err;
+}
+
+/*
+ * System resume: power the GMAC back up if suspend powered it down, then
+ * resume the stmmac core.
+ *
+ * Returns 0 on success or a negative errno.  A failed power-up aborts the
+ * resume so the core is not restarted on unpowered hardware, and leaves
+ * ->suspended set because the GMAC is still down.
+ */
+static int rk_gmac_resume(struct device *dev)
+{
+       struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev);
+       int ret;
+
+       /*
+        * ->suspended is only set when suspend powered the GMAC down; when
+        * the PHY was kept up for Wake-on-Lan there is nothing to restore.
+        */
+       if (bsp_priv->suspended) {
+               ret = rk_gmac_powerup(bsp_priv);
+               if (ret)
+                       return ret;
+
+               bsp_priv->suspended = false;
+       }
+
+       return stmmac_resume(dev);
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(rk_gmac_pm_ops, rk_gmac_suspend, rk_gmac_resume); /* system sleep ops used by rk_gmac_dwmac_driver */
+
 static const struct of_device_id rk_gmac_dwmac_match[] = {
        { .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops },
        { .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops },
@@ -1003,10 +1002,10 @@ MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match);
 
 static struct platform_driver rk_gmac_dwmac_driver = {
        .probe  = rk_gmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove = rk_gmac_remove,
        .driver = {
                .name           = "rk_gmac-dwmac",
-               .pm             = &stmmac_pltfr_pm_ops,
+               .pm             = &rk_gmac_pm_ops,
                .of_match_table = rk_gmac_dwmac_match,
        },
 };
index 58c05acc2aabbdf63419874605ae11af298471ca..c9006ab083d5a08a799d4493a677221abc5d41a4 100644 (file)
@@ -126,8 +126,8 @@ struct sti_dwmac {
        struct clk *clk;        /* PHY clock */
        u32 ctrl_reg;           /* GMAC glue-logic control register */
        int clk_sel_reg;        /* GMAC ext clk selection register */
-       struct device *dev;
        struct regmap *regmap;
+       bool gmac_en;
        u32 speed;
        void (*fix_retime_src)(void *priv, unsigned int speed);
 };
@@ -191,7 +191,7 @@ static void stih4xx_fix_retime_src(void *priv, u32 spd)
                }
        }
 
-       if (src == TX_RETIME_SRC_CLKGEN && dwmac->clk && freq)
+       if (src == TX_RETIME_SRC_CLKGEN && freq)
                clk_set_rate(dwmac->clk, freq);
 
        regmap_update_bits(dwmac->regmap, reg, STIH4XX_RETIME_SRC_MASK,
@@ -222,26 +222,20 @@ static void stid127_fix_retime_src(void *priv, u32 spd)
                        freq = DWMAC_2_5MHZ;
        }
 
-       if (dwmac->clk && freq)
+       if (freq)
                clk_set_rate(dwmac->clk, freq);
 
        regmap_update_bits(dwmac->regmap, reg, STID127_RETIME_SRC_MASK, val);
 }
 
-static int sti_dwmac_init(struct platform_device *pdev, void *priv)
+static int sti_dwmac_set_mode(struct sti_dwmac *dwmac)
 {
-       struct sti_dwmac *dwmac = priv;
        struct regmap *regmap = dwmac->regmap;
        int iface = dwmac->interface;
-       struct device *dev = dwmac->dev;
-       struct device_node *np = dev->of_node;
        u32 reg = dwmac->ctrl_reg;
        u32 val;
 
-       if (dwmac->clk)
-               clk_prepare_enable(dwmac->clk);
-
-       if (of_property_read_bool(np, "st,gmac_en"))
+       if (dwmac->gmac_en)
                regmap_update_bits(regmap, reg, EN_MASK, EN);
 
        regmap_update_bits(regmap, reg, MII_PHY_SEL_MASK, phy_intf_sels[iface]);
@@ -249,18 +243,11 @@ static int sti_dwmac_init(struct platform_device *pdev, void *priv)
        val = (iface == PHY_INTERFACE_MODE_REVMII) ? 0 : ENMII;
        regmap_update_bits(regmap, reg, ENMII_MASK, val);
 
-       dwmac->fix_retime_src(priv, dwmac->speed);
+       dwmac->fix_retime_src(dwmac, dwmac->speed);
 
        return 0;
 }
 
-static void sti_dwmac_exit(struct platform_device *pdev, void *priv)
-{
-       struct sti_dwmac *dwmac = priv;
-
-       if (dwmac->clk)
-               clk_disable_unprepare(dwmac->clk);
-}
 static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
                                struct platform_device *pdev)
 {
@@ -270,9 +257,6 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
        struct regmap *regmap;
        int err;
 
-       if (!np)
-               return -EINVAL;
-
        /* clk selection from extra syscfg register */
        dwmac->clk_sel_reg = -ENXIO;
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sti-clkconf");
@@ -289,9 +273,9 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
                return err;
        }
 
-       dwmac->dev = dev;
        dwmac->interface = of_get_phy_mode(np);
        dwmac->regmap = regmap;
+       dwmac->gmac_en = of_property_read_bool(np, "st,gmac_en");
        dwmac->ext_phyclk = of_property_read_bool(np, "st,ext-phyclk");
        dwmac->tx_retime_src = TX_RETIME_SRC_NA;
        dwmac->speed = SPEED_100;
@@ -357,17 +341,62 @@ static int sti_dwmac_probe(struct platform_device *pdev)
        dwmac->fix_retime_src = data->fix_retime_src;
 
        plat_dat->bsp_priv = dwmac;
-       plat_dat->init = sti_dwmac_init;
-       plat_dat->exit = sti_dwmac_exit;
        plat_dat->fix_mac_speed = data->fix_retime_src;
 
-       ret = sti_dwmac_init(pdev, plat_dat->bsp_priv);
+       ret = clk_prepare_enable(dwmac->clk);
        if (ret)
                return ret;
 
-       return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+       ret = sti_dwmac_set_mode(dwmac);
+       if (ret)
+               goto disable_clk;
+
+       ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+       if (ret)
+               goto disable_clk;
+
+       return 0;
+
+disable_clk:
+       clk_disable_unprepare(dwmac->clk);
+       return ret;
 }
 
+/*
+ * Platform remove callback: take down the stmmac core, then disable the
+ * PHY clock.  The status of stmmac_dvr_remove() is passed back.
+ */
+static int sti_dwmac_remove(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       /* Private data is looked up before stmmac_dvr_remove() runs. */
+       struct sti_dwmac *priv = get_stmmac_bsp_priv(dev);
+       int err;
+
+       err = stmmac_dvr_remove(dev);
+       clk_disable_unprepare(priv->clk);
+
+       return err;
+}
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * System suspend: suspend the stmmac core and then disable the PHY clock.
+ * Returns the status reported by stmmac_suspend().
+ */
+static int sti_dwmac_suspend(struct device *dev)
+{
+       struct sti_dwmac *priv = get_stmmac_bsp_priv(dev);
+       int err;
+
+       err = stmmac_suspend(dev);
+       /* The clock is disabled whatever the core's suspend result was. */
+       clk_disable_unprepare(priv->clk);
+
+       return err;
+}
+
+/*
+ * System resume: re-enable the PHY clock, reprogram the glue-logic mode
+ * registers and then resume the stmmac core.
+ *
+ * Returns 0 on success or a negative errno.  As in probe, a clock that
+ * fails to start aborts the sequence before any register is written.
+ */
+static int sti_dwmac_resume(struct device *dev)
+{
+       struct sti_dwmac *dwmac = get_stmmac_bsp_priv(dev);
+       int ret;
+
+       ret = clk_prepare_enable(dwmac->clk);
+       if (ret)
+               return ret;
+
+       sti_dwmac_set_mode(dwmac);
+
+       return stmmac_resume(dev);
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(sti_dwmac_pm_ops, sti_dwmac_suspend,  /* system sleep ops used by sti_dwmac_driver */
+                                          sti_dwmac_resume);
+
 static const struct sti_dwmac_of_data stih4xx_dwmac_data = {
        .fix_retime_src = stih4xx_fix_retime_src,
 };
@@ -387,10 +416,10 @@ MODULE_DEVICE_TABLE(of, sti_dwmac_match);
 
 static struct platform_driver sti_dwmac_driver = {
        .probe  = sti_dwmac_probe,
-       .remove = stmmac_pltfr_remove,
+       .remove = sti_dwmac_remove,
        .driver = {
                .name           = "sti-dwmac",
-               .pm             = &stmmac_pltfr_pm_ops,
+               .pm             = &sti_dwmac_pm_ops,
                .of_match_table = sti_dwmac_match,
        },
 };
index 6f4f5ce2511466e1b2547544b3010eaa07c3b706..3e8d4fefa5e0cb0131415bf4b6862620d4515abc 100644 (file)
@@ -155,8 +155,11 @@ enum power_event {
 #define MTL_CHAN_RX_DEBUG(x)           (MTL_CHANX_BASE_ADDR(x) + 0x38)
 
 #define MTL_OP_MODE_RSF                        BIT(5)
+#define MTL_OP_MODE_TXQEN              BIT(3)
 #define MTL_OP_MODE_TSF                        BIT(1)
 
+#define MTL_OP_MODE_TQS_MASK           GENMASK(24, 16)
+
 #define MTL_OP_MODE_TTC_MASK           0x70
 #define MTL_OP_MODE_TTC_SHIFT          4
 
index 4ec7397e7fb378d1d82368c5fb9aff28e5340c6c..a340fc8bd0debf305ff2cb2d6b72b1925ec8b86e 100644 (file)
@@ -23,7 +23,7 @@ static int dwmac4_wrback_get_tx_status(void *data, struct stmmac_extra_stats *x,
        unsigned int tdes3;
        int ret = tx_done;
 
-       tdes3 = p->des3;
+       tdes3 = le32_to_cpu(p->des3);
 
        /* Get tx owner first */
        if (unlikely(tdes3 & TDES3_OWN))
@@ -77,9 +77,9 @@ static int dwmac4_wrback_get_rx_status(void *data, struct stmmac_extra_stats *x,
                                       struct dma_desc *p)
 {
        struct net_device_stats *stats = (struct net_device_stats *)data;
-       unsigned int rdes1 = p->des1;
-       unsigned int rdes2 = p->des2;
-       unsigned int rdes3 = p->des3;
+       unsigned int rdes1 = le32_to_cpu(p->des1);
+       unsigned int rdes2 = le32_to_cpu(p->des2);
+       unsigned int rdes3 = le32_to_cpu(p->des3);
        int message_type;
        int ret = good_frame;
 
@@ -123,22 +123,29 @@ static int dwmac4_wrback_get_rx_status(void *data, struct stmmac_extra_stats *x,
                x->ipv4_pkt_rcvd++;
        if (rdes1 & RDES1_IPV6_HEADER)
                x->ipv6_pkt_rcvd++;
-       if (message_type == RDES_EXT_SYNC)
-               x->rx_msg_type_sync++;
+
+       if (message_type == RDES_EXT_NO_PTP)
+               x->no_ptp_rx_msg_type_ext++;
+       else if (message_type == RDES_EXT_SYNC)
+               x->ptp_rx_msg_type_sync++;
        else if (message_type == RDES_EXT_FOLLOW_UP)
-               x->rx_msg_type_follow_up++;
+               x->ptp_rx_msg_type_follow_up++;
        else if (message_type == RDES_EXT_DELAY_REQ)
-               x->rx_msg_type_delay_req++;
+               x->ptp_rx_msg_type_delay_req++;
        else if (message_type == RDES_EXT_DELAY_RESP)
-               x->rx_msg_type_delay_resp++;
+               x->ptp_rx_msg_type_delay_resp++;
        else if (message_type == RDES_EXT_PDELAY_REQ)
-               x->rx_msg_type_pdelay_req++;
+               x->ptp_rx_msg_type_pdelay_req++;
        else if (message_type == RDES_EXT_PDELAY_RESP)
-               x->rx_msg_type_pdelay_resp++;
+               x->ptp_rx_msg_type_pdelay_resp++;
        else if (message_type == RDES_EXT_PDELAY_FOLLOW_UP)
-               x->rx_msg_type_pdelay_follow_up++;
-       else
-               x->rx_msg_type_ext_no_ptp++;
+               x->ptp_rx_msg_type_pdelay_follow_up++;
+       else if (message_type == RDES_PTP_ANNOUNCE)
+               x->ptp_rx_msg_type_announce++;
+       else if (message_type == RDES_PTP_MANAGEMENT)
+               x->ptp_rx_msg_type_management++;
+       else if (message_type == RDES_PTP_PKT_RESERVED_TYPE)
+               x->ptp_rx_msg_pkt_reserved_type++;
 
        if (rdes1 & RDES1_PTP_PACKET_TYPE)
                x->ptp_frame_type++;
@@ -169,75 +176,122 @@ static int dwmac4_wrback_get_rx_status(void *data, struct stmmac_extra_stats *x,
 
 static int dwmac4_rd_get_tx_len(struct dma_desc *p)
 {
-       return (p->des2 & TDES2_BUFFER1_SIZE_MASK);
+       return (le32_to_cpu(p->des2) & TDES2_BUFFER1_SIZE_MASK);
 }
 
 static int dwmac4_get_tx_owner(struct dma_desc *p)
 {
-       return (p->des3 & TDES3_OWN) >> TDES3_OWN_SHIFT;
+       return (le32_to_cpu(p->des3) & TDES3_OWN) >> TDES3_OWN_SHIFT;
 }
 
 static void dwmac4_set_tx_owner(struct dma_desc *p)
 {
-       p->des3 |= TDES3_OWN;
+       p->des3 |= cpu_to_le32(TDES3_OWN);
 }
 
 static void dwmac4_set_rx_owner(struct dma_desc *p)
 {
-       p->des3 |= RDES3_OWN;
+       p->des3 |= cpu_to_le32(RDES3_OWN);
 }
 
 static int dwmac4_get_tx_ls(struct dma_desc *p)
 {
-       return (p->des3 & TDES3_LAST_DESCRIPTOR) >> TDES3_LAST_DESCRIPTOR_SHIFT;
+       return (le32_to_cpu(p->des3) & TDES3_LAST_DESCRIPTOR)
+               >> TDES3_LAST_DESCRIPTOR_SHIFT;
 }
 
 static int dwmac4_wrback_get_rx_frame_len(struct dma_desc *p, int rx_coe)
 {
-       return (p->des3 & RDES3_PACKET_SIZE_MASK);
+       return (le32_to_cpu(p->des3) & RDES3_PACKET_SIZE_MASK);
 }
 
 static void dwmac4_rd_enable_tx_timestamp(struct dma_desc *p)
 {
-       p->des2 |= TDES2_TIMESTAMP_ENABLE;
+       p->des2 |= cpu_to_le32(TDES2_TIMESTAMP_ENABLE);
 }
 
 static int dwmac4_wrback_get_tx_timestamp_status(struct dma_desc *p)
 {
-       return (p->des3 & TDES3_TIMESTAMP_STATUS)
-               >> TDES3_TIMESTAMP_STATUS_SHIFT;
+       /* Context type from W/B descriptor must be zero */
+       if (le32_to_cpu(p->des3) & TDES3_CONTEXT_TYPE)
+               return -EINVAL;
+
+       /* Tx Timestamp Status is 1 so des0 and des1'll have valid values */
+       if (le32_to_cpu(p->des3) & TDES3_TIMESTAMP_STATUS)
+               return 0;
+
+       return 1;
 }
 
-/*  NOTE: For RX CTX bit has to be checked before
- *  HAVE a specific function for TX and another one for RX
- */
-static u64 dwmac4_wrback_get_timestamp(void *desc, u32 ats)
+static inline u64 dwmac4_get_timestamp(void *desc, u32 ats)
 {
        struct dma_desc *p = (struct dma_desc *)desc;
        u64 ns;
 
-       ns = p->des0;
+       ns = le32_to_cpu(p->des0);
        /* convert high/sec time stamp value to nanosecond */
-       ns += p->des1 * 1000000000ULL;
+       ns += le32_to_cpu(p->des1) * 1000000000ULL;
 
        return ns;
 }
 
-static int dwmac4_context_get_rx_timestamp_status(void *desc, u32 ats)
+/*
+ * Check whether @desc holds a usable RX timestamp.
+ *
+ * Returns:
+ *   0        the descriptor is not owned by the DMA, is flagged as a
+ *            context descriptor and des0/des1 are not both all-ones;
+ *   1        timestamp not ready (OWN still set or not a context
+ *            descriptor);
+ *   -EINVAL  des0 and des1 are both 0xffffffff (corrupted value).
+ *
+ * Descriptor words are little-endian in memory, so des3 is converted to
+ * CPU order before its bits are tested, as the other accessors in this
+ * file do.
+ */
+static int dwmac4_rx_check_timestamp(void *desc)
+{
+       struct dma_desc *p = (struct dma_desc *)desc;
+       u32 rdes3 = le32_to_cpu(p->des3);
+       u32 own, ctxt;
+       int ret = 1;
+
+       own = rdes3 & RDES3_OWN;
+       ctxt = ((rdes3 & RDES3_CONTEXT_DESCRIPTOR)
+               >> RDES3_CONTEXT_DESCRIPTOR_SHIFT);
+
+       if (likely(!own && ctxt)) {
+               if ((le32_to_cpu(p->des0) == 0xffffffff) &&
+                   (le32_to_cpu(p->des1) == 0xffffffff))
+                       /* Corrupted value */
+                       ret = -EINVAL;
+               else
+                       /* A valid Timestamp is ready to be read */
+                       ret = 0;
+       }
+
+       /* ret is still 1 here when the timestamp is not ready */
+       return ret;
+}
+
+/*
+ * Report whether an RX timestamp is available for @desc.
+ *
+ * Returns:
+ *   0        a valid timestamp is ready;
+ *   -EINVAL  the write-back descriptor does not advertise a timestamp
+ *            (RS1V or RDES1_TIMESTAMP_AVAILABLE clear) or the value is
+ *            corrupted;
+ *   -EBUSY   the timestamp was still not ready after 10 polls.
+ */
+static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
 {
        struct dma_desc *p = (struct dma_desc *)desc;
+       int ret = -EINVAL;
+
+       /* Get the status from normal w/b descriptor */
+       if (likely(le32_to_cpu(p->des3) & TDES3_RS1V)) {
+               if (likely(le32_to_cpu(p->des1) & RDES1_TIMESTAMP_AVAILABLE)) {
+                       int i = 0;
+
+                       /* Check if timestamp is OK from context descriptor */
+                       do {
+                               ret = dwmac4_rx_check_timestamp(desc);
+                               if (ret < 0)
+                                       goto exit;
+                               i++;

-       return (p->des1 & RDES1_TIMESTAMP_AVAILABLE)
-               >> RDES1_TIMESTAMP_AVAILABLE_SHIFT;
+                       /* Poll again only while not ready and under budget */
+                       } while ((ret == 1) && (i < 10));
+
+                       /* Budget used up without the timestamp becoming ready */
+                       if (ret == 1)
+                               ret = -EBUSY;
+               }
+       }
+exit:
+       return ret;
 }
 
 static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
                                   int mode, int end)
 {
-       p->des3 = RDES3_OWN | RDES3_BUFFER1_VALID_ADDR;
+       p->des3 = cpu_to_le32(RDES3_OWN | RDES3_BUFFER1_VALID_ADDR);
 
        if (!disable_rx_ic)
-               p->des3 |= RDES3_INT_ON_COMPLETION_EN;
+               p->des3 |= cpu_to_le32(RDES3_INT_ON_COMPLETION_EN);
 }
 
 static void dwmac4_rd_init_tx_desc(struct dma_desc *p, int mode, int end)
@@ -252,9 +306,9 @@ static void dwmac4_rd_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
                                      bool csum_flag, int mode, bool tx_own,
                                      bool ls)
 {
-       unsigned int tdes3 = p->des3;
+       unsigned int tdes3 = le32_to_cpu(p->des3);
 
-       p->des2 |= (len & TDES2_BUFFER1_SIZE_MASK);
+       p->des2 |= cpu_to_le32(len & TDES2_BUFFER1_SIZE_MASK);
 
        if (is_fs)
                tdes3 |= TDES3_FIRST_DESCRIPTOR;
@@ -282,7 +336,7 @@ static void dwmac4_rd_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
                 */
                wmb();
 
-       p->des3 = tdes3;
+       p->des3 = cpu_to_le32(tdes3);
 }
 
 static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
@@ -290,14 +344,14 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
                                          bool ls, unsigned int tcphdrlen,
                                          unsigned int tcppayloadlen)
 {
-       unsigned int tdes3 = p->des3;
+       unsigned int tdes3 = le32_to_cpu(p->des3);
 
        if (len1)
-               p->des2 |= (len1 & TDES2_BUFFER1_SIZE_MASK);
+               p->des2 |= cpu_to_le32((len1 & TDES2_BUFFER1_SIZE_MASK));
 
        if (len2)
-               p->des2 |= (len2 << TDES2_BUFFER2_SIZE_MASK_SHIFT)
-                           & TDES2_BUFFER2_SIZE_MASK;
+               p->des2 |= cpu_to_le32((len2 << TDES2_BUFFER2_SIZE_MASK_SHIFT)
+                           & TDES2_BUFFER2_SIZE_MASK);
 
        if (is_fs) {
                tdes3 |= TDES3_FIRST_DESCRIPTOR |
@@ -325,7 +379,7 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
                 */
                wmb();
 
-       p->des3 = tdes3;
+       p->des3 = cpu_to_le32(tdes3);
 }
 
 static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
@@ -336,7 +390,7 @@ static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
 
 static void dwmac4_rd_set_tx_ic(struct dma_desc *p)
 {
-       p->des2 |= TDES2_INTERRUPT_ON_COMPLETION;
+       p->des2 |= cpu_to_le32(TDES2_INTERRUPT_ON_COMPLETION);
 }
 
 static void dwmac4_display_ring(void *head, unsigned int size, bool rx)
@@ -347,10 +401,10 @@ static void dwmac4_display_ring(void *head, unsigned int size, bool rx)
        pr_info("%s descriptor ring:\n", rx ? "RX" : "TX");
 
        for (i = 0; i < size; i++) {
-               if (p->des0)
-                       pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
-                               i, (unsigned int)virt_to_phys(p),
-                               p->des0, p->des1, p->des2, p->des3);
+               pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
+                       i, (unsigned int)virt_to_phys(p),
+                       le32_to_cpu(p->des0), le32_to_cpu(p->des1),
+                       le32_to_cpu(p->des2), le32_to_cpu(p->des3));
                p++;
        }
 }
@@ -359,8 +413,8 @@ static void dwmac4_set_mss_ctxt(struct dma_desc *p, unsigned int mss)
 {
        p->des0 = 0;
        p->des1 = 0;
-       p->des2 = mss;
-       p->des3 = TDES3_CONTEXT_TYPE | TDES3_CTXT_TCMSSV;
+       p->des2 = cpu_to_le32(mss);
+       p->des3 = cpu_to_le32(TDES3_CONTEXT_TYPE | TDES3_CTXT_TCMSSV);
 }
 
 const struct stmmac_desc_ops dwmac4_desc_ops = {
@@ -374,8 +428,8 @@ const struct stmmac_desc_ops dwmac4_desc_ops = {
        .get_rx_frame_len = dwmac4_wrback_get_rx_frame_len,
        .enable_tx_timestamp = dwmac4_rd_enable_tx_timestamp,
        .get_tx_timestamp_status = dwmac4_wrback_get_tx_timestamp_status,
-       .get_timestamp = dwmac4_wrback_get_timestamp,
-       .get_rx_timestamp_status = dwmac4_context_get_rx_timestamp_status,
+       .get_rx_timestamp_status = dwmac4_wrback_get_rx_timestamp_status,
+       .get_timestamp = dwmac4_get_timestamp,
        .set_tx_ic = dwmac4_rd_set_tx_ic,
        .prepare_tx_desc = dwmac4_rd_prepare_tx_desc,
        .prepare_tso_tx_desc = dwmac4_rd_prepare_tso_tx_desc,
index 0902a2edeaa9414cedd370f196d6bddab0c11c3a..9736c505211add1db476c3426c701a04961f4ed4 100644 (file)
 #define TDES3_CTXT_TCMSSV              BIT(26)
 
 /* TDES3 Common */
+#define        TDES3_RS1V                      BIT(26)
+#define        TDES3_RS1V_SHIFT                26
 #define TDES3_LAST_DESCRIPTOR          BIT(28)
 #define TDES3_LAST_DESCRIPTOR_SHIFT    28
 #define TDES3_FIRST_DESCRIPTOR         BIT(29)
 #define TDES3_CONTEXT_TYPE             BIT(30)
+#define        TDES3_CONTEXT_TYPE_SHIFT        30
 
 /* TDS3 use for both format (read and write back) */
 #define TDES3_OWN                      BIT(31)
 #define RDES3_LAST_DESCRIPTOR          BIT(28)
 #define RDES3_FIRST_DESCRIPTOR         BIT(29)
 #define RDES3_CONTEXT_DESCRIPTOR       BIT(30)
+#define RDES3_CONTEXT_DESCRIPTOR_SHIFT 30
 
 /* RDES3 (read format) */
 #define RDES3_BUFFER1_VALID_ADDR       BIT(24)
index 116151cd6a952378ba8bdc297d8e2e568edabc97..577316de6ba8716f0bdc8bee0b2da10f5aa0f2a3 100644 (file)
@@ -213,7 +213,17 @@ static void dwmac4_dma_chan_op_mode(void __iomem *ioaddr, int txmode,
                else
                        mtl_tx_op |= MTL_OP_MODE_TTC_512;
        }
-
+       /* For an IP with DWC_EQOS_NUM_TXQ == 1, the fields TXQEN and TQS are RO
+        * with reset values: TXQEN on, TQS == DWC_EQOS_TXFIFO_SIZE.
+        * For an IP with DWC_EQOS_NUM_TXQ > 1, the fields TXQEN and TQS are R/W
+        * with reset values: TXQEN off, TQS 256 bytes.
+        *
+        * Write the bits in both cases, since it will have no effect when RO.
+        * For DWC_EQOS_NUM_TXQ > 1, the top bits in MTL_OP_MODE_TQS_MASK might
+        * be RO, however, writing the whole TQS field will result in a value
+        * equal to DWC_EQOS_TXFIFO_SIZE, just like for DWC_EQOS_NUM_TXQ == 1.
+        */
+       mtl_tx_op |= MTL_OP_MODE_TXQEN | MTL_OP_MODE_TQS_MASK;
        writel(mtl_tx_op, ioaddr +  MTL_CHAN_TX_OP_MODE(channel));
 
        mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(channel));
index 38f19c99cf59e7382cc6a72d15deb2a50f1c4b96..ce97e522566a8910e05707870d5c3f511a20ee3b 100644 (file)
@@ -30,7 +30,7 @@ static int enh_desc_get_tx_status(void *data, struct stmmac_extra_stats *x,
                                  struct dma_desc *p, void __iomem *ioaddr)
 {
        struct net_device_stats *stats = (struct net_device_stats *)data;
-       unsigned int tdes0 = p->des0;
+       unsigned int tdes0 = le32_to_cpu(p->des0);
        int ret = tx_done;
 
        /* Get tx owner first */
@@ -95,7 +95,7 @@ static int enh_desc_get_tx_status(void *data, struct stmmac_extra_stats *x,
 
 static int enh_desc_get_tx_len(struct dma_desc *p)
 {
-       return (p->des1 & ETDES1_BUFFER1_SIZE_MASK);
+       return (le32_to_cpu(p->des1) & ETDES1_BUFFER1_SIZE_MASK);
 }
 
 static int enh_desc_coe_rdes0(int ipc_err, int type, int payload_err)
@@ -134,8 +134,8 @@ static int enh_desc_coe_rdes0(int ipc_err, int type, int payload_err)
 static void enh_desc_get_ext_status(void *data, struct stmmac_extra_stats *x,
                                    struct dma_extended_desc *p)
 {
-       unsigned int rdes0 = p->basic.des0;
-       unsigned int rdes4 = p->des4;
+       unsigned int rdes0 = le32_to_cpu(p->basic.des0);
+       unsigned int rdes4 = le32_to_cpu(p->des4);
 
        if (unlikely(rdes0 & ERDES0_RX_MAC_ADDR)) {
                int message_type = (rdes4 & ERDES4_MSG_TYPE_MASK) >> 8;
@@ -150,22 +150,30 @@ static void enh_desc_get_ext_status(void *data, struct stmmac_extra_stats *x,
                        x->ipv4_pkt_rcvd++;
                if (rdes4 & ERDES4_IPV6_PKT_RCVD)
                        x->ipv6_pkt_rcvd++;
-               if (message_type == RDES_EXT_SYNC)
-                       x->rx_msg_type_sync++;
+
+               if (message_type == RDES_EXT_NO_PTP)
+                       x->no_ptp_rx_msg_type_ext++;
+               else if (message_type == RDES_EXT_SYNC)
+                       x->ptp_rx_msg_type_sync++;
                else if (message_type == RDES_EXT_FOLLOW_UP)
-                       x->rx_msg_type_follow_up++;
+                       x->ptp_rx_msg_type_follow_up++;
                else if (message_type == RDES_EXT_DELAY_REQ)
-                       x->rx_msg_type_delay_req++;
+                       x->ptp_rx_msg_type_delay_req++;
                else if (message_type == RDES_EXT_DELAY_RESP)
-                       x->rx_msg_type_delay_resp++;
+                       x->ptp_rx_msg_type_delay_resp++;
                else if (message_type == RDES_EXT_PDELAY_REQ)
-                       x->rx_msg_type_pdelay_req++;
+                       x->ptp_rx_msg_type_pdelay_req++;
                else if (message_type == RDES_EXT_PDELAY_RESP)
-                       x->rx_msg_type_pdelay_resp++;
+                       x->ptp_rx_msg_type_pdelay_resp++;
                else if (message_type == RDES_EXT_PDELAY_FOLLOW_UP)
-                       x->rx_msg_type_pdelay_follow_up++;
-               else
-                       x->rx_msg_type_ext_no_ptp++;
+                       x->ptp_rx_msg_type_pdelay_follow_up++;
+               else if (message_type == RDES_PTP_ANNOUNCE)
+                       x->ptp_rx_msg_type_announce++;
+               else if (message_type == RDES_PTP_MANAGEMENT)
+                       x->ptp_rx_msg_type_management++;
+               else if (message_type == RDES_PTP_PKT_RESERVED_TYPE)
+                       x->ptp_rx_msg_pkt_reserved_type++;
+
                if (rdes4 & ERDES4_PTP_FRAME_TYPE)
                        x->ptp_frame_type++;
                if (rdes4 & ERDES4_PTP_VER)
@@ -191,7 +199,7 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
                                  struct dma_desc *p)
 {
        struct net_device_stats *stats = (struct net_device_stats *)data;
-       unsigned int rdes0 = p->des0;
+       unsigned int rdes0 = le32_to_cpu(p->des0);
        int ret = good_frame;
 
        if (unlikely(rdes0 & RDES0_OWN))
@@ -257,8 +265,8 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
                                  int mode, int end)
 {
-       p->des0 |= RDES0_OWN;
-       p->des1 |= ((BUF_SIZE_8KiB - 1) & ERDES1_BUFFER1_SIZE_MASK);
+       p->des0 |= cpu_to_le32(RDES0_OWN);
+       p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - 1) & ERDES1_BUFFER1_SIZE_MASK);
 
        if (mode == STMMAC_CHAIN_MODE)
                ehn_desc_rx_set_on_chain(p);
@@ -266,12 +274,12 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
                ehn_desc_rx_set_on_ring(p, end);
 
        if (disable_rx_ic)
-               p->des1 |= ERDES1_DISABLE_IC;
+               p->des1 |= cpu_to_le32(ERDES1_DISABLE_IC);
 }
 
 static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end)
 {
-       p->des0 &= ~ETDES0_OWN;
+       p->des0 &= cpu_to_le32(~ETDES0_OWN);
        if (mode == STMMAC_CHAIN_MODE)
                enh_desc_end_tx_desc_on_chain(p);
        else
@@ -280,27 +288,27 @@ static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end)
 
 static int enh_desc_get_tx_owner(struct dma_desc *p)
 {
-       return (p->des0 & ETDES0_OWN) >> 31;
+       return (le32_to_cpu(p->des0) & ETDES0_OWN) >> 31;
 }
 
 static void enh_desc_set_tx_owner(struct dma_desc *p)
 {
-       p->des0 |= ETDES0_OWN;
+       p->des0 |= cpu_to_le32(ETDES0_OWN);
 }
 
 static void enh_desc_set_rx_owner(struct dma_desc *p)
 {
-       p->des0 |= RDES0_OWN;
+       p->des0 |= cpu_to_le32(RDES0_OWN);
 }
 
 static int enh_desc_get_tx_ls(struct dma_desc *p)
 {
-       return (p->des0 & ETDES0_LAST_SEGMENT) >> 29;
+       return (le32_to_cpu(p->des0) & ETDES0_LAST_SEGMENT) >> 29;
 }
 
 static void enh_desc_release_tx_desc(struct dma_desc *p, int mode)
 {
-       int ter = (p->des0 & ETDES0_END_RING) >> 21;
+       int ter = (le32_to_cpu(p->des0) & ETDES0_END_RING) >> 21;
 
        memset(p, 0, offsetof(struct dma_desc, des2));
        if (mode == STMMAC_CHAIN_MODE)
@@ -313,7 +321,7 @@ static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
                                     bool csum_flag, int mode, bool tx_own,
                                     bool ls)
 {
-       unsigned int tdes0 = p->des0;
+       unsigned int tdes0 = le32_to_cpu(p->des0);
 
        if (mode == STMMAC_CHAIN_MODE)
                enh_set_tx_desc_len_on_chain(p, len);
@@ -344,12 +352,12 @@ static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
                 */
                wmb();
 
-       p->des0 = tdes0;
+       p->des0 = cpu_to_le32(tdes0);
 }
 
 static void enh_desc_set_tx_ic(struct dma_desc *p)
 {
-       p->des0 |= ETDES0_INTERRUPT;
+       p->des0 |= cpu_to_le32(ETDES0_INTERRUPT);
 }
 
 static int enh_desc_get_rx_frame_len(struct dma_desc *p, int rx_coe_type)
@@ -364,18 +372,18 @@ static int enh_desc_get_rx_frame_len(struct dma_desc *p, int rx_coe_type)
        if (rx_coe_type == STMMAC_RX_COE_TYPE1)
                csum = 2;
 
-       return (((p->des0 & RDES0_FRAME_LEN_MASK) >> RDES0_FRAME_LEN_SHIFT) -
-               csum);
+       return (((le32_to_cpu(p->des0) & RDES0_FRAME_LEN_MASK)
+                               >> RDES0_FRAME_LEN_SHIFT) - csum);
 }
 
 static void enh_desc_enable_tx_timestamp(struct dma_desc *p)
 {
-       p->des0 |= ETDES0_TIME_STAMP_ENABLE;
+       p->des0 |= cpu_to_le32(ETDES0_TIME_STAMP_ENABLE);
 }
 
 static int enh_desc_get_tx_timestamp_status(struct dma_desc *p)
 {
-       return (p->des0 & ETDES0_TIME_STAMP_STATUS) >> 17;
+       return (le32_to_cpu(p->des0) & ETDES0_TIME_STAMP_STATUS) >> 17;
 }
 
 static u64 enh_desc_get_timestamp(void *desc, u32 ats)
@@ -384,13 +392,13 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
 
        if (ats) {
                struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
-               ns = p->des6;
+               ns = le32_to_cpu(p->des6);
                /* convert high/sec time stamp value to nanosecond */
-               ns += p->des7 * 1000000000ULL;
+               ns += le32_to_cpu(p->des7) * 1000000000ULL;
        } else {
                struct dma_desc *p = (struct dma_desc *)desc;
-               ns = p->des2;
-               ns += p->des3 * 1000000000ULL;
+               ns = le32_to_cpu(p->des2);
+               ns += le32_to_cpu(p->des3) * 1000000000ULL;
        }
 
        return ns;
@@ -400,10 +408,11 @@ static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats)
 {
        if (ats) {
                struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
-               return (p->basic.des0 & RDES0_IPC_CSUM_ERROR) >> 7;
+               return (le32_to_cpu(p->basic.des0) & RDES0_IPC_CSUM_ERROR) >> 7;
        } else {
                struct dma_desc *p = (struct dma_desc *)desc;
-               if ((p->des2 == 0xffffffff) && (p->des3 == 0xffffffff))
+               if ((le32_to_cpu(p->des2) == 0xffffffff) &&
+                   (le32_to_cpu(p->des3) == 0xffffffff))
                        /* timestamp is corrupted, hence don't store it */
                        return 0;
                else
index 2beacd0d3043a77b163456912d9d8a5378593858..fd78406e2e9afb198f0f0c6645e162be69ef713d 100644 (file)
@@ -30,8 +30,8 @@ static int ndesc_get_tx_status(void *data, struct stmmac_extra_stats *x,
                               struct dma_desc *p, void __iomem *ioaddr)
 {
        struct net_device_stats *stats = (struct net_device_stats *)data;
-       unsigned int tdes0 = p->des0;
-       unsigned int tdes1 = p->des1;
+       unsigned int tdes0 = le32_to_cpu(p->des0);
+       unsigned int tdes1 = le32_to_cpu(p->des1);
        int ret = tx_done;
 
        /* Get tx owner first */
@@ -77,7 +77,7 @@ static int ndesc_get_tx_status(void *data, struct stmmac_extra_stats *x,
 
 static int ndesc_get_tx_len(struct dma_desc *p)
 {
-       return (p->des1 & RDES1_BUFFER1_SIZE_MASK);
+       return (le32_to_cpu(p->des1) & RDES1_BUFFER1_SIZE_MASK);
 }
 
 /* This function verifies if each incoming frame has some errors
@@ -88,7 +88,7 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x,
                               struct dma_desc *p)
 {
        int ret = good_frame;
-       unsigned int rdes0 = p->des0;
+       unsigned int rdes0 = le32_to_cpu(p->des0);
        struct net_device_stats *stats = (struct net_device_stats *)data;
 
        if (unlikely(rdes0 & RDES0_OWN))
@@ -141,8 +141,8 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode,
                               int end)
 {
-       p->des0 |= RDES0_OWN;
-       p->des1 |= (BUF_SIZE_2KiB - 1) & RDES1_BUFFER1_SIZE_MASK;
+       p->des0 |= cpu_to_le32(RDES0_OWN);
+       p->des1 |= cpu_to_le32((BUF_SIZE_2KiB - 1) & RDES1_BUFFER1_SIZE_MASK);
 
        if (mode == STMMAC_CHAIN_MODE)
                ndesc_rx_set_on_chain(p, end);
@@ -150,12 +150,12 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode,
                ndesc_rx_set_on_ring(p, end);
 
        if (disable_rx_ic)
-               p->des1 |= RDES1_DISABLE_IC;
+               p->des1 |= cpu_to_le32(RDES1_DISABLE_IC);
 }
 
 static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end)
 {
-       p->des0 &= ~TDES0_OWN;
+       p->des0 &= cpu_to_le32(~TDES0_OWN);
        if (mode == STMMAC_CHAIN_MODE)
                ndesc_tx_set_on_chain(p);
        else
@@ -164,27 +164,27 @@ static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end)
 
 static int ndesc_get_tx_owner(struct dma_desc *p)
 {
-       return (p->des0 & TDES0_OWN) >> 31;
+       return (le32_to_cpu(p->des0) & TDES0_OWN) >> 31;
 }
 
 static void ndesc_set_tx_owner(struct dma_desc *p)
 {
-       p->des0 |= TDES0_OWN;
+       p->des0 |= cpu_to_le32(TDES0_OWN);
 }
 
 static void ndesc_set_rx_owner(struct dma_desc *p)
 {
-       p->des0 |= RDES0_OWN;
+       p->des0 |= cpu_to_le32(RDES0_OWN);
 }
 
 static int ndesc_get_tx_ls(struct dma_desc *p)
 {
-       return (p->des1 & TDES1_LAST_SEGMENT) >> 30;
+       return (le32_to_cpu(p->des1) & TDES1_LAST_SEGMENT) >> 30;
 }
 
 static void ndesc_release_tx_desc(struct dma_desc *p, int mode)
 {
-       int ter = (p->des1 & TDES1_END_RING) >> 25;
+       int ter = (le32_to_cpu(p->des1) & TDES1_END_RING) >> 25;
 
        memset(p, 0, offsetof(struct dma_desc, des2));
        if (mode == STMMAC_CHAIN_MODE)
@@ -197,7 +197,7 @@ static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
                                  bool csum_flag, int mode, bool tx_own,
                                  bool ls)
 {
-       unsigned int tdes1 = p->des1;
+       unsigned int tdes1 = le32_to_cpu(p->des1);
 
        if (is_fs)
                tdes1 |= TDES1_FIRST_SEGMENT;
@@ -212,7 +212,7 @@ static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
        if (ls)
                tdes1 |= TDES1_LAST_SEGMENT;
 
-       p->des1 = tdes1;
+       p->des1 = cpu_to_le32(tdes1);
 
        if (mode == STMMAC_CHAIN_MODE)
                norm_set_tx_desc_len_on_chain(p, len);
@@ -220,12 +220,12 @@ static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
                norm_set_tx_desc_len_on_ring(p, len);
 
        if (tx_own)
-               p->des0 |= TDES0_OWN;
+               p->des0 |= cpu_to_le32(TDES0_OWN);
 }
 
 static void ndesc_set_tx_ic(struct dma_desc *p)
 {
-       p->des1 |= TDES1_INTERRUPT;
+       p->des1 |= cpu_to_le32(TDES1_INTERRUPT);
 }
 
 static int ndesc_get_rx_frame_len(struct dma_desc *p, int rx_coe_type)
@@ -241,19 +241,20 @@ static int ndesc_get_rx_frame_len(struct dma_desc *p, int rx_coe_type)
        if (rx_coe_type == STMMAC_RX_COE_TYPE1)
                csum = 2;
 
-       return (((p->des0 & RDES0_FRAME_LEN_MASK) >> RDES0_FRAME_LEN_SHIFT) -
+       return (((le32_to_cpu(p->des0) & RDES0_FRAME_LEN_MASK)
+                               >> RDES0_FRAME_LEN_SHIFT) -
                csum);
 
 }
 
 static void ndesc_enable_tx_timestamp(struct dma_desc *p)
 {
-       p->des1 |= TDES1_TIME_STAMP_ENABLE;
+       p->des1 |= cpu_to_le32(TDES1_TIME_STAMP_ENABLE);
 }
 
 static int ndesc_get_tx_timestamp_status(struct dma_desc *p)
 {
-       return (p->des0 & TDES0_TIME_STAMP_STATUS) >> 17;
+       return (le32_to_cpu(p->des0) & TDES0_TIME_STAMP_STATUS) >> 17;
 }
 
 static u64 ndesc_get_timestamp(void *desc, u32 ats)
@@ -261,9 +262,9 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
        struct dma_desc *p = (struct dma_desc *)desc;
        u64 ns;
 
-       ns = p->des2;
+       ns = le32_to_cpu(p->des2);
        /* convert high/sec time stamp value to nanosecond */
-       ns += p->des3 * 1000000000ULL;
+       ns += le32_to_cpu(p->des3) * 1000000000ULL;
 
        return ns;
 }
@@ -272,7 +273,8 @@ static int ndesc_get_rx_timestamp_status(void *desc, u32 ats)
 {
        struct dma_desc *p = (struct dma_desc *)desc;
 
-       if ((p->des2 == 0xffffffff) && (p->des3 == 0xffffffff))
+       if ((le32_to_cpu(p->des2) == 0xffffffff) &&
+           (le32_to_cpu(p->des3) == 0xffffffff))
                /* timestamp is corrupted, hence don't store it */
                return 0;
        else
index 7723b5d2499a1f8e7111d0e74eb8e5634726b30f..9983ce9bd90de6a24b60ff396e04a403630fb9d3 100644 (file)
@@ -34,7 +34,7 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
        unsigned int entry = priv->cur_tx;
        struct dma_desc *desc;
        unsigned int nopaged_len = skb_headlen(skb);
-       unsigned int bmax, len;
+       unsigned int bmax, len, des2;
 
        if (priv->extend_desc)
                desc = (struct dma_desc *)(priv->dma_etx + entry);
@@ -50,16 +50,17 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 
        if (nopaged_len > BUF_SIZE_8KiB) {
 
-               desc->des2 = dma_map_single(priv->device, skb->data,
-                                           bmax, DMA_TO_DEVICE);
-               if (dma_mapping_error(priv->device, desc->des2))
+               des2 = dma_map_single(priv->device, skb->data, bmax,
+                                     DMA_TO_DEVICE);
+               desc->des2 = cpu_to_le32(des2);
+               if (dma_mapping_error(priv->device, des2))
                        return -1;
 
-               priv->tx_skbuff_dma[entry].buf = desc->des2;
+               priv->tx_skbuff_dma[entry].buf = des2;
                priv->tx_skbuff_dma[entry].len = bmax;
                priv->tx_skbuff_dma[entry].is_jumbo = true;
 
-               desc->des3 = desc->des2 + BUF_SIZE_4KiB;
+               desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
                priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum,
                                                STMMAC_RING_MODE, 0, false);
                priv->tx_skbuff[entry] = NULL;
@@ -70,26 +71,28 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
                else
                        desc = priv->dma_tx + entry;
 
-               desc->des2 = dma_map_single(priv->device, skb->data + bmax,
-                                           len, DMA_TO_DEVICE);
-               if (dma_mapping_error(priv->device, desc->des2))
+               des2 = dma_map_single(priv->device, skb->data + bmax, len,
+                                     DMA_TO_DEVICE);
+               desc->des2 = cpu_to_le32(des2);
+               if (dma_mapping_error(priv->device, des2))
                        return -1;
-               priv->tx_skbuff_dma[entry].buf = desc->des2;
+               priv->tx_skbuff_dma[entry].buf = des2;
                priv->tx_skbuff_dma[entry].len = len;
                priv->tx_skbuff_dma[entry].is_jumbo = true;
 
-               desc->des3 = desc->des2 + BUF_SIZE_4KiB;
+               desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
                priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
                                                STMMAC_RING_MODE, 1, true);
        } else {
-               desc->des2 = dma_map_single(priv->device, skb->data,
-                                           nopaged_len, DMA_TO_DEVICE);
-               if (dma_mapping_error(priv->device, desc->des2))
+               des2 = dma_map_single(priv->device, skb->data,
+                                     nopaged_len, DMA_TO_DEVICE);
+               desc->des2 = cpu_to_le32(des2);
+               if (dma_mapping_error(priv->device, des2))
                        return -1;
-               priv->tx_skbuff_dma[entry].buf = desc->des2;
+               priv->tx_skbuff_dma[entry].buf = des2;
                priv->tx_skbuff_dma[entry].len = nopaged_len;
                priv->tx_skbuff_dma[entry].is_jumbo = true;
-               desc->des3 = desc->des2 + BUF_SIZE_4KiB;
+               desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
                priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum,
                                                STMMAC_RING_MODE, 0, true);
        }
@@ -115,13 +118,13 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
 
        /* Fill DES3 in case of RING mode */
        if (priv->dma_buf_sz >= BUF_SIZE_8KiB)
-               p->des3 = p->des2 + BUF_SIZE_8KiB;
+               p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB);
 }
 
 /* In ring mode we need to fill the desc3 because it is used as buffer */
 static void stmmac_init_desc3(struct dma_desc *p)
 {
-       p->des3 = p->des2 + BUF_SIZE_8KiB;
+       p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB);
 }
 
 static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
index f94e0282451b8e4852707a0dd30943e74b4275c3..dbacb804eb1542a6e367ec3affa3c8458261aa82 100644 (file)
@@ -128,6 +128,7 @@ struct stmmac_priv {
        int irq_wake;
        spinlock_t ptp_lock;
        void __iomem *mmcaddr;
+       void __iomem *ptpaddr;
        u32 rx_tail_addr;
        u32 tx_tail_addr;
        u32 mss;
@@ -144,7 +145,7 @@ int stmmac_mdio_register(struct net_device *ndev);
 int stmmac_mdio_reset(struct mii_bus *mii);
 void stmmac_set_ethtool_ops(struct net_device *netdev);
 
-int stmmac_ptp_register(struct stmmac_priv *priv);
+void stmmac_ptp_register(struct stmmac_priv *priv);
 void stmmac_ptp_unregister(struct stmmac_priv *priv);
 int stmmac_resume(struct device *dev);
 int stmmac_suspend(struct device *dev);
index 3fe9340b748f511739d8cbbed6a62cda7a09bd23..d5a8122b60331cc51388d3b62948b04bf35d6336 100644 (file)
@@ -115,14 +115,17 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
        STMMAC_STAT(ip_csum_bypassed),
        STMMAC_STAT(ipv4_pkt_rcvd),
        STMMAC_STAT(ipv6_pkt_rcvd),
-       STMMAC_STAT(rx_msg_type_ext_no_ptp),
-       STMMAC_STAT(rx_msg_type_sync),
-       STMMAC_STAT(rx_msg_type_follow_up),
-       STMMAC_STAT(rx_msg_type_delay_req),
-       STMMAC_STAT(rx_msg_type_delay_resp),
-       STMMAC_STAT(rx_msg_type_pdelay_req),
-       STMMAC_STAT(rx_msg_type_pdelay_resp),
-       STMMAC_STAT(rx_msg_type_pdelay_follow_up),
+       STMMAC_STAT(no_ptp_rx_msg_type_ext),
+       STMMAC_STAT(ptp_rx_msg_type_sync),
+       STMMAC_STAT(ptp_rx_msg_type_follow_up),
+       STMMAC_STAT(ptp_rx_msg_type_delay_req),
+       STMMAC_STAT(ptp_rx_msg_type_delay_resp),
+       STMMAC_STAT(ptp_rx_msg_type_pdelay_req),
+       STMMAC_STAT(ptp_rx_msg_type_pdelay_resp),
+       STMMAC_STAT(ptp_rx_msg_type_pdelay_follow_up),
+       STMMAC_STAT(ptp_rx_msg_type_announce),
+       STMMAC_STAT(ptp_rx_msg_type_management),
+       STMMAC_STAT(ptp_rx_msg_pkt_reserved_type),
        STMMAC_STAT(ptp_frame_type),
        STMMAC_STAT(ptp_ver),
        STMMAC_STAT(timestamp_dropped),
@@ -870,6 +873,7 @@ static const struct ethtool_ops stmmac_ethtool_ops = {
        .get_regs = stmmac_ethtool_gregs,
        .get_regs_len = stmmac_ethtool_get_regs_len,
        .get_link = ethtool_op_get_link,
+       .nway_reset = phy_ethtool_nway_reset,
        .get_pauseparam = stmmac_get_pauseparam,
        .set_pauseparam = stmmac_set_pauseparam,
        .get_ethtool_stats = stmmac_get_ethtool_stats,
index a77f68918010d3a511c4a97cd3b2c83671f9f542..10d6059b2f26555af9963812f847b68109b9c959 100644 (file)
@@ -34,21 +34,29 @@ static void stmmac_config_hw_tstamping(void __iomem *ioaddr, u32 data)
 }
 
 static u32 stmmac_config_sub_second_increment(void __iomem *ioaddr,
-                                             u32 ptp_clock)
+                                             u32 ptp_clock, int gmac4)
 {
        u32 value = readl(ioaddr + PTP_TCR);
        unsigned long data;
 
-       /* Convert the ptp_clock to nano second
-        * formula = (2/ptp_clock) * 1000000000
-        * where, ptp_clock = 50MHz.
+       /* For GMAC3.x, 4.x versions, convert the ptp_clock to nano second
+        *      formula = (1/ptp_clock) * 1000000000
+        * where ptp_clock is 50MHz if fine method is used to update system
         */
-       data = (2000000000ULL / ptp_clock);
+       if (value & PTP_TCR_TSCFUPDT)
+               data = (1000000000ULL / 50000000);
+       else
+               data = (1000000000ULL / ptp_clock);
 
        /* 0.465ns accuracy */
        if (!(value & PTP_TCR_TSCTRLSSR))
                data = (data * 1000) / 465;
 
+       data &= PTP_SSIR_SSINC_MASK;
+
+       if (gmac4)
+               data = data << GMAC4_PTP_SSIR_SSINC_SHIFT;
+
        writel(data, ioaddr + PTP_SSIR);
 
        return data;
@@ -104,14 +112,30 @@ static int stmmac_config_addend(void __iomem *ioaddr, u32 addend)
 }
 
 static int stmmac_adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec,
-                                int add_sub)
+                                int add_sub, int gmac4)
 {
        u32 value;
        int limit;
 
+       if (add_sub) {
+               /* If the new sec value needs to be subtracted with
+                * the system time, then MAC_STSUR reg should be
+                * programmed with (2^32 – <new_sec_value>)
+                */
+               if (gmac4)
+                       sec = (100000000ULL - sec);
+
+               value = readl(ioaddr + PTP_TCR);
+               if (value & PTP_TCR_TSCTRLSSR)
+                       nsec = (PTP_DIGITAL_ROLLOVER_MODE - nsec);
+               else
+                       nsec = (PTP_BINARY_ROLLOVER_MODE - nsec);
+       }
+
        writel(sec, ioaddr + PTP_STSUR);
-       writel(((add_sub << PTP_STNSUR_ADDSUB_SHIFT) | nsec),
-               ioaddr + PTP_STNSUR);
+       value = (add_sub << PTP_STNSUR_ADDSUB_SHIFT) | nsec;
+       writel(value, ioaddr + PTP_STNSUR);
+
        /* issue command to initialize the system time value */
        value = readl(ioaddr + PTP_TCR);
        value |= PTP_TCR_TSUPDT;
@@ -134,8 +158,9 @@ static u64 stmmac_get_systime(void __iomem *ioaddr)
 {
        u64 ns;
 
+       /* Get the TSSS value */
        ns = readl(ioaddr + PTP_STNSR);
-       /* convert sec time value to nanosecond */
+       /* Get the TSS and convert sec time value to nanosecond */
        ns += readl(ioaddr + PTP_STSR) * 1000000000ULL;
 
        return ns;
index fa4a82f4656fb2eccec71e0245dfa490b3389012..48a4e841956b0f552e67b532a1bf7a06289438ec 100644 (file)
@@ -105,8 +105,8 @@ module_param(eee_timer, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
 #define STMMAC_LPI_T(x) (jiffies + msecs_to_jiffies(x))
 
-/* By default the driver will use the ring mode to manage tx and rx descriptors
- * but passing this value so user can force to use the chain instead of the ring
+/* By default the driver will use the ring mode to manage tx and rx descriptors,
+ * but allow user to force to use the chain instead of the ring
  */
 static unsigned int chain_mode;
 module_param(chain_mode, int, S_IRUGO);
@@ -305,7 +305,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
                         */
                        spin_lock_irqsave(&priv->lock, flags);
                        if (priv->eee_active) {
-                               pr_debug("stmmac: disable EEE\n");
+                               netdev_dbg(priv->dev, "disable EEE\n");
                                del_timer_sync(&priv->eee_ctrl_timer);
                                priv->hw->mac->set_eee_timer(priv->hw, 0,
                                                             tx_lpi_timer);
@@ -334,7 +334,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
                ret = true;
                spin_unlock_irqrestore(&priv->lock, flags);
 
-               pr_debug("stmmac: Energy-Efficient Ethernet initialized\n");
+               netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n");
        }
 out:
        return ret;
@@ -342,18 +342,17 @@ out:
 
 /* stmmac_get_tx_hwtstamp - get HW TX timestamps
  * @priv: driver private structure
- * @entry : descriptor index to be used.
+ * @p : descriptor pointer
  * @skb : the socket buffer
  * Description :
  * This function will read timestamp from the descriptor & pass it to stack.
  * and also perform some sanity checks.
  */
 static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
-                                  unsigned int entry, struct sk_buff *skb)
+                                  struct dma_desc *p, struct sk_buff *skb)
 {
        struct skb_shared_hwtstamps shhwtstamp;
        u64 ns;
-       void *desc = NULL;
 
        if (!priv->hwts_tx_en)
                return;
@@ -362,58 +361,55 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
        if (likely(!skb || !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)))
                return;
 
-       if (priv->adv_ts)
-               desc = (priv->dma_etx + entry);
-       else
-               desc = (priv->dma_tx + entry);
-
        /* check tx tstamp status */
-       if (!priv->hw->desc->get_tx_timestamp_status((struct dma_desc *)desc))
-               return;
+       if (!priv->hw->desc->get_tx_timestamp_status(p)) {
+               /* get the valid tstamp */
+               ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
 
-       /* get the valid tstamp */
-       ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
+               memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
+               shhwtstamp.hwtstamp = ns_to_ktime(ns);
 
-       memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
-       shhwtstamp.hwtstamp = ns_to_ktime(ns);
-       /* pass tstamp to stack */
-       skb_tstamp_tx(skb, &shhwtstamp);
+               netdev_info(priv->dev, "get valid TX hw timestamp %llu\n", ns);
+               /* pass tstamp to stack */
+               skb_tstamp_tx(skb, &shhwtstamp);
+       }
 
        return;
 }
 
 /* stmmac_get_rx_hwtstamp - get HW RX timestamps
  * @priv: driver private structure
- * @entry : descriptor index to be used.
+ * @p : descriptor pointer
+ * @np : next descriptor pointer
  * @skb : the socket buffer
  * Description :
  * This function will read received packet's timestamp from the descriptor
  * and pass it to stack. It also perform some sanity checks.
  */
-static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv,
-                                  unsigned int entry, struct sk_buff *skb)
+static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
+                                  struct dma_desc *np, struct sk_buff *skb)
 {
        struct skb_shared_hwtstamps *shhwtstamp = NULL;
        u64 ns;
-       void *desc = NULL;
 
        if (!priv->hwts_rx_en)
                return;
 
-       if (priv->adv_ts)
-               desc = (priv->dma_erx + entry);
-       else
-               desc = (priv->dma_rx + entry);
-
-       /* exit if rx tstamp is not valid */
-       if (!priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts))
-               return;
+       /* Check if timestamp is available */
+       if (!priv->hw->desc->get_rx_timestamp_status(p, priv->adv_ts)) {
+               /* For GMAC4, the valid timestamp is from CTX next desc. */
+               if (priv->plat->has_gmac4)
+                       ns = priv->hw->desc->get_timestamp(np, priv->adv_ts);
+               else
+                       ns = priv->hw->desc->get_timestamp(p, priv->adv_ts);
 
-       /* get valid tstamp */
-       ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
-       shhwtstamp = skb_hwtstamps(skb);
-       memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
-       shhwtstamp->hwtstamp = ns_to_ktime(ns);
+               netdev_info(priv->dev, "get valid RX hw timestamp %llu\n", ns);
+               shhwtstamp = skb_hwtstamps(skb);
+               memset(shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
+               shhwtstamp->hwtstamp = ns_to_ktime(ns);
+       } else  {
+               netdev_err(priv->dev, "cannot get RX hw timestamp\n");
+       }
 }
 
 /**
@@ -456,8 +452,8 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
                           sizeof(struct hwtstamp_config)))
                return -EFAULT;
 
-       pr_debug("%s config flags:0x%x, tx_type:0x%x, rx_filter:0x%x\n",
-                __func__, config.flags, config.tx_type, config.rx_filter);
+       netdev_dbg(priv->dev, "%s config flags:0x%x, tx_type:0x%x, rx_filter:0x%x\n",
+                  __func__, config.flags, config.tx_type, config.rx_filter);
 
        /* reserved for future extensions */
        if (config.flags)
@@ -600,17 +596,18 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
        priv->hwts_tx_en = config.tx_type == HWTSTAMP_TX_ON;
 
        if (!priv->hwts_tx_en && !priv->hwts_rx_en)
-               priv->hw->ptp->config_hw_tstamping(priv->ioaddr, 0);
+               priv->hw->ptp->config_hw_tstamping(priv->ptpaddr, 0);
        else {
                value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR |
                         tstamp_all | ptp_v2 | ptp_over_ethernet |
                         ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en |
                         ts_master_en | snap_type_sel);
-               priv->hw->ptp->config_hw_tstamping(priv->ioaddr, value);
+               priv->hw->ptp->config_hw_tstamping(priv->ptpaddr, value);
 
                /* program Sub Second Increment reg */
                sec_inc = priv->hw->ptp->config_sub_second_increment(
-                       priv->ioaddr, priv->clk_ptp_rate);
+                       priv->ptpaddr, priv->clk_ptp_rate,
+                       priv->plat->has_gmac4);
                temp = div_u64(1000000000ULL, sec_inc);
 
                /* calculate default added value:
@@ -620,14 +617,14 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
                 */
                temp = (u64)(temp << 32);
                priv->default_addend = div_u64(temp, priv->clk_ptp_rate);
-               priv->hw->ptp->config_addend(priv->ioaddr,
+               priv->hw->ptp->config_addend(priv->ptpaddr,
                                             priv->default_addend);
 
                /* initialize system time */
                ktime_get_real_ts64(&now);
 
                /* lower 32 bits of tv_sec are safe until y2106 */
-               priv->hw->ptp->init_systime(priv->ioaddr, (u32)now.tv_sec,
+               priv->hw->ptp->init_systime(priv->ptpaddr, (u32)now.tv_sec,
                                            now.tv_nsec);
        }
 
@@ -678,7 +675,9 @@ static int stmmac_init_ptp(struct stmmac_priv *priv)
        priv->hwts_tx_en = 0;
        priv->hwts_rx_en = 0;
 
-       return stmmac_ptp_register(priv);
+       stmmac_ptp_register(priv);
+
+       return 0;
 }
 
 static void stmmac_release_ptp(struct stmmac_priv *priv)
@@ -753,9 +752,9 @@ static void stmmac_adjust_link(struct net_device *dev)
                                stmmac_hw_fix_mac_speed(priv);
                                break;
                        default:
-                               if (netif_msg_link(priv))
-                                       pr_warn("%s: Speed (%d) not 10/100\n",
-                                               dev->name, phydev->speed);
+                               netif_warn(priv, link, priv->dev,
+                                          "Speed (%d) not 10/100\n",
+                                          phydev->speed);
                                break;
                        }
 
@@ -808,10 +807,10 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv)
                    (interface == PHY_INTERFACE_MODE_RGMII_ID) ||
                    (interface == PHY_INTERFACE_MODE_RGMII_RXID) ||
                    (interface == PHY_INTERFACE_MODE_RGMII_TXID)) {
-                       pr_debug("STMMAC: PCS RGMII support enable\n");
+                       netdev_dbg(priv->dev, "PCS RGMII support enabled\n");
                        priv->hw->pcs = STMMAC_PCS_RGMII;
                } else if (interface == PHY_INTERFACE_MODE_SGMII) {
-                       pr_debug("STMMAC: PCS SGMII support enable\n");
+                       netdev_dbg(priv->dev, "PCS SGMII support enabled\n");
                        priv->hw->pcs = STMMAC_PCS_SGMII;
                }
        }
@@ -846,15 +845,15 @@ static int stmmac_init_phy(struct net_device *dev)
 
                snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id,
                         priv->plat->phy_addr);
-               pr_debug("stmmac_init_phy:  trying to attach to %s\n",
-                        phy_id_fmt);
+               netdev_dbg(priv->dev, "%s: trying to attach to %s\n", __func__,
+                          phy_id_fmt);
 
                phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link,
                                     interface);
        }
 
        if (IS_ERR_OR_NULL(phydev)) {
-               pr_err("%s: Could not attach to PHY\n", dev->name);
+               netdev_err(priv->dev, "Could not attach to PHY\n");
                if (!phydev)
                        return -ENODEV;
 
@@ -880,8 +879,15 @@ static int stmmac_init_phy(struct net_device *dev)
                return -ENODEV;
        }
 
-       pr_debug("stmmac_init_phy:  %s: attached to PHY (UID 0x%x)"
-                " Link = %d\n", dev->name, phydev->phy_id, phydev->link);
+       /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid
+        * subsequent PHY polling, make sure we force a link transition if
+        * we have a UP/DOWN/UP transition
+        */
+       if (phydev->is_pseudo_fixed_link)
+               phydev->irq = PHY_POLL;
+
+       netdev_dbg(priv->dev, "%s: attached to PHY (UID 0x%x) Link = %d\n",
+                  __func__, phydev->phy_id, phydev->link);
 
        return 0;
 }
@@ -967,7 +973,8 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
 
        skb = __netdev_alloc_skb_ip_align(priv->dev, priv->dma_buf_sz, flags);
        if (!skb) {
-               pr_err("%s: Rx init fails; skb is NULL\n", __func__);
+               netdev_err(priv->dev,
+                          "%s: Rx init fails; skb is NULL\n", __func__);
                return -ENOMEM;
        }
        priv->rx_skbuff[i] = skb;
@@ -975,15 +982,15 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
                                                priv->dma_buf_sz,
                                                DMA_FROM_DEVICE);
        if (dma_mapping_error(priv->device, priv->rx_skbuff_dma[i])) {
-               pr_err("%s: DMA mapping error\n", __func__);
+               netdev_err(priv->dev, "%s: DMA mapping error\n", __func__);
                dev_kfree_skb_any(skb);
                return -EINVAL;
        }
 
        if (priv->synopsys_id >= DWMAC_CORE_4_00)
-               p->des0 = priv->rx_skbuff_dma[i];
+               p->des0 = cpu_to_le32(priv->rx_skbuff_dma[i]);
        else
-               p->des2 = priv->rx_skbuff_dma[i];
+               p->des2 = cpu_to_le32(priv->rx_skbuff_dma[i]);
 
        if ((priv->hw->mode->init_desc3) &&
            (priv->dma_buf_sz == BUF_SIZE_16KiB))
@@ -1025,13 +1032,14 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
 
        priv->dma_buf_sz = bfsize;
 
-       if (netif_msg_probe(priv)) {
-               pr_debug("(%s) dma_rx_phy=0x%08x dma_tx_phy=0x%08x\n", __func__,
-                        (u32) priv->dma_rx_phy, (u32) priv->dma_tx_phy);
+       netif_dbg(priv, probe, priv->dev,
+                 "(%s) dma_rx_phy=0x%08x dma_tx_phy=0x%08x\n",
+                 __func__, (u32)priv->dma_rx_phy, (u32)priv->dma_tx_phy);
+
+       /* RX INITIALIZATION */
+       netif_dbg(priv, probe, priv->dev,
+                 "SKB addresses:\nskb\t\tskb data\tdma data\n");
 
-               /* RX INITIALIZATION */
-               pr_debug("\tSKB addresses:\nskb\t\tskb data\tdma data\n");
-       }
        for (i = 0; i < DMA_RX_SIZE; i++) {
                struct dma_desc *p;
                if (priv->extend_desc)
@@ -1043,10 +1051,9 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
                if (ret)
                        goto err_init_rx_buffers;
 
-               if (netif_msg_probe(priv))
-                       pr_debug("[%p]\t[%p]\t[%x]\n", priv->rx_skbuff[i],
-                                priv->rx_skbuff[i]->data,
-                                (unsigned int)priv->rx_skbuff_dma[i]);
+               netif_dbg(priv, probe, priv->dev, "[%p]\t[%p]\t[%x]\n",
+                         priv->rx_skbuff[i], priv->rx_skbuff[i]->data,
+                         (unsigned int)priv->rx_skbuff_dma[i]);
        }
        priv->cur_rx = 0;
        priv->dirty_rx = (unsigned int)(i - DMA_RX_SIZE);
@@ -1331,7 +1338,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
                                priv->dev->stats.tx_packets++;
                                priv->xstats.tx_pkt_n++;
                        }
-                       stmmac_get_tx_hwtstamp(priv, entry, skb);
+                       stmmac_get_tx_hwtstamp(priv, p, skb);
                }
 
                if (likely(priv->tx_skbuff_dma[entry].buf)) {
@@ -1376,8 +1383,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
                netif_tx_lock(priv->dev);
                if (netif_queue_stopped(priv->dev) &&
                    stmmac_tx_avail(priv) > STMMAC_TX_THRESH) {
-                       if (netif_msg_tx_done(priv))
-                               pr_debug("%s: restart transmit\n", __func__);
+                       netif_dbg(priv, tx_done, priv->dev,
+                                 "%s: restart transmit\n", __func__);
                        netif_wake_queue(priv->dev);
                }
                netif_tx_unlock(priv->dev);
@@ -1477,10 +1484,13 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv)
        unsigned int mode = MMC_CNTRL_RESET_ON_READ | MMC_CNTRL_COUNTER_RESET |
                            MMC_CNTRL_PRESET | MMC_CNTRL_FULL_HALF_PRESET;
 
-       if (priv->synopsys_id >= DWMAC_CORE_4_00)
+       if (priv->synopsys_id >= DWMAC_CORE_4_00) {
+               priv->ptpaddr = priv->ioaddr + PTP_GMAC4_OFFSET;
                priv->mmcaddr = priv->ioaddr + MMC_GMAC4_OFFSET;
-       else
+       } else {
+               priv->ptpaddr = priv->ioaddr + PTP_GMAC3_X_OFFSET;
                priv->mmcaddr = priv->ioaddr + MMC_GMAC3_X_OFFSET;
+       }
 
        dwmac_mmc_intr_all_mask(priv->mmcaddr);
 
@@ -1488,7 +1498,7 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv)
                dwmac_mmc_ctrl(priv->mmcaddr, mode);
                memset(&priv->mmc, 0, sizeof(struct stmmac_counters));
        } else
-               pr_info(" No MAC Management Counters available\n");
+               netdev_info(priv->dev, "No MAC Management Counters available\n");
 }
 
 /**
@@ -1501,18 +1511,18 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv)
 static void stmmac_selec_desc_mode(struct stmmac_priv *priv)
 {
        if (priv->plat->enh_desc) {
-               pr_info(" Enhanced/Alternate descriptors\n");
+               dev_info(priv->device, "Enhanced/Alternate descriptors\n");
 
                /* GMAC older than 3.50 has no extended descriptors */
                if (priv->synopsys_id >= DWMAC_CORE_3_50) {
-                       pr_info("\tEnabled extended descriptors\n");
+                       dev_info(priv->device, "Enabled extended descriptors\n");
                        priv->extend_desc = 1;
                } else
-                       pr_warn("Extended descriptors not supported\n");
+                       dev_warn(priv->device, "Extended descriptors not supported\n");
 
                priv->hw->desc = &enh_desc_ops;
        } else {
-               pr_info(" Normal descriptors\n");
+               dev_info(priv->device, "Normal descriptors\n");
                priv->hw->desc = &ndesc_ops;
        }
 }
@@ -1553,8 +1563,8 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv)
                                             priv->dev->dev_addr, 0);
                if (!is_valid_ether_addr(priv->dev->dev_addr))
                        eth_hw_addr_random(priv->dev);
-               pr_info("%s: device MAC address %pM\n", priv->dev->name,
-                       priv->dev->dev_addr);
+               netdev_info(priv->dev, "device MAC address %pM\n",
+                           priv->dev->dev_addr);
        }
 }
 
@@ -1662,7 +1672,8 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        /* DMA initialization and SW reset */
        ret = stmmac_init_dma_engine(priv);
        if (ret < 0) {
-               pr_err("%s: DMA engine initialization failed\n", __func__);
+               netdev_err(priv->dev, "%s: DMA engine initialization failed\n",
+                          __func__);
                return ret;
        }
 
@@ -1691,7 +1702,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 
        ret = priv->hw->mac->rx_ipc(priv->hw);
        if (!ret) {
-               pr_warn(" RX IPC Checksum Offload disabled\n");
+               netdev_warn(priv->dev, "RX IPC Checksum Offload disabled\n");
                priv->plat->rx_coe = STMMAC_RX_COE_NONE;
                priv->hw->rx_csum = 0;
        }
@@ -1710,16 +1721,17 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        if (init_ptp) {
                ret = stmmac_init_ptp(priv);
                if (ret)
-                       netdev_warn(priv->dev, "PTP support cannot init.\n");
+                       netdev_warn(priv->dev, "fail to init PTP.\n");
        }
 
 #ifdef CONFIG_DEBUG_FS
        ret = stmmac_init_fs(dev);
        if (ret < 0)
-               pr_warn("%s: failed debugFS registration\n", __func__);
+               netdev_warn(priv->dev, "%s: failed debugFS registration\n",
+                           __func__);
 #endif
        /* Start the ball rolling... */
-       pr_debug("%s: DMA RX/TX processes started...\n", dev->name);
+       netdev_dbg(priv->dev, "DMA RX/TX processes started...\n");
        priv->hw->dma->start_tx(priv->ioaddr);
        priv->hw->dma->start_rx(priv->ioaddr);
 
@@ -1774,8 +1786,9 @@ static int stmmac_open(struct net_device *dev)
            priv->hw->pcs != STMMAC_PCS_RTBI) {
                ret = stmmac_init_phy(dev);
                if (ret) {
-                       pr_err("%s: Cannot attach to PHY (error: %d)\n",
-                              __func__, ret);
+                       netdev_err(priv->dev,
+                                  "%s: Cannot attach to PHY (error: %d)\n",
+                                  __func__, ret);
                        return ret;
                }
        }
@@ -1789,19 +1802,21 @@ static int stmmac_open(struct net_device *dev)
 
        ret = alloc_dma_desc_resources(priv);
        if (ret < 0) {
-               pr_err("%s: DMA descriptors allocation failed\n", __func__);
+               netdev_err(priv->dev, "%s: DMA descriptors allocation failed\n",
+                          __func__);
                goto dma_desc_error;
        }
 
        ret = init_dma_desc_rings(dev, GFP_KERNEL);
        if (ret < 0) {
-               pr_err("%s: DMA descriptors initialization failed\n", __func__);
+               netdev_err(priv->dev, "%s: DMA descriptors initialization failed\n",
+                          __func__);
                goto init_error;
        }
 
        ret = stmmac_hw_setup(dev, true);
        if (ret < 0) {
-               pr_err("%s: Hw setup failed\n", __func__);
+               netdev_err(priv->dev, "%s: Hw setup failed\n", __func__);
                goto init_error;
        }
 
@@ -1814,8 +1829,9 @@ static int stmmac_open(struct net_device *dev)
        ret = request_irq(dev->irq, stmmac_interrupt,
                          IRQF_SHARED, dev->name, dev);
        if (unlikely(ret < 0)) {
-               pr_err("%s: ERROR: allocating the IRQ %d (error: %d)\n",
-                      __func__, dev->irq, ret);
+               netdev_err(priv->dev,
+                          "%s: ERROR: allocating the IRQ %d (error: %d)\n",
+                          __func__, dev->irq, ret);
                goto init_error;
        }
 
@@ -1824,8 +1840,9 @@ static int stmmac_open(struct net_device *dev)
                ret = request_irq(priv->wol_irq, stmmac_interrupt,
                                  IRQF_SHARED, dev->name, dev);
                if (unlikely(ret < 0)) {
-                       pr_err("%s: ERROR: allocating the WoL IRQ %d (%d)\n",
-                              __func__, priv->wol_irq, ret);
+                       netdev_err(priv->dev,
+                                  "%s: ERROR: allocating the WoL IRQ %d (%d)\n",
+                                  __func__, priv->wol_irq, ret);
                        goto wolirq_error;
                }
        }
@@ -1835,8 +1852,9 @@ static int stmmac_open(struct net_device *dev)
                ret = request_irq(priv->lpi_irq, stmmac_interrupt, IRQF_SHARED,
                                  dev->name, dev);
                if (unlikely(ret < 0)) {
-                       pr_err("%s: ERROR: allocating the LPI IRQ %d (%d)\n",
-                              __func__, priv->lpi_irq, ret);
+                       netdev_err(priv->dev,
+                                  "%s: ERROR: allocating the LPI IRQ %d (%d)\n",
+                                  __func__, priv->lpi_irq, ret);
                        goto lpiirq_error;
                }
        }
@@ -1937,7 +1955,7 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
                priv->cur_tx = STMMAC_GET_ENTRY(priv->cur_tx, DMA_TX_SIZE);
                desc = priv->dma_tx + priv->cur_tx;
 
-               desc->des0 = des + (total_len - tmp_len);
+               desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
                buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ?
                            TSO_MAX_BUFF_SIZE : tmp_len;
 
@@ -1999,7 +2017,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
                if (!netif_queue_stopped(dev)) {
                        netif_stop_queue(dev);
                        /* This is a hard error, log it. */
-                       pr_err("%s: Tx Ring full when queue awake\n", __func__);
+                       netdev_err(priv->dev,
+                                  "%s: Tx Ring full when queue awake\n",
+                                  __func__);
                }
                spin_unlock(&priv->tx_lock);
                return NETDEV_TX_BUSY;
@@ -2039,11 +2059,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
        priv->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
        priv->tx_skbuff[first_entry] = skb;
 
-       first->des0 = des;
+       first->des0 = cpu_to_le32(des);
 
        /* Fill start of payload in buff2 of first descriptor */
        if (pay_len)
-               first->des1 =  des + proto_hdr_len;
+               first->des1 = cpu_to_le32(des + proto_hdr_len);
 
        /* If needed take extra descriptors to fill the remaining payload */
        tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
@@ -2072,8 +2092,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
        priv->cur_tx = STMMAC_GET_ENTRY(priv->cur_tx, DMA_TX_SIZE);
 
        if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
-               if (netif_msg_hw(priv))
-                       pr_debug("%s: stop transmitted packets\n", __func__);
+               netif_dbg(priv, hw, priv->dev, "%s: stop transmitted packets\n",
+                         __func__);
                netif_stop_queue(dev);
        }
 
@@ -2179,7 +2199,9 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                if (!netif_queue_stopped(dev)) {
                        netif_stop_queue(dev);
                        /* This is a hard error, log it. */
-                       pr_err("%s: Tx Ring full when queue awake\n", __func__);
+                       netdev_err(priv->dev,
+                                  "%s: Tx Ring full when queue awake\n",
+                                  __func__);
                }
                return NETDEV_TX_BUSY;
        }
@@ -2232,13 +2254,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
                priv->tx_skbuff[entry] = NULL;
 
-               if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
-                       desc->des0 = des;
-                       priv->tx_skbuff_dma[entry].buf = desc->des0;
-               } else {
-                       desc->des2 = des;
-                       priv->tx_skbuff_dma[entry].buf = desc->des2;
-               }
+               priv->tx_skbuff_dma[entry].buf = des;
+               if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
+                       desc->des0 = cpu_to_le32(des);
+               else
+                       desc->des2 = cpu_to_le32(des);
 
                priv->tx_skbuff_dma[entry].map_as_page = true;
                priv->tx_skbuff_dma[entry].len = len;
@@ -2256,9 +2276,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
        if (netif_msg_pktdata(priv)) {
                void *tx_head;
 
-               pr_debug("%s: curr=%d dirty=%d f=%d, e=%d, first=%p, nfrags=%d",
-                        __func__, priv->cur_tx, priv->dirty_tx, first_entry,
-                        entry, first, nfrags);
+               netdev_dbg(priv->dev,
+                          "%s: curr=%d dirty=%d f=%d, e=%d, first=%p, nfrags=%d",
+                          __func__, priv->cur_tx, priv->dirty_tx, first_entry,
+                          entry, first, nfrags);
 
                if (priv->extend_desc)
                        tx_head = (void *)priv->dma_etx;
@@ -2267,13 +2288,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
                priv->hw->desc->display_ring(tx_head, DMA_TX_SIZE, false);
 
-               pr_debug(">>> frame to be transmitted: ");
+               netdev_dbg(priv->dev, ">>> frame to be transmitted: ");
                print_pkt(skb->data, skb->len);
        }
 
        if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
-               if (netif_msg_hw(priv))
-                       pr_debug("%s: stop transmitted packets\n", __func__);
+               netif_dbg(priv, hw, priv->dev, "%s: stop transmitted packets\n",
+                         __func__);
                netif_stop_queue(dev);
        }
 
@@ -2309,13 +2330,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
                if (dma_mapping_error(priv->device, des))
                        goto dma_map_err;
 
-               if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
-                       first->des0 = des;
-                       priv->tx_skbuff_dma[first_entry].buf = first->des0;
-               } else {
-                       first->des2 = des;
-                       priv->tx_skbuff_dma[first_entry].buf = first->des2;
-               }
+               priv->tx_skbuff_dma[first_entry].buf = des;
+               if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
+                       first->des0 = cpu_to_le32(des);
+               else
+                       first->des2 = cpu_to_le32(des);
 
                priv->tx_skbuff_dma[first_entry].len = nopaged_len;
                priv->tx_skbuff_dma[first_entry].last_segment = last_segment;
@@ -2352,7 +2371,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 dma_map_err:
        spin_unlock(&priv->tx_lock);
-       dev_err(priv->device, "Tx dma map failed\n");
+       netdev_err(priv->dev, "Tx DMA map failed\n");
        dev_kfree_skb(skb);
        priv->dev->stats.tx_dropped++;
        return NETDEV_TX_OK;
@@ -2423,16 +2442,16 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
                                           DMA_FROM_DEVICE);
                        if (dma_mapping_error(priv->device,
                                              priv->rx_skbuff_dma[entry])) {
-                               dev_err(priv->device, "Rx dma map failed\n");
+                               netdev_err(priv->dev, "Rx DMA map failed\n");
                                dev_kfree_skb(skb);
                                break;
                        }
 
                        if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
-                               p->des0 = priv->rx_skbuff_dma[entry];
+                               p->des0 = cpu_to_le32(priv->rx_skbuff_dma[entry]);
                                p->des1 = 0;
                        } else {
-                               p->des2 = priv->rx_skbuff_dma[entry];
+                               p->des2 = cpu_to_le32(priv->rx_skbuff_dma[entry]);
                        }
                        if (priv->hw->mode->refill_desc3)
                                priv->hw->mode->refill_desc3(priv, p);
@@ -2440,8 +2459,8 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
                        if (priv->rx_zeroc_thresh > 0)
                                priv->rx_zeroc_thresh--;
 
-                       if (netif_msg_rx_status(priv))
-                               pr_debug("\trefill entry #%d\n", entry);
+                       netif_dbg(priv, rx_status, priv->dev,
+                                 "refill entry #%d\n", entry);
                }
                wmb();
 
@@ -2474,7 +2493,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
        if (netif_msg_rx_status(priv)) {
                void *rx_head;
 
-               pr_debug("%s: descriptor ring:\n", __func__);
+               netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__);
                if (priv->extend_desc)
                        rx_head = (void *)priv->dma_erx;
                else
@@ -2485,6 +2504,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
        while (count < limit) {
                int status;
                struct dma_desc *p;
+               struct dma_desc *np;
 
                if (priv->extend_desc)
                        p = (struct dma_desc *)(priv->dma_erx + entry);
@@ -2504,9 +2524,11 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                next_entry = priv->cur_rx;
 
                if (priv->extend_desc)
-                       prefetch(priv->dma_erx + next_entry);
+                       np = (struct dma_desc *)(priv->dma_erx + next_entry);
                else
-                       prefetch(priv->dma_rx + next_entry);
+                       np = priv->dma_rx + next_entry;
+
+               prefetch(np);
 
                if ((priv->extend_desc) && (priv->hw->desc->rx_extended_status))
                        priv->hw->desc->rx_extended_status(&priv->dev->stats,
@@ -2533,9 +2555,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                        unsigned int des;
 
                        if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
-                               des = p->des0;
+                               des = le32_to_cpu(p->des0);
                        else
-                               des = p->des2;
+                               des = le32_to_cpu(p->des2);
 
                        frame_len = priv->hw->desc->get_rx_frame_len(p, coe);
 
@@ -2544,9 +2566,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                         *  ignored
                         */
                        if (frame_len > priv->dma_buf_sz) {
-                               pr_err("%s: len %d larger than size (%d)\n",
-                                      priv->dev->name, frame_len,
-                                      priv->dma_buf_sz);
+                               netdev_err(priv->dev,
+                                          "len %d larger than size (%d)\n",
+                                          frame_len, priv->dma_buf_sz);
                                priv->dev->stats.rx_length_errors++;
                                break;
                        }
@@ -2558,11 +2580,11 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                                frame_len -= ETH_FCS_LEN;
 
                        if (netif_msg_rx_status(priv)) {
-                               pr_debug("\tdesc: %p [entry %d] buff=0x%x\n",
-                                       p, entry, des);
+                               netdev_dbg(priv->dev, "\tdesc: %p [entry %d] buff=0x%x\n",
+                                          p, entry, des);
                                if (frame_len > ETH_FRAME_LEN)
-                                       pr_debug("\tframe size %d, COE: %d\n",
-                                                frame_len, status);
+                                       netdev_dbg(priv->dev, "frame size %d, COE: %d\n",
+                                                  frame_len, status);
                        }
 
                        /* The zero-copy is always used for all the sizes
@@ -2599,8 +2621,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                        } else {
                                skb = priv->rx_skbuff[entry];
                                if (unlikely(!skb)) {
-                                       pr_err("%s: Inconsistent Rx chain\n",
-                                              priv->dev->name);
+                                       netdev_err(priv->dev,
+                                                  "%s: Inconsistent Rx chain\n",
+                                                  priv->dev->name);
                                        priv->dev->stats.rx_dropped++;
                                        break;
                                }
@@ -2615,13 +2638,14 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
                                                 DMA_FROM_DEVICE);
                        }
 
-                       stmmac_get_rx_hwtstamp(priv, entry, skb);
-
                        if (netif_msg_pktdata(priv)) {
-                               pr_debug("frame received (%dbytes)", frame_len);
+                               netdev_dbg(priv->dev, "frame received (%dbytes)",
+                                          frame_len);
                                print_pkt(skb->data, frame_len);
                        }
 
+                       stmmac_get_rx_hwtstamp(priv, p, np, skb);
+
                        stmmac_rx_vlan(priv->dev, skb);
 
                        skb->protocol = eth_type_trans(skb, priv->dev);
@@ -2715,8 +2739,10 @@ static void stmmac_set_rx_mode(struct net_device *dev)
  */
 static int stmmac_change_mtu(struct net_device *dev, int new_mtu)
 {
+       struct stmmac_priv *priv = netdev_priv(dev);
+
        if (netif_running(dev)) {
-               pr_err("%s: must be stopped to change its MTU\n", dev->name);
+               netdev_err(priv->dev, "must be stopped to change its MTU\n");
                return -EBUSY;
        }
 
@@ -2795,7 +2821,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
                pm_wakeup_event(priv->device, 0);
 
        if (unlikely(!dev)) {
-               pr_err("%s: invalid dev pointer\n", __func__);
+               netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
                return IRQ_NONE;
        }
 
@@ -2892,14 +2918,17 @@ static void sysfs_display_ring(void *head, int size, int extend_desc,
                        x = *(u64 *) ep;
                        seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
                                   i, (unsigned int)virt_to_phys(ep),
-                                  ep->basic.des0, ep->basic.des1,
-                                  ep->basic.des2, ep->basic.des3);
+                                  le32_to_cpu(ep->basic.des0),
+                                  le32_to_cpu(ep->basic.des1),
+                                  le32_to_cpu(ep->basic.des2),
+                                  le32_to_cpu(ep->basic.des3));
                        ep++;
                } else {
                        x = *(u64 *) p;
                        seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
                                   i, (unsigned int)virt_to_phys(ep),
-                                  p->des0, p->des1, p->des2, p->des3);
+                                  le32_to_cpu(p->des0), le32_to_cpu(p->des1),
+                                  le32_to_cpu(p->des2), le32_to_cpu(p->des3));
                        p++;
                }
                seq_printf(seq, "\n");
@@ -2931,6 +2960,8 @@ static int stmmac_sysfs_ring_open(struct inode *inode, struct file *file)
        return single_open(file, stmmac_sysfs_ring_read, inode->i_private);
 }
 
+/* Debugfs files, should appear in /sys/kernel/debug/stmmaceth/eth0 */
+
 static const struct file_operations stmmac_rings_status_fops = {
        .owner = THIS_MODULE,
        .open = stmmac_sysfs_ring_open,
@@ -2953,11 +2984,11 @@ static int stmmac_sysfs_dma_cap_read(struct seq_file *seq, void *v)
        seq_printf(seq, "\tDMA HW features\n");
        seq_printf(seq, "==============================\n");
 
-       seq_printf(seq, "\t10/100 Mbps %s\n",
+       seq_printf(seq, "\t10/100 Mbps: %s\n",
                   (priv->dma_cap.mbps_10_100) ? "Y" : "N");
-       seq_printf(seq, "\t1000 Mbps %s\n",
+       seq_printf(seq, "\t1000 Mbps: %s\n",
                   (priv->dma_cap.mbps_1000) ? "Y" : "N");
-       seq_printf(seq, "\tHalf duple %s\n",
+       seq_printf(seq, "\tHalf duplex: %s\n",
                   (priv->dma_cap.half_duplex) ? "Y" : "N");
        seq_printf(seq, "\tHash Filter: %s\n",
                   (priv->dma_cap.hash_filter) ? "Y" : "N");
@@ -2975,9 +3006,9 @@ static int stmmac_sysfs_dma_cap_read(struct seq_file *seq, void *v)
                   (priv->dma_cap.rmon) ? "Y" : "N");
        seq_printf(seq, "\tIEEE 1588-2002 Time Stamp: %s\n",
                   (priv->dma_cap.time_stamp) ? "Y" : "N");
-       seq_printf(seq, "\tIEEE 1588-2008 Advanced Time Stamp:%s\n",
+       seq_printf(seq, "\tIEEE 1588-2008 Advanced Time Stamp: %s\n",
                   (priv->dma_cap.atime_stamp) ? "Y" : "N");
-       seq_printf(seq, "\t802.3az - Energy-Efficient Ethernet (EEE) %s\n",
+       seq_printf(seq, "\t802.3az - Energy-Efficient Ethernet (EEE): %s\n",
                   (priv->dma_cap.eee) ? "Y" : "N");
        seq_printf(seq, "\tAV features: %s\n", (priv->dma_cap.av) ? "Y" : "N");
        seq_printf(seq, "\tChecksum Offload in TX: %s\n",
@@ -3024,8 +3055,7 @@ static int stmmac_init_fs(struct net_device *dev)
        priv->dbgfs_dir = debugfs_create_dir(dev->name, stmmac_fs_dir);
 
        if (!priv->dbgfs_dir || IS_ERR(priv->dbgfs_dir)) {
-               pr_err("ERROR %s/%s, debugfs create directory failed\n",
-                      STMMAC_RESOURCE_NAME, dev->name);
+               netdev_err(priv->dev, "ERROR failed to create debugfs directory\n");
 
                return -ENOMEM;
        }
@@ -3037,7 +3067,7 @@ static int stmmac_init_fs(struct net_device *dev)
                                    &stmmac_rings_status_fops);
 
        if (!priv->dbgfs_rings_status || IS_ERR(priv->dbgfs_rings_status)) {
-               pr_info("ERROR creating stmmac ring debugfs file\n");
+               netdev_err(priv->dev, "ERROR creating stmmac ring debugfs file\n");
                debugfs_remove_recursive(priv->dbgfs_dir);
 
                return -ENOMEM;
@@ -3049,7 +3079,7 @@ static int stmmac_init_fs(struct net_device *dev)
                                            dev, &stmmac_dma_cap_fops);
 
        if (!priv->dbgfs_dma_cap || IS_ERR(priv->dbgfs_dma_cap)) {
-               pr_info("ERROR creating stmmac MMC debugfs file\n");
+               netdev_err(priv->dev, "ERROR creating stmmac MMC debugfs file\n");
                debugfs_remove_recursive(priv->dbgfs_dir);
 
                return -ENOMEM;
@@ -3121,11 +3151,11 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
        } else {
                if (chain_mode) {
                        priv->hw->mode = &chain_mode_ops;
-                       pr_info(" Chain mode enabled\n");
+                       dev_info(priv->device, "Chain mode enabled\n");
                        priv->mode = STMMAC_CHAIN_MODE;
                } else {
                        priv->hw->mode = &ring_mode_ops;
-                       pr_info(" Ring mode enabled\n");
+                       dev_info(priv->device, "Ring mode enabled\n");
                        priv->mode = STMMAC_RING_MODE;
                }
        }
@@ -3133,7 +3163,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
        /* Get the HW capability (new GMAC newer than 3.50a) */
        priv->hw_cap_support = stmmac_get_hw_features(priv);
        if (priv->hw_cap_support) {
-               pr_info(" DMA HW capability register supported");
+               dev_info(priv->device, "DMA HW capability register supported\n");
 
                /* We can override some gmac/dma configuration fields: e.g.
                 * enh_desc, tx_coe (e.g. that are passed through the
@@ -3158,8 +3188,9 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
                else if (priv->dma_cap.rx_coe_type1)
                        priv->plat->rx_coe = STMMAC_RX_COE_TYPE1;
 
-       } else
-               pr_info(" No HW DMA feature register supported");
+       } else {
+               dev_info(priv->device, "No HW DMA feature register supported\n");
+       }
 
        /* To use alternate (extended), normal or GMAC4 descriptor structures */
        if (priv->synopsys_id >= DWMAC_CORE_4_00)
@@ -3169,20 +3200,20 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 
        if (priv->plat->rx_coe) {
                priv->hw->rx_csum = priv->plat->rx_coe;
-               pr_info(" RX Checksum Offload Engine supported\n");
+               dev_info(priv->device, "RX Checksum Offload Engine supported\n");
                if (priv->synopsys_id < DWMAC_CORE_4_00)
-                       pr_info("\tCOE Type %d\n", priv->hw->rx_csum);
+                       dev_info(priv->device, "COE Type %d\n", priv->hw->rx_csum);
        }
        if (priv->plat->tx_coe)
-               pr_info(" TX Checksum insertion supported\n");
+               dev_info(priv->device, "TX Checksum insertion supported\n");
 
        if (priv->plat->pmt) {
-               pr_info(" Wake-Up On Lan supported\n");
+               dev_info(priv->device, "Wake-Up On Lan supported\n");
                device_set_wakeup_capable(priv->device, 1);
        }
 
        if (priv->dma_cap.tsoen)
-               pr_info(" TSO supported\n");
+               dev_info(priv->device, "TSO supported\n");
 
        return 0;
 }
@@ -3241,8 +3272,8 @@ int stmmac_dvr_probe(struct device *device,
 
        priv->stmmac_clk = devm_clk_get(priv->device, STMMAC_RESOURCE_NAME);
        if (IS_ERR(priv->stmmac_clk)) {
-               dev_warn(priv->device, "%s: warning: cannot get CSR clock\n",
-                        __func__);
+               netdev_warn(priv->dev, "%s: warning: cannot get CSR clock\n",
+                           __func__);
                /* If failed to obtain stmmac_clk and specific clk_csr value
                 * is NOT passed from the platform, probe fail.
                 */
@@ -3291,7 +3322,7 @@ int stmmac_dvr_probe(struct device *device,
        if ((priv->plat->tso_en) && (priv->dma_cap.tsoen)) {
                ndev->hw_features |= NETIF_F_TSO;
                priv->tso = true;
-               pr_info(" TSO feature enabled\n");
+               dev_info(priv->device, "TSO feature enabled\n");
        }
        ndev->features |= ndev->hw_features | NETIF_F_HIGHDMA;
        ndev->watchdog_timeo = msecs_to_jiffies(watchdog);
@@ -3320,7 +3351,7 @@ int stmmac_dvr_probe(struct device *device,
         */
        if ((priv->synopsys_id >= DWMAC_CORE_3_50) && (!priv->plat->riwt_off)) {
                priv->use_riwt = 1;
-               pr_info(" Enable RX Mitigation via HW Watchdog Timer\n");
+               netdev_info(priv->dev, "Enable RX Mitigation via HW Watchdog Timer\n");
        }
 
        netif_napi_add(ndev, &priv->napi, stmmac_poll, 64);
@@ -3330,7 +3361,8 @@ int stmmac_dvr_probe(struct device *device,
 
        ret = register_netdev(ndev);
        if (ret) {
-               pr_err("%s: ERROR %i registering the device\n", __func__, ret);
+               netdev_err(priv->dev, "%s: ERROR %i registering the device\n",
+                          __func__, ret);
                goto error_netdev_register;
        }
 
@@ -3353,8 +3385,9 @@ int stmmac_dvr_probe(struct device *device,
                /* MDIO bus Registration */
                ret = stmmac_mdio_register(ndev);
                if (ret < 0) {
-                       pr_debug("%s: MDIO bus (id: %d) registration failed",
-                                __func__, priv->plat->bus_id);
+                       netdev_err(priv->dev,
+                                  "%s: MDIO bus (id: %d) registration failed",
+                                  __func__, priv->plat->bus_id);
                        goto error_mdio_register;
                }
        }
@@ -3387,7 +3420,7 @@ int stmmac_dvr_remove(struct device *dev)
        struct net_device *ndev = dev_get_drvdata(dev);
        struct stmmac_priv *priv = netdev_priv(ndev);
 
-       pr_info("%s:\n\tremoving driver", __func__);
+       netdev_info(priv->dev, "%s: removing driver", __func__);
 
        priv->hw->dma->stop_rx(priv->ioaddr);
        priv->hw->dma->stop_tx(priv->ioaddr);
index ec295851812b0fcc299f09aedcbc750369670b53..e3216e5e153461ca9e50123817e7a850894bc331 100644 (file)
@@ -260,7 +260,7 @@ int stmmac_mdio_reset(struct mii_bus *bus)
 #endif
 
        if (data->phy_reset) {
-               pr_debug("stmmac_mdio_reset: calling phy_reset\n");
+               netdev_dbg(ndev, "stmmac_mdio_reset: calling phy_reset\n");
                data->phy_reset(priv->plat->bsp_priv);
        }
 
@@ -325,7 +325,7 @@ int stmmac_mdio_register(struct net_device *ndev)
        else
                err = mdiobus_register(new_bus);
        if (err != 0) {
-               pr_err("%s: Cannot register as MDIO bus\n", new_bus->name);
+               netdev_err(ndev, "Cannot register the MDIO bus\n");
                goto bus_register_fail;
        }
 
@@ -372,16 +372,16 @@ int stmmac_mdio_register(struct net_device *ndev)
                                irq_str = irq_num;
                                break;
                        }
-                       pr_info("%s: PHY ID %08x at %d IRQ %s (%s)%s\n",
-                               ndev->name, phydev->phy_id, addr,
-                               irq_str, phydev_name(phydev),
-                               act ? " active" : "");
+                       netdev_info(ndev, "PHY ID %08x at %d IRQ %s (%s)%s\n",
+                                   phydev->phy_id, addr,
+                                   irq_str, phydev_name(phydev),
+                                   act ? " active" : "");
                        found = 1;
                }
        }
 
        if (!found && !mdio_node) {
-               pr_warn("%s: No PHY found\n", ndev->name);
+               netdev_warn(ndev, "No PHY found\n");
                mdiobus_unregister(new_bus);
                mdiobus_free(new_bus);
                return -ENODEV;
index 0a0d6a86f3979d260e2cda42282503e0d50e59ec..4d544c34c1f24ee4a7cce54878e167d19762c550 100644 (file)
@@ -417,9 +417,7 @@ static int stmmac_pltfr_suspend(struct device *dev)
        struct platform_device *pdev = to_platform_device(dev);
 
        ret = stmmac_suspend(dev);
-       if (priv->plat->suspend)
-               priv->plat->suspend(pdev, priv->plat->bsp_priv);
-       else if (priv->plat->exit)
+       if (priv->plat->exit)
                priv->plat->exit(pdev, priv->plat->bsp_priv);
 
        return ret;
@@ -438,9 +436,7 @@ static int stmmac_pltfr_resume(struct device *dev)
        struct stmmac_priv *priv = netdev_priv(ndev);
        struct platform_device *pdev = to_platform_device(dev);
 
-       if (priv->plat->resume)
-               priv->plat->resume(pdev, priv->plat->bsp_priv);
-       else if (priv->plat->init)
+       if (priv->plat->init)
                priv->plat->init(pdev, priv->plat->bsp_priv);
 
        return stmmac_resume(dev);
index 289d52725a6c172dc70a6f2db1255afc7161b2cd..3eb281d1db08a94ff76a3d3d21df367966a036d6 100644 (file)
@@ -54,7 +54,7 @@ static int stmmac_adjust_freq(struct ptp_clock_info *ptp, s32 ppb)
 
        spin_lock_irqsave(&priv->ptp_lock, flags);
 
-       priv->hw->ptp->config_addend(priv->ioaddr, addend);
+       priv->hw->ptp->config_addend(priv->ptpaddr, addend);
 
        spin_unlock_irqrestore(&priv->ptp_lock, flags);
 
@@ -89,7 +89,8 @@ static int stmmac_adjust_time(struct ptp_clock_info *ptp, s64 delta)
 
        spin_lock_irqsave(&priv->ptp_lock, flags);
 
-       priv->hw->ptp->adjust_systime(priv->ioaddr, sec, nsec, neg_adj);
+       priv->hw->ptp->adjust_systime(priv->ptpaddr, sec, nsec, neg_adj,
+                                     priv->plat->has_gmac4);
 
        spin_unlock_irqrestore(&priv->ptp_lock, flags);
 
@@ -114,7 +115,7 @@ static int stmmac_get_time(struct ptp_clock_info *ptp, struct timespec64 *ts)
 
        spin_lock_irqsave(&priv->ptp_lock, flags);
 
-       ns = priv->hw->ptp->get_systime(priv->ioaddr);
+       ns = priv->hw->ptp->get_systime(priv->ptpaddr);
 
        spin_unlock_irqrestore(&priv->ptp_lock, flags);
 
@@ -141,7 +142,7 @@ static int stmmac_set_time(struct ptp_clock_info *ptp,
 
        spin_lock_irqsave(&priv->ptp_lock, flags);
 
-       priv->hw->ptp->init_systime(priv->ioaddr, ts->tv_sec, ts->tv_nsec);
+       priv->hw->ptp->init_systime(priv->ptpaddr, ts->tv_sec, ts->tv_nsec);
 
        spin_unlock_irqrestore(&priv->ptp_lock, flags);
 
@@ -177,7 +178,7 @@ static struct ptp_clock_info stmmac_ptp_clock_ops = {
  * Description: this function will register the ptp clock driver
  * to kernel. It also does some house keeping work.
  */
-int stmmac_ptp_register(struct stmmac_priv *priv)
+void stmmac_ptp_register(struct stmmac_priv *priv)
 {
        spin_lock_init(&priv->ptp_lock);
        priv->ptp_clock_ops = stmmac_ptp_clock_ops;
@@ -185,15 +186,10 @@ int stmmac_ptp_register(struct stmmac_priv *priv)
        priv->ptp_clock = ptp_clock_register(&priv->ptp_clock_ops,
                                             priv->device);
        if (IS_ERR(priv->ptp_clock)) {
+               netdev_err(priv->dev, "ptp_clock_register failed\n");
                priv->ptp_clock = NULL;
-               return PTR_ERR(priv->ptp_clock);
-       }
-
-       spin_lock_init(&priv->ptp_lock);
-
-       netdev_dbg(priv->dev, "Added PTP HW clock successfully\n");
-
-       return 0;
+       } else if (priv->ptp_clock)
+               netdev_info(priv->dev, "registered PTP clock\n");
 }
 
 /**
index 4535df37c22767824d1f7bbe6db56a8e3d0644ab..c06938c47af5549658c19e9c64a568595d80510a 100644 (file)
   Author: Rayagond Kokatanur <rayagond@vayavyalabs.com>
 ******************************************************************************/
 
-#ifndef __STMMAC_PTP_H__
-#define __STMMAC_PTP_H__
+#ifndef        __STMMAC_PTP_H__
+#define        __STMMAC_PTP_H__
 
-/* IEEE 1588 PTP register offsets */
-#define PTP_TCR                0x0700  /* Timestamp Control Reg */
-#define PTP_SSIR       0x0704  /* Sub-Second Increment Reg */
-#define PTP_STSR       0x0708  /* System Time – Seconds Regr */
-#define PTP_STNSR      0x070C  /* System Time – Nanoseconds Reg */
-#define PTP_STSUR      0x0710  /* System Time – Seconds Update Reg */
-#define PTP_STNSUR     0x0714  /* System Time – Nanoseconds Update Reg */
-#define PTP_TAR                0x0718  /* Timestamp Addend Reg */
-#define PTP_TTSR       0x071C  /* Target Time Seconds Reg */
-#define PTP_TTNSR      0x0720  /* Target Time Nanoseconds Reg */
-#define        PTP_STHWSR      0x0724  /* System Time - Higher Word Seconds Reg */
-#define PTP_TSR                0x0728  /* Timestamp Status */
+#define        PTP_GMAC4_OFFSET        0xb00
+#define        PTP_GMAC3_X_OFFSET      0x700
 
-#define PTP_STNSUR_ADDSUB_SHIFT 31
+/* IEEE 1588 PTP register offsets */
+#define        PTP_TCR         0x00    /* Timestamp Control Reg */
+#define        PTP_SSIR        0x04    /* Sub-Second Increment Reg */
+#define        PTP_STSR        0x08    /* System Time – Seconds Regr */
+#define        PTP_STNSR       0x0c    /* System Time – Nanoseconds Reg */
+#define        PTP_STSUR       0x10    /* System Time – Seconds Update Reg */
+#define        PTP_STNSUR      0x14    /* System Time – Nanoseconds Update Reg */
+#define        PTP_TAR         0x18    /* Timestamp Addend Reg */
 
-/* PTP TCR defines */
-#define PTP_TCR_TSENA          0x00000001 /* Timestamp Enable */
-#define PTP_TCR_TSCFUPDT       0x00000002 /* Timestamp Fine/Coarse Update */
-#define PTP_TCR_TSINIT         0x00000004 /* Timestamp Initialize */
-#define PTP_TCR_TSUPDT         0x00000008 /* Timestamp Update */
-/* Timestamp Interrupt Trigger Enable */
-#define PTP_TCR_TSTRIG         0x00000010
-#define PTP_TCR_TSADDREG       0x00000020 /* Addend Reg Update */
-#define PTP_TCR_TSENALL                0x00000100 /* Enable Timestamp for All Frames */
-/* Timestamp Digital or Binary Rollover Control */
-#define PTP_TCR_TSCTRLSSR      0x00000200
+#define        PTP_STNSUR_ADDSUB_SHIFT 31
+#define        PTP_DIGITAL_ROLLOVER_MODE       0x3B9ACA00      /* 10e9-1 ns */
+#define        PTP_BINARY_ROLLOVER_MODE        0x80000000      /* ~0.466 ns */
 
+/* PTP Timestamp control register defines */
+#define        PTP_TCR_TSENA           BIT(0)  /* Timestamp Enable */
+#define        PTP_TCR_TSCFUPDT        BIT(1)  /* Timestamp Fine/Coarse Update */
+#define        PTP_TCR_TSINIT          BIT(2)  /* Timestamp Initialize */
+#define        PTP_TCR_TSUPDT          BIT(3)  /* Timestamp Update */
+#define        PTP_TCR_TSTRIG          BIT(4)  /* Timestamp Interrupt Trigger Enable */
+#define        PTP_TCR_TSADDREG        BIT(5)  /* Addend Reg Update */
+#define        PTP_TCR_TSENALL         BIT(8)  /* Enable Timestamp for All Frames */
+#define        PTP_TCR_TSCTRLSSR       BIT(9)  /* Digital or Binary Rollover Control */
 /* Enable PTP packet Processing for Version 2 Format */
-#define PTP_TCR_TSVER2ENA      0x00000400
+#define        PTP_TCR_TSVER2ENA       BIT(10)
 /* Enable Processing of PTP over Ethernet Frames */
-#define PTP_TCR_TSIPENA                0x00000800
+#define        PTP_TCR_TSIPENA         BIT(11)
 /* Enable Processing of PTP Frames Sent over IPv6-UDP */
-#define PTP_TCR_TSIPV6ENA      0x00001000
+#define        PTP_TCR_TSIPV6ENA       BIT(12)
 /* Enable Processing of PTP Frames Sent over IPv4-UDP */
-#define PTP_TCR_TSIPV4ENA      0x00002000
+#define        PTP_TCR_TSIPV4ENA       BIT(13)
 /* Enable Timestamp Snapshot for Event Messages */
-#define PTP_TCR_TSEVNTENA      0x00004000
+#define        PTP_TCR_TSEVNTENA       BIT(14)
 /* Enable Snapshot for Messages Relevant to Master */
-#define PTP_TCR_TSMSTRENA      0x00008000
+#define        PTP_TCR_TSMSTRENA       BIT(15)
 /* Select PTP packets for Taking Snapshots */
-#define PTP_TCR_SNAPTYPSEL_1   0x00010000
+#define        PTP_TCR_SNAPTYPSEL_1    GENMASK(17, 16)
 /* Enable MAC address for PTP Frame Filtering */
-#define PTP_TCR_TSENMACADDR    0x00040000
+#define        PTP_TCR_TSENMACADDR     BIT(18)
+
+/* SSIR defines */
+#define        PTP_SSIR_SSINC_MASK             0xff
+#define        GMAC4_PTP_SSIR_SSINC_SHIFT      16
 
-#endif /* __STMMAC_PTP_H__ */
+#endif /* __STMMAC_PTP_H__ */
index ea89ef3b48fb82524ec431e89178763a93f42d78..c4caf486cbeffee85ca7ca829ae3fef0d6ad9fe4 100644 (file)
@@ -623,6 +623,7 @@ static int bigmac_init_hw(struct bigmac *bp, int from_irq)
        void __iomem *gregs        = bp->gregs;
        void __iomem *cregs        = bp->creg;
        void __iomem *bregs        = bp->bregs;
+       __u32 bblk_dvma = (__u32)bp->bblock_dvma;
        unsigned char *e = &bp->dev->dev_addr[0];
 
        /* Latch current counters into statistics. */
@@ -671,9 +672,9 @@ static int bigmac_init_hw(struct bigmac *bp, int from_irq)
                    bregs + BMAC_XIFCFG);
 
        /* Tell the QEC where the ring descriptors are. */
-       sbus_writel(bp->bblock_dvma + bib_offset(be_rxd, 0),
+       sbus_writel(bblk_dvma + bib_offset(be_rxd, 0),
                    cregs + CREG_RXDS);
-       sbus_writel(bp->bblock_dvma + bib_offset(be_txd, 0),
+       sbus_writel(bblk_dvma + bib_offset(be_txd, 0),
                    cregs + CREG_TXDS);
 
        /* Setup the FIFO pointers into QEC local memory. */
index 06dd21707353594b2150ec8afac12c440232160d..532fc56830cf319b3067b3caa6c8149f1ac115d6 100644 (file)
@@ -291,7 +291,7 @@ struct bigmac {
        void __iomem    *bregs; /* BigMAC Registers                   */
        void __iomem    *tregs; /* BigMAC Transceiver                 */
        struct bmac_init_block  *bmac_block;    /* RX and TX descriptors */
-       __u32                    bblock_dvma;   /* RX and TX descriptors */
+       dma_addr_t              bblock_dvma;    /* RX and TX descriptors */
 
        spinlock_t              lock;
 
index c5ef711f656707888f8c71c3d346e4da81eb6836..a6bcdcdd947e336768af4f65220fcec59ae64406 100644 (file)
@@ -124,7 +124,7 @@ static void qe_init_rings(struct sunqe *qep)
 {
        struct qe_init_block *qb = qep->qe_block;
        struct sunqe_buffers *qbufs = qep->buffers;
-       __u32 qbufs_dvma = qep->buffers_dvma;
+       __u32 qbufs_dvma = (__u32)qep->buffers_dvma;
        int i;
 
        qep->rx_new = qep->rx_old = qep->tx_new = qep->tx_old = 0;
@@ -144,6 +144,7 @@ static int qe_init(struct sunqe *qep, int from_irq)
        void __iomem *mregs = qep->mregs;
        void __iomem *gregs = qecp->gregs;
        unsigned char *e = &qep->dev->dev_addr[0];
+       __u32 qblk_dvma = (__u32)qep->qblock_dvma;
        u32 tmp;
        int i;
 
@@ -152,8 +153,8 @@ static int qe_init(struct sunqe *qep, int from_irq)
                return -EAGAIN;
 
        /* Setup initial rx/tx init block pointers. */
-       sbus_writel(qep->qblock_dvma + qib_offset(qe_rxd, 0), cregs + CREG_RXDS);
-       sbus_writel(qep->qblock_dvma + qib_offset(qe_txd, 0), cregs + CREG_TXDS);
+       sbus_writel(qblk_dvma + qib_offset(qe_rxd, 0), cregs + CREG_RXDS);
+       sbus_writel(qblk_dvma + qib_offset(qe_txd, 0), cregs + CREG_TXDS);
 
        /* Enable/mask the various irq's. */
        sbus_writel(0, cregs + CREG_RIMASK);
@@ -413,7 +414,7 @@ static void qe_rx(struct sunqe *qep)
        struct net_device *dev = qep->dev;
        struct qe_rxd *this;
        struct sunqe_buffers *qbufs = qep->buffers;
-       __u32 qbufs_dvma = qep->buffers_dvma;
+       __u32 qbufs_dvma = (__u32)qep->buffers_dvma;
        int elem = qep->rx_new;
        u32 flags;
 
@@ -572,7 +573,7 @@ static int qe_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct sunqe *qep = netdev_priv(dev);
        struct sunqe_buffers *qbufs = qep->buffers;
-       __u32 txbuf_dvma, qbufs_dvma = qep->buffers_dvma;
+       __u32 txbuf_dvma, qbufs_dvma = (__u32)qep->buffers_dvma;
        unsigned char *txbuf;
        int len, entry;
 
index 581781b6b2fac9d6bfb4bfae9735d674f6d46eec..ae190b77431b14b8f63b465155e9762727ccad77 100644 (file)
@@ -334,12 +334,12 @@ struct sunqe {
        void __iomem                    *qcregs;                /* QEC per-channel Registers   */
        void __iomem                    *mregs;         /* Per-channel MACE Registers  */
        struct qe_init_block            *qe_block;      /* RX and TX descriptors       */
-       __u32                           qblock_dvma;    /* RX and TX descriptors       */
+       dma_addr_t                      qblock_dvma;    /* RX and TX descriptors       */
        spinlock_t                      lock;           /* Protects txfull state       */
        int                             rx_new, rx_old; /* RX ring extents             */
        int                             tx_new, tx_old; /* TX ring extents             */
        struct sunqe_buffers            *buffers;       /* CPU visible address.        */
-       __u32                           buffers_dvma;   /* DVMA visible address.       */
+       dma_addr_t                      buffers_dvma;   /* DVMA visible address.       */
        struct sunqec                   *parent;
        u8                              mconfig;        /* Base MACE mconfig value     */
        struct platform_device          *op;            /* QE's OF device struct       */
index 58efe69b7ba7ea6b326d7082e70465ca57e2c24a..8878b75d68b4ddec99fb014e0e51cd91081380d2 100644 (file)
@@ -704,9 +704,8 @@ static int handle_mcast(struct vnet_port *port, void *msgbuf)
        return 0;
 }
 
-/* Got back a STOPPED LDC message on port. If the queue is stopped,
- * wake it up so that we'll send out another START message at the
- * next TX.
+/* If the queue is stopped, wake it up so that we'll
+ * send out another START message at the next TX.
  */
 static void maybe_tx_wakeup(struct vnet_port *port)
 {
@@ -734,6 +733,7 @@ EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common);
 
 static int vnet_event_napi(struct vnet_port *port, int budget)
 {
+       struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
        struct vio_driver_state *vio = &port->vio;
        int tx_wakeup, err;
        int npkts = 0;
@@ -747,6 +747,16 @@ ldc_ctrl:
                if (event == LDC_EVENT_RESET) {
                        vnet_port_reset(port);
                        vio_port_up(vio);
+
+                       /* If the device is running but its tx queue was
+                        * stopped (due to flow control), restart it.
+                        * This is necessary since vnet_port_reset()
+                        * clears the tx drings and thus we may never get
+                        * back a VIO_TYPE_DATA ACK packet - which is
+                        * the normal mechanism to restart the tx queue.
+                        */
+                       if (netif_running(dev))
+                               maybe_tx_wakeup(port);
                }
                port->rx_event = 0;
                return 0;
index eaa51ce8bd6d44b3fa0bfaf02e4e575a347a4ca0..acce385f69d4df52a528c0752c87a4a6dfe93cea 100644 (file)
@@ -33,7 +33,6 @@
 #include <linux/stat.h>
 #include <linux/types.h>
 
-#include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/mm.h>
@@ -43,7 +42,6 @@
 
 #include <linux/phy.h>
 #include <linux/mii.h>
-#include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/vmalloc.h>
 
@@ -982,11 +980,13 @@ static int dwceqos_mii_probe(struct net_device *ndev)
        if (netif_msg_probe(lp))
                phy_attached_info(phydev);
 
-       phydev->supported &= PHY_GBIT_FEATURES;
+       phydev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
+                            SUPPORTED_Asym_Pause;
 
        lp->link    = 0;
        lp->speed   = 0;
        lp->duplex  = DUPLEX_UNKNOWN;
+       lp->flowcontrol.autoneg = AUTONEG_ENABLE;
 
        return 0;
 }
index 054a8dd23dae0df966fc508064734946ed8f6d2b..ba1e45ff6aaec2ed3dcce6a3a4fc089c3be711d4 100644 (file)
@@ -176,9 +176,12 @@ void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave)
        }
 
        dev = bus_find_device(&platform_bus_type, NULL, node, match);
+       of_node_put(node);
        priv = dev_get_drvdata(dev);
 
        priv->cpsw_phy_sel(priv, phy_mode, slave);
+
+       put_device(dev);
 }
 EXPORT_SYMBOL_GPL(cpsw_phy_sel);
 
index b1ddf89a19bea4748e1f31755267f1bb8c52019d..dd5d830868059c983f8044004496e3cbad6885d9 100644 (file)
@@ -365,6 +365,11 @@ static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset)
        __raw_writel(val, slave->regs + offset);
 }
 
+struct cpsw_vector {
+       struct cpdma_chan *ch;
+       int budget;
+};
+
 struct cpsw_common {
        struct device                   *dev;
        struct cpsw_platform_data       data;
@@ -380,8 +385,8 @@ struct cpsw_common {
        int                             rx_packet_max;
        struct cpsw_slave               *slaves;
        struct cpdma_ctlr               *dma;
-       struct cpdma_chan               *txch[CPSW_MAX_QUEUES];
-       struct cpdma_chan               *rxch[CPSW_MAX_QUEUES];
+       struct cpsw_vector              txv[CPSW_MAX_QUEUES];
+       struct cpsw_vector              rxv[CPSW_MAX_QUEUES];
        struct cpsw_ale                 *ale;
        bool                            quirk_irq;
        bool                            rx_irq_disabled;
@@ -741,7 +746,7 @@ requeue:
                return;
        }
 
-       ch = cpsw->rxch[skb_get_queue_mapping(new_skb)];
+       ch = cpsw->rxv[skb_get_queue_mapping(new_skb)].ch;
        ret = cpdma_chan_submit(ch, new_skb, new_skb->data,
                                skb_tailroom(new_skb), 0);
        if (WARN_ON(ret < 0))
@@ -783,24 +788,25 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id)
 static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
 {
        u32                     ch_map;
-       int                     num_tx, ch;
+       int                     num_tx, cur_budget, ch;
        struct cpsw_common      *cpsw = napi_to_cpsw(napi_tx);
+       struct cpsw_vector      *txv;
 
        /* process every unprocessed channel */
        ch_map = cpdma_ctrl_txchs_state(cpsw->dma);
-       for (ch = 0, num_tx = 0; num_tx < budget; ch_map >>= 1, ch++) {
-               if (!ch_map) {
-                       ch_map = cpdma_ctrl_txchs_state(cpsw->dma);
-                       if (!ch_map)
-                               break;
-
-                       ch = 0;
-               }
-
+       for (ch = 0, num_tx = 0; ch_map; ch_map >>= 1, ch++) {
                if (!(ch_map & 0x01))
                        continue;
 
-               num_tx += cpdma_chan_process(cpsw->txch[ch], budget - num_tx);
+               txv = &cpsw->txv[ch];
+               if (unlikely(txv->budget > budget - num_tx))
+                       cur_budget = budget - num_tx;
+               else
+                       cur_budget = txv->budget;
+
+               num_tx += cpdma_chan_process(txv->ch, cur_budget);
+               if (num_tx >= budget)
+                       break;
        }
 
        if (num_tx < budget) {
@@ -818,24 +824,25 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
 static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget)
 {
        u32                     ch_map;
-       int                     num_rx, ch;
+       int                     num_rx, cur_budget, ch;
        struct cpsw_common      *cpsw = napi_to_cpsw(napi_rx);
+       struct cpsw_vector      *rxv;
 
        /* process every unprocessed channel */
        ch_map = cpdma_ctrl_rxchs_state(cpsw->dma);
-       for (ch = 0, num_rx = 0; num_rx < budget; ch_map >>= 1, ch++) {
-               if (!ch_map) {
-                       ch_map = cpdma_ctrl_rxchs_state(cpsw->dma);
-                       if (!ch_map)
-                               break;
-
-                       ch = 0;
-               }
-
+       for (ch = 0, num_rx = 0; ch_map; ch_map >>= 1, ch++) {
                if (!(ch_map & 0x01))
                        continue;
 
-               num_rx += cpdma_chan_process(cpsw->rxch[ch], budget - num_rx);
+               rxv = &cpsw->rxv[ch];
+               if (unlikely(rxv->budget > budget - num_rx))
+                       cur_budget = budget - num_rx;
+               else
+                       cur_budget = rxv->budget;
+
+               num_rx += cpdma_chan_process(rxv->ch, cur_budget);
+               if (num_rx >= budget)
+                       break;
        }
 
        if (num_rx < budget) {
@@ -1075,7 +1082,7 @@ static void cpsw_get_ethtool_stats(struct net_device *ndev,
                                cpsw_gstrings_stats[l].stat_offset);
 
        for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
-               cpdma_chan_get_stats(cpsw->rxch[ch], &ch_stats);
+               cpdma_chan_get_stats(cpsw->rxv[ch].ch, &ch_stats);
                for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) {
                        p = (u8 *)&ch_stats +
                                cpsw_gstrings_ch_stats[i].stat_offset;
@@ -1084,7 +1091,7 @@ static void cpsw_get_ethtool_stats(struct net_device *ndev,
        }
 
        for (ch = 0; ch < cpsw->tx_ch_num; ch++) {
-               cpdma_chan_get_stats(cpsw->txch[ch], &ch_stats);
+               cpdma_chan_get_stats(cpsw->txv[ch].ch, &ch_stats);
                for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) {
                        p = (u8 *)&ch_stats +
                                cpsw_gstrings_ch_stats[i].stat_offset;
@@ -1273,6 +1280,82 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
        }
 }
 
+/* split budget depending on channel rates */
+static void cpsw_split_budget(struct net_device *ndev)
+{
+       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
+       struct cpsw_vector *txv = cpsw->txv;
+       u32 consumed_rate, bigest_rate = 0;
+       int budget, bigest_rate_ch = 0;
+       struct cpsw_slave *slave;
+       int i, rlim_ch_num = 0;
+       u32 ch_rate, max_rate;
+       int ch_budget = 0;
+
+       if (cpsw->data.dual_emac)
+               slave = &cpsw->slaves[priv->emac_port];
+       else
+               slave = &cpsw->slaves[cpsw->data.active_slave];
+
+       max_rate = slave->phy->speed * 1000;
+
+       consumed_rate = 0;
+       for (i = 0; i < cpsw->tx_ch_num; i++) {
+               ch_rate = cpdma_chan_get_rate(txv[i].ch);
+               if (!ch_rate)
+                       continue;
+
+               rlim_ch_num++;
+               consumed_rate += ch_rate;
+       }
+
+       if (cpsw->tx_ch_num == rlim_ch_num) {
+               max_rate = consumed_rate;
+       } else {
+               ch_budget = (consumed_rate * CPSW_POLL_WEIGHT) / max_rate;
+               ch_budget = (CPSW_POLL_WEIGHT - ch_budget) /
+                           (cpsw->tx_ch_num - rlim_ch_num);
+               bigest_rate = (max_rate - consumed_rate) /
+                             (cpsw->tx_ch_num - rlim_ch_num);
+       }
+
+       /* split tx budget */
+       budget = CPSW_POLL_WEIGHT;
+       for (i = 0; i < cpsw->tx_ch_num; i++) {
+               ch_rate = cpdma_chan_get_rate(txv[i].ch);
+               if (ch_rate) {
+                       txv[i].budget = (ch_rate * CPSW_POLL_WEIGHT) / max_rate;
+                       if (!txv[i].budget)
+                               txv[i].budget = 1;
+                       if (ch_rate > bigest_rate) {
+                               bigest_rate_ch = i;
+                               bigest_rate = ch_rate;
+                       }
+               } else {
+                       txv[i].budget = ch_budget;
+                       if (!bigest_rate_ch)
+                               bigest_rate_ch = i;
+               }
+
+               budget -= txv[i].budget;
+       }
+
+       if (budget)
+               txv[bigest_rate_ch].budget += budget;
+
+       /* split rx budget */
+       budget = CPSW_POLL_WEIGHT;
+       ch_budget = budget / cpsw->rx_ch_num;
+       for (i = 0; i < cpsw->rx_ch_num; i++) {
+               cpsw->rxv[i].budget = ch_budget;
+               budget -= ch_budget;
+       }
+
+       if (budget)
+               cpsw->rxv[0].budget += budget;
+}
+
 static int cpsw_fill_rx_channels(struct cpsw_priv *priv)
 {
        struct cpsw_common *cpsw = priv->cpsw;
@@ -1281,7 +1364,7 @@ static int cpsw_fill_rx_channels(struct cpsw_priv *priv)
        int ch, i, ret;
 
        for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
-               ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxch[ch]);
+               ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch);
                for (i = 0; i < ch_buf_num; i++) {
                        skb = __netdev_alloc_skb_ip_align(priv->ndev,
                                                          cpsw->rx_packet_max,
@@ -1292,8 +1375,9 @@ static int cpsw_fill_rx_channels(struct cpsw_priv *priv)
                        }
 
                        skb_set_queue_mapping(skb, ch);
-                       ret = cpdma_chan_submit(cpsw->rxch[ch], skb, skb->data,
-                                               skb_tailroom(skb), 0);
+                       ret = cpdma_chan_submit(cpsw->rxv[ch].ch, skb,
+                                               skb->data, skb_tailroom(skb),
+                                               0);
                        if (ret < 0) {
                                cpsw_err(priv, ifup,
                                         "cannot submit skb to channel %d rx, error %d\n",
@@ -1376,10 +1460,6 @@ static int cpsw_ndo_open(struct net_device *ndev)
                                  ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0);
 
        if (!cpsw_common_res_usage_state(cpsw)) {
-               /* setup tx dma to fixed prio and zero offset */
-               cpdma_control_set(cpsw->dma, CPDMA_TX_PRIO_FIXED, 1);
-               cpdma_control_set(cpsw->dma, CPDMA_RX_BUFFER_OFFSET, 0);
-
                /* disable priority elevation */
                __raw_writel(0, &cpsw->regs->ptype);
 
@@ -1421,6 +1501,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
                cpsw_set_coalesce(ndev, &coal);
        }
 
+       cpsw_split_budget(ndev);
        cpdma_ctlr_start(cpsw->dma);
        cpsw_intr_enable(cpsw);
 
@@ -1490,7 +1571,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
        if (q_idx >= cpsw->tx_ch_num)
                q_idx = q_idx % cpsw->tx_ch_num;
 
-       txch = cpsw->txch[q_idx];
+       txch = cpsw->txv[q_idx].ch;
        ret = cpsw_tx_packet_submit(priv, skb, txch);
        if (unlikely(ret != 0)) {
                cpsw_err(priv, tx_err, "desc submit failed\n");
@@ -1697,8 +1778,8 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev)
        ndev->stats.tx_errors++;
        cpsw_intr_disable(cpsw);
        for (ch = 0; ch < cpsw->tx_ch_num; ch++) {
-               cpdma_chan_stop(cpsw->txch[ch]);
-               cpdma_chan_start(cpsw->txch[ch]);
+               cpdma_chan_stop(cpsw->txv[ch].ch);
+               cpdma_chan_start(cpsw->txv[ch].ch);
        }
 
        cpsw_intr_enable(cpsw);
@@ -1876,6 +1957,90 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev,
        return ret;
 }
 
+static int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate)
+{
+       struct cpsw_priv *priv = netdev_priv(ndev);
+       int tx_ch_num = ndev->real_num_tx_queues;
+       u32 consumed_rate, min_rate, max_rate;
+       struct cpsw_common *cpsw = priv->cpsw;
+       struct cpsw_slave *slave;
+       int ret, i, weight;
+       int rlim_num = 0;
+       u32 ch_rate;
+
+       ch_rate = netdev_get_tx_queue(ndev, queue)->tx_maxrate;
+       if (ch_rate == rate)
+               return 0;
+
+       if (cpsw->data.dual_emac)
+               slave = &cpsw->slaves[priv->emac_port];
+       else
+               slave = &cpsw->slaves[cpsw->data.active_slave];
+       max_rate = slave->phy->speed;
+
+       consumed_rate = 0;
+       for (i = 0; i < tx_ch_num; i++) {
+               if (i == queue)
+                       ch_rate = rate;
+               else
+                       ch_rate = netdev_get_tx_queue(ndev, i)->tx_maxrate;
+               if (!ch_rate)
+                       continue;
+
+               rlim_num++;
+               consumed_rate += ch_rate;
+       }
+
+       if (consumed_rate > max_rate)
+               dev_info(priv->dev, "The common rate shouldn't be more than %dMbps",
+                        max_rate);
+
+       if (consumed_rate > max_rate) {
+               if (max_rate == 10 && consumed_rate <= 100) {
+                       max_rate = 100;
+               } else if (max_rate <= 100 && consumed_rate <= 1000) {
+                       max_rate = 1000;
+               } else {
+                       dev_err(priv->dev, "The common rate cannot be more than %dMbps",
+                               max_rate);
+                       return -EINVAL;
+               }
+       }
+
+       if (consumed_rate > max_rate) {
+               dev_err(priv->dev, "The common rate cannot be more than %dMbps",
+                       max_rate);
+               return -EINVAL;
+       }
+
+       rate *= 1000;
+       min_rate = cpdma_chan_get_min_rate(cpsw->dma);
+       if ((rate < min_rate && rate)) {
+               dev_err(priv->dev, "The common rate cannot be less than %dMbps",
+                       min_rate);
+               return -EINVAL;
+       }
+
+       ret = pm_runtime_get_sync(cpsw->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(cpsw->dev);
+               return ret;
+       }
+
+       if (rlim_num == tx_ch_num)
+               max_rate = consumed_rate;
+
+       weight = (rate * 100) / (max_rate * 1000);
+       cpdma_chan_set_weight(cpsw->txv[queue].ch, weight);
+       ret = cpdma_chan_set_rate(cpsw->txv[queue].ch, rate);
+
+       /* re-split budget between channels */
+       if (!rate)
+               cpsw_split_budget(ndev);
+       pm_runtime_put(cpsw->dev);
+       return ret;
+}
+
 static const struct net_device_ops cpsw_netdev_ops = {
        .ndo_open               = cpsw_ndo_open,
        .ndo_stop               = cpsw_ndo_stop,
@@ -1885,6 +2050,7 @@ static const struct net_device_ops cpsw_netdev_ops = {
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_tx_timeout         = cpsw_ndo_tx_timeout,
        .ndo_set_rx_mode        = cpsw_ndo_set_rx_mode,
+       .ndo_set_tx_maxrate     = cpsw_ndo_set_tx_maxrate,
 #ifdef CONFIG_NET_POLL_CONTROLLER
        .ndo_poll_controller    = cpsw_ndo_poll_controller,
 #endif
@@ -2104,28 +2270,31 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
        int (*poll)(struct napi_struct *, int);
        struct cpsw_common *cpsw = priv->cpsw;
        void (*handler)(void *, int, int);
-       struct cpdma_chan **chan;
+       struct netdev_queue *queue;
+       struct cpsw_vector *vec;
        int ret, *ch;
 
        if (rx) {
                ch = &cpsw->rx_ch_num;
-               chan = cpsw->rxch;
+               vec = cpsw->rxv;
                handler = cpsw_rx_handler;
                poll = cpsw_rx_poll;
        } else {
                ch = &cpsw->tx_ch_num;
-               chan = cpsw->txch;
+               vec = cpsw->txv;
                handler = cpsw_tx_handler;
                poll = cpsw_tx_poll;
        }
 
        while (*ch < ch_num) {
-               chan[*ch] = cpdma_chan_create(cpsw->dma, *ch, handler, rx);
+               vec[*ch].ch = cpdma_chan_create(cpsw->dma, *ch, handler, rx);
+               queue = netdev_get_tx_queue(priv->ndev, *ch);
+               queue->tx_maxrate = 0;
 
-               if (IS_ERR(chan[*ch]))
-                       return PTR_ERR(chan[*ch]);
+               if (IS_ERR(vec[*ch].ch))
+                       return PTR_ERR(vec[*ch].ch);
 
-               if (!chan[*ch])
+               if (!vec[*ch].ch)
                        return -EINVAL;
 
                cpsw_info(priv, ifup, "created new %d %s channel\n", *ch,
@@ -2136,7 +2305,7 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
        while (*ch > ch_num) {
                (*ch)--;
 
-               ret = cpdma_chan_destroy(chan[*ch]);
+               ret = cpdma_chan_destroy(vec[*ch].ch);
                if (ret)
                        return ret;
 
@@ -2223,6 +2392,8 @@ static int cpsw_set_channels(struct net_device *ndev,
                if (ret)
                        goto err;
 
+               cpsw_split_budget(ndev);
+
                /* After this receive is started */
                cpdma_ctlr_start(cpsw->dma);
                cpsw_intr_enable(cpsw);
@@ -2241,6 +2412,42 @@ err:
        return ret;
 }
 
+/* ethtool .get_eee hook: pass the request on to the PHY of the slave
+ * selected by cpsw_slave_index(); -EOPNOTSUPP when that slave has no PHY.
+ */
+static int cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
+{
+       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
+       int port = cpsw_slave_index(cpsw, priv);
+
+       if (!cpsw->slaves[port].phy)
+               return -EOPNOTSUPP;
+
+       return phy_ethtool_get_eee(cpsw->slaves[port].phy, edata);
+}
+
+/* ethtool .set_eee hook: pass the new settings on to the PHY of the slave
+ * selected by cpsw_slave_index(); -EOPNOTSUPP when that slave has no PHY.
+ */
+static int cpsw_set_eee(struct net_device *ndev, struct ethtool_eee *edata)
+{
+       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
+       int port = cpsw_slave_index(cpsw, priv);
+
+       if (!cpsw->slaves[port].phy)
+               return -EOPNOTSUPP;
+
+       return phy_ethtool_set_eee(cpsw->slaves[port].phy, edata);
+}
+
+/* ethtool .nway_reset hook: restart autonegotiation on the PHY of the slave
+ * selected by cpsw_slave_index(); -EOPNOTSUPP when that slave has no PHY.
+ */
+static int cpsw_nway_reset(struct net_device *ndev)
+{
+       struct cpsw_priv *priv = netdev_priv(ndev);
+       struct cpsw_common *cpsw = priv->cpsw;
+       int port = cpsw_slave_index(cpsw, priv);
+
+       if (!cpsw->slaves[port].phy)
+               return -EOPNOTSUPP;
+
+       return genphy_restart_aneg(cpsw->slaves[port].phy);
+}
+
 static const struct ethtool_ops cpsw_ethtool_ops = {
        .get_drvinfo    = cpsw_get_drvinfo,
        .get_msglevel   = cpsw_get_msglevel,
@@ -2264,6 +2471,9 @@ static const struct ethtool_ops cpsw_ethtool_ops = {
        .set_channels   = cpsw_set_channels,
        .get_link_ksettings     = cpsw_get_link_ksettings,
        .set_link_ksettings     = cpsw_set_link_ksettings,
+       .get_eee        = cpsw_get_eee,
+       .set_eee        = cpsw_set_eee,
+       .nway_reset     = cpsw_nway_reset,
 };
 
 static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw,
@@ -2377,8 +2587,11 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
                         * to the PHY is the Ethernet MAC DT node.
                         */
                        ret = of_phy_register_fixed_link(slave_node);
-                       if (ret)
+                       if (ret) {
+                               if (ret != -EPROBE_DEFER)
+                                       dev_err(&pdev->dev, "failed to register fixed-link phy: %d\n", ret);
                                return ret;
+                       }
                        slave_data->phy_node = of_node_get(slave_node);
                } else if (parp) {
                        u32 phyid;
@@ -2399,6 +2612,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
                        }
                        snprintf(slave_data->phy_id, sizeof(slave_data->phy_id),
                                 PHY_ID_FMT, mdio->name, phyid);
+                       put_device(&mdio->dev);
                } else {
                        dev_err(&pdev->dev,
                                "No slave[%d] phy_id, phy-handle, or fixed-link property\n",
@@ -2442,6 +2656,46 @@ no_phy_slave:
        return 0;
 }
 
+/* cpsw_remove_dt - release what was set up from the device tree
+ *
+ * Walks the available child nodes named "slave" (at most data->slaves of
+ * them). For a fixed-link slave the fixed PHY is unregistered and its
+ * references are dropped; for every slave the reference held in
+ * slave_data->phy_node is put. Finally the child platform devices of @pdev
+ * are depopulated.
+ */
+static void cpsw_remove_dt(struct platform_device *pdev)
+{
+       struct net_device *ndev = platform_get_drvdata(pdev);
+       struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+       struct cpsw_platform_data *data = &cpsw->data;
+       struct device_node *node = pdev->dev.of_node;
+       struct device_node *slave_node;
+       int i = 0;
+
+       for_each_available_child_of_node(node, slave_node) {
+               struct cpsw_slave_data *slave_data = &data->slave_data[i];
+
+               if (strcmp(slave_node->name, "slave"))
+                       continue;
+
+               if (of_phy_is_fixed_link(slave_node)) {
+                       struct phy_device *phydev;
+
+                       phydev = of_phy_find_device(slave_node);
+                       if (phydev) {
+                               fixed_phy_unregister(phydev);
+                               /* Put references taken by
+                                * of_phy_find_device() and
+                                * of_phy_register_fixed_link().
+                                */
+                               phy_device_free(phydev);
+                               phy_device_free(phydev);
+                       }
+               }
+
+               of_node_put(slave_data->phy_node);
+
+               i++;
+               if (i == data->slaves) {
+                       /* Leaving the iterator early: drop the reference
+                        * it still holds on the current child node.
+                        */
+                       of_node_put(slave_node);
+                       break;
+               }
+       }
+
+       of_platform_depopulate(&pdev->dev);
+}
+
 static int cpsw_probe_dual_emac(struct cpsw_priv *priv)
 {
        struct cpsw_common              *cpsw = priv->cpsw;
@@ -2549,6 +2803,9 @@ static int cpsw_probe(struct platform_device *pdev)
        int irq;
 
        cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL);
+       if (!cpsw)
+               return -ENOMEM;
+
        cpsw->dev = &pdev->dev;
 
        ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES);
@@ -2586,11 +2843,19 @@ static int cpsw_probe(struct platform_device *pdev)
        /* Select default pin state */
        pinctrl_pm_select_default_state(&pdev->dev);
 
-       if (cpsw_probe_dt(&cpsw->data, pdev)) {
-               dev_err(&pdev->dev, "cpsw: platform data missing\n");
-               ret = -ENODEV;
+       /* Need to enable clocks with runtime PM api to access module
+        * registers
+        */
+       ret = pm_runtime_get_sync(&pdev->dev);
+       if (ret < 0) {
+               pm_runtime_put_noidle(&pdev->dev);
                goto clean_runtime_disable_ret;
        }
+
+       ret = cpsw_probe_dt(&cpsw->data, pdev);
+       if (ret)
+               goto clean_dt_ret;
+
        data = &cpsw->data;
        cpsw->rx_ch_num = 1;
        cpsw->tx_ch_num = 1;
@@ -2610,7 +2875,7 @@ static int cpsw_probe(struct platform_device *pdev)
                                    GFP_KERNEL);
        if (!cpsw->slaves) {
                ret = -ENOMEM;
-               goto clean_runtime_disable_ret;
+               goto clean_dt_ret;
        }
        for (i = 0; i < data->slaves; i++)
                cpsw->slaves[i].slave_num = i;
@@ -2622,7 +2887,7 @@ static int cpsw_probe(struct platform_device *pdev)
        if (IS_ERR(clk)) {
                dev_err(priv->dev, "fck is not found\n");
                ret = -ENODEV;
-               goto clean_runtime_disable_ret;
+               goto clean_dt_ret;
        }
        cpsw->bus_freq_mhz = clk_get_rate(clk) / 1000000;
 
@@ -2630,26 +2895,17 @@ static int cpsw_probe(struct platform_device *pdev)
        ss_regs = devm_ioremap_resource(&pdev->dev, ss_res);
        if (IS_ERR(ss_regs)) {
                ret = PTR_ERR(ss_regs);
-               goto clean_runtime_disable_ret;
+               goto clean_dt_ret;
        }
        cpsw->regs = ss_regs;
 
-       /* Need to enable clocks with runtime PM api to access module
-        * registers
-        */
-       ret = pm_runtime_get_sync(&pdev->dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(&pdev->dev);
-               goto clean_runtime_disable_ret;
-       }
        cpsw->version = readl(&cpsw->regs->id_ver);
-       pm_runtime_put_sync(&pdev->dev);
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
        cpsw->wr_regs = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(cpsw->wr_regs)) {
                ret = PTR_ERR(cpsw->wr_regs);
-               goto clean_runtime_disable_ret;
+               goto clean_dt_ret;
        }
 
        memset(&dma_params, 0, sizeof(dma_params));
@@ -2686,7 +2942,7 @@ static int cpsw_probe(struct platform_device *pdev)
        default:
                dev_err(priv->dev, "unknown version 0x%08x\n", cpsw->version);
                ret = -ENODEV;
-               goto clean_runtime_disable_ret;
+               goto clean_dt_ret;
        }
        for (i = 0; i < cpsw->data.slaves; i++) {
                struct cpsw_slave *slave = &cpsw->slaves[i];
@@ -2710,17 +2966,18 @@ static int cpsw_probe(struct platform_device *pdev)
        dma_params.desc_align           = 16;
        dma_params.has_ext_regs         = true;
        dma_params.desc_hw_addr         = dma_params.desc_mem_phys;
+       dma_params.bus_freq_mhz         = cpsw->bus_freq_mhz;
 
        cpsw->dma = cpdma_ctlr_create(&dma_params);
        if (!cpsw->dma) {
                dev_err(priv->dev, "error initializing dma\n");
                ret = -ENOMEM;
-               goto clean_runtime_disable_ret;
+               goto clean_dt_ret;
        }
 
-       cpsw->txch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0);
-       cpsw->rxch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1);
-       if (WARN_ON(!cpsw->rxch[0] || !cpsw->txch[0])) {
+       cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0);
+       cpsw->rxv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1);
+       if (WARN_ON(!cpsw->rxv[0].ch || !cpsw->txv[0].ch)) {
                dev_err(priv->dev, "error initializing dma channels\n");
                ret = -ENOMEM;
                goto clean_dma_ret;
@@ -2813,16 +3070,23 @@ static int cpsw_probe(struct platform_device *pdev)
                ret = cpsw_probe_dual_emac(priv);
                if (ret) {
                        cpsw_err(priv, probe, "error probe slave 2 emac interface\n");
-                       goto clean_ale_ret;
+                       goto clean_unregister_netdev_ret;
                }
        }
 
+       pm_runtime_put(&pdev->dev);
+
        return 0;
 
+clean_unregister_netdev_ret:
+       unregister_netdev(ndev);
 clean_ale_ret:
        cpsw_ale_destroy(cpsw->ale);
 clean_dma_ret:
        cpdma_ctlr_destroy(cpsw->dma);
+clean_dt_ret:
+       cpsw_remove_dt(pdev);
+       pm_runtime_put_sync(&pdev->dev);
 clean_runtime_disable_ret:
        pm_runtime_disable(&pdev->dev);
 clean_ndev_ret:
@@ -2848,7 +3112,7 @@ static int cpsw_remove(struct platform_device *pdev)
 
        cpsw_ale_destroy(cpsw->ale);
        cpdma_ctlr_destroy(cpsw->dma);
-       of_platform_depopulate(&pdev->dev);
+       cpsw_remove_dt(pdev);
        pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
        if (cpsw->data.dual_emac)
index c3f35f11a8fd53d59ea1ac0c8d69653ec049c443..c776e4575d2da2d6d0b0cf584798c08ef705dd34 100644 (file)
@@ -32,6 +32,7 @@
 #define CPDMA_RXCONTROL                0x14
 #define CPDMA_SOFTRESET                0x1c
 #define CPDMA_RXTEARDOWN       0x18
+#define CPDMA_TX_PRI0_RATE     0x30
 #define CPDMA_TXINTSTATRAW     0x80
 #define CPDMA_TXINTSTATMASKED  0x84
 #define CPDMA_TXINTMASKSET     0x88
@@ -68,6 +69,8 @@
 
 #define CPDMA_TEARDOWN_VALUE   0xfffffffc
 
+#define CPDMA_MAX_RLIM_CNT     16384
+
 struct cpdma_desc {
        /* hardware fields */
        u32                     hw_next;
@@ -122,6 +125,33 @@ struct cpdma_chan {
        struct cpdma_chan_stats         stats;
        /* offsets into dmaregs */
        int     int_set, int_clear, td;
+       int                             weight;
+       u32                             rate_factor;
+       u32                             rate;
+};
+
+/* Describes one bit-field reachable through the cpdma control get/set
+ * helpers: which register it lives in, where it sits in that register and
+ * whether it may be read and/or written.
+ */
+struct cpdma_control_info {
+       u32             reg;            /* register offset given to dma_reg_read/write */
+       u32             shift, mask;    /* field position and unshifted field mask */
+       int             access;         /* combination of the ACCESS_* bits below */
+#define ACCESS_RO      BIT(0)
+#define ACCESS_WO      BIT(1)
+#define ACCESS_RW      (ACCESS_RO | ACCESS_WO)
+};
+
+/* Control field table, indexed by control id.
+ * Each entry is {register, shift, mask, access}.
+ */
+static struct cpdma_control_info controls[] = {
+       [CPDMA_TX_RLIM]           = {CPDMA_DMACONTROL,  8,  0xffff, ACCESS_RW},
+       [CPDMA_CMD_IDLE]          = {CPDMA_DMACONTROL,  3,  1,      ACCESS_WO},
+       [CPDMA_COPY_ERROR_FRAMES] = {CPDMA_DMACONTROL,  4,  1,      ACCESS_RW},
+       [CPDMA_RX_OFF_LEN_UPDATE] = {CPDMA_DMACONTROL,  2,  1,      ACCESS_RW},
+       [CPDMA_RX_OWNERSHIP_FLIP] = {CPDMA_DMACONTROL,  1,  1,      ACCESS_RW},
+       [CPDMA_TX_PRIO_FIXED]     = {CPDMA_DMACONTROL,  0,  1,      ACCESS_RW},
+       [CPDMA_STAT_IDLE]         = {CPDMA_DMASTATUS,   31, 1,      ACCESS_RO},
+       [CPDMA_STAT_TX_ERR_CODE]  = {CPDMA_DMASTATUS,   20, 0xf,    ACCESS_RW},
+       [CPDMA_STAT_TX_ERR_CHAN]  = {CPDMA_DMASTATUS,   16, 0x7,    ACCESS_RW},
+       [CPDMA_STAT_RX_ERR_CODE]  = {CPDMA_DMASTATUS,   12, 0xf,    ACCESS_RW},
+       [CPDMA_STAT_RX_ERR_CHAN]  = {CPDMA_DMASTATUS,   8,  0x7,    ACCESS_RW},
+       [CPDMA_RX_BUFFER_OFFSET]  = {CPDMA_RXBUFFOFS,   0,  0xffff, ACCESS_RW},
 };
 
 #define tx_chan_num(chan)      (chan)
@@ -253,6 +283,211 @@ static void cpdma_desc_free(struct cpdma_desc_pool *pool,
        gen_pool_free(pool->gen_pool, (unsigned long)desc, pool->desc_size);
 }
 
+/* _cpdma_control_set - write one control field
+ * Does not take ctlr->lock itself.
+ * ctlr - controller to operate on
+ * control - index into controls[]
+ * value - new field value, truncated to the field mask
+ * Returns 0 on success, -ENOTSUPP without extended registers, -EINVAL when
+ * the controller is not active, -ENOENT for an unknown control and -EPERM
+ * when the field is not writable.
+ */
+static int _cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value)
+{
+       struct cpdma_control_info *info;
+       u32 val;
+
+       if (!ctlr->params.has_ext_regs)
+               return -ENOTSUPP;
+
+       if (ctlr->state != CPDMA_STATE_ACTIVE)
+               return -EINVAL;
+
+       if (control < 0 || control >= ARRAY_SIZE(controls))
+               return -ENOENT;
+
+       /* index the table only once the id is known to be in range */
+       info = &controls[control];
+       if ((info->access & ACCESS_WO) != ACCESS_WO)
+               return -EPERM;
+
+       /* read-modify-write so the other fields of the register survive */
+       val  = dma_reg_read(ctlr, info->reg);
+       val &= ~(info->mask << info->shift);
+       val |= (value & info->mask) << info->shift;
+       dma_reg_write(ctlr, info->reg, val);
+
+       return 0;
+}
+
+/* _cpdma_control_get - read one control field
+ * Does not take ctlr->lock itself.
+ * ctlr - controller to operate on
+ * control - index into controls[]
+ * Returns the field value (shifted down and masked) on success,
+ * -ENOTSUPP without extended registers, -EINVAL when the controller is not
+ * active, -ENOENT for an unknown control and -EPERM when the field is not
+ * readable.
+ */
+static int _cpdma_control_get(struct cpdma_ctlr *ctlr, int control)
+{
+       struct cpdma_control_info *info;
+       int ret;
+
+       if (!ctlr->params.has_ext_regs)
+               return -ENOTSUPP;
+
+       if (ctlr->state != CPDMA_STATE_ACTIVE)
+               return -EINVAL;
+
+       if (control < 0 || control >= ARRAY_SIZE(controls))
+               return -ENOENT;
+
+       /* index the table only once the id is known to be in range */
+       info = &controls[control];
+       if ((info->access & ACCESS_RO) != ACCESS_RO)
+               return -EPERM;
+
+       ret = (dma_reg_read(ctlr, info->reg) >> info->shift) & info->mask;
+       return ret;
+}
+
+/* cpdma_chan_set_chan_shaper - program the shaper of a channel
+ * Has to be called under ctlr lock.
+ * A channel without a rate is left alone and 0 is returned. Otherwise the
+ * channel's rate factor is written to its rate register and the channel
+ * mask is added to the CPDMA_TX_RLIM control; the result of that control
+ * update is returned.
+ */
+static int cpdma_chan_set_chan_shaper(struct cpdma_chan *chan)
+{
+       struct cpdma_ctlr *ctlr = chan->ctlr;
+       u32 rlim_mask;
+
+       if (!chan->rate)
+               return 0;
+
+       dma_reg_write(ctlr, CPDMA_TX_PRI0_RATE + 4 * chan->chan_num,
+                     chan->rate_factor);
+
+       rlim_mask = _cpdma_control_get(ctlr, CPDMA_TX_RLIM);
+       rlim_mask |= chan->mask;
+
+       return _cpdma_control_set(ctlr, CPDMA_TX_RLIM, rlim_mask);
+}
+
+/* cpdma_chan_on - switch an idle channel to the active state
+ * Takes the channel lock itself. Writes the channel mask to the channel's
+ * int_set register, marks the channel active and, when descriptors are
+ * already queued, hands the head of the queue to the hardware.
+ * Returns 0 on success, -EBUSY when the channel is not idle and -EINVAL
+ * when the controller is not active.
+ */
+static int cpdma_chan_on(struct cpdma_chan *chan)
+{
+       struct cpdma_ctlr *ctlr = chan->ctlr;
+       struct cpdma_desc_pool  *pool = ctlr->pool;
+       unsigned long flags;
+
+       spin_lock_irqsave(&chan->lock, flags);
+       if (chan->state != CPDMA_STATE_IDLE) {
+               spin_unlock_irqrestore(&chan->lock, flags);
+               return -EBUSY;
+       }
+       if (ctlr->state != CPDMA_STATE_ACTIVE) {
+               spin_unlock_irqrestore(&chan->lock, flags);
+               return -EINVAL;
+       }
+       dma_reg_write(ctlr, chan->int_set, chan->mask);
+       chan->state = CPDMA_STATE_ACTIVE;
+       /* restart from descriptors that were queued while the channel was off */
+       if (chan->head) {
+               chan_write(chan, hdp, desc_phys(pool, chan->head));
+               if (chan->rxfree)
+                       chan_write(chan, rxfree, chan->count);
+       }
+
+       spin_unlock_irqrestore(&chan->lock, flags);
+       return 0;
+}
+
+/* cpdma_chan_fit_rate - set rate for a channel and check if it's possible.
+ * ch - channel whose rate is changed
+ * rate - new value for ch->rate
+ * rmask - output, mask of rate limited channels
+ * prio_mode - output, 1 when some existing tx channel has no rate, else 0
+ * Rate limited channels have to form an unbroken run starting at the first
+ * tx channel: a rate limited channel that follows a missing or unlimited
+ * one is rejected.
+ * Returns 0 on success, or -EINVAL with the previous ch->rate restored.
+ */
+static int cpdma_chan_fit_rate(struct cpdma_chan *ch, u32 rate,
+                              u32 *rmask, int *prio_mode)
+{
+       struct cpdma_ctlr *ctlr = ch->ctlr;
+       struct cpdma_chan *chan;
+       u32 old_rate = ch->rate;
+       u32 new_rmask = 0;
+       int rlim = 1;
+       int i;
+
+       *prio_mode = 0;
+       for (i = tx_chan_num(0); i < tx_chan_num(CPDMA_MAX_CHANNELS); i++) {
+               chan = ctlr->channels[i];
+               if (!chan) {
+                       /* a gap ends the run of rate limited channels */
+                       rlim = 0;
+                       continue;
+               }
+
+               if (chan == ch)
+                       chan->rate = rate;
+
+               if (chan->rate) {
+                       if (rlim) {
+                               new_rmask |= chan->mask;
+                       } else {
+                               ch->rate = old_rate;
+                               dev_err(ctlr->dev, "Prev channel of %dch is not rate limited\n",
+                                       chan->chan_num);
+                               return -EINVAL;
+                       }
+               } else {
+                       *prio_mode = 1;
+                       rlim = 0;
+               }
+       }
+
+       *rmask = new_rmask;
+       return 0;
+}
+
+/* cpdma_chan_set_factors - compute and program the rate factor of a channel
+ * ctlr - controller the channel belongs to
+ * ch - channel, with ch->rate holding the requested rate
+ * For a zero rate the factor is cleared. Otherwise pairs of send and idle
+ * counts (send count up to CPDMA_MAX_RLIM_CNT) are tried; only candidates
+ * whose resulting rate is not below the requested one update the delta
+ * used to pick the best pair. ch->rate is replaced by the rate of the best
+ * pair and the factor is written to the channel's rate register.
+ * Returns 0 on success or -EINVAL when the bus frequency is not set; the
+ * return type is int so the negative error code is not passed through an
+ * unsigned type.
+ */
+static int cpdma_chan_set_factors(struct cpdma_ctlr *ctlr,
+                                 struct cpdma_chan *ch)
+{
+       u32 delta = UINT_MAX, prev_delta = UINT_MAX, best_delta = UINT_MAX;
+       u32 best_send_cnt = 0, best_idle_cnt = 0;
+       u32 new_rate, best_rate = 0, rate_reg;
+       u64 send_cnt, idle_cnt;
+       u32 min_send_cnt, freq;
+       u64 divident, divisor;
+
+       if (!ch->rate) {
+               ch->rate_factor = 0;
+               goto set_factor;
+       }
+
+       freq = ctlr->params.bus_freq_mhz * 1000 * 32;
+       if (!freq) {
+               dev_err(ctlr->dev, "The bus frequency is not set\n");
+               return -EINVAL;
+       }
+
+       min_send_cnt = freq - ch->rate;
+       send_cnt = DIV_ROUND_UP(min_send_cnt, ch->rate);
+       while (send_cnt <= CPDMA_MAX_RLIM_CNT) {
+               divident = ch->rate * send_cnt;
+               divisor = min_send_cnt;
+               idle_cnt = DIV_ROUND_CLOSEST_ULL(divident, divisor);
+
+               divident = freq * idle_cnt;
+               divisor = idle_cnt + send_cnt;
+               new_rate = DIV_ROUND_CLOSEST_ULL(divident, divisor);
+
+               delta = new_rate >= ch->rate ? new_rate - ch->rate : delta;
+               if (delta < best_delta) {
+                       best_delta = delta;
+                       best_send_cnt = send_cnt;
+                       best_idle_cnt = idle_cnt;
+                       best_rate = new_rate;
+
+                       if (!delta)
+                               break;
+               }
+
+               if (prev_delta >= delta) {
+                       prev_delta = delta;
+                       send_cnt++;
+                       continue;
+               }
+
+               idle_cnt++;
+               divident = freq * idle_cnt;
+               send_cnt = DIV_ROUND_CLOSEST_ULL(divident, ch->rate);
+               send_cnt -= idle_cnt;
+               prev_delta = UINT_MAX;
+       }
+
+       ch->rate = best_rate;
+       /* send count in the low half, idle count from bit 16 upwards */
+       ch->rate_factor = best_send_cnt | (best_idle_cnt << 16);
+
+set_factor:
+       rate_reg = CPDMA_TX_PRI0_RATE + 4 * ch->chan_num;
+       dma_reg_write(ctlr, rate_reg, ch->rate_factor);
+       return 0;
+}
+
 struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params)
 {
        struct cpdma_ctlr *ctlr;
@@ -283,8 +518,9 @@ EXPORT_SYMBOL_GPL(cpdma_ctlr_create);
 
 int cpdma_ctlr_start(struct cpdma_ctlr *ctlr)
 {
+       struct cpdma_chan *chan;
        unsigned long flags;
-       int i;
+       int i, prio_mode;
 
        spin_lock_irqsave(&ctlr->lock, flags);
        if (ctlr->state != CPDMA_STATE_IDLE) {
@@ -320,10 +556,22 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr)
 
        ctlr->state = CPDMA_STATE_ACTIVE;
 
+       prio_mode = 0;
        for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) {
-               if (ctlr->channels[i])
-                       cpdma_chan_start(ctlr->channels[i]);
+               chan = ctlr->channels[i];
+               if (chan) {
+                       cpdma_chan_set_chan_shaper(chan);
+                       cpdma_chan_on(chan);
+
+                       /* off prio mode if all tx channels are rate limited */
+                       if (is_tx_chan(chan) && !chan->rate)
+                               prio_mode = 1;
+               }
        }
+
+       _cpdma_control_set(ctlr, CPDMA_TX_PRIO_FIXED, prio_mode);
+       _cpdma_control_set(ctlr, CPDMA_RX_BUFFER_OFFSET, 0);
+
        spin_unlock_irqrestore(&ctlr->lock, flags);
        return 0;
 }
@@ -335,7 +583,7 @@ int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr)
        int i;
 
        spin_lock_irqsave(&ctlr->lock, flags);
-       if (ctlr->state == CPDMA_STATE_TEARDOWN) {
+       if (ctlr->state != CPDMA_STATE_ACTIVE) {
                spin_unlock_irqrestore(&ctlr->lock, flags);
                return -EINVAL;
        }
@@ -422,29 +670,200 @@ u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr)
 }
 EXPORT_SYMBOL_GPL(cpdma_ctrl_txchs_state);
 
+/* cpdma_chan_set_descs - distribute descriptors between rx or tx channels
+ * ctlr - controller whose channels are updated
+ * rx - non-zero to handle the rx channels, zero for the tx channels
+ * desc_num - number of descriptors to distribute; nothing is done for 0
+ * per_ch_desc - descriptors given to each channel that has no weight
+ * A weighted channel gets weight percent of desc_num. What remains after
+ * that split is added to the channel that received the most descriptors.
+ */
+static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr,
+                                int rx, int desc_num,
+                                int per_ch_desc)
+{
+       struct cpdma_chan *chan, *most_chan = NULL;
+       int desc_cnt = desc_num;
+       int most_dnum = 0;
+       int min, max, i;
+
+       if (!desc_num)
+               return;
+
+       if (rx) {
+               min = rx_chan_num(0);
+               max = rx_chan_num(CPDMA_MAX_CHANNELS);
+       } else {
+               min = tx_chan_num(0);
+               max = tx_chan_num(CPDMA_MAX_CHANNELS);
+       }
+
+       for (i = min; i < max; i++) {
+               chan = ctlr->channels[i];
+               if (!chan)
+                       continue;
+
+               if (chan->weight)
+                       chan->desc_num = (chan->weight * desc_num) / 100;
+               else
+                       chan->desc_num = per_ch_desc;
+
+               desc_cnt -= chan->desc_num;
+
+               if (most_dnum < chan->desc_num) {
+                       most_dnum = chan->desc_num;
+                       most_chan = chan;
+               }
+       }
+       /* use remains; most_chan stays NULL when no channel of this
+        * direction received any descriptor, so there is nobody to
+        * hand them to
+        */
+       if (most_chan)
+               most_chan->desc_num += desc_cnt;
+}
+
 /**
  * cpdma_chan_split_pool - Splits ctrl pool between all channels.
  * Has to be called under ctlr lock
  */
-static void cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
+static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
 {
+       int tx_per_ch_desc = 0, rx_per_ch_desc = 0;
        struct cpdma_desc_pool *pool = ctlr->pool;
+       int free_rx_num = 0, free_tx_num = 0;
+       int rx_weight = 0, tx_weight = 0;
+       int tx_desc_num, rx_desc_num;
        struct cpdma_chan *chan;
-       int ch_desc_num;
-       int i;
+       int i, tx_num = 0;
 
        if (!ctlr->chan_num)
-               return;
-
-       /* calculate average size of pool slice */
-       ch_desc_num = pool->num_desc / ctlr->chan_num;
+               return 0;
 
-       /* split ctlr pool */
        for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) {
                chan = ctlr->channels[i];
-               if (chan)
-                       chan->desc_num = ch_desc_num;
+               if (!chan)
+                       continue;
+
+               if (is_rx_chan(chan)) {
+                       if (!chan->weight)
+                               free_rx_num++;
+                       rx_weight += chan->weight;
+               } else {
+                       if (!chan->weight)
+                               free_tx_num++;
+                       tx_weight += chan->weight;
+                       tx_num++;
+               }
+       }
+
+       if (rx_weight > 100 || tx_weight > 100)
+               return -EINVAL;
+
+       tx_desc_num = (tx_num * pool->num_desc) / ctlr->chan_num;
+       rx_desc_num = pool->num_desc - tx_desc_num;
+
+       if (free_tx_num) {
+               tx_per_ch_desc = tx_desc_num - (tx_weight * tx_desc_num) / 100;
+               tx_per_ch_desc /= free_tx_num;
+       }
+       if (free_rx_num) {
+               rx_per_ch_desc = rx_desc_num - (rx_weight * rx_desc_num) / 100;
+               rx_per_ch_desc /= free_rx_num;
+       }
+
+       cpdma_chan_set_descs(ctlr, 0, tx_desc_num, tx_per_ch_desc);
+       cpdma_chan_set_descs(ctlr, 1, rx_desc_num, rx_per_ch_desc);
+
+       return 0;
+}
+
+/* cpdma_chan_set_weight - set weight of a channel in percentage.
+ * Tx and Rx channels have separate weights. That is 100% for RX
+ * and 100% for Tx. The weight is used to split cpdma resources
+ * in correct proportion required by the channels, including number
+ * of descriptors. The channel rate is not enough to know the
+ * weight of a channel as the maximum rate of an interface is needed.
+ * If weight = 0, then channel uses rest of descriptors left by
+ * weighted channels.
+ * Returns 0 when the weight is unchanged, otherwise the result of
+ * re-splitting the descriptor pool. The new weight is stored before
+ * the pool is re-split.
+ */
+int cpdma_chan_set_weight(struct cpdma_chan *ch, int weight)
+{
+       struct cpdma_ctlr *ctlr = ch->ctlr;
+       unsigned long flags, ch_flags;
+       int ret;
+
+       /* controller lock first, channel lock nested inside it */
+       spin_lock_irqsave(&ctlr->lock, flags);
+       spin_lock_irqsave(&ch->lock, ch_flags);
+       if (ch->weight == weight) {
+               spin_unlock_irqrestore(&ch->lock, ch_flags);
+               spin_unlock_irqrestore(&ctlr->lock, flags);
+               return 0;
        }
+       ch->weight = weight;
+       spin_unlock_irqrestore(&ch->lock, ch_flags);
+
+       /* re-split pool using new channel weight */
+       ret = cpdma_chan_split_pool(ctlr);
+       spin_unlock_irqrestore(&ctlr->lock, flags);
+       return ret;
+}
+
+/* cpdma_chan_get_min_rate - smallest rate a channel can be limited to
+ * Should be called before cpdma_chan_set_rate.
+ * Derived from the bus frequency and CPDMA_MAX_RLIM_CNT, rounded up.
+ * Returns min rate in Kb/s
+ */
+u32 cpdma_chan_get_min_rate(struct cpdma_ctlr *ctlr)
+{
+       unsigned int freq = ctlr->params.bus_freq_mhz * 32 * 1000;
+
+       return DIV_ROUND_UP(freq, 1 + CPDMA_MAX_RLIM_CNT);
+}
+
+/* cpdma_chan_set_rate - limits bandwidth for transmit channel.
+ * The bandwidth limited channels have to be in order beginning from lowest.
+ * ch - transmit channel the bandwidth is configured for
+ * rate - bandwidth in Kb/s, if 0 - then off shaper
+ * Returns -EINVAL when ch is NULL or not a tx channel, the unchanged rate
+ * when ch already has that rate, otherwise 0 on success or the error
+ * reported while fitting the rate or computing the rate factor.
+ */
+int cpdma_chan_set_rate(struct cpdma_chan *ch, u32 rate)
+{
+       unsigned long flags, ch_flags;
+       struct cpdma_ctlr *ctlr;
+       int ret, prio_mode;
+       u32 rmask;
+
+       if (!ch || !is_tx_chan(ch))
+               return -EINVAL;
+
+       if (ch->rate == rate)
+               return rate;
+
+       /* ch may be NULL above, so it is dereferenced only from here on */
+       ctlr = ch->ctlr;
+
+       spin_lock_irqsave(&ctlr->lock, flags);
+       spin_lock_irqsave(&ch->lock, ch_flags);
+
+       ret = cpdma_chan_fit_rate(ch, rate, &rmask, &prio_mode);
+       if (ret)
+               goto err;
+
+       ret = cpdma_chan_set_factors(ctlr, ch);
+       if (ret)
+               goto err;
+
+       spin_unlock_irqrestore(&ch->lock, ch_flags);
+
+       /* on shapers */
+       _cpdma_control_set(ctlr, CPDMA_TX_RLIM, rmask);
+       _cpdma_control_set(ctlr, CPDMA_TX_PRIO_FIXED, prio_mode);
+       spin_unlock_irqrestore(&ctlr->lock, flags);
+       return ret;
+
+err:
+       spin_unlock_irqrestore(&ch->lock, ch_flags);
+       spin_unlock_irqrestore(&ctlr->lock, flags);
+       return ret;
+}
+
+/* cpdma_chan_get_rate - read the rate currently stored for a channel
+ * ch - channel to query
+ * Returns ch->rate, read while holding the channel lock.
+ */
+u32 cpdma_chan_get_rate(struct cpdma_chan *ch)
+{
+       unsigned long flags;
+       u32 rate;
+
+       spin_lock_irqsave(&ch->lock, flags);
+       rate = ch->rate;
+       spin_unlock_irqrestore(&ch->lock, flags);
+
+       return rate;
 }
 
 struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
@@ -474,7 +893,9 @@ struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
        chan->state     = CPDMA_STATE_IDLE;
        chan->chan_num  = chan_num;
        chan->handler   = handler;
+       chan->rate      = 0;
        chan->desc_num = ctlr->pool->num_desc / 2;
+       chan->weight    = 0;
 
        if (is_rx_chan(chan)) {
                chan->hdp       = ctlr->params.rxhdp + offset;
@@ -533,7 +954,7 @@ int cpdma_chan_destroy(struct cpdma_chan *chan)
                cpdma_chan_stop(chan);
        ctlr->channels[chan->chan_num] = NULL;
        ctlr->chan_num--;
-
+       devm_kfree(ctlr->dev, chan);
        cpdma_chan_split_pool(ctlr);
 
        spin_unlock_irqrestore(&ctlr->lock, flags);
@@ -768,28 +1189,20 @@ EXPORT_SYMBOL_GPL(cpdma_chan_process);
 
 int cpdma_chan_start(struct cpdma_chan *chan)
 {
-       struct cpdma_ctlr       *ctlr = chan->ctlr;
-       struct cpdma_desc_pool  *pool = ctlr->pool;
-       unsigned long           flags;
+       struct cpdma_ctlr *ctlr = chan->ctlr;
+       unsigned long flags;
+       int ret;
 
-       spin_lock_irqsave(&chan->lock, flags);
-       if (chan->state != CPDMA_STATE_IDLE) {
-               spin_unlock_irqrestore(&chan->lock, flags);
-               return -EBUSY;
-       }
-       if (ctlr->state != CPDMA_STATE_ACTIVE) {
-               spin_unlock_irqrestore(&chan->lock, flags);
-               return -EINVAL;
-       }
-       dma_reg_write(ctlr, chan->int_set, chan->mask);
-       chan->state = CPDMA_STATE_ACTIVE;
-       if (chan->head) {
-               chan_write(chan, hdp, desc_phys(pool, chan->head));
-               if (chan->rxfree)
-                       chan_write(chan, rxfree, chan->count);
-       }
+       spin_lock_irqsave(&ctlr->lock, flags);
+       ret = cpdma_chan_set_chan_shaper(chan);
+       spin_unlock_irqrestore(&ctlr->lock, flags);
+       if (ret)
+               return ret;
+
+       ret = cpdma_chan_on(chan);
+       if (ret)
+               return ret;
 
-       spin_unlock_irqrestore(&chan->lock, flags);
        return 0;
 }
 EXPORT_SYMBOL_GPL(cpdma_chan_start);
@@ -874,93 +1287,27 @@ int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable)
        return 0;
 }
 
-struct cpdma_control_info {
-       u32             reg;
-       u32             shift, mask;
-       int             access;
-#define ACCESS_RO      BIT(0)
-#define ACCESS_WO      BIT(1)
-#define ACCESS_RW      (ACCESS_RO | ACCESS_WO)
-};
-
-static struct cpdma_control_info controls[] = {
-       [CPDMA_CMD_IDLE]          = {CPDMA_DMACONTROL,  3,  1,      ACCESS_WO},
-       [CPDMA_COPY_ERROR_FRAMES] = {CPDMA_DMACONTROL,  4,  1,      ACCESS_RW},
-       [CPDMA_RX_OFF_LEN_UPDATE] = {CPDMA_DMACONTROL,  2,  1,      ACCESS_RW},
-       [CPDMA_RX_OWNERSHIP_FLIP] = {CPDMA_DMACONTROL,  1,  1,      ACCESS_RW},
-       [CPDMA_TX_PRIO_FIXED]     = {CPDMA_DMACONTROL,  0,  1,      ACCESS_RW},
-       [CPDMA_STAT_IDLE]         = {CPDMA_DMASTATUS,   31, 1,      ACCESS_RO},
-       [CPDMA_STAT_TX_ERR_CODE]  = {CPDMA_DMASTATUS,   20, 0xf,    ACCESS_RW},
-       [CPDMA_STAT_TX_ERR_CHAN]  = {CPDMA_DMASTATUS,   16, 0x7,    ACCESS_RW},
-       [CPDMA_STAT_RX_ERR_CODE]  = {CPDMA_DMASTATUS,   12, 0xf,    ACCESS_RW},
-       [CPDMA_STAT_RX_ERR_CHAN]  = {CPDMA_DMASTATUS,   8,  0x7,    ACCESS_RW},
-       [CPDMA_RX_BUFFER_OFFSET]  = {CPDMA_RXBUFFOFS,   0,  0xffff, ACCESS_RW},
-};
-
+/*
+ * Read a controller control value.
+ *
+ * Locked wrapper: takes ctlr->lock (IRQ state saved) around
+ * _cpdma_control_get() and returns that helper's result unchanged.
+ * NOTE(review): the helper is not visible here; presumably it performs the
+ * validation and register read that used to be inline below - confirm.
+ */
 int cpdma_control_get(struct cpdma_ctlr *ctlr, int control)
 {
        unsigned long flags;
-       struct cpdma_control_info *info = &controls[control];
        int ret;
 
        spin_lock_irqsave(&ctlr->lock, flags);
-
-       ret = -ENOTSUPP;
-       if (!ctlr->params.has_ext_regs)
-               goto unlock_ret;
-
-       ret = -EINVAL;
-       if (ctlr->state != CPDMA_STATE_ACTIVE)
-               goto unlock_ret;
-
-       ret = -ENOENT;
-       if (control < 0 || control >= ARRAY_SIZE(controls))
-               goto unlock_ret;
-
-       ret = -EPERM;
-       if ((info->access & ACCESS_RO) != ACCESS_RO)
-               goto unlock_ret;
-
-       ret = (dma_reg_read(ctlr, info->reg) >> info->shift) & info->mask;
-
-unlock_ret:
+       ret = _cpdma_control_get(ctlr, control);
        spin_unlock_irqrestore(&ctlr->lock, flags);
+
        return ret;
 }
 
+/*
+ * Write a controller control value.
+ *
+ * Locked wrapper: takes ctlr->lock (IRQ state saved) around
+ * _cpdma_control_set() and returns that helper's result unchanged.
+ * NOTE(review): the helper is not visible here; presumably it performs the
+ * validation and read-modify-write that used to be inline below - confirm.
+ */
 int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value)
 {
        unsigned long flags;
-       struct cpdma_control_info *info = &controls[control];
        int ret;
-       u32 val;
 
        spin_lock_irqsave(&ctlr->lock, flags);
-
-       ret = -ENOTSUPP;
-       if (!ctlr->params.has_ext_regs)
-               goto unlock_ret;
-
-       ret = -EINVAL;
-       if (ctlr->state != CPDMA_STATE_ACTIVE)
-               goto unlock_ret;
-
-       ret = -ENOENT;
-       if (control < 0 || control >= ARRAY_SIZE(controls))
-               goto unlock_ret;
-
-       ret = -EPERM;
-       if ((info->access & ACCESS_WO) != ACCESS_WO)
-               goto unlock_ret;
-
-       val  = dma_reg_read(ctlr, info->reg);
-       val &= ~(info->mask << info->shift);
-       val |= (value & info->mask) << info->shift;
-       dma_reg_write(ctlr, info->reg, val);
-       ret = 0;
-
-unlock_ret:
+       ret = _cpdma_control_set(ctlr, control, value);
        spin_unlock_irqrestore(&ctlr->lock, flags);
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(cpdma_control_set);
index a07b22b12bc1fca73b9460c74fade03f45e78fd5..4a167db2ababfed5adad4d9f8448dd78bc8971e8 100644 (file)
@@ -36,6 +36,7 @@ struct cpdma_params {
        u32                     desc_hw_addr;
        int                     desc_mem_size;
        int                     desc_align;
+       u32                     bus_freq_mhz;
 
        /*
         * Some instances of embedded cpdma controllers have extra control and
@@ -90,8 +91,13 @@ int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable);
 u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr);
 u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr);
 bool cpdma_check_free_tx_desc(struct cpdma_chan *chan);
+int cpdma_chan_set_weight(struct cpdma_chan *ch, int weight);
+int cpdma_chan_set_rate(struct cpdma_chan *ch, u32 rate);
+u32 cpdma_chan_get_rate(struct cpdma_chan *ch);
+u32 cpdma_chan_get_min_rate(struct cpdma_ctlr *ctlr);
 
 enum cpdma_control {
+       CPDMA_TX_RLIM,                  /* read-write */
        CPDMA_CMD_IDLE,                 /* write-only */
        CPDMA_COPY_ERROR_FRAMES,        /* read-write */
        CPDMA_RX_OFF_LEN_UPDATE,        /* read-write */
index 2fd94a5bc1f3a653bebff3c5b71642fe49b97c24..84fbe5714f8b50edd3902223b02cc42511b20244 100644 (file)
@@ -1410,6 +1410,7 @@ static int emac_dev_open(struct net_device *ndev)
        int i = 0;
        struct emac_priv *priv = netdev_priv(ndev);
        struct phy_device *phydev = NULL;
+       struct device *phy = NULL;
 
        ret = pm_runtime_get_sync(&priv->pdev->dev);
        if (ret < 0) {
@@ -1488,19 +1489,20 @@ static int emac_dev_open(struct net_device *ndev)
 
        /* use the first phy on the bus if pdata did not give us a phy id */
        if (!phydev && !priv->phy_id) {
-               struct device *phy;
-
                phy = bus_find_device(&mdio_bus_type, NULL, NULL,
                                      match_first_device);
-               if (phy)
+               if (phy) {
                        priv->phy_id = dev_name(phy);
+                       if (!priv->phy_id || !*priv->phy_id)
+                               put_device(phy);
+               }
        }
 
        if (!phydev && priv->phy_id && *priv->phy_id) {
                phydev = phy_connect(ndev, priv->phy_id,
                                     &emac_adjust_link,
                                     PHY_INTERFACE_MODE_MII);
-
+               put_device(phy);        /* reference taken by bus_find_device */
                if (IS_ERR(phydev)) {
                        dev_err(emac_dev, "could not connect to phy %s\n",
                                priv->phy_id);
index 78b4c831f5ad39757bebd9259ed8e259c292bc84..7981b99ea06e32424fabbdf830795a896a856daf 100644 (file)
@@ -1568,7 +1568,7 @@ static int netcp_setup_navigator_resources(struct net_device *ndev)
        /* open Tx completion queue */
        snprintf(name, sizeof(name), "tx-compl-%s", ndev->name);
        netcp->tx_compl_q = knav_queue_open(name, netcp->tx_compl_qid, 0);
-       if (IS_ERR_OR_NULL(netcp->tx_compl_q)) {
+       if (IS_ERR(netcp->tx_compl_q)) {
                ret = PTR_ERR(netcp->tx_compl_q);
                goto fail;
        }
@@ -1588,7 +1588,7 @@ static int netcp_setup_navigator_resources(struct net_device *ndev)
        /* open Rx completion queue */
        snprintf(name, sizeof(name), "rx-compl-%s", ndev->name);
        netcp->rx_queue = knav_queue_open(name, netcp->rx_queue_id, 0);
-       if (IS_ERR_OR_NULL(netcp->rx_queue)) {
+       if (IS_ERR(netcp->rx_queue)) {
                ret = PTR_ERR(netcp->rx_queue);
                goto fail;
        }
@@ -1610,7 +1610,7 @@ static int netcp_setup_navigator_resources(struct net_device *ndev)
             ++i) {
                snprintf(name, sizeof(name), "rx-fdq-%s-%d", ndev->name, i);
                netcp->rx_fdq[i] = knav_queue_open(name, KNAV_QUEUE_GP, 0);
-               if (IS_ERR_OR_NULL(netcp->rx_fdq[i])) {
+               if (IS_ERR(netcp->rx_fdq[i])) {
                        ret = PTR_ERR(netcp->rx_fdq[i]);
                        goto fail;
                }
index b3abd02dc94925c865f56f739e90fb54eb886e88..eed18f88bdff7f6b253aa02ffaa05a2a859f84a7 100644 (file)
@@ -1694,7 +1694,7 @@ struct gelic_wl_scan_info *gelic_wl_find_best_bss(struct gelic_wl_info *wl)
                                pr_debug("%s: bssid matched\n", __func__);
                                break;
                        } else {
-                               pr_debug("%s: bssid unmached\n", __func__);
+                               pr_debug("%s: bssid unmatched\n", __func__);
                                continue;
                        }
                }
index a9bd665fd1225be2a42f389c0ccb5d600124c9ab..bcd7b76dde9f8a84f3bcdac94cfec0c47132bf5e 100644 (file)
@@ -967,13 +967,8 @@ static const struct attribute_group temac_attr_group = {
 };
 
 /* ethtool support */
-static int temac_nway_reset(struct net_device *ndev)
-{
-       return phy_start_aneg(ndev->phydev);
-}
-
 static const struct ethtool_ops temac_ethtool_ops = {
-       .nway_reset = temac_nway_reset,
+       .nway_reset = phy_ethtool_nway_reset,
        .get_link = ethtool_op_get_link,
        .get_ts_info = ethtool_op_get_ts_info,
        .get_link_ksettings = phy_ethtool_get_link_ksettings,
index 46cc33b9e9263c572cb34b05ec17f8ab021eb648..aee55c03def0d4d462c9d8a6ea67f03e4c840c11 100644 (file)
@@ -708,8 +708,7 @@ static int eth_poll(struct napi_struct *napi, int budget)
                        if (!qmgr_stat_below_low_watermark(rxq) &&
                            napi_reschedule(napi)) { /* not empty again */
 #if DEBUG_RX
-                               printk(KERN_DEBUG "%s: eth_poll"
-                                      " napi_reschedule successed\n",
+                               printk(KERN_DEBUG "%s: eth_poll napi_reschedule succeeded\n",
                                       dev->name);
 #endif
                                qmgr_disable_irq(rxq);
@@ -1002,11 +1001,6 @@ static void ixp4xx_get_drvinfo(struct net_device *dev,
        strlcpy(info->bus_info, "internal", sizeof(info->bus_info));
 }
 
-static int ixp4xx_nway_reset(struct net_device *dev)
-{
-       return phy_start_aneg(dev->phydev);
-}
-
 int ixp46x_phc_index = -1;
 EXPORT_SYMBOL_GPL(ixp46x_phc_index);
 
@@ -1038,7 +1032,7 @@ static int ixp4xx_get_ts_info(struct net_device *dev,
 
 static const struct ethtool_ops ixp4xx_ethtool_ops = {
        .get_drvinfo = ixp4xx_get_drvinfo,
-       .nway_reset = ixp4xx_nway_reset,
+       .nway_reset = phy_ethtool_nway_reset,
        .get_link = ethtool_op_get_link,
        .get_ts_info = ixp4xx_get_ts_info,
        .get_link_ksettings = phy_ethtool_get_link_ksettings,
index 752bcaa852e48cb8bd3688b718a808345e6f810b..45301cb98bc1c279a760e2b19cd1ed32b4659a9f 100644 (file)
@@ -43,43 +43,24 @@ struct geneve_net {
        struct list_head        sock_list;
 };
 
-static int geneve_net_id;
-
-union geneve_addr {
-       struct sockaddr_in sin;
-       struct sockaddr_in6 sin6;
-       struct sockaddr sa;
-};
-
-static union geneve_addr geneve_remote_unspec = { .sa.sa_family = AF_UNSPEC, };
+static unsigned int geneve_net_id;
 
 /* Pseudo network device */
 struct geneve_dev {
        struct hlist_node  hlist;       /* vni hash table */
        struct net         *net;        /* netns for packet i/o */
        struct net_device  *dev;        /* netdev for geneve tunnel */
-       struct geneve_sock *sock4;      /* IPv4 socket used for geneve tunnel */
+       struct ip_tunnel_info info;     /* tunnel key (tun_id, addresses, tp_dst), mode and dst cache */
+       struct geneve_sock __rcu *sock4;        /* IPv4 socket used for geneve tunnel */
 #if IS_ENABLED(CONFIG_IPV6)
-       struct geneve_sock *sock6;      /* IPv6 socket used for geneve tunnel */
+       struct geneve_sock __rcu *sock6;        /* IPv6 socket used for geneve tunnel */
 #endif
-       u8                 vni[3];      /* virtual network ID for tunnel */
-       u8                 ttl;         /* TTL override */
-       u8                 tos;         /* TOS override */
-       union geneve_addr  remote;      /* IP address for link partner */
        struct list_head   next;        /* geneve's per namespace list */
-       __be32             label;       /* IPv6 flowlabel override */
-       __be16             dst_port;
-       bool               collect_md;
        struct gro_cells   gro_cells;
-       u32                flags;
-       struct dst_cache   dst_cache;
+       bool               collect_md;  /* copied to the socket in geneve_sock_add() */
+       bool               use_udp6_rx_checksums;       /* handed to geneve_socket_create() */
 };
 
-/* Geneve device flags */
-#define GENEVE_F_UDP_ZERO_CSUM_TX      BIT(0)
-#define GENEVE_F_UDP_ZERO_CSUM6_TX     BIT(1)
-#define GENEVE_F_UDP_ZERO_CSUM6_RX     BIT(2)
-
 struct geneve_sock {
        bool                    collect_md;
        struct list_head        list;
@@ -87,7 +68,6 @@ struct geneve_sock {
        struct rcu_head         rcu;
        int                     refcnt;
        struct hlist_head       vni_list[VNI_HASH_SIZE];
-       u32                     flags;
 };
 
 static inline __u32 geneve_net_vni_hash(u8 vni[3])
@@ -109,6 +89,31 @@ static __be64 vni_to_tunnel_id(const __u8 *vni)
 #endif
 }
 
+/* Convert 64 bit tunnel ID to 24 bit VNI.
+ *
+ * The VNI is the low 24 bits of the big-endian tunnel ID, i.e. bytes 5..7
+ * of its in-memory representation; vni[0] receives the most significant
+ * byte. On little-endian hosts the raw u64 is shifted by 40/48/56 so that
+ * the very same three memory bytes are picked out.
+ */
+static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
+{
+#ifdef __BIG_ENDIAN
+       vni[0] = (__force __u8)(tun_id >> 16);
+       vni[1] = (__force __u8)(tun_id >> 8);
+       vni[2] = (__force __u8)tun_id;
+#else
+       vni[0] = (__force __u8)((__force u64)tun_id >> 40);
+       vni[1] = (__force __u8)((__force u64)tun_id >> 48);
+       vni[2] = (__force __u8)((__force u64)tun_id >> 56);
+#endif
+}
+
+/* Return true if the VNI embedded in a tunnel ID equals @vni.
+ * @tun_id points at the raw bytes of a __be64 tunnel ID, whose in-memory
+ * layout is the same on every host: the VNI occupies bytes 5..7, most
+ * significant byte first (see tunnel_id_to_vni()), exactly like @vni.
+ * No host-endianness special case is needed, so one memcmp() suffices.
+ */
+static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
+{
+       return !memcmp(vni, &tun_id[5], 3);
+}
+
 static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
 {
        return gs->sock->sk->sk_family;
@@ -125,8 +130,8 @@ static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
        hash = geneve_net_vni_hash(vni);
        vni_list_head = &gs->vni_list[hash];
        hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
-               if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) &&
-                   addr == geneve->remote.sin.sin_addr.s_addr)
+               if (eq_tun_id_and_vni((u8 *)&geneve->info.key.tun_id, vni) &&
+                   addr == geneve->info.key.u.ipv4.dst)
                        return geneve;
        }
        return NULL;
@@ -144,8 +149,8 @@ static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
        hash = geneve_net_vni_hash(vni);
        vni_list_head = &gs->vni_list[hash];
        hlist_for_each_entry_rcu(geneve, vni_list_head, hlist) {
-               if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) &&
-                   ipv6_addr_equal(&addr6, &geneve->remote.sin6.sin6_addr))
+               if (eq_tun_id_and_vni((u8 *)&geneve->info.key.tun_id, vni) &&
+                   ipv6_addr_equal(&addr6, &geneve->info.key.u.ipv6.dst))
                        return geneve;
        }
        return NULL;
@@ -160,15 +165,12 @@ static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
 static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
                                            struct sk_buff *skb)
 {
-       u8 *vni;
-       __be32 addr;
        static u8 zero_vni[3];
-#if IS_ENABLED(CONFIG_IPV6)
-       static struct in6_addr zero_addr6;
-#endif
+       u8 *vni;
 
        if (geneve_get_sk_family(gs) == AF_INET) {
                struct iphdr *iph;
+               __be32 addr;
 
                iph = ip_hdr(skb); /* outer IP header... */
 
@@ -183,6 +185,7 @@ static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
                return geneve_lookup(gs, addr, vni);
 #if IS_ENABLED(CONFIG_IPV6)
        } else if (geneve_get_sk_family(gs) == AF_INET6) {
+               static struct in6_addr zero_addr6;
                struct ipv6hdr *ip6h;
                struct in6_addr addr6;
 
@@ -305,13 +308,12 @@ static int geneve_init(struct net_device *dev)
                return err;
        }
 
-       err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL);
+       err = dst_cache_init(&geneve->info.dst_cache, GFP_KERNEL);
        if (err) {
                free_percpu(dev->tstats);
                gro_cells_destroy(&geneve->gro_cells);
                return err;
        }
-
        return 0;
 }
 
@@ -319,7 +321,7 @@ static void geneve_uninit(struct net_device *dev)
 {
        struct geneve_dev *geneve = netdev_priv(dev);
 
-       dst_cache_destroy(&geneve->dst_cache);
+       dst_cache_destroy(&geneve->info.dst_cache);
        gro_cells_destroy(&geneve->gro_cells);
        free_percpu(dev->tstats);
 }
@@ -368,7 +370,7 @@ drop:
 }
 
 static struct socket *geneve_create_sock(struct net *net, bool ipv6,
-                                        __be16 port, u32 flags)
+                                        __be16 port, bool ipv6_rx_csum)
 {
        struct socket *sock;
        struct udp_port_cfg udp_conf;
@@ -379,8 +381,7 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6,
        if (ipv6) {
                udp_conf.family = AF_INET6;
                udp_conf.ipv6_v6only = 1;
-               udp_conf.use_udp6_rx_checksums =
-                   !(flags & GENEVE_F_UDP_ZERO_CSUM6_RX);
+               udp_conf.use_udp6_rx_checksums = ipv6_rx_csum;
        } else {
                udp_conf.family = AF_INET;
                udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
@@ -453,7 +454,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk,
 
        skb_gro_pull(skb, gh_len);
        skb_gro_postpull_rcsum(skb, gh, gh_len);
-       pp = ptype->callbacks.gro_receive(head, skb);
+       pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
        flush = 0;
 
 out_unlock:
@@ -491,7 +492,7 @@ static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
 
 /* Create new listen socket if needed */
 static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
-                                               bool ipv6, u32 flags)
+                                               bool ipv6, bool ipv6_rx_csum)
 {
        struct geneve_net *gn = net_generic(net, geneve_net_id);
        struct geneve_sock *gs;
@@ -503,7 +504,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
        if (!gs)
                return ERR_PTR(-ENOMEM);
 
-       sock = geneve_create_sock(net, ipv6, port, flags);
+       sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
        if (IS_ERR(sock)) {
                kfree(gs);
                return ERR_CAST(sock);
@@ -543,9 +544,19 @@ static void __geneve_sock_release(struct geneve_sock *gs)
 
+/* Detach the device from its IPv4/IPv6 sockets and drop the references.
+ *
+ * The socket pointers are read with rtnl_dereference(), so the caller is
+ * expected to hold RTNL. Both pointers are cleared before synchronize_net()
+ * and the sockets are only released afterwards, so that transmit paths
+ * reading them through rcu_dereference() see either a live socket or NULL.
+ */
 static void geneve_sock_release(struct geneve_dev *geneve)
 {
-       __geneve_sock_release(geneve->sock4);
+       struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
 #if IS_ENABLED(CONFIG_IPV6)
-       __geneve_sock_release(geneve->sock6);
+       struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);
+
+       rcu_assign_pointer(geneve->sock6, NULL);
+#endif
+
+       rcu_assign_pointer(geneve->sock4, NULL);
+       synchronize_net();
+
+       __geneve_sock_release(gs4);
+#if IS_ENABLED(CONFIG_IPV6)
+       __geneve_sock_release(gs6);
 #endif
 }
 
@@ -569,29 +580,31 @@ static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
        struct net *net = geneve->net;
        struct geneve_net *gn = net_generic(net, geneve_net_id);
        struct geneve_sock *gs;
+       __u8 vni[3];
        __u32 hash;
 
-       gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->dst_port);
+       gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->info.key.tp_dst);
        if (gs) {
                gs->refcnt++;
                goto out;
        }
 
-       gs = geneve_socket_create(net, geneve->dst_port, ipv6, geneve->flags);
+       gs = geneve_socket_create(net, geneve->info.key.tp_dst, ipv6,
+                                 geneve->use_udp6_rx_checksums);
        if (IS_ERR(gs))
                return PTR_ERR(gs);
 
 out:
        gs->collect_md = geneve->collect_md;
-       gs->flags = geneve->flags;
 #if IS_ENABLED(CONFIG_IPV6)
        if (ipv6)
-               geneve->sock6 = gs;
+               rcu_assign_pointer(geneve->sock6, gs);
        else
 #endif
-               geneve->sock4 = gs;
+               rcu_assign_pointer(geneve->sock4, gs);
 
-       hash = geneve_net_vni_hash(geneve->vni);
+       tunnel_id_to_vni(geneve->info.key.tun_id, vni);
+       hash = geneve_net_vni_hash(vni);
        hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]);
        return 0;
 }
@@ -599,13 +612,11 @@ out:
 static int geneve_open(struct net_device *dev)
 {
        struct geneve_dev *geneve = netdev_priv(dev);
-       bool ipv6 = geneve->remote.sa.sa_family == AF_INET6;
+       bool ipv6 = !!(geneve->info.mode & IP_TUNNEL_INFO_IPV6);
        bool metadata = geneve->collect_md;
        int ret = 0;
 
-       geneve->sock4 = NULL;
 #if IS_ENABLED(CONFIG_IPV6)
-       geneve->sock6 = NULL;
        if (ipv6 || metadata)
                ret = geneve_sock_add(geneve, true);
 #endif
@@ -628,67 +639,34 @@ static int geneve_stop(struct net_device *dev)
 }
 
+/* Fill in a Geneve header, including its options, from tunnel metadata.
+ *
+ * @geneveh must have room for the base header plus info->options_len bytes
+ * of options. opt_len is stored in 4-byte units, the OAM and critical bits
+ * come from info->key.tun_flags, and the VNI is derived from
+ * info->key.tun_id. The inner protocol is always ETH_P_TEB.
+ */
 static void geneve_build_header(struct genevehdr *geneveh,
-                               __be16 tun_flags, u8 vni[3],
-                               u8 options_len, u8 *options)
+                               const struct ip_tunnel_info *info)
 {
        geneveh->ver = GENEVE_VER;
-       geneveh->opt_len = options_len / 4;
-       geneveh->oam = !!(tun_flags & TUNNEL_OAM);
-       geneveh->critical = !!(tun_flags & TUNNEL_CRIT_OPT);
+       geneveh->opt_len = info->options_len / 4;
+       geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM);
+       geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT);
        geneveh->rsvd1 = 0;
-       memcpy(geneveh->vni, vni, 3);
+       tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
        geneveh->proto_type = htons(ETH_P_TEB);
        geneveh->rsvd2 = 0;
 
-       memcpy(geneveh->options, options, options_len);
+       ip_tunnel_info_opts_get(geneveh->options, info);
 }
 
-static int geneve_build_skb(struct rtable *rt, struct sk_buff *skb,
-                           __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
-                           u32 flags, bool xnet)
+static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
+                           const struct ip_tunnel_info *info,
+                           bool xnet, int ip_hdr_len)
 {
+       bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
        struct genevehdr *gnvh;
        int min_headroom;
        int err;
-       bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM_TX);
-
-       skb_scrub_packet(skb, xnet);
-
-       min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
-                       + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr);
-       err = skb_cow_head(skb, min_headroom);
-       if (unlikely(err))
-               goto free_rt;
-
-       err = udp_tunnel_handle_offloads(skb, udp_sum);
-       if (err)
-               goto free_rt;
-
-       gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
-       geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
-
-       skb_set_inner_protocol(skb, htons(ETH_P_TEB));
-       return 0;
-
-free_rt:
-       ip_rt_put(rt);
-       return err;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static int geneve6_build_skb(struct dst_entry *dst, struct sk_buff *skb,
-                            __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
-                            u32 flags, bool xnet)
-{
-       struct genevehdr *gnvh;
-       int min_headroom;
-       int err;
-       bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM6_TX);
 
+       skb_reset_mac_header(skb);
        skb_scrub_packet(skb, xnet);
 
-       min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
-                       + GENEVE_BASE_HLEN + opt_len + sizeof(struct ipv6hdr);
+       min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
+                      GENEVE_BASE_HLEN + info->options_len + ip_hdr_len;
        err = skb_cow_head(skb, min_headroom);
        if (unlikely(err))
                goto free_dst;
@@ -697,9 +675,9 @@ static int geneve6_build_skb(struct dst_entry *dst, struct sk_buff *skb,
        if (err)
                goto free_dst;
 
-       gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
-       geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
-
+       gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) +
+                                                  info->options_len);
+       geneve_build_header(gnvh, info);
        skb_set_inner_protocol(skb, htons(ETH_P_TEB));
        return 0;
 
@@ -707,12 +685,11 @@ free_dst:
        dst_release(dst);
        return err;
 }
-#endif
 
 static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
                                       struct net_device *dev,
                                       struct flowi4 *fl4,
-                                      struct ip_tunnel_info *info)
+                                      const struct ip_tunnel_info *info)
 {
        bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
        struct geneve_dev *geneve = netdev_priv(dev);
@@ -720,35 +697,28 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
        struct rtable *rt = NULL;
        __u8 tos;
 
+       if (!rcu_dereference(geneve->sock4))
+               return ERR_PTR(-EIO);
+
        memset(fl4, 0, sizeof(*fl4));
        fl4->flowi4_mark = skb->mark;
        fl4->flowi4_proto = IPPROTO_UDP;
+       fl4->daddr = info->key.u.ipv4.dst;
+       fl4->saddr = info->key.u.ipv4.src;
 
-       if (info) {
-               fl4->daddr = info->key.u.ipv4.dst;
-               fl4->saddr = info->key.u.ipv4.src;
-               fl4->flowi4_tos = RT_TOS(info->key.tos);
-               dst_cache = &info->dst_cache;
-       } else {
-               tos = geneve->tos;
-               if (tos == 1) {
-                       const struct iphdr *iip = ip_hdr(skb);
-
-                       tos = ip_tunnel_get_dsfield(iip, skb);
-                       use_cache = false;
-               }
-
-               fl4->flowi4_tos = RT_TOS(tos);
-               fl4->daddr = geneve->remote.sin.sin_addr.s_addr;
-               dst_cache = &geneve->dst_cache;
+       tos = info->key.tos;
+       if ((tos == 1) && !geneve->collect_md) {
+               tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
+               use_cache = false;
        }
+       fl4->flowi4_tos = RT_TOS(tos);
 
+       dst_cache = (struct dst_cache *)&info->dst_cache;
        if (use_cache) {
                rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
                if (rt)
                        return rt;
        }
-
        rt = ip_route_output_key(geneve->net, fl4);
        if (IS_ERR(rt)) {
                netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
@@ -768,46 +738,38 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
 static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
                                           struct net_device *dev,
                                           struct flowi6 *fl6,
-                                          struct ip_tunnel_info *info)
+                                          const struct ip_tunnel_info *info)
 {
        bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
        struct geneve_dev *geneve = netdev_priv(dev);
-       struct geneve_sock *gs6 = geneve->sock6;
        struct dst_entry *dst = NULL;
        struct dst_cache *dst_cache;
+       struct geneve_sock *gs6;
        __u8 prio;
 
+       gs6 = rcu_dereference(geneve->sock6);
+       if (!gs6)
+               return ERR_PTR(-EIO);
+
        memset(fl6, 0, sizeof(*fl6));
        fl6->flowi6_mark = skb->mark;
        fl6->flowi6_proto = IPPROTO_UDP;
-
-       if (info) {
-               fl6->daddr = info->key.u.ipv6.dst;
-               fl6->saddr = info->key.u.ipv6.src;
-               fl6->flowlabel = ip6_make_flowinfo(RT_TOS(info->key.tos),
-                                                  info->key.label);
-               dst_cache = &info->dst_cache;
-       } else {
-               prio = geneve->tos;
-               if (prio == 1) {
-                       const struct iphdr *iip = ip_hdr(skb);
-
-                       prio = ip_tunnel_get_dsfield(iip, skb);
-                       use_cache = false;
-               }
-
-               fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
-                                                  geneve->label);
-               fl6->daddr = geneve->remote.sin6.sin6_addr;
-               dst_cache = &geneve->dst_cache;
+       fl6->daddr = info->key.u.ipv6.dst;
+       fl6->saddr = info->key.u.ipv6.src;
+       prio = info->key.tos;
+       if ((prio == 1) && !geneve->collect_md) {
+               prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
+               use_cache = false;
        }
 
+       fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
+                                          info->key.label);
+       dst_cache = (struct dst_cache *)&info->dst_cache;
        if (use_cache) {
                dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
                if (dst)
                        return dst;
        }
-
        if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
                netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
                return ERR_PTR(-ENETUNREACH);
@@ -824,197 +786,81 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
 }
 #endif
 
-/* Convert 64 bit tunnel ID to 24 bit VNI. */
-static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
+static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
+                          struct geneve_dev *geneve,
+                          const struct ip_tunnel_info *info)
 {
-#ifdef __BIG_ENDIAN
-       vni[0] = (__force __u8)(tun_id >> 16);
-       vni[1] = (__force __u8)(tun_id >> 8);
-       vni[2] = (__force __u8)tun_id;
-#else
-       vni[0] = (__force __u8)((__force u64)tun_id >> 40);
-       vni[1] = (__force __u8)((__force u64)tun_id >> 48);
-       vni[2] = (__force __u8)((__force u64)tun_id >> 56);
-#endif
-}
-
-static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
-                                  struct ip_tunnel_info *info)
-{
-       struct geneve_dev *geneve = netdev_priv(dev);
-       struct geneve_sock *gs4 = geneve->sock4;
-       struct rtable *rt = NULL;
-       const struct iphdr *iip; /* interior IP header */
-       int err = -EINVAL;
+       bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
+       struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
+       const struct ip_tunnel_key *key = &info->key;
+       struct rtable *rt;
        struct flowi4 fl4;
        __u8 tos, ttl;
        __be16 sport;
        __be16 df;
-       bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
-       u32 flags = geneve->flags;
-
-       if (geneve->collect_md) {
-               if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
-                       netdev_dbg(dev, "no tunnel metadata\n");
-                       goto tx_error;
-               }
-               if (info && ip_tunnel_info_af(info) != AF_INET)
-                       goto tx_error;
-       }
+       int err;
 
        rt = geneve_get_v4_rt(skb, dev, &fl4, info);
-       if (IS_ERR(rt)) {
-               err = PTR_ERR(rt);
-               goto tx_error;
-       }
+       if (IS_ERR(rt))
+               return PTR_ERR(rt);
 
        sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
-       skb_reset_mac_header(skb);
-
-       iip = ip_hdr(skb);
-
-       if (info) {
-               const struct ip_tunnel_key *key = &info->key;
-               u8 *opts = NULL;
-               u8 vni[3];
-
-               tunnel_id_to_vni(key->tun_id, vni);
-               if (info->options_len)
-                       opts = ip_tunnel_info_opts(info);
-
-               if (key->tun_flags & TUNNEL_CSUM)
-                       flags &= ~GENEVE_F_UDP_ZERO_CSUM_TX;
-               else
-                       flags |= GENEVE_F_UDP_ZERO_CSUM_TX;
-
-               err = geneve_build_skb(rt, skb, key->tun_flags, vni,
-                                      info->options_len, opts, flags, xnet);
-               if (unlikely(err))
-                       goto tx_error;
-
-               tos = ip_tunnel_ecn_encap(key->tos, iip, skb);
+       if (geneve->collect_md) {
+               tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
                ttl = key->ttl;
-               df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
        } else {
-               err = geneve_build_skb(rt, skb, 0, geneve->vni,
-                                      0, NULL, flags, xnet);
-               if (unlikely(err))
-                       goto tx_error;
-
-               tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, iip, skb);
-               ttl = geneve->ttl;
-               if (!ttl && IN_MULTICAST(ntohl(fl4.daddr)))
-                       ttl = 1;
-               ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
-               df = 0;
+               tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
+               ttl = key->ttl ? : ip4_dst_hoplimit(&rt->dst);
        }
-       udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
-                           tos, ttl, df, sport, geneve->dst_port,
-                           !net_eq(geneve->net, dev_net(geneve->dev)),
-                           !!(flags & GENEVE_F_UDP_ZERO_CSUM_TX));
-
-       return NETDEV_TX_OK;
+       df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
 
-tx_error:
-       dev_kfree_skb(skb);
-
-       if (err == -ELOOP)
-               dev->stats.collisions++;
-       else if (err == -ENETUNREACH)
-               dev->stats.tx_carrier_errors++;
+       err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr));
+       if (unlikely(err))
+               return err;
 
-       dev->stats.tx_errors++;
-       return NETDEV_TX_OK;
+       udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
+                           tos, ttl, df, sport, geneve->info.key.tp_dst,
+                           !net_eq(geneve->net, dev_net(geneve->dev)),
+                           !(info->key.tun_flags & TUNNEL_CSUM));
+       return 0;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
-                                   struct ip_tunnel_info *info)
+static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
+                           struct geneve_dev *geneve,
+                           const struct ip_tunnel_info *info)
 {
-       struct geneve_dev *geneve = netdev_priv(dev);
-       struct geneve_sock *gs6 = geneve->sock6;
+       bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
+       struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
+       const struct ip_tunnel_key *key = &info->key;
        struct dst_entry *dst = NULL;
-       const struct iphdr *iip; /* interior IP header */
-       int err = -EINVAL;
        struct flowi6 fl6;
        __u8 prio, ttl;
        __be16 sport;
-       __be32 label;
-       bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
-       u32 flags = geneve->flags;
-
-       if (geneve->collect_md) {
-               if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
-                       netdev_dbg(dev, "no tunnel metadata\n");
-                       goto tx_error;
-               }
-       }
+       int err;
 
        dst = geneve_get_v6_dst(skb, dev, &fl6, info);
-       if (IS_ERR(dst)) {
-               err = PTR_ERR(dst);
-               goto tx_error;
-       }
+       if (IS_ERR(dst))
+               return PTR_ERR(dst);
 
        sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
-       skb_reset_mac_header(skb);
-
-       iip = ip_hdr(skb);
-
-       if (info) {
-               const struct ip_tunnel_key *key = &info->key;
-               u8 *opts = NULL;
-               u8 vni[3];
-
-               tunnel_id_to_vni(key->tun_id, vni);
-               if (info->options_len)
-                       opts = ip_tunnel_info_opts(info);
-
-               if (key->tun_flags & TUNNEL_CSUM)
-                       flags &= ~GENEVE_F_UDP_ZERO_CSUM6_TX;
-               else
-                       flags |= GENEVE_F_UDP_ZERO_CSUM6_TX;
-
-               err = geneve6_build_skb(dst, skb, key->tun_flags, vni,
-                                       info->options_len, opts,
-                                       flags, xnet);
-               if (unlikely(err))
-                       goto tx_error;
-
-               prio = ip_tunnel_ecn_encap(key->tos, iip, skb);
+       if (geneve->collect_md) {
+               prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
                ttl = key->ttl;
-               label = info->key.label;
        } else {
-               err = geneve6_build_skb(dst, skb, 0, geneve->vni,
-                                       0, NULL, flags, xnet);
-               if (unlikely(err))
-                       goto tx_error;
-
                prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
-                                          iip, skb);
-               ttl = geneve->ttl;
-               if (!ttl && ipv6_addr_is_multicast(&fl6.daddr))
-                       ttl = 1;
-               ttl = ttl ? : ip6_dst_hoplimit(dst);
-               label = geneve->label;
+                                          ip_hdr(skb), skb);
+               ttl = key->ttl ? : ip6_dst_hoplimit(dst);
        }
+       err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr));
+       if (unlikely(err))
+               return err;
 
        udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
-                            &fl6.saddr, &fl6.daddr, prio, ttl, label,
-                            sport, geneve->dst_port,
-                            !!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX));
-       return NETDEV_TX_OK;
-
-tx_error:
-       dev_kfree_skb(skb);
-
-       if (err == -ELOOP)
-               dev->stats.collisions++;
-       else if (err == -ENETUNREACH)
-               dev->stats.tx_carrier_errors++;
-
-       dev->stats.tx_errors++;
-       return NETDEV_TX_OK;
+                            &fl6.saddr, &fl6.daddr, prio, ttl,
+                            info->key.label, sport, geneve->info.key.tp_dst,
+                            !(info->key.tun_flags & TUNNEL_CSUM));
+       return 0;
 }
 #endif
 
@@ -1022,16 +868,38 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct geneve_dev *geneve = netdev_priv(dev);
        struct ip_tunnel_info *info = NULL;
+       int err;
 
-       if (geneve->collect_md)
+       if (geneve->collect_md) {
                info = skb_tunnel_info(skb);
+               if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
+                       err = -EINVAL;
+                       netdev_dbg(dev, "no tunnel metadata\n");
+                       goto tx_error;
+               }
+       } else {
+               info = &geneve->info;
+       }
 
 #if IS_ENABLED(CONFIG_IPV6)
-       if ((info && ip_tunnel_info_af(info) == AF_INET6) ||
-           (!info && geneve->remote.sa.sa_family == AF_INET6))
-               return geneve6_xmit_skb(skb, dev, info);
+       if (info->mode & IP_TUNNEL_INFO_IPV6)
+               err = geneve6_xmit_skb(skb, dev, geneve, info);
+       else
 #endif
-       return geneve_xmit_skb(skb, dev, info);
+               err = geneve_xmit_skb(skb, dev, geneve, info);
+
+       if (likely(!err))
+               return NETDEV_TX_OK;
+tx_error:
+       dev_kfree_skb(skb);
+
+       if (err == -ELOOP)
+               dev->stats.collisions++;
+       else if (err == -ENETUNREACH)
+               dev->stats.tx_carrier_errors++;
+
+       dev->stats.tx_errors++;
+       return NETDEV_TX_OK;
 }
 
 static int geneve_change_mtu(struct net_device *dev, int new_mtu)
@@ -1050,14 +918,11 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 {
        struct ip_tunnel_info *info = skb_tunnel_info(skb);
        struct geneve_dev *geneve = netdev_priv(dev);
-       struct rtable *rt;
-       struct flowi4 fl4;
-#if IS_ENABLED(CONFIG_IPV6)
-       struct dst_entry *dst;
-       struct flowi6 fl6;
-#endif
 
        if (ip_tunnel_info_af(info) == AF_INET) {
+               struct rtable *rt;
+               struct flowi4 fl4;
+
                rt = geneve_get_v4_rt(skb, dev, &fl4, info);
                if (IS_ERR(rt))
                        return PTR_ERR(rt);
@@ -1066,6 +931,9 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
                info->key.u.ipv4.src = fl4.saddr;
 #if IS_ENABLED(CONFIG_IPV6)
        } else if (ip_tunnel_info_af(info) == AF_INET6) {
+               struct dst_entry *dst;
+               struct flowi6 fl6;
+
                dst = geneve_get_v6_dst(skb, dev, &fl6, info);
                if (IS_ERR(dst))
                        return PTR_ERR(dst);
@@ -1079,7 +947,7 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 
        info->key.tp_src = udp_flow_src_port(geneve->net, skb,
                                             1, USHRT_MAX, true);
-       info->key.tp_dst = geneve->dst_port;
+       info->key.tp_dst = geneve->info.key.tp_dst;
        return 0;
 }
 
@@ -1201,78 +1069,69 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[])
 }
 
 static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
-                                         __be16 dst_port,
-                                         union geneve_addr *remote,
-                                         u8 vni[],
+                                         const struct ip_tunnel_info *info,
                                          bool *tun_on_same_port,
                                          bool *tun_collect_md)
 {
-       struct geneve_dev *geneve, *t;
+       struct geneve_dev *geneve, *t = NULL;
 
        *tun_on_same_port = false;
        *tun_collect_md = false;
-       t = NULL;
        list_for_each_entry(geneve, &gn->geneve_list, next) {
-               if (geneve->dst_port == dst_port) {
+               if (info->key.tp_dst == geneve->info.key.tp_dst) {
                        *tun_collect_md = geneve->collect_md;
                        *tun_on_same_port = true;
                }
-               if (!memcmp(vni, geneve->vni, sizeof(geneve->vni)) &&
-                   !memcmp(remote, &geneve->remote, sizeof(geneve->remote)) &&
-                   dst_port == geneve->dst_port)
+               if (info->key.tun_id == geneve->info.key.tun_id &&
+                   info->key.tp_dst == geneve->info.key.tp_dst &&
+                   !memcmp(&info->key.u, &geneve->info.key.u, sizeof(info->key.u)))
                        t = geneve;
        }
        return t;
 }
 
+static bool is_all_zero(const u8 *fp, size_t size)
+{
+       int i;
+
+       for (i = 0; i < size; i++)
+               if (fp[i])
+                       return false;
+       return true;
+}
+
+static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
+{
+       if (info->key.tun_id || info->key.tun_flags || info->key.tos ||
+           info->key.ttl || info->key.label || info->key.tp_src ||
+           !is_all_zero((const u8 *)&info->key.u, sizeof(info->key.u)))
+               return false;
+       else
+               return true;
+}
+
 static int geneve_configure(struct net *net, struct net_device *dev,
-                           union geneve_addr *remote,
-                           __u32 vni, __u8 ttl, __u8 tos, __be32 label,
-                           __be16 dst_port, bool metadata, u32 flags)
+                           const struct ip_tunnel_info *info,
+                           bool metadata, bool ipv6_rx_csum)
 {
        struct geneve_net *gn = net_generic(net, geneve_net_id);
        struct geneve_dev *t, *geneve = netdev_priv(dev);
        bool tun_collect_md, tun_on_same_port;
        int err, encap_len;
 
-       if (!remote)
-               return -EINVAL;
-       if (metadata &&
-           (remote->sa.sa_family != AF_UNSPEC || vni || tos || ttl || label))
+       if (metadata && !is_tnl_info_zero(info))
                return -EINVAL;
 
        geneve->net = net;
        geneve->dev = dev;
 
-       geneve->vni[0] = (vni & 0x00ff0000) >> 16;
-       geneve->vni[1] = (vni & 0x0000ff00) >> 8;
-       geneve->vni[2] =  vni & 0x000000ff;
-
-       if ((remote->sa.sa_family == AF_INET &&
-            IN_MULTICAST(ntohl(remote->sin.sin_addr.s_addr))) ||
-           (remote->sa.sa_family == AF_INET6 &&
-            ipv6_addr_is_multicast(&remote->sin6.sin6_addr)))
-               return -EINVAL;
-       if (label && remote->sa.sa_family != AF_INET6)
-               return -EINVAL;
-
-       geneve->remote = *remote;
-
-       geneve->ttl = ttl;
-       geneve->tos = tos;
-       geneve->label = label;
-       geneve->dst_port = dst_port;
-       geneve->collect_md = metadata;
-       geneve->flags = flags;
-
-       t = geneve_find_dev(gn, dst_port, remote, geneve->vni,
-                           &tun_on_same_port, &tun_collect_md);
+       t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md);
        if (t)
                return -EBUSY;
 
        /* make enough headroom for basic scenario */
        encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
-       if (remote->sa.sa_family == AF_INET) {
+       if (ip_tunnel_info_af(info) == AF_INET) {
                encap_len += sizeof(struct iphdr);
                dev->max_mtu -= sizeof(struct iphdr);
        } else {
@@ -1289,7 +1148,10 @@ static int geneve_configure(struct net *net, struct net_device *dev,
                        return -EPERM;
        }
 
-       dst_cache_reset(&geneve->dst_cache);
+       dst_cache_reset(&geneve->info.dst_cache);
+       geneve->info = *info;
+       geneve->collect_md = metadata;
+       geneve->use_udp6_rx_checksums = ipv6_rx_csum;
 
        err = register_netdevice(dev);
        if (err)
@@ -1299,74 +1161,99 @@ static int geneve_configure(struct net *net, struct net_device *dev,
        return 0;
 }
 
+static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
+{
+       memset(info, 0, sizeof(*info));
+       info->key.tp_dst = htons(dst_port);
+}
+
 static int geneve_newlink(struct net *net, struct net_device *dev,
                          struct nlattr *tb[], struct nlattr *data[])
 {
-       __be16 dst_port = htons(GENEVE_UDP_PORT);
-       __u8 ttl = 0, tos = 0;
+       bool use_udp6_rx_checksums = false;
+       struct ip_tunnel_info info;
        bool metadata = false;
-       union geneve_addr remote = geneve_remote_unspec;
-       __be32 label = 0;
-       __u32 vni = 0;
-       u32 flags = 0;
+
+       init_tnl_info(&info, GENEVE_UDP_PORT);
 
        if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6])
                return -EINVAL;
 
        if (data[IFLA_GENEVE_REMOTE]) {
-               remote.sa.sa_family = AF_INET;
-               remote.sin.sin_addr.s_addr =
+               info.key.u.ipv4.dst =
                        nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
+
+               if (IN_MULTICAST(ntohl(info.key.u.ipv4.dst))) {
+                       netdev_dbg(dev, "multicast remote is unsupported\n");
+                       return -EINVAL;
+               }
        }
 
        if (data[IFLA_GENEVE_REMOTE6]) {
-               if (!IS_ENABLED(CONFIG_IPV6))
-                       return -EPFNOSUPPORT;
-
-               remote.sa.sa_family = AF_INET6;
-               remote.sin6.sin6_addr =
+ #if IS_ENABLED(CONFIG_IPV6)
+               info.mode = IP_TUNNEL_INFO_IPV6;
+               info.key.u.ipv6.dst =
                        nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
 
-               if (ipv6_addr_type(&remote.sin6.sin6_addr) &
+               if (ipv6_addr_type(&info.key.u.ipv6.dst) &
                    IPV6_ADDR_LINKLOCAL) {
                        netdev_dbg(dev, "link-local remote is unsupported\n");
                        return -EINVAL;
                }
+               if (ipv6_addr_is_multicast(&info.key.u.ipv6.dst)) {
+                       netdev_dbg(dev, "multicast remote is unsupported\n");
+                       return -EINVAL;
+               }
+               info.key.tun_flags |= TUNNEL_CSUM;
+               use_udp6_rx_checksums = true;
+#else
+               return -EPFNOSUPPORT;
+#endif
        }
 
-       if (data[IFLA_GENEVE_ID])
+       if (data[IFLA_GENEVE_ID]) {
+               __u32 vni;
+               __u8 tvni[3];
+
                vni = nla_get_u32(data[IFLA_GENEVE_ID]);
+               tvni[0] = (vni & 0x00ff0000) >> 16;
+               tvni[1] = (vni & 0x0000ff00) >> 8;
+               tvni[2] =  vni & 0x000000ff;
 
+               info.key.tun_id = vni_to_tunnel_id(tvni);
+       }
        if (data[IFLA_GENEVE_TTL])
-               ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
+               info.key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
 
        if (data[IFLA_GENEVE_TOS])
-               tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
+               info.key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
 
-       if (data[IFLA_GENEVE_LABEL])
-               label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
-                       IPV6_FLOWLABEL_MASK;
+       if (data[IFLA_GENEVE_LABEL]) {
+               info.key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
+                                 IPV6_FLOWLABEL_MASK;
+               if (info.key.label && (!(info.mode & IP_TUNNEL_INFO_IPV6)))
+                       return -EINVAL;
+       }
 
        if (data[IFLA_GENEVE_PORT])
-               dst_port = nla_get_be16(data[IFLA_GENEVE_PORT]);
+               info.key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
 
        if (data[IFLA_GENEVE_COLLECT_METADATA])
                metadata = true;
 
        if (data[IFLA_GENEVE_UDP_CSUM] &&
            !nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
-               flags |= GENEVE_F_UDP_ZERO_CSUM_TX;
+               info.key.tun_flags |= TUNNEL_CSUM;
 
        if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] &&
            nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
-               flags |= GENEVE_F_UDP_ZERO_CSUM6_TX;
+               info.key.tun_flags &= ~TUNNEL_CSUM;
 
        if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] &&
            nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
-               flags |= GENEVE_F_UDP_ZERO_CSUM6_RX;
+               use_udp6_rx_checksums = false;
 
-       return geneve_configure(net, dev, &remote, vni, ttl, tos, label,
-                               dst_port, metadata, flags);
+       return geneve_configure(net, dev, &info, metadata, use_udp6_rx_checksums);
 }
 
 static void geneve_dellink(struct net_device *dev, struct list_head *head)
@@ -1395,45 +1282,52 @@ static size_t geneve_get_size(const struct net_device *dev)
 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
 {
        struct geneve_dev *geneve = netdev_priv(dev);
+       struct ip_tunnel_info *info = &geneve->info;
+       __u8 tmp_vni[3];
        __u32 vni;
 
-       vni = (geneve->vni[0] << 16) | (geneve->vni[1] << 8) | geneve->vni[2];
+       tunnel_id_to_vni(info->key.tun_id, tmp_vni);
+       vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2];
        if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
                goto nla_put_failure;
 
-       if (geneve->remote.sa.sa_family == AF_INET) {
+       if (ip_tunnel_info_af(info) == AF_INET) {
                if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
-                                   geneve->remote.sin.sin_addr.s_addr))
+                                   info->key.u.ipv4.dst))
+                       goto nla_put_failure;
+
+               if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
+                              !!(info->key.tun_flags & TUNNEL_CSUM)))
                        goto nla_put_failure;
+
 #if IS_ENABLED(CONFIG_IPV6)
        } else {
                if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
-                                    &geneve->remote.sin6.sin6_addr))
+                                    &info->key.u.ipv6.dst))
+                       goto nla_put_failure;
+
+               if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
+                              !(info->key.tun_flags & TUNNEL_CSUM)))
+                       goto nla_put_failure;
+
+               if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
+                              !geneve->use_udp6_rx_checksums))
                        goto nla_put_failure;
 #endif
        }
 
-       if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) ||
-           nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos) ||
-           nla_put_be32(skb, IFLA_GENEVE_LABEL, geneve->label))
+       if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
+           nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
+           nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
                goto nla_put_failure;
 
-       if (nla_put_be16(skb, IFLA_GENEVE_PORT, geneve->dst_port))
+       if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
                goto nla_put_failure;
 
        if (geneve->collect_md) {
                if (nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
                        goto nla_put_failure;
        }
-
-       if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
-                      !(geneve->flags & GENEVE_F_UDP_ZERO_CSUM_TX)) ||
-           nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
-                      !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_TX)) ||
-           nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
-                      !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_RX)))
-               goto nla_put_failure;
-
        return 0;
 
 nla_put_failure:
@@ -1457,6 +1351,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
                                        u8 name_assign_type, u16 dst_port)
 {
        struct nlattr *tb[IFLA_MAX + 1];
+       struct ip_tunnel_info info;
        struct net_device *dev;
        LIST_HEAD(list_kill);
        int err;
@@ -1467,9 +1362,8 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
        if (IS_ERR(dev))
                return dev;
 
-       err = geneve_configure(net, dev, &geneve_remote_unspec,
-                              0, 0, 0, 0, htons(dst_port), true,
-                              GENEVE_F_UDP_ZERO_CSUM6_RX);
+       init_tnl_info(&info, dst_port);
+       err = geneve_configure(net, dev, &info, true, true);
        if (err) {
                free_netdev(dev);
                return ERR_PTR(err);
@@ -1487,8 +1381,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
                goto err;
 
        return dev;
-
- err:
+err:
        geneve_dellink(dev, &list_kill);
        unregister_netdevice_many(&list_kill);
        return ERR_PTR(err);
@@ -1571,7 +1464,6 @@ static int __init geneve_init_module(void)
                goto out3;
 
        return 0;
-
 out3:
        unregister_netdevice_notifier(&geneve_notifier_block);
 out2:
index 97e0cbca0a08a15af666fdb17ebc0c87e2a09bda..98f10c21652140a57cf2ee89ee1ff3b25a7802ba 100644 (file)
@@ -77,7 +77,7 @@ struct gtp_dev {
        struct hlist_head       *addr_hash;
 };
 
-static int gtp_net_id __read_mostly;
+static unsigned int gtp_net_id __read_mostly;
 
 struct gtp_net {
        struct list_head gtp_dev_list;
@@ -1094,14 +1094,7 @@ static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info)
        return 0;
 }
 
-static struct genl_family gtp_genl_family = {
-       .id             = GENL_ID_GENERATE,
-       .name           = "gtp",
-       .version        = 0,
-       .hdrsize        = 0,
-       .maxattr        = GTPA_MAX,
-       .netnsok        = true,
-};
+static struct genl_family gtp_genl_family;
 
 static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq,
                              u32 type, struct pdp_ctx *pctx)
@@ -1297,6 +1290,17 @@ static const struct genl_ops gtp_genl_ops[] = {
        },
 };
 
+static struct genl_family gtp_genl_family __ro_after_init = {
+       .name           = "gtp",
+       .version        = 0,
+       .hdrsize        = 0,
+       .maxattr        = GTPA_MAX,
+       .netnsok        = true,
+       .module         = THIS_MODULE,
+       .ops            = gtp_genl_ops,
+       .n_ops          = ARRAY_SIZE(gtp_genl_ops),
+};
+
 static int __net_init gtp_net_init(struct net *net)
 {
        struct gtp_net *gn = net_generic(net, gtp_net_id);
@@ -1336,7 +1340,7 @@ static int __init gtp_init(void)
        if (err < 0)
                goto error_out;
 
-       err = genl_register_family_with_ops(&gtp_genl_family, gtp_genl_ops);
+       err = genl_register_family(&gtp_genl_family);
        if (err < 0)
                goto unreg_rtnl_link;
 
index e2bfaac1801dedc698a83657a0f1f5fdcddefca0..5a1cc089acb7fd2e79c18876cd7951f6dfb6e747 100644 (file)
@@ -410,8 +410,8 @@ static int netvsc_init_buf(struct hv_device *device)
        net_device->send_section_cnt =
                net_device->send_buf_size / net_device->send_section_size;
 
-       dev_info(&device->device, "Send section size: %d, Section count:%d\n",
-                net_device->send_section_size, net_device->send_section_cnt);
+       netdev_dbg(ndev, "Send section size: %d, Section count:%d\n",
+                  net_device->send_section_size, net_device->send_section_cnt);
 
        /* Setup state for managing the send buffer. */
        net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
@@ -578,7 +578,7 @@ void netvsc_device_remove(struct hv_device *device)
         * At this point, no one should be accessing net_device
         * except in here
         */
-       dev_notice(&device->device, "net device safe to remove\n");
+       netdev_dbg(ndev, "net device safe to remove\n");
 
        /* Now, we can close the channel safely */
        vmbus_close(device->channel);
@@ -1387,7 +1387,7 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
        }
 
        /* Channel is opened */
-       pr_info("hv_netvsc channel opened successfully\n");
+       netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
 
        /* If we're reopening the device we may have multiple queues, fill the
         * chn_table with the default channel to use it before subchannels are
index 3b28cf127f9d5de1378a06d19fccc0f8dbb1392b..9522763c8faf4aea389a958f6248cc5f2b2847dc 100644 (file)
@@ -447,7 +447,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
         * Setup the sendside checksum offload only if this is not a
         * GSO packet.
         */
-       if (skb_is_gso(skb)) {
+       if ((net_trans_info & (INFO_TCP | INFO_UDP)) && skb_is_gso(skb)) {
                struct ndis_tcp_lso_info *lso_info;
 
                rndis_msg_size += NDIS_LSO_PPI_SIZE;
@@ -607,15 +607,18 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
               packet->total_data_buflen);
 
        skb->protocol = eth_type_trans(skb, net);
-       if (csum_info) {
-               /* We only look at the IP checksum here.
-                * Should we be dropping the packet if checksum
-                * failed? How do we deal with other checksums - TCP/UDP?
-                */
-               if (csum_info->receive.ip_checksum_succeeded)
+
+       /* skb is already created with CHECKSUM_NONE */
+       skb_checksum_none_assert(skb);
+
+       /*
+        * In Linux, the IP checksum is always checked.
+        * Do L4 checksum offload if enabled and present.
+        */
+       if (csum_info && (net->features & NETIF_F_RXCSUM)) {
+               if (csum_info->receive.tcp_checksum_succeeded ||
+                   csum_info->receive.udp_checksum_succeeded)
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
-               else
-                       skb->ip_summed = CHECKSUM_NONE;
        }
 
        if (vlan_tci & VLAN_TAG_PRESENT)
@@ -696,12 +699,8 @@ int netvsc_recv_callback(struct hv_device *device_obj,
 static void netvsc_get_drvinfo(struct net_device *net,
                               struct ethtool_drvinfo *info)
 {
-       struct net_device_context *net_device_ctx = netdev_priv(net);
-       struct hv_device *dev = net_device_ctx->device_ctx;
-
        strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
        strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
-       strlcpy(info->bus_info, vmbus_dev_name(dev), sizeof(info->bus_info));
 }
 
 static void netvsc_get_channels(struct net_device *net,
index 9195d5da8485d50ee669e5dc1fb36ec80ebf7b82..8d90904e0e49f4333bac23d7128af1255f8e8a54 100644 (file)
@@ -1059,9 +1059,9 @@ int rndis_filter_device_add(struct hv_device *dev,
 
        device_info->link_state = rndis_device->link_state;
 
-       dev_info(&dev->device, "Device MAC %pM link state %s\n",
-                rndis_device->hw_mac_adr,
-                device_info->link_state ? "down" : "up");
+       netdev_dbg(net, "Device MAC %pM link state %s\n",
+                  rndis_device->hw_mac_adr,
+                  device_info->link_state ? "down" : "up");
 
        if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
                return 0;
index 9fa7ac9f8e68f1314b71858cc05ea497df0b5c20..3e4c8b21403c81daf699e5463db44f33f198f807 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/skbuff.h>
 #include <linux/of.h>
 #include <linux/irq.h>
-#include <linux/delay.h>
 #include <linux/debugfs.h>
 #include <linux/bitops.h>
 #include <linux/ieee802154.h>
@@ -874,7 +873,7 @@ static int adf7242_rx(struct adf7242_local *lp)
        return 0;
 }
 
-static struct ieee802154_ops adf7242_ops = {
+static const struct ieee802154_ops adf7242_ops = {
        .owner = THIS_MODULE,
        .xmit_sync = adf7242_xmit,
        .ed = adf7242_ed,
index 1056ed142411d3c75082fced68c59f1b269d0532..322864a1a94b8e8d286467e84a9b128f9d994129 100644 (file)
@@ -567,7 +567,7 @@ atusb_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
        return 0;
 }
 
-static struct ieee802154_ops atusb_ops = {
+static const struct ieee802154_ops atusb_ops = {
        .owner                  = THIS_MODULE,
        .xmit_async             = atusb_xmit,
        .ed                     = atusb_ed,
index 7e0732f5ea0747dd4df3d53b7c92f3864f6cd0cb..05a62d2216c54651f6158c35d446d2e395b38dc3 100644 (file)
@@ -73,7 +73,6 @@ struct ipvl_dev {
        DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
        netdev_features_t       sfeatures;
        u32                     msg_enable;
-       u16                     mtu_adj;
 };
 
 struct ipvl_addr {
index ab90b22e778c9e85f89146d4094dd903f159438c..c6aa667b50cdfcaf78f9af0d9111cc8c1c627dc8 100644 (file)
@@ -32,7 +32,7 @@ static const struct l3mdev_ops ipvl_l3mdev_ops = {
 
 static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
 {
-       ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
+       ipvlan->dev->mtu = dev->mtu;
 }
 
 static int ipvlan_register_nf_hook(void)
index 1a134cb2d52cba5afc1dd8d53143cdb4587e1304..cc00eb0db5d2d99d2be3c9ad682230db14fad8c3 100644 (file)
@@ -397,6 +397,14 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
 #define DEFAULT_ENCRYPT false
 #define DEFAULT_ENCODING_SA 0
 
+static bool send_sci(const struct macsec_secy *secy)
+{
+       const struct macsec_tx_sc *tx_sc = &secy->tx_sc;
+
+       return tx_sc->send_sci ||
+               (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb);
+}
+
 static sci_t make_sci(u8 *addr, __be16 port)
 {
        sci_t sci;
@@ -437,15 +445,15 @@ static unsigned int macsec_extra_len(bool sci_present)
 
 /* Fill SecTAG according to IEEE 802.1AE-2006 10.5.3 */
 static void macsec_fill_sectag(struct macsec_eth_header *h,
-                              const struct macsec_secy *secy, u32 pn)
+                              const struct macsec_secy *secy, u32 pn,
+                              bool sci_present)
 {
        const struct macsec_tx_sc *tx_sc = &secy->tx_sc;
 
-       memset(&h->tci_an, 0, macsec_sectag_len(tx_sc->send_sci));
+       memset(&h->tci_an, 0, macsec_sectag_len(sci_present));
        h->eth.h_proto = htons(ETH_P_MACSEC);
 
-       if (tx_sc->send_sci ||
-           (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb)) {
+       if (sci_present) {
                h->tci_an |= MACSEC_TCI_SC;
                memcpy(&h->secure_channel_id, &secy->sci,
                       sizeof(h->secure_channel_id));
@@ -650,6 +658,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
        struct macsec_tx_sc *tx_sc;
        struct macsec_tx_sa *tx_sa;
        struct macsec_dev *macsec = macsec_priv(dev);
+       bool sci_present;
        u32 pn;
 
        secy = &macsec->secy;
@@ -687,7 +696,8 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
 
        unprotected_len = skb->len;
        eth = eth_hdr(skb);
-       hh = (struct macsec_eth_header *)skb_push(skb, macsec_extra_len(tx_sc->send_sci));
+       sci_present = send_sci(secy);
+       hh = (struct macsec_eth_header *)skb_push(skb, macsec_extra_len(sci_present));
        memmove(hh, eth, 2 * ETH_ALEN);
 
        pn = tx_sa_update_pn(tx_sa, secy);
@@ -696,7 +706,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
                kfree_skb(skb);
                return ERR_PTR(-ENOLINK);
        }
-       macsec_fill_sectag(hh, secy, pn);
+       macsec_fill_sectag(hh, secy, pn, sci_present);
        macsec_set_shortlen(hh, unprotected_len - 2 * ETH_ALEN);
 
        skb_put(skb, secy->icv_len);
@@ -726,10 +736,10 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb,
        skb_to_sgvec(skb, sg, 0, skb->len);
 
        if (tx_sc->encrypt) {
-               int len = skb->len - macsec_hdr_len(tx_sc->send_sci) -
+               int len = skb->len - macsec_hdr_len(sci_present) -
                          secy->icv_len;
                aead_request_set_crypt(req, sg, sg, len, iv);
-               aead_request_set_ad(req, macsec_hdr_len(tx_sc->send_sci));
+               aead_request_set_ad(req, macsec_hdr_len(sci_present));
        } else {
                aead_request_set_crypt(req, sg, sg, 0, iv);
                aead_request_set_ad(req, skb->len - secy->icv_len);
@@ -1421,14 +1431,7 @@ static void clear_tx_sa(struct macsec_tx_sa *tx_sa)
        macsec_txsa_put(tx_sa);
 }
 
-static struct genl_family macsec_fam = {
-       .id             = GENL_ID_GENERATE,
-       .name           = MACSEC_GENL_NAME,
-       .hdrsize        = 0,
-       .version        = MACSEC_GENL_VERSION,
-       .maxattr        = MACSEC_ATTR_MAX,
-       .netnsok        = true,
-};
+static struct genl_family macsec_fam;
 
 static struct net_device *get_dev_from_nl(struct net *net,
                                          struct nlattr **attrs)
@@ -2655,6 +2658,17 @@ static const struct genl_ops macsec_genl_ops[] = {
        },
 };
 
+static struct genl_family macsec_fam __ro_after_init = {
+       .name           = MACSEC_GENL_NAME,
+       .hdrsize        = 0,
+       .version        = MACSEC_GENL_VERSION,
+       .maxattr        = MACSEC_ATTR_MAX,
+       .netnsok        = true,
+       .module         = THIS_MODULE,
+       .ops            = macsec_genl_ops,
+       .n_ops          = ARRAY_SIZE(macsec_genl_ops),
+};
+
 static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
                                     struct net_device *dev)
 {
@@ -3462,7 +3476,7 @@ static int __init macsec_init(void)
        if (err)
                goto notifier;
 
-       err = genl_register_family_with_ops(&macsec_fam, macsec_genl_ops);
+       err = genl_register_family(&macsec_fam);
        if (err)
                goto rtnl;
 
index a0644158647a86af5930cc4c35a9bca02a2770a8..3c0a1714977b495697f2f9f6fda9d8bc3318998d 100644 (file)
@@ -179,20 +179,20 @@ static void macvlan_hash_change_addr(struct macvlan_dev *vlan,
        macvlan_hash_add(vlan);
 }
 
-static int macvlan_addr_busy(const struct macvlan_port *port,
-                               const unsigned char *addr)
+static bool macvlan_addr_busy(const struct macvlan_port *port,
+                             const unsigned char *addr)
 {
        /* Test to see if the specified multicast address is
         * currently in use by the underlying device or
         * another macvlan.
         */
        if (ether_addr_equal_64bits(port->dev->dev_addr, addr))
-               return 1;
+               return true;
 
        if (macvlan_hash_lookup(port, addr))
-               return 1;
+               return true;
 
-       return 0;
+       return false;
 }
 
 
@@ -400,8 +400,7 @@ static void macvlan_forward_source(struct sk_buff *skb,
 
        hlist_for_each_entry_rcu(entry, h, hlist) {
                if (ether_addr_equal_64bits(entry->addr, addr))
-                       if (entry->vlan->dev->flags & IFF_UP)
-                               macvlan_forward_source_one(skb, entry->vlan);
+                       macvlan_forward_source_one(skb, entry->vlan);
        }
 }
 
@@ -623,7 +622,8 @@ hash_add:
        return 0;
 
 clear_multi:
-       dev_set_allmulti(lowerdev, -1);
+       if (dev->flags & IFF_ALLMULTI)
+               dev_set_allmulti(lowerdev, -1);
 del_unicast:
        dev_uc_del(lowerdev, dev->dev_addr);
 out:
@@ -1280,6 +1280,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
        struct net_device *lowerdev;
        int err;
        int macmode;
+       bool create = false;
 
        if (!tb[IFLA_LINK])
                return -EINVAL;
@@ -1310,12 +1311,18 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
                err = macvlan_port_create(lowerdev);
                if (err < 0)
                        return err;
+               create = true;
        }
        port = macvlan_port_get_rtnl(lowerdev);
 
        /* Only 1 macvlan device can be created in passthru mode */
-       if (port->passthru)
-               return -EINVAL;
+       if (port->passthru) {
+               /* The macvlan port cannot have been created this time,
+                * but still goto destroy_macvlan_port for readability.
+                */
+               err = -EINVAL;
+               goto destroy_macvlan_port;
+       }
 
        vlan->lowerdev = lowerdev;
        vlan->dev      = dev;
@@ -1331,24 +1338,28 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
                vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
 
        if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
-               if (port->count)
-                       return -EINVAL;
+               if (port->count) {
+                       err = -EINVAL;
+                       goto destroy_macvlan_port;
+               }
                port->passthru = true;
                eth_hw_addr_inherit(dev, lowerdev);
        }
 
        if (data && data[IFLA_MACVLAN_MACADDR_MODE]) {
-               if (vlan->mode != MACVLAN_MODE_SOURCE)
-                       return -EINVAL;
+               if (vlan->mode != MACVLAN_MODE_SOURCE) {
+                       err = -EINVAL;
+                       goto destroy_macvlan_port;
+               }
                macmode = nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE]);
                err = macvlan_changelink_sources(vlan, macmode, data);
                if (err)
-                       return err;
+                       goto destroy_macvlan_port;
        }
 
        err = register_netdevice(dev);
        if (err < 0)
-               return err;
+               goto destroy_macvlan_port;
 
        dev->priv_flags |= IFF_MACVLAN;
        err = netdev_upper_dev_link(lowerdev, dev);
@@ -1363,7 +1374,9 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 
 unregister_netdev:
        unregister_netdevice(dev);
-
+destroy_macvlan_port:
+       if (create)
+               macvlan_port_destroy(port->dev);
        return err;
 }
 EXPORT_SYMBOL_GPL(macvlan_common_newlink);
index 070e3290aa6efea6fcb505cdf0860a4dce676b74..2513939bf2458d3bbf4f4efb91619c6cedd9207c 100644 (file)
@@ -437,7 +437,7 @@ static int macvtap_get_minor(struct macvlan_dev *vlan)
        if (retval >= 0) {
                vlan->minor = retval;
        } else if (retval == -ENOSPC) {
-               printk(KERN_ERR "too many macvtap devices\n");
+               netdev_err(vlan->dev, "Too many macvtap devices\n");
                retval = -EINVAL;
        }
        mutex_unlock(&minor_lock);
@@ -821,9 +821,8 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
                if (iov_iter_count(iter) < vnet_hdr_len)
                        return -EINVAL;
 
-               ret = virtio_net_hdr_from_skb(skb, &vnet_hdr,
-                                             macvtap_is_little_endian(q));
-               if (ret)
+               if (virtio_net_hdr_from_skb(skb, &vnet_hdr,
+                                           macvtap_is_little_endian(q)))
                        BUG();
 
                if (copy_to_iter(&vnet_hdr, sizeof(vnet_hdr), iter) !=
index 993570b1e2aeb68726c268eea5a8332a2b8159f6..6d953c53eed6c86d9c6db39704a374ee5fac1bde 100644 (file)
@@ -134,6 +134,103 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
        return 0;
 }
 
+/**
+ * mii_ethtool_get_link_ksettings - get settings that are specified in @cmd
+ * @mii: MII interface
+ * @cmd: requested ethtool_link_ksettings
+ *
+ * The @cmd parameter is expected to have been cleared before calling
+ * mii_ethtool_get_link_ksettings().
+ *
+ * Returns 0 for success, negative on error.
+ */
+int mii_ethtool_get_link_ksettings(struct mii_if_info *mii,
+                                  struct ethtool_link_ksettings *cmd)
+{
+       struct net_device *dev = mii->dev;
+       u16 bmcr, bmsr, ctrl1000 = 0, stat1000 = 0;
+       u32 nego, supported, advertising, lp_advertising;
+
+       supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
+                    SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
+                    SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII);
+       if (mii->supports_gmii)
+               supported |= SUPPORTED_1000baseT_Half |
+                       SUPPORTED_1000baseT_Full;
+
+       /* only supports twisted-pair */
+       cmd->base.port = PORT_MII;
+
+       /* this isn't fully supported at higher layers */
+       cmd->base.phy_address = mii->phy_id;
+       cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C22;
+
+       advertising = ADVERTISED_TP | ADVERTISED_MII;
+
+       bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
+       bmsr = mii->mdio_read(dev, mii->phy_id, MII_BMSR);
+       if (mii->supports_gmii) {
+               ctrl1000 = mii->mdio_read(dev, mii->phy_id, MII_CTRL1000);
+               stat1000 = mii->mdio_read(dev, mii->phy_id, MII_STAT1000);
+       }
+       if (bmcr & BMCR_ANENABLE) {
+               advertising |= ADVERTISED_Autoneg;
+               cmd->base.autoneg = AUTONEG_ENABLE;
+
+               advertising |= mii_get_an(mii, MII_ADVERTISE);
+               if (mii->supports_gmii)
+                       advertising |= mii_ctrl1000_to_ethtool_adv_t(ctrl1000);
+
+               if (bmsr & BMSR_ANEGCOMPLETE) {
+                       lp_advertising = mii_get_an(mii, MII_LPA);
+                       lp_advertising |=
+                                       mii_stat1000_to_ethtool_lpa_t(stat1000);
+               } else {
+                       lp_advertising = 0;
+               }
+
+               nego = advertising & lp_advertising;
+
+               if (nego & (ADVERTISED_1000baseT_Full |
+                           ADVERTISED_1000baseT_Half)) {
+                       cmd->base.speed = SPEED_1000;
+                       cmd->base.duplex = !!(nego & ADVERTISED_1000baseT_Full);
+               } else if (nego & (ADVERTISED_100baseT_Full |
+                                  ADVERTISED_100baseT_Half)) {
+                       cmd->base.speed = SPEED_100;
+                       cmd->base.duplex = !!(nego & ADVERTISED_100baseT_Full);
+               } else {
+                       cmd->base.speed = SPEED_10;
+                       cmd->base.duplex = !!(nego & ADVERTISED_10baseT_Full);
+               }
+       } else {
+               cmd->base.autoneg = AUTONEG_DISABLE;
+
+               cmd->base.speed = ((bmcr & BMCR_SPEED1000 &&
+                                   (bmcr & BMCR_SPEED100) == 0) ?
+                                  SPEED_1000 :
+                                  ((bmcr & BMCR_SPEED100) ?
+                                   SPEED_100 : SPEED_10));
+               cmd->base.duplex = (bmcr & BMCR_FULLDPLX) ?
+                       DUPLEX_FULL : DUPLEX_HALF;
+
+               lp_advertising = 0;
+       }
+
+       mii->full_duplex = cmd->base.duplex;
+
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+                                               supported);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+                                               advertising);
+       ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising,
+                                               lp_advertising);
+
+       /* ignore maxtxpkt, maxrxpkt for now */
+
+       return 0;
+}
+
 /**
  * mii_ethtool_sset - set settings that are specified in @ecmd
  * @mii: MII interface
@@ -226,6 +323,104 @@ int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
        return 0;
 }
 
+/**
+ * mii_ethtool_set_link_ksettings - set settings that are specified in @cmd
+ * @mii: MII interface
+ * @cmd: requested ethtool_link_ksettings
+ *
+ * Returns 0 for success, negative on error.
+ */
+int mii_ethtool_set_link_ksettings(struct mii_if_info *mii,
+                                  const struct ethtool_link_ksettings *cmd)
+{
+       struct net_device *dev = mii->dev;
+       u32 speed = cmd->base.speed;
+
+       if (speed != SPEED_10 &&
+           speed != SPEED_100 &&
+           speed != SPEED_1000)
+               return -EINVAL;
+       if (cmd->base.duplex != DUPLEX_HALF && cmd->base.duplex != DUPLEX_FULL)
+               return -EINVAL;
+       if (cmd->base.port != PORT_MII)
+               return -EINVAL;
+       if (cmd->base.phy_address != mii->phy_id)
+               return -EINVAL;
+       if (cmd->base.autoneg != AUTONEG_DISABLE &&
+           cmd->base.autoneg != AUTONEG_ENABLE)
+               return -EINVAL;
+       if ((speed == SPEED_1000) && (!mii->supports_gmii))
+               return -EINVAL;
+
+       /* ignore supported, maxtxpkt, maxrxpkt */
+
+       if (cmd->base.autoneg == AUTONEG_ENABLE) {
+               u32 bmcr, advert, tmp;
+               u32 advert2 = 0, tmp2 = 0;
+               u32 advertising;
+
+               ethtool_convert_link_mode_to_legacy_u32(
+                       &advertising, cmd->link_modes.advertising);
+
+               if ((advertising & (ADVERTISED_10baseT_Half |
+                                   ADVERTISED_10baseT_Full |
+                                   ADVERTISED_100baseT_Half |
+                                   ADVERTISED_100baseT_Full |
+                                   ADVERTISED_1000baseT_Half |
+                                   ADVERTISED_1000baseT_Full)) == 0)
+                       return -EINVAL;
+
+               /* advertise only what has been requested */
+               advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE);
+               tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4);
+               if (mii->supports_gmii) {
+                       advert2 = mii->mdio_read(dev, mii->phy_id,
+                                                MII_CTRL1000);
+                       tmp2 = advert2 &
+                               ~(ADVERTISE_1000HALF | ADVERTISE_1000FULL);
+               }
+               tmp |= ethtool_adv_to_mii_adv_t(advertising);
+
+               if (mii->supports_gmii)
+                       tmp2 |= ethtool_adv_to_mii_ctrl1000_t(advertising);
+               if (advert != tmp) {
+                       mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
+                       mii->advertising = tmp;
+               }
+               if ((mii->supports_gmii) && (advert2 != tmp2))
+                       mii->mdio_write(dev, mii->phy_id, MII_CTRL1000, tmp2);
+
+               /* turn on autonegotiation, and force a renegotiate */
+               bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
+               bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART);
+               mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr);
+
+               mii->force_media = 0;
+       } else {
+               u32 bmcr, tmp;
+
+               /* turn off auto negotiation, set speed and duplexity */
+               bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR);
+               tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 |
+                              BMCR_SPEED1000 | BMCR_FULLDPLX);
+               if (speed == SPEED_1000)
+                       tmp |= BMCR_SPEED1000;
+               else if (speed == SPEED_100)
+                       tmp |= BMCR_SPEED100;
+               if (cmd->base.duplex == DUPLEX_FULL) {
+                       tmp |= BMCR_FULLDPLX;
+                       mii->full_duplex = 1;
+               } else {
+                       mii->full_duplex = 0;
+               }
+               if (bmcr != tmp)
+                       mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp);
+
+               mii->force_media = 1;
+       }
+       return 0;
+}
+
 /**
  * mii_check_gmii_support - check if the MII supports Gb interfaces
  * @mii: the MII interface
@@ -466,7 +661,9 @@ MODULE_LICENSE("GPL");
 EXPORT_SYMBOL(mii_link_ok);
 EXPORT_SYMBOL(mii_nway_restart);
 EXPORT_SYMBOL(mii_ethtool_gset);
+EXPORT_SYMBOL(mii_ethtool_get_link_ksettings);
 EXPORT_SYMBOL(mii_ethtool_sset);
+EXPORT_SYMBOL(mii_ethtool_set_link_ksettings);
 EXPORT_SYMBOL(mii_check_link);
 EXPORT_SYMBOL(mii_check_media);
 EXPORT_SYMBOL(mii_check_gmii_support);
index 45f68eaf9b792b2d3ef152ac5cb328a7bddf962d..d361835b315dd6b9ed542a48515e85c8024192d3 100644 (file)
@@ -217,7 +217,7 @@ config BROADCOM_PHY
        select BCM_NET_PHYLIB
        ---help---
          Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
-         BCM5481 and BCM5482 PHYs.
+         BCM5481, BCM54810 and BCM5482 PHYs.
 
 config CICADA_PHY
        tristate "Cicada PHYs"
@@ -277,6 +277,11 @@ config MARVELL_PHY
        ---help---
          Currently has a driver for the 88E1011S
 
+config MESON_GXL_PHY
+       tristate "Amlogic Meson GXL Internal PHY"
+       ---help---
+         Currently has a driver for the Amlogic Meson GXL Internal PHY
+
 config MICREL_PHY
        tristate "Micrel PHYs"
        ---help---
@@ -290,7 +295,7 @@ config MICROCHIP_PHY
 config MICROSEMI_PHY
        tristate "Microsemi PHYs"
        ---help---
-         Currently supports the VSC8531 and VSC8541 PHYs
+         Currently supports VSC8530, VSC8531, VSC8540 and VSC8541 PHYs
 
 config NATIONAL_PHY
        tristate "National Semiconductor PHYs"
index 86d12cd3fbf0912297f64674d5c1309e6339a78c..356859ac7c18be8e41da7b40aad9a71832a8a1e9 100644 (file)
@@ -42,6 +42,7 @@ obj-$(CONFIG_INTEL_XWAY_PHY)  += intel-xway.o
 obj-$(CONFIG_LSI_ET1011C_PHY)  += et1011c.o
 obj-$(CONFIG_LXT_PHY)          += lxt.o
 obj-$(CONFIG_MARVELL_PHY)      += marvell.o
+obj-$(CONFIG_MESON_GXL_PHY)    += meson-gxl.o
 obj-$(CONFIG_MICREL_KS8995MA)  += spi_ks8995.o
 obj-$(CONFIG_MICREL_PHY)       += micrel.o
 obj-$(CONFIG_MICROCHIP_PHY)    += microchip.o
index f279a897a5c7fe0e875fb8b058f4c00ae3059f62..c1e52b9dc58d39f02f279b46f400fa811efe3866 100644 (file)
 #define AT803X_MMD_ACCESS_CONTROL              0x0D
 #define AT803X_MMD_ACCESS_CONTROL_DATA         0x0E
 #define AT803X_FUNC_DATA                       0x4003
+#define AT803X_REG_CHIP_CONFIG                 0x1f
+#define AT803X_BT_BX_REG_SEL                   0x8000
 
 #define AT803X_DEBUG_ADDR                      0x1D
 #define AT803X_DEBUG_DATA                      0x1E
 
+#define AT803X_MODE_CFG_MASK                   0x0F
+#define AT803X_MODE_CFG_SGMII                  0x01
+
+#define AT803X_PSSR                    0x11    /*PHY-Specific Status Register*/
+#define AT803X_PSSR_MR_AN_COMPLETE     0x0200
+
 #define AT803X_DEBUG_REG_0                     0x00
 #define AT803X_DEBUG_RX_CLK_DLY_EN             BIT(15)
 
 #define AT803X_DEBUG_REG_5                     0x05
 #define AT803X_DEBUG_TX_CLK_DLY_EN             BIT(8)
 
-#define AT803X_REG_CHIP_CONFIG                 0x1f
-#define AT803X_BT_BX_REG_SEL                   0x8000
-
 #define ATH8030_PHY_ID 0x004dd076
 #define ATH8031_PHY_ID 0x004dd074
 #define ATH8035_PHY_ID 0x004dd072
+#define AT803X_PHY_ID_MASK                     0xffffffef
 
 MODULE_DESCRIPTION("Atheros 803x PHY driver");
 MODULE_AUTHOR("Matus Ujhelyi");
@@ -209,7 +215,6 @@ static int at803x_suspend(struct phy_device *phydev)
 {
        int value;
        int wol_enabled;
-       int ccr;
 
        mutex_lock(&phydev->lock);
 
@@ -225,16 +230,6 @@ static int at803x_suspend(struct phy_device *phydev)
 
        phy_write(phydev, MII_BMCR, value);
 
-       if (phydev->interface != PHY_INTERFACE_MODE_SGMII)
-               goto done;
-
-       /* also power-down SGMII interface */
-       ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
-       phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr & ~AT803X_BT_BX_REG_SEL);
-       phy_write(phydev, MII_BMCR, phy_read(phydev, MII_BMCR) | BMCR_PDOWN);
-       phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr | AT803X_BT_BX_REG_SEL);
-
-done:
        mutex_unlock(&phydev->lock);
 
        return 0;
@@ -243,7 +238,6 @@ done:
 static int at803x_resume(struct phy_device *phydev)
 {
        int value;
-       int ccr;
 
        mutex_lock(&phydev->lock);
 
@@ -251,17 +245,6 @@ static int at803x_resume(struct phy_device *phydev)
        value &= ~(BMCR_PDOWN | BMCR_ISOLATE);
        phy_write(phydev, MII_BMCR, value);
 
-       if (phydev->interface != PHY_INTERFACE_MODE_SGMII)
-               goto done;
-
-       /* also power-up SGMII interface */
-       ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
-       phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr & ~AT803X_BT_BX_REG_SEL);
-       value = phy_read(phydev, MII_BMCR) & ~(BMCR_PDOWN | BMCR_ISOLATE);
-       phy_write(phydev, MII_BMCR, value);
-       phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr | AT803X_BT_BX_REG_SEL);
-
-done:
        mutex_unlock(&phydev->lock);
 
        return 0;
@@ -381,12 +364,42 @@ static void at803x_link_change_notify(struct phy_device *phydev)
        }
 }
 
+static int at803x_aneg_done(struct phy_device *phydev)
+{
+       int ccr;
+
+       int aneg_done = genphy_aneg_done(phydev);
+       if (aneg_done != BMSR_ANEGCOMPLETE)
+               return aneg_done;
+
+       /*
+        * in SGMII mode, if copper side autoneg is successful,
+        * also check SGMII side autoneg result
+        */
+       ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
+       if ((ccr & AT803X_MODE_CFG_MASK) != AT803X_MODE_CFG_SGMII)
+               return aneg_done;
+
+       /* switch to SGMII/fiber page */
+       phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr & ~AT803X_BT_BX_REG_SEL);
+
+       /* check if the SGMII link is OK. */
+       if (!(phy_read(phydev, AT803X_PSSR) & AT803X_PSSR_MR_AN_COMPLETE)) {
+               pr_warn("803x_aneg_done: SGMII link is not ok\n");
+               aneg_done = 0;
+       }
+       /* switch back to copper page */
+       phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr | AT803X_BT_BX_REG_SEL);
+
+       return aneg_done;
+}
+
 static struct phy_driver at803x_driver[] = {
 {
        /* ATHEROS 8035 */
        .phy_id                 = ATH8035_PHY_ID,
        .name                   = "Atheros 8035 ethernet",
-       .phy_id_mask            = 0xffffffef,
+       .phy_id_mask            = AT803X_PHY_ID_MASK,
        .probe                  = at803x_probe,
        .config_init            = at803x_config_init,
        .set_wol                = at803x_set_wol,
@@ -403,7 +416,7 @@ static struct phy_driver at803x_driver[] = {
        /* ATHEROS 8030 */
        .phy_id                 = ATH8030_PHY_ID,
        .name                   = "Atheros 8030 ethernet",
-       .phy_id_mask            = 0xffffffef,
+       .phy_id_mask            = AT803X_PHY_ID_MASK,
        .probe                  = at803x_probe,
        .config_init            = at803x_config_init,
        .link_change_notify     = at803x_link_change_notify,
@@ -421,7 +434,7 @@ static struct phy_driver at803x_driver[] = {
        /* ATHEROS 8031 */
        .phy_id                 = ATH8031_PHY_ID,
        .name                   = "Atheros 8031 ethernet",
-       .phy_id_mask            = 0xffffffef,
+       .phy_id_mask            = AT803X_PHY_ID_MASK,
        .probe                  = at803x_probe,
        .config_init            = at803x_config_init,
        .set_wol                = at803x_set_wol,
@@ -432,6 +445,7 @@ static struct phy_driver at803x_driver[] = {
        .flags                  = PHY_HAS_INTERRUPT,
        .config_aneg            = genphy_config_aneg,
        .read_status            = genphy_read_status,
+       .aneg_done              = at803x_aneg_done,
        .ack_interrupt          = &at803x_ack_interrupt,
        .config_intr            = &at803x_config_intr,
 } };
@@ -439,9 +453,9 @@ static struct phy_driver at803x_driver[] = {
 module_phy_driver(at803x_driver);
 
 static struct mdio_device_id __maybe_unused atheros_tbl[] = {
-       { ATH8030_PHY_ID, 0xffffffef },
-       { ATH8031_PHY_ID, 0xffffffef },
-       { ATH8035_PHY_ID, 0xffffffef },
+       { ATH8030_PHY_ID, AT803X_PHY_ID_MASK },
+       { ATH8031_PHY_ID, AT803X_PHY_ID_MASK },
+       { ATH8035_PHY_ID, AT803X_PHY_ID_MASK },
        { }
 };
 
index 49bbc682688356ae89b8ffcc629f017196f4c5ae..196400cddf687f6722867bc93cd214a258be8119 100644 (file)
@@ -104,7 +104,7 @@ static int bcm_cygnus_config_init(struct phy_device *phydev)
                return rc;
 
        /* Advertise EEE */
-       rc = bcm_phy_enable_eee(phydev);
+       rc = bcm_phy_set_eee(phydev, true);
        if (rc)
                return rc;
 
index df0416db0b88fce5899cae9cc9f3e9df5c7c624a..ab9ad689617c78d19b21ad8f35b5f1887c483b1c 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/mdio.h>
 #include <linux/module.h>
 #include <linux/phy.h>
+#include <linux/ethtool.h>
 
 #define MII_BCM_CHANNEL_WIDTH     0x2000
 #define BCM_CL45VEN_EEE_ADV       0x3c
@@ -50,6 +51,23 @@ int bcm_phy_read_exp(struct phy_device *phydev, u16 reg)
 }
 EXPORT_SYMBOL_GPL(bcm_phy_read_exp);
 
+int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum)
+{
+       /* The register must be written to both the Shadow Register Select and
+        * the Shadow Read Register Selector
+        */
+       phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum |
+                 regnum << MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT);
+       return phy_read(phydev, MII_BCM54XX_AUX_CTL);
+}
+EXPORT_SYMBOL_GPL(bcm54xx_auxctl_read);
+
+int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val)
+{
+       return phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | val);
+}
+EXPORT_SYMBOL(bcm54xx_auxctl_write);
+
 int bcm_phy_write_misc(struct phy_device *phydev,
                       u16 reg, u16 chl, u16 val)
 {
@@ -178,7 +196,7 @@ int bcm_phy_enable_apd(struct phy_device *phydev, bool dll_pwr_down)
 }
 EXPORT_SYMBOL_GPL(bcm_phy_enable_apd);
 
-int bcm_phy_enable_eee(struct phy_device *phydev)
+int bcm_phy_set_eee(struct phy_device *phydev, bool enable)
 {
        int val;
 
@@ -188,7 +206,10 @@ int bcm_phy_enable_eee(struct phy_device *phydev)
        if (val < 0)
                return val;
 
-       val |= LPI_FEATURE_EN | LPI_FEATURE_EN_DIG1000X;
+       if (enable)
+               val |= LPI_FEATURE_EN | LPI_FEATURE_EN_DIG1000X;
+       else
+               val &= ~(LPI_FEATURE_EN | LPI_FEATURE_EN_DIG1000X);
 
        phy_write_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL,
                               MDIO_MMD_AN, (u32)val);
@@ -199,14 +220,172 @@ int bcm_phy_enable_eee(struct phy_device *phydev)
        if (val < 0)
                return val;
 
-       val |= (MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T);
+       if (enable)
+               val |= (MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T);
+       else
+               val &= ~(MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T);
 
        phy_write_mmd_indirect(phydev, BCM_CL45VEN_EEE_ADV,
                               MDIO_MMD_AN, (u32)val);
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(bcm_phy_enable_eee);
+EXPORT_SYMBOL_GPL(bcm_phy_set_eee);
+
+int bcm_phy_downshift_get(struct phy_device *phydev, u8 *count)
+{
+       int val;
+
+       val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC);
+       if (val < 0)
+               return val;
+
+       /* Check if wirespeed is enabled or not */
+       if (!(val & MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN)) {
+               *count = DOWNSHIFT_DEV_DISABLE;
+               return 0;
+       }
+
+       val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_SCR2);
+       if (val < 0)
+               return val;
+
+       /* Downgrade after one link attempt */
+       if (val & BCM54XX_SHD_SCR2_WSPD_RTRY_DIS) {
+               *count = 1;
+       } else {
+               /* Downgrade after configured retry count */
+               val >>= BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_SHIFT;
+               val &= BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_MASK;
+               *count = val + BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_OFFSET;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(bcm_phy_downshift_get);
+
+int bcm_phy_downshift_set(struct phy_device *phydev, u8 count)
+{
+       int val = 0, ret = 0;
+
+       /* Range check the number given */
+       if (count - BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_OFFSET >
+           BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_MASK &&
+           count != DOWNSHIFT_DEV_DEFAULT_COUNT) {
+               return -ERANGE;
+       }
+
+       val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC);
+       if (val < 0)
+               return val;
+
+       /* Set the write enable bit */
+       val |= MII_BCM54XX_AUXCTL_MISC_WREN;
+
+       if (count == DOWNSHIFT_DEV_DISABLE) {
+               val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN;
+               return bcm54xx_auxctl_write(phydev,
+                                           MII_BCM54XX_AUXCTL_SHDWSEL_MISC,
+                                           val);
+       } else {
+               val |= MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN;
+               ret = bcm54xx_auxctl_write(phydev,
+                                          MII_BCM54XX_AUXCTL_SHDWSEL_MISC,
+                                          val);
+               if (ret < 0)
+                       return ret;
+       }
+
+       val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_SCR2);
+       val &= ~(BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_MASK <<
+                BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_SHIFT |
+                BCM54XX_SHD_SCR2_WSPD_RTRY_DIS);
+
+       switch (count) {
+       case 1:
+               val |= BCM54XX_SHD_SCR2_WSPD_RTRY_DIS;
+               break;
+       case DOWNSHIFT_DEV_DEFAULT_COUNT:
+               val |= 1 << BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_SHIFT;
+               break;
+       default:
+               val |= (count - BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_OFFSET) <<
+                       BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_SHIFT;
+               break;
+       }
+
+       return bcm_phy_write_shadow(phydev, BCM54XX_SHD_SCR2, val);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_downshift_set);
+
+struct bcm_phy_hw_stat {
+       const char *string;
+       u8 reg;
+       u8 shift;
+       u8 bits;
+};
+
+/* Counters freeze at either 0xffff or 0xff, better than nothing */
+static const struct bcm_phy_hw_stat bcm_phy_hw_stats[] = {
+       { "phy_receive_errors", MII_BRCM_CORE_BASE12, 0, 16 },
+       { "phy_serdes_ber_errors", MII_BRCM_CORE_BASE13, 8, 8 },
+       { "phy_false_carrier_sense_errors", MII_BRCM_CORE_BASE13, 0, 8 },
+       { "phy_local_rcvr_nok", MII_BRCM_CORE_BASE14, 8, 8 },
+       { "phy_remote_rcv_nok", MII_BRCM_CORE_BASE14, 0, 8 },
+};
+
+int bcm_phy_get_sset_count(struct phy_device *phydev)
+{
+       return ARRAY_SIZE(bcm_phy_hw_stats);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_get_sset_count);
+
+void bcm_phy_get_strings(struct phy_device *phydev, u8 *data)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(bcm_phy_hw_stats); i++)
+               memcpy(data + i * ETH_GSTRING_LEN,
+                      bcm_phy_hw_stats[i].string, ETH_GSTRING_LEN);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_get_strings);
+
+#ifndef UINT64_MAX
+#define UINT64_MAX              (u64)(~((u64)0))
+#endif
+
+/* Caller is supposed to provide appropriate storage for the library code to
+ * access the shadow copy
+ */
+static u64 bcm_phy_get_stat(struct phy_device *phydev, u64 *shadow,
+                           unsigned int i)
+{
+       struct bcm_phy_hw_stat stat = bcm_phy_hw_stats[i];
+       int val;
+       u64 ret;
+
+       val = phy_read(phydev, stat.reg);
+       if (val < 0) {
+               ret = UINT64_MAX;
+       } else {
+               val >>= stat.shift;
+               val = val & ((1 << stat.bits) - 1);
+               shadow[i] += val;
+               ret = shadow[i];
+       }
+
+       return ret;
+}
+
+void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow,
+                      struct ethtool_stats *stats, u64 *data)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(bcm_phy_hw_stats); i++)
+               data[i] = bcm_phy_get_stat(phydev, shadow, i);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_get_stats);
 
 MODULE_DESCRIPTION("Broadcom PHY Library");
 MODULE_LICENSE("GPL v2");
index b2091c88b44dbbef9a4d1623a02b8b4add5f4f72..7c73808cbbded22bb01b60ef2b616456267f99cc 100644 (file)
@@ -19,6 +19,9 @@
 int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val);
 int bcm_phy_read_exp(struct phy_device *phydev, u16 reg);
 
+int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val);
+int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum);
+
 int bcm_phy_write_misc(struct phy_device *phydev,
                       u16 reg, u16 chl, u16 value);
 int bcm_phy_read_misc(struct phy_device *phydev,
@@ -33,5 +36,15 @@ int bcm_phy_config_intr(struct phy_device *phydev);
 
 int bcm_phy_enable_apd(struct phy_device *phydev, bool dll_pwr_down);
 
-int bcm_phy_enable_eee(struct phy_device *phydev);
+int bcm_phy_set_eee(struct phy_device *phydev, bool enable);
+
+int bcm_phy_downshift_get(struct phy_device *phydev, u8 *count);
+
+int bcm_phy_downshift_set(struct phy_device *phydev, u8 count);
+
+int bcm_phy_get_sset_count(struct phy_device *phydev);
+void bcm_phy_get_strings(struct phy_device *phydev, u8 *data);
+void bcm_phy_get_stats(struct phy_device *phydev, u64 *shadow,
+                      struct ethtool_stats *stats, u64 *data);
+
 #endif /* _LINUX_BCM_PHY_LIB_H */
index 9636da0b6efc449907025b52e5e9cefaa51df198..aae00bde59802ac956e228f4e71c5604b018e620 100644 (file)
 #define AFE_VDAC_OTHERS_0              MISC_ADDR(0x39, 3)
 #define AFE_HPF_TRIM_OTHERS            MISC_ADDR(0x3a, 0)
 
+struct bcm7xxx_phy_priv {
+       u64     *stats;
+};
+
 static void r_rc_cal_reset(struct phy_device *phydev)
 {
        /* Reset R_CAL/RC_CAL Engine */
@@ -167,6 +171,7 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev)
 {
        u8 rev = PHY_BRCM_7XXX_REV(phydev->dev_flags);
        u8 patch = PHY_BRCM_7XXX_PATCH(phydev->dev_flags);
+       u8 count;
        int ret = 0;
 
        pr_info_once("%s: %s PHY revision: 0x%02x, patch: %d\n",
@@ -199,7 +204,12 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev)
        if (ret)
                return ret;
 
-       ret = bcm_phy_enable_eee(phydev);
+       ret = bcm_phy_downshift_get(phydev, &count);
+       if (ret)
+               return ret;
+
+       /* Only enable EEE if Wirespeed/downshift is disabled */
+       ret = bcm_phy_set_eee(phydev, count == DOWNSHIFT_DEV_DISABLE);
        if (ret)
                return ret;
 
@@ -303,6 +313,74 @@ static int bcm7xxx_suspend(struct phy_device *phydev)
        return 0;
 }
 
+static int bcm7xxx_28nm_get_tunable(struct phy_device *phydev,
+                                   struct ethtool_tunable *tuna,
+                                   void *data)
+{
+       switch (tuna->id) {
+       case ETHTOOL_PHY_DOWNSHIFT:
+               return bcm_phy_downshift_get(phydev, (u8 *)data);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static int bcm7xxx_28nm_set_tunable(struct phy_device *phydev,
+                                   struct ethtool_tunable *tuna,
+                                   const void *data)
+{
+       u8 count = *(u8 *)data;
+       int ret;
+
+       switch (tuna->id) {
+       case ETHTOOL_PHY_DOWNSHIFT:
+               ret = bcm_phy_downshift_set(phydev, count);
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       if (ret)
+               return ret;
+
+       /* Disable EEE advertisement since this prevents the PHY
+        * from successfully linking up, trigger auto-negotiation restart
+        * to let the MAC decide what to do.
+        */
+       ret = bcm_phy_set_eee(phydev, count == DOWNSHIFT_DEV_DISABLE);
+       if (ret)
+               return ret;
+
+       return genphy_restart_aneg(phydev);
+}
+
+static void bcm7xxx_28nm_get_phy_stats(struct phy_device *phydev,
+                                      struct ethtool_stats *stats, u64 *data)
+{
+       struct bcm7xxx_phy_priv *priv = phydev->priv;
+
+       bcm_phy_get_stats(phydev, priv->stats, stats, data);
+}
+
+static int bcm7xxx_28nm_probe(struct phy_device *phydev)
+{
+       struct bcm7xxx_phy_priv *priv;
+
+       priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       phydev->priv = priv;
+
+       priv->stats = devm_kcalloc(&phydev->mdio.dev,
+                                  bcm_phy_get_sset_count(phydev), sizeof(u64),
+                                  GFP_KERNEL);
+       if (!priv->stats)
+               return -ENOMEM;
+
+       return 0;
+}
+
 #define BCM7XXX_28NM_GPHY(_oui, _name)                                 \
 {                                                                      \
        .phy_id         = (_oui),                                       \
@@ -315,6 +393,12 @@ static int bcm7xxx_suspend(struct phy_device *phydev)
        .config_aneg    = genphy_config_aneg,                           \
        .read_status    = genphy_read_status,                           \
        .resume         = bcm7xxx_28nm_resume,                          \
+       .get_tunable    = bcm7xxx_28nm_get_tunable,                     \
+       .set_tunable    = bcm7xxx_28nm_set_tunable,                     \
+       .get_sset_count = bcm_phy_get_sset_count,                       \
+       .get_strings    = bcm_phy_get_strings,                          \
+       .get_stats      = bcm7xxx_28nm_get_phy_stats,                   \
+       .probe          = bcm7xxx_28nm_probe,                           \
 }
 
 #define BCM7XXX_40NM_EPHY(_oui, _name)                                 \
index 583ef8a2ec8d3af2179bc3d83de7ac0dc38b481e..409b365f12b1e3421a9889c3dd113bd75283cd88 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/module.h>
 #include <linux/phy.h>
 #include <linux/brcmphy.h>
-
+#include <linux/of.h>
 
 #define BRCM_PHY_MODEL(phydev) \
        ((phydev)->drv->phy_id & (phydev)->drv->phy_id_mask)
@@ -30,9 +30,32 @@ MODULE_DESCRIPTION("Broadcom PHY driver");
 MODULE_AUTHOR("Maciej W. Rozycki");
 MODULE_LICENSE("GPL");
 
-static int bcm54xx_auxctl_write(struct phy_device *phydev, u16 regnum, u16 val)
+static int bcm54810_config(struct phy_device *phydev)
 {
-       return phy_write(phydev, MII_BCM54XX_AUX_CTL, regnum | val);
+       int rc, val;
+
+       val = bcm_phy_read_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL);
+       val &= ~BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN;
+       rc = bcm_phy_write_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL,
+                              val);
+       if (rc < 0)
+               return rc;
+
+       val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC);
+       val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN;
+       val |= MII_BCM54XX_AUXCTL_MISC_WREN;
+       rc = bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC,
+                                 val);
+       if (rc < 0)
+               return rc;
+
+       val = bcm_phy_read_shadow(phydev, BCM54810_SHD_CLK_CTL);
+       val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN;
+       rc = bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val);
+       if (rc < 0)
+               return rc;
+
+       return 0;
 }
 
 /* Needs SMDSP clock enabled via bcm54xx_phydsp_config() */
@@ -207,6 +230,12 @@ static int bcm54xx_config_init(struct phy_device *phydev)
            (phydev->dev_flags & PHY_BRCM_AUTO_PWRDWN_ENABLE))
                bcm54xx_adjust_rxrefclk(phydev);
 
+       if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810) {
+               err = bcm54810_config(phydev);
+               if (err)
+                       return err;
+       }
+
        bcm54xx_phydsp_config(phydev);
 
        return 0;
@@ -304,6 +333,7 @@ static int bcm5482_read_status(struct phy_device *phydev)
 
 static int bcm5481_config_aneg(struct phy_device *phydev)
 {
+       struct device_node *np = phydev->mdio.dev.of_node;
        int ret;
 
        /* Aneg firsly. */
@@ -334,6 +364,14 @@ static int bcm5481_config_aneg(struct phy_device *phydev)
                phy_write(phydev, 0x18, reg);
        }
 
+       if (of_property_read_bool(np, "enet-phy-lane-swap")) {
+               /* Lane Swap - Undocumented register...magic! */
+               ret = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_SEL_ER + 0x9,
+                                       0x11B);
+               if (ret < 0)
+                       return ret;
+       }
+
        return ret;
 }
 
@@ -567,6 +605,18 @@ static struct phy_driver broadcom_drivers[] = {
        .read_status    = genphy_read_status,
        .ack_interrupt  = bcm_phy_ack_intr,
        .config_intr    = bcm_phy_config_intr,
+}, {
+       .phy_id         = PHY_ID_BCM54810,
+       .phy_id_mask    = 0xfffffff0,
+       .name           = "Broadcom BCM54810",
+       .features       = PHY_GBIT_FEATURES |
+                         SUPPORTED_Pause | SUPPORTED_Asym_Pause,
+       .flags          = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
+       .config_init    = bcm54xx_config_init,
+       .config_aneg    = bcm5481_config_aneg,
+       .read_status    = genphy_read_status,
+       .ack_interrupt  = bcm_phy_ack_intr,
+       .config_intr    = bcm_phy_config_intr,
 }, {
        .phy_id         = PHY_ID_BCM5482,
        .phy_id_mask    = 0xfffffff0,
@@ -651,6 +701,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = {
        { PHY_ID_BCM54616S, 0xfffffff0 },
        { PHY_ID_BCM5464, 0xfffffff0 },
        { PHY_ID_BCM5481, 0xfffffff0 },
+       { PHY_ID_BCM54810, 0xfffffff0 },
        { PHY_ID_BCM5482, 0xfffffff0 },
        { PHY_ID_BCM50610, 0xfffffff0 },
        { PHY_ID_BCM50610M, 0xfffffff0 },
index 7a240fce3a7ea09edc9f10c6b93fadd94b2de3f5..e2460a57e4b1105ed398e207aa8cdfd84d03707d 100644 (file)
@@ -375,7 +375,7 @@ static int periodic_output(struct dp83640_clock *clock,
 
 /* ptp clock methods */
 
-static int ptp_dp83640_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+static int ptp_dp83640_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 {
        struct dp83640_clock *clock =
                container_of(ptp, struct dp83640_clock, caps);
@@ -384,13 +384,13 @@ static int ptp_dp83640_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
        int neg_adj = 0;
        u16 hi, lo;
 
-       if (ppb < 0) {
+       if (scaled_ppm < 0) {
                neg_adj = 1;
-               ppb = -ppb;
+               scaled_ppm = -scaled_ppm;
        }
-       rate = ppb;
-       rate <<= 26;
-       rate = div_u64(rate, 1953125);
+       rate = scaled_ppm;
+       rate <<= 13;
+       rate = div_u64(rate, 15625);
 
        hi = (rate >> 16) & PTP_RATE_HI_MASK;
        if (neg_adj)
@@ -1035,7 +1035,7 @@ static void dp83640_clock_init(struct dp83640_clock *clock, struct mii_bus *bus)
        clock->caps.n_per_out   = N_PER_OUT;
        clock->caps.n_pins      = DP83640_N_PINS;
        clock->caps.pps         = 0;
-       clock->caps.adjfreq     = ptp_dp83640_adjfreq;
+       clock->caps.adjfine     = ptp_dp83640_adjfine;
        clock->caps.adjtime     = ptp_dp83640_adjtime;
        clock->caps.gettime64   = ptp_dp83640_gettime;
        clock->caps.settime64   = ptp_dp83640_settime;
index 03d54c4adc881fc2d65de40a399096687c5b4444..800b39f0627943343c4276de637b30be4692352f 100644 (file)
@@ -19,6 +19,7 @@
 #define TI_DP83848C_PHY_ID             0x20005ca0
 #define NS_DP83848C_PHY_ID             0x20005c90
 #define TLK10X_PHY_ID                  0x2000a210
+#define TI_DP83822_PHY_ID              0x2000a240
 
 /* Registers */
 #define DP83848_MICR                   0x11 /* MII Interrupt Control Register */
@@ -77,6 +78,7 @@ static struct mdio_device_id __maybe_unused dp83848_tbl[] = {
        { TI_DP83848C_PHY_ID, 0xfffffff0 },
        { NS_DP83848C_PHY_ID, 0xfffffff0 },
        { TLK10X_PHY_ID, 0xfffffff0 },
+       { TI_DP83822_PHY_ID, 0xfffffff0 },
        { }
 };
 MODULE_DEVICE_TABLE(mdio, dp83848_tbl);
@@ -105,6 +107,7 @@ static struct phy_driver dp83848_driver[] = {
        DP83848_PHY_DRIVER(TI_DP83848C_PHY_ID, "TI DP83848C 10/100 Mbps PHY"),
        DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "NS DP83848C 10/100 Mbps PHY"),
        DP83848_PHY_DRIVER(TLK10X_PHY_ID, "TI TLK10X 10/100 Mbps PHY"),
+       DP83848_PHY_DRIVER(TI_DP83822_PHY_ID, "TI DP83822 10/100 Mbps PHY"),
 };
 module_phy_driver(dp83848_driver);
 
index c649c101bbaba97ec44255b9ee1e7a878c26ada5..eb51672106811e35ee0a7b3f0578560e16bc3670 100644 (file)
@@ -279,7 +279,7 @@ EXPORT_SYMBOL_GPL(fixed_phy_register);
 void fixed_phy_unregister(struct phy_device *phy)
 {
        phy_device_remove(phy);
-
+       of_node_put(phy->mdio.dev.of_node);
        fixed_phy_del(phy->mdio.addr);
 }
 EXPORT_SYMBOL_GPL(fixed_phy_unregister);
index c2dcf02df2021616fe7657447699179b7fb7f7c4..e269262471a44fdcbfc9ef3c5833bf3ceb78624e 100644 (file)
@@ -268,7 +268,7 @@ static int marvell_config_aneg(struct phy_device *phydev)
        if (err < 0)
                return err;
 
-       err = marvell_set_polarity(phydev, phydev->mdix);
+       err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
        if (err < 0)
                return err;
 
@@ -311,7 +311,7 @@ static int m88e1111_config_aneg(struct phy_device *phydev)
         */
        err = phy_write(phydev, MII_BMCR, BMCR_RESET);
 
-       err = marvell_set_polarity(phydev, phydev->mdix);
+       err = marvell_set_polarity(phydev, phydev->mdix_ctrl);
        if (err < 0)
                return err;
 
@@ -361,7 +361,7 @@ static int m88e1111_config_aneg(struct phy_device *phydev)
 static int marvell_of_reg_init(struct phy_device *phydev)
 {
        const __be32 *paddr;
-       int len, i, saved_page, current_page, page_changed, ret;
+       int len, i, saved_page, current_page, ret;
 
        if (!phydev->mdio.dev.of_node)
                return 0;
@@ -374,7 +374,6 @@ static int marvell_of_reg_init(struct phy_device *phydev)
        saved_page = phy_read(phydev, MII_MARVELL_PHY_PAGE);
        if (saved_page < 0)
                return saved_page;
-       page_changed = 0;
        current_page = saved_page;
 
        ret = 0;
@@ -388,7 +387,6 @@ static int marvell_of_reg_init(struct phy_device *phydev)
 
                if (reg_page != current_page) {
                        current_page = reg_page;
-                       page_changed = 1;
                        ret = phy_write(phydev, MII_MARVELL_PHY_PAGE, reg_page);
                        if (ret < 0)
                                goto err;
@@ -411,7 +409,7 @@ static int marvell_of_reg_init(struct phy_device *phydev)
 
        }
 err:
-       if (page_changed) {
+       if (current_page != saved_page) {
                i = phy_write(phydev, MII_MARVELL_PHY_PAGE, saved_page);
                if (ret == 0)
                        ret = i;
index d0bed52c8d1617237df3cd6e31b0af2711503b5c..6a33646bdf05d5af5455ea2b2862e5bd22a70008 100644 (file)
@@ -21,7 +21,8 @@
 struct mdio_mux_mmioreg_state {
        void *mux_handle;
        phys_addr_t phys;
-       uint8_t mask;
+       unsigned int iosize;
+       unsigned int mask;
 };
 
 /*
@@ -47,17 +48,47 @@ static int mdio_mux_mmioreg_switch_fn(int current_child, int desired_child,
        struct mdio_mux_mmioreg_state *s = data;
 
        if (current_child ^ desired_child) {
-               void __iomem *p = ioremap(s->phys, 1);
-               uint8_t x, y;
-
+               void __iomem *p = ioremap(s->phys, s->iosize);
                if (!p)
                        return -ENOMEM;
 
-               x = ioread8(p);
-               y = (x & ~s->mask) | desired_child;
-               if (x != y) {
-                       iowrite8((x & ~s->mask) | desired_child, p);
-                       pr_debug("%s: %02x -> %02x\n", __func__, x, y);
+               switch (s->iosize) {
+               case sizeof(uint8_t): {
+                       uint8_t x, y;
+
+                       x = ioread8(p);
+                       y = (x & ~s->mask) | desired_child;
+                       if (x != y) {
+                               iowrite8((x & ~s->mask) | desired_child, p);
+                               pr_debug("%s: %02x -> %02x\n", __func__, x, y);
+                       }
+
+                       break;
+               }
+               case sizeof(uint16_t): {
+                       uint16_t x, y;
+
+                       x = ioread16(p);
+                       y = (x & ~s->mask) | desired_child;
+                       if (x != y) {
+                               iowrite16((x & ~s->mask) | desired_child, p);
+                               pr_debug("%s: %04x -> %04x\n", __func__, x, y);
+                       }
+
+                       break;
+               }
+               case sizeof(uint32_t): {
+                       uint32_t x, y;
+
+                       x = ioread32(p);
+                       y = (x & ~s->mask) | desired_child;
+                       if (x != y) {
+                               iowrite32((x & ~s->mask) | desired_child, p);
+                               pr_debug("%s: %08x -> %08x\n", __func__, x, y);
+                       }
+
+                       break;
+               }
                }
 
                iounmap(p);
@@ -88,8 +119,11 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
        }
        s->phys = res.start;
 
-       if (resource_size(&res) != sizeof(uint8_t)) {
-               dev_err(&pdev->dev, "only 8-bit registers are supported\n");
+       s->iosize = resource_size(&res);
+       if (s->iosize != sizeof(uint8_t) &&
+           s->iosize != sizeof(uint16_t) &&
+           s->iosize != sizeof(uint32_t)) {
+               dev_err(&pdev->dev, "only 8/16/32-bit registers are supported\n");
                return -EINVAL;
        }
 
@@ -98,8 +132,8 @@ static int mdio_mux_mmioreg_probe(struct platform_device *pdev)
                dev_err(&pdev->dev, "missing or invalid mux-mask property\n");
                return -ENODEV;
        }
-       if (be32_to_cpup(iprop) > 255) {
-               dev_err(&pdev->dev, "only 8-bit registers are supported\n");
+       if (be32_to_cpup(iprop) >= BIT(s->iosize * 8)) {
+               dev_err(&pdev->dev, "only 8/16/32-bit registers are supported\n");
                return -EINVAL;
        }
        s->mask = be32_to_cpup(iprop);
index 09deef4bed097dd2a6231c9cfd4916df86fe1e99..653d076eafe5068672f177d565eec07798a782e3 100644 (file)
@@ -38,6 +38,9 @@
 
 #include <asm/irq.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/mdio.h>
+
 int mdiobus_register_device(struct mdio_device *mdiodev)
 {
        if (mdiodev->bus->mdio_map[mdiodev->addr])
@@ -461,6 +464,8 @@ int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum)
        retval = bus->read(bus, addr, regnum);
        mutex_unlock(&bus->mdio_lock);
 
+       trace_mdio_access(bus, 1, addr, regnum, retval, retval);
+
        return retval;
 }
 EXPORT_SYMBOL(mdiobus_read_nested);
@@ -485,6 +490,8 @@ int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum)
        retval = bus->read(bus, addr, regnum);
        mutex_unlock(&bus->mdio_lock);
 
+       trace_mdio_access(bus, 1, addr, regnum, retval, retval);
+
        return retval;
 }
 EXPORT_SYMBOL(mdiobus_read);
@@ -513,6 +520,8 @@ int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val)
        err = bus->write(bus, addr, regnum, val);
        mutex_unlock(&bus->mdio_lock);
 
+       trace_mdio_access(bus, 0, addr, regnum, val, err);
+
        return err;
 }
 EXPORT_SYMBOL(mdiobus_write_nested);
@@ -538,6 +547,8 @@ int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val)
        err = bus->write(bus, addr, regnum, val);
        mutex_unlock(&bus->mdio_lock);
 
+       trace_mdio_access(bus, 0, addr, regnum, val, err);
+
        return err;
 }
 EXPORT_SYMBOL(mdiobus_write);
index 9c88e6749b9a4194353fd81df36e1dc2ebab4d69..43c8fd46504bc2e563597ef7ca24fff3d28e89b1 100644 (file)
@@ -144,7 +144,7 @@ int mdio_driver_register(struct mdio_driver *drv)
        struct mdio_driver_common *mdiodrv = &drv->mdiodrv;
        int retval;
 
-       pr_info("mdio_driver_register: %s\n", mdiodrv->driver.name);
+       pr_debug("mdio_driver_register: %s\n", mdiodrv->driver.name);
 
        mdiodrv->driver.bus = &mdio_bus_type;
        mdiodrv->driver.probe = mdio_probe;
diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c
new file mode 100644 (file)
index 0000000..1ea69b7
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Amlogic Meson GXL Internal PHY Driver
+ *
+ * Copyright (C) 2015 Amlogic, Inc. All rights reserved.
+ * Copyright (C) 2016 BayLibre, SAS. All rights reserved.
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mii.h>
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+#include <linux/netdevice.h>
+
+static int meson_gxl_config_init(struct phy_device *phydev)
+{
+       /* Enable Analog and DSP register Bank access by toggling reg 0x14 */
+       phy_write(phydev, 0x14, 0x0000);
+       phy_write(phydev, 0x14, 0x0400);
+       phy_write(phydev, 0x14, 0x0000);
+       phy_write(phydev, 0x14, 0x0400);
+
+       /* Write Analog register 23 */
+       phy_write(phydev, 0x17, 0x8E0D);
+       phy_write(phydev, 0x14, 0x4417);
+
+       /* Enable fractional PLL */
+       phy_write(phydev, 0x17, 0x0005);
+       phy_write(phydev, 0x14, 0x5C1B);
+
+       /* Program fraction FR_PLL_DIV1 */
+       phy_write(phydev, 0x17, 0x029A);
+       phy_write(phydev, 0x14, 0x5C1D);
+
+       /* Program fraction FR_PLL_DIV1 */
+       phy_write(phydev, 0x17, 0xAAAA);
+       phy_write(phydev, 0x14, 0x5C1C);
+
+       return 0;
+}
+
+static struct phy_driver meson_gxl_phy[] = {
+       {
+               .phy_id         = 0x01814400,
+               .phy_id_mask    = 0xfffffff0,
+               .name           = "Meson GXL Internal PHY",
+               .features       = PHY_BASIC_FEATURES,
+               .flags          = PHY_IS_INTERNAL,
+               .config_init    = meson_gxl_config_init,
+               .config_aneg    = genphy_config_aneg,
+               .aneg_done      = genphy_aneg_done,
+               .read_status    = genphy_read_status,
+               .suspend        = genphy_suspend,
+               .resume         = genphy_resume,
+       },
+};
+
+static struct mdio_device_id __maybe_unused meson_gxl_tbl[] = {
+       { 0x01814400, 0xfffffff0 },
+       { }
+};
+
+module_phy_driver(meson_gxl_phy);
+
+MODULE_DEVICE_TABLE(mdio, meson_gxl_tbl);
+
+MODULE_DESCRIPTION("Amlogic Meson GXL Internal PHY driver");
+MODULE_AUTHOR("Baoqi wang");
+MODULE_AUTHOR("Neil Armstrong <narmstrong@baylibre.com>");
+MODULE_LICENSE("GPL");
index 081df68d2ce1467550657996e0a7268436e5323a..ea92d524d5a814ff22727eefa1818c0b0fd47a32 100644 (file)
@@ -318,12 +318,12 @@ static int ksz8041_config_init(struct phy_device *phydev)
        /* Limit supported and advertised modes in fiber mode */
        if (of_property_read_bool(of_node, "micrel,fiber-mode")) {
                phydev->dev_flags |= MICREL_PHY_FXEN;
-               phydev->supported &= SUPPORTED_FIBRE |
-                                    SUPPORTED_100baseT_Full |
+               phydev->supported &= SUPPORTED_100baseT_Full |
                                     SUPPORTED_100baseT_Half;
-               phydev->advertising &= ADVERTISED_FIBRE |
-                                      ADVERTISED_100baseT_Full |
+               phydev->supported |= SUPPORTED_FIBRE;
+               phydev->advertising &= ADVERTISED_100baseT_Full |
                                       ADVERTISED_100baseT_Half;
+               phydev->advertising |= ADVERTISED_FIBRE;
                phydev->autoneg = AUTONEG_DISABLE;
        }
 
index 7c00e508a101cc13bf1eb542c8e8c26fa51106bf..12825a5feb3ffff14969756cbe241de06dba9e70 100644 (file)
@@ -106,6 +106,40 @@ static int lan88xx_set_wol(struct phy_device *phydev,
        return 0;
 }
 
+static void lan88xx_set_mdix(struct phy_device *phydev)
+{
+       int buf;
+       int val;
+
+       switch (phydev->mdix_ctrl) {
+       case ETH_TP_MDI:
+               val = LAN88XX_EXT_MODE_CTRL_MDI_;
+               break;
+       case ETH_TP_MDI_X:
+               val = LAN88XX_EXT_MODE_CTRL_MDI_X_;
+               break;
+       case ETH_TP_MDI_AUTO:
+               val = LAN88XX_EXT_MODE_CTRL_AUTO_MDIX_;
+               break;
+       default:
+               return;
+       }
+
+       phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS, LAN88XX_EXT_PAGE_SPACE_1);
+       buf = phy_read(phydev, LAN88XX_EXT_MODE_CTRL);
+       buf &= ~LAN88XX_EXT_MODE_CTRL_MDIX_MASK_;
+       buf |= val;
+       phy_write(phydev, LAN88XX_EXT_MODE_CTRL, buf);
+       phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS, LAN88XX_EXT_PAGE_SPACE_0);
+}
+
+static int lan88xx_config_aneg(struct phy_device *phydev)
+{
+       lan88xx_set_mdix(phydev);
+
+       return genphy_config_aneg(phydev);
+}
+
 static struct phy_driver microchip_phy_driver[] = {
 {
        .phy_id         = 0x0007c130,
@@ -120,7 +154,7 @@ static struct phy_driver microchip_phy_driver[] = {
        .remove         = lan88xx_remove,
 
        .config_init    = genphy_config_init,
-       .config_aneg    = genphy_config_aneg,
+       .config_aneg    = lan88xx_config_aneg,
        .read_status    = genphy_read_status,
 
        .ack_interrupt  = lan88xx_phy_ack_interrupt,
index 113616b92abbbdf551c1d738605210ba80bb8022..e03ead81fffb563339d96771e4f14c849b400ed6 100644 (file)
@@ -27,6 +27,11 @@ enum rgmii_rx_clock_delay {
 
 /* Microsemi VSC85xx PHY registers */
 /* IEEE 802. Std Registers */
+#define MSCC_PHY_BYPASS_CONTROL                  18
+#define DISABLE_HP_AUTO_MDIX_MASK        0x0080
+#define DISABLE_PAIR_SWAP_CORR_MASK      0x0020
+#define DISABLE_POLARITY_CORR_MASK       0x0010
+
 #define MSCC_PHY_EXT_PHY_CNTL_1           23
 #define MAC_IF_SELECTION_MASK             0x1800
 #define MAC_IF_SELECTION_GMII             0
@@ -44,10 +49,25 @@ enum rgmii_rx_clock_delay {
 #define EDGE_RATE_CNTL_POS                5
 #define EDGE_RATE_CNTL_MASK               0x00E0
 
+#define MSCC_PHY_DEV_AUX_CNTL            28
+#define HP_AUTO_MDIX_X_OVER_IND_MASK     0x2000
+
 #define MSCC_EXT_PAGE_ACCESS             31
 #define MSCC_PHY_PAGE_STANDARD           0x0000 /* Standard registers */
+#define MSCC_PHY_PAGE_EXTENDED           0x0001 /* Extended registers */
 #define MSCC_PHY_PAGE_EXTENDED_2         0x0002 /* Extended reg - page 2 */
 
+/* Extended Page 1 Registers */
+#define MSCC_PHY_EXT_MODE_CNTL           19
+#define FORCE_MDI_CROSSOVER_MASK         0x000C
+#define FORCE_MDI_CROSSOVER_MDIX         0x000C
+#define FORCE_MDI_CROSSOVER_MDI                  0x0008
+
+#define MSCC_PHY_ACTIPHY_CNTL            20
+#define DOWNSHIFT_CNTL_MASK              0x001C
+#define DOWNSHIFT_EN                     0x0010
+#define DOWNSHIFT_CNTL_POS               2
+
 /* Extended Page 2 Registers */
 #define MSCC_PHY_RGMII_CNTL              20
 #define RGMII_RX_CLK_DELAY_MASK                  0x0070
@@ -65,7 +85,9 @@ enum rgmii_rx_clock_delay {
 #define SECURE_ON_PASSWD_LEN_4           0x4000
 
 /* Microsemi PHY ID's */
+#define PHY_ID_VSC8530                   0x00070560
 #define PHY_ID_VSC8531                   0x00070570
+#define PHY_ID_VSC8540                   0x00070760
 #define PHY_ID_VSC8541                   0x00070770
 
 #define MSCC_VDDMAC_1500                 1500
@@ -73,6 +95,8 @@ enum rgmii_rx_clock_delay {
 #define MSCC_VDDMAC_2500                 2500
 #define MSCC_VDDMAC_3300                 3300
 
+#define DOWNSHIFT_COUNT_MAX              5
+
 struct vsc8531_private {
        int rate_magic;
 };
@@ -99,6 +123,113 @@ static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page)
        return rc;
 }
 
+/* Report the current crossover state through @mdix.
+ *
+ * Reads the device auxiliary control register and maps its crossover
+ * indication bit to ETH_TP_MDI_X (set) or ETH_TP_MDI (clear).
+ *
+ * Returns 0 on success or the negative errno from the register read, in
+ * which case @mdix is left unmodified.
+ */
+static int vsc85xx_mdix_get(struct phy_device *phydev, u8 *mdix)
+{
+       /* int, not u16: phy_read() reports failures as negative values */
+       int reg_val;
+
+       reg_val = phy_read(phydev, MSCC_PHY_DEV_AUX_CNTL);
+       if (reg_val < 0)
+               return reg_val;
+
+       if (reg_val & HP_AUTO_MDIX_X_OVER_IND_MASK)
+               *mdix = ETH_TP_MDI_X;
+       else
+               *mdix = ETH_TP_MDI;
+
+       return 0;
+}
+
+/* Apply the MDI/MDI-X mode @mdix and restart auto-negotiation.
+ *
+ * For a forced mode (ETH_TP_MDI or ETH_TP_MDI_X) the pair-swap, polarity
+ * and HP auto-MDIX disable bits are set in the bypass control register and
+ * the matching force value is programmed in the extended-page mode control
+ * register. For any other value those disable bits and the force field are
+ * cleared.
+ *
+ * Returns 0 or a positive value from genphy_restart_aneg() on success, or
+ * a negative errno from the first register access that failed.
+ */
+static int vsc85xx_mdix_set(struct phy_device *phydev, u8 mdix)
+{
+       int rc;
+       /* int, not u16: phy_read() reports failures as negative values */
+       int reg_val;
+
+       reg_val = phy_read(phydev, MSCC_PHY_BYPASS_CONTROL);
+       if (reg_val < 0)
+               return reg_val;
+
+       if ((mdix == ETH_TP_MDI) || (mdix == ETH_TP_MDI_X)) {
+               reg_val |= (DISABLE_PAIR_SWAP_CORR_MASK |
+                           DISABLE_POLARITY_CORR_MASK  |
+                           DISABLE_HP_AUTO_MDIX_MASK);
+       } else {
+               reg_val &= ~(DISABLE_PAIR_SWAP_CORR_MASK |
+                            DISABLE_POLARITY_CORR_MASK  |
+                            DISABLE_HP_AUTO_MDIX_MASK);
+       }
+       rc = phy_write(phydev, MSCC_PHY_BYPASS_CONTROL, reg_val);
+       if (rc != 0)
+               return rc;
+
+       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED);
+       if (rc != 0)
+               return rc;
+
+       reg_val = phy_read(phydev, MSCC_PHY_EXT_MODE_CNTL);
+       if (reg_val < 0) {
+               rc = reg_val;
+               goto out_restore_page;
+       }
+
+       reg_val &= ~(FORCE_MDI_CROSSOVER_MASK);
+       if (mdix == ETH_TP_MDI)
+               reg_val |= FORCE_MDI_CROSSOVER_MDI;
+       else if (mdix == ETH_TP_MDI_X)
+               reg_val |= FORCE_MDI_CROSSOVER_MDIX;
+       rc = phy_write(phydev, MSCC_PHY_EXT_MODE_CNTL, reg_val);
+       if (rc != 0)
+               goto out_restore_page;
+
+       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+       if (rc != 0)
+               return rc;
+
+       return genphy_restart_aneg(phydev);
+
+out_restore_page:
+       /* Best effort: do not leave the extended page selected for later
+        * standard-register accesses. The original error is what the
+        * caller gets back.
+        */
+       vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+       return rc;
+}
+
+/* Read the downshift setting from the extended-page ActiPHY control
+ * register and store it in @count.
+ *
+ * @count receives DOWNSHIFT_DEV_DISABLE when the enable bit is clear;
+ * otherwise it receives the two-bit count field plus 2.
+ *
+ * Returns 0 on success or a negative errno from a page switch or the
+ * register read. On a failed read @count is left unmodified.
+ */
+static int vsc85xx_downshift_get(struct phy_device *phydev, u8 *count)
+{
+       int rc;
+       /* int, not u16: phy_read() reports failures as negative values */
+       int reg_val;
+
+       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED);
+       if (rc != 0)
+               goto out;
+
+       reg_val = phy_read(phydev, MSCC_PHY_ACTIPHY_CNTL);
+       if (reg_val < 0) {
+               /* Best effort page restore; report the read error */
+               vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+               rc = reg_val;
+               goto out;
+       }
+
+       reg_val &= DOWNSHIFT_CNTL_MASK;
+       if (!(reg_val & DOWNSHIFT_EN))
+               *count = DOWNSHIFT_DEV_DISABLE;
+       else
+               *count = ((reg_val & ~DOWNSHIFT_EN) >> DOWNSHIFT_CNTL_POS) + 2;
+       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+
+out:
+       return rc;
+}
+
+/* Program the downshift setting in the extended-page ActiPHY control
+ * register.
+ *
+ * @count is first translated into register bits: the default selects
+ * count field 0b01 with downshift enabled, 2..5 select (count - 2) with
+ * downshift enabled, and 0 clears the whole field. A count of 1 or above
+ * DOWNSHIFT_COUNT_MAX is rejected with -ERANGE before any register access.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int vsc85xx_downshift_set(struct phy_device *phydev, u8 count)
+{
+       int rc;
+       /* int, not u16: phy_read() reports failures as negative values */
+       int reg_val;
+
+       if (count == DOWNSHIFT_DEV_DEFAULT_COUNT) {
+               /* Default downshift count 3 (i.e. Bit3:2 = 0b01) */
+               count = ((1 << DOWNSHIFT_CNTL_POS) | DOWNSHIFT_EN);
+       } else if (count > DOWNSHIFT_COUNT_MAX || count == 1) {
+               phydev_err(phydev, "Downshift count should be 2,3,4 or 5\n");
+               return -ERANGE;
+       } else if (count) {
+               /* Downshift count is either 2,3,4 or 5 */
+               count = (((count - 2) << DOWNSHIFT_CNTL_POS) | DOWNSHIFT_EN);
+       }
+
+       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED);
+       if (rc != 0)
+               goto out;
+
+       reg_val = phy_read(phydev, MSCC_PHY_ACTIPHY_CNTL);
+       if (reg_val < 0) {
+               rc = reg_val;
+               goto out_restore_page;
+       }
+       reg_val &= ~(DOWNSHIFT_CNTL_MASK);
+       reg_val |= count;
+       rc = phy_write(phydev, MSCC_PHY_ACTIPHY_CNTL, reg_val);
+       if (rc != 0)
+               goto out_restore_page;
+
+       rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+
+out:
+       return rc;
+
+out_restore_page:
+       /* Best effort: do not leave the extended page selected; the
+        * original error is what the caller gets back.
+        */
+       vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+       return rc;
+}
+
 static int vsc85xx_wol_set(struct phy_device *phydev,
                           struct ethtool_wolinfo *wol)
 {
@@ -310,6 +441,7 @@ static int vsc85xx_default_config(struct phy_device *phydev)
        int rc;
        u16 reg_val;
 
+       phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
        mutex_lock(&phydev->lock);
        rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2);
        if (rc != 0)
@@ -327,6 +459,29 @@ out_unlock:
        return rc;
 }
 
+/* get_tunable hook: only ETHTOOL_PHY_DOWNSHIFT is handled, with @data
+ * treated as a u8 destination. Every other tunable id yields -EINVAL.
+ */
+static int vsc85xx_get_tunable(struct phy_device *phydev,
+                              struct ethtool_tunable *tuna, void *data)
+{
+       if (tuna->id != ETHTOOL_PHY_DOWNSHIFT)
+               return -EINVAL;
+
+       return vsc85xx_downshift_get(phydev, (u8 *)data);
+}
+
+/* set_tunable hook: only ETHTOOL_PHY_DOWNSHIFT is handled, with @data
+ * read as a single u8 count. Every other tunable id yields -EINVAL.
+ */
+static int vsc85xx_set_tunable(struct phy_device *phydev,
+                              struct ethtool_tunable *tuna,
+                              const void *data)
+{
+       if (tuna->id != ETHTOOL_PHY_DOWNSHIFT)
+               return -EINVAL;
+
+       return vsc85xx_downshift_set(phydev, *(u8 *)data);
+}
+
 static int vsc85xx_config_init(struct phy_device *phydev)
 {
        int rc;
@@ -376,6 +531,28 @@ static int vsc85xx_config_intr(struct phy_device *phydev)
        return rc;
 }
 
+/* config_aneg hook: program the MDI-X mode held in phydev->mdix_ctrl and,
+ * unless that reports a negative error, continue with the generic
+ * auto-negotiation setup.
+ */
+static int vsc85xx_config_aneg(struct phy_device *phydev)
+{
+       int err = vsc85xx_mdix_set(phydev, phydev->mdix_ctrl);
+
+       return (err < 0) ? err : genphy_config_aneg(phydev);
+}
+
+/* read_status hook: refresh phydev->mdix from the hardware and, unless
+ * that reports a negative error, continue with the generic status read.
+ */
+static int vsc85xx_read_status(struct phy_device *phydev)
+{
+       int err = vsc85xx_mdix_get(phydev, &phydev->mdix);
+
+       return (err < 0) ? err : genphy_read_status(phydev);
+}
+
 static int vsc85xx_probe(struct phy_device *phydev)
 {
        int rate_magic;
@@ -398,6 +575,27 @@ static int vsc85xx_probe(struct phy_device *phydev)
 
 /* Microsemi VSC85xx PHYs */
 static struct phy_driver vsc85xx_driver[] = {
+{
+       .phy_id         = PHY_ID_VSC8530,
+       .name           = "Microsemi FE VSC8530",
+       .phy_id_mask    = 0xfffffff0,
+       .features       = PHY_BASIC_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .soft_reset     = &genphy_soft_reset,
+       .config_init    = &vsc85xx_config_init,
+       .config_aneg    = &vsc85xx_config_aneg,
+       .aneg_done      = &genphy_aneg_done,
+       .read_status    = &vsc85xx_read_status,
+       .ack_interrupt  = &vsc85xx_ack_interrupt,
+       .config_intr    = &vsc85xx_config_intr,
+       .suspend        = &genphy_suspend,
+       .resume         = &genphy_resume,
+       .probe          = &vsc85xx_probe,
+       .set_wol        = &vsc85xx_wol_set,
+       .get_wol        = &vsc85xx_wol_get,
+       .get_tunable    = &vsc85xx_get_tunable,
+       .set_tunable    = &vsc85xx_set_tunable,
+},
 {
        .phy_id         = PHY_ID_VSC8531,
        .name           = "Microsemi VSC8531",
@@ -406,9 +604,9 @@ static struct phy_driver vsc85xx_driver[] = {
        .flags          = PHY_HAS_INTERRUPT,
        .soft_reset     = &genphy_soft_reset,
        .config_init    = &vsc85xx_config_init,
-       .config_aneg    = &genphy_config_aneg,
+       .config_aneg    = &vsc85xx_config_aneg,
        .aneg_done      = &genphy_aneg_done,
-       .read_status    = &genphy_read_status,
+       .read_status    = &vsc85xx_read_status,
        .ack_interrupt  = &vsc85xx_ack_interrupt,
        .config_intr    = &vsc85xx_config_intr,
        .suspend        = &genphy_suspend,
@@ -416,6 +614,29 @@ static struct phy_driver vsc85xx_driver[] = {
        .probe          = &vsc85xx_probe,
        .set_wol        = &vsc85xx_wol_set,
        .get_wol        = &vsc85xx_wol_get,
+       .get_tunable    = &vsc85xx_get_tunable,
+       .set_tunable    = &vsc85xx_set_tunable,
+},
+{
+       .phy_id         = PHY_ID_VSC8540,
+       .name           = "Microsemi FE VSC8540 SyncE",
+       .phy_id_mask    = 0xfffffff0,
+       .features       = PHY_BASIC_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .soft_reset     = &genphy_soft_reset,
+       .config_init    = &vsc85xx_config_init,
+       .config_aneg    = &vsc85xx_config_aneg,
+       .aneg_done      = &genphy_aneg_done,
+       .read_status    = &vsc85xx_read_status,
+       .ack_interrupt  = &vsc85xx_ack_interrupt,
+       .config_intr    = &vsc85xx_config_intr,
+       .suspend        = &genphy_suspend,
+       .resume         = &genphy_resume,
+       .probe          = &vsc85xx_probe,
+       .set_wol        = &vsc85xx_wol_set,
+       .get_wol        = &vsc85xx_wol_get,
+       .get_tunable    = &vsc85xx_get_tunable,
+       .set_tunable    = &vsc85xx_set_tunable,
 },
 {
        .phy_id         = PHY_ID_VSC8541,
@@ -425,9 +646,9 @@ static struct phy_driver vsc85xx_driver[] = {
        .flags          = PHY_HAS_INTERRUPT,
        .soft_reset     = &genphy_soft_reset,
        .config_init    = &vsc85xx_config_init,
-       .config_aneg    = &genphy_config_aneg,
+       .config_aneg    = &vsc85xx_config_aneg,
        .aneg_done      = &genphy_aneg_done,
-       .read_status    = &genphy_read_status,
+       .read_status    = &vsc85xx_read_status,
        .ack_interrupt  = &vsc85xx_ack_interrupt,
        .config_intr    = &vsc85xx_config_intr,
        .suspend        = &genphy_suspend,
@@ -435,6 +656,8 @@ static struct phy_driver vsc85xx_driver[] = {
        .probe          = &vsc85xx_probe,
        .set_wol        = &vsc85xx_wol_set,
        .get_wol        = &vsc85xx_wol_get,
+       .get_tunable    = &vsc85xx_get_tunable,
+       .set_tunable    = &vsc85xx_set_tunable,
 }
 
 };
@@ -442,7 +665,9 @@ static struct phy_driver vsc85xx_driver[] = {
 module_phy_driver(vsc85xx_driver);
 
 static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
+       { PHY_ID_VSC8530, 0xfffffff0, },
        { PHY_ID_VSC8531, 0xfffffff0, },
+       { PHY_ID_VSC8540, 0xfffffff0, },
        { PHY_ID_VSC8541, 0xfffffff0, },
        { }
 };
index 2f94c60d49394ecc743b13660a261979c460c15b..25f93a98863b79be76ac72b814d5dd4ff75c89a9 100644 (file)
@@ -143,13 +143,14 @@ static int phy_config_interrupt(struct phy_device *phydev, u32 interrupts)
  * Returns > 0 on success or < 0 on error. 0 means that auto-negotiation
  * is still pending.
  */
-static inline int phy_aneg_done(struct phy_device *phydev)
+int phy_aneg_done(struct phy_device *phydev)
 {
        if (phydev->drv->aneg_done)
                return phydev->drv->aneg_done(phydev);
 
        return genphy_aneg_done(phydev);
 }
+EXPORT_SYMBOL(phy_aneg_done);
 
 /* A structure for mapping a particular speed and duplex
  * combination to a particular SUPPORTED and ADVERTISED value
@@ -388,7 +389,7 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd)
 
        phydev->duplex = cmd->duplex;
 
-       phydev->mdix = cmd->eth_tp_mdix_ctrl;
+       phydev->mdix_ctrl = cmd->eth_tp_mdix_ctrl;
 
        /* Restart the PHY */
        phy_start_aneg(phydev);
@@ -442,7 +443,7 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev,
 
        phydev->duplex = duplex;
 
-       phydev->mdix = cmd->base.eth_tp_mdix_ctrl;
+       phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl;
 
        /* Restart the PHY */
        phy_start_aneg(phydev);
@@ -468,7 +469,8 @@ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd)
        cmd->transceiver = phy_is_internal(phydev) ?
                XCVR_INTERNAL : XCVR_EXTERNAL;
        cmd->autoneg = phydev->autoneg;
-       cmd->eth_tp_mdix_ctrl = phydev->mdix;
+       cmd->eth_tp_mdix_ctrl = phydev->mdix_ctrl;
+       cmd->eth_tp_mdix = phydev->mdix;
 
        return 0;
 }
@@ -495,7 +497,8 @@ int phy_ethtool_ksettings_get(struct phy_device *phydev,
 
        cmd->base.phy_address = phydev->mdio.addr;
        cmd->base.autoneg = phydev->autoneg;
-       cmd->base.eth_tp_mdix_ctrl = phydev->mdix;
+       cmd->base.eth_tp_mdix_ctrl = phydev->mdix_ctrl;
+       cmd->base.eth_tp_mdix = phydev->mdix;
 
        return 0;
 }
@@ -1395,6 +1398,9 @@ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data)
 {
        int val = ethtool_adv_to_mmd_eee_adv_t(data->advertised);
 
+       /* Mask prohibited EEE modes */
+       val &= ~phydev->eee_broken_modes;
+
        phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, val);
 
        return 0;
@@ -1440,3 +1446,14 @@ int phy_ethtool_set_link_ksettings(struct net_device *ndev,
        return phy_ethtool_ksettings_set(phydev, cmd);
 }
 EXPORT_SYMBOL(phy_ethtool_set_link_ksettings);
+
+/**
+ * phy_ethtool_nway_reset - restart auto-negotiation on a netdev's PHY
+ * @ndev: network device whose attached PHY should renegotiate
+ *
+ * Description: Returns -ENODEV when @ndev has no PHY attached, otherwise
+ *   the result of genphy_restart_aneg() on that PHY.
+ */
+int phy_ethtool_nway_reset(struct net_device *ndev)
+{
+       struct phy_device *phydev = ndev->phydev;
+
+       return phydev ? genphy_restart_aneg(phydev) : -ENODEV;
+}
+EXPORT_SYMBOL(phy_ethtool_nway_reset);
index 49a1c988d29cb85ebd4a91c403b03c26f2e25817..aeaf1bcb12d01f61b209188424a204e42ef738c9 100644 (file)
@@ -724,6 +724,7 @@ struct phy_device *phy_connect(struct net_device *dev, const char *bus_id,
        phydev = to_phy_device(d);
 
        rc = phy_connect_direct(dev, phydev, handler, interface);
+       put_device(d);
        if (rc)
                return ERR_PTR(rc);
 
@@ -913,15 +914,15 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
         */
        err = phy_init_hw(phydev);
        if (err)
-               phy_detach(phydev);
-       else
-               phy_resume(phydev);
+               goto error;
 
+       phy_resume(phydev);
        phy_led_triggers_register(phydev);
 
        return err;
 
 error:
+       phy_detach(phydev);
        put_device(d);
        module_put(bus->owner);
        return err;
@@ -956,6 +957,7 @@ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
        phydev = to_phy_device(d);
 
        rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface);
+       put_device(d);
        if (rc)
                return ERR_PTR(rc);
 
@@ -979,6 +981,8 @@ void phy_detach(struct phy_device *phydev)
        phydev->attached_dev = NULL;
        phy_suspend(phydev);
 
+       phy_led_triggers_unregister(phydev);
+
        /* If the device had no specific driver before (i.e. - it
         * was using the generic driver), we unbind the device
         * from the generic driver so that there's a chance a
@@ -992,8 +996,6 @@ void phy_detach(struct phy_device *phydev)
                }
        }
 
-       phy_led_triggers_unregister(phydev);
-
        /*
         * The phydev might go away on the put_device() below, so avoid
         * a use-after-free bug by reading the underlying bus first.
@@ -1118,6 +1120,43 @@ static int genphy_config_advert(struct phy_device *phydev)
        return changed;
 }
 
+/**
+ * genphy_config_eee_advert - disable unwanted eee mode advertisement
+ * @phydev: target phy_device struct
+ *
+ * Description: Writes MDIO_AN_EEE_ADV after disabling unsupported energy
+ *   efficient ethernet modes. Returns 0 if the PHY's advertisement hasn't
+ *   changed, and 1 if it has changed.
+ */
+static int genphy_config_eee_advert(struct phy_device *phydev)
+{
+       u32 broken = phydev->eee_broken_modes;
+       /* Signed on purpose: the read below is tested for a negative
+        * error value, which an unsigned variable could never hold.
+        */
+       int old_adv, adv;
+
+       /* Nothing to disable */
+       if (!broken)
+               return 0;
+
+       /* If the following call fails, we assume that EEE is not
+        * supported by the phy. If we read 0, EEE is not advertised.
+        * In both cases, we don't need to continue
+        */
+       adv = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN);
+       if (adv <= 0)
+               return 0;
+
+       old_adv = adv;
+       adv &= ~broken;
+
+       /* Advertising remains unchanged with the broken mask */
+       if (old_adv == adv)
+               return 0;
+
+       phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, MDIO_MMD_AN, adv);
+
+       return 1;
+}
+
 /**
  * genphy_setup_forced - configures/forces speed/duplex from @phydev
  * @phydev: target phy_device struct
@@ -1176,15 +1215,20 @@ EXPORT_SYMBOL(genphy_restart_aneg);
  */
 int genphy_config_aneg(struct phy_device *phydev)
 {
-       int result;
+       int err, changed;
+
+       changed = genphy_config_eee_advert(phydev);
 
        if (AUTONEG_ENABLE != phydev->autoneg)
                return genphy_setup_forced(phydev);
 
-       result = genphy_config_advert(phydev);
-       if (result < 0) /* error */
-               return result;
-       if (result == 0) {
+       err = genphy_config_advert(phydev);
+       if (err < 0) /* error */
+               return err;
+
+       changed |= err;
+
+       if (changed == 0) {
                /* Advertisement hasn't changed, but maybe aneg was never on to
                 * begin with?  Or maybe phy was isolated?
                 */
@@ -1194,16 +1238,16 @@ int genphy_config_aneg(struct phy_device *phydev)
                        return ctl;
 
                if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE))
-                       result = 1; /* do restart aneg */
+                       changed = 1; /* do restart aneg */
        }
 
        /* Only restart aneg if we are advertising something different
         * than we were before.
         */
-       if (result > 0)
-               result = genphy_restart_aneg(phydev);
+       if (changed > 0)
+               return genphy_restart_aneg(phydev);
 
-       return result;
+       return 0;
 }
 EXPORT_SYMBOL(genphy_config_aneg);
 
@@ -1561,6 +1605,21 @@ static void of_set_phy_supported(struct phy_device *phydev)
                __set_phy_supported(phydev, max_speed);
 }
 
+/* Copy the optional "eee-broken-modes" device-tree property of the PHY
+ * node into phydev->eee_broken_modes. Nothing is changed when OF MDIO
+ * support is disabled, the PHY has no OF node, or the property cannot be
+ * read as a u32.
+ */
+static void of_set_phy_eee_broken(struct phy_device *phydev)
+{
+       struct device_node *np = phydev->mdio.dev.of_node;
+       u32 modes;
+
+       if (!IS_ENABLED(CONFIG_OF_MDIO) || !np)
+               return;
+
+       if (of_property_read_u32(np, "eee-broken-modes", &modes) == 0)
+               phydev->eee_broken_modes = modes;
+}
+
 /**
  * phy_probe - probe and init a PHY device
  * @dev: device to probe and init
@@ -1598,6 +1657,11 @@ static int phy_probe(struct device *dev)
        of_set_phy_supported(phydev);
        phydev->advertising = phydev->supported;
 
+       /* Get the EEE modes we want to prohibit. We will ask
+        * the PHY to stop advertising these modes later on
+        */
+       of_set_phy_eee_broken(phydev);
+
        /* Set the state to READY by default */
        phydev->state = PHY_READY;
 
index cda600a1b7660ea0872a4c96105eb24100b75d90..fa62bdf2f52694dece215d2b62e548026e482c38 100644 (file)
@@ -130,7 +130,5 @@ void phy_led_triggers_unregister(struct phy_device *phy)
 
        for (i = 0; i < phy->phy_num_led_triggers; i++)
                phy_led_trigger_unregister(&phy->phy_led_triggers[i]);
-
-       devm_kfree(&phy->mdio.dev, phy->phy_led_triggers);
 }
 EXPORT_SYMBOL_GPL(phy_led_triggers_unregister);
index 2e37eb337d4868715606b92dab65cfeb60a37679..f78ff0279648fddbf6f7085e2a145c79850a61d8 100644 (file)
 /* Vitesse Extended Page Access Register */
 #define MII_VSC82X4_EXT_PAGE_ACCESS    0x1f
 
+/* Vitesse VSC8601 Extended PHY Control Register 1 */
+#define MII_VSC8601_EPHY_CTL           0x17
+#define MII_VSC8601_EPHY_CTL_RGMII_SKEW        (1 << 8)
+
 #define PHY_ID_VSC8234                 0x000fc620
 #define PHY_ID_VSC8244                 0x000fc6c0
 #define PHY_ID_VSC8514                 0x00070670
+#define PHY_ID_VSC8572                 0x000704d0
 #define PHY_ID_VSC8574                 0x000704a0
 #define PHY_ID_VSC8601                 0x00070420
 #define PHY_ID_VSC8662                 0x00070660
@@ -111,6 +116,34 @@ static int vsc824x_config_init(struct phy_device *phydev)
        return err;
 }
 
+/* Set the RGMII skew bit in the VSC8601 extended PHY control register.
+ *
+ * The skew is applied to both the TX and RX clocks, so it should only be
+ * used for "rgmii-id" interfaces; it may not work as expected on
+ * "rgmii-txid", "rgmii-rxid" or "rgmii" interfaces.
+ *
+ * Returns the negative read error, or else the result of the write.
+ */
+static int vsc8601_add_skew(struct phy_device *phydev)
+{
+       int ctl = phy_read(phydev, MII_VSC8601_EPHY_CTL);
+
+       if (ctl < 0)
+               return ctl;
+
+       return phy_write(phydev, MII_VSC8601_EPHY_CTL,
+                        ctl | MII_VSC8601_EPHY_CTL_RGMII_SKEW);
+}
+
+/* config_init hook for the VSC8601: add the RGMII clock skew when the
+ * interface mode is PHY_INTERFACE_MODE_RGMII_ID, then run the generic
+ * init. A negative result from the skew setup is returned as-is and
+ * skips the generic init.
+ */
+static int vsc8601_config_init(struct phy_device *phydev)
+{
+       if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID) {
+               int err = vsc8601_add_skew(phydev);
+
+               if (err < 0)
+                       return err;
+       }
+
+       return genphy_config_init(phydev);
+}
+
 static int vsc824x_ack_interrupt(struct phy_device *phydev)
 {
        int err = 0;
@@ -134,6 +167,7 @@ static int vsc82xx_config_intr(struct phy_device *phydev)
                        (phydev->drv->phy_id == PHY_ID_VSC8234 ||
                         phydev->drv->phy_id == PHY_ID_VSC8244 ||
                         phydev->drv->phy_id == PHY_ID_VSC8514 ||
+                        phydev->drv->phy_id == PHY_ID_VSC8572 ||
                         phydev->drv->phy_id == PHY_ID_VSC8574 ||
                         phydev->drv->phy_id == PHY_ID_VSC8601) ?
                                MII_VSC8244_IMASK_MASK :
@@ -258,6 +292,17 @@ static struct phy_driver vsc82xx_driver[] = {
        .read_status    = &genphy_read_status,
        .ack_interrupt  = &vsc824x_ack_interrupt,
        .config_intr    = &vsc82xx_config_intr,
+}, {
+       .phy_id         = PHY_ID_VSC8572,
+       .name           = "Vitesse VSC8572",
+       .phy_id_mask    = 0x000ffff0,
+       .features       = PHY_GBIT_FEATURES,
+       .flags          = PHY_HAS_INTERRUPT,
+       .config_init    = &vsc824x_config_init,
+       .config_aneg    = &vsc82x4_config_aneg,
+       .read_status    = &genphy_read_status,
+       .ack_interrupt  = &vsc824x_ack_interrupt,
+       .config_intr    = &vsc82xx_config_intr,
 }, {
        .phy_id         = PHY_ID_VSC8574,
        .name           = "Vitesse VSC8574",
@@ -275,7 +320,7 @@ static struct phy_driver vsc82xx_driver[] = {
        .phy_id_mask    = 0x000ffff0,
        .features       = PHY_GBIT_FEATURES,
        .flags          = PHY_HAS_INTERRUPT,
-       .config_init    = &genphy_config_init,
+       .config_init    = &vsc8601_config_init,
        .config_aneg    = &genphy_config_aneg,
        .read_status    = &genphy_read_status,
        .ack_interrupt  = &vsc824x_ack_interrupt,
@@ -323,6 +368,7 @@ static struct mdio_device_id __maybe_unused vitesse_tbl[] = {
        { PHY_ID_VSC8234, 0x000ffff0 },
        { PHY_ID_VSC8244, 0x000fffc0 },
        { PHY_ID_VSC8514, 0x000ffff0 },
+       { PHY_ID_VSC8572, 0x000ffff0 },
        { PHY_ID_VSC8574, 0x000ffff0 },
        { PHY_ID_VSC8662, 0x000ffff0 },
        { PHY_ID_VSC8221, 0x000ffff0 },
index 5489c0ec1d9a3cecb9957c474450006d0f63727b..3d3b1f4339eff6be6713f968da0aa55eaa9931fb 100644 (file)
@@ -204,7 +204,7 @@ static atomic_t ppp_unit_count = ATOMIC_INIT(0);
 static atomic_t channel_count = ATOMIC_INIT(0);
 
 /* per-net private data for this module */
-static int ppp_net_id __read_mostly;
+static unsigned int ppp_net_id __read_mostly;
 struct ppp_net {
        /* units to ppp mapping */
        struct idr units_idr;
index 4ddae8118c8566e4de07a16b136af13597fe4f3e..f017c72bb7fd3a3fbc09fc8d9ceeefd767eb41e8 100644 (file)
@@ -95,7 +95,7 @@ static const struct proto_ops pppoe_ops;
 static const struct ppp_channel_ops pppoe_chan_ops;
 
 /* per-net private data for this module */
-static int pppoe_net_id __read_mostly;
+static unsigned int pppoe_net_id __read_mostly;
 struct pppoe_net {
        /*
         * we could use _single_ hash table for all
index a380649bf6b5c2242edac044b497bf15922cffe5..bdc58567d10e7b370b6966c35251b81619f314ce 100644 (file)
@@ -2150,13 +2150,7 @@ static struct rtnl_link_ops team_link_ops __read_mostly = {
  * Generic netlink custom interface
  ***********************************/
 
-static struct genl_family team_nl_family = {
-       .id             = GENL_ID_GENERATE,
-       .name           = TEAM_GENL_NAME,
-       .version        = TEAM_GENL_VERSION,
-       .maxattr        = TEAM_ATTR_MAX,
-       .netnsok        = true,
-};
+static struct genl_family team_nl_family;
 
 static const struct nla_policy team_nl_policy[TEAM_ATTR_MAX + 1] = {
        [TEAM_ATTR_UNSPEC]                      = { .type = NLA_UNSPEC, },
@@ -2746,6 +2740,18 @@ static const struct genl_multicast_group team_nl_mcgrps[] = {
        { .name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME, },
 };
 
+static struct genl_family team_nl_family __ro_after_init = {
+       .name           = TEAM_GENL_NAME,
+       .version        = TEAM_GENL_VERSION,
+       .maxattr        = TEAM_ATTR_MAX,
+       .netnsok        = true,
+       .module         = THIS_MODULE,
+       .ops            = team_nl_ops,
+       .n_ops          = ARRAY_SIZE(team_nl_ops),
+       .mcgrps         = team_nl_mcgrps,
+       .n_mcgrps       = ARRAY_SIZE(team_nl_mcgrps),
+};
+
 static int team_nl_send_multicast(struct sk_buff *skb,
                                  struct team *team, u32 portid)
 {
@@ -2767,10 +2773,9 @@ static int team_nl_send_event_port_get(struct team *team,
                                          port);
 }
 
-static int team_nl_init(void)
+static int __init team_nl_init(void)
 {
-       return genl_register_family_with_ops_groups(&team_nl_family, team_nl_ops,
-                                                   team_nl_mcgrps);
+       return genl_register_family(&team_nl_family);
 }
 
 static void team_nl_fini(void)
index 93285687cf13ea28ad5627953d91b6d164eab100..e2af2dd544f0f72441d9e04bc6065c0f4babf213 100644 (file)
@@ -878,13 +878,6 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
            sk_filter(tfile->socket.sk, skb))
                goto drop;
 
-       /* Limit the number of packets queued by dividing txq length with the
-        * number of queues.
-        */
-       if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) * numqueues
-                         >= dev->tx_queue_len)
-               goto drop;
-
        if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
                goto drop;
 
@@ -1252,8 +1245,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
                return -EFAULT;
        }
 
-       err = virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun));
-       if (err) {
+       if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
                this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
                kfree_skb(skb);
                return -EINVAL;
@@ -1361,15 +1353,13 @@ static ssize_t tun_put_user(struct tun_struct *tun,
        }
 
        if (vnet_hdr_sz) {
-               struct virtio_net_hdr gso = { 0 }; /* no info leak */
-               int ret;
+               struct virtio_net_hdr gso;
 
                if (iov_iter_count(iter) < vnet_hdr_sz)
                        return -EINVAL;
 
-               ret = virtio_net_hdr_from_skb(skb, &gso,
-                                             tun_is_little_endian(tun));
-               if (ret) {
+               if (virtio_net_hdr_from_skb(skb, &gso,
+                                           tun_is_little_endian(tun))) {
                        struct skb_shared_info *sinfo = skb_shinfo(skb);
                        pr_err("unexpected GSO type: "
                               "0x%x, gso_size %d, hdr_len %d\n",
@@ -1985,7 +1975,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
        int le;
        int ret;
 
-       if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) {
+       if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == SOCK_IOC_TYPE) {
                if (copy_from_user(&ifr, argp, ifreq_len))
                        return -EFAULT;
        } else {
@@ -2005,7 +1995,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
        rtnl_lock();
 
        tun = __tun_get(tfile);
-       if (cmd == TUNSETIFF && !tun) {
+       if (cmd == TUNSETIFF) {
+               ret = -EEXIST;
+               if (tun)
+                       goto unlock;
+
                ifr.ifr_name[IFNAMSIZ-1] = '\0';
 
                ret = tun_set_iff(sock_net(&tfile->sk), file, &ifr);
index f79eb12c326aacf2ed62b021b0b4d87bfa5c71e7..125cff57c759e40f50337fb1c5887f77c0c78090 100644 (file)
@@ -433,13 +433,13 @@ int asix_mdio_read(struct net_device *netdev, int phy_id, int loc)
        mutex_lock(&dev->phy_mutex);
        do {
                ret = asix_set_sw_mii(dev, 0);
-               if (ret == -ENODEV)
+               if (ret == -ENODEV || ret == -ETIMEDOUT)
                        break;
                usleep_range(1000, 1100);
                ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
                                    0, 0, 1, &smsr, 0);
        } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
-       if (ret == -ENODEV) {
+       if (ret == -ENODEV || ret == -ETIMEDOUT) {
                mutex_unlock(&dev->phy_mutex);
                return ret;
        }
@@ -497,13 +497,13 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc)
        mutex_lock(&dev->phy_mutex);
        do {
                ret = asix_set_sw_mii(dev, 1);
-               if (ret == -ENODEV)
+               if (ret == -ENODEV || ret == -ETIMEDOUT)
                        break;
                usleep_range(1000, 1100);
                ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
                                    0, 0, 1, &smsr, 1);
        } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
-       if (ret == -ENODEV) {
+       if (ret == -ENODEV || ret == -ETIMEDOUT) {
                mutex_unlock(&dev->phy_mutex);
                return ret;
        }
index 49a3bc107d05490fc061c94815b746170942ba8b..6308386b09dfeafcb12c7912b06e8acf6aaff69b 100644 (file)
@@ -149,14 +149,6 @@ static const struct net_device_ops ax88172a_netdev_ops = {
        .ndo_set_rx_mode        = asix_set_multicast,
 };
 
-static int ax88172a_nway_reset(struct net_device *net)
-{
-       if (!net->phydev)
-               return -ENODEV;
-
-       return phy_start_aneg(net->phydev);
-}
-
 static const struct ethtool_ops ax88172a_ethtool_ops = {
        .get_drvinfo            = asix_get_drvinfo,
        .get_link               = usbnet_get_link,
@@ -167,7 +159,7 @@ static const struct ethtool_ops ax88172a_ethtool_ops = {
        .get_eeprom_len         = asix_get_eeprom_len,
        .get_eeprom             = asix_get_eeprom,
        .set_eeprom             = asix_set_eeprom,
-       .nway_reset             = ax88172a_nway_reset,
+       .nway_reset             = phy_ethtool_nway_reset,
        .get_link_ksettings     = phy_ethtool_get_link_ksettings,
        .set_link_ksettings     = phy_ethtool_set_link_ksettings,
 };
index 36c70d6f736322f6204ac414146497cc0421613c..a3a7db0702d8d7ece5d4999ce2426bef316f0e06 100644 (file)
@@ -1654,6 +1654,19 @@ static const struct driver_info ax88178a_info = {
        .tx_fixup = ax88179_tx_fixup,
 };
 
+static const struct driver_info cypress_GX3_info = {
+       .description = "Cypress GX3 SuperSpeed to Gigabit Ethernet Controller",
+       .bind = ax88179_bind,
+       .unbind = ax88179_unbind,
+       .status = ax88179_status,
+       .link_reset = ax88179_link_reset,
+       .reset = ax88179_reset,
+       .stop = ax88179_stop,
+       .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+       .rx_fixup = ax88179_rx_fixup,
+       .tx_fixup = ax88179_tx_fixup,
+};
+
 static const struct driver_info dlink_dub1312_info = {
        .description = "D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter",
        .bind = ax88179_bind,
@@ -1715,6 +1728,10 @@ static const struct usb_device_id products[] = {
        /* ASIX AX88178A 10/100/1000 */
        USB_DEVICE(0x0b95, 0x178a),
        .driver_info = (unsigned long)&ax88178a_info,
+}, {
+       /* Cypress GX3 SuperSpeed to Gigabit Ethernet Bridge Controller */
+       USB_DEVICE(0x04b4, 0x3610),
+       .driver_info = (unsigned long)&cypress_GX3_info,
 }, {
        /* D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter */
        USB_DEVICE(0x2001, 0x4a00),
index 5662babf05832e8641da4b6aaa7bce9da4fdbc9c..3e37724d30ae7efa2153f53fab3b21dc6cac5af7 100644 (file)
@@ -151,7 +151,7 @@ kalmia_bind(struct usbnet *dev, struct usb_interface *intf)
 
        status = kalmia_init_and_get_ethernet_addr(dev, ethernet_addr);
 
-       if (status < 0) {
+       if (status) {
                usb_set_intfdata(intf, NULL);
                usb_driver_release_interface(driver_of(intf), intf);
                return status;
index c4e748e92db40faed62fe7501ad2453daca95668..0c459e92f1b3b6afd0b62b786d5294467f16fa35 100644 (file)
 #include <linux/ipv6.h>
 #include <linux/mdio.h>
 #include <net/ip6_checksum.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+#include <linux/irqchip/chained_irq.h>
 #include <linux/microchipphy.h>
 #include "lan78xx.h"
 
 #define DRIVER_AUTHOR  "WOOJUNG HUH <woojung.huh@microchip.com>"
 #define DRIVER_DESC    "LAN78XX USB 3.0 Gigabit Ethernet Devices"
 #define DRIVER_NAME    "lan78xx"
-#define DRIVER_VERSION "1.0.4"
+#define DRIVER_VERSION "1.0.5"
 
 #define TX_TIMEOUT_JIFFIES             (5 * HZ)
 #define THROTTLE_JIFFIES               (HZ / 8)
 /* statistic update interval (mSec) */
 #define STAT_UPDATE_TIMER              (1 * 1000)
 
+/* defines interrupts from interrupt EP */
+#define MAX_INT_EP                     (32)
+#define INT_EP_INTEP                   (31)
+#define INT_EP_OTP_WR_DONE             (28)
+#define INT_EP_EEE_TX_LPI_START                (26)
+#define INT_EP_EEE_TX_LPI_STOP         (25)
+#define INT_EP_EEE_RX_LPI              (24)
+#define INT_EP_MAC_RESET_TIMEOUT       (23)
+#define INT_EP_RDFO                    (22)
+#define INT_EP_TXE                     (21)
+#define INT_EP_USB_STATUS              (20)
+#define INT_EP_TX_DIS                  (19)
+#define INT_EP_RX_DIS                  (18)
+#define INT_EP_PHY                     (17)
+#define INT_EP_DP                      (16)
+#define INT_EP_MAC_ERR                 (15)
+#define INT_EP_TDFU                    (14)
+#define INT_EP_TDFO                    (13)
+#define INT_EP_UTX                     (12)
+#define INT_EP_GPIO_11                 (11)
+#define INT_EP_GPIO_10                 (10)
+#define INT_EP_GPIO_9                  (9)
+#define INT_EP_GPIO_8                  (8)
+#define INT_EP_GPIO_7                  (7)
+#define INT_EP_GPIO_6                  (6)
+#define INT_EP_GPIO_5                  (5)
+#define INT_EP_GPIO_4                  (4)
+#define INT_EP_GPIO_3                  (3)
+#define INT_EP_GPIO_2                  (2)
+#define INT_EP_GPIO_1                  (1)
+#define INT_EP_GPIO_0                  (0)
+
 static const char lan78xx_gstrings[][ETH_GSTRING_LEN] = {
        "RX FCS Errors",
        "RX Alignment Errors",
@@ -296,6 +332,15 @@ struct statstage {
        struct lan78xx_statstage64      curr_stat;
 };
 
+struct irq_domain_data {
+       struct irq_domain       *irqdomain;
+       unsigned int            phyirq;
+       struct irq_chip         *irqchip;
+       irq_flow_handler_t      irq_handler;
+       u32                     irqenable;
+       struct mutex            irq_lock;               /* for irq bus access */
+};
+
 struct lan78xx_net {
        struct net_device       *net;
        struct usb_device       *udev;
@@ -351,6 +396,8 @@ struct lan78xx_net {
 
        int                     delta;
        struct statstage        stats;
+
+       struct irq_domain_data  domain_data;
 };
 
 /* use ethtool to change the level for any given device */
@@ -1096,11 +1143,6 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
        int ladv, radv, ret;
        u32 buf;
 
-       /* clear PHY interrupt status */
-       ret = phy_read(phydev, LAN88XX_INT_STS);
-       if (unlikely(ret < 0))
-               return -EIO;
-
        /* clear LAN78xx interrupt status */
        ret = lan78xx_write_reg(dev, INT_STS, INT_STS_PHY_INT_);
        if (unlikely(ret < 0))
@@ -1120,16 +1162,12 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
                if (unlikely(ret < 0))
                        return -EIO;
 
-               phy_mac_interrupt(phydev, 0);
-
                del_timer(&dev->stat_monitor);
        } else if (phydev->link && !dev->link_on) {
                dev->link_on = true;
 
                phy_ethtool_ksettings_get(phydev, &ecmd);
 
-               ret = phy_read(phydev, LAN88XX_INT_STS);
-
                if (dev->udev->speed == USB_SPEED_SUPER) {
                        if (ecmd.base.speed == 1000) {
                                /* disable U2 */
@@ -1163,7 +1201,6 @@ static int lan78xx_link_reset(struct lan78xx_net *dev)
 
                ret = lan78xx_update_flowcontrol(dev, ecmd.base.duplex, ladv,
                                                 radv);
-               phy_mac_interrupt(phydev, 1);
 
                if (!timer_pending(&dev->stat_monitor)) {
                        dev->delta = 1;
@@ -1202,7 +1239,10 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb)
 
        if (intdata & INT_ENP_PHY_INT) {
                netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
-                         lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
+               lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
+
+               if (dev->domain_data.phyirq > 0)
+                       generic_handle_irq(dev->domain_data.phyirq);
        } else
                netdev_warn(dev->net,
                            "unexpected interrupt: 0x%08x\n", intdata);
@@ -1407,11 +1447,6 @@ static u32 lan78xx_get_link(struct net_device *net)
        return net->phydev->link;
 }
 
-static int lan78xx_nway_reset(struct net_device *net)
-{
-       return phy_start_aneg(net->phydev);
-}
-
 static void lan78xx_get_drvinfo(struct net_device *net,
                                struct ethtool_drvinfo *info)
 {
@@ -1436,62 +1471,12 @@ static void lan78xx_set_msglevel(struct net_device *net, u32 level)
        dev->msg_enable = level;
 }
 
-static int lan78xx_get_mdix_status(struct net_device *net)
-{
-       struct phy_device *phydev = net->phydev;
-       int buf;
-
-       phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS, LAN88XX_EXT_PAGE_SPACE_1);
-       buf = phy_read(phydev, LAN88XX_EXT_MODE_CTRL);
-       phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS, LAN88XX_EXT_PAGE_SPACE_0);
-
-       return buf;
-}
-
-static void lan78xx_set_mdix_status(struct net_device *net, __u8 mdix_ctrl)
-{
-       struct lan78xx_net *dev = netdev_priv(net);
-       struct phy_device *phydev = net->phydev;
-       int buf;
-
-       if (mdix_ctrl == ETH_TP_MDI) {
-               phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS,
-                         LAN88XX_EXT_PAGE_SPACE_1);
-               buf = phy_read(phydev, LAN88XX_EXT_MODE_CTRL);
-               buf &= ~LAN88XX_EXT_MODE_CTRL_MDIX_MASK_;
-               phy_write(phydev, LAN88XX_EXT_MODE_CTRL,
-                         buf | LAN88XX_EXT_MODE_CTRL_MDI_);
-               phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS,
-                         LAN88XX_EXT_PAGE_SPACE_0);
-       } else if (mdix_ctrl == ETH_TP_MDI_X) {
-               phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS,
-                         LAN88XX_EXT_PAGE_SPACE_1);
-               buf = phy_read(phydev, LAN88XX_EXT_MODE_CTRL);
-               buf &= ~LAN88XX_EXT_MODE_CTRL_MDIX_MASK_;
-               phy_write(phydev, LAN88XX_EXT_MODE_CTRL,
-                         buf | LAN88XX_EXT_MODE_CTRL_MDI_X_);
-               phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS,
-                         LAN88XX_EXT_PAGE_SPACE_0);
-       } else if (mdix_ctrl == ETH_TP_MDI_AUTO) {
-               phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS,
-                         LAN88XX_EXT_PAGE_SPACE_1);
-               buf = phy_read(phydev, LAN88XX_EXT_MODE_CTRL);
-               buf &= ~LAN88XX_EXT_MODE_CTRL_MDIX_MASK_;
-               phy_write(phydev, LAN88XX_EXT_MODE_CTRL,
-                         buf | LAN88XX_EXT_MODE_CTRL_AUTO_MDIX_);
-               phy_write(phydev, LAN88XX_EXT_PAGE_ACCESS,
-                         LAN88XX_EXT_PAGE_SPACE_0);
-       }
-       dev->mdix_ctrl = mdix_ctrl;
-}
-
 static int lan78xx_get_link_ksettings(struct net_device *net,
                                      struct ethtool_link_ksettings *cmd)
 {
        struct lan78xx_net *dev = netdev_priv(net);
        struct phy_device *phydev = net->phydev;
        int ret;
-       int buf;
 
        ret = usb_autopm_get_interface(dev->intf);
        if (ret < 0)
@@ -1499,20 +1484,6 @@ static int lan78xx_get_link_ksettings(struct net_device *net,
 
        ret = phy_ethtool_ksettings_get(phydev, cmd);
 
-       buf = lan78xx_get_mdix_status(net);
-
-       buf &= LAN88XX_EXT_MODE_CTRL_MDIX_MASK_;
-       if (buf == LAN88XX_EXT_MODE_CTRL_AUTO_MDIX_) {
-               cmd->base.eth_tp_mdix = ETH_TP_MDI_AUTO;
-               cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
-       } else if (buf == LAN88XX_EXT_MODE_CTRL_MDI_) {
-               cmd->base.eth_tp_mdix = ETH_TP_MDI;
-               cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI;
-       } else if (buf == LAN88XX_EXT_MODE_CTRL_MDI_X_) {
-               cmd->base.eth_tp_mdix = ETH_TP_MDI_X;
-               cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_X;
-       }
-
        usb_autopm_put_interface(dev->intf);
 
        return ret;
@@ -1530,9 +1501,6 @@ static int lan78xx_set_link_ksettings(struct net_device *net,
        if (ret < 0)
                return ret;
 
-       if (dev->mdix_ctrl != cmd->base.eth_tp_mdix_ctrl)
-               lan78xx_set_mdix_status(net, cmd->base.eth_tp_mdix_ctrl);
-
        /* change speed & duplex */
        ret = phy_ethtool_ksettings_set(phydev, cmd);
 
@@ -1615,7 +1583,7 @@ exit:
 
 static const struct ethtool_ops lan78xx_ethtool_ops = {
        .get_link       = lan78xx_get_link,
-       .nway_reset     = lan78xx_nway_reset,
+       .nway_reset     = phy_ethtool_nway_reset,
        .get_drvinfo    = lan78xx_get_drvinfo,
        .get_msglevel   = lan78xx_get_msglevel,
        .set_msglevel   = lan78xx_set_msglevel,
@@ -1844,6 +1812,127 @@ static void lan78xx_link_status_change(struct net_device *net)
        }
 }
 
+static int irq_map(struct irq_domain *d, unsigned int irq,
+                  irq_hw_number_t hwirq)
+{
+       struct irq_domain_data *data = d->host_data;
+
+       irq_set_chip_data(irq, data);
+       irq_set_chip_and_handler(irq, data->irqchip, data->irq_handler);
+       irq_set_noprobe(irq);
+
+       return 0;
+}
+
+static void irq_unmap(struct irq_domain *d, unsigned int irq)
+{
+       irq_set_chip_and_handler(irq, NULL, NULL);
+       irq_set_chip_data(irq, NULL);
+}
+
+static const struct irq_domain_ops chip_domain_ops = {
+       .map    = irq_map,
+       .unmap  = irq_unmap,
+};
+
+static void lan78xx_irq_mask(struct irq_data *irqd)
+{
+       struct irq_domain_data *data = irq_data_get_irq_chip_data(irqd);
+
+       data->irqenable &= ~BIT(irqd_to_hwirq(irqd));
+}
+
+static void lan78xx_irq_unmask(struct irq_data *irqd)
+{
+       struct irq_domain_data *data = irq_data_get_irq_chip_data(irqd);
+
+       data->irqenable |= BIT(irqd_to_hwirq(irqd));
+}
+
+static void lan78xx_irq_bus_lock(struct irq_data *irqd)
+{
+       struct irq_domain_data *data = irq_data_get_irq_chip_data(irqd);
+
+       mutex_lock(&data->irq_lock);
+}
+
+static void lan78xx_irq_bus_sync_unlock(struct irq_data *irqd)
+{
+       struct irq_domain_data *data = irq_data_get_irq_chip_data(irqd);
+       struct lan78xx_net *dev =
+                       container_of(data, struct lan78xx_net, domain_data);
+       u32 buf;
+       int ret;
+
+       /* call register access here because irq_bus_lock & irq_bus_sync_unlock
+        * are only two callbacks executed in non-atomic contex.
+        */
+       ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf);
+       if (buf != data->irqenable)
+               ret = lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable);
+
+       mutex_unlock(&data->irq_lock);
+}
+
+static struct irq_chip lan78xx_irqchip = {
+       .name                   = "lan78xx-irqs",
+       .irq_mask               = lan78xx_irq_mask,
+       .irq_unmask             = lan78xx_irq_unmask,
+       .irq_bus_lock           = lan78xx_irq_bus_lock,
+       .irq_bus_sync_unlock    = lan78xx_irq_bus_sync_unlock,
+};
+
+static int lan78xx_setup_irq_domain(struct lan78xx_net *dev)
+{
+       struct device_node *of_node;
+       struct irq_domain *irqdomain;
+       unsigned int irqmap = 0;
+       u32 buf;
+       int ret = 0;
+
+       of_node = dev->udev->dev.parent->of_node;
+
+       mutex_init(&dev->domain_data.irq_lock);
+
+       lan78xx_read_reg(dev, INT_EP_CTL, &buf);
+       dev->domain_data.irqenable = buf;
+
+       dev->domain_data.irqchip = &lan78xx_irqchip;
+       dev->domain_data.irq_handler = handle_simple_irq;
+
+       irqdomain = irq_domain_add_simple(of_node, MAX_INT_EP, 0,
+                                         &chip_domain_ops, &dev->domain_data);
+       if (irqdomain) {
+               /* create mapping for PHY interrupt */
+               irqmap = irq_create_mapping(irqdomain, INT_EP_PHY);
+               if (!irqmap) {
+                       irq_domain_remove(irqdomain);
+
+                       irqdomain = NULL;
+                       ret = -EINVAL;
+               }
+       } else {
+               ret = -EINVAL;
+       }
+
+       dev->domain_data.irqdomain = irqdomain;
+       dev->domain_data.phyirq = irqmap;
+
+       return ret;
+}
+
+static void lan78xx_remove_irq_domain(struct lan78xx_net *dev)
+{
+       if (dev->domain_data.phyirq > 0) {
+               irq_dispose_mapping(dev->domain_data.phyirq);
+
+               if (dev->domain_data.irqdomain)
+                       irq_domain_remove(dev->domain_data.irqdomain);
+       }
+       dev->domain_data.phyirq = 0;
+       dev->domain_data.irqdomain = NULL;
+}
+
 static int lan78xx_phy_init(struct lan78xx_net *dev)
 {
        int ret;
@@ -1856,15 +1945,15 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
                return -EIO;
        }
 
-       /* Enable PHY interrupts.
-        * We handle our own interrupt
-        */
-       ret = phy_read(phydev, LAN88XX_INT_STS);
-       ret = phy_write(phydev, LAN88XX_INT_MASK,
-                       LAN88XX_INT_MASK_MDINTPIN_EN_ |
-                       LAN88XX_INT_MASK_LINK_CHANGE_);
+       /* if phyirq is not set, use polling mode in phylib */
+       if (dev->domain_data.phyirq > 0)
+               phydev->irq = dev->domain_data.phyirq;
+       else
+               phydev->irq = 0;
+       netdev_dbg(dev->net, "phydev->irq = %d\n", phydev->irq);
 
-       phydev->irq = PHY_IGNORE_INTERRUPT;
+       /* set to AUTOMDIX */
+       phydev->mdix = ETH_TP_MDI_AUTO;
 
        ret = phy_connect_direct(dev->net, phydev,
                                 lan78xx_link_status_change,
@@ -1875,9 +1964,6 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
                return -EIO;
        }
 
-       /* set to AUTOMDIX */
-       lan78xx_set_mdix_status(dev->net, ETH_TP_MDI_AUTO);
-
        /* MAC doesn't support 1000T Half */
        phydev->supported &= ~SUPPORTED_1000baseT_Half;
 
@@ -2255,11 +2341,6 @@ static int lan78xx_reset(struct lan78xx_net *dev)
        buf |= MAC_CR_AUTO_DUPLEX_ | MAC_CR_AUTO_SPEED_;
        ret = lan78xx_write_reg(dev, MAC_CR, buf);
 
-       /* enable PHY interrupts */
-       ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf);
-       buf |= INT_ENP_PHY_INT;
-       ret = lan78xx_write_reg(dev, INT_EP_CTL, buf);
-
        ret = lan78xx_read_reg(dev, MAC_TX, &buf);
        buf |= MAC_TX_TXEN_;
        ret = lan78xx_write_reg(dev, MAC_TX, buf);
@@ -2668,6 +2749,14 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf)
 
        dev->net->hw_features = dev->net->features;
 
+       ret = lan78xx_setup_irq_domain(dev);
+       if (ret < 0) {
+               netdev_warn(dev->net,
+                           "lan78xx_setup_irq_domain() failed : %d", ret);
+               kfree(pdata);
+               return ret;
+       }
+
        /* Init all registers */
        ret = lan78xx_reset(dev);
 
@@ -2684,6 +2773,8 @@ static void lan78xx_unbind(struct lan78xx_net *dev, struct usb_interface *intf)
 {
        struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
 
+       lan78xx_remove_irq_domain(dev);
+
        lan78xx_remove_mdio(dev);
 
        if (pdata) {
index 4213c28eeb43e45787621c5b10eef3893635b113..7dc61228c55b8af26f0623fa4cfb9dd512cd97ac 100644 (file)
@@ -1730,7 +1730,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc)
        u8 checksum = CHECKSUM_NONE;
        u32 opts2, opts3;
 
-       if (tp->version == RTL_VER_01)
+       if (tp->version == RTL_VER_01 || tp->version == RTL_VER_02)
                goto return_result;
 
        opts2 = le32_to_cpu(rx_desc->opts2);
@@ -1745,7 +1745,7 @@ static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc)
                        checksum = CHECKSUM_NONE;
                else
                        checksum = CHECKSUM_UNNECESSARY;
-       } else if (RD_IPV6_CS) {
+       } else if (opts2 & RD_IPV6_CS) {
                if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF))
                        checksum = CHECKSUM_UNNECESSARY;
                else if ((opts2 & RD_TCP_CS) && !(opts3 & TCPF))
@@ -3266,10 +3266,8 @@ static int rtl8152_open(struct net_device *netdev)
                goto out;
 
        res = usb_autopm_get_interface(tp->intf);
-       if (res < 0) {
-               free_all_mem(tp);
-               goto out;
-       }
+       if (res < 0)
+               goto out_free;
 
        mutex_lock(&tp->control);
 
@@ -3285,10 +3283,9 @@ static int rtl8152_open(struct net_device *netdev)
                        netif_device_detach(tp->netdev);
                netif_warn(tp, ifup, netdev, "intr_urb submit failed: %d\n",
                           res);
-               free_all_mem(tp);
-       } else {
-               napi_enable(&tp->napi);
+               goto out_unlock;
        }
+       napi_enable(&tp->napi);
 
        mutex_unlock(&tp->control);
 
@@ -3297,7 +3294,13 @@ static int rtl8152_open(struct net_device *netdev)
        tp->pm_notifier.notifier_call = rtl_notifier;
        register_pm_notifier(&tp->pm_notifier);
 #endif
+       return 0;
 
+out_unlock:
+       mutex_unlock(&tp->control);
+       usb_autopm_put_interface(tp->intf);
+out_free:
+       free_all_mem(tp);
 out:
        return res;
 }
index 720809f82a0e60aeb7c3f894d74eb2ba8eb90614..a21d93a54cef35c0a96c85dc5d14a2bab677932c 100644 (file)
@@ -1485,6 +1485,11 @@ static void virtnet_free_queues(struct virtnet_info *vi)
                netif_napi_del(&vi->rq[i].napi);
        }
 
+       /* We called napi_hash_del() before netif_napi_del(),
+        * we need to respect an RCU grace period before freeing vi->rq
+        */
+       synchronize_net();
+
        kfree(vi->rq);
        kfree(vi->sq);
 }
@@ -1870,17 +1875,22 @@ static int virtnet_probe(struct virtio_device *vdev)
                mtu = virtio_cread16(vdev,
                                     offsetof(struct virtio_net_config,
                                              mtu));
-               if (mtu < dev->min_mtu || mtu > dev->max_mtu)
+               if (mtu < dev->min_mtu) {
                        __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
-               else
+               } else {
                        dev->mtu = mtu;
+                       dev->max_mtu = mtu;
+               }
        }
 
        if (vi->any_header_sg)
                dev->needed_headroom = vi->hdr_len;
 
-       /* Use single tx/rx queue pair as default */
-       vi->curr_queue_pairs = 1;
+       /* Enable multiqueue by default */
+       if (num_online_cpus() >= max_queue_pairs)
+               vi->curr_queue_pairs = max_queue_pairs;
+       else
+               vi->curr_queue_pairs = num_online_cpus();
        vi->max_queue_pairs = max_queue_pairs;
 
        /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
@@ -1911,6 +1921,8 @@ static int virtnet_probe(struct virtio_device *vdev)
                goto free_unregister_netdev;
        }
 
+       virtnet_set_affinity(vi);
+
        /* Assume link up if device can't report link status,
           otherwise get link status from config. */
        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
@@ -2035,23 +2047,33 @@ static struct virtio_device_id id_table[] = {
        { 0 },
 };
 
+#define VIRTNET_FEATURES \
+       VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
+       VIRTIO_NET_F_MAC, \
+       VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
+       VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
+       VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
+       VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
+       VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
+       VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
+       VIRTIO_NET_F_CTRL_MAC_ADDR, \
+       VIRTIO_NET_F_MTU
+
 static unsigned int features[] = {
-       VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM,
-       VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
-       VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
-       VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
-       VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
-       VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
-       VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
-       VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
-       VIRTIO_NET_F_CTRL_MAC_ADDR,
+       VIRTNET_FEATURES,
+};
+
+static unsigned int features_legacy[] = {
+       VIRTNET_FEATURES,
+       VIRTIO_NET_F_GSO,
        VIRTIO_F_ANY_LAYOUT,
-       VIRTIO_NET_F_MTU,
 };
 
 static struct virtio_driver virtio_net_driver = {
        .feature_table = features,
        .feature_table_size = ARRAY_SIZE(features),
+       .feature_table_legacy = features_legacy,
+       .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
        .driver.name =  KBUILD_MODNAME,
        .driver.owner = THIS_MODULE,
        .id_table =     id_table,
index 0c36de121eb0fcf1a2996a2aea8f27b03aa97b16..e34b1297c96af96e6a9d6a6841c4bbc24e74254e 100644 (file)
@@ -2279,6 +2279,7 @@ vmxnet3_set_mc(struct net_device *netdev)
                                        &adapter->shared->devRead.rxFilterConf;
        u8 *new_table = NULL;
        dma_addr_t new_table_pa = 0;
+       bool new_table_pa_valid = false;
        u32 new_mode = VMXNET3_RXM_UCAST;
 
        if (netdev->flags & IFF_PROMISC) {
@@ -2307,13 +2308,15 @@ vmxnet3_set_mc(struct net_device *netdev)
                                                        new_table,
                                                        sz,
                                                        PCI_DMA_TODEVICE);
+                               if (!dma_mapping_error(&adapter->pdev->dev,
+                                                      new_table_pa)) {
+                                       new_mode |= VMXNET3_RXM_MCAST;
+                                       new_table_pa_valid = true;
+                                       rxConf->mfTablePA = cpu_to_le64(
+                                                               new_table_pa);
+                               }
                        }
-
-                       if (!dma_mapping_error(&adapter->pdev->dev,
-                                              new_table_pa)) {
-                               new_mode |= VMXNET3_RXM_MCAST;
-                               rxConf->mfTablePA = cpu_to_le64(new_table_pa);
-                       } else {
+                       if (!new_table_pa_valid) {
                                netdev_info(netdev,
                                            "failed to copy mcast list, setting ALL_MULTI\n");
                                new_mode |= VMXNET3_RXM_ALL_MULTI;
@@ -2338,7 +2341,7 @@ vmxnet3_set_mc(struct net_device *netdev)
                               VMXNET3_CMD_UPDATE_MAC_FILTERS);
        spin_unlock_irqrestore(&adapter->cmd_lock, flags);
 
-       if (new_table_pa)
+       if (new_table_pa_valid)
                dma_unmap_single(&adapter->pdev->dev, new_table_pa,
                                 rxConf->mfTableLen, PCI_DMA_TODEVICE);
        kfree(new_table);
index 85c271c70d42fd57983f9fba822fb93d097d4590..3bca24651dc0a1c5121348a21d7431d8384d2024 100644 (file)
@@ -272,11 +272,6 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
        if (IS_ERR(rt))
                goto err;
 
-       if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
-               ip_rt_put(rt);
-               goto err;
-       }
-
        skb_dst_drop(skb);
 
        /* if dst.dev is loopback or the VRF device again this is locally
@@ -611,6 +606,10 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
        struct dst_entry *dst = NULL;
        struct rtable *rth;
 
+       /* don't divert multicast */
+       if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+               return skb;
+
        rcu_read_lock();
 
        rth = rcu_dereference(vrf->rth);
@@ -956,6 +955,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
        if (skb->pkt_type == PACKET_LOOPBACK) {
                skb->dev = vrf_dev;
                skb->skb_iif = vrf_dev->ifindex;
+               IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
                skb->pkt_type = PACKET_HOST;
                goto out;
        }
@@ -996,6 +996,10 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
 {
        skb->dev = vrf_dev;
        skb->skb_iif = vrf_dev->ifindex;
+       IPCB(skb)->flags |= IPSKB_L3SLAVE;
+
+       if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+               goto out;
 
        /* loopback traffic; do not push through packet taps again.
         * Reset pkt_type for upper layers to process skb
@@ -1160,8 +1164,19 @@ static int vrf_add_fib_rules(const struct net_device *dev)
        if (err < 0)
                goto ipv6_err;
 
+#if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES)
+       err = vrf_fib_rule(dev, RTNL_FAMILY_IPMR, true);
+       if (err < 0)
+               goto ipmr_err;
+#endif
+
        return 0;
 
+#if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES)
+ipmr_err:
+       vrf_fib_rule(dev, AF_INET6,  false);
+#endif
+
 ipv6_err:
        vrf_fib_rule(dev, AF_INET,  false);
 
index c0170b6956bbc9b0b42579d81fbef50eb3338af3..21e92be6e56c49d5a1c4b12c790d7b5e79f2f793 100644 (file)
@@ -52,7 +52,7 @@ static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
-static int vxlan_net_id;
+static unsigned int vxlan_net_id;
 static struct rtnl_link_ops vxlan_link_ops;
 
 static const u8 all_zeros_mac[ETH_ALEN + 2];
@@ -583,7 +583,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk,
                }
        }
 
-       pp = eth_gro_receive(head, skb);
+       pp = call_gro_receive(eth_gro_receive, head, skb);
        flush = 0;
 
 out:
@@ -943,17 +943,22 @@ static bool vxlan_snoop(struct net_device *dev,
 static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
 {
        struct vxlan_dev *vxlan;
+       struct vxlan_sock *sock4;
+#if IS_ENABLED(CONFIG_IPV6)
+       struct vxlan_sock *sock6;
+#endif
        unsigned short family = dev->default_dst.remote_ip.sa.sa_family;
 
+       sock4 = rtnl_dereference(dev->vn4_sock);
+
        /* The vxlan_sock is only used by dev, leaving group has
         * no effect on other vxlan devices.
         */
-       if (family == AF_INET && dev->vn4_sock &&
-           atomic_read(&dev->vn4_sock->refcnt) == 1)
+       if (family == AF_INET && sock4 && atomic_read(&sock4->refcnt) == 1)
                return false;
 #if IS_ENABLED(CONFIG_IPV6)
-       if (family == AF_INET6 && dev->vn6_sock &&
-           atomic_read(&dev->vn6_sock->refcnt) == 1)
+       sock6 = rtnl_dereference(dev->vn6_sock);
+       if (family == AF_INET6 && sock6 && atomic_read(&sock6->refcnt) == 1)
                return false;
 #endif
 
@@ -961,10 +966,12 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
                if (!netif_running(vxlan->dev) || vxlan == dev)
                        continue;
 
-               if (family == AF_INET && vxlan->vn4_sock != dev->vn4_sock)
+               if (family == AF_INET &&
+                   rtnl_dereference(vxlan->vn4_sock) != sock4)
                        continue;
 #if IS_ENABLED(CONFIG_IPV6)
-               if (family == AF_INET6 && vxlan->vn6_sock != dev->vn6_sock)
+               if (family == AF_INET6 &&
+                   rtnl_dereference(vxlan->vn6_sock) != sock6)
                        continue;
 #endif
 
@@ -1005,22 +1012,25 @@ static bool __vxlan_sock_release_prep(struct vxlan_sock *vs)
 
 static void vxlan_sock_release(struct vxlan_dev *vxlan)
 {
-       bool ipv4 = __vxlan_sock_release_prep(vxlan->vn4_sock);
+       struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
 #if IS_ENABLED(CONFIG_IPV6)
-       bool ipv6 = __vxlan_sock_release_prep(vxlan->vn6_sock);
+       struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
+
+       rcu_assign_pointer(vxlan->vn6_sock, NULL);
 #endif
 
+       rcu_assign_pointer(vxlan->vn4_sock, NULL);
        synchronize_net();
 
-       if (ipv4) {
-               udp_tunnel_sock_release(vxlan->vn4_sock->sock);
-               kfree(vxlan->vn4_sock);
+       if (__vxlan_sock_release_prep(sock4)) {
+               udp_tunnel_sock_release(sock4->sock);
+               kfree(sock4);
        }
 
 #if IS_ENABLED(CONFIG_IPV6)
-       if (ipv6) {
-               udp_tunnel_sock_release(vxlan->vn6_sock->sock);
-               kfree(vxlan->vn6_sock);
+       if (__vxlan_sock_release_prep(sock6)) {
+               udp_tunnel_sock_release(sock6->sock);
+               kfree(sock6);
        }
 #endif
 }
@@ -1036,18 +1046,21 @@ static int vxlan_igmp_join(struct vxlan_dev *vxlan)
        int ret = -EINVAL;
 
        if (ip->sa.sa_family == AF_INET) {
+               struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
                struct ip_mreqn mreq = {
                        .imr_multiaddr.s_addr   = ip->sin.sin_addr.s_addr,
                        .imr_ifindex            = ifindex,
                };
 
-               sk = vxlan->vn4_sock->sock->sk;
+               sk = sock4->sock->sk;
                lock_sock(sk);
                ret = ip_mc_join_group(sk, &mreq);
                release_sock(sk);
 #if IS_ENABLED(CONFIG_IPV6)
        } else {
-               sk = vxlan->vn6_sock->sock->sk;
+               struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
+
+               sk = sock6->sock->sk;
                lock_sock(sk);
                ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
                                                   &ip->sin6.sin6_addr);
@@ -1067,18 +1080,21 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
        int ret = -EINVAL;
 
        if (ip->sa.sa_family == AF_INET) {
+               struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock);
                struct ip_mreqn mreq = {
                        .imr_multiaddr.s_addr   = ip->sin.sin_addr.s_addr,
                        .imr_ifindex            = ifindex,
                };
 
-               sk = vxlan->vn4_sock->sock->sk;
+               sk = sock4->sock->sk;
                lock_sock(sk);
                ret = ip_mc_leave_group(sk, &mreq);
                release_sock(sk);
 #if IS_ENABLED(CONFIG_IPV6)
        } else {
-               sk = vxlan->vn6_sock->sock->sk;
+               struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock);
+
+               sk = sock6->sock->sk;
                lock_sock(sk);
                ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
                                                   &ip->sin6.sin6_addr);
@@ -1734,21 +1750,16 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
        }
 
        min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
-                       + VXLAN_HLEN + iphdr_len
-                       + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
+                       + VXLAN_HLEN + iphdr_len;
 
        /* Need space for new headers (invalidates iph ptr) */
        err = skb_cow_head(skb, min_headroom);
        if (unlikely(err))
-               goto out_free;
-
-       skb = vlan_hwaccel_push_inside(skb);
-       if (WARN_ON(!skb))
-               return -ENOMEM;
+               return err;
 
        err = iptunnel_handle_offloads(skb, type);
        if (err)
-               goto out_free;
+               return err;
 
        vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
        vxh->vx_flags = VXLAN_HF_VNI;
@@ -1772,19 +1783,16 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
        if (vxflags & VXLAN_F_GPE) {
                err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
                if (err < 0)
-                       goto out_free;
+                       return err;
                inner_protocol = skb->protocol;
        }
 
        skb_set_inner_protocol(skb, inner_protocol);
        return 0;
-
-out_free:
-       kfree_skb(skb);
-       return err;
 }
 
-static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
+static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev,
+                                     struct vxlan_sock *sock4,
                                      struct sk_buff *skb, int oif, u8 tos,
                                      __be32 daddr, __be32 *saddr,
                                      struct dst_cache *dst_cache,
@@ -1794,6 +1802,9 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
        struct rtable *rt = NULL;
        struct flowi4 fl4;
 
+       if (!sock4)
+               return ERR_PTR(-EIO);
+
        if (tos && !info)
                use_cache = false;
        if (use_cache) {
@@ -1811,16 +1822,27 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
        fl4.saddr = *saddr;
 
        rt = ip_route_output_key(vxlan->net, &fl4);
-       if (!IS_ERR(rt)) {
+       if (likely(!IS_ERR(rt))) {
+               if (rt->dst.dev == dev) {
+                       netdev_dbg(dev, "circular route to %pI4\n", &daddr);
+                       ip_rt_put(rt);
+                       return ERR_PTR(-ELOOP);
+               }
+
                *saddr = fl4.saddr;
                if (use_cache)
                        dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
+       } else {
+               netdev_dbg(dev, "no route to %pI4\n", &daddr);
+               return ERR_PTR(-ENETUNREACH);
        }
        return rt;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
+                                         struct net_device *dev,
+                                         struct vxlan_sock *sock6,
                                          struct sk_buff *skb, int oif, u8 tos,
                                          __be32 label,
                                          const struct in6_addr *daddr,
@@ -1833,6 +1855,9 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
        struct flowi6 fl6;
        int err;
 
+       if (!sock6)
+               return ERR_PTR(-EIO);
+
        if (tos && !info)
                use_cache = false;
        if (use_cache) {
@@ -1850,10 +1875,18 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
        fl6.flowi6_proto = IPPROTO_UDP;
 
        err = ipv6_stub->ipv6_dst_lookup(vxlan->net,
-                                        vxlan->vn6_sock->sock->sk,
+                                        sock6->sock->sk,
                                         &ndst, &fl6);
-       if (err < 0)
-               return ERR_PTR(err);
+       if (unlikely(err < 0)) {
+               netdev_dbg(dev, "no route to %pI6\n", daddr);
+               return ERR_PTR(-ENETUNREACH);
+       }
+
+       if (unlikely(ndst->dev == dev)) {
+               netdev_dbg(dev, "circular route to %pI6\n", daddr);
+               dst_release(ndst);
+               return ERR_PTR(-ELOOP);
+       }
 
        *saddr = fl6.saddr;
        if (use_cache)
@@ -1907,23 +1940,55 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
        }
 }
 
+static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
+                                struct vxlan_dev *vxlan, union vxlan_addr *daddr,
+                                __be32 dst_port, __be32 vni, struct dst_entry *dst,
+                                u32 rt_flags)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+       /* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
+        * RTF_LOCAL is equal to RTCF_LOCAL. So to keep code simple
+        * we can use RTCF_LOCAL which works for ipv4 and ipv6 route entry.
+        */
+       BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
+#endif
+       /* Bypass encapsulation if the destination is local */
+       if (rt_flags & RTCF_LOCAL &&
+           !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
+               struct vxlan_dev *dst_vxlan;
+
+               dst_release(dst);
+               dst_vxlan = vxlan_find_vni(vxlan->net, vni,
+                                          daddr->sa.sa_family, dst_port,
+                                          vxlan->flags);
+               if (!dst_vxlan) {
+                       dev->stats.tx_errors++;
+                       kfree_skb(skb);
+
+                       return -ENOENT;
+               }
+               vxlan_encap_bypass(skb, vxlan, dst_vxlan);
+               return 1;
+       }
+
+       return 0;
+}
+
 static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                           struct vxlan_rdst *rdst, bool did_rsc)
 {
        struct dst_cache *dst_cache;
        struct ip_tunnel_info *info;
        struct vxlan_dev *vxlan = netdev_priv(dev);
-       struct sock *sk;
-       struct rtable *rt = NULL;
-       const struct iphdr *old_iph;
+       const struct iphdr *old_iph = ip_hdr(skb);
        union vxlan_addr *dst;
        union vxlan_addr remote_ip, local_ip;
        union vxlan_addr *src;
        struct vxlan_metadata _md;
        struct vxlan_metadata *md = &_md;
        __be16 src_port = 0, dst_port;
+       struct dst_entry *ndst = NULL;
        __be32 vni, label;
-       __be16 df = 0;
        __u8 tos, ttl;
        int err;
        u32 flags = vxlan->flags;
@@ -1933,19 +1998,40 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
        info = skb_tunnel_info(skb);
 
        if (rdst) {
+               dst = &rdst->remote_ip;
+               if (vxlan_addr_any(dst)) {
+                       if (did_rsc) {
+                               /* short-circuited back to local bridge */
+                               vxlan_encap_bypass(skb, vxlan, vxlan);
+                               return;
+                       }
+                       goto drop;
+               }
+
                dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
                vni = rdst->remote_vni;
-               dst = &rdst->remote_ip;
                src = &vxlan->cfg.saddr;
                dst_cache = &rdst->dst_cache;
+               md->gbp = skb->mark;
+               ttl = vxlan->cfg.ttl;
+               if (!ttl && vxlan_addr_multicast(dst))
+                       ttl = 1;
+
+               tos = vxlan->cfg.tos;
+               if (tos == 1)
+                       tos = ip_tunnel_get_dsfield(old_iph, skb);
+
+               if (dst->sa.sa_family == AF_INET)
+                       udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
+               else
+                       udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
+               label = vxlan->cfg.label;
        } else {
                if (!info) {
                        WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
                                  dev->name);
                        goto drop;
                }
-               dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
-               vni = tunnel_id_to_key32(info->key.tun_id);
                remote_ip.sa.sa_family = ip_tunnel_info_af(info);
                if (remote_ip.sa.sa_family == AF_INET) {
                        remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
@@ -1955,179 +2041,111 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                        local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
                }
                dst = &remote_ip;
+               dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
+               vni = tunnel_id_to_key32(info->key.tun_id);
                src = &local_ip;
                dst_cache = &info->dst_cache;
-       }
-
-       if (vxlan_addr_any(dst)) {
-               if (did_rsc) {
-                       /* short-circuited back to local bridge */
-                       vxlan_encap_bypass(skb, vxlan, vxlan);
-                       return;
-               }
-               goto drop;
-       }
-
-       old_iph = ip_hdr(skb);
-
-       ttl = vxlan->cfg.ttl;
-       if (!ttl && vxlan_addr_multicast(dst))
-               ttl = 1;
-
-       tos = vxlan->cfg.tos;
-       if (tos == 1)
-               tos = ip_tunnel_get_dsfield(old_iph, skb);
-
-       label = vxlan->cfg.label;
-       src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
-                                    vxlan->cfg.port_max, true);
-
-       if (info) {
+               if (info->options_len)
+                       md = ip_tunnel_info_opts(info);
                ttl = info->key.ttl;
                tos = info->key.tos;
                label = info->key.label;
                udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
-
-               if (info->options_len)
-                       md = ip_tunnel_info_opts(info);
-       } else {
-               md->gbp = skb->mark;
        }
+       src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+                                    vxlan->cfg.port_max, true);
 
        if (dst->sa.sa_family == AF_INET) {
-               if (!vxlan->vn4_sock)
-                       goto drop;
-               sk = vxlan->vn4_sock->sock->sk;
+               struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
+               struct rtable *rt;
+               __be16 df = 0;
 
-               rt = vxlan_get_route(vxlan, skb,
+               rt = vxlan_get_route(vxlan, dev, sock4, skb,
                                     rdst ? rdst->remote_ifindex : 0, tos,
                                     dst->sin.sin_addr.s_addr,
                                     &src->sin.sin_addr.s_addr,
                                     dst_cache, info);
                if (IS_ERR(rt)) {
-                       netdev_dbg(dev, "no route to %pI4\n",
-                                  &dst->sin.sin_addr.s_addr);
-                       dev->stats.tx_carrier_errors++;
+                       err = PTR_ERR(rt);
                        goto tx_error;
                }
 
-               if (rt->dst.dev == dev) {
-                       netdev_dbg(dev, "circular route to %pI4\n",
-                                  &dst->sin.sin_addr.s_addr);
-                       dev->stats.collisions++;
-                       goto rt_tx_error;
-               }
-
                /* Bypass encapsulation if the destination is local */
-               if (!info && rt->rt_flags & RTCF_LOCAL &&
-                   !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
-                       struct vxlan_dev *dst_vxlan;
-
-                       ip_rt_put(rt);
-                       dst_vxlan = vxlan_find_vni(vxlan->net, vni,
-                                                  dst->sa.sa_family, dst_port,
-                                                  vxlan->flags);
-                       if (!dst_vxlan)
-                               goto tx_error;
-                       vxlan_encap_bypass(skb, vxlan, dst_vxlan);
-                       return;
-               }
-
-               if (!info)
-                       udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
-               else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+               if (!info) {
+                       err = encap_bypass_if_local(skb, dev, vxlan, dst,
+                                                   dst_port, vni, &rt->dst,
+                                                   rt->rt_flags);
+                       if (err)
+                               return;
+               } else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
                        df = htons(IP_DF);
+               }
 
+               ndst = &rt->dst;
                tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
                ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
-               err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr),
+               err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
                                      vni, md, flags, udp_sum);
                if (err < 0)
-                       goto xmit_tx_error;
+                       goto tx_error;
 
-               udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr,
+               udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, src->sin.sin_addr.s_addr,
                                    dst->sin.sin_addr.s_addr, tos, ttl, df,
                                    src_port, dst_port, xnet, !udp_sum);
 #if IS_ENABLED(CONFIG_IPV6)
        } else {
-               struct dst_entry *ndst;
-               u32 rt6i_flags;
-
-               if (!vxlan->vn6_sock)
-                       goto drop;
-               sk = vxlan->vn6_sock->sock->sk;
+               struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
 
-               ndst = vxlan6_get_route(vxlan, skb,
+               ndst = vxlan6_get_route(vxlan, dev, sock6, skb,
                                        rdst ? rdst->remote_ifindex : 0, tos,
                                        label, &dst->sin6.sin6_addr,
                                        &src->sin6.sin6_addr,
                                        dst_cache, info);
                if (IS_ERR(ndst)) {
-                       netdev_dbg(dev, "no route to %pI6\n",
-                                  &dst->sin6.sin6_addr);
-                       dev->stats.tx_carrier_errors++;
+                       err = PTR_ERR(ndst);
+                       ndst = NULL;
                        goto tx_error;
                }
 
-               if (ndst->dev == dev) {
-                       netdev_dbg(dev, "circular route to %pI6\n",
-                                  &dst->sin6.sin6_addr);
-                       dst_release(ndst);
-                       dev->stats.collisions++;
-                       goto tx_error;
-               }
+               if (!info) {
+                       u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
 
-               /* Bypass encapsulation if the destination is local */
-               rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
-               if (!info && rt6i_flags & RTF_LOCAL &&
-                   !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
-                       struct vxlan_dev *dst_vxlan;
-
-                       dst_release(ndst);
-                       dst_vxlan = vxlan_find_vni(vxlan->net, vni,
-                                                  dst->sa.sa_family, dst_port,
-                                                  vxlan->flags);
-                       if (!dst_vxlan)
-                               goto tx_error;
-                       vxlan_encap_bypass(skb, vxlan, dst_vxlan);
-                       return;
+                       err = encap_bypass_if_local(skb, dev, vxlan, dst,
+                                                   dst_port, vni, ndst,
+                                                   rt6i_flags);
+                       if (err)
+                               return;
                }
 
-               if (!info)
-                       udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
-
                tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
                ttl = ttl ? : ip6_dst_hoplimit(ndst);
                skb_scrub_packet(skb, xnet);
                err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
                                      vni, md, flags, udp_sum);
-               if (err < 0) {
-                       dst_release(ndst);
-                       dev->stats.tx_errors++;
-                       return;
-               }
-               udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
+               if (err < 0)
+                       goto tx_error;
+
+               udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
                                     &src->sin6.sin6_addr,
                                     &dst->sin6.sin6_addr, tos, ttl,
                                     label, src_port, dst_port, !udp_sum);
 #endif
        }
-
        return;
 
 drop:
        dev->stats.tx_dropped++;
-       goto tx_free;
+       dev_kfree_skb(skb);
+       return;
 
-xmit_tx_error:
-       /* skb is already freed. */
-       skb = NULL;
-rt_tx_error:
-       ip_rt_put(rt);
 tx_error:
+       if (err == -ELOOP)
+               dev->stats.collisions++;
+       else if (err == -ENETUNREACH)
+               dev->stats.tx_carrier_errors++;
+       dst_release(ndst);
        dev->stats.tx_errors++;
-tx_free:
-       dev_kfree_skb(skb);
+       kfree_skb(skb);
 }
 
 /* Transmit local packets over Vxlan
@@ -2403,11 +2421,10 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
        dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
 
        if (ip_tunnel_info_af(info) == AF_INET) {
+               struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
                struct rtable *rt;
 
-               if (!vxlan->vn4_sock)
-                       return -EINVAL;
-               rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
+               rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos,
                                     info->key.u.ipv4.dst,
                                     &info->key.u.ipv4.src, NULL, info);
                if (IS_ERR(rt))
@@ -2415,11 +2432,10 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
                ip_rt_put(rt);
        } else {
 #if IS_ENABLED(CONFIG_IPV6)
+               struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
                struct dst_entry *ndst;
 
-               if (!vxlan->vn6_sock)
-                       return -EINVAL;
-               ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
+               ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos,
                                        info->key.label, &info->key.u.ipv6.dst,
                                        &info->key.u.ipv6.src, NULL, info);
                if (IS_ERR(ndst))
@@ -2507,10 +2523,8 @@ static void vxlan_setup(struct net_device *dev)
        dev->features   |= NETIF_F_GSO_SOFTWARE;
 
        dev->vlan_features = dev->features;
-       dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
        dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
        dev->hw_features |= NETIF_F_GSO_SOFTWARE;
-       dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
        netif_keep_dst(dev);
        dev->priv_flags |= IFF_NO_QUEUE;
 
@@ -2728,10 +2742,10 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
                return PTR_ERR(vs);
 #if IS_ENABLED(CONFIG_IPV6)
        if (ipv6)
-               vxlan->vn6_sock = vs;
+               rcu_assign_pointer(vxlan->vn6_sock, vs);
        else
 #endif
-               vxlan->vn4_sock = vs;
+               rcu_assign_pointer(vxlan->vn4_sock, vs);
        vxlan_vs_add_dev(vs, vxlan);
        return 0;
 }
@@ -2742,9 +2756,9 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan)
        bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA;
        int ret = 0;
 
-       vxlan->vn4_sock = NULL;
+       RCU_INIT_POINTER(vxlan->vn4_sock, NULL);
 #if IS_ENABLED(CONFIG_IPV6)
-       vxlan->vn6_sock = NULL;
+       RCU_INIT_POINTER(vxlan->vn6_sock, NULL);
        if (ipv6 || metadata)
                ret = __vxlan_sock_add(vxlan, true);
 #endif
index 33ab3345d333b68f983b61152d074e20f16e5e1e..4e9fe75d70675d052ad2f2be3f513b5c20dfef9a 100644 (file)
@@ -294,7 +294,7 @@ config FSL_UCC_HDLC
 config SLIC_DS26522
        tristate "Slic Maxim ds26522 card support"
        depends on SPI
-       depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE
+       depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE || COMPILE_TEST
        help
          This module initializes and configures the slic maxim card
          in T1 or E1 mode.
index d06a887a2352141bfb0e72c46e37ad6a28f6a1de..b776a0ab106c0d55b1b7cdf14b79d91621d27aa1 100644 (file)
@@ -223,12 +223,19 @@ static int slic_ds26522_probe(struct spi_device *spi)
        return ret;
 }
 
+static const struct spi_device_id slic_ds26522_id[] = {
+       { .name = "ds26522" },
+       { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(spi, slic_ds26522_id);
+
 static const struct of_device_id slic_ds26522_match[] = {
        {
         .compatible = "maxim,ds26522",
         },
        {},
 };
+MODULE_DEVICE_TABLE(of, slic_ds26522_match);
 
 static struct spi_driver slic_ds26522_driver = {
        .driver = {
@@ -239,6 +246,7 @@ static struct spi_driver slic_ds26522_driver = {
                   },
        .probe = slic_ds26522_probe,
        .remove = slic_ds26522_remove,
+       .id_table = slic_ds26522_id,
 };
 
 static int __init slic_ds26522_init(void)
index 8c8edaf1bba612ca3592fbab24077a2ec8b3f1e0..8f5a3f4a43f2d0d215ea116f89417cf21b5c9d51 100644 (file)
@@ -17,6 +17,19 @@ menuconfig WLAN
 
 if WLAN
 
+config WIRELESS_WDS
+       bool "mac80211-based legacy WDS support" if EXPERT
+       help
+         This option enables the deprecated WDS support, the newer
+         mac80211-based 4-addr AP/client support supersedes it with
+         a much better feature set (HT, VHT, ...)
+
+         We plan to remove this option and code, so if you find
+         that you have to enable it, please let us know on the
+         linux-wireless@vger.kernel.org mailing list, so we can
+         help you migrate to 4-addr AP/client (or, if it's really
+         necessary, give up on our plan of removing it).
+
 source "drivers/net/wireless/admtek/Kconfig"
 source "drivers/net/wireless/ath/Kconfig"
 source "drivers/net/wireless/atmel/Kconfig"
index 6d06ec5cd087fcfc6f928c2689043263f61b8c2a..09ff8b8a644116da3532b756e29b376f64545267 100644 (file)
@@ -451,6 +451,7 @@ struct ath10k_debug {
        u32 pktlog_filter;
        u32 reg_addr;
        u32 nf_cal_period;
+       void *cal_data;
 
        struct ath10k_fw_crash_data *fw_crash_data;
 };
index 832da6ed9f13c002ca0c597793c6da618b6c29d1..82a4c67f3672ba8e7f05951ee3dd6916ac1ca356 100644 (file)
@@ -30,6 +30,8 @@
 /* ms */
 #define ATH10K_DEBUG_HTT_STATS_INTERVAL 1000
 
+#define ATH10K_DEBUG_CAL_DATA_LEN 12064
+
 #define ATH10K_FW_CRASH_DUMP_VERSION 1
 
 /**
@@ -1451,56 +1453,51 @@ static const struct file_operations fops_fw_dbglog = {
        .llseek = default_llseek,
 };
 
-static int ath10k_debug_cal_data_open(struct inode *inode, struct file *file)
+static int ath10k_debug_cal_data_fetch(struct ath10k *ar)
 {
-       struct ath10k *ar = inode->i_private;
-       void *buf;
        u32 hi_addr;
        __le32 addr;
        int ret;
 
-       mutex_lock(&ar->conf_mutex);
-
-       if (ar->state != ATH10K_STATE_ON &&
-           ar->state != ATH10K_STATE_UTF) {
-               ret = -ENETDOWN;
-               goto err;
-       }
+       lockdep_assert_held(&ar->conf_mutex);
 
-       buf = vmalloc(ar->hw_params.cal_data_len);
-       if (!buf) {
-               ret = -ENOMEM;
-               goto err;
-       }
+       if (WARN_ON(ar->hw_params.cal_data_len > ATH10K_DEBUG_CAL_DATA_LEN))
+               return -EINVAL;
 
        hi_addr = host_interest_item_address(HI_ITEM(hi_board_data));
 
        ret = ath10k_hif_diag_read(ar, hi_addr, &addr, sizeof(addr));
        if (ret) {
-               ath10k_warn(ar, "failed to read hi_board_data address: %d\n", ret);
-               goto err_vfree;
+               ath10k_warn(ar, "failed to read hi_board_data address: %d\n",
+                           ret);
+               return ret;
        }
 
-       ret = ath10k_hif_diag_read(ar, le32_to_cpu(addr), buf,
+       ret = ath10k_hif_diag_read(ar, le32_to_cpu(addr), ar->debug.cal_data,
                                   ar->hw_params.cal_data_len);
        if (ret) {
                ath10k_warn(ar, "failed to read calibration data: %d\n", ret);
-               goto err_vfree;
+               return ret;
        }
 
-       file->private_data = buf;
+       return 0;
+}
 
-       mutex_unlock(&ar->conf_mutex);
+static int ath10k_debug_cal_data_open(struct inode *inode, struct file *file)
+{
+       struct ath10k *ar = inode->i_private;
 
-       return 0;
+       mutex_lock(&ar->conf_mutex);
 
-err_vfree:
-       vfree(buf);
+       if (ar->state == ATH10K_STATE_ON ||
+           ar->state == ATH10K_STATE_UTF) {
+               ath10k_debug_cal_data_fetch(ar);
+       }
 
-err:
+       file->private_data = ar;
        mutex_unlock(&ar->conf_mutex);
 
-       return ret;
+       return 0;
 }
 
 static ssize_t ath10k_debug_cal_data_read(struct file *file,
@@ -1508,18 +1505,16 @@ static ssize_t ath10k_debug_cal_data_read(struct file *file,
                                          size_t count, loff_t *ppos)
 {
        struct ath10k *ar = file->private_data;
-       void *buf = file->private_data;
 
-       return simple_read_from_buffer(user_buf, count, ppos,
-                                      buf, ar->hw_params.cal_data_len);
-}
+       mutex_lock(&ar->conf_mutex);
 
-static int ath10k_debug_cal_data_release(struct inode *inode,
-                                        struct file *file)
-{
-       vfree(file->private_data);
+       count = simple_read_from_buffer(user_buf, count, ppos,
+                                       ar->debug.cal_data,
+                                       ar->hw_params.cal_data_len);
 
-       return 0;
+       mutex_unlock(&ar->conf_mutex);
+
+       return count;
 }
 
 static ssize_t ath10k_write_ani_enable(struct file *file,
@@ -1580,7 +1575,6 @@ static const struct file_operations fops_ani_enable = {
 static const struct file_operations fops_cal_data = {
        .open = ath10k_debug_cal_data_open,
        .read = ath10k_debug_cal_data_read,
-       .release = ath10k_debug_cal_data_release,
        .owner = THIS_MODULE,
        .llseek = default_llseek,
 };
@@ -1932,6 +1926,8 @@ void ath10k_debug_stop(struct ath10k *ar)
 {
        lockdep_assert_held(&ar->conf_mutex);
 
+       ath10k_debug_cal_data_fetch(ar);
+
        /* Must not use _sync to avoid deadlock, we do that in
         * ath10k_debug_destroy(). The check for htt_stats_mask is to avoid
         * warning from del_timer(). */
@@ -2344,6 +2340,10 @@ int ath10k_debug_create(struct ath10k *ar)
        if (!ar->debug.fw_crash_data)
                return -ENOMEM;
 
+       ar->debug.cal_data = vzalloc(ATH10K_DEBUG_CAL_DATA_LEN);
+       if (!ar->debug.cal_data)
+               return -ENOMEM;
+
        INIT_LIST_HEAD(&ar->debug.fw_stats.pdevs);
        INIT_LIST_HEAD(&ar->debug.fw_stats.vdevs);
        INIT_LIST_HEAD(&ar->debug.fw_stats.peers);
@@ -2357,6 +2357,9 @@ void ath10k_debug_destroy(struct ath10k *ar)
        vfree(ar->debug.fw_crash_data);
        ar->debug.fw_crash_data = NULL;
 
+       vfree(ar->debug.cal_data);
+       ar->debug.cal_data = NULL;
+
        ath10k_debug_fw_stats_reset(ar);
 
        kfree(ar->debug.tpc_stats);
index db6ddf974d1d3a762024fbb6d63fa0752d014ce6..aa545a1dbdc71931fdd588a05acd01fa8ee9d1d1 100644 (file)
@@ -7993,6 +7993,7 @@ int ath10k_mac_register(struct ath10k *ar)
        ieee80211_hw_set(ar->hw, WANT_MONITOR_VIF);
        ieee80211_hw_set(ar->hw, CHANCTX_STA_CSA);
        ieee80211_hw_set(ar->hw, QUEUE_CONTROL);
+       ieee80211_hw_set(ar->hw, SUPPORTS_TX_FRAG);
        ieee80211_hw_set(ar->hw, REPORTS_LOW_ACK);
 
        if (!test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags))
index c2df075d2d56040a8c848a942f5416f482d137f3..8ec66e74d06de14f4d4dfb8b75463f823f566582 100644 (file)
@@ -1414,6 +1414,7 @@ static const struct sdio_device_id ath6kl_sdio_devices[] = {
        {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x0))},
        {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x1))},
        {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x2))},
+       {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x18))},
        {},
 };
 
index b6f064a8d2645204725670f23aabe45d810ff431..7e27a06e5df197cbf5e5a41b97e61723d38d72a8 100644 (file)
@@ -33,7 +33,6 @@ struct coeff {
 
 enum ar9003_cal_types {
        IQ_MISMATCH_CAL = BIT(0),
-       TEMP_COMP_CAL = BIT(1),
 };
 
 static void ar9003_hw_setup_calibration(struct ath_hw *ah,
@@ -59,12 +58,6 @@ static void ar9003_hw_setup_calibration(struct ath_hw *ah,
                /* Kick-off cal */
                REG_SET_BIT(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_DO_CAL);
                break;
-       case TEMP_COMP_CAL:
-               ath_dbg(common, CALIBRATE,
-                       "starting Temperature Compensation Calibration\n");
-               REG_SET_BIT(ah, AR_CH0_THERM, AR_CH0_THERM_LOCAL);
-               REG_SET_BIT(ah, AR_CH0_THERM, AR_CH0_THERM_START);
-               break;
        default:
                ath_err(common, "Invalid calibration type\n");
                break;
@@ -93,8 +86,7 @@ static bool ar9003_hw_per_calibration(struct ath_hw *ah,
                /*
                * Accumulate cal measures for active chains
                */
-               if (cur_caldata->calCollect)
-                       cur_caldata->calCollect(ah);
+               cur_caldata->calCollect(ah);
                ah->cal_samples++;
 
                if (ah->cal_samples >= cur_caldata->calNumSamples) {
@@ -107,8 +99,7 @@ static bool ar9003_hw_per_calibration(struct ath_hw *ah,
                        /*
                        * Process accumulated data
                        */
-                       if (cur_caldata->calPostProc)
-                               cur_caldata->calPostProc(ah, numChains);
+                       cur_caldata->calPostProc(ah, numChains);
 
                        /* Calibration has finished. */
                        caldata->CalValid |= cur_caldata->calType;
@@ -323,16 +314,9 @@ static const struct ath9k_percal_data iq_cal_single_sample = {
        ar9003_hw_iqcalibrate
 };
 
-static const struct ath9k_percal_data temp_cal_single_sample = {
-       TEMP_COMP_CAL,
-       MIN_CAL_SAMPLES,
-       PER_MAX_LOG_COUNT,
-};
-
 static void ar9003_hw_init_cal_settings(struct ath_hw *ah)
 {
        ah->iq_caldata.calData = &iq_cal_single_sample;
-       ah->temp_caldata.calData = &temp_cal_single_sample;
 
        if (AR_SREV_9300_20_OR_LATER(ah)) {
                ah->enabled_cals |= TX_IQ_CAL;
@@ -340,7 +324,7 @@ static void ar9003_hw_init_cal_settings(struct ath_hw *ah)
                        ah->enabled_cals |= TX_IQ_ON_AGC_CAL;
        }
 
-       ah->supp_cals = IQ_MISMATCH_CAL | TEMP_COMP_CAL;
+       ah->supp_cals = IQ_MISMATCH_CAL;
 }
 
 #define OFF_UPPER_LT 24
@@ -1399,9 +1383,6 @@ static void ar9003_hw_init_cal_common(struct ath_hw *ah)
        INIT_CAL(&ah->iq_caldata);
        INSERT_CAL(ah, &ah->iq_caldata);
 
-       INIT_CAL(&ah->temp_caldata);
-       INSERT_CAL(ah, &ah->temp_caldata);
-
        /* Initialize current pointer to first element in list */
        ah->cal_list_curr = ah->cal_list;
 
index 2a5d3ad1169c955ed781a95a353e542d1dcc571b..9cbca1229bac02862211c14d14048f08197dd39d 100644 (file)
@@ -830,7 +830,6 @@ struct ath_hw {
        /* Calibration */
        u32 supp_cals;
        struct ath9k_cal_list iq_caldata;
-       struct ath9k_cal_list temp_caldata;
        struct ath9k_cal_list adcgain_caldata;
        struct ath9k_cal_list adcdc_caldata;
        struct ath9k_cal_list *cal_list;
index c0c8bf0429d10c351f07082bde42cd3db98c8ad4..20794660d6aec42cbb4a5dcc52944127512e40e7 100644 (file)
@@ -775,9 +775,11 @@ static const struct ieee80211_iface_limit if_limits[] = {
                                 BIT(NL80211_IFTYPE_P2P_GO) },
 };
 
+#ifdef CONFIG_WIRELESS_WDS
 static const struct ieee80211_iface_limit wds_limits[] = {
        { .max = 2048,  .types = BIT(NL80211_IFTYPE_WDS) },
 };
+#endif
 
 #ifdef CONFIG_ATH9K_CHANNEL_CONTEXT
 
@@ -815,6 +817,7 @@ static const struct ieee80211_iface_combination if_comb[] = {
                                        BIT(NL80211_CHAN_WIDTH_40),
 #endif
        },
+#ifdef CONFIG_WIRELESS_WDS
        {
                .limits = wds_limits,
                .n_limits = ARRAY_SIZE(wds_limits),
@@ -822,6 +825,7 @@ static const struct ieee80211_iface_combination if_comb[] = {
                .num_different_channels = 1,
                .beacon_int_infra_match = true,
        },
+#endif
 };
 
 #ifdef CONFIG_ATH9K_CHANNEL_CONTEXT
@@ -892,7 +896,9 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw)
                        BIT(NL80211_IFTYPE_STATION) |
                        BIT(NL80211_IFTYPE_ADHOC) |
                        BIT(NL80211_IFTYPE_MESH_POINT) |
+#ifdef CONFIG_WIRELESS_WDS
                        BIT(NL80211_IFTYPE_WDS) |
+#endif
                        BIT(NL80211_IFTYPE_OCB);
 
                if (ath9k_is_chanctx_enabled())
index 6e5d9095b1956c1d792725591fe79a75fe676f38..52f3541ecbcfe40f3289bbba681503d974900625 100644 (file)
@@ -5591,7 +5591,9 @@ static struct b43_wl *b43_wireless_init(struct b43_bus_dev *dev)
                BIT(NL80211_IFTYPE_AP) |
                BIT(NL80211_IFTYPE_MESH_POINT) |
                BIT(NL80211_IFTYPE_STATION) |
+#ifdef CONFIG_WIRELESS_WDS
                BIT(NL80211_IFTYPE_WDS) |
+#endif
                BIT(NL80211_IFTYPE_ADHOC);
 
        hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
index 83770d2ea057875ddd75ffb312e9b2f3d28e5e9f..e97ab2b916630e1259ebbb7641f2365363d47f67 100644 (file)
@@ -3838,7 +3838,9 @@ static int b43legacy_wireless_init(struct ssb_device *dev)
        hw->wiphy->interface_modes =
                BIT(NL80211_IFTYPE_AP) |
                BIT(NL80211_IFTYPE_STATION) |
+#ifdef CONFIG_WIRELESS_WDS
                BIT(NL80211_IFTYPE_WDS) |
+#endif
                BIT(NL80211_IFTYPE_ADHOC);
        hw->queues = 1; /* FIXME: hardware has more queues */
        hw->max_rates = 2;
index f28f03a6fbfb3ecd5e129739976576ec32dd5738..ccae3bbe7db24deb3a5656c2ba9556ad6dbf7cfa 100644 (file)
@@ -405,23 +405,24 @@ static int brcmf_vif_change_validate(struct brcmf_cfg80211_info *cfg,
                                     struct brcmf_cfg80211_vif *vif,
                                     enum nl80211_iftype new_type)
 {
-       int iftype_num[NUM_NL80211_IFTYPES];
        struct brcmf_cfg80211_vif *pos;
        bool check_combos = false;
        int ret = 0;
+       struct iface_combination_params params = {
+               .num_different_channels = 1,
+       };
 
-       memset(&iftype_num[0], 0, sizeof(iftype_num));
        list_for_each_entry(pos, &cfg->vif_list, list)
                if (pos == vif) {
-                       iftype_num[new_type]++;
+                       params.iftype_num[new_type]++;
                } else {
                        /* concurrent interfaces so need check combinations */
                        check_combos = true;
-                       iftype_num[pos->wdev.iftype]++;
+                       params.iftype_num[pos->wdev.iftype]++;
                }
 
        if (check_combos)
-               ret = cfg80211_check_combinations(cfg->wiphy, 1, 0, iftype_num);
+               ret = cfg80211_check_combinations(cfg->wiphy, &params);
 
        return ret;
 }
@@ -429,15 +430,16 @@ static int brcmf_vif_change_validate(struct brcmf_cfg80211_info *cfg,
 static int brcmf_vif_add_validate(struct brcmf_cfg80211_info *cfg,
                                  enum nl80211_iftype new_type)
 {
-       int iftype_num[NUM_NL80211_IFTYPES];
        struct brcmf_cfg80211_vif *pos;
+       struct iface_combination_params params = {
+               .num_different_channels = 1,
+       };
 
-       memset(&iftype_num[0], 0, sizeof(iftype_num));
        list_for_each_entry(pos, &cfg->vif_list, list)
-               iftype_num[pos->wdev.iftype]++;
+               params.iftype_num[pos->wdev.iftype]++;
 
-       iftype_num[new_type]++;
-       return cfg80211_check_combinations(cfg->wiphy, 1, 0, iftype_num);
+       params.iftype_num[new_type]++;
+       return cfg80211_check_combinations(cfg->wiphy, &params);
 }
 
 static void convert_key_from_CPU(struct brcmf_wsec_key *key,
@@ -4424,7 +4426,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
        /* store current 11d setting */
        if (brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY,
                                  &ifp->vif->is_11d)) {
-               supports_11d = false;
+               is_11d = supports_11d = false;
        } else {
                country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail,
                                              settings->beacon.tail_len,
index 4fdc3dad3e85437492efc25df76b8ce8a02ad0ec..b88e2048ae0baa207406972c618a28c7db4026e4 100644 (file)
@@ -1087,6 +1087,15 @@ iwl_mvm_netdetect_config(struct iwl_mvm *mvm,
                ret = iwl_mvm_switch_to_d3(mvm);
                if (ret)
                        return ret;
+       } else {
+               /* In theory, we wouldn't have to stop a running sched
+                * scan in order to start another one (for
+                * net-detect).  But in practice this doesn't seem to
+                * work properly, so stop any running sched_scan now.
+                */
+               ret = iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_SCHED, true);
+               if (ret)
+                       return ret;
        }
 
        /* rfkill release can be either for wowlan or netdetect */
@@ -1254,7 +1263,10 @@ static int __iwl_mvm_suspend(struct ieee80211_hw *hw,
  out:
        if (ret < 0) {
                iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN);
-               ieee80211_restart_hw(mvm->hw);
+               if (mvm->restart_fw > 0) {
+                       mvm->restart_fw--;
+                       ieee80211_restart_hw(mvm->hw);
+               }
                iwl_mvm_free_nd(mvm);
        }
  out_noreset:
@@ -2088,6 +2100,16 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test)
        iwl_mvm_update_changed_regdom(mvm);
 
        if (mvm->net_detect) {
+               /* If this is a non-unified image, we restart the FW,
+                * so no need to stop the netdetect scan.  If that
+                * fails, continue and try to get the wake-up reasons,
+                * but trigger a HW restart by keeping a failure code
+                * in ret.
+                */
+               if (unified_image)
+                       ret = iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_NETDETECT,
+                                               false);
+
                iwl_mvm_query_netdetect_reasons(mvm, vif);
                /* has unlocked the mutex, so skip that */
                goto out;
@@ -2271,7 +2293,8 @@ static void iwl_mvm_d3_test_disconn_work_iter(void *_data, u8 *mac,
 static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file)
 {
        struct iwl_mvm *mvm = inode->i_private;
-       int remaining_time = 10;
+       bool unified_image = fw_has_capa(&mvm->fw->ucode_capa,
+                                        IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG);
 
        mvm->d3_test_active = false;
 
@@ -2282,17 +2305,21 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file)
        mvm->trans->system_pm_mode = IWL_PLAT_PM_MODE_DISABLED;
 
        iwl_abort_notification_waits(&mvm->notif_wait);
-       ieee80211_restart_hw(mvm->hw);
+       if (!unified_image) {
+               int remaining_time = 10;
 
-       /* wait for restart and disconnect all interfaces */
-       while (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) &&
-              remaining_time > 0) {
-               remaining_time--;
-               msleep(1000);
-       }
+               ieee80211_restart_hw(mvm->hw);
+
+               /* wait for restart and disconnect all interfaces */
+               while (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) &&
+                      remaining_time > 0) {
+                       remaining_time--;
+                       msleep(1000);
+               }
 
-       if (remaining_time == 0)
-               IWL_ERR(mvm, "Timed out waiting for HW restart to finish!\n");
+               if (remaining_time == 0)
+                       IWL_ERR(mvm, "Timed out waiting for HW restart!\n");
+       }
 
        ieee80211_iterate_active_interfaces_atomic(
                mvm->hw, IEEE80211_IFACE_ITER_NORMAL,
index 07da4efe8458f8a38b5ccf315d29e2427a3d0a35..7b7d2a146e3020a286da8f7acc9a97d005142ce8 100644 (file)
@@ -1529,8 +1529,8 @@ static ssize_t iwl_dbgfs_mem_read(struct file *file, char __user *user_buf,
                .data = { &cmd, },
                .len = { sizeof(cmd) },
        };
-       size_t delta, len;
-       ssize_t ret;
+       size_t delta;
+       ssize_t ret, len;
 
        hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR,
                             DEBUG_GROUP, 0);
index 4f8c1347aa23fdf132ebe3a2638f6aa0cc9cc79f..45122dafe9226278abed80ebcba6cd5b49b19f4c 100644 (file)
@@ -4199,7 +4199,6 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
                                     struct iwl_mvm_internal_rxq_notif *notif,
                                     u32 size)
 {
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(notif_waitq);
        u32 qmask = BIT(mvm->trans->num_rx_queues) - 1;
        int ret;
 
@@ -4221,7 +4220,7 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
        }
 
        if (notif->sync)
-               ret = wait_event_timeout(notif_waitq,
+               ret = wait_event_timeout(mvm->rx_sync_waitq,
                                         atomic_read(&mvm->queue_sync_counter) == 0,
                                         HZ);
        WARN_ON_ONCE(!ret);
index cde8c6cda5946be5be5d4b8bec2e33f42fd3a6ca..4a9cb76b7611d29fd1dd8dcfd8c00e24a9158c4f 100644 (file)
@@ -937,6 +937,7 @@ struct iwl_mvm {
        /* sync d0i3_tx queue and IWL_MVM_STATUS_IN_D0I3 status flag */
        spinlock_t d0i3_tx_lock;
        wait_queue_head_t d0i3_exit_waitq;
+       wait_queue_head_t rx_sync_waitq;
 
        /* BT-Coex */
        struct iwl_bt_coex_profile_notif last_bt_notif;
index e86986f5c40d78fd335ddc82ebb9ec2d4766c71b..f14aada390c53ff8613793a3c6e6a31e84bbe994 100644 (file)
@@ -622,6 +622,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
        spin_lock_init(&mvm->refs_lock);
        skb_queue_head_init(&mvm->d0i3_tx);
        init_waitqueue_head(&mvm->d0i3_exit_waitq);
+       init_waitqueue_head(&mvm->rx_sync_waitq);
 
        atomic_set(&mvm->queue_sync_counter, 0);
 
index a57c6ef5bc14f4cd7dd61df53682b18a9c6f7d3a..6c802cee900c925a7734e6a8f461a24f71c5ede6 100644 (file)
@@ -547,7 +547,8 @@ void iwl_mvm_rx_queue_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb,
                                  "Received expired RX queue sync message\n");
                        return;
                }
-               atomic_dec(&mvm->queue_sync_counter);
+               if (!atomic_dec_return(&mvm->queue_sync_counter))
+                       wake_up(&mvm->rx_sync_waitq);
        }
 
        switch (internal_notif->type) {
index f279fdd6eb441f1a671d312a9f76b8eca1deae72..fa97432054912b53493d6e0f75000c6def479d93 100644 (file)
@@ -1199,6 +1199,9 @@ static int iwl_mvm_num_scans(struct iwl_mvm *mvm)
 
 static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type)
 {
+       bool unified_image = fw_has_capa(&mvm->fw->ucode_capa,
+                                        IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG);
+
        /* This looks a bit arbitrary, but the idea is that if we run
         * out of possible simultaneous scans and the userspace is
         * trying to run a scan type that is already running, we
@@ -1225,12 +1228,30 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type)
                        return -EBUSY;
                return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_REGULAR, true);
        case IWL_MVM_SCAN_NETDETECT:
-               /* No need to stop anything for net-detect since the
-                * firmware is restarted anyway.  This way, any sched
-                * scans that were running will be restarted when we
-                * resume.
-               */
-               return 0;
+               /* For non-unified images, there's no need to stop
+                * anything for net-detect since the firmware is
+                * restarted anyway.  This way, any sched scans that
+                * were running will be restarted when we resume.
+                */
+               if (!unified_image)
+                       return 0;
+
+               /* If this is a unified image and we ran out of scans,
+                * we need to stop something.  Prefer stopping regular
+                * scans, because the results are useless at this
+                * point, and we should be able to keep running
+                * another scheduled scan while suspended.
+                */
+               if (mvm->scan_status & IWL_MVM_SCAN_REGULAR_MASK)
+                       return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_REGULAR,
+                                                true);
+               if (mvm->scan_status & IWL_MVM_SCAN_SCHED_MASK)
+                       return iwl_mvm_scan_stop(mvm, IWL_MVM_SCAN_SCHED,
+                                                true);
+
+               /* fall through, something is wrong if no scan was
+                * running but we ran out of scans.
+                */
        default:
                WARN_ON(1);
                break;
index 001be406a3d3852a7c735f1207a134a563c1784a..2f8134b2a504223856b02692eb5ff8f6a7f4acd8 100644 (file)
@@ -541,48 +541,64 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids);
 
 #ifdef CONFIG_ACPI
-#define SPL_METHOD             "SPLC"
-#define SPL_DOMAINTYPE_MODULE  BIT(0)
-#define SPL_DOMAINTYPE_WIFI    BIT(1)
-#define SPL_DOMAINTYPE_WIGIG   BIT(2)
-#define SPL_DOMAINTYPE_RFEM    BIT(3)
+#define ACPI_SPLC_METHOD       "SPLC"
+#define ACPI_SPLC_DOMAIN_WIFI  (0x07)
 
-static u64 splx_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splx)
+static u64 splc_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splc)
 {
-       union acpi_object *limits, *domain_type, *power_limit;
-
-       if (splx->type != ACPI_TYPE_PACKAGE ||
-           splx->package.count != 2 ||
-           splx->package.elements[0].type != ACPI_TYPE_INTEGER ||
-           splx->package.elements[0].integer.value != 0) {
-               IWL_ERR(trans, "Unsupported splx structure\n");
+       union acpi_object *data_pkg, *dflt_pwr_limit;
+       int i;
+
+       /* We need at least two elements, one for the revision and one
+        * for the data itself.  Also check that the revision is
+        * supported (currently only revision 0).
+       */
+       if (splc->type != ACPI_TYPE_PACKAGE ||
+           splc->package.count < 2 ||
+           splc->package.elements[0].type != ACPI_TYPE_INTEGER ||
+           splc->package.elements[0].integer.value != 0) {
+               IWL_DEBUG_INFO(trans,
+                              "Unsupported structure returned by the SPLC method.  Ignoring.\n");
                return 0;
        }
 
-       limits = &splx->package.elements[1];
-       if (limits->type != ACPI_TYPE_PACKAGE ||
-           limits->package.count < 2 ||
-           limits->package.elements[0].type != ACPI_TYPE_INTEGER ||
-           limits->package.elements[1].type != ACPI_TYPE_INTEGER) {
-               IWL_ERR(trans, "Invalid limits element\n");
-               return 0;
+       /* loop through all the packages to find the one for WiFi */
+       for (i = 1; i < splc->package.count; i++) {
+               union acpi_object *domain;
+
+               data_pkg = &splc->package.elements[i];
+
+               /* Skip anything that is not a package with the right
+                * amount of elements (i.e. at least 2 integers).
+                */
+               if (data_pkg->type != ACPI_TYPE_PACKAGE ||
+                   data_pkg->package.count < 2 ||
+                   data_pkg->package.elements[0].type != ACPI_TYPE_INTEGER ||
+                   data_pkg->package.elements[1].type != ACPI_TYPE_INTEGER)
+                       continue;
+
+               domain = &data_pkg->package.elements[0];
+               if (domain->integer.value == ACPI_SPLC_DOMAIN_WIFI)
+                       break;
+
+               data_pkg = NULL;
        }
 
-       domain_type = &limits->package.elements[0];
-       power_limit = &limits->package.elements[1];
-       if (!(domain_type->integer.value & SPL_DOMAINTYPE_WIFI)) {
-               IWL_DEBUG_INFO(trans, "WiFi power is not limited\n");
+       if (!data_pkg) {
+               IWL_DEBUG_INFO(trans,
+                              "No element for the WiFi domain returned by the SPLC method.\n");
                return 0;
        }
 
-       return power_limit->integer.value;
+       dflt_pwr_limit = &data_pkg->package.elements[1];
+       return dflt_pwr_limit->integer.value;
 }
 
 static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev)
 {
        acpi_handle pxsx_handle;
        acpi_handle handle;
-       struct acpi_buffer splx = {ACPI_ALLOCATE_BUFFER, NULL};
+       struct acpi_buffer splc = {ACPI_ALLOCATE_BUFFER, NULL};
        acpi_status status;
 
        pxsx_handle = ACPI_HANDLE(&pdev->dev);
@@ -593,23 +609,24 @@ static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev)
        }
 
        /* Get the method's handle */
-       status = acpi_get_handle(pxsx_handle, (acpi_string)SPL_METHOD, &handle);
+       status = acpi_get_handle(pxsx_handle, (acpi_string)ACPI_SPLC_METHOD,
+                                &handle);
        if (ACPI_FAILURE(status)) {
-               IWL_DEBUG_INFO(trans, "SPL method not found\n");
+               IWL_DEBUG_INFO(trans, "SPLC method not found\n");
                return;
        }
 
        /* Call SPLC with no arguments */
-       status = acpi_evaluate_object(handle, NULL, NULL, &splx);
+       status = acpi_evaluate_object(handle, NULL, NULL, &splc);
        if (ACPI_FAILURE(status)) {
                IWL_ERR(trans, "SPLC invocation failed (0x%x)\n", status);
                return;
        }
 
-       trans->dflt_pwr_limit = splx_get_pwr_limit(trans, splx.pointer);
+       trans->dflt_pwr_limit = splc_get_pwr_limit(trans, splc.pointer);
        IWL_DEBUG_INFO(trans, "Default power limit set to %lld\n",
                       trans->dflt_pwr_limit);
-       kfree(splx.pointer);
+       kfree(splc.pointer);
 }
 
 #else /* CONFIG_ACPI */
index e9a278b60dfd87996318cd0096f78486c3d7b855..5f840f16f40bd955478196397f8dcff481e88077 100644 (file)
@@ -592,6 +592,7 @@ error:
 static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
                              int slots_num, u32 txq_id)
 {
+       struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
        int ret;
 
        txq->need_update = false;
@@ -606,6 +607,13 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
                return ret;
 
        spin_lock_init(&txq->lock);
+
+       if (txq_id == trans_pcie->cmd_queue) {
+               static struct lock_class_key iwl_pcie_cmd_queue_lock_class;
+
+               lockdep_set_class(&txq->lock, &iwl_pcie_cmd_queue_lock_class);
+       }
+
        __skb_queue_head_init(&txq->overflow_q);
 
        /*
index e95b79bccf9b2e8acfd2b83973d6bbe397ec0727..1620a5d2757d38ceea4662fe1c885fdccd201948 100644 (file)
@@ -250,7 +250,7 @@ static inline void hwsim_clear_chanctx_magic(struct ieee80211_chanctx_conf *c)
        cp->magic = 0;
 }
 
-static int hwsim_net_id;
+static unsigned int hwsim_net_id;
 
 static int hwsim_netgroup;
 
@@ -587,15 +587,8 @@ struct hwsim_radiotap_ack_hdr {
        __le16 rt_chbitmask;
 } __packed;
 
-/* MAC80211_HWSIM netlinf family */
-static struct genl_family hwsim_genl_family = {
-       .id = GENL_ID_GENERATE,
-       .hdrsize = 0,
-       .name = "MAC80211_HWSIM",
-       .version = 1,
-       .maxattr = HWSIM_ATTR_MAX,
-       .netnsok = true,
-};
+/* MAC80211_HWSIM netlink family */
+static struct genl_family hwsim_genl_family;
 
 enum hwsim_multicast_groups {
        HWSIM_MCGRP_CONFIG,
@@ -826,7 +819,7 @@ static void mac80211_hwsim_set_tsf(struct ieee80211_hw *hw,
                data->bcn_delta = do_div(delta, bcn_int);
        } else {
                data->tsf_offset -= delta;
-               data->bcn_delta = -do_div(delta, bcn_int);
+               data->bcn_delta = -(s64)do_div(delta, bcn_int);
        }
 }
 
@@ -2256,35 +2249,51 @@ static void mac80211_hwsim_get_et_stats(struct ieee80211_hw *hw,
        WARN_ON(i != MAC80211_HWSIM_SSTATS_LEN);
 }
 
+#define HWSIM_COMMON_OPS                                       \
+       .tx = mac80211_hwsim_tx,                                \
+       .start = mac80211_hwsim_start,                          \
+       .stop = mac80211_hwsim_stop,                            \
+       .add_interface = mac80211_hwsim_add_interface,          \
+       .change_interface = mac80211_hwsim_change_interface,    \
+       .remove_interface = mac80211_hwsim_remove_interface,    \
+       .config = mac80211_hwsim_config,                        \
+       .configure_filter = mac80211_hwsim_configure_filter,    \
+       .bss_info_changed = mac80211_hwsim_bss_info_changed,    \
+       .sta_add = mac80211_hwsim_sta_add,                      \
+       .sta_remove = mac80211_hwsim_sta_remove,                \
+       .sta_notify = mac80211_hwsim_sta_notify,                \
+       .set_tim = mac80211_hwsim_set_tim,                      \
+       .conf_tx = mac80211_hwsim_conf_tx,                      \
+       .get_survey = mac80211_hwsim_get_survey,                \
+       CFG80211_TESTMODE_CMD(mac80211_hwsim_testmode_cmd)      \
+       .ampdu_action = mac80211_hwsim_ampdu_action,            \
+       .flush = mac80211_hwsim_flush,                          \
+       .get_tsf = mac80211_hwsim_get_tsf,                      \
+       .set_tsf = mac80211_hwsim_set_tsf,                      \
+       .get_et_sset_count = mac80211_hwsim_get_et_sset_count,  \
+       .get_et_stats = mac80211_hwsim_get_et_stats,            \
+       .get_et_strings = mac80211_hwsim_get_et_strings,
+
 static const struct ieee80211_ops mac80211_hwsim_ops = {
-       .tx = mac80211_hwsim_tx,
-       .start = mac80211_hwsim_start,
-       .stop = mac80211_hwsim_stop,
-       .add_interface = mac80211_hwsim_add_interface,
-       .change_interface = mac80211_hwsim_change_interface,
-       .remove_interface = mac80211_hwsim_remove_interface,
-       .config = mac80211_hwsim_config,
-       .configure_filter = mac80211_hwsim_configure_filter,
-       .bss_info_changed = mac80211_hwsim_bss_info_changed,
-       .sta_add = mac80211_hwsim_sta_add,
-       .sta_remove = mac80211_hwsim_sta_remove,
-       .sta_notify = mac80211_hwsim_sta_notify,
-       .set_tim = mac80211_hwsim_set_tim,
-       .conf_tx = mac80211_hwsim_conf_tx,
-       .get_survey = mac80211_hwsim_get_survey,
-       CFG80211_TESTMODE_CMD(mac80211_hwsim_testmode_cmd)
-       .ampdu_action = mac80211_hwsim_ampdu_action,
+       HWSIM_COMMON_OPS
        .sw_scan_start = mac80211_hwsim_sw_scan,
        .sw_scan_complete = mac80211_hwsim_sw_scan_complete,
-       .flush = mac80211_hwsim_flush,
-       .get_tsf = mac80211_hwsim_get_tsf,
-       .set_tsf = mac80211_hwsim_set_tsf,
-       .get_et_sset_count = mac80211_hwsim_get_et_sset_count,
-       .get_et_stats = mac80211_hwsim_get_et_stats,
-       .get_et_strings = mac80211_hwsim_get_et_strings,
 };
 
-static struct ieee80211_ops mac80211_hwsim_mchan_ops;
+static const struct ieee80211_ops mac80211_hwsim_mchan_ops = {
+       HWSIM_COMMON_OPS
+       .hw_scan = mac80211_hwsim_hw_scan,
+       .cancel_hw_scan = mac80211_hwsim_cancel_hw_scan,
+       .sw_scan_start = NULL,
+       .sw_scan_complete = NULL,
+       .remain_on_channel = mac80211_hwsim_roc,
+       .cancel_remain_on_channel = mac80211_hwsim_croc,
+       .add_chanctx = mac80211_hwsim_add_chanctx,
+       .remove_chanctx = mac80211_hwsim_remove_chanctx,
+       .change_chanctx = mac80211_hwsim_change_chanctx,
+       .assign_vif_chanctx = mac80211_hwsim_assign_vif_chanctx,
+       .unassign_vif_chanctx = mac80211_hwsim_unassign_vif_chanctx,
+};
 
 struct hwsim_new_radio_params {
        unsigned int channels;
@@ -3235,6 +3244,18 @@ static const struct genl_ops hwsim_ops[] = {
        },
 };
 
+static struct genl_family hwsim_genl_family __ro_after_init = {
+       .name = "MAC80211_HWSIM",
+       .version = 1,
+       .maxattr = HWSIM_ATTR_MAX,
+       .netnsok = true,
+       .module = THIS_MODULE,
+       .ops = hwsim_ops,
+       .n_ops = ARRAY_SIZE(hwsim_ops),
+       .mcgrps = hwsim_mcgrps,
+       .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps),
+};
+
 static void destroy_radio(struct work_struct *work)
 {
        struct mac80211_hwsim_data *data =
@@ -3282,15 +3303,13 @@ static struct notifier_block hwsim_netlink_notifier = {
        .notifier_call = mac80211_hwsim_netlink_notify,
 };
 
-static int hwsim_init_netlink(void)
+static int __init hwsim_init_netlink(void)
 {
        int rc;
 
        printk(KERN_INFO "mac80211_hwsim: initializing netlink\n");
 
-       rc = genl_register_family_with_ops_groups(&hwsim_genl_family,
-                                                 hwsim_ops,
-                                                 hwsim_mcgrps);
+       rc = genl_register_family(&hwsim_genl_family);
        if (rc)
                goto failure;
 
@@ -3359,21 +3378,6 @@ static int __init init_mac80211_hwsim(void)
        if (channels < 1)
                return -EINVAL;
 
-       mac80211_hwsim_mchan_ops = mac80211_hwsim_ops;
-       mac80211_hwsim_mchan_ops.hw_scan = mac80211_hwsim_hw_scan;
-       mac80211_hwsim_mchan_ops.cancel_hw_scan = mac80211_hwsim_cancel_hw_scan;
-       mac80211_hwsim_mchan_ops.sw_scan_start = NULL;
-       mac80211_hwsim_mchan_ops.sw_scan_complete = NULL;
-       mac80211_hwsim_mchan_ops.remain_on_channel = mac80211_hwsim_roc;
-       mac80211_hwsim_mchan_ops.cancel_remain_on_channel = mac80211_hwsim_croc;
-       mac80211_hwsim_mchan_ops.add_chanctx = mac80211_hwsim_add_chanctx;
-       mac80211_hwsim_mchan_ops.remove_chanctx = mac80211_hwsim_remove_chanctx;
-       mac80211_hwsim_mchan_ops.change_chanctx = mac80211_hwsim_change_chanctx;
-       mac80211_hwsim_mchan_ops.assign_vif_chanctx =
-               mac80211_hwsim_assign_vif_chanctx;
-       mac80211_hwsim_mchan_ops.unassign_vif_chanctx =
-               mac80211_hwsim_unassign_vif_chanctx;
-
        spin_lock_init(&hwsim_radio_lock);
 
        err = register_pernet_device(&hwsim_net_ops);
index 94480123efa3d967dbe08dfa10114218343c4df5..274dd5a1574a3f4a936637b60f6f78fca645bdbc 100644 (file)
@@ -45,7 +45,7 @@ static int mwifiex_11n_dispatch_amsdu_pkt(struct mwifiex_private *priv,
                skb_trim(skb, le16_to_cpu(local_rx_pd->rx_pkt_length));
 
                ieee80211_amsdu_to_8023s(skb, &list, priv->curr_addr,
-                                        priv->wdev.iftype, 0, false);
+                                        priv->wdev.iftype, 0, NULL, NULL);
 
                while (!skb_queue_empty(&list)) {
                        struct rx_packet_hdr *rx_hdr;
index 5071cf0384668e6945bb9ed84fc9bce533a1c1a3..eb7b714436577b1abc64f096608d3c3b80acba24 100644 (file)
@@ -1379,11 +1379,13 @@ int rt2x00lib_probe_dev(struct rt2x00_dev *rt2x00dev)
        if (rt2x00dev->bcn->limit > 0)
                rt2x00dev->hw->wiphy->interface_modes |=
                    BIT(NL80211_IFTYPE_ADHOC) |
-                   BIT(NL80211_IFTYPE_AP) |
 #ifdef CONFIG_MAC80211_MESH
                    BIT(NL80211_IFTYPE_MESH_POINT) |
 #endif
-                   BIT(NL80211_IFTYPE_WDS);
+#ifdef CONFIG_WIRELESS_WDS
+                   BIT(NL80211_IFTYPE_WDS) |
+#endif
+                   BIT(NL80211_IFTYPE_AP);
 
        rt2x00dev->hw->wiphy->flags |= WIPHY_FLAG_IBSS_RSN;
 
index f90ff0a01c3658d6d99903e3d9795cc5390cd019..2caa4ad04dba4a6e66a02b74173ab1b016ca7af1 100644 (file)
@@ -111,7 +111,7 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context,
                        if (!err)
                                goto found_alt;
                }
-               pr_err("Firmware %s not available\n", rtlpriv->cfg->fw_name);
+               pr_err("Selected firmware is not available\n");
                rtlpriv->max_fw_size = 0;
                return;
        }
index e7b11b40e68dc3e4e8925fb8e80551527124f3a5..f361808def47af36272213111413748546783b76 100644 (file)
@@ -86,6 +86,7 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw)
        struct rtl_priv *rtlpriv = rtl_priv(hw);
        struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
        u8 tid;
+       char *fw_name;
 
        rtl8188ee_bt_reg_init(hw);
        rtlpriv->dm.dm_initialgain_enable = 1;
@@ -169,10 +170,10 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw)
                return 1;
        }
 
-       rtlpriv->cfg->fw_name = "rtlwifi/rtl8188efw.bin";
+       fw_name = "rtlwifi/rtl8188efw.bin";
        rtlpriv->max_fw_size = 0x8000;
-       pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name);
-       err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name,
+       pr_info("Using firmware %s\n", fw_name);
+       err = request_firmware_nowait(THIS_MODULE, 1, fw_name,
                                      rtlpriv->io.dev, GFP_KERNEL, hw,
                                      rtl_fw_cb);
        if (err) {
@@ -284,7 +285,6 @@ static const struct rtl_hal_cfg rtl88ee_hal_cfg = {
        .bar_id = 2,
        .write_readback = true,
        .name = "rtl88e_pci",
-       .fw_name = "rtlwifi/rtl8188efw.bin",
        .ops = &rtl8188ee_hal_ops,
        .mod_params = &rtl88ee_mod_params,
 
index 5c46a98541bcfadadcbe372804999a7fb615bdce..691ddef1ae28eab7d2a193fca3a72baa7871e480 100644 (file)
@@ -92,6 +92,7 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw)
        struct rtl_priv *rtlpriv = rtl_priv(hw);
        struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
        struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
+       char *fw_name = "rtlwifi/rtl8192cfwU.bin";
 
        rtl8192ce_bt_reg_init(hw);
 
@@ -163,15 +164,12 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw)
        }
 
        /* request fw */
-       if (IS_VENDOR_UMC_A_CUT(rtlhal->version) &&
-           !IS_92C_SERIAL(rtlhal->version))
-               rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cfwU.bin";
-       else if (IS_81XXC_VENDOR_UMC_B_CUT(rtlhal->version))
-               rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cfwU_B.bin";
+       if (IS_81XXC_VENDOR_UMC_B_CUT(rtlhal->version))
+               fw_name = "rtlwifi/rtl8192cfwU_B.bin";
 
        rtlpriv->max_fw_size = 0x4000;
-       pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name);
-       err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name,
+       pr_info("Using firmware %s\n", fw_name);
+       err = request_firmware_nowait(THIS_MODULE, 1, fw_name,
                                      rtlpriv->io.dev, GFP_KERNEL, hw,
                                      rtl_fw_cb);
        if (err) {
@@ -258,7 +256,6 @@ static const struct rtl_hal_cfg rtl92ce_hal_cfg = {
        .bar_id = 2,
        .write_readback = true,
        .name = "rtl92c_pci",
-       .fw_name = "rtlwifi/rtl8192cfw.bin",
        .ops = &rtl8192ce_hal_ops,
        .mod_params = &rtl92ce_mod_params,
 
index 92588e06fd1c83ee9319c4e434753cfe00e00d26..b84e13ac6ead554b28b25f36b4d2e0508157cfd0 100644 (file)
@@ -55,6 +55,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw)
 {
        struct rtl_priv *rtlpriv = rtl_priv(hw);
        int err;
+       char *fw_name;
 
        rtlpriv->dm.dm_initialgain_enable = true;
        rtlpriv->dm.dm_flag = 0;
@@ -73,18 +74,18 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw)
        }
        if (IS_VENDOR_UMC_A_CUT(rtlpriv->rtlhal.version) &&
            !IS_92C_SERIAL(rtlpriv->rtlhal.version)) {
-               rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cufw_A.bin";
+               fw_name = "rtlwifi/rtl8192cufw_A.bin";
        } else if (IS_81XXC_VENDOR_UMC_B_CUT(rtlpriv->rtlhal.version)) {
-               rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cufw_B.bin";
+               fw_name = "rtlwifi/rtl8192cufw_B.bin";
        } else {
-               rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cufw_TMSC.bin";
+               fw_name = "rtlwifi/rtl8192cufw_TMSC.bin";
        }
        /* provide name of alternative file */
        rtlpriv->cfg->alt_fw_name = "rtlwifi/rtl8192cufw.bin";
-       pr_info("Loading firmware %s\n", rtlpriv->cfg->fw_name);
+       pr_info("Loading firmware %s\n", fw_name);
        rtlpriv->max_fw_size = 0x4000;
        err = request_firmware_nowait(THIS_MODULE, 1,
-                                     rtlpriv->cfg->fw_name, rtlpriv->io.dev,
+                                     fw_name, rtlpriv->io.dev,
                                      GFP_KERNEL, hw, rtl_fw_cb);
        return err;
 }
@@ -183,7 +184,6 @@ static struct rtl_hal_usbint_cfg rtl92cu_interface_cfg = {
 
 static struct rtl_hal_cfg rtl92cu_hal_cfg = {
        .name = "rtl92c_usb",
-       .fw_name = "rtlwifi/rtl8192cufw.bin",
        .ops = &rtl8192cu_hal_ops,
        .mod_params = &rtl92cu_mod_params,
        .usb_interface_cfg = &rtl92cu_interface_cfg,
index a9c39ebedbdd8427e3c11de0d10b1bf352e43a66..2d65e40952921924b42ecfbc5daed49471ba8b3b 100644 (file)
@@ -88,6 +88,7 @@ static int rtl92d_init_sw_vars(struct ieee80211_hw *hw)
        u8 tid;
        struct rtl_priv *rtlpriv = rtl_priv(hw);
        struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
+       char *fw_name = "rtlwifi/rtl8192defw.bin";
 
        rtlpriv->dm.dm_initialgain_enable = true;
        rtlpriv->dm.dm_flag = 0;
@@ -177,10 +178,10 @@ static int rtl92d_init_sw_vars(struct ieee80211_hw *hw)
 
        rtlpriv->max_fw_size = 0x8000;
        pr_info("Driver for Realtek RTL8192DE WLAN interface\n");
-       pr_info("Loading firmware file %s\n", rtlpriv->cfg->fw_name);
+       pr_info("Loading firmware file %s\n", fw_name);
 
        /* request fw */
-       err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name,
+       err = request_firmware_nowait(THIS_MODULE, 1, fw_name,
                                      rtlpriv->io.dev, GFP_KERNEL, hw,
                                      rtl_fw_cb);
        if (err) {
@@ -262,7 +263,6 @@ static const struct rtl_hal_cfg rtl92de_hal_cfg = {
        .bar_id = 2,
        .write_readback = true,
        .name = "rtl8192de",
-       .fw_name = "rtlwifi/rtl8192defw.bin",
        .ops = &rtl8192de_hal_ops,
        .mod_params = &rtl92de_mod_params,
 
index ac299cbe59b0daa7a44cfebc6b2c57bc3a616ad1..46b605de36e722301bf1decb7602ec11c3314739 100644 (file)
@@ -91,6 +91,7 @@ int rtl92ee_init_sw_vars(struct ieee80211_hw *hw)
        struct rtl_priv *rtlpriv = rtl_priv(hw);
        struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
        int err = 0;
+       char *fw_name;
 
        rtl92ee_bt_reg_init(hw);
        rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support;
@@ -170,11 +171,11 @@ int rtl92ee_init_sw_vars(struct ieee80211_hw *hw)
        }
 
        /* request fw */
-       rtlpriv->cfg->fw_name = "rtlwifi/rtl8192eefw.bin";
+       fw_name = "rtlwifi/rtl8192eefw.bin";
 
        rtlpriv->max_fw_size = 0x8000;
-       pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name);
-       err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name,
+       pr_info("Using firmware %s\n", fw_name);
+       err = request_firmware_nowait(THIS_MODULE, 1, fw_name,
                                      rtlpriv->io.dev, GFP_KERNEL, hw,
                                      rtl_fw_cb);
        if (err) {
@@ -266,7 +267,6 @@ static const struct rtl_hal_cfg rtl92ee_hal_cfg = {
        .bar_id = 2,
        .write_readback = true,
        .name = "rtl92ee_pci",
-       .fw_name = "rtlwifi/rtl8192eefw.bin",
        .ops = &rtl8192ee_hal_ops,
        .mod_params = &rtl92ee_mod_params,
 
index a652d458fe9722dcc25f7c763b929e2d29b9028d..998cefbd7e89104b7797d5bf419fe567dea35ff2 100644 (file)
@@ -85,12 +85,13 @@ static void rtl92se_fw_cb(const struct firmware *firmware, void *context)
        struct ieee80211_hw *hw = context;
        struct rtl_priv *rtlpriv = rtl_priv(hw);
        struct rt_firmware *pfirmware = NULL;
+       char *fw_name = "rtlwifi/rtl8192sefw.bin";
 
        RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
                         "Firmware callback routine entered!\n");
        complete(&rtlpriv->firmware_loading_complete);
        if (!firmware) {
-               pr_err("Firmware %s not available\n", rtlpriv->cfg->fw_name);
+               pr_err("Firmware %s not available\n", fw_name);
                rtlpriv->max_fw_size = 0;
                return;
        }
@@ -113,6 +114,7 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw)
        struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
        int err = 0;
        u16 earlyrxthreshold = 7;
+       char *fw_name = "rtlwifi/rtl8192sefw.bin";
 
        rtlpriv->dm.dm_initialgain_enable = true;
        rtlpriv->dm.dm_flag = 0;
@@ -210,9 +212,9 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw)
        rtlpriv->max_fw_size = RTL8190_MAX_FIRMWARE_CODE_SIZE*2 +
                               sizeof(struct fw_hdr);
        pr_info("Driver for Realtek RTL8192SE/RTL8191SE\n"
-               "Loading firmware %s\n", rtlpriv->cfg->fw_name);
+               "Loading firmware %s\n", fw_name);
        /* request fw */
-       err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name,
+       err = request_firmware_nowait(THIS_MODULE, 1, fw_name,
                                      rtlpriv->io.dev, GFP_KERNEL, hw,
                                      rtl92se_fw_cb);
        if (err) {
@@ -306,7 +308,6 @@ static const struct rtl_hal_cfg rtl92se_hal_cfg = {
        .bar_id = 1,
        .write_readback = false,
        .name = "rtl92s_pci",
-       .fw_name = "rtlwifi/rtl8192sefw.bin",
        .ops = &rtl8192se_hal_ops,
        .mod_params = &rtl92se_mod_params,
 
index 89c828ad89f4230186be72e1cc640b4cf2b3309a..c51a9e8234e92417877537e3ce5d566a120e2bd1 100644 (file)
@@ -94,6 +94,7 @@ int rtl8723e_init_sw_vars(struct ieee80211_hw *hw)
        struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
        struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
        int err = 0;
+       char *fw_name = "rtlwifi/rtl8723fw.bin";
 
        rtl8723e_bt_reg_init(hw);
 
@@ -176,14 +177,12 @@ int rtl8723e_init_sw_vars(struct ieee80211_hw *hw)
                return 1;
        }
 
-       if (IS_VENDOR_8723_A_CUT(rtlhal->version))
-               rtlpriv->cfg->fw_name = "rtlwifi/rtl8723fw.bin";
-       else if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version))
-               rtlpriv->cfg->fw_name = "rtlwifi/rtl8723fw_B.bin";
+       if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version))
+               fw_name = "rtlwifi/rtl8723fw_B.bin";
 
        rtlpriv->max_fw_size = 0x6000;
-       pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name);
-       err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name,
+       pr_info("Using firmware %s\n", fw_name);
+       err = request_firmware_nowait(THIS_MODULE, 1, fw_name,
                                      rtlpriv->io.dev, GFP_KERNEL, hw,
                                      rtl_fw_cb);
        if (err) {
@@ -280,7 +279,6 @@ static const struct rtl_hal_cfg rtl8723e_hal_cfg = {
        .bar_id = 2,
        .write_readback = true,
        .name = "rtl8723e_pci",
-       .fw_name = "rtlwifi/rtl8723efw.bin",
        .ops = &rtl8723e_hal_ops,
        .mod_params = &rtl8723e_mod_params,
        .maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL,
index 20b53f035483a0f2dfc63b2e3d65c60d0e669317..847644d1f5f539ff984efb77131b57575df45b7d 100644 (file)
@@ -91,6 +91,7 @@ int rtl8723be_init_sw_vars(struct ieee80211_hw *hw)
        struct rtl_priv *rtlpriv = rtl_priv(hw);
        struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
        struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
+       char *fw_name = "rtlwifi/rtl8723befw.bin";
 
        rtl8723be_bt_reg_init(hw);
        rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer();
@@ -184,8 +185,8 @@ int rtl8723be_init_sw_vars(struct ieee80211_hw *hw)
        }
 
        rtlpriv->max_fw_size = 0x8000;
-       pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name);
-       err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name,
+       pr_info("Using firmware %s\n", fw_name);
+       err = request_firmware_nowait(THIS_MODULE, 1, fw_name,
                                      rtlpriv->io.dev, GFP_KERNEL, hw,
                                      rtl_fw_cb);
        if (err) {
@@ -280,7 +281,6 @@ static const struct rtl_hal_cfg rtl8723be_hal_cfg = {
        .bar_id = 2,
        .write_readback = true,
        .name = "rtl8723be_pci",
-       .fw_name = "rtlwifi/rtl8723befw.bin",
        .ops = &rtl8723be_hal_ops,
        .mod_params = &rtl8723be_mod_params,
        .maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL,
index 22f687b1f1334cfb7732add791861bc465d1465d..297938e0effd54c47bcc0ff27ea5c5c1552ceb52 100644 (file)
@@ -93,6 +93,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw)
        struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
        struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
        struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw));
+       char *fw_name, *wowlan_fw_name;
 
        rtl8821ae_bt_reg_init(hw);
        rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer();
@@ -203,17 +204,17 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw)
        }
 
        if (rtlhal->hw_type == HARDWARE_TYPE_RTL8812AE) {
-               rtlpriv->cfg->fw_name = "rtlwifi/rtl8812aefw.bin";
-               rtlpriv->cfg->wowlan_fw_name = "rtlwifi/rtl8812aefw_wowlan.bin";
+               fw_name = "rtlwifi/rtl8812aefw.bin";
+               wowlan_fw_name = "rtlwifi/rtl8812aefw_wowlan.bin";
        } else {
-               rtlpriv->cfg->fw_name = "rtlwifi/rtl8821aefw.bin";
-               rtlpriv->cfg->wowlan_fw_name = "rtlwifi/rtl8821aefw_wowlan.bin";
+               fw_name = "rtlwifi/rtl8821aefw.bin";
+               wowlan_fw_name = "rtlwifi/rtl8821aefw_wowlan.bin";
        }
 
        rtlpriv->max_fw_size = 0x8000;
        /*load normal firmware*/
-       pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name);
-       err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name,
+       pr_info("Using firmware %s\n", fw_name);
+       err = request_firmware_nowait(THIS_MODULE, 1, fw_name,
                                      rtlpriv->io.dev, GFP_KERNEL, hw,
                                      rtl_fw_cb);
        if (err) {
@@ -222,9 +223,9 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw)
                return 1;
        }
        /*load wowlan firmware*/
-       pr_info("Using firmware %s\n", rtlpriv->cfg->wowlan_fw_name);
+       pr_info("Using firmware %s\n", wowlan_fw_name);
        err = request_firmware_nowait(THIS_MODULE, 1,
-                                     rtlpriv->cfg->wowlan_fw_name,
+                                     wowlan_fw_name,
                                      rtlpriv->io.dev, GFP_KERNEL, hw,
                                      rtl_wowlan_fw_cb);
        if (err) {
@@ -320,7 +321,6 @@ static const struct rtl_hal_cfg rtl8821ae_hal_cfg = {
        .bar_id = 2,
        .write_readback = true,
        .name = "rtl8821ae_pci",
-       .fw_name = "rtlwifi/rtl8821aefw.bin",
        .ops = &rtl8821ae_hal_ops,
        .mod_params = &rtl8821ae_mod_params,
        .maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL,
index 595f7d5d091afcfa1a092b4662d9d56731291cbb..dafe486f844867a1f96637af5fff774bf3ee3afd 100644 (file)
@@ -2278,9 +2278,7 @@ struct rtl_hal_cfg {
        u8 bar_id;
        bool write_readback;
        char *name;
-       char *fw_name;
        char *alt_fw_name;
-       char *wowlan_fw_name;
        struct rtl_hal_ops *ops;
        struct rtl_mod_params *mod_params;
        struct rtl_hal_usbint_cfg *usb_interface_cfg;
index 2f056923a5697dbee3bb4d606fb0e6811708cc2e..e536aa01b937a958684c81a8a8c228dc44975b42 100644 (file)
@@ -6088,6 +6088,7 @@ static int wl1271_init_ieee80211(struct wl1271 *wl)
        ieee80211_hw_set(wl->hw, SUPPORTS_DYNAMIC_PS);
        ieee80211_hw_set(wl->hw, SIGNAL_DBM);
        ieee80211_hw_set(wl->hw, SUPPORTS_PS);
+       ieee80211_hw_set(wl->hw, SUPPORTS_TX_FRAG);
 
        wl->hw->wiphy->cipher_suites = cipher_suites;
        wl->hw->wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites);
index a6e94b1a12cb3f8e8cbac3c9b3074d18cfe82ee3..47fe7f96a242794caf0150f367ae8300bd1d2c48 100644 (file)
@@ -391,7 +391,6 @@ static void wl1271_remove(struct sdio_func *func)
        pm_runtime_get_noresume(&func->dev);
 
        platform_device_unregister(glue->core);
-       kfree(glue);
 }
 
 #ifdef CONFIG_PM
index 8674e188b697d91741cb8e5eaef96918eb50224d..55a4488633e4a57c0296e0fd10c3cc5321b2ee61 100644 (file)
@@ -889,16 +889,16 @@ static int connect_ctrl_ring(struct backend_info *be)
        unsigned int evtchn;
        int err;
 
-       err = xenbus_gather(XBT_NIL, dev->otherend,
-                           "ctrl-ring-ref", "%u", &val, NULL);
-       if (err)
+       err = xenbus_scanf(XBT_NIL, dev->otherend,
+                          "ctrl-ring-ref", "%u", &val);
+       if (err < 0)
                goto done; /* The frontend does not have a control ring */
 
        ring_ref = val;
 
-       err = xenbus_gather(XBT_NIL, dev->otherend,
-                           "event-channel-ctrl", "%u", &val, NULL);
-       if (err) {
+       err = xenbus_scanf(XBT_NIL, dev->otherend,
+                          "event-channel-ctrl", "%u", &val);
+       if (err < 0) {
                xenbus_dev_fatal(dev, err,
                                 "reading %s/event-channel-ctrl",
                                 dev->otherend);
index 7d616b003e894f50a6337caa7aa841decda87f39..e085c8c31cfe0d21f5ad4c2c9fe94f0514c10a98 100644 (file)
@@ -304,7 +304,7 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue)
                queue->rx_skbs[id] = skb;
 
                ref = gnttab_claim_grant_reference(&queue->gref_rx_head);
-               BUG_ON((signed short)ref < 0);
+               WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
                queue->grant_rx_ref[id] = ref;
 
                page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
@@ -428,7 +428,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset,
        id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs);
        tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++);
        ref = gnttab_claim_grant_reference(&queue->gref_tx_head);
-       BUG_ON((signed short)ref < 0);
+       WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref));
 
        gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id,
                                        gfn, GNTMAP_readonly);
index 83deda4bb4d6d52d4b529b71712f71114aac1445..6f9563a9648852e48929a39b29982a7bbef52c0e 100644 (file)
@@ -133,7 +133,7 @@ static int mei_nfc_if_version(struct nfc_mei_phy *phy)
                return -ENOMEM;
 
        bytes_recv = mei_cldev_recv(phy->cldev, (u8 *)reply, if_version_length);
-       if (bytes_recv < 0 || bytes_recv < sizeof(struct mei_nfc_reply)) {
+       if (bytes_recv < 0 || bytes_recv < if_version_length) {
                pr_err("Could not read IF version\n");
                r = -EIO;
                goto err;
index 0d5c29ae51def6735e21aafb6579cda7c56ebd6f..7310a261c858bab090bab14ecf3646626aac7651 100644 (file)
@@ -112,17 +112,17 @@ MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64,
 
 module_param_named(xeon_b2b_usd_bar4_addr64,
                   xeon_b2b_usd_addr.bar4_addr64, ullong, 0644);
-MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64,
+MODULE_PARM_DESC(xeon_b2b_usd_bar4_addr64,
                 "XEON B2B USD BAR 4 64-bit address");
 
 module_param_named(xeon_b2b_usd_bar4_addr32,
                   xeon_b2b_usd_addr.bar4_addr32, ullong, 0644);
-MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64,
+MODULE_PARM_DESC(xeon_b2b_usd_bar4_addr32,
                 "XEON B2B USD split-BAR 4 32-bit address");
 
 module_param_named(xeon_b2b_usd_bar5_addr32,
                   xeon_b2b_usd_addr.bar5_addr32, ullong, 0644);
-MODULE_PARM_DESC(xeon_b2b_usd_bar2_addr64,
+MODULE_PARM_DESC(xeon_b2b_usd_bar5_addr32,
                 "XEON B2B USD split-BAR 5 32-bit address");
 
 module_param_named(xeon_b2b_dsd_bar2_addr64,
@@ -132,17 +132,17 @@ MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64,
 
 module_param_named(xeon_b2b_dsd_bar4_addr64,
                   xeon_b2b_dsd_addr.bar4_addr64, ullong, 0644);
-MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64,
+MODULE_PARM_DESC(xeon_b2b_dsd_bar4_addr64,
                 "XEON B2B DSD BAR 4 64-bit address");
 
 module_param_named(xeon_b2b_dsd_bar4_addr32,
                   xeon_b2b_dsd_addr.bar4_addr32, ullong, 0644);
-MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64,
+MODULE_PARM_DESC(xeon_b2b_dsd_bar4_addr32,
                 "XEON B2B DSD split-BAR 4 32-bit address");
 
 module_param_named(xeon_b2b_dsd_bar5_addr32,
                   xeon_b2b_dsd_addr.bar5_addr32, ullong, 0644);
-MODULE_PARM_DESC(xeon_b2b_dsd_bar2_addr64,
+MODULE_PARM_DESC(xeon_b2b_dsd_bar5_addr32,
                 "XEON B2B DSD split-BAR 5 32-bit address");
 
 #ifndef ioread64
@@ -1755,6 +1755,8 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
                                            XEON_B2B_MIN_SIZE);
                if (!ndev->peer_mmio)
                        return -EIO;
+
+               ndev->peer_addr = pci_resource_start(pdev, b2b_bar);
        }
 
        return 0;
@@ -2019,6 +2021,7 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev)
                goto err_mmio;
        }
        ndev->peer_mmio = ndev->self_mmio;
+       ndev->peer_addr = pci_resource_start(pdev, 0);
 
        return 0;
 
index 8601c10acf74e3267d1d5501e20149ce74cdb38b..4eb8adb345084430947ccabdf6962c1099fbb703 100644 (file)
@@ -257,7 +257,7 @@ enum {
 #define NTB_QP_DEF_NUM_ENTRIES 100
 #define NTB_LINK_DOWN_TIMEOUT  10
 #define DMA_RETRIES            20
-#define DMA_OUT_RESOURCE_TO    50
+#define DMA_OUT_RESOURCE_TO    msecs_to_jiffies(50)
 
 static void ntb_transport_rxc_db(unsigned long data);
 static const struct ntb_ctx_ops ntb_transport_ops;
index 6a50f20bf1cde0e080f766ede5df147a9ea11a7b..e75d4fdc08663905eace859cff6cdebdb97e92b5 100644 (file)
@@ -72,7 +72,7 @@
 #define MAX_THREADS            32
 #define MAX_TEST_SIZE          SZ_1M
 #define MAX_SRCS               32
-#define DMA_OUT_RESOURCE_TO    50
+#define DMA_OUT_RESOURCE_TO    msecs_to_jiffies(50)
 #define DMA_RETRIES            20
 #define SZ_4G                  (1ULL << 32)
 #define MAX_SEG_ORDER          20 /* no larger than 1M for kmalloc buffer */
@@ -589,7 +589,7 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
                return -ENOMEM;
 
        if (mutex_is_locked(&perf->run_mutex)) {
-               out_off = snprintf(buf, 64, "running\n");
+               out_off = scnprintf(buf, 64, "running\n");
                goto read_from_buf;
        }
 
@@ -600,14 +600,14 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
                        break;
 
                if (pctx->status) {
-                       out_off += snprintf(buf + out_off, 1024 - out_off,
+                       out_off += scnprintf(buf + out_off, 1024 - out_off,
                                            "%d: error %d\n", i,
                                            pctx->status);
                        continue;
                }
 
                rate = div64_u64(pctx->copied, pctx->diff_us);
-               out_off += snprintf(buf + out_off, 1024 - out_off,
+               out_off += scnprintf(buf + out_off, 1024 - out_off,
                        "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
                        i, pctx->copied, pctx->diff_us, rate);
        }
index 7d311799fca1696ee7c5884fa2d58af2f2061816..435861189d97f87fc07397e6a174571d524bdb6f 100644 (file)
@@ -88,7 +88,7 @@ MODULE_PARM_DESC(delay_ms, "Milliseconds to delay the response to peer");
 
 static unsigned long db_init = 0x7;
 module_param(db_init, ulong, 0644);
-MODULE_PARM_DESC(delay_ms, "Initial doorbell bits to ring on the peer");
+MODULE_PARM_DESC(db_init, "Initial doorbell bits to ring on the peer");
 
 struct pp_ctx {
        struct ntb_dev                  *ntb;
index 8b2b740d6679e61f544a990164eb9e8f2e614f42..124c2432ac9cb3d6e0a696023507f5131774c282 100644 (file)
@@ -89,7 +89,7 @@ config NVDIMM_PFN
          Select Y if unsure
 
 config NVDIMM_DAX
-       tristate "NVDIMM DAX: Raw access to persistent memory"
+       bool "NVDIMM DAX: Raw access to persistent memory"
        default LIBNVDIMM
        depends on NVDIMM_PFN
        help
index 3509cff68ef9c73e1c5bfe79829acfc4b4467cdf..abe5c6bc756c255193d803039973971368b9471d 100644 (file)
@@ -2176,12 +2176,14 @@ static struct device **scan_labels(struct nd_region *nd_region)
        return devs;
 
  err:
-       for (i = 0; devs[i]; i++)
-               if (is_nd_blk(&nd_region->dev))
-                       namespace_blk_release(devs[i]);
-               else
-                       namespace_pmem_release(devs[i]);
-       kfree(devs);
+       if (devs) {
+               for (i = 0; devs[i]; i++)
+                       if (is_nd_blk(&nd_region->dev))
+                               namespace_blk_release(devs[i]);
+                       else
+                               namespace_pmem_release(devs[i]);
+               kfree(devs);
+       }
        return NULL;
 }
 
index 42b3a82170733971a3b1d000b8de4979f0ad311b..24618431a14bae7e891438b3601d017d1d34db4d 100644 (file)
@@ -47,7 +47,7 @@ static struct nd_region *to_region(struct pmem_device *pmem)
        return to_nd_region(to_dev(pmem)->parent);
 }
 
-static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
+static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
                unsigned int len)
 {
        struct device *dev = to_dev(pmem);
@@ -62,8 +62,12 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
                                __func__, (unsigned long long) sector,
                                cleared / 512, cleared / 512 > 1 ? "s" : "");
                badblocks_clear(&pmem->bb, sector, cleared / 512);
+       } else {
+               return -EIO;
        }
+
        invalidate_pmem(pmem->virt_addr + offset, len);
+       return 0;
 }
 
 static void write_pmem(void *pmem_addr, struct page *page,
@@ -123,7 +127,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
                flush_dcache_page(page);
                write_pmem(pmem_addr, page, off, len);
                if (unlikely(bad_pmem)) {
-                       pmem_clear_poison(pmem, pmem_off, len);
+                       rc = pmem_clear_poison(pmem, pmem_off, len);
                        write_pmem(pmem_addr, page, off, len);
                }
        }
index 329381a28edf8a7e4b3bb4c6ca2da669ee9caa4a..79e679d12f3b3667ad815e0bd39ab428da6651ac 100644 (file)
@@ -554,7 +554,7 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 
        /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
        c.identify.opcode = nvme_admin_identify;
-       c.identify.cns = cpu_to_le32(1);
+       c.identify.cns = cpu_to_le32(NVME_ID_CNS_CTRL);
 
        *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
        if (!*id)
@@ -572,7 +572,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n
        struct nvme_command c = { };
 
        c.identify.opcode = nvme_admin_identify;
-       c.identify.cns = cpu_to_le32(2);
+       c.identify.cns = cpu_to_le32(NVME_ID_CNS_NS_ACTIVE_LIST);
        c.identify.nsid = cpu_to_le32(nsid);
        return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
 }
@@ -900,9 +900,9 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
                return -ENODEV;
        }
 
-       if (ns->ctrl->vs >= NVME_VS(1, 1))
+       if (ns->ctrl->vs >= NVME_VS(1, 1, 0))
                memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui));
-       if (ns->ctrl->vs >= NVME_VS(1, 2))
+       if (ns->ctrl->vs >= NVME_VS(1, 2, 0))
                memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid));
 
        return 0;
@@ -1086,6 +1086,8 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
        int ret;
 
        while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
+               if (csts == ~0)
+                       return -ENODEV;
                if ((csts & NVME_CSTS_RDY) == bit)
                        break;
 
@@ -1240,7 +1242,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
        }
        page_shift = NVME_CAP_MPSMIN(cap) + 12;
 
-       if (ctrl->vs >= NVME_VS(1, 1))
+       if (ctrl->vs >= NVME_VS(1, 1, 0))
                ctrl->subsystem = NVME_CAP_NSSRC(cap);
 
        ret = nvme_identify_ctrl(ctrl, &id);
@@ -1840,7 +1842,7 @@ static void nvme_scan_work(struct work_struct *work)
                return;
 
        nn = le32_to_cpu(id->nn);
-       if (ctrl->vs >= NVME_VS(1, 1) &&
+       if (ctrl->vs >= NVME_VS(1, 1, 0) &&
            !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
                if (!nvme_scan_ns_list(ctrl, nn))
                        goto done;
index f5e3011e31fcdfea4c4e067ca8b808bc66a2fffd..5daf2f4be0cd74d5cc1360ed95ceb7c9a10385ea 100644 (file)
@@ -612,7 +612,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node,
 
        ret = nvm_register(dev);
 
-       ns->lba_shift = ilog2(dev->sec_size) - 9;
+       ns->lba_shift = ilog2(dev->sec_size);
 
        if (sysfs_create_group(&dev->dev.kobj, attrs))
                pr_warn("%s: failed to create sysfs group for identification\n",
index 0fc99f0f257110a063f3e58722b2f0ec08da655e..5e52034ab01049e3f9935e44bee4a8b125769582 100644 (file)
@@ -99,6 +99,7 @@ struct nvme_dev {
        dma_addr_t cmb_dma_addr;
        u64 cmb_size;
        u32 cmbsz;
+       u32 cmbloc;
        struct nvme_ctrl ctrl;
        struct completion ioq_wait;
 };
@@ -893,7 +894,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
                         "I/O %d QID %d timeout, reset controller\n",
                         req->tag, nvmeq->qid);
                nvme_dev_disable(dev, false);
-               queue_work(nvme_workq, &dev->reset_work);
+               nvme_reset(dev);
 
                /*
                 * Mark the request as handled, since the inline shutdown
@@ -1214,7 +1215,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
        u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
        struct nvme_queue *nvmeq;
 
-       dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1) ?
+       dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
                                                NVME_CAP_NSSRC(cap) : 0;
 
        if (dev->subsystem &&
@@ -1241,20 +1242,16 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 
        result = nvme_enable_ctrl(&dev->ctrl, cap);
        if (result)
-               goto free_nvmeq;
+               return result;
 
        nvmeq->cq_vector = 0;
        result = queue_request_irq(nvmeq);
        if (result) {
                nvmeq->cq_vector = -1;
-               goto free_nvmeq;
+               return result;
        }
 
        return result;
-
- free_nvmeq:
-       nvme_free_queues(dev, 0);
-       return result;
 }
 
 static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
@@ -1291,7 +1288,7 @@ static void nvme_watchdog_timer(unsigned long data)
 
        /* Skip controllers under certain specific conditions. */
        if (nvme_should_reset(dev, csts)) {
-               if (queue_work(nvme_workq, &dev->reset_work))
+               if (!nvme_reset(dev))
                        dev_warn(dev->dev,
                                "Failed status: 0x%x, reset controller.\n",
                                csts);
@@ -1316,10 +1313,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
        max = min(dev->max_qid, dev->queue_count - 1);
        for (i = dev->online_queues; i <= max; i++) {
                ret = nvme_create_queue(dev->queues[i], i);
-               if (ret) {
-                       nvme_free_queues(dev, i);
+               if (ret)
                        break;
-               }
        }
 
        /*
@@ -1331,28 +1326,37 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
        return ret >= 0 ? 0 : ret;
 }
 
+static ssize_t nvme_cmb_show(struct device *dev,
+                            struct device_attribute *attr,
+                            char *buf)
+{
+       struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+       return snprintf(buf, PAGE_SIZE, "cmbloc : x%08x\ncmbsz  : x%08x\n",
+                      ndev->cmbloc, ndev->cmbsz);
+}
+static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL);
+
 static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
 {
        u64 szu, size, offset;
-       u32 cmbloc;
        resource_size_t bar_size;
        struct pci_dev *pdev = to_pci_dev(dev->dev);
        void __iomem *cmb;
        dma_addr_t dma_addr;
 
-       if (!use_cmb_sqes)
-               return NULL;
-
        dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
        if (!(NVME_CMB_SZ(dev->cmbsz)))
                return NULL;
+       dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
 
-       cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
+       if (!use_cmb_sqes)
+               return NULL;
 
        szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
        size = szu * NVME_CMB_SZ(dev->cmbsz);
-       offset = szu * NVME_CMB_OFST(cmbloc);
-       bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc));
+       offset = szu * NVME_CMB_OFST(dev->cmbloc);
+       bar_size = pci_resource_len(pdev, NVME_CMB_BIR(dev->cmbloc));
 
        if (offset > bar_size)
                return NULL;
@@ -1365,7 +1369,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
        if (size > bar_size - offset)
                size = bar_size - offset;
 
-       dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset;
+       dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset;
        cmb = ioremap_wc(dma_addr, size);
        if (!cmb)
                return NULL;
@@ -1450,13 +1454,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
        result = queue_request_irq(adminq);
        if (result) {
                adminq->cq_vector = -1;
-               goto free_queues;
+               return result;
        }
        return nvme_create_io_queues(dev);
-
- free_queues:
-       nvme_free_queues(dev, 1);
-       return result;
 }
 
 static void nvme_del_queue_end(struct request *req, int error)
@@ -1511,9 +1511,9 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
        return 0;
 }
 
-static void nvme_disable_io_queues(struct nvme_dev *dev)
+static void nvme_disable_io_queues(struct nvme_dev *dev, int queues)
 {
-       int pass, queues = dev->online_queues - 1;
+       int pass;
        unsigned long timeout;
        u8 opcode = nvme_admin_delete_sq;
 
@@ -1616,9 +1616,25 @@ static int nvme_pci_enable(struct nvme_dev *dev)
                        dev->q_depth);
        }
 
-       if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2))
+       /*
+        * CMBs can currently only exist on >=1.2 PCIe devices. We only
+        * populate sysfs if a CMB is implemented. Note that we add the
+        * CMB attribute to the nvme_ctrl kobj which removes the need to remove
+        * it on exit. Since nvme_dev_attrs_group has no name we can pass
+        * NULL as final argument to sysfs_add_file_to_group.
+        */
+
+       if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2, 0)) {
                dev->cmb = nvme_map_cmb(dev);
 
+               if (dev->cmbsz) {
+                       if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
+                                                   &dev_attr_cmb.attr, NULL))
+                               dev_warn(dev->dev,
+                                        "failed to add sysfs attribute for CMB\n");
+               }
+       }
+
        pci_enable_pcie_error_reporting(pdev);
        pci_save_state(pdev);
        return 0;
@@ -1649,7 +1665,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
-       int i;
+       int i, queues;
        u32 csts = -1;
 
        del_timer_sync(&dev->watchdog_timer);
@@ -1660,6 +1676,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
                csts = readl(dev->bar + NVME_REG_CSTS);
        }
 
+       queues = dev->online_queues - 1;
        for (i = dev->queue_count - 1; i > 0; i--)
                nvme_suspend_queue(dev->queues[i]);
 
@@ -1671,7 +1688,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
                if (dev->queue_count)
                        nvme_suspend_queue(dev->queues[0]);
        } else {
-               nvme_disable_io_queues(dev);
+               nvme_disable_io_queues(dev, queues);
                nvme_disable_admin_queue(dev, shutdown);
        }
        nvme_pci_disable(dev);
@@ -1818,11 +1835,10 @@ static int nvme_reset(struct nvme_dev *dev)
 {
        if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
                return -ENODEV;
-
+       if (work_busy(&dev->reset_work))
+               return -ENODEV;
        if (!queue_work(nvme_workq, &dev->reset_work))
                return -EBUSY;
-
-       flush_work(&dev->reset_work);
        return 0;
 }
 
@@ -1846,7 +1862,12 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
 
 static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
 {
-       return nvme_reset(to_nvme_dev(ctrl));
+       struct nvme_dev *dev = to_nvme_dev(ctrl);
+       int ret = nvme_reset(dev);
+
+       if (!ret)
+               flush_work(&dev->reset_work);
+       return ret;
 }
 
 static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
@@ -1940,7 +1961,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
        if (prepare)
                nvme_dev_disable(dev, false);
        else
-               queue_work(nvme_workq, &dev->reset_work);
+               nvme_reset(dev);
 }
 
 static void nvme_shutdown(struct pci_dev *pdev)
@@ -2009,7 +2030,7 @@ static int nvme_resume(struct device *dev)
        struct pci_dev *pdev = to_pci_dev(dev);
        struct nvme_dev *ndev = pci_get_drvdata(pdev);
 
-       queue_work(nvme_workq, &ndev->reset_work);
+       nvme_reset(ndev);
        return 0;
 }
 #endif
@@ -2048,7 +2069,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
 
        dev_info(dev->ctrl.device, "restart after slot reset\n");
        pci_restore_state(pdev);
-       queue_work(nvme_workq, &dev->reset_work);
+       nvme_reset(dev);
        return PCI_ERS_RESULT_RECOVERED;
 }
 
index 5a8388177959916dc4dadbc5e6d8c366b0eb6b81..3d25add36d91993ddbc45572c6bc10213deea9d9 100644 (file)
@@ -83,6 +83,7 @@ enum nvme_rdma_queue_flags {
        NVME_RDMA_Q_CONNECTED = (1 << 0),
        NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
        NVME_RDMA_Q_DELETING = (1 << 2),
+       NVME_RDMA_Q_LIVE = (1 << 3),
 };
 
 struct nvme_rdma_queue {
@@ -624,10 +625,18 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl)
 
        for (i = 1; i < ctrl->queue_count; i++) {
                ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
-               if (ret)
-                       break;
+               if (ret) {
+                       dev_info(ctrl->ctrl.device,
+                               "failed to connect i/o queue: %d\n", ret);
+                       goto out_free_queues;
+               }
+               set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
        }
 
+       return 0;
+
+out_free_queues:
+       nvme_rdma_free_io_queues(ctrl);
        return ret;
 }
 
@@ -712,6 +721,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
        if (ret)
                goto stop_admin_q;
 
+       set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
+
        ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
        if (ret)
                goto stop_admin_q;
@@ -761,8 +772,10 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
        nvme_stop_keep_alive(&ctrl->ctrl);
 
-       for (i = 0; i < ctrl->queue_count; i++)
+       for (i = 0; i < ctrl->queue_count; i++) {
                clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
+               clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags);
+       }
 
        if (ctrl->queue_count > 1)
                nvme_stop_queues(&ctrl->ctrl);
@@ -1378,6 +1391,24 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
        return BLK_EH_HANDLED;
 }
 
+/*
+ * We cannot accept any other command until the Connect command has completed.
+ */
+static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
+               struct request *rq)
+{
+       if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
+               struct nvme_command *cmd = (struct nvme_command *)rq->cmd;
+
+               if (rq->cmd_type != REQ_TYPE_DRV_PRIV ||
+                   cmd->common.opcode != nvme_fabrics_command ||
+                   cmd->fabrics.fctype != nvme_fabrics_type_connect)
+                       return false;
+       }
+
+       return true;
+}
+
 static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
                const struct blk_mq_queue_data *bd)
 {
@@ -1394,6 +1425,9 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 
        WARN_ON_ONCE(rq->tag < 0);
 
+       if (!nvme_rdma_queue_is_ready(queue, rq))
+               return BLK_MQ_RQ_QUEUE_BUSY;
+
        dev = queue->device->dev;
        ib_dma_sync_single_for_cpu(dev, sqe->dma,
                        sizeof(struct nvme_command), DMA_TO_DEVICE);
@@ -1544,6 +1578,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl)
        if (error)
                goto out_cleanup_queue;
 
+       set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
+
        error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
        if (error) {
                dev_err(ctrl->ctrl.device,
index c2a0a1c7d05d1571c939f531d3619b5800cadbd6..3eaa4d27801ee17c7e1ccaa1b8a489844cb6f5bc 100644 (file)
@@ -606,7 +606,7 @@ static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
        eui = id_ns->eui64;
        len = sizeof(id_ns->eui64);
 
-       if (ns->ctrl->vs >= NVME_VS(1, 2)) {
+       if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) {
                if (bitmap_empty(eui, len * 8)) {
                        eui = id_ns->nguid;
                        len = sizeof(id_ns->nguid);
@@ -679,7 +679,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 {
        int res;
 
-       if (ns->ctrl->vs >= NVME_VS(1, 1)) {
+       if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) {
                res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
                if (res != -EOPNOTSUPP)
                        return res;
index 7ab9c9381b989578cb5faaf6054721ace331c631..6fe4c48a21e46520ad1e0e569a9773813ed7e233 100644 (file)
@@ -199,7 +199,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
         */
 
        /* we support multiple ports and multiples hosts: */
-       id->mic = (1 << 0) | (1 << 1);
+       id->cmic = (1 << 0) | (1 << 1);
 
        /* no limit on data transfer sizes for now */
        id->mdts = 0;
@@ -511,13 +511,13 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req)
        case nvme_admin_identify:
                req->data_len = 4096;
                switch (le32_to_cpu(cmd->identify.cns)) {
-               case 0x00:
+               case NVME_ID_CNS_NS:
                        req->execute = nvmet_execute_identify_ns;
                        return 0;
-               case 0x01:
+               case NVME_ID_CNS_CTRL:
                        req->execute = nvmet_execute_identify_ctrl;
                        return 0;
-               case 0x02:
+               case NVME_ID_CNS_NS_ACTIVE_LIST:
                        req->execute = nvmet_execute_identify_nslist;
                        return 0;
                }
index 6559d5afa7bfd9f808281658f686429c53fc7903..a21437a33adbef0c51395c0b88f2e8a10995ea86 100644 (file)
@@ -838,9 +838,13 @@ static void nvmet_fatal_error_handler(struct work_struct *work)
 
 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
 {
-       ctrl->csts |= NVME_CSTS_CFS;
-       INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
-       schedule_work(&ctrl->fatal_err_work);
+       mutex_lock(&ctrl->lock);
+       if (!(ctrl->csts & NVME_CSTS_CFS)) {
+               ctrl->csts |= NVME_CSTS_CFS;
+               INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
+               schedule_work(&ctrl->fatal_err_work);
+       }
+       mutex_unlock(&ctrl->lock);
 }
 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
 
@@ -882,7 +886,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
        if (!subsys)
                return NULL;
 
-       subsys->ver = (1 << 16) | (2 << 8) | 1; /* NVMe 1.2.1 */
+       subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */
 
        switch (type) {
        case NVME_NQN_NVME:
index 6f65646e89cfd9bc21a95946bfa4fbbe26eb835d..12f39eea569f2fb33cec45884c188d0ad8ae2493 100644 (file)
@@ -54,7 +54,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr,
        /* we support only dynamic controllers */
        e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
        e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH);
-       e->nqntype = type;
+       e->subtype = type;
        memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE);
        memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
        memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE);
@@ -187,7 +187,7 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req)
        case nvme_admin_identify:
                req->data_len = 4096;
                switch (le32_to_cpu(cmd->identify.cns)) {
-               case 0x01:
+               case NVME_ID_CNS_CTRL:
                        req->execute =
                                nvmet_execute_identify_disc_ctrl;
                        return 0;
index f8d23999e0f2c28b98934c6a790b3a6c2bde26d1..005ef5d17a191101b3b2da09a0a1c702f5a6c400 100644 (file)
@@ -951,6 +951,7 @@ err_destroy_cq:
 
 static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
 {
+       ib_drain_qp(queue->cm_id->qp);
        rdma_destroy_qp(queue->cm_id);
        ib_free_cq(queue->cq);
 }
@@ -1066,6 +1067,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
        spin_lock_init(&queue->rsp_wr_wait_lock);
        INIT_LIST_HEAD(&queue->free_rsps);
        spin_lock_init(&queue->rsps_lock);
+       INIT_LIST_HEAD(&queue->queue_list);
 
        queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL);
        if (queue->idx < 0) {
@@ -1244,7 +1246,6 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
 
        if (disconnect) {
                rdma_disconnect(queue->cm_id);
-               ib_drain_qp(queue->cm_id->qp);
                schedule_work(&queue->release_work);
        }
 }
@@ -1269,7 +1270,12 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 {
        WARN_ON_ONCE(queue->state != NVMET_RDMA_Q_CONNECTING);
 
-       pr_err("failed to connect queue\n");
+       mutex_lock(&nvmet_rdma_queue_mutex);
+       if (!list_empty(&queue->queue_list))
+               list_del_init(&queue->queue_list);
+       mutex_unlock(&nvmet_rdma_queue_mutex);
+
+       pr_err("failed to connect queue %d\n", queue->idx);
        schedule_work(&queue->release_work);
 }
 
@@ -1352,7 +1358,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
        case RDMA_CM_EVENT_ADDR_CHANGE:
        case RDMA_CM_EVENT_DISCONNECTED:
        case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-               nvmet_rdma_queue_disconnect(queue);
+               /*
+                * We might end up here when we already freed the qp
+                * which means queue release sequence is in progress,
+                * so don't get in the way...
+                */
+               if (queue)
+                       nvmet_rdma_queue_disconnect(queue);
                break;
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
                ret = nvmet_rdma_device_removal(cm_id, queue);
index d687e6de24a07e11a2c2cecd94e7711610d52005..a0bccb54a9bd1d762d42967e6722b43d1b4a93ec 100644 (file)
@@ -2077,8 +2077,6 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align))
                        name = of_get_property(of_aliases, "stdout", NULL);
                if (name)
                        of_stdout = of_find_node_opts_by_path(name, &of_stdout_options);
-               if (of_stdout)
-                       console_set_by_of();
        }
 
        if (!of_aliases)
index b470f7e3521d49c73877b6156bc9db7b926fdc51..5a3145a025470dc4c86f8d191a139a8acb85394f 100644 (file)
@@ -292,6 +292,7 @@ struct phy_device *of_phy_find_device(struct device_node *phy_np)
                mdiodev = to_mdio_device(d);
                if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY)
                        return to_phy_device(d);
+               put_device(d);
        }
 
        return NULL;
@@ -456,8 +457,11 @@ int of_phy_register_fixed_link(struct device_node *np)
                status.link = 1;
                status.duplex = of_property_read_bool(fixed_link_node,
                                                      "full-duplex");
-               if (of_property_read_u32(fixed_link_node, "speed", &status.speed))
+               if (of_property_read_u32(fixed_link_node, "speed",
+                                        &status.speed)) {
+                       of_node_put(fixed_link_node);
                        return -EINVAL;
+               }
                status.pause = of_property_read_bool(fixed_link_node, "pause");
                status.asym_pause = of_property_read_bool(fixed_link_node,
                                                          "asym-pause");
index f811d27964370475040b3a51b3b86d590b621cf8..e4bf07d20f9bbf416f756ca381d3340ce662272f 100644 (file)
@@ -29,6 +29,7 @@
 const struct of_device_id of_default_bus_match_table[] = {
        { .compatible = "simple-bus", },
        { .compatible = "simple-mfd", },
+       { .compatible = "isa", },
 #ifdef CONFIG_ARM_AMBA
        { .compatible = "arm,amba-bus", },
 #endif /* CONFIG_ARM_AMBA */
index 2cb7315e26d089701679e23a7fc3f8a9ce74e4cf..6537079963424b09bba4df9f0de93bd2a1e5820e 100644 (file)
@@ -247,6 +247,7 @@ static int __init ls_pcie_probe(struct platform_device *pdev)
 
        pp = &pcie->pp;
        pp->dev = dev;
+       pcie->drvdata = match->data;
        pp->ops = pcie->drvdata->ops;
 
        dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
@@ -256,7 +257,6 @@ static int __init ls_pcie_probe(struct platform_device *pdev)
                return PTR_ERR(pcie->pp.dbi_base);
        }
 
-       pcie->drvdata = match->data;
        pcie->lut = pcie->pp.dbi_base + pcie->drvdata->lut_offset;
 
        if (!ls_pcie_is_bridge(pcie))
index 537f58a664fa230d3f6496ff68f4e7495d0f5952..8df6312ed3000a9c0587f318416e20f2329f55cc 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2015-2016 Synopsys, Inc. (www.synopsys.com)
  *
- * Authors: Joao Pinto <jpinto@synopsys.com>
+ * Authors: Joao Pinto <jpmpinto@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
index 035f50c03281c803f82d721c0a3c3a2c10a4a348..bed19994c1e94d4e32c134e58133c4acd8b8bd88 100644 (file)
@@ -637,8 +637,6 @@ int dw_pcie_host_init(struct pcie_port *pp)
                }
        }
 
-       pp->iatu_unroll_enabled = dw_pcie_iatu_unroll_enabled(pp);
-
        if (pp->ops->host_init)
                pp->ops->host_init(pp);
 
@@ -809,6 +807,11 @@ void dw_pcie_setup_rc(struct pcie_port *pp)
 {
        u32 val;
 
+       /* get iATU unroll support */
+       pp->iatu_unroll_enabled = dw_pcie_iatu_unroll_enabled(pp);
+       dev_dbg(pp->dev, "iATU unroll: %s\n",
+               pp->iatu_unroll_enabled ? "enabled" : "disabled");
+
        /* set the number of lanes */
        val = dw_pcie_readl_rc(pp, PCIE_PORT_LINK_CONTROL);
        val &= ~PORT_LINK_MODE_MASK;
index ef0a84c7a5885e1d62df499353f1384d86a5fceb..35936409b2d45921a049db1d4ddcbc64c4bb8f04 100644 (file)
@@ -533,11 +533,11 @@ static int qcom_pcie_probe(struct platform_device *pdev)
        if (IS_ERR(pcie->phy))
                return PTR_ERR(pcie->phy);
 
+       pp->dev = dev;
        ret = pcie->ops->get_resources(pcie);
        if (ret)
                return ret;
 
-       pp->dev = dev;
        pp->root_bus_nr = -1;
        pp->ops = &qcom_pcie_dw_ops;
 
index e0b22dab9b7ac37c81d380bfe00751f4496f4516..e04f69beb42d0f8ef04de5da85702f238ff2a8a5 100644 (file)
@@ -190,6 +190,9 @@ struct rockchip_pcie {
        struct  reset_control *mgmt_rst;
        struct  reset_control *mgmt_sticky_rst;
        struct  reset_control *pipe_rst;
+       struct  reset_control *pm_rst;
+       struct  reset_control *aclk_rst;
+       struct  reset_control *pclk_rst;
        struct  clk *aclk_pcie;
        struct  clk *aclk_perf_pcie;
        struct  clk *hclk_pcie;
@@ -408,6 +411,44 @@ static int rockchip_pcie_init_port(struct rockchip_pcie *rockchip)
 
        gpiod_set_value(rockchip->ep_gpio, 0);
 
+       err = reset_control_assert(rockchip->aclk_rst);
+       if (err) {
+               dev_err(dev, "assert aclk_rst err %d\n", err);
+               return err;
+       }
+
+       err = reset_control_assert(rockchip->pclk_rst);
+       if (err) {
+               dev_err(dev, "assert pclk_rst err %d\n", err);
+               return err;
+       }
+
+       err = reset_control_assert(rockchip->pm_rst);
+       if (err) {
+               dev_err(dev, "assert pm_rst err %d\n", err);
+               return err;
+       }
+
+       udelay(10);
+
+       err = reset_control_deassert(rockchip->pm_rst);
+       if (err) {
+               dev_err(dev, "deassert pm_rst err %d\n", err);
+               return err;
+       }
+
+       err = reset_control_deassert(rockchip->aclk_rst);
+       if (err) {
+               dev_err(dev, "deassert aclk_rst err %d\n", err);
+               return err;
+       }
+
+       err = reset_control_deassert(rockchip->pclk_rst);
+       if (err) {
+               dev_err(dev, "deassert pclk_rst err %d\n", err);
+               return err;
+       }
+
        err = phy_init(rockchip->phy);
        if (err < 0) {
                dev_err(dev, "fail to init phy, err %d\n", err);
@@ -781,6 +822,27 @@ static int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip)
                return PTR_ERR(rockchip->pipe_rst);
        }
 
+       rockchip->pm_rst = devm_reset_control_get(dev, "pm");
+       if (IS_ERR(rockchip->pm_rst)) {
+               if (PTR_ERR(rockchip->pm_rst) != -EPROBE_DEFER)
+                       dev_err(dev, "missing pm reset property in node\n");
+               return PTR_ERR(rockchip->pm_rst);
+       }
+
+       rockchip->pclk_rst = devm_reset_control_get(dev, "pclk");
+       if (IS_ERR(rockchip->pclk_rst)) {
+               if (PTR_ERR(rockchip->pclk_rst) != -EPROBE_DEFER)
+                       dev_err(dev, "missing pclk reset property in node\n");
+               return PTR_ERR(rockchip->pclk_rst);
+       }
+
+       rockchip->aclk_rst = devm_reset_control_get(dev, "aclk");
+       if (IS_ERR(rockchip->aclk_rst)) {
+               if (PTR_ERR(rockchip->aclk_rst) != -EPROBE_DEFER)
+                       dev_err(dev, "missing aclk reset property in node\n");
+               return PTR_ERR(rockchip->aclk_rst);
+       }
+
        rockchip->ep_gpio = devm_gpiod_get(dev, "ep", GPIOD_OUT_HIGH);
        if (IS_ERR(rockchip->ep_gpio)) {
                dev_err(dev, "missing ep-gpios property in node\n");
index bfdd0744b686abdb6d76fafd5132da1c294a7f6c..ad70507cfb566a2291498d4ba723e1a5a4aebd3c 100644 (file)
@@ -610,6 +610,7 @@ static int msi_verify_entries(struct pci_dev *dev)
  * msi_capability_init - configure device's MSI capability structure
  * @dev: pointer to the pci_dev data structure of MSI device function
  * @nvec: number of interrupts to allocate
+ * @affinity: flag to indicate cpu irq affinity mask should be set
  *
  * Setup the MSI capability structure of the device with the requested
  * number of interrupts.  A return value of zero indicates the successful
@@ -752,6 +753,7 @@ static void msix_program_entries(struct pci_dev *dev,
  * @dev: pointer to the pci_dev data structure of MSI-X device function
  * @entries: pointer to an array of struct msix_entry entries
  * @nvec: number of @entries
+ * @affinity: flag to indicate cpu irq affinity mask should be set
  *
  * Setup the MSI-X capability structure of device function with a
  * single MSI-X irq. A return of zero indicates the successful setup of
index 55f453de562ee63b8e53ab90cf6b671ef676c2a8..c7f3408e31487ef2db339233c61a4e66d15bb07d 100644 (file)
@@ -29,6 +29,11 @@ static int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state)
        return intel_mid_pci_set_power_state(pdev, state);
 }
 
+static pci_power_t mid_pci_get_power_state(struct pci_dev *pdev)
+{
+       return intel_mid_pci_get_power_state(pdev);
+}
+
 static pci_power_t mid_pci_choose_state(struct pci_dev *pdev)
 {
        return PCI_D3hot;
@@ -52,6 +57,7 @@ static bool mid_pci_need_resume(struct pci_dev *dev)
 static struct pci_platform_pm_ops mid_pci_platform_pm = {
        .is_manageable  = mid_pci_power_manageable,
        .set_state      = mid_pci_set_power_state,
+       .get_state      = mid_pci_get_power_state,
        .choose_state   = mid_pci_choose_state,
        .sleep_wake     = mid_pci_sleep_wake,
        .run_wake       = mid_pci_run_wake,
index 66c4d8f4223377d6bb54849d7794fc8172b1e0a2..9526e341988ba469cdfbe48ff23591e3180d1119 100644 (file)
@@ -121,6 +121,14 @@ int pci_claim_resource(struct pci_dev *dev, int resource)
                return -EINVAL;
        }
 
+       /*
+        * If we have a shadow copy in RAM, the PCI device doesn't respond
+        * to the shadow range, so we don't need to claim it, and upstream
+        * bridges don't need to route the range to the device.
+        */
+       if (res->flags & IORESOURCE_ROM_SHADOW)
+               return 0;
+
        root = pci_find_parent_resource(dev, res);
        if (!root) {
                dev_info(&dev->dev, "can't claim BAR %d %pR: no compatible bridge window\n",
index 153f3122283deb9fa4260c0e1da0c443a5fde2c0..b6b316de055c7129648904cbdae92cafb593e8dd 100644 (file)
@@ -107,7 +107,7 @@ int soc_pcmcia_regulator_set(struct soc_pcmcia_socket *skt,
 
                ret = regulator_enable(r->reg);
        } else {
-               regulator_disable(r->reg);
+               ret = regulator_disable(r->reg);
        }
        if (ret == 0)
                r->on = on;
index c2ac7646b99f4b50cfec9a2d899ba53c55c238c3..a8ac4bcef2c04ad19247a27764f798726bc68892 100644 (file)
@@ -1011,7 +1011,7 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
        rc = acpi_dev_get_resources(adev, &resource_list,
                                    acpi_pmu_dev_add_resource, &res);
        acpi_dev_free_resource_list(&resource_list);
-       if (rc < 0 || IS_ERR(&res)) {
+       if (rc < 0) {
                dev_err(dev, "PMU type %d: No resource address found\n", type);
                goto err;
        }
index 32ae78c8ca17655d877a66952d098d56a0eacf3c..c85fb0b59729de9febdbbcbeec59e2bcd80d1a2d 100644 (file)
@@ -198,7 +198,8 @@ static int da8xx_usb_phy_probe(struct platform_device *pdev)
        } else {
                int ret;
 
-               ret = phy_create_lookup(d_phy->usb11_phy, "usb-phy", "ohci.0");
+               ret = phy_create_lookup(d_phy->usb11_phy, "usb-phy",
+                                       "ohci-da8xx");
                if (ret)
                        dev_warn(dev, "Failed to create usb11 phy lookup\n");
                ret = phy_create_lookup(d_phy->usb20_phy, "usb-phy",
@@ -216,7 +217,7 @@ static int da8xx_usb_phy_remove(struct platform_device *pdev)
 
        if (!pdev->dev.of_node) {
                phy_remove_lookup(d_phy->usb20_phy, "usb-phy", "musb-da8xx");
-               phy_remove_lookup(d_phy->usb11_phy, "usb-phy", "ohci.0");
+               phy_remove_lookup(d_phy->usb11_phy, "usb-phy", "ohci-da8xx");
        }
 
        return 0;
index a2b4c6b58aea66d0b06cb153359ea3c31d4f6e66..6904633cad687d114cf19eb3292aba9896feb408 100644 (file)
@@ -249,21 +249,10 @@ err_refclk:
 static int rockchip_pcie_phy_exit(struct phy *phy)
 {
        struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy);
-       int err = 0;
 
        clk_disable_unprepare(rk_phy->clk_pciephy_ref);
 
-       err = reset_control_deassert(rk_phy->phy_rst);
-       if (err) {
-               dev_err(&phy->dev, "deassert phy_rst err %d\n", err);
-               goto err_reset;
-       }
-
-       return err;
-
-err_reset:
-       clk_prepare_enable(rk_phy->clk_pciephy_ref);
-       return err;
+       return 0;
 }
 
 static const struct phy_ops ops = {
index b9342a2af7b3666f86471d649bd134a4baa9ff13..fec34f5213c46739b5231f34bfe628731c231a9a 100644 (file)
@@ -264,7 +264,7 @@ static int sun4i_usb_phy_init(struct phy *_phy)
                return ret;
        }
 
-       if (data->cfg->enable_pmu_unk1) {
+       if (phy->pmu && data->cfg->enable_pmu_unk1) {
                val = readl(phy->pmu + REG_PMU_UNK1);
                writel(val & ~2, phy->pmu + REG_PMU_UNK1);
        }
index 87e6334eab9309f85e8cb5308f96dec54de7f495..547ca7b3f09850ce0f0928b4fe0a26cbcaab14d2 100644 (file)
@@ -459,8 +459,6 @@ static int twl4030_phy_power_off(struct phy *phy)
        struct twl4030_usb *twl = phy_get_drvdata(phy);
 
        dev_dbg(twl->dev, "%s\n", __func__);
-       pm_runtime_mark_last_busy(twl->dev);
-       pm_runtime_put_autosuspend(twl->dev);
 
        return 0;
 }
@@ -472,6 +470,8 @@ static int twl4030_phy_power_on(struct phy *phy)
        dev_dbg(twl->dev, "%s\n", __func__);
        pm_runtime_get_sync(twl->dev);
        schedule_delayed_work(&twl->id_workaround_work, HZ);
+       pm_runtime_mark_last_busy(twl->dev);
+       pm_runtime_put_autosuspend(twl->dev);
 
        return 0;
 }
index e1ab864e1a7f06f7600a435ed4ed80e8513d5336..87b46390b69597a3299143f3ec4e0ab282475718 100644 (file)
@@ -26,7 +26,7 @@
 
 #define ASPEED_G5_NR_PINS 228
 
-#define COND1          SIG_DESC_BIT(SCU90, 6, 0)
+#define COND1          { SCU90, BIT(6), 0, 0 }
 #define COND2          { SCU94, GENMASK(1, 0), 0, 0 }
 
 #define B14 0
@@ -151,21 +151,21 @@ FUNC_GROUP_DECL(GPID0, F19, E21);
 
 #define GPID2_DESC      SIG_DESC_SET(SCU8C, 9)
 
-#define D20 26
+#define F20 26
 SIG_EXPR_LIST_DECL_SINGLE(SD2DAT0, SD2, SD2_DESC);
 SIG_EXPR_DECL(GPID2IN, GPID2, GPID2_DESC);
 SIG_EXPR_DECL(GPID2IN, GPID, GPID_DESC);
 SIG_EXPR_LIST_DECL_DUAL(GPID2IN, GPID2, GPID);
-MS_PIN_DECL(D20, GPIOD2, SD2DAT0, GPID2IN);
+MS_PIN_DECL(F20, GPIOD2, SD2DAT0, GPID2IN);
 
-#define D21 27
+#define D20 27
 SIG_EXPR_LIST_DECL_SINGLE(SD2DAT1, SD2, SD2_DESC);
 SIG_EXPR_DECL(GPID2OUT, GPID2, GPID2_DESC);
 SIG_EXPR_DECL(GPID2OUT, GPID, GPID_DESC);
 SIG_EXPR_LIST_DECL_DUAL(GPID2OUT, GPID2, GPID);
-MS_PIN_DECL(D21, GPIOD3, SD2DAT1, GPID2OUT);
+MS_PIN_DECL(D20, GPIOD3, SD2DAT1, GPID2OUT);
 
-FUNC_GROUP_DECL(GPID2, D20, D21);
+FUNC_GROUP_DECL(GPID2, F20, D20);
 
 #define GPIE_DESC      SIG_DESC_SET(HW_STRAP1, 21)
 #define GPIE0_DESC     SIG_DESC_SET(SCU8C, 12)
@@ -182,28 +182,88 @@ SIG_EXPR_LIST_DECL_SINGLE(NDCD3, NDCD3, SIG_DESC_SET(SCU80, 17));
 SIG_EXPR_DECL(GPIE0OUT, GPIE0, GPIE0_DESC);
 SIG_EXPR_DECL(GPIE0OUT, GPIE, GPIE_DESC);
 SIG_EXPR_LIST_DECL_DUAL(GPIE0OUT, GPIE0, GPIE);
-MS_PIN_DECL(C20, GPIE0, NDCD3, GPIE0OUT);
+MS_PIN_DECL(C20, GPIOE1, NDCD3, GPIE0OUT);
 
 FUNC_GROUP_DECL(GPIE0, B20, C20);
 
-#define SPI1_DESC      SIG_DESC_SET(HW_STRAP1, 13)
+#define SPI1_DESC              { HW_STRAP1, GENMASK(13, 12), 1, 0 }
+#define SPI1DEBUG_DESC         { HW_STRAP1, GENMASK(13, 12), 2, 0 }
+#define SPI1PASSTHRU_DESC      { HW_STRAP1, GENMASK(13, 12), 3, 0 }
+
 #define C18 64
-SIG_EXPR_LIST_DECL_SINGLE(SYSCS, SPI1, COND1, SPI1_DESC);
+SIG_EXPR_DECL(SYSCS, SPI1DEBUG, COND1, SPI1DEBUG_DESC);
+SIG_EXPR_DECL(SYSCS, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC);
+SIG_EXPR_LIST_DECL_DUAL(SYSCS, SPI1DEBUG, SPI1PASSTHRU);
 SS_PIN_DECL(C18, GPIOI0, SYSCS);
 
 #define E15 65
-SIG_EXPR_LIST_DECL_SINGLE(SYSCK, SPI1, COND1, SPI1_DESC);
+SIG_EXPR_DECL(SYSCK, SPI1DEBUG, COND1, SPI1DEBUG_DESC);
+SIG_EXPR_DECL(SYSCK, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC);
+SIG_EXPR_LIST_DECL_DUAL(SYSCK, SPI1DEBUG, SPI1PASSTHRU);
 SS_PIN_DECL(E15, GPIOI1, SYSCK);
 
-#define A14 66
-SIG_EXPR_LIST_DECL_SINGLE(SYSMOSI, SPI1, COND1, SPI1_DESC);
-SS_PIN_DECL(A14, GPIOI2, SYSMOSI);
+#define B16 66
+SIG_EXPR_DECL(SYSMOSI, SPI1DEBUG, COND1, SPI1DEBUG_DESC);
+SIG_EXPR_DECL(SYSMOSI, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC);
+SIG_EXPR_LIST_DECL_DUAL(SYSMOSI, SPI1DEBUG, SPI1PASSTHRU);
+SS_PIN_DECL(B16, GPIOI2, SYSMOSI);
 
 #define C16 67
-SIG_EXPR_LIST_DECL_SINGLE(SYSMISO, SPI1, COND1, SPI1_DESC);
+SIG_EXPR_DECL(SYSMISO, SPI1DEBUG, COND1, SPI1DEBUG_DESC);
+SIG_EXPR_DECL(SYSMISO, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC);
+SIG_EXPR_LIST_DECL_DUAL(SYSMISO, SPI1DEBUG, SPI1PASSTHRU);
 SS_PIN_DECL(C16, GPIOI3, SYSMISO);
 
-FUNC_GROUP_DECL(SPI1, C18, E15, A14, C16);
+#define VB_DESC        SIG_DESC_SET(HW_STRAP1, 5)
+
+#define B15 68
+SIG_EXPR_DECL(SPI1CS0, SPI1, COND1, SPI1_DESC);
+SIG_EXPR_DECL(SPI1CS0, SPI1DEBUG, COND1, SPI1DEBUG_DESC);
+SIG_EXPR_DECL(SPI1CS0, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC);
+SIG_EXPR_LIST_DECL(SPI1CS0, SIG_EXPR_PTR(SPI1CS0, SPI1),
+                           SIG_EXPR_PTR(SPI1CS0, SPI1DEBUG),
+                           SIG_EXPR_PTR(SPI1CS0, SPI1PASSTHRU));
+SIG_EXPR_LIST_DECL_SINGLE(VBCS, VGABIOSROM, COND1, VB_DESC);
+MS_PIN_DECL(B15, GPIOI4, SPI1CS0, VBCS);
+
+#define C15 69
+SIG_EXPR_DECL(SPI1CK, SPI1, COND1, SPI1_DESC);
+SIG_EXPR_DECL(SPI1CK, SPI1DEBUG, COND1, SPI1DEBUG_DESC);
+SIG_EXPR_DECL(SPI1CK, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC);
+SIG_EXPR_LIST_DECL(SPI1CK, SIG_EXPR_PTR(SPI1CK, SPI1),
+                           SIG_EXPR_PTR(SPI1CK, SPI1DEBUG),
+                           SIG_EXPR_PTR(SPI1CK, SPI1PASSTHRU));
+SIG_EXPR_LIST_DECL_SINGLE(VBCK, VGABIOSROM, COND1, VB_DESC);
+MS_PIN_DECL(C15, GPIOI5, SPI1CK, VBCK);
+
+#define A14 70
+SIG_EXPR_DECL(SPI1MOSI, SPI1, COND1, SPI1_DESC);
+SIG_EXPR_DECL(SPI1MOSI, SPI1DEBUG, COND1, SPI1DEBUG_DESC);
+SIG_EXPR_DECL(SPI1MOSI, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC);
+SIG_EXPR_LIST_DECL(SPI1MOSI, SIG_EXPR_PTR(SPI1MOSI, SPI1),
+                           SIG_EXPR_PTR(SPI1MOSI, SPI1DEBUG),
+                           SIG_EXPR_PTR(SPI1MOSI, SPI1PASSTHRU));
+SIG_EXPR_LIST_DECL_SINGLE(VBMOSI, VGABIOSROM, COND1, VB_DESC);
+MS_PIN_DECL(A14, GPIOI6, SPI1MOSI, VBMOSI);
+
+#define A15 71
+SIG_EXPR_DECL(SPI1MISO, SPI1, COND1, SPI1_DESC);
+SIG_EXPR_DECL(SPI1MISO, SPI1DEBUG, COND1, SPI1DEBUG_DESC);
+SIG_EXPR_DECL(SPI1MISO, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC);
+SIG_EXPR_LIST_DECL(SPI1MISO, SIG_EXPR_PTR(SPI1MISO, SPI1),
+                           SIG_EXPR_PTR(SPI1MISO, SPI1DEBUG),
+                           SIG_EXPR_PTR(SPI1MISO, SPI1PASSTHRU));
+SIG_EXPR_LIST_DECL_SINGLE(VBMISO, VGABIOSROM, COND1, VB_DESC);
+MS_PIN_DECL(A15, GPIOI7, SPI1MISO, VBMISO);
+
+FUNC_GROUP_DECL(SPI1, B15, C15, A14, A15);
+FUNC_GROUP_DECL(SPI1DEBUG, C18, E15, B16, C16, B15, C15, A14, A15);
+FUNC_GROUP_DECL(SPI1PASSTHRU, C18, E15, B16, C16, B15, C15, A14, A15);
+FUNC_GROUP_DECL(VGABIOSROM, B15, C15, A14, A15);
+
+#define R2 72
+SIG_EXPR_LIST_DECL_SINGLE(SGPMCK, SGPM, SIG_DESC_SET(SCU84, 8));
+SS_PIN_DECL(R2, GPIOJ0, SGPMCK);
 
 #define L2 73
 SIG_EXPR_LIST_DECL_SINGLE(SGPMLD, SGPM, SIG_DESC_SET(SCU84, 9));
@@ -580,6 +640,7 @@ static struct pinctrl_pin_desc aspeed_g5_pins[ASPEED_G5_NR_PINS] = {
        ASPEED_PINCTRL_PIN(A12),
        ASPEED_PINCTRL_PIN(A13),
        ASPEED_PINCTRL_PIN(A14),
+       ASPEED_PINCTRL_PIN(A15),
        ASPEED_PINCTRL_PIN(A2),
        ASPEED_PINCTRL_PIN(A3),
        ASPEED_PINCTRL_PIN(A4),
@@ -592,6 +653,8 @@ static struct pinctrl_pin_desc aspeed_g5_pins[ASPEED_G5_NR_PINS] = {
        ASPEED_PINCTRL_PIN(B12),
        ASPEED_PINCTRL_PIN(B13),
        ASPEED_PINCTRL_PIN(B14),
+       ASPEED_PINCTRL_PIN(B15),
+       ASPEED_PINCTRL_PIN(B16),
        ASPEED_PINCTRL_PIN(B2),
        ASPEED_PINCTRL_PIN(B20),
        ASPEED_PINCTRL_PIN(B3),
@@ -603,6 +666,7 @@ static struct pinctrl_pin_desc aspeed_g5_pins[ASPEED_G5_NR_PINS] = {
        ASPEED_PINCTRL_PIN(C12),
        ASPEED_PINCTRL_PIN(C13),
        ASPEED_PINCTRL_PIN(C14),
+       ASPEED_PINCTRL_PIN(C15),
        ASPEED_PINCTRL_PIN(C16),
        ASPEED_PINCTRL_PIN(C18),
        ASPEED_PINCTRL_PIN(C2),
@@ -614,7 +678,6 @@ static struct pinctrl_pin_desc aspeed_g5_pins[ASPEED_G5_NR_PINS] = {
        ASPEED_PINCTRL_PIN(D10),
        ASPEED_PINCTRL_PIN(D2),
        ASPEED_PINCTRL_PIN(D20),
-       ASPEED_PINCTRL_PIN(D21),
        ASPEED_PINCTRL_PIN(D4),
        ASPEED_PINCTRL_PIN(D5),
        ASPEED_PINCTRL_PIN(D6),
@@ -630,6 +693,7 @@ static struct pinctrl_pin_desc aspeed_g5_pins[ASPEED_G5_NR_PINS] = {
        ASPEED_PINCTRL_PIN(E7),
        ASPEED_PINCTRL_PIN(E9),
        ASPEED_PINCTRL_PIN(F19),
+       ASPEED_PINCTRL_PIN(F20),
        ASPEED_PINCTRL_PIN(F9),
        ASPEED_PINCTRL_PIN(H20),
        ASPEED_PINCTRL_PIN(L1),
@@ -691,11 +755,14 @@ static const struct aspeed_pin_group aspeed_g5_groups[] = {
        ASPEED_PINCTRL_GROUP(RMII2),
        ASPEED_PINCTRL_GROUP(SD1),
        ASPEED_PINCTRL_GROUP(SPI1),
+       ASPEED_PINCTRL_GROUP(SPI1DEBUG),
+       ASPEED_PINCTRL_GROUP(SPI1PASSTHRU),
        ASPEED_PINCTRL_GROUP(TIMER4),
        ASPEED_PINCTRL_GROUP(TIMER5),
        ASPEED_PINCTRL_GROUP(TIMER6),
        ASPEED_PINCTRL_GROUP(TIMER7),
        ASPEED_PINCTRL_GROUP(TIMER8),
+       ASPEED_PINCTRL_GROUP(VGABIOSROM),
 };
 
 static const struct aspeed_pin_function aspeed_g5_functions[] = {
@@ -733,11 +800,14 @@ static const struct aspeed_pin_function aspeed_g5_functions[] = {
        ASPEED_PINCTRL_FUNC(RMII2),
        ASPEED_PINCTRL_FUNC(SD1),
        ASPEED_PINCTRL_FUNC(SPI1),
+       ASPEED_PINCTRL_FUNC(SPI1DEBUG),
+       ASPEED_PINCTRL_FUNC(SPI1PASSTHRU),
        ASPEED_PINCTRL_FUNC(TIMER4),
        ASPEED_PINCTRL_FUNC(TIMER5),
        ASPEED_PINCTRL_FUNC(TIMER6),
        ASPEED_PINCTRL_FUNC(TIMER7),
        ASPEED_PINCTRL_FUNC(TIMER8),
+       ASPEED_PINCTRL_FUNC(VGABIOSROM),
 };
 
 static struct aspeed_pinctrl_data aspeed_g5_pinctrl_data = {
index 0391f9f13f3e6cb0d15334e27f0d6d92733702a7..49aeba91253198c644794c23cb4877368f6905ad 100644 (file)
@@ -166,13 +166,9 @@ static bool aspeed_sig_expr_set(const struct aspeed_sig_expr *expr,
                                bool enable, struct regmap *map)
 {
        int i;
-       bool ret;
-
-       ret = aspeed_sig_expr_eval(expr, enable, map);
-       if (ret)
-               return ret;
 
        for (i = 0; i < expr->ndescs; i++) {
+               bool ret;
                const struct aspeed_sig_desc *desc = &expr->descs[i];
                u32 pattern = enable ? desc->enable : desc->disable;
 
@@ -199,12 +195,18 @@ static bool aspeed_sig_expr_set(const struct aspeed_sig_expr *expr,
 static bool aspeed_sig_expr_enable(const struct aspeed_sig_expr *expr,
                                   struct regmap *map)
 {
+       if (aspeed_sig_expr_eval(expr, true, map))
+               return true;
+
        return aspeed_sig_expr_set(expr, true, map);
 }
 
 static bool aspeed_sig_expr_disable(const struct aspeed_sig_expr *expr,
                                    struct regmap *map)
 {
+       if (!aspeed_sig_expr_eval(expr, true, map))
+               return true;
+
        return aspeed_sig_expr_set(expr, false, map);
 }
 
index 7f77007163985762abc6110f282eec762bdc8a92..5d1e505c3c63d76a85af0dcb7051b9a3da15c2e5 100644 (file)
@@ -844,6 +844,6 @@ static struct platform_driver iproc_gpio_driver = {
 
 static int __init iproc_gpio_init(void)
 {
-       return platform_driver_probe(&iproc_gpio_driver, iproc_gpio_probe);
+       return platform_driver_register(&iproc_gpio_driver);
 }
 arch_initcall_sync(iproc_gpio_init);
index 35783db1c10bad5f50bb8ac41a59307dc97b852e..c8deb8be1da785fd15d8f5182b3b19143ca3c33f 100644 (file)
@@ -741,6 +741,6 @@ static struct platform_driver nsp_gpio_driver = {
 
 static int __init nsp_gpio_init(void)
 {
-       return platform_driver_probe(&nsp_gpio_driver, nsp_gpio_probe);
+       return platform_driver_register(&nsp_gpio_driver);
 }
 arch_initcall_sync(nsp_gpio_init);
index 47613201269af42dd3fff670c92008333bf0adf0..79c4e14a5a75e94fec9315588896bcfe9a104c11 100644 (file)
@@ -687,6 +687,7 @@ static int imx_pinctrl_probe_dt(struct platform_device *pdev,
        if (!info->functions)
                return -ENOMEM;
 
+       info->group_index = 0;
        if (flat_funcs) {
                info->ngroups = of_get_child_count(np);
        } else {
index d22a9fe2e6dfc36d63f9e493b61f2f35c2e21f97..71bbeb9321bad587504bf5ca4f4c889019b4b9f4 100644 (file)
@@ -1808,6 +1808,8 @@ static int byt_pinctrl_probe(struct platform_device *pdev)
                return PTR_ERR(vg->pctl_dev);
        }
 
+       raw_spin_lock_init(&vg->lock);
+
        ret = byt_gpio_probe(vg);
        if (ret) {
                pinctrl_unregister(vg->pctl_dev);
@@ -1815,7 +1817,6 @@ static int byt_pinctrl_probe(struct platform_device *pdev)
        }
 
        platform_set_drvdata(pdev, vg);
-       raw_spin_lock_init(&vg->lock);
        pm_runtime_enable(&pdev->dev);
 
        return 0;
index 30389f4ccab4935c20ddcbed2c5a543e77d33fc5..c43b1e9a06aff0ba3fa94f49967ca4259ed77ced 100644 (file)
@@ -1652,12 +1652,15 @@ static int chv_pinctrl_probe(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM_SLEEP
-static int chv_pinctrl_suspend(struct device *dev)
+static int chv_pinctrl_suspend_noirq(struct device *dev)
 {
        struct platform_device *pdev = to_platform_device(dev);
        struct chv_pinctrl *pctrl = platform_get_drvdata(pdev);
+       unsigned long flags;
        int i;
 
+       raw_spin_lock_irqsave(&chv_lock, flags);
+
        pctrl->saved_intmask = readl(pctrl->regs + CHV_INTMASK);
 
        for (i = 0; i < pctrl->community->npins; i++) {
@@ -1678,15 +1681,20 @@ static int chv_pinctrl_suspend(struct device *dev)
                ctx->padctrl1 = readl(reg);
        }
 
+       raw_spin_unlock_irqrestore(&chv_lock, flags);
+
        return 0;
 }
 
-static int chv_pinctrl_resume(struct device *dev)
+static int chv_pinctrl_resume_noirq(struct device *dev)
 {
        struct platform_device *pdev = to_platform_device(dev);
        struct chv_pinctrl *pctrl = platform_get_drvdata(pdev);
+       unsigned long flags;
        int i;
 
+       raw_spin_lock_irqsave(&chv_lock, flags);
+
        /*
         * Mask all interrupts before restoring per-pin configuration
         * registers because we don't know in which state BIOS left them
@@ -1731,12 +1739,15 @@ static int chv_pinctrl_resume(struct device *dev)
        chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
        chv_writel(pctrl->saved_intmask, pctrl->regs + CHV_INTMASK);
 
+       raw_spin_unlock_irqrestore(&chv_lock, flags);
+
        return 0;
 }
 #endif
 
 static const struct dev_pm_ops chv_pinctrl_pm_ops = {
-       SET_LATE_SYSTEM_SLEEP_PM_OPS(chv_pinctrl_suspend, chv_pinctrl_resume)
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(chv_pinctrl_suspend_noirq,
+                                     chv_pinctrl_resume_noirq)
 };
 
 static const struct acpi_device_id chv_pinctrl_acpi_match[] = {
index 63387a40b973a417d6dd00ec9fe952413465b159..01443762e57055b88ea9f1accb0cf6e10eda2978 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/pinctrl/pinconf.h>
 #include <linux/pinctrl/pinconf-generic.h>
 
+#include "../core.h"
 #include "pinctrl-intel.h"
 
 /* Offset from regs */
@@ -1056,6 +1057,26 @@ int intel_pinctrl_remove(struct platform_device *pdev)
 EXPORT_SYMBOL_GPL(intel_pinctrl_remove);
 
 #ifdef CONFIG_PM_SLEEP
+static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned pin)
+{
+       const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin);
+
+       if (!pd || !intel_pad_usable(pctrl, pin))
+               return false;
+
+       /*
+        * Only restore the pin if it is actually in use by the kernel (or
+        * by userspace). It is possible that some pins are used by the
+        * BIOS during resume and those are not always locked down so leave
+        * them alone.
+        */
+       if (pd->mux_owner || pd->gpio_owner ||
+           gpiochip_line_is_irq(&pctrl->chip, pin))
+               return true;
+
+       return false;
+}
+
 int intel_pinctrl_suspend(struct device *dev)
 {
        struct platform_device *pdev = to_platform_device(dev);
@@ -1069,7 +1090,7 @@ int intel_pinctrl_suspend(struct device *dev)
                const struct pinctrl_pin_desc *desc = &pctrl->soc->pins[i];
                u32 val;
 
-               if (!intel_pad_usable(pctrl, desc->number))
+               if (!intel_pinctrl_should_save(pctrl, desc->number))
                        continue;
 
                val = readl(intel_get_padcfg(pctrl, desc->number, PADCFG0));
@@ -1130,7 +1151,7 @@ int intel_pinctrl_resume(struct device *dev)
                void __iomem *padcfg;
                u32 val;
 
-               if (!intel_pad_usable(pctrl, desc->number))
+               if (!intel_pinctrl_should_save(pctrl, desc->number))
                        continue;
 
                padcfg = intel_get_padcfg(pctrl, desc->number, PADCFG0);
index 99da4cf91031b49757cc24735057b72c3d4f760e..b7bb371679692e5dd76186be130ed4b4a6a86cb3 100644 (file)
@@ -1512,7 +1512,7 @@ static int st_gpiolib_register_bank(struct st_pinctrl *info,
        if (info->irqmux_base || gpio_irq > 0) {
                err = gpiochip_irqchip_add(&bank->gpio_chip, &st_gpio_irqchip,
                                           0, handle_simple_irq,
-                                          IRQ_TYPE_LEVEL_LOW);
+                                          IRQ_TYPE_NONE);
                if (err) {
                        gpiochip_remove(&bank->gpio_chip);
                        dev_info(dev, "could not add irqchip\n");
index 200667f08c373eb027f2bcc4253f564563586e96..efc43711ff5cbcff2c94838e06ead49623ab9118 100644 (file)
@@ -1092,9 +1092,11 @@ int stm32_pctl_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       ret = stm32_pctrl_dt_setup_irq(pdev, pctl);
-       if (ret)
-               return ret;
+       if (of_find_property(np, "interrupt-parent", NULL)) {
+               ret = stm32_pctrl_dt_setup_irq(pdev, pctl);
+               if (ret)
+                       return ret;
+       }
 
        for_each_child_of_node(np, child)
                if (of_property_read_bool(child, "gpio-controller"))
index 07462d79d04000685c946a19a9cef1d90526400d..1aba2c74160eb5c51ce49dd679de2dd0090fe2dd 100644 (file)
@@ -309,7 +309,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer,
                 * much memory to the process.
                 */
                down_read(&current->mm->mmap_sem);
-               ret = get_user_pages(address, 1, !is_write, 0, &page, NULL);
+               ret = get_user_pages(address, 1, is_write ? 0 : FOLL_WRITE,
+                               &page, NULL);
                up_read(&current->mm->mmap_sem);
                if (ret < 0)
                        break;
index 81b8dcca8891dc00ca5d0cb48c28329d25e70887..b8a21d7b25d4c34e4042caf624ac4c86211c11d0 100644 (file)
@@ -576,6 +576,7 @@ config ASUS_WMI
 config ASUS_NB_WMI
        tristate "Asus Notebook WMI Driver"
        depends on ASUS_WMI
+       depends on SERIO_I8042 || SERIO_I8042 = n
        ---help---
          This is a driver for newer Asus notebooks. It adds extra features
          like wireless radio and bluetooth control, leds, hotkeys, backlight...
index d1a091b93192c7dc4a20d8dbf9901d65d918e1e1..a7614fc542b52aaaa4f58d91ea43bd9625e8de32 100644 (file)
@@ -933,6 +933,20 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
                        DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 900"),
                },
        },
+       {
+               .ident = "Lenovo Yoga 900",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_BOARD_NAME, "VIUU4"),
+               },
+       },
+       {
+               .ident = "Lenovo YOGA 910-13IKB",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 910-13IKB"),
+               },
+       },
        {}
 };
 
index ed5874217ee76cf4364d8bbd6f8e49f26e19c215..12dbb50633761b40253adff49f0c3881b40fed90 100644 (file)
@@ -264,7 +264,7 @@ check_acpi_dev(acpi_handle handle, u32 lvl, void *context, void **rv)
                return AE_OK;
 
        if (acpi_match_device_ids(dev, ids) == 0)
-               if (acpi_create_platform_device(dev))
+               if (acpi_create_platform_device(dev, NULL))
                        dev_info(&dev->dev,
                                 "intel-hid: created platform device\n");
 
index 146d02f8c9bc01c99c791bb45ba8df77ce7bb64b..78080763df51768f04548e1627d3baded9bbac14 100644 (file)
@@ -164,7 +164,7 @@ check_acpi_dev(acpi_handle handle, u32 lvl, void *context, void **rv)
                return AE_OK;
 
        if (acpi_match_device_ids(dev, ids) == 0)
-               if (acpi_create_platform_device(dev))
+               if (acpi_create_platform_device(dev, NULL))
                        dev_info(&dev->dev,
                                 "intel-vbtn: created platform device\n");
 
index feac4576b837101c35cf7442ca07ee47496633fe..2df07ee8f3c33e9f5e92fd776554bd44e614e836 100644 (file)
 #include <linux/acpi.h>
 #include <linux/input.h>
 #include <linux/input/sparse-keymap.h>
+#include <linux/dmi.h>
 
 MODULE_AUTHOR("Azael Avalos");
 MODULE_DESCRIPTION("Toshiba WMI Hotkey Driver");
 MODULE_LICENSE("GPL");
 
-#define TOSHIBA_WMI_EVENT_GUID "59142400-C6A3-40FA-BADB-8A2652834100"
+#define WMI_EVENT_GUID "59142400-C6A3-40FA-BADB-8A2652834100"
 
-MODULE_ALIAS("wmi:"TOSHIBA_WMI_EVENT_GUID);
+MODULE_ALIAS("wmi:"WMI_EVENT_GUID);
 
 static struct input_dev *toshiba_wmi_input_dev;
 
@@ -63,6 +64,16 @@ static void toshiba_wmi_notify(u32 value, void *context)
        kfree(response.pointer);
 }
 
+static struct dmi_system_id toshiba_wmi_dmi_table[] __initdata = {
+       {
+               .ident = "Toshiba laptop",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
+               },
+       },
+       {}
+};
+
 static int __init toshiba_wmi_input_setup(void)
 {
        acpi_status status;
@@ -81,7 +92,7 @@ static int __init toshiba_wmi_input_setup(void)
        if (err)
                goto err_free_dev;
 
-       status = wmi_install_notify_handler(TOSHIBA_WMI_EVENT_GUID,
+       status = wmi_install_notify_handler(WMI_EVENT_GUID,
                                            toshiba_wmi_notify, NULL);
        if (ACPI_FAILURE(status)) {
                err = -EIO;
@@ -95,7 +106,7 @@ static int __init toshiba_wmi_input_setup(void)
        return 0;
 
  err_remove_notifier:
-       wmi_remove_notify_handler(TOSHIBA_WMI_EVENT_GUID);
+       wmi_remove_notify_handler(WMI_EVENT_GUID);
  err_free_keymap:
        sparse_keymap_free(toshiba_wmi_input_dev);
  err_free_dev:
@@ -105,7 +116,7 @@ static int __init toshiba_wmi_input_setup(void)
 
 static void toshiba_wmi_input_destroy(void)
 {
-       wmi_remove_notify_handler(TOSHIBA_WMI_EVENT_GUID);
+       wmi_remove_notify_handler(WMI_EVENT_GUID);
        sparse_keymap_free(toshiba_wmi_input_dev);
        input_unregister_device(toshiba_wmi_input_dev);
 }
@@ -114,7 +125,8 @@ static int __init toshiba_wmi_init(void)
 {
        int ret;
 
-       if (!wmi_has_guid(TOSHIBA_WMI_EVENT_GUID))
+       if (!wmi_has_guid(WMI_EVENT_GUID) ||
+           !dmi_check_system(toshiba_wmi_dmi_table))
                return -ENODEV;
 
        ret = toshiba_wmi_input_setup();
@@ -130,7 +142,7 @@ static int __init toshiba_wmi_init(void)
 
 static void __exit toshiba_wmi_exit(void)
 {
-       if (wmi_has_guid(TOSHIBA_WMI_EVENT_GUID))
+       if (wmi_has_guid(WMI_EVENT_GUID))
                toshiba_wmi_input_destroy();
 }
 
index 86280b7e41f3f79db793c0545c97bb0c1e36e96e..9c13381b69662c499d0e9a679ce3eeb71c3c6d3d 100644 (file)
@@ -153,7 +153,10 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx)
                s32 ppb = scaled_ppm_to_ppb(tx->freq);
                if (ppb > ops->max_adj || ppb < -ops->max_adj)
                        return -ERANGE;
-               err = ops->adjfreq(ops, ppb);
+               if (ops->adjfine)
+                       err = ops->adjfine(ops, tx->freq);
+               else
+                       err = ops->adjfreq(ops, ppb);
                ptp->dialed_frequency = tx->freq;
        } else if (tx->modes == 0) {
                tx->freq = ptp->dialed_frequency;
index 302e626fe6b01777523c371e2760ee8f48acc68b..53d43954a9740a5520229a1e8798516805a73c19 100644 (file)
@@ -28,7 +28,7 @@ static ssize_t clock_name_show(struct device *dev,
        struct ptp_clock *ptp = dev_get_drvdata(dev);
        return snprintf(page, PAGE_SIZE-1, "%s\n", ptp->info->name);
 }
-static DEVICE_ATTR(clock_name, 0444, clock_name_show, NULL);
+static DEVICE_ATTR_RO(clock_name);
 
 #define PTP_SHOW_INT(name, var)                                                \
 static ssize_t var##_show(struct device *dev,                          \
index 436dfe871d3230436adb301ccec42563063dfb0b..9013a585507e8a80b4580c084445ec7bbde6e04c 100644 (file)
@@ -892,7 +892,8 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
                down_read(&current->mm->mmap_sem);
                pinned = get_user_pages(
                                (unsigned long)xfer->loc_addr & PAGE_MASK,
-                               nr_pages, dir == DMA_FROM_DEVICE, 0,
+                               nr_pages,
+                               dir == DMA_FROM_DEVICE ? FOLL_WRITE : 0,
                                page_list, NULL);
                up_read(&current->mm->mmap_sem);
 
index 67426c0477d34b2b5805826664bbc4074acfe591..5c1519b229e0e5cd718e4694cf0b27cde1c73f6d 100644 (file)
@@ -2754,7 +2754,7 @@ static int _regulator_set_voltage_time(struct regulator_dev *rdev,
                ramp_delay = rdev->desc->ramp_delay;
 
        if (ramp_delay == 0) {
-               rdev_warn(rdev, "ramp_delay not set\n");
+               rdev_dbg(rdev, "ramp_delay not set\n");
                return 0;
        }
 
index 8b2558e7363e249726336472729350f879519e94..968c3ae4535cf2f2b6fc26c54c02e773b659c104 100644 (file)
@@ -154,7 +154,7 @@ const struct uniphier_reset_data uniphier_sld3_mio_reset_data[] = {
        UNIPHIER_RESET_END,
 };
 
-const struct uniphier_reset_data uniphier_pro5_mio_reset_data[] = {
+const struct uniphier_reset_data uniphier_pro5_sd_reset_data[] = {
        UNIPHIER_MIO_RESET_SD(0, 0),
        UNIPHIER_MIO_RESET_SD(1, 1),
        UNIPHIER_MIO_RESET_EMMC_HW_RESET(6, 1),
@@ -360,7 +360,7 @@ static const struct of_device_id uniphier_reset_match[] = {
                .compatible = "socionext,uniphier-ld20-reset",
                .data = uniphier_ld20_sys_reset_data,
        },
-       /* Media I/O reset */
+       /* Media I/O reset, SD reset */
        {
                .compatible = "socionext,uniphier-sld3-mio-reset",
                .data = uniphier_sld3_mio_reset_data,
@@ -378,20 +378,20 @@ static const struct of_device_id uniphier_reset_match[] = {
                .data = uniphier_sld3_mio_reset_data,
        },
        {
-               .compatible = "socionext,uniphier-pro5-mio-reset",
-               .data = uniphier_pro5_mio_reset_data,
+               .compatible = "socionext,uniphier-pro5-sd-reset",
+               .data = uniphier_pro5_sd_reset_data,
        },
        {
-               .compatible = "socionext,uniphier-pxs2-mio-reset",
-               .data = uniphier_pro5_mio_reset_data,
+               .compatible = "socionext,uniphier-pxs2-sd-reset",
+               .data = uniphier_pro5_sd_reset_data,
        },
        {
                .compatible = "socionext,uniphier-ld11-mio-reset",
                .data = uniphier_sld3_mio_reset_data,
        },
        {
-               .compatible = "socionext,uniphier-ld20-mio-reset",
-               .data = uniphier_pro5_mio_reset_data,
+               .compatible = "socionext,uniphier-ld20-sd-reset",
+               .data = uniphier_pro5_sd_reset_data,
        },
        /* Peripheral reset */
        {
index d1e080701264f230235668b07aedf0e4936ba4f9..e859d148aba9ecdbcecc47e745209b94ed83dd67 100644 (file)
@@ -208,14 +208,14 @@ config RTC_DRV_AS3722
          will be called rtc-as3722.
 
 config RTC_DRV_DS1307
-       tristate "Dallas/Maxim DS1307/37/38/39/40, ST M41T00, EPSON RX-8025"
+       tristate "Dallas/Maxim DS1307/37/38/39/40, ST M41T00, EPSON RX-8025, ISL12057"
        help
          If you say yes here you get support for various compatible RTC
          chips (often with battery backup) connected with I2C. This driver
          should handle DS1307, DS1337, DS1338, DS1339, DS1340, ST M41T00,
-         EPSON RX-8025 and probably other chips. In some cases the RTC
-         must already have been initialized (by manufacturing or a
-         bootloader).
+         EPSON RX-8025, Intersil ISL12057 and probably other chips. In some
+         cases the RTC must already have been initialized (by manufacturing or
+         a bootloader).
 
          The first seven registers on these chips hold an RTC, and other
          registers may add features such as NVRAM, a trickle charger for
@@ -234,6 +234,20 @@ config RTC_DRV_DS1307_HWMON
          Say Y here if you want to expose temperature sensor data on
          rtc-ds1307 (only DS3231)
 
+config RTC_DRV_DS1307_CENTURY
+       bool "Century bit support for rtc-ds1307"
+       depends on RTC_DRV_DS1307
+       default n
+       help
+         The DS1307 driver suffered from a bug where it was enabling the
+         century bit unconditionally but never used it when reading the time.
+         It made the driver unable to support dates beyond 2099.
+         Setting this option will add proper support for the century bit but if
+         the time was previously set using a kernel predating this option,
+         reading the date will return a date in the next century.
+         To solve that, you could boot a kernel without this option set, set
+         the RTC date and then boot a kernel with this option set.
+
 config RTC_DRV_DS1374
        tristate "Dallas/Maxim DS1374"
        help
@@ -374,16 +388,6 @@ config RTC_DRV_ISL12022
          This driver can also be built as a module. If so, the module
          will be called rtc-isl12022.
 
-config RTC_DRV_ISL12057
-       select REGMAP_I2C
-       tristate "Intersil ISL12057"
-       help
-         If you say yes here you get support for the Intersil ISL12057
-         I2C RTC chip.
-
-         This driver can also be built as a module. If so, the module
-         will be called rtc-isl12057.
-
 config RTC_DRV_X1205
        tristate "Xicor/Intersil X1205"
        help
@@ -661,6 +665,7 @@ config RTC_DRV_DS1343
          will be called rtc-ds1343.
 
 config RTC_DRV_DS1347
+       select REGMAP_SPI
        tristate "Dallas/Maxim DS1347"
        help
          If you say yes here you get support for the
@@ -1201,7 +1206,7 @@ comment "on-CPU RTC drivers"
 
 config RTC_DRV_ASM9260
        tristate "Alphascale asm9260 RTC"
-       depends on MACH_ASM9260
+       depends on MACH_ASM9260 || COMPILE_TEST
        help
          If you say yes here you get support for the RTC on the
          Alphascale asm9260 SoC.
@@ -1241,6 +1246,9 @@ config RTC_DRV_IMXDI
 config RTC_DRV_OMAP
        tristate "TI OMAP Real Time Clock"
        depends on ARCH_OMAP || ARCH_DAVINCI || COMPILE_TEST
+       depends on OF
+       depends on PINCTRL
+       select GENERIC_PINCONF
        help
          Say "yes" here to support the on chip real time clock
          present on TI OMAP1, AM33xx, DA8xx/OMAP-L13x, AM43xx and DRA7xx.
index 8fb994bacdf7f155666d7f690a6f443b478b8fed..1ac694a330c8dadc0cbe78a4f6cd8d7359de2ecc 100644 (file)
@@ -72,7 +72,6 @@ obj-$(CONFIG_RTC_DRV_HID_SENSOR_TIME) += rtc-hid-sensor-time.o
 obj-$(CONFIG_RTC_DRV_HYM8563)  += rtc-hym8563.o
 obj-$(CONFIG_RTC_DRV_IMXDI)    += rtc-imxdi.o
 obj-$(CONFIG_RTC_DRV_ISL12022) += rtc-isl12022.o
-obj-$(CONFIG_RTC_DRV_ISL12057) += rtc-isl12057.o
 obj-$(CONFIG_RTC_DRV_ISL1208)  += rtc-isl1208.o
 obj-$(CONFIG_RTC_DRV_JZ4740)   += rtc-jz4740.o
 obj-$(CONFIG_RTC_DRV_LP8788)   += rtc-lp8788.o
index 70b4fd0f61225485ebe87480037c3562793f09d4..9e336184491cbd1b1e80c47cddbf346c24665a63 100644 (file)
@@ -327,6 +327,8 @@ static int ac100_rtc_register_clks(struct ac100_rtc_dev *chip)
                        .flags = 0,
                };
 
+               of_property_read_string_index(np, "clock-output-names",
+                                             i, &init.name);
                clk->regmap = chip->regmap;
                clk->offset = AC100_CLKOUT_CTRL1 + i;
                clk->hw.init = &init;
@@ -552,6 +554,9 @@ static int ac100_rtc_probe(struct platform_device *pdev)
        int ret;
 
        chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+       if (!chip)
+               return -ENOMEM;
+
        platform_set_drvdata(pdev, chip);
        chip->dev = &pdev->dev;
        chip->regmap = ac100->regmap;
index 5219916ce11d2b609bde574a4b99f72d279eb8e9..d36534965635ca48292fd6d570bbe3a60bde75df 100644 (file)
@@ -112,8 +112,6 @@ struct asm9260_rtc_priv {
        void __iomem            *iobase;
        struct rtc_device       *rtc;
        struct clk              *clk;
-       /* io lock */
-       spinlock_t              lock;
 };
 
 static irqreturn_t asm9260_rtc_irq(int irq, void *dev_id)
@@ -122,11 +120,15 @@ static irqreturn_t asm9260_rtc_irq(int irq, void *dev_id)
        u32 isr;
        unsigned long events = 0;
 
+       mutex_lock(&priv->rtc->ops_lock);
        isr = ioread32(priv->iobase + HW_CIIR);
-       if (!isr)
+       if (!isr) {
+               mutex_unlock(&priv->rtc->ops_lock);
                return IRQ_NONE;
+       }
 
        iowrite32(0, priv->iobase + HW_CIIR);
+       mutex_unlock(&priv->rtc->ops_lock);
 
        events |= RTC_AF | RTC_IRQF;
 
@@ -139,9 +141,7 @@ static int asm9260_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
        struct asm9260_rtc_priv *priv = dev_get_drvdata(dev);
        u32 ctime0, ctime1, ctime2;
-       unsigned long irq_flags;
 
-       spin_lock_irqsave(&priv->lock, irq_flags);
        ctime0 = ioread32(priv->iobase + HW_CTIME0);
        ctime1 = ioread32(priv->iobase + HW_CTIME1);
        ctime2 = ioread32(priv->iobase + HW_CTIME2);
@@ -155,7 +155,6 @@ static int asm9260_rtc_read_time(struct device *dev, struct rtc_time *tm)
                ctime1 = ioread32(priv->iobase + HW_CTIME1);
                ctime2 = ioread32(priv->iobase + HW_CTIME2);
        }
-       spin_unlock_irqrestore(&priv->lock, irq_flags);
 
        tm->tm_sec  = (ctime0 >> BM_CTIME0_SEC_S)  & BM_CTIME0_SEC_M;
        tm->tm_min  = (ctime0 >> BM_CTIME0_MIN_S)  & BM_CTIME0_MIN_M;
@@ -174,9 +173,7 @@ static int asm9260_rtc_read_time(struct device *dev, struct rtc_time *tm)
 static int asm9260_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
        struct asm9260_rtc_priv *priv = dev_get_drvdata(dev);
-       unsigned long irq_flags;
 
-       spin_lock_irqsave(&priv->lock, irq_flags);
        /*
         * make sure SEC counter will not flip other counter on write time,
         * real value will be written at the end of sequence.
@@ -191,7 +188,6 @@ static int asm9260_rtc_set_time(struct device *dev, struct rtc_time *tm)
        iowrite32(tm->tm_hour, priv->iobase + HW_HOUR);
        iowrite32(tm->tm_min,  priv->iobase + HW_MIN);
        iowrite32(tm->tm_sec,  priv->iobase + HW_SEC);
-       spin_unlock_irqrestore(&priv->lock, irq_flags);
 
        return 0;
 }
@@ -199,9 +195,7 @@ static int asm9260_rtc_set_time(struct device *dev, struct rtc_time *tm)
 static int asm9260_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
        struct asm9260_rtc_priv *priv = dev_get_drvdata(dev);
-       unsigned long irq_flags;
 
-       spin_lock_irqsave(&priv->lock, irq_flags);
        alrm->time.tm_year = ioread32(priv->iobase + HW_ALYEAR);
        alrm->time.tm_mon  = ioread32(priv->iobase + HW_ALMON);
        alrm->time.tm_mday = ioread32(priv->iobase + HW_ALDOM);
@@ -213,7 +207,6 @@ static int asm9260_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
        alrm->enabled = ioread32(priv->iobase + HW_AMR) ? 1 : 0;
        alrm->pending = ioread32(priv->iobase + HW_CIIR) ? 1 : 0;
-       spin_unlock_irqrestore(&priv->lock, irq_flags);
 
        return rtc_valid_tm(&alrm->time);
 }
@@ -221,9 +214,7 @@ static int asm9260_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 static int asm9260_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
        struct asm9260_rtc_priv *priv = dev_get_drvdata(dev);
-       unsigned long irq_flags;
 
-       spin_lock_irqsave(&priv->lock, irq_flags);
        iowrite32(alrm->time.tm_year, priv->iobase + HW_ALYEAR);
        iowrite32(alrm->time.tm_mon,  priv->iobase + HW_ALMON);
        iowrite32(alrm->time.tm_mday, priv->iobase + HW_ALDOM);
@@ -234,7 +225,6 @@ static int asm9260_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        iowrite32(alrm->time.tm_sec,  priv->iobase + HW_ALSEC);
 
        iowrite32(alrm->enabled ? 0 : BM_AMR_OFF, priv->iobase + HW_AMR);
-       spin_unlock_irqrestore(&priv->lock, irq_flags);
 
        return 0;
 }
@@ -337,6 +327,7 @@ static const struct of_device_id asm9260_dt_ids[] = {
        { .compatible = "alphascale,asm9260-rtc", },
        {}
 };
+MODULE_DEVICE_TABLE(of, asm9260_dt_ids);
 
 static struct platform_driver asm9260_rtc_driver = {
        .probe          = asm9260_rtc_probe,
index 83ac2337c0f732df3cf29a4bca131e39fd37e470..de8bf56a41e7676d47e77bf391030e1adccc26e9 100644 (file)
@@ -187,7 +187,7 @@ static irqreturn_t at32_rtc_interrupt(int irq, void *dev_id)
        return ret;
 }
 
-static struct rtc_class_ops at32_rtc_ops = {
+static const struct rtc_class_ops at32_rtc_ops = {
        .read_time      = at32_rtc_readtime,
        .set_time       = at32_rtc_settime,
        .read_alarm     = at32_rtc_readalarm,
index 0299988b4f136812bad0e35c4943826608d5a114..397742446007aa2479969969c60097ad3edb6889 100644 (file)
@@ -93,8 +93,15 @@ static int bq32k_rtc_read_time(struct device *dev, struct rtc_time *tm)
        if (error)
                return error;
 
+       /*
+        * In case of oscillator failure, the register contents should be
+        * considered invalid. The flag is cleared the next time the RTC is set.
+        */
+       if (regs.minutes & BQ32K_OF)
+               return -EINVAL;
+
        tm->tm_sec = bcd2bin(regs.seconds & BQ32K_SECONDS_MASK);
-       tm->tm_min = bcd2bin(regs.minutes & BQ32K_SECONDS_MASK);
+       tm->tm_min = bcd2bin(regs.minutes & BQ32K_MINUTES_MASK);
        tm->tm_hour = bcd2bin(regs.cent_hours & BQ32K_HOURS_MASK);
        tm->tm_mday = bcd2bin(regs.date);
        tm->tm_wday = bcd2bin(regs.day) - 1;
@@ -204,13 +211,10 @@ static int bq32k_probe(struct i2c_client *client,
 
        /* Check Oscillator Failure flag */
        error = bq32k_read(dev, &reg, BQ32K_MINUTES, 1);
-       if (!error && (reg & BQ32K_OF)) {
-               dev_warn(dev, "Oscillator Failure. Check RTC battery.\n");
-               reg &= ~BQ32K_OF;
-               error = bq32k_write(dev, &reg, BQ32K_MINUTES, 1);
-       }
        if (error)
                return error;
+       if (reg & BQ32K_OF)
+               dev_warn(dev, "Oscillator Failure. Check RTC battery.\n");
 
        if (client->dev.of_node)
                trickle_charger_of_init(dev, client->dev.of_node);
index 43745cac0141a4445dda3abe6a31d8ca172355b0..7030d7cd38610f47e9e4bbecc0394068fcfd18da 100644 (file)
@@ -62,6 +62,8 @@ struct cmos_rtc {
        u8                      day_alrm;
        u8                      mon_alrm;
        u8                      century;
+
+       struct rtc_wkalrm       saved_wkalrm;
 };
 
 /* both platform and pnp busses use negative numbers for invalid irqs */
@@ -707,6 +709,8 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
                goto cleanup1;
        }
 
+       hpet_rtc_timer_init();
+
        if (is_valid_irq(rtc_irq)) {
                irq_handler_t rtc_cmos_int_handler;
 
@@ -714,6 +718,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
                        rtc_cmos_int_handler = hpet_rtc_interrupt;
                        retval = hpet_register_irq_handler(cmos_interrupt);
                        if (retval) {
+                               hpet_mask_rtc_irq_bit(RTC_IRQMASK);
                                dev_warn(dev, "hpet_register_irq_handler "
                                                " failed in rtc_init().");
                                goto cleanup1;
@@ -729,7 +734,6 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
                        goto cleanup1;
                }
        }
-       hpet_rtc_timer_init();
 
        /* export at least the first block of NVRAM */
        nvram.size = address_space - NVRAM_OFFSET;
@@ -772,7 +776,7 @@ static void cmos_do_shutdown(int rtc_irq)
        spin_unlock_irq(&rtc_lock);
 }
 
-static void __exit cmos_do_remove(struct device *dev)
+static void cmos_do_remove(struct device *dev)
 {
        struct cmos_rtc *cmos = dev_get_drvdata(dev);
        struct resource *ports;
@@ -844,8 +848,6 @@ static int cmos_aie_poweroff(struct device *dev)
        return retval;
 }
 
-#ifdef CONFIG_PM
-
 static int cmos_suspend(struct device *dev)
 {
        struct cmos_rtc *cmos = dev_get_drvdata(dev);
@@ -877,6 +879,8 @@ static int cmos_suspend(struct device *dev)
                        enable_irq_wake(cmos->irq);
        }
 
+       cmos_read_alarm(dev, &cmos->saved_wkalrm);
+
        dev_dbg(dev, "suspend%s, ctrl %02x\n",
                        (tmp & RTC_AIE) ? ", alarm may wake" : "",
                        tmp);
@@ -892,12 +896,32 @@ static int cmos_suspend(struct device *dev)
  */
 static inline int cmos_poweroff(struct device *dev)
 {
+       if (!IS_ENABLED(CONFIG_PM))
+               return -ENOSYS;
+
        return cmos_suspend(dev);
 }
 
-#ifdef CONFIG_PM_SLEEP
+static void cmos_check_wkalrm(struct device *dev)
+{
+       struct cmos_rtc *cmos = dev_get_drvdata(dev);
+       struct rtc_wkalrm current_alarm;
+       time64_t t_current_expires;
+       time64_t t_saved_expires;
+
+       cmos_read_alarm(dev, &current_alarm);
+       t_current_expires = rtc_tm_to_time64(&current_alarm.time);
+       t_saved_expires = rtc_tm_to_time64(&cmos->saved_wkalrm.time);
+       if (t_current_expires != t_saved_expires ||
+           cmos->saved_wkalrm.enabled != current_alarm.enabled) {
+               cmos_set_alarm(dev, &cmos->saved_wkalrm);
+       }
+}
+
+static void cmos_check_acpi_rtc_status(struct device *dev,
+                                      unsigned char *rtc_control);
 
-static int cmos_resume(struct device *dev)
+static int __maybe_unused cmos_resume(struct device *dev)
 {
        struct cmos_rtc *cmos = dev_get_drvdata(dev);
        unsigned char tmp;
@@ -910,6 +934,9 @@ static int cmos_resume(struct device *dev)
                cmos->enabled_wake = 0;
        }
 
+       /* The BIOS might have changed the alarm, restore it */
+       cmos_check_wkalrm(dev);
+
        spin_lock_irq(&rtc_lock);
        tmp = cmos->suspend_ctrl;
        cmos->suspend_ctrl = 0;
@@ -936,6 +963,9 @@ static int cmos_resume(struct device *dev)
                        tmp &= ~RTC_AIE;
                        hpet_mask_rtc_irq_bit(RTC_AIE);
                } while (mask & RTC_AIE);
+
+               if (tmp & RTC_AIE)
+                       cmos_check_acpi_rtc_status(dev, &tmp);
        }
        spin_unlock_irq(&rtc_lock);
 
@@ -944,16 +974,6 @@ static int cmos_resume(struct device *dev)
        return 0;
 }
 
-#endif
-#else
-
-static inline int cmos_poweroff(struct device *dev)
-{
-       return -ENOSYS;
-}
-
-#endif
-
 static SIMPLE_DEV_PM_OPS(cmos_pm_ops, cmos_suspend, cmos_resume);
 
 /*----------------------------------------------------------------*/
@@ -973,6 +993,21 @@ static SIMPLE_DEV_PM_OPS(cmos_pm_ops, cmos_suspend, cmos_resume);
 static u32 rtc_handler(void *context)
 {
        struct device *dev = context;
+       struct cmos_rtc *cmos = dev_get_drvdata(dev);
+       unsigned char rtc_control = 0;
+       unsigned char rtc_intr;
+       unsigned long flags;
+
+       spin_lock_irqsave(&rtc_lock, flags);
+       if (cmos_rtc.suspend_ctrl)
+               rtc_control = CMOS_READ(RTC_CONTROL);
+       if (rtc_control & RTC_AIE) {
+               cmos_rtc.suspend_ctrl &= ~RTC_AIE;
+               CMOS_WRITE(rtc_control, RTC_CONTROL);
+               rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
+               rtc_update_irq(cmos->rtc, 1, rtc_intr);
+       }
+       spin_unlock_irqrestore(&rtc_lock, flags);
 
        pm_wakeup_event(dev, 0);
        acpi_clear_event(ACPI_EVENT_RTC);
@@ -1039,12 +1074,39 @@ static void cmos_wake_setup(struct device *dev)
        device_init_wakeup(dev, 1);
 }
 
+static void cmos_check_acpi_rtc_status(struct device *dev,
+                                      unsigned char *rtc_control)
+{
+       struct cmos_rtc *cmos = dev_get_drvdata(dev);
+       acpi_event_status rtc_status;
+       acpi_status status;
+
+       if (acpi_gbl_FADT.flags & ACPI_FADT_FIXED_RTC)
+               return;
+
+       status = acpi_get_event_status(ACPI_EVENT_RTC, &rtc_status);
+       if (ACPI_FAILURE(status)) {
+               dev_err(dev, "Could not get RTC status\n");
+       } else if (rtc_status & ACPI_EVENT_FLAG_SET) {
+               unsigned char mask;
+               *rtc_control &= ~RTC_AIE;
+               CMOS_WRITE(*rtc_control, RTC_CONTROL);
+               mask = CMOS_READ(RTC_INTR_FLAGS);
+               rtc_update_irq(cmos->rtc, 1, mask);
+       }
+}
+
 #else
 
 static void cmos_wake_setup(struct device *dev)
 {
 }
 
+static void cmos_check_acpi_rtc_status(struct device *dev,
+                                      unsigned char *rtc_control)
+{
+}
+
 #endif
 
 #ifdef CONFIG_PNP
@@ -1068,7 +1130,7 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
                                pnp_irq(pnp, 0));
 }
 
-static void __exit cmos_pnp_remove(struct pnp_dev *pnp)
+static void cmos_pnp_remove(struct pnp_dev *pnp)
 {
        cmos_do_remove(&pnp->dev);
 }
@@ -1100,7 +1162,7 @@ static struct pnp_driver cmos_pnp_driver = {
        .name           = (char *) driver_name,
        .id_table       = rtc_ids,
        .probe          = cmos_pnp_probe,
-       .remove         = __exit_p(cmos_pnp_remove),
+       .remove         = cmos_pnp_remove,
        .shutdown       = cmos_pnp_shutdown,
 
        /* flag ensures resume() gets called, and stops syslog spam */
@@ -1177,7 +1239,7 @@ static int __init cmos_platform_probe(struct platform_device *pdev)
        return cmos_do_probe(&pdev->dev, resource, irq);
 }
 
-static int __exit cmos_platform_remove(struct platform_device *pdev)
+static int cmos_platform_remove(struct platform_device *pdev)
 {
        cmos_do_remove(&pdev->dev);
        return 0;
@@ -1202,13 +1264,11 @@ static void cmos_platform_shutdown(struct platform_device *pdev)
 MODULE_ALIAS("platform:rtc_cmos");
 
 static struct platform_driver cmos_platform_driver = {
-       .remove         = __exit_p(cmos_platform_remove),
+       .remove         = cmos_platform_remove,
        .shutdown       = cmos_platform_shutdown,
        .driver = {
                .name           = driver_name,
-#ifdef CONFIG_PM
                .pm             = &cmos_pm_ops,
-#endif
                .of_match_table = of_match_ptr(of_cmos_match),
        }
 };
index 101b7a240e0fa8e482ef140b921c5b31811e829a..cfc4141d99cde18def976813feba11113111a378 100644 (file)
@@ -140,7 +140,7 @@ static int coh901331_alarm_irq_enable(struct device *dev, unsigned int enabled)
        return 0;
 }
 
-static struct rtc_class_ops coh901331_ops = {
+static const struct rtc_class_ops coh901331_ops = {
        .read_time = coh901331_read_time,
        .set_mmss = coh901331_set_mmss,
        .read_alarm = coh901331_read_alarm,
index dba60c1dfce2ee9f71fefc6b023a8eee799e63d2..caf35567e14cbe1966b844ee7d6dd6012690ccdd 100644 (file)
@@ -469,7 +469,7 @@ static int davinci_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
        return 0;
 }
 
-static struct rtc_class_ops davinci_rtc_ops = {
+static const struct rtc_class_ops davinci_rtc_ops = {
        .ioctl                  = davinci_rtc_ioctl,
        .read_time              = davinci_rtc_read_time,
        .set_time               = davinci_rtc_set_time,
index 8d05596a6765915373edabfb5048d1e2a2457854..b253bf1b35314776ee3df0b9fc68c7c2f544ce78 100644 (file)
@@ -159,7 +159,7 @@ static int dc_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
        return 0;
 }
 
-static struct rtc_class_ops dc_rtc_ops = {
+static const struct rtc_class_ops dc_rtc_ops = {
        .read_time              = dc_rtc_read_time,
        .set_mmss               = dc_rtc_set_mmss,
        .read_alarm             = dc_rtc_read_alarm,
index f5dd09fe5add5e65a40d9d178f0283a30c9e6548..0ec4be62322bfaf7695eb99aa6efd428264a3771 100644 (file)
@@ -102,7 +102,7 @@ static int ds1302_rtc_get_time(struct device *dev, struct rtc_time *time)
        return rtc_valid_tm(time);
 }
 
-static struct rtc_class_ops ds1302_rtc_ops = {
+static const struct rtc_class_ops ds1302_rtc_ops = {
        .read_time      = ds1302_rtc_get_time,
        .set_time       = ds1302_rtc_set_time,
 };
index 8e1c5cb6ece6f60619e5cafcea4271e056d96778..4e31036ee2596dec93accd26f627c5b95591ae9f 100644 (file)
@@ -186,6 +186,7 @@ static const struct i2c_device_id ds1307_id[] = {
        { "mcp7941x", mcp794xx },
        { "pt7c4338", ds_1307 },
        { "rx8025", rx_8025 },
+       { "isl12057", ds_1337 },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, ds1307_id);
@@ -382,10 +383,25 @@ static int ds1307_get_time(struct device *dev, struct rtc_time *t)
        t->tm_mday = bcd2bin(ds1307->regs[DS1307_REG_MDAY] & 0x3f);
        tmp = ds1307->regs[DS1307_REG_MONTH] & 0x1f;
        t->tm_mon = bcd2bin(tmp) - 1;
-
-       /* assume 20YY not 19YY, and ignore DS1337_BIT_CENTURY */
        t->tm_year = bcd2bin(ds1307->regs[DS1307_REG_YEAR]) + 100;
 
+#ifdef CONFIG_RTC_DRV_DS1307_CENTURY
+       switch (ds1307->type) {
+       case ds_1337:
+       case ds_1339:
+       case ds_3231:
+               if (ds1307->regs[DS1307_REG_MONTH] & DS1337_BIT_CENTURY)
+                       t->tm_year += 100;
+               break;
+       case ds_1340:
+               if (ds1307->regs[DS1307_REG_HOUR] & DS1340_BIT_CENTURY)
+                       t->tm_year += 100;
+               break;
+       default:
+               break;
+       }
+#endif
+
        dev_dbg(dev, "%s secs=%d, mins=%d, "
                "hours=%d, mday=%d, mon=%d, year=%d, wday=%d\n",
                "read", t->tm_sec, t->tm_min,
@@ -409,6 +425,27 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
                t->tm_hour, t->tm_mday,
                t->tm_mon, t->tm_year, t->tm_wday);
 
+#ifdef CONFIG_RTC_DRV_DS1307_CENTURY
+       if (t->tm_year < 100)
+               return -EINVAL;
+
+       switch (ds1307->type) {
+       case ds_1337:
+       case ds_1339:
+       case ds_3231:
+       case ds_1340:
+               if (t->tm_year > 299)
+                       return -EINVAL;
+       default:
+               if (t->tm_year > 199)
+                       return -EINVAL;
+               break;
+       }
+#else
+       if (t->tm_year < 100 || t->tm_year > 199)
+               return -EINVAL;
+#endif
+
        buf[DS1307_REG_SECS] = bin2bcd(t->tm_sec);
        buf[DS1307_REG_MIN] = bin2bcd(t->tm_min);
        buf[DS1307_REG_HOUR] = bin2bcd(t->tm_hour);
@@ -424,11 +461,13 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
        case ds_1337:
        case ds_1339:
        case ds_3231:
-               buf[DS1307_REG_MONTH] |= DS1337_BIT_CENTURY;
+               if (t->tm_year > 199)
+                       buf[DS1307_REG_MONTH] |= DS1337_BIT_CENTURY;
                break;
        case ds_1340:
-               buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY_EN
-                               | DS1340_BIT_CENTURY;
+               buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY_EN;
+               if (t->tm_year > 199)
+                       buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY;
                break;
        case mcp794xx:
                /*
@@ -1295,6 +1334,11 @@ static int ds1307_probe(struct i2c_client *client,
        if (of_property_read_bool(client->dev.of_node, "wakeup-source")) {
                ds1307_can_wakeup_device = true;
        }
+       /* Intersil ISL12057 DT backward compatibility */
+       if (of_property_read_bool(client->dev.of_node,
+                                 "isil,irq2-can-wakeup-machine")) {
+               ds1307_can_wakeup_device = true;
+       }
 #endif
 
        switch (ds1307->type) {
index 641e8e8a0dd7251f677e9146c098e7ce475fa516..ccfc9d43eb1e680956d32892e9bacfaddefafd1a 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/rtc.h>
 #include <linux/spi/spi.h>
 #include <linux/bcd.h>
+#include <linux/regmap.h>
 
 /* Registers in ds1347 rtc */
 
 #define DS1347_STATUS_REG      0x17
 #define DS1347_CLOCK_BURST     0x3F
 
-static int ds1347_read_reg(struct device *dev, unsigned char address,
-                               unsigned char *data)
-{
-       struct spi_device *spi = to_spi_device(dev);
-
-       *data = address | 0x80;
-
-       return spi_write_then_read(spi, data, 1, data, 1);
-}
-
-static int ds1347_write_reg(struct device *dev, unsigned char address,
-                               unsigned char data)
-{
-       struct spi_device *spi = to_spi_device(dev);
-       unsigned char buf[2];
-
-       buf[0] = address & 0x7F;
-       buf[1] = data;
+static const struct regmap_range ds1347_ranges[] = {
+       {
+               .range_min = DS1347_SECONDS_REG,
+               .range_max = DS1347_STATUS_REG,
+       },
+};
 
-       return spi_write_then_read(spi, buf, 2, NULL, 0);
-}
+static const struct regmap_access_table ds1347_access_table = {
+       .yes_ranges = ds1347_ranges,
+       .n_yes_ranges = ARRAY_SIZE(ds1347_ranges),
+};
 
 static int ds1347_read_time(struct device *dev, struct rtc_time *dt)
 {
        struct spi_device *spi = to_spi_device(dev);
+       struct regmap *map;
        int err;
        unsigned char buf[8];
 
-       buf[0] = DS1347_CLOCK_BURST | 0x80;
+       map = spi_get_drvdata(spi);
 
-       err = spi_write_then_read(spi, buf, 1, buf, 8);
+       err = regmap_bulk_read(map, DS1347_CLOCK_BURST, buf, 8);
        if (err)
                return err;
 
@@ -80,25 +72,27 @@ static int ds1347_read_time(struct device *dev, struct rtc_time *dt)
 static int ds1347_set_time(struct device *dev, struct rtc_time *dt)
 {
        struct spi_device *spi = to_spi_device(dev);
-       unsigned char buf[9];
+       struct regmap *map;
+       unsigned char buf[8];
+
+       map = spi_get_drvdata(spi);
 
-       buf[0] = DS1347_CLOCK_BURST & 0x7F;
-       buf[1] = bin2bcd(dt->tm_sec);
-       buf[2] = bin2bcd(dt->tm_min);
-       buf[3] = (bin2bcd(dt->tm_hour) & 0x3F);
-       buf[4] = bin2bcd(dt->tm_mday);
-       buf[5] = bin2bcd(dt->tm_mon + 1);
-       buf[6] = bin2bcd(dt->tm_wday + 1);
+       buf[0] = bin2bcd(dt->tm_sec);
+       buf[1] = bin2bcd(dt->tm_min);
+       buf[2] = (bin2bcd(dt->tm_hour) & 0x3F);
+       buf[3] = bin2bcd(dt->tm_mday);
+       buf[4] = bin2bcd(dt->tm_mon + 1);
+       buf[5] = bin2bcd(dt->tm_wday + 1);
 
        /* year in linux is from 1900 i.e in range of 100
        in rtc it is from 00 to 99 */
        dt->tm_year = dt->tm_year % 100;
 
-       buf[7] = bin2bcd(dt->tm_year);
-       buf[8] = bin2bcd(0x00);
+       buf[6] = bin2bcd(dt->tm_year);
+       buf[7] = bin2bcd(0x00);
 
        /* write the rtc settings */
-       return spi_write_then_read(spi, buf, 9, NULL, 0);
+       return regmap_bulk_write(map, DS1347_CLOCK_BURST, buf, 8);
 }
 
 static const struct rtc_class_ops ds1347_rtc_ops = {
@@ -109,35 +103,53 @@ static const struct rtc_class_ops ds1347_rtc_ops = {
 static int ds1347_probe(struct spi_device *spi)
 {
        struct rtc_device *rtc;
-       unsigned char data;
+       struct regmap_config config;
+       struct regmap *map;
+       unsigned int data;
        int res;
 
+       memset(&config, 0, sizeof(config));
+       config.reg_bits = 8;
+       config.val_bits = 8;
+       config.read_flag_mask = 0x80;
+       config.max_register = 0x3F;
+       config.wr_table = &ds1347_access_table;
+
        /* spi setup with ds1347 in mode 3 and bits per word as 8 */
        spi->mode = SPI_MODE_3;
        spi->bits_per_word = 8;
        spi_setup(spi);
 
+       map = devm_regmap_init_spi(spi, &config);
+
+       if (IS_ERR(map)) {
+               dev_err(&spi->dev, "ds1347 regmap init spi failed\n");
+               return PTR_ERR(map);
+       }
+
+       spi_set_drvdata(spi, map);
+
        /* RTC Settings */
-       res = ds1347_read_reg(&spi->dev, DS1347_SECONDS_REG, &data);
+       res = regmap_read(map, DS1347_SECONDS_REG, &data);
        if (res)
                return res;
 
        /* Disable the write protect of rtc */
-       ds1347_read_reg(&spi->dev, DS1347_CONTROL_REG, &data);
+       regmap_read(map, DS1347_CONTROL_REG, &data);
        data = data & ~(1<<7);
-       ds1347_write_reg(&spi->dev, DS1347_CONTROL_REG, data);
+       regmap_write(map, DS1347_CONTROL_REG, data);
 
        /* Enable the oscillator , disable the oscillator stop flag,
         and glitch filter to reduce current consumption */
-       ds1347_read_reg(&spi->dev, DS1347_STATUS_REG, &data);
+       regmap_read(map, DS1347_STATUS_REG, &data);
        data = data & 0x1B;
-       ds1347_write_reg(&spi->dev, DS1347_STATUS_REG, data);
+       regmap_write(map, DS1347_STATUS_REG, data);
 
        /* display the settings */
-       ds1347_read_reg(&spi->dev, DS1347_CONTROL_REG, &data);
+       regmap_read(map, DS1347_CONTROL_REG, &data);
        dev_info(&spi->dev, "DS1347 RTC CTRL Reg = 0x%02x\n", data);
 
-       ds1347_read_reg(&spi->dev, DS1347_STATUS_REG, &data);
+       regmap_read(map, DS1347_STATUS_REG, &data);
        dev_info(&spi->dev, "DS1347 RTC Status Reg = 0x%02x\n", data);
 
        rtc = devm_rtc_device_register(&spi->dev, "ds1347",
@@ -146,8 +158,6 @@ static int ds1347_probe(struct spi_device *spi)
        if (IS_ERR(rtc))
                return PTR_ERR(rtc);
 
-       spi_set_drvdata(spi, rtc);
-
        return 0;
 }
 
index b57505efadbc85475bea0c6c11fa8c7115694dad..688debc143483ff33aef70c08c85656a093b0fc9 100644 (file)
@@ -110,7 +110,7 @@ static int gemini_rtc_set_time(struct device *dev, struct rtc_time *tm)
        return 0;
 }
 
-static struct rtc_class_ops gemini_rtc_ops = {
+static const struct rtc_class_ops gemini_rtc_ops = {
        .read_time     = gemini_rtc_read_time,
        .set_time      = gemini_rtc_set_time,
 };
diff --git a/drivers/rtc/rtc-isl12057.c b/drivers/rtc/rtc-isl12057.c
deleted file mode 100644 (file)
index 0e7f0f5..0000000
+++ /dev/null
@@ -1,643 +0,0 @@
-/*
- * rtc-isl12057 - Driver for Intersil ISL12057 I2C Real Time Clock
- *
- * Copyright (C) 2013, Arnaud EBALARD <arno@natisbad.org>
- *
- * This work is largely based on Intersil ISL1208 driver developed by
- * Hebert Valerio Riedel <hvr@gnu.org>.
- *
- * Detailed datasheet on which this development is based is available here:
- *
- *  http://natisbad.org/NAS2/refs/ISL12057.pdf
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/rtc.h>
-#include <linux/i2c.h>
-#include <linux/bcd.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/regmap.h>
-
-#define DRV_NAME "rtc-isl12057"
-
-/* RTC section */
-#define ISL12057_REG_RTC_SC    0x00    /* Seconds */
-#define ISL12057_REG_RTC_MN    0x01    /* Minutes */
-#define ISL12057_REG_RTC_HR    0x02    /* Hours */
-#define ISL12057_REG_RTC_HR_PM BIT(5)  /* AM/PM bit in 12h format */
-#define ISL12057_REG_RTC_HR_MIL BIT(6) /* 24h/12h format */
-#define ISL12057_REG_RTC_DW    0x03    /* Day of the Week */
-#define ISL12057_REG_RTC_DT    0x04    /* Date */
-#define ISL12057_REG_RTC_MO    0x05    /* Month */
-#define ISL12057_REG_RTC_MO_CEN        BIT(7)  /* Century bit */
-#define ISL12057_REG_RTC_YR    0x06    /* Year */
-#define ISL12057_RTC_SEC_LEN   7
-
-/* Alarm 1 section */
-#define ISL12057_REG_A1_SC     0x07    /* Alarm 1 Seconds */
-#define ISL12057_REG_A1_MN     0x08    /* Alarm 1 Minutes */
-#define ISL12057_REG_A1_HR     0x09    /* Alarm 1 Hours */
-#define ISL12057_REG_A1_HR_PM  BIT(5)  /* AM/PM bit in 12h format */
-#define ISL12057_REG_A1_HR_MIL BIT(6)  /* 24h/12h format */
-#define ISL12057_REG_A1_DWDT   0x0A    /* Alarm 1 Date / Day of the week */
-#define ISL12057_REG_A1_DWDT_B BIT(6)  /* DW / DT selection bit */
-#define ISL12057_A1_SEC_LEN    4
-
-/* Alarm 2 section */
-#define ISL12057_REG_A2_MN     0x0B    /* Alarm 2 Minutes */
-#define ISL12057_REG_A2_HR     0x0C    /* Alarm 2 Hours */
-#define ISL12057_REG_A2_DWDT   0x0D    /* Alarm 2 Date / Day of the week */
-#define ISL12057_A2_SEC_LEN    3
-
-/* Control/Status registers */
-#define ISL12057_REG_INT       0x0E
-#define ISL12057_REG_INT_A1IE  BIT(0)  /* Alarm 1 interrupt enable bit */
-#define ISL12057_REG_INT_A2IE  BIT(1)  /* Alarm 2 interrupt enable bit */
-#define ISL12057_REG_INT_INTCN BIT(2)  /* Interrupt control enable bit */
-#define ISL12057_REG_INT_RS1   BIT(3)  /* Freq out control bit 1 */
-#define ISL12057_REG_INT_RS2   BIT(4)  /* Freq out control bit 2 */
-#define ISL12057_REG_INT_EOSC  BIT(7)  /* Oscillator enable bit */
-
-#define ISL12057_REG_SR                0x0F
-#define ISL12057_REG_SR_A1F    BIT(0)  /* Alarm 1 interrupt bit */
-#define ISL12057_REG_SR_A2F    BIT(1)  /* Alarm 2 interrupt bit */
-#define ISL12057_REG_SR_OSF    BIT(7)  /* Oscillator failure bit */
-
-/* Register memory map length */
-#define ISL12057_MEM_MAP_LEN   0x10
-
-struct isl12057_rtc_data {
-       struct rtc_device *rtc;
-       struct regmap *regmap;
-       struct mutex lock;
-       int irq;
-};
-
-static void isl12057_rtc_regs_to_tm(struct rtc_time *tm, u8 *regs)
-{
-       tm->tm_sec = bcd2bin(regs[ISL12057_REG_RTC_SC]);
-       tm->tm_min = bcd2bin(regs[ISL12057_REG_RTC_MN]);
-
-       if (regs[ISL12057_REG_RTC_HR] & ISL12057_REG_RTC_HR_MIL) { /* AM/PM */
-               tm->tm_hour = bcd2bin(regs[ISL12057_REG_RTC_HR] & 0x1f);
-               if (regs[ISL12057_REG_RTC_HR] & ISL12057_REG_RTC_HR_PM)
-                       tm->tm_hour += 12;
-       } else {                                            /* 24 hour mode */
-               tm->tm_hour = bcd2bin(regs[ISL12057_REG_RTC_HR] & 0x3f);
-       }
-
-       tm->tm_mday = bcd2bin(regs[ISL12057_REG_RTC_DT]);
-       tm->tm_wday = bcd2bin(regs[ISL12057_REG_RTC_DW]) - 1; /* starts at 1 */
-       tm->tm_mon  = bcd2bin(regs[ISL12057_REG_RTC_MO] & 0x1f) - 1; /* ditto */
-       tm->tm_year = bcd2bin(regs[ISL12057_REG_RTC_YR]) + 100;
-
-       /* Check if years register has overflown from 99 to 00 */
-       if (regs[ISL12057_REG_RTC_MO] & ISL12057_REG_RTC_MO_CEN)
-               tm->tm_year += 100;
-}
-
-static int isl12057_rtc_tm_to_regs(u8 *regs, struct rtc_time *tm)
-{
-       u8 century_bit;
-
-       /*
-        * The clock has an 8 bit wide bcd-coded register for the year.
-        * It also has a century bit encoded in MO flag which provides
-        * information about overflow of year register from 99 to 00.
-        * tm_year is an offset from 1900 and we are interested in the
-        * 2000-2199 range, so any value less than 100 or larger than
-        * 299 is invalid.
-        */
-       if (tm->tm_year < 100 || tm->tm_year > 299)
-               return -EINVAL;
-
-       century_bit = (tm->tm_year > 199) ? ISL12057_REG_RTC_MO_CEN : 0;
-
-       regs[ISL12057_REG_RTC_SC] = bin2bcd(tm->tm_sec);
-       regs[ISL12057_REG_RTC_MN] = bin2bcd(tm->tm_min);
-       regs[ISL12057_REG_RTC_HR] = bin2bcd(tm->tm_hour); /* 24-hour format */
-       regs[ISL12057_REG_RTC_DT] = bin2bcd(tm->tm_mday);
-       regs[ISL12057_REG_RTC_MO] = bin2bcd(tm->tm_mon + 1) | century_bit;
-       regs[ISL12057_REG_RTC_YR] = bin2bcd(tm->tm_year % 100);
-       regs[ISL12057_REG_RTC_DW] = bin2bcd(tm->tm_wday + 1);
-
-       return 0;
-}
-
-/*
- * Try and match register bits w/ fixed null values to see whether we
- * are dealing with an ISL12057. Note: this function is called early
- * during init and hence does need mutex protection.
- */
-static int isl12057_i2c_validate_chip(struct regmap *regmap)
-{
-       u8 regs[ISL12057_MEM_MAP_LEN];
-       static const u8 mask[ISL12057_MEM_MAP_LEN] = { 0x80, 0x80, 0x80, 0xf8,
-                                                      0xc0, 0x60, 0x00, 0x00,
-                                                      0x00, 0x00, 0x00, 0x00,
-                                                      0x00, 0x00, 0x60, 0x7c };
-       int ret, i;
-
-       ret = regmap_bulk_read(regmap, 0, regs, ISL12057_MEM_MAP_LEN);
-       if (ret)
-               return ret;
-
-       for (i = 0; i < ISL12057_MEM_MAP_LEN; ++i) {
-               if (regs[i] & mask[i])  /* check if bits are cleared */
-                       return -ENODEV;
-       }
-
-       return 0;
-}
-
-static int _isl12057_rtc_clear_alarm(struct device *dev)
-{
-       struct isl12057_rtc_data *data = dev_get_drvdata(dev);
-       int ret;
-
-       ret = regmap_update_bits(data->regmap, ISL12057_REG_SR,
-                                ISL12057_REG_SR_A1F, 0);
-       if (ret)
-               dev_err(dev, "%s: clearing alarm failed (%d)\n", __func__, ret);
-
-       return ret;
-}
-
-static int _isl12057_rtc_update_alarm(struct device *dev, int enable)
-{
-       struct isl12057_rtc_data *data = dev_get_drvdata(dev);
-       int ret;
-
-       ret = regmap_update_bits(data->regmap, ISL12057_REG_INT,
-                                ISL12057_REG_INT_A1IE,
-                                enable ? ISL12057_REG_INT_A1IE : 0);
-       if (ret)
-               dev_err(dev, "%s: changing alarm interrupt flag failed (%d)\n",
-                       __func__, ret);
-
-       return ret;
-}
-
-/*
- * Note: as we only read from device and do not perform any update, there is
- * no need for an equivalent function which would try and get driver's main
- * lock. Here, it is safe for everyone if we just use regmap internal lock
- * on the device when reading.
- */
-static int _isl12057_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-       struct isl12057_rtc_data *data = dev_get_drvdata(dev);
-       u8 regs[ISL12057_RTC_SEC_LEN];
-       unsigned int sr;
-       int ret;
-
-       ret = regmap_read(data->regmap, ISL12057_REG_SR, &sr);
-       if (ret) {
-               dev_err(dev, "%s: unable to read oscillator status flag (%d)\n",
-                       __func__, ret);
-               goto out;
-       } else {
-               if (sr & ISL12057_REG_SR_OSF) {
-                       ret = -ENODATA;
-                       goto out;
-               }
-       }
-
-       ret = regmap_bulk_read(data->regmap, ISL12057_REG_RTC_SC, regs,
-                              ISL12057_RTC_SEC_LEN);
-       if (ret)
-               dev_err(dev, "%s: unable to read RTC time section (%d)\n",
-                       __func__, ret);
-
-out:
-       if (ret)
-               return ret;
-
-       isl12057_rtc_regs_to_tm(tm, regs);
-
-       return rtc_valid_tm(tm);
-}
-
-static int isl12057_rtc_update_alarm(struct device *dev, int enable)
-{
-       struct isl12057_rtc_data *data = dev_get_drvdata(dev);
-       int ret;
-
-       mutex_lock(&data->lock);
-       ret = _isl12057_rtc_update_alarm(dev, enable);
-       mutex_unlock(&data->lock);
-
-       return ret;
-}
-
-static int isl12057_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
-{
-       struct isl12057_rtc_data *data = dev_get_drvdata(dev);
-       struct rtc_time *alarm_tm = &alarm->time;
-       u8 regs[ISL12057_A1_SEC_LEN];
-       unsigned int ir;
-       int ret;
-
-       mutex_lock(&data->lock);
-       ret = regmap_bulk_read(data->regmap, ISL12057_REG_A1_SC, regs,
-                              ISL12057_A1_SEC_LEN);
-       if (ret) {
-               dev_err(dev, "%s: reading alarm section failed (%d)\n",
-                       __func__, ret);
-               goto err_unlock;
-       }
-
-       alarm_tm->tm_sec  = bcd2bin(regs[0] & 0x7f);
-       alarm_tm->tm_min  = bcd2bin(regs[1] & 0x7f);
-       alarm_tm->tm_hour = bcd2bin(regs[2] & 0x3f);
-       alarm_tm->tm_mday = bcd2bin(regs[3] & 0x3f);
-
-       ret = regmap_read(data->regmap, ISL12057_REG_INT, &ir);
-       if (ret) {
-               dev_err(dev, "%s: reading alarm interrupt flag failed (%d)\n",
-                       __func__, ret);
-               goto err_unlock;
-       }
-
-       alarm->enabled = !!(ir & ISL12057_REG_INT_A1IE);
-
-err_unlock:
-       mutex_unlock(&data->lock);
-
-       return ret;
-}
-
-static int isl12057_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
-{
-       struct isl12057_rtc_data *data = dev_get_drvdata(dev);
-       struct rtc_time *alarm_tm = &alarm->time;
-       unsigned long rtc_secs, alarm_secs;
-       u8 regs[ISL12057_A1_SEC_LEN];
-       struct rtc_time rtc_tm;
-       int ret, enable = 1;
-
-       mutex_lock(&data->lock);
-       ret = _isl12057_rtc_read_time(dev, &rtc_tm);
-       if (ret)
-               goto err_unlock;
-
-       ret = rtc_tm_to_time(&rtc_tm, &rtc_secs);
-       if (ret)
-               goto err_unlock;
-
-       ret = rtc_tm_to_time(alarm_tm, &alarm_secs);
-       if (ret)
-               goto err_unlock;
-
-       /* If alarm time is before current time, disable the alarm */
-       if (!alarm->enabled || alarm_secs <= rtc_secs) {
-               enable = 0;
-       } else {
-               /*
-                * Chip only support alarms up to one month in the future. Let's
-                * return an error if we get something after that limit.
-                * Comparison is done by incrementing rtc_tm month field by one
-                * and checking alarm value is still below.
-                */
-               if (rtc_tm.tm_mon == 11) { /* handle year wrapping */
-                       rtc_tm.tm_mon = 0;
-                       rtc_tm.tm_year += 1;
-               } else {
-                       rtc_tm.tm_mon += 1;
-               }
-
-               ret = rtc_tm_to_time(&rtc_tm, &rtc_secs);
-               if (ret)
-                       goto err_unlock;
-
-               if (alarm_secs > rtc_secs) {
-                       dev_err(dev, "%s: max for alarm is one month (%d)\n",
-                               __func__, ret);
-                       ret = -EINVAL;
-                       goto err_unlock;
-               }
-       }
-
-       /* Disable the alarm before modifying it */
-       ret = _isl12057_rtc_update_alarm(dev, 0);
-       if (ret < 0) {
-               dev_err(dev, "%s: unable to disable the alarm (%d)\n",
-                       __func__, ret);
-               goto err_unlock;
-       }
-
-       /* Program alarm registers */
-       regs[0] = bin2bcd(alarm_tm->tm_sec) & 0x7f;
-       regs[1] = bin2bcd(alarm_tm->tm_min) & 0x7f;
-       regs[2] = bin2bcd(alarm_tm->tm_hour) & 0x3f;
-       regs[3] = bin2bcd(alarm_tm->tm_mday) & 0x3f;
-
-       ret = regmap_bulk_write(data->regmap, ISL12057_REG_A1_SC, regs,
-                               ISL12057_A1_SEC_LEN);
-       if (ret < 0) {
-               dev_err(dev, "%s: writing alarm section failed (%d)\n",
-                       __func__, ret);
-               goto err_unlock;
-       }
-
-       /* Enable or disable alarm */
-       ret = _isl12057_rtc_update_alarm(dev, enable);
-
-err_unlock:
-       mutex_unlock(&data->lock);
-
-       return ret;
-}
-
-static int isl12057_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-       struct isl12057_rtc_data *data = dev_get_drvdata(dev);
-       u8 regs[ISL12057_RTC_SEC_LEN];
-       int ret;
-
-       ret = isl12057_rtc_tm_to_regs(regs, tm);
-       if (ret)
-               return ret;
-
-       mutex_lock(&data->lock);
-       ret = regmap_bulk_write(data->regmap, ISL12057_REG_RTC_SC, regs,
-                               ISL12057_RTC_SEC_LEN);
-       if (ret) {
-               dev_err(dev, "%s: unable to write RTC time section (%d)\n",
-                       __func__, ret);
-               goto out;
-       }
-
-       /*
-        * Now that RTC time has been updated, let's clear oscillator
-        * failure flag, if needed.
-        */
-       ret = regmap_update_bits(data->regmap, ISL12057_REG_SR,
-                                ISL12057_REG_SR_OSF, 0);
-       if (ret < 0)
-               dev_err(dev, "%s: unable to clear osc. failure bit (%d)\n",
-                       __func__, ret);
-
-out:
-       mutex_unlock(&data->lock);
-
-       return ret;
-}
-
-/*
- * Check current RTC status and enable/disable what needs to be. Return 0 if
- * everything went ok and a negative value upon error. Note: this function
- * is called early during init and hence does need mutex protection.
- */
-static int isl12057_check_rtc_status(struct device *dev, struct regmap *regmap)
-{
-       int ret;
-
-       /* Enable oscillator if not already running */
-       ret = regmap_update_bits(regmap, ISL12057_REG_INT,
-                                ISL12057_REG_INT_EOSC, 0);
-       if (ret < 0) {
-               dev_err(dev, "%s: unable to enable oscillator (%d)\n",
-                       __func__, ret);
-               return ret;
-       }
-
-       /* Clear alarm bit if needed */
-       ret = regmap_update_bits(regmap, ISL12057_REG_SR,
-                                ISL12057_REG_SR_A1F, 0);
-       if (ret < 0) {
-               dev_err(dev, "%s: unable to clear alarm bit (%d)\n",
-                       __func__, ret);
-               return ret;
-       }
-
-       return 0;
-}
-
-#ifdef CONFIG_OF
-/*
- * One would expect the device to be marked as a wakeup source only
- * when an IRQ pin of the RTC is routed to an interrupt line of the
- * CPU. In practice, such an IRQ pin can be connected to a PMIC and
- * this allows the device to be powered up when RTC alarm rings. This
- * is for instance the case on ReadyNAS 102, 104 and 2120. On those
- * devices with no IRQ driectly connected to the SoC, the RTC chip
- * can be forced as a wakeup source by stating that explicitly in
- * the device's .dts file using the "wakeup-source" boolean property.
- * This will guarantee 'wakealarm' sysfs entry is available on the device.
- *
- * The function below returns 1, i.e. the capability of the chip to
- * wakeup the device, based on IRQ availability or if the boolean
- * property has been set in the .dts file. Otherwise, it returns 0.
- */
-
-static bool isl12057_can_wakeup_machine(struct device *dev)
-{
-       struct isl12057_rtc_data *data = dev_get_drvdata(dev);
-
-       return data->irq || of_property_read_bool(dev->of_node, "wakeup-source")
-               || of_property_read_bool(dev->of_node, /* legacy */
-                                        "isil,irq2-can-wakeup-machine");
-}
-#else
-static bool isl12057_can_wakeup_machine(struct device *dev)
-{
-       struct isl12057_rtc_data *data = dev_get_drvdata(dev);
-
-       return !!data->irq;
-}
-#endif
-
-static int isl12057_rtc_alarm_irq_enable(struct device *dev,
-                                        unsigned int enable)
-{
-       struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev);
-       int ret = -ENOTTY;
-
-       if (rtc_data->irq)
-               ret = isl12057_rtc_update_alarm(dev, enable);
-
-       return ret;
-}
-
-static irqreturn_t isl12057_rtc_interrupt(int irq, void *data)
-{
-       struct i2c_client *client = data;
-       struct isl12057_rtc_data *rtc_data = dev_get_drvdata(&client->dev);
-       struct rtc_device *rtc = rtc_data->rtc;
-       int ret, handled = IRQ_NONE;
-       unsigned int sr;
-
-       ret = regmap_read(rtc_data->regmap, ISL12057_REG_SR, &sr);
-       if (!ret && (sr & ISL12057_REG_SR_A1F)) {
-               dev_dbg(&client->dev, "RTC alarm!\n");
-
-               rtc_update_irq(rtc, 1, RTC_IRQF | RTC_AF);
-
-               /* Acknowledge and disable the alarm */
-               _isl12057_rtc_clear_alarm(&client->dev);
-               _isl12057_rtc_update_alarm(&client->dev, 0);
-
-               handled = IRQ_HANDLED;
-       }
-
-       return handled;
-}
-
-static const struct rtc_class_ops rtc_ops = {
-       .read_time = _isl12057_rtc_read_time,
-       .set_time = isl12057_rtc_set_time,
-       .read_alarm = isl12057_rtc_read_alarm,
-       .set_alarm = isl12057_rtc_set_alarm,
-       .alarm_irq_enable = isl12057_rtc_alarm_irq_enable,
-};
-
-static const struct regmap_config isl12057_rtc_regmap_config = {
-       .reg_bits = 8,
-       .val_bits = 8,
-};
-
-static int isl12057_probe(struct i2c_client *client,
-                         const struct i2c_device_id *id)
-{
-       struct device *dev = &client->dev;
-       struct isl12057_rtc_data *data;
-       struct regmap *regmap;
-       int ret;
-
-       if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C |
-                                    I2C_FUNC_SMBUS_BYTE_DATA |
-                                    I2C_FUNC_SMBUS_I2C_BLOCK))
-               return -ENODEV;
-
-       regmap = devm_regmap_init_i2c(client, &isl12057_rtc_regmap_config);
-       if (IS_ERR(regmap)) {
-               ret = PTR_ERR(regmap);
-               dev_err(dev, "%s: regmap allocation failed (%d)\n",
-                       __func__, ret);
-               return ret;
-       }
-
-       ret = isl12057_i2c_validate_chip(regmap);
-       if (ret)
-               return ret;
-
-       ret = isl12057_check_rtc_status(dev, regmap);
-       if (ret)
-               return ret;
-
-       data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
-
-       mutex_init(&data->lock);
-       data->regmap = regmap;
-       dev_set_drvdata(dev, data);
-
-       if (client->irq > 0) {
-               ret = devm_request_threaded_irq(dev, client->irq, NULL,
-                                               isl12057_rtc_interrupt,
-                                               IRQF_SHARED|IRQF_ONESHOT,
-                                               DRV_NAME, client);
-               if (!ret)
-                       data->irq = client->irq;
-               else
-                       dev_err(dev, "%s: irq %d unavailable (%d)\n", __func__,
-                               client->irq, ret);
-       }
-
-       if (isl12057_can_wakeup_machine(dev))
-               device_init_wakeup(dev, true);
-
-       data->rtc = devm_rtc_device_register(dev, DRV_NAME, &rtc_ops,
-                                            THIS_MODULE);
-       ret = PTR_ERR_OR_ZERO(data->rtc);
-       if (ret) {
-               dev_err(dev, "%s: unable to register RTC device (%d)\n",
-                       __func__, ret);
-               goto err;
-       }
-
-       /* We cannot support UIE mode if we do not have an IRQ line */
-       if (!data->irq)
-               data->rtc->uie_unsupported = 1;
-
-err:
-       return ret;
-}
-
-static int isl12057_remove(struct i2c_client *client)
-{
-       if (isl12057_can_wakeup_machine(&client->dev))
-               device_init_wakeup(&client->dev, false);
-
-       return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int isl12057_rtc_suspend(struct device *dev)
-{
-       struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev);
-
-       if (rtc_data->irq && device_may_wakeup(dev))
-               return enable_irq_wake(rtc_data->irq);
-
-       return 0;
-}
-
-static int isl12057_rtc_resume(struct device *dev)
-{
-       struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev);
-
-       if (rtc_data->irq && device_may_wakeup(dev))
-               return disable_irq_wake(rtc_data->irq);
-
-       return 0;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(isl12057_rtc_pm_ops, isl12057_rtc_suspend,
-                        isl12057_rtc_resume);
-
-#ifdef CONFIG_OF
-static const struct of_device_id isl12057_dt_match[] = {
-       { .compatible = "isl,isl12057" }, /* for backward compat., don't use */
-       { .compatible = "isil,isl12057" },
-       { },
-};
-MODULE_DEVICE_TABLE(of, isl12057_dt_match);
-#endif
-
-static const struct i2c_device_id isl12057_id[] = {
-       { "isl12057", 0 },
-       { }
-};
-MODULE_DEVICE_TABLE(i2c, isl12057_id);
-
-static struct i2c_driver isl12057_driver = {
-       .driver = {
-               .name = DRV_NAME,
-               .pm = &isl12057_rtc_pm_ops,
-               .of_match_table = of_match_ptr(isl12057_dt_match),
-       },
-       .probe    = isl12057_probe,
-       .remove   = isl12057_remove,
-       .id_table = isl12057_id,
-};
-module_i2c_driver(isl12057_driver);
-
-MODULE_AUTHOR("Arnaud EBALARD <arno@natisbad.org>");
-MODULE_DESCRIPTION("Intersil ISL12057 RTC driver");
-MODULE_LICENSE("GPL");
index b2bcfc0bf2e51c6ee050d7705f6305bb7cd09148..5e14651b71a89a2327a2f1fe175cc56156d6a880 100644 (file)
@@ -174,7 +174,7 @@ static int jz4740_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
        return jz4740_rtc_ctrl_set_bits(rtc, JZ_RTC_CTRL_AF_IRQ, enable);
 }
 
-static struct rtc_class_ops jz4740_rtc_ops = {
+static const struct rtc_class_ops jz4740_rtc_ops = {
        .read_time      = jz4740_rtc_read_time,
        .set_mmss       = jz4740_rtc_set_mmss,
        .read_alarm     = jz4740_rtc_read_alarm,
index 025bb33b9cd2d0b9ae31499f33e92c55460e703a..4021fd04cb0ac847c955c4de79e283a97b73ff17 100644 (file)
@@ -151,7 +151,7 @@ static int mcp795_read_time(struct device *dev, struct rtc_time *tim)
        return rtc_valid_tm(tim);
 }
 
-static struct rtc_class_ops mcp795_rtc_ops = {
+static const struct rtc_class_ops mcp795_rtc_ops = {
                .read_time = mcp795_read_time,
                .set_time = mcp795_set_time
 };
index 44f622c3e0488aaf4481b5accd5b734adc3a39fe..1a61fa56f3ad77bad999d234778e8d953fc1ad5a 100644 (file)
@@ -301,7 +301,7 @@ exit:
        return ret;
 }
 
-static struct rtc_class_ops mtk_rtc_ops = {
+static const struct rtc_class_ops mtk_rtc_ops = {
        .read_time  = mtk_rtc_read_time,
        .set_time   = mtk_rtc_set_time,
        .read_alarm = mtk_rtc_read_alarm,
index 09fc1c19f0dfdd4f32e783d19c5d83b37d4cf6a4..b1b6b3041bfbc94eac0a478daabd7549bc891163 100644 (file)
@@ -214,7 +214,7 @@ static int nuc900_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        return 0;
 }
 
-static struct rtc_class_ops nuc900_rtc_ops = {
+static const struct rtc_class_ops nuc900_rtc_ops = {
        .read_time = nuc900_rtc_read_time,
        .set_time = nuc900_rtc_set_time,
        .read_alarm = nuc900_rtc_read_alarm,
index ec2e9c5fb993c7023c9262af5c0b691905443bff..51e52446eacb8f4d1e32821fd9c2d6da019825f3 100644 (file)
  * 2 of the License, or (at your option) any later version.
  */
 
-#include <linux/kernel.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <linux/bcd.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/io.h>
 #include <linux/ioport.h>
-#include <linux/delay.h>
-#include <linux/rtc.h>
-#include <linux/bcd.h>
-#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
-#include <linux/io.h>
-#include <linux/clk.h>
+#include <linux/rtc.h>
 
 /*
  * The OMAP RTC is a year/month/day/hours/minutes/seconds BCD clock
 /* OMAP_RTC_OSC_REG bit fields: */
 #define OMAP_RTC_OSC_32KCLK_EN         BIT(6)
 #define OMAP_RTC_OSC_SEL_32KCLK_SRC    BIT(3)
+#define OMAP_RTC_OSC_OSC32K_GZ_DISABLE BIT(4)
 
 /* OMAP_RTC_IRQWAKEEN bit fields: */
 #define OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN        BIT(1)
 
 /* OMAP_RTC_PMIC bit fields: */
 #define OMAP_RTC_PMIC_POWER_EN_EN      BIT(16)
+#define OMAP_RTC_PMIC_EXT_WKUP_EN(x)   BIT(x)
+#define OMAP_RTC_PMIC_EXT_WKUP_POL(x)  BIT(4 + x)
 
 /* OMAP_RTC_KICKER values */
 #define        KICK0_VALUE                     0x83e70b13
@@ -140,7 +147,9 @@ struct omap_rtc {
        u8 interrupts_reg;
        bool is_pmic_controller;
        bool has_ext_clk;
+       bool is_suspending;
        const struct omap_rtc_device_type *type;
+       struct pinctrl_dev *pctldev;
 };
 
 static inline u8 rtc_read(struct omap_rtc *rtc, unsigned int reg)
@@ -469,7 +478,7 @@ static void omap_rtc_power_off(void)
        mdelay(2500);
 }
 
-static struct rtc_class_ops omap_rtc_ops = {
+static const struct rtc_class_ops omap_rtc_ops = {
        .read_time      = omap_rtc_read_time,
        .set_time       = omap_rtc_set_time,
        .read_alarm     = omap_rtc_read_alarm,
@@ -525,6 +534,139 @@ static const struct of_device_id omap_rtc_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, omap_rtc_of_match);
 
+static const struct pinctrl_pin_desc rtc_pins_desc[] = {
+       PINCTRL_PIN(0, "ext_wakeup0"),
+       PINCTRL_PIN(1, "ext_wakeup1"),
+       PINCTRL_PIN(2, "ext_wakeup2"),
+       PINCTRL_PIN(3, "ext_wakeup3"),
+};
+
+static int rtc_pinctrl_get_groups_count(struct pinctrl_dev *pctldev)
+{
+       return 0;
+}
+
+static const char *rtc_pinctrl_get_group_name(struct pinctrl_dev *pctldev,
+                                       unsigned int group)
+{
+       return NULL;
+}
+
+static const struct pinctrl_ops rtc_pinctrl_ops = {
+       .get_groups_count = rtc_pinctrl_get_groups_count,
+       .get_group_name = rtc_pinctrl_get_group_name,
+       .dt_node_to_map = pinconf_generic_dt_node_to_map_pin,
+       .dt_free_map = pinconf_generic_dt_free_map,
+};
+
+enum rtc_pin_config_param {
+       PIN_CONFIG_ACTIVE_HIGH = PIN_CONFIG_END + 1,
+};
+
+static const struct pinconf_generic_params rtc_params[] = {
+       {"ti,active-high", PIN_CONFIG_ACTIVE_HIGH, 0},
+};
+
+#ifdef CONFIG_DEBUG_FS
+static const struct pin_config_item rtc_conf_items[ARRAY_SIZE(rtc_params)] = {
+       PCONFDUMP(PIN_CONFIG_ACTIVE_HIGH, "input active high", NULL, false),
+};
+#endif
+
+static int rtc_pinconf_get(struct pinctrl_dev *pctldev,
+                       unsigned int pin, unsigned long *config)
+{
+       struct omap_rtc *rtc = pinctrl_dev_get_drvdata(pctldev);
+       unsigned int param = pinconf_to_config_param(*config);
+       u32 val;
+       u16 arg = 0;
+
+       rtc->type->unlock(rtc);
+       val = rtc_readl(rtc, OMAP_RTC_PMIC_REG);
+       rtc->type->lock(rtc);
+
+       switch (param) {
+       case PIN_CONFIG_INPUT_ENABLE:
+               if (!(val & OMAP_RTC_PMIC_EXT_WKUP_EN(pin)))
+                       return -EINVAL;
+               break;
+       case PIN_CONFIG_ACTIVE_HIGH:
+               if (val & OMAP_RTC_PMIC_EXT_WKUP_POL(pin))
+                       return -EINVAL;
+               break;
+       default:
+               return -ENOTSUPP;
+       };
+
+       *config = pinconf_to_config_packed(param, arg);
+
+       return 0;
+}
+
+static int rtc_pinconf_set(struct pinctrl_dev *pctldev,
+                       unsigned int pin, unsigned long *configs,
+                       unsigned int num_configs)
+{
+       struct omap_rtc *rtc = pinctrl_dev_get_drvdata(pctldev);
+       u32 val;
+       unsigned int param;
+       u16 param_val;
+       int i;
+
+       rtc->type->unlock(rtc);
+       val = rtc_readl(rtc, OMAP_RTC_PMIC_REG);
+       rtc->type->lock(rtc);
+
+       /* active low by default */
+       val |= OMAP_RTC_PMIC_EXT_WKUP_POL(pin);
+
+       for (i = 0; i < num_configs; i++) {
+               param = pinconf_to_config_param(configs[i]);
+               param_val = pinconf_to_config_argument(configs[i]);
+
+               switch (param) {
+               case PIN_CONFIG_INPUT_ENABLE:
+                       if (param_val)
+                               val |= OMAP_RTC_PMIC_EXT_WKUP_EN(pin);
+                       else
+                               val &= ~OMAP_RTC_PMIC_EXT_WKUP_EN(pin);
+                       break;
+               case PIN_CONFIG_ACTIVE_HIGH:
+                       val &= ~OMAP_RTC_PMIC_EXT_WKUP_POL(pin);
+                       break;
+               default:
+                       dev_err(&rtc->rtc->dev, "Property %u not supported\n",
+                               param);
+                       return -ENOTSUPP;
+               }
+       }
+
+       rtc->type->unlock(rtc);
+       rtc_writel(rtc, OMAP_RTC_PMIC_REG, val);
+       rtc->type->lock(rtc);
+
+       return 0;
+}
+
+static const struct pinconf_ops rtc_pinconf_ops = {
+       .is_generic = true,
+       .pin_config_get = rtc_pinconf_get,
+       .pin_config_set = rtc_pinconf_set,
+};
+
+static struct pinctrl_desc rtc_pinctrl_desc = {
+       .pins = rtc_pins_desc,
+       .npins = ARRAY_SIZE(rtc_pins_desc),
+       .pctlops = &rtc_pinctrl_ops,
+       .confops = &rtc_pinconf_ops,
+       .custom_params = rtc_params,
+       .num_custom_params = ARRAY_SIZE(rtc_params),
+#ifdef CONFIG_DEBUG_FS
+       .custom_conf_items = rtc_conf_items,
+#endif
+       .owner = THIS_MODULE,
+};
+
 static int omap_rtc_probe(struct platform_device *pdev)
 {
        struct omap_rtc *rtc;
@@ -646,8 +788,9 @@ static int omap_rtc_probe(struct platform_device *pdev)
         */
        if (rtc->has_ext_clk) {
                reg = rtc_read(rtc, OMAP_RTC_OSC_REG);
-               rtc_write(rtc, OMAP_RTC_OSC_REG,
-                         reg | OMAP_RTC_OSC_SEL_32KCLK_SRC);
+               reg &= ~OMAP_RTC_OSC_OSC32K_GZ_DISABLE;
+               reg |= OMAP_RTC_OSC_32KCLK_EN | OMAP_RTC_OSC_SEL_32KCLK_SRC;
+               rtc_writel(rtc, OMAP_RTC_OSC_REG, reg);
        }
 
        rtc->type->lock(rtc);
@@ -681,6 +824,15 @@ static int omap_rtc_probe(struct platform_device *pdev)
                }
        }
 
+       /* Support ext_wakeup pinconf */
+       rtc_pinctrl_desc.name = dev_name(&pdev->dev);
+
+       rtc->pctldev = pinctrl_register(&rtc_pinctrl_desc, &pdev->dev, rtc);
+       if (IS_ERR(rtc->pctldev)) {
+               dev_err(&pdev->dev, "Couldn't register pinctrl driver\n");
+               return PTR_ERR(rtc->pctldev);
+       }
+
        return 0;
 
 err:
@@ -724,6 +876,9 @@ static int __exit omap_rtc_remove(struct platform_device *pdev)
        pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
 
+       /* Remove ext_wakeup pinconf */
+       pinctrl_unregister(rtc->pctldev);
+
        return 0;
 }
 
@@ -746,8 +901,7 @@ static int omap_rtc_suspend(struct device *dev)
                rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, 0);
        rtc->type->lock(rtc);
 
-       /* Disable the clock/module */
-       pm_runtime_put_sync(dev);
+       rtc->is_suspending = true;
 
        return 0;
 }
@@ -756,9 +910,6 @@ static int omap_rtc_resume(struct device *dev)
 {
        struct omap_rtc *rtc = dev_get_drvdata(dev);
 
-       /* Enable the clock/module so that we can access the registers */
-       pm_runtime_get_sync(dev);
-
        rtc->type->unlock(rtc);
        if (device_may_wakeup(dev))
                disable_irq_wake(rtc->irq_alarm);
@@ -766,11 +917,34 @@ static int omap_rtc_resume(struct device *dev)
                rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, rtc->interrupts_reg);
        rtc->type->lock(rtc);
 
+       rtc->is_suspending = false;
+
+       return 0;
+}
+#endif
+
+#ifdef CONFIG_PM
+static int omap_rtc_runtime_suspend(struct device *dev)
+{
+       struct omap_rtc *rtc = dev_get_drvdata(dev);
+
+       if (rtc->is_suspending && !rtc->has_ext_clk)
+               return -EBUSY;
+
+       return 0;
+}
+
+static int omap_rtc_runtime_resume(struct device *dev)
+{
        return 0;
 }
 #endif
 
-static SIMPLE_DEV_PM_OPS(omap_rtc_pm_ops, omap_rtc_suspend, omap_rtc_resume);
+static const struct dev_pm_ops omap_rtc_pm_ops = {
+       SET_SYSTEM_SLEEP_PM_OPS(omap_rtc_suspend, omap_rtc_resume)
+       SET_RUNTIME_PM_OPS(omap_rtc_runtime_suspend,
+                          omap_rtc_runtime_resume, NULL)
+};
 
 static void omap_rtc_shutdown(struct platform_device *pdev)
 {
index 6080e0edef6328aa7d221ef48cc69c564b526a7a..4bcfb88674d38b5d3343149708ed188e7d2c707e 100644 (file)
@@ -225,7 +225,7 @@ static irqreturn_t palmas_rtc_interrupt(int irq, void *context)
        return IRQ_HANDLED;
 }
 
-static struct rtc_class_ops palmas_rtc_ops = {
+static const struct rtc_class_ops palmas_rtc_ops = {
        .read_time      = palmas_rtc_read_time,
        .set_time       = palmas_rtc_set_time,
        .read_alarm     = palmas_rtc_read_alarm,
index b4478cc92b55dbd76731ea8ec6a4228677adcebb..8895f77726e8da5444afcd602dceff8f25a9b3fd 100644 (file)
@@ -182,7 +182,8 @@ static ssize_t pcf2123_show(struct device *dev, struct device_attribute *attr,
 }
 
 static ssize_t pcf2123_store(struct device *dev, struct device_attribute *attr,
-                            const char *buffer, size_t count) {
+                            const char *buffer, size_t count)
+{
        struct pcf2123_sysfs_reg *r;
        unsigned long reg;
        unsigned long val;
@@ -199,7 +200,7 @@ static ssize_t pcf2123_store(struct device *dev, struct device_attribute *attr,
        if (ret)
                return ret;
 
-       pcf2123_write_reg(dev, reg, val);
+       ret = pcf2123_write_reg(dev, reg, val);
        if (ret < 0)
                return -EIO;
        return count;
index e6b6911c8e0528b326e6a0e93050510f81735b52..00c31c91b245fb080b5312cc2d7499268d3b1a4b 100644 (file)
@@ -232,7 +232,7 @@ static int pcf50633_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        return ret;
 }
 
-static struct rtc_class_ops pcf50633_rtc_ops = {
+static const struct rtc_class_ops pcf50633_rtc_ops = {
        .read_time              = pcf50633_rtc_read_time,
        .set_time               = pcf50633_rtc_set_time,
        .read_alarm             = pcf50633_rtc_read_alarm,
index 64e1e4578492ab029c54be1e265598f611acce6f..5cfb6df5c43032e294ccc5a610637ba6e049b27f 100644 (file)
@@ -400,7 +400,6 @@ static struct platform_driver pic32_rtc_driver = {
        .remove         = pic32_rtc_remove,
        .driver         = {
                .name   = "pic32-rtc",
-               .owner  = THIS_MODULE,
                .of_match_table = of_match_ptr(pic32_rtc_dt_ids),
        },
 };
index 9a2f6a95d5a7cf5757308c3ce6668e2f78d20c35..f9277e536f7e8754120da35b17581b9f1492c632 100644 (file)
 #define RV8803_CTRL_TIE                        BIT(4)
 #define RV8803_CTRL_UIE                        BIT(5)
 
+#define RX8900_BACKUP_CTRL             0x18
+#define RX8900_FLAG_SWOFF              BIT(2)
+#define RX8900_FLAG_VDETOFF            BIT(3)
+
+enum rv8803_type {
+       rv_8803,
+       rx_8900
+};
+
 struct rv8803_data {
        struct i2c_client *client;
        struct rtc_device *rtc;
        struct mutex flags_lock;
        u8 ctrl;
+       enum rv8803_type type;
 };
 
 static int rv8803_read_reg(const struct i2c_client *client, u8 reg)
@@ -497,6 +507,35 @@ static struct rtc_class_ops rv8803_rtc_ops = {
        .ioctl = rv8803_ioctl,
 };
 
+static int rx8900_trickle_charger_init(struct rv8803_data *rv8803)
+{
+       struct i2c_client *client = rv8803->client;
+       struct device_node *node = client->dev.of_node;
+       int err;
+       u8 flags;
+
+       if (!node)
+               return 0;
+
+       if (rv8803->type != rx_8900)
+               return 0;
+
+       err = i2c_smbus_read_byte_data(rv8803->client, RX8900_BACKUP_CTRL);
+       if (err < 0)
+               return err;
+
+       flags = ~(RX8900_FLAG_VDETOFF | RX8900_FLAG_SWOFF) & (u8)err;
+
+       if (of_property_read_bool(node, "epson,vdet-disable"))
+               flags |= RX8900_FLAG_VDETOFF;
+
+       if (of_property_read_bool(node, "trickle-diode-disable"))
+               flags |= RX8900_FLAG_SWOFF;
+
+       return i2c_smbus_write_byte_data(rv8803->client, RX8900_BACKUP_CTRL,
+                                        flags);
+}
+
 static int rv8803_probe(struct i2c_client *client,
                        const struct i2c_device_id *id)
 {
@@ -517,6 +556,7 @@ static int rv8803_probe(struct i2c_client *client,
 
        mutex_init(&rv8803->flags_lock);
        rv8803->client = client;
+       rv8803->type = id->driver_data;
        i2c_set_clientdata(client, rv8803);
 
        flags = rv8803_read_reg(client, RV8803_FLAG);
@@ -558,6 +598,12 @@ static int rv8803_probe(struct i2c_client *client,
        if (err)
                return err;
 
+       err = rx8900_trickle_charger_init(rv8803);
+       if (err) {
+               dev_err(&client->dev, "failed to init charger\n");
+               return err;
+       }
+
        err = device_create_bin_file(&client->dev, &rv8803_nvram_attr);
        if (err)
                return err;
@@ -575,8 +621,8 @@ static int rv8803_remove(struct i2c_client *client)
 }
 
 static const struct i2c_device_id rv8803_id[] = {
-       { "rv8803", 0 },
-       { "rx8900", 0 },
+       { "rv8803", rv_8803 },
+       { "rx8900", rx_8900 },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, rv8803_id);
index bbad00b233bce1b2891ecc72ee13b60a16b714ef..7c9c08eab5e5b6baf52d0bafcf86d3d92698c693 100644 (file)
@@ -317,7 +317,7 @@ static int rx6110_init(struct rx6110_data *rx6110)
        return ret;
 }
 
-static struct rtc_class_ops rx6110_rtc_ops = {
+static const struct rtc_class_ops rx6110_rtc_ops = {
        .read_time = rx6110_get_time,
        .set_time = rx6110_set_time,
 };
@@ -388,7 +388,6 @@ MODULE_DEVICE_TABLE(spi, rx6110_id);
 static struct spi_driver rx6110_driver = {
        .driver = {
                .name = RX6110_DRIVER_NAME,
-               .owner = THIS_MODULE,
        },
        .probe          = rx6110_probe,
        .remove         = rx6110_remove,
index 2b85cc7a24e752c01d1cc16c60d738f7fbfc5983..91857d8d2df8707a75a0f931d5ba6afb6ed45afe 100644 (file)
@@ -403,7 +403,7 @@ static int rx8025_alarm_irq_enable(struct device *dev, unsigned int enabled)
        return 0;
 }
 
-static struct rtc_class_ops rx8025_rtc_ops = {
+static const struct rtc_class_ops rx8025_rtc_ops = {
        .read_time = rx8025_get_time,
        .set_time = rx8025_set_time,
        .read_alarm = rx8025_read_alarm,
index f05ef8568480429f912bf47f72349eb331d214ca..e377f42abae7ae952e2651aaa4805057d1d230c0 100644 (file)
@@ -343,7 +343,7 @@ static int spear_alarm_irq_enable(struct device *dev, unsigned int enabled)
        return ret;
 }
 
-static struct rtc_class_ops spear_rtc_ops = {
+static const struct rtc_class_ops spear_rtc_ops = {
        .read_time = spear_rtc_read_time,
        .set_time = spear_rtc_set_time,
        .read_alarm = spear_rtc_read_alarm,
index e6aaaa52e7fe165383806a76ead6311df65e2433..d578e40d5a506308a76307a5050fd61552fc9420 100644 (file)
@@ -231,7 +231,7 @@ static int stmp3xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
        return 0;
 }
 
-static struct rtc_class_ops stmp3xxx_rtc_ops = {
+static const struct rtc_class_ops stmp3xxx_rtc_ops = {
        .alarm_irq_enable =
                          stmp3xxx_alarm_irq_enable,
        .read_time      = stmp3xxx_rtc_gettime,
index 63b9fb1318c2ae905db045458eb7e42ae4ae3338..1218d5d4224ddae32c28be3b4977a597120027f0 100644 (file)
@@ -160,7 +160,7 @@ wakealarm_store(struct device *dev, struct device_attribute *attr,
        unsigned long push = 0;
        struct rtc_wkalrm alm;
        struct rtc_device *rtc = to_rtc_device(dev);
-       char *buf_ptr;
+       const char *buf_ptr;
        int adjust = 0;
 
        /* Only request alarms that trigger in the future.  Disable them
@@ -171,7 +171,7 @@ wakealarm_store(struct device *dev, struct device_attribute *attr,
                return retval;
        rtc_tm_to_time(&alm.time, &now);
 
-       buf_ptr = (char *)buf;
+       buf_ptr = buf;
        if (*buf_ptr == '+') {
                buf_ptr++;
                if (*buf_ptr == '=') {
index 15ac597d54da20515847ced69d342ae3051f255e..3853ba963bb5d801502b8d50d992c83aeb886eb0 100644 (file)
@@ -291,7 +291,7 @@ static irqreturn_t tegra_rtc_irq_handler(int irq, void *data)
        return IRQ_HANDLED;
 }
 
-static struct rtc_class_ops tegra_rtc_ops = {
+static const struct rtc_class_ops tegra_rtc_ops = {
        .read_time      = tegra_rtc_read_time,
        .set_time       = tegra_rtc_set_time,
        .read_alarm     = tegra_rtc_read_alarm,
index 2dc787dc06c172a7cb7371a345e679467741a5de..176720b7b9e5083195b3bd789dbf5a8a622cd6c8 100644 (file)
@@ -462,7 +462,7 @@ out:
        return ret;
 }
 
-static struct rtc_class_ops twl_rtc_ops = {
+static const struct rtc_class_ops twl_rtc_ops = {
        .read_time      = twl_rtc_read_time,
        .set_time       = twl_rtc_set_time,
        .read_alarm     = twl_rtc_read_alarm,
index 831935af738966685415b0e2996dfb0c14af54b7..a7a88476e215e7b027958adb5c162a4d2b959802 100644 (file)
@@ -1205,7 +1205,7 @@ static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm)
                                 mdc, lpm);
                        return mdc;
                }
-               fcx_max_data = mdc * FCX_MAX_DATA_FACTOR;
+               fcx_max_data = (u32)mdc * FCX_MAX_DATA_FACTOR;
                if (fcx_max_data < private->fcx_max_data) {
                        dev_warn(&device->cdev->dev,
                                 "The maximum data size for zHPF requests %u "
@@ -1675,7 +1675,7 @@ static u32 get_fcx_max_data(struct dasd_device *device)
                         " data size for zHPF requests failed\n");
                return 0;
        } else
-               return mdc * FCX_MAX_DATA_FACTOR;
+               return (u32)mdc * FCX_MAX_DATA_FACTOR;
 }
 
 /*
index 46be25c7461e07ed0cc636cf5a1e91a21768abe2..876c7e6e3a99264b169c217389f0b0a22764662d 100644 (file)
@@ -780,7 +780,7 @@ static int cfg_wait_idle(void)
 static int __init chp_init(void)
 {
        struct chp_id chpid;
-       int ret;
+       int state, ret;
 
        ret = crw_register_handler(CRW_RSC_CPATH, chp_process_crw);
        if (ret)
@@ -791,7 +791,9 @@ static int __init chp_init(void)
                return 0;
        /* Register available channel-paths. */
        chp_id_for_each(&chpid) {
-               if (chp_info_get_status(chpid) != CHP_STATUS_NOT_RECOGNIZED)
+               state = chp_info_get_status(chpid);
+               if (state == CHP_STATUS_CONFIGURED ||
+                   state == CHP_STATUS_STANDBY)
                        chp_new(chpid);
        }
 
index 637cf8973c9e1c55d87577815c782d6409b60b5b..581001989937ce1e0aaab11c26136d5e11b4fa4d 100644 (file)
@@ -384,7 +384,7 @@ void zfcp_dbf_san(char *tag, struct zfcp_dbf *dbf,
        /* if (len > rec_len):
         * dump data up to cap_len ignoring small duplicate in rec->payload
         */
-       spin_lock_irqsave(&dbf->pay_lock, flags);
+       spin_lock(&dbf->pay_lock);
        memset(payload, 0, sizeof(*payload));
        memcpy(payload->area, paytag, ZFCP_DBF_TAG_LEN);
        payload->fsf_req_id = req_id;
index db2739079cbb4bcf2817ab450b7236a3df5922bc..790babc5ef660334c86ecb096e405a15d50bceee 100644 (file)
@@ -353,7 +353,7 @@ static void NCR5380_print_phase(struct Scsi_Host *instance)
 #endif
 
 
-static int probe_irq __initdata;
+static int probe_irq;
 
 /**
  * probe_intr  -       helper for IRQ autoprobe
@@ -365,7 +365,7 @@ static int probe_irq __initdata;
  * used by the IRQ probe code.
  */
 
-static irqreturn_t __init probe_intr(int irq, void *dev_id)
+static irqreturn_t probe_intr(int irq, void *dev_id)
 {
        probe_irq = irq;
        return IRQ_HANDLED;
@@ -380,7 +380,7 @@ static irqreturn_t __init probe_intr(int irq, void *dev_id)
  * and then looking to see what interrupt actually turned up.
  */
 
-static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
+static int __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
                                                int possible)
 {
        struct NCR5380_hostdata *hostdata = shost_priv(instance);
index 3d53d636b17b8892f080855baf081299c3a349d6..f0cfb04517570839b968b0271b6029e07a3e4897 100644 (file)
@@ -2636,18 +2636,9 @@ static int arcmsr_queue_command_lck(struct scsi_cmnd *cmd,
        struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
        struct CommandControlBlock *ccb;
        int target = cmd->device->id;
-       int lun = cmd->device->lun;
-       uint8_t scsicmd = cmd->cmnd[0];
        cmd->scsi_done = done;
        cmd->host_scribble = NULL;
        cmd->result = 0;
-       if ((scsicmd == SYNCHRONIZE_CACHE) ||(scsicmd == SEND_DIAGNOSTIC)){
-               if(acb->devstate[target][lun] == ARECA_RAID_GONE) {
-                       cmd->result = (DID_NO_CONNECT << 16);
-               }
-               cmd->scsi_done(cmd);
-               return 0;
-       }
        if (target == 16) {
                /* virtual device for iop message transfer */
                arcmsr_handle_virtual_command(acb, cmd);
index 6a6906f847dbba07ef38149a9912563d48ba3d82..d9239c2d49b117175d34379716b464c7b06dcfaa 100644 (file)
@@ -61,7 +61,7 @@ MODULE_PARM_DESC(be_max_phys_size,
                "memory that can be allocated. Range is 16 - 128");
 
 #define beiscsi_disp_param(_name)\
-ssize_t        \
+static ssize_t \
 beiscsi_##_name##_disp(struct device *dev,\
                        struct device_attribute *attrib, char *buf)     \
 {      \
@@ -74,7 +74,7 @@ beiscsi_##_name##_disp(struct device *dev,\
 }
 
 #define beiscsi_change_param(_name, _minval, _maxval, _defaval)\
-int \
+static int \
 beiscsi_##_name##_change(struct beiscsi_hba *phba, uint32_t val)\
 {\
        if (val >= _minval && val <= _maxval) {\
@@ -93,7 +93,7 @@ beiscsi_##_name##_change(struct beiscsi_hba *phba, uint32_t val)\
 }
 
 #define beiscsi_store_param(_name)  \
-ssize_t \
+static ssize_t \
 beiscsi_##_name##_store(struct device *dev,\
                         struct device_attribute *attr, const char *buf,\
                         size_t count) \
@@ -112,7 +112,7 @@ beiscsi_##_name##_store(struct device *dev,\
 }
 
 #define beiscsi_init_param(_name, _minval, _maxval, _defval) \
-int \
+static int \
 beiscsi_##_name##_init(struct beiscsi_hba *phba, uint32_t val) \
 { \
        if (val >= _minval && val <= _maxval) {\
@@ -900,8 +900,9 @@ void hwi_ring_cq_db(struct beiscsi_hba *phba,
 static struct sgl_handle *alloc_io_sgl_handle(struct beiscsi_hba *phba)
 {
        struct sgl_handle *psgl_handle;
+       unsigned long flags;
 
-       spin_lock_bh(&phba->io_sgl_lock);
+       spin_lock_irqsave(&phba->io_sgl_lock, flags);
        if (phba->io_sgl_hndl_avbl) {
                beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_IO,
                            "BM_%d : In alloc_io_sgl_handle,"
@@ -919,14 +920,16 @@ static struct sgl_handle *alloc_io_sgl_handle(struct beiscsi_hba *phba)
                        phba->io_sgl_alloc_index++;
        } else
                psgl_handle = NULL;
-       spin_unlock_bh(&phba->io_sgl_lock);
+       spin_unlock_irqrestore(&phba->io_sgl_lock, flags);
        return psgl_handle;
 }
 
 static void
 free_io_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle)
 {
-       spin_lock_bh(&phba->io_sgl_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&phba->io_sgl_lock, flags);
        beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_IO,
                    "BM_%d : In free_,io_sgl_free_index=%d\n",
                    phba->io_sgl_free_index);
@@ -941,7 +944,7 @@ free_io_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle)
                             "value there=%p\n", phba->io_sgl_free_index,
                             phba->io_sgl_hndl_base
                             [phba->io_sgl_free_index]);
-                spin_unlock_bh(&phba->io_sgl_lock);
+                spin_unlock_irqrestore(&phba->io_sgl_lock, flags);
                return;
        }
        phba->io_sgl_hndl_base[phba->io_sgl_free_index] = psgl_handle;
@@ -950,7 +953,7 @@ free_io_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle)
                phba->io_sgl_free_index = 0;
        else
                phba->io_sgl_free_index++;
-       spin_unlock_bh(&phba->io_sgl_lock);
+       spin_unlock_irqrestore(&phba->io_sgl_lock, flags);
 }
 
 static inline struct wrb_handle *
@@ -958,15 +961,16 @@ beiscsi_get_wrb_handle(struct hwi_wrb_context *pwrb_context,
                       unsigned int wrbs_per_cxn)
 {
        struct wrb_handle *pwrb_handle;
+       unsigned long flags;
 
-       spin_lock_bh(&pwrb_context->wrb_lock);
+       spin_lock_irqsave(&pwrb_context->wrb_lock, flags);
        pwrb_handle = pwrb_context->pwrb_handle_base[pwrb_context->alloc_index];
        pwrb_context->wrb_handles_available--;
        if (pwrb_context->alloc_index == (wrbs_per_cxn - 1))
                pwrb_context->alloc_index = 0;
        else
                pwrb_context->alloc_index++;
-       spin_unlock_bh(&pwrb_context->wrb_lock);
+       spin_unlock_irqrestore(&pwrb_context->wrb_lock, flags);
 
        if (pwrb_handle)
                memset(pwrb_handle->pwrb, 0, sizeof(*pwrb_handle->pwrb));
@@ -1001,14 +1005,16 @@ beiscsi_put_wrb_handle(struct hwi_wrb_context *pwrb_context,
                       struct wrb_handle *pwrb_handle,
                       unsigned int wrbs_per_cxn)
 {
-       spin_lock_bh(&pwrb_context->wrb_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&pwrb_context->wrb_lock, flags);
        pwrb_context->pwrb_handle_base[pwrb_context->free_index] = pwrb_handle;
        pwrb_context->wrb_handles_available++;
        if (pwrb_context->free_index == (wrbs_per_cxn - 1))
                pwrb_context->free_index = 0;
        else
                pwrb_context->free_index++;
-       spin_unlock_bh(&pwrb_context->wrb_lock);
+       spin_unlock_irqrestore(&pwrb_context->wrb_lock, flags);
 }
 
 /**
@@ -1037,8 +1043,9 @@ free_wrb_handle(struct beiscsi_hba *phba, struct hwi_wrb_context *pwrb_context,
 static struct sgl_handle *alloc_mgmt_sgl_handle(struct beiscsi_hba *phba)
 {
        struct sgl_handle *psgl_handle;
+       unsigned long flags;
 
-       spin_lock_bh(&phba->mgmt_sgl_lock);
+       spin_lock_irqsave(&phba->mgmt_sgl_lock, flags);
        if (phba->eh_sgl_hndl_avbl) {
                psgl_handle = phba->eh_sgl_hndl_base[phba->eh_sgl_alloc_index];
                phba->eh_sgl_hndl_base[phba->eh_sgl_alloc_index] = NULL;
@@ -1056,14 +1063,16 @@ static struct sgl_handle *alloc_mgmt_sgl_handle(struct beiscsi_hba *phba)
                        phba->eh_sgl_alloc_index++;
        } else
                psgl_handle = NULL;
-       spin_unlock_bh(&phba->mgmt_sgl_lock);
+       spin_unlock_irqrestore(&phba->mgmt_sgl_lock, flags);
        return psgl_handle;
 }
 
 void
 free_mgmt_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle)
 {
-       spin_lock_bh(&phba->mgmt_sgl_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&phba->mgmt_sgl_lock, flags);
        beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG,
                    "BM_%d : In  free_mgmt_sgl_handle,"
                    "eh_sgl_free_index=%d\n",
@@ -1078,7 +1087,7 @@ free_mgmt_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle)
                            "BM_%d : Double Free in eh SGL ,"
                            "eh_sgl_free_index=%d\n",
                            phba->eh_sgl_free_index);
-               spin_unlock_bh(&phba->mgmt_sgl_lock);
+               spin_unlock_irqrestore(&phba->mgmt_sgl_lock, flags);
                return;
        }
        phba->eh_sgl_hndl_base[phba->eh_sgl_free_index] = psgl_handle;
@@ -1088,7 +1097,7 @@ free_mgmt_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle)
                phba->eh_sgl_free_index = 0;
        else
                phba->eh_sgl_free_index++;
-       spin_unlock_bh(&phba->mgmt_sgl_lock);
+       spin_unlock_irqrestore(&phba->mgmt_sgl_lock, flags);
 }
 
 static void
@@ -4584,7 +4593,7 @@ free_hndls:
        io_task->cmd_bhs = NULL;
        return -ENOMEM;
 }
-int beiscsi_iotask_v2(struct iscsi_task *task, struct scatterlist *sg,
+static int beiscsi_iotask_v2(struct iscsi_task *task, struct scatterlist *sg,
                       unsigned int num_sg, unsigned int xferlen,
                       unsigned int writedir)
 {
@@ -4973,7 +4982,7 @@ static int beiscsi_bsg_request(struct bsg_job *job)
        return rc;
 }
 
-void beiscsi_hba_attrs_init(struct beiscsi_hba *phba)
+static void beiscsi_hba_attrs_init(struct beiscsi_hba *phba)
 {
        /* Set the logging parameter */
        beiscsi_log_enable_init(phba, beiscsi_log_enable);
index 7c0d7af0d3b7f3678e5040b7223dab051eb8eab8..4655a9f9dceae8aeefd6857d80a018a4e9c23d98 100644 (file)
@@ -85,6 +85,7 @@ static inline int send_tx_flowc_wr(struct cxgbi_sock *);
 static const struct cxgb4_uld_info cxgb4i_uld_info = {
        .name = DRV_MODULE_NAME,
        .nrxq = MAX_ULD_QSETS,
+       .ntxq = MAX_ULD_QSETS,
        .rxq_size = 1024,
        .lro = false,
        .add = t4_uld_add,
@@ -685,6 +686,11 @@ static int push_tx_frames(struct cxgbi_sock *csk, int req_completion)
                                        req_completion);
                        csk->snd_nxt += len;
                        cxgbi_skcb_clear_flag(skb, SKCBF_TX_NEED_HDR);
+               } else if (cxgbi_skcb_test_flag(skb, SKCBF_TX_FLAG_COMPL) &&
+                          (csk->wr_una_cred >= (csk->wr_max_cred / 2))) {
+                       struct cpl_close_con_req *req =
+                               (struct cpl_close_con_req *)skb->data;
+                       req->wr.wr_hi |= htonl(FW_WR_COMPL_F);
                }
                total_size += skb->truesize;
                t4_set_arp_err_handler(skb, csk, arp_failure_skb_discard);
index d1421139e6eac615d1e3ae2a9af1e02ee78d0822..2ffe029ff2b6ff29fbaaada58d277d89d2643f31 100644 (file)
@@ -2081,9 +2081,10 @@ void cxgbi_cleanup_task(struct iscsi_task *task)
        /*  never reached the xmit task callout */
        if (tdata->skb)
                __kfree_skb(tdata->skb);
-       memset(tdata, 0, sizeof(*tdata));
 
        task_release_itt(task, task->hdr_itt);
+       memset(tdata, 0, sizeof(*tdata));
+
        iscsi_tcp_cleanup_task(task);
 }
 EXPORT_SYMBOL_GPL(cxgbi_cleanup_task);
index 241829e596680f8be89941fc43ce0db642e29b18..7bb20684e9fabc3b8b950f45aee0a94d1c4e1589 100644 (file)
@@ -793,6 +793,7 @@ static void alua_rtpg_work(struct work_struct *work)
                WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
                WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
                spin_unlock_irqrestore(&pg->lock, flags);
+               kref_put(&pg->kref, release_port_group);
                return;
        }
        if (pg->flags & ALUA_SYNC_STPG)
@@ -890,6 +891,7 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
                /* Do not queue if the worker is already running */
                if (!(pg->flags & ALUA_PG_RUNNING)) {
                        kref_get(&pg->kref);
+                       sdev = NULL;
                        start_queue = 1;
                }
        }
@@ -901,7 +903,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
        if (start_queue &&
            !queue_delayed_work(alua_wq, &pg->rtpg_work,
                                msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) {
-               scsi_device_put(sdev);
+               if (sdev)
+                       scsi_device_put(sdev);
                kref_put(&pg->kref, release_port_group);
        }
 }
index 516bd6c4f4425036cd7a7bd84ab1d1a1ef3643c7..cbf010324c187589e199f2b14ef3793bf892f2a9 100644 (file)
 #include "NCR5380.h"
 #include <linux/init.h>
 #include <linux/ioport.h>
-#include <linux/isapnp.h>
+#include <linux/isa.h>
+#include <linux/pnp.h>
 #include <linux/interrupt.h>
 
+#define MAX_CARDS 8
+
+/* old-style parameters for compatibility */
 static int ncr_irq;
-static int ncr_dma;
 static int ncr_addr;
 static int ncr_5380;
 static int ncr_53c400;
 static int ncr_53c400a;
 static int dtc_3181e;
 static int hp_c2502;
+module_param(ncr_irq, int, 0);
+module_param(ncr_addr, int, 0);
+module_param(ncr_5380, int, 0);
+module_param(ncr_53c400, int, 0);
+module_param(ncr_53c400a, int, 0);
+module_param(dtc_3181e, int, 0);
+module_param(hp_c2502, int, 0);
 
-static struct override {
-       NCR5380_map_type NCR5380_map_name;
-       int irq;
-       int dma;
-       int board;              /* Use NCR53c400, Ricoh, etc. extensions ? */
-} overrides
-#ifdef GENERIC_NCR5380_OVERRIDE
-[] __initdata = GENERIC_NCR5380_OVERRIDE;
-#else
-[1] __initdata = { { 0,},};
-#endif
-
-#define NO_OVERRIDES ARRAY_SIZE(overrides)
-
-#ifndef MODULE
-
-/**
- *     internal_setup          -       handle lilo command string override
- *     @board: BOARD_* identifier for the board
- *     @str: unused
- *     @ints: numeric parameters
- *
- *     Do LILO command line initialization of the overrides array. Display
- *     errors when needed
- *
- *     Locks: none
- */
-
-static void __init internal_setup(int board, char *str, int *ints)
-{
-       static int commandline_current;
-       switch (board) {
-       case BOARD_NCR5380:
-               if (ints[0] != 2 && ints[0] != 3) {
-                       printk(KERN_ERR "generic_NCR5380_setup : usage ncr5380=" STRVAL(NCR5380_map_name) ",irq,dma\n");
-                       return;
-               }
-               break;
-       case BOARD_NCR53C400:
-               if (ints[0] != 2) {
-                       printk(KERN_ERR "generic_NCR53C400_setup : usage ncr53c400=" STRVAL(NCR5380_map_name) ",irq\n");
-                       return;
-               }
-               break;
-       case BOARD_NCR53C400A:
-               if (ints[0] != 2) {
-                       printk(KERN_ERR "generic_NCR53C400A_setup : usage ncr53c400a=" STRVAL(NCR5380_map_name) ",irq\n");
-                       return;
-               }
-               break;
-       case BOARD_DTC3181E:
-               if (ints[0] != 2) {
-                       printk("generic_DTC3181E_setup : usage dtc3181e=" STRVAL(NCR5380_map_name) ",irq\n");
-                       return;
-               }
-               break;
-       }
-
-       if (commandline_current < NO_OVERRIDES) {
-               overrides[commandline_current].NCR5380_map_name = (NCR5380_map_type) ints[1];
-               overrides[commandline_current].irq = ints[2];
-               if (ints[0] == 3)
-                       overrides[commandline_current].dma = ints[3];
-               else
-                       overrides[commandline_current].dma = DMA_NONE;
-               overrides[commandline_current].board = board;
-               ++commandline_current;
-       }
-}
-
-
-/**
- *     do_NCR53C80_setup               -       set up entry point
- *     @str: unused
- *
- *     Setup function invoked at boot to parse the ncr5380= command
- *     line.
- */
-
-static int __init do_NCR5380_setup(char *str)
-{
-       int ints[10];
-
-       get_options(str, ARRAY_SIZE(ints), ints);
-       internal_setup(BOARD_NCR5380, str, ints);
-       return 1;
-}
-
-/**
- *     do_NCR53C400_setup              -       set up entry point
- *     @str: unused
- *     @ints: integer parameters from kernel setup code
- *
- *     Setup function invoked at boot to parse the ncr53c400= command
- *     line.
- */
-
-static int __init do_NCR53C400_setup(char *str)
-{
-       int ints[10];
-
-       get_options(str, ARRAY_SIZE(ints), ints);
-       internal_setup(BOARD_NCR53C400, str, ints);
-       return 1;
-}
-
-/**
- *     do_NCR53C400A_setup     -       set up entry point
- *     @str: unused
- *     @ints: integer parameters from kernel setup code
- *
- *     Setup function invoked at boot to parse the ncr53c400a= command
- *     line.
- */
-
-static int __init do_NCR53C400A_setup(char *str)
-{
-       int ints[10];
-
-       get_options(str, ARRAY_SIZE(ints), ints);
-       internal_setup(BOARD_NCR53C400A, str, ints);
-       return 1;
-}
-
-/**
- *     do_DTC3181E_setup       -       set up entry point
- *     @str: unused
- *     @ints: integer parameters from kernel setup code
- *
- *     Setup function invoked at boot to parse the dtc3181e= command
- *     line.
- */
+static int irq[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+module_param_array(irq, int, NULL, 0);
+MODULE_PARM_DESC(irq, "IRQ number(s)");
 
-static int __init do_DTC3181E_setup(char *str)
-{
-       int ints[10];
+static int base[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+module_param_array(base, int, NULL, 0);
+MODULE_PARM_DESC(base, "base address(es)");
 
-       get_options(str, ARRAY_SIZE(ints), ints);
-       internal_setup(BOARD_DTC3181E, str, ints);
-       return 1;
-}
+static int card[] = { -1, -1, -1, -1, -1, -1, -1, -1 };
+module_param_array(card, int, NULL, 0);
+MODULE_PARM_DESC(card, "card type (0=NCR5380, 1=NCR53C400, 2=NCR53C400A, 3=DTC3181E, 4=HP C2502)");
 
-#endif
+MODULE_LICENSE("GPL");
 
 #ifndef SCSI_G_NCR5380_MEM
 /*
@@ -210,21 +90,9 @@ static void magic_configure(int idx, u8 irq, u8 magic[])
 }
 #endif
 
-/**
- *     generic_NCR5380_detect  -       look for NCR5380 controllers
- *     @tpnt: the scsi template
- *
- *     Scan for the present of NCR5380, NCR53C400, NCR53C400A, DTC3181E
- *     and DTC436(ISAPnP) controllers. If overrides have been set we use
- *     them.
- *
- *     Locks: none
- */
-
-static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
+static int generic_NCR5380_init_one(struct scsi_host_template *tpnt,
+                       struct device *pdev, int base, int irq, int board)
 {
-       static int current_override;
-       int count;
        unsigned int *ports;
        u8 *magic = NULL;
 #ifndef SCSI_G_NCR5380_MEM
@@ -232,272 +100,222 @@ static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
        int port_idx = -1;
        unsigned long region_size;
 #endif
-       static unsigned int __initdata ncr_53c400a_ports[] = {
+       static unsigned int ncr_53c400a_ports[] = {
                0x280, 0x290, 0x300, 0x310, 0x330, 0x340, 0x348, 0x350, 0
        };
-       static unsigned int __initdata dtc_3181e_ports[] = {
+       static unsigned int dtc_3181e_ports[] = {
                0x220, 0x240, 0x280, 0x2a0, 0x2c0, 0x300, 0x320, 0x340, 0
        };
-       static u8 ncr_53c400a_magic[] __initdata = {    /* 53C400A & DTC436 */
+       static u8 ncr_53c400a_magic[] = {       /* 53C400A & DTC436 */
                0x59, 0xb9, 0xc5, 0xae, 0xa6
        };
-       static u8 hp_c2502_magic[] __initdata = {       /* HP C2502 */
+       static u8 hp_c2502_magic[] = {  /* HP C2502 */
                0x0f, 0x22, 0xf0, 0x20, 0x80
        };
-       int flags;
+       int flags, ret;
        struct Scsi_Host *instance;
        struct NCR5380_hostdata *hostdata;
 #ifdef SCSI_G_NCR5380_MEM
-       unsigned long base;
        void __iomem *iomem;
        resource_size_t iomem_size;
 #endif
 
-       if (ncr_irq)
-               overrides[0].irq = ncr_irq;
-       if (ncr_dma)
-               overrides[0].dma = ncr_dma;
-       if (ncr_addr)
-               overrides[0].NCR5380_map_name = (NCR5380_map_type) ncr_addr;
-       if (ncr_5380)
-               overrides[0].board = BOARD_NCR5380;
-       else if (ncr_53c400)
-               overrides[0].board = BOARD_NCR53C400;
-       else if (ncr_53c400a)
-               overrides[0].board = BOARD_NCR53C400A;
-       else if (dtc_3181e)
-               overrides[0].board = BOARD_DTC3181E;
-       else if (hp_c2502)
-               overrides[0].board = BOARD_HP_C2502;
-#ifndef SCSI_G_NCR5380_MEM
-       if (!current_override && isapnp_present()) {
-               struct pnp_dev *dev = NULL;
-               count = 0;
-               while ((dev = pnp_find_dev(NULL, ISAPNP_VENDOR('D', 'T', 'C'), ISAPNP_FUNCTION(0x436e), dev))) {
-                       if (count >= NO_OVERRIDES)
-                               break;
-                       if (pnp_device_attach(dev) < 0)
-                               continue;
-                       if (pnp_activate_dev(dev) < 0) {
-                               printk(KERN_ERR "dtc436e probe: activate failed\n");
-                               pnp_device_detach(dev);
-                               continue;
-                       }
-                       if (!pnp_port_valid(dev, 0)) {
-                               printk(KERN_ERR "dtc436e probe: no valid port\n");
-                               pnp_device_detach(dev);
-                               continue;
-                       }
-                       if (pnp_irq_valid(dev, 0))
-                               overrides[count].irq = pnp_irq(dev, 0);
-                       else
-                               overrides[count].irq = NO_IRQ;
-                       if (pnp_dma_valid(dev, 0))
-                               overrides[count].dma = pnp_dma(dev, 0);
-                       else
-                               overrides[count].dma = DMA_NONE;
-                       overrides[count].NCR5380_map_name = (NCR5380_map_type) pnp_port_start(dev, 0);
-                       overrides[count].board = BOARD_DTC3181E;
-                       count++;
-               }
+       ports = NULL;
+       flags = 0;
+       switch (board) {
+       case BOARD_NCR5380:
+               flags = FLAG_NO_PSEUDO_DMA | FLAG_DMA_FIXUP;
+               break;
+       case BOARD_NCR53C400A:
+               ports = ncr_53c400a_ports;
+               magic = ncr_53c400a_magic;
+               break;
+       case BOARD_HP_C2502:
+               ports = ncr_53c400a_ports;
+               magic = hp_c2502_magic;
+               break;
+       case BOARD_DTC3181E:
+               ports = dtc_3181e_ports;
+               magic = ncr_53c400a_magic;
+               break;
        }
-#endif
-
-       for (count = 0; current_override < NO_OVERRIDES; ++current_override) {
-               if (!(overrides[current_override].NCR5380_map_name))
-                       continue;
-
-               ports = NULL;
-               flags = 0;
-               switch (overrides[current_override].board) {
-               case BOARD_NCR5380:
-                       flags = FLAG_NO_PSEUDO_DMA | FLAG_DMA_FIXUP;
-                       break;
-               case BOARD_NCR53C400A:
-                       ports = ncr_53c400a_ports;
-                       magic = ncr_53c400a_magic;
-                       break;
-               case BOARD_HP_C2502:
-                       ports = ncr_53c400a_ports;
-                       magic = hp_c2502_magic;
-                       break;
-               case BOARD_DTC3181E:
-                       ports = dtc_3181e_ports;
-                       magic = ncr_53c400a_magic;
-                       break;
-               }
 
 #ifndef SCSI_G_NCR5380_MEM
-               if (ports && magic) {
-                       /* wakeup sequence for the NCR53C400A and DTC3181E */
-
-                       /* Disable the adapter and look for a free io port */
-                       magic_configure(-1, 0, magic);
-
-                       region_size = 16;
-
-                       if (overrides[current_override].NCR5380_map_name != PORT_AUTO)
-                               for (i = 0; ports[i]; i++) {
-                                       if (!request_region(ports[i], region_size, "ncr53c80"))
-                                               continue;
-                                       if (overrides[current_override].NCR5380_map_name == ports[i])
-                                               break;
-                                       release_region(ports[i], region_size);
-                       } else
-                               for (i = 0; ports[i]; i++) {
-                                       if (!request_region(ports[i], region_size, "ncr53c80"))
-                                               continue;
-                                       if (inb(ports[i]) == 0xff)
-                                               break;
-                                       release_region(ports[i], region_size);
+       if (ports && magic) {
+               /* wakeup sequence for the NCR53C400A and DTC3181E */
+
+               /* Disable the adapter and look for a free io port */
+               magic_configure(-1, 0, magic);
+
+               region_size = 16;
+               if (base)
+                       for (i = 0; ports[i]; i++) {
+                               if (base == ports[i]) { /* index found */
+                                       if (!request_region(ports[i],
+                                                           region_size,
+                                                           "ncr53c80"))
+                                               return -EBUSY;
+                                       break;
                                }
-                       if (ports[i]) {
-                               /* At this point we have our region reserved */
-                               magic_configure(i, 0, magic); /* no IRQ yet */
-                               outb(0xc0, ports[i] + 9);
-                               if (inb(ports[i] + 9) != 0x80)
-                                       continue;
-                               overrides[current_override].NCR5380_map_name = ports[i];
-                               port_idx = i;
-                       } else
-                               continue;
-               }
+                       }
                else
-               {
-                       /* Not a 53C400A style setup - just grab */
-                       region_size = 8;
-                       if (!request_region(overrides[current_override].NCR5380_map_name,
-                                           region_size, "ncr5380"))
-                               continue;
-               }
+                       for (i = 0; ports[i]; i++) {
+                               if (!request_region(ports[i], region_size,
+                                                   "ncr53c80"))
+                                       continue;
+                               if (inb(ports[i]) == 0xff)
+                                       break;
+                               release_region(ports[i], region_size);
+                       }
+               if (ports[i]) {
+                       /* At this point we have our region reserved */
+                       magic_configure(i, 0, magic); /* no IRQ yet */
+                       outb(0xc0, ports[i] + 9);
+                       if (inb(ports[i] + 9) != 0x80) {
+                               ret = -ENODEV;
+                               goto out_release;
+                       }
+                       base = ports[i];
+                       port_idx = i;
+               } else
+                       return -EINVAL;
+       }
+       else
+       {
+               /* NCR5380 - no configuration, just grab */
+               region_size = 8;
+               if (!base || !request_region(base, region_size, "ncr5380"))
+                       return -EBUSY;
+       }
 #else
-               base = overrides[current_override].NCR5380_map_name;
-               iomem_size = NCR53C400_region_size;
-               if (!request_mem_region(base, iomem_size, "ncr5380"))
-                       continue;
-               iomem = ioremap(base, iomem_size);
-               if (!iomem) {
-                       release_mem_region(base, iomem_size);
-                       continue;
-               }
+       iomem_size = NCR53C400_region_size;
+       if (!request_mem_region(base, iomem_size, "ncr5380"))
+               return -EBUSY;
+       iomem = ioremap(base, iomem_size);
+       if (!iomem) {
+               release_mem_region(base, iomem_size);
+               return -ENOMEM;
+       }
 #endif
-               instance = scsi_register(tpnt, sizeof(struct NCR5380_hostdata));
-               if (instance == NULL)
-                       goto out_release;
-               hostdata = shost_priv(instance);
+       instance = scsi_host_alloc(tpnt, sizeof(struct NCR5380_hostdata));
+       if (instance == NULL) {
+               ret = -ENOMEM;
+               goto out_release;
+       }
+       hostdata = shost_priv(instance);
 
 #ifndef SCSI_G_NCR5380_MEM
-               instance->io_port = overrides[current_override].NCR5380_map_name;
-               instance->n_io_port = region_size;
-               hostdata->io_width = 1; /* 8-bit PDMA by default */
-
-               /*
-                * On NCR53C400 boards, NCR5380 registers are mapped 8 past
-                * the base address.
-                */
-               switch (overrides[current_override].board) {
-               case BOARD_NCR53C400:
-                       instance->io_port += 8;
-                       hostdata->c400_ctl_status = 0;
-                       hostdata->c400_blk_cnt = 1;
-                       hostdata->c400_host_buf = 4;
-                       break;
-               case BOARD_DTC3181E:
-                       hostdata->io_width = 2; /* 16-bit PDMA */
-                       /* fall through */
-               case BOARD_NCR53C400A:
-               case BOARD_HP_C2502:
-                       hostdata->c400_ctl_status = 9;
-                       hostdata->c400_blk_cnt = 10;
-                       hostdata->c400_host_buf = 8;
-                       break;
-               }
+       instance->io_port = base;
+       instance->n_io_port = region_size;
+       hostdata->io_width = 1; /* 8-bit PDMA by default */
+
+       /*
+        * On NCR53C400 boards, NCR5380 registers are mapped 8 past
+        * the base address.
+        */
+       switch (board) {
+       case BOARD_NCR53C400:
+               instance->io_port += 8;
+               hostdata->c400_ctl_status = 0;
+               hostdata->c400_blk_cnt = 1;
+               hostdata->c400_host_buf = 4;
+               break;
+       case BOARD_DTC3181E:
+               hostdata->io_width = 2; /* 16-bit PDMA */
+               /* fall through */
+       case BOARD_NCR53C400A:
+       case BOARD_HP_C2502:
+               hostdata->c400_ctl_status = 9;
+               hostdata->c400_blk_cnt = 10;
+               hostdata->c400_host_buf = 8;
+               break;
+       }
 #else
-               instance->base = overrides[current_override].NCR5380_map_name;
-               hostdata->iomem = iomem;
-               hostdata->iomem_size = iomem_size;
-               switch (overrides[current_override].board) {
-               case BOARD_NCR53C400:
-                       hostdata->c400_ctl_status = 0x100;
-                       hostdata->c400_blk_cnt = 0x101;
-                       hostdata->c400_host_buf = 0x104;
-                       break;
-               case BOARD_DTC3181E:
-               case BOARD_NCR53C400A:
-               case BOARD_HP_C2502:
-                       pr_err(DRV_MODULE_NAME ": unknown register offsets\n");
-                       goto out_unregister;
-               }
+       instance->base = base;
+       hostdata->iomem = iomem;
+       hostdata->iomem_size = iomem_size;
+       switch (board) {
+       case BOARD_NCR53C400:
+               hostdata->c400_ctl_status = 0x100;
+               hostdata->c400_blk_cnt = 0x101;
+               hostdata->c400_host_buf = 0x104;
+               break;
+       case BOARD_DTC3181E:
+       case BOARD_NCR53C400A:
+       case BOARD_HP_C2502:
+               pr_err(DRV_MODULE_NAME ": unknown register offsets\n");
+               ret = -EINVAL;
+               goto out_unregister;
+       }
 #endif
 
-               if (NCR5380_init(instance, flags | FLAG_LATE_DMA_SETUP))
-                       goto out_unregister;
+       ret = NCR5380_init(instance, flags | FLAG_LATE_DMA_SETUP);
+       if (ret)
+               goto out_unregister;
 
-               switch (overrides[current_override].board) {
-               case BOARD_NCR53C400:
-               case BOARD_DTC3181E:
-               case BOARD_NCR53C400A:
-               case BOARD_HP_C2502:
-                       NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
-               }
+       switch (board) {
+       case BOARD_NCR53C400:
+       case BOARD_DTC3181E:
+       case BOARD_NCR53C400A:
+       case BOARD_HP_C2502:
+               NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+       }
 
-               NCR5380_maybe_reset_bus(instance);
+       NCR5380_maybe_reset_bus(instance);
 
-               if (overrides[current_override].irq != IRQ_AUTO)
-                       instance->irq = overrides[current_override].irq;
-               else
-                       instance->irq = NCR5380_probe_irq(instance, 0xffff);
+       if (irq != IRQ_AUTO)
+               instance->irq = irq;
+       else
+               instance->irq = NCR5380_probe_irq(instance, 0xffff);
 
-               /* Compatibility with documented NCR5380 kernel parameters */
-               if (instance->irq == 255)
-                       instance->irq = NO_IRQ;
+       /* Compatibility with documented NCR5380 kernel parameters */
+       if (instance->irq == 255)
+               instance->irq = NO_IRQ;
 
-               if (instance->irq != NO_IRQ) {
+       if (instance->irq != NO_IRQ) {
 #ifndef SCSI_G_NCR5380_MEM
-                       /* set IRQ for HP C2502 */
-                       if (overrides[current_override].board == BOARD_HP_C2502)
-                               magic_configure(port_idx, instance->irq, magic);
+               /* set IRQ for HP C2502 */
+               if (board == BOARD_HP_C2502)
+                       magic_configure(port_idx, instance->irq, magic);
 #endif
-                       if (request_irq(instance->irq, generic_NCR5380_intr,
-                                       0, "NCR5380", instance)) {
-                               printk(KERN_WARNING "scsi%d : IRQ%d not free, interrupts disabled\n", instance->host_no, instance->irq);
-                               instance->irq = NO_IRQ;
-                       }
-               }
-
-               if (instance->irq == NO_IRQ) {
-                       printk(KERN_INFO "scsi%d : interrupts not enabled. for better interactive performance,\n", instance->host_no);
-                       printk(KERN_INFO "scsi%d : please jumper the board for a free IRQ.\n", instance->host_no);
+               if (request_irq(instance->irq, generic_NCR5380_intr,
+                               0, "NCR5380", instance)) {
+                       printk(KERN_WARNING "scsi%d : IRQ%d not free, interrupts disabled\n", instance->host_no, instance->irq);
+                       instance->irq = NO_IRQ;
                }
+       }
 
-               ++current_override;
-               ++count;
+       if (instance->irq == NO_IRQ) {
+               printk(KERN_INFO "scsi%d : interrupts not enabled. for better interactive performance,\n", instance->host_no);
+               printk(KERN_INFO "scsi%d : please jumper the board for a free IRQ.\n", instance->host_no);
        }
-       return count;
 
+       ret = scsi_add_host(instance, pdev);
+       if (ret)
+               goto out_free_irq;
+       scsi_scan_host(instance);
+       dev_set_drvdata(pdev, instance);
+       return 0;
+
+out_free_irq:
+       if (instance->irq != NO_IRQ)
+               free_irq(instance->irq, instance);
+       NCR5380_exit(instance);
 out_unregister:
-       scsi_unregister(instance);
+       scsi_host_put(instance);
 out_release:
 #ifndef SCSI_G_NCR5380_MEM
-       release_region(overrides[current_override].NCR5380_map_name, region_size);
+       release_region(base, region_size);
 #else
        iounmap(iomem);
        release_mem_region(base, iomem_size);
 #endif
-       return count;
+       return ret;
 }
 
-/**
- *     generic_NCR5380_release_resources       -       free resources
- *     @instance: host adapter to clean up 
- *
- *     Free the generic interface resources from this adapter.
- *
- *     Locks: none
- */
-static int generic_NCR5380_release_resources(struct Scsi_Host *instance)
+static void generic_NCR5380_release_resources(struct Scsi_Host *instance)
 {
+       scsi_remove_host(instance);
        if (instance->irq != NO_IRQ)
                free_irq(instance->irq, instance);
        NCR5380_exit(instance);
@@ -511,7 +329,7 @@ static int generic_NCR5380_release_resources(struct Scsi_Host *instance)
                release_mem_region(instance->base, hostdata->iomem_size);
        }
 #endif
-       return 0;
+       scsi_host_put(instance);
 }
 
 /**
@@ -701,10 +519,9 @@ static int generic_NCR5380_dma_xfer_len(struct Scsi_Host *instance,
 #include "NCR5380.c"
 
 static struct scsi_host_template driver_template = {
+       .module                 = THIS_MODULE,
        .proc_name              = DRV_MODULE_NAME,
        .name                   = "Generic NCR5380/NCR53C400 SCSI",
-       .detect                 = generic_NCR5380_detect,
-       .release                = generic_NCR5380_release_resources,
        .info                   = generic_NCR5380_info,
        .queuecommand           = generic_NCR5380_queue_command,
        .eh_abort_handler       = generic_NCR5380_abort,
@@ -718,31 +535,115 @@ static struct scsi_host_template driver_template = {
        .max_sectors            = 128,
 };
 
-#include "scsi_module.c"
 
-module_param(ncr_irq, int, 0);
-module_param(ncr_dma, int, 0);
-module_param(ncr_addr, int, 0);
-module_param(ncr_5380, int, 0);
-module_param(ncr_53c400, int, 0);
-module_param(ncr_53c400a, int, 0);
-module_param(dtc_3181e, int, 0);
-module_param(hp_c2502, int, 0);
-MODULE_LICENSE("GPL");
+static int generic_NCR5380_isa_match(struct device *pdev, unsigned int ndev)
+{
+       int ret = generic_NCR5380_init_one(&driver_template, pdev, base[ndev],
+                                         irq[ndev], card[ndev]);
+       if (ret) {
+               if (base[ndev])
+                       printk(KERN_WARNING "Card not found at address 0x%03x\n",
+                              base[ndev]);
+               return 0;
+       }
 
-#if !defined(SCSI_G_NCR5380_MEM) && defined(MODULE)
-static struct isapnp_device_id id_table[] = {
-       {
-        ISAPNP_ANY_ID, ISAPNP_ANY_ID,
-        ISAPNP_VENDOR('D', 'T', 'C'), ISAPNP_FUNCTION(0x436e),
-        0},
-       {0}
+       return 1;
+}
+
+static int generic_NCR5380_isa_remove(struct device *pdev,
+                                  unsigned int ndev)
+{
+       generic_NCR5380_release_resources(dev_get_drvdata(pdev));
+       dev_set_drvdata(pdev, NULL);
+       return 0;
+}
+
+static struct isa_driver generic_NCR5380_isa_driver = {
+       .match          = generic_NCR5380_isa_match,
+       .remove         = generic_NCR5380_isa_remove,
+       .driver         = {
+               .name   = DRV_MODULE_NAME
+       },
+};
+
+#if !defined(SCSI_G_NCR5380_MEM) && defined(CONFIG_PNP)
+static struct pnp_device_id generic_NCR5380_pnp_ids[] = {
+       { .id = "DTC436e", .driver_data = BOARD_DTC3181E },
+       { .id = "" }
+};
+MODULE_DEVICE_TABLE(pnp, generic_NCR5380_pnp_ids);
+
+static int generic_NCR5380_pnp_probe(struct pnp_dev *pdev,
+                              const struct pnp_device_id *id)
+{
+       int base, irq;
+
+       if (pnp_activate_dev(pdev) < 0)
+               return -EBUSY;
+
+       base = pnp_port_start(pdev, 0);
+       irq = pnp_irq(pdev, 0);
+
+       return generic_NCR5380_init_one(&driver_template, &pdev->dev, base, irq,
+                                      id->driver_data);
+}
+
+static void generic_NCR5380_pnp_remove(struct pnp_dev *pdev)
+{
+       generic_NCR5380_release_resources(pnp_get_drvdata(pdev));
+       pnp_set_drvdata(pdev, NULL);
+}
+
+static struct pnp_driver generic_NCR5380_pnp_driver = {
+       .name           = DRV_MODULE_NAME,
+       .id_table       = generic_NCR5380_pnp_ids,
+       .probe          = generic_NCR5380_pnp_probe,
+       .remove         = generic_NCR5380_pnp_remove,
 };
+#endif /* !defined(SCSI_G_NCR5380_MEM) && defined(CONFIG_PNP) */
+
+static int pnp_registered, isa_registered;
+
+static int __init generic_NCR5380_init(void)
+{
+       int ret = 0;
+
+       /* compatibility with old-style parameters */
+       if (irq[0] == 0 && base[0] == 0 && card[0] == -1) {
+               irq[0] = ncr_irq;
+               base[0] = ncr_addr;
+               if (ncr_5380)
+                       card[0] = BOARD_NCR5380;
+               if (ncr_53c400)
+                       card[0] = BOARD_NCR53C400;
+               if (ncr_53c400a)
+                       card[0] = BOARD_NCR53C400A;
+               if (dtc_3181e)
+                       card[0] = BOARD_DTC3181E;
+               if (hp_c2502)
+                       card[0] = BOARD_HP_C2502;
+       }
 
-MODULE_DEVICE_TABLE(isapnp, id_table);
+#if !defined(SCSI_G_NCR5380_MEM) && defined(CONFIG_PNP)
+       if (!pnp_register_driver(&generic_NCR5380_pnp_driver))
+               pnp_registered = 1;
 #endif
+       ret = isa_register_driver(&generic_NCR5380_isa_driver, MAX_CARDS);
+       if (!ret)
+               isa_registered = 1;
+
+       return (pnp_registered || isa_registered) ? 0 : ret;
+}
+
+static void __exit generic_NCR5380_exit(void)
+{
+#if !defined(SCSI_G_NCR5380_MEM) && defined(CONFIG_PNP)
+       if (pnp_registered)
+               pnp_unregister_driver(&generic_NCR5380_pnp_driver);
+#endif
+       if (isa_registered)
+               isa_unregister_driver(&generic_NCR5380_isa_driver);
+}
 
-__setup("ncr5380=", do_NCR5380_setup);
-__setup("ncr53c400=", do_NCR53C400_setup);
-__setup("ncr53c400a=", do_NCR53C400A_setup);
-__setup("dtc3181e=", do_DTC3181E_setup);
+module_init(generic_NCR5380_init);
+module_exit(generic_NCR5380_exit);
index 595177428d7628f492f37b04413d449884b83f7c..b175b92344586aee3a1247e85288f34731fe179c 100644 (file)
 #ifndef GENERIC_NCR5380_H
 #define GENERIC_NCR5380_H
 
-#define __STRVAL(x) #x
-#define STRVAL(x) __STRVAL(x)
-
 #ifndef SCSI_G_NCR5380_MEM
 #define DRV_MODULE_NAME "g_NCR5380"
 
-#define NCR5380_map_type int
-#define NCR5380_map_name port
-
 #define NCR5380_read(reg) \
        inb(instance->io_port + (reg))
 #define NCR5380_write(reg, value) \
@@ -38,8 +32,6 @@
 /* therefore SCSI_G_NCR5380_MEM */
 #define DRV_MODULE_NAME "g_NCR5380_mmio"
 
-#define NCR5380_map_type unsigned long
-#define NCR5380_map_name base
 #define NCR53C400_mem_base 0x3880
 #define NCR53C400_host_buffer 0x3900
 #define NCR53C400_region_size 0x3a00
index a8762a3efeef3f6e9288646b835ddf868c0013e8..532474109624d9cd9c6a52c040a1027120819886 100644 (file)
@@ -2586,7 +2586,6 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd)
        struct ipr_hostrcb *hostrcb = ipr_cmd->u.hostrcb;
        u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc);
        u32 fd_ioasc;
-       char *envp[] = { "ASYNC_ERR_LOG=1", NULL };
 
        if (ioa_cfg->sis64)
                fd_ioasc = be32_to_cpu(hostrcb->hcam.u.error64.fd_ioasc);
@@ -2607,8 +2606,8 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd)
        }
 
        list_add_tail(&hostrcb->queue, &ioa_cfg->hostrcb_report_q);
+       schedule_work(&ioa_cfg->work_q);
        hostrcb = ipr_get_free_hostrcb(ioa_cfg);
-       kobject_uevent_env(&ioa_cfg->host->shost_dev.kobj, KOBJ_CHANGE, envp);
 
        ipr_send_hcam(ioa_cfg, IPR_HCAM_CDB_OP_CODE_LOG_DATA, hostrcb);
 }
index c051694bfcb0f42e1e983f409a581f1da5b344d2..f9b6fba689ffb41c6806cdabeb80debe38e25189 100644 (file)
@@ -791,9 +791,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 
 free_task:
        /* regular RX path uses back_lock */
-       spin_lock_bh(&session->back_lock);
+       spin_lock(&session->back_lock);
        __iscsi_put_task(task);
-       spin_unlock_bh(&session->back_lock);
+       spin_unlock(&session->back_lock);
        return NULL;
 }
 
index ca86c885dfaab4f0fb6ae3595922d6f1a77974be..3aaea713bf3712b2ad8874aaecf8a6337b7a2119 100644 (file)
@@ -2233,7 +2233,7 @@ struct megasas_instance_template {
 };
 
 #define MEGASAS_IS_LOGICAL(scp)                                                \
-       (scp->device->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1
+       ((scp->device->channel < MEGASAS_MAX_PD_CHANNELS) ? 0 : 1)
 
 #define MEGASAS_DEV_INDEX(scp)                                         \
        (((scp->device->channel % 2) * MEGASAS_MAX_DEV_PER_CHANNEL) +   \
index 9ff57dee72d7b0ac20fd01e00283416864aa7cc1..d8b1fbd4c8aafc61e5e5491d5394440651a43a04 100644 (file)
@@ -1700,16 +1700,13 @@ megasas_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
                goto out_done;
        }
 
-       switch (scmd->cmnd[0]) {
-       case SYNCHRONIZE_CACHE:
-               /*
-                * FW takes care of flush cache on its own
-                * No need to send it down
-                */
+       /*
+        * FW takes care of flush cache on its own for Virtual Disk.
+        * No need to send it down for VD. For JBOD send SYNCHRONIZE_CACHE to FW.
+        */
+       if ((scmd->cmnd[0] == SYNCHRONIZE_CACHE) && MEGASAS_IS_LOGICAL(scmd)) {
                scmd->result = DID_OK << 16;
                goto out_done;
-       default:
-               break;
        }
 
        return instance->instancet->build_and_issue_cmd(instance, scmd);
index 209a969a979d8768fa5d0dc15bc5f921f2c4ec1c..91b70bc46e7f184ece7d81ac233cb5957b080943 100644 (file)
@@ -1273,9 +1273,9 @@ scsih_target_alloc(struct scsi_target *starget)
                        sas_target_priv_data->handle = raid_device->handle;
                        sas_target_priv_data->sas_address = raid_device->wwid;
                        sas_target_priv_data->flags |= MPT_TARGET_FLAGS_VOLUME;
-                       sas_target_priv_data->raid_device = raid_device;
                        if (ioc->is_warpdrive)
-                               raid_device->starget = starget;
+                               sas_target_priv_data->raid_device = raid_device;
+                       raid_device->starget = starget;
                }
                spin_unlock_irqrestore(&ioc->raid_device_lock, flags);
                return 0;
@@ -4010,7 +4010,10 @@ _scsih_eedp_error_handling(struct scsi_cmnd *scmd, u16 ioc_status)
            SAM_STAT_CHECK_CONDITION;
 }
 
-
+static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
+{
+       return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16);
+}
 
 /**
  * scsih_qcmd - main scsi request entry point
@@ -4038,6 +4041,13 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd)
        if (ioc->logging_level & MPT_DEBUG_SCSI)
                scsi_print_command(scmd);
 
+       /*
+        * Block the device so that no further commands are queued to it
+        * until this ATA_12/ATA_16 command has completed.
+        */
+       if (ata_12_16_cmd(scmd))
+               scsi_internal_device_block(scmd->device);
+
        sas_device_priv_data = scmd->device->hostdata;
        if (!sas_device_priv_data || !sas_device_priv_data->sas_target) {
                scmd->result = DID_NO_CONNECT << 16;
@@ -4613,6 +4623,9 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply)
        if (scmd == NULL)
                return 1;
 
+       if (ata_12_16_cmd(scmd))
+               scsi_internal_device_unblock(scmd->device, SDEV_RUNNING);
+
        mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
 
        if (mpi_reply == NULL) {
index 68a5c347fae9a578fdcacbf6c618a0c15f4f6bb6..845affa112f711d8df861d3dd36299377a6ff34a 100644 (file)
@@ -1368,13 +1368,8 @@ static struct genl_multicast_group pmcraid_mcgrps[] = {
        { .name = "events", /* not really used - see ID discussion below */ },
 };
 
-static struct genl_family pmcraid_event_family = {
-       /*
-        * Due to prior multicast group abuse (the code having assumed that
-        * the family ID can be used as a multicast group ID) we need to
-        * statically allocate a family (and thus group) ID.
-        */
-       .id = GENL_ID_PMCRAID,
+static struct genl_family pmcraid_event_family __ro_after_init = {
+       .module = THIS_MODULE,
        .name = "pmcraid",
        .version = 1,
        .maxattr = PMCRAID_AEN_ATTR_MAX,
@@ -1389,7 +1384,7 @@ static struct genl_family pmcraid_event_family = {
  *     0 if the pmcraid_event_family is successfully registered
  *     with netlink generic, non-zero otherwise
  */
-static int pmcraid_netlink_init(void)
+static int __init pmcraid_netlink_init(void)
 {
        int result;
 
index ace65db1d2a25becd6dc3c4a158d932f7dab8fff..56d6142852a553ed9ad8011cb4c18a84e8656e0d 100644 (file)
@@ -707,6 +707,11 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
        srb_t *sp;
        int rval;
 
+       if (unlikely(test_bit(UNLOADING, &base_vha->dpc_flags))) {
+               cmd->result = DID_NO_CONNECT << 16;
+               goto qc24_fail_command;
+       }
+
        if (ha->flags.eeh_busy) {
                if (ha->flags.pci_channel_io_perm_failure) {
                        ql_dbg(ql_dbg_aer, vha, 0x9010,
@@ -1451,6 +1456,20 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res)
                for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
                        sp = req->outstanding_cmds[cnt];
                        if (sp) {
+                               /* Don't abort commands in adapter during EEH
+                                * recovery as it's not accessible/responding.
+                                */
+                               if (!ha->flags.eeh_busy) {
+                                       /* Get a reference to the sp and drop the lock.
+                                        * The reference ensures this sp->done() call
+                                        * - and not the call in qla2xxx_eh_abort() -
+                                        * ends the SCSI command (with result 'res').
+                                        */
+                                       sp_get(sp);
+                                       spin_unlock_irqrestore(&ha->hardware_lock, flags);
+                                       qla2xxx_eh_abort(GET_CMD_SP(sp));
+                                       spin_lock_irqsave(&ha->hardware_lock, flags);
+                               }
                                req->outstanding_cmds[cnt] = NULL;
                                sp->done(vha, sp, res);
                        }
@@ -2341,6 +2360,8 @@ qla2xxx_scan_finished(struct Scsi_Host *shost, unsigned long time)
 {
        scsi_qla_host_t *vha = shost_priv(shost);
 
+       if (test_bit(UNLOADING, &vha->dpc_flags))
+               return 1;
        if (!vha->host)
                return 1;
        if (time > vha->hw->loop_reset_delay * HZ)
index c905709707f0a2178ca82823fb2be8523e3abe96..cf04a364fd8b35f869c31e44491d237ae1b3bc6e 100644 (file)
@@ -5134,6 +5134,7 @@ static void __exit scsi_debug_exit(void)
        bus_unregister(&pseudo_lld_bus);
        root_device_unregister(pseudo_primary);
 
+       vfree(map_storep);
        vfree(dif_storep);
        vfree(fake_storep);
        kfree(sdebug_q_arr);
index 54d446c9f56e0e77372f9fe67be14d04a96a3d53..b8d3b97b217ac552ed6166e4ebe2a1c1483e5ca2 100644 (file)
@@ -36,9 +36,9 @@ struct scsi_dh_blist {
 };
 
 static const struct scsi_dh_blist scsi_dh_blist[] = {
-       {"DGC", "RAID",                 "clariion" },
-       {"DGC", "DISK",                 "clariion" },
-       {"DGC", "VRAID",                "clariion" },
+       {"DGC", "RAID",                 "emc" },
+       {"DGC", "DISK",                 "emc" },
+       {"DGC", "VRAID",                "emc" },
 
        {"COMPAQ", "MSA1000 VOLUME",    "hp_sw" },
        {"COMPAQ", "HSV110",            "hp_sw" },
index 212e98d940bc222885d8ece9a675df4782cc8be1..6f7128f49c30d62d381556275d667c84eda6ab62 100644 (file)
@@ -1307,7 +1307,6 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget,
 static int scsi_report_lun_scan(struct scsi_target *starget, int bflags,
                                enum scsi_scan_mode rescan)
 {
-       char devname[64];
        unsigned char scsi_cmd[MAX_COMMAND_SIZE];
        unsigned int length;
        u64 lun;
@@ -1349,9 +1348,6 @@ static int scsi_report_lun_scan(struct scsi_target *starget, int bflags,
                }
        }
 
-       sprintf(devname, "host %d channel %d id %d",
-               shost->host_no, sdev->channel, sdev->id);
-
        /*
         * Allocate enough to hold the header (the same size as one scsi_lun)
         * plus the number of luns we are requesting.  511 was the default
@@ -1470,12 +1466,12 @@ retry:
  out_err:
        kfree(lun_data);
  out:
-       scsi_device_put(sdev);
        if (scsi_device_created(sdev))
                /*
                 * the sdev we used didn't appear in the report luns scan
                 */
                __scsi_remove_device(sdev);
+       scsi_device_put(sdev);
        return ret;
 }
 
index 7af5226aa55ba0937b69b8b0f2d5d070f3eccf7d..618422ea3a4123d8d0b115f8811634e8225de8a1 100644 (file)
@@ -4922,9 +4922,8 @@ static int sgl_map_user_pages(struct st_buffer *STbp,
        res = get_user_pages_unlocked(
                uaddr,
                nr_pages,
-               rw == READ,
-               0, /* don't force */
-               pages);
+               pages,
+               rw == READ ? FOLL_WRITE : 0); /* don't force */
 
        /* Errors and no page mapped should return here */
        if (res < nr_pages)
index 47966909286dcbd19f8837d1e25d5bcab928737a..e27b4d4e6ae2d2d564b2777335fa625fd92ddcb7 100644 (file)
@@ -63,7 +63,7 @@ config SCSI_UFSHCD_PCI
 
 config SCSI_UFS_DWC_TC_PCI
        tristate "DesignWare pci support using a G210 Test Chip"
-       depends on SCSI_UFSHCD && PCI
+       depends on SCSI_UFSHCD_PCI
        ---help---
          Synopsys Test Chip is a PHY for prototyping purposes.
 
index ee4ab85e2801be3a233d6e479e1c506640bca384..22f881e9253a256a1614df77048081cafc12efdb 100644 (file)
@@ -25,6 +25,7 @@
 
 #define UFS_VENDOR_TOSHIBA     0x198
 #define UFS_VENDOR_SAMSUNG     0x1CE
+#define UFS_VENDOR_SKHYNIX     0x1AD
 
 /**
  * ufs_device_info - ufs device details
@@ -145,6 +146,7 @@ static struct ufs_dev_fix ufs_fixups[] = {
                UFS_DEVICE_QUIRK_PA_TACTIVATE),
        UFS_FIX(UFS_VENDOR_TOSHIBA, "THGLF2G9D8KBADG",
                UFS_DEVICE_QUIRK_PA_TACTIVATE),
+       UFS_FIX(UFS_VENDOR_SKHYNIX, UFS_ANY_MODEL, UFS_DEVICE_NO_VCCQ),
 
        END_FIX
 };
index 37f3c51e9d92166365053bf288701a64b06d051a..05c745663c103a7ddb70d4e6845702a6c08ebd08 100644 (file)
@@ -1266,9 +1266,12 @@ static void ufshcd_prepare_utp_query_req_upiu(struct ufs_hba *hba,
        ucd_req_ptr->header.dword_1 = UPIU_HEADER_DWORD(
                        0, query->request.query_func, 0, 0);
 
-       /* Data segment length */
-       ucd_req_ptr->header.dword_2 = UPIU_HEADER_DWORD(
-                       0, 0, len >> 8, (u8)len);
+       /* Data segment length is only needed for WRITE_DESC */
+       if (query->request.upiu_req.opcode == UPIU_QUERY_OPCODE_WRITE_DESC)
+               ucd_req_ptr->header.dword_2 =
+                       UPIU_HEADER_DWORD(0, 0, (len >> 8), (u8)len);
+       else
+               ucd_req_ptr->header.dword_2 = 0;
 
        /* Copy the Query Request buffer as is */
        memcpy(&ucd_req_ptr->qr, &query->request.upiu_req,
@@ -6500,6 +6503,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
                if (IS_ERR(hba->devfreq)) {
                        dev_err(hba->dev, "Unable to register with devfreq %ld\n",
                                        PTR_ERR(hba->devfreq));
+                       err = PTR_ERR(hba->devfreq);
                        goto out_remove_scsi_host;
                }
                /* Suspend devfreq until the UFS device is detected */
index 4a0d3cdc607cd5d8bfdb28867187fd7d12df0919..15ca09cd16f34ad6f7a8ece088e1dababef3a1b5 100644 (file)
@@ -793,6 +793,7 @@ static int pvscsi_abort(struct scsi_cmnd *cmd)
        unsigned long flags;
        int result = SUCCESS;
        DECLARE_COMPLETION_ONSTACK(abort_cmp);
+       int done;
 
        scmd_printk(KERN_DEBUG, cmd, "task abort on host %u, %p\n",
                    adapter->host->host_no, cmd);
@@ -824,10 +825,10 @@ static int pvscsi_abort(struct scsi_cmnd *cmd)
        pvscsi_abort_cmd(adapter, ctx);
        spin_unlock_irqrestore(&adapter->hw_lock, flags);
        /* Wait for 2 secs for the completion. */
-       wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000));
+       done = wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000));
        spin_lock_irqsave(&adapter->hw_lock, flags);
 
-       if (!completion_done(&abort_cmp)) {
+       if (!done) {
                /*
                 * Failed to abort the command, unmark the fact that it
                 * was requested to be aborted.
index c097d2ccbde3163eaa9d1ab0a20f402446c79002..d41292ef85f2ff93b879237a8da7f2357c489b24 100644 (file)
@@ -26,7 +26,7 @@
 
 #include <linux/types.h>
 
-#define PVSCSI_DRIVER_VERSION_STRING   "1.0.6.0-k"
+#define PVSCSI_DRIVER_VERSION_STRING   "1.0.7.0-k"
 
 #define PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT 128
 
index fe42a2fdf351c7af04f785eb1e717b2c29e551a1..e6e90e80519a7db028fb23dcc6f121b1c64fc0e6 100644 (file)
@@ -1,6 +1,7 @@
 menu "SOC (System On Chip) specific Drivers"
 
 source "drivers/soc/bcm/Kconfig"
+source "drivers/soc/fsl/qbman/Kconfig"
 source "drivers/soc/fsl/qe/Kconfig"
 source "drivers/soc/mediatek/Kconfig"
 source "drivers/soc/qcom/Kconfig"
index 203307fd92c15c37e4192c6eeaedb9f3bc8ddd23..75e1f5334821080e0643b47ae1fd592dc7124509 100644 (file)
@@ -2,5 +2,6 @@
 # Makefile for the Linux Kernel SOC fsl specific device drivers
 #
 
+obj-$(CONFIG_FSL_DPAA)                 += qbman/
 obj-$(CONFIG_QUICC_ENGINE)             += qe/
 obj-$(CONFIG_CPM)                      += qe/
diff --git a/drivers/soc/fsl/qbman/Kconfig b/drivers/soc/fsl/qbman/Kconfig
new file mode 100644 (file)
index 0000000..757033c
--- /dev/null
@@ -0,0 +1,67 @@
+menuconfig FSL_DPAA
+       bool "Freescale DPAA 1.x support"
+       depends on FSL_SOC_BOOKE
+       select GENERIC_ALLOCATOR
+       help
+         The Freescale Data Path Acceleration Architecture (DPAA) is a set of
+         hardware components on specific QorIQ multicore processors.
+         This architecture provides the infrastructure to support simplified
+         sharing of networking interfaces and accelerators by multiple CPUs.
+         The major h/w blocks composing DPAA are BMan and QMan.
+
+         The Buffer Manager (BMan) is a hardware buffer pool management block
+         that allows software and accelerators on the datapath to acquire and
+         release buffers in order to build frames.
+
+         The Queue Manager (QMan) is a hardware queue management block
+         that allows software and accelerators on the datapath to enqueue and
+         dequeue frames in order to communicate.
+
+if FSL_DPAA
+
+config FSL_DPAA_CHECKING
+       bool "Additional driver checking"
+       help
+         Compiles in additional checks, to sanity-check the drivers and
+         any use of the exported API. Not recommended for performance.
+
+config FSL_BMAN_TEST
+       tristate "BMan self-tests"
+       help
+         Compile the BMan self-test code. These tests will
+         exercise the BMan APIs to confirm functionality
+         of both the software drivers and hardware device.
+
+config FSL_BMAN_TEST_API
+       bool "High-level API self-test"
+       depends on FSL_BMAN_TEST
+       default y
+       help
+         This requires the presence of cpu-affine portals, and performs
+         high-level API testing with them (whichever portal(s) are affine
+         to the cpu(s) the test executes on).
+
+config FSL_QMAN_TEST
+       tristate "QMan self-tests"
+       help
+         Compile self-test code for QMan.
+
+config FSL_QMAN_TEST_API
+       bool "QMan high-level self-test"
+       depends on FSL_QMAN_TEST
+       default y
+       help
+         This requires the presence of cpu-affine portals, and performs
+         high-level API testing with them (whichever portal(s) are affine to
+         the cpu(s) the test executes on).
+
+config FSL_QMAN_TEST_STASH
+       bool "QMan 'hot potato' data-stashing self-test"
+       depends on FSL_QMAN_TEST
+       default y
+       help
+         This performs a "hot potato" style test enqueuing/dequeuing a frame
+         across a series of FQs scheduled to different portals (and cpus), with
+         DQRR, data and context stashing always on.
+
+endif # FSL_DPAA
diff --git a/drivers/soc/fsl/qbman/Makefile b/drivers/soc/fsl/qbman/Makefile
new file mode 100644 (file)
index 0000000..7ae199f
--- /dev/null
@@ -0,0 +1,12 @@
+obj-$(CONFIG_FSL_DPAA)                          += bman_ccsr.o qman_ccsr.o \
+                                                  bman_portal.o qman_portal.o \
+                                                  bman.o qman.o
+
+obj-$(CONFIG_FSL_BMAN_TEST)                     += bman-test.o
+bman-test-y                                      = bman_test.o
+bman-test-$(CONFIG_FSL_BMAN_TEST_API)           += bman_test_api.o
+
+obj-$(CONFIG_FSL_QMAN_TEST)                    += qman-test.o
+qman-test-y                                     = qman_test.o
+qman-test-$(CONFIG_FSL_QMAN_TEST_API)          += qman_test_api.o
+qman-test-$(CONFIG_FSL_QMAN_TEST_STASH)                += qman_test_stash.o
diff --git a/drivers/soc/fsl/qbman/bman.c b/drivers/soc/fsl/qbman/bman.c
new file mode 100644 (file)
index 0000000..ffa48fd
--- /dev/null
@@ -0,0 +1,797 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bman_priv.h"
+
+#define IRQNAME                "BMan portal %d"
+#define MAX_IRQNAME    16      /* big enough for "BMan portal %d" */
+
+/* Portal register assists */
+
+/* Cache-inhibited register offsets */
+#define BM_REG_RCR_PI_CINH     0x0000
+#define BM_REG_RCR_CI_CINH     0x0004
+#define BM_REG_RCR_ITR         0x0008
+#define BM_REG_CFG             0x0100
+#define BM_REG_SCN(n)          (0x0200 + ((n) << 2))
+#define BM_REG_ISR             0x0e00
+#define BM_REG_IER             0x0e04
+#define BM_REG_ISDR            0x0e08
+#define BM_REG_IIR             0x0e0c
+
+/* Cache-enabled register offsets */
+#define BM_CL_CR               0x0000
+#define BM_CL_RR0              0x0100
+#define BM_CL_RR1              0x0140
+#define BM_CL_RCR              0x1000
+#define BM_CL_RCR_PI_CENA      0x3000
+#define BM_CL_RCR_CI_CENA      0x3100
+
+/*
+ * Portal modes.
+ *   Enum types;
+ *     pmode == production mode
+ *     cmode == consumption mode,
+ *   Enum values use 3 letter codes. First letter matches the portal mode,
+ *   remaining two letters indicate;
+ *     ci == cache-inhibited portal register
+ *     ce == cache-enabled portal register
+ *     vb == in-band valid-bit (cache-enabled)
+ */
+enum bm_rcr_pmode {            /* matches BCSP_CFG::RPM */
+       bm_rcr_pci = 0,         /* PI index, cache-inhibited */
+       bm_rcr_pce = 1,         /* PI index, cache-enabled */
+       bm_rcr_pvb = 2          /* valid-bit */
+};
+enum bm_rcr_cmode {            /* s/w-only */
+       bm_rcr_cci,             /* CI index, cache-inhibited */
+       bm_rcr_cce              /* CI index, cache-enabled */
+};
+
+
+/* --- Portal structures --- */
+
+#define BM_RCR_SIZE            8
+
+/*
+ * Release Command
+ *
+ * One slot of the release command ring (RCR). The anonymous union overlays a
+ * small header (verb byte, pool id, padding) on top of the array of buffers
+ * being released, so the verb and bpid bytes share storage with the start of
+ * bufs[0].
+ */
+struct bm_rcr_entry {
+       union {
+               struct {
+                       u8 _ncw_verb; /* writes to this are non-coherent */
+                       u8 bpid; /* used with BM_RCR_VERB_CMD_BPID_SINGLE */
+                       u8 __reserved1[62];
+               };
+               /* up to 8 buffers per command (see BM_RCR_VERB_BUFCOUNT_MASK) */
+               struct bm_buffer bufs[8];
+       };
+};
+#define BM_RCR_VERB_VBIT               0x80
+#define BM_RCR_VERB_CMD_MASK           0x70    /* one of two values; */
+#define BM_RCR_VERB_CMD_BPID_SINGLE    0x20
+#define BM_RCR_VERB_CMD_BPID_MULTI     0x30
+#define BM_RCR_VERB_BUFCOUNT_MASK      0x0f    /* values 1..8 */
+
+/* Software state tracking the release command ring of one portal. */
+struct bm_rcr {
+       /* ring: first entry of the ring; cursor: next entry to be filled */
+       struct bm_rcr_entry *ring, *cursor;
+       /*
+        * ci: last consumer index read from the portal
+        * available: entries software may still fill
+        * ithresh: cached copy of the value written to BM_REG_RCR_ITR
+        * vbit: valid-bit value OR-ed into the verb of the next commit
+        */
+       u8 ci, available, ithresh, vbit;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       /* Debug-only bookkeeping consulted by the DPAA_ASSERT() checks */
+       u32 busy;
+       enum bm_rcr_pmode pmode;
+       enum bm_rcr_cmode cmode;
+#endif
+};
+
+/* MC (Management Command) command */
+struct bm_mc_command {
+       u8 _ncw_verb; /* writes to this are non-coherent */
+       u8 bpid; /* used by acquire command */
+       u8 __reserved[62];
+};
+#define BM_MCC_VERB_VBIT               0x80
+#define BM_MCC_VERB_CMD_MASK           0x70    /* where the verb contains; */
+#define BM_MCC_VERB_CMD_ACQUIRE                0x10
+#define BM_MCC_VERB_CMD_QUERY          0x40
+#define BM_MCC_VERB_ACQUIRE_BUFCOUNT   0x0f    /* values 1..8 go here */
+
+/* MC result, Acquire and Query Response */
+union bm_mc_result {
+       struct {
+               u8 verb;
+               u8 bpid;
+               u8 __reserved[62];
+       };
+       struct bm_buffer bufs[8];
+};
+#define BM_MCR_VERB_VBIT               0x80
+#define BM_MCR_VERB_CMD_MASK           BM_MCC_VERB_CMD_MASK
+#define BM_MCR_VERB_CMD_ACQUIRE                BM_MCC_VERB_CMD_ACQUIRE
+#define BM_MCR_VERB_CMD_QUERY          BM_MCC_VERB_CMD_QUERY
+#define BM_MCR_VERB_CMD_ERR_INVALID    0x60
+#define BM_MCR_VERB_CMD_ERR_ECC                0x70
+#define BM_MCR_VERB_ACQUIRE_BUFCOUNT   BM_MCC_VERB_ACQUIRE_BUFCOUNT /* 0..8 */
+#define BM_MCR_TIMEOUT                 10000 /* us */
+
+/* Software state for the management command interface of one portal. */
+struct bm_mc {
+       /* command register */
+       struct bm_mc_command *cr;
+       /* base of the two response registers, indexed by rridx */
+       union bm_mc_result *rr;
+       /*
+        * rridx: response register the next result is expected in (0 or 1)
+        * vbit: valid-bit value OR-ed into the verb of the next command
+        */
+       u8 rridx, vbit;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       /* Debug-only state machine checked by the DPAA_ASSERT() calls */
+       enum {
+               /* Can only be _mc_start()ed */
+               mc_idle,
+               /* Can only be _mc_commit()ed or _mc_abort()ed */
+               mc_user,
+               /* Can only be _mc_retry()ed */
+               mc_hw
+       } state;
+#endif
+};
+
+struct bm_addr {
+       void __iomem *ce;       /* cache-enabled */
+       void __iomem *ci;       /* cache-inhibited */
+};
+
+struct bm_portal {
+       struct bm_addr addr;
+       struct bm_rcr rcr;
+       struct bm_mc mc;
+} ____cacheline_aligned;
+
+/* Read a 32-bit register from the cache-inhibited portal region. */
+static inline u32 bm_in(struct bm_portal *p, u32 offset)
+{
+       void __iomem *reg = p->addr.ci + offset;
+
+       return __raw_readl(reg);
+}
+
+/* Write a 32-bit value to a register in the cache-inhibited portal region. */
+static inline void bm_out(struct bm_portal *p, u32 offset, u32 val)
+{
+       void __iomem *reg = p->addr.ci + offset;
+
+       __raw_writel(val, reg);
+}
+
+/* Cache Enabled Portal Access */
+
+/* Invalidate the line at @offset within the cache-enabled portal region. */
+static inline void bm_cl_invalidate(struct bm_portal *p, u32 offset)
+{
+       void __iomem *line = p->addr.ce + offset;
+
+       dpaa_invalidate(line);
+}
+
+/* Issue a read-only touch on @offset within the cache-enabled region. */
+static inline void bm_cl_touch_ro(struct bm_portal *p, u32 offset)
+{
+       void __iomem *line = p->addr.ce + offset;
+
+       dpaa_touch_ro(line);
+}
+
+/* Read a 32-bit value at @offset from the cache-enabled portal region. */
+static inline u32 bm_ce_in(struct bm_portal *p, u32 offset)
+{
+       void __iomem *reg = p->addr.ce + offset;
+
+       return __raw_readl(reg);
+}
+
+struct bman_portal {
+       struct bm_portal p;
+       /* interrupt sources processed by portal_isr(), configurable */
+       unsigned long irq_sources;
+       /* probing time config params for cpu-affine portals */
+       const struct bm_portal_config *config;
+       char irqname[MAX_IRQNAME];
+};
+
+static cpumask_t affine_mask;
+static DEFINE_SPINLOCK(affine_mask_lock);
+static DEFINE_PER_CPU(struct bman_portal, bman_affine_portal);
+
+/*
+ * Return the calling CPU's portal via get_cpu_var(); every call must be
+ * paired with put_affine_portal().
+ */
+static inline struct bman_portal *get_affine_portal(void)
+{
+       struct bman_portal *portal = &get_cpu_var(bman_affine_portal);
+
+       return portal;
+}
+
+/* Release the per-cpu portal reference taken by get_affine_portal(). */
+static inline void put_affine_portal(void)
+{
+       put_cpu_var(bman_affine_portal);
+}
+
+/*
+ * This object type refers to a pool, it isn't *the* pool. There may be
+ * more than one such object per BMan buffer pool, eg. if different users of the
+ * pool are operating via different portals.
+ */
+struct bman_pool {
+       /* index of the buffer pool to encapsulate (0-63) */
+       u32 bpid;
+       /* Used for hash-table admin when using depletion notifications. */
+       struct bman_portal *portal;
+       struct bman_pool *next;
+};
+
+static u32 poll_portal_slow(struct bman_portal *p, u32 is);
+
+/*
+ * Portal interrupt handler. @ptr is the struct bman_portal that was passed to
+ * request_irq(). Only sources enabled in irq_sources are serviced; returns
+ * IRQ_NONE when none of them is pending.
+ */
+static irqreturn_t portal_isr(int irq, void *ptr)
+{
+       struct bman_portal *bp = ptr;
+       u32 ack, pending;
+
+       ack = bp->irq_sources;
+       pending = bm_in(&bp->p, BM_REG_ISR) & bp->irq_sources;
+       if (unlikely(pending == 0))
+               return IRQ_NONE;
+
+       /* acknowledge the enabled sources plus whatever the slow path handled */
+       ack |= poll_portal_slow(bp, pending);
+       bm_out(&bp->p, BM_REG_ISR, ack);
+
+       return IRQ_HANDLED;
+}
+
+/* --- RCR API --- */
+
+#define RCR_SHIFT      ilog2(sizeof(struct bm_rcr_entry))
+#define RCR_CARRY      (uintptr_t)(BM_RCR_SIZE << RCR_SHIFT)
+
+/*
+ * Wrap a ring pointer back to the start of the ring by masking off the
+ * "carry bit" (RCR_CARRY) that is set once the pointer steps past the last
+ * entry.
+ */
+static struct bm_rcr_entry *rcr_carryclear(struct bm_rcr_entry *p)
+{
+       return (struct bm_rcr_entry *)((uintptr_t)p & ~RCR_CARRY);
+}
+
+#ifdef CONFIG_FSL_DPAA_CHECKING
+/* Derive the ring index (0..BM_RCR_SIZE-1) from a ring entry pointer */
+static int rcr_ptr2idx(struct bm_rcr_entry *e)
+{
+       uintptr_t slot = (uintptr_t)e >> RCR_SHIFT;
+
+       return slot & (BM_RCR_SIZE - 1);
+}
+#endif
+
+/*
+ * Advance 'cursor' by one entry. When the pointer wraps back to the start of
+ * the ring the software valid-bit is toggled.
+ */
+static inline void rcr_inc(struct bm_rcr *rcr)
+{
+       struct bm_rcr_entry *next = rcr->cursor + 1;
+       struct bm_rcr_entry *wrapped = rcr_carryclear(next);
+
+       /* a differing pointer means the carry bit was cleared, i.e. a wrap */
+       if (wrapped != next)
+               rcr->vbit ^= BM_RCR_VERB_VBIT;
+       rcr->cursor = wrapped;
+}
+
+/* Number of RCR entries software may currently fill. */
+static int bm_rcr_get_avail(struct bm_portal *portal)
+{
+       return portal->rcr.available;
+}
+
+/*
+ * Number of RCR entries not yet seen as consumed; the ring holds at most
+ * BM_RCR_SIZE - 1 entries.
+ */
+static int bm_rcr_get_fill(struct bm_portal *portal)
+{
+       int capacity = BM_RCR_SIZE - 1;
+
+       return capacity - portal->rcr.available;
+}
+
+/* Record @ithresh in the software state and write it to BM_REG_RCR_ITR. */
+static void bm_rcr_set_ithresh(struct bm_portal *portal, u8 ithresh)
+{
+       portal->rcr.ithresh = ithresh;
+       bm_out(portal, BM_REG_RCR_ITR, ithresh);
+}
+
+/*
+ * Touch the cache-enabled consumer index line ahead of a later
+ * bm_rcr_cce_update(). Only valid in cache-enabled consumption mode.
+ */
+static void bm_rcr_cce_prefetch(struct bm_portal *portal)
+{
+       DPAA_ASSERT(portal->rcr.cmode == bm_rcr_cce);
+       bm_cl_touch_ro(portal, BM_CL_RCR_CI_CENA);
+}
+
+/*
+ * Re-read the cache-enabled consumer index and credit the entries consumed
+ * since the previous read back to 'available'.
+ *
+ * Returns the number of entries that became available.
+ */
+static u8 bm_rcr_cce_update(struct bm_portal *portal)
+{
+       struct bm_rcr *ring = &portal->rcr;
+       u8 prev_ci = ring->ci;
+       u8 freed;
+
+       DPAA_ASSERT(ring->cmode == bm_rcr_cce);
+       ring->ci = bm_ce_in(portal, BM_CL_RCR_CI_CENA) & (BM_RCR_SIZE - 1);
+       /* invalidate after reading so that the next read is not served stale */
+       bm_cl_invalidate(portal, BM_CL_RCR_CI_CENA);
+       freed = dpaa_cyc_diff(BM_RCR_SIZE, prev_ci, ring->ci);
+       ring->available += freed;
+
+       return freed;
+}
+
+/*
+ * Begin a release command: returns the zeroed entry at the cursor for the
+ * caller to fill, or NULL when the ring has no available entry.
+ */
+static inline struct bm_rcr_entry *bm_rcr_start(struct bm_portal *portal)
+{
+       struct bm_rcr_entry *entry;
+
+       DPAA_ASSERT(!portal->rcr.busy);
+       if (portal->rcr.available == 0)
+               return NULL;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       portal->rcr.busy = 1;
+#endif
+       entry = portal->rcr.cursor;
+       dpaa_zero(entry);
+
+       return entry;
+}
+
+/*
+ * Commit the entry obtained from bm_rcr_start() in valid-bit production mode.
+ * @myverb is the command verb; the current valid-bit is OR-ed into it here.
+ */
+static inline void bm_rcr_pvb_commit(struct bm_portal *portal, u8 myverb)
+{
+       struct bm_rcr *rcr = &portal->rcr;
+       struct bm_rcr_entry *rcursor;
+
+       DPAA_ASSERT(rcr->busy);
+       DPAA_ASSERT(rcr->pmode == bm_rcr_pvb);
+       DPAA_ASSERT(rcr->available >= 1);
+       /* order the earlier writes to the entry before the verb write below */
+       dma_wmb();
+       rcursor = rcr->cursor;
+       rcursor->_ncw_verb = myverb | rcr->vbit;
+       dpaa_flush(rcursor);
+       /* advance the cursor (toggling vbit on wrap) and consume one slot */
+       rcr_inc(rcr);
+       rcr->available--;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       rcr->busy = 0;
+#endif
+}
+
+/*
+ * Initialise the software RCR state from the portal's current producer and
+ * consumer indices and program the production mode into BM_REG_CFG.
+ *
+ * @cmode is only recorded when CONFIG_FSL_DPAA_CHECKING is enabled.
+ * Always returns 0.
+ */
+static int bm_rcr_init(struct bm_portal *portal, enum bm_rcr_pmode pmode,
+                      enum bm_rcr_cmode cmode)
+{
+       struct bm_rcr *rcr = &portal->rcr;
+       u32 cfg;
+       u8 pi;
+
+       rcr->ring = portal->addr.ce + BM_CL_RCR;
+       rcr->ci = bm_in(portal, BM_REG_RCR_CI_CINH) & (BM_RCR_SIZE - 1);
+       pi = bm_in(portal, BM_REG_RCR_PI_CINH) & (BM_RCR_SIZE - 1);
+       rcr->cursor = rcr->ring + pi;
+       /*
+        * The bit just above the index bits of PI selects the valid-bit.
+        * NOTE(review): PI is read from the register a second time here.
+        */
+       rcr->vbit = (bm_in(portal, BM_REG_RCR_PI_CINH) & BM_RCR_SIZE) ?
+               BM_RCR_VERB_VBIT : 0;
+       /* usable capacity is BM_RCR_SIZE - 1, less entries still in flight */
+       rcr->available = BM_RCR_SIZE - 1
+               - dpaa_cyc_diff(BM_RCR_SIZE, rcr->ci, pi);
+       rcr->ithresh = bm_in(portal, BM_REG_RCR_ITR);
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       rcr->busy = 0;
+       rcr->pmode = pmode;
+       rcr->cmode = cmode;
+#endif
+       /* clear the low five bits of CFG, then set the production mode */
+       cfg = (bm_in(portal, BM_REG_CFG) & 0xffffffe0)
+               | (pmode & 0x3); /* BCSP_CFG::RPM */
+       bm_out(portal, BM_REG_CFG, cfg);
+       return 0;
+}
+
+/*
+ * Tear-down counterpart of bm_rcr_init(). Only does work when
+ * CONFIG_FSL_DPAA_CHECKING is set: compares the hardware producer/consumer
+ * indices with the software state and logs any inconsistency.
+ */
+static void bm_rcr_finish(struct bm_portal *portal)
+{
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       struct bm_rcr *rcr = &portal->rcr;
+       int i;
+
+       DPAA_ASSERT(!rcr->busy);
+
+       /* hardware PI differs from our cursor: entries were never committed */
+       i = bm_in(portal, BM_REG_RCR_PI_CINH) & (BM_RCR_SIZE - 1);
+       if (i != rcr_ptr2idx(rcr->cursor))
+               pr_crit("losing uncommited RCR entries\n");
+
+       /* hardware CI differs from our cached CI: completions not yet seen */
+       i = bm_in(portal, BM_REG_RCR_CI_CINH) & (BM_RCR_SIZE - 1);
+       if (i != rcr->ci)
+               pr_crit("missing existing RCR completions\n");
+       /* cached CI has not caught up with the cursor: ring is not empty */
+       if (rcr->ci != rcr_ptr2idx(rcr->cursor))
+               pr_crit("RCR destroyed unquiesced\n");
+#endif
+}
+
+/* --- Management command API --- */
+static int bm_mc_init(struct bm_portal *portal)
+{
+       struct bm_mc *mc = &portal->mc;
+
+       mc->cr = portal->addr.ce + BM_CL_CR;
+       mc->rr = portal->addr.ce + BM_CL_RR0;
+       mc->rridx = (__raw_readb(&mc->cr->_ncw_verb) & BM_MCC_VERB_VBIT) ?
+                   0 : 1;
+       mc->vbit = mc->rridx ? BM_MCC_VERB_VBIT : 0;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       mc->state = mc_idle;
+#endif
+       return 0;
+}
+
+/*
+ * Tear-down counterpart of bm_mc_init(). With CONFIG_FSL_DPAA_CHECKING it
+ * reports a management command that was started but never completed;
+ * otherwise it does nothing.
+ */
+static void bm_mc_finish(struct bm_portal *portal)
+{
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       struct bm_mc *mc = &portal->mc;
+
+       DPAA_ASSERT(mc->state == mc_idle);
+       if (mc->state != mc_idle)
+               pr_crit("Losing incomplete MC command\n");
+#endif
+}
+
+/*
+ * Begin a management command: returns the portal's command register, zeroed
+ * via dpaa_zero(), for the caller to fill in before bm_mc_commit().
+ */
+static inline struct bm_mc_command *bm_mc_start(struct bm_portal *portal)
+{
+       struct bm_mc *mc = &portal->mc;
+
+       DPAA_ASSERT(mc->state == mc_idle);
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       mc->state = mc_user;
+#endif
+       dpaa_zero(mc->cr);
+       return mc->cr;
+}
+
+/*
+ * Submit the command prepared after bm_mc_start(). @myverb is the command
+ * verb; the current valid-bit is OR-ed in here. The result is collected
+ * afterwards with bm_mc_result().
+ */
+static inline void bm_mc_commit(struct bm_portal *portal, u8 myverb)
+{
+       struct bm_mc *mc = &portal->mc;
+       union bm_mc_result *rr = mc->rr + mc->rridx;
+
+       DPAA_ASSERT(mc->state == mc_user);
+       /* order the command body writes before the verb write below */
+       dma_wmb();
+       mc->cr->_ncw_verb = myverb | mc->vbit;
+       dpaa_flush(mc->cr);
+       /* prepare the response register we will poll for the result */
+       dpaa_invalidate_touch_ro(rr);
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       mc->state = mc_hw;
+#endif
+}
+
+/*
+ * Poll once for the result of the committed management command.
+ *
+ * Returns the response register when its verb byte is non-zero, or NULL if no
+ * result is visible yet (the caller is expected to call again). On success
+ * the response index and valid-bit are flipped ready for the next command.
+ */
+static inline union bm_mc_result *bm_mc_result(struct bm_portal *portal)
+{
+       struct bm_mc *mc = &portal->mc;
+       union bm_mc_result *rr = mc->rr + mc->rridx;
+
+       DPAA_ASSERT(mc->state == mc_hw);
+       /*
+        * The inactive response register's verb byte always returns zero until
+        * its command is submitted and completed. This includes the valid-bit,
+        * in case you were wondering...
+        */
+       if (!__raw_readb(&rr->verb)) {
+               /* not ready: refresh the line before the next poll */
+               dpaa_invalidate_touch_ro(rr);
+               return NULL;
+       }
+       mc->rridx ^= 1;
+       mc->vbit ^= BM_MCC_VERB_VBIT;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       mc->state = mc_idle;
+#endif
+       return rr;
+}
+
+/*
+ * Poll for a management command result, waiting 1us between attempts, for at
+ * most BM_MCR_TIMEOUT attempts. *@mcr receives the result, or NULL when the
+ * wait timed out.
+ *
+ * Returns the number of attempts left: zero means timeout.
+ */
+static inline int bm_mc_result_timeout(struct bm_portal *portal,
+                                      union bm_mc_result **mcr)
+{
+       int remaining;
+
+       for (remaining = BM_MCR_TIMEOUT; remaining; remaining--) {
+               *mcr = bm_mc_result(portal);
+               if (*mcr)
+                       break;
+               udelay(1);
+       }
+
+       return remaining;
+}
+
+/* Disable all BSCN interrupts for the portal by zeroing both SCN registers */
+static void bm_isr_bscn_disable(struct bm_portal *portal)
+{
+       int n;
+
+       for (n = 0; n < 2; n++)
+               bm_out(portal, BM_REG_SCN(n), 0);
+}
+
+/*
+ * Bring up one portal: initialise the RCR and management command state from
+ * the mapped addresses in @c, quiesce and clear the portal interrupts,
+ * install portal_isr() and, when a cpu is given, bind the IRQ to it.
+ *
+ * Returns 0 on success and -EIO on any failure, whatever the underlying
+ * error was.
+ */
+static int bman_create_portal(struct bman_portal *portal,
+                             const struct bm_portal_config *c)
+{
+       struct bm_portal *p;
+       int ret;
+
+       p = &portal->p;
+       /*
+        * prep the low-level portal struct with the mapped addresses from the
+        * config, everything that follows depends on it and "config" is more
+        * for (de)reference...
+        */
+       p->addr.ce = c->addr_virt[DPAA_PORTAL_CE];
+       p->addr.ci = c->addr_virt[DPAA_PORTAL_CI];
+       if (bm_rcr_init(p, bm_rcr_pvb, bm_rcr_cce)) {
+               dev_err(c->dev, "RCR initialisation failed\n");
+               goto fail_rcr;
+       }
+       if (bm_mc_init(p)) {
+               dev_err(c->dev, "MC initialisation failed\n");
+               goto fail_mc;
+       }
+       /*
+        * Default to all BPIDs disabled, we enable as required at
+        * run-time.
+        */
+       bm_isr_bscn_disable(p);
+
+       /* Write-to-clear any stale interrupt status bits */
+       bm_out(p, BM_REG_ISDR, 0xffffffff);
+       portal->irq_sources = 0;
+       bm_out(p, BM_REG_IER, 0);
+       bm_out(p, BM_REG_ISR, 0xffffffff);
+       /* snprintf() bounds the name to MAX_IRQNAME, truncating if needed */
+       snprintf(portal->irqname, MAX_IRQNAME, IRQNAME, c->cpu);
+       if (request_irq(c->irq, portal_isr, 0, portal->irqname, portal)) {
+               dev_err(c->dev, "request_irq() failed\n");
+               goto fail_irq;
+       }
+       /* a cpu of -1 means no affinity is requested for this portal */
+       if (c->cpu != -1 && irq_can_set_affinity(c->irq) &&
+           irq_set_affinity(c->irq, cpumask_of(c->cpu))) {
+               dev_err(c->dev, "irq_set_affinity() failed\n");
+               goto fail_affinity;
+       }
+
+       /* Need RCR to be empty before continuing */
+       ret = bm_rcr_get_fill(p);
+       if (ret) {
+               dev_err(c->dev, "RCR unclean\n");
+               goto fail_rcr_empty;
+       }
+       /* Success */
+       portal->config = c;
+
+       /* clear the registers that were set to all-ones/left over above */
+       bm_out(p, BM_REG_ISDR, 0);
+       bm_out(p, BM_REG_IIR, 0);
+
+       return 0;
+
+       /* unwind in reverse order of acquisition */
+fail_rcr_empty:
+fail_affinity:
+       free_irq(c->irq, portal);
+fail_irq:
+       bm_mc_finish(p);
+fail_mc:
+       bm_rcr_finish(p);
+fail_rcr:
+       return -EIO;
+}
+
+/*
+ * Create the portal belonging to the cpu named in @c and record that cpu in
+ * affine_mask. Returns the portal, or NULL if it could not be created.
+ */
+struct bman_portal *bman_create_affine_portal(const struct bm_portal_config *c)
+{
+       struct bman_portal *portal = &per_cpu(bman_affine_portal, c->cpu);
+
+       if (bman_create_portal(portal, c) != 0)
+               return NULL;
+
+       spin_lock(&affine_mask_lock);
+       cpumask_set_cpu(c->cpu, &affine_mask);
+       spin_unlock(&affine_mask_lock);
+
+       return portal;
+}
+
+/*
+ * Slow-path servicing of the interrupt sources in @is. For the RCR interrupt
+ * the consumer index is refreshed, the interrupt threshold reset to 0 and the
+ * status bit acknowledged. Returns the original @is value.
+ */
+static u32 poll_portal_slow(struct bman_portal *p, u32 is)
+{
+       const u32 handled = is;
+
+       if (is & BM_PIRQ_RCRI) {
+               struct bm_portal *portal = &p->p;
+
+               bm_rcr_cce_update(portal);
+               bm_rcr_set_ithresh(portal, 0);
+               bm_out(portal, BM_REG_ISR, BM_PIRQ_RCRI);
+               is &= ~BM_PIRQ_RCRI;
+       }
+
+       /* There should be no status register bits left undefined */
+       DPAA_ASSERT(!is);
+       return handled;
+}
+
+/*
+ * Enable additional interrupt sources on portal @p. Only the bits of @bits
+ * that fall within BM_PIRQ_VISIBLE are honoured. Always returns 0.
+ */
+int bman_p_irqsource_add(struct bman_portal *p, u32 bits)
+{
+       unsigned long irqflags;
+
+       /* keep the irq_sources update and the IER write together */
+       local_irq_save(irqflags);
+       set_bits(bits & BM_PIRQ_VISIBLE, &p->irq_sources);
+       bm_out(&p->p, BM_REG_IER, p->irq_sources);
+       local_irq_restore(irqflags);
+       return 0;
+}
+
+/*
+ * Drain buffer pool @bpid by acquiring one buffer at a time until the
+ * acquire command reports that none was returned.
+ *
+ * Returns 0 once the pool is empty, or -ETIMEDOUT if an acquire command did
+ * not complete in time.
+ */
+static int bm_shutdown_pool(u32 bpid)
+{
+       for (;;) {
+               struct bman_portal *p = get_affine_portal();
+               struct bm_mc_command *cmd;
+               union bm_mc_result *res;
+               bool empty;
+
+               /* Acquire buffers until empty */
+               cmd = bm_mc_start(&p->p);
+               cmd->bpid = bpid;
+               bm_mc_commit(&p->p, BM_MCC_VERB_CMD_ACQUIRE | 1);
+               if (!bm_mc_result_timeout(&p->p, &res)) {
+                       put_affine_portal();
+                       pr_crit("BMan Acquire Command timedout\n");
+                       return -ETIMEDOUT;
+               }
+               empty = !(res->verb & BM_MCR_VERB_ACQUIRE_BUFCOUNT);
+               put_affine_portal();
+               if (empty)
+                       return 0;       /* Pool is empty */
+       }
+}
+
+struct gen_pool *bm_bpalloc;
+
+/*
+ * Allocate @count BPIDs from the bm_bpalloc pool and store the first one in
+ * *@result. The DPAA_GENALLOC_OFF bias used on pool addresses is stripped
+ * from the returned id. Returns 0 on success or -ENOMEM.
+ */
+static int bm_alloc_bpid_range(u32 *result, u32 count)
+{
+       unsigned long handle = gen_pool_alloc(bm_bpalloc, count);
+
+       if (handle == 0)
+               return -ENOMEM;
+
+       *result = handle & ~DPAA_GENALLOC_OFF;
+       return 0;
+}
+
+/*
+ * Drain pool @bpid and hand the id back to the bm_bpalloc allocator.
+ *
+ * If the pool cannot be drained the id is deliberately not returned to the
+ * allocator (it is leaked) and the error from bm_shutdown_pool() is
+ * propagated. Returns 0 on success.
+ */
+static int bm_release_bpid(u32 bpid)
+{
+       int ret;
+
+       ret = bm_shutdown_pool(bpid);
+       if (ret) {
+               /* bpid is a u32, so print it as unsigned */
+               pr_debug("BPID %u leaked\n", bpid);
+               return ret;
+       }
+
+       /* pool addresses carry the DPAA_GENALLOC_OFF bias */
+       gen_pool_free(bm_bpalloc, bpid | DPAA_GENALLOC_OFF, 1);
+       return 0;
+}
+
+/*
+ * Allocate a fresh BPID and a pool object referring to it.
+ *
+ * The object is zero-initialised so that its 'portal' and 'next' pointers
+ * start out as NULL rather than holding uninitialised heap contents.
+ *
+ * Returns the new pool, or NULL if no BPID or no memory is available. The
+ * caller releases it with bman_free_pool().
+ */
+struct bman_pool *bman_new_pool(void)
+{
+       struct bman_pool *pool;
+       u32 bpid;
+
+       if (bm_alloc_bpid_range(&bpid, 1))
+               return NULL;
+
+       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+       if (!pool) {
+               /* give the id back; there is no object to free here */
+               bm_release_bpid(bpid);
+               return NULL;
+       }
+
+       pool->bpid = bpid;
+
+       return pool;
+}
+EXPORT_SYMBOL(bman_new_pool);
+
+/*
+ * Release the BPID held by @pool and free the pool object. The result of
+ * bm_release_bpid() is ignored; the object is freed either way.
+ */
+void bman_free_pool(struct bman_pool *pool)
+{
+       bm_release_bpid(pool->bpid);
+
+       kfree(pool);
+}
+EXPORT_SYMBOL(bman_free_pool);
+
+/* Return the buffer pool id that @pool refers to. */
+int bman_get_bpid(const struct bman_pool *pool)
+{
+       return pool->bpid;
+}
+EXPORT_SYMBOL(bman_get_bpid);
+
+/*
+ * Refresh the view of the RCR consumer index. With no entry available the
+ * index is re-read immediately; otherwise it is only prefetched so that a
+ * later update is cheap.
+ */
+static void update_rcr_ci(struct bman_portal *p, int avail)
+{
+       struct bm_portal *portal = &p->p;
+
+       if (!avail) {
+               bm_rcr_cce_update(portal);
+               return;
+       }
+       bm_rcr_cce_prefetch(portal);
+}
+
+/*
+ * Release @num buffers (1..8) from @bufs into the pool @pool refers to, using
+ * the calling CPU's portal.
+ *
+ * Waits up to roughly 1ms for a free RCR entry. Returns 0 on success or
+ * -ETIMEDOUT if no entry became available.
+ */
+int bman_release(struct bman_pool *pool, const struct bm_buffer *bufs, u8 num)
+{
+       struct bman_portal *p;
+       struct bm_rcr_entry *r;
+       unsigned long irqflags;
+       int avail, timeout = 1000; /* 1ms */
+       int i = num - 1;
+
+       DPAA_ASSERT(num > 0 && num <= 8);
+
+       /* retry once per microsecond until an RCR entry can be started */
+       do {
+               p = get_affine_portal();
+               local_irq_save(irqflags);
+               avail = bm_rcr_get_avail(&p->p);
+               if (avail < 2)
+                       update_rcr_ci(p, avail);
+               r = bm_rcr_start(&p->p);
+               local_irq_restore(irqflags);
+               put_affine_portal();
+               if (likely(r))
+                       break;
+
+               udelay(1);
+       } while (--timeout);
+
+       if (unlikely(!timeout))
+               return -ETIMEDOUT;
+
+       /*
+        * NOTE(review): the portal was put above and is re-acquired here while
+        * 'r' still points into the portal used by bm_rcr_start() - confirm
+        * the caller cannot migrate to another cpu in between.
+        */
+       p = get_affine_portal();
+       local_irq_save(irqflags);
+       /*
+        * we can copy all but the first entry, as this can trigger badness
+        * with the valid-bit
+        */
+       bm_buffer_set64(r->bufs, bm_buffer_get64(bufs));
+       bm_buffer_set_bpid(r->bufs, pool->bpid);
+       /* i == num - 1: the remaining buffers after the first */
+       if (i)
+               memcpy(&r->bufs[1], &bufs[1], i * sizeof(bufs[0]));
+
+       bm_rcr_pvb_commit(&p->p, BM_RCR_VERB_CMD_BPID_SINGLE |
+                         (num & BM_RCR_VERB_BUFCOUNT_MASK));
+
+       local_irq_restore(irqflags);
+       put_affine_portal();
+       return 0;
+}
+EXPORT_SYMBOL(bman_release);
+
+/*
+ * Acquire @num buffers (1..8) from the pool @pool refers to, using the
+ * calling CPU's portal. @bufs may be NULL, in which case the acquired
+ * buffers are not copied out.
+ *
+ * Returns @num on success, -ENOMEM if the hardware returned a different
+ * count, or -ETIMEDOUT if the command did not complete.
+ */
+int bman_acquire(struct bman_pool *pool, struct bm_buffer *bufs, u8 num)
+{
+       struct bman_portal *p = get_affine_portal();
+       struct bm_mc_command *mcc;
+       union bm_mc_result *mcr;
+       int ret;
+
+       DPAA_ASSERT(num > 0 && num <= 8);
+
+       mcc = bm_mc_start(&p->p);
+       mcc->bpid = pool->bpid;
+       bm_mc_commit(&p->p, BM_MCC_VERB_CMD_ACQUIRE |
+                    (num & BM_MCC_VERB_ACQUIRE_BUFCOUNT));
+       if (!bm_mc_result_timeout(&p->p, &mcr)) {
+               put_affine_portal();
+               pr_crit("BMan Acquire Timeout\n");
+               return -ETIMEDOUT;
+       }
+       /* number of buffers the hardware actually returned */
+       ret = mcr->verb & BM_MCR_VERB_ACQUIRE_BUFCOUNT;
+       /*
+        * NOTE(review): @num entries are copied even when fewer were
+        * returned; the caller gets -ENOMEM in that case.
+        */
+       if (bufs)
+               memcpy(&bufs[0], &mcr->bufs[0], num * sizeof(bufs[0]));
+
+       put_affine_portal();
+       if (ret != num)
+               ret = -ENOMEM;
+       return ret;
+}
+EXPORT_SYMBOL(bman_acquire);
+
+/*
+ * Return the configuration recorded for @portal by bman_create_portal() on
+ * success.
+ */
+const struct bm_portal_config *
+bman_get_bm_portal_config(const struct bman_portal *portal)
+{
+       return portal->config;
+}
diff --git a/drivers/soc/fsl/qbman/bman_ccsr.c b/drivers/soc/fsl/qbman/bman_ccsr.c
new file mode 100644 (file)
index 0000000..9deb052
--- /dev/null
@@ -0,0 +1,263 @@
+/* Copyright (c) 2009 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bman_priv.h"
+
+u16 bman_ip_rev;
+EXPORT_SYMBOL(bman_ip_rev);
+
+/* Register offsets */
+#define REG_FBPR_FPC           0x0800
+#define REG_ECSR               0x0a00
+#define REG_ECIR               0x0a04
+#define REG_EADR               0x0a08
+#define REG_EDATA(n)           (0x0a10 + ((n) * 0x04))
+#define REG_SBEC(n)            (0x0a80 + ((n) * 0x04))
+#define REG_IP_REV_1           0x0bf8
+#define REG_IP_REV_2           0x0bfc
+#define REG_FBPR_BARE          0x0c00
+#define REG_FBPR_BAR           0x0c04
+#define REG_FBPR_AR            0x0c10
+#define REG_SRCIDR             0x0d04
+#define REG_LIODNR             0x0d08
+#define REG_ERR_ISR            0x0e00
+#define REG_ERR_IER            0x0e04
+#define REG_ERR_ISDR           0x0e08
+
+/* Used by all error interrupt registers except 'inhibit' */
+#define BM_EIRQ_IVCI   0x00000010      /* Invalid Command Verb */
+#define BM_EIRQ_FLWI   0x00000008      /* FBPR Low Watermark */
+#define BM_EIRQ_MBEI   0x00000004      /* Multi-bit ECC Error */
+#define BM_EIRQ_SBEI   0x00000002      /* Single-bit ECC Error */
+#define BM_EIRQ_BSCN   0x00000001      /* pool State Change Notification */
+
+/* Pairs an error interrupt bit (BM_EIRQ_*) with its log description. */
+struct bman_hwerr_txt {
+       u32 mask;
+       const char *txt;
+};
+
+/* Table walked by bman_isr() to report each asserted error interrupt. */
+static const struct bman_hwerr_txt bman_hwerr_txts[] = {
+       { BM_EIRQ_IVCI, "Invalid Command Verb" },
+       { BM_EIRQ_FLWI, "FBPR Low Watermark" },
+       { BM_EIRQ_MBEI, "Multi-bit ECC Error" },
+       { BM_EIRQ_SBEI, "Single-bit ECC Error" },
+       { BM_EIRQ_BSCN, "Pool State Change Notification" },
+};
+
+/* Trigger the low water mark interrupt only once */
+#define BMAN_ERRS_TO_DISABLE BM_EIRQ_FLWI
+
+/* Pointer to the start of the BMan's CCSR space */
+static u32 __iomem *bm_ccsr_start;
+
+/* Read the big-endian 32-bit CCSR register at byte offset @offset. */
+static inline u32 bm_ccsr_in(u32 offset)
+{
+       u32 __iomem *reg = bm_ccsr_start + offset / 4;
+
+       return ioread32be(reg);
+}
+/* Write @val to the big-endian 32-bit CCSR register at byte offset @offset. */
+static inline void bm_ccsr_out(u32 offset, u32 val)
+{
+       u32 __iomem *reg = bm_ccsr_start + offset / 4;
+
+       iowrite32be(val, reg);
+}
+
+/*
+ * Split REG_IP_REV_1 into its fields: the block id in the upper 16 bits,
+ * then the major and minor revision in the two low bytes.
+ */
+static void bm_get_version(u16 *id, u8 *major, u8 *minor)
+{
+       const u32 rev = bm_ccsr_in(REG_IP_REV_1);
+
+       *minor = rev & 0xff;
+       *major = (rev >> 8) & 0xff;
+       *id = rev >> 16;
+}
+
+/* signal transactions for FBPRs with higher priority */
+#define FBPR_AR_RPRIO_HI BIT(30)
+
+/*
+ * Program the FBPR base address (split across the BARE/BAR registers) and
+ * size. @size must be a power of two between 4KiB and 1GiB and @ba must be
+ * aligned to it; the size register is written as log2(size) - 1.
+ */
+static void bm_set_memory(u64 ba, u32 size)
+{
+       u32 exp = ilog2(size);
+       /* choke if size isn't within range */
+       DPAA_ASSERT(size >= 4096 && size <= 1024*1024*1024 &&
+                  is_power_of_2(size));
+       /* choke if '[e]ba' has lower-alignment than 'size' */
+       DPAA_ASSERT(!(ba & (size - 1)));
+       bm_ccsr_out(REG_FBPR_BARE, upper_32_bits(ba));
+       bm_ccsr_out(REG_FBPR_BAR, lower_32_bits(ba));
+       bm_ccsr_out(REG_FBPR_AR, exp - 1);
+}
+
+/*
+ * Location and size of BMan private memory
+ *
+ * Ideally we would use the DMA API to turn rmem->base into a DMA address
+ * (especially if iommu translations ever get involved).  Unfortunately, the
+ * DMA API currently does not allow mapping anything that is not backed with
+ * a struct page.
+ */
+static dma_addr_t fbpr_a;
+static size_t fbpr_sz;
+
+/*
+ * Reserved-memory hook for "fsl,bman-fbpr": remember the base and size of
+ * the region for use at probe time. Warns if either is zero; always
+ * returns 0.
+ */
+static int bman_fbpr(struct reserved_mem *rmem)
+{
+       fbpr_sz = rmem->size;
+       fbpr_a = rmem->base;
+
+       WARN_ON(!fbpr_a || !fbpr_sz);
+
+       return 0;
+}
+RESERVEDMEM_OF_DECLARE(bman_fbpr, "fsl,bman-fbpr", bman_fbpr);
+
+/*
+ * BMan error interrupt handler. @ptr is the struct device that was passed to
+ * devm_request_irq(). Logs each enabled error that is asserted, re-arms the
+ * error capture register where applicable, disables errors listed in
+ * BMAN_ERRS_TO_DISABLE and finally acknowledges everything that was pending.
+ */
+static irqreturn_t bman_isr(int irq, void *ptr)
+{
+       u32 isr_val, ier_val, ecsr_val, isr_mask, i;
+       struct device *dev = ptr;
+
+       ier_val = bm_ccsr_in(REG_ERR_IER);
+       isr_val = bm_ccsr_in(REG_ERR_ISR);
+       ecsr_val = bm_ccsr_in(REG_ECSR);
+       /* only errors that are both asserted and enabled are handled */
+       isr_mask = isr_val & ier_val;
+
+       if (!isr_mask)
+               return IRQ_NONE;
+
+       for (i = 0; i < ARRAY_SIZE(bman_hwerr_txts); i++) {
+               if (bman_hwerr_txts[i].mask & isr_mask) {
+                       dev_err_ratelimited(dev, "ErrInt: %s\n",
+                                           bman_hwerr_txts[i].txt);
+                       if (bman_hwerr_txts[i].mask & ecsr_val) {
+                               /* Re-arm error capture registers */
+                               bm_ccsr_out(REG_ECSR, ecsr_val);
+                       }
+                       if (bman_hwerr_txts[i].mask & BMAN_ERRS_TO_DISABLE) {
+                               dev_dbg(dev, "Disabling error 0x%x\n",
+                                       bman_hwerr_txts[i].mask);
+                               ier_val &= ~bman_hwerr_txts[i].mask;
+                               bm_ccsr_out(REG_ERR_IER, ier_val);
+                       }
+               }
+       }
+       /* acknowledge every status bit that was read, enabled or not */
+       bm_ccsr_out(REG_ERR_ISR, isr_val);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Probe the BMan CCSR block: map its registers, identify the IP revision,
+ * program the FBPR private memory, install the error interrupt handler and
+ * seed the BPID allocator with the pool count for this revision.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int fsl_bman_probe(struct platform_device *pdev)
+{
+       int ret, err_irq;
+       struct device *dev = &pdev->dev;
+       struct device_node *node = dev->of_node;
+       struct resource *res;
+       u16 id, bm_pool_cnt;
+       u8 major, minor;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(dev, "Can't get %s property 'IORESOURCE_MEM'\n",
+                       node->full_name);
+               return -ENXIO;
+       }
+       bm_ccsr_start = devm_ioremap(dev, res->start, resource_size(res));
+       if (!bm_ccsr_start)
+               return -ENXIO;
+
+       bm_get_version(&id, &major, &minor);
+       if (major == 1 && minor == 0) {
+               bman_ip_rev = BMAN_REV10;
+               bm_pool_cnt = BM_POOL_MAX;
+       } else if (major == 2 && minor == 0) {
+               bman_ip_rev = BMAN_REV20;
+               bm_pool_cnt = 8;
+       } else if (major == 2 && minor == 1) {
+               bman_ip_rev = BMAN_REV21;
+               bm_pool_cnt = BM_POOL_MAX;
+       } else {
+               dev_err(dev, "Unknown Bman version:%04x,%02x,%02x\n",
+                       id, major, minor);
+               return -ENODEV;
+       }
+
+       /*
+        * Without a reserved FBPR region the base/size registers would be
+        * programmed with zeroes; refuse to continue instead.
+        */
+       if (!fbpr_a || !fbpr_sz) {
+               dev_err(dev, "No FBPR reserved memory for %s\n",
+                       node->full_name);
+               return -ENODEV;
+       }
+       bm_set_memory(fbpr_a, fbpr_sz);
+
+       err_irq = platform_get_irq(pdev, 0);
+       if (err_irq <= 0) {
+               dev_info(dev, "Can't get %s IRQ\n", node->full_name);
+               return -ENODEV;
+       }
+       ret = devm_request_irq(dev, err_irq, bman_isr, IRQF_SHARED, "bman-err",
+                              dev);
+       if (ret)  {
+               dev_err(dev, "devm_request_irq() failed %d for '%s'\n",
+                       ret, node->full_name);
+               return ret;
+       }
+       /* Disable Buffer Pool State Change */
+       bm_ccsr_out(REG_ERR_ISDR, BM_EIRQ_BSCN);
+       /*
+        * Write-to-clear any stale bits, (eg. starvation being asserted prior
+        * to resource allocation during driver init).
+        */
+       bm_ccsr_out(REG_ERR_ISR, 0xffffffff);
+       /* Enable Error Interrupts */
+       bm_ccsr_out(REG_ERR_IER, 0xffffffff);
+
+       bm_bpalloc = devm_gen_pool_create(dev, 0, -1, "bman-bpalloc");
+       if (IS_ERR(bm_bpalloc)) {
+               ret = PTR_ERR(bm_bpalloc);
+               dev_err(dev, "bman-bpalloc pool init failed (%d)\n", ret);
+               return ret;
+       }
+
+       /* seed BMan resource pool */
+       ret = gen_pool_add(bm_bpalloc, DPAA_GENALLOC_OFF, bm_pool_cnt, -1);
+       if (ret) {
+               dev_err(dev, "Failed to seed BPID range [%d..%d] (%d)\n",
+                       0, bm_pool_cnt - 1, ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+/* Device-tree match table: binds to nodes compatible with "fsl,bman". */
+static const struct of_device_id fsl_bman_ids[] = {
+       {
+               .compatible = "fsl,bman",
+       },
+       {}
+};
+
+/*
+ * Platform driver for the BMan CCSR block. It is registered as a built-in
+ * driver, has no remove callback and suppresses the sysfs bind/unbind
+ * attributes.
+ */
+static struct platform_driver fsl_bman_driver = {
+       .driver = {
+               .name = KBUILD_MODNAME,
+               .of_match_table = fsl_bman_ids,
+               .suppress_bind_attrs = true,
+       },
+       .probe = fsl_bman_probe,
+};
+
+builtin_platform_driver(fsl_bman_driver);
diff --git a/drivers/soc/fsl/qbman/bman_portal.c b/drivers/soc/fsl/qbman/bman_portal.c
new file mode 100644 (file)
index 0000000..6579cc1
--- /dev/null
@@ -0,0 +1,219 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bman_priv.h"
+
+/* Affine portals, indexed by the cpu each portal was assigned to */
+static struct bman_portal *affine_bportals[NR_CPUS];
+/* cpus that bman_portal_probe() has already handed a portal to */
+static struct cpumask portal_cpus;
+/* protect bman global registers and global data shared among portals */
+static DEFINE_SPINLOCK(bman_lock);
+
+/*
+ * Create the affine portal described by @pcfg, add the RCRI interrupt source
+ * to it and record it in affine_bportals[] under pcfg->cpu.
+ *
+ * Returns the new portal, or NULL (after logging) if creation failed.
+ */
+static struct bman_portal *init_pcfg(struct bm_portal_config *pcfg)
+{
+       struct bman_portal *portal;
+
+       portal = bman_create_affine_portal(pcfg);
+       if (portal) {
+               bman_p_irqsource_add(portal, BM_PIRQ_RCRI);
+               affine_bportals[pcfg->cpu] = portal;
+
+               dev_info(pcfg->dev, "Portal initialised, cpu %d\n", pcfg->cpu);
+
+               return portal;
+       }
+
+       dev_crit(pcfg->dev, "%s: Portal failure on cpu %d\n",
+                __func__, pcfg->cpu);
+       return NULL;
+}
+
+/*
+ * Steer the IRQ of the portal affine to @cpu over to cpu 0. Does nothing if
+ * @cpu has no portal or the portal has no configuration.
+ */
+static void bman_offline_cpu(unsigned int cpu)
+{
+       struct bman_portal *portal = affine_bportals[cpu];
+       const struct bm_portal_config *cfg;
+
+       cfg = portal ? bman_get_bm_portal_config(portal) : NULL;
+       if (cfg)
+               irq_set_affinity(cfg->irq, cpumask_of(0));
+}
+
+/*
+ * Steer the IRQ of the portal affine to @cpu back onto @cpu. Does nothing if
+ * @cpu has no portal or the portal has no configuration.
+ */
+static void bman_online_cpu(unsigned int cpu)
+{
+       struct bman_portal *portal = affine_bportals[cpu];
+       const struct bm_portal_config *cfg;
+
+       cfg = portal ? bman_get_bm_portal_config(portal) : NULL;
+       if (cfg)
+               irq_set_affinity(cfg->irq, cpumask_of(cpu));
+}
+
+/*
+ * CPU hotplug notifier callback. Re-targets the portal IRQ at its own cpu
+ * once that cpu is online, and moves it away before the cpu goes down.
+ * All other actions are ignored. Always returns NOTIFY_OK.
+ */
+static int bman_hotplug_cpu_callback(struct notifier_block *nfb,
+                                    unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (unsigned long)hcpu;
+       bool came_up = action == CPU_ONLINE || action == CPU_ONLINE_FROZEN;
+       bool going_down = action == CPU_DOWN_PREPARE ||
+                         action == CPU_DOWN_PREPARE_FROZEN;
+
+       if (came_up)
+               bman_online_cpu(cpu);
+       else if (going_down)
+               bman_offline_cpu(cpu);
+
+       return NOTIFY_OK;
+}
+
+/* Notifier block that routes CPU hotplug events to the callback above */
+static struct notifier_block bman_hotplug_cpu_notifier = {
+       .notifier_call = bman_hotplug_cpu_callback,
+};
+
+/*
+ * Probe one "fsl,bman-portal" device.
+ *
+ * Looks up the cache-enabled (CE) and cache-inhibited (CI) register regions
+ * and the portal IRQ, maps both regions, then claims the first cpu that has
+ * no portal yet and initialises the portal as affine to that cpu. If every
+ * cpu already has a portal, the portal stays unassigned and probe succeeds.
+ *
+ * Returns 0 on success, -ENOMEM if the portal config cannot be allocated, and
+ * -ENXIO for a missing resource or IRQ, a failed mapping or a failed portal
+ * init. Every error path releases the mappings made so far.
+ */
+static int bman_portal_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct device_node *node = dev->of_node;
+       struct bm_portal_config *pcfg;
+       struct resource *addr_phys[2];
+       void __iomem *va;
+       int irq, cpu;
+
+       pcfg = devm_kmalloc(dev, sizeof(*pcfg), GFP_KERNEL);
+       if (!pcfg)
+               return -ENOMEM;
+
+       pcfg->dev = dev;
+
+       addr_phys[0] = platform_get_resource(pdev, IORESOURCE_MEM,
+                                            DPAA_PORTAL_CE);
+       if (!addr_phys[0]) {
+               dev_err(dev, "Can't get %s property 'reg::CE'\n",
+                       node->full_name);
+               return -ENXIO;
+       }
+
+       addr_phys[1] = platform_get_resource(pdev, IORESOURCE_MEM,
+                                            DPAA_PORTAL_CI);
+       if (!addr_phys[1]) {
+               dev_err(dev, "Can't get %s property 'reg::CI'\n",
+                       node->full_name);
+               return -ENXIO;
+       }
+
+       /* no cpu assigned until one is claimed below */
+       pcfg->cpu = -1;
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq <= 0) {
+               dev_err(dev, "Can't get %s IRQ\n", node->full_name);
+               return -ENXIO;
+       }
+       pcfg->irq = irq;
+
+       va = ioremap_prot(addr_phys[0]->start, resource_size(addr_phys[0]), 0);
+       if (!va) {
+               dev_err(dev, "ioremap failed\n");
+               return -ENXIO;
+       }
+       pcfg->addr_virt[DPAA_PORTAL_CE] = va;
+
+       va = ioremap_prot(addr_phys[1]->start, resource_size(addr_phys[1]),
+                         _PAGE_GUARDED | _PAGE_NO_CACHE);
+       if (!va) {
+               dev_err(dev, "ioremap failed\n");
+               goto err_unmap_ce;
+       }
+       pcfg->addr_virt[DPAA_PORTAL_CI] = va;
+
+       spin_lock(&bman_lock);
+       cpu = cpumask_next_zero(-1, &portal_cpus);
+       if (cpu >= nr_cpu_ids) {
+               /*
+                * unassigned portal, skip init
+                * NOTE(review): both mappings stay in place on this path;
+                * confirm that is intended for a portal nobody uses.
+                */
+               spin_unlock(&bman_lock);
+               return 0;
+       }
+
+       cpumask_set_cpu(cpu, &portal_cpus);
+       spin_unlock(&bman_lock);
+       pcfg->cpu = cpu;
+
+       /* init_pcfg() logs the failure itself; just undo both mappings */
+       if (!init_pcfg(pcfg))
+               goto err_unmap_ci;
+
+       /* clear irq affinity if assigned cpu is offline */
+       if (!cpu_online(cpu))
+               bman_offline_cpu(cpu);
+
+       return 0;
+
+err_unmap_ci:
+       iounmap(pcfg->addr_virt[DPAA_PORTAL_CI]);
+err_unmap_ce:
+       iounmap(pcfg->addr_virt[DPAA_PORTAL_CE]);
+       return -ENXIO;
+}
+
+/* Device-tree match table: this driver binds to "fsl,bman-portal" nodes. */
+static const struct of_device_id bman_portal_ids[] = {
+       { .compatible = "fsl,bman-portal" },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, bman_portal_ids);
+
+/* Platform driver for BMan portals; only a probe callback is provided. */
+static struct platform_driver bman_portal_driver = {
+       .probe = bman_portal_probe,
+       .driver = {
+               .of_match_table = bman_portal_ids,
+               .name = KBUILD_MODNAME,
+       },
+};
+
+/*
+ * Register the portal platform driver and, once that succeeded, the CPU
+ * hotplug notifier. Returns 0 on success or the negative error from
+ * platform_driver_register().
+ */
+static int __init bman_portal_driver_register(struct platform_driver *drv)
+{
+       int err = platform_driver_register(drv);
+
+       if (err >= 0) {
+               register_hotcpu_notifier(&bman_hotplug_cpu_notifier);
+               err = 0;
+       }
+
+       return err;
+}
+
+module_driver(bman_portal_driver,
+             bman_portal_driver_register, platform_driver_unregister);
diff --git a/drivers/soc/fsl/qbman/bman_priv.h b/drivers/soc/fsl/qbman/bman_priv.h
new file mode 100644 (file)
index 0000000..f6896a2
--- /dev/null
@@ -0,0 +1,80 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "dpaa_sys.h"
+
+#include <soc/fsl/bman.h>
+
+/* Portal processing (interrupt) sources */
+#define BM_PIRQ_RCRI   0x00000002      /* RCR Ring (below threshold) */
+
+/* Revision info (for errata and feature handling) */
+#define BMAN_REV10 0x0100
+#define BMAN_REV20 0x0200
+#define BMAN_REV21 0x0201
+extern u16 bman_ip_rev;        /* 0 if uninitialised, otherwise BMAN_REVx */
+
+extern struct gen_pool *bm_bpalloc;
+
+/* Configuration of a single BMan portal */
+struct bm_portal_config {
+       /*
+        * Corenet portal addresses;
+        * [0]==cache-enabled, [1]==cache-inhibited.
+        */
+       void __iomem *addr_virt[2];
+       /* Allow these to be joined in lists */
+       struct list_head list;
+       /* device this portal belongs to */
+       struct device *dev;
+       /* User-visible portal configuration settings */
+       /* portal is affined to this cpu */
+       int cpu;
+       /* portal interrupt line */
+       int irq;
+};
+
+struct bman_portal *bman_create_affine_portal(
+                       const struct bm_portal_config *config);
+/*
+ * The bman_p_***() variant below may be called while the cpu that the portal
+ * is affine to is not yet online.
+ * @p specifies which portal the API will use.
+ */
+int bman_p_irqsource_add(struct bman_portal *p, u32 bits);
+
+/*
+ * Used by all portal interrupt registers except 'inhibit'
+ * This mask contains all the "irqsource" bits visible to API users
+ */
+#define BM_PIRQ_VISIBLE        BM_PIRQ_RCRI
+
+const struct bm_portal_config *
+bman_get_bm_portal_config(const struct bman_portal *portal);
diff --git a/drivers/soc/fsl/qbman/bman_test.c b/drivers/soc/fsl/qbman/bman_test.c
new file mode 100644 (file)
index 0000000..09b1c96
--- /dev/null
@@ -0,0 +1,53 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bman_test.h"
+
+MODULE_AUTHOR("Geoff Thorpe");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("BMan testing");
+
+/*
+ * Module init: run the BMan API self-test once when
+ * CONFIG_FSL_BMAN_TEST_API is enabled. Always returns 0.
+ */
+static int test_init(void)
+{
+#ifdef CONFIG_FSL_BMAN_TEST_API
+       int remaining;
+
+       for (remaining = 1; remaining > 0; remaining--)
+               bman_test_api();
+#endif
+       return 0;
+}
+
+/* Module exit hook; intentionally empty. */
+static void test_exit(void)
+{
+}
+
+module_init(test_init);
+module_exit(test_exit);
diff --git a/drivers/soc/fsl/qbman/bman_test.h b/drivers/soc/fsl/qbman/bman_test.h
new file mode 100644 (file)
index 0000000..037ed34
--- /dev/null
@@ -0,0 +1,35 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bman_priv.h"
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+void bman_test_api(void);
diff --git a/drivers/soc/fsl/qbman/bman_test_api.c b/drivers/soc/fsl/qbman/bman_test_api.c
new file mode 100644 (file)
index 0000000..6f6bdd1
--- /dev/null
@@ -0,0 +1,151 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "bman_test.h"
+
+#define NUM_BUFS       93
+#define LOOPS          3
+#define BMAN_TOKEN_MASK 0x00FFFFFFFFFFLLU
+
+/* pool under test, created and freed by bman_test_api() */
+static struct bman_pool *pool;
+/* buffers released to the pool; contents are generated by bufs_init() */
+static struct bm_buffer bufs_in[NUM_BUFS] ____cacheline_aligned;
+/* buffers acquired back from the pool, compared against bufs_in[] */
+static struct bm_buffer bufs_out[NUM_BUFS] ____cacheline_aligned;
+/* reset by bufs_init(); not otherwise used in this file's visible code */
+static int bufs_received;
+
+/*
+ * Fill bufs_in[] with distinct test values (a fixed constant multiplied by
+ * the index) and reset the received counter.
+ */
+static void bufs_init(void)
+{
+       int idx = 0;
+
+       while (idx < NUM_BUFS) {
+               bm_buffer_set64(&bufs_in[idx], 0xfedc01234567LLU * idx);
+               idx++;
+       }
+       bufs_received = 0;
+}
+
+/*
+ * Three-way compare of two buffer addresses: returns -1, 0 or 1 when @a is
+ * below, equal to or above @b.
+ */
+static inline int bufs_cmp(const struct bm_buffer *a, const struct bm_buffer *b)
+{
+       u64 lhs, rhs;
+
+       if (bman_ip_rev != BMAN_REV20 && bman_ip_rev != BMAN_REV21) {
+               /* every address bit is significant: compare as-is */
+               if (bm_buffer_get64(a) < bm_buffer_get64(b))
+                       return -1;
+               return bm_buffer_get64(a) > bm_buffer_get64(b) ? 1 : 0;
+       }
+
+       /*
+        * On SoCs with BMan revision 2.0, BMan only respects the 40 LS-bits
+        * of buffer addresses, masking off the upper 8-bits on release
+        * commands. The API provides for 48-bit addresses because some SoCs
+        * support all 48-bits. When generating garbage addresses for
+        * testing, we either need to zero the upper 8-bits when releasing to
+        * BMan (otherwise we'll be disappointed when the buffers we acquire
+        * back from BMan don't match), or we need to mask the upper 8-bits
+        * off when comparing. We do the latter.
+        */
+       lhs = bm_buffer_get64(a) & BMAN_TOKEN_MASK;
+       rhs = bm_buffer_get64(b) & BMAN_TOKEN_MASK;
+
+       if (lhs < rhs)
+               return -1;
+       return lhs > rhs ? 1 : 0;
+}
+
+/*
+ * Check that every buffer in bufs_in[] appears exactly once in bufs_out[];
+ * warns for each input buffer that is missing or duplicated.
+ */
+static void bufs_confirm(void)
+{
+       int src, dst, hits;
+
+       for (src = 0; src < NUM_BUFS; src++) {
+               hits = 0;
+               for (dst = 0; dst < NUM_BUFS; dst++) {
+                       if (bufs_cmp(&bufs_in[src], &bufs_out[dst]) == 0)
+                               hits++;
+               }
+               WARN_ON(hits != 1);
+       }
+}
+
+/*
+ * BMan API self-test: create a pool, then LOOPS times release all NUM_BUFS
+ * test buffers to it, acquire them back and verify that the same set of
+ * buffers came out. Problems are reported through WARN_ON()/pr_crit().
+ *
+ * NOTE(review): the failure path does not free the pool.
+ */
+void bman_test_api(void)
+{
+       int i, rounds = LOOPS;
+
+       bufs_init();
+
+       pr_info("%s(): Starting\n", __func__);
+
+       pool = bman_new_pool();
+       if (!pool) {
+               pr_crit("bman_new_pool() failed\n");
+               goto failed;
+       }
+
+       do {
+               int batch, got;
+
+               /* Release buffers, in batches of at most 8 */
+               for (i = 0; i < NUM_BUFS; i += batch) {
+                       batch = NUM_BUFS - i;
+                       if (batch > 8)
+                               batch = 8;
+                       if (bman_release(pool, bufs_in + i, batch)) {
+                               pr_crit("bman_release() failed\n");
+                               goto failed;
+                       }
+               }
+
+               /* Acquire buffers, filling bufs_out[] from the top down */
+               while (i > 0) {
+                       batch = i < 8 ? i : 8;
+                       got = bman_acquire(pool, &bufs_out[i - batch], batch);
+                       WARN_ON(got != batch);
+                       i -= batch;
+               }
+               /* one further acquire must not hand out a buffer */
+               i = bman_acquire(pool, NULL, 1);
+               WARN_ON(i > 0);
+
+               bufs_confirm();
+       } while (--rounds);
+
+       /* Clean up */
+       bman_free_pool(pool);
+       pr_info("%s(): Finished\n", __func__);
+       return;
+
+failed:
+       WARN_ON(1);
+}
diff --git a/drivers/soc/fsl/qbman/dpaa_sys.h b/drivers/soc/fsl/qbman/dpaa_sys.h
new file mode 100644 (file)
index 0000000..b63fd72
--- /dev/null
@@ -0,0 +1,103 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DPAA_SYS_H
+#define __DPAA_SYS_H
+
+#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/kthread.h>
+#include <linux/vmalloc.h>
+#include <linux/platform_device.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/prefetch.h>
+#include <linux/genalloc.h>
+#include <asm/cacheflush.h>
+
+/* For 2-element tables related to cache-inhibited and cache-enabled mappings */
+#define DPAA_PORTAL_CE 0
+#define DPAA_PORTAL_CI 1
+
+#if (L1_CACHE_BYTES != 32) && (L1_CACHE_BYTES != 64)
+#error "Unsupported Cacheline Size"
+#endif
+
+/*
+ * Flush the 64 bytes starting at @p from the data cache using the
+ * architecture's primitive. Expands to nothing on architectures other than
+ * PPC, 32-bit ARM and ARM64.
+ */
+static inline void dpaa_flush(void *p)
+{
+#ifdef CONFIG_PPC
+       flush_dcache_range((unsigned long)p, (unsigned long)p+64);
+#elif defined(CONFIG_ARM)
+       /* 32-bit ARM is CONFIG_ARM; no CONFIG_ARM32 symbol exists */
+       __cpuc_flush_dcache_area(p, 64);
+#elif defined(CONFIG_ARM64)
+       __flush_dcache_area(p, 64);
+#endif
+}
+
+#define dpaa_invalidate(p) dpaa_flush(p)
+
+#define dpaa_zero(p) memset(p, 0, 64)
+
+/*
+ * Prefetch the 64 bytes at @p. With 32-byte cachelines this takes two
+ * prefetches, one per line.
+ */
+static inline void dpaa_touch_ro(void *p)
+{
+       /* L1_CACHE_BYTES is a compile-time constant, so the test folds away */
+       if (L1_CACHE_BYTES == 32)
+               prefetch(p + 32);
+       prefetch(p);
+}
+
+/*
+ * Commonly used combo: invalidate the 64 bytes at @p (dpaa_invalidate() is
+ * an alias for dpaa_flush()) and then prefetch them again.
+ */
+static inline void dpaa_invalidate_touch_ro(void *p)
+{
+       dpaa_invalidate(p);
+       dpaa_touch_ro(p);
+}
+
+
+#ifdef CONFIG_FSL_DPAA_CHECKING
+#define DPAA_ASSERT(x) WARN_ON(!(x))
+#else
+#define DPAA_ASSERT(x)
+#endif
+
+/*
+ * cyclic helper for rings: number of entries from @first up to, but not
+ * including, @last on a ring of @ringsize entries, wrapping where needed.
+ */
+static inline u8 dpaa_cyc_diff(u8 ringsize, u8 first, u8 last)
+{
+       /* 'first' is included, 'last' is excluded */
+       return (first > last ? ringsize : 0) + last - first;
+}
+
+/* Offset applied to genalloc pools due to zero being an error return */
+#define DPAA_GENALLOC_OFF      0x80000000
+
+#endif /* __DPAA_SYS_H */
diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c
new file mode 100644 (file)
index 0000000..119054b
--- /dev/null
@@ -0,0 +1,2881 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "qman_priv.h"
+
+#define DQRR_MAXFILL   15
+#define EQCR_ITHRESH   4       /* if EQCR congests, interrupt threshold */
+#define IRQNAME                "QMan portal %d"
+#define MAX_IRQNAME    16      /* big enough for "QMan portal %d" */
+#define QMAN_POLL_LIMIT 32
+#define QMAN_PIRQ_DQRR_ITHRESH 12
+#define QMAN_PIRQ_MR_ITHRESH 4
+#define QMAN_PIRQ_IPERIOD 100
+
+/* Portal register assists */
+
+/* Cache-inhibited register offsets */
+#define QM_REG_EQCR_PI_CINH    0x0000
+#define QM_REG_EQCR_CI_CINH    0x0004
+#define QM_REG_EQCR_ITR                0x0008
+#define QM_REG_DQRR_PI_CINH    0x0040
+#define QM_REG_DQRR_CI_CINH    0x0044
+#define QM_REG_DQRR_ITR                0x0048
+#define QM_REG_DQRR_DCAP       0x0050
+#define QM_REG_DQRR_SDQCR      0x0054
+#define QM_REG_DQRR_VDQCR      0x0058
+#define QM_REG_DQRR_PDQCR      0x005c
+#define QM_REG_MR_PI_CINH      0x0080
+#define QM_REG_MR_CI_CINH      0x0084
+#define QM_REG_MR_ITR          0x0088
+#define QM_REG_CFG             0x0100
+#define QM_REG_ISR             0x0e00
+#define QM_REG_IER             0x0e04
+#define QM_REG_ISDR            0x0e08
+#define QM_REG_IIR             0x0e0c
+#define QM_REG_ITPR            0x0e14
+
+/* Cache-enabled register offsets */
+#define QM_CL_EQCR             0x0000
+#define QM_CL_DQRR             0x1000
+#define QM_CL_MR               0x2000
+#define QM_CL_EQCR_PI_CENA     0x3000
+#define QM_CL_EQCR_CI_CENA     0x3100
+#define QM_CL_DQRR_PI_CENA     0x3200
+#define QM_CL_DQRR_CI_CENA     0x3300
+#define QM_CL_MR_PI_CENA       0x3400
+#define QM_CL_MR_CI_CENA       0x3500
+#define QM_CL_CR               0x3800
+#define QM_CL_RR0              0x3900
+#define QM_CL_RR1              0x3940
+
+/*
+ * BTW, the drivers (and h/w programming model) already obtain the required
+ * synchronisation for portal accesses and data-dependencies. Use of barrier()s
+ * or other order-preserving primitives simply degrade performance. Hence the
+ * use of the __raw_*() interfaces, which simply ensure that the compiler treats
+ * the portal registers as volatile
+ */
+
+/*
+ * Cache-enabled ring access: address of 64-byte entry @idx counted from
+ * @base. Both arguments are parenthesised so that compound expressions can
+ * be passed safely.
+ */
+#define qm_cl(base, idx)       ((void *)(base) + ((idx) << 6))
+
+/*
+ * Portal modes.
+ *   Enum types;
+ *     pmode == production mode
+ *     cmode == consumption mode,
+ *     dmode == h/w dequeue mode.
+ *   Enum values use 3 letter codes. First letter matches the portal mode,
+ *   remaining two letters indicate;
+ *     ci == cache-inhibited portal register
+ *     ce == cache-enabled portal register
+ *     vb == in-band valid-bit (cache-enabled)
+ *     dc == DCA (Discrete Consumption Acknowledgment), DQRR-only
+ *   As for "enum qm_dqrr_dmode", it should be self-explanatory.
+ */
+/* EQCR production mode */
+enum qm_eqcr_pmode {           /* matches QCSP_CFG::EPM */
+       qm_eqcr_pci = 0,        /* PI index, cache-inhibited */
+       qm_eqcr_pce = 1,        /* PI index, cache-enabled */
+       qm_eqcr_pvb = 2         /* valid-bit */
+};
+/* DQRR h/w dequeue mode */
+enum qm_dqrr_dmode {           /* matches QCSP_CFG::DP */
+       qm_dqrr_dpush = 0,      /* SDQCR  + VDQCR */
+       qm_dqrr_dpull = 1       /* PDQCR */
+};
+/* DQRR production mode */
+enum qm_dqrr_pmode {           /* s/w-only */
+       qm_dqrr_pci,            /* reads DQRR_PI_CINH */
+       qm_dqrr_pce,            /* reads DQRR_PI_CENA */
+       qm_dqrr_pvb             /* reads valid-bit */
+};
+/* DQRR consumption mode */
+enum qm_dqrr_cmode {           /* matches QCSP_CFG::DCM */
+       qm_dqrr_cci = 0,        /* CI index, cache-inhibited */
+       qm_dqrr_cce = 1,        /* CI index, cache-enabled */
+       qm_dqrr_cdc = 2         /* Discrete Consumption Acknowledgment */
+};
+/* MR production mode */
+enum qm_mr_pmode {             /* s/w-only */
+       qm_mr_pci,              /* reads MR_PI_CINH */
+       qm_mr_pce,              /* reads MR_PI_CENA */
+       qm_mr_pvb               /* reads valid-bit */
+};
+/* MR consumption mode */
+enum qm_mr_cmode {             /* matches QCSP_CFG::MM */
+       qm_mr_cci = 0,          /* CI index, cache-inhibited */
+       qm_mr_cce = 1           /* CI index, cache-enabled */
+};
+
+/* --- Portal structures --- */
+
+#define QM_EQCR_SIZE           8
+#define QM_DQRR_SIZE           16
+#define QM_MR_SIZE             8
+
+/*
+ * "Enqueue Command"
+ * Packed, so the field offsets below are exactly as declared.
+ * NOTE(review): presumably one 64-byte EQCR ring slot; the total depends on
+ * sizeof(struct qm_fd), which is declared elsewhere - confirm.
+ */
+struct qm_eqcr_entry {
+       u8 _ncw_verb; /* writes to this are non-coherent */
+       u8 dca;
+       u16 seqnum;
+       u32 orp;        /* 24-bit */
+       u32 fqid;       /* 24-bit */
+       u32 tag;
+       struct qm_fd fd;
+       u8 __reserved3[32];
+} __packed;
+#define QM_EQCR_VERB_VBIT              0x80
+#define QM_EQCR_VERB_CMD_MASK          0x61    /* but only one value; */
+#define QM_EQCR_VERB_CMD_ENQUEUE       0x01
+#define QM_EQCR_SEQNUM_NESN            0x8000  /* Advance NESN */
+#define QM_EQCR_SEQNUM_NLIS            0x4000  /* More fragments to come */
+#define QM_EQCR_SEQNUM_SEQMASK         0x3fff  /* sequence number goes here */
+
+/*
+ * Software-side bookkeeping for the enqueue command ring (EQCR).
+ * NOTE(review): field meanings are inferred from their names (ring base and
+ * current entry, consumer index, free entries, interrupt threshold, current
+ * valid-bit) - confirm against the code that uses this struct.
+ */
+struct qm_eqcr {
+       struct qm_eqcr_entry *ring, *cursor;
+       u8 ci, available, ithresh, vbit;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       /* only present in checking builds */
+       u32 busy;
+       enum qm_eqcr_pmode pmode;
+#endif
+};
+
+/*
+ * Software-side bookkeeping for the DQRR ring; entries are read-only here.
+ * NOTE(review): field meanings are inferred from their names (ring base and
+ * current entry, producer/consumer index, fill level, interrupt threshold,
+ * current valid-bit) - confirm against the code that uses this struct.
+ */
+struct qm_dqrr {
+       const struct qm_dqrr_entry *ring, *cursor;
+       u8 pi, ci, fill, ithresh, vbit;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       /* only present in checking builds */
+       enum qm_dqrr_dmode dmode;
+       enum qm_dqrr_pmode pmode;
+       enum qm_dqrr_cmode cmode;
+#endif
+};
+
+/*
+ * Software-side bookkeeping for the MR ring.
+ * NOTE(review): field meanings are inferred from their names (ring base and
+ * current entry, producer/consumer index, fill level, interrupt threshold,
+ * current valid-bit) - confirm against the code that uses this struct.
+ */
+struct qm_mr {
+       union qm_mr_entry *ring, *cursor;
+       u8 pi, ci, fill, ithresh, vbit;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       /* only present in checking builds */
+       enum qm_mr_pmode pmode;
+       enum qm_mr_cmode cmode;
+#endif
+};
+
+/* MC (Management Command) command */
+/* "Query FQ"; the packed layout adds up to 64 bytes */
+struct qm_mcc_queryfq {
+       u8 _ncw_verb;
+       u8 __reserved1[3];
+       u32 fqid;       /* 24-bit */
+       u8 __reserved2[56];
+} __packed;
+/* "Alter FQ State Commands" - used with the QM_MCC_VERB_ALTER_* verbs */
+struct qm_mcc_alterfq {
+       u8 _ncw_verb;
+       u8 __reserved1[3];
+       u32 fqid;       /* 24-bit */
+       u8 __reserved2;
+       u8 count;       /* number of consecutive FQID */
+       u8 __reserved3[10];
+       u32 context_b;  /* frame queue context b */
+       u8 __reserved4[40];
+} __packed;
+
+/*
+ * "Query CGR"
+ *
+ * Not marked __packed, unlike its siblings; every member is a u8 so the
+ * layout is 64 bytes without padding either way.
+ */
+struct qm_mcc_querycgr {
+       u8 _ncw_verb;
+       u8 __reserved1[30];
+       u8 cgid;
+       u8 __reserved2[32];
+};
+
+/* "Query WQ" - used with QM_MCC_VERB_QUERYWQ and _QUERYWQ_DEDICATED */
+struct qm_mcc_querywq {
+       u8 _ncw_verb;
+       u8 __reserved;
+       /* select channel if verb != QUERYWQ_DEDICATED */
+       u16 channel_wq; /* ignores wq (3 lsbits): _res[0-2] */
+       u8 __reserved2[60];
+} __packed;
+
+/* Values for the verb byte of a management command */
+#define QM_MCC_VERB_VBIT               0x80
+#define QM_MCC_VERB_MASK               0x7f    /* the verb itself lives in these bits: */
+#define QM_MCC_VERB_INITFQ_PARKED      0x40
+#define QM_MCC_VERB_INITFQ_SCHED       0x41
+#define QM_MCC_VERB_QUERYFQ            0x44
+#define QM_MCC_VERB_QUERYFQ_NP         0x45    /* "non-programmable" fields */
+#define QM_MCC_VERB_QUERYWQ            0x46
+#define QM_MCC_VERB_QUERYWQ_DEDICATED  0x47
+#define QM_MCC_VERB_ALTER_SCHED                0x48    /* Schedule FQ */
+#define QM_MCC_VERB_ALTER_FE           0x49    /* Force Eligible FQ */
+#define QM_MCC_VERB_ALTER_RETIRE       0x4a    /* Retire FQ */
+#define QM_MCC_VERB_ALTER_OOS          0x4b    /* Take FQ out of service */
+#define QM_MCC_VERB_ALTER_FQXON                0x4d    /* FQ XON */
+#define QM_MCC_VERB_ALTER_FQXOFF       0x4e    /* FQ XOFF */
+#define QM_MCC_VERB_INITCGR            0x50
+#define QM_MCC_VERB_MODIFYCGR          0x51
+#define QM_MCC_VERB_CGRTESTWRITE       0x52
+#define QM_MCC_VERB_QUERYCGR           0x58
+#define QM_MCC_VERB_QUERYCONGESTION    0x59
+/*
+ * Overlay of every management command layout. The anonymous struct gives
+ * access to the verb byte that each layout starts with.
+ */
+union qm_mc_command {
+       struct {
+               u8 _ncw_verb; /* writes to this are non-coherent */
+               u8 __reserved[63];
+       };
+       struct qm_mcc_initfq initfq;
+       struct qm_mcc_queryfq queryfq;
+       struct qm_mcc_alterfq alterfq;
+       struct qm_mcc_initcgr initcgr;
+       struct qm_mcc_querycgr querycgr;
+       struct qm_mcc_querywq querywq;
+       struct qm_mcc_queryfq_np queryfq_np;
+};
+
+/* MC (Management Command) result */
+/* "Query FQ" */
+struct qm_mcr_queryfq {
+       u8 verb;
+       u8 result;      /* one of QM_MCR_RESULT_* */
+       u8 __reserved1[8];
+       struct qm_fqd fqd;      /* the FQD fields are here */
+       u8 __reserved2[30];
+} __packed;
+
+/* "Alter FQ State Commands" - all-u8 layout, 64 bytes */
+struct qm_mcr_alterfq {
+       u8 verb;
+       u8 result;      /* one of QM_MCR_RESULT_* */
+       u8 fqs;         /* Frame Queue Status */
+       u8 __reserved1[61];
+};
+/*
+ * Result-side constants: the verb values mirror the command verbs, then
+ * come the 'result' codes, the 'fqs' status bits and the polling budget
+ * used by qm_mc_result_timeout().
+ */
+#define QM_MCR_VERB_RRID               0x80
+#define QM_MCR_VERB_MASK               QM_MCC_VERB_MASK
+#define QM_MCR_VERB_INITFQ_PARKED      QM_MCC_VERB_INITFQ_PARKED
+#define QM_MCR_VERB_INITFQ_SCHED       QM_MCC_VERB_INITFQ_SCHED
+#define QM_MCR_VERB_QUERYFQ            QM_MCC_VERB_QUERYFQ
+#define QM_MCR_VERB_QUERYFQ_NP         QM_MCC_VERB_QUERYFQ_NP
+#define QM_MCR_VERB_QUERYWQ            QM_MCC_VERB_QUERYWQ
+#define QM_MCR_VERB_QUERYWQ_DEDICATED  QM_MCC_VERB_QUERYWQ_DEDICATED
+#define QM_MCR_VERB_ALTER_SCHED                QM_MCC_VERB_ALTER_SCHED
+#define QM_MCR_VERB_ALTER_FE           QM_MCC_VERB_ALTER_FE
+#define QM_MCR_VERB_ALTER_RETIRE       QM_MCC_VERB_ALTER_RETIRE
+#define QM_MCR_VERB_ALTER_OOS          QM_MCC_VERB_ALTER_OOS
+#define QM_MCR_RESULT_NULL             0x00
+#define QM_MCR_RESULT_OK               0xf0
+#define QM_MCR_RESULT_ERR_FQID         0xf1
+#define QM_MCR_RESULT_ERR_FQSTATE      0xf2
+#define QM_MCR_RESULT_ERR_NOTEMPTY     0xf3    /* OOS fails if FQ is !empty */
+#define QM_MCR_RESULT_ERR_BADCHANNEL   0xf4
+#define QM_MCR_RESULT_PENDING          0xf8
+#define QM_MCR_RESULT_ERR_BADCOMMAND   0xff
+#define QM_MCR_FQS_ORLPRESENT          0x02    /* ORL fragments to come */
+#define QM_MCR_FQS_NOTEMPTY            0x01    /* FQ has enqueued frames */
+#define QM_MCR_TIMEOUT                 10000   /* us */
+/*
+ * Overlay of every management result layout. The anonymous struct gives
+ * access to the 'verb' and 'result' bytes that each layout starts with.
+ */
+union qm_mc_result {
+       struct {
+               u8 verb;
+               u8 result;
+               u8 __reserved1[62];
+       };
+       struct qm_mcr_queryfq queryfq;
+       struct qm_mcr_alterfq alterfq;
+       struct qm_mcr_querycgr querycgr;
+       struct qm_mcr_querycongestion querycongestion;
+       struct qm_mcr_querywq querywq;
+       struct qm_mcr_queryfq_np queryfq_np;
+};
+
+/* Software state for the management command interface of one portal */
+struct qm_mc {
+       union qm_mc_command *cr;        /* the command register */
+       union qm_mc_result *rr;         /* first of two response registers */
+       /*
+        * rridx selects which response register (rr + rridx) answers the
+        * next command; vbit is the valid-bit to put in its verb. Both
+        * toggle each time qm_mc_result() returns a result.
+        */
+       u8 rridx, vbit;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       enum {
+               /* Can be _mc_start()ed */
+               qman_mc_idle,
+               /* Can be _mc_commit()ed or _mc_abort()ed */
+               qman_mc_user,
+               /* Can only be _mc_retry()ed */
+               qman_mc_hw
+       } state;
+#endif
+};
+
+/* Base addresses of the two mapped regions of a portal */
+struct qm_addr {
+       void __iomem *ce;       /* cache-enabled */
+       void __iomem *ci;       /* cache-inhibited */
+};
+
+/* Low-level portal object: mapped addresses plus per-ring software state */
+struct qm_portal {
+       /*
+        * In the non-CONFIG_FSL_DPAA_CHECKING case, the following stuff up to
+        * and including 'mc' fits within a cacheline (yay!). The 'config' part
+        * is setup-only, so isn't a cause for a concern. In other words, don't
+        * rearrange this structure on a whim, there be dragons ...
+        */
+       struct qm_addr addr;
+       struct qm_eqcr eqcr;
+       struct qm_dqrr dqrr;
+       struct qm_mr mr;
+       struct qm_mc mc;
+} ____cacheline_aligned;
+
+/* Cache-inhibited register access. */
+/* Read the 32-bit register at 'offset' within the cache-inhibited region */
+static inline u32 qm_in(struct qm_portal *p, u32 offset)
+{
+       void __iomem *reg = p->addr.ci + offset;
+
+       return __raw_readl(reg);
+}
+
+/* Write 'val' to the 32-bit register at 'offset' in the cache-inhibited region */
+static inline void qm_out(struct qm_portal *p, u32 offset, u32 val)
+{
+       void __iomem *reg = p->addr.ci + offset;
+
+       __raw_writel(val, reg);
+}
+
+/* Cache Enabled Portal Access */
+/* Pass the cache-enabled address at 'offset' to dpaa_invalidate() */
+static inline void qm_cl_invalidate(struct qm_portal *p, u32 offset)
+{
+       void __iomem *line = p->addr.ce + offset;
+
+       dpaa_invalidate(line);
+}
+
+/* Pass the cache-enabled address at 'offset' to dpaa_touch_ro() */
+static inline void qm_cl_touch_ro(struct qm_portal *p, u32 offset)
+{
+       void __iomem *line = p->addr.ce + offset;
+
+       dpaa_touch_ro(line);
+}
+
+/* Read the 32-bit value at 'offset' within the cache-enabled region */
+static inline u32 qm_ce_in(struct qm_portal *p, u32 offset)
+{
+       void __iomem *reg = p->addr.ce + offset;
+
+       return __raw_readl(reg);
+}
+
+/* --- EQCR API --- */
+
+/*
+ * EQCR_SHIFT is log2 of the entry size; EQCR_CARRY is the byte size of the
+ * whole ring expressed as an address bit. The carry-clear trick below
+ * presumably relies on the ring being aligned to twice its size - TODO
+ * confirm against the portal memory map.
+ */
+#define EQCR_SHIFT     ilog2(sizeof(struct qm_eqcr_entry))
+#define EQCR_CARRY     (uintptr_t)(QM_EQCR_SIZE << EQCR_SHIFT)
+
+/* Wrap a ring pointer by masking the EQCR_CARRY bit out of its address */
+static struct qm_eqcr_entry *eqcr_carryclear(struct qm_eqcr_entry *p)
+{
+       return (struct qm_eqcr_entry *)((uintptr_t)p & ~EQCR_CARRY);
+}
+
+/* Derive the ring index of an entry from the bits of its address */
+static int eqcr_ptr2idx(struct qm_eqcr_entry *e)
+{
+       uintptr_t slot = (uintptr_t)e >> EQCR_SHIFT;
+
+       return slot & (QM_EQCR_SIZE - 1);
+}
+
+/* Advance 'cursor' by one entry, flipping 'vbit' whenever the ring wraps */
+static inline void eqcr_inc(struct qm_eqcr *eqcr)
+{
+       struct qm_eqcr_entry *next = eqcr->cursor + 1;
+       struct qm_eqcr_entry *wrapped = eqcr_carryclear(next);
+
+       /* the two differ only if 'next' stepped past the last entry */
+       if (wrapped != next)
+               eqcr->vbit ^= QM_EQCR_VERB_VBIT;
+       eqcr->cursor = wrapped;
+}
+
+/*
+ * Initialise the software view of the EQCR from the portal's current
+ * indices and program the EQCR-related fields of QM_REG_CFG.
+ *
+ * @portal: portal whose addr.ce and addr.ci are already set
+ * @pmode: production mode, written to QCSP_CFG::EPM
+ * @eq_stash_thresh: written to QCSP_CFG: EST
+ * @eq_stash_prio: written to QCSP_CFG: EP
+ *
+ * Always returns 0.
+ */
+static inline int qm_eqcr_init(struct qm_portal *portal,
+                               enum qm_eqcr_pmode pmode,
+                               unsigned int eq_stash_thresh,
+                               int eq_stash_prio)
+{
+       struct qm_eqcr *eqcr = &portal->eqcr;
+       u32 cfg;
+       u8 pi;
+
+       eqcr->ring = portal->addr.ce + QM_CL_EQCR;
+       eqcr->ci = qm_in(portal, QM_REG_EQCR_CI_CINH) & (QM_EQCR_SIZE - 1);
+       qm_cl_invalidate(portal, QM_CL_EQCR_CI_CENA);
+       pi = qm_in(portal, QM_REG_EQCR_PI_CINH) & (QM_EQCR_SIZE - 1);
+       eqcr->cursor = eqcr->ring + pi;
+       /* the bit just above the index in PI selects the valid-bit polarity */
+       eqcr->vbit = (qm_in(portal, QM_REG_EQCR_PI_CINH) & QM_EQCR_SIZE) ?
+                    QM_EQCR_VERB_VBIT : 0;
+       /* at most QM_EQCR_SIZE - 1 entries are ever handed out */
+       eqcr->available = QM_EQCR_SIZE - 1 -
+                         dpaa_cyc_diff(QM_EQCR_SIZE, eqcr->ci, pi);
+       eqcr->ithresh = qm_in(portal, QM_REG_EQCR_ITR);
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       eqcr->busy = 0;
+       eqcr->pmode = pmode;
+#endif
+       /* keep the low 24 bits of the config, replace the top byte */
+       cfg = (qm_in(portal, QM_REG_CFG) & 0x00ffffff) |
+             (eq_stash_thresh << 28) | /* QCSP_CFG: EST */
+             (eq_stash_prio << 26) | /* QCSP_CFG: EP */
+             ((pmode & 0x3) << 24); /* QCSP_CFG::EPM */
+       qm_out(portal, QM_REG_CFG, cfg);
+       return 0;
+}
+
+/*
+ * Return the 3-bit field at bits 30:28 of QM_REG_CFG, which is where
+ * qm_eqcr_init() places the EQCR stashing threshold.
+ */
+static inline unsigned int qm_eqcr_get_ci_stashing(struct qm_portal *portal)
+{
+       u32 cfg = qm_in(portal, QM_REG_CFG);
+
+       return (cfg >> 28) & 0x7;
+}
+
+/*
+ * Sanity-check the EQCR at teardown. The portal's producer and consumer
+ * indices are compared with the software state and any mismatch is
+ * logged; nothing is written to the portal.
+ */
+static inline void qm_eqcr_finish(struct qm_portal *portal)
+{
+       struct qm_eqcr *eqcr = &portal->eqcr;
+       u8 pi = qm_in(portal, QM_REG_EQCR_PI_CINH) & (QM_EQCR_SIZE - 1);
+       u8 ci = qm_in(portal, QM_REG_EQCR_CI_CINH) & (QM_EQCR_SIZE - 1);
+
+       DPAA_ASSERT(!eqcr->busy);
+       /* software advanced its cursor past what the portal has seen */
+       if (pi != eqcr_ptr2idx(eqcr->cursor))
+               pr_crit("losing uncommitted EQCR entries\n");
+       /* the portal consumed entries software has not accounted for */
+       if (ci != eqcr->ci)
+               pr_crit("missing existing EQCR completions\n");
+       /* entries are still outstanding between ci and the cursor */
+       if (eqcr->ci != eqcr_ptr2idx(eqcr->cursor))
+               pr_crit("EQCR destroyed unquiesced\n");
+}
+
+/*
+ * Claim the entry at the cursor for a new enqueue command, using only the
+ * cached 'available' count. Returns the zeroed entry, or NULL if the
+ * count is zero.
+ */
+static inline struct qm_eqcr_entry *qm_eqcr_start_no_stash(struct qm_portal
+                                                                *portal)
+{
+       struct qm_eqcr *eqcr = &portal->eqcr;
+       struct qm_eqcr_entry *entry;
+
+       DPAA_ASSERT(!eqcr->busy);
+       if (eqcr->available == 0)
+               return NULL;
+
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       eqcr->busy = 1;
+#endif
+       entry = eqcr->cursor;
+       dpaa_zero(entry);
+       return entry;
+}
+
+/*
+ * Claim the entry at the cursor for a new enqueue command. When the
+ * cached 'available' count is zero, the consumer index is re-read from
+ * the cache-enabled region first. Returns the zeroed entry, or NULL if
+ * the ring is still full after that refresh.
+ */
+static inline struct qm_eqcr_entry *qm_eqcr_start_stash(struct qm_portal
+                                                               *portal)
+{
+       struct qm_eqcr *eqcr = &portal->eqcr;
+       u8 diff, old_ci;
+
+       DPAA_ASSERT(!eqcr->busy);
+       if (!eqcr->available) {
+               old_ci = eqcr->ci;
+               eqcr->ci = qm_ce_in(portal, QM_CL_EQCR_CI_CENA) &
+                          (QM_EQCR_SIZE - 1);
+               /* entries consumed since the last refresh become available */
+               diff = dpaa_cyc_diff(QM_EQCR_SIZE, old_ci, eqcr->ci);
+               eqcr->available += diff;
+               if (!diff)
+                       return NULL;
+       }
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       eqcr->busy = 1;
+#endif
+       dpaa_zero(eqcr->cursor);
+       return eqcr->cursor;
+}
+
+/*
+ * Assertions shared by the commit paths: an entry must have been started,
+ * 'orp' and 'fqid' must fit in 24 bits, and a slot must be available.
+ */
+static inline void eqcr_commit_checks(struct qm_eqcr *eqcr)
+{
+       DPAA_ASSERT(eqcr->busy);
+       DPAA_ASSERT(eqcr->cursor->orp == (eqcr->cursor->orp & 0x00ffffff));
+       DPAA_ASSERT(eqcr->cursor->fqid == (eqcr->cursor->fqid & 0x00ffffff));
+       DPAA_ASSERT(eqcr->available >= 1);
+}
+
+/*
+ * Commit the entry obtained from a qm_eqcr_start_*() call by writing its
+ * verb byte.
+ *
+ * @portal: portal owning the EQCR
+ * @myverb: command verb; the ring's current valid-bit is ORed in
+ */
+static inline void qm_eqcr_pvb_commit(struct qm_portal *portal, u8 myverb)
+{
+       struct qm_eqcr *eqcr = &portal->eqcr;
+       struct qm_eqcr_entry *eqcursor;
+
+       eqcr_commit_checks(eqcr);
+       DPAA_ASSERT(eqcr->pmode == qm_eqcr_pvb);
+       /* order the writes to the entry body before the verb write */
+       dma_wmb();
+       eqcursor = eqcr->cursor;
+       eqcursor->_ncw_verb = myverb | eqcr->vbit;
+       dpaa_flush(eqcursor);
+       eqcr_inc(eqcr);
+       eqcr->available--;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       eqcr->busy = 0;
+#endif
+}
+
+/* Touch the cache-enabled EQCR consumer index ahead of qm_eqcr_cce_update() */
+static inline void qm_eqcr_cce_prefetch(struct qm_portal *portal)
+{
+       qm_cl_touch_ro(portal, QM_CL_EQCR_CI_CENA);
+}
+
+/*
+ * Refresh the cached consumer index from the cache-enabled region and
+ * credit the newly consumed entries to 'available'.
+ *
+ * Returns the number of entries consumed since the previous refresh.
+ */
+static inline u8 qm_eqcr_cce_update(struct qm_portal *portal)
+{
+       struct qm_eqcr *eqcr = &portal->eqcr;
+       u8 diff, old_ci = eqcr->ci;
+
+       eqcr->ci = qm_ce_in(portal, QM_CL_EQCR_CI_CENA) & (QM_EQCR_SIZE - 1);
+       /* drop the line so the next read fetches a fresh copy */
+       qm_cl_invalidate(portal, QM_CL_EQCR_CI_CENA);
+       diff = dpaa_cyc_diff(QM_EQCR_SIZE, old_ci, eqcr->ci);
+       eqcr->available += diff;
+       return diff;
+}
+
+/* Cache 'ithresh' in the EQCR state and write it to QM_REG_EQCR_ITR */
+static inline void qm_eqcr_set_ithresh(struct qm_portal *portal, u8 ithresh)
+{
+       portal->eqcr.ithresh = ithresh;
+       qm_out(portal, QM_REG_EQCR_ITR, ithresh);
+}
+
+/* Return the cached number of EQCR entries that can still be claimed */
+static inline u8 qm_eqcr_get_avail(struct qm_portal *portal)
+{
+       return portal->eqcr.available;
+}
+
+/* Return the number of EQCR entries in use, out of QM_EQCR_SIZE - 1 */
+static inline u8 qm_eqcr_get_fill(struct qm_portal *portal)
+{
+       return QM_EQCR_SIZE - 1 - portal->eqcr.available;
+}
+
+/* --- DQRR API --- */
+
+/*
+ * DQRR_SHIFT is log2 of the entry size; DQRR_CARRY is the byte size of the
+ * whole ring expressed as an address bit (cleared by dqrr_carryclear()).
+ */
+#define DQRR_SHIFT     ilog2(sizeof(struct qm_dqrr_entry))
+#define DQRR_CARRY     (uintptr_t)(QM_DQRR_SIZE << DQRR_SHIFT)
+
+/* Wrap a ring pointer by masking the DQRR_CARRY bit out of its address */
+static const struct qm_dqrr_entry *dqrr_carryclear(
+                                       const struct qm_dqrr_entry *p)
+{
+       return (const struct qm_dqrr_entry *)((uintptr_t)p & ~DQRR_CARRY);
+}
+
+/* Derive the ring index of an entry from the bits of its address */
+static inline int dqrr_ptr2idx(const struct qm_dqrr_entry *e)
+{
+       uintptr_t slot = (uintptr_t)e >> DQRR_SHIFT;
+
+       return slot & (QM_DQRR_SIZE - 1);
+}
+
+/* Return the entry after 'e', wrapping at the end of the ring */
+static const struct qm_dqrr_entry *dqrr_inc(const struct qm_dqrr_entry *e)
+{
+       const struct qm_dqrr_entry *next = e + 1;
+
+       return dqrr_carryclear(next);
+}
+
+/*
+ * Read-modify-write QM_REG_CFG, replacing bits 23:20 with the low four
+ * bits of 'mf' and leaving every other bit as read.
+ */
+static inline void qm_dqrr_set_maxfill(struct qm_portal *portal, u8 mf)
+{
+       u32 cfg = qm_in(portal, QM_REG_CFG) & 0xff0fffff;
+
+       cfg |= (mf & (QM_DQRR_SIZE - 1)) << 20;
+       qm_out(portal, QM_REG_CFG, cfg);
+}
+
+/*
+ * Quiesce the dequeue commands, initialise the software view of the DQRR
+ * from the portal's current indices and program the DQRR fields of
+ * QM_REG_CFG.
+ *
+ * @portal: portal whose addr.ce and addr.ci are already set
+ * @config: not used by this function
+ * @dmode: dequeue mode; low bit written to the DP field
+ * @pmode: production mode; only recorded for checking
+ * @cmode: consumption mode; low two bits written to the DCM field
+ * @max_fill: low four bits written to the DQRR_MF field
+ *
+ * Always returns 0.
+ */
+static inline int qm_dqrr_init(struct qm_portal *portal,
+                              const struct qm_portal_config *config,
+                              enum qm_dqrr_dmode dmode,
+                              enum qm_dqrr_pmode pmode,
+                              enum qm_dqrr_cmode cmode, u8 max_fill)
+{
+       struct qm_dqrr *dqrr = &portal->dqrr;
+       u32 cfg;
+
+       /* Make sure the DQRR will be idle when we enable */
+       qm_out(portal, QM_REG_DQRR_SDQCR, 0);
+       qm_out(portal, QM_REG_DQRR_VDQCR, 0);
+       qm_out(portal, QM_REG_DQRR_PDQCR, 0);
+       dqrr->ring = portal->addr.ce + QM_CL_DQRR;
+       dqrr->pi = qm_in(portal, QM_REG_DQRR_PI_CINH) & (QM_DQRR_SIZE - 1);
+       dqrr->ci = qm_in(portal, QM_REG_DQRR_CI_CINH) & (QM_DQRR_SIZE - 1);
+       dqrr->cursor = dqrr->ring + dqrr->ci;
+       dqrr->fill = dpaa_cyc_diff(QM_DQRR_SIZE, dqrr->ci, dqrr->pi);
+       /* the bit just above the index in PI selects the valid-bit polarity */
+       dqrr->vbit = (qm_in(portal, QM_REG_DQRR_PI_CINH) & QM_DQRR_SIZE) ?
+                       QM_DQRR_VERB_VBIT : 0;
+       dqrr->ithresh = qm_in(portal, QM_REG_DQRR_ITR);
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       dqrr->dmode = dmode;
+       dqrr->pmode = pmode;
+       dqrr->cmode = cmode;
+#endif
+       /* Invalidate every ring entry before beginning */
+       for (cfg = 0; cfg < QM_DQRR_SIZE; cfg++)
+               dpaa_invalidate(qm_cl(dqrr->ring, cfg));
+       /* the two "0 ? x : 0" terms below are constant zero placeholders */
+       cfg = (qm_in(portal, QM_REG_CFG) & 0xff000f00) |
+               ((max_fill & (QM_DQRR_SIZE - 1)) << 20) | /* DQRR_MF */
+               ((dmode & 1) << 18) |                   /* DP */
+               ((cmode & 3) << 16) |                   /* DCM */
+               0xa0 |                                  /* RE+SE */
+               (0 ? 0x40 : 0) |                        /* Ignore RP */
+               (0 ? 0x10 : 0);                         /* Ignore SP */
+       qm_out(portal, QM_REG_CFG, cfg);
+       /* rewrites the same DQRR_MF field that 'cfg' already carries */
+       qm_dqrr_set_maxfill(portal, max_fill);
+       return 0;
+}
+
+/*
+ * Teardown check, compiled in only with CONFIG_FSL_DPAA_CHECKING: unless
+ * the ring uses qm_dqrr_cdc consumption, log if the cursor has moved past
+ * the recorded consumer index.
+ */
+static inline void qm_dqrr_finish(struct qm_portal *portal)
+{
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       struct qm_dqrr *dqrr = &portal->dqrr;
+
+       if (dqrr->cmode != qm_dqrr_cdc &&
+           dqrr->ci != dqrr_ptr2idx(dqrr->cursor))
+               pr_crit("Ignoring completed DQRR entries\n");
+#endif
+}
+
+/* Return the DQRR entry at the cursor, or NULL when 'fill' is zero */
+static inline const struct qm_dqrr_entry *qm_dqrr_current(
+                                               struct qm_portal *portal)
+{
+       struct qm_dqrr *dqrr = &portal->dqrr;
+
+       return dqrr->fill ? dqrr->cursor : NULL;
+}
+
+/*
+ * Step the cursor past the current DQRR entry. Returns the number of
+ * entries still pending after the step.
+ */
+static inline u8 qm_dqrr_next(struct qm_portal *portal)
+{
+       struct qm_dqrr *dqrr = &portal->dqrr;
+
+       DPAA_ASSERT(dqrr->fill);
+       dqrr->cursor = dqrr_inc(dqrr->cursor);
+       dqrr->fill--;
+       return dqrr->fill;
+}
+
+/*
+ * Poll for one newly produced DQRR entry by checking the valid-bit of the
+ * entry at the producer index. At most one entry is accounted per call.
+ */
+static inline void qm_dqrr_pvb_update(struct qm_portal *portal)
+{
+       struct qm_dqrr *dqrr = &portal->dqrr;
+       struct qm_dqrr_entry *res = qm_cl(dqrr->ring, dqrr->pi);
+
+       DPAA_ASSERT(dqrr->pmode == qm_dqrr_pvb);
+#ifndef CONFIG_FSL_PAMU
+       /*
+        * If PAMU is not available we need to invalidate the cache.
+        * When PAMU is available the cache is updated by stash
+        */
+       dpaa_invalidate_touch_ro(res);
+#endif
+       /*
+        *  when accessing 'verb', use __raw_readb() to ensure that compiler
+        * inlining doesn't try to optimise out "excess reads".
+        */
+       if ((__raw_readb(&res->verb) & QM_DQRR_VERB_VBIT) == dqrr->vbit) {
+               dqrr->pi = (dqrr->pi + 1) & (QM_DQRR_SIZE - 1);
+               /* the expected valid-bit flips on every ring wrap */
+               if (!dqrr->pi)
+                       dqrr->vbit ^= QM_DQRR_VERB_VBIT;
+               dqrr->fill++;
+       }
+}
+
+/*
+ * Acknowledge a single DQRR entry through the DCAP register.
+ *
+ * @portal: portal owning the DQRR
+ * @dq: the entry to acknowledge; must point into this portal's ring
+ * @park: non-zero sets the PK bit of the DCAP write
+ */
+static inline void qm_dqrr_cdc_consume_1ptr(struct qm_portal *portal,
+                                       const struct qm_dqrr_entry *dq,
+                                       int park)
+{
+       __maybe_unused struct qm_dqrr *dqrr = &portal->dqrr;
+       int idx = dqrr_ptr2idx(dq);
+
+       DPAA_ASSERT(dqrr->cmode == qm_dqrr_cdc);
+       DPAA_ASSERT((dqrr->ring + idx) == dq);
+       DPAA_ASSERT(idx < QM_DQRR_SIZE);
+       qm_out(portal, QM_REG_DQRR_DCAP, (0 << 8) | /* DQRR_DCAP::S */
+              ((park ? 1 : 0) << 6) |              /* DQRR_DCAP::PK */
+              idx);                                /* DQRR_DCAP::DCAP_CI */
+}
+
+/*
+ * Acknowledge several DQRR entries with one DCAP write: the S bit is set
+ * and 'bitmask' is placed in the upper 16 bits of the register value.
+ */
+static inline void qm_dqrr_cdc_consume_n(struct qm_portal *portal, u32 bitmask)
+{
+       __maybe_unused struct qm_dqrr *dqrr = &portal->dqrr;
+
+       DPAA_ASSERT(dqrr->cmode == qm_dqrr_cdc);
+       qm_out(portal, QM_REG_DQRR_DCAP, (1 << 8) | /* DQRR_DCAP::S */
+              (bitmask << 16));                    /* DQRR_DCAP::DCAP_CI */
+}
+
+/* Write 'sdqcr' to the QM_REG_DQRR_SDQCR register */
+static inline void qm_dqrr_sdqcr_set(struct qm_portal *portal, u32 sdqcr)
+{
+       qm_out(portal, QM_REG_DQRR_SDQCR, sdqcr);
+}
+
+/* Write 'vdqcr' to the QM_REG_DQRR_VDQCR register */
+static inline void qm_dqrr_vdqcr_set(struct qm_portal *portal, u32 vdqcr)
+{
+       qm_out(portal, QM_REG_DQRR_VDQCR, vdqcr);
+}
+
+/*
+ * Write 'ithresh' to QM_REG_DQRR_ITR. Unlike qm_eqcr_set_ithresh(), the
+ * cached dqrr.ithresh value is not updated here.
+ */
+static inline void qm_dqrr_set_ithresh(struct qm_portal *portal, u8 ithresh)
+{
+       qm_out(portal, QM_REG_DQRR_ITR, ithresh);
+}
+
+/* --- MR API --- */
+
+/*
+ * MR_SHIFT is log2 of the entry size; MR_CARRY is the byte size of the
+ * whole ring expressed as an address bit (cleared by mr_carryclear()).
+ */
+#define MR_SHIFT       ilog2(sizeof(union qm_mr_entry))
+#define MR_CARRY       (uintptr_t)(QM_MR_SIZE << MR_SHIFT)
+
+/* Wrap a ring pointer by masking the MR_CARRY bit out of its address */
+static union qm_mr_entry *mr_carryclear(union qm_mr_entry *p)
+{
+       return (union qm_mr_entry *)((uintptr_t)p & ~MR_CARRY);
+}
+
+/* Derive the ring index of an entry from the bits of its address */
+static inline int mr_ptr2idx(const union qm_mr_entry *e)
+{
+       uintptr_t slot = (uintptr_t)e >> MR_SHIFT;
+
+       return slot & (QM_MR_SIZE - 1);
+}
+
+/* Return the entry after 'e', wrapping at the end of the ring */
+static inline union qm_mr_entry *mr_inc(union qm_mr_entry *e)
+{
+       union qm_mr_entry *next = e + 1;
+
+       return mr_carryclear(next);
+}
+
+/*
+ * Initialise the software view of the message ring from the portal's
+ * current indices and program the MR consumption mode into QM_REG_CFG.
+ *
+ * @portal: portal whose addr.ce and addr.ci are already set
+ * @pmode: production mode; only recorded for checking
+ * @cmode: consumption mode; low bit written to QCSP_CFG:MM
+ *
+ * Always returns 0.
+ */
+static inline int qm_mr_init(struct qm_portal *portal, enum qm_mr_pmode pmode,
+                            enum qm_mr_cmode cmode)
+{
+       struct qm_mr *mr = &portal->mr;
+       u32 cfg;
+
+       mr->ring = portal->addr.ce + QM_CL_MR;
+       mr->pi = qm_in(portal, QM_REG_MR_PI_CINH) & (QM_MR_SIZE - 1);
+       mr->ci = qm_in(portal, QM_REG_MR_CI_CINH) & (QM_MR_SIZE - 1);
+       mr->cursor = mr->ring + mr->ci;
+       mr->fill = dpaa_cyc_diff(QM_MR_SIZE, mr->ci, mr->pi);
+       /* the bit just above the index in PI selects the valid-bit polarity */
+       mr->vbit = (qm_in(portal, QM_REG_MR_PI_CINH) & QM_MR_SIZE)
+               ? QM_MR_VERB_VBIT : 0;
+       mr->ithresh = qm_in(portal, QM_REG_MR_ITR);
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       mr->pmode = pmode;
+       mr->cmode = cmode;
+#endif
+       /* clear bits 11:8 of the config, then set bit 8 from 'cmode' */
+       cfg = (qm_in(portal, QM_REG_CFG) & 0xfffff0ff) |
+             ((cmode & 1) << 8);       /* QCSP_CFG:MM */
+       qm_out(portal, QM_REG_CFG, cfg);
+       return 0;
+}
+
+/* Teardown check: log if the cursor has moved past the consumer index */
+static inline void qm_mr_finish(struct qm_portal *portal)
+{
+       struct qm_mr *mr = &portal->mr;
+       int cursor_idx = mr_ptr2idx(mr->cursor);
+
+       if (cursor_idx != mr->ci)
+               pr_crit("Ignoring completed MR entries\n");
+}
+
+/* Return the MR entry at the cursor, or NULL when 'fill' is zero */
+static inline const union qm_mr_entry *qm_mr_current(struct qm_portal *portal)
+{
+       struct qm_mr *mr = &portal->mr;
+
+       return mr->fill ? mr->cursor : NULL;
+}
+
+/*
+ * Step the cursor past the current MR entry. Returns the number of
+ * entries still pending after the step.
+ */
+static inline int qm_mr_next(struct qm_portal *portal)
+{
+       struct qm_mr *mr = &portal->mr;
+
+       DPAA_ASSERT(mr->fill);
+       mr->cursor = mr_inc(mr->cursor);
+       mr->fill--;
+       return mr->fill;
+}
+
+/*
+ * Poll for one newly produced MR entry by checking the valid-bit of the
+ * entry at the producer index. At most one entry is accounted per call.
+ */
+static inline void qm_mr_pvb_update(struct qm_portal *portal)
+{
+       struct qm_mr *mr = &portal->mr;
+       union qm_mr_entry *res = qm_cl(mr->ring, mr->pi);
+
+       DPAA_ASSERT(mr->pmode == qm_mr_pvb);
+       /*
+        *  when accessing 'verb', use __raw_readb() to ensure that compiler
+        * inlining doesn't try to optimise out "excess reads".
+        */
+       if ((__raw_readb(&res->verb) & QM_MR_VERB_VBIT) == mr->vbit) {
+               mr->pi = (mr->pi + 1) & (QM_MR_SIZE - 1);
+               /* the expected valid-bit flips on every ring wrap */
+               if (!mr->pi)
+                       mr->vbit ^= QM_MR_VERB_VBIT;
+               mr->fill++;
+               res = mr_inc(res);
+       }
+       /* 'res' is now the entry the next poll will look at */
+       dpaa_invalidate_touch_ro(res);
+}
+
+/*
+ * Advance the MR consumer index by 'num' entries (modulo the ring size)
+ * and write the new index to QM_REG_MR_CI_CINH.
+ */
+static inline void qm_mr_cci_consume(struct qm_portal *portal, u8 num)
+{
+       struct qm_mr *mr = &portal->mr;
+
+       DPAA_ASSERT(mr->cmode == qm_mr_cci);
+       mr->ci = (mr->ci + num) & (QM_MR_SIZE - 1);
+       qm_out(portal, QM_REG_MR_CI_CINH, mr->ci);
+}
+
+/*
+ * Set the MR consumer index to the cursor's position and write it to
+ * QM_REG_MR_CI_CINH, acknowledging everything stepped over so far.
+ */
+static inline void qm_mr_cci_consume_to_current(struct qm_portal *portal)
+{
+       struct qm_mr *mr = &portal->mr;
+
+       DPAA_ASSERT(mr->cmode == qm_mr_cci);
+       mr->ci = mr_ptr2idx(mr->cursor);
+       qm_out(portal, QM_REG_MR_CI_CINH, mr->ci);
+}
+
+/* Write 'ithresh' to QM_REG_MR_ITR; the cached mr.ithresh is not updated */
+static inline void qm_mr_set_ithresh(struct qm_portal *portal, u8 ithresh)
+{
+       qm_out(portal, QM_REG_MR_ITR, ithresh);
+}
+
+/* --- Management command API --- */
+
+/*
+ * Initialise the management command state: locate the command and
+ * response registers and derive the next response index and valid-bit
+ * from the verb currently held in the command register.
+ *
+ * Always returns 0.
+ */
+static inline int qm_mc_init(struct qm_portal *portal)
+{
+       struct qm_mc *mc = &portal->mc;
+
+       mc->cr = portal->addr.ce + QM_CL_CR;
+       mc->rr = portal->addr.ce + QM_CL_RR0;
+       /* use the opposite of the valid-bit left in the command register */
+       mc->rridx = (__raw_readb(&mc->cr->_ncw_verb) & QM_MCC_VERB_VBIT)
+                   ? 0 : 1;
+       mc->vbit = mc->rridx ? QM_MCC_VERB_VBIT : 0;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       mc->state = qman_mc_idle;
+#endif
+       return 0;
+}
+
+/*
+ * Teardown check, compiled in only with CONFIG_FSL_DPAA_CHECKING: assert
+ * and log if a management command is still in flight.
+ */
+static inline void qm_mc_finish(struct qm_portal *portal)
+{
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       struct qm_mc *mc = &portal->mc;
+
+       DPAA_ASSERT(mc->state == qman_mc_idle);
+       if (mc->state != qman_mc_idle)
+               pr_crit("Losing incomplete MC command\n");
+#endif
+}
+
+/*
+ * Begin a management command. Returns the zeroed command register for
+ * the caller to fill in before calling qm_mc_commit().
+ */
+static inline union qm_mc_command *qm_mc_start(struct qm_portal *portal)
+{
+       struct qm_mc *mc = &portal->mc;
+
+       DPAA_ASSERT(mc->state == qman_mc_idle);
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       mc->state = qman_mc_user;
+#endif
+       dpaa_zero(mc->cr);
+       return mc->cr;
+}
+
+/*
+ * Submit the command prepared after qm_mc_start() by writing its verb.
+ *
+ * @portal: portal owning the command register
+ * @myverb: QM_MCC_VERB_* value; the current valid-bit is ORed in
+ */
+static inline void qm_mc_commit(struct qm_portal *portal, u8 myverb)
+{
+       struct qm_mc *mc = &portal->mc;
+       union qm_mc_result *rr = mc->rr + mc->rridx;
+
+       DPAA_ASSERT(mc->state == qman_mc_user);
+       /* order the writes to the command body before the verb write */
+       dma_wmb();
+       mc->cr->_ncw_verb = myverb | mc->vbit;
+       dpaa_flush(mc->cr);
+       /* prepare the response register that qm_mc_result() will poll */
+       dpaa_invalidate_touch_ro(rr);
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       mc->state = qman_mc_hw;
+#endif
+}
+
+/*
+ * Poll once for the result of the committed command. Returns NULL while
+ * the response verb still reads zero; otherwise returns the response and
+ * flips rridx/vbit ready for the next command.
+ */
+static inline union qm_mc_result *qm_mc_result(struct qm_portal *portal)
+{
+       struct qm_mc *mc = &portal->mc;
+       union qm_mc_result *rr = mc->rr + mc->rridx;
+
+       DPAA_ASSERT(mc->state == qman_mc_hw);
+       /*
+        *  The inactive response register's verb byte always returns zero until
+        * its command is submitted and completed. This includes the valid-bit,
+        * in case you were wondering...
+        */
+       if (!__raw_readb(&rr->verb)) {
+               dpaa_invalidate_touch_ro(rr);
+               return NULL;
+       }
+       mc->rridx ^= 1;
+       mc->vbit ^= QM_MCC_VERB_VBIT;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       mc->state = qman_mc_idle;
+#endif
+       return rr;
+}
+
+/*
+ * Poll for a management command result, waiting 1us between attempts for
+ * at most QM_MCR_TIMEOUT attempts.
+ *
+ * @portal: portal the command was committed on
+ * @mcr: receives the result pointer, or NULL on timeout
+ *
+ * Returns the number of attempts left (non-zero) on success, 0 on timeout.
+ */
+static inline int qm_mc_result_timeout(struct qm_portal *portal,
+                                      union qm_mc_result **mcr)
+{
+       int remaining;
+
+       for (remaining = QM_MCR_TIMEOUT; remaining; remaining--) {
+               *mcr = qm_mc_result(portal);
+               if (*mcr)
+                       break;
+               udelay(1);
+       }
+
+       return remaining;
+}
+
+/* Set the 'mask' bits in fq->flags via set_bits() */
+static inline void fq_set(struct qman_fq *fq, u32 mask)
+{
+       set_bits(mask, &fq->flags);
+}
+
+/* Clear the 'mask' bits in fq->flags via clear_bits() */
+static inline void fq_clear(struct qman_fq *fq, u32 mask)
+{
+       clear_bits(mask, &fq->flags);
+}
+
+/*
+ * Return non-zero if any 'mask' bit is set in fq->flags. The value is the
+ * masked flags themselves, not a normalised 0/1.
+ */
+static inline int fq_isset(struct qman_fq *fq, u32 mask)
+{
+       return fq->flags & mask;
+}
+
+/* Return 1 if none of the 'mask' bits is set in fq->flags, else 0 */
+static inline int fq_isclear(struct qman_fq *fq, u32 mask)
+{
+       return !(fq->flags & mask);
+}
+
+/*
+ * High-level portal object: the low-level qm_portal plus the driver state
+ * (interrupt configuration, dequeue settings, congestion tracking and
+ * deferred work) that goes with it.
+ */
+struct qman_portal {
+       struct qm_portal p;
+       /* PORTAL_BITS_*** - dynamic, strictly internal */
+       unsigned long bits;
+       /* interrupt sources processed by portal_isr(), configurable */
+       unsigned long irq_sources;
+       u32 use_eqcr_ci_stashing;
+       /* only 1 volatile dequeue at a time */
+       struct qman_fq *vdqcr_owned;
+       u32 sdqcr;
+       /* probing time config params for cpu-affine portals */
+       const struct qm_portal_config *config;
+       /* needed for providing a non-NULL device to dma_map_***() */
+       struct platform_device *pdev;
+       /* 2-element array. cgrs[0] is mask, cgrs[1] is snapshot. */
+       struct qman_cgrs *cgrs;
+       /* linked-list of CSCN handlers. */
+       struct list_head cgr_cbs;
+       /* list lock */
+       spinlock_t cgr_lock;
+       struct work_struct congestion_work;
+       struct work_struct mr_work;
+       /* name passed to request_irq() for this portal */
+       char irqname[MAX_IRQNAME];
+};
+
+/*
+ * Per-CPU ("affine") portal bookkeeping. Each CPU has its own
+ * qman_affine_portal instance; the remaining tables are indexed by CPU
+ * number. NOTE(review): affine_portals is the only one with external
+ * linkage - confirm that is intentional.
+ */
+static cpumask_t affine_mask;
+static DEFINE_SPINLOCK(affine_mask_lock);
+static u16 affine_channels[NR_CPUS];
+static DEFINE_PER_CPU(struct qman_portal, qman_affine_portal);
+struct qman_portal *affine_portals[NR_CPUS];
+
+/*
+ * Return the calling CPU's portal. This uses get_cpu_var(), so every call
+ * must be balanced by put_affine_portal().
+ */
+static inline struct qman_portal *get_affine_portal(void)
+{
+       return &get_cpu_var(qman_affine_portal);
+}
+
+/* Release the portal obtained from get_affine_portal() */
+static inline void put_affine_portal(void)
+{
+       put_cpu_var(qman_affine_portal);
+}
+
+static struct workqueue_struct *qm_portal_wq;
+
+/*
+ * Create the "qman_portal_wq" workqueue and store it in qm_portal_wq.
+ *
+ * Returns 0 on success, -ENOMEM if the workqueue cannot be allocated.
+ */
+int qman_wq_alloc(void)
+{
+       qm_portal_wq = alloc_workqueue("qman_portal_wq", 0, 1);
+
+       return qm_portal_wq ? 0 : -ENOMEM;
+}
+
+/*
+ * This is what everything can wait on, even if it migrates to a different cpu
+ * to the one whose affine portal it is waiting on.
+ */
+static DECLARE_WAIT_QUEUE_HEAD(affine_queue);
+
+/*
+ * FQ lookup table, allocated by qman_alloc_fq_table() with two slots per
+ * FQID; num_fqids is the FQID count it was sized for.
+ */
+static struct qman_fq **fq_table;
+static u32 num_fqids;
+
+/*
+ * Allocate the zeroed FQ lookup table with two slots per FQID
+ * (fqid_to_fq() looks up slot fqid * 2).
+ *
+ * @_num_fqids: number of FQIDs the table must cover
+ *
+ * Returns 0 on success, or -ENOMEM if the table cannot be allocated,
+ * including when its size cannot be represented. num_fqids is only
+ * updated once the table exists, so a failed call does not leave a
+ * non-zero count paired with a NULL table.
+ */
+int qman_alloc_fq_table(u32 _num_fqids)
+{
+       size_t entries;
+
+       /* 2 * _num_fqids must fit the u32 index taken by idx_to_fq() */
+       if (_num_fqids > (u32)~0U / 2)
+               return -ENOMEM;
+       entries = (size_t)_num_fqids * 2;
+       /* ... and the byte count must not wrap size_t */
+       if (entries > ~(size_t)0 / sizeof(*fq_table))
+               return -ENOMEM;
+
+       fq_table = vzalloc(entries * sizeof(*fq_table));
+       if (!fq_table)
+               return -ENOMEM;
+       num_fqids = _num_fqids;
+
+       pr_debug("Allocated fq lookup table at %p, entry count %u\n",
+                fq_table, num_fqids * 2);
+       return 0;
+}
+
+/*
+ * Look up an FQ object by table index. Returns the stored pointer, which
+ * may be NULL for an unused slot.
+ *
+ * The index is range-checked only with CONFIG_FSL_DPAA_CHECKING; without
+ * it the caller must guarantee idx < num_fqids * 2.
+ */
+static struct qman_fq *idx_to_fq(u32 idx)
+{
+       struct qman_fq *fq;
+
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       if (WARN_ON(idx >= num_fqids * 2))
+               return NULL;
+#endif
+       fq = fq_table[idx];
+       DPAA_ASSERT(!fq || idx == fq->idx);
+
+       return fq;
+}
+
+/*
+ * Only returns full-service fq objects, not enqueue-only
+ * references (QMAN_FQ_FLAG_NO_MODIFY).
+ *
+ * The lookup uses the even table slot, fqid * 2.
+ */
+static struct qman_fq *fqid_to_fq(u32 fqid)
+{
+       return idx_to_fq(fqid * 2);
+}
+
+/*
+ * Convert a 32-bit tag back to its FQ object. On 64-bit builds the tag is
+ * an fq_table index; otherwise the tag is the object's address itself.
+ * Inverse of fq_to_tag().
+ */
+static struct qman_fq *tag_to_fq(u32 tag)
+{
+#if BITS_PER_LONG == 64
+       return idx_to_fq(tag);
+#else
+       return (struct qman_fq *)tag;
+#endif
+}
+
+/*
+ * Produce the 32-bit tag for an FQ object: its table index on 64-bit
+ * builds (a pointer would not fit), its address otherwise.
+ */
+static u32 fq_to_tag(struct qman_fq *fq)
+{
+#if BITS_PER_LONG == 64
+       return fq->idx;
+#else
+       return (u32)fq;
+#endif
+}
+
+static u32 __poll_portal_slow(struct qman_portal *p, u32 is);
+static inline unsigned int __poll_portal_fast(struct qman_portal *p,
+                                       unsigned int poll_limit);
+static void qm_congestion_task(struct work_struct *work);
+static void qm_mr_process_task(struct work_struct *work);
+
+/*
+ * Portal interrupt handler.
+ *
+ * @irq: interrupt number (unused)
+ * @ptr: the struct qman_portal the interrupt was requested for
+ *
+ * Returns IRQ_NONE if no enabled source is pending, else IRQ_HANDLED.
+ */
+static irqreturn_t portal_isr(int irq, void *ptr)
+{
+       struct qman_portal *p = ptr;
+
+       u32 clear = QM_DQAVAIL_MASK | p->irq_sources;
+       /* only consider the sources this portal has enabled */
+       u32 is = qm_in(&p->p, QM_REG_ISR) & p->irq_sources;
+
+       if (unlikely(!is))
+               return IRQ_NONE;
+
+       /* DQRR-handling if it's interrupt-driven */
+       if (is & QM_PIRQ_DQRI)
+               __poll_portal_fast(p, QMAN_POLL_LIMIT);
+       /* Handling of anything else that's interrupt-driven */
+       clear |= __poll_portal_slow(p, is);
+       /* write the accumulated bits back to the status register */
+       qm_out(&p->p, QM_REG_ISR, clear);
+       return IRQ_HANDLED;
+}
+
+/*
+ * Drain FQRNI messages from the portal's message ring.
+ *
+ * Entries are consumed one at a time for as long as they are FQRNIs.
+ *
+ * Returns 0 once the ring is still empty after a grace period, or -1 if
+ * an entry of any other type is found (that entry is left unconsumed).
+ */
+static int drain_mr_fqrni(struct qm_portal *p)
+{
+       const union qm_mr_entry *msg;
+loop:
+       /* account for anything produced since the previous pass */
+       qm_mr_pvb_update(p);
+       msg = qm_mr_current(p);
+       if (!msg) {
+               /*
+                * if MR was full and h/w had other FQRNI entries to produce, we
+                * need to allow it time to produce those entries once the
+                * existing entries are consumed. A worst-case situation
+                * (fully-loaded system) means h/w sequencers may have to do 3-4
+                * other things before servicing the portal's MR pump, each of
+                * which (if slow) may take ~50 qman cycles (which is ~200
+                * processor cycles). So rounding up and then multiplying this
+                * worst-case estimate by a factor of 10, just to be
+                * ultra-paranoid, goes as high as 10,000 cycles. A 1ms delay
+                * comfortably covers that, whereas spinning on jiffies would
+                * wait for 10,000 timer ticks. NB, we consume one entry at a
+                * time, so h/w has an opportunity to produce new entries well
+                * before the ring has been fully consumed, so we're being
+                * *really* paranoid here.
+                */
+               mdelay(1);
+               /* the valid-bit must be re-polled or 'fill' cannot change */
+               qm_mr_pvb_update(p);
+               msg = qm_mr_current(p);
+               if (!msg)
+                       return 0;
+       }
+       if ((msg->verb & QM_MR_VERB_TYPE_MASK) != QM_MR_VERB_FQRNI) {
+               /* We aren't draining anything but FQRNIs */
+               pr_err("Found verb 0x%x in MR\n", msg->verb);
+               return -1;
+       }
+       qm_mr_next(p);
+       qm_mr_cci_consume(p, 1);
+       goto loop;
+}
+
+/*
+ * Initialise @portal from the portal configuration @c.
+ *
+ * Sets up the EQCR, DQRR, MR and management-command interfaces on the mapped
+ * portal registers, allocates the two-entry CGR array (entry 0 is the mask of
+ * CGRs taken from @cgrs, or all CGRs if @cgrs is NULL; entry 1 is the
+ * congestion snapshot), registers a "qportal-<channel>" platform device,
+ * requests the portal IRQ and checks that the rings are clean before clearing
+ * ISDR/IIR and programming SDQCR.
+ *
+ * Returns 0 on success. Every failure, whatever its cause, unwinds the steps
+ * already completed and returns -EIO.
+ */
+static int qman_create_portal(struct qman_portal *portal,
+                             const struct qm_portal_config *c,
+                             const struct qman_cgrs *cgrs)
+{
+       struct qm_portal *p;
+       char buf[16];
+       int ret;
+       u32 isdr;
+
+       p = &portal->p;
+
+#ifdef CONFIG_FSL_PAMU
+       /* PAMU is required for stashing */
+       portal->use_eqcr_ci_stashing = ((qman_ip_rev >= QMAN_REV30) ? 1 : 0);
+#else
+       portal->use_eqcr_ci_stashing = 0;
+#endif
+       /*
+        * prep the low-level portal struct with the mapped addresses from the
+        * config, everything that follows depends on it and "config" is more
+        * for (de)reference
+        */
+       p->addr.ce = c->addr_virt[DPAA_PORTAL_CE];
+       p->addr.ci = c->addr_virt[DPAA_PORTAL_CI];
+       /*
+        * If CI-stashing is used, the current defaults use a threshold of 3,
+        * and stash with high-than-DQRR priority.
+        */
+       if (qm_eqcr_init(p, qm_eqcr_pvb,
+                       portal->use_eqcr_ci_stashing ? 3 : 0, 1)) {
+               dev_err(c->dev, "EQCR initialisation failed\n");
+               goto fail_eqcr;
+       }
+       if (qm_dqrr_init(p, c, qm_dqrr_dpush, qm_dqrr_pvb,
+                       qm_dqrr_cdc, DQRR_MAXFILL)) {
+               dev_err(c->dev, "DQRR initialisation failed\n");
+               goto fail_dqrr;
+       }
+       if (qm_mr_init(p, qm_mr_pvb, qm_mr_cci)) {
+               dev_err(c->dev, "MR initialisation failed\n");
+               goto fail_mr;
+       }
+       if (qm_mc_init(p)) {
+               dev_err(c->dev, "MC initialisation failed\n");
+               goto fail_mc;
+       }
+       /* static interrupt-gating controls */
+       qm_dqrr_set_ithresh(p, QMAN_PIRQ_DQRR_ITHRESH);
+       qm_mr_set_ithresh(p, QMAN_PIRQ_MR_ITHRESH);
+       qm_out(p, QM_REG_ITPR, QMAN_PIRQ_IPERIOD);
+       portal->cgrs = kmalloc(2 * sizeof(*cgrs), GFP_KERNEL);
+       if (!portal->cgrs)
+               goto fail_cgrs;
+       /* initial snapshot is no-depletion */
+       qman_cgrs_init(&portal->cgrs[1]);
+       if (cgrs)
+               portal->cgrs[0] = *cgrs;
+       else
+               /* if the given mask is NULL, assume all CGRs can be seen */
+               qman_cgrs_fill(&portal->cgrs[0]);
+       INIT_LIST_HEAD(&portal->cgr_cbs);
+       spin_lock_init(&portal->cgr_lock);
+       INIT_WORK(&portal->congestion_work, qm_congestion_task);
+       INIT_WORK(&portal->mr_work, qm_mr_process_task);
+       portal->bits = 0;
+       portal->sdqcr = QM_SDQCR_SOURCE_CHANNELS | QM_SDQCR_COUNT_UPTO3 |
+                       QM_SDQCR_DEDICATED_PRECEDENCE | QM_SDQCR_TYPE_PRIO_QOS |
+                       QM_SDQCR_TOKEN_SET(0xab) | QM_SDQCR_CHANNELS_DEDICATED;
+       /* bounded, so an unexpectedly wide channel number cannot overrun buf */
+       snprintf(buf, sizeof(buf), "qportal-%d", c->channel);
+       portal->pdev = platform_device_alloc(buf, -1);
+       if (!portal->pdev)
+               goto fail_devalloc;
+       if (dma_set_mask(&portal->pdev->dev, DMA_BIT_MASK(40)))
+               goto fail_devadd;
+       ret = platform_device_add(portal->pdev);
+       if (ret)
+               goto fail_devadd;
+       /* start with every source set in ISDR, bits are cleared step by step */
+       isdr = 0xffffffff;
+       qm_out(p, QM_REG_ISDR, isdr);
+       portal->irq_sources = 0;
+       qm_out(p, QM_REG_IER, 0);
+       qm_out(p, QM_REG_ISR, 0xffffffff);
+       snprintf(portal->irqname, MAX_IRQNAME, IRQNAME, c->cpu);
+       if (request_irq(c->irq, portal_isr, 0, portal->irqname, portal)) {
+               dev_err(c->dev, "request_irq() failed\n");
+               goto fail_irq;
+       }
+       if (c->cpu != -1 && irq_can_set_affinity(c->irq) &&
+           irq_set_affinity(c->irq, cpumask_of(c->cpu))) {
+               dev_err(c->dev, "irq_set_affinity() failed\n");
+               goto fail_affinity;
+       }
+
+       /* Need EQCR to be empty before continuing */
+       isdr &= ~QM_PIRQ_EQCI;
+       qm_out(p, QM_REG_ISDR, isdr);
+       ret = qm_eqcr_get_fill(p);
+       if (ret) {
+               dev_err(c->dev, "EQCR unclean\n");
+               goto fail_eqcr_empty;
+       }
+       isdr &= ~(QM_PIRQ_DQRI | QM_PIRQ_MRI);
+       qm_out(p, QM_REG_ISDR, isdr);
+       if (qm_dqrr_current(p)) {
+               /* not fatal: report it and consume whatever is there */
+               dev_err(c->dev, "DQRR unclean\n");
+               qm_dqrr_cdc_consume_n(p, 0xffff);
+       }
+       if (qm_mr_current(p) && drain_mr_fqrni(p)) {
+               /* special handling, drain just in case it's a few FQRNIs */
+               const union qm_mr_entry *e = qm_mr_current(p);
+
+               dev_err(c->dev, "MR dirty, VB 0x%x, rc 0x%x, addr 0x%x\n",
+                       e->verb, e->ern.rc, e->ern.fd.addr_lo);
+               goto fail_dqrr_mr_empty;
+       }
+       /* Success */
+       portal->config = c;
+       qm_out(p, QM_REG_ISDR, 0);
+       qm_out(p, QM_REG_IIR, 0);
+       /* Write a sane SDQCR */
+       qm_dqrr_sdqcr_set(p, portal->sdqcr);
+       return 0;
+
+       /* unwind in the reverse order of the setup above */
+fail_dqrr_mr_empty:
+fail_eqcr_empty:
+fail_affinity:
+       free_irq(c->irq, portal);
+fail_irq:
+       platform_device_del(portal->pdev);
+fail_devadd:
+       platform_device_put(portal->pdev);
+fail_devalloc:
+       kfree(portal->cgrs);
+fail_cgrs:
+       qm_mc_finish(p);
+fail_mc:
+       qm_mr_finish(p);
+fail_mr:
+       qm_dqrr_finish(p);
+fail_dqrr:
+       qm_eqcr_finish(p);
+fail_eqcr:
+       return -EIO;
+}
+
+/*
+ * Initialise the per-cpu portal belonging to c->cpu and, on success, record
+ * it in affine_mask, affine_channels[] and affine_portals[] while holding
+ * affine_mask_lock.
+ *
+ * Returns the portal, or NULL if qman_create_portal() failed.
+ */
+struct qman_portal *qman_create_affine_portal(const struct qm_portal_config *c,
+                                             const struct qman_cgrs *cgrs)
+{
+       struct qman_portal *affine = &per_cpu(qman_affine_portal, c->cpu);
+
+       if (qman_create_portal(affine, c, cgrs))
+               return NULL;
+
+       spin_lock(&affine_mask_lock);
+       cpumask_set_cpu(c->cpu, &affine_mask);
+       affine_channels[c->cpu] = c->channel;
+       affine_portals[c->cpu] = affine;
+       spin_unlock(&affine_mask_lock);
+
+       return affine;
+}
+
+/*
+ * Tear down a portal set up by qman_create_portal(): stop dequeues, quiesce
+ * EQCR, release the IRQ, free the CGR array, finish the MC/MR/DQRR/EQCR
+ * interfaces and remove the platform device. qm->config is reset to NULL
+ * once everything else is done.
+ */
+static void qman_destroy_portal(struct qman_portal *qm)
+{
+       const struct qm_portal_config *pcfg;
+
+       /* Stop dequeues on the portal */
+       qm_dqrr_sdqcr_set(&qm->p, 0);
+
+       /*
+        * NB we do this to "quiesce" EQCR. If we add enqueue-completions or
+        * something related to QM_PIRQ_EQCI, this may need fixing.
+        * Also, due to the prefetching model used for CI updates in the enqueue
+        * path, this update will only invalidate the CI cacheline *after*
+        * working on it, so we need to call this twice to ensure a full update
+        * irrespective of where the enqueue processing was at when the teardown
+        * began.
+        */
+       qm_eqcr_cce_update(&qm->p);
+       qm_eqcr_cce_update(&qm->p);
+       pcfg = qm->config;
+
+       /* same (irq, dev_id) pair that qman_create_portal() requested */
+       free_irq(pcfg->irq, qm);
+
+       kfree(qm->cgrs);
+       /* finish the interfaces in the reverse order of their init */
+       qm_mc_finish(&qm->p);
+       qm_mr_finish(&qm->p);
+       qm_dqrr_finish(&qm->p);
+       qm_eqcr_finish(&qm->p);
+
+       platform_device_del(qm->pdev);
+       platform_device_put(qm->pdev);
+
+       qm->config = NULL;
+}
+
+/*
+ * Destroy the portal handed out by get_affine_portal() and remove its cpu
+ * from affine_mask.
+ *
+ * Returns the configuration the portal was created with.
+ */
+const struct qm_portal_config *qman_destroy_affine_portal(void)
+{
+       struct qman_portal *portal = get_affine_portal();
+       /* fetch these up front: qman_destroy_portal() resets portal->config */
+       const struct qm_portal_config *cfg = portal->config;
+       int owner_cpu = cfg->cpu;
+
+       qman_destroy_portal(portal);
+
+       spin_lock(&affine_mask_lock);
+       cpumask_clear_cpu(owner_cpu, &affine_mask);
+       spin_unlock(&affine_mask_lock);
+       put_affine_portal();
+
+       return cfg;
+}
+
+/*
+ * Apply to @fq the state/flag update that goes with message ring verb @verb
+ * (FQRL, FQRN or FQPN); any other verb leaves @fq untouched. @msg is only
+ * read for its fq.fqs bits when handling FQRN.
+ */
+static inline void fq_state_change(struct qman_portal *p, struct qman_fq *fq,
+                                  const union qm_mr_entry *msg, u8 verb)
+{
+       if (verb == QM_MR_VERB_FQRL) {
+               DPAA_ASSERT(fq_isset(fq, QMAN_FQ_STATE_ORL));
+               fq_clear(fq, QMAN_FQ_STATE_ORL);
+               return;
+       }
+
+       if (verb == QM_MR_VERB_FQRN) {
+               DPAA_ASSERT(fq->state == qman_fq_state_parked ||
+                           fq->state == qman_fq_state_sched);
+               DPAA_ASSERT(fq_isset(fq, QMAN_FQ_STATE_CHANGING));
+               fq_clear(fq, QMAN_FQ_STATE_CHANGING);
+               /* carry the NOTEMPTY/ORLPRESENT status bits over as FQ flags */
+               if (msg->fq.fqs & QM_MR_FQS_NOTEMPTY)
+                       fq_set(fq, QMAN_FQ_STATE_NE);
+               if (msg->fq.fqs & QM_MR_FQS_ORLPRESENT)
+                       fq_set(fq, QMAN_FQ_STATE_ORL);
+               fq->state = qman_fq_state_retired;
+               return;
+       }
+
+       if (verb == QM_MR_VERB_FQPN) {
+               DPAA_ASSERT(fq->state == qman_fq_state_sched);
+               DPAA_ASSERT(fq_isclear(fq, QMAN_FQ_STATE_CHANGING));
+               fq->state = qman_fq_state_parked;
+       }
+}
+
+/*
+ * Work handler for qman_portal.congestion_work.
+ *
+ * Issues a QUERYCONGESTION management command, masks the returned state with
+ * p->cgrs[0], compares it against the previous snapshot in p->cgrs[1], stores
+ * the new snapshot and invokes the callback of every registered CGR whose bit
+ * changed. The whole sequence runs with p->cgr_lock held.
+ */
+static void qm_congestion_task(struct work_struct *work)
+{
+       struct qman_portal *p = container_of(work, struct qman_portal,
+                                            congestion_work);
+       struct qman_cgrs rr, c;
+       union qm_mc_result *mcr;
+       struct qman_cgr *cgr;
+
+       spin_lock(&p->cgr_lock);
+       qm_mc_start(&p->p);
+       qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION);
+       if (!qm_mc_result_timeout(&p->p, &mcr)) {
+               /* no result: drop the lock and give up on this run */
+               spin_unlock(&p->cgr_lock);
+               dev_crit(p->config->dev, "QUERYCONGESTION timeout\n");
+               return;
+       }
+       /* mask out the ones I'm not interested in */
+       qman_cgrs_and(&rr, (struct qman_cgrs *)&mcr->querycongestion.state,
+                     &p->cgrs[0]);
+       /* check previous snapshot for delta, enter/exit congestion */
+       qman_cgrs_xor(&c, &rr, &p->cgrs[1]);
+       /* update snapshot */
+       qman_cgrs_cp(&p->cgrs[1], &rr);
+       /* Invoke callback */
+       list_for_each_entry(cgr, &p->cgr_cbs, node)
+               /* only CGRs whose bit differs from the old snapshot */
+               if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid))
+                       cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid));
+       spin_unlock(&p->cgr_lock);
+}
+
+/*
+ * Work handler for qman_portal.mr_work.
+ *
+ * With preemption disabled, walks the message ring until no entry is
+ * current, dispatching each message by verb: FQ state-change notifications
+ * update the FQ and call its cb.fqs callback (if set), software ERNs are
+ * passed to the FQ's cb.ern callback. All processed entries are consumed in
+ * one go after the loop.
+ */
+static void qm_mr_process_task(struct work_struct *work)
+{
+       struct qman_portal *p = container_of(work, struct qman_portal,
+                                            mr_work);
+       const union qm_mr_entry *msg;
+       struct qman_fq *fq;
+       u8 verb, num = 0;
+
+       preempt_disable();
+
+       while (1) {
+               qm_mr_pvb_update(&p->p);
+               msg = qm_mr_current(&p->p);
+               if (!msg)
+                       break;
+
+               verb = msg->verb & QM_MR_VERB_TYPE_MASK;
+               /* The message is a software ERN iff the 0x20 bit is clear */
+               if (verb & 0x20) {
+                       switch (verb) {
+                       case QM_MR_VERB_FQRNI:
+                               /* nada, we drop FQRNIs on the floor */
+                               break;
+                       case QM_MR_VERB_FQRN:
+                       case QM_MR_VERB_FQRL:
+                               /* Lookup in the retirement table */
+                               fq = fqid_to_fq(msg->fq.fqid);
+                               if (WARN_ON(!fq))
+                                       break;
+                               fq_state_change(p, fq, msg, verb);
+                               if (fq->cb.fqs)
+                                       fq->cb.fqs(p, fq, msg);
+                               break;
+                       case QM_MR_VERB_FQPN:
+                               /* Parked */
+                               fq = tag_to_fq(msg->fq.contextB);
+                               fq_state_change(p, fq, msg, verb);
+                               if (fq->cb.fqs)
+                                       fq->cb.fqs(p, fq, msg);
+                               break;
+                       case QM_MR_VERB_DC_ERN:
+                               /* DCP ERN */
+                               pr_crit_once("Leaking DCP ERNs!\n");
+                               break;
+                       default:
+                               pr_crit("Invalid MR verb 0x%02x\n", verb);
+                       }
+               } else {
+                       /* Its a software ERN */
+                       fq = tag_to_fq(msg->ern.tag);
+                       fq->cb.ern(p, fq, msg);
+               }
+               /* counted even when the message itself was dropped/ignored */
+               num++;
+               qm_mr_next(&p->p);
+       }
+
+       /* hand all 'num' processed entries back in a single consume */
+       qm_mr_cci_consume(&p->p, num);
+       preempt_enable();
+}
+
+/*
+ * Handle the slow-path interrupt sources set in @is: CSCI and MRI are
+ * deferred to the portal's work items on the current cpu, EQRI is handled
+ * inline. Returns @is unchanged.
+ */
+static u32 __poll_portal_slow(struct qman_portal *p, u32 is)
+{
+       if (is & QM_PIRQ_CSCI)
+               queue_work_on(smp_processor_id(), qm_portal_wq,
+                             &p->congestion_work);
+
+       if (is & QM_PIRQ_EQRI) {
+               qm_eqcr_cce_update(&p->p);
+               qm_eqcr_set_ithresh(&p->p, 0);
+               wake_up(&affine_queue);
+       }
+
+       if (is & QM_PIRQ_MRI)
+               queue_work_on(smp_processor_id(), qm_portal_wq, &p->mr_work);
+
+       return is;
+}
+
+/*
+ * remove some slowish-path stuff from the "fast path" and make sure it isn't
+ * inlined.
+ *
+ * Drops the portal's record of @fq as the volatile-dequeue owner, clears the
+ * FQ's VDQCR flag and wakes anyone sleeping on affine_queue.
+ */
+static noinline void clear_vdqcr(struct qman_portal *p, struct qman_fq *fq)
+{
+       p->vdqcr_owned = NULL;
+       fq_clear(fq, QMAN_FQ_STATE_VDQCR);
+       wake_up(&affine_queue);
+}
+
+/*
+ * The only states that would conflict with other things if they ran at the
+ * same time on the same cpu are:
+ *
+ *   (i) setting/clearing vdqcr_owned, and
+ *  (ii) clearing the NE (Not Empty) flag.
+ *
+ * Both are safe. Because;
+ *
+ *   (i) this clearing can only occur after qman_volatile_dequeue() has set the
+ *      vdqcr_owned field (which it does before setting VDQCR), and
+ *      qman_volatile_dequeue() blocks interrupts and preemption while this is
+ *      done so that we can't interfere.
+ *  (ii) the NE flag is only cleared after qman_retire_fq() has set it, and as
+ *      with (i) that API prevents us from interfering until it's safe.
+ *
+ * The good thing is that qman_volatile_dequeue() and qman_retire_fq() run far
+ * less frequently (ie. per-FQ) than __poll_portal_fast() does, so the nett
+ * advantage comes from this function not having to "lock" anything at all.
+ *
+ * Note also that the callbacks are invoked at points which are safe against the
+ * above potential conflicts, but that this function itself is not re-entrant
+ * (this is because the function tracks one end of each FIFO in the portal and
+ * we do *not* want to lock that). So the consequence is that it is safe for
+ * user callbacks to call into any QMan API.
+ */
+static inline unsigned int __poll_portal_fast(struct qman_portal *p,
+                                       unsigned int poll_limit)
+{
+       const struct qm_dqrr_entry *dq;
+       struct qman_fq *fq;
+       enum qman_cb_dqrr_result res;
+       /* number of DQRR entries processed so far; this is the return value */
+       unsigned int limit = 0;
+
+       do {
+               qm_dqrr_pvb_update(&p->p);
+               dq = qm_dqrr_current(&p->p);
+               if (!dq)
+                       break;
+
+               if (dq->stat & QM_DQRR_STAT_UNSCHEDULED) {
+                       /*
+                        * VDQCR: don't trust contextB as the FQ may have
+                        * been configured for h/w consumption and we're
+                        * draining it post-retirement.
+                        */
+                       fq = p->vdqcr_owned;
+                       /*
+                        * We only set QMAN_FQ_STATE_NE when retiring, so we
+                        * only need to check for clearing it when doing
+                        * volatile dequeues.  It's one less thing to check
+                        * in the critical path (SDQCR).
+                        */
+                       if (dq->stat & QM_DQRR_STAT_FQ_EMPTY)
+                               fq_clear(fq, QMAN_FQ_STATE_NE);
+                       /*
+                        * This is duplicated from the SDQCR code, but we
+                        * have stuff to do before *and* after this callback,
+                        * and we don't want multiple if()s in the critical
+                        * path (SDQCR).
+                        */
+                       res = fq->cb.dqrr(p, fq, dq);
+                       /* "stop" leaves the entry unconsumed and uncounted */
+                       if (res == qman_cb_dqrr_stop)
+                               break;
+                       /* Check for VDQCR completion */
+                       if (dq->stat & QM_DQRR_STAT_DQCR_EXPIRED)
+                               clear_vdqcr(p, fq);
+               } else {
+                       /* SDQCR: contextB points to the FQ */
+                       fq = tag_to_fq(dq->contextB);
+                       /* Now let the callback do its stuff */
+                       res = fq->cb.dqrr(p, fq, dq);
+                       /*
+                        * The callback can request that we exit without
+                        * consuming this entry nor advancing;
+                        */
+                       if (res == qman_cb_dqrr_stop)
+                               break;
+               }
+               /* Interpret 'dq' from a driver perspective. */
+               /*
+                * Parking isn't possible unless HELDACTIVE was set. NB,
+                * FORCEELIGIBLE implies HELDACTIVE, so we only need to
+                * check for HELDACTIVE to cover both.
+                */
+               DPAA_ASSERT((dq->stat & QM_DQRR_STAT_FQ_HELDACTIVE) ||
+                           (res != qman_cb_dqrr_park));
+               /* just means "skip it, I'll consume it myself later on" */
+               if (res != qman_cb_dqrr_defer)
+                       qm_dqrr_cdc_consume_1ptr(&p->p, dq,
+                                                res == qman_cb_dqrr_park);
+               /* Move forward */
+               qm_dqrr_next(&p->p);
+               /*
+                * Entry processed and consumed, increment our counter.  The
+                * callback can request that we exit after consuming the
+                * entry, and we also exit if we reach our processing limit,
+                * so loop back only if neither of these conditions is met.
+                */
+       } while (++limit < poll_limit && res != qman_cb_dqrr_consume_stop);
+
+       return limit;
+}
+
+/*
+ * Add the QM_PIRQ_VISIBLE subset of @bits to the portal's set of interrupt
+ * sources and write the resulting set to the IER register, with local
+ * interrupts disabled for the duration of the update.
+ */
+void qman_p_irqsource_add(struct qman_portal *p, u32 bits)
+{
+       unsigned long flags;
+       u32 visible = bits & QM_PIRQ_VISIBLE;
+
+       local_irq_save(flags);
+       set_bits(visible, &p->irq_sources);
+       qm_out(&p->p, QM_REG_IER, p->irq_sources);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(qman_p_irqsource_add);
+
+/*
+ * Remove the QM_PIRQ_VISIBLE subset of @bits from the portal's set of
+ * interrupt sources, update IER, and clear from ISR every bit that IER (as
+ * read back from the register) no longer enables.
+ */
+void qman_p_irqsource_remove(struct qman_portal *p, u32 bits)
+{
+       unsigned long irqflags;
+       u32 ier;
+
+       /*
+        * Our interrupt handler only processes+clears status register bits that
+        * are in p->irq_sources. As we're trimming that mask, if one of them
+        * were to assert in the status register just before we remove it from
+        * the enable register, there would be an interrupt-storm when we
+        * release the IRQ lock. So we wait for the enable register update to
+        * take effect in h/w (by reading it back) and then clear all other bits
+        * in the status register. Ie. we clear them from ISR once it's certain
+        * IER won't allow them to reassert.
+        */
+       local_irq_save(irqflags);
+       bits &= QM_PIRQ_VISIBLE;
+       clear_bits(bits, &p->irq_sources);
+       qm_out(&p->p, QM_REG_IER, p->irq_sources);
+       ier = qm_in(&p->p, QM_REG_IER);
+       /*
+        * Using "~ier" (rather than "bits" or "~p->irq_sources") creates a
+        * data-dependency, ie. to protect against re-ordering.
+        */
+       qm_out(&p->p, QM_REG_ISR, ~ier);
+       local_irq_restore(irqflags);
+}
+EXPORT_SYMBOL(qman_p_irqsource_remove);
+
+/*
+ * Return a pointer to affine_mask, the cpumask in which
+ * qman_create_affine_portal() sets (and qman_destroy_affine_portal() clears)
+ * the cpu of each affine portal. The pointer refers to the live mask, not a
+ * copy.
+ */
+const cpumask_t *qman_affine_cpus(void)
+{
+       return &affine_mask;
+}
+EXPORT_SYMBOL(qman_affine_cpus);
+
+/*
+ * Return the channel recorded in affine_channels[] for @cpu. A negative @cpu
+ * selects the cpu of the portal returned by get_affine_portal(). Warns if the
+ * resulting cpu is not set in affine_mask, but still returns the table entry.
+ */
+u16 qman_affine_channel(int cpu)
+{
+       int target = cpu;
+
+       if (target < 0) {
+               struct qman_portal *affine = get_affine_portal();
+
+               target = affine->config->cpu;
+               put_affine_portal();
+       }
+
+       WARN_ON(!cpumask_test_cpu(target, &affine_mask));
+
+       return affine_channels[target];
+}
+EXPORT_SYMBOL(qman_affine_channel);
+
+/*
+ * Return the affine_portals[] entry for @cpu, i.e. whatever
+ * qman_create_affine_portal() last stored there. @cpu is used as an array
+ * index without any range check, so the caller must pass a valid cpu number.
+ */
+struct qman_portal *qman_get_affine_portal(int cpu)
+{
+       return affine_portals[cpu];
+}
+EXPORT_SYMBOL(qman_get_affine_portal);
+
+/*
+ * Exported wrapper around __poll_portal_fast(): process up to @limit DQRR
+ * entries on portal @p and return the number processed (the helper's
+ * unsigned count, converted to int).
+ */
+int qman_p_poll_dqrr(struct qman_portal *p, unsigned int limit)
+{
+       return __poll_portal_fast(p, limit);
+}
+EXPORT_SYMBOL(qman_p_poll_dqrr);
+
+/*
+ * OR into the portal's SDQCR value those bits of @pools that are also set in
+ * p->config->pools, then write the updated value out. Runs with local
+ * interrupts disabled.
+ */
+void qman_p_static_dequeue_add(struct qman_portal *p, u32 pools)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       p->sdqcr |= pools & p->config->pools;
+       qm_dqrr_sdqcr_set(&p->p, p->sdqcr);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(qman_p_static_dequeue_add);
+
+/* Frame queue API */
+
+/*
+ * Map a management command result code to the name of its constant, for use
+ * in log messages. Codes without a case below yield "<unknown MCR result>".
+ */
+static const char *mcr_result_str(u8 result)
+{
+       const char *name;
+
+       switch (result) {
+       case QM_MCR_RESULT_NULL:
+               name = "QM_MCR_RESULT_NULL";
+               break;
+       case QM_MCR_RESULT_OK:
+               name = "QM_MCR_RESULT_OK";
+               break;
+       case QM_MCR_RESULT_ERR_FQID:
+               name = "QM_MCR_RESULT_ERR_FQID";
+               break;
+       case QM_MCR_RESULT_ERR_FQSTATE:
+               name = "QM_MCR_RESULT_ERR_FQSTATE";
+               break;
+       case QM_MCR_RESULT_ERR_NOTEMPTY:
+               name = "QM_MCR_RESULT_ERR_NOTEMPTY";
+               break;
+       case QM_MCR_RESULT_PENDING:
+               name = "QM_MCR_RESULT_PENDING";
+               break;
+       case QM_MCR_RESULT_ERR_BADCOMMAND:
+               name = "QM_MCR_RESULT_ERR_BADCOMMAND";
+               break;
+       default:
+               name = "<unknown MCR result>";
+               break;
+       }
+
+       return name;
+}
+
+/*
+ * Initialise the FQ object @fq for frame queue @fqid and register it in
+ * fq_table[].
+ *
+ * If @flags has QMAN_FQ_FLAG_DYNAMIC_FQID set, the passed-in @fqid is ignored
+ * and one is obtained from qman_alloc_fqid() instead. The new object starts
+ * in the out-of-service state with CGR group 0. Its table slot is fqid * 2,
+ * plus one if QMAN_FQ_FLAG_NO_MODIFY is set.
+ *
+ * Returns 0 on success, the qman_alloc_fqid() error if allocation fails, or
+ * -EINVAL if the FQID is 0 or not below num_fqids.
+ */
+int qman_create_fq(u32 fqid, u32 flags, struct qman_fq *fq)
+{
+       if (flags & QMAN_FQ_FLAG_DYNAMIC_FQID) {
+               int ret = qman_alloc_fqid(&fqid);
+
+               if (ret)
+                       return ret;
+       }
+       fq->fqid = fqid;
+       fq->flags = flags;
+       fq->state = qman_fq_state_oos;
+       fq->cgr_groupid = 0;
+
+       /* A context_b of 0 is allegedly special, so don't use that fqid */
+       if (fqid == 0 || fqid >= num_fqids) {
+               /*
+                * NOTE(review): an FQID obtained from qman_alloc_fqid() above
+                * is not handed back on this path - confirm whether it should
+                * be released here.
+                */
+               WARN(1, "bad fqid %u\n", fqid);
+               return -EINVAL;
+       }
+
+       fq->idx = fqid * 2;
+       if (flags & QMAN_FQ_FLAG_NO_MODIFY)
+               fq->idx++;
+
+       /* warn about, but still overwrite, an already occupied slot */
+       WARN_ON(fq_table[fq->idx]);
+       fq_table[fq->idx] = fq;
+
+       return 0;
+}
+EXPORT_SYMBOL(qman_create_fq);
+
+/*
+ * Unregister @fq from fq_table[] and, for FQs created with
+ * QMAN_FQ_FLAG_DYNAMIC_FQID, release the FQID. Only acts on FQs in the parked
+ * or out-of-service state; for any other state nothing is torn down and the
+ * assertion below fires.
+ */
+void qman_destroy_fq(struct qman_fq *fq)
+{
+       /*
+        * No FQ lock is taken: the caller is required to have quiesced the FQ
+        * already, so the state is merely sanity-checked.
+        */
+       if (fq->state != qman_fq_state_parked &&
+           fq->state != qman_fq_state_oos) {
+               DPAA_ASSERT(NULL == "qman_free_fq() on unquiesced FQ!");
+               return;
+       }
+
+       if (fq_isset(fq, QMAN_FQ_FLAG_DYNAMIC_FQID))
+               qman_release_fqid(fq->fqid);
+
+       DPAA_ASSERT(fq_table[fq->idx]);
+       fq_table[fq->idx] = NULL;
+}
+EXPORT_SYMBOL(qman_destroy_fq);
+
+/* Return the FQID stored in @fq (set by qman_create_fq()). */
+u32 qman_fq_fqid(struct qman_fq *fq)
+{
+       return fq->fqid;
+}
+EXPORT_SYMBOL(qman_fq_fqid);
+
+/*
+ * Issue an INITFQ management command for @fq on the affine portal.
+ *
+ * @flags: QMAN_INITFQ_FLAG_SCHED selects INITFQ_SCHED instead of
+ *         INITFQ_PARKED; QMAN_INITFQ_FLAG_LOCAL directs the FQ to the
+ *         portal's own channel (work queue 4 unless the caller set DESTWQ).
+ * @opts:  optional command template copied into the command; may be NULL.
+ *
+ * Unless the FQ has QMAN_FQ_FLAG_TO_DCPORTAL, contextB is overwritten with
+ * the FQ's demux tag and contextA is either zeroed (caller did not set it) or
+ * given the DMA address of @fq for stashing.
+ *
+ * Returns 0 on success and moves the FQ to the sched or parked state.
+ * Returns -EINVAL for a bad starting state or option combination, -EBUSY if
+ * the FQ is changing state, -ETIMEDOUT if the command gets no result, and
+ * -EIO if the command is rejected or the DMA mapping fails.
+ */
+int qman_init_fq(struct qman_fq *fq, u32 flags, struct qm_mcc_initfq *opts)
+{
+       union qm_mc_command *mcc;
+       union qm_mc_result *mcr;
+       struct qman_portal *p;
+       u8 res, myverb;
+       int ret = 0;
+
+       myverb = (flags & QMAN_INITFQ_FLAG_SCHED)
+               ? QM_MCC_VERB_INITFQ_SCHED : QM_MCC_VERB_INITFQ_PARKED;
+
+       if (fq->state != qman_fq_state_oos &&
+           fq->state != qman_fq_state_parked)
+               return -EINVAL;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       if (fq_isset(fq, QMAN_FQ_FLAG_NO_MODIFY))
+               return -EINVAL;
+#endif
+       if (opts && (opts->we_mask & QM_INITFQ_WE_OAC)) {
+               /* And can't be set at the same time as TDTHRESH */
+               if (opts->we_mask & QM_INITFQ_WE_TDTHRESH)
+                       return -EINVAL;
+       }
+       /* Issue an INITFQ_[PARKED|SCHED] management command */
+       p = get_affine_portal();
+       /* re-check the state now that the affine portal is held */
+       if (fq_isset(fq, QMAN_FQ_STATE_CHANGING) ||
+           (fq->state != qman_fq_state_oos &&
+            fq->state != qman_fq_state_parked)) {
+               ret = -EBUSY;
+               goto out;
+       }
+       mcc = qm_mc_start(&p->p);
+       if (opts)
+               mcc->initfq = *opts;
+       mcc->initfq.fqid = fq->fqid;
+       mcc->initfq.count = 0;
+       /*
+        * If the FQ does *not* have the TO_DCPORTAL flag, contextB is set as a
+        * demux pointer. Otherwise, the caller-provided value is allowed to
+        * stand, don't overwrite it.
+        */
+       if (fq_isclear(fq, QMAN_FQ_FLAG_TO_DCPORTAL)) {
+               dma_addr_t phys_fq;
+
+               mcc->initfq.we_mask |= QM_INITFQ_WE_CONTEXTB;
+               mcc->initfq.fqd.context_b = fq_to_tag(fq);
+               /*
+                *  and the physical address - NB, if the user wasn't trying to
+                * set CONTEXTA, clear the stashing settings.
+                */
+               if (!(mcc->initfq.we_mask & QM_INITFQ_WE_CONTEXTA)) {
+                       mcc->initfq.we_mask |= QM_INITFQ_WE_CONTEXTA;
+                       memset(&mcc->initfq.fqd.context_a, 0,
+                               sizeof(mcc->initfq.fqd.context_a));
+               } else {
+                       phys_fq = dma_map_single(&p->pdev->dev, fq, sizeof(*fq),
+                                                DMA_TO_DEVICE);
+                       /*
+                        * Never hand an invalid DMA address to the hardware.
+                        * NOTE(review): this bails out with the MC command
+                        * started but not committed - confirm qm_mc_start()
+                        * tolerates that on the next command.
+                        */
+                       if (dma_mapping_error(&p->pdev->dev, phys_fq)) {
+                               dev_err(p->config->dev, "dma_mapping failed\n");
+                               ret = -EIO;
+                               goto out;
+                       }
+                       qm_fqd_stashing_set64(&mcc->initfq.fqd, phys_fq);
+               }
+       }
+       if (flags & QMAN_INITFQ_FLAG_LOCAL) {
+               int wq = 0;
+
+               if (!(mcc->initfq.we_mask & QM_INITFQ_WE_DESTWQ)) {
+                       mcc->initfq.we_mask |= QM_INITFQ_WE_DESTWQ;
+                       wq = 4;
+               }
+               qm_fqd_set_destwq(&mcc->initfq.fqd, p->config->channel, wq);
+       }
+       qm_mc_commit(&p->p, myverb);
+       if (!qm_mc_result_timeout(&p->p, &mcr)) {
+               dev_err(p->config->dev, "MCR timeout\n");
+               ret = -ETIMEDOUT;
+               goto out;
+       }
+
+       DPAA_ASSERT((mcr->verb & QM_MCR_VERB_MASK) == myverb);
+       res = mcr->result;
+       if (res != QM_MCR_RESULT_OK) {
+               ret = -EIO;
+               goto out;
+       }
+       /* mirror the accepted CGR settings in the software FQ object */
+       if (opts) {
+               if (opts->we_mask & QM_INITFQ_WE_FQCTRL) {
+                       if (opts->fqd.fq_ctrl & QM_FQCTRL_CGE)
+                               fq_set(fq, QMAN_FQ_STATE_CGR_EN);
+                       else
+                               fq_clear(fq, QMAN_FQ_STATE_CGR_EN);
+               }
+               if (opts->we_mask & QM_INITFQ_WE_CGID)
+                       fq->cgr_groupid = opts->fqd.cgid;
+       }
+       fq->state = (flags & QMAN_INITFQ_FLAG_SCHED) ?
+               qman_fq_state_sched : qman_fq_state_parked;
+
+out:
+       put_affine_portal();
+       return ret;
+}
+EXPORT_SYMBOL(qman_init_fq);
+
+/*
+ * Move a parked FQ to the scheduled state via an ALTER_SCHED management
+ * command on the affine portal.
+ *
+ * Returns 0 on success, -EINVAL if the FQ is not parked (or, with
+ * CONFIG_FSL_DPAA_CHECKING, is marked NO_MODIFY), -EBUSY if it is changing
+ * state, -ETIMEDOUT if the command gets no result and -EIO if the command is
+ * rejected.
+ */
+int qman_schedule_fq(struct qman_fq *fq)
+{
+       union qm_mc_command *cmd;
+       union qm_mc_result *rslt;
+       struct qman_portal *portal;
+       int err = 0;
+
+       if (fq->state != qman_fq_state_parked)
+               return -EINVAL;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       if (fq_isset(fq, QMAN_FQ_FLAG_NO_MODIFY))
+               return -EINVAL;
+#endif
+       /* Issue a ALTERFQ_SCHED management command */
+       portal = get_affine_portal();
+       if (fq_isset(fq, QMAN_FQ_STATE_CHANGING) ||
+           fq->state != qman_fq_state_parked) {
+               err = -EBUSY;
+               goto out;
+       }
+
+       cmd = qm_mc_start(&portal->p);
+       cmd->alterfq.fqid = fq->fqid;
+       qm_mc_commit(&portal->p, QM_MCC_VERB_ALTER_SCHED);
+       if (!qm_mc_result_timeout(&portal->p, &rslt)) {
+               dev_err(portal->config->dev, "ALTER_SCHED timeout\n");
+               err = -ETIMEDOUT;
+               goto out;
+       }
+
+       DPAA_ASSERT((rslt->verb & QM_MCR_VERB_MASK) == QM_MCR_VERB_ALTER_SCHED);
+       if (rslt->result == QM_MCR_RESULT_OK)
+               fq->state = qman_fq_state_sched;
+       else
+               err = -EIO;
+out:
+       put_affine_portal();
+       return err;
+}
+EXPORT_SYMBOL(qman_schedule_fq);
+
+/*
+ * Retire @fq by issuing an ALTER_RETIRE management command on the affine
+ * portal.
+ *
+ * Returns 0 if the retirement completed immediately (the FQ is then in the
+ * retired state, *@flags - when @flags is non-NULL - holds the FQ's flags,
+ * and cb.fqs, if set, has been called with a faked FQRNI message), 1 if the
+ * retirement is pending (the CHANGING flag is set on the FQ), -EINVAL if the
+ * FQ is neither parked nor scheduled (or, with CONFIG_FSL_DPAA_CHECKING, is
+ * marked NO_MODIFY), -EBUSY if it is already changing/retired/OOS,
+ * -ETIMEDOUT if the command gets no result, and -EIO for any other result.
+ */
+int qman_retire_fq(struct qman_fq *fq, u32 *flags)
+{
+       union qm_mc_command *mcc;
+       union qm_mc_result *mcr;
+       struct qman_portal *p;
+       int ret;
+       u8 res;
+
+       if (fq->state != qman_fq_state_parked &&
+           fq->state != qman_fq_state_sched)
+               return -EINVAL;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       if (fq_isset(fq, QMAN_FQ_FLAG_NO_MODIFY))
+               return -EINVAL;
+#endif
+       p = get_affine_portal();
+       /* re-check the state now that the affine portal is held */
+       if (fq_isset(fq, QMAN_FQ_STATE_CHANGING) ||
+           fq->state == qman_fq_state_retired ||
+           fq->state == qman_fq_state_oos) {
+               ret = -EBUSY;
+               goto out;
+       }
+       mcc = qm_mc_start(&p->p);
+       mcc->alterfq.fqid = fq->fqid;
+       qm_mc_commit(&p->p, QM_MCC_VERB_ALTER_RETIRE);
+       if (!qm_mc_result_timeout(&p->p, &mcr)) {
+               dev_crit(p->config->dev, "ALTER_RETIRE timeout\n");
+               ret = -ETIMEDOUT;
+               goto out;
+       }
+
+       DPAA_ASSERT((mcr->verb & QM_MCR_VERB_MASK) == QM_MCR_VERB_ALTER_RETIRE);
+       res = mcr->result;
+       /*
+        * "Elegant" would be to treat OK/PENDING the same way; set CHANGING,
+        * and defer the flags until FQRNI or FQRN (respectively) show up. But
+        * "Friendly" is to process OK immediately, and not set CHANGING. We do
+        * friendly, otherwise the caller doesn't necessarily have a fully
+        * "retired" FQ on return even if the retirement was immediate. However
+        * this does mean some code duplication between here and
+        * fq_state_change().
+        */
+       if (res == QM_MCR_RESULT_OK) {
+               ret = 0;
+               /* Process 'fq' right away, we'll ignore FQRNI */
+               if (mcr->alterfq.fqs & QM_MCR_FQS_NOTEMPTY)
+                       fq_set(fq, QMAN_FQ_STATE_NE);
+               if (mcr->alterfq.fqs & QM_MCR_FQS_ORLPRESENT)
+                       fq_set(fq, QMAN_FQ_STATE_ORL);
+               if (flags)
+                       *flags = fq->flags;
+               fq->state = qman_fq_state_retired;
+               if (fq->cb.fqs) {
+                       /*
+                        * Another issue with supporting "immediate" retirement
+                        * is that we're forced to drop FQRNIs, because by the
+                        * time they're seen it may already be "too late" (the
+                        * fq may have been OOS'd and free()'d already). But if
+                        * the upper layer wants a callback whether it's
+                        * immediate or not, we have to fake a "MR" entry to
+                        * look like an FQRNI...
+                        */
+                       union qm_mr_entry msg;
+
+                       /* only these four fields of the fake entry are set */
+                       msg.verb = QM_MR_VERB_FQRNI;
+                       msg.fq.fqs = mcr->alterfq.fqs;
+                       msg.fq.fqid = fq->fqid;
+                       msg.fq.contextB = fq_to_tag(fq);
+                       fq->cb.fqs(p, fq, &msg);
+               }
+       } else if (res == QM_MCR_RESULT_PENDING) {
+               ret = 1;
+               fq_set(fq, QMAN_FQ_STATE_CHANGING);
+       } else {
+               ret = -EIO;
+       }
+out:
+       put_affine_portal();
+       return ret;
+}
+EXPORT_SYMBOL(qman_retire_fq);
+
+/*
+ * Take a retired FQ out of service via an ALTER_OOS management command on
+ * the affine portal.
+ *
+ * Returns 0 on success, -EINVAL if the FQ is not retired (or, with
+ * CONFIG_FSL_DPAA_CHECKING, is marked NO_MODIFY), -EBUSY if BLOCKOOS is set
+ * on the FQ, -ETIMEDOUT if the command gets no result and -EIO if the
+ * command is rejected.
+ */
+int qman_oos_fq(struct qman_fq *fq)
+{
+       union qm_mc_command *cmd;
+       union qm_mc_result *rslt;
+       struct qman_portal *portal;
+       int err = 0;
+
+       if (fq->state != qman_fq_state_retired)
+               return -EINVAL;
+#ifdef CONFIG_FSL_DPAA_CHECKING
+       if (fq_isset(fq, QMAN_FQ_FLAG_NO_MODIFY))
+               return -EINVAL;
+#endif
+       portal = get_affine_portal();
+       if (fq_isset(fq, QMAN_FQ_STATE_BLOCKOOS) ||
+           fq->state != qman_fq_state_retired) {
+               err = -EBUSY;
+               goto out;
+       }
+
+       cmd = qm_mc_start(&portal->p);
+       cmd->alterfq.fqid = fq->fqid;
+       qm_mc_commit(&portal->p, QM_MCC_VERB_ALTER_OOS);
+       if (!qm_mc_result_timeout(&portal->p, &rslt)) {
+               err = -ETIMEDOUT;
+               goto out;
+       }
+
+       DPAA_ASSERT((rslt->verb & QM_MCR_VERB_MASK) == QM_MCR_VERB_ALTER_OOS);
+       if (rslt->result == QM_MCR_RESULT_OK)
+               fq->state = qman_fq_state_oos;
+       else
+               err = -EIO;
+out:
+       put_affine_portal();
+       return err;
+}
+EXPORT_SYMBOL(qman_oos_fq);
+
+/*
+ * Run a QUERYFQ management command for @fq on the affine portal and, on
+ * success, copy the returned FQ descriptor to *@fqd.
+ *
+ * Returns 0 on success, -ETIMEDOUT if the command gets no result and -EIO if
+ * the result is anything other than OK (*@fqd is then left untouched).
+ */
+int qman_query_fq(struct qman_fq *fq, struct qm_fqd *fqd)
+{
+       struct qman_portal *portal = get_affine_portal();
+       union qm_mc_command *cmd;
+       union qm_mc_result *rslt;
+       int err;
+
+       cmd = qm_mc_start(&portal->p);
+       cmd->queryfq.fqid = fq->fqid;
+       qm_mc_commit(&portal->p, QM_MCC_VERB_QUERYFQ);
+
+       if (qm_mc_result_timeout(&portal->p, &rslt)) {
+               DPAA_ASSERT((rslt->verb & QM_MCR_VERB_MASK) ==
+                           QM_MCR_VERB_QUERYFQ);
+               if (rslt->result == QM_MCR_RESULT_OK) {
+                       *fqd = rslt->queryfq.fqd;
+                       err = 0;
+               } else {
+                       err = -EIO;
+               }
+       } else {
+               err = -ETIMEDOUT;
+       }
+
+       put_affine_portal();
+       return err;
+}
+
+/*
+ * Run a QUERYFQ_NP management command for @fq on the affine portal and, on
+ * success, copy the returned structure to *@np.
+ *
+ * Returns 0 on success, -ETIMEDOUT if the command gets no result, -ERANGE if
+ * the result is ERR_FQID and -EIO for any other failure.
+ */
+static int qman_query_fq_np(struct qman_fq *fq,
+                           struct qm_mcr_queryfq_np *np)
+{
+       struct qman_portal *portal = get_affine_portal();
+       union qm_mc_command *cmd;
+       union qm_mc_result *rslt;
+       int err = 0;
+
+       cmd = qm_mc_start(&portal->p);
+       cmd->queryfq.fqid = fq->fqid;
+       qm_mc_commit(&portal->p, QM_MCC_VERB_QUERYFQ_NP);
+       if (!qm_mc_result_timeout(&portal->p, &rslt)) {
+               err = -ETIMEDOUT;
+               goto out;
+       }
+
+       DPAA_ASSERT((rslt->verb & QM_MCR_VERB_MASK) == QM_MCR_VERB_QUERYFQ_NP);
+       switch (rslt->result) {
+       case QM_MCR_RESULT_OK:
+               *np = rslt->queryfq_np;
+               break;
+       case QM_MCR_RESULT_ERR_FQID:
+               err = -ERANGE;
+               break;
+       default:
+               err = -EIO;
+               break;
+       }
+out:
+       put_affine_portal();
+       return err;
+}
+
+/*
+ * Read the state of congestion group @cgr with a QUERYCGR command and copy
+ * the result into @cgrd.
+ *
+ * Returns 0 on success, -ETIMEDOUT when no result is received, and -EIO
+ * (after logging the decoded result code) when the command fails.
+ */
+static int qman_query_cgr(struct qman_cgr *cgr,
+                         struct qm_mcr_querycgr *cgrd)
+{
+       struct qman_portal *portal = get_affine_portal();
+       union qm_mc_command *cmd;
+       union qm_mc_result *rsp;
+       int err = 0;
+
+       cmd = qm_mc_start(&portal->p);
+       cmd->querycgr.cgid = cgr->cgrid;
+       qm_mc_commit(&portal->p, QM_MCC_VERB_QUERYCGR);
+       if (!qm_mc_result_timeout(&portal->p, &rsp)) {
+               err = -ETIMEDOUT;
+               goto release;
+       }
+       DPAA_ASSERT((rsp->verb & QM_MCR_VERB_MASK) == QM_MCC_VERB_QUERYCGR);
+       if (rsp->result != QM_MCR_RESULT_OK) {
+               dev_err(portal->config->dev, "QUERY_CGR failed: %s\n",
+                       mcr_result_str(rsp->result));
+               err = -EIO;
+               goto release;
+       }
+       *cgrd = rsp->querycgr;
+release:
+       put_affine_portal();
+       return err;
+}
+
+/*
+ * Report through @result whether the "cs" field of @cgr's queried state is
+ * non-zero. @result is only written when the query succeeds.
+ *
+ * Returns 0 on success or the error returned by qman_query_cgr().
+ */
+int qman_query_cgr_congested(struct qman_cgr *cgr, bool *result)
+{
+       struct qm_mcr_querycgr state;
+       int err = qman_query_cgr(cgr, &state);
+
+       if (!err)
+               *result = state.cgr.cs != 0;
+       return err;
+}
+EXPORT_SYMBOL(qman_query_cgr_congested);
+
+/*
+ * Internal helper, used as a wait_event() expression.
+ *
+ * With local interrupts disabled, claim portal @p's volatile dequeue for
+ * @fq and program @vdqcr. Returns -EBUSY when the portal already has a
+ * VDQCR owner or @fq already has QMAN_FQ_STATE_VDQCR set, 0 otherwise.
+ */
+static int set_p_vdqcr(struct qman_portal *p, struct qman_fq *fq, u32 vdqcr)
+{
+       unsigned long irqflags;
+       int ret;
+
+       local_irq_save(irqflags);
+       if (p->vdqcr_owned || fq_isset(fq, QMAN_FQ_STATE_VDQCR)) {
+               ret = -EBUSY;
+       } else {
+               fq_set(fq, QMAN_FQ_STATE_VDQCR);
+               p->vdqcr_owned = fq;
+               qm_dqrr_vdqcr_set(&p->p, vdqcr);
+               ret = 0;
+       }
+       local_irq_restore(irqflags);
+       return ret;
+}
+
+/*
+ * Fetch the portal from get_affine_portal(), store it in *@p and try to
+ * start the volatile dequeue @vdqcr for @fq on it.
+ *
+ * Returns the result of set_p_vdqcr().
+ */
+static int set_vdqcr(struct qman_portal **p, struct qman_fq *fq, u32 vdqcr)
+{
+       struct qman_portal *portal = get_affine_portal();
+       int ret;
+
+       *p = portal;
+       ret = set_p_vdqcr(portal, fq, vdqcr);
+       put_affine_portal();
+       return ret;
+}
+
+/*
+ * Sleep on affine_queue until set_vdqcr() succeeds.
+ *
+ * When QMAN_VOLATILE_FLAG_WAIT_INT is set in @flags the wait is
+ * interruptible and its return value is passed back; otherwise the wait is
+ * uninterruptible and 0 is returned.
+ */
+static int wait_vdqcr_start(struct qman_portal **p, struct qman_fq *fq,
+                               u32 vdqcr, u32 flags)
+{
+       if (flags & QMAN_VOLATILE_FLAG_WAIT_INT)
+               return wait_event_interruptible(affine_queue,
+                               !set_vdqcr(p, fq, vdqcr));
+
+       wait_event(affine_queue, !set_vdqcr(p, fq, vdqcr));
+       return 0;
+}
+
+/*
+ * Start a volatile dequeue from @fq.
+ *
+ * @fq must be parked or retired and @vdqcr must not carry any FQID bits;
+ * both violations return -EINVAL. -EBUSY is returned when @fq already has a
+ * volatile dequeue outstanding. The FQID of @fq is merged into @vdqcr before
+ * the command is issued.
+ *
+ * @flags: QMAN_VOLATILE_FLAG_WAIT waits for the VDQCR to become available,
+ * QMAN_VOLATILE_FLAG_WAIT_INT makes the waits interruptible, and
+ * QMAN_VOLATILE_FLAG_FINISH additionally waits until QMAN_FQ_STATE_VDQCR is
+ * cleared on @fq.
+ *
+ * Returns 0 once the VDQCR has been set, otherwise the error from starting
+ * it.
+ */
+int qman_volatile_dequeue(struct qman_fq *fq, u32 flags, u32 vdqcr)
+{
+       struct qman_portal *p;
+       int ret;
+
+       if (!(fq->state == qman_fq_state_parked ||
+             fq->state == qman_fq_state_retired))
+               return -EINVAL;
+       if (vdqcr & QM_VDQCR_FQID_MASK)
+               return -EINVAL;
+       if (fq_isset(fq, QMAN_FQ_STATE_VDQCR))
+               return -EBUSY;
+
+       vdqcr &= ~QM_VDQCR_FQID_MASK;
+       vdqcr |= fq->fqid;
+
+       ret = (flags & QMAN_VOLATILE_FLAG_WAIT) ?
+               wait_vdqcr_start(&p, fq, vdqcr, flags) :
+               set_vdqcr(&p, fq, vdqcr);
+       if (ret)
+               return ret;
+
+       /* VDQCR is set */
+       if (!(flags & QMAN_VOLATILE_FLAG_FINISH))
+               return 0;
+
+       if (flags & QMAN_VOLATILE_FLAG_WAIT_INT)
+               /*
+                * NB: don't propagate any error - the caller wouldn't
+                * know whether the VDQCR was issued or not. A signal
+                * could arrive after returning anyway, so the caller
+                * can check signal_pending() if that's an issue.
+                */
+               wait_event_interruptible(affine_queue,
+                       !fq_isset(fq, QMAN_FQ_STATE_VDQCR));
+       else
+               wait_event(affine_queue,
+                       !fq_isset(fq, QMAN_FQ_STATE_VDQCR));
+       return 0;
+}
+EXPORT_SYMBOL(qman_volatile_dequeue);
+
+/*
+ * Refresh the EQCR consumer index of portal @p: issue a full update when no
+ * entries are available (@avail == 0), otherwise only a prefetch.
+ */
+static void update_eqcr_ci(struct qman_portal *p, u8 avail)
+{
+       if (!avail) {
+               qm_eqcr_cce_update(&p->p);
+               return;
+       }
+       qm_eqcr_cce_prefetch(&p->p);
+}
+
+/*
+ * Enqueue frame descriptor @fd on frame queue @fq through the portal
+ * returned by get_affine_portal().
+ *
+ * Returns 0 once the enqueue command has been committed to the EQCR, or
+ * -EBUSY when no EQCR entry could be obtained. In the -EBUSY case nothing
+ * has been enqueued, so the caller still owns @fd and may retry.
+ */
+int qman_enqueue(struct qman_fq *fq, const struct qm_fd *fd)
+{
+       struct qman_portal *p;
+       struct qm_eqcr_entry *eq;
+       unsigned long irqflags;
+       u8 avail;
+       /* Assume the ring is full until an entry has been committed */
+       int ret = -EBUSY;
+
+       p = get_affine_portal();
+       local_irq_save(irqflags);
+
+       if (p->use_eqcr_ci_stashing) {
+               /*
+                * The stashing case is easy, only update if we need to in
+                * order to try and liberate ring entries.
+                */
+               eq = qm_eqcr_start_stash(&p->p);
+       } else {
+               /*
+                * The non-stashing case is harder, need to prefetch ahead of
+                * time.
+                */
+               avail = qm_eqcr_get_avail(&p->p);
+               if (avail < 2)
+                       update_eqcr_ci(p, avail);
+               eq = qm_eqcr_start_no_stash(&p->p);
+       }
+
+       /* No free entry: report it instead of silently dropping the frame */
+       if (unlikely(!eq))
+               goto out;
+
+       eq->fqid = fq->fqid;
+       eq->tag = fq_to_tag(fq);
+       eq->fd = *fd;
+
+       qm_eqcr_pvb_commit(&p->p, QM_EQCR_VERB_CMD_ENQUEUE);
+       ret = 0;
+out:
+       local_irq_restore(irqflags);
+       put_affine_portal();
+       return ret;
+}
+EXPORT_SYMBOL(qman_enqueue);
+
+/*
+ * Send an INITCGR (when QMAN_CGR_FLAG_USE_INIT is set in @flags) or a
+ * MODIFYCGR command for @cgr. When @opts is non-NULL it is copied into the
+ * command before the CGR ID is filled in.
+ *
+ * Returns 0 on success, -ETIMEDOUT when no result is received, and -EIO
+ * when the command result is not QM_MCR_RESULT_OK.
+ */
+static int qm_modify_cgr(struct qman_cgr *cgr, u32 flags,
+                        struct qm_mcc_initcgr *opts)
+{
+       struct qman_portal *portal = get_affine_portal();
+       union qm_mc_command *cmd;
+       union qm_mc_result *rsp;
+       int err = 0;
+       u8 verb;
+
+       verb = (flags & QMAN_CGR_FLAG_USE_INIT) ? QM_MCC_VERB_INITCGR :
+                                                 QM_MCC_VERB_MODIFYCGR;
+
+       cmd = qm_mc_start(&portal->p);
+       if (opts)
+               cmd->initcgr = *opts;
+       cmd->initcgr.cgid = cgr->cgrid;
+       qm_mc_commit(&portal->p, verb);
+
+       if (!qm_mc_result_timeout(&portal->p, &rsp)) {
+               err = -ETIMEDOUT;
+       } else {
+               DPAA_ASSERT((rsp->verb & QM_MCR_VERB_MASK) == verb);
+               if (rsp->result != QM_MCR_RESULT_OK)
+                       err = -EIO;
+       }
+
+       put_affine_portal();
+       return err;
+}
+
+/*
+ * PORTAL_IDX(n): offset of portal n's channel from QM_CHANNEL_SWPORTAL0.
+ * TARG_MASK(n):  BIT(31) shifted right by that offset.
+ * The argument is parenthesised so any pointer expression can be passed.
+ */
+#define PORTAL_IDX(n)  ((n)->config->channel - QM_CHANNEL_SWPORTAL0)
+#define TARG_MASK(n)   (BIT(31) >> PORTAL_IDX(n))
+
+/*
+ * Indexed by CGR ID: the CPU recorded by qman_create_cgr() when the CGR was
+ * created; qman_delete_cgr_safe() compares it with the current CPU.
+ */
+static u8 qman_cgr_cpus[CGR_NUM];
+
+/*
+ * Send an INITCGR command with no options for every CGR ID below CGR_NUM
+ * and log how many of those commands failed, if any.
+ */
+void qman_init_cgr_all(void)
+{
+       struct qman_cgr cgr;
+       int failures = 0;
+
+       cgr.cgrid = 0;
+       while (cgr.cgrid < CGR_NUM) {
+               if (qm_modify_cgr(&cgr, QMAN_CGR_FLAG_USE_INIT, NULL) != 0)
+                       failures++;
+               cgr.cgrid++;
+       }
+
+       if (!failures)
+               return;
+
+       pr_err("Warning: %d error%s while initialising CGR h/w\n",
+              failures, (failures > 1) ? "s" : "");
+}
+
+/*
+ * Register @cgr on the portal returned by get_affine_portal().
+ *
+ * The current CPU and the portal's channel are recorded for later deletion.
+ * When @opts is given, the CGR is (re)configured with this portal added as a
+ * CSCN target; QMAN_CGR_FLAG_USE_INIT in @flags selects INITCGR instead of
+ * MODIFYCGR. @cgr is then added to the portal's callback list and its
+ * callback is invoked immediately if the CGR is already flagged in
+ * p->cgrs[1] and CSCN is enabled.
+ *
+ * Returns -EINVAL for a CGR ID >= CGR_NUM, the error of the initial query or
+ * modify command, and 0 otherwise (including when only the final state query
+ * fails, because the hardware has already been modified).
+ */
+int qman_create_cgr(struct qman_cgr *cgr, u32 flags,
+                   struct qm_mcc_initcgr *opts)
+{
+       struct qm_mcc_initcgr local_opts = {};
+       struct qm_mcr_querycgr cgr_state;
+       struct qman_portal *p;
+       int ret;
+
+       /*
+        * We have to check that the provided CGRID is within the limits of the
+        * data-structures, for obvious reasons. However we'll let h/w take
+        * care of determining whether it's within the limits of what exists on
+        * the SoC.
+        */
+       if (cgr->cgrid >= CGR_NUM)
+               return -EINVAL;
+
+       preempt_disable();
+       p = get_affine_portal();
+       qman_cgr_cpus[cgr->cgrid] = smp_processor_id();
+       preempt_enable();
+
+       cgr->chan = p->config->channel;
+       spin_lock(&p->cgr_lock);
+
+       if (opts) {
+               u32 modify_flags;
+
+               ret = qman_query_cgr(cgr, &cgr_state);
+               if (ret)
+                       goto unlock;
+
+               local_opts = *opts;
+               if ((qman_ip_rev & 0xFF00) < QMAN_REV30)
+                       /* Overwrite TARG */
+                       local_opts.cgr.cscn_targ = cgr_state.cgr.cscn_targ |
+                                                  TARG_MASK(p);
+               else
+                       local_opts.cgr.cscn_targ_upd_ctrl =
+                               QM_CGR_TARG_UDP_CTRL_WRITE_BIT | PORTAL_IDX(p);
+               local_opts.we_mask |= QM_CGR_WE_CSCN_TARG;
+
+               /* send init if flags indicate so */
+               modify_flags = (flags & QMAN_CGR_FLAG_USE_INIT) ?
+                              QMAN_CGR_FLAG_USE_INIT : 0;
+               ret = qm_modify_cgr(cgr, modify_flags, &local_opts);
+               if (ret)
+                       goto unlock;
+       }
+
+       list_add(&cgr->node, &p->cgr_cbs);
+
+       /* Determine if newly added object requires its callback to be called */
+       ret = qman_query_cgr(cgr, &cgr_state);
+       if (ret) {
+               /* we can't go back, so proceed and return success */
+               dev_err(p->config->dev, "CGR HW state partially modified\n");
+               ret = 0;
+               goto unlock;
+       }
+       if (cgr->cb && cgr_state.cgr.cscn_en &&
+           qman_cgrs_get(&p->cgrs[1], cgr->cgrid))
+               cgr->cb(p, cgr, 1);
+unlock:
+       spin_unlock(&p->cgr_lock);
+       put_affine_portal();
+       return ret;
+}
+EXPORT_SYMBOL(qman_create_cgr);
+
+/*
+ * Unregister @cgr from the portal returned by get_affine_portal().
+ *
+ * The call is rejected with -EINVAL when that portal's channel differs from
+ * the one recorded in cgr->chan at creation. Otherwise @cgr is removed from
+ * the portal's callback list and, if no remaining entry with a callback
+ * uses the same CGR ID, this portal is removed from the CGR's CSCN targets.
+ * If the query or modify command fails, @cgr is put back on the list and
+ * the command's error is returned.
+ */
+int qman_delete_cgr(struct qman_cgr *cgr)
+{
+       unsigned long irqflags;
+       struct qm_mcr_querycgr cgr_state;
+       struct qm_mcc_initcgr local_opts;
+       int ret = 0;
+       struct qman_cgr *i;
+       struct qman_portal *p = get_affine_portal();
+
+       if (cgr->chan != p->config->channel) {
+               /* attempt to delete from other portal than creator */
+               dev_err(p->config->dev, "CGR not owned by current portal");
+               dev_dbg(p->config->dev, " create 0x%x, delete 0x%x\n",
+                       cgr->chan, p->config->channel);
+
+               ret = -EINVAL;
+               goto put_portal;
+       }
+       memset(&local_opts, 0, sizeof(struct qm_mcc_initcgr));
+       spin_lock_irqsave(&p->cgr_lock, irqflags);
+       list_del(&cgr->node);
+       /*
+        * If there are no other CGR objects for this CGRID in the list,
+        * update CSCN_TARG accordingly
+        */
+       list_for_each_entry(i, &p->cgr_cbs, node)
+               if (i->cgrid == cgr->cgrid && i->cb)
+                       goto release_lock;
+       ret = qman_query_cgr(cgr, &cgr_state);
+       if (ret)  {
+               /* add back to the list */
+               list_add(&cgr->node, &p->cgr_cbs);
+               goto release_lock;
+       }
+       /* Overwrite TARG */
+       local_opts.we_mask = QM_CGR_WE_CSCN_TARG;
+       /*
+        * QMan rev 3.0 and later take the portal index in cscn_targ_upd_ctrl
+        * (without the write bit used at creation); earlier revisions get the
+        * queried target mask with this portal's bit cleared.
+        */
+       if ((qman_ip_rev & 0xFF00) >= QMAN_REV30)
+               local_opts.cgr.cscn_targ_upd_ctrl = PORTAL_IDX(p);
+       else
+               local_opts.cgr.cscn_targ = cgr_state.cgr.cscn_targ &
+                                                        ~(TARG_MASK(p));
+       ret = qm_modify_cgr(cgr, 0, &local_opts);
+       if (ret)
+               /* add back to the list */
+               list_add(&cgr->node, &p->cgr_cbs);
+release_lock:
+       spin_unlock_irqrestore(&p->cgr_lock, irqflags);
+put_portal:
+       put_affine_portal();
+       return ret;
+}
+EXPORT_SYMBOL(qman_delete_cgr);
+
+/*
+ * Argument block handed to qman_delete_cgr_thread(): the CGR to delete and
+ * the completion signalled once the deletion attempt has finished.
+ */
+struct cgr_comp {
+       struct qman_cgr *cgr;
+       struct completion completion;
+};
+
+/*
+ * Thread function: delete the CGR described by the struct cgr_comp in @p,
+ * signal its completion and return the result of qman_delete_cgr().
+ */
+static int qman_delete_cgr_thread(void *p)
+{
+       struct cgr_comp *comp = p;
+       int ret = qman_delete_cgr(comp->cgr);
+
+       complete(&comp->completion);
+       return ret;
+}
+
+/*
+ * Delete @cgr from whichever CPU it was created on.
+ *
+ * If the caller is already running on the CPU recorded for this CGR ID, the
+ * CGR is deleted directly with preemption disabled so the task cannot
+ * migrate between the check and the deletion. Otherwise a kernel thread
+ * bound to the recorded CPU performs the deletion and this function waits
+ * for it to finish.
+ *
+ * Creating the thread and waiting for the completion can both sleep, so
+ * that path runs with preemption enabled.
+ *
+ * If the helper thread cannot be created, deletion is attempted from the
+ * current context as a best effort.
+ */
+void qman_delete_cgr_safe(struct qman_cgr *cgr)
+{
+       struct task_struct *thread;
+       struct cgr_comp cgr_comp;
+       int owner_cpu = qman_cgr_cpus[cgr->cgrid];
+
+       preempt_disable();
+       if (owner_cpu == smp_processor_id()) {
+               qman_delete_cgr(cgr);
+               preempt_enable();
+               return;
+       }
+       /* Must not sleep in atomic context: leave it before going remote */
+       preempt_enable();
+
+       init_completion(&cgr_comp.completion);
+       cgr_comp.cgr = cgr;
+       thread = kthread_create(qman_delete_cgr_thread, &cgr_comp,
+                               "cgr_del");
+       if (IS_ERR(thread)) {
+               qman_delete_cgr(cgr);
+               return;
+       }
+
+       kthread_bind(thread, owner_cpu);
+       wake_up_process(thread);
+       wait_for_completion(&cgr_comp.completion);
+}
+EXPORT_SYMBOL(qman_delete_cgr_safe);
+
+/* Cleanup FQs */
+
+/*
+ * Consume every message currently available in the message ring of @p.
+ *
+ * Returns 1 if at least one consumed message had a verb type equal to @v
+ * (after masking with QM_MR_VERB_TYPE_MASK), 0 otherwise.
+ */
+static int _qm_mr_consume_and_match_verb(struct qm_portal *p, int v)
+{
+       const union qm_mr_entry *entry;
+       int matched = 0;
+
+       qm_mr_pvb_update(p);
+       for (entry = qm_mr_current(p); entry; entry = qm_mr_current(p)) {
+               if ((entry->verb & QM_MR_VERB_TYPE_MASK) == v)
+                       matched = 1;
+               qm_mr_next(p);
+               qm_mr_cci_consume_to_current(p);
+               qm_mr_pvb_update(p);
+       }
+       return matched;
+}
+
+/*
+ * Consume every entry currently available in the DQRR of @p.
+ *
+ * When @wait is true, first spin (with cpu_relax()) until at least one entry
+ * is present; when false, return immediately if the ring is empty.
+ *
+ * Returns 1 if any consumed entry belongs to @fqid and has one of the status
+ * bits in @s set, 0 otherwise.
+ */
+static int _qm_dqrr_consume_and_match(struct qm_portal *p, u32 fqid, int s,
+                                     bool wait)
+{
+       const struct qm_dqrr_entry *dqrr;
+       int found = 0;
+
+       /* Poll for the first entry; loops only when asked to wait */
+       do {
+               qm_dqrr_pvb_update(p);
+               dqrr = qm_dqrr_current(p);
+               if (!dqrr)
+                       cpu_relax();
+       } while (wait && !dqrr);
+
+       /* Drain the ring, remembering whether a matching entry was seen */
+       while (dqrr) {
+               if (dqrr->fqid == fqid && (dqrr->stat & s))
+                       found = 1;
+               qm_dqrr_cdc_consume_1ptr(p, dqrr, 0);
+               qm_dqrr_pvb_update(p);
+               qm_dqrr_next(p);
+               dqrr = qm_dqrr_current(p);
+       }
+       return found;
+}
+
+/* Drain the message ring; non-zero if a QM_MR_VERB_<V> message was seen */
+#define qm_mr_drain(p, V) \
+       _qm_mr_consume_and_match_verb(p, QM_MR_VERB_##V)
+
+/* Drain the DQRR without waiting; non-zero if FQID f reported status S */
+#define qm_dqrr_drain(p, f, S) \
+       _qm_dqrr_consume_and_match(p, f, QM_DQRR_STAT_##S, false)
+
+/* As qm_dqrr_drain(), but first wait until the DQRR has an entry */
+#define qm_dqrr_drain_wait(p, f, S) \
+       _qm_dqrr_consume_and_match(p, f, QM_DQRR_STAT_##S, true)
+
+/* Drain the DQRR without waiting and without matching (status mask is 0) */
+#define qm_dqrr_drain_nomatch(p) \
+       _qm_dqrr_consume_and_match(p, 0, 0, false)
+
+/*
+ * Force frame queue @fqid out of service (OOS) using management commands on
+ * the portal returned by get_affine_portal().
+ *
+ * The FQ state is read with QUERYFQ_NP. An FQ that is already OOS needs no
+ * work; a retired FQ is sent straight to OOS; a scheduled, active or parked
+ * FQ is first retired, drained with volatile dequeues until it reports
+ * empty, has its ORL waited out, and is then sent OOS.
+ *
+ * Returns 0 on success, -ETIMEDOUT when a management command gets no
+ * result, -EBUSY when the FQ's channel is at or above qm_channel_pool1 + 15
+ * and so cannot be drained here, and -EIO when a command fails or the FQ is
+ * in an unexpected state.
+ */
+static int qman_shutdown_fq(u32 fqid)
+{
+       struct qman_portal *p;
+       struct device *dev;
+       union qm_mc_command *mcc;
+       union qm_mc_result *mcr;
+       int orl_empty, drain = 0, ret = 0;
+       u32 channel, wq, res;
+       u8 state;
+
+       p = get_affine_portal();
+       dev = p->config->dev;
+       /* Determine the state of the FQID */
+       mcc = qm_mc_start(&p->p);
+       mcc->queryfq_np.fqid = fqid;
+       qm_mc_commit(&p->p, QM_MCC_VERB_QUERYFQ_NP);
+       if (!qm_mc_result_timeout(&p->p, &mcr)) {
+               dev_err(dev, "QUERYFQ_NP timeout\n");
+               ret = -ETIMEDOUT;
+               goto out;
+       }
+
+       DPAA_ASSERT((mcr->verb & QM_MCR_VERB_MASK) == QM_MCR_VERB_QUERYFQ_NP);
+       state = mcr->queryfq_np.state & QM_MCR_NP_STATE_MASK;
+       if (state == QM_MCR_NP_STATE_OOS)
+               goto out; /* Already OOS, no need to do anymore checks */
+
+       /* Query which channel the FQ is using */
+       mcc = qm_mc_start(&p->p);
+       mcc->queryfq.fqid = fqid;
+       qm_mc_commit(&p->p, QM_MCC_VERB_QUERYFQ);
+       if (!qm_mc_result_timeout(&p->p, &mcr)) {
+               dev_err(dev, "QUERYFQ timeout\n");
+               ret = -ETIMEDOUT;
+               goto out;
+       }
+
+       DPAA_ASSERT((mcr->verb & QM_MCR_VERB_MASK) == QM_MCR_VERB_QUERYFQ);
+       /* Need to store these since the MCR gets reused */
+       channel = qm_fqd_get_chan(&mcr->queryfq.fqd);
+       wq = qm_fqd_get_wq(&mcr->queryfq.fqd);
+
+       switch (state) {
+       case QM_MCR_NP_STATE_TEN_SCHED:
+       case QM_MCR_NP_STATE_TRU_SCHED:
+       case QM_MCR_NP_STATE_ACTIVE:
+       case QM_MCR_NP_STATE_PARKED:
+               orl_empty = 0;
+               mcc = qm_mc_start(&p->p);
+               mcc->alterfq.fqid = fqid;
+               qm_mc_commit(&p->p, QM_MCC_VERB_ALTER_RETIRE);
+               if (!qm_mc_result_timeout(&p->p, &mcr)) {
+                       /* Name the command that actually timed out */
+                       dev_err(dev, "ALTER_RETIRE timeout\n");
+                       ret = -ETIMEDOUT;
+                       goto out;
+               }
+               DPAA_ASSERT((mcr->verb & QM_MCR_VERB_MASK) ==
+                           QM_MCR_VERB_ALTER_RETIRE);
+               res = mcr->result; /* Make a copy as we reuse MCR below */
+
+               if (res == QM_MCR_RESULT_PENDING) {
+                       /*
+                        * Need to wait for the FQRN in the message ring, which
+                        * will only occur once the FQ has been drained.  In
+                        * order for the FQ to drain the portal needs to be set
+                        * to dequeue from the channel the FQ is scheduled on
+                        */
+                       int found_fqrn = 0;
+                       u16 dequeue_wq = 0;
+
+                       /* Flag that we need to drain FQ */
+                       drain = 1;
+
+                       if (channel >= qm_channel_pool1 &&
+                           channel < qm_channel_pool1 + 15) {
+                               /* Pool channel, enable the bit in the portal */
+                               dequeue_wq = (channel -
+                                             qm_channel_pool1 + 1)<<4 | wq;
+                       } else if (channel < qm_channel_pool1) {
+                               /* Dedicated channel */
+                               dequeue_wq = wq;
+                       } else {
+                               dev_err(dev, "Can't recover FQ 0x%x, ch: 0x%x\n",
+                                       fqid, channel);
+                               ret = -EBUSY;
+                               goto out;
+                       }
+                       /* Set the sdqcr to drain this channel */
+                       if (channel < qm_channel_pool1)
+                               qm_dqrr_sdqcr_set(&p->p,
+                                                 QM_SDQCR_TYPE_ACTIVE |
+                                                 QM_SDQCR_CHANNELS_DEDICATED);
+                       else
+                               qm_dqrr_sdqcr_set(&p->p,
+                                                 QM_SDQCR_TYPE_ACTIVE |
+                                                 QM_SDQCR_CHANNELS_POOL_CONV
+                                                 (channel));
+                       do {
+                               /* Keep draining DQRR while checking the MR*/
+                               qm_dqrr_drain_nomatch(&p->p);
+                               /* Process message ring too */
+                               found_fqrn = qm_mr_drain(&p->p, FQRN);
+                               cpu_relax();
+                       } while (!found_fqrn);
+
+               }
+               if (res != QM_MCR_RESULT_OK &&
+                   res != QM_MCR_RESULT_PENDING) {
+                       dev_err(dev, "retire_fq failed: FQ 0x%x, res=0x%x\n",
+                               fqid, res);
+                       ret = -EIO;
+                       goto out;
+               }
+               if (!(mcr->alterfq.fqs & QM_MCR_FQS_ORLPRESENT)) {
+                       /*
+                        * ORL had no entries, no need to wait until the
+                        * ERNs come in
+                        */
+                       orl_empty = 1;
+               }
+               /*
+                * Retirement succeeded, check to see if FQ needs
+                * to be drained
+                */
+               if (drain || mcr->alterfq.fqs & QM_MCR_FQS_NOTEMPTY) {
+                       /* FQ is Not Empty, drain using volatile DQ commands */
+                       do {
+                               u32 vdqcr = fqid | QM_VDQCR_NUMFRAMES_SET(3);
+
+                               qm_dqrr_vdqcr_set(&p->p, vdqcr);
+                               /*
+                                * Wait for a dequeue and process the dequeues,
+                                * making sure to empty the ring completely.
+                                * Keep issuing volatile dequeues until one of
+                                * them reports the FQ as empty.
+                                */
+                       } while (!qm_dqrr_drain_wait(&p->p, fqid, FQ_EMPTY));
+               }
+               qm_dqrr_sdqcr_set(&p->p, 0);
+
+               while (!orl_empty) {
+                       /* Wait for the ORL to have been completely drained */
+                       orl_empty = qm_mr_drain(&p->p, FQRL);
+                       cpu_relax();
+               }
+               mcc = qm_mc_start(&p->p);
+               mcc->alterfq.fqid = fqid;
+               qm_mc_commit(&p->p, QM_MCC_VERB_ALTER_OOS);
+               if (!qm_mc_result_timeout(&p->p, &mcr)) {
+                       ret = -ETIMEDOUT;
+                       goto out;
+               }
+
+               DPAA_ASSERT((mcr->verb & QM_MCR_VERB_MASK) ==
+                           QM_MCR_VERB_ALTER_OOS);
+               if (mcr->result != QM_MCR_RESULT_OK) {
+                       dev_err(dev, "OOS after drain fail: FQ 0x%x (0x%x)\n",
+                               fqid, mcr->result);
+                       ret = -EIO;
+                       goto out;
+               }
+               break;
+
+       case QM_MCR_NP_STATE_RETIRED:
+               /* Send OOS Command */
+               mcc = qm_mc_start(&p->p);
+               mcc->alterfq.fqid = fqid;
+               qm_mc_commit(&p->p, QM_MCC_VERB_ALTER_OOS);
+               if (!qm_mc_result_timeout(&p->p, &mcr)) {
+                       ret = -ETIMEDOUT;
+                       goto out;
+               }
+
+               DPAA_ASSERT((mcr->verb & QM_MCR_VERB_MASK) ==
+                           QM_MCR_VERB_ALTER_OOS);
+               if (mcr->result) {
+                       dev_err(dev, "OOS fail: FQ 0x%x (0x%x)\n",
+                               fqid, mcr->result);
+                       ret = -EIO;
+                       goto out;
+               }
+               break;
+
+       case QM_MCR_NP_STATE_OOS:
+               /*  Done */
+               break;
+
+       default:
+               ret = -EIO;
+       }
+
+out:
+       put_affine_portal();
+       return ret;
+}
+
+/* Return the configuration attached to @portal. */
+const struct qm_portal_config *
+qman_get_qm_portal_config(struct qman_portal *portal)
+{
+       const struct qm_portal_config *cfg = portal->config;
+
+       return cfg;
+}
+
+/*
+ * gen_pool allocators behind the qman_alloc_*_range() and qman_release_*()
+ * helpers in this file. IDs are stored in the pools with DPAA_GENALLOC_OFF
+ * OR-ed in: qman_alloc_range() masks it off again and the release helpers
+ * add it back before calling gen_pool_free().
+ */
+struct gen_pool *qm_fqalloc; /* FQID allocator */
+struct gen_pool *qm_qpalloc; /* pool-channel allocator */
+struct gen_pool *qm_cgralloc; /* CGR ID allocator */
+
+/*
+ * Allocate @cnt consecutive IDs from pool @p and store the first one, with
+ * DPAA_GENALLOC_OFF masked off, in @result.
+ *
+ * Returns 0 on success or -ENOMEM when gen_pool_alloc() returns 0.
+ */
+static int qman_alloc_range(struct gen_pool *p, u32 *result, u32 cnt)
+{
+       unsigned long addr = gen_pool_alloc(p, cnt);
+
+       if (addr == 0)
+               return -ENOMEM;
+
+       *result = addr & ~DPAA_GENALLOC_OFF;
+       return 0;
+}
+
+/*
+ * Allocate @count consecutive FQIDs from qm_fqalloc; the first is stored in
+ * @result. Returns 0 on success or -ENOMEM.
+ */
+int qman_alloc_fqid_range(u32 *result, u32 count)
+{
+       return qman_alloc_range(qm_fqalloc, result, count);
+}
+EXPORT_SYMBOL(qman_alloc_fqid_range);
+
+/*
+ * Allocate @count consecutive pool-channel IDs from qm_qpalloc; the first
+ * is stored in @result. Returns 0 on success or -ENOMEM.
+ */
+int qman_alloc_pool_range(u32 *result, u32 count)
+{
+       return qman_alloc_range(qm_qpalloc, result, count);
+}
+EXPORT_SYMBOL(qman_alloc_pool_range);
+
+/*
+ * Allocate @count consecutive CGR IDs from qm_cgralloc; the first is stored
+ * in @result. Returns 0 on success or -ENOMEM.
+ */
+int qman_alloc_cgrid_range(u32 *result, u32 count)
+{
+       return qman_alloc_range(qm_cgralloc, result, count);
+}
+EXPORT_SYMBOL(qman_alloc_cgrid_range);
+
+/*
+ * Shut down frame queue @fqid and hand the ID back to qm_fqalloc.
+ *
+ * If the shutdown fails the ID is not returned to the allocator and the
+ * shutdown error is returned; otherwise returns 0.
+ */
+int qman_release_fqid(u32 fqid)
+{
+       int err = qman_shutdown_fq(fqid);
+
+       if (!err) {
+               gen_pool_free(qm_fqalloc, fqid | DPAA_GENALLOC_OFF, 1);
+               return 0;
+       }
+
+       pr_debug("FQID %d leaked\n", fqid);
+       return err;
+}
+EXPORT_SYMBOL(qman_release_fqid);
+
+/*
+ * Shut down every non-OOS frame queue whose destination channel is the pool
+ * channel @qp, so the channel can be released.
+ *
+ * Returns 0 when the whole FQID range has been scanned, or a negative errno
+ * when a query fails for any reason other than running past the last valid
+ * FQID, or when a frame queue cannot be shut down. On error the caller must
+ * treat the pool channel as still in use.
+ */
+static int qpool_cleanup(u32 qp)
+{
+       /*
+        * We query all FQDs starting from
+        * FQID 1 until we get an "invalid FQID" error, looking for non-OOS FQDs
+        * whose destination channel is the pool-channel being released.
+        * When a non-OOS FQD is found we attempt to clean it up
+        */
+       struct qman_fq fq = {
+               .fqid = QM_FQID_RANGE_START
+       };
+       int err;
+
+       do {
+               struct qm_mcr_queryfq_np np;
+
+               err = qman_query_fq_np(&fq, &np);
+               if (err == -ERANGE)
+                       /* FQID range exceeded, found no problems */
+                       return 0;
+               else if (WARN_ON(err))
+                       /* Timeout or command failure: the scan is incomplete */
+                       return err;
+               if ((np.state & QM_MCR_NP_STATE_MASK) != QM_MCR_NP_STATE_OOS) {
+                       struct qm_fqd fqd;
+
+                       err = qman_query_fq(&fq, &fqd);
+                       if (WARN_ON(err))
+                               return err;
+                       if (qm_fqd_get_chan(&fqd) == qp) {
+                               /* The channel is the FQ's target, clean it */
+                               err = qman_shutdown_fq(fq.fqid);
+                               if (err)
+                                       /*
+                                        * Couldn't shut down the FQ
+                                        * so the pool must be leaked
+                                        */
+                                       return err;
+                       }
+               }
+               /* Move to the next FQID */
+               fq.fqid++;
+       } while (1);
+}
+
+/*
+ * Clean up pool channel @qp and hand the ID back to qm_qpalloc.
+ *
+ * If qpool_cleanup() fails the ID is not returned to the allocator and the
+ * cleanup error is returned; otherwise returns 0.
+ */
+int qman_release_pool(u32 qp)
+{
+       int err = qpool_cleanup(qp);
+
+       if (!err) {
+               gen_pool_free(qm_qpalloc, qp | DPAA_GENALLOC_OFF, 1);
+               return 0;
+       }
+
+       pr_debug("CHID %d leaked\n", qp);
+       return err;
+}
+EXPORT_SYMBOL(qman_release_pool);
+
+/*
+ * Check that no non-OOS frame queue still uses congestion group @cgrid.
+ *
+ * Returns 0 when the whole FQID range has been scanned without finding a
+ * user, -EIO when a frame queue still references the CGR, or the negative
+ * errno of a query that failed for any reason other than running past the
+ * last valid FQID. On error the caller must treat the CGR as still in use.
+ */
+static int cgr_cleanup(u32 cgrid)
+{
+       /*
+        * query all FQDs starting from QM_FQID_RANGE_START until we get an
+        * "invalid FQID" error, looking for non-OOS FQDs whose CGR is the CGR
+        * being released
+        */
+       struct qman_fq fq = {
+               .fqid = QM_FQID_RANGE_START
+       };
+       int err;
+
+       do {
+               struct qm_mcr_queryfq_np np;
+
+               err = qman_query_fq_np(&fq, &np);
+               if (err == -ERANGE)
+                       /* FQID range exceeded, found no problems */
+                       return 0;
+               else if (WARN_ON(err))
+                       /* Timeout or command failure: the scan is incomplete */
+                       return err;
+               if ((np.state & QM_MCR_NP_STATE_MASK) != QM_MCR_NP_STATE_OOS) {
+                       struct qm_fqd fqd;
+
+                       err = qman_query_fq(&fq, &fqd);
+                       if (WARN_ON(err))
+                               return err;
+                       if ((fqd.fq_ctrl & QM_FQCTRL_CGE) &&
+                           fqd.cgid == cgrid) {
+                               pr_err("CRGID 0x%x is being used by FQID 0x%x, CGR will be leaked\n",
+                                      cgrid, fq.fqid);
+                               return -EIO;
+                       }
+               }
+               /* Move to the next FQID */
+               fq.fqid++;
+       } while (1);
+}
+
+/*
+ * Verify that CGR @cgrid is unused and hand the ID back to qm_cgralloc.
+ *
+ * If cgr_cleanup() fails the ID is not returned to the allocator and the
+ * cleanup error is returned; otherwise returns 0.
+ */
+int qman_release_cgrid(u32 cgrid)
+{
+       int err = cgr_cleanup(cgrid);
+
+       if (!err) {
+               gen_pool_free(qm_cgralloc, cgrid | DPAA_GENALLOC_OFF, 1);
+               return 0;
+       }
+
+       pr_debug("CGRID %d leaked\n", cgrid);
+       return err;
+}
+EXPORT_SYMBOL(qman_release_cgrid);
diff --git a/drivers/soc/fsl/qbman/qman_ccsr.c b/drivers/soc/fsl/qbman/qman_ccsr.c
new file mode 100644 (file)
index 0000000..0cace9e
--- /dev/null
@@ -0,0 +1,808 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "qman_priv.h"
+
+/* QMan IP revision (QMAN_REVxx), selected by fsl_qman_probe() from IP_REV_1 */
+u16 qman_ip_rev;
+EXPORT_SYMBOL(qman_ip_rev);
+/*
+ * Base of the pool channel range seeded into qm_qpalloc; fsl_qman_probe()
+ * switches it to QMAN_CHANNEL_POOL1_REV3 on QMan rev >= 3.0.
+ */
+u16 qm_channel_pool1 = QMAN_CHANNEL_POOL1;
+EXPORT_SYMBOL(qm_channel_pool1);
+
+/*
+ * Register offsets
+ *
+ * Byte offsets from the start of the QMan CCSR space. qm_ccsr_in() and
+ * qm_ccsr_out() divide them by 4 to index the u32 mapping, so every offset
+ * must be a multiple of 4. The REG_REV3_* variants are the ones used when
+ * qman_ip_rev is QMAN_REV30 or later.
+ */
+#define REG_QCSP_LIO_CFG(n)    (0x0000 + ((n) * 0x10))
+#define REG_QCSP_IO_CFG(n)     (0x0004 + ((n) * 0x10))
+#define REG_QCSP_DD_CFG(n)     (0x000c + ((n) * 0x10))
+#define REG_DD_CFG             0x0200
+#define REG_DCP_CFG(n)         (0x0300 + ((n) * 0x10))
+#define REG_DCP_DD_CFG(n)      (0x0304 + ((n) * 0x10))
+#define REG_DCP_DLM_AVG(n)     (0x030c + ((n) * 0x10))
+#define REG_PFDR_FPC           0x0400
+#define REG_PFDR_FP_HEAD       0x0404
+#define REG_PFDR_FP_TAIL       0x0408
+#define REG_PFDR_FP_LWIT       0x0410
+#define REG_PFDR_CFG           0x0414
+#define REG_SFDR_CFG           0x0500
+#define REG_SFDR_IN_USE                0x0504
+#define REG_WQ_CS_CFG(n)       (0x0600 + ((n) * 0x04))
+#define REG_WQ_DEF_ENC_WQID    0x0630
+#define REG_WQ_SC_DD_CFG(n)    (0x640 + ((n) * 0x04))
+#define REG_WQ_PC_DD_CFG(n)    (0x680 + ((n) * 0x04))
+#define REG_WQ_DC0_DD_CFG(n)   (0x6c0 + ((n) * 0x04))
+#define REG_WQ_DC1_DD_CFG(n)   (0x700 + ((n) * 0x04))
+#define REG_WQ_DCn_DD_CFG(n)   (0x6c0 + ((n) * 0x40)) /* n=2,3 */
+#define REG_CM_CFG             0x0800
+#define REG_ECSR               0x0a00
+#define REG_ECIR               0x0a04
+#define REG_EADR               0x0a08
+#define REG_ECIR2              0x0a0c
+#define REG_EDATA(n)           (0x0a10 + ((n) * 0x04))
+#define REG_SBEC(n)            (0x0a80 + ((n) * 0x04))
+#define REG_MCR                        0x0b00
+#define REG_MCP(n)             (0x0b04 + ((n) * 0x04))
+#define REG_MISC_CFG           0x0be0
+#define REG_HID_CFG            0x0bf0
+#define REG_IDLE_STAT          0x0bf4
+#define REG_IP_REV_1           0x0bf8
+#define REG_IP_REV_2           0x0bfc
+#define REG_FQD_BARE           0x0c00
+#define REG_PFDR_BARE          0x0c20
+#define REG_offset_BAR         0x0004  /* relative to REG_[FQD|PFDR]_BARE */
+#define REG_offset_AR          0x0010  /* relative to REG_[FQD|PFDR]_BARE */
+#define REG_QCSP_BARE          0x0c80
+#define REG_QCSP_BAR           0x0c84
+#define REG_CI_SCHED_CFG       0x0d00
+#define REG_SRCIDR             0x0d04
+#define REG_LIODNR             0x0d08
+#define REG_CI_RLM_AVG         0x0d14
+#define REG_ERR_ISR            0x0e00
+#define REG_ERR_IER            0x0e04
+#define REG_REV3_QCSP_LIO_CFG(n)       (0x1000 + ((n) * 0x10))
+#define REG_REV3_QCSP_IO_CFG(n)        (0x1004 + ((n) * 0x10))
+#define REG_REV3_QCSP_DD_CFG(n)        (0x100c + ((n) * 0x10))
+
+/*
+ * Assists for QMAN_MCR
+ *
+ * MCR_INIT_PFDR is the command verb written to REG_MCR. The result code is
+ * the top byte of the register: 0 or anything >= 0xf0 means the command
+ * interface is idle, 0xf0 is success, 0xf8 an access error and 0xff an
+ * invalid command (see qm_init_pfdr()).
+ *
+ * Every expansion is fully parenthesized so the macros can be used inside
+ * larger expressions without precedence surprises.
+ */
+#define MCR_INIT_PFDR          0x01000000
+#define MCR_get_rslt(v)                ((u8)((v) >> 24))
+#define MCR_rslt_idle(r)       (!(r) || ((r) >= 0xf0))
+#define MCR_rslt_ok(r)         ((r) == 0xf0)
+#define MCR_rslt_eaccess(r)    ((r) == 0xf8)
+#define MCR_rslt_inval(r)      ((r) == 0xff)
+
+/*
+ * Corenet initiator settings. Stash request queues are 4-deep to match the
+ * cores' ability to snarf. Stash priority is 3, other priorities are 2.
+ *
+ * qm_set_corenet_initiator() packs these into REG_CI_SCHED_CFG as
+ * SRCCIV << 24, SRQ_W << 8, RW_W << 4 and BMAN_W in the low bits.
+ */
+#define QM_CI_SCHED_CFG_SRCCIV         4
+#define QM_CI_SCHED_CFG_SRQ_W          3
+#define QM_CI_SCHED_CFG_RW_W           2
+#define QM_CI_SCHED_CFG_BMAN_W         2
+/* write SRCCIV enable */
+#define QM_CI_SCHED_CFG_SRCCIV_EN      BIT(31)
+
+/*
+ * Follows WQ_CS_CFG0-5: each value is the index passed to REG_WQ_CS_CFG().
+ * qm_wq_first/qm_wq_last bound the loop in qman_init_ccsr().
+ */
+enum qm_wq_class {
+       qm_wq_portal = 0,
+       qm_wq_pool = 1,
+       qm_wq_fman0 = 2,
+       qm_wq_fman1 = 3,
+       qm_wq_caam = 4,
+       qm_wq_pme = 5,
+       qm_wq_first = qm_wq_portal,
+       qm_wq_last = qm_wq_pme
+};
+
+/*
+ * Follows FQD_[BARE|BAR|AR] and PFDR_[BARE|BAR|AR]: selects which register
+ * group qm_set_memory() programs.
+ */
+enum qm_memory {
+       qm_memory_fqd,
+       qm_memory_pfdr
+};
+
+/* Used by all error interrupt registers except 'inhibit' */
+#define QM_EIRQ_CIDE   0x20000000      /* Corenet Initiator Data Error */
+#define QM_EIRQ_CTDE   0x10000000      /* Corenet Target Data Error */
+#define QM_EIRQ_CITT   0x08000000      /* Corenet Invalid Target Transaction */
+#define QM_EIRQ_PLWI   0x04000000      /* PFDR Low Watermark */
+#define QM_EIRQ_MBEI   0x02000000      /* Multi-bit ECC Error */
+#define QM_EIRQ_SBEI   0x01000000      /* Single-bit ECC Error */
+#define QM_EIRQ_PEBI   0x00800000      /* PFDR Enqueues Blocked Interrupt */
+#define QM_EIRQ_IFSI   0x00020000      /* Invalid FQ Flow Control State */
+#define QM_EIRQ_ICVI   0x00010000      /* Invalid Command Verb */
+#define QM_EIRQ_IDDI   0x00000800      /* Invalid Dequeue (Direct-connect) */
+#define QM_EIRQ_IDFI   0x00000400      /* Invalid Dequeue FQ */
+#define QM_EIRQ_IDSI   0x00000200      /* Invalid Dequeue Source */
+#define QM_EIRQ_IDQI   0x00000100      /* Invalid Dequeue Queue */
+#define QM_EIRQ_IECE   0x00000010      /* Invalid Enqueue Configuration */
+#define QM_EIRQ_IEOI   0x00000008      /* Invalid Enqueue Overflow */
+#define QM_EIRQ_IESI   0x00000004      /* Invalid Enqueue State */
+#define QM_EIRQ_IECI   0x00000002      /* Invalid Enqueue Channel */
+#define QM_EIRQ_IEQI   0x00000001      /* Invalid Enqueue Queue */
+
+/*
+ * QMAN_ECIR valid error bits: ECSR bits for which log_additional_error_info()
+ * reports captured information. PORTAL_ECSR_ERR selects the errors for which
+ * the portal type/number is printed, FQID_ECSR_ERR those for which the FQID
+ * is printed.
+ */
+#define PORTAL_ECSR_ERR        (QM_EIRQ_IEQI | QM_EIRQ_IESI | QM_EIRQ_IEOI | \
+                        QM_EIRQ_IDQI | QM_EIRQ_IDSI | QM_EIRQ_IDFI | \
+                        QM_EIRQ_IDDI | QM_EIRQ_ICVI | QM_EIRQ_IFSI)
+#define FQID_ECSR_ERR  (QM_EIRQ_IEQI | QM_EIRQ_IECI | QM_EIRQ_IESI | \
+                        QM_EIRQ_IEOI | QM_EIRQ_IDQI | QM_EIRQ_IDFI | \
+                        QM_EIRQ_IFSI)
+
+/* Raw value of the ECIR register, decoded by the qm_ecir_*() helpers */
+struct qm_ecir {
+       u32 info; /* res[30-31], ptyp[29], pnum[24-28], fqid[0-23] */
+};
+
+/* True when the ptyp bit (bit 29) of ECIR is set, reported as "DCP" */
+static bool qm_ecir_is_dcp(const struct qm_ecir *p)
+{
+       return (p->info & BIT(29)) != 0;
+}
+
+/* Extract the 5-bit portal number held in bits 24-28 of ECIR */
+static int qm_ecir_get_pnum(const struct qm_ecir *p)
+{
+       const u32 pnum = (p->info >> 24) & 0x1f;
+
+       return pnum;
+}
+
+/* Extract the 24-bit FQID held in bits 0-23 of ECIR */
+static int qm_ecir_get_fqid(const struct qm_ecir *p)
+{
+       return p->info & 0x00ffffff;
+}
+
+/* Raw value of the ECIR2 register, which is only read on QMan rev >= 3.0 */
+struct qm_ecir2 {
+       u32 info; /* ptyp[31], res[10-30], pnum[0-9] */
+};
+
+/* True when the ptyp bit (bit 31) of ECIR2 is set, reported as "DCP" */
+static bool qm_ecir2_is_dcp(const struct qm_ecir2 *p)
+{
+       return (p->info & BIT(31)) != 0;
+}
+
+/* Extract the 10-bit portal number held in bits 0-9 of ECIR2 */
+static int qm_ecir2_get_pnum(const struct qm_ecir2 *p)
+{
+       return p->info & 0x3ff;
+}
+
+/*
+ * Raw value of the EADR register. The field widths differ between QMan
+ * revisions, hence the separate qm_eadr_*() and qm_eadr_v3_*() accessors.
+ */
+struct qm_eadr {
+       u32 info; /* memid[24-27], eadr[0-11] */
+                 /* v3: memid[24-28], eadr[0-15] */
+};
+
+/* Pre-v3 layout: 4-bit memory id in bits 24-27 */
+static int qm_eadr_get_memid(const struct qm_eadr *p)
+{
+       const u32 memid = (p->info >> 24) & 0xf;
+
+       return memid;
+}
+
+static int qm_eadr_get_eadr(const struct qm_eadr *p)
+{
+       return p->info & (BIT(12) - 1);
+}
+
+/* v3 layout: 5-bit memory id in bits 24-28 */
+static int qm_eadr_v3_get_memid(const struct qm_eadr *p)
+{
+       const u32 memid = (p->info >> 24) & 0x1f;
+
+       return memid;
+}
+
+static int qm_eadr_v3_get_eadr(const struct qm_eadr *p)
+{
+       return p->info & (BIT(16) - 1);
+}
+
+/* Pairs one QM_EIRQ_* interrupt bit with the text logged for it */
+struct qman_hwerr_txt {
+       u32 mask;
+       const char *txt;
+};
+
+
+/*
+ * Walked by qman_isr(): for each entry whose mask is both raised and enabled
+ * the text is logged with an "ErrInt: " prefix.
+ */
+static const struct qman_hwerr_txt qman_hwerr_txts[] = {
+       { QM_EIRQ_CIDE, "Corenet Initiator Data Error" },
+       { QM_EIRQ_CTDE, "Corenet Target Data Error" },
+       { QM_EIRQ_CITT, "Corenet Invalid Target Transaction" },
+       { QM_EIRQ_PLWI, "PFDR Low Watermark" },
+       { QM_EIRQ_MBEI, "Multi-bit ECC Error" },
+       { QM_EIRQ_SBEI, "Single-bit ECC Error" },
+       { QM_EIRQ_PEBI, "PFDR Enqueues Blocked Interrupt" },
+       { QM_EIRQ_ICVI, "Invalid Command Verb" },
+       { QM_EIRQ_IFSI, "Invalid Flow Control State" },
+       { QM_EIRQ_IDDI, "Invalid Dequeue (Direct-connect)" },
+       { QM_EIRQ_IDFI, "Invalid Dequeue FQ" },
+       { QM_EIRQ_IDSI, "Invalid Dequeue Source" },
+       { QM_EIRQ_IDQI, "Invalid Dequeue Queue" },
+       { QM_EIRQ_IECE, "Invalid Enqueue Configuration" },
+       { QM_EIRQ_IEOI, "Invalid Enqueue Overflow" },
+       { QM_EIRQ_IESI, "Invalid Enqueue State" },
+       { QM_EIRQ_IECI, "Invalid Enqueue Channel" },
+       { QM_EIRQ_IEQI, "Invalid Enqueue Queue" },
+};
+
+/*
+ * Describes one internal memory that an ECC error can be reported against:
+ * addr_mask is ANDed with the address captured in EADR, bits is the number
+ * of EDATA bits dumped by log_edata_bits() and txt names the memory.
+ */
+struct qman_error_info_mdata {
+       u16 addr_mask;
+       u16 bits;
+       const char *txt;
+};
+
+/*
+ * Indexed by the memory id decoded from EADR. The id field is 4 bits wide
+ * (5 bits on rev 3), so it can encode values beyond the entries listed here.
+ */
+static const struct qman_error_info_mdata error_mdata[] = {
+       { 0x01FF, 24, "FQD cache tag memory 0" },
+       { 0x01FF, 24, "FQD cache tag memory 1" },
+       { 0x01FF, 24, "FQD cache tag memory 2" },
+       { 0x01FF, 24, "FQD cache tag memory 3" },
+       { 0x0FFF, 512, "FQD cache memory" },
+       { 0x07FF, 128, "SFDR memory" },
+       { 0x01FF, 72, "WQ context memory" },
+       { 0x00FF, 240, "CGR memory" },
+       { 0x00FF, 302, "Internal Order Restoration List memory" },
+       { 0x01FF, 256, "SW portal ring memory" },
+};
+
+/* Errors whose enable bit qman_isr() clears in ERR_IER once they have fired */
+#define QMAN_ERRS_TO_DISABLE (QM_EIRQ_PLWI | QM_EIRQ_PEBI)
+
+/*
+ * TODO: unimplemented registers
+ *
+ * Keeping a list here of QMan registers I have not yet covered;
+ * QCSP_DD_IHRSR, QCSP_DD_IHRFR, QCSP_DD_HASR,
+ * DCP_DD_IHRSR, DCP_DD_IHRFR, DCP_DD_HASR, CM_CFG,
+ * QMAN_EECC, QMAN_SBET, QMAN_EINJ, QMAN_SBEC0-12
+ */
+
+/*
+ * Pointer to the start of the QMan's CCSR space; mapped by fsl_qman_probe()
+ * and indexed in 32-bit words by qm_ccsr_in()/qm_ccsr_out().
+ */
+static u32 __iomem *qm_ccsr_start;
+/*
+ * A SDQCR mask comprising all the available/visible pool channels; built up
+ * in qman_resource_init() and exposed through qm_get_pools_sdqcr().
+ */
+static u32 qm_pools_sdqcr;
+
+/* Big-endian 32-bit read of the CCSR register at byte offset @offset */
+static inline u32 qm_ccsr_in(u32 offset)
+{
+       u32 __iomem *reg = &qm_ccsr_start[offset / 4];
+
+       return ioread32be(reg);
+}
+
+/* Big-endian 32-bit write of @val to the CCSR register at byte offset @offset */
+static inline void qm_ccsr_out(u32 offset, u32 val)
+{
+       u32 __iomem *reg = &qm_ccsr_start[offset / 4];
+
+       iowrite32be(val, reg);
+}
+
+/* Return the SDQCR pool-channel mask accumulated by qman_resource_init() */
+u32 qm_get_pools_sdqcr(void)
+{
+       return qm_pools_sdqcr;
+}
+
+/* Direct-connect portal index, used as the argument to REG_DCP_CFG() */
+enum qm_dc_portal {
+       qm_dc_portal_fman0 = 0,
+       qm_dc_portal_fman1 = 1
+};
+
+/*
+ * Program the DCP_CFG register of direct-connect portal @portal.
+ *
+ * A non-zero @ed sets bit 0x1000 on QMan rev >= 3.0 and bit 0x100 on older
+ * revisions; @sernd fills the low field (masked to 10 bits on rev >= 3.0,
+ * 5 bits otherwise).
+ */
+static void qm_set_dc(enum qm_dc_portal portal, int ed, u8 sernd)
+{
+       u32 cfg;
+
+       DPAA_ASSERT(!ed || portal == qm_dc_portal_fman0 ||
+                   portal == qm_dc_portal_fman1);
+
+       if ((qman_ip_rev & 0xFF00) >= QMAN_REV30) {
+               cfg = sernd & 0x3ff;
+               if (ed)
+                       cfg |= 0x1000;
+       } else {
+               cfg = sernd & 0x1f;
+               if (ed)
+                       cfg |= 0x100;
+       }
+       qm_ccsr_out(REG_DCP_CFG(portal), cfg);
+}
+
+/*
+ * Program the WQ_CS_CFG register for work-queue class @wq_class.
+ *
+ * @cs_elev occupies bits 24-31; @csw2..@csw7 are 3-bit values placed at bits
+ * 20, 16, 12, 8, 4 and 0 respectively.
+ *
+ * The fields are widened to u32 before shifting: the u8 arguments would
+ * otherwise be promoted to int, and shifting a cs_elev >= 0x80 left by 24
+ * would move a set bit into the sign bit of a signed int.
+ */
+static void qm_set_wq_scheduling(enum qm_wq_class wq_class,
+                                u8 cs_elev, u8 csw2, u8 csw3, u8 csw4,
+                                u8 csw5, u8 csw6, u8 csw7)
+{
+       u32 cfg = ((u32)(cs_elev & 0xff) << 24) |
+                 ((u32)(csw2 & 0x7) << 20) | ((u32)(csw3 & 0x7) << 16) |
+                 ((u32)(csw4 & 0x7) << 12) | ((u32)(csw5 & 0x7) << 8) |
+                 ((u32)(csw6 & 0x7) << 4) | (u32)(csw7 & 0x7);
+
+       qm_ccsr_out(REG_WQ_CS_CFG(wq_class), cfg);
+}
+
+/* Write 0 to the HID_CFG register */
+static void qm_set_hid(void)
+{
+       qm_ccsr_out(REG_HID_CFG, 0);
+}
+
+/* Assemble the QM_CI_SCHED_CFG_* constants and write them to CI_SCHED_CFG */
+static void qm_set_corenet_initiator(void)
+{
+       u32 cfg = QM_CI_SCHED_CFG_SRCCIV_EN;
+
+       cfg |= QM_CI_SCHED_CFG_SRCCIV << 24;
+       cfg |= QM_CI_SCHED_CFG_SRQ_W << 8;
+       cfg |= QM_CI_SCHED_CFG_RW_W << 4;
+       cfg |= QM_CI_SCHED_CFG_BMAN_W;
+
+       qm_ccsr_out(REG_CI_SCHED_CFG, cfg);
+}
+
+/*
+ * Split the IP_REV_1 register into its fields: @id receives bits 16-31,
+ * @major bits 8-15 and @minor bits 0-7.
+ */
+static void qm_get_version(u16 *id, u8 *major, u8 *minor)
+{
+       const u32 rev = qm_ccsr_in(REG_IP_REV_1);
+
+       *minor = rev & 0xff;
+       *major = (rev >> 8) & 0xff;
+       *id = rev >> 16;
+}
+
+#define PFDR_AR_EN             BIT(31)
+/*
+ * Program the BARE/BAR/AR register group of the FQD or PFDR private memory.
+ *
+ * @ba is split into its upper and lower 32 bits for BARE and BAR. AR gets
+ * PFDR_AR_EN (used for both memories) together with log2(@size) - 1.
+ * @size must be a power of two between 4 KiB and 1 GiB and @ba must be
+ * aligned to @size; both are only checked through DPAA_ASSERT().
+ */
+static void qm_set_memory(enum qm_memory memory, u64 ba, u32 size)
+{
+       u32 bare;
+       u32 order;
+
+       if (memory == qm_memory_fqd)
+               bare = REG_FQD_BARE;
+       else
+               bare = REG_PFDR_BARE;
+       order = ilog2(size);
+
+       /* choke if size isn't within range */
+       DPAA_ASSERT((size >= 4096) && (size <= 1024*1024*1024) &&
+                   is_power_of_2(size));
+       /* choke if 'ba' has lower-alignment than 'size' */
+       DPAA_ASSERT(!(ba & (size - 1)));
+
+       qm_ccsr_out(bare, upper_32_bits(ba));
+       qm_ccsr_out(bare + REG_offset_BAR, lower_32_bits(ba));
+       qm_ccsr_out(bare + REG_offset_AR, PFDR_AR_EN | (order - 1));
+}
+
+/* Write the low 24 bits of @th to PFDR_FP_LWIT and @k to PFDR_CFG */
+static void qm_set_pfdr_threshold(u32 th, u8 k)
+{
+       const u32 lwit = th & 0xffffff;
+
+       qm_ccsr_out(REG_PFDR_FP_LWIT, lwit);
+       qm_ccsr_out(REG_PFDR_CFG, k);
+}
+
+/* Write the low 10 bits of @th to SFDR_CFG */
+static void qm_set_sfdr_threshold(u16 th)
+{
+       const u32 cfg = th & 0x3ff;
+
+       qm_ccsr_out(REG_SFDR_CFG, cfg);
+}
+
+/*
+ * Issue the MCR_INIT_PFDR management command.
+ *
+ * @pfdr_start is written to MCP(0) and @pfdr_start + @num - 16 to MCP(1);
+ * both @pfdr_start and @num must be non-zero multiples of 8 (checked with
+ * DPAA_ASSERT() only). The command interface is expected to be idle on
+ * entry; if it is not, this is logged and warned about but the command is
+ * issued anyway. The function then polls REG_MCR until it reports idle again.
+ *
+ * Returns 0 on success, -EACCES or -EINVAL for the corresponding result
+ * codes, and -ENODEV for any other result.
+ */
+static int qm_init_pfdr(struct device *dev, u32 pfdr_start, u32 num)
+{
+       u8 rslt = MCR_get_rslt(qm_ccsr_in(REG_MCR));
+
+       DPAA_ASSERT(pfdr_start && !(pfdr_start & 7) && !(num & 7) && num);
+       /* Make sure the command interface is 'idle' */
+       if (!MCR_rslt_idle(rslt)) {
+               /* Terminate the line so it is not merged with later output */
+               dev_crit(dev, "QMAN_MCR isn't idle\n");
+               WARN_ON(1);
+       }
+
+       /* Write the MCR command params then the verb */
+       qm_ccsr_out(REG_MCP(0), pfdr_start);
+       /*
+        * TODO: remove this - it's a workaround for a model bug that is
+        * corrected in more recent versions. We use the workaround until
+        * everyone has upgraded.
+        */
+       qm_ccsr_out(REG_MCP(1), pfdr_start + num - 16);
+       /* Order the parameter writes before the verb write */
+       dma_wmb();
+       qm_ccsr_out(REG_MCR, MCR_INIT_PFDR);
+       /* Poll for the result */
+       do {
+               rslt = MCR_get_rslt(qm_ccsr_in(REG_MCR));
+       } while (!MCR_rslt_idle(rslt));
+       if (MCR_rslt_ok(rslt))
+               return 0;
+       if (MCR_rslt_eaccess(rslt))
+               return -EACCES;
+       if (MCR_rslt_inval(rslt))
+               return -EINVAL;
+       dev_crit(dev, "Unexpected result from MCR_INIT_PFDR: %02x\n", rslt);
+       return -ENODEV;
+}
+
+/*
+ * Ideally we would use the DMA API to turn rmem->base into a DMA address
+ * (especially if iommu translations ever get involved).  Unfortunately, the
+ * DMA API currently does not allow mapping anything that is not backed with
+ * a struct page.
+ *
+ * Base address and size of the FQD and PFDR reserved-memory regions, filled
+ * in by the qman_fqd() and qman_pfdr() reserved-memory callbacks and
+ * consumed by fsl_qman_probe()/qman_init_ccsr().
+ */
+static dma_addr_t fqd_a, pfdr_a;
+static size_t fqd_sz, pfdr_sz;
+
+/*
+ * Reserved-memory callback for "fsl,qman-fqd": remember the region's base
+ * and size, warning if either is zero. Always returns 0.
+ */
+static int qman_fqd(struct reserved_mem *rmem)
+{
+       fqd_sz = rmem->size;
+       fqd_a = rmem->base;
+
+       WARN_ON(!fqd_a || !fqd_sz);
+       return 0;
+}
+RESERVEDMEM_OF_DECLARE(qman_fqd, "fsl,qman-fqd", qman_fqd);
+
+/*
+ * Reserved-memory callback for "fsl,qman-pfdr": remember the region's base
+ * and size, warning if either is zero. Always returns 0.
+ */
+static int qman_pfdr(struct reserved_mem *rmem)
+{
+       pfdr_sz = rmem->size;
+       pfdr_a = rmem->base;
+
+       WARN_ON(!pfdr_a || !pfdr_sz);
+       return 0;
+}
+RESERVEDMEM_OF_DECLARE(qman_pfdr, "fsl,qman-pfdr", qman_pfdr);
+
+/* Number of 64-byte units that fit in the FQD region, used as the FQID count */
+static unsigned int qm_get_fqid_maxcnt(void)
+{
+       const size_t count = fqd_sz / 64;
+
+       return count;
+}
+
+/*
+ * Zero a private memory range and flush it from the data cache so that QMAN
+ * originated transactions for this memory region could be marked
+ * non-coherent.
+ *
+ * Returns 0 on success or -ENOMEM if the range cannot be mapped; previously
+ * a failed mapping was passed straight to memset_io().
+ */
+static int zero_priv_mem(struct device *dev, struct device_node *node,
+                        phys_addr_t addr, size_t sz)
+{
+       /* map as cacheable, non-guarded */
+       void __iomem *tmpp = ioremap_prot(addr, sz, 0);
+
+       if (!tmpp) {
+               dev_err(dev, "Failed to map private memory for zeroing\n");
+               return -ENOMEM;
+       }
+
+       memset_io(tmpp, 0, sz);
+       flush_dcache_range((unsigned long)tmpp,
+                          (unsigned long)tmpp + sz);
+       iounmap(tmpp);
+
+       return 0;
+}
+
+/*
+ * Dump the EDATA registers that hold @bit_count bits of captured data.
+ *
+ * The number of 32-bit words is @bit_count rounded up; the dump starts at
+ * EDATA word (16 - words) and runs to word 15. When @bit_count is not a
+ * multiple of 32 the first word printed is masked down to the remainder.
+ */
+static void log_edata_bits(struct device *dev, u32 bit_count)
+{
+       u32 words = bit_count / 32;
+       const u32 rem = bit_count % 32;
+       u32 first_mask = 0xffffffff;
+       u32 idx;
+
+       dev_warn(dev, "ErrInt, EDATA:\n");
+       if (rem) {
+               words++;
+               first_mask = ~(first_mask << rem);
+       }
+
+       idx = 16 - words;
+       dev_warn(dev, "  0x%08x\n", qm_ccsr_in(REG_EDATA(idx)) & first_mask);
+       for (idx++; idx < 16; idx++)
+               dev_warn(dev, "  0x%08x\n", qm_ccsr_in(REG_EDATA(idx)));
+}
+
+/*
+ * Report the memory and address captured in EADR for an ECC error.
+ *
+ * @memid comes straight from a 4- or 5-bit hardware field, so it is range
+ * checked before being used to index error_mdata[].
+ */
+static void qm_log_eadr_info(struct device *dev, int memid, int eadr)
+{
+       if (memid < 0 || memid >= (int)ARRAY_SIZE(error_mdata)) {
+               dev_warn(dev, "ErrInt: EADR Memory: unknown id %d, 0x%x\n",
+                        memid, eadr);
+               return;
+       }
+
+       dev_warn(dev, "ErrInt: EADR Memory: %s, 0x%x\n",
+                error_mdata[memid].txt,
+                error_mdata[memid].addr_mask & eadr);
+       log_edata_bits(dev, error_mdata[memid].bits);
+}
+
+/*
+ * Log the details captured alongside an error interrupt.
+ *
+ * Depending on which bits are set in @ecsr_val this prints the portal that
+ * caused the error, the FQID involved and, for single/multi-bit ECC errors,
+ * the affected memory plus the captured data. QMan rev >= 3.0 takes the
+ * portal from ECIR2, additionally reports the FQID for QM_EIRQ_IECE, and
+ * uses the wider v3 EADR layout. @isr_val is currently unused.
+ */
+static void log_additional_error_info(struct device *dev, u32 isr_val,
+                                     u32 ecsr_val)
+{
+       struct qm_ecir ecir_val;
+       struct qm_eadr eadr_val;
+
+       ecir_val.info = qm_ccsr_in(REG_ECIR);
+       /* Is portal info valid */
+       if ((qman_ip_rev & 0xFF00) >= QMAN_REV30) {
+               struct qm_ecir2 ecir2_val;
+
+               ecir2_val.info = qm_ccsr_in(REG_ECIR2);
+               if (ecsr_val & PORTAL_ECSR_ERR) {
+                       dev_warn(dev, "ErrInt: %s id %d\n",
+                                qm_ecir2_is_dcp(&ecir2_val) ? "DCP" : "SWP",
+                                qm_ecir2_get_pnum(&ecir2_val));
+               }
+               if (ecsr_val & (FQID_ECSR_ERR | QM_EIRQ_IECE))
+                       dev_warn(dev, "ErrInt: ecir.fqid 0x%x\n",
+                                qm_ecir_get_fqid(&ecir_val));
+
+               if (ecsr_val & (QM_EIRQ_SBEI|QM_EIRQ_MBEI)) {
+                       eadr_val.info = qm_ccsr_in(REG_EADR);
+                       qm_log_eadr_info(dev,
+                                        qm_eadr_v3_get_memid(&eadr_val),
+                                        qm_eadr_v3_get_eadr(&eadr_val));
+               }
+       } else {
+               if (ecsr_val & PORTAL_ECSR_ERR) {
+                       dev_warn(dev, "ErrInt: %s id %d\n",
+                                qm_ecir_is_dcp(&ecir_val) ? "DCP" : "SWP",
+                                qm_ecir_get_pnum(&ecir_val));
+               }
+               if (ecsr_val & FQID_ECSR_ERR)
+                       dev_warn(dev, "ErrInt: ecir.fqid 0x%x\n",
+                                qm_ecir_get_fqid(&ecir_val));
+
+               if (ecsr_val & (QM_EIRQ_SBEI|QM_EIRQ_MBEI)) {
+                       eadr_val.info = qm_ccsr_in(REG_EADR);
+                       qm_log_eadr_info(dev,
+                                        qm_eadr_get_memid(&eadr_val),
+                                        qm_eadr_get_eadr(&eadr_val));
+               }
+       }
+}
+
+/*
+ * Error interrupt handler; @ptr is the struct device given at request time.
+ *
+ * Reads ERR_IER, ERR_ISR and ECSR, and returns IRQ_NONE when no enabled
+ * error is raised. Otherwise every raised-and-enabled error is logged; when
+ * ECSR has the same bit set the captured details are logged and ECSR is
+ * written back, and errors in QMAN_ERRS_TO_DISABLE are masked in ERR_IER.
+ * Finally the raw ERR_ISR value is written back to the register.
+ */
+static irqreturn_t qman_isr(int irq, void *ptr)
+{
+       struct device *dev = ptr;
+       u32 enabled = qm_ccsr_in(REG_ERR_IER);
+       u32 status = qm_ccsr_in(REG_ERR_ISR);
+       u32 captured = qm_ccsr_in(REG_ECSR);
+       u32 pending = status & enabled;
+       u32 n;
+
+       if (!pending)
+               return IRQ_NONE;
+
+       for (n = 0; n < ARRAY_SIZE(qman_hwerr_txts); n++) {
+               const struct qman_hwerr_txt *e = &qman_hwerr_txts[n];
+
+               if (!(e->mask & pending))
+                       continue;
+
+               dev_err_ratelimited(dev, "ErrInt: %s\n", e->txt);
+               if (e->mask & captured) {
+                       log_additional_error_info(dev, pending, captured);
+                       /* Re-arm error capture registers */
+                       qm_ccsr_out(REG_ECSR, captured);
+               }
+               if (e->mask & QMAN_ERRS_TO_DISABLE) {
+                       dev_dbg(dev, "Disabling error 0x%x\n", e->mask);
+                       enabled &= ~e->mask;
+                       qm_ccsr_out(REG_ERR_IER, enabled);
+               }
+       }
+       qm_ccsr_out(REG_ERR_ISR, status);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * One-time CCSR setup: program the FQD and PFDR memory windows, initialise
+ * the PFDRs, then apply thresholds, corenet initiator, HID, work-queue
+ * scheduling and direct-connect portal settings.
+ *
+ * Returns 0 on success or the error from qm_init_pfdr().
+ */
+static int qman_init_ccsr(struct device *dev)
+{
+       enum qm_wq_class wq;
+       int err;
+
+       /* Private memory windows: FQD first, then PFDR */
+       qm_set_memory(qm_memory_fqd, fqd_a, fqd_sz);
+       qm_set_memory(qm_memory_pfdr, pfdr_a, pfdr_sz);
+
+       /* PFDRs are counted in 64-byte units; the first 8 are skipped */
+       err = qm_init_pfdr(dev, 8, pfdr_sz / 64 - 8);
+       if (err)
+               return err;
+
+       /* thresholds */
+       qm_set_pfdr_threshold(512, 64);
+       qm_set_sfdr_threshold(128);
+
+       /* clear stale PEBI bit from interrupt status register */
+       qm_ccsr_out(REG_ERR_ISR, QM_EIRQ_PEBI);
+
+       /* corenet initiator and HID settings */
+       qm_set_corenet_initiator();
+       qm_set_hid();
+
+       /* Set scheduling weights to defaults */
+       for (wq = qm_wq_first; wq <= qm_wq_last; wq++)
+               qm_set_wq_scheduling(wq, 0, 0, 0, 0, 0, 0, 0);
+
+       /* We are not prepared to accept ERNs for hardware enqueues */
+       qm_set_dc(qm_dc_portal_fman0, 1, 0);
+       qm_set_dc(qm_dc_portal_fman1, 1, 0);
+
+       return 0;
+}
+
+#define LIO_CFG_LIODN_MASK 0x0fff0000
+/*
+ * Give the software portal behind @channel the same LIODN field as the
+ * first portal this function was called for.
+ *
+ * The first call only records the LIODN_MASK bits of that portal's
+ * QCSP_LIO_CFG register; every later call rewrites those bits in its own
+ * portal's register with the recorded value.
+ */
+void qman_liodn_fixup(u16 channel)
+{
+       static int done;
+       static u32 liodn_offset;
+       const int idx = channel - QM_CHANNEL_SWPORTAL0;
+       u32 reg, cfg;
+
+       /* The register moved on QMan rev >= 3.0 */
+       if ((qman_ip_rev & 0xFF00) >= QMAN_REV30)
+               reg = REG_REV3_QCSP_LIO_CFG(idx);
+       else
+               reg = REG_QCSP_LIO_CFG(idx);
+
+       cfg = qm_ccsr_in(reg);
+       if (!done) {
+               liodn_offset = cfg & LIO_CFG_LIODN_MASK;
+               done = 1;
+               return;
+       }
+
+       cfg &= ~LIO_CFG_LIODN_MASK;
+       cfg |= liodn_offset;
+       qm_ccsr_out(reg, cfg);
+}
+
+#define IO_CFG_SDEST_MASK 0x00ff0000
+/*
+ * Set the SDEST field (bits 16-23) of the QCSP_IO_CFG register belonging to
+ * the software portal behind @channel, derived from @cpu_idx.
+ *
+ * On QMan rev >= 3.0 the REV3 register is used and @cpu_idx is halved
+ * first, since each pair of vcpus shares one SRQ.
+ */
+void qman_set_sdest(u16 channel, unsigned int cpu_idx)
+{
+       const int idx = channel - QM_CHANNEL_SWPORTAL0;
+       u32 reg, cfg;
+
+       if ((qman_ip_rev & 0xFF00) >= QMAN_REV30) {
+               reg = REG_REV3_QCSP_IO_CFG(idx);
+               /* Each pair of vcpu share the same SRQ(SDEST) */
+               cpu_idx /= 2;
+       } else {
+               reg = REG_QCSP_IO_CFG(idx);
+       }
+
+       cfg = qm_ccsr_in(reg);
+       cfg = (cfg & (~IO_CFG_SDEST_MASK)) | (cpu_idx << 16);
+       qm_ccsr_out(reg, cfg);
+}
+
+/*
+ * Seed the pool-channel, CGRID and FQID allocators with the ranges valid
+ * for the detected QMan major revision and build the SDQCR pool mask.
+ *
+ * Returns 0 on success, -ENODEV for an unknown major revision, or the error
+ * from gen_pool_add().
+ */
+static int qman_resource_init(struct device *dev)
+{
+       int pool_chan_num, cgrid_num;
+       int ret, i;
+
+       switch (qman_ip_rev >> 8) {
+       case 1:
+       case 3:
+               pool_chan_num = 15;
+               cgrid_num = 256;
+               break;
+       case 2:
+               pool_chan_num = 3;
+               cgrid_num = 64;
+               break;
+       default:
+               return -ENODEV;
+       }
+
+       ret = gen_pool_add(qm_qpalloc, qm_channel_pool1 | DPAA_GENALLOC_OFF,
+                          pool_chan_num, -1);
+       if (ret) {
+               dev_err(dev, "Failed to seed pool channels (%d)\n", ret);
+               return ret;
+       }
+
+       ret = gen_pool_add(qm_cgralloc, DPAA_GENALLOC_OFF, cgrid_num, -1);
+       if (ret) {
+               dev_err(dev, "Failed to seed CGRID range (%d)\n", ret);
+               return ret;
+       }
+
+       /*
+        * parse pool channels into the SDQCR mask
+        *
+        * NOTE(review): the loop is bounded by cgrid_num although it is
+        * described as covering pool channels - confirm whether
+        * pool_chan_num (and channel ids based on qm_channel_pool1) was
+        * intended.
+        */
+       for (i = 0; i < cgrid_num; i++)
+               qm_pools_sdqcr |= QM_SDQCR_CHANNELS_POOL_CONV(i);
+
+       ret = gen_pool_add(qm_fqalloc, QM_FQID_RANGE_START | DPAA_GENALLOC_OFF,
+                          qm_get_fqid_maxcnt() - QM_FQID_RANGE_START, -1);
+       if (ret)
+               dev_err(dev, "Failed to seed FQID range (%d)\n", ret);
+
+       return ret;
+}
+
+/*
+ * Platform probe for the QMan CCSR block.
+ *
+ * Maps the CCSR registers, identifies the IP revision, zeroes the FQD
+ * private memory, initialises the hardware, installs the error interrupt
+ * handler and finally sets up the id allocators, FQ table and work queue.
+ *
+ * Returns 0 on success or a negative errno from the first step that fails.
+ */
+static int fsl_qman_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct device_node *node = dev->of_node;
+       struct resource *res;
+       int ret, err_irq;
+       u16 id;
+       u8 major, minor;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(dev, "Can't get %s property 'IORESOURCE_MEM'\n",
+                       node->full_name);
+               return -ENXIO;
+       }
+       qm_ccsr_start = devm_ioremap(dev, res->start, resource_size(res));
+       if (!qm_ccsr_start)
+               return -ENXIO;
+
+       /* Translate the hardware major/minor pair into a QMAN_REVxx value */
+       qm_get_version(&id, &major, &minor);
+       switch ((major << 8) | minor) {
+       case 0x0100:
+               dev_err(dev, "Rev1.0 on P4080 rev1 is not supported!\n");
+               return -ENODEV;
+       case 0x0101:
+               qman_ip_rev = QMAN_REV11;
+               break;
+       case 0x0102:
+               qman_ip_rev = QMAN_REV12;
+               break;
+       case 0x0200:
+               qman_ip_rev = QMAN_REV20;
+               break;
+       case 0x0300:
+               qman_ip_rev = QMAN_REV30;
+               break;
+       case 0x0301:
+               qman_ip_rev = QMAN_REV31;
+               break;
+       default:
+               dev_err(dev, "Unknown QMan version\n");
+               return -ENODEV;
+       }
+
+       if ((qman_ip_rev & 0xff00) >= QMAN_REV30)
+               qm_channel_pool1 = QMAN_CHANNEL_POOL1_REV3;
+
+       ret = zero_priv_mem(dev, node, fqd_a, fqd_sz);
+       if (WARN_ON(ret))
+               return -ENODEV;
+
+       ret = qman_init_ccsr(dev);
+       if (ret) {
+               dev_err(dev, "CCSR setup failed\n");
+               return ret;
+       }
+
+       err_irq = platform_get_irq(pdev, 0);
+       if (err_irq <= 0) {
+               dev_info(dev, "Can't get %s property 'interrupts'\n",
+                        node->full_name);
+               return -ENODEV;
+       }
+       ret = devm_request_irq(dev, err_irq, qman_isr, IRQF_SHARED, "qman-err",
+                              dev);
+       if (ret)  {
+               dev_err(dev, "devm_request_irq() failed %d for '%s'\n",
+                       ret, node->full_name);
+               return ret;
+       }
+
+       /*
+        * Clear whatever is already latched in the status register (e.g.
+        * starvation asserted before resources were allocated during driver
+        * init), then enable all error interrupts.
+        */
+       qm_ccsr_out(REG_ERR_ISR, 0xffffffff);
+       qm_ccsr_out(REG_ERR_IER, 0xffffffff);
+
+       /* Device-managed id allocators: FQIDs, pool channels, CGRIDs */
+       qm_fqalloc = devm_gen_pool_create(dev, 0, -1, "qman-fqalloc");
+       if (IS_ERR(qm_fqalloc)) {
+               ret = PTR_ERR(qm_fqalloc);
+               dev_err(dev, "qman-fqalloc pool init failed (%d)\n", ret);
+               return ret;
+       }
+
+       qm_qpalloc = devm_gen_pool_create(dev, 0, -1, "qman-qpalloc");
+       if (IS_ERR(qm_qpalloc)) {
+               ret = PTR_ERR(qm_qpalloc);
+               dev_err(dev, "qman-qpalloc pool init failed (%d)\n", ret);
+               return ret;
+       }
+
+       qm_cgralloc = devm_gen_pool_create(dev, 0, -1, "qman-cgralloc");
+       if (IS_ERR(qm_cgralloc)) {
+               ret = PTR_ERR(qm_cgralloc);
+               dev_err(dev, "qman-cgralloc pool init failed (%d)\n", ret);
+               return ret;
+       }
+
+       ret = qman_resource_init(dev);
+       if (ret)
+               return ret;
+
+       ret = qman_alloc_fq_table(qm_get_fqid_maxcnt());
+       if (ret)
+               return ret;
+
+       return qman_wq_alloc();
+}
+
+/* Device-tree match table: binds to nodes compatible with "fsl,qman" */
+static const struct of_device_id fsl_qman_ids[] = {
+       {
+               .compatible = "fsl,qman",
+       },
+       {}
+};
+
+/*
+ * Built-in platform driver. No .remove callback is provided, and
+ * suppress_bind_attrs is set for the driver.
+ */
+static struct platform_driver fsl_qman_driver = {
+       .driver = {
+               .name = KBUILD_MODNAME,
+               .of_match_table = fsl_qman_ids,
+               .suppress_bind_attrs = true,
+       },
+       .probe = fsl_qman_probe,
+};
+
+builtin_platform_driver(fsl_qman_driver);
diff --git a/drivers/soc/fsl/qbman/qman_portal.c b/drivers/soc/fsl/qbman/qman_portal.c
new file mode 100644 (file)
index 0000000..1486143
--- /dev/null
@@ -0,0 +1,355 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "qman_priv.h"
+
+/* Enable portal interrupts (as opposed to polling mode) */
+#define CONFIG_FSL_DPA_PIRQ_SLOW  1
+#define CONFIG_FSL_DPA_PIRQ_FAST  1
+
+/* CPUs that already had a portal assigned to them by qman_portal_probe() */
+static struct cpumask portal_cpus;
+/* protect qman global registers and global data shared among portals */
+static DEFINE_SPINLOCK(qman_lock);
+
+/*
+ * portal_set_cpu - point a portal's stashing at a given CPU
+ * @pcfg: portal configuration; pcfg->iommu_domain is updated here
+ * @cpu: CPU used as the PAMU stash target and as stash destination
+ *
+ * With CONFIG_FSL_PAMU, an IOMMU domain is allocated for the portal device,
+ * given a single 36-bit window with L1 stashing towards @cpu, attached to
+ * the device and enabled. The portal's stash destination is then set via
+ * qman_set_sdest().
+ *
+ * If the domain cannot be allocated, the IOMMU setup is skipped and only the
+ * stash destination is set. If any later IOMMU step fails, the domain is
+ * released, pcfg->iommu_domain is reset to NULL and the function returns
+ * without calling qman_set_sdest().
+ */
+static void portal_set_cpu(struct qm_portal_config *pcfg, int cpu)
+{
+#ifdef CONFIG_FSL_PAMU
+       struct device *dev = pcfg->dev;
+       int window_count = 1;
+       struct iommu_domain_geometry geom_attr;
+       struct pamu_stash_attribute stash_attr;
+       int ret;
+
+       pcfg->iommu_domain = iommu_domain_alloc(&platform_bus_type);
+       if (!pcfg->iommu_domain) {
+               dev_err(dev, "%s(): iommu_domain_alloc() failed\n", __func__);
+               goto no_iommu;
+       }
+       /* aperture covers 36 bits, or all of dma_addr_t if that is smaller */
+       geom_attr.aperture_start = 0;
+       geom_attr.aperture_end =
+               ((dma_addr_t)1 << min(8 * sizeof(dma_addr_t), (size_t)36)) - 1;
+       geom_attr.force_aperture = true;
+       ret = iommu_domain_set_attr(pcfg->iommu_domain, DOMAIN_ATTR_GEOMETRY,
+                                   &geom_attr);
+       if (ret < 0) {
+               dev_err(dev, "%s(): iommu_domain_set_attr() = %d\n", __func__,
+                       ret);
+               goto out_domain_free;
+       }
+       ret = iommu_domain_set_attr(pcfg->iommu_domain, DOMAIN_ATTR_WINDOWS,
+                                   &window_count);
+       if (ret < 0) {
+               dev_err(dev, "%s(): iommu_domain_set_attr() = %d\n", __func__,
+                       ret);
+               goto out_domain_free;
+       }
+       stash_attr.cpu = cpu;
+       stash_attr.cache = PAMU_ATTR_CACHE_L1;
+       ret = iommu_domain_set_attr(pcfg->iommu_domain,
+                                   DOMAIN_ATTR_FSL_PAMU_STASH,
+                                   &stash_attr);
+       if (ret < 0) {
+               dev_err(dev, "%s(): iommu_domain_set_attr() = %d\n",
+                       __func__, ret);
+               goto out_domain_free;
+       }
+       ret = iommu_domain_window_enable(pcfg->iommu_domain, 0, 0, 1ULL << 36,
+                                        IOMMU_READ | IOMMU_WRITE);
+       if (ret < 0) {
+               dev_err(dev, "%s(): iommu_domain_window_enable() = %d\n",
+                       __func__, ret);
+               goto out_domain_free;
+       }
+       ret = iommu_attach_device(pcfg->iommu_domain, dev);
+       if (ret < 0) {
+               dev_err(dev, "%s(): iommu_attach_device() = %d\n", __func__,
+                       ret);
+               goto out_domain_free;
+       }
+       ret = iommu_domain_set_attr(pcfg->iommu_domain,
+                                   DOMAIN_ATTR_FSL_PAMU_ENABLE,
+                                   &window_count);
+       if (ret < 0) {
+               dev_err(dev, "%s(): iommu_domain_set_attr() = %d\n", __func__,
+                       ret);
+               goto out_detach_device;
+       }
+
+no_iommu:
+#endif
+       qman_set_sdest(pcfg->channel, cpu);
+
+       return;
+
+#ifdef CONFIG_FSL_PAMU
+out_detach_device:
+       /* detach the device that was attached above, not a NULL device */
+       iommu_detach_device(pcfg->iommu_domain, dev);
+out_domain_free:
+       iommu_domain_free(pcfg->iommu_domain);
+       pcfg->iommu_domain = NULL;
+#endif
+}
+
+/*
+ * Bring up the portal described by @pcfg on pcfg->cpu.
+ *
+ * Applies the LIODN fixup, sets the stash target CPU, creates the affine
+ * portal and adds its interrupt sources. Once portal_cpus equals the set of
+ * possible CPUs, qman_init_cgr_all() is run under qman_lock.
+ *
+ * Returns the new portal, or NULL if the affine portal could not be created.
+ */
+static struct qman_portal *init_pcfg(struct qm_portal_config *pcfg)
+{
+       struct qman_portal *portal;
+       u32 irq_mask = 0;
+
+       /* We need the same LIODN offset for all portals */
+       qman_liodn_fixup(pcfg->channel);
+
+       pcfg->iommu_domain = NULL;
+       portal_set_cpu(pcfg, pcfg->cpu);
+
+       portal = qman_create_affine_portal(pcfg, NULL);
+       if (portal == NULL) {
+               dev_crit(pcfg->dev, "%s: Portal failure on cpu %d\n",
+                        __func__, pcfg->cpu);
+               return NULL;
+       }
+
+       /* Determine what should be interrupt-vs-poll driven */
+#ifdef CONFIG_FSL_DPA_PIRQ_FAST
+       irq_mask |= QM_PIRQ_DQRI;
+#endif
+#ifdef CONFIG_FSL_DPA_PIRQ_SLOW
+       irq_mask |= QM_PIRQ_EQCI | QM_PIRQ_EQRI | QM_PIRQ_MRI | QM_PIRQ_CSCI;
+#endif
+       qman_p_irqsource_add(portal, irq_mask);
+
+       spin_lock(&qman_lock);
+       /* all assigned portals are initialized once the masks match */
+       if (cpumask_equal(&portal_cpus, cpu_possible_mask))
+               qman_init_cgr_all();
+       spin_unlock(&qman_lock);
+
+       dev_info(pcfg->dev, "Portal initialised, cpu %d\n", pcfg->cpu);
+
+       return portal;
+}
+
+/*
+ * Retarget a portal's stashing to @cpu. When the portal has a PAMU IOMMU
+ * domain its stash attribute is updated first; if that update fails the
+ * stash destination is left untouched.
+ */
+static void qman_portal_update_sdest(const struct qm_portal_config *pcfg,
+                                    unsigned int cpu)
+{
+#ifdef CONFIG_FSL_PAMU /* TODO */
+       if (pcfg->iommu_domain) {
+               struct pamu_stash_attribute attr;
+
+               attr.cpu = cpu;
+               attr.cache = PAMU_ATTR_CACHE_L1;
+               if (iommu_domain_set_attr(pcfg->iommu_domain,
+                                         DOMAIN_ATTR_FSL_PAMU_STASH,
+                                         &attr) < 0) {
+                       dev_err(pcfg->dev,
+                               "Failed to update pamu stash setting\n");
+                       return;
+               }
+       }
+#endif
+       qman_set_sdest(pcfg->channel, cpu);
+}
+
+/*
+ * Move the portal affine to @cpu, if there is one, over to CPU 0: both its
+ * interrupt affinity and its stash destination are redirected there.
+ */
+static void qman_offline_cpu(unsigned int cpu)
+{
+       struct qman_portal *portal = affine_portals[cpu];
+       const struct qm_portal_config *cfg;
+
+       if (!portal)
+               return;
+
+       cfg = qman_get_qm_portal_config(portal);
+       if (!cfg)
+               return;
+
+       irq_set_affinity(cfg->irq, cpumask_of(0));
+       qman_portal_update_sdest(cfg, 0);
+}
+
+/*
+ * Point the portal affine to @cpu, if there is one, back at @cpu: both its
+ * interrupt affinity and its stash destination are set to that CPU.
+ */
+static void qman_online_cpu(unsigned int cpu)
+{
+       struct qman_portal *portal = affine_portals[cpu];
+       const struct qm_portal_config *cfg;
+
+       if (!portal)
+               return;
+
+       cfg = qman_get_qm_portal_config(portal);
+       if (!cfg)
+               return;
+
+       irq_set_affinity(cfg->irq, cpumask_of(cpu));
+       qman_portal_update_sdest(cfg, cpu);
+}
+
+/*
+ * CPU hotplug notifier callback.
+ * @nfb: notifier block (unused)
+ * @action: hotplug event
+ * @hcpu: CPU number, passed as a pointer-sized integer
+ *
+ * Re-homes the CPU's portal when the CPU comes online and moves it away
+ * before the CPU goes down; other events are ignored. Always returns
+ * NOTIFY_OK.
+ */
+static int qman_hotplug_cpu_callback(struct notifier_block *nfb,
+                                    unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (unsigned long)hcpu;
+
+       switch (action) {
+       case CPU_ONLINE:
+       case CPU_ONLINE_FROZEN:
+               qman_online_cpu(cpu);
+               break;
+       case CPU_DOWN_PREPARE:
+       case CPU_DOWN_PREPARE_FROZEN:
+               qman_offline_cpu(cpu);
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+/* Notifier block handed to register_hotcpu_notifier() at driver registration */
+static struct notifier_block qman_hotplug_cpu_notifier = {
+       .notifier_call = qman_hotplug_cpu_callback,
+};
+
+/*
+ * qman_portal_probe - set up one "fsl,qman-portal" device
+ * @pdev: the portal platform device
+ *
+ * Reads the CE/CI register resources, the 4-byte "cell-index" property (used
+ * as the portal channel) and the interrupt, maps both register regions, and
+ * assigns the portal to the first CPU that has none yet. Portals left over
+ * once every CPU has one are mapped but not initialised.
+ *
+ * Returns 0 on success (including the unassigned-portal case), -ENOMEM if
+ * the configuration cannot be allocated, or -ENXIO on any other failure.
+ */
+static int qman_portal_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct device_node *node = dev->of_node;
+       struct qm_portal_config *pcfg;
+       struct resource *addr_phys[2];
+       const u32 *channel;
+       void __iomem *va;
+       int irq, len, cpu;
+
+       pcfg = devm_kmalloc(dev, sizeof(*pcfg), GFP_KERNEL);
+       if (!pcfg)
+               return -ENOMEM;
+
+       pcfg->dev = dev;
+
+       addr_phys[0] = platform_get_resource(pdev, IORESOURCE_MEM,
+                                            DPAA_PORTAL_CE);
+       if (!addr_phys[0]) {
+               dev_err(dev, "Can't get %s property 'reg::CE'\n",
+                       node->full_name);
+               return -ENXIO;
+       }
+
+       addr_phys[1] = platform_get_resource(pdev, IORESOURCE_MEM,
+                                            DPAA_PORTAL_CI);
+       if (!addr_phys[1]) {
+               dev_err(dev, "Can't get %s property 'reg::CI'\n",
+                       node->full_name);
+               return -ENXIO;
+       }
+
+       channel = of_get_property(node, "cell-index", &len);
+       if (!channel || (len != 4)) {
+               dev_err(dev, "Can't get %s property 'cell-index'\n",
+                       node->full_name);
+               return -ENXIO;
+       }
+       pcfg->channel = *channel;
+       pcfg->cpu = -1;
+       irq = platform_get_irq(pdev, 0);
+       if (irq <= 0) {
+               dev_err(dev, "Can't get %s IRQ\n", node->full_name);
+               return -ENXIO;
+       }
+       pcfg->irq = irq;
+
+       va = ioremap_prot(addr_phys[0]->start, resource_size(addr_phys[0]), 0);
+       if (!va) {
+               dev_err(dev, "ioremap::CE failed\n");
+               goto err_ioremap1;
+       }
+
+       pcfg->addr_virt[DPAA_PORTAL_CE] = va;
+
+       va = ioremap_prot(addr_phys[1]->start, resource_size(addr_phys[1]),
+                         _PAGE_GUARDED | _PAGE_NO_CACHE);
+       if (!va) {
+               dev_err(dev, "ioremap::CI failed\n");
+               goto err_ioremap2;
+       }
+
+       pcfg->addr_virt[DPAA_PORTAL_CI] = va;
+
+       pcfg->pools = qm_get_pools_sdqcr();
+
+       spin_lock(&qman_lock);
+       cpu = cpumask_next_zero(-1, &portal_cpus);
+       if (cpu >= nr_cpu_ids) {
+               /* unassigned portal, skip init */
+               spin_unlock(&qman_lock);
+               return 0;
+       }
+
+       cpumask_set_cpu(cpu, &portal_cpus);
+       spin_unlock(&qman_lock);
+       pcfg->cpu = cpu;
+
+       if (!init_pcfg(pcfg)) {
+               dev_err(dev, "portal init failed\n");
+               goto err_portal_init;
+       }
+
+       /* clear irq affinity if assigned cpu is offline */
+       if (!cpu_online(cpu))
+               qman_offline_cpu(cpu);
+
+       return 0;
+
+       /* unwind in reverse order of mapping: CI first, then CE */
+err_portal_init:
+       iounmap(pcfg->addr_virt[DPAA_PORTAL_CI]);
+err_ioremap2:
+       iounmap(pcfg->addr_virt[DPAA_PORTAL_CE]);
+err_ioremap1:
+       return -ENXIO;
+}
+
+/* Device-tree match table: this driver binds to "fsl,qman-portal" nodes. */
+static const struct of_device_id qman_portal_ids[] = {
+       { .compatible = "fsl,qman-portal" },
+       { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, qman_portal_ids);
+
+/* Platform driver for QMan software portals; only a probe callback is set */
+static struct platform_driver qman_portal_driver = {
+       .probe = qman_portal_probe,
+       .driver = {
+               .of_match_table = qman_portal_ids,
+               .name = KBUILD_MODNAME,
+       },
+};
+
+/*
+ * Register the portal platform driver and, once that has succeeded, the CPU
+ * hotplug notifier. Returns 0, or the negative error from
+ * platform_driver_register().
+ *
+ * NOTE(review): the exit hook given to module_driver() below only
+ * unregisters the platform driver; the notifier registered here is not
+ * removed there.
+ */
+static int __init qman_portal_driver_register(struct platform_driver *drv)
+{
+       int err = platform_driver_register(drv);
+
+       if (err < 0)
+               return err;
+
+       register_hotcpu_notifier(&qman_hotplug_cpu_notifier);
+       return 0;
+}
+
+module_driver(qman_portal_driver,
+             qman_portal_driver_register, platform_driver_unregister);
diff --git a/drivers/soc/fsl/qbman/qman_priv.h b/drivers/soc/fsl/qbman/qman_priv.h
new file mode 100644 (file)
index 0000000..5cf821e
--- /dev/null
@@ -0,0 +1,371 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "dpaa_sys.h"
+
+#include <soc/fsl/qman.h>
+#include <linux/iommu.h>
+
+#if defined(CONFIG_FSL_PAMU)
+#include <asm/fsl_pamu_stash.h>
+#endif
+
+/*
+ * Packed 64-byte layout: verb and result bytes, a 16-bit channel/work-queue
+ * field, reserved padding, then eight 32-bit wq_len words.
+ */
+struct qm_mcr_querywq {
+       u8 verb;
+       u8 result;
+       u16 channel_wq; /* ignores wq (3 lsbits): _res[0-2] */
+       u8 __reserved[28];
+       u32 wq_len[8];
+} __packed;
+
+/* Return the channel part of channel_wq, i.e. the field without its 3 lsbits */
+static inline u16 qm_mcr_querywq_get_chan(const struct qm_mcr_querywq *wq)
+{
+       const u16 raw = wq->channel_wq;
+
+       return raw >> 3;
+}
+
+/* 8 x 32 = 256 state bits; wrapped by struct qman_cgrs as a CGR bit-array */
+struct __qm_mcr_querycongestion {
+       u32 state[8];
+};
+
+/*
+ * "Query Congestion Group State"
+ *
+ * Packed 64-byte layout: verb, result, 30 reserved bytes, then the 32-byte
+ * state bit-array.
+ */
+struct qm_mcr_querycongestion {
+       u8 verb;
+       u8 result;
+       u8 __reserved[30];
+       /* Access this struct using qman_cgrs_get() */
+       struct __qm_mcr_querycongestion state;
+} __packed;
+
+/*
+ * "Query CGR"
+ *
+ * The two 40-bit byte counts are split into a high byte and a low word; use
+ * qm_mcr_querycgr_i_get64() / qm_mcr_querycgr_a_get64() to combine them.
+ */
+struct qm_mcr_querycgr {
+       u8 verb;
+       u8 result;
+       u16 __reserved1;
+       struct __qm_mc_cgr cgr; /* CGR fields */
+       u8 __reserved2[6];
+       u8 i_bcnt_hi;   /* high 8-bits of 40-bit "Instant" */
+       u32 i_bcnt_lo;  /* low 32-bits of 40-bit */
+       u8 __reserved3[3];
+       u8 a_bcnt_hi;   /* high 8-bits of 40-bit "Average" */
+       u32 a_bcnt_lo;  /* low 32-bits of 40-bit */
+       u32 cscn_targ_swp[4];
+} __packed;
+
+/* Combine i_bcnt_hi (bits 39..32) and i_bcnt_lo (bits 31..0) into one u64 */
+static inline u64 qm_mcr_querycgr_i_get64(const struct qm_mcr_querycgr *q)
+{
+       u64 hi = q->i_bcnt_hi;
+       u64 lo = q->i_bcnt_lo;
+
+       return (hi << 32) | lo;
+}
+/* Combine a_bcnt_hi (bits 39..32) and a_bcnt_lo (bits 31..0) into one u64 */
+static inline u64 qm_mcr_querycgr_a_get64(const struct qm_mcr_querycgr *q)
+{
+       u64 hi = q->a_bcnt_hi;
+       u64 lo = q->a_bcnt_lo;
+
+       return (hi << 32) | lo;
+}
+
+/*
+ * "Query FQ Non-Programmable Fields"
+ *
+ * Packed 64-byte layout; apart from the verb byte and the FQID everything
+ * is reserved.
+ */
+struct qm_mcc_queryfq_np {
+       u8 _ncw_verb;
+       u8 __reserved1[3];
+       u32 fqid;       /* 24-bit */
+       u8 __reserved2[56];
+} __packed;
+
+/*
+ * Packed 64-byte layout. Several fields are narrower than their storage
+ * type (see the per-field notes); qm_mcr_np_get() reads such a field with
+ * its reserved upper bits masked off.
+ */
+struct qm_mcr_queryfq_np {
+       u8 verb;
+       u8 result;
+       u8 __reserved1;
+       u8 state;               /* QM_MCR_NP_STATE_*** */
+       u32 fqd_link;           /* 24-bit, _res2[24-31] */
+       u16 odp_seq;            /* 14-bit, _res3[14-15] */
+       u16 orp_nesn;           /* 14-bit, _res4[14-15] */
+       u16 orp_ea_hseq;        /* 15-bit, _res5[15] */
+       u16 orp_ea_tseq;        /* 15-bit, _res6[15] */
+       u32 orp_ea_hptr;        /* 24-bit, _res7[24-31] */
+       u32 orp_ea_tptr;        /* 24-bit, _res8[24-31] */
+       u32 pfdr_hptr;          /* 24-bit, _res9[24-31] */
+       u32 pfdr_tptr;          /* 24-bit, _res10[24-31] */
+       u8 __reserved2[5];
+       u8 is;                  /* 1-bit, _res12[1-7] */
+       u16 ics_surp;
+       u32 byte_cnt;
+       u32 frm_cnt;            /* 24-bit, _res13[24-31] */
+       u32 __reserved3;
+       u16 ra1_sfdr;           /* QM_MCR_NP_RA1_*** */
+       u16 ra2_sfdr;           /* QM_MCR_NP_RA2_*** */
+       u16 __reserved4;
+       u16 od1_sfdr;           /* QM_MCR_NP_OD1_*** */
+       u16 od2_sfdr;           /* QM_MCR_NP_OD2_*** */
+       u16 od3_sfdr;           /* QM_MCR_NP_OD3_*** */
+} __packed;
+
+#define QM_MCR_NP_STATE_FE             0x10
+#define QM_MCR_NP_STATE_R              0x08
+#define QM_MCR_NP_STATE_MASK           0x07    /* Reads FQD::STATE; */
+#define QM_MCR_NP_STATE_OOS            0x00
+#define QM_MCR_NP_STATE_RETIRED                0x01
+#define QM_MCR_NP_STATE_TEN_SCHED      0x02
+#define QM_MCR_NP_STATE_TRU_SCHED      0x03
+#define QM_MCR_NP_STATE_PARKED         0x04
+#define QM_MCR_NP_STATE_ACTIVE         0x05
+#define QM_MCR_NP_PTR_MASK             0x07ff  /* for RA[12] & OD[123] */
+#define QM_MCR_NP_RA1_NRA(v)           (((v) >> 14) & 0x3)     /* FQD::NRA */
+#define QM_MCR_NP_RA2_IT(v)            (((v) >> 14) & 0x1)     /* FQD::IT */
+#define QM_MCR_NP_OD1_NOD(v)           (((v) >> 14) & 0x3)     /* FQD::NOD */
+#define QM_MCR_NP_OD3_NPC(v)           (((v) >> 14) & 0x3)     /* FQD::NPC */
+
+/*
+ * Masks selecting the valid low-order bits of the correspondingly named
+ * struct qm_mcr_queryfq_np fields; applied by qm_mcr_np_get().
+ */
+enum qm_mcr_queryfq_np_masks {
+       qm_mcr_fqd_link_mask = BIT(24)-1,
+       qm_mcr_odp_seq_mask = BIT(14)-1,
+       qm_mcr_orp_nesn_mask = BIT(14)-1,
+       qm_mcr_orp_ea_hseq_mask = BIT(15)-1,
+       qm_mcr_orp_ea_tseq_mask = BIT(15)-1,
+       qm_mcr_orp_ea_hptr_mask = BIT(24)-1,
+       qm_mcr_orp_ea_tptr_mask = BIT(24)-1,
+       qm_mcr_pfdr_hptr_mask = BIT(24)-1,
+       qm_mcr_pfdr_tptr_mask = BIT(24)-1,
+       qm_mcr_is_mask = BIT(1)-1,
+       qm_mcr_frm_cnt_mask = BIT(24)-1,
+};
+/* Read (np)->field masked with the matching qm_mcr_<field>_mask value */
+#define qm_mcr_np_get(np, field) \
+       ((np)->field & (qm_mcr_##field##_mask))
+
+/* Congestion Groups */
+
+/*
+ * This wrapper represents a bit-array for the state of the 256 QMan congestion
+ * groups. Is also used as a *mask* for congestion groups, eg. so we ignore
+ * those that don't concern us. We harness the structure and accessor details
+ * already used in the management command to query congestion groups.
+ */
+/* log2 of the 32 bits held in each state[] word */
+#define CGR_BITS_PER_WORD 5
+/* index of the state[] word holding congestion group x */
+#define CGR_WORD(x)    ((x) >> CGR_BITS_PER_WORD)
+/* bit for congestion group x within its word; bit 31 is the first group */
+#define CGR_BIT(x)     (BIT(31) >> ((x) & 0x1f))
+/* total number of bits in the array: size in bytes times 8 */
+#define CGR_NUM        (sizeof(struct __qm_mcr_querycongestion) << 3)
+
+/* Bit-array of congestion groups, stored in the query-command state layout */
+struct qman_cgrs {
+       struct __qm_mcr_querycongestion q;
+};
+
+/* Clear every bit in @c */
+static inline void qman_cgrs_init(struct qman_cgrs *c)
+{
+       memset(c, 0, sizeof(struct qman_cgrs));
+}
+
+/* Set every bit in @c */
+static inline void qman_cgrs_fill(struct qman_cgrs *c)
+{
+       memset(c, 0xff, sizeof(struct qman_cgrs));
+}
+
+/* Return non-zero if the bit for congestion group @cgr is set in @c */
+static inline int qman_cgrs_get(struct qman_cgrs *c, u8 cgr)
+{
+       const u32 word = c->q.state[CGR_WORD(cgr)];
+
+       return word & CGR_BIT(cgr);
+}
+
+/* Copy the whole bit-array from @src to @dest */
+static inline void qman_cgrs_cp(struct qman_cgrs *dest,
+                               const struct qman_cgrs *src)
+{
+       dest->q = src->q;
+}
+
+/* dest = a & b, word by word over the 8 state words */
+static inline void qman_cgrs_and(struct qman_cgrs *dest,
+                       const struct qman_cgrs *a, const struct qman_cgrs *b)
+{
+       unsigned int i;
+
+       for (i = 0; i < 8; i++)
+               dest->q.state[i] = a->q.state[i] & b->q.state[i];
+}
+
+/* dest = a ^ b, word by word over the 8 state words */
+static inline void qman_cgrs_xor(struct qman_cgrs *dest,
+                       const struct qman_cgrs *a, const struct qman_cgrs *b)
+{
+       unsigned int i;
+
+       for (i = 0; i < 8; i++)
+               dest->q.state[i] = a->q.state[i] ^ b->q.state[i];
+}
+
+void qman_init_cgr_all(void);
+
+/* Per-portal configuration: register mappings, owning device, CPU/IRQ, channels */
+struct qm_portal_config {
+       /*
+        * Corenet portal addresses;
+        * [0]==cache-enabled, [1]==cache-inhibited.
+        */
+       void __iomem *addr_virt[2];
+       struct device *dev;
+       /* PAMU IOMMU domain of the portal, or NULL when none is set up */
+       struct iommu_domain *iommu_domain;
+       /* Allow these to be joined in lists */
+       struct list_head list;
+       /* User-visible portal configuration settings */
+       /* portal is affined to this cpu */
+       int cpu;
+       /* portal interrupt line */
+       int irq;
+       /*
+        * the portal's dedicated channel id, used when initialising
+        * frame queues to target this portal when scheduled
+        */
+       u16 channel;
+       /*
+        * mask of pool channels this portal has dequeue access to
+        * (using QM_SDQCR_CHANNELS_POOL(n) for the bitmask)
+        */
+       u32 pools;
+};
+
+/* Revision info (for errata and feature handling) */
+#define QMAN_REV11 0x0101
+#define QMAN_REV12 0x0102
+#define QMAN_REV20 0x0200
+#define QMAN_REV30 0x0300
+#define QMAN_REV31 0x0301
+extern u16 qman_ip_rev; /* 0 if uninitialised, otherwise QMAN_REVx */
+
+#define QM_FQID_RANGE_START 1 /* FQID 0 reserved for internal use */
+extern struct gen_pool *qm_fqalloc; /* FQID allocator */
+extern struct gen_pool *qm_qpalloc; /* pool-channel allocator */
+extern struct gen_pool *qm_cgralloc; /* CGR ID allocator */
+u32 qm_get_pools_sdqcr(void);
+
+int qman_wq_alloc(void);
+void qman_liodn_fixup(u16 channel);
+void qman_set_sdest(u16 channel, unsigned int cpu_idx);
+
+struct qman_portal *qman_create_affine_portal(
+                       const struct qm_portal_config *config,
+                       const struct qman_cgrs *cgrs);
+const struct qm_portal_config *qman_destroy_affine_portal(void);
+
+/*
+ * qman_query_fq - Queries FQD fields (via h/w query command)
+ * @fq: the frame queue object to be queried
+ * @fqd: storage for the queried FQD fields
+ */
+int qman_query_fq(struct qman_fq *fq, struct qm_fqd *fqd);
+
+/*
+ * For qman_volatile_dequeue(); Choose one PRECEDENCE. EXACT is optional. Use
+ * NUMFRAMES(n) (6-bit) or NUMFRAMES_TILLEMPTY to fill in the frame-count. Use
+ * FQID(n) to fill in the frame queue ID.
+ */
+#define QM_VDQCR_PRECEDENCE_VDQCR      0x0
+#define QM_VDQCR_PRECEDENCE_SDQCR      0x80000000
+#define QM_VDQCR_EXACT                 0x40000000
+#define QM_VDQCR_NUMFRAMES_MASK                0x3f000000
+#define QM_VDQCR_NUMFRAMES_SET(n)      (((n) & 0x3f) << 24)
+#define QM_VDQCR_NUMFRAMES_GET(n)      (((n) >> 24) & 0x3f)
+#define QM_VDQCR_NUMFRAMES_TILLEMPTY   QM_VDQCR_NUMFRAMES_SET(0)
+
+#define QMAN_VOLATILE_FLAG_WAIT             0x00000001 /* wait if VDQCR is in use */
+#define QMAN_VOLATILE_FLAG_WAIT_INT  0x00000002 /* if wait, interruptible? */
+#define QMAN_VOLATILE_FLAG_FINISH    0x00000004 /* wait till VDQCR completes */
+
+/*
+ * qman_volatile_dequeue - Issue a volatile dequeue command
+ * @fq: the frame queue object to dequeue from
+ * @flags: a bit-mask of QMAN_VOLATILE_FLAG_*** options
+ * @vdqcr: bit mask of QM_VDQCR_*** options, as per qm_dqrr_vdqcr_set()
+ *
+ * Attempts to lock access to the portal's VDQCR volatile dequeue functionality.
+ * The function will block and sleep if QMAN_VOLATILE_FLAG_WAIT is specified and
+ * the VDQCR is already in use, otherwise returns non-zero for failure. If
+ * QMAN_VOLATILE_FLAG_FINISH is specified, the function will only return once
+ * the VDQCR command has finished executing (ie. once the callback for the last
+ * DQRR entry resulting from the VDQCR command has been called). If not using
+ * the FINISH flag, completion can be determined either by detecting the
+ * presence of the QM_DQRR_STAT_UNSCHEDULED and QM_DQRR_STAT_DQCR_EXPIRED bits
+ * in the "stat" parameter passed to the FQ's dequeue callback, or by waiting
+ * for the QMAN_FQ_STATE_VDQCR bit to disappear.
+ */
+int qman_volatile_dequeue(struct qman_fq *fq, u32 flags, u32 vdqcr);
+
+int qman_alloc_fq_table(u32 num_fqids);
+
+/*   QMan s/w corenet portal, low-level i/face  */
+
+/*
+ * For qm_dqrr_sdqcr_set(); Choose one SOURCE. Choose one COUNT. Choose one
+ * dequeue TYPE. Choose TOKEN (8-bit).
+ * If SOURCE == CHANNELS,
+ *   Choose CHANNELS_DEDICATED and/or CHANNELS_POOL(n).
+ *   You can choose DEDICATED_PRECEDENCE if the portal channel should have
+ *   priority.
+ * If SOURCE == SPECIFICWQ,
+ *     Either select the work-queue ID with SPECIFICWQ_WQ(), or select the
+ *     channel (SPECIFICWQ_DEDICATED or SPECIFICWQ_POOL()) and specify the
+ *     work-queue priority (0-7) with SPECIFICWQ_WQ() - either way, you get the
+ *     same value.
+ */
+#define QM_SDQCR_SOURCE_CHANNELS       0x0
+#define QM_SDQCR_SOURCE_SPECIFICWQ     0x40000000
+#define QM_SDQCR_COUNT_EXACT1          0x0
+#define QM_SDQCR_COUNT_UPTO3           0x20000000
+#define QM_SDQCR_DEDICATED_PRECEDENCE  0x10000000
+#define QM_SDQCR_TYPE_MASK             0x03000000
+#define QM_SDQCR_TYPE_NULL             0x0
+#define QM_SDQCR_TYPE_PRIO_QOS         0x01000000
+#define QM_SDQCR_TYPE_ACTIVE_QOS       0x02000000
+#define QM_SDQCR_TYPE_ACTIVE           0x03000000
+#define QM_SDQCR_TOKEN_MASK            0x00ff0000
+#define QM_SDQCR_TOKEN_SET(v)          (((v) & 0xff) << 16)
+#define QM_SDQCR_TOKEN_GET(v)          (((v) >> 16) & 0xff)
+#define QM_SDQCR_CHANNELS_DEDICATED    0x00008000
+#define QM_SDQCR_SPECIFICWQ_MASK       0x000000f7
+#define QM_SDQCR_SPECIFICWQ_DEDICATED  0x00000000
+#define QM_SDQCR_SPECIFICWQ_POOL(n)    ((n) << 4)
+#define QM_SDQCR_SPECIFICWQ_WQ(n)      (n)
+
+/* For qm_dqrr_vdqcr_set(): use FQID(n) to fill in the frame queue ID */
+#define QM_VDQCR_FQID_MASK             0x00ffffff
+#define QM_VDQCR_FQID(n)               ((n) & QM_VDQCR_FQID_MASK)
+
+/*
+ * Used by all portal interrupt registers except 'inhibit'
+ * Channels with frame availability
+ */
+#define QM_PIRQ_DQAVAIL        0x0000ffff
+
+/* The DQAVAIL interrupt fields break down into these bits; */
+#define QM_DQAVAIL_PORTAL      0x8000          /* Portal channel */
+#define QM_DQAVAIL_POOL(n)     (0x8000 >> (n)) /* Pool channel, n==[1..15] */
+#define QM_DQAVAIL_MASK                0xffff
+/* This mask contains all the "irqsource" bits visible to API users */
+#define QM_PIRQ_VISIBLE        (QM_PIRQ_SLOW | QM_PIRQ_DQRI)
+
+extern struct qman_portal *affine_portals[NR_CPUS];
+const struct qm_portal_config *qman_get_qm_portal_config(
+                                               struct qman_portal *portal);
diff --git a/drivers/soc/fsl/qbman/qman_test.c b/drivers/soc/fsl/qbman/qman_test.c
new file mode 100644 (file)
index 0000000..18f7f02
--- /dev/null
@@ -0,0 +1,62 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "qman_test.h"
+
+MODULE_AUTHOR("Geoff Thorpe");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("QMan testing");
+
+/*
+ * Module init: run each self-test enabled at build time once, stopping at
+ * the first failure. Returns 0 or the error of the failing test.
+ */
+static int test_init(void)
+{
+       int err = 0;
+       int pass;
+
+       for (pass = 0; pass < 1; pass++) {
+#ifdef CONFIG_FSL_QMAN_TEST_STASH
+               err = qman_test_stash();
+               if (err)
+                       break;
+#endif
+#ifdef CONFIG_FSL_QMAN_TEST_API
+               err = qman_test_api();
+               if (err)
+                       break;
+#endif
+       }
+       return err;
+}
+
+/* Module exit: nothing to undo, the tests leave no state behind to release here */
+static void test_exit(void)
+{
+}
+
+module_init(test_init);
+module_exit(test_exit);
diff --git a/drivers/soc/fsl/qbman/qman_test.h b/drivers/soc/fsl/qbman/qman_test.h
new file mode 100644 (file)
index 0000000..d5f8cb2
--- /dev/null
@@ -0,0 +1,36 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "qman_priv.h"
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+int qman_test_stash(void);
+int qman_test_api(void);
diff --git a/drivers/soc/fsl/qbman/qman_test_api.c b/drivers/soc/fsl/qbman/qman_test_api.c
new file mode 100644 (file)
index 0000000..6880ff1
--- /dev/null
@@ -0,0 +1,252 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "qman_test.h"
+
+#define CGR_ID         27
+#define POOL_ID                2
+#define FQ_FLAGS       QMAN_FQ_FLAG_DYNAMIC_FQID
+#define NUM_ENQUEUES   10
+#define NUM_PARTIAL    4
+#define PORTAL_SDQCR   (QM_SDQCR_SOURCE_CHANNELS | \
+                       QM_SDQCR_TYPE_PRIO_QOS | \
+                       QM_SDQCR_TOKEN_SET(0x98) | \
+                       QM_SDQCR_CHANNELS_DEDICATED | \
+                       QM_SDQCR_CHANNELS_POOL(POOL_ID))
+#define PORTAL_OPAQUE  ((void *)0xf00dbeef)
+#define VDQCR_FLAGS    (QMAN_VOLATILE_FLAG_WAIT | QMAN_VOLATILE_FLAG_FINISH)
+
+static enum qman_cb_dqrr_result cb_dqrr(struct qman_portal *,
+                                       struct qman_fq *,
+                                       const struct qm_dqrr_entry *);
+static void cb_ern(struct qman_portal *, struct qman_fq *,
+                  const union qm_mr_entry *);
+static void cb_fqs(struct qman_portal *, struct qman_fq *,
+                  const union qm_mr_entry *);
+
+static struct qm_fd fd, fd_dq;
+static struct qman_fq fq_base = {
+       .cb.dqrr = cb_dqrr,
+       .cb.ern = cb_ern,
+       .cb.fqs = cb_fqs
+};
+static DECLARE_WAIT_QUEUE_HEAD(waitqueue);
+static int retire_complete, sdqcr_complete;
+
+/* Helpers for initialising and "incrementing" a frame descriptor */
+/*
+ * Load @fd with the fixed starting pattern of this test: a 64-bit address
+ * value, the "contig big" parameter 0x0000ffff (presumably the frame length -
+ * confirm against qm_fd_set_contig_big()) and a command word.
+ */
+static void fd_init(struct qm_fd *fd)
+{
+       qm_fd_addr_set64(fd, 0xabdeadbeefLLU);
+       qm_fd_set_contig_big(fd, 0x0000ffff);
+       fd->cmd = 0xfeedf00d;
+}
+
+/*
+ * Step @fd to the next descriptor of the test sequence: the address is
+ * shifted left by one bit (bit 0 is set when anything was present above
+ * bit 39), the length is reduced by one and the command word is bumped.
+ */
+static void fd_inc(struct qm_fd *fd)
+{
+       u64 addr = qm_fd_addr_get64(fd);
+       int high = addr >> 40;
+       enum qm_fd_format format;
+       unsigned int offset, length;
+
+       addr = (addr << 1) | (high ? 1 : 0);
+       qm_fd_addr_set64(fd, addr);
+
+       /* format and offset are carried over untouched, only length moves */
+       format = qm_fd_get_format(fd);
+       offset = qm_fd_get_offset(fd);
+       length = qm_fd_get_length(fd);
+       length -= 1;
+       qm_fd_set_param(fd, format, offset, length);
+
+       fd->cmd++;
+}
+
+/* The only part of the 'fd' we can't memcmp() is the ppid */
+/*
+ * Compare two frame descriptors field by field (address, format, cfg, cmd).
+ * Returns 0 when all of them match, non-zero at the first field that differs.
+ */
+static int fd_cmp(const struct qm_fd *a, const struct qm_fd *b)
+{
+       enum qm_fd_format fmt_a, fmt_b;
+       int diff;
+
+       if (qm_fd_addr_get64(a) != qm_fd_addr_get64(b))
+               return -1;
+
+       fmt_a = qm_fd_get_format(a);
+       fmt_b = qm_fd_get_format(b);
+       diff = fmt_a - fmt_b;
+       if (diff)
+               return diff;
+
+       diff = a->cfg - b->cfg;
+       if (diff)
+               return diff;
+
+       diff = a->cmd - b->cmd;
+       return diff;
+}
+
+/* test */
+/*
+ * Enqueue NUM_ENQUEUES frames to @fq, stepping the global 'fd' after every
+ * attempt. A failed enqueue is logged and remembered, but the remaining
+ * enqueues are still attempted. Returns 0, or -EIO if any enqueue failed.
+ */
+static int do_enqueues(struct qman_fq *fq)
+{
+       int ret = 0;
+       unsigned int n = 0;
+
+       while (n < NUM_ENQUEUES) {
+               if (qman_enqueue(fq, &fd) != 0) {
+                       pr_crit("qman_enqueue() failed\n");
+                       ret = -EIO;
+               }
+               fd_inc(&fd);
+               n++;
+       }
+
+       return ret;
+}
+
+/*
+ * Exercise the basic FQ API on a single dynamically allocated FQ:
+ * create/init, enqueue + volatile dequeue (till-empty, then in two partial
+ * steps), enqueue + scheduled dequeue, and finally retire/OOS/destroy.
+ *
+ * Returns 0 on success or the error of the first step that failed (after
+ * emitting a WARN).
+ */
+int qman_test_api(void)
+{
+       unsigned int flags, frmcnt;
+       int err;
+       struct qman_fq *fq = &fq_base;
+
+       pr_info("%s(): Starting\n", __func__);
+       /*
+        * The completion flags are file-scope state; clear them so the waits
+        * below cannot be satisfied by a previous invocation.
+        */
+       retire_complete = 0;
+       sdqcr_complete = 0;
+       fd_init(&fd);
+       fd_init(&fd_dq);
+
+       /* Initialise (parked) FQ */
+       err = qman_create_fq(0, FQ_FLAGS, fq);
+       if (err) {
+               pr_crit("qman_create_fq() failed\n");
+               goto failed;
+       }
+       err = qman_init_fq(fq, QMAN_INITFQ_FLAG_LOCAL, NULL);
+       if (err) {
+               pr_crit("qman_init_fq() failed\n");
+               goto failed;
+       }
+       /* Do enqueues + VDQCR, twice. (Parked FQ) */
+       err = do_enqueues(fq);
+       if (err)
+               goto failed;
+       pr_info("VDQCR (till-empty);\n");
+       frmcnt = QM_VDQCR_NUMFRAMES_TILLEMPTY;
+       err = qman_volatile_dequeue(fq, VDQCR_FLAGS, frmcnt);
+       if (err) {
+               pr_crit("qman_volatile_dequeue() failed\n");
+               goto failed;
+       }
+       err = do_enqueues(fq);
+       if (err)
+               goto failed;
+       pr_info("VDQCR (%d of %d);\n", NUM_PARTIAL, NUM_ENQUEUES);
+       frmcnt = QM_VDQCR_NUMFRAMES_SET(NUM_PARTIAL);
+       err = qman_volatile_dequeue(fq, VDQCR_FLAGS, frmcnt);
+       if (err) {
+               pr_crit("qman_volatile_dequeue() failed\n");
+               goto failed;
+       }
+       pr_info("VDQCR (%d of %d);\n", NUM_ENQUEUES - NUM_PARTIAL,
+               NUM_ENQUEUES);
+       frmcnt = QM_VDQCR_NUMFRAMES_SET(NUM_ENQUEUES - NUM_PARTIAL);
+       err = qman_volatile_dequeue(fq, VDQCR_FLAGS, frmcnt);
+       if (err) {
+               pr_crit("qman_volatile_dequeue() failed\n");
+               goto failed;
+       }
+
+       /* Enqueue once more, this time draining via scheduled dequeue */
+       err = do_enqueues(fq);
+       if (err)
+               goto failed;
+       pr_info("scheduled dequeue (till-empty)\n");
+       err = qman_schedule_fq(fq);
+       if (err) {
+               pr_crit("qman_schedule_fq() failed\n");
+               goto failed;
+       }
+       /* cb_dqrr() sets sdqcr_complete once the last frame came back */
+       wait_event(waitqueue, sdqcr_complete);
+
+       /* Retire and OOS the FQ */
+       err = qman_retire_fq(fq, &flags);
+       if (err < 0) {
+               pr_crit("qman_retire_fq() failed\n");
+               goto failed;
+       }
+       /* cb_fqs() sets retire_complete on the retirement message */
+       wait_event(waitqueue, retire_complete);
+       if (flags & QMAN_FQ_STATE_BLOCKOOS) {
+               err = -EIO;
+               pr_crit("leaking frames\n");
+               goto failed;
+       }
+       err = qman_oos_fq(fq);
+       if (err) {
+               pr_crit("qman_oos_fq() failed\n");
+               goto failed;
+       }
+       qman_destroy_fq(fq);
+       pr_info("%s(): Finished\n", __func__);
+       return 0;
+
+failed:
+       WARN_ON(1);
+       return err;
+}
+
+/*
+ * Dequeue callback: check the dequeued frame against the expected descriptor
+ * 'fd_dq' and step 'fd_dq' on a match. When a frame that was not flagged as
+ * unscheduled brings 'fd_dq' level with 'fd', the scheduled-dequeue phase is
+ * complete and the waiter is woken. The entry is always consumed.
+ */
+static enum qman_cb_dqrr_result cb_dqrr(struct qman_portal *p,
+                                       struct qman_fq *fq,
+                                       const struct qm_dqrr_entry *dq)
+{
+       if (WARN_ON(fd_cmp(&fd_dq, &dq->fd))) {
+               pr_err("BADNESS: dequeued frame doesn't match;\n");
+       } else {
+               int scheduled = !(dq->stat & QM_DQRR_STAT_UNSCHEDULED);
+
+               fd_inc(&fd_dq);
+               if (scheduled && fd_cmp(&fd_dq, &fd) == 0) {
+                       sdqcr_complete = 1;
+                       wake_up(&waitqueue);
+               }
+       }
+       return qman_cb_dqrr_consume;
+}
+
+/*
+ * Enqueue-rejection callback. The test never expects a rejection, so any
+ * invocation is reported and flagged with a WARN.
+ */
+static void cb_ern(struct qman_portal *p, struct qman_fq *fq,
+                  const union qm_mr_entry *msg)
+{
+       /* terminate the message so it is emitted as a complete log line */
+       pr_crit("cb_ern() unimplemented\n");
+       WARN_ON(1);
+}
+
+/*
+ * FQ state-change callback. Only retirement notifications (FQRN/FQRNI) are
+ * expected; on one of those the retire_complete flag is set and the waiter
+ * in qman_test_api() is woken. Anything else is reported and ignored.
+ */
+static void cb_fqs(struct qman_portal *p, struct qman_fq *fq,
+                  const union qm_mr_entry *msg)
+{
+       u8 verb = (msg->verb & QM_MR_VERB_TYPE_MASK);
+
+       if ((verb != QM_MR_VERB_FQRN) && (verb != QM_MR_VERB_FQRNI)) {
+               /* terminate the message so it is emitted as a full line */
+               pr_crit("unexpected FQS message\n");
+               WARN_ON(1);
+               return;
+       }
+       pr_info("Retirement message received\n");
+       retire_complete = 1;
+       wake_up(&waitqueue);
+}
diff --git a/drivers/soc/fsl/qbman/qman_test_stash.c b/drivers/soc/fsl/qbman/qman_test_stash.c
new file mode 100644 (file)
index 0000000..43cf66b
--- /dev/null
@@ -0,0 +1,617 @@
+/* Copyright 2009 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "qman_test.h"
+
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+
+/*
+ * Algorithm:
+ *
+ * Each cpu will have HP_PER_CPU "handlers" set up, each of which incorporates
+ * an rx/tx pair of FQ objects (both of which are stashed on dequeue). The
+ * organisation of FQIDs is such that the HP_PER_CPU*NUM_CPUS handlers will
+ * shuttle a "hot potato" frame around them such that every forwarding action
+ * moves it from one cpu to another. (The use of more than one handler per cpu
+ * is to allow enough handlers/FQs to truly test the significance of caching -
+ * ie. when cache-expiries are occurring.)
+ *
+ * The "hot potato" frame content will be HP_NUM_WORDS*4 bytes in size, and the
+ * first and last words of the frame data will undergo a transformation step on
+ * each forwarding action. To achieve this, each handler will be assigned a
+ * 32-bit "mixer", that is produced using a 32-bit LFSR. When a frame is
+ * received by a handler, the mixer of the expected sender is XOR'd into all
+ * words of the entire frame, which is then validated against the original
+ * values. Then, before forwarding, the entire frame is XOR'd with the mixer of
+ * the current handler. Apart from validating that the frame is taking the
+ * expected path, this also provides some quasi-realistic overheads to each
+ * forwarding action - dereferencing *all* the frame data, computation, and
+ * conditional branching. There is a "special" handler designated to act as the
+ * instigator of the test by creating and enqueuing the "hot potato" frame, and
+ * to determine when the test has completed by counting HP_LOOPS iterations.
+ *
+ * Init phases:
+ *
+ * 1. prepare each cpu's 'hp_cpu' struct using on_all_cpus() and link them
+ *    into 'hp_cpu_list'. Specifically, set processor_id, allocate HP_PER_CPU
+ *    handlers and link-list them (but do no other handler setup).
+ *
+ * 2. scan over 'hp_cpu_list' HP_PER_CPU times, the first time sets each
+ *    hp_cpu's 'iterator' to point to its first handler. With each loop,
+ *    allocate rx/tx FQIDs and mixer values to the hp_cpu's iterator handler
+ *    and advance the iterator for the next loop. This includes a final fixup,
+ *    which connects the last handler to the first (and which is why phase 2
+ *    and 3 are separate).
+ *
+ * 3. scan over 'hp_cpu_list' HP_PER_CPU times, the first time sets each
+ *    hp_cpu's 'iterator' to point to its first handler. With each loop,
+ *    initialise FQ objects and advance the iterator for the next loop.
+ *    Moreover, do this initialisation on the cpu it applies to so that Rx FQ
+ *    initialisation targets the correct cpu.
+ */
+
+/*
+ * helper to run something on all cpus (can't use on_each_cpu(), as that invokes
+ * the fn from irq context, which is too restrictive).
+ */
+struct bstrap {
+       /* the function the bootstrap thread calls once */
+       int (*fn)(void);
+       /* incremented by bstrap_fn() on entry; polled by on_all_cpus() */
+       atomic_t started;
+};
+/*
+ * Thread body used by on_all_cpus(): flag that we are running, call the
+ * requested function and, if it succeeded, idle until kthread_stop() is
+ * requested. A failing function makes the thread return its error at once.
+ */
+static int bstrap_fn(void *bs)
+{
+       struct bstrap *ctx = bs;
+       int ret;
+
+       atomic_inc(&ctx->started);
+       ret = ctx->fn();
+       if (!ret) {
+               while (!kthread_should_stop())
+                       msleep(20);
+       }
+       return ret;
+}
+/*
+ * Run @fn once on every online cpu, one cpu after the other, from a kthread
+ * bound to that cpu. Returns 0 if every invocation succeeded, otherwise the
+ * first error met (thread creation error or the value returned by @fn); the
+ * remaining cpus are then not visited.
+ */
+static int on_all_cpus(int (*fn)(void))
+{
+       int cpu;
+
+       for_each_cpu(cpu, cpu_online_mask) {
+               struct bstrap bstrap = {
+                       .fn = fn,
+                       .started = ATOMIC_INIT(0)
+               };
+               struct task_struct *k = kthread_create(bstrap_fn, &bstrap,
+                       "hotpotato%d", cpu);
+               int ret;
+
+               /* hand back the real reason rather than assuming -ENOMEM */
+               if (IS_ERR(k))
+                       return PTR_ERR(k);
+               kthread_bind(k, cpu);
+               wake_up_process(k);
+               /*
+                * If we call kthread_stop() before the "wake up" has had an
+                * effect, then the thread may exit with -EINTR without ever
+                * running the function. So poll until it's started before
+                * requesting it to stop.
+                */
+               while (!atomic_read(&bstrap.started))
+                       msleep(20);
+               /* kthread_stop() hands back the value bstrap_fn() returned */
+               ret = kthread_stop(k);
+               if (ret)
+                       return ret;
+       }
+       return 0;
+}
+
+struct hp_handler {
+
+       /* The following data is stashed when 'rx' is dequeued; */
+       /* -------------- */
+       /* The Rx FQ, dequeues of which will stash the entire hp_handler */
+       struct qman_fq rx;
+       /* The Tx FQ we should forward to */
+       struct qman_fq tx;
+       /* The value we XOR post-dequeue, prior to validating */
+       u32 rx_mixer;
+       /* The value we XOR pre-enqueue, after validating */
+       u32 tx_mixer;
+       /* what the hotpotato address should be on dequeue */
+       dma_addr_t addr;
+       /* cpu pointer to the frame words that get XOR'd and validated */
+       u32 *frame_ptr;
+
+       /* The following data isn't (necessarily) stashed on dequeue; */
+       /* -------------- */
+       /* FQIDs behind 'rx' and 'tx'; fqid_rx also bounds STASH_CTX_CL */
+       u32 fqid_rx, fqid_tx;
+       /* list node for linking us into 'hp_cpu' */
+       struct list_head node;
+       /* Just to check ... */
+       unsigned int processor_id;
+} ____cacheline_aligned;
+
+struct hp_cpu {
+       /* identify the cpu we run on; */
+       unsigned int processor_id;
+       /* root node for the per-cpu list of handlers */
+       struct list_head handlers;
+       /* list node for linking us into 'hp_cpu_list' */
+       struct list_head node;
+       /*
+        * when repeatedly scanning 'hp_cpu_list', each time linking the n'th
+        * handlers together, this is used as per-cpu iterator state
+        */
+       struct hp_handler *iterator;
+};
+
+/* Each cpu has one of these */
+static DEFINE_PER_CPU(struct hp_cpu, hp_cpus);
+
+/* links together the hp_cpu structs, in first-come first-serve order. */
+static LIST_HEAD(hp_cpu_list);
+static spinlock_t hp_lock = __SPIN_LOCK_UNLOCKED(hp_lock);
+
+static unsigned int hp_cpu_list_length;
+
+/* the "special" handler, that starts and terminates the test. */
+static struct hp_handler *special_handler;
+static int loop_counter;
+
+/* handlers are allocated out of this, so they're properly aligned. */
+static struct kmem_cache *hp_handler_slab;
+
+/* this is the frame data */
+static void *__frame_ptr;
+static u32 *frame_ptr;
+static dma_addr_t frame_dma;
+
+/* the main function waits on this */
+static DECLARE_WAIT_QUEUE_HEAD(queue);
+
+#define HP_PER_CPU     2
+#define HP_LOOPS       8
+/*
+ * 80 32-bit words (i.e. 320 bytes), like a small ethernet frame, and bleeds
+ * across more than one cacheline
+ */
+#define HP_NUM_WORDS   80
+/* First word of the LFSR-based frame data */
+#define HP_FIRST_WORD  0xabbaf00d
+
+/*
+ * One step of the 32-bit LFSR: shift right by one and, when the bit shifted
+ * out was set, fold in the feedback constant 0xd0000001.
+ */
+static inline u32 do_lfsr(u32 prev)
+{
+       u32 next = prev >> 1;
+
+       if (prev & 1u)
+               next ^= 0xd0000001u;
+       return next;
+}
+
+/*
+ * Allocate the "hot potato" frame, fill it with the LFSR sequence starting at
+ * HP_FIRST_WORD and DMA-map it through a temporary platform device.
+ *
+ * On success '__frame_ptr' (for kfree()), 'frame_ptr' (64-byte aligned view)
+ * and 'frame_dma' are set and 0 is returned. On failure nothing stays
+ * allocated and -EIO or -ENOMEM is returned.
+ */
+static int allocate_frame_data(void)
+{
+       u32 lfsr = HP_FIRST_WORD;
+       int loop;
+       int err = 0;
+       struct platform_device *pdev = platform_device_alloc("foobar", -1);
+
+       if (!pdev) {
+               pr_crit("platform_device_alloc() failed\n");
+               return -EIO;
+       }
+       if (platform_device_add(pdev)) {
+               pr_crit("platform_device_add() failed\n");
+               err = -EIO;
+               /* a device that failed to add must still be put */
+               goto out_put;
+       }
+       /*
+        * The frame is aligned up to 64 bytes below, so reserve the slack
+        * that the alignment can consume.
+        */
+       __frame_ptr = kmalloc(4 * HP_NUM_WORDS + 63, GFP_KERNEL);
+       if (!__frame_ptr) {
+               err = -ENOMEM;
+               goto out_del;
+       }
+
+       frame_ptr = PTR_ALIGN(__frame_ptr, 64);
+       for (loop = 0; loop < HP_NUM_WORDS; loop++) {
+               frame_ptr[loop] = lfsr;
+               lfsr = do_lfsr(lfsr);
+       }
+       frame_dma = dma_map_single(&pdev->dev, frame_ptr, 4 * HP_NUM_WORDS,
+                                  DMA_BIDIRECTIONAL);
+       if (dma_mapping_error(&pdev->dev, frame_dma)) {
+               pr_crit("dma_map_single() failed\n");
+               kfree(__frame_ptr);
+               __frame_ptr = NULL;
+               frame_ptr = NULL;
+               err = -EIO;
+       }
+out_del:
+       platform_device_del(pdev);
+out_put:
+       platform_device_put(pdev);
+       return err;
+}
+
+/*
+ * Free the frame buffer. '__frame_ptr' is the pointer kmalloc() returned in
+ * allocate_frame_data(), so that (not the aligned 'frame_ptr') goes to
+ * kfree(). NOTE(review): the mapping behind 'frame_dma' is not unmapped here.
+ */
+static void deallocate_frame_data(void)
+{
+       kfree(__frame_ptr);
+}
+
+/*
+ * Validate and re-mix the frame for @handler. The frame address in @fd must
+ * equal the handler's expected address; then each word is XOR'd with the
+ * handler's rx_mixer, checked against the LFSR sequence, and XOR'd with the
+ * tx_mixer ready for forwarding.
+ *
+ * Returns 0 on success, -EIO on a bad address or on the first corrupt word
+ * (words after that one are left untouched).
+ */
+static inline int process_frame_data(struct hp_handler *handler,
+                                    const struct qm_fd *fd)
+{
+       u32 *p = handler->frame_ptr;
+       u32 lfsr = HP_FIRST_WORD;
+       int loop;
+
+       if (qm_fd_addr_get64(fd) != handler->addr) {
+               pr_crit("bad frame address\n");
+               return -EIO;
+       }
+       for (loop = 0; loop < HP_NUM_WORDS; loop++, p++) {
+               *p ^= handler->rx_mixer;
+               if (*p != lfsr) {
+                       pr_crit("corrupt frame data\n");
+                       return -EIO;
+               }
+               *p ^= handler->tx_mixer;
+               lfsr = do_lfsr(lfsr);
+       }
+       return 0;
+}
+
+/*
+ * Dequeue callback of every non-special handler: validate/re-mix the frame
+ * and forward it on the handler's Tx FQ. A frame that fails validation is
+ * dropped with a WARN. The entry is always consumed.
+ */
+static enum qman_cb_dqrr_result normal_dqrr(struct qman_portal *portal,
+                                           struct qman_fq *fq,
+                                           const struct qm_dqrr_entry *dqrr)
+{
+       /* 'rx' is the first member of hp_handler, so @fq is the handler */
+       struct hp_handler *handler = (struct hp_handler *)fq;
+
+       if (process_frame_data(handler, &dqrr->fd)) {
+               WARN_ON(1);
+               goto skip;
+       }
+       if (qman_enqueue(&handler->tx, &dqrr->fd)) {
+               pr_crit("qman_enqueue() failed\n");
+               WARN_ON(1);
+       }
+skip:
+       return qman_cb_dqrr_consume;
+}
+
+/*
+ * Dequeue callback of the "special" handler: validate/re-mix the frame, count
+ * the completed lap and either forward the frame for another lap or, after
+ * HP_LOOPS laps, wake the main thread. The entry is always consumed.
+ */
+static enum qman_cb_dqrr_result special_dqrr(struct qman_portal *portal,
+                                            struct qman_fq *fq,
+                                            const struct qm_dqrr_entry *dqrr)
+{
+       /* 'rx' is the first member of hp_handler, so @fq is the handler */
+       struct hp_handler *handler = (struct hp_handler *)fq;
+
+       /*
+        * Report a bad frame like normal_dqrr() does, but keep counting and
+        * forwarding so that the waiter in qman_test_stash() still completes.
+        */
+       if (process_frame_data(handler, &dqrr->fd))
+               WARN_ON(1);
+       if (++loop_counter < HP_LOOPS) {
+               if (qman_enqueue(&handler->tx, &dqrr->fd)) {
+                       pr_crit("qman_enqueue() failed\n");
+                       WARN_ON(1);
+               }
+       } else {
+               pr_info("Received final (%dth) frame\n", loop_counter);
+               wake_up(&queue);
+       }
+       return qman_cb_dqrr_consume;
+}
+
+/*
+ * Init phase 1, run on each cpu: record the cpu id in this cpu's 'hp_cpu',
+ * link it into 'hp_cpu_list' and allocate HP_PER_CPU handlers for it. Only
+ * processor_id, addr and frame_ptr of each handler are filled in here.
+ *
+ * Returns 0 on success, -EIO if a handler could not be allocated.
+ */
+static int create_per_cpu_handlers(void)
+{
+       struct hp_handler *handler;
+       int loop;
+       struct hp_cpu *hp_cpu = this_cpu_ptr(&hp_cpus);
+
+       hp_cpu->processor_id = smp_processor_id();
+       /* have the handler list valid before the hp_cpu becomes visible */
+       INIT_LIST_HEAD(&hp_cpu->handlers);
+       spin_lock(&hp_lock);
+       list_add_tail(&hp_cpu->node, &hp_cpu_list);
+       hp_cpu_list_length++;
+       spin_unlock(&hp_lock);
+       for (loop = 0; loop < HP_PER_CPU; loop++) {
+               handler = kmem_cache_alloc(hp_handler_slab, GFP_KERNEL);
+               if (!handler) {
+                       pr_crit("kmem_cache_alloc() failed\n");
+                       WARN_ON(1);
+                       return -EIO;
+               }
+               handler->processor_id = hp_cpu->processor_id;
+               handler->addr = frame_dma;
+               handler->frame_ptr = frame_ptr;
+               list_add_tail(&handler->node, &hp_cpu->handlers);
+       }
+       return 0;
+}
+
+/*
+ * Teardown, run on each cpu: unlink this cpu's 'hp_cpu' from 'hp_cpu_list',
+ * then for each of its handlers retire and OOS the Rx FQ, destroy both FQ
+ * objects, release the Rx FQID and free the handler.
+ *
+ * Returns 0 on success, -EIO at the first FQ that cannot be retired or taken
+ * out of service (handlers not yet visited are left in place).
+ */
+static int destroy_per_cpu_handlers(void)
+{
+       struct list_head *loop, *tmp;
+       struct hp_cpu *hp_cpu = this_cpu_ptr(&hp_cpus);
+
+       spin_lock(&hp_lock);
+       list_del(&hp_cpu->node);
+       spin_unlock(&hp_lock);
+       list_for_each_safe(loop, tmp, &hp_cpu->handlers) {
+               u32 flags = 0;
+               struct hp_handler *handler = list_entry(loop, struct hp_handler,
+                                                       node);
+               if (qman_retire_fq(&handler->rx, &flags) ||
+                   (flags & QMAN_FQ_STATE_BLOCKOOS)) {
+                       pr_crit("qman_retire_fq(rx) failed, flags: %x\n",
+                               flags);
+                       WARN_ON(1);
+                       return -EIO;
+               }
+               if (qman_oos_fq(&handler->rx)) {
+                       pr_crit("qman_oos_fq(rx) failed\n");
+                       WARN_ON(1);
+                       return -EIO;
+               }
+               qman_destroy_fq(&handler->rx);
+               qman_destroy_fq(&handler->tx);
+               qman_release_fqid(handler->fqid_rx);
+               list_del(&handler->node);
+               kmem_cache_free(hp_handler_slab, handler);
+       }
+       return 0;
+}
+
+/*
+ * Number of L1 cachelines needed to cover @offset bytes, capped at 3.
+ *
+ * The count is kept in a u32 until after the cap is applied, so a large
+ * @offset cannot wrap around in the u8 return type and come out below 3.
+ */
+static inline u8 num_cachelines(u32 offset)
+{
+       u32 lines = (offset + (L1_CACHE_BYTES - 1)) / (L1_CACHE_BYTES);
+
+       if (lines > 3)
+               return 3;
+       return lines;
+}
+#define STASH_DATA_CL \
+       num_cachelines(HP_NUM_WORDS * 4)
+#define STASH_CTX_CL \
+       num_cachelines(offsetof(struct hp_handler, fqid_rx))
+
+/*
+ * Init phase 3 worker; must run on the cpu the handler belongs to. Creates
+ * and initialises (scheduled, local, with context/data stashing) the Rx FQ
+ * using special_dqrr() for the special handler and normal_dqrr() otherwise,
+ * then creates the Tx FQ object with QMAN_FQ_FLAG_NO_MODIFY.
+ *
+ * @h: the struct hp_handler to set up.
+ * Returns 0 on success, -EIO when run on the wrong cpu, or the error of the
+ * qman call that failed.
+ */
+static int init_handler(void *h)
+{
+       struct qm_mcc_initfq opts;
+       struct hp_handler *handler = h;
+       int err;
+
+       if (handler->processor_id != smp_processor_id()) {
+               err = -EIO;
+               goto failed;
+       }
+       /* Set up rx */
+       memset(&handler->rx, 0, sizeof(handler->rx));
+       if (handler == special_handler)
+               handler->rx.cb.dqrr = special_dqrr;
+       else
+               handler->rx.cb.dqrr = normal_dqrr;
+       err = qman_create_fq(handler->fqid_rx, 0, &handler->rx);
+       if (err) {
+               pr_crit("qman_create_fq(rx) failed\n");
+               goto failed;
+       }
+       memset(&opts, 0, sizeof(opts));
+       opts.we_mask = QM_INITFQ_WE_FQCTRL | QM_INITFQ_WE_CONTEXTA;
+       opts.fqd.fq_ctrl = QM_FQCTRL_CTXASTASHING;
+       qm_fqd_set_stashing(&opts.fqd, 0, STASH_DATA_CL, STASH_CTX_CL);
+       err = qman_init_fq(&handler->rx, QMAN_INITFQ_FLAG_SCHED |
+                          QMAN_INITFQ_FLAG_LOCAL, &opts);
+       if (err) {
+               pr_crit("qman_init_fq(rx) failed\n");
+               goto failed;
+       }
+       /* Set up tx */
+       memset(&handler->tx, 0, sizeof(handler->tx));
+       err = qman_create_fq(handler->fqid_tx, QMAN_FQ_FLAG_NO_MODIFY,
+                            &handler->tx);
+       if (err) {
+               pr_crit("qman_create_fq(tx) failed\n");
+               goto failed;
+       }
+
+       return 0;
+failed:
+       return err;
+}
+
+/*
+ * void-returning wrapper around init_handler() for use as a cross-cpu call
+ * target; a failure can only be reported through a WARN.
+ */
+static void init_handler_cb(void *h)
+{
+       int err = init_handler(h);
+
+       if (err)
+               WARN_ON(1);
+}
+
+/*
+ * Init phase 2: walk 'hp_cpu_list' HP_PER_CPU times, each pass assigning the
+ * next handler of every cpu its Rx/Tx FQIDs and mixers so that each handler
+ * receives what the previously visited handler transmits. Finally the very
+ * first handler is patched to receive from the last one, closing the ring,
+ * and is tagged as the "special" handler.
+ *
+ * Returns 0 on success, the qman_alloc_fqid() error, or -EIO if the first
+ * handler does not hold the expected placeholder values.
+ */
+static int init_phase2(void)
+{
+       int loop;
+       u32 fqid = 0;
+       u32 lfsr = 0xdeadbeef;
+       struct hp_cpu *hp_cpu;
+       struct hp_handler *handler;
+
+       for (loop = 0; loop < HP_PER_CPU; loop++) {
+               list_for_each_entry(hp_cpu, &hp_cpu_list, node) {
+                       int err;
+
+                       if (!loop)
+                               hp_cpu->iterator = list_first_entry(
+                                               &hp_cpu->handlers,
+                                               struct hp_handler, node);
+                       else
+                               hp_cpu->iterator = list_entry(
+                                               hp_cpu->iterator->node.next,
+                                               struct hp_handler, node);
+                       /* Rx FQID is the previous handler's Tx FQID */
+                       hp_cpu->iterator->fqid_rx = fqid;
+                       /* Allocate new FQID for Tx */
+                       err = qman_alloc_fqid(&fqid);
+                       if (err) {
+                               pr_crit("qman_alloc_fqid() failed\n");
+                               return err;
+                       }
+                       hp_cpu->iterator->fqid_tx = fqid;
+                       /* Rx mixer is the previous handler's Tx mixer */
+                       hp_cpu->iterator->rx_mixer = lfsr;
+                       /* Get new mixer for Tx */
+                       lfsr = do_lfsr(lfsr);
+                       hp_cpu->iterator->tx_mixer = lfsr;
+               }
+       }
+       /* Fix up the first handler (fqid_rx==0, rx_mixer=0xdeadbeef) */
+       hp_cpu = list_first_entry(&hp_cpu_list, struct hp_cpu, node);
+       handler = list_first_entry(&hp_cpu->handlers, struct hp_handler, node);
+       /* report the inconsistency as a proper negative errno */
+       if (handler->fqid_rx != 0 || handler->rx_mixer != 0xdeadbeef)
+               return -EIO;
+       handler->fqid_rx = fqid;
+       handler->rx_mixer = lfsr;
+       /* and tag it as our "special" handler */
+       special_handler = handler;
+       return 0;
+}
+
+/*
+ * Init phase 3: walk 'hp_cpu_list' HP_PER_CPU times and initialise the FQ
+ * objects of the next handler of every cpu, on the cpu that owns it (directly
+ * when that is the current cpu, via a synchronous cross-cpu call otherwise).
+ *
+ * Returns 0, or the error of a locally run init_handler(). Failures of the
+ * cross-cpu variant only raise a WARN on the remote cpu.
+ */
+static int init_phase3(void)
+{
+       int loop, err;
+       struct hp_cpu *hp_cpu;
+
+       for (loop = 0; loop < HP_PER_CPU; loop++) {
+               list_for_each_entry(hp_cpu, &hp_cpu_list, node) {
+                       if (!loop)
+                               hp_cpu->iterator = list_first_entry(
+                                               &hp_cpu->handlers,
+                                               struct hp_handler, node);
+                       else
+                               hp_cpu->iterator = list_entry(
+                                               hp_cpu->iterator->node.next,
+                                               struct hp_handler, node);
+                       preempt_disable();
+                       if (hp_cpu->processor_id == smp_processor_id()) {
+                               err = init_handler(hp_cpu->iterator);
+                       } else {
+                               smp_call_function_single(hp_cpu->processor_id,
+                                       init_handler_cb, hp_cpu->iterator, 1);
+                               err = 0;
+                       }
+                       /* always rebalance preemption before bailing out */
+                       preempt_enable();
+                       if (err)
+                               return err;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Start the test; must run on the special handler's cpu. Checks that the
+ * frame still holds the pristine LFSR sequence, XORs every word with the
+ * special handler's tx_mixer and enqueues the frame on its Tx FQ.
+ *
+ * @ignore: unused.
+ * Returns 0 on success, -EIO when run on the wrong cpu or on corrupt frame
+ * data, or the qman_enqueue() error.
+ */
+static int send_first_frame(void *ignore)
+{
+       u32 *p = special_handler->frame_ptr;
+       u32 lfsr = HP_FIRST_WORD;
+       int loop, err;
+       struct qm_fd fd;
+
+       if (special_handler->processor_id != smp_processor_id()) {
+               err = -EIO;
+               goto failed;
+       }
+       memset(&fd, 0, sizeof(fd));
+       qm_fd_addr_set64(&fd, special_handler->addr);
+       qm_fd_set_contig_big(&fd, HP_NUM_WORDS * 4);
+       for (loop = 0; loop < HP_NUM_WORDS; loop++, p++) {
+               if (*p != lfsr) {
+                       err = -EIO;
+                       pr_crit("corrupt frame data\n");
+                       goto failed;
+               }
+               *p ^= special_handler->tx_mixer;
+               lfsr = do_lfsr(lfsr);
+       }
+       pr_info("Sending first frame\n");
+       err = qman_enqueue(&special_handler->tx, &fd);
+       if (err) {
+               pr_crit("qman_enqueue() failed\n");
+               goto failed;
+       }
+
+       return 0;
+failed:
+       return err;
+}
+
+/*
+ * void-returning wrapper around send_first_frame() for use as a cross-cpu
+ * call target; a failure can only be reported through a WARN.
+ */
+static void send_first_frame_cb(void *ignore)
+{
+       int err = send_first_frame(NULL);
+
+       if (err)
+               WARN_ON(1);
+}
+
+/*
+ * "Hot potato" stashing test: set up HP_PER_CPU handlers on every online
+ * cpu, chain them into a ring, inject one frame at the special handler and
+ * wait until it has completed HP_LOOPS laps, then tear everything down.
+ * Skipped (returning 0) when fewer than two cpus are online.
+ *
+ * Returns 0 on success, otherwise the error of the failing step (after a
+ * WARN). NOTE(review): the failure path does not release what the earlier
+ * steps allocated.
+ */
+int qman_test_stash(void)
+{
+       int err;
+
+       if (cpumask_weight(cpu_online_mask) < 2) {
+               pr_info("%s(): skip - only 1 CPU\n", __func__);
+               return 0;
+       }
+
+       pr_info("%s(): Starting\n", __func__);
+
+       hp_cpu_list_length = 0;
+       loop_counter = 0;
+       hp_handler_slab = kmem_cache_create("hp_handler_slab",
+                       sizeof(struct hp_handler), L1_CACHE_BYTES,
+                       SLAB_HWCACHE_ALIGN, NULL);
+       if (!hp_handler_slab) {
+               err = -EIO;
+               pr_crit("kmem_cache_create() failed\n");
+               goto failed;
+       }
+
+       err = allocate_frame_data();
+       if (err)
+               goto failed;
+
+       /* Init phase 1 */
+       pr_info("Creating %d handlers per cpu...\n", HP_PER_CPU);
+       if (on_all_cpus(create_per_cpu_handlers)) {
+               err = -EIO;
+               pr_crit("on_all_cpus() failed\n");
+               goto failed;
+       }
+       pr_info("Number of cpus: %d, total of %d handlers\n",
+               hp_cpu_list_length, hp_cpu_list_length * HP_PER_CPU);
+
+       err = init_phase2();
+       if (err)
+               goto failed;
+
+       err = init_phase3();
+       if (err)
+               goto failed;
+
+       /* err is 0 here; only the local send below can change that */
+       preempt_disable();
+       if (special_handler->processor_id == smp_processor_id())
+               err = send_first_frame(NULL);
+       else
+               smp_call_function_single(special_handler->processor_id,
+                                        send_first_frame_cb, NULL, 1);
+       /* rebalance preemption before taking the error exit */
+       preempt_enable();
+       if (err)
+               goto failed;
+
+       wait_event(queue, loop_counter == HP_LOOPS);
+       deallocate_frame_data();
+       if (on_all_cpus(destroy_per_cpu_handlers)) {
+               err = -EIO;
+               pr_crit("on_all_cpus() failed\n");
+               goto failed;
+       }
+       kmem_cache_destroy(hp_handler_slab);
+       pr_info("%s(): Finished\n", __func__);
+
+       return 0;
+failed:
+       WARN_ON(1);
+       return err;
+}
index 333eb2215a5795c65d505b4ae81a3f09b511a3c2..0aaf429f31d571f569fe0916b52d10cf3dadfd49 100644 (file)
@@ -41,7 +41,8 @@ struct qe_gpio_chip {
 
 static void qe_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-       struct qe_gpio_chip *qe_gc = gpiochip_get_data(&mm_gc->gc);
+       struct qe_gpio_chip *qe_gc =
+               container_of(mm_gc, struct qe_gpio_chip, mm_gc);
        struct qe_pio_regs __iomem *regs = mm_gc->regs;
 
        qe_gc->cpdata = in_be32(&regs->cpdata);
index 7026507e6f1d5e4e53245a43c7c5839d5a4f9171..2707a827261b10378ef45460082463cf80890dd1 100644 (file)
@@ -69,8 +69,8 @@ static phys_addr_t qebase = -1;
 phys_addr_t get_qe_base(void)
 {
        struct device_node *qe;
-       int size;
-       const u32 *prop;
+       int ret;
+       struct resource res;
 
        if (qebase != -1)
                return qebase;
@@ -82,9 +82,9 @@ phys_addr_t get_qe_base(void)
                        return qebase;
        }
 
-       prop = of_get_property(qe, "reg", &size);
-       if (prop && size >= sizeof(*prop))
-               qebase = of_translate_address(qe, prop);
+       ret = of_address_to_resource(qe, 0, &res);
+       if (!ret)
+               qebase = res.start;
        of_node_put(qe);
 
        return qebase;
index 41eff805a9041c879627b96395a584e545960b5f..104e68d9b84f281c96e77b3861e5f11f1348cc49 100644 (file)
@@ -70,6 +70,11 @@ int cpm_muram_init(void)
        }
 
        muram_pool = gen_pool_create(0, -1);
+       if (!muram_pool) {
+               pr_err("Cannot allocate memory pool for CPM/QE muram");
+               ret = -ENOMEM;
+               goto out_muram;
+       }
        muram_pbase = of_translate_address(np, zero);
        if (muram_pbase == (phys_addr_t)OF_BAD_ADDR) {
                pr_err("Cannot translate zero through CPM muram node");
@@ -116,6 +121,9 @@ static unsigned long cpm_muram_alloc_common(unsigned long size,
        struct muram_block *entry;
        unsigned long start;
 
+       if (!muram_pool && cpm_muram_init())
+               goto out2;
+
        start = gen_pool_alloc_algo(muram_pool, size, algo, data);
        if (!start)
                goto out2;
index 5e48b147017866dca9b98a1d2703d374a0d9c60b..a1048b44e6b93309499ddaac4bdb77d8b747c773 100644 (file)
@@ -99,7 +99,7 @@ int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm *utdm,
        utdm->tdm_port = val;
        ut_info->uf_info.tdm_num = utdm->tdm_port;
 
-       if (of_get_property(np, "fsl,tdm-internal-loopback", NULL))
+       if (of_property_read_bool(np, "fsl,tdm-internal-loopback"))
                utdm->tdm_mode = TDM_INTERNAL_LOOPBACK;
        else
                utdm->tdm_mode = TDM_NORMAL;
@@ -167,7 +167,7 @@ int ucc_of_parse_tdm(struct device_node *np, struct ucc_tdm *utdm,
        }
 
        if (siram_init_flag == 0) {
-               memset_io(utdm->siram, 0,  res->end - res->start + 1);
+               memset_io(utdm->siram, 0,  resource_size(res));
                siram_init_flag = 1;
        }
 
index 35c0dd94566814c484758bae43d354bf4e282e13..a67b0ff6a362380e4f62382c12c557f98a127947 100644 (file)
@@ -70,6 +70,7 @@
 #define SPI_SR                 0x2c
 #define SPI_SR_EOQF            0x10000000
 #define SPI_SR_TCFQF           0x80000000
+#define SPI_SR_CLEAR           0xdaad0000
 
 #define SPI_RSER               0x30
 #define SPI_RSER_EOQFE         0x10000000
@@ -646,6 +647,11 @@ static const struct regmap_config dspi_regmap_config = {
        .max_register = 0x88,
 };
 
+static void dspi_init(struct fsl_dspi *dspi)
+{
+       regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR);
+}
+
 static int dspi_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
@@ -709,6 +715,7 @@ static int dspi_probe(struct platform_device *pdev)
                return PTR_ERR(dspi->regmap);
        }
 
+       dspi_init(dspi);
        dspi->irq = platform_get_irq(pdev, 0);
        if (dspi->irq < 0) {
                dev_err(&pdev->dev, "can't get platform irq\n");
index 7451585a080e5ccc13e2fc59d532c70e786a4ce0..2c175b9495f7ee102a0d70a96f42fa730c019247 100644 (file)
@@ -458,7 +458,7 @@ static void fsl_espi_cpu_irq(struct mpc8xxx_spi *mspi, u32 events)
 
                mspi->len -= rx_nr_bytes;
 
-               if (mspi->rx)
+               if (rx_nr_bytes && mspi->rx)
                        mspi->get_rx(rx_data, mspi);
        }
 
index 5787b723b593f79bb5e55f3b68abcb2f19d4b5cb..838783c3fed0ae81626099b8e295dc12963a1360 100644 (file)
@@ -1618,9 +1618,11 @@ static void of_register_spi_devices(struct spi_master *master)
                if (of_node_test_and_set_flag(nc, OF_POPULATED))
                        continue;
                spi = of_register_spi_device(master, nc);
-               if (IS_ERR(spi))
+               if (IS_ERR(spi)) {
                        dev_warn(&master->dev, "Failed to create SPI device for %s\n",
                                nc->full_name);
+                       of_node_clear_flag(nc, OF_POPULATED);
+               }
        }
 }
 #else
@@ -3131,6 +3133,7 @@ static int of_spi_notify(struct notifier_block *nb, unsigned long action,
                if (IS_ERR(spi)) {
                        pr_err("%s: failed to create for '%s'\n",
                                        __func__, rd->dn->full_name);
+                       of_node_clear_flag(rd->dn, OF_POPULATED);
                        return notifier_from_errno(PTR_ERR(spi));
                }
                break;
index 396ded52ab70242b8b3c069c52cc7fed2c44624d..209a8f7ef02bd4b04cabefad50d17351e5e6990c 100644 (file)
@@ -1187,8 +1187,10 @@ int ion_query_heaps(struct ion_client *client, struct ion_heap_query *query)
                hdata.type = heap->type;
                hdata.heap_id = heap->id;
 
-               ret = copy_to_user(&buffer[cnt],
-                                  &hdata, sizeof(hdata));
+               if (copy_to_user(&buffer[cnt], &hdata, sizeof(hdata))) {
+                       ret = -EFAULT;
+                       goto out;
+               }
 
                cnt++;
                if (cnt >= max_cnt)
index 15bac92b7f042023dbe327de670d8cd011a7f323..46b2bb99bfd6b2081a0775ce9ad87393bfcd5b2e 100644 (file)
@@ -107,7 +107,7 @@ struct ion_platform_data *ion_parse_dt(struct platform_device *pdev,
 
                heap_pdev = of_platform_device_create(node, heaps[i].name,
                                                      &pdev->dev);
-               if (!pdev)
+               if (!heap_pdev)
                        return ERR_PTR(-ENOMEM);
                heap_pdev->dev.platform_data = &heaps[i];
 
index 7043eb0543f6b15c98161e341b354a05e7748e02..5ab49a798164bfb8c400609ed4f36fa2c0126cc6 100644 (file)
@@ -207,7 +207,8 @@ static int ni_tio_clock_period_ps(const struct ni_gpct *counter,
                 * clock period is specified by user with prescaling
                 * already taken into account.
                 */
-               return counter->clock_period_ps;
+               *period_ps = counter->clock_period_ps;
+               return 0;
        }
 
        switch (generic_clock_source & NI_GPCT_PRESCALE_MODE_CLOCK_SRC_MASK) {
index e36ee984485bfcf3aea56f777e9f7ec4c671ed02..d33d6fe078ad730ae3cdb4c7d9291c23d3711397 100644 (file)
@@ -128,6 +128,7 @@ int arche_platform_change_state(enum arche_platform_state state,
        pdev = of_find_device_by_node(np);
        if (!pdev) {
                pr_err("arche-platform device not found\n");
+               of_node_put(np);
                return -ENODEV;
        }
 
@@ -185,6 +186,7 @@ int arche_platform_change_state(enum arche_platform_state state,
 exit:
        spin_unlock_irqrestore(&arche_pdata->wake_lock, flags);
        mutex_unlock(&arche_pdata->platform_state_mutex);
+       put_device(&pdev->dev);
        of_node_put(np);
        return ret;
 }
index 071bb1cfd3ae1d38c510c19adbf7cdd3d9daa9a7..baab460eeaa3828f7a3f84490f4dda566b647c44 100644 (file)
@@ -1548,7 +1548,8 @@ static int ap_probe(struct usb_interface *interface,
        INIT_LIST_HEAD(&es2->arpcs);
        spin_lock_init(&es2->arpc_lock);
 
-       if (es2_arpc_in_enable(es2))
+       retval = es2_arpc_in_enable(es2);
+       if (retval)
                goto error;
 
        retval = gb_hd_add(hd);
index 5e06e4229e4239e3e307ce2a0faac266ddd1c7e5..250caa00de5e9f4665978ef0105a8b42c2e49420 100644 (file)
@@ -702,15 +702,13 @@ static int gb_gpio_probe(struct gbphy_device *gbphy_dev,
        ret = gb_gpio_irqchip_add(gpio, irqc, 0,
                                   handle_level_irq, IRQ_TYPE_NONE);
        if (ret) {
-               dev_err(&connection->bundle->dev,
-                       "failed to add irq chip: %d\n", ret);
+               dev_err(&gbphy_dev->dev, "failed to add irq chip: %d\n", ret);
                goto exit_line_free;
        }
 
        ret = gpiochip_add(gpio);
        if (ret) {
-               dev_err(&connection->bundle->dev,
-                       "failed to add gpio chip: %d\n", ret);
+               dev_err(&gbphy_dev->dev, "failed to add gpio chip: %d\n", ret);
                goto exit_gpio_irqchip_remove;
        }
 
index 69f67ddbd4a364d00fe31932a15c773599eb8851..660b4674a76f584aa8a426abffc595d05051659c 100644 (file)
@@ -127,7 +127,7 @@ struct gb_module *gb_module_create(struct gb_host_device *hd, u8 module_id,
        return module;
 
 err_put_interfaces:
-       for (--i; i > 0; --i)
+       for (--i; i >= 0; --i)
                gb_interface_put(module->interfaces[i]);
 
        put_device(&module->dev);
index 5ee7954bd9f91af58091454a478b64e1a0dd7741..2633d2bfb1b4f86bfd34e0da52f1e647b105e301 100644 (file)
@@ -888,7 +888,7 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev,
        minor = alloc_minor(gb_tty);
        if (minor < 0) {
                if (minor == -ENOSPC) {
-                       dev_err(&connection->bundle->dev,
+                       dev_err(&gbphy_dev->dev,
                                "no more free minor numbers\n");
                        retval = -ENODEV;
                } else {
index d626125d7af942511853404649318da45548a662..564b36d4f6486c7c0dad7a18cca4b0bf0155c4a9 100644 (file)
@@ -468,6 +468,8 @@ static inline int __sca3000_get_base_freq(struct sca3000_state *st,
        case SCA3000_MEAS_MODE_OP_2:
                *base_freq = info->option_mode_2_freq;
                break;
+       default:
+               ret = -EINVAL;
        }
 error_ret:
        return ret;
index 5eecf1cb1028892796872ad592daf3a5e4d89e3c..3892a747041082518f3c1cbec66755b180124e24 100644 (file)
@@ -655,6 +655,7 @@ static void ad5933_work(struct work_struct *work)
        __be16 buf[2];
        int val[2];
        unsigned char status;
+       int ret;
 
        mutex_lock(&indio_dev->mlock);
        if (st->state == AD5933_CTRL_INIT_START_FREQ) {
@@ -662,19 +663,22 @@ static void ad5933_work(struct work_struct *work)
                ad5933_cmd(st, AD5933_CTRL_START_SWEEP);
                st->state = AD5933_CTRL_START_SWEEP;
                schedule_delayed_work(&st->work, st->poll_time_jiffies);
-               mutex_unlock(&indio_dev->mlock);
-               return;
+               goto out;
        }
 
-       ad5933_i2c_read(st->client, AD5933_REG_STATUS, 1, &status);
+       ret = ad5933_i2c_read(st->client, AD5933_REG_STATUS, 1, &status);
+       if (ret)
+               goto out;
 
        if (status & AD5933_STAT_DATA_VALID) {
                int scan_count = bitmap_weight(indio_dev->active_scan_mask,
                                               indio_dev->masklength);
-               ad5933_i2c_read(st->client,
+               ret = ad5933_i2c_read(st->client,
                                test_bit(1, indio_dev->active_scan_mask) ?
                                AD5933_REG_REAL_DATA : AD5933_REG_IMAG_DATA,
                                scan_count * 2, (u8 *)buf);
+               if (ret)
+                       goto out;
 
                if (scan_count == 2) {
                        val[0] = be16_to_cpu(buf[0]);
@@ -686,8 +690,7 @@ static void ad5933_work(struct work_struct *work)
        } else {
                /* no data available - try again later */
                schedule_delayed_work(&st->work, st->poll_time_jiffies);
-               mutex_unlock(&indio_dev->mlock);
-               return;
+               goto out;
        }
 
        if (status & AD5933_STAT_SWEEP_DONE) {
@@ -700,7 +703,7 @@ static void ad5933_work(struct work_struct *work)
                ad5933_cmd(st, AD5933_CTRL_INC_FREQ);
                schedule_delayed_work(&st->work, st->poll_time_jiffies);
        }
-
+out:
        mutex_unlock(&indio_dev->mlock);
 }
 
index 6eae605959055ec75def8fde36793f2678758606..23fda9d98bffd571fd0b7d3de323b6b457237e54 100644 (file)
@@ -871,12 +871,10 @@ static ssize_t xattr_cache_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(xattr_cache);
 
-static ssize_t unstable_stats_show(struct kobject *kobj,
-                                  struct attribute *attr,
-                                  char *buf)
+static int ll_unstable_stats_seq_show(struct seq_file *m, void *v)
 {
-       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
-                                             ll_kobj);
+       struct super_block     *sb    = m->private;
+       struct ll_sb_info      *sbi   = ll_s2sbi(sb);
        struct cl_client_cache *cache = sbi->ll_cache;
        long pages;
        int mb;
@@ -884,19 +882,21 @@ static ssize_t unstable_stats_show(struct kobject *kobj,
        pages = atomic_long_read(&cache->ccc_unstable_nr);
        mb = (pages * PAGE_SIZE) >> 20;
 
-       return sprintf(buf, "unstable_check:     %8d\n"
-                           "unstable_pages: %12ld\n"
-                           "unstable_mb:        %8d\n",
-                           cache->ccc_unstable_check, pages, mb);
+       seq_printf(m,
+                  "unstable_check:     %8d\n"
+                  "unstable_pages: %12ld\n"
+                  "unstable_mb:        %8d\n",
+                  cache->ccc_unstable_check, pages, mb);
+
+       return 0;
 }
 
-static ssize_t unstable_stats_store(struct kobject *kobj,
-                                   struct attribute *attr,
-                                   const char *buffer,
-                                   size_t count)
+static ssize_t ll_unstable_stats_seq_write(struct file *file,
+                                          const char __user *buffer,
+                                          size_t count, loff_t *off)
 {
-       struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
-                                             ll_kobj);
+       struct super_block *sb = ((struct seq_file *)file->private_data)->private;
+       struct ll_sb_info *sbi = ll_s2sbi(sb);
        char kernbuf[128];
        int val, rc;
 
@@ -922,7 +922,7 @@ static ssize_t unstable_stats_store(struct kobject *kobj,
 
        return count;
 }
-LUSTRE_RW_ATTR(unstable_stats);
+LPROC_SEQ_FOPS(ll_unstable_stats);
 
 static ssize_t root_squash_show(struct kobject *kobj, struct attribute *attr,
                                char *buf)
@@ -995,6 +995,7 @@ static struct lprocfs_vars lprocfs_llite_obd_vars[] = {
        /* { "filegroups",   lprocfs_rd_filegroups,  0, 0 }, */
        { "max_cached_mb",    &ll_max_cached_mb_fops, NULL },
        { "statahead_stats",  &ll_statahead_stats_fops, NULL, 0 },
+       { "unstable_stats",   &ll_unstable_stats_fops, NULL },
        { "sbi_flags",        &ll_sbi_flags_fops, NULL, 0 },
        { .name =               "nosquash_nids",
          .fops =               &ll_nosquash_nids_fops          },
@@ -1026,7 +1027,6 @@ static struct attribute *llite_attrs[] = {
        &lustre_attr_max_easize.attr,
        &lustre_attr_default_easize.attr,
        &lustre_attr_xattr_cache.attr,
-       &lustre_attr_unstable_stats.attr,
        &lustre_attr_root_squash.attr,
        NULL,
 };
index ea15cc6380970e159addcf91ce18c431b1cb1ef0..4d9bd02ede4700c8a89460ed68b0ce66898d3c26 100644 (file)
@@ -482,6 +482,8 @@ static int bcm2048_set_rds_no_lock(struct bcm2048_device *bdev, u8 rds_on)
                                           flags);
                memset(&bdev->rds_info, 0, sizeof(bdev->rds_info));
        }
+       if (err)
+               return err;
 
        return bcm2048_send_command(bdev, BCM2048_I2C_FM_RDS_SYSTEM,
                                    bdev->cache_fm_rds_system);
index a324322ee0ad1ebec13e280368538056a691fb18..499952c8ef3915ba54afe52347bc220cbad1387c 100644 (file)
@@ -106,13 +106,12 @@ static int nvec_mouse_probe(struct platform_device *pdev)
 {
        struct nvec_chip *nvec = dev_get_drvdata(pdev->dev.parent);
        struct serio *ser_dev;
-       char mouse_reset[] = { NVEC_PS2, SEND_COMMAND, PSMOUSE_RST, 3 };
 
-       ser_dev = devm_kzalloc(&pdev->dev, sizeof(struct serio), GFP_KERNEL);
+       ser_dev = kzalloc(sizeof(struct serio), GFP_KERNEL);
        if (!ser_dev)
                return -ENOMEM;
 
-       ser_dev->id.type = SERIO_PS_PSTHRU;
+       ser_dev->id.type = SERIO_8042;
        ser_dev->write = ps2_sendcommand;
        ser_dev->start = ps2_startstreaming;
        ser_dev->stop = ps2_stopstreaming;
@@ -127,9 +126,6 @@ static int nvec_mouse_probe(struct platform_device *pdev)
 
        serio_register_port(ser_dev);
 
-       /* mouse reset */
-       nvec_write_async(nvec, mouse_reset, sizeof(mouse_reset));
-
        return 0;
 }
 
index 955247979aaa4d9a72575d9ed3c9f1c13995cb5a..4ed6d8d7712ae1077395f6921430d9319e3a9df2 100644 (file)
 
 #define PANEL_PLANE_TL                                0x08001C
 #define PANEL_PLANE_TL_TOP_SHIFT                      16
-#define PANEL_PLANE_TL_TOP_MASK                       (0xeff << 16)
-#define PANEL_PLANE_TL_LEFT_MASK                      0xeff
+#define PANEL_PLANE_TL_TOP_MASK                       (0x7ff << 16)
+#define PANEL_PLANE_TL_LEFT_MASK                      0x7ff
 
 #define PANEL_PLANE_BR                                0x080020
 #define PANEL_PLANE_BR_BOTTOM_SHIFT                   16
-#define PANEL_PLANE_BR_BOTTOM_MASK                    (0xeff << 16)
-#define PANEL_PLANE_BR_RIGHT_MASK                     0xeff
+#define PANEL_PLANE_BR_BOTTOM_MASK                    (0x7ff << 16)
+#define PANEL_PLANE_BR_RIGHT_MASK                     0x7ff
 
 #define PANEL_HORIZONTAL_TOTAL                        0x080024
 #define PANEL_HORIZONTAL_TOTAL_TOTAL_SHIFT            16
index c29040fdf9a7757b7c0d428616e244a6e4d5f697..1091b9f1dd070e3d27c269402b43b0a09d96bcdc 100644 (file)
@@ -423,8 +423,7 @@ create_pagelist(char __user *buf, size_t count, unsigned short type,
                actual_pages = get_user_pages(task, task->mm,
                                          (unsigned long)buf & ~(PAGE_SIZE - 1),
                                          num_pages,
-                                         (type == PAGELIST_READ) /*Write */ ,
-                                         0 /*Force */ ,
+                                         (type == PAGELIST_READ) ? FOLL_WRITE : 0,
                                          pages,
                                          NULL /*vmas */);
                up_read(&task->mm->mmap_sem);
index e11c0e07471bc7ebba04e48d3efc746236ec2f4e..7b6cd4d80621e38ff6d47fcd87b45fbe9cd4259b 100644 (file)
@@ -1477,8 +1477,7 @@ dump_phys_mem(void *virt_addr, uint32_t num_bytes)
                current->mm,              /* mm */
                (unsigned long)virt_addr, /* start */
                num_pages,                /* len */
-               0,                        /* write */
-               0,                        /* force */
+               0,                        /* gup_flags */
                pages,                    /* pages (array of page pointers) */
                NULL);                    /* vmas */
        up_read(&current->mm->mmap_sem);
index 78f5613e9467cf5579a57f6d13137af9f5e1d5b9..6ab7443eabdefa2bdf7c4a4653433e10250b9bf2 100644 (file)
@@ -3388,7 +3388,6 @@ int wilc_init(struct net_device *dev, struct host_if_drv **hif_drv_handler)
 
        clients_count++;
 
-       destroy_workqueue(hif_workqueue);
 _fail_:
        return result;
 }
index ad26b9372f1096a1940c405a3f019066b35f8e33..96eedfc49c9428938dfdd9fe483d37a0735b7a14 100644 (file)
@@ -653,6 +653,7 @@ static struct iscsit_transport cxgbit_transport = {
 static struct cxgb4_uld_info cxgbit_uld_info = {
        .name           = DRV_NAME,
        .nrxq           = MAX_ULD_QSETS,
+       .ntxq           = MAX_ULD_QSETS,
        .rxq_size       = 1024,
        .lro            = true,
        .add            = cxgbit_uld_add,
index 39b928c2849d71c47390ef55002dc767f7ae4e12..b7d747e92c7abf589e35154b25482a9dedb57118 100644 (file)
@@ -1804,6 +1804,10 @@ int iscsit_process_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
         * Otherwise, initiator is not expecting a NOPIN is response.
         * Just ignore for now.
         */
+
+       if (cmd)
+               iscsit_free_cmd(cmd, false);
+
         return 0;
 }
 EXPORT_SYMBOL(iscsit_process_nop_out);
@@ -2982,7 +2986,7 @@ iscsit_build_nopin_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
 
        pr_debug("Built NOPIN %s Response ITT: 0x%08x, TTT: 0x%08x,"
                " StatSN: 0x%08x, Length %u\n", (nopout_response) ?
-               "Solicitied" : "Unsolicitied", cmd->init_task_tag,
+               "Solicited" : "Unsolicited", cmd->init_task_tag,
                cmd->targ_xfer_tag, cmd->stat_sn, cmd->buf_ptr_size);
 }
 EXPORT_SYMBOL(iscsit_build_nopin_rsp);
index adf419fa429189ceca94d04782221fc34de23b6c..15f79a2ca34ab6e17fd5fda6f68425b9af1809eb 100644 (file)
@@ -434,7 +434,7 @@ static int iscsi_login_zero_tsih_s2(
 
                /*
                 * Make MaxRecvDataSegmentLength PAGE_SIZE aligned for
-                * Immediate Data + Unsolicitied Data-OUT if necessary..
+                * Immediate Data + Unsolicited Data-OUT if necessary..
                 */
                param = iscsi_find_param_from_key("MaxRecvDataSegmentLength",
                                                  conn->param_list);
@@ -646,7 +646,7 @@ static void iscsi_post_login_start_timers(struct iscsi_conn *conn)
 {
        struct iscsi_session *sess = conn->sess;
        /*
-        * FIXME: Unsolicitied NopIN support for ISER
+        * FIXME: Unsolicited NopIN support for ISER
         */
        if (conn->conn_transport->transport_type == ISCSI_INFINIBAND)
                return;
index 6094a6beddde9fb5d045644b6b11b32e6bd149c1..7dfefd66df93874b1359824890b4b760275ff2c6 100644 (file)
@@ -754,15 +754,7 @@ EXPORT_SYMBOL(target_complete_cmd);
 
 void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length)
 {
-       if (scsi_status != SAM_STAT_GOOD) {
-               return;
-       }
-
-       /*
-        * Calculate new residual count based upon length of SCSI data
-        * transferred.
-        */
-       if (length < cmd->data_length) {
+       if (scsi_status == SAM_STAT_GOOD && length < cmd->data_length) {
                if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
                        cmd->residual_count += cmd->data_length - length;
                } else {
@@ -771,12 +763,6 @@ void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int len
                }
 
                cmd->data_length = length;
-       } else if (length > cmd->data_length) {
-               cmd->se_cmd_flags |= SCF_OVERFLOW_BIT;
-               cmd->residual_count = length - cmd->data_length;
-       } else {
-               cmd->se_cmd_flags &= ~(SCF_OVERFLOW_BIT | SCF_UNDERFLOW_BIT);
-               cmd->residual_count = 0;
        }
 
        target_complete_cmd(cmd, scsi_status);
@@ -1706,6 +1692,7 @@ void transport_generic_request_failure(struct se_cmd *cmd,
        case TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED:
        case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED:
        case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED:
+       case TCM_COPY_TARGET_DEVICE_NOT_REACHABLE:
                break;
        case TCM_OUT_OF_RESOURCES:
                sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -2547,8 +2534,12 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref)
         * fabric acknowledgement that requires two target_put_sess_cmd()
         * invocations before se_cmd descriptor release.
         */
-       if (ack_kref)
-               kref_get(&se_cmd->cmd_kref);
+       if (ack_kref) {
+               if (!kref_get_unless_zero(&se_cmd->cmd_kref))
+                       return -EINVAL;
+
+               se_cmd->se_cmd_flags |= SCF_ACK_KREF;
+       }
 
        spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
        if (se_sess->sess_tearing_down) {
@@ -2627,7 +2618,7 @@ EXPORT_SYMBOL(target_put_sess_cmd);
  */
 void target_sess_cmd_list_set_waiting(struct se_session *se_sess)
 {
-       struct se_cmd *se_cmd;
+       struct se_cmd *se_cmd, *tmp_cmd;
        unsigned long flags;
        int rc;
 
@@ -2639,14 +2630,16 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess)
        se_sess->sess_tearing_down = 1;
        list_splice_init(&se_sess->sess_cmd_list, &se_sess->sess_wait_list);
 
-       list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) {
+       list_for_each_entry_safe(se_cmd, tmp_cmd,
+                                &se_sess->sess_wait_list, se_cmd_list) {
                rc = kref_get_unless_zero(&se_cmd->cmd_kref);
                if (rc) {
                        se_cmd->cmd_wait_set = 1;
                        spin_lock(&se_cmd->t_state_lock);
                        se_cmd->transport_state |= CMD_T_FABRIC_STOP;
                        spin_unlock(&se_cmd->t_state_lock);
-               }
+               } else
+                       list_del_init(&se_cmd->se_cmd_list);
        }
 
        spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
@@ -2871,6 +2864,12 @@ static const struct sense_info sense_info_table[] = {
                .ascq = 0x03, /* LOGICAL BLOCK REFERENCE TAG CHECK FAILED */
                .add_sector_info = true,
        },
+       [TCM_COPY_TARGET_DEVICE_NOT_REACHABLE] = {
+               .key = COPY_ABORTED,
+               .asc = 0x0d,
+               .ascq = 0x02, /* COPY TARGET DEVICE NOT REACHABLE */
+
+       },
        [TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE] = {
                /*
                 * Returning ILLEGAL REQUEST would cause immediate IO errors on
index 62bf4fe5704a929aa01a5f970f26e7f1020f9442..2b3c8564ace8154548349c6a71872f0b1aceeadb 100644 (file)
@@ -96,7 +96,7 @@ struct tcmu_dev {
        size_t dev_size;
        u32 cmdr_size;
        u32 cmdr_last_cleaned;
-       /* Offset of data ring from start of mb */
+       /* Offset of data area from start of mb */
        /* Must add data_off and mb_addr to get the address */
        size_t data_off;
        size_t data_size;
@@ -147,8 +147,8 @@ static const struct genl_multicast_group tcmu_mcgrps[] = {
 };
 
 /* Our generic netlink family */
-static struct genl_family tcmu_genl_family = {
-       .id = GENL_ID_GENERATE,
+static struct genl_family tcmu_genl_family __ro_after_init = {
+       .module = THIS_MODULE,
        .hdrsize = 0,
        .name = "TCM-USER",
        .version = 1,
@@ -349,7 +349,7 @@ static inline size_t spc_bitmap_free(unsigned long *bitmap)
 
 /*
  * We can't queue a command until we have space available on the cmd ring *and*
- * space available on the data ring.
+ * space available on the data area.
  *
  * Called with ring lock held.
  */
@@ -389,7 +389,8 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t d
        return true;
 }
 
-static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
+static sense_reason_t
+tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 {
        struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
        struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
@@ -405,7 +406,7 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
        DECLARE_BITMAP(old_bitmap, DATA_BLOCK_BITS);
 
        if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags))
-               return -EINVAL;
+               return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
        /*
         * Must be a certain minimum size for response sense info, but
@@ -432,11 +433,14 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
                BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents));
                data_length += se_cmd->t_bidi_data_sg->length;
        }
-       if ((command_size > (udev->cmdr_size / 2))
-           || data_length > udev->data_size)
-               pr_warn("TCMU: Request of size %zu/%zu may be too big for %u/%zu "
-                       "cmd/data ring buffers\n", command_size, data_length,
+       if ((command_size > (udev->cmdr_size / 2)) ||
+           data_length > udev->data_size) {
+               pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu "
+                       "cmd ring/data area\n", command_size, data_length,
                        udev->cmdr_size, udev->data_size);
+               spin_unlock_irq(&udev->cmdr_lock);
+               return TCM_INVALID_CDB_FIELD;
+       }
 
        while (!is_ring_space_avail(udev, command_size, data_length)) {
                int ret;
@@ -450,7 +454,7 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
                finish_wait(&udev->wait_cmdr, &__wait);
                if (!ret) {
                        pr_warn("tcmu: command timed out\n");
-                       return -ETIMEDOUT;
+                       return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
                }
 
                spin_lock_irq(&udev->cmdr_lock);
@@ -487,9 +491,7 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
        bitmap_copy(old_bitmap, udev->data_bitmap, DATA_BLOCK_BITS);
 
-       /*
-        * Fix up iovecs, and handle if allocation in data ring wrapped.
-        */
+       /* Handle allocating space from the data area */
        iov = &entry->req.iov[0];
        iov_cnt = 0;
        copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE
@@ -526,10 +528,11 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
        mod_timer(&udev->timeout,
                round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT)));
 
-       return 0;
+       return TCM_NO_SENSE;
 }
 
-static int tcmu_queue_cmd(struct se_cmd *se_cmd)
+static sense_reason_t
+tcmu_queue_cmd(struct se_cmd *se_cmd)
 {
        struct se_device *se_dev = se_cmd->se_dev;
        struct tcmu_dev *udev = TCMU_DEV(se_dev);
@@ -538,10 +541,10 @@ static int tcmu_queue_cmd(struct se_cmd *se_cmd)
 
        tcmu_cmd = tcmu_alloc_cmd(se_cmd);
        if (!tcmu_cmd)
-               return -ENOMEM;
+               return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
        ret = tcmu_queue_cmd_ring(tcmu_cmd);
-       if (ret < 0) {
+       if (ret != TCM_NO_SENSE) {
                pr_err("TCMU: Could not queue command\n");
                spin_lock_irq(&udev->commands_lock);
                idr_remove(&udev->commands, tcmu_cmd->cmd_id);
@@ -561,7 +564,7 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
        if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
                /*
                 * cmd has been completed already from timeout, just reclaim
-                * data ring space and free cmd
+                * data area space and free cmd
                 */
                free_data_area(udev, cmd);
 
@@ -1128,21 +1131,10 @@ static sector_t tcmu_get_blocks(struct se_device *dev)
                       dev->dev_attrib.block_size);
 }
 
-static sense_reason_t
-tcmu_pass_op(struct se_cmd *se_cmd)
-{
-       int ret = tcmu_queue_cmd(se_cmd);
-
-       if (ret != 0)
-               return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-       else
-               return TCM_NO_SENSE;
-}
-
 static sense_reason_t
 tcmu_parse_cdb(struct se_cmd *cmd)
 {
-       return passthrough_parse_cdb(cmd, tcmu_pass_op);
+       return passthrough_parse_cdb(cmd, tcmu_queue_cmd);
 }
 
 static const struct target_backend_ops tcmu_ops = {
index 75cd85426ae3a27f276947f667794acb7c9454d0..094a1440eacb3dccdd9c35a678a2940c3e03216d 100644 (file)
@@ -104,7 +104,7 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op
        }
        mutex_unlock(&g_device_mutex);
 
-       pr_err("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
+       pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
        return -EINVAL;
 }
 
@@ -185,7 +185,7 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op
 
 static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
                                struct xcopy_op *xop, unsigned char *p,
-                               unsigned short tdll)
+                               unsigned short tdll, sense_reason_t *sense_ret)
 {
        struct se_device *local_dev = se_cmd->se_dev;
        unsigned char *desc = p;
@@ -193,6 +193,8 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
        unsigned short start = 0;
        bool src = true;
 
+       *sense_ret = TCM_INVALID_PARAMETER_LIST;
+
        if (offset != 0) {
                pr_err("XCOPY target descriptor list length is not"
                        " multiple of %d\n", XCOPY_TARGET_DESC_LEN);
@@ -243,9 +245,16 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
                rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, true);
        else
                rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, false);
-
-       if (rc < 0)
+       /*
+        * If a matching IEEE NAA 0x83 descriptor for the requested device
+        * is not located on this node, return COPY_ABORTED with ASC/ASCQ
+        * 0x0d/0x02 - COPY_TARGET_DEVICE_NOT_REACHABLE to request the
+        * initiator to fall back to normal copy method.
+        */
+       if (rc < 0) {
+               *sense_ret = TCM_COPY_TARGET_DEVICE_NOT_REACHABLE;
                goto out;
+       }
 
        pr_debug("XCOPY TGT desc: Source dev: %p NAA IEEE WWN: 0x%16phN\n",
                 xop->src_dev, &xop->src_tid_wwn[0]);
@@ -653,6 +662,7 @@ static int target_xcopy_read_source(
        rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, src_dev, &cdb[0],
                                remote_port, true);
        if (rc < 0) {
+               ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status;
                transport_generic_free_cmd(se_cmd, 0);
                return rc;
        }
@@ -664,6 +674,7 @@ static int target_xcopy_read_source(
 
        rc = target_xcopy_issue_pt_cmd(xpt_cmd);
        if (rc < 0) {
+               ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status;
                transport_generic_free_cmd(se_cmd, 0);
                return rc;
        }
@@ -714,6 +725,7 @@ static int target_xcopy_write_destination(
                                remote_port, false);
        if (rc < 0) {
                struct se_cmd *src_cmd = &xop->src_pt_cmd->se_cmd;
+               ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status;
                /*
                 * If the failure happened before the t_mem_list hand-off in
                 * target_xcopy_setup_pt_cmd(), Reset memory + clear flag so that
@@ -729,6 +741,7 @@ static int target_xcopy_write_destination(
 
        rc = target_xcopy_issue_pt_cmd(xpt_cmd);
        if (rc < 0) {
+               ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status;
                se_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC;
                transport_generic_free_cmd(se_cmd, 0);
                return rc;
@@ -815,9 +828,14 @@ static void target_xcopy_do_work(struct work_struct *work)
 out:
        xcopy_pt_undepend_remotedev(xop);
        kfree(xop);
-
-       pr_warn("target_xcopy_do_work: Setting X-COPY CHECK_CONDITION -> sending response\n");
-       ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION;
+       /*
+        * Don't override an error scsi status if it has already been set
+        */
+       if (ec_cmd->scsi_status == SAM_STAT_GOOD) {
+               pr_warn_ratelimited("target_xcopy_do_work: rc: %d, Setting X-COPY"
+                       " CHECK_CONDITION -> sending response\n", rc);
+               ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION;
+       }
        target_complete_cmd(ec_cmd, SAM_STAT_CHECK_CONDITION);
 }
 
@@ -875,7 +893,7 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
                " tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage,
                tdll, sdll, inline_dl);
 
-       rc = target_xcopy_parse_target_descriptors(se_cmd, xop, &p[16], tdll);
+       rc = target_xcopy_parse_target_descriptors(se_cmd, xop, &p[16], tdll, &ret);
        if (rc <= 0)
                goto out;
 
index 216e18cc9133d6709b25168610e98552b96d2647..ff5de9a96643f9b21e06a14af4bcd5e7277689f9 100644 (file)
@@ -572,10 +572,10 @@ static void ft_send_work(struct work_struct *work)
        if (target_submit_cmd(&cmd->se_cmd, cmd->sess->se_sess, fcp->fc_cdb,
                              &cmd->ft_sense_buffer[0], scsilun_to_int(&fcp->fc_lun),
                              ntohl(fcp->fc_dl), task_attr, data_dir,
-                             TARGET_SCF_ACK_KREF))
+                             TARGET_SCF_ACK_KREF | TARGET_SCF_USE_CPUID))
                goto err;
 
-       pr_debug("r_ctl %x alloc target_submit_cmd\n", fh->fh_r_ctl);
+       pr_debug("r_ctl %x target_submit_cmd %p\n", fh->fh_r_ctl, cmd);
        return;
 
 err:
index 6ffbb603d9122a0259daa69db5bcca03ba891aa5..fd5c3de794705bb467f8689540f8045e61629d7f 100644 (file)
 
 #include "tcm_fc.h"
 
+#define TFC_SESS_DBG(lport, fmt, args...) \
+       pr_debug("host%u: rport %6.6x: " fmt,      \
+                (lport)->host->host_no,           \
+                (lport)->port_id, ##args )
+
 static void ft_sess_delete_all(struct ft_tport *);
 
 /*
@@ -167,24 +172,29 @@ static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id)
        struct ft_tport *tport;
        struct hlist_head *head;
        struct ft_sess *sess;
+       char *reason = "no session created";
 
        rcu_read_lock();
        tport = rcu_dereference(lport->prov[FC_TYPE_FCP]);
-       if (!tport)
+       if (!tport) {
+               reason = "not an FCP port";
                goto out;
+       }
 
        head = &tport->hash[ft_sess_hash(port_id)];
        hlist_for_each_entry_rcu(sess, head, hash) {
                if (sess->port_id == port_id) {
                        kref_get(&sess->kref);
                        rcu_read_unlock();
-                       pr_debug("port_id %x found %p\n", port_id, sess);
+                       TFC_SESS_DBG(lport, "port_id %x found %p\n",
+                                    port_id, sess);
                        return sess;
                }
        }
 out:
        rcu_read_unlock();
-       pr_debug("port_id %x not found\n", port_id);
+       TFC_SESS_DBG(lport, "port_id %x not found, %s\n",
+                    port_id, reason);
        return NULL;
 }
 
@@ -195,7 +205,7 @@ static int ft_sess_alloc_cb(struct se_portal_group *se_tpg,
        struct ft_tport *tport = sess->tport;
        struct hlist_head *head = &tport->hash[ft_sess_hash(sess->port_id)];
 
-       pr_debug("port_id %x sess %p\n", sess->port_id, sess);
+       TFC_SESS_DBG(tport->lport, "port_id %x sess %p\n", sess->port_id, sess);
        hlist_add_head_rcu(&sess->hash, head);
        tport->sess_count++;
 
@@ -223,7 +233,7 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id,
 
        sess = kzalloc(sizeof(*sess), GFP_KERNEL);
        if (!sess)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        kref_init(&sess->kref); /* ref for table entry */
        sess->tport = tport;
@@ -234,8 +244,9 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id,
                                             TARGET_PROT_NORMAL, &initiatorname[0],
                                             sess, ft_sess_alloc_cb);
        if (IS_ERR(sess->se_sess)) {
+               int rc = PTR_ERR(sess->se_sess);
                kfree(sess);
-               return NULL;
+               sess = ERR_PTR(rc);
        }
        return sess;
 }
@@ -319,7 +330,7 @@ void ft_sess_close(struct se_session *se_sess)
                mutex_unlock(&ft_lport_lock);
                return;
        }
-       pr_debug("port_id %x\n", port_id);
+       TFC_SESS_DBG(sess->tport->lport, "port_id %x close session\n", port_id);
        ft_sess_unhash(sess);
        mutex_unlock(&ft_lport_lock);
        ft_close_sess(sess);
@@ -379,8 +390,13 @@ static int ft_prli_locked(struct fc_rport_priv *rdata, u32 spp_len,
                if (!(fcp_parm & FCP_SPPF_INIT_FCN))
                        return FC_SPP_RESP_CONF;
                sess = ft_sess_create(tport, rdata->ids.port_id, rdata);
-               if (!sess)
-                       return FC_SPP_RESP_RES;
+               if (IS_ERR(sess)) {
+                       if (PTR_ERR(sess) == -EACCES) {
+                               spp->spp_flags &= ~FC_SPP_EST_IMG_PAIR;
+                               return FC_SPP_RESP_CONF;
+                       } else
+                               return FC_SPP_RESP_RES;
+               }
                if (!sess->params)
                        rdata->prli_count++;
                sess->params = fcp_parm;
@@ -423,8 +439,8 @@ static int ft_prli(struct fc_rport_priv *rdata, u32 spp_len,
        mutex_lock(&ft_lport_lock);
        ret = ft_prli_locked(rdata, spp_len, rspp, spp);
        mutex_unlock(&ft_lport_lock);
-       pr_debug("port_id %x flags %x ret %x\n",
-              rdata->ids.port_id, rspp ? rspp->spp_flags : 0, ret);
+       TFC_SESS_DBG(rdata->local_port, "port_id %x flags %x ret %x\n",
+                    rdata->ids.port_id, rspp ? rspp->spp_flags : 0, ret);
        return ret;
 }
 
@@ -477,11 +493,11 @@ static void ft_recv(struct fc_lport *lport, struct fc_frame *fp)
        struct ft_sess *sess;
        u32 sid = fc_frame_sid(fp);
 
-       pr_debug("sid %x\n", sid);
+       TFC_SESS_DBG(lport, "recv sid %x\n", sid);
 
        sess = ft_sess_get(lport, sid);
        if (!sess) {
-               pr_debug("sid %x sess lookup failed\n", sid);
+               TFC_SESS_DBG(lport, "sid %x sess lookup failed\n", sid);
                /* TBD XXX - if FCP_CMND, send PRLO */
                fc_frame_free(fp);
                return;
index 9b4815e81b0df01cf2160d752499b670c4a2d731..19bf2028e508437e46c96e1a5df89e763f6cf59c 100644 (file)
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/acpi.h>
 #include <linux/thermal.h>
 #include <linux/pm.h>
 
 /* Intel PCH thermal Device IDs */
+#define PCH_THERMAL_DID_HSW_1  0x9C24 /* Haswell PCH */
+#define PCH_THERMAL_DID_HSW_2  0x8C24 /* Haswell PCH */
 #define PCH_THERMAL_DID_WPT    0x9CA4 /* Wildcat Point */
 #define PCH_THERMAL_DID_SKL    0x9D31 /* Skylake PCH */
 
@@ -66,9 +69,53 @@ struct pch_thermal_device {
        unsigned long crt_temp;
        int hot_trip_id;
        unsigned long hot_temp;
+       int psv_trip_id;
+       unsigned long psv_temp;
        bool bios_enabled;
 };
 
+#ifdef CONFIG_ACPI
+
+/*
+ * On some platforms, there is a companion ACPI device, which adds a
+ * passive trip temperature using the _PSV method. There is no specific
+ * passive temperature setting in the MMIO interface of this PCI device.
+ */
+static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd,
+                                     int *nr_trips)
+{
+       struct acpi_device *adev;
+
+       ptd->psv_trip_id = -1;
+
+       adev = ACPI_COMPANION(&ptd->pdev->dev);
+       if (adev) {
+               unsigned long long r;
+               acpi_status status;
+
+               status = acpi_evaluate_integer(adev->handle, "_PSV", NULL,
+                                              &r);
+               if (ACPI_SUCCESS(status)) {
+                       unsigned long trip_temp;
+
+                       trip_temp = DECI_KELVIN_TO_MILLICELSIUS(r);
+                       if (trip_temp) {
+                               ptd->psv_temp = trip_temp;
+                               ptd->psv_trip_id = *nr_trips;
+                               ++(*nr_trips);
+                       }
+               }
+       }
+}
+#else
+static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd,
+                                     int *nr_trips)
+{
+       ptd->psv_trip_id = -1;
+
+}
+#endif
+
 static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips)
 {
        u8 tsel;
@@ -119,6 +166,8 @@ read_trips:
                ++(*nr_trips);
        }
 
+       pch_wpt_add_acpi_psv_trip(ptd, nr_trips);
+
        return 0;
 }
 
@@ -194,6 +243,8 @@ static int pch_get_trip_type(struct thermal_zone_device *tzd, int trip,
                *type = THERMAL_TRIP_CRITICAL;
        else if (ptd->hot_trip_id == trip)
                *type = THERMAL_TRIP_HOT;
+       else if (ptd->psv_trip_id == trip)
+               *type = THERMAL_TRIP_PASSIVE;
        else
                return -EINVAL;
 
@@ -208,6 +259,8 @@ static int pch_get_trip_temp(struct thermal_zone_device *tzd, int trip, int *tem
                *temp = ptd->crt_temp;
        else if (ptd->hot_trip_id == trip)
                *temp = ptd->hot_temp;
+       else if (ptd->psv_trip_id == trip)
+               *temp = ptd->psv_temp;
        else
                return -EINVAL;
 
@@ -242,6 +295,11 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev,
                ptd->ops = &pch_dev_ops_wpt;
                dev_name = "pch_skylake";
                break;
+       case PCH_THERMAL_DID_HSW_1:
+       case PCH_THERMAL_DID_HSW_2:
+               ptd->ops = &pch_dev_ops_wpt;
+               dev_name = "pch_haswell";
+               break;
        default:
                dev_err(&pdev->dev, "unknown pch thermal device\n");
                return -ENODEV;
@@ -324,6 +382,8 @@ static int intel_pch_thermal_resume(struct device *device)
 static struct pci_device_id intel_pch_thermal_id[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_1) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_2) },
        { 0, },
 };
 MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);
index 0e4dc0afcfd244d510b003575249c4c3ce1d16bd..afada655f86198366b88b0d379a4bb79c8095bbb 100644 (file)
@@ -669,20 +669,17 @@ static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
        .set_cur_state = powerclamp_set_cur_state,
 };
 
-static const struct x86_cpu_id intel_powerclamp_ids[] __initconst = {
+static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
        { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT },
-       { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_ARAT },
-       { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_NONSTOP_TSC },
-       { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_CONSTANT_TSC},
        {}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
 
 static int __init powerclamp_probe(void)
 {
+
        if (!x86_match_cpu(intel_powerclamp_ids)) {
-               pr_err("Intel powerclamp does not run on family %d model %d\n",
-                               boot_cpu_data.x86, boot_cpu_data.x86_model);
+               pr_err("CPU does not support MWAIT");
                return -ENODEV;
        }
 
index 226b0b4aced6a2e9fd348086259fc517e3f674ee..911fd964c742485847fde04b83254135abcead53 100644 (file)
@@ -2163,8 +2163,8 @@ static const struct genl_multicast_group thermal_event_mcgrps[] = {
        { .name = THERMAL_GENL_MCAST_GROUP_NAME, },
 };
 
-static struct genl_family thermal_event_genl_family = {
-       .id = GENL_ID_GENERATE,
+static struct genl_family thermal_event_genl_family __ro_after_init = {
+       .module = THIS_MODULE,
        .name = THERMAL_GENL_FAMILY_NAME,
        .version = THERMAL_GENL_VERSION,
        .maxattr = THERMAL_GENL_ATTR_MAX,
@@ -2235,7 +2235,7 @@ int thermal_generate_netlink_event(struct thermal_zone_device *tz,
 }
 EXPORT_SYMBOL_GPL(thermal_generate_netlink_event);
 
-static int genetlink_init(void)
+static int __init genetlink_init(void)
 {
        return genl_register_family(&thermal_event_genl_family);
 }
index 886fcf37f291ac7c78654aa2f3a511f976846732..b9923464599f6fc18b825d902fe8f14b5555e981 100644 (file)
@@ -213,7 +213,7 @@ static int qrk_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
        struct pci_dev *pdev = to_pci_dev(port->dev);
        int ret;
 
-       ret = pci_alloc_irq_vectors(pdev, 1, 1, 0);
+       ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
        if (ret < 0)
                return ret;
 
index 1bfb6fdbaa20861a40599f6c33facbf615788b6d..1731b98d2471077c762806b63f88993b0a475fc3 100644 (file)
@@ -83,7 +83,8 @@ static const struct serial8250_config uart_config[] = {
                .name           = "16550A",
                .fifo_size      = 16,
                .tx_loadsz      = 16,
-               .fcr            = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+               .fcr            = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 |
+                                 UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT,
                .rxtrig_bytes   = {1, 4, 8, 14},
                .flags          = UART_CAP_FIFO,
        },
index b8d9c8c9d02a9762a2be77861554f13e49488a0c..417d9e7038e1aa53ea3da50569d158f7b88d4c78 100644 (file)
@@ -99,7 +99,7 @@ static void uniphier_serial_out(struct uart_port *p, int offset, int value)
        case UART_LCR:
                valshift = UNIPHIER_UART_LCR_SHIFT;
                /* Divisor latch access bit does not exist. */
-               value &= ~(UART_LCR_DLAB << valshift);
+               value &= ~UART_LCR_DLAB;
                /* fall through */
        case UART_MCR:
                offset = UNIPHIER_UART_LCR_MCR;
@@ -199,7 +199,7 @@ static int uniphier_uart_probe(struct platform_device *pdev)
 
        regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        if (!regs) {
-               dev_err(dev, "failed to get memory resource");
+               dev_err(dev, "failed to get memory resource\n");
                return -EINVAL;
        }
 
index c7831407a882d2bb19be29153926d156a2a10a93..25c1d7bc010043b15f1676660bef7577150252cb 100644 (file)
@@ -1625,6 +1625,7 @@ config SERIAL_SPRD_CONSOLE
 config SERIAL_STM32
        tristate "STMicroelectronics STM32 serial port support"
        select SERIAL_CORE
+       depends on HAS_DMA
        depends on ARM || COMPILE_TEST
        help
          This driver is for the on-chip Serial Controller on
index fd8aa1f4ba782b62466dadea4868a0005f6fe5e2..168b10cad47b5437c2152313fcad026e2747300a 100644 (file)
@@ -2132,11 +2132,29 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios,
                mode |= ATMEL_US_USMODE_RS485;
        } else if (termios->c_cflag & CRTSCTS) {
                /* RS232 with hardware handshake (RTS/CTS) */
-               if (atmel_use_dma_rx(port) && !atmel_use_fifo(port)) {
-                       dev_info(port->dev, "not enabling hardware flow control because DMA is used");
-                       termios->c_cflag &= ~CRTSCTS;
-               } else {
+               if (atmel_use_fifo(port) &&
+                   !mctrl_gpio_to_gpiod(atmel_port->gpios, UART_GPIO_CTS)) {
+                       /*
+                        * with ATMEL_US_USMODE_HWHS set, the controller will
+                        * be able to drive the RTS pin high/low when the RX
+                        * FIFO is above RXFTHRES/below RXFTHRES2.
+                        * It will also disable the transmitter when the CTS
+                        * pin is high.
+                        * This mode is not activated if CTS pin is a GPIO
+                        * because in this case, the transmitter is always
+                        * disabled (there must be an internal pull-up
+                        * responsible for this behaviour).
+                        * If the RTS pin is a GPIO, the controller won't be
+                        * able to drive it according to the FIFO thresholds,
+                        * but it will be handled by the driver.
+                        */
                        mode |= ATMEL_US_USMODE_HWHS;
+               } else {
+                       /*
+                        * For platforms without FIFO, the flow control is
+                        * handled by the driver.
+                        */
+                       mode |= ATMEL_US_USMODE_NORMAL;
                }
        } else {
                /* RS232 without hadware handshake */
index de9d5107c00a0e1430b9ad8f8dfb99b7d4b2c9b3..76103f2c4a8001e36e10fffc7581ee2ee4c885b6 100644 (file)
@@ -328,7 +328,7 @@ static void lpuart_dma_tx(struct lpuart_port *sport)
 
        sport->dma_tx_bytes = uart_circ_chars_pending(xmit);
 
-       if (xmit->tail < xmit->head) {
+       if (xmit->tail < xmit->head || xmit->head == 0) {
                sport->dma_tx_nents = 1;
                sg_init_one(sgl, xmit->buf + xmit->tail, sport->dma_tx_bytes);
        } else {
@@ -359,7 +359,6 @@ static void lpuart_dma_tx(struct lpuart_port *sport)
        sport->dma_tx_in_progress = true;
        sport->dma_tx_cookie = dmaengine_submit(sport->dma_tx_desc);
        dma_async_issue_pending(sport->dma_tx_chan);
-
 }
 
 static void lpuart_dma_tx_complete(void *arg)
index d391650b82e7be9bb63ac7df85a9de3e4fe0bed8..42caccb5e87eeabf732872225ef1527f427acea6 100644 (file)
@@ -419,6 +419,7 @@ static struct dmi_system_id pch_uart_dmi_table[] = {
                },
                (void *)MINNOW_UARTCLK,
        },
+       { }
 };
 
 /* Return UART clock, checking for board specific clocks. */
index 2675792a8f5963a37b82d708b0ce87f8f070d5dd..fb0672554123a196d75fd9eedca35a915ddc12e6 100644 (file)
@@ -1130,9 +1130,13 @@ static int sc16is7xx_gpio_direction_output(struct gpio_chip *chip,
 {
        struct sc16is7xx_port *s = gpiochip_get_data(chip);
        struct uart_port *port = &s->p[0].port;
+       u8 state = sc16is7xx_port_read(port, SC16IS7XX_IOSTATE_REG);
 
-       sc16is7xx_port_update(port, SC16IS7XX_IOSTATE_REG, BIT(offset),
-                             val ? BIT(offset) : 0);
+       if (val)
+               state |= BIT(offset);
+       else
+               state &= ~BIT(offset);
+       sc16is7xx_port_write(port, SC16IS7XX_IOSTATE_REG, state);
        sc16is7xx_port_update(port, SC16IS7XX_IODIR_REG, BIT(offset),
                              BIT(offset));
 
index 6e4f63627479db8d33547ef18fd1e6e365f5bdb7..f2303f390345e14664f31976662f7c804fbbb9d9 100644 (file)
@@ -111,7 +111,7 @@ void uart_write_wakeup(struct uart_port *port)
         * closed.  No cookie for you.
         */
        BUG_ON(!state);
-       tty_wakeup(state->port.tty);
+       tty_port_tty_wakeup(&state->port);
 }
 
 static void uart_stop(struct tty_struct *tty)
@@ -632,7 +632,7 @@ static void uart_flush_buffer(struct tty_struct *tty)
        if (port->ops->flush_buffer)
                port->ops->flush_buffer(port);
        uart_port_unlock(port, flags);
-       tty_wakeup(tty);
+       tty_port_tty_wakeup(&state->port);
 }
 
 /*
@@ -2746,8 +2746,6 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
        uport->cons = drv->cons;
        uport->minor = drv->tty_driver->minor_start + uport->line;
 
-       port->console = uart_console(uport);
-
        /*
         * If this port is a console, then the spinlock is already
         * initialised.
@@ -2761,6 +2759,8 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
 
        uart_configure_port(drv, state, uport);
 
+       port->console = uart_console(uport);
+
        num_groups = 2;
        if (uport->attr_group)
                num_groups++;
index 41d97492310271dbb3f2036d484b7b2ae5ee8c10..cd97ceb76e4ffe6f48af1a614532bdf42318e2de 100644 (file)
@@ -31,7 +31,7 @@ struct stm32_usart_info {
        struct stm32_usart_config cfg;
 };
 
-#define UNDEF_REG ~0
+#define UNDEF_REG 0xff
 
 /* Register offsets */
 struct stm32_usart_info stm32f4_info = {
index f37edaa5ac7577ed77c944d68e1fed7f526674e9..dd4c02fa4820a2f9c07c9ddfafd7fd7dd6be1fea 100644 (file)
@@ -1200,6 +1200,7 @@ static int __init cdns_early_console_setup(struct earlycon_device *device,
 OF_EARLYCON_DECLARE(cdns, "xlnx,xuartps", cdns_early_console_setup);
 OF_EARLYCON_DECLARE(cdns, "cdns,uart-r1p8", cdns_early_console_setup);
 OF_EARLYCON_DECLARE(cdns, "cdns,uart-r1p12", cdns_early_console_setup);
+OF_EARLYCON_DECLARE(cdns, "xlnx,zynqmp-uart", cdns_early_console_setup);
 
 /**
  * cdns_uart_console_write - perform write operation
@@ -1438,6 +1439,7 @@ static const struct of_device_id cdns_uart_of_match[] = {
        { .compatible = "xlnx,xuartps", },
        { .compatible = "cdns,uart-r1p8", },
        { .compatible = "cdns,uart-r1p12", .data = &zynqmp_uart_def },
+       { .compatible = "xlnx,zynqmp-uart", .data = &zynqmp_uart_def },
        {}
 };
 MODULE_DEVICE_TABLE(of, cdns_uart_of_match);
index 06fb39c1d6dd5e06fb7541030d881d9999813771..8c3bf3d613c061615bbcf15a607f371e9208c67b 100644 (file)
@@ -870,10 +870,15 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
        if (new_cols == vc->vc_cols && new_rows == vc->vc_rows)
                return 0;
 
+       if (new_screen_size > (4 << 20))
+               return -EINVAL;
        newscreen = kmalloc(new_screen_size, GFP_USER);
        if (!newscreen)
                return -ENOMEM;
 
+       if (vc == sel_cons)
+               clear_selection();
+
        old_rows = vc->vc_rows;
        old_row_size = vc->vc_size_row;
 
@@ -1176,7 +1181,7 @@ static void csi_J(struct vc_data *vc, int vpar)
                        break;
                case 3: /* erase scroll-back buffer (and whole display) */
                        scr_memsetw(vc->vc_screenbuf, vc->vc_video_erase_char,
-                                   vc->vc_screenbuf_size >> 1);
+                                   vc->vc_screenbuf_size);
                        set_origin(vc);
                        if (con_is_visible(vc))
                                update_screen(vc);
index 69426e644d17019b1c6e2d672b2e1a04cbffc3fd..3dbb4a21ab44c8a6555f5e2ad2a321a61526e23b 100644 (file)
@@ -914,6 +914,7 @@ static int ci_hdrc_probe(struct platform_device *pdev)
        if (!ci)
                return -ENOMEM;
 
+       spin_lock_init(&ci->lock);
        ci->dev = dev;
        ci->platdata = dev_get_platdata(dev);
        ci->imx28_write_fix = !!(ci->platdata->flags &
index 96ae69502c86fac5b1b2f7306f049d7f7bb5085f..111b0e0b8698b76d0de983e856c89dadea92734e 100644 (file)
@@ -188,6 +188,8 @@ static void host_stop(struct ci_hdrc *ci)
 
        if (hcd) {
                usb_remove_hcd(hcd);
+               ci->role = CI_ROLE_END;
+               synchronize_irq(ci->irq);
                usb_put_hcd(hcd);
                if (ci->platdata->reg_vbus && !ci_otg_is_fsm_mode(ci) &&
                        (ci->platdata->flags & CI_HDRC_TURN_VBUS_EARLY_ON))
index 661f43fe0f9e9e8f9d9be29eaed5bb1307b8df54..c9e80ad48fdcdb0912271a336600167ff91d7276 100644 (file)
@@ -1889,8 +1889,6 @@ static int udc_start(struct ci_hdrc *ci)
        struct usb_otg_caps *otg_caps = &ci->platdata->ci_otg_caps;
        int retval = 0;
 
-       spin_lock_init(&ci->lock);
-
        ci->gadget.ops          = &usb_gadget_ops;
        ci->gadget.speed        = USB_SPEED_UNKNOWN;
        ci->gadget.max_speed    = USB_SPEED_HIGH;
index 78f0f85bebdc25ef971405175feb621865f4cad1..fada988512a1622a9551a5d6d5ae7482d78ac35f 100644 (file)
@@ -932,8 +932,6 @@ static int wait_serial_change(struct acm *acm, unsigned long arg)
        DECLARE_WAITQUEUE(wait, current);
        struct async_icount old, new;
 
-       if (arg & (TIOCM_DSR | TIOCM_RI | TIOCM_CD))
-               return -EINVAL;
        do {
                spin_lock_irq(&acm->read_lock);
                old = acm->oldcount;
@@ -1161,6 +1159,8 @@ static int acm_probe(struct usb_interface *intf,
        if (quirks == IGNORE_DEVICE)
                return -ENODEV;
 
+       memset(&h, 0x00, sizeof(struct usb_cdc_parsed_header));
+
        num_rx_buf = (quirks == SINGLE_RX_URB) ? 1 : ACM_NR;
 
        /* handle quirks deadly to normal probing*/
index fa9b26b915071ada49ee1b068ff098c09a962ff1..4c0fa0b173538847e680ae13e983d6a3534311d6 100644 (file)
@@ -463,9 +463,18 @@ static void dwc2_clear_force_mode(struct dwc2_hsotg *hsotg)
  */
 void dwc2_force_dr_mode(struct dwc2_hsotg *hsotg)
 {
+       bool ret;
+
        switch (hsotg->dr_mode) {
        case USB_DR_MODE_HOST:
-               dwc2_force_mode(hsotg, true);
+               ret = dwc2_force_mode(hsotg, true);
+               /*
+                * NOTE: This is required for some Rockchip SoC-based
+                * platforms on their host-only dwc2.
+                */
+               if (!ret)
+                       msleep(50);
+
                break;
        case USB_DR_MODE_PERIPHERAL:
                dwc2_force_mode(hsotg, false);
index aad4107ef927e26388f302ebbbd7907ad9a295c0..2a21a0414b1d385347ac700b4db5fda2f502c523 100644 (file)
@@ -259,6 +259,13 @@ enum dwc2_lx_state {
        DWC2_L3,        /* Off state */
 };
 
+/*
+ * Gadget periodic tx fifo sizes as used by legacy driver
+ * EP0 is not included
+ */
+#define DWC2_G_P_LEGACY_TX_FIFO_SIZE {256, 256, 256, 256, 768, 768, 768, \
+                                          768, 0, 0, 0, 0, 0, 0, 0}
+
 /* Gadget ep0 states */
 enum dwc2_ep0_state {
        DWC2_EP0_SETUP,
index 4cd6403a75668c35bef6a277f987bf41fc55ed27..24fbebc9b409050092c8a54296c445f129422e83 100644 (file)
@@ -186,10 +186,9 @@ static void dwc2_hsotg_ctrl_epint(struct dwc2_hsotg *hsotg,
  */
 static void dwc2_hsotg_init_fifo(struct dwc2_hsotg *hsotg)
 {
-       unsigned int fifo;
+       unsigned int ep;
        unsigned int addr;
        int timeout;
-       u32 dptxfsizn;
        u32 val;
 
        /* Reset fifo map if not correctly cleared during previous session */
@@ -217,16 +216,16 @@ static void dwc2_hsotg_init_fifo(struct dwc2_hsotg *hsotg)
         * them to endpoints dynamically according to maxpacket size value of
         * given endpoint.
         */
-       for (fifo = 1; fifo < MAX_EPS_CHANNELS; fifo++) {
-               dptxfsizn = dwc2_readl(hsotg->regs + DPTXFSIZN(fifo));
-
-               val = (dptxfsizn & FIFOSIZE_DEPTH_MASK) | addr;
-               addr += dptxfsizn >> FIFOSIZE_DEPTH_SHIFT;
-
-               if (addr > hsotg->fifo_mem)
-                       break;
+       for (ep = 1; ep < MAX_EPS_CHANNELS; ep++) {
+               if (!hsotg->g_tx_fifo_sz[ep])
+                       continue;
+               val = addr;
+               val |= hsotg->g_tx_fifo_sz[ep] << FIFOSIZE_DEPTH_SHIFT;
+               WARN_ONCE(addr + hsotg->g_tx_fifo_sz[ep] > hsotg->fifo_mem,
+                         "insufficient fifo memory");
+               addr += hsotg->g_tx_fifo_sz[ep];
 
-               dwc2_writel(val, hsotg->regs + DPTXFSIZN(fifo));
+               dwc2_writel(val, hsotg->regs + DPTXFSIZN(ep));
        }
 
        /*
@@ -3807,10 +3806,36 @@ static void dwc2_hsotg_dump(struct dwc2_hsotg *hsotg)
 static void dwc2_hsotg_of_probe(struct dwc2_hsotg *hsotg)
 {
        struct device_node *np = hsotg->dev->of_node;
+       u32 len = 0;
+       u32 i = 0;
 
        /* Enable dma if requested in device tree */
        hsotg->g_using_dma = of_property_read_bool(np, "g-use-dma");
 
+       /*
+       * Register TX periodic fifo size per endpoint.
+       * EP0 is excluded since it has no fifo configuration.
+       */
+       if (!of_find_property(np, "g-tx-fifo-size", &len))
+               goto rx_fifo;
+
+       len /= sizeof(u32);
+
+       /* Read tx fifo sizes other than ep0 */
+       if (of_property_read_u32_array(np, "g-tx-fifo-size",
+                                               &hsotg->g_tx_fifo_sz[1], len))
+               goto rx_fifo;
+
+       /* Add ep0 */
+       len++;
+
+       /* Make remaining TX fifos unavailable */
+       if (len < MAX_EPS_CHANNELS) {
+               for (i = len; i < MAX_EPS_CHANNELS; i++)
+                       hsotg->g_tx_fifo_sz[i] = 0;
+       }
+
+rx_fifo:
        /* Register RX fifo size */
        of_property_read_u32(np, "g-rx-fifo-size", &hsotg->g_rx_fifo_sz);
 
@@ -3832,10 +3857,13 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq)
        struct device *dev = hsotg->dev;
        int epnum;
        int ret;
+       int i;
+       u32 p_tx_fifo[] = DWC2_G_P_LEGACY_TX_FIFO_SIZE;
 
        /* Initialize to legacy fifo configuration values */
        hsotg->g_rx_fifo_sz = 2048;
        hsotg->g_np_g_tx_fifo_sz = 1024;
+       memcpy(&hsotg->g_tx_fifo_sz[1], p_tx_fifo, sizeof(p_tx_fifo));
        /* Device tree specific probe */
        dwc2_hsotg_of_probe(hsotg);
 
@@ -3853,6 +3881,9 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq)
        dev_dbg(dev, "NonPeriodic TXFIFO size: %d\n",
                                                hsotg->g_np_g_tx_fifo_sz);
        dev_dbg(dev, "RXFIFO size: %d\n", hsotg->g_rx_fifo_sz);
+       for (i = 0; i < MAX_EPS_CHANNELS; i++)
+               dev_dbg(dev, "Periodic TXFIFO%2d size: %d\n", i,
+                                               hsotg->g_tx_fifo_sz[i]);
 
        hsotg->gadget.max_speed = USB_SPEED_HIGH;
        hsotg->gadget.ops = &dwc2_hsotg_gadget_ops;
index 7287a763cd0cc4ca5a114ed687e6c34b0bdd643d..fea446900cadd06dcf857bb9cdaa3c9b0ba79025 100644 (file)
@@ -769,15 +769,14 @@ static int dwc3_core_init(struct dwc3 *dwc)
        return 0;
 
 err4:
-       phy_power_off(dwc->usb2_generic_phy);
+       phy_power_off(dwc->usb3_generic_phy);
 
 err3:
-       phy_power_off(dwc->usb3_generic_phy);
+       phy_power_off(dwc->usb2_generic_phy);
 
 err2:
        usb_phy_set_suspend(dwc->usb2_phy, 1);
        usb_phy_set_suspend(dwc->usb3_phy, 1);
-       dwc3_core_exit(dwc);
 
 err1:
        usb_phy_shutdown(dwc->usb2_phy);
index 89a2f712fdfe32f5fc0a6fc0681b0d8db005e2a2..aaaf256f71dd63091aec15f0e6b64eaec03ef8f0 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/slab.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/usb/of.h>
 
 #include "core.h"
index 07cc8929f27134e40b1084389fb3efb3c29c58f6..1dfa56a5f1c511a6a40f38cb976ed1096af3c5db 100644 (file)
@@ -783,6 +783,7 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
                req->trb = trb;
                req->trb_dma = dwc3_trb_dma_offset(dep, trb);
                req->first_trb_index = dep->trb_enqueue;
+               dep->queued_requests++;
        }
 
        dwc3_ep_inc_enq(dep);
@@ -833,8 +834,6 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
 
        trb->ctrl |= DWC3_TRB_CTRL_HWO;
 
-       dep->queued_requests++;
-
        trace_dwc3_prepare_trb(dep, trb);
 }
 
@@ -1074,9 +1073,17 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
 
        list_add_tail(&req->list, &dep->pending_list);
 
-       if (usb_endpoint_xfer_isoc(dep->endpoint.desc) &&
-                       dep->flags & DWC3_EP_PENDING_REQUEST) {
-               if (list_empty(&dep->started_list)) {
+       /*
+        * NOTICE: Isochronous endpoints should NEVER be prestarted. We must
+        * wait for a XferNotReady event so we will know what's the current
+        * (micro-)frame number.
+        *
+        * Without this trick, we are very, very likely gonna get Bus Expiry
+        * errors which will force us issue EndTransfer command.
+        */
+       if (usb_endpoint_xfer_isoc(dep->endpoint.desc)) {
+               if ((dep->flags & DWC3_EP_PENDING_REQUEST) &&
+                               list_empty(&dep->started_list)) {
                        dwc3_stop_active_transfer(dwc, dep->number, true);
                        dep->flags = DWC3_EP_ENABLED;
                }
@@ -1861,8 +1868,11 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
        unsigned int            s_pkt = 0;
        unsigned int            trb_status;
 
-       dep->queued_requests--;
        dwc3_ep_inc_deq(dep);
+
+       if (req->trb == trb)
+               dep->queued_requests--;
+
        trace_dwc3_complete_trb(dep, trb);
 
        /*
@@ -2980,7 +2990,7 @@ err3:
        kfree(dwc->setup_buf);
 
 err2:
-       dma_free_coherent(dwc->dev, sizeof(*dwc->ep0_trb),
+       dma_free_coherent(dwc->dev, sizeof(*dwc->ep0_trb) * 2,
                        dwc->ep0_trb, dwc->ep0_trb_addr);
 
 err1:
@@ -3005,7 +3015,7 @@ void dwc3_gadget_exit(struct dwc3 *dwc)
        kfree(dwc->setup_buf);
        kfree(dwc->zlp_buf);
 
-       dma_free_coherent(dwc->dev, sizeof(*dwc->ep0_trb),
+       dma_free_coherent(dwc->dev, sizeof(*dwc->ep0_trb) * 2,
                        dwc->ep0_trb, dwc->ep0_trb_addr);
 
        dma_free_coherent(dwc->dev, sizeof(*dwc->ctrl_req),
index 54ad100af35b487758460089ce476b7aaab4d07f..17989b72cdaec18dbf55d0709897fc75d1842ab7 100644 (file)
@@ -136,8 +136,60 @@ struct ffs_epfile {
        /*
         * Buffer for holding data from partial reads which may happen since
         * we’re rounding user read requests to a multiple of a max packet size.
+        *
+        * The pointer is initialised with NULL value and may be set by
+        * __ffs_epfile_read_data function to point to a temporary buffer.
+        *
+        * In normal operation, calls to __ffs_epfile_read_buffered will consume
+        * data from said buffer and eventually free it.  Importantly, while the
+        * function is using the buffer, it sets the pointer to NULL.  This is
+        * all right since __ffs_epfile_read_data and __ffs_epfile_read_buffered
+        * can never run concurrently (they are synchronised by epfile->mutex)
+        * so the latter will not assign a new value to the pointer.
+        *
+        * Meanwhile ffs_func_eps_disable frees the buffer (if the pointer is
+        * valid) and sets the pointer to READ_BUFFER_DROP value.  This special
+        * value is crux of the synchronisation between ffs_func_eps_disable and
+        * __ffs_epfile_read_data.
+        *
+        * Once __ffs_epfile_read_data is about to finish it will try to set the
+        * pointer back to its old value (as described above), but seeing as the
+        * pointer is not-NULL (namely READ_BUFFER_DROP) it will instead free
+        * the buffer.
+        *
+        * == State transitions ==
+        *
+        * • ptr == NULL:  (initial state)
+        *   ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP
+        *   ◦ __ffs_epfile_read_buffered:    nop
+        *   ◦ __ffs_epfile_read_data allocates temp buffer: go to ptr == buf
+        *   ◦ reading finishes:              n/a, not in ‘and reading’ state
+        * • ptr == DROP:
+        *   ◦ __ffs_epfile_read_buffer_free: nop
+        *   ◦ __ffs_epfile_read_buffered:    go to ptr == NULL
+        *   ◦ __ffs_epfile_read_data allocates temp buffer: free buf, nop
+        *   ◦ reading finishes:              n/a, not in ‘and reading’ state
+        * • ptr == buf:
+        *   ◦ __ffs_epfile_read_buffer_free: free buf, go to ptr == DROP
+        *   ◦ __ffs_epfile_read_buffered:    go to ptr == NULL and reading
+        *   ◦ __ffs_epfile_read_data:        n/a, __ffs_epfile_read_buffered
+        *                                    is always called first
+        *   ◦ reading finishes:              n/a, not in ‘and reading’ state
+        * • ptr == NULL and reading:
+        *   ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP and reading
+        *   ◦ __ffs_epfile_read_buffered:    n/a, mutex is held
+        *   ◦ __ffs_epfile_read_data:        n/a, mutex is held
+        *   ◦ reading finishes and …
+        *     … all data read:               free buf, go to ptr == NULL
+        *     … otherwise:                   go to ptr == buf and reading
+        * • ptr == DROP and reading:
+        *   ◦ __ffs_epfile_read_buffer_free: nop
+        *   ◦ __ffs_epfile_read_buffered:    n/a, mutex is held
+        *   ◦ __ffs_epfile_read_data:        n/a, mutex is held
+        *   ◦ reading finishes:              free buf, go to ptr == DROP
         */
-       struct ffs_buffer               *read_buffer;   /* P: epfile->mutex */
+       struct ffs_buffer               *read_buffer;
+#define READ_BUFFER_DROP ((struct ffs_buffer *)ERR_PTR(-ESHUTDOWN))
 
        char                            name[5];
 
@@ -736,25 +788,47 @@ static void ffs_epfile_async_io_complete(struct usb_ep *_ep,
        schedule_work(&io_data->work);
 }
 
+static void __ffs_epfile_read_buffer_free(struct ffs_epfile *epfile)
+{
+       /*
+        * See comment in struct ffs_epfile for full read_buffer pointer
+        * synchronisation story.
+        */
+       struct ffs_buffer *buf = xchg(&epfile->read_buffer, READ_BUFFER_DROP);
+       if (buf && buf != READ_BUFFER_DROP)
+               kfree(buf);
+}
+
 /* Assumes epfile->mutex is held. */
 static ssize_t __ffs_epfile_read_buffered(struct ffs_epfile *epfile,
                                          struct iov_iter *iter)
 {
-       struct ffs_buffer *buf = epfile->read_buffer;
+       /*
+        * Null out epfile->read_buffer so ffs_func_eps_disable does not free
+        * the buffer while we are using it.  See comment in struct ffs_epfile
+        * for full read_buffer pointer synchronisation story.
+        */
+       struct ffs_buffer *buf = xchg(&epfile->read_buffer, NULL);
        ssize_t ret;
-       if (!buf)
+       if (!buf || buf == READ_BUFFER_DROP)
                return 0;
 
        ret = copy_to_iter(buf->data, buf->length, iter);
        if (buf->length == ret) {
                kfree(buf);
-               epfile->read_buffer = NULL;
-       } else if (unlikely(iov_iter_count(iter))) {
+               return ret;
+       }
+
+       if (unlikely(iov_iter_count(iter))) {
                ret = -EFAULT;
        } else {
                buf->length -= ret;
                buf->data += ret;
        }
+
+       if (cmpxchg(&epfile->read_buffer, NULL, buf))
+               kfree(buf);
+
        return ret;
 }
 
@@ -783,7 +857,15 @@ static ssize_t __ffs_epfile_read_data(struct ffs_epfile *epfile,
        buf->length = data_len;
        buf->data = buf->storage;
        memcpy(buf->storage, data + ret, data_len);
-       epfile->read_buffer = buf;
+
+       /*
+        * At this point read_buffer is NULL or READ_BUFFER_DROP (if
+        * ffs_func_eps_disable has been called in the meanwhile).  See comment
+        * in struct ffs_epfile for full read_buffer pointer synchronisation
+        * story.
+        */
+       if (unlikely(cmpxchg(&epfile->read_buffer, NULL, buf)))
+               kfree(buf);
 
        return ret;
 }
@@ -1097,8 +1179,7 @@ ffs_epfile_release(struct inode *inode, struct file *file)
 
        ENTER();
 
-       kfree(epfile->read_buffer);
-       epfile->read_buffer = NULL;
+       __ffs_epfile_read_buffer_free(epfile);
        ffs_data_closed(epfile->ffs);
 
        return 0;
@@ -1724,24 +1805,20 @@ static void ffs_func_eps_disable(struct ffs_function *func)
        unsigned count            = func->ffs->eps_count;
        unsigned long flags;
 
+       spin_lock_irqsave(&func->ffs->eps_lock, flags);
        do {
-               if (epfile)
-                       mutex_lock(&epfile->mutex);
-               spin_lock_irqsave(&func->ffs->eps_lock, flags);
                /* pending requests get nuked */
                if (likely(ep->ep))
                        usb_ep_disable(ep->ep);
                ++ep;
-               spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
 
                if (epfile) {
                        epfile->ep = NULL;
-                       kfree(epfile->read_buffer);
-                       epfile->read_buffer = NULL;
-                       mutex_unlock(&epfile->mutex);
+                       __ffs_epfile_read_buffer_free(epfile);
                        ++epfile;
                }
        } while (--count);
+       spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
 }
 
 static int ffs_func_eps_enable(struct ffs_function *func)
@@ -3148,11 +3225,11 @@ static bool ffs_func_req_match(struct usb_function *f,
 
        switch (creq->bRequestType & USB_RECIP_MASK) {
        case USB_RECIP_INTERFACE:
-               return ffs_func_revmap_intf(func,
-                                           le16_to_cpu(creq->wIndex) >= 0);
+               return (ffs_func_revmap_intf(func,
+                                            le16_to_cpu(creq->wIndex)) >= 0);
        case USB_RECIP_ENDPOINT:
-               return ffs_func_revmap_ep(func,
-                                         le16_to_cpu(creq->wIndex) >= 0);
+               return (ffs_func_revmap_ep(func,
+                                          le16_to_cpu(creq->wIndex)) >= 0);
        default:
                return (bool) (func->ffs->user_flags &
                               FUNCTIONFS_ALL_CTRL_RECIP);
index 39a6df1e2ded7557e4e48a32110dbc4049e6c5ed..84a1709e0784244eec1c5d3d81edbef977f2ad29 100644 (file)
@@ -579,13 +579,6 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb,
 
        req->length = length;
 
-       /* throttle high/super speed IRQ rate back slightly */
-       if (gadget_is_dualspeed(dev->gadget))
-               req->no_interrupt = (dev->gadget->speed == USB_SPEED_HIGH ||
-                                    dev->gadget->speed == USB_SPEED_SUPER)
-                       ? ((atomic_read(&dev->tx_qlen) % dev->qmult) != 0)
-                       : 0;
-
        retval = usb_ep_queue(in, req, GFP_ATOMIC);
        switch (retval) {
        default:
index bb1f6c8f0f01ab492c6b5c0d1852c655d2cc26ee..45bc997d071131c1e7ef51c3968dd5e295994f44 100644 (file)
@@ -1978,7 +1978,7 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev,
                        dev_err(&pdev->dev, "of_probe: name error(%d)\n", ret);
                        goto err;
                }
-               ep->ep.name = name;
+               ep->ep.name = kasprintf(GFP_KERNEL, "ep%d", ep->index);
 
                ep->ep_regs = udc->regs + USBA_EPT_BASE(i);
                ep->dma_regs = udc->regs + USBA_DMA_BASE(i);
index 1e5f529d51a21f4a17db61cf0f8b7f48d8234386..063064801ceb0e37a0f4a5b45101f02f7c1def83 100644 (file)
@@ -1308,11 +1308,6 @@ MODULE_LICENSE ("GPL");
 #define        PLATFORM_DRIVER         ehci_mv_driver
 #endif
 
-#ifdef CONFIG_MIPS_SEAD3
-#include "ehci-sead3.c"
-#define        PLATFORM_DRIVER         ehci_hcd_sead3_driver
-#endif
-
 static int __init ehci_hcd_init(void)
 {
        int retval = 0;
index 876dca4fc2162520603afdfd430e26d54b82531f..a268d9e8d6cfb17b214278e23c44267cf8c06e33 100644 (file)
@@ -39,7 +39,7 @@
 
 #define DRIVER_DESC "EHCI generic platform driver"
 #define EHCI_MAX_CLKS 4
-#define EHCI_MAX_RSTS 3
+#define EHCI_MAX_RSTS 4
 #define hcd_to_ehci_priv(h) ((struct ehci_platform_priv *)hcd_to_ehci(h)->priv)
 
 struct ehci_platform_priv {
diff --git a/drivers/usb/host/ehci-sead3.c b/drivers/usb/host/ehci-sead3.c
deleted file mode 100644 (file)
index 3d86cc2..0000000
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * MIPS CI13320A EHCI Host Controller driver
- * Based on "ehci-au1xxx.c" by K.Boge <karsten.boge@amd.com>
- *
- * Copyright (C) 2012 MIPS Technologies, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/err.h>
-#include <linux/platform_device.h>
-
-static int ehci_sead3_setup(struct usb_hcd *hcd)
-{
-       int ret;
-       struct ehci_hcd *ehci = hcd_to_ehci(hcd);
-
-       ehci->caps = hcd->regs + 0x100;
-
-#ifdef __BIG_ENDIAN
-       ehci->big_endian_mmio = 1;
-       ehci->big_endian_desc = 1;
-#endif
-
-       ret = ehci_setup(hcd);
-       if (ret)
-               return ret;
-
-       ehci->need_io_watchdog = 0;
-
-       /* Set burst length to 16 words. */
-       ehci_writel(ehci, 0x1010, &ehci->regs->reserved1[1]);
-
-       return ret;
-}
-
-const struct hc_driver ehci_sead3_hc_driver = {
-       .description            = hcd_name,
-       .product_desc           = "SEAD-3 EHCI",
-       .hcd_priv_size          = sizeof(struct ehci_hcd),
-
-       /*
-        * generic hardware linkage
-        */
-       .irq                    = ehci_irq,
-       .flags                  = HCD_MEMORY | HCD_USB2 | HCD_BH,
-
-       /*
-        * basic lifecycle operations
-        *
-        */
-       .reset                  = ehci_sead3_setup,
-       .start                  = ehci_run,
-       .stop                   = ehci_stop,
-       .shutdown               = ehci_shutdown,
-
-       /*
-        * managing i/o requests and associated device resources
-        */
-       .urb_enqueue            = ehci_urb_enqueue,
-       .urb_dequeue            = ehci_urb_dequeue,
-       .endpoint_disable       = ehci_endpoint_disable,
-       .endpoint_reset         = ehci_endpoint_reset,
-
-       /*
-        * scheduling support
-        */
-       .get_frame_number       = ehci_get_frame,
-
-       /*
-        * root hub support
-        */
-       .hub_status_data        = ehci_hub_status_data,
-       .hub_control            = ehci_hub_control,
-       .bus_suspend            = ehci_bus_suspend,
-       .bus_resume             = ehci_bus_resume,
-       .relinquish_port        = ehci_relinquish_port,
-       .port_handed_over       = ehci_port_handed_over,
-
-       .clear_tt_buffer_complete       = ehci_clear_tt_buffer_complete,
-};
-
-static int ehci_hcd_sead3_drv_probe(struct platform_device *pdev)
-{
-       struct usb_hcd *hcd;
-       struct resource *res;
-       int ret;
-
-       if (usb_disabled())
-               return -ENODEV;
-
-       if (pdev->resource[1].flags != IORESOURCE_IRQ) {
-               pr_debug("resource[1] is not IORESOURCE_IRQ");
-               return -ENOMEM;
-       }
-       hcd = usb_create_hcd(&ehci_sead3_hc_driver, &pdev->dev, "SEAD-3");
-       if (!hcd)
-               return -ENOMEM;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       hcd->regs = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(hcd->regs)) {
-               ret = PTR_ERR(hcd->regs);
-               goto err1;
-       }
-       hcd->rsrc_start = res->start;
-       hcd->rsrc_len = resource_size(res);
-
-       /* Root hub has integrated TT. */
-       hcd->has_tt = 1;
-
-       ret = usb_add_hcd(hcd, pdev->resource[1].start,
-                         IRQF_SHARED);
-       if (ret == 0) {
-               platform_set_drvdata(pdev, hcd);
-               device_wakeup_enable(hcd->self.controller);
-               return ret;
-       }
-
-err1:
-       usb_put_hcd(hcd);
-       return ret;
-}
-
-static int ehci_hcd_sead3_drv_remove(struct platform_device *pdev)
-{
-       struct usb_hcd *hcd = platform_get_drvdata(pdev);
-
-       usb_remove_hcd(hcd);
-       usb_put_hcd(hcd);
-
-       return 0;
-}
-
-#ifdef CONFIG_PM
-static int ehci_hcd_sead3_drv_suspend(struct device *dev)
-{
-       struct usb_hcd *hcd = dev_get_drvdata(dev);
-       bool do_wakeup = device_may_wakeup(dev);
-
-       return ehci_suspend(hcd, do_wakeup);
-}
-
-static int ehci_hcd_sead3_drv_resume(struct device *dev)
-{
-       struct usb_hcd *hcd = dev_get_drvdata(dev);
-
-       ehci_resume(hcd, false);
-       return 0;
-}
-
-static const struct dev_pm_ops sead3_ehci_pmops = {
-       .suspend        = ehci_hcd_sead3_drv_suspend,
-       .resume         = ehci_hcd_sead3_drv_resume,
-};
-
-#define SEAD3_EHCI_PMOPS (&sead3_ehci_pmops)
-
-#else
-#define SEAD3_EHCI_PMOPS NULL
-#endif
-
-static struct platform_driver ehci_hcd_sead3_driver = {
-       .probe          = ehci_hcd_sead3_drv_probe,
-       .remove         = ehci_hcd_sead3_drv_remove,
-       .shutdown       = usb_hcd_platform_shutdown,
-       .driver = {
-               .name   = "sead3-ehci",
-               .pm     = SEAD3_EHCI_PMOPS,
-       }
-};
-
-MODULE_ALIAS("platform:sead3-ehci");
index 5b5880c0ae1916d14c1ce997b8be9d3c2a9d3798..b38a228134df108d148037232126e815d9557a7b 100644 (file)
@@ -221,6 +221,12 @@ static int usb_hcd_at91_probe(const struct hc_driver *driver,
        ohci->num_ports = board->ports;
        at91_start_hc(pdev);
 
+       /*
+        * The RemoteWakeupConnected bit has to be set explicitly
+        * before calling ohci_run. The reset value of this bit is 0.
+        */
+       ohci->hc_control = OHCI_CTRL_RWC;
+
        retval = usb_add_hcd(hcd, irq, IRQF_SHARED);
        if (retval == 0) {
                device_wakeup_enable(hcd->self.controller);
@@ -677,9 +683,6 @@ ohci_hcd_at91_drv_suspend(struct device *dev)
         * REVISIT: some boards will be able to turn VBUS off...
         */
        if (!ohci_at91->wakeup) {
-               ohci->hc_control = ohci_readl(ohci, &ohci->regs->control);
-               ohci->hc_control &= OHCI_CTRL_RWC;
-               ohci_writel(ohci, ohci->hc_control, &ohci->regs->control);
                ohci->rh_state = OHCI_RH_HALTED;
 
                /* flush the writes */
index 1700908b84ef8b7c15716f07e448345f25fae392..86612ac3fda220e3d1c9b1b5c959f45f723d543d 100644 (file)
@@ -72,7 +72,7 @@
 static const char      hcd_name [] = "ohci_hcd";
 
 #define        STATECHANGE_DELAY       msecs_to_jiffies(300)
-#define        IO_WATCHDOG_DELAY       msecs_to_jiffies(250)
+#define        IO_WATCHDOG_DELAY       msecs_to_jiffies(275)
 
 #include "ohci.h"
 #include "pci-quirks.h"
index d793f548dfe26aef387d13b703454abe67beb5be..a9a1e4c40480cf2c5c7c7995aa5d337d3ef3ee87 100644 (file)
@@ -995,6 +995,14 @@ static void quirk_usb_handoff_xhci(struct pci_dev *pdev)
        }
        val = readl(base + ext_cap_offset);
 
+       /* Auto handoff never worked for these devices. Force it and continue */
+       if ((pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241) ||
+                       (pdev->vendor == PCI_VENDOR_ID_RENESAS
+                        && pdev->device == 0x0014)) {
+               val = (val | XHCI_HC_OS_OWNED) & ~XHCI_HC_BIOS_OWNED;
+               writel(val, base + ext_cap_offset);
+       }
+
        /* If the BIOS owns the HC, signal that the OS wants it, and wait */
        if (val & XHCI_HC_BIOS_OWNED) {
                writel(val | XHCI_HC_OS_OWNED, base + ext_cap_offset);
index 730b9fd266852db5812e98456c9ff8299aa40ae6..0ef16900efedd7783489ade75594b4d0da798318 100644 (file)
@@ -1166,7 +1166,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                                xhci_set_link_state(xhci, port_array, wIndex,
                                                        XDEV_RESUME);
                                spin_unlock_irqrestore(&xhci->lock, flags);
-                               msleep(20);
+                               msleep(USB_RESUME_TIMEOUT);
                                spin_lock_irqsave(&xhci->lock, flags);
                                xhci_set_link_state(xhci, port_array, wIndex,
                                                        XDEV_U0);
@@ -1355,6 +1355,35 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
        return 0;
 }
 
+/*
+ * Workaround for missing Cold Attach Status (CAS) if device re-plugged in S3.
+ * warm reset a USB3 device stuck in polling or compliance mode after resume.
+ * See Intel 100/c230 series PCH specification update Doc #332692-006 Errata #8
+ */
+static bool xhci_port_missing_cas_quirk(int port_index,
+                                            __le32 __iomem **port_array)
+{
+       u32 portsc;
+
+       portsc = readl(port_array[port_index]);
+
+       /* if any of these are set we are not stuck */
+       if (portsc & (PORT_CONNECT | PORT_CAS))
+               return false;
+
+       if (((portsc & PORT_PLS_MASK) != XDEV_POLLING) &&
+           ((portsc & PORT_PLS_MASK) != XDEV_COMP_MODE))
+               return false;
+
+       /* clear wakeup/change bits, and do a warm port reset */
+       portsc &= ~(PORT_RWC_BITS | PORT_CEC | PORT_WAKE_BITS);
+       portsc |= PORT_WR;
+       writel(portsc, port_array[port_index]);
+       /* flush write */
+       readl(port_array[port_index]);
+       return true;
+}
+
 int xhci_bus_resume(struct usb_hcd *hcd)
 {
        struct xhci_hcd *xhci = hcd_to_xhci(hcd);
@@ -1392,6 +1421,14 @@ int xhci_bus_resume(struct usb_hcd *hcd)
                u32 temp;
 
                temp = readl(port_array[port_index]);
+
+               /* warm reset CAS limited ports stuck in polling/compliance */
+               if ((xhci->quirks & XHCI_MISSING_CAS) &&
+                   (hcd->speed >= HCD_USB3) &&
+                   xhci_port_missing_cas_quirk(port_index, port_array)) {
+                       xhci_dbg(xhci, "reset stuck port %d\n", port_index);
+                       continue;
+               }
                if (DEV_SUPERSPEED_ANY(temp))
                        temp &= ~(PORT_RWC_BITS | PORT_CEC | PORT_WAKE_BITS);
                else
@@ -1410,7 +1447,7 @@ int xhci_bus_resume(struct usb_hcd *hcd)
 
        if (need_usb2_u3_exit) {
                spin_unlock_irqrestore(&xhci->lock, flags);
-               msleep(20);
+               msleep(USB_RESUME_TIMEOUT);
                spin_lock_irqsave(&xhci->lock, flags);
        }
 
index d7b0f97abbad608200cbfb5b59d0faacfa1b8b43..e96ae80d107e94fd8db8c33cae52ff9153f14730 100644 (file)
 
 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_XHCI     0x8c31
 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI  0x9c31
+#define PCI_DEVICE_ID_INTEL_WILDCATPOINT_LP_XHCI       0x9cb1
 #define PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI            0x22b5
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI                0xa12f
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI       0x9d2f
 #define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI             0x0aa8
 #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI             0x1aa8
+#define PCI_DEVICE_ID_INTEL_APL_XHCI                   0x5aa8
 
 static const char hcd_name[] = "xhci_hcd";
 
@@ -153,7 +155,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
                xhci->quirks |= XHCI_SPURIOUS_REBOOT;
        }
        if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
-               pdev->device == PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI) {
+               (pdev->device == PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI ||
+                pdev->device == PCI_DEVICE_ID_INTEL_WILDCATPOINT_LP_XHCI)) {
                xhci->quirks |= XHCI_SPURIOUS_REBOOT;
                xhci->quirks |= XHCI_SPURIOUS_WAKEUP;
        }
@@ -169,6 +172,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
                 pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) {
                xhci->quirks |= XHCI_SSIC_PORT_UNUSED;
        }
+       if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+           (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
+            pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI))
+               xhci->quirks |= XHCI_MISSING_CAS;
+
        if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
                        pdev->device == PCI_DEVICE_ID_EJ168) {
                xhci->quirks |= XHCI_RESET_ON_RESUME;
index b2c1dc5dc0f30f17fa2f4354537c193958ab5f78..f945380035d07e2f7af2bb73da9d39bf94475991 100644 (file)
@@ -314,6 +314,8 @@ struct xhci_op_regs {
 #define XDEV_U2                (0x2 << 5)
 #define XDEV_U3                (0x3 << 5)
 #define XDEV_INACTIVE  (0x6 << 5)
+#define XDEV_POLLING   (0x7 << 5)
+#define XDEV_COMP_MODE  (0xa << 5)
 #define XDEV_RESUME    (0xf << 5)
 /* true: port has power (see HCC_PPC) */
 #define PORT_POWER     (1 << 9)
@@ -1653,6 +1655,7 @@ struct xhci_hcd {
 #define XHCI_MTK_HOST          (1 << 21)
 #define XHCI_SSIC_PORT_UNUSED  (1 << 22)
 #define XHCI_NO_64BIT_SUPPORT  (1 << 23)
+#define XHCI_MISSING_CAS       (1 << 24)
        unsigned int            num_active_eps;
        unsigned int            limit_active_eps;
        /* There are two roothubs to keep track of bus suspend info for */
index 210b7e43a6fd40bb6a626a9f300fc2bbe1eb0200..2440f88e07a35781433ba501fd60d8cf0b246431 100644 (file)
@@ -479,7 +479,8 @@ static int da8xx_probe(struct platform_device *pdev)
 
        glue->phy = devm_phy_get(&pdev->dev, "usb-phy");
        if (IS_ERR(glue->phy)) {
-               dev_err(&pdev->dev, "failed to get phy\n");
+               if (PTR_ERR(glue->phy) != -EPROBE_DEFER)
+                       dev_err(&pdev->dev, "failed to get phy\n");
                return PTR_ERR(glue->phy);
        }
 
index 27dadc0d9114bf14d6034906153d7f1df60be4a5..c3e172e15ec3d9d9ec7346b972b2702fed710c65 100644 (file)
@@ -986,7 +986,7 @@ b_host:
        }
 #endif
 
-       schedule_work(&musb->irq_work);
+       schedule_delayed_work(&musb->irq_work, 0);
 
        return handled;
 }
@@ -1855,14 +1855,23 @@ static void musb_pm_runtime_check_session(struct musb *musb)
                MUSB_DEVCTL_HR;
        switch (devctl & ~s) {
        case MUSB_QUIRK_B_INVALID_VBUS_91:
-               if (!musb->session && !musb->quirk_invalid_vbus) {
-                       musb->quirk_invalid_vbus = true;
+               if (musb->quirk_retries--) {
                        musb_dbg(musb,
-                                "First invalid vbus, assume no session");
+                                "Poll devctl on invalid vbus, assume no session");
+                       schedule_delayed_work(&musb->irq_work,
+                                             msecs_to_jiffies(1000));
+
                        return;
                }
-               break;
        case MUSB_QUIRK_A_DISCONNECT_19:
+               if (musb->quirk_retries--) {
+                       musb_dbg(musb,
+                                "Poll devctl on possible host mode disconnect");
+                       schedule_delayed_work(&musb->irq_work,
+                                             msecs_to_jiffies(1000));
+
+                       return;
+               }
                if (!musb->session)
                        break;
                musb_dbg(musb, "Allow PM on possible host mode disconnect");
@@ -1886,9 +1895,9 @@ static void musb_pm_runtime_check_session(struct musb *musb)
                if (error < 0)
                        dev_err(musb->controller, "Could not enable: %i\n",
                                error);
+               musb->quirk_retries = 3;
        } else {
                musb_dbg(musb, "Allow PM with no session: %02x", devctl);
-               musb->quirk_invalid_vbus = false;
                pm_runtime_mark_last_busy(musb->controller);
                pm_runtime_put_autosuspend(musb->controller);
        }
@@ -1899,7 +1908,7 @@ static void musb_pm_runtime_check_session(struct musb *musb)
 /* Only used to provide driver mode change events */
 static void musb_irq_work(struct work_struct *data)
 {
-       struct musb *musb = container_of(data, struct musb, irq_work);
+       struct musb *musb = container_of(data, struct musb, irq_work.work);
 
        musb_pm_runtime_check_session(musb);
 
@@ -1969,6 +1978,7 @@ static struct musb *allocate_instance(struct device *dev,
        INIT_LIST_HEAD(&musb->control);
        INIT_LIST_HEAD(&musb->in_bulk);
        INIT_LIST_HEAD(&musb->out_bulk);
+       INIT_LIST_HEAD(&musb->pending_list);
 
        musb->vbuserr_retry = VBUSERR_RETRY_COUNT;
        musb->a_wait_bcon = OTG_TIME_A_WAIT_BCON;
@@ -2018,6 +2028,84 @@ static void musb_free(struct musb *musb)
        musb_host_free(musb);
 }
 
+struct musb_pending_work {
+       int (*callback)(struct musb *musb, void *data);
+       void *data;
+       struct list_head node;
+};
+
+/*
+ * Runs and frees all queued resume work. Called from musb_runtime_resume()
+ * and musb_resume(). Callers must take musb->lock.
+ */
+static int musb_run_resume_work(struct musb *musb)
+{
+       struct musb_pending_work *w, *_w;
+       unsigned long flags;
+       int error = 0;
+
+       spin_lock_irqsave(&musb->list_lock, flags);
+       list_for_each_entry_safe(w, _w, &musb->pending_list, node) {
+               if (w->callback) {
+                       error = w->callback(musb, w->data);
+                       if (error < 0) {
+                               dev_err(musb->controller,
+                                       "resume callback %p failed: %i\n",
+                                       w->callback, error);
+                       }
+               }
+               list_del(&w->node);
+               devm_kfree(musb->controller, w);
+       }
+       spin_unlock_irqrestore(&musb->list_lock, flags);
+
+       return error;
+}
+
+/*
+ * Called to run work if device is active or else queue the work to happen
+ * on resume. Caller must take musb->lock and must hold an RPM reference.
+ *
+ * Note that we cowardly refuse queuing work after musb PM runtime
+ * resume is done calling musb_run_resume_work() and return -EINPROGRESS
+ * instead.
+ */
+int musb_queue_resume_work(struct musb *musb,
+                          int (*callback)(struct musb *musb, void *data),
+                          void *data)
+{
+       struct musb_pending_work *w;
+       unsigned long flags;
+       int error;
+
+       if (WARN_ON(!callback))
+               return -EINVAL;
+
+       if (pm_runtime_active(musb->controller))
+               return callback(musb, data);
+
+       w = devm_kzalloc(musb->controller, sizeof(*w), GFP_ATOMIC);
+       if (!w)
+               return -ENOMEM;
+
+       w->callback = callback;
+       w->data = data;
+       spin_lock_irqsave(&musb->list_lock, flags);
+       if (musb->is_runtime_suspended) {
+               list_add_tail(&w->node, &musb->pending_list);
+               error = 0;
+       } else {
+               dev_err(musb->controller, "could not add resume work %p\n",
+                       callback);
+               devm_kfree(musb->controller, w);
+               error = -EINPROGRESS;
+       }
+       spin_unlock_irqrestore(&musb->list_lock, flags);
+
+       return error;
+}
+EXPORT_SYMBOL_GPL(musb_queue_resume_work);
+
 static void musb_deassert_reset(struct work_struct *work)
 {
        struct musb *musb;
@@ -2065,6 +2153,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
        }
 
        spin_lock_init(&musb->lock);
+       spin_lock_init(&musb->list_lock);
        musb->board_set_power = plat->set_power;
        musb->min_power = plat->min_power;
        musb->ops = plat->platform_ops;
@@ -2114,11 +2203,6 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
                musb->io.ep_offset = musb_flat_ep_offset;
                musb->io.ep_select = musb_flat_ep_select;
        }
-       /* And override them with platform specific ops if specified. */
-       if (musb->ops->ep_offset)
-               musb->io.ep_offset = musb->ops->ep_offset;
-       if (musb->ops->ep_select)
-               musb->io.ep_select = musb->ops->ep_select;
 
        /* At least tusb6010 has its own offsets */
        if (musb->ops->ep_offset)
@@ -2213,7 +2297,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
        musb_generic_disable(musb);
 
        /* Init IRQ workqueue before request_irq */
-       INIT_WORK(&musb->irq_work, musb_irq_work);
+       INIT_DELAYED_WORK(&musb->irq_work, musb_irq_work);
        INIT_DELAYED_WORK(&musb->deassert_reset_work, musb_deassert_reset);
        INIT_DELAYED_WORK(&musb->finish_resume_work, musb_host_finish_resume);
 
@@ -2296,6 +2380,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
        if (status)
                goto fail5;
 
+       musb->is_initialized = 1;
        pm_runtime_mark_last_busy(musb->controller);
        pm_runtime_put_autosuspend(musb->controller);
 
@@ -2309,7 +2394,7 @@ fail4:
        musb_host_cleanup(musb);
 
 fail3:
-       cancel_work_sync(&musb->irq_work);
+       cancel_delayed_work_sync(&musb->irq_work);
        cancel_delayed_work_sync(&musb->finish_resume_work);
        cancel_delayed_work_sync(&musb->deassert_reset_work);
        if (musb->dma_controller)
@@ -2376,7 +2461,7 @@ static int musb_remove(struct platform_device *pdev)
         */
        musb_exit_debugfs(musb);
 
-       cancel_work_sync(&musb->irq_work);
+       cancel_delayed_work_sync(&musb->irq_work);
        cancel_delayed_work_sync(&musb->finish_resume_work);
        cancel_delayed_work_sync(&musb->deassert_reset_work);
        pm_runtime_get_sync(musb->controller);
@@ -2562,6 +2647,7 @@ static int musb_suspend(struct device *dev)
 
        musb_platform_disable(musb);
        musb_generic_disable(musb);
+       WARN_ON(!list_empty(&musb->pending_list));
 
        spin_lock_irqsave(&musb->lock, flags);
 
@@ -2583,9 +2669,11 @@ static int musb_suspend(struct device *dev)
 
 static int musb_resume(struct device *dev)
 {
-       struct musb     *musb = dev_to_musb(dev);
-       u8              devctl;
-       u8              mask;
+       struct musb *musb = dev_to_musb(dev);
+       unsigned long flags;
+       int error;
+       u8 devctl;
+       u8 mask;
 
        /*
         * For static cmos like DaVinci, register values were preserved
@@ -2619,6 +2707,13 @@ static int musb_resume(struct device *dev)
 
        musb_start(musb);
 
+       spin_lock_irqsave(&musb->lock, flags);
+       error = musb_run_resume_work(musb);
+       if (error)
+               dev_err(musb->controller, "resume work failed with %i\n",
+                       error);
+       spin_unlock_irqrestore(&musb->lock, flags);
+
        return 0;
 }
 
@@ -2627,14 +2722,16 @@ static int musb_runtime_suspend(struct device *dev)
        struct musb     *musb = dev_to_musb(dev);
 
        musb_save_context(musb);
+       musb->is_runtime_suspended = 1;
 
        return 0;
 }
 
 static int musb_runtime_resume(struct device *dev)
 {
-       struct musb     *musb = dev_to_musb(dev);
-       static int      first = 1;
+       struct musb *musb = dev_to_musb(dev);
+       unsigned long flags;
+       int error;
 
        /*
         * When pm_runtime_get_sync called for the first time in driver
@@ -2645,9 +2742,10 @@ static int musb_runtime_resume(struct device *dev)
         * Also context restore without save does not make
         * any sense
         */
-       if (!first)
-               musb_restore_context(musb);
-       first = 0;
+       if (!musb->is_initialized)
+               return 0;
+
+       musb_restore_context(musb);
 
        if (musb->need_finish_resume) {
                musb->need_finish_resume = 0;
@@ -2655,6 +2753,14 @@ static int musb_runtime_resume(struct device *dev)
                                msecs_to_jiffies(USB_RESUME_TIMEOUT));
        }
 
+       spin_lock_irqsave(&musb->lock, flags);
+       error = musb_run_resume_work(musb);
+       if (error)
+               dev_err(musb->controller, "resume work failed with %i\n",
+                       error);
+       musb->is_runtime_suspended = 0;
+       spin_unlock_irqrestore(&musb->lock, flags);
+
        return 0;
 }
 
index 2cb88a498f8a5681265654e079456e0ae944667e..91817d77d59c8ecd226e25fb6ce9e07b5a597d3f 100644 (file)
@@ -303,13 +303,14 @@ struct musb_context_registers {
 struct musb {
        /* device lock */
        spinlock_t              lock;
+       spinlock_t              list_lock;      /* resume work list lock */
 
        struct musb_io          io;
        const struct musb_platform_ops *ops;
        struct musb_context_registers context;
 
        irqreturn_t             (*isr)(int, void *);
-       struct work_struct      irq_work;
+       struct delayed_work     irq_work;
        struct delayed_work     deassert_reset_work;
        struct delayed_work     finish_resume_work;
        struct delayed_work     gadget_work;
@@ -337,6 +338,7 @@ struct musb {
        struct list_head        control;        /* of musb_qh */
        struct list_head        in_bulk;        /* of musb_qh */
        struct list_head        out_bulk;       /* of musb_qh */
+       struct list_head        pending_list;   /* pending work list */
 
        struct timer_list       otg_timer;
        struct notifier_block   nb;
@@ -379,12 +381,15 @@ struct musb {
 
        int                     port_mode;      /* MUSB_PORT_MODE_* */
        bool                    session;
-       bool                    quirk_invalid_vbus;
+       unsigned long           quirk_retries;
        bool                    is_host;
 
        int                     a_wait_bcon;    /* VBUS timeout in msecs */
        unsigned long           idle_timeout;   /* Next timeout in jiffies */
 
+       unsigned                is_initialized:1;
+       unsigned                is_runtime_suspended:1;
+
        /* active means connected and not suspended */
        unsigned                is_active:1;
 
@@ -540,6 +545,10 @@ extern irqreturn_t musb_interrupt(struct musb *);
 
 extern void musb_hnp_stop(struct musb *musb);
 
+int musb_queue_resume_work(struct musb *musb,
+                          int (*callback)(struct musb *musb, void *data),
+                          void *data);
+
 static inline void musb_platform_set_vbus(struct musb *musb, int is_on)
 {
        if (musb->ops->set_vbus)
index 0f17d2140db6e5c36eceef43b185d2c7c183e091..feae1561b9abb6924d2fe2fc6f1222dfac9d2be7 100644 (file)
@@ -185,24 +185,19 @@ static void dsps_musb_disable(struct musb *musb)
        musb_writel(reg_base, wrp->coreintr_clear, wrp->usb_bitmap);
        musb_writel(reg_base, wrp->epintr_clear,
                         wrp->txep_bitmap | wrp->rxep_bitmap);
+       del_timer_sync(&glue->timer);
        musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
 }
 
-static void otg_timer(unsigned long _musb)
+/* Caller must take musb->lock */
+static int dsps_check_status(struct musb *musb, void *unused)
 {
-       struct musb *musb = (void *)_musb;
        void __iomem *mregs = musb->mregs;
        struct device *dev = musb->controller;
        struct dsps_glue *glue = dev_get_drvdata(dev->parent);
        const struct dsps_musb_wrapper *wrp = glue->wrp;
        u8 devctl;
-       unsigned long flags;
        int skip_session = 0;
-       int err;
-
-       err = pm_runtime_get_sync(dev);
-       if (err < 0)
-               dev_err(dev, "Poll could not pm_runtime_get: %i\n", err);
 
        /*
         * We poll because DSPS IP's won't expose several OTG-critical
@@ -212,7 +207,6 @@ static void otg_timer(unsigned long _musb)
        dev_dbg(musb->controller, "Poll devctl %02x (%s)\n", devctl,
                                usb_otg_state_string(musb->xceiv->otg->state));
 
-       spin_lock_irqsave(&musb->lock, flags);
        switch (musb->xceiv->otg->state) {
        case OTG_STATE_A_WAIT_VRISE:
                mod_timer(&glue->timer, jiffies +
@@ -245,8 +239,30 @@ static void otg_timer(unsigned long _musb)
        default:
                break;
        }
-       spin_unlock_irqrestore(&musb->lock, flags);
 
+       return 0;
+}
+
+static void otg_timer(unsigned long _musb)
+{
+       struct musb *musb = (void *)_musb;
+       struct device *dev = musb->controller;
+       unsigned long flags;
+       int err;
+
+       err = pm_runtime_get(dev);
+       if ((err != -EINPROGRESS) && err < 0) {
+               dev_err(dev, "Poll could not pm_runtime_get: %i\n", err);
+               pm_runtime_put_noidle(dev);
+
+               return;
+       }
+
+       spin_lock_irqsave(&musb->lock, flags);
+       err = musb_queue_resume_work(musb, dsps_check_status, NULL);
+       if (err < 0)
+               dev_err(dev, "%s resume work: %i\n", __func__, err);
+       spin_unlock_irqrestore(&musb->lock, flags);
        pm_runtime_mark_last_busy(dev);
        pm_runtime_put_autosuspend(dev);
 }
@@ -767,28 +783,13 @@ static int dsps_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, glue);
        pm_runtime_enable(&pdev->dev);
-       pm_runtime_use_autosuspend(&pdev->dev);
-       pm_runtime_set_autosuspend_delay(&pdev->dev, 200);
-
-       ret = pm_runtime_get_sync(&pdev->dev);
-       if (ret < 0) {
-               dev_err(&pdev->dev, "pm_runtime_get_sync FAILED");
-               goto err2;
-       }
-
        ret = dsps_create_musb_pdev(glue, pdev);
        if (ret)
-               goto err3;
-
-       pm_runtime_mark_last_busy(&pdev->dev);
-       pm_runtime_put_autosuspend(&pdev->dev);
+               goto err;
 
        return 0;
 
-err3:
-       pm_runtime_put_sync(&pdev->dev);
-err2:
-       pm_runtime_dont_use_autosuspend(&pdev->dev);
+err:
        pm_runtime_disable(&pdev->dev);
        return ret;
 }
@@ -799,9 +800,6 @@ static int dsps_remove(struct platform_device *pdev)
 
        platform_device_unregister(glue->musb);
 
-       /* disable usbss clocks */
-       pm_runtime_dont_use_autosuspend(&pdev->dev);
-       pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
 
        return 0;
index bff4869a57cd193072215662e984e367eb551cdb..a55173c9e5645d6f5245c0ff78c4ccf151d75107 100644 (file)
@@ -1114,7 +1114,7 @@ static int musb_gadget_enable(struct usb_ep *ep,
                        musb_ep->dma ? "dma, " : "",
                        musb_ep->packet_sz);
 
-       schedule_work(&musb->irq_work);
+       schedule_delayed_work(&musb->irq_work, 0);
 
 fail:
        spin_unlock_irqrestore(&musb->lock, flags);
@@ -1158,7 +1158,7 @@ static int musb_gadget_disable(struct usb_ep *ep)
        musb_ep->desc = NULL;
        musb_ep->end_point.desc = NULL;
 
-       schedule_work(&musb->irq_work);
+       schedule_delayed_work(&musb->irq_work, 0);
 
        spin_unlock_irqrestore(&(musb->lock), flags);
 
@@ -1222,13 +1222,22 @@ void musb_ep_restart(struct musb *musb, struct musb_request *req)
                rxstate(musb, req);
 }
 
+static int musb_ep_restart_resume_work(struct musb *musb, void *data)
+{
+       struct musb_request *req = data;
+
+       musb_ep_restart(musb, req);
+
+       return 0;
+}
+
 static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req,
                        gfp_t gfp_flags)
 {
        struct musb_ep          *musb_ep;
        struct musb_request     *request;
        struct musb             *musb;
-       int                     status = 0;
+       int                     status;
        unsigned long           lockflags;
 
        if (!ep || !req)
@@ -1245,6 +1254,17 @@ static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req,
        if (request->ep != musb_ep)
                return -EINVAL;
 
+       status = pm_runtime_get(musb->controller);
+       if ((status != -EINPROGRESS) && status < 0) {
+               dev_err(musb->controller,
+                       "pm runtime get failed in %s\n",
+                       __func__);
+               pm_runtime_put_noidle(musb->controller);
+
+               return status;
+       }
+       status = 0;
+
        trace_musb_req_enq(request);
 
        /* request is mine now... */
@@ -1270,11 +1290,20 @@ static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req,
        list_add_tail(&request->list, &musb_ep->req_list);
 
        /* it this is the head of the queue, start i/o ... */
-       if (!musb_ep->busy && &request->list == musb_ep->req_list.next)
-               musb_ep_restart(musb, request);
+       if (!musb_ep->busy && &request->list == musb_ep->req_list.next) {
+               status = musb_queue_resume_work(musb,
+                                               musb_ep_restart_resume_work,
+                                               request);
+               if (status < 0)
+                       dev_err(musb->controller, "%s resume work: %i\n",
+                               __func__, status);
+       }
 
 unlock:
        spin_unlock_irqrestore(&musb->lock, lockflags);
+       pm_runtime_mark_last_busy(musb->controller);
+       pm_runtime_put_autosuspend(musb->controller);
+
        return status;
 }
 
@@ -1965,7 +1994,7 @@ static int musb_gadget_stop(struct usb_gadget *g)
         */
 
        /* Force check of devctl register for PM runtime */
-       schedule_work(&musb->irq_work);
+       schedule_delayed_work(&musb->irq_work, 0);
 
        pm_runtime_mark_last_busy(musb->controller);
        pm_runtime_put_autosuspend(musb->controller);
index 1ab6973d4f6197083e766f70db455c4c7c8bd8e2..e8be8e39ab8fbd2d5b03a6fdc98dc3d78b00512e 100644 (file)
@@ -287,6 +287,7 @@ static int omap2430_musb_init(struct musb *musb)
        }
        musb->isr = omap2430_musb_interrupt;
        phy_init(musb->phy);
+       phy_power_on(musb->phy);
 
        l = musb_readl(musb->mregs, OTG_INTERFSEL);
 
@@ -323,8 +324,6 @@ static void omap2430_musb_enable(struct musb *musb)
        struct musb_hdrc_platform_data *pdata = dev_get_platdata(dev);
        struct omap_musb_board_data *data = pdata->board_data;
 
-       if (!WARN_ON(!musb->phy))
-               phy_power_on(musb->phy);
 
        switch (glue->status) {
 
@@ -361,9 +360,6 @@ static void omap2430_musb_disable(struct musb *musb)
        struct device *dev = musb->controller;
        struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
 
-       if (!WARN_ON(!musb->phy))
-               phy_power_off(musb->phy);
-
        if (glue->status != MUSB_UNKNOWN)
                omap_control_usb_set_mode(glue->control_otghs,
                        USB_MODE_DISCONNECT);
@@ -375,6 +371,7 @@ static int omap2430_musb_exit(struct musb *musb)
        struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
 
        omap2430_low_level_exit(musb);
+       phy_power_off(musb->phy);
        phy_exit(musb->phy);
        musb->phy = NULL;
        cancel_work_sync(&glue->omap_musb_mailbox_work);
@@ -516,17 +513,18 @@ static int omap2430_probe(struct platform_device *pdev)
        }
 
        pm_runtime_enable(glue->dev);
-       pm_runtime_use_autosuspend(glue->dev);
-       pm_runtime_set_autosuspend_delay(glue->dev, 100);
 
        ret = platform_device_add(musb);
        if (ret) {
                dev_err(&pdev->dev, "failed to register musb device\n");
-               goto err2;
+               goto err3;
        }
 
        return 0;
 
+err3:
+       pm_runtime_disable(glue->dev);
+
 err2:
        platform_device_put(musb);
 
@@ -538,10 +536,7 @@ static int omap2430_remove(struct platform_device *pdev)
 {
        struct omap2430_glue *glue = platform_get_drvdata(pdev);
 
-       pm_runtime_get_sync(glue->dev);
        platform_device_unregister(glue->musb);
-       pm_runtime_put_sync(glue->dev);
-       pm_runtime_dont_use_autosuspend(glue->dev);
        pm_runtime_disable(glue->dev);
 
        return 0;
index df7c9f46be548f61800b7beaa7f182b5dc447ad5..e85cc8e4e7a9c02e32fdef579d04d8adb22469df 100644 (file)
@@ -724,7 +724,7 @@ tusb_otg_ints(struct musb *musb, u32 int_src, void __iomem *tbase)
                        dev_dbg(musb->controller, "vbus change, %s, otg %03x\n",
                                usb_otg_state_string(musb->xceiv->otg->state), otg_stat);
                        idle_timeout = jiffies + (1 * HZ);
-                       schedule_work(&musb->irq_work);
+                       schedule_delayed_work(&musb->irq_work, 0);
 
                } else /* A-dev state machine */ {
                        dev_dbg(musb->controller, "vbus change, %s, otg %03x\n",
@@ -814,7 +814,7 @@ tusb_otg_ints(struct musb *musb, u32 int_src, void __iomem *tbase)
                        break;
                }
        }
-       schedule_work(&musb->irq_work);
+       schedule_delayed_work(&musb->irq_work, 0);
 
        return idle_timeout;
 }
@@ -864,7 +864,7 @@ static irqreturn_t tusb_musb_interrupt(int irq, void *__hci)
                musb_writel(tbase, TUSB_PRCM_WAKEUP_CLEAR, reg);
                if (reg & ~TUSB_PRCM_WNORCS) {
                        musb->is_active = 1;
-                       schedule_work(&musb->irq_work);
+                       schedule_delayed_work(&musb->irq_work, 0);
                }
                dev_dbg(musb->controller, "wake %sactive %02x\n",
                                musb->is_active ? "" : "in", reg);
index 1d70add926f0ff632964ca92fc92a3f2f035fb7b..d544b331c9f2ce80d83095f30184e2102eda6ea6 100644 (file)
@@ -9,6 +9,7 @@
  *
  */
 
+#include <linux/delay.h>
 #include <linux/io.h>
 #include "common.h"
 #include "rcar3.h"
@@ -35,10 +36,13 @@ static int usbhs_rcar3_power_ctrl(struct platform_device *pdev,
 
        usbhs_write32(priv, UGCTRL2, UGCTRL2_RESERVED_3 | UGCTRL2_USB0SEL_OTG);
 
-       if (enable)
+       if (enable) {
                usbhs_bset(priv, LPSTS, LPSTS_SUSPM, LPSTS_SUSPM);
-       else
+               /* The controller on R-Car Gen3 needs to wait up to 45 usec */
+               udelay(45);
+       } else {
                usbhs_bset(priv, LPSTS, LPSTS_SUSPM, 0);
+       }
 
        return 0;
 }
index 54a4de0efdbaa48fa7e1a49672e114657737b42d..243ac5ebe46a02560d5cbe3ca8fd97fe4e5eb721 100644 (file)
@@ -131,6 +131,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */
        { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */
        { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */
+       { USB_DEVICE(0x10C4, 0x8962) }, /* Brim Brothers charging dock */
        { USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */
        { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */
        { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
@@ -1077,7 +1078,9 @@ static int cp210x_tiocmget(struct tty_struct *tty)
        u8 control;
        int result;
 
-       cp210x_read_u8_reg(port, CP210X_GET_MDMSTS, &control);
+       result = cp210x_read_u8_reg(port, CP210X_GET_MDMSTS, &control);
+       if (result)
+               return result;
 
        result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0)
                |((control & CONTROL_RTS) ? TIOCM_RTS : 0)
index b2d767e743fc2258c8b13e84401e5f34b60efcec..6e9fc8bcc285d122c2148cefcb7c8f57ae6c4aa2 100644 (file)
@@ -986,7 +986,8 @@ static const struct usb_device_id id_table_combined[] = {
        /* ekey Devices */
        { USB_DEVICE(FTDI_VID, FTDI_EKEY_CONV_USB_PID) },
        /* Infineon Devices */
-       { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_PID, 1) },
+       { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_TC1798_PID, 1) },
+       { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_TC2X7_PID, 1) },
        /* GE Healthcare devices */
        { USB_DEVICE(GE_HEALTHCARE_VID, GE_HEALTHCARE_NEMO_TRACKER_PID) },
        /* Active Research (Actisense) devices */
@@ -1011,6 +1012,8 @@ static const struct usb_device_id id_table_combined[] = {
        { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) },
        { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) },
        { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) },
+       { USB_DEVICE(TI_VID, TI_CC3200_LAUNCHPAD_PID),
+               .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
        { }                                     /* Terminating entry */
 };
 
index f87a938cf00571eb69edbd8d625f58041384d5fa..48ee04c94a7541ad6c5f9023be107246207c56fd 100644 (file)
 #define ATMEL_VID              0x03eb /* Vendor ID */
 #define STK541_PID             0x2109 /* Zigbee Controller */
 
+/*
+ * Texas Instruments
+ */
+#define TI_VID                 0x0451
+#define TI_CC3200_LAUNCHPAD_PID        0xC32A /* SimpleLink Wi-Fi CC3200 LaunchPad */
+
 /*
  * Blackfin gnICE JTAG
  * http://docs.blackfin.uclinux.org/doku.php?id=hw:jtag:gnice
 /*
  * Infineon Technologies
  */
-#define INFINEON_VID           0x058b
-#define INFINEON_TRIBOARD_PID  0x0028 /* DAS JTAG TriBoard TC1798 V1.0 */
+#define INFINEON_VID                   0x058b
+#define INFINEON_TRIBOARD_TC1798_PID   0x0028 /* DAS JTAG TriBoard TC1798 V1.0 */
+#define INFINEON_TRIBOARD_TC2X7_PID    0x0043 /* DAS JTAG TriBoard TC2X7 V1.0 */
 
 /*
  * Acton Research Corp.
index d213cf44a7e45ef8ae692bf4a9e63d31088cae8b..4a037b4a79cf3168cb45d60d1b6488ac21681570 100644 (file)
@@ -1078,7 +1078,8 @@ static int usb_serial_probe(struct usb_interface *interface,
 
        serial->disconnected = 0;
 
-       usb_serial_console_init(serial->port[0]->minor);
+       if (num_ports > 0)
+               usb_serial_console_init(serial->port[0]->minor);
 exit:
        module_put(type->driver.owner);
        return 0;
index ffd086733421316bed0d3c0cf56aa20134fb54f5..1a59f335b063e7e79f8ece2173f2f03574ce3a5b 100644 (file)
@@ -954,10 +954,15 @@ int usb_stor_CB_transport(struct scsi_cmnd *srb, struct us_data *us)
 
        /* COMMAND STAGE */
        /* let's send the command via the control pipe */
+       /*
+        * Command is sometimes (e.g. after scsi_eh_prep_cmnd) on the stack,
+        * which may be vmalloc'ed and thus unusable for DMA.  Make a copy.
+        */
+       memcpy(us->iobuf, srb->cmnd, srb->cmd_len);
        result = usb_stor_ctrl_transfer(us, us->send_ctrl_pipe,
                                      US_CBI_ADSC, 
                                      USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, 
-                                     us->ifnum, srb->cmnd, srb->cmd_len);
+                                     us->ifnum, us->iobuf, srb->cmd_len);
 
        /* check the return code for the command */
        usb_stor_dbg(us, "Call to usb_stor_ctrl_transfer() returned %d\n",
index 79b2b628066d81c5e5c6ceda10f707587c55b1c8..79451f7ef1b76301cc881379ad28ce12e4b4dc33 100644 (file)
@@ -133,6 +133,13 @@ static void bytewise_xor(void *_bo, const void *_bi1, const void *_bi2,
                bo[itr] = bi1[itr] ^ bi2[itr];
 }
 
+/* Scratch space for MAC calculations. */
+struct wusb_mac_scratch {
+       struct aes_ccm_b0 b0;
+       struct aes_ccm_b1 b1;
+       struct aes_ccm_a ax;
+};
+
 /*
  * CC-MAC function WUSB1.0[6.5]
  *
@@ -197,16 +204,15 @@ static void bytewise_xor(void *_bo, const void *_bi1, const void *_bi2,
  *       what sg[4] is for. Maybe there is a smarter way to do this.
  */
 static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
-                       struct crypto_cipher *tfm_aes, void *mic,
+                       struct crypto_cipher *tfm_aes,
+                       struct wusb_mac_scratch *scratch,
+                       void *mic,
                        const struct aes_ccm_nonce *n,
                        const struct aes_ccm_label *a, const void *b,
                        size_t blen)
 {
        int result = 0;
        SKCIPHER_REQUEST_ON_STACK(req, tfm_cbc);
-       struct aes_ccm_b0 b0;
-       struct aes_ccm_b1 b1;
-       struct aes_ccm_a ax;
        struct scatterlist sg[4], sg_dst;
        void *dst_buf;
        size_t dst_size;
@@ -218,16 +224,17 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
         * These checks should be compile time optimized out
         * ensure @a fills b1's mac_header and following fields
         */
-       WARN_ON(sizeof(*a) != sizeof(b1) - sizeof(b1.la));
-       WARN_ON(sizeof(b0) != sizeof(struct aes_ccm_block));
-       WARN_ON(sizeof(b1) != sizeof(struct aes_ccm_block));
-       WARN_ON(sizeof(ax) != sizeof(struct aes_ccm_block));
+       WARN_ON(sizeof(*a) != sizeof(scratch->b1) - sizeof(scratch->b1.la));
+       WARN_ON(sizeof(scratch->b0) != sizeof(struct aes_ccm_block));
+       WARN_ON(sizeof(scratch->b1) != sizeof(struct aes_ccm_block));
+       WARN_ON(sizeof(scratch->ax) != sizeof(struct aes_ccm_block));
 
        result = -ENOMEM;
        zero_padding = blen % sizeof(struct aes_ccm_block);
        if (zero_padding)
                zero_padding = sizeof(struct aes_ccm_block) - zero_padding;
-       dst_size = blen + sizeof(b0) + sizeof(b1) + zero_padding;
+       dst_size = blen + sizeof(scratch->b0) + sizeof(scratch->b1) +
+               zero_padding;
        dst_buf = kzalloc(dst_size, GFP_KERNEL);
        if (!dst_buf)
                goto error_dst_buf;
@@ -235,9 +242,9 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
        memset(iv, 0, sizeof(iv));
 
        /* Setup B0 */
-       b0.flags = 0x59;        /* Format B0 */
-       b0.ccm_nonce = *n;
-       b0.lm = cpu_to_be16(0); /* WUSB1.0[6.5] sez l(m) is 0 */
+       scratch->b0.flags = 0x59;       /* Format B0 */
+       scratch->b0.ccm_nonce = *n;
+       scratch->b0.lm = cpu_to_be16(0);        /* WUSB1.0[6.5] sez l(m) is 0 */
 
        /* Setup B1
         *
@@ -246,12 +253,12 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
         * 14'--after clarification, it means to use A's contents
         * for MAC Header, EO, sec reserved and padding.
         */
-       b1.la = cpu_to_be16(blen + 14);
-       memcpy(&b1.mac_header, a, sizeof(*a));
+       scratch->b1.la = cpu_to_be16(blen + 14);
+       memcpy(&scratch->b1.mac_header, a, sizeof(*a));
 
        sg_init_table(sg, ARRAY_SIZE(sg));
-       sg_set_buf(&sg[0], &b0, sizeof(b0));
-       sg_set_buf(&sg[1], &b1, sizeof(b1));
+       sg_set_buf(&sg[0], &scratch->b0, sizeof(scratch->b0));
+       sg_set_buf(&sg[1], &scratch->b1, sizeof(scratch->b1));
        sg_set_buf(&sg[2], b, blen);
        /* 0 if well behaved :) */
        sg_set_buf(&sg[3], bzero, zero_padding);
@@ -276,11 +283,12 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc,
         * POS Crypto API: size is assumed to be AES's block size.
         * Thanks for documenting it -- tip taken from airo.c
         */
-       ax.flags = 0x01;                /* as per WUSB 1.0 spec */
-       ax.ccm_nonce = *n;
-       ax.counter = 0;
-       crypto_cipher_encrypt_one(tfm_aes, (void *)&ax, (void *)&ax);
-       bytewise_xor(mic, &ax, iv, 8);
+       scratch->ax.flags = 0x01;               /* as per WUSB 1.0 spec */
+       scratch->ax.ccm_nonce = *n;
+       scratch->ax.counter = 0;
+       crypto_cipher_encrypt_one(tfm_aes, (void *)&scratch->ax,
+                                 (void *)&scratch->ax);
+       bytewise_xor(mic, &scratch->ax, iv, 8);
        result = 8;
 error_cbc_crypt:
        kfree(dst_buf);
@@ -303,6 +311,7 @@ ssize_t wusb_prf(void *out, size_t out_size,
        struct aes_ccm_nonce n = *_n;
        struct crypto_skcipher *tfm_cbc;
        struct crypto_cipher *tfm_aes;
+       struct wusb_mac_scratch *scratch;
        u64 sfn = 0;
        __le64 sfn_le;
 
@@ -329,17 +338,25 @@ ssize_t wusb_prf(void *out, size_t out_size,
                printk(KERN_ERR "E: can't set AES key: %d\n", (int)result);
                goto error_setkey_aes;
        }
+       scratch = kmalloc(sizeof(*scratch), GFP_KERNEL);
+       if (!scratch) {
+               result = -ENOMEM;
+               goto error_alloc_scratch;
+       }
 
        for (bitr = 0; bitr < (len + 63) / 64; bitr++) {
                sfn_le = cpu_to_le64(sfn++);
                memcpy(&n.sfn, &sfn_le, sizeof(n.sfn)); /* n.sfn++... */
-               result = wusb_ccm_mac(tfm_cbc, tfm_aes, out + bytes,
+               result = wusb_ccm_mac(tfm_cbc, tfm_aes, scratch, out + bytes,
                                      &n, a, b, blen);
                if (result < 0)
                        goto error_ccm_mac;
                bytes += result;
        }
        result = bytes;
+
+       kfree(scratch);
+error_alloc_scratch:
 error_ccm_mac:
 error_setkey_aes:
        crypto_free_cipher(tfm_aes);
index d059ad4d0dbdc76f8fbbdbfc47dfd990d2006fc5..97ee1b46db698f03dee8ea50beb00e864bff0579 100644 (file)
@@ -56,8 +56,11 @@ static struct uwb_rc *uwb_rc_find_by_index(int index)
        struct uwb_rc *rc = NULL;
 
        dev = class_find_device(&uwb_rc_class, NULL, &index, uwb_rc_index_match);
-       if (dev)
+       if (dev) {
                rc = dev_get_drvdata(dev);
+               put_device(dev);
+       }
+
        return rc;
 }
 
@@ -467,7 +470,9 @@ struct uwb_rc *__uwb_rc_try_get(struct uwb_rc *target_rc)
        if (dev) {
                rc = dev_get_drvdata(dev);
                __uwb_rc_get(rc);
+               put_device(dev);
        }
+
        return rc;
 }
 EXPORT_SYMBOL_GPL(__uwb_rc_try_get);
@@ -520,8 +525,11 @@ struct uwb_rc *uwb_rc_get_by_grandpa(const struct device *grandpa_dev)
 
        dev = class_find_device(&uwb_rc_class, NULL, grandpa_dev,
                                find_rc_grandpa);
-       if (dev)
+       if (dev) {
                rc = dev_get_drvdata(dev);
+               put_device(dev);
+       }
+
        return rc;
 }
 EXPORT_SYMBOL_GPL(uwb_rc_get_by_grandpa);
@@ -553,8 +561,10 @@ struct uwb_rc *uwb_rc_get_by_dev(const struct uwb_dev_addr *addr)
        struct uwb_rc *rc = NULL;
 
        dev = class_find_device(&uwb_rc_class, NULL, addr, find_rc_dev);
-       if (dev)
+       if (dev) {
                rc = dev_get_drvdata(dev);
+               put_device(dev);
+       }
 
        return rc;
 }
index c1304b8d498530124c7ca30319c12cc823e84d6e..678e93741ae156bf5d2c41b7a52f7b215262210d 100644 (file)
@@ -97,6 +97,8 @@ static bool uwb_rc_class_device_exists(struct uwb_rc *target_rc)
 
        dev = class_find_device(&uwb_rc_class, NULL, target_rc, find_rc);
 
+       put_device(dev);
+
        return (dev != NULL);
 }
 
index d624a527777f6a12d5f63e273008908fbf0cacfc..031bc08d000d4a7d774f3793df7be5168712e161 100644 (file)
@@ -829,8 +829,9 @@ static long vfio_pci_ioctl(void *device_data,
 
        } else if (cmd == VFIO_DEVICE_SET_IRQS) {
                struct vfio_irq_set hdr;
+               size_t size;
                u8 *data = NULL;
-               int ret = 0;
+               int max, ret = 0;
 
                minsz = offsetofend(struct vfio_irq_set, count);
 
@@ -838,23 +839,31 @@ static long vfio_pci_ioctl(void *device_data,
                        return -EFAULT;
 
                if (hdr.argsz < minsz || hdr.index >= VFIO_PCI_NUM_IRQS ||
+                   hdr.count >= (U32_MAX - hdr.start) ||
                    hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
                                  VFIO_IRQ_SET_ACTION_TYPE_MASK))
                        return -EINVAL;
 
-               if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
-                       size_t size;
-                       int max = vfio_pci_get_irq_count(vdev, hdr.index);
+               max = vfio_pci_get_irq_count(vdev, hdr.index);
+               if (hdr.start >= max || hdr.start + hdr.count > max)
+                       return -EINVAL;
 
-                       if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL)
-                               size = sizeof(uint8_t);
-                       else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD)
-                               size = sizeof(int32_t);
-                       else
-                               return -EINVAL;
+               switch (hdr.flags & VFIO_IRQ_SET_DATA_TYPE_MASK) {
+               case VFIO_IRQ_SET_DATA_NONE:
+                       size = 0;
+                       break;
+               case VFIO_IRQ_SET_DATA_BOOL:
+                       size = sizeof(uint8_t);
+                       break;
+               case VFIO_IRQ_SET_DATA_EVENTFD:
+                       size = sizeof(int32_t);
+                       break;
+               default:
+                       return -EINVAL;
+               }
 
-                       if (hdr.argsz - minsz < hdr.count * size ||
-                           hdr.start >= max || hdr.start + hdr.count > max)
+               if (size) {
+                       if (hdr.argsz - minsz < hdr.count * size)
                                return -EINVAL;
 
                        data = memdup_user((void __user *)(arg + minsz),
index c2e60893cd09a5772d608ffa5e3364c288f205ab..1c46045b0e7fc6b2e8ef421853742851aa880d7e 100644 (file)
@@ -256,7 +256,7 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix)
        if (!is_irq_none(vdev))
                return -EINVAL;
 
-       vdev->ctx = kzalloc(nvec * sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
+       vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL);
        if (!vdev->ctx)
                return -ENOMEM;
 
index af2f117208f11a70102a2674e23da3de35c21811..5d3b0db5ce0af34997a3aa748292b6bd8d48c191 100644 (file)
@@ -2187,7 +2187,7 @@ config FB_GOLDFISH
 
 config FB_COBALT
        tristate "Cobalt server LCD frame buffer support"
-       depends on FB && (MIPS_COBALT || MIPS_SEAD3)
+       depends on FB && MIPS_COBALT
 
 config FB_SH7760
        bool "SH7760/SH7763/SH7720/SH7721 LCDC support"
index 19ad8645d93cd25a9f41375bcb30a1677ae721b7..e5d9bfc1703a5ff9fcaafe44fbb3c7e152fa16c7 100644 (file)
@@ -526,8 +526,8 @@ int versatile_clcd_init_panel(struct clcd_fb *fb,
        np = of_find_matching_node_and_match(NULL, versatile_clcd_of_match,
                                             &clcd_id);
        if (!np) {
-               dev_err(dev, "no Versatile syscon node\n");
-               return -ENODEV;
+               /* Vexpress does not have this */
+               return 0;
        }
        versatile_clcd_type = (enum versatile_clcd)clcd_id->data;
 
index 07675d6f323e774b4549ab7d4bb1e8a90dff0663..2d3b691f3fc4885414ae9234950d8e9c798e5fe6 100644 (file)
@@ -63,7 +63,6 @@
 #define LCD_CUR_POS(x)         ((x) & LCD_CUR_POS_MASK)
 #define LCD_TEXT_POS(x)                ((x) | LCD_TEXT_MODE)
 
-#ifdef CONFIG_MIPS_COBALT
 static inline void lcd_write_control(struct fb_info *info, u8 control)
 {
        writel((u32)control << 24, info->screen_base);
@@ -83,47 +82,6 @@ static inline u8 lcd_read_data(struct fb_info *info)
 {
        return readl(info->screen_base + LCD_DATA_REG_OFFSET) >> 24;
 }
-#else
-
-#define LCD_CTL                        0x00
-#define LCD_DATA               0x08
-#define CPLD_STATUS            0x10
-#define CPLD_DATA              0x18
-
-static inline void cpld_wait(struct fb_info *info)
-{
-       do {
-       } while (readl(info->screen_base + CPLD_STATUS) & 1);
-}
-
-static inline void lcd_write_control(struct fb_info *info, u8 control)
-{
-       cpld_wait(info);
-       writel(control, info->screen_base + LCD_CTL);
-}
-
-static inline u8 lcd_read_control(struct fb_info *info)
-{
-       cpld_wait(info);
-       readl(info->screen_base + LCD_CTL);
-       cpld_wait(info);
-       return readl(info->screen_base + CPLD_DATA) & 0xff;
-}
-
-static inline void lcd_write_data(struct fb_info *info, u8 data)
-{
-       cpld_wait(info);
-       writel(data, info->screen_base + LCD_DATA);
-}
-
-static inline u8 lcd_read_data(struct fb_info *info)
-{
-       cpld_wait(info);
-       readl(info->screen_base + LCD_DATA);
-       cpld_wait(info);
-       return readl(info->screen_base + CPLD_DATA) & 0xff;
-}
-#endif
 
 static int lcd_busy_wait(struct fb_info *info)
 {
index 3b1ca441107370d39c54bd4bffa24248eaa730c5..a2564ab91e62d3c2775441d292e54e1d25492a40 100644 (file)
@@ -686,8 +686,8 @@ static ssize_t pvr2fb_write(struct fb_info *info, const char *buf,
        if (!pages)
                return -ENOMEM;
 
-       ret = get_user_pages_unlocked((unsigned long)buf, nr_pages, WRITE,
-                       0, pages);
+       ret = get_user_pages_unlocked((unsigned long)buf, nr_pages, pages,
+                       FOLL_WRITE);
 
        if (ret < nr_pages) {
                nr_pages = ret;
index 60bdad3a689b8280b373164f7a13fdcb2a5a1cc2..150ce2abf6c8f193b4e3b5c9fa66001cd709f39e 100644 (file)
@@ -245,8 +245,8 @@ static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p)
        /* Get the physical addresses of the source buffer */
        down_read(&current->mm->mmap_sem);
        num_pinned = get_user_pages(param.local_vaddr - lb_offset,
-               num_pages, (param.source == -1) ? READ : WRITE,
-               0, pages, NULL);
+               num_pages, (param.source == -1) ? 0 : FOLL_WRITE,
+               pages, NULL);
        up_read(&current->mm->mmap_sem);
 
        if (num_pinned != num_pages) {
diff --git a/drivers/virtio/config.c b/drivers/virtio/config.c
deleted file mode 100644 (file)
index f70bcd2..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-/* Configuration space parsing helpers for virtio.
- *
- * The configuration is [type][len][... len bytes ...] fields.
- *
- * Copyright 2007 Rusty Russell, IBM Corporation.
- * GPL v2 or later.
- */
-#include <linux/err.h>
-#include <linux/virtio.h>
-#include <linux/virtio_config.h>
-#include <linux/bug.h>
-
index 4e7003db12c4a4385034231d23cd1318006eca04..181793f078524ae8c06751d4b03677a132b4a7c3 100644 (file)
@@ -577,6 +577,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
 
        virtio_device_ready(vdev);
 
+       if (towards_target(vb))
+               virtballoon_changed(vdev);
        return 0;
 
 out_del_vqs:
index 8c4e61783441b9f818c18b6dedd0b113b575ab77..6d9e5173d5fa6b7f4da58b48268dd48c7e8c1e1f 100644 (file)
@@ -212,10 +212,18 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
                return -ENODEV;
        }
 
-       rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
-       if (rc)
-               rc = dma_set_mask_and_coherent(&pci_dev->dev,
-                                               DMA_BIT_MASK(32));
+       rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64));
+       if (rc) {
+               rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
+       } else {
+               /*
+                * The virtio ring base address is expressed as a 32-bit PFN,
+                * with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT.
+                */
+               dma_set_coherent_mask(&pci_dev->dev,
+                               DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT));
+       }
+
        if (rc)
                dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.  Trying to continue, but this might not work.\n");
 
index ed9c9eeedfe5f83fd1b7b3ca475280643d8758f1..489bfc61cf30001626307f4e2f936d2e65cdeb73 100644 (file)
@@ -167,7 +167,7 @@ static bool vring_use_dma_api(struct virtio_device *vdev)
  * making all of the arch DMA ops work on the vring device itself
  * is a mess.  For now, we use the parent device for DMA ops.
  */
-static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
+static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq)
 {
        return vq->vq.vdev->dev.parent;
 }
@@ -732,7 +732,8 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
 
        if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
                vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
-               vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+               if (!vq->event)
+                       vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
        }
 
 }
@@ -764,7 +765,8 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
         * entry. Always do both to keep code simple. */
        if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
                vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
-               vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+               if (!vq->event)
+                       vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
        }
        vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx);
        END_USE(vq);
@@ -832,10 +834,11 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
         * more to do. */
        /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to
         * either clear the flags bit or point the event index at the next
-        * entry. Always do both to keep code simple. */
+        * entry. Always update the event index to keep code simple. */
        if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
                vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
-               vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
+               if (!vq->event)
+                       vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
        }
        /* TODO: tune this threshold */
        bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
@@ -953,7 +956,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
        /* No callback?  Tell other side not to bother us. */
        if (!callback) {
                vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
-               vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
+               if (!vq->event)
+                       vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
        }
 
        /* Put everything in free lists. */
index 15b64076bc26257abffd02a60de7b2fe4b22c4e0..bdbadaa47ef3ecb481b200785d4bc1aa9981f6e4 100644 (file)
@@ -156,12 +156,16 @@ size_t vme_get_size(struct vme_resource *resource)
        case VME_MASTER:
                retval = vme_master_get(resource, &enabled, &base, &size,
                        &aspace, &cycle, &dwidth);
+               if (retval)
+                       return 0;
 
                return size;
                break;
        case VME_SLAVE:
                retval = vme_slave_get(resource, &enabled, &base, &size,
                        &buf_base, &aspace, &cycle);
+               if (retval)
+                       return 0;
 
                return size;
                break;
index fdd3228e06781c4dd150091673e51ba3bdb9ce71..3eb58cb51e5648f87f5d732e33cde8c6cd49569d 100644 (file)
@@ -155,6 +155,7 @@ config TANGOX_WATCHDOG
 config WDAT_WDT
        tristate "ACPI Watchdog Action Table (WDAT)"
        depends on ACPI
+       select WATCHDOG_CORE
        select ACPI_WATCHDOG
        help
          This driver adds support for systems with ACPI Watchdog Action
index e473e3b237203fcc7440128e804e3bbb141fc0b2..6d1fbda0f461ca2d2304eaeb43aef35adfd77cf4 100644 (file)
@@ -499,6 +499,10 @@ static int wdat_wdt_resume_noirq(struct device *dev)
                ret = wdat_wdt_enable_reboot(wdat);
                if (ret)
                        return ret;
+
+               ret = wdat_wdt_ping(&wdat->wdd);
+               if (ret)
+                       return ret;
        }
 
        return wdat_wdt_start(&wdat->wdd);
index e12bd3635f832e7fa5330667fe77f32fe305db4b..26e5e8507f031f3118229f5164286160d3bcece6 100644 (file)
@@ -168,7 +168,9 @@ out:
 #endif /* CONFIG_HIBERNATE_CALLBACKS */
 
 struct shutdown_handler {
-       const char *command;
+#define SHUTDOWN_CMD_SIZE 11
+       const char command[SHUTDOWN_CMD_SIZE];
+       bool flag;
        void (*cb)(void);
 };
 
@@ -206,22 +208,22 @@ static void do_reboot(void)
        ctrl_alt_del();
 }
 
+static struct shutdown_handler shutdown_handlers[] = {
+       { "poweroff",   true,   do_poweroff },
+       { "halt",       false,  do_poweroff },
+       { "reboot",     true,   do_reboot   },
+#ifdef CONFIG_HIBERNATE_CALLBACKS
+       { "suspend",    true,   do_suspend  },
+#endif
+};
+
 static void shutdown_handler(struct xenbus_watch *watch,
                             const char **vec, unsigned int len)
 {
        char *str;
        struct xenbus_transaction xbt;
        int err;
-       static struct shutdown_handler handlers[] = {
-               { "poweroff",   do_poweroff },
-               { "halt",       do_poweroff },
-               { "reboot",     do_reboot   },
-#ifdef CONFIG_HIBERNATE_CALLBACKS
-               { "suspend",    do_suspend  },
-#endif
-               {NULL, NULL},
-       };
-       static struct shutdown_handler *handler;
+       int idx;
 
        if (shutting_down != SHUTDOWN_INVALID)
                return;
@@ -238,13 +240,13 @@ static void shutdown_handler(struct xenbus_watch *watch,
                return;
        }
 
-       for (handler = &handlers[0]; handler->command; handler++) {
-               if (strcmp(str, handler->command) == 0)
+       for (idx = 0; idx < ARRAY_SIZE(shutdown_handlers); idx++) {
+               if (strcmp(str, shutdown_handlers[idx].command) == 0)
                        break;
        }
 
        /* Only acknowledge commands which we are prepared to handle. */
-       if (handler->cb)
+       if (idx < ARRAY_SIZE(shutdown_handlers))
                xenbus_write(xbt, "control", "shutdown", "");
 
        err = xenbus_transaction_end(xbt, 0);
@@ -253,8 +255,8 @@ static void shutdown_handler(struct xenbus_watch *watch,
                goto again;
        }
 
-       if (handler->cb) {
-               handler->cb();
+       if (idx < ARRAY_SIZE(shutdown_handlers)) {
+               shutdown_handlers[idx].cb();
        } else {
                pr_info("Ignoring shutdown request: %s\n", str);
                shutting_down = SHUTDOWN_INVALID;
@@ -310,6 +312,9 @@ static struct notifier_block xen_reboot_nb = {
 static int setup_shutdown_watcher(void)
 {
        int err;
+       int idx;
+#define FEATURE_PATH_SIZE (SHUTDOWN_CMD_SIZE + sizeof("feature-"))
+       char node[FEATURE_PATH_SIZE];
 
        err = register_xenbus_watch(&shutdown_watch);
        if (err) {
@@ -326,6 +331,14 @@ static int setup_shutdown_watcher(void)
        }
 #endif
 
+       for (idx = 0; idx < ARRAY_SIZE(shutdown_handlers); idx++) {
+               if (!shutdown_handlers[idx].flag)
+                       continue;
+               snprintf(node, FEATURE_PATH_SIZE, "feature-%s",
+                        shutdown_handlers[idx].command);
+               xenbus_printf(XBT_NIL, "control", node, "%u", 1);
+       }
+
        return 0;
 }
 
index c1010f018bd857985b5bf0ada1f5286f0f68b2bf..1e8be12ebb559880fa5d10f05a2782ad46c36be3 100644 (file)
@@ -364,7 +364,7 @@ out:
 
 static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u)
 {
-       struct watch_adapter *watch, *tmp_watch;
+       struct watch_adapter *watch;
        char *path, *token;
        int err, rc;
        LIST_HEAD(staging_q);
@@ -399,7 +399,7 @@ static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u)
                }
                list_add(&watch->list, &u->watches);
        } else {
-               list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
+               list_for_each_entry(watch, &u->watches, list) {
                        if (!strcmp(watch->token, token) &&
                            !strcmp(watch->watch.node, path)) {
                                unregister_xenbus_watch(&watch->watch);
index 611a231196757abdb040615acf2a80244360a6a2..6d40a972ffb24585c7e386d5817f53672841e0fc 100644 (file)
@@ -335,7 +335,9 @@ static int backend_state;
 static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
                                        const char **v, unsigned int l)
 {
-       xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state);
+       if (xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i",
+                        &backend_state) != 1)
+               backend_state = XenbusStateUnknown;
        printk(KERN_DEBUG "XENBUS: backend %s %s\n",
                        v[XS_WATCH_PATH], xenbus_strstate(backend_state));
        wake_up(&backend_state_wq);
index 2037e7a77a3767c9e5878838686a6c15a9eea884..d764236072b192d33a0b8eedb3821d7391991067 100644 (file)
@@ -91,11 +91,9 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
  */
 bool afs_cm_incoming_call(struct afs_call *call)
 {
-       u32 operation_id = ntohl(call->operation_ID);
+       _enter("{CB.OP %u}", call->operation_ID);
 
-       _enter("{CB.OP %u}", operation_id);
-
-       switch (operation_id) {
+       switch (call->operation_ID) {
        case CBCallBack:
                call->type = &afs_SRXCBCallBack;
                return true;
index 96f4d764d1a6784b9bbd3ed7f0313b3b3d49f869..31c616ab9b400a66dfbcd39c12dd87665f88acf9 100644 (file)
@@ -364,7 +364,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
                        buffer = kmap(page);
                        ret = afs_extract_data(call, buffer,
                                               call->count, true);
-                       kunmap(buffer);
+                       kunmap(page);
                        if (ret < 0)
                                return ret;
                }
@@ -397,7 +397,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
                page = call->reply3;
                buffer = kmap(page);
                memset(buffer + call->count, 0, PAGE_SIZE - call->count);
-               kunmap(buffer);
+               kunmap(page);
        }
 
        _leave(" = 0 [done]");
index 5497c8496055762f18e47a7464bd5b2d71d80caa..535a38d2c1d06f752cd6beae54766c1a9d899527 100644 (file)
@@ -112,7 +112,7 @@ struct afs_call {
        bool                    need_attention; /* T if RxRPC poked us */
        u16                     service_id;     /* RxRPC service ID to call */
        __be16                  port;           /* target UDP port */
-       __be32                  operation_ID;   /* operation ID for an incoming call */
+       u32                     operation_ID;   /* operation ID for an incoming call */
        u32                     count;          /* count for use in unmarshalling */
        __be32                  tmp;            /* place to extract temporary data */
        afs_dataversion_t       store_version;  /* updated version expected from store */
index 477928b259400a33bef35b0ab40608eba6d8b4e5..25f05a8d21b195fffb10f89cff990888fefd1ab2 100644 (file)
@@ -676,10 +676,11 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
        ASSERTCMP(call->offset, <, 4);
 
        /* the operation ID forms the first four bytes of the request data */
-       ret = afs_extract_data(call, &call->operation_ID, 4, true);
+       ret = afs_extract_data(call, &call->tmp, 4, true);
        if (ret < 0)
                return ret;
 
+       call->operation_ID = ntohl(call->tmp);
        call->state = AFS_CALL_AWAIT_REQUEST;
        call->offset = 0;
 
index 1157e13a36d681ecba8e926bb9e91943cfeb6723..428484f2f8413dc6972da9857b3392264b9e6421 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1078,6 +1078,17 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
        unsigned tail, pos, head;
        unsigned long   flags;
 
+       if (kiocb->ki_flags & IOCB_WRITE) {
+               struct file *file = kiocb->ki_filp;
+
+               /*
+                * Tell lockdep we inherited freeze protection from submission
+                * thread.
+                */
+               __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+               file_end_write(file);
+       }
+
        /*
         * Special case handling for sync iocbs:
         *  - events go directly into the iocb for fast handling
@@ -1392,122 +1403,106 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
        return -EINVAL;
 }
 
-typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
-
-static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len,
-                                struct iovec **iovec,
-                                bool compat,
-                                struct iov_iter *iter)
+static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
+               bool vectored, bool compat, struct iov_iter *iter)
 {
+       void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
+       size_t len = iocb->aio_nbytes;
+
+       if (!vectored) {
+               ssize_t ret = import_single_range(rw, buf, len, *iovec, iter);
+               *iovec = NULL;
+               return ret;
+       }
 #ifdef CONFIG_COMPAT
        if (compat)
-               return compat_import_iovec(rw,
-                               (struct compat_iovec __user *)buf,
-                               len, UIO_FASTIOV, iovec, iter);
+               return compat_import_iovec(rw, buf, len, UIO_FASTIOV, iovec,
+                               iter);
 #endif
-       return import_iovec(rw, (struct iovec __user *)buf,
-                               len, UIO_FASTIOV, iovec, iter);
+       return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
 }
 
-/*
- * aio_run_iocb:
- *     Performs the initial checks and io submission.
- */
-static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
-                           char __user *buf, size_t len, bool compat)
+static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
+{
+       switch (ret) {
+       case -EIOCBQUEUED:
+               return ret;
+       case -ERESTARTSYS:
+       case -ERESTARTNOINTR:
+       case -ERESTARTNOHAND:
+       case -ERESTART_RESTARTBLOCK:
+               /*
+                * There's no easy way to restart the syscall since other AIO's
+                * may be already running. Just fail this IO with EINTR.
+                */
+               ret = -EINTR;
+               /*FALLTHRU*/
+       default:
+               aio_complete(req, ret, 0);
+               return 0;
+       }
+}
+
+static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
+               bool compat)
 {
        struct file *file = req->ki_filp;
-       ssize_t ret;
-       int rw;
-       fmode_t mode;
-       rw_iter_op *iter_op;
        struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
        struct iov_iter iter;
+       ssize_t ret;
 
-       switch (opcode) {
-       case IOCB_CMD_PREAD:
-       case IOCB_CMD_PREADV:
-               mode    = FMODE_READ;
-               rw      = READ;
-               iter_op = file->f_op->read_iter;
-               goto rw_common;
-
-       case IOCB_CMD_PWRITE:
-       case IOCB_CMD_PWRITEV:
-               mode    = FMODE_WRITE;
-               rw      = WRITE;
-               iter_op = file->f_op->write_iter;
-               goto rw_common;
-rw_common:
-               if (unlikely(!(file->f_mode & mode)))
-                       return -EBADF;
-
-               if (!iter_op)
-                       return -EINVAL;
-
-               if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV)
-                       ret = aio_setup_vectored_rw(rw, buf, len,
-                                               &iovec, compat, &iter);
-               else {
-                       ret = import_single_range(rw, buf, len, iovec, &iter);
-                       iovec = NULL;
-               }
-               if (!ret)
-                       ret = rw_verify_area(rw, file, &req->ki_pos,
-                                            iov_iter_count(&iter));
-               if (ret < 0) {
-                       kfree(iovec);
-                       return ret;
-               }
-
-               if (rw == WRITE)
-                       file_start_write(file);
-
-               ret = iter_op(req, &iter);
-
-               if (rw == WRITE)
-                       file_end_write(file);
-               kfree(iovec);
-               break;
-
-       case IOCB_CMD_FDSYNC:
-               if (!file->f_op->aio_fsync)
-                       return -EINVAL;
-
-               ret = file->f_op->aio_fsync(req, 1);
-               break;
+       if (unlikely(!(file->f_mode & FMODE_READ)))
+               return -EBADF;
+       if (unlikely(!file->f_op->read_iter))
+               return -EINVAL;
 
-       case IOCB_CMD_FSYNC:
-               if (!file->f_op->aio_fsync)
-                       return -EINVAL;
+       ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
+       if (ret)
+               return ret;
+       ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
+       if (!ret)
+               ret = aio_ret(req, file->f_op->read_iter(req, &iter));
+       kfree(iovec);
+       return ret;
+}
 
-               ret = file->f_op->aio_fsync(req, 0);
-               break;
+static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
+               bool compat)
+{
+       struct file *file = req->ki_filp;
+       struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
+       struct iov_iter iter;
+       ssize_t ret;
 
-       default:
-               pr_debug("EINVAL: no operation provided\n");
+       if (unlikely(!(file->f_mode & FMODE_WRITE)))
+               return -EBADF;
+       if (unlikely(!file->f_op->write_iter))
                return -EINVAL;
-       }
 
-       if (ret != -EIOCBQUEUED) {
+       ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
+       if (ret)
+               return ret;
+       ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
+       if (!ret) {
+               req->ki_flags |= IOCB_WRITE;
+               file_start_write(file);
+               ret = aio_ret(req, file->f_op->write_iter(req, &iter));
                /*
-                * There's no easy way to restart the syscall since other AIO's
-                * may be already running. Just fail this IO with EINTR.
+                * We release freeze protection in aio_complete().  Fool lockdep
+                * by telling it the lock got released so that it doesn't
+                * complain about held lock when we return to userspace.
                 */
-               if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
-                            ret == -ERESTARTNOHAND ||
-                            ret == -ERESTART_RESTARTBLOCK))
-                       ret = -EINTR;
-               aio_complete(req, ret, 0);
+               __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
        }
-
-       return 0;
+       kfree(iovec);
+       return ret;
 }
 
 static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                         struct iocb *iocb, bool compat)
 {
        struct aio_kiocb *req;
+       struct file *file;
        ssize_t ret;
 
        /* enforce forwards compatibility on users */
@@ -1530,7 +1525,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
        if (unlikely(!req))
                return -EAGAIN;
 
-       req->common.ki_filp = fget(iocb->aio_fildes);
+       req->common.ki_filp = file = fget(iocb->aio_fildes);
        if (unlikely(!req->common.ki_filp)) {
                ret = -EBADF;
                goto out_put_req;
@@ -1565,13 +1560,29 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
        req->ki_user_iocb = user_iocb;
        req->ki_user_data = iocb->aio_data;
 
-       ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode,
-                          (char __user *)(unsigned long)iocb->aio_buf,
-                          iocb->aio_nbytes,
-                          compat);
-       if (ret)
-               goto out_put_req;
+       get_file(file);
+       switch (iocb->aio_lio_opcode) {
+       case IOCB_CMD_PREAD:
+               ret = aio_read(&req->common, iocb, false, compat);
+               break;
+       case IOCB_CMD_PWRITE:
+               ret = aio_write(&req->common, iocb, false, compat);
+               break;
+       case IOCB_CMD_PREADV:
+               ret = aio_read(&req->common, iocb, true, compat);
+               break;
+       case IOCB_CMD_PWRITEV:
+               ret = aio_write(&req->common, iocb, true, compat);
+               break;
+       default:
+               pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
+               ret = -EINVAL;
+               break;
+       }
+       fput(file);
 
+       if (ret && ret != -EIOCBQUEUED)
+               goto out_put_req;
        return 0;
 out_put_req:
        put_reqs_available(ctx, 1);
index e0f59263a96d5944c1a5315e17bf9ee872309b9f..c6bad51d8ec7b56d8027c019a98a656596911110 100644 (file)
@@ -43,7 +43,10 @@ struct befs_sb_info {
        u32 ag_shift;
        u32 num_ags;
 
-       /* jornal log entry */
+       /* State of the superblock */
+       u32 flags;
+
+       /* Journal log entry */
        befs_block_run log_blocks;
        befs_off_t log_start;
        befs_off_t log_end;
@@ -79,7 +82,7 @@ enum befs_err {
        BEFS_BT_END,
        BEFS_BT_EMPTY,
        BEFS_BT_MATCH,
-       BEFS_BT_PARMATCH,
+       BEFS_BT_OVERFLOW,
        BEFS_BT_NOT_FOUND
 };
 
@@ -140,18 +143,6 @@ befs_iaddrs_per_block(struct super_block *sb)
        return BEFS_SB(sb)->block_size / sizeof (befs_disk_inode_addr);
 }
 
-static inline int
-befs_iaddr_is_empty(const befs_inode_addr *iaddr)
-{
-       return (!iaddr->allocation_group) && (!iaddr->start) && (!iaddr->len);
-}
-
-static inline size_t
-befs_brun_size(struct super_block *sb, befs_block_run run)
-{
-       return BEFS_SB(sb)->block_size * run.len;
-}
-
 #include "endian.h"
 
 #endif                         /* _LINUX_BEFS_H */
index 307645f9e284c546db0e3d94e989e3064791b343..7e135ea73fddf65295a804502d7c3a57e906cffb 100644 (file)
@@ -85,7 +85,7 @@ struct befs_btree_node {
 };
 
 /* local constants */
-static const befs_off_t befs_bt_inval = 0xffffffffffffffffULL;
+static const befs_off_t BEFS_BT_INVAL = 0xffffffffffffffffULL;
 
 /* local functions */
 static int befs_btree_seekleaf(struct super_block *sb, const befs_data_stream *ds,
@@ -156,8 +156,6 @@ befs_bt_read_super(struct super_block *sb, const befs_data_stream *ds,
        sup->max_depth = fs32_to_cpu(sb, od_sup->max_depth);
        sup->data_type = fs32_to_cpu(sb, od_sup->data_type);
        sup->root_node_ptr = fs64_to_cpu(sb, od_sup->root_node_ptr);
-       sup->free_node_ptr = fs64_to_cpu(sb, od_sup->free_node_ptr);
-       sup->max_size = fs64_to_cpu(sb, od_sup->max_size);
 
        brelse(bh);
        if (sup->magic != BEFS_BTREE_MAGIC) {
@@ -183,8 +181,8 @@ befs_bt_read_super(struct super_block *sb, const befs_data_stream *ds,
  * Calls befs_read_datastream to read in the indicated btree node and
  * makes sure its header fields are in cpu byteorder, byteswapping if
  * necessary.
- * Note: node->bh must be NULL when this function called first
- * time. Don't forget brelse(node->bh) after last call.
+ * Note: node->bh must be NULL when this function is called the first time.
+ * Don't forget brelse(node->bh) after last call.
  *
  * On success, returns BEFS_OK and *@node contains the btree node that
  * starts at @node_off, with the node->head fields in cpu byte order.
@@ -244,7 +242,7 @@ befs_bt_read_node(struct super_block *sb, const befs_data_stream *ds,
  *   Read the superblock and rootnode of the b+tree.
  *   Drill down through the interior nodes using befs_find_key().
  *   Once at the correct leaf node, use befs_find_key() again to get the
- *   actuall value stored with the key.
+ *   actual value stored with the key.
  */
 int
 befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
@@ -283,9 +281,9 @@ befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
 
        while (!befs_leafnode(this_node)) {
                res = befs_find_key(sb, this_node, key, &node_off);
-               if (res == BEFS_BT_NOT_FOUND)
+               /* if no key set, try the overflow node */
+               if (res == BEFS_BT_OVERFLOW)
                        node_off = this_node->head.overflow;
-               /* if no match, go to overflow node */
                if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) {
                        befs_error(sb, "befs_btree_find() failed to read "
                                   "node at %llu", node_off);
@@ -293,15 +291,15 @@ befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
                }
        }
 
-       /* at the correct leaf node now */
-
+       /* at a leaf node now, check if it is correct */
        res = befs_find_key(sb, this_node, key, value);
 
        brelse(this_node->bh);
        kfree(this_node);
 
        if (res != BEFS_BT_MATCH) {
-               befs_debug(sb, "<--- %s Key %s not found", __func__, key);
+               befs_error(sb, "<--- %s Key %s not found", __func__, key);
+               befs_debug(sb, "<--- %s ERROR", __func__);
                *value = 0;
                return BEFS_BT_NOT_FOUND;
        }
@@ -324,16 +322,12 @@ befs_btree_find(struct super_block *sb, const befs_data_stream *ds,
  * @findkey: Keystring to search for
  * @value: If key is found, the value stored with the key is put here
  *
- * finds exact match if one exists, and returns BEFS_BT_MATCH
- * If no exact match, finds first key in node that is greater
- * (alphabetically) than the search key and returns BEFS_BT_PARMATCH
- * (for partial match, I guess). Can you think of something better to
- * call it?
- *
- * If no key was a match or greater than the search key, return
- * BEFS_BT_NOT_FOUND.
+ * Finds exact match if one exists, and returns BEFS_BT_MATCH.
+ * If there is no match and node's value array is too small for key, return
+ * BEFS_BT_OVERFLOW.
+ * If no match and node should contain this key, return BEFS_BT_NOT_FOUND.
  *
- * Use binary search instead of a linear.
+ * Uses binary search instead of a linear search.
  */
 static int
 befs_find_key(struct super_block *sb, struct befs_btree_node *node,
@@ -348,18 +342,16 @@ befs_find_key(struct super_block *sb, struct befs_btree_node *node,
 
        befs_debug(sb, "---> %s %s", __func__, findkey);
 
-       *value = 0;
-
        findkey_len = strlen(findkey);
 
-       /* if node can not contain key, just skeep this node */
+       /* if node can not contain key, just skip this node */
        last = node->head.all_key_count - 1;
        thiskey = befs_bt_get_key(sb, node, last, &keylen);
 
        eq = befs_compare_strings(thiskey, keylen, findkey, findkey_len);
        if (eq < 0) {
-               befs_debug(sb, "<--- %s %s not found", __func__, findkey);
-               return BEFS_BT_NOT_FOUND;
+               befs_debug(sb, "<--- node can't contain %s", findkey);
+               return BEFS_BT_OVERFLOW;
        }
 
        valarray = befs_bt_valarray(node);
@@ -387,12 +379,15 @@ befs_find_key(struct super_block *sb, struct befs_btree_node *node,
                else
                        first = mid + 1;
        }
+
+       /* return an existing value so caller can arrive at a leaf node */
        if (eq < 0)
                *value = fs64_to_cpu(sb, valarray[mid + 1]);
        else
                *value = fs64_to_cpu(sb, valarray[mid]);
-       befs_debug(sb, "<--- %s found %s at %d", __func__, thiskey, mid);
-       return BEFS_BT_PARMATCH;
+       befs_error(sb, "<--- %s %s not found", __func__, findkey);
+       befs_debug(sb, "<--- %s ERROR", __func__);
+       return BEFS_BT_NOT_FOUND;
 }
 
 /**
@@ -405,7 +400,7 @@ befs_find_key(struct super_block *sb, struct befs_btree_node *node,
  * @keysize: Length of the returned key
  * @value: Value stored with the returned key
  *
- * Heres how it works: Key_no is the index of the key/value pair to 
+ * Here's how it works: Key_no is the index of the key/value pair to
  * return in keybuf/value.
  * Bufsize is the size of keybuf (BEFS_NAME_LEN+1 is a good size). Keysize is 
  * the number of characters in the key (just a convenience).
@@ -422,7 +417,7 @@ befs_btree_read(struct super_block *sb, const befs_data_stream *ds,
 {
        struct befs_btree_node *this_node;
        befs_btree_super bt_super;
-       befs_off_t node_off = 0;
+       befs_off_t node_off;
        int cur_key;
        fs64 *valarray;
        char *keystart;
@@ -467,7 +462,7 @@ befs_btree_read(struct super_block *sb, const befs_data_stream *ds,
        while (key_sum + this_node->head.all_key_count <= key_no) {
 
                /* no more nodes to look in: key_no is too large */
-               if (this_node->head.right == befs_bt_inval) {
+               if (this_node->head.right == BEFS_BT_INVAL) {
                        *keysize = 0;
                        *value = 0;
                        befs_debug(sb,
@@ -541,7 +536,6 @@ befs_btree_read(struct super_block *sb, const befs_data_stream *ds,
  * @node_off: Pointer to offset of current node within datastream. Modified
  *             by the function.
  *
- *
  * Helper function for btree traverse. Moves the current position to the 
  * start of the first leaf node.
  *
@@ -608,7 +602,7 @@ static int
 befs_leafnode(struct befs_btree_node *node)
 {
        /* all interior nodes (and only interior nodes) have an overflow node */
-       if (node->head.overflow == befs_bt_inval)
+       if (node->head.overflow == BEFS_BT_INVAL)
                return 1;
        else
                return 0;
@@ -715,7 +709,7 @@ befs_bt_get_key(struct super_block *sb, struct befs_btree_node *node,
  *
  * Returns 0 if @key1 and @key2 are equal.
  * Returns >0 if @key1 is greater.
- * Returns <0 if @key2 is greater..
+ * Returns <0 if @key2 is greater.
  */
 static int
 befs_compare_strings(const void *key1, int keylen1,
index af1bc19b7c85b582e5c87981b92beb65deeefcf5..b4c7ba013c0d6e752296599d6b29597e5dd13512 100644 (file)
@@ -22,22 +22,22 @@ const befs_inode_addr BAD_IADDR = { 0, 0, 0 };
 
 static int befs_find_brun_direct(struct super_block *sb,
                                 const befs_data_stream *data,
-                                befs_blocknr_t blockno, befs_block_run * run);
+                                befs_blocknr_t blockno, befs_block_run *run);
 
 static int befs_find_brun_indirect(struct super_block *sb,
                                   const befs_data_stream *data,
                                   befs_blocknr_t blockno,
-                                  befs_block_run * run);
+                                  befs_block_run *run);
 
 static int befs_find_brun_dblindirect(struct super_block *sb,
                                      const befs_data_stream *data,
                                      befs_blocknr_t blockno,
-                                     befs_block_run * run);
+                                     befs_block_run *run);
 
 /**
  * befs_read_datastream - get buffer_head containing data, starting from pos.
  * @sb: Filesystem superblock
- * @ds: datastrem to find data with
+ * @ds: datastream to find data with
  * @pos: start of data
  * @off: offset of data in buffer_head->b_data
  *
@@ -46,7 +46,7 @@ static int befs_find_brun_dblindirect(struct super_block *sb,
  */
 struct buffer_head *
 befs_read_datastream(struct super_block *sb, const befs_data_stream *ds,
-                    befs_off_t pos, uint * off)
+                    befs_off_t pos, uint *off)
 {
        struct buffer_head *bh;
        befs_block_run run;
@@ -75,7 +75,13 @@ befs_read_datastream(struct super_block *sb, const befs_data_stream *ds,
        return bh;
 }
 
-/*
+/**
+ * befs_fblock2brun - give back block run for fblock
+ * @sb: the superblock
+ * @data: datastream to read from
+ * @fblock: the blocknumber with the file position to find
+ * @run: The found run is passed back through this pointer
+ *
  * Takes a file position and gives back a brun who's starting block
  * is block number fblock of the file.
  * 
@@ -88,7 +94,7 @@ befs_read_datastream(struct super_block *sb, const befs_data_stream *ds,
  */
 int
 befs_fblock2brun(struct super_block *sb, const befs_data_stream *data,
-                befs_blocknr_t fblock, befs_block_run * run)
+                befs_blocknr_t fblock, befs_block_run *run)
 {
        int err;
        befs_off_t pos = fblock << BEFS_SB(sb)->block_shift;
@@ -115,7 +121,7 @@ befs_fblock2brun(struct super_block *sb, const befs_data_stream *data,
 /**
  * befs_read_lsmylink - read long symlink from datastream.
  * @sb: Filesystem superblock 
- * @ds: Datastrem to read from
+ * @ds: Datastream to read from
  * @buff: Buffer in which to place long symlink data
  * @len: Length of the long symlink in bytes
  *
@@ -128,6 +134,7 @@ befs_read_lsymlink(struct super_block *sb, const befs_data_stream *ds,
        befs_off_t bytes_read = 0;      /* bytes readed */
        u16 plen;
        struct buffer_head *bh;
+
        befs_debug(sb, "---> %s length: %llu", __func__, len);
 
        while (bytes_read < len) {
@@ -183,13 +190,13 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds)
                metablocks += ds->indirect.len;
 
        /*
-          Double indir block, plus all the indirect blocks it mapps
-          In the double-indirect range, all block runs of data are
-          BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know 
-          how many data block runs are in the double-indirect region,
-          and from that we know how many indirect blocks it takes to
-          map them. We assume that the indirect blocks are also
-          BEFS_DBLINDIR_BRUN_LEN blocks long.
+        * Double indir block, plus all the indirect blocks it maps.
+        * In the double-indirect range, all block runs of data are
+        * BEFS_DBLINDIR_BRUN_LEN blocks long. Therefore, we know
+        * how many data block runs are in the double-indirect region,
+        * and from that we know how many indirect blocks it takes to
+        * map them. We assume that the indirect blocks are also
+        * BEFS_DBLINDIR_BRUN_LEN blocks long.
         */
        if (ds->size > ds->max_indirect_range && ds->max_indirect_range != 0) {
                uint dbl_bytes;
@@ -212,58 +219,50 @@ befs_count_blocks(struct super_block *sb, const befs_data_stream *ds)
        return blocks;
 }
 
-/*
-       Finds the block run that starts at file block number blockno
-       in the file represented by the datastream data, if that 
-       blockno is in the direct region of the datastream.
-       
-       sb: the superblock
-       data: the datastream
-       blockno: the blocknumber to find
-       run: The found run is passed back through this pointer
-       
-       Return value is BEFS_OK if the blockrun is found, BEFS_ERR
-       otherwise.
-       
-       Algorithm:
-       Linear search. Checks each element of array[] to see if it
-       contains the blockno-th filesystem block. This is necessary
-       because the block runs map variable amounts of data. Simply
-       keeps a count of the number of blocks searched so far (sum),
-       incrementing this by the length of each block run as we come
-       across it. Adds sum to *count before returning (this is so
-       you can search multiple arrays that are logicaly one array,
-       as in the indirect region code).
-       
-       When/if blockno is found, if blockno is inside of a block 
-       run as stored on disk, we offset the start and length members
-       of the block run, so that blockno is the start and len is
-       still valid (the run ends in the same place).
-       
-       2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_direct - find a direct block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the direct region of the datastream.
+ *
+ * Return value is BEFS_OK if the blockrun is found, BEFS_ERR
+ * otherwise.
+ *
+ * Algorithm:
+ * Linear search. Checks each element of array[] to see if it
+ * contains the blockno-th filesystem block. This is necessary
+ * because the block runs map variable amounts of data. Simply
+ * keeps a count of the number of blocks searched so far (sum),
+ * incrementing this by the length of each block run as we come
+ * across it. Adds sum to *count before returning (this is so
+ * you can search multiple arrays that are logically one array,
+ * as in the indirect region code).
+ *
+ * When/if blockno is found, if blockno is inside of a block
+ * run as stored on disk, we offset the start and length members
+ * of the block run, so that blockno is the start and len is
+ * still valid (the run ends in the same place).
+ */
 static int
 befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data,
-                     befs_blocknr_t blockno, befs_block_run * run)
+                     befs_blocknr_t blockno, befs_block_run *run)
 {
        int i;
        const befs_block_run *array = data->direct;
        befs_blocknr_t sum;
-       befs_blocknr_t max_block =
-           data->max_direct_range >> BEFS_SB(sb)->block_shift;
 
        befs_debug(sb, "---> %s, find %lu", __func__, (unsigned long)blockno);
 
-       if (blockno > max_block) {
-               befs_error(sb, "%s passed block outside of direct region",
-                          __func__);
-               return BEFS_ERR;
-       }
-
        for (i = 0, sum = 0; i < BEFS_NUM_DIRECT_BLOCKS;
             sum += array[i].len, i++) {
                if (blockno >= sum && blockno < sum + (array[i].len)) {
                        int offset = blockno - sum;
+
                        run->allocation_group = array[i].allocation_group;
                        run->start = array[i].start + offset;
                        run->len = array[i].len - offset;
@@ -275,38 +274,39 @@ befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data,
                }
        }
 
+       befs_error(sb, "%s failed to find file block %lu", __func__,
+                  (unsigned long)blockno);
        befs_debug(sb, "---> %s ERROR", __func__);
        return BEFS_ERR;
 }
 
-/*
-       Finds the block run that starts at file block number blockno
-       in the file represented by the datastream data, if that 
-       blockno is in the indirect region of the datastream.
-       
-       sb: the superblock
-       data: the datastream
-       blockno: the blocknumber to find
-       run: The found run is passed back through this pointer
-       
-       Return value is BEFS_OK if the blockrun is found, BEFS_ERR
-       otherwise.
-       
-       Algorithm:
-       For each block in the indirect run of the datastream, read
-       it in and search through it for search_blk.
-       
-       XXX:
-       Really should check to make sure blockno is inside indirect
-       region.
-       
-       2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_indirect - find a block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the indirect region of the datastream.
+ *
+ * Return value is BEFS_OK if the blockrun is found, BEFS_ERR
+ * otherwise.
+ *
+ * Algorithm:
+ * For each block in the indirect run of the datastream, read
+ * it in and search through it for search_blk.
+ *
+ * XXX:
+ * Really should check to make sure blockno is inside indirect
+ * region.
+ */
 static int
 befs_find_brun_indirect(struct super_block *sb,
                        const befs_data_stream *data,
                        befs_blocknr_t blockno,
-                       befs_block_run * run)
+                       befs_block_run *run)
 {
        int i, j;
        befs_blocknr_t sum = 0;
@@ -326,11 +326,12 @@ befs_find_brun_indirect(struct super_block *sb,
 
        /* Examine blocks of the indirect run one at a time */
        for (i = 0; i < indirect.len; i++) {
-               indirblock = befs_bread(sb, indirblockno + i);
+               indirblock = sb_bread(sb, indirblockno + i);
                if (indirblock == NULL) {
-                       befs_debug(sb, "---> %s failed to read "
+                       befs_error(sb, "---> %s failed to read "
                                   "disk block %lu from the indirect brun",
                                   __func__, (unsigned long)indirblockno + i);
+                       befs_debug(sb, "<--- %s ERROR", __func__);
                        return BEFS_ERR;
                }
 
@@ -370,52 +371,51 @@ befs_find_brun_indirect(struct super_block *sb,
        return BEFS_ERR;
 }
 
-/*
-       Finds the block run that starts at file block number blockno
-       in the file represented by the datastream data, if that 
-       blockno is in the double-indirect region of the datastream.
-       
-       sb: the superblock
-       data: the datastream
-       blockno: the blocknumber to find
-       run: The found run is passed back through this pointer
-       
-       Return value is BEFS_OK if the blockrun is found, BEFS_ERR
-       otherwise.
-       
-       Algorithm:
-       The block runs in the double-indirect region are different.
-       They are always allocated 4 fs blocks at a time, so each
-       block run maps a constant amount of file data. This means
-       that we can directly calculate how many block runs into the
-       double-indirect region we need to go to get to the one that
-       maps a particular filesystem block.
-       
-       We do this in two stages. First we calculate which of the
-       inode addresses in the double-indirect block will point us
-       to the indirect block that contains the mapping for the data,
-       then we calculate which of the inode addresses in that 
-       indirect block maps the data block we are after.
-       
-       Oh, and once we've done that, we actually read in the blocks 
-       that contain the inode addresses we calculated above. Even 
-       though the double-indirect run may be several blocks long, 
-       we can calculate which of those blocks will contain the index
-       we are after and only read that one. We then follow it to 
-       the indirect block and perform a  similar process to find
-       the actual block run that maps the data block we are interested
-       in.
-       
-       Then we offset the run as in befs_find_brun_array() and we are 
-       done.
-       
-       2001-11-15 Will Dyson
-*/
+/**
+ * befs_find_brun_dblindirect - find a block run in the datastream
+ * @sb: the superblock
+ * @data: the datastream
+ * @blockno: the blocknumber to find
+ * @run: The found run is passed back through this pointer
+ *
+ * Finds the block run that starts at file block number blockno
+ * in the file represented by the datastream data, if that
+ * blockno is in the double-indirect region of the datastream.
+ *
+ * Return value is BEFS_OK if the blockrun is found, BEFS_ERR
+ * otherwise.
+ *
+ * Algorithm:
+ * The block runs in the double-indirect region are different.
+ * They are always allocated 4 fs blocks at a time, so each
+ * block run maps a constant amount of file data. This means
+ * that we can directly calculate how many block runs into the
+ * double-indirect region we need to go to get to the one that
+ * maps a particular filesystem block.
+ *
+ * We do this in two stages. First we calculate which of the
+ * inode addresses in the double-indirect block will point us
+ * to the indirect block that contains the mapping for the data,
+ * then we calculate which of the inode addresses in that
+ * indirect block maps the data block we are after.
+ *
+ * Oh, and once we've done that, we actually read in the blocks
+ * that contain the inode addresses we calculated above. Even
+ * though the double-indirect run may be several blocks long,
+ * we can calculate which of those blocks will contain the index
+ * we are after and only read that one. We then follow it to
+ * the indirect block and perform a similar process to find
+ * the actual block run that maps the data block we are interested
+ * in.
+ *
+ * Then we offset the run as in befs_find_brun_array() and we are
+ * done.
+ */
 static int
 befs_find_brun_dblindirect(struct super_block *sb,
                           const befs_data_stream *data,
                           befs_blocknr_t blockno,
-                          befs_block_run * run)
+                          befs_block_run *run)
 {
        int dblindir_indx;
        int indir_indx;
@@ -430,10 +430,9 @@ befs_find_brun_dblindirect(struct super_block *sb,
        struct buffer_head *indir_block;
        befs_block_run indir_run;
        befs_disk_inode_addr *iaddr_array;
-       struct befs_sb_info *befs_sb = BEFS_SB(sb);
 
        befs_blocknr_t indir_start_blk =
-           data->max_indirect_range >> befs_sb->block_shift;
+           data->max_indirect_range >> BEFS_SB(sb)->block_shift;
 
        off_t dbl_indir_off = blockno - indir_start_blk;
 
@@ -471,7 +470,7 @@ befs_find_brun_dblindirect(struct super_block *sb,
        }
 
        dbl_indir_block =
-           befs_bread(sb, iaddr2blockno(sb, &data->double_indirect) +
+           sb_bread(sb, iaddr2blockno(sb, &data->double_indirect) +
                                        dbl_which_block);
        if (dbl_indir_block == NULL) {
                befs_error(sb, "%s couldn't read the "
@@ -479,7 +478,6 @@ befs_find_brun_dblindirect(struct super_block *sb,
                           (unsigned long)
                           iaddr2blockno(sb, &data->double_indirect) +
                           dbl_which_block);
-               brelse(dbl_indir_block);
                return BEFS_ERR;
        }
 
@@ -499,12 +497,11 @@ befs_find_brun_dblindirect(struct super_block *sb,
        }
 
        indir_block =
-           befs_bread(sb, iaddr2blockno(sb, &indir_run) + which_block);
+           sb_bread(sb, iaddr2blockno(sb, &indir_run) + which_block);
        if (indir_block == NULL) {
                befs_error(sb, "%s couldn't read the indirect block "
                           "at blockno %lu", __func__, (unsigned long)
                           iaddr2blockno(sb, &indir_run) + which_block);
-               brelse(indir_block);
                return BEFS_ERR;
        }
 
index 4de7cffcd66223c4a85a216cb58e5a8e62953e6c..85c13392e9e897dae2ee288f301c88c3067e8303 100644 (file)
@@ -169,6 +169,7 @@ befs_dump_super_block(const struct super_block *sb, befs_super_block * sup)
 
        befs_debug(sb, "  num_blocks %llu", fs64_to_cpu(sb, sup->num_blocks));
        befs_debug(sb, "  used_blocks %llu", fs64_to_cpu(sb, sup->used_blocks));
+       befs_debug(sb, "  inode_size %u", fs32_to_cpu(sb, sup->inode_size));
 
        befs_debug(sb, "  magic2 %08x", fs32_to_cpu(sb, sup->magic2));
        befs_debug(sb, "  blocks_per_ag %u",
index 523c8af2d770b08473bfa6200d635ae46364b0e2..b4a558126ee1724b0d3bd833f68a1c201833af33 100644 (file)
@@ -27,7 +27,7 @@ struct buffer_head *
 befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
 {
        struct buffer_head *bh;
-       befs_blocknr_t block = 0;
+       befs_blocknr_t block;
        struct befs_sb_info *befs_sb = BEFS_SB(sb);
 
        befs_debug(sb, "---> Enter %s "
@@ -59,27 +59,3 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr)
        befs_debug(sb, "<--- %s ERROR", __func__);
        return NULL;
 }
-
-struct buffer_head *
-befs_bread(struct super_block *sb, befs_blocknr_t block)
-{
-       struct buffer_head *bh;
-
-       befs_debug(sb, "---> Enter %s %lu", __func__, (unsigned long)block);
-
-       bh = sb_bread(sb, block);
-
-       if (bh == NULL) {
-               befs_error(sb, "Failed to read block %lu",
-                          (unsigned long)block);
-               goto error;
-       }
-
-       befs_debug(sb, "<--- %s", __func__);
-
-       return bh;
-
-      error:
-       befs_debug(sb, "<--- %s ERROR", __func__);
-       return NULL;
-}
index 9b78266b6aa57b73637167d0fb426ccc08be2cff..78d7bc6e60dee4d51fb1ac6325cca8b3929bd9c2 100644 (file)
@@ -5,5 +5,3 @@
 struct buffer_head *befs_bread_iaddr(struct super_block *sb,
                                     befs_inode_addr iaddr);
 
-struct buffer_head *befs_bread(struct super_block *sb, befs_blocknr_t block);
-
index bfe9f999493531b3bbbc348e053f6f581db878fe..647a276eba5654593739aafa9a6984ca967d8adf 100644 (file)
@@ -120,7 +120,7 @@ befs_get_block(struct inode *inode, sector_t block,
        struct super_block *sb = inode->i_sb;
        befs_data_stream *ds = &BEFS_I(inode)->i_data.ds;
        befs_block_run run = BAD_IADDR;
-       int res = 0;
+       int res;
        ulong disk_off;
 
        befs_debug(sb, "---> befs_get_block() for inode %lu, block %ld",
@@ -179,15 +179,16 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
                kfree(utfname);
 
        } else {
-               ret = befs_btree_find(sb, ds, dentry->d_name.name, &offset);
+               ret = befs_btree_find(sb, ds, name, &offset);
        }
 
        if (ret == BEFS_BT_NOT_FOUND) {
                befs_debug(sb, "<--- %s %pd not found", __func__, dentry);
+               d_add(dentry, NULL);
                return ERR_PTR(-ENOENT);
 
        } else if (ret != BEFS_OK || offset == 0) {
-               befs_warning(sb, "<--- %s Error", __func__);
+               befs_error(sb, "<--- %s Error", __func__);
                return ERR_PTR(-ENODATA);
        }
 
@@ -211,56 +212,55 @@ befs_readdir(struct file *file, struct dir_context *ctx)
        befs_off_t value;
        int result;
        size_t keysize;
-       unsigned char d_type;
        char keybuf[BEFS_NAME_LEN + 1];
 
        befs_debug(sb, "---> %s name %pD, inode %ld, ctx->pos %lld",
                  __func__, file, inode->i_ino, ctx->pos);
 
-more:
-       result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1,
-                                keybuf, &keysize, &value);
+       while (1) {
+               result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1,
+                                        keybuf, &keysize, &value);
 
-       if (result == BEFS_ERR) {
-               befs_debug(sb, "<--- %s ERROR", __func__);
-               befs_error(sb, "IO error reading %pD (inode %lu)",
-                          file, inode->i_ino);
-               return -EIO;
-
-       } else if (result == BEFS_BT_END) {
-               befs_debug(sb, "<--- %s END", __func__);
-               return 0;
-
-       } else if (result == BEFS_BT_EMPTY) {
-               befs_debug(sb, "<--- %s Empty directory", __func__);
-               return 0;
-       }
+               if (result == BEFS_ERR) {
+                       befs_debug(sb, "<--- %s ERROR", __func__);
+                       befs_error(sb, "IO error reading %pD (inode %lu)",
+                                  file, inode->i_ino);
+                       return -EIO;
 
-       d_type = DT_UNKNOWN;
+               } else if (result == BEFS_BT_END) {
+                       befs_debug(sb, "<--- %s END", __func__);
+                       return 0;
 
-       /* Convert to NLS */
-       if (BEFS_SB(sb)->nls) {
-               char *nlsname;
-               int nlsnamelen;
-               result =
-                   befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen);
-               if (result < 0) {
-                       befs_debug(sb, "<--- %s ERROR", __func__);
-                       return result;
+               } else if (result == BEFS_BT_EMPTY) {
+                       befs_debug(sb, "<--- %s Empty directory", __func__);
+                       return 0;
                }
-               if (!dir_emit(ctx, nlsname, nlsnamelen,
-                                (ino_t) value, d_type)) {
+
+               /* Convert to NLS */
+               if (BEFS_SB(sb)->nls) {
+                       char *nlsname;
+                       int nlsnamelen;
+
+                       result =
+                           befs_utf2nls(sb, keybuf, keysize, &nlsname,
+                                        &nlsnamelen);
+                       if (result < 0) {
+                               befs_debug(sb, "<--- %s ERROR", __func__);
+                               return result;
+                       }
+                       if (!dir_emit(ctx, nlsname, nlsnamelen,
+                                     (ino_t) value, DT_UNKNOWN)) {
+                               kfree(nlsname);
+                               return 0;
+                       }
                        kfree(nlsname);
-                       return 0;
+               } else {
+                       if (!dir_emit(ctx, keybuf, keysize,
+                                     (ino_t) value, DT_UNKNOWN))
+                               return 0;
                }
-               kfree(nlsname);
-       } else {
-               if (!dir_emit(ctx, keybuf, keysize,
-                                (ino_t) value, d_type))
-                       return 0;
+               ctx->pos++;
        }
-       ctx->pos++;
-       goto more;
 }
 
 static struct inode *
@@ -299,7 +299,6 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
        struct befs_sb_info *befs_sb = BEFS_SB(sb);
        struct befs_inode_info *befs_ino;
        struct inode *inode;
-       long ret = -EIO;
 
        befs_debug(sb, "---> %s inode = %lu", __func__, ino);
 
@@ -318,7 +317,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
                   befs_ino->i_inode_num.allocation_group,
                   befs_ino->i_inode_num.start, befs_ino->i_inode_num.len);
 
-       bh = befs_bread(sb, inode->i_ino);
+       bh = sb_bread(sb, inode->i_ino);
        if (!bh) {
                befs_error(sb, "unable to read inode block - "
                           "inode = %lu", inode->i_ino);
@@ -421,7 +420,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
       unacquire_none:
        iget_failed(inode);
        befs_debug(sb, "<--- %s - Bad inode", __func__);
-       return ERR_PTR(ret);
+       return ERR_PTR(-EIO);
 }
 
 /* Initialize the inode cache. Called at fs setup.
@@ -436,10 +435,9 @@ befs_init_inodecache(void)
                                              0, (SLAB_RECLAIM_ACCOUNT|
                                                SLAB_MEM_SPREAD|SLAB_ACCOUNT),
                                              init_once);
-       if (befs_inode_cachep == NULL) {
-               pr_err("%s: Couldn't initialize inode slabcache\n", __func__);
+       if (befs_inode_cachep == NULL)
                return -ENOMEM;
-       }
+
        return 0;
 }
 
@@ -524,8 +522,6 @@ befs_utf2nls(struct super_block *sb, const char *in,
 
        *out = result = kmalloc(maxlen, GFP_NOFS);
        if (!*out) {
-               befs_error(sb, "%s cannot allocate memory", __func__);
-               *out_len = 0;
                return -ENOMEM;
        }
 
@@ -604,7 +600,6 @@ befs_nls2utf(struct super_block *sb, const char *in,
 
        *out = result = kmalloc(maxlen, GFP_NOFS);
        if (!*out) {
-               befs_error(sb, "%s cannot allocate memory", __func__);
                *out_len = 0;
                return -ENOMEM;
        }
@@ -637,10 +632,6 @@ befs_nls2utf(struct super_block *sb, const char *in,
        return -EILSEQ;
 }
 
-/**
- * Use the
- *
- */
 enum {
        Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err,
 };
@@ -760,19 +751,19 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
        long ret = -EINVAL;
        const unsigned long sb_block = 0;
        const off_t x86_sb_off = 512;
+       int blocksize;
 
        save_mount_options(sb, data);
 
        sb->s_fs_info = kzalloc(sizeof(*befs_sb), GFP_KERNEL);
-       if (sb->s_fs_info == NULL) {
-               pr_err("(%s): Unable to allocate memory for private "
-                      "portion of superblock. Bailing.\n", sb->s_id);
+       if (sb->s_fs_info == NULL)
                goto unacquire_none;
-       }
+
        befs_sb = BEFS_SB(sb);
 
        if (!parse_options((char *) data, &befs_sb->mount_opts)) {
-               befs_error(sb, "cannot parse mount options");
+               if (!silent)
+                       befs_error(sb, "cannot parse mount options");
                goto unacquire_priv_sbp;
        }
 
@@ -793,10 +784,16 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
         * least 1k to get the second 512 bytes of the volume.
         * -WD 10-26-01
         */ 
-       sb_min_blocksize(sb, 1024);
+       blocksize = sb_min_blocksize(sb, 1024);
+       if (!blocksize) {
+               if (!silent)
+                       befs_error(sb, "unable to set blocksize");
+               goto unacquire_priv_sbp;
+       }
 
        if (!(bh = sb_bread(sb, sb_block))) {
-               befs_error(sb, "unable to read superblock");
+               if (!silent)
+                       befs_error(sb, "unable to read superblock");
                goto unacquire_priv_sbp;
        }
 
@@ -820,9 +817,9 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
        brelse(bh);
 
        if( befs_sb->num_blocks > ~((sector_t)0) ) {
-               befs_error(sb, "blocks count: %llu "
-                       "is larger than the host can use",
-                       befs_sb->num_blocks);
+               if (!silent)
+                       befs_error(sb, "blocks count: %llu is larger than the host can use",
+                                       befs_sb->num_blocks);
                goto unacquire_priv_sbp;
        }
 
@@ -841,7 +838,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
        }
        sb->s_root = d_make_root(root);
        if (!sb->s_root) {
-               befs_error(sb, "get root inode failed");
+               if (!silent)
+                       befs_error(sb, "get root inode failed");
                goto unacquire_priv_sbp;
        }
 
@@ -870,9 +868,9 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
       unacquire_priv_sbp:
        kfree(befs_sb->mount_opts.iocharset);
        kfree(sb->s_fs_info);
+       sb->s_fs_info = NULL;
 
       unacquire_none:
-       sb->s_fs_info = NULL;
        return ret;
 }
 
index aeafc4d842788ca7cc760599b68e9d5988dd4110..7c50025c99d8ab38f2de8c3abe93a201143e0635 100644 (file)
 #include "befs.h"
 #include "super.h"
 
-/**
- * load_befs_sb -- Read from disk and properly byteswap all the fields
+/*
+ * befs_load_sb -- Read from disk and properly byteswap all the fields
  * of the befs superblock
- *
- *
- *
- *
  */
 int
-befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
+befs_load_sb(struct super_block *sb, befs_super_block *disk_sb)
 {
        struct befs_sb_info *befs_sb = BEFS_SB(sb);
 
        /* Check the byte order of the filesystem */
        if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE)
-           befs_sb->byte_order = BEFS_BYTESEX_LE;
+               befs_sb->byte_order = BEFS_BYTESEX_LE;
        else if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_BE)
-           befs_sb->byte_order = BEFS_BYTESEX_BE;
+               befs_sb->byte_order = BEFS_BYTESEX_BE;
 
        befs_sb->magic1 = fs32_to_cpu(sb, disk_sb->magic1);
        befs_sb->magic2 = fs32_to_cpu(sb, disk_sb->magic2);
@@ -45,6 +41,8 @@ befs_load_sb(struct super_block *sb, befs_super_block * disk_sb)
        befs_sb->ag_shift = fs32_to_cpu(sb, disk_sb->ag_shift);
        befs_sb->num_ags = fs32_to_cpu(sb, disk_sb->num_ags);
 
+       befs_sb->flags = fs32_to_cpu(sb, disk_sb->flags);
+
        befs_sb->log_blocks = fsrun_to_cpu(sb, disk_sb->log_blocks);
        befs_sb->log_start = fs64_to_cpu(sb, disk_sb->log_start);
        befs_sb->log_end = fs64_to_cpu(sb, disk_sb->log_end);
@@ -84,15 +82,15 @@ befs_check_sb(struct super_block *sb)
        }
 
        if (befs_sb->block_size > PAGE_SIZE) {
-               befs_error(sb, "blocksize(%u) cannot be larger"
+               befs_error(sb, "blocksize(%u) cannot be larger "
                           "than system pagesize(%lu)", befs_sb->block_size,
                           PAGE_SIZE);
                return BEFS_ERR;
        }
 
        /*
-          * block_shift and block_size encode the same information
-          * in different ways as a consistency check.
+        * block_shift and block_size encode the same information
+        * in different ways as a consistency check.
         */
 
        if ((1 << befs_sb->block_shift) != befs_sb->block_size) {
@@ -101,10 +99,18 @@ befs_check_sb(struct super_block *sb)
                return BEFS_ERR;
        }
 
-       if (befs_sb->log_start != befs_sb->log_end) {
+
+       /* ag_shift also encodes the same information as blocks_per_ag in a
+        * different way, non-fatal consistency check
+        */
+       if ((1 << befs_sb->ag_shift) != befs_sb->blocks_per_ag)
+               befs_error(sb, "ag_shift disagrees with blocks_per_ag.");
+
+       if (befs_sb->log_start != befs_sb->log_end ||
+           befs_sb->flags == BEFS_DIRTY) {
                befs_error(sb, "Filesystem not clean! There are blocks in the "
-                          "journal. You must boot into BeOS and mount this volume "
-                          "to make it clean.");
+                          "journal. You must boot into BeOS and mount this "
+                          "volume to make it clean.");
                return BEFS_ERR;
        }
 
index ccc70d96958d87e0b2db3fc901b513e51a9aa803..d4d8b7e36b2ffe7b84ddb78e4064b606aaee5e01 100644 (file)
@@ -698,7 +698,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
                        ret = btrfs_map_bio(root, comp_bio, mirror_num, 0);
                        if (ret) {
-                               bio->bi_error = ret;
+                               comp_bio->bi_error = ret;
                                bio_endio(comp_bio);
                        }
 
@@ -728,7 +728,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 
        ret = btrfs_map_bio(root, comp_bio, mirror_num, 0);
        if (ret) {
-               bio->bi_error = ret;
+               comp_bio->bi_error = ret;
                bio_endio(comp_bio);
        }
 
index 6c21bad26a27ba909be4691d3a9ad640aa38cbb4..0b8ce2b9f7d0c8c052b8f73df269798450263784 100644 (file)
@@ -252,7 +252,8 @@ struct btrfs_super_block {
 #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR                0ULL
 
 #define BTRFS_FEATURE_COMPAT_RO_SUPP                   \
-       (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)
+       (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |      \
+        BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
 
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET       0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR     0ULL
index e720d3e6ec20e7179cb95ae81f0fc3f76b3e1029..3a57f99d96aa7aa0af541e328af203ff73acdb64 100644 (file)
@@ -2586,6 +2586,7 @@ int open_ctree(struct super_block *sb,
        int num_backups_tried = 0;
        int backup_index = 0;
        int max_active;
+       int clear_free_space_tree = 0;
 
        tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
        chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
@@ -3148,6 +3149,26 @@ retry_root_backup:
        if (sb->s_flags & MS_RDONLY)
                return 0;
 
+       if (btrfs_test_opt(fs_info, CLEAR_CACHE) &&
+           btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+               clear_free_space_tree = 1;
+       } else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+                  !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) {
+               btrfs_warn(fs_info, "free space tree is invalid");
+               clear_free_space_tree = 1;
+       }
+
+       if (clear_free_space_tree) {
+               btrfs_info(fs_info, "clearing free space tree");
+               ret = btrfs_clear_free_space_tree(fs_info);
+               if (ret) {
+                       btrfs_warn(fs_info,
+                                  "failed to clear free space tree: %d", ret);
+                       close_ctree(tree_root);
+                       return ret;
+               }
+       }
+
        if (btrfs_test_opt(tree_root->fs_info, FREE_SPACE_TREE) &&
            !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
                btrfs_info(fs_info, "creating free space tree");
@@ -3185,18 +3206,6 @@ retry_root_backup:
 
        btrfs_qgroup_rescan_resume(fs_info);
 
-       if (btrfs_test_opt(tree_root->fs_info, CLEAR_CACHE) &&
-           btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
-               btrfs_info(fs_info, "clearing free space tree");
-               ret = btrfs_clear_free_space_tree(fs_info);
-               if (ret) {
-                       btrfs_warn(fs_info,
-                               "failed to clear free space tree: %d", ret);
-                       close_ctree(tree_root);
-                       return ret;
-               }
-       }
-
        if (!fs_info->uuid_root) {
                btrfs_info(fs_info, "creating UUID tree");
                ret = btrfs_create_uuid_tree(fs_info);
index 210c94ac881888045b59a2f7a0d10f7c698cdebc..4607af38c72e100e6728ff41d8198140dd722d93 100644 (file)
@@ -2647,7 +2647,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 
                btrfs_free_delayed_extent_op(extent_op);
                if (ret) {
+                       spin_lock(&delayed_refs->lock);
                        locked_ref->processing = 0;
+                       delayed_refs->num_heads_ready++;
+                       spin_unlock(&delayed_refs->lock);
                        btrfs_delayed_ref_unlock(locked_ref);
                        btrfs_put_delayed_ref(ref);
                        btrfs_debug(fs_info, "run_one_delayed_ref returned %d",
index ee40384c394d5647a8f9ee65e8c28f0cd32426fc..8ed05d95584a30c2c0cdc4feb1bd2036e93d3276 100644 (file)
@@ -5558,17 +5558,45 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
        }
 }
 
-/*
- * The extent buffer bitmap operations are done with byte granularity because
- * bitmap items are not guaranteed to be aligned to a word and therefore a
- * single word in a bitmap may straddle two pages in the extent buffer.
- */
-#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
-#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
-#define BITMAP_FIRST_BYTE_MASK(start) \
-       ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
-#define BITMAP_LAST_BYTE_MASK(nbits) \
-       (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+void le_bitmap_set(u8 *map, unsigned int start, int len)
+{
+       u8 *p = map + BIT_BYTE(start);
+       const unsigned int size = start + len;
+       int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+       u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);
+
+       while (len - bits_to_set >= 0) {
+               *p |= mask_to_set;
+               len -= bits_to_set;
+               bits_to_set = BITS_PER_BYTE;
+               mask_to_set = ~0;
+               p++;
+       }
+       if (len) {
+               mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+               *p |= mask_to_set;
+       }
+}
+
+void le_bitmap_clear(u8 *map, unsigned int start, int len)
+{
+       u8 *p = map + BIT_BYTE(start);
+       const unsigned int size = start + len;
+       int bits_to_clear = BITS_PER_BYTE - (start % BITS_PER_BYTE);
+       u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(start);
+
+       while (len - bits_to_clear >= 0) {
+               *p &= ~mask_to_clear;
+               len -= bits_to_clear;
+               bits_to_clear = BITS_PER_BYTE;
+               mask_to_clear = ~0;
+               p++;
+       }
+       if (len) {
+               mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
+               *p &= ~mask_to_clear;
+       }
+}
 
 /*
  * eb_bitmap_offset() - calculate the page and offset of the byte containing the
@@ -5612,7 +5640,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
 int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
                           unsigned long nr)
 {
-       char *kaddr;
+       u8 *kaddr;
        struct page *page;
        unsigned long i;
        size_t offset;
@@ -5634,13 +5662,13 @@ int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
 void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
                              unsigned long pos, unsigned long len)
 {
-       char *kaddr;
+       u8 *kaddr;
        struct page *page;
        unsigned long i;
        size_t offset;
        const unsigned int size = pos + len;
        int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
-       unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
+       u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
 
        eb_bitmap_offset(eb, start, pos, &i, &offset);
        page = eb->pages[i];
@@ -5651,7 +5679,7 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
                kaddr[offset] |= mask_to_set;
                len -= bits_to_set;
                bits_to_set = BITS_PER_BYTE;
-               mask_to_set = ~0U;
+               mask_to_set = ~0;
                if (++offset >= PAGE_SIZE && len > 0) {
                        offset = 0;
                        page = eb->pages[++i];
@@ -5676,13 +5704,13 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
 void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
                                unsigned long pos, unsigned long len)
 {
-       char *kaddr;
+       u8 *kaddr;
        struct page *page;
        unsigned long i;
        size_t offset;
        const unsigned int size = pos + len;
        int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
-       unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
+       u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
 
        eb_bitmap_offset(eb, start, pos, &i, &offset);
        page = eb->pages[i];
@@ -5693,7 +5721,7 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
                kaddr[offset] &= ~mask_to_clear;
                len -= bits_to_clear;
                bits_to_clear = BITS_PER_BYTE;
-               mask_to_clear = ~0U;
+               mask_to_clear = ~0;
                if (++offset >= PAGE_SIZE && len > 0) {
                        offset = 0;
                        page = eb->pages[++i];
index 4a094f1dc7ef98cb283009bc73ad6c9927c21145..ab31d145227edf423e15b7fb72d2094f402f7e56 100644 (file)
  */
 #define EXTENT_PAGE_PRIVATE 1
 
+/*
+ * The extent buffer bitmap operations are done with byte granularity instead of
+ * word granularity for two reasons:
+ * 1. The bitmaps must be little-endian on disk.
+ * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
+ *    single word in a bitmap may straddle two pages in the extent buffer.
+ */
+#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
+#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+#define BITMAP_FIRST_BYTE_MASK(start) \
+       ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
+#define BITMAP_LAST_BYTE_MASK(nbits) \
+       (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+
+static inline int le_test_bit(int nr, const u8 *addr)
+{
+       return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
+}
+
+extern void le_bitmap_set(u8 *map, unsigned int start, int len);
+extern void le_bitmap_clear(u8 *map, unsigned int start, int len);
+
 struct extent_state;
 struct btrfs_root;
 struct btrfs_io_bio;
index e4a42a8e4f849bf87399bc9ebd03d6989ce2b868..57401b474ec6f1d0f050a003bb345c50f58c867f 100644 (file)
@@ -151,7 +151,7 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
        return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE);
 }
 
-static unsigned long *alloc_bitmap(u32 bitmap_size)
+static u8 *alloc_bitmap(u32 bitmap_size)
 {
        void *mem;
 
@@ -180,8 +180,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
        struct btrfs_free_space_info *info;
        struct btrfs_key key, found_key;
        struct extent_buffer *leaf;
-       unsigned long *bitmap;
-       char *bitmap_cursor;
+       u8 *bitmap, *bitmap_cursor;
        u64 start, end;
        u64 bitmap_range, i;
        u32 bitmap_size, flags, expected_extent_count;
@@ -231,7 +230,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
                                                block_group->sectorsize);
                                last = div_u64(found_key.objectid + found_key.offset - start,
                                               block_group->sectorsize);
-                               bitmap_set(bitmap, first, last - first);
+                               le_bitmap_set(bitmap, first, last - first);
 
                                extent_count++;
                                nr++;
@@ -270,7 +269,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
                goto out;
        }
 
-       bitmap_cursor = (char *)bitmap;
+       bitmap_cursor = bitmap;
        bitmap_range = block_group->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
        i = start;
        while (i < end) {
@@ -319,7 +318,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
        struct btrfs_free_space_info *info;
        struct btrfs_key key, found_key;
        struct extent_buffer *leaf;
-       unsigned long *bitmap;
+       u8 *bitmap;
        u64 start, end;
        /* Initialize to silence GCC. */
        u64 extent_start = 0;
@@ -363,7 +362,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
                                break;
                        } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
                                unsigned long ptr;
-                               char *bitmap_cursor;
+                               u8 *bitmap_cursor;
                                u32 bitmap_pos, data_size;
 
                                ASSERT(found_key.objectid >= start);
@@ -373,7 +372,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
                                bitmap_pos = div_u64(found_key.objectid - start,
                                                     block_group->sectorsize *
                                                     BITS_PER_BYTE);
-                               bitmap_cursor = ((char *)bitmap) + bitmap_pos;
+                               bitmap_cursor = bitmap + bitmap_pos;
                                data_size = free_space_bitmap_size(found_key.offset,
                                                                   block_group->sectorsize);
 
@@ -410,7 +409,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
        offset = start;
        bitnr = 0;
        while (offset < end) {
-               bit = !!test_bit(bitnr, bitmap);
+               bit = !!le_test_bit(bitnr, bitmap);
                if (prev_bit == 0 && bit == 1) {
                        extent_start = offset;
                } else if (prev_bit == 1 && bit == 0) {
@@ -1185,6 +1184,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
        }
 
        btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+       btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
        clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
 
        ret = btrfs_commit_transaction(trans, tree_root);
@@ -1253,6 +1253,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
                return PTR_ERR(trans);
 
        btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+       btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
        fs_info->free_space_root = NULL;
 
        ret = clear_free_space_tree(trans, free_space_root);
index 2b790bda79988002403f6fe326dcc5535e9f079d..8e3a5a266917c0fac9da9f41ee080262f3394779 100644 (file)
@@ -4605,8 +4605,8 @@ delete:
                        BUG_ON(ret);
                        if (btrfs_should_throttle_delayed_refs(trans, root))
                                btrfs_async_run_delayed_refs(root,
-                                                            trans->transid,
-                                       trans->delayed_ref_updates * 2, 0);
+                                       trans->delayed_ref_updates * 2,
+                                       trans->transid, 0);
                        if (be_nice) {
                                if (truncate_space_check(trans, root,
                                                         extent_num_bytes)) {
@@ -8931,9 +8931,14 @@ again:
         *    So even we call qgroup_free_data(), it won't decrease reserved
         *    space.
         * 2) Not written to disk
-        *    This means the reserved space should be freed here.
+        *    This means the reserved space should be freed here. However,
+        *    if a truncate invalidates the page (by clearing PageDirty)
+        *    and the page is accounted for while allocating extent
+        *    in btrfs_check_data_free_space() we let delayed_ref to
+        *    free the entire extent.
         */
-       btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
+       if (PageDirty(page))
+               btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
        if (!inode_evicting) {
                clear_extent_bit(tree, page_start, page_end,
                                 EXTENT_LOCKED | EXTENT_DIRTY |
index 18e1aa0f85f5764aa28de59e148a96f49fdacf04..7acbd2cf6192ee8d967236f4b3aeecf1bfc98658 100644 (file)
@@ -3814,6 +3814,11 @@ process_slot:
                }
                btrfs_release_path(path);
                key.offset = next_key_min_offset;
+
+               if (fatal_signal_pending(current)) {
+                       ret = -EINTR;
+                       goto out;
+               }
        }
        ret = 0;
 
index 0ec8ffa37ab09dce21e6d2806c938cce6d4d17a7..c4af0cdb783d0e2ee203ad416abb745fcabd7e02 100644 (file)
@@ -2728,7 +2728,14 @@ static int do_relocation(struct btrfs_trans_handle *trans,
 
                bytenr = btrfs_node_blockptr(upper->eb, slot);
                if (lowest) {
-                       BUG_ON(bytenr != node->bytenr);
+                       if (bytenr != node->bytenr) {
+                               btrfs_err(root->fs_info,
+               "lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu",
+                                         bytenr, node->bytenr, slot,
+                                         upper->eb->start);
+                               err = -EIO;
+                               goto next;
+                       }
                } else {
                        if (node->eb->start == bytenr)
                                goto next;
index 01bc36cec26ea132f215aed048bc4f3f5e4e80fd..71261b459863b92ea8e0dff40e99fc79b449286a 100644 (file)
@@ -5805,6 +5805,64 @@ static int changed_extent(struct send_ctx *sctx,
        int ret = 0;
 
        if (sctx->cur_ino != sctx->cmp_key->objectid) {
+
+               if (result == BTRFS_COMPARE_TREE_CHANGED) {
+                       struct extent_buffer *leaf_l;
+                       struct extent_buffer *leaf_r;
+                       struct btrfs_file_extent_item *ei_l;
+                       struct btrfs_file_extent_item *ei_r;
+
+                       leaf_l = sctx->left_path->nodes[0];
+                       leaf_r = sctx->right_path->nodes[0];
+                       ei_l = btrfs_item_ptr(leaf_l,
+                                             sctx->left_path->slots[0],
+                                             struct btrfs_file_extent_item);
+                       ei_r = btrfs_item_ptr(leaf_r,
+                                             sctx->right_path->slots[0],
+                                             struct btrfs_file_extent_item);
+
+                       /*
+                        * We may have found an extent item that has changed
+                        * only its disk_bytenr field and the corresponding
+                        * inode item was not updated. This case happens due to
+                        * very specific timings during relocation when a leaf
+                        * that contains file extent items is COWed while
+                        * relocation is ongoing and its in the stage where it
+                        * updates data pointers. So when this happens we can
+                        * safely ignore it since we know it's the same extent,
+                        * but just at different logical and physical locations
+                        * (when an extent is fully replaced with a new one, we
+                        * know the generation number must have changed too,
+                        * since snapshot creation implies committing the current
+                        * transaction, and the inode item must have been updated
+                        * as well).
+                        * This replacement of the disk_bytenr happens at
+                        * relocation.c:replace_file_extents() through
+                        * relocation.c:btrfs_reloc_cow_block().
+                        */
+                       if (btrfs_file_extent_generation(leaf_l, ei_l) ==
+                           btrfs_file_extent_generation(leaf_r, ei_r) &&
+                           btrfs_file_extent_ram_bytes(leaf_l, ei_l) ==
+                           btrfs_file_extent_ram_bytes(leaf_r, ei_r) &&
+                           btrfs_file_extent_compression(leaf_l, ei_l) ==
+                           btrfs_file_extent_compression(leaf_r, ei_r) &&
+                           btrfs_file_extent_encryption(leaf_l, ei_l) ==
+                           btrfs_file_extent_encryption(leaf_r, ei_r) &&
+                           btrfs_file_extent_other_encoding(leaf_l, ei_l) ==
+                           btrfs_file_extent_other_encoding(leaf_r, ei_r) &&
+                           btrfs_file_extent_type(leaf_l, ei_l) ==
+                           btrfs_file_extent_type(leaf_r, ei_r) &&
+                           btrfs_file_extent_disk_bytenr(leaf_l, ei_l) !=
+                           btrfs_file_extent_disk_bytenr(leaf_r, ei_r) &&
+                           btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) ==
+                           btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) &&
+                           btrfs_file_extent_offset(leaf_l, ei_l) ==
+                           btrfs_file_extent_offset(leaf_r, ei_r) &&
+                           btrfs_file_extent_num_bytes(leaf_l, ei_l) ==
+                           btrfs_file_extent_num_bytes(leaf_r, ei_r))
+                               return 0;
+               }
+
                inconsistent_snapshot_error(sctx, result, "extent");
                return -EIO;
        }
index d19ab0317283ca728963700c93cff334819eaf16..caad80bb9bd0116072af62a8aae96bac86db4c51 100644 (file)
@@ -273,20 +273,37 @@ out:
        return ret;
 }
 
-/**
- * test_bit_in_byte - Determine whether a bit is set in a byte
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static inline int test_bit_in_byte(int nr, const u8 *addr)
+static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb,
+                          unsigned long len)
 {
-       return 1UL & (addr[nr / BITS_PER_BYTE] >> (nr & (BITS_PER_BYTE - 1)));
+       unsigned long i;
+
+       for (i = 0; i < len * BITS_PER_BYTE; i++) {
+               int bit, bit1;
+
+               bit = !!test_bit(i, bitmap);
+               bit1 = !!extent_buffer_test_bit(eb, 0, i);
+               if (bit1 != bit) {
+                       test_msg("Bits do not match\n");
+                       return -EINVAL;
+               }
+
+               bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
+                                               i % BITS_PER_BYTE);
+               if (bit1 != bit) {
+                       test_msg("Offset bits do not match\n");
+                       return -EINVAL;
+               }
+       }
+       return 0;
 }
 
 static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
                             unsigned long len)
 {
-       unsigned long i, x;
+       unsigned long i, j;
+       u32 x;
+       int ret;
 
        memset(bitmap, 0, len);
        memset_extent_buffer(eb, 0, 0, len);
@@ -297,16 +314,18 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 
        bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
        extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
-       if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+       ret = check_eb_bitmap(bitmap, eb, len);
+       if (ret) {
                test_msg("Setting all bits failed\n");
-               return -EINVAL;
+               return ret;
        }
 
        bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
        extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
-       if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+       ret = check_eb_bitmap(bitmap, eb, len);
+       if (ret) {
                test_msg("Clearing all bits failed\n");
-               return -EINVAL;
+               return ret;
        }
 
        /* Straddling pages test */
@@ -316,9 +335,10 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
                        sizeof(long) * BITS_PER_BYTE);
                extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
                                        sizeof(long) * BITS_PER_BYTE);
-               if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+               ret = check_eb_bitmap(bitmap, eb, len);
+               if (ret) {
                        test_msg("Setting straddling pages failed\n");
-                       return -EINVAL;
+                       return ret;
                }
 
                bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
@@ -328,9 +348,10 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
                extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
                extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
                                        sizeof(long) * BITS_PER_BYTE);
-               if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+               ret = check_eb_bitmap(bitmap, eb, len);
+               if (ret) {
                        test_msg("Clearing straddling pages failed\n");
-                       return -EINVAL;
+                       return ret;
                }
        }
 
@@ -339,28 +360,22 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
         * something repetitive that could miss some hypothetical off-by-n bug.
         */
        x = 0;
-       for (i = 0; i < len / sizeof(long); i++) {
-               x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffUL;
-               bitmap[i] = x;
-       }
-       write_extent_buffer(eb, bitmap, 0, len);
-
-       for (i = 0; i < len * BITS_PER_BYTE; i++) {
-               int bit, bit1;
-
-               bit = !!test_bit_in_byte(i, (u8 *)bitmap);
-               bit1 = !!extent_buffer_test_bit(eb, 0, i);
-               if (bit1 != bit) {
-                       test_msg("Testing bit pattern failed\n");
-                       return -EINVAL;
+       bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
+       extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
+       for (i = 0; i < len * BITS_PER_BYTE / 32; i++) {
+               x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffU;
+               for (j = 0; j < 32; j++) {
+                       if (x & (1U << j)) {
+                               bitmap_set(bitmap, i * 32 + j, 1);
+                               extent_buffer_bitmap_set(eb, 0, i * 32 + j, 1);
+                       }
                }
+       }
 
-               bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
-                                               i % BITS_PER_BYTE);
-               if (bit1 != bit) {
-                       test_msg("Testing bit pattern with offset failed\n");
-                       return -EINVAL;
-               }
+       ret = check_eb_bitmap(bitmap, eb, len);
+       if (ret) {
+               test_msg("Random bit pattern failed\n");
+               return ret;
        }
 
        return 0;
index 7508d3b427804c0cb634b565e0b625e99a2c0dfc..6e144048a72eedb206e902bf71fe19d6c6555c0e 100644 (file)
 #include "../transaction.h"
 
 struct free_space_extent {
-       u64 start, length;
+       u64 start;
+       u64 length;
 };
 
-/*
- * The test cases align their operations to this in order to hit some of the
- * edge cases in the bitmap code.
- */
-#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE)
-
 static int __check_free_space_extents(struct btrfs_trans_handle *trans,
                                      struct btrfs_fs_info *fs_info,
                                      struct btrfs_block_group_cache *cache,
                                      struct btrfs_path *path,
-                                     struct free_space_extent *extents,
+                                     const struct free_space_extent * const extents,
                                      unsigned int num_extents)
 {
        struct btrfs_free_space_info *info;
@@ -126,7 +121,7 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
                                    struct btrfs_fs_info *fs_info,
                                    struct btrfs_block_group_cache *cache,
                                    struct btrfs_path *path,
-                                   struct free_space_extent *extents,
+                                   const struct free_space_extent * const extents,
                                    unsigned int num_extents)
 {
        struct btrfs_free_space_info *info;
@@ -168,9 +163,10 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
 static int test_empty_block_group(struct btrfs_trans_handle *trans,
                                  struct btrfs_fs_info *fs_info,
                                  struct btrfs_block_group_cache *cache,
-                                 struct btrfs_path *path)
+                                 struct btrfs_path *path,
+                                 u32 alignment)
 {
-       struct free_space_extent extents[] = {
+       const struct free_space_extent extents[] = {
                {cache->key.objectid, cache->key.offset},
        };
 
@@ -181,9 +177,10 @@ static int test_empty_block_group(struct btrfs_trans_handle *trans,
 static int test_remove_all(struct btrfs_trans_handle *trans,
                           struct btrfs_fs_info *fs_info,
                           struct btrfs_block_group_cache *cache,
-                          struct btrfs_path *path)
+                          struct btrfs_path *path,
+                          u32 alignment)
 {
-       struct free_space_extent extents[] = {};
+       const struct free_space_extent extents[] = {};
        int ret;
 
        ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
@@ -201,16 +198,17 @@ static int test_remove_all(struct btrfs_trans_handle *trans,
 static int test_remove_beginning(struct btrfs_trans_handle *trans,
                                 struct btrfs_fs_info *fs_info,
                                 struct btrfs_block_group_cache *cache,
-                                struct btrfs_path *path)
+                                struct btrfs_path *path,
+                                u32 alignment)
 {
-       struct free_space_extent extents[] = {
-               {cache->key.objectid + BITMAP_RANGE,
-                       cache->key.offset - BITMAP_RANGE},
+       const struct free_space_extent extents[] = {
+               {cache->key.objectid + alignment,
+                       cache->key.offset - alignment},
        };
        int ret;
 
        ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
-                                           cache->key.objectid, BITMAP_RANGE);
+                                           cache->key.objectid, alignment);
        if (ret) {
                test_msg("Could not remove free space\n");
                return ret;
@@ -224,17 +222,18 @@ static int test_remove_beginning(struct btrfs_trans_handle *trans,
 static int test_remove_end(struct btrfs_trans_handle *trans,
                           struct btrfs_fs_info *fs_info,
                           struct btrfs_block_group_cache *cache,
-                          struct btrfs_path *path)
+                          struct btrfs_path *path,
+                          u32 alignment)
 {
-       struct free_space_extent extents[] = {
-               {cache->key.objectid, cache->key.offset - BITMAP_RANGE},
+       const struct free_space_extent extents[] = {
+               {cache->key.objectid, cache->key.offset - alignment},
        };
        int ret;
 
        ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
                                            cache->key.objectid +
-                                           cache->key.offset - BITMAP_RANGE,
-                                           BITMAP_RANGE);
+                                           cache->key.offset - alignment,
+                                           alignment);
        if (ret) {
                test_msg("Could not remove free space\n");
                return ret;
@@ -247,18 +246,19 @@ static int test_remove_end(struct btrfs_trans_handle *trans,
 static int test_remove_middle(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info,
                              struct btrfs_block_group_cache *cache,
-                             struct btrfs_path *path)
+                             struct btrfs_path *path,
+                             u32 alignment)
 {
-       struct free_space_extent extents[] = {
-               {cache->key.objectid, BITMAP_RANGE},
-               {cache->key.objectid + 2 * BITMAP_RANGE,
-                       cache->key.offset - 2 * BITMAP_RANGE},
+       const struct free_space_extent extents[] = {
+               {cache->key.objectid, alignment},
+               {cache->key.objectid + 2 * alignment,
+                       cache->key.offset - 2 * alignment},
        };
        int ret;
 
        ret = __remove_from_free_space_tree(trans, fs_info, cache, path,
-                                           cache->key.objectid + BITMAP_RANGE,
-                                           BITMAP_RANGE);
+                                           cache->key.objectid + alignment,
+                                           alignment);
        if (ret) {
                test_msg("Could not remove free space\n");
                return ret;
@@ -271,10 +271,11 @@ static int test_remove_middle(struct btrfs_trans_handle *trans,
 static int test_merge_left(struct btrfs_trans_handle *trans,
                           struct btrfs_fs_info *fs_info,
                           struct btrfs_block_group_cache *cache,
-                          struct btrfs_path *path)
+                          struct btrfs_path *path,
+                          u32 alignment)
 {
-       struct free_space_extent extents[] = {
-               {cache->key.objectid, 2 * BITMAP_RANGE},
+       const struct free_space_extent extents[] = {
+               {cache->key.objectid, 2 * alignment},
        };
        int ret;
 
@@ -287,15 +288,15 @@ static int test_merge_left(struct btrfs_trans_handle *trans,
        }
 
        ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-                                      cache->key.objectid, BITMAP_RANGE);
+                                      cache->key.objectid, alignment);
        if (ret) {
                test_msg("Could not add free space\n");
                return ret;
        }
 
        ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-                                      cache->key.objectid + BITMAP_RANGE,
-                                      BITMAP_RANGE);
+                                      cache->key.objectid + alignment,
+                                      alignment);
        if (ret) {
                test_msg("Could not add free space\n");
                return ret;
@@ -308,10 +309,11 @@ static int test_merge_left(struct btrfs_trans_handle *trans,
 static int test_merge_right(struct btrfs_trans_handle *trans,
                           struct btrfs_fs_info *fs_info,
                           struct btrfs_block_group_cache *cache,
-                          struct btrfs_path *path)
+                          struct btrfs_path *path,
+                          u32 alignment)
 {
-       struct free_space_extent extents[] = {
-               {cache->key.objectid + BITMAP_RANGE, 2 * BITMAP_RANGE},
+       const struct free_space_extent extents[] = {
+               {cache->key.objectid + alignment, 2 * alignment},
        };
        int ret;
 
@@ -324,16 +326,16 @@ static int test_merge_right(struct btrfs_trans_handle *trans,
        }
 
        ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-                                      cache->key.objectid + 2 * BITMAP_RANGE,
-                                      BITMAP_RANGE);
+                                      cache->key.objectid + 2 * alignment,
+                                      alignment);
        if (ret) {
                test_msg("Could not add free space\n");
                return ret;
        }
 
        ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-                                      cache->key.objectid + BITMAP_RANGE,
-                                      BITMAP_RANGE);
+                                      cache->key.objectid + alignment,
+                                      alignment);
        if (ret) {
                test_msg("Could not add free space\n");
                return ret;
@@ -346,10 +348,11 @@ static int test_merge_right(struct btrfs_trans_handle *trans,
 static int test_merge_both(struct btrfs_trans_handle *trans,
                           struct btrfs_fs_info *fs_info,
                           struct btrfs_block_group_cache *cache,
-                          struct btrfs_path *path)
+                          struct btrfs_path *path,
+                          u32 alignment)
 {
-       struct free_space_extent extents[] = {
-               {cache->key.objectid, 3 * BITMAP_RANGE},
+       const struct free_space_extent extents[] = {
+               {cache->key.objectid, 3 * alignment},
        };
        int ret;
 
@@ -362,23 +365,23 @@ static int test_merge_both(struct btrfs_trans_handle *trans,
        }
 
        ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-                                      cache->key.objectid, BITMAP_RANGE);
+                                      cache->key.objectid, alignment);
        if (ret) {
                test_msg("Could not add free space\n");
                return ret;
        }
 
        ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-                                      cache->key.objectid + 2 * BITMAP_RANGE,
-                                      BITMAP_RANGE);
+                                      cache->key.objectid + 2 * alignment,
+                                      alignment);
        if (ret) {
                test_msg("Could not add free space\n");
                return ret;
        }
 
        ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-                                      cache->key.objectid + BITMAP_RANGE,
-                                      BITMAP_RANGE);
+                                      cache->key.objectid + alignment,
+                                      alignment);
        if (ret) {
                test_msg("Could not add free space\n");
                return ret;
@@ -391,12 +394,13 @@ static int test_merge_both(struct btrfs_trans_handle *trans,
 static int test_merge_none(struct btrfs_trans_handle *trans,
                           struct btrfs_fs_info *fs_info,
                           struct btrfs_block_group_cache *cache,
-                          struct btrfs_path *path)
+                          struct btrfs_path *path,
+                          u32 alignment)
 {
-       struct free_space_extent extents[] = {
-               {cache->key.objectid, BITMAP_RANGE},
-               {cache->key.objectid + 2 * BITMAP_RANGE, BITMAP_RANGE},
-               {cache->key.objectid + 4 * BITMAP_RANGE, BITMAP_RANGE},
+       const struct free_space_extent extents[] = {
+               {cache->key.objectid, alignment},
+               {cache->key.objectid + 2 * alignment, alignment},
+               {cache->key.objectid + 4 * alignment, alignment},
        };
        int ret;
 
@@ -409,23 +413,23 @@ static int test_merge_none(struct btrfs_trans_handle *trans,
        }
 
        ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-                                      cache->key.objectid, BITMAP_RANGE);
+                                      cache->key.objectid, alignment);
        if (ret) {
                test_msg("Could not add free space\n");
                return ret;
        }
 
        ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-                                      cache->key.objectid + 4 * BITMAP_RANGE,
-                                      BITMAP_RANGE);
+                                      cache->key.objectid + 4 * alignment,
+                                      alignment);
        if (ret) {
                test_msg("Could not add free space\n");
                return ret;
        }
 
        ret = __add_to_free_space_tree(trans, fs_info, cache, path,
-                                      cache->key.objectid + 2 * BITMAP_RANGE,
-                                      BITMAP_RANGE);
+                                      cache->key.objectid + 2 * alignment,
+                                      alignment);
        if (ret) {
                test_msg("Could not add free space\n");
                return ret;
@@ -438,10 +442,11 @@ static int test_merge_none(struct btrfs_trans_handle *trans,
 typedef int (*test_func_t)(struct btrfs_trans_handle *,
                           struct btrfs_fs_info *,
                           struct btrfs_block_group_cache *,
-                          struct btrfs_path *);
+                          struct btrfs_path *,
+                          u32 alignment);
 
-static int run_test(test_func_t test_func, int bitmaps,
-               u32 sectorsize, u32 nodesize)
+static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
+                   u32 nodesize, u32 alignment)
 {
        struct btrfs_fs_info *fs_info;
        struct btrfs_root *root = NULL;
@@ -480,7 +485,7 @@ static int run_test(test_func_t test_func, int bitmaps,
        btrfs_set_header_nritems(root->node, 0);
        root->alloc_bytenr += 2 * nodesize;
 
-       cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE, sectorsize);
+       cache = btrfs_alloc_dummy_block_group(8 * alignment, sectorsize);
        if (!cache) {
                test_msg("Couldn't allocate dummy block group cache\n");
                ret = -ENOMEM;
@@ -514,7 +519,7 @@ static int run_test(test_func_t test_func, int bitmaps,
                }
        }
 
-       ret = test_func(&trans, root->fs_info, cache, path);
+       ret = test_func(&trans, root->fs_info, cache, path, alignment);
        if (ret)
                goto out;
 
@@ -539,15 +544,27 @@ out:
        return ret;
 }
 
-static int run_test_both_formats(test_func_t test_func,
-       u32 sectorsize, u32 nodesize)
+static int run_test_both_formats(test_func_t test_func, u32 sectorsize,
+                                u32 nodesize, u32 alignment)
 {
+       int test_ret = 0;
        int ret;
 
-       ret = run_test(test_func, 0, sectorsize, nodesize);
-       if (ret)
-               return ret;
-       return run_test(test_func, 1, sectorsize, nodesize);
+       ret = run_test(test_func, 0, sectorsize, nodesize, alignment);
+       if (ret) {
+               test_msg("%pf failed with extents, sectorsize=%u, nodesize=%u, alignment=%u\n",
+                        test_func, sectorsize, nodesize, alignment);
+               test_ret = ret;
+       }
+
+       ret = run_test(test_func, 1, sectorsize, nodesize, alignment);
+       if (ret) {
+               test_msg("%pf failed with bitmaps, sectorsize=%u, nodesize=%u, alignment=%u\n",
+                        test_func, sectorsize, nodesize, alignment);
+               test_ret = ret;
+       }
+
+       return test_ret;
 }
 
 int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
@@ -563,18 +580,30 @@ int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize)
                test_merge_both,
                test_merge_none,
        };
+       u32 bitmap_alignment;
+       int test_ret = 0;
        int i;
 
+       /*
+        * Align some operations to a page to flush out bugs in the extent
+        * buffer bitmap handling of highmem.
+        */
+       bitmap_alignment = BTRFS_FREE_SPACE_BITMAP_BITS * PAGE_SIZE;
+
        test_msg("Running free space tree tests\n");
        for (i = 0; i < ARRAY_SIZE(tests); i++) {
-               int ret = run_test_both_formats(tests[i], sectorsize,
-                       nodesize);
-               if (ret) {
-                       test_msg("%pf : sectorsize %u failed\n",
-                               tests[i], sectorsize);
-                       return ret;
-               }
+               int ret;
+
+               ret = run_test_both_formats(tests[i], sectorsize, nodesize,
+                                           sectorsize);
+               if (ret)
+                       test_ret = ret;
+
+               ret = run_test_both_formats(tests[i], sectorsize, nodesize,
+                                           bitmap_alignment);
+               if (ret)
+                       test_ret = ret;
        }
 
-       return 0;
+       return test_ret;
 }
index 528cae123dc9ebaa4c27ea32bd92b05f7d8e2af5..3d33c4e41e5f9a38195436432e54314548076b0d 100644 (file)
@@ -2713,14 +2713,12 @@ static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
                                             int index, int error)
 {
        struct btrfs_log_ctx *ctx;
+       struct btrfs_log_ctx *safe;
 
-       if (!error) {
-               INIT_LIST_HEAD(&root->log_ctxs[index]);
-               return;
-       }
-
-       list_for_each_entry(ctx, &root->log_ctxs[index], list)
+       list_for_each_entry_safe(ctx, safe, &root->log_ctxs[index], list) {
+               list_del_init(&ctx->list);
                ctx->log_ret = error;
+       }
 
        INIT_LIST_HEAD(&root->log_ctxs[index]);
 }
@@ -2961,13 +2959,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        mutex_unlock(&root->log_mutex);
 
 out_wake_log_root:
-       /*
-        * We needn't get log_mutex here because we are sure all
-        * the other tasks are blocked.
-        */
+       mutex_lock(&log_root_tree->log_mutex);
        btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
 
-       mutex_lock(&log_root_tree->log_mutex);
        log_root_tree->log_transid_committed++;
        atomic_set(&log_root_tree->log_commit[index2], 0);
        mutex_unlock(&log_root_tree->log_mutex);
@@ -2978,10 +2972,8 @@ out_wake_log_root:
        if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
                wake_up(&log_root_tree->log_commit_wait[index2]);
 out:
-       /* See above. */
-       btrfs_remove_all_log_ctxs(root, index1, ret);
-
        mutex_lock(&root->log_mutex);
+       btrfs_remove_all_log_ctxs(root, index1, ret);
        root->log_transid_committed++;
        atomic_set(&root->log_commit[index1], 0);
        mutex_unlock(&root->log_mutex);
index 7bf08825cc1107a20842533fd93b4f2c2d128577..f995e3528a33107bfa1caab3b09b2b5f5af8cf32 100644 (file)
@@ -1272,7 +1272,8 @@ again:
                statret = __ceph_do_getattr(inode, page,
                                            CEPH_STAT_CAP_INLINE_DATA, !!page);
                if (statret < 0) {
-                        __free_page(page);
+                       if (page)
+                               __free_page(page);
                        if (statret == -ENODATA) {
                                BUG_ON(retry_op != READ_INLINE);
                                goto again;
@@ -1769,7 +1770,6 @@ const struct file_operations ceph_file_fops = {
        .fsync = ceph_fsync,
        .lock = ceph_lock,
        .flock = ceph_flock,
-       .splice_read = generic_file_splice_read,
        .splice_write = iter_file_splice_write,
        .unlocked_ioctl = ceph_ioctl,
        .compat_ioctl   = ceph_ioctl,
index bca1b49c1c4b1bc8d2f83e4389a48c1f107de661..ef4d046473256009843b6b4c82e1b5b1e451b02d 100644 (file)
@@ -1511,7 +1511,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                        ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir);
        }
 
-       if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2) {
+       if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 &&
+           !(rinfo->hash_order && req->r_path2)) {
                /* note dir version at start of readdir so we can tell
                 * if any dentries get dropped */
                req->r_dir_release_cnt = atomic64_read(&ci->i_release_count);
index a29ffce981879d5fe46f3858ee90c2fd840f98c1..b382e5910eea8bf7cf7a4711fac79104e6466224 100644 (file)
@@ -845,6 +845,8 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc)
                err = ceph_fs_debugfs_init(fsc);
                if (err < 0)
                        goto fail;
+       } else {
+               root = dget(fsc->sb->s_root);
        }
 
        fsc->mount_state = CEPH_MOUNT_MOUNTED;
index 40b703217977df467aa4b0c4cdc2ffd68423bd30..febc28f9e2c27648e1621531e90cefc482ed2e5d 100644 (file)
@@ -16,7 +16,7 @@
 static int __remove_xattr(struct ceph_inode_info *ci,
                          struct ceph_inode_xattr *xattr);
 
-const struct xattr_handler ceph_other_xattr_handler;
+static const struct xattr_handler ceph_other_xattr_handler;
 
 /*
  * List of handlers for synthetic system.* attributes. Other
@@ -1086,7 +1086,7 @@ static int ceph_set_xattr_handler(const struct xattr_handler *handler,
        return __ceph_setxattr(inode, name, value, size, flags);
 }
 
-const struct xattr_handler ceph_other_xattr_handler = {
+static const struct xattr_handler ceph_other_xattr_handler = {
        .prefix = "",  /* match any name => handlers called with full name */
        .get = ceph_get_xattr_handler,
        .set = ceph_set_xattr_handler,
index 6c58e13fed2f1d647e9b4cc58e1fc3660aff95d3..3d03e48a92139a4f67feeb072048ee5649006481 100644 (file)
@@ -152,6 +152,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
        list_for_each(tmp1, &cifs_tcp_ses_list) {
                server = list_entry(tmp1, struct TCP_Server_Info,
                                    tcp_ses_list);
+               seq_printf(m, "\nNumber of credits: %d", server->credits);
                i++;
                list_for_each(tmp2, &server->smb_ses_list) {
                        ses = list_entry(tmp2, struct cifs_ses,
index 1418daa03d959f7144f6ae0f0eabb0fdcb1f8d34..07ed81cf1552e6ae97be753de0720741e3f80068 100644 (file)
@@ -49,6 +49,7 @@
 #define CIFS_MOUNT_USE_PREFIX_PATH 0x1000000 /* make subpath with unaccessible
                                              * root mountable
                                              */
+#define CIFS_MOUNT_UID_FROM_ACL 0x2000000 /* try to get UID via special SID */
 
 struct cifs_sb_info {
        struct rb_root tlink_tree;
index 0065256881d8382ccc9974bff9f76b100e140643..57ff0756e30c690d139b97b31f546c3037f7ef7c 100644 (file)
@@ -36,7 +36,15 @@ struct smb_mnt_fs_info {
        __u64   cifs_posix_caps;
 } __packed;
 
+struct smb_snapshot_array {
+       __u32   number_of_snapshots;
+       __u32   number_of_snapshots_returned;
+       __u32   snapshot_array_size;
+       /*      snapshots[]; */
+} __packed;
+
 #define CIFS_IOCTL_MAGIC       0xCF
 #define CIFS_IOC_COPYCHUNK_FILE        _IOW(CIFS_IOCTL_MAGIC, 3, int)
 #define CIFS_IOC_SET_INTEGRITY  _IO(CIFS_IOCTL_MAGIC, 4)
 #define CIFS_IOC_GET_MNT_INFO _IOR(CIFS_IOCTL_MAGIC, 5, struct smb_mnt_fs_info)
+#define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array)
index 71e8a56e9479567e9f5ad0fb201a2ea3e91d6e99..15bac390dff945d7fa5d785f666b2b0291fb9f65 100644 (file)
@@ -42,6 +42,35 @@ static const struct cifs_sid sid_authusers = {
 /* group users */
 static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
 
+/* S-1-22-1 Unmapped Unix users */
+static const struct cifs_sid sid_unix_users = {1, 1, {0, 0, 0, 0, 0, 22},
+               {cpu_to_le32(1), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-22-2 Unmapped Unix groups */
+static const struct cifs_sid sid_unix_groups = { 1, 1, {0, 0, 0, 0, 0, 22},
+               {cpu_to_le32(2), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/*
+ * See http://technet.microsoft.com/en-us/library/hh509017(v=ws.10).aspx
+ */
+
+/* S-1-5-88 MS NFS and Apple style UID/GID/mode */
+
+/* S-1-5-88-1 Unix uid */
+static const struct cifs_sid sid_unix_NFS_users = { 1, 2, {0, 0, 0, 0, 0, 5},
+       {cpu_to_le32(88),
+        cpu_to_le32(1), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-5-88-2 Unix gid */
+static const struct cifs_sid sid_unix_NFS_groups = { 1, 2, {0, 0, 0, 0, 0, 5},
+       {cpu_to_le32(88),
+        cpu_to_le32(2), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
+/* S-1-5-88-3 Unix mode */
+static const struct cifs_sid sid_unix_NFS_mode = { 1, 2, {0, 0, 0, 0, 0, 5},
+       {cpu_to_le32(88),
+        cpu_to_le32(3), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} };
+
 static const struct cred *root_cred;
 
 static int
@@ -183,6 +212,62 @@ compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
        return 0; /* sids compare/match */
 }
 
+static bool
+is_well_known_sid(const struct cifs_sid *psid, uint32_t *puid, bool is_group)
+{
+       int i;
+       int num_subauth;
+       const struct cifs_sid *pwell_known_sid;
+
+       if (!psid || (puid == NULL))
+               return false;
+
+       num_subauth = psid->num_subauth;
+
+       /* check if Mac (or Windows NFS) vs. Samba format for Unix owner SID */
+       if (num_subauth == 2) {
+               if (is_group)
+                       pwell_known_sid = &sid_unix_groups;
+               else
+                       pwell_known_sid = &sid_unix_users;
+       } else if (num_subauth == 3) {
+               if (is_group)
+                       pwell_known_sid = &sid_unix_NFS_groups;
+               else
+                       pwell_known_sid = &sid_unix_NFS_users;
+       } else
+               return false;
+
+       /* compare the revision */
+       if (psid->revision != pwell_known_sid->revision)
+               return false;
+
+       /* compare all of the six auth values */
+       for (i = 0; i < NUM_AUTHS; ++i) {
+               if (psid->authority[i] != pwell_known_sid->authority[i]) {
+                       cifs_dbg(FYI, "auth %d did not match\n", i);
+                       return false;
+               }
+       }
+
+       if (num_subauth == 2) {
+               if (psid->sub_auth[0] != pwell_known_sid->sub_auth[0])
+                       return false;
+
+               *puid = le32_to_cpu(psid->sub_auth[1]);
+       } else /* 3 subauths, ie Windows/Mac style */ {
+               *puid = le32_to_cpu(psid->sub_auth[0]);
+               if ((psid->sub_auth[0] != pwell_known_sid->sub_auth[0]) ||
+                   (psid->sub_auth[1] != pwell_known_sid->sub_auth[1]))
+                       return false;
+
+               *puid = le32_to_cpu(psid->sub_auth[2]);
+       }
+
+       cifs_dbg(FYI, "Unix UID %d returned from SID\n", *puid);
+       return true; /* well known sid found, uid returned */
+}
+
 static void
 cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
 {
@@ -276,6 +361,43 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
                return -EIO;
        }
 
+       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) {
+               uint32_t unix_id;
+               bool is_group;
+
+               if (sidtype != SIDOWNER)
+                       is_group = true;
+               else
+                       is_group = false;
+
+               if (is_well_known_sid(psid, &unix_id, is_group) == false)
+                       goto try_upcall_to_get_id;
+
+               if (is_group) {
+                       kgid_t gid;
+                       gid_t id;
+
+                       id = (gid_t)unix_id;
+                       gid = make_kgid(&init_user_ns, id);
+                       if (gid_valid(gid)) {
+                               fgid = gid;
+                               goto got_valid_id;
+                       }
+               } else {
+                       kuid_t uid;
+                       uid_t id;
+
+                       id = (uid_t)unix_id;
+                       uid = make_kuid(&init_user_ns, id);
+                       if (uid_valid(uid)) {
+                               fuid = uid;
+                               goto got_valid_id;
+                       }
+               }
+               /* If unable to find uid/gid easily from SID try via upcall */
+       }
+
+try_upcall_to_get_id:
        sidstr = sid_to_key_str(psid, sidtype);
        if (!sidstr)
                return -ENOMEM;
@@ -329,6 +451,7 @@ out_revert_creds:
         * Note that we return 0 here unconditionally. If the mapping
         * fails then we just fall back to using the mnt_uid/mnt_gid.
         */
+got_valid_id:
        if (sidtype == SIDOWNER)
                fattr->cf_uid = fuid;
        else
index cca04e710421d83453dab921a01c881d1c35ebce..15261ba464c5023f82014751fee8efaf09abec5f 100644 (file)
@@ -64,15 +64,15 @@ unsigned int global_secflags = CIFSSEC_DEF;
 unsigned int sign_CIFS_PDUs = 1;
 static const struct super_operations cifs_super_ops;
 unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
-module_param(CIFSMaxBufSize, uint, 0);
+module_param(CIFSMaxBufSize, uint, 0444);
 MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header). "
                                 "Default: 16384 Range: 8192 to 130048");
 unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL;
-module_param(cifs_min_rcv, uint, 0);
+module_param(cifs_min_rcv, uint, 0444);
 MODULE_PARM_DESC(cifs_min_rcv, "Network buffers in pool. Default: 4 Range: "
                                "1 to 64");
 unsigned int cifs_min_small = 30;
-module_param(cifs_min_small, uint, 0);
+module_param(cifs_min_small, uint, 0444);
 MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 "
                                 "Range: 2 to 256");
 unsigned int cifs_max_pending = CIFS_MAX_REQ;
@@ -271,7 +271,7 @@ cifs_alloc_inode(struct super_block *sb)
        cifs_inode->createtime = 0;
        cifs_inode->epoch = 0;
 #ifdef CONFIG_CIFS_SMB2
-       get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE);
+       generate_random_uuid(cifs_inode->lease_key);
 #endif
        /*
         * Can not set i_flags here - they get immediately overwritten to zero
@@ -469,6 +469,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
                seq_puts(s, ",posixpaths");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
                seq_puts(s, ",setuids");
+       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
+               seq_puts(s, ",idsfromsid");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
                seq_puts(s, ",serverino");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
@@ -1262,7 +1264,6 @@ init_cifs(void)
        GlobalTotalActiveXid = 0;
        GlobalMaxActiveXid = 0;
        spin_lock_init(&cifs_tcp_ses_lock);
-       spin_lock_init(&cifs_file_list_lock);
        spin_lock_init(&GlobalMid_Lock);
 
        get_random_bytes(&cifs_lock_secret, sizeof(cifs_lock_secret));
index 8f1d8c1e72bece412a926d0e20b9bdc95a7fa9ee..1f17f6bd7a601c56e40a6393c25eaee892e40a0f 100644 (file)
 #define SMB_ECHO_INTERVAL_MAX 600
 #define SMB_ECHO_INTERVAL_DEFAULT 60
 
+/*
+ * Default number of credits to keep available for SMB3.
+ * This value is chosen somewhat arbitrarily. The Windows client
+ * defaults to 128 credits, the Windows server allows clients up to
+ * 512 credits (or 8K for later versions), and the NetApp server
+ * does not limit clients at all.  Choose a high enough default value
+ * such that the client shouldn't limit performance, but allow mount
+ * to override (until you approach 64K, where we limit credits to 60000
+ * to reduce possibility of seeing more server credit overflow bugs).
+ */
+#define SMB2_MAX_CREDITS_AVAILABLE 32000
+
 #include "cifspdu.h"
 
 #ifndef XATTR_DOS_ATTRIB
@@ -376,6 +388,8 @@ struct smb_version_operations {
        int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *);
        int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon,
                             struct cifsFileInfo *src_file);
+       int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon,
+                            struct cifsFileInfo *src_file, void __user *);
        int (*query_mf_symlink)(unsigned int, struct cifs_tcon *,
                                struct cifs_sb_info *, const unsigned char *,
                                char *, unsigned int *);
@@ -464,6 +478,7 @@ struct smb_vol {
        bool retry:1;
        bool intr:1;
        bool setuids:1;
+       bool setuidfromacl:1;
        bool override_uid:1;
        bool override_gid:1;
        bool dynperm:1;
@@ -510,6 +525,7 @@ struct smb_vol {
        struct sockaddr_storage srcaddr; /* allow binding to a local IP */
        struct nls_table *local_nls;
        unsigned int echo_interval; /* echo interval in secs */
+       unsigned int max_credits; /* smb3 max_credits, valid range 20 to 60000 */
 };
 
 #define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \
@@ -567,7 +583,8 @@ struct TCP_Server_Info {
        bool noblocksnd;                /* use blocking sendmsg */
        bool noautotune;                /* do not autotune send buf sizes */
        bool tcp_nodelay;
-       int credits;  /* send no more requests at once */
+       unsigned int credits;  /* send no more requests at once */
+       unsigned int max_credits; /* can override large 32000 default at mnt */
        unsigned int in_flight;  /* number of requests on the wire to server */
        spinlock_t req_lock;  /* protect the two values above */
        struct mutex srv_mutex;
@@ -833,6 +850,7 @@ struct cifs_tcon {
        struct list_head tcon_list;
        int tc_count;
        struct list_head openFileList;
+       spinlock_t open_file_lock; /* protects list above */
        struct cifs_ses *ses;   /* pointer to session associated with */
        char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
        char *nativeFileSystem;
@@ -889,7 +907,7 @@ struct cifs_tcon {
 #endif /* CONFIG_CIFS_STATS2 */
        __u64    bytes_read;
        __u64    bytes_written;
-       spinlock_t stat_lock;
+       spinlock_t stat_lock;  /* protects the two fields above */
 #endif /* CONFIG_CIFS_STATS */
        FILE_SYSTEM_DEVICE_INFO fsDevInfo;
        FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
@@ -1040,20 +1058,24 @@ struct cifs_fid_locks {
 };
 
 struct cifsFileInfo {
+       /* following two lists are protected by tcon->open_file_lock */
        struct list_head tlist; /* pointer to next fid owned by tcon */
        struct list_head flist; /* next fid (file instance) for this inode */
+       /* lock list below protected by cifsi->lock_sem */
        struct cifs_fid_locks *llist;   /* brlocks held by this fid */
        kuid_t uid;             /* allows finding which FileInfo structure */
        __u32 pid;              /* process id who opened file */
        struct cifs_fid fid;    /* file id from remote */
+       struct list_head rlist; /* reconnect list */
        /* BB add lock scope info here if needed */ ;
        /* lock scope id (0 if none) */
        struct dentry *dentry;
-       unsigned int f_flags;
        struct tcon_link *tlink;
+       unsigned int f_flags;
        bool invalidHandle:1;   /* file closed via session abend */
        bool oplock_break_cancelled:1;
-       int count;              /* refcount protected by cifs_file_list_lock */
+       int count;
+       spinlock_t file_info_lock; /* protects four flag/count fields above */
        struct mutex fh_mutex; /* prevents reopen race after dead ses*/
        struct cifs_search_info srch_inf;
        struct work_struct oplock_break; /* work for oplock breaks */
@@ -1120,7 +1142,7 @@ struct cifs_writedata {
 
 /*
  * Take a reference on the file private data. Must be called with
- * cifs_file_list_lock held.
+ * cfile->file_info_lock held.
  */
 static inline void
 cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file)
@@ -1514,8 +1536,10 @@ require use of the stronger protocol */
  *  GlobalMid_Lock protects:
  *     list operations on pending_mid_q and oplockQ
  *      updates to XID counters, multiplex id  and SMB sequence numbers
- *  cifs_file_list_lock protects:
- *     list operations on tcp and SMB session lists and tCon lists
+ *  tcp_ses_lock protects:
+ *     list operations on tcp and SMB session lists
+ *  tcon->open_file_lock protects the list of open files hanging off the tcon
+ *  cfile->file_info_lock protects counters and fields in cifs file struct
  *  f_owner.lock protects certain per file struct operations
  *  mapping->page_lock protects certain per page operations
  *
@@ -1547,18 +1571,12 @@ GLOBAL_EXTERN struct list_head          cifs_tcp_ses_list;
  * tcp session, and the list of tcon's per smb session. It also protects
  * the reference counters for the server, smb session, and tcon. Finally,
  * changes to the tcon->tidStatus should be done while holding this lock.
+ * generally the locks should be taken in order tcp_ses_lock before
+ * tcon->open_file_lock and that before file->file_info_lock since the
+ * structure order is cifs_socket-->cifs_ses-->cifs_tcon-->cifs_file
  */
 GLOBAL_EXTERN spinlock_t               cifs_tcp_ses_lock;
 
-/*
- * This lock protects the cifs_file->llist and cifs_file->flist
- * list operations, and updates to some flags (cifs_file->invalidHandle)
- * It will be moved to either use the tcon->stat_lock or equivalent later.
- * If cifs_tcp_ses_lock and the lock below are both needed to be held, then
- * the cifs_tcp_ses_lock must be grabbed first and released last.
- */
-GLOBAL_EXTERN spinlock_t       cifs_file_list_lock;
-
 #ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
 /* Outstanding dir notify requests */
 GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
index 4ead72a001f974d4e3b18dedf7a2466f4426c0e9..ced0e42ce460963104d89bb8b941b899ceb33dea 100644 (file)
@@ -193,6 +193,8 @@ extern struct smb_vol *cifs_get_volume_info(char *mount_data,
 extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *);
 extern void cifs_umount(struct cifs_sb_info *);
 extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon);
+extern void cifs_reopen_persistent_handles(struct cifs_tcon *tcon);
+
 extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
                                    __u64 length, __u8 type,
                                    struct cifsLockInfo **conf_lock,
index f82d2823622f334cf83e677c042ead92a0379b0a..3f3185febc585f93fb9f88a0b0e96ec49b636f4a 100644 (file)
@@ -98,13 +98,13 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
        struct list_head *tmp1;
 
        /* list all files open on tree connection and mark them invalid */
-       spin_lock(&cifs_file_list_lock);
+       spin_lock(&tcon->open_file_lock);
        list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                open_file->invalidHandle = true;
                open_file->oplock_break_cancelled = true;
        }
-       spin_unlock(&cifs_file_list_lock);
+       spin_unlock(&tcon->open_file_lock);
        /*
         * BB Add call to invalidate_inodes(sb) for all superblocks mounted
         * to this tcon.
index 2e4f4bad8b1e9bf75cc76074914b8c2c8f63a3ff..aab5227979e2ee27c1858b47f9bf33c275f27633 100644 (file)
@@ -63,7 +63,6 @@ extern mempool_t *cifs_req_poolp;
 #define TLINK_IDLE_EXPIRE      (600 * HZ)
 
 enum {
-
        /* Mount options that take no arguments */
        Opt_user_xattr, Opt_nouser_xattr,
        Opt_forceuid, Opt_noforceuid,
@@ -76,7 +75,7 @@ enum {
        Opt_noposixpaths, Opt_nounix,
        Opt_nocase,
        Opt_brl, Opt_nobrl,
-       Opt_forcemandatorylock, Opt_setuids,
+       Opt_forcemandatorylock, Opt_setuidfromacl, Opt_setuids,
        Opt_nosetuids, Opt_dynperm, Opt_nodynperm,
        Opt_nohard, Opt_nosoft,
        Opt_nointr, Opt_intr,
@@ -95,7 +94,7 @@ enum {
        Opt_cruid, Opt_gid, Opt_file_mode,
        Opt_dirmode, Opt_port,
        Opt_rsize, Opt_wsize, Opt_actimeo,
-       Opt_echo_interval,
+       Opt_echo_interval, Opt_max_credits,
 
        /* Mount options which take string value */
        Opt_user, Opt_pass, Opt_ip,
@@ -148,6 +147,7 @@ static const match_table_t cifs_mount_option_tokens = {
        { Opt_forcemandatorylock, "forcemand" },
        { Opt_setuids, "setuids" },
        { Opt_nosetuids, "nosetuids" },
+       { Opt_setuidfromacl, "idsfromsid" },
        { Opt_dynperm, "dynperm" },
        { Opt_nodynperm, "nodynperm" },
        { Opt_nohard, "nohard" },
@@ -190,6 +190,7 @@ static const match_table_t cifs_mount_option_tokens = {
        { Opt_wsize, "wsize=%s" },
        { Opt_actimeo, "actimeo=%s" },
        { Opt_echo_interval, "echo_interval=%s" },
+       { Opt_max_credits, "max_credits=%s" },
 
        { Opt_blank_user, "user=" },
        { Opt_blank_user, "username=" },
@@ -1376,6 +1377,9 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                case Opt_nosetuids:
                        vol->setuids = 0;
                        break;
+               case Opt_setuidfromacl:
+                       vol->setuidfromacl = 1;
+                       break;
                case Opt_dynperm:
                        vol->dynperm = true;
                        break;
@@ -1586,6 +1590,15 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                        }
                        vol->echo_interval = option;
                        break;
+               case Opt_max_credits:
+                       if (get_option_ul(args, &option) || (option < 20) ||
+                           (option > 60000)) {
+                               cifs_dbg(VFS, "%s: Invalid max_credits value\n",
+                                        __func__);
+                               goto cifs_parse_mount_err;
+                       }
+                       vol->max_credits = option;
+                       break;
 
                /* String Arguments */
 
@@ -2163,7 +2176,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
        memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
                sizeof(tcp_ses->dstaddr));
 #ifdef CONFIG_CIFS_SMB2
-       get_random_bytes(tcp_ses->client_guid, SMB2_CLIENT_GUID_SIZE);
+       generate_random_uuid(tcp_ses->client_guid);
 #endif
        /*
         * at this point we are the only ones with the pointer
@@ -3270,6 +3283,8 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
                cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
        if (pvolume_info->setuids)
                cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID;
+       if (pvolume_info->setuidfromacl)
+               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UID_FROM_ACL;
        if (pvolume_info->server_ino)
                cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM;
        if (pvolume_info->remap)
@@ -3598,7 +3613,11 @@ try_mount_again:
                bdi_destroy(&cifs_sb->bdi);
                goto out;
        }
-
+       if ((volume_info->max_credits < 20) ||
+            (volume_info->max_credits > 60000))
+               server->max_credits = SMB2_MAX_CREDITS_AVAILABLE;
+       else
+               server->max_credits = volume_info->max_credits;
        /* get a reference to a SMB session */
        ses = cifs_get_smb_ses(server, volume_info);
        if (IS_ERR(ses)) {
@@ -3688,14 +3707,16 @@ remote_path_check:
                        goto mount_fail_check;
                }
 
-               rc = cifs_are_all_path_components_accessible(server,
+               if (rc != -EREMOTE) {
+                       rc = cifs_are_all_path_components_accessible(server,
                                                             xid, tcon, cifs_sb,
                                                             full_path);
-               if (rc != 0) {
-                       cifs_dbg(VFS, "cannot query dirs between root and final path, "
-                                "enabling CIFS_MOUNT_USE_PREFIX_PATH\n");
-                       cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
-                       rc = 0;
+                       if (rc != 0) {
+                               cifs_dbg(VFS, "cannot query dirs between root and final path, "
+                                        "enabling CIFS_MOUNT_USE_PREFIX_PATH\n");
+                               cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+                               rc = 0;
+                       }
                }
                kfree(full_path);
        }
index a95fe8b1afe93de6b091ce2015cd6f9dbb2f3f23..7f5f6176c6f15caff307e078320122141c119ab9 100644 (file)
@@ -305,6 +305,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
+       spin_lock_init(&cfile->file_info_lock);
 
        cifs_sb_active(inode->i_sb);
 
@@ -317,7 +318,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                oplock = 0;
        }
 
-       spin_lock(&cifs_file_list_lock);
+       spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);
@@ -326,12 +327,13 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
        server->ops->set_fid(cfile, fid, oplock);
 
        list_add(&cfile->tlist, &tcon->openFileList);
+
        /* if readable file instance put first in list*/
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
-       spin_unlock(&cifs_file_list_lock);
+       spin_unlock(&tcon->open_file_lock);
 
        if (fid->purge_cache)
                cifs_zap_mapping(inode);
@@ -343,16 +345,16 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
 struct cifsFileInfo *
 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
 {
-       spin_lock(&cifs_file_list_lock);
+       spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
-       spin_unlock(&cifs_file_list_lock);
+       spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
 }
 
 /*
  * Release a reference on the file private data. This may involve closing
  * the filehandle out on the server. Must be called without holding
- * cifs_file_list_lock.
+ * tcon->open_file_lock and cifs_file->file_info_lock.
  */
 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 {
@@ -367,11 +369,15 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
        struct cifs_pending_open open;
        bool oplock_break_cancelled;
 
-       spin_lock(&cifs_file_list_lock);
+       spin_lock(&tcon->open_file_lock);
+
+       spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
-               spin_unlock(&cifs_file_list_lock);
+               spin_unlock(&cifs_file->file_info_lock);
+               spin_unlock(&tcon->open_file_lock);
                return;
        }
+       spin_unlock(&cifs_file->file_info_lock);
 
        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);
@@ -395,7 +401,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }
-       spin_unlock(&cifs_file_list_lock);
+
+       spin_unlock(&tcon->open_file_lock);
 
        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
 
@@ -732,6 +739,15 @@ reopen_success:
         * to the server to get the new inode info.
         */
 
+       /*
+        * If the server returned a read oplock and we have mandatory brlocks,
+        * set oplock level to None.
+        */
+       if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
+               cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
+               oplock = 0;
+       }
+
        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);
@@ -753,6 +769,36 @@ int cifs_close(struct inode *inode, struct file *file)
        return 0;
 }
 
+/* Reopen, without flushing, every open file on @tcon marked invalidHandle. */
+void
+cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
+{
+       struct cifsFileInfo *open_file;
+       struct list_head *tmp, *tmp1;
+       struct list_head tmp_list;
+
+       cifs_dbg(FYI, "Reopen persistent handles\n");
+       INIT_LIST_HEAD(&tmp_list);
+
+       /* under the list lock, pin each invalidated handle; reopen after unlock */
+       spin_lock(&tcon->open_file_lock);
+       list_for_each(tmp, &tcon->openFileList) {
+               open_file = list_entry(tmp, struct cifsFileInfo, tlist);
+               if (!open_file->invalidHandle)
+                       continue;
+               cifsFileInfo_get(open_file);
+               list_add_tail(&open_file->rlist, &tmp_list);
+       }
+       spin_unlock(&tcon->open_file_lock);
+
+       list_for_each_safe(tmp, tmp1, &tmp_list) {
+               open_file = list_entry(tmp, struct cifsFileInfo, rlist);
+               cifs_reopen_file(open_file, false /* do not flush */);
+               list_del_init(&open_file->rlist);
+               cifsFileInfo_put(open_file);
+       }
+}
+
 int cifs_closedir(struct inode *inode, struct file *file)
 {
        int rc = 0;
@@ -772,10 +818,10 @@ int cifs_closedir(struct inode *inode, struct file *file)
        server = tcon->ses->server;
 
        cifs_dbg(FYI, "Freeing private data in close dir\n");
-       spin_lock(&cifs_file_list_lock);
+       spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
-               spin_unlock(&cifs_file_list_lock);
+               spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
@@ -784,7 +830,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
-               spin_unlock(&cifs_file_list_lock);
+               spin_unlock(&cfile->file_info_lock);
 
        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
@@ -1728,12 +1774,13 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 {
        struct cifsFileInfo *open_file = NULL;
        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+       struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
 
        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;
 
-       spin_lock(&cifs_file_list_lock);
+       spin_lock(&tcon->open_file_lock);
        /* we could simply get the first_list_entry since write-only entries
           are always at the end of the list but since the first entry might
           have a close pending, we go through the whole list */
@@ -1744,8 +1791,8 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
                        if (!open_file->invalidHandle) {
                                /* found a good file */
                                /* lock it so it will not be closed on us */
-                               cifsFileInfo_get_locked(open_file);
-                               spin_unlock(&cifs_file_list_lock);
+                               cifsFileInfo_get(open_file);
+                               spin_unlock(&tcon->open_file_lock);
                                return open_file;
                        } /* else might as well continue, and look for
                             another, or simply have the caller reopen it
@@ -1753,7 +1800,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
                } else /* write only file */
                        break; /* write only files are last so must be done */
        }
-       spin_unlock(&cifs_file_list_lock);
+       spin_unlock(&tcon->open_file_lock);
        return NULL;
 }
 
@@ -1762,6 +1809,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
 {
        struct cifsFileInfo *open_file, *inv_file = NULL;
        struct cifs_sb_info *cifs_sb;
+       struct cifs_tcon *tcon;
        bool any_available = false;
        int rc;
        unsigned int refind = 0;
@@ -1777,15 +1825,16 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
        }
 
        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+       tcon = cifs_sb_master_tcon(cifs_sb);
 
        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;
 
-       spin_lock(&cifs_file_list_lock);
+       spin_lock(&tcon->open_file_lock);
 refind_writable:
        if (refind > MAX_REOPEN_ATT) {
-               spin_unlock(&cifs_file_list_lock);
+               spin_unlock(&tcon->open_file_lock);
                return NULL;
        }
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
@@ -1796,8 +1845,8 @@ refind_writable:
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
                        if (!open_file->invalidHandle) {
                                /* found a good writable file */
-                               cifsFileInfo_get_locked(open_file);
-                               spin_unlock(&cifs_file_list_lock);
+                               cifsFileInfo_get(open_file);
+                               spin_unlock(&tcon->open_file_lock);
                                return open_file;
                        } else {
                                if (!inv_file)
@@ -1813,24 +1862,24 @@ refind_writable:
 
        if (inv_file) {
                any_available = false;
-               cifsFileInfo_get_locked(inv_file);
+               cifsFileInfo_get(inv_file);
        }
 
-       spin_unlock(&cifs_file_list_lock);
+       spin_unlock(&tcon->open_file_lock);
 
        if (inv_file) {
                rc = cifs_reopen_file(inv_file, false);
                if (!rc)
                        return inv_file;
                else {
-                       spin_lock(&cifs_file_list_lock);
+                       spin_lock(&tcon->open_file_lock);
                        list_move_tail(&inv_file->flist,
                                        &cifs_inode->openFileList);
-                       spin_unlock(&cifs_file_list_lock);
+                       spin_unlock(&tcon->open_file_lock);
                        cifsFileInfo_put(inv_file);
-                       spin_lock(&cifs_file_list_lock);
                        ++refind;
                        inv_file = NULL;
+                       spin_lock(&tcon->open_file_lock);
                        goto refind_writable;
                }
        }
@@ -3612,15 +3661,17 @@ static int cifs_readpage(struct file *file, struct page *page)
 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
 {
        struct cifsFileInfo *open_file;
+       struct cifs_tcon *tcon =
+               cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
 
-       spin_lock(&cifs_file_list_lock);
+       spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
-                       spin_unlock(&cifs_file_list_lock);
+                       spin_unlock(&tcon->open_file_lock);
                        return 1;
                }
        }
-       spin_unlock(&cifs_file_list_lock);
+       spin_unlock(&tcon->open_file_lock);
        return 0;
 }
 
index 7a3b84e300f8978b80baf43d834feb7fab5d0cfa..9f51b81119f2b204361f25d86dcfd350a8917700 100644 (file)
@@ -189,7 +189,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
        xid = get_xid();
 
        cifs_sb = CIFS_SB(inode->i_sb);
-
+       cifs_dbg(FYI, "cifs ioctl 0x%x\n", command);
        switch (command) {
                case FS_IOC_GETFLAGS:
                        if (pSMBFile == NULL)
@@ -267,11 +267,23 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                        tcon = tlink_tcon(pSMBFile->tlink);
                        rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg);
                        break;
+               case CIFS_ENUMERATE_SNAPSHOTS:
+                       if (pSMBFile == NULL || arg == 0) {
+                               rc = -EINVAL;
+                               goto cifs_ioc_exit;
+                       }
+                       tcon = tlink_tcon(pSMBFile->tlink);
+                       if (tcon->ses->server->ops->enum_snapshots)
+                               rc = tcon->ses->server->ops->enum_snapshots(xid, tcon,
+                                               pSMBFile, (void __user *)arg);
+                       else
+                               rc = -EOPNOTSUPP;
+                       break;
                default:
                        cifs_dbg(FYI, "unsupported ioctl\n");
                        break;
        }
-
+cifs_ioc_exit:
        free_xid(xid);
        return rc;
 }
index 813fe13c2ae175869cdc6db06a19392d09d7f05f..c6729156f9a00cf69938f8ec66c92fde561849ab 100644 (file)
@@ -120,6 +120,7 @@ tconInfoAlloc(void)
                ++ret_buf->tc_count;
                INIT_LIST_HEAD(&ret_buf->openFileList);
                INIT_LIST_HEAD(&ret_buf->tcon_list);
+               spin_lock_init(&ret_buf->open_file_lock);
 #ifdef CONFIG_CIFS_STATS
                spin_lock_init(&ret_buf->stat_lock);
 #endif
@@ -465,7 +466,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
                                continue;
 
                        cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
-                       spin_lock(&cifs_file_list_lock);
+                       spin_lock(&tcon->open_file_lock);
                        list_for_each(tmp2, &tcon->openFileList) {
                                netfile = list_entry(tmp2, struct cifsFileInfo,
                                                     tlist);
@@ -495,11 +496,11 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
                                           &netfile->oplock_break);
                                netfile->oplock_break_cancelled = false;
 
-                               spin_unlock(&cifs_file_list_lock);
+                               spin_unlock(&tcon->open_file_lock);
                                spin_unlock(&cifs_tcp_ses_lock);
                                return true;
                        }
-                       spin_unlock(&cifs_file_list_lock);
+                       spin_unlock(&tcon->open_file_lock);
                        spin_unlock(&cifs_tcp_ses_lock);
                        cifs_dbg(FYI, "No matching file for oplock break\n");
                        return true;
@@ -613,9 +614,9 @@ backup_cred(struct cifs_sb_info *cifs_sb)
 void
 cifs_del_pending_open(struct cifs_pending_open *open)
 {
-       spin_lock(&cifs_file_list_lock);
+       spin_lock(&tlink_tcon(open->tlink)->open_file_lock);
        list_del(&open->olist);
-       spin_unlock(&cifs_file_list_lock);
+       spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
 }
 
 void
@@ -635,7 +636,7 @@ void
 cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
                      struct cifs_pending_open *open)
 {
-       spin_lock(&cifs_file_list_lock);
+       spin_lock(&tlink_tcon(tlink)->open_file_lock);
        cifs_add_pending_open_locked(fid, tlink, open);
-       spin_unlock(&cifs_file_list_lock);
+       spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
 }
index 65cf85dcda09bd773886c8aa6a86d82af1a033b5..8f6a2a5863b9d9275bfb6afb00fc16b867101275 100644 (file)
@@ -597,14 +597,14 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
             is_dir_changed(file)) || (index_to_find < first_entry_in_buffer)) {
                /* close and restart search */
                cifs_dbg(FYI, "search backing up - close and restart search\n");
-               spin_lock(&cifs_file_list_lock);
+               spin_lock(&cfile->file_info_lock);
                if (server->ops->dir_needs_close(cfile)) {
                        cfile->invalidHandle = true;
-                       spin_unlock(&cifs_file_list_lock);
+                       spin_unlock(&cfile->file_info_lock);
                        if (server->ops->close_dir)
                                server->ops->close_dir(xid, tcon, &cfile->fid);
                } else
-                       spin_unlock(&cifs_file_list_lock);
+                       spin_unlock(&cfile->file_info_lock);
                if (cfile->srch_inf.ntwrk_buf_start) {
                        cifs_dbg(FYI, "freeing SMB ff cache buf on search rewind\n");
                        if (cfile->srch_inf.smallBuf)
index 4f0231e685a922efcb97a6c5905bfedc7ca7ece6..1238cd3552f9cc8e8f8fc560117af37cd224aa13 100644 (file)
@@ -266,9 +266,15 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
        struct tcon_link *tlink;
        int rc;
 
+       if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) &&
+           (buf->LastWriteTime == 0) && (buf->ChangeTime == 0) &&
+           (buf->Attributes == 0))
+               return 0; /* would be a no op, no sense sending this */
+
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink))
                return PTR_ERR(tlink);
+
        rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path,
                                FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf,
                                SMB2_OP_SET_INFO);
index 389fb9f8c84e22308ac5fcb44c27f005bee6e27c..3d383489b9cf313395e94a0bff4a719501b4b309 100644 (file)
@@ -549,19 +549,19 @@ smb2_is_valid_lease_break(char *buffer)
                list_for_each(tmp1, &server->smb_ses_list) {
                        ses = list_entry(tmp1, struct cifs_ses, smb_ses_list);
 
-                       spin_lock(&cifs_file_list_lock);
                        list_for_each(tmp2, &ses->tcon_list) {
                                tcon = list_entry(tmp2, struct cifs_tcon,
                                                  tcon_list);
+                               spin_lock(&tcon->open_file_lock);
                                cifs_stats_inc(
                                    &tcon->stats.cifs_stats.num_oplock_brks);
                                if (smb2_tcon_has_lease(tcon, rsp, lw)) {
-                                       spin_unlock(&cifs_file_list_lock);
+                                       spin_unlock(&tcon->open_file_lock);
                                        spin_unlock(&cifs_tcp_ses_lock);
                                        return true;
                                }
+                               spin_unlock(&tcon->open_file_lock);
                        }
-                       spin_unlock(&cifs_file_list_lock);
                }
        }
        spin_unlock(&cifs_tcp_ses_lock);
@@ -603,7 +603,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
                        tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
 
                        cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
-                       spin_lock(&cifs_file_list_lock);
+                       spin_lock(&tcon->open_file_lock);
                        list_for_each(tmp2, &tcon->openFileList) {
                                cfile = list_entry(tmp2, struct cifsFileInfo,
                                                     tlist);
@@ -615,7 +615,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
 
                                cifs_dbg(FYI, "file id match, oplock break\n");
                                cinode = CIFS_I(d_inode(cfile->dentry));
-
+                               spin_lock(&cfile->file_info_lock);
                                if (!CIFS_CACHE_WRITE(cinode) &&
                                    rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE)
                                        cfile->oplock_break_cancelled = true;
@@ -637,14 +637,14 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
                                        clear_bit(
                                           CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
                                           &cinode->flags);
-
+                               spin_unlock(&cfile->file_info_lock);
                                queue_work(cifsiod_wq, &cfile->oplock_break);
 
-                               spin_unlock(&cifs_file_list_lock);
+                               spin_unlock(&tcon->open_file_lock);
                                spin_unlock(&cifs_tcp_ses_lock);
                                return true;
                        }
-                       spin_unlock(&cifs_file_list_lock);
+                       spin_unlock(&tcon->open_file_lock);
                        spin_unlock(&cifs_tcp_ses_lock);
                        cifs_dbg(FYI, "No matching file for oplock break\n");
                        return true;
index d203c0329626cd41ee05fd75da9ee4439304bdc1..5d456ebb381386e5299cf61123400f4985480e9a 100644 (file)
@@ -28,6 +28,7 @@
 #include "cifs_unicode.h"
 #include "smb2status.h"
 #include "smb2glob.h"
+#include "cifs_ioctl.h"
 
 static int
 change_conf(struct TCP_Server_Info *server)
@@ -70,6 +71,10 @@ smb2_add_credits(struct TCP_Server_Info *server, const unsigned int add,
        spin_lock(&server->req_lock);
        val = server->ops->get_credits_field(server, optype);
        *val += add;
+       if (*val > 65000) {
+               *val = 65000; /* Don't get near 64K credits, avoid srv bugs */
+               printk_once(KERN_WARNING "server overflowed SMB3 credits\n");
+       }
        server->in_flight--;
        if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP)
                rc = change_conf(server);
@@ -287,7 +292,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
                cifs_dbg(FYI, "Link Speed %lld\n",
                        le64_to_cpu(out_buf->LinkSpeed));
        }
-
+       kfree(out_buf);
        return rc;
 }
 #endif /* STATS2 */
@@ -541,6 +546,7 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
        server->ops->set_oplock_level(cinode, oplock, fid->epoch,
                                      &fid->purge_cache);
        cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
+       memcpy(cfile->fid.create_guid, fid->create_guid, 16);
 }
 
 static void
@@ -699,6 +705,7 @@ smb2_clone_range(const unsigned int xid,
 
 cchunk_out:
        kfree(pcchunk);
+       kfree(retbuf);
        return rc;
 }
 
@@ -823,7 +830,6 @@ smb2_duplicate_extents(const unsigned int xid,
 {
        int rc;
        unsigned int ret_data_len;
-       char *retbuf = NULL;
        struct duplicate_extents_to_file dup_ext_buf;
        struct cifs_tcon *tcon = tlink_tcon(trgtfile->tlink);
 
@@ -849,7 +855,7 @@ smb2_duplicate_extents(const unsigned int xid,
                        FSCTL_DUPLICATE_EXTENTS_TO_FILE,
                        true /* is_fsctl */, (char *)&dup_ext_buf,
                        sizeof(struct duplicate_extents_to_file),
-                       (char **)&retbuf,
+                       NULL,
                        &ret_data_len);
 
        if (ret_data_len > 0)
@@ -872,7 +878,6 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
                   struct cifsFileInfo *cfile)
 {
        struct fsctl_set_integrity_information_req integr_info;
-       char *retbuf = NULL;
        unsigned int ret_data_len;
 
        integr_info.ChecksumAlgorithm = cpu_to_le16(CHECKSUM_TYPE_UNCHANGED);
@@ -884,9 +889,53 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
                        FSCTL_SET_INTEGRITY_INFORMATION,
                        true /* is_fsctl */, (char *)&integr_info,
                        sizeof(struct fsctl_set_integrity_information_req),
+                       NULL,
+                       &ret_data_len);
+
+}
+
+/* Enumerate snapshots; copy at most snapshot_array_size bytes to ioc_buf. */
+static int
+smb3_enum_snapshots(const unsigned int xid, struct cifs_tcon *tcon,
+                  struct cifsFileInfo *cfile, void __user *ioc_buf)
+{
+       char *retbuf = NULL;
+       unsigned int ret_data_len = 0;
+       int rc;
+       struct smb_snapshot_array snapshot_in;
+
+       rc = SMB2_ioctl(xid, tcon, cfile->fid.persistent_fid,
+                       cfile->fid.volatile_fid,
+                       FSCTL_SRV_ENUMERATE_SNAPSHOTS,
+                       true /* is_fsctl */, NULL, 0 /* no input data */,
                        (char **)&retbuf,
                        &ret_data_len);
+       cifs_dbg(FYI, "enum snaphots ioctl returned %d and ret buflen is %d\n",
+                       rc, ret_data_len);
+       if (rc)
+               return rc;
 
+       if (ret_data_len && (ioc_buf != NULL) && (retbuf != NULL)) {
+               /* Fixup buffer */
+               if (copy_from_user(&snapshot_in, ioc_buf,
+                   sizeof(struct smb_snapshot_array))) {
+                       rc = -EFAULT;
+                       goto out;
+               }
+               if (snapshot_in.snapshot_array_size < sizeof(struct smb_snapshot_array)) {
+                       rc = -ERANGE;
+                       goto out;
+               }
+
+               if (ret_data_len > snapshot_in.snapshot_array_size)
+                       ret_data_len = snapshot_in.snapshot_array_size;
+
+               if (copy_to_user(ioc_buf, retbuf, ret_data_len))
+                       rc = -EFAULT;
+       }
+out:
+       kfree(retbuf);
+       return rc;
 }
 
 static int
@@ -1041,7 +1090,7 @@ smb2_set_lease_key(struct inode *inode, struct cifs_fid *fid)
 static void
 smb2_new_lease_key(struct cifs_fid *fid)
 {
-       get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE);
+       generate_random_uuid(fid->lease_key);
 }
 
 #define SMB2_SYMLINK_STRUCT_SIZE \
@@ -1654,6 +1703,7 @@ struct smb_version_operations smb21_operations = {
        .clone_range = smb2_clone_range,
        .wp_retry_size = smb2_wp_retry_size,
        .dir_needs_close = smb2_dir_needs_close,
+       .enum_snapshots = smb3_enum_snapshots,
 };
 
 struct smb_version_operations smb30_operations = {
@@ -1740,6 +1790,7 @@ struct smb_version_operations smb30_operations = {
        .wp_retry_size = smb2_wp_retry_size,
        .dir_needs_close = smb2_dir_needs_close,
        .fallocate = smb3_fallocate,
+       .enum_snapshots = smb3_enum_snapshots,
 };
 
 #ifdef CONFIG_CIFS_SMB311
@@ -1827,6 +1878,7 @@ struct smb_version_operations smb311_operations = {
        .wp_retry_size = smb2_wp_retry_size,
        .dir_needs_close = smb2_dir_needs_close,
        .fallocate = smb3_fallocate,
+       .enum_snapshots = smb3_enum_snapshots,
 };
 #endif /* CIFS_SMB311 */
 
index 29e06db5f187bea7d4f5ce29d2cf5c0faad6ae09..5ca5ea4668a1482ef9643cd525ded6ebbaa7e326 100644 (file)
@@ -100,7 +100,21 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
        hdr->ProtocolId = SMB2_PROTO_NUMBER;
        hdr->StructureSize = cpu_to_le16(64);
        hdr->Command = smb2_cmd;
-       hdr->CreditRequest = cpu_to_le16(2); /* BB make this dynamic */
+       if (tcon && tcon->ses && tcon->ses->server) {
+               struct TCP_Server_Info *server = tcon->ses->server;
+
+               spin_lock(&server->req_lock);
+               /* Request up to 2 credits but don't go over the limit. */
+               if (server->credits >= server->max_credits)
+                       hdr->CreditRequest = cpu_to_le16(0);
+               else
+                       hdr->CreditRequest = cpu_to_le16(
+                               min_t(int, server->max_credits -
+                                               server->credits, 2));
+               spin_unlock(&server->req_lock);
+       } else {
+               hdr->CreditRequest = cpu_to_le16(2);
+       }
        hdr->ProcessId = cpu_to_le32((__u16)current->tgid);
 
        if (!tcon)
@@ -236,8 +250,13 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
        }
 
        cifs_mark_open_files_invalid(tcon);
+
        rc = SMB2_tcon(0, tcon->ses, tcon->treeName, tcon, nls_codepage);
        mutex_unlock(&tcon->ses->session_mutex);
+
+       if (tcon->use_persistent)
+               cifs_reopen_persistent_handles(tcon);
+
        cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc);
        if (rc)
                goto out;
@@ -574,59 +593,42 @@ vneg_out:
        return -EIO;
 }
 
-int
-SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
-               const struct nls_table *nls_cp)
+struct SMB2_sess_data {
+       unsigned int xid;
+       struct cifs_ses *ses;
+       struct nls_table *nls_cp;
+       void (*func)(struct SMB2_sess_data *);
+       int result;
+       u64 previous_session;
+
+       /* we will send the SMB in three pieces:
+        * a fixed length beginning part, an optional
+        * SPNEGO blob (which can be zero length), and a
+        * last part which will include the strings
+        * and rest of bcc area. This allows us to avoid
+        * a large buffer 17K allocation
+        */
+       int buf0_type;
+       struct kvec iov[2];
+};
+
+static int
+SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
 {
+       int rc;
+       struct cifs_ses *ses = sess_data->ses;
        struct smb2_sess_setup_req *req;
-       struct smb2_sess_setup_rsp *rsp = NULL;
-       struct kvec iov[2];
-       int rc = 0;
-       int resp_buftype = CIFS_NO_BUFFER;
-       __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
        struct TCP_Server_Info *server = ses->server;
-       u16 blob_length = 0;
-       struct key *spnego_key = NULL;
-       char *security_blob = NULL;
-       unsigned char *ntlmssp_blob = NULL;
-       bool use_spnego = false; /* else use raw ntlmssp */
-
-       cifs_dbg(FYI, "Session Setup\n");
-
-       if (!server) {
-               WARN(1, "%s: server is NULL!\n", __func__);
-               return -EIO;
-       }
-
-       /*
-        * If we are here due to reconnect, free per-smb session key
-        * in case signing was required.
-        */
-       kfree(ses->auth_key.response);
-       ses->auth_key.response = NULL;
-
-       /*
-        * If memory allocation is successful, caller of this function
-        * frees it.
-        */
-       ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
-       if (!ses->ntlmssp)
-               return -ENOMEM;
-       ses->ntlmssp->sesskey_per_smbsess = true;
-
-       /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */
-       if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP)
-               ses->sectype = RawNTLMSSP;
-
-ssetup_ntlmssp_authenticate:
-       if (phase == NtLmChallenge)
-               phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
 
        rc = small_smb2_init(SMB2_SESSION_SETUP, NULL, (void **) &req);
        if (rc)
                return rc;
 
        req->hdr.SessionId = 0; /* First session, not a reauthenticate */
+
+       /* if reconnect, we need to send previous sess id, otherwise it is 0 */
+       req->PreviousSessionId = sess_data->previous_session;
+
        req->Flags = 0; /* MBZ */
        /* to enable echos and oplocks */
        req->hdr.CreditRequest = cpu_to_le16(3);
@@ -642,199 +644,368 @@ ssetup_ntlmssp_authenticate:
        req->Capabilities = 0;
        req->Channel = 0; /* MBZ */
 
-       iov[0].iov_base = (char *)req;
+       sess_data->iov[0].iov_base = (char *)req;
        /* 4 for rfc1002 length field and 1 for pad */
-       iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+       sess_data->iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
+       /*
+        * This variable will be used to clear the buffer
+        * allocated above in case of any error in the calling function.
+        */
+       sess_data->buf0_type = CIFS_SMALL_BUFFER;
 
-       if (ses->sectype == Kerberos) {
-#ifdef CONFIG_CIFS_UPCALL
-               struct cifs_spnego_msg *msg;
+       return 0;
+}
 
-               spnego_key = cifs_get_spnego_key(ses);
-               if (IS_ERR(spnego_key)) {
-                       rc = PTR_ERR(spnego_key);
-                       spnego_key = NULL;
-                       goto ssetup_exit;
-               }
+static void
+SMB2_sess_free_buffer(struct SMB2_sess_data *sess_data)
+{
+       free_rsp_buf(sess_data->buf0_type, sess_data->iov[0].iov_base);
+       sess_data->buf0_type = CIFS_NO_BUFFER;
+}
 
-               msg = spnego_key->payload.data[0];
-               /*
-                * check version field to make sure that cifs.upcall is
-                * sending us a response in an expected form
-                */
-               if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
-                       cifs_dbg(VFS,
-                                 "bad cifs.upcall version. Expected %d got %d",
-                                 CIFS_SPNEGO_UPCALL_VERSION, msg->version);
-                       rc = -EKEYREJECTED;
-                       goto ssetup_exit;
-               }
-               ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
-                                                GFP_KERNEL);
-               if (!ses->auth_key.response) {
-                       cifs_dbg(VFS,
-                               "Kerberos can't allocate (%u bytes) memory",
-                               msg->sesskey_len);
-                       rc = -ENOMEM;
-                       goto ssetup_exit;
-               }
-               ses->auth_key.len = msg->sesskey_len;
-               blob_length = msg->secblob_len;
-               iov[1].iov_base = msg->data + msg->sesskey_len;
-               iov[1].iov_len = blob_length;
-#else
-               rc = -EOPNOTSUPP;
-               goto ssetup_exit;
-#endif /* CONFIG_CIFS_UPCALL */
-       } else if (phase == NtLmNegotiate) { /* if not krb5 must be ntlmssp */
-               ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE),
-                                      GFP_KERNEL);
-               if (ntlmssp_blob == NULL) {
-                       rc = -ENOMEM;
-                       goto ssetup_exit;
-               }
-               build_ntlmssp_negotiate_blob(ntlmssp_blob, ses);
-               if (use_spnego) {
-                       /* blob_length = build_spnego_ntlmssp_blob(
-                                       &security_blob,
-                                       sizeof(struct _NEGOTIATE_MESSAGE),
-                                       ntlmssp_blob); */
-                       /* BB eventually need to add this */
-                       cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
-                       rc = -EOPNOTSUPP;
-                       kfree(ntlmssp_blob);
-                       goto ssetup_exit;
-               } else {
-                       blob_length = sizeof(struct _NEGOTIATE_MESSAGE);
-                       /* with raw NTLMSSP we don't encapsulate in SPNEGO */
-                       security_blob = ntlmssp_blob;
-               }
-               iov[1].iov_base = security_blob;
-               iov[1].iov_len = blob_length;
-       } else if (phase == NtLmAuthenticate) {
-               req->hdr.SessionId = ses->Suid;
-               rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
-                                            nls_cp);
-               if (rc) {
-                       cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n",
-                                rc);
-                       goto ssetup_exit; /* BB double check error handling */
-               }
-               if (use_spnego) {
-                       /* blob_length = build_spnego_ntlmssp_blob(
-                                                       &security_blob,
-                                                       blob_length,
-                                                       ntlmssp_blob); */
-                       cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
-                       rc = -EOPNOTSUPP;
-                       kfree(ntlmssp_blob);
-                       goto ssetup_exit;
-               } else {
-                       security_blob = ntlmssp_blob;
-               }
-               iov[1].iov_base = security_blob;
-               iov[1].iov_len = blob_length;
-       } else {
-               cifs_dbg(VFS, "illegal ntlmssp phase\n");
-               rc = -EIO;
-               goto ssetup_exit;
-       }
+static int
+SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
+{
+       int rc;
+       struct smb2_sess_setup_req *req = sess_data->iov[0].iov_base;
 
        /* Testing shows that buffer offset must be at location of Buffer[0] */
        req->SecurityBufferOffset =
-                               cpu_to_le16(sizeof(struct smb2_sess_setup_req) -
-                                           1 /* pad */ - 4 /* rfc1001 len */);
-       req->SecurityBufferLength = cpu_to_le16(blob_length);
+               cpu_to_le16(sizeof(struct smb2_sess_setup_req) -
+                       1 /* pad */ - 4 /* rfc1001 len */);
+       req->SecurityBufferLength = cpu_to_le16(sess_data->iov[1].iov_len);
 
-       inc_rfc1001_len(req, blob_length - 1 /* pad */);
+       inc_rfc1001_len(req, sess_data->iov[1].iov_len - 1 /* pad */);
 
        /* BB add code to build os and lm fields */
 
-       rc = SendReceive2(xid, ses, iov, 2, &resp_buftype,
-                         CIFS_LOG_ERROR | CIFS_NEG_OP);
+       rc = SendReceive2(sess_data->xid, sess_data->ses,
+                               sess_data->iov, 2,
+                               &sess_data->buf0_type,
+                               CIFS_LOG_ERROR | CIFS_NEG_OP);
 
-       kfree(security_blob);
-       rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base;
-       ses->Suid = rsp->hdr.SessionId;
-       if (resp_buftype != CIFS_NO_BUFFER &&
-           rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) {
-               if (phase != NtLmNegotiate) {
-                       cifs_dbg(VFS, "Unexpected more processing error\n");
-                       goto ssetup_exit;
-               }
-               if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 !=
-                               le16_to_cpu(rsp->SecurityBufferOffset)) {
-                       cifs_dbg(VFS, "Invalid security buffer offset %d\n",
-                                le16_to_cpu(rsp->SecurityBufferOffset));
-                       rc = -EIO;
-                       goto ssetup_exit;
+       return rc;
+}
+
+static int
+SMB2_sess_establish_session(struct SMB2_sess_data *sess_data)
+{
+       int rc = 0;
+       struct cifs_ses *ses = sess_data->ses;
+
+       mutex_lock(&ses->server->srv_mutex);
+       if (ses->server->sign && ses->server->ops->generate_signingkey) {
+               rc = ses->server->ops->generate_signingkey(ses);
+               kfree(ses->auth_key.response);
+               ses->auth_key.response = NULL;
+               if (rc) {
+                       cifs_dbg(FYI,
+                               "SMB3 session key generation failed\n");
+                       mutex_unlock(&ses->server->srv_mutex);
+                       goto keygen_exit;
                }
+       }
+       if (!ses->server->session_estab) {
+               ses->server->sequence_number = 0x2;
+               ses->server->session_estab = true;
+       }
+       mutex_unlock(&ses->server->srv_mutex);
+
+       cifs_dbg(FYI, "SMB2/3 session established successfully\n");
+       spin_lock(&GlobalMid_Lock);
+       ses->status = CifsGood;
+       ses->need_reconnect = false;
+       spin_unlock(&GlobalMid_Lock);
 
-               /* NTLMSSP Negotiate sent now processing challenge (response) */
-               phase = NtLmChallenge; /* process ntlmssp challenge */
-               rc = 0; /* MORE_PROCESSING is not an error here but expected */
-               rc = decode_ntlmssp_challenge(rsp->Buffer,
-                               le16_to_cpu(rsp->SecurityBufferLength), ses);
+keygen_exit:
+       if (!ses->server->sign) {
+               kfree(ses->auth_key.response);
+               ses->auth_key.response = NULL;
+       }
+       return rc;
+}
+
+#ifdef CONFIG_CIFS_UPCALL
+static void
+SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
+{
+       int rc;
+       struct cifs_ses *ses = sess_data->ses;
+       struct cifs_spnego_msg *msg;
+       struct key *spnego_key = NULL;
+       struct smb2_sess_setup_rsp *rsp = NULL;
+
+       rc = SMB2_sess_alloc_buffer(sess_data);
+       if (rc)
+               goto out;
+
+       spnego_key = cifs_get_spnego_key(ses);
+       if (IS_ERR(spnego_key)) {
+               rc = PTR_ERR(spnego_key);
+               spnego_key = NULL;
+               goto out;
        }
 
+       msg = spnego_key->payload.data[0];
        /*
-        * BB eventually add code for SPNEGO decoding of NtlmChallenge blob,
-        * but at least the raw NTLMSSP case works.
+        * check version field to make sure that cifs.upcall is
+        * sending us a response in an expected form
         */
+       if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
+               cifs_dbg(VFS,
+                         "bad cifs.upcall version. Expected %d got %d",
+                         CIFS_SPNEGO_UPCALL_VERSION, msg->version);
+               rc = -EKEYREJECTED;
+               goto out_put_spnego_key;
+       }
+
+       ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
+                                        GFP_KERNEL);
+       if (!ses->auth_key.response) {
+               cifs_dbg(VFS,
+                       "Kerberos can't allocate (%u bytes) memory",
+                       msg->sesskey_len);
+               rc = -ENOMEM;
+               goto out_put_spnego_key;
+       }
+       ses->auth_key.len = msg->sesskey_len;
+
+       sess_data->iov[1].iov_base = msg->data + msg->sesskey_len;
+       sess_data->iov[1].iov_len = msg->secblob_len;
+
+       rc = SMB2_sess_sendreceive(sess_data);
+       if (rc)
+               goto out_put_spnego_key;
+
+       rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
+       ses->Suid = rsp->hdr.SessionId;
+
+       ses->session_flags = le16_to_cpu(rsp->SessionFlags);
+       if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
+               cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
+
+       rc = SMB2_sess_establish_session(sess_data);
+out_put_spnego_key:
+       key_invalidate(spnego_key);
+       key_put(spnego_key);
+out:
+       sess_data->result = rc;
+       sess_data->func = NULL;
+       SMB2_sess_free_buffer(sess_data);
+}
+#else
+static void
+SMB2_auth_kerberos(struct SMB2_sess_data *sess_data)
+{
+       cifs_dbg(VFS, "Kerberos negotiated but upcall support disabled!\n");
+       sess_data->result = -EOPNOTSUPP;
+       sess_data->func = NULL;
+}
+#endif
+
+static void
+SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data);
+
+static void
+SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
+{
+       int rc;
+       struct cifs_ses *ses = sess_data->ses;
+       struct smb2_sess_setup_rsp *rsp = NULL;
+       char *ntlmssp_blob = NULL;
+       bool use_spnego = false; /* else use raw ntlmssp */
+       u16 blob_length = 0;
+
        /*
-        * No tcon so can't do
-        * cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]);
+        * If memory allocation is successful, caller of this function
+        * frees it.
         */
-       if (rc != 0)
-               goto ssetup_exit;
+       ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
+       if (!ses->ntlmssp) {
+               rc = -ENOMEM;
+               goto out_err;
+       }
+       ses->ntlmssp->sesskey_per_smbsess = true;
+
+       rc = SMB2_sess_alloc_buffer(sess_data);
+       if (rc)
+               goto out_err;
+
+       ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE),
+                              GFP_KERNEL);
+       if (ntlmssp_blob == NULL) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       build_ntlmssp_negotiate_blob(ntlmssp_blob, ses);
+       if (use_spnego) {
+               /* BB eventually need to add this */
+               cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
+               rc = -EOPNOTSUPP;
+               goto out;
+       } else {
+               blob_length = sizeof(struct _NEGOTIATE_MESSAGE);
+               /* with raw NTLMSSP we don't encapsulate in SPNEGO */
+       }
+       sess_data->iov[1].iov_base = ntlmssp_blob;
+       sess_data->iov[1].iov_len = blob_length;
+
+       rc = SMB2_sess_sendreceive(sess_data);
+       rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
+
+       /* If true, rc here is expected and not an error */
+       if (sess_data->buf0_type != CIFS_NO_BUFFER &&
+               rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED)
+               rc = 0;
+
+       if (rc)
+               goto out;
+
+       if (offsetof(struct smb2_sess_setup_rsp, Buffer) - 4 !=
+                       le16_to_cpu(rsp->SecurityBufferOffset)) {
+               cifs_dbg(VFS, "Invalid security buffer offset %d\n",
+                       le16_to_cpu(rsp->SecurityBufferOffset));
+               rc = -EIO;
+               goto out;
+       }
+       rc = decode_ntlmssp_challenge(rsp->Buffer,
+                       le16_to_cpu(rsp->SecurityBufferLength), ses);
+       if (rc)
+               goto out;
+
+       cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n");
+
 
+       ses->Suid = rsp->hdr.SessionId;
        ses->session_flags = le16_to_cpu(rsp->SessionFlags);
        if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
                cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
-ssetup_exit:
-       free_rsp_buf(resp_buftype, rsp);
-
-       /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */
-       if ((phase == NtLmChallenge) && (rc == 0))
-               goto ssetup_ntlmssp_authenticate;
 
+out:
+       kfree(ntlmssp_blob);
+       SMB2_sess_free_buffer(sess_data);
        if (!rc) {
-               mutex_lock(&server->srv_mutex);
-               if (server->sign && server->ops->generate_signingkey) {
-                       rc = server->ops->generate_signingkey(ses);
-                       kfree(ses->auth_key.response);
-                       ses->auth_key.response = NULL;
-                       if (rc) {
-                               cifs_dbg(FYI,
-                                       "SMB3 session key generation failed\n");
-                               mutex_unlock(&server->srv_mutex);
-                               goto keygen_exit;
-                       }
-               }
-               if (!server->session_estab) {
-                       server->sequence_number = 0x2;
-                       server->session_estab = true;
-               }
-               mutex_unlock(&server->srv_mutex);
-
-               cifs_dbg(FYI, "SMB2/3 session established successfully\n");
-               spin_lock(&GlobalMid_Lock);
-               ses->status = CifsGood;
-               ses->need_reconnect = false;
-               spin_unlock(&GlobalMid_Lock);
+               sess_data->result = 0;
+               sess_data->func = SMB2_sess_auth_rawntlmssp_authenticate;
+               return;
        }
+out_err:
+       kfree(ses->ntlmssp);
+       ses->ntlmssp = NULL;
+       sess_data->result = rc;
+       sess_data->func = NULL;
+}
 
-keygen_exit:
-       if (!server->sign) {
-               kfree(ses->auth_key.response);
-               ses->auth_key.response = NULL;
+static void
+SMB2_sess_auth_rawntlmssp_authenticate(struct SMB2_sess_data *sess_data)
+{
+       int rc;
+       struct cifs_ses *ses = sess_data->ses;
+       struct smb2_sess_setup_req *req;
+       struct smb2_sess_setup_rsp *rsp = NULL;
+       unsigned char *ntlmssp_blob = NULL;
+       bool use_spnego = false; /* else use raw ntlmssp */
+       u16 blob_length = 0;
+
+       rc = SMB2_sess_alloc_buffer(sess_data);
+       if (rc)
+               goto out;
+
+       req = (struct smb2_sess_setup_req *) sess_data->iov[0].iov_base;
+       req->hdr.SessionId = ses->Suid;
+
+       rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
+                                       sess_data->nls_cp);
+       if (rc) {
+               cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n", rc);
+               goto out;
        }
-       if (spnego_key) {
-               key_invalidate(spnego_key);
-               key_put(spnego_key);
+
+       if (use_spnego) {
+               /* BB eventually need to add this */
+               cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
+               rc = -EOPNOTSUPP;
+               goto out;
        }
+       sess_data->iov[1].iov_base = ntlmssp_blob;
+       sess_data->iov[1].iov_len = blob_length;
+
+       rc = SMB2_sess_sendreceive(sess_data);
+       if (rc)
+               goto out;
+
+       rsp = (struct smb2_sess_setup_rsp *)sess_data->iov[0].iov_base;
+
+       ses->Suid = rsp->hdr.SessionId;
+       ses->session_flags = le16_to_cpu(rsp->SessionFlags);
+       if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
+               cifs_dbg(VFS, "SMB3 encryption not supported yet\n");
+
+       rc = SMB2_sess_establish_session(sess_data);
+out:
+       kfree(ntlmssp_blob);
+       SMB2_sess_free_buffer(sess_data);
        kfree(ses->ntlmssp);
+       ses->ntlmssp = NULL;
+       sess_data->result = rc;
+       sess_data->func = NULL;
+}
 
+static int
+SMB2_select_sec(struct cifs_ses *ses, struct SMB2_sess_data *sess_data)
+{
+       if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP)
+               ses->sectype = RawNTLMSSP;
+
+       switch (ses->sectype) {
+       case Kerberos:
+               sess_data->func = SMB2_auth_kerberos;
+               break;
+       case RawNTLMSSP:
+               sess_data->func = SMB2_sess_auth_rawntlmssp_negotiate;
+               break;
+       default:
+               cifs_dbg(VFS, "secType %d not supported!\n", ses->sectype);
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+int
+SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
+               const struct nls_table *nls_cp)
+{
+       int rc = 0;
+       struct TCP_Server_Info *server = ses->server;
+       struct SMB2_sess_data *sess_data;
+
+       cifs_dbg(FYI, "Session Setup\n");
+
+       if (!server) {
+               WARN(1, "%s: server is NULL!\n", __func__);
+               return -EIO;
+       }
+
+       sess_data = kzalloc(sizeof(struct SMB2_sess_data), GFP_KERNEL);
+       if (!sess_data)
+               return -ENOMEM;
+
+       rc = SMB2_select_sec(ses, sess_data);
+       if (rc)
+               goto out;
+       sess_data->xid = xid;
+       sess_data->ses = ses;
+       sess_data->buf0_type = CIFS_NO_BUFFER;
+       sess_data->nls_cp = (struct nls_table *) nls_cp;
+
+       while (sess_data->func)
+               sess_data->func(sess_data);
+
+       rc = sess_data->result;
+out:
+       kfree(sess_data);
        return rc;
 }
 
@@ -1164,7 +1335,7 @@ create_durable_v2_buf(struct cifs_fid *pfid)
 
        buf->dcontext.Timeout = 0; /* Should this be configurable by workload */
        buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT);
-       get_random_bytes(buf->dcontext.CreateGuid, 16);
+       generate_random_uuid(buf->dcontext.CreateGuid);
        memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16);
 
        /* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DH2Q" */
@@ -2057,6 +2228,7 @@ smb2_async_readv(struct cifs_readdata *rdata)
        if (rdata->credits) {
                buf->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
                                                SMB2_MAX_BUFFER_SIZE));
+               buf->CreditRequest = buf->CreditCharge;
                spin_lock(&server->req_lock);
                server->credits += rdata->credits -
                                                le16_to_cpu(buf->CreditCharge);
@@ -2243,6 +2415,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
        if (wdata->credits) {
                req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
                                                    SMB2_MAX_BUFFER_SIZE));
+               req->hdr.CreditRequest = req->hdr.CreditCharge;
                spin_lock(&server->req_lock);
                server->credits += wdata->credits -
                                        le16_to_cpu(req->hdr.CreditCharge);
index ff88d9feb01e7475f75a230c004d5f40a80f14f9..fd3709e8de33eddee8c66cca29cb6653921b020e 100644 (file)
@@ -276,7 +276,7 @@ struct smb2_sess_setup_req {
        __le32 Channel;
        __le16 SecurityBufferOffset;
        __le16 SecurityBufferLength;
-       __le64 PreviousSessionId;
+       __u64 PreviousSessionId;
        __u8   Buffer[1];       /* variable length GSS security buffer */
 } __packed;
 
index 5e23f64c0804ba647017cfa82808c4da85ff4760..20af5187ba63cc14150185952a3f7cdf9526556d 100644 (file)
@@ -33,7 +33,8 @@
 
 #define MAX_EA_VALUE_SIZE 65535
 #define CIFS_XATTR_CIFS_ACL "system.cifs_acl"
-
+#define CIFS_XATTR_ATTRIB "cifs.dosattrib"  /* full name: user.cifs.dosattrib */
+#define CIFS_XATTR_CREATETIME "cifs.creationtime"  /* user.cifs.creationtime */
 /* BB need to add server (Samba e.g) support for security and trusted prefix */
 
 enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT };
@@ -144,6 +145,54 @@ out:
        return rc;
 }
 
+static int cifs_attrib_get(struct dentry *dentry,
+                          struct inode *inode, void *value,
+                          size_t size)
+{
+       ssize_t rc;
+       __u32 *pattribute;
+
+       rc = cifs_revalidate_dentry_attr(dentry);
+
+       if (rc)
+               return rc;
+
+       if ((value == NULL) || (size == 0))
+               return sizeof(__u32);
+       else if (size < sizeof(__u32))
+               return -ERANGE;
+
+       /* return dos attributes as pseudo xattr */
+       pattribute = (__u32 *)value;
+       *pattribute = CIFS_I(inode)->cifsAttrs;
+
+       return sizeof(__u32);
+}
+
+static int cifs_creation_time_get(struct dentry *dentry, struct inode *inode,
+                                 void *value, size_t size)
+{
+       ssize_t rc;
+       __u64 * pcreatetime;
+
+       rc = cifs_revalidate_dentry_attr(dentry);
+       if (rc)
+               return rc;
+
+       if ((value == NULL) || (size == 0))
+               return sizeof(__u64);
+       else if (size < sizeof(__u64))
+               return -ERANGE;
+
+       /* return creation time as pseudo xattr */
+       pcreatetime = (__u64 *)value;
+       *pcreatetime = CIFS_I(inode)->createtime;
+       return sizeof(__u64);
+
+       return rc;
+}
+
+
 static int cifs_xattr_get(const struct xattr_handler *handler,
                          struct dentry *dentry, struct inode *inode,
                          const char *name, void *value, size_t size)
@@ -168,10 +217,19 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
                rc = -ENOMEM;
                goto out;
        }
-       /* return dos attributes as pseudo xattr */
+
        /* return alt name if available as pseudo attr */
        switch (handler->flags) {
        case XATTR_USER:
+               cifs_dbg(FYI, "%s:querying user xattr %s\n", __func__, name);
+               if (strcmp(name, CIFS_XATTR_ATTRIB) == 0) {
+                       rc = cifs_attrib_get(dentry, inode, value, size);
+                       break;
+               } else if (strcmp(name, CIFS_XATTR_CREATETIME) == 0) {
+                       rc = cifs_creation_time_get(dentry, inode, value, size);
+                       break;
+               }
+
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
                        goto out;
 
index 281b768000e664e4d4ef9092d4bb567d003623a0..eb9c92c9b20f5de5e325d7a05e5d055d0816989e 100644 (file)
@@ -1,6 +1,7 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/fdtable.h>
+#include <linux/freezer.h>
 #include <linux/mm.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
@@ -423,7 +424,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
        if (core_waiters > 0) {
                struct core_thread *ptr;
 
+               freezer_do_not_count();
                wait_for_completion(&core_state->startup);
+               freezer_count();
                /*
                 * Wait for all the threads to become inactive, so that
                 * all the thread context (extended register state, like
index 61057b7dbddbe17532940f02677668bb9d67152b..98f87fe8f1862d57866b211abeb76e19837d550a 100644 (file)
@@ -151,7 +151,10 @@ static int do_page_crypto(struct inode *inode,
                        struct page *src_page, struct page *dest_page,
                        gfp_t gfp_flags)
 {
-       u8 xts_tweak[FS_XTS_TWEAK_SIZE];
+       struct {
+               __le64 index;
+               u8 padding[FS_XTS_TWEAK_SIZE - sizeof(__le64)];
+       } xts_tweak;
        struct skcipher_request *req = NULL;
        DECLARE_FS_COMPLETION_RESULT(ecr);
        struct scatterlist dst, src;
@@ -171,17 +174,15 @@ static int do_page_crypto(struct inode *inode,
                req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
                page_crypt_complete, &ecr);
 
-       BUILD_BUG_ON(FS_XTS_TWEAK_SIZE < sizeof(index));
-       memcpy(xts_tweak, &index, sizeof(index));
-       memset(&xts_tweak[sizeof(index)], 0,
-                       FS_XTS_TWEAK_SIZE - sizeof(index));
+       BUILD_BUG_ON(sizeof(xts_tweak) != FS_XTS_TWEAK_SIZE);
+       xts_tweak.index = cpu_to_le64(index);
+       memset(xts_tweak.padding, 0, sizeof(xts_tweak.padding));
 
        sg_init_table(&dst, 1);
        sg_set_page(&dst, dest_page, PAGE_SIZE, 0);
        sg_init_table(&src, 1);
        sg_set_page(&src, src_page, PAGE_SIZE, 0);
-       skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE,
-                                       xts_tweak);
+       skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, &xts_tweak);
        if (rw == FS_DECRYPT)
                res = crypto_skcipher_decrypt(req);
        else
index 9a28133ac3b848fc04fdab2e5a91b9e671820f56..9b774f4b50c89e0b4c8e6c26b3eae1595f4dc62f 100644 (file)
@@ -39,65 +39,54 @@ static void fname_crypt_complete(struct crypto_async_request *req, int res)
 static int fname_encrypt(struct inode *inode,
                        const struct qstr *iname, struct fscrypt_str *oname)
 {
-       u32 ciphertext_len;
        struct skcipher_request *req = NULL;
        DECLARE_FS_COMPLETION_RESULT(ecr);
        struct fscrypt_info *ci = inode->i_crypt_info;
        struct crypto_skcipher *tfm = ci->ci_ctfm;
        int res = 0;
        char iv[FS_CRYPTO_BLOCK_SIZE];
-       struct scatterlist src_sg, dst_sg;
+       struct scatterlist sg;
        int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK);
-       char *workbuf, buf[32], *alloc_buf = NULL;
-       unsigned lim;
+       unsigned int lim;
+       unsigned int cryptlen;
 
        lim = inode->i_sb->s_cop->max_namelen(inode);
        if (iname->len <= 0 || iname->len > lim)
                return -EIO;
 
-       ciphertext_len = max(iname->len, (u32)FS_CRYPTO_BLOCK_SIZE);
-       ciphertext_len = round_up(ciphertext_len, padding);
-       ciphertext_len = min(ciphertext_len, lim);
+       /*
+        * Copy the filename to the output buffer for encrypting in-place and
+        * pad it with the needed number of NUL bytes.
+        */
+       cryptlen = max_t(unsigned int, iname->len, FS_CRYPTO_BLOCK_SIZE);
+       cryptlen = round_up(cryptlen, padding);
+       cryptlen = min(cryptlen, lim);
+       memcpy(oname->name, iname->name, iname->len);
+       memset(oname->name + iname->len, 0, cryptlen - iname->len);
 
-       if (ciphertext_len <= sizeof(buf)) {
-               workbuf = buf;
-       } else {
-               alloc_buf = kmalloc(ciphertext_len, GFP_NOFS);
-               if (!alloc_buf)
-                       return -ENOMEM;
-               workbuf = alloc_buf;
-       }
+       /* Initialize the IV */
+       memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
 
-       /* Allocate request */
+       /* Set up the encryption request */
        req = skcipher_request_alloc(tfm, GFP_NOFS);
        if (!req) {
                printk_ratelimited(KERN_ERR
-                       "%s: crypto_request_alloc() failed\n", __func__);
-               kfree(alloc_buf);
+                       "%s: skcipher_request_alloc() failed\n", __func__);
                return -ENOMEM;
        }
        skcipher_request_set_callback(req,
                        CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
                        fname_crypt_complete, &ecr);
+       sg_init_one(&sg, oname->name, cryptlen);
+       skcipher_request_set_crypt(req, &sg, &sg, cryptlen, iv);
 
-       /* Copy the input */
-       memcpy(workbuf, iname->name, iname->len);
-       if (iname->len < ciphertext_len)
-               memset(workbuf + iname->len, 0, ciphertext_len - iname->len);
-
-       /* Initialize IV */
-       memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
-
-       /* Create encryption request */
-       sg_init_one(&src_sg, workbuf, ciphertext_len);
-       sg_init_one(&dst_sg, oname->name, ciphertext_len);
-       skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
+       /* Do the encryption */
        res = crypto_skcipher_encrypt(req);
        if (res == -EINPROGRESS || res == -EBUSY) {
+               /* Request is being completed asynchronously; wait for it */
                wait_for_completion(&ecr.completion);
                res = ecr.res;
        }
-       kfree(alloc_buf);
        skcipher_request_free(req);
        if (res < 0) {
                printk_ratelimited(KERN_ERR
@@ -105,7 +94,7 @@ static int fname_encrypt(struct inode *inode,
                return res;
        }
 
-       oname->len = ciphertext_len;
+       oname->len = cryptlen;
        return 0;
 }
 
index 82f0285f5d084934d0c13d98d684fbbd3f69f3ed..67fb6d8876d06861a0048dbfe229cb95a867da6c 100644 (file)
@@ -185,7 +185,7 @@ int get_crypt_info(struct inode *inode)
        struct crypto_skcipher *ctfm;
        const char *cipher_str;
        int keysize;
-       u8 raw_key[FS_MAX_KEY_SIZE];
+       u8 *raw_key = NULL;
        int res;
 
        res = fscrypt_initialize();
@@ -238,6 +238,15 @@ retry:
        if (res)
                goto out;
 
+       /*
+        * This cannot be a stack buffer because it is passed to the scatterlist
+        * crypto API as part of key derivation.
+        */
+       res = -ENOMEM;
+       raw_key = kmalloc(FS_MAX_KEY_SIZE, GFP_NOFS);
+       if (!raw_key)
+               goto out;
+
        if (fscrypt_dummy_context_enabled(inode)) {
                memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE);
                goto got_key;
@@ -276,7 +285,8 @@ got_key:
        if (res)
                goto out;
 
-       memzero_explicit(raw_key, sizeof(raw_key));
+       kzfree(raw_key);
+       raw_key = NULL;
        if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) {
                put_crypt_info(crypt_info);
                goto retry;
@@ -287,7 +297,7 @@ out:
        if (res == -ENOKEY)
                res = 0;
        put_crypt_info(crypt_info);
-       memzero_explicit(raw_key, sizeof(raw_key));
+       kzfree(raw_key);
        return res;
 }
 
index ed115acb5dee04bd15726ed2b9f368f6d13315cc..6865663aac69690f7ccdce030c48488a13ce917b 100644 (file)
@@ -109,6 +109,8 @@ int fscrypt_process_policy(struct file *filp,
        if (ret)
                return ret;
 
+       inode_lock(inode);
+
        if (!inode_has_encryption_context(inode)) {
                if (!S_ISDIR(inode->i_mode))
                        ret = -EINVAL;
@@ -127,6 +129,8 @@ int fscrypt_process_policy(struct file *filp,
                ret = -EINVAL;
        }
 
+       inode_unlock(inode);
+
        mnt_drop_write_file(filp);
        return ret;
 }
index 1e6e227134d7b5dae4fa3fc43feafeb994bfa708..0643ae44f3427ddc1088cccb6a0a710039cdc6e1 100644 (file)
 static uint32_t dlm_nl_seqnum;
 static uint32_t listener_nlportid;
 
-static struct genl_family family = {
-       .id             = GENL_ID_GENERATE,
-       .name           = DLM_GENL_NAME,
-       .version        = DLM_GENL_VERSION,
-};
+static struct genl_family family;
 
 static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
 {
@@ -76,9 +72,17 @@ static struct genl_ops dlm_nl_ops[] = {
        },
 };
 
+static struct genl_family family __ro_after_init = {
+       .name           = DLM_GENL_NAME,
+       .version        = DLM_GENL_VERSION,
+       .ops            = dlm_nl_ops,
+       .n_ops          = ARRAY_SIZE(dlm_nl_ops),
+       .module         = THIS_MODULE,
+};
+
 int __init dlm_netlink_init(void)
 {
-       return genl_register_family_with_ops(&family, dlm_nl_ops);
+       return genl_register_family(&family);
 }
 
 void dlm_netlink_exit(void)
index 6fcfb3f7b137951b133d3db95431c23bc2d4677f..4e497b9ee71ee96d0647721e4649feff7adaf7c1 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -191,6 +191,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 {
        struct page *page;
        int ret;
+       unsigned int gup_flags = FOLL_FORCE;
 
 #ifdef CONFIG_STACK_GROWSUP
        if (write) {
@@ -199,12 +200,16 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                        return NULL;
        }
 #endif
+
+       if (write)
+               gup_flags |= FOLL_WRITE;
+
        /*
         * We are doing an exec().  'current' is the process
         * doing the exec and bprm->mm is the new process's mm.
         */
-       ret = get_user_pages_remote(current, bprm->mm, pos, 1, write,
-                       1, &page, NULL);
+       ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags,
+                       &page, NULL);
        if (ret <= 0)
                return NULL;
 
index 79101651fe9ed2a6ccb3682ba39c590755b5bb00..42f9a0a0c4caf09722bc69fa278d509a7b7f16b3 100644 (file)
@@ -137,7 +137,7 @@ Espan:
 bad_entry:
        EXOFS_ERR(
                "ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - "
-               "offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n",
+               "offset=%lu, inode=0x%llx, rec_len=%d, name_len=%d\n",
                dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
                _LLU(le64_to_cpu(p->inode_no)),
                rec_len, p->name_len);
index d831e24dc88534844aa03a98ba2963f9d3e4b3d0..41b8b44a391cb5dc8ed833cb95b632424372e038 100644 (file)
@@ -622,7 +622,7 @@ static int ext2_get_blocks(struct inode *inode,
                           u32 *bno, bool *new, bool *boundary,
                           int create)
 {
-       int err = -EIO;
+       int err;
        int offsets[4];
        Indirect chain[4];
        Indirect *partial;
@@ -639,7 +639,7 @@ static int ext2_get_blocks(struct inode *inode,
        depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
 
        if (depth == 0)
-               return (err);
+               return -EIO;
 
        partial = ext2_get_branch(inode, depth, offsets, chain, &err);
        /* Simplest case - block found, no allocation needed */
@@ -761,7 +761,6 @@ static int ext2_get_blocks(struct inode *inode,
        ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
        mutex_unlock(&ei->truncate_mutex);
 got_it:
-       *bno = le32_to_cpu(chain[depth-1].key);
        if (count > blocks_to_boundary)
                *boundary = true;
        err = count;
@@ -772,6 +771,8 @@ cleanup:
                brelse(partial->bh);
                partial--;
        }
+       if (err > 0)
+               *bno = le32_to_cpu(chain[depth-1].key);
        return err;
 }
 
index 02ddec6d8a7da3135cef70cc2960bd8789f20378..fdb19543af1e62f9b990c944b92fd5dce0fd644a 100644 (file)
@@ -128,12 +128,12 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
        node = rb_first(&sbi->system_blks);
        while (node) {
                entry = rb_entry(node, struct ext4_system_zone, node);
-               printk("%s%llu-%llu", first ? "" : ", ",
+               printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ",
                       entry->start_blk, entry->start_blk + entry->count - 1);
                first = 0;
                node = rb_next(node);
        }
-       printk("\n");
+       printk(KERN_CONT "\n");
 }
 
 int ext4_setup_system_zone(struct super_block *sb)
index 282a51b07c5769e89a2064e12dfad53650183614..a8a750f596217062fa4e5d565ea44752e3e6ba50 100644 (file)
@@ -235,6 +235,7 @@ struct ext4_io_submit {
 #define        EXT4_MAX_BLOCK_SIZE             65536
 #define EXT4_MIN_BLOCK_LOG_SIZE                10
 #define EXT4_MAX_BLOCK_LOG_SIZE                16
+#define EXT4_MAX_CLUSTER_LOG_SIZE      30
 #ifdef __KERNEL__
 # define EXT4_BLOCK_SIZE(s)            ((s)->s_blocksize)
 #else
index 3ef1df6ae9ec6f67102c52cf9624a317e321d19b..1aba469f82209fe40d602579a17964346749e024 100644 (file)
 #ifdef CONFIG_EXT4_DEBUG
 extern ushort ext4_mballoc_debug;
 
-#define mb_debug(n, fmt, a...)                                         \
-       do {                                                            \
-               if ((n) <= ext4_mballoc_debug) {                        \
-                       printk(KERN_DEBUG "(%s, %d): %s: ",             \
-                              __FILE__, __LINE__, __func__);           \
-                       printk(fmt, ## a);                              \
-               }                                                       \
-       } while (0)
+#define mb_debug(n, fmt, ...)                                          \
+do {                                                                   \
+       if ((n) <= ext4_mballoc_debug) {                                \
+               printk(KERN_DEBUG "(%s, %d): %s: " fmt,                 \
+                      __FILE__, __LINE__, __func__, ##__VA_ARGS__);    \
+       }                                                               \
+} while (0)
 #else
-#define mb_debug(n, fmt, a...)         no_printk(fmt, ## a)
+#define mb_debug(n, fmt, ...)  no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 #define EXT4_MB_HISTORY_ALLOC          1       /* allocation */
index f92f10d4f66ace5dd13c528ac0f2ca005e066667..104f8bfba71822dd55f05b0caaebbf41aa2eb941 100644 (file)
@@ -577,12 +577,13 @@ static inline unsigned dx_node_limit(struct inode *dir)
 static void dx_show_index(char * label, struct dx_entry *entries)
 {
        int i, n = dx_get_count (entries);
-       printk(KERN_DEBUG "%s index ", label);
+       printk(KERN_DEBUG "%s index", label);
        for (i = 0; i < n; i++) {
-               printk("%x->%lu ", i ? dx_get_hash(entries + i) :
-                               0, (unsigned long)dx_get_block(entries + i));
+               printk(KERN_CONT " %x->%lu",
+                      i ? dx_get_hash(entries + i) : 0,
+                      (unsigned long)dx_get_block(entries + i));
        }
-       printk("\n");
+       printk(KERN_CONT "\n");
 }
 
 struct stats
@@ -679,7 +680,7 @@ static struct stats dx_show_leaf(struct inode *dir,
                }
                de = ext4_next_entry(de, size);
        }
-       printk("(%i)\n", names);
+       printk(KERN_CONT "(%i)\n", names);
        return (struct stats) { names, space, 1 };
 }
 
@@ -798,7 +799,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
                q = entries + count - 1;
                while (p <= q) {
                        m = p + (q - p) / 2;
-                       dxtrace(printk("."));
+                       dxtrace(printk(KERN_CONT "."));
                        if (dx_get_hash(m) > hash)
                                q = m - 1;
                        else
@@ -810,7 +811,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
                        at = entries;
                        while (n--)
                        {
-                               dxtrace(printk(","));
+                               dxtrace(printk(KERN_CONT ","));
                                if (dx_get_hash(++at) > hash)
                                {
                                        at--;
@@ -821,7 +822,8 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
                }
 
                at = p - 1;
-               dxtrace(printk(" %x->%u\n", at == entries ? 0 : dx_get_hash(at),
+               dxtrace(printk(KERN_CONT " %x->%u\n",
+                              at == entries ? 0 : dx_get_hash(at),
                               dx_get_block(at)));
                frame->entries = entries;
                frame->at = at;
index 6db81fbcbaa6cce558b6ef9f7e613e8284e898a6..52b0530c5d65a95fa0512b29cb2b3645d0277b69 100644 (file)
@@ -597,14 +597,15 @@ void __ext4_std_error(struct super_block *sb, const char *function,
 void __ext4_abort(struct super_block *sb, const char *function,
                unsigned int line, const char *fmt, ...)
 {
+       struct va_format vaf;
        va_list args;
 
        save_error_info(sb, function, line);
        va_start(args, fmt);
-       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
-              function, line);
-       vprintk(fmt, args);
-       printk("\n");
+       vaf.fmt = fmt;
+       vaf.va = &args;
+       printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n",
+              sb->s_id, function, line, &vaf);
        va_end(args);
 
        if ((sb->s_flags & MS_RDONLY) == 0) {
@@ -2715,12 +2716,12 @@ static void print_daily_error_info(unsigned long arg)
                       es->s_first_error_func,
                       le32_to_cpu(es->s_first_error_line));
                if (es->s_first_error_ino)
-                       printk(": inode %u",
+                       printk(KERN_CONT ": inode %u",
                               le32_to_cpu(es->s_first_error_ino));
                if (es->s_first_error_block)
-                       printk(": block %llu", (unsigned long long)
+                       printk(KERN_CONT ": block %llu", (unsigned long long)
                               le64_to_cpu(es->s_first_error_block));
-               printk("\n");
+               printk(KERN_CONT "\n");
        }
        if (es->s_last_error_time) {
                printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
@@ -2729,12 +2730,12 @@ static void print_daily_error_info(unsigned long arg)
                       es->s_last_error_func,
                       le32_to_cpu(es->s_last_error_line));
                if (es->s_last_error_ino)
-                       printk(": inode %u",
+                       printk(KERN_CONT ": inode %u",
                               le32_to_cpu(es->s_last_error_ino));
                if (es->s_last_error_block)
-                       printk(": block %llu", (unsigned long long)
+                       printk(KERN_CONT ": block %llu", (unsigned long long)
                               le64_to_cpu(es->s_last_error_block));
-               printk("\n");
+               printk(KERN_CONT "\n");
        }
        mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
 }
@@ -3564,7 +3565,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
            blocksize > EXT4_MAX_BLOCK_SIZE) {
                ext4_msg(sb, KERN_ERR,
-                      "Unsupported filesystem blocksize %d", blocksize);
+                      "Unsupported filesystem blocksize %d (%d log_block_size)",
+                        blocksize, le32_to_cpu(es->s_log_block_size));
+               goto failed_mount;
+       }
+       if (le32_to_cpu(es->s_log_block_size) >
+           (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+               ext4_msg(sb, KERN_ERR,
+                        "Invalid log block size: %u",
+                        le32_to_cpu(es->s_log_block_size));
                goto failed_mount;
        }
 
@@ -3696,6 +3705,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                                 "block size (%d)", clustersize, blocksize);
                        goto failed_mount;
                }
+               if (le32_to_cpu(es->s_log_cluster_size) >
+                   (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Invalid log cluster size: %u",
+                                le32_to_cpu(es->s_log_cluster_size));
+                       goto failed_mount;
+               }
                sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
                        le32_to_cpu(es->s_log_block_size);
                sbi->s_clusters_per_group =
index 73bcfd41f5f262453e729fc11c4604b5109de842..42145be5c6b4dea4258b35586693ae336f047d25 100644 (file)
@@ -223,14 +223,18 @@ static struct attribute *ext4_attrs[] = {
 EXT4_ATTR_FEATURE(lazy_itable_init);
 EXT4_ATTR_FEATURE(batched_discard);
 EXT4_ATTR_FEATURE(meta_bg_resize);
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
 EXT4_ATTR_FEATURE(encryption);
+#endif
 EXT4_ATTR_FEATURE(metadata_csum_seed);
 
 static struct attribute *ext4_feat_attrs[] = {
        ATTR_LIST(lazy_itable_init),
        ATTR_LIST(batched_discard),
        ATTR_LIST(meta_bg_resize),
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
        ATTR_LIST(encryption),
+#endif
        ATTR_LIST(metadata_csum_seed),
        NULL,
 };
index c15d63389957bda0926a187da5df4bfcf7a9668c..d77be9e9f5352f2eedd1ab005dba20a899bbbbf6 100644 (file)
 #include "acl.h"
 
 #ifdef EXT4_XATTR_DEBUG
-# define ea_idebug(inode, f...) do { \
-               printk(KERN_DEBUG "inode %s:%lu: ", \
-                       inode->i_sb->s_id, inode->i_ino); \
-               printk(f); \
-               printk("\n"); \
-       } while (0)
-# define ea_bdebug(bh, f...) do { \
-               printk(KERN_DEBUG "block %pg:%lu: ",               \
-                      bh->b_bdev, (unsigned long) bh->b_blocknr); \
-               printk(f); \
-               printk("\n"); \
-       } while (0)
+# define ea_idebug(inode, fmt, ...)                                    \
+       printk(KERN_DEBUG "inode %s:%lu: " fmt "\n",                    \
+              inode->i_sb->s_id, inode->i_ino, ##__VA_ARGS__)
+# define ea_bdebug(bh, fmt, ...)                                       \
+       printk(KERN_DEBUG "block %pg:%lu: " fmt "\n",                   \
+              bh->b_bdev, (unsigned long)bh->b_blocknr, ##__VA_ARGS__)
 #else
 # define ea_idebug(inode, fmt, ...)    no_printk(fmt, ##__VA_ARGS__)
 # define ea_bdebug(bh, fmt, ...)       no_printk(fmt, ##__VA_ARGS__)
@@ -241,7 +235,7 @@ __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
        int error = -EFSCORRUPTED;
 
        if (((void *) header >= end) ||
-           (header->h_magic != le32_to_cpu(EXT4_XATTR_MAGIC)))
+           (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)))
                goto errout;
        error = ext4_xattr_check_names(entry, end, entry);
 errout:
index 93985c64d8a8bef1328ddd5f35cc7d047f41fc3a..6f14ee923acd2b4cf75f93bed945f6fc04b79c93 100644 (file)
@@ -852,16 +852,16 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
 
        for (segno = start_segno; segno < end_segno; segno++) {
 
-               if (get_valid_blocks(sbi, segno, 1) == 0 ||
-                                       unlikely(f2fs_cp_error(sbi)))
-                       goto next;
-
                /* find segment summary of victim */
                sum_page = find_get_page(META_MAPPING(sbi),
                                        GET_SUM_BLOCK(sbi, segno));
-               f2fs_bug_on(sbi, !PageUptodate(sum_page));
                f2fs_put_page(sum_page, 0);
 
+               if (get_valid_blocks(sbi, segno, 1) == 0 ||
+                               !PageUptodate(sum_page) ||
+                               unlikely(f2fs_cp_error(sbi)))
+                       goto next;
+
                sum = page_address(sum_page);
                f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
 
index 6a4d0e5418a179def0e5d4da4d3adbbcf3a82cd0..b3ebe512d64c014a70f0802c75a0d7c88305ec10 100644 (file)
@@ -286,6 +286,11 @@ const struct dentry_operations fuse_dentry_operations = {
        .d_release      = fuse_dentry_release,
 };
 
+const struct dentry_operations fuse_root_dentry_operations = {
+       .d_init         = fuse_dentry_init,
+       .d_release      = fuse_dentry_release,
+};
+
 int fuse_valid_type(int m)
 {
        return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
index abc66a6237fd0ebd9bbafbf893b21b97874d01dc..2401c5dabb2a227b6511be01b1589ffa5180e636 100644 (file)
@@ -1985,6 +1985,10 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
 {
        struct inode *inode = page->mapping->host;
 
+       /* Haven't copied anything?  Skip zeroing, size extending, dirtying. */
+       if (!copied)
+               goto unlock;
+
        if (!PageUptodate(page)) {
                /* Zero any unwritten bytes at the end of the page */
                size_t endoff = (pos + copied) & ~PAGE_MASK;
@@ -1995,6 +1999,8 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
 
        fuse_write_update_size(inode, pos + copied);
        set_page_dirty(page);
+
+unlock:
        unlock_page(page);
        put_page(page);
 
index 0dfbb136e59a8515e3ee9fe77d9996c22722d50c..91307940c8ac5e921b08133a04ca0b65283fd308 100644 (file)
@@ -692,6 +692,7 @@ static inline u64 get_node_id(struct inode *inode)
 extern const struct file_operations fuse_dev_operations;
 
 extern const struct dentry_operations fuse_dentry_operations;
+extern const struct dentry_operations fuse_root_dentry_operations;
 
 /**
  * Inode to nodeid comparison.
index 17141099f2e783fc6f2c10eabda326e13728b2f5..6fe6a88ecb4afd9eaaad1b0c75fe961108efc64a 100644 (file)
@@ -1131,10 +1131,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 
        err = -ENOMEM;
        root = fuse_get_root_inode(sb, d.rootmode);
+       sb->s_d_op = &fuse_root_dentry_operations;
        root_dentry = d_make_root(root);
        if (!root_dentry)
                goto err_dev_free;
-       /* only now - we want root dentry with NULL ->d_op */
+       /* Root dentry doesn't have .d_revalidate */
        sb->s_d_op = &fuse_dentry_operations;
 
        init_req = fuse_request_alloc(0);
index 013d1d36fbbf7fa3e3f321a05c8a5e956b731cb6..a8ee8c33ca782dbe4a4c17f42bf91fda9e83a523 100644 (file)
@@ -433,8 +433,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
        struct page *page = data;
        int ret;
 
-       ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length,
-                       NULL, iomap);
+       ret = __block_write_begin_int(page, pos, length, NULL, iomap);
        if (ret)
                return ret;
 
@@ -561,7 +560,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
        }
 
        while (len > 0) {
-               ret = iomap_apply(inode, start, len, 0, ops, &ctx,
+               ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx,
                                iomap_fiemap_actor);
                /* inode with no (attribute) mapping will give ENOENT */
                if (ret == -ENOENT)
index ad0c745ebad72e89fd987e0f67de5e61f525ff2a..871c8b39209913d95708398688df19ca31b0eba7 100644 (file)
@@ -687,6 +687,11 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
        pri_bh = NULL;
 
 root_found:
+       /* We don't support read-write mounts */
+       if (!(s->s_flags & MS_RDONLY)) {
+               error = -EACCES;
+               goto out_freebh;
+       }
 
        if (joliet_level && (pri == NULL || !opt.rock)) {
                /* This is the case of Joliet with the norock mount flag.
@@ -1501,9 +1506,6 @@ struct inode *__isofs_iget(struct super_block *sb,
 static struct dentry *isofs_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
 {
-       /* We don't support read-write mounts */
-       if (!(flags & MS_RDONLY))
-               return ERR_PTR(-EACCES);
        return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
 }
 
index 3d8246a9faa454ec8e3774b1b6320ce0a12f67da..e1652665bd93d0cf30dece02b5a1b7692fdec811 100644 (file)
@@ -1149,6 +1149,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
                JBUFFER_TRACE(jh, "file as BJ_Reserved");
                spin_lock(&journal->j_list_lock);
                __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
+               spin_unlock(&journal->j_list_lock);
        } else if (jh->b_transaction == journal->j_committing_transaction) {
                /* first access by this transaction */
                jh->b_modified = 0;
@@ -1156,8 +1157,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
                JBUFFER_TRACE(jh, "set next transaction");
                spin_lock(&journal->j_list_lock);
                jh->b_next_transaction = transaction;
+               spin_unlock(&journal->j_list_lock);
        }
-       spin_unlock(&journal->j_list_lock);
        jbd_unlock_bh_state(bh);
 
        /*
index dcd96aac02f5c401451479dfec78d7db87c45bae..cf4c636ff4da5ab2d345261fc3d7e4f0db6d4c3c 100644 (file)
@@ -110,8 +110,9 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
  * kn_to:   /n1/n2/n3         [depth=3]
  * result:  /../..
  *
- * return value: length of the string.  If greater than buflen,
- * then contents of buf are undefined.  On error, -1 is returned.
+ * Returns the length of the full path.  If the full length is equal to or
+ * greater than @buflen, @buf contains the truncated path with the trailing
+ * '\0'.  On error, -errno is returned.
  */
 static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
                                        struct kernfs_node *kn_from,
@@ -119,9 +120,8 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
 {
        struct kernfs_node *kn, *common;
        const char parent_str[] = "/..";
-       size_t depth_from, depth_to, len = 0, nlen = 0;
-       char *p;
-       int i;
+       size_t depth_from, depth_to, len = 0;
+       int i, j;
 
        if (!kn_from)
                kn_from = kernfs_root(kn_to)->kn;
@@ -131,7 +131,7 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
 
        common = kernfs_common_ancestor(kn_from, kn_to);
        if (WARN_ON(!common))
-               return -1;
+               return -EINVAL;
 
        depth_to = kernfs_depth(common, kn_to);
        depth_from = kernfs_depth(common, kn_from);
@@ -144,22 +144,16 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
                               len < buflen ? buflen - len : 0);
 
        /* Calculate how many bytes we need for the rest */
-       for (kn = kn_to; kn != common; kn = kn->parent)
-               nlen += strlen(kn->name) + 1;
-
-       if (len + nlen >= buflen)
-               return len + nlen;
-
-       p = buf + len + nlen;
-       *p = '\0';
-       for (kn = kn_to; kn != common; kn = kn->parent) {
-               size_t tmp = strlen(kn->name);
-               p -= tmp;
-               memcpy(p, kn->name, tmp);
-               *(--p) = '/';
+       for (i = depth_to - 1; i >= 0; i--) {
+               for (kn = kn_to, j = 0; j < i; j++)
+                       kn = kn->parent;
+               len += strlcpy(buf + len, "/",
+                              len < buflen ? buflen - len : 0);
+               len += strlcpy(buf + len, kn->name,
+                              len < buflen ? buflen - len : 0);
        }
 
-       return len + nlen;
+       return len;
 }
 
 /**
@@ -185,29 +179,6 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
        return ret;
 }
 
-/**
- * kernfs_path_len - determine the length of the full path of a given node
- * @kn: kernfs_node of interest
- *
- * The returned length doesn't include the space for the terminating '\0'.
- */
-size_t kernfs_path_len(struct kernfs_node *kn)
-{
-       size_t len = 0;
-       unsigned long flags;
-
-       spin_lock_irqsave(&kernfs_rename_lock, flags);
-
-       do {
-               len += strlen(kn->name) + 1;
-               kn = kn->parent;
-       } while (kn && kn->parent);
-
-       spin_unlock_irqrestore(&kernfs_rename_lock, flags);
-
-       return len;
-}
-
 /**
  * kernfs_path_from_node - build path of node @to relative to @from.
  * @from: parent kernfs_node relative to which we need to build the path
@@ -220,8 +191,9 @@ size_t kernfs_path_len(struct kernfs_node *kn)
  * path (which includes '..'s) as needed to reach from @from to @to is
  * returned.
  *
- * If @buf isn't long enough, the return value will be greater than @buflen
- * and @buf contents are undefined.
+ * Returns the length of the full path.  If the full length is equal to or
+ * greater than @buflen, @buf contains the truncated path with the trailing
+ * '\0'.  On error, -errno is returned.
  */
 int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
                          char *buf, size_t buflen)
@@ -236,28 +208,6 @@ int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
 }
 EXPORT_SYMBOL_GPL(kernfs_path_from_node);
 
-/**
- * kernfs_path - build full path of a given node
- * @kn: kernfs_node of interest
- * @buf: buffer to copy @kn's name into
- * @buflen: size of @buf
- *
- * Builds and returns the full path of @kn in @buf of @buflen bytes.  The
- * path is built from the end of @buf so the returned pointer usually
- * doesn't match @buf.  If @buf isn't long enough, @buf is nul terminated
- * and %NULL is returned.
- */
-char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
-{
-       int ret;
-
-       ret = kernfs_path_from_node(kn, NULL, buf, buflen);
-       if (ret < 0 || ret >= buflen)
-               return NULL;
-       return buf;
-}
-EXPORT_SYMBOL_GPL(kernfs_path);
-
 /**
  * pr_cont_kernfs_name - pr_cont name of a kernfs_node
  * @kn: kernfs_node of interest
index 2bcb86e6e6ca0988cbe4337c970247869db34a24..78219d5644e90aacaf3aeb9fdfe2a234f4e31b84 100644 (file)
@@ -911,6 +911,7 @@ const struct file_operations kernfs_file_fops = {
        .open           = kernfs_fop_open,
        .release        = kernfs_fop_release,
        .poll           = kernfs_fop_poll,
+       .fsync          = noop_fsync,
 };
 
 /**
index 5426189406c17c233bfcd0e139db5e7921e412fb..fb8cac88251ae64da116b8390826118c3eb1c81c 100644 (file)
@@ -15,6 +15,6 @@ struct lockd_net {
        struct list_head nsm_handles;
 };
 
-extern int lockd_net_id;
+extern unsigned int lockd_net_id;
 
 #endif
index fc4084ef4736d47a410e27052497cd00829204c2..1c13dd80744ff99cc0691476c3a2920eca9757cc 100644 (file)
@@ -57,7 +57,7 @@ static struct task_struct     *nlmsvc_task;
 static struct svc_rqst         *nlmsvc_rqst;
 unsigned long                  nlmsvc_timeout;
 
-int lockd_net_id;
+unsigned int lockd_net_id;
 
 /*
  * These can be set at insmod time (useful for NFS as root filesystem),
index ce93b416b490fa558589a6bdd2bb620ffb108fcc..22c5b4aa49611ac46cb50dbf4fc8ef25ec500b34 100644 (file)
@@ -1609,6 +1609,7 @@ int fcntl_getlease(struct file *filp)
 
        ctx = smp_load_acquire(&inode->i_flctx);
        if (ctx && !list_empty_careful(&ctx->flc_lease)) {
+               percpu_down_read_preempt_disable(&file_rwsem);
                spin_lock(&ctx->flc_lock);
                time_out_leases(inode, &dispose);
                list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
@@ -1618,6 +1619,8 @@ int fcntl_getlease(struct file *filp)
                        break;
                }
                spin_unlock(&ctx->flc_lock);
+               percpu_up_read_preempt_enable(&file_rwsem);
+
                locks_dispose_list(&dispose);
        }
        return type;
@@ -2529,11 +2532,14 @@ locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
        if (list_empty(&ctx->flc_lease))
                return;
 
+       percpu_down_read_preempt_disable(&file_rwsem);
        spin_lock(&ctx->flc_lock);
        list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
                if (filp == fl->fl_file)
                        lease_modify(fl, F_UNLCK, &dispose);
        spin_unlock(&ctx->flc_lock);
+       percpu_up_read_preempt_enable(&file_rwsem);
+
        locks_dispose_list(&dispose);
 }
 
index a7f601cd521a079af70d0364a67478ab3e9d5831..5b4eed2215304a14ac2614058ae2b2002a3f2ae9 100644 (file)
@@ -4668,6 +4668,31 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 }
 EXPORT_SYMBOL(generic_readlink);
 
+/**
+ * vfs_get_link - get symlink body
+ * @dentry: dentry on which to get symbolic link
+ * @done: caller needs to free returned data with this
+ *
+ * Calls security hook and i_op->get_link() on the supplied inode.
+ *
+ * It does not touch atime.  That's up to the caller if necessary.
+ *
+ * Does not work on "special" symlinks like /proc/$$/fd/N
+ */
+const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done)
+{
+       const char *res = ERR_PTR(-EINVAL);
+       struct inode *inode = d_inode(dentry);
+
+       if (d_is_symlink(dentry)) {
+               res = ERR_PTR(security_inode_readlink(dentry));
+               if (!res)
+                       res = inode->i_op->get_link(dentry, inode, done);
+       }
+       return res;
+}
+EXPORT_SYMBOL(vfs_get_link);
+
 /* get the link contents into pagecache */
 const char *page_get_link(struct dentry *dentry, struct inode *inode,
                          struct delayed_call *callback)
index 58aca9c931acaecc62bf02717c9cfa9d58baf8cf..e6c234b1a6456c1364bcc06b029f3cc4a155d5bd 100644 (file)
@@ -2824,6 +2824,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
        return new_ns;
 }
 
+__latent_entropy
 struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
                struct user_namespace *user_ns, struct fs_struct *new_fs)
 {
index 217847679f0eac675492ac049cbc0a42a6f75066..2905479f214a4654223ec90ae13e21138ac2be4d 100644 (file)
@@ -344,9 +344,10 @@ static void bl_write_cleanup(struct work_struct *work)
                u64 start = hdr->args.offset & (loff_t)PAGE_MASK;
                u64 end = (hdr->args.offset + hdr->args.count +
                        PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
+               u64 lwb = hdr->args.offset + hdr->args.count;
 
                ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
-                                       (end - start) >> SECTOR_SHIFT, end);
+                                       (end - start) >> SECTOR_SHIFT, lwb);
        }
 
        pnfs_ld_write_done(hdr);
index 532d8e242d4d76c44413d6c814c7548094614ebe..484bebc20bca6a502cc621106d7f34975b48dedc 100644 (file)
@@ -197,7 +197,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
        }
 
        ret = -EPROTONOSUPPORT;
-       if (minorversion == 0)
+       if (!IS_ENABLED(CONFIG_NFS_V4_1) || minorversion == 0)
                ret = nfs4_callback_up_net(serv, net);
        else if (xprt->ops->bc_up)
                ret = xprt->ops->bc_up(serv, net);
index 7555ba889d1fce916cc96b8f23c03ad4d6037366..ebecfb8fba067cd4316e1c59e12c472c97d930a6 100644 (file)
@@ -314,7 +314,8 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
                /* Match the full socket address */
                if (!rpc_cmp_addr_port(sap, clap))
                        /* Match all xprt_switch full socket addresses */
-                       if (!rpc_clnt_xprt_switch_has_addr(clp->cl_rpcclient,
+                       if (IS_ERR(clp->cl_rpcclient) ||
+                            !rpc_clnt_xprt_switch_has_addr(clp->cl_rpcclient,
                                                           sap))
                                continue;
 
index bf4ec5ecc97e4571e3f971222c71f0519874682a..ce42dd00e4ee5f1131715d1ea23c2a5792ea0c73 100644 (file)
@@ -2015,7 +2015,7 @@ static void nfsiod_stop(void)
        destroy_workqueue(wq);
 }
 
-int nfs_net_id;
+unsigned int nfs_net_id;
 EXPORT_SYMBOL_GPL(nfs_net_id);
 
 static int nfs_net_init(struct net *net)
index c8162c660c440bb28eb7e31fbd33ce1b4c3b1438..5551e8ef67fd0b64faa92688a8fa38b05bbb0c78 100644 (file)
@@ -98,7 +98,7 @@ rename_retry:
                return end;
        }
        namelen = strlen(base);
-       if (flags & NFS_PATH_CANONICAL) {
+       if (*end == '/') {
                /* Strip off excess slashes in base string */
                while (namelen > 0 && base[namelen - 1] == '/')
                        namelen--;
index fbce0d885d4c2dc77bb98f604ec881175e945eb7..5fbd2bde91ba7e7c18a367559d636b90908fc6e7 100644 (file)
@@ -35,6 +35,6 @@ struct nfs_net {
 #endif
 };
 
-extern int nfs_net_id;
+extern unsigned int nfs_net_id;
 
 #endif
index 9b3a82abab079f0a03047260ed2ea4e3ee9154ed..1452177c822dbc4a1be8d7e164de88d636764aed 100644 (file)
@@ -542,6 +542,13 @@ static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state)
        return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0;
 }
 
+static inline bool nfs4_state_match_open_stateid_other(const struct nfs4_state *state,
+               const nfs4_stateid *stateid)
+{
+       return test_bit(NFS_OPEN_STATE, &state->flags) &&
+               nfs4_stateid_match_other(&state->open_stateid, stateid);
+}
+
 #else
 
 #define nfs4_close_state(a, b) do { } while (0)
index ad917bd72b38c3b213ce994ee2d063b111cb6277..241da19b7da4a54a45b1a2bd6cae4cab8cab4dde 100644 (file)
@@ -1451,7 +1451,6 @@ static void nfs_resync_open_stateid_locked(struct nfs4_state *state)
 }
 
 static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
-               nfs4_stateid *arg_stateid,
                nfs4_stateid *stateid, fmode_t fmode)
 {
        clear_bit(NFS_O_RDWR_STATE, &state->flags);
@@ -1469,10 +1468,9 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
        }
        if (stateid == NULL)
                return;
-       /* Handle races with OPEN */
-       if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) ||
-           (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
-           !nfs4_stateid_is_newer(stateid, &state->open_stateid))) {
+       /* Handle OPEN+OPEN_DOWNGRADE races */
+       if (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
+           !nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
                nfs_resync_open_stateid_locked(state);
                return;
        }
@@ -1486,7 +1484,9 @@ static void nfs_clear_open_stateid(struct nfs4_state *state,
        nfs4_stateid *stateid, fmode_t fmode)
 {
        write_seqlock(&state->seqlock);
-       nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode);
+       /* Ignore, if the CLOSE argument doesn't match the current stateid */
+       if (nfs4_state_match_open_stateid_other(state, arg_stateid))
+               nfs_clear_open_stateid_locked(state, stateid, fmode);
        write_sequnlock(&state->seqlock);
        if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
                nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
@@ -1545,7 +1545,7 @@ static int update_open_stateid(struct nfs4_state *state,
        struct nfs_client *clp = server->nfs_client;
        struct nfs_inode *nfsi = NFS_I(state->inode);
        struct nfs_delegation *deleg_cur;
-       nfs4_stateid freeme = {0};
+       nfs4_stateid freeme = { };
        int ret = 0;
 
        fmode &= (FMODE_READ|FMODE_WRITE);
@@ -2564,15 +2564,23 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
 static int nfs41_check_expired_locks(struct nfs4_state *state)
 {
        int status, ret = NFS_OK;
-       struct nfs4_lock_state *lsp;
+       struct nfs4_lock_state *lsp, *prev = NULL;
        struct nfs_server *server = NFS_SERVER(state->inode);
 
        if (!test_bit(LK_STATE_IN_USE, &state->flags))
                goto out;
+
+       spin_lock(&state->state_lock);
        list_for_each_entry(lsp, &state->lock_states, ls_locks) {
                if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
                        struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
 
+                       atomic_inc(&lsp->ls_count);
+                       spin_unlock(&state->state_lock);
+
+                       nfs4_put_lock_state(prev);
+                       prev = lsp;
+
                        status = nfs41_test_and_free_expired_stateid(server,
                                        &lsp->ls_stateid,
                                        cred);
@@ -2585,10 +2593,14 @@ static int nfs41_check_expired_locks(struct nfs4_state *state)
                                        set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
                        } else if (status != NFS_OK) {
                                ret = status;
-                               break;
+                               nfs4_put_lock_state(prev);
+                               goto out;
                        }
+                       spin_lock(&state->state_lock);
                }
-       };
+       }
+       spin_unlock(&state->state_lock);
+       nfs4_put_lock_state(prev);
 out:
        return ret;
 }
@@ -3122,7 +3134,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
        } else if (is_rdwr)
                calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
 
-       if (!nfs4_valid_open_stateid(state))
+       if (!nfs4_valid_open_stateid(state) ||
+           test_bit(NFS_OPEN_STATE, &state->flags) == 0)
                call_close = 0;
        spin_unlock(&state->owner->so_lock);
 
@@ -5569,6 +5582,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
        switch (task->tk_status) {
        case 0:
                renew_lease(data->res.server, data->timestamp);
+               break;
        case -NFS4ERR_ADMIN_REVOKED:
        case -NFS4ERR_DELEG_REVOKED:
        case -NFS4ERR_EXPIRED:
@@ -5579,8 +5593,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
        case -NFS4ERR_OLD_STATEID:
        case -NFS4ERR_STALE_STATEID:
                task->tk_status = 0;
-               if (data->roc)
-                       pnfs_roc_set_barrier(data->inode, data->roc_barrier);
                break;
        default:
                if (nfs4_async_handle_error(task, data->res.server,
@@ -5590,6 +5602,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
                }
        }
        data->rpc_status = task->tk_status;
+       if (data->roc && data->rpc_status == 0)
+               pnfs_roc_set_barrier(data->inode, data->roc_barrier);
 }
 
 static void nfs4_delegreturn_release(void *calldata)
index b62973045a3e048016f1af48f761fdb152f3908d..a61350f75c741d734475cfa38903118d673b76aa 100644 (file)
@@ -178,12 +178,14 @@ static int nfs4_slot_get_seqid(struct nfs4_slot_table  *tbl, u32 slotid,
        __must_hold(&tbl->slot_tbl_lock)
 {
        struct nfs4_slot *slot;
+       int ret;
 
        slot = nfs4_lookup_slot(tbl, slotid);
-       if (IS_ERR(slot))
-               return PTR_ERR(slot);
-       *seq_nr = slot->seq_nr;
-       return 0;
+       ret = PTR_ERR_OR_ZERO(slot);
+       if (!ret)
+               *seq_nr = slot->seq_nr;
+
+       return ret;
 }
 
 /*
@@ -196,7 +198,7 @@ static int nfs4_slot_get_seqid(struct nfs4_slot_table  *tbl, u32 slotid,
 static bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl,
                u32 slotid, u32 seq_nr)
 {
-       u32 cur_seq;
+       u32 cur_seq = 0;
        bool ret = false;
 
        spin_lock(&tbl->slot_tbl_lock);
index 5f4281ec5f72c3556d76a479854d6fb378d03124..0959c96616623f876a5905deb6e03c1a438fe338 100644 (file)
@@ -1547,6 +1547,7 @@ restart:
                                ssleep(1);
                        case -NFS4ERR_ADMIN_REVOKED:
                        case -NFS4ERR_STALE_STATEID:
+                       case -NFS4ERR_OLD_STATEID:
                        case -NFS4ERR_BAD_STATEID:
                        case -NFS4ERR_RECLAIM_BAD:
                        case -NFS4ERR_RECLAIM_CONFLICT:
index 56b2d96f9103e42c57e5dd490f2561c8a10c554a..259ef85f435aa7f9e0b0e4d06d3ce24a9e7a3ad3 100644 (file)
@@ -146,6 +146,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
        u32 id;
        int i;
 
+       if (fsinfo->nlayouttypes == 0)
+               goto out_no_driver;
        if (!(server->nfs_client->cl_exchange_flags &
                 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
                printk(KERN_ERR "NFS: %s: cl_exchange_flags 0x%x\n",
index fd8c9a5bcac44c74101d1b2ba4001c8785875254..420d3a0ab258fb2b312310e50081bbab30f2c2ae 100644 (file)
@@ -9,7 +9,7 @@
 #include <net/netns/generic.h>
 #include <linux/fs.h>
 
-static int grace_net_id;
+static unsigned int grace_net_id;
 static DEFINE_SPINLOCK(grace_lock);
 
 /**
index b10d557f9c9ef0033a6dcbcfcf30141745132e8f..3714231a9d0fb71e4e440a9f8efa7113839c4392 100644 (file)
@@ -84,6 +84,8 @@ struct nfsd_net {
        struct list_head client_lru;
        struct list_head close_lru;
        struct list_head del_recall_lru;
+
+       /* protected by blocked_locks_lock */
        struct list_head blocked_locks_lru;
 
        struct delayed_work laundromat_work;
@@ -91,6 +93,9 @@ struct nfsd_net {
        /* client_lock protects the client lru list and session hash table */
        spinlock_t client_lock;
 
+       /* protects blocked_locks_lru */
+       spinlock_t blocked_locks_lock;
+
        struct file *rec_file;
        bool in_grace;
        const struct nfsd4_client_tracking_ops *client_tracking_ops;
@@ -119,5 +124,5 @@ struct nfsd_net {
 /* Simple check to find out if a given net was properly initialized */
 #define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl)
 
-extern int nfsd_net_id;
+extern unsigned int nfsd_net_id;
 #endif /* __NFSD_NETNS_H__ */
index 9752beb78659dd1f02a4411a397fb27b9a8cf4ef..4b4beaaa4eaac01233f874c7dfdb8d1a6d7cd3d6 100644 (file)
@@ -217,7 +217,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
 {
        struct nfsd4_blocked_lock *cur, *found = NULL;
 
-       spin_lock(&nn->client_lock);
+       spin_lock(&nn->blocked_locks_lock);
        list_for_each_entry(cur, &lo->lo_blocked, nbl_list) {
                if (fh_match(fh, &cur->nbl_fh)) {
                        list_del_init(&cur->nbl_list);
@@ -226,7 +226,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh,
                        break;
                }
        }
-       spin_unlock(&nn->client_lock);
+       spin_unlock(&nn->blocked_locks_lock);
        if (found)
                posix_unblock_lock(&found->nbl_lock);
        return found;
@@ -1227,9 +1227,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
 
 static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
 {
-       struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
-
-       lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);
+       lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);
 
        list_del_init(&stp->st_locks);
        nfs4_unhash_stid(&stp->st_stid);
@@ -1238,12 +1236,12 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
 
 static void release_lock_stateid(struct nfs4_ol_stateid *stp)
 {
-       struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
+       struct nfs4_client *clp = stp->st_stid.sc_client;
        bool unhashed;
 
-       spin_lock(&oo->oo_owner.so_client->cl_lock);
+       spin_lock(&clp->cl_lock);
        unhashed = unhash_lock_stateid(stp);
-       spin_unlock(&oo->oo_owner.so_client->cl_lock);
+       spin_unlock(&clp->cl_lock);
        if (unhashed)
                nfs4_put_stid(&stp->st_stid);
 }
@@ -4665,7 +4663,7 @@ nfs4_laundromat(struct nfsd_net *nn)
         * indefinitely once the lock does become free.
         */
        BUG_ON(!list_empty(&reaplist));
-       spin_lock(&nn->client_lock);
+       spin_lock(&nn->blocked_locks_lock);
        while (!list_empty(&nn->blocked_locks_lru)) {
                nbl = list_first_entry(&nn->blocked_locks_lru,
                                        struct nfsd4_blocked_lock, nbl_lru);
@@ -4678,7 +4676,7 @@ nfs4_laundromat(struct nfsd_net *nn)
                list_move(&nbl->nbl_lru, &reaplist);
                list_del_init(&nbl->nbl_list);
        }
-       spin_unlock(&nn->client_lock);
+       spin_unlock(&nn->blocked_locks_lock);
 
        while (!list_empty(&reaplist)) {
                nbl = list_first_entry(&nn->blocked_locks_lru,
@@ -5439,13 +5437,13 @@ nfsd4_lm_notify(struct file_lock *fl)
        bool queue = false;
 
        /* An empty list means that something else is going to be using it */
-       spin_lock(&nn->client_lock);
+       spin_lock(&nn->blocked_locks_lock);
        if (!list_empty(&nbl->nbl_list)) {
                list_del_init(&nbl->nbl_list);
                list_del_init(&nbl->nbl_lru);
                queue = true;
        }
-       spin_unlock(&nn->client_lock);
+       spin_unlock(&nn->blocked_locks_lock);
 
        if (queue)
                nfsd4_run_cb(&nbl->nbl_cb);
@@ -5868,10 +5866,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
        if (fl_flags & FL_SLEEP) {
                nbl->nbl_time = jiffies;
-               spin_lock(&nn->client_lock);
+               spin_lock(&nn->blocked_locks_lock);
                list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked);
                list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru);
-               spin_unlock(&nn->client_lock);
+               spin_unlock(&nn->blocked_locks_lock);
        }
 
        err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
@@ -5900,10 +5898,10 @@ out:
        if (nbl) {
                /* dequeue it if we queued it before */
                if (fl_flags & FL_SLEEP) {
-                       spin_lock(&nn->client_lock);
+                       spin_lock(&nn->blocked_locks_lock);
                        list_del_init(&nbl->nbl_list);
                        list_del_init(&nbl->nbl_lru);
-                       spin_unlock(&nn->client_lock);
+                       spin_unlock(&nn->blocked_locks_lock);
                }
                free_blocked_lock(nbl);
        }
@@ -6943,9 +6941,11 @@ static int nfs4_state_create_net(struct net *net)
        INIT_LIST_HEAD(&nn->client_lru);
        INIT_LIST_HEAD(&nn->close_lru);
        INIT_LIST_HEAD(&nn->del_recall_lru);
-       INIT_LIST_HEAD(&nn->blocked_locks_lru);
        spin_lock_init(&nn->client_lock);
 
+       spin_lock_init(&nn->blocked_locks_lock);
+       INIT_LIST_HEAD(&nn->blocked_locks_lru);
+
        INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
        get_net(net);
 
@@ -7063,14 +7063,14 @@ nfs4_state_shutdown_net(struct net *net)
        }
 
        BUG_ON(!list_empty(&reaplist));
-       spin_lock(&nn->client_lock);
+       spin_lock(&nn->blocked_locks_lock);
        while (!list_empty(&nn->blocked_locks_lru)) {
                nbl = list_first_entry(&nn->blocked_locks_lru,
                                        struct nfsd4_blocked_lock, nbl_lru);
                list_move(&nbl->nbl_lru, &reaplist);
                list_del_init(&nbl->nbl_list);
        }
-       spin_unlock(&nn->client_lock);
+       spin_unlock(&nn->blocked_locks_lock);
 
        while (!list_empty(&reaplist)) {
                nbl = list_first_entry(&nn->blocked_locks_lru,
index 36b2af931e06d1d1a7c3dc613747a58433350811..2857e46d5cc5eacd3c2a78b6de8947ef97c48e6b 100644 (file)
@@ -1201,7 +1201,7 @@ static int create_proc_exports_entry(void)
 }
 #endif
 
-int nfsd_net_id;
+unsigned int nfsd_net_id;
 
 static __net_init int nfsd_init_net(struct net *net)
 {
index 8718af895eabf791451e1b607d7b0a2b2d33cef2..8c9fb29c667327549737a6a9b6440d2564c7c710 100644 (file)
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -118,7 +118,7 @@ again:
        return ret;
 }
 
-static int open_related_ns(struct ns_common *ns,
+int open_related_ns(struct ns_common *ns,
                   struct ns_common *(*get_ns)(struct ns_common *ns))
 {
        struct path path = {};
index a1861357900127e19182932c39322c55d17fe5fe..0ee19ecc982d4e4ef8137836ba4d753559eb7612 100644 (file)
@@ -1544,8 +1544,6 @@ const struct file_operations ntfs_dir_ops = {
        .iterate        = ntfs_readdir,         /* Read directory contents. */
 #ifdef NTFS_RW
        .fsync          = ntfs_dir_fsync,       /* Sync a directory to disk. */
-       /*.aio_fsync    = ,*/                   /* Sync all outstanding async
-                                                  i/o operations on a kiocb. */
 #endif /* NTFS_RW */
        /*.ioctl        = ,*/                   /* Perform function on the
                                                   mounted filesystem. */
index e7054e2ac9227dc4687a98beea6e005c2d03d988..3ecb9f337b7d318868ad1c1ebaaf7ea331aeb623 100644 (file)
@@ -3699,7 +3699,7 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
 static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb,
                                          struct ocfs2_dx_root_block *dx_root)
 {
-       int credits = ocfs2_clusters_to_blocks(osb->sb, 2);
+       int credits = ocfs2_clusters_to_blocks(osb->sb, 3);
 
        credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list);
        credits += ocfs2_quota_trans_credits(osb->sb);
index 1e8fe844e69fdaa92c80c9ca162d736c8b06a984..5355efba4bc8c13f4e2ac2f5a7dfe7a6a6284225 100644 (file)
@@ -73,7 +73,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
                }
        }
 
-       dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+       orangefs_set_timeout(dentry);
        ret = 1;
 out_release_op:
        op_release(new_op);
@@ -94,8 +94,9 @@ out_drop:
 static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
        int ret;
+       unsigned long time = (unsigned long) dentry->d_fsdata;
 
-       if (time_before(jiffies, dentry->d_time))
+       if (time_before(jiffies, time))
                return 1;
 
        if (flags & LOOKUP_RCU)
index 66ea0cc37b189fc0b129606ef9893cc68a4b6d43..02cc6139ec90798900f6c626934c9f6d2baee266 100644 (file)
@@ -621,9 +621,9 @@ static int orangefs_file_release(struct inode *inode, struct file *file)
         * readahead cache (if any); this forces an expensive refresh of
         * data for the next caller of mmap (or 'get_block' accesses)
         */
-       if (file->f_path.dentry->d_inode &&
-           file->f_path.dentry->d_inode->i_mapping &&
-           mapping_nrpages(&file->f_path.dentry->d_inode->i_data)) {
+       if (file_inode(file) &&
+           file_inode(file)->i_mapping &&
+           mapping_nrpages(&file_inode(file)->i_data)) {
                if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) {
                        gossip_debug(GOSSIP_INODE_DEBUG,
                            "calling flush_racache on %pU\n",
@@ -632,7 +632,7 @@ static int orangefs_file_release(struct inode *inode, struct file *file)
                        gossip_debug(GOSSIP_INODE_DEBUG,
                            "flush_racache finished\n");
                }
-               truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping,
+               truncate_inode_pages(file_inode(file)->i_mapping,
                                     0);
        }
        return 0;
@@ -648,7 +648,7 @@ static int orangefs_fsync(struct file *file,
 {
        int ret = -EINVAL;
        struct orangefs_inode_s *orangefs_inode =
-               ORANGEFS_I(file->f_path.dentry->d_inode);
+               ORANGEFS_I(file_inode(file));
        struct orangefs_kernel_op_s *new_op = NULL;
 
        /* required call */
@@ -661,7 +661,7 @@ static int orangefs_fsync(struct file *file,
 
        ret = service_operation(new_op,
                        "orangefs_fsync",
-                       get_interruptible_flag(file->f_path.dentry->d_inode));
+                       get_interruptible_flag(file_inode(file)));
 
        gossip_debug(GOSSIP_FILE_DEBUG,
                     "orangefs_fsync got return value of %d\n",
@@ -669,7 +669,7 @@ static int orangefs_fsync(struct file *file,
 
        op_release(new_op);
 
-       orangefs_flush_inode(file->f_path.dentry->d_inode);
+       orangefs_flush_inode(file_inode(file));
        return ret;
 }
 
index d15d3d2dba6225ce52bf97127a2de62dae609b8d..a290ff6ec7569dc3b5eea9ded2df0e8483c49d65 100644 (file)
@@ -72,7 +72,7 @@ static int orangefs_create(struct inode *dir,
 
        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
-       dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+       orangefs_set_timeout(dentry);
        ORANGEFS_I(inode)->getattr_time = jiffies - 1;
 
        gossip_debug(GOSSIP_NAME_DEBUG,
@@ -183,7 +183,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
                goto out;
        }
 
-       dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+       orangefs_set_timeout(dentry);
 
        inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
        if (IS_ERR(inode)) {
@@ -322,7 +322,7 @@ static int orangefs_symlink(struct inode *dir,
 
        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
-       dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+       orangefs_set_timeout(dentry);
        ORANGEFS_I(inode)->getattr_time = jiffies - 1;
 
        gossip_debug(GOSSIP_NAME_DEBUG,
@@ -386,7 +386,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 
        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
-       dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+       orangefs_set_timeout(dentry);
        ORANGEFS_I(inode)->getattr_time = jiffies - 1;
 
        gossip_debug(GOSSIP_NAME_DEBUG,
index eb09aa026723820099a91ed4563caff0e22b8512..38887cc5577fa901acd907a0af2fb016a3441b2c 100644 (file)
@@ -114,6 +114,7 @@ static const struct seq_operations help_debug_ops = {
 };
 
 const struct file_operations debug_help_fops = {
+       .owner          = THIS_MODULE,
        .open           = orangefs_debug_help_open,
        .read           = seq_read,
        .release        = seq_release,
@@ -121,6 +122,7 @@ const struct file_operations debug_help_fops = {
 };
 
 static const struct file_operations kernel_debug_fops = {
+       .owner          = THIS_MODULE,
        .open           = orangefs_debug_open,
        .read           = orangefs_debug_read,
        .write          = orangefs_debug_write,
@@ -141,6 +143,9 @@ static struct client_debug_mask client_debug_mask;
  */
 static DEFINE_MUTEX(orangefs_debug_lock);
 
+/* Used to protect data in ORANGEFS_KMOD_DEBUG_HELP_FILE */
+static DEFINE_MUTEX(orangefs_help_file_lock);
+
 /*
  * initialize kmod debug operations, create orangefs debugfs dir and
  * ORANGEFS_KMOD_DEBUG_HELP_FILE.
@@ -289,6 +294,8 @@ static void *help_start(struct seq_file *m, loff_t *pos)
 
        gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_start: start\n");
 
+       mutex_lock(&orangefs_help_file_lock);
+
        if (*pos == 0)
                payload = m->private;
 
@@ -305,6 +312,7 @@ static void *help_next(struct seq_file *m, void *v, loff_t *pos)
 static void help_stop(struct seq_file *m, void *p)
 {
        gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_stop: start\n");
+       mutex_unlock(&orangefs_help_file_lock);
 }
 
 static int help_show(struct seq_file *m, void *v)
@@ -610,32 +618,54 @@ out:
  * /sys/kernel/debug/orangefs/debug-help can be catted to
  * see all the available kernel and client debug keywords.
  *
- * When the kernel boots, we have no idea what keywords the
+ * When orangefs.ko initializes, we have no idea what keywords the
  * client supports, nor their associated masks.
  *
- * We pass through this function once at boot and stamp a
+ * We pass through this function once at module-load and stamp a
  * boilerplate "we don't know" message for the client in the
  * debug-help file. We pass through here again when the client
  * starts and then we can fill out the debug-help file fully.
  *
  * The client might be restarted any number of times between
- * reboots, we only build the debug-help file the first time.
+ * module reloads, we only build the debug-help file the first time.
  */
 int orangefs_prepare_debugfs_help_string(int at_boot)
 {
-       int rc = -EINVAL;
-       int i;
-       int byte_count = 0;
        char *client_title = "Client Debug Keywords:\n";
        char *kernel_title = "Kernel Debug Keywords:\n";
+       size_t string_size =  DEBUG_HELP_STRING_SIZE;
+       size_t result_size;
+       size_t i;
+       char *new;
+       int rc = -EINVAL;
 
        gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
 
-       if (at_boot) {
-               byte_count += strlen(HELP_STRING_UNINITIALIZED);
+       if (at_boot)
                client_title = HELP_STRING_UNINITIALIZED;
-       } else {
-               /*
+
+       /* build a new debug_help_string. */
+       new = kzalloc(DEBUG_HELP_STRING_SIZE, GFP_KERNEL);
+       if (!new) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       /*
+        * strlcat(dst, src, size) will append at most
+        * "size - strlen(dst) - 1" bytes of src onto dst,
+        * null terminating the result, and return the total
+        * length of the string it tried to create.
+        *
+        * We'll just plow through here building our new debug
+        * help string and let strlcat take care of assuring that
+        * dst doesn't overflow.
+        */
+       strlcat(new, client_title, string_size);
+
+       if (!at_boot) {
+
+                /*
                 * fill the client keyword/mask array and remember
                 * how many elements there were.
                 */
@@ -644,64 +674,40 @@ int orangefs_prepare_debugfs_help_string(int at_boot)
                if (cdm_element_count <= 0)
                        goto out;
 
-               /* Count the bytes destined for debug_help_string. */
-               byte_count += strlen(client_title);
-
                for (i = 0; i < cdm_element_count; i++) {
-                       byte_count += strlen(cdm_array[i].keyword + 2);
-                       if (byte_count >= DEBUG_HELP_STRING_SIZE) {
-                               pr_info("%s: overflow 1!\n", __func__);
-                               goto out;
-                       }
+                       strlcat(new, "\t", string_size);
+                       strlcat(new, cdm_array[i].keyword, string_size);
+                       strlcat(new, "\n", string_size);
                }
-
-               gossip_debug(GOSSIP_UTILS_DEBUG,
-                            "%s: cdm_element_count:%d:\n",
-                            __func__,
-                            cdm_element_count);
        }
 
-       byte_count += strlen(kernel_title);
+       strlcat(new, "\n", string_size);
+       strlcat(new, kernel_title, string_size);
+
        for (i = 0; i < num_kmod_keyword_mask_map; i++) {
-               byte_count +=
-                       strlen(s_kmod_keyword_mask_map[i].keyword + 2);
-               if (byte_count >= DEBUG_HELP_STRING_SIZE) {
-                       pr_info("%s: overflow 2!\n", __func__);
-                       goto out;
-               }
+               strlcat(new, "\t", string_size);
+               strlcat(new, s_kmod_keyword_mask_map[i].keyword, string_size);
+               result_size = strlcat(new, "\n", string_size);
        }
 
-       /* build debug_help_string. */
-       debug_help_string = kzalloc(DEBUG_HELP_STRING_SIZE, GFP_KERNEL);
-       if (!debug_help_string) {
-               rc = -ENOMEM;
+       /* See if we tried to put too many bytes into "new"... */
+       if (result_size >= string_size) {
+               kfree(new);
                goto out;
        }
 
-       strcat(debug_help_string, client_title);
-
-       if (!at_boot) {
-               for (i = 0; i < cdm_element_count; i++) {
-                       strcat(debug_help_string, "\t");
-                       strcat(debug_help_string, cdm_array[i].keyword);
-                       strcat(debug_help_string, "\n");
-               }
-       }
-
-       strcat(debug_help_string, "\n");
-       strcat(debug_help_string, kernel_title);
-
-       for (i = 0; i < num_kmod_keyword_mask_map; i++) {
-               strcat(debug_help_string, "\t");
-               strcat(debug_help_string, s_kmod_keyword_mask_map[i].keyword);
-               strcat(debug_help_string, "\n");
+       if (at_boot) {
+               debug_help_string = new;
+       } else {
+               mutex_lock(&orangefs_help_file_lock);
+               memset(debug_help_string, 0, DEBUG_HELP_STRING_SIZE);
+               strlcat(debug_help_string, new, string_size);
+               mutex_unlock(&orangefs_help_file_lock);
        }
 
        rc = 0;
 
-out:
-
-       return rc;
+out:   return rc;
 
 }
 
@@ -959,8 +965,12 @@ int orangefs_debugfs_new_client_string(void __user *arg)
        ret = copy_from_user(&client_debug_array_string,
                                      (void __user *)arg,
                                      ORANGEFS_MAX_DEBUG_STRING_LEN);
-       if (ret != 0)
+
+       if (ret != 0) {
+               pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
+                       __func__);
                return -EIO;
+       }
 
        /*
         * The real client-core makes an effort to ensure
@@ -975,45 +985,18 @@ int orangefs_debugfs_new_client_string(void __user *arg)
        client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] =
                '\0';
        
-       if (ret != 0) {
-               pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
-                       __func__);
-               return -EIO;
-       }
-
        pr_info("%s: client debug array string has been received.\n",
                __func__);
 
        if (!help_string_initialized) {
 
-               /* Free the "we don't know yet" default string... */
-               kfree(debug_help_string);
-
-               /* build a proper debug help string */
+               /* Build a proper debug help string. */
                if (orangefs_prepare_debugfs_help_string(0)) {
                        gossip_err("%s: no debug help string \n",
                                   __func__);
                        return -EIO;
                }
 
-               /* Replace the boilerplate boot-time debug-help file. */
-               debugfs_remove(help_file_dentry);
-
-               help_file_dentry =
-                       debugfs_create_file(
-                               ORANGEFS_KMOD_DEBUG_HELP_FILE,
-                               0444,
-                               debug_dir,
-                               debug_help_string,
-                               &debug_help_fops);
-
-               if (!help_file_dentry) {
-                       gossip_err("%s: debugfs_create_file failed for"
-                                  " :%s:!\n",
-                                  __func__,
-                                  ORANGEFS_KMOD_DEBUG_HELP_FILE);
-                       return -EIO;
-               }
        }
 
        debug_mask_to_string(&client_debug_mask, 1);
index 0a82048f3aafadbc3b8195acba3a877ed65f2a0d..3bf803d732c5b3702f735c5776cae6d249b85364 100644 (file)
@@ -580,4 +580,11 @@ static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size)
 #endif
 }
 
+static inline void orangefs_set_timeout(struct dentry *dentry)
+{
+       unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+
+       dentry->d_fsdata = (void *) time;
+}
+
 #endif /* __ORANGEFSKERNEL_H */
index 2e5b03065f345a3e9f48d0401f0def348eeaa1e6..4113eb0495bf90549daca478dd0f8c5a7940680a 100644 (file)
@@ -124,7 +124,7 @@ static int __init orangefs_init(void)
         * unknown at boot time.
         *
         * orangefs_prepare_debugfs_help_string will be used again
-        * later to rebuild the debug-help file after the client starts
+        * later to rebuild the debug-help-string after the client starts
         * and passes along the needed info. The argument signifies
         * which time orangefs_prepare_debugfs_help_string is being
         * called.
@@ -152,7 +152,9 @@ static int __init orangefs_init(void)
 
        ret = register_filesystem(&orangefs_fs_type);
        if (ret == 0) {
-               pr_info("orangefs: module version %s loaded\n", ORANGEFS_VERSION);
+               pr_info("%s: module version %s loaded\n",
+                       __func__,
+                       ORANGEFS_VERSION);
                ret = 0;
                goto out;
        }
index 3f803b3a1f8295b9d9d3f903cc9f1198c6910f4a..36795eed40b09ee1bbb6766e65019b8d1c6695f0 100644 (file)
@@ -57,6 +57,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
        ssize_t list_size, size, value_size = 0;
        char *buf, *name, *value = NULL;
        int uninitialized_var(error);
+       size_t slen;
 
        if (!(old->d_inode->i_opflags & IOP_XATTR) ||
            !(new->d_inode->i_opflags & IOP_XATTR))
@@ -79,7 +80,16 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
                goto out;
        }
 
-       for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
+       for (name = buf; list_size; name += slen) {
+               slen = strnlen(name, list_size) + 1;
+
+               /* underlying fs providing us with a broken xattr list? */
+               if (WARN_ON(slen > list_size)) {
+                       error = -EIO;
+                       break;
+               }
+               list_size -= slen;
+
                if (ovl_is_private_xattr(name))
                        continue;
 retry:
@@ -168,46 +178,14 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
                len -= bytes;
        }
 
+       if (!error)
+               error = vfs_fsync(new_file, 0);
        fput(new_file);
 out_fput:
        fput(old_file);
        return error;
 }
 
-static char *ovl_read_symlink(struct dentry *realdentry)
-{
-       int res;
-       char *buf;
-       struct inode *inode = realdentry->d_inode;
-       mm_segment_t old_fs;
-
-       res = -EINVAL;
-       if (!inode->i_op->readlink)
-               goto err;
-
-       res = -ENOMEM;
-       buf = (char *) __get_free_page(GFP_KERNEL);
-       if (!buf)
-               goto err;
-
-       old_fs = get_fs();
-       set_fs(get_ds());
-       /* The cast to a user pointer is valid due to the set_fs() */
-       res = inode->i_op->readlink(realdentry,
-                                   (char __user *)buf, PAGE_SIZE - 1);
-       set_fs(old_fs);
-       if (res < 0) {
-               free_page((unsigned long) buf);
-               goto err;
-       }
-       buf[res] = '\0';
-
-       return buf;
-
-err:
-       return ERR_PTR(res);
-}
-
 static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
 {
        struct iattr attr = {
@@ -354,19 +332,20 @@ out_cleanup:
 int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
                    struct path *lowerpath, struct kstat *stat)
 {
+       DEFINE_DELAYED_CALL(done);
        struct dentry *workdir = ovl_workdir(dentry);
        int err;
        struct kstat pstat;
        struct path parentpath;
+       struct dentry *lowerdentry = lowerpath->dentry;
        struct dentry *upperdir;
        struct dentry *upperdentry;
-       const struct cred *old_cred;
-       char *link = NULL;
+       const char *link = NULL;
 
        if (WARN_ON(!workdir))
                return -EROFS;
 
-       ovl_do_check_copy_up(lowerpath->dentry);
+       ovl_do_check_copy_up(lowerdentry);
 
        ovl_path_upper(parent, &parentpath);
        upperdir = parentpath.dentry;
@@ -376,13 +355,11 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
                return err;
 
        if (S_ISLNK(stat->mode)) {
-               link = ovl_read_symlink(lowerpath->dentry);
+               link = vfs_get_link(lowerdentry, &done);
                if (IS_ERR(link))
                        return PTR_ERR(link);
        }
 
-       old_cred = ovl_override_creds(dentry->d_sb);
-
        err = -EIO;
        if (lock_rename(workdir, upperdir) != NULL) {
                pr_err("overlayfs: failed to lock workdir+upperdir\n");
@@ -403,19 +380,16 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
        }
 out_unlock:
        unlock_rename(workdir, upperdir);
-       revert_creds(old_cred);
-
-       if (link)
-               free_page((unsigned long) link);
+       do_delayed_call(&done);
 
        return err;
 }
 
 int ovl_copy_up(struct dentry *dentry)
 {
-       int err;
+       int err = 0;
+       const struct cred *old_cred = ovl_override_creds(dentry->d_sb);
 
-       err = 0;
        while (!err) {
                struct dentry *next;
                struct dentry *parent;
@@ -447,6 +421,7 @@ int ovl_copy_up(struct dentry *dentry)
                dput(parent);
                dput(next);
        }
+       revert_creds(old_cred);
 
        return err;
 }
index 5f90ddf778bab71db8c7b651e64f0c321c15b926..306b6c16184081f26579b25f830b1b6f5eb0c094 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/cred.h>
 #include <linux/posix_acl.h>
 #include <linux/posix_acl_xattr.h>
+#include <linux/atomic.h>
 #include "overlayfs.h"
 
 void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
@@ -37,8 +38,10 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
 {
        struct dentry *temp;
        char name[20];
+       static atomic_t temp_id = ATOMIC_INIT(0);
 
-       snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
+       /* counter is allowed to wrap, since temp dentries are ephemeral */
+       snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
 
        temp = lookup_one_len(name, workdir, strlen(name));
        if (!IS_ERR(temp) && temp->d_inode) {
index c18d6a4ff456bfd57015eebc0e0c083246d55fb0..7fb53d05553780ac4bc9f3909afdb81df723af32 100644 (file)
@@ -19,6 +19,7 @@ static int ovl_copy_up_truncate(struct dentry *dentry)
        struct dentry *parent;
        struct kstat stat;
        struct path lowerpath;
+       const struct cred *old_cred;
 
        parent = dget_parent(dentry);
        err = ovl_copy_up(parent);
@@ -26,12 +27,14 @@ static int ovl_copy_up_truncate(struct dentry *dentry)
                goto out_dput_parent;
 
        ovl_path_lower(dentry, &lowerpath);
-       err = vfs_getattr(&lowerpath, &stat);
-       if (err)
-               goto out_dput_parent;
 
-       stat.size = 0;
-       err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
+       old_cred = ovl_override_creds(dentry->d_sb);
+       err = vfs_getattr(&lowerpath, &stat);
+       if (!err) {
+               stat.size = 0;
+               err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat);
+       }
+       revert_creds(old_cred);
 
 out_dput_parent:
        dput(parent);
@@ -153,45 +156,18 @@ static const char *ovl_get_link(struct dentry *dentry,
                                struct inode *inode,
                                struct delayed_call *done)
 {
-       struct dentry *realdentry;
-       struct inode *realinode;
        const struct cred *old_cred;
        const char *p;
 
        if (!dentry)
                return ERR_PTR(-ECHILD);
 
-       realdentry = ovl_dentry_real(dentry);
-       realinode = realdentry->d_inode;
-
-       if (WARN_ON(!realinode->i_op->get_link))
-               return ERR_PTR(-EPERM);
-
        old_cred = ovl_override_creds(dentry->d_sb);
-       p = realinode->i_op->get_link(realdentry, realinode, done);
+       p = vfs_get_link(ovl_dentry_real(dentry), done);
        revert_creds(old_cred);
        return p;
 }
 
-static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
-{
-       struct path realpath;
-       struct inode *realinode;
-       const struct cred *old_cred;
-       int err;
-
-       ovl_path_real(dentry, &realpath);
-       realinode = realpath.dentry->d_inode;
-
-       if (!realinode->i_op->readlink)
-               return -EINVAL;
-
-       old_cred = ovl_override_creds(dentry->d_sb);
-       err = realinode->i_op->readlink(realpath.dentry, buf, bufsiz);
-       revert_creds(old_cred);
-       return err;
-}
-
 bool ovl_is_private_xattr(const char *name)
 {
        return strncmp(name, OVL_XATTR_PREFIX,
@@ -294,9 +270,6 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type)
        if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
                return NULL;
 
-       if (!realinode->i_op->get_acl)
-               return NULL;
-
        old_cred = ovl_override_creds(inode->i_sb);
        acl = get_acl(realinode, type);
        revert_creds(old_cred);
@@ -375,7 +348,7 @@ static const struct inode_operations ovl_file_inode_operations = {
 static const struct inode_operations ovl_symlink_inode_operations = {
        .setattr        = ovl_setattr,
        .get_link       = ovl_get_link,
-       .readlink       = ovl_readlink,
+       .readlink       = generic_readlink,
        .getattr        = ovl_getattr,
        .listxattr      = ovl_listxattr,
        .update_time    = ovl_update_time,
index 7e3f0127fc1aa7fea2096fcf806bca4dc9c9b60f..edd46a0e951d3e6d5ecbae0de8091c6f903d5297 100644 (file)
@@ -273,12 +273,11 @@ static bool ovl_is_opaquedir(struct dentry *dentry)
 {
        int res;
        char val;
-       struct inode *inode = dentry->d_inode;
 
-       if (!S_ISDIR(inode->i_mode) || !(inode->i_opflags & IOP_XATTR))
+       if (!d_is_dir(dentry))
                return false;
 
-       res = __vfs_getxattr(dentry, inode, OVL_XATTR_OPAQUE, &val, 1);
+       res = vfs_getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1);
        if (res == 1 && val == 'y')
                return true;
 
@@ -419,16 +418,12 @@ static bool ovl_dentry_weird(struct dentry *dentry)
                                  DCACHE_OP_COMPARE);
 }
 
-static inline struct dentry *ovl_lookup_real(struct super_block *ovl_sb,
-                                            struct dentry *dir,
+static inline struct dentry *ovl_lookup_real(struct dentry *dir,
                                             const struct qstr *name)
 {
-       const struct cred *old_cred;
        struct dentry *dentry;
 
-       old_cred = ovl_override_creds(ovl_sb);
        dentry = lookup_one_len_unlocked(name->name, dir, name->len);
-       revert_creds(old_cred);
 
        if (IS_ERR(dentry)) {
                if (PTR_ERR(dentry) == -ENOENT)
@@ -469,6 +464,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
                          unsigned int flags)
 {
        struct ovl_entry *oe;
+       const struct cred *old_cred;
        struct ovl_entry *poe = dentry->d_parent->d_fsdata;
        struct path *stack = NULL;
        struct dentry *upperdir, *upperdentry = NULL;
@@ -479,9 +475,10 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
        unsigned int i;
        int err;
 
+       old_cred = ovl_override_creds(dentry->d_sb);
        upperdir = ovl_upperdentry_dereference(poe);
        if (upperdir) {
-               this = ovl_lookup_real(dentry->d_sb, upperdir, &dentry->d_name);
+               this = ovl_lookup_real(upperdir, &dentry->d_name);
                err = PTR_ERR(this);
                if (IS_ERR(this))
                        goto out;
@@ -514,8 +511,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
                bool opaque = false;
                struct path lowerpath = poe->lowerstack[i];
 
-               this = ovl_lookup_real(dentry->d_sb,
-                                      lowerpath.dentry, &dentry->d_name);
+               this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name);
                err = PTR_ERR(this);
                if (IS_ERR(this)) {
                        /*
@@ -588,6 +584,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
                ovl_copyattr(realdentry->d_inode, inode);
        }
 
+       revert_creds(old_cred);
        oe->opaque = upperopaque;
        oe->__upperdentry = upperdentry;
        memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr);
@@ -606,6 +603,7 @@ out_put:
 out_put_upper:
        dput(upperdentry);
 out:
+       revert_creds(old_cred);
        return ERR_PTR(err);
 }
 
@@ -834,6 +832,19 @@ retry:
                if (err)
                        goto out_dput;
 
+               /*
+                * Try to remove POSIX ACL xattrs from workdir.  We are good if:
+                *
+                * a) success (there was a POSIX ACL xattr and was removed)
+                * b) -ENODATA (there was no POSIX ACL xattr)
+                * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
+                *
+                * There are various other error values that could effectively
+                * mean that the xattr doesn't exist (e.g. -ERANGE is returned
+                * if the xattr name is too long), but the set of filesystems
+                * allowed as upper is limited to "normal" ones, where checking
+                * for the above two errors is sufficient.
+                */
                err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
                if (err && err != -ENODATA && err != -EOPNOTSUPP)
                        goto out_dput;
@@ -1026,6 +1037,21 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
 
        posix_acl_release(acl);
 
+       /*
+        * Check if sgid bit needs to be cleared (actual setacl operation will
+        * be done with mounter's capabilities and so that won't do it for us).
+        */
+       if (unlikely(inode->i_mode & S_ISGID) &&
+           handler->flags == ACL_TYPE_ACCESS &&
+           !in_group_p(inode->i_gid) &&
+           !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
+               struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
+
+               err = ovl_setattr(dentry, &iattr);
+               if (err)
+                       return err;
+       }
+
        err = ovl_xattr_set(dentry, handler->name, value, size, flags);
        if (!err)
                ovl_copyattr(ovl_inode_real(inode, NULL), inode);
@@ -1292,6 +1318,12 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
        if (!oe)
                goto out_put_cred;
 
+       sb->s_magic = OVERLAYFS_SUPER_MAGIC;
+       sb->s_op = &ovl_super_operations;
+       sb->s_xattr = ovl_xattr_handlers;
+       sb->s_fs_info = ufs;
+       sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
+
        root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR));
        if (!root_dentry)
                goto out_free_oe;
@@ -1315,12 +1347,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
        ovl_inode_init(d_inode(root_dentry), realinode, !!upperpath.dentry);
        ovl_copyattr(realinode, d_inode(root_dentry));
 
-       sb->s_magic = OVERLAYFS_SUPER_MAGIC;
-       sb->s_op = &ovl_super_operations;
-       sb->s_xattr = ovl_xattr_handlers;
        sb->s_root = root_dentry;
-       sb->s_fs_info = ufs;
-       sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK;
 
        return 0;
 
index 89600fd5963d46d5a5bd0915bde36850f7e71110..81818adb8e9ee3cc1adfbd5d0487d427f8c1f531 100644 (file)
@@ -412,10 +412,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
        mm = get_task_mm(task);
        if (mm) {
                vsize = task_vsize(mm);
-               if (permitted) {
-                       eip = KSTK_EIP(task);
-                       esp = KSTK_ESP(task);
-               }
+               /*
+                * esp and eip are intentionally zeroed out.  There is no
+                * non-racy way to read them without freezing the task.
+                * Programs that need reliable values can use ptrace(2).
+                */
        }
 
        get_task_comm(tcomm, task);
index c2964d890c9a58910d4d9c2b53c5845637fed70e..ca651ac00660889a86fcd5c27d4a29aa709d5a53 100644 (file)
@@ -832,6 +832,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
        unsigned long addr = *ppos;
        ssize_t copied;
        char *page;
+       unsigned int flags;
 
        if (!mm)
                return 0;
@@ -844,6 +845,11 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
        if (!atomic_inc_not_zero(&mm->mm_users))
                goto free;
 
+       /* Maybe we should limit FOLL_FORCE to actual ptrace users? */
+       flags = FOLL_FORCE;
+       if (write)
+               flags |= FOLL_WRITE;
+
        while (count > 0) {
                int this_len = min_t(int, count, PAGE_SIZE);
 
@@ -852,7 +858,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
                        break;
                }
 
-               this_len = access_remote_vm(mm, addr, page, this_len, write);
+               this_len = access_remote_vm(mm, addr, page, this_len, flags);
                if (!this_len) {
                        if (!copied)
                                copied = -EIO;
@@ -964,8 +970,7 @@ static ssize_t environ_read(struct file *file, char __user *buf,
                max_len = min_t(size_t, PAGE_SIZE, count);
                this_len = min(max_len, this_len);
 
-               retval = access_remote_vm(mm, (env_start + src),
-                       page, this_len, 0);
+               retval = access_remote_vm(mm, (env_start + src), page, this_len, 0);
 
                if (retval <= 0) {
                        ret = retval;
@@ -1007,6 +1012,9 @@ static ssize_t auxv_read(struct file *file, char __user *buf,
 {
        struct mm_struct *mm = file->private_data;
        unsigned int nwords = 0;
+
+       if (!mm)
+               return 0;
        do {
                nwords += 2;
        } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
index 6909582ce5e5b9f6d94a89a4e1eb184488c0518e..35b92d81692f098efc911cbdd334182a96c546a8 100644 (file)
@@ -266,24 +266,15 @@ static int do_maps_open(struct inode *inode, struct file *file,
  * /proc/PID/maps that is the stack of the main task.
  */
 static int is_stack(struct proc_maps_private *priv,
-                   struct vm_area_struct *vma, int is_pid)
+                   struct vm_area_struct *vma)
 {
-       int stack = 0;
-
-       if (is_pid) {
-               stack = vma->vm_start <= vma->vm_mm->start_stack &&
-                       vma->vm_end >= vma->vm_mm->start_stack;
-       } else {
-               struct inode *inode = priv->inode;
-               struct task_struct *task;
-
-               rcu_read_lock();
-               task = pid_task(proc_pid(inode), PIDTYPE_PID);
-               if (task)
-                       stack = vma_is_stack_for_task(vma, task);
-               rcu_read_unlock();
-       }
-       return stack;
+       /*
+        * We make no effort to guess what a given thread considers to be
+        * its "stack".  It's not even well-defined for programs written
+        * in languages like Go.
+        */
+       return vma->vm_start <= vma->vm_mm->start_stack &&
+               vma->vm_end >= vma->vm_mm->start_stack;
 }
 
 static void
@@ -354,7 +345,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
                        goto done;
                }
 
-               if (is_stack(priv, vma, is_pid))
+               if (is_stack(priv, vma))
                        name = "[stack]";
        }
 
@@ -1669,7 +1660,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
                seq_file_path(m, file, "\n\t= ");
        } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
                seq_puts(m, " heap");
-       } else if (is_stack(proc_priv, vma, is_pid)) {
+       } else if (is_stack(proc_priv, vma)) {
                seq_puts(m, " stack");
        }
 
index faacb0c0d857602111bfc04f2e374c451059c358..37175621e8906881adf4e034ce06224664120031 100644 (file)
@@ -124,25 +124,17 @@ unsigned long task_statm(struct mm_struct *mm,
 }
 
 static int is_stack(struct proc_maps_private *priv,
-                   struct vm_area_struct *vma, int is_pid)
+                   struct vm_area_struct *vma)
 {
        struct mm_struct *mm = vma->vm_mm;
-       int stack = 0;
-
-       if (is_pid) {
-               stack = vma->vm_start <= mm->start_stack &&
-                       vma->vm_end >= mm->start_stack;
-       } else {
-               struct inode *inode = priv->inode;
-               struct task_struct *task;
-
-               rcu_read_lock();
-               task = pid_task(proc_pid(inode), PIDTYPE_PID);
-               if (task)
-                       stack = vma_is_stack_for_task(vma, task);
-               rcu_read_unlock();
-       }
-       return stack;
+
+       /*
+        * We make no effort to guess what a given thread considers to be
+        * its "stack".  It's not even well-defined for programs written
+        * in languages like Go.
+        */
+       return vma->vm_start <= mm->start_stack &&
+               vma->vm_end >= mm->start_stack;
 }
 
 /*
@@ -184,7 +176,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
        if (file) {
                seq_pad(m, ' ');
                seq_file_path(m, file, "");
-       } else if (mm && is_stack(priv, vma, is_pid)) {
+       } else if (mm && is_stack(priv, vma)) {
                seq_pad(m, ' ');
                seq_printf(m, "[stack]");
        }
index 8b252673d4540b519e4a46096869ff039246e614..e99b1a72d9a7348abd9d59a37d661ec335bfe73b 100644 (file)
@@ -12,14 +12,8 @@ static const struct genl_multicast_group quota_mcgrps[] = {
 };
 
 /* Netlink family structure for quota */
-static struct genl_family quota_genl_family = {
-       /*
-        * Needed due to multicast group ID abuse - old code assumed
-        * the family ID was also a valid multicast group ID (which
-        * isn't true) and userspace might thus rely on it. Assign a
-        * static ID for this group to make dealing with that easier.
-        */
-       .id = GENL_ID_VFS_DQUOT,
+static struct genl_family quota_genl_family __ro_after_init = {
+       .module = THIS_MODULE,
        .hdrsize = 0,
        .name = "VFS_DQUOT",
        .version = 1,
index 66215a7b17cf14d0b776dbb8e62b310a1571fdeb..190e0d362581a9a87d5cdae2a069e0d7e8991af9 100644 (file)
@@ -730,6 +730,35 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
 /* A write operation does a read from user space and vice versa */
 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
 
+/**
+ * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
+ *     into the kernel and check that it is valid.
+ *
+ * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
+ * @uvector: Pointer to the userspace array.
+ * @nr_segs: Number of elements in userspace array.
+ * @fast_segs: Number of elements in @fast_pointer.
+ * @fast_pointer: Pointer to (usually small on-stack) kernel array.
+ * @ret_pointer: (output parameter) Pointer to a variable that will point to
+ *     either @fast_pointer, a newly allocated kernel array, or NULL,
+ *     depending on which array was used.
+ *
+ * This function copies an array of &struct iovec of @nr_segs from
+ * userspace into the kernel and checks that each element is valid (e.g.
+ * it does not point to a kernel address or cause overflow by being too
+ * large, etc.).
+ *
+ * As an optimization, the caller may provide a pointer to a small
+ * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
+ * (the size of this array, or 0 if unused, should be given in @fast_segs).
+ *
+ * @ret_pointer will always point to the array that was used, so the
+ * caller must take care not to call kfree() on it e.g. in case the
+ * @fast_pointer array was used and it was allocated on the stack.
+ *
+ * Return: The total number of bytes covered by the iovec array on success
+ *   or a negative error code on error.
+ */
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
                              unsigned long nr_segs, unsigned long fast_segs,
                              struct iovec *fast_pointer,
index 153d4f3bd441febd7004b1862cd218afc0ee6252..5a7750bd2eea765d06f5e8b76687701b24867c7b 100644 (file)
@@ -299,13 +299,8 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 {
        struct iov_iter to;
        struct kiocb kiocb;
-       loff_t isize;
        int idx, ret;
 
-       isize = i_size_read(in->f_mapping->host);
-       if (unlikely(*ppos >= isize))
-               return 0;
-
        iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
        idx = to.idx;
        init_sync_kiocb(&kiocb, in);
@@ -413,7 +408,8 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
        if (res <= 0)
                return -ENOMEM;
 
-       nr_pages = res / PAGE_SIZE;
+       BUG_ON(dummy);
+       nr_pages = DIV_ROUND_UP(res, PAGE_SIZE);
 
        vec = __vec;
        if (nr_pages > PIPE_DEF_BUFFERS) {
index c2ff475c1711f38aa0b4b5296c68589e68fa1fc2..c183835566c19c56fd30d6a1c22d380d10668411 100644 (file)
@@ -1269,25 +1269,34 @@ EXPORT_SYMBOL(__sb_start_write);
 static void sb_wait_write(struct super_block *sb, int level)
 {
        percpu_down_write(sb->s_writers.rw_sem + level-1);
-       /*
-        * We are going to return to userspace and forget about this lock, the
-        * ownership goes to the caller of thaw_super() which does unlock.
-        *
-        * FIXME: we should do this before return from freeze_super() after we
-        * called sync_filesystem(sb) and s_op->freeze_fs(sb), and thaw_super()
-        * should re-acquire these locks before s_op->unfreeze_fs(sb). However
-        * this leads to lockdep false-positives, so currently we do the early
-        * release right after acquire.
-        */
-       percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_);
 }
 
-static void sb_freeze_unlock(struct super_block *sb)
+/*
+ * We are going to return to userspace and forget about these locks, the
+ * ownership goes to the caller of thaw_super() which does unlock().
+ */
+static void lockdep_sb_freeze_release(struct super_block *sb)
+{
+       int level;
+
+       for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
+               percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+}
+
+/*
+ * Tell lockdep we are holding these locks before we call ->unfreeze_fs(sb).
+ */
+static void lockdep_sb_freeze_acquire(struct super_block *sb)
 {
        int level;
 
        for (level = 0; level < SB_FREEZE_LEVELS; ++level)
                percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+}
+
+static void sb_freeze_unlock(struct super_block *sb)
+{
+       int level;
 
        for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
                percpu_up_write(sb->s_writers.rw_sem + level);
@@ -1379,10 +1388,11 @@ int freeze_super(struct super_block *sb)
                }
        }
        /*
-        * This is just for debugging purposes so that fs can warn if it
-        * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
+        * For debugging purposes so that fs can warn if it sees write activity
+        * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
         */
        sb->s_writers.frozen = SB_FREEZE_COMPLETE;
+       lockdep_sb_freeze_release(sb);
        up_write(&sb->s_umount);
        return 0;
 }
@@ -1399,7 +1409,7 @@ int thaw_super(struct super_block *sb)
        int error;
 
        down_write(&sb->s_umount);
-       if (sb->s_writers.frozen == SB_UNFROZEN) {
+       if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
                up_write(&sb->s_umount);
                return -EINVAL;
        }
@@ -1409,11 +1419,14 @@ int thaw_super(struct super_block *sb)
                goto out;
        }
 
+       lockdep_sb_freeze_acquire(sb);
+
        if (sb->s_op->unfreeze_fs) {
                error = sb->s_op->unfreeze_fs(sb);
                if (error) {
                        printk(KERN_ERR
                                "VFS:Filesystem thaw failed\n");
+                       lockdep_sb_freeze_release(sb);
                        up_write(&sb->s_umount);
                        return error;
                }
index 94374e43502599c466153858476bd42652a7ea1b..2b67bda2021b9e2955ae66ba03e536dfdcefec2a 100644 (file)
@@ -21,14 +21,14 @@ DEFINE_SPINLOCK(sysfs_symlink_target_lock);
 
 void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
 {
-       char *buf, *path = NULL;
+       char *buf;
 
        buf = kzalloc(PATH_MAX, GFP_KERNEL);
        if (buf)
-               path = kernfs_path(parent, buf, PATH_MAX);
+               kernfs_path(parent, buf, PATH_MAX);
 
        WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n",
-            path, name);
+            buf, name);
 
        kfree(buf);
 }
index c8f60df2733eba516b189716a6b3bb3ab64520d1..ca16c5d7bab1726af305fa3a1a534ab5bc78cb4e 100644 (file)
@@ -439,7 +439,7 @@ static unsigned int vfs_dent_type(uint8_t type)
  */
 static int ubifs_readdir(struct file *file, struct dir_context *ctx)
 {
-       int err;
+       int err = 0;
        struct qstr nm;
        union ubifs_key key;
        struct ubifs_dent_node *dent;
@@ -541,14 +541,20 @@ out:
        kfree(file->private_data);
        file->private_data = NULL;
 
-       if (err != -ENOENT) {
+       if (err != -ENOENT)
                ubifs_err(c, "cannot find next direntry, error %d", err);
-               return err;
-       }
+       else
+               /*
+                * -ENOENT is a non-fatal error in this context, the TNC uses
+                * it to indicate that the cursor moved past the current directory
+                * and readdir() has to stop.
+                */
+               err = 0;
+
 
        /* 2 is a special value indicating that there are no more direntries */
        ctx->pos = 2;
-       return 0;
+       return err;
 }
 
 /* Free saved readdir() state when the directory is closed */
@@ -1060,9 +1066,9 @@ static void unlock_4_inodes(struct inode *inode1, struct inode *inode2,
        mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
 }
 
-static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
-                       struct inode *new_dir, struct dentry *new_dentry,
-                       unsigned int flags)
+static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
+                    struct inode *new_dir, struct dentry *new_dentry,
+                    unsigned int flags)
 {
        struct ubifs_info *c = old_dir->i_sb->s_fs_info;
        struct inode *old_inode = d_inode(old_dentry);
@@ -1323,7 +1329,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
        return err;
 }
 
-static int ubifs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        struct inode *new_dir, struct dentry *new_dentry,
                        unsigned int flags)
 {
@@ -1336,7 +1342,7 @@ static int ubifs_rename2(struct inode *old_dir, struct dentry *old_dentry,
        if (flags & RENAME_EXCHANGE)
                return ubifs_xrename(old_dir, old_dentry, new_dir, new_dentry);
 
-       return ubifs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
+       return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
 }
 
 int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -1387,7 +1393,7 @@ const struct inode_operations ubifs_dir_inode_operations = {
        .mkdir       = ubifs_mkdir,
        .rmdir       = ubifs_rmdir,
        .mknod       = ubifs_mknod,
-       .rename      = ubifs_rename2,
+       .rename      = ubifs_rename,
        .setattr     = ubifs_setattr,
        .getattr     = ubifs_getattr,
        .listxattr   = ubifs_listxattr,
index 6c2f4d41ed737c0bf90482a8674ea1a29948fca4..d9f9615bfd71a24c4235795cef11f7c5b1c4c6b6 100644 (file)
@@ -172,6 +172,7 @@ out_cancel:
        host_ui->xattr_cnt -= 1;
        host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
        host_ui->xattr_size -= CALC_XATTR_BYTES(size);
+       host_ui->xattr_names -= nm->len;
        mutex_unlock(&host_ui->ui_mutex);
 out_free:
        make_bad_inode(inode);
@@ -478,6 +479,7 @@ out_cancel:
        host_ui->xattr_cnt += 1;
        host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
        host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
+       host_ui->xattr_names += nm->len;
        mutex_unlock(&host_ui->ui_mutex);
        ubifs_release_budget(c, &req);
        make_bad_inode(inode);
index 3368659c471e4f7dc8cd425738bccb7d5fec78a6..2d13b4e62faec1cd0e3ff8d5966e767821867a2c 100644 (file)
@@ -170,7 +170,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
                const void *value, size_t size, int flags)
 {
        struct inode *inode = dentry->d_inode;
-       int error = -EOPNOTSUPP;
+       int error = -EAGAIN;
        int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
                                   XATTR_SECURITY_PREFIX_LEN);
 
@@ -183,15 +183,21 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
                        security_inode_post_setxattr(dentry, name, value,
                                                     size, flags);
                }
-       } else if (issec) {
-               const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
-
+       } else {
                if (unlikely(is_bad_inode(inode)))
                        return -EIO;
-               error = security_inode_setsecurity(inode, suffix, value,
-                                                  size, flags);
-               if (!error)
-                       fsnotify_xattr(dentry);
+       }
+       if (error == -EAGAIN) {
+               error = -EOPNOTSUPP;
+
+               if (issec) {
+                       const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
+
+                       error = security_inode_setsecurity(inode, suffix, value,
+                                                          size, flags);
+                       if (!error)
+                               fsnotify_xattr(dentry);
+               }
        }
 
        return error;
index c27344cf38e177187f048eb799297f281eea5a2f..c6eb21940783e4de3c555520eddbba48d0b19be4 100644 (file)
@@ -3974,9 +3974,6 @@ xfs_bmap_remap_alloc(
         * allocating, so skip that check by pretending to be freeing.
         */
        error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
-       if (error)
-               goto error0;
-error0:
        xfs_perag_put(args.pag);
        if (error)
                trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
@@ -3999,6 +3996,39 @@ xfs_bmap_alloc(
        return xfs_bmap_btalloc(ap);
 }
 
+/* Trim extent to fit a logical block range. */
+void
+xfs_trim_extent(
+       struct xfs_bmbt_irec    *irec,
+       xfs_fileoff_t           bno,
+       xfs_filblks_t           len)
+{
+       xfs_fileoff_t           distance;
+       xfs_fileoff_t           end = bno + len;
+
+       if (irec->br_startoff + irec->br_blockcount <= bno ||
+           irec->br_startoff >= end) {
+               irec->br_blockcount = 0;
+               return;
+       }
+
+       if (irec->br_startoff < bno) {
+               distance = bno - irec->br_startoff;
+               if (isnullstartblock(irec->br_startblock))
+                       irec->br_startblock = DELAYSTARTBLOCK;
+               if (irec->br_startblock != DELAYSTARTBLOCK &&
+                   irec->br_startblock != HOLESTARTBLOCK)
+                       irec->br_startblock += distance;
+               irec->br_startoff += distance;
+               irec->br_blockcount -= distance;
+       }
+
+       if (end < irec->br_startoff + irec->br_blockcount) {
+               distance = irec->br_startoff + irec->br_blockcount - end;
+               irec->br_blockcount -= distance;
+       }
+}
+
 /*
  * Trim the returned map to the required bounds
  */
@@ -4829,6 +4859,219 @@ xfs_bmap_split_indlen(
        return stolen;
 }
 
+int
+xfs_bmap_del_extent_delay(
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       xfs_extnum_t            *idx,
+       struct xfs_bmbt_irec    *got,
+       struct xfs_bmbt_irec    *del)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
+       struct xfs_bmbt_irec    new;
+       int64_t                 da_old, da_new, da_diff = 0;
+       xfs_fileoff_t           del_endoff, got_endoff;
+       xfs_filblks_t           got_indlen, new_indlen, stolen;
+       int                     error = 0, state = 0;
+       bool                    isrt;
+
+       XFS_STATS_INC(mp, xs_del_exlist);
+
+       isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+       del_endoff = del->br_startoff + del->br_blockcount;
+       got_endoff = got->br_startoff + got->br_blockcount;
+       da_old = startblockval(got->br_startblock);
+       da_new = 0;
+
+       ASSERT(*idx >= 0);
+       ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(del->br_blockcount > 0);
+       ASSERT(got->br_startoff <= del->br_startoff);
+       ASSERT(got_endoff >= del_endoff);
+
+       if (isrt) {
+               int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
+
+               do_div(rtexts, mp->m_sb.sb_rextsize);
+               xfs_mod_frextents(mp, rtexts);
+       }
+
+       /*
+        * Update the inode delalloc counter now and wait to update the
+        * sb counters as we might have to borrow some blocks for the
+        * indirect block accounting.
+        */
+       xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0,
+                       isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+       ip->i_delayed_blks -= del->br_blockcount;
+
+       if (whichfork == XFS_COW_FORK)
+               state |= BMAP_COWFORK;
+
+       if (got->br_startoff == del->br_startoff)
+               state |= BMAP_LEFT_CONTIG;
+       if (got_endoff == del_endoff)
+               state |= BMAP_RIGHT_CONTIG;
+
+       switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+               /*
+                * Matches the whole extent.  Delete the entry.
+                */
+               xfs_iext_remove(ip, *idx, 1, state);
+               --*idx;
+               break;
+       case BMAP_LEFT_CONTIG:
+               /*
+                * Deleting the first part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_startoff = del_endoff;
+               got->br_blockcount -= del->br_blockcount;
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
+                               got->br_blockcount), da_old);
+               got->br_startblock = nullstartblock((int)da_new);
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+       case BMAP_RIGHT_CONTIG:
+               /*
+                * Deleting the last part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_blockcount = got->br_blockcount - del->br_blockcount;
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
+                               got->br_blockcount), da_old);
+               got->br_startblock = nullstartblock((int)da_new);
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+       case 0:
+               /*
+                * Deleting the middle of the extent.
+                *
+                * Distribute the original indlen reservation across the two new
+                * extents.  Steal blocks from the deleted extent if necessary.
+                * Stealing blocks simply fudges the fdblocks accounting below.
+                * Warn if either of the new indlen reservations is zero as this
+                * can lead to delalloc problems.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+
+               got->br_blockcount = del->br_startoff - got->br_startoff;
+               got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
+
+               new.br_blockcount = got_endoff - del_endoff;
+               new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
+
+               WARN_ON_ONCE(!got_indlen || !new_indlen);
+               stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
+                                                      del->br_blockcount);
+
+               got->br_startblock = nullstartblock((int)got_indlen);
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_);
+
+               new.br_startoff = del_endoff;
+               new.br_state = got->br_state;
+               new.br_startblock = nullstartblock((int)new_indlen);
+
+               ++*idx;
+               xfs_iext_insert(ip, *idx, 1, &new, state);
+
+               da_new = got_indlen + new_indlen - stolen;
+               del->br_blockcount -= stolen;
+               break;
+       }
+
+       ASSERT(da_old >= da_new);
+       da_diff = da_old - da_new;
+       if (!isrt)
+               da_diff += del->br_blockcount;
+       if (da_diff)
+               xfs_mod_fdblocks(mp, da_diff, false);
+       return error;
+}
+
+void
+xfs_bmap_del_extent_cow(
+       struct xfs_inode        *ip,
+       xfs_extnum_t            *idx,
+       struct xfs_bmbt_irec    *got,
+       struct xfs_bmbt_irec    *del)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_bmbt_irec    new;
+       xfs_fileoff_t           del_endoff, got_endoff;
+       int                     state = BMAP_COWFORK;
+
+       XFS_STATS_INC(mp, xs_del_exlist);
+
+       del_endoff = del->br_startoff + del->br_blockcount;
+       got_endoff = got->br_startoff + got->br_blockcount;
+
+       ASSERT(*idx >= 0);
+       ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(del->br_blockcount > 0);
+       ASSERT(got->br_startoff <= del->br_startoff);
+       ASSERT(got_endoff >= del_endoff);
+       ASSERT(!isnullstartblock(got->br_startblock));
+
+       if (got->br_startoff == del->br_startoff)
+               state |= BMAP_LEFT_CONTIG;
+       if (got_endoff == del_endoff)
+               state |= BMAP_RIGHT_CONTIG;
+
+       switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+               /*
+                * Matches the whole extent.  Delete the entry.
+                */
+               xfs_iext_remove(ip, *idx, 1, state);
+               --*idx;
+               break;
+       case BMAP_LEFT_CONTIG:
+               /*
+                * Deleting the first part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_startoff = del_endoff;
+               got->br_blockcount -= del->br_blockcount;
+               got->br_startblock = del->br_startblock + del->br_blockcount;
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+       case BMAP_RIGHT_CONTIG:
+               /*
+                * Deleting the last part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_blockcount -= del->br_blockcount;
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+       case 0:
+               /*
+                * Deleting the middle of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_blockcount = del->br_startoff - got->br_startoff;
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               new.br_startoff = del_endoff;
+               new.br_blockcount = got_endoff - del_endoff;
+               new.br_state = got->br_state;
+               new.br_startblock = del->br_startblock + del->br_blockcount;
+
+               ++*idx;
+               xfs_iext_insert(ip, *idx, 1, &new, state);
+               break;
+       }
+}
+
 /*
  * Called by xfs_bmapi to update file extent records and the btree
  * after removing space (or undoing a delayed allocation).
@@ -5171,175 +5414,6 @@ done:
        return error;
 }
 
-/* Remove an extent from the CoW fork.  Similar to xfs_bmap_del_extent. */
-int
-xfs_bunmapi_cow(
-       struct xfs_inode                *ip,
-       struct xfs_bmbt_irec            *del)
-{
-       xfs_filblks_t                   da_new;
-       xfs_filblks_t                   da_old;
-       xfs_fsblock_t                   del_endblock = 0;
-       xfs_fileoff_t                   del_endoff;
-       int                             delay;
-       struct xfs_bmbt_rec_host        *ep;
-       int                             error;
-       struct xfs_bmbt_irec            got;
-       xfs_fileoff_t                   got_endoff;
-       struct xfs_ifork                *ifp;
-       struct xfs_mount                *mp;
-       xfs_filblks_t                   nblks;
-       struct xfs_bmbt_irec            new;
-       /* REFERENCED */
-       uint                            qfield;
-       xfs_filblks_t                   temp;
-       xfs_filblks_t                   temp2;
-       int                             state = BMAP_COWFORK;
-       int                             eof;
-       xfs_extnum_t                    eidx;
-
-       mp = ip->i_mount;
-       XFS_STATS_INC(mp, xs_del_exlist);
-
-       ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof,
-                       &eidx, &got, &new);
-
-       ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ifp = ifp;
-       ASSERT((eidx >= 0) && (eidx < ifp->if_bytes /
-               (uint)sizeof(xfs_bmbt_rec_t)));
-       ASSERT(del->br_blockcount > 0);
-       ASSERT(got.br_startoff <= del->br_startoff);
-       del_endoff = del->br_startoff + del->br_blockcount;
-       got_endoff = got.br_startoff + got.br_blockcount;
-       ASSERT(got_endoff >= del_endoff);
-       delay = isnullstartblock(got.br_startblock);
-       ASSERT(isnullstartblock(del->br_startblock) == delay);
-       qfield = 0;
-       error = 0;
-       /*
-        * If deleting a real allocation, must free up the disk space.
-        */
-       if (!delay) {
-               nblks = del->br_blockcount;
-               qfield = XFS_TRANS_DQ_BCOUNT;
-               /*
-                * Set up del_endblock and cur for later.
-                */
-               del_endblock = del->br_startblock + del->br_blockcount;
-               da_old = da_new = 0;
-       } else {
-               da_old = startblockval(got.br_startblock);
-               da_new = 0;
-               nblks = 0;
-       }
-       qfield = qfield;
-       nblks = nblks;
-
-       /*
-        * Set flag value to use in switch statement.
-        * Left-contig is 2, right-contig is 1.
-        */
-       switch (((got.br_startoff == del->br_startoff) << 1) |
-               (got_endoff == del_endoff)) {
-       case 3:
-               /*
-                * Matches the whole extent.  Delete the entry.
-                */
-               xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK);
-               --eidx;
-               break;
-
-       case 2:
-               /*
-                * Deleting the first part of the extent.
-                */
-               trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
-               xfs_bmbt_set_startoff(ep, del_endoff);
-               temp = got.br_blockcount - del->br_blockcount;
-               xfs_bmbt_set_blockcount(ep, temp);
-               if (delay) {
-                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-                               da_old);
-                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-                       da_new = temp;
-                       break;
-               }
-               xfs_bmbt_set_startblock(ep, del_endblock);
-               trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-               break;
-
-       case 1:
-               /*
-                * Deleting the last part of the extent.
-                */
-               temp = got.br_blockcount - del->br_blockcount;
-               trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);
-               if (delay) {
-                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-                               da_old);
-                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-                       da_new = temp;
-                       break;
-               }
-               trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-               break;
-
-       case 0:
-               /*
-                * Deleting the middle of the extent.
-                */
-               temp = del->br_startoff - got.br_startoff;
-               trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);
-               new.br_startoff = del_endoff;
-               temp2 = got_endoff - del_endoff;
-               new.br_blockcount = temp2;
-               new.br_state = got.br_state;
-               if (!delay) {
-                       new.br_startblock = del_endblock;
-               } else {
-                       temp = xfs_bmap_worst_indlen(ip, temp);
-                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       temp2 = xfs_bmap_worst_indlen(ip, temp2);
-                       new.br_startblock = nullstartblock((int)temp2);
-                       da_new = temp + temp2;
-                       while (da_new > da_old) {
-                               if (temp) {
-                                       temp--;
-                                       da_new--;
-                                       xfs_bmbt_set_startblock(ep,
-                                               nullstartblock((int)temp));
-                               }
-                               if (da_new == da_old)
-                                       break;
-                               if (temp2) {
-                                       temp2--;
-                                       da_new--;
-                                       new.br_startblock =
-                                               nullstartblock((int)temp2);
-                               }
-                       }
-               }
-               trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-               xfs_iext_insert(ip, eidx + 1, 1, &new, state);
-               ++eidx;
-               break;
-       }
-
-       /*
-        * Account for change in delayed indirect blocks.
-        * Nothing to do for disk quota accounting here.
-        */
-       ASSERT(da_old >= da_new);
-       if (da_old > da_new)
-               xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
-
-       return error;
-}
-
 /*
  * Unmap (remove) blocks from a file.
  * If nexts is nonzero then the number of extents to remove is limited to
index f97db7132564569dc7f2aefbf05f27da719c8e10..7cae6ec27fa6b26a84984fddb3dc35d6556e2122 100644 (file)
@@ -190,6 +190,8 @@ void        xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
 #define        XFS_BMAP_TRACE_EXLIST(ip,c,w)
 #endif
 
+void   xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
+               xfs_filblks_t len);
 int    xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
 void   xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
 void   xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
@@ -221,7 +223,11 @@ int        xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
                xfs_fileoff_t bno, xfs_filblks_t len, int flags,
                xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
                struct xfs_defer_ops *dfops, int *done);
-int    xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del);
+int    xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
+               xfs_extnum_t *idx, struct xfs_bmbt_irec *got,
+               struct xfs_bmbt_irec *del);
+void   xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
+               struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del);
 int    xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
                xfs_extnum_t num);
 uint   xfs_default_attroffset(struct xfs_inode *ip);
index 5c8e6f2ce44f461d343a98b6b49ad6b0b09a3b8b..0e80993c8a5914d3dfa1f861b2b459d7a513d67a 100644 (file)
@@ -4826,7 +4826,7 @@ xfs_btree_calc_size(
        return rval;
 }
 
-int
+static int
 xfs_btree_count_blocks_helper(
        struct xfs_btree_cur    *cur,
        int                     level,
index 613c5cf1943646764880ba3beeffb98667dc5268..5c2929f94bd3bf27411f860b57697b9d899312d1 100644 (file)
@@ -199,9 +199,9 @@ xfs_defer_intake_work(
        struct xfs_defer_pending        *dfp;
 
        list_for_each_entry(dfp, &dop->dop_intake, dfp_list) {
-               trace_xfs_defer_intake_work(tp->t_mountp, dfp);
                dfp->dfp_intent = dfp->dfp_type->create_intent(tp,
                                dfp->dfp_count);
+               trace_xfs_defer_intake_work(tp->t_mountp, dfp);
                list_sort(tp->t_mountp, &dfp->dfp_work,
                                dfp->dfp_type->diff_items);
                list_for_each(li, &dfp->dfp_work)
@@ -221,21 +221,14 @@ xfs_defer_trans_abort(
        struct xfs_defer_pending        *dfp;
 
        trace_xfs_defer_trans_abort(tp->t_mountp, dop);
-       /*
-        * If the transaction was committed, drop the intent reference
-        * since we're bailing out of here. The other reference is
-        * dropped when the intent hits the AIL.  If the transaction
-        * was not committed, the intent is freed by the intent item
-        * unlock handler on abort.
-        */
-       if (!dop->dop_committed)
-               return;
 
-       /* Abort intent items. */
+       /* Abort intent items that don't have a done item. */
        list_for_each_entry(dfp, &dop->dop_pending, dfp_list) {
                trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
-               if (!dfp->dfp_done)
+               if (dfp->dfp_intent && !dfp->dfp_done) {
                        dfp->dfp_type->abort_intent(dfp->dfp_intent);
+                       dfp->dfp_intent = NULL;
+               }
        }
 
        /* Shut down FS. */
index 3cc3cf7674746f279fcb0acf4eae27d49d089814..ac9a003dd29acf80d7c766788cb40d1f98d9132c 100644 (file)
@@ -191,8 +191,7 @@ xfs_dquot_buf_verify_crc(
        if (mp->m_quotainfo)
                ndquots = mp->m_quotainfo->qi_dqperchunk;
        else
-               ndquots = xfs_calc_dquots_per_chunk(
-                                       XFS_BB_TO_FSB(mp, bp->b_length));
+               ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
 
        for (i = 0; i < ndquots; i++, d++) {
                if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
index f6547fc5e016e75a130a738c3e4a1b3a087d7281..6b7579e7b60a228ee6e633221bc64952f4a93a41 100644 (file)
@@ -865,7 +865,6 @@ typedef struct xfs_timestamp {
  * padding field for v3 inodes.
  */
 #define        XFS_DINODE_MAGIC                0x494e  /* 'IN' */
-#define XFS_DINODE_GOOD_VERSION(v)     ((v) >= 1 && (v) <= 3)
 typedef struct xfs_dinode {
        __be16          di_magic;       /* inode magic # = XFS_DINODE_MAGIC */
        __be16          di_mode;        /* mode and type of file */
index 8de9a3a29589bd59c0684c8eab278db3edceba53..134424fac434fdd7fdd3cecf12d3007712b9734b 100644 (file)
@@ -57,6 +57,17 @@ xfs_inobp_check(
 }
 #endif
 
+bool
+xfs_dinode_good_version(
+       struct xfs_mount *mp,
+       __u8            version)
+{
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               return version == 3;
+
+       return version == 1 || version == 2;
+}
+
 /*
  * If we are doing readahead on an inode buffer, we might be in log recovery
  * reading an inode allocation buffer that hasn't yet been replayed, and hence
@@ -91,7 +102,7 @@ xfs_inode_buf_verify(
 
                dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
                di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
-                           XFS_DINODE_GOOD_VERSION(dip->di_version);
+                       xfs_dinode_good_version(mp, dip->di_version);
                if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
                                                XFS_ERRTAG_ITOBP_INOTOBP,
                                                XFS_RANDOM_ITOBP_INOTOBP))) {
index 62d9d4681c8c28a1294b575903f0720693bc6666..3cfe12a4f58ac8560e1cd92e529a85477ff5ee02 100644 (file)
@@ -74,6 +74,8 @@ void  xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
 void   xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
                               struct xfs_dinode *to);
 
+bool   xfs_dinode_good_version(struct xfs_mount *mp, __u8 version);
+
 #if defined(DEBUG)
 void   xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #else
index a314fc7b56fa5b0fcfb0e234a9a931eda6de7e73..6e4f7f900fea4c30f44477dc258db5334b980486 100644 (file)
@@ -249,6 +249,7 @@ xfs_file_dio_aio_read(
        struct xfs_inode        *ip = XFS_I(inode);
        loff_t                  isize = i_size_read(inode);
        size_t                  count = iov_iter_count(to);
+       loff_t                  end = iocb->ki_pos + count - 1;
        struct iov_iter         data;
        struct xfs_buftarg      *target;
        ssize_t                 ret = 0;
@@ -272,49 +273,21 @@ xfs_file_dio_aio_read(
 
        file_accessed(iocb->ki_filp);
 
-       /*
-        * Locking is a bit tricky here. If we take an exclusive lock for direct
-        * IO, we effectively serialise all new concurrent read IO to this file
-        * and block it behind IO that is currently in progress because IO in
-        * progress holds the IO lock shared. We only need to hold the lock
-        * exclusive to blow away the page cache, so only take lock exclusively
-        * if the page cache needs invalidation. This allows the normal direct
-        * IO case of no page cache pages to proceeed concurrently without
-        * serialisation.
-        */
        xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
        if (mapping->nrpages) {
-               xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-               xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+               if (ret)
+                       goto out_unlock;
 
                /*
-                * The generic dio code only flushes the range of the particular
-                * I/O. Because we take an exclusive lock here, this whole
-                * sequence is considerably more expensive for us. This has a
-                * noticeable performance impact for any file with cached pages,
-                * even when outside of the range of the particular I/O.
-                *
-                * Hence, amortize the cost of the lock against a full file
-                * flush and reduce the chances of repeated iolock cycles going
-                * forward.
+                * Invalidate whole pages. This can return an error if we fail
+                * to invalidate a page, but this should never happen on XFS.
+                * Warn if it does fail.
                 */
-               if (mapping->nrpages) {
-                       ret = filemap_write_and_wait(mapping);
-                       if (ret) {
-                               xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
-                               return ret;
-                       }
-
-                       /*
-                        * Invalidate whole pages. This can return an error if
-                        * we fail to invalidate a page, but this should never
-                        * happen on XFS. Warn if it does fail.
-                        */
-                       ret = invalidate_inode_pages2(mapping);
-                       WARN_ON_ONCE(ret);
-                       ret = 0;
-               }
-               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+               ret = invalidate_inode_pages2_range(mapping,
+                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+               WARN_ON_ONCE(ret);
+               ret = 0;
        }
 
        data = *to;
@@ -324,8 +297,9 @@ xfs_file_dio_aio_read(
                iocb->ki_pos += ret;
                iov_iter_advance(to, ret);
        }
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
+out_unlock:
+       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
        return ret;
 }
 
@@ -570,61 +544,49 @@ xfs_file_dio_aio_write(
        if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
                return -EINVAL;
 
-       /* "unaligned" here means not aligned to a filesystem block */
-       if ((iocb->ki_pos & mp->m_blockmask) ||
-           ((iocb->ki_pos + count) & mp->m_blockmask))
-               unaligned_io = 1;
-
        /*
-        * We don't need to take an exclusive lock unless there page cache needs
-        * to be invalidated or unaligned IO is being executed. We don't need to
-        * consider the EOF extension case here because
-        * xfs_file_aio_write_checks() will relock the inode as necessary for
-        * EOF zeroing cases and fill out the new inode size as appropriate.
+        * Don't take the exclusive iolock here unless the I/O is unaligned to
+        * the file system block size.  We don't need to consider the EOF
+        * extension case here because xfs_file_aio_write_checks() will relock
+        * the inode as necessary for EOF zeroing cases and fill out the new
+        * inode size as appropriate.
         */
-       if (unaligned_io || mapping->nrpages)
+       if ((iocb->ki_pos & mp->m_blockmask) ||
+           ((iocb->ki_pos + count) & mp->m_blockmask)) {
+               unaligned_io = 1;
                iolock = XFS_IOLOCK_EXCL;
-       else
+       } else {
                iolock = XFS_IOLOCK_SHARED;
-       xfs_rw_ilock(ip, iolock);
-
-       /*
-        * Recheck if there are cached pages that need invalidate after we got
-        * the iolock to protect against other threads adding new pages while
-        * we were waiting for the iolock.
-        */
-       if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
-               xfs_rw_iunlock(ip, iolock);
-               iolock = XFS_IOLOCK_EXCL;
-               xfs_rw_ilock(ip, iolock);
        }
 
+       xfs_rw_ilock(ip, iolock);
+
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
                goto out;
        count = iov_iter_count(from);
        end = iocb->ki_pos + count - 1;
 
-       /*
-        * See xfs_file_dio_aio_read() for why we do a full-file flush here.
-        */
        if (mapping->nrpages) {
-               ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
                if (ret)
                        goto out;
+
                /*
                 * Invalidate whole pages. This can return an error if we fail
                 * to invalidate a page, but this should never happen on XFS.
                 * Warn if it does fail.
                 */
-               ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
+               ret = invalidate_inode_pages2_range(mapping,
+                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
                WARN_ON_ONCE(ret);
                ret = 0;
        }
 
        /*
         * If we are doing unaligned IO, wait for all other IO to drain,
-        * otherwise demote the lock if we had to flush cached pages
+        * otherwise demote the lock if we had to take the exclusive lock
+        * for other reasons in xfs_file_aio_write_checks.
         */
        if (unaligned_io)
                inode_dio_wait(inode);
@@ -947,134 +909,6 @@ out_unlock:
        return error;
 }
 
-/*
- * Flush all file writes out to disk.
- */
-static int
-xfs_file_wait_for_io(
-       struct inode    *inode,
-       loff_t          offset,
-       size_t          len)
-{
-       loff_t          rounding;
-       loff_t          ioffset;
-       loff_t          iendoffset;
-       loff_t          bs;
-       int             ret;
-
-       bs = inode->i_sb->s_blocksize;
-       inode_dio_wait(inode);
-
-       rounding = max_t(xfs_off_t, bs, PAGE_SIZE);
-       ioffset = round_down(offset, rounding);
-       iendoffset = round_up(offset + len, rounding) - 1;
-       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-                                          iendoffset);
-       return ret;
-}
-
-/* Hook up to the VFS reflink function */
-STATIC int
-xfs_file_share_range(
-       struct file     *file_in,
-       loff_t          pos_in,
-       struct file     *file_out,
-       loff_t          pos_out,
-       u64             len,
-       bool            is_dedupe)
-{
-       struct inode    *inode_in;
-       struct inode    *inode_out;
-       ssize_t         ret;
-       loff_t          bs;
-       loff_t          isize;
-       int             same_inode;
-       loff_t          blen;
-       unsigned int    flags = 0;
-
-       inode_in = file_inode(file_in);
-       inode_out = file_inode(file_out);
-       bs = inode_out->i_sb->s_blocksize;
-
-       /* Don't touch certain kinds of inodes */
-       if (IS_IMMUTABLE(inode_out))
-               return -EPERM;
-       if (IS_SWAPFILE(inode_in) ||
-           IS_SWAPFILE(inode_out))
-               return -ETXTBSY;
-
-       /* Reflink only works within this filesystem. */
-       if (inode_in->i_sb != inode_out->i_sb)
-               return -EXDEV;
-       same_inode = (inode_in->i_ino == inode_out->i_ino);
-
-       /* Don't reflink dirs, pipes, sockets... */
-       if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
-               return -EISDIR;
-       if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
-               return -EINVAL;
-       if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
-               return -EINVAL;
-
-       /* Don't share DAX file data for now. */
-       if (IS_DAX(inode_in) || IS_DAX(inode_out))
-               return -EINVAL;
-
-       /* Are we going all the way to the end? */
-       isize = i_size_read(inode_in);
-       if (isize == 0)
-               return 0;
-       if (len == 0)
-               len = isize - pos_in;
-
-       /* Ensure offsets don't wrap and the input is inside i_size */
-       if (pos_in + len < pos_in || pos_out + len < pos_out ||
-           pos_in + len > isize)
-               return -EINVAL;
-
-       /* Don't allow dedupe past EOF in the dest file */
-       if (is_dedupe) {
-               loff_t  disize;
-
-               disize = i_size_read(inode_out);
-               if (pos_out >= disize || pos_out + len > disize)
-                       return -EINVAL;
-       }
-
-       /* If we're linking to EOF, continue to the block boundary. */
-       if (pos_in + len == isize)
-               blen = ALIGN(isize, bs) - pos_in;
-       else
-               blen = len;
-
-       /* Only reflink if we're aligned to block boundaries */
-       if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
-           !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
-               return -EINVAL;
-
-       /* Don't allow overlapped reflink within the same file */
-       if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen)
-               return -EINVAL;
-
-       /* Wait for the completion of any pending IOs on srcfile */
-       ret = xfs_file_wait_for_io(inode_in, pos_in, len);
-       if (ret)
-               goto out;
-       ret = xfs_file_wait_for_io(inode_out, pos_out, len);
-       if (ret)
-               goto out;
-
-       if (is_dedupe)
-               flags |= XFS_REFLINK_DEDUPE;
-       ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out),
-                       pos_out, len, flags);
-       if (ret < 0)
-               goto out;
-
-out:
-       return ret;
-}
-
 STATIC ssize_t
 xfs_file_copy_range(
        struct file     *file_in,
@@ -1086,7 +920,7 @@ xfs_file_copy_range(
 {
        int             error;
 
-       error = xfs_file_share_range(file_in, pos_in, file_out, pos_out,
+       error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
                                     len, false);
        if (error)
                return error;
@@ -1101,7 +935,7 @@ xfs_file_clone_range(
        loff_t          pos_out,
        u64             len)
 {
-       return xfs_file_share_range(file_in, pos_in, file_out, pos_out,
+       return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
                                     len, false);
 }
 
@@ -1124,7 +958,7 @@ xfs_file_dedupe_range(
        if (len > XFS_MAX_DEDUPE_LEN)
                len = XFS_MAX_DEDUPE_LEN;
 
-       error = xfs_file_share_range(src_file, loff, dst_file, dst_loff,
+       error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
                                     len, true);
        if (error)
                return error;
index 14796b744e0a1ebb9f8d428131d2522316262e82..f295049db68159523ac936727c748e7264f80993 100644 (file)
@@ -1656,9 +1656,9 @@ void
 xfs_inode_set_cowblocks_tag(
        xfs_inode_t     *ip)
 {
-       trace_xfs_inode_set_eofblocks_tag(ip);
+       trace_xfs_inode_set_cowblocks_tag(ip);
        return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
-                       trace_xfs_perag_set_eofblocks,
+                       trace_xfs_perag_set_cowblocks,
                        XFS_ICI_COWBLOCKS_TAG);
 }
 
@@ -1666,7 +1666,7 @@ void
 xfs_inode_clear_cowblocks_tag(
        xfs_inode_t     *ip)
 {
-       trace_xfs_inode_clear_eofblocks_tag(ip);
+       trace_xfs_inode_clear_cowblocks_tag(ip);
        return __xfs_inode_clear_eofblocks_tag(ip,
-                       trace_xfs_perag_clear_eofblocks, XFS_ICI_COWBLOCKS_TAG);
+                       trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
 }
index d907eb9f8ef32a079f845c4aa4731bb3413fcb25..436e109bb01e59d32bda87e9dd43ab3229cba509 100644 (file)
@@ -566,6 +566,17 @@ xfs_file_iomap_begin_delay(
        xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
                        &got, &prev);
        if (!eof && got.br_startoff <= offset_fsb) {
+               if (xfs_is_reflink_inode(ip)) {
+                       bool            shared;
+
+                       end_fsb = min(XFS_B_TO_FSB(mp, offset + count),
+                                       maxbytes_fsb);
+                       xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
+                       error = xfs_reflink_reserve_cow(ip, &got, &shared);
+                       if (error)
+                               goto out_unlock;
+               }
+
                trace_xfs_iomap_found(ip, offset, count, 0, &got);
                goto done;
        }
@@ -961,19 +972,13 @@ xfs_file_iomap_begin(
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_bmbt_irec    imap;
        xfs_fileoff_t           offset_fsb, end_fsb;
-       bool                    shared, trimmed;
        int                     nimaps = 1, error = 0;
+       bool                    shared = false, trimmed = false;
        unsigned                lockmode;
 
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
 
-       if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
-               error = xfs_reflink_reserve_cow_range(ip, offset, length);
-               if (error < 0)
-                       return error;
-       }
-
        if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
                   !xfs_get_extsz_hint(ip)) {
                /* Reserve delalloc blocks for regular writeback. */
@@ -981,7 +986,16 @@ xfs_file_iomap_begin(
                                iomap);
        }
 
-       lockmode = xfs_ilock_data_map_shared(ip);
+       /*
+        * COW writes will allocate delalloc space, so we need to make sure
+        * to take the lock exclusively here.
+        */
+       if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+               lockmode = XFS_ILOCK_EXCL;
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+       } else {
+               lockmode = xfs_ilock_data_map_shared(ip);
+       }
 
        ASSERT(offset <= mp->m_super->s_maxbytes);
        if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -991,16 +1005,24 @@ xfs_file_iomap_begin(
 
        error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
                               &nimaps, 0);
-       if (error) {
-               xfs_iunlock(ip, lockmode);
-               return error;
+       if (error)
+               goto out_unlock;
+
+       if (flags & IOMAP_REPORT) {
+               /* Trim the mapping to the nearest shared extent boundary. */
+               error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
+                               &trimmed);
+               if (error)
+                       goto out_unlock;
        }
 
-       /* Trim the mapping to the nearest shared extent boundary. */
-       error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
-       if (error) {
-               xfs_iunlock(ip, lockmode);
-               return error;
+       if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+               error = xfs_reflink_reserve_cow(ip, &imap, &shared);
+               if (error)
+                       goto out_unlock;
+
+               end_fsb = imap.br_startoff + imap.br_blockcount;
+               length = XFS_FSB_TO_B(mp, end_fsb) - offset;
        }
 
        if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
@@ -1039,6 +1061,9 @@ xfs_file_iomap_begin(
        if (shared)
                iomap->flags |= IOMAP_F_SHARED;
        return 0;
+out_unlock:
+       xfs_iunlock(ip, lockmode);
+       return error;
 }
 
 static int
index fc7873942bea51866611aee5a7437f3d4a036a67..b341f10cf4810bf3716aec354f33fbdbf9ab5494 100644 (file)
@@ -1009,6 +1009,7 @@ xfs_mountfs(
  out_quota:
        xfs_qm_unmount_quotas(mp);
  out_rtunmount:
+       mp->m_super->s_flags &= ~MS_ACTIVE;
        xfs_rtunmount_inodes(mp);
  out_rele_rip:
        IRELE(rip);
index 5965e9455d91e03621680a08493610085d5a926c..a279b4e7f5feaa83a0cf1fbbfaf1c4d8e393ecbc 100644 (file)
@@ -182,7 +182,8 @@ xfs_reflink_trim_around_shared(
        if (!xfs_is_reflink_inode(ip) ||
            ISUNWRITTEN(irec) ||
            irec->br_startblock == HOLESTARTBLOCK ||
-           irec->br_startblock == DELAYSTARTBLOCK) {
+           irec->br_startblock == DELAYSTARTBLOCK ||
+           isnullstartblock(irec->br_startblock)) {
                *shared = false;
                return 0;
        }
@@ -227,50 +228,54 @@ xfs_reflink_trim_around_shared(
        }
 }
 
-/* Create a CoW reservation for a range of blocks within a file. */
-static int
-__xfs_reflink_reserve_cow(
+/*
+ * Trim the passed in imap to the next shared/unshared extent boundary, and
+ * if imap->br_startoff points to a shared extent reserve space for it in the
+ * COW fork.  In this case *shared is set to true, else to false.
+ *
+ * Note that imap will always contain the block numbers for the existing blocks
+ * in the data fork, as the upper layers need them for read-modify-write
+ * operations.
+ */
+int
+xfs_reflink_reserve_cow(
        struct xfs_inode        *ip,
-       xfs_fileoff_t           *offset_fsb,
-       xfs_fileoff_t           end_fsb,
-       bool                    *skipped)
+       struct xfs_bmbt_irec    *imap,
+       bool                    *shared)
 {
-       struct xfs_bmbt_irec    got, prev, imap;
-       xfs_fileoff_t           orig_end_fsb;
-       int                     nimaps, eof = 0, error = 0;
-       bool                    shared = false, trimmed = false;
+       struct xfs_bmbt_irec    got, prev;
+       xfs_fileoff_t           end_fsb, orig_end_fsb;
+       int                     eof = 0, error = 0;
+       bool                    trimmed;
        xfs_extnum_t            idx;
        xfs_extlen_t            align;
 
-       /* Already reserved?  Skip the refcount btree access. */
-       xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx,
+       /*
+        * Search the COW fork extent list first.  This serves two purposes:
+        * first this implements the speculative preallocation using cowextsize,
+        * so that we also unshare blocks adjacent to shared blocks instead
+        * of just the shared blocks themselves.  Second the lookup in the
+        * extent list is generally faster than going out to the shared extent
+        * tree.
+        */
+       xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
                        &got, &prev);
-       if (!eof && got.br_startoff <= *offset_fsb) {
-               end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount;
-               trace_xfs_reflink_cow_found(ip, &got);
-               goto done;
-       }
+       if (!eof && got.br_startoff <= imap->br_startoff) {
+               trace_xfs_reflink_cow_found(ip, imap);
+               xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
 
-       /* Read extent from the source file. */
-       nimaps = 1;
-       error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
-                       &imap, &nimaps, 0);
-       if (error)
-               goto out_unlock;
-       ASSERT(nimaps == 1);
+               *shared = true;
+               return 0;
+       }
 
        /* Trim the mapping to the nearest shared extent boundary. */
-       error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
+       error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
        if (error)
-               goto out_unlock;
-
-       end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;
+               return error;
 
        /* Not shared?  Just report the (potentially capped) extent. */
-       if (!shared) {
-               *skipped = true;
-               goto done;
-       }
+       if (!*shared)
+               return 0;
 
        /*
         * Fork all the shared blocks from our write offset until the end of
@@ -278,72 +283,38 @@ __xfs_reflink_reserve_cow(
         */
        error = xfs_qm_dqattach_locked(ip, 0);
        if (error)
-               goto out_unlock;
+               return error;
+
+       end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
 
        align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
        if (align)
                end_fsb = roundup_64(end_fsb, align);
 
 retry:
-       error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb,
-                       end_fsb - *offset_fsb, &got,
-                       &prev, &idx, eof);
+       error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
+                       end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
        switch (error) {
        case 0:
                break;
        case -ENOSPC:
        case -EDQUOT:
                /* retry without any preallocation */
-               trace_xfs_reflink_cow_enospc(ip, &imap);
+               trace_xfs_reflink_cow_enospc(ip, imap);
                if (end_fsb != orig_end_fsb) {
                        end_fsb = orig_end_fsb;
                        goto retry;
                }
                /*FALLTHRU*/
        default:
-               goto out_unlock;
+               return error;
        }
 
        if (end_fsb != orig_end_fsb)
                xfs_inode_set_cowblocks_tag(ip);
 
        trace_xfs_reflink_cow_alloc(ip, &got);
-done:
-       *offset_fsb = end_fsb;
-out_unlock:
-       return error;
-}
-
-/* Create a CoW reservation for part of a file. */
-int
-xfs_reflink_reserve_cow_range(
-       struct xfs_inode        *ip,
-       xfs_off_t               offset,
-       xfs_off_t               count)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       xfs_fileoff_t           offset_fsb, end_fsb;
-       bool                    skipped = false;
-       int                     error;
-
-       trace_xfs_reflink_reserve_cow_range(ip, offset, count);
-
-       offset_fsb = XFS_B_TO_FSBT(mp, offset);
-       end_fsb = XFS_B_TO_FSB(mp, offset + count);
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       while (offset_fsb < end_fsb) {
-               error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
-                               &skipped);
-               if (error) {
-                       trace_xfs_reflink_reserve_cow_range_error(ip, error,
-                               _RET_IP_);
-                       break;
-               }
-       }
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       return error;
+       return 0;
 }
 
 /* Allocate all CoW reservations covering a range of blocks in a file. */
@@ -358,9 +329,8 @@ __xfs_reflink_allocate_cow(
        struct xfs_defer_ops    dfops;
        struct xfs_trans        *tp;
        xfs_fsblock_t           first_block;
-       xfs_fileoff_t           next_fsb;
        int                     nimaps = 1, error;
-       bool                    skipped = false;
+       bool                    shared;
 
        xfs_defer_init(&dfops, &first_block);
 
@@ -371,33 +341,38 @@ __xfs_reflink_allocate_cow(
 
        xfs_ilock(ip, XFS_ILOCK_EXCL);
 
-       next_fsb = *offset_fsb;
-       error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped);
+       /* Read extent from the source file. */
+       nimaps = 1;
+       error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
+                       &imap, &nimaps, 0);
+       if (error)
+               goto out_unlock;
+       ASSERT(nimaps == 1);
+
+       error = xfs_reflink_reserve_cow(ip, &imap, &shared);
        if (error)
                goto out_trans_cancel;
 
-       if (skipped) {
-               *offset_fsb = next_fsb;
+       if (!shared) {
+               *offset_fsb = imap.br_startoff + imap.br_blockcount;
                goto out_trans_cancel;
        }
 
        xfs_trans_ijoin(tp, ip, 0);
-       error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb,
+       error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
                        XFS_BMAPI_COWFORK, &first_block,
                        XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
                        &imap, &nimaps, &dfops);
        if (error)
                goto out_trans_cancel;
 
-       /* We might not have been able to map the whole delalloc extent */
-       *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);
-
        error = xfs_defer_finish(&tp, &dfops, NULL);
        if (error)
                goto out_trans_cancel;
 
        error = xfs_trans_commit(tp);
 
+       *offset_fsb = imap.br_startoff + imap.br_blockcount;
 out_unlock:
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
@@ -536,58 +511,49 @@ xfs_reflink_cancel_cow_blocks(
        xfs_fileoff_t                   offset_fsb,
        xfs_fileoff_t                   end_fsb)
 {
-       struct xfs_bmbt_irec            irec;
-       xfs_filblks_t                   count_fsb;
+       struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_bmbt_irec            got, prev, del;
+       xfs_extnum_t                    idx;
        xfs_fsblock_t                   firstfsb;
        struct xfs_defer_ops            dfops;
-       int                             error = 0;
-       int                             nimaps;
+       int                             error = 0, eof = 0;
 
        if (!xfs_is_reflink_inode(ip))
                return 0;
 
-       /* Go find the old extent in the CoW fork. */
-       while (offset_fsb < end_fsb) {
-               nimaps = 1;
-               count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
-               error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
-                               &nimaps, XFS_BMAPI_COWFORK);
-               if (error)
-                       break;
-               ASSERT(nimaps == 1);
-
-               trace_xfs_reflink_cancel_cow(ip, &irec);
+       xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx,
+                       &got, &prev);
+       if (eof)
+               return 0;
 
-               if (irec.br_startblock == DELAYSTARTBLOCK) {
-                       /* Free a delayed allocation. */
-                       xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount,
-                                       false);
-                       ip->i_delayed_blks -= irec.br_blockcount;
+       while (got.br_startoff < end_fsb) {
+               del = got;
+               xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
+               trace_xfs_reflink_cancel_cow(ip, &del);
 
-                       /* Remove the mapping from the CoW fork. */
-                       error = xfs_bunmapi_cow(ip, &irec);
+               if (isnullstartblock(del.br_startblock)) {
+                       error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
+                                       &idx, &got, &del);
                        if (error)
                                break;
-               } else if (irec.br_startblock == HOLESTARTBLOCK) {
-                       /* empty */
                } else {
                        xfs_trans_ijoin(*tpp, ip, 0);
                        xfs_defer_init(&dfops, &firstfsb);
 
                        /* Free the CoW orphan record. */
                        error = xfs_refcount_free_cow_extent(ip->i_mount,
-                                       &dfops, irec.br_startblock,
-                                       irec.br_blockcount);
+                                       &dfops, del.br_startblock,
+                                       del.br_blockcount);
                        if (error)
                                break;
 
                        xfs_bmap_add_free(ip->i_mount, &dfops,
-                                       irec.br_startblock, irec.br_blockcount,
+                                       del.br_startblock, del.br_blockcount,
                                        NULL);
 
                        /* Update quota accounting */
                        xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
-                                       -(long)irec.br_blockcount);
+                                       -(long)del.br_blockcount);
 
                        /* Roll the transaction */
                        error = xfs_defer_finish(tpp, &dfops, ip);
@@ -597,15 +563,18 @@ xfs_reflink_cancel_cow_blocks(
                        }
 
                        /* Remove the mapping from the CoW fork. */
-                       error = xfs_bunmapi_cow(ip, &irec);
-                       if (error)
-                               break;
+                       xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
                }
 
-               /* Roll on... */
-               offset_fsb = irec.br_startoff + irec.br_blockcount;
+               if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec))
+                       break;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
        }
 
+       /* clear tag if cow fork is emptied */
+       if (!ifp->if_bytes)
+               xfs_inode_clear_cowblocks_tag(ip);
+
        return error;
 }
 
@@ -668,25 +637,26 @@ xfs_reflink_end_cow(
        xfs_off_t                       offset,
        xfs_off_t                       count)
 {
-       struct xfs_bmbt_irec            irec;
-       struct xfs_bmbt_irec            uirec;
+       struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_bmbt_irec            got, prev, del;
        struct xfs_trans                *tp;
        xfs_fileoff_t                   offset_fsb;
        xfs_fileoff_t                   end_fsb;
-       xfs_filblks_t                   count_fsb;
        xfs_fsblock_t                   firstfsb;
        struct xfs_defer_ops            dfops;
-       int                             error;
+       int                             error, eof = 0;
        unsigned int                    resblks;
-       xfs_filblks_t                   ilen;
        xfs_filblks_t                   rlen;
-       int                             nimaps;
+       xfs_extnum_t                    idx;
 
        trace_xfs_reflink_end_cow(ip, offset, count);
 
+       /* No COW extents?  That's easy! */
+       if (ifp->if_bytes == 0)
+               return 0;
+
        offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
        end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
-       count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
 
        /* Start a rolling transaction to switch the mappings */
        resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
@@ -698,72 +668,65 @@ xfs_reflink_end_cow(
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, ip, 0);
 
-       /* Go find the old extent in the CoW fork. */
-       while (offset_fsb < end_fsb) {
-               /* Read extent from the source file */
-               nimaps = 1;
-               count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
-               error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
-                               &nimaps, XFS_BMAPI_COWFORK);
-               if (error)
-                       goto out_cancel;
-               ASSERT(nimaps == 1);
+       xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx,
+                       &got, &prev);
 
-               ASSERT(irec.br_startblock != DELAYSTARTBLOCK);
-               trace_xfs_reflink_cow_remap(ip, &irec);
+       /* If there is a hole at end_fsb - 1 go to the previous extent */
+       if (eof || got.br_startoff > end_fsb) {
+               ASSERT(idx > 0);
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got);
+       }
 
-               /*
-                * We can have a hole in the CoW fork if part of a directio
-                * write is CoW but part of it isn't.
-                */
-               rlen = ilen = irec.br_blockcount;
-               if (irec.br_startblock == HOLESTARTBLOCK)
+       /* Walk backwards until we're out of the I/O range... */
+       while (got.br_startoff + got.br_blockcount > offset_fsb) {
+               del = got;
+               xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
+
+               /* Extent delete may have bumped idx forward */
+               if (!del.br_blockcount) {
+                       idx--;
                        goto next_extent;
+               }
+
+               ASSERT(!isnullstartblock(got.br_startblock));
 
                /* Unmap the old blocks in the data fork. */
-               while (rlen) {
-                       xfs_defer_init(&dfops, &firstfsb);
-                       error = __xfs_bunmapi(tp, ip, irec.br_startoff,
-                                       &rlen, 0, 1, &firstfsb, &dfops);
-                       if (error)
-                               goto out_defer;
-
-                       /*
-                        * Trim the extent to whatever got unmapped.
-                        * Remember, bunmapi works backwards.
-                        */
-                       uirec.br_startblock = irec.br_startblock + rlen;
-                       uirec.br_startoff = irec.br_startoff + rlen;
-                       uirec.br_blockcount = irec.br_blockcount - rlen;
-                       irec.br_blockcount = rlen;
-                       trace_xfs_reflink_cow_remap_piece(ip, &uirec);
+               xfs_defer_init(&dfops, &firstfsb);
+               rlen = del.br_blockcount;
+               error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
+                               &firstfsb, &dfops);
+               if (error)
+                       goto out_defer;
 
-                       /* Free the CoW orphan record. */
-                       error = xfs_refcount_free_cow_extent(tp->t_mountp,
-                                       &dfops, uirec.br_startblock,
-                                       uirec.br_blockcount);
-                       if (error)
-                               goto out_defer;
+               /* Trim the extent to whatever got unmapped. */
+               if (rlen) {
+                       xfs_trim_extent(&del, del.br_startoff + rlen,
+                               del.br_blockcount - rlen);
+               }
+               trace_xfs_reflink_cow_remap(ip, &del);
 
-                       /* Map the new blocks into the data fork. */
-                       error = xfs_bmap_map_extent(tp->t_mountp, &dfops,
-                                       ip, &uirec);
-                       if (error)
-                               goto out_defer;
+               /* Free the CoW orphan record. */
+               error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops,
+                               del.br_startblock, del.br_blockcount);
+               if (error)
+                       goto out_defer;
 
-                       /* Remove the mapping from the CoW fork. */
-                       error = xfs_bunmapi_cow(ip, &uirec);
-                       if (error)
-                               goto out_defer;
+               /* Map the new blocks into the data fork. */
+               error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del);
+               if (error)
+                       goto out_defer;
 
-                       error = xfs_defer_finish(&tp, &dfops, ip);
-                       if (error)
-                               goto out_defer;
-               }
+               /* Remove the mapping from the CoW fork. */
+               xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
+
+               error = xfs_defer_finish(&tp, &dfops, ip);
+               if (error)
+                       goto out_defer;
 
 next_extent:
-               /* Roll on... */
-               offset_fsb = irec.br_startoff + ilen;
+               if (idx < 0)
+                       break;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
        }
 
        error = xfs_trans_commit(tp);
@@ -774,7 +737,6 @@ next_extent:
 
 out_defer:
        xfs_defer_cancel(&dfops);
-out_cancel:
        xfs_trans_cancel(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
 out:
@@ -1312,19 +1274,26 @@ out_error:
  */
 int
 xfs_reflink_remap_range(
-       struct xfs_inode        *src,
-       xfs_off_t               srcoff,
-       struct xfs_inode        *dest,
-       xfs_off_t               destoff,
-       xfs_off_t               len,
-       unsigned int            flags)
+       struct file             *file_in,
+       loff_t                  pos_in,
+       struct file             *file_out,
+       loff_t                  pos_out,
+       u64                     len,
+       bool                    is_dedupe)
 {
+       struct inode            *inode_in = file_inode(file_in);
+       struct xfs_inode        *src = XFS_I(inode_in);
+       struct inode            *inode_out = file_inode(file_out);
+       struct xfs_inode        *dest = XFS_I(inode_out);
        struct xfs_mount        *mp = src->i_mount;
+       loff_t                  bs = inode_out->i_sb->s_blocksize;
+       bool                    same_inode = (inode_in == inode_out);
        xfs_fileoff_t           sfsbno, dfsbno;
        xfs_filblks_t           fsblen;
-       int                     error;
        xfs_extlen_t            cowextsize;
-       bool                    is_same;
+       loff_t                  isize;
+       ssize_t                 ret;
+       loff_t                  blen;
 
        if (!xfs_sb_version_hasreflink(&mp->m_sb))
                return -EOPNOTSUPP;
@@ -1332,17 +1301,8 @@ xfs_reflink_remap_range(
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
 
-       /* Don't reflink realtime inodes */
-       if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
-               return -EINVAL;
-
-       if (flags & ~XFS_REFLINK_ALL)
-               return -EINVAL;
-
-       trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
-
        /* Lock both files against IO */
-       if (src->i_ino == dest->i_ino) {
+       if (same_inode) {
                xfs_ilock(src, XFS_IOLOCK_EXCL);
                xfs_ilock(src, XFS_MMAPLOCK_EXCL);
        } else {
@@ -1350,39 +1310,126 @@ xfs_reflink_remap_range(
                xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
        }
 
+       /* Don't touch certain kinds of inodes */
+       ret = -EPERM;
+       if (IS_IMMUTABLE(inode_out))
+               goto out_unlock;
+
+       ret = -ETXTBSY;
+       if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+               goto out_unlock;
+
+
+       /* Don't reflink dirs, pipes, sockets... */
+       ret = -EISDIR;
+       if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+               goto out_unlock;
+       ret = -EINVAL;
+       if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
+               goto out_unlock;
+       if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+               goto out_unlock;
+
+       /* Don't reflink realtime inodes */
+       if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
+               goto out_unlock;
+
+       /* Don't share DAX file data for now. */
+       if (IS_DAX(inode_in) || IS_DAX(inode_out))
+               goto out_unlock;
+
+       /* Are we going all the way to the end? */
+       isize = i_size_read(inode_in);
+       if (isize == 0) {
+               ret = 0;
+               goto out_unlock;
+       }
+
+       if (len == 0)
+               len = isize - pos_in;
+
+       /* Ensure offsets don't wrap and the input is inside i_size */
+       if (pos_in + len < pos_in || pos_out + len < pos_out ||
+           pos_in + len > isize)
+               goto out_unlock;
+
+       /* Don't allow dedupe past EOF in the dest file */
+       if (is_dedupe) {
+               loff_t  disize;
+
+               disize = i_size_read(inode_out);
+               if (pos_out >= disize || pos_out + len > disize)
+                       goto out_unlock;
+       }
+
+       /* If we're linking to EOF, continue to the block boundary. */
+       if (pos_in + len == isize)
+               blen = ALIGN(isize, bs) - pos_in;
+       else
+               blen = len;
+
+       /* Only reflink if we're aligned to block boundaries */
+       if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
+           !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
+               goto out_unlock;
+
+       /* Don't allow overlapped reflink within the same file */
+       if (same_inode) {
+               if (pos_out + blen > pos_in && pos_out < pos_in + blen)
+                       goto out_unlock;
+       }
+
+       /* Wait for the completion of any pending IOs on both files */
+       inode_dio_wait(inode_in);
+       if (!same_inode)
+               inode_dio_wait(inode_out);
+
+       ret = filemap_write_and_wait_range(inode_in->i_mapping,
+                       pos_in, pos_in + len - 1);
+       if (ret)
+               goto out_unlock;
+
+       ret = filemap_write_and_wait_range(inode_out->i_mapping,
+                       pos_out, pos_out + len - 1);
+       if (ret)
+               goto out_unlock;
+
+       trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+
        /*
         * Check that the extents are the same.
         */
-       if (flags & XFS_REFLINK_DEDUPE) {
-               is_same = false;
-               error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest),
-                               destoff, len, &is_same);
-               if (error)
-                       goto out_error;
+       if (is_dedupe) {
+               bool            is_same = false;
+
+               ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out,
+                               len, &is_same);
+               if (ret)
+                       goto out_unlock;
                if (!is_same) {
-                       error = -EBADE;
-                       goto out_error;
+                       ret = -EBADE;
+                       goto out_unlock;
                }
        }
 
-       error = xfs_reflink_set_inode_flag(src, dest);
-       if (error)
-               goto out_error;
+       ret = xfs_reflink_set_inode_flag(src, dest);
+       if (ret)
+               goto out_unlock;
 
        /*
         * Invalidate the page cache so that we can clear any CoW mappings
         * in the destination file.
         */
-       truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff,
-                                  PAGE_ALIGN(destoff + len) - 1);
+       truncate_inode_pages_range(&inode_out->i_data, pos_out,
+                                  PAGE_ALIGN(pos_out + len) - 1);
 
-       dfsbno = XFS_B_TO_FSBT(mp, destoff);
-       sfsbno = XFS_B_TO_FSBT(mp, srcoff);
+       dfsbno = XFS_B_TO_FSBT(mp, pos_out);
+       sfsbno = XFS_B_TO_FSBT(mp, pos_in);
        fsblen = XFS_B_TO_FSB(mp, len);
-       error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
-                       destoff + len);
-       if (error)
-               goto out_error;
+       ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
+                       pos_out + len);
+       if (ret)
+               goto out_unlock;
 
        /*
         * Carry the cowextsize hint from src to dest if we're sharing the
@@ -1390,26 +1437,24 @@ xfs_reflink_remap_range(
         * has a cowextsize hint, and the destination file does not.
         */
        cowextsize = 0;
-       if (srcoff == 0 && len == i_size_read(VFS_I(src)) &&
+       if (pos_in == 0 && len == i_size_read(inode_in) &&
            (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
-           destoff == 0 && len >= i_size_read(VFS_I(dest)) &&
+           pos_out == 0 && len >= i_size_read(inode_out) &&
            !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
                cowextsize = src->i_d.di_cowextsize;
 
-       error = xfs_reflink_update_dest(dest, destoff + len, cowextsize);
-       if (error)
-               goto out_error;
+       ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
 
-out_error:
+out_unlock:
        xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
        xfs_iunlock(src, XFS_IOLOCK_EXCL);
        if (src->i_ino != dest->i_ino) {
                xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
                xfs_iunlock(dest, XFS_IOLOCK_EXCL);
        }
-       if (error)
-               trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_);
-       return error;
+       if (ret)
+               trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
+       return ret;
 }
 
 /*
index 5dc3c8ac12aa5bef547904ca5dbe48275d63bc34..fad11607c9adf3937d6fa739c2ede29c7d0a8bb2 100644 (file)
@@ -26,8 +26,8 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
 extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
                struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
 
-extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip,
-               xfs_off_t offset, xfs_off_t count);
+extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
+               struct xfs_bmbt_irec *imap, bool *shared);
 extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
                xfs_off_t offset, xfs_off_t count);
 extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
@@ -43,11 +43,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
 extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
                xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
-#define XFS_REFLINK_DEDUPE     1       /* only reflink if contents match */
-#define XFS_REFLINK_ALL                (XFS_REFLINK_DEDUPE)
-extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
-               struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
-               unsigned int flags);
+extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
+               struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
 extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
                struct xfs_trans **tpp);
 extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
index 5f8d55d29a11cc4a4db2e30f55766acbeed78b33..276d3023d60f8201b635ae1f0c2ccbf26aac74fd 100644 (file)
@@ -512,13 +512,13 @@ static struct attribute *xfs_error_attrs[] = {
 };
 
 
-struct kobj_type xfs_error_cfg_ktype = {
+static struct kobj_type xfs_error_cfg_ktype = {
        .release = xfs_sysfs_release,
        .sysfs_ops = &xfs_sysfs_ops,
        .default_attrs = xfs_error_attrs,
 };
 
-struct kobj_type xfs_error_ktype = {
+static struct kobj_type xfs_error_ktype = {
        .release = xfs_sysfs_release,
        .sysfs_ops = &xfs_sysfs_ops,
 };
index ad188d3a83f3739db19ed8af577fe4259c9267e8..0907752be62d3de9e385890550a8e92f0402b398 100644 (file)
@@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
 
-DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range);
+DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
 DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
 
 DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
@@ -3356,9 +3356,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
 DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
 DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
-DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece);
 
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
 DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
index 1b949e08015ccd936f5a99bcf868ed30c7f78f26..c19700e2a2fe25d169a64180593438d9815c3f77 100644 (file)
@@ -230,72 +230,62 @@ struct acpi_table_facs {
 /* Fields common to all versions of the FADT */
 
 struct acpi_table_fadt {
-       struct acpi_table_header header;        /* [V1] Common ACPI table header */
-       u32 facs;               /* [V1] 32-bit physical address of FACS */
-       u32 dsdt;               /* [V1] 32-bit physical address of DSDT */
-       u8 model;               /* [V1] System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */
-       u8 preferred_profile;   /* [V1] Conveys preferred power management profile to OSPM. */
-       u16 sci_interrupt;      /* [V1] System vector of SCI interrupt */
-       u32 smi_command;        /* [V1] 32-bit Port address of SMI command port */
-       u8 acpi_enable;         /* [V1] Value to write to SMI_CMD to enable ACPI */
-       u8 acpi_disable;        /* [V1] Value to write to SMI_CMD to disable ACPI */
-       u8 s4_bios_request;     /* [V1] Value to write to SMI_CMD to enter S4BIOS state */
-       u8 pstate_control;      /* [V1] Processor performance state control */
-       u32 pm1a_event_block;   /* [V1] 32-bit port address of Power Mgt 1a Event Reg Blk */
-       u32 pm1b_event_block;   /* [V1] 32-bit port address of Power Mgt 1b Event Reg Blk */
-       u32 pm1a_control_block; /* [V1] 32-bit port address of Power Mgt 1a Control Reg Blk */
-       u32 pm1b_control_block; /* [V1] 32-bit port address of Power Mgt 1b Control Reg Blk */
-       u32 pm2_control_block;  /* [V1] 32-bit port address of Power Mgt 2 Control Reg Blk */
-       u32 pm_timer_block;     /* [V1] 32-bit port address of Power Mgt Timer Ctrl Reg Blk */
-       u32 gpe0_block;         /* [V1] 32-bit port address of General Purpose Event 0 Reg Blk */
-       u32 gpe1_block;         /* [V1] 32-bit port address of General Purpose Event 1 Reg Blk */
-       u8 pm1_event_length;    /* [V1] Byte Length of ports at pm1x_event_block */
-       u8 pm1_control_length;  /* [V1] Byte Length of ports at pm1x_control_block */
-       u8 pm2_control_length;  /* [V1] Byte Length of ports at pm2_control_block */
-       u8 pm_timer_length;     /* [V1] Byte Length of ports at pm_timer_block */
-       u8 gpe0_block_length;   /* [V1] Byte Length of ports at gpe0_block */
-       u8 gpe1_block_length;   /* [V1] Byte Length of ports at gpe1_block */
-       u8 gpe1_base;           /* [V1] Offset in GPE number space where GPE1 events start */
-       u8 cst_control;         /* [V1] Support for the _CST object and C-States change notification */
-       u16 c2_latency;         /* [V1] Worst case HW latency to enter/exit C2 state */
-       u16 c3_latency;         /* [V1] Worst case HW latency to enter/exit C3 state */
-       u16 flush_size;         /* [V1] Processor memory cache line width, in bytes */
-       u16 flush_stride;       /* [V1] Number of flush strides that need to be read */
-       u8 duty_offset;         /* [V1] Processor duty cycle index in processor P_CNT reg */
-       u8 duty_width;          /* [V1] Processor duty cycle value bit width in P_CNT register */
-       u8 day_alarm;           /* [V1] Index to day-of-month alarm in RTC CMOS RAM */
-       u8 month_alarm;         /* [V1] Index to month-of-year alarm in RTC CMOS RAM */
-       u8 century;             /* [V1] Index to century in RTC CMOS RAM */
-       u16 boot_flags;         /* [V3] IA-PC Boot Architecture Flags (see below for individual flags) */
-       u8 reserved;            /* [V1] Reserved, must be zero */
-       u32 flags;              /* [V1] Miscellaneous flag bits (see below for individual flags) */
-       /* End of Version 1 FADT fields (ACPI 1.0) */
-
-       struct acpi_generic_address reset_register;     /* [V3] 64-bit address of the Reset register */
-       u8 reset_value;         /* [V3] Value to write to the reset_register port to reset the system */
-       u16 arm_boot_flags;     /* [V5] ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */
-       u8 minor_revision;      /* [V5] FADT Minor Revision (ACPI 5.1) */
-       u64 Xfacs;              /* [V3] 64-bit physical address of FACS */
-       u64 Xdsdt;              /* [V3] 64-bit physical address of DSDT */
-       struct acpi_generic_address xpm1a_event_block;  /* [V3] 64-bit Extended Power Mgt 1a Event Reg Blk address */
-       struct acpi_generic_address xpm1b_event_block;  /* [V3] 64-bit Extended Power Mgt 1b Event Reg Blk address */
-       struct acpi_generic_address xpm1a_control_block;        /* [V3] 64-bit Extended Power Mgt 1a Control Reg Blk address */
-       struct acpi_generic_address xpm1b_control_block;        /* [V3] 64-bit Extended Power Mgt 1b Control Reg Blk address */
-       struct acpi_generic_address xpm2_control_block; /* [V3] 64-bit Extended Power Mgt 2 Control Reg Blk address */
-       struct acpi_generic_address xpm_timer_block;    /* [V3] 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */
-       struct acpi_generic_address xgpe0_block;        /* [V3] 64-bit Extended General Purpose Event 0 Reg Blk address */
-       struct acpi_generic_address xgpe1_block;        /* [V3] 64-bit Extended General Purpose Event 1 Reg Blk address */
-       /* End of Version 3 FADT fields (ACPI 2.0) */
-
-       struct acpi_generic_address sleep_control;      /* [V4] 64-bit Sleep Control register (ACPI 5.0) */
-       /* End of Version 4 FADT fields (ACPI 3.0 and ACPI 4.0) (Field was originally reserved in ACPI 3.0) */
-
-       struct acpi_generic_address sleep_status;       /* [V5] 64-bit Sleep Status register (ACPI 5.0) */
-       /* End of Version 5 FADT fields (ACPI 5.0) */
-
-       u64 hypervisor_id;      /* [V6] Hypervisor Vendor ID (ACPI 6.0) */
-       /* End of Version 6 FADT fields (ACPI 6.0) */
-
+       struct acpi_table_header header;        /* Common ACPI table header */
+       u32 facs;               /* 32-bit physical address of FACS */
+       u32 dsdt;               /* 32-bit physical address of DSDT */
+       u8 model;               /* System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */
+       u8 preferred_profile;   /* Conveys preferred power management profile to OSPM. */
+       u16 sci_interrupt;      /* System vector of SCI interrupt */
+       u32 smi_command;        /* 32-bit Port address of SMI command port */
+       u8 acpi_enable;         /* Value to write to SMI_CMD to enable ACPI */
+       u8 acpi_disable;        /* Value to write to SMI_CMD to disable ACPI */
+       u8 s4_bios_request;     /* Value to write to SMI_CMD to enter S4BIOS state */
+       u8 pstate_control;      /* Processor performance state control */
+       u32 pm1a_event_block;   /* 32-bit port address of Power Mgt 1a Event Reg Blk */
+       u32 pm1b_event_block;   /* 32-bit port address of Power Mgt 1b Event Reg Blk */
+       u32 pm1a_control_block; /* 32-bit port address of Power Mgt 1a Control Reg Blk */
+       u32 pm1b_control_block; /* 32-bit port address of Power Mgt 1b Control Reg Blk */
+       u32 pm2_control_block;  /* 32-bit port address of Power Mgt 2 Control Reg Blk */
+       u32 pm_timer_block;     /* 32-bit port address of Power Mgt Timer Ctrl Reg Blk */
+       u32 gpe0_block;         /* 32-bit port address of General Purpose Event 0 Reg Blk */
+       u32 gpe1_block;         /* 32-bit port address of General Purpose Event 1 Reg Blk */
+       u8 pm1_event_length;    /* Byte Length of ports at pm1x_event_block */
+       u8 pm1_control_length;  /* Byte Length of ports at pm1x_control_block */
+       u8 pm2_control_length;  /* Byte Length of ports at pm2_control_block */
+       u8 pm_timer_length;     /* Byte Length of ports at pm_timer_block */
+       u8 gpe0_block_length;   /* Byte Length of ports at gpe0_block */
+       u8 gpe1_block_length;   /* Byte Length of ports at gpe1_block */
+       u8 gpe1_base;           /* Offset in GPE number space where GPE1 events start */
+       u8 cst_control;         /* Support for the _CST object and C-States change notification */
+       u16 c2_latency;         /* Worst case HW latency to enter/exit C2 state */
+       u16 c3_latency;         /* Worst case HW latency to enter/exit C3 state */
+       u16 flush_size;         /* Processor memory cache line width, in bytes */
+       u16 flush_stride;       /* Number of flush strides that need to be read */
+       u8 duty_offset;         /* Processor duty cycle index in processor P_CNT reg */
+       u8 duty_width;          /* Processor duty cycle value bit width in P_CNT register */
+       u8 day_alarm;           /* Index to day-of-month alarm in RTC CMOS RAM */
+       u8 month_alarm;         /* Index to month-of-year alarm in RTC CMOS RAM */
+       u8 century;             /* Index to century in RTC CMOS RAM */
+       u16 boot_flags;         /* IA-PC Boot Architecture Flags (see below for individual flags) */
+       u8 reserved;            /* Reserved, must be zero */
+       u32 flags;              /* Miscellaneous flag bits (see below for individual flags) */
+       struct acpi_generic_address reset_register;     /* 64-bit address of the Reset register */
+       u8 reset_value;         /* Value to write to the reset_register port to reset the system */
+       u16 arm_boot_flags;     /* ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */
+       u8 minor_revision;      /* FADT Minor Revision (ACPI 5.1) */
+       u64 Xfacs;              /* 64-bit physical address of FACS */
+       u64 Xdsdt;              /* 64-bit physical address of DSDT */
+       struct acpi_generic_address xpm1a_event_block;  /* 64-bit Extended Power Mgt 1a Event Reg Blk address */
+       struct acpi_generic_address xpm1b_event_block;  /* 64-bit Extended Power Mgt 1b Event Reg Blk address */
+       struct acpi_generic_address xpm1a_control_block;        /* 64-bit Extended Power Mgt 1a Control Reg Blk address */
+       struct acpi_generic_address xpm1b_control_block;        /* 64-bit Extended Power Mgt 1b Control Reg Blk address */
+       struct acpi_generic_address xpm2_control_block; /* 64-bit Extended Power Mgt 2 Control Reg Blk address */
+       struct acpi_generic_address xpm_timer_block;    /* 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */
+       struct acpi_generic_address xgpe0_block;        /* 64-bit Extended General Purpose Event 0 Reg Blk address */
+       struct acpi_generic_address xgpe1_block;        /* 64-bit Extended General Purpose Event 1 Reg Blk address */
+       struct acpi_generic_address sleep_control;      /* 64-bit Sleep Control register (ACPI 5.0) */
+       struct acpi_generic_address sleep_status;       /* 64-bit Sleep Status register (ACPI 5.0) */
+       u64 hypervisor_id;      /* Hypervisor Vendor ID (ACPI 6.0) */
 };
 
 /* Masks for FADT IA-PC Boot Architecture Flags (boot_flags) [Vx]=Introduced in this FADT revision */
@@ -311,8 +301,8 @@ struct acpi_table_fadt {
 
 /* Masks for FADT ARM Boot Architecture Flags (arm_boot_flags) ACPI 5.1 */
 
-#define ACPI_FADT_PSCI_COMPLIANT    (1)        /* 00: [V5] PSCI 0.2+ is implemented */
-#define ACPI_FADT_PSCI_USE_HVC      (1<<1)     /* 01: [V5] HVC must be used instead of SMC as the PSCI conduit */
+#define ACPI_FADT_PSCI_COMPLIANT    (1)        /* 00: [V5+] PSCI 0.2+ is implemented */
+#define ACPI_FADT_PSCI_USE_HVC      (1<<1)     /* 01: [V5+] HVC must be used instead of SMC as the PSCI conduit */
 
 /* Masks for FADT flags */
 
@@ -409,34 +399,20 @@ struct acpi_table_desc {
  * match the expected length. In other words, the length of the
  * FADT is the bottom line as to what the version really is.
  *
- * NOTE: There is no officialy released V2 of the FADT. This
- * version was used only for prototyping and testing during the
- * 32-bit to 64-bit transition. V3 was the first official 64-bit
- * version of the FADT.
- *
- * Update this list of defines when a new version of the FADT is
- * added to the ACPI specification. Note that the FADT version is
- * only incremented when new fields are appended to the existing
- * version. Therefore, the FADT version is competely independent
- * from the version of the ACPI specification where it is
- * defined.
- *
- * For reference, the various FADT lengths are as follows:
- *     FADT V1 size: 0x074      ACPI 1.0
- *     FADT V3 size: 0x0F4      ACPI 2.0
- *     FADT V4 size: 0x100      ACPI 3.0 and ACPI 4.0
- *     FADT V5 size: 0x10C      ACPI 5.0
- *     FADT V6 size: 0x114      ACPI 6.0
+ * For reference, the values below are as follows:
+ *     FADT V1 size: 0x074
+ *     FADT V2 size: 0x084
+ *     FADT V3 size: 0x0F4
+ *     FADT V4 size: 0x0F4
+ *     FADT V5 size: 0x10C
+ *     FADT V6 size: 0x114
  */
-#define ACPI_FADT_V1_SIZE       (u32) (ACPI_FADT_OFFSET (flags) + 4)   /* ACPI 1.0 */
-#define ACPI_FADT_V3_SIZE       (u32) (ACPI_FADT_OFFSET (sleep_control))       /* ACPI 2.0 */
-#define ACPI_FADT_V4_SIZE       (u32) (ACPI_FADT_OFFSET (sleep_status))        /* ACPI 3.0 and ACPI 4.0 */
-#define ACPI_FADT_V5_SIZE       (u32) (ACPI_FADT_OFFSET (hypervisor_id))       /* ACPI 5.0 */
-#define ACPI_FADT_V6_SIZE       (u32) (sizeof (struct acpi_table_fadt))        /* ACPI 6.0 */
-
-/* Update these when new FADT versions are added */
+#define ACPI_FADT_V1_SIZE       (u32) (ACPI_FADT_OFFSET (flags) + 4)
+#define ACPI_FADT_V2_SIZE       (u32) (ACPI_FADT_OFFSET (minor_revision) + 1)
+#define ACPI_FADT_V3_SIZE       (u32) (ACPI_FADT_OFFSET (sleep_control))
+#define ACPI_FADT_V5_SIZE       (u32) (ACPI_FADT_OFFSET (hypervisor_id))
+#define ACPI_FADT_V6_SIZE       (u32) (sizeof (struct acpi_table_fadt))
 
-#define ACPI_FADT_MAX_VERSION   6
 #define ACPI_FADT_CONFORMANCE   "ACPI 6.1 (FADT version 6)"
 
 #endif                         /* __ACTBL_H__ */
index 17a940a1447716420d076be01e3402eb314bafb0..8caa79c617035e60a41ee850150d2b472f35df5a 100644 (file)
@@ -21,7 +21,7 @@ extern void pcc_mbox_free_channel(struct mbox_chan *chan);
 static inline struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl,
                                                         int subspace_id)
 {
-       return NULL;
+       return ERR_PTR(-ENODEV);
 }
 static inline void pcc_mbox_free_channel(struct mbox_chan *chan) { }
 #endif
index a5d98d171866fe758462b15898f5e04019fe57cb..e861a24f06f2aca2bb575a10fa3041fcb32815e3 100644 (file)
 #ifndef __init
 #define __init
 #endif
+#ifndef __iomem
+#define __iomem
+#endif
 
 /* Host-dependent types and defines for user-space ACPICA */
 
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
new file mode 100644 (file)
index 0000000..63554e9
--- /dev/null
@@ -0,0 +1,94 @@
+#ifndef __ASM_GENERIC_EXPORT_H
+#define __ASM_GENERIC_EXPORT_H
+
+#ifndef KSYM_FUNC
+#define KSYM_FUNC(x) x
+#endif
+#ifdef CONFIG_64BIT
+#define __put .quad
+#ifndef KSYM_ALIGN
+#define KSYM_ALIGN 8
+#endif
+#ifndef KCRC_ALIGN
+#define KCRC_ALIGN 8
+#endif
+#else
+#define __put .long
+#ifndef KSYM_ALIGN
+#define KSYM_ALIGN 4
+#endif
+#ifndef KCRC_ALIGN
+#define KCRC_ALIGN 4
+#endif
+#endif
+
+#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
+#define KSYM(name) _##name
+#else
+#define KSYM(name) name
+#endif
+
+/*
+ * note on .section use: @progbits vs %progbits nastiness doesn't matter,
+ * since we immediately emit into those sections anyway.
+ */
+.macro ___EXPORT_SYMBOL name,val,sec
+#ifdef CONFIG_MODULES
+       .globl KSYM(__ksymtab_\name)
+       .section ___ksymtab\sec+\name,"a"
+       .balign KSYM_ALIGN
+KSYM(__ksymtab_\name):
+       __put \val, KSYM(__kstrtab_\name)
+       .previous
+       .section __ksymtab_strings,"a"
+KSYM(__kstrtab_\name):
+#ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
+       .asciz "_\name"
+#else
+       .asciz "\name"
+#endif
+       .previous
+#ifdef CONFIG_MODVERSIONS
+       .section ___kcrctab\sec+\name,"a"
+       .balign KCRC_ALIGN
+KSYM(__kcrctab_\name):
+       __put KSYM(__crc_\name)
+       .weak KSYM(__crc_\name)
+       .previous
+#endif
+#endif
+.endm
+#undef __put
+
+#if defined(__KSYM_DEPS__)
+
+#define __EXPORT_SYMBOL(sym, val, sec) === __KSYM_##sym ===
+
+#elif defined(CONFIG_TRIM_UNUSED_KSYMS)
+
+#include <linux/kconfig.h>
+#include <generated/autoksyms.h>
+
+#define __EXPORT_SYMBOL(sym, val, sec)                         \
+       __cond_export_sym(sym, val, sec, __is_defined(__KSYM_##sym))
+#define __cond_export_sym(sym, val, sec, conf)                 \
+       ___cond_export_sym(sym, val, sec, conf)
+#define ___cond_export_sym(sym, val, sec, enabled)             \
+       __cond_export_sym_##enabled(sym, val, sec)
+#define __cond_export_sym_1(sym, val, sec) ___EXPORT_SYMBOL sym, val, sec
+#define __cond_export_sym_0(sym, val, sec) /* nothing */
+
+#else
+#define __EXPORT_SYMBOL(sym, val, sec) ___EXPORT_SYMBOL sym, val, sec
+#endif
+
+#define EXPORT_SYMBOL(name)                                    \
+       __EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)),)
+#define EXPORT_SYMBOL_GPL(name)                                \
+       __EXPORT_SYMBOL(name, KSYM_FUNC(KSYM(name)), _gpl)
+#define EXPORT_DATA_SYMBOL(name)                               \
+       __EXPORT_SYMBOL(name, KSYM(name),)
+#define EXPORT_DATA_SYMBOL_GPL(name)                           \
+       __EXPORT_SYMBOL(name, KSYM(name),_gpl)
+
+#endif
diff --git a/include/asm-generic/libata-portmap.h b/include/asm-generic/libata-portmap.h
deleted file mode 100644 (file)
index cf14f2f..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __ASM_GENERIC_LIBATA_PORTMAP_H
-#define __ASM_GENERIC_LIBATA_PORTMAP_H
-
-#define ATA_PRIMARY_IRQ(dev)   14
-#define ATA_SECONDARY_IRQ(dev) 15
-
-#endif
index 4d9f233c4ba8343504b977e0273cc9a04cbbedb8..0504ef8f3aa31d5e7a9f0d86d1cb3fc130fee9e0 100644 (file)
@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void);
 #define PER_CPU_DEF_ATTRIBUTES
 #endif
 
+#define raw_cpu_generic_read(pcp)                                      \
+({                                                                     \
+       *raw_cpu_ptr(&(pcp));                                           \
+})
+
 #define raw_cpu_generic_to_op(pcp, val, op)                            \
 do {                                                                   \
        *raw_cpu_ptr(&(pcp)) op val;                                    \
@@ -72,34 +77,39 @@ do {                                                                        \
 
 #define raw_cpu_generic_add_return(pcp, val)                           \
 ({                                                                     \
-       raw_cpu_add(pcp, val);                                          \
-       raw_cpu_read(pcp);                                              \
+       typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp));                       \
+                                                                       \
+       *__p += val;                                                    \
+       *__p;                                                           \
 })
 
 #define raw_cpu_generic_xchg(pcp, nval)                                        \
 ({                                                                     \
+       typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp));                       \
        typeof(pcp) __ret;                                              \
-       __ret = raw_cpu_read(pcp);                                      \
-       raw_cpu_write(pcp, nval);                                       \
+       __ret = *__p;                                                   \
+       *__p = nval;                                                    \
        __ret;                                                          \
 })
 
 #define raw_cpu_generic_cmpxchg(pcp, oval, nval)                       \
 ({                                                                     \
+       typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp));                       \
        typeof(pcp) __ret;                                              \
-       __ret = raw_cpu_read(pcp);                                      \
+       __ret = *__p;                                                   \
        if (__ret == (oval))                                            \
-               raw_cpu_write(pcp, nval);                               \
+               *__p = nval;                                            \
        __ret;                                                          \
 })
 
 #define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
 ({                                                                     \
+       typeof(&(pcp1)) __p1 = raw_cpu_ptr(&(pcp1));                    \
+       typeof(&(pcp2)) __p2 = raw_cpu_ptr(&(pcp2));                    \
        int __ret = 0;                                                  \
-       if (raw_cpu_read(pcp1) == (oval1) &&                            \
-                        raw_cpu_read(pcp2)  == (oval2)) {              \
-               raw_cpu_write(pcp1, nval1);                             \
-               raw_cpu_write(pcp2, nval2);                             \
+       if (*__p1 == (oval1) && *__p2  == (oval2)) {                    \
+               *__p1 = nval1;                                          \
+               *__p2 = nval2;                                          \
                __ret = 1;                                              \
        }                                                               \
        (__ret);                                                        \
@@ -108,9 +118,9 @@ do {                                                                        \
 #define this_cpu_generic_read(pcp)                                     \
 ({                                                                     \
        typeof(pcp) __ret;                                              \
-       preempt_disable();                                              \
-       __ret = *this_cpu_ptr(&(pcp));                                  \
-       preempt_enable();                                               \
+       preempt_disable_notrace();                                      \
+       __ret = raw_cpu_generic_read(pcp);                              \
+       preempt_enable_notrace();                                       \
        __ret;                                                          \
 })
 
@@ -118,17 +128,17 @@ do {                                                                      \
 do {                                                                   \
        unsigned long __flags;                                          \
        raw_local_irq_save(__flags);                                    \
-       *raw_cpu_ptr(&(pcp)) op val;                                    \
+       raw_cpu_generic_to_op(pcp, val, op);                            \
        raw_local_irq_restore(__flags);                                 \
 } while (0)
 
+
 #define this_cpu_generic_add_return(pcp, val)                          \
 ({                                                                     \
        typeof(pcp) __ret;                                              \
        unsigned long __flags;                                          \
        raw_local_irq_save(__flags);                                    \
-       raw_cpu_add(pcp, val);                                          \
-       __ret = raw_cpu_read(pcp);                                      \
+       __ret = raw_cpu_generic_add_return(pcp, val);                   \
        raw_local_irq_restore(__flags);                                 \
        __ret;                                                          \
 })
@@ -138,8 +148,7 @@ do {                                                                        \
        typeof(pcp) __ret;                                              \
        unsigned long __flags;                                          \
        raw_local_irq_save(__flags);                                    \
-       __ret = raw_cpu_read(pcp);                                      \
-       raw_cpu_write(pcp, nval);                                       \
+       __ret = raw_cpu_generic_xchg(pcp, nval);                        \
        raw_local_irq_restore(__flags);                                 \
        __ret;                                                          \
 })
@@ -149,9 +158,7 @@ do {                                                                        \
        typeof(pcp) __ret;                                              \
        unsigned long __flags;                                          \
        raw_local_irq_save(__flags);                                    \
-       __ret = raw_cpu_read(pcp);                                      \
-       if (__ret == (oval))                                            \
-               raw_cpu_write(pcp, nval);                               \
+       __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval);               \
        raw_local_irq_restore(__flags);                                 \
        __ret;                                                          \
 })
@@ -168,16 +175,16 @@ do {                                                                      \
 })
 
 #ifndef raw_cpu_read_1
-#define raw_cpu_read_1(pcp)            (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_1(pcp)            raw_cpu_generic_read(pcp)
 #endif
 #ifndef raw_cpu_read_2
-#define raw_cpu_read_2(pcp)            (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_2(pcp)            raw_cpu_generic_read(pcp)
 #endif
 #ifndef raw_cpu_read_4
-#define raw_cpu_read_4(pcp)            (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_4(pcp)            raw_cpu_generic_read(pcp)
 #endif
 #ifndef raw_cpu_read_8
-#define raw_cpu_read_8(pcp)            (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_8(pcp)            raw_cpu_generic_read(pcp)
 #endif
 
 #ifndef raw_cpu_write_1
index af0254c0942476f67e92c08f8e75918e529b4994..4df64a1fc09e7aab7f88cd4afe73928228930147 100644 (file)
@@ -14,6 +14,8 @@
  * [_sdata, _edata]: contains .data.* sections, may also contain .rodata.*
  *                   and/or .init.* sections.
  * [__start_rodata, __end_rodata]: contains .rodata.* sections
+ * [__start_data_ro_after_init, __end_data_ro_after_init]:
+ *                  contains the .data..ro_after_init section
  * [__init_begin, __init_end]: contains .init.* sections, but .init.text.*
  *                   may be out of this range on some architectures.
  * [_sinittext, _einittext]: contains .init.text.* sections
@@ -31,6 +33,7 @@ extern char _data[], _sdata[], _edata[];
 extern char __bss_start[], __bss_stop[];
 extern char __init_begin[], __init_end[];
 extern char _sinittext[], _einittext[];
+extern char __start_data_ro_after_init[], __end_data_ro_after_init[];
 extern char _end[];
 extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
 extern char __kprobes_text_start[], __kprobes_text_end[];
index 3e42bcdd014b45b66b95853ee699937666b11b3f..31e1d639abedacd87828613416bce5c459694fc8 100644 (file)
        *(.dtb.init.rodata)                                             \
        VMLINUX_SYMBOL(__dtb_end) = .;
 
-/* .data section */
+/*
+ * .data section
+ * The LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which
+ * generates .data.identifier sections. These must be pulled in with .data,
+ * but .data..stuff has its own requirements and must not be. Same for bss.
+ */
 #define DATA_DATA                                                      \
-       *(.data)                                                        \
+       *(.data .data.[0-9a-zA-Z_]*)                                    \
        *(.ref.data)                                                    \
        *(.data..shared_aligned) /* percpu related */                   \
        MEM_KEEP(init.data)                                             \
  * own by defining an empty RO_AFTER_INIT_DATA.
  */
 #ifndef RO_AFTER_INIT_DATA
-#define RO_AFTER_INIT_DATA *(.data..ro_after_init)
+#define RO_AFTER_INIT_DATA                                             \
+       __start_data_ro_after_init = .;                                 \
+       *(.data..ro_after_init)                                         \
+       __end_data_ro_after_init = .;
 #endif
 
 /*
        /* Kernel symbol table: Normal symbols */                       \
        __ksymtab         : AT(ADDR(__ksymtab) - LOAD_OFFSET) {         \
                VMLINUX_SYMBOL(__start___ksymtab) = .;                  \
-               *(SORT(___ksymtab+*))                                   \
+               KEEP(*(SORT(___ksymtab+*)))                             \
                VMLINUX_SYMBOL(__stop___ksymtab) = .;                   \
        }                                                               \
                                                                        \
        /* Kernel symbol table: GPL-only symbols */                     \
        __ksymtab_gpl     : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) {     \
                VMLINUX_SYMBOL(__start___ksymtab_gpl) = .;              \
-               *(SORT(___ksymtab_gpl+*))                               \
+               KEEP(*(SORT(___ksymtab_gpl+*)))                         \
                VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .;               \
        }                                                               \
                                                                        \
        /* Kernel symbol table: Normal unused symbols */                \
        __ksymtab_unused  : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) {  \
                VMLINUX_SYMBOL(__start___ksymtab_unused) = .;           \
-               *(SORT(___ksymtab_unused+*))                            \
+               KEEP(*(SORT(___ksymtab_unused+*)))                      \
                VMLINUX_SYMBOL(__stop___ksymtab_unused) = .;            \
        }                                                               \
                                                                        \
        /* Kernel symbol table: GPL-only unused symbols */              \
        __ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \
                VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .;       \
-               *(SORT(___ksymtab_unused_gpl+*))                        \
+               KEEP(*(SORT(___ksymtab_unused_gpl+*)))                  \
                VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .;        \
        }                                                               \
                                                                        \
        /* Kernel symbol table: GPL-future-only symbols */              \
        __ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \
                VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .;       \
-               *(SORT(___ksymtab_gpl_future+*))                        \
+               KEEP(*(SORT(___ksymtab_gpl_future+*)))                  \
                VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .;        \
        }                                                               \
                                                                        \
        /* Kernel symbol table: Normal symbols */                       \
        __kcrctab         : AT(ADDR(__kcrctab) - LOAD_OFFSET) {         \
                VMLINUX_SYMBOL(__start___kcrctab) = .;                  \
-               *(SORT(___kcrctab+*))                                   \
+               KEEP(*(SORT(___kcrctab+*)))                             \
                VMLINUX_SYMBOL(__stop___kcrctab) = .;                   \
        }                                                               \
                                                                        \
        /* Kernel symbol table: GPL-only symbols */                     \
        __kcrctab_gpl     : AT(ADDR(__kcrctab_gpl) - LOAD_OFFSET) {     \
                VMLINUX_SYMBOL(__start___kcrctab_gpl) = .;              \
-               *(SORT(___kcrctab_gpl+*))                               \
+               KEEP(*(SORT(___kcrctab_gpl+*)))                         \
                VMLINUX_SYMBOL(__stop___kcrctab_gpl) = .;               \
        }                                                               \
                                                                        \
        /* Kernel symbol table: Normal unused symbols */                \
        __kcrctab_unused  : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) {  \
                VMLINUX_SYMBOL(__start___kcrctab_unused) = .;           \
-               *(SORT(___kcrctab_unused+*))                            \
+               KEEP(*(SORT(___kcrctab_unused+*)))                      \
                VMLINUX_SYMBOL(__stop___kcrctab_unused) = .;            \
        }                                                               \
                                                                        \
        /* Kernel symbol table: GPL-only unused symbols */              \
        __kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \
                VMLINUX_SYMBOL(__start___kcrctab_unused_gpl) = .;       \
-               *(SORT(___kcrctab_unused_gpl+*))                        \
+               KEEP(*(SORT(___kcrctab_unused_gpl+*)))                  \
                VMLINUX_SYMBOL(__stop___kcrctab_unused_gpl) = .;        \
        }                                                               \
                                                                        \
        /* Kernel symbol table: GPL-future-only symbols */              \
        __kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \
                VMLINUX_SYMBOL(__start___kcrctab_gpl_future) = .;       \
-               *(SORT(___kcrctab_gpl_future+*))                        \
+               KEEP(*(SORT(___kcrctab_gpl_future+*)))                  \
                VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .;        \
        }                                                               \
                                                                        \
        /* Kernel symbol table: strings */                              \
         __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {        \
-               *(__ksymtab_strings)                                    \
+               KEEP(*(__ksymtab_strings))                              \
        }                                                               \
                                                                        \
        /* __*init sections */                                          \
 #define SECURITY_INIT                                                  \
        .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) { \
                VMLINUX_SYMBOL(__security_initcall_start) = .;          \
-               *(.security_initcall.init)                              \
+               KEEP(*(.security_initcall.init))                        \
                VMLINUX_SYMBOL(__security_initcall_end) = .;            \
        }
 
 /* .text section. Map to function alignment to avoid address changes
- * during second ld run in second ld pass when generating System.map */
+ * during the second ld pass when generating System.map.
+ * The LD_DEAD_CODE_DATA_ELIMINATION option enables -ffunction-sections, which
+ * generates .text.identifier sections that must be pulled in with .text, but
+ * some architectures define .text.foo which is not intended to be pulled in
+ * here. Those enabling LD_DEAD_CODE_DATA_ELIMINATION must ensure they don't
+ * have conflicting section names, and must pull in .text.[0-9a-zA-Z_]* */
 #define TEXT_TEXT                                                      \
                ALIGN_FUNCTION();                                       \
                *(.text.hot .text .text.fixup .text.unlikely)           \
 
 /* init and exit section handling */
 #define INIT_DATA                                                      \
+       KEEP(*(SORT(___kentry+*)))                                      \
        *(.init.data)                                                   \
        MEM_DISCARD(init.data)                                          \
        KERNEL_CTORS()                                                  \
                BSS_FIRST_SECTIONS                                      \
                *(.bss..page_aligned)                                   \
                *(.dynbss)                                              \
-               *(.bss)                                                 \
+               *(.bss .bss.[0-9a-zA-Z_]*)                              \
                *(COMMON)                                               \
        }
 
 
 #define INIT_CALLS_LEVEL(level)                                                \
                VMLINUX_SYMBOL(__initcall##level##_start) = .;          \
-               *(.initcall##level##.init)                              \
-               *(.initcall##level##s.init)                             \
+               KEEP(*(.initcall##level##.init))                        \
+               KEEP(*(.initcall##level##s.init))                       \
 
 #define INIT_CALLS                                                     \
                VMLINUX_SYMBOL(__initcall_start) = .;                   \
-               *(.initcallearly.init)                                  \
+               KEEP(*(.initcallearly.init))                            \
                INIT_CALLS_LEVEL(0)                                     \
                INIT_CALLS_LEVEL(1)                                     \
                INIT_CALLS_LEVEL(2)                                     \
 
 #define CON_INITCALL                                                   \
                VMLINUX_SYMBOL(__con_initcall_start) = .;               \
-               *(.con_initcall.init)                                   \
+               KEEP(*(.con_initcall.init))                             \
                VMLINUX_SYMBOL(__con_initcall_end) = .;
 
 #define SECURITY_INITCALL                                              \
                VMLINUX_SYMBOL(__security_initcall_start) = .;          \
-               *(.security_initcall.init)                              \
+               KEEP(*(.security_initcall.init))                        \
                VMLINUX_SYMBOL(__security_initcall_end) = .;
 
 #ifdef CONFIG_BLK_DEV_INITRD
 #define INIT_RAM_FS                                                    \
        . = ALIGN(4);                                                   \
        VMLINUX_SYMBOL(__initramfs_start) = .;                          \
-       *(.init.ramfs)                                                  \
+       KEEP(*(.init.ramfs))                                            \
        . = ALIGN(8);                                                   \
-       *(.init.ramfs.info)
+       KEEP(*(.init.ramfs.info))
 #else
 #define INIT_RAM_FS
 #endif
index 43cf193e54d666be087c5962ac82401f0db3782e..8b4dc62470ffae3df724e7efacd67553220a7d3f 100644 (file)
@@ -47,8 +47,14 @@ struct drm_crtc;
  * @src_h: height of visible portion of plane (in 16.16)
  * @rotation: rotation of the plane
  * @zpos: priority of the given plane on crtc (optional)
+ *     Note that multiple active planes on the same crtc can have an identical
+ *     zpos value. The rule for resolving the conflict is to compare the plane
+ *     object IDs; the plane with a higher ID must be stacked on top of a
+ *     plane with a lower ID.
  * @normalized_zpos: normalized value of zpos: unique, range from 0 to N-1
- *     where N is the number of active planes for given crtc
+ *     where N is the number of active planes for given crtc. Note that
+ *     the driver must call drm_atomic_normalize_zpos() to update this before
+ *     it can be trusted.
  * @src: clipped source coordinates of the plane (in 16.16)
  * @dst: clipped destination coordinates of the plane
  * @visible: visibility of the plane
diff --git a/include/dt-bindings/net/mdio.h b/include/dt-bindings/net/mdio.h
new file mode 100644 (file)
index 0000000..99c6d90
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * This header provides generic constants for ethernet MDIO bindings
+ */
+
+#ifndef _DT_BINDINGS_NET_MDIO_H
+#define _DT_BINDINGS_NET_MDIO_H
+
+/*
+ * EEE capability Advertisement
+ */
+
+#define MDIO_EEE_100TX         0x0002  /* 100TX EEE cap */
+#define MDIO_EEE_1000T         0x0004  /* 1000T EEE cap */
+#define MDIO_EEE_10GT          0x0008  /* 10GT EEE cap */
+#define MDIO_EEE_1000KX                0x0010  /* 1000KX EEE cap */
+#define MDIO_EEE_10GKX4                0x0020  /* 10G KX4 EEE cap */
+#define MDIO_EEE_10GKR         0x0040  /* 10G KR EEE cap */
+
+#endif
index 94afcb2c384cf2d7d184742e870c0becfeffe312..61a3d90f32b338a030c3a064b50c403a48464293 100644 (file)
@@ -326,6 +326,7 @@ struct pci_dev;
 int acpi_pci_irq_enable (struct pci_dev *dev);
 void acpi_penalize_isa_irq(int irq, int active);
 bool acpi_isa_irq_available(int irq);
+void acpi_penalize_sci_irq(int irq, int trigger, int polarity);
 void acpi_pci_irq_disable (struct pci_dev *dev);
 
 extern int ec_read(u8 addr, u8 *val);
@@ -554,7 +555,8 @@ int acpi_device_uevent_modalias(struct device *, struct kobj_uevent_env *);
 int acpi_device_modalias(struct device *, char *, int);
 void acpi_walk_dep_device_list(acpi_handle handle);
 
-struct platform_device *acpi_create_platform_device(struct acpi_device *);
+struct platform_device *acpi_create_platform_device(struct acpi_device *,
+                                                   struct property_entry *);
 #define ACPI_PTR(_ptr) (_ptr)
 
 static inline void acpi_device_set_enumerated(struct acpi_device *adev)
@@ -946,9 +948,17 @@ struct acpi_reference_args {
 #ifdef CONFIG_ACPI
 int acpi_dev_get_property(struct acpi_device *adev, const char *name,
                          acpi_object_type type, const union acpi_object **obj);
-int acpi_node_get_property_reference(struct fwnode_handle *fwnode,
-                                    const char *name, size_t index,
-                                    struct acpi_reference_args *args);
+int __acpi_node_get_property_reference(struct fwnode_handle *fwnode,
+                               const char *name, size_t index, size_t num_args,
+                               struct acpi_reference_args *args);
+
+static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode,
+                               const char *name, size_t index,
+                               struct acpi_reference_args *args)
+{
+       return __acpi_node_get_property_reference(fwnode, name, index,
+               MAX_ACPI_REFERENCE_ARGS, args);
+}
 
 int acpi_node_prop_get(struct fwnode_handle *fwnode, const char *propname,
                       void **valptr);
@@ -1024,6 +1034,14 @@ static inline int acpi_dev_get_property(struct acpi_device *adev,
        return -ENXIO;
 }
 
+static inline int
+__acpi_node_get_property_reference(struct fwnode_handle *fwnode,
+                               const char *name, size_t index, size_t num_args,
+                               struct acpi_reference_args *args)
+{
+       return -ENXIO;
+}
+
 static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode,
                                const char *name, size_t index,
                                struct acpi_reference_args *args)
index adbc812c009b575d1006e494d390c18ac8c7f0b0..fdb180367ba134aaa8b54e372c44c8680161fd2f 100644 (file)
@@ -105,6 +105,7 @@ enum {
        ATA_ID_CFA_KEY_MGMT     = 162,
        ATA_ID_CFA_MODES        = 163,
        ATA_ID_DATA_SET_MGMT    = 169,
+       ATA_ID_SCT_CMD_XPORT    = 206,
        ATA_ID_ROT_SPEED        = 217,
        ATA_ID_PIO4             = (1 << 1),
 
@@ -788,6 +789,48 @@ static inline bool ata_id_sense_reporting_enabled(const u16 *id)
        return id[ATA_ID_COMMAND_SET_4] & (1 << 6);
 }
 
+/**
+ *
+ * Word: 206 - SCT Command Transport
+ *    15:12 - Vendor Specific
+ *     11:6 - Reserved
+ *        5 - SCT Command Transport Data Tables supported
+ *        4 - SCT Command Transport Features Control supported
+ *        3 - SCT Command Transport Error Recovery Control supported
+ *        2 - SCT Command Transport Write Same supported
+ *        1 - SCT Command Transport Long Sector Access supported
+ *        0 - SCT Command Transport supported
+ */
+static inline bool ata_id_sct_data_tables(const u16 *id)
+{
+       return id[ATA_ID_SCT_CMD_XPORT] & (1 << 5) ? true : false;
+}
+
+static inline bool ata_id_sct_features_ctrl(const u16 *id)
+{
+       return id[ATA_ID_SCT_CMD_XPORT] & (1 << 4) ? true : false;
+}
+
+static inline bool ata_id_sct_error_recovery_ctrl(const u16 *id)
+{
+       return id[ATA_ID_SCT_CMD_XPORT] & (1 << 3) ? true : false;
+}
+
+static inline bool ata_id_sct_write_same(const u16 *id)
+{
+       return id[ATA_ID_SCT_CMD_XPORT] & (1 << 2) ? true : false;
+}
+
+static inline bool ata_id_sct_long_sector_access(const u16 *id)
+{
+       return id[ATA_ID_SCT_CMD_XPORT] & (1 << 1) ? true : false;
+}
+
+static inline bool ata_id_sct_supported(const u16 *id)
+{
+       return id[ATA_ID_SCT_CMD_XPORT] & (1 << 0) ? true : false;
+}
+
 /**
  *     ata_id_major_version    -       get ATA level of drive
  *     @id: Identify data
@@ -1071,32 +1114,6 @@ static inline void ata_id_to_hd_driveid(u16 *id)
 #endif
 }
 
-/*
- * Write LBA Range Entries to the buffer that will cover the extent from
- * sector to sector + count.  This is used for TRIM and for ADD LBA(S)
- * TO NV CACHE PINNED SET.
- */
-static inline unsigned ata_set_lba_range_entries(void *_buffer,
-               unsigned num, u64 sector, unsigned long count)
-{
-       __le64 *buffer = _buffer;
-       unsigned i = 0, used_bytes;
-
-       while (i < num) {
-               u64 entry = sector |
-                       ((u64)(count > 0xffff ? 0xffff : count) << 48);
-               buffer[i++] = __cpu_to_le64(entry);
-               if (count <= 0xffff)
-                       break;
-               count -= 0xffff;
-               sector += 0xffff;
-       }
-
-       used_bytes = ALIGN(i * 8, 512);
-       memset(buffer + i, 0, used_bytes - i * 8);
-       return used_bytes;
-}
-
 static inline bool ata_ok(u8 status)
 {
        return ((status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | ATA_ERR))
index cbdbf34de5b607e799bd9739e8815dff4037c76c..3bf5d33800ab61957cba76fcd0aec1815493f440 100644 (file)
@@ -343,16 +343,7 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
  */
 static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
 {
-       char *p;
-
-       p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
-       if (!p) {
-               strncpy(buf, "<unavailable>", buflen);
-               return -ENAMETOOLONG;
-       }
-
-       memmove(buf, p, buf + buflen - p);
-       return 0;
+       return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
 }
 
 /**
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
new file mode 100644 (file)
index 0000000..7b6e5d1
--- /dev/null
@@ -0,0 +1,92 @@
+#ifndef _BPF_CGROUP_H
+#define _BPF_CGROUP_H
+
+#include <linux/jump_label.h>
+#include <uapi/linux/bpf.h>
+
+struct sock;
+struct cgroup;
+struct sk_buff;
+
+#ifdef CONFIG_CGROUP_BPF
+
+extern struct static_key_false cgroup_bpf_enabled_key;
+#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
+
+struct cgroup_bpf {
+       /*
+        * Store two sets of bpf_prog pointers, one for programs that are
+        * pinned directly to this cgroup, and one for those that are effective
+        * when this cgroup is accessed.
+        */
+       struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
+       struct bpf_prog *effective[MAX_BPF_ATTACH_TYPE];
+};
+
+void cgroup_bpf_put(struct cgroup *cgrp);
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
+
+void __cgroup_bpf_update(struct cgroup *cgrp,
+                        struct cgroup *parent,
+                        struct bpf_prog *prog,
+                        enum bpf_attach_type type);
+
+/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
+void cgroup_bpf_update(struct cgroup *cgrp,
+                      struct bpf_prog *prog,
+                      enum bpf_attach_type type);
+
+int __cgroup_bpf_run_filter_skb(struct sock *sk,
+                               struct sk_buff *skb,
+                               enum bpf_attach_type type);
+
+int __cgroup_bpf_run_filter_sk(struct sock *sk,
+                              enum bpf_attach_type type);
+
+/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
+#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)                            \
+({                                                                           \
+       int __ret = 0;                                                        \
+       if (cgroup_bpf_enabled)                                               \
+               __ret = __cgroup_bpf_run_filter_skb(sk, skb,                  \
+                                                   BPF_CGROUP_INET_INGRESS); \
+                                                                             \
+       __ret;                                                                \
+})
+
+#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb)                              \
+({                                                                            \
+       int __ret = 0;                                                         \
+       if (cgroup_bpf_enabled && sk && sk == skb->sk) {                       \
+               typeof(sk) __sk = sk_to_full_sk(sk);                           \
+               if (sk_fullsock(__sk))                                         \
+                       __ret = __cgroup_bpf_run_filter_skb(__sk, skb,         \
+                                                     BPF_CGROUP_INET_EGRESS); \
+       }                                                                      \
+       __ret;                                                                 \
+})
+
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk)                                     \
+({                                                                            \
+       int __ret = 0;                                                         \
+       if (cgroup_bpf_enabled && sk) {                                        \
+               __ret = __cgroup_bpf_run_filter_sk(sk,                         \
+                                                BPF_CGROUP_INET_SOCK_CREATE); \
+       }                                                                      \
+       __ret;                                                                 \
+})
+
+#else
+
+struct cgroup_bpf {};
+static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
+                                     struct cgroup *parent) {}
+
+#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
+
+#endif /* CONFIG_CGROUP_BPF */
+
+#endif /* _BPF_CGROUP_H */
index edcd96ded8aafaaef9d37dcdb39df124cb1eb6c7..69d0a7f12a3bd516ef295c72153f7abcea56c0f3 100644 (file)
@@ -233,13 +233,14 @@ void bpf_register_map_type(struct bpf_map_type_list *tl);
 
 struct bpf_prog *bpf_prog_get(u32 ufd);
 struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type);
-struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i);
-struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog);
+struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
+void bpf_prog_sub(struct bpf_prog *prog, int i);
+struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog);
 void bpf_prog_put(struct bpf_prog *prog);
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
-struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref);
+struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 int bpf_map_precharge_memlock(u32 pages);
@@ -298,15 +299,21 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
 {
        return ERR_PTR(-EOPNOTSUPP);
 }
-static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
+static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog,
+                                                         int i)
 {
        return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline void bpf_prog_sub(struct bpf_prog *prog, int i)
+{
+}
+
 static inline void bpf_prog_put(struct bpf_prog *prog)
 {
 }
-static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+
+static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog)
 {
        return ERR_PTR(-EOPNOTSUPP);
 }
index ac5b393ee6b276084d06796004ad15c78839fc3c..7453c12815317575c96f271dedd7e76b2c7f2f10 100644 (file)
@@ -14,7 +14,7 @@
   * are obviously wrong for any sort of memory access.
   */
 #define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024)
-#define BPF_REGISTER_MIN_RANGE -(1024 * 1024 * 1024)
+#define BPF_REGISTER_MIN_RANGE -1
 
 struct bpf_reg_state {
        enum bpf_reg_type type;
@@ -22,7 +22,8 @@ struct bpf_reg_state {
         * Used to determine if any memory access using this register will
         * result in a bad access.
         */
-       u64 min_value, max_value;
+       s64 min_value;
+       u64 max_value;
        u32 id;
        union {
                /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
index 60def78c4e1221ea75756fd34c07a66abf9fbc16..4f7d8be9ddbf8de2fc0e657764e9dfe515852e3e 100644 (file)
@@ -13,6 +13,7 @@
 #define PHY_ID_BCM5241                 0x0143bc30
 #define PHY_ID_BCMAC131                        0x0143bc70
 #define PHY_ID_BCM5481                 0x0143bca0
+#define PHY_ID_BCM54810                        0x03625d00
 #define PHY_ID_BCM5482                 0x0143bcb0
 #define PHY_ID_BCM5411                 0x00206070
 #define PHY_ID_BCM5421                 0x002060e0
@@ -56,6 +57,7 @@
 #define PHY_BRCM_EXT_IBND_TX_ENABLE    0x00002000
 #define PHY_BRCM_CLEAR_RGMII_MODE      0x00004000
 #define PHY_BRCM_DIS_TXCRXC_NOENRGY    0x00008000
+
 /* Broadcom BCM7xxx specific workarounds */
 #define PHY_BRCM_7XXX_REV(x)           (((x) >> 8) & 0xff)
 #define PHY_BRCM_7XXX_PATCH(x)         ((x) & 0xff)
 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX    0x0200
 #define MII_BCM54XX_AUXCTL_MISC_RDSEL_MISC     0x7000
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC        0x0007
+#define MII_BCM54XX_AUXCTL_SHDWSEL_READ_SHIFT  12
+#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN  (1 << 8)
+#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN   (1 << 4)
 
 #define MII_BCM54XX_AUXCTL_SHDWSEL_MASK        0x0007
 
 #define BCM_LED_SRC_INTR       0x6
 #define BCM_LED_SRC_QUALITY    0x7
 #define BCM_LED_SRC_RCVLED     0x8
+#define BCM_LED_SRC_WIRESPEED  0x9
 #define BCM_LED_SRC_MULTICOLOR1        0xa
 #define BCM_LED_SRC_OPENSHORT  0xb
 #define BCM_LED_SRC_OFF                0xe     /* Tied high */
  * Shadow values go into bits [14:10] of register 0x1c to select a shadow
  * register to access.
  */
+
+/* 00100: Reserved control register 2 */
+#define BCM54XX_SHD_SCR2               0x04
+#define  BCM54XX_SHD_SCR2_WSPD_RTRY_DIS        0x100
+#define  BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_SHIFT  2
+#define  BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_OFFSET 2
+#define  BCM54XX_SHD_SCR2_WSPD_RTRY_LMT_MASK   0x7
+
 /* 00101: Spare Control Register 3 */
 #define BCM54XX_SHD_SCR3               0x05
 #define  BCM54XX_SHD_SCR3_DEF_CLK125   0x0001
 #define BCM5482_SSD_SGMII_SLAVE_EN     0x0002  /* Slave mode enable */
 #define BCM5482_SSD_SGMII_SLAVE_AD     0x0001  /* Slave auto-detection */
 
+/* BCM54810 Registers */
+#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL   (MII_BCM54XX_EXP_SEL_ER + 0x90)
+#define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN        (1 << 0)
+#define BCM54810_SHD_CLK_CTL                   0x3
+#define BCM54810_SHD_CLK_CTL_GTXCLK_EN         (1 << 9)
+
 
 /*****************************************************************************/
 /* Fast Ethernet Transceiver definitions. */
 #define LPI_FEATURE_EN_DIG1000X                0x4000
 
 /* Core register definitions*/
+#define MII_BRCM_CORE_BASE12   0x12
+#define MII_BRCM_CORE_BASE13   0x13
+#define MII_BRCM_CORE_BASE14   0x14
 #define MII_BRCM_CORE_BASE1E   0x1E
 #define MII_BRCM_CORE_EXPB0    0xB0
 #define MII_BRCM_CORE_EXPB1    0xB1
index 96337b15a60d59cd12e342b101d70efc1cd380d1..a8e66344bacc225642acc872c614b3017c314218 100644 (file)
@@ -258,6 +258,8 @@ struct ceph_watch_item {
        struct ceph_entity_addr addr;
 };
 
+#define CEPH_LINGER_ID_START   0xffff000000000000ULL
+
 struct ceph_osd_client {
        struct ceph_client     *client;
 
index 5b17de62c962cd73d625427c2230d66e08cbcb4b..861b4677fc5b41134f96da33710a79735b827fe0 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/workqueue.h>
+#include <linux/bpf-cgroup.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -300,6 +301,9 @@ struct cgroup {
        /* used to schedule release agent */
        struct work_struct release_agent_work;
 
+       /* used to store eBPF programs */
+       struct cgroup_bpf bpf;
+
        /* ids of the ancestors at each level including self */
        int ancestor_ids[];
 };
index 440a72164a11054d63f7a186b4f8ebff705e2e05..c83c23f0577bd908df08298dcbde74a7961dcf8c 100644 (file)
@@ -97,7 +97,7 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 int cgroup_rm_cftypes(struct cftype *cfts);
 void cgroup_file_notify(struct cgroup_file *cfile);
 
-char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
 int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
 int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                     struct pid *pid, struct task_struct *tsk);
@@ -555,8 +555,7 @@ static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen)
        return kernfs_name(cgrp->kn, buf, buflen);
 }
 
-static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf,
-                                             size_t buflen)
+static inline int cgroup_path(struct cgroup *cgrp, char *buf, size_t buflen)
 {
        return kernfs_path(cgrp->kn, buf, buflen);
 }
@@ -658,8 +657,8 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
                                        struct user_namespace *user_ns,
                                        struct cgroup_namespace *old_ns);
 
-char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
-                    struct cgroup_namespace *ns);
+int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+                  struct cgroup_namespace *ns);
 
 #else /* !CONFIG_CGROUPS */
 
index af596381fa0fa05862bb836a02deaeea29fb9fa2..a428aec36aceeb22da48e9d3d755d2423e71969b 100644 (file)
@@ -785,7 +785,7 @@ extern struct of_device_id __clk_of_table;
  * routines, one at of_clk_init(), and one at platform device probe
  */
 #define CLK_OF_DECLARE_DRIVER(name, compat, fn) \
-       static void name##_of_clk_init_driver(struct device_node *np)   \
+       static void __init name##_of_clk_init_driver(struct device_node *np) \
        {                                                               \
                of_node_clear_flag(np, OF_POPULATED);                   \
                fn(np);                                                 \
index 573c5a18908fd53970fefea291805c500fd1d7f9..432f5c97e18f4f75fd68b1c1101cf828fb3d4fa0 100644 (file)
 #endif /* GCC_VERSION >= 40300 */
 
 #if GCC_VERSION >= 40500
+
+#ifndef __CHECKER__
+#ifdef LATENT_ENTROPY_PLUGIN
+#define __latent_entropy __attribute__((latent_entropy))
+#endif
+#endif
+
 /*
  * Mark a position in code as unreachable.  This can be used to
  * suppress control flow warnings after asm blocks that transfer
index 668569844d37cef4d51c6c7652d5f34ded2a66d6..cf0fa5d86059b6672773025b625027f6301074c1 100644 (file)
@@ -182,6 +182,29 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 # define unreachable() do { } while (1)
 #endif
 
+/*
+ * KENTRY - kernel entry point
+ * This can be used to annotate symbols (functions or data) that are used
+ * without their linker symbol being referenced explicitly. For example,
+ * interrupt vector handlers, or functions in the kernel image that are found
+ * programatically.
+ *
+ * Not required for symbols exported with EXPORT_SYMBOL, or initcalls. Those
+ * are handled in their own way (with KEEP() in linker scripts).
+ *
+ * KENTRY can be avoided if the symbols in question are marked as KEEP() in the
+ * linker script. For example an architecture could KEEP() its entire
+ * boot/exception vector code rather than annotate each function and data.
+ */
+#ifndef KENTRY
+# define KENTRY(sym)                                           \
+       extern typeof(sym) sym;                                 \
+       static const unsigned long __kentry_##sym               \
+       __used                                                  \
+       __attribute__((section("___kentry" "+" #sym ), used))   \
+       = (unsigned long)&sym;
+#endif
+
 #ifndef RELOC_HIDE
 # define RELOC_HIDE(ptr, off)                                  \
   ({ unsigned long __ptr;                                      \
@@ -406,6 +429,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 # define __attribute_const__   /* unimplemented */
 #endif
 
+#ifndef __latent_entropy
+# define __latent_entropy
+#endif
+
 /*
  * Tell gcc if a function is cold. The compiler will assume any path
  * directly leading to the call is unlikely.
index 3672809234a728ea9e7779b0456cbd57647d7150..d530c4627e54ef1091be820a78d3616166ca8906 100644 (file)
@@ -173,12 +173,6 @@ static inline void console_sysfs_notify(void)
 #endif
 extern bool console_suspend_enabled;
 
-#ifdef CONFIG_OF
-extern void console_set_by_of(void);
-#else
-static inline void console_set_by_of(void) {}
-#endif
-
 /* Suspend and resume console messages over PM events */
 extern void suspend_console(void);
 extern void resume_console(void);
index 631ba33bbe9fdb2fe0fd3f73cb1ab1773b52f617..32dc0cbd51ca3729bef594f7a84832bc6389e3ea 100644 (file)
@@ -639,19 +639,19 @@ static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy,
                                              unsigned int target_freq)
 {
        struct cpufreq_frequency_table *table = policy->freq_table;
+       struct cpufreq_frequency_table *pos, *best = table - 1;
        unsigned int freq;
-       int i, best = -1;
 
-       for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
-               freq = table[i].frequency;
+       cpufreq_for_each_valid_entry(pos, table) {
+               freq = pos->frequency;
 
                if (freq >= target_freq)
-                       return i;
+                       return pos - table;
 
-               best = i;
+               best = pos;
        }
 
-       return best;
+       return best - table;
 }
 
 /* Find lowest freq at or above target in a table in descending order */
@@ -659,28 +659,28 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy,
                                              unsigned int target_freq)
 {
        struct cpufreq_frequency_table *table = policy->freq_table;
+       struct cpufreq_frequency_table *pos, *best = table - 1;
        unsigned int freq;
-       int i, best = -1;
 
-       for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
-               freq = table[i].frequency;
+       cpufreq_for_each_valid_entry(pos, table) {
+               freq = pos->frequency;
 
                if (freq == target_freq)
-                       return i;
+                       return pos - table;
 
                if (freq > target_freq) {
-                       best = i;
+                       best = pos;
                        continue;
                }
 
                /* No freq found above target_freq */
-               if (best == -1)
-                       return i;
+               if (best == table - 1)
+                       return pos - table;
 
-               return best;
+               return best - table;
        }
 
-       return best;
+       return best - table;
 }
 
 /* Works only on sorted freq-tables */
@@ -700,28 +700,28 @@ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy,
                                              unsigned int target_freq)
 {
        struct cpufreq_frequency_table *table = policy->freq_table;
+       struct cpufreq_frequency_table *pos, *best = table - 1;
        unsigned int freq;
-       int i, best = -1;
 
-       for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
-               freq = table[i].frequency;
+       cpufreq_for_each_valid_entry(pos, table) {
+               freq = pos->frequency;
 
                if (freq == target_freq)
-                       return i;
+                       return pos - table;
 
                if (freq < target_freq) {
-                       best = i;
+                       best = pos;
                        continue;
                }
 
                /* No freq found below target_freq */
-               if (best == -1)
-                       return i;
+               if (best == table - 1)
+                       return pos - table;
 
-               return best;
+               return best - table;
        }
 
-       return best;
+       return best - table;
 }
 
 /* Find highest freq at or below target in a table in descending order */
@@ -729,19 +729,19 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy,
                                              unsigned int target_freq)
 {
        struct cpufreq_frequency_table *table = policy->freq_table;
+       struct cpufreq_frequency_table *pos, *best = table - 1;
        unsigned int freq;
-       int i, best = -1;
 
-       for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
-               freq = table[i].frequency;
+       cpufreq_for_each_valid_entry(pos, table) {
+               freq = pos->frequency;
 
                if (freq <= target_freq)
-                       return i;
+                       return pos - table;
 
-               best = i;
+               best = pos;
        }
 
-       return best;
+       return best - table;
 }
 
 /* Works only on sorted freq-tables */
@@ -761,32 +761,32 @@ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy,
                                              unsigned int target_freq)
 {
        struct cpufreq_frequency_table *table = policy->freq_table;
+       struct cpufreq_frequency_table *pos, *best = table - 1;
        unsigned int freq;
-       int i, best = -1;
 
-       for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
-               freq = table[i].frequency;
+       cpufreq_for_each_valid_entry(pos, table) {
+               freq = pos->frequency;
 
                if (freq == target_freq)
-                       return i;
+                       return pos - table;
 
                if (freq < target_freq) {
-                       best = i;
+                       best = pos;
                        continue;
                }
 
                /* No freq found below target_freq */
-               if (best == -1)
-                       return i;
+               if (best == table - 1)
+                       return pos - table;
 
                /* Choose the closest freq */
-               if (target_freq - table[best].frequency > freq - target_freq)
-                       return i;
+               if (target_freq - best->frequency > freq - target_freq)
+                       return pos - table;
 
-               return best;
+               return best - table;
        }
 
-       return best;
+       return best - table;
 }
 
 /* Find closest freq to target in a table in descending order */
@@ -794,32 +794,32 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy,
                                              unsigned int target_freq)
 {
        struct cpufreq_frequency_table *table = policy->freq_table;
+       struct cpufreq_frequency_table *pos, *best = table - 1;
        unsigned int freq;
-       int i, best = -1;
 
-       for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) {
-               freq = table[i].frequency;
+       cpufreq_for_each_valid_entry(pos, table) {
+               freq = pos->frequency;
 
                if (freq == target_freq)
-                       return i;
+                       return pos - table;
 
                if (freq > target_freq) {
-                       best = i;
+                       best = pos;
                        continue;
                }
 
                /* No freq found above target_freq */
-               if (best == -1)
-                       return i;
+               if (best == table - 1)
+                       return pos - table;
 
                /* Choose the closest freq */
-               if (table[best].frequency - target_freq > target_freq - freq)
-                       return i;
+               if (best->frequency - target_freq > target_freq - freq)
+                       return pos - table;
 
-               return best;
+               return best - table;
        }
 
-       return best;
+       return best - table;
 }
 
 /* Works only on sorted freq-tables */
index 9b207a8c5af3cedecc545dbf179464f32af4c4d0..afe641c02dca3320f3bf4255092deafeb77d536f 100644 (file)
@@ -81,6 +81,7 @@ enum cpuhp_state {
        CPUHP_AP_ARM_ARCH_TIMER_STARTING,
        CPUHP_AP_ARM_GLOBAL_TIMER_STARTING,
        CPUHP_AP_DUMMY_TIMER_STARTING,
+       CPUHP_AP_JCORE_TIMER_STARTING,
        CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
        CPUHP_AP_ARM_TWD_STARTING,
        CPUHP_AP_METAG_TIMER_STARTING,
index 4d3f0d1aec73b856552f5d5e30b706ef5d2e04ec..bf1907d96097c474685b8c8f6d52a5e70d86769b 100644 (file)
@@ -52,7 +52,8 @@ extern struct srcu_struct debugfs_srcu;
  * Must only be called under the protection established by
  * debugfs_use_file_start().
  */
-static inline const struct file_operations *debugfs_real_fops(struct file *filp)
+static inline const struct file_operations *
+debugfs_real_fops(const struct file *filp)
        __must_hold(&debugfs_srcu)
 {
        /*
index bc41e87a969bfb7f71029538d94be0a1e14ad5c5..a00105cf795e6a5d7e7cfee4169c25cfbd6a1593 100644 (file)
@@ -698,6 +698,25 @@ static inline int devm_add_action_or_reset(struct device *dev,
        return ret;
 }
 
+/**
+ * devm_alloc_percpu - Resource-managed alloc_percpu
+ * @dev: Device to allocate per-cpu memory for
+ * @type: Type to allocate per-cpu memory for
+ *
+ * Managed alloc_percpu. Per-cpu memory allocated with this function is
+ * automatically freed on driver detach.
+ *
+ * RETURNS:
+ * Pointer to allocated memory on success, NULL on failure.
+ */
+#define devm_alloc_percpu(dev, type)      \
+       ((typeof(type) __percpu *)__devm_alloc_percpu((dev), sizeof(type), \
+                                                     __alignof__(type)))
+
+void __percpu *__devm_alloc_percpu(struct device *dev, size_t size,
+                                  size_t align);
+void devm_free_percpu(struct device *dev, void __percpu *pdata);
+
 struct device_dma_parameters {
        /*
         * a low level driver may set these to teach IOMMU code about
index c934d3a96b5e7da7fd7966ddcb34682454100206..2896f93808ae3a8ed962cf1958e72ff2e5f8da3a 100644 (file)
@@ -67,7 +67,7 @@
  *     genl_magic_func.h
  *             generates an entry in the static genl_ops array,
  *             and static register/unregister functions to
- *             genl_register_family_with_ops().
+ *             genl_register_family().
  *
  *     flags and handler:
  *             GENL_op_init( .doit = x, .dumpit = y, .flags = something)
index d7df4922da1d08843253086b144199fd299fd1c5..2a0f61fbc7310e61f5927c31250e208d217c3e26 100644 (file)
@@ -1,5 +1,6 @@
 #ifndef _LINUX_EXPORT_H
 #define _LINUX_EXPORT_H
+
 /*
  * Export symbols from the kernel to modules.  Forked from module.h
  * to reduce the amount of pointless cruft we feed to gcc when only
@@ -42,27 +43,26 @@ extern struct module __this_module;
 #ifdef CONFIG_MODVERSIONS
 /* Mark the CRC weak since genksyms apparently decides not to
  * generate a checksums for some symbols */
-#define __CRC_SYMBOL(sym, sec)                                 \
-       extern __visible void *__crc_##sym __attribute__((weak));               \
-       static const unsigned long __kcrctab_##sym              \
-       __used                                                  \
-       __attribute__((section("___kcrctab" sec "+" #sym), unused))     \
+#define __CRC_SYMBOL(sym, sec)                                         \
+       extern __visible void *__crc_##sym __attribute__((weak));       \
+       static const unsigned long __kcrctab_##sym                      \
+       __used                                                          \
+       __attribute__((section("___kcrctab" sec "+" #sym), used))       \
        = (unsigned long) &__crc_##sym;
 #else
 #define __CRC_SYMBOL(sym, sec)
 #endif
 
 /* For every exported symbol, place a struct in the __ksymtab section */
-#define ___EXPORT_SYMBOL(sym, sec)                             \
-       extern typeof(sym) sym;                                 \
-       __CRC_SYMBOL(sym, sec)                                  \
-       static const char __kstrtab_##sym[]                     \
-       __attribute__((section("__ksymtab_strings"), aligned(1))) \
-       = VMLINUX_SYMBOL_STR(sym);                              \
-       extern const struct kernel_symbol __ksymtab_##sym;      \
-       __visible const struct kernel_symbol __ksymtab_##sym    \
-       __used                                                  \
-       __attribute__((section("___ksymtab" sec "+" #sym), unused))     \
+#define ___EXPORT_SYMBOL(sym, sec)                                     \
+       extern typeof(sym) sym;                                         \
+       __CRC_SYMBOL(sym, sec)                                          \
+       static const char __kstrtab_##sym[]                             \
+       __attribute__((section("__ksymtab_strings"), aligned(1)))       \
+       = VMLINUX_SYMBOL_STR(sym);                                      \
+       static const struct kernel_symbol __ksymtab_##sym               \
+       __used                                                          \
+       __attribute__((section("___ksymtab" sec "+" #sym), used))       \
        = { (unsigned long)&sym, __kstrtab_##sym }
 
 #if defined(__KSYM_DEPS__)
index aca2a6a1d0358f181264877965ff3e2ba882a8a0..6e84b2cae6ad62b529298b662856dc857c3091b6 100644 (file)
@@ -105,7 +105,7 @@ struct files_struct *get_files_struct(struct task_struct *);
 void put_files_struct(struct files_struct *fs);
 void reset_files_struct(struct files_struct *);
 int unshare_files(struct files_struct **);
-struct files_struct *dup_fd(struct files_struct *, int *);
+struct files_struct *dup_fd(struct files_struct *, int *) __latent_entropy;
 void do_close_on_exec(struct files_struct *);
 int iterate_fd(struct files_struct *, unsigned,
                int (*)(const void *, struct file *, unsigned),
index 1f09c521adfe23be913bb5780438bb1ae4ce1cd5..97338134398f678bb96515df98960325fc09f85e 100644 (file)
@@ -408,8 +408,8 @@ struct bpf_prog {
        enum bpf_prog_type      type;           /* Type of BPF program */
        struct bpf_prog_aux     *aux;           /* Auxiliary fields */
        struct sock_fprog_kern  *orig_prog;     /* Original BPF program */
-       unsigned int            (*bpf_func)(const struct sk_buff *skb,
-                                           const struct bpf_insn *filter);
+       unsigned int            (*bpf_func)(const void *ctx,
+                                           const struct bpf_insn *insn);
        /* Instructions for interpreter */
        union {
                struct sock_filter      insns[0];
@@ -438,7 +438,7 @@ struct xdp_buff {
 };
 
 /* compute the linear packet data range [data, data_end) which
- * will be accessed by cls_bpf and act_bpf programs
+ * will be accessed by cls_bpf, act_bpf and lwt programs
  */
 static inline void bpf_compute_data_end(struct sk_buff *skb)
 {
@@ -498,16 +498,16 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
        return BPF_PROG_RUN(prog, skb);
 }
 
-static inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
-                                  struct xdp_buff *xdp)
+static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
+                                           struct xdp_buff *xdp)
 {
-       u32 ret;
-
-       rcu_read_lock();
-       ret = BPF_PROG_RUN(prog, (void *)xdp);
-       rcu_read_unlock();
-
-       return ret;
+       /* Caller needs to hold rcu_read_lock() (!), otherwise program
+        * can be released while still running, or map elements could be
+        * freed early while still having concurrent users. XDP fastpath
+        * already takes rcu_read_lock() when fetching the program, so
+        * it's not necessary here anymore.
+        */
+       return BPF_PROG_RUN(prog, xdp);
 }
 
 static inline unsigned int bpf_prog_size(unsigned int proglen)
index c46d2aa16d81221c240ec46cf5c7fec6dbfe4bdd..1d18af0345543aaecbfc46a04230735c2b0b0cf7 100644 (file)
@@ -106,8 +106,9 @@ static inline void frontswap_invalidate_area(unsigned type)
 
 static inline void frontswap_init(unsigned type, unsigned long *map)
 {
-       if (frontswap_enabled())
-               __frontswap_init(type, map);
+#ifdef CONFIG_FRONTSWAP
+       __frontswap_init(type, map);
+#endif
 }
 
 #endif /* _LINUX_FRONTSWAP_H */
index bc65d5918140d3ab1d077eb5ed8f4655a8586c25..dc0478c07b2abd3887d7f5b1b84818a4ee24162e 100644 (file)
@@ -321,6 +321,7 @@ struct writeback_control;
 #define IOCB_HIPRI             (1 << 3)
 #define IOCB_DSYNC             (1 << 4)
 #define IOCB_SYNC              (1 << 5)
+#define IOCB_WRITE             (1 << 6)
 
 struct kiocb {
        struct file             *ki_filp;
@@ -1709,7 +1710,6 @@ struct file_operations {
        int (*flush) (struct file *, fl_owner_t id);
        int (*release) (struct inode *, struct file *);
        int (*fsync) (struct file *, loff_t, loff_t, int datasync);
-       int (*aio_fsync) (struct kiocb *, int datasync);
        int (*fasync) (int, struct file *, int);
        int (*lock) (struct file *, int, struct file_lock *);
        ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
@@ -2934,6 +2934,7 @@ extern int vfs_stat(const char __user *, struct kstat *);
 extern int vfs_lstat(const char __user *, struct kstat *);
 extern int vfs_fstat(unsigned int, struct kstat *);
 extern int vfs_fstatat(int , const char __user *, struct kstat *, int);
+extern const char *vfs_get_link(struct dentry *, struct delayed_call *);
 
 extern int __generic_block_fiemap(struct inode *inode,
                                  struct fiemap_extent_info *fieinfo,
index 1dbf52f9c24b88a0f4a299bae5ec3bfdcea1e566..e0341af6950e2116a43b3b0281f57fea8099c06f 100644 (file)
@@ -437,7 +437,7 @@ extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
 extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
 
 /* drivers/char/random.c */
-extern void add_disk_randomness(struct gendisk *disk);
+extern void add_disk_randomness(struct gendisk *disk) __latent_entropy;
 extern void rand_initialize_disk(struct gendisk *disk);
 
 static inline sector_t get_start_sect(struct block_device *bdev)
index 667c31101b8b91f0b1d17a99a08edd9cf8d29d63..377257d8f7e3557fe1f98eae3a5f906eb68bac44 100644 (file)
@@ -259,16 +259,7 @@ static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
  *                                                                     {{{2
  */
 #define ZZZ_genl_family                CONCAT_(GENL_MAGIC_FAMILY, _genl_family)
-static struct genl_family ZZZ_genl_family __read_mostly = {
-       .id = GENL_ID_GENERATE,
-       .name = __stringify(GENL_MAGIC_FAMILY),
-       .version = GENL_MAGIC_VERSION,
-#ifdef GENL_MAGIC_FAMILY_HDRSZ
-       .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
-#endif
-       .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1,
-};
-
+static struct genl_family ZZZ_genl_family;
 /*
  * Magic: define multicast groups
  * Magic: define multicast group registration helper
@@ -302,11 +293,23 @@ static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \
 #undef GENL_mc_group
 #define GENL_mc_group(group)
 
+static struct genl_family ZZZ_genl_family __ro_after_init = {
+       .name = __stringify(GENL_MAGIC_FAMILY),
+       .version = GENL_MAGIC_VERSION,
+#ifdef GENL_MAGIC_FAMILY_HDRSZ
+       .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
+#endif
+       .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1,
+       .ops = ZZZ_genl_ops,
+       .n_ops = ARRAY_SIZE(ZZZ_genl_ops),
+       .mcgrps = ZZZ_genl_mcgrps,
+       .n_mcgrps = ARRAY_SIZE(ZZZ_genl_mcgrps),
+       .module = THIS_MODULE,
+};
+
 int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
 {
-       return genl_register_family_with_ops_groups(&ZZZ_genl_family,   \
-                                                   ZZZ_genl_ops,       \
-                                                   ZZZ_genl_mcgrps);
+       return genl_register_family(&ZZZ_genl_family);
 }
 
 void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void)
index 9b9f65d9987393d456911f41eacb4bdfa9fe0284..e35e6de633b9a7bc2a080d6b3596aa16e8c8582f 100644 (file)
@@ -22,7 +22,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                        unsigned char *vec);
 extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
                         unsigned long new_addr, unsigned long old_end,
-                        pmd_t *old_pmd, pmd_t *new_pmd);
+                        pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                        unsigned long addr, pgprot_t newprot,
                        int prot_numa);
index 6824556d37ed2cd4fb60e7aacd74f594986880c3..cd184bdca58fdc28e0ae59432445248c588fd2fc 100644 (file)
@@ -1169,13 +1169,6 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver,
                                         const char *mod_name);
 void vmbus_driver_unregister(struct hv_driver *hv_driver);
 
-static inline const char *vmbus_dev_name(const struct hv_device *device_obj)
-{
-       const struct kobject *kobj = &device_obj->device.kobj;
-
-       return kobj->name;
-}
-
 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel);
 
 int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
index a80516fd65c8c12f658346be54fee320b5c513b7..fe849329511a7b4e46ddb6d61441b8675131eb1b 100644 (file)
@@ -1576,6 +1576,9 @@ struct ieee80211_vht_operation {
 #define WLAN_AUTH_SHARED_KEY 1
 #define WLAN_AUTH_FT 2
 #define WLAN_AUTH_SAE 3
+#define WLAN_AUTH_FILS_SK 4
+#define WLAN_AUTH_FILS_SK_PFS 5
+#define WLAN_AUTH_FILS_PK 6
 #define WLAN_AUTH_LEAP 128
 
 #define WLAN_AUTH_CHALLENGE_LEN 128
@@ -1960,6 +1963,26 @@ enum ieee80211_eid {
 
        WLAN_EID_VENDOR_SPECIFIC = 221,
        WLAN_EID_QOS_PARAMETER = 222,
+       WLAN_EID_CAG_NUMBER = 237,
+       WLAN_EID_AP_CSN = 239,
+       WLAN_EID_FILS_INDICATION = 240,
+       WLAN_EID_DILS = 241,
+       WLAN_EID_FRAGMENT = 242,
+       WLAN_EID_EXTENSION = 255
+};
+
+/* Element ID Extensions for Element ID 255 */
+enum ieee80211_eid_ext {
+       WLAN_EID_EXT_ASSOC_DELAY_INFO = 1,
+       WLAN_EID_EXT_FILS_REQ_PARAMS = 2,
+       WLAN_EID_EXT_FILS_KEY_CONFIRM = 3,
+       WLAN_EID_EXT_FILS_SESSION = 4,
+       WLAN_EID_EXT_FILS_HLP_CONTAINER = 5,
+       WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN = 6,
+       WLAN_EID_EXT_KEY_DELIVERY = 7,
+       WLAN_EID_EXT_FILS_WRAPPED_DATA = 8,
+       WLAN_EID_EXT_FILS_PUBLIC_KEY = 12,
+       WLAN_EID_EXT_FILS_NONCE = 13,
 };
 
 /* Action category code */
@@ -2073,6 +2096,9 @@ enum ieee80211_key_len {
 #define IEEE80211_GCMP_MIC_LEN         16
 #define IEEE80211_GCMP_PN_LEN          6
 
+#define FILS_NONCE_LEN                 16
+#define FILS_MAX_KEK_LEN               64
+
 /* Public action codes */
 enum ieee80211_pub_actioncode {
        WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4,
index f563907ed7767fdf231558e2e042e670b5d62c1d..3355efc8978164cd56c0b77c862511bb11122a2e 100644 (file)
@@ -44,4 +44,20 @@ static inline int arp_hdr_len(struct net_device *dev)
                return sizeof(struct arphdr) + (dev->addr_len + sizeof(u32)) * 2;
        }
 }
+
+static inline bool dev_is_mac_header_xmit(const struct net_device *dev)
+{
+       switch (dev->type) {
+       case ARPHRD_TUNNEL:
+       case ARPHRD_TUNNEL6:
+       case ARPHRD_SIT:
+       case ARPHRD_IPGRE:
+       case ARPHRD_VOID:
+       case ARPHRD_NONE:
+               return false;
+       default:
+               return true;
+       }
+}
+
 #endif /* _LINUX_IF_ARP_H */
index 3319d97d789dfd84773a0b361633c69896d342f3..8d5fcd6284ce0f4702d9eb28703bf2ae80d7e399 100644 (file)
@@ -399,22 +399,6 @@ static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb)
                skb->vlan_tci = 0;
        return skb;
 }
-/*
- * vlan_hwaccel_push_inside - pushes vlan tag to the payload
- * @skb: skbuff to tag
- *
- * Checks is tag is present in @skb->vlan_tci and if it is, it pushes the
- * VLAN tag from @skb->vlan_tci inside to the payload.
- *
- * Following the skb_unshare() example, in case of error, the calling function
- * doesn't have to worry about freeing the original skb.
- */
-static inline struct sk_buff *vlan_hwaccel_push_inside(struct sk_buff *skb)
-{
-       if (skb_vlan_tag_present(skb))
-               skb = __vlan_hwaccel_push_inside(skb);
-       return skb;
-}
 
 /**
  * __vlan_hwaccel_put_tag - hardware accelerated VLAN inserting
index 5a3321a7909b92f489f17a2e59c9aec8c90e35ba..e30104ceb86dcb3076f7950d34d2776735b132ff 100644 (file)
@@ -39,7 +39,7 @@
 
 /* These are for everybody (although not all archs will actually
    discard it in modules) */
-#define __init         __section(.init.text) __cold notrace
+#define __init         __section(.init.text) __cold notrace __latent_entropy
 #define __initdata     __section(.init.data)
 #define __initconst    __section(.init.rodata)
 #define __exitdata     __section(.exit.data)
@@ -75,7 +75,8 @@
 #define __exit          __section(.exit.text) __exitused __cold notrace
 
 /* Used for MEMORY_HOTPLUG */
-#define __meminit        __section(.meminit.text) __cold notrace
+#define __meminit        __section(.meminit.text) __cold notrace \
+                                                 __latent_entropy
 #define __meminitdata    __section(.meminit.data)
 #define __meminitconst   __section(.meminit.rodata)
 #define __memexit        __section(.memexit.text) __exitused __cold notrace
@@ -139,24 +140,8 @@ extern bool initcall_debug;
 
 #ifndef __ASSEMBLY__
 
-#ifdef CONFIG_LTO
-/* Work around a LTO gcc problem: when there is no reference to a variable
- * in a module it will be moved to the end of the program. This causes
- * reordering of initcalls which the kernel does not like.
- * Add a dummy reference function to avoid this. The function is
- * deleted by the linker.
- */
-#define LTO_REFERENCE_INITCALL(x) \
-       ; /* yes this is needed */                      \
-       static __used __exit void *reference_##x(void)  \
-       {                                               \
-               return &x;                              \
-       }
-#else
-#define LTO_REFERENCE_INITCALL(x)
-#endif
-
-/* initcalls are now grouped by functionality into separate 
+/*
+ * initcalls are now grouped by functionality into separate
  * subsections. Ordering inside the subsections is determined
  * by link order. 
  * For backwards compatibility, initcall() puts the call in 
@@ -164,12 +149,16 @@ extern bool initcall_debug;
  *
  * The `id' arg to __define_initcall() is needed so that multiple initcalls
  * can point at the same handler without causing duplicate-symbol build errors.
+ *
+ * Initcalls are run by placing pointers in initcall sections that the
+ * kernel iterates at runtime. The linker can do dead code / data elimination
+ * and remove that completely, so the initcall sections have to be marked
+ * as KEEP() in the linker script.
  */
 
 #define __define_initcall(fn, id) \
        static initcall_t __initcall_##fn##id __used \
-       __attribute__((__section__(".initcall" #id ".init"))) = fn; \
-       LTO_REFERENCE_INITCALL(__initcall_##fn##id)
+       __attribute__((__section__(".initcall" #id ".init"))) = fn;
 
 /*
  * Early initcalls run before initializing SMP.
@@ -205,15 +194,15 @@ extern bool initcall_debug;
 
 #define __initcall(fn) device_initcall(fn)
 
-#define __exitcall(fn) \
+#define __exitcall(fn)                                         \
        static exitcall_t __exitcall_##fn __exit_call = fn
 
-#define console_initcall(fn) \
-       static initcall_t __initcall_##fn \
+#define console_initcall(fn)                                   \
+       static initcall_t __initcall_##fn                       \
        __used __section(.con_initcall.init) = fn
 
-#define security_initcall(fn) \
-       static initcall_t __initcall_##fn \
+#define security_initcall(fn)                                  \
+       static initcall_t __initcall_##fn                       \
        __used __section(.security_initcall.init) = fn
 
 struct obs_kernel_param {
index e2c8419278c192fd8002ae0b470ea2421dc51a57..82ef36eac8a16a8fc2b7b021f345910ac04b04b4 100644 (file)
@@ -141,4 +141,26 @@ enum {
 void *memremap(resource_size_t offset, size_t size, unsigned long flags);
 void memunmap(void *addr);
 
+/*
+ * On x86 PAT systems we have memory tracking that keeps track of
+ * the allowed mappings on memory ranges. This tracking works for
+ * all the in-kernel mapping APIs (ioremap*), but where the user
+ * wishes to map a range from a physical device into user memory
+ * the tracking won't be updated. This API is to be used by
+ * drivers which remap physical device pages into userspace,
+ * and want to make sure they are mapped WC and not UC.
+ */
+#ifndef arch_io_reserve_memtype_wc
+static inline int arch_io_reserve_memtype_wc(resource_size_t base,
+                                            resource_size_t size)
+{
+       return 0;
+}
+
+static inline void arch_io_free_memtype_wc(resource_size_t base,
+                                          resource_size_t size)
+{
+}
+#endif
+
 #endif /* _LINUX_IO_H */
index e63e288dee836c5c81d88909550ee1155eb160b2..7892f55a1866db26d5c4edabf69a59606a2efa3f 100644 (file)
@@ -19,11 +19,15 @@ struct vm_fault;
 #define IOMAP_UNWRITTEN        0x04    /* blocks allocated @blkno in unwritten state */
 
 /*
- * Flags for iomap mappings:
+ * Flags for all iomap mappings:
  */
-#define IOMAP_F_MERGED 0x01    /* contains multiple blocks/extents */
-#define IOMAP_F_SHARED 0x02    /* block shared with another file */
-#define IOMAP_F_NEW    0x04    /* blocks have been newly allocated */
+#define IOMAP_F_NEW    0x01    /* blocks have been newly allocated */
+
+/*
+ * Flags that only need to be reported for IOMAP_REPORT requests:
+ */
+#define IOMAP_F_MERGED 0x10    /* contains multiple blocks/extents */
+#define IOMAP_F_SHARED 0x20    /* block shared with another file */
 
 /*
  * Magic value for blkno:
@@ -42,8 +46,9 @@ struct iomap {
 /*
  * Flags for iomap_begin / iomap_end.  No flag implies a read.
  */
-#define IOMAP_WRITE            (1 << 0)
-#define IOMAP_ZERO             (1 << 1)
+#define IOMAP_WRITE            (1 << 0) /* writing, must allocate blocks */
+#define IOMAP_ZERO             (1 << 1) /* zeroing operation, may skip holes */
+#define IOMAP_REPORT           (1 << 2) /* report extent status, e.g. FIEMAP */
 
 struct iomap_ops {
        /*
index 7e9a789be5e0df0198fbebcded35f1d90bc8a650..3f95233b2733d790bd2e488c3227e187e8695ee2 100644 (file)
@@ -64,6 +64,10 @@ struct ipv6_devconf {
        } stable_secret;
        __s32           use_oif_addrs_only;
        __s32           keep_addr_on_down;
+       __s32           seg6_enabled;
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       __s32           seg6_require_hmac;
+#endif
 
        struct ctl_table_header *sysctl_header;
 };
@@ -123,12 +127,12 @@ struct inet6_skb_parm {
 };
 
 #if defined(CONFIG_NET_L3_MASTER_DEV)
-static inline bool skb_l3mdev_slave(__u16 flags)
+static inline bool ipv6_l3mdev_skb(__u16 flags)
 {
        return flags & IP6SKB_L3SLAVE;
 }
 #else
-static inline bool skb_l3mdev_slave(__u16 flags)
+static inline bool ipv6_l3mdev_skb(__u16 flags)
 {
        return false;
 }
@@ -139,11 +143,22 @@ static inline bool skb_l3mdev_slave(__u16 flags)
 
 static inline int inet6_iif(const struct sk_buff *skb)
 {
-       bool l3_slave = skb_l3mdev_slave(IP6CB(skb)->flags);
+       bool l3_slave = ipv6_l3mdev_skb(IP6CB(skb)->flags);
 
        return l3_slave ? skb->skb_iif : IP6CB(skb)->iif;
 }
 
+/* can not be used in TCP layer after tcp_v6_fill_cb */
+static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb)
+{
+#if defined(CONFIG_NET_L3_MASTER_DEV)
+       if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
+           skb && ipv6_l3mdev_skb(IP6CB(skb)->flags))
+               return true;
+#endif
+       return false;
+}
+
 struct tcp6_request_sock {
        struct tcp_request_sock   tcp6rsk_tcp;
 };
@@ -218,8 +233,9 @@ struct ipv6_pinfo {
                                 rxflow:1,
                                rxtclass:1,
                                rxpmtu:1,
-                               rxorigdstaddr:1;
-                               /* 2 bits hole */
+                               rxorigdstaddr:1,
+                               recvfragsize:1;
+                               /* 1 bit hole */
                } bits;
                __u16           all;
        } rxopt;
index 8361c8d3edd10d8ae050f491d07cff180f407f35..b7e34313cdfe4a68fee46334158d7d22111993e9 100644 (file)
 #define GITS_BASER_TYPE_SHIFT                  (56)
 #define GITS_BASER_TYPE(r)             (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
 #define GITS_BASER_ENTRY_SIZE_SHIFT            (48)
-#define GITS_BASER_ENTRY_SIZE(r)       ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0xff) + 1)
+#define GITS_BASER_ENTRY_SIZE(r)       ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
 #define GITS_BASER_SHAREABILITY_SHIFT  (10)
 #define GITS_BASER_InnerShareable                                      \
        GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
index d600303306eb7b54bbc68b62c89243e778f2a75c..820c0ad54a0117596e63bae6845b1ebd771c3412 100644 (file)
@@ -44,6 +44,7 @@ static inline void kasan_disable_current(void)
 void kasan_unpoison_shadow(const void *address, size_t size);
 
 void kasan_unpoison_task_stack(struct task_struct *task);
+void kasan_unpoison_stack_above_sp_to(const void *watermark);
 
 void kasan_alloc_pages(struct page *page, unsigned int order);
 void kasan_free_pages(struct page *page, unsigned int order);
@@ -85,6 +86,7 @@ size_t kasan_metadata_size(struct kmem_cache *cache);
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
 
 static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
+static inline void kasan_unpoison_stack_above_sp_to(const void *watermark) {}
 
 static inline void kasan_enable_current(void) {}
 static inline void kasan_disable_current(void) {}
index 15ec117ec5373e8c98ac801d433e67a8aa11e974..8f2e059e4d45559b54c1fbd087181865beac7af7 100644 (file)
@@ -31,7 +31,6 @@
  * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when
  * the last step cherry picks the 2nd arg, we get a zero.
  */
-#define config_enabled(cfg)            ___is_defined(cfg)
 #define __is_defined(x)                        ___is_defined(x)
 #define ___is_defined(val)             ____is_defined(__ARG_PLACEHOLDER_##val)
 #define ____is_defined(arg1_or_junk)   __take_second_arg(arg1_or_junk 1, 0)
  * otherwise. For boolean options, this is equivalent to
  * IS_ENABLED(CONFIG_FOO).
  */
-#define IS_BUILTIN(option) config_enabled(option)
+#define IS_BUILTIN(option) __is_defined(option)
 
 /*
  * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0
  * otherwise.
  */
-#define IS_MODULE(option) config_enabled(option##_MODULE)
+#define IS_MODULE(option) __is_defined(option##_MODULE)
 
 /*
  * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled
index 96356ef012de722fe4234cefe606392bfb5ca36a..7056238fd9f5cfd0b495edbbd75093f9141977e5 100644 (file)
@@ -269,10 +269,8 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
 }
 
 int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
-size_t kernfs_path_len(struct kernfs_node *kn);
 int kernfs_path_from_node(struct kernfs_node *root_kn, struct kernfs_node *kn,
                          char *buf, size_t buflen);
-char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen);
 void pr_cont_kernfs_name(struct kernfs_node *kn);
 void pr_cont_kernfs_path(struct kernfs_node *kn);
 struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn);
@@ -341,12 +339,10 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
 static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
 { return -ENOSYS; }
 
-static inline size_t kernfs_path_len(struct kernfs_node *kn)
-{ return 0; }
-
-static inline char *kernfs_path(struct kernfs_node *kn, char *buf,
-                               size_t buflen)
-{ return NULL; }
+static inline int kernfs_path_from_node(struct kernfs_node *root_kn,
+                                       struct kernfs_node *kn,
+                                       char *buf, size_t buflen)
+{ return -ENOSYS; }
 
 static inline void pr_cont_kernfs_name(struct kernfs_node *kn) { }
 static inline void pr_cont_kernfs_path(struct kernfs_node *kn) { }
@@ -436,6 +432,22 @@ static inline void kernfs_init(void) { }
 
 #endif /* CONFIG_KERNFS */
 
+/**
+ * kernfs_path - build full path of a given node
+ * @kn: kernfs_node of interest
+ * @buf: buffer to copy @kn's name into
+ * @buflen: size of @buf
+ *
+ * Builds the full path of @kn in @buf of @buflen bytes by calling
+ * kernfs_path_from_node() and returns that function's int result, not a
+ * pointer into @buf.  The static inline stub of kernfs_path_from_node()
+ * provided earlier in this header returns -ENOSYS.
+ */
+static inline int kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
+{
+       return kernfs_path_from_node(kn, NULL, buf, buflen);
+}
+
 static inline struct kernfs_node *
 kernfs_find_and_get(struct kernfs_node *kn, const char *name)
 {
index e37d4f99f510ae2f1953ea07c9b6fe29a0cc401d..616eef4d81ea38af48cf809ecdbc2a721aff4640 100644 (file)
@@ -46,7 +46,8 @@
 #ifdef CONFIG_ATA_NONSTANDARD
 #include <asm/libata-portmap.h>
 #else
-#include <asm-generic/libata-portmap.h>
+#define ATA_PRIMARY_IRQ(dev)   14
+#define ATA_SECONDARY_IRQ(dev) 15
 #endif
 
 /*
index 2931aa43dab11cf88cbfd9b7ab4db88967801c44..0d3f14fd26217fcb2a472c411c2e9ee73a6c6c85 100644 (file)
@@ -82,6 +82,7 @@ static inline int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size,
 }
 #endif
 
+#ifdef CONFIG_MVEBU_MBUS
 int mvebu_mbus_save_cpu_target(u32 __iomem *store_addr);
 void mvebu_mbus_get_pcie_mem_aperture(struct resource *res);
 void mvebu_mbus_get_pcie_io_aperture(struct resource *res);
@@ -97,5 +98,12 @@ int mvebu_mbus_init(const char *soc, phys_addr_t mbus_phys_base,
                    size_t mbus_size, phys_addr_t sdram_phys_base,
                    size_t sdram_size);
 int mvebu_mbus_dt_init(bool is_coherent);
+#else
+static inline int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target,
+                                              u8 *attr)
+{
+       return -EINVAL;
+}
+#endif /* CONFIG_MVEBU_MBUS */
 
 #endif /* __LINUX_MBUS_H */
index 47492c9631b3826fb8a52a53f6dbbdcbb7bbe2e3..1629a0c32679d907da9984f6fc4b211d472ca875 100644 (file)
@@ -31,7 +31,11 @@ struct mii_if_info {
 extern int mii_link_ok (struct mii_if_info *mii);
 extern int mii_nway_restart (struct mii_if_info *mii);
 extern int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd);
+extern int mii_ethtool_get_link_ksettings(
+       struct mii_if_info *mii, struct ethtool_link_ksettings *cmd);
 extern int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd);
+extern int mii_ethtool_set_link_ksettings(
+       struct mii_if_info *mii, const struct ethtool_link_ksettings *cmd);
 extern int mii_check_gmii_support(struct mii_if_info *mii);
 extern void mii_check_link (struct mii_if_info *mii);
 extern unsigned int mii_check_media (struct mii_if_info *mii,
index f6a16429735812f678f96595dff75e603504a8af..3be7abd6e722d0c0f6a34f40a21327b81251f72e 100644 (file)
@@ -1399,7 +1399,8 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
                    u32 *lkey, u32 *rkey);
 int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_SYNC_TPT(struct mlx4_dev *dev);
-int mlx4_test_interrupts(struct mlx4_dev *dev);
+int mlx4_test_interrupt(struct mlx4_dev *dev, int vector);
+int mlx4_test_async(struct mlx4_dev *dev);
 int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier,
                             const u32 offset[], u32 value[],
                             size_t array_len, u8 port);
index 58276144ba81f338ae82f8d3ab960c7dff9d58e3..9f489365b3d39c2400fd2fd19099ca98803396d7 100644 (file)
@@ -277,6 +277,7 @@ enum mlx5_event {
        MLX5_EVENT_TYPE_INTERNAL_ERROR     = 0x08,
        MLX5_EVENT_TYPE_PORT_CHANGE        = 0x09,
        MLX5_EVENT_TYPE_GPIO_EVENT         = 0x15,
+       MLX5_EVENT_TYPE_PORT_MODULE_EVENT  = 0x16,
        MLX5_EVENT_TYPE_REMOTE_CONFIG      = 0x19,
 
        MLX5_EVENT_TYPE_DB_BF_CONGESTION   = 0x1a,
@@ -552,6 +553,15 @@ struct mlx5_eqe_vport_change {
        __be32          rsvd1[6];
 } __packed;
 
+struct mlx5_eqe_port_module {
+       u8        reserved_at_0[1];
+       u8        module;
+       u8        reserved_at_2[1];
+       u8        module_status;
+       u8        reserved_at_4[2];
+       u8        error_type;
+} __packed;
+
 union ev_data {
        __be32                          raw[7];
        struct mlx5_eqe_cmd             cmd;
@@ -565,6 +575,7 @@ union ev_data {
        struct mlx5_eqe_page_req        req_pages;
        struct mlx5_eqe_page_fault      page_fault;
        struct mlx5_eqe_vport_change    vport_change;
+       struct mlx5_eqe_port_module     port_module;
 } __packed;
 
 struct mlx5_eqe {
@@ -1060,6 +1071,11 @@ enum {
        MLX5_INFINIBAND_PORT_COUNTERS_GROUP   = 0x20,
 };
 
+enum {
+       MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP       = 0x0,
+       MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP = 0x2,
+};
+
 static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
 {
        if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE)
index 85c4786427e49686f5adedeb962e67dfbc375616..0ae55361e674be4c08ffa33c3c69573eee7bf99b 100644 (file)
@@ -104,6 +104,8 @@ enum {
 enum {
        MLX5_REG_QETCR           = 0x4005,
        MLX5_REG_QTCT            = 0x400a,
+       MLX5_REG_DCBX_PARAM      = 0x4020,
+       MLX5_REG_DCBX_APP        = 0x4021,
        MLX5_REG_PCAP            = 0x5001,
        MLX5_REG_PMTU            = 0x5003,
        MLX5_REG_PTYS            = 0x5004,
@@ -121,6 +123,12 @@ enum {
        MLX5_REG_HOST_ENDIANNESS = 0x7004,
        MLX5_REG_MCIA            = 0x9014,
        MLX5_REG_MLCR            = 0x902b,
+       MLX5_REG_MPCNT           = 0x9051,
+};
+
+enum mlx5_dcbx_oper_mode {
+       MLX5E_DCBX_PARAM_VER_OPER_HOST  = 0x0,
+       MLX5E_DCBX_PARAM_VER_OPER_AUTO  = 0x3,
 };
 
 enum {
@@ -208,7 +216,7 @@ struct mlx5_cmd_first {
 
 struct mlx5_cmd_msg {
        struct list_head                list;
-       struct cache_ent               *cache;
+       struct cmd_msg_cache           *parent;
        u32                             len;
        struct mlx5_cmd_first           first;
        struct mlx5_cmd_mailbox        *next;
@@ -228,17 +236,17 @@ struct mlx5_cmd_debug {
        u16                     outlen;
 };
 
-struct cache_ent {
+struct cmd_msg_cache {
        /* protect block chain allocations
         */
        spinlock_t              lock;
        struct list_head        head;
+       unsigned int            max_inbox_size;
+       unsigned int            num_ent;
 };
 
-struct cmd_msg_cache {
-       struct cache_ent        large;
-       struct cache_ent        med;
-
+enum {
+       MLX5_NUM_COMMAND_CACHES = 5,
 };
 
 struct mlx5_cmd_stats {
@@ -281,7 +289,7 @@ struct mlx5_cmd {
        struct mlx5_cmd_work_ent *ent_arr[MLX5_MAX_COMMANDS];
        struct pci_pool *pool;
        struct mlx5_cmd_debug dbg;
-       struct cmd_msg_cache cache;
+       struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES];
        int checksum_disabled;
        struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX];
 };
@@ -310,6 +318,13 @@ struct mlx5_buf {
        u8                      page_shift;
 };
 
+struct mlx5_frag_buf {
+       struct mlx5_buf_list    *frags;
+       int                     npages;
+       int                     size;
+       u8                      page_shift;
+};
+
 struct mlx5_eq_tasklet {
        struct list_head list;
        struct list_head process_list;
@@ -418,8 +433,12 @@ struct mlx5_core_health {
        u32                             prev;
        int                             miss_counter;
        bool                            sick;
+       /* wq spinlock to synchronize draining */
+       spinlock_t                      wq_lock;
        struct workqueue_struct        *wq;
+       unsigned long                   flags;
        struct work_struct              work;
+       struct delayed_work             recover_work;
 };
 
 struct mlx5_cq_table {
@@ -494,6 +513,31 @@ struct mlx5_rl_table {
        struct mlx5_rl_entry   *rl_entry;
 };
 
+enum port_module_event_status_type {
+       MLX5_MODULE_STATUS_PLUGGED   = 0x1,
+       MLX5_MODULE_STATUS_UNPLUGGED = 0x2,
+       MLX5_MODULE_STATUS_ERROR     = 0x3,
+       MLX5_MODULE_STATUS_NUM       = 0x3,
+};
+
+enum  port_module_event_error_type {
+       MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED,
+       MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE,
+       MLX5_MODULE_EVENT_ERROR_BUS_STUCK,
+       MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT,
+       MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST,
+       MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER,
+       MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE,
+       MLX5_MODULE_EVENT_ERROR_BAD_CABLE,
+       MLX5_MODULE_EVENT_ERROR_UNKNOWN,
+       MLX5_MODULE_EVENT_ERROR_NUM,
+};
+
+struct mlx5_port_module_event_stats {
+       u64 status_counters[MLX5_MODULE_STATUS_NUM];
+       u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM];
+};
+
 struct mlx5_priv {
        char                    name[MLX5_MAX_NAME_LEN];
        struct mlx5_eq_table    eq_table;
@@ -555,6 +599,8 @@ struct mlx5_priv {
        unsigned long           pci_dev_data;
        struct mlx5_fc_stats            fc_stats;
        struct mlx5_rl_table            rl_table;
+
+       struct mlx5_port_module_event_stats  pme_stats;
 };
 
 enum mlx5_device_state {
@@ -625,10 +671,6 @@ struct mlx5_db {
        int                     index;
 };
 
-enum {
-       MLX5_DB_PER_PAGE = PAGE_SIZE / L1_CACHE_BYTES,
-};
-
 enum {
        MLX5_COMP_EQ_SIZE = 1024,
 };
@@ -638,13 +680,6 @@ enum {
        MLX5_PTYS_EN = 1 << 2,
 };
 
-struct mlx5_db_pgdir {
-       struct list_head        list;
-       DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE);
-       __be32                 *db_page;
-       dma_addr_t              db_dma;
-};
-
 typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
 
 struct mlx5_cmd_work_ent {
@@ -789,10 +824,14 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev);
 int mlx5_health_init(struct mlx5_core_dev *dev);
 void mlx5_start_health_poll(struct mlx5_core_dev *dev);
 void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
+void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
 int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
                        struct mlx5_buf *buf, int node);
 int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
 void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
+int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+                            struct mlx5_frag_buf *buf, int node);
+void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
 struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
                                                      gfp_t flags, int npages);
 void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
@@ -837,6 +876,7 @@ void mlx5_unregister_debugfs(void);
 int mlx5_eq_init(struct mlx5_core_dev *dev);
 void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
 void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
+void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
index 93ebc5e213345213d33d99e6e68970711f42c6e3..949b24b6c4794ce14909d779b7dbfd2534aa53db 100644 (file)
@@ -42,6 +42,10 @@ enum {
        MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO  = 1 << 16,
 };
 
+enum {
+       MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0),
+};
+
 #define LEFTOVERS_RULE_NUM      2
 static inline void build_leftovers_ft_param(int *priority,
                                            int *n_ent,
@@ -69,8 +73,8 @@ enum mlx5_flow_namespace_type {
 
 struct mlx5_flow_table;
 struct mlx5_flow_group;
-struct mlx5_flow_rule;
 struct mlx5_flow_namespace;
+struct mlx5_flow_handle;
 
 struct mlx5_flow_spec {
        u8   match_criteria_enable;
@@ -97,13 +101,15 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
                                    int prio,
                                    int num_flow_table_entries,
                                    int max_num_groups,
-                                   u32 level);
+                                   u32 level,
+                                   u32 flags);
 
 struct mlx5_flow_table *
 mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
                       int prio,
                       int num_flow_table_entries,
-                      u32 level);
+                      u32 level,
+                      u32 flags);
 struct mlx5_flow_table *
 mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
                             int prio,
@@ -124,21 +130,28 @@ struct mlx5_flow_group *
 mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *in);
 void mlx5_destroy_flow_group(struct mlx5_flow_group *fg);
 
+struct mlx5_flow_act {
+       u32 action;
+       u32 flow_tag;
+       u32 encap_id;
+};
+
 /* Single destination per rule.
  * Group ID is implied by the match criteria.
  */
-struct mlx5_flow_rule *
-mlx5_add_flow_rule(struct mlx5_flow_table *ft,
-                  struct mlx5_flow_spec *spec,
-                  u32 action,
-                  u32 flow_tag,
-                  struct mlx5_flow_destination *dest);
-void mlx5_del_flow_rule(struct mlx5_flow_rule *fr);
-
-int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
-                                struct mlx5_flow_destination *dest);
-
-struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule);
+struct mlx5_flow_handle *
+mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+                   struct mlx5_flow_spec *spec,
+                   struct mlx5_flow_act *flow_act,
+                   struct mlx5_flow_destination *dest,
+                   int dest_num);
+void mlx5_del_flow_rules(struct mlx5_flow_handle *fr);
+
+int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler,
+                                struct mlx5_flow_destination *new_dest,
+                                struct mlx5_flow_destination *old_dest);
+
+struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler);
 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
index 6045d4d580656b95a69a5f773db84945f39f9a23..a5f0fbedf1e7c8f303fa6e3cccb7af400e1f1a87 100644 (file)
@@ -83,6 +83,7 @@ enum {
        MLX5_CMD_OP_SET_HCA_CAP                   = 0x109,
        MLX5_CMD_OP_QUERY_ISSI                    = 0x10a,
        MLX5_CMD_OP_SET_ISSI                      = 0x10b,
+       MLX5_CMD_OP_SET_DRIVER_VERSION            = 0x10d,
        MLX5_CMD_OP_CREATE_MKEY                   = 0x200,
        MLX5_CMD_OP_QUERY_MKEY                    = 0x201,
        MLX5_CMD_OP_DESTROY_MKEY                  = 0x202,
@@ -145,6 +146,12 @@ enum {
        MLX5_CMD_OP_QUERY_Q_COUNTER               = 0x773,
        MLX5_CMD_OP_SET_RATE_LIMIT                = 0x780,
        MLX5_CMD_OP_QUERY_RATE_LIMIT              = 0x781,
+       MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT      = 0x782,
+       MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT     = 0x783,
+       MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT       = 0x784,
+       MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT      = 0x785,
+       MLX5_CMD_OP_CREATE_QOS_PARA_VPORT         = 0x786,
+       MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT        = 0x787,
        MLX5_CMD_OP_ALLOC_PD                      = 0x800,
        MLX5_CMD_OP_DEALLOC_PD                    = 0x801,
        MLX5_CMD_OP_ALLOC_UAR                     = 0x802,
@@ -537,13 +544,27 @@ struct mlx5_ifc_e_switch_cap_bits {
 
 struct mlx5_ifc_qos_cap_bits {
        u8         packet_pacing[0x1];
-       u8         reserved_0[0x1f];
-       u8         reserved_1[0x20];
+       u8         esw_scheduling[0x1];
+       u8         reserved_at_2[0x1e];
+
+       u8         reserved_at_20[0x20];
+
        u8         packet_pacing_max_rate[0x20];
+
        u8         packet_pacing_min_rate[0x20];
-       u8         reserved_2[0x10];
+
+       u8         reserved_at_80[0x10];
        u8         packet_pacing_rate_table_size[0x10];
-       u8         reserved_3[0x760];
+
+       u8         esw_element_type[0x10];
+       u8         esw_tsar_type[0x10];
+
+       u8         reserved_at_c0[0x10];
+       u8         max_qos_para_vport[0x10];
+
+       u8         max_tsar_bw_share[0x20];
+
+       u8         reserved_at_100[0x700];
 };
 
 struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
@@ -804,7 +825,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         early_vf_enable[0x1];
        u8         reserved_at_1a9[0x2];
        u8         local_ca_ack_delay[0x5];
-       u8         reserved_at_1af[0x2];
+       u8         port_module_event[0x1];
+       u8         reserved_at_1b0[0x1];
        u8         ports_check[0x1];
        u8         reserved_at_1b2[0x1];
        u8         disable_link_up[0x1];
@@ -888,7 +910,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8         log_pg_sz[0x8];
 
        u8         bf[0x1];
-       u8         reserved_at_261[0x1];
+       u8         driver_version[0x1];
        u8         pad_tx_eth_packet[0x1];
        u8         reserved_at_263[0x8];
        u8         log_bf_reg_size[0x5];
@@ -1735,6 +1757,80 @@ struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits {
        u8         reserved_at_4c0[0x300];
 };
 
+struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits {
+       u8         life_time_counter_high[0x20];
+
+       u8         life_time_counter_low[0x20];
+
+       u8         rx_errors[0x20];
+
+       u8         tx_errors[0x20];
+
+       u8         l0_to_recovery_eieos[0x20];
+
+       u8         l0_to_recovery_ts[0x20];
+
+       u8         l0_to_recovery_framing[0x20];
+
+       u8         l0_to_recovery_retrain[0x20];
+
+       u8         crc_error_dllp[0x20];
+
+       u8         crc_error_tlp[0x20];
+
+       u8         reserved_at_140[0x680];
+};
+
+struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits {
+       u8         life_time_counter_high[0x20];
+
+       u8         life_time_counter_low[0x20];
+
+       u8         time_to_boot_image_start[0x20];
+
+       u8         time_to_link_image[0x20];
+
+       u8         calibration_time[0x20];
+
+       u8         time_to_first_perst[0x20];
+
+       u8         time_to_detect_state[0x20];
+
+       u8         time_to_l0[0x20];
+
+       u8         time_to_crs_en[0x20];
+
+       u8         time_to_plastic_image_start[0x20];
+
+       u8         time_to_iron_image_start[0x20];
+
+       u8         perst_handler[0x20];
+
+       u8         times_in_l1[0x20];
+
+       u8         times_in_l23[0x20];
+
+       u8         dl_down[0x20];
+
+       u8         config_cycle1usec[0x20];
+
+       u8         config_cycle2to7usec[0x20];
+
+       u8         config_cycle_8to15usec[0x20];
+
+       u8         config_cycle_16_to_63usec[0x20];
+
+       u8         config_cycle_64usec[0x20];
+
+       u8         correctable_err_msg_sent[0x20];
+
+       u8         non_fatal_err_msg_sent[0x20];
+
+       u8         fatal_err_msg_sent[0x20];
+
+       u8         reserved_at_2e0[0x4e0];
+};
+
 struct mlx5_ifc_cmd_inter_comp_event_bits {
        u8         command_completion_vector[0x20];
 
@@ -2333,6 +2429,30 @@ struct mlx5_ifc_sqc_bits {
        struct mlx5_ifc_wq_bits wq;
 };
 
+enum {
+       SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR = 0x0,
+       SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT = 0x1,
+       SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2,
+       SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3,
+};
+
+struct mlx5_ifc_scheduling_context_bits {
+       u8         element_type[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         element_attributes[0x20];
+
+       u8         parent_element_id[0x20];
+
+       u8         reserved_at_60[0x40];
+
+       u8         bw_share[0x20];
+
+       u8         max_average_bw[0x20];
+
+       u8         reserved_at_e0[0x120];
+};
+
 struct mlx5_ifc_rqtc_bits {
        u8         reserved_at_0[0xa0];
 
@@ -2844,7 +2964,7 @@ struct mlx5_ifc_xrqc_bits {
 
        struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context;
 
-       u8         reserved_at_180[0x200];
+       u8         reserved_at_180[0x880];
 
        struct mlx5_ifc_wq_bits wq;
 };
@@ -2875,6 +2995,12 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
        u8         reserved_at_0[0x7c0];
 };
 
+union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits {
+       struct mlx5_ifc_pcie_perf_cntrs_grp_data_layout_bits pcie_perf_cntrs_grp_data_layout;
+       struct mlx5_ifc_pcie_tas_cntrs_grp_data_layout_bits pcie_tas_cntrs_grp_data_layout;
+       u8         reserved_at_0[0x7c0];
+};
+
 union mlx5_ifc_event_auto_bits {
        struct mlx5_ifc_comp_event_bits comp_event;
        struct mlx5_ifc_dct_events_bits dct_events;
@@ -2920,6 +3046,29 @@ struct mlx5_ifc_register_loopback_control_bits {
        u8         reserved_at_20[0x60];
 };
 
+struct mlx5_ifc_vport_tc_element_bits {
+       u8         traffic_class[0x4];
+       u8         reserved_at_4[0xc];
+       u8         vport_number[0x10];
+};
+
+struct mlx5_ifc_vport_element_bits {
+       u8         reserved_at_0[0x10];
+       u8         vport_number[0x10];
+};
+
+enum {
+       TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0,
+       TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1,
+       TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2,
+};
+
+struct mlx5_ifc_tsar_element_bits {
+       u8         reserved_at_0[0x8];
+       u8         tsar_type[0x8];
+       u8         reserved_at_10[0x10];
+};
+
 struct mlx5_ifc_teardown_hca_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
@@ -3540,6 +3689,39 @@ struct mlx5_ifc_query_special_contexts_in_bits {
        u8         reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_query_scheduling_element_out_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_at_40[0xc0];
+
+       struct mlx5_ifc_scheduling_context_bits scheduling_context;
+
+       u8         reserved_at_300[0x100];
+};
+
+enum {
+       SCHEDULING_HIERARCHY_E_SWITCH = 0x2,
+};
+
+struct mlx5_ifc_query_scheduling_element_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         scheduling_hierarchy[0x8];
+       u8         reserved_at_48[0x18];
+
+       u8         scheduling_element_id[0x20];
+
+       u8         reserved_at_80[0x180];
+};
+
 struct mlx5_ifc_query_rqt_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
@@ -3904,6 +4086,25 @@ struct mlx5_ifc_query_issi_in_bits {
        u8         reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_set_driver_version_out_bits {
+       u8         status[0x8];
+       u8         reserved_0[0x18];
+
+       u8         syndrome[0x20];
+       u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_set_driver_version_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_0[0x10];
+
+       u8         reserved_1[0x10];
+       u8         op_mod[0x10];
+
+       u8         reserved_2[0x40];
+       u8         driver_version[64][0x8];
+};
+
 struct mlx5_ifc_query_hca_vport_pkey_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
@@ -4725,6 +4926,43 @@ struct mlx5_ifc_modify_sq_in_bits {
        struct mlx5_ifc_sqc_bits ctx;
 };
 
+struct mlx5_ifc_modify_scheduling_element_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x1c0];
+};
+
+enum {
+       MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE = 0x1,
+       MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW = 0x2,
+};
+
+struct mlx5_ifc_modify_scheduling_element_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         scheduling_hierarchy[0x8];
+       u8         reserved_at_48[0x18];
+
+       u8         scheduling_element_id[0x20];
+
+       u8         reserved_at_80[0x20];
+
+       u8         modify_bitmask[0x20];
+
+       u8         reserved_at_c0[0x40];
+
+       struct mlx5_ifc_scheduling_context_bits scheduling_context;
+
+       u8         reserved_at_300[0x100];
+};
+
 struct mlx5_ifc_modify_rqt_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
@@ -5390,6 +5628,30 @@ struct mlx5_ifc_destroy_sq_in_bits {
        u8         reserved_at_60[0x20];
 };
 
+struct mlx5_ifc_destroy_scheduling_element_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x1c0];
+};
+
+struct mlx5_ifc_destroy_scheduling_element_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         scheduling_hierarchy[0x8];
+       u8         reserved_at_48[0x18];
+
+       u8         scheduling_element_id[0x20];
+
+       u8         reserved_at_80[0x180];
+};
+
 struct mlx5_ifc_destroy_rqt_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
@@ -6017,6 +6279,36 @@ struct mlx5_ifc_create_sq_in_bits {
        struct mlx5_ifc_sqc_bits ctx;
 };
 
+struct mlx5_ifc_create_scheduling_element_out_bits {
+       u8         status[0x8];
+       u8         reserved_at_8[0x18];
+
+       u8         syndrome[0x20];
+
+       u8         reserved_at_40[0x40];
+
+       u8         scheduling_element_id[0x20];
+
+       u8         reserved_at_a0[0x160];
+};
+
+struct mlx5_ifc_create_scheduling_element_in_bits {
+       u8         opcode[0x10];
+       u8         reserved_at_10[0x10];
+
+       u8         reserved_at_20[0x10];
+       u8         op_mod[0x10];
+
+       u8         scheduling_hierarchy[0x8];
+       u8         reserved_at_48[0x18];
+
+       u8         reserved_at_60[0xa0];
+
+       struct mlx5_ifc_scheduling_context_bits scheduling_context;
+
+       u8         reserved_at_300[0x100];
+};
+
 struct mlx5_ifc_create_rqt_out_bits {
        u8         status[0x8];
        u8         reserved_at_8[0x18];
@@ -7028,6 +7320,18 @@ struct mlx5_ifc_ppcnt_reg_bits {
        union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits counter_set;
 };
 
+struct mlx5_ifc_mpcnt_reg_bits {
+       u8         reserved_at_0[0x8];
+       u8         pcie_index[0x8];
+       u8         reserved_at_10[0xa];
+       u8         grp[0x6];
+
+       u8         clr[0x1];
+       u8         reserved_at_21[0x1f];
+
+       union mlx5_ifc_pcie_cntrs_grp_data_layout_auto_bits counter_set;
+};
+
 struct mlx5_ifc_ppad_reg_bits {
        u8         reserved_at_0[0x3];
        u8         single_mac[0x1];
@@ -7633,6 +7937,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
        struct mlx5_ifc_pmtu_reg_bits pmtu_reg;
        struct mlx5_ifc_ppad_reg_bits ppad_reg;
        struct mlx5_ifc_ppcnt_reg_bits ppcnt_reg;
+       struct mlx5_ifc_mpcnt_reg_bits mpcnt_reg;
        struct mlx5_ifc_pplm_reg_bits pplm_reg;
        struct mlx5_ifc_pplr_reg_bits pplr_reg;
        struct mlx5_ifc_ppsc_reg_bits ppsc_reg;
index b3065acd20b4c9f678d92c8d2e13a81a619dc006..e527732fb31bcc16791a93f1d45f5de2dd5d7649 100644 (file)
@@ -94,6 +94,9 @@ enum mlx5e_link_mode {
 
 #define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
 
+#define PORT_MODULE_EVENT_MODULE_STATUS_MASK 0xF
+#define PORT_MODULE_EVENT_ERROR_TYPE_MASK         0xF
+
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
                         int ptys_size, int proto_mask, u8 local_port);
@@ -138,8 +141,12 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx,
 int mlx5_max_tc(struct mlx5_core_dev *mdev);
 
 int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc);
+int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev,
+                           u8 prio, u8 *tc);
 int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group);
 int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw);
+int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev,
+                               u8 tc, u8 *bw_pct);
 int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
                                    u8 *max_bw_value,
                                    u8 *max_bw_unit);
@@ -155,4 +162,6 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
 int mlx5_query_module_eeprom(struct mlx5_core_dev *dev,
                             u16 offset, u16 size, u8 *data);
 
+int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out);
+int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in);
 #endif /* __MLX5_PORT_H__ */
index 33c97dc900f82493f9f5bf6a4f375d37cbe4a4f8..1cde0fd53f90e026645a3d171e79653e3477980a 100644 (file)
@@ -55,7 +55,7 @@ struct mlx5_srq_attr {
        u32 lwm;
        u32 user_index;
        u64 db_record;
-       u64 *pas;
+       __be64 *pas;
 };
 
 struct mlx5_core_dev;
index 451b0bde9083c91dcff3bb3a80a5567eb1bf3378..ec35157ea7252c587894ff4f94d406f50b7aeb66 100644 (file)
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/device.h>
 
+enum {
+       MLX5_CAP_INLINE_MODE_L2,
+       MLX5_CAP_INLINE_MODE_VPORT_CONTEXT,
+       MLX5_CAP_INLINE_MODE_NOT_REQUIRED,
+};
+
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
 u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
                                u16 vport);
@@ -43,8 +49,8 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
                                  u16 vport, u8 state);
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
                                     u16 vport, u8 *addr);
-void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
-                                    u8 *min_inline);
+int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+                                   u16 vport, u8 *min_inline);
 int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
                                     u16 vport, u8 min_inline);
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
index e9caec6a51e97a618ff5ab9b76dd6371708564e8..a92c8d73aeafc5f5bafa551eacc284f86bad50d0 100644 (file)
@@ -1266,29 +1266,25 @@ static inline int fixup_user_fault(struct task_struct *tsk,
 }
 #endif
 
-extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
+extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len,
+               unsigned int gup_flags);
 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
-               void *buf, int len, int write);
+               void *buf, int len, unsigned int gup_flags);
 
-long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-                     unsigned long start, unsigned long nr_pages,
-                     unsigned int foll_flags, struct page **pages,
-                     struct vm_area_struct **vmas, int *nonblocking);
 long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
                            unsigned long start, unsigned long nr_pages,
-                           int write, int force, struct page **pages,
+                           unsigned int gup_flags, struct page **pages,
                            struct vm_area_struct **vmas);
 long get_user_pages(unsigned long start, unsigned long nr_pages,
-                           int write, int force, struct page **pages,
+                           unsigned int gup_flags, struct page **pages,
                            struct vm_area_struct **vmas);
 long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
-                   int write, int force, struct page **pages, int *locked);
+                   unsigned int gup_flags, struct page **pages, int *locked);
 long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
                               unsigned long start, unsigned long nr_pages,
-                              int write, int force, struct page **pages,
-                              unsigned int gup_flags);
+                              struct page **pages, unsigned int gup_flags);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
-                   int write, int force, struct page **pages);
+                   struct page **pages, unsigned int gup_flags);
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                        struct page **pages);
 
@@ -1306,7 +1302,7 @@ struct frame_vector {
 struct frame_vector *frame_vector_create(unsigned int nr_frames);
 void frame_vector_destroy(struct frame_vector *vec);
 int get_vaddr_frames(unsigned long start, unsigned int nr_pfns,
-                    bool write, bool force, struct frame_vector *vec);
+                    unsigned int gup_flags, struct frame_vector *vec);
 void put_vaddr_frames(struct frame_vector *vec);
 int frame_vector_to_pages(struct frame_vector *vec);
 void frame_vector_to_pfns(struct frame_vector *vec);
@@ -1391,7 +1387,7 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma,
                !vma_growsup(vma->vm_next, addr);
 }
 
-int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t);
+int vma_is_stack_for_current(struct vm_area_struct *vma);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
                unsigned long old_addr, struct vm_area_struct *new_vma,
@@ -2232,6 +2228,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
 #define FOLL_TRIED     0x800   /* a retry, previous pass started an IO */
 #define FOLL_MLOCK     0x1000  /* lock present pages */
 #define FOLL_REMOTE    0x2000  /* we are working on non-current tsk/mm */
+#define FOLL_COW       0x4000  /* internal GUP flag */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
                        void *data);
index 7f2ae99e5daf39406fa5b166f6a1a75c464d1fcd..0f088f3a2fed8df6435819a52360940310272462 100644 (file)
@@ -440,33 +440,7 @@ struct zone {
        seqlock_t               span_seqlock;
 #endif
 
-       /*
-        * wait_table           -- the array holding the hash table
-        * wait_table_hash_nr_entries   -- the size of the hash table array
-        * wait_table_bits      -- wait_table_size == (1 << wait_table_bits)
-        *
-        * The purpose of all these is to keep track of the people
-        * waiting for a page to become available and make them
-        * runnable again when possible. The trouble is that this
-        * consumes a lot of space, especially when so few things
-        * wait on pages at a given time. So instead of using
-        * per-page waitqueues, we use a waitqueue hash table.
-        *
-        * The bucket discipline is to sleep on the same queue when
-        * colliding and wake all in that wait queue when removing.
-        * When something wakes, it must check to be sure its page is
-        * truly available, a la thundering herd. The cost of a
-        * collision is great, but given the expected load of the
-        * table, they should be so rare as to be outweighed by the
-        * benefits from the saved space.
-        *
-        * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
-        * primary users of these fields, and in mm/page_alloc.c
-        * free_area_init_core() performs the initialization of them.
-        */
-       wait_queue_head_t       *wait_table;
-       unsigned long           wait_table_hash_nr_entries;
-       unsigned long           wait_table_bits;
+       int initialized;
 
        /* Write-intensive fields used from the page allocator */
        ZONE_PADDING(_pad1_)
@@ -546,7 +520,7 @@ static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
 
 static inline bool zone_is_initialized(struct zone *zone)
 {
-       return !!zone->wait_table;
+       return zone->initialized;
 }
 
 static inline bool zone_is_empty(struct zone *zone)
index c5d3d5024fc8584aa22e99dd8b0fd5da75e7bf19..d8905a229f34833a4336b0a69431a4a0a94bc76e 100644 (file)
@@ -1184,7 +1184,7 @@ int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
                           int page);
 
 /* Reset and initialize a NAND device */
-int nand_reset(struct nand_chip *chip);
+int nand_reset(struct nand_chip *chip, int chipnr);
 
 /* Free resources held by the NAND device */
 void nand_cleanup(struct nand_chip *chip);
index 458c87631e7fe7a9f8f46c5b3445cd4db81c14bb..1ff5ea6e12214db818c2cfa8a9b8ed5cbddc307c 100644 (file)
@@ -316,7 +316,6 @@ struct napi_struct {
        unsigned int            gro_count;
        int                     (*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
-       spinlock_t              poll_lock;
        int                     poll_owner;
 #endif
        struct net_device       *dev;
@@ -334,6 +333,16 @@ enum {
        NAPI_STATE_NPSVC,       /* Netpoll - don't dequeue from poll_list */
        NAPI_STATE_HASHED,      /* In NAPI hash (busy polling possible) */
        NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
+       NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+};
+
+enum {
+       NAPIF_STATE_SCHED        = (1UL << NAPI_STATE_SCHED),
+       NAPIF_STATE_DISABLE      = (1UL << NAPI_STATE_DISABLE),
+       NAPIF_STATE_NPSVC        = (1UL << NAPI_STATE_NPSVC),
+       NAPIF_STATE_HASHED       = (1UL << NAPI_STATE_HASHED),
+       NAPIF_STATE_NO_BUSY_POLL = (1UL << NAPI_STATE_NO_BUSY_POLL),
+       NAPIF_STATE_IN_BUSY_POLL = (1UL << NAPI_STATE_IN_BUSY_POLL),
 };
 
 enum gro_result {
@@ -453,31 +462,21 @@ static inline bool napi_reschedule(struct napi_struct *napi)
        return false;
 }
 
-void __napi_complete(struct napi_struct *n);
-void napi_complete_done(struct napi_struct *n, int work_done);
+bool __napi_complete(struct napi_struct *n);
+bool napi_complete_done(struct napi_struct *n, int work_done);
 /**
  *     napi_complete - NAPI processing complete
  *     @n: NAPI context
  *
  * Mark NAPI processing as complete.
  * Consider using napi_complete_done() instead.
+ * Return false if device should avoid rearming interrupts.
  */
-static inline void napi_complete(struct napi_struct *n)
+static inline bool napi_complete(struct napi_struct *n)
 {
        return napi_complete_done(n, 0);
 }
 
-/**
- *     napi_hash_add - add a NAPI to global hashtable
- *     @napi: NAPI context
- *
- * Generate a new napi_id and store a @napi under it in napi_hash.
- * Used for busy polling (CONFIG_NET_RX_BUSY_POLL).
- * Note: This is normally automatically done from netif_napi_add(),
- * so might disappear in a future Linux version.
- */
-void napi_hash_add(struct napi_struct *napi);
-
 /**
  *     napi_hash_del - remove a NAPI from global table
  *     @napi: NAPI context
@@ -732,8 +731,8 @@ struct xps_dev_maps {
        struct rcu_head rcu;
        struct xps_map __rcu *cpu_map[0];
 };
-#define XPS_DEV_MAPS_SIZE (sizeof(struct xps_dev_maps) +               \
-    (nr_cpu_ids * sizeof(struct xps_map *)))
+#define XPS_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) +         \
+       (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *)))
 #endif /* CONFIG_XPS */
 
 #define TC_MAX_QUEUE   16
@@ -803,6 +802,7 @@ struct tc_to_netdev {
                struct tc_cls_matchall_offload *cls_mall;
                struct tc_cls_bpf_offload *cls_bpf;
        };
+       bool egress_dev;
 };
 
 /* These structures hold the attributes of xdp state that are being passed
@@ -926,7 +926,7 @@ struct netdev_xdp {
  *     3. Update dev->stats asynchronously and atomically, and define
  *        neither operation.
  *
- * bool (*ndo_has_offload_stats)(int attr_id)
+ * bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id)
  *     Return true if this device supports offload stats of this attr_id.
  *
  * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev,
@@ -1166,7 +1166,7 @@ struct net_device_ops {
 
        struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
                                                     struct rtnl_link_stats64 *storage);
-       bool                    (*ndo_has_offload_stats)(int attr_id);
+       bool                    (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id);
        int                     (*ndo_get_offload_stats)(int attr_id,
                                                         const struct net_device *dev,
                                                         void *attr_data);
@@ -1620,7 +1620,7 @@ enum netdev_priv_flags {
  *     @dcbnl_ops:     Data Center Bridging netlink ops
  *     @num_tc:        Number of traffic classes in the net device
  *     @tc_to_txq:     XXX: need comments on this one
- *     @prio_tc_map    XXX: need comments on this one
+ *     @prio_tc_map:   XXX: need comments on this one
  *
  *     @fcoe_ddp_xid:  Max exchange id for FCoE LRO by ddp
  *
@@ -1920,34 +1920,10 @@ int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
        return 0;
 }
 
-static inline
-void netdev_reset_tc(struct net_device *dev)
-{
-       dev->num_tc = 0;
-       memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
-       memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
-}
-
-static inline
-int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
-{
-       if (tc >= dev->num_tc)
-               return -EINVAL;
-
-       dev->tc_to_txq[tc].count = count;
-       dev->tc_to_txq[tc].offset = offset;
-       return 0;
-}
-
-static inline
-int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
-{
-       if (num_tc > TC_MAX_QUEUE)
-               return -EINVAL;
-
-       dev->num_tc = num_tc;
-       return 0;
-}
+int netdev_txq_to_tc(struct net_device *dev, unsigned int txq);
+void netdev_reset_tc(struct net_device *dev);
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset);
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc);
 
 static inline
 int netdev_get_num_tc(struct net_device *dev)
@@ -2167,7 +2143,10 @@ struct napi_gro_cb {
        /* Used to determine if flush_id can be ignored */
        u8      is_atomic:1;
 
-       /* 5 bit hole */
+       /* Number of gro_receive callbacks this packet already went through */
+       u8 recursion_counter:4;
+
+       /* 1 bit hole */
 
        /* used to support CHECKSUM_COMPLETE for tunneling protocols */
        __wsum  csum;
@@ -2178,6 +2157,40 @@ struct napi_gro_cb {
 
 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
 
+#define GRO_RECURSION_LIMIT 15
+static inline int gro_recursion_inc_test(struct sk_buff *skb)
+{
+       return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT;
+}
+
+typedef struct sk_buff **(*gro_receive_t)(struct sk_buff **, struct sk_buff *);
+static inline struct sk_buff **call_gro_receive(gro_receive_t cb,
+                                               struct sk_buff **head,
+                                               struct sk_buff *skb)
+{
+       if (unlikely(gro_recursion_inc_test(skb))) {
+               NAPI_GRO_CB(skb)->flush |= 1;
+               return NULL;
+       }
+
+       return cb(head, skb);
+}
+
+typedef struct sk_buff **(*gro_receive_sk_t)(struct sock *, struct sk_buff **,
+                                            struct sk_buff *);
+static inline struct sk_buff **call_gro_receive_sk(gro_receive_sk_t cb,
+                                                  struct sock *sk,
+                                                  struct sk_buff **head,
+                                                  struct sk_buff *skb)
+{
+       if (unlikely(gro_recursion_inc_test(skb))) {
+               NAPI_GRO_CB(skb)->flush |= 1;
+               return NULL;
+       }
+
+       return cb(sk, head, skb);
+}
+
 struct packet_type {
        __be16                  type;   /* This is really htons(ether_type). */
        struct net_device       *dev;   /* NULL is wildcarded here           */
@@ -3241,7 +3254,7 @@ int dev_get_phys_port_id(struct net_device *dev,
 int dev_get_phys_port_name(struct net_device *dev,
                           char *name, size_t len);
 int dev_change_proto_down(struct net_device *dev, bool proto_down);
-int dev_change_xdp_fd(struct net_device *dev, int fd);
+int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags);
 struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                                    struct netdev_queue *txq, int *ret);
@@ -3250,6 +3263,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 bool is_skb_forwardable(const struct net_device *dev,
                        const struct sk_buff *skb);
 
+static __always_inline int ____dev_forward_skb(struct net_device *dev,
+                                              struct sk_buff *skb)
+{
+       if (skb_orphan_frags(skb, GFP_ATOMIC) ||
+           unlikely(!is_skb_forwardable(dev, skb))) {
+               atomic_long_inc(&dev->rx_dropped);
+               kfree_skb(skb);
+               return NET_RX_DROP;
+       }
+
+       skb_scrub_packet(skb, true);
+       skb->priority = 0;
+       return 0;
+}
+
 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
 
 extern int             netdev_budget;
@@ -3435,6 +3463,17 @@ static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
        txq->xmit_lock_owner = cpu;
 }
 
+static inline bool __netif_tx_acquire(struct netdev_queue *txq)
+{
+       __acquire(&txq->_xmit_lock);
+       return true;
+}
+
+static inline void __netif_tx_release(struct netdev_queue *txq)
+{
+       __release(&txq->_xmit_lock);
+}
+
 static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 {
        spin_lock_bh(&txq->_xmit_lock);
@@ -3536,17 +3575,21 @@ static inline void netif_tx_unlock_bh(struct net_device *dev)
 #define HARD_TX_LOCK(dev, txq, cpu) {                  \
        if ((dev->features & NETIF_F_LLTX) == 0) {      \
                __netif_tx_lock(txq, cpu);              \
+       } else {                                        \
+               __netif_tx_acquire(txq);                \
        }                                               \
 }
 
 #define HARD_TX_TRYLOCK(dev, txq)                      \
        (((dev->features & NETIF_F_LLTX) == 0) ?        \
                __netif_tx_trylock(txq) :               \
-               true )
+               __netif_tx_acquire(txq))
 
 #define HARD_TX_UNLOCK(dev, txq) {                     \
        if ((dev->features & NETIF_F_LLTX) == 0) {      \
                __netif_tx_unlock(txq);                 \
+       } else {                                        \
+               __netif_tx_release(txq);                \
        }                                               \
 }
 
index abc7fdcb9eb1a204c5a93669e7d17d618151f329..69230140215b6f84a1fc3276bd7d2d7c155eecc8 100644 (file)
@@ -49,13 +49,11 @@ struct sock;
 
 struct nf_hook_state {
        unsigned int hook;
-       int thresh;
        u_int8_t pf;
        struct net_device *in;
        struct net_device *out;
        struct sock *sk;
        struct net *net;
-       struct nf_hook_entry __rcu *hook_entries;
        int (*okfn)(struct net *, struct sock *, struct sk_buff *);
 };
 
@@ -82,9 +80,8 @@ struct nf_hook_entry {
 };
 
 static inline void nf_hook_state_init(struct nf_hook_state *p,
-                                     struct nf_hook_entry *hook_entry,
                                      unsigned int hook,
-                                     int thresh, u_int8_t pf,
+                                     u_int8_t pf,
                                      struct net_device *indev,
                                      struct net_device *outdev,
                                      struct sock *sk,
@@ -92,13 +89,11 @@ static inline void nf_hook_state_init(struct nf_hook_state *p,
                                      int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
        p->hook = hook;
-       p->thresh = thresh;
        p->pf = pf;
        p->in = indev;
        p->out = outdev;
        p->sk = sk;
        p->net = net;
-       RCU_INIT_POINTER(p->hook_entries, hook_entry);
        p->okfn = okfn;
 }
 
@@ -152,23 +147,20 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
 extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 #endif
 
-int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
+int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
+                struct nf_hook_entry *entry);
 
 /**
- *     nf_hook_thresh - call a netfilter hook
+ *     nf_hook - call a netfilter hook
  *
  *     Returns 1 if the hook has allowed the packet to pass.  The function
  *     okfn must be invoked by the caller in this case.  Any other return
  *     value indicates the packet has been consumed by the hook.
  */
-static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
-                                struct net *net,
-                                struct sock *sk,
-                                struct sk_buff *skb,
-                                struct net_device *indev,
-                                struct net_device *outdev,
-                                int (*okfn)(struct net *, struct sock *, struct sk_buff *),
-                                int thresh)
+static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
+                         struct sock *sk, struct sk_buff *skb,
+                         struct net_device *indev, struct net_device *outdev,
+                         int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
        struct nf_hook_entry *hook_head;
        int ret = 1;
@@ -185,24 +177,16 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
        if (hook_head) {
                struct nf_hook_state state;
 
-               nf_hook_state_init(&state, hook_head, hook, thresh,
-                                  pf, indev, outdev, sk, net, okfn);
+               nf_hook_state_init(&state, hook, pf, indev, outdev,
+                                  sk, net, okfn);
 
-               ret = nf_hook_slow(skb, &state);
+               ret = nf_hook_slow(skb, &state, hook_head);
        }
        rcu_read_unlock();
 
        return ret;
 }
 
-static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
-                         struct sock *sk, struct sk_buff *skb,
-                         struct net_device *indev, struct net_device *outdev,
-                         int (*okfn)(struct net *, struct sock *, struct sk_buff *))
-{
-       return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN);
-}
-                   
 /* Activate hook; either okfn or kfree_skb called, unless a hook
    returns NF_STOLEN (in which case, it's up to the hook to deal with
    the consequences).
@@ -220,19 +204,6 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
    coders :)
 */
 
-static inline int
-NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
-              struct sk_buff *skb, struct net_device *in,
-              struct net_device *out,
-              int (*okfn)(struct net *, struct sock *, struct sk_buff *),
-              int thresh)
-{
-       int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh);
-       if (ret == 1)
-               ret = okfn(net, sk, skb);
-       return ret;
-}
-
 static inline int
 NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
             struct sk_buff *skb, struct net_device *in, struct net_device *out,
@@ -242,7 +213,7 @@ NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
        int ret;
 
        if (!cond ||
-           ((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1))
+           ((ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn)) == 1))
                ret = okfn(net, sk, skb);
        return ret;
 }
@@ -252,7 +223,10 @@ NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct
        struct net_device *in, struct net_device *out,
        int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
-       return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN);
+       int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn);
+       if (ret == 1)
+               ret = okfn(net, sk, skb);
+       return ret;
 }
 
 /* Call setsockopt() */
index 83b9a2e0d8d46a64dfe0faeda4b67ea6ce972677..8e42253e5d4dec83294a629290c12317b1b4e81a 100644 (file)
@@ -79,10 +79,12 @@ enum ip_set_ext_id {
        IPSET_EXT_ID_MAX,
 };
 
+struct ip_set;
+
 /* Extension type */
 struct ip_set_ext_type {
        /* Destroy extension private data (can be NULL) */
-       void (*destroy)(void *ext);
+       void (*destroy)(struct ip_set *set, void *ext);
        enum ip_set_extension type;
        enum ipset_cadt_flags flag;
        /* Size and minimal alignment */
@@ -92,17 +94,6 @@ struct ip_set_ext_type {
 
 extern const struct ip_set_ext_type ip_set_extensions[];
 
-struct ip_set_ext {
-       u64 packets;
-       u64 bytes;
-       u32 timeout;
-       u32 skbmark;
-       u32 skbmarkmask;
-       u32 skbprio;
-       u16 skbqueue;
-       char *comment;
-};
-
 struct ip_set_counter {
        atomic64_t bytes;
        atomic64_t packets;
@@ -122,6 +113,15 @@ struct ip_set_skbinfo {
        u32 skbmarkmask;
        u32 skbprio;
        u16 skbqueue;
+       u16 __pad;
+};
+
+struct ip_set_ext {
+       struct ip_set_skbinfo skbinfo;
+       u64 packets;
+       u64 bytes;
+       char *comment;
+       u32 timeout;
 };
 
 struct ip_set;
@@ -252,6 +252,10 @@ struct ip_set {
        u8 flags;
        /* Default timeout value, if enabled */
        u32 timeout;
+       /* Number of elements (vs timeout) */
+       u32 elements;
+       /* Size of the dynamic extensions (vs timeout) */
+       size_t ext_size;
        /* Element data size */
        size_t dsize;
        /* Offsets to extensions in elements */
@@ -268,7 +272,7 @@ ip_set_ext_destroy(struct ip_set *set, void *data)
         */
        if (SET_WITH_COMMENT(set))
                ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy(
-                       ext_comment(data, set));
+                       set, ext_comment(data, set));
 }
 
 static inline int
@@ -294,104 +298,6 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
        return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
 }
 
-static inline void
-ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
-{
-       atomic64_add((long long)bytes, &(counter)->bytes);
-}
-
-static inline void
-ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
-{
-       atomic64_add((long long)packets, &(counter)->packets);
-}
-
-static inline u64
-ip_set_get_bytes(const struct ip_set_counter *counter)
-{
-       return (u64)atomic64_read(&(counter)->bytes);
-}
-
-static inline u64
-ip_set_get_packets(const struct ip_set_counter *counter)
-{
-       return (u64)atomic64_read(&(counter)->packets);
-}
-
-static inline void
-ip_set_update_counter(struct ip_set_counter *counter,
-                     const struct ip_set_ext *ext,
-                     struct ip_set_ext *mext, u32 flags)
-{
-       if (ext->packets != ULLONG_MAX &&
-           !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
-               ip_set_add_bytes(ext->bytes, counter);
-               ip_set_add_packets(ext->packets, counter);
-       }
-       if (flags & IPSET_FLAG_MATCH_COUNTERS) {
-               mext->packets = ip_set_get_packets(counter);
-               mext->bytes = ip_set_get_bytes(counter);
-       }
-}
-
-static inline void
-ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
-                     const struct ip_set_ext *ext,
-                     struct ip_set_ext *mext, u32 flags)
-{
-               mext->skbmark = skbinfo->skbmark;
-               mext->skbmarkmask = skbinfo->skbmarkmask;
-               mext->skbprio = skbinfo->skbprio;
-               mext->skbqueue = skbinfo->skbqueue;
-}
-static inline bool
-ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
-{
-       /* Send nonzero parameters only */
-       return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
-               nla_put_net64(skb, IPSET_ATTR_SKBMARK,
-                             cpu_to_be64((u64)skbinfo->skbmark << 32 |
-                                         skbinfo->skbmarkmask),
-                             IPSET_ATTR_PAD)) ||
-              (skbinfo->skbprio &&
-               nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
-                             cpu_to_be32(skbinfo->skbprio))) ||
-              (skbinfo->skbqueue &&
-               nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
-                            cpu_to_be16(skbinfo->skbqueue)));
-}
-
-static inline void
-ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
-                   const struct ip_set_ext *ext)
-{
-       skbinfo->skbmark = ext->skbmark;
-       skbinfo->skbmarkmask = ext->skbmarkmask;
-       skbinfo->skbprio = ext->skbprio;
-       skbinfo->skbqueue = ext->skbqueue;
-}
-
-static inline bool
-ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter)
-{
-       return nla_put_net64(skb, IPSET_ATTR_BYTES,
-                            cpu_to_be64(ip_set_get_bytes(counter)),
-                            IPSET_ATTR_PAD) ||
-              nla_put_net64(skb, IPSET_ATTR_PACKETS,
-                            cpu_to_be64(ip_set_get_packets(counter)),
-                            IPSET_ATTR_PAD);
-}
-
-static inline void
-ip_set_init_counter(struct ip_set_counter *counter,
-                   const struct ip_set_ext *ext)
-{
-       if (ext->bytes != ULLONG_MAX)
-               atomic64_set(&(counter)->bytes, (long long)(ext->bytes));
-       if (ext->packets != ULLONG_MAX)
-               atomic64_set(&(counter)->packets, (long long)(ext->packets));
-}
-
 /* Netlink CB args */
 enum {
        IPSET_CB_NET = 0,       /* net namespace */
@@ -431,6 +337,8 @@ extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[],
                              size_t len, size_t align);
 extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
                                 struct ip_set_ext *ext);
+extern int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
+                                const void *e, bool active);
 
 static inline int
 ip_set_get_hostipaddr4(struct nlattr *nla, u32 *ipaddr)
@@ -546,10 +454,8 @@ bitmap_bytes(u32 a, u32 b)
 
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <linux/netfilter/ipset/ip_set_comment.h>
-
-int
-ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
-                     const void *e, bool active);
+#include <linux/netfilter/ipset/ip_set_counter.h>
+#include <linux/netfilter/ipset/ip_set_skbinfo.h>
 
 #define IP_SET_INIT_KEXT(skb, opt, set)                        \
        { .bytes = (skb)->len, .packets = 1,            \
index 5e4662a71e011d5dc5dbfa222df54c35d293ed26..366d6c0ea04fe5c028fbda7413145d9f4d34b7d6 100644 (file)
@@ -6,8 +6,8 @@
 #define IPSET_BITMAP_MAX_RANGE 0x0000FFFF
 
 enum {
+       IPSET_ADD_STORE_PLAIN_TIMEOUT = -1,
        IPSET_ADD_FAILED = 1,
-       IPSET_ADD_STORE_PLAIN_TIMEOUT,
        IPSET_ADD_START_STORED_TIMEOUT,
 };
 
index 8d024852595704fa67b0d5588c76fcca3ef67b9d..8e2bab1e8e90930f954ec7dc3a1b7a8179eecd13 100644 (file)
@@ -20,13 +20,14 @@ ip_set_comment_uget(struct nlattr *tb)
  * The kadt functions don't use the comment extensions in any way.
  */
 static inline void
-ip_set_init_comment(struct ip_set_comment *comment,
+ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
                    const struct ip_set_ext *ext)
 {
        struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
        size_t len = ext->comment ? strlen(ext->comment) : 0;
 
        if (unlikely(c)) {
+               set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
                kfree_rcu(c, rcu);
                rcu_assign_pointer(comment->c, NULL);
        }
@@ -34,16 +35,17 @@ ip_set_init_comment(struct ip_set_comment *comment,
                return;
        if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
                len = IPSET_MAX_COMMENT_SIZE;
-       c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+       c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
        if (unlikely(!c))
                return;
        strlcpy(c->str, ext->comment, len + 1);
+       set->ext_size += sizeof(*c) + strlen(c->str) + 1;
        rcu_assign_pointer(comment->c, c);
 }
 
 /* Used only when dumping a set, protected by rcu_read_lock_bh() */
 static inline int
-ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
+ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
 {
        struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
 
@@ -58,13 +60,14 @@ ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
  * of the set data anymore.
  */
 static inline void
-ip_set_comment_free(struct ip_set_comment *comment)
+ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment)
 {
        struct ip_set_comment_rcu *c;
 
        c = rcu_dereference_protected(comment->c, 1);
        if (unlikely(!c))
                return;
+       set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
        kfree_rcu(c, rcu);
        rcu_assign_pointer(comment->c, NULL);
 }
diff --git a/include/linux/netfilter/ipset/ip_set_counter.h b/include/linux/netfilter/ipset/ip_set_counter.h
new file mode 100644 (file)
index 0000000..bb6fba4
--- /dev/null
@@ -0,0 +1,75 @@
+#ifndef _IP_SET_COUNTER_H
+#define _IP_SET_COUNTER_H
+
+/* Copyright (C) 2015 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifdef __KERNEL__
+
+static inline void
+ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
+{
+       atomic64_add((long long)bytes, &(counter)->bytes);
+}
+
+static inline void
+ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
+{
+       atomic64_add((long long)packets, &(counter)->packets);
+}
+
+static inline u64
+ip_set_get_bytes(const struct ip_set_counter *counter)
+{
+       return (u64)atomic64_read(&(counter)->bytes);
+}
+
+static inline u64
+ip_set_get_packets(const struct ip_set_counter *counter)
+{
+       return (u64)atomic64_read(&(counter)->packets);
+}
+
+static inline void
+ip_set_update_counter(struct ip_set_counter *counter,
+                     const struct ip_set_ext *ext,
+                     struct ip_set_ext *mext, u32 flags)
+{
+       if (ext->packets != ULLONG_MAX &&
+           !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
+               ip_set_add_bytes(ext->bytes, counter);
+               ip_set_add_packets(ext->packets, counter);
+       }
+       if (flags & IPSET_FLAG_MATCH_COUNTERS) {
+               mext->packets = ip_set_get_packets(counter);
+               mext->bytes = ip_set_get_bytes(counter);
+       }
+}
+
+static inline bool
+ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
+{
+       return nla_put_net64(skb, IPSET_ATTR_BYTES,
+                            cpu_to_be64(ip_set_get_bytes(counter)),
+                            IPSET_ATTR_PAD) ||
+              nla_put_net64(skb, IPSET_ATTR_PACKETS,
+                            cpu_to_be64(ip_set_get_packets(counter)),
+                            IPSET_ATTR_PAD);
+}
+
+static inline void
+ip_set_init_counter(struct ip_set_counter *counter,
+                   const struct ip_set_ext *ext)
+{
+       if (ext->bytes != ULLONG_MAX)
+               atomic64_set(&(counter)->bytes, (long long)(ext->bytes));
+       if (ext->packets != ULLONG_MAX)
+               atomic64_set(&(counter)->packets, (long long)(ext->packets));
+}
+
+#endif /* __KERNEL__ */
+#endif /* _IP_SET_COUNTER_H */
diff --git a/include/linux/netfilter/ipset/ip_set_skbinfo.h b/include/linux/netfilter/ipset/ip_set_skbinfo.h
new file mode 100644 (file)
index 0000000..29d7ef2
--- /dev/null
@@ -0,0 +1,46 @@
+#ifndef _IP_SET_SKBINFO_H
+#define _IP_SET_SKBINFO_H
+
+/* Copyright (C) 2015 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifdef __KERNEL__
+
+static inline void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+                  const struct ip_set_ext *ext,
+                  struct ip_set_ext *mext, u32 flags)
+{
+       mext->skbinfo = *skbinfo;
+}
+
+static inline bool
+ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
+{
+       /* Send nonzero parameters only */
+       return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+               nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+                             cpu_to_be64((u64)skbinfo->skbmark << 32 |
+                                         skbinfo->skbmarkmask),
+                             IPSET_ATTR_PAD)) ||
+              (skbinfo->skbprio &&
+               nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+                             cpu_to_be32(skbinfo->skbprio))) ||
+              (skbinfo->skbqueue &&
+               nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+                             cpu_to_be16(skbinfo->skbqueue)));
+}
+
+static inline void
+ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
+                   const struct ip_set_ext *ext)
+{
+       *skbinfo = ext->skbinfo;
+}
+
+#endif /* __KERNEL__ */
+#endif /* _IP_SET_SKBINFO_H */
index 1d6a935c1ac5f4becf782394a4c8e03f7a913eb8..bfb3531fd88a4f7811e6ef9fffbaff672dfa6c53 100644 (file)
@@ -40,7 +40,7 @@ ip_set_timeout_uget(struct nlattr *tb)
 }
 
 static inline bool
-ip_set_timeout_expired(unsigned long *t)
+ip_set_timeout_expired(const unsigned long *t)
 {
        return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
 }
@@ -63,7 +63,7 @@ ip_set_timeout_set(unsigned long *timeout, u32 value)
 }
 
 static inline u32
-ip_set_timeout_get(unsigned long *timeout)
+ip_set_timeout_get(const unsigned long *timeout)
 {
        return *timeout == IPSET_ELEM_PERMANENT ? 0 :
                jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
index 2ad1a2b289b5a57d25a3835e97302dd0db774f90..cd4eaf8df445fe4ecb699c07b170b1af778748c6 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <linux/netdevice.h>
 #include <linux/static_key.h>
+#include <linux/netfilter.h>
 #include <uapi/linux/netfilter/x_tables.h>
 
 /* Test a struct->invflags and a boolean for inequality */
  * @target:    the target extension
  * @matchinfo: per-match data
  * @targetinfo:        per-target data
- * @net                network namespace through which the action was invoked
- * @in:                input netdevice
- * @out:       output netdevice
+ * @state:     pointer to hook state this packet came from
  * @fragoff:   packet is a fragment, this is the data offset
  * @thoff:     position of transport header relative to skb->data
- * @hook:      hook number given packet came from
- * @family:    Actual NFPROTO_* through which the function is invoked
- *             (helpful when match->family == NFPROTO_UNSPEC)
  *
  * Fields written to by extensions:
  *
@@ -38,15 +34,47 @@ struct xt_action_param {
        union {
                const void *matchinfo, *targinfo;
        };
-       struct net *net;
-       const struct net_device *in, *out;
+       const struct nf_hook_state *state;
        int fragoff;
        unsigned int thoff;
-       unsigned int hooknum;
-       u_int8_t family;
        bool hotdrop;
 };
 
+static inline struct net *xt_net(const struct xt_action_param *par)
+{
+       return par->state->net;
+}
+
+static inline struct net_device *xt_in(const struct xt_action_param *par)
+{
+       return par->state->in;
+}
+
+static inline const char *xt_inname(const struct xt_action_param *par)
+{
+       return par->state->in->name;
+}
+
+static inline struct net_device *xt_out(const struct xt_action_param *par)
+{
+       return par->state->out;
+}
+
+static inline const char *xt_outname(const struct xt_action_param *par)
+{
+       return par->state->out->name;
+}
+
+static inline unsigned int xt_hooknum(const struct xt_action_param *par)
+{
+       return par->state->hook;
+}
+
+static inline u_int8_t xt_family(const struct xt_action_param *par)
+{
+       return par->state->pf;
+}
+
 /**
  * struct xt_mtchk_param - parameters for match extensions'
  * checkentry functions
index 33e37fb41d5d40df1779860e6a4f17ab6a47ef6f..2dc3b49b804a32642b7b03827f57b0755c6fda36 100644 (file)
@@ -26,10 +26,10 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
        if (unlikely(!e))
                return 0;
 
-       nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN,
+       nf_hook_state_init(&state, NF_NETDEV_INGRESS,
                           NFPROTO_NETDEV, skb->dev, NULL, NULL,
                           dev_net(skb->dev), NULL);
-       return nf_hook_slow(skb, &state);
+       return nf_hook_slow(skb, &state, e);
 }
 
 static inline void nf_hook_ingress_init(struct net_device *dev)
index b25ee9ffdbe67e06a5c70305360cceea3de43aef..1828900c94118ac959168873a91a6dcd4cb8d4cf 100644 (file)
@@ -78,8 +78,11 @@ static inline void *netpoll_poll_lock(struct napi_struct *napi)
        struct net_device *dev = napi->dev;
 
        if (dev && dev->npinfo) {
-               spin_lock(&napi->poll_lock);
-               napi->poll_owner = smp_processor_id();
+               int owner = smp_processor_id();
+
+               while (cmpxchg(&napi->poll_owner, -1, owner) != -1)
+                       cpu_relax();
+
                return napi;
        }
        return NULL;
@@ -89,10 +92,8 @@ static inline void netpoll_poll_unlock(void *have)
 {
        struct napi_struct *napi = have;
 
-       if (napi) {
-               napi->poll_owner = -1;
-               spin_unlock(&napi->poll_lock);
-       }
+       if (napi)
+               smp_store_release(&napi->poll_owner, -1);
 }
 
 static inline bool netpoll_tx_running(struct net_device *dev)
index 7676557ce357d682c3c47f0599e66bdd8a42225f..fc3c2420659395039be1288a82e8d62d98aefab3 100644 (file)
@@ -16,7 +16,6 @@
 #define _LINUX_NVME_H
 
 #include <linux/types.h>
-#include <linux/uuid.h>
 
 /* NQN names in commands fields specified one size */
 #define NVMF_NQN_FIELD_LEN     256
@@ -182,7 +181,7 @@ struct nvme_id_ctrl {
        char                    fr[8];
        __u8                    rab;
        __u8                    ieee[3];
-       __u8                    mic;
+       __u8                    cmic;
        __u8                    mdts;
        __le16                  cntlid;
        __le32                  ver;
@@ -202,7 +201,13 @@ struct nvme_id_ctrl {
        __u8                    apsta;
        __le16                  wctemp;
        __le16                  cctemp;
-       __u8                    rsvd270[50];
+       __le16                  mtfa;
+       __le32                  hmpre;
+       __le32                  hmmin;
+       __u8                    tnvmcap[16];
+       __u8                    unvmcap[16];
+       __le32                  rpmbs;
+       __u8                    rsvd316[4];
        __le16                  kas;
        __u8                    rsvd322[190];
        __u8                    sqes;
@@ -267,7 +272,7 @@ struct nvme_id_ns {
        __le16                  nabo;
        __le16                  nabspf;
        __u16                   rsvd46;
-       __le64                  nvmcap[2];
+       __u8                    nvmcap[16];
        __u8                    rsvd64[40];
        __u8                    nguid[16];
        __u8                    eui64[8];
@@ -276,6 +281,16 @@ struct nvme_id_ns {
        __u8                    vs[3712];
 };
 
+enum {
+       NVME_ID_CNS_NS                  = 0x00,
+       NVME_ID_CNS_CTRL                = 0x01,
+       NVME_ID_CNS_NS_ACTIVE_LIST      = 0x02,
+       NVME_ID_CNS_NS_PRESENT_LIST     = 0x10,
+       NVME_ID_CNS_NS_PRESENT          = 0x11,
+       NVME_ID_CNS_CTRL_NS_LIST        = 0x12,
+       NVME_ID_CNS_CTRL_LIST           = 0x13,
+};
+
 enum {
        NVME_NS_FEAT_THIN       = 1 << 0,
        NVME_NS_FLBAS_LBA_MASK  = 0xf,
@@ -556,8 +571,10 @@ enum nvme_admin_opcode {
        nvme_admin_set_features         = 0x09,
        nvme_admin_get_features         = 0x0a,
        nvme_admin_async_event          = 0x0c,
+       nvme_admin_ns_mgmt              = 0x0d,
        nvme_admin_activate_fw          = 0x10,
        nvme_admin_download_fw          = 0x11,
+       nvme_admin_ns_attach            = 0x15,
        nvme_admin_keep_alive           = 0x18,
        nvme_admin_format_nvm           = 0x80,
        nvme_admin_security_send        = 0x81,
@@ -583,6 +600,7 @@ enum {
        NVME_FEAT_WRITE_ATOMIC  = 0x0a,
        NVME_FEAT_ASYNC_EVENT   = 0x0b,
        NVME_FEAT_AUTO_PST      = 0x0c,
+       NVME_FEAT_HOST_MEM_BUF  = 0x0d,
        NVME_FEAT_KATO          = 0x0f,
        NVME_FEAT_SW_PROGRESS   = 0x80,
        NVME_FEAT_HOST_ID       = 0x81,
@@ -745,7 +763,7 @@ struct nvmf_common_command {
 struct nvmf_disc_rsp_page_entry {
        __u8            trtype;
        __u8            adrfam;
-       __u8            nqntype;
+       __u8            subtype;
        __u8            treq;
        __le16          portid;
        __le16          cntlid;
@@ -794,7 +812,7 @@ struct nvmf_connect_command {
 };
 
 struct nvmf_connect_data {
-       uuid_be         hostid;
+       __u8            hostid[16];
        __le16          cntlid;
        char            resv4[238];
        char            subsysnqn[NVMF_NQN_FIELD_LEN];
@@ -905,12 +923,23 @@ enum {
        NVME_SC_INVALID_VECTOR          = 0x108,
        NVME_SC_INVALID_LOG_PAGE        = 0x109,
        NVME_SC_INVALID_FORMAT          = 0x10a,
-       NVME_SC_FIRMWARE_NEEDS_RESET    = 0x10b,
+       NVME_SC_FW_NEEDS_CONV_RESET     = 0x10b,
        NVME_SC_INVALID_QUEUE           = 0x10c,
        NVME_SC_FEATURE_NOT_SAVEABLE    = 0x10d,
        NVME_SC_FEATURE_NOT_CHANGEABLE  = 0x10e,
        NVME_SC_FEATURE_NOT_PER_NS      = 0x10f,
-       NVME_SC_FW_NEEDS_RESET_SUBSYS   = 0x110,
+       NVME_SC_FW_NEEDS_SUBSYS_RESET   = 0x110,
+       NVME_SC_FW_NEEDS_RESET          = 0x111,
+       NVME_SC_FW_NEEDS_MAX_TIME       = 0x112,
+       NVME_SC_FW_ACIVATE_PROHIBITED   = 0x113,
+       NVME_SC_OVERLAPPING_RANGE       = 0x114,
+       NVME_SC_NS_INSUFFICENT_CAP      = 0x115,
+       NVME_SC_NS_ID_UNAVAILABLE       = 0x116,
+       NVME_SC_NS_ALREADY_ATTACHED     = 0x118,
+       NVME_SC_NS_IS_PRIVATE           = 0x119,
+       NVME_SC_NS_NOT_ATTACHED         = 0x11a,
+       NVME_SC_THIN_PROV_NOT_SUPP      = 0x11b,
+       NVME_SC_CTRL_LIST_INVALID       = 0x11c,
 
        /*
         * I/O Command Set Specific - NVM commands:
@@ -941,6 +970,7 @@ enum {
        NVME_SC_REFTAG_CHECK            = 0x284,
        NVME_SC_COMPARE_FAILED          = 0x285,
        NVME_SC_ACCESS_DENIED           = 0x286,
+       NVME_SC_UNWRITTEN_BLOCK         = 0x287,
 
        NVME_SC_DNR                     = 0x4000,
 };
@@ -960,6 +990,7 @@ struct nvme_completion {
        __le16  status;         /* did the command fail, and if so, why? */
 };
 
-#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8))
+#define NVME_VS(major, minor, tertiary) \
+       (((major) << 16) | ((minor) << 8) | (tertiary))
 
 #endif /* _LINUX_NVME_H */
index 060d0ede88df6dfc34fbfcd1e60629d8dce5373d..4741ecdb981743151b70afff63b10740dfaa4132 100644 (file)
@@ -1257,6 +1257,7 @@ extern u64 perf_swevent_set_period(struct perf_event *event);
 extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
 extern void perf_event_disable_local(struct perf_event *event);
+extern void perf_event_disable_inatomic(struct perf_event *event);
 extern void perf_event_task_tick(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
index e7e1fd3825646090ee3e4eeaae13d3f43d3cf2f2..feb8a98e8dd3bf71618b6edbcb6324c92972baa8 100644 (file)
@@ -417,6 +417,9 @@ struct phy_device {
        u32 advertising;
        u32 lp_advertising;
 
+       /* Energy efficient ethernet modes which should be prohibited */
+       u32 eee_broken_modes;
+
        int autoneg;
 
        int link_timeout;
@@ -447,6 +450,7 @@ struct phy_device {
        struct net_device *attached_dev;
 
        u8 mdix;
+       u8 mdix_ctrl;
 
        void (*adjust_link)(struct net_device *dev);
 };
@@ -611,6 +615,13 @@ struct phy_driver {
        void (*get_strings)(struct phy_device *dev, u8 *data);
        void (*get_stats)(struct phy_device *dev,
                          struct ethtool_stats *stats, u64 *data);
+
+       /* Get and Set PHY tunables */
+       int (*get_tunable)(struct phy_device *dev,
+                          struct ethtool_tunable *tuna, void *data);
+       int (*set_tunable)(struct phy_device *dev,
+                           struct ethtool_tunable *tuna,
+                           const void *data);
 };
 #define to_phy_driver(d) container_of(to_mdio_common_driver(d),                \
                                      struct phy_driver, mdiodrv)
@@ -786,6 +797,7 @@ void phy_detach(struct phy_device *phydev);
 void phy_start(struct phy_device *phydev);
 void phy_stop(struct phy_device *phydev);
 int phy_start_aneg(struct phy_device *phydev);
+int phy_aneg_done(struct phy_device *phydev);
 
 int phy_stop_interrupts(struct phy_device *phydev);
 
@@ -859,6 +871,7 @@ int phy_ethtool_get_link_ksettings(struct net_device *ndev,
                                   struct ethtool_link_ksettings *cmd);
 int phy_ethtool_set_link_ksettings(struct net_device *ndev,
                                   const struct ethtool_link_ksettings *cmd);
+int phy_ethtool_nway_reset(struct net_device *ndev);
 
 int __init mdio_bus_init(void);
 void mdio_bus_exit(void);
index ee1bed7dbfc634c5e490e00b694b5b170087decd..78bb0d7f6b11ac0a78e02eb35d12c9cbc0545a53 100644 (file)
@@ -253,6 +253,13 @@ static inline int phy_set_mode(struct phy *phy, enum phy_mode mode)
        return -ENOSYS;
 }
 
+static inline int phy_reset(struct phy *phy)
+{
+       if (!phy)
+               return 0;
+       return -ENOSYS;
+}
+
 static inline int phy_get_bus_width(struct phy *phy)
 {
        return -ENOSYS;
index e1d756f813487eed56cd86f974e8e5a08e2cb0a3..0e81b2778ae0e15790cce8308a6aba2c84bf514a 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef __LINUX_PIM_H
 #define __LINUX_PIM_H
 
+#include <linux/skbuff.h>
 #include <asm/byteorder.h>
 
 /* Message types - V1 */
 
 /* Message types - V2 */
 #define PIM_VERSION            2
-#define PIM_REGISTER           1
+
+/* RFC7761, sec 4.9:
+ *  Type
+ *        Types for specific PIM messages.  PIM Types are:
+ *
+ *  Message Type                          Destination
+ *  ---------------------------------------------------------------------
+ *  0 = Hello                             Multicast to ALL-PIM-ROUTERS
+ *  1 = Register                          Unicast to RP
+ *  2 = Register-Stop                     Unicast to source of Register
+ *                                        packet
+ *  3 = Join/Prune                        Multicast to ALL-PIM-ROUTERS
+ *  4 = Bootstrap                         Multicast to ALL-PIM-ROUTERS
+ *  5 = Assert                            Multicast to ALL-PIM-ROUTERS
+ *  6 = Graft (used in PIM-DM only)       Unicast to RPF'(S)
+ *  7 = Graft-Ack (used in PIM-DM only)   Unicast to source of Graft
+ *                                        packet
+ *  8 = Candidate-RP-Advertisement        Unicast to Domain's BSR
+ */
+enum {
+       PIM_TYPE_HELLO,
+       PIM_TYPE_REGISTER,
+       PIM_TYPE_REGISTER_STOP,
+       PIM_TYPE_JOIN_PRUNE,
+       PIM_TYPE_BOOTSTRAP,
+       PIM_TYPE_ASSERT,
+       PIM_TYPE_GRAFT,
+       PIM_TYPE_GRAFT_ACK,
+       PIM_TYPE_CANDIDATE_RP_ADV
+};
 
 #define PIM_NULL_REGISTER      cpu_to_be32(0x40000000)
 
-static inline bool ipmr_pimsm_enabled(void)
-{
-       return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2);
-}
+/* RFC7761, sec 4.9:
+ * The PIM header common to all PIM messages is:
+ *   0                   1                   2                   3
+ *   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |PIM Ver| Type  |   Reserved    |           Checksum            |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct pimhdr {
+       __u8    type;
+       __u8    reserved;
+       __be16  csum;
+};
 
 /* PIMv2 register message header layout (ietf-draft-idmr-pimvsm-v2-00.ps */
-struct pimreghdr
-{
+struct pimreghdr {
        __u8    type;
        __u8    reserved;
        __be16  csum;
        __be32  flags;
 };
 
-struct sk_buff;
-extern int pim_rcv_v1(struct sk_buff *);
+int pim_rcv_v1(struct sk_buff *skb);
+
+static inline bool ipmr_pimsm_enabled(void)
+{
+       return IS_BUILTIN(CONFIG_IP_PIMSM_V1) || IS_BUILTIN(CONFIG_IP_PIMSM_V2);
+}
+
+static inline struct pimhdr *pim_hdr(const struct sk_buff *skb)
+{
+       return (struct pimhdr *)skb_transport_header(skb);
+}
+
+static inline u8 pim_hdr_version(const struct pimhdr *pimhdr)
+{
+       return pimhdr->type >> 4;
+}
+
+static inline u8 pim_hdr_type(const struct pimhdr *pimhdr)
+{
+       return pimhdr->type & 0xf;
+}
+
+/* check if the address is 224.0.0.13, RFC7761 sec 4.3.1 */
+static inline bool pim_ipv4_all_pim_routers(__be32 addr)
+{
+       return addr == htonl(0xE000000D);
+}
 #endif
index e4c08c1ff0c5906786f549c36fbc9b060f323e58..a1bacf1150b2146e6cf560740459edf08f0c18a5 100644 (file)
@@ -25,7 +25,6 @@ static inline int mm_pkey_alloc(struct mm_struct *mm)
 
 static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
 {
-       WARN_ONCE(1, "free of protection key when disabled");
        return -EINVAL;
 }
 
index b97bf2ef996ef3e9e0cf906280741f20365d1455..368c7ad06ae5b173d9cbe866de539d358e42b516 100644 (file)
@@ -82,4 +82,8 @@ static inline struct proc_dir_entry *proc_net_mkdir(
        return proc_mkdir_data(name, 0, parent, net);
 }
 
+struct ns_common;
+int open_related_ns(struct ns_common *ns,
+                  struct ns_common *(*get_ns)(struct ns_common *ns));
+
 #endif /* _LINUX_PROC_FS_H */
index 5ad54fc66cf0f589aa9eca3275683e56e7650107..b76d47aba564ff0936779226ec1e1b7773f90582 100644 (file)
@@ -58,7 +58,14 @@ struct system_device_crosststamp;
  *
  * clock operations
  *
+ * @adjfine:  Adjusts the frequency of the hardware clock.
+ *            parameter scaled_ppm: Desired frequency offset from
+ *            nominal frequency in parts per million, but with a
+ *            16 bit binary fractional field.
+ *
  * @adjfreq:  Adjusts the frequency of the hardware clock.
+ *            This method is deprecated.  New drivers should implement
+ *            the @adjfine method instead.
  *            parameter delta: Desired frequency offset from nominal frequency
  *            in parts per billion
  *
@@ -108,6 +115,7 @@ struct ptp_clock_info {
        int n_pins;
        int pps;
        struct ptp_pin_desc *pin_config;
+       int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm);
        int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta);
        int (*adjtime)(struct ptp_clock_info *ptp, s64 delta);
        int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts);
index 72d88cf3ca25b70fe160fa1d250803f78ee2a1ee..37dfba101c6cd8e5f05ca4fda6db321900dd9ccc 100644 (file)
@@ -56,23 +56,6 @@ struct qed_chain_pbl_u32 {
        u32 cons_page_idx;
 };
 
-struct qed_chain_pbl {
-       /* Base address of a pre-allocated buffer for pbl */
-       dma_addr_t      p_phys_table;
-       void            *p_virt_table;
-
-       /* Table for keeping the virtual addresses of the chain pages,
-        * respectively to the physical addresses in the pbl table.
-        */
-       void **pp_virt_addr_tbl;
-
-       /* Index to current used page by producer/consumer */
-       union {
-               struct qed_chain_pbl_u16 pbl16;
-               struct qed_chain_pbl_u32 pbl32;
-       } u;
-};
-
 struct qed_chain_u16 {
        /* Cyclic index of next element to produce/consme */
        u16 prod_idx;
@@ -86,46 +69,78 @@ struct qed_chain_u32 {
 };
 
 struct qed_chain {
-       void                    *p_virt_addr;
-       dma_addr_t              p_phys_addr;
-       void                    *p_prod_elem;
-       void                    *p_cons_elem;
+       /* fastpath portion of the chain - required for commands such
+        * as produce / consume.
+        */
+       /* Point to next element to produce/consume */
+       void *p_prod_elem;
+       void *p_cons_elem;
+
+       /* Fastpath portions of the PBL [if exists] */
+       struct {
+               /* Table for keeping the virtual addresses of the chain pages,
+                * respectively to the physical addresses in the pbl table.
+                */
+               void **pp_virt_addr_tbl;
 
-       enum qed_chain_mode     mode;
-       enum qed_chain_use_mode intended_use; /* used to produce/consume */
-       enum qed_chain_cnt_type cnt_type;
+               union {
+                       struct qed_chain_pbl_u16 u16;
+                       struct qed_chain_pbl_u32 u32;
+               } c;
+       } pbl;
 
        union {
                struct qed_chain_u16 chain16;
                struct qed_chain_u32 chain32;
        } u;
 
+       /* Capacity counts only usable elements */
+       u32 capacity;
        u32 page_cnt;
 
-       /* Number of elements - capacity is for usable elements only,
-        * while size will contain total number of elements [for entire chain].
+       enum qed_chain_mode mode;
+
+       /* Elements information for fast calculations */
+       u16 elem_per_page;
+       u16 elem_per_page_mask;
+       u16 elem_size;
+       u16 next_page_mask;
+       u16 usable_per_page;
+       u8 elem_unusable;
+
+       u8 cnt_type;
+
+       /* Slowpath of the chain - required for initialization and destruction,
+        * but isn't involved in regular functionality.
         */
-       u32 capacity;
+
+       /* Base address of a pre-allocated buffer for pbl */
+       struct {
+               dma_addr_t p_phys_table;
+               void *p_virt_table;
+       } pbl_sp;
+
+       /* Address of first page of the chain - the address is required
+        * for fastpath operation [consume/produce] but only for the SINGLE
+        * flavour which isn't considered fastpath [== SPQ].
+        */
+       void *p_virt_addr;
+       dma_addr_t p_phys_addr;
+
+       /* Total number of elements [for entire chain] */
        u32 size;
 
-       /* Elements information for fast calculations */
-       u16                     elem_per_page;
-       u16                     elem_per_page_mask;
-       u16                     elem_unusable;
-       u16                     usable_per_page;
-       u16                     elem_size;
-       u16                     next_page_mask;
-       struct qed_chain_pbl    pbl;
+       u8 intended_use;
 };
 
 #define QED_CHAIN_PBL_ENTRY_SIZE        (8)
 #define QED_CHAIN_PAGE_SIZE             (0x1000)
 #define ELEMS_PER_PAGE(elem_size)       (QED_CHAIN_PAGE_SIZE / (elem_size))
 
-#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode)     \
-       ((mode == QED_CHAIN_MODE_NEXT_PTR) ?         \
-        (1 + ((sizeof(struct qed_chain_next) - 1) / \
-              (elem_size))) : 0)
+#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode)        \
+       (((mode) == QED_CHAIN_MODE_NEXT_PTR) ?           \
+        (u8)(1 + ((sizeof(struct qed_chain_next) - 1) / \
+                  (elem_size))) : 0)
 
 #define USABLE_ELEMS_PER_PAGE(elem_size, mode) \
        ((u32)(ELEMS_PER_PAGE(elem_size) -     \
@@ -186,7 +201,7 @@ static inline u16 qed_chain_get_usable_per_page(struct qed_chain *p_chain)
        return p_chain->usable_per_page;
 }
 
-static inline u16 qed_chain_get_unusable_per_page(struct qed_chain *p_chain)
+static inline u8 qed_chain_get_unusable_per_page(struct qed_chain *p_chain)
 {
        return p_chain->elem_unusable;
 }
@@ -198,7 +213,7 @@ static inline u32 qed_chain_get_page_cnt(struct qed_chain *p_chain)
 
 static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain)
 {
-       return p_chain->pbl.p_phys_table;
+       return p_chain->pbl_sp.p_phys_table;
 }
 
 /**
@@ -214,10 +229,10 @@ static inline dma_addr_t qed_chain_get_pbl_phys(struct qed_chain *p_chain)
 static inline void
 qed_chain_advance_page(struct qed_chain *p_chain,
                       void **p_next_elem, void *idx_to_inc, void *page_to_inc)
-
 {
        struct qed_chain_next *p_next = NULL;
        u32 page_index = 0;
+
        switch (p_chain->mode) {
        case QED_CHAIN_MODE_NEXT_PTR:
                p_next = *p_next_elem;
@@ -305,7 +320,7 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain)
                if ((p_chain->u.chain16.prod_idx &
                     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
                        p_prod_idx = &p_chain->u.chain16.prod_idx;
-                       p_prod_page_idx = &p_chain->pbl.u.pbl16.prod_page_idx;
+                       p_prod_page_idx = &p_chain->pbl.c.u16.prod_page_idx;
                        qed_chain_advance_page(p_chain, &p_chain->p_prod_elem,
                                               p_prod_idx, p_prod_page_idx);
                }
@@ -314,7 +329,7 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain)
                if ((p_chain->u.chain32.prod_idx &
                     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
                        p_prod_idx = &p_chain->u.chain32.prod_idx;
-                       p_prod_page_idx = &p_chain->pbl.u.pbl32.prod_page_idx;
+                       p_prod_page_idx = &p_chain->pbl.c.u32.prod_page_idx;
                        qed_chain_advance_page(p_chain, &p_chain->p_prod_elem,
                                               p_prod_idx, p_prod_page_idx);
                }
@@ -378,7 +393,7 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain)
                if ((p_chain->u.chain16.cons_idx &
                     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
                        p_cons_idx = &p_chain->u.chain16.cons_idx;
-                       p_cons_page_idx = &p_chain->pbl.u.pbl16.cons_page_idx;
+                       p_cons_page_idx = &p_chain->pbl.c.u16.cons_page_idx;
                        qed_chain_advance_page(p_chain, &p_chain->p_cons_elem,
                                               p_cons_idx, p_cons_page_idx);
                }
@@ -387,8 +402,8 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain)
                if ((p_chain->u.chain32.cons_idx &
                     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
                        p_cons_idx = &p_chain->u.chain32.cons_idx;
-                       p_cons_page_idx = &p_chain->pbl.u.pbl32.cons_page_idx;
-               qed_chain_advance_page(p_chain, &p_chain->p_cons_elem,
+                       p_cons_page_idx = &p_chain->pbl.c.u32.cons_page_idx;
+                       qed_chain_advance_page(p_chain, &p_chain->p_cons_elem,
                                               p_cons_idx, p_cons_page_idx);
                }
                p_chain->u.chain32.cons_idx++;
@@ -429,25 +444,26 @@ static inline void qed_chain_reset(struct qed_chain *p_chain)
                u32 reset_val = p_chain->page_cnt - 1;
 
                if (is_chain_u16(p_chain)) {
-                       p_chain->pbl.u.pbl16.prod_page_idx = (u16)reset_val;
-                       p_chain->pbl.u.pbl16.cons_page_idx = (u16)reset_val;
+                       p_chain->pbl.c.u16.prod_page_idx = (u16)reset_val;
+                       p_chain->pbl.c.u16.cons_page_idx = (u16)reset_val;
                } else {
-                       p_chain->pbl.u.pbl32.prod_page_idx = reset_val;
-                       p_chain->pbl.u.pbl32.cons_page_idx = reset_val;
+                       p_chain->pbl.c.u32.prod_page_idx = reset_val;
+                       p_chain->pbl.c.u32.cons_page_idx = reset_val;
                }
        }
 
        switch (p_chain->intended_use) {
-       case QED_CHAIN_USE_TO_CONSUME_PRODUCE:
-       case QED_CHAIN_USE_TO_PRODUCE:
-               /* Do nothing */
-               break;
-
        case QED_CHAIN_USE_TO_CONSUME:
                /* produce empty elements */
                for (i = 0; i < p_chain->capacity; i++)
                        qed_chain_recycle_consumed(p_chain);
                break;
+
+       case QED_CHAIN_USE_TO_CONSUME_PRODUCE:
+       case QED_CHAIN_USE_TO_PRODUCE:
+       default:
+               /* Do nothing */
+               break;
        }
 }
 
@@ -473,13 +489,13 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain,
        p_chain->p_virt_addr = NULL;
        p_chain->p_phys_addr = 0;
        p_chain->elem_size      = elem_size;
-       p_chain->intended_use = intended_use;
+       p_chain->intended_use = (u8)intended_use;
        p_chain->mode           = mode;
-       p_chain->cnt_type = cnt_type;
+       p_chain->cnt_type = (u8)cnt_type;
 
-       p_chain->elem_per_page          = ELEMS_PER_PAGE(elem_size);
+       p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size);
        p_chain->usable_per_page = USABLE_ELEMS_PER_PAGE(elem_size, mode);
-       p_chain->elem_per_page_mask     = p_chain->elem_per_page - 1;
+       p_chain->elem_per_page_mask = p_chain->elem_per_page - 1;
        p_chain->elem_unusable = UNUSABLE_ELEMS_PER_PAGE(elem_size, mode);
        p_chain->next_page_mask = (p_chain->usable_per_page &
                                   p_chain->elem_per_page_mask);
@@ -488,8 +504,8 @@ static inline void qed_chain_init_params(struct qed_chain *p_chain,
        p_chain->capacity = p_chain->usable_per_page * page_cnt;
        p_chain->size = p_chain->elem_per_page * page_cnt;
 
-       p_chain->pbl.p_phys_table = 0;
-       p_chain->pbl.p_virt_table = NULL;
+       p_chain->pbl_sp.p_phys_table = 0;
+       p_chain->pbl_sp.p_virt_table = NULL;
        p_chain->pbl.pp_virt_addr_tbl = NULL;
 }
 
@@ -530,8 +546,8 @@ static inline void qed_chain_init_pbl_mem(struct qed_chain *p_chain,
                                          dma_addr_t p_phys_pbl,
                                          void **pp_virt_addr_tbl)
 {
-       p_chain->pbl.p_phys_table = p_phys_pbl;
-       p_chain->pbl.p_virt_table = p_virt_pbl;
+       p_chain->pbl_sp.p_phys_table = p_phys_pbl;
+       p_chain->pbl_sp.p_virt_table = p_virt_pbl;
        p_chain->pbl.pp_virt_addr_tbl = pp_virt_addr_tbl;
 }
 
index 15130805d792dd7918bf8e04dca33fa84cd238d7..7a52f7c58c37ca1942ab29388ad9110282fd5480 100644 (file)
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_iov_if.h>
 
+struct qed_queue_start_common_params {
+       /* Should always be relative to entity sending this. */
+       u8 vport_id;
+       u16 queue_id;
+
+       /* Relative, but relevant only for PFs */
+       u8 stats_id;
+
+       /* These are always absolute */
+       u16 sb;
+       u8 sb_idx;
+};
+
+struct qed_rxq_start_ret_params {
+       void __iomem *p_prod;
+       void *p_handle;
+};
+
+struct qed_txq_start_ret_params {
+       void __iomem *p_doorbell;
+       void *p_handle;
+};
+
 struct qed_dev_eth_info {
        struct qed_dev_info common;
 
@@ -22,7 +45,7 @@ struct qed_dev_eth_info {
        u8      num_tc;
 
        u8      port_mac[ETH_ALEN];
-       u8      num_vlan_filters;
+       u16     num_vlan_filters;
        u16     num_mac_filters;
 
        /* Legacy VF - this affects the datapath, so qede has to know */
@@ -56,18 +79,6 @@ struct qed_start_vport_params {
        bool clear_stats;
 };
 
-struct qed_stop_rxq_params {
-       u8 rss_id;
-       u8 rx_queue_id;
-       u8 vport_id;
-       bool eq_completion_only;
-};
-
-struct qed_stop_txq_params {
-       u8 rss_id;
-       u8 tx_queue_id;
-};
-
 enum qed_filter_rx_mode_type {
        QED_FILTER_RX_MODE_TYPE_REGULAR,
        QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC,
@@ -112,15 +123,6 @@ struct qed_filter_params {
        union qed_filter_type_params filter;
 };
 
-struct qed_queue_start_common_params {
-       u8 rss_id;
-       u8 queue_id;
-       u8 vport_id;
-       u16 sb;
-       u16 sb_idx;
-       u16 vf_qid;
-};
-
 struct qed_tunn_params {
        u16 vxlan_port;
        u8 update_vxlan_port;
@@ -220,24 +222,24 @@ struct qed_eth_ops {
                            struct qed_update_vport_params *params);
 
        int (*q_rx_start)(struct qed_dev *cdev,
+                         u8 rss_num,
                          struct qed_queue_start_common_params *params,
                          u16 bd_max_bytes,
                          dma_addr_t bd_chain_phys_addr,
                          dma_addr_t cqe_pbl_addr,
                          u16 cqe_pbl_size,
-                         void __iomem **pp_prod);
+                         struct qed_rxq_start_ret_params *ret_params);
 
-       int (*q_rx_stop)(struct qed_dev *cdev,
-                        struct qed_stop_rxq_params *params);
+       int (*q_rx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle);
 
        int (*q_tx_start)(struct qed_dev *cdev,
+                         u8 rss_num,
                          struct qed_queue_start_common_params *params,
                          dma_addr_t pbl_addr,
                          u16 pbl_size,
-                         void __iomem **pp_doorbell);
+                         struct qed_txq_start_ret_params *ret_params);
 
-       int (*q_tx_stop)(struct qed_dev *cdev,
-                        struct qed_stop_txq_params *params);
+       int (*q_tx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle);
 
        int (*filter_config)(struct qed_dev *cdev,
                             struct qed_filter_params *params);
index f9ae903bbb8445c8d44c8531f2ebd1a47cc05f25..4b454f4f5b2511b71ba0c5815da51a20dfbef48a 100644 (file)
@@ -146,6 +146,7 @@ enum qed_led_mode {
 #define DIRECT_REG_RD(reg_addr) readl((void __iomem *)(reg_addr))
 
 #define QED_COALESCE_MAX 0xFF
+#define QED_DEFAULT_RX_USECS 12
 
 /* forward */
 struct qed_dev;
@@ -165,6 +166,7 @@ struct qed_iscsi_pf_params {
        u32 max_cwnd;
        u16 cq_num_entries;
        u16 cmdq_num_entries;
+       u32 two_msl_timer;
        u16 dup_ack_threshold;
        u16 tx_sws_timer;
        u16 min_rto;
@@ -266,11 +268,15 @@ struct qed_dev_info {
        u8              mf_mode;
        bool            tx_switching;
        bool            rdma_supported;
+       u16             mtu;
+
+       bool wol_support;
 };
 
 enum qed_sb_type {
        QED_SB_TYPE_L2_QUEUE,
        QED_SB_TYPE_CNQ,
+       QED_SB_TYPE_STORAGE,
 };
 
 enum qed_protocol {
@@ -400,6 +406,15 @@ struct qed_selftest_ops {
  * @return 0 on success, error otherwise.
  */
        int (*selftest_clock)(struct qed_dev *cdev);
+
+/**
+ * @brief selftest_nvram - Perform nvram test
+ *
+ * @param cdev
+ *
+ * @return 0 on success, error otherwise.
+ */
+       int (*selftest_nvram) (struct qed_dev *cdev);
 };
 
 struct qed_common_ops {
@@ -553,6 +568,41 @@ struct qed_common_ops {
  */
        int (*set_led)(struct qed_dev *cdev,
                       enum qed_led_mode mode);
+
+/**
+ * @brief update_drv_state - API to inform the change in the driver state.
+ *
+ * @param cdev
+ * @param active
+ *
+ */
+       int (*update_drv_state)(struct qed_dev *cdev, bool active);
+
+/**
+ * @brief update_mac - API to inform the change in the mac address
+ *
+ * @param cdev
+ * @param mac
+ *
+ */
+       int (*update_mac)(struct qed_dev *cdev, u8 *mac);
+
+/**
+ * @brief update_mtu - API to inform the change in the mtu
+ *
+ * @param cdev
+ * @param mtu
+ *
+ */
+       int (*update_mtu)(struct qed_dev *cdev, u16 mtu);
+
+/**
+ * @brief update_wol - update of changes in the WoL configuration
+ *
+ * @param cdev
+ * @param enabled - true iff WoL should be enabled.
+ */
+       int (*update_wol) (struct qed_dev *cdev, bool enabled);
 };
 
 #define MASK_FIELD(_name, _value) \
diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h
new file mode 100644 (file)
index 0000000..d279124
--- /dev/null
@@ -0,0 +1,229 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_ISCSI_IF_H
+#define _QED_ISCSI_IF_H
+#include <linux/types.h>
+#include <linux/qed/qed_if.h>
+
+typedef int (*iscsi_event_cb_t) (void *context,
+                                u8 fw_event_code, void *fw_handle);
+struct qed_iscsi_stats {
+       u64 iscsi_rx_bytes_cnt;
+       u64 iscsi_rx_packet_cnt;
+       u64 iscsi_rx_new_ooo_isle_events_cnt;
+       u32 iscsi_cmdq_threshold_cnt;
+       u32 iscsi_rq_threshold_cnt;
+       u32 iscsi_immq_threshold_cnt;
+
+       u64 iscsi_rx_dropped_pdus_task_not_valid;
+
+       u64 iscsi_rx_data_pdu_cnt;
+       u64 iscsi_rx_r2t_pdu_cnt;
+       u64 iscsi_rx_total_pdu_cnt;
+
+       u64 iscsi_tx_go_to_slow_start_event_cnt;
+       u64 iscsi_tx_fast_retransmit_event_cnt;
+
+       u64 iscsi_tx_data_pdu_cnt;
+       u64 iscsi_tx_r2t_pdu_cnt;
+       u64 iscsi_tx_total_pdu_cnt;
+
+       u64 iscsi_tx_bytes_cnt;
+       u64 iscsi_tx_packet_cnt;
+};
+
+struct qed_dev_iscsi_info {
+       struct qed_dev_info common;
+
+       void __iomem *primary_dbq_rq_addr;
+       void __iomem *secondary_bdq_rq_addr;
+};
+
+struct qed_iscsi_id_params {
+       u8 mac[ETH_ALEN];
+       u32 ip[4];
+       u16 port;
+};
+
+struct qed_iscsi_params_offload {
+       u8 layer_code;
+       dma_addr_t sq_pbl_addr;
+       u32 initial_ack;
+
+       struct qed_iscsi_id_params src;
+       struct qed_iscsi_id_params dst;
+       u16 vlan_id;
+       u8 tcp_flags;
+       u8 ip_version;
+       u8 default_cq;
+
+       u8 ka_max_probe_cnt;
+       u8 dup_ack_theshold;
+       u32 rcv_next;
+       u32 snd_una;
+       u32 snd_next;
+       u32 snd_max;
+       u32 snd_wnd;
+       u32 rcv_wnd;
+       u32 snd_wl1;
+       u32 cwnd;
+       u32 ss_thresh;
+       u16 srtt;
+       u16 rtt_var;
+       u32 ts_time;
+       u32 ts_recent;
+       u32 ts_recent_age;
+       u32 total_rt;
+       u32 ka_timeout_delta;
+       u32 rt_timeout_delta;
+       u8 dup_ack_cnt;
+       u8 snd_wnd_probe_cnt;
+       u8 ka_probe_cnt;
+       u8 rt_cnt;
+       u32 flow_label;
+       u32 ka_timeout;
+       u32 ka_interval;
+       u32 max_rt_time;
+       u32 initial_rcv_wnd;
+       u8 ttl;
+       u8 tos_or_tc;
+       u16 remote_port;
+       u16 local_port;
+       u16 mss;
+       u8 snd_wnd_scale;
+       u8 rcv_wnd_scale;
+       u32 ts_ticks_per_second;
+       u16 da_timeout_value;
+       u8 ack_frequency;
+};
+
+struct qed_iscsi_params_update {
+       u8 update_flag;
+#define QED_ISCSI_CONN_HD_EN            BIT(0)
+#define QED_ISCSI_CONN_DD_EN            BIT(1)
+#define QED_ISCSI_CONN_INITIAL_R2T      BIT(2)
+#define QED_ISCSI_CONN_IMMEDIATE_DATA   BIT(3)
+
+       u32 max_seq_size;
+       u32 max_recv_pdu_length;
+       u32 max_send_pdu_length;
+       u32 first_seq_length;
+       u32 exp_stat_sn;
+};
+
+#define MAX_TID_BLOCKS_ISCSI (512)
+struct qed_iscsi_tid {
+       u32 size;               /* In bytes per task */
+       u32 num_tids_per_block;
+       u8 *blocks[MAX_TID_BLOCKS_ISCSI];
+};
+
+struct qed_iscsi_cb_ops {
+       struct qed_common_cb_ops common;
+};
+
+/**
+ * struct qed_iscsi_ops - qed iSCSI operations.
+ * @common:            common operations pointer
+ * @ll2:               light L2 operations pointer
+ * @fill_dev_info:     fills iSCSI specific information
+ *                     @param cdev
+ *                     @param info
+ *                     @return 0 on success, otherwise error value.
+ * @register_ops:      register iscsi operations
+ *                     @param cdev
+ *                     @param ops - specified using qed_iscsi_cb_ops
+ *                     @param cookie - driver private
+ * @start:             iscsi in FW
+ *                     @param cdev
+ *                     @param tasks - qed will fill information about tasks
+ *                     @return 0 on success, otherwise error value.
+ * @stop:              iscsi in FW
+ *                     @param cdev
+ *                     @return 0 on success, otherwise error value.
+ * @acquire_conn:      acquire a new iscsi connection
+ *                     @param cdev
+ *                     @param handle - qed will fill handle that should be
+ *                             used henceforth as identifier of the
+ *                             connection.
+ *                     @param p_doorbell - qed will fill the address of the
+ *                             doorbell.
+ *                     @return 0 on success, otherwise error value.
+ * @release_conn:      release a previously acquired iscsi connection
+ *                     @param cdev
+ *                     @param handle - the connection handle.
+ *                     @return 0 on success, otherwise error value.
+ * @offload_conn:      configures an offloaded connection
+ *                     @param cdev
+ *                     @param handle - the connection handle.
+ *                     @param conn_info - the configuration to use for the
+ *                             offload.
+ *                     @return 0 on success, otherwise error value.
+ * @update_conn:       updates an offloaded connection
+ *                     @param cdev
+ *                     @param handle - the connection handle.
+ *                     @param conn_info - the configuration to use for the
+ *                             offload.
+ *                     @return 0 on success, otherwise error value.
+ * @destroy_conn:      stops an offloaded connection
+ *                     @param cdev
+ *                     @param handle - the connection handle.
+ *                     @return 0 on success, otherwise error value.
+ * @clear_sq:          clear all task in sq
+ *                     @param cdev
+ *                     @param handle - the connection handle.
+ *                     @return 0 on success, otherwise error value.
+ * @get_stats:         iSCSI related statistics
+ *                     @param cdev
+ *                     @param stats - pointer to struct that would be filled
+ *                             with stats
+ *                     @return 0 on success, error otherwise.
+ */
+struct qed_iscsi_ops {
+       const struct qed_common_ops *common;
+
+       const struct qed_ll2_ops *ll2;
+
+       int (*fill_dev_info)(struct qed_dev *cdev,
+                            struct qed_dev_iscsi_info *info);
+
+       void (*register_ops)(struct qed_dev *cdev,
+                            struct qed_iscsi_cb_ops *ops, void *cookie);
+
+       int (*start)(struct qed_dev *cdev,
+                    struct qed_iscsi_tid *tasks,
+                    void *event_context, iscsi_event_cb_t async_event_cb);
+
+       int (*stop)(struct qed_dev *cdev);
+
+       int (*acquire_conn)(struct qed_dev *cdev,
+                           u32 *handle,
+                           u32 *fw_cid, void __iomem **p_doorbell);
+
+       int (*release_conn)(struct qed_dev *cdev, u32 handle);
+
+       int (*offload_conn)(struct qed_dev *cdev,
+                           u32 handle,
+                           struct qed_iscsi_params_offload *conn_info);
+
+       int (*update_conn)(struct qed_dev *cdev,
+                          u32 handle,
+                          struct qed_iscsi_params_update *conn_info);
+
+       int (*destroy_conn)(struct qed_dev *cdev, u32 handle, u8 abrt_conn);
+
+       int (*clear_sq)(struct qed_dev *cdev, u32 handle);
+
+       int (*get_stats)(struct qed_dev *cdev,
+                        struct qed_iscsi_stats *stats);
+};
+
+const struct qed_iscsi_ops *qed_get_iscsi_ops(void);
+void qed_put_iscsi_ops(void);
+#endif
index 99fbe6d55acb29dcb8fd39cdbb1f1f1f9c2bbe49..f48d64b0e2fb943a492981f30f3851ed77103711 100644 (file)
@@ -68,7 +68,7 @@ void qede_roce_unregister_driver(struct qedr_driver *drv);
 
 bool qede_roce_supported(struct qede_dev *dev);
 
-#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+#if IS_ENABLED(CONFIG_QED_RDMA)
 int qede_roce_dev_add(struct qede_dev *dev);
 void qede_roce_dev_event_open(struct qede_dev *dev);
 void qede_roce_dev_event_close(struct qede_dev *dev);
index f7bb7a355cf71381100730a2379ba206bc3258ea..7bd2403e4fef1ad7fb0a5f03b4e104e96234d26b 100644 (file)
@@ -18,9 +18,20 @@ struct random_ready_callback {
 };
 
 extern void add_device_randomness(const void *, unsigned int);
+
+#if defined(CONFIG_GCC_PLUGIN_LATENT_ENTROPY) && !defined(__CHECKER__)
+static inline void add_latent_entropy(void)
+{
+       add_device_randomness((const void *)&latent_entropy,
+                             sizeof(latent_entropy));
+}
+#else
+static inline void add_latent_entropy(void) {}
+#endif
+
 extern void add_input_randomness(unsigned int type, unsigned int code,
-                                unsigned int value);
-extern void add_interrupt_randomness(int irq, int irq_flags);
+                                unsigned int value) __latent_entropy;
+extern void add_interrupt_randomness(int irq, int irq_flags) __latent_entropy;
 
 extern void get_random_bytes(void *buf, int nbytes);
 extern int add_random_ready_callback(struct random_ready_callback *rdy);
index 9adc7b21903d3dc97987bbcdcd6caa7b449cecc6..f6673132431d09c3caa0c1394286fb310c93f9c1 100644 (file)
@@ -15,6 +15,7 @@
 
 #include <linux/list.h>
 #include <linux/rbtree.h>
+#include <linux/delay.h>
 #include <linux/err.h>
 #include <linux/bug.h>
 #include <linux/lockdep.h>
@@ -116,22 +117,22 @@ struct reg_sequence {
 #define regmap_read_poll_timeout(map, addr, val, cond, sleep_us, timeout_us) \
 ({ \
        ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
-       int ret; \
+       int pollret; \
        might_sleep_if(sleep_us); \
        for (;;) { \
-               ret = regmap_read((map), (addr), &(val)); \
-               if (ret) \
+               pollret = regmap_read((map), (addr), &(val)); \
+               if (pollret) \
                        break; \
                if (cond) \
                        break; \
                if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
-                       ret = regmap_read((map), (addr), &(val)); \
+                       pollret = regmap_read((map), (addr), &(val)); \
                        break; \
                } \
                if (sleep_us) \
                        usleep_range((sleep_us >> 2) + 1, sleep_us); \
        } \
-       ret ?: ((cond) ? 0 : -ETIMEDOUT); \
+       pollret ?: ((cond) ? 0 : -ETIMEDOUT); \
 })
 
 #ifdef CONFIG_REGMAP
index 348f51b0ec92ed02e72a2060eedb03f37cd0995f..e9c009dc3a4a35a256731f37b3b44d3f05b317e4 100644 (file)
@@ -2567,6 +2567,7 @@ extern void sched_autogroup_create_attach(struct task_struct *p);
 extern void sched_autogroup_detach(struct task_struct *p);
 extern void sched_autogroup_fork(struct signal_struct *sig);
 extern void sched_autogroup_exit(struct signal_struct *sig);
+extern void sched_autogroup_exit_task(struct task_struct *p);
 #ifdef CONFIG_PROC_FS
 extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
 extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
@@ -2576,6 +2577,7 @@ static inline void sched_autogroup_create_attach(struct task_struct *p) { }
 static inline void sched_autogroup_detach(struct task_struct *p) { }
 static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit_task(struct task_struct *p) { }
 #endif
 
 extern int yield_to(struct task_struct *p, bool preempt);
diff --git a/include/linux/seg6.h b/include/linux/seg6.h
new file mode 100644 (file)
index 0000000..7a66d2b
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SEG6_H
+#define _LINUX_SEG6_H
+
+#include <uapi/linux/seg6.h>
+
+#endif
diff --git a/include/linux/seg6_genl.h b/include/linux/seg6_genl.h
new file mode 100644 (file)
index 0000000..d6c3fb4
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SEG6_GENL_H
+#define _LINUX_SEG6_GENL_H
+
+#include <uapi/linux/seg6_genl.h>
+
+#endif
diff --git a/include/linux/seg6_hmac.h b/include/linux/seg6_hmac.h
new file mode 100644 (file)
index 0000000..da437eb
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SEG6_HMAC_H
+#define _LINUX_SEG6_HMAC_H
+
+#include <uapi/linux/seg6_hmac.h>
+
+#endif
diff --git a/include/linux/seg6_iptunnel.h b/include/linux/seg6_iptunnel.h
new file mode 100644 (file)
index 0000000..5377cf6
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _LINUX_SEG6_IPTUNNEL_H
+#define _LINUX_SEG6_IPTUNNEL_H
+
+#include <uapi/linux/seg6_iptunnel.h>
+
+#endif
index 601258f6e62153f3814e2a38e4c21ad97fc7ee87..9c535fbccf2c7dbfae04cee393460e86d588c26b 100644 (file)
@@ -936,6 +936,7 @@ struct sk_buff_fclones {
 
 /**
  *     skb_fclone_busy - check if fclone is busy
+ *     @sk: socket
  *     @skb: buffer
  *
  * Returns true if skb is a fast clone, and its clone is not freed.
@@ -1086,7 +1087,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4)
 }
 
 void __skb_get_hash(struct sk_buff *skb);
-u32 __skb_get_hash_symmetric(struct sk_buff *skb);
+u32 __skb_get_hash_symmetric(const struct sk_buff *skb);
 u32 skb_get_poff(const struct sk_buff *skb);
 u32 __skb_get_poff(const struct sk_buff *skb, void *data,
                   const struct flow_keys *keys, int hlen);
@@ -1798,11 +1799,11 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb)
        return skb->len - skb->data_len;
 }
 
-static inline int skb_pagelen(const struct sk_buff *skb)
+static inline unsigned int skb_pagelen(const struct sk_buff *skb)
 {
-       int i, len = 0;
+       unsigned int i, len = 0;
 
-       for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)
+       for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--)
                len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
        return len + skb_headlen(skb);
 }
@@ -3032,9 +3033,13 @@ static inline void skb_frag_list_init(struct sk_buff *skb)
 int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
                                const struct sk_buff *skb);
 struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags,
+                                       void (*destructor)(struct sock *sk,
+                                                          struct sk_buff *skb),
                                        int *peeked, int *off, int *err,
                                        struct sk_buff **last);
 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+                                   void (*destructor)(struct sock *sk,
+                                                      struct sk_buff *skb),
                                    int *peeked, int *off, int *err);
 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
                                  int *err);
index 705840e0438fec53136885d8b1b04f36367a7e04..3537fb33cc90c6ae0e9628f4ca8c08c692b4bdda 100644 (file)
@@ -135,8 +135,6 @@ struct plat_stmmacenet_data {
        void (*bus_setup)(void __iomem *ioaddr);
        int (*init)(struct platform_device *pdev, void *priv);
        void (*exit)(struct platform_device *pdev, void *priv);
-       void (*suspend)(struct platform_device *pdev, void *priv);
-       void (*resume)(struct platform_device *pdev, void *priv);
        void *bsp_priv;
        struct stmmac_axi *axi;
        int has_gmac4;
index ab02a457da1fa8aea378889394730c708b03e89b..e5d19344037491651c6c80f6b310842ee5715126 100644 (file)
@@ -25,6 +25,7 @@ struct svc_xprt_ops {
        void            (*xpo_detach)(struct svc_xprt *);
        void            (*xpo_free)(struct svc_xprt *);
        int             (*xpo_secure_port)(struct svc_rqst *);
+       void            (*xpo_kill_temp_xprt)(struct svc_xprt *);
 };
 
 struct svc_xprt_class {
index 0d7abb8b7315ce3ab5162bc606c64337f3709d20..91a740f6b884236e3ed5771f01397f4647f3cd9d 100644 (file)
@@ -902,8 +902,5 @@ asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len,
                                  unsigned long prot, int pkey);
 asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val);
 asmlinkage long sys_pkey_free(int pkey);
-//asmlinkage long sys_pkey_get(int pkey, unsigned long flags);
-//asmlinkage long sys_pkey_set(int pkey, unsigned long access_rights,
-//                          unsigned long flags);
 
 #endif
index a17ae7b8521805847aa1c81920cafd863bc4a275..734bab4c3bef9318162aacdf96d486708ae2035f 100644 (file)
@@ -123,6 +123,7 @@ struct tcp_request_sock {
        u32                             txhash;
        u32                             rcv_isn;
        u32                             snt_isn;
+       u32                             ts_off;
        u32                             last_oow_ack_time; /* last SYNACK */
        u32                             rcv_nxt; /* the ack # by SYNACK. For
                                                  * FastOpen it's the seq#
@@ -176,8 +177,6 @@ struct tcp_sock {
                                 * sum(delta(snd_una)), or how many bytes
                                 * were acked.
                                 */
-       struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */
-
        u32     snd_una;        /* First byte we want an ack for        */
        u32     snd_sml;        /* Last byte of the most recently transmitted small packet */
        u32     rcv_tstamp;     /* timestamp of last received ACK (for keepalives) */
@@ -213,8 +212,11 @@ struct tcp_sock {
                u8 reord;    /* reordering detected */
        } rack;
        u16     advmss;         /* Advertised MSS                       */
-       u8      rate_app_limited:1,  /* rate_{delivered,interval_us} limited? */
-               unused:7;
+       u32     chrono_start;   /* Start time in jiffies of a TCP chrono */
+       u32     chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
+       u8      chrono_type:2,  /* current chronograph type */
+               rate_app_limited:1,  /* rate_{delivered,interval_us} limited? */
+               unused:5;
        u8      nonagle     : 4,/* Disable Nagle algorithm?             */
                thin_lto    : 1,/* Use linear timeouts for thin streams */
                thin_dupack : 1,/* Fast retransmit on first dupack      */
@@ -427,4 +429,6 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp)
        tp->saved_syn = NULL;
 }
 
+struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk);
+
 #endif /* _LINUX_TCP_H */
index 45f004e9cc598c0e57f665ca3476b5a1d623b695..2873baf5372a7b1484888566725e2c4dc7368d26 100644 (file)
 struct timespec;
 struct compat_timespec;
 
-#ifdef CONFIG_THREAD_INFO_IN_TASK
-struct thread_info {
-       unsigned long           flags;          /* low level flags */
-};
-
-#define INIT_THREAD_INFO(tsk)                  \
-{                                              \
-       .flags          = 0,                    \
-}
-#endif
-
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 #define current_thread_info() ((struct thread_info *)current)
 #endif
index 1c912f85e041756476008f9616be4414019a5614..66204007d7ac32f1f32068c1f46fc1a033e6953f 100644 (file)
@@ -58,7 +58,7 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
                                          struct virtio_net_hdr *hdr,
                                          bool little_endian)
 {
-       memset(hdr, 0, sizeof(*hdr));
+       memset(hdr, 0, sizeof(*hdr));   /* no info leak */
 
        if (skb_is_gso(skb)) {
                struct skb_shared_info *sinfo = skb_shinfo(skb);
@@ -98,4 +98,4 @@ static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb,
        return 0;
 }
 
-#endif /* _LINUX_VIRTIO_BYTEORDER */
+#endif /* _LINUX_VIRTIO_NET_H */
index 82f3c912a5b176d5f2d57361765c9bccbdded367..9dddf77a69ccbcb003cfa66bcc0de337f78f3dae 100644 (file)
@@ -42,7 +42,6 @@ struct tc_action {
        struct gnet_stats_basic_cpu __percpu *cpu_bstats;
        struct gnet_stats_queue __percpu *cpu_qstats;
 };
-#define tcf_act                common.tcfa_act
 #define tcf_head       common.tcfa_head
 #define tcf_index      common.tcfa_index
 #define tcf_refcnt     common.tcfa_refcnt
@@ -120,6 +119,8 @@ struct tc_action_ops {
        int     (*walk)(struct net *, struct sk_buff *,
                        struct netlink_callback *, int, const struct tc_action_ops *);
        void    (*stats_update)(struct tc_action *, u64, u32, u64);
+       int     (*get_dev)(const struct tc_action *a, struct net *net,
+                          struct net_device **mirred_dev);
 };
 
 struct tc_action_net {
index f2d0727879472451e0c2815fc68f90b66a4c388c..8f998afc138434f672ab28883287e463f60f4733 100644 (file)
@@ -174,6 +174,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex,
                      const struct in6_addr *addr);
 int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
                      const struct in6_addr *addr);
+void __ipv6_sock_mc_close(struct sock *sk);
 void ipv6_sock_mc_close(struct sock *sk);
 bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
                    const struct in6_addr *src_addr);
index f00bf667ec3399dd38850e048557c41fa7786dc6..554671c81f4a39a8a773a01a318af0b1c70b5617 100644 (file)
@@ -1018,7 +1018,7 @@ static inline void hci_set_drvdata(struct hci_dev *hdev, void *data)
 }
 
 struct hci_dev *hci_dev_get(int index);
-struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src);
+struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, u8 src_type);
 
 struct hci_dev *hci_alloc_dev(void);
 void hci_free_dev(struct hci_dev *hdev);
index f32f7ef8a23a25a06f8e095f6982b57c78b0dd7f..3c857778a6ca6870f7e7d5604adcd263380e4708 100644 (file)
@@ -681,7 +681,7 @@ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip)
 }
 
 /* exported from bond_main.c */
-extern int bond_net_id;
+extern unsigned int bond_net_id;
 extern const struct bond_parm_tbl bond_lacp_tbl[];
 extern const struct bond_parm_tbl xmit_hashtype_tbl[];
 extern const struct bond_parm_tbl arp_validate_tbl[];
index 2fbeb1313c0f4f78ac82ddf6c18d1016a901f99a..d73b849e29a6869e282103f329c3a02f4e1a6882 100644 (file)
@@ -58,10 +58,9 @@ static inline unsigned long busy_loop_end_time(void)
        return busy_loop_us_clock() + ACCESS_ONCE(sysctl_net_busy_poll);
 }
 
-static inline bool sk_can_busy_loop(struct sock *sk)
+static inline bool sk_can_busy_loop(const struct sock *sk)
 {
-       return sk->sk_ll_usec && sk->sk_napi_id &&
-              !need_resched() && !signal_pending(current);
+       return sk->sk_ll_usec && sk->sk_napi_id && !signal_pending(current);
 }
 
 
@@ -81,11 +80,6 @@ static inline void skb_mark_napi_id(struct sk_buff *skb,
        skb->napi_id = napi->napi_id;
 }
 
-/* used in the protocol hanlder to propagate the napi_id to the socket */
-static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb)
-{
-       sk->sk_napi_id = skb->napi_id;
-}
 
 #else /* CONFIG_NET_RX_BUSY_POLL */
 static inline unsigned long net_busy_loop_on(void)
@@ -108,10 +102,6 @@ static inline void skb_mark_napi_id(struct sk_buff *skb,
 {
 }
 
-static inline void sk_mark_napi_id(struct sock *sk, struct sk_buff *skb)
-{
-}
-
 static inline bool busy_loop_timeout(unsigned long end_time)
 {
        return true;
@@ -123,4 +113,23 @@ static inline bool sk_busy_loop(struct sock *sk, int nonblock)
 }
 
 #endif /* CONFIG_NET_RX_BUSY_POLL */
+
+/* used in the protocol handler to propagate the napi_id to the socket */
+static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       sk->sk_napi_id = skb->napi_id;
+#endif
+}
+
+/* variant for unconnected sockets: set napi_id only if not already set */
+static inline void sk_mark_napi_id_once(struct sock *sk,
+                                       const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+       if (!sk->sk_napi_id)
+               sk->sk_napi_id = skb->napi_id;
+#endif
+}
+
 #endif /* _LINUX_NET_BUSY_POLL_H */
index fe78f02a242e723e29dfdfb4baf39bbadc68962d..2019310cf135d2f22727bd6554a76b3c8929fc87 100644 (file)
@@ -771,6 +771,30 @@ struct cfg80211_csa_settings {
        u8 count;
 };
 
+/**
+ * struct iface_combination_params - input parameters for interface combinations
+ *
+ * Used to pass interface combination parameters
+ *
+ * @num_different_channels: the number of different channels we want
+ *     to use for verification
+ * @radar_detect: a bitmap where each bit corresponds to a channel
+ *     width where radar detection is needed, as in the definition of
+ *     &struct ieee80211_iface_combination.@radar_detect_widths
+ * @iftype_num: array with the number of interfaces of each interface
+ *     type.  The index is the interface type as specified in &enum
+ *     nl80211_iftype.
+ * @new_beacon_int: set this to the beacon interval of a new interface
+ *     that's not operating yet, if such is to be checked as part of
+ *     the verification
+ */
+struct iface_combination_params {
+       int num_different_channels;
+       u8 radar_detect;
+       int iftype_num[NUM_NL80211_IFTYPES];
+       u32 new_beacon_int;
+};
+
 /**
  * enum station_parameters_apply_mask - station parameter values to apply
  * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp)
@@ -796,9 +820,9 @@ enum station_parameters_apply_mask {
  *     (or NULL for no change)
  * @supported_rates_len: number of supported rates
  * @sta_flags_mask: station flags that changed
- *     (bitmask of BIT(NL80211_STA_FLAG_...))
+ *     (bitmask of BIT(%NL80211_STA_FLAG_...))
  * @sta_flags_set: station flags values
- *     (bitmask of BIT(NL80211_STA_FLAG_...))
+ *     (bitmask of BIT(%NL80211_STA_FLAG_...))
  * @listen_interval: listen interval or -1 for no change
  * @aid: AID or zero for no change
  * @peer_aid: mesh peer AID or zero for no change
@@ -1761,9 +1785,11 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie);
  * @key_len: length of WEP key for shared key authentication
  * @key_idx: index of WEP key for shared key authentication
  * @key: WEP key for shared key authentication
- * @sae_data: Non-IE data to use with SAE or %NULL. This starts with
- *     Authentication transaction sequence number field.
- * @sae_data_len: Length of sae_data buffer in octets
+ * @auth_data: Fields and elements in Authentication frames. This contains
+ *     the authentication frame body (non-IE and IE data), excluding the
+ *     Authentication algorithm number, i.e., starting at the Authentication
+ *     transaction sequence number field.
+ * @auth_data_len: Length of auth_data buffer in octets
  */
 struct cfg80211_auth_request {
        struct cfg80211_bss *bss;
@@ -1772,8 +1798,8 @@ struct cfg80211_auth_request {
        enum nl80211_auth_type auth_type;
        const u8 *key;
        u8 key_len, key_idx;
-       const u8 *sae_data;
-       size_t sae_data_len;
+       const u8 *auth_data;
+       size_t auth_data_len;
 };
 
 /**
@@ -1814,6 +1840,12 @@ enum cfg80211_assoc_req_flags {
  * @ht_capa_mask:  The bits of ht_capa which are to be used.
  * @vht_capa: VHT capability override
  * @vht_capa_mask: VHT capability mask indicating which fields to use
+ * @fils_kek: FILS KEK for protecting (Re)Association Request/Response frame or
+ *     %NULL if FILS is not used.
+ * @fils_kek_len: Length of fils_kek in octets
+ * @fils_nonces: FILS nonces (part of AAD) for protecting (Re)Association
+ *     Request/Response frame or %NULL if FILS is not used. This field starts
+ *     with 16 octets of STA Nonce followed by 16 octets of AP Nonce.
  */
 struct cfg80211_assoc_request {
        struct cfg80211_bss *bss;
@@ -1825,6 +1857,9 @@ struct cfg80211_assoc_request {
        struct ieee80211_ht_cap ht_capa;
        struct ieee80211_ht_cap ht_capa_mask;
        struct ieee80211_vht_cap vht_capa, vht_capa_mask;
+       const u8 *fils_kek;
+       size_t fils_kek_len;
+       const u8 *fils_nonces;
 };
 
 /**
@@ -2015,6 +2050,18 @@ struct cfg80211_connect_params {
        const u8 *prev_bssid;
 };
 
+/**
+ * enum cfg80211_connect_params_changed - Connection parameters being updated
+ *
+ * This enum provides information of all connect parameters that
+ * have to be updated as part of update_connect_params() call.
+ *
+ * @UPDATE_ASSOC_IES: Indicates whether association request IEs are updated
+ */
+enum cfg80211_connect_params_changed {
+       UPDATE_ASSOC_IES                = BIT(0),
+};
+
 /**
  * enum wiphy_params_flags - set_wiphy_params bitfield values
  * @WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed
@@ -2536,9 +2583,18 @@ struct cfg80211_nan_func {
  *     cases, the result of roaming is indicated with a call to
  *     cfg80211_roamed() or cfg80211_roamed_bss().
  *     (invoked with the wireless_dev mutex held)
- * @disconnect: Disconnect from the BSS/ESS. Once done, call
- *     cfg80211_disconnected().
+ * @update_connect_params: Update the connect parameters while connected to a
+ *     BSS. The updated parameters can be used by driver/firmware for
+ *     subsequent BSS selection (roaming) decisions and to form the
+ *     Authentication/(Re)Association Request frames. This call does not
+ *     request an immediate disassociation or reassociation with the current
+ *     BSS, i.e., this impacts only subsequent (re)associations. The bits in
+ *     changed are defined in &enum cfg80211_connect_params_changed.
  *     (invoked with the wireless_dev mutex held)
+ * @disconnect: Disconnect from the BSS/ESS or stop connection attempts if
+ *      connection is in progress. Once done, call cfg80211_disconnected() in
+ *      case connection was already established (invoked with the
+ *      wireless_dev mutex held), otherwise call cfg80211_connect_timeout().
  *
  * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call
  *     cfg80211_ibss_joined(), also call that function when changing BSSID due
@@ -2706,6 +2762,8 @@ struct cfg80211_nan_func {
  * @nan_change_conf: changes NAN configuration. The changed parameters must
  *     be specified in @changes (using &enum cfg80211_nan_conf_changes);
  *     All other parameters must be ignored.
+ *
+ * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS
  */
 struct cfg80211_ops {
        int     (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -2820,6 +2878,10 @@ struct cfg80211_ops {
 
        int     (*connect)(struct wiphy *wiphy, struct net_device *dev,
                           struct cfg80211_connect_params *sme);
+       int     (*update_connect_params)(struct wiphy *wiphy,
+                                        struct net_device *dev,
+                                        struct cfg80211_connect_params *sme,
+                                        u32 changed);
        int     (*disconnect)(struct wiphy *wiphy, struct net_device *dev,
                              u16 reason_code);
 
@@ -2982,6 +3044,10 @@ struct cfg80211_ops {
                                   struct wireless_dev *wdev,
                                   struct cfg80211_nan_conf *conf,
                                   u32 changes);
+
+       int     (*set_multicast_to_unicast)(struct wiphy *wiphy,
+                                           struct net_device *dev,
+                                           const bool enabled);
 };
 
 /*
@@ -3080,6 +3146,12 @@ struct ieee80211_iface_limit {
  *     only in special cases.
  * @radar_detect_widths: bitmap of channel widths supported for radar detection
  * @radar_detect_regions: bitmap of regions supported for radar detection
+ * @beacon_int_min_gcd: This interface combination supports different
+ *     beacon intervals.
+ *     = 0 - all beacon intervals for different interfaces must be the same.
+ *     > 0 - any beacon interval for the interface part of this combination AND
+ *           *GCD* of all beacon intervals from beaconing interfaces of this
+ *           combination must be greater than or equal to this value.
  *
  * With this structure the driver can describe which interface
  * combinations it supports concurrently.
@@ -3088,47 +3160,54 @@ struct ieee80211_iface_limit {
  *
  * 1. Allow #STA <= 1, #AP <= 1, matching BI, channels = 1, 2 total:
  *
- *  struct ieee80211_iface_limit limits1[] = {
- *     { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), },
- *     { .max = 1, .types = BIT(NL80211_IFTYPE_AP}, },
- *  };
- *  struct ieee80211_iface_combination combination1 = {
- *     .limits = limits1,
- *     .n_limits = ARRAY_SIZE(limits1),
- *     .max_interfaces = 2,
- *     .beacon_int_infra_match = true,
- *  };
+ *    .. code-block:: c
+ *
+ *     struct ieee80211_iface_limit limits1[] = {
+ *             { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), },
+ *             { .max = 1, .types = BIT(NL80211_IFTYPE_AP), },
+ *     };
+ *     struct ieee80211_iface_combination combination1 = {
+ *             .limits = limits1,
+ *             .n_limits = ARRAY_SIZE(limits1),
+ *             .max_interfaces = 2,
+ *             .beacon_int_infra_match = true,
+ *     };
  *
  *
  * 2. Allow #{AP, P2P-GO} <= 8, channels = 1, 8 total:
  *
- *  struct ieee80211_iface_limit limits2[] = {
- *     { .max = 8, .types = BIT(NL80211_IFTYPE_AP) |
- *                          BIT(NL80211_IFTYPE_P2P_GO), },
- *  };
- *  struct ieee80211_iface_combination combination2 = {
- *     .limits = limits2,
- *     .n_limits = ARRAY_SIZE(limits2),
- *     .max_interfaces = 8,
- *     .num_different_channels = 1,
- *  };
+ *    .. code-block:: c
+ *
+ *     struct ieee80211_iface_limit limits2[] = {
+ *             { .max = 8, .types = BIT(NL80211_IFTYPE_AP) |
+ *                                  BIT(NL80211_IFTYPE_P2P_GO), },
+ *     };
+ *     struct ieee80211_iface_combination combination2 = {
+ *             .limits = limits2,
+ *             .n_limits = ARRAY_SIZE(limits2),
+ *             .max_interfaces = 8,
+ *             .num_different_channels = 1,
+ *     };
  *
  *
  * 3. Allow #STA <= 1, #{P2P-client,P2P-GO} <= 3 on two channels, 4 total.
  *
- * This allows for an infrastructure connection and three P2P connections.
+ *    This allows for an infrastructure connection and three P2P connections.
+ *
+ *    .. code-block:: c
+ *
+ *     struct ieee80211_iface_limit limits3[] = {
+ *             { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), },
+ *             { .max = 3, .types = BIT(NL80211_IFTYPE_P2P_GO) |
+ *                                  BIT(NL80211_IFTYPE_P2P_CLIENT), },
+ *     };
+ *     struct ieee80211_iface_combination combination3 = {
+ *             .limits = limits3,
+ *             .n_limits = ARRAY_SIZE(limits3),
+ *             .max_interfaces = 4,
+ *             .num_different_channels = 2,
+ *     };
  *
- *  struct ieee80211_iface_limit limits3[] = {
- *     { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), },
- *     { .max = 3, .types = BIT(NL80211_IFTYPE_P2P_GO) |
- *                          BIT(NL80211_IFTYPE_P2P_CLIENT), },
- *  };
- *  struct ieee80211_iface_combination combination3 = {
- *     .limits = limits3,
- *     .n_limits = ARRAY_SIZE(limits3),
- *     .max_interfaces = 4,
- *     .num_different_channels = 2,
- *  };
  */
 struct ieee80211_iface_combination {
        const struct ieee80211_iface_limit *limits;
@@ -3138,6 +3217,7 @@ struct ieee80211_iface_combination {
        bool beacon_int_infra_match;
        u8 radar_detect_widths;
        u8 radar_detect_regions;
+       u32 beacon_int_min_gcd;
 };
 
 struct ieee80211_txrx_stypes {
@@ -3745,8 +3825,8 @@ struct cfg80211_cached_keys;
  * @beacon_interval: beacon interval used on this device for transmitting
  *     beacons, 0 when not valid
  * @address: The address for this device, valid only if @netdev is %NULL
- * @p2p_started: true if this is a P2P Device that has been started
- * @nan_started: true if this is a NAN interface that has been started
+ * @is_running: true if this is a non-netdev device that has been started, e.g.
+ *     the P2P Device.
  * @cac_started: true if DFS channel availability check has been started
  * @cac_start_time: timestamp (jiffies) when the dfs state was entered.
  * @cac_time_ms: CAC time in ms
@@ -3778,7 +3858,7 @@ struct wireless_dev {
 
        struct mutex mtx;
 
-       bool use_4addr, p2p_started, nan_started;
+       bool use_4addr, is_running;
 
        u8 address[ETH_ALEN] __aligned(sizeof(u16));
 
@@ -3835,6 +3915,13 @@ static inline u8 *wdev_address(struct wireless_dev *wdev)
        return wdev->address;
 }
 
+static inline bool wdev_running(struct wireless_dev *wdev)
+{
+       if (wdev->netdev)
+               return netif_running(wdev->netdev);
+       return wdev->is_running;
+}
+
 /**
  * wdev_priv - return wiphy priv from wireless_dev
  *
@@ -4039,6 +4126,18 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
  * that do not do the 802.11/802.3 conversion on the device.
  */
 
+/**
+ * ieee80211_data_to_8023_exthdr - convert an 802.11 data frame to 802.3
+ * @skb: the 802.11 data frame
+ * @ehdr: pointer to a &struct ethhdr that will get the header, instead
+ *     of it being pushed into the SKB
+ * @addr: the device MAC address
+ * @iftype: the virtual interface type
+ * Return: 0 on success. Non-zero on error.
+ */
+int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
+                                 const u8 *addr, enum nl80211_iftype iftype);
+
 /**
  * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3
  * @skb: the 802.11 data frame
@@ -4046,8 +4145,11 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
  * @iftype: the virtual interface type
  * Return: 0 on success. Non-zero on error.
  */
-int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
-                          enum nl80211_iftype iftype);
+static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
+                                        enum nl80211_iftype iftype)
+{
+       return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype);
+}
 
 /**
  * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11
@@ -4065,22 +4167,23 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
 /**
  * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame
  *
- * Decode an IEEE 802.11n A-MSDU frame and convert it to a list of
- * 802.3 frames. The @list will be empty if the decode fails. The
- * @skb is consumed after the function returns.
+ * Decode an IEEE 802.11 A-MSDU and convert it to a list of 802.3 frames.
+ * The @list will be empty if the decode fails. The @skb must be fully
+ * header-less before being passed in here; it is freed in this function.
  *
- * @skb: The input IEEE 802.11n A-MSDU frame.
+ * @skb: The input A-MSDU frame without any headers.
  * @list: The output list of 802.3 frames. It must be allocated and
  *     initialized by the caller.
  * @addr: The device MAC address.
  * @iftype: The device interface type.
  * @extra_headroom: The hardware extra headroom for SKBs in the @list.
- * @has_80211_header: Set it true if SKB is with IEEE 802.11 header.
+ * @check_da: DA to check in the inner ethernet header, or NULL
+ * @check_sa: SA to check in the inner ethernet header, or NULL
  */
 void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
                              const u8 *addr, enum nl80211_iftype iftype,
                              const unsigned int extra_headroom,
-                             bool has_80211_header);
+                             const u8 *check_da, const u8 *check_sa);
 
 /**
  * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame
@@ -4139,6 +4242,27 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len)
        return cfg80211_find_ie_match(eid, ies, len, NULL, 0, 0);
 }
 
+/**
+ * cfg80211_find_ext_ie - find information element with EID Extension in data
+ *
+ * @ext_eid: element ID Extension
+ * @ies: data consisting of IEs
+ * @len: length of data
+ *
+ * Return: %NULL if the extended element ID could not be found or if
+ * the element is invalid (claims to be longer than the given
+ * data), or a pointer to the first byte of the requested
+ * element, that is the byte containing the element ID.
+ *
+ * Note: There are no checks on the element length other than
+ * having to fit into the given data.
+ */
+static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len)
+{
+       return cfg80211_find_ie_match(WLAN_EID_EXTENSION, ies, len,
+                                     &ext_eid, 1, 2);
+}
+
 /**
  * cfg80211_find_vendor_ie - find vendor specific information element in data
  *
@@ -4539,7 +4663,8 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
  *     moves to cfg80211 in this call
  * @buf: authentication frame (header + body)
  * @len: length of the frame data
- * @uapsd_queues: bitmap of ACs configured to uapsd. -1 if n/a.
+ * @uapsd_queues: bitmap of queues configured for uapsd. Same format
+ *     as the AC bitmap in the QoS info field
  *
  * After being asked to associate via cfg80211_ops::assoc() the driver must
  * call either this function or cfg80211_auth_timeout().
@@ -5575,36 +5700,20 @@ unsigned int ieee80211_get_num_supported_channels(struct wiphy *wiphy);
  * cfg80211_check_combinations - check interface combinations
  *
  * @wiphy: the wiphy
- * @num_different_channels: the number of different channels we want
- *     to use for verification
- * @radar_detect: a bitmap where each bit corresponds to a channel
- *     width where radar detection is needed, as in the definition of
- *     &struct ieee80211_iface_combination.@radar_detect_widths
- * @iftype_num: array with the numbers of interfaces of each interface
- *     type.  The index is the interface type as specified in &enum
- *     nl80211_iftype.
+ * @params: the interface combinations parameter
  *
  * This function can be called by the driver to check whether a
  * combination of interfaces and their types are allowed according to
  * the interface combinations.
  */
 int cfg80211_check_combinations(struct wiphy *wiphy,
-                               const int num_different_channels,
-                               const u8 radar_detect,
-                               const int iftype_num[NUM_NL80211_IFTYPES]);
+                               struct iface_combination_params *params);
 
 /**
  * cfg80211_iter_combinations - iterate over matching combinations
  *
  * @wiphy: the wiphy
- * @num_different_channels: the number of different channels we want
- *     to use for verification
- * @radar_detect: a bitmap where each bit corresponds to a channel
- *     width where radar detection is needed, as in the definition of
- *     &struct ieee80211_iface_combination.@radar_detect_widths
- * @iftype_num: array with the numbers of interfaces of each interface
- *     type.  The index is the interface type as specified in &enum
- *     nl80211_iftype.
+ * @params: the interface combinations parameter
  * @iter: function to call for each matching combination
  * @data: pointer to pass to iter function
  *
@@ -5613,9 +5722,7 @@ int cfg80211_check_combinations(struct wiphy *wiphy,
  * purposes.
  */
 int cfg80211_iter_combinations(struct wiphy *wiphy,
-                              const int num_different_channels,
-                              const u8 radar_detect,
-                              const int iftype_num[NUM_NL80211_IFTYPES],
+                              struct iface_combination_params *params,
                               void (*iter)(const struct ieee80211_iface_combination *c,
                                            void *data),
                               void *data);
index 211bd3c37028d80fa1fd2a7849757008d4b6ee7f..d29e5fc8258216b9d79604bc99b69b66cce3e443 100644 (file)
@@ -92,6 +92,8 @@ struct devlink_ops {
 
        int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode);
        int (*eswitch_mode_set)(struct devlink *devlink, u16 mode);
+       int (*eswitch_inline_mode_get)(struct devlink *devlink, u8 *p_inline_mode);
+       int (*eswitch_inline_mode_set)(struct devlink *devlink, u8 inline_mode);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
index 6965c8f68ade48500458d926407a02f376301120..701fc814d0af85cdc99c445eaa3da9a1ef5b5761 100644 (file)
@@ -115,6 +115,7 @@ static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb
 static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr,
                                                    __be32 daddr,
                                                    __u8 tos, __u8 ttl,
+                                                   __be16 tp_dst,
                                                    __be16 flags,
                                                    __be64 tunnel_id,
                                                    int md_size)
@@ -127,7 +128,7 @@ static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr,
 
        ip_tunnel_key_init(&tun_dst->u.tun_info.key,
                           saddr, daddr, tos, ttl,
-                          0, 0, 0, tunnel_id, flags);
+                          0, 0, tp_dst, tunnel_id, flags);
        return tun_dst;
 }
 
@@ -139,12 +140,13 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
        const struct iphdr *iph = ip_hdr(skb);
 
        return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl,
-                               flags, tunnel_id, md_size);
+                               0, flags, tunnel_id, md_size);
 }
 
 static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr,
                                                      const struct in6_addr *daddr,
                                                      __u8 tos, __u8 ttl,
+                                                     __be16 tp_dst,
                                                      __be32 label,
                                                      __be16 flags,
                                                      __be64 tunnel_id,
@@ -162,7 +164,7 @@ static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *sad
        info->key.tun_flags = flags;
        info->key.tun_id = tunnel_id;
        info->key.tp_src = 0;
-       info->key.tp_dst = 0;
+       info->key.tp_dst = tp_dst;
 
        info->key.u.ipv6.src = *saddr;
        info->key.u.ipv6.dst = *daddr;
@@ -183,7 +185,7 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
 
        return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr,
                                  ipv6_get_dsfield(ip6h), ip6h->hop_limit,
-                                 ip6_flowlabel(ip6h), flags, tunnel_id,
+                                 0, ip6_flowlabel(ip6h), flags, tunnel_id,
                                  md_size);
 }
 #endif /* __NET_DST_METADATA_H */
index 456e4a6006abff9bdb122642ca3824d8d8f6bd93..8dbfdf728cd8ce901b3b05f0e58b4eeee25051fe 100644 (file)
@@ -8,6 +8,11 @@
 #include <net/flow.h>
 #include <net/rtnetlink.h>
 
+struct fib_kuid_range {
+       kuid_t start;
+       kuid_t end;
+};
+
 struct fib_rule {
        struct list_head        list;
        int                     iifindex;
@@ -30,6 +35,7 @@ struct fib_rule {
        int                     suppress_prefixlen;
        char                    iifname[IFNAMSIZ];
        char                    oifname[IFNAMSIZ];
+       struct fib_kuid_range   uid_range;
        struct rcu_head         rcu;
 };
 
@@ -92,7 +98,8 @@ struct fib_rules_ops {
        [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \
        [FRA_SUPPRESS_IFGROUP] = { .type = NLA_U32 }, \
        [FRA_GOTO]      = { .type = NLA_U32 }, \
-       [FRA_L3MDEV]    = { .type = NLA_U8 }
+       [FRA_L3MDEV]    = { .type = NLA_U8 }, \
+       [FRA_UID_RANGE] = { .len = sizeof(struct fib_rule_uid_range) }
 
 static inline void fib_rule_get(struct fib_rule *rule)
 {
index 035aa7716967802340ae3bd4e915c45f33f1383e..6bbbca8af8e3e2d0c91fb6a024a97a2431f04631 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/in6.h>
 #include <linux/atomic.h>
 #include <net/flow_dissector.h>
+#include <linux/uidgid.h>
 
 /*
  * ifindex generation is per-net namespace, and loopback is
@@ -37,6 +38,7 @@ struct flowi_common {
 #define FLOWI_FLAG_SKIP_NH_OIF         0x04
        __u32   flowic_secid;
        struct flowi_tunnel flowic_tun_key;
+       kuid_t  flowic_uid;
 };
 
 union flowi_uli {
@@ -74,6 +76,7 @@ struct flowi4 {
 #define flowi4_flags           __fl_common.flowic_flags
 #define flowi4_secid           __fl_common.flowic_secid
 #define flowi4_tun_key         __fl_common.flowic_tun_key
+#define flowi4_uid             __fl_common.flowic_uid
 
        /* (saddr,daddr) must be grouped, same order as in IP header */
        __be32                  saddr;
@@ -93,7 +96,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
                                      __u32 mark, __u8 tos, __u8 scope,
                                      __u8 proto, __u8 flags,
                                      __be32 daddr, __be32 saddr,
-                                     __be16 dport, __be16 sport)
+                                     __be16 dport, __be16 sport,
+                                     kuid_t uid)
 {
        fl4->flowi4_oif = oif;
        fl4->flowi4_iif = LOOPBACK_IFINDEX;
@@ -104,6 +108,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif,
        fl4->flowi4_flags = flags;
        fl4->flowi4_secid = 0;
        fl4->flowi4_tun_key.tun_id = 0;
+       fl4->flowi4_uid = uid;
        fl4->daddr = daddr;
        fl4->saddr = saddr;
        fl4->fl4_dport = dport;
@@ -131,6 +136,7 @@ struct flowi6 {
 #define flowi6_flags           __fl_common.flowic_flags
 #define flowi6_secid           __fl_common.flowic_secid
 #define flowi6_tun_key         __fl_common.flowic_tun_key
+#define flowi6_uid             __fl_common.flowic_uid
        struct in6_addr         daddr;
        struct in6_addr         saddr;
        /* Note: flowi6_tos is encoded in flowlabel, too. */
@@ -176,6 +182,7 @@ struct flowi {
 #define flowi_flags    u.__fl_common.flowic_flags
 #define flowi_secid    u.__fl_common.flowic_secid
 #define flowi_tun_key  u.__fl_common.flowic_tun_key
+#define flowi_uid      u.__fl_common.flowic_uid
 } __attribute__((__aligned__(BITS_PER_LONG/8)));
 
 static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4)
index d9534927d93bc46005afb5214ef83c72544385b7..c4f31666afd24695b9d83688f0800ebcc017114b 100644 (file)
@@ -128,6 +128,11 @@ enum flow_dissector_key_id {
        FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */
        FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */
        FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */
+       FLOW_DISSECTOR_KEY_ENC_KEYID, /* struct flow_dissector_key_keyid */
+       FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */
+       FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
+       FLOW_DISSECTOR_KEY_ENC_CONTROL, /* struct flow_dissector_key_control */
+       FLOW_DISSECTOR_KEY_ENC_PORTS, /* struct flow_dissector_key_ports */
 
        FLOW_DISSECTOR_KEY_MAX,
 };
index 8d4608ce8716360924ae68a24ffb25c0171187fc..a34275be360001e89740fe4325a53905683e9778 100644 (file)
@@ -20,7 +20,7 @@ struct genl_info;
 
 /**
  * struct genl_family - generic netlink family
- * @id: protocol family idenfitier
+ * @id: protocol family identifier (private)
  * @hdrsize: length of user specific header in bytes
  * @name: name of family
  * @version: protocol version
@@ -39,16 +39,16 @@ struct genl_info;
  *     Note that unbind() will not be called symmetrically if the
  *     generic netlink family is removed while there are still open
  *     sockets.
- * @attrbuf: buffer to store parsed attributes
- * @family_list: family list
- * @mcgrps: multicast groups used by this family (private)
- * @n_mcgrps: number of multicast groups (private)
+ * @attrbuf: buffer to store parsed attributes (private)
+ * @mcgrps: multicast groups used by this family
+ * @n_mcgrps: number of multicast groups
  * @mcgrp_offset: starting number of multicast group IDs in this family
- * @ops: the operations supported by this family (private)
- * @n_ops: number of operations supported by this family (private)
+ *     (private)
+ * @ops: the operations supported by this family
+ * @n_ops: number of operations supported by this family
  */
 struct genl_family {
-       unsigned int            id;
+       int                     id;             /* private */
        unsigned int            hdrsize;
        char                    name[GENL_NAMSIZ];
        unsigned int            version;
@@ -64,15 +64,16 @@ struct genl_family {
        int                     (*mcast_bind)(struct net *net, int group);
        void                    (*mcast_unbind)(struct net *net, int group);
        struct nlattr **        attrbuf;        /* private */
-       const struct genl_ops * ops;            /* private */
-       const struct genl_multicast_group *mcgrps; /* private */
-       unsigned int            n_ops;          /* private */
-       unsigned int            n_mcgrps;       /* private */
+       const struct genl_ops * ops;
+       const struct genl_multicast_group *mcgrps;
+       unsigned int            n_ops;
+       unsigned int            n_mcgrps;
        unsigned int            mcgrp_offset;   /* private */
-       struct list_head        family_list;    /* private */
        struct module           *module;
 };
 
+struct nlattr **genl_family_attrbuf(const struct genl_family *family);
+
 /**
  * struct genl_info - receiving information
  * @snd_seq: sending sequence number
@@ -130,64 +131,13 @@ struct genl_ops {
        u8                      flags;
 };
 
-int __genl_register_family(struct genl_family *family);
-
-static inline int genl_register_family(struct genl_family *family)
-{
-       family->module = THIS_MODULE;
-       return __genl_register_family(family);
-}
-
-/**
- * genl_register_family_with_ops - register a generic netlink family with ops
- * @family: generic netlink family
- * @ops: operations to be registered
- * @n_ops: number of elements to register
- *
- * Registers the specified family and operations from the specified table.
- * Only one family may be registered with the same family name or identifier.
- *
- * The family id may equal GENL_ID_GENERATE causing an unique id to
- * be automatically generated and assigned.
- *
- * Either a doit or dumpit callback must be specified for every registered
- * operation or the function will fail. Only one operation structure per
- * command identifier may be registered.
- *
- * See include/net/genetlink.h for more documenation on the operations
- * structure.
- *
- * Return 0 on success or a negative error code.
- */
-static inline int
-_genl_register_family_with_ops_grps(struct genl_family *family,
-                                   const struct genl_ops *ops, size_t n_ops,
-                                   const struct genl_multicast_group *mcgrps,
-                                   size_t n_mcgrps)
-{
-       family->module = THIS_MODULE;
-       family->ops = ops;
-       family->n_ops = n_ops;
-       family->mcgrps = mcgrps;
-       family->n_mcgrps = n_mcgrps;
-       return __genl_register_family(family);
-}
-
-#define genl_register_family_with_ops(family, ops)                     \
-       _genl_register_family_with_ops_grps((family),                   \
-                                           (ops), ARRAY_SIZE(ops),     \
-                                           NULL, 0)
-#define genl_register_family_with_ops_groups(family, ops, grps)        \
-       _genl_register_family_with_ops_grps((family),                   \
-                                           (ops), ARRAY_SIZE(ops),     \
-                                           (grps), ARRAY_SIZE(grps))
-
-int genl_unregister_family(struct genl_family *family);
-void genl_notify(struct genl_family *family, struct sk_buff *skb,
+int genl_register_family(struct genl_family *family);
+int genl_unregister_family(const struct genl_family *family);
+void genl_notify(const struct genl_family *family, struct sk_buff *skb,
                 struct genl_info *info, u32 group, gfp_t flags);
 
 void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
-                 struct genl_family *family, int flags, u8 cmd);
+                 const struct genl_family *family, int flags, u8 cmd);
 
 /**
  * genlmsg_nlhdr - Obtain netlink header from user specified header
@@ -196,8 +146,8 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
  *
  * Returns pointer to netlink header.
  */
-static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr,
-                                            struct genl_family *family)
+static inline struct nlmsghdr *
+genlmsg_nlhdr(void *user_hdr, const struct genl_family *family)
 {
        return (struct nlmsghdr *)((char *)user_hdr -
                                   family->hdrsize -
@@ -233,7 +183,7 @@ static inline int genlmsg_parse(const struct nlmsghdr *nlh,
  */
 static inline void genl_dump_check_consistent(struct netlink_callback *cb,
                                              void *user_hdr,
-                                             struct genl_family *family)
+                                             const struct genl_family *family)
 {
        nl_dump_check_consistent(cb, genlmsg_nlhdr(user_hdr, family));
 }
@@ -250,7 +200,7 @@ static inline void genl_dump_check_consistent(struct netlink_callback *cb,
  */
 static inline void *genlmsg_put_reply(struct sk_buff *skb,
                                      struct genl_info *info,
-                                     struct genl_family *family,
+                                     const struct genl_family *family,
                                      int flags, u8 cmd)
 {
        return genlmsg_put(skb, info->snd_portid, info->snd_seq, family,
@@ -287,7 +237,7 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr)
  * @group: offset of multicast group in groups array
  * @flags: allocation flags
  */
-static inline int genlmsg_multicast_netns(struct genl_family *family,
+static inline int genlmsg_multicast_netns(const struct genl_family *family,
                                          struct net *net, struct sk_buff *skb,
                                          u32 portid, unsigned int group, gfp_t flags)
 {
@@ -305,7 +255,7 @@ static inline int genlmsg_multicast_netns(struct genl_family *family,
  * @group: offset of multicast group in groups array
  * @flags: allocation flags
  */
-static inline int genlmsg_multicast(struct genl_family *family,
+static inline int genlmsg_multicast(const struct genl_family *family,
                                    struct sk_buff *skb, u32 portid,
                                    unsigned int group, gfp_t flags)
 {
@@ -323,7 +273,7 @@ static inline int genlmsg_multicast(struct genl_family *family,
  *
  * This function must hold the RTNL or rcu_read_lock().
  */
-int genlmsg_multicast_allns(struct genl_family *family,
+int genlmsg_multicast_allns(const struct genl_family *family,
                            struct sk_buff *skb, u32 portid,
                            unsigned int group, gfp_t flags);
 
@@ -407,8 +357,9 @@ static inline struct sk_buff *genlmsg_new(size_t payload, gfp_t flags)
  * This function returns the number of broadcast listeners that have set the
  * NETLINK_RECV_NO_ENOBUFS socket option.
  */
-static inline int genl_set_err(struct genl_family *family, struct net *net,
-                              u32 portid, u32 group, int code)
+static inline int genl_set_err(const struct genl_family *family,
+                              struct net *net, u32 portid,
+                              u32 group, int code)
 {
        if (WARN_ON_ONCE(group >= family->n_mcgrps))
                return -EINVAL;
@@ -416,7 +367,7 @@ static inline int genl_set_err(struct genl_family *family, struct net *net,
        return netlink_set_err(net->genl_sock, portid, group, code);
 }
 
-static inline int genl_has_listeners(struct genl_family *family,
+static inline int genl_has_listeners(const struct genl_family *family,
                                     struct net *net, unsigned int group)
 {
        if (WARN_ON_ONCE(group >= family->n_mcgrps))
index d15214d673b2e8e08fd6437b572278fb1359f10d..2a1abbf8da74368cd01adc40cef6c0644e059ef2 100644 (file)
@@ -68,6 +68,9 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de
                struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
 
                __skb_queue_head_init(&cell->napi_skbs);
+
+               set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state);
+
                netif_napi_add(dev, &cell->napi, gro_cell_poll, 64);
                napi_enable(&cell->napi);
        }
index ba07b9d8ed638bd5d467b82c4fff2cbec4836c32..d0e7e3f8e67ae208de3195f4af9cd740972dee0b 100644 (file)
@@ -333,9 +333,9 @@ enum ieee80211_radiotap_type {
 #define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS                   0x0003
 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK                 0x00F0
 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU           0x0000
-#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU              0x0010
+#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ         0x0010
 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU              0x0020
-#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ         0x0030
+#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU              0x0030
 #define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN              0x00F0
 
 #define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT                        0x00
index 515352c6280a45c6b1783ae4e10760b729b917d0..b0576cb2ab25dddf6fdfb6fd4327fdc24829b178 100644 (file)
@@ -190,8 +190,8 @@ struct inet6_dev {
        __u32                   if_flags;
        int                     dead;
 
+       u32                     desync_factor;
        u8                      rndid[8];
-       struct timer_list       regen_timer;
        struct list_head        tempaddr_list;
 
        struct in6_addr         token;
index 197a30d221e92b839e2e96fa37f4a796514ea461..146054ceea8e0566f79739b1ed115dea53423258 100644 (file)
@@ -289,11 +289,6 @@ static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
        return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue);
 }
 
-static inline int inet_csk_reqsk_queue_young(const struct sock *sk)
-{
-       return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue);
-}
-
 static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
 {
        return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog;
index 236a81034fefec203e27bb73b5a1e5d7d512f8d5..c9cff977a7fb2cc9d8446715cc09a51e5090aff9 100644 (file)
@@ -228,6 +228,7 @@ struct inet_sock {
 #define IP_CMSG_PASSSEC                BIT(5)
 #define IP_CMSG_ORIGDSTADDR    BIT(6)
 #define IP_CMSG_CHECKSUM       BIT(7)
+#define IP_CMSG_RECVFRAGSIZE   BIT(8)
 
 /**
  * sk_to_full_sk - Access to a full socket
index bc43c0fcae122daea994a17843abcd84f6e6adb1..ab6761a7c883a756583f570dc861af1e1d30e67f 100644 (file)
@@ -38,7 +38,7 @@ struct sock;
 struct inet_skb_parm {
        int                     iif;
        struct ip_options       opt;            /* Compiled IP options          */
-       unsigned char           flags;
+       u16                     flags;
 
 #define IPSKB_FORWARDED                BIT(0)
 #define IPSKB_XFRM_TUNNEL_SIZE BIT(1)
@@ -47,11 +47,16 @@ struct inet_skb_parm {
 #define IPSKB_REROUTED         BIT(4)
 #define IPSKB_DOREDIRECT       BIT(5)
 #define IPSKB_FRAG_PMTU                BIT(6)
-#define IPSKB_FRAG_SEGS                BIT(7)
+#define IPSKB_L3SLAVE          BIT(7)
 
        u16                     frag_max_size;
 };
 
+static inline bool ipv4_l3mdev_skb(u16 flags)
+{
+       return !!(flags & IPSKB_L3SLAVE);
+}
+
 static inline unsigned int ip_hdrlen(const struct sk_buff *skb)
 {
        return ip_hdr(skb)->ihl * 4;
@@ -173,6 +178,7 @@ struct ip_reply_arg {
                                /* -1 if not needed */ 
        int         bound_dev_if;
        u8          tos;
+       kuid_t      uid;
 }; 
 
 #define IP_REPLY_ARG_NOSRCCHECK 1
@@ -572,7 +578,8 @@ int ip_options_rcv_srr(struct sk_buff *skb);
  */
 
 void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb);
-void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int offset);
+void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
+                        struct sk_buff *skb, int tlen, int offset);
 int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
                 struct ipcm_cookie *ipc, bool allow_ipv6);
 int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
@@ -594,7 +601,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
 
 static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
 {
-       ip_cmsg_recv_offset(msg, skb, 0);
+       ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0);
 }
 
 bool icmp_global_allow(void);
index fb961a576abe4a62d02c69d6393abd91a2930fe1..a74e2aa40ef42d6e7edb917890164cce9f0fa835 100644 (file)
@@ -230,6 +230,8 @@ struct fib6_table {
        rwlock_t                tb6_lock;
        struct fib6_node        tb6_root;
        struct inet_peer_base   tb6_peers;
+       unsigned int            flags;
+#define RT6_TABLE_HAS_DFLT_ROUTER      BIT(0)
 };
 
 #define RT6_TABLE_UNSPEC       RT_TABLE_UNSPEC
index e0cd318d5103fb9a9a7be9ddfda40f8696925fcc..9dc2c182a263218ad63ceb326893b4c86c21ff95 100644 (file)
@@ -32,6 +32,7 @@ struct route_info {
 #define RT6_LOOKUP_F_SRCPREF_TMP       0x00000008
 #define RT6_LOOKUP_F_SRCPREF_PUBLIC    0x00000010
 #define RT6_LOOKUP_F_SRCPREF_COA       0x00000020
+#define RT6_LOOKUP_F_IGNORE_LINKSTATE  0x00000040
 
 /* We do not (yet ?) support IPv6 jumbograms (RFC 2675)
  * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header
@@ -139,9 +140,10 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                  const struct in6_addr *gwaddr);
 
 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif,
-                    u32 mark);
+                    u32 mark, kuid_t uid);
 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu);
-void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark);
+void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
+                 kuid_t uid);
 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
                            u32 mark);
 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk);
index 20ed9699fcd40be5362083fdbbf58d2da9420b44..1b1cf33cbfb02eaf4eb1eeb92be474076fdeebe4 100644 (file)
@@ -146,6 +146,7 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
 {
        int pkt_len, err;
 
+       memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
        pkt_len = skb->len - skb_inner_network_offset(skb);
        err = ip6_local_out(dev_net(skb_dst(skb)->dev), sk, skb);
        if (unlikely(net_xmit_eval(err)))
index b9314b48e39f32ef22366087673bcc961d196b43..f390c3bb05c5d2189d169d31038fd5cb991bebaa 100644 (file)
@@ -243,6 +243,7 @@ int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
                   struct netlink_callback *cb);
 int fib_table_flush(struct net *net, struct fib_table *table);
 struct fib_table *fib_trie_unmerge(struct fib_table *main_tb);
+void fib_table_flush_external(struct fib_table *table);
 void fib_free_table(struct fib_table *tb);
 
 #ifndef CONFIG_IP_MULTIPLE_TABLES
index 59557c07904b40ad28c448413d285c245c0ac529..e893fe43dd139d827cd587c814c1cb0cd0a9c8fe 100644 (file)
@@ -129,7 +129,7 @@ struct ip_tunnel {
 #endif
        struct ip_tunnel_prl_entry __rcu *prl;  /* potential router list */
        unsigned int            prl_count;      /* # of entries in PRL */
-       int                     ip_tnl_net_id;
+       unsigned int            ip_tnl_net_id;
        struct gro_cells        gro_cells;
        bool                    collect_md;
        bool                    ignore_df;
@@ -248,7 +248,7 @@ void ip_tunnel_uninit(struct net_device *dev);
 void  ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
 struct net *ip_tunnel_get_link_net(const struct net_device *dev);
 int ip_tunnel_get_iflink(const struct net_device *dev);
-int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
+int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
                       struct rtnl_link_ops *ops, char *devname);
 
 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops);
@@ -275,7 +275,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
                         struct ip_tunnel_parm *p);
 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
                      struct ip_tunnel_parm *p);
-void ip_tunnel_setup(struct net_device *dev, int net_id);
+void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
 
 struct ip_tunnel_encap_ops {
        size_t (*encap_hlen)(struct ip_tunnel_encap *e);
index 8fed1cd78658a6e088b63f7703290cb02c1969b6..0a3622bf086f3447dc4a079f5082388326681481 100644 (file)
@@ -932,7 +932,8 @@ int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
  */
 
 void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
-                         u8 *proto, struct in6_addr **daddr_p);
+                         u8 *proto, struct in6_addr **daddr_p,
+                         struct in6_addr *saddr);
 void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
                         u8 *proto);
 
index 82e76fe1c1f747a633ed1c92258a40451ec16e63..d4c1c75b886244f76f9539c0709bb72be7974578 100644 (file)
@@ -94,7 +94,8 @@ static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
 static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
                                             unsigned int mtu)
 {
-       if (lwtunnel_xmit_redirect(lwtstate) && lwtstate->headroom < mtu)
+       if ((lwtunnel_xmit_redirect(lwtstate) ||
+            lwtunnel_output_redirect(lwtstate)) && lwtstate->headroom < mtu)
                return lwtstate->headroom;
 
        return 0;
index a810dfcb83c2382cb7fe1954eba7bae1645e9046..5345d358a510ce63d5c13fd48cfd7f9284405768 100644 (file)
@@ -811,14 +811,18 @@ enum mac80211_rate_control_flags {
  * in the control information, and it will be filled by the rate
  * control algorithm according to what should be sent. For example,
  * if this array contains, in the format { <idx>, <count> } the
- * information
+ * information::
+ *
  *    { 3, 2 }, { 2, 2 }, { 1, 4 }, { -1, 0 }, { -1, 0 }
+ *
  * then this means that the frame should be transmitted
  * up to twice at rate 3, up to twice at rate 2, and up to four
  * times at rate 1 if it doesn't get acknowledged. Say it gets
  * acknowledged by the peer after the fifth attempt, the status
- * information should then contain
+ * information should then contain::
+ *
  *   { 3, 2 }, { 2, 2 }, { 1, 1 }, { -1, 0 } ...
+ *
  * since it was transmitted twice at rate 3, twice at rate 2
  * and once at rate 1 after which we received an acknowledgement.
  */
@@ -1168,8 +1172,8 @@ enum mac80211_rx_vht_flags {
  * @rate_idx: index of data rate into band's supported rates or MCS index if
  *     HT or VHT is used (%RX_FLAG_HT/%RX_FLAG_VHT)
  * @vht_nss: number of streams (VHT only)
- * @flag: %RX_FLAG_*
- * @vht_flag: %RX_VHT_FLAG_*
+ * @flag: %RX_FLAG_\*
+ * @vht_flag: %RX_VHT_FLAG_\*
  * @rx_flags: internal RX flags for mac80211
  * @ampdu_reference: A-MPDU reference number, must be a different value for
  *     each A-MPDU but the same for each subframe within one A-MPDU
@@ -1432,13 +1436,13 @@ enum ieee80211_vif_flags {
  * @probe_req_reg: probe requests should be reported to mac80211 for this
  *     interface.
  * @drv_priv: data area for driver use, will always be aligned to
- *     sizeof(void *).
+ *     sizeof(void \*).
  * @txq: the multicast data TX queue (if driver uses the TXQ abstraction)
  */
 struct ieee80211_vif {
        enum nl80211_iftype type;
        struct ieee80211_bss_conf bss_conf;
-       u8 addr[ETH_ALEN];
+       u8 addr[ETH_ALEN] __aligned(2);
        bool p2p;
        bool csa_active;
        bool mu_mimo_owner;
@@ -1743,9 +1747,10 @@ struct ieee80211_sta_rates {
  * @wme: indicates whether the STA supports QoS/WME (if local devices does,
  *     otherwise always false)
  * @drv_priv: data area for driver use, will always be aligned to
- *     sizeof(void *), size is determined in hw information.
+ *     sizeof(void \*), size is determined in hw information.
  * @uapsd_queues: bitmap of queues configured for uapsd. Only valid
- *     if wme is supported.
+ *     if wme is supported. The bits order is like in
+ *     IEEE80211_WMM_IE_STA_QOSINFO_AC_*.
  * @max_sp: max Service Period. Only valid if wme is supported.
  * @bandwidth: current bandwidth the station can receive with
  * @rx_nss: in HT/VHT, the maximum number of spatial streams the
@@ -2025,6 +2030,10 @@ struct ieee80211_txq {
  *     drivers, mac80211 packet loss mechanism will not be triggered and driver
  *     is completely depending on firmware event for station kickout.
  *
+ * @IEEE80211_HW_SUPPORTS_TX_FRAG: Hardware does fragmentation by itself.
+ *     The stack will not do fragmentation.
+ *     The callback for @set_frag_threshold should be set as well.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2066,6 +2075,7 @@ enum ieee80211_hw_flags {
        IEEE80211_HW_TX_AMSDU,
        IEEE80211_HW_TX_FRAG_LIST,
        IEEE80211_HW_REPORTS_LOW_ACK,
+       IEEE80211_HW_SUPPORTS_TX_FRAG,
 
        /* keep last, obviously */
        NUM_IEEE80211_HW_FLAGS
@@ -2146,12 +2156,12 @@ enum ieee80211_hw_flags {
  *
  * @radiotap_mcs_details: lists which MCS information can the HW
  *     reports, by default it is set to _MCS, _GI and _BW but doesn't
- *     include _FMT. Use %IEEE80211_RADIOTAP_MCS_HAVE_* values, only
+ *     include _FMT. Use %IEEE80211_RADIOTAP_MCS_HAVE_\* values, only
  *     adding _BW is supported today.
  *
  * @radiotap_vht_details: lists which VHT MCS information the HW reports,
  *     the default is _GI | _BANDWIDTH.
- *     Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values.
+ *     Use the %IEEE80211_RADIOTAP_VHT_KNOWN_\* values.
  *
  * @radiotap_timestamp: Information for the radiotap timestamp field; if the
  *     'units_pos' member is set to a non-negative value it must be set to
@@ -2486,6 +2496,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
  * in the software stack cares about, we will, in the future, have mac80211
  * tell the driver which information elements are interesting in the sense
  * that we want to see changes in them. This will include
+ *
  *  - a list of information element IDs
  *  - a list of OUIs for the vendor information element
  *
@@ -3093,8 +3104,9 @@ enum ieee80211_reconfig_type {
  *     The callback must be atomic.
  *
  * @set_frag_threshold: Configuration of fragmentation threshold. Assign this
- *     if the device does fragmentation by itself; if this callback is
- *     implemented then the stack will not do fragmentation.
+ *     if the device does fragmentation by itself. Note that to prevent the
+ *     stack from doing fragmentation IEEE80211_HW_SUPPORTS_TX_FRAG
+ *     should be set as well.
  *     The callback can sleep.
  *
  * @set_rts_threshold: Configuration of RTS threshold (if device needs it)
@@ -4087,6 +4099,10 @@ void ieee80211_sta_pspoll(struct ieee80211_sta *sta);
  * This must be used in conjunction with ieee80211_sta_ps_transition()
  * and possibly ieee80211_sta_pspoll(); calls to all three must be
  * serialized.
+ * %IEEE80211_NUM_TIDS can be passed as the tid if the tid is unknown.
+ * In this case, mac80211 will not check that this tid maps to an AC
+ * that is trigger enabled and assume that the caller did the proper
+ * checks.
  */
 void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid);
 
index fc4f757107df0b51eae83781324337001ed7fb6d..af8fe8a909dc0ca62e54056cf4be9d0d4ea82477 100644 (file)
@@ -170,7 +170,7 @@ static inline struct net *copy_net_ns(unsigned long flags,
 extern struct list_head net_namespace_list;
 
 struct net *get_net_ns_by_pid(pid_t pid);
-struct net *get_net_ns_by_fd(int pid);
+struct net *get_net_ns_by_fd(int fd);
 
 #ifdef CONFIG_SYSCTL
 void ipx_register_sysctl(void);
@@ -291,7 +291,7 @@ struct pernet_operations {
        int (*init)(struct net *net);
        void (*exit)(struct net *net);
        void (*exit_batch)(struct list_head *net_exit_list);
-       int *id;
+       unsigned int *id;
        size_t size;
 };
 
index de629f1520df2d476f74acab98ba08cbadd03a92..e7b836590f0b7a24e13659063b7aa87ad133885e 100644 (file)
@@ -98,7 +98,7 @@ struct nf_conntrack_l4proto {
                const struct nla_policy *nla_policy;
        } ctnl_timeout;
 #endif
-       int     *net_id;
+       unsigned int    *net_id;
        /* Init l4proto pernet data */
        int (*init_net)(struct net *net, u_int16_t proto);
 
@@ -125,14 +125,24 @@ struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto,
 void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p);
 
 /* Protocol pernet registration. */
+int nf_ct_l4proto_pernet_register_one(struct net *net,
+                                     struct nf_conntrack_l4proto *proto);
+void nf_ct_l4proto_pernet_unregister_one(struct net *net,
+                                        struct nf_conntrack_l4proto *proto);
 int nf_ct_l4proto_pernet_register(struct net *net,
-                                 struct nf_conntrack_l4proto *proto);
+                                 struct nf_conntrack_l4proto *proto[],
+                                 unsigned int num_proto);
 void nf_ct_l4proto_pernet_unregister(struct net *net,
-                                    struct nf_conntrack_l4proto *proto);
+                                    struct nf_conntrack_l4proto *proto[],
+                                    unsigned int num_proto);
 
 /* Protocol global registration. */
-int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto);
-void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto);
+int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto);
+void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto);
+int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[],
+                          unsigned int num_proto);
+void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[],
+                             unsigned int num_proto);
 
 /* Generic netlink helpers */
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
index 498814626e28b7115345308694bb66d240933f9f..1723a67c0b0a887d689c58481189f77f723aa400 100644 (file)
@@ -30,8 +30,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
        if (net->ct.labels_used == 0)
                return NULL;
 
-       return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
-                                   sizeof(struct nf_conn_labels), GFP_ATOMIC);
+       return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC);
 #else
        return NULL;
 #endif
index e6937318546ceee3b32a63bd791932c1ea1e12de..b0ca402c1f72e20bae492c634f663b8846e788a5 100644 (file)
@@ -54,7 +54,7 @@ struct synproxy_net {
        struct synproxy_stats __percpu  *stats;
 };
 
-extern int synproxy_net_id;
+extern unsigned int synproxy_net_id;
 static inline struct synproxy_net *synproxy_pernet(struct net *net)
 {
        return net_generic(net, synproxy_net_id);
index 309cd267be4faf589927478c39615e1eef144fd6..a559aa41253cee6680ee7f626df523af287ae08d 100644 (file)
@@ -109,5 +109,10 @@ void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
                               const struct net_device *out,
                               const struct nf_loginfo *loginfo,
                               const char *prefix);
+void nf_log_l2packet(struct net *net, u_int8_t pf, unsigned int hooknum,
+                    const struct sk_buff *skb,
+                    const struct net_device *in,
+                    const struct net_device *out,
+                    const struct nf_loginfo *loginfo, const char *prefix);
 
 #endif /* _NF_LOG_H */
index 2280cfe86c56157f3ca1165baf6f4ad0e73a18be..09948d10e38e0b939b9764caafd7f3b9a9be41d5 100644 (file)
@@ -12,6 +12,7 @@ struct nf_queue_entry {
        unsigned int            id;
 
        struct nf_hook_state    state;
+       struct nf_hook_entry    *hook;
        u16                     size; /* sizeof(entry) + saved route keys */
 
        /* extra space to store route keys */
diff --git a/include/net/netfilter/nf_socket.h b/include/net/netfilter/nf_socket.h
new file mode 100644 (file)
index 0000000..f2fc39c
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef _NF_SOCK_H_
+#define _NF_SOCK_H_
+
+struct net_device;
+struct sk_buff;
+struct sock;
+struct net;
+
+static inline bool nf_sk_is_transparent(struct sock *sk)
+{
+       switch (sk->sk_state) {
+       case TCP_TIME_WAIT:
+               return inet_twsk(sk)->tw_transparent;
+       case TCP_NEW_SYN_RECV:
+               return inet_rsk(inet_reqsk(sk))->no_srccheck;
+       default:
+               return inet_sk(sk)->transparent;
+       }
+}
+
+struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
+                                 const struct net_device *indev);
+
+struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
+                                 const struct net_device *indev);
+
+#endif
index 5031e072567bd85318d5a6ab240b3ab79199facd..311f02739b51940688f46bcb99d7bc019448d3d3 100644 (file)
 
 struct nft_pktinfo {
        struct sk_buff                  *skb;
-       struct net                      *net;
-       const struct net_device         *in;
-       const struct net_device         *out;
-       u8                              pf;
-       u8                              hook;
        bool                            tprot_set;
        u8                              tprot;
        /* for x_tables compatibility */
        struct xt_action_param          xt;
 };
 
+static inline struct net *nft_net(const struct nft_pktinfo *pkt)
+{
+       return pkt->xt.state->net;
+}
+
+static inline unsigned int nft_hook(const struct nft_pktinfo *pkt)
+{
+       return pkt->xt.state->hook;
+}
+
+static inline u8 nft_pf(const struct nft_pktinfo *pkt)
+{
+       return pkt->xt.state->pf;
+}
+
+static inline const struct net_device *nft_in(const struct nft_pktinfo *pkt)
+{
+       return pkt->xt.state->in;
+}
+
+static inline const struct net_device *nft_out(const struct nft_pktinfo *pkt)
+{
+       return pkt->xt.state->out;
+}
+
 static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
                                   struct sk_buff *skb,
                                   const struct nf_hook_state *state)
 {
        pkt->skb = skb;
-       pkt->net = pkt->xt.net = state->net;
-       pkt->in = pkt->xt.in = state->in;
-       pkt->out = pkt->xt.out = state->out;
-       pkt->hook = pkt->xt.hooknum = state->hook;
-       pkt->pf = pkt->xt.family = state->pf;
+       pkt->xt.state = state;
 }
 
 static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt,
@@ -145,7 +161,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
        return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE;
 }
 
-unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
+int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
 unsigned int nft_parse_register(const struct nlattr *attr);
 int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg);
 
@@ -542,7 +558,8 @@ void *nft_set_elem_init(const struct nft_set *set,
                        const struct nft_set_ext_tmpl *tmpl,
                        const u32 *key, const u32 *data,
                        u64 timeout, gfp_t gfp);
-void nft_set_elem_destroy(const struct nft_set *set, void *elem);
+void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+                         bool destroy_expr);
 
 /**
  *     struct nft_set_gc_batch_head - nf_tables set garbage collection batch
@@ -693,7 +710,6 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
 {
        int err;
 
-       __module_get(src->ops->type->owner);
        if (src->ops->clone) {
                dst->ops = src->ops;
                err = src->ops->clone(dst, src);
@@ -702,6 +718,8 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
        } else {
                memcpy(dst, src, src->ops->size);
        }
+
+       __module_get(src->ops->type->owner);
        return 0;
 }
 
index 00f4f6b1b1ba0758735ebabdff902c43b22630ee..862373d4ea9d1e5118b10e0064a85901932721cc 100644 (file)
@@ -1,12 +1,18 @@
 #ifndef _NET_NF_TABLES_CORE_H
 #define _NET_NF_TABLES_CORE_H
 
+extern struct nft_expr_type nft_imm_type;
+extern struct nft_expr_type nft_cmp_type;
+extern struct nft_expr_type nft_lookup_type;
+extern struct nft_expr_type nft_bitwise_type;
+extern struct nft_expr_type nft_byteorder_type;
+extern struct nft_expr_type nft_payload_type;
+extern struct nft_expr_type nft_dynset_type;
+extern struct nft_expr_type nft_range_type;
+
 int nf_tables_core_module_init(void);
 void nf_tables_core_module_exit(void);
 
-int nft_immediate_module_init(void);
-void nft_immediate_module_exit(void);
-
 struct nft_cmp_fast_expr {
        u32                     data;
        enum nft_registers      sreg:8;
@@ -25,24 +31,6 @@ static inline u32 nft_cmp_fast_mask(unsigned int len)
 
 extern const struct nft_expr_ops nft_cmp_fast_ops;
 
-int nft_cmp_module_init(void);
-void nft_cmp_module_exit(void);
-
-int nft_range_module_init(void);
-void nft_range_module_exit(void);
-
-int nft_lookup_module_init(void);
-void nft_lookup_module_exit(void);
-
-int nft_dynset_module_init(void);
-void nft_dynset_module_exit(void);
-
-int nft_bitwise_module_init(void);
-void nft_bitwise_module_exit(void);
-
-int nft_byteorder_module_init(void);
-void nft_byteorder_module_exit(void);
-
 struct nft_payload {
        enum nft_payload_bases  base:8;
        u8                      offset;
@@ -62,7 +50,4 @@ struct nft_payload_set {
 extern const struct nft_expr_ops nft_payload_fast_ops;
 extern struct static_key_false nft_trace_enabled;
 
-int nft_payload_module_init(void);
-void nft_payload_module_exit(void);
-
 #endif /* _NET_NF_TABLES_CORE_H */
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
new file mode 100644 (file)
index 0000000..cbedda0
--- /dev/null
@@ -0,0 +1,31 @@
+#ifndef _NFT_FIB_H_
+#define _NFT_FIB_H_
+
+struct nft_fib {
+       enum nft_registers      dreg:8;
+       u8                      result;
+       u32                     flags;
+};
+
+extern const struct nla_policy nft_fib_policy[];
+
+int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr);
+int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                const struct nlattr * const tb[]);
+int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                    const struct nft_data **data);
+
+
+void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+                       const struct nft_pktinfo *pkt);
+void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
+                  const struct nft_pktinfo *pkt);
+
+void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+                       const struct nft_pktinfo *pkt);
+void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+                  const struct nft_pktinfo *pkt);
+
+void nft_fib_store_result(void *reg, enum nft_fib_result r,
+                         const struct nft_pktinfo *pkt, int index);
+#endif
index 254a0fc018006a773f31f41bc07e89e5b81528c2..d3938f11ae52ee234ea0b4c4e3f7b2e37615601d 100644 (file)
@@ -713,7 +713,7 @@ static inline int nla_ok(const struct nlattr *nla, int remaining)
  */
 static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
 {
-       int totlen = NLA_ALIGN(nla->nla_len);
+       unsigned int totlen = NLA_ALIGN(nla->nla_len);
 
        *remaining -= totlen;
        return (struct nlattr *) ((char *) nla + totlen);
@@ -1190,6 +1190,16 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla)
        return tmp;
 }
 
+/**
+ * nla_memdup - duplicate attribute memory (kmemdup)
+ * @src: netlink attribute to duplicate from
+ * @gfp: GFP mask
+ */
+static inline void *nla_memdup(const struct nlattr *src, gfp_t gfp)
+{
+       return kmemdup(nla_data(src), nla_len(src), gfp);
+}
+
 /**
  * nla_nest_start - Start a new level of nested attributes
  * @skb: socket buffer to add attributes to
index e469e85de3f9cad8c3b3573c6c5846d7c6303106..3d06d94d2e528cb856032f77c5216aa31e077f05 100644 (file)
@@ -91,7 +91,6 @@ struct netns_ct {
        struct nf_ip_net        nf_ct_proto;
 #if defined(CONFIG_NF_CONNTRACK_LABELS)
        unsigned int            labels_used;
-       u8                      label_words;
 #endif
 };
 #endif
index 70e158551704767da8653bc4149698e34105a072..d315786bcfd7ad0d1f4479821775c0ca96567553 100644 (file)
@@ -31,7 +31,7 @@ struct net_generic {
        void *ptr[0];
 };
 
-static inline void *net_generic(const struct net *net, int id)
+static inline void *net_generic(const struct net *net, unsigned int id)
 {
        struct net_generic *ng;
        void *ptr;
index 10d0848f5b8aa85b47803506f7a70c2dcd5c1364..de7745e2edcc63e65c1ab224a6835367d55c54c4 100644 (file)
@@ -85,6 +85,7 @@ struct netns_ipv6 {
 #endif
        atomic_t                dev_addr_genid;
        atomic_t                fib6_sernum;
+       struct seg6_pernet_data *seg6_data;
 };
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
index 767b03a3fe67a5efdd59e7f9f53a0a1f8f588ff0..f0a051480c6c27e1a360a9c041acd582114c4aae 100644 (file)
@@ -171,6 +171,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
                     struct tcf_exts *src);
 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
+int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
+                    struct net_device **hw_dev);
 
 /**
  * struct tcf_pkt_info - packet information
@@ -425,16 +427,14 @@ struct tc_cls_u32_offload {
        };
 };
 
-static inline bool tc_should_offload(const struct net_device *dev,
-                                    const struct tcf_proto *tp, u32 flags)
+static inline bool tc_can_offload(const struct net_device *dev,
+                                 const struct tcf_proto *tp)
 {
        const struct Qdisc *sch = tp->q;
        const struct Qdisc_class_ops *cops = sch->ops->cl_ops;
 
        if (!(dev->features & NETIF_F_HW_TC))
                return false;
-       if (flags & TCA_CLS_FLAGS_SKIP_HW)
-               return false;
        if (!dev->netdev_ops->ndo_setup_tc)
                return false;
        if (cops && cops->tcf_cl_offload)
@@ -443,6 +443,19 @@ static inline bool tc_should_offload(const struct net_device *dev,
        return true;
 }
 
+static inline bool tc_skip_hw(u32 flags)
+{
+       return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false;
+}
+
+static inline bool tc_should_offload(const struct net_device *dev,
+                                    const struct tcf_proto *tp, u32 flags)
+{
+       if (tc_skip_hw(flags))
+               return false;
+       return tc_can_offload(dev, tp);
+}
+
 static inline bool tc_skip_sw(u32 flags)
 {
        return (flags & TCA_CLS_FLAGS_SKIP_SW) ? true : false;
index cd334c9584e9aa7267652b9f6f538834a7cbcbfa..f1b76b8e6d2d296177116d0ef0f254d175551cbe 100644 (file)
@@ -6,6 +6,8 @@
 #include <linux/if_vlan.h>
 #include <net/sch_generic.h>
 
+#define DEFAULT_TX_QUEUE_LEN   1000
+
 struct qdisc_walker {
        int     stop;
        int     skip;
index 0429d47cad25c27eebe1a665e250d055f90d4e84..c0874c87c173717f2c13c8af06d2482a76190243 100644 (file)
@@ -153,7 +153,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
        flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
                           RT_SCOPE_UNIVERSE, proto,
                           sk ? inet_sk_flowi_flags(sk) : 0,
-                          daddr, saddr, dport, sport);
+                          daddr, saddr, dport, sport, sock_net_uid(net, sk));
        if (sk)
                security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
        return ip_route_output_flow(net, fl4, sk);
@@ -269,7 +269,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
                flow_flags |= FLOWI_FLAG_ANYSRC;
 
        flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
-                          protocol, flow_flags, dst, src, dport, sport);
+                          protocol, flow_flags, dst, src, dport, sport,
+                          sk->sk_uid);
 }
 
 static inline struct rtable *ip_route_connect(struct flowi4 *fl4,
index 87a7f42e763963255afc32deaa774570bf03551a..f0dcaebebddb0215b5496a6e2cce4df2c3ad246a 100644 (file)
@@ -152,7 +152,7 @@ void sctp_unhash_endpoint(struct sctp_endpoint *);
 struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *,
                             struct sctphdr *, struct sctp_association **,
                             struct sctp_transport **);
-void sctp_err_finish(struct sock *, struct sctp_association *);
+void sctp_err_finish(struct sock *, struct sctp_transport *);
 void sctp_icmp_frag_needed(struct sock *, struct sctp_association *,
                           struct sctp_transport *t, __u32 pmtu);
 void sctp_icmp_redirect(struct sock *, struct sctp_transport *,
@@ -164,7 +164,7 @@ void sctp_backlog_migrate(struct sctp_association *assoc,
                          struct sock *oldsk, struct sock *newsk);
 int sctp_transport_hashtable_init(void);
 void sctp_transport_hashtable_destroy(void);
-void sctp_hash_transport(struct sctp_transport *t);
+int sctp_hash_transport(struct sctp_transport *t);
 void sctp_unhash_transport(struct sctp_transport *t);
 struct sctp_transport *sctp_addrs_lookup_transport(
                                struct net *net,
index bd4a3ded7c871b0f08fcbc222b8f04292fed900c..92daabdc007d94a544baa10d278d229e42e40eb3 100644 (file)
@@ -124,7 +124,7 @@ extern struct sctp_globals {
        /* This is the sctp port control hash.  */
        struct sctp_bind_hashbucket *port_hashtable;
        /* This is the hash of all transports. */
-       struct rhashtable transport_hashtable;
+       struct rhltable transport_hashtable;
 
        /* Sizes of above hashtables. */
        int ep_hashsize;
@@ -761,7 +761,7 @@ static inline int sctp_packet_empty(struct sctp_packet *packet)
 struct sctp_transport {
        /* A list of transports. */
        struct list_head transports;
-       struct rhash_head node;
+       struct rhlist_head node;
 
        /* Reference counting. */
        atomic_t refcnt;
index 3f36d45b714a4ba295fe253dbca56e54e0dd0b32..0caee631a8364fe6e49ab8cacba864d019be8b47 100644 (file)
@@ -6,10 +6,10 @@
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
 u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
                               __be16 dport);
-__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
-                                __be16 sport, __be16 dport);
-__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
-                                  __be16 sport, __be16 dport);
+u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
+                              __be16 sport, __be16 dport, u32 *tsoff);
+u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
+                                __be16 sport, __be16 dport, u32 *tsoff);
 u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
                                __be16 sport, __be16 dport);
 u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
diff --git a/include/net/seg6.h b/include/net/seg6.h
new file mode 100644 (file)
index 0000000..4e03575
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_SEG6_H
+#define _NET_SEG6_H
+
+#include <linux/net.h>
+#include <linux/ipv6.h>
+#include <net/lwtunnel.h>
+#include <linux/seg6.h>
+#include <linux/rhashtable.h>
+
+static inline void update_csum_diff4(struct sk_buff *skb, __be32 from,
+                                    __be32 to)
+{
+       __be32 diff[] = { ~from, to };
+
+       skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum);
+}
+
+static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from,
+                                     __be32 *to)
+{
+       __be32 diff[] = {
+               ~from[0], ~from[1], ~from[2], ~from[3],
+               to[0], to[1], to[2], to[3],
+       };
+
+       skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum);
+}
+
+struct seg6_pernet_data {
+       struct mutex lock;
+       struct in6_addr __rcu *tun_src;
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       struct rhashtable hmac_infos;
+#endif
+};
+
+static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
+{
+       return net->ipv6.seg6_data;
+}
+
+extern int seg6_init(void);
+extern void seg6_exit(void);
+extern int seg6_iptunnel_init(void);
+extern void seg6_iptunnel_exit(void);
+
+extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
+
+#endif
diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h
new file mode 100644 (file)
index 0000000..69c3a10
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_SEG6_HMAC_H
+#define _NET_SEG6_HMAC_H
+
+#include <net/flow.h>
+#include <net/ip6_fib.h>
+#include <net/sock.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/route.h>
+#include <net/seg6.h>
+#include <linux/seg6_hmac.h>
+#include <linux/rhashtable.h>
+
+#define SEG6_HMAC_MAX_DIGESTSIZE       160
+#define SEG6_HMAC_RING_SIZE            256
+
+struct seg6_hmac_info {
+       struct rhash_head node;
+       struct rcu_head rcu;
+
+       u32 hmackeyid;
+       char secret[SEG6_HMAC_SECRET_LEN];
+       u8 slen;
+       u8 alg_id;
+};
+
+struct seg6_hmac_algo {
+       u8 alg_id;
+       char name[64];
+       struct crypto_shash * __percpu *tfms;
+       struct shash_desc * __percpu *shashs;
+};
+
+extern int seg6_hmac_compute(struct seg6_hmac_info *hinfo,
+                            struct ipv6_sr_hdr *hdr, struct in6_addr *saddr,
+                            u8 *output);
+extern struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key);
+extern int seg6_hmac_info_add(struct net *net, u32 key,
+                             struct seg6_hmac_info *hinfo);
+extern int seg6_hmac_info_del(struct net *net, u32 key);
+extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr,
+                         struct ipv6_sr_hdr *srh);
+extern bool seg6_hmac_validate_skb(struct sk_buff *skb);
+extern int seg6_hmac_init(void);
+extern void seg6_hmac_exit(void);
+extern int seg6_hmac_net_init(struct net *net);
+extern void seg6_hmac_net_exit(struct net *net);
+
+#endif
index 27648955333892551030f87887c5545018e530af..69afda6bea15643f8769c8a0e261aac924f0f225 100644 (file)
@@ -252,6 +252,7 @@ struct sock_common {
   *    @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler)
   *    @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE)
   *    @sk_sndbuf: size of send buffer in bytes
+  *    @sk_padding: unused element for alignment
   *    @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
   *    @sk_no_check_rx: allow zero checksum in RX packets
   *    @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
@@ -302,7 +303,8 @@ struct sock_common {
   *    @sk_backlog_rcv: callback to process the backlog
   *    @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
   *    @sk_reuseport_cb: reuseport group container
- */
+  *    @sk_rcu: used during RCU grace period
+  */
 struct sock {
        /*
         * Now struct inet_timewait_sock also uses sock_common, so please just
@@ -387,6 +389,21 @@ struct sock {
         * Because of non atomicity rules, all
         * changes are protected by socket lock.
         */
+       unsigned int            __sk_flags_offset[0];
+#ifdef __BIG_ENDIAN_BITFIELD
+#define SK_FL_PROTO_SHIFT  16
+#define SK_FL_PROTO_MASK   0x00ff0000
+
+#define SK_FL_TYPE_SHIFT   0
+#define SK_FL_TYPE_MASK    0x0000ffff
+#else
+#define SK_FL_PROTO_SHIFT  8
+#define SK_FL_PROTO_MASK   0x0000ff00
+
+#define SK_FL_TYPE_SHIFT   16
+#define SK_FL_TYPE_MASK    0xffff0000
+#endif
+
        kmemcheck_bitfield_begin(flags);
        unsigned int            sk_padding : 2,
                                sk_no_check_tx : 1,
@@ -417,6 +434,7 @@ struct sock {
        u32                     sk_max_ack_backlog;
        __u32                   sk_priority;
        __u32                   sk_mark;
+       kuid_t                  sk_uid;
        struct pid              *sk_peer_pid;
        const struct cred       *sk_peer_cred;
        long                    sk_rcvtimeo;
@@ -912,14 +930,16 @@ static inline void sock_rps_reset_rxhash(struct sock *sk)
 #endif
 }
 
-#define sk_wait_event(__sk, __timeo, __condition)                      \
+#define sk_wait_event(__sk, __timeo, __condition, __wait)              \
        ({      int __rc;                                               \
                release_sock(__sk);                                     \
                __rc = __condition;                                     \
                if (!__rc) {                                            \
-                       *(__timeo) = schedule_timeout(*(__timeo));      \
+                       *(__timeo) = wait_woken(__wait,                 \
+                                               TASK_INTERRUPTIBLE,     \
+                                               *(__timeo));            \
                }                                                       \
-               sched_annotate_sleep();                                         \
+               sched_annotate_sleep();                                 \
                lock_sock(__sk);                                        \
                __rc = __condition;                                     \
                __rc;                                                   \
@@ -1160,11 +1180,6 @@ static inline void sk_enter_memory_pressure(struct sock *sk)
        sk->sk_prot->enter_memory_pressure(sk);
 }
 
-static inline long sk_prot_mem_limits(const struct sock *sk, int index)
-{
-       return sk->sk_prot->sysctl_mem[index];
-}
-
 static inline long
 sk_memory_allocated(const struct sock *sk)
 {
@@ -1279,11 +1294,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind);
 void __sk_mem_reduce_allocated(struct sock *sk, int amount);
 void __sk_mem_reclaim(struct sock *sk, int amount);
 
-#define SK_MEM_QUANTUM ((int)PAGE_SIZE)
+/* We used to have PAGE_SIZE here, but systems with 64KB pages
+ * do not necessarily have 16 times more memory than 4KB ones.
+ */
+#define SK_MEM_QUANTUM 4096
 #define SK_MEM_QUANTUM_SHIFT ilog2(SK_MEM_QUANTUM)
 #define SK_MEM_SEND    0
 #define SK_MEM_RECV    1
 
+/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
+static inline long sk_prot_mem_limits(const struct sock *sk, int index)
+{
+       long val = sk->sk_prot->sysctl_mem[index];
+
+#if PAGE_SIZE > SK_MEM_QUANTUM
+       val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;
+#elif PAGE_SIZE < SK_MEM_QUANTUM
+       val >>= SK_MEM_QUANTUM_SHIFT - PAGE_SHIFT;
+#endif
+       return val;
+}
+
 static inline int sk_mem_pages(int amt)
 {
        return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
@@ -1596,11 +1627,11 @@ static inline void sock_put(struct sock *sk)
 void sock_gen_put(struct sock *sk);
 
 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested,
-                    unsigned int trim_cap);
+                    unsigned int trim_cap, bool refcounted);
 static inline int sk_receive_skb(struct sock *sk, struct sk_buff *skb,
                                 const int nested)
 {
-       return __sk_receive_skb(sk, skb, nested, 1);
+       return __sk_receive_skb(sk, skb, nested, 1, true);
 }
 
 static inline void sk_tx_queue_set(struct sock *sk, int tx_queue)
@@ -1651,6 +1682,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
        sk->sk_wq = parent->wq;
        parent->sk = sk;
        sk_set_socket(sk, parent);
+       sk->sk_uid = SOCK_INODE(parent)->i_uid;
        security_sock_graft(sk, parent);
        write_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1658,6 +1690,11 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 kuid_t sock_i_uid(struct sock *sk);
 unsigned long sock_i_ino(struct sock *sk);
 
+static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk)
+{
+       return sk ? sk->sk_uid : make_kuid(net->user_ns, 0);
+}
+
 static inline u32 net_tx_rndhash(void)
 {
        u32 v = prandom_u32();
index 5767e9dbcf92453e7e4336481db5956f729c38ed..19cd3d3458049a2fe6d317bc48ff9e5c0524587b 100644 (file)
@@ -27,6 +27,7 @@ struct tcf_skbedit {
        u32             flags;
        u32             priority;
        u32             mark;
+       u32             mask;
        u16             queue_mapping;
        u16             ptype;
 };
index 253f8da6c2a67a91a9658e980860432224d05f1a..efef0b4b1b2bddc76095bcd4d02ebaaa3b2beb56 100644 (file)
@@ -12,6 +12,8 @@
 #define __NET_TC_TUNNEL_KEY_H
 
 #include <net/act_api.h>
+#include <linux/tc_act/tc_tunnel_key.h>
+#include <net/dst_metadata.h>
 
 struct tcf_tunnel_key_params {
        struct rcu_head         rcu;
@@ -27,4 +29,39 @@ struct tcf_tunnel_key {
 
 #define to_tunnel_key(a) ((struct tcf_tunnel_key *)a)
 
+static inline bool is_tcf_tunnel_set(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+       struct tcf_tunnel_key *t = to_tunnel_key(a);
+       struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
+
+       if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY)
+               return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET;
+#endif
+       return false;
+}
+
+static inline bool is_tcf_tunnel_release(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+       struct tcf_tunnel_key *t = to_tunnel_key(a);
+       struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
+
+       if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY)
+               return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE;
+#endif
+       return false;
+}
+
+static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+       struct tcf_tunnel_key *t = to_tunnel_key(a);
+       struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
+
+       return &params->tcft_enc_metadata->u.tun_info;
+#else
+       return NULL;
+#endif
+}
 #endif /* __NET_TC_TUNNEL_KEY_H */
index f83b7f220a65ea7de2ff1083e0a6ef52e7619d6e..207147b4c6b2a126af103e715d99c8914b290d48 100644 (file)
@@ -794,12 +794,23 @@ struct tcp_skb_cb {
  */
 static inline int tcp_v6_iif(const struct sk_buff *skb)
 {
-       bool l3_slave = skb_l3mdev_slave(TCP_SKB_CB(skb)->header.h6.flags);
+       bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
 
        return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
 }
 #endif
 
+/* TCP_SKB_CB reference means this can not be used from early demux */
+static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+       if (!net->ipv4.sysctl_tcp_l3mdev_accept &&
+           skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
+               return true;
+#endif
+       return false;
+}
+
 /* Due to TSO, an SKB can be composed of multiple actual
  * packets.  To keep these tracked properly, we use this.
  */
@@ -947,6 +958,7 @@ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
 
 u32 tcp_reno_ssthresh(struct sock *sk);
+u32 tcp_reno_undo_cwnd(struct sock *sk);
 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
 extern struct tcp_congestion_ops tcp_reno;
 
@@ -1209,6 +1221,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
 
 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
+int tcp_filter(struct sock *sk, struct sk_buff *skb);
 
 #undef STATE_TRACE
 
@@ -1503,11 +1516,26 @@ struct tcp_fastopen_context {
        struct rcu_head         rcu;
 };
 
+/* Latencies incurred by various limits for a sender. They are
+ * chronograph-like stats that are mutually exclusive.
+ */
+enum tcp_chrono {
+       TCP_CHRONO_UNSPEC,
+       TCP_CHRONO_BUSY, /* Actively sending data (non-empty write queue) */
+       TCP_CHRONO_RWND_LIMITED, /* Stalled by insufficient receive window */
+       TCP_CHRONO_SNDBUF_LIMITED, /* Stalled by insufficient send buffer */
+       __TCP_CHRONO_MAX,
+};
+
+void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type);
+void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type);
+
 /* write queue abstraction */
 static inline void tcp_write_queue_purge(struct sock *sk)
 {
        struct sk_buff *skb;
 
+       tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
        while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
                sk_wmem_free_skb(sk, skb);
        sk_mem_reclaim(sk);
@@ -1566,8 +1594,10 @@ static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *
 
 static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
 {
-       if (sk->sk_send_head == skb_unlinked)
+       if (sk->sk_send_head == skb_unlinked) {
                sk->sk_send_head = NULL;
+               tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
+       }
        if (tcp_sk(sk)->highest_sack == skb_unlinked)
                tcp_sk(sk)->highest_sack = NULL;
 }
@@ -1589,6 +1619,7 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb
        /* Queue it, remembering where we must start sending. */
        if (sk->sk_send_head == NULL) {
                sk->sk_send_head = skb;
+               tcp_chrono_start(sk, TCP_CHRONO_BUSY);
 
                if (tcp_sk(sk)->highest_sack == NULL)
                        tcp_sk(sk)->highest_sack = skb;
@@ -1796,7 +1827,7 @@ struct tcp_request_sock_ops {
        struct dst_entry *(*route_req)(const struct sock *sk, struct flowi *fl,
                                       const struct request_sock *req,
                                       bool *strict);
-       __u32 (*init_seq)(const struct sk_buff *skb);
+       __u32 (*init_seq)(const struct sk_buff *skb, u32 *tsoff);
        int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
                           struct flowi *fl, struct request_sock *req,
                           struct tcp_fastopen_cookie *foc,
index 18f1e6b9192711d9d5d92345adc86e716929b70a..1661791e8ca19bbc8f59e5c19263486b24f24b45 100644 (file)
@@ -246,8 +246,24 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
 }
 
 /* net/ipv4/udp.c */
+void udp_destruct_sock(struct sock *sk);
 void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
 int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
+void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
+static inline struct sk_buff *
+__skb_recv_udp(struct sock *sk, unsigned int flags, int noblock, int *peeked,
+              int *off, int *err)
+{
+       return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+                                  udp_skb_destructor, peeked, off, err);
+}
+static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
+                                          int noblock, int *err)
+{
+       int peeked, off = 0;
+
+       return __skb_recv_udp(sk, flags, noblock, &peeked, &off, err);
+}
 
 void udp_v4_early_demux(struct sk_buff *skb);
 int udp_get_port(struct sock *sk, unsigned short snum,
@@ -262,6 +278,7 @@ void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
 int udp_rcv(struct sk_buff *skb);
 int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 int udp_init_sock(struct sock *sk);
+int __udp_disconnect(struct sock *sk, int flags);
 int udp_disconnect(struct sock *sk, int flags);
 unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait);
 struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
index 80761938b9a78081822a4b82b4bd3fb30b5f6625..36097d388219ed1e205e607a756697c3eb4e00c6 100644 (file)
@@ -27,6 +27,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int  offset,
 static inline int udplite_sk_init(struct sock *sk)
 {
        udp_sk(sk)->pcflag = UDPLITE_BIT;
+       sk->sk_destruct = udp_destruct_sock;
        return 0;
 }
 
index 0255613a54a4097392649f33e9ee2801ebe2570c..49a59202f85ee2a03a447249f87fe935e28e8967 100644 (file)
@@ -225,9 +225,9 @@ struct vxlan_config {
 struct vxlan_dev {
        struct hlist_node hlist;        /* vni hash table */
        struct list_head  next;         /* vxlan's per namespace list */
-       struct vxlan_sock *vn4_sock;    /* listening socket for IPv4 */
+       struct vxlan_sock __rcu *vn4_sock;      /* listening socket for IPv4 */
 #if IS_ENABLED(CONFIG_IPV6)
-       struct vxlan_sock *vn6_sock;    /* listening socket for IPv6 */
+       struct vxlan_sock __rcu *vn6_sock;      /* listening socket for IPv6 */
 #endif
        struct net_device *dev;
        struct net        *net;         /* netns for packet i/o */
@@ -281,16 +281,6 @@ struct vxlan_dev {
 struct net_device *vxlan_dev_create(struct net *net, const char *name,
                                    u8 name_assign_type, struct vxlan_config *conf);
 
-static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan,
-                                       unsigned short family)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-       if (family == AF_INET6)
-               return inet_sk(vxlan->vn6_sock->sock->sk)->inet_sport;
-#endif
-       return inet_sk(vxlan->vn4_sock->sock->sk)->inet_sport;
-}
-
 static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
                                                     netdev_features_t features)
 {
diff --git a/include/soc/fsl/bman.h b/include/soc/fsl/bman.h
new file mode 100644 (file)
index 0000000..eaaf56d
--- /dev/null
@@ -0,0 +1,129 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __FSL_BMAN_H
+#define __FSL_BMAN_H
+
+/* wrapper for 48-bit buffers */
+struct bm_buffer {
+       union {
+               struct {
+                       __be16 bpid; /* hi 8-bits reserved */
+                       __be16 hi; /* High 16-bits of 48-bit address */
+                       __be32 lo; /* Low 32-bits of 48-bit address */
+               };
+               __be64 data;
+       };
+} __aligned(8);
+/*
+ * Restore the 48 bit address previously stored in BMan
+ * hardware pools as a dma_addr_t
+ */
+static inline dma_addr_t bm_buf_addr(const struct bm_buffer *buf)
+{
+       return be64_to_cpu(buf->data) & 0xffffffffffffLLU;
+}
+
+static inline u64 bm_buffer_get64(const struct bm_buffer *buf)
+{
+       return be64_to_cpu(buf->data) & 0xffffffffffffLLU;
+}
+
+static inline void bm_buffer_set64(struct bm_buffer *buf, u64 addr)
+{
+       buf->hi = cpu_to_be16(upper_32_bits(addr)); /* 16-bit field: addr bits 47:32 */
+       buf->lo = cpu_to_be32(lower_32_bits(addr)); /* addr bits 31:0; bpid untouched */
+}
+
+static inline u8 bm_buffer_get_bpid(const struct bm_buffer *buf)
+{
+       return be16_to_cpu(buf->bpid) & 0xff;
+}
+
+static inline void bm_buffer_set_bpid(struct bm_buffer *buf, int bpid)
+{
+       buf->bpid = cpu_to_be16(bpid & 0xff);
+}
+
+/* Managed portal, high-level i/face */
+
+/* Portal and Buffer Pools */
+struct bman_portal;
+struct bman_pool;
+
+#define BM_POOL_MAX            64 /* max # of buffer pools */
+
+/**
+ * bman_new_pool - Allocates a Buffer Pool object
+ *
+ * Creates a pool object, and returns a reference to it or NULL on error.
+ */
+struct bman_pool *bman_new_pool(void);
+
+/**
+ * bman_free_pool - Deallocates a Buffer Pool object
+ * @pool: the pool object to release
+ */
+void bman_free_pool(struct bman_pool *pool);
+
+/**
+ * bman_get_bpid - Returns a pool object's BPID.
+ * @pool: the pool object
+ *
+ * The returned value is the index of the encapsulated buffer pool,
+ * in the range of [0, @BM_POOL_MAX-1].
+ */
+int bman_get_bpid(const struct bman_pool *pool);
+
+/**
+ * bman_release - Release buffer(s) to the buffer pool
+ * @pool: the buffer pool object to release to
+ * @bufs: an array of buffers to release
+ * @num: the number of buffers in @bufs (1-8)
+ *
+ * Adds the given buffers to RCR entries. If the RCR ring is unresponsive,
+ * the function will return -ETIMEDOUT. Otherwise, it returns zero.
+ */
+int bman_release(struct bman_pool *pool, const struct bm_buffer *bufs, u8 num);
+
+/**
+ * bman_acquire - Acquire buffer(s) from a buffer pool
+ * @pool: the buffer pool object to acquire from
+ * @bufs: array for storing the acquired buffers
+ * @num: the number of buffers desired (@bufs is at least this big)
+ *
+ * Issues an "Acquire" command via the portal's management command interface.
+ * The return value will be the number of buffers obtained from the pool, or a
+ * negative error code if a h/w error or pool starvation was encountered. In
+ * the latter case, the content of @bufs is undefined.
+ */
+int bman_acquire(struct bman_pool *pool, struct bm_buffer *bufs, u8 num);
+
+#endif /* __FSL_BMAN_H */
diff --git a/include/soc/fsl/qman.h b/include/soc/fsl/qman.h
new file mode 100644 (file)
index 0000000..37f3eb0
--- /dev/null
@@ -0,0 +1,1074 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __FSL_QMAN_H
+#define __FSL_QMAN_H
+
+#include <linux/bitops.h>
+
+/* Hardware constants */
+#define QM_CHANNEL_SWPORTAL0 0
+#define QMAN_CHANNEL_POOL1 0x21
+#define QMAN_CHANNEL_POOL1_REV3 0x401
+extern u16 qm_channel_pool1;
+
+/* Portal processing (interrupt) sources */
+#define QM_PIRQ_CSCI   0x00100000      /* Congestion State Change */
+#define QM_PIRQ_EQCI   0x00080000      /* Enqueue Command Committed */
+#define QM_PIRQ_EQRI   0x00040000      /* EQCR Ring (below threshold) */
+#define QM_PIRQ_DQRI   0x00020000      /* DQRR Ring (non-empty) */
+#define QM_PIRQ_MRI    0x00010000      /* MR Ring (non-empty) */
+/*
+ * This mask contains all the interrupt sources that need handling except DQRI,
+ * ie. that if present should trigger slow-path processing.
+ */
+#define QM_PIRQ_SLOW   (QM_PIRQ_CSCI | QM_PIRQ_EQCI | QM_PIRQ_EQRI | \
+                        QM_PIRQ_MRI)
+
+/* For qman_static_dequeue_*** APIs */
+#define QM_SDQCR_CHANNELS_POOL_MASK    0x00007fff
+/* for n in [1,15] */
+#define QM_SDQCR_CHANNELS_POOL(n)      (0x00008000 >> (n))
+/* for conversion from n of qm_channel */
+static inline u32 QM_SDQCR_CHANNELS_POOL_CONV(u16 channel)
+{
+       return QM_SDQCR_CHANNELS_POOL(channel + 1 - qm_channel_pool1);
+}
+
+/* --- QMan data structures (and associated constants) --- */
+
+/* "Frame Descriptor (FD)" */
+struct qm_fd {
+       union {
+               struct {
+                       u8 cfg8b_w1;
+                       u8 bpid;        /* Buffer Pool ID */
+                       u8 cfg8b_w3;
+                       u8 addr_hi;     /* high 8-bits of 40-bit address */
+                       __be32 addr_lo; /* low 32-bits of 40-bit address */
+               } __packed;
+               __be64 data;
+       };
+       __be32 cfg;     /* format, offset, length / congestion */
+       union {
+               __be32 cmd;
+               __be32 status;
+       };
+} __aligned(8);
+
+#define QM_FD_FORMAT_SG                BIT(31)
+#define QM_FD_FORMAT_LONG      BIT(30)
+#define QM_FD_FORMAT_COMPOUND  BIT(29)
+#define QM_FD_FORMAT_MASK      GENMASK(31, 29)
+#define QM_FD_OFF_SHIFT                20
+#define QM_FD_OFF_MASK         GENMASK(28, 20)
+#define QM_FD_LEN_MASK         GENMASK(19, 0)
+#define QM_FD_LEN_BIG_MASK     GENMASK(28, 0)
+
+enum qm_fd_format {
+       /*
+        * 'contig' implies a contiguous buffer, whereas 'sg' implies a
+        * scatter-gather table. 'big' implies a 29-bit length with no offset
+        * field, otherwise length is 20-bit and offset is 9-bit. 'compound'
+        * implies a s/g-like table, where each entry itself represents a frame
+        * (contiguous or scatter-gather) and the 29-bit "length" is
+        * interpreted purely for congestion calculations, ie. a "congestion
+        * weight".
+        */
+       qm_fd_contig = 0,
+       qm_fd_contig_big = QM_FD_FORMAT_LONG,
+       qm_fd_sg = QM_FD_FORMAT_SG,
+       qm_fd_sg_big = QM_FD_FORMAT_SG | QM_FD_FORMAT_LONG,
+       qm_fd_compound = QM_FD_FORMAT_COMPOUND
+};
+
+static inline dma_addr_t qm_fd_addr(const struct qm_fd *fd)
+{
+       return be64_to_cpu(fd->data) & 0xffffffffffLLU;
+}
+
+static inline u64 qm_fd_addr_get64(const struct qm_fd *fd)
+{
+       return be64_to_cpu(fd->data) & 0xffffffffffLLU;
+}
+
+static inline void qm_fd_addr_set64(struct qm_fd *fd, u64 addr)
+{
+       fd->addr_hi = upper_32_bits(addr); /* u8 field: only addr bits 39:32 kept */
+       fd->addr_lo = cpu_to_be32(lower_32_bits(addr)); /* addr bits 31:0 */
+}
+
+/*
+ * The 'format' field indicates the interpretation of the remaining
+ * 29 bits of the 32-bit word.
+ * If 'format' is _contig or _sg, 20b length and 9b offset.
+ * If 'format' is _contig_big or _sg_big, 29b length.
+ * If 'format' is _compound, 29b "congestion weight".
+ */
+static inline enum qm_fd_format qm_fd_get_format(const struct qm_fd *fd)
+{
+       return be32_to_cpu(fd->cfg) & QM_FD_FORMAT_MASK;
+}
+
+static inline int qm_fd_get_offset(const struct qm_fd *fd)
+{
+       return (be32_to_cpu(fd->cfg) & QM_FD_OFF_MASK) >> QM_FD_OFF_SHIFT;
+}
+
+static inline int qm_fd_get_length(const struct qm_fd *fd)
+{
+       return be32_to_cpu(fd->cfg) & QM_FD_LEN_MASK;
+}
+
+static inline int qm_fd_get_len_big(const struct qm_fd *fd)
+{
+       return be32_to_cpu(fd->cfg) & QM_FD_LEN_BIG_MASK;
+}
+
+static inline void qm_fd_set_param(struct qm_fd *fd, enum qm_fd_format fmt,
+                                  int off, int len)
+{
+       fd->cfg = cpu_to_be32(fmt | (len & QM_FD_LEN_BIG_MASK) |
+                             ((off << QM_FD_OFF_SHIFT) & QM_FD_OFF_MASK));
+}
+
+#define qm_fd_set_contig(fd, off, len) \
+       qm_fd_set_param(fd, qm_fd_contig, off, len)
+#define qm_fd_set_sg(fd, off, len) qm_fd_set_param(fd, qm_fd_sg, off, len)
+#define qm_fd_set_contig_big(fd, len) \
+       qm_fd_set_param(fd, qm_fd_contig_big, 0, len)
+#define qm_fd_set_sg_big(fd, len) qm_fd_set_param(fd, qm_fd_sg_big, 0, len)
+
+static inline void qm_fd_clear_fd(struct qm_fd *fd)
+{
+       fd->data = 0;
+       fd->cfg = 0;
+       fd->cmd = 0;
+}
+
+/* Scatter/Gather table entry */
+struct qm_sg_entry {
+       union {
+               struct {
+                       u8 __reserved1[3];
+                       u8 addr_hi;     /* high 8-bits of 40-bit address */
+                       __be32 addr_lo; /* low 32-bits of 40-bit address */
+               };
+               __be64 data;
+       };
+       __be32 cfg;     /* E bit, F bit, length */
+       u8 __reserved2;
+       u8 bpid;
+       __be16 offset; /* 13-bit, _res[13-15]*/
+} __packed;
+
+#define QM_SG_LEN_MASK GENMASK(29, 0)
+#define QM_SG_OFF_MASK GENMASK(12, 0)
+#define QM_SG_FIN      BIT(30)
+#define QM_SG_EXT      BIT(31)
+
+static inline dma_addr_t qm_sg_addr(const struct qm_sg_entry *sg)
+{
+       return be64_to_cpu(sg->data) & 0xffffffffffLLU;
+}
+
+static inline u64 qm_sg_entry_get64(const struct qm_sg_entry *sg)
+{
+       return be64_to_cpu(sg->data) & 0xffffffffffLLU;
+}
+
+static inline void qm_sg_entry_set64(struct qm_sg_entry *sg, u64 addr)
+{
+       sg->addr_hi = upper_32_bits(addr); /* u8 field: only addr bits 39:32 kept */
+       sg->addr_lo = cpu_to_be32(lower_32_bits(addr)); /* addr bits 31:0 */
+}
+
+static inline bool qm_sg_entry_is_final(const struct qm_sg_entry *sg)
+{
+       return be32_to_cpu(sg->cfg) & QM_SG_FIN;
+}
+
+static inline bool qm_sg_entry_is_ext(const struct qm_sg_entry *sg)
+{
+       return be32_to_cpu(sg->cfg) & QM_SG_EXT;
+}
+
+static inline int qm_sg_entry_get_len(const struct qm_sg_entry *sg)
+{
+       return be32_to_cpu(sg->cfg) & QM_SG_LEN_MASK;
+}
+
+static inline void qm_sg_entry_set_len(struct qm_sg_entry *sg, int len)
+{
+       sg->cfg = cpu_to_be32(len & QM_SG_LEN_MASK);
+}
+
+static inline void qm_sg_entry_set_f(struct qm_sg_entry *sg, int len)
+{
+       sg->cfg = cpu_to_be32(QM_SG_FIN | (len & QM_SG_LEN_MASK));
+}
+
+static inline int qm_sg_entry_get_off(const struct qm_sg_entry *sg)
+{
+       return be16_to_cpu(sg->offset) & QM_SG_OFF_MASK; /* offset is __be16 */
+}
+
+/* "Frame Dequeue Response" */
+struct qm_dqrr_entry {
+       u8 verb;
+       u8 stat;
+       u16 seqnum;     /* 15-bit */
+       u8 tok;
+       u8 __reserved2[3];
+       u32 fqid;       /* 24-bit */
+       u32 contextB;
+       struct qm_fd fd;
+       u8 __reserved4[32];
+} __packed;
+#define QM_DQRR_VERB_VBIT              0x80
+#define QM_DQRR_VERB_MASK              0x7f    /* where the verb contains; */
+#define QM_DQRR_VERB_FRAME_DEQUEUE     0x60    /* "this format" */
+#define QM_DQRR_STAT_FQ_EMPTY          0x80    /* FQ empty */
+#define QM_DQRR_STAT_FQ_HELDACTIVE     0x40    /* FQ held active */
+#define QM_DQRR_STAT_FQ_FORCEELIGIBLE  0x20    /* FQ was force-eligible'd */
+#define QM_DQRR_STAT_FD_VALID          0x10    /* has a non-NULL FD */
+#define QM_DQRR_STAT_UNSCHEDULED       0x02    /* Unscheduled dequeue */
+#define QM_DQRR_STAT_DQCR_EXPIRED      0x01    /* VDQCR or PDQCR expired*/
+
+/* "ERN Message Response" */
+/* "FQ State Change Notification" */
+union qm_mr_entry {
+       struct {
+               u8 verb;
+               u8 __reserved[63];
+       };
+       struct {
+               u8 verb;
+               u8 dca;
+               u16 seqnum;
+               u8 rc;          /* Rej Code: 8-bit */
+               u8 orp_hi;      /* ORP: 24-bit */
+               u16 orp_lo;
+               u32 fqid;       /* 24-bit */
+               u32 tag;
+               struct qm_fd fd;
+               u8 __reserved1[32];
+       } __packed ern;
+       struct {
+               u8 verb;
+               u8 fqs;         /* Frame Queue Status */
+               u8 __reserved1[6];
+               u32 fqid;       /* 24-bit */
+               u32 contextB;
+               u8 __reserved2[48];
+       } __packed fq;          /* FQRN/FQRNI/FQRL/FQPN */
+};
+#define QM_MR_VERB_VBIT                        0x80
+/*
+ * ERNs originating from direct-connect portals ("dcern") use 0x20 as a verb
+ * which would be invalid as a s/w enqueue verb. A s/w ERN can be distinguished
+ * from the other MR types by noting if the 0x20 bit is unset.
+ */
+#define QM_MR_VERB_TYPE_MASK           0x27
+#define QM_MR_VERB_DC_ERN              0x20
+#define QM_MR_VERB_FQRN                        0x21
+#define QM_MR_VERB_FQRNI               0x22
+#define QM_MR_VERB_FQRL                        0x23
+#define QM_MR_VERB_FQPN                        0x24
+#define QM_MR_RC_MASK                  0xf0    /* contains one of; */
+#define QM_MR_RC_CGR_TAILDROP          0x00
+#define QM_MR_RC_WRED                  0x10
+#define QM_MR_RC_ERROR                 0x20
+#define QM_MR_RC_ORPWINDOW_EARLY       0x30
+#define QM_MR_RC_ORPWINDOW_LATE                0x40
+#define QM_MR_RC_FQ_TAILDROP           0x50
+#define QM_MR_RC_ORPWINDOW_RETIRED     0x60
+#define QM_MR_RC_ORP_ZERO              0x70
+#define QM_MR_FQS_ORLPRESENT           0x02    /* ORL fragments to come */
+#define QM_MR_FQS_NOTEMPTY             0x01    /* FQ has enqueued frames */
+
+/*
+ * An identical structure of FQD fields is present in the "Init FQ" command and
+ * the "Query FQ" result; it is factored out into the "struct qm_fqd" type.
+ * Within that, the 'stashing' and 'taildrop' pieces are also factored out, the
+ * latter has two inlines to assist with converting to/from the mant+exp
+ * representation.
+ */
+struct qm_fqd_stashing {
+       /* See QM_STASHING_EXCL_<...> */
+       u8 exclusive;
+       /* Numbers of cachelines */
+       u8 cl; /* _res[6-7], as[4-5], ds[2-3], cs[0-1] */
+};
+
+struct qm_fqd_oac {
+       /* "Overhead Accounting Control", see QM_OAC_<...> */
+       u8 oac; /* oac[6-7], _res[0-5] */
+       /* Two's-complement value (-128 to +127) */
+       s8 oal; /* "Overhead Accounting Length" */
+};
+
+struct qm_fqd {
+       /* _res[6-7], orprws[3-5], oa[2], olws[0-1] */
+       u8 orpc;
+       u8 cgid;
+       __be16 fq_ctrl; /* See QM_FQCTRL_<...> */
+       __be16 dest_wq; /* channel[3-15], wq[0-2] */
+       __be16 ics_cred; /* 15-bit */
+       /*
+        * For "Initialize Frame Queue" commands, the write-enable mask
+        * determines whether 'td' or 'oac_init' is observed. For query
+        * commands, this field is always 'td', and 'oac_query' (below) reflects
+        * the Overhead ACcounting values.
+        */
+       union {
+               __be16 td; /* "Taildrop": _res[13-15], mant[5-12], exp[0-4] */
+               struct qm_fqd_oac oac_init;
+       };
+       __be32 context_b;
+       union {
+               /* Treat it as 64-bit opaque */
+               __be64 opaque;
+               struct {
+                       __be32 hi;
+                       __be32 lo;
+               };
+               /* Treat it as s/w portal stashing config */
+               /* see "FQD Context_A field used for [...]" */
+               struct {
+                       struct qm_fqd_stashing stashing;
+                       /*
+                        * 48-bit address of FQ context to
+                        * stash, must be cacheline-aligned
+                        */
+                       __be16 context_hi;
+                       __be32 context_lo;
+               } __packed;
+       } context_a;
+       struct qm_fqd_oac oac_query;
+} __packed;
+
+#define QM_FQD_CHAN_OFF                3
+#define QM_FQD_WQ_MASK         GENMASK(2, 0)
+#define QM_FQD_TD_EXP_MASK     GENMASK(4, 0)
+#define QM_FQD_TD_MANT_OFF     5
+#define QM_FQD_TD_MANT_MASK    GENMASK(12, 5)
+#define QM_FQD_TD_MAX          0xe0000000
+#define QM_FQD_TD_MANT_MAX     0xff
+#define QM_FQD_OAC_OFF         6
+#define QM_FQD_AS_OFF          4
+#define QM_FQD_DS_OFF          2
+#define QM_FQD_XS_MASK         0x3
+
+/* 64-bit converters for context_hi/lo */
+static inline u64 qm_fqd_stashing_get64(const struct qm_fqd *fqd)
+{
+       return be64_to_cpu(fqd->context_a.opaque) & 0xffffffffffffULL;
+}
+
+static inline dma_addr_t qm_fqd_stashing_addr(const struct qm_fqd *fqd)
+{
+       return be64_to_cpu(fqd->context_a.opaque) & 0xffffffffffffULL;
+}
+
+/*
+ * Read context_a as a CPU-endian value. This simply forwards to
+ * qm_fqd_stashing_get64(), so only the low 48 bits are returned.
+ */
+static inline u64 qm_fqd_context_a_get64(const struct qm_fqd *fqd)
+{
+       return qm_fqd_stashing_get64(fqd);
+}
+
+/*
+ * Store a stash address into the big-endian context_hi/context_lo fields.
+ * context_hi is only 16 bits wide, so bits 48-63 of @addr are discarded.
+ * The explicit cpu_to_be*() conversions keep the in-memory layout correct on
+ * little-endian CPUs as well (they are no-ops on big-endian ones).
+ */
+static inline void qm_fqd_stashing_set64(struct qm_fqd *fqd, u64 addr)
+{
+       fqd->context_a.context_hi = cpu_to_be16(upper_32_bits(addr));
+       fqd->context_a.context_lo = cpu_to_be32(lower_32_bits(addr));
+}
+
+/*
+ * Store a 64-bit value into context_a through its hi/lo halves. Both halves
+ * are __be32, so each one is converted with cpu_to_be32(); a 16-bit
+ * conversion of the upper half would truncate it and misplace its bytes on
+ * little-endian CPUs.
+ */
+static inline void qm_fqd_context_a_set64(struct qm_fqd *fqd, u64 addr)
+{
+       fqd->context_a.hi = cpu_to_be32(upper_32_bits(addr));
+       fqd->context_a.lo = cpu_to_be32(lower_32_bits(addr));
+}
+
+/*
+ * Encode a taildrop threshold as mantissa + exponent and store it in 'td'.
+ * Returns -ERANGE when @val exceeds QM_FQD_TD_MAX, 0 otherwise. With @roundup
+ * set, every bit shifted out of the mantissa rounds the mantissa up.
+ */
+static inline int qm_fqd_set_taildrop(struct qm_fqd *fqd, u32 val,
+                                     int roundup)
+{
+       u32 mant = val;
+       u32 exp;
+       int td;
+
+       if (mant > QM_FQD_TD_MAX)
+               return -ERANGE;
+
+       /* halve until the mantissa fits, counting the shifts as the exponent */
+       for (exp = 0; mant > QM_FQD_TD_MANT_MAX; exp++) {
+               u32 lsb = mant & 1;
+
+               mant >>= 1;
+               if (roundup && lsb)
+                       mant++;
+       }
+
+       td = ((mant << QM_FQD_TD_MANT_OFF) & QM_FQD_TD_MANT_MASK) |
+            (exp & QM_FQD_TD_EXP_MASK);
+       fqd->td = cpu_to_be16(td);
+       return 0;
+}
+/* decode the mant+exp 'td' field back into a threshold value */
+static inline int qm_fqd_get_taildrop(const struct qm_fqd *fqd)
+{
+       int td = be16_to_cpu(fqd->td);
+       unsigned long mant = (td & QM_FQD_TD_MANT_MASK) >> QM_FQD_TD_MANT_OFF;
+       unsigned long exp = td & QM_FQD_TD_EXP_MASK;
+
+       return mant << exp;
+}
+
+/* pack the three 2-bit stashing values into the context_a 'cl' field */
+static inline void qm_fqd_set_stashing(struct qm_fqd *fqd, u8 as, u8 ds, u8 cs)
+{
+       u8 cl = cs & QM_FQD_XS_MASK;
+
+       cl |= (ds & QM_FQD_XS_MASK) << QM_FQD_DS_OFF;
+       cl |= (as & QM_FQD_XS_MASK) << QM_FQD_AS_OFF;
+       fqd->context_a.stashing.cl = cl;
+}
+
+/* return the raw stashing 'cl' field as written by qm_fqd_set_stashing() */
+static inline u8 qm_fqd_get_stashing(const struct qm_fqd *fqd)
+{
+       return fqd->context_a.stashing.cl;
+}
+
+/*
+ * Set the 'oac' member of oac_init to @val shifted up by QM_FQD_OAC_OFF.
+ * NOTE(review): @val is presumably a combination of QM_OAC_* - confirm.
+ */
+static inline void qm_fqd_set_oac(struct qm_fqd *fqd, u8 val)
+{
+       fqd->oac_init.oac = val << QM_FQD_OAC_OFF;
+}
+
+/* store the signed @val unchanged in the 'oal' member of oac_init */
+static inline void qm_fqd_set_oal(struct qm_fqd *fqd, s8 val)
+{
+       fqd->oac_init.oal = val;
+}
+
+/* combine channel and work queue into the big-endian 'dest_wq' field */
+static inline void qm_fqd_set_destwq(struct qm_fqd *fqd, int ch, int wq)
+{
+       u16 dest = (ch << QM_FQD_CHAN_OFF) | (wq & QM_FQD_WQ_MASK);
+
+       fqd->dest_wq = cpu_to_be16(dest);
+}
+
+/* extract the channel part of 'dest_wq' */
+static inline int qm_fqd_get_chan(const struct qm_fqd *fqd)
+{
+       u16 dest = be16_to_cpu(fqd->dest_wq);
+
+       return dest >> QM_FQD_CHAN_OFF;
+}
+
+/* extract the work queue part of 'dest_wq' */
+static inline int qm_fqd_get_wq(const struct qm_fqd *fqd)
+{
+       u16 dest = be16_to_cpu(fqd->dest_wq);
+
+       return dest & QM_FQD_WQ_MASK;
+}
+
+/* See "Frame Queue Descriptor (FQD)" */
+/* Frame Queue Descriptor (FQD) field 'fq_ctrl' uses these constants */
+#define QM_FQCTRL_MASK         0x07ff  /* 'fq_ctrl' flags; */
+#define QM_FQCTRL_CGE          0x0400  /* Congestion Group Enable */
+#define QM_FQCTRL_TDE          0x0200  /* Tail-Drop Enable */
+#define QM_FQCTRL_CTXASTASHING 0x0080  /* Context-A stashing */
+#define QM_FQCTRL_CPCSTASH     0x0040  /* CPC Stash Enable */
+#define QM_FQCTRL_FORCESFDR    0x0008  /* High-priority SFDRs */
+#define QM_FQCTRL_AVOIDBLOCK   0x0004  /* Don't block active */
+#define QM_FQCTRL_HOLDACTIVE   0x0002  /* Hold active in portal */
+#define QM_FQCTRL_PREFERINCACHE        0x0001  /* Aggressively cache FQD */
+#define QM_FQCTRL_LOCKINCACHE  QM_FQCTRL_PREFERINCACHE /* older naming */
+
+/* See "FQD Context_A field used for [...]" */
+/* Frame Queue Descriptor (FQD) field 'CONTEXT_A' uses these constants */
+#define QM_STASHING_EXCL_ANNOTATION    0x04
+#define QM_STASHING_EXCL_DATA          0x02
+#define QM_STASHING_EXCL_CTX           0x01
+
+/* See "Intra Class Scheduling" */
+/* FQD field 'OAC' (Overhead ACcounting) uses these constants */
+#define QM_OAC_ICS             0x2 /* Accounting for Intra-Class Scheduling */
+#define QM_OAC_CG              0x1 /* Accounting for Congestion Groups */
+
+/*
+ * This struct represents the 32-bit "WR_PARM_[GYR]" parameters in CGR fields
+ * and associated commands/responses. The WRED parameters are calculated from
+ * these fields as follows;
+ *   MaxTH = MA * (2 ^ Mn)
+ *   Slope = SA / (2 ^ Sn)
+ *    MaxP = 4 * (Pn + 1)
+ */
+struct qm_cgr_wr_parm {
+       /* MA[24-31], Mn[19-23], SA[12-18], Sn[6-11], Pn[0-5] */
+       u32 word;
+};
+/*
+ * This struct represents the 13-bit "CS_THRES" CGR field. In the corresponding
+ * management commands, this is padded to a 16-bit structure field, so that's
+ * how we represent it here. The congestion state threshold is calculated from
+ * these fields as follows;
+ *   CS threshold = TA * (2 ^ Tn)
+ */
+struct qm_cgr_cs_thres {
+       /* _res[13-15], TA[5-12], Tn[0-4] */
+       u16 word;
+};
+/*
+ * This identical structure of CGR fields is present in the "Init/Modify CGR"
+ * commands and the "Query CGR" result. It's factored out here into its own
+ * struct.
+ */
+struct __qm_mc_cgr {
+       struct qm_cgr_wr_parm wr_parm_g;
+       struct qm_cgr_wr_parm wr_parm_y;
+       struct qm_cgr_wr_parm wr_parm_r;
+       u8 wr_en_g;     /* boolean, use QM_CGR_EN */
+       u8 wr_en_y;     /* boolean, use QM_CGR_EN */
+       u8 wr_en_r;     /* boolean, use QM_CGR_EN */
+       u8 cscn_en;     /* boolean, use QM_CGR_EN */
+       union {
+               struct {
+                       u16 cscn_targ_upd_ctrl; /* use QM_CGR_TARG_UDP_CTRL_* */
+                       u16 cscn_targ_dcp_low;  /* CSCN_TARG_DCP low-16bits */
+               };
+               u32 cscn_targ;  /* use QM_CGR_TARG_* */
+       };
+       u8 cstd_en;     /* boolean, use QM_CGR_EN */
+       u8 cs;          /* boolean, only used in query response */
+       struct qm_cgr_cs_thres cs_thres; /* use qm_cgr_cs_thres_set64() */
+       u8 mode;        /* QMAN_CGR_MODE_FRAME not supported in rev1.0 */
+} __packed;
+#define QM_CGR_EN              0x01 /* For wr_en_*, cscn_en, cstd_en */
+#define QM_CGR_TARG_UDP_CTRL_WRITE_BIT 0x8000 /* value written to portal bit*/
+#define QM_CGR_TARG_UDP_CTRL_DCP       0x4000 /* 0: SWP, 1: DCP */
+#define QM_CGR_TARG_PORTAL(n)  (0x80000000 >> (n)) /* s/w portal, 0-9 */
+#define QM_CGR_TARG_FMAN0      0x00200000 /* direct-connect portal: fman0 */
+#define QM_CGR_TARG_FMAN1      0x00100000 /*                      : fman1 */
+/* Convert CGR thresholds to/from "cs_thres" format */
+/*
+ * Decode a "cs_thres" word into the threshold TA * (2 ^ Tn).
+ * The mantissa is widened to u64 before shifting: with an 8-bit TA and a
+ * 5-bit Tn the result can need up to 39 bits, which overflows a plain int.
+ */
+static inline u64 qm_cgr_cs_thres_get64(const struct qm_cgr_cs_thres *th)
+{
+       u64 mant = (th->word >> 5) & 0xff;
+
+       return mant << (th->word & 0x1f);
+}
+
+/*
+ * Encode @val as an 8-bit mantissa and 5-bit exponent in th->word. With
+ * @roundup set, each bit shifted out rounds the mantissa up. Always returns 0.
+ */
+static inline int qm_cgr_cs_thres_set64(struct qm_cgr_cs_thres *th, u64 val,
+                                       int roundup)
+{
+       u32 exp;
+
+       /* halve until the mantissa fits in 8 bits, counting the shifts */
+       for (exp = 0; val > 0xff; exp++) {
+               u64 lsb = val & 1;
+
+               val >>= 1;
+               if (roundup && lsb)
+                       val++;
+       }
+       th->word = (exp & 0x1f) | ((val & 0xff) << 5);
+       return 0;
+}
+
+/* "Initialize FQ" */
+struct qm_mcc_initfq {
+       u8 __reserved1[2];
+       u16 we_mask;    /* Write Enable Mask */
+       u32 fqid;       /* 24-bit */
+       u16 count;      /* Initialises 'count+1' FQDs */
+       struct qm_fqd fqd; /* the FQD fields go here */
+       u8 __reserved2[30];
+} __packed;
+/* "Initialize/Modify CGR" */
+struct qm_mcc_initcgr {
+       u8 __reserve1[2];
+       u16 we_mask;    /* Write Enable Mask */
+       struct __qm_mc_cgr cgr; /* CGR fields */
+       u8 __reserved2[2];
+       u8 cgid;
+       u8 __reserved3[32];
+} __packed;
+
+/* INITFQ-specific flags */
+#define QM_INITFQ_WE_MASK              0x01ff  /* 'Write Enable' flags; */
+#define QM_INITFQ_WE_OAC               0x0100
+#define QM_INITFQ_WE_ORPC              0x0080
+#define QM_INITFQ_WE_CGID              0x0040
+#define QM_INITFQ_WE_FQCTRL            0x0020
+#define QM_INITFQ_WE_DESTWQ            0x0010
+#define QM_INITFQ_WE_ICSCRED           0x0008
+#define QM_INITFQ_WE_TDTHRESH          0x0004
+#define QM_INITFQ_WE_CONTEXTB          0x0002
+#define QM_INITFQ_WE_CONTEXTA          0x0001
+/* INITCGR/MODIFYCGR-specific flags */
+#define QM_CGR_WE_MASK                 0x07ff  /* 'Write Enable Mask'; */
+#define QM_CGR_WE_WR_PARM_G            0x0400
+#define QM_CGR_WE_WR_PARM_Y            0x0200
+#define QM_CGR_WE_WR_PARM_R            0x0100
+#define QM_CGR_WE_WR_EN_G              0x0080
+#define QM_CGR_WE_WR_EN_Y              0x0040
+#define QM_CGR_WE_WR_EN_R              0x0020
+#define QM_CGR_WE_CSCN_EN              0x0010
+#define QM_CGR_WE_CSCN_TARG            0x0008
+#define QM_CGR_WE_CSTD_EN              0x0004
+#define QM_CGR_WE_CS_THRES             0x0002
+#define QM_CGR_WE_MODE                 0x0001
+
+#define QMAN_CGR_FLAG_USE_INIT      0x00000001
+
+       /* Portal and Frame Queues */
+/* Represents a managed portal */
+struct qman_portal;
+
+/*
+ * This object type represents QMan frame queue descriptors (FQD), it is
+ * cacheline-aligned, and initialised by qman_create_fq(). The structure is
+ * defined further down.
+ */
+struct qman_fq;
+
+/*
+ * This object type represents a QMan congestion group, it is defined further
+ * down.
+ */
+struct qman_cgr;
+
+/*
+ * This enum, and the callback type that returns it, are used when handling
+ * dequeued frames via DQRR. Note that for "null" callbacks registered with the
+ * portal object (for handling dequeues that do not demux because contextB is
+ * NULL), the return value *MUST* be qman_cb_dqrr_consume.
+ */
+enum qman_cb_dqrr_result {
+       /* DQRR entry can be consumed */
+       qman_cb_dqrr_consume,
+       /* Like _consume, but requests parking - FQ must be held-active */
+       qman_cb_dqrr_park,
+       /* Does not consume, for DCA mode only. */
+       qman_cb_dqrr_defer,
+       /*
+        * Stop processing without consuming this ring entry. Exits the current
+        * qman_p_poll_dqrr() or interrupt-handling, as appropriate. If within
+        * an interrupt handler, the callback would typically call
+        * qman_irqsource_remove(QM_PIRQ_DQRI) before returning this value,
+        * otherwise the interrupt will reassert immediately.
+        */
+       qman_cb_dqrr_stop,
+       /* Like qman_cb_dqrr_stop, but consumes the current entry. */
+       qman_cb_dqrr_consume_stop
+};
+typedef enum qman_cb_dqrr_result (*qman_cb_dqrr)(struct qman_portal *qm,
+                                       struct qman_fq *fq,
+                                       const struct qm_dqrr_entry *dqrr);
+
+/*
+ * This callback type is used when handling ERNs, FQRNs and FQRLs via MR. They
+ * are always consumed after the callback returns.
+ */
+typedef void (*qman_cb_mr)(struct qman_portal *qm, struct qman_fq *fq,
+                          const union qm_mr_entry *msg);
+
+/*
+ * s/w-visible states. Ie. tentatively scheduled + truly scheduled + active +
+ * held-active + held-suspended are just "sched". Things like "retired" will not
+ * be assumed until it is complete (ie. QMAN_FQ_STATE_CHANGING is set until
+ * then, to indicate it's completing and to gate attempts to retry the retire
+ * command). Note, park commands do not set QMAN_FQ_STATE_CHANGING because it's
+ * technically impossible in the case of enqueue DCAs (which refer to DQRR ring
+ * index rather than the FQ that ring entry corresponds to), so repeated park
+ * commands are allowed (if you're silly enough to try) but won't change FQ
+ * state, and the resulting park notifications move FQs from "sched" to
+ * "parked".
+ */
+enum qman_fq_state {
+       qman_fq_state_oos,
+       qman_fq_state_parked,
+       qman_fq_state_sched,
+       qman_fq_state_retired
+};
+
+#define QMAN_FQ_STATE_CHANGING      0x80000000 /* 'state' is changing */
+#define QMAN_FQ_STATE_NE            0x40000000 /* retired FQ isn't empty */
+#define QMAN_FQ_STATE_ORL           0x20000000 /* retired FQ has ORL */
+#define QMAN_FQ_STATE_BLOCKOOS      0xe0000000 /* if any are set, no OOS */
+#define QMAN_FQ_STATE_CGR_EN        0x10000000 /* CGR enabled */
+#define QMAN_FQ_STATE_VDQCR         0x08000000 /* being volatile dequeued */
+
+/*
+ * Frame queue objects (struct qman_fq) are stored within memory passed to
+ * qman_create_fq(), as this allows stashing of caller-provided demux callback
+ * pointers at no extra cost to stashing of (driver-internal) FQ state. If the
+ * caller wishes to add per-FQ state and have it benefit from dequeue-stashing,
+ * they should;
+ *
+ * (a) extend the qman_fq structure with their state; eg.
+ *
+ *     // myfq is allocated and driver_fq callbacks filled in;
+ *     struct my_fq {
+ *        struct qman_fq base;
+ *        int an_extra_field;
+ *        [ ... add other fields to be associated with each FQ ...]
+ *     } *myfq = some_my_fq_allocator();
+ *     struct qman_fq *fq = qman_create_fq(fqid, flags, &myfq->base);
+ *
+ *     // in a dequeue callback, access extra fields from 'fq' via a cast;
+ *     struct my_fq *myfq = (struct my_fq *)fq;
+ *     do_something_with(myfq->an_extra_field);
+ *     [...]
+ *
+ * (b) when and if configuring the FQ for context stashing, specify how ever
+ *     many cachelines are required to stash 'struct my_fq', to accelerate not
+ *     only the QMan driver but the callback as well.
+ */
+
+struct qman_fq_cb {
+       qman_cb_dqrr dqrr;      /* for dequeued frames */
+       qman_cb_mr ern;         /* for s/w ERNs */
+       qman_cb_mr fqs;         /* frame-queue state changes*/
+};
+
+struct qman_fq {
+       /* Caller of qman_create_fq() provides these demux callbacks */
+       struct qman_fq_cb cb;
+       /*
+        * These are internal to the driver, don't touch. In particular, they
+        * may change, be removed, or extended (so you shouldn't rely on
+        * sizeof(qman_fq) being a constant).
+        */
+       u32 fqid, idx;
+       unsigned long flags;
+       enum qman_fq_state state;
+       int cgr_groupid;
+};
+
+/*
+ * This callback type is used when handling congestion group entry/exit.
+ * 'congested' is non-zero on congestion-entry, and zero on congestion-exit.
+ */
+typedef void (*qman_cb_cgr)(struct qman_portal *qm,
+                           struct qman_cgr *cgr, int congested);
+
+struct qman_cgr {
+       /* Set these prior to qman_create_cgr() */
+       u32 cgrid; /* 0..255, but u32 to allow specials like -1, 256, etc.*/
+       qman_cb_cgr cb;
+       /* These are private to the driver */
+       u16 chan; /* portal channel this object is created on */
+       struct list_head node;
+};
+
+/* Flags to qman_create_fq() */
+#define QMAN_FQ_FLAG_NO_ENQUEUE             0x00000001 /* can't enqueue */
+#define QMAN_FQ_FLAG_NO_MODIFY      0x00000002 /* can only enqueue */
+#define QMAN_FQ_FLAG_TO_DCPORTAL     0x00000004 /* consumed by CAAM/PME/Fman */
+#define QMAN_FQ_FLAG_DYNAMIC_FQID    0x00000020 /* (de)allocate fqid */
+
+/* Flags to qman_init_fq() */
+#define QMAN_INITFQ_FLAG_SCHED      0x00000001 /* schedule rather than park */
+#define QMAN_INITFQ_FLAG_LOCAL      0x00000004 /* set dest portal */
+
+       /* Portal Management */
+/**
+ * qman_p_irqsource_add - add processing sources to be interrupt-driven
+ * @bits: bitmask of QM_PIRQ_**I processing sources
+ *
+ * Adds processing sources that should be interrupt-driven (rather than
+ * processed via qman_poll_***() functions).
+ */
+void qman_p_irqsource_add(struct qman_portal *p, u32 bits);
+
+/**
+ * qman_p_irqsource_remove - remove processing sources from being int-driven
+ * @bits: bitmask of QM_PIRQ_**I processing sources
+ *
+ * Removes processing sources from being interrupt-driven, so that they will
+ * instead be processed via qman_poll_***() functions.
+ */
+void qman_p_irqsource_remove(struct qman_portal *p, u32 bits);
+
+/**
+ * qman_affine_cpus - return a mask of cpus that have affine portals
+ */
+const cpumask_t *qman_affine_cpus(void);
+
+/**
+ * qman_affine_channel - return the channel ID of a portal
+ * @cpu: the cpu whose affine portal is the subject of the query
+ *
+ * If @cpu is -1, the affine portal for the current CPU will be used. It is a
+ * bug to call this function for any value of @cpu (other than -1) that is not a
+ * member of the mask returned from qman_affine_cpus().
+ */
+u16 qman_affine_channel(int cpu);
+
+/**
+ * qman_get_affine_portal - return the portal pointer affine to cpu
+ * @cpu: the cpu whose affine portal is the subject of the query
+ */
+struct qman_portal *qman_get_affine_portal(int cpu);
+
+/**
+ * qman_p_poll_dqrr - process DQRR (fast-path) entries
+ * @limit: the maximum number of DQRR entries to process
+ *
+ * Use of this function requires that DQRR processing not be interrupt-driven.
+ * The return value represents the number of DQRR entries processed.
+ */
+int qman_p_poll_dqrr(struct qman_portal *p, unsigned int limit);
+
+/**
+ * qman_p_static_dequeue_add - Add pool channels to the portal SDQCR
+ * @pools: bit-mask of pool channels, using QM_SDQCR_CHANNELS_POOL(n)
+ *
+ * Adds a set of pool channels to the portal's static dequeue command register
+ * (SDQCR). The requested pools are limited to those the portal has dequeue
+ * access to.
+ */
+void qman_p_static_dequeue_add(struct qman_portal *p, u32 pools);
+
+       /* FQ management */
+/**
+ * qman_create_fq - Allocates a FQ
+ * @fqid: the index of the FQD to encapsulate, must be "Out of Service"
+ * @flags: bit-mask of QMAN_FQ_FLAG_*** options
+ * @fq: memory for storing the 'fq', with callbacks filled in
+ *
+ * Creates a frame queue object for the given @fqid, unless the
+ * QMAN_FQ_FLAG_DYNAMIC_FQID flag is set in @flags, in which case a FQID is
+ * dynamically allocated (or the function fails if none are available). Once
+ * created, the caller should not touch the memory at 'fq' except as extended to
+ * adjacent memory for user-defined fields (see the definition of "struct
+ * qman_fq" for more info). NO_MODIFY is only intended for enqueuing to
+ * pre-existing frame-queues that aren't to be otherwise interfered with, it
+ * prevents all other modifications to the frame queue. The TO_DCPORTAL flag
+ * causes the driver to honour any contextB modifications requested in the
+ * qman_init_fq() API, as this indicates the frame queue will be consumed by a
+ * direct-connect portal (PME, CAAM, or Fman). When frame queues are consumed by
+ * software portals, the contextB field is controlled by the driver and can't be
+ * modified by the caller.
+ */
+int qman_create_fq(u32 fqid, u32 flags, struct qman_fq *fq);
+
+/**
+ * qman_destroy_fq - Deallocates a FQ
+ * @fq: the frame queue object to release
+ *
+ * The memory for this frame queue object ('fq' provided in qman_create_fq()) is
+ * not deallocated but the caller regains ownership, to do with as desired. The
+ * FQ must be in the 'out-of-service' or in the 'parked' state.
+ */
+void qman_destroy_fq(struct qman_fq *fq);
+
+/**
+ * qman_fq_fqid - Queries the frame queue ID of a FQ object
+ * @fq: the frame queue object to query
+ */
+u32 qman_fq_fqid(struct qman_fq *fq);
+
+/**
+ * qman_init_fq - Initialises FQ fields, leaves the FQ "parked" or "scheduled"
+ * @fq: the frame queue object to modify, must be 'parked' or new.
+ * @flags: bit-mask of QMAN_INITFQ_FLAG_*** options
+ * @opts: the FQ-modification settings, as defined in the low-level API
+ *
+ * The @opts parameter comes from the low-level portal API. Select
+ * QMAN_INITFQ_FLAG_SCHED in @flags to cause the frame queue to be scheduled
+ * rather than parked. NB, @opts can be NULL.
+ *
+ * Note that some fields and options within @opts may be ignored or overwritten
+ * by the driver;
+ * 1. the 'count' and 'fqid' fields are always ignored (this operation only
+ * affects one frame queue: @fq).
+ * 2. the QM_INITFQ_WE_CONTEXTB option of the 'we_mask' field and the associated
+ * 'fqd' structure's 'context_b' field are sometimes overwritten;
+ *   - if @fq was not created with QMAN_FQ_FLAG_TO_DCPORTAL, then context_b is
+ *     initialised to a value used by the driver for demux.
+ *   - if context_b is initialised for demux, so is context_a in case stashing
+ *     is requested (see item 4).
+ * (So caller control of context_b is only possible for TO_DCPORTAL frame queue
+ * objects.)
+ * 3. if @flags contains QMAN_INITFQ_FLAG_LOCAL, the 'fqd' structure's
+ * 'dest::channel' field will be overwritten to match the portal used to issue
+ * the command. If the WE_DESTWQ write-enable bit had already been set by the
+ * caller, the channel workqueue will be left as-is, otherwise the write-enable
+ * bit is set and the workqueue is set to a default of 4. If the "LOCAL" flag
+ * isn't set, the destination channel/workqueue fields and the write-enable bit
+ * are left as-is.
+ * 4. if the driver overwrites context_a/b for demux, then if
+ * QM_INITFQ_WE_CONTEXTA is set, the driver will only overwrite
+ * context_a.address fields and will leave the stashing fields provided by the
+ * user alone, otherwise it will zero out the context_a.stashing fields.
+ */
+int qman_init_fq(struct qman_fq *fq, u32 flags, struct qm_mcc_initfq *opts);
+
+/**
+ * qman_schedule_fq - Schedules a FQ
+ * @fq: the frame queue object to schedule, must be 'parked'
+ *
+ * Schedules the frame queue, which must be Parked, which takes it to
+ * Tentatively-Scheduled or Truly-Scheduled depending on its fill-level.
+ */
+int qman_schedule_fq(struct qman_fq *fq);
+
+/**
+ * qman_retire_fq - Retires a FQ
+ * @fq: the frame queue object to retire
+ * @flags: FQ flags (QMAN_FQ_STATE*) if retirement completes immediately
+ *
+ * Retires the frame queue. This returns zero if it succeeds immediately, +1 if
+ * the retirement was started asynchronously, otherwise it returns negative for
+ * failure. When this function returns zero, @flags is set to indicate whether
+ * the retired FQ is empty and/or whether it has any ORL fragments (to show up
+ * as ERNs). Otherwise the corresponding flags will be known when a subsequent
+ * FQRN message shows up on the portal's message ring.
+ *
+ * NB, if the retirement is asynchronous (the FQ was in the Truly Scheduled or
+ * Active state), the completion will be via the message ring as a FQRN - but
+ * the corresponding callback may occur before this function returns!! Ie. the
+ * caller should be prepared to accept the callback as the function is called,
+ * not only once it has returned.
+ */
+int qman_retire_fq(struct qman_fq *fq, u32 *flags);
+
+/**
+ * qman_oos_fq - Puts a FQ "out of service"
+ * @fq: the frame queue object to be put out-of-service, must be 'retired'
+ *
+ * The frame queue must be retired and empty, and if any order restoration list
+ * was released as ERNs at the time of retirement, they must all be consumed.
+ */
+int qman_oos_fq(struct qman_fq *fq);
+
+/**
+ * qman_enqueue - Enqueue a frame to a frame queue
+ * @fq: the frame queue object to enqueue to
+ * @fd: a descriptor of the frame to be enqueued
+ *
+ * Fills an entry in the EQCR of portal @qm to enqueue the frame described by
+ * @fd. The descriptor details are copied from @fd to the EQCR entry, the 'pid'
+ * field is ignored. The return value is non-zero on error, such as ring full.
+ */
+int qman_enqueue(struct qman_fq *fq, const struct qm_fd *fd);
+
+/**
+ * qman_alloc_fqid_range - Allocate a contiguous range of FQIDs
+ * @result: is set by the API to the base FQID of the allocated range
+ * @count: the number of FQIDs required
+ *
+ * Returns 0 on success, or a negative error code.
+ */
+int qman_alloc_fqid_range(u32 *result, u32 count);
+#define qman_alloc_fqid(result) qman_alloc_fqid_range(result, 1)
+
+/**
+ * qman_release_fqid - Release the specified frame queue ID
+ * @fqid: the FQID to be released back to the resource pool
+ *
+ * This function can also be used to seed the allocator with
+ * FQID ranges that it can subsequently allocate from.
+ * Returns 0 on success, or a negative error code.
+ */
+int qman_release_fqid(u32 fqid);
+
+       /* Pool-channel management */
+/**
+ * qman_alloc_pool_range - Allocate a contiguous range of pool-channel IDs
+ * @result: is set by the API to the base pool-channel ID of the allocated range
+ * @count: the number of pool-channel IDs required
+ *
+ * Returns 0 on success, or a negative error code.
+ */
+int qman_alloc_pool_range(u32 *result, u32 count);
+#define qman_alloc_pool(result) qman_alloc_pool_range(result, 1)
+
+/**
+ * qman_release_pool - Release the specified pool-channel ID
+ * @id: the pool-chan ID to be released back to the resource pool
+ *
+ * This function can also be used to seed the allocator with
+ * pool-channel ID ranges that it can subsequently allocate from.
+ * Returns 0 on success, or a negative error code.
+ */
+int qman_release_pool(u32 id);
+
+       /* CGR management */
+/**
+ * qman_create_cgr - Register a congestion group object
+ * @cgr: the 'cgr' object, with fields filled in
+ * @flags: QMAN_CGR_FLAG_* values
+ * @opts: optional state of CGR settings
+ *
+ * Registers this object to receive congestion entry/exit callbacks on the
+ * portal affine to the cpu portal on which this API is executed. If opts is
+ * NULL then only the callback (cgr->cb) function is registered. If @flags
+ * contains QMAN_CGR_FLAG_USE_INIT, then an init hw command (which will reset
+ * any unspecified parameters) will be used rather than a modify hw command
+ * (which only modifies the specified parameters).
+ */
+int qman_create_cgr(struct qman_cgr *cgr, u32 flags,
+                   struct qm_mcc_initcgr *opts);
+
+/**
+ * qman_delete_cgr - Deregisters a congestion group object
+ * @cgr: the 'cgr' object to deregister
+ *
+ * "Unplugs" this CGR object from the portal affine to the cpu on which this API
+ * is executed. This must be executed on the same affine portal on which it was
+ * created.
+ */
+int qman_delete_cgr(struct qman_cgr *cgr);
+
+/**
+ * qman_delete_cgr_safe - Deregisters a congestion group object from any CPU
+ * @cgr: the 'cgr' object to deregister
+ *
+ * This will select the proper CPU and run qman_delete_cgr() there.
+ */
+void qman_delete_cgr_safe(struct qman_cgr *cgr);
+
+/**
+ * qman_query_cgr_congested - Queries CGR's congestion status
+ * @cgr: the 'cgr' object to query
+ * @result: returns 'cgr's congestion status, 1 (true) if congested
+ */
+int qman_query_cgr_congested(struct qman_cgr *cgr, bool *result);
+
+/**
+ * qman_alloc_cgrid_range - Allocate a contiguous range of CGR IDs
+ * @result: is set by the API to the base CGR ID of the allocated range
+ * @count: the number of CGR IDs required
+ *
+ * Returns 0 on success, or a negative error code.
+ */
+int qman_alloc_cgrid_range(u32 *result, u32 count);
+#define qman_alloc_cgrid(result) qman_alloc_cgrid_range(result, 1)
+
+/**
+ * qman_release_cgrid - Release the specified CGR ID
+ * @id: the CGR ID to be released back to the resource pool
+ *
+ * This function can also be used to seed the allocator with
+ * CGR ID ranges that it can subsequently allocate from.
+ * Returns 0 on success, or a negative error code.
+ */
+int qman_release_cgrid(u32 id);
+
+#endif /* __FSL_QMAN_H */
index fb8e3b6febdff7f5fdb3f0335455b9e074647c40..c2119008990a36a54b43ee66d57024df6330e893 100644 (file)
@@ -177,6 +177,7 @@ enum tcm_sense_reason_table {
        TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED    = R(0x15),
        TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED  = R(0x16),
        TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED  = R(0x17),
+       TCM_COPY_TARGET_DEVICE_NOT_REACHABLE    = R(0x18),
 #undef R
 };
 
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
new file mode 100644 (file)
index 0000000..ab68640
--- /dev/null
@@ -0,0 +1,163 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM cgroup
+
+#if !defined(_TRACE_CGROUP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CGROUP_H
+
+#include <linux/cgroup.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(cgroup_root,
+
+       TP_PROTO(struct cgroup_root *root),
+
+       TP_ARGS(root),
+
+       TP_STRUCT__entry(
+               __field(        int,            root                    )
+               __field(        u16,            ss_mask                 )
+               __string(       name,           root->name              )
+       ),
+
+       TP_fast_assign(
+               __entry->root = root->hierarchy_id;
+               __entry->ss_mask = root->subsys_mask;
+               __assign_str(name, root->name);
+       ),
+
+       TP_printk("root=%d ss_mask=%#x name=%s",
+                 __entry->root, __entry->ss_mask, __get_str(name))
+);
+
+DEFINE_EVENT(cgroup_root, cgroup_setup_root,
+
+       TP_PROTO(struct cgroup_root *root),
+
+       TP_ARGS(root)
+);
+
+DEFINE_EVENT(cgroup_root, cgroup_destroy_root,
+
+       TP_PROTO(struct cgroup_root *root),
+
+       TP_ARGS(root)
+);
+
+DEFINE_EVENT(cgroup_root, cgroup_remount,
+
+       TP_PROTO(struct cgroup_root *root),
+
+       TP_ARGS(root)
+);
+
+/*
+ * Event class for tracepoints that report a single cgroup: hierarchy id,
+ * cgroup id, level and path. The path array is sized with one extra byte in
+ * both branches, because the "(null)" fallback is copied with __assign_str(),
+ * which also writes the terminating NUL.
+ */
+DECLARE_EVENT_CLASS(cgroup,
+
+       TP_PROTO(struct cgroup *cgrp),
+
+       TP_ARGS(cgrp),
+
+       TP_STRUCT__entry(
+               __field(        int,            root                    )
+               __field(        int,            id                      )
+               __field(        int,            level                   )
+               __dynamic_array(char,           path,
+                               (cgrp->kn ? cgroup_path(cgrp, NULL, 0)
+                                         : strlen("(null)")) + 1)
+       ),
+
+       TP_fast_assign(
+               __entry->root = cgrp->root->hierarchy_id;
+               __entry->id = cgrp->id;
+               __entry->level = cgrp->level;
+               if (cgrp->kn)
+                       cgroup_path(cgrp, __get_dynamic_array(path),
+                                   __get_dynamic_array_len(path));
+               else
+                       __assign_str(path, "(null)");
+       ),
+
+       TP_printk("root=%d id=%d level=%d path=%s",
+                 __entry->root, __entry->id, __entry->level, __get_str(path))
+);
+
+DEFINE_EVENT(cgroup, cgroup_mkdir,
+
+       TP_PROTO(struct cgroup *cgroup),
+
+       TP_ARGS(cgroup)
+);
+
+DEFINE_EVENT(cgroup, cgroup_rmdir,
+
+       TP_PROTO(struct cgroup *cgroup),
+
+       TP_ARGS(cgroup)
+);
+
+DEFINE_EVENT(cgroup, cgroup_release,
+
+       TP_PROTO(struct cgroup *cgroup),
+
+       TP_ARGS(cgroup)
+);
+
+DEFINE_EVENT(cgroup, cgroup_rename,
+
+       TP_PROTO(struct cgroup *cgroup),
+
+       TP_ARGS(cgroup)
+);
+
+/*
+ * Event class for task migration tracepoints: records the destination
+ * cgroup (hierarchy id, id, level, path) plus the pid and comm of the task.
+ * dst_path is sized with one extra byte in both branches, because the
+ * "(null)" fallback is copied with __assign_str(), which also writes the
+ * terminating NUL.
+ */
+DECLARE_EVENT_CLASS(cgroup_migrate,
+
+       TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+
+       TP_ARGS(dst_cgrp, task, threadgroup),
+
+       TP_STRUCT__entry(
+               __field(        int,            dst_root                )
+               __field(        int,            dst_id                  )
+               __field(        int,            dst_level               )
+               __dynamic_array(char,           dst_path,
+                               (dst_cgrp->kn ? cgroup_path(dst_cgrp, NULL, 0)
+                                             : strlen("(null)")) + 1)
+               __field(        int,            pid                     )
+               __string(       comm,           task->comm              )
+       ),
+
+       TP_fast_assign(
+               __entry->dst_root = dst_cgrp->root->hierarchy_id;
+               __entry->dst_id = dst_cgrp->id;
+               __entry->dst_level = dst_cgrp->level;
+               if (dst_cgrp->kn)
+                       cgroup_path(dst_cgrp, __get_dynamic_array(dst_path),
+                                   __get_dynamic_array_len(dst_path));
+               else
+                       __assign_str(dst_path, "(null)");
+               __entry->pid = task->pid;
+               __assign_str(comm, task->comm);
+       ),
+
+       TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s",
+                 __entry->dst_root, __entry->dst_id, __entry->dst_level,
+                 __get_str(dst_path), __entry->pid, __get_str(comm))
+);
+
+DEFINE_EVENT(cgroup_migrate, cgroup_attach_task,
+
+       TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+
+       TP_ARGS(dst_cgrp, task, threadgroup)
+);
+
+DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
+
+       TP_PROTO(struct cgroup *dst_cgrp, struct task_struct *task, bool threadgroup),
+
+       TP_ARGS(dst_cgrp, task, threadgroup)
+);
+
+#endif /* _TRACE_CGROUP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/mdio.h b/include/trace/events/mdio.h
new file mode 100644 (file)
index 0000000..00d85f5
--- /dev/null
@@ -0,0 +1,42 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mdio
+
+#if !defined(_TRACE_MDIO_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MDIO_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT_CONDITION(mdio_access,
+
+       TP_PROTO(struct mii_bus *bus, char read,
+                u8 addr, unsigned regnum, u16 val, int err),
+
+       TP_ARGS(bus, read, addr, regnum, val, err),
+
+       TP_CONDITION(err >= 0),
+
+       TP_STRUCT__entry(
+               __array(char, busid, MII_BUS_ID_SIZE)
+               __field(char, read)
+               __field(u8, addr)
+               __field(u16, val)
+               __field(unsigned, regnum)
+       ),
+
+       TP_fast_assign(
+               strncpy(__entry->busid, bus->id, MII_BUS_ID_SIZE);
+               __entry->read = read;
+               __entry->addr = addr;
+               __entry->regnum = regnum;
+               __entry->val = val;
+       ),
+
+       TP_printk("%s %-5s phy:0x%02hhx reg:0x%02x val:0x%04hx",
+                 __entry->busid, __entry->read ? "read" : "write",
+                 __entry->addr, __entry->regnum, __entry->val)
+);
+
+#endif /* if !defined(_TRACE_MDIO_H) || defined(TRACE_HEADER_MULTI_READ) */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
index 67d632f1743ddd9ce351962fd7326f7320743efc..2c748ddad5f875711ed66f91eae9bc69b9a41fe0 100644 (file)
@@ -92,4 +92,6 @@
 
 #define SO_CNX_ADVICE          53
 
+#define SCM_TIMESTAMPING_OPT_STATS     54
+
 #endif /* __ASM_GENERIC_SOCKET_H */
index dbfee7e86ba6429319188a03d618881ae34a9ef0..9b1462e38b821a762b284b44a20a96de9f0930d9 100644 (file)
@@ -730,10 +730,6 @@ __SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect)
 __SYSCALL(__NR_pkey_alloc,    sys_pkey_alloc)
 #define __NR_pkey_free 290
 __SYSCALL(__NR_pkey_free,     sys_pkey_free)
-#define __NR_pkey_get 291
-//__SYSCALL(__NR_pkey_get,      sys_pkey_get)
-#define __NR_pkey_set 292
-//__SYSCALL(__NR_pkey_set,      sys_pkey_set)
 
 #undef __NR_syscalls
 #define __NR_syscalls 291
index 6965d0909554573d723d9d7eca3bb205e12d2760..cd2be1c8e9fb6cfac94b6dc9e734274a7c1f9932 100644 (file)
@@ -75,6 +75,7 @@ header-y += bpf_perf_event.h
 header-y += bpf.h
 header-y += bpqether.h
 header-y += bsg.h
+header-y += bt-bmc.h
 header-y += btrfs.h
 header-y += can.h
 header-y += capability.h
index 5cd4d4d2dd1d226ba6e5b024a697e71dfde9ac7d..9c9c6ad55f1487b9b4cead653fc996930603b259 100644 (file)
@@ -14,7 +14,6 @@
 
 #include <linux/atmapi.h>
 #include <linux/atmioc.h>
-#include <linux/time.h>
 
 #define ZATM_GETPOOL   _IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc)
                                                /* get pool statistics */
index 374ef582ae18d6c46c3ae07005ee74ec0bc813de..6123d9b8e828b2ff0d825bd8f4ffe374daac9b4f 100644 (file)
@@ -73,6 +73,8 @@ enum bpf_cmd {
        BPF_PROG_LOAD,
        BPF_OBJ_PIN,
        BPF_OBJ_GET,
+       BPF_PROG_ATTACH,
+       BPF_PROG_DETACH,
 };
 
 enum bpf_map_type {
@@ -85,6 +87,8 @@ enum bpf_map_type {
        BPF_MAP_TYPE_PERCPU_ARRAY,
        BPF_MAP_TYPE_STACK_TRACE,
        BPF_MAP_TYPE_CGROUP_ARRAY,
+       BPF_MAP_TYPE_LRU_HASH,
+       BPF_MAP_TYPE_LRU_PERCPU_HASH,
 };
 
 enum bpf_prog_type {
@@ -96,8 +100,22 @@ enum bpf_prog_type {
        BPF_PROG_TYPE_TRACEPOINT,
        BPF_PROG_TYPE_XDP,
        BPF_PROG_TYPE_PERF_EVENT,
+       BPF_PROG_TYPE_CGROUP_SKB,
+       BPF_PROG_TYPE_CGROUP_SOCK,
+       BPF_PROG_TYPE_LWT_IN,
+       BPF_PROG_TYPE_LWT_OUT,
+       BPF_PROG_TYPE_LWT_XMIT,
 };
 
+enum bpf_attach_type {
+       BPF_CGROUP_INET_INGRESS,
+       BPF_CGROUP_INET_EGRESS,
+       BPF_CGROUP_INET_SOCK_CREATE,
+       __MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
 #define BPF_PSEUDO_MAP_FD      1
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -106,6 +124,13 @@ enum bpf_prog_type {
 #define BPF_EXIST      2 /* update existing element */
 
 #define BPF_F_NO_PREALLOC      (1U << 0)
+/* Instead of having one common LRU list in the
+ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
+ * which can scale and perform better.
+ * Note, the LRU nodes (including free nodes) cannot be moved
+ * across different LRU lists.
+ */
+#define BPF_F_NO_COMMON_LRU    (1U << 1)
 
 union bpf_attr {
        struct { /* anonymous struct used by BPF_MAP_CREATE command */
@@ -141,299 +166,320 @@ union bpf_attr {
                __aligned_u64   pathname;
                __u32           bpf_fd;
        };
+
+       struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+               __u32           target_fd;      /* container object to attach to */
+               __u32           attach_bpf_fd;  /* eBPF program to attach */
+               __u32           attach_type;
+       };
 } __attribute__((aligned(8)));
 
+/* BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(&map, &key)
+ *     Return: Map value or NULL
+ *
+ * int bpf_map_update_elem(&map, &key, &value, flags)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_map_delete_elem(&map, &key)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_probe_read(void *dst, int size, void *src)
+ *     Return: 0 on success or negative error
+ *
+ * u64 bpf_ktime_get_ns(void)
+ *     Return: current ktime
+ *
+ * int bpf_trace_printk(const char *fmt, int fmt_size, ...)
+ *     Return: length of buffer written or negative error
+ *
+ * u32 bpf_get_prandom_u32(void)
+ *     Return: random value
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ *     Return: SMP processor ID
+ *
+ * int bpf_skb_store_bytes(skb, offset, from, len, flags)
+ *     store bytes into packet
+ *     @skb: pointer to skb
+ *     @offset: offset within packet from skb->mac_header
+ *     @from: pointer where to copy bytes from
+ *     @len: number of bytes to store into packet
+ *     @flags: bit 0 - if true, recompute skb->csum
+ *             other bits - reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_l3_csum_replace(skb, offset, from, to, flags)
+ *     recompute IP checksum
+ *     @skb: pointer to skb
+ *     @offset: offset within packet where IP checksum is located
+ *     @from: old value of header field
+ *     @to: new value of header field
+ *     @flags: bits 0-3 - size of header field
+ *             other bits - reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_l4_csum_replace(skb, offset, from, to, flags)
+ *     recompute TCP/UDP checksum
+ *     @skb: pointer to skb
+ *     @offset: offset within packet where TCP/UDP checksum is located
+ *     @from: old value of header field
+ *     @to: new value of header field
+ *     @flags: bits 0-3 - size of header field
+ *             bit 4 - is pseudo header
+ *             other bits - reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_tail_call(ctx, prog_array_map, index)
+ *     jump into another BPF program
+ *     @ctx: context pointer passed to next program
+ *     @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
+ *     @index: index inside array that selects specific program to run
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_clone_redirect(skb, ifindex, flags)
+ *     redirect to another netdev
+ *     @skb: pointer to skb
+ *     @ifindex: ifindex of the net device
+ *     @flags: bit 0 - if set, redirect to ingress instead of egress
+ *             other bits - reserved
+ *     Return: 0 on success or negative error
+ *
+ * u64 bpf_get_current_pid_tgid(void)
+ *     Return: current->tgid << 32 | current->pid
+ *
+ * u64 bpf_get_current_uid_gid(void)
+ *     Return: current_gid << 32 | current_uid
+ *
+ * int bpf_get_current_comm(char *buf, int size_of_buf)
+ *     stores current->comm into buf
+ *     Return: 0 on success or negative error
+ *
+ * u32 bpf_get_cgroup_classid(skb)
+ *     retrieve a proc's classid
+ *     @skb: pointer to skb
+ *     Return: classid if != 0
+ *
+ * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_vlan_pop(skb)
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_get_tunnel_key(skb, key, size, flags)
+ * int bpf_skb_set_tunnel_key(skb, key, size, flags)
+ *     retrieve or populate tunnel metadata
+ *     @skb: pointer to skb
+ *     @key: pointer to 'struct bpf_tunnel_key'
+ *     @size: size of 'struct bpf_tunnel_key'
+ *     @flags: room for future extensions
+ *     Return: 0 on success or negative error
+ *
+ * u64 bpf_perf_event_read(&map, index)
+ *     Return: Number events read or error code
+ *
+ * int bpf_redirect(ifindex, flags)
+ *     redirect to another netdev
+ *     @ifindex: ifindex of the net device
+ *     @flags: bit 0 - if set, redirect to ingress instead of egress
+ *             other bits - reserved
+ *     Return: TC_ACT_REDIRECT
+ *
+ * u32 bpf_get_route_realm(skb)
+ *     retrieve a dst's tclassid
+ *     @skb: pointer to skb
+ *     Return: realm if != 0
+ *
+ * int bpf_perf_event_output(ctx, map, index, data, size)
+ *     output perf raw sample
+ *     @ctx: struct pt_regs*
+ *     @map: pointer to perf_event_array map
+ *     @index: index of event in the map
+ *     @data: data on stack to be output as raw data
+ *     @size: size of data
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_get_stackid(ctx, map, flags)
+ *     walk user or kernel stack and return id
+ *     @ctx: struct pt_regs*
+ *     @map: pointer to stack_trace map
+ *     @flags: bits 0-7 - number of stack frames to skip
+ *             bit 8 - collect user stack instead of kernel
+ *             bit 9 - compare stacks by hash only
+ *             bit 10 - if two different stacks hash into the same stackid
+ *                      discard old
+ *             other bits - reserved
+ *     Return: >= 0 stackid on success or negative error
+ *
+ * s64 bpf_csum_diff(from, from_size, to, to_size, seed)
+ *     calculate csum diff
+ *     @from: raw from buffer
+ *     @from_size: length of from buffer
+ *     @to: raw to buffer
+ *     @to_size: length of to buffer
+ *     @seed: optional seed
+ *     Return: csum result or negative error code
+ *
+ * int bpf_skb_get_tunnel_opt(skb, opt, size)
+ *     retrieve tunnel options metadata
+ *     @skb: pointer to skb
+ *     @opt: pointer to raw tunnel option data
+ *     @size: size of @opt
+ *     Return: option size
+ *
+ * int bpf_skb_set_tunnel_opt(skb, opt, size)
+ *     populate tunnel options metadata
+ *     @skb: pointer to skb
+ *     @opt: pointer to raw tunnel option data
+ *     @size: size of @opt
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_change_proto(skb, proto, flags)
+ *     Change protocol of the skb. Currently supported is v4 -> v6,
+ *     v6 -> v4 transitions. The helper will also resize the skb. eBPF
+ *     program is expected to fill the new headers via skb_store_bytes
+ *     and lX_csum_replace.
+ *     @skb: pointer to skb
+ *     @proto: new skb->protocol type
+ *     @flags: reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_change_type(skb, type)
+ *     Change packet type of skb.
+ *     @skb: pointer to skb
+ *     @type: new skb->pkt_type type
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_under_cgroup(skb, map, index)
+ *     Check cgroup2 membership of skb
+ *     @skb: pointer to skb
+ *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ *     @index: index of the cgroup in the bpf_map
+ *     Return:
+ *       == 0 skb failed the cgroup2 descendant test
+ *       == 1 skb succeeded the cgroup2 descendant test
+ *        < 0 error
+ *
+ * u32 bpf_get_hash_recalc(skb)
+ *     Retrieve and possibly recalculate skb->hash.
+ *     @skb: pointer to skb
+ *     Return: hash
+ *
+ * u64 bpf_get_current_task(void)
+ *     Returns current task_struct
+ *     Return: current
+ *
+ * int bpf_probe_write_user(void *dst, void *src, int len)
+ *     safely attempt to write to a location
+ *     @dst: destination address in userspace
+ *     @src: source address on stack
+ *     @len: number of bytes to copy
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_current_task_under_cgroup(map, index)
+ *     Check cgroup2 membership of current task
+ *     @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+ *     @index: index of the cgroup in the bpf_map
+ *     Return:
+ *       == 0 current failed the cgroup2 descendant test
+ *       == 1 current succeeded the cgroup2 descendant test
+ *        < 0 error
+ *
+ * int bpf_skb_change_tail(skb, len, flags)
+ *     The helper will resize the skb to the given new size, to be used f.e.
+ *     with control messages.
+ *     @skb: pointer to skb
+ *     @len: new skb length
+ *     @flags: reserved
+ *     Return: 0 on success or negative error
+ *
+ * int bpf_skb_pull_data(skb, len)
+ *     The helper will pull in non-linear data in case the skb is non-linear
+ *     and not all of len are part of the linear section. Only needed for
+ *     read/write with direct packet access.
+ *     @skb: pointer to skb
+ *     @len: len to make read/writeable
+ *     Return: 0 on success or negative error
+ *
+ * s64 bpf_csum_update(skb, csum)
+ *     Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
+ *     @skb: pointer to skb
+ *     @csum: csum to add
+ *     Return: csum on success or negative error
+ *
+ * void bpf_set_hash_invalid(skb)
+ *     Invalidate current skb->hash.
+ *     @skb: pointer to skb
+ *
+ * int bpf_get_numa_node_id()
+ *     Return: Id of current NUMA node.
+ *
+ * int bpf_skb_change_head(skb, len, flags)
+ *     Grows headroom of skb and adjusts MAC header offset accordingly.
+ *     Will extend/reallocate as required automatically.
+ *     May change skb data pointer and will thus invalidate any check
+ *     performed for direct packet access.
+ *     @skb: pointer to skb
+ *     @len: length of header to be pushed in front
+ *     @flags: Flags (unused for now)
+ *     Return: 0 on success or negative error
+ */
+#define __BPF_FUNC_MAPPER(FN)          \
+       FN(unspec),                     \
+       FN(map_lookup_elem),            \
+       FN(map_update_elem),            \
+       FN(map_delete_elem),            \
+       FN(probe_read),                 \
+       FN(ktime_get_ns),               \
+       FN(trace_printk),               \
+       FN(get_prandom_u32),            \
+       FN(get_smp_processor_id),       \
+       FN(skb_store_bytes),            \
+       FN(l3_csum_replace),            \
+       FN(l4_csum_replace),            \
+       FN(tail_call),                  \
+       FN(clone_redirect),             \
+       FN(get_current_pid_tgid),       \
+       FN(get_current_uid_gid),        \
+       FN(get_current_comm),           \
+       FN(get_cgroup_classid),         \
+       FN(skb_vlan_push),              \
+       FN(skb_vlan_pop),               \
+       FN(skb_get_tunnel_key),         \
+       FN(skb_set_tunnel_key),         \
+       FN(perf_event_read),            \
+       FN(redirect),                   \
+       FN(get_route_realm),            \
+       FN(perf_event_output),          \
+       FN(skb_load_bytes),             \
+       FN(get_stackid),                \
+       FN(csum_diff),                  \
+       FN(skb_get_tunnel_opt),         \
+       FN(skb_set_tunnel_opt),         \
+       FN(skb_change_proto),           \
+       FN(skb_change_type),            \
+       FN(skb_under_cgroup),           \
+       FN(get_hash_recalc),            \
+       FN(get_current_task),           \
+       FN(probe_write_user),           \
+       FN(current_task_under_cgroup),  \
+       FN(skb_change_tail),            \
+       FN(skb_pull_data),              \
+       FN(csum_update),                \
+       FN(set_hash_invalid),           \
+       FN(get_numa_node_id),           \
+       FN(skb_change_head),
+
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
  */
+#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
 enum bpf_func_id {
-       BPF_FUNC_unspec,
-       BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
-       BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
-       BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
-       BPF_FUNC_probe_read,      /* int bpf_probe_read(void *dst, int size, void *src) */
-       BPF_FUNC_ktime_get_ns,    /* u64 bpf_ktime_get_ns(void) */
-       BPF_FUNC_trace_printk,    /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */
-       BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
-       BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
-
-       /**
-        * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet
-        * @skb: pointer to skb
-        * @offset: offset within packet from skb->mac_header
-        * @from: pointer where to copy bytes from
-        * @len: number of bytes to store into packet
-        * @flags: bit 0 - if true, recompute skb->csum
-        *         other bits - reserved
-        * Return: 0 on success
-        */
-       BPF_FUNC_skb_store_bytes,
-
-       /**
-        * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum
-        * @skb: pointer to skb
-        * @offset: offset within packet where IP checksum is located
-        * @from: old value of header field
-        * @to: new value of header field
-        * @flags: bits 0-3 - size of header field
-        *         other bits - reserved
-        * Return: 0 on success
-        */
-       BPF_FUNC_l3_csum_replace,
-
-       /**
-        * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum
-        * @skb: pointer to skb
-        * @offset: offset within packet where TCP/UDP checksum is located
-        * @from: old value of header field
-        * @to: new value of header field
-        * @flags: bits 0-3 - size of header field
-        *         bit 4 - is pseudo header
-        *         other bits - reserved
-        * Return: 0 on success
-        */
-       BPF_FUNC_l4_csum_replace,
-
-       /**
-        * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program
-        * @ctx: context pointer passed to next program
-        * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
-        * @index: index inside array that selects specific program to run
-        * Return: 0 on success
-        */
-       BPF_FUNC_tail_call,
-
-       /**
-        * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev
-        * @skb: pointer to skb
-        * @ifindex: ifindex of the net device
-        * @flags: bit 0 - if set, redirect to ingress instead of egress
-        *         other bits - reserved
-        * Return: 0 on success
-        */
-       BPF_FUNC_clone_redirect,
-
-       /**
-        * u64 bpf_get_current_pid_tgid(void)
-        * Return: current->tgid << 32 | current->pid
-        */
-       BPF_FUNC_get_current_pid_tgid,
-
-       /**
-        * u64 bpf_get_current_uid_gid(void)
-        * Return: current_gid << 32 | current_uid
-        */
-       BPF_FUNC_get_current_uid_gid,
-
-       /**
-        * bpf_get_current_comm(char *buf, int size_of_buf)
-        * stores current->comm into buf
-        * Return: 0 on success
-        */
-       BPF_FUNC_get_current_comm,
-
-       /**
-        * bpf_get_cgroup_classid(skb) - retrieve a proc's classid
-        * @skb: pointer to skb
-        * Return: classid if != 0
-        */
-       BPF_FUNC_get_cgroup_classid,
-       BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */
-       BPF_FUNC_skb_vlan_pop,  /* bpf_skb_vlan_pop(skb) */
-
-       /**
-        * bpf_skb_[gs]et_tunnel_key(skb, key, size, flags)
-        * retrieve or populate tunnel metadata
-        * @skb: pointer to skb
-        * @key: pointer to 'struct bpf_tunnel_key'
-        * @size: size of 'struct bpf_tunnel_key'
-        * @flags: room for future extensions
-        * Retrun: 0 on success
-        */
-       BPF_FUNC_skb_get_tunnel_key,
-       BPF_FUNC_skb_set_tunnel_key,
-       BPF_FUNC_perf_event_read,       /* u64 bpf_perf_event_read(&map, index) */
-       /**
-        * bpf_redirect(ifindex, flags) - redirect to another netdev
-        * @ifindex: ifindex of the net device
-        * @flags: bit 0 - if set, redirect to ingress instead of egress
-        *         other bits - reserved
-        * Return: TC_ACT_REDIRECT
-        */
-       BPF_FUNC_redirect,
-
-       /**
-        * bpf_get_route_realm(skb) - retrieve a dst's tclassid
-        * @skb: pointer to skb
-        * Return: realm if != 0
-        */
-       BPF_FUNC_get_route_realm,
-
-       /**
-        * bpf_perf_event_output(ctx, map, index, data, size) - output perf raw sample
-        * @ctx: struct pt_regs*
-        * @map: pointer to perf_event_array map
-        * @index: index of event in the map
-        * @data: data on stack to be output as raw data
-        * @size: size of data
-        * Return: 0 on success
-        */
-       BPF_FUNC_perf_event_output,
-       BPF_FUNC_skb_load_bytes,
-
-       /**
-        * bpf_get_stackid(ctx, map, flags) - walk user or kernel stack and return id
-        * @ctx: struct pt_regs*
-        * @map: pointer to stack_trace map
-        * @flags: bits 0-7 - numer of stack frames to skip
-        *         bit 8 - collect user stack instead of kernel
-        *         bit 9 - compare stacks by hash only
-        *         bit 10 - if two different stacks hash into the same stackid
-        *                  discard old
-        *         other bits - reserved
-        * Return: >= 0 stackid on success or negative error
-        */
-       BPF_FUNC_get_stackid,
-
-       /**
-        * bpf_csum_diff(from, from_size, to, to_size, seed) - calculate csum diff
-        * @from: raw from buffer
-        * @from_size: length of from buffer
-        * @to: raw to buffer
-        * @to_size: length of to buffer
-        * @seed: optional seed
-        * Return: csum result
-        */
-       BPF_FUNC_csum_diff,
-
-       /**
-        * bpf_skb_[gs]et_tunnel_opt(skb, opt, size)
-        * retrieve or populate tunnel options metadata
-        * @skb: pointer to skb
-        * @opt: pointer to raw tunnel option data
-        * @size: size of @opt
-        * Return: 0 on success for set, option size for get
-        */
-       BPF_FUNC_skb_get_tunnel_opt,
-       BPF_FUNC_skb_set_tunnel_opt,
-
-       /**
-        * bpf_skb_change_proto(skb, proto, flags)
-        * Change protocol of the skb. Currently supported is
-        * v4 -> v6, v6 -> v4 transitions. The helper will also
-        * resize the skb. eBPF program is expected to fill the
-        * new headers via skb_store_bytes and lX_csum_replace.
-        * @skb: pointer to skb
-        * @proto: new skb->protocol type
-        * @flags: reserved
-        * Return: 0 on success or negative error
-        */
-       BPF_FUNC_skb_change_proto,
-
-       /**
-        * bpf_skb_change_type(skb, type)
-        * Change packet type of skb.
-        * @skb: pointer to skb
-        * @type: new skb->pkt_type type
-        * Return: 0 on success or negative error
-        */
-       BPF_FUNC_skb_change_type,
-
-       /**
-        * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb
-        * @skb: pointer to skb
-        * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
-        * @index: index of the cgroup in the bpf_map
-        * Return:
-        *   == 0 skb failed the cgroup2 descendant test
-        *   == 1 skb succeeded the cgroup2 descendant test
-        *    < 0 error
-        */
-       BPF_FUNC_skb_under_cgroup,
-
-       /**
-        * bpf_get_hash_recalc(skb)
-        * Retrieve and possibly recalculate skb->hash.
-        * @skb: pointer to skb
-        * Return: hash
-        */
-       BPF_FUNC_get_hash_recalc,
-
-       /**
-        * u64 bpf_get_current_task(void)
-        * Returns current task_struct
-        * Return: current
-        */
-       BPF_FUNC_get_current_task,
-
-       /**
-        * bpf_probe_write_user(void *dst, void *src, int len)
-        * safely attempt to write to a location
-        * @dst: destination address in userspace
-        * @src: source address on stack
-        * @len: number of bytes to copy
-        * Return: 0 on success or negative error
-        */
-       BPF_FUNC_probe_write_user,
-
-       /**
-        * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
-        * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
-        * @index: index of the cgroup in the bpf_map
-        * Return:
-        *   == 0 current failed the cgroup2 descendant test
-        *   == 1 current succeeded the cgroup2 descendant test
-        *    < 0 error
-        */
-       BPF_FUNC_current_task_under_cgroup,
-
-       /**
-        * bpf_skb_change_tail(skb, len, flags)
-        * The helper will resize the skb to the given new size,
-        * to be used f.e. with control messages.
-        * @skb: pointer to skb
-        * @len: new skb length
-        * @flags: reserved
-        * Return: 0 on success or negative error
-        */
-       BPF_FUNC_skb_change_tail,
-
-       /**
-        * bpf_skb_pull_data(skb, len)
-        * The helper will pull in non-linear data in case the
-        * skb is non-linear and not all of len are part of the
-        * linear section. Only needed for read/write with direct
-        * packet access.
-        * @skb: pointer to skb
-        * @len: len to make read/writeable
-        * Return: 0 on success or negative error
-        */
-       BPF_FUNC_skb_pull_data,
-
-       /**
-        * bpf_csum_update(skb, csum)
-        * Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
-        * @skb: pointer to skb
-        * @csum: csum to add
-        * Return: csum on success or negative error
-        */
-       BPF_FUNC_csum_update,
-
-       /**
-        * bpf_set_hash_invalid(skb)
-        * Invalidate current skb>hash.
-        * @skb: pointer to skb
-        */
-       BPF_FUNC_set_hash_invalid,
-
-       /**
-        * bpf_get_numa_node_id()
-        * Returns the id of the current NUMA node.
-        */
-       BPF_FUNC_get_numa_node_id,
-
+       __BPF_FUNC_MAPPER(__BPF_ENUM_FN)
        __BPF_FUNC_MAX_ID,
 };
+#undef __BPF_ENUM_FN
 
 /* All flags used by eBPF helper functions, placed here. */
 
@@ -507,6 +553,29 @@ struct bpf_tunnel_key {
        __u32 tunnel_label;
 };
 
+/* Generic BPF return codes which all BPF program types may support.
+ * The values are binary compatible with their TC_ACT_* counterparts to
+ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
+ * programs.
+ *
+ * XDP is handled separately, see XDP_*.
+ */
+enum bpf_ret_code {
+       BPF_OK = 0,
+       /* 1 reserved */
+       BPF_DROP = 2,
+       /* 3-6 reserved */
+       BPF_REDIRECT = 7,
+       /* >127 are reserved for prog type specific return codes */
+};
+
+struct bpf_sock {
+       __u32 bound_dev_if;
+       __u32 family;
+       __u32 type;
+       __u32 protocol;
+};
+
 /* User return codes for XDP prog type.
  * A valid XDP program must return one of these defined values. All other
  * return codes are reserved for future use. Unknown return codes will result
index a6c35e1a89ad9297b69c048200328b8ec6490fa1..05865edaefdae02a0f997eb3bdcc82fd921fb9db 100644 (file)
@@ -5,9 +5,7 @@
  *     Defines for the BPQETHER pseudo device driver
  */
 
-#ifndef __LINUX_IF_ETHER_H
 #include <linux/if_ether.h>
-#endif
 
 #define SIOCSBPQETHOPT         (SIOCDEVPRIVATE+0)      /* reserved */
 #define SIOCSBPQETHADDR                (SIOCDEVPRIVATE+1)
diff --git a/include/uapi/linux/bt-bmc.h b/include/uapi/linux/bt-bmc.h
new file mode 100644 (file)
index 0000000..d9ec766
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2015-2016, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_BT_BMC_H
+#define _UAPI_LINUX_BT_BMC_H
+
+#include <linux/ioctl.h>
+
+#define __BT_BMC_IOCTL_MAGIC   0xb1
+#define BT_BMC_IOCTL_SMS_ATN   _IO(__BT_BMC_IOCTL_MAGIC, 0x00)
+
+#endif /* _UAPI_LINUX_BT_BMC_H */
index ac5eacd3055b503f9b95e330600608bf5a018cec..db4c253f8011b2f483ddd1ffc09f4f04a93fdc0a 100644 (file)
@@ -239,7 +239,17 @@ struct btrfs_ioctl_fs_info_args {
  * Used by:
  * struct btrfs_ioctl_feature_flags
  */
-#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE        (1ULL << 0)
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE                (1ULL << 0)
+/*
+ * Older kernels (< 4.9) on big-endian systems produced broken free space tree
+ * bitmaps, and btrfs-progs also used to corrupt the free space tree (versions
+ * < 4.7.3).  If this bit is clear, then the free space tree cannot be trusted.
+ * btrfs-progs can also intentionally clear this bit to ask the kernel to
+ * rebuild the free space tree, however this might not work on older kernels
+ * that do not know about this bit. If not sure, clear the cache manually on
+ * first mount when booting older kernel versions.
+ */
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID  (1ULL << 1)
 
 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF   (1ULL << 0)
 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL  (1ULL << 1)
index 915bfa74458c147c48a8b12d19221022e0d9e376..9014c33d4e77bf409be507f6fb8da59edc14b2f8 100644 (file)
@@ -102,6 +102,13 @@ enum devlink_eswitch_mode {
        DEVLINK_ESWITCH_MODE_SWITCHDEV,
 };
 
+enum devlink_eswitch_inline_mode {
+       DEVLINK_ESWITCH_INLINE_MODE_NONE,
+       DEVLINK_ESWITCH_INLINE_MODE_LINK,
+       DEVLINK_ESWITCH_INLINE_MODE_NETWORK,
+       DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT,
+};
+
 enum devlink_attr {
        /* don't change the order or add anything between, this is ABI! */
        DEVLINK_ATTR_UNSPEC,
@@ -133,6 +140,7 @@ enum devlink_attr {
        DEVLINK_ATTR_SB_OCC_CUR,                /* u32 */
        DEVLINK_ATTR_SB_OCC_MAX,                /* u32 */
        DEVLINK_ATTR_ESWITCH_MODE,              /* u16 */
+       DEVLINK_ATTR_ESWITCH_INLINE_MODE,       /* u8 */
 
        /* add new attributes above here, update the policy in devlink.c */
 
index 099a4200732cc187f069331cbf3cbfa35e9a7e52..f0db7788f887b9947e0e1aa78d48a9980e80bddf 100644 (file)
@@ -119,8 +119,7 @@ struct ethtool_cmd {
 static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep,
                                         __u32 speed)
 {
-
-       ep->speed = (__u16)speed;
+       ep->speed = (__u16)(speed & 0xFFFF);
        ep->speed_hi = (__u16)(speed >> 16);
 }
 
@@ -249,6 +248,19 @@ struct ethtool_tunable {
        void    *data[0];
 };
 
+#define DOWNSHIFT_DEV_DEFAULT_COUNT    0xff
+#define DOWNSHIFT_DEV_DISABLE          0
+
+enum phy_tunable_id {
+       ETHTOOL_PHY_ID_UNSPEC,
+       ETHTOOL_PHY_DOWNSHIFT,
+       /*
+        * Add your fresh new phy tunable attribute above and remember to update
+        * phy_tunable_strings[] in net/core/ethtool.c
+        */
+       __ETHTOOL_PHY_TUNABLE_COUNT,
+};
+
 /**
  * struct ethtool_regs - hardware register dump
  * @cmd: Command number = %ETHTOOL_GREGS
@@ -549,6 +561,7 @@ struct ethtool_pauseparam {
  * @ETH_SS_FEATURES: Device feature names
  * @ETH_SS_RSS_HASH_FUNCS: RSS hush function names
  * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS
+ * @ETH_SS_PHY_TUNABLES: PHY tunable names
  */
 enum ethtool_stringset {
        ETH_SS_TEST             = 0,
@@ -559,6 +572,7 @@ enum ethtool_stringset {
        ETH_SS_RSS_HASH_FUNCS,
        ETH_SS_TUNABLES,
        ETH_SS_PHY_STATS,
+       ETH_SS_PHY_TUNABLES,
 };
 
 /**
@@ -1314,7 +1328,8 @@ struct ethtool_per_queue_op {
 
 #define ETHTOOL_GLINKSETTINGS  0x0000004c /* Get ethtool_link_settings */
 #define ETHTOOL_SLINKSETTINGS  0x0000004d /* Set ethtool_link_settings */
-
+#define ETHTOOL_PHY_GTUNABLE   0x0000004e /* Get PHY tunable configuration */
+#define ETHTOOL_PHY_STUNABLE   0x0000004f /* Set PHY tunable configuration */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET         ETHTOOL_GSET
index 14404b3ebb89a54e9437ddeb2748907b0ff73d64..bbf02a63a01133351ea49392f1bb06da13b4130d 100644 (file)
@@ -29,6 +29,11 @@ struct fib_rule_hdr {
        __u32           flags;
 };
 
+struct fib_rule_uid_range {
+       __u32           start;
+       __u32           end;
+};
+
 enum {
        FRA_UNSPEC,
        FRA_DST,        /* destination address */
@@ -51,6 +56,7 @@ enum {
        FRA_OIFNAME,
        FRA_PAD,
        FRA_L3MDEV,     /* iif or oif is l3mdev goto its table */
+       FRA_UID_RANGE,  /* UID range */
        __FRA_MAX
 };
 
index 5512c90af7e30f5593fbba398882f99a62fe65c0..adc899381e0d84eb8f206cce51eeb2865fe71397 100644 (file)
@@ -26,10 +26,11 @@ struct genlmsghdr {
 /*
  * List of reserved static generic netlink identifiers:
  */
-#define GENL_ID_GENERATE       0
 #define GENL_ID_CTRL           NLMSG_MIN_TYPE
 #define GENL_ID_VFS_DQUOT      (NLMSG_MIN_TYPE + 1)
 #define GENL_ID_PMCRAID                (NLMSG_MIN_TYPE + 2)
+/* must be last reserved + 1 */
+#define GENL_START_ALLOC       (NLMSG_MIN_TYPE + 3)
 
 /**************************************************************************
  * Controller
index b4fba662cd32f818cb059cd722bf4c1e1300e621..6b13e591abc9e646ce11b15d7fe5ee8209a509c1 100644 (file)
@@ -275,6 +275,8 @@ enum {
        IFLA_BR_PAD,
        IFLA_BR_VLAN_STATS_ENABLED,
        IFLA_BR_MCAST_STATS_ENABLED,
+       IFLA_BR_MCAST_IGMP_VERSION,
+       IFLA_BR_MCAST_MLD_VERSION,
        __IFLA_BR_MAX,
 };
 
@@ -874,10 +876,14 @@ enum {
 
 /* XDP section */
 
+#define XDP_FLAGS_UPDATE_IF_NOEXIST    (1U << 0)
+#define XDP_FLAGS_MASK                 (XDP_FLAGS_UPDATE_IF_NOEXIST)
+
 enum {
        IFLA_XDP_UNSPEC,
        IFLA_XDP_FD,
        IFLA_XDP_ATTACHED,
+       IFLA_XDP_FLAGS,
        __IFLA_XDP_MAX,
 };
 
index eaf94919291aaf78419a5f43b17d66eb07f12600..4e557f4e95538a26d19375415437733a9b084c6c 100644 (file)
@@ -117,6 +117,7 @@ struct in_addr {
 #define IP_NODEFRAG     22
 #define IP_CHECKSUM    23
 #define IP_BIND_ADDRESS_NO_PORT        24
+#define IP_RECVFRAGSIZE        25
 
 /* IP_MTU_DISCOVER values */
 #define IP_PMTUDISC_DONT               0       /* Never send DF frames */
index b39ea4f2e701d27edb0db76a94560557e3ae408c..46444f8fbee4eed8c4aa223ee5ff3ef1be182765 100644 (file)
@@ -283,6 +283,7 @@ struct in6_flowlabel_req {
 #define IPV6_RECVORIGDSTADDR    IPV6_ORIGDSTADDR
 #define IPV6_TRANSPARENT        75
 #define IPV6_UNICAST_IF         76
+#define IPV6_RECVFRAGSIZE      77
 
 /*
  * Multicast Routing:
index 8c2772340c3f1ced21c54d5178365dc6ab973e0f..53561be1ac2118e35a9daa7cd63cbbf900b238e9 100644 (file)
@@ -39,6 +39,7 @@ struct in6_ifreq {
 #define IPV6_SRCRT_STRICT      0x01    /* Deprecated; will be removed */
 #define IPV6_SRCRT_TYPE_0      0       /* Deprecated; will be removed */
 #define IPV6_SRCRT_TYPE_2      2       /* IPv6 type 2 Routing Header   */
+#define IPV6_SRCRT_TYPE_4      4       /* Segment Routing with IPv6 */
 
 /*
  *     routing header
@@ -178,6 +179,8 @@ enum {
        DEVCONF_DROP_UNSOLICITED_NA,
        DEVCONF_KEEP_ADDR_ON_DOWN,
        DEVCONF_RTR_SOLICIT_MAX_INTERVAL,
+       DEVCONF_SEG6_ENABLED,
+       DEVCONF_SEG6_REQUIRE_HMAC,
        DEVCONF_MAX
 };
 
index 300ef255d1e0ec496356fcbf7152a7c674590a44..4ee67cb99143deefbe11abafdd5bb49d18dba5ba 100644 (file)
@@ -972,12 +972,19 @@ struct kvm_irqfd {
        __u8  pad[16];
 };
 
+/* For KVM_CAP_ADJUST_CLOCK */
+
+/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags.  */
+#define KVM_CLOCK_TSC_STABLE           2
+
 struct kvm_clock_data {
        __u64 clock;
        __u32 flags;
        __u32 pad[9];
 };
 
+/* For KVM_CAP_SW_TLB */
+
 #define KVM_MMU_FSL_BOOKE_NOHV         0
 #define KVM_MMU_FSL_BOOKE_HV           1
 
index 4bd27d0270a2af6b63ea4920274aa97088888168..5daa48e2571e35d1c35c009f8c39884b15ff540c 100644 (file)
@@ -124,8 +124,8 @@ enum {
        L2TP_ATTR_STATS,                /* nested */
        L2TP_ATTR_IP6_SADDR,            /* struct in6_addr */
        L2TP_ATTR_IP6_DADDR,            /* struct in6_addr */
-       L2TP_ATTR_UDP_ZERO_CSUM6_TX,    /* u8 */
-       L2TP_ATTR_UDP_ZERO_CSUM6_RX,    /* u8 */
+       L2TP_ATTR_UDP_ZERO_CSUM6_TX,    /* flag */
+       L2TP_ATTR_UDP_ZERO_CSUM6_RX,    /* flag */
        L2TP_ATTR_PAD,
        __L2TP_ATTR_MAX,
 };
index a478fe80e203bdf4865ad9ed5f01343eaacd49ce..92724cba1eba07c9035febba046d1687cd0ce96a 100644 (file)
@@ -9,6 +9,8 @@ enum lwtunnel_encap_types {
        LWTUNNEL_ENCAP_IP,
        LWTUNNEL_ENCAP_ILA,
        LWTUNNEL_ENCAP_IP6,
+       LWTUNNEL_ENCAP_SEG6,
+       LWTUNNEL_ENCAP_BPF,
        __LWTUNNEL_ENCAP_MAX,
 };
 
@@ -42,4 +44,26 @@ enum lwtunnel_ip6_t {
 
 #define LWTUNNEL_IP6_MAX (__LWTUNNEL_IP6_MAX - 1)
 
+enum {
+       LWT_BPF_PROG_UNSPEC,
+       LWT_BPF_PROG_FD,
+       LWT_BPF_PROG_NAME,
+       __LWT_BPF_PROG_MAX,
+};
+
+#define LWT_BPF_PROG_MAX (__LWT_BPF_PROG_MAX - 1)
+
+enum {
+       LWT_BPF_UNSPEC,
+       LWT_BPF_IN,
+       LWT_BPF_OUT,
+       LWT_BPF_XMIT,
+       LWT_BPF_XMIT_HEADROOM,
+       __LWT_BPF_MAX,
+};
+
+#define LWT_BPF_MAX (__LWT_BPF_MAX - 1)
+
+#define LWT_BPF_MAX_HEADROOM 256
+
 #endif /* _UAPI_LWTUNNEL_H_ */
index 264e515de16f6ede07c962344ae0ac92f641d161..464dcca5ed68304bac7c80ab5fd62192d81b3970 100644 (file)
@@ -25,8 +25,9 @@ enum {
        SOF_TIMESTAMPING_TX_ACK = (1<<9),
        SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
        SOF_TIMESTAMPING_OPT_TSONLY = (1<<11),
+       SOF_TIMESTAMPING_OPT_STATS = (1<<12),
 
-       SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY,
+       SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_STATS,
        SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
                                 SOF_TIMESTAMPING_LAST
 };
index d93f949d1d9aad326231829f70ba57b7b9cf10ee..7550e9176a54ea839ec792d13f223ac0be82f1a1 100644 (file)
@@ -13,7 +13,7 @@
 #define NF_STOLEN 2
 #define NF_QUEUE 3
 #define NF_REPEAT 4
-#define NF_STOP 5
+#define NF_STOP 5      /* Deprecated, for userspace nf_queue compatibility. */
 #define NF_MAX_VERDICT NF_STOP
 
 /* we overload the higher bits for encoding auxiliary data such as the queue
index c6c4477c136b9f77749088bc8b054f853b45b0f6..14e5f619167ec8ad1deaeb37895df08f8f4f05b1 100644 (file)
@@ -758,6 +758,19 @@ enum nft_meta_keys {
        NFT_META_PRANDOM,
 };
 
+/**
+ * enum nft_rt_keys - nf_tables routing expression keys
+ *
+ * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid)
+ * @NFT_RT_NEXTHOP4: routing nexthop for IPv4
+ * @NFT_RT_NEXTHOP6: routing nexthop for IPv6
+ */
+enum nft_rt_keys {
+       NFT_RT_CLASSID,
+       NFT_RT_NEXTHOP4,
+       NFT_RT_NEXTHOP6,
+};
+
 /**
  * enum nft_hash_attributes - nf_tables hash expression netlink attributes
  *
@@ -796,6 +809,20 @@ enum nft_meta_attributes {
 };
 #define NFTA_META_MAX          (__NFTA_META_MAX - 1)
 
+/**
+ * enum nft_rt_attributes - nf_tables routing expression netlink attributes
+ *
+ * @NFTA_RT_DREG: destination register (NLA_U32)
+ * @NFTA_RT_KEY: routing data item to load (NLA_U32: nft_rt_keys)
+ */
+enum nft_rt_attributes {
+       NFTA_RT_UNSPEC,
+       NFTA_RT_DREG,
+       NFTA_RT_KEY,
+       __NFTA_RT_MAX
+};
+#define NFTA_RT_MAX            (__NFTA_RT_MAX - 1)
+
 /**
  * enum nft_ct_keys - nf_tables ct expression keys
  *
@@ -1109,6 +1136,42 @@ enum nft_gen_attributes {
 };
 #define NFTA_GEN_MAX           (__NFTA_GEN_MAX - 1)
 
+/*
+ * enum nft_fib_attributes - nf_tables fib expression netlink attributes
+ *
+ * @NFTA_FIB_DREG: destination register (NLA_U32)
+ * @NFTA_FIB_RESULT: desired result (NLA_U32)
+ * @NFTA_FIB_FLAGS: flowi fields to initialize when querying the FIB (NLA_U32)
+ *
+ * The FIB expression performs a route lookup according
+ * to the packet data.
+ */
+enum nft_fib_attributes {
+       NFTA_FIB_UNSPEC,
+       NFTA_FIB_DREG,
+       NFTA_FIB_RESULT,
+       NFTA_FIB_FLAGS,
+       __NFTA_FIB_MAX
+};
+#define NFTA_FIB_MAX (__NFTA_FIB_MAX - 1)
+
+enum nft_fib_result {
+       NFT_FIB_RESULT_UNSPEC,
+       NFT_FIB_RESULT_OIF,
+       NFT_FIB_RESULT_OIFNAME,
+       NFT_FIB_RESULT_ADDRTYPE,
+       __NFT_FIB_RESULT_MAX
+};
+#define NFT_FIB_RESULT_MAX     (__NFT_FIB_RESULT_MAX - 1)
+
+enum nft_fib_flags {
+       NFTA_FIB_F_SADDR        = 1 << 0,       /* look up src */
+       NFTA_FIB_F_DADDR        = 1 << 1,       /* look up dst */
+       NFTA_FIB_F_MARK         = 1 << 2,       /* use skb->mark */
+       NFTA_FIB_F_IIF          = 1 << 3,       /* restrict to iif */
+       NFTA_FIB_F_OIF          = 1 << 4,       /* restrict to oif */
+};
+
 /**
  * enum nft_trace_attributes - nf_tables trace netlink attributes
  *
index 56368e9b462245b6a50a2756abac6233b95650d9..259c9c77fdc1cc8505e36ae06d29f66a8e005bf4 100644 (file)
  *
  * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface.
  *
+ * @NL80211_CMD_SET_MULTICAST_TO_UNICAST: Configure if this AP should perform
+ *     multicast to unicast conversion. When enabled, all multicast packets
+ *     with ethertype ARP, IPv4 or IPv6 (possibly within an 802.1Q header)
+ *     will be sent out to each station once with the destination (multicast)
+ *     MAC address replaced by the station's MAC address. Note that this may
+ *     break certain expectations of the receiver, e.g. the ability to drop
+ *     unicast IP packets encapsulated in multicast L2 frames, or the ability
+ *     to not send destination unreachable messages in such cases.
+ *     This can only be toggled per BSS. Configure this on an interface of
+ *     type %NL80211_IFTYPE_AP. It applies to all its VLAN interfaces
+ *     (%NL80211_IFTYPE_AP_VLAN), except for those in 4addr (WDS) mode.
+ *     If %NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED is not present with this
+ *     command, the feature is disabled.
+ *
  * @NL80211_CMD_JOIN_MESH: Join a mesh. The mesh ID must be given, and initial
  *     mesh config parameters may be given.
  * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the
  *     This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and
  *     %NL80211_ATTR_COOKIE.
  *
+ * @NL80211_CMD_UPDATE_CONNECT_PARAMS: Update one or more connect parameters
+ *     for subsequent roaming cases if the driver or firmware uses internal
+ *     BSS selection. This command can be issued only while connected and it
+ *     does not result in a change for the current association. Currently,
+ *     only the %NL80211_ATTR_IE data is used and updated with this command.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1069,6 +1089,10 @@ enum nl80211_commands {
        NL80211_CMD_CHANGE_NAN_CONFIG,
        NL80211_CMD_NAN_MATCH,
 
+       NL80211_CMD_SET_MULTICAST_TO_UNICAST,
+
+       NL80211_CMD_UPDATE_CONNECT_PARAMS,
+
        /* add new commands above here */
 
        /* used to define NL80211_CMD_MAX below */
@@ -1638,8 +1662,16 @@ enum nl80211_commands {
  *     the connection request from a station. nl80211_connect_failed_reason
  *     enum has different reasons of connection failure.
  *
- * @NL80211_ATTR_SAE_DATA: SAE elements in Authentication frames. This starts
- *     with the Authentication transaction sequence number field.
+ * @NL80211_ATTR_AUTH_DATA: Fields and elements in Authentication frames.
+ *     This contains the authentication frame body (non-IE and IE data),
+ *     excluding the Authentication algorithm number, i.e., starting at the
+ *     Authentication transaction sequence number field. It is used with
+ *     authentication algorithms that need special fields to be added into
+ *     the frames (SAE and FILS). Currently, only the SAE cases use the
+ *     initial two fields (Authentication transaction sequence number and
+ *     Status code). However, those fields are included in the attribute data
+ *     for all authentication algorithms to keep the attribute definition
+ *     consistent.
  *
  * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from
  *     association request when used with NL80211_CMD_NEW_STATION)
@@ -1936,6 +1968,14 @@ enum nl80211_commands {
  *     attribute.
  * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute.
  *     See &enum nl80211_nan_match_attributes.
+ * @NL80211_ATTR_FILS_KEK: KEK for FILS (Re)Association Request/Response frame
+ *     protection.
+ * @NL80211_ATTR_FILS_NONCES: Nonces (part of AAD) for FILS (Re)Association
+ *     Request/Response frame protection. This attribute contains the 16 octet
+ *     STA Nonce followed by 16 octets of AP Nonce.
+ *
+ * @NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED: Indicates whether or not multicast
+ *     packets should be sent out as unicast to all stations (flag attribute).
  *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
@@ -2195,7 +2235,7 @@ enum nl80211_attrs {
 
        NL80211_ATTR_CONN_FAILED_REASON,
 
-       NL80211_ATTR_SAE_DATA,
+       NL80211_ATTR_AUTH_DATA,
 
        NL80211_ATTR_VHT_CAPABILITY,
 
@@ -2336,6 +2376,11 @@ enum nl80211_attrs {
        NL80211_ATTR_NAN_FUNC,
        NL80211_ATTR_NAN_MATCH,
 
+       NL80211_ATTR_FILS_KEK,
+       NL80211_ATTR_FILS_NONCES,
+
+       NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED,
+
        /* add attributes here, update the policy in nl80211.c */
 
        __NL80211_ATTR_AFTER_LAST,
@@ -2347,6 +2392,7 @@ enum nl80211_attrs {
 #define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION
 #define        NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG
 #define NL80211_ATTR_IFACE_SOCKET_OWNER NL80211_ATTR_SOCKET_OWNER
+#define NL80211_ATTR_SAE_DATA NL80211_ATTR_AUTH_DATA
 
 /*
  * Allow user space programs to use #ifdef on new attributes by defining them
@@ -3660,6 +3706,9 @@ enum nl80211_bss_status {
  * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r)
  * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP)
  * @NL80211_AUTHTYPE_SAE: Simultaneous authentication of equals
+ * @NL80211_AUTHTYPE_FILS_SK: Fast Initial Link Setup shared key
+ * @NL80211_AUTHTYPE_FILS_SK_PFS: Fast Initial Link Setup shared key with PFS
+ * @NL80211_AUTHTYPE_FILS_PK: Fast Initial Link Setup public key
  * @__NL80211_AUTHTYPE_NUM: internal
  * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm
  * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by
@@ -3672,6 +3721,9 @@ enum nl80211_auth_type {
        NL80211_AUTHTYPE_FT,
        NL80211_AUTHTYPE_NETWORK_EAP,
        NL80211_AUTHTYPE_SAE,
+       NL80211_AUTHTYPE_FILS_SK,
+       NL80211_AUTHTYPE_FILS_SK_PFS,
+       NL80211_AUTHTYPE_FILS_PK,
 
        /* keep last */
        __NL80211_AUTHTYPE_NUM,
@@ -4280,6 +4332,9 @@ enum nl80211_iface_limit_attrs {
  *     of supported channel widths for radar detection.
  * @NL80211_IFACE_COMB_RADAR_DETECT_REGIONS: u32 attribute containing the bitmap
  *     of supported regulatory regions for radar detection.
+ * @NL80211_IFACE_COMB_BI_MIN_GCD: u32 attribute specifying the minimum GCD of
+ *     different beacon intervals supported by all the interface combinations
+ *     in this group (if not present, all beacon intervals must be identical).
  * @NUM_NL80211_IFACE_COMB: number of attributes
  * @MAX_NL80211_IFACE_COMB: highest attribute number
  *
@@ -4287,8 +4342,8 @@ enum nl80211_iface_limit_attrs {
  *     limits = [ #{STA} <= 1, #{AP} <= 1 ], matching BI, channels = 1, max = 2
  *     => allows an AP and a STA that must match BIs
  *
- *     numbers = [ #{AP, P2P-GO} <= 8 ], channels = 1, max = 8
- *     => allows 8 of AP/GO
+ *     numbers = [ #{AP, P2P-GO} <= 8 ], BI min gcd, channels = 1, max = 8,
+ *     => allows 8 of AP/GO that can have BI gcd >= min gcd
  *
  *     numbers = [ #{STA} <= 2 ], channels = 2, max = 2
  *     => allows two STAs on different channels
@@ -4314,6 +4369,7 @@ enum nl80211_if_combination_attrs {
        NL80211_IFACE_COMB_NUM_CHANNELS,
        NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS,
        NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
+       NL80211_IFACE_COMB_BI_MIN_GCD,
 
        /* keep last */
        NUM_NL80211_IFACE_COMB,
@@ -4634,6 +4690,8 @@ enum nl80211_feature_flags {
  *     configuration (AP/mesh) with HT rates.
  * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate
  *     configuration (AP/mesh) with VHT rates.
+ * @NL80211_EXT_FEATURE_FILS_STA: This driver supports Fast Initial Link Setup
+ *     with user space SME (NL80211_CMD_AUTHENTICATE) in station mode.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -4648,6 +4706,7 @@ enum nl80211_ext_feature_index {
        NL80211_EXT_FEATURE_BEACON_RATE_LEGACY,
        NL80211_EXT_FEATURE_BEACON_RATE_HT,
        NL80211_EXT_FEATURE_BEACON_RATE_VHT,
+       NL80211_EXT_FEATURE_FILS_STA,
 
        /* add new features before the definition below */
        NUM_NL80211_EXT_FEATURES,
index 59ed3992c7609540cfebddae99e24eba1a86f6d0..375d812fea36f31dc6fa353be96bc30ba87ce20b 100644 (file)
@@ -705,6 +705,15 @@ enum ovs_nat_attr {
 
 #define OVS_NAT_ATTR_MAX (__OVS_NAT_ATTR_MAX - 1)
 
+/*
+ * struct ovs_action_push_eth - %OVS_ACTION_ATTR_PUSH_ETH action argument.
+ * @addresses: Source and destination MAC addresses.
+ * (No Ethernet type member is defined in this struct.)
+ */
+struct ovs_action_push_eth {
+       struct ovs_key_ethernet addresses;
+};
+
 /**
  * enum ovs_action_attr - Action types.
  *
@@ -738,6 +747,10 @@ enum ovs_nat_attr {
  * is no MPLS label stack, as determined by ethertype, no action is taken.
  * @OVS_ACTION_ATTR_CT: Track the connection. Populate the conntrack-related
  * entries in the flow key.
+ * @OVS_ACTION_ATTR_PUSH_ETH: Push a new outermost Ethernet header onto the
+ * packet.
+ * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the
+ * packet.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -765,6 +778,8 @@ enum ovs_action_attr {
                                       * bits. */
        OVS_ACTION_ATTR_CT,           /* Nested OVS_CT_ATTR_* . */
        OVS_ACTION_ATTR_TRUNC,        /* u32 struct ovs_action_trunc. */
+       OVS_ACTION_ATTR_PUSH_ETH,     /* struct ovs_action_push_eth. */
+       OVS_ACTION_ATTR_POP_ETH,      /* No argument. */
 
        __OVS_ACTION_ATTR_MAX,        /* Nothing past this will be accepted
                                       * from userspace. */
index d812172d1d7b45a7e6fa279db719d75cd3efcc9e..e5a2e68b2236e51e5b7bab0a143a02763054dac6 100644 (file)
  */
 #define PCI_EXP_DEVCAP2                36      /* Device Capabilities 2 */
 #define  PCI_EXP_DEVCAP2_ARI           0x00000020 /* Alternative Routing-ID */
+#define  PCI_EXP_DEVCAP2_ATOMIC_ROUTE  0x00000040 /* Atomic Op routing */
+#define PCI_EXP_DEVCAP2_ATOMIC_COMP64  0x00000100 /* Atomic 64-bit compare */
 #define  PCI_EXP_DEVCAP2_LTR           0x00000800 /* Latency tolerance reporting */
 #define  PCI_EXP_DEVCAP2_OBFF_MASK     0x000c0000 /* OBFF support mechanism */
 #define  PCI_EXP_DEVCAP2_OBFF_MSG      0x00040000 /* New message signaling */
 #define PCI_EXP_DEVCTL2                40      /* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT  0x000f  /* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_ARI           0x0020  /* Alternative Routing-ID */
+#define PCI_EXP_DEVCTL2_ATOMIC_REQ     0x0040  /* Set Atomic requests */
 #define  PCI_EXP_DEVCTL2_IDO_REQ_EN    0x0100  /* Allow IDO for requests */
 #define  PCI_EXP_DEVCTL2_IDO_CMP_EN    0x0200  /* Allow IDO for completions */
 #define  PCI_EXP_DEVCTL2_LTR_EN                0x0400  /* Enable LTR mechanism */
index 8fd715f806a27f327051d48a8e6b4ab8656c9703..86786d45ee66de3b573c2d10a685782f00d700af 100644 (file)
@@ -447,6 +447,16 @@ enum {
        TCA_FLOWER_KEY_TCP_DST_MASK,    /* be16 */
        TCA_FLOWER_KEY_UDP_SRC_MASK,    /* be16 */
        TCA_FLOWER_KEY_UDP_DST_MASK,    /* be16 */
+       TCA_FLOWER_KEY_SCTP_SRC_MASK,   /* be16 */
+       TCA_FLOWER_KEY_SCTP_DST_MASK,   /* be16 */
+
+       TCA_FLOWER_KEY_SCTP_SRC,        /* be16 */
+       TCA_FLOWER_KEY_SCTP_DST,        /* be16 */
+
+       TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,        /* be16 */
+       TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,   /* be16 */
+       TCA_FLOWER_KEY_ENC_UDP_DST_PORT,        /* be16 */
+       TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,   /* be16 */
        __TCA_FLOWER_MAX,
 };
 
index 262f0379d83ac1965d8f792b5f1d3b8634f86d69..e14377f2ec273a183ae3b2e3da887ef2f893f7d1 100644 (file)
@@ -318,6 +318,7 @@ enum rtattr_type_t {
        RTA_ENCAP,
        RTA_EXPIRES,
        RTA_PAD,
+       RTA_UID,
        __RTA_MAX
 };
 
@@ -350,7 +351,7 @@ struct rtnexthop {
 #define RTNH_F_OFFLOAD         8       /* offloaded route */
 #define RTNH_F_LINKDOWN                16      /* carrier-down on nexthop */
 
-#define RTNH_COMPARE_MASK      (RTNH_F_DEAD | RTNH_F_LINKDOWN)
+#define RTNH_COMPARE_MASK      (RTNH_F_DEAD | RTNH_F_LINKDOWN | RTNH_F_OFFLOAD)
 
 /* Macros to handle hexthops */
 
diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h
new file mode 100644 (file)
index 0000000..c396a80
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_H
+#define _UAPI_LINUX_SEG6_H
+
+/*
+ * SRH
+ */
+struct ipv6_sr_hdr {
+       __u8    nexthdr;
+       __u8    hdrlen;
+       __u8    type;
+       __u8    segments_left;
+       __u8    first_segment;
+       __u8    flag_1;
+       __u8    flag_2;
+       __u8    reserved;
+
+       struct in6_addr segments[0];
+};
+
+#define SR6_FLAG1_CLEANUP      (1 << 7)
+#define SR6_FLAG1_PROTECTED    (1 << 6)
+#define SR6_FLAG1_OAM          (1 << 5)
+#define SR6_FLAG1_ALERT                (1 << 4)
+#define SR6_FLAG1_HMAC         (1 << 3)
+
+#define SR6_TLV_INGRESS                1
+#define SR6_TLV_EGRESS         2
+#define SR6_TLV_OPAQUE         3
+#define SR6_TLV_PADDING                4
+#define SR6_TLV_HMAC           5
+
+#define sr_has_cleanup(srh) ((srh)->flag_1 & SR6_FLAG1_CLEANUP)
+#define sr_has_hmac(srh) ((srh)->flag_1 & SR6_FLAG1_HMAC)
+
+struct sr6_tlv {
+       __u8 type;
+       __u8 len;
+       __u8 data[0];
+};
+
+#endif
diff --git a/include/uapi/linux/seg6_genl.h b/include/uapi/linux/seg6_genl.h
new file mode 100644 (file)
index 0000000..fcf1c60
--- /dev/null
@@ -0,0 +1,32 @@
+#ifndef _UAPI_LINUX_SEG6_GENL_H
+#define _UAPI_LINUX_SEG6_GENL_H
+
+#define SEG6_GENL_NAME         "SEG6"
+#define SEG6_GENL_VERSION      0x1
+
+enum {
+       SEG6_ATTR_UNSPEC,
+       SEG6_ATTR_DST,
+       SEG6_ATTR_DSTLEN,
+       SEG6_ATTR_HMACKEYID,
+       SEG6_ATTR_SECRET,
+       SEG6_ATTR_SECRETLEN,
+       SEG6_ATTR_ALGID,
+       SEG6_ATTR_HMACINFO,
+       __SEG6_ATTR_MAX,
+};
+
+#define SEG6_ATTR_MAX (__SEG6_ATTR_MAX - 1)
+
+enum {
+       SEG6_CMD_UNSPEC,
+       SEG6_CMD_SETHMAC,
+       SEG6_CMD_DUMPHMAC,
+       SEG6_CMD_SET_TUNSRC,
+       SEG6_CMD_GET_TUNSRC,
+       __SEG6_CMD_MAX,
+};
+
+#define SEG6_CMD_MAX (__SEG6_CMD_MAX - 1)
+
+#endif
diff --git a/include/uapi/linux/seg6_hmac.h b/include/uapi/linux/seg6_hmac.h
new file mode 100644 (file)
index 0000000..b652dfd
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef _UAPI_LINUX_SEG6_HMAC_H
+#define _UAPI_LINUX_SEG6_HMAC_H
+
+#include <linux/seg6.h>
+
+#define SEG6_HMAC_SECRET_LEN   64
+#define SEG6_HMAC_FIELD_LEN    32
+
+struct sr6_tlv_hmac {
+       struct sr6_tlv tlvhdr;
+       __u16 reserved;
+       __be32 hmackeyid;
+       __u8 hmac[SEG6_HMAC_FIELD_LEN];
+};
+
+enum {
+       SEG6_HMAC_ALGO_SHA1 = 1,
+       SEG6_HMAC_ALGO_SHA256 = 2,
+};
+
+#endif
diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h
new file mode 100644 (file)
index 0000000..0f7dbd2
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_IPTUNNEL_H
+#define _UAPI_LINUX_SEG6_IPTUNNEL_H
+
+enum {
+       SEG6_IPTUNNEL_UNSPEC,
+       SEG6_IPTUNNEL_SRH,
+       __SEG6_IPTUNNEL_MAX,
+};
+#define SEG6_IPTUNNEL_MAX (__SEG6_IPTUNNEL_MAX - 1)
+
+struct seg6_iptunnel_encap {
+       int mode;
+       struct ipv6_sr_hdr srh[0];
+};
+
+#define SEG6_IPTUN_ENCAP_SIZE(x) ((sizeof(*x)) + (((x)->srh->hdrlen + 1) << 3))
+
+enum {
+       SEG6_IPTUN_MODE_INLINE,
+       SEG6_IPTUN_MODE_ENCAP,
+};
+
+static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
+{
+       int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP);
+
+       return ((tuninfo->srh->hdrlen + 1) << 3) +
+              (encap * sizeof(struct ipv6hdr));
+}
+
+#endif
index 8e7890b26d9ab6b140ffe3bf13aea49ab27c7fef..79d029d2531005167c13869ba3c136c029aa6665 100644 (file)
@@ -24,6 +24,8 @@
 #define SIOCINQ                FIONREAD
 #define SIOCOUTQ       TIOCOUTQ        /* output queue size (not sent + not acked) */
 
+#define SOCK_IOC_TYPE  0x89
+
 /* Routing table calls. */
 #define SIOCADDRT      0x890B          /* add routing table entry      */
 #define SIOCDELRT      0x890C          /* delete routing table entry   */
@@ -84,6 +86,7 @@
 #define SIOCWANDEV     0x894A          /* get/set netdev parameters    */
 
 #define SIOCOUTQNSD    0x894B          /* output queue size (not sent only) */
+#define SIOCGSKNS      0x894C          /* get socket network namespace */
 
 /* ARP cache control calls. */
                    /*  0x8950 - 0x8952  * obsolete calls, don't re-use */
index a4d00c608d8f7e307e0be65e1e8b78371e4dc908..2884425738ce73e3bbbbf8dcfa79c6e345022476 100644 (file)
@@ -28,6 +28,7 @@
 #define SKBEDIT_F_QUEUE_MAPPING                0x2
 #define SKBEDIT_F_MARK                 0x4
 #define SKBEDIT_F_PTYPE                        0x8
+#define SKBEDIT_F_MASK                 0x10
 
 struct tc_skbedit {
        tc_gen;
@@ -42,6 +43,7 @@ enum {
        TCA_SKBEDIT_MARK,
        TCA_SKBEDIT_PAD,
        TCA_SKBEDIT_PTYPE,
+       TCA_SKBEDIT_MASK,
        __TCA_SKBEDIT_MAX
 };
 #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
index 890106ff16e6840845de0bc9601e58ab4a4b557c..84ea55e1076b6b363c8688d1773fc21e9630fdb8 100644 (file)
@@ -33,6 +33,7 @@ enum {
        TCA_TUNNEL_KEY_ENC_IPV6_DST,    /* struct in6_addr */
        TCA_TUNNEL_KEY_ENC_KEY_ID,      /* be64 */
        TCA_TUNNEL_KEY_PAD,
+       TCA_TUNNEL_KEY_ENC_DST_PORT,    /* be16 */
        __TCA_TUNNEL_KEY_MAX,
 };
 
index 73ac0db487f87cfe3d205c57856ad2d9d5dd33cd..c53de2691cecec43f4dbde55dd69da3c07da719f 100644 (file)
@@ -214,6 +214,18 @@ struct tcp_info {
        __u32   tcpi_data_segs_out;     /* RFC4898 tcpEStatsDataSegsOut */
 
        __u64   tcpi_delivery_rate;
+
+       __u64   tcpi_busy_time;      /* Time (usec) busy sending data */
+       __u64   tcpi_rwnd_limited;   /* Time (usec) limited by receive window */
+       __u64   tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */
+};
+
+/* netlink attribute types for SCM_TIMESTAMPING_OPT_STATS */
+enum {
+       TCP_NLA_PAD,
+       TCP_NLA_BUSY,           /* Time (usec) busy sending data */
+       TCP_NLA_RWND_LIMITED,   /* Time (usec) limited by receive window */
+       TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
new file mode 100644 (file)
index 0000000..75c270d
--- /dev/null
@@ -0,0 +1,106 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __QEDR_USER_H__
+#define __QEDR_USER_H__
+
+#include <linux/types.h>
+
+#define QEDR_ABI_VERSION               (8)
+
+/* user kernel communication data structures. */
+
+struct qedr_alloc_ucontext_resp {
+       __u64 db_pa;
+       __u32 db_size;
+
+       __u32 max_send_wr;
+       __u32 max_recv_wr;
+       __u32 max_srq_wr;
+       __u32 sges_per_send_wr;
+       __u32 sges_per_recv_wr;
+       __u32 sges_per_srq_wr;
+       __u32 max_cqes;
+};
+
+struct qedr_alloc_pd_ureq {
+       __u64 rsvd1;
+};
+
+struct qedr_alloc_pd_uresp {
+       __u32 pd_id;
+};
+
+struct qedr_create_cq_ureq {
+       __u64 addr;
+       __u64 len;
+};
+
+struct qedr_create_cq_uresp {
+       __u32 db_offset;
+       __u16 icid;
+};
+
+struct qedr_create_qp_ureq {
+       __u32 qp_handle_hi;
+       __u32 qp_handle_lo;
+
+       /* SQ */
+       /* user space virtual address of SQ buffer */
+       __u64 sq_addr;
+
+       /* length of SQ buffer */
+       __u64 sq_len;
+
+       /* RQ */
+       /* user space virtual address of RQ buffer */
+       __u64 rq_addr;
+
+       /* length of RQ buffer */
+       __u64 rq_len;
+};
+
+struct qedr_create_qp_uresp {
+       __u32 qp_id;
+       __u32 atomic_supported;
+
+       /* SQ */
+       __u32 sq_db_offset;
+       __u16 sq_icid;
+
+       /* RQ */
+       __u32 rq_db_offset;
+       __u16 rq_icid;
+
+       __u32 rq_db2_offset;
+};
+
+#endif /* __QEDR_USER_H__ */
index 33d00a4ce6567f03f1cb52b08a5e72bda04efbaf..819d895edfdca7f9b146f187b0636410eb5bde7f 100644 (file)
 #include <linux/types.h>
 #include <sound/asound.h>
 
-#ifndef __KERNEL__
-#error This API is an early revision and not enabled in the current
-#error kernel release, it will be enabled in a future kernel version
-#error with incompatible changes to what is here.
-#endif
-
 /*
  * Maximum number of channels topology kcontrol can represent.
  */
index 34407f15e6d34da57be238f69441f1dad9e60764..5001a576b1d799d63e2316cfaf7d2f57521714d9 100644 (file)
@@ -1154,6 +1154,18 @@ config CGROUP_PERF
 
          Say N if unsure.
 
+config CGROUP_BPF
+       bool "Support for eBPF programs attached to cgroups"
+       depends on BPF_SYSCALL && SOCK_CGROUP_DATA
+       help
+         Allow attaching eBPF programs to a cgroup using the bpf(2)
+         syscall command BPF_PROG_ATTACH.
+
+         In which context these programs are accessed depends on the type
+         of attachment. For instance, programs that are attached using
+         BPF_CGROUP_INET_INGRESS will be executed on the ingress path of
+         inet sockets.
+
 config CGROUP_DEBUG
        bool "Example controller"
        default n
@@ -1945,6 +1957,7 @@ config MODULE_FORCE_UNLOAD
 
 config MODVERSIONS
        bool "Module versioning support"
+       depends on BROKEN
        help
          Usually, you have to use modules compiled with your kernel.
          Saying Y here makes it sometimes possible to use modules
index 7bc47ee31c369d442676edba32233fbb2ddbb462..c4fb45525d08802e721fa8c2c8d67dbb2b296ecf 100644 (file)
@@ -2,6 +2,8 @@
 # Makefile for the linux kernel.
 #
 
+ccflags-y := -fno-function-sections -fno-data-sections
+
 obj-y                          := main.o version.o mounts.o
 ifneq ($(CONFIG_BLK_DEV_INITRD),y)
 obj-y                          += noinitramfs.o
index 8a09b32e07d6c33351993c4c6beb19f5782d6c94..dd4104c9aa12c6a517ff7066d52ab6057d26e586 100644 (file)
@@ -272,7 +272,7 @@ int __init rd_load_image(char *from)
                sys_write(out_fd, buf, BLOCK_SIZE);
 #if !defined(CONFIG_S390)
                if (!(i % 16)) {
-                       printk("%c\b", rotator[rotate & 0x3]);
+                       pr_cont("%c\b", rotator[rotate & 0x3]);
                        rotate++;
                }
 #endif
index a8a58e2794a50469d465fad1a6939a4b68ec1114..2858be732f6d25dd8431cd994645c8c6af3828c2 100644 (file)
@@ -789,6 +789,7 @@ int __init_or_module do_one_initcall(initcall_t fn)
        }
        WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf);
 
+       add_latent_entropy();
        return ret;
 }
 
index a521999de4f103954e40db932e8903f3ed7e48f1..bf74eaa5c39f208ea5f1ee41db38e57349512e7d 100644 (file)
@@ -53,7 +53,7 @@ static struct msg_msg *alloc_msg(size_t len)
        size_t alen;
 
        alen = min(len, DATALEN_MSG);
-       msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
+       msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT);
        if (msg == NULL)
                return NULL;
 
@@ -65,7 +65,7 @@ static struct msg_msg *alloc_msg(size_t len)
        while (len > 0) {
                struct msg_msgseg *seg;
                alen = min(len, DATALEN_SEG);
-               seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL);
+               seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT);
                if (seg == NULL)
                        goto out_err;
                *pseg = seg;
index f1ca11613379fa2950aa7e33e732d85462f1fb57..67b9fbd871be8ec0d9e557f654fe7463a29f82b5 100644 (file)
@@ -126,7 +126,7 @@ static atomic_t    audit_lost = ATOMIC_INIT(0);
 
 /* The netlink socket. */
 static struct sock *audit_sock;
-static int audit_net_id;
+static unsigned int audit_net_id;
 
 /* Hash for inode-based rules */
 struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
@@ -1172,9 +1172,8 @@ static void __net_exit audit_net_exit(struct net *net)
                audit_sock = NULL;
        }
 
-       RCU_INIT_POINTER(aunet->nlsk, NULL);
-       synchronize_net();
        netlink_kernel_release(sock);
+       aunet->nlsk = NULL;
 }
 
 static struct pernet_operations audit_net_ops __net_initdata = {
index eed911d091dacebc429393e80120721fc6a8f110..1276474ac3cd9ddf2a87f37312edc661d2ec335c 100644 (file)
@@ -1,7 +1,8 @@
 obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o
+obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif
+obj-$(CONFIG_CGROUP_BPF) += cgroup.o
diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
new file mode 100644 (file)
index 0000000..89b7ef4
--- /dev/null
@@ -0,0 +1,695 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/cpumask.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+#include "bpf_lru_list.h"
+
+#define LOCAL_FREE_TARGET              (128)
+#define LOCAL_NR_SCANS                 LOCAL_FREE_TARGET
+
+#define PERCPU_FREE_TARGET             (16)
+#define PERCPU_NR_SCANS                        PERCPU_FREE_TARGET
+
+/* Helpers to get the local list index */
+#define LOCAL_LIST_IDX(t)      ((t) - BPF_LOCAL_LIST_T_OFFSET)
+#define LOCAL_FREE_LIST_IDX    LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_FREE)
+#define LOCAL_PENDING_LIST_IDX LOCAL_LIST_IDX(BPF_LRU_LOCAL_LIST_T_PENDING)
+#define IS_LOCAL_LIST_TYPE(t)  ((t) >= BPF_LOCAL_LIST_T_OFFSET)
+
+/* Return the possible CPU that follows @cpu in cpu_possible_mask,
+ * wrapping around to the first possible CPU past the end of the mask.
+ */
+static int get_next_cpu(int cpu)
+{
+       int next = cpumask_next(cpu, cpu_possible_mask);
+
+       if (next < nr_cpu_ids)
+               return next;
+
+       return cpumask_first(cpu_possible_mask);
+}
+
+/* Local list helpers */
+/* Return the head of the free list kept in the local list @loc_l. */
+static struct list_head *local_free_list(struct bpf_lru_locallist *loc_l)
+{
+       struct list_head *heads = loc_l->lists;
+
+       return &heads[LOCAL_FREE_LIST_IDX];
+}
+
+/* Return the head of the pending list kept in the local list @loc_l. */
+static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l)
+{
+       struct list_head *heads = loc_l->lists;
+
+       return &heads[LOCAL_PENDING_LIST_IDX];
+}
+
+/* bpf_lru_node helpers */
+/* Tell whether the ref bit of @node is currently set. */
+static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
+{
+       return node->ref != 0;
+}
+
+/* Account one more node on the @type list of @l.  Only list types
+ * below NR_BPF_LRU_LIST_COUNT have a counter; others are ignored.
+ */
+static void bpf_lru_list_count_inc(struct bpf_lru_list *l,
+                                  enum bpf_lru_list_type type)
+{
+       if (type >= NR_BPF_LRU_LIST_COUNT)
+               return;
+
+       l->counts[type]++;
+}
+
+/* Account one node less on the @type list of @l.  Only list types
+ * below NR_BPF_LRU_LIST_COUNT have a counter; others are ignored.
+ */
+static void bpf_lru_list_count_dec(struct bpf_lru_list *l,
+                                  enum bpf_lru_list_type type)
+{
+       if (type >= NR_BPF_LRU_LIST_COUNT)
+               return;
+
+       l->counts[type]--;
+}
+
+/* Take @node off its current list in @l and put it at the head of
+ * @free_list, retagging it as @tgt_free_type.  A node that is tagged
+ * with a local list type is rejected with a one-time warning.
+ */
+static void __bpf_lru_node_move_to_free(struct bpf_lru_list *l,
+                                       struct bpf_lru_node *node,
+                                       struct list_head *free_list,
+                                       enum bpf_lru_list_type tgt_free_type)
+{
+       struct list_head *entry = &node->list;
+
+       if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)))
+               return;
+
+       /* The inactive rotation cursor must not keep pointing at a
+        * node that is leaving the list: step it back one entry.
+        */
+       if (l->next_inactive_rotation == entry)
+               l->next_inactive_rotation = entry->prev;
+
+       bpf_lru_list_count_dec(l, node->type);
+
+       node->type = tgt_free_type;
+       list_move(entry, free_list);
+}
+
+/* Bring a node that is tagged with a local list type onto the head of
+ * the @tgt_type list of @l.  The ref bit is cleared on the way in.
+ */
+static void __bpf_lru_node_move_in(struct bpf_lru_list *l,
+                                  struct bpf_lru_node *node,
+                                  enum bpf_lru_list_type tgt_type)
+{
+       /* The source has to be a local list ... */
+       if (WARN_ON_ONCE(!IS_LOCAL_LIST_TYPE(node->type)))
+               return;
+
+       /* ... and the destination must not be one. */
+       if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type)))
+               return;
+
+       node->ref = 0;
+       node->type = tgt_type;
+       bpf_lru_list_count_inc(l, tgt_type);
+       list_move(&node->list, &l->lists[tgt_type]);
+}
+
+/* Move @node to the head of the @tgt_type list of @l.  Source and
+ * destination must both be non-local list types; this covers e.g.
+ * active to inactive, inactive to active, or the tail of active back
+ * to the head of active.  The ref bit is always cleared.
+ */
+static void __bpf_lru_node_move(struct bpf_lru_list *l,
+                               struct bpf_lru_node *node,
+                               enum bpf_lru_list_type tgt_type)
+{
+       struct list_head *entry = &node->list;
+
+       if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)))
+               return;
+
+       if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type)))
+               return;
+
+       /* Only touch the accounting when the node really changes list */
+       if (node->type != tgt_type) {
+               bpf_lru_list_count_dec(l, node->type);
+               bpf_lru_list_count_inc(l, tgt_type);
+               node->type = tgt_type;
+       }
+       node->ref = 0;
+
+       /* Keep the inactive rotation cursor off the node being moved
+        * by stepping it back one entry.
+        */
+       if (l->next_inactive_rotation == entry)
+               l->next_inactive_rotation = entry->prev;
+
+       list_move(entry, &l->lists[tgt_type]);
+}
+
+/* True when the inactive list holds fewer nodes than the active list. */
+static bool bpf_lru_list_inactive_low(const struct bpf_lru_list *l)
+{
+       unsigned int nr_inactive = l->counts[BPF_LRU_LIST_T_INACTIVE];
+       unsigned int nr_active = l->counts[BPF_LRU_LIST_T_ACTIVE];
+
+       return nr_inactive < nr_active;
+}
+
+/* Rotate the active list, walking from its tail and looking at no
+ * more than lru->nr_scans nodes:
+ * - a node with the ref bit set goes back to the head of the active
+ *   list with the ref bit cleared, i.e. it gets one more chance;
+ * - a node without the ref bit is moved to the head of the inactive
+ *   list.
+ */
+static void __bpf_lru_list_rotate_active(struct bpf_lru *lru,
+                                        struct bpf_lru_list *l)
+{
+       struct list_head *active = &l->lists[BPF_LRU_LIST_T_ACTIVE];
+       struct bpf_lru_node *cur, *nxt, *stop_at;
+       unsigned int nr_scanned = 0;
+
+       /* Remember the head as it was before the pass: the walk ends
+        * once that node has been handled.
+        */
+       stop_at = list_first_entry(active, struct bpf_lru_node, list);
+       list_for_each_entry_safe_reverse(cur, nxt, active, list) {
+               enum bpf_lru_list_type tgt_type = BPF_LRU_LIST_T_INACTIVE;
+
+               if (bpf_lru_node_is_ref(cur))
+                       tgt_type = BPF_LRU_LIST_T_ACTIVE;
+
+               __bpf_lru_node_move(l, cur, tgt_type);
+
+               if (++nr_scanned == lru->nr_scans || cur == stop_at)
+                       break;
+       }
+}
+
+/* Rotate the inactive list.  It starts from the next_inactive_rotation
+ * 1. If the node has ref bit set, it will be moved to the head
+ *    of active list with the ref bit cleared.
+ * 2. If the node does not have ref bit set, it will leave it
+ *    at its current location (i.e. do nothing) so that it can
+ *    be considered during the next inactive_shrink.
+ * 3. It will at most scan nr_scans nodes
+ *
+ * The walk follows ->prev links, skips over the list head itself, and
+ * ends early once the entry recorded in "last" has been handled.  The
+ * position reached is stored back in l->next_inactive_rotation so the
+ * next call resumes from there.  Nothing is done on an empty list.
+ */
+static void __bpf_lru_list_rotate_inactive(struct bpf_lru *lru,
+                                          struct bpf_lru_list *l)
+{
+       struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE];
+       struct list_head *cur, *last, *next = inactive;
+       struct bpf_lru_node *node;
+       unsigned int i = 0;
+
+       if (list_empty(inactive))
+               return;
+
+       last = l->next_inactive_rotation->next; /* entry right after the cursor: the stop marker */
+       if (last == inactive) /* the stop marker must be a real node, not the head */
+               last = last->next;
+
+       cur = l->next_inactive_rotation;
+       while (i < lru->nr_scans) {
+               if (cur == inactive) { /* the head carries no node: step over it without counting a scan */
+                       cur = cur->prev;
+                       continue;
+               }
+
+               node = list_entry(cur, struct bpf_lru_node, list);
+               next = cur->prev; /* saved before the node may be moved to another list */
+               if (bpf_lru_node_is_ref(node))
+                       __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
+               if (cur == last)
+                       break;
+               cur = next;
+               i++;
+       }
+
+       l->next_inactive_rotation = next;
+}
+
+/* Shrink the inactive list.  Starting from the tail, a node with the
+ * ref bit set is moved to the active list; a node without it is handed
+ * to @free_list (retagged as @tgt_free_type) provided that
+ * lru->del_from_htab() returns true for it.
+ *
+ * The walk stops once @tgt_nshrink nodes have been handed over or
+ * lru->nr_scans nodes have been examined, whichever comes first.
+ *
+ * Returns the number of nodes handed to @free_list.
+ */
+static unsigned int
+__bpf_lru_list_shrink_inactive(struct bpf_lru *lru,
+                              struct bpf_lru_list *l,
+                              unsigned int tgt_nshrink,
+                              struct list_head *free_list,
+                              enum bpf_lru_list_type tgt_free_type)
+{
+       struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE];
+       struct bpf_lru_node *node, *tmp_node;
+       unsigned int nshrinked = 0;
+       unsigned int i = 0;
+
+       list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) {
+               if (bpf_lru_node_is_ref(node)) {
+                       __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
+               } else if (lru->del_from_htab(lru->del_arg, node)) {
+                       __bpf_lru_node_move_to_free(l, node, free_list,
+                                                   tgt_free_type);
+                       if (++nshrinked == tgt_nshrink)
+                               break;
+               }
+
+               /* Every examined node counts against the scan budget,
+                * whether or not it was moved.
+                */
+               if (++i == lru->nr_scans)
+                       break;
+       }
+
+       return nshrinked;
+}
+
+/* Rotation entry point: the active list is rotated only while the
+ * inactive list is the shorter of the two; the inactive list is
+ * rotated on every call.
+ */
+static void __bpf_lru_list_rotate(struct bpf_lru *lru, struct bpf_lru_list *l)
+{
+       bool rotate_active = bpf_lru_list_inactive_low(l);
+
+       if (rotate_active)
+               __bpf_lru_list_rotate_active(lru, l);
+
+       __bpf_lru_list_rotate_inactive(lru, l);
+}
+
+/* Free up to @tgt_nshrink nodes into @free_list.
+ *
+ * First try __bpf_lru_list_shrink_inactive(), which honors the ref
+ * bit.  If that frees nothing, fall back to a forced shrink: take a
+ * single node from the tail of the inactive list (or of the active
+ * list when the inactive one is empty) regardless of its ref bit.
+ *
+ * Returns the number of nodes handed to @free_list.
+ */
+static unsigned int __bpf_lru_list_shrink(struct bpf_lru *lru,
+                                         struct bpf_lru_list *l,
+                                         unsigned int tgt_nshrink,
+                                         struct list_head *free_list,
+                                         enum bpf_lru_list_type tgt_free_type)
+
+{
+       struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE];
+       struct bpf_lru_node *cur, *nxt;
+       struct list_head *victims;
+       unsigned int nr_freed;
+
+       nr_freed = __bpf_lru_list_shrink_inactive(lru, l, tgt_nshrink,
+                                                 free_list, tgt_free_type);
+       if (nr_freed)
+               return nr_freed;
+
+       /* Forced shrink: the reference bit is ignored from here on */
+       victims = inactive;
+       if (list_empty(inactive))
+               victims = &l->lists[BPF_LRU_LIST_T_ACTIVE];
+
+       list_for_each_entry_safe_reverse(cur, nxt, victims, list) {
+               if (!lru->del_from_htab(lru->del_arg, cur))
+                       continue;
+
+               __bpf_lru_node_move_to_free(l, cur, free_list, tgt_free_type);
+               return 1;
+       }
+
+       return 0;
+}
+
+/* Drain the local pending list of @loc_l into the LRU list @l,
+ * oldest node first.  Nodes with the ref bit set land on the active
+ * list, all others on the inactive list.
+ */
+static void __local_list_flush(struct bpf_lru_list *l,
+                              struct bpf_lru_locallist *loc_l)
+{
+       struct list_head *pending = local_pending_list(loc_l);
+       struct bpf_lru_node *cur, *nxt;
+
+       list_for_each_entry_safe_reverse(cur, nxt, pending, list) {
+               enum bpf_lru_list_type tgt_type = BPF_LRU_LIST_T_INACTIVE;
+
+               if (bpf_lru_node_is_ref(cur))
+                       tgt_type = BPF_LRU_LIST_T_ACTIVE;
+
+               __bpf_lru_node_move_in(l, cur, tgt_type);
+       }
+}
+
+/* Return @node to the free list of @l, taking l->lock with interrupts
+ * disabled.  A node still tagged with a local list type is refused
+ * with a one-time warning.
+ */
+static void bpf_lru_list_push_free(struct bpf_lru_list *l,
+                                  struct bpf_lru_node *node)
+{
+       unsigned long irq_flags;
+       bool is_local = IS_LOCAL_LIST_TYPE(node->type);
+
+       if (WARN_ON_ONCE(is_local))
+               return;
+
+       raw_spin_lock_irqsave(&l->lock, irq_flags);
+       __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
+       raw_spin_unlock_irqrestore(&l->lock, irq_flags);
+}
+
+/* Refill the local free list of @loc_l from the common LRU list.
+ *
+ * Under the common list lock: flush the local pending nodes into the
+ * LRU list, rotate it, then move up to LOCAL_FREE_TARGET nodes from
+ * the global free list to the local free list.  If that is not
+ * enough, shrink the LRU list to make up for the shortfall.
+ *
+ * The lock is taken with plain raw_spin_lock(); the caller in this
+ * file, bpf_common_lru_pop_free(), already holds loc_l->lock with
+ * interrupts disabled.
+ */
+static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru,
+                                          struct bpf_lru_locallist *loc_l)
+{
+       struct bpf_lru_list *l = &lru->common_lru.lru_list;
+       struct list_head *global_free = &l->lists[BPF_LRU_LIST_T_FREE];
+       struct list_head *local_free = local_free_list(loc_l);
+       struct bpf_lru_node *cur, *nxt;
+       unsigned int nr_moved = 0;
+
+       raw_spin_lock(&l->lock);
+
+       __local_list_flush(l, loc_l);
+       __bpf_lru_list_rotate(lru, l);
+
+       list_for_each_entry_safe(cur, nxt, global_free, list) {
+               __bpf_lru_node_move_to_free(l, cur, local_free,
+                                           BPF_LRU_LOCAL_LIST_T_FREE);
+               if (++nr_moved == LOCAL_FREE_TARGET)
+                       break;
+       }
+
+       if (nr_moved < LOCAL_FREE_TARGET)
+               __bpf_lru_list_shrink(lru, l, LOCAL_FREE_TARGET - nr_moved,
+                                     local_free, BPF_LRU_LOCAL_LIST_T_FREE);
+
+       raw_spin_unlock(&l->lock);
+}
+
+/* Hand @node out: store @hash at lru->hash_offset bytes from the
+ * start of the node, record the owning @cpu, tag the node as pending
+ * with a cleared ref bit and queue it at the head of the pending list
+ * of @loc_l.
+ */
+static void __local_list_add_pending(struct bpf_lru *lru,
+                                    struct bpf_lru_locallist *loc_l,
+                                    int cpu,
+                                    struct bpf_lru_node *node,
+                                    u32 hash)
+{
+       u32 *hash_slot = (u32 *)((void *)node + lru->hash_offset);
+
+       *hash_slot = hash;
+
+       node->cpu = cpu;
+       node->ref = 0;
+       node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
+
+       list_add(&node->list, local_pending_list(loc_l));
+}
+
+/* Unlink and return the first node of the local free list of @loc_l,
+ * or NULL when that list is empty.
+ *
+ * File-local helper: it has no prototype in bpf_lru_list.h, so it is
+ * given internal linkage.
+ */
+static struct bpf_lru_node *
+__local_list_pop_free(struct bpf_lru_locallist *loc_l)
+{
+       struct bpf_lru_node *node;
+
+       node = list_first_entry_or_null(local_free_list(loc_l),
+                                       struct bpf_lru_node,
+                                       list);
+       if (node)
+               list_del(&node->list);
+
+       return node;
+}
+
+/* Reclaim one node from the local pending list of @loc_l.
+ *
+ * A first pass, from the oldest entry, only considers nodes whose ref
+ * bit is clear; if it finds nothing, a second pass ignores the ref
+ * bit.  In both passes a node is taken only when
+ * lru->del_from_htab() returns true for it.
+ *
+ * Returns the unlinked node, or NULL when nothing could be reclaimed.
+ *
+ * File-local helper: it has no prototype in bpf_lru_list.h, so it is
+ * given internal linkage.
+ */
+static struct bpf_lru_node *
+__local_list_pop_pending(struct bpf_lru *lru,
+                        struct bpf_lru_locallist *loc_l)
+{
+       struct bpf_lru_node *node;
+       bool force = false;
+
+ignore_ref:
+       /* Get from the tail (i.e. older element) of the pending list. */
+       list_for_each_entry_reverse(node, local_pending_list(loc_l),
+                                   list) {
+               if ((!bpf_lru_node_is_ref(node) || force) &&
+                   lru->del_from_htab(lru->del_arg, node)) {
+                       list_del(&node->list);
+                       return node;
+               }
+       }
+
+       /* Nothing found while honoring the ref bit: retry once without */
+       if (!force) {
+               force = true;
+               goto ignore_ref;
+       }
+
+       return NULL;
+}
+
+/* Get a free node from the LRU list of the current CPU (per-cpu LRU
+ * flavour).  The list is rotated first and, if its free list is
+ * empty, shrunk by up to PERCPU_FREE_TARGET nodes.  A node that is
+ * found gets @hash stored at lru->hash_offset and is placed on the
+ * inactive list.
+ *
+ * Returns the node, or NULL when no node could be freed.
+ */
+static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
+                                                   u32 hash)
+{
+       int cpu = raw_smp_processor_id();
+       struct bpf_lru_list *l = per_cpu_ptr(lru->percpu_lru, cpu);
+       struct list_head *free_list = &l->lists[BPF_LRU_LIST_T_FREE];
+       struct bpf_lru_node *node = NULL;
+       unsigned long irq_flags;
+
+       raw_spin_lock_irqsave(&l->lock, irq_flags);
+
+       __bpf_lru_list_rotate(lru, l);
+
+       if (list_empty(free_list))
+               __bpf_lru_list_shrink(lru, l, PERCPU_FREE_TARGET, free_list,
+                                     BPF_LRU_LIST_T_FREE);
+
+       if (!list_empty(free_list)) {
+               u32 *hash_slot;
+
+               node = list_first_entry(free_list, struct bpf_lru_node, list);
+               hash_slot = (u32 *)((void *)node + lru->hash_offset);
+               *hash_slot = hash;
+               node->ref = 0;
+               __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
+       }
+
+       raw_spin_unlock_irqrestore(&l->lock, irq_flags);
+
+       return node;
+}
+
+/* Get a free node for the common (shared) LRU flavour.
+ *
+ * 1. Try the local free list of the current CPU, refilling it from
+ *    the common LRU list if it is empty.
+ * 2. Failing that, steal from the local free list, then the local
+ *    pending list, of each possible CPU in round-robin order,
+ *    starting at loc_l->next_steal.
+ *
+ * A node that is found is queued on the local pending list of the
+ * current CPU with @hash stored in it.  Returns NULL when nothing
+ * could be obtained.
+ */
+static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
+                                                   u32 hash)
+{
+       struct bpf_common_lru *clru = &lru->common_lru;
+       struct bpf_lru_locallist *loc_l, *victim_l;
+       int cpu = raw_smp_processor_id();
+       int victim, first_victim;
+       struct bpf_lru_node *node;
+       unsigned long flags;
+
+       loc_l = per_cpu_ptr(clru->local_list, cpu);
+
+       raw_spin_lock_irqsave(&loc_l->lock, flags);
+
+       node = __local_list_pop_free(loc_l);
+       if (!node) {
+               /* Local free list is empty: refill it and retry once */
+               bpf_lru_list_pop_free_to_local(lru, loc_l);
+               node = __local_list_pop_free(loc_l);
+       }
+
+       if (node)
+               __local_list_add_pending(lru, loc_l, cpu, node, hash);
+
+       raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+
+       if (node)
+               return node;
+
+       /* Neither the local free list nor the global LRU list could
+        * provide a node.
+        *
+        * Steal from the local free/pending lists, going round-robin
+        * over the possible CPUs (the current one included) and
+        * starting with the loc_l->next_steal CPU.
+        */
+       first_victim = loc_l->next_steal;
+       victim = first_victim;
+       do {
+               victim_l = per_cpu_ptr(clru->local_list, victim);
+
+               raw_spin_lock_irqsave(&victim_l->lock, flags);
+
+               node = __local_list_pop_free(victim_l);
+               if (!node)
+                       node = __local_list_pop_pending(lru, victim_l);
+
+               raw_spin_unlock_irqrestore(&victim_l->lock, flags);
+
+               victim = get_next_cpu(victim);
+       } while (!node && victim != first_victim);
+
+       /* Resume from the following CPU on the next steal attempt */
+       loc_l->next_steal = victim;
+
+       if (!node)
+               return NULL;
+
+       raw_spin_lock_irqsave(&loc_l->lock, flags);
+       __local_list_add_pending(lru, loc_l, cpu, node, hash);
+       raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+
+       return node;
+}
+
+/* Get a free node from @lru and store @hash in it, dispatching on
+ * the LRU flavour.  Returns NULL when no node is available.
+ */
+struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash)
+{
+       if (!lru->percpu)
+               return bpf_common_lru_pop_free(lru, hash);
+
+       return bpf_percpu_lru_pop_free(lru, hash);
+}
+
+/* Give @node back to the common LRU flavour.
+ *
+ * A node tagged as pending is moved to the local free list of the CPU
+ * recorded in node->cpu.  Any other node goes to the free list of the
+ * common LRU list.  Pushing a node that is already tagged free
+ * triggers a one-time warning and is ignored.
+ */
+static void bpf_common_lru_push_free(struct bpf_lru *lru,
+                                    struct bpf_lru_node *node)
+{
+       struct bpf_lru_locallist *loc_l;
+       unsigned long flags;
+
+       if (WARN_ON_ONCE(node->type == BPF_LRU_LIST_T_FREE))
+               return;
+
+       if (WARN_ON_ONCE(node->type == BPF_LRU_LOCAL_LIST_T_FREE))
+               return;
+
+       if (node->type == BPF_LRU_LOCAL_LIST_T_PENDING) {
+               loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu);
+
+               raw_spin_lock_irqsave(&loc_l->lock, flags);
+
+               /* The type was first read without the lock held, so
+                * check it again now that the lock is taken.
+                */
+               if (unlikely(node->type != BPF_LRU_LOCAL_LIST_T_PENDING)) {
+                       raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+               } else {
+                       node->type = BPF_LRU_LOCAL_LIST_T_FREE;
+                       node->ref = 0;
+                       list_move(&node->list, local_free_list(loc_l));
+
+                       raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+                       return;
+               }
+       }
+
+       bpf_lru_list_push_free(&lru->common_lru.lru_list, node);
+}
+
+/* Give @node back to the free list of the per-cpu LRU list selected
+ * by node->cpu, under that list's lock with interrupts disabled.
+ */
+static void bpf_percpu_lru_push_free(struct bpf_lru *lru,
+                                    struct bpf_lru_node *node)
+{
+       struct bpf_lru_list *owner = per_cpu_ptr(lru->percpu_lru, node->cpu);
+       unsigned long irq_flags;
+
+       raw_spin_lock_irqsave(&owner->lock, irq_flags);
+       __bpf_lru_node_move(owner, node, BPF_LRU_LIST_T_FREE);
+       raw_spin_unlock_irqrestore(&owner->lock, irq_flags);
+}
+
+/* Give @node back to @lru, dispatching on the LRU flavour. */
+void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node)
+{
+       if (!lru->percpu) {
+               bpf_common_lru_push_free(lru, node);
+               return;
+       }
+
+       bpf_percpu_lru_push_free(lru, node);
+}
+
+/* Put the @nr_elems elements found in @buf on the free list of the
+ * common LRU list.  Elements are @elem_size bytes apart and each one
+ * embeds its struct bpf_lru_node at byte offset @node_offset.
+ *
+ * File-local helper reached through bpf_lru_populate(): it has no
+ * prototype in bpf_lru_list.h, so it is given internal linkage.
+ */
+static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
+                                   u32 node_offset, u32 elem_size,
+                                   u32 nr_elems)
+{
+       struct bpf_lru_list *l = &lru->common_lru.lru_list;
+       u32 i;
+
+       for (i = 0; i < nr_elems; i++) {
+               struct bpf_lru_node *node;
+
+               node = (struct bpf_lru_node *)(buf + node_offset);
+               node->type = BPF_LRU_LIST_T_FREE;
+               node->ref = 0;
+               list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
+               buf += elem_size;
+       }
+}
+
+/* Spread the @nr_elems elements found in @buf over the free lists of
+ * the per-cpu LRU lists, nr_elems / num_possible_cpus() consecutive
+ * elements per possible CPU.  Elements are @elem_size bytes apart and
+ * each one embeds its struct bpf_lru_node at byte offset
+ * @node_offset.
+ *
+ * File-local helper reached through bpf_lru_populate(): it has no
+ * prototype in bpf_lru_list.h, so it is given internal linkage.
+ */
+static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
+                                   u32 node_offset, u32 elem_size,
+                                   u32 nr_elems)
+{
+       u32 i, pcpu_entries;
+       int cpu;
+       struct bpf_lru_list *l;
+
+       /* Nothing to distribute; do not touch @buf at all */
+       if (!nr_elems)
+               return;
+
+       pcpu_entries = nr_elems / num_possible_cpus();
+       /* With fewer elements than possible CPUs the share rounds down
+        * to zero, which would make the modulo below divide by zero.
+        * Hand out one element per CPU until they run out instead.
+        */
+       if (!pcpu_entries)
+               pcpu_entries = 1;
+
+       i = 0;
+
+       for_each_possible_cpu(cpu) {
+               struct bpf_lru_node *node;
+
+               l = per_cpu_ptr(lru->percpu_lru, cpu);
+again:
+               node = (struct bpf_lru_node *)(buf + node_offset);
+               node->cpu = cpu;
+               node->type = BPF_LRU_LIST_T_FREE;
+               node->ref = 0;
+               list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
+               i++;
+               buf += elem_size;
+               if (i == nr_elems)
+                       break;
+               /* Stay on this CPU until its share is complete */
+               if (i % pcpu_entries)
+                       goto again;
+       }
+}
+
+/* Seed the free list(s) of @lru with the @nr_elems elements of @buf,
+ * dispatching on the LRU flavour.  Elements are @elem_size bytes
+ * apart and embed their struct bpf_lru_node at @node_offset.
+ */
+void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
+                     u32 elem_size, u32 nr_elems)
+{
+       if (!lru->percpu) {
+               bpf_common_lru_populate(lru, buf, node_offset, elem_size,
+                                       nr_elems);
+               return;
+       }
+
+       bpf_percpu_lru_populate(lru, buf, node_offset, elem_size, nr_elems);
+}
+
+/* Initialize the local list @loc_l: empty list heads, an initialized
+ * lock, and @cpu as the first CPU to steal from.
+ */
+static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu)
+{
+       int idx;
+
+       raw_spin_lock_init(&loc_l->lock);
+       loc_l->next_steal = cpu;
+
+       for (idx = 0; idx < NR_BPF_LRU_LOCAL_LIST_T; idx++)
+               INIT_LIST_HEAD(&loc_l->lists[idx]);
+}
+
+/* Initialize the LRU list @l: empty list heads, zeroed counters, an
+ * initialized lock, and the inactive rotation cursor parked on the
+ * head of the inactive list.
+ */
+static void bpf_lru_list_init(struct bpf_lru_list *l)
+{
+       int idx;
+
+       raw_spin_lock_init(&l->lock);
+
+       for (idx = 0; idx < NR_BPF_LRU_LIST_T; idx++)
+               INIT_LIST_HEAD(&l->lists[idx]);
+
+       for (idx = 0; idx < NR_BPF_LRU_LIST_COUNT; idx++)
+               l->counts[idx] = 0;
+
+       l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE];
+}
+
+/* Set up @lru.
+ *
+ * With @percpu set, one LRU list is allocated per possible CPU.
+ * Otherwise a single common LRU list is used together with one local
+ * list per possible CPU.  @hash_offset is the byte offset, from a
+ * node, at which the hash is stored; @del_from_htab is called with
+ * @del_arg before a node is reclaimed.
+ *
+ * Returns 0 on success or -ENOMEM when the per-cpu allocation fails.
+ */
+int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
+                del_from_htab_func del_from_htab, void *del_arg)
+{
+       int cpu;
+
+       if (!percpu) {
+               struct bpf_common_lru *clru = &lru->common_lru;
+
+               clru->local_list = alloc_percpu(struct bpf_lru_locallist);
+               if (!clru->local_list)
+                       return -ENOMEM;
+
+               for_each_possible_cpu(cpu)
+                       bpf_lru_locallist_init(per_cpu_ptr(clru->local_list,
+                                                          cpu), cpu);
+
+               bpf_lru_list_init(&clru->lru_list);
+               lru->nr_scans = LOCAL_NR_SCANS;
+       } else {
+               lru->percpu_lru = alloc_percpu(struct bpf_lru_list);
+               if (!lru->percpu_lru)
+                       return -ENOMEM;
+
+               for_each_possible_cpu(cpu)
+                       bpf_lru_list_init(per_cpu_ptr(lru->percpu_lru, cpu));
+
+               lru->nr_scans = PERCPU_NR_SCANS;
+       }
+
+       lru->hash_offset = hash_offset;
+       lru->del_from_htab = del_from_htab;
+       lru->del_arg = del_arg;
+       lru->percpu = percpu;
+
+       return 0;
+}
+
+/* Release the per-cpu memory that bpf_lru_init() allocated for @lru. */
+void bpf_lru_destroy(struct bpf_lru *lru)
+{
+       if (!lru->percpu) {
+               free_percpu(lru->common_lru.local_list);
+               return;
+       }
+
+       free_percpu(lru->percpu_lru);
+}
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
new file mode 100644 (file)
index 0000000..5c35a98
--- /dev/null
@@ -0,0 +1,84 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef __BPF_LRU_LIST_H_
+#define __BPF_LRU_LIST_H_
+
+#include <linux/list.h>
+#include <linux/spinlock_types.h>
+
+#define NR_BPF_LRU_LIST_T      (3)
+#define NR_BPF_LRU_LIST_COUNT  (2)
+#define NR_BPF_LRU_LOCAL_LIST_T (2)
+#define BPF_LOCAL_LIST_T_OFFSET NR_BPF_LRU_LIST_T
+
+enum bpf_lru_list_type {
+       BPF_LRU_LIST_T_ACTIVE, /* 0: has both a list head and a counter in struct bpf_lru_list */
+       BPF_LRU_LIST_T_INACTIVE, /* 1: has both a list head and a counter in struct bpf_lru_list */
+       BPF_LRU_LIST_T_FREE, /* 2: has a list head but no counter (NR_BPF_LRU_LIST_COUNT is 2) */
+       BPF_LRU_LOCAL_LIST_T_FREE, /* 3: first local type, equal to BPF_LOCAL_LIST_T_OFFSET */
+       BPF_LRU_LOCAL_LIST_T_PENDING, /* 4: second and last local type */
+};
+
+struct bpf_lru_node {
+       struct list_head list; /* linkage for whichever LRU or local list currently holds the node */
+       u16 cpu; /* CPU recorded for the node; push_free uses it to find the owning per-cpu list */
+       u8 type; /* an enum bpf_lru_list_type value */
+       u8 ref; /* reference bit, set to 1 by bpf_lru_node_set_ref() */
+};
+
+struct bpf_lru_list {
+       struct list_head lists[NR_BPF_LRU_LIST_T];
+       unsigned int counts[NR_BPF_LRU_LIST_COUNT];
+       /* The next inactive list rotation starts from here */
+       struct list_head *next_inactive_rotation;
+
+       raw_spinlock_t lock ____cacheline_aligned_in_smp;
+};
+
+struct bpf_lru_locallist {
+       struct list_head lists[NR_BPF_LRU_LOCAL_LIST_T]; /* indexed by local list type minus BPF_LOCAL_LIST_T_OFFSET */
+       u16 next_steal; /* CPU at which the next steal attempt starts */
+       raw_spinlock_t lock;
+};
+
+struct bpf_common_lru {
+       struct bpf_lru_list lru_list; /* the single LRU list used by the non-percpu flavour */
+       struct bpf_lru_locallist __percpu *local_list; /* one local free/pending list pair per CPU */
+};
+
+typedef bool (*del_from_htab_func)(void *arg, struct bpf_lru_node *node);
+
+struct bpf_lru {
+       union { /* which member is valid is selected by the percpu flag below */
+               struct bpf_common_lru common_lru;
+               struct bpf_lru_list __percpu *percpu_lru;
+       };
+       del_from_htab_func del_from_htab; /* called as del_from_htab(del_arg, node) before a node is reclaimed */
+       void *del_arg;
+       unsigned int hash_offset; /* byte offset from a node at which the u32 hash is stored */
+       unsigned int nr_scans; /* upper bound on nodes examined by one rotate or shrink pass */
+       bool percpu;
+};
+
+static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
+{
+       /* ref is an approximation on access frequency.  It does not
+        * have to be very accurate.  Hence, no protection is used.
+        *
+        * This is a plain store of 1, done without taking any lock.
+        */
+       node->ref = 1;
+}
+
+int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
+                del_from_htab_func del_from_htab, void *delete_arg);
+void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
+                     u32 elem_size, u32 nr_elems);
+void bpf_lru_destroy(struct bpf_lru *lru);
+struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash);
+void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node);
+void bpf_lru_promote(struct bpf_lru *lru, struct bpf_lru_node *node);
+
+#endif
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
new file mode 100644 (file)
index 0000000..a515f7b
--- /dev/null
@@ -0,0 +1,200 @@
+/*
+ * Functions to manage eBPF programs attached to cgroups
+ *
+ * Copyright (c) 2016 Daniel Mack
+ *
+ * This file is subject to the terms and conditions of version 2 of the GNU
+ * General Public License.  See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#include <linux/bpf-cgroup.h>
+#include <net/sock.h>
+
+DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
+EXPORT_SYMBOL(cgroup_bpf_enabled_key);
+
+/**
+ * cgroup_bpf_put() - put references of all bpf programs
+ * @cgrp: the cgroup to modify
+ */
+void cgroup_bpf_put(struct cgroup *cgrp)
+{
+       unsigned int type;
+
+       for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
+               struct bpf_prog *prog = cgrp->bpf.prog[type];
+
+               if (prog) {
+                       bpf_prog_put(prog);
+                       static_branch_dec(&cgroup_bpf_enabled_key);
+               }
+       }
+}
+
+/**
+ * cgroup_bpf_inherit() - inherit effective programs from parent
+ * @cgrp: the cgroup to modify
+ * @parent: the parent to inherit from
+ */
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
+{
+       unsigned int type;
+
+       for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
+               struct bpf_prog *e;
+
+               e = rcu_dereference_protected(parent->bpf.effective[type],
+                                             lockdep_is_held(&cgroup_mutex));
+               rcu_assign_pointer(cgrp->bpf.effective[type], e);
+       }
+}
+
+/**
+ * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
+ *                         propagate the change to descendants
+ * @cgrp: The cgroup whose descendants to traverse
+ * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
+ * @prog: A new program to pin
+ * @type: Type of pinning operation (ingress/egress)
+ *
+ * Each cgroup has a set of two pointers for bpf programs; one for eBPF
+ * programs it owns, and one for the program that is effective for execution.
+ *
+ * If @prog is not %NULL, this function attaches a new program to the cgroup
+ * and releases the one that is currently attached, if any. @prog is then made
+ * the effective program of type @type in that cgroup.
+ *
+ * If @prog is %NULL, the currently attached program of type @type is released,
+ * and the effective program of the parent cgroup (if any) is inherited to
+ * @cgrp.
+ *
+ * Then, the descendants of @cgrp are walked and the effective program for
+ * each of them is set to the effective program of @cgrp unless the
+ * descendant has its own program attached, in which case the subbranch is
+ * skipped. This ensures that delegated subcgroups with own programs are left
+ * untouched.
+ *
+ * Must be called with cgroup_mutex held.
+ */
+void __cgroup_bpf_update(struct cgroup *cgrp,
+                        struct cgroup *parent,
+                        struct bpf_prog *prog,
+                        enum bpf_attach_type type)
+{
+       struct bpf_prog *old_prog, *effective;
+       struct cgroup_subsys_state *pos;
+
+       old_prog = xchg(cgrp->bpf.prog + type, prog);
+
+       effective = (!prog && parent) ?
+               rcu_dereference_protected(parent->bpf.effective[type],
+                                         lockdep_is_held(&cgroup_mutex)) :
+               prog;
+
+       css_for_each_descendant_pre(pos, &cgrp->self) {
+               struct cgroup *desc = container_of(pos, struct cgroup, self);
+
+               /* skip the subtree if the descendant has its own program */
+               if (desc->bpf.prog[type] && desc != cgrp)
+                       pos = css_rightmost_descendant(pos);
+               else
+                       rcu_assign_pointer(desc->bpf.effective[type],
+                                          effective);
+       }
+
+       if (prog)
+               static_branch_inc(&cgroup_bpf_enabled_key);
+
+       if (old_prog) {
+               bpf_prog_put(old_prog);
+               static_branch_dec(&cgroup_bpf_enabled_key);
+       }
+}
+
+/**
+ * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
+ * @sk: The socket sending or receiving traffic
+ * @skb: The skb that is being sent or received
+ * @type: The type of program to be executed
+ *
+ * If no socket is passed, or the socket is not of type INET or INET6,
+ * this function does nothing and returns 0.
+ *
+ * The program type passed in via @type must be suitable for network
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_skb(struct sock *sk,
+                               struct sk_buff *skb,
+                               enum bpf_attach_type type)
+{
+       struct bpf_prog *prog;
+       struct cgroup *cgrp;
+       int ret = 0;
+
+       if (!sk || !sk_fullsock(sk))
+               return 0;
+
+       if (sk->sk_family != AF_INET &&
+           sk->sk_family != AF_INET6)
+               return 0;
+
+       cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+
+       rcu_read_lock();
+
+       prog = rcu_dereference(cgrp->bpf.effective[type]);
+       if (prog) {
+               unsigned int offset = skb->data - skb_network_header(skb);
+
+               __skb_push(skb, offset);
+               ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
+               __skb_pull(skb, offset);
+       }
+
+       rcu_read_unlock();
+
+       return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
+
+/**
+ * __cgroup_bpf_run_filter_sk() - Run a program on a sock
+ * @sk: sock structure to manipulate
+ * @type: The type of program to be executed
+ *
+ * The socket passed is expected to be of type INET or INET6.
+ *
+ * The program type passed in via @type must be suitable for sock
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if an attached program was found
+ * and if it returned != 1 during execution. In all other cases, 0 is returned.
+ */
+int __cgroup_bpf_run_filter_sk(struct sock *sk,
+                              enum bpf_attach_type type)
+{
+       struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+       struct bpf_prog *prog;
+       int ret = 0;
+
+
+       rcu_read_lock();
+
+       prog = rcu_dereference(cgrp->bpf.effective[type]);
+       if (prog)
+               ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
+
+       rcu_read_unlock();
+
+       return ret;
+}
+EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
index 570eeca7bdfa79ce16d18ee94fac64c40aa11d38..34debc1a9641875382b6603820f3e85c4a18dffc 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/filter.h>
 #include <linux/vmalloc.h>
 #include "percpu_freelist.h"
+#include "bpf_lru_list.h"
 
 struct bucket {
        struct hlist_head head;
@@ -25,7 +26,10 @@ struct bpf_htab {
        struct bpf_map map;
        struct bucket *buckets;
        void *elems;
-       struct pcpu_freelist freelist;
+       union {
+               struct pcpu_freelist freelist;
+               struct bpf_lru lru;
+       };
        void __percpu *extra_elems;
        atomic_t count; /* number of elements in this hashtable */
        u32 n_buckets;  /* number of hash buckets */
@@ -48,11 +52,26 @@ struct htab_elem {
        union {
                struct rcu_head rcu;
                enum extra_elem_state state;
+               struct bpf_lru_node lru_node;
        };
        u32 hash;
        char key[0] __aligned(8);
 };
 
+static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
+
+static bool htab_is_lru(const struct bpf_htab *htab)
+{
+       return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH ||
+               htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
+static bool htab_is_percpu(const struct bpf_htab *htab)
+{
+       return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+               htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
                                     void __percpu *pptr)
 {
@@ -73,7 +92,7 @@ static void htab_free_elems(struct bpf_htab *htab)
 {
        int i;
 
-       if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
+       if (!htab_is_percpu(htab))
                goto free_elems;
 
        for (i = 0; i < htab->map.max_entries; i++) {
@@ -87,7 +106,22 @@ free_elems:
        vfree(htab->elems);
 }
 
-static int prealloc_elems_and_freelist(struct bpf_htab *htab)
+static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
+                                         u32 hash)
+{
+       struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash);
+       struct htab_elem *l;
+
+       if (node) {
+               l = container_of(node, struct htab_elem, lru_node);
+               memcpy(l->key, key, htab->map.key_size);
+               return l;
+       }
+
+       return NULL;
+}
+
+static int prealloc_init(struct bpf_htab *htab)
 {
        int err = -ENOMEM, i;
 
@@ -95,7 +129,7 @@ static int prealloc_elems_and_freelist(struct bpf_htab *htab)
        if (!htab->elems)
                return -ENOMEM;
 
-       if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
+       if (!htab_is_percpu(htab))
                goto skip_percpu_elems;
 
        for (i = 0; i < htab->map.max_entries; i++) {
@@ -110,12 +144,27 @@ static int prealloc_elems_and_freelist(struct bpf_htab *htab)
        }
 
 skip_percpu_elems:
-       err = pcpu_freelist_init(&htab->freelist);
+       if (htab_is_lru(htab))
+               err = bpf_lru_init(&htab->lru,
+                                  htab->map.map_flags & BPF_F_NO_COMMON_LRU,
+                                  offsetof(struct htab_elem, hash) -
+                                  offsetof(struct htab_elem, lru_node),
+                                  htab_lru_map_delete_node,
+                                  htab);
+       else
+               err = pcpu_freelist_init(&htab->freelist);
+
        if (err)
                goto free_elems;
 
-       pcpu_freelist_populate(&htab->freelist, htab->elems, htab->elem_size,
-                              htab->map.max_entries);
+       if (htab_is_lru(htab))
+               bpf_lru_populate(&htab->lru, htab->elems,
+                                offsetof(struct htab_elem, lru_node),
+                                htab->elem_size, htab->map.max_entries);
+       else
+               pcpu_freelist_populate(&htab->freelist, htab->elems,
+                                      htab->elem_size, htab->map.max_entries);
+
        return 0;
 
 free_elems:
@@ -123,6 +172,16 @@ free_elems:
        return err;
 }
 
+static void prealloc_destroy(struct bpf_htab *htab)
+{
+       htab_free_elems(htab);
+
+       if (htab_is_lru(htab))
+               bpf_lru_destroy(&htab->lru);
+       else
+               pcpu_freelist_destroy(&htab->freelist);
+}
+
 static int alloc_extra_elems(struct bpf_htab *htab)
 {
        void __percpu *pptr;
@@ -143,15 +202,37 @@ static int alloc_extra_elems(struct bpf_htab *htab)
 /* Called from syscall */
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
-       bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_HASH;
+       bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+                      attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
+       bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
+                   attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
+       /* percpu_lru means each cpu has its own LRU list.
+        * it is different from BPF_MAP_TYPE_PERCPU_HASH where
+        * the map's value itself is percpu.  percpu_lru has
+        * nothing to do with the map's value.
+        */
+       bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
+       bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
        struct bpf_htab *htab;
        int err, i;
        u64 cost;
 
-       if (attr->map_flags & ~BPF_F_NO_PREALLOC)
+       if (lru && !capable(CAP_SYS_ADMIN))
+               /* LRU implementation is much more complicated than other
+                * maps.  Hence, limit to CAP_SYS_ADMIN for now.
+                */
+               return ERR_PTR(-EPERM);
+
+       if (attr->map_flags & ~(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU))
                /* reserved bits should not be used */
                return ERR_PTR(-EINVAL);
 
+       if (!lru && percpu_lru)
+               return ERR_PTR(-EINVAL);
+
+       if (lru && !prealloc)
+               return ERR_PTR(-ENOTSUPP);
+
        htab = kzalloc(sizeof(*htab), GFP_USER);
        if (!htab)
                return ERR_PTR(-ENOMEM);
@@ -171,6 +252,18 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
            htab->map.value_size == 0)
                goto free_htab;
 
+       if (percpu_lru) {
+               /* ensure each CPU's lru list has >=1 elements.
+                * since we are at it, make each lru list have the same
+                * number of elements.
+                */
+               htab->map.max_entries = roundup(attr->max_entries,
+                                               num_possible_cpus());
+               if (htab->map.max_entries < attr->max_entries)
+                       htab->map.max_entries = rounddown(attr->max_entries,
+                                                         num_possible_cpus());
+       }
+
        /* hash table size must be power of 2 */
        htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
 
@@ -241,14 +334,17 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
                raw_spin_lock_init(&htab->buckets[i].lock);
        }
 
-       if (!percpu) {
+       if (!percpu && !lru) {
+               /* lru itself can remove the least used element, so
+                * there is no need for an extra elem during map_update.
+                */
                err = alloc_extra_elems(htab);
                if (err)
                        goto free_buckets;
        }
 
-       if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
-               err = prealloc_elems_and_freelist(htab);
+       if (prealloc) {
+               err = prealloc_init(htab);
                if (err)
                        goto free_extra_elems;
        }
@@ -323,6 +419,46 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
        return NULL;
 }
 
+static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
+{
+       struct htab_elem *l = __htab_map_lookup_elem(map, key);
+
+       if (l) {
+               bpf_lru_node_set_ref(&l->lru_node);
+               return l->key + round_up(map->key_size, 8);
+       }
+
+       return NULL;
+}
+
+/* It is called from the bpf_lru_list when the LRU needs to delete
+ * older elements from the htab.
+ */
+static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
+{
+       struct bpf_htab *htab = (struct bpf_htab *)arg;
+       struct htab_elem *l, *tgt_l;
+       struct hlist_head *head;
+       unsigned long flags;
+       struct bucket *b;
+
+       tgt_l = container_of(node, struct htab_elem, lru_node);
+       b = __select_bucket(htab, tgt_l->hash);
+       head = &b->head;
+
+       raw_spin_lock_irqsave(&b->lock, flags);
+
+       hlist_for_each_entry_rcu(l, head, hash_node)
+               if (l == tgt_l) {
+                       hlist_del_rcu(&l->hash_node);
+                       break;
+               }
+
+       raw_spin_unlock_irqrestore(&b->lock, flags);
+
+       return l == tgt_l;
+}
+
 /* Called from syscall */
 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
@@ -420,6 +556,24 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
        }
 }
 
+static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
+                           void *value, bool onallcpus)
+{
+       if (!onallcpus) {
+               /* copy true value_size bytes */
+               memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
+       } else {
+               u32 size = round_up(htab->map.value_size, 8);
+               int off = 0, cpu;
+
+               for_each_possible_cpu(cpu) {
+                       bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
+                                       value + off, size);
+                       off += size;
+               }
+       }
+}
+
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                                         void *value, u32 key_size, u32 hash,
                                         bool percpu, bool onallcpus,
@@ -479,18 +633,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
                        }
                }
 
-               if (!onallcpus) {
-                       /* copy true value_size bytes */
-                       memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
-               } else {
-                       int off = 0, cpu;
+               pcpu_copy_value(htab, pptr, value, onallcpus);
 
-                       for_each_possible_cpu(cpu) {
-                               bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
-                                               value + off, size);
-                               off += size;
-                       }
-               }
                if (!prealloc)
                        htab_elem_set_ptr(l_new, key_size, pptr);
        } else {
@@ -571,6 +715,70 @@ err:
        return ret;
 }
 
+static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
+                                   u64 map_flags)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       struct htab_elem *l_new, *l_old = NULL;
+       struct hlist_head *head;
+       unsigned long flags;
+       struct bucket *b;
+       u32 key_size, hash;
+       int ret;
+
+       if (unlikely(map_flags > BPF_EXIST))
+               /* unknown flags */
+               return -EINVAL;
+
+       WARN_ON_ONCE(!rcu_read_lock_held());
+
+       key_size = map->key_size;
+
+       hash = htab_map_hash(key, key_size);
+
+       b = __select_bucket(htab, hash);
+       head = &b->head;
+
+       /* For LRU, we need to alloc before taking bucket's
+        * spinlock because getting free nodes from LRU may need
+        * to remove older elements from htab and this removal
+        * operation will need a bucket lock.
+        */
+       l_new = prealloc_lru_pop(htab, key, hash);
+       if (!l_new)
+               return -ENOMEM;
+       memcpy(l_new->key + round_up(map->key_size, 8), value, map->value_size);
+
+       /* bpf_map_update_elem() can be called in_irq() */
+       raw_spin_lock_irqsave(&b->lock, flags);
+
+       l_old = lookup_elem_raw(head, hash, key, key_size);
+
+       ret = check_flags(htab, l_old, map_flags);
+       if (ret)
+               goto err;
+
+       /* add new element to the head of the list, so that
+        * concurrent search will find it before old elem
+        */
+       hlist_add_head_rcu(&l_new->hash_node, head);
+       if (l_old) {
+               bpf_lru_node_set_ref(&l_new->lru_node);
+               hlist_del_rcu(&l_old->hash_node);
+       }
+       ret = 0;
+
+err:
+       raw_spin_unlock_irqrestore(&b->lock, flags);
+
+       if (ret)
+               bpf_lru_push_free(&htab->lru, &l_new->lru_node);
+       else if (l_old)
+               bpf_lru_push_free(&htab->lru, &l_old->lru_node);
+
+       return ret;
+}
+
 static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                                         void *value, u64 map_flags,
                                         bool onallcpus)
@@ -606,22 +814,9 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                goto err;
 
        if (l_old) {
-               void __percpu *pptr = htab_elem_get_ptr(l_old, key_size);
-               u32 size = htab->map.value_size;
-
                /* per-cpu hash map can update value in-place */
-               if (!onallcpus) {
-                       memcpy(this_cpu_ptr(pptr), value, size);
-               } else {
-                       int off = 0, cpu;
-
-                       size = round_up(size, 8);
-                       for_each_possible_cpu(cpu) {
-                               bpf_long_memcpy(per_cpu_ptr(pptr, cpu),
-                                               value + off, size);
-                               off += size;
-                       }
-               }
+               pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
+                               value, onallcpus);
        } else {
                l_new = alloc_htab_elem(htab, key, value, key_size,
                                        hash, true, onallcpus, false);
@@ -637,12 +832,84 @@ err:
        return ret;
 }
 
+static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
+                                            void *value, u64 map_flags,
+                                            bool onallcpus)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       struct htab_elem *l_new = NULL, *l_old;
+       struct hlist_head *head;
+       unsigned long flags;
+       struct bucket *b;
+       u32 key_size, hash;
+       int ret;
+
+       if (unlikely(map_flags > BPF_EXIST))
+               /* unknown flags */
+               return -EINVAL;
+
+       WARN_ON_ONCE(!rcu_read_lock_held());
+
+       key_size = map->key_size;
+
+       hash = htab_map_hash(key, key_size);
+
+       b = __select_bucket(htab, hash);
+       head = &b->head;
+
+       /* For LRU, we need to alloc before taking bucket's
+        * spinlock because LRU's elem alloc may need
+        * to remove older elem from htab and this removal
+        * operation will need a bucket lock.
+        */
+       if (map_flags != BPF_EXIST) {
+               l_new = prealloc_lru_pop(htab, key, hash);
+               if (!l_new)
+                       return -ENOMEM;
+       }
+
+       /* bpf_map_update_elem() can be called in_irq() */
+       raw_spin_lock_irqsave(&b->lock, flags);
+
+       l_old = lookup_elem_raw(head, hash, key, key_size);
+
+       ret = check_flags(htab, l_old, map_flags);
+       if (ret)
+               goto err;
+
+       if (l_old) {
+               bpf_lru_node_set_ref(&l_old->lru_node);
+
+               /* per-cpu hash map can update value in-place */
+               pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
+                               value, onallcpus);
+       } else {
+               pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
+                               value, onallcpus);
+               hlist_add_head_rcu(&l_new->hash_node, head);
+               l_new = NULL;
+       }
+       ret = 0;
+err:
+       raw_spin_unlock_irqrestore(&b->lock, flags);
+       if (l_new)
+               bpf_lru_push_free(&htab->lru, &l_new->lru_node);
+       return ret;
+}
+
 static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                                       void *value, u64 map_flags)
 {
        return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
 }
 
+static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
+                                          void *value, u64 map_flags)
+{
+       return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
+                                                false);
+}
+
 /* Called from syscall or from eBPF program */
 static int htab_map_delete_elem(struct bpf_map *map, void *key)
 {
@@ -676,6 +943,39 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
        return ret;
 }
 
+static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       struct hlist_head *head;
+       struct bucket *b;
+       struct htab_elem *l;
+       unsigned long flags;
+       u32 hash, key_size;
+       int ret = -ENOENT;
+
+       WARN_ON_ONCE(!rcu_read_lock_held());
+
+       key_size = map->key_size;
+
+       hash = htab_map_hash(key, key_size);
+       b = __select_bucket(htab, hash);
+       head = &b->head;
+
+       raw_spin_lock_irqsave(&b->lock, flags);
+
+       l = lookup_elem_raw(head, hash, key, key_size);
+
+       if (l) {
+               hlist_del_rcu(&l->hash_node);
+               ret = 0;
+       }
+
+       raw_spin_unlock_irqrestore(&b->lock, flags);
+       if (l)
+               bpf_lru_push_free(&htab->lru, &l->lru_node);
+       return ret;
+}
+
 static void delete_all_elements(struct bpf_htab *htab)
 {
        int i;
@@ -687,7 +987,8 @@ static void delete_all_elements(struct bpf_htab *htab)
 
                hlist_for_each_entry_safe(l, n, head, hash_node) {
                        hlist_del_rcu(&l->hash_node);
-                       htab_elem_free(htab, l);
+                       if (l->state != HTAB_EXTRA_ELEM_USED)
+                               htab_elem_free(htab, l);
                }
        }
 }
@@ -707,12 +1008,11 @@ static void htab_map_free(struct bpf_map *map)
         * not have executed. Wait for them.
         */
        rcu_barrier();
-       if (htab->map.map_flags & BPF_F_NO_PREALLOC) {
+       if (htab->map.map_flags & BPF_F_NO_PREALLOC)
                delete_all_elements(htab);
-       } else {
-               htab_free_elems(htab);
-               pcpu_freelist_destroy(&htab->freelist);
-       }
+       else
+               prealloc_destroy(htab);
+
        free_percpu(htab->extra_elems);
        kvfree(htab->buckets);
        kfree(htab);
@@ -732,6 +1032,20 @@ static struct bpf_map_type_list htab_type __read_mostly = {
        .type = BPF_MAP_TYPE_HASH,
 };
 
+static const struct bpf_map_ops htab_lru_ops = {
+       .map_alloc = htab_map_alloc,
+       .map_free = htab_map_free,
+       .map_get_next_key = htab_map_get_next_key,
+       .map_lookup_elem = htab_lru_map_lookup_elem,
+       .map_update_elem = htab_lru_map_update_elem,
+       .map_delete_elem = htab_lru_map_delete_elem,
+};
+
+static struct bpf_map_type_list htab_lru_type __read_mostly = {
+       .ops = &htab_lru_ops,
+       .type = BPF_MAP_TYPE_LRU_HASH,
+};
+
 /* Called from eBPF program */
 static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
 {
@@ -743,8 +1057,21 @@ static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
                return NULL;
 }
 
+static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
+{
+       struct htab_elem *l = __htab_map_lookup_elem(map, key);
+
+       if (l) {
+               bpf_lru_node_set_ref(&l->lru_node);
+               return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
+       }
+
+       return NULL;
+}
+
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
 {
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
        struct htab_elem *l;
        void __percpu *pptr;
        int ret = -ENOENT;
@@ -760,6 +1087,8 @@ int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
        l = __htab_map_lookup_elem(map, key);
        if (!l)
                goto out;
+       if (htab_is_lru(htab))
+               bpf_lru_node_set_ref(&l->lru_node);
        pptr = htab_elem_get_ptr(l, map->key_size);
        for_each_possible_cpu(cpu) {
                bpf_long_memcpy(value + off,
@@ -775,10 +1104,16 @@ out:
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
                           u64 map_flags)
 {
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
        int ret;
 
        rcu_read_lock();
-       ret = __htab_percpu_map_update_elem(map, key, value, map_flags, true);
+       if (htab_is_lru(htab))
+               ret = __htab_lru_percpu_map_update_elem(map, key, value,
+                                                       map_flags, true);
+       else
+               ret = __htab_percpu_map_update_elem(map, key, value, map_flags,
+                                                   true);
        rcu_read_unlock();
 
        return ret;
@@ -798,10 +1133,26 @@ static struct bpf_map_type_list htab_percpu_type __read_mostly = {
        .type = BPF_MAP_TYPE_PERCPU_HASH,
 };
 
+static const struct bpf_map_ops htab_lru_percpu_ops = {
+       .map_alloc = htab_map_alloc,
+       .map_free = htab_map_free,
+       .map_get_next_key = htab_map_get_next_key,
+       .map_lookup_elem = htab_lru_percpu_map_lookup_elem,
+       .map_update_elem = htab_lru_percpu_map_update_elem,
+       .map_delete_elem = htab_lru_map_delete_elem,
+};
+
+static struct bpf_map_type_list htab_lru_percpu_type __read_mostly = {
+       .ops = &htab_lru_percpu_ops,
+       .type = BPF_MAP_TYPE_LRU_PERCPU_HASH,
+};
+
 static int __init register_htab_map(void)
 {
        bpf_register_map_type(&htab_type);
        bpf_register_map_type(&htab_percpu_type);
+       bpf_register_map_type(&htab_lru_type);
+       bpf_register_map_type(&htab_lru_percpu_type);
        return 0;
 }
 late_initcall(register_htab_map);
index 1ed8473ec537c5aa13feb483c0fc44d2b6ad35ad..0b030c9126d3a5f5452461f4df3a5349d80ad4a7 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/namei.h>
 #include <linux/fs.h>
 #include <linux/kdev_t.h>
+#include <linux/parser.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
 
@@ -87,6 +88,7 @@ static struct inode *bpf_get_inode(struct super_block *sb,
        switch (mode & S_IFMT) {
        case S_IFDIR:
        case S_IFREG:
+       case S_IFLNK:
                break;
        default:
                return ERR_PTR(-EINVAL);
@@ -119,6 +121,16 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
        return 0;
 }
 
+static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode,
+                               struct inode *dir)
+{
+       d_instantiate(dentry, inode);
+       dget(dentry);
+
+       dir->i_mtime = current_time(dir);
+       dir->i_ctime = dir->i_mtime;
+}
+
 static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
@@ -133,9 +145,7 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        inc_nlink(inode);
        inc_nlink(dir);
 
-       d_instantiate(dentry, inode);
-       dget(dentry);
-
+       bpf_dentry_finalize(dentry, inode, dir);
        return 0;
 }
 
@@ -151,9 +161,7 @@ static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry,
        inode->i_op = iops;
        inode->i_private = dentry->d_fsdata;
 
-       d_instantiate(dentry, inode);
-       dget(dentry);
-
+       bpf_dentry_finalize(dentry, inode, dir);
        return 0;
 }
 
@@ -181,13 +189,37 @@ bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
 {
        if (strchr(dentry->d_name.name, '.'))
                return ERR_PTR(-EPERM);
+
        return simple_lookup(dir, dentry, flags);
 }
 
+static int bpf_symlink(struct inode *dir, struct dentry *dentry,
+                      const char *target)
+{
+       char *link = kstrdup(target, GFP_USER | __GFP_NOWARN);
+       struct inode *inode;
+
+       if (!link)
+               return -ENOMEM;
+
+       inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK);
+       if (IS_ERR(inode)) {
+               kfree(link);
+               return PTR_ERR(inode);
+       }
+
+       inode->i_op = &simple_symlink_inode_operations;
+       inode->i_link = link;
+
+       bpf_dentry_finalize(dentry, inode, dir);
+       return 0;
+}
+
 static const struct inode_operations bpf_dir_iops = {
        .lookup         = bpf_lookup,
        .mknod          = bpf_mkobj,
        .mkdir          = bpf_mkdir,
+       .symlink        = bpf_symlink,
        .rmdir          = simple_rmdir,
        .rename         = simple_rename,
        .link           = simple_link,
@@ -324,6 +356,8 @@ static void bpf_evict_inode(struct inode *inode)
        truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
 
+       if (S_ISLNK(inode->i_mode))
+               kfree(inode->i_link);
        if (!bpf_inode_type(inode, &type))
                bpf_any_put(inode->i_private, type);
 }
@@ -331,15 +365,66 @@ static void bpf_evict_inode(struct inode *inode)
 static const struct super_operations bpf_super_ops = {
        .statfs         = simple_statfs,
        .drop_inode     = generic_delete_inode,
+       .show_options   = generic_show_options,
        .evict_inode    = bpf_evict_inode,
 };
 
+enum { /* token ids used by bpf_mount_tokens and bpf_parse_options() */
+       OPT_MODE,
+       OPT_ERR,
+};
+
+static const match_table_t bpf_mount_tokens = {
+       { OPT_MODE, "mode=%o" }, /* value is read with match_octal() */
+       { OPT_ERR, NULL }, /* NULL pattern: presumably the table terminator */
+};
+
+struct bpf_mount_opts {
+       umode_t mode; /* OR'ed into the root inode's i_mode by bpf_fill_super() */
+};
+
+/* Parse the comma-separated mount options in @data (split in place by
+ * strsep()) into @opts.  Returns 0, or -EINVAL for a malformed mode= value.
+ */
+static int bpf_parse_options(char *data, struct bpf_mount_opts *opts)
+{
+       substring_t args[MAX_OPT_ARGS];
+       char *opt;
+       int val;
+
+       opts->mode = S_IRWXUGO;         /* default when no mode= is given */
+
+       for (opt = strsep(&data, ","); opt; opt = strsep(&data, ",")) {
+               if (*opt == '\0')
+                       continue;       /* empty field, e.g. ",," */
+
+               /* We might like to report bad mount options here, but
+                * traditionally we've ignored all mount options, so we'd
+                * better continue to ignore non-existing options for bpf.
+                */
+               if (match_token(opt, bpf_mount_tokens, args) != OPT_MODE)
+                       continue;
+               if (match_octal(&args[0], &val))
+                       return -EINVAL;
+               opts->mode = val & S_IALLUGO;
+       }
+
+       return 0;
+}
+
 static int bpf_fill_super(struct super_block *sb, void *data, int silent)
 {
        static struct tree_descr bpf_rfiles[] = { { "" } };
+       struct bpf_mount_opts opts;
        struct inode *inode;
        int ret;
 
+       save_mount_options(sb, data);
+
+       ret = bpf_parse_options(data, &opts);
+       if (ret)
+               return ret;
+
        ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles);
        if (ret)
                return ret;
@@ -349,7 +434,7 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent)
        inode = sb->s_root->d_inode;
        inode->i_op = &bpf_dir_iops;
        inode->i_mode &= ~S_IALLUGO;
-       inode->i_mode |= S_ISVTX | S_IRWXUGO;
+       inode->i_mode |= S_ISVTX | opts.mode;
 
        return 0;
 }
index 228f962447a508f50ec2a2f81f83bdb1f9368e77..85af86c496cdeed8bb1ce49db3295e0b199ae403 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/license.h>
 #include <linux/filter.h>
 #include <linux/version.h>
+#include <linux/kernel.h>
 
 DEFINE_PER_CPU(int, bpf_prog_active);
 
@@ -137,18 +138,31 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 {
        const struct bpf_map *map = filp->private_data;
+       const struct bpf_array *array;
+       u32 owner_prog_type = 0;
+
+       if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
+               array = container_of(map, struct bpf_array, map);
+               owner_prog_type = array->owner_prog_type;
+       }
 
        seq_printf(m,
                   "map_type:\t%u\n"
                   "key_size:\t%u\n"
                   "value_size:\t%u\n"
                   "max_entries:\t%u\n"
-                  "map_flags:\t%#x\n",
+                  "map_flags:\t%#x\n"
+                  "memlock:\t%llu\n",
                   map->map_type,
                   map->key_size,
                   map->value_size,
                   map->max_entries,
-                  map->map_flags);
+                  map->map_flags,
+                  map->pages * 1ULL << PAGE_SHIFT);
+
+       if (owner_prog_type)
+               seq_printf(m, "owner_prog_type:\t%u\n",
+                          owner_prog_type);
 }
 #endif
 
@@ -194,7 +208,7 @@ static int map_create(union bpf_attr *attr)
 
        err = bpf_map_charge_memlock(map);
        if (err)
-               goto free_map;
+               goto free_map_nouncharge;
 
        err = bpf_map_new_fd(map);
        if (err < 0)
@@ -204,6 +218,8 @@ static int map_create(union bpf_attr *attr)
        return err;
 
 free_map:
+       bpf_map_uncharge_memlock(map);
+free_map_nouncharge:
        map->ops->map_free(map);
        return err;
 }
@@ -252,12 +268,6 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
        return map;
 }
 
-/* helper to convert user pointers passed inside __aligned_u64 fields */
-static void __user *u64_to_ptr(__u64 val)
-{
-       return (void __user *) (unsigned long) val;
-}
-
 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 {
        return -ENOTSUPP;
@@ -268,8 +278,8 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 
 static int map_lookup_elem(union bpf_attr *attr)
 {
-       void __user *ukey = u64_to_ptr(attr->key);
-       void __user *uvalue = u64_to_ptr(attr->value);
+       void __user *ukey = u64_to_user_ptr(attr->key);
+       void __user *uvalue = u64_to_user_ptr(attr->value);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value, *ptr;
@@ -295,6 +305,7 @@ static int map_lookup_elem(union bpf_attr *attr)
                goto free_key;
 
        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                value_size = round_up(map->value_size, 8) * num_possible_cpus();
        else
@@ -305,7 +316,8 @@ static int map_lookup_elem(union bpf_attr *attr)
        if (!value)
                goto free_key;
 
-       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
                err = bpf_percpu_hash_copy(map, key, value);
        } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                err = bpf_percpu_array_copy(map, key, value);
@@ -342,8 +354,8 @@ err_put:
 
 static int map_update_elem(union bpf_attr *attr)
 {
-       void __user *ukey = u64_to_ptr(attr->key);
-       void __user *uvalue = u64_to_ptr(attr->value);
+       void __user *ukey = u64_to_user_ptr(attr->key);
+       void __user *uvalue = u64_to_user_ptr(attr->value);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *value;
@@ -369,6 +381,7 @@ static int map_update_elem(union bpf_attr *attr)
                goto free_key;
 
        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
            map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
                value_size = round_up(map->value_size, 8) * num_possible_cpus();
        else
@@ -388,7 +401,8 @@ static int map_update_elem(union bpf_attr *attr)
         */
        preempt_disable();
        __this_cpu_inc(bpf_prog_active);
-       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+       if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+           map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
                err = bpf_percpu_hash_update(map, key, value, attr->flags);
        } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
                err = bpf_percpu_array_update(map, key, value, attr->flags);
@@ -420,7 +434,7 @@ err_put:
 
 static int map_delete_elem(union bpf_attr *attr)
 {
-       void __user *ukey = u64_to_ptr(attr->key);
+       void __user *ukey = u64_to_user_ptr(attr->key);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        struct fd f;
@@ -464,8 +478,8 @@ err_put:
 
 static int map_get_next_key(union bpf_attr *attr)
 {
-       void __user *ukey = u64_to_ptr(attr->key);
-       void __user *unext_key = u64_to_ptr(attr->next_key);
+       void __user *ukey = u64_to_user_ptr(attr->key);
+       void __user *unext_key = u64_to_user_ptr(attr->next_key);
        int ufd = attr->map_fd;
        struct bpf_map *map;
        void *key, *next_key;
@@ -680,10 +694,22 @@ struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
 }
 EXPORT_SYMBOL_GPL(bpf_prog_add);
 
+/* Undo a previous bpf_prog_add() of @i references on an error path.  Another
+ * entity in the call path still holds a reference, so reaching zero is a bug.
+ */
+void bpf_prog_sub(struct bpf_prog *prog, int i)
+{
+       int remaining = atomic_sub_return(i, &prog->aux->refcnt);
+
+       WARN_ON(remaining == 0);
+}
+EXPORT_SYMBOL_GPL(bpf_prog_sub);
+
 struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
 {
        return bpf_prog_add(prog, 1);
 }
+EXPORT_SYMBOL_GPL(bpf_prog_inc);
 
 static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
 {
@@ -730,7 +756,7 @@ static int bpf_prog_load(union bpf_attr *attr)
                return -EINVAL;
 
        /* copy eBPF program license from user space */
-       if (strncpy_from_user(license, u64_to_ptr(attr->license),
+       if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
                              sizeof(license) - 1) < 0)
                return -EFAULT;
        license[sizeof(license) - 1] = 0;
@@ -760,7 +786,7 @@ static int bpf_prog_load(union bpf_attr *attr)
        prog->len = attr->insn_cnt;
 
        err = -EFAULT;
-       if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
+       if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
                           prog->len * sizeof(struct bpf_insn)) != 0)
                goto free_prog;
 
@@ -811,7 +837,7 @@ static int bpf_obj_pin(const union bpf_attr *attr)
        if (CHECK_ATTR(BPF_OBJ))
                return -EINVAL;
 
-       return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
+       return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
 }
 
 static int bpf_obj_get(const union bpf_attr *attr)
@@ -819,9 +845,85 @@ static int bpf_obj_get(const union bpf_attr *attr)
        if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
                return -EINVAL;
 
-       return bpf_obj_get_user(u64_to_ptr(attr->pathname));
+       return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
 }
 
+#ifdef CONFIG_CGROUP_BPF
+
+#define BPF_PROG_ATTACH_LAST_FIELD attach_type
+
+/* BPF_PROG_ATTACH: hand the program behind attr->attach_bpf_fd to
+ * cgroup_bpf_update() for hook attr->attach_type on the cgroup behind
+ * attr->target_fd.  Requires CAP_NET_ADMIN.
+ */
+static int bpf_prog_attach(const union bpf_attr *attr)
+{
+       enum bpf_prog_type ptype;
+       struct bpf_prog *prog;
+       struct cgroup *cgrp;
+
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+       if (CHECK_ATTR(BPF_PROG_ATTACH))
+               return -EINVAL;
+
+       /* Each attach point accepts exactly one program type. */
+       if (attr->attach_type == BPF_CGROUP_INET_INGRESS ||
+           attr->attach_type == BPF_CGROUP_INET_EGRESS)
+               ptype = BPF_PROG_TYPE_CGROUP_SKB;
+       else if (attr->attach_type == BPF_CGROUP_INET_SOCK_CREATE)
+               ptype = BPF_PROG_TYPE_CGROUP_SOCK;
+       else
+               return -EINVAL;
+
+       prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+       if (IS_ERR(prog))
+               return PTR_ERR(prog);
+
+       cgrp = cgroup_get_from_fd(attr->target_fd);
+       if (IS_ERR(cgrp)) {
+               bpf_prog_put(prog);     /* undo bpf_prog_get_type() */
+               return PTR_ERR(cgrp);
+       }
+
+       cgroup_bpf_update(cgrp, prog, attr->attach_type);
+       cgroup_put(cgrp);
+
+       return 0;
+}
+
+#define BPF_PROG_DETACH_LAST_FIELD attach_type
+
+/* BPF_PROG_DETACH: call cgroup_bpf_update() with a NULL program for hook
+ * attr->attach_type on the cgroup behind attr->target_fd (CAP_NET_ADMIN).
+ */
+static int bpf_prog_detach(const union bpf_attr *attr)
+{
+       struct cgroup *cgrp;
+
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+       if (CHECK_ATTR(BPF_PROG_DETACH))
+               return -EINVAL;
+
+       switch (attr->attach_type) {
+       case BPF_CGROUP_INET_INGRESS:
+       case BPF_CGROUP_INET_EGRESS:
+       case BPF_CGROUP_INET_SOCK_CREATE:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       cgrp = cgroup_get_from_fd(attr->target_fd);
+       if (IS_ERR(cgrp))
+               return PTR_ERR(cgrp);
+       cgroup_bpf_update(cgrp, NULL, attr->attach_type);
+       cgroup_put(cgrp);
+       return 0;
+}
+#endif /* CONFIG_CGROUP_BPF */
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
        union bpf_attr attr = {};
@@ -888,6 +990,16 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
        case BPF_OBJ_GET:
                err = bpf_obj_get(&attr);
                break;
+
+#ifdef CONFIG_CGROUP_BPF
+       case BPF_PROG_ATTACH:
+               err = bpf_prog_attach(&attr);
+               break;
+       case BPF_PROG_DETACH:
+               err = bpf_prog_detach(&attr);
+               break;
+#endif
+
        default:
                err = -EINVAL;
                break;
index 846d7ceaf2021e327b96dafc10ca7d068b10d267..8135cb1077ee06624f7e8b05b2c43f6df5294f64 100644 (file)
@@ -19,6 +19,7 @@
 #include <net/netlink.h>
 #include <linux/file.h>
 #include <linux/vmalloc.h>
+#include <linux/stringify.h>
 
 /* bpf_check() is a static code analyzer that walks eBPF program
  * instruction by instruction and updates register/stack state.
@@ -190,6 +191,22 @@ static const char * const reg_type_str[] = {
        [PTR_TO_PACKET_END]     = "pkt_end",
 };
 
+#define __BPF_FUNC_STR_FN(x) [BPF_FUNC_ ## x] = __stringify(bpf_ ## x)
+static const char * const func_id_str[] = { /* BPF_FUNC_<x> -> "bpf_<x>" */
+       __BPF_FUNC_MAPPER(__BPF_FUNC_STR_FN)
+};
+#undef __BPF_FUNC_STR_FN
+
+/* Name of helper @id from func_id_str[], or "unknown" if out of range/unset. */
+static const char *func_id_name(int id)
+{
+       BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID);
+
+       if (id < 0 || id >= __BPF_FUNC_MAX_ID || !func_id_str[id])
+               return "unknown";
+       return func_id_str[id];
+}
+
 static void print_verifier_state(struct bpf_verifier_state *state)
 {
        struct bpf_reg_state *reg;
@@ -217,8 +234,8 @@ static void print_verifier_state(struct bpf_verifier_state *state)
                                reg->map_ptr->value_size,
                                reg->id);
                if (reg->min_value != BPF_REGISTER_MIN_RANGE)
-                       verbose(",min_value=%llu",
-                               (unsigned long long)reg->min_value);
+                       verbose(",min_value=%lld",
+                               (long long)reg->min_value);
                if (reg->max_value != BPF_REGISTER_MAX_RANGE)
                        verbose(",max_value=%llu",
                                (unsigned long long)reg->max_value);
@@ -354,7 +371,8 @@ static void print_bpf_insn(struct bpf_insn *insn)
                u8 opcode = BPF_OP(insn->code);
 
                if (opcode == BPF_CALL) {
-                       verbose("(%02x) call %d\n", insn->code, insn->imm);
+                       verbose("(%02x) call %s#%d\n", insn->code,
+                               func_id_name(insn->imm), insn->imm);
                } else if (insn->code == (BPF_JMP | BPF_JA)) {
                        verbose("(%02x) goto pc%+d\n",
                                insn->code, insn->off);
@@ -615,12 +633,19 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 #define MAX_PACKET_OFF 0xffff
 
 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
-                                      const struct bpf_call_arg_meta *meta)
+                                      const struct bpf_call_arg_meta *meta,
+                                      enum bpf_access_type t)
 {
        switch (env->prog->type) {
+       case BPF_PROG_TYPE_LWT_IN:
+       case BPF_PROG_TYPE_LWT_OUT:
+               /* dst_input() and dst_output() can't write for now */
+               if (t == BPF_WRITE)
+                       return false;
        case BPF_PROG_TYPE_SCHED_CLS:
        case BPF_PROG_TYPE_SCHED_ACT:
        case BPF_PROG_TYPE_XDP:
+       case BPF_PROG_TYPE_LWT_XMIT:
                if (meta)
                        return meta->pkt_access;
 
@@ -760,7 +785,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
                         * index'es we need to make sure that whatever we use
                         * will have a set floor within our range.
                         */
-                       if ((s64)reg->min_value < 0) {
+                       if (reg->min_value < 0) {
                                verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
                                        regno);
                                return -EACCES;
@@ -819,7 +844,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
                        err = check_stack_read(state, off, size, value_regno);
                }
        } else if (state->regs[regno].type == PTR_TO_PACKET) {
-               if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) {
+               if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
                        verbose("cannot write into packet\n");
                        return -EACCES;
                }
@@ -952,7 +977,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
                return 0;
        }
 
-       if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) {
+       if (type == PTR_TO_PACKET &&
+           !may_access_direct_pkt_data(env, meta, BPF_READ)) {
                verbose("helper access to the packet is not allowed\n");
                return -EACCES;
        }
@@ -1114,8 +1140,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 
        return 0;
 error:
-       verbose("cannot pass map_type %d into func %d\n",
-               map->map_type, func_id);
+       verbose("cannot pass map_type %d into func %s#%d\n",
+               map->map_type, func_id_name(func_id), func_id);
        return -EINVAL;
 }
 
@@ -1172,7 +1198,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
 
        /* find function prototype */
        if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
-               verbose("invalid func %d\n", func_id);
+               verbose("invalid func %s#%d\n", func_id_name(func_id), func_id);
                return -EINVAL;
        }
 
@@ -1180,7 +1206,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
                fn = env->prog->aux->ops->get_func_proto(func_id);
 
        if (!fn) {
-               verbose("unknown func %d\n", func_id);
+               verbose("unknown func %s#%d\n", func_id_name(func_id), func_id);
                return -EINVAL;
        }
 
@@ -1200,7 +1226,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
         */
        err = check_raw_mode(fn);
        if (err) {
-               verbose("kernel subsystem misconfigured func %d\n", func_id);
+               verbose("kernel subsystem misconfigured func %s#%d\n",
+                       func_id_name(func_id), func_id);
                return err;
        }
 
@@ -1256,8 +1283,8 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
                regs[BPF_REG_0].map_ptr = meta.map_ptr;
                regs[BPF_REG_0].id = ++env->id_gen;
        } else {
-               verbose("unknown return type %d of func %d\n",
-                       fn->ret_type, func_id);
+               verbose("unknown return type %d of func %s#%d\n",
+                       fn->ret_type, func_id_name(func_id), func_id);
                return -EINVAL;
        }
 
@@ -1471,7 +1498,8 @@ static void check_reg_overflow(struct bpf_reg_state *reg)
 {
        if (reg->max_value > BPF_REGISTER_MAX_RANGE)
                reg->max_value = BPF_REGISTER_MAX_RANGE;
-       if ((s64)reg->min_value < BPF_REGISTER_MIN_RANGE)
+       if (reg->min_value < BPF_REGISTER_MIN_RANGE ||
+           reg->min_value > BPF_REGISTER_MAX_RANGE)
                reg->min_value = BPF_REGISTER_MIN_RANGE;
 }
 
@@ -1479,8 +1507,8 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                                    struct bpf_insn *insn)
 {
        struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
-       u64 min_val = BPF_REGISTER_MIN_RANGE, max_val = BPF_REGISTER_MAX_RANGE;
-       bool min_set = false, max_set = false;
+       s64 min_val = BPF_REGISTER_MIN_RANGE;
+       u64 max_val = BPF_REGISTER_MAX_RANGE;
        u8 opcode = BPF_OP(insn->code);
 
        dst_reg = &regs[insn->dst_reg];
@@ -1503,7 +1531,6 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
        } else if (insn->imm < BPF_REGISTER_MAX_RANGE &&
                   (s64)insn->imm > BPF_REGISTER_MIN_RANGE) {
                min_val = max_val = insn->imm;
-               min_set = max_set = true;
        }
 
        /* We don't know anything about what was done to this register, mark it
@@ -1515,22 +1542,43 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                return;
        }
 
+       /* If one of our values was at the end of our ranges then we can't just
+        * do our normal operations to the register, we need to set the values
+        * to the min/max since they are undefined.
+        */
+       if (min_val == BPF_REGISTER_MIN_RANGE)
+               dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+       if (max_val == BPF_REGISTER_MAX_RANGE)
+               dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+
        switch (opcode) {
        case BPF_ADD:
-               dst_reg->min_value += min_val;
-               dst_reg->max_value += max_val;
+               if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
+                       dst_reg->min_value += min_val;
+               if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
+                       dst_reg->max_value += max_val;
                break;
        case BPF_SUB:
-               dst_reg->min_value -= min_val;
-               dst_reg->max_value -= max_val;
+               if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
+                       dst_reg->min_value -= min_val;
+               if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
+                       dst_reg->max_value -= max_val;
                break;
        case BPF_MUL:
-               dst_reg->min_value *= min_val;
-               dst_reg->max_value *= max_val;
+               if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
+                       dst_reg->min_value *= min_val;
+               if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
+                       dst_reg->max_value *= max_val;
                break;
        case BPF_AND:
-               /* & is special since it could end up with 0 bits set. */
-               dst_reg->min_value &= min_val;
+               /* AND with a possibly negative value is not tracked: the
+                * minimum becomes unknown.  Otherwise the minimum is 0 and
+                * the max is the max value we could AND against.
+                */
+               if (min_val < 0)
+                       dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+               else
+                       dst_reg->min_value = 0;
                dst_reg->max_value = max_val;
                break;
        case BPF_LSH:
@@ -1540,24 +1588,25 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
                 */
                if (min_val > ilog2(BPF_REGISTER_MAX_RANGE))
                        dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
-               else
+               else if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
                        dst_reg->min_value <<= min_val;
 
                if (max_val > ilog2(BPF_REGISTER_MAX_RANGE))
                        dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
-               else
+               else if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
                        dst_reg->max_value <<= max_val;
                break;
        case BPF_RSH:
-               dst_reg->min_value >>= min_val;
-               dst_reg->max_value >>= max_val;
-               break;
-       case BPF_MOD:
-               /* % is special since it is an unsigned modulus, so the floor
-                * will always be 0.
+               /* RSH by a negative number is undefined, and the BPF_RSH is an
+                * unsigned shift, so make the appropriate casts.
                 */
-               dst_reg->min_value = 0;
-               dst_reg->max_value = max_val - 1;
+               if (min_val < 0 || dst_reg->min_value < 0)
+                       dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+               else
+                       dst_reg->min_value =
+                               (u64)(dst_reg->min_value) >> min_val;
+               if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
+                       dst_reg->max_value >>= max_val;
                break;
        default:
                reset_reg_range_values(regs, insn->dst_reg);
index 44066158f0d1fa4e6a81d87e355845ff00b2e4bb..2ee9ec3051b20774b118a57e4609f30e87bf82be 100644 (file)
@@ -64,6 +64,9 @@
 #include <linux/file.h>
 #include <net/sock.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/cgroup.h>
+
 /*
  * pidlists linger the following amount before being destroyed.  The goal
  * is avoiding frequent destruction in the middle of consecutive read calls
@@ -1176,6 +1179,8 @@ static void cgroup_destroy_root(struct cgroup_root *root)
        struct cgroup *cgrp = &root->cgrp;
        struct cgrp_cset_link *link, *tmp_link;
 
+       trace_cgroup_destroy_root(root);
+
        cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
 
        BUG_ON(atomic_read(&root->nr_cgrps));
@@ -1874,6 +1879,9 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
                strcpy(root->release_agent_path, opts.release_agent);
                spin_unlock(&release_agent_path_lock);
        }
+
+       trace_cgroup_remount(root);
+
  out_unlock:
        kfree(opts.release_agent);
        kfree(opts.name);
@@ -2031,6 +2039,8 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
        if (ret)
                goto destroy_root;
 
+       trace_cgroup_setup_root(root);
+
        /*
         * There must be no failure case after here, since rebinding takes
         * care of subsystems' refcounts, which are explicitly dropped in
@@ -2315,22 +2325,18 @@ static struct file_system_type cgroup2_fs_type = {
        .fs_flags = FS_USERNS_MOUNT,
 };
 
-static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
-                                  struct cgroup_namespace *ns)
+static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
+                                struct cgroup_namespace *ns)
 {
        struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
-       int ret;
 
-       ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
-       if (ret < 0 || ret >= buflen)
-               return NULL;
-       return buf;
+       return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
 }
 
-char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
-                    struct cgroup_namespace *ns)
+int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+                  struct cgroup_namespace *ns)
 {
-       char *ret;
+       int ret;
 
        mutex_lock(&cgroup_mutex);
        spin_lock_irq(&css_set_lock);
@@ -2357,12 +2363,12 @@ EXPORT_SYMBOL_GPL(cgroup_path_ns);
  *
  * Return value is the same as kernfs_path().
  */
-char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 {
        struct cgroup_root *root;
        struct cgroup *cgrp;
        int hierarchy_id = 1;
-       char *path = NULL;
+       int ret;
 
        mutex_lock(&cgroup_mutex);
        spin_lock_irq(&css_set_lock);
@@ -2371,16 +2377,15 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 
        if (root) {
                cgrp = task_cgroup_from_root(task, root);
-               path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
+               ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
        } else {
                /* if no hierarchy exists, everyone is in "/" */
-               if (strlcpy(buf, "/", buflen) < buflen)
-                       path = buf;
+               ret = strlcpy(buf, "/", buflen);
        }
 
        spin_unlock_irq(&css_set_lock);
        mutex_unlock(&cgroup_mutex);
-       return path;
+       return ret;
 }
 EXPORT_SYMBOL_GPL(task_cgroup_path);
 
@@ -2830,6 +2835,10 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
                ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
 
        cgroup_migrate_finish(&preloaded_csets);
+
+       if (!ret)
+               trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
+
        return ret;
 }
 
@@ -3611,6 +3620,8 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
        mutex_lock(&cgroup_mutex);
 
        ret = kernfs_rename(kn, new_parent, new_name_str);
+       if (!ret)
+               trace_cgroup_rename(cgrp);
 
        mutex_unlock(&cgroup_mutex);
 
@@ -4381,6 +4392,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 
                if (task) {
                        ret = cgroup_migrate(task, false, to->root);
+                       if (!ret)
+                               trace_cgroup_transfer_tasks(to, task, false);
                        put_task_struct(task);
                }
        } while (task && !ret);
@@ -5046,6 +5059,8 @@ static void css_release_work_fn(struct work_struct *work)
                        ss->css_released(css);
        } else {
                /* cgroup release path */
+               trace_cgroup_release(cgrp);
+
                cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
                cgrp->id = -1;
 
@@ -5059,6 +5074,8 @@ static void css_release_work_fn(struct work_struct *work)
                if (cgrp->kn)
                        RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
                                         NULL);
+
+               cgroup_bpf_put(cgrp);
        }
 
        mutex_unlock(&cgroup_mutex);
@@ -5266,6 +5283,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
        if (!cgroup_on_dfl(cgrp))
                cgrp->subtree_control = cgroup_control(cgrp);
 
+       if (parent)
+               cgroup_bpf_inherit(cgrp, parent);
+
        cgroup_propagate_control(cgrp);
 
        /* @cgrp doesn't have dir yet so the following will only create csses */
@@ -5332,6 +5352,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
        if (ret)
                goto out_destroy;
 
+       trace_cgroup_mkdir(cgrp);
+
        /* let's create and online css's */
        kernfs_activate(kn);
 
@@ -5507,6 +5529,9 @@ static int cgroup_rmdir(struct kernfs_node *kn)
 
        ret = cgroup_destroy_locked(cgrp);
 
+       if (!ret)
+               trace_cgroup_rmdir(cgrp);
+
        cgroup_kn_unlock(kn);
        return ret;
 }
@@ -5743,7 +5768,7 @@ core_initcall(cgroup_wq_init);
 int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                     struct pid *pid, struct task_struct *tsk)
 {
-       char *buf, *path;
+       char *buf;
        int retval;
        struct cgroup_root *root;
 
@@ -5786,18 +5811,18 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                 * " (deleted)" is appended to the cgroup path.
                 */
                if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
-                       path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
+                       retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
                                                current->nsproxy->cgroup_ns);
-                       if (!path) {
+                       if (retval >= PATH_MAX)
                                retval = -ENAMETOOLONG;
+                       if (retval < 0)
                                goto out_unlock;
-                       }
+
+                       seq_puts(m, buf);
                } else {
-                       path = "/";
+                       seq_puts(m, "/");
                }
 
-               seq_puts(m, path);
-
                if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
                        seq_puts(m, " (deleted)\n");
                else
@@ -6062,8 +6087,9 @@ static void cgroup_release_agent(struct work_struct *work)
 {
        struct cgroup *cgrp =
                container_of(work, struct cgroup, release_agent_work);
-       char *pathbuf = NULL, *agentbuf = NULL, *path;
+       char *pathbuf = NULL, *agentbuf = NULL;
        char *argv[3], *envp[3];
+       int ret;
 
        mutex_lock(&cgroup_mutex);
 
@@ -6073,13 +6099,13 @@ static void cgroup_release_agent(struct work_struct *work)
                goto out;
 
        spin_lock_irq(&css_set_lock);
-       path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
+       ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
        spin_unlock_irq(&css_set_lock);
-       if (!path)
+       if (ret < 0 || ret >= PATH_MAX)
                goto out;
 
        argv[0] = agentbuf;
-       argv[1] = path;
+       argv[1] = pathbuf;
        argv[2] = NULL;
 
        /* minimal command environment */
@@ -6474,6 +6500,19 @@ static __init int cgroup_namespaces_init(void)
 }
 subsys_initcall(cgroup_namespaces_init);
 
+#ifdef CONFIG_CGROUP_BPF
+void cgroup_bpf_update(struct cgroup *cgrp,
+                      struct bpf_prog *prog,
+                      enum bpf_attach_type type)
+{
+       struct cgroup *parent = cgroup_parent(cgrp);
+
+       mutex_lock(&cgroup_mutex);
+       __cgroup_bpf_update(cgrp, parent, prog, type);
+       mutex_unlock(&cgroup_mutex);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
 #ifdef CONFIG_CGROUP_DEBUG
 static struct cgroup_subsys_state *
 debug_css_alloc(struct cgroup_subsys_state *parent_css)
index 5df20d6d152071b40244fb5d85279b8040a641ba..29de1a9352c005c0d4808d2f842c15d29df1b2f8 100644 (file)
@@ -228,7 +228,7 @@ static struct {
        .wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
        .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-       .dep_map = {.name = "cpu_hotplug.lock" },
+       .dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
 #endif
 };
 
index 2b4c20ab5bbe170b8762b783e61df37a38368477..29f815d2ef7e3cfdffe03dd3d9adc409749c61fc 100644 (file)
@@ -2715,7 +2715,7 @@ void __cpuset_memory_pressure_bump(void)
 int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
                     struct pid *pid, struct task_struct *tsk)
 {
-       char *buf, *p;
+       char *buf;
        struct cgroup_subsys_state *css;
        int retval;
 
@@ -2724,14 +2724,15 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
        if (!buf)
                goto out;
 
-       retval = -ENAMETOOLONG;
        css = task_get_css(tsk, cpuset_cgrp_id);
-       p = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
-                          current->nsproxy->cgroup_ns);
+       retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
+                               current->nsproxy->cgroup_ns);
        css_put(css);
-       if (!p)
+       if (retval >= PATH_MAX)
+               retval = -ENAMETOOLONG;
+       if (retval < 0)
                goto out_free;
-       seq_puts(m, p);
+       seq_puts(m, buf);
        seq_putc(m, '\n');
        retval = 0;
 out_free:
index c6e47e97b33fdb7b0bbabf91016d51bc76d4b63c..22cc734aa1b211be2e5a8a2be061dc628d503c77 100644 (file)
@@ -902,6 +902,17 @@ list_update_cgroup_event(struct perf_event *event,
         * this will always be called from the right CPU.
         */
        cpuctx = __get_cpu_context(ctx);
+
+       /* Only set/clear cpuctx->cgrp if current task uses event->cgrp. */
+       if (perf_cgroup_from_task(current, ctx) != event->cgrp) {
+               /*
+                * We are removing the last cpu event in this context.
+                * If that event is not active in this cpu, cpuctx->cgrp
+                * should've been cleared by perf_cgroup_switch.
+                */
+               WARN_ON_ONCE(!add && cpuctx->cgrp);
+               return;
+       }
        cpuctx->cgrp = add ? event->cgrp : NULL;
 }
 
@@ -1960,6 +1971,12 @@ void perf_event_disable(struct perf_event *event)
 }
 EXPORT_SYMBOL_GPL(perf_event_disable);
 
+void perf_event_disable_inatomic(struct perf_event *event)
+{
+       event->pending_disable = 1;
+       irq_work_queue(&event->pending);
+}
+
 static void perf_set_shadow_time(struct perf_event *event,
                                 struct perf_event_context *ctx,
                                 u64 tstamp)
@@ -7075,8 +7092,8 @@ static int __perf_event_overflow(struct perf_event *event,
        if (events && atomic_dec_and_test(&event->event_limit)) {
                ret = 1;
                event->pending_kill = POLL_HUP;
-               event->pending_disable = 1;
-               irq_work_queue(&event->pending);
+
+               perf_event_disable_inatomic(event);
        }
 
        READ_ONCE(event->overflow_handler)(event, data, regs);
@@ -7709,7 +7726,7 @@ static void bpf_overflow_handler(struct perf_event *event,
        if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
                goto out;
        rcu_read_lock();
-       ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
+       ret = BPF_PROG_RUN(event->prog, &ctx);
        rcu_read_unlock();
 out:
        __this_cpu_dec(bpf_prog_active);
@@ -8012,6 +8029,7 @@ restart:
  * if <size> is not specified, the range is treated as a single address.
  */
 enum {
+       IF_ACT_NONE = -1,
        IF_ACT_FILTER,
        IF_ACT_START,
        IF_ACT_STOP,
@@ -8035,6 +8053,7 @@ static const match_table_t if_tokens = {
        { IF_SRC_KERNEL,        "%u/%u" },
        { IF_SRC_FILEADDR,      "%u@%s" },
        { IF_SRC_KERNELADDR,    "%u" },
+       { IF_ACT_NONE,          NULL },
 };
 
 /*
@@ -8855,7 +8874,10 @@ EXPORT_SYMBOL_GPL(perf_pmu_register);
 
 void perf_pmu_unregister(struct pmu *pmu)
 {
+       int remove_device;
+
        mutex_lock(&pmus_lock);
+       remove_device = pmu_bus_running;
        list_del_rcu(&pmu->entry);
        mutex_unlock(&pmus_lock);
 
@@ -8869,10 +8891,12 @@ void perf_pmu_unregister(struct pmu *pmu)
        free_percpu(pmu->pmu_disable_count);
        if (pmu->type >= PERF_TYPE_MAX)
                idr_remove(&pmu_idr, pmu->type);
-       if (pmu->nr_addr_filters)
-               device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
-       device_del(pmu->dev);
-       put_device(pmu->dev);
+       if (remove_device) {
+               if (pmu->nr_addr_filters)
+                       device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);
+               device_del(pmu->dev);
+               put_device(pmu->dev);
+       }
        free_pmu_context(pmu);
 }
 EXPORT_SYMBOL_GPL(perf_pmu_unregister);
index d4129bb05e5d044101ba2cbea672f96954b69a51..f9ec9add21647fb4b60c3be08f515112b4bf4a25 100644 (file)
@@ -300,7 +300,8 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
 
 retry:
        /* Read the page with vaddr into memory */
-       ret = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
+       ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page,
+                       &vma);
        if (ret <= 0)
                return ret;
 
@@ -1710,7 +1711,8 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
         * but we treat this as a 'remote' access since it is
         * essentially a kernel access to the memory.
         */
-       result = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
+       result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page,
+                       NULL);
        if (result < 0)
                return result;
 
index 9d68c45ebbe30e34290dfda571f7d75e01ce3d97..3076f3089919b60697f296ec01126e0af5947bcc 100644 (file)
@@ -836,6 +836,7 @@ void __noreturn do_exit(long code)
         */
        perf_event_exit_task(tsk);
 
+       sched_autogroup_exit_task(tsk);
        cgroup_exit(tsk);
 
        /*
index 6d42242485cb2863e940ae0d54983a2d34789096..997ac1d584f76b0e42551975bdae960e61c0807e 100644 (file)
@@ -315,6 +315,9 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 
 static void release_task_stack(struct task_struct *tsk)
 {
+       if (WARN_ON(tsk->state != TASK_DEAD))
+               return;  /* Better to leak the stack than to free prematurely */
+
        account_kernel_stack(tsk, -1);
        arch_release_thread_stack(tsk->stack);
        free_thread_stack(tsk);
@@ -547,7 +550,8 @@ free_tsk:
 }
 
 #ifdef CONFIG_MMU
-static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
+static __latent_entropy int dup_mmap(struct mm_struct *mm,
+                                       struct mm_struct *oldmm)
 {
        struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
        struct rb_node **rb_link, *rb_parent;
@@ -1441,7 +1445,8 @@ init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid)
  * parts of the process environment (as per the clone
  * flags). The actual kick-off is left to the caller.
  */
-static struct task_struct *copy_process(unsigned long clone_flags,
+static __latent_entropy struct task_struct *copy_process(
+                                       unsigned long clone_flags,
                                        unsigned long stack_start,
                                        unsigned long stack_size,
                                        int __user *child_tidptr,
@@ -1860,6 +1865,7 @@ bad_fork_cleanup_count:
        atomic_dec(&p->cred->user->processes);
        exit_creds(p);
 bad_fork_free:
+       p->state = TASK_DEAD;
        put_task_stack(p);
        free_task(p);
 fork_out:
@@ -1926,6 +1932,7 @@ long _do_fork(unsigned long clone_flags,
 
        p = copy_process(clone_flags, stack_start, stack_size,
                         child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
+       add_latent_entropy();
        /*
         * Do this prior waking up the new thread - the thread pointer
         * might get invalid after that point, if the thread exits quickly.
index 0c5f1a5db654c7da7db4ec168ef44516407d6af5..6b669593e7eb18b18743a771415976a8c17b6920 100644 (file)
@@ -721,6 +721,7 @@ int irq_set_parent(int irq, int parent_irq)
        irq_put_desc_unlock(desc, flags);
        return 0;
 }
+EXPORT_SYMBOL_GPL(irq_set_parent);
 #endif
 
 /*
@@ -1340,12 +1341,12 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
        } else if (new->flags & IRQF_TRIGGER_MASK) {
                unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK;
-               unsigned int omsk = irq_settings_get_trigger_mask(desc);
+               unsigned int omsk = irqd_get_trigger_type(&desc->irq_data);
 
                if (nmsk != omsk)
                        /* hope the handler works with current  trigger mode */
                        pr_warn("irq %d uses trigger mode %u; requested %u\n",
-                               irq, nmsk, omsk);
+                               irq, omsk, nmsk);
        }
 
        *old_ptr = new;
index 8d44b3fea9d08f901e3b24ed4a619a4883d4042f..30e6d05aa5a9f726422c7968dabbff5e1c590c2a 100644 (file)
@@ -53,8 +53,15 @@ void notrace __sanitizer_cov_trace_pc(void)
        /*
         * We are interested in code coverage as a function of a syscall inputs,
         * so we ignore code executed in interrupts.
+        * The checks for whether we are in an interrupt are open-coded, because
+        * 1. We can't use in_interrupt() here, since it also returns true
+        *    when we are inside local_bh_disable() section.
+        * 2. We don't want to use (in_irq() | in_serving_softirq() | in_nmi()),
+        *    since that leads to slower generated code (three separate tests,
+        *    one for each of the flags).
         */
-       if (!t || in_interrupt())
+       if (!t || (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET
+                                                       | NMI_MASK)))
                return;
        mode = READ_ONCE(t->kcov_mode);
        if (mode == KCOV_MODE_TRACE) {
index 51c4b24b6328609e57e6e206814a3c3c938d8555..c2b88490d857583026a35090b62f7891446b7ba2 100644 (file)
@@ -45,6 +45,14 @@ enum {
 #define LOCKF_USED_IN_IRQ_READ \
                (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
 
+/*
+ * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text,
+ * .data and .bss to fit in required 32MB limit for the kernel. With
+ * PROVE_LOCKING we could go over this limit and cause system boot-up problems.
+ * So, reduce the static allocations for lockdeps related structures so that
+ * everything fits in current required size limit.
+ */
+#ifdef CONFIG_PROVE_LOCKING_SMALL
 /*
  * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
  * we track.
@@ -54,18 +62,24 @@ enum {
  * table (if it's not there yet), and we check it for lock order
  * conflicts and deadlocks.
  */
+#define MAX_LOCKDEP_ENTRIES    16384UL
+#define MAX_LOCKDEP_CHAINS_BITS        15
+#define MAX_STACK_TRACE_ENTRIES        262144UL
+#else
 #define MAX_LOCKDEP_ENTRIES    32768UL
 
 #define MAX_LOCKDEP_CHAINS_BITS        16
-#define MAX_LOCKDEP_CHAINS     (1UL << MAX_LOCKDEP_CHAINS_BITS)
-
-#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
 
 /*
  * Stack-trace: tightly packed array of stack backtrace
  * addresses. Protected by the hash_lock.
  */
 #define MAX_STACK_TRACE_ENTRIES        524288UL
+#endif
+
+#define MAX_LOCKDEP_CHAINS     (1UL << MAX_LOCKDEP_CHAINS_BITS)
+
+#define MAX_LOCKDEP_CHAIN_HLOCKS (MAX_LOCKDEP_CHAINS*5)
 
 extern struct list_head all_lock_classes;
 extern struct lock_chain lock_chains[];
index 1e7f5da648d991d2fbb69f0022fce67cea3b8b5e..6ccb08f57fcb431d26b266163f53b321b2782bd8 100644 (file)
@@ -498,9 +498,9 @@ static int enter_state(suspend_state_t state)
 
 #ifndef CONFIG_SUSPEND_SKIP_SYNC
        trace_suspend_resume(TPS("sync_filesystems"), 0, true);
-       printk(KERN_INFO "PM: Syncing filesystems ... ");
+       pr_info("PM: Syncing filesystems ... ");
        sys_sync();
-       printk("done.\n");
+       pr_cont("done.\n");
        trace_suspend_resume(TPS("sync_filesystems"), 0, false);
 #endif
 
index 084452e34a125ff24da375e6dce25c1224b46310..bdff5ed57f10a5ef57a015856830471422f3918a 100644 (file)
@@ -203,8 +203,10 @@ static int __init test_suspend(void)
 
        /* RTCs have initialized by now too ... can we use one? */
        dev = class_find_device(rtc_class, NULL, NULL, has_wakealarm);
-       if (dev)
+       if (dev) {
                rtc = rtc_class_open(dev_name(dev));
+               put_device(dev);
+       }
        if (!rtc) {
                printk(warn_no_rtc);
                return 0;
index d5e3973154739f9453926f28e71ec7e20ba753bb..f7a55e9ff2f7621c2403d6bf28c6f8b88d1120f2 100644 (file)
@@ -253,17 +253,6 @@ static int preferred_console = -1;
 int console_set_on_cmdline;
 EXPORT_SYMBOL(console_set_on_cmdline);
 
-#ifdef CONFIG_OF
-static bool of_specified_console;
-
-void console_set_by_of(void)
-{
-       of_specified_console = true;
-}
-#else
-# define of_specified_console false
-#endif
-
 /* Flag: console code may call schedule() */
 static int console_may_schedule;
 
@@ -794,8 +783,6 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
        return ret;
 }
 
-static void cont_flush(void);
-
 static ssize_t devkmsg_read(struct file *file, char __user *buf,
                            size_t count, loff_t *ppos)
 {
@@ -811,7 +798,6 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
        if (ret)
                return ret;
        raw_spin_lock_irq(&logbuf_lock);
-       cont_flush();
        while (user->seq == log_next_seq) {
                if (file->f_flags & O_NONBLOCK) {
                        ret = -EAGAIN;
@@ -874,7 +860,6 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
                return -ESPIPE;
 
        raw_spin_lock_irq(&logbuf_lock);
-       cont_flush();
        switch (whence) {
        case SEEK_SET:
                /* the first record */
@@ -913,7 +898,6 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
        poll_wait(file, &log_wait, wait);
 
        raw_spin_lock_irq(&logbuf_lock);
-       cont_flush();
        if (user->seq < log_next_seq) {
                /* return error when data has vanished underneath us */
                if (user->seq < log_first_seq)
@@ -1300,7 +1284,6 @@ static int syslog_print(char __user *buf, int size)
                size_t skip;
 
                raw_spin_lock_irq(&logbuf_lock);
-               cont_flush();
                if (syslog_seq < log_first_seq) {
                        /* messages are gone, move to first one */
                        syslog_seq = log_first_seq;
@@ -1360,7 +1343,6 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
                return -ENOMEM;
 
        raw_spin_lock_irq(&logbuf_lock);
-       cont_flush();
        if (buf) {
                u64 next_seq;
                u64 seq;
@@ -1522,7 +1504,6 @@ int do_syslog(int type, char __user *buf, int len, int source)
        /* Number of chars in the log buffer */
        case SYSLOG_ACTION_SIZE_UNREAD:
                raw_spin_lock_irq(&logbuf_lock);
-               cont_flush();
                if (syslog_seq < log_first_seq) {
                        /* messages are gone, move to first one */
                        syslog_seq = log_first_seq;
@@ -1769,6 +1750,10 @@ static size_t log_output(int facility, int level, enum log_flags lflags, const c
                cont_flush();
        }
 
+       /* Skip empty continuation lines that couldn't be added - they just flush */
+       if (!text_len && (lflags & LOG_CONT))
+               return 0;
+
        /* If it doesn't end in a newline, try to buffer the current line */
        if (!(lflags & LOG_NEWLINE)) {
                if (cont_add(facility, level, lflags, text, text_len))
@@ -2653,7 +2638,7 @@ void register_console(struct console *newcon)
         *      didn't select a console we take the first one
         *      that registers here.
         */
-       if (preferred_console < 0 && !of_specified_console) {
+       if (preferred_console < 0) {
                if (newcon->index < 0)
                        newcon->index = 0;
                if (newcon->setup == NULL ||
@@ -3035,7 +3020,6 @@ void kmsg_dump(enum kmsg_dump_reason reason)
                dumper->active = true;
 
                raw_spin_lock_irqsave(&logbuf_lock, flags);
-               cont_flush();
                dumper->cur_seq = clear_seq;
                dumper->cur_idx = clear_idx;
                dumper->next_seq = log_next_seq;
@@ -3126,7 +3110,6 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
        bool ret;
 
        raw_spin_lock_irqsave(&logbuf_lock, flags);
-       cont_flush();
        ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len);
        raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 
@@ -3169,7 +3152,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
                goto out;
 
        raw_spin_lock_irqsave(&logbuf_lock, flags);
-       cont_flush();
        if (dumper->cur_seq < log_first_seq) {
                /* messages are gone, move to first available one */
                dumper->cur_seq = log_first_seq;
index 2a99027312a6af6773027e20029752efddc418e3..e6474f7272ec2ce96c95532ea906da113acd5354 100644 (file)
@@ -537,7 +537,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
                int this_len, retval;
 
                this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
-               retval = access_process_vm(tsk, src, buf, this_len, 0);
+               retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE);
                if (!retval) {
                        if (copied)
                                break;
@@ -564,7 +564,8 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
                this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
                if (copy_from_user(buf, src, this_len))
                        return -EFAULT;
-               retval = access_process_vm(tsk, dst, buf, this_len, 1);
+               retval = access_process_vm(tsk, dst, buf, this_len,
+                               FOLL_FORCE | FOLL_WRITE);
                if (!retval) {
                        if (copied)
                                break;
@@ -1127,7 +1128,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
        unsigned long tmp;
        int copied;
 
-       copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), 0);
+       copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
        if (copied != sizeof(tmp))
                return -EIO;
        return put_user(tmp, (unsigned long __user *)data);
@@ -1138,7 +1139,8 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
 {
        int copied;
 
-       copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
+       copied = access_process_vm(tsk, addr, &data, sizeof(data),
+                       FOLL_FORCE | FOLL_WRITE);
        return (copied == sizeof(data)) ? 0 : -EIO;
 }
 
@@ -1155,7 +1157,8 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
        switch (request) {
        case PTRACE_PEEKTEXT:
        case PTRACE_PEEKDATA:
-               ret = access_process_vm(child, addr, &word, sizeof(word), 0);
+               ret = access_process_vm(child, addr, &word, sizeof(word),
+                               FOLL_FORCE);
                if (ret != sizeof(word))
                        ret = -EIO;
                else
@@ -1164,7 +1167,8 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 
        case PTRACE_POKETEXT:
        case PTRACE_POKEDATA:
-               ret = access_process_vm(child, addr, &data, sizeof(data), 1);
+               ret = access_process_vm(child, addr, &data, sizeof(data),
+                               FOLL_FORCE | FOLL_WRITE);
                ret = (ret != sizeof(data) ? -EIO : 0);
                break;
 
index 944b1b491ed84b3d2d1cf977a9838a30cf405121..1898559e6b60ddc52884f6977fca21e57c6f1f90 100644 (file)
@@ -170,7 +170,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
                                      false));
 }
 
-static void rcu_process_callbacks(struct softirq_action *unused)
+static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
 {
        __rcu_process_callbacks(&rcu_sched_ctrlblk);
        __rcu_process_callbacks(&rcu_bh_ctrlblk);
index 7e2e03879c2e55bf259f257868b8b457d5353b07..69a5611a7e7c03dcf950d94badfcce3445863440 100644 (file)
@@ -3013,7 +3013,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 /*
  * Do RCU core processing for the current CPU.
  */
-static void rcu_process_callbacks(struct softirq_action *unused)
+static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
 {
        struct rcu_state *rsp;
 
index a5d966cb889175d5b196283341b1cead20f20603..f1c8fd5662464cd356d174b80e8cff9e0a6a4b12 100644 (file)
@@ -111,10 +111,13 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
 {
        if (tg != &root_task_group)
                return false;
-
        /*
-        * We can only assume the task group can't go away on us if
-        * autogroup_move_group() can see us on ->thread_group list.
+        * If we race with autogroup_move_group() the caller can use the old
+        * value of signal->autogroup but in this case sched_move_task() will
+        * be called again before autogroup_kref_put().
+        *
+        * However, there is no way sched_autogroup_exit_task() could tell us
+        * to avoid autogroup->tg, so we abuse PF_EXITING flag for this case.
         */
        if (p->flags & PF_EXITING)
                return false;
@@ -122,6 +125,16 @@ bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
        return true;
 }
 
+void sched_autogroup_exit_task(struct task_struct *p)
+{
+       /*
+        * We are going to call exit_notify() and autogroup_move_group() can't
+        * see this thread after that: we can no longer use signal->autogroup.
+        * See the PF_EXITING check in task_wants_autogroup().
+        */
+       sched_move_task(p);
+}
+
 static void
 autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 {
@@ -138,13 +151,20 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
        }
 
        p->signal->autogroup = autogroup_kref_get(ag);
-
-       if (!READ_ONCE(sysctl_sched_autogroup_enabled))
-               goto out;
-
+       /*
+        * We can't avoid sched_move_task() after we changed signal->autogroup,
+        * this process can already run with task_group() == prev->tg or we can
+        * race with cgroup code which can read autogroup = prev under rq->lock.
+        * In the latter case for_each_thread() can not miss a migrating thread,
+        * cpu_cgroup_attach() must not be possible after cgroup_exit() and it
+        * can't be removed from thread list, we hold ->siglock.
+        *
+        * If an exiting thread was already removed from thread list we rely on
+        * sched_autogroup_exit_task().
+        */
        for_each_thread(p, t)
                sched_move_task(t);
-out:
+
        unlock_task_sighand(p, &flags);
        autogroup_kref_put(prev);
 }
index 94732d1ab00ab9d9afdebad41642e279249776cb..154fd689fe02e910be7abe1102dbac32b5b50a8a 100644 (file)
@@ -5192,21 +5192,14 @@ void sched_show_task(struct task_struct *p)
        int ppid;
        unsigned long state = p->state;
 
+       if (!try_get_task_stack(p))
+               return;
        if (state)
                state = __ffs(state) + 1;
        printk(KERN_INFO "%-15.15s %c", p->comm,
                state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
-#if BITS_PER_LONG == 32
-       if (state == TASK_RUNNING)
-               printk(KERN_CONT " running  ");
-       else
-               printk(KERN_CONT " %08lx ", thread_saved_pc(p));
-#else
        if (state == TASK_RUNNING)
                printk(KERN_CONT "  running task    ");
-       else
-               printk(KERN_CONT " %016lx ", thread_saved_pc(p));
-#endif
 #ifdef CONFIG_DEBUG_STACK_USAGE
        free = stack_not_used(p);
 #endif
@@ -5221,6 +5214,7 @@ void sched_show_task(struct task_struct *p)
 
        print_worker_info(KERN_INFO, p);
        show_stack(p, NULL);
+       put_task_stack(p);
 }
 
 void show_state_filter(unsigned long state_filter)
@@ -7515,11 +7509,27 @@ static struct kmem_cache *task_group_cache __read_mostly;
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
 DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
 
+#define WAIT_TABLE_BITS 8
+#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
+static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
+
+wait_queue_head_t *bit_waitqueue(void *word, int bit)
+{
+       const int shift = BITS_PER_LONG == 32 ? 5 : 6;
+       unsigned long val = (unsigned long)word << shift | bit;
+
+       return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
+}
+EXPORT_SYMBOL(bit_waitqueue);
+
 void __init sched_init(void)
 {
        int i, j;
        unsigned long alloc_size = 0, ptr;
 
+       for (i = 0; i < WAIT_TABLE_SIZE; i++)
+               init_waitqueue_head(bit_wait_table + i);
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
        alloc_size += 2 * nr_cpu_ids * sizeof(void **);
 #endif
index 13935886a4711b2efd576f8890e1564f54991653..fa178b62ea79b53e3cbf37d78d65699e145d6b98 100644 (file)
@@ -415,7 +415,8 @@ static char *task_group_path(struct task_group *tg)
        if (autogroup_path(tg, group_path, PATH_MAX))
                return group_path;
 
-       return cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+       cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+       return group_path;
 }
 #endif
 
index 502e95a6e9276cd5ca36dfebbe0f8425610b9c33..c242944f5cbd560223ce91990ca92666460110bf 100644 (file)
@@ -690,7 +690,14 @@ void init_entity_runnable_average(struct sched_entity *se)
         * will definitely be update (after enqueue).
         */
        sa->period_contrib = 1023;
-       sa->load_avg = scale_load_down(se->load.weight);
+       /*
+        * Tasks are initialized with full load to be seen as heavy tasks until
+        * they get a chance to stabilize to their real load level.
+        * Group entities are initialized with zero load to reflect the fact that
+        * nothing has been attached to the task group yet.
+        */
+       if (entity_is_task(se))
+               sa->load_avg = scale_load_down(se->load.weight);
        sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
        /*
         * At this point, util_avg won't be used in select_task_rq_fair anyway
@@ -5471,13 +5478,18 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
  */
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
-       struct sched_domain *this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
-       u64 avg_idle = this_rq()->avg_idle;
-       u64 avg_cost = this_sd->avg_scan_cost;
+       struct sched_domain *this_sd;
+       u64 avg_cost, avg_idle = this_rq()->avg_idle;
        u64 time, cost;
        s64 delta;
        int cpu, wrap;
 
+       this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
+       if (!this_sd)
+               return -1;
+
+       avg_cost = this_sd->avg_scan_cost;
+
        /*
         * Due to large variance we need a large fuzz factor; hackbench in
         * particularly is sensitive here.
@@ -8522,7 +8534,7 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
  * run_rebalance_domains is triggered when needed from the scheduler tick.
  * Also triggered for nohz idle balancing (with nohz_balancing_kick set).
  */
-static void run_rebalance_domains(struct softirq_action *h)
+static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
 {
        struct rq *this_rq = this_rq();
        enum cpu_idle_type idle = this_rq->idle_balance ?
@@ -8827,7 +8839,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
        struct sched_entity *se;
        struct cfs_rq *cfs_rq;
-       struct rq *rq;
        int i;
 
        tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8842,8 +8853,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
        init_cfs_bandwidth(tg_cfs_bandwidth(tg));
 
        for_each_possible_cpu(i) {
-               rq = cpu_rq(i);
-
                cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
                                      GFP_KERNEL, cpu_to_node(i));
                if (!cfs_rq)
index 4f7053579fe3c9e473bfb9854f959a874e9566ea..9453efe9b25a64bd4aefceae8e5dffef54df64f2 100644 (file)
@@ -480,16 +480,6 @@ void wake_up_bit(void *word, int bit)
 }
 EXPORT_SYMBOL(wake_up_bit);
 
-wait_queue_head_t *bit_waitqueue(void *word, int bit)
-{
-       const int shift = BITS_PER_LONG == 32 ? 5 : 6;
-       const struct zone *zone = page_zone(virt_to_page(word));
-       unsigned long val = (unsigned long)word << shift | bit;
-
-       return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
-}
-EXPORT_SYMBOL(bit_waitqueue);
-
 /*
  * Manipulate the atomic_t address to produce a better bit waitqueue table hash
  * index (we're keying off bit -1, but that would produce a horrible hash
index 0db7c8a2afe2fb531fe390d78ff9bb435c992077..bff9c774987ad259f0b79b6022ab4f7b8e17d639 100644 (file)
@@ -195,7 +195,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd)
         * value always takes priority (ignoring the DATA).
         */
        for (; f; f = f->prev) {
-               u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd);
+               u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
 
                if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
                        ret = cur_ret;
index 66762645f9e86c30505610736f0380a2cdd1ae00..744fa611cae06b26d89a04e915f0d7fadf37035b 100644 (file)
@@ -58,7 +58,7 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp
 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
 
 const char * const softirq_to_name[NR_SOFTIRQS] = {
-       "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
+       "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
        "TASKLET", "SCHED", "HRTIMER", "RCU"
 };
 
@@ -496,7 +496,7 @@ void __tasklet_hi_schedule_first(struct tasklet_struct *t)
 }
 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
 
-static void tasklet_action(struct softirq_action *a)
+static __latent_entropy void tasklet_action(struct softirq_action *a)
 {
        struct tasklet_struct *list;
 
@@ -532,7 +532,7 @@ static void tasklet_action(struct softirq_action *a)
        }
 }
 
-static void tasklet_hi_action(struct softirq_action *a)
+static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
 {
        struct tasklet_struct *list;
 
index b3f05ee20d1845736580d3c4fbd55d67e7fc3b51..8a5e44236f78d3ba069e0c81ef3304f4be5ba7fd 100644 (file)
@@ -41,12 +41,7 @@ static DEFINE_PER_CPU(__u32, taskstats_seqnum);
 static int family_registered;
 struct kmem_cache *taskstats_cache;
 
-static struct genl_family family = {
-       .id             = GENL_ID_GENERATE,
-       .name           = TASKSTATS_GENL_NAME,
-       .version        = TASKSTATS_GENL_VERSION,
-       .maxattr        = TASKSTATS_CMD_ATTR_MAX,
-};
+static struct genl_family family;
 
 static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = {
        [TASKSTATS_CMD_ATTR_PID]  = { .type = NLA_U32 },
@@ -54,7 +49,11 @@ static const struct nla_policy taskstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1
        [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING },
        [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },};
 
-static const struct nla_policy cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] = {
+/*
+ * We have to use TASKSTATS_CMD_ATTR_MAX here, it is the maxattr in the family.
+ * Make sure they are always aligned.
+ */
+static const struct nla_policy cgroupstats_cmd_get_policy[TASKSTATS_CMD_ATTR_MAX+1] = {
        [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
 };
 
@@ -651,6 +650,15 @@ static const struct genl_ops taskstats_ops[] = {
        },
 };
 
+static struct genl_family family __ro_after_init = {
+       .name           = TASKSTATS_GENL_NAME,
+       .version        = TASKSTATS_GENL_VERSION,
+       .maxattr        = TASKSTATS_CMD_ATTR_MAX,
+       .module         = THIS_MODULE,
+       .ops            = taskstats_ops,
+       .n_ops          = ARRAY_SIZE(taskstats_ops),
+};
+
 /* Needed early in initialization */
 void __init taskstats_init_early(void)
 {
@@ -667,7 +675,7 @@ static int __init taskstats_init(void)
 {
        int rc;
 
-       rc = genl_register_family_with_ops(&family, taskstats_ops);
+       rc = genl_register_family(&family);
        if (rc)
                return rc;
 
index c3aad685bbc036cc2f03672085aedd9dc9c8fb86..12dd190634ab3e0617ac260133818523f142f8a2 100644 (file)
@@ -542,7 +542,6 @@ static int alarm_clock_get(clockid_t which_clock, struct timespec *tp)
 static int alarm_timer_create(struct k_itimer *new_timer)
 {
        enum  alarmtimer_type type;
-       struct alarm_base *base;
 
        if (!alarmtimer_get_rtcdev())
                return -ENOTSUPP;
@@ -551,7 +550,6 @@ static int alarm_timer_create(struct k_itimer *new_timer)
                return -EPERM;
 
        type = clock2alarm(new_timer->it_clock);
-       base = &alarm_bases[type];
        alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer);
        return 0;
 }
index 32bf6f75a8fec255c6d5fbf38e9fecd9e1e848fa..c611c47de8849b5ac68a8085d85dcd86e143d907 100644 (file)
@@ -878,7 +878,7 @@ static inline struct timer_base *get_timer_base(u32 tflags)
 
 #ifdef CONFIG_NO_HZ_COMMON
 static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
 {
 #ifdef CONFIG_SMP
        if ((tflags & TIMER_PINNED) || !base->migration_enabled)
@@ -891,25 +891,27 @@ __get_target_base(struct timer_base *base, unsigned tflags)
 
 static inline void forward_timer_base(struct timer_base *base)
 {
+       unsigned long jnow = READ_ONCE(jiffies);
+
        /*
         * We only forward the base when it's idle and we have a delta between
         * base clock and jiffies.
         */
-       if (!base->is_idle || (long) (jiffies - base->clk) < 2)
+       if (!base->is_idle || (long) (jnow - base->clk) < 2)
                return;
 
        /*
         * If the next expiry value is > jiffies, then we fast forward to
         * jiffies otherwise we forward to the next expiry value.
         */
-       if (time_after(base->next_expiry, jiffies))
-               base->clk = jiffies;
+       if (time_after(base->next_expiry, jnow))
+               base->clk = jnow;
        else
                base->clk = base->next_expiry;
 }
 #else
 static inline struct timer_base *
-__get_target_base(struct timer_base *base, unsigned tflags)
+get_target_base(struct timer_base *base, unsigned tflags)
 {
        return get_timer_this_cpu_base(tflags);
 }
@@ -917,14 +919,6 @@ __get_target_base(struct timer_base *base, unsigned tflags)
 static inline void forward_timer_base(struct timer_base *base) { }
 #endif
 
-static inline struct timer_base *
-get_target_base(struct timer_base *base, unsigned tflags)
-{
-       struct timer_base *target = __get_target_base(base, tflags);
-
-       forward_timer_base(target);
-       return target;
-}
 
 /*
  * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
@@ -943,7 +937,14 @@ static struct timer_base *lock_timer_base(struct timer_list *timer,
 {
        for (;;) {
                struct timer_base *base;
-               u32 tf = timer->flags;
+               u32 tf;
+
+               /*
+                * We need to use READ_ONCE() here, otherwise the compiler
+                * might re-read @tf between the check for TIMER_MIGRATING
+                * and spin_lock().
+                */
+               tf = READ_ONCE(timer->flags);
 
                if (!(tf & TIMER_MIGRATING)) {
                        base = get_timer_base(tf);
@@ -964,6 +965,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
        unsigned long clk = 0, flags;
        int ret = 0;
 
+       BUG_ON(!timer->function);
+
        /*
         * This is a common optimization triggered by the networking code - if
         * the timer is re-modified to have the same timeout or ends up in the
@@ -972,13 +975,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
        if (timer_pending(timer)) {
                if (timer->expires == expires)
                        return 1;
+
                /*
-                * Take the current timer_jiffies of base, but without holding
-                * the lock!
+                * We lock timer base and calculate the bucket index right
+                * here. If the timer ends up in the same bucket, then we
+                * just update the expiry time and avoid the whole
+                * dequeue/enqueue dance.
                 */
-               base = get_timer_base(timer->flags);
-               clk = base->clk;
+               base = lock_timer_base(timer, &flags);
 
+               clk = base->clk;
                idx = calc_wheel_index(expires, clk);
 
                /*
@@ -988,14 +994,14 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
                 */
                if (idx == timer_get_idx(timer)) {
                        timer->expires = expires;
-                       return 1;
+                       ret = 1;
+                       goto out_unlock;
                }
+       } else {
+               base = lock_timer_base(timer, &flags);
        }
 
        timer_stats_timer_set_start_info(timer);
-       BUG_ON(!timer->function);
-
-       base = lock_timer_base(timer, &flags);
 
        ret = detach_if_pending(timer, base, false);
        if (!ret && pending_only)
@@ -1025,12 +1031,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
                }
        }
 
+       /* Try to forward a stale timer base clock */
+       forward_timer_base(base);
+
        timer->expires = expires;
        /*
         * If 'idx' was calculated above and the base time did not advance
-        * between calculating 'idx' and taking the lock, only enqueue_timer()
-        * and trigger_dyntick_cpu() is required. Otherwise we need to
-        * (re)calculate the wheel index via internal_add_timer().
+        * between calculating 'idx' and possibly switching the base, only
+        * enqueue_timer() and trigger_dyntick_cpu() are required. Otherwise
+        * we need to (re)calculate the wheel index via
+        * internal_add_timer().
         */
        if (idx != UINT_MAX && clk == base->clk) {
                enqueue_timer(base, timer, idx);
@@ -1510,12 +1520,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
        is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
        base->next_expiry = nextevt;
        /*
-        * We have a fresh next event. Check whether we can forward the base:
+        * We have a fresh next event. Check whether we can forward the
+        * base. We can only do that when @basej is past base->clk
+        * otherwise we might rewind base->clk.
         */
-       if (time_after(nextevt, jiffies))
-               base->clk = jiffies;
-       else if (time_after(nextevt, base->clk))
-               base->clk = nextevt;
+       if (time_after(basej, base->clk)) {
+               if (time_after(nextevt, basej))
+                       base->clk = basej;
+               else if (time_after(nextevt, base->clk))
+                       base->clk = nextevt;
+       }
 
        if (time_before_eq(nextevt, basej)) {
                expires = basem;
@@ -1633,7 +1647,7 @@ static inline void __run_timers(struct timer_base *base)
 /*
  * This function runs timers and the timer-tq in bottom half context.
  */
-static void run_timer_softirq(struct softirq_action *h)
+static __latent_entropy void run_timer_softirq(struct softirq_action *h)
 {
        struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 
index 2050a7652a86afa140ac94c3eb4de604aea80449..da87b3cba5b39aabb264dcf046b500c12259b0a0 100644 (file)
@@ -1862,6 +1862,10 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
 
        /* Update rec->flags */
        do_for_each_ftrace_rec(pg, rec) {
+
+               if (rec->flags & FTRACE_FL_DISABLED)
+                       continue;
+
                /* We need to update only differences of filter_hash */
                in_old = !!ftrace_lookup_ip(old_hash, rec->ip);
                in_new = !!ftrace_lookup_ip(new_hash, rec->ip);
@@ -1884,6 +1888,10 @@ rollback:
 
        /* Roll back what we did above */
        do_for_each_ftrace_rec(pg, rec) {
+
+               if (rec->flags & FTRACE_FL_DISABLED)
+                       continue;
+
                if (rec == end)
                        goto err_out;
 
@@ -2397,6 +2405,10 @@ void __weak ftrace_replace_code(int enable)
                return;
 
        do_for_each_ftrace_rec(pg, rec) {
+
+               if (rec->flags & FTRACE_FL_DISABLED)
+                       continue;
+
                failed = __ftrace_replace_code(rec, enable);
                if (failed) {
                        ftrace_bug(failed, rec);
@@ -2763,7 +2775,7 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
                struct dyn_ftrace *rec;
 
                do_for_each_ftrace_rec(pg, rec) {
-                       if (FTRACE_WARN_ON_ONCE(rec->flags))
+                       if (FTRACE_WARN_ON_ONCE(rec->flags & ~FTRACE_FL_DISABLED))
                                pr_warn("  %pS flags:%lx\n",
                                        (void *)rec->ip, rec->flags);
                } while_for_each_ftrace_rec();
@@ -3598,6 +3610,10 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod)
                goto out_unlock;
 
        do_for_each_ftrace_rec(pg, rec) {
+
+               if (rec->flags & FTRACE_FL_DISABLED)
+                       continue;
+
                if (ftrace_match_record(rec, &func_g, mod_match, exclude_mod)) {
                        ret = enter_record(hash, rec, clear_filter);
                        if (ret < 0) {
@@ -3793,6 +3809,9 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 
        do_for_each_ftrace_rec(pg, rec) {
 
+               if (rec->flags & FTRACE_FL_DISABLED)
+                       continue;
+
                if (!ftrace_match_record(rec, &func_g, NULL, 0))
                        continue;
 
@@ -4685,6 +4704,9 @@ ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer)
 
        do_for_each_ftrace_rec(pg, rec) {
 
+               if (rec->flags & FTRACE_FL_DISABLED)
+                       continue;
+
                if (ftrace_match_record(rec, &func_g, NULL, 0)) {
                        /* if it is in the array */
                        exists = false;
index 39d07e754822b94a259554c1e66e2709ecac6631..a6c8db1d62f65ffb2ec6a53011ce0b675f9fb7d4 100644 (file)
@@ -198,6 +198,7 @@ config FRAME_WARN
        int "Warn for stack frames larger than (needs gcc 4.4)"
        range 0 8192
        default 0 if KASAN
+       default 2048 if GCC_PLUGIN_LATENT_ENTROPY
        default 1024 if !64BIT
        default 2048 if 64BIT
        help
@@ -1084,6 +1085,9 @@ config PROVE_LOCKING
 
         For more details, see Documentation/locking/lockdep-design.txt.
 
+config PROVE_LOCKING_SMALL
+       bool
+
 config LOCKDEP
        bool
        depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
@@ -1857,15 +1861,6 @@ config PROVIDE_OHCI1394_DMA_INIT
 
          See Documentation/debugging-via-ohci1394.txt for more information.
 
-config BUILD_DOCSRC
-       bool "Build targets in Documentation/ tree"
-       depends on HEADERS_CHECK
-       help
-         This option attempts to build objects from the source files in the
-         kernel Documentation/ tree.
-
-         Say N if you are unsure.
-
 config DMA_API_DEBUG
        bool "Enable debugging of DMA-API usage"
        depends on HAVE_DMA_API_DEBUG
index 0a1139644d328a92ae346ee6fa723d7e18085c75..144fe6b1a03ea536893f6e35d153a895a238b67d 100644 (file)
@@ -292,7 +292,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
        struct gen_pool_chunk *chunk;
        unsigned long addr = 0;
        int order = pool->min_alloc_order;
-       int nbits, start_bit = 0, end_bit, remain;
+       int nbits, start_bit, end_bit, remain;
 
 #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
        BUG_ON(in_nmi());
@@ -307,6 +307,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
                if (size > atomic_read(&chunk->avail))
                        continue;
 
+               start_bit = 0;
                end_bit = chunk_size(chunk) >> order;
 retry:
                start_bit = algo(chunk->bits, end_bit, start_bit,
index 7312e7784611d7c56b6c154a2c2bb9975142792b..f2bd21b93dfca464ae24bb18c6492bd8c7f1eb88 100644 (file)
@@ -683,10 +683,11 @@ static void pipe_advance(struct iov_iter *i, size_t size)
        struct pipe_inode_info *pipe = i->pipe;
        struct pipe_buffer *buf;
        int idx = i->idx;
-       size_t off = i->iov_offset;
+       size_t off = i->iov_offset, orig_sz;
        
        if (unlikely(i->count < size))
                size = i->count;
+       orig_sz = size;
 
        if (size) {
                if (off) /* make it relative to the beginning of buffer */
@@ -713,6 +714,7 @@ static void pipe_advance(struct iov_iter *i, size_t size)
                        pipe->nrbufs--;
                }
        }
+       i->count -= orig_sz;
 }
 
 void iov_iter_advance(struct iov_iter *i, size_t size)
@@ -1139,6 +1141,28 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 }
 EXPORT_SYMBOL(dup_iter);
 
+/**
+ * import_iovec() - Copy an array of &struct iovec from userspace
+ *     into the kernel, check that it is valid, and initialize a new
+ *     &struct iov_iter iterator to access it.
+ *
+ * @type: One of %READ or %WRITE.
+ * @uvector: Pointer to the userspace array.
+ * @nr_segs: Number of elements in userspace array.
+ * @fast_segs: Number of elements in @iov.
+ * @iov: (input and output parameter) Pointer to pointer to (usually small
+ *     on-stack) kernel array.
+ * @i: Pointer to iterator that will be initialized on success.
+ *
+ * If the array pointed to by *@iov is large enough to hold all @nr_segs,
+ * then this function places %NULL in *@iov on return. Otherwise, a new
+ * array will be allocated and the result placed in *@iov. This means that
+ * the caller may call kfree() on *@iov regardless of whether the small
+ * on-stack array was used or not (and regardless of whether this function
+ * returns an error or not).
+ *
+ * Return: 0 on success or negative error code on error.
+ */
 int import_iovec(int type, const struct iovec __user * uvector,
                 unsigned nr_segs, unsigned fast_segs,
                 struct iovec **iov, struct iov_iter *i)
index 2be55692aa43fc5f9d4f9fbe4d337e38fe359b45..1d6565e810309eb710523a814983849d76101c6b 100644 (file)
@@ -74,7 +74,7 @@ void irq_poll_complete(struct irq_poll *iop)
 }
 EXPORT_SYMBOL(irq_poll_complete);
 
-static void irq_poll_softirq(struct softirq_action *h)
+static void __latent_entropy irq_poll_softirq(struct softirq_action *h)
 {
        struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
        int rearm = 0, budget = irq_poll_budget;
index 5464c8744ea95647beeef44efc3722161f5b224b..e24388a863a76ff47afc2a7fc3680bc6ab6e8f4e 100644 (file)
@@ -64,8 +64,13 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
        if (!esize) {
                /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
                 * depending on if MOD equals 1.  */
-               rp[0] = 1;
                res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
+               if (res->nlimbs) {
+                       if (mpi_resize(res, 1) < 0)
+                               goto enomem;
+                       rp = res->d;
+                       rp[0] = 1;
+               }
                res->sign = 0;
                goto leave;
        }
index fce1e9afc6d97a3a27787d44b48590a5850138aa..b42b8577fc2346045c4d88ede93960bf1cd8f3e7 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/types.h>
 #include <net/netlink.h>
 
-static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = {
+static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
        [NLA_U8]        = sizeof(u8),
        [NLA_U16]       = sizeof(u16),
        [NLA_U32]       = sizeof(u32),
index 27fe74948882e9558ad8cc72dfa6da1b7051dce7..9ac959ef4cae972374540f34895465507e656d7b 100644 (file)
@@ -33,6 +33,7 @@
 
 #define PERCPU_COUNT_BIAS      (1LU << (BITS_PER_LONG - 1))
 
+static DEFINE_SPINLOCK(percpu_ref_switch_lock);
 static DECLARE_WAIT_QUEUE_HEAD(percpu_ref_switch_waitq);
 
 static unsigned long __percpu *percpu_count_ptr(struct percpu_ref *ref)
@@ -82,6 +83,7 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
        atomic_long_set(&ref->count, start_count);
 
        ref->release = release;
+       ref->confirm_switch = NULL;
        return 0;
 }
 EXPORT_SYMBOL_GPL(percpu_ref_init);
@@ -101,6 +103,8 @@ void percpu_ref_exit(struct percpu_ref *ref)
        unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
 
        if (percpu_count) {
+               /* non-NULL confirm_switch indicates switching in progress */
+               WARN_ON_ONCE(ref->confirm_switch);
                free_percpu(percpu_count);
                ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
        }
@@ -161,66 +165,23 @@ static void percpu_ref_noop_confirm_switch(struct percpu_ref *ref)
 static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
                                          percpu_ref_func_t *confirm_switch)
 {
-       if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) {
-               /* switching from percpu to atomic */
-               ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
-
-               /*
-                * Non-NULL ->confirm_switch is used to indicate that
-                * switching is in progress.  Use noop one if unspecified.
-                */
-               WARN_ON_ONCE(ref->confirm_switch);
-               ref->confirm_switch =
-                       confirm_switch ?: percpu_ref_noop_confirm_switch;
-
-               percpu_ref_get(ref);    /* put after confirmation */
-               call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
-       } else if (confirm_switch) {
-               /*
-                * Somebody already set ATOMIC.  Switching may still be in
-                * progress.  @confirm_switch must be invoked after the
-                * switching is complete and a full sched RCU grace period
-                * has passed.  Wait synchronously for the previous
-                * switching and schedule @confirm_switch invocation.
-                */
-               wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
-               ref->confirm_switch = confirm_switch;
-
-               percpu_ref_get(ref);    /* put after confirmation */
-               call_rcu_sched(&ref->rcu, percpu_ref_call_confirm_rcu);
+       if (ref->percpu_count_ptr & __PERCPU_REF_ATOMIC) {
+               if (confirm_switch)
+                       confirm_switch(ref);
+               return;
        }
-}
 
-/**
- * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
- * @ref: percpu_ref to switch to atomic mode
- * @confirm_switch: optional confirmation callback
- *
- * There's no reason to use this function for the usual reference counting.
- * Use percpu_ref_kill[_and_confirm]().
- *
- * Schedule switching of @ref to atomic mode.  All its percpu counts will
- * be collected to the main atomic counter.  On completion, when all CPUs
- * are guaraneed to be in atomic mode, @confirm_switch, which may not
- * block, is invoked.  This function may be invoked concurrently with all
- * the get/put operations and can safely be mixed with kill and reinit
- * operations.  Note that @ref will stay in atomic mode across kill/reinit
- * cycles until percpu_ref_switch_to_percpu() is called.
- *
- * This function normally doesn't block and can be called from any context
- * but it may block if @confirm_kill is specified and @ref is already in
- * the process of switching to atomic mode.  In such cases, @confirm_switch
- * will be invoked after the switching is complete.
- *
- * Due to the way percpu_ref is implemented, @confirm_switch will be called
- * after at least one full sched RCU grace period has passed but this is an
- * implementation detail and must not be depended upon.
- */
-void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
-                                percpu_ref_func_t *confirm_switch)
-{
-       ref->force_atomic = true;
-       __percpu_ref_switch_to_atomic(ref, confirm_switch);
+       /* switching from percpu to atomic */
+       ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
+
+       /*
+        * Non-NULL ->confirm_switch is used to indicate that switching is
+        * in progress.  Use noop one if unspecified.
+        */
+       ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch;
+
+       percpu_ref_get(ref);    /* put after confirmation */
+       call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
 }
 
 static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
@@ -233,8 +194,6 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
        if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
                return;
 
-       wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
-
        atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);
 
        /*
@@ -250,6 +209,58 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
                          ref->percpu_count_ptr & ~__PERCPU_REF_ATOMIC);
 }
 
+static void __percpu_ref_switch_mode(struct percpu_ref *ref,
+                                    percpu_ref_func_t *confirm_switch)
+{
+       lockdep_assert_held(&percpu_ref_switch_lock);
+
+       /*
+        * If the previous ATOMIC switching hasn't finished yet, wait for
+        * its completion.  If the caller ensures that ATOMIC switching
+        * isn't in progress, this function can be called from any context.
+        */
+       wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch,
+                           percpu_ref_switch_lock);
+
+       if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
+               __percpu_ref_switch_to_atomic(ref, confirm_switch);
+       else
+               __percpu_ref_switch_to_percpu(ref);
+}
+
+/**
+ * percpu_ref_switch_to_atomic - switch a percpu_ref to atomic mode
+ * @ref: percpu_ref to switch to atomic mode
+ * @confirm_switch: optional confirmation callback
+ *
+ * There's no reason to use this function for the usual reference counting.
+ * Use percpu_ref_kill[_and_confirm]().
+ *
+ * Schedule switching of @ref to atomic mode.  All its percpu counts will
+ * be collected to the main atomic counter.  On completion, when all CPUs
+ * are guaranteed to be in atomic mode, @confirm_switch, which may not
+ * block, is invoked.  This function may be invoked concurrently with all
+ * the get/put operations and can safely be mixed with kill and reinit
+ * operations.  Note that @ref will stay in atomic mode across kill/reinit
+ * cycles until percpu_ref_switch_to_percpu() is called.
+ *
+ * This function may block if @ref is in the process of switching to atomic
+ * mode.  If the caller ensures that @ref is not in the process of
+ * switching to atomic mode, this function can be called from any context.
+ */
+void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
+                                percpu_ref_func_t *confirm_switch)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&percpu_ref_switch_lock, flags);
+
+       ref->force_atomic = true;
+       __percpu_ref_switch_mode(ref, confirm_switch);
+
+       spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
+}
+
 /**
  * percpu_ref_switch_to_percpu - switch a percpu_ref to percpu mode
  * @ref: percpu_ref to switch to percpu mode
@@ -264,17 +275,20 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
  * dying or dead, the actual switching takes place on the following
  * percpu_ref_reinit().
  *
- * This function normally doesn't block and can be called from any context
- * but it may block if @ref is in the process of switching to atomic mode
- * by percpu_ref_switch_atomic().
+ * This function may block if @ref is in the process of switching to atomic
+ * mode.  If the caller ensures that @ref is not in the process of
+ * switching to atomic mode, this function can be called from any context.
  */
 void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&percpu_ref_switch_lock, flags);
+
        ref->force_atomic = false;
+       __percpu_ref_switch_mode(ref, NULL);
 
-       /* a dying or dead ref can't be switched to percpu mode w/o reinit */
-       if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD))
-               __percpu_ref_switch_to_percpu(ref);
+       spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
 }
 
 /**
@@ -290,21 +304,23 @@ void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
  *
  * This function normally doesn't block and can be called from any context
  * but it may block if @confirm_kill is specified and @ref is in the
- * process of switching to atomic mode by percpu_ref_switch_atomic().
- *
- * Due to the way percpu_ref is implemented, @confirm_switch will be called
- * after at least one full sched RCU grace period has passed but this is an
- * implementation detail and must not be depended upon.
+ * process of switching to atomic mode by percpu_ref_switch_to_atomic().
  */
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
                                 percpu_ref_func_t *confirm_kill)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&percpu_ref_switch_lock, flags);
+
        WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
                  "%s called more than once on %pf!", __func__, ref->release);
 
        ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
-       __percpu_ref_switch_to_atomic(ref, confirm_kill);
+       __percpu_ref_switch_mode(ref, confirm_kill);
        percpu_ref_put(ref);
+
+       spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
 
@@ -321,11 +337,16 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
  */
 void percpu_ref_reinit(struct percpu_ref *ref)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&percpu_ref_switch_lock, flags);
+
        WARN_ON_ONCE(!percpu_ref_is_zero(ref));
 
        ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
        percpu_ref_get(ref);
-       if (!ref->force_atomic)
-               __percpu_ref_switch_to_percpu(ref);
+       __percpu_ref_switch_mode(ref, NULL);
+
+       spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_reinit);
index 915982b304bbb16a9d7924b3c1957857189ab6ba..fa594b1140e648899410d15920ea5705d7202661 100644 (file)
@@ -47,7 +47,7 @@ static inline void prandom_state_selftest(void)
 }
 #endif
 
-static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
+static DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy;
 
 /**
  *     prandom_u32_state - seeded pseudo-random number generator.
index 60f77f1d470a0589ccdeacbb52a9eb45a8a1cec1..f87d138e96724a43d219231bb98d6b1a863a0f0a 100644 (file)
@@ -50,7 +50,7 @@
                                        STACK_ALLOC_ALIGN)
 #define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
                STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
-#define STACK_ALLOC_SLABS_CAP 1024
+#define STACK_ALLOC_SLABS_CAP 8192
 #define STACK_ALLOC_MAX_SLABS \
        (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
         (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)
@@ -192,6 +192,7 @@ void depot_fetch_stack(depot_stack_handle_t handle, struct stack_trace *trace)
        trace->entries = stack->entries;
        trace->skip = 0;
 }
+EXPORT_SYMBOL_GPL(depot_fetch_stack);
 
 /**
  * depot_save_stack - save stack in a stack depot.
@@ -283,3 +284,4 @@ exit:
 fast_exit:
        return retval;
 }
+EXPORT_SYMBOL_GPL(depot_save_stack);
index 94346b4d8984c5cfa88743be66b1880b929e4a60..0362da0b66c352e4cb3eb96748fe2db4955d6b11 100644 (file)
@@ -4831,7 +4831,7 @@ static struct bpf_test tests[] = {
                { },
                INTERNAL,
                { 0x34 },
-               { { 1, 0xbef } },
+               { { ETH_HLEN, 0xbef } },
                .fill_helper = bpf_fill_ld_abs_vlan_push_pop,
        },
        /*
index be0ee11fa0d9ee8ff068244a559a2c52ad96c84c..86e3e0e74d20e78d173c1fa6dc4096f81695634e 100644 (file)
@@ -187,7 +187,7 @@ config MEMORY_HOTPLUG
        bool "Allow for memory hot-add"
        depends on SPARSEMEM || X86_64_ACPI_NUMA
        depends on ARCH_ENABLE_MEMORY_HOTPLUG
-       depends on !KASAN
+       depends on COMPILE_TEST || !KASAN
 
 config MEMORY_HOTPLUG_SPARSE
        def_bool y
index 384c2cb51b56bf75ab2c132d0087e3757a71c276..c960459eda7e640ea55be1d4ed80c6a9125a8877 100644 (file)
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -385,6 +385,9 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align)
        bitmap_maxno = cma_bitmap_maxno(cma);
        bitmap_count = cma_bitmap_pages_to_bits(cma, count);
 
+       if (bitmap_count > bitmap_maxno)
+               return NULL;
+
        for (;;) {
                mutex_lock(&cma->lock);
                bitmap_no = bitmap_find_next_zero_area_off(cma->bitmap,
index 849f459ad0780e27bc256ff13fd52fa8c9007661..50b52fe51937ca70e62a33ab1553aef9b77ad1a0 100644 (file)
@@ -790,9 +790,7 @@ EXPORT_SYMBOL(__page_cache_alloc);
  */
 wait_queue_head_t *page_waitqueue(struct page *page)
 {
-       const struct zone *zone = page_zone(page);
-
-       return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)];
+       return bit_waitqueue(page, 0);
 }
 EXPORT_SYMBOL(page_waitqueue);
 
@@ -1734,6 +1732,9 @@ find_page:
                        if (inode->i_blkbits == PAGE_SHIFT ||
                                        !mapping->a_ops->is_partially_uptodate)
                                goto page_not_up_to_date;
+                       /* pipes can't handle partially uptodate pages */
+                       if (unlikely(iter->type & ITER_PIPE))
+                               goto page_not_up_to_date;
                        if (!trylock_page(page))
                                goto page_not_up_to_date;
                        /* Did it get truncated before we got the lock? */
index 381bb07ed14f2271e487fb98d0e95f6ff5470750..db77dcb38afda3d3720a228e14236fb2e324f929 100644 (file)
  * get_vaddr_frames() - map virtual addresses to pfns
  * @start:     starting user address
  * @nr_frames: number of pages / pfns from start to map
- * @write:     whether pages will be written to by the caller
- * @force:     whether to force write access even if user mapping is
- *             readonly. See description of the same argument of
-               get_user_pages().
+ * @gup_flags: flags modifying lookup behaviour
  * @vec:       structure which receives pages / pfns of the addresses mapped.
  *             It should have space for at least nr_frames entries.
  *
@@ -34,7 +31,7 @@
  * This function takes care of grabbing mmap_sem as necessary.
  */
 int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
-                    bool write, bool force, struct frame_vector *vec)
+                    unsigned int gup_flags, struct frame_vector *vec)
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
@@ -59,7 +56,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
                vec->got_ref = true;
                vec->is_pfns = false;
                ret = get_user_pages_locked(start, nr_frames,
-                       write, force, (struct page **)(vec->ptrs), &locked);
+                       gup_flags, (struct page **)(vec->ptrs), &locked);
                goto out;
        }
 
index 96b2b2fd0fbd13f0b7385e7adc3359c6c7793503..ec4f82704b6f368bf4e128d3feb7356a8c482022 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -60,6 +60,16 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
        return -EEXIST;
 }
 
+/*
+ * FOLL_FORCE can write to even unwritable pte's, but only
+ * after we've gone through a COW cycle and they are dirty.
+ */
+static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+{
+       return pte_write(pte) ||
+               ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+}
+
 static struct page *follow_page_pte(struct vm_area_struct *vma,
                unsigned long address, pmd_t *pmd, unsigned int flags)
 {
@@ -95,7 +105,7 @@ retry:
        }
        if ((flags & FOLL_NUMA) && pte_protnone(pte))
                goto no_page;
-       if ((flags & FOLL_WRITE) && !pte_write(pte)) {
+       if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
                pte_unmap_unlock(ptep, ptl);
                return NULL;
        }
@@ -412,7 +422,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
         * reCOWed by userspace write).
         */
        if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
-               *flags &= ~FOLL_WRITE;
+               *flags |= FOLL_COW;
        return 0;
 }
 
@@ -516,7 +526,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
  * instead of __get_user_pages. __get_user_pages should be used only if
  * you need some special @gup_flags.
  */
-long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                unsigned long start, unsigned long nr_pages,
                unsigned int gup_flags, struct page **pages,
                struct vm_area_struct **vmas, int *nonblocking)
@@ -621,7 +631,6 @@ next_page:
        } while (nr_pages);
        return i;
 }
-EXPORT_SYMBOL(__get_user_pages);
 
 bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags)
 {
@@ -729,7 +738,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
                                                struct mm_struct *mm,
                                                unsigned long start,
                                                unsigned long nr_pages,
-                                               int write, int force,
                                                struct page **pages,
                                                struct vm_area_struct **vmas,
                                                int *locked, bool notify_drop,
@@ -747,10 +755,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
 
        if (pages)
                flags |= FOLL_GET;
-       if (write)
-               flags |= FOLL_WRITE;
-       if (force)
-               flags |= FOLL_FORCE;
 
        pages_done = 0;
        lock_dropped = false;
@@ -843,12 +847,12 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
  *          up_read(&mm->mmap_sem);
  */
 long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
-                          int write, int force, struct page **pages,
+                          unsigned int gup_flags, struct page **pages,
                           int *locked)
 {
        return __get_user_pages_locked(current, current->mm, start, nr_pages,
-                                      write, force, pages, NULL, locked, true,
-                                      FOLL_TOUCH);
+                                      pages, NULL, locked, true,
+                                      gup_flags | FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages_locked);
 
@@ -864,14 +868,14 @@ EXPORT_SYMBOL(get_user_pages_locked);
  */
 __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
                                               unsigned long start, unsigned long nr_pages,
-                                              int write, int force, struct page **pages,
-                                              unsigned int gup_flags)
+                                              struct page **pages, unsigned int gup_flags)
 {
        long ret;
        int locked = 1;
+
        down_read(&mm->mmap_sem);
-       ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
-                                     pages, NULL, &locked, false, gup_flags);
+       ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL,
+                                     &locked, false, gup_flags);
        if (locked)
                up_read(&mm->mmap_sem);
        return ret;
@@ -896,10 +900,10 @@ EXPORT_SYMBOL(__get_user_pages_unlocked);
  * "force" parameter).
  */
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
-                            int write, int force, struct page **pages)
+                            struct page **pages, unsigned int gup_flags)
 {
        return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
-                                        write, force, pages, FOLL_TOUCH);
+                                        pages, gup_flags | FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages_unlocked);
 
@@ -910,9 +914,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
  * @mm:                mm_struct of target mm
  * @start:     starting user address
  * @nr_pages:  number of pages from start to pin
- * @write:     whether pages will be written to by the caller
- * @force:     whether to force access even when user mapping is currently
- *             protected (but never forces write access to shared mapping).
+ * @gup_flags: flags modifying lookup behaviour
  * @pages:     array that receives pointers to the pages pinned.
  *             Should be at least nr_pages long. Or NULL, if caller
  *             only intends to ensure the pages are faulted in.
@@ -941,9 +943,9 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
  * or similar operation cannot guarantee anything stronger anyway because
  * locks can't be held over the syscall boundary.
  *
- * If write=0, the page must not be written to. If the page is written to,
- * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
- * after the page is finished with, and before put_page is called.
+ * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the page
+ * is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must
+ * be called after the page is finished with, and before put_page is called.
  *
  * get_user_pages is typically used for fewer-copy IO operations, to get a
  * handle on the memory by some means other than accesses via the user virtual
@@ -960,12 +962,12 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
  */
 long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
                unsigned long start, unsigned long nr_pages,
-               int write, int force, struct page **pages,
+               unsigned int gup_flags, struct page **pages,
                struct vm_area_struct **vmas)
 {
-       return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
-                                      pages, vmas, NULL, false,
-                                      FOLL_TOUCH | FOLL_REMOTE);
+       return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
+                                      NULL, false,
+                                      gup_flags | FOLL_TOUCH | FOLL_REMOTE);
 }
 EXPORT_SYMBOL(get_user_pages_remote);
 
@@ -976,12 +978,12 @@ EXPORT_SYMBOL(get_user_pages_remote);
  * obviously don't pass FOLL_REMOTE in here.
  */
 long get_user_pages(unsigned long start, unsigned long nr_pages,
-               int write, int force, struct page **pages,
+               unsigned int gup_flags, struct page **pages,
                struct vm_area_struct **vmas)
 {
        return __get_user_pages_locked(current, current->mm, start, nr_pages,
-                                      write, force, pages, vmas, NULL, false,
-                                      FOLL_TOUCH);
+                                      pages, vmas, NULL, false,
+                                      gup_flags | FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages);
 
@@ -1505,7 +1507,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                start += nr << PAGE_SHIFT;
                pages += nr;
 
-               ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages);
+               ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
+                               write ? FOLL_WRITE : 0);
 
                /* Have to be a bit careful with return values */
                if (nr > 0) {
index cdcd25cb30fea3f2ad2c660e547d014b7378b3dd..eff3de359d50a30588abf70676dc6993c751471d 100644 (file)
@@ -1426,11 +1426,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
 bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
                  unsigned long new_addr, unsigned long old_end,
-                 pmd_t *old_pmd, pmd_t *new_pmd)
+                 pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
 {
        spinlock_t *old_ptl, *new_ptl;
        pmd_t pmd;
        struct mm_struct *mm = vma->vm_mm;
+       bool force_flush = false;
 
        if ((old_addr & ~HPAGE_PMD_MASK) ||
            (new_addr & ~HPAGE_PMD_MASK) ||
@@ -1455,6 +1456,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
                new_ptl = pmd_lockptr(mm, new_pmd);
                if (new_ptl != old_ptl)
                        spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+               if (pmd_present(*old_pmd) && pmd_dirty(*old_pmd))
+                       force_flush = true;
                pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
                VM_BUG_ON(!pmd_none(*new_pmd));
 
@@ -1467,6 +1470,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
                set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
                if (new_ptl != old_ptl)
                        spin_unlock(new_ptl);
+               if (force_flush)
+                       flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
+               else
+                       *need_flush = true;
                spin_unlock(old_ptl);
                return true;
        }
index ec49d9ef1eefd0155f099d813ed547c16172d69e..418bf01a50ed1f9dde0ca6c083aafa86c1869f23 100644 (file)
@@ -1826,11 +1826,17 @@ static void return_unused_surplus_pages(struct hstate *h,
  * is not the case is if a reserve map was changed between calls.  It
  * is the responsibility of the caller to notice the difference and
  * take appropriate action.
+ *
+ * vma_add_reservation is used in error paths where a reservation must
+ * be restored when a newly allocated huge page must be freed.  It is
+ * to be called after calling vma_needs_reservation to determine if a
+ * reservation exists.
  */
 enum vma_resv_mode {
        VMA_NEEDS_RESV,
        VMA_COMMIT_RESV,
        VMA_END_RESV,
+       VMA_ADD_RESV,
 };
 static long __vma_reservation_common(struct hstate *h,
                                struct vm_area_struct *vma, unsigned long addr,
@@ -1856,6 +1862,14 @@ static long __vma_reservation_common(struct hstate *h,
                region_abort(resv, idx, idx + 1);
                ret = 0;
                break;
+       case VMA_ADD_RESV:
+               if (vma->vm_flags & VM_MAYSHARE)
+                       ret = region_add(resv, idx, idx + 1);
+               else {
+                       region_abort(resv, idx, idx + 1);
+                       ret = region_del(resv, idx, idx + 1);
+               }
+               break;
        default:
                BUG();
        }
@@ -1903,6 +1917,56 @@ static void vma_end_reservation(struct hstate *h,
        (void)__vma_reservation_common(h, vma, addr, VMA_END_RESV);
 }
 
+static long vma_add_reservation(struct hstate *h,
+                       struct vm_area_struct *vma, unsigned long addr)
+{
+       return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
+}
+
+/*
+ * This routine is called to restore a reservation on error paths.  In the
+ * specific error paths, a huge page was allocated (via alloc_huge_page)
+ * and is about to be freed.  If a reservation for the page existed,
+ * alloc_huge_page would have consumed the reservation and set PagePrivate
+ * in the newly allocated page.  When the page is freed via free_huge_page,
+ * the global reservation count will be incremented if PagePrivate is set.
+ * However, free_huge_page can not adjust the reserve map.  Adjust the
+ * reserve map here to be consistent with global reserve count adjustments
+ * to be made by free_huge_page.
+ */
+static void restore_reserve_on_error(struct hstate *h,
+                       struct vm_area_struct *vma, unsigned long address,
+                       struct page *page)
+{
+       if (unlikely(PagePrivate(page))) {
+               long rc = vma_needs_reservation(h, vma, address);
+
+               if (unlikely(rc < 0)) {
+                       /*
+                        * Rare out of memory condition in reserve map
+                        * manipulation.  Clear PagePrivate so that
+                        * global reserve count will not be incremented
+                        * by free_huge_page.  This will make it appear
+                        * as though the reservation for this page was
+                        * consumed.  This may prevent the task from
+                        * faulting in the page at a later time.  This
+                        * is better than inconsistent global huge page
+                        * accounting of reserve counts.
+                        */
+                       ClearPagePrivate(page);
+               } else if (rc) {
+                       rc = vma_add_reservation(h, vma, address);
+                       if (unlikely(rc < 0))
+                               /*
+                                * See above comment about rare out of
+                                * memory condition.
+                                */
+                               ClearPagePrivate(page);
+               } else
+                       vma_end_reservation(h, vma, address);
+       }
+}
+
 struct page *alloc_huge_page(struct vm_area_struct *vma,
                                    unsigned long addr, int avoid_reserve)
 {
@@ -3498,6 +3562,7 @@ retry_avoidcopy:
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out_release_all:
+       restore_reserve_on_error(h, vma, address, new_page);
        put_page(new_page);
 out_release_old:
        put_page(old_page);
@@ -3680,6 +3745,7 @@ backout:
        spin_unlock(ptl);
 backout_unlocked:
        unlock_page(page);
+       restore_reserve_on_error(h, vma, address, page);
        put_page(page);
        goto out;
 }
index 88af13c00d3cbfedb1d6d42ef5ddcf6ca9a50cab..70c009741aab705c6bb344df1f1851d9582bc7a7 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
+#include <linux/bug.h>
 
 #include "kasan.h"
 #include "../slab.h"
@@ -62,7 +63,7 @@ void kasan_unpoison_shadow(const void *address, size_t size)
        }
 }
 
-static void __kasan_unpoison_stack(struct task_struct *task, void *sp)
+static void __kasan_unpoison_stack(struct task_struct *task, const void *sp)
 {
        void *base = task_stack_page(task);
        size_t size = sp - base;
@@ -77,9 +78,24 @@ void kasan_unpoison_task_stack(struct task_struct *task)
 }
 
 /* Unpoison the stack for the current task beyond a watermark sp value. */
-asmlinkage void kasan_unpoison_remaining_stack(void *sp)
+asmlinkage void kasan_unpoison_task_stack_below(const void *watermark)
 {
-       __kasan_unpoison_stack(current, sp);
+       __kasan_unpoison_stack(current, watermark);
+}
+
+/*
+ * Clear all poison for the region between the current SP and a provided
+ * watermark value, as is sometimes required prior to hand-crafted asm function
+ * returns in the middle of functions.
+ */
+void kasan_unpoison_stack_above_sp_to(const void *watermark)
+{
+       const void *sp = __builtin_frame_address(0);
+       size_t size = watermark - sp;
+
+       if (WARN_ON(sp > watermark))
+               return;
+       kasan_unpoison_shadow(sp, size);
 }
 
 /*
index a5e453cf05c499cf5c7eeb9b66ce14936d4494fd..d1380ed93fdf084d5043a2fb1f8bb3e476cb7bf9 100644 (file)
@@ -1414,6 +1414,7 @@ static void kmemleak_scan(void)
        /* data/bss scanning */
        scan_large_block(_sdata, _edata);
        scan_large_block(__bss_start, __bss_stop);
+       scan_large_block(__start_data_ro_after_init, __end_data_ro_after_init);
 
 #ifdef CONFIG_SMP
        /* per-cpu sections scanning */
@@ -1453,8 +1454,11 @@ static void kmemleak_scan(void)
 
                read_lock(&tasklist_lock);
                do_each_thread(g, p) {
-                       scan_block(task_stack_page(p), task_stack_page(p) +
-                                  THREAD_SIZE, NULL);
+                       void *stack = try_get_task_stack(p);
+                       if (stack) {
+                               scan_block(stack, stack + THREAD_SIZE, NULL);
+                               put_task_stack(p);
+                       }
                } while_each_thread(g, p);
                read_unlock(&tasklist_lock);
        }
index 1d05cb9d363d0bfadd6a9c58efccd0551d0bd7f2..234676e31edd3b0609014a3adcf4fb4e200a0c0e 100644 (file)
@@ -554,6 +554,8 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
        err = memcg_init_list_lru(lru, memcg_aware);
        if (err) {
                kfree(lru->node);
+               /* Do this so a list_lru_destroy() doesn't crash: */
+               lru->node = NULL;
                goto out;
        }
 
index ae052b5e3315217874569d0c1cf57ade9cea99a1..0f870ba43942e74d1d535a13437e446da5863b1a 100644 (file)
@@ -1917,6 +1917,15 @@ retry:
                     current->flags & PF_EXITING))
                goto force;
 
+       /*
+        * Prevent unbounded recursion when reclaim operations need to
+        * allocate memory. This might exceed the limits temporarily,
+        * but we prefer facilitating memory reclaim and getting back
+        * under the limit over triggering OOM kills in these cases.
+        */
+       if (unlikely(current->flags & PF_MEMALLOC))
+               goto force;
+
        if (unlikely(task_in_memcg_oom(current)))
                goto nomem;
 
index de88f33519c0d6398de8fcc06bfa82f2a477dd95..19e796d36a629147dd36217ecab34934300dc660 100644 (file)
@@ -1112,10 +1112,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
        }
 
        if (!PageHuge(p) && PageTransHuge(hpage)) {
-               lock_page(hpage);
-               if (!PageAnon(hpage) || unlikely(split_huge_page(hpage))) {
-                       unlock_page(hpage);
-                       if (!PageAnon(hpage))
+               lock_page(p);
+               if (!PageAnon(p) || unlikely(split_huge_page(p))) {
+                       unlock_page(p);
+                       if (!PageAnon(p))
                                pr_err("Memory failure: %#lx: non anonymous thp\n",
                                        pfn);
                        else
@@ -1126,9 +1126,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
                        put_hwpoison_page(p);
                        return -EBUSY;
                }
-               unlock_page(hpage);
-               get_hwpoison_page(p);
-               put_hwpoison_page(hpage);
+               unlock_page(p);
                VM_BUG_ON_PAGE(!page_count(p), p);
                hpage = compound_head(p);
        }
index fc1987dfd8cc7f62fa911fd905f743cb3ec53ec3..e18c57bdc75c4c96e3ef79546afe11fab5a3c07a 100644 (file)
@@ -3869,10 +3869,11 @@ EXPORT_SYMBOL_GPL(generic_access_phys);
  * given task for page fault accounting.
  */
 static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
-               unsigned long addr, void *buf, int len, int write)
+               unsigned long addr, void *buf, int len, unsigned int gup_flags)
 {
        struct vm_area_struct *vma;
        void *old_buf = buf;
+       int write = gup_flags & FOLL_WRITE;
 
        down_read(&mm->mmap_sem);
        /* ignore errors, just check how much was successfully transferred */
@@ -3882,7 +3883,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
                struct page *page = NULL;
 
                ret = get_user_pages_remote(tsk, mm, addr, 1,
-                               write, 1, &page, &vma);
+                               gup_flags, &page, &vma);
                if (ret <= 0) {
 #ifndef CONFIG_HAVE_IOREMAP_PROT
                        break;
@@ -3934,14 +3935,14 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
  * @addr:      start address to access
  * @buf:       source or destination buffer
  * @len:       number of bytes to transfer
- * @write:     whether the access is a write
+ * @gup_flags: flags modifying lookup behaviour
  *
  * The caller must hold a reference on @mm.
  */
 int access_remote_vm(struct mm_struct *mm, unsigned long addr,
-               void *buf, int len, int write)
+               void *buf, int len, unsigned int gup_flags)
 {
-       return __access_remote_vm(NULL, mm, addr, buf, len, write);
+       return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags);
 }
 
 /*
@@ -3950,7 +3951,7 @@ int access_remote_vm(struct mm_struct *mm, unsigned long addr,
  * Do not walk the page table directly, use get_user_pages
  */
 int access_process_vm(struct task_struct *tsk, unsigned long addr,
-               void *buf, int len, int write)
+               void *buf, int len, unsigned int gup_flags)
 {
        struct mm_struct *mm;
        int ret;
@@ -3959,7 +3960,8 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr,
        if (!mm)
                return 0;
 
-       ret = __access_remote_vm(tsk, mm, addr, buf, len, write);
+       ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
+
        mmput(mm);
 
        return ret;
index 962927309b6e963fa05a0acf09608c2ec6cef761..cad4b9125695cfbfcc7509e942f6d8f8a37f460c 100644 (file)
@@ -268,7 +268,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
        unsigned long i, pfn, end_pfn, nr_pages;
        int node = pgdat->node_id;
        struct page *page;
-       struct zone *zone;
 
        nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
        page = virt_to_page(pgdat);
@@ -276,19 +275,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
        for (i = 0; i < nr_pages; i++, page++)
                get_page_bootmem(node, page, NODE_INFO);
 
-       zone = &pgdat->node_zones[0];
-       for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
-               if (zone_is_initialized(zone)) {
-                       nr_pages = zone->wait_table_hash_nr_entries
-                               * sizeof(wait_queue_head_t);
-                       nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
-                       page = virt_to_page(zone->wait_table);
-
-                       for (i = 0; i < nr_pages; i++, page++)
-                               get_page_bootmem(node, page, NODE_INFO);
-               }
-       }
-
        pfn = pgdat->node_start_pfn;
        end_pfn = pgdat_end_pfn(pgdat);
 
@@ -2131,7 +2117,6 @@ void try_offline_node(int nid)
        unsigned long start_pfn = pgdat->node_start_pfn;
        unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
        unsigned long pfn;
-       int i;
 
        for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
                unsigned long section_nr = pfn_to_section_nr(pfn);
@@ -2158,20 +2143,6 @@ void try_offline_node(int nid)
         */
        node_set_offline(nid);
        unregister_one_node(nid);
-
-       /* free waittable in each zone */
-       for (i = 0; i < MAX_NR_ZONES; i++) {
-               struct zone *zone = pgdat->node_zones + i;
-
-               /*
-                * wait_table may be allocated from boot memory,
-                * here only free if it's allocated by vmalloc.
-                */
-               if (is_vmalloc_addr(zone->wait_table)) {
-                       vfree(zone->wait_table);
-                       zone->wait_table = NULL;
-               }
-       }
 }
 EXPORT_SYMBOL(try_offline_node);
 
index ad1c96ac313c0f442b1635baaa96555d355a74bb..0b859af06b87df4e17af4180b953a6337c251dff 100644 (file)
@@ -850,7 +850,7 @@ static int lookup_node(unsigned long addr)
        struct page *p;
        int err;
 
-       err = get_user_pages(addr & PAGE_MASK, 1, 0, 0, &p, NULL);
+       err = get_user_pages(addr & PAGE_MASK, 1, 0, &p, NULL);
        if (err >= 0) {
                err = page_to_nid(p);
                put_page(p);
index bcdbe62f3e6da12766f0d96a24aa4c2111614e6b..11936526b08b8c5f5c5d0454f6789008a0c6f313 100644 (file)
@@ -25,7 +25,6 @@
 #include <linux/perf_event.h>
 #include <linux/pkeys.h>
 #include <linux/ksm.h>
-#include <linux/pkeys.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
index da22ad2a5678265ea9f2d0aa5ece9e14c519a494..6ccecc03f56ad05940484a481c5a092f1ad245ef 100644 (file)
@@ -104,11 +104,13 @@ static pte_t move_soft_dirty_pte(pte_t pte)
 static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
                unsigned long old_addr, unsigned long old_end,
                struct vm_area_struct *new_vma, pmd_t *new_pmd,
-               unsigned long new_addr, bool need_rmap_locks)
+               unsigned long new_addr, bool need_rmap_locks, bool *need_flush)
 {
        struct mm_struct *mm = vma->vm_mm;
        pte_t *old_pte, *new_pte, pte;
        spinlock_t *old_ptl, *new_ptl;
+       bool force_flush = false;
+       unsigned long len = old_end - old_addr;
 
        /*
         * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
@@ -146,6 +148,14 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
                                   new_pte++, new_addr += PAGE_SIZE) {
                if (pte_none(*old_pte))
                        continue;
+
+               /*
+                * We are remapping a dirty PTE, make sure to
+                * flush TLB before we drop the PTL for the
+                * old PTE or we may race with page_mkclean().
+                */
+               if (pte_present(*old_pte) && pte_dirty(*old_pte))
+                       force_flush = true;
                pte = ptep_get_and_clear(mm, old_addr, old_pte);
                pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
                pte = move_soft_dirty_pte(pte);
@@ -156,6 +166,10 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
        if (new_ptl != old_ptl)
                spin_unlock(new_ptl);
        pte_unmap(new_pte - 1);
+       if (force_flush)
+               flush_tlb_range(vma, old_end - len, old_end);
+       else
+               *need_flush = true;
        pte_unmap_unlock(old_pte - 1, old_ptl);
        if (need_rmap_locks)
                drop_rmap_locks(vma);
@@ -201,13 +215,12 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
                                if (need_rmap_locks)
                                        take_rmap_locks(vma);
                                moved = move_huge_pmd(vma, old_addr, new_addr,
-                                                   old_end, old_pmd, new_pmd);
+                                                   old_end, old_pmd, new_pmd,
+                                                   &need_flush);
                                if (need_rmap_locks)
                                        drop_rmap_locks(vma);
-                               if (moved) {
-                                       need_flush = true;
+                               if (moved)
                                        continue;
-                               }
                        }
                        split_huge_pmd(vma, old_pmd, old_addr);
                        if (pmd_trans_unstable(old_pmd))
@@ -220,11 +233,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
                        extent = next - new_addr;
                if (extent > LATENCY_LIMIT)
                        extent = LATENCY_LIMIT;
-               move_ptes(vma, old_pmd, old_addr, old_addr + extent,
-                         new_vma, new_pmd, new_addr, need_rmap_locks);
-               need_flush = true;
+               move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
+                         new_pmd, new_addr, need_rmap_locks, &need_flush);
        }
-       if (likely(need_flush))
+       if (need_flush)
                flush_tlb_range(vma, old_end-len, old_addr);
 
        mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
index 95daf81a4855d99d2ae176ab16c92a1b4302b0ae..8b8faaf2a9e95cfc607ff1a5a37c83eadfda59fe 100644 (file)
@@ -109,7 +109,7 @@ unsigned int kobjsize(const void *objp)
        return PAGE_SIZE << compound_order(page);
 }
 
-long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                      unsigned long start, unsigned long nr_pages,
                      unsigned int foll_flags, struct page **pages,
                      struct vm_area_struct **vmas, int *nonblocking)
@@ -160,33 +160,25 @@ finish_or_fault:
  * - don't permit access to VMAs that don't support it, such as I/O mappings
  */
 long get_user_pages(unsigned long start, unsigned long nr_pages,
-                   int write, int force, struct page **pages,
+                   unsigned int gup_flags, struct page **pages,
                    struct vm_area_struct **vmas)
 {
-       int flags = 0;
-
-       if (write)
-               flags |= FOLL_WRITE;
-       if (force)
-               flags |= FOLL_FORCE;
-
-       return __get_user_pages(current, current->mm, start, nr_pages, flags,
-                               pages, vmas, NULL);
+       return __get_user_pages(current, current->mm, start, nr_pages,
+                               gup_flags, pages, vmas, NULL);
 }
 EXPORT_SYMBOL(get_user_pages);
 
 long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
-                           int write, int force, struct page **pages,
+                           unsigned int gup_flags, struct page **pages,
                            int *locked)
 {
-       return get_user_pages(start, nr_pages, write, force, pages, NULL);
+       return get_user_pages(start, nr_pages, gup_flags, pages, NULL);
 }
 EXPORT_SYMBOL(get_user_pages_locked);
 
 long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
                               unsigned long start, unsigned long nr_pages,
-                              int write, int force, struct page **pages,
-                              unsigned int gup_flags)
+                              struct page **pages, unsigned int gup_flags)
 {
        long ret;
        down_read(&mm->mmap_sem);
@@ -198,10 +190,10 @@ long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
 EXPORT_SYMBOL(__get_user_pages_unlocked);
 
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
-                            int write, int force, struct page **pages)
+                            struct page **pages, unsigned int gup_flags)
 {
        return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
-                                        write, force, pages, 0);
+                                        pages, gup_flags);
 }
 EXPORT_SYMBOL(get_user_pages_unlocked);
 
@@ -1817,9 +1809,10 @@ void filemap_map_pages(struct fault_env *fe,
 EXPORT_SYMBOL(filemap_map_pages);
 
 static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
-               unsigned long addr, void *buf, int len, int write)
+               unsigned long addr, void *buf, int len, unsigned int gup_flags)
 {
        struct vm_area_struct *vma;
+       int write = gup_flags & FOLL_WRITE;
 
        down_read(&mm->mmap_sem);
 
@@ -1854,21 +1847,22 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
  * @addr:      start address to access
  * @buf:       source or destination buffer
  * @len:       number of bytes to transfer
- * @write:     whether the access is a write
+ * @gup_flags: flags modifying lookup behaviour
  *
  * The caller must hold a reference on @mm.
  */
 int access_remote_vm(struct mm_struct *mm, unsigned long addr,
-               void *buf, int len, int write)
+               void *buf, int len, unsigned int gup_flags)
 {
-       return __access_remote_vm(NULL, mm, addr, buf, len, write);
+       return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags);
 }
 
 /*
  * Access another process' address space.
  * - source/target buffer must be kernel space
  */
-int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write)
+int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len,
+               unsigned int gup_flags)
 {
        struct mm_struct *mm;
 
@@ -1879,7 +1873,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
        if (!mm)
                return 0;
 
-       len = __access_remote_vm(tsk, mm, addr, buf, len, write);
+       len = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
 
        mmput(mm);
        return len;
index ca423cc20b5985b9ba5528e7648881f05445a50d..6de9440e3ae2d995b28577dc4a000fc23f4182c0 100644 (file)
@@ -91,6 +91,11 @@ EXPORT_PER_CPU_SYMBOL(_numa_mem_);
 int _node_numa_mem_[MAX_NUMNODES];
 #endif
 
+#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
+volatile unsigned long latent_entropy __latent_entropy;
+EXPORT_SYMBOL(latent_entropy);
+#endif
+
 /*
  * Array of node states.
  */
@@ -3653,7 +3658,7 @@ retry:
        /* Make sure we know about allocations which stall for too long */
        if (time_after(jiffies, alloc_start + stall_timeout)) {
                warn_alloc(gfp_mask,
-                       "page alloction stalls for %ums, order:%u\n",
+                       "page allocation stalls for %ums, order:%u",
                        jiffies_to_msecs(jiffies-alloc_start), order);
                stall_timeout += 10 * HZ;
        }
@@ -4219,7 +4224,7 @@ static void show_migration_types(unsigned char type)
        }
 
        *p = '\0';
-       printk("(%s) ", tmp);
+       printk(KERN_CONT "(%s) ", tmp);
 }
 
 /*
@@ -4330,7 +4335,8 @@ void show_free_areas(unsigned int filter)
                        free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
 
                show_node(zone);
-               printk("%s"
+               printk(KERN_CONT
+                       "%s"
                        " free:%lukB"
                        " min:%lukB"
                        " low:%lukB"
@@ -4377,8 +4383,8 @@ void show_free_areas(unsigned int filter)
                        K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
                printk("lowmem_reserve[]:");
                for (i = 0; i < MAX_NR_ZONES; i++)
-                       printk(" %ld", zone->lowmem_reserve[i]);
-               printk("\n");
+                       printk(KERN_CONT " %ld", zone->lowmem_reserve[i]);
+               printk(KERN_CONT "\n");
        }
 
        for_each_populated_zone(zone) {
@@ -4389,7 +4395,7 @@ void show_free_areas(unsigned int filter)
                if (skip_free_areas_node(filter, zone_to_nid(zone)))
                        continue;
                show_node(zone);
-               printk("%s: ", zone->name);
+               printk(KERN_CONT "%s: ", zone->name);
 
                spin_lock_irqsave(&zone->lock, flags);
                for (order = 0; order < MAX_ORDER; order++) {
@@ -4407,11 +4413,12 @@ void show_free_areas(unsigned int filter)
                }
                spin_unlock_irqrestore(&zone->lock, flags);
                for (order = 0; order < MAX_ORDER; order++) {
-                       printk("%lu*%lukB ", nr[order], K(1UL) << order);
+                       printk(KERN_CONT "%lu*%lukB ",
+                              nr[order], K(1UL) << order);
                        if (nr[order])
                                show_migration_types(types[order]);
                }
-               printk("= %lukB\n", K(total));
+               printk(KERN_CONT "= %lukB\n", K(total));
        }
 
        hugetlb_show_meminfo();
@@ -4971,72 +4978,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 #endif
 }
 
-/*
- * Helper functions to size the waitqueue hash table.
- * Essentially these want to choose hash table sizes sufficiently
- * large so that collisions trying to wait on pages are rare.
- * But in fact, the number of active page waitqueues on typical
- * systems is ridiculously low, less than 200. So this is even
- * conservative, even though it seems large.
- *
- * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
- * waitqueues, i.e. the size of the waitq table given the number of pages.
- */
-#define PAGES_PER_WAITQUEUE    256
-
-#ifndef CONFIG_MEMORY_HOTPLUG
-static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
-{
-       unsigned long size = 1;
-
-       pages /= PAGES_PER_WAITQUEUE;
-
-       while (size < pages)
-               size <<= 1;
-
-       /*
-        * Once we have dozens or even hundreds of threads sleeping
-        * on IO we've got bigger problems than wait queue collision.
-        * Limit the size of the wait table to a reasonable size.
-        */
-       size = min(size, 4096UL);
-
-       return max(size, 4UL);
-}
-#else
-/*
- * A zone's size might be changed by hot-add, so it is not possible to determine
- * a suitable size for its wait_table.  So we use the maximum size now.
- *
- * The max wait table size = 4096 x sizeof(wait_queue_head_t).   ie:
- *
- *    i386 (preemption config)    : 4096 x 16 = 64Kbyte.
- *    ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte.
- *    ia64, x86-64 (preemption)   : 4096 x 24 = 96Kbyte.
- *
- * The maximum entries are prepared when a zone's memory is (512K + 256) pages
- * or more by the traditional way. (See above).  It equals:
- *
- *    i386, x86-64, powerpc(4K page size) : =  ( 2G + 1M)byte.
- *    ia64(16K page size)                 : =  ( 8G + 4M)byte.
- *    powerpc (64K page size)             : =  (32G +16M)byte.
- */
-static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
-{
-       return 4096UL;
-}
-#endif
-
-/*
- * This is an integer logarithm so that shifts can be used later
- * to extract the more random high bits from the multiplicative
- * hash function before the remainder is taken.
- */
-static inline unsigned long wait_table_bits(unsigned long size)
-{
-       return ffz(~size);
-}
-
 /*
  * Initially all pages are reserved - free ones are freed
  * up by free_all_bootmem() once the early boot process is
@@ -5299,49 +5240,6 @@ void __init setup_per_cpu_pageset(void)
                        alloc_percpu(struct per_cpu_nodestat);
 }
 
-static noinline __ref
-int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
-{
-       int i;
-       size_t alloc_size;
-
-       /*
-        * The per-page waitqueue mechanism uses hashed waitqueues
-        * per zone.
-        */
-       zone->wait_table_hash_nr_entries =
-                wait_table_hash_nr_entries(zone_size_pages);
-       zone->wait_table_bits =
-               wait_table_bits(zone->wait_table_hash_nr_entries);
-       alloc_size = zone->wait_table_hash_nr_entries
-                                       * sizeof(wait_queue_head_t);
-
-       if (!slab_is_available()) {
-               zone->wait_table = (wait_queue_head_t *)
-                       memblock_virt_alloc_node_nopanic(
-                               alloc_size, zone->zone_pgdat->node_id);
-       } else {
-               /*
-                * This case means that a zone whose size was 0 gets new memory
-                * via memory hot-add.
-                * But it may be the case that a new node was hot-added.  In
-                * this case vmalloc() will not be able to use this new node's
-                * memory - this wait_table must be initialized to use this new
-                * node itself as well.
-                * To use this new node's memory, further consideration will be
-                * necessary.
-                */
-               zone->wait_table = vmalloc(alloc_size);
-       }
-       if (!zone->wait_table)
-               return -ENOMEM;
-
-       for (i = 0; i < zone->wait_table_hash_nr_entries; ++i)
-               init_waitqueue_head(zone->wait_table + i);
-
-       return 0;
-}
-
 static __meminit void zone_pcp_init(struct zone *zone)
 {
        /*
@@ -5362,10 +5260,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
                                        unsigned long size)
 {
        struct pglist_data *pgdat = zone->zone_pgdat;
-       int ret;
-       ret = zone_wait_table_init(zone, size);
-       if (ret)
-               return ret;
+
        pgdat->nr_zones = zone_idx(zone) + 1;
 
        zone->zone_start_pfn = zone_start_pfn;
@@ -5377,6 +5272,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
                        zone_start_pfn, (zone_start_pfn + size));
 
        zone_init_free_lists(zone);
+       zone->initialized = 1;
 
        return 0;
 }
index 9903830aaebbf00256fbb108891c2e5e09baecac..255714302394137e37e4d8bd5cf87d685732d9d6 100644 (file)
@@ -1961,8 +1961,9 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
        void *base = (void *)ULONG_MAX;
        void **areas = NULL;
        struct pcpu_alloc_info *ai;
-       size_t size_sum, areas_size, max_distance;
-       int group, i, rc;
+       size_t size_sum, areas_size;
+       unsigned long max_distance;
+       int group, i, highest_group, rc;
 
        ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
                                   cpu_distance_fn);
@@ -1978,7 +1979,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
                goto out_free;
        }
 
-       /* allocate, copy and determine base address */
+       /* allocate, copy and determine base address & max_distance */
+       highest_group = 0;
        for (group = 0; group < ai->nr_groups; group++) {
                struct pcpu_group_info *gi = &ai->groups[group];
                unsigned int cpu = NR_CPUS;
@@ -1999,6 +2001,21 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
                areas[group] = ptr;
 
                base = min(ptr, base);
+               if (ptr > areas[highest_group])
+                       highest_group = group;
+       }
+       max_distance = areas[highest_group] - base;
+       max_distance += ai->unit_size * ai->groups[highest_group].nr_units;
+
+       /* warn if maximum distance is further than 75% of vmalloc space */
+       if (max_distance > VMALLOC_TOTAL * 3 / 4) {
+               pr_warn("max_distance=0x%lx too large for vmalloc space 0x%lx\n",
+                               max_distance, VMALLOC_TOTAL);
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+               /* and fail if we have fallback */
+               rc = -EINVAL;
+               goto out_free_areas;
+#endif
        }
 
        /*
@@ -2023,23 +2040,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
        }
 
        /* base address is now known, determine group base offsets */
-       max_distance = 0;
        for (group = 0; group < ai->nr_groups; group++) {
                ai->groups[group].base_offset = areas[group] - base;
-               max_distance = max_t(size_t, max_distance,
-                                    ai->groups[group].base_offset);
-       }
-       max_distance += ai->unit_size;
-
-       /* warn if maximum distance is further than 75% of vmalloc space */
-       if (max_distance > VMALLOC_TOTAL * 3 / 4) {
-               pr_warn("max_distance=0x%zx too large for vmalloc space 0x%lx\n",
-                       max_distance, VMALLOC_TOTAL);
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
-               /* and fail if we have fallback */
-               rc = -EINVAL;
-               goto out_free;
-#endif
        }
 
        pr_info("Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
index 07514d41ebcc1623b789fc93e09794058ecdc6ca..be8dc8d1edb95b34d8c6b7fbf34321e597e981e1 100644 (file)
@@ -88,12 +88,16 @@ static int process_vm_rw_single_vec(unsigned long addr,
        ssize_t rc = 0;
        unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
                / sizeof(struct pages *);
+       unsigned int flags = FOLL_REMOTE;
 
        /* Work out address and page range required */
        if (len == 0)
                return 0;
        nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1;
 
+       if (vm_write)
+               flags |= FOLL_WRITE;
+
        while (!rc && nr_pages && iov_iter_count(iter)) {
                int pages = min(nr_pages, max_pages_per_loop);
                size_t bytes;
@@ -104,8 +108,7 @@ static int process_vm_rw_single_vec(unsigned long addr,
                 * current/current->mm
                 */
                pages = __get_user_pages_unlocked(task, mm, pa, pages,
-                                                 vm_write, 0, process_pages,
-                                                 FOLL_REMOTE);
+                                                 process_pages, flags);
                if (pages <= 0)
                        return -EFAULT;
 
index ad7813d73ea79879008eb570e790a101a4d7c3f7..166ebf5d2bceda1bdc9a824b6b3040f28588a171 100644 (file)
@@ -1483,6 +1483,8 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
        copy_highpage(newpage, oldpage);
        flush_dcache_page(newpage);
 
+       __SetPageLocked(newpage);
+       __SetPageSwapBacked(newpage);
        SetPageUptodate(newpage);
        set_page_private(newpage, swap_index);
        SetPageSwapCache(newpage);
index 090fb26b3a39b4feba105650f2a663f5c9972064..0b0550ca85b40c9108b4085873d67babc200885d 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -233,6 +233,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
        spin_lock_init(&parent->list_lock);
        parent->free_objects = 0;
        parent->free_touched = 0;
+       parent->num_slabs = 0;
 }
 
 #define MAKE_LIST(cachep, listp, slab, nodeid)                         \
@@ -966,7 +967,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
         * guaranteed to be valid until irq is re-enabled, because it will be
         * freed after synchronize_sched().
         */
-       if (force_change)
+       if (old_shared && force_change)
                synchronize_sched();
 
 fail:
@@ -1382,24 +1383,27 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
        for_each_kmem_cache_node(cachep, node, n) {
                unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
                unsigned long active_slabs = 0, num_slabs = 0;
+               unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+               unsigned long num_slabs_full;
 
                spin_lock_irqsave(&n->list_lock, flags);
-               list_for_each_entry(page, &n->slabs_full, lru) {
-                       active_objs += cachep->num;
-                       active_slabs++;
-               }
+               num_slabs = n->num_slabs;
                list_for_each_entry(page, &n->slabs_partial, lru) {
                        active_objs += page->active;
-                       active_slabs++;
+                       num_slabs_partial++;
                }
                list_for_each_entry(page, &n->slabs_free, lru)
-                       num_slabs++;
+                       num_slabs_free++;
 
                free_objects += n->free_objects;
                spin_unlock_irqrestore(&n->list_lock, flags);
 
-               num_slabs += active_slabs;
                num_objs = num_slabs * cachep->num;
+               active_slabs = num_slabs - num_slabs_free;
+               num_slabs_full = num_slabs -
+                       (num_slabs_partial + num_slabs_free);
+               active_objs += (num_slabs_full * cachep->num);
+
                pr_warn("  node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
                        node, active_slabs, num_slabs, active_objs, num_objs,
                        free_objects);
@@ -2314,6 +2318,7 @@ static int drain_freelist(struct kmem_cache *cache,
 
                page = list_entry(p, struct page, lru);
                list_del(&page->lru);
+               n->num_slabs--;
                /*
                 * Safe to drop the lock. The slab is no longer linked
                 * to the cache.
@@ -2752,6 +2757,8 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
                list_add_tail(&page->lru, &(n->slabs_free));
        else
                fixup_slab_list(cachep, n, page, &list);
+
+       n->num_slabs++;
        STATS_INC_GROWN(cachep);
        n->free_objects += cachep->num - page->active;
        spin_unlock(&n->list_lock);
@@ -3443,6 +3450,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
 
                page = list_last_entry(&n->slabs_free, struct page, lru);
                list_move(&page->lru, list);
+               n->num_slabs--;
        }
 }
 
@@ -4099,6 +4107,8 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
        unsigned long num_objs;
        unsigned long active_slabs = 0;
        unsigned long num_slabs, free_objects = 0, shared_avail = 0;
+       unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+       unsigned long num_slabs_full = 0;
        const char *name;
        char *error = NULL;
        int node;
@@ -4111,33 +4121,34 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
                check_irq_on();
                spin_lock_irq(&n->list_lock);
 
-               list_for_each_entry(page, &n->slabs_full, lru) {
-                       if (page->active != cachep->num && !error)
-                               error = "slabs_full accounting error";
-                       active_objs += cachep->num;
-                       active_slabs++;
-               }
+               num_slabs += n->num_slabs;
+
                list_for_each_entry(page, &n->slabs_partial, lru) {
                        if (page->active == cachep->num && !error)
                                error = "slabs_partial accounting error";
                        if (!page->active && !error)
                                error = "slabs_partial accounting error";
                        active_objs += page->active;
-                       active_slabs++;
+                       num_slabs_partial++;
                }
+
                list_for_each_entry(page, &n->slabs_free, lru) {
                        if (page->active && !error)
                                error = "slabs_free accounting error";
-                       num_slabs++;
+                       num_slabs_free++;
                }
+
                free_objects += n->free_objects;
                if (n->shared)
                        shared_avail += n->shared->avail;
 
                spin_unlock_irq(&n->list_lock);
        }
-       num_slabs += active_slabs;
        num_objs = num_slabs * cachep->num;
+       active_slabs = num_slabs - num_slabs_free;
+       num_slabs_full = num_slabs - (num_slabs_partial + num_slabs_free);
+       active_objs += (num_slabs_full * cachep->num);
+
        if (num_objs - active_objs != free_objects && !error)
                error = "free_objects accounting error";
 
index 9653f2e2591ad0982d2dc74c668323a43e5b026d..bc05fdc3edce106b12e5113ad8c8777de6de217d 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -432,6 +432,7 @@ struct kmem_cache_node {
        struct list_head slabs_partial; /* partial list first, better asm code */
        struct list_head slabs_full;
        struct list_head slabs_free;
+       unsigned long num_slabs;
        unsigned long free_objects;
        unsigned int free_limit;
        unsigned int colour_next;       /* Per-node cache coloring */
index 71f0b28a1bec8bc58a479f7c53343b647bcf5f24..329b03843863940f2288ea046d6b950d126c0a49 100644 (file)
@@ -533,8 +533,8 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
 
        s = create_cache(cache_name, root_cache->object_size,
                         root_cache->size, root_cache->align,
-                        root_cache->flags, root_cache->ctor,
-                        memcg, root_cache);
+                        root_cache->flags & CACHE_CREATE_MASK,
+                        root_cache->ctor, memcg, root_cache);
        /*
         * If we could not create a memcg cache, do not complain, because
         * that's not critical at all as we can always proceed with the root
index 2210de290b54d160d31afc937471225077bf083a..f30438970cd176e5dde188bc6e05a28c3367e451 100644 (file)
@@ -2224,6 +2224,8 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
                swab32s(&swap_header->info.version);
                swab32s(&swap_header->info.last_page);
                swab32s(&swap_header->info.nr_badpages);
+               if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
+                       return 0;
                for (i = 0; i < swap_header->info.nr_badpages; i++)
                        swab32s(&swap_header->info.badpages[i]);
        }
index 662cddf914af2048ab6c9c674f59d37a7aceba69..1a41553db866f543719c019e36dc2c71c4b3b984 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -230,8 +230,10 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /* Check if the vma is being used as a stack by this task */
-int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t)
+int vma_is_stack_for_current(struct vm_area_struct *vma)
 {
+       struct task_struct * __maybe_unused t = current;
+
        return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
 }
 
@@ -283,7 +285,8 @@ EXPORT_SYMBOL_GPL(__get_user_pages_fast);
 int __weak get_user_pages_fast(unsigned long start,
                                int nr_pages, int write, struct page **pages)
 {
-       return get_user_pages_unlocked(start, nr_pages, write, 0, pages);
+       return get_user_pages_unlocked(start, nr_pages, pages,
+                                      write ? FOLL_WRITE : 0);
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
@@ -623,7 +626,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen)
        if (len > buflen)
                len = buflen;
 
-       res = access_process_vm(task, arg_start, buffer, len, 0);
+       res = access_process_vm(task, arg_start, buffer, len, FOLL_FORCE);
 
        /*
         * If the nul at the end of args has been overwritten, then
@@ -638,7 +641,8 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen)
                        if (len > buflen - res)
                                len = buflen - res;
                        res += access_process_vm(task, env_start,
-                                                buffer+res, len, 0);
+                                                buffer+res, len,
+                                                FOLL_FORCE);
                        res = strnlen(buffer, res);
                }
        }
index 744f926af442096c36eac4b0d9259c25e7627e0e..76fda22681480b68c9432dce3d18a4e0d89516e7 100644 (file)
@@ -3043,7 +3043,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                                            sc.gfp_mask,
                                            sc.reclaim_idx);
 
+       current->flags |= PF_MEMALLOC;
        nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+       current->flags &= ~PF_MEMALLOC;
 
        trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 
index 9accde339601e6d736a281793ccf19d805a10df7..691f0ad7067d167dfac7e8746b43c4b83fe47754 100644 (file)
@@ -44,7 +44,7 @@
 
 /* Global VLAN variables */
 
-int vlan_net_id __read_mostly;
+unsigned int vlan_net_id __read_mostly;
 
 const char vlan_fullname[] = "802.1Q VLAN Support";
 const char vlan_version[] = DRV_VERSION;
@@ -663,7 +663,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
 
        skb_gro_pull(skb, sizeof(*vhdr));
        skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr));
-       pp = ptype->callbacks.gro_receive(head, skb);
+       pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
 
 out_unlock:
        rcu_read_unlock();
index cc15579780669a613165336387a3f8eabc669dec..df8bd65dd370e225c8e410202d8130b9857aac5d 100644 (file)
@@ -159,7 +159,7 @@ void vlan_netlink_fini(void);
 
 extern struct rtnl_link_ops vlan_link_ops;
 
-extern int vlan_net_id;
+extern unsigned int vlan_net_id;
 
 struct proc_dir_entry;
 
index 7b6cd340b72bc52a651be7906e072fe4306f6ee7..a1005007224ca04ee673fb948776107d6ba075c4 100644 (file)
@@ -402,6 +402,14 @@ config LWTUNNEL
          weight tunnel endpoint. Tunnel encapsulation parameters are stored
          with light weight tunnel state associated with fib routes.
 
+config LWTUNNEL_BPF
+       bool "Execute BPF program as route nexthop action"
+       depends on LWTUNNEL
+       default y if LWTUNNEL=y
+       ---help---
+         Allows to run BPF programs as a nexthop action following a route
+         lookup for incoming and outgoing packets.
+
 config DST_CACHE
        bool
        default n
index f20742cbae6d3d69cd959a8baa53922f3d45c356..b73b96a2854b152eebf8e9a5c68caa892318c6e6 100644 (file)
@@ -17,7 +17,7 @@ config BATMAN_ADV
 
 config BATMAN_ADV_BATMAN_V
        bool "B.A.T.M.A.N. V protocol (experimental)"
-       depends on BATMAN_ADV && CFG80211=y || (CFG80211=m && BATMAN_ADV=m)
+       depends on BATMAN_ADV && !(CFG80211=m && BATMAN_ADV=y)
        default n
        help
          This option enables the B.A.T.M.A.N. V protocol, the successor
index e2d18d0b1f06260ad69cb0d8c012ded379f582c3..f00f666e2ccd4714bb7a5210c48e39edb40e0c17 100644 (file)
@@ -698,7 +698,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
 
        forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
        if (!forw_packet_aggr->skb) {
-               batadv_forw_packet_free(forw_packet_aggr);
+               batadv_forw_packet_free(forw_packet_aggr, true);
                return;
        }
 
@@ -717,17 +717,10 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff,
        if (direct_link)
                forw_packet_aggr->direct_link_flags |= 1;
 
-       /* add new packet to packet list */
-       spin_lock_bh(&bat_priv->forw_bat_list_lock);
-       hlist_add_head(&forw_packet_aggr->list, &bat_priv->forw_bat_list);
-       spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-
-       /* start timer for this packet */
        INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work,
                          batadv_iv_send_outstanding_bat_ogm_packet);
-       queue_delayed_work(batadv_event_workqueue,
-                          &forw_packet_aggr->delayed_work,
-                          send_time - jiffies);
+
+       batadv_forw_packet_ogmv1_queue(bat_priv, forw_packet_aggr, send_time);
 }
 
 /* aggregate a new packet into the existing ogm packet */
@@ -1272,7 +1265,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
         */
        tq_iface_penalty = BATADV_TQ_MAX_VALUE;
        if (if_outgoing && (if_incoming == if_outgoing) &&
-           batadv_is_wifi_netdev(if_outgoing->net_dev))
+           batadv_is_wifi_hardif(if_outgoing))
                tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
                                                      bat_priv);
 
@@ -1611,7 +1604,7 @@ out:
        if (hardif_neigh)
                batadv_hardif_neigh_put(hardif_neigh);
 
-       kfree_skb(skb_priv);
+       consume_skb(skb_priv);
 }
 
 /**
@@ -1783,17 +1776,17 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work)
        struct delayed_work *delayed_work;
        struct batadv_forw_packet *forw_packet;
        struct batadv_priv *bat_priv;
+       bool dropped = false;
 
        delayed_work = to_delayed_work(work);
        forw_packet = container_of(delayed_work, struct batadv_forw_packet,
                                   delayed_work);
        bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface);
-       spin_lock_bh(&bat_priv->forw_bat_list_lock);
-       hlist_del(&forw_packet->list);
-       spin_unlock_bh(&bat_priv->forw_bat_list_lock);
 
-       if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
+       if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) {
+               dropped = true;
                goto out;
+       }
 
        batadv_iv_ogm_emit(forw_packet);
 
@@ -1810,7 +1803,10 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work)
                batadv_iv_ogm_schedule(forw_packet->if_incoming);
 
 out:
-       batadv_forw_packet_free(forw_packet);
+       /* do we get something for free()? */
+       if (batadv_forw_packet_steal(forw_packet,
+                                    &bat_priv->forw_bat_list_lock))
+               batadv_forw_packet_free(forw_packet, dropped);
 }
 
 static int batadv_iv_ogm_receive(struct sk_buff *skb,
@@ -1820,17 +1816,18 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
        struct batadv_ogm_packet *ogm_packet;
        u8 *packet_pos;
        int ogm_offset;
-       bool ret;
+       bool res;
+       int ret = NET_RX_DROP;
 
-       ret = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN);
-       if (!ret)
-               return NET_RX_DROP;
+       res = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN);
+       if (!res)
+               goto free_skb;
 
        /* did we receive a B.A.T.M.A.N. IV OGM packet on an interface
         * that does not have B.A.T.M.A.N. IV enabled ?
         */
        if (bat_priv->algo_ops->iface.enable != batadv_iv_ogm_iface_enable)
-               return NET_RX_DROP;
+               goto free_skb;
 
        batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX);
        batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES,
@@ -1851,8 +1848,15 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb,
                ogm_packet = (struct batadv_ogm_packet *)packet_pos;
        }
 
-       kfree_skb(skb);
-       return NET_RX_SUCCESS;
+       ret = NET_RX_SUCCESS;
+
+free_skb:
+       if (ret == NET_RX_SUCCESS)
+               consume_skb(skb);
+       else
+               kfree_skb(skb);
+
+       return ret;
 }
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
@@ -2486,7 +2490,7 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
        struct batadv_orig_node *orig_node;
 
        rcu_read_lock();
-       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
                orig_node = gw_node->orig_node;
                router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
                if (!router)
@@ -2674,7 +2678,7 @@ static void batadv_iv_gw_print(struct batadv_priv *bat_priv,
                 "      Gateway      (#/255)           Nexthop [outgoingIF]: advertised uplink bandwidth\n");
 
        rcu_read_lock();
-       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
                /* fails if orig_node has no router */
                if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
                        continue;
@@ -2774,7 +2778,7 @@ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
        int idx = 0;
 
        rcu_read_lock();
-       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
                if (idx++ < idx_skip)
                        continue;
 
index e79f6f01182eb6731927aea1bc0cba553fb600ff..2ac612d7bab4d0b4035c9e476dab17536349dca3 100644 (file)
@@ -750,7 +750,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
        u32 max_bw = 0, bw;
 
        rcu_read_lock();
-       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
                if (!kref_get_unless_zero(&gw_node->refcount))
                        continue;
 
@@ -787,7 +787,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
                                    struct batadv_orig_node *curr_gw_orig,
                                    struct batadv_orig_node *orig_node)
 {
-       struct batadv_gw_node *curr_gw = NULL, *orig_gw = NULL;
+       struct batadv_gw_node *curr_gw, *orig_gw = NULL;
        u32 gw_throughput, orig_throughput, threshold;
        bool ret = false;
 
@@ -889,7 +889,7 @@ static void batadv_v_gw_print(struct batadv_priv *bat_priv,
                 "      Gateway        ( throughput)           Nexthop [outgoingIF]: advertised uplink bandwidth\n");
 
        rcu_read_lock();
-       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
                /* fails if orig_node has no router */
                if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
                        continue;
@@ -1009,7 +1009,7 @@ static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
        int idx = 0;
 
        rcu_read_lock();
-       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+       hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) {
                if (idx++ < idx_skip)
                        continue;
 
index ee08540ce503a1cc3fc350a693d87510cf51ab32..f2fb2f05b6bf280d2b5fae26ed10288f73345f16 100644 (file)
@@ -75,6 +75,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
 {
        struct batadv_hard_iface *hard_iface = neigh->if_incoming;
        struct ethtool_link_ksettings link_settings;
+       struct net_device *real_netdev;
        struct station_info sinfo;
        u32 throughput;
        int ret;
@@ -89,23 +90,27 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
        /* if this is a wireless device, then ask its throughput through
         * cfg80211 API
         */
-       if (batadv_is_wifi_netdev(hard_iface->net_dev)) {
-               if (hard_iface->net_dev->ieee80211_ptr) {
-                       ret = cfg80211_get_station(hard_iface->net_dev,
-                                                  neigh->addr, &sinfo);
-                       if (ret == -ENOENT) {
-                               /* Node is not associated anymore! It would be
-                                * possible to delete this neighbor. For now set
-                                * the throughput metric to 0.
-                                */
-                               return 0;
-                       }
-                       if (!ret)
-                               return sinfo.expected_throughput / 100;
+       if (batadv_is_wifi_hardif(hard_iface)) {
+               if (!batadv_is_cfg80211_hardif(hard_iface))
+                       /* unsupported WiFi driver version */
+                       goto default_throughput;
+
+               real_netdev = batadv_get_real_netdev(hard_iface->net_dev);
+               if (!real_netdev)
+                       goto default_throughput;
+
+               ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
+
+               dev_put(real_netdev);
+               if (ret == -ENOENT) {
+                       /* Node is not associated anymore! It would be
+                        * possible to delete this neighbor. For now set
+                        * the throughput metric to 0.
+                        */
+                       return 0;
                }
-
-               /* unsupported WiFi driver version */
-               goto default_throughput;
+               if (!ret)
+                       return sinfo.expected_throughput / 100;
        }
 
        /* if not a wifi interface, check if this device provides data via
@@ -187,7 +192,7 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh)
        int elp_skb_len;
 
        /* this probing routine is for Wifi neighbours only */
-       if (!batadv_is_wifi_netdev(hard_iface->net_dev))
+       if (!batadv_is_wifi_hardif(hard_iface))
                return true;
 
        /* probe the neighbor only if no unicast packets have been sent
@@ -352,7 +357,7 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
        /* warn the user (again) if there is no throughput data is available */
        hard_iface->bat_v.flags &= ~BATADV_WARNING_DEFAULT;
 
-       if (batadv_is_wifi_netdev(hard_iface->net_dev))
+       if (batadv_is_wifi_hardif(hard_iface))
                hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
 
        INIT_DELAYED_WORK(&hard_iface->bat_v.elp_wq,
@@ -492,20 +497,21 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
        struct batadv_elp_packet *elp_packet;
        struct batadv_hard_iface *primary_if;
        struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
-       bool ret;
+       bool res;
+       int ret = NET_RX_DROP;
 
-       ret = batadv_check_management_packet(skb, if_incoming, BATADV_ELP_HLEN);
-       if (!ret)
-               return NET_RX_DROP;
+       res = batadv_check_management_packet(skb, if_incoming, BATADV_ELP_HLEN);
+       if (!res)
+               goto free_skb;
 
        if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
-               return NET_RX_DROP;
+               goto free_skb;
 
        /* did we receive a B.A.T.M.A.N. V ELP packet on an interface
         * that does not have B.A.T.M.A.N. V ELP enabled ?
         */
        if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0)
-               return NET_RX_DROP;
+               goto free_skb;
 
        elp_packet = (struct batadv_elp_packet *)skb->data;
 
@@ -516,14 +522,19 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
 
        primary_if = batadv_primary_if_get_selected(bat_priv);
        if (!primary_if)
-               goto out;
+               goto free_skb;
 
        batadv_v_elp_neigh_update(bat_priv, ethhdr->h_source, if_incoming,
                                  elp_packet);
 
-out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
-       consume_skb(skb);
-       return NET_RX_SUCCESS;
+       ret = NET_RX_SUCCESS;
+       batadv_hardif_put(primary_if);
+
+free_skb:
+       if (ret == NET_RX_SUCCESS)
+               consume_skb(skb);
+       else
+               kfree_skb(skb);
+
+       return ret;
 }
index 1aeeadca620cd090ce763ef017cb01f6a2ec2890..38b9aab83fc0eaf63e3713d278482524253d5c1a 100644 (file)
@@ -140,6 +140,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
        unsigned char *ogm_buff, *pkt_buff;
        int ogm_buff_len;
        u16 tvlv_len = 0;
+       int ret;
 
        bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work);
        bat_priv = container_of(bat_v, struct batadv_priv, bat_v);
@@ -182,6 +183,31 @@ static void batadv_v_ogm_send(struct work_struct *work)
                if (!kref_get_unless_zero(&hard_iface->refcount))
                        continue;
 
+               ret = batadv_hardif_no_broadcast(hard_iface, NULL, NULL);
+               if (ret) {
+                       char *type;
+
+                       switch (ret) {
+                       case BATADV_HARDIF_BCAST_NORECIPIENT:
+                               type = "no neighbor";
+                               break;
+                       case BATADV_HARDIF_BCAST_DUPFWD:
+                               type = "single neighbor is source";
+                               break;
+                       case BATADV_HARDIF_BCAST_DUPORIG:
+                               type = "single neighbor is originator";
+                               break;
+                       default:
+                               type = "unknown";
+                       }
+
+                       batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselve on %s surpressed: %s\n",
+                                  hard_iface->net_dev->name, type);
+
+                       batadv_hardif_put(hard_iface);
+                       continue;
+               }
+
                batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
                           "Sending own OGM2 packet (originator %pM, seqno %u, throughput %u, TTL %d) on interface %s [%pM]\n",
                           ogm_packet->orig, ntohl(ogm_packet->seqno),
@@ -401,7 +427,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
                                      struct batadv_hard_iface *if_incoming,
                                      struct batadv_hard_iface *if_outgoing)
 {
-       struct batadv_orig_ifinfo *orig_ifinfo = NULL;
+       struct batadv_orig_ifinfo *orig_ifinfo;
        struct batadv_neigh_ifinfo *neigh_ifinfo = NULL;
        bool protection_started = false;
        int ret = -EINVAL;
@@ -486,7 +512,7 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
                                      struct batadv_hard_iface *if_outgoing)
 {
        struct batadv_neigh_node *router = NULL;
-       struct batadv_orig_node *orig_neigh_node = NULL;
+       struct batadv_orig_node *orig_neigh_node;
        struct batadv_neigh_node *orig_neigh_router = NULL;
        struct batadv_neigh_ifinfo *router_ifinfo = NULL, *neigh_ifinfo = NULL;
        u32 router_throughput, neigh_throughput;
@@ -651,6 +677,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
        struct batadv_hard_iface *hard_iface;
        struct batadv_ogm2_packet *ogm_packet;
        u32 ogm_throughput, link_throughput, path_throughput;
+       int ret;
 
        ethhdr = eth_hdr(skb);
        ogm_packet = (struct batadv_ogm2_packet *)(skb->data + ogm_offset);
@@ -716,6 +743,35 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset,
                if (!kref_get_unless_zero(&hard_iface->refcount))
                        continue;
 
+               ret = batadv_hardif_no_broadcast(hard_iface,
+                                                ogm_packet->orig,
+                                                hardif_neigh->orig);
+
+               if (ret) {
+                       char *type;
+
+                       switch (ret) {
+                       case BATADV_HARDIF_BCAST_NORECIPIENT:
+                               type = "no neighbor";
+                               break;
+                       case BATADV_HARDIF_BCAST_DUPFWD:
+                               type = "single neighbor is source";
+                               break;
+                       case BATADV_HARDIF_BCAST_DUPORIG:
+                               type = "single neighbor is originator";
+                               break;
+                       default:
+                               type = "unknown";
+                       }
+
+                       batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s surpressed: %s\n",
+                                  ogm_packet->orig, hard_iface->net_dev->name,
+                                  type);
+
+                       batadv_hardif_put(hard_iface);
+                       continue;
+               }
+
                batadv_v_ogm_process_per_outif(bat_priv, ethhdr, ogm_packet,
                                               orig_node, neigh_node,
                                               if_incoming, hard_iface);
@@ -754,18 +810,18 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
         * B.A.T.M.A.N. V enabled ?
         */
        if (strcmp(bat_priv->algo_ops->name, "BATMAN_V") != 0)
-               return NET_RX_DROP;
+               goto free_skb;
 
        if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
-               return NET_RX_DROP;
+               goto free_skb;
 
        if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
-               return NET_RX_DROP;
+               goto free_skb;
 
        ogm_packet = (struct batadv_ogm2_packet *)skb->data;
 
        if (batadv_is_my_mac(bat_priv, ogm_packet->orig))
-               return NET_RX_DROP;
+               goto free_skb;
 
        batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX);
        batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES,
@@ -786,7 +842,12 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
        }
 
        ret = NET_RX_SUCCESS;
-       consume_skb(skb);
+
+free_skb:
+       if (ret == NET_RX_SUCCESS)
+               consume_skb(skb);
+       else
+               kfree_skb(skb);
 
        return ret;
 }
index b4ffba7dd58331bcba72e045cbe5b63d99c405f1..77925504379dac7d64777393ddae326b5d6d9505 100644 (file)
@@ -186,7 +186,7 @@ struct batadv_debuginfo batadv_debuginfo_##_name = {        \
 /* the following attributes are general and therefore they will be directly
  * placed in the BATADV_DEBUGFS_SUBDIR subdirectory of debugfs
  */
-static BATADV_DEBUGINFO(routing_algos, S_IRUGO, batadv_algorithms_open);
+static BATADV_DEBUGINFO(routing_algos, 0444, batadv_algorithms_open);
 
 static struct batadv_debuginfo *batadv_general_debuginfos[] = {
        &batadv_debuginfo_routing_algos,
@@ -194,26 +194,24 @@ static struct batadv_debuginfo *batadv_general_debuginfos[] = {
 };
 
 /* The following attributes are per soft interface */
-static BATADV_DEBUGINFO(neighbors, S_IRUGO, neighbors_open);
-static BATADV_DEBUGINFO(originators, S_IRUGO, batadv_originators_open);
-static BATADV_DEBUGINFO(gateways, S_IRUGO, batadv_gateways_open);
-static BATADV_DEBUGINFO(transtable_global, S_IRUGO,
-                       batadv_transtable_global_open);
+static BATADV_DEBUGINFO(neighbors, 0444, neighbors_open);
+static BATADV_DEBUGINFO(originators, 0444, batadv_originators_open);
+static BATADV_DEBUGINFO(gateways, 0444, batadv_gateways_open);
+static BATADV_DEBUGINFO(transtable_global, 0444, batadv_transtable_global_open);
 #ifdef CONFIG_BATMAN_ADV_BLA
-static BATADV_DEBUGINFO(bla_claim_table, S_IRUGO, batadv_bla_claim_table_open);
-static BATADV_DEBUGINFO(bla_backbone_table, S_IRUGO,
+static BATADV_DEBUGINFO(bla_claim_table, 0444, batadv_bla_claim_table_open);
+static BATADV_DEBUGINFO(bla_backbone_table, 0444,
                        batadv_bla_backbone_table_open);
 #endif
 #ifdef CONFIG_BATMAN_ADV_DAT
-static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open);
+static BATADV_DEBUGINFO(dat_cache, 0444, batadv_dat_cache_open);
 #endif
-static BATADV_DEBUGINFO(transtable_local, S_IRUGO,
-                       batadv_transtable_local_open);
+static BATADV_DEBUGINFO(transtable_local, 0444, batadv_transtable_local_open);
 #ifdef CONFIG_BATMAN_ADV_NC
-static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open);
+static BATADV_DEBUGINFO(nc_nodes, 0444, batadv_nc_nodes_open);
 #endif
 #ifdef CONFIG_BATMAN_ADV_MCAST
-static BATADV_DEBUGINFO(mcast_flags, S_IRUGO, batadv_mcast_flags_open);
+static BATADV_DEBUGINFO(mcast_flags, 0444, batadv_mcast_flags_open);
 #endif
 
 static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
@@ -253,7 +251,7 @@ struct batadv_debuginfo batadv_hardif_debuginfo_##_name = { \
        },                                                      \
 }
 
-static BATADV_HARDIF_DEBUGINFO(originators, S_IRUGO,
+static BATADV_HARDIF_DEBUGINFO(originators, 0444,
                               batadv_originators_hardif_open);
 
 static struct batadv_debuginfo *batadv_hardif_debuginfos[] = {
index e257efdc5d0374cdb13ad8c3282269049eeb4d16..49576c5a3fe306a42c28c3901d2b2c6cce7d0b8e 100644 (file)
@@ -369,12 +369,11 @@ out:
  * batadv_dbg_arp - print a debug message containing all the ARP packet details
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: ARP packet
- * @type: ARP type
  * @hdr_size: size of the possible header before the ARP packet
  * @msg: message to print together with the debugging information
  */
 static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
-                          u16 type, int hdr_size, char *msg)
+                          int hdr_size, char *msg)
 {
        struct batadv_unicast_4addr_packet *unicast_4addr_packet;
        struct batadv_bcast_packet *bcast_pkt;
@@ -441,7 +440,7 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
 #else
 
 static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb,
-                          u16 type, int hdr_size, char *msg)
+                          int hdr_size, char *msg)
 {
 }
 
@@ -949,6 +948,41 @@ static unsigned short batadv_dat_get_vid(struct sk_buff *skb, int *hdr_size)
        return vid;
 }
 
+/**
+ * batadv_dat_arp_create_reply - create an ARP Reply
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ip_src: ARP sender IP
+ * @ip_dst: ARP target IP
+ * @hw_src: Ethernet source and ARP sender MAC
+ * @hw_dst: Ethernet destination and ARP target MAC
+ * @vid: VLAN identifier (optional, set to zero otherwise)
+ *
+ * Creates an ARP Reply from the given values, optionally encapsulated in a
+ * VLAN header.
+ *
+ * Return: An skb containing an ARP Reply.
+ */
+static struct sk_buff *
+batadv_dat_arp_create_reply(struct batadv_priv *bat_priv, __be32 ip_src,
+                           __be32 ip_dst, u8 *hw_src, u8 *hw_dst,
+                           unsigned short vid)
+{
+       struct sk_buff *skb;
+
+       skb = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_dst, bat_priv->soft_iface,
+                        ip_src, hw_dst, hw_src, hw_dst);
+       if (!skb)
+               return NULL;
+
+       skb_reset_mac_header(skb);
+
+       if (vid & BATADV_VLAN_HAS_TAG)
+               skb = vlan_insert_tag(skb, htons(ETH_P_8021Q),
+                                     vid & VLAN_VID_MASK);
+
+       return skb;
+}
+
 /**
  * batadv_dat_snoop_outgoing_arp_request - snoop the ARP request and try to
  * answer using DAT
@@ -983,8 +1017,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
        if (type != ARPOP_REQUEST)
                goto out;
 
-       batadv_dbg_arp(bat_priv, skb, type, hdr_size,
-                      "Parsing outgoing ARP REQUEST");
+       batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing outgoing ARP REQUEST");
 
        ip_src = batadv_arp_ip_src(skb, hdr_size);
        hw_src = batadv_arp_hw_src(skb, hdr_size);
@@ -1007,20 +1040,12 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
                        goto out;
                }
 
-               skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
-                                    bat_priv->soft_iface, ip_dst, hw_src,
-                                    dat_entry->mac_addr, hw_src);
+               skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src,
+                                                     dat_entry->mac_addr,
+                                                     hw_src, vid);
                if (!skb_new)
                        goto out;
 
-               if (vid & BATADV_VLAN_HAS_TAG) {
-                       skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
-                                                 vid & VLAN_VID_MASK);
-                       if (!skb_new)
-                               goto out;
-               }
-
-               skb_reset_mac_header(skb_new);
                skb_new->protocol = eth_type_trans(skb_new,
                                                   bat_priv->soft_iface);
                bat_priv->stats.rx_packets++;
@@ -1075,8 +1100,7 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
        ip_src = batadv_arp_ip_src(skb, hdr_size);
        ip_dst = batadv_arp_ip_dst(skb, hdr_size);
 
-       batadv_dbg_arp(bat_priv, skb, type, hdr_size,
-                      "Parsing incoming ARP REQUEST");
+       batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing incoming ARP REQUEST");
 
        batadv_dat_entry_add(bat_priv, ip_src, hw_src, vid);
 
@@ -1084,25 +1108,11 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
        if (!dat_entry)
                goto out;
 
-       skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
-                            bat_priv->soft_iface, ip_dst, hw_src,
-                            dat_entry->mac_addr, hw_src);
-
+       skb_new = batadv_dat_arp_create_reply(bat_priv, ip_dst, ip_src,
+                                             dat_entry->mac_addr, hw_src, vid);
        if (!skb_new)
                goto out;
 
-       /* the rest of the TX path assumes that the mac_header offset pointing
-        * to the inner Ethernet header has been set, therefore reset it now.
-        */
-       skb_reset_mac_header(skb_new);
-
-       if (vid & BATADV_VLAN_HAS_TAG) {
-               skb_new = vlan_insert_tag(skb_new, htons(ETH_P_8021Q),
-                                         vid & VLAN_VID_MASK);
-               if (!skb_new)
-                       goto out;
-       }
-
        /* To preserve backwards compatibility, the node has choose the outgoing
         * format based on the incoming request packet type. The assumption is
         * that a node not using the 4addr packet format doesn't support it.
@@ -1149,8 +1159,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
        if (type != ARPOP_REPLY)
                return;
 
-       batadv_dbg_arp(bat_priv, skb, type, hdr_size,
-                      "Parsing outgoing ARP REPLY");
+       batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing outgoing ARP REPLY");
 
        hw_src = batadv_arp_hw_src(skb, hdr_size);
        ip_src = batadv_arp_ip_src(skb, hdr_size);
@@ -1195,8 +1204,7 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv,
        if (type != ARPOP_REPLY)
                goto out;
 
-       batadv_dbg_arp(bat_priv, skb, type, hdr_size,
-                      "Parsing incoming ARP REPLY");
+       batadv_dbg_arp(bat_priv, skb, hdr_size, "Parsing incoming ARP REPLY");
 
        hw_src = batadv_arp_hw_src(skb, hdr_size);
        ip_src = batadv_arp_ip_src(skb, hdr_size);
index 0934730fb7ffb539cd89dc29e3b7a8d9630c51b6..9c561e683f4b8b68642b626b51a0dcda30260e97 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <linux/atomic.h>
 #include <linux/byteorder/generic.h>
+#include <linux/errno.h>
 #include <linux/etherdevice.h>
 #include <linux/fs.h>
 #include <linux/if_ether.h>
 /**
  * batadv_frag_clear_chain - delete entries in the fragment buffer chain
  * @head: head of chain with entries.
+ * @dropped: whether the chain is cleared because all fragments are dropped
  *
  * Free fragments in the passed hlist. Should be called with appropriate lock.
  */
-static void batadv_frag_clear_chain(struct hlist_head *head)
+static void batadv_frag_clear_chain(struct hlist_head *head, bool dropped)
 {
        struct batadv_frag_list_entry *entry;
        struct hlist_node *node;
 
        hlist_for_each_entry_safe(entry, node, head, list) {
                hlist_del(&entry->list);
-               kfree_skb(entry->skb);
+
+               if (dropped)
+                       kfree_skb(entry->skb);
+               else
+                       consume_skb(entry->skb);
+
                kfree(entry);
        }
 }
@@ -73,7 +80,7 @@ void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
                spin_lock_bh(&chain->lock);
 
                if (!check_cb || check_cb(chain)) {
-                       batadv_frag_clear_chain(&chain->head);
+                       batadv_frag_clear_chain(&chain->fragment_list, true);
                        chain->size = 0;
                }
 
@@ -117,8 +124,8 @@ static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
        if (chain->seqno == seqno)
                return false;
 
-       if (!hlist_empty(&chain->head))
-               batadv_frag_clear_chain(&chain->head);
+       if (!hlist_empty(&chain->fragment_list))
+               batadv_frag_clear_chain(&chain->fragment_list, true);
 
        chain->size = 0;
        chain->seqno = seqno;
@@ -176,7 +183,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
        chain = &orig_node->fragments[bucket];
        spin_lock_bh(&chain->lock);
        if (batadv_frag_init_chain(chain, seqno)) {
-               hlist_add_head(&frag_entry_new->list, &chain->head);
+               hlist_add_head(&frag_entry_new->list, &chain->fragment_list);
                chain->size = skb->len - hdr_size;
                chain->timestamp = jiffies;
                chain->total_size = ntohs(frag_packet->total_size);
@@ -185,7 +192,7 @@ static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
        }
 
        /* Find the position for the new fragment. */
-       hlist_for_each_entry(frag_entry_curr, &chain->head, list) {
+       hlist_for_each_entry(frag_entry_curr, &chain->fragment_list, list) {
                /* Drop packet if fragment already exists. */
                if (frag_entry_curr->no == frag_entry_new->no)
                        goto err_unlock;
@@ -220,11 +227,11 @@ out:
                 * exceeds the maximum size of one merged packet. Don't allow
                 * packets to have different total_size.
                 */
-               batadv_frag_clear_chain(&chain->head);
+               batadv_frag_clear_chain(&chain->fragment_list, true);
                chain->size = 0;
        } else if (ntohs(frag_packet->total_size) == chain->size) {
                /* All fragments received. Hand over chain to caller. */
-               hlist_move_list(&chain->head, chain_out);
+               hlist_move_list(&chain->fragment_list, chain_out);
                chain->size = 0;
        }
 
@@ -252,8 +259,9 @@ batadv_frag_merge_packets(struct hlist_head *chain)
 {
        struct batadv_frag_packet *packet;
        struct batadv_frag_list_entry *entry;
-       struct sk_buff *skb_out = NULL;
+       struct sk_buff *skb_out;
        int size, hdr_size = sizeof(struct batadv_frag_packet);
+       bool dropped = false;
 
        /* Remove first entry, as this is the destination for the rest of the
         * fragments.
@@ -270,6 +278,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
        if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
                kfree_skb(skb_out);
                skb_out = NULL;
+               dropped = true;
                goto free;
        }
 
@@ -291,7 +300,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
 
 free:
        /* Locking is not needed, because 'chain' is not part of any orig. */
-       batadv_frag_clear_chain(chain);
+       batadv_frag_clear_chain(chain, dropped);
        return skb_out;
 }
 
@@ -352,7 +361,7 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
                         struct batadv_orig_node *orig_node_src)
 {
        struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
-       struct batadv_orig_node *orig_node_dst = NULL;
+       struct batadv_orig_node *orig_node_dst;
        struct batadv_neigh_node *neigh_node = NULL;
        struct batadv_frag_packet *packet;
        u16 total_size;
@@ -433,8 +442,7 @@ err:
  * @orig_node: final destination of the created fragments
  * @neigh_node: next-hop of the created fragments
  *
- * Return: the netdev tx status or -1 in case of error.
- * When -1 is returned the skb is not consumed.
+ * Return: the netdev tx status or a negative errno code on a failure
  */
 int batadv_frag_send_packet(struct sk_buff *skb,
                            struct batadv_orig_node *orig_node,
@@ -447,7 +455,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
        unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
        unsigned int header_size = sizeof(frag_header);
        unsigned int max_fragment_size, max_packet_size;
-       int ret = -1;
+       int ret;
 
        /* To avoid merge and refragmentation at next-hops we never send
         * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
@@ -457,13 +465,17 @@ int batadv_frag_send_packet(struct sk_buff *skb,
        max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
 
        /* Don't even try to fragment, if we need more than 16 fragments */
-       if (skb->len > max_packet_size)
-               goto out;
+       if (skb->len > max_packet_size) {
+               ret = -EAGAIN;
+               goto free_skb;
+       }
 
        bat_priv = orig_node->bat_priv;
        primary_if = batadv_primary_if_get_selected(bat_priv);
-       if (!primary_if)
-               goto out;
+       if (!primary_if) {
+               ret = -EINVAL;
+               goto put_primary_if;
+       }
 
        /* Create one header to be copied to all fragments */
        frag_header.packet_type = BATADV_UNICAST_FRAG;
@@ -488,34 +500,35 @@ int batadv_frag_send_packet(struct sk_buff *skb,
        /* Eat and send fragments from the tail of skb */
        while (skb->len > max_fragment_size) {
                skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
-               if (!skb_fragment)
-                       goto out;
+               if (!skb_fragment) {
+                       ret = -ENOMEM;
+                       goto free_skb;
+               }
 
                batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
                batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
                                   skb_fragment->len + ETH_HLEN);
                ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
                if (ret != NET_XMIT_SUCCESS) {
-                       /* return -1 so that the caller can free the original
-                        * skb
-                        */
-                       ret = -1;
-                       goto out;
+                       ret = NET_XMIT_DROP;
+                       goto free_skb;
                }
 
                frag_header.no++;
 
                /* The initial check in this function should cover this case */
                if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) {
-                       ret = -1;
-                       goto out;
+                       ret = -EINVAL;
+                       goto free_skb;
                }
        }
 
        /* Make room for the fragment header. */
        if (batadv_skb_head_push(skb, header_size) < 0 ||
-           pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0)
-               goto out;
+           pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) {
+               ret = -ENOMEM;
+               goto free_skb;
+       }
 
        memcpy(skb->data, &frag_header, header_size);
 
@@ -524,10 +537,13 @@ int batadv_frag_send_packet(struct sk_buff *skb,
        batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
                           skb->len + ETH_HLEN);
        ret = batadv_send_unicast_skb(skb, neigh_node);
+       /* skb was consumed */
+       skb = NULL;
 
-out:
-       if (primary_if)
-               batadv_hardif_put(primary_if);
+put_primary_if:
+       batadv_hardif_put(primary_if);
+free_skb:
+       kfree_skb(skb);
 
        return ret;
 }
index 3202fe329e639abbb4c955092e97c7735a5fead6..b95f619606af86b7e0b557bf67b911ceac5b0d54 100644 (file)
@@ -47,7 +47,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
 static inline bool
 batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry)
 {
-       if (!hlist_empty(&frags_entry->head) &&
+       if (!hlist_empty(&frags_entry->fragment_list) &&
            batadv_has_timed_out(frags_entry->timestamp, BATADV_FRAG_TIMEOUT))
                return true;
        return false;
index de055d64debe4b335627ad74aeba60d6b0e4f6f2..52b8bd6ec43183519a63483950c2e886e47a6f9e 100644 (file)
@@ -348,7 +348,7 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv,
 
        spin_lock_bh(&bat_priv->gw.list_lock);
        kref_get(&gw_node->refcount);
-       hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list);
+       hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.gateway_list);
        spin_unlock_bh(&bat_priv->gw.list_lock);
 
        batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -376,7 +376,8 @@ struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
        struct batadv_gw_node *gw_node_tmp, *gw_node = NULL;
 
        rcu_read_lock();
-       hlist_for_each_entry_rcu(gw_node_tmp, &bat_priv->gw.list, list) {
+       hlist_for_each_entry_rcu(gw_node_tmp, &bat_priv->gw.gateway_list,
+                                list) {
                if (gw_node_tmp->orig_node != orig_node)
                        continue;
 
@@ -475,7 +476,7 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv)
 
        spin_lock_bh(&bat_priv->gw.list_lock);
        hlist_for_each_entry_safe(gw_node, node_tmp,
-                                 &bat_priv->gw.list, list) {
+                                 &bat_priv->gw.gateway_list, list) {
                hlist_del_init_rcu(&gw_node->list);
                batadv_gw_node_put(gw_node);
        }
@@ -704,7 +705,7 @@ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
 {
        struct batadv_neigh_node *neigh_curr = NULL;
        struct batadv_neigh_node *neigh_old = NULL;
-       struct batadv_orig_node *orig_dst_node = NULL;
+       struct batadv_orig_node *orig_dst_node;
        struct batadv_gw_node *gw_node = NULL;
        struct batadv_gw_node *curr_gw = NULL;
        struct batadv_neigh_ifinfo *curr_ifinfo, *old_ifinfo;
index 08ce36147c4c3a76bba8d98004617f7a7f03a2e6..61a431a9772ba96418644b399c9e787cbfd0e743 100644 (file)
@@ -92,8 +92,8 @@ out:
  *
  * Return: result of rtnl_link_ops->get_link_net or @fallback_net
  */
-static const struct net *batadv_getlink_net(const struct net_device *netdev,
-                                           const struct net *fallback_net)
+static struct net *batadv_getlink_net(const struct net_device *netdev,
+                                     struct net *fallback_net)
 {
        if (!netdev->rtnl_link_ops)
                return fallback_net;
@@ -116,9 +116,9 @@ static const struct net *batadv_getlink_net(const struct net_device *netdev,
  * Return: true if the devices are each others parent, otherwise false
  */
 static bool batadv_mutual_parents(const struct net_device *dev1,
-                                 const struct net *net1,
+                                 struct net *net1,
                                  const struct net_device *dev2,
-                                 const struct net *net2)
+                                 struct net *net2)
 {
        int dev1_parent_iflink = dev_get_iflink(dev1);
        int dev2_parent_iflink = dev_get_iflink(dev2);
@@ -154,7 +154,7 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
 {
        struct net *net = dev_net(net_dev);
        struct net_device *parent_dev;
-       const struct net *parent_net;
+       struct net *parent_net;
        bool ret;
 
        /* check if this is a batman-adv mesh interface */
@@ -202,13 +202,77 @@ static bool batadv_is_valid_iface(const struct net_device *net_dev)
 }
 
 /**
- * batadv_is_wifi_netdev - check if the given net_device struct is a wifi
- *  interface
+ * batadv_get_real_netdevice - check if the given netdev struct is a virtual
+ *  interface on top of another 'real' interface
+ * @netdev: the device to check
+ *
+ * Callers must hold the rtnl semaphore. You may want batadv_get_real_netdev()
+ * instead of this.
+ *
+ * Return: the 'real' net device or the original net device, or NULL in case
+ *  of an error.
+ */
+static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
+{
+       struct batadv_hard_iface *hard_iface = NULL;
+       struct net_device *real_netdev = NULL;
+       struct net *real_net;
+       struct net *net;
+       int ifindex;
+
+       ASSERT_RTNL();
+
+       if (!netdev)
+               return NULL;
+
+       if (netdev->ifindex == dev_get_iflink(netdev)) {
+               dev_hold(netdev);
+               return netdev;
+       }
+
+       hard_iface = batadv_hardif_get_by_netdev(netdev);
+       if (!hard_iface || !hard_iface->soft_iface)
+               goto out;
+
+       net = dev_net(hard_iface->soft_iface);
+       ifindex = dev_get_iflink(netdev);
+       real_net = batadv_getlink_net(netdev, net);
+       real_netdev = dev_get_by_index(real_net, ifindex);
+
+out:
+       if (hard_iface)
+               batadv_hardif_put(hard_iface);
+       return real_netdev;
+}
+
+/**
+ * batadv_get_real_netdev - check if the given net_device struct is a virtual
+ *  interface on top of another 'real' interface
  * @net_device: the device to check
  *
- * Return: true if the net device is a 802.11 wireless device, false otherwise.
+ * Return: the 'real' net device or the original net device, or NULL in case
+ *  of an error.
  */
-bool batadv_is_wifi_netdev(struct net_device *net_device)
+struct net_device *batadv_get_real_netdev(struct net_device *net_device)
+{
+       struct net_device *real_netdev;
+
+       rtnl_lock();
+       real_netdev = batadv_get_real_netdevice(net_device);
+       rtnl_unlock();
+
+       return real_netdev;
+}
+
+/**
+ * batadv_is_wext_netdev - check if the given net_device struct is a
+ *  wext wifi interface
+ * @net_device: the device to check
+ *
+ * Return: true if the net device is a wext wireless device, false
+ *  otherwise.
+ */
+static bool batadv_is_wext_netdev(struct net_device *net_device)
 {
        if (!net_device)
                return false;
@@ -221,6 +285,22 @@ bool batadv_is_wifi_netdev(struct net_device *net_device)
                return true;
 #endif
 
+       return false;
+}
+
+/**
+ * batadv_is_cfg80211_netdev - check if the given net_device struct is a
+ *  cfg80211 wifi interface
+ * @net_device: the device to check
+ *
+ * Return: true if the net device is a cfg80211 wireless device, false
+ *  otherwise.
+ */
+static bool batadv_is_cfg80211_netdev(struct net_device *net_device)
+{
+       if (!net_device)
+               return false;
+
        /* cfg80211 drivers have to set ieee80211_ptr */
        if (net_device->ieee80211_ptr)
                return true;
@@ -228,6 +308,125 @@ bool batadv_is_wifi_netdev(struct net_device *net_device)
        return false;
 }
 
+/**
+ * batadv_wifi_flags_evaluate - calculate wifi flags for net_device
+ * @net_device: the device to check
+ *
+ * Return: batadv_hard_iface_wifi_flags flags of the device
+ */
+static u32 batadv_wifi_flags_evaluate(struct net_device *net_device)
+{
+       u32 wifi_flags = 0;
+       struct net_device *real_netdev;
+
+       if (batadv_is_wext_netdev(net_device))
+               wifi_flags |= BATADV_HARDIF_WIFI_WEXT_DIRECT;
+
+       if (batadv_is_cfg80211_netdev(net_device))
+               wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT;
+
+       real_netdev = batadv_get_real_netdevice(net_device);
+       if (!real_netdev)
+               return wifi_flags;
+
+       if (real_netdev == net_device)
+               goto out;
+
+       if (batadv_is_wext_netdev(real_netdev))
+               wifi_flags |= BATADV_HARDIF_WIFI_WEXT_INDIRECT;
+
+       if (batadv_is_cfg80211_netdev(real_netdev))
+               wifi_flags |= BATADV_HARDIF_WIFI_CFG80211_INDIRECT;
+
+out:
+       dev_put(real_netdev);
+       return wifi_flags;
+}
+
+/**
+ * batadv_is_cfg80211_hardif - check if the given hardif is a cfg80211 wifi
+ *  interface
+ * @hard_iface: the device to check
+ *
+ * Return: true if the net device is a cfg80211 wireless device, false
+ *  otherwise.
+ */
+bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface)
+{
+       u32 allowed_flags = 0;
+
+       allowed_flags |= BATADV_HARDIF_WIFI_CFG80211_DIRECT;
+       allowed_flags |= BATADV_HARDIF_WIFI_CFG80211_INDIRECT;
+
+       return !!(hard_iface->wifi_flags & allowed_flags);
+}
+
+/**
+ * batadv_is_wifi_hardif - check if the given hardif is a wifi interface
+ * @hard_iface: the device to check
+ *
+ * Return: true if the net device is a 802.11 wireless device, false otherwise.
+ */
+bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface)
+{
+       if (!hard_iface)
+               return false;
+
+       return hard_iface->wifi_flags != 0;
+}
+
+/**
+ * batadv_hardif_no_broadcast - check whether (re)broadcast is necessary
+ * @if_outgoing: the outgoing interface checked and considered for (re)broadcast
+ * @orig_addr: the originator of this packet
+ * @orig_neigh: originator address of the forwarder we just got the packet from
+ *  (NULL if we originated)
+ *
+ * Checks whether a packet needs to be (re)broadcasted on the given interface.
+ *
+ * Return:
+ *     BATADV_HARDIF_BCAST_NORECIPIENT: No neighbor on interface
+ *     BATADV_HARDIF_BCAST_DUPFWD: Just one neighbor, but it is the forwarder
+ *     BATADV_HARDIF_BCAST_DUPORIG: Just one neighbor, but it is the originator
+ *     BATADV_HARDIF_BCAST_OK: Several neighbors, must broadcast
+ */
+int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
+                              u8 *orig_addr, u8 *orig_neigh)
+{
+       struct batadv_hardif_neigh_node *hardif_neigh;
+       struct hlist_node *first;
+       int ret = BATADV_HARDIF_BCAST_OK;
+
+       rcu_read_lock();
+
+       /* 0 neighbors -> no (re)broadcast */
+       first = rcu_dereference(hlist_first_rcu(&if_outgoing->neigh_list));
+       if (!first) {
+               ret = BATADV_HARDIF_BCAST_NORECIPIENT;
+               goto out;
+       }
+
+       /* >1 neighbors -> (re)broadcast */
+       if (rcu_dereference(hlist_next_rcu(first)))
+               goto out;
+
+       hardif_neigh = hlist_entry(first, struct batadv_hardif_neigh_node,
+                                  list);
+
+       /* 1 neighbor, is the originator -> no rebroadcast */
+       if (orig_addr && batadv_compare_eth(hardif_neigh->orig, orig_addr)) {
+               ret = BATADV_HARDIF_BCAST_DUPORIG;
+       /* 1 neighbor, is the one we received from -> no rebroadcast */
+       } else if (orig_neigh &&
+                  batadv_compare_eth(hardif_neigh->orig, orig_neigh)) {
+               ret = BATADV_HARDIF_BCAST_DUPFWD;
+       }
+
+out:
+       rcu_read_unlock();
+       return ret;
+}
+
 static struct batadv_hard_iface *
 batadv_hardif_get_active(const struct net_device *soft_iface)
 {
@@ -697,7 +896,8 @@ batadv_hardif_add_interface(struct net_device *net_dev)
        kref_init(&hard_iface->refcount);
 
        hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT;
-       if (batadv_is_wifi_netdev(net_dev))
+       hard_iface->wifi_flags = batadv_wifi_flags_evaluate(net_dev);
+       if (batadv_is_wifi_hardif(hard_iface))
                hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
 
        batadv_v_hardif_init(hard_iface);
@@ -806,6 +1006,11 @@ static int batadv_hard_if_event(struct notifier_block *this,
                if (hard_iface == primary_if)
                        batadv_primary_if_update_addr(bat_priv, NULL);
                break;
+       case NETDEV_CHANGEUPPER:
+               hard_iface->wifi_flags = batadv_wifi_flags_evaluate(net_dev);
+               if (batadv_is_wifi_hardif(hard_iface))
+                       hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
+               break;
        default:
                break;
        }
index a76724d369bfda14a772b56af2bff2b9cb836e80..d6309a42362918d4beb4d43d174e0c4e8773e75c 100644 (file)
@@ -39,6 +39,20 @@ enum batadv_hard_if_state {
        BATADV_IF_I_WANT_YOU,
 };
 
+/**
+ * enum batadv_hard_if_bcast - broadcast avoidance options
+ * @BATADV_HARDIF_BCAST_OK: Do broadcast on the corresponding hard interface
+ * @BATADV_HARDIF_BCAST_NORECIPIENT: Broadcast not needed, there is no recipient
+ * @BATADV_HARDIF_BCAST_DUPFWD: There is just the neighbor we got it from
+ * @BATADV_HARDIF_BCAST_DUPORIG: There is just the originator
+ */
+enum batadv_hard_if_bcast {
+       BATADV_HARDIF_BCAST_OK = 0,
+       BATADV_HARDIF_BCAST_NORECIPIENT,
+       BATADV_HARDIF_BCAST_DUPFWD,
+       BATADV_HARDIF_BCAST_DUPORIG,
+};
+
 /**
  * enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal
  * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface
@@ -51,8 +65,9 @@ enum batadv_hard_if_cleanup {
 
 extern struct notifier_block batadv_hard_if_notifier;
 
-bool batadv_is_wifi_netdev(struct net_device *net_device);
-bool batadv_is_wifi_iface(int ifindex);
+struct net_device *batadv_get_real_netdev(struct net_device *net_device);
+bool batadv_is_cfg80211_hardif(struct batadv_hard_iface *hard_iface);
+bool batadv_is_wifi_hardif(struct batadv_hard_iface *hard_iface);
 struct batadv_hard_iface*
 batadv_hardif_get_by_netdev(const struct net_device *net_dev);
 int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
@@ -63,6 +78,8 @@ void batadv_hardif_remove_interfaces(void);
 int batadv_hardif_min_mtu(struct net_device *soft_iface);
 void batadv_update_min_mtu(struct net_device *soft_iface);
 void batadv_hardif_release(struct kref *ref);
+int batadv_hardif_no_broadcast(struct batadv_hard_iface *if_outgoing,
+                              u8 *orig_addr, u8 *orig_neigh);
 
 /**
  * batadv_hardif_put - decrement the hard interface refcounter and possibly
index cbbf87075f06fc7c68103a9a65147748b706e40b..557a7044cfbccecd3b2a5e66659a3ac151d819fa 100644 (file)
@@ -61,36 +61,6 @@ void batadv_hash_set_lock_class(struct batadv_hashtable *hash,
 /* free only the hashtable and the hash itself. */
 void batadv_hash_destroy(struct batadv_hashtable *hash);
 
-/* remove the hash structure. if hashdata_free_cb != NULL, this function will be
- * called to remove the elements inside of the hash.  if you don't remove the
- * elements, memory might be leaked.
- */
-static inline void batadv_hash_delete(struct batadv_hashtable *hash,
-                                     batadv_hashdata_free_cb free_cb,
-                                     void *arg)
-{
-       struct hlist_head *head;
-       struct hlist_node *node, *node_tmp;
-       spinlock_t *list_lock; /* spinlock to protect write access */
-       u32 i;
-
-       for (i = 0; i < hash->size; i++) {
-               head = &hash->table[i];
-               list_lock = &hash->list_locks[i];
-
-               spin_lock_bh(list_lock);
-               hlist_for_each_safe(node, node_tmp, head) {
-                       hlist_del_rcu(node);
-
-                       if (free_cb)
-                               free_cb(node, arg);
-               }
-               spin_unlock_bh(list_lock);
-       }
-
-       batadv_hash_destroy(hash);
-}
-
 /**
  *     batadv_hash_add - adds data to the hashtable
  *     @hash: storage hash table
index 378cc1119d6664ff1dcbf5993a8b2871505c6939..b310f381ae028d26ed89dea88ba071b7433cfca4 100644 (file)
@@ -38,7 +38,6 @@
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/stat.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
@@ -322,8 +321,8 @@ int batadv_socket_setup(struct batadv_priv *bat_priv)
        if (!bat_priv->debug_dir)
                goto err;
 
-       d = debugfs_create_file(BATADV_ICMP_SOCKET, S_IFREG | S_IWUSR | S_IRUSR,
-                               bat_priv->debug_dir, bat_priv, &batadv_fops);
+       d = debugfs_create_file(BATADV_ICMP_SOCKET, 0600, bat_priv->debug_dir,
+                               bat_priv, &batadv_fops);
        if (!d)
                goto err;
 
index 56dc532f7a2c276e98690ed37055d0b4a9fe39c8..c73c31769abaf9c802c408f1e7fae17c9db134b7 100644 (file)
@@ -31,7 +31,6 @@
 #include <linux/sched.h> /* for linux/wait.h */
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/stat.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/uaccess.h>
@@ -212,8 +211,7 @@ int batadv_debug_log_setup(struct batadv_priv *bat_priv)
        spin_lock_init(&bat_priv->debug_log->lock);
        init_waitqueue_head(&bat_priv->debug_log->queue_wait);
 
-       d = debugfs_create_file("log", S_IFREG | S_IRUSR,
-                               bat_priv->debug_dir, bat_priv,
+       d = debugfs_create_file("log", 0400, bat_priv->debug_dir, bat_priv,
                                &batadv_log_fops);
        if (!d)
                goto err;
index e0e1a88c3e5807dfefea4dc0f255b69087c1f15b..3284a7b0325dc631bc8de438a0f62ba8587c9f50 100644 (file)
@@ -63,7 +63,7 @@ enum batadv_dbg_level {
        BATADV_DBG_NC           = BIT(5),
        BATADV_DBG_MCAST        = BIT(6),
        BATADV_DBG_TP_METER     = BIT(7),
-       BATADV_DBG_ALL          = 127,
+       BATADV_DBG_ALL          = 255,
 };
 
 #ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -71,12 +71,12 @@ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
 __printf(2, 3);
 
 /* possibly ratelimited debug output */
-#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...)  \
-       do {                                                    \
-               if (atomic_read(&bat_priv->log_level) & type && \
-                   (!ratelimited || net_ratelimit()))          \
-                       batadv_debug_log(bat_priv, fmt, ## arg);\
-       }                                                       \
+#define _batadv_dbg(type, bat_priv, ratelimited, fmt, arg...)          \
+       do {                                                            \
+               if (atomic_read(&(bat_priv)->log_level) & (type) &&     \
+                   (!(ratelimited) || net_ratelimit()))                \
+                       batadv_debug_log(bat_priv, fmt, ## arg);        \
+       }                                                               \
        while (0)
 #else /* !CONFIG_BATMAN_ADV_DEBUG */
 __printf(4, 5)
index 2c017ab47557bbb57dfd996fe8cb605a833f977e..d46415edd3be98d9538def8d55674b5336eca6a1 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/crc32c.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/genetlink.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
 #include <linux/init.h>
@@ -44,6 +45,7 @@
 #include <linux/workqueue.h>
 #include <net/dsfield.h>
 #include <net/rtnetlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bat_iv_ogm.h"
@@ -160,7 +162,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
 
        INIT_HLIST_HEAD(&bat_priv->forw_bat_list);
        INIT_HLIST_HEAD(&bat_priv->forw_bcast_list);
-       INIT_HLIST_HEAD(&bat_priv->gw.list);
+       INIT_HLIST_HEAD(&bat_priv->gw.gateway_list);
 #ifdef CONFIG_BATMAN_ADV_MCAST
        INIT_HLIST_HEAD(&bat_priv->mcast.want_all_unsnoopables_list);
        INIT_HLIST_HEAD(&bat_priv->mcast.want_all_ipv4_list);
@@ -402,6 +404,8 @@ void batadv_skb_set_priority(struct sk_buff *skb, int offset)
 static int batadv_recv_unhandled_packet(struct sk_buff *skb,
                                        struct batadv_hard_iface *recv_if)
 {
+       kfree_skb(skb);
+
        return NET_RX_DROP;
 }
 
@@ -416,7 +420,6 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
        struct batadv_ogm_packet *batadv_ogm_packet;
        struct batadv_hard_iface *hard_iface;
        u8 idx;
-       int ret;
 
        hard_iface = container_of(ptype, struct batadv_hard_iface,
                                  batman_adv_ptype);
@@ -466,14 +469,8 @@ int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
        /* reset control block to avoid left overs from previous users */
        memset(skb->cb, 0, sizeof(struct batadv_skb_cb));
 
-       /* all receive handlers return whether they received or reused
-        * the supplied skb. if not, we have to free the skb.
-        */
        idx = batadv_ogm_packet->packet_type;
-       ret = (*batadv_rx_handler[idx])(skb, hard_iface);
-
-       if (ret == NET_RX_DROP)
-               kfree_skb(skb);
+       (*batadv_rx_handler[idx])(skb, hard_iface);
 
        batadv_hardif_put(hard_iface);
 
@@ -653,3 +650,4 @@ MODULE_DESCRIPTION(BATADV_DRIVER_DESC);
 MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE);
 MODULE_VERSION(BATADV_SOURCE_VERSION);
 MODULE_ALIAS_RTNL_LINK("batadv");
+MODULE_ALIAS_GENL_FAMILY(BATADV_NL_NAME);
index 09af21e27639675234b42e97124d87d8572ccbe2..a6cc8040a21dd24fb507683230fd66a9edb62458 100644 (file)
@@ -24,7 +24,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2016.4"
+#define BATADV_SOURCE_VERSION "2016.5"
 #endif
 
 /* B.A.T.M.A.N. parameters */
@@ -48,6 +48,7 @@
 #define BATADV_TT_CLIENT_TEMP_TIMEOUT 600000 /* in milliseconds */
 #define BATADV_TT_WORK_PERIOD 5000 /* 5 seconds */
 #define BATADV_ORIG_WORK_PERIOD 1000 /* 1 second */
+#define BATADV_MCAST_WORK_PERIOD 500 /* 0.5 seconds */
 #define BATADV_DAT_ENTRY_TIMEOUT (5 * 60000) /* 5 mins in milliseconds */
 /* sliding packet range of received originator messages in sequence numbers
  * (should be a multiple of our word size)
@@ -185,7 +186,6 @@ enum batadv_uev_type {
 
 #include <linux/bitops.h> /* for packet.h */
 #include <linux/compiler.h>
-#include <linux/cpumask.h>
 #include <linux/etherdevice.h>
 #include <linux/if_ether.h> /* for packet.h */
 #include <linux/if_vlan.h>
@@ -200,8 +200,8 @@ struct packet_type;
 struct seq_file;
 struct sk_buff;
 
-#define BATADV_PRINT_VID(vid) ((vid & BATADV_VLAN_HAS_TAG) ? \
-                              (int)(vid & VLAN_VID_MASK) : -1)
+#define BATADV_PRINT_VID(vid) (((vid) & BATADV_VLAN_HAS_TAG) ? \
+                              (int)((vid) & VLAN_VID_MASK) : -1)
 
 extern struct list_head batadv_hardif_list;
 
@@ -284,26 +284,6 @@ static inline void batadv_add_counter(struct batadv_priv *bat_priv, size_t idx,
 
 #define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1)
 
-/**
- * batadv_sum_counter - Sum the cpu-local counters for index 'idx'
- * @bat_priv: the bat priv with all the soft interface information
- * @idx: index of counter to sum up
- *
- * Return: sum of all cpu-local counters
- */
-static inline u64 batadv_sum_counter(struct batadv_priv *bat_priv,  size_t idx)
-{
-       u64 *counters, sum = 0;
-       int cpu;
-
-       for_each_possible_cpu(cpu) {
-               counters = per_cpu_ptr(bat_priv->bat_counters, cpu);
-               sum += counters[idx];
-       }
-
-       return sum;
-}
-
 /* Define a macro to reach the control buffer of the skb. The members of the
  * control buffer are defined in struct batadv_skb_cb in types.h.
  * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h.
index 13661f43386f09f5661ae1b7444f4c56e63a3b8a..090a69fc342eac8a0b6bf89556d2b32523817d09 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/in6.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/list.h>
@@ -48,6 +49,7 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/workqueue.h>
 #include <net/addrconf.h>
 #include <net/if_inet6.h>
 #include <net/ip.h>
 #include "translation-table.h"
 #include "tvlv.h"
 
+static void batadv_mcast_mla_update(struct work_struct *work);
+
+/**
+ * batadv_mcast_start_timer - schedule the multicast periodic worker
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_mcast_start_timer(struct batadv_priv *bat_priv)
+{
+       queue_delayed_work(batadv_event_workqueue, &bat_priv->mcast.work,
+                          msecs_to_jiffies(BATADV_MCAST_WORK_PERIOD));
+}
+
 /**
  * batadv_mcast_get_bridge - get the bridge on top of the softif if it exists
  * @soft_iface: netdev struct of the mesh interface
@@ -231,19 +245,15 @@ out:
 
 /**
  * batadv_mcast_mla_list_free - free a list of multicast addresses
- * @bat_priv: the bat priv with all the soft interface information
  * @mcast_list: the list to free
  *
  * Removes and frees all items in the given mcast_list.
  */
-static void batadv_mcast_mla_list_free(struct batadv_priv *bat_priv,
-                                      struct hlist_head *mcast_list)
+static void batadv_mcast_mla_list_free(struct hlist_head *mcast_list)
 {
        struct batadv_hw_addr *mcast_entry;
        struct hlist_node *tmp;
 
-       lockdep_assert_held(&bat_priv->tt.commit_lock);
-
        hlist_for_each_entry_safe(mcast_entry, tmp, mcast_list, list) {
                hlist_del(&mcast_entry->list);
                kfree(mcast_entry);
@@ -259,6 +269,8 @@ static void batadv_mcast_mla_list_free(struct batadv_priv *bat_priv,
  * translation table except the ones listed in the given mcast_list.
  *
  * If mcast_list is NULL then all are retracted.
+ *
+ * Do not call outside of the mcast worker! (or cancel mcast worker first)
  */
 static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
                                        struct hlist_head *mcast_list)
@@ -266,7 +278,7 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
        struct batadv_hw_addr *mcast_entry;
        struct hlist_node *tmp;
 
-       lockdep_assert_held(&bat_priv->tt.commit_lock);
+       WARN_ON(delayed_work_pending(&bat_priv->mcast.work));
 
        hlist_for_each_entry_safe(mcast_entry, tmp, &bat_priv->mcast.mla_list,
                                  list) {
@@ -291,6 +303,8 @@ static void batadv_mcast_mla_tt_retract(struct batadv_priv *bat_priv,
  *
  * Adds multicast listener announcements from the given mcast_list to the
  * translation table if they have not been added yet.
+ *
+ * Do not call outside of the mcast worker! (or cancel mcast worker first)
  */
 static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
                                    struct hlist_head *mcast_list)
@@ -298,7 +312,7 @@ static void batadv_mcast_mla_tt_add(struct batadv_priv *bat_priv,
        struct batadv_hw_addr *mcast_entry;
        struct hlist_node *tmp;
 
-       lockdep_assert_held(&bat_priv->tt.commit_lock);
+       WARN_ON(delayed_work_pending(&bat_priv->mcast.work));
 
        if (!mcast_list)
                return;
@@ -532,13 +546,18 @@ update:
 }
 
 /**
- * batadv_mcast_mla_update - update the own MLAs
+ * __batadv_mcast_mla_update - update the own MLAs
  * @bat_priv: the bat priv with all the soft interface information
  *
  * Updates the own multicast listener announcements in the translation
  * table as well as the own, announced multicast tvlv container.
+ *
+ * Note that non-conflicting reads and writes to bat_priv->mcast.mla_list
+ * in batadv_mcast_mla_tt_retract() and batadv_mcast_mla_tt_add() are
+ * ensured by the non-parallel execution of the worker this function
+ * belongs to.
  */
-void batadv_mcast_mla_update(struct batadv_priv *bat_priv)
+static void __batadv_mcast_mla_update(struct batadv_priv *bat_priv)
 {
        struct net_device *soft_iface = bat_priv->soft_iface;
        struct hlist_head mcast_list = HLIST_HEAD_INIT;
@@ -560,7 +579,30 @@ update:
        batadv_mcast_mla_tt_add(bat_priv, &mcast_list);
 
 out:
-       batadv_mcast_mla_list_free(bat_priv, &mcast_list);
+       batadv_mcast_mla_list_free(&mcast_list);
+}
+
+/**
+ * batadv_mcast_mla_update - update the own MLAs
+ * @work: kernel work struct
+ *
+ * Updates the own multicast listener announcements in the translation
+ * table as well as the own, announced multicast tvlv container.
+ *
+ * In the end, reschedules the work timer.
+ */
+static void batadv_mcast_mla_update(struct work_struct *work)
+{
+       struct delayed_work *delayed_work;
+       struct batadv_priv_mcast *priv_mcast;
+       struct batadv_priv *bat_priv;
+
+       delayed_work = to_delayed_work(work);
+       priv_mcast = container_of(delayed_work, struct batadv_priv_mcast, work);
+       bat_priv = container_of(priv_mcast, struct batadv_priv, mcast);
+
+       __batadv_mcast_mla_update(bat_priv);
+       batadv_mcast_start_timer(bat_priv);
 }
 
 /**
@@ -1132,6 +1174,9 @@ void batadv_mcast_init(struct batadv_priv *bat_priv)
        batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler,
                                     NULL, BATADV_TVLV_MCAST, 2,
                                     BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
+
+       INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update);
+       batadv_mcast_start_timer(bat_priv);
 }
 
 #ifdef CONFIG_BATMAN_ADV_DEBUGFS
@@ -1243,12 +1288,13 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset)
  */
 void batadv_mcast_free(struct batadv_priv *bat_priv)
 {
+       cancel_delayed_work_sync(&bat_priv->mcast.work);
+
        batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
        batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
 
-       spin_lock_bh(&bat_priv->tt.commit_lock);
+       /* safely calling outside of worker, as worker was canceled above */
        batadv_mcast_mla_tt_retract(bat_priv, NULL);
-       spin_unlock_bh(&bat_priv->tt.commit_lock);
 }
 
 /**
index 1fb00ba84907a46732c33deac4aed01061222b1c..2cddaf52a21d5e8ea927c96e51e5a811de6785c9 100644 (file)
@@ -39,8 +39,6 @@ enum batadv_forw_mode {
 
 #ifdef CONFIG_BATMAN_ADV_MCAST
 
-void batadv_mcast_mla_update(struct batadv_priv *bat_priv);
-
 enum batadv_forw_mode
 batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
                       struct batadv_orig_node **mcast_single_orig);
@@ -55,10 +53,6 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
 
 #else
 
-static inline void batadv_mcast_mla_update(struct batadv_priv *bat_priv)
-{
-}
-
 static inline enum batadv_forw_mode
 batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
                       struct batadv_orig_node **mcast_single_orig)
index 64cb6acbe0a64bf8da5c5b03c1874b9e886f369f..062738163bdce747b7f49c96d9180899bb15ea2f 100644 (file)
 
 #include <linux/atomic.h>
 #include <linux/byteorder/generic.h>
+#include <linux/cache.h>
 #include <linux/errno.h>
+#include <linux/export.h>
 #include <linux/fs.h>
 #include <linux/genetlink.h>
 #include <linux/if_ether.h>
 #include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/printk.h>
 #include "tp_meter.h"
 #include "translation-table.h"
 
-struct genl_family batadv_netlink_family = {
-       .id = GENL_ID_GENERATE,
-       .hdrsize = 0,
-       .name = BATADV_NL_NAME,
-       .version = 1,
-       .maxattr = BATADV_ATTR_MAX,
-       .netnsok = true,
-};
+struct genl_family batadv_netlink_family;
 
 /* multicast groups */
 enum batadv_netlink_multicast_groups {
@@ -534,7 +530,7 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
        return msg->len;
 }
 
-static struct genl_ops batadv_netlink_ops[] = {
+static const struct genl_ops batadv_netlink_ops[] = {
        {
                .cmd = BATADV_CMD_GET_MESH_INFO,
                .flags = GENL_ADMIN_PERM,
@@ -610,6 +606,19 @@ static struct genl_ops batadv_netlink_ops[] = {
 
 };
 
+struct genl_family batadv_netlink_family __ro_after_init = {
+       .hdrsize = 0,
+       .name = BATADV_NL_NAME,
+       .version = 1,
+       .maxattr = BATADV_ATTR_MAX,
+       .netnsok = true,
+       .module = THIS_MODULE,
+       .ops = batadv_netlink_ops,
+       .n_ops = ARRAY_SIZE(batadv_netlink_ops),
+       .mcgrps = batadv_netlink_mcgrps,
+       .n_mcgrps = ARRAY_SIZE(batadv_netlink_mcgrps),
+};
+
 /**
  * batadv_netlink_register - register batadv genl netlink family
  */
@@ -617,9 +626,7 @@ void __init batadv_netlink_register(void)
 {
        int ret;
 
-       ret = genl_register_family_with_ops_groups(&batadv_netlink_family,
-                                                  batadv_netlink_ops,
-                                                  batadv_netlink_mcgrps);
+       ret = genl_register_family(&batadv_netlink_family);
        if (ret)
                pr_warn("unable to register netlink family");
 }
index e3baf697a35c0e1584b08361ef905a6652c6e04f..ab5a3bf0765f36f2fe14ff4a91d43d905e08a1f3 100644 (file)
@@ -44,7 +44,6 @@
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/stat.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
@@ -261,10 +260,16 @@ static void batadv_nc_path_put(struct batadv_nc_path *nc_path)
 /**
  * batadv_nc_packet_free - frees nc packet
  * @nc_packet: the nc packet to free
+ * @dropped: whether the packet is freed because it is dropped
  */
-static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
+static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet,
+                                 bool dropped)
 {
-       kfree_skb(nc_packet->skb);
+       if (dropped)
+               kfree_skb(nc_packet->skb);
+       else
+               consume_skb(nc_packet->skb);
+
        batadv_nc_path_put(nc_packet->nc_path);
        kfree(nc_packet);
 }
@@ -577,7 +582,7 @@ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
 {
        batadv_send_unicast_skb(nc_packet->skb, nc_packet->neigh_node);
        nc_packet->skb = NULL;
-       batadv_nc_packet_free(nc_packet);
+       batadv_nc_packet_free(nc_packet, false);
 }
 
 /**
@@ -611,7 +616,7 @@ static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv,
 
        /* purge nc packet */
        list_del(&nc_packet->list);
-       batadv_nc_packet_free(nc_packet);
+       batadv_nc_packet_free(nc_packet, true);
 
        res = true;
 
@@ -1209,11 +1214,11 @@ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
        }
 
        /* skb_src is now coded into skb_dest, so free it */
-       kfree_skb(skb_src);
+       consume_skb(skb_src);
 
        /* avoid duplicate free of skb from nc_packet */
        nc_packet->skb = NULL;
-       batadv_nc_packet_free(nc_packet);
+       batadv_nc_packet_free(nc_packet, false);
 
        /* Send the coded packet and return true */
        batadv_send_unicast_skb(skb_dest, first_dest);
@@ -1400,7 +1405,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
        /* batadv_nc_skb_store_for_decoding() clones the skb, so we must free
         * our ref
         */
-       kfree_skb(skb);
+       consume_skb(skb);
 }
 
 /**
@@ -1724,7 +1729,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
        ether_addr_copy(unicast_packet->dest, orig_dest);
        unicast_packet->ttvn = ttvn;
 
-       batadv_nc_packet_free(nc_packet);
+       batadv_nc_packet_free(nc_packet, false);
        return unicast_packet;
 }
 
@@ -1814,11 +1819,11 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
 
        /* Check if network coding is enabled */
        if (!atomic_read(&bat_priv->network_coding))
-               return NET_RX_DROP;
+               goto free_skb;
 
        /* Make sure we can access (and remove) header */
        if (unlikely(!pskb_may_pull(skb, hdr_size)))
-               return NET_RX_DROP;
+               goto free_skb;
 
        coded_packet = (struct batadv_coded_packet *)skb->data;
        ethhdr = eth_hdr(skb);
@@ -1826,7 +1831,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
        /* Verify frame is destined for us */
        if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) &&
            !batadv_is_my_mac(bat_priv, coded_packet->second_dest))
-               return NET_RX_DROP;
+               goto free_skb;
 
        /* Update stat counter */
        if (batadv_is_my_mac(bat_priv, coded_packet->second_dest))
@@ -1836,7 +1841,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
                                                   coded_packet);
        if (!nc_packet) {
                batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED);
-               return NET_RX_DROP;
+               goto free_skb;
        }
 
        /* Make skb's linear, because decoding accesses the entire buffer */
@@ -1861,7 +1866,10 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
        return batadv_recv_unicast_packet(skb, recv_if);
 
 free_nc_packet:
-       batadv_nc_packet_free(nc_packet);
+       batadv_nc_packet_free(nc_packet, true);
+free_skb:
+       kfree_skb(skb);
+
        return NET_RX_DROP;
 }
 
@@ -1961,17 +1969,16 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
        if (!nc_dir)
                goto out;
 
-       file = debugfs_create_u8("min_tq", S_IRUGO | S_IWUSR, nc_dir,
-                                &bat_priv->nc.min_tq);
+       file = debugfs_create_u8("min_tq", 0644, nc_dir, &bat_priv->nc.min_tq);
        if (!file)
                goto out;
 
-       file = debugfs_create_u32("max_fwd_delay", S_IRUGO | S_IWUSR, nc_dir,
+       file = debugfs_create_u32("max_fwd_delay", 0644, nc_dir,
                                  &bat_priv->nc.max_fwd_delay);
        if (!file)
                goto out;
 
-       file = debugfs_create_u32("max_buffer_time", S_IRUGO | S_IWUSR, nc_dir,
+       file = debugfs_create_u32("max_buffer_time", 0644, nc_dir,
                                  &bat_priv->nc.max_buffer_time);
        if (!file)
                goto out;
index 5f3bfc41aeb1ca5e505a232a480ad5671d85265a..8f3b2969cc4e3044e714086329166b9a3b7517a4 100644 (file)
@@ -364,7 +364,7 @@ struct batadv_orig_ifinfo *
 batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node,
                       struct batadv_hard_iface *if_outgoing)
 {
-       struct batadv_orig_ifinfo *orig_ifinfo = NULL;
+       struct batadv_orig_ifinfo *orig_ifinfo;
        unsigned long reset_time;
 
        spin_lock_bh(&orig_node->neigh_list_lock);
@@ -512,15 +512,17 @@ batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
  * batadv_hardif_neigh_create - create a hardif neighbour node
  * @hard_iface: the interface this neighbour is connected to
  * @neigh_addr: the interface address of the neighbour to retrieve
+ * @orig_node: originator object representing the neighbour
  *
  * Return: the hardif neighbour node if found or created or NULL otherwise.
  */
 static struct batadv_hardif_neigh_node *
 batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
-                          const u8 *neigh_addr)
+                          const u8 *neigh_addr,
+                          struct batadv_orig_node *orig_node)
 {
        struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
-       struct batadv_hardif_neigh_node *hardif_neigh = NULL;
+       struct batadv_hardif_neigh_node *hardif_neigh;
 
        spin_lock_bh(&hard_iface->neigh_list_lock);
 
@@ -536,6 +538,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
        kref_get(&hard_iface->refcount);
        INIT_HLIST_NODE(&hardif_neigh->list);
        ether_addr_copy(hardif_neigh->addr, neigh_addr);
+       ether_addr_copy(hardif_neigh->orig, orig_node->orig);
        hardif_neigh->if_incoming = hard_iface;
        hardif_neigh->last_seen = jiffies;
 
@@ -544,7 +547,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface,
        if (bat_priv->algo_ops->neigh.hardif_init)
                bat_priv->algo_ops->neigh.hardif_init(hardif_neigh);
 
-       hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list);
+       hlist_add_head_rcu(&hardif_neigh->list, &hard_iface->neigh_list);
 
 out:
        spin_unlock_bh(&hard_iface->neigh_list_lock);
@@ -556,21 +559,23 @@ out:
  *  node
  * @hard_iface: the interface this neighbour is connected to
  * @neigh_addr: the interface address of the neighbour to retrieve
+ * @orig_node: originator object representing the neighbour
  *
  * Return: the hardif neighbour node if found or created or NULL otherwise.
  */
 static struct batadv_hardif_neigh_node *
 batadv_hardif_neigh_get_or_create(struct batadv_hard_iface *hard_iface,
-                                 const u8 *neigh_addr)
+                                 const u8 *neigh_addr,
+                                 struct batadv_orig_node *orig_node)
 {
-       struct batadv_hardif_neigh_node *hardif_neigh = NULL;
+       struct batadv_hardif_neigh_node *hardif_neigh;
 
        /* first check without locking to avoid the overhead */
        hardif_neigh = batadv_hardif_neigh_get(hard_iface, neigh_addr);
        if (hardif_neigh)
                return hardif_neigh;
 
-       return batadv_hardif_neigh_create(hard_iface, neigh_addr);
+       return batadv_hardif_neigh_create(hard_iface, neigh_addr, orig_node);
 }
 
 /**
@@ -630,7 +635,7 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node,
                goto out;
 
        hardif_neigh = batadv_hardif_neigh_get_or_create(hard_iface,
-                                                        neigh_addr);
+                                                        neigh_addr, orig_node);
        if (!hardif_neigh)
                goto out;
 
@@ -683,7 +688,7 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node,
                                struct batadv_hard_iface *hard_iface,
                                const u8 *neigh_addr)
 {
-       struct batadv_neigh_node *neigh_node = NULL;
+       struct batadv_neigh_node *neigh_node;
 
        /* first check without locking to avoid the overhead */
        neigh_node = batadv_neigh_node_get(orig_node, hard_iface, neigh_addr);
@@ -1021,7 +1026,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
        batadv_orig_node_vlan_put(vlan);
 
        for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
-               INIT_HLIST_HEAD(&orig_node->fragments[i].head);
+               INIT_HLIST_HEAD(&orig_node->fragments[i].fragment_list);
                spin_lock_init(&orig_node->fragments[i].lock);
                orig_node->fragments[i].size = 0;
        }
index 6afc0b86950eca24633fc28138c643598e07c84f..7a36bcfa0ba01993080b2ad1ddca94ca0d50adc9 100644 (file)
@@ -21,7 +21,7 @@
 #include <asm/byteorder.h>
 #include <linux/types.h>
 
-#define batadv_tp_is_error(n) ((u8)n > 127 ? 1 : 0)
+#define batadv_tp_is_error(n) ((u8)(n) > 127 ? 1 : 0)
 
 /**
  * enum batadv_packettype - types for batman-adv encapsulated packets
@@ -251,16 +251,6 @@ struct batadv_elp_packet {
 
 #define BATADV_ELP_HLEN sizeof(struct batadv_elp_packet)
 
-/**
- * enum batadv_icmp_user_cmd_type - types for batman-adv icmp cmd modes
- * @BATADV_TP_START: start a throughput meter run
- * @BATADV_TP_STOP: stop a throughput meter run
- */
-enum batadv_icmp_user_cmd_type {
-       BATADV_TP_START         = 0,
-       BATADV_TP_STOP          = 2,
-};
-
 /**
  * struct batadv_icmp_header - common members among all the ICMP packets
  * @packet_type: batman-adv packet type, part of the general header
index 7e8dc648b95a671d5643ac6cd06ca5e572f930a7..6713bdf414cdacdaf36ecd6ac516f99e079fb51e 100644 (file)
@@ -196,8 +196,8 @@ bool batadv_check_management_packet(struct sk_buff *skb,
        if (!is_broadcast_ether_addr(ethhdr->h_dest))
                return false;
 
-       /* packet with broadcast sender address */
-       if (is_broadcast_ether_addr(ethhdr->h_source))
+       /* packet with invalid sender address */
+       if (!is_valid_ether_addr(ethhdr->h_source))
                return false;
 
        /* create a copy of the skb, if needed, to modify it. */
@@ -262,11 +262,11 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
                icmph->ttl = BATADV_TTL;
 
                res = batadv_send_skb_to_orig(skb, orig_node, NULL);
-               if (res == -1)
-                       goto out;
-
-               ret = NET_RX_SUCCESS;
+               if (res == NET_XMIT_SUCCESS)
+                       ret = NET_RX_SUCCESS;
 
+               /* skb was consumed */
+               skb = NULL;
                break;
        case BATADV_TP:
                if (!pskb_may_pull(skb, sizeof(struct batadv_icmp_tp_packet)))
@@ -274,6 +274,8 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
 
                batadv_tp_meter_recv(bat_priv, skb);
                ret = NET_RX_SUCCESS;
+               /* skb was consumed */
+               skb = NULL;
                goto out;
        default:
                /* drop unknown type */
@@ -284,6 +286,9 @@ out:
                batadv_hardif_put(primary_if);
        if (orig_node)
                batadv_orig_node_put(orig_node);
+
+       kfree_skb(skb);
+
        return ret;
 }
 
@@ -325,14 +330,20 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
        icmp_packet->ttl = BATADV_TTL;
 
        res = batadv_send_skb_to_orig(skb, orig_node, NULL);
-       if (res != -1)
-               ret = NET_RX_SUCCESS;
+       if (res == NET_RX_SUCCESS)
+               ret = NET_XMIT_SUCCESS;
+
+       /* skb was consumed */
+       skb = NULL;
 
 out:
        if (primary_if)
                batadv_hardif_put(primary_if);
        if (orig_node)
                batadv_orig_node_put(orig_node);
+
+       kfree_skb(skb);
+
        return ret;
 }
 
@@ -349,21 +360,21 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 
        /* drop packet if it has not necessary minimum size */
        if (unlikely(!pskb_may_pull(skb, hdr_size)))
-               goto out;
+               goto free_skb;
 
        ethhdr = eth_hdr(skb);
 
-       /* packet with unicast indication but broadcast recipient */
-       if (is_broadcast_ether_addr(ethhdr->h_dest))
-               goto out;
+       /* packet with unicast indication but non-unicast recipient */
+       if (!is_valid_ether_addr(ethhdr->h_dest))
+               goto free_skb;
 
-       /* packet with broadcast sender address */
-       if (is_broadcast_ether_addr(ethhdr->h_source))
-               goto out;
+       /* packet with broadcast/multicast sender address */
+       if (is_multicast_ether_addr(ethhdr->h_source))
+               goto free_skb;
 
        /* not for me */
        if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest))
-               goto out;
+               goto free_skb;
 
        icmph = (struct batadv_icmp_header *)skb->data;
 
@@ -372,17 +383,17 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
             icmph->msg_type == BATADV_ECHO_REQUEST) &&
            (skb->len >= sizeof(struct batadv_icmp_packet_rr))) {
                if (skb_linearize(skb) < 0)
-                       goto out;
+                       goto free_skb;
 
                /* create a copy of the skb, if needed, to modify it. */
                if (skb_cow(skb, ETH_HLEN) < 0)
-                       goto out;
+                       goto free_skb;
 
                ethhdr = eth_hdr(skb);
                icmph = (struct batadv_icmp_header *)skb->data;
                icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph;
                if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN)
-                       goto out;
+                       goto free_skb;
 
                ether_addr_copy(icmp_packet_rr->rr[icmp_packet_rr->rr_cur],
                                ethhdr->h_dest);
@@ -400,11 +411,11 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
        /* get routing information */
        orig_node = batadv_orig_hash_find(bat_priv, icmph->dst);
        if (!orig_node)
-               goto out;
+               goto free_skb;
 
        /* create a copy of the skb, if needed, to modify it. */
        if (skb_cow(skb, ETH_HLEN) < 0)
-               goto out;
+               goto put_orig_node;
 
        icmph = (struct batadv_icmp_header *)skb->data;
 
@@ -413,12 +424,18 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
 
        /* route it */
        res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
-       if (res != -1)
+       if (res == NET_XMIT_SUCCESS)
                ret = NET_RX_SUCCESS;
 
-out:
+       /* skb was consumed */
+       skb = NULL;
+
+put_orig_node:
        if (orig_node)
                batadv_orig_node_put(orig_node);
+free_skb:
+       kfree_skb(skb);
+
        return ret;
 }
 
@@ -445,12 +462,12 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,
 
        ethhdr = eth_hdr(skb);
 
-       /* packet with unicast indication but broadcast recipient */
-       if (is_broadcast_ether_addr(ethhdr->h_dest))
+       /* packet with unicast indication but non-unicast recipient */
+       if (!is_valid_ether_addr(ethhdr->h_dest))
                return -EBADR;
 
-       /* packet with broadcast sender address */
-       if (is_broadcast_ether_addr(ethhdr->h_source))
+       /* packet with broadcast/multicast sender address */
+       if (is_multicast_ether_addr(ethhdr->h_source))
                return -EBADR;
 
        /* not for me */
@@ -667,18 +684,18 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
        if (unicast_packet->ttl < 2) {
                pr_debug("Warning - can't forward unicast packet from %pM to %pM: ttl exceeded\n",
                         ethhdr->h_source, unicast_packet->dest);
-               goto out;
+               goto free_skb;
        }
 
        /* get routing information */
        orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->dest);
 
        if (!orig_node)
-               goto out;
+               goto free_skb;
 
        /* create a copy of the skb, if needed, to modify it. */
        if (skb_cow(skb, ETH_HLEN) < 0)
-               goto out;
+               goto put_orig_node;
 
        /* decrement ttl */
        unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -702,8 +719,11 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
 
        len = skb->len;
        res = batadv_send_skb_to_orig(skb, orig_node, recv_if);
-       if (res == -1)
-               goto out;
+       if (res == NET_XMIT_SUCCESS)
+               ret = NET_RX_SUCCESS;
+
+       /* skb was consumed */
+       skb = NULL;
 
        /* translate transmit result into receive result */
        if (res == NET_XMIT_SUCCESS) {
@@ -713,11 +733,11 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
                                   len + ETH_HLEN);
        }
 
-       ret = NET_RX_SUCCESS;
+put_orig_node:
+       batadv_orig_node_put(orig_node);
+free_skb:
+       kfree_skb(skb);
 
-out:
-       if (orig_node)
-               batadv_orig_node_put(orig_node);
        return ret;
 }
 
@@ -902,14 +922,18 @@ int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
 
        check = batadv_check_unicast_packet(bat_priv, skb, hdr_size);
        if (check < 0)
-               return NET_RX_DROP;
+               goto free_skb;
 
        /* we don't know about this type, drop it. */
        unicast_packet = (struct batadv_unicast_packet *)skb->data;
        if (batadv_is_my_mac(bat_priv, unicast_packet->dest))
-               return NET_RX_DROP;
+               goto free_skb;
 
        return batadv_route_unicast_packet(skb, recv_if);
+
+free_skb:
+       kfree_skb(skb);
+       return NET_RX_DROP;
 }
 
 int batadv_recv_unicast_packet(struct sk_buff *skb,
@@ -923,6 +947,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
        int check, hdr_size = sizeof(*unicast_packet);
        enum batadv_subtype subtype;
        bool is4addr;
+       int ret = NET_RX_DROP;
 
        unicast_packet = (struct batadv_unicast_packet *)skb->data;
        unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
@@ -942,9 +967,9 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
                batadv_nc_skb_store_sniffed_unicast(bat_priv, skb);
 
        if (check < 0)
-               return NET_RX_DROP;
+               goto free_skb;
        if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
-               return NET_RX_DROP;
+               goto free_skb;
 
        /* packet for me */
        if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
@@ -982,7 +1007,14 @@ rx_success:
                return NET_RX_SUCCESS;
        }
 
-       return batadv_route_unicast_packet(skb, recv_if);
+       ret = batadv_route_unicast_packet(skb, recv_if);
+       /* skb was consumed */
+       skb = NULL;
+
+free_skb:
+       kfree_skb(skb);
+
+       return ret;
 }
 
 /**
@@ -1004,15 +1036,15 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
        int ret = NET_RX_DROP;
 
        if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
-               return NET_RX_DROP;
+               goto free_skb;
 
        /* the header is likely to be modified while forwarding */
        if (skb_cow(skb, hdr_size) < 0)
-               return NET_RX_DROP;
+               goto free_skb;
 
        /* packet needs to be linearized to access the tvlv content */
        if (skb_linearize(skb) < 0)
-               return NET_RX_DROP;
+               goto free_skb;
 
        unicast_tvlv_packet = (struct batadv_unicast_tvlv_packet *)skb->data;
 
@@ -1020,17 +1052,21 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
        tvlv_buff_len = ntohs(unicast_tvlv_packet->tvlv_len);
 
        if (tvlv_buff_len > skb->len - hdr_size)
-               return NET_RX_DROP;
+               goto free_skb;
 
        ret = batadv_tvlv_containers_process(bat_priv, false, NULL,
                                             unicast_tvlv_packet->src,
                                             unicast_tvlv_packet->dst,
                                             tvlv_buff, tvlv_buff_len);
 
-       if (ret != NET_RX_SUCCESS)
+       if (ret != NET_RX_SUCCESS) {
                ret = batadv_route_unicast_packet(skb, recv_if);
-       else
-               consume_skb(skb);
+               /* skb was consumed */
+               skb = NULL;
+       }
+
+free_skb:
+       kfree_skb(skb);
 
        return ret;
 }
@@ -1056,20 +1092,22 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
 
        if (batadv_check_unicast_packet(bat_priv, skb,
                                        sizeof(*frag_packet)) < 0)
-               goto out;
+               goto free_skb;
 
        frag_packet = (struct batadv_frag_packet *)skb->data;
        orig_node_src = batadv_orig_hash_find(bat_priv, frag_packet->orig);
        if (!orig_node_src)
-               goto out;
+               goto free_skb;
 
        skb->priority = frag_packet->priority + 256;
 
        /* Route the fragment if it is not for us and too big to be merged. */
        if (!batadv_is_my_mac(bat_priv, frag_packet->dest) &&
            batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) {
+               /* skb was consumed */
+               skb = NULL;
                ret = NET_RX_SUCCESS;
-               goto out;
+               goto put_orig_node;
        }
 
        batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_RX);
@@ -1077,20 +1115,24 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
 
        /* Add fragment to buffer and merge if possible. */
        if (!batadv_frag_skb_buffer(&skb, orig_node_src))
-               goto out;
+               goto put_orig_node;
 
        /* Deliver merged packet to the appropriate handler, if it was
         * merged
         */
-       if (skb)
+       if (skb) {
                batadv_batman_skb_recv(skb, recv_if->net_dev,
                                       &recv_if->batman_adv_ptype, NULL);
+               /* skb was consumed */
+               skb = NULL;
+       }
 
        ret = NET_RX_SUCCESS;
 
-out:
-       if (orig_node_src)
-               batadv_orig_node_put(orig_node_src);
+put_orig_node:
+       batadv_orig_node_put(orig_node_src);
+free_skb:
+       kfree_skb(skb);
 
        return ret;
 }
@@ -1109,35 +1151,35 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
 
        /* drop packet if it has not necessary minimum size */
        if (unlikely(!pskb_may_pull(skb, hdr_size)))
-               goto out;
+               goto free_skb;
 
        ethhdr = eth_hdr(skb);
 
        /* packet with broadcast indication but unicast recipient */
        if (!is_broadcast_ether_addr(ethhdr->h_dest))
-               goto out;
+               goto free_skb;
 
-       /* packet with broadcast sender address */
-       if (is_broadcast_ether_addr(ethhdr->h_source))
-               goto out;
+       /* packet with broadcast/multicast sender address */
+       if (is_multicast_ether_addr(ethhdr->h_source))
+               goto free_skb;
 
        /* ignore broadcasts sent by myself */
        if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
-               goto out;
+               goto free_skb;
 
        bcast_packet = (struct batadv_bcast_packet *)skb->data;
 
        /* ignore broadcasts originated by myself */
        if (batadv_is_my_mac(bat_priv, bcast_packet->orig))
-               goto out;
+               goto free_skb;
 
        if (bcast_packet->ttl < 2)
-               goto out;
+               goto free_skb;
 
        orig_node = batadv_orig_hash_find(bat_priv, bcast_packet->orig);
 
        if (!orig_node)
-               goto out;
+               goto free_skb;
 
        spin_lock_bh(&orig_node->bcast_seqno_lock);
 
@@ -1165,18 +1207,18 @@ int batadv_recv_bcast_packet(struct sk_buff *skb,
 
        /* check whether this has been sent by another originator before */
        if (batadv_bla_check_bcast_duplist(bat_priv, skb))
-               goto out;
+               goto free_skb;
 
        batadv_skb_set_priority(skb, sizeof(struct batadv_bcast_packet));
 
        /* rebroadcast packet */
-       batadv_add_bcast_packet_to_list(bat_priv, skb, 1);
+       batadv_add_bcast_packet_to_list(bat_priv, skb, 1, false);
 
        /* don't hand the broadcast up if it is from an originator
         * from the same backbone.
         */
        if (batadv_bla_is_backbone_gw(skb, orig_node, hdr_size))
-               goto out;
+               goto free_skb;
 
        if (batadv_dat_snoop_incoming_arp_request(bat_priv, skb, hdr_size))
                goto rx_success;
@@ -1192,6 +1234,8 @@ rx_success:
 
 spin_unlock:
        spin_unlock_bh(&orig_node->bcast_seqno_lock);
+free_skb:
+       kfree_skb(skb);
 out:
        if (orig_node)
                batadv_orig_node_put(orig_node);
index 8d4e1f57857455ea5694ed7a8ef014a70cdee7b4..49021b7124f37a4e95e043eb1b9d88855b60885c 100644 (file)
@@ -19,6 +19,7 @@
 #include "main.h"
 
 #include <linux/atomic.h>
+#include <linux/bug.h>
 #include <linux/byteorder/generic.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
@@ -64,8 +65,11 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
  * If neigh_node is NULL, then the packet is broadcasted using hard_iface,
  * otherwise it is sent as unicast to the given neighbor.
  *
- * Return: NET_TX_DROP in case of error or the result of dev_queue_xmit(skb)
- * otherwise
+ * Regardless of the return value, the skb is consumed.
+ *
+ * Return: A negative errno code is returned on a failure. A success does not
+ * guarantee the frame will be transmitted as it may be dropped due
+ * to congestion or traffic shaping.
  */
 int batadv_send_skb_packet(struct sk_buff *skb,
                           struct batadv_hard_iface *hard_iface,
@@ -73,7 +77,6 @@ int batadv_send_skb_packet(struct sk_buff *skb,
 {
        struct batadv_priv *bat_priv;
        struct ethhdr *ethhdr;
-       int ret;
 
        bat_priv = netdev_priv(hard_iface->soft_iface);
 
@@ -111,15 +114,8 @@ int batadv_send_skb_packet(struct sk_buff *skb,
        /* dev_queue_xmit() returns a negative result on error.  However on
         * congestion and traffic shaping, it drops and returns NET_XMIT_DROP
         * (which is > 0). This will not be treated as an error.
-        *
-        * a negative value cannot be returned because it could be interepreted
-        * as not consumed skb by callers of batadv_send_skb_to_orig.
         */
-       ret = dev_queue_xmit(skb);
-       if (ret < 0)
-               ret = NET_XMIT_DROP;
-
-       return ret;
+       return dev_queue_xmit(skb);
 send_skb_err:
        kfree_skb(skb);
        return NET_XMIT_DROP;
@@ -165,11 +161,9 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
  * host, NULL can be passed as recv_if and no interface alternating is
  * attempted.
  *
- * Return: -1 on failure (and the skb is not consumed), -EINPROGRESS if the
- * skb is buffered for later transmit or the NET_XMIT status returned by the
+ * Return: negative errno code on a failure, -EINPROGRESS if the skb is
+ * buffered for later transmit or the NET_XMIT status returned by the
  * lower routine if the packet has been passed down.
- *
- * If the returning value is not -1 the skb has been consumed.
  */
 int batadv_send_skb_to_orig(struct sk_buff *skb,
                            struct batadv_orig_node *orig_node,
@@ -177,12 +171,14 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
 {
        struct batadv_priv *bat_priv = orig_node->bat_priv;
        struct batadv_neigh_node *neigh_node;
-       int ret = -1;
+       int ret;
 
        /* batadv_find_router() increases neigh_nodes refcount if found. */
        neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
-       if (!neigh_node)
-               goto out;
+       if (!neigh_node) {
+               ret = -EINVAL;
+               goto free_skb;
+       }
 
        /* Check if the skb is too large to send in one piece and fragment
         * it if needed.
@@ -191,8 +187,10 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
            skb->len > neigh_node->if_incoming->net_dev->mtu) {
                /* Fragment and send packet. */
                ret = batadv_frag_send_packet(skb, orig_node, neigh_node);
+               /* skb was consumed */
+               skb = NULL;
 
-               goto out;
+               goto put_neigh_node;
        }
 
        /* try to network code the packet, if it is received on an interface
@@ -204,9 +202,13 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
        else
                ret = batadv_send_unicast_skb(skb, neigh_node);
 
-out:
-       if (neigh_node)
-               batadv_neigh_node_put(neigh_node);
+       /* skb was consumed */
+       skb = NULL;
+
+put_neigh_node:
+       batadv_neigh_node_put(neigh_node);
+free_skb:
+       kfree_skb(skb);
 
        return ret;
 }
@@ -327,7 +329,7 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
 {
        struct batadv_unicast_packet *unicast_packet;
        struct ethhdr *ethhdr;
-       int res, ret = NET_XMIT_DROP;
+       int ret = NET_XMIT_DROP;
 
        if (!orig_node)
                goto out;
@@ -364,13 +366,12 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
        if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest, vid))
                unicast_packet->ttvn = unicast_packet->ttvn - 1;
 
-       res = batadv_send_skb_to_orig(skb, orig_node, NULL);
-       if (res != -1)
-               ret = NET_XMIT_SUCCESS;
+       ret = batadv_send_skb_to_orig(skb, orig_node, NULL);
+        /* skb was consumed */
+       skb = NULL;
 
 out:
-       if (ret == NET_XMIT_DROP)
-               kfree_skb(skb);
+       kfree_skb(skb);
        return ret;
 }
 
@@ -451,13 +452,19 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb,
 /**
  * batadv_forw_packet_free - free a forwarding packet
  * @forw_packet: The packet to free
+ * @dropped: whether the packet is freed because it is dropped
  *
  * This frees a forwarding packet and releases any resources it might
  * have claimed.
  */
-void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
+void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
+                            bool dropped)
 {
-       kfree_skb(forw_packet->skb);
+       if (dropped)
+               kfree_skb(forw_packet->skb);
+       else
+               consume_skb(forw_packet->skb);
+
        if (forw_packet->if_incoming)
                batadv_hardif_put(forw_packet->if_incoming);
        if (forw_packet->if_outgoing)
@@ -514,6 +521,8 @@ batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
        if (if_outgoing)
                kref_get(&if_outgoing->refcount);
 
+       INIT_HLIST_NODE(&forw_packet->list);
+       INIT_HLIST_NODE(&forw_packet->cleanup_list);
        forw_packet->skb = NULL;
        forw_packet->queue_left = queue_left;
        forw_packet->if_incoming = if_incoming;
@@ -529,19 +538,191 @@ err:
        return NULL;
 }
 
+/**
+ * batadv_forw_packet_was_stolen - check whether someone stole this packet
+ * @forw_packet: the forwarding packet to check
+ *
+ * This function checks whether the given forwarding packet was claimed by
+ * someone else for free().
+ *
+ * Return: True if someone stole it, false otherwise.
+ */
+static bool
+batadv_forw_packet_was_stolen(struct batadv_forw_packet *forw_packet)
+{
+       return !hlist_unhashed(&forw_packet->cleanup_list);
+}
+
+/**
+ * batadv_forw_packet_steal - claim a forw_packet for free()
+ * @forw_packet: the forwarding packet to steal
+ * @lock: a key to the store to steal from (e.g. forw_{bat,bcast}_list_lock)
+ *
+ * This function tries to steal a specific forw_packet from global
+ * visibility for the purpose of getting it for free(). That means
+ * the caller is *not* allowed to requeue it afterwards.
+ *
+ * Return: True if stealing was successful. False if someone else stole it
+ * before us.
+ */
+bool batadv_forw_packet_steal(struct batadv_forw_packet *forw_packet,
+                             spinlock_t *lock)
+{
+       /* did purging routine steal it earlier? */
+       spin_lock_bh(lock);
+       if (batadv_forw_packet_was_stolen(forw_packet)) {
+               spin_unlock_bh(lock);
+               return false;
+       }
+
+       hlist_del_init(&forw_packet->list);
+
+       /* Just to spot misuse of this function */
+       hlist_add_fake(&forw_packet->cleanup_list);
+
+       spin_unlock_bh(lock);
+       return true;
+}
+
+/**
+ * batadv_forw_packet_list_steal - claim a list of forward packets for free()
+ * @forw_list: the forward packets to be stolen
+ * @cleanup_list: a backup pointer, to be able to dispose the packet later
+ * @hard_iface: the interface to steal forward packets from
+ *
+ * This function claims responsibility to free any forw_packet queued on the
+ * given hard_iface. If hard_iface is NULL forwarding packets on all hard
+ * interfaces will be claimed.
+ *
+ * The packets are moved from the forw_list to the cleanup_list, which
+ * allows already running threads to notice the claiming.
+ */
 static void
-_batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
-                                struct batadv_forw_packet *forw_packet,
-                                unsigned long send_time)
+batadv_forw_packet_list_steal(struct hlist_head *forw_list,
+                             struct hlist_head *cleanup_list,
+                             const struct batadv_hard_iface *hard_iface)
 {
-       /* add new packet to packet list */
-       spin_lock_bh(&bat_priv->forw_bcast_list_lock);
-       hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list);
-       spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
+       struct batadv_forw_packet *forw_packet;
+       struct hlist_node *safe_tmp_node;
+
+       hlist_for_each_entry_safe(forw_packet, safe_tmp_node,
+                                 forw_list, list) {
+               /* if purge_outstanding_packets() was called with an argument
+                * we delete only packets belonging to the given interface
+                */
+               if (hard_iface &&
+                   (forw_packet->if_incoming != hard_iface) &&
+                   (forw_packet->if_outgoing != hard_iface))
+                       continue;
+
+               hlist_del(&forw_packet->list);
+               hlist_add_head(&forw_packet->cleanup_list, cleanup_list);
+       }
+}
+
+/**
+ * batadv_forw_packet_list_free - free a list of forward packets
+ * @head: a list of forw_packets to be freed
+ *
+ * This function cancels the scheduling of any packet in the provided list,
+ * waits for any possibly running packet forwarding thread to finish and
+ * finally, safely frees this forward packet.
+ *
+ * This function might sleep.
+ */
+static void batadv_forw_packet_list_free(struct hlist_head *head)
+{
+       struct batadv_forw_packet *forw_packet;
+       struct hlist_node *safe_tmp_node;
+
+       hlist_for_each_entry_safe(forw_packet, safe_tmp_node, head,
+                                 cleanup_list) {
+               cancel_delayed_work_sync(&forw_packet->delayed_work);
+
+               hlist_del(&forw_packet->cleanup_list);
+               batadv_forw_packet_free(forw_packet, true);
+       }
+}
+
+/**
+ * batadv_forw_packet_queue - try to queue a forwarding packet
+ * @forw_packet: the forwarding packet to queue
+ * @lock: a key to the store (e.g. forw_{bat,bcast}_list_lock)
+ * @head: the shelf to queue it on (e.g. forw_{bat,bcast}_list)
+ * @send_time: timestamp (jiffies) when the packet is to be sent
+ *
+ * This function tries to (re)queue a forwarding packet. Requeuing
+ * is prevented if the according interface is shutting down
+ * (e.g. if batadv_forw_packet_list_steal() was called for this
+ * packet earlier).
+ *
+ * Calling batadv_forw_packet_queue() after a call to
+ * batadv_forw_packet_steal() is forbidden!
+ *
+ * Caller needs to ensure that forw_packet->delayed_work was initialized.
+ */
+static void batadv_forw_packet_queue(struct batadv_forw_packet *forw_packet,
+                                    spinlock_t *lock, struct hlist_head *head,
+                                    unsigned long send_time)
+{
+       spin_lock_bh(lock);
+
+       /* did purging routine steal it from us? */
+       if (batadv_forw_packet_was_stolen(forw_packet)) {
+               /* If you got it for free() without trouble, then
+                * don't get back into the queue after stealing...
+                */
+               WARN_ONCE(hlist_fake(&forw_packet->cleanup_list),
+                         "Requeuing after batadv_forw_packet_steal() not allowed!\n");
 
-       /* start timer for this packet */
-       queue_delayed_work(batadv_event_workqueue, &forw_packet->delayed_work,
-                          send_time);
+               spin_unlock_bh(lock);
+               return;
+       }
+
+       hlist_del_init(&forw_packet->list);
+       hlist_add_head(&forw_packet->list, head);
+
+       queue_delayed_work(batadv_event_workqueue,
+                          &forw_packet->delayed_work,
+                          send_time - jiffies);
+       spin_unlock_bh(lock);
+}
+
+/**
+ * batadv_forw_packet_bcast_queue - try to queue a broadcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @forw_packet: the forwarding packet to queue
+ * @send_time: timestamp (jiffies) when the packet is to be sent
+ *
+ * This function tries to (re)queue a broadcast packet.
+ *
+ * Caller needs to ensure that forw_packet->delayed_work was initialized.
+ */
+static void
+batadv_forw_packet_bcast_queue(struct batadv_priv *bat_priv,
+                              struct batadv_forw_packet *forw_packet,
+                              unsigned long send_time)
+{
+       batadv_forw_packet_queue(forw_packet, &bat_priv->forw_bcast_list_lock,
+                                &bat_priv->forw_bcast_list, send_time);
+}
+
+/**
+ * batadv_forw_packet_ogmv1_queue - try to queue an OGMv1 packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @forw_packet: the forwarding packet to queue
+ * @send_time: timestamp (jiffies) when the packet is to be sent
+ *
+ * This function tries to (re)queue an OGMv1 packet.
+ *
+ * Caller needs to ensure that forw_packet->delayed_work was initialized.
+ */
+void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
+                                   struct batadv_forw_packet *forw_packet,
+                                   unsigned long send_time)
+{
+       batadv_forw_packet_queue(forw_packet, &bat_priv->forw_bat_list_lock,
+                                &bat_priv->forw_bat_list, send_time);
 }
 
 /**
@@ -549,6 +730,7 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: broadcast packet to add
  * @delay: number of jiffies to wait before sending
+ * @own_packet: true if it is a self-generated broadcast packet
  *
  * add a broadcast packet to the queue and setup timers. broadcast packets
  * are sent multiple times to increase probability for being received.
@@ -560,9 +742,10 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
  */
 int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
                                    const struct sk_buff *skb,
-                                   unsigned long delay)
+                                   unsigned long delay,
+                                   bool own_packet)
 {
-       struct batadv_hard_iface *primary_if = NULL;
+       struct batadv_hard_iface *primary_if;
        struct batadv_forw_packet *forw_packet;
        struct batadv_bcast_packet *bcast_packet;
        struct sk_buff *newskb;
@@ -586,18 +769,17 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
        bcast_packet = (struct batadv_bcast_packet *)newskb->data;
        bcast_packet->ttl--;
 
-       skb_reset_mac_header(newskb);
-
        forw_packet->skb = newskb;
+       forw_packet->own = own_packet;
 
        INIT_DELAYED_WORK(&forw_packet->delayed_work,
                          batadv_send_outstanding_bcast_packet);
 
-       _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay);
+       batadv_forw_packet_bcast_queue(bat_priv, forw_packet, jiffies + delay);
        return NETDEV_TX_OK;
 
 err_packet_free:
-       batadv_forw_packet_free(forw_packet);
+       batadv_forw_packet_free(forw_packet, true);
 err:
        return NETDEV_TX_BUSY;
 }
@@ -605,11 +787,18 @@ err:
 static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
 {
        struct batadv_hard_iface *hard_iface;
+       struct batadv_hardif_neigh_node *neigh_node;
        struct delayed_work *delayed_work;
        struct batadv_forw_packet *forw_packet;
+       struct batadv_bcast_packet *bcast_packet;
        struct sk_buff *skb1;
        struct net_device *soft_iface;
        struct batadv_priv *bat_priv;
+       unsigned long send_time = jiffies + msecs_to_jiffies(5);
+       bool dropped = false;
+       u8 *neigh_addr;
+       u8 *orig_neigh;
+       int ret = 0;
 
        delayed_work = to_delayed_work(work);
        forw_packet = container_of(delayed_work, struct batadv_forw_packet,
@@ -617,15 +806,17 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
        soft_iface = forw_packet->if_incoming->soft_iface;
        bat_priv = netdev_priv(soft_iface);
 
-       spin_lock_bh(&bat_priv->forw_bcast_list_lock);
-       hlist_del(&forw_packet->list);
-       spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
-
-       if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
+       if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) {
+               dropped = true;
                goto out;
+       }
 
-       if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet))
+       if (batadv_dat_drop_broadcast_packet(bat_priv, forw_packet)) {
+               dropped = true;
                goto out;
+       }
+
+       bcast_packet = (struct batadv_bcast_packet *)forw_packet->skb->data;
 
        /* rebroadcast packet */
        rcu_read_lock();
@@ -636,6 +827,49 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
                if (forw_packet->num_packets >= hard_iface->num_bcasts)
                        continue;
 
+               if (forw_packet->own) {
+                       neigh_node = NULL;
+               } else {
+                       neigh_addr = eth_hdr(forw_packet->skb)->h_source;
+                       neigh_node = batadv_hardif_neigh_get(hard_iface,
+                                                            neigh_addr);
+               }
+
+               orig_neigh = neigh_node ? neigh_node->orig : NULL;
+
+               ret = batadv_hardif_no_broadcast(hard_iface, bcast_packet->orig,
+                                                orig_neigh);
+
+               if (ret) {
+                       char *type;
+
+                       switch (ret) {
+                       case BATADV_HARDIF_BCAST_NORECIPIENT:
+                               type = "no neighbor";
+                               break;
+                       case BATADV_HARDIF_BCAST_DUPFWD:
+                               type = "single neighbor is source";
+                               break;
+                       case BATADV_HARDIF_BCAST_DUPORIG:
+                               type = "single neighbor is originator";
+                               break;
+                       default:
+                               type = "unknown";
+                       }
+
+                       batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s surpressed: %s\n",
+                                  bcast_packet->orig,
+                                  hard_iface->net_dev->name, type);
+
+                       if (neigh_node)
+                               batadv_hardif_neigh_put(neigh_node);
+
+                       continue;
+               }
+
+               if (neigh_node)
+                       batadv_hardif_neigh_put(neigh_node);
+
                if (!kref_get_unless_zero(&hard_iface->refcount))
                        continue;
 
@@ -652,22 +886,34 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work)
 
        /* if we still have some more bcasts to send */
        if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) {
-               _batadv_add_bcast_packet_to_list(bat_priv, forw_packet,
-                                                msecs_to_jiffies(5));
+               batadv_forw_packet_bcast_queue(bat_priv, forw_packet,
+                                              send_time);
                return;
        }
 
 out:
-       batadv_forw_packet_free(forw_packet);
+       /* do we get something for free()? */
+       if (batadv_forw_packet_steal(forw_packet,
+                                    &bat_priv->forw_bcast_list_lock))
+               batadv_forw_packet_free(forw_packet, dropped);
 }
 
+/**
+ * batadv_purge_outstanding_packets - stop/purge scheduled bcast/OGMv1 packets
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hard_iface: the hard interface to cancel and purge bcast/ogm packets on
+ *
+ * This method cancels and purges any broadcast and OGMv1 packet on the given
+ * hard_iface. If hard_iface is NULL, broadcast and OGMv1 packets on all hard
+ * interfaces will be canceled and purged.
+ *
+ * This function might sleep.
+ */
 void
 batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
                                 const struct batadv_hard_iface *hard_iface)
 {
-       struct batadv_forw_packet *forw_packet;
-       struct hlist_node *safe_tmp_node;
-       bool pending;
+       struct hlist_head head = HLIST_HEAD_INIT;
 
        if (hard_iface)
                batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -677,57 +923,18 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
                batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
                           "purge_outstanding_packets()\n");
 
-       /* free bcast list */
+       /* claim bcast list for free() */
        spin_lock_bh(&bat_priv->forw_bcast_list_lock);
-       hlist_for_each_entry_safe(forw_packet, safe_tmp_node,
-                                 &bat_priv->forw_bcast_list, list) {
-               /* if purge_outstanding_packets() was called with an argument
-                * we delete only packets belonging to the given interface
-                */
-               if ((hard_iface) &&
-                   (forw_packet->if_incoming != hard_iface) &&
-                   (forw_packet->if_outgoing != hard_iface))
-                       continue;
-
-               spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
-
-               /* batadv_send_outstanding_bcast_packet() will lock the list to
-                * delete the item from the list
-                */
-               pending = cancel_delayed_work_sync(&forw_packet->delayed_work);
-               spin_lock_bh(&bat_priv->forw_bcast_list_lock);
-
-               if (pending) {
-                       hlist_del(&forw_packet->list);
-                       batadv_forw_packet_free(forw_packet);
-               }
-       }
+       batadv_forw_packet_list_steal(&bat_priv->forw_bcast_list, &head,
+                                     hard_iface);
        spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
 
-       /* free batman packet list */
+       /* claim batman packet list for free() */
        spin_lock_bh(&bat_priv->forw_bat_list_lock);
-       hlist_for_each_entry_safe(forw_packet, safe_tmp_node,
-                                 &bat_priv->forw_bat_list, list) {
-               /* if purge_outstanding_packets() was called with an argument
-                * we delete only packets belonging to the given interface
-                */
-               if ((hard_iface) &&
-                   (forw_packet->if_incoming != hard_iface) &&
-                   (forw_packet->if_outgoing != hard_iface))
-                       continue;
-
-               spin_unlock_bh(&bat_priv->forw_bat_list_lock);
-
-               /* send_outstanding_bat_packet() will lock the list to
-                * delete the item from the list
-                */
-               pending = cancel_delayed_work_sync(&forw_packet->delayed_work);
-               spin_lock_bh(&bat_priv->forw_bat_list_lock);
-
-               if (pending) {
-                       hlist_del(&forw_packet->list);
-                       batadv_forw_packet_free(forw_packet);
-               }
-       }
+       batadv_forw_packet_list_steal(&bat_priv->forw_bat_list, &head,
+                                     hard_iface);
        spin_unlock_bh(&bat_priv->forw_bat_list_lock);
+
+       /* then cancel or wait for packet workers to finish and free */
+       batadv_forw_packet_list_free(&head);
 }
index 999f78683d9e93724d05c33c40a74ace8dcbab6e..a94e1e8639ca2da6ca85a3e7a5ea37af12815960 100644 (file)
 #include "main.h"
 
 #include <linux/compiler.h>
+#include <linux/spinlock.h>
 #include <linux/types.h>
 
 #include "packet.h"
 
 struct sk_buff;
 
-void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet);
+void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet,
+                            bool dropped);
 struct batadv_forw_packet *
 batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
                         struct batadv_hard_iface *if_outgoing,
                         atomic_t *queue_left,
                         struct batadv_priv *bat_priv);
+bool batadv_forw_packet_steal(struct batadv_forw_packet *packet, spinlock_t *l);
+void batadv_forw_packet_ogmv1_queue(struct batadv_priv *bat_priv,
+                                   struct batadv_forw_packet *forw_packet,
+                                   unsigned long send_time);
 
 int batadv_send_skb_to_orig(struct sk_buff *skb,
                            struct batadv_orig_node *orig_node,
@@ -46,7 +52,8 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
                            struct batadv_neigh_node *neigh_node);
 int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
                                    const struct sk_buff *skb,
-                                   unsigned long delay);
+                                   unsigned long delay,
+                                   bool own_packet);
 void
 batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
                                 const struct batadv_hard_iface *hard_iface);
index 49e16b6e0ba3391f36aa60fdadf756df7058f0ba..7b3494ae6ad93fd0d32391e5c88f5d636f43acd5 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/byteorder/generic.h>
 #include <linux/cache.h>
 #include <linux/compiler.h>
+#include <linux/cpumask.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
@@ -116,6 +117,26 @@ static int batadv_interface_release(struct net_device *dev)
        return 0;
 }
 
+/**
+ * batadv_sum_counter - Sum the cpu-local counters for index 'idx'
+ * @bat_priv: the bat priv with all the soft interface information
+ * @idx: index of counter to sum up
+ *
+ * Return: sum of all cpu-local counters
+ */
+static u64 batadv_sum_counter(struct batadv_priv *bat_priv,  size_t idx)
+{
+       u64 *counters, sum = 0;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               counters = per_cpu_ptr(bat_priv->bat_counters, cpu);
+               sum += counters[idx];
+       }
+
+       return sum;
+}
+
 static struct net_device_stats *batadv_interface_stats(struct net_device *dev)
 {
        struct batadv_priv *bat_priv = netdev_priv(dev);
@@ -336,12 +357,12 @@ send:
                seqno = atomic_inc_return(&bat_priv->bcast_seqno);
                bcast_packet->seqno = htonl(seqno);
 
-               batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay);
+               batadv_add_bcast_packet_to_list(bat_priv, skb, brd_delay, true);
 
                /* a copy is stored in the bcast list, therefore removing
                 * the original skb.
                 */
-               kfree_skb(skb);
+               consume_skb(skb);
 
        /* unicast packet */
        } else {
@@ -365,7 +386,7 @@ send:
                        ret = batadv_send_skb_via_tt(bat_priv, skb, dst_hint,
                                                     vid);
                }
-               if (ret == NET_XMIT_DROP)
+               if (ret != NET_XMIT_SUCCESS)
                        goto dropped_freed;
        }
 
index 02d96f224c602cb4568e3357199227ef668f9a23..17c844196eb26c9faf9fd543b88cd86cc1c2c029 100644 (file)
@@ -33,7 +33,6 @@
 #include <linux/rcupdate.h>
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
-#include <linux/stat.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/stringify.h>
@@ -666,41 +665,36 @@ static ssize_t batadv_store_isolation_mark(struct kobject *kobj,
        return count;
 }
 
-BATADV_ATTR_SIF_BOOL(aggregated_ogms, S_IRUGO | S_IWUSR, NULL);
-BATADV_ATTR_SIF_BOOL(bonding, S_IRUGO | S_IWUSR, NULL);
+BATADV_ATTR_SIF_BOOL(aggregated_ogms, 0644, NULL);
+BATADV_ATTR_SIF_BOOL(bonding, 0644, NULL);
 #ifdef CONFIG_BATMAN_ADV_BLA
-BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, S_IRUGO | S_IWUSR,
-                    batadv_bla_status_update);
+BATADV_ATTR_SIF_BOOL(bridge_loop_avoidance, 0644, batadv_bla_status_update);
 #endif
 #ifdef CONFIG_BATMAN_ADV_DAT
-BATADV_ATTR_SIF_BOOL(distributed_arp_table, S_IRUGO | S_IWUSR,
-                    batadv_dat_status_update);
+BATADV_ATTR_SIF_BOOL(distributed_arp_table, 0644, batadv_dat_status_update);
 #endif
-BATADV_ATTR_SIF_BOOL(fragmentation, S_IRUGO | S_IWUSR, batadv_update_min_mtu);
-static BATADV_ATTR(routing_algo, S_IRUGO, batadv_show_bat_algo, NULL);
-static BATADV_ATTR(gw_mode, S_IRUGO | S_IWUSR, batadv_show_gw_mode,
-                  batadv_store_gw_mode);
-BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR,
-                    2 * BATADV_JITTER, INT_MAX, NULL);
-BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0,
-                    BATADV_TQ_MAX_VALUE, NULL);
-static BATADV_ATTR(gw_sel_class, S_IRUGO | S_IWUSR, batadv_show_gw_sel_class,
+BATADV_ATTR_SIF_BOOL(fragmentation, 0644, batadv_update_min_mtu);
+static BATADV_ATTR(routing_algo, 0444, batadv_show_bat_algo, NULL);
+static BATADV_ATTR(gw_mode, 0644, batadv_show_gw_mode, batadv_store_gw_mode);
+BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, 0644, 2 * BATADV_JITTER,
+                    INT_MAX, NULL);
+BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, 0644, 0, BATADV_TQ_MAX_VALUE,
+                    NULL);
+static BATADV_ATTR(gw_sel_class, 0644, batadv_show_gw_sel_class,
                   batadv_store_gw_sel_class);
-static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
+static BATADV_ATTR(gw_bandwidth, 0644, batadv_show_gw_bwidth,
                   batadv_store_gw_bwidth);
 #ifdef CONFIG_BATMAN_ADV_MCAST
-BATADV_ATTR_SIF_BOOL(multicast_mode, S_IRUGO | S_IWUSR, NULL);
+BATADV_ATTR_SIF_BOOL(multicast_mode, 0644, NULL);
 #endif
 #ifdef CONFIG_BATMAN_ADV_DEBUG
-BATADV_ATTR_SIF_UINT(log_level, log_level, S_IRUGO | S_IWUSR, 0,
-                    BATADV_DBG_ALL, NULL);
+BATADV_ATTR_SIF_UINT(log_level, log_level, 0644, 0, BATADV_DBG_ALL, NULL);
 #endif
 #ifdef CONFIG_BATMAN_ADV_NC
-BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR,
-                    batadv_nc_status_update);
+BATADV_ATTR_SIF_BOOL(network_coding, 0644, batadv_nc_status_update);
 #endif
-static BATADV_ATTR(isolation_mark, S_IRUGO | S_IWUSR,
-                  batadv_show_isolation_mark, batadv_store_isolation_mark);
+static BATADV_ATTR(isolation_mark, 0644, batadv_show_isolation_mark,
+                  batadv_store_isolation_mark);
 
 static struct batadv_attribute *batadv_mesh_attrs[] = {
        &batadv_attr_aggregated_ogms,
@@ -731,7 +725,7 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {
        NULL,
 };
 
-BATADV_ATTR_VLAN_BOOL(ap_isolation, S_IRUGO | S_IWUSR, NULL);
+BATADV_ATTR_VLAN_BOOL(ap_isolation, 0644, NULL);
 
 /* array of vlan specific sysfs attributes */
 static struct batadv_attribute *batadv_vlan_attrs[] = {
@@ -1116,14 +1110,13 @@ static ssize_t batadv_show_throughput_override(struct kobject *kobj,
 
 #endif
 
-static BATADV_ATTR(mesh_iface, S_IRUGO | S_IWUSR, batadv_show_mesh_iface,
+static BATADV_ATTR(mesh_iface, 0644, batadv_show_mesh_iface,
                   batadv_store_mesh_iface);
-static BATADV_ATTR(iface_status, S_IRUGO, batadv_show_iface_status, NULL);
+static BATADV_ATTR(iface_status, 0444, batadv_show_iface_status, NULL);
 #ifdef CONFIG_BATMAN_ADV_BATMAN_V
-BATADV_ATTR_HIF_UINT(elp_interval, bat_v.elp_interval, S_IRUGO | S_IWUSR,
+BATADV_ATTR_HIF_UINT(elp_interval, bat_v.elp_interval, 0644,
                     2 * BATADV_JITTER, INT_MAX, NULL);
-static BATADV_ATTR(throughput_override, S_IRUGO | S_IWUSR,
-                  batadv_show_throughput_override,
+static BATADV_ATTR(throughput_override, 0644, batadv_show_throughput_override,
                   batadv_store_throughput_override);
 #endif
 
index 2333777f919d8ef3e28055733e0d55b64d3ecff3..981e8c5b07e9398c68df711d1d7b54e6e9333ead 100644 (file)
@@ -615,9 +615,6 @@ static int batadv_tp_send_msg(struct batadv_tp_vars *tp_vars, const u8 *src,
        batadv_tp_fill_prerandom(tp_vars, data, data_len);
 
        r = batadv_send_skb_to_orig(skb, orig_node, NULL);
-       if (r == -1)
-               kfree_skb(skb);
-
        if (r == NET_XMIT_SUCCESS)
                return 0;
 
@@ -837,6 +834,7 @@ static int batadv_tp_send(void *arg)
        primary_if = batadv_primary_if_get_selected(bat_priv);
        if (unlikely(!primary_if)) {
                err = BATADV_TP_REASON_DST_UNREACHABLE;
+               tp_vars->reason = err;
                goto out;
        }
 
@@ -1206,9 +1204,6 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
 
        /* send the ack */
        r = batadv_send_skb_to_orig(skb, orig_node, NULL);
-       if (r == -1)
-               kfree_skb(skb);
-
        if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
                ret = BATADV_TP_REASON_DST_UNREACHABLE;
                goto out;
index 7f663092f6de49831680600b2a83aedc15904bac..447f9490b6920b605509e7c90a0e094dbf608d98 100644 (file)
@@ -56,7 +56,6 @@
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
-#include "multicast.h"
 #include "netlink.h"
 #include "originator.h"
 #include "packet.h"
@@ -647,6 +646,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
        struct net *net = dev_net(soft_iface);
        struct batadv_softif_vlan *vlan;
        struct net_device *in_dev = NULL;
+       struct batadv_hard_iface *in_hardif = NULL;
        struct hlist_head *head;
        struct batadv_tt_orig_list_entry *orig_entry;
        int hash_added, table_size, packet_size_max;
@@ -658,6 +658,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
        if (ifindex != BATADV_NULL_IFINDEX)
                in_dev = dev_get_by_index(net, ifindex);
 
+       if (in_dev)
+               in_hardif = batadv_hardif_get_by_netdev(in_dev);
+
        tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid);
 
        if (!is_multicast_ether_addr(addr))
@@ -731,7 +734,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
         */
        tt_local->common.flags = BATADV_TT_CLIENT_NEW;
        tt_local->common.vid = vid;
-       if (batadv_is_wifi_netdev(in_dev))
+       if (batadv_is_wifi_hardif(in_hardif))
                tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
        kref_init(&tt_local->common.refcount);
        tt_local->last_seen = jiffies;
@@ -791,7 +794,7 @@ check_roaming:
         */
        remote_flags = tt_local->common.flags & BATADV_TT_REMOTE_MASK;
 
-       if (batadv_is_wifi_netdev(in_dev))
+       if (batadv_is_wifi_hardif(in_hardif))
                tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
        else
                tt_local->common.flags &= ~BATADV_TT_CLIENT_WIFI;
@@ -815,6 +818,8 @@ check_roaming:
 
        ret = true;
 out:
+       if (in_hardif)
+               batadv_hardif_put(in_hardif);
        if (in_dev)
                dev_put(in_dev);
        if (tt_local)
@@ -3795,9 +3800,6 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv)
 {
        lockdep_assert_held(&bat_priv->tt.commit_lock);
 
-       /* Update multicast addresses in local translation table */
-       batadv_mcast_mla_update(bat_priv);
-
        if (atomic_read(&bat_priv->tt.local_changes) < 1) {
                if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt))
                        batadv_tt_tvlv_container_update(bat_priv);
@@ -3835,8 +3837,8 @@ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv)
 bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
                           unsigned short vid)
 {
-       struct batadv_tt_local_entry *tt_local_entry = NULL;
-       struct batadv_tt_global_entry *tt_global_entry = NULL;
+       struct batadv_tt_local_entry *tt_local_entry;
+       struct batadv_tt_global_entry *tt_global_entry;
        struct batadv_softif_vlan *vlan;
        bool ret = false;
 
@@ -3845,27 +3847,24 @@ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst,
                return false;
 
        if (!atomic_read(&vlan->ap_isolation))
-               goto out;
+               goto vlan_put;
 
        tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst, vid);
        if (!tt_local_entry)
-               goto out;
+               goto vlan_put;
 
        tt_global_entry = batadv_tt_global_hash_find(bat_priv, src, vid);
        if (!tt_global_entry)
-               goto out;
+               goto local_entry_put;
 
-       if (!_batadv_is_ap_isolated(tt_local_entry, tt_global_entry))
-               goto out;
-
-       ret = true;
+       if (_batadv_is_ap_isolated(tt_local_entry, tt_global_entry))
+               ret = true;
 
-out:
+       batadv_tt_global_entry_put(tt_global_entry);
+local_entry_put:
+       batadv_tt_local_entry_put(tt_local_entry);
+vlan_put:
        batadv_softif_vlan_put(vlan);
-       if (tt_global_entry)
-               batadv_tt_global_entry_put(tt_global_entry);
-       if (tt_local_entry)
-               batadv_tt_local_entry_put(tt_local_entry);
        return ret;
 }
 
index 77654f055f24b088a77c9e23e420171619cf88c3..a783420356ae0cd4a6273b3b7a04781242e37a82 100644 (file)
@@ -600,7 +600,6 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
        unsigned char *tvlv_buff;
        unsigned int tvlv_len;
        ssize_t hdr_len = sizeof(*unicast_tvlv_packet);
-       int res;
 
        orig_node = batadv_orig_hash_find(bat_priv, dst);
        if (!orig_node)
@@ -633,9 +632,7 @@ void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, u8 *src,
        tvlv_buff += sizeof(*tvlv_hdr);
        memcpy(tvlv_buff, tvlv_value, tvlv_value_len);
 
-       res = batadv_send_skb_to_orig(skb, orig_node, NULL);
-       if (res == -1)
-               kfree_skb(skb);
+       batadv_send_skb_to_orig(skb, orig_node, NULL);
 out:
        batadv_orig_node_put(orig_node);
 }
index b3dd1a381aad9b2bc970515a84648721be624403..e913aee28c98bf77cdd7fe92496fa4b188ff9604 100644 (file)
@@ -118,13 +118,29 @@ struct batadv_hard_iface_bat_v {
        u8 flags;
 };
 
+/**
+ * enum batadv_hard_iface_wifi_flags - Flags describing the wifi configuration
+ *  of a batadv_hard_iface
+ * @BATADV_HARDIF_WIFI_WEXT_DIRECT: it is a wext wifi device
+ * @BATADV_HARDIF_WIFI_CFG80211_DIRECT: it is a cfg80211 wifi device
+ * @BATADV_HARDIF_WIFI_WEXT_INDIRECT: link device is a wext wifi device
+ * @BATADV_HARDIF_WIFI_CFG80211_INDIRECT: link device is a cfg80211 wifi device
+ */
+enum batadv_hard_iface_wifi_flags {
+       BATADV_HARDIF_WIFI_WEXT_DIRECT = BIT(0),
+       BATADV_HARDIF_WIFI_CFG80211_DIRECT = BIT(1),
+       BATADV_HARDIF_WIFI_WEXT_INDIRECT = BIT(2),
+       BATADV_HARDIF_WIFI_CFG80211_INDIRECT = BIT(3),
+};
+
 /**
  * struct batadv_hard_iface - network device known to batman-adv
  * @list: list node for batadv_hardif_list
  * @if_num: identificator of the interface
  * @if_status: status of the interface for batman-adv
- * @net_dev: pointer to the net_device
  * @num_bcasts: number of payload re-broadcasts on this interface (ARQ)
+ * @wifi_flags: flags whether this is (directly or indirectly) a wifi interface
+ * @net_dev: pointer to the net_device
  * @hardif_obj: kobject of the per interface sysfs "mesh" directory
  * @refcount: number of contexts the object is used
  * @batman_adv_ptype: packet type describing packets that should be processed by
@@ -141,8 +157,9 @@ struct batadv_hard_iface {
        struct list_head list;
        s16 if_num;
        char if_status;
-       struct net_device *net_dev;
        u8 num_bcasts;
+       u32 wifi_flags;
+       struct net_device *net_dev;
        struct kobject *hardif_obj;
        struct kref refcount;
        struct packet_type batman_adv_ptype;
@@ -184,7 +201,7 @@ struct batadv_orig_ifinfo {
 
 /**
  * struct batadv_frag_table_entry - head in the fragment buffer table
- * @head: head of list with fragments
+ * @fragment_list: head of list with fragments
  * @lock: lock to protect the list of fragments
  * @timestamp: time (jiffie) of last received fragment
  * @seqno: sequence number of the fragments in the list
@@ -192,8 +209,8 @@ struct batadv_orig_ifinfo {
  * @total_size: expected size of the assembled packet
  */
 struct batadv_frag_table_entry {
-       struct hlist_head head;
-       spinlock_t lock; /* protects head */
+       struct hlist_head fragment_list;
+       spinlock_t lock; /* protects fragment_list */
        unsigned long timestamp;
        u16 seqno;
        u16 size;
@@ -408,6 +425,7 @@ struct batadv_hardif_neigh_node_bat_v {
  * struct batadv_hardif_neigh_node - unique neighbor per hard-interface
  * @list: list node for batadv_hard_iface::neigh_list
  * @addr: the MAC address of the neighboring interface
+ * @orig: the address of the originator this neighbor node belongs to
  * @if_incoming: pointer to incoming hard-interface
  * @last_seen: when last packet via this neighbor was received
  * @bat_v: B.A.T.M.A.N. V private data
@@ -417,6 +435,7 @@ struct batadv_hardif_neigh_node_bat_v {
 struct batadv_hardif_neigh_node {
        struct hlist_node list;
        u8 addr[ETH_ALEN];
+       u8 orig[ETH_ALEN];
        struct batadv_hard_iface *if_incoming;
        unsigned long last_seen;
 #ifdef CONFIG_BATMAN_ADV_BATMAN_V
@@ -706,8 +725,8 @@ struct batadv_priv_debug_log {
 
 /**
  * struct batadv_priv_gw - per mesh interface gateway data
- * @list: list of available gateway nodes
- * @list_lock: lock protecting gw_list & curr_gw
+ * @gateway_list: list of available gateway nodes
+ * @list_lock: lock protecting gateway_list & curr_gw
  * @curr_gw: pointer to currently selected gateway node
  * @mode: gateway operation: off, client or server (see batadv_gw_modes)
  * @sel_class: gateway selection class (applies if gw_mode client)
@@ -716,8 +735,8 @@ struct batadv_priv_debug_log {
  * @reselect: bool indicating a gateway re-selection is in progress
  */
 struct batadv_priv_gw {
-       struct hlist_head list;
-       spinlock_t list_lock; /* protects gw_list & curr_gw */
+       struct hlist_head gateway_list;
+       spinlock_t list_lock; /* protects gateway_list & curr_gw */
        struct batadv_gw_node __rcu *curr_gw;  /* rcu protected pointer */
        atomic_t mode;
        atomic_t sel_class;
@@ -785,9 +804,10 @@ struct batadv_mcast_querier_state {
  * @num_want_all_ipv6: counter for items in want_all_ipv6_list
  * @want_lists_lock: lock for protecting modifications to mcast want lists
  *  (traversals are rcu-locked)
+ * @work: work queue callback item for multicast TT and TVLV updates
  */
 struct batadv_priv_mcast {
-       struct hlist_head mla_list;
+       struct hlist_head mla_list; /* see __batadv_mcast_mla_update() */
        struct hlist_head want_all_unsnoopables_list;
        struct hlist_head want_all_ipv4_list;
        struct hlist_head want_all_ipv6_list;
@@ -802,6 +822,7 @@ struct batadv_priv_mcast {
        atomic_t num_want_all_ipv6;
        /* protects want_all_{unsnoopables,ipv4,ipv6}_list */
        spinlock_t want_lists_lock;
+       struct delayed_work work;
 };
 #endif
 
@@ -1363,7 +1384,8 @@ struct batadv_skb_cb {
 
 /**
  * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded
- * @list: list node for batadv_socket_client::queue_list
+ * @list: list node for batadv_priv::forw_{bat,bcast}_list
+ * @cleanup_list: list node for purging functions
  * @send_time: execution time for delayed_work (packet sending)
  * @own: bool for locally generated packets (local OGMs are re-scheduled after
  *  sending)
@@ -1380,6 +1402,7 @@ struct batadv_skb_cb {
  */
 struct batadv_forw_packet {
        struct hlist_node list;
+       struct hlist_node cleanup_list;
        unsigned long send_time;
        u8 own;
        struct sk_buff *skb;
index d020299baba41253022ca277711801bc9732828c..1904a93f47d50a2bd1c2c1651f48b1ab762ece3c 100644 (file)
@@ -1090,7 +1090,6 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
 {
        struct hci_conn *hcon;
        struct hci_dev *hdev;
-       bdaddr_t *src = BDADDR_ANY;
        int n;
 
        n = sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu",
@@ -1101,7 +1100,8 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
        if (n < 7)
                return -EINVAL;
 
-       hdev = hci_get_route(addr, src);
+       /* The LE_PUBLIC address type is ignored because of BDADDR_ANY */
+       hdev = hci_get_route(addr, BDADDR_ANY, BDADDR_LE_PUBLIC);
        if (!hdev)
                return -ENOENT;
 
index 3809617aa98d83c428fc56606e0aa05562272c40..dc59eae5471788e42091ba7fc907585d36e99c2d 100644 (file)
@@ -613,7 +613,7 @@ int hci_conn_del(struct hci_conn *conn)
        return 0;
 }
 
-struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
+struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type)
 {
        int use_src = bacmp(src, BDADDR_ANY);
        struct hci_dev *hdev = NULL, *d;
@@ -634,7 +634,29 @@ struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src)
                 */
 
                if (use_src) {
-                       if (!bacmp(&d->bdaddr, src)) {
+                       bdaddr_t id_addr;
+                       u8 id_addr_type;
+
+                       if (src_type == BDADDR_BREDR) {
+                               if (!lmp_bredr_capable(d))
+                                       continue;
+                               bacpy(&id_addr, &d->bdaddr);
+                               id_addr_type = BDADDR_BREDR;
+                       } else {
+                               if (!lmp_le_capable(d))
+                                       continue;
+
+                               hci_copy_identity_address(d, &id_addr,
+                                                         &id_addr_type);
+
+                               /* Convert from HCI to three-value type */
+                               if (id_addr_type == ADDR_LE_DEV_PUBLIC)
+                                       id_addr_type = BDADDR_LE_PUBLIC;
+                               else
+                                       id_addr_type = BDADDR_LE_RANDOM;
+                       }
+
+                       if (!bacmp(&id_addr, src) && id_addr_type == src_type) {
                                hdev = d; break;
                        }
                } else {
index e2288421fe6b79775d1bb3ddde69341782298cc7..1015d9c8d97ddbe978ae7b54698b093f1961b958 100644 (file)
@@ -969,41 +969,38 @@ void __hci_req_enable_advertising(struct hci_request *req)
        hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
 }
 
-static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
+u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
 {
-       size_t complete_len;
        size_t short_len;
-       int max_len;
-
-       max_len = HCI_MAX_AD_LENGTH - ad_len - 2;
-       complete_len = strlen(hdev->dev_name);
-       short_len = strlen(hdev->short_name);
-
-       /* no space left for name */
-       if (max_len < 1)
-               return ad_len;
+       size_t complete_len;
 
-       /* no name set */
-       if (!complete_len)
+       /* no space left for name (+ NUL + type + len) */
+       if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3)
                return ad_len;
 
-       /* complete name fits and is eq to max short name len or smaller */
-       if (complete_len <= max_len &&
-           complete_len <= HCI_MAX_SHORT_NAME_LENGTH) {
+       /* use complete name if present and fits */
+       complete_len = strlen(hdev->dev_name);
+       if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH)
                return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE,
-                                      hdev->dev_name, complete_len);
-       }
+                                      hdev->dev_name, complete_len + 1);
 
-       /* short name set and fits */
-       if (short_len && short_len <= max_len) {
+       /* use short name if present */
+       short_len = strlen(hdev->short_name);
+       if (short_len)
                return eir_append_data(ptr, ad_len, EIR_NAME_SHORT,
-                                      hdev->short_name, short_len);
-       }
+                                      hdev->short_name, short_len + 1);
 
-       /* no short name set so shorten complete name */
-       if (!short_len) {
-               return eir_append_data(ptr, ad_len, EIR_NAME_SHORT,
-                                      hdev->dev_name, max_len);
+       /* use shortened full name if present, we already know that name
+        * is longer than HCI_MAX_SHORT_NAME_LENGTH
+        */
+       if (complete_len) {
+               u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1];
+
+               memcpy(name, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH);
+               name[HCI_MAX_SHORT_NAME_LENGTH] = '\0';
+
+               return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, name,
+                                      sizeof(name));
        }
 
        return ad_len;
index 6b06629245a8c0358a5f1c6bf8964d66f291d0a6..dde77bd59f915a48c4ea64c0444d61ac50151f67 100644 (file)
@@ -106,6 +106,8 @@ static inline void hci_update_background_scan(struct hci_dev *hdev)
 void hci_request_setup(struct hci_dev *hdev);
 void hci_request_cancel_all(struct hci_dev *hdev);
 
+u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len);
+
 static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type,
                                  u8 *data, u8 data_len)
 {
index d4cad29b033fc6d8601913013f291ab287648928..577f1c01454a566cc63431ff8bb76f08785431ca 100644 (file)
@@ -7060,7 +7060,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
        BT_DBG("%pMR -> %pMR (type %u) psm 0x%2.2x", &chan->src, dst,
               dst_type, __le16_to_cpu(psm));
 
-       hdev = hci_get_route(dst, &chan->src);
+       hdev = hci_get_route(dst, &chan->src, chan->src_type);
        if (!hdev)
                return -EHOSTUNREACH;
 
index 736038085feb403f6b93cb2473cb5cb747b6d842..1fba2a03f8ae8a25c95737ed963a894321ce8288 100644 (file)
@@ -6017,7 +6017,15 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
        return err;
 }
 
-static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data)
+static u8 calculate_name_len(struct hci_dev *hdev)
+{
+       u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3];
+
+       return append_local_name(hdev, buf, 0);
+}
+
+static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags,
+                          bool is_adv_data)
 {
        u8 max_len = HCI_MAX_AD_LENGTH;
 
@@ -6030,9 +6038,8 @@ static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data)
                if (adv_flags & MGMT_ADV_FLAG_TX_POWER)
                        max_len -= 3;
        } else {
-               /* at least 1 byte of name should fit in */
                if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME)
-                       max_len -= 3;
+                       max_len -= calculate_name_len(hdev);
 
                if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE))
                        max_len -= 4;
@@ -6063,12 +6070,13 @@ static bool appearance_managed(u32 adv_flags)
        return adv_flags & MGMT_ADV_FLAG_APPEARANCE;
 }
 
-static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data)
+static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
+                             u8 len, bool is_adv_data)
 {
        int i, cur_len;
        u8 max_len;
 
-       max_len = tlv_data_max_len(adv_flags, is_adv_data);
+       max_len = tlv_data_max_len(hdev, adv_flags, is_adv_data);
 
        if (len > max_len)
                return false;
@@ -6215,8 +6223,8 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
                goto unlock;
        }
 
-       if (!tlv_data_is_valid(flags, cp->data, cp->adv_data_len, true) ||
-           !tlv_data_is_valid(flags, cp->data + cp->adv_data_len,
+       if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) ||
+           !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len,
                               cp->scan_rsp_len, false)) {
                err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
                                      MGMT_STATUS_INVALID_PARAMS);
@@ -6429,8 +6437,8 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev,
 
        rp.instance = cp->instance;
        rp.flags = cp->flags;
-       rp.max_adv_data_len = tlv_data_max_len(flags, true);
-       rp.max_scan_rsp_len = tlv_data_max_len(flags, false);
+       rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true);
+       rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false);
 
        err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
                                MGMT_STATUS_SUCCESS, &rp, sizeof(rp));
index 8e385a0ae60e0bd6e4b1ed7615905a7170c4a8bc..2f2cb5e27cdd4dc9b6079b5d602c89e510bd9ab8 100644 (file)
@@ -178,7 +178,7 @@ static void rfcomm_reparent_device(struct rfcomm_dev *dev)
        struct hci_dev *hdev;
        struct hci_conn *conn;
 
-       hdev = hci_get_route(&dev->dst, &dev->src);
+       hdev = hci_get_route(&dev->dst, &dev->src, BDADDR_BREDR);
        if (!hdev)
                return;
 
index f52bcbf2e58cd8b8ded4c3d65c8dd9b87034b002..3125ce670c2f241f446daae17a37fbcc5f485574 100644 (file)
@@ -219,7 +219,7 @@ static int sco_connect(struct sock *sk)
 
        BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst);
 
-       hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src);
+       hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR);
        if (!hdev)
                return -EHOSTUNREACH;
 
index c5fea9393946f64af336873645db82d09599d442..b30e77e8427c54bffc6801e2f1139139807c439f 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <linux/inetdevice.h>
+#include <linux/mroute.h>
 #include <net/ip.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
@@ -364,13 +365,18 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
                                                    __be32 group,
                                                    u8 *igmp_type)
 {
+       struct igmpv3_query *ihv3;
+       size_t igmp_hdr_size;
        struct sk_buff *skb;
        struct igmphdr *ih;
        struct ethhdr *eth;
        struct iphdr *iph;
 
+       igmp_hdr_size = sizeof(*ih);
+       if (br->multicast_igmp_version == 3)
+               igmp_hdr_size = sizeof(*ihv3);
        skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*iph) +
-                                                sizeof(*ih) + 4);
+                                                igmp_hdr_size + 4);
        if (!skb)
                goto out;
 
@@ -395,7 +401,7 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
        iph->version = 4;
        iph->ihl = 6;
        iph->tos = 0xc0;
-       iph->tot_len = htons(sizeof(*iph) + sizeof(*ih) + 4);
+       iph->tot_len = htons(sizeof(*iph) + igmp_hdr_size + 4);
        iph->id = 0;
        iph->frag_off = htons(IP_DF);
        iph->ttl = 1;
@@ -411,17 +417,37 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
        skb_put(skb, 24);
 
        skb_set_transport_header(skb, skb->len);
-       ih = igmp_hdr(skb);
        *igmp_type = IGMP_HOST_MEMBERSHIP_QUERY;
-       ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
-       ih->code = (group ? br->multicast_last_member_interval :
-                           br->multicast_query_response_interval) /
-                  (HZ / IGMP_TIMER_SCALE);
-       ih->group = group;
-       ih->csum = 0;
-       ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr));
-       skb_put(skb, sizeof(*ih));
 
+       switch (br->multicast_igmp_version) {
+       case 2:
+               ih = igmp_hdr(skb);
+               ih->type = IGMP_HOST_MEMBERSHIP_QUERY;
+               ih->code = (group ? br->multicast_last_member_interval :
+                                   br->multicast_query_response_interval) /
+                          (HZ / IGMP_TIMER_SCALE);
+               ih->group = group;
+               ih->csum = 0;
+               ih->csum = ip_compute_csum((void *)ih, sizeof(*ih));
+               break;
+       case 3:
+               ihv3 = igmpv3_query_hdr(skb);
+               ihv3->type = IGMP_HOST_MEMBERSHIP_QUERY;
+               ihv3->code = (group ? br->multicast_last_member_interval :
+                                     br->multicast_query_response_interval) /
+                            (HZ / IGMP_TIMER_SCALE);
+               ihv3->group = group;
+               ihv3->qqic = br->multicast_query_interval / HZ;
+               ihv3->nsrcs = 0;
+               ihv3->resv = 0;
+               ihv3->suppress = 0;
+               ihv3->qrv = 2;
+               ihv3->csum = 0;
+               ihv3->csum = ip_compute_csum((void *)ihv3, sizeof(*ihv3));
+               break;
+       }
+
+       skb_put(skb, igmp_hdr_size);
        __skb_pull(skb, sizeof(*eth));
 
 out:
@@ -433,15 +459,20 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
                                                    const struct in6_addr *grp,
                                                    u8 *igmp_type)
 {
-       struct sk_buff *skb;
+       struct mld2_query *mld2q;
+       unsigned long interval;
        struct ipv6hdr *ip6h;
        struct mld_msg *mldq;
+       size_t mld_hdr_size;
+       struct sk_buff *skb;
        struct ethhdr *eth;
        u8 *hopopt;
-       unsigned long interval;
 
+       mld_hdr_size = sizeof(*mldq);
+       if (br->multicast_mld_version == 2)
+               mld_hdr_size = sizeof(*mld2q);
        skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) +
-                                                8 + sizeof(*mldq));
+                                                8 + mld_hdr_size);
        if (!skb)
                goto out;
 
@@ -460,7 +491,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
        ip6h = ipv6_hdr(skb);
 
        *(__force __be32 *)ip6h = htonl(0x60000000);
-       ip6h->payload_len = htons(8 + sizeof(*mldq));
+       ip6h->payload_len = htons(8 + mld_hdr_size);
        ip6h->nexthdr = IPPROTO_HOPOPTS;
        ip6h->hop_limit = 1;
        ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
@@ -488,26 +519,47 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 
        /* ICMPv6 */
        skb_set_transport_header(skb, skb->len);
-       mldq = (struct mld_msg *) icmp6_hdr(skb);
-
        interval = ipv6_addr_any(grp) ?
                        br->multicast_query_response_interval :
                        br->multicast_last_member_interval;
-
        *igmp_type = ICMPV6_MGM_QUERY;
-       mldq->mld_type = ICMPV6_MGM_QUERY;
-       mldq->mld_code = 0;
-       mldq->mld_cksum = 0;
-       mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
-       mldq->mld_reserved = 0;
-       mldq->mld_mca = *grp;
-
-       /* checksum */
-       mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
-                                         sizeof(*mldq), IPPROTO_ICMPV6,
-                                         csum_partial(mldq,
-                                                      sizeof(*mldq), 0));
-       skb_put(skb, sizeof(*mldq));
+       switch (br->multicast_mld_version) {
+       case 1:
+               mldq = (struct mld_msg *)icmp6_hdr(skb);
+               mldq->mld_type = ICMPV6_MGM_QUERY;
+               mldq->mld_code = 0;
+               mldq->mld_cksum = 0;
+               mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
+               mldq->mld_reserved = 0;
+               mldq->mld_mca = *grp;
+               mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+                                                 sizeof(*mldq), IPPROTO_ICMPV6,
+                                                 csum_partial(mldq,
+                                                              sizeof(*mldq),
+                                                              0));
+               break;
+       case 2:
+               mld2q = (struct mld2_query *)icmp6_hdr(skb);
+               mld2q->mld2q_mrc = htons((u16)jiffies_to_msecs(interval));
+               mld2q->mld2q_type = ICMPV6_MGM_QUERY;
+               mld2q->mld2q_code = 0;
+               mld2q->mld2q_cksum = 0;
+               mld2q->mld2q_resv1 = 0;
+               mld2q->mld2q_resv2 = 0;
+               mld2q->mld2q_suppress = 0;
+               mld2q->mld2q_qrv = 2;
+               mld2q->mld2q_nsrcs = 0;
+               mld2q->mld2q_qqic = br->multicast_query_interval / HZ;
+               mld2q->mld2q_mca = *grp;
+               mld2q->mld2q_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+                                                    sizeof(*mld2q),
+                                                    IPPROTO_ICMPV6,
+                                                    csum_partial(mld2q,
+                                                                 sizeof(*mld2q),
+                                                                 0));
+               break;
+       }
+       skb_put(skb, mld_hdr_size);
 
        __skb_pull(skb, sizeof(*eth));
 
@@ -607,7 +659,8 @@ err:
 }
 
 struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
-       struct net_bridge_port *port, struct br_ip *group)
+                                                   struct net_bridge_port *p,
+                                                   struct br_ip *group)
 {
        struct net_bridge_mdb_htable *mdb;
        struct net_bridge_mdb_entry *mp;
@@ -623,7 +676,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
        }
 
        hash = br_ip_hash(mdb, group);
-       mp = br_multicast_get_group(br, port, group, hash);
+       mp = br_multicast_get_group(br, p, group, hash);
        switch (PTR_ERR(mp)) {
        case 0:
                break;
@@ -680,9 +733,9 @@ static int br_multicast_add_group(struct net_bridge *br,
                                  struct net_bridge_port *port,
                                  struct br_ip *group)
 {
-       struct net_bridge_mdb_entry *mp;
-       struct net_bridge_port_group *p;
        struct net_bridge_port_group __rcu **pp;
+       struct net_bridge_port_group *p;
+       struct net_bridge_mdb_entry *mp;
        unsigned long now = jiffies;
        int err;
 
@@ -860,9 +913,9 @@ static void br_multicast_send_query(struct net_bridge *br,
                                    struct net_bridge_port *port,
                                    struct bridge_mcast_own_query *own_query)
 {
-       unsigned long time;
-       struct br_ip br_group;
        struct bridge_mcast_other_query *other_query = NULL;
+       struct br_ip br_group;
+       unsigned long time;
 
        if (!netif_running(br->dev) || br->multicast_disabled ||
            !br->multicast_querier)
@@ -972,13 +1025,12 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query)
                mod_timer(&query->timer, jiffies);
 }
 
-void br_multicast_enable_port(struct net_bridge_port *port)
+static void __br_multicast_enable_port(struct net_bridge_port *port)
 {
        struct net_bridge *br = port->br;
 
-       spin_lock(&br->multicast_lock);
        if (br->multicast_disabled || !netif_running(br->dev))
-               goto out;
+               return;
 
        br_multicast_enable(&port->ip4_own_query);
 #if IS_ENABLED(CONFIG_IPV6)
@@ -987,8 +1039,14 @@ void br_multicast_enable_port(struct net_bridge_port *port)
        if (port->multicast_router == MDB_RTR_TYPE_PERM &&
            hlist_unhashed(&port->rlist))
                br_multicast_add_router(br, port);
+}
 
-out:
+void br_multicast_enable_port(struct net_bridge_port *port)
+{
+       struct net_bridge *br = port->br;
+
+       spin_lock(&br->multicast_lock);
+       __br_multicast_enable_port(port);
        spin_unlock(&br->multicast_lock);
 }
 
@@ -1633,6 +1691,21 @@ static void br_multicast_err_count(const struct net_bridge *br,
        u64_stats_update_end(&pstats->syncp);
 }
 
+static void br_multicast_pim(struct net_bridge *br,
+                            struct net_bridge_port *port,
+                            const struct sk_buff *skb)
+{
+       unsigned int offset = skb_transport_offset(skb);
+       struct pimhdr *pimhdr, _pimhdr;
+
+       pimhdr = skb_header_pointer(skb, offset, sizeof(_pimhdr), &_pimhdr);
+       if (!pimhdr || pim_hdr_version(pimhdr) != PIM_VERSION ||
+           pim_hdr_type(pimhdr) != PIM_TYPE_HELLO)
+               return;
+
+       br_multicast_mark_router(br, port);
+}
+
 static int br_multicast_ipv4_rcv(struct net_bridge *br,
                                 struct net_bridge_port *port,
                                 struct sk_buff *skb,
@@ -1645,8 +1718,12 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
        err = ip_mc_check_igmp(skb, &skb_trimmed);
 
        if (err == -ENOMSG) {
-               if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr))
+               if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) {
                        BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
+               } else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) {
+                       if (ip_hdr(skb)->protocol == IPPROTO_PIM)
+                               br_multicast_pim(br, port, skb);
+               }
                return 0;
        } else if (err < 0) {
                br_multicast_err_count(br, port, skb->protocol);
@@ -1806,7 +1883,9 @@ void br_multicast_init(struct net_bridge *br)
 
        br->ip4_other_query.delay_time = 0;
        br->ip4_querier.port = NULL;
+       br->multicast_igmp_version = 2;
 #if IS_ENABLED(CONFIG_IPV6)
+       br->multicast_mld_version = 1;
        br->ip6_other_query.delay_time = 0;
        br->ip6_querier.port = NULL;
 #endif
@@ -1994,8 +2073,9 @@ static void br_multicast_start_querier(struct net_bridge *br,
 
 int br_multicast_toggle(struct net_bridge *br, unsigned long val)
 {
-       int err = 0;
        struct net_bridge_mdb_htable *mdb;
+       struct net_bridge_port *port;
+       int err = 0;
 
        spin_lock_bh(&br->multicast_lock);
        if (br->multicast_disabled == !val)
@@ -2023,10 +2103,9 @@ rollback:
                        goto rollback;
        }
 
-       br_multicast_start_querier(br, &br->ip4_own_query);
-#if IS_ENABLED(CONFIG_IPV6)
-       br_multicast_start_querier(br, &br->ip6_own_query);
-#endif
+       br_multicast_open(br);
+       list_for_each_entry(port, &br->port_list, list)
+               __br_multicast_enable_port(port);
 
 unlock:
        spin_unlock_bh(&br->multicast_lock);
@@ -2107,6 +2186,44 @@ unlock:
        return err;
 }
 
+int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val)
+{
+       /* Currently we support only version 2 and 3 */
+       switch (val) {
+       case 2:
+       case 3:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       spin_lock_bh(&br->multicast_lock);
+       br->multicast_igmp_version = val;
+       spin_unlock_bh(&br->multicast_lock);
+
+       return 0;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val)
+{
+       /* Currently we support version 1 and 2 */
+       switch (val) {
+       case 1:
+       case 2:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       spin_lock_bh(&br->multicast_lock);
+       br->multicast_mld_version = val;
+       spin_unlock_bh(&br->multicast_lock);
+
+       return 0;
+}
+#endif
+
 /**
  * br_multicast_list_adjacent - Returns snooped multicast addresses
  * @dev:       The bridge port adjacent to which to retrieve addresses
index 2fe9345c1407108bf5a0802671fa419aa9b24e7e..83d937f4415eea3f386f83269abac90fdaabb641 100644 (file)
@@ -46,7 +46,7 @@
 #include <linux/sysctl.h>
 #endif
 
-static int brnf_net_id __read_mostly;
+static unsigned int brnf_net_id __read_mostly;
 
 struct brnf_net {
        bool enabled;
@@ -561,8 +561,8 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff
        }
        nf_bridge_push_encap_header(skb);
 
-       NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb,
-                      in, skb->dev, br_forward_finish, 1);
+       br_nf_hook_thresh(NF_BR_FORWARD, net, sk, skb, in, skb->dev,
+                         br_forward_finish);
        return 0;
 }
 
@@ -845,8 +845,10 @@ static unsigned int ip_sabotage_in(void *priv,
                                   struct sk_buff *skb,
                                   const struct nf_hook_state *state)
 {
-       if (skb->nf_bridge && !skb->nf_bridge->in_prerouting)
-               return NF_STOP;
+       if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) {
+               state->okfn(state->net, state->sk, skb);
+               return NF_STOLEN;
+       }
 
        return NF_ACCEPT;
 }
@@ -1016,10 +1018,10 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
 
        /* We may already have this, but read-locks nest anyway */
        rcu_read_lock();
-       nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1,
-                          NFPROTO_BRIDGE, indev, outdev, sk, net, okfn);
+       nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
+                          sk, net, okfn);
 
-       ret = nf_hook_slow(skb, &state);
+       ret = nf_hook_slow(skb, &state, elem);
        rcu_read_unlock();
        if (ret == 1)
                ret = okfn(net, sk, skb);
index e99037c6f7b7b7e1073375d320f4fb186c4e8b38..71c7453268c1cf5a34949b0451f457ac18a4657b 100644 (file)
@@ -858,6 +858,8 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
        [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 },
        [IFLA_BR_VLAN_STATS_ENABLED] = { .type = NLA_U8 },
        [IFLA_BR_MCAST_STATS_ENABLED] = { .type = NLA_U8 },
+       [IFLA_BR_MCAST_IGMP_VERSION] = { .type = NLA_U8 },
+       [IFLA_BR_MCAST_MLD_VERSION] = { .type = NLA_U8 },
 };
 
 static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -1069,6 +1071,26 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
                mcast_stats = nla_get_u8(data[IFLA_BR_MCAST_STATS_ENABLED]);
                br->multicast_stats_enabled = !!mcast_stats;
        }
+
+       if (data[IFLA_BR_MCAST_IGMP_VERSION]) {
+               __u8 igmp_version;
+
+               igmp_version = nla_get_u8(data[IFLA_BR_MCAST_IGMP_VERSION]);
+               err = br_multicast_set_igmp_version(br, igmp_version);
+               if (err)
+                       return err;
+       }
+
+#if IS_ENABLED(CONFIG_IPV6)
+       if (data[IFLA_BR_MCAST_MLD_VERSION]) {
+               __u8 mld_version;
+
+               mld_version = nla_get_u8(data[IFLA_BR_MCAST_MLD_VERSION]);
+               err = br_multicast_set_mld_version(br, mld_version);
+               if (err)
+                       return err;
+       }
+#endif
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
        if (data[IFLA_BR_NF_CALL_IPTABLES]) {
@@ -1135,6 +1157,8 @@ static size_t br_get_size(const struct net_device *brdev)
               nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */
               nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */
               nla_total_size_64bit(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */
+              nla_total_size(sizeof(u8)) +     /* IFLA_BR_MCAST_IGMP_VERSION */
+              nla_total_size(sizeof(u8)) +     /* IFLA_BR_MCAST_MLD_VERSION */
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
               nla_total_size(sizeof(u8)) +     /* IFLA_BR_NF_CALL_IPTABLES */
@@ -1210,9 +1234,15 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
            nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT,
                        br->multicast_last_member_count) ||
            nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT,
-                       br->multicast_startup_query_count))
+                       br->multicast_startup_query_count) ||
+           nla_put_u8(skb, IFLA_BR_MCAST_IGMP_VERSION,
+                      br->multicast_igmp_version))
                return -EMSGSIZE;
-
+#if IS_ENABLED(CONFIG_IPV6)
+       if (nla_put_u8(skb, IFLA_BR_MCAST_MLD_VERSION,
+                      br->multicast_mld_version))
+               return -EMSGSIZE;
+#endif
        clockval = jiffies_to_clock_t(br->multicast_last_member_interval);
        if (nla_put_u64_64bit(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval,
                              IFLA_BR_PAD))
index 1b63177e0ccd4bd60e95eac4cb8fbccf3cf8fa0e..26aec2366bc35ce616d2ed6fd34cd302a0c06831 100644 (file)
@@ -333,6 +333,8 @@ struct net_bridge
        u32                             multicast_last_member_count;
        u32                             multicast_startup_query_count;
 
+       u8                              multicast_igmp_version;
+
        unsigned long                   multicast_last_member_interval;
        unsigned long                   multicast_membership_interval;
        unsigned long                   multicast_querier_interval;
@@ -353,6 +355,7 @@ struct net_bridge
        struct bridge_mcast_other_query ip6_other_query;
        struct bridge_mcast_own_query   ip6_own_query;
        struct bridge_mcast_querier     ip6_querier;
+       u8                              multicast_mld_version;
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 #endif
 
@@ -582,6 +585,10 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val);
 int br_multicast_toggle(struct net_bridge *br, unsigned long val);
 int br_multicast_set_querier(struct net_bridge *br, unsigned long val);
 int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val);
+int br_multicast_set_igmp_version(struct net_bridge *br, unsigned long val);
+#if IS_ENABLED(CONFIG_IPV6)
+int br_multicast_set_mld_version(struct net_bridge *br, unsigned long val);
+#endif
 struct net_bridge_mdb_entry *
 br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, struct br_ip *dst);
 struct net_bridge_mdb_entry *
index e120307c6e36ab1173232ee1d4fb1730f5dbae60..c9d2e0abfb89e456a3cdf68d1fa40be20e257e17 100644 (file)
@@ -440,6 +440,23 @@ static ssize_t hash_max_store(struct device *d, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(hash_max);
 
+static ssize_t multicast_igmp_version_show(struct device *d,
+                                          struct device_attribute *attr,
+                                          char *buf)
+{
+       struct net_bridge *br = to_bridge(d);
+
+       return sprintf(buf, "%u\n", br->multicast_igmp_version);
+}
+
+static ssize_t multicast_igmp_version_store(struct device *d,
+                                           struct device_attribute *attr,
+                                           const char *buf, size_t len)
+{
+       return store_bridge_parm(d, buf, len, br_multicast_set_igmp_version);
+}
+static DEVICE_ATTR_RW(multicast_igmp_version);
+
 static ssize_t multicast_last_member_count_show(struct device *d,
                                                struct device_attribute *attr,
                                                char *buf)
@@ -642,6 +659,25 @@ static ssize_t multicast_stats_enabled_store(struct device *d,
        return store_bridge_parm(d, buf, len, set_stats_enabled);
 }
 static DEVICE_ATTR_RW(multicast_stats_enabled);
+
+#if IS_ENABLED(CONFIG_IPV6)
+static ssize_t multicast_mld_version_show(struct device *d,
+                                         struct device_attribute *attr,
+                                         char *buf)
+{
+       struct net_bridge *br = to_bridge(d);
+
+       return sprintf(buf, "%u\n", br->multicast_mld_version);
+}
+
+static ssize_t multicast_mld_version_store(struct device *d,
+                                          struct device_attribute *attr,
+                                          const char *buf, size_t len)
+{
+       return store_bridge_parm(d, buf, len, br_multicast_set_mld_version);
+}
+static DEVICE_ATTR_RW(multicast_mld_version);
+#endif
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 static ssize_t nf_call_iptables_show(
@@ -809,6 +845,10 @@ static struct attribute *bridge_attrs[] = {
        &dev_attr_multicast_query_response_interval.attr,
        &dev_attr_multicast_startup_query_interval.attr,
        &dev_attr_multicast_stats_enabled.attr,
+       &dev_attr_multicast_igmp_version.attr,
+#if IS_ENABLED(CONFIG_IPV6)
+       &dev_attr_multicast_mld_version.attr,
+#endif
 #endif
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
        &dev_attr_nf_call_iptables.attr,
index 9cebf47ac840722d0294713f98ef98595272f0be..e7ef1a1ef3a6af942e511b6385fed7c08b0561ed 100644 (file)
@@ -22,6 +22,7 @@ config NFT_BRIDGE_REJECT
 
 config NF_LOG_BRIDGE
        tristate "Bridge packet logging"
+       select NF_LOG_COMMON
 
 endif # NF_TABLES_BRIDGE
 
index 070cf134a22f983c915c488a48b09d942e16fcce..5929309beaa1d5d310a19029364693c0b0355772 100644 (file)
@@ -51,7 +51,8 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par)
        if (diptr == NULL)
                return EBT_DROP;
 
-       arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in,
+       arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr,
+                (struct net_device *)xt_in(par),
                 *diptr, shp, info->mac, shp);
 
        return info->target;
index 9a11086ba6ff4a966e551ea650bc15df1b4483ac..e88bd4827ac1add767973298760895d851aa4b10 100644 (file)
@@ -179,7 +179,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct ebt_log_info *info = par->targinfo;
        struct nf_loginfo li;
-       struct net *net = par->net;
+       struct net *net = xt_net(par);
 
        li.type = NF_LOG_TYPE_LOG;
        li.u.log.level = info->loglevel;
@@ -190,11 +190,12 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
         * nf_log_packet() with NFT_LOG_TYPE_LOG here. --Pablo
         */
        if (info->bitmask & EBT_LOG_NFLOG)
-               nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb,
-                             par->in, par->out, &li, "%s", info->prefix);
+               nf_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb,
+                             xt_in(par), xt_out(par), &li, "%s",
+                             info->prefix);
        else
-               ebt_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, par->in,
-                              par->out, &li, info->prefix);
+               ebt_log_packet(net, NFPROTO_BRIDGE, xt_hooknum(par), skb,
+                              xt_in(par), xt_out(par), &li, info->prefix);
        return EBT_CONTINUE;
 }
 
index 54816150608e0e0f2fd45757a93d42f07a30a42d..c1dc486862002f4f492d81d91e2165c4e9084ed2 100644 (file)
@@ -23,16 +23,16 @@ static unsigned int
 ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct ebt_nflog_info *info = par->targinfo;
+       struct net *net = xt_net(par);
        struct nf_loginfo li;
-       struct net *net = par->net;
 
        li.type = NF_LOG_TYPE_ULOG;
        li.u.ulog.copy_len = info->len;
        li.u.ulog.group = info->group;
        li.u.ulog.qthreshold = info->threshold;
 
-       nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in,
-                     par->out, &li, "%s", info->prefix);
+       nf_log_packet(net, PF_BRIDGE, xt_hooknum(par), skb, xt_in(par),
+                     xt_out(par), &li, "%s", info->prefix);
        return EBT_CONTINUE;
 }
 
index 2e7c4f974340919c2101ee60c3b3b86a727755d5..8d2a85e0594ee3c5c91c3c4b4c05c71fc6f40601 100644 (file)
@@ -23,12 +23,12 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
        if (!skb_make_writable(skb, 0))
                return EBT_DROP;
 
-       if (par->hooknum != NF_BR_BROUTING)
+       if (xt_hooknum(par) != NF_BR_BROUTING)
                /* rcu_read_lock()ed by nf_hook_thresh */
                ether_addr_copy(eth_hdr(skb)->h_dest,
-                               br_port_get_rcu(par->in)->br->dev->dev_addr);
+                               br_port_get_rcu(xt_in(par))->br->dev->dev_addr);
        else
-               ether_addr_copy(eth_hdr(skb)->h_dest, par->in->dev_addr);
+               ether_addr_copy(eth_hdr(skb)->h_dest, xt_in(par)->dev_addr);
        skb->pkt_type = PACKET_HOST;
        return info->target;
 }
index ec94c6f1ae881461bb1c72fc7a8965c335de73fe..8fe36dc3aab29ceac90443f229b9d745b6dfe5d6 100644 (file)
@@ -53,7 +53,7 @@ static int ebt_broute(struct sk_buff *skb)
        struct nf_hook_state state;
        int ret;
 
-       nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN,
+       nf_hook_state_init(&state, NF_BR_BROUTING,
                           NFPROTO_BRIDGE, skb->dev, NULL, NULL,
                           dev_net(skb->dev), NULL);
 
index f5c11bbe27db651dfd66f6cb8e7dc5b77b88a511..1ab6014cf0f8ec7c0fd58aca63438e009a22cb93 100644 (file)
@@ -194,12 +194,8 @@ unsigned int ebt_do_table(struct sk_buff *skb,
        const struct ebt_table_info *private;
        struct xt_action_param acpar;
 
-       acpar.family  = NFPROTO_BRIDGE;
-       acpar.net     = state->net;
-       acpar.in      = state->in;
-       acpar.out     = state->out;
+       acpar.state   = state;
        acpar.hotdrop = false;
-       acpar.hooknum = hook;
 
        read_lock_bh(&table->lock);
        private = table->private;
index 1663df59854502b997d9bc56bb49712c0b99f28a..c197b1f844eee9896b20e790df7dce4fbfeb4ea6 100644 (file)
@@ -24,21 +24,7 @@ static void nf_log_bridge_packet(struct net *net, u_int8_t pf,
                                 const struct nf_loginfo *loginfo,
                                 const char *prefix)
 {
-       switch (eth_hdr(skb)->h_proto) {
-       case htons(ETH_P_IP):
-               nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out,
-                             loginfo, "%s", prefix);
-               break;
-       case htons(ETH_P_IPV6):
-               nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out,
-                             loginfo, "%s", prefix);
-               break;
-       case htons(ETH_P_ARP):
-       case htons(ETH_P_RARP):
-               nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out,
-                             loginfo, "%s", prefix);
-               break;
-       }
+       nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
 }
 
 static struct nf_logger nf_bridge_logger __read_mostly = {
index ad47a921b70152f7a802284a03ce690b03390d1c..5974dbc1ea240fa6b8b2d109891ab211a8d556ff 100644 (file)
@@ -23,7 +23,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
                                     const struct nft_pktinfo *pkt)
 {
        const struct nft_meta *priv = nft_expr_priv(expr);
-       const struct net_device *in = pkt->in, *out = pkt->out;
+       const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
        u32 *dest = &regs->data[priv->dreg];
        const struct net_bridge_port *p;
 
index 4b3df6b0e3b93cc47531950a9b4c8e6c9b5d1c13..206dc266ecd237c2874d25352dd631e3bc31b002 100644 (file)
@@ -315,17 +315,20 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
        case htons(ETH_P_IP):
                switch (priv->type) {
                case NFT_REJECT_ICMP_UNREACH:
-                       nft_reject_br_send_v4_unreach(pkt->net, pkt->skb,
-                                                     pkt->in, pkt->hook,
+                       nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb,
+                                                     nft_in(pkt),
+                                                     nft_hook(pkt),
                                                      priv->icmp_code);
                        break;
                case NFT_REJECT_TCP_RST:
-                       nft_reject_br_send_v4_tcp_reset(pkt->net, pkt->skb,
-                                                       pkt->in, pkt->hook);
+                       nft_reject_br_send_v4_tcp_reset(nft_net(pkt), pkt->skb,
+                                                       nft_in(pkt),
+                                                       nft_hook(pkt));
                        break;
                case NFT_REJECT_ICMPX_UNREACH:
-                       nft_reject_br_send_v4_unreach(pkt->net, pkt->skb,
-                                                     pkt->in, pkt->hook,
+                       nft_reject_br_send_v4_unreach(nft_net(pkt), pkt->skb,
+                                                     nft_in(pkt),
+                                                     nft_hook(pkt),
                                                      nft_reject_icmp_code(priv->icmp_code));
                        break;
                }
@@ -333,17 +336,20 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
        case htons(ETH_P_IPV6):
                switch (priv->type) {
                case NFT_REJECT_ICMP_UNREACH:
-                       nft_reject_br_send_v6_unreach(pkt->net, pkt->skb,
-                                                     pkt->in, pkt->hook,
+                       nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb,
+                                                     nft_in(pkt),
+                                                     nft_hook(pkt),
                                                      priv->icmp_code);
                        break;
                case NFT_REJECT_TCP_RST:
-                       nft_reject_br_send_v6_tcp_reset(pkt->net, pkt->skb,
-                                                       pkt->in, pkt->hook);
+                       nft_reject_br_send_v6_tcp_reset(nft_net(pkt), pkt->skb,
+                                                       nft_in(pkt),
+                                                       nft_hook(pkt));
                        break;
                case NFT_REJECT_ICMPX_UNREACH:
-                       nft_reject_br_send_v6_unreach(pkt->net, pkt->skb,
-                                                     pkt->in, pkt->hook,
+                       nft_reject_br_send_v6_unreach(nft_net(pkt), pkt->skb,
+                                                     nft_in(pkt),
+                                                     nft_hook(pkt),
                                                      nft_reject_icmpv6_code(priv->icmp_code));
                        break;
                }
index d730a0f68f46b43b3e8dd51cb3bb029f04cde93a..2d38b6e34203b7df5638c340126c2f5169fb3791 100644 (file)
@@ -52,7 +52,7 @@ struct caif_net {
        struct caif_device_entry_list caifdevs;
 };
 
-static int caif_net_id;
+static unsigned int caif_net_id;
 static int q_high = 50; /* Percent */
 
 struct cfcnfg *get_cfcnfg(struct net *net)
index fa39fc29870867ca5a7323cdeab9ca3204a2653d..273cb07f57d87186224fb50943af949b99b3cde2 100644 (file)
@@ -390,8 +390,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
        rcu_read_lock();
 
        if (adapt_layer == NULL) {
-               pr_debug("link setup response but no client exist,"
-                               "send linkdown back\n");
+               pr_debug("link setup response but no client exist, send linkdown back\n");
                cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
                goto unlock;
        }
@@ -401,8 +400,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 
        phyinfo = cfcnfg_get_phyinfo_rcu(cnfg, phyid);
        if (phyinfo == NULL) {
-               pr_err("ERROR: Link Layer Device disappeared"
-                               "while connecting\n");
+               pr_err("ERROR: Link Layer Device disappeared while connecting\n");
                goto unlock;
        }
 
@@ -436,8 +434,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
                servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
                break;
        default:
-               pr_err("Protocol error. Link setup response "
-                               "- unknown channel type\n");
+               pr_err("Protocol error. Link setup response - unknown channel type\n");
                goto unlock;
        }
        if (!servicel)
index 8e999ffdf28be91fb444b53cef62a34b582f6ba3..436a7537e6a9d3ef065ec572568c42fd13331b5e 100644 (file)
@@ -77,7 +77,7 @@
                     (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \
                     (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG))
 
-#define CAN_BCM_VERSION "20160617"
+#define CAN_BCM_VERSION "20161123"
 
 MODULE_DESCRIPTION("PF_CAN broadcast manager protocol");
 MODULE_LICENSE("Dual BSD/GPL");
@@ -109,8 +109,9 @@ struct bcm_op {
        u32 count;
        u32 nframes;
        u32 currframe;
-       struct canfd_frame *frames;
-       struct canfd_frame *last_frames;
+       /* void pointers to arrays of struct can[fd]_frame */
+       void *frames;
+       void *last_frames;
        struct canfd_frame sframe;
        struct canfd_frame last_sframe;
        struct sock *sk;
@@ -681,7 +682,7 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data)
 
        if (op->flags & RX_FILTER_ID) {
                /* the easiest case */
-               bcm_rx_update_and_send(op, &op->last_frames[0], rxframe);
+               bcm_rx_update_and_send(op, op->last_frames, rxframe);
                goto rx_starttimer;
        }
 
@@ -1068,7 +1069,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 
                if (msg_head->nframes) {
                        /* update CAN frames content */
-                       err = memcpy_from_msg((u8 *)op->frames, msg,
+                       err = memcpy_from_msg(op->frames, msg,
                                              msg_head->nframes * op->cfsiz);
                        if (err < 0)
                                return err;
@@ -1118,7 +1119,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
                }
 
                if (msg_head->nframes) {
-                       err = memcpy_from_msg((u8 *)op->frames, msg,
+                       err = memcpy_from_msg(op->frames, msg,
                                              msg_head->nframes * op->cfsiz);
                        if (err < 0) {
                                if (op->frames != &op->sframe)
@@ -1163,6 +1164,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
        /* check flags */
 
        if (op->flags & RX_RTR_FRAME) {
+               struct canfd_frame *frame0 = op->frames;
 
                /* no timers in RTR-mode */
                hrtimer_cancel(&op->thrtimer);
@@ -1174,8 +1176,8 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
                 * prevent a full-load-loopback-test ... ;-]
                 */
                if ((op->flags & TX_CP_CAN_ID) ||
-                   (op->frames[0].can_id == op->can_id))
-                       op->frames[0].can_id = op->can_id & ~CAN_RTR_FLAG;
+                   (frame0->can_id == op->can_id))
+                       frame0->can_id = op->can_id & ~CAN_RTR_FLAG;
 
        } else {
                if (op->flags & SETTIMER) {
@@ -1549,24 +1551,31 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
        struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
        struct sock *sk = sock->sk;
        struct bcm_sock *bo = bcm_sk(sk);
+       int ret = 0;
 
        if (len < sizeof(*addr))
                return -EINVAL;
 
-       if (bo->bound)
-               return -EISCONN;
+       lock_sock(sk);
+
+       if (bo->bound) {
+               ret = -EISCONN;
+               goto fail;
+       }
 
        /* bind a device to this socket */
        if (addr->can_ifindex) {
                struct net_device *dev;
 
                dev = dev_get_by_index(&init_net, addr->can_ifindex);
-               if (!dev)
-                       return -ENODEV;
-
+               if (!dev) {
+                       ret = -ENODEV;
+                       goto fail;
+               }
                if (dev->type != ARPHRD_CAN) {
                        dev_put(dev);
-                       return -ENODEV;
+                       ret = -ENODEV;
+                       goto fail;
                }
 
                bo->ifindex = dev->ifindex;
@@ -1577,17 +1586,24 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
                bo->ifindex = 0;
        }
 
-       bo->bound = 1;
-
        if (proc_dir) {
                /* unique socket address as filename */
                sprintf(bo->procname, "%lu", sock_i_ino(sk));
                bo->bcm_proc_read = proc_create_data(bo->procname, 0644,
                                                     proc_dir,
                                                     &bcm_proc_fops, sk);
+               if (!bo->bcm_proc_read) {
+                       ret = -ENOMEM;
+                       goto fail;
+               }
        }
 
-       return 0;
+       bo->bound = 1;
+
+fail:
+       release_sock(sk);
+
+       return ret;
 }
 
 static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
index 7d54e944de5e0723918d8196e2d10766fd0b84c5..dcbe67ff3e2b281abc87d791def2f131fdba9fb3 100644 (file)
@@ -34,7 +34,8 @@ void ceph_file_layout_from_legacy(struct ceph_file_layout *fl,
        fl->stripe_count = le32_to_cpu(legacy->fl_stripe_count);
        fl->object_size = le32_to_cpu(legacy->fl_object_size);
        fl->pool_id = le32_to_cpu(legacy->fl_pg_pool);
-       if (fl->pool_id == 0)
+       if (fl->pool_id == 0 && fl->stripe_unit == 0 &&
+           fl->stripe_count == 0 && fl->object_size == 0)
                fl->pool_id = -1;
 }
 EXPORT_SYMBOL(ceph_file_layout_from_legacy);
index d9bf7a1d0a583730a9f4c7376102d1ce5694f66f..e6ae15bc41b74dfc96e9d967139367b4ad3be952 100644 (file)
@@ -4094,6 +4094,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
        osd_init(&osdc->homeless_osd);
        osdc->homeless_osd.o_osdc = osdc;
        osdc->homeless_osd.o_osd = CEPH_HOMELESS_OSD;
+       osdc->last_linger_id = CEPH_LINGER_ID_START;
        osdc->linger_requests = RB_ROOT;
        osdc->map_checks = RB_ROOT;
        osdc->linger_map_checks = RB_ROOT;
index 00d2601407c5dfd5b8e8a94bf5de2160ae29cc38..1a7c9a79a53c22e8e61e3b6e8e1720db71d9f9a9 100644 (file)
@@ -26,7 +26,7 @@ struct page **ceph_get_direct_page_vector(const void __user *data,
        while (got < num_pages) {
                rc = get_user_pages_unlocked(
                    (unsigned long)data + ((unsigned long)got * PAGE_SIZE),
-                   num_pages - got, write_page, 0, pages + got);
+                   num_pages - got, pages + got, write_page ? FOLL_WRITE : 0);
                if (rc < 0)
                        break;
                BUG_ON(rc == 0);
index d6508c2ddca502800181a4c4fa49c92235edcebc..f6761b6e3b29bc4c645585bfdcb38ca94bdace34 100644 (file)
@@ -24,6 +24,7 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
 obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
 obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
+obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
 obj-$(CONFIG_DST_CACHE) += dst_cache.o
 obj-$(CONFIG_HWBM) += hwbm.o
 obj-$(CONFIG_NET_DEVLINK) += devlink.o
index bfb973aebb5b16a8cd04eebdd712bcd6006e86d6..49816af8586bb832e806972b486588041a99524c 100644 (file)
@@ -165,6 +165,7 @@ done:
  *     __skb_try_recv_datagram - Receive a datagram skbuff
  *     @sk: socket
  *     @flags: MSG_ flags
+ *     @destructor: invoked under the receive lock on successful dequeue
  *     @peeked: returns non-zero if this packet has been seen before
  *     @off: an offset in bytes to peek skb from. Returns an offset
  *           within an skb where data actually starts
@@ -197,6 +198,8 @@ done:
  *     the standard around please.
  */
 struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
+                                       void (*destructor)(struct sock *sk,
+                                                          struct sk_buff *skb),
                                        int *peeked, int *off, int *err,
                                        struct sk_buff **last)
 {
@@ -241,9 +244,11 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
                                }
 
                                atomic_inc(&skb->users);
-                       } else
+                       } else {
                                __skb_unlink(skb, queue);
-
+                               if (destructor)
+                                       destructor(sk, skb);
+                       }
                        spin_unlock_irqrestore(&queue->lock, cpu_flags);
                        *off = _off;
                        return skb;
@@ -262,6 +267,8 @@ no_packet:
 EXPORT_SYMBOL(__skb_try_recv_datagram);
 
 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
+                                   void (*destructor)(struct sock *sk,
+                                                      struct sk_buff *skb),
                                    int *peeked, int *off, int *err)
 {
        struct sk_buff *skb, *last;
@@ -270,8 +277,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
        timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
        do {
-               skb = __skb_try_recv_datagram(sk, flags, peeked, off, err,
-                                             &last);
+               skb = __skb_try_recv_datagram(sk, flags, destructor, peeked,
+                                             off, err, &last);
                if (skb)
                        return skb;
 
@@ -290,7 +297,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
        int peeked, off = 0;
 
        return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-                                  &peeked, &off, err);
+                                  NULL, &peeked, &off, err);
 }
 EXPORT_SYMBOL(skb_recv_datagram);
 
index f55fb453601620ce255087d12c639e075d7b256f..bffb5253e77867b1d6a0ada7cc99f4605e03ad28 100644 (file)
@@ -1765,19 +1765,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
-       if (skb_orphan_frags(skb, GFP_ATOMIC) ||
-           unlikely(!is_skb_forwardable(dev, skb))) {
-               atomic_long_inc(&dev->rx_dropped);
-               kfree_skb(skb);
-               return NET_RX_DROP;
-       }
+       int ret = ____dev_forward_skb(dev, skb);
 
-       skb_scrub_packet(skb, true);
-       skb->priority = 0;
-       skb->protocol = eth_type_trans(skb, dev);
-       skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+       if (likely(!ret)) {
+               skb->protocol = eth_type_trans(skb, dev);
+               skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+       }
 
-       return 0;
+       return ret;
 }
 EXPORT_SYMBOL_GPL(__dev_forward_skb);
 
@@ -1948,37 +1943,80 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
        }
 }
 
+/* Map TX queue index @txq to a traffic class index of @dev.
+ *
+ * Returns 0 when dev->num_tc is zero. Otherwise returns the index of the
+ * first dev->tc_to_txq[] entry (scanning TC_MAX_QUEUE slots) whose
+ * offset/count test below accepts @txq, or -1 if no entry matches.
+ */
+int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
+{
+       if (dev->num_tc) {
+               struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+               int i;
+
+               for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
+                       /* NOTE(review): presumably relies on unsigned
+                        * wrap-around so that txq < tc->offset fails the
+                        * test as well, i.e. a single compare checks
+                        * offset <= txq < offset + count - confirm the
+                        * field types of struct netdev_tc_txq.
+                        */
+                       if ((txq - tc->offset) < tc->count)
+                               return i;
+               }
+
+               return -1;
+       }
+
+       return 0;
+}
+
 #ifdef CONFIG_XPS
 static DEFINE_MUTEX(xps_map_mutex);
 #define xmap_dereference(P)            \
        rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
 
-static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
-                                       int cpu, u16 index)
+static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
+                            int tci, u16 index)
 {
        struct xps_map *map = NULL;
        int pos;
 
        if (dev_maps)
-               map = xmap_dereference(dev_maps->cpu_map[cpu]);
+               map = xmap_dereference(dev_maps->cpu_map[tci]);
+       if (!map)
+               return false;
 
-       for (pos = 0; map && pos < map->len; pos++) {
-               if (map->queues[pos] == index) {
-                       if (map->len > 1) {
-                               map->queues[pos] = map->queues[--map->len];
-                       } else {
-                               RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
-                               kfree_rcu(map, rcu);
-                               map = NULL;
-                       }
+       for (pos = map->len; pos--;) {
+               if (map->queues[pos] != index)
+                       continue;
+
+               if (map->len > 1) {
+                       map->queues[pos] = map->queues[--map->len];
                        break;
                }
+
+               RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
+               kfree_rcu(map, rcu);
+               return false;
        }
 
-       return map;
+       return true;
 }
 
-static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
+/* Remove TX queues [@offset, @offset + @count) from every XPS map slot
+ * that belongs to @cpu.
+ *
+ * A CPU owns num_tc consecutive cpu_map[] slots starting at
+ * cpu * num_tc (a single slot when dev->num_tc is zero).
+ *
+ * Returns true if, for at least one of those slots, remove_xps_queue()
+ * reported a surviving map for every queue in the range.
+ */
+static bool remove_xps_queue_cpu(struct net_device *dev,
+                                struct xps_dev_maps *dev_maps,
+                                int cpu, u16 offset, u16 count)
+{
+       int num_tc = dev->num_tc ? : 1;
+       bool active = false;
+       int tci;
+
+       for (tci = cpu * num_tc; num_tc--; tci++) {
+               int i, j;
+
+               for (i = count, j = offset; i--; j++) {
+                       /* index the per-traffic-class slot, not the CPU */
+                       if (!remove_xps_queue(dev_maps, tci, j))
+                               break;
+               }
+
+               /* i only reaches -1 when the inner loop was not cut short */
+               active |= i < 0;
+       }
+
+       return active;
+}
+
+static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
+                                  u16 count)
 {
        struct xps_dev_maps *dev_maps;
        int cpu, i;
@@ -1990,21 +2028,16 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
        if (!dev_maps)
                goto out_no_maps;
 
-       for_each_possible_cpu(cpu) {
-               for (i = index; i < dev->num_tx_queues; i++) {
-                       if (!remove_xps_queue(dev_maps, cpu, i))
-                               break;
-               }
-               if (i == dev->num_tx_queues)
-                       active = true;
-       }
+       for_each_possible_cpu(cpu)
+               active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
+                                              offset, count);
 
        if (!active) {
                RCU_INIT_POINTER(dev->xps_maps, NULL);
                kfree_rcu(dev_maps, rcu);
        }
 
-       for (i = index; i < dev->num_tx_queues; i++)
+       for (i = offset + (count - 1); count--; i--)
                netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
                                             NUMA_NO_NODE);
 
@@ -2012,6 +2045,11 @@ out_no_maps:
        mutex_unlock(&xps_map_mutex);
 }
 
+/* Reset XPS state for the TX queues of @dev starting at @index: forwards
+ * to netif_reset_xps_queues() with a count of dev->num_tx_queues - index.
+ */
+static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
+{
+       netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
+}
+
 static struct xps_map *expand_xps_map(struct xps_map *map,
                                      int cpu, u16 index)
 {
@@ -2051,20 +2089,28 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
                        u16 index)
 {
        struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+       int i, cpu, tci, numa_node_id = -2;
+       int maps_sz, num_tc = 1, tc = 0;
        struct xps_map *map, *new_map;
-       int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
-       int cpu, numa_node_id = -2;
        bool active = false;
 
+       if (dev->num_tc) {
+               num_tc = dev->num_tc;
+               tc = netdev_txq_to_tc(dev, index);
+               if (tc < 0)
+                       return -EINVAL;
+       }
+
+       maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
+       if (maps_sz < L1_CACHE_BYTES)
+               maps_sz = L1_CACHE_BYTES;
+
        mutex_lock(&xps_map_mutex);
 
        dev_maps = xmap_dereference(dev->xps_maps);
 
        /* allocate memory for queue storage */
-       for_each_online_cpu(cpu) {
-               if (!cpumask_test_cpu(cpu, mask))
-                       continue;
-
+       for_each_cpu_and(cpu, cpu_online_mask, mask) {
                if (!new_dev_maps)
                        new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
                if (!new_dev_maps) {
@@ -2072,25 +2118,38 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
                        return -ENOMEM;
                }
 
-               map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+               tci = cpu * num_tc + tc;
+               map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
                                 NULL;
 
                map = expand_xps_map(map, cpu, index);
                if (!map)
                        goto error;
 
-               RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+               RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
        }
 
        if (!new_dev_maps)
                goto out_no_new_maps;
 
        for_each_possible_cpu(cpu) {
+               /* copy maps belonging to foreign traffic classes */
+               for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
+                       /* fill in the new device map from the old device map */
+                       map = xmap_dereference(dev_maps->cpu_map[tci]);
+                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+               }
+
+               /* We need to explicitly update tci as previous loop
+                * could break out early if dev_maps is NULL.
+                */
+               tci = cpu * num_tc + tc;
+
                if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
                        /* add queue to CPU maps */
                        int pos = 0;
 
-                       map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+                       map = xmap_dereference(new_dev_maps->cpu_map[tci]);
                        while ((pos < map->len) && (map->queues[pos] != index))
                                pos++;
 
@@ -2104,26 +2163,36 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 #endif
                } else if (dev_maps) {
                        /* fill in the new device map from the old device map */
-                       map = xmap_dereference(dev_maps->cpu_map[cpu]);
-                       RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+                       map = xmap_dereference(dev_maps->cpu_map[tci]);
+                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
                }
 
+               /* copy maps belonging to foreign traffic classes */
+               for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
+                       /* fill in the new device map from the old device map */
+                       map = xmap_dereference(dev_maps->cpu_map[tci]);
+                       RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+               }
        }
 
        rcu_assign_pointer(dev->xps_maps, new_dev_maps);
 
        /* Cleanup old maps */
-       if (dev_maps) {
-               for_each_possible_cpu(cpu) {
-                       new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
-                       map = xmap_dereference(dev_maps->cpu_map[cpu]);
+       if (!dev_maps)
+               goto out_no_old_maps;
+
+       for_each_possible_cpu(cpu) {
+               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
+                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+                       map = xmap_dereference(dev_maps->cpu_map[tci]);
                        if (map && map != new_map)
                                kfree_rcu(map, rcu);
                }
-
-               kfree_rcu(dev_maps, rcu);
        }
 
+       kfree_rcu(dev_maps, rcu);
+
+out_no_old_maps:
        dev_maps = new_dev_maps;
        active = true;
 
@@ -2138,11 +2207,12 @@ out_no_new_maps:
 
        /* removes queue from unused CPUs */
        for_each_possible_cpu(cpu) {
-               if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
-                       continue;
-
-               if (remove_xps_queue(dev_maps, cpu, index))
-                       active = true;
+               for (i = tc, tci = cpu * num_tc; i--; tci++)
+                       active |= remove_xps_queue(dev_maps, tci, index);
+               if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
+                       active |= remove_xps_queue(dev_maps, tci, index);
+               for (i = num_tc - tc, tci++; --i; tci++)
+                       active |= remove_xps_queue(dev_maps, tci, index);
        }
 
        /* free map if not active */
@@ -2158,11 +2228,14 @@ out_no_maps:
 error:
        /* remove any maps that we added */
        for_each_possible_cpu(cpu) {
-               new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
-               map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
-                                NULL;
-               if (new_map && new_map != map)
-                       kfree(new_map);
+               for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
+                       new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+                       map = dev_maps ?
+                             xmap_dereference(dev_maps->cpu_map[tci]) :
+                             NULL;
+                       if (new_map && new_map != map)
+                               kfree(new_map);
+               }
        }
 
        mutex_unlock(&xps_map_mutex);
@@ -2173,6 +2246,44 @@ error:
 EXPORT_SYMBOL(netif_set_xps_queue);
 
 #endif
+/* Drop the traffic class configuration of @dev: with CONFIG_XPS the XPS
+ * state of all TX queues (from index 0) is reset first, then num_tc is
+ * set to zero and the tc_to_txq and prio_tc_map tables are zeroed.
+ */
+void netdev_reset_tc(struct net_device *dev)
+{
+#ifdef CONFIG_XPS
+       netif_reset_xps_queues_gt(dev, 0);
+#endif
+       dev->num_tc = 0;
+       memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
+       memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+}
+EXPORT_SYMBOL(netdev_reset_tc);
+
+/* Record @count TX queues starting at @offset as the queue range of
+ * traffic class @tc on @dev.
+ *
+ * Returns -EINVAL when @tc is not below dev->num_tc, 0 otherwise.
+ * With CONFIG_XPS the XPS state of the affected queue range is reset
+ * before the new mapping is stored.
+ */
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
+{
+       if (tc >= dev->num_tc)
+               return -EINVAL;
+
+#ifdef CONFIG_XPS
+       netif_reset_xps_queues(dev, offset, count);
+#endif
+       dev->tc_to_txq[tc].count = count;
+       dev->tc_to_txq[tc].offset = offset;
+       return 0;
+}
+EXPORT_SYMBOL(netdev_set_tc_queue);
+
+/* Set the number of traffic classes of @dev to @num_tc.
+ *
+ * Returns -EINVAL when @num_tc exceeds TC_MAX_QUEUE, 0 otherwise.
+ * With CONFIG_XPS the XPS state of all TX queues (from index 0) is reset
+ * before num_tc changes.
+ */
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
+{
+       if (num_tc > TC_MAX_QUEUE)
+               return -EINVAL;
+
+#ifdef CONFIG_XPS
+       netif_reset_xps_queues_gt(dev, 0);
+#endif
+       dev->num_tc = num_tc;
+       return 0;
+}
+EXPORT_SYMBOL(netdev_set_num_tc);
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -2483,7 +2594,7 @@ int skb_checksum_help(struct sk_buff *skb)
                        goto out;
        }
 
-       *(__sum16 *)(skb->data + offset) = csum_fold(csum);
+       *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
 out_set_summed:
        skb->ip_summed = CHECKSUM_NONE;
 out:
@@ -2899,6 +3010,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
        }
        return head;
 }
+EXPORT_SYMBOL_GPL(validate_xmit_skb_list);
 
 static void qdisc_pkt_len_init(struct sk_buff *skb)
 {
@@ -3084,8 +3196,14 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
        rcu_read_lock();
        dev_maps = rcu_dereference(dev->xps_maps);
        if (dev_maps) {
-               map = rcu_dereference(
-                   dev_maps->cpu_map[skb->sender_cpu - 1]);
+               unsigned int tci = skb->sender_cpu - 1;
+
+               if (dev->num_tc) {
+                       tci *= dev->num_tc;
+                       tci += netdev_get_prio_tc_map(dev, skb->priority);
+               }
+
+               map = rcu_dereference(dev_maps->cpu_map[tci]);
                if (map) {
                        if (map->len == 1)
                                queue_index = map->queues[0];
@@ -3709,7 +3827,7 @@ int netif_rx_ni(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_rx_ni);
 
-static void net_tx_action(struct softirq_action *h)
+static __latent_entropy void net_tx_action(struct softirq_action *h)
 {
        struct softnet_data *sd = this_cpu_ptr(&softnet_data);
 
@@ -4359,7 +4477,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
        if (!(skb->dev->features & NETIF_F_GRO))
                goto normal;
 
-       if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
+       if (skb->csum_bad)
                goto normal;
 
        gro_list_prepare(napi, skb);
@@ -4372,9 +4490,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
                skb_set_network_header(skb, skb_gro_offset(skb));
                skb_reset_mac_len(skb);
                NAPI_GRO_CB(skb)->same_flow = 0;
-               NAPI_GRO_CB(skb)->flush = 0;
+               NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
                NAPI_GRO_CB(skb)->free = 0;
                NAPI_GRO_CB(skb)->encap_mark = 0;
+               NAPI_GRO_CB(skb)->recursion_counter = 0;
                NAPI_GRO_CB(skb)->is_fou = 0;
                NAPI_GRO_CB(skb)->is_atomic = 1;
                NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
@@ -4779,26 +4898,36 @@ void __napi_schedule_irqoff(struct napi_struct *n)
 }
 EXPORT_SYMBOL(__napi_schedule_irqoff);
 
-void __napi_complete(struct napi_struct *n)
+bool __napi_complete(struct napi_struct *n)
 {
        BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 
+       /* Some drivers call us directly, instead of calling
+        * napi_complete_done().
+        */
+       if (unlikely(test_bit(NAPI_STATE_IN_BUSY_POLL, &n->state)))
+               return false;
+
        list_del_init(&n->poll_list);
        smp_mb__before_atomic();
        clear_bit(NAPI_STATE_SCHED, &n->state);
+       return true;
 }
 EXPORT_SYMBOL(__napi_complete);
 
-void napi_complete_done(struct napi_struct *n, int work_done)
+bool napi_complete_done(struct napi_struct *n, int work_done)
 {
        unsigned long flags;
 
        /*
-        * don't let napi dequeue from the cpu poll list
-        * just in case its running on a different cpu
+        * 1) Don't let napi dequeue from the cpu poll list
+        *    just in case its running on a different cpu.
+        * 2) If we are busy polling, do nothing here, we have
+        *    the guarantee we will be called later.
         */
-       if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
-               return;
+       if (unlikely(n->state & (NAPIF_STATE_NPSVC |
+                                NAPIF_STATE_IN_BUSY_POLL)))
+               return false;
 
        if (n->gro_list) {
                unsigned long timeout = 0;
@@ -4820,6 +4949,7 @@ void napi_complete_done(struct napi_struct *n, int work_done)
                __napi_complete(n);
                local_irq_restore(flags);
        }
+       return true;
 }
 EXPORT_SYMBOL(napi_complete_done);
 
@@ -4837,13 +4967,41 @@ static struct napi_struct *napi_by_id(unsigned int napi_id)
 }
 
 #if defined(CONFIG_NET_RX_BUSY_POLL)
+
 #define BUSY_POLL_BUDGET 8
+
+/* Leave busy-poll mode for @napi.
+ *
+ * Clears NAPI_STATE_IN_BUSY_POLL, runs one more napi->poll() round with
+ * bottom halves disabled, releases the netpoll poll lock passed in
+ * @have_poll_lock, and reschedules the NAPI context if that round used
+ * the whole BUSY_POLL_BUDGET. Pending softirqs are run before returning.
+ */
+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
+{
+       int rc;
+
+       /* NOTE(review): presumably cleared first so the completion helpers,
+        * which return early while this bit is set, can finish the NAPI
+        * context during the poll below - confirm.
+        */
+       clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
+
+       local_bh_disable();
+
+       /* All we really want here is to re-enable device interrupts.
+        * Ideally, a new ndo_busy_poll_stop() could avoid another round.
+        */
+       rc = napi->poll(napi, BUSY_POLL_BUDGET);
+       netpoll_poll_unlock(have_poll_lock);
+       /* a full budget means more work may be pending: hand over to softirq */
+       if (rc == BUSY_POLL_BUDGET)
+               __napi_schedule(napi);
+       local_bh_enable();
+       if (local_softirq_pending())
+               do_softirq();
+}
+
 bool sk_busy_loop(struct sock *sk, int nonblock)
 {
        unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+       int (*napi_poll)(struct napi_struct *napi, int budget);
        int (*busy_poll)(struct napi_struct *dev);
+       void *have_poll_lock = NULL;
        struct napi_struct *napi;
-       int rc = false;
+       int rc;
+
+restart:
+       rc = false;
+       napi_poll = NULL;
 
        rcu_read_lock();
 
@@ -4854,24 +5012,33 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
        /* Note: ndo_busy_poll method is optional in linux-4.5 */
        busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
 
-       do {
+       preempt_disable();
+       for (;;) {
                rc = 0;
                local_bh_disable();
                if (busy_poll) {
                        rc = busy_poll(napi);
-               } else if (napi_schedule_prep(napi)) {
-                       void *have = netpoll_poll_lock(napi);
-
-                       if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
-                               rc = napi->poll(napi, BUSY_POLL_BUDGET);
-                               trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
-                               if (rc == BUSY_POLL_BUDGET) {
-                                       napi_complete_done(napi, rc);
-                                       napi_schedule(napi);
-                               }
-                       }
-                       netpoll_poll_unlock(have);
+                       goto count;
+               }
+               if (!napi_poll) {
+                       unsigned long val = READ_ONCE(napi->state);
+
+                       /* If multiple threads are competing for this napi,
+                        * we avoid dirtying napi->state as much as we can.
+                        */
+                       if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
+                                  NAPIF_STATE_IN_BUSY_POLL))
+                               goto count;
+                       if (cmpxchg(&napi->state, val,
+                                   val | NAPIF_STATE_IN_BUSY_POLL |
+                                         NAPIF_STATE_SCHED) != val)
+                               goto count;
+                       have_poll_lock = netpoll_poll_lock(napi);
+                       napi_poll = napi->poll;
                }
+               rc = napi_poll(napi, BUSY_POLL_BUDGET);
+               trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
+count:
                if (rc > 0)
                        __NET_ADD_STATS(sock_net(sk),
                                        LINUX_MIB_BUSYPOLLRXPACKETS, rc);
@@ -4880,10 +5047,26 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
                if (rc == LL_FLUSH_FAILED)
                        break; /* permanent failure */
 
-               cpu_relax();
-       } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
-                !need_resched() && !busy_loop_timeout(end_time));
+               if (nonblock || !skb_queue_empty(&sk->sk_receive_queue) ||
+                   busy_loop_timeout(end_time))
+                       break;
 
+               if (unlikely(need_resched())) {
+                       if (napi_poll)
+                               busy_poll_stop(napi, have_poll_lock);
+                       preempt_enable();
+                       rcu_read_unlock();
+                       cond_resched();
+                       rc = !skb_queue_empty(&sk->sk_receive_queue);
+                       if (rc || busy_loop_timeout(end_time))
+                               return rc;
+                       goto restart;
+               }
+               cpu_relax_lowlatency();
+       }
+       if (napi_poll)
+               busy_poll_stop(napi, have_poll_lock);
+       preempt_enable();
        rc = !skb_queue_empty(&sk->sk_receive_queue);
 out:
        rcu_read_unlock();
@@ -4893,7 +5076,7 @@ EXPORT_SYMBOL(sk_busy_loop);
 
 #endif /* CONFIG_NET_RX_BUSY_POLL */
 
-void napi_hash_add(struct napi_struct *napi)
+static void napi_hash_add(struct napi_struct *napi)
 {
        if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
            test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
@@ -4913,7 +5096,6 @@ void napi_hash_add(struct napi_struct *napi)
 
        spin_unlock(&napi_hash_lock);
 }
-EXPORT_SYMBOL_GPL(napi_hash_add);
 
 /* Warning : caller is responsible to make sure rcu grace period
  * is respected before freeing memory containing @napi
@@ -4961,7 +5143,6 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
        list_add(&napi->dev_list, &dev->napi_list);
        napi->dev = dev;
 #ifdef CONFIG_NETPOLL
-       spin_lock_init(&napi->poll_lock);
        napi->poll_owner = -1;
 #endif
        set_bit(NAPI_STATE_SCHED, &napi->state);
@@ -5062,7 +5243,7 @@ out_unlock:
        return work;
 }
 
-static void net_rx_action(struct softirq_action *h)
+static __latent_entropy void net_rx_action(struct softirq_action *h)
 {
        struct softnet_data *sd = this_cpu_ptr(&softnet_data);
        unsigned long time_limit = jiffies + 2;
@@ -5079,7 +5260,7 @@ static void net_rx_action(struct softirq_action *h)
 
                if (list_empty(&list)) {
                        if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
-                               return;
+                               goto out;
                        break;
                }
 
@@ -5097,7 +5278,6 @@ static void net_rx_action(struct softirq_action *h)
                }
        }
 
-       __kfree_skb_flush();
        local_irq_disable();
 
        list_splice_tail_init(&sd->poll_list, &list);
@@ -5107,6 +5287,8 @@ static void net_rx_action(struct softirq_action *h)
                __raise_softirq_irqoff(NET_RX_SOFTIRQ);
 
        net_rps_action_and_irq_enable(sd);
+out:
+       __kfree_skb_flush();
 }
 
 struct netdev_adjacent {
@@ -5400,12 +5582,12 @@ static struct net_device *netdev_next_lower_dev(struct net_device *dev,
 {
        struct netdev_adjacent *lower;
 
-       lower = list_entry(*iter, struct netdev_adjacent, list);
+       lower = list_entry((*iter)->next, struct netdev_adjacent, list);
 
        if (&lower->list == &dev->adj_list.lower)
                return NULL;
 
-       *iter = lower->list.next;
+       *iter = &lower->list;
 
        return lower->dev;
 }
@@ -6510,26 +6692,42 @@ EXPORT_SYMBOL(dev_change_proto_down);
  *     dev_change_xdp_fd - set or clear a bpf program for a device rx path
  *     @dev: device
  *     @fd: new program fd or negative value to clear
+ *     @flags: xdp-related flags
  *
  *     Set or clear a bpf program for a device
  */
-int dev_change_xdp_fd(struct net_device *dev, int fd)
+int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
 {
        const struct net_device_ops *ops = dev->netdev_ops;
        struct bpf_prog *prog = NULL;
-       struct netdev_xdp xdp = {};
+       struct netdev_xdp xdp;
        int err;
 
+       ASSERT_RTNL();
+
        if (!ops->ndo_xdp)
                return -EOPNOTSUPP;
        if (fd >= 0) {
+               if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) {
+                       memset(&xdp, 0, sizeof(xdp));
+                       xdp.command = XDP_QUERY_PROG;
+
+                       err = ops->ndo_xdp(dev, &xdp);
+                       if (err < 0)
+                               return err;
+                       if (xdp.prog_attached)
+                               return -EBUSY;
+               }
+
                prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
                if (IS_ERR(prog))
                        return PTR_ERR(prog);
        }
 
+       memset(&xdp, 0, sizeof(xdp));
        xdp.command = XDP_SETUP_PROG;
        xdp.prog = prog;
+
        err = ops->ndo_xdp(dev, &xdp);
        if (err < 0 && prog)
                bpf_prog_put(prog);
@@ -7527,7 +7725,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
        if (!dev->tx_queue_len) {
                dev->priv_flags |= IFF_NO_QUEUE;
-               dev->tx_queue_len = 1;
+               dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
        }
 
        dev->num_tx_queues = txqs;
index d2fd736de6a29f8cbf77da29ba5162245f919ce5..2b5bf9efa720a9feed9d2f187dda6ab5bf59cda0 100644 (file)
@@ -341,15 +341,7 @@ static void devlink_nl_post_doit(const struct genl_ops *ops,
        mutex_unlock(&devlink_mutex);
 }
 
-static struct genl_family devlink_nl_family = {
-       .id             = GENL_ID_GENERATE,
-       .name           = DEVLINK_GENL_NAME,
-       .version        = DEVLINK_GENL_VERSION,
-       .maxattr        = DEVLINK_ATTR_MAX,
-       .netnsok        = true,
-       .pre_doit       = devlink_nl_pre_doit,
-       .post_doit      = devlink_nl_post_doit,
-};
+static struct genl_family devlink_nl_family;
 
 enum devlink_multicast_groups {
        DEVLINK_MCGRP_CONFIG,
@@ -1402,26 +1394,45 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
 
 static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
                                enum devlink_command cmd, u32 portid,
-                               u32 seq, int flags, u16 mode)
+                               u32 seq, int flags)
 {
+       const struct devlink_ops *ops = devlink->ops;
        void *hdr;
+       int err = 0;
+       u16 mode;
+       u8 inline_mode;
 
        hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
        if (!hdr)
                return -EMSGSIZE;
 
-       if (devlink_nl_put_handle(msg, devlink))
-               goto nla_put_failure;
+       err = devlink_nl_put_handle(msg, devlink);
+       if (err)
+               goto out;
 
-       if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode))
-               goto nla_put_failure;
+       err = ops->eswitch_mode_get(devlink, &mode);
+       if (err)
+               goto out;
+       err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
+       if (err)
+               goto out;
+
+       if (ops->eswitch_inline_mode_get) {
+               err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
+               if (err)
+                       goto out;
+               err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
+                                inline_mode);
+               if (err)
+                       goto out;
+       }
 
        genlmsg_end(msg, hdr);
        return 0;
 
-nla_put_failure:
+out:
        genlmsg_cancel(msg, hdr);
-       return -EMSGSIZE;
+       return err;
 }
 
 static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
@@ -1430,22 +1441,17 @@ static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb,
        struct devlink *devlink = info->user_ptr[0];
        const struct devlink_ops *ops = devlink->ops;
        struct sk_buff *msg;
-       u16 mode;
        int err;
 
        if (!ops || !ops->eswitch_mode_get)
                return -EOPNOTSUPP;
 
-       err = ops->eswitch_mode_get(devlink, &mode);
-       if (err)
-               return err;
-
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg)
                return -ENOMEM;
 
        err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET,
-                                  info->snd_portid, info->snd_seq, 0, mode);
+                                  info->snd_portid, info->snd_seq, 0);
 
        if (err) {
                nlmsg_free(msg);
@@ -1461,15 +1467,32 @@ static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb,
        struct devlink *devlink = info->user_ptr[0];
        const struct devlink_ops *ops = devlink->ops;
        u16 mode;
+       u8 inline_mode;
+       int err = 0;
 
-       if (!info->attrs[DEVLINK_ATTR_ESWITCH_MODE])
-               return -EINVAL;
+       if (!ops)
+               return -EOPNOTSUPP;
 
-       mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+       if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
+               if (!ops->eswitch_mode_set)
+                       return -EOPNOTSUPP;
+               mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+               err = ops->eswitch_mode_set(devlink, mode);
+               if (err)
+                       return err;
+       }
 
-       if (ops && ops->eswitch_mode_set)
-               return ops->eswitch_mode_set(devlink, mode);
-       return -EOPNOTSUPP;
+       if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) {
+               if (!ops->eswitch_inline_mode_set)
+                       return -EOPNOTSUPP;
+               inline_mode = nla_get_u8(
+                               info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
+               err = ops->eswitch_inline_mode_set(devlink, inline_mode);
+               if (err)
+                       return err;
+       }
+
+       return 0;
 }
 
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
@@ -1486,6 +1509,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
        [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
        [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
        [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 },
+       [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -1620,6 +1644,20 @@ static const struct genl_ops devlink_nl_ops[] = {
        },
 };
 
+/* Generic netlink family descriptor for devlink. The operation table and
+ * the multicast groups are attached here statically, so that
+ * devlink_module_init() only has to call genl_register_family().
+ */
+static struct genl_family devlink_nl_family __ro_after_init = {
+       .name           = DEVLINK_GENL_NAME,
+       .version        = DEVLINK_GENL_VERSION,
+       .maxattr        = DEVLINK_ATTR_MAX,
+       .netnsok        = true,
+       .pre_doit       = devlink_nl_pre_doit,
+       .post_doit      = devlink_nl_post_doit,
+       .module         = THIS_MODULE,
+       .ops            = devlink_nl_ops,
+       .n_ops          = ARRAY_SIZE(devlink_nl_ops),
+       .mcgrps         = devlink_nl_mcgrps,
+       .n_mcgrps       = ARRAY_SIZE(devlink_nl_mcgrps),
+};
+
 /**
  *     devlink_alloc - Allocate new devlink instance resources
  *
@@ -1842,9 +1880,7 @@ EXPORT_SYMBOL_GPL(devlink_sb_unregister);
 
 static int __init devlink_module_init(void)
 {
-       return genl_register_family_with_ops_groups(&devlink_nl_family,
-                                                   devlink_nl_ops,
-                                                   devlink_nl_mcgrps);
+       return genl_register_family(&devlink_nl_family);
 }
 
 static void __exit devlink_module_exit(void)
index 72cfb0c611254cbe48885e4d3a7b497e91d94fed..8e0c0635ee975e1c71a126677f63ff220f7f0c44 100644 (file)
@@ -59,12 +59,7 @@ struct dm_hw_stat_delta {
        unsigned long last_drop_val;
 };
 
-static struct genl_family net_drop_monitor_family = {
-       .id             = GENL_ID_GENERATE,
-       .hdrsize        = 0,
-       .name           = "NET_DM",
-       .version        = 2,
-};
+static struct genl_family net_drop_monitor_family;
 
 static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
 
@@ -351,6 +346,17 @@ static const struct genl_ops dropmon_ops[] = {
        },
 };
 
+/* Definition of the "NET_DM" generic netlink family that is forward
+ * declared earlier in this file. Operations and multicast groups are part
+ * of the descriptor; init_net_drop_monitor() registers it with
+ * genl_register_family().
+ */
+static struct genl_family net_drop_monitor_family __ro_after_init = {
+       .hdrsize        = 0,
+       .name           = "NET_DM",
+       .version        = 2,
+       .module         = THIS_MODULE,
+       .ops            = dropmon_ops,
+       .n_ops          = ARRAY_SIZE(dropmon_ops),
+       .mcgrps         = dropmon_mcgrps,
+       .n_mcgrps       = ARRAY_SIZE(dropmon_mcgrps),
+};
+
 static struct notifier_block dropmon_net_notifier = {
        .notifier_call = dropmon_net_event
 };
@@ -367,8 +373,7 @@ static int __init init_net_drop_monitor(void)
                return -ENOSPC;
        }
 
-       rc = genl_register_family_with_ops_groups(&net_drop_monitor_family,
-                                                 dropmon_ops, dropmon_mcgrps);
+       rc = genl_register_family(&net_drop_monitor_family);
        if (rc) {
                pr_err("Could not create drop monitor netlink family\n");
                return rc;
index 977489820eb957098705c9ea1674ed3092a6cfe6..e23766c7e3ba19414494d242af86c1029e8eee61 100644 (file)
@@ -119,6 +119,12 @@ tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
        [ETHTOOL_TX_COPYBREAK]  = "tx-copybreak",
 };
 
+/* Names of the PHY tunables, indexed by tunable id. This table is what
+ * __ethtool_get_sset_count() sizes and __ethtool_get_strings() copies out
+ * for the ETH_SS_PHY_TUNABLES string set.
+ */
+static const char
+phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
+       [ETHTOOL_ID_UNSPEC]     = "Unspec",
+       [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift",
+};
+
 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
 {
        struct ethtool_gfeatures cmd = {
@@ -227,6 +233,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
        if (sset == ETH_SS_TUNABLES)
                return ARRAY_SIZE(tunable_strings);
 
+       if (sset == ETH_SS_PHY_TUNABLES)
+               return ARRAY_SIZE(phy_tunable_strings);
+
        if (sset == ETH_SS_PHY_STATS) {
                if (dev->phydev)
                        return phy_get_sset_count(dev->phydev);
@@ -253,6 +262,8 @@ static void __ethtool_get_strings(struct net_device *dev,
                       sizeof(rss_hash_func_strings));
        else if (stringset == ETH_SS_TUNABLES)
                memcpy(data, tunable_strings, sizeof(tunable_strings));
+       else if (stringset == ETH_SS_PHY_TUNABLES)
+               memcpy(data, phy_tunable_strings, sizeof(phy_tunable_strings));
        else if (stringset == ETH_SS_PHY_STATS) {
                struct phy_device *phydev = dev->phydev;
 
@@ -2422,6 +2433,85 @@ static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
        };
 }
 
+/* Check that a PHY tunable request carries the length and value type
+ * expected for its id.
+ *
+ * Returns 0 when the request is acceptable and -EINVAL when it is malformed
+ * or names an id that is not handled here.
+ */
+static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna)
+{
+       int ret = -EINVAL;
+
+       switch (tuna->id) {
+       case ETHTOOL_PHY_DOWNSHIFT:
+               /* Downshift is carried as a single u8. */
+               if (tuna->len == sizeof(u8) &&
+                   tuna->type_id == ETHTOOL_TUNABLE_U8)
+                       ret = 0;
+               break;
+       default:
+               break;
+       }
+
+       return ret;
+}
+
+/* Handle ETHTOOL_PHY_GTUNABLE: read one PHY tunable through the PHY
+ * driver's get_tunable() callback and copy the value back to user space.
+ *
+ * @useraddr points at a struct ethtool_tunable header; the value is written
+ * to user memory directly behind that header.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the device has no PHY, no PHY
+ * driver or no get_tunable() callback, -EFAULT when a user copy fails,
+ * -ENOMEM when the bounce buffer cannot be allocated, or the error reported
+ * by ethtool_phy_tunable_valid() or by the driver callback.
+ */
+static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
+{
+       int ret;
+       struct ethtool_tunable tuna;
+       struct phy_device *phydev = dev->phydev;
+       void *data;
+
+       if (!(phydev && phydev->drv && phydev->drv->get_tunable))
+               return -EOPNOTSUPP;
+
+       if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+               return -EFAULT;
+       ret = ethtool_phy_tunable_valid(&tuna);
+       if (ret)
+               return ret;
+       /* Kernel-side bounce buffer for the value, sized by the request. */
+       data = kmalloc(tuna.len, GFP_USER);
+       if (!data)
+               return -ENOMEM;
+       /* The driver callback runs with the PHY device lock held. */
+       mutex_lock(&phydev->lock);
+       ret = phydev->drv->get_tunable(phydev, &tuna, data);
+       mutex_unlock(&phydev->lock);
+       if (ret)
+               goto out;
+       /* The value lives right after the header in the user buffer. */
+       useraddr += sizeof(tuna);
+       ret = -EFAULT;
+       if (copy_to_user(useraddr, data, tuna.len))
+               goto out;
+       ret = 0;
+
+out:
+       kfree(data);
+       return ret;
+}
+
+/* Handle ETHTOOL_PHY_STUNABLE: fetch a PHY tunable value from user space
+ * and pass it to the PHY driver's set_tunable() callback.
+ *
+ * @useraddr points at a struct ethtool_tunable header; the new value is
+ * read from user memory directly behind that header.
+ *
+ * Returns the driver callback's result, -EOPNOTSUPP when the device has no
+ * PHY, no PHY driver or no set_tunable() callback, -EFAULT when a user copy
+ * fails, -ENOMEM when the bounce buffer cannot be allocated, or the error
+ * reported by ethtool_phy_tunable_valid().
+ */
+static int set_phy_tunable(struct net_device *dev, void __user *useraddr)
+{
+       int ret;
+       struct ethtool_tunable tuna;
+       struct phy_device *phydev = dev->phydev;
+       void *data;
+
+       if (!(phydev && phydev->drv && phydev->drv->set_tunable))
+               return -EOPNOTSUPP;
+       if (copy_from_user(&tuna, useraddr, sizeof(tuna)))
+               return -EFAULT;
+       ret = ethtool_phy_tunable_valid(&tuna);
+       if (ret)
+               return ret;
+       /* Kernel-side bounce buffer for the value, sized by the request. */
+       data = kmalloc(tuna.len, GFP_USER);
+       if (!data)
+               return -ENOMEM;
+       /* The value lives right after the header in the user buffer. */
+       useraddr += sizeof(tuna);
+       ret = -EFAULT;
+       if (copy_from_user(data, useraddr, tuna.len))
+               goto out;
+       /* The driver callback runs with the PHY device lock held. */
+       mutex_lock(&phydev->lock);
+       ret = phydev->drv->set_tunable(phydev, &tuna, data);
+       mutex_unlock(&phydev->lock);
+
+out:
+       kfree(data);
+       return ret;
+}
+
 /* The main entry point in this file.  Called from net/core/dev_ioctl.c */
 
 int dev_ethtool(struct net *net, struct ifreq *ifr)
@@ -2479,6 +2569,8 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
        case ETHTOOL_GET_TS_INFO:
        case ETHTOOL_GEEE:
        case ETHTOOL_GTUNABLE:
+       case ETHTOOL_PHY_GTUNABLE:
+       case ETHTOOL_GLINKSETTINGS:
                break;
        default:
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -2684,6 +2776,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
        case ETHTOOL_SLINKSETTINGS:
                rc = ethtool_set_link_ksettings(dev, useraddr);
                break;
+       case ETHTOOL_PHY_GTUNABLE:
+               rc = get_phy_tunable(dev, useraddr);
+               break;
+       case ETHTOOL_PHY_STUNABLE:
+               rc = set_phy_tunable(dev, useraddr);
+               break;
        default:
                rc = -EOPNOTSUPP;
        }
index be4629c344a6de30e701be9e5efb606a7e662cd6..b6791d94841d56cf8b1027d3ba2d71dd21302caf 100644 (file)
 #include <net/fib_rules.h>
 #include <net/ip_tunnels.h>
 
+/* Range assigned to rules that carry no FRA_UID_RANGE attribute.
+ * NOTE(review): presumably KUIDT_INIT(~0) is the invalid kuid, so that
+ * uid_range_set() reports this range as "not set" while the bounds check in
+ * fib_rule_match() still lets every uid through - confirm against
+ * uid_valid().
+ */
+static const struct fib_kuid_range fib_kuid_range_unset = {
+       KUIDT_INIT(0),
+       KUIDT_INIT(~0),
+};
+
 int fib_default_rule_add(struct fib_rules_ops *ops,
                         u32 pref, u32 table, u32 flags)
 {
@@ -33,6 +38,7 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
        r->table = table;
        r->flags = flags;
        r->fr_net = ops->fro_net;
+       r->uid_range = fib_kuid_range_unset;
 
        r->suppress_prefixlen = -1;
        r->suppress_ifgroup = -1;
@@ -172,6 +178,34 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
 }
 EXPORT_SYMBOL_GPL(fib_rules_unregister);
 
+/* Report whether @range holds a usable uid range: non-zero only when both
+ * its start and its end are valid kuids.
+ */
+static int uid_range_set(struct fib_kuid_range *range)
+{
+       if (!uid_valid(range->start))
+               return 0;
+
+       return uid_valid(range->end);
+}
+
+/* Convert the FRA_UID_RANGE attribute in @tb into a kernel uid range,
+ * mapping both bounds through the caller's user namespace. The caller must
+ * have checked that tb[FRA_UID_RANGE] is present.
+ */
+static struct fib_kuid_range nla_get_kuid_range(struct nlattr **tb)
+{
+       struct fib_rule_uid_range *uattr;
+       struct fib_kuid_range krange = {
+               .start = KUIDT_INIT(0),
+               .end = KUIDT_INIT(0),
+       };
+
+       uattr = (struct fib_rule_uid_range *)nla_data(tb[FRA_UID_RANGE]);
+       krange.start = make_kuid(current_user_ns(), uattr->start);
+       krange.end = make_kuid(current_user_ns(), uattr->end);
+
+       return krange;
+}
+
+/* Append @range to @skb as a FRA_UID_RANGE attribute, translating both
+ * bounds into the caller's user namespace with from_kuid_munged().
+ * Returns the result of nla_put().
+ */
+static int nla_put_uid_range(struct sk_buff *skb, struct fib_kuid_range *range)
+{
+       /* NOTE(review): positional initializer - assumes struct
+        * fib_rule_uid_range declares start before end; confirm.
+        */
+       struct fib_rule_uid_range out = {
+               from_kuid_munged(current_user_ns(), range->start),
+               from_kuid_munged(current_user_ns(), range->end)
+       };
+
+       return nla_put(skb, FRA_UID_RANGE, sizeof(out), &out);
+}
+
 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
                          struct flowi *fl, int flags,
                          struct fib_lookup_arg *arg)
@@ -193,6 +227,10 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
        if (rule->l3mdev && !l3mdev_fib_rule_match(rule->fr_net, fl, arg))
                goto out;
 
+       if (uid_lt(fl->flowi_uid, rule->uid_range.start) ||
+           uid_gt(fl->flowi_uid, rule->uid_range.end))
+               goto out;
+
        ret = ops->match(rule, fl, flags);
 out:
        return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
@@ -305,6 +343,10 @@ static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
                if (r->l3mdev != rule->l3mdev)
                        continue;
 
+               if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
+                   !uid_eq(r->uid_range.end, rule->uid_range.end))
+                       continue;
+
                if (!ops->compare(r, frh, tb))
                        continue;
                return 1;
@@ -429,6 +471,21 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh)
        if (rule->l3mdev && rule->table)
                goto errout_free;
 
+       if (tb[FRA_UID_RANGE]) {
+               if (current_user_ns() != net->user_ns) {
+                       err = -EPERM;
+                       goto errout_free;
+               }
+
+               rule->uid_range = nla_get_kuid_range(tb);
+
+               if (!uid_range_set(&rule->uid_range) ||
+                   !uid_lte(rule->uid_range.start, rule->uid_range.end))
+                       goto errout_free;
+       } else {
+               rule->uid_range = fib_kuid_range_unset;
+       }
+
        if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
            rule_exists(ops, frh, tb, rule)) {
                err = -EEXIST;
@@ -497,6 +554,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
        struct fib_rules_ops *ops = NULL;
        struct fib_rule *rule, *tmp;
        struct nlattr *tb[FRA_MAX+1];
+       struct fib_kuid_range range;
        int err = -EINVAL;
 
        if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
@@ -516,6 +574,14 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
        if (err < 0)
                goto errout;
 
+       if (tb[FRA_UID_RANGE]) {
+               range = nla_get_kuid_range(tb);
+               if (!uid_range_set(&range))
+                       goto errout;
+       } else {
+               range = fib_kuid_range_unset;
+       }
+
        list_for_each_entry(rule, &ops->rules_list, list) {
                if (frh->action && (frh->action != rule->action))
                        continue;
@@ -552,6 +618,11 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh)
                    (rule->l3mdev != nla_get_u8(tb[FRA_L3MDEV])))
                        continue;
 
+               if (uid_range_set(&range) &&
+                   (!uid_eq(rule->uid_range.start, range.start) ||
+                    !uid_eq(rule->uid_range.end, range.end)))
+                       continue;
+
                if (!ops->compare(rule, frh, tb))
                        continue;
 
@@ -619,7 +690,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
                         + nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
                         + nla_total_size(4) /* FRA_FWMARK */
                         + nla_total_size(4) /* FRA_FWMASK */
-                        + nla_total_size_64bit(8); /* FRA_TUN_ID */
+                        + nla_total_size_64bit(8) /* FRA_TUN_ID */
+                        + nla_total_size(sizeof(struct fib_kuid_range));
 
        if (ops->nlmsg_payload)
                payload += ops->nlmsg_payload(rule);
@@ -679,7 +751,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
            (rule->tun_id &&
             nla_put_be64(skb, FRA_TUN_ID, rule->tun_id, FRA_PAD)) ||
            (rule->l3mdev &&
-            nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)))
+            nla_put_u8(skb, FRA_L3MDEV, rule->l3mdev)) ||
+           (uid_range_set(&rule->uid_range) &&
+            nla_put_uid_range(skb, &rule->uid_range)))
                goto nla_put_failure;
 
        if (rule->suppress_ifgroup != -1) {
index cd9e2ba66b0e0bc32ba8b3f6fba701aa307ce669..56b43587d2006c400af06df00f221296dd8d1f13 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/inet.h>
 #include <linux/netdevice.h>
 #include <linux/if_packet.h>
+#include <linux/if_arp.h>
 #include <linux/gfp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -78,6 +79,10 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
        if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
                return -ENOMEM;
 
+       err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
+       if (err)
+               return err;
+
        err = security_sock_rcv_skb(sk, skb);
        if (err)
                return err;
@@ -1628,6 +1633,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
        return dev_forward_skb(dev, skb);
 }
 
+/* Inject @skb into the receive path of @dev. The skb is first passed
+ * through ____dev_forward_skb(); only if that succeeds is it re-targeted
+ * to @dev and queued with netif_rx(). Returns the first non-zero result,
+ * otherwise the netif_rx() result.
+ */
+static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
+                                     struct sk_buff *skb)
+{
+       int err;
+
+       err = ____dev_forward_skb(dev, skb);
+       if (unlikely(err))
+               return err;
+
+       skb->dev = dev;
+       return netif_rx(skb);
+}
+
 static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
 {
        int ret;
@@ -1647,6 +1665,50 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
        return ret;
 }
 
+/* Redirect @skb to @dev after stripping its link layer header; this is the
+ * path __bpf_redirect() takes when dev_is_mac_header_xmit(@dev) is false.
+ * The packet is handed to the receive path when BPF_F_INGRESS is set in
+ * @flags and to the transmit path otherwise.
+ */
+static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
+                                u32 flags)
+{
+       /* skb->mac_len is not set on normal egress */
+       unsigned int mlen = skb->network_header - skb->mac_header;
+
+       __skb_pull(skb, mlen);
+
+       /* At ingress, the mac header has already been pulled once.
+        * At egress, skb_postpull_rcsum has to be done in case that
+        * the skb is originated from ingress (i.e. a forwarded skb)
+        * to ensure that rcsum starts at net header.
+        */
+       if (!skb_at_tc_ingress(skb))
+               skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+       skb_pop_mac_header(skb);
+       skb_reset_mac_len(skb);
+       return flags & BPF_F_INGRESS ?
+              __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
+}
+
+/* Redirect @skb, link layer header included, to @dev. An skb that carries
+ * no link layer header is freed and rejected with -ERANGE. BPF_F_INGRESS in
+ * @flags selects the receive path; without it the skb is transmitted.
+ */
+static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
+                                u32 flags)
+{
+       /* A mac header must sit in front of the network header. */
+       if (unlikely(skb->mac_header >= skb->network_header)) {
+               kfree_skb(skb);
+               return -ERANGE;
+       }
+
+       bpf_push_mac_rcsum(skb);
+
+       if (flags & BPF_F_INGRESS)
+               return __bpf_rx_skb(dev, skb);
+
+       return __bpf_tx_skb(dev, skb);
+}
+
+/* Common redirect entry point: choose the variant that keeps or strips the
+ * link layer header, based on dev_is_mac_header_xmit() for the target @dev.
+ */
+static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
+                         u32 flags)
+{
+       if (!dev_is_mac_header_xmit(dev))
+               return __bpf_redirect_no_mac(skb, dev, flags);
+
+       return __bpf_redirect_common(skb, dev, flags);
+}
+
 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
 {
        struct net_device *dev;
@@ -1675,10 +1737,7 @@ BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
                return -ENOMEM;
        }
 
-       bpf_push_mac_rcsum(clone);
-
-       return flags & BPF_F_INGRESS ?
-              __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone);
+       return __bpf_redirect(clone, dev, flags);
 }
 
 static const struct bpf_func_proto bpf_clone_redirect_proto = {
@@ -1722,10 +1781,7 @@ int skb_do_redirect(struct sk_buff *skb)
                return -EINVAL;
        }
 
-       bpf_push_mac_rcsum(skb);
-
-       return ri->flags & BPF_F_INGRESS ?
-              __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
+       return __bpf_redirect(skb, dev, ri->flags);
 }
 
 static const struct bpf_func_proto bpf_redirect_proto = {
@@ -2138,12 +2194,53 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
        .arg3_type      = ARG_ANYTHING,
 };
 
+/* BPF helper: grow the packet by @head_room bytes at the front.
+ *
+ * The new bytes are zeroed and the mac header is reset to the new start of
+ * data. @flags is reserved and must be 0.
+ *
+ * Returns 0 on success, -EINVAL for non-zero @flags, for a length that
+ * wraps around or for a non-GSO packet that would exceed
+ * __bpf_skb_max_len(), or the skb_cow() error when the head room could not
+ * be made available.
+ */
+BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+          u64, flags)
+{
+       u32 max_len = __bpf_skb_max_len(skb);
+       u32 new_len = skb->len + head_room;
+       int ret;
+
+       if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
+                    new_len < skb->len))
+               return -EINVAL;
+
+       ret = skb_cow(skb, head_room);
+       if (likely(!ret)) {
+               /* Idea for this helper is that we currently only
+                * allow to expand on mac header. This means that
+                * skb->protocol network header, etc, stay as is.
+                * Compared to bpf_skb_change_tail(), we're more
+                * flexible due to not needing to linearize or
+                * reset GSO. Intention for this helper is to be
+                * used by an L3 skb that needs to push mac header
+                * for redirection into L2 device.
+                */
+               __skb_push(skb, head_room);
+               memset(skb->data, 0, head_room);
+               skb_reset_mac_header(skb);
+       }
+
+       /* Refresh the cached data_end on every path, as before. */
+       bpf_compute_data_end(skb);
+
+       /* Propagate a skb_cow() failure: when it fails nothing was pushed,
+        * so reporting success would let the program write a header into
+        * head room that does not exist.
+        */
+       return ret;
+}
+
+/* Prototype of bpf_skb_change_head(): callable from non-GPL programs,
+ * returns an integer, takes the program context plus two unconstrained
+ * scalar arguments (head_room, flags).
+ */
+static const struct bpf_func_proto bpf_skb_change_head_proto = {
+       .func           = bpf_skb_change_head,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_ANYTHING,
+       .arg3_type      = ARG_ANYTHING,
+};
+
 bool bpf_helper_changes_skb_data(void *func)
 {
        if (func == bpf_skb_vlan_push ||
            func == bpf_skb_vlan_pop ||
            func == bpf_skb_store_bytes ||
            func == bpf_skb_change_proto ||
+           func == bpf_skb_change_head ||
            func == bpf_skb_change_tail ||
            func == bpf_skb_pull_data ||
            func == bpf_l3_csum_replace ||
@@ -2578,6 +2675,79 @@ xdp_func_proto(enum bpf_func_id func_id)
        }
 }
 
+/* Helper lookup for cgroup skb programs: bpf_skb_load_bytes() plus
+ * whatever sk_filter_func_proto() provides.
+ */
+static const struct bpf_func_proto *
+cg_skb_func_proto(enum bpf_func_id func_id)
+{
+       if (func_id == BPF_FUNC_skb_load_bytes)
+               return &bpf_skb_load_bytes_proto;
+
+       return sk_filter_func_proto(func_id);
+}
+
+/* Helper lookup shared by the LWT in and LWT out program types (see
+ * lwt_inout_ops). Ids not listed here fall back to sk_filter_func_proto().
+ * lwt_xmit_func_proto() also uses this function as its fallback.
+ */
+static const struct bpf_func_proto *
+lwt_inout_func_proto(enum bpf_func_id func_id)
+{
+       switch (func_id) {
+       case BPF_FUNC_skb_load_bytes:
+               return &bpf_skb_load_bytes_proto;
+       case BPF_FUNC_skb_pull_data:
+               return &bpf_skb_pull_data_proto;
+       case BPF_FUNC_csum_diff:
+               return &bpf_csum_diff_proto;
+       case BPF_FUNC_get_cgroup_classid:
+               return &bpf_get_cgroup_classid_proto;
+       case BPF_FUNC_get_route_realm:
+               return &bpf_get_route_realm_proto;
+       case BPF_FUNC_get_hash_recalc:
+               return &bpf_get_hash_recalc_proto;
+       case BPF_FUNC_perf_event_output:
+               return &bpf_skb_event_output_proto;
+       case BPF_FUNC_get_smp_processor_id:
+               return &bpf_get_smp_processor_id_proto;
+       case BPF_FUNC_skb_under_cgroup:
+               return &bpf_skb_under_cgroup_proto;
+       default:
+               return sk_filter_func_proto(func_id);
+       }
+}
+
+/* Helper lookup for LWT xmit programs. On top of everything
+ * lwt_inout_func_proto() offers, xmit programs get the tunnel metadata,
+ * redirect, packet resizing, packet store and checksum/hash helpers.
+ */
+static const struct bpf_func_proto *
+lwt_xmit_func_proto(enum bpf_func_id func_id)
+{
+       switch (func_id) {
+       /* Both tunnel setters are resolved by the same lookup routine. */
+       case BPF_FUNC_skb_set_tunnel_key:
+       case BPF_FUNC_skb_set_tunnel_opt:
+               return bpf_get_skb_set_tunnel_proto(func_id);
+       case BPF_FUNC_skb_get_tunnel_key:
+               return &bpf_skb_get_tunnel_key_proto;
+       case BPF_FUNC_skb_get_tunnel_opt:
+               return &bpf_skb_get_tunnel_opt_proto;
+       case BPF_FUNC_redirect:
+               return &bpf_redirect_proto;
+       case BPF_FUNC_clone_redirect:
+               return &bpf_clone_redirect_proto;
+       case BPF_FUNC_skb_change_tail:
+               return &bpf_skb_change_tail_proto;
+       case BPF_FUNC_skb_change_head:
+               return &bpf_skb_change_head_proto;
+       case BPF_FUNC_skb_store_bytes:
+               return &bpf_skb_store_bytes_proto;
+       case BPF_FUNC_csum_update:
+               return &bpf_csum_update_proto;
+       case BPF_FUNC_l3_csum_replace:
+               return &bpf_l3_csum_replace_proto;
+       case BPF_FUNC_l4_csum_replace:
+               return &bpf_l4_csum_replace_proto;
+       case BPF_FUNC_set_hash_invalid:
+               return &bpf_set_hash_invalid_proto;
+       default:
+               break;
+       }
+
+       return lwt_inout_func_proto(func_id);
+}
+
 static bool __is_valid_access(int off, int size, enum bpf_access_type type)
 {
        if (off < 0 || off >= sizeof(struct __sk_buff))
@@ -2615,6 +2785,65 @@ static bool sk_filter_is_valid_access(int off, int size,
        return __is_valid_access(off, size, type);
 }
 
+/* Context access check for the LWT program types.
+ *
+ * tc_classid is never accessible. Writes are limited to mark, priority and
+ * cb[0] through cb[4]. Accesses to data and data_end additionally report
+ * the packet pointer register types through @reg_type. Whatever passes
+ * these filters is finally checked by __is_valid_access().
+ */
+static bool lwt_is_valid_access(int off, int size,
+                               enum bpf_access_type type,
+                               enum bpf_reg_type *reg_type)
+{
+       /* Rejected for reads and writes alike. */
+       switch (off) {
+       case offsetof(struct __sk_buff, tc_classid):
+               return false;
+       }
+
+       if (type == BPF_WRITE) {
+               switch (off) {
+               case offsetof(struct __sk_buff, mark):
+               case offsetof(struct __sk_buff, priority):
+               case offsetof(struct __sk_buff, cb[0]) ...
+                    offsetof(struct __sk_buff, cb[4]):
+                       break;
+               default:
+                       return false;
+               }
+       }
+
+       switch (off) {
+       case offsetof(struct __sk_buff, data):
+               *reg_type = PTR_TO_PACKET;
+               break;
+       case offsetof(struct __sk_buff, data_end):
+               *reg_type = PTR_TO_PACKET_END;
+               break;
+       }
+
+       return __is_valid_access(off, size, type);
+}
+
+/* Context access check for cgroup sock programs operating on struct
+ * bpf_sock. Only bound_dev_if may be written; every access has to be an
+ * aligned 32-bit access that lies completely inside struct bpf_sock.
+ * @reg_type is left untouched.
+ */
+static bool sock_filter_is_valid_access(int off, int size,
+                                       enum bpf_access_type type,
+                                       enum bpf_reg_type *reg_type)
+{
+       /* bound_dev_if is the only writable field. */
+       if (type == BPF_WRITE &&
+           off != offsetof(struct bpf_sock, bound_dev_if))
+               return false;
+
+       if (off < 0 || off + size > sizeof(struct bpf_sock))
+               return false;
+
+       /* The verifier guarantees that size > 0. */
+       return off % size == 0 && size == sizeof(__u32);
+}
+
 static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
                               const struct bpf_prog *prog)
 {
@@ -2873,6 +3102,51 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
        return insn - insn_buf;
 }
 
+/* Rewrite a program's access to a struct bpf_sock field at @ctx_off into
+ * instruction(s) that operate on the underlying struct sock.
+ *
+ * The instructions are emitted into @insn_buf; the return value is the
+ * number of instructions written, which is 0 for an offset that is not
+ * handled here.
+ */
+static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
+                                         int dst_reg, int src_reg,
+                                         int ctx_off,
+                                         struct bpf_insn *insn_buf,
+                                         struct bpf_prog *prog)
+{
+       struct bpf_insn *insn = insn_buf;
+
+       switch (ctx_off) {
+       case offsetof(struct bpf_sock, bound_dev_if):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
+
+               /* The only field for which a store is generated. */
+               if (type == BPF_WRITE)
+                       *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+                                       offsetof(struct sock, sk_bound_dev_if));
+               else
+                       *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+                                     offsetof(struct sock, sk_bound_dev_if));
+               break;
+
+       case offsetof(struct bpf_sock, family):
+               BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
+
+               /* sk_family is two bytes wide, hence the half-word load. */
+               *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+                                     offsetof(struct sock, sk_family));
+               break;
+
+       case offsetof(struct bpf_sock, type):
+               /* Load the word at __sk_flags_offset, then mask and shift
+                * the type field out of it.
+                */
+               *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+                                     offsetof(struct sock, __sk_flags_offset));
+               *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_TYPE_MASK);
+               *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_TYPE_SHIFT);
+               break;
+
+       case offsetof(struct bpf_sock, protocol):
+               /* Same word as for type, with the protocol mask and shift. */
+               *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+                                     offsetof(struct sock, __sk_flags_offset));
+               *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_PROTO_MASK);
+               *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_PROTO_SHIFT);
+               break;
+       }
+
+       return insn - insn_buf;
+}
+
 static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
                                         int src_reg, int ctx_off,
                                         struct bpf_insn *insn_buf,
@@ -2940,6 +3214,31 @@ static const struct bpf_verifier_ops xdp_ops = {
        .convert_ctx_access     = xdp_convert_ctx_access,
 };
 
+/* cgroup skb programs: own helper lookup, socket filter context rules. */
+static const struct bpf_verifier_ops cg_skb_ops = {
+       .get_func_proto         = cg_skb_func_proto,
+       .is_valid_access        = sk_filter_is_valid_access,
+       .convert_ctx_access     = sk_filter_convert_ctx_access,
+};
+
+/* Shared by the LWT in and LWT out program types. */
+static const struct bpf_verifier_ops lwt_inout_ops = {
+       .get_func_proto         = lwt_inout_func_proto,
+       .is_valid_access        = lwt_is_valid_access,
+       .convert_ctx_access     = sk_filter_convert_ctx_access,
+};
+
+/* LWT xmit programs: larger helper set, plus the tc_cls_act prologue. */
+static const struct bpf_verifier_ops lwt_xmit_ops = {
+       .get_func_proto         = lwt_xmit_func_proto,
+       .is_valid_access        = lwt_is_valid_access,
+       .convert_ctx_access     = sk_filter_convert_ctx_access,
+       .gen_prologue           = tc_cls_act_prologue,
+};
+
+/* cgroup sock programs: the context is struct bpf_sock, not __sk_buff. */
+static const struct bpf_verifier_ops cg_sock_ops = {
+       .get_func_proto         = sk_filter_func_proto,
+       .is_valid_access        = sock_filter_is_valid_access,
+       .convert_ctx_access     = sock_filter_convert_ctx_access,
+};
+
 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
        .ops    = &sk_filter_ops,
        .type   = BPF_PROG_TYPE_SOCKET_FILTER,
@@ -2960,12 +3259,42 @@ static struct bpf_prog_type_list xdp_type __read_mostly = {
        .type   = BPF_PROG_TYPE_XDP,
 };
 
+/* Program type descriptors binding each new BPF program type to its
+ * verifier ops. All of them are registered in register_sk_filter_ops().
+ */
+static struct bpf_prog_type_list cg_skb_type __read_mostly = {
+       .ops    = &cg_skb_ops,
+       .type   = BPF_PROG_TYPE_CGROUP_SKB,
+};
+
+static struct bpf_prog_type_list lwt_in_type __read_mostly = {
+       .ops    = &lwt_inout_ops,
+       .type   = BPF_PROG_TYPE_LWT_IN,
+};
+
+static struct bpf_prog_type_list lwt_out_type __read_mostly = {
+       .ops    = &lwt_inout_ops,
+       .type   = BPF_PROG_TYPE_LWT_OUT,
+};
+
+static struct bpf_prog_type_list lwt_xmit_type __read_mostly = {
+       .ops    = &lwt_xmit_ops,
+       .type   = BPF_PROG_TYPE_LWT_XMIT,
+};
+
+static struct bpf_prog_type_list cg_sock_type __read_mostly = {
+       .ops    = &cg_sock_ops,
+       .type   = BPF_PROG_TYPE_CGROUP_SOCK
+};
+
 static int __init register_sk_filter_ops(void)
 {
        bpf_register_prog_type(&sk_filter_type);
        bpf_register_prog_type(&sched_cls_type);
        bpf_register_prog_type(&sched_act_type);
        bpf_register_prog_type(&xdp_type);
+       bpf_register_prog_type(&cg_skb_type);
+       bpf_register_prog_type(&cg_sock_type);
+       bpf_register_prog_type(&lwt_in_type);
+       bpf_register_prog_type(&lwt_out_type);
+       bpf_register_prog_type(&lwt_xmit_type);
 
        return 0;
 }
index 1a7b80f733764770c3ac44853314eec2837c0abd..1eb6f949e5b2f8d55263fff361bcd758a3a12afb 100644 (file)
@@ -122,7 +122,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
        struct flow_dissector_key_keyid *key_keyid;
        bool skip_vlan = false;
        u8 ip_proto = 0;
-       bool ret = false;
+       bool ret;
 
        if (!data) {
                data = skb->data;
@@ -246,15 +246,13 @@ ipv6:
        case htons(ETH_P_8021AD):
        case htons(ETH_P_8021Q): {
                const struct vlan_hdr *vlan;
+               struct vlan_hdr _vlan;
+               bool vlan_tag_present = skb && skb_vlan_tag_present(skb);
 
-               if (skb_vlan_tag_present(skb))
+               if (vlan_tag_present)
                        proto = skb->protocol;
 
-               if (!skb_vlan_tag_present(skb) ||
-                   proto == cpu_to_be16(ETH_P_8021Q) ||
-                   proto == cpu_to_be16(ETH_P_8021AD)) {
-                       struct vlan_hdr _vlan;
-
+               if (!vlan_tag_present || eth_type_vlan(skb->protocol)) {
                        vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
                                                    data, hlen, &_vlan);
                        if (!vlan)
@@ -272,7 +270,7 @@ ipv6:
                                                             FLOW_DISSECTOR_KEY_VLAN,
                                                             target_container);
 
-                       if (skb_vlan_tag_present(skb)) {
+                       if (vlan_tag_present) {
                                key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
                                key_vlan->vlan_priority =
                                        (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
@@ -551,12 +549,17 @@ ip_proto_again:
 out_good:
        ret = true;
 
-out_bad:
+       key_control->thoff = (u16)nhoff;
+out:
        key_basic->n_proto = proto;
        key_basic->ip_proto = ip_proto;
-       key_control->thoff = (u16)nhoff;
 
        return ret;
+
+out_bad:
+       ret = false;
+       key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen);
+       goto out;
 }
 EXPORT_SYMBOL(__skb_flow_dissect);
 
@@ -723,7 +726,7 @@ EXPORT_SYMBOL(make_flow_keys_digest);
 
 static struct flow_dissector flow_keys_dissector_symmetric __read_mostly;
 
-u32 __skb_get_hash_symmetric(struct sk_buff *skb)
+u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
 {
        struct flow_keys keys;
 
@@ -1010,4 +1013,4 @@ static int __init init_default_flow_dissectors(void)
        return 0;
 }
 
-late_initcall_sync(init_default_flow_dissectors);
+core_initcall(init_default_flow_dissectors);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
new file mode 100644 (file)
index 0000000..71bb3e2
--- /dev/null
@@ -0,0 +1,396 @@
+/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <net/lwtunnel.h>
+
+/* struct bpf_lwt_prog - one BPF program attached to an LWT hook
+ * @prog: program reference obtained in bpf_parse_prog(); dropped with
+ *        bpf_prog_put() in bpf_lwt_prog_destroy()
+ * @name: heap copy of the LWT_BPF_PROG_NAME attribute; kfree()d in
+ *        bpf_lwt_prog_destroy()
+ */
+struct bpf_lwt_prog {
+       struct bpf_prog *prog;
+       char *name;
+};
+
+/* struct bpf_lwt - BPF state kept in the data area of a lwtunnel_state
+ * @in: program run from bpf_input()
+ * @out: program run from bpf_output()
+ * @xmit: program run from bpf_xmit()
+ * @family: address family given to bpf_build_state() (AF_INET or AF_INET6)
+ */
+struct bpf_lwt {
+       struct bpf_lwt_prog in;
+       struct bpf_lwt_prog out;
+       struct bpf_lwt_prog xmit;
+       int family;
+};
+
+#define MAX_PROG_NAME 256
+
+/* Return the BPF state stored in the data area of @lwt. */
+static inline struct bpf_lwt *bpf_lwt_lwtunnel(struct lwtunnel_state *lwt)
+{
+       void *priv = lwt->data;
+
+       return priv;
+}
+
+#define NO_REDIRECT false
+#define CAN_REDIRECT true
+
+/* Run @lwt->prog on @skb and translate the program's verdict.
+ *
+ * BPF_OK is returned as is. BPF_REDIRECT is only honoured when
+ * @can_redirect is true: the packet goes through skb_do_redirect() and
+ * BPF_REDIRECT is returned when that call yields 0, otherwise its
+ * non-zero result is returned unchanged. With @can_redirect false the
+ * verdict is logged once and downgraded to BPF_OK. BPF_DROP frees @skb
+ * and returns -EPERM; any other verdict frees @skb and returns -EINVAL.
+ *
+ * @dst is not used by the body.
+ */
+static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
+                      struct dst_entry *dst, bool can_redirect)
+{
+       int ret;
+
+       /* Preempt disable is needed to protect per-cpu redirect_info between
+        * BPF prog and skb_do_redirect(). The call_rcu in bpf_prog_put() and
+        * access to maps strictly require a rcu_read_lock() for protection,
+        * mixing with BH RCU lock doesn't work.
+        */
+       preempt_disable();
+       rcu_read_lock();
+       bpf_compute_data_end(skb);
+       ret = bpf_prog_run_save_cb(lwt->prog, skb);
+       rcu_read_unlock();
+
+       /* Preemption stays disabled across the verdict handling so that
+        * skb_do_redirect() runs on the same CPU as the program did.
+        */
+       switch (ret) {
+       case BPF_OK:
+               break;
+
+       case BPF_REDIRECT:
+               if (unlikely(!can_redirect)) {
+                       pr_warn_once("Illegal redirect return code in prog %s\n",
+                                    lwt->name ? : "<unknown>");
+                       ret = BPF_OK;
+               } else {
+                       ret = skb_do_redirect(skb);
+                       if (ret == 0)
+                               ret = BPF_REDIRECT;
+               }
+               break;
+
+       case BPF_DROP:
+               kfree_skb(skb);
+               ret = -EPERM;
+               break;
+
+       default:
+               pr_warn_once("bpf-lwt: Illegal return value %u, expect packet loss\n", ret);
+               kfree_skb(skb);
+               ret = -EINVAL;
+               break;
+       }
+
+       preempt_enable();
+
+       return ret;
+}
+
+/* Input hook: run the "in" program, if one is attached, then pass the
+ * packet to the lwtunnel state's orig_input handler.
+ *
+ * Returns the negative value from run_lwt_bpf() when the program run
+ * fails, -EINVAL (after freeing @skb) when no orig_input handler is set,
+ * and otherwise whatever orig_input() returns.
+ */
+static int bpf_input(struct sk_buff *skb)
+{
+       struct dst_entry *dst = skb_dst(skb);
+       struct bpf_lwt *bpf;
+       int ret;
+
+       bpf = bpf_lwt_lwtunnel(dst->lwtstate);
+       if (bpf->in.prog) {
+               ret = run_lwt_bpf(skb, &bpf->in, dst, NO_REDIRECT);
+               if (ret < 0)
+                       return ret;
+       }
+
+       if (unlikely(!dst->lwtstate->orig_input)) {
+               /* This is the input path, so report the input program. */
+               pr_warn_once("orig_input not set on dst for prog %s\n",
+                            bpf->in.name);
+               kfree_skb(skb);
+               return -EINVAL;
+       }
+
+       return dst->lwtstate->orig_input(skb);
+}
+
+/* Output hook: run the "out" program, if one is attached, then pass the
+ * packet to the lwtunnel state's orig_output handler. Without such a
+ * handler the packet is freed and -EINVAL is returned.
+ */
+static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+       struct dst_entry *dst = skb_dst(skb);
+       struct bpf_lwt *bpf = bpf_lwt_lwtunnel(dst->lwtstate);
+
+       if (bpf->out.prog) {
+               int err = run_lwt_bpf(skb, &bpf->out, dst, NO_REDIRECT);
+
+               if (err < 0)
+                       return err;
+       }
+
+       if (likely(dst->lwtstate->orig_output))
+               return dst->lwtstate->orig_output(net, sk, skb);
+
+       pr_warn_once("orig_output not set on dst for prog %s\n",
+                    bpf->out.name);
+       kfree_skb(skb);
+
+       return -EINVAL;
+}
+
+/* Make sure @skb has enough headroom for the hard header of its dst
+ * device, expanding the head when it is short. Returns 0 on success or
+ * -ENOMEM when the expansion fails.
+ */
+static int xmit_check_hhlen(struct sk_buff *skb)
+{
+       int needed = skb_dst(skb)->dev->hard_header_len;
+       int shortfall;
+
+       if (skb_headroom(skb) >= needed)
+               return 0;
+
+       shortfall = HH_DATA_ALIGN(needed - skb_headroom(skb));
+       if (pskb_expand_head(skb, shortfall, 0, GFP_ATOMIC))
+               return -ENOMEM;
+
+       return 0;
+}
+
+/* Xmit hook: run the "xmit" program, if one is attached, and map its
+ * verdict to an LWTUNNEL_XMIT_* code. Any verdict other than BPF_OK or
+ * BPF_REDIRECT is returned unchanged.
+ */
+static int bpf_xmit(struct sk_buff *skb)
+{
+       struct dst_entry *dst = skb_dst(skb);
+       struct bpf_lwt *bpf = bpf_lwt_lwtunnel(dst->lwtstate);
+       int verdict;
+
+       if (!bpf->xmit.prog)
+               return LWTUNNEL_XMIT_CONTINUE;
+
+       verdict = run_lwt_bpf(skb, &bpf->xmit, dst, CAN_REDIRECT);
+       if (verdict == BPF_REDIRECT)
+               return LWTUNNEL_XMIT_DONE;
+       if (verdict != BPF_OK)
+               return verdict;
+
+       /* If the header was expanded, headroom might be too small for the
+        * L2 header to come, expand as needed.
+        */
+       verdict = xmit_check_hhlen(skb);
+       if (unlikely(verdict))
+               return verdict;
+
+       return LWTUNNEL_XMIT_CONTINUE;
+}
+
+/* Drop the program reference held by @prog, if any, and free its name. */
+static void bpf_lwt_prog_destroy(struct bpf_lwt_prog *prog)
+{
+       struct bpf_prog *held = prog->prog;
+
+       if (held)
+               bpf_prog_put(held);
+
+       kfree(prog->name);
+}
+
+/* Release the in, out and xmit programs held by the BPF state of @lwt. */
+static void bpf_destroy_state(struct lwtunnel_state *lwt)
+{
+       struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);
+       struct bpf_lwt_prog *hooks[] = { &bpf->in, &bpf->out, &bpf->xmit };
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(hooks); i++)
+               bpf_lwt_prog_destroy(hooks[i]);
+}
+
+/* Netlink policy for the attributes nested inside one program nest
+ * (used by bpf_parse_prog()): a u32 file descriptor and a NUL-terminated
+ * name bounded by MAX_PROG_NAME.
+ */
+static const struct nla_policy bpf_prog_policy[LWT_BPF_PROG_MAX + 1] = {
+       [LWT_BPF_PROG_FD]   = { .type = NLA_U32, },
+       [LWT_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
+                               .len = MAX_PROG_NAME },
+};
+
+/* Parse one nested program attribute into @prog.
+ *
+ * Both LWT_BPF_PROG_FD and LWT_BPF_PROG_NAME must be present. The name is
+ * duplicated into @prog->name and the fd is resolved to a program of
+ * @type. Returns 0 on success or a negative errno.
+ */
+static int bpf_parse_prog(struct nlattr *attr, struct bpf_lwt_prog *prog,
+                         enum bpf_prog_type type)
+{
+       struct nlattr *tb[LWT_BPF_PROG_MAX + 1];
+       struct bpf_prog *loaded;
+       int err;
+
+       err = nla_parse_nested(tb, LWT_BPF_PROG_MAX, attr, bpf_prog_policy);
+       if (err < 0)
+               return err;
+
+       if (!(tb[LWT_BPF_PROG_FD] && tb[LWT_BPF_PROG_NAME]))
+               return -EINVAL;
+
+       prog->name = nla_memdup(tb[LWT_BPF_PROG_NAME], GFP_KERNEL);
+       if (!prog->name)
+               return -ENOMEM;
+
+       /* On failure the name stays attached to @prog; bpf_build_state()
+        * frees it through bpf_destroy_state() on its error path.
+        */
+       loaded = bpf_prog_get_type(nla_get_u32(tb[LWT_BPF_PROG_FD]), type);
+       if (IS_ERR(loaded))
+               return PTR_ERR(loaded);
+
+       prog->prog = loaded;
+
+       return 0;
+}
+
+/* Netlink policy for the top-level BPF encap attributes parsed by
+ * bpf_build_state(): one nest per hook plus a u32 xmit headroom.
+ */
+static const struct nla_policy bpf_nl_policy[LWT_BPF_MAX + 1] = {
+       [LWT_BPF_IN]            = { .type = NLA_NESTED, },
+       [LWT_BPF_OUT]           = { .type = NLA_NESTED, },
+       [LWT_BPF_XMIT]          = { .type = NLA_NESTED, },
+       [LWT_BPF_XMIT_HEADROOM] = { .type = NLA_U32 },
+};
+
+/* Build the lwtunnel state for a BPF encap from netlink attribute @nla.
+ *
+ * Only AF_INET and AF_INET6 are accepted (-EAFNOSUPPORT otherwise) and at
+ * least one of LWT_BPF_IN, LWT_BPF_OUT or LWT_BPF_XMIT must be present
+ * (-EINVAL otherwise). On success the new state is stored in *@ts and 0
+ * is returned; on failure everything allocated here is released and a
+ * negative errno is returned. @dev and @cfg are not used by the body.
+ */
+static int bpf_build_state(struct net_device *dev, struct nlattr *nla,
+                          unsigned int family, const void *cfg,
+                          struct lwtunnel_state **ts)
+{
+       struct nlattr *tb[LWT_BPF_MAX + 1];
+       struct lwtunnel_state *newts;
+       struct bpf_lwt *bpf;
+       int ret;
+
+       if (family != AF_INET && family != AF_INET6)
+               return -EAFNOSUPPORT;
+
+       ret = nla_parse_nested(tb, LWT_BPF_MAX, nla, bpf_nl_policy);
+       if (ret < 0)
+               return ret;
+
+       if (!tb[LWT_BPF_IN] && !tb[LWT_BPF_OUT] && !tb[LWT_BPF_XMIT])
+               return -EINVAL;
+
+       /* NOTE(review): the errout path below calls bpf_destroy_state() on a
+        * partially filled state, which presumably relies on
+        * lwtunnel_state_alloc() returning zeroed memory - confirm.
+        */
+       newts = lwtunnel_state_alloc(sizeof(*bpf));
+       if (!newts)
+               return -ENOMEM;
+
+       newts->type = LWTUNNEL_ENCAP_BPF;
+       bpf = bpf_lwt_lwtunnel(newts);
+
+       /* Each present hook sets its redirect flag and parses its program. */
+       if (tb[LWT_BPF_IN]) {
+               newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
+               ret = bpf_parse_prog(tb[LWT_BPF_IN], &bpf->in,
+                                    BPF_PROG_TYPE_LWT_IN);
+               if (ret  < 0)
+                       goto errout;
+       }
+
+       if (tb[LWT_BPF_OUT]) {
+               newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+               ret = bpf_parse_prog(tb[LWT_BPF_OUT], &bpf->out,
+                                    BPF_PROG_TYPE_LWT_OUT);
+               if (ret < 0)
+                       goto errout;
+       }
+
+       if (tb[LWT_BPF_XMIT]) {
+               newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
+               ret = bpf_parse_prog(tb[LWT_BPF_XMIT], &bpf->xmit,
+                                    BPF_PROG_TYPE_LWT_XMIT);
+               if (ret < 0)
+                       goto errout;
+       }
+
+       /* Optional headroom request, capped at LWT_BPF_MAX_HEADROOM. */
+       if (tb[LWT_BPF_XMIT_HEADROOM]) {
+               u32 headroom = nla_get_u32(tb[LWT_BPF_XMIT_HEADROOM]);
+
+               if (headroom > LWT_BPF_MAX_HEADROOM) {
+                       ret = -ERANGE;
+                       goto errout;
+               }
+
+               newts->headroom = headroom;
+       }
+
+       bpf->family = family;
+       *ts = newts;
+
+       return 0;
+
+errout:
+       /* Drop any programs and names parsed so far, then the state itself. */
+       bpf_destroy_state(newts);
+       kfree(newts);
+       return ret;
+}
+
+/* Dump one program as nest @attr into @skb.
+ *
+ * Nothing is written and 0 is returned when @prog has no program
+ * attached. Otherwise a nest carrying LWT_BPF_PROG_NAME (when a name is
+ * set) is emitted and the result of nla_nest_end() is returned. Returns
+ * -EMSGSIZE when @skb runs out of room; the partially written nest is
+ * cancelled first so the message is left as it was on entry.
+ */
+static int bpf_fill_lwt_prog(struct sk_buff *skb, int attr,
+                            struct bpf_lwt_prog *prog)
+{
+       struct nlattr *nest;
+
+       if (!prog->prog)
+               return 0;
+
+       nest = nla_nest_start(skb, attr);
+       if (!nest)
+               return -EMSGSIZE;
+
+       if (prog->name &&
+           nla_put_string(skb, LWT_BPF_PROG_NAME, prog->name)) {
+               /* Do not leave a dangling, unterminated nest behind. */
+               nla_nest_cancel(skb, nest);
+               return -EMSGSIZE;
+       }
+
+       return nla_nest_end(skb, nest);
+}
+
+/* Dump the in, out and xmit programs of @lwt into @skb, stopping at the
+ * first one that does not fit. Returns 0 or -EMSGSIZE.
+ */
+static int bpf_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwt)
+{
+       struct bpf_lwt *bpf = bpf_lwt_lwtunnel(lwt);
+
+       if (bpf_fill_lwt_prog(skb, LWT_BPF_IN, &bpf->in) < 0)
+               return -EMSGSIZE;
+
+       if (bpf_fill_lwt_prog(skb, LWT_BPF_OUT, &bpf->out) < 0)
+               return -EMSGSIZE;
+
+       if (bpf_fill_lwt_prog(skb, LWT_BPF_XMIT, &bpf->xmit) < 0)
+               return -EMSGSIZE;
+
+       return 0;
+}
+
+/* Upper bound on the netlink space bpf_fill_encap_info() may need. The
+ * value does not depend on @lwtstate.
+ */
+static int bpf_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+       /* Per program: an attribute sized for a struct nlattr payload plus
+        * a maximum-length LWT_BPF_PROG_NAME.
+        */
+       const int per_prog = nla_total_size(sizeof(struct nlattr)) +
+                            nla_total_size(MAX_PROG_NAME);
+
+       /* LWT_BPF_IN + LWT_BPF_OUT + LWT_BPF_XMIT */
+       return per_prog + per_prog + per_prog;
+}
+
+/* Compare two programs by name. Returns 0 when both names are absent or
+ * equal and non-zero otherwise.
+ *
+ * NOTE(review): the only caller visible here is bpf_encap_cmp(); this
+ * could presumably be static - confirm no external users first.
+ */
+int bpf_lwt_prog_cmp(struct bpf_lwt_prog *a, struct bpf_lwt_prog *b)
+{
+       /* FIXME:
+        * The LWT state is currently rebuilt for delete requests which
+        * results in a new bpf_prog instance. Comparing names for now.
+        */
+       if (a->name && b->name)
+               return strcmp(a->name, b->name);
+
+       /* At least one name is missing: equal only if both are. */
+       return a->name || b->name;
+}
+
+/* Compare two BPF lwtunnel states hook by hook. Returns 0 when the in,
+ * out and xmit programs all compare equal, 1 otherwise.
+ */
+static int bpf_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+       struct bpf_lwt *lhs = bpf_lwt_lwtunnel(a);
+       struct bpf_lwt *rhs = bpf_lwt_lwtunnel(b);
+
+       if (bpf_lwt_prog_cmp(&lhs->in, &rhs->in))
+               return 1;
+
+       if (bpf_lwt_prog_cmp(&lhs->out, &rhs->out))
+               return 1;
+
+       return bpf_lwt_prog_cmp(&lhs->xmit, &rhs->xmit) != 0;
+}
+
+/* Callback table registered for LWTUNNEL_ENCAP_BPF by bpf_lwt_init(). */
+static const struct lwtunnel_encap_ops bpf_encap_ops = {
+       .build_state    = bpf_build_state,
+       .destroy_state  = bpf_destroy_state,
+       .input          = bpf_input,
+       .output         = bpf_output,
+       .xmit           = bpf_xmit,
+       .fill_encap     = bpf_fill_encap_info,
+       .get_encap_size = bpf_encap_nlsize,
+       .cmp_encap      = bpf_encap_cmp,
+};
+
+/* Register bpf_encap_ops for LWTUNNEL_ENCAP_BPF; returns the result of
+ * lwtunnel_encap_add_ops(). Hooked up as a subsys_initcall below.
+ */
+static int __init bpf_lwt_init(void)
+{
+       return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
+}
+
+subsys_initcall(bpf_lwt_init)
index 88fd64250b021ea342e8fa39cff4e21a53ece182..a5d4e866ce88b4d055798d9ea55fc905b351fb3d 100644 (file)
@@ -39,6 +39,10 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)
                return "MPLS";
        case LWTUNNEL_ENCAP_ILA:
                return "ILA";
+       case LWTUNNEL_ENCAP_SEG6:
+               return "SEG6";
+       case LWTUNNEL_ENCAP_BPF:
+               return "BPF";
        case LWTUNNEL_ENCAP_IP6:
        case LWTUNNEL_ENCAP_IP:
        case LWTUNNEL_ENCAP_NONE:
index 2ae929f9bd06f3be98cbd1add881641a0df1d7a4..782dd866366554e53dda3e6c69c807ec90bd0e08 100644 (file)
@@ -2291,13 +2291,10 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
                for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
                     n != NULL;
                     n = rcu_dereference_bh(n->next)) {
-                       if (!net_eq(dev_net(n->dev), net))
-                               continue;
-                       if (neigh_ifindex_filtered(n->dev, filter_idx))
-                               continue;
-                       if (neigh_master_filtered(n->dev, filter_master_idx))
-                               continue;
-                       if (idx < s_idx)
+                       if (idx < s_idx || !net_eq(dev_net(n->dev), net))
+                               goto next;
+                       if (neigh_ifindex_filtered(n->dev, filter_idx) ||
+                           neigh_master_filtered(n->dev, filter_master_idx))
                                goto next;
                        if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
                                            cb->nlh->nlmsg_seq,
@@ -2332,9 +2329,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
                if (h > s_h)
                        s_idx = 0;
                for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
-                       if (pneigh_net(n) != net)
-                               continue;
-                       if (idx < s_idx)
+                       if (idx < s_idx || pneigh_net(n) != net)
                                goto next;
                        if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
                                            cb->nlh->nlmsg_seq,
index 6e4f3472108015f0fbd7ac6f3dfe7e74a019e8be..b0c04cf4851d67b58c541fa602c6d53009cc9bee 100644 (file)
@@ -950,10 +950,13 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
        }
 
        while (--i >= new_num) {
+               struct kobject *kobj = &dev->_rx[i].kobj;
+
+               if (!list_empty(&dev_net(dev)->exit_list))
+                       kobj->uevent_suppress = 1;
                if (dev->sysfs_rx_queue_group)
-                       sysfs_remove_group(&dev->_rx[i].kobj,
-                                          dev->sysfs_rx_queue_group);
-               kobject_put(&dev->_rx[i].kobj);
+                       sysfs_remove_group(kobj, dev->sysfs_rx_queue_group);
+               kobject_put(kobj);
        }
 
        return error;
@@ -1021,7 +1024,6 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
        return sprintf(buf, "%lu", trans_timeout);
 }
 
-#ifdef CONFIG_XPS
 static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 {
        struct net_device *dev = queue->dev;
@@ -1033,6 +1035,21 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
        return i;
 }
 
+/* sysfs show handler: print the traffic class that netdev_txq_to_tc()
+ * reports for @queue, or return -EINVAL when that lookup is negative.
+ */
+static ssize_t show_traffic_class(struct netdev_queue *queue,
+                                 struct netdev_queue_attribute *attribute,
+                                 char *buf)
+{
+       int index = get_netdev_queue_index(queue);
+       int tc = netdev_txq_to_tc(queue->dev, index);
+
+       return tc < 0 ? -EINVAL : sprintf(buf, "%u\n", tc);
+}
+
+#ifdef CONFIG_XPS
 static ssize_t show_tx_maxrate(struct netdev_queue *queue,
                               struct netdev_queue_attribute *attribute,
                               char *buf)
@@ -1075,6 +1092,9 @@ static struct netdev_queue_attribute queue_tx_maxrate =
 static struct netdev_queue_attribute queue_trans_timeout =
        __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
 
+/* Read-only (S_IRUGO) "traffic_class" attribute served by
+ * show_traffic_class(); it has no store handler.
+ */
+static struct netdev_queue_attribute queue_traffic_class =
+       __ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL);
+
 #ifdef CONFIG_BQL
 /*
  * Byte queue limits sysfs structures and functions.
@@ -1190,29 +1210,38 @@ static ssize_t show_xps_map(struct netdev_queue *queue,
                            struct netdev_queue_attribute *attribute, char *buf)
 {
        struct net_device *dev = queue->dev;
+       int cpu, len, num_tc = 1, tc = 0;
        struct xps_dev_maps *dev_maps;
        cpumask_var_t mask;
        unsigned long index;
-       int i, len;
 
        if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
                return -ENOMEM;
 
        index = get_netdev_queue_index(queue);
 
+       if (dev->num_tc) {
+               num_tc = dev->num_tc;
+               tc = netdev_txq_to_tc(dev, index);
+               if (tc < 0) {
+                       /* Don't leak the cpumask allocated above. */
+                       free_cpumask_var(mask);
+                       return -EINVAL;
+               }
+       }
+
        rcu_read_lock();
        dev_maps = rcu_dereference(dev->xps_maps);
        if (dev_maps) {
-               for_each_possible_cpu(i) {
-                       struct xps_map *map =
-                           rcu_dereference(dev_maps->cpu_map[i]);
-                       if (map) {
-                               int j;
-                               for (j = 0; j < map->len; j++) {
-                                       if (map->queues[j] == index) {
-                                               cpumask_set_cpu(i, mask);
-                                               break;
-                                       }
+               for_each_possible_cpu(cpu) {
+                       int i, tci = cpu * num_tc + tc;
+                       struct xps_map *map;
+
+                       map = rcu_dereference(dev_maps->cpu_map[tci]);
+                       if (!map)
+                               continue;
+
+                       for (i = map->len; i--;) {
+                               if (map->queues[i] == index) {
+                                       cpumask_set_cpu(cpu, mask);
+                                       break;
                                }
                        }
                }
@@ -1260,6 +1289,7 @@ static struct netdev_queue_attribute xps_cpus_attribute =
 
 static struct attribute *netdev_queue_default_attrs[] = {
        &queue_trans_timeout.attr,
+       &queue_traffic_class.attr,
 #ifdef CONFIG_XPS
        &xps_cpus_attribute.attr,
        &queue_tx_maxrate.attr,
@@ -1340,6 +1370,8 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num)
        while (--i >= new_num) {
                struct netdev_queue *queue = dev->_tx + i;
 
+               if (!list_empty(&dev_net(dev)->exit_list))
+                       queue->kobj.uevent_suppress = 1;
 #ifdef CONFIG_BQL
                sysfs_remove_group(&queue->kobj, &dql_group);
 #endif
@@ -1525,6 +1557,9 @@ void netdev_unregister_kobject(struct net_device *ndev)
 {
        struct device *dev = &(ndev->dev);
 
+       if (!list_empty(&dev_net(ndev)->exit_list))
+               dev_set_uevent_suppress(dev, 1);
+
        kobject_get(&dev->kobj);
 
        remove_queue_kobjects(ndev);
index b9243b14af177006d14d0fe37c7d2e56f6cf625f..a38feac547d7778eafbc69b4e4b8f7b977ed1492 100644 (file)
@@ -55,7 +55,7 @@ static struct net_generic *net_alloc_generic(void)
        return ng;
 }
 
-static int net_assign_generic(struct net *net, int id, void *data)
+static int net_assign_generic(struct net *net, unsigned int id, void *data)
 {
        struct net_generic *ng, *old_ng;
 
@@ -122,8 +122,7 @@ out:
 static void ops_free(const struct pernet_operations *ops, struct net *net)
 {
        if (ops->id && ops->size) {
-               int id = *ops->id;
-               kfree(net_generic(net, id));
+               kfree(net_generic(net, *ops->id));
        }
 }
 
@@ -215,13 +214,16 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id);
  */
 int peernet2id_alloc(struct net *net, struct net *peer)
 {
+       unsigned long flags;
        bool alloc;
        int id;
 
-       spin_lock_bh(&net->nsid_lock);
+       if (atomic_read(&net->count) == 0)
+               return NETNSA_NSID_NOT_ASSIGNED;
+       spin_lock_irqsave(&net->nsid_lock, flags);
        alloc = atomic_read(&peer->count) == 0 ? false : true;
        id = __peernet2id_alloc(net, peer, &alloc);
-       spin_unlock_bh(&net->nsid_lock);
+       spin_unlock_irqrestore(&net->nsid_lock, flags);
        if (alloc && id >= 0)
                rtnl_net_notifyid(net, RTM_NEWNSID, id);
        return id;
@@ -230,11 +232,12 @@ int peernet2id_alloc(struct net *net, struct net *peer)
 /* This function returns, if assigned, the id of a peer netns. */
 int peernet2id(struct net *net, struct net *peer)
 {
+       unsigned long flags;
        int id;
 
-       spin_lock_bh(&net->nsid_lock);
+       spin_lock_irqsave(&net->nsid_lock, flags);
        id = __peernet2id(net, peer);
-       spin_unlock_bh(&net->nsid_lock);
+       spin_unlock_irqrestore(&net->nsid_lock, flags);
        return id;
 }
 EXPORT_SYMBOL(peernet2id);
@@ -249,17 +252,18 @@ bool peernet_has_id(struct net *net, struct net *peer)
 
 struct net *get_net_ns_by_id(struct net *net, int id)
 {
+       unsigned long flags;
        struct net *peer;
 
        if (id < 0)
                return NULL;
 
        rcu_read_lock();
-       spin_lock_bh(&net->nsid_lock);
+       spin_lock_irqsave(&net->nsid_lock, flags);
        peer = idr_find(&net->netns_ids, id);
        if (peer)
                get_net(peer);
-       spin_unlock_bh(&net->nsid_lock);
+       spin_unlock_irqrestore(&net->nsid_lock, flags);
        rcu_read_unlock();
 
        return peer;
@@ -429,17 +433,17 @@ static void cleanup_net(struct work_struct *work)
                for_each_net(tmp) {
                        int id;
 
-                       spin_lock_bh(&tmp->nsid_lock);
+                       spin_lock_irq(&tmp->nsid_lock);
                        id = __peernet2id(tmp, net);
                        if (id >= 0)
                                idr_remove(&tmp->netns_ids, id);
-                       spin_unlock_bh(&tmp->nsid_lock);
+                       spin_unlock_irq(&tmp->nsid_lock);
                        if (id >= 0)
                                rtnl_net_notifyid(tmp, RTM_DELNSID, id);
                }
-               spin_lock_bh(&net->nsid_lock);
+               spin_lock_irq(&net->nsid_lock);
                idr_destroy(&net->netns_ids);
-               spin_unlock_bh(&net->nsid_lock);
+               spin_unlock_irq(&net->nsid_lock);
 
        }
        rtnl_unlock();
@@ -568,6 +572,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
        struct net *net = sock_net(skb->sk);
        struct nlattr *tb[NETNSA_MAX + 1];
+       unsigned long flags;
        struct net *peer;
        int nsid, err;
 
@@ -588,15 +593,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
        if (IS_ERR(peer))
                return PTR_ERR(peer);
 
-       spin_lock_bh(&net->nsid_lock);
+       spin_lock_irqsave(&net->nsid_lock, flags);
        if (__peernet2id(net, peer) >= 0) {
-               spin_unlock_bh(&net->nsid_lock);
+               spin_unlock_irqrestore(&net->nsid_lock, flags);
                err = -EEXIST;
                goto out;
        }
 
        err = alloc_netid(net, peer, nsid);
-       spin_unlock_bh(&net->nsid_lock);
+       spin_unlock_irqrestore(&net->nsid_lock, flags);
        if (err >= 0) {
                rtnl_net_notifyid(net, RTM_NEWNSID, err);
                err = 0;
@@ -718,10 +723,11 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
                .idx = 0,
                .s_idx = cb->args[0],
        };
+       unsigned long flags;
 
-       spin_lock_bh(&net->nsid_lock);
+       spin_lock_irqsave(&net->nsid_lock, flags);
        idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
-       spin_unlock_bh(&net->nsid_lock);
+       spin_unlock_irqrestore(&net->nsid_lock, flags);
 
        cb->args[0] = net_cb.idx;
        return skb->len;
@@ -876,7 +882,7 @@ again:
                        }
                        return error;
                }
-               max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
+               max_gen_ptrs = max(max_gen_ptrs, *ops->id);
        }
        error = __register_pernet_operations(list, ops);
        if (error) {
index 53599bd0c82df605e6c4b8a6e4f0ef6aa2ee9fee..9424673009c14e0fb288b8e4041dba596b37ee8d 100644 (file)
@@ -171,12 +171,12 @@ static void poll_one_napi(struct napi_struct *napi)
 static void poll_napi(struct net_device *dev)
 {
        struct napi_struct *napi;
+       int cpu = smp_processor_id();
 
        list_for_each_entry(napi, &dev->napi_list, dev_list) {
-               if (napi->poll_owner != smp_processor_id() &&
-                   spin_trylock(&napi->poll_lock)) {
+               if (cmpxchg(&napi->poll_owner, -1, cpu) == -1) {
                        poll_one_napi(napi);
-                       spin_unlock(&napi->poll_lock);
+                       smp_store_release(&napi->poll_owner, -1);
                }
        }
 }
index 5219a9e2127aeda719fce840e594cf0f4fe058e7..8e69ce4722364e77f78dd3da256680df37c733c0 100644 (file)
 #define M_QUEUE_XMIT           2       /* Inject packet into qdisc */
 
 /* If lock -- protects updating of if_list */
-#define   if_lock(t)           spin_lock(&(t->if_lock));
-#define   if_unlock(t)           spin_unlock(&(t->if_lock));
+#define   if_lock(t)           mutex_lock(&(t->if_lock));
+#define   if_unlock(t)           mutex_unlock(&(t->if_lock));
 
 /* Used to help with determining the pkts on receive */
 #define PKTGEN_MAGIC 0xbe9be955
@@ -413,7 +413,7 @@ struct pktgen_hdr {
 };
 
 
-static int pg_net_id __read_mostly;
+static unsigned int pg_net_id __read_mostly;
 
 struct pktgen_net {
        struct net              *net;
@@ -423,7 +423,7 @@ struct pktgen_net {
 };
 
 struct pktgen_thread {
-       spinlock_t if_lock;             /* for list of devices */
+       struct mutex if_lock;           /* for list of devices */
        struct list_head if_list;       /* All device here */
        struct list_head th_list;
        struct task_struct *tsk;
@@ -2010,11 +2010,13 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
 {
        struct pktgen_thread *t;
 
+       mutex_lock(&pktgen_thread_lock);
+
        list_for_each_entry(t, &pn->pktgen_threads, th_list) {
                struct pktgen_dev *pkt_dev;
 
-               rcu_read_lock();
-               list_for_each_entry_rcu(pkt_dev, &t->if_list, list) {
+               if_lock(t);
+               list_for_each_entry(pkt_dev, &t->if_list, list) {
                        if (pkt_dev->odev != dev)
                                continue;
 
@@ -2029,8 +2031,9 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d
                                       dev->name);
                        break;
                }
-               rcu_read_unlock();
+               if_unlock(t);
        }
+       mutex_unlock(&pktgen_thread_lock);
 }
 
 static int pktgen_device_event(struct notifier_block *unused,
@@ -3762,7 +3765,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn)
                return -ENOMEM;
        }
 
-       spin_lock_init(&t->if_lock);
+       mutex_init(&t->if_lock);
        t->cpu = cpu;
 
        INIT_LIST_HEAD(&t->if_list);
index fb7348f135014fcea21b621ab30e1ad02b62448d..4cb84f0449a814ef1bda5e87b0499052bfdba940 100644 (file)
@@ -275,6 +275,7 @@ int rtnl_unregister(int protocol, int msgtype)
 
        rtnl_msg_handlers[protocol][msgindex].doit = NULL;
        rtnl_msg_handlers[protocol][msgindex].dumpit = NULL;
+       rtnl_msg_handlers[protocol][msgindex].calcit = NULL;
 
        return 0;
 }
@@ -839,18 +840,20 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
        if (dev->dev.parent && dev_is_pci(dev->dev.parent) &&
            (ext_filter_mask & RTEXT_FILTER_VF)) {
                int num_vfs = dev_num_vf(dev->dev.parent);
-               size_t size = nla_total_size(sizeof(struct nlattr));
-               size += nla_total_size(num_vfs * sizeof(struct nlattr));
+               size_t size = nla_total_size(0);
                size += num_vfs *
-                       (nla_total_size(sizeof(struct ifla_vf_mac)) +
-                        nla_total_size(MAX_VLAN_LIST_LEN *
-                                       sizeof(struct nlattr)) +
+                       (nla_total_size(0) +
+                        nla_total_size(sizeof(struct ifla_vf_mac)) +
+                        nla_total_size(sizeof(struct ifla_vf_vlan)) +
+                        nla_total_size(0) + /* nest IFLA_VF_VLAN_LIST */
                         nla_total_size(MAX_VLAN_LIST_LEN *
                                        sizeof(struct ifla_vf_vlan_info)) +
                         nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
+                        nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
                         nla_total_size(sizeof(struct ifla_vf_rate)) +
                         nla_total_size(sizeof(struct ifla_vf_link_state)) +
                         nla_total_size(sizeof(struct ifla_vf_rss_query_en)) +
+                        nla_total_size(0) + /* nest IFLA_VF_STATS */
                         /* IFLA_VF_STATS_RX_PACKETS */
                         nla_total_size_64bit(sizeof(__u64)) +
                         /* IFLA_VF_STATS_TX_PACKETS */
@@ -898,7 +901,8 @@ static size_t rtnl_port_size(const struct net_device *dev,
 
 static size_t rtnl_xdp_size(const struct net_device *dev)
 {
-       size_t xdp_size = nla_total_size(1);    /* XDP_ATTACHED */
+       size_t xdp_size = nla_total_size(0) +   /* nest IFLA_XDP */
+                         nla_total_size(1);    /* XDP_ATTACHED */
 
        if (!dev->netdev_ops->ndo_xdp)
                return 0;
@@ -1501,6 +1505,7 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
 static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
        [IFLA_XDP_FD]           = { .type = NLA_S32 },
        [IFLA_XDP_ATTACHED]     = { .type = NLA_U8 },
+       [IFLA_XDP_FLAGS]        = { .type = NLA_U32 },
 };
 
 static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla)
@@ -1605,7 +1610,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
                head = &net->dev_index_head[h];
                hlist_for_each_entry(dev, head, index_hlist) {
                        if (link_dump_filtered(dev, master_idx, kind_ops))
-                               continue;
+                               goto cont;
                        if (idx < s_idx)
                                goto cont;
                        err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK,
@@ -2160,6 +2165,7 @@ static int do_setlink(const struct sk_buff *skb,
 
        if (tb[IFLA_XDP]) {
                struct nlattr *xdp[IFLA_XDP_MAX + 1];
+               u32 xdp_flags = 0;
 
                err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP],
                                       ifla_xdp_policy);
@@ -2170,9 +2176,19 @@ static int do_setlink(const struct sk_buff *skb,
                        err = -EINVAL;
                        goto errout;
                }
+
+               if (xdp[IFLA_XDP_FLAGS]) {
+                       xdp_flags = nla_get_u32(xdp[IFLA_XDP_FLAGS]);
+                       if (xdp_flags & ~XDP_FLAGS_MASK) {
+                               err = -EINVAL;
+                               goto errout;
+                       }
+               }
+
                if (xdp[IFLA_XDP_FD]) {
                        err = dev_change_xdp_fd(dev,
-                                               nla_get_s32(xdp[IFLA_XDP_FD]));
+                                               nla_get_s32(xdp[IFLA_XDP_FD]),
+                                               xdp_flags);
                        if (err)
                                goto errout;
                        status |= DO_SETLINK_NOTIFY;
@@ -2733,7 +2749,7 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
                                                           ext_filter_mask));
        }
 
-       return min_ifinfo_dump_size;
+       return nlmsg_total_size(min_ifinfo_dump_size);
 }
 
 static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
@@ -2848,7 +2864,10 @@ nla_put_failure:
 
 static inline size_t rtnl_fdb_nlmsg_size(void)
 {
-       return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN);
+       return NLMSG_ALIGN(sizeof(struct ndmsg)) +
+              nla_total_size(ETH_ALEN) +       /* NDA_LLADDR */
+              nla_total_size(sizeof(u16)) +    /* NDA_VLAN */
+              0;
 }
 
 static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type,
@@ -3158,7 +3177,7 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
        err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc);
        if (err)
                goto out;
-       nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc);
+       err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc);
 out:
        netif_addr_unlock_bh(dev);
        return err;
@@ -3664,7 +3683,7 @@ static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev,
                if (!size)
                        continue;
 
-               if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+               if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
                        continue;
 
                attr = nla_reserve_64bit(skb, attr_id, size,
@@ -3705,7 +3724,7 @@ static int rtnl_get_offload_stats_size(const struct net_device *dev)
 
        for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
             attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
-               if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+               if (!dev->netdev_ops->ndo_has_offload_stats(dev, attr_id))
                        continue;
                size = rtnl_get_offload_stats_attr_size(attr_id);
                nla_size += nla_total_size_64bit(size);
index fd3ce461fbe6210ab95fbcbb4b5e6c862a262898..88a8e429fc3e6d5779a5fa989dfecb330acb1cbe 100644 (file)
@@ -12,6 +12,7 @@
 #include <net/secure_seq.h>
 
 #if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
+#include <net/tcp.h>
 #define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
 
 static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
@@ -40,8 +41,8 @@ static u32 seq_scale(u32 seq)
 #endif
 
 #if IS_ENABLED(CONFIG_IPV6)
-__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
-                                  __be16 sport, __be16 dport)
+u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
+                                __be16 sport, __be16 dport, u32 *tsoff)
 {
        u32 secret[MD5_MESSAGE_BYTES / 4];
        u32 hash[MD5_DIGEST_WORDS];
@@ -58,6 +59,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 
        md5_transform(hash, secret);
 
+       *tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0;
        return seq_scale(hash[0]);
 }
 EXPORT_SYMBOL(secure_tcpv6_sequence_number);
@@ -86,8 +88,8 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 
 #ifdef CONFIG_INET
 
-__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
-                                __be16 sport, __be16 dport)
+u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
+                              __be16 sport, __be16 dport, u32 *tsoff)
 {
        u32 hash[MD5_DIGEST_WORDS];
 
@@ -99,6 +101,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
 
        md5_transform(hash, net_secret);
 
+       *tsoff = sysctl_tcp_timestamps == 1 ? hash[1] : 0;
        return seq_scale(hash[0]);
 }
 
index 1e3e0087245b713de61f51687fc4e06ea1025b50..b45cd1494243fc99686016949f4546dbba11f424 100644 (file)
@@ -354,7 +354,7 @@ EXPORT_SYMBOL(build_skb);
 
 struct napi_alloc_cache {
        struct page_frag_cache page;
-       size_t skb_count;
+       unsigned int skb_count;
        void *skb_cache[NAPI_SKB_CACHE_SIZE];
 };
 
@@ -2656,7 +2656,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
        struct skb_frag_struct *fragfrom, *fragto;
 
        BUG_ON(shiftlen > skb->len);
-       BUG_ON(skb_headlen(skb));       /* Would corrupt stream */
+
+       if (skb_headlen(skb))
+               return 0;
 
        todo = shiftlen;
        from = 0;
@@ -3712,21 +3714,29 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sock_queue_err_skb);
 
+static bool is_icmp_err_skb(const struct sk_buff *skb)
+{
+       return skb && (SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+                      SKB_EXT_ERR(skb)->ee.ee_origin == SO_EE_ORIGIN_ICMP6);
+}
+
 struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
 {
        struct sk_buff_head *q = &sk->sk_error_queue;
-       struct sk_buff *skb, *skb_next;
+       struct sk_buff *skb, *skb_next = NULL;
+       bool icmp_next = false;
        unsigned long flags;
-       int err = 0;
 
        spin_lock_irqsave(&q->lock, flags);
        skb = __skb_dequeue(q);
        if (skb && (skb_next = skb_peek(q)))
-               err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
+               icmp_next = is_icmp_err_skb(skb_next);
        spin_unlock_irqrestore(&q->lock, flags);
 
-       sk->sk_err = err;
-       if (err)
+       if (is_icmp_err_skb(skb) && !icmp_next)
+               sk->sk_err = 0;
+
+       if (skb_next)
                sk->sk_error_report(sk);
 
        return skb;
@@ -3838,10 +3848,18 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
        if (!skb_may_tx_timestamp(sk, tsonly))
                return;
 
-       if (tsonly)
-               skb = alloc_skb(0, GFP_ATOMIC);
-       else
+       if (tsonly) {
+#ifdef CONFIG_INET
+               if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
+                   sk->sk_protocol == IPPROTO_TCP &&
+                   sk->sk_type == SOCK_STREAM)
+                       skb = tcp_get_timestamping_opt_stats(sk);
+               else
+#endif
+                       skb = alloc_skb(0, GFP_ATOMIC);
+       } else {
                skb = skb_clone(orig_skb, GFP_ATOMIC);
+       }
        if (!skb)
                return;
 
index d8e4532e89e7c28737c95c723e5f5b3d184a7805..d8c7f8c877ca3c4fd3ace2a43ffa6de4a07da4be 100644 (file)
@@ -453,7 +453,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 EXPORT_SYMBOL(sock_queue_rcv_skb);
 
 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
-                    const int nested, unsigned int trim_cap)
+                    const int nested, unsigned int trim_cap, bool refcounted)
 {
        int rc = NET_RX_SUCCESS;
 
@@ -487,7 +487,8 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
 
        bh_unlock_sock(sk);
 out:
-       sock_put(sk);
+       if (refcounted)
+               sock_put(sk);
        return rc;
 discard_and_relse:
        kfree_skb(skb);
@@ -853,6 +854,13 @@ set_rcvbuf:
                                sk->sk_tskey = 0;
                        }
                }
+
+               if (val & SOF_TIMESTAMPING_OPT_STATS &&
+                   !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
+                       ret = -EINVAL;
+                       break;
+               }
+
                sk->sk_tsflags = val;
                if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
                        sock_enable_timestamp(sk,
@@ -1543,6 +1551,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
                RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
 
                newsk->sk_err      = 0;
+               newsk->sk_err_soft = 0;
                newsk->sk_priority = 0;
                newsk->sk_incoming_cpu = raw_smp_processor_id();
                atomic64_set(&newsk->sk_cookie, 0);
@@ -2078,14 +2087,14 @@ void __sk_flush_backlog(struct sock *sk)
  */
 int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
 {
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        int rc;
-       DEFINE_WAIT(wait);
 
-       prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+       add_wait_queue(sk_sleep(sk), &wait);
        sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-       rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
+       rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
        sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-       finish_wait(sk_sleep(sk), &wait);
+       remove_wait_queue(sk_sleep(sk), &wait);
        return rc;
 }
 EXPORT_SYMBOL(sk_wait_data);
@@ -2460,8 +2469,11 @@ void sock_init_data(struct socket *sock, struct sock *sk)
                sk->sk_type     =       sock->type;
                sk->sk_wq       =       sock->wq;
                sock->sk        =       sk;
-       } else
+               sk->sk_uid      =       SOCK_INODE(sock)->i_uid;
+       } else {
                sk->sk_wq       =       NULL;
+               sk->sk_uid      =       make_kuid(sock_net(sk)->user_ns, 0);
+       }
 
        rwlock_init(&sk->sk_callback_lock);
        lockdep_set_class_and_name(&sk->sk_callback_lock,
index e92b759d906c1bbcad5ff3ecc977d6393df90361..9a1a352fd1ebe598e4925bcda037dc0e4a2288bc 100644 (file)
@@ -129,7 +129,6 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2)
 
        return 0;
 }
-EXPORT_SYMBOL(reuseport_add_sock);
 
 static void reuseport_free_rcu(struct rcu_head *head)
 {
index 1086c8b280a868101df7b44691eccac40bd7b3e1..f575bcf64af2c32f684f178ea553338b00a9a051 100644 (file)
@@ -53,8 +53,8 @@ void sk_stream_write_space(struct sock *sk)
  */
 int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
 {
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct task_struct *tsk = current;
-       DEFINE_WAIT(wait);
        int done;
 
        do {
@@ -68,13 +68,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
                if (signal_pending(tsk))
                        return sock_intr_errno(*timeo_p);
 
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+               add_wait_queue(sk_sleep(sk), &wait);
                sk->sk_write_pending++;
                done = sk_wait_event(sk, timeo_p,
                                     !sk->sk_err &&
                                     !((1 << sk->sk_state) &
-                                      ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
-               finish_wait(sk_sleep(sk), &wait);
+                                      ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait);
+               remove_wait_queue(sk_sleep(sk), &wait);
                sk->sk_write_pending--;
        } while (!done);
        return 0;
@@ -94,16 +94,16 @@ static inline int sk_stream_closing(struct sock *sk)
 void sk_stream_wait_close(struct sock *sk, long timeout)
 {
        if (timeout) {
-               DEFINE_WAIT(wait);
+               DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+               add_wait_queue(sk_sleep(sk), &wait);
 
                do {
-                       prepare_to_wait(sk_sleep(sk), &wait,
-                                       TASK_INTERRUPTIBLE);
-                       if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
+                       if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk), &wait))
                                break;
                } while (!signal_pending(current) && timeout);
 
-               finish_wait(sk_sleep(sk), &wait);
+               remove_wait_queue(sk_sleep(sk), &wait);
        }
 }
 EXPORT_SYMBOL(sk_stream_wait_close);
@@ -119,16 +119,16 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
        long vm_wait = 0;
        long current_timeo = *timeo_p;
        bool noblock = (*timeo_p ? false : true);
-       DEFINE_WAIT(wait);
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
        if (sk_stream_memory_free(sk))
                current_timeo = vm_wait = (prandom_u32() % (HZ / 5)) + 2;
 
+       add_wait_queue(sk_sleep(sk), &wait);
+
        while (1) {
                sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-
                if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
                        goto do_error;
                if (!*timeo_p) {
@@ -147,7 +147,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
                sk_wait_event(sk, &current_timeo, sk->sk_err ||
                                                  (sk->sk_shutdown & SEND_SHUTDOWN) ||
                                                  (sk_stream_memory_free(sk) &&
-                                                 !vm_wait));
+                                                 !vm_wait), &wait);
                sk->sk_write_pending--;
 
                if (vm_wait) {
@@ -161,7 +161,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
                *timeo_p = current_timeo;
        }
 out:
-       finish_wait(sk_sleep(sk), &wait);
+       remove_wait_queue(sk_sleep(sk), &wait);
        return err;
 
 do_error:
index 345a3aeb8c7e36449a765298cd6512eab8cfef4b..fda321d814d6c94e650fa76f5151c1686f459d66 100644 (file)
@@ -235,7 +235,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
 {
        const struct iphdr *iph = (struct iphdr *)skb->data;
        const u8 offset = iph->ihl << 2;
-       const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
+       const struct dccp_hdr *dh;
        struct dccp_sock *dp;
        struct inet_sock *inet;
        const int type = icmp_hdr(skb)->type;
@@ -245,11 +245,13 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
        int err;
        struct net *net = dev_net(skb->dev);
 
-       if (skb->len < offset + sizeof(*dh) ||
-           skb->len < offset + __dccp_basic_hdr_len(dh)) {
-               __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
-               return;
-       }
+       /* Only need dccph_dport & dccph_sport which are the first
+        * 4 bytes in dccp header.
+        * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
+        */
+       BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
+       BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+       dh = (struct dccp_hdr *)(skb->data + offset);
 
        sk = __inet_lookup_established(net, &dccp_hashinfo,
                                       iph->daddr, dh->dccph_dport,
@@ -588,13 +590,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        if (inet_csk_reqsk_queue_is_full(sk))
                goto drop;
 
-       /*
-        * Accept backlog is full. If we have already queued enough
-        * of warm entries in syn queue, drop request. It is better than
-        * clogging syn queue with openreqs with exponentially increasing
-        * timeout.
-        */
-       if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+       if (sk_acceptq_is_full(sk))
                goto drop;
 
        req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true);
@@ -868,7 +864,7 @@ lookup:
                goto discard_and_relse;
        nf_reset(skb);
 
-       return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4);
+       return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted);
 
 no_dccp_socket:
        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
index 3828f94b234c1104a3e745b3c0a76ab343aed4b6..adfc790f71935913801f90d3ab46b7c133af4ec3 100644 (file)
@@ -70,7 +70,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                        u8 type, u8 code, int offset, __be32 info)
 {
        const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
-       const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
+       const struct dccp_hdr *dh;
        struct dccp_sock *dp;
        struct ipv6_pinfo *np;
        struct sock *sk;
@@ -78,12 +78,13 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
        __u64 seq;
        struct net *net = dev_net(skb->dev);
 
-       if (skb->len < offset + sizeof(*dh) ||
-           skb->len < offset + __dccp_basic_hdr_len(dh)) {
-               __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
-                                 ICMP6_MIB_INERRORS);
-               return;
-       }
+       /* Only need dccph_dport & dccph_sport which are the first
+        * 4 bytes in dccp header.
+        * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
+        */
+       BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
+       BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+       dh = (struct dccp_hdr *)(skb->data + offset);
 
        sk = __inet6_lookup_established(net, &dccp_hashinfo,
                                        &hdr->daddr, dh->dccph_dport,
@@ -325,7 +326,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
        if (inet_csk_reqsk_queue_is_full(sk))
                goto drop;
 
-       if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+       if (sk_acceptq_is_full(sk))
                goto drop;
 
        req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
@@ -738,7 +739,8 @@ lookup:
        if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
                goto discard_and_relse;
 
-       return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4) ? -1 : 0;
+       return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
+                               refcounted) ? -1 : 0;
 
 no_dccp_socket:
        if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
@@ -956,6 +958,7 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
        .getsockopt        = ipv6_getsockopt,
        .addr2sockaddr     = inet6_csk_addr2sockaddr,
        .sockaddr_len      = sizeof(struct sockaddr_in6),
+       .bind_conflict     = inet6_csk_bind_conflict,
 #ifdef CONFIG_COMPAT
        .compat_setsockopt = compat_ipv6_setsockopt,
        .compat_getsockopt = compat_ipv6_getsockopt,
index 41e65804ddf59651c78ae58b697e7e5e603c9167..9fe25bf6329691ecf0acdc35df7278b074d446c1 100644 (file)
@@ -1009,6 +1009,10 @@ void dccp_close(struct sock *sk, long timeout)
                __kfree_skb(skb);
        }
 
+       /* If socket has been already reset kill it. */
+       if (sk->sk_state == DCCP_CLOSED)
+               goto adjudge_to_death;
+
        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
index 13d6b1a6e0fc2b0730827d93d154d6464a3e58ec..a90ed67027b0cfa6b8ba8a25fc72b1ccd9f2886b 100644 (file)
@@ -1718,7 +1718,7 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
         * See if there is data ready to read, sleep if there isn't
         */
        for(;;) {
-               DEFINE_WAIT(wait);
+               DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
                if (sk->sk_err)
                        goto out;
@@ -1749,11 +1749,11 @@ static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
                        goto out;
                }
 
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+               add_wait_queue(sk_sleep(sk), &wait);
                sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-               sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target));
+               sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target), &wait);
                sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-               finish_wait(sk_sleep(sk), &wait);
+               remove_wait_queue(sk_sleep(sk), &wait);
        }
 
        skb_queue_walk_safe(queue, skb, n) {
@@ -1999,19 +1999,19 @@ static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
                 * size.
                 */
                if (dn_queue_too_long(scp, queue, flags)) {
-                       DEFINE_WAIT(wait);
+                       DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
                        if (flags & MSG_DONTWAIT) {
                                err = -EWOULDBLOCK;
                                goto out;
                        }
 
-                       prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+                       add_wait_queue(sk_sleep(sk), &wait);
                        sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
                        sk_wait_event(sk, &timeo,
-                                     !dn_queue_too_long(scp, queue, flags));
+                                     !dn_queue_too_long(scp, queue, flags), &wait);
                        sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-                       finish_wait(sk_sleep(sk), &wait);
+                       remove_wait_queue(sk_sleep(sk), &wait);
                        continue;
                }
 
index f983c102ebe37275bd4d7d371503b3c4f0871104..8c5a479681ca9ed8c2686208fc570e819a4db31a 100644 (file)
@@ -62,6 +62,7 @@
 #include <net/dsa.h>
 #include <net/flow_dissector.h>
 #include <linux/uaccess.h>
+#include <net/pkt_sched.h>
 
 __setup("ether=", netdev_boot_setup);
 
@@ -359,7 +360,7 @@ void ether_setup(struct net_device *dev)
        dev->min_mtu            = ETH_MIN_MTU;
        dev->max_mtu            = ETH_DATA_LEN;
        dev->addr_len           = ETH_ALEN;
-       dev->tx_queue_len       = 1000; /* Ethernet wants good queues */
+       dev->tx_queue_len       = DEFAULT_TX_QUEUE_LEN;
        dev->flags              = IFF_BROADCAST|IFF_MULTICAST;
        dev->priv_flags         |= IFF_TX_SKB_SHARING;
 
@@ -440,7 +441,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head,
 
        skb_gro_pull(skb, sizeof(*eh));
        skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
-       pp = ptype->callbacks.gro_receive(head, skb);
+       pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
 
 out_unlock:
        rcu_read_unlock();
index 5ee1d43f13100849c6ac28a75082d65f69350d72..4ebe2aa3e7d3e944295e9d53890e3cb9b7a90139 100644 (file)
@@ -300,10 +300,6 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
 static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb,
                             struct hsr_frame_info *frame)
 {
-       struct net_device *master_dev;
-
-       master_dev = hsr_port_get_hsr(hsr, HSR_PT_MASTER)->dev;
-
        if (hsr_addr_is_self(hsr, eth_hdr(skb)->h_dest)) {
                frame->is_local_exclusive = true;
                skb->pkt_type = PACKET_HOST;
index d4d1617f43a8bfb842a8017dc51655ae1065942b..1ab30e7d3f99e19c5e54fd9747cfce7a5c1f559b 100644 (file)
@@ -131,13 +131,7 @@ static const struct nla_policy hsr_genl_policy[HSR_A_MAX + 1] = {
        [HSR_A_IF2_SEQ] = { .type = NLA_U16 },
 };
 
-static struct genl_family hsr_genl_family = {
-       .id = GENL_ID_GENERATE,
-       .hdrsize = 0,
-       .name = "HSR",
-       .version = 1,
-       .maxattr = HSR_A_MAX,
-};
+static struct genl_family hsr_genl_family;
 
 static const struct genl_multicast_group hsr_mcgrps[] = {
        { .name = "hsr-network", },
@@ -467,6 +461,18 @@ static const struct genl_ops hsr_ops[] = {
        },
 };
 
+static struct genl_family hsr_genl_family __ro_after_init = {
+       .hdrsize = 0,
+       .name = "HSR",
+       .version = 1,
+       .maxattr = HSR_A_MAX,
+       .module = THIS_MODULE,
+       .ops = hsr_ops,
+       .n_ops = ARRAY_SIZE(hsr_ops),
+       .mcgrps = hsr_mcgrps,
+       .n_mcgrps = ARRAY_SIZE(hsr_mcgrps),
+};
+
 int __init hsr_netlink_init(void)
 {
        int rc;
@@ -475,8 +481,7 @@ int __init hsr_netlink_init(void)
        if (rc)
                goto fail_rtnl_link_register;
 
-       rc = genl_register_family_with_ops_groups(&hsr_genl_family, hsr_ops,
-                                                 hsr_mcgrps);
+       rc = genl_register_family(&hsr_genl_family);
        if (rc)
                goto fail_genl_register_family;
 
index c8133c07ceee4ce29411a5f9ea47c0c529231223..6bde9e5a55031c9a184c418588a78790405b1b83 100644 (file)
 static unsigned int ieee802154_seq_num;
 static DEFINE_SPINLOCK(ieee802154_seq_lock);
 
-struct genl_family nl802154_family = {
-       .id             = GENL_ID_GENERATE,
-       .hdrsize        = 0,
-       .name           = IEEE802154_NL_NAME,
-       .version        = 1,
-       .maxattr        = IEEE802154_ATTR_MAX,
-};
-
 /* Requests to userspace */
 struct sk_buff *ieee802154_nl_create(int flags, u8 req)
 {
@@ -139,11 +131,21 @@ static const struct genl_multicast_group ieee802154_mcgrps[] = {
        [IEEE802154_BEACON_MCGRP] = { .name = IEEE802154_MCAST_BEACON_NAME, },
 };
 
+struct genl_family nl802154_family __ro_after_init = {
+       .hdrsize        = 0,
+       .name           = IEEE802154_NL_NAME,
+       .version        = 1,
+       .maxattr        = IEEE802154_ATTR_MAX,
+       .module         = THIS_MODULE,
+       .ops            = ieee8021154_ops,
+       .n_ops          = ARRAY_SIZE(ieee8021154_ops),
+       .mcgrps         = ieee802154_mcgrps,
+       .n_mcgrps       = ARRAY_SIZE(ieee802154_mcgrps),
+};
+
 int __init ieee802154_nl_init(void)
 {
-       return genl_register_family_with_ops_groups(&nl802154_family,
-                                                   ieee8021154_ops,
-                                                   ieee802154_mcgrps);
+       return genl_register_family(&nl802154_family);
 }
 
 void ieee802154_nl_exit(void)
index d90a4ed5b8a037e1dacef4cac0c44b158efabbb8..fc60cd061f3966a2381803a8e4d85206770253dc 100644 (file)
 #include "rdev-ops.h"
 #include "core.h"
 
-static int nl802154_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
-                            struct genl_info *info);
-
-static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
-                              struct genl_info *info);
-
 /* the netlink family */
-static struct genl_family nl802154_fam = {
-       .id = GENL_ID_GENERATE,         /* don't bother with a hardcoded ID */
-       .name = NL802154_GENL_NAME,     /* have users key off the name instead */
-       .hdrsize = 0,                   /* no private header */
-       .version = 1,                   /* no particular meaning now */
-       .maxattr = NL802154_ATTR_MAX,
-       .netnsok = true,
-       .pre_doit = nl802154_pre_doit,
-       .post_doit = nl802154_post_doit,
-};
+static struct genl_family nl802154_fam;
 
 /* multicast groups */
 enum nl802154_multicast_groups {
@@ -263,13 +248,14 @@ nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
 
        if (!cb->args[0]) {
                err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
-                                 nl802154_fam.attrbuf, nl802154_fam.maxattr,
+                                 genl_family_attrbuf(&nl802154_fam),
+                                 nl802154_fam.maxattr,
                                  nl802154_policy);
                if (err)
                        goto out_unlock;
 
                *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk),
-                                                           nl802154_fam.attrbuf);
+                                                           genl_family_attrbuf(&nl802154_fam));
                if (IS_ERR(*wpan_dev)) {
                        err = PTR_ERR(*wpan_dev);
                        goto out_unlock;
@@ -575,7 +561,7 @@ static int nl802154_dump_wpan_phy_parse(struct sk_buff *skb,
                                        struct netlink_callback *cb,
                                        struct nl802154_dump_wpan_phy_state *state)
 {
-       struct nlattr **tb = nl802154_fam.attrbuf;
+       struct nlattr **tb = genl_family_attrbuf(&nl802154_fam);
        int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
                              tb, nl802154_fam.maxattr, nl802154_policy);
 
@@ -2476,11 +2462,25 @@ static const struct genl_ops nl802154_ops[] = {
 #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
 };
 
+static struct genl_family nl802154_fam __ro_after_init = {
+       .name = NL802154_GENL_NAME,     /* have users key off the name instead */
+       .hdrsize = 0,                   /* no private header */
+       .version = 1,                   /* no particular meaning now */
+       .maxattr = NL802154_ATTR_MAX,
+       .netnsok = true,
+       .pre_doit = nl802154_pre_doit,
+       .post_doit = nl802154_post_doit,
+       .module = THIS_MODULE,
+       .ops = nl802154_ops,
+       .n_ops = ARRAY_SIZE(nl802154_ops),
+       .mcgrps = nl802154_mcgrps,
+       .n_mcgrps = ARRAY_SIZE(nl802154_mcgrps),
+};
+
 /* initialisation/exit functions */
-int nl802154_init(void)
+int __init nl802154_init(void)
 {
-       return genl_register_family_with_ops_groups(&nl802154_fam, nl802154_ops,
-                                                   nl802154_mcgrps);
+       return genl_register_family(&nl802154_fam);
 }
 
 void nl802154_exit(void)
index 1effc986739e5d068c7ee04f614ec3f0845c408e..24d2550492ee120d1a247563d480831a5f6dfe66 100644 (file)
@@ -374,8 +374,18 @@ lookup_protocol:
 
        if (sk->sk_prot->init) {
                err = sk->sk_prot->init(sk);
-               if (err)
+               if (err) {
+                       sk_common_release(sk);
+                       goto out;
+               }
+       }
+
+       if (!kern) {
+               err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+               if (err) {
                        sk_common_release(sk);
+                       goto out;
+               }
        }
 out:
        return err;
@@ -533,9 +543,9 @@ EXPORT_SYMBOL(inet_dgram_connect);
 
 static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
 {
-       DEFINE_WAIT(wait);
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
-       prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+       add_wait_queue(sk_sleep(sk), &wait);
        sk->sk_write_pending += writebias;
 
        /* Basic assumption: if someone sets sk->sk_err, he _must_
@@ -545,13 +555,12 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
         */
        while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
                release_sock(sk);
-               timeo = schedule_timeout(timeo);
+               timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
                lock_sock(sk);
                if (signal_pending(current) || !timeo)
                        break;
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
        }
-       finish_wait(sk_sleep(sk), &wait);
+       remove_wait_queue(sk_sleep(sk), &wait);
        sk->sk_write_pending -= writebias;
        return timeo;
 }
@@ -1391,7 +1400,7 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
        skb_gro_pull(skb, sizeof(*iph));
        skb_set_transport_header(skb, skb_gro_offset(skb));
 
-       pp = ops->callbacks.gro_receive(head, skb);
+       pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
 
 out_unlock:
        rcu_read_unlock();
index c3b80478226ed4aab6ed8ce1e75ea34c9a921385..121384bbb40ba24163c5aca2b84c8bd6bfb38f93 100644 (file)
@@ -151,7 +151,7 @@ static void fib_replace_table(struct net *net, struct fib_table *old,
 
 int fib_unmerge(struct net *net)
 {
-       struct fib_table *old, *new;
+       struct fib_table *old, *new, *main_table;
 
        /* attempt to fetch local table if it has been allocated */
        old = fib_get_table(net, RT_TABLE_LOCAL);
@@ -162,11 +162,21 @@ int fib_unmerge(struct net *net)
        if (!new)
                return -ENOMEM;
 
+       /* table is already unmerged */
+       if (new == old)
+               return 0;
+
        /* replace merged table with clean table */
-       if (new != old) {
-               fib_replace_table(net, old, new);
-               fib_free_table(old);
-       }
+       fib_replace_table(net, old, new);
+       fib_free_table(old);
+
+       /* attempt to fetch main table if it has been allocated */
+       main_table = fib_get_table(net, RT_TABLE_MAIN);
+       if (!main_table)
+               return 0;
+
+       /* flush local entries from main table */
+       fib_table_flush_external(main_table);
 
        return 0;
 }
@@ -610,6 +620,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
        [RTA_FLOW]              = { .type = NLA_U32 },
        [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
        [RTA_ENCAP]             = { .type = NLA_NESTED },
+       [RTA_UID]               = { .type = NLA_U32 },
 };
 
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
index 31cef3602585b50e463b017cff2837f7bdd0dfb0..026f309c51e9b6143e74efe2d31c8c5e7a7a3c26 100644 (file)
@@ -1743,8 +1743,10 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb)
                                local_l = fib_find_node(lt, &local_tp, l->key);
 
                        if (fib_insert_alias(lt, local_tp, local_l, new_fa,
-                                            NULL, l->key))
+                                            NULL, l->key)) {
+                               kmem_cache_free(fn_alias_kmem, new_fa);
                                goto out;
+                       }
                }
 
                /* stop loop if key wrapped back to 0 */
@@ -1760,6 +1762,71 @@ out:
        return NULL;
 }
 
+/* Caller must hold RTNL */
+void fib_table_flush_external(struct fib_table *tb)
+{
+       struct trie *t = (struct trie *)tb->tb_data;
+       struct key_vector *pn = t->kv;
+       unsigned long cindex = 1;
+       struct hlist_node *tmp;
+       struct fib_alias *fa;
+
+       /* walk trie in reverse order */
+       for (;;) {
+               unsigned char slen = 0;
+               struct key_vector *n;
+
+               if (!(cindex--)) {
+                       t_key pkey = pn->key;
+
+                       /* cannot resize the trie vector */
+                       if (IS_TRIE(pn))
+                               break;
+
+                       /* resize completed node */
+                       pn = resize(t, pn);
+                       cindex = get_index(pkey, pn);
+
+                       continue;
+               }
+
+               /* grab the next available node */
+               n = get_child(pn, cindex);
+               if (!n)
+                       continue;
+
+               if (IS_TNODE(n)) {
+                       /* record pn and cindex for leaf walking */
+                       pn = n;
+                       cindex = 1ul << n->bits;
+
+                       continue;
+               }
+
+               hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
+                       /* if alias was cloned to local then we just
+                        * need to remove the local copy from main
+                        */
+                       if (tb->tb_id != fa->tb_id) {
+                               hlist_del_rcu(&fa->fa_list);
+                               alias_free_mem_rcu(fa);
+                               continue;
+                       }
+
+                       /* record local slen */
+                       slen = fa->fa_slen;
+               }
+
+               /* update leaf slen */
+               n->slen = slen;
+
+               if (hlist_empty(&n->leaf)) {
+                       put_child_root(pn, n->key, NULL);
+                       node_free(n);
+               }
+       }
+}
+
 /* Caller must hold RTNL. */
 int fib_table_flush(struct net *net, struct fib_table *tb)
 {
@@ -2413,22 +2480,19 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
        struct key_vector *l, **tp = &iter->tnode;
        t_key key;
 
-       /* use cache location of next-to-find key */
+       /* use cached location of previously found key */
        if (iter->pos > 0 && pos >= iter->pos) {
-               pos -= iter->pos;
                key = iter->key;
        } else {
-               iter->pos = 0;
+               iter->pos = 1;
                key = 0;
        }
 
-       while ((l = leaf_walk_rcu(tp, key)) != NULL) {
+       pos -= iter->pos;
+
+       while ((l = leaf_walk_rcu(tp, key)) && (pos-- > 0)) {
                key = l->key + 1;
                iter->pos++;
-
-               if (--pos <= 0)
-                       break;
-
                l = NULL;
 
                /* handle unlikely case of a key wrap */
@@ -2437,7 +2501,7 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
        }
 
        if (l)
-               iter->key = key;        /* remember it */
+               iter->key = l->key;     /* remember it */
        else
                iter->pos = 0;          /* forget it */
 
@@ -2465,7 +2529,7 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
                return fib_route_get_idx(iter, *pos);
 
        iter->pos = 0;
-       iter->key = 0;
+       iter->key = KEY_MAX;
 
        return SEQ_START_TOKEN;
 }
@@ -2474,7 +2538,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        struct fib_route_iter *iter = seq->private;
        struct key_vector *l = NULL;
-       t_key key = iter->key;
+       t_key key = iter->key + 1;
 
        ++*pos;
 
@@ -2483,7 +2547,7 @@ static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
                l = leaf_walk_rcu(&iter->tnode, key);
 
        if (l) {
-               iter->key = l->key + 1;
+               iter->key = l->key;
                iter->pos++;
        } else {
                iter->pos = 0;
index cf50f7e2b0124d3bfa6ad2caae65cf1cf590ad44..805f6607f8d9a89661bed62db6b370b964d43f1d 100644 (file)
@@ -249,7 +249,7 @@ static struct sk_buff **fou_gro_receive(struct sock *sk,
        if (!ops || !ops->callbacks.gro_receive)
                goto out_unlock;
 
-       pp = ops->callbacks.gro_receive(head, skb);
+       pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
 
 out_unlock:
        rcu_read_unlock();
@@ -441,7 +441,7 @@ next_proto:
        if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive))
                goto out_unlock;
 
-       pp = ops->callbacks.gro_receive(head, skb);
+       pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
        flush = 0;
 
 out_unlock:
@@ -622,14 +622,7 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg)
        return err;
 }
 
-static struct genl_family fou_nl_family = {
-       .id             = GENL_ID_GENERATE,
-       .hdrsize        = 0,
-       .name           = FOU_GENL_NAME,
-       .version        = FOU_GENL_VERSION,
-       .maxattr        = FOU_ATTR_MAX,
-       .netnsok        = true,
-};
+static struct genl_family fou_nl_family;
 
 static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
        [FOU_ATTR_PORT] = { .type = NLA_U16, },
@@ -831,6 +824,17 @@ static const struct genl_ops fou_nl_ops[] = {
        },
 };
 
+static struct genl_family fou_nl_family __ro_after_init = {
+       .hdrsize        = 0,
+       .name           = FOU_GENL_NAME,
+       .version        = FOU_GENL_VERSION,
+       .maxattr        = FOU_ATTR_MAX,
+       .netnsok        = true,
+       .module         = THIS_MODULE,
+       .ops            = fou_nl_ops,
+       .n_ops          = ARRAY_SIZE(fou_nl_ops),
+};
+
 size_t fou_encap_hlen(struct ip_tunnel_encap *e)
 {
        return sizeof(struct udphdr);
@@ -1086,8 +1090,7 @@ static int __init fou_init(void)
        if (ret)
                goto exit;
 
-       ret = genl_register_family_with_ops(&fou_nl_family,
-                                           fou_nl_ops);
+       ret = genl_register_family(&fou_nl_family);
        if (ret < 0)
                goto unregister;
 
index 96e0efecefa6aa2f4bc97c098c08ee6c25f2e11c..d5cac99170b194151b16f614508f7fa0933ff2e1 100644 (file)
@@ -229,7 +229,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
        /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
        skb_gro_postpull_rcsum(skb, greh, grehlen);
 
-       pp = ptype->callbacks.gro_receive(head, skb);
+       pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
        flush = 0;
 
 out_unlock:
index 38abe70e595fabf472aa8fe094e71d070f781164..691146abde2df65345165516b4aad819d51968f2 100644 (file)
@@ -425,6 +425,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
        fl4.daddr = daddr;
        fl4.saddr = saddr;
        fl4.flowi4_mark = mark;
+       fl4.flowi4_uid = sock_net_uid(net, NULL);
        fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
        fl4.flowi4_proto = IPPROTO_ICMP;
        fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
@@ -473,11 +474,12 @@ static struct rtable *icmp_route_lookup(struct net *net,
                      param->replyopts.opt.opt.faddr : iph->saddr);
        fl4->saddr = saddr;
        fl4->flowi4_mark = mark;
+       fl4->flowi4_uid = sock_net_uid(net, NULL);
        fl4->flowi4_tos = RT_TOS(tos);
        fl4->flowi4_proto = IPPROTO_ICMP;
        fl4->fl4_icmp_type = type;
        fl4->fl4_icmp_code = code;
-       fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
+       fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
 
        security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
        rt = __ip_route_output_key_hash(net, fl4,
@@ -502,7 +504,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
        if (err)
                goto relookup_failed;
 
-       if (inet_addr_type_dev_table(net, skb_in->dev,
+       if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
                                     fl4_dec.saddr) == RTN_LOCAL) {
                rt2 = __ip_route_output_key(net, &fl4_dec);
                if (IS_ERR(rt2))
index 606cc3e85d2bc7b1fc02e5c2ea305a5bd65349c5..15db786d50ed28c7d31855a9582ab111752b3641 100644 (file)
@@ -162,7 +162,7 @@ static int unsolicited_report_interval(struct in_device *in_dev)
 }
 
 static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im);
-static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr);
+static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im);
 static void igmpv3_clear_delrec(struct in_device *in_dev);
 static int sf_setstate(struct ip_mc_list *pmc);
 static void sf_markstate(struct ip_mc_list *pmc);
@@ -1130,10 +1130,15 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
        spin_unlock_bh(&in_dev->mc_tomb_lock);
 }
 
-static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr)
+/*
+ * restore ip_mc_list deleted records
+ */
+static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 {
        struct ip_mc_list *pmc, *pmc_prev;
-       struct ip_sf_list *psf, *psf_next;
+       struct ip_sf_list *psf;
+       struct net *net = dev_net(in_dev->dev);
+       __be32 multiaddr = im->multiaddr;
 
        spin_lock_bh(&in_dev->mc_tomb_lock);
        pmc_prev = NULL;
@@ -1149,16 +1154,26 @@ static void igmpv3_del_delrec(struct in_device *in_dev, __be32 multiaddr)
                        in_dev->mc_tomb = pmc->next;
        }
        spin_unlock_bh(&in_dev->mc_tomb_lock);
+
+       spin_lock_bh(&im->lock);
        if (pmc) {
-               for (psf = pmc->tomb; psf; psf = psf_next) {
-                       psf_next = psf->sf_next;
-                       kfree(psf);
+               im->interface = pmc->interface;
+               im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+               im->sfmode = pmc->sfmode;
+               if (pmc->sfmode == MCAST_INCLUDE) {
+                       im->tomb = pmc->tomb;
+                       im->sources = pmc->sources;
+                       for (psf = im->sources; psf; psf = psf->sf_next)
+                               psf->sf_crcount = im->crcount;
                }
                in_dev_put(pmc->interface);
-               kfree(pmc);
        }
+       spin_unlock_bh(&im->lock);
 }
 
+/*
+ * flush ip_mc_list deleted records
+ */
 static void igmpv3_clear_delrec(struct in_device *in_dev)
 {
        struct ip_mc_list *pmc, *nextpmc;
@@ -1366,7 +1381,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
        ip_mc_hash_add(in_dev, im);
 
 #ifdef CONFIG_IP_MULTICAST
-       igmpv3_del_delrec(in_dev, im->multiaddr);
+       igmpv3_del_delrec(in_dev, im);
 #endif
        igmp_group_added(im);
        if (!in_dev->dead)
@@ -1626,8 +1641,12 @@ void ip_mc_remap(struct in_device *in_dev)
 
        ASSERT_RTNL();
 
-       for_each_pmc_rtnl(in_dev, pmc)
+       for_each_pmc_rtnl(in_dev, pmc) {
+#ifdef CONFIG_IP_MULTICAST
+               igmpv3_del_delrec(in_dev, pmc);
+#endif
                igmp_group_added(pmc);
+       }
 }
 
 /* Device going down */
@@ -1648,7 +1667,6 @@ void ip_mc_down(struct in_device *in_dev)
        in_dev->mr_gq_running = 0;
        if (del_timer(&in_dev->mr_gq_timer))
                __in_dev_put(in_dev);
-       igmpv3_clear_delrec(in_dev);
 #endif
 
        ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS);
@@ -1688,8 +1706,12 @@ void ip_mc_up(struct in_device *in_dev)
 #endif
        ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
-       for_each_pmc_rtnl(in_dev, pmc)
+       for_each_pmc_rtnl(in_dev, pmc) {
+#ifdef CONFIG_IP_MULTICAST
+               igmpv3_del_delrec(in_dev, pmc);
+#endif
                igmp_group_added(pmc);
+       }
 }
 
 /*
@@ -1704,13 +1726,13 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
 
        /* Deactivate timers */
        ip_mc_down(in_dev);
+#ifdef CONFIG_IP_MULTICAST
+       igmpv3_clear_delrec(in_dev);
+#endif
 
        while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
                in_dev->mc_list = i->next_rcu;
                in_dev->mc_count--;
-
-               /* We've dropped the groups in ip_mc_down already */
-               ip_mc_clear_src(i);
                ip_ma_put(i);
        }
 }
index 61a9deec299332aa431c13ad15197974306ff117..d5d3ead0a6c31e42e8843d30f8c643324a91b8e9 100644 (file)
@@ -415,7 +415,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
                           sk->sk_protocol, inet_sk_flowi_flags(sk),
                           (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
                           ireq->ir_loc_addr, ireq->ir_rmt_port,
-                          htons(ireq->ir_num));
+                          htons(ireq->ir_num), sk->sk_uid);
        security_req_classify_flow(req, flowi4_to_flowi(fl4));
        rt = ip_route_output_flow(net, fl4, sk);
        if (IS_ERR(rt))
@@ -452,7 +452,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
                           sk->sk_protocol, inet_sk_flowi_flags(sk),
                           (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
                           ireq->ir_loc_addr, ireq->ir_rmt_port,
-                          htons(ireq->ir_num));
+                          htons(ireq->ir_num), sk->sk_uid);
        security_req_classify_flow(req, flowi4_to_flowi(fl4));
        rt = ip_route_output_flow(net, fl4, sk);
        if (IS_ERR(rt))
index 3b34024202d8144d823abe1bce5cb2d34b24e9de..4dea33e5f29572e09c29621ee8eadc4e60a9a9a2 100644 (file)
@@ -861,10 +861,11 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
                         struct netlink_callback *cb,
                         const struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
+       bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
        struct net *net = sock_net(skb->sk);
-       int i, num, s_i, s_num;
        u32 idiag_states = r->idiag_states;
-       bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
+       int i, num, s_i, s_num;
+       struct sock *sk;
 
        if (idiag_states & TCPF_SYN_RECV)
                idiag_states |= TCPF_NEW_SYN_RECV;
@@ -877,7 +878,6 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 
                for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
                        struct inet_listen_hashbucket *ilb;
-                       struct sock *sk;
 
                        num = 0;
                        ilb = &hashinfo->listening_hash[i];
@@ -922,13 +922,14 @@ skip_listen_ht:
        if (!(idiag_states & ~TCPF_LISTEN))
                goto out;
 
+#define SKARR_SZ 16
        for (i = s_i; i <= hashinfo->ehash_mask; i++) {
                struct inet_ehash_bucket *head = &hashinfo->ehash[i];
                spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
                struct hlist_nulls_node *node;
-               struct sock *sk;
-
-               num = 0;
+               struct sock *sk_arr[SKARR_SZ];
+               int num_arr[SKARR_SZ];
+               int idx, accum, res;
 
                if (hlist_nulls_empty(&head->chain))
                        continue;
@@ -936,9 +937,12 @@ skip_listen_ht:
                if (i > s_i)
                        s_num = 0;
 
+next_chunk:
+               num = 0;
+               accum = 0;
                spin_lock_bh(lock);
                sk_nulls_for_each(sk, node, &head->chain) {
-                       int state, res;
+                       int state;
 
                        if (!net_eq(sock_net(sk), net))
                                continue;
@@ -962,21 +966,35 @@ skip_listen_ht:
                        if (!inet_diag_bc_sk(bc, sk))
                                goto next_normal;
 
-                       res = sk_diag_fill(sk, skb, r,
+                       sock_hold(sk);
+                       num_arr[accum] = num;
+                       sk_arr[accum] = sk;
+                       if (++accum == SKARR_SZ)
+                               break;
+next_normal:
+                       ++num;
+               }
+               spin_unlock_bh(lock);
+               res = 0;
+               for (idx = 0; idx < accum; idx++) {
+                       if (res >= 0) {
+                               res = sk_diag_fill(sk_arr[idx], skb, r,
                                           sk_user_ns(NETLINK_CB(cb->skb).sk),
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           cb->nlh, net_admin);
-                       if (res < 0) {
-                               spin_unlock_bh(lock);
-                               goto done;
+                               if (res < 0)
+                                       num = num_arr[idx];
                        }
-next_normal:
-                       ++num;
+                       sock_gen_put(sk_arr[idx]);
                }
-
-               spin_unlock_bh(lock);
+               if (res < 0)
+                       break;
                cond_resched();
+               if (accum == SKARR_SZ) {
+                       s_num = num + 1;
+                       goto next_chunk;
+               }
        }
 
 done:
index 77c20a489218c9cf1865f397b83f43bc58457dc6..ca97835bfec4b2291446a54d7f6bb1af408afc29 100644 (file)
@@ -25,6 +25,7 @@
 #include <net/inet_hashtables.h>
 #include <net/secure_seq.h>
 #include <net/ip.h>
+#include <net/tcp.h>
 #include <net/sock_reuseport.h>
 
 static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
@@ -172,7 +173,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
 static inline int compute_score(struct sock *sk, struct net *net,
                                const unsigned short hnum, const __be32 daddr,
-                               const int dif)
+                               const int dif, bool exact_dif)
 {
        int score = -1;
        struct inet_sock *inet = inet_sk(sk);
@@ -186,7 +187,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
                                return -1;
                        score += 4;
                }
-               if (sk->sk_bound_dev_if) {
+               if (sk->sk_bound_dev_if || exact_dif) {
                        if (sk->sk_bound_dev_if != dif)
                                return -1;
                        score += 4;
@@ -215,11 +216,12 @@ struct sock *__inet_lookup_listener(struct net *net,
        unsigned int hash = inet_lhashfn(net, hnum);
        struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
        int score, hiscore = 0, matches = 0, reuseport = 0;
+       bool exact_dif = inet_exact_dif_match(net, skb);
        struct sock *sk, *result = NULL;
        u32 phash = 0;
 
        sk_for_each_rcu(sk, &ilb->head) {
-               score = compute_score(sk, net, hnum, daddr, dif);
+               score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
                if (score > hiscore) {
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
index 8b4ffd2168395b4d3b6ec67e166af13c122128ca..9f0a7b96646f368021d9cd51bc3f728ba49eed0d 100644 (file)
@@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb)
        if (opt->is_strictroute && rt->rt_uses_gateway)
                goto sr_failed;
 
-       IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
+       IPCB(skb)->flags |= IPSKB_FORWARDED;
        mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
        if (ip_exceeds_mtu(skb, mtu)) {
                IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
index 576f705d81809fccc61833f7445873620af6b4eb..78fd620483353293220c971d3c8f11e9fe4a8c28 100644 (file)
@@ -113,8 +113,8 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 static int ipgre_tunnel_init(struct net_device *dev);
 
-static int ipgre_net_id __read_mostly;
-static int gre_tap_net_id __read_mostly;
+static unsigned int ipgre_net_id __read_mostly;
+static unsigned int gre_tap_net_id __read_mostly;
 
 static void ipgre_err(struct sk_buff *skb, u32 info,
                      const struct tnl_ptk_info *tpi)
index 05d105832bdbb88f5f9d611d9f8bd35b1ae7f5d6..9af2b7853be4b82e989b421e8194c406e42f947c 100644 (file)
@@ -74,6 +74,7 @@
 #include <net/checksum.h>
 #include <net/inetpeer.h>
 #include <net/lwtunnel.h>
+#include <linux/bpf-cgroup.h>
 #include <linux/igmp.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_bridge.h>
@@ -239,19 +240,23 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
        struct sk_buff *segs;
        int ret = 0;
 
-       /* common case: fragmentation of segments is not allowed,
-        * or seglen is <= mtu
+       /* common case: seglen is <= mtu
         */
-       if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) ||
-             skb_gso_validate_mtu(skb, mtu))
+       if (skb_gso_validate_mtu(skb, mtu))
                return ip_finish_output2(net, sk, skb);
 
-       /* Slowpath -  GSO segment length is exceeding the dst MTU.
+       /* Slowpath -  GSO segment length exceeds the egress MTU.
         *
-        * This can happen in two cases:
-        * 1) TCP GRO packet, DF bit not set
-        * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
-        * from host network stack.
+        * This can happen in several cases:
+        *  - Forwarding of a TCP GRO skb, when DF flag is not set.
+        *  - Forwarding of an skb that arrived on a virtualization interface
+        *    (virtio-net/vhost/tap) with TSO/GSO size set by other network
+        *    stack.
+        *  - Local GSO skb transmitted on an NETIF_F_TSO tunnel stacked over an
+        *    interface with a smaller MTU.
+        *  - Arriving GRO skb (or GSO skb in a virtualized environment) that is
+        *    bridged to a NETIF_F_TSO tunnel stacked over an interface with an
+        *    insufficient MTU.
         */
        features = netif_skb_features(skb);
        BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET);
@@ -281,6 +286,13 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
 static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        unsigned int mtu;
+       int ret;
+
+       ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+       if (ret) {
+               kfree_skb(skb);
+               return ret;
+       }
 
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
@@ -299,6 +311,20 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
        return ip_finish_output2(net, sk, skb);
 }
 
+static int ip_mc_finish_output(struct net *net, struct sock *sk,
+                              struct sk_buff *skb)
+{
+       int ret;
+
+       ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+       if (ret) {
+               kfree_skb(skb);
+               return ret;
+       }
+
+       return dev_loopback_xmit(net, sk, skb);
+}
+
 int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
        struct rtable *rt = skb_rtable(skb);
@@ -336,7 +362,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
                                        net, sk, newskb, NULL, newskb->dev,
-                                       dev_loopback_xmit);
+                                       ip_mc_finish_output);
                }
 
                /* Multicasts with ttl 0 must not go beyond the host */
@@ -352,7 +378,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
                if (newskb)
                        NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
                                net, sk, newskb, NULL, newskb->dev,
-                               dev_loopback_xmit);
+                               ip_mc_finish_output);
        }
 
        return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
@@ -538,7 +564,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 {
        struct iphdr *iph;
        int ptr;
-       struct net_device *dev;
        struct sk_buff *skb2;
        unsigned int mtu, hlen, left, len, ll_rs;
        int offset;
@@ -546,8 +571,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
        struct rtable *rt = skb_rtable(skb);
        int err = 0;
 
-       dev = rt->dst.dev;
-
        /* for offloaded checksums cleanup checksum before fragmentation */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
@@ -580,7 +603,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
         */
        if (skb_has_frag_list(skb)) {
                struct sk_buff *frag, *frag2;
-               int first_len = skb_pagelen(skb);
+               unsigned int first_len = skb_pagelen(skb);
 
                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
@@ -1582,7 +1605,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
        }
 
        oif = arg->bound_dev_if;
-       oif = oif ? : skb->skb_iif;
+       if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
+               oif = skb->skb_iif;
 
        flowi4_init_output(&fl4, oif,
                           IP4_REPLY_MARK(net, skb->mark),
@@ -1590,7 +1614,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
                           RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
                           ip_reply_arg_flowi_flags(arg),
                           daddr, saddr,
-                          tcp_hdr(skb)->source, tcp_hdr(skb)->dest);
+                          tcp_hdr(skb)->source, tcp_hdr(skb)->dest,
+                          arg->uid);
        security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
        rt = ip_route_output_key(net, &fl4);
        if (IS_ERR(rt))
index af4919792b6a812041dcb18ff30aa8b27482c7a2..8b13881ed0644c3c32e87718752f8613023c72eb 100644 (file)
@@ -97,8 +97,19 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb)
        put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
 }
 
+static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
+{
+       int val;
+
+       if (IPCB(skb)->frag_max_size == 0)
+               return;
+
+       val = IPCB(skb)->frag_max_size;
+       put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
+}
+
 static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
-                                 int offset)
+                                 int tlen, int offset)
 {
        __wsum csum = skb->csum;
 
@@ -106,8 +117,9 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
                return;
 
        if (offset != 0)
-               csum = csum_sub(csum, csum_partial(skb_transport_header(skb),
-                                                  offset, 0));
+               csum = csum_sub(csum,
+                               csum_partial(skb_transport_header(skb) + tlen,
+                                            offset, 0));
 
        put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
 }
@@ -152,10 +164,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
        put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
 }
 
-void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,
-                        int offset)
+void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
+                        struct sk_buff *skb, int tlen, int offset)
 {
-       struct inet_sock *inet = inet_sk(skb->sk);
+       struct inet_sock *inet = inet_sk(sk);
        unsigned int flags = inet->cmsg_flags;
 
        /* Ordered by supposed usage frequency */
@@ -216,7 +228,10 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,
        }
 
        if (flags & IP_CMSG_CHECKSUM)
-               ip_cmsg_recv_checksum(msg, skb, offset);
+               ip_cmsg_recv_checksum(msg, skb, tlen, offset);
+
+       if (flags & IP_CMSG_RECVFRAGSIZE)
+               ip_cmsg_recv_fragsize(msg, skb);
 }
 EXPORT_SYMBOL(ip_cmsg_recv_offset);
 
@@ -613,6 +628,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
        case IP_MULTICAST_LOOP:
        case IP_RECVORIGDSTADDR:
        case IP_CHECKSUM:
+       case IP_RECVFRAGSIZE:
                if (optlen >= sizeof(int)) {
                        if (get_user(val, (int __user *) optval))
                                return -EFAULT;
@@ -725,6 +741,14 @@ static int do_ip_setsockopt(struct sock *sk, int level,
                        }
                }
                break;
+       case IP_RECVFRAGSIZE:
+               if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
+                       goto e_inval;
+               if (val)
+                       inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
+               else
+                       inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
+               break;
        case IP_TOS:    /* This sets both TOS and Precedence */
                if (sk->sk_type == SOCK_STREAM) {
                        val &= ~INET_ECN_MASK;
@@ -1356,6 +1380,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
        case IP_CHECKSUM:
                val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
                break;
+       case IP_RECVFRAGSIZE:
+               val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
+               break;
        case IP_TOS:
                val = inet->tos;
                break;
index 12a92e3349ed5ab322496b553abf944acd9f1d4c..823abaef006bd353cf0466f14dd3abaa26f80c07 100644 (file)
@@ -994,7 +994,7 @@ int ip_tunnel_get_iflink(const struct net_device *dev)
 }
 EXPORT_SYMBOL(ip_tunnel_get_iflink);
 
-int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
+int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
                                  struct rtnl_link_ops *ops, char *devname)
 {
        struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
@@ -1196,7 +1196,7 @@ void ip_tunnel_uninit(struct net_device *dev)
 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
 
 /* Do least required initialization, rest of init is done in tunnel_init call */
-void ip_tunnel_setup(struct net_device *dev, int net_id)
+void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
 {
        struct ip_tunnel *tunnel = netdev_priv(dev);
        tunnel->ip_tnl_net_id = net_id;
index 777bc1883870ec91d7c4df9adfde50fc3f384043..fed3d29f9eb3b716664b8d9eba052695cbb867bd 100644 (file)
@@ -63,7 +63,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
        int pkt_len = skb->len - skb_inner_network_offset(skb);
        struct net *net = dev_net(rt->dst.dev);
        struct net_device *dev = skb->dev;
-       int skb_iif = skb->skb_iif;
        struct iphdr *iph;
        int err;
 
@@ -73,16 +72,6 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
        skb_dst_set(skb, &rt->dst);
        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
-       if (skb_iif && !(df & htons(IP_DF))) {
-               /* Arrived from an ingress interface, got encapsulated, with
-                * fragmentation of encapulating frames allowed.
-                * If skb is gso, the resulting encapsulated network segments
-                * may exceed dst mtu.
-                * Allow IP Fragmentation of segments.
-                */
-               IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
-       }
-
        /* Push down and install the IP header. */
        skb_push(skb, sizeof(struct iphdr));
        skb_reset_network_header(skb);
index 5d7944f394d9af83d6209c6ab2778be9e24d3553..8b14f1404c8f7315495877a34ad6757aee4232a8 100644 (file)
@@ -46,7 +46,7 @@
 
 static struct rtnl_link_ops vti_link_ops __read_mostly;
 
-static int vti_net_id __read_mostly;
+static unsigned int vti_net_id __read_mostly;
 static int vti_tunnel_init(struct net_device *dev);
 
 static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
index c9392589c4157e4ac5d4c2e619b6f3ed265165be..79489f017854e917df49f9eda569add649f1fd32 100644 (file)
@@ -121,7 +121,7 @@ static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
-static int ipip_net_id __read_mostly;
+static unsigned int ipip_net_id __read_mostly;
 
 static int ipip_tunnel_init(struct net_device *dev);
 static struct rtnl_link_ops ipip_link_ops __read_mostly;
index 5f006e13de567ad65ce856a47c118f5ea91ac671..665505d86b1242e7b202c459d6c21b58937b09f8 100644 (file)
@@ -137,6 +137,9 @@ static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
                .flags = FIB_LOOKUP_NOREF,
        };
 
+       /* update flow if oif or iif point to device enslaved to l3mdev */
+       l3mdev_update_flow(net, flowi4_to_flowi(flp4));
+
        err = fib_rules_lookup(net->ipv4.mr_rules_ops,
                               flowi4_to_flowi(flp4), 0, &arg);
        if (err < 0)
@@ -163,7 +166,9 @@ static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
                return -EINVAL;
        }
 
-       mrt = ipmr_get_table(rule->fr_net, rule->table);
+       arg->table = fib_rule_get_table(rule, arg);
+
+       mrt = ipmr_get_table(rule->fr_net, arg->table);
        if (!mrt)
                return -EAGAIN;
        res->mrt = mrt;
@@ -1749,7 +1754,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
                vif->dev->stats.tx_bytes += skb->len;
        }
 
-       IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS;
+       IPCB(skb)->flags |= IPSKB_FORWARDED;
 
        /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
         * not only before forwarding, but after forwarding on all output
@@ -1809,6 +1814,12 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
 
        /* Wrong interface: drop packet and (maybe) send PIM assert. */
        if (mrt->vif_table[vif].dev != skb->dev) {
+               struct net_device *mdev;
+
+               mdev = l3mdev_master_dev_rcu(mrt->vif_table[vif].dev);
+               if (mdev == skb->dev)
+                       goto forward;
+
                if (rt_is_output_route(skb_rtable(skb))) {
                        /* It is our own packet, looped back.
                         * Very complicated situation...
@@ -2053,7 +2064,7 @@ static int pim_rcv(struct sk_buff *skb)
                goto drop;
 
        pim = (struct pimreghdr *)skb_transport_header(skb);
-       if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
+       if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) ||
            (pim->flags & PIM_NULL_REGISTER) ||
            (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
index d613309e3e5d0cd74c7831d3bf55af9a2d7ce915..c11eb1744ab1b77715cbd82cd25a7542fd58fad8 100644 (file)
@@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV4
 
          To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_SOCKET_IPV4
+       tristate "IPv4 socket lookup support"
+       help
+         This option enables the IPv4 socket lookup infrastructure. This is
+         required by the iptables socket match.
+
 if NF_TABLES
 
 config NF_TABLES_IPV4
@@ -54,6 +60,14 @@ config NFT_DUP_IPV4
        help
          This module enables IPv4 packet duplication support for nf_tables.
 
+config NFT_FIB_IPV4
+       select NFT_FIB
+       tristate "nf_tables fib / ip route lookup support"
+       help
+         This module enables IPv4 FIB lookups, e.g. for reverse path filtering.
+         It also allows query of the FIB for the route type, e.g. local, unicast,
+         multicast or blackhole.
+
 endif # NF_TABLES_IPV4
 
 config NF_TABLES_ARP
index 853328f8fd050fa4c3094d498ed940362401b01a..f462fee66ac88548a3f59ae4ed566c61f6648c8d 100644 (file)
@@ -14,6 +14,8 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
 # defrag
 obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
 
+obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o
+
 # logging
 obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
 obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o
@@ -34,6 +36,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
 obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
 obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
 obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
 obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
index b31df597fd37e4e8fa41777d7883973a6c852254..39004da318e2d833ac528afb9564f7fc8247442f 100644 (file)
@@ -217,11 +217,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
         */
        e = get_entry(table_base, private->hook_entry[hook]);
 
-       acpar.net     = state->net;
-       acpar.in      = state->in;
-       acpar.out     = state->out;
-       acpar.hooknum = hook;
-       acpar.family  = NFPROTO_ARP;
+       acpar.state   = state;
        acpar.hotdrop = false;
 
        arp = arp_hdr(skb);
@@ -809,7 +805,7 @@ static int get_info(struct net *net, void __user *user,
 #endif
        t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
                                    "arptable_%s", name);
-       if (!IS_ERR_OR_NULL(t)) {
+       if (t) {
                struct arpt_getinfo info;
                const struct xt_table_info *private = t->private;
 #ifdef CONFIG_COMPAT
@@ -838,7 +834,7 @@ static int get_info(struct net *net, void __user *user,
                xt_table_unlock(t);
                module_put(t->me);
        } else
-               ret = t ? PTR_ERR(t) : -ENOENT;
+               ret = -ENOENT;
 #ifdef CONFIG_COMPAT
        if (compat)
                xt_compat_unlock(NFPROTO_ARP);
@@ -863,7 +859,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
        get.name[sizeof(get.name) - 1] = '\0';
 
        t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
-       if (!IS_ERR_OR_NULL(t)) {
+       if (t) {
                const struct xt_table_info *private = t->private;
 
                if (get.size == private->size)
@@ -875,7 +871,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
                module_put(t->me);
                xt_table_unlock(t);
        } else
-               ret = t ? PTR_ERR(t) : -ENOENT;
+               ret = -ENOENT;
 
        return ret;
 }
@@ -902,8 +898,8 @@ static int __do_replace(struct net *net, const char *name,
 
        t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
                                    "arptable_%s", name);
-       if (IS_ERR_OR_NULL(t)) {
-               ret = t ? PTR_ERR(t) : -ENOENT;
+       if (!t) {
+               ret = -ENOENT;
                goto free_newinfo_counters_untrans;
        }
 
@@ -1018,8 +1014,8 @@ static int do_add_counters(struct net *net, const void __user *user,
                return PTR_ERR(paddc);
 
        t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name);
-       if (IS_ERR_OR_NULL(t)) {
-               ret = t ? PTR_ERR(t) : -ENOENT;
+       if (!t) {
+               ret = -ENOENT;
                goto free;
        }
 
@@ -1408,7 +1404,7 @@ static int compat_get_entries(struct net *net,
 
        xt_compat_lock(NFPROTO_ARP);
        t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
-       if (!IS_ERR_OR_NULL(t)) {
+       if (t) {
                const struct xt_table_info *private = t->private;
                struct xt_table_info info;
 
@@ -1423,7 +1419,7 @@ static int compat_get_entries(struct net *net,
                module_put(t->me);
                xt_table_unlock(t);
        } else
-               ret = t ? PTR_ERR(t) : -ENOENT;
+               ret = -ENOENT;
 
        xt_compat_unlock(NFPROTO_ARP);
        return ret;
index 7c00ce90adb8496ed9fb97894163f783b158e7cf..46815c8a60d707deccd28961126de7392fce39f0 100644 (file)
@@ -261,11 +261,7 @@ ipt_do_table(struct sk_buff *skb,
        acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
        acpar.thoff   = ip_hdrlen(skb);
        acpar.hotdrop = false;
-       acpar.net     = state->net;
-       acpar.in      = state->in;
-       acpar.out     = state->out;
-       acpar.family  = NFPROTO_IPV4;
-       acpar.hooknum = hook;
+       acpar.state   = state;
 
        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
        local_bh_disable();
@@ -977,7 +973,7 @@ static int get_info(struct net *net, void __user *user,
 #endif
        t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
                                    "iptable_%s", name);
-       if (!IS_ERR_OR_NULL(t)) {
+       if (t) {
                struct ipt_getinfo info;
                const struct xt_table_info *private = t->private;
 #ifdef CONFIG_COMPAT
@@ -1007,7 +1003,7 @@ static int get_info(struct net *net, void __user *user,
                xt_table_unlock(t);
                module_put(t->me);
        } else
-               ret = t ? PTR_ERR(t) : -ENOENT;
+               ret = -ENOENT;
 #ifdef CONFIG_COMPAT
        if (compat)
                xt_compat_unlock(AF_INET);
@@ -1032,7 +1028,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
        get.name[sizeof(get.name) - 1] = '\0';
 
        t = xt_find_table_lock(net, AF_INET, get.name);
-       if (!IS_ERR_OR_NULL(t)) {
+       if (t) {
                const struct xt_table_info *private = t->private;
                if (get.size == private->size)
                        ret = copy_entries_to_user(private->size,
@@ -1043,7 +1039,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
                module_put(t->me);
                xt_table_unlock(t);
        } else
-               ret = t ? PTR_ERR(t) : -ENOENT;
+               ret = -ENOENT;
 
        return ret;
 }
@@ -1068,8 +1064,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 
        t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
                                    "iptable_%s", name);
-       if (IS_ERR_OR_NULL(t)) {
-               ret = t ? PTR_ERR(t) : -ENOENT;
+       if (!t) {
+               ret = -ENOENT;
                goto free_newinfo_counters_untrans;
        }
 
@@ -1184,8 +1180,8 @@ do_add_counters(struct net *net, const void __user *user,
                return PTR_ERR(paddc);
 
        t = xt_find_table_lock(net, AF_INET, tmp.name);
-       if (IS_ERR_OR_NULL(t)) {
-               ret = t ? PTR_ERR(t) : -ENOENT;
+       if (!t) {
+               ret = -ENOENT;
                goto free;
        }
 
@@ -1630,7 +1626,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
 
        xt_compat_lock(AF_INET);
        t = xt_find_table_lock(net, AF_INET, get.name);
-       if (!IS_ERR_OR_NULL(t)) {
+       if (t) {
                const struct xt_table_info *private = t->private;
                struct xt_table_info info;
                ret = compat_table_info(private, &info);
@@ -1644,7 +1640,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
                module_put(t->me);
                xt_table_unlock(t);
        } else
-               ret = t ? PTR_ERR(t) : -ENOENT;
+               ret = -ENOENT;
 
        xt_compat_unlock(AF_INET);
        return ret;
index 4a9e6db9df8d719a14b6aa129b78ba614587767f..e6e206fa86c84a0668fdd0eaa015a6ccc3998b45 100644 (file)
@@ -62,7 +62,7 @@ struct clusterip_config {
 static const struct file_operations clusterip_proc_fops;
 #endif
 
-static int clusterip_net_id __read_mostly;
+static unsigned int clusterip_net_id __read_mostly;
 
 struct clusterip_net {
        struct list_head configs;
index da7f02a0b868843b01dffcaeb75181584585f81f..34cfb9b0bc0aa42f8df296797141b3cb4a78a243 100644 (file)
@@ -55,7 +55,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
        range.min_proto = mr->range[0].min;
        range.max_proto = mr->range[0].max;
 
-       return nf_nat_masquerade_ipv4(skb, par->hooknum, &range, par->out);
+       return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
+                                     xt_out(par));
 }
 
 static struct xt_target masquerade_tg_reg __read_mostly = {
index 1d16c0f28df00d17c38a52bfeff23c0a1bca0142..8bd0d7b266320ecf51e0782ad6ccc7d7ad276ebe 100644 (file)
@@ -34,7 +34,7 @@ static unsigned int
 reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct ipt_reject_info *reject = par->targinfo;
-       int hook = par->hooknum;
+       int hook = xt_hooknum(par);
 
        switch (reject->with) {
        case IPT_ICMP_NET_UNREACHABLE:
@@ -59,7 +59,7 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
                nf_send_unreach(skb, ICMP_PKT_FILTERED, hook);
                break;
        case IPT_TCP_RESET:
-               nf_send_reset(par->net, skb, hook);
+               nf_send_reset(xt_net(par), skb, hook);
        case IPT_ICMP_ECHOREPLY:
                /* Doesn't happen. */
                break;
index db5b875094465ca4b5f79435f46080fc5f612569..36141168822122ebe4fe3d044c183195d0f2ef88 100644 (file)
@@ -263,12 +263,12 @@ static unsigned int
 synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_synproxy_info *info = par->targinfo;
-       struct net *net = par->net;
+       struct net *net = xt_net(par);
        struct synproxy_net *snet = synproxy_pernet(net);
        struct synproxy_options opts = {};
        struct tcphdr *th, _th;
 
-       if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+       if (nf_ip_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP))
                return NF_DROP;
 
        th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
index 78cc64eddfc1855849652c563b3c931cd7cc72e6..59b49945b48103d25b885f1827d5479044e10e57 100644 (file)
@@ -95,7 +95,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
        flow.flowi4_tos = RT_TOS(iph->tos);
        flow.flowi4_scope = RT_SCOPE_UNIVERSE;
 
-       return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert;
+       return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert;
 }
 
 static int rpfilter_check(const struct xt_mtchk_param *par)
index 713c09a74b9009cd3132bd388638f25590b5bdc6..7130ed5dc1fac1779b55a317358719f0a98021f6 100644 (file)
@@ -336,47 +336,34 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
 MODULE_ALIAS("ip_conntrack");
 MODULE_LICENSE("GPL");
 
+static struct nf_conntrack_l4proto *builtin_l4proto4[] = {
+       &nf_conntrack_l4proto_tcp4,
+       &nf_conntrack_l4proto_udp4,
+       &nf_conntrack_l4proto_icmp,
+};
+
 static int ipv4_net_init(struct net *net)
 {
        int ret = 0;
 
-       ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4);
-       if (ret < 0) {
-               pr_err("nf_conntrack_tcp4: pernet registration failed\n");
-               goto out_tcp;
-       }
-       ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4);
-       if (ret < 0) {
-               pr_err("nf_conntrack_udp4: pernet registration failed\n");
-               goto out_udp;
-       }
-       ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp);
-       if (ret < 0) {
-               pr_err("nf_conntrack_icmp4: pernet registration failed\n");
-               goto out_icmp;
-       }
+       ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4,
+                                           ARRAY_SIZE(builtin_l4proto4));
+       if (ret < 0)
+               return ret;
        ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
        if (ret < 0) {
                pr_err("nf_conntrack_ipv4: pernet registration failed\n");
-               goto out_ipv4;
+               nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
+                                               ARRAY_SIZE(builtin_l4proto4));
        }
-       return 0;
-out_ipv4:
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
-out_icmp:
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
-out_udp:
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
-out_tcp:
        return ret;
 }
 
 static void ipv4_net_exit(struct net *net)
 {
        nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
+       nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4,
+                                       ARRAY_SIZE(builtin_l4proto4));
 }
 
 static struct pernet_operations ipv4_net_ops = {
@@ -410,37 +397,21 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
                goto cleanup_pernet;
        }
 
-       ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4);
-       if (ret < 0) {
-               pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n");
+       ret = nf_ct_l4proto_register(builtin_l4proto4,
+                                    ARRAY_SIZE(builtin_l4proto4));
+       if (ret < 0)
                goto cleanup_hooks;
-       }
-
-       ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4);
-       if (ret < 0) {
-               pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n");
-               goto cleanup_tcp4;
-       }
-
-       ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp);
-       if (ret < 0) {
-               pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n");
-               goto cleanup_udp4;
-       }
 
        ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
        if (ret < 0) {
                pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
-               goto cleanup_icmpv4;
+               goto cleanup_l4proto;
        }
 
        return ret;
- cleanup_icmpv4:
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
- cleanup_udp4:
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
- cleanup_tcp4:
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+cleanup_l4proto:
+       nf_ct_l4proto_unregister(builtin_l4proto4,
+                                ARRAY_SIZE(builtin_l4proto4));
  cleanup_hooks:
        nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
  cleanup_pernet:
@@ -454,9 +425,8 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 {
        synchronize_net();
        nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+       nf_ct_l4proto_unregister(builtin_l4proto4,
+                                ARRAY_SIZE(builtin_l4proto4));
        nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
        unregister_pernet_subsys(&ipv4_net_ops);
        nf_unregister_sockopt(&so_getorigdst);
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
new file mode 100644 (file)
index 0000000..a83d558
--- /dev/null
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2007-2008 BalaBit IT Ltd.
+ * Author: Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <net/netfilter/nf_socket.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+static int
+extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol,
+                    __be32 *raddr, __be32 *laddr,
+                    __be16 *rport, __be16 *lport)
+{
+       unsigned int outside_hdrlen = ip_hdrlen(skb);
+       struct iphdr *inside_iph, _inside_iph;
+       struct icmphdr *icmph, _icmph;
+       __be16 *ports, _ports[2];
+
+       icmph = skb_header_pointer(skb, outside_hdrlen,
+                                  sizeof(_icmph), &_icmph);
+       if (icmph == NULL)
+               return 1;
+
+       switch (icmph->type) {
+       case ICMP_DEST_UNREACH:
+       case ICMP_SOURCE_QUENCH:
+       case ICMP_REDIRECT:
+       case ICMP_TIME_EXCEEDED:
+       case ICMP_PARAMETERPROB:
+               break;
+       default:
+               return 1;
+       }
+
+       inside_iph = skb_header_pointer(skb, outside_hdrlen +
+                                       sizeof(struct icmphdr),
+                                       sizeof(_inside_iph), &_inside_iph);
+       if (inside_iph == NULL)
+               return 1;
+
+       if (inside_iph->protocol != IPPROTO_TCP &&
+           inside_iph->protocol != IPPROTO_UDP)
+               return 1;
+
+       ports = skb_header_pointer(skb, outside_hdrlen +
+                                  sizeof(struct icmphdr) +
+                                  (inside_iph->ihl << 2),
+                                  sizeof(_ports), &_ports);
+       if (ports == NULL)
+               return 1;
+
+       /* the inside IP packet is the one quoted from our side, thus
+        * its saddr is the local address */
+       *protocol = inside_iph->protocol;
+       *laddr = inside_iph->saddr;
+       *lport = ports[0];
+       *raddr = inside_iph->daddr;
+       *rport = ports[1];
+
+       return 0;
+}
+
+static struct sock *
+nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
+                     const u8 protocol,
+                     const __be32 saddr, const __be32 daddr,
+                     const __be16 sport, const __be16 dport,
+                     const struct net_device *in)
+{
+       switch (protocol) {
+       case IPPROTO_TCP:
+               return inet_lookup(net, &tcp_hashinfo, skb, doff,
+                                  saddr, sport, daddr, dport,
+                                  in->ifindex);
+       case IPPROTO_UDP:
+               return udp4_lib_lookup(net, saddr, sport, daddr, dport,
+                                      in->ifindex);
+       }
+       return NULL;
+}
+
+struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
+                                 const struct net_device *indev)
+{
+       __be32 uninitialized_var(daddr), uninitialized_var(saddr);
+       __be16 uninitialized_var(dport), uninitialized_var(sport);
+       const struct iphdr *iph = ip_hdr(skb);
+       struct sk_buff *data_skb = NULL;
+       u8 uninitialized_var(protocol);
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn const *ct;
+#endif
+       int doff = 0;
+
+       if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
+               struct udphdr _hdr, *hp;
+
+               hp = skb_header_pointer(skb, ip_hdrlen(skb),
+                                       sizeof(_hdr), &_hdr);
+               if (hp == NULL)
+                       return NULL;
+
+               protocol = iph->protocol;
+               saddr = iph->saddr;
+               sport = hp->source;
+               daddr = iph->daddr;
+               dport = hp->dest;
+               data_skb = (struct sk_buff *)skb;
+               doff = iph->protocol == IPPROTO_TCP ?
+                       ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
+                       ip_hdrlen(skb) + sizeof(*hp);
+
+       } else if (iph->protocol == IPPROTO_ICMP) {
+               if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
+                                        &sport, &dport))
+                       return NULL;
+       } else {
+               return NULL;
+       }
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+       /* Do the lookup with the original socket address in
+        * case this is a reply packet of an established
+        * SNAT-ted connection.
+        */
+       ct = nf_ct_get(skb, &ctinfo);
+       if (ct && !nf_ct_is_untracked(ct) &&
+           ((iph->protocol != IPPROTO_ICMP &&
+             ctinfo == IP_CT_ESTABLISHED_REPLY) ||
+            (iph->protocol == IPPROTO_ICMP &&
+             ctinfo == IP_CT_RELATED_REPLY)) &&
+           (ct->status & IPS_SRC_NAT_DONE)) {
+
+               daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
+               dport = (iph->protocol == IPPROTO_TCP) ?
+                       ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
+                       ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
+       }
+#endif
+
+       return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
+                                    daddr, sport, dport, indev);
+}
+EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
+MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure");
index bf855e64fc45c86fbf286bc34b2bd96fbac17cff..0af3d8df70dd713bbe30d469987bab33c295cf99 100644 (file)
@@ -28,9 +28,9 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr,
        struct in_addr gw = {
                .s_addr = (__force __be32)regs->data[priv->sreg_addr],
        };
-       int oif = regs->data[priv->sreg_dev];
+       int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
 
-       nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif);
+       nf_dup_ipv4(nft_net(pkt), pkt->skb, nft_hook(pkt), &gw, oif);
 }
 
 static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
@@ -59,7 +59,9 @@ static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
        struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
 
-       if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
+       if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+               goto nla_put_failure;
+       if (priv->sreg_dev &&
            nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
                goto nla_put_failure;
 
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
new file mode 100644 (file)
index 0000000..1b49966
--- /dev/null
@@ -0,0 +1,238 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+#include <net/ip_fib.h>
+#include <net/route.h>
+
+/*
+ * Return 0 in place of a multicast, limited-broadcast or zeronet address
+ * so that no route lookup is attempted from such an address; any other
+ * address is returned unchanged.
+ */
+static __be32 get_saddr(__be32 addr)
+{
+       bool skip = ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
+                   ipv4_is_zeronet(addr);
+
+       return skip ? 0 : addr;
+}
+
+/* True when @skb has a route attached and that route carries RTCF_LOCAL. */
+static bool fib4_is_local(const struct sk_buff *skb)
+{
+       const struct rtable *rt = skb_rtable(skb);
+
+       if (!rt)
+               return false;
+
+       return (rt->rt_flags & RTCF_LOCAL) != 0;
+}
+
+/* mask applied to iph->tos: keeps the upper six bits, clears the low two */
+#define DSCP_BITS     0xfc
+
+/*
+ * Address-type evaluation: classify the packet's source or destination
+ * address with inet_dev_addr_type() and store the result in the
+ * destination register.
+ *
+ * NFTA_FIB_F_IIF / NFTA_FIB_F_OIF select the device handed to the
+ * classifier (NULL if neither is set); NFTA_FIB_F_DADDR selects the
+ * destination address, otherwise the source address is used.
+ */
+void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+                       const struct nft_pktinfo *pkt)
+{
+       const struct nft_fib *priv = nft_expr_priv(expr);
+       const struct iphdr *iph = ip_hdr(pkt->skb);
+       const struct net_device *dev = NULL;
+       __be32 addr;
+
+       if (priv->flags & NFTA_FIB_F_IIF)
+               dev = nft_in(pkt);
+       else if (priv->flags & NFTA_FIB_F_OIF)
+               dev = nft_out(pkt);
+
+       addr = (priv->flags & NFTA_FIB_F_DADDR) ? iph->daddr : iph->saddr;
+
+       regs->data[priv->dreg] = inet_dev_addr_type(nft_net(pkt), dev, addr);
+}
+EXPORT_SYMBOL_GPL(nft_fib4_eval_type);
+
+/* Interface index of @dev, or 0 when @dev is NULL. */
+static int get_ifindex(const struct net_device *dev)
+{
+       if (!dev)
+               return 0;
+
+       return dev->ifindex;
+}
+
+/*
+ * Route-lookup evaluation: look up the packet's source or destination
+ * address (NFTA_FIB_F_DADDR selects the destination) in the FIB and store
+ * the resulting interface in the destination register, as an ifindex or
+ * as a name depending on priv->result.
+ *
+ * The register is cleared before the lookup so that every path that
+ * bails out without a usable route (lookup failure, RTN_LOCAL result, no
+ * nexthop on the requested device) leaves 0 behind instead of whatever
+ * was previously stored in that register.
+ */
+void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
+                  const struct nft_pktinfo *pkt)
+{
+       const struct nft_fib *priv = nft_expr_priv(expr);
+       u32 *dest = &regs->data[priv->dreg];
+       const struct iphdr *iph;
+       struct fib_result res;
+       struct flowi4 fl4 = {
+               .flowi4_scope = RT_SCOPE_UNIVERSE,
+               .flowi4_iif = LOOPBACK_IFINDEX,
+       };
+       const struct net_device *oif;
+       struct net_device *found;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+       int i;
+#endif
+
+       /*
+        * Do not set flowi4_oif, it restricts results (for example, asking
+        * for oif 3 will get RTN_UNICAST result even if the daddr exists
+        * on another interface).
+        *
+        * Search results for the desired outinterface instead.
+        */
+       if (priv->flags & NFTA_FIB_F_OIF)
+               oif = nft_out(pkt);
+       else if (priv->flags & NFTA_FIB_F_IIF)
+               oif = nft_in(pkt);
+       else
+               oif = NULL;
+
+       /* packet already routed to a local address: report loopback */
+       if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib4_is_local(pkt->skb)) {
+               nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
+               return;
+       }
+
+       /* zeronet source to a local multicast group: report skb->dev */
+       iph = ip_hdr(pkt->skb);
+       if (ipv4_is_multicast(iph->daddr) &&
+           ipv4_is_zeronet(iph->saddr) &&
+           ipv4_is_local_multicast(iph->daddr)) {
+               nft_fib_store_result(dest, priv->result, pkt,
+                                    get_ifindex(pkt->skb->dev));
+               return;
+       }
+
+       if (priv->flags & NFTA_FIB_F_MARK)
+               fl4.flowi4_mark = pkt->skb->mark;
+
+       fl4.flowi4_tos = iph->tos & DSCP_BITS;
+
+       if (priv->flags & NFTA_FIB_F_DADDR) {
+               fl4.daddr = iph->daddr;
+               fl4.saddr = get_saddr(iph->saddr);
+       } else {
+               /* reverse lookup: route back towards the packet's source */
+               fl4.daddr = iph->saddr;
+               fl4.saddr = get_saddr(iph->daddr);
+       }
+
+       /*
+        * From here on every early return means "nothing found": make sure
+        * the register reads as 0 (ifindex 0 / empty name) in that case
+        * instead of keeping its previous contents.
+        */
+       *dest = 0;
+
+       if (fib_lookup(nft_net(pkt), &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
+               return;
+
+       /* should not appear here, see fib4_is_local() above */
+       if (res.type == RTN_LOCAL)
+               return;
+
+       if (!oif) {
+               found = FIB_RES_DEV(res);
+               goto ok;
+       }
+
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+       /* accept the route if any of its nexthops uses the wanted device */
+       for (i = 0; i < res.fi->fib_nhs; i++) {
+               struct fib_nh *nh = &res.fi->fib_nh[i];
+
+               if (nh->nh_dev == oif) {
+                       found = nh->nh_dev;
+                       goto ok;
+               }
+       }
+       return;
+#else
+       found = FIB_RES_DEV(res);
+       if (found != oif)
+               return;
+#endif
+ok:
+       switch (priv->result) {
+       case NFT_FIB_RESULT_OIF:
+               *dest = found->ifindex;
+               break;
+       case NFT_FIB_RESULT_OIFNAME:
+               strncpy((char *)dest, found->name, IFNAMSIZ);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               break;
+       }
+}
+EXPORT_SYMBOL_GPL(nft_fib4_eval);
+
+/* forward declaration: the ops tables below point back at the type */
+static struct nft_expr_type nft_fib4_type;
+
+/*
+ * Ops returned by nft_fib4_select_ops() for NFT_FIB_RESULT_ADDRTYPE:
+ * evaluation goes through nft_fib4_eval_type().
+ */
+static const struct nft_expr_ops nft_fib4_type_ops = {
+       .type           = &nft_fib4_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+       .eval           = nft_fib4_eval_type,
+       .init           = nft_fib_init,
+       .dump           = nft_fib_dump,
+       .validate       = nft_fib_validate,
+};
+
+/*
+ * Ops returned by nft_fib4_select_ops() for NFT_FIB_RESULT_OIF and
+ * NFT_FIB_RESULT_OIFNAME: evaluation goes through nft_fib4_eval().
+ */
+static const struct nft_expr_ops nft_fib4_ops = {
+       .type           = &nft_fib4_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+       .eval           = nft_fib4_eval,
+       .init           = nft_fib_init,
+       .dump           = nft_fib_dump,
+       .validate       = nft_fib_validate,
+};
+
+/*
+ * Pick the ops table for a new fib expression from its NFTA_FIB_RESULT
+ * attribute.
+ *
+ * Returns the matching ops, ERR_PTR(-EINVAL) when the attribute is
+ * missing, or ERR_PTR(-EOPNOTSUPP) for a result type not handled here.
+ */
+static const struct nft_expr_ops *
+nft_fib4_select_ops(const struct nft_ctx *ctx,
+                   const struct nlattr * const tb[])
+{
+       enum nft_fib_result result;
+
+       if (!tb[NFTA_FIB_RESULT])
+               return ERR_PTR(-EINVAL);
+
+       /* attribute is read as big-endian: convert it to host order */
+       result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+
+       switch (result) {
+       case NFT_FIB_RESULT_OIF:
+       case NFT_FIB_RESULT_OIFNAME:
+               return &nft_fib4_ops;
+       case NFT_FIB_RESULT_ADDRTYPE:
+               return &nft_fib4_type_ops;
+       default:
+               return ERR_PTR(-EOPNOTSUPP);
+       }
+}
+
+/*
+ * The "fib" expression type for NFPROTO_IPV4.  No fixed ops are set:
+ * nft_fib4_select_ops() chooses them per expression.
+ */
+static struct nft_expr_type nft_fib4_type __read_mostly = {
+       .name           = "fib",
+       .select_ops     = &nft_fib4_select_ops,
+       .policy         = nft_fib_policy,
+       .maxattr        = NFTA_FIB_MAX,
+       .family         = NFPROTO_IPV4,
+       .owner          = THIS_MODULE,
+};
+
+/* Module load: register the IPv4 fib expression type; returns its result. */
+static int __init nft_fib4_module_init(void)
+{
+       return nft_register_expr(&nft_fib4_type);
+}
+
+/* Module unload: unregister the IPv4 fib expression type. */
+static void __exit nft_fib4_module_exit(void)
+{
+       nft_unregister_expr(&nft_fib4_type);
+}
+
+module_init(nft_fib4_module_init);
+module_exit(nft_fib4_module_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+/* NOTE(review): the literal 2 presumably stands for NFPROTO_IPV4 - confirm */
+MODULE_ALIAS_NFT_AF_EXPR(2, "fib");
index 51ced81b616c5aeb3f9d38244f444c8263ec5dca..4f697e43181160530d7ecfb5752538c121402d03 100644 (file)
@@ -31,8 +31,8 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
                range.max_proto.all =
                        *(__be16 *)&regs->data[priv->sreg_proto_max];
        }
-       regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook,
-                                                   &range, pkt->out);
+       regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
+                                                   &range, nft_out(pkt));
 }
 
 static struct nft_expr_type nft_masq_ipv4_type;
index c09d4381427ea91d522fc6fd6ff961541f24956a..16df0493c5cefe81b2e8e97e614862947fb57f46 100644 (file)
@@ -35,8 +35,7 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr,
 
        mr.range[0].flags |= priv->flags;
 
-       regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr,
-                                                 pkt->hook);
+       regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt));
 }
 
 static struct nft_expr_type nft_redir_ipv4_type;
index 2c2553b9026cc652b6917529f71eba487c1ce711..517ce93699de3da0d0477061b0b80c056560d796 100644 (file)
@@ -27,10 +27,10 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr,
 
        switch (priv->type) {
        case NFT_REJECT_ICMP_UNREACH:
-               nf_send_unreach(pkt->skb, priv->icmp_code, pkt->hook);
+               nf_send_unreach(pkt->skb, priv->icmp_code, nft_hook(pkt));
                break;
        case NFT_REJECT_TCP_RST:
-               nf_send_reset(pkt->net, pkt->skb, pkt->hook);
+               nf_send_reset(nft_net(pkt), pkt->skb, nft_hook(pkt));
                break;
        default:
                break;
index 7cf7d6e380c2c87ecccb11bae3f677676062d11f..d11129f1178dc7e57daf59ca9ccd3ac8d6be3327 100644 (file)
@@ -789,7 +789,8 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
        flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
                           RT_SCOPE_UNIVERSE, sk->sk_protocol,
-                          inet_sk_flowi_flags(sk), faddr, saddr, 0, 0);
+                          inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
+                          sk->sk_uid);
 
        security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
        rt = ip_route_output_flow(net, &fl4, sk);
@@ -994,7 +995,7 @@ struct proto ping_prot = {
        .init =         ping_init_sock,
        .close =        ping_close,
        .connect =      ip4_datagram_connect,
-       .disconnect =   udp_disconnect,
+       .disconnect =   __udp_disconnect,
        .setsockopt =   ip_setsockopt,
        .getsockopt =   ip_getsockopt,
        .sendmsg =      ping_v4_sendmsg,
index 03618ed035323fe580e90b7eee3e29c5f185f924..2300fae11b22ea2beab67cdc4fbc6fe1b36222f5 100644 (file)
@@ -606,7 +606,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                           inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
                           inet_sk_flowi_flags(sk) |
                            (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
-                          daddr, saddr, 0, 0);
+                          daddr, saddr, 0, 0, sk->sk_uid);
 
        if (!inet->hdrincl) {
                rfv.msg = msg;
@@ -695,12 +695,20 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
        struct inet_sock *inet = inet_sk(sk);
        struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+       u32 tb_id = RT_TABLE_LOCAL;
        int ret = -EINVAL;
        int chk_addr_ret;
 
        if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
                goto out;
-       chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
+
+       if (sk->sk_bound_dev_if)
+               tb_id = l3mdev_fib_table_by_index(sock_net(sk),
+                                                sk->sk_bound_dev_if) ? : tb_id;
+
+       chk_addr_ret = inet_addr_type_table(sock_net(sk), addr->sin_addr.s_addr,
+                                           tb_id);
+
        ret = -EADDRNOTAVAIL;
        if (addr->sin_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
            chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
@@ -920,7 +928,7 @@ int raw_abort(struct sock *sk, int err)
 
        sk->sk_err = err;
        sk->sk_error_report(sk);
-       udp_disconnect(sk, 0);
+       __udp_disconnect(sk, 0);
 
        release_sock(sk);
 
@@ -934,7 +942,7 @@ struct proto raw_prot = {
        .close             = raw_close,
        .destroy           = raw_destroy,
        .connect           = ip4_datagram_connect,
-       .disconnect        = udp_disconnect,
+       .disconnect        = __udp_disconnect,
        .ioctl             = raw_ioctl,
        .init              = raw_init,
        .setsockopt        = raw_setsockopt,
index ef3bea061b758fe9a8edb6364da2580ef99f3d5e..e1a51ca68d23c324b6643234c6af399aacca83e0 100644 (file)
@@ -3,6 +3,7 @@
 #include <linux/inet_diag.h>
 #include <linux/sock_diag.h>
 
+#include <net/inet_sock.h>
 #include <net/raw.h>
 #include <net/rawv6.h>
 
@@ -78,10 +79,11 @@ static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2
                                 * hashinfo->lock here.
                                 */
                                sock_hold(sk);
-                               break;
+                               goto out_unlock;
                        }
                }
        }
+out_unlock:
        read_unlock(&hashinfo->lock);
 
        return sk ? sk : ERR_PTR(-ENOENT);
@@ -204,11 +206,14 @@ static int raw_diag_destroy(struct sk_buff *in_skb,
 {
        struct net *net = sock_net(in_skb->sk);
        struct sock *sk;
+       int err;
 
        sk = raw_sock_get(net, r);
        if (IS_ERR(sk))
                return PTR_ERR(sk);
-       return sock_diag_destroy(sk, ECONNABORTED);
+       err = sock_diag_destroy(sk, ECONNABORTED);
+       sock_put(sk);
+       return err;
 }
 #endif
 
index 62d4d90c1389c4ea7da37c81779b2f55207d2a92..fa5c037227cb2a503c88b0990932a888ca2e8957 100644 (file)
@@ -507,7 +507,8 @@ void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
 }
 EXPORT_SYMBOL(__ip_select_ident);
 
-static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
+static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
+                            const struct sock *sk,
                             const struct iphdr *iph,
                             int oif, u8 tos,
                             u8 prot, u32 mark, int flow_flags)
@@ -523,19 +524,21 @@ static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
        flowi4_init_output(fl4, oif, mark, tos,
                           RT_SCOPE_UNIVERSE, prot,
                           flow_flags,
-                          iph->daddr, iph->saddr, 0, 0);
+                          iph->daddr, iph->saddr, 0, 0,
+                          sock_net_uid(net, sk));
 }
 
 static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
                               const struct sock *sk)
 {
+       const struct net *net = dev_net(skb->dev);
        const struct iphdr *iph = ip_hdr(skb);
        int oif = skb->dev->ifindex;
        u8 tos = RT_TOS(iph->tos);
        u8 prot = iph->protocol;
        u32 mark = skb->mark;
 
-       __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
+       __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
 }
 
 static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
@@ -552,7 +555,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
                           inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
                           inet_sk_flowi_flags(sk),
-                          daddr, inet->inet_saddr, 0, 0);
+                          daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
        rcu_read_unlock();
 }
 
@@ -753,7 +756,9 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
                        goto reject_redirect;
        }
 
-       n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
+       n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
+       if (!n)
+               n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
        if (!IS_ERR(n)) {
                if (!(n->nud_state & NUD_VALID)) {
                        neigh_event_send(n, NULL);
@@ -800,7 +805,7 @@ static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buf
 
        rt = (struct rtable *) dst;
 
-       __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0);
+       __build_flow_key(sock_net(sk), &fl4, sk, iph, oif, tos, prot, mark, 0);
        __ip_do_redirect(rt, skb, &fl4, true);
 }
 
@@ -1018,7 +1023,7 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
        if (!mark)
                mark = IP4_REPLY_MARK(net, skb->mark);
 
-       __build_flow_key(&fl4, NULL, iph, oif,
+       __build_flow_key(net, &fl4, NULL, iph, oif,
                         RT_TOS(iph->tos), protocol, mark, flow_flags);
        rt = __ip_route_output_key(net, &fl4);
        if (!IS_ERR(rt)) {
@@ -1034,7 +1039,7 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
        struct flowi4 fl4;
        struct rtable *rt;
 
-       __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+       __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
 
        if (!fl4.flowi4_mark)
                fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
@@ -1053,6 +1058,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
        struct rtable *rt;
        struct dst_entry *odst = NULL;
        bool new = false;
+       struct net *net = sock_net(sk);
 
        bh_lock_sock(sk);
 
@@ -1066,7 +1072,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
                goto out;
        }
 
-       __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
+       __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
 
        rt = (struct rtable *)odst;
        if (odst->obsolete && !odst->ops->check(odst, 0)) {
@@ -1106,7 +1112,7 @@ void ipv4_redirect(struct sk_buff *skb, struct net *net,
        struct flowi4 fl4;
        struct rtable *rt;
 
-       __build_flow_key(&fl4, NULL, iph, oif,
+       __build_flow_key(net, &fl4, NULL, iph, oif,
                         RT_TOS(iph->tos), protocol, mark, flow_flags);
        rt = __ip_route_output_key(net, &fl4);
        if (!IS_ERR(rt)) {
@@ -1121,9 +1127,10 @@ void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
        const struct iphdr *iph = (const struct iphdr *) skb->data;
        struct flowi4 fl4;
        struct rtable *rt;
+       struct net *net = sock_net(sk);
 
-       __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
-       rt = __ip_route_output_key(sock_net(sk), &fl4);
+       __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
+       rt = __ip_route_output_key(net, &fl4);
        if (!IS_ERR(rt)) {
                __ip_do_redirect(rt, skb, &fl4, false);
                ip_rt_put(rt);
@@ -1596,6 +1603,19 @@ static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
        spin_unlock_bh(&fnhe_lock);
 }
 
+/*
+ * For each direction the route's lwtunnel state asks to redirect, save
+ * the route's current handler in the state's orig_* field and install
+ * the lwtunnel handler in its place.
+ */
+static void set_lwt_redirect(struct rtable *rth)
+{
+       struct dst_entry *dst = &rth->dst;
+
+       if (lwtunnel_output_redirect(dst->lwtstate)) {
+               dst->lwtstate->orig_output = dst->output;
+               dst->output = lwtunnel_output;
+       }
+
+       if (lwtunnel_input_redirect(dst->lwtstate)) {
+               dst->lwtstate->orig_input = dst->input;
+               dst->input = lwtunnel_input;
+       }
+}
+
 /* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
                           const struct fib_result *res,
@@ -1685,14 +1705,7 @@ rt_cache:
        rth->dst.input = ip_forward;
 
        rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
-       if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
-               rth->dst.lwtstate->orig_output = rth->dst.output;
-               rth->dst.output = lwtunnel_output;
-       }
-       if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
-               rth->dst.lwtstate->orig_input = rth->dst.input;
-               rth->dst.input = lwtunnel_input;
-       }
+       set_lwt_redirect(rth);
        skb_dst_set(skb, &rth->dst);
 out:
        err = 0;
@@ -1919,8 +1932,18 @@ local_input:
                rth->dst.error= -err;
                rth->rt_flags   &= ~RTCF_LOCAL;
        }
+
        if (do_cache) {
-               if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) {
+               struct fib_nh *nh = &FIB_RES_NH(res);
+
+               rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
+               if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
+                       WARN_ON(rth->dst.input == lwtunnel_input);
+                       rth->dst.lwtstate->orig_input = rth->dst.input;
+                       rth->dst.input = lwtunnel_input;
+               }
+
+               if (unlikely(!rt_cache_route(nh, rth))) {
                        rth->dst.flags |= DST_NOCACHE;
                        rt_add_uncached_list(rth);
                }
@@ -1980,25 +2003,35 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
         */
        if (ipv4_is_multicast(daddr)) {
                struct in_device *in_dev = __in_dev_get_rcu(dev);
+               int our = 0;
+
+               if (in_dev)
+                       our = ip_check_mc_rcu(in_dev, daddr, saddr,
+                                             ip_hdr(skb)->protocol);
+
+               /* check l3 master if no match yet */
+               if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
+                       struct in_device *l3_in_dev;
 
-               if (in_dev) {
-                       int our = ip_check_mc_rcu(in_dev, daddr, saddr,
-                                                 ip_hdr(skb)->protocol);
-                       if (our
+                       l3_in_dev = __in_dev_get_rcu(skb->dev);
+                       if (l3_in_dev)
+                               our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
+                                                     ip_hdr(skb)->protocol);
+               }
+
+               res = -EINVAL;
+               if (our
 #ifdef CONFIG_IP_MROUTE
-                               ||
-                           (!ipv4_is_local_multicast(daddr) &&
-                            IN_DEV_MFORWARD(in_dev))
+                       ||
+                   (!ipv4_is_local_multicast(daddr) &&
+                    IN_DEV_MFORWARD(in_dev))
 #endif
-                          ) {
-                               int res = ip_route_input_mc(skb, daddr, saddr,
-                                                           tos, dev, our);
-                               rcu_read_unlock();
-                               return res;
-                       }
+                  ) {
+                       res = ip_route_input_mc(skb, daddr, saddr,
+                                               tos, dev, our);
                }
                rcu_read_unlock();
-               return -EINVAL;
+               return res;
        }
        res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
        rcu_read_unlock();
@@ -2138,8 +2171,7 @@ add:
        }
 
        rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
-       if (lwtunnel_output_redirect(rth->dst.lwtstate))
-               rth->dst.output = lwtunnel_output;
+       set_lwt_redirect(rth);
 
        return rth;
 }
@@ -2266,7 +2298,8 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
                res.fi = NULL;
                res.table = NULL;
                if (fl4->flowi4_oif &&
-                   !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
+                   (ipv4_is_multicast(fl4->daddr) ||
+                   !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
                        /* Apparently, routing tables are wrong. Assume,
                           that the destination is on link.
 
@@ -2493,6 +2526,11 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
            nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
                goto nla_put_failure;
 
+       if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
+           nla_put_u32(skb, RTA_UID,
+                       from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
+               goto nla_put_failure;
+
        error = rt->dst.error;
 
        if (rt_is_input_route(rt)) {
@@ -2545,6 +2583,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
        int mark;
        struct sk_buff *skb;
        u32 table_id = RT_TABLE_MAIN;
+       kuid_t uid;
 
        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
        if (err < 0)
@@ -2572,6 +2611,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
        dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
        iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
        mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
+       if (tb[RTA_UID])
+               uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
+       else
+               uid = (iif ? INVALID_UID : current_uid());
 
        memset(&fl4, 0, sizeof(fl4));
        fl4.daddr = dst;
@@ -2579,6 +2622,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
        fl4.flowi4_tos = rtm->rtm_tos;
        fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
        fl4.flowi4_mark = mark;
+       fl4.flowi4_uid = uid;
 
        if (iif) {
                struct net_device *dev;
index e3c4043c27de289b7761cef4adbcd6c8f731d534..3e88467d70eec498e0a167474084c98c89069574 100644 (file)
@@ -334,6 +334,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
        treq = tcp_rsk(req);
        treq->rcv_isn           = ntohl(th->seq) - 1;
        treq->snt_isn           = cookie;
+       treq->ts_off            = 0;
        req->mss                = mss;
        ireq->ir_num            = ntohs(th->dest);
        ireq->ir_rmt_port       = th->source;
@@ -372,7 +373,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
                           RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
                           inet_sk_flowi_flags(sk),
                           opt->srr ? opt->faddr : ireq->ir_rmt_addr,
-                          ireq->ir_loc_addr, th->source, th->dest);
+                          ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
        security_req_classify_flow(req, flowi4_to_flowi(&fl4));
        rt = ip_route_output_key(sock_net(sk), &fl4);
        if (IS_ERR(rt)) {
index 1cb67de106fee1103aa487af1f889ae6aea0c80c..80bc36b25de21d5e6b1c3e6f6001258b38656d41 100644 (file)
@@ -96,11 +96,11 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low
                container_of(table->data, struct net, ipv4.ping_group_range.range);
        unsigned int seq;
        do {
-               seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
+               seq = read_seqbegin(&net->ipv4.ping_group_range.lock);
 
                *low = data[0];
                *high = data[1];
-       } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
+       } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq));
 }
 
 /* Update system visible IP port range */
@@ -109,10 +109,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig
        kgid_t *data = table->data;
        struct net *net =
                container_of(table->data, struct net, ipv4.ping_group_range.range);
-       write_seqlock(&net->ipv4.ip_local_ports.lock);
+       write_seqlock(&net->ipv4.ping_group_range.lock);
        data[0] = low;
        data[1] = high;
-       write_sequnlock(&net->ipv4.ip_local_ports.lock);
+       write_sequnlock(&net->ipv4.ping_group_range.lock);
 }
 
 /* Validate changes from /proc interface. */
index 3251fe71f39f2395befb0e662ca19423e6b9ea90..1149b48700a125b03359b4b65fbb3f1d8494e0c4 100644 (file)
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
-#include <asm/unaligned.h>
 #include <net/busy_poll.h>
 
 int sysctl_tcp_min_tso_segs __read_mostly = 2;
@@ -405,7 +404,6 @@ void tcp_init_sock(struct sock *sk)
        tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
        tp->snd_cwnd_clamp = ~0;
        tp->mss_cache = TCP_MSS_DEFAULT;
-       u64_stats_init(&tp->syncp);
 
        tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
        tcp_enable_early_retrans(tp);
@@ -998,8 +996,11 @@ do_error:
                goto out;
 out_err:
        /* make sure we wake any epoll edge trigger waiter */
-       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
+                    err == -EAGAIN)) {
                sk->sk_write_space(sk);
+               tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+       }
        return sk_stream_error(sk, flags, err);
 }
 
@@ -1164,7 +1165,7 @@ restart:
 
        err = -EPIPE;
        if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
-               goto out_err;
+               goto do_error;
 
        sg = !!(sk->sk_route_caps & NETIF_F_SG);
 
@@ -1241,7 +1242,7 @@ new_segment:
 
                        if (!skb_can_coalesce(skb, i, pfrag->page,
                                              pfrag->offset)) {
-                               if (i == sysctl_max_skb_frags || !sg) {
+                               if (i >= sysctl_max_skb_frags || !sg) {
                                        tcp_mark_push(tp, skb);
                                        goto new_segment;
                                }
@@ -1333,8 +1334,11 @@ do_error:
 out_err:
        err = sk_stream_error(sk, flags, err);
        /* make sure we wake any epoll edge trigger waiter */
-       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+       if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
+                    err == -EAGAIN)) {
                sk->sk_write_space(sk);
+               tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+       }
        release_sock(sk);
        return err;
 }
@@ -2302,7 +2306,7 @@ EXPORT_SYMBOL(tcp_disconnect);
 static inline bool tcp_can_repair_sock(const struct sock *sk)
 {
        return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
-               ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
+               (sk->sk_state != TCP_LISTEN);
 }
 
 static int tcp_repair_set_window(struct tcp_sock *tp, char __user *optbuf, int len)
@@ -2704,15 +2708,33 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_tcp_setsockopt);
 #endif
 
+/*
+ * Fill the chrono-derived fields of @info from @tp.
+ *
+ * Each chrono type from TCP_CHRONO_BUSY upwards is read from
+ * tp->chrono_stat[type - 1], scaled by USEC_PER_SEC / HZ, and summed into
+ * tcpi_busy_time; the RWND_LIMITED and SNDBUF_LIMITED values are also
+ * reported individually.
+ */
+static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
+                                     struct tcp_info *info)
+{
+       u64 usecs[__TCP_CHRONO_MAX];
+       u64 busy = 0;
+       enum tcp_chrono type;
+
+       for (type = TCP_CHRONO_BUSY; type < __TCP_CHRONO_MAX; ++type) {
+               u64 ticks = tp->chrono_stat[type - 1];
+
+               /* add the time elapsed since chrono_start for the type
+                * currently recorded in tp->chrono_type
+                */
+               if (type == tp->chrono_type)
+                       ticks += tcp_time_stamp - tp->chrono_start;
+
+               usecs[type] = ticks * (USEC_PER_SEC / HZ);
+               busy += usecs[type];
+       }
+
+       info->tcpi_busy_time = busy;
+       info->tcpi_rwnd_limited = usecs[TCP_CHRONO_RWND_LIMITED];
+       info->tcpi_sndbuf_limited = usecs[TCP_CHRONO_SNDBUF_LIMITED];
+}
+
 /* Return information about state of tcp endpoint in API format. */
 void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
        const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
        const struct inet_connection_sock *icsk = inet_csk(sk);
        u32 now = tcp_time_stamp, intv;
-       unsigned int start;
-       int notsent_bytes;
        u64 rate64;
+       bool slow;
        u32 rate;
 
        memset(info, 0, sizeof(*info));
@@ -2721,6 +2743,27 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 
        info->tcpi_state = sk_state_load(sk);
 
+       /* Report meaningful fields for all TCP states, including listeners */
+       rate = READ_ONCE(sk->sk_pacing_rate);
+       rate64 = rate != ~0U ? rate : ~0ULL;
+       info->tcpi_pacing_rate = rate64;
+
+       rate = READ_ONCE(sk->sk_max_pacing_rate);
+       rate64 = rate != ~0U ? rate : ~0ULL;
+       info->tcpi_max_pacing_rate = rate64;
+
+       info->tcpi_reordering = tp->reordering;
+       info->tcpi_snd_cwnd = tp->snd_cwnd;
+
+       if (info->tcpi_state == TCP_LISTEN) {
+               /* listeners aliased fields :
+                * tcpi_unacked -> Number of children ready for accept()
+                * tcpi_sacked  -> max backlog
+                */
+               info->tcpi_unacked = sk->sk_ack_backlog;
+               info->tcpi_sacked = sk->sk_max_ack_backlog;
+               return;
+       }
        info->tcpi_ca_state = icsk->icsk_ca_state;
        info->tcpi_retransmits = icsk->icsk_retransmits;
        info->tcpi_probes = icsk->icsk_probes_out;
@@ -2748,13 +2791,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        info->tcpi_snd_mss = tp->mss_cache;
        info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
 
-       if (info->tcpi_state == TCP_LISTEN) {
-               info->tcpi_unacked = sk->sk_ack_backlog;
-               info->tcpi_sacked = sk->sk_max_ack_backlog;
-       } else {
-               info->tcpi_unacked = tp->packets_out;
-               info->tcpi_sacked = tp->sacked_out;
-       }
+       info->tcpi_unacked = tp->packets_out;
+       info->tcpi_sacked = tp->sacked_out;
+
        info->tcpi_lost = tp->lost_out;
        info->tcpi_retrans = tp->retrans_out;
        info->tcpi_fackets = tp->fackets_out;
@@ -2768,34 +2807,25 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        info->tcpi_rtt = tp->srtt_us >> 3;
        info->tcpi_rttvar = tp->mdev_us >> 2;
        info->tcpi_snd_ssthresh = tp->snd_ssthresh;
-       info->tcpi_snd_cwnd = tp->snd_cwnd;
        info->tcpi_advmss = tp->advmss;
-       info->tcpi_reordering = tp->reordering;
 
        info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
        info->tcpi_rcv_space = tp->rcvq_space.space;
 
        info->tcpi_total_retrans = tp->total_retrans;
 
-       rate = READ_ONCE(sk->sk_pacing_rate);
-       rate64 = rate != ~0U ? rate : ~0ULL;
-       put_unaligned(rate64, &info->tcpi_pacing_rate);
+       slow = lock_sock_fast(sk);
 
-       rate = READ_ONCE(sk->sk_max_pacing_rate);
-       rate64 = rate != ~0U ? rate : ~0ULL;
-       put_unaligned(rate64, &info->tcpi_max_pacing_rate);
+       info->tcpi_bytes_acked = tp->bytes_acked;
+       info->tcpi_bytes_received = tp->bytes_received;
+       info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt);
+       tcp_get_info_chrono_stats(tp, info);
+
+       unlock_sock_fast(sk, slow);
 
-       do {
-               start = u64_stats_fetch_begin_irq(&tp->syncp);
-               put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked);
-               put_unaligned(tp->bytes_received, &info->tcpi_bytes_received);
-       } while (u64_stats_fetch_retry_irq(&tp->syncp, start));
        info->tcpi_segs_out = tp->segs_out;
        info->tcpi_segs_in = tp->segs_in;
 
-       notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt);
-       info->tcpi_notsent_bytes = max(0, notsent_bytes);
-
        info->tcpi_min_rtt = tcp_min_rtt(tp);
        info->tcpi_data_segs_in = tp->data_segs_in;
        info->tcpi_data_segs_out = tp->data_segs_out;
@@ -2806,11 +2836,31 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
        if (rate && intv) {
                rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
                do_div(rate64, intv);
-               put_unaligned(rate64, &info->tcpi_delivery_rate);
+               info->tcpi_delivery_rate = rate64;
        }
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
+struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
+{
+       const struct tcp_sock *tp = tcp_sk(sk);
+       struct sk_buff *stats;
+       struct tcp_info info;
+
+       stats = alloc_skb(3 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC);
+       if (!stats)
+               return NULL;
+
+       tcp_get_info_chrono_stats(tp, &info);
+       nla_put_u64_64bit(stats, TCP_NLA_BUSY,
+                         info.tcpi_busy_time, TCP_NLA_PAD);
+       nla_put_u64_64bit(stats, TCP_NLA_RWND_LIMITED,
+                         info.tcpi_rwnd_limited, TCP_NLA_PAD);
+       nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED,
+                         info.tcpi_sndbuf_limited, TCP_NLA_PAD);
+       return stats;
+}
+
 static int do_tcp_getsockopt(struct sock *sk, int level,
                int optname, char __user *optval, int __user *optlen)
 {
index 0ea66c2c93444198fa21629dac617160f78a5cee..b89bce4c721eed530f5cfc725b759147b38cef42 100644 (file)
  * observed, or adjust the sending rate if it estimates there is a
  * traffic policer, in order to keep the drop rate reasonable.
  *
+ * Here is a state transition diagram for BBR:
+ *
+ *             |
+ *             V
+ *    +---> STARTUP  ----+
+ *    |        |         |
+ *    |        V         |
+ *    |      DRAIN   ----+
+ *    |        |         |
+ *    |        V         |
+ *    +---> PROBE_BW ----+
+ *    |      ^    |      |
+ *    |      |    |      |
+ *    |      +----+      |
+ *    |                  |
+ *    +---- PROBE_RTT <--+
+ *
+ * A BBR flow starts in STARTUP, and ramps up its sending rate quickly.
+ * When it estimates the pipe is full, it enters DRAIN to drain the queue.
+ * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT.
+ * A long-lived BBR flow spends the vast majority of its time remaining
+ * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth
+ * in a fair manner, with a small, bounded queue. *If* a flow has been
+ * continuously sending for the entire min_rtt window, and hasn't seen an RTT
+ * sample that matches or decreases its min_rtt estimate for 10 seconds, then
+ * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe
+ * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if
+ * we estimated that we reached the full bw of the pipe then we enter PROBE_BW;
+ * otherwise we enter STARTUP to try to fill the pipe.
+ *
  * BBR is described in detail in:
  *   "BBR: Congestion-Based Congestion Control",
  *   Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
@@ -51,7 +81,7 @@ enum bbr_mode {
        BBR_STARTUP,    /* ramp up sending rate rapidly to fill pipe */
        BBR_DRAIN,      /* drain any queue created during startup */
        BBR_PROBE_BW,   /* discover, share bw: pace around estimated bw */
-       BBR_PROBE_RTT,  /* cut cwnd to min to probe min_rtt */
+       BBR_PROBE_RTT,  /* cut inflight to min to probe min_rtt */
 };
 
 /* BBR congestion control block */
index 1294af4e0127b7a9b98d6e9cfa9e3979c7d7086e..79c4817abc94d08265edb2dfa995e3e479148a16 100644 (file)
@@ -68,8 +68,9 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
 {
        int ret = 0;
 
-       /* all algorithms must implement ssthresh and cong_avoid ops */
-       if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) {
+       /* all algorithms must implement these */
+       if (!ca->ssthresh || !ca->undo_cwnd ||
+           !(ca->cong_avoid || ca->cong_control)) {
                pr_err("%s does not implement required ops\n", ca->name);
                return -EINVAL;
        }
@@ -200,8 +201,10 @@ static void tcp_reinit_congestion_control(struct sock *sk,
        icsk->icsk_ca_ops = ca;
        icsk->icsk_ca_setsockopt = 1;
 
-       if (sk->sk_state != TCP_CLOSE)
+       if (sk->sk_state != TCP_CLOSE) {
+               memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
                tcp_init_congestion_control(sk);
+       }
 }
 
 /* Manage refcounts on socket close. */
@@ -441,10 +444,19 @@ u32 tcp_reno_ssthresh(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
 
+u32 tcp_reno_undo_cwnd(struct sock *sk)
+{
+       const struct tcp_sock *tp = tcp_sk(sk);
+
+       return max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+}
+EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd);
+
 struct tcp_congestion_ops tcp_reno = {
        .flags          = TCP_CONG_NON_RESTRICTED,
        .name           = "reno",
        .owner          = THIS_MODULE,
        .ssthresh       = tcp_reno_ssthresh,
        .cong_avoid     = tcp_reno_cong_avoid,
+       .undo_cwnd      = tcp_reno_undo_cwnd,
 };
index 10d728b6804c259e459cc81a82e5c961839cc578..bde22ebb92a8fc47b474ab0794c6ce3e323dc44b 100644 (file)
@@ -56,6 +56,7 @@ struct dctcp {
        u32 next_seq;
        u32 ce_state;
        u32 delayed_ack_reserved;
+       u32 loss_cwnd;
 };
 
 static unsigned int dctcp_shift_g __read_mostly = 4; /* g = 1/2^4 */
@@ -96,6 +97,7 @@ static void dctcp_init(struct sock *sk)
                ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
 
                ca->delayed_ack_reserved = 0;
+               ca->loss_cwnd = 0;
                ca->ce_state = 0;
 
                dctcp_reset(tp, ca);
@@ -111,9 +113,10 @@ static void dctcp_init(struct sock *sk)
 
 static u32 dctcp_ssthresh(struct sock *sk)
 {
-       const struct dctcp *ca = inet_csk_ca(sk);
+       struct dctcp *ca = inet_csk_ca(sk);
        struct tcp_sock *tp = tcp_sk(sk);
 
+       ca->loss_cwnd = tp->snd_cwnd;
        return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
 }
 
@@ -185,8 +188,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
 
 static void dctcp_update_alpha(struct sock *sk, u32 flags)
 {
-       const struct tcp_sock *tp = tcp_sk(sk);
        struct dctcp *ca = inet_csk_ca(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
        u32 acked_bytes = tp->snd_una - ca->prior_snd_una;
 
        /* If ack did not advance snd_una, count dupack as MSS size.
@@ -226,6 +229,13 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
                WRITE_ONCE(ca->dctcp_alpha, alpha);
                dctcp_reset(tp, ca);
        }
+
+       if (flags & CA_ACK_ECE) {
+               unsigned int cwnd = dctcp_ssthresh(sk);
+
+               if (cwnd != tp->snd_cwnd)
+                       tp->snd_cwnd = cwnd;
+       }
 }
 
 static void dctcp_state(struct sock *sk, u8 new_state)
@@ -308,12 +318,20 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
        return 0;
 }
 
+static u32 dctcp_cwnd_undo(struct sock *sk)
+{
+       const struct dctcp *ca = inet_csk_ca(sk);
+
+       return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
 static struct tcp_congestion_ops dctcp __read_mostly = {
        .init           = dctcp_init,
        .in_ack_event   = dctcp_update_alpha,
        .cwnd_event     = dctcp_cwnd_event,
        .ssthresh       = dctcp_ssthresh,
        .cong_avoid     = tcp_reno_cong_avoid,
+       .undo_cwnd      = dctcp_cwnd_undo,
        .set_state      = dctcp_state,
        .get_info       = dctcp_get_info,
        .flags          = TCP_CONG_NEEDS_ECN,
@@ -324,6 +342,7 @@ static struct tcp_congestion_ops dctcp __read_mostly = {
 static struct tcp_congestion_ops dctcp_reno __read_mostly = {
        .ssthresh       = tcp_reno_ssthresh,
        .cong_avoid     = tcp_reno_cong_avoid,
+       .undo_cwnd      = tcp_reno_undo_cwnd,
        .get_info       = dctcp_get_info,
        .owner          = THIS_MODULE,
        .name           = "dctcp-reno",
index db7842495a641829a8725cb436ed2fb3aa5d53e4..6d9879e93648a0c60579586242643ba364f4e506 100644 (file)
@@ -94,6 +94,7 @@ static const struct hstcp_aimd_val {
 
 struct hstcp {
        u32     ai;
+       u32     loss_cwnd;
 };
 
 static void hstcp_init(struct sock *sk)
@@ -150,16 +151,24 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static u32 hstcp_ssthresh(struct sock *sk)
 {
        const struct tcp_sock *tp = tcp_sk(sk);
-       const struct hstcp *ca = inet_csk_ca(sk);
+       struct hstcp *ca = inet_csk_ca(sk);
 
+       ca->loss_cwnd = tp->snd_cwnd;
        /* Do multiplicative decrease */
        return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U);
 }
 
+static u32 hstcp_cwnd_undo(struct sock *sk)
+{
+       const struct hstcp *ca = inet_csk_ca(sk);
+
+       return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
 
 static struct tcp_congestion_ops tcp_highspeed __read_mostly = {
        .init           = hstcp_init,
        .ssthresh       = hstcp_ssthresh,
+       .undo_cwnd      = hstcp_cwnd_undo,
        .cong_avoid     = hstcp_cong_avoid,
 
        .owner          = THIS_MODULE,
index 083831e359df92ca9ba0fe7dd5a7a76fe41a94b0..0f7175c3338e062a4a6507aacfdebc89e97a1948 100644 (file)
@@ -166,6 +166,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static struct tcp_congestion_ops tcp_hybla __read_mostly = {
        .init           = hybla_init,
        .ssthresh       = tcp_reno_ssthresh,
+       .undo_cwnd      = tcp_reno_undo_cwnd,
        .cong_avoid     = hybla_cong_avoid,
        .set_state      = hybla_state,
 
index c8e6d86be11421664f8832666812887a7a6bd07b..60352ff4f5a85f065793a5fb21c0d69713a0453a 100644 (file)
@@ -48,6 +48,7 @@ struct illinois {
        u32     end_seq;        /* right edge of current RTT */
        u32     alpha;          /* Additive increase */
        u32     beta;           /* Muliplicative decrease */
+       u32     loss_cwnd;      /* cwnd on loss */
        u16     acked;          /* # packets acked by current ACK */
        u8      rtt_above;      /* average rtt has gone above threshold */
        u8      rtt_low;        /* # of rtts measurements below threshold */
@@ -296,10 +297,18 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);
        struct illinois *ca = inet_csk_ca(sk);
 
+       ca->loss_cwnd = tp->snd_cwnd;
        /* Multiplicative decrease */
        return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U);
 }
 
+static u32 tcp_illinois_cwnd_undo(struct sock *sk)
+{
+       const struct illinois *ca = inet_csk_ca(sk);
+
+       return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
 /* Extract info for Tcp socket info provided via netlink. */
 static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
                                union tcp_cc_info *info)
@@ -327,6 +336,7 @@ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
 static struct tcp_congestion_ops tcp_illinois __read_mostly = {
        .init           = tcp_illinois_init,
        .ssthresh       = tcp_illinois_ssthresh,
+       .undo_cwnd      = tcp_illinois_cwnd_undo,
        .cong_avoid     = tcp_illinois_cong_avoid,
        .set_state      = tcp_illinois_state,
        .get_info       = tcp_illinois_info,
index a27b9c0e27c08b4e4aeaff3d0bfdf3ae561ba4d8..fe668c1b9cedd90a3d6546be6af44b70bd0b3d7f 100644 (file)
@@ -85,6 +85,7 @@ int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 1;
 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
+EXPORT_SYMBOL(sysctl_tcp_timestamps);
 
 /* rfc5961 challenge ack rate limiting */
 int sysctl_tcp_challenge_ack_limit = 1000;
@@ -2394,10 +2395,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
        if (tp->prior_ssthresh) {
                const struct inet_connection_sock *icsk = inet_csk(sk);
 
-               if (icsk->icsk_ca_ops->undo_cwnd)
-                       tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
-               else
-                       tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
+               tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
 
                if (tp->prior_ssthresh > tp->snd_ssthresh) {
                        tp->snd_ssthresh = tp->prior_ssthresh;
@@ -3181,6 +3179,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                        tp->lost_skb_hint = NULL;
        }
 
+       if (!skb)
+               tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
+
        if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una)))
                tp->snd_up = tp->snd_una;
 
@@ -3351,9 +3352,7 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
        u32 delta = ack - tp->snd_una;
 
        sock_owned_by_me((struct sock *)tp);
-       u64_stats_update_begin_raw(&tp->syncp);
        tp->bytes_acked += delta;
-       u64_stats_update_end_raw(&tp->syncp);
        tp->snd_una = ack;
 }
 
@@ -3363,9 +3362,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
        u32 delta = seq - tp->rcv_nxt;
 
        sock_owned_by_me((struct sock *)tp);
-       u64_stats_update_begin_raw(&tp->syncp);
        tp->bytes_received += delta;
-       u64_stats_update_end_raw(&tp->syncp);
        tp->rcv_nxt = seq;
 }
 
@@ -5063,8 +5060,11 @@ static void tcp_check_space(struct sock *sk)
                /* pairs with tcp_poll() */
                smp_mb__after_atomic();
                if (sk->sk_socket &&
-                   test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+                   test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
                        tcp_new_space(sk);
+                       if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+                               tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
+               }
        }
 }
 
@@ -6298,13 +6298,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                        goto drop;
        }
 
-
-       /* Accept backlog is full. If we have already queued enough
-        * of warm entries in syn queue, drop request. It is better than
-        * clogging syn queue with openreqs with exponentially increasing
-        * timeout.
-        */
-       if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) {
+       if (sk_acceptq_is_full(sk)) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
                goto drop;
        }
@@ -6314,6 +6308,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                goto drop;
 
        tcp_rsk(req)->af_specific = af_ops;
+       tcp_rsk(req)->ts_off = 0;
 
        tcp_clear_options(&tmp_opt);
        tmp_opt.mss_clamp = af_ops->mss_clamp;
@@ -6335,6 +6330,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
        if (security_inet_conn_request(sk, skb, req))
                goto drop_and_free;
 
+       if (isn && tmp_opt.tstamp_ok)
+               af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
+
        if (!want_cookie && !isn) {
                /* VJ's idea. We save last timestamp seen
                 * from the destination in peer table, when entering
@@ -6375,7 +6373,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                        goto drop_and_release;
                }
 
-               isn = af_ops->init_seq(skb);
+               isn = af_ops->init_seq(skb, &tcp_rsk(req)->ts_off);
        }
        if (!dst) {
                dst = af_ops->route_req(sk, &fl, req, NULL);
@@ -6387,6 +6385,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
        if (want_cookie) {
                isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
+               tcp_rsk(req)->ts_off = 0;
                req->cookie_ts = tmp_opt.tstamp_ok;
                if (!tmp_opt.tstamp_ok)
                        inet_rsk(req)->ecn_ok = 0;
index 83b3d0b8c4812d734728360a694a596a17257d02..b50f05905cedc9be6306f57a8ba89ddf8e22b982 100644 (file)
@@ -86,7 +86,6 @@
 
 int sysctl_tcp_tw_reuse __read_mostly;
 int sysctl_tcp_low_latency __read_mostly;
-EXPORT_SYMBOL(sysctl_tcp_low_latency);
 
 #ifdef CONFIG_TCP_MD5SIG
 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
@@ -96,12 +95,12 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 struct inet_hashinfo tcp_hashinfo;
 EXPORT_SYMBOL(tcp_hashinfo);
 
-static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
+static u32 tcp_v4_init_sequence(const struct sk_buff *skb, u32 *tsoff)
 {
        return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
                                          ip_hdr(skb)->saddr,
                                          tcp_hdr(skb)->dest,
-                                         tcp_hdr(skb)->source);
+                                         tcp_hdr(skb)->source, tsoff);
 }
 
 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
@@ -238,7 +237,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
                                                           inet->inet_daddr,
                                                           inet->inet_sport,
-                                                          usin->sin_port);
+                                                          usin->sin_port,
+                                                          &tp->tsoffset);
 
        inet->inet_id = tp->write_seq ^ jiffies;
 
@@ -692,6 +692,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
                     offsetof(struct inet_timewait_sock, tw_bound_dev_if));
 
        arg.tos = ip_hdr(skb)->tos;
+       arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        local_bh_disable();
        ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -712,7 +713,7 @@ out:
    outside socket context is ugly, certainly. What can I do?
  */
 
-static void tcp_v4_send_ack(struct net *net,
+static void tcp_v4_send_ack(const struct sock *sk,
                            struct sk_buff *skb, u32 seq, u32 ack,
                            u32 win, u32 tsval, u32 tsecr, int oif,
                            struct tcp_md5sig_key *key,
@@ -727,6 +728,7 @@ static void tcp_v4_send_ack(struct net *net,
 #endif
                        ];
        } rep;
+       struct net *net = sock_net(sk);
        struct ip_reply_arg arg;
 
        memset(&rep.th, 0, sizeof(struct tcphdr));
@@ -776,6 +778,7 @@ static void tcp_v4_send_ack(struct net *net,
        if (oif)
                arg.bound_dev_if = oif;
        arg.tos = tos;
+       arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL);
        local_bh_disable();
        ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -791,7 +794,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
        struct inet_timewait_sock *tw = inet_twsk(sk);
        struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
 
-       tcp_v4_send_ack(sock_net(sk), skb,
+       tcp_v4_send_ack(sk, skb,
                        tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
                        tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
                        tcp_time_stamp + tcptw->tw_ts_offset,
@@ -819,10 +822,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
         * exception of <SYN> segments, MUST be right-shifted by
         * Rcv.Wind.Shift bits:
         */
-       tcp_v4_send_ack(sock_net(sk), skb, seq,
+       tcp_v4_send_ack(sk, skb, seq,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
-                       tcp_time_stamp,
+                       tcp_time_stamp + tcp_rsk(req)->ts_off,
                        req->ts_recent,
                        0,
                        tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
@@ -1565,6 +1568,21 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_add_backlog);
 
+int tcp_filter(struct sock *sk, struct sk_buff *skb)
+{
+       struct tcphdr *th = (struct tcphdr *)skb->data;
+       unsigned int eaten = skb->len;
+       int err;
+
+       err = sk_filter_trim_cap(sk, skb, th->doff * 4);
+       if (!err) {
+               eaten -= skb->len;
+               TCP_SKB_CB(skb)->end_seq -= eaten;
+       }
+       return err;
+}
+EXPORT_SYMBOL(tcp_filter);
+
 /*
  *     From tcp_input.c
  */
@@ -1677,8 +1695,10 @@ process:
 
        nf_reset(skb);
 
-       if (sk_filter(sk, skb))
+       if (tcp_filter(sk, skb))
                goto discard_and_relse;
+       th = (const struct tcphdr *)skb->data;
+       iph = ip_hdr(skb);
 
        skb->dev = NULL;
 
@@ -1887,7 +1907,6 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
        struct tcp_iter_state *st = seq->private;
        struct net *net = seq_file_net(seq);
        struct inet_listen_hashbucket *ilb;
-       struct inet_connection_sock *icsk;
        struct sock *sk = cur;
 
        if (!sk) {
@@ -1909,7 +1928,6 @@ get_sk:
                        continue;
                if (sk->sk_family == st->family)
                        return sk;
-               icsk = inet_csk(sk);
        }
        spin_unlock(&ilb->lock);
        st->offset = 0;
index c67ece1390c253304454cd41eed59cae26dd10a8..046fd3910873306d74207615d6997e1c847ea361 100644 (file)
@@ -316,6 +316,7 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
 static struct tcp_congestion_ops tcp_lp __read_mostly = {
        .init = tcp_lp_init,
        .ssthresh = tcp_reno_ssthresh,
+       .undo_cwnd = tcp_reno_undo_cwnd,
        .cong_avoid = tcp_lp_cong_avoid,
        .pkts_acked = tcp_lp_pkts_acked,
 
index bf1f3b2b29d1226b572fdc6286c697c2f75b805b..d46f4d5b1c62edf95791e9d47d966c3bc61e1888 100644 (file)
@@ -742,14 +742,7 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
        rcu_read_unlock();
 }
 
-static struct genl_family tcp_metrics_nl_family = {
-       .id             = GENL_ID_GENERATE,
-       .hdrsize        = 0,
-       .name           = TCP_METRICS_GENL_NAME,
-       .version        = TCP_METRICS_GENL_VERSION,
-       .maxattr        = TCP_METRICS_ATTR_MAX,
-       .netnsok        = true,
-};
+static struct genl_family tcp_metrics_nl_family;
 
 static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
        [TCP_METRICS_ATTR_ADDR_IPV4]    = { .type = NLA_U32, },
@@ -1116,6 +1109,17 @@ static const struct genl_ops tcp_metrics_nl_ops[] = {
        },
 };
 
+static struct genl_family tcp_metrics_nl_family __ro_after_init = {
+       .hdrsize        = 0,
+       .name           = TCP_METRICS_GENL_NAME,
+       .version        = TCP_METRICS_GENL_VERSION,
+       .maxattr        = TCP_METRICS_ATTR_MAX,
+       .netnsok        = true,
+       .module         = THIS_MODULE,
+       .ops            = tcp_metrics_nl_ops,
+       .n_ops          = ARRAY_SIZE(tcp_metrics_nl_ops),
+};
+
 static unsigned int tcpmhash_entries;
 static int __init set_tcpmhash_entries(char *str)
 {
@@ -1179,8 +1183,7 @@ void __init tcp_metrics_init(void)
        if (ret < 0)
                panic("Could not allocate the tcp_metrics hash table\n");
 
-       ret = genl_register_family_with_ops(&tcp_metrics_nl_family,
-                                           tcp_metrics_nl_ops);
+       ret = genl_register_family(&tcp_metrics_nl_family);
        if (ret < 0)
                panic("Could not register tcp_metrics generic netlink\n");
 }
index 6234ebaa7db109d010ad4396ae36e1885f5f72f8..28ce5ee831f59d0a66d49b27c766b396b3e62ff9 100644 (file)
@@ -532,7 +532,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
                        newtp->rx_opt.ts_recent_stamp = 0;
                        newtp->tcp_header_len = sizeof(struct tcphdr);
                }
-               newtp->tsoffset = 0;
+               newtp->tsoffset = treq->ts_off;
 #ifdef CONFIG_TCP_MD5SIG
                newtp->md5sig_info = NULL;      /*XXX*/
                if (newtp->af_specific->md5_lookup(sk, newsk))
@@ -581,6 +581,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
                if (tmp_opt.saw_tstamp) {
                        tmp_opt.ts_recent = req->ts_recent;
+                       if (tmp_opt.rcv_tsecr)
+                               tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
                        /* We do not store true stamp, but it is not required,
                         * it can be estimated (approximately)
                         * from another data.
index 896e9dfbdb5cd9ca0fa003f6be2c5cd332dde7cf..c7adcb57654ea57d1ba6702c91743cb7d2c74d28 100644 (file)
@@ -640,7 +640,7 @@ static unsigned int tcp_synack_options(struct request_sock *req,
        }
        if (likely(ireq->tstamp_ok)) {
                opts->options |= OPTION_TS;
-               opts->tsval = tcp_skb_timestamp(skb);
+               opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
                opts->tsecr = req->ts_recent;
                remaining -= TCPOLEN_TSTAMP_ALIGNED;
        }
@@ -1514,6 +1514,18 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
                if (sysctl_tcp_slow_start_after_idle &&
                    (s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto)
                        tcp_cwnd_application_limited(sk);
+
+               /* The following conditions together indicate the starvation
+                * is caused by insufficient sender buffer:
+                * 1) just sent some data (see tcp_write_xmit)
+                * 2) not cwnd limited (this else condition)
+                * 3) no more data to send (null tcp_send_head)
+                * 4) application is hitting buffer limit (SOCK_NOSPACE)
+                */
+               if (!tcp_send_head(sk) && sk->sk_socket &&
+                   test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
+                   (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+                       tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
        }
 }
 
@@ -2081,6 +2093,47 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
        return false;
 }
 
+static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
+{
+       const u32 now = tcp_time_stamp;
+
+       if (tp->chrono_type > TCP_CHRONO_UNSPEC)
+               tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
+       tp->chrono_start = now;
+       tp->chrono_type = new;
+}
+
+void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       /* If there are multiple conditions worthy of tracking in a
+        * chronograph then the highest priority enum takes precedence
+        * over the other conditions. So if something "more interesting"
+        * starts happening, stop the previous chrono and start a new one.
+        */
+       if (type > tp->chrono_type)
+               tcp_chrono_set(tp, type);
+}
+
+void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+
+
+       /* There are multiple conditions worthy of tracking in a
+        * chronograph, and the highest priority enum takes
+        * precedence over the other conditions (see tcp_chrono_start).
+        * If a condition stops, we only stop chrono tracking if
+        * it's the "most interesting" or current chrono we are
+        * tracking, and start the busy chrono if we have pending data.
+        */
+       if (tcp_write_queue_empty(sk))
+               tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
+       else if (type == tp->chrono_type)
+               tcp_chrono_set(tp, TCP_CHRONO_BUSY);
+}
+
 /* This routine writes packets to the network.  It advances the
  * send_head.  This happens as incoming acks open up the remote
  * window for us.
@@ -2103,7 +2156,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
        unsigned int tso_segs, sent_pkts;
        int cwnd_quota;
        int result;
-       bool is_cwnd_limited = false;
+       bool is_cwnd_limited = false, is_rwnd_limited = false;
        u32 max_segs;
 
        sent_pkts = 0;
@@ -2140,8 +2193,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
                                break;
                }
 
-               if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
+               if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) {
+                       is_rwnd_limited = true;
                        break;
+               }
 
                if (tso_segs == 1) {
                        if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
@@ -2186,6 +2241,11 @@ repair:
                        break;
        }
 
+       if (is_rwnd_limited)
+               tcp_chrono_start(sk, TCP_CHRONO_RWND_LIMITED);
+       else
+               tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
+
        if (likely(sent_pkts)) {
                if (tcp_in_cwnd_reduction(sk))
                        tp->prr_out += sent_pkts;
@@ -2514,7 +2574,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 }
 
 /* Collapses two adjacent SKB's during retransmission. */
-static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
+static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
@@ -2525,13 +2585,17 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 
        BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
 
+       if (next_skb_size) {
+               if (next_skb_size <= skb_availroom(skb))
+                       skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
+                                     next_skb_size);
+               else if (!skb_shift(skb, next_skb, next_skb_size))
+                       return false;
+       }
        tcp_highest_sack_combine(sk, next_skb, skb);
 
        tcp_unlink_write_queue(next_skb, sk);
 
-       skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
-                                 next_skb_size);
-
        if (next_skb->ip_summed == CHECKSUM_PARTIAL)
                skb->ip_summed = CHECKSUM_PARTIAL;
 
@@ -2560,6 +2624,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
        tcp_skb_collapse_tstamp(skb, next_skb);
 
        sk_wmem_free_skb(sk, next_skb);
+       return true;
 }
 
 /* Check if coalescing SKBs is legal. */
@@ -2567,14 +2632,11 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
 {
        if (tcp_skb_pcount(skb) > 1)
                return false;
-       /* TODO: SACK collapsing could be used to remove this condition */
-       if (skb_shinfo(skb)->nr_frags != 0)
-               return false;
        if (skb_cloned(skb))
                return false;
        if (skb == tcp_send_head(sk))
                return false;
-       /* Some heurestics for collapsing over SACK'd could be invented */
+       /* Some heuristics for collapsing over SACK'd could be invented */
        if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
                return false;
 
@@ -2612,16 +2674,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 
                if (space < 0)
                        break;
-               /* Punt if not enough space exists in the first SKB for
-                * the data in the second
-                */
-               if (skb->len > skb_availroom(to))
-                       break;
 
                if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
                        break;
 
-               tcp_collapse_retrans(sk, to);
+               if (!tcp_collapse_retrans(sk, to))
+                       break;
        }
 }
 
@@ -3300,6 +3358,8 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
        fo->copied = space;
 
        tcp_connect_queue_skb(sk, syn_data);
+       if (syn_data->len)
+               tcp_chrono_start(sk, TCP_CHRONO_BUSY);
 
        err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
 
index bf5ea9e9bbc1ed3c07c03f9db69b9848cf83ec8e..f2123075ce6e1be4753e26bb1db81423e272caef 100644 (file)
 #define TCP_SCALABLE_AI_CNT    50U
 #define TCP_SCALABLE_MD_SCALE  3
 
+struct scalable {
+       u32 loss_cwnd;
+};
+
 static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 {
        struct tcp_sock *tp = tcp_sk(sk);
@@ -32,12 +36,23 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 static u32 tcp_scalable_ssthresh(struct sock *sk)
 {
        const struct tcp_sock *tp = tcp_sk(sk);
+       struct scalable *ca = inet_csk_ca(sk);
+
+       ca->loss_cwnd = tp->snd_cwnd;
 
        return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U);
 }
 
+static u32 tcp_scalable_cwnd_undo(struct sock *sk)
+{
+       const struct scalable *ca = inet_csk_ca(sk);
+
+       return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
+}
+
 static struct tcp_congestion_ops tcp_scalable __read_mostly = {
        .ssthresh       = tcp_scalable_ssthresh,
+       .undo_cwnd      = tcp_scalable_cwnd_undo,
        .cong_avoid     = tcp_scalable_cong_avoid,
 
        .owner          = THIS_MODULE,
index 4c4bac1b5eab221928c569592c833e1bfcba748d..218cfcc77650004fea3f4bdfe24760ff0f634acc 100644 (file)
@@ -307,6 +307,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_get_info);
 static struct tcp_congestion_ops tcp_vegas __read_mostly = {
        .init           = tcp_vegas_init,
        .ssthresh       = tcp_reno_ssthresh,
+       .undo_cwnd      = tcp_reno_undo_cwnd,
        .cong_avoid     = tcp_vegas_cong_avoid,
        .pkts_acked     = tcp_vegas_pkts_acked,
        .set_state      = tcp_vegas_state,
index 40171e163cffa723f3eb1539740a8a7c3e963f53..76005d4b8dfc2287009628aa07912e0183ea2f05 100644 (file)
@@ -30,6 +30,7 @@ struct veno {
        u32 basertt;            /* the min of all Veno rtt measurements seen (in usec) */
        u32 inc;                /* decide whether to increase cwnd */
        u32 diff;               /* calculate the diff rate */
+       u32 loss_cwnd;          /* cwnd when loss occurred */
 };
 
 /* There are several situations when we must "re-start" Veno:
@@ -193,6 +194,7 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
        const struct tcp_sock *tp = tcp_sk(sk);
        struct veno *veno = inet_csk_ca(sk);
 
+       veno->loss_cwnd = tp->snd_cwnd;
        if (veno->diff < beta)
                /* in "non-congestive state", cut cwnd by 1/5 */
                return max(tp->snd_cwnd * 4 / 5, 2U);
@@ -201,9 +203,17 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
                return max(tp->snd_cwnd >> 1U, 2U);
 }
 
+static u32 tcp_veno_cwnd_undo(struct sock *sk)
+{
+       const struct veno *veno = inet_csk_ca(sk);
+
+       return max(tcp_sk(sk)->snd_cwnd, veno->loss_cwnd);
+}
+
 static struct tcp_congestion_ops tcp_veno __read_mostly = {
        .init           = tcp_veno_init,
        .ssthresh       = tcp_veno_ssthresh,
+       .undo_cwnd      = tcp_veno_cwnd_undo,
        .cong_avoid     = tcp_veno_cong_avoid,
        .pkts_acked     = tcp_veno_pkts_acked,
        .set_state      = tcp_veno_state,
index 4b03a2e2a0504617813838746c13691cf86557f6..fed66dc0e0f5f242cf0af25434fa9cfa89998958 100644 (file)
@@ -278,6 +278,7 @@ static struct tcp_congestion_ops tcp_westwood __read_mostly = {
        .init           = tcp_westwood_init,
        .ssthresh       = tcp_reno_ssthresh,
        .cong_avoid     = tcp_reno_cong_avoid,
+       .undo_cwnd      = tcp_reno_undo_cwnd,
        .cwnd_event     = tcp_westwood_event,
        .in_ack_event   = tcp_westwood_ack,
        .get_info       = tcp_westwood_info,
index 9c5fc973267fe542a41f42c249e564dc8f5a0624..e6ff99c4bd3b6914a6e79dc3bb94aa45176dae9e 100644 (file)
@@ -37,6 +37,7 @@ struct yeah {
        u32 fast_count;
 
        u32 pkts_acked;
+       u32 loss_cwnd;
 };
 
 static void tcp_yeah_init(struct sock *sk)
@@ -219,13 +220,22 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
 
        yeah->fast_count = 0;
        yeah->reno_count = max(yeah->reno_count>>1, 2U);
+       yeah->loss_cwnd = tp->snd_cwnd;
 
        return max_t(int, tp->snd_cwnd - reduction, 2);
 }
 
+static u32 tcp_yeah_cwnd_undo(struct sock *sk)
+{
+       const struct yeah *yeah = inet_csk_ca(sk);
+
+       return max(tcp_sk(sk)->snd_cwnd, yeah->loss_cwnd);
+}
+
 static struct tcp_congestion_ops tcp_yeah __read_mostly = {
        .init           = tcp_yeah_init,
        .ssthresh       = tcp_yeah_ssthresh,
+       .undo_cwnd      = tcp_yeah_cwnd_undo,
        .cong_avoid     = tcp_yeah_cong_avoid,
        .set_state      = tcp_vegas_state,
        .cwnd_event     = tcp_vegas_cwnd_event,
index c8332715ee2dc439fe4f75e0a65779698d58151f..e1d0bf8eba4b4aabaa8bb1972cd3b2a57c3ceca4 100644 (file)
@@ -580,7 +580,8 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);
  * Does increment socket refcount.
  */
 #if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
-    IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)
+    IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \
+    IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
 struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
                             __be32 daddr, __be16 dport, int dif)
 {
@@ -1019,7 +1020,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
                                   RT_SCOPE_UNIVERSE, sk->sk_protocol,
                                   flow_flags,
-                                  faddr, saddr, dport, inet->inet_sport);
+                                  faddr, saddr, dport, inet->inet_sport,
+                                  sk->sk_uid);
 
                security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
                rt = ip_route_output_flow(net, fl4, sk);
@@ -1172,26 +1174,26 @@ out:
        return ret;
 }
 
+/* fully reclaim rmem/fwd memory allocated for skb */
 static void udp_rmem_release(struct sock *sk, int size, int partial)
 {
        int amt;
 
        atomic_sub(size, &sk->sk_rmem_alloc);
-
-       spin_lock_bh(&sk->sk_receive_queue.lock);
        sk->sk_forward_alloc += size;
        amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
        sk->sk_forward_alloc -= amt;
-       spin_unlock_bh(&sk->sk_receive_queue.lock);
 
        if (amt)
                __sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
 }
 
-static void udp_rmem_free(struct sk_buff *skb)
+/* Note: called with sk_receive_queue.lock held */
+void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
 {
-       udp_rmem_release(skb->sk, skb->truesize, 1);
+       udp_rmem_release(sk, skb->truesize, 1);
 }
+EXPORT_SYMBOL(udp_skb_destructor);
 
 int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 {
@@ -1228,9 +1230,9 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 
        sk->sk_forward_alloc -= size;
 
-       /* the skb owner in now the udp socket */
-       skb->sk = sk;
-       skb->destructor = udp_rmem_free;
+       /* no need to setup a destructor, we will explicitly release the
+        * forward allocated memory on dequeue
+        */
        skb->dev = NULL;
        sock_skb_set_dropcount(sk, skb);
 
@@ -1251,13 +1253,21 @@ drop:
 }
 EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
 
-static void udp_destruct_sock(struct sock *sk)
+void udp_destruct_sock(struct sock *sk)
 {
        /* reclaim completely the forward allocated memory */
-       __skb_queue_purge(&sk->sk_receive_queue);
-       udp_rmem_release(sk, 0, 0);
+       unsigned int total = 0;
+       struct sk_buff *skb;
+
+       while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+               total += skb->truesize;
+               kfree_skb(skb);
+       }
+       udp_rmem_release(sk, total, 0);
+
        inet_sock_destruct(sk);
 }
+EXPORT_SYMBOL_GPL(udp_destruct_sock);
 
 int udp_init_sock(struct sock *sk)
 {
@@ -1287,12 +1297,11 @@ EXPORT_SYMBOL_GPL(skb_consume_udp);
  */
 static int first_packet_length(struct sock *sk)
 {
-       struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
+       struct sk_buff_head *rcvq = &sk->sk_receive_queue;
        struct sk_buff *skb;
+       int total = 0;
        int res;
 
-       __skb_queue_head_init(&list_kill);
-
        spin_lock_bh(&rcvq->lock);
        while ((skb = skb_peek(rcvq)) != NULL &&
                udp_lib_checksum_complete(skb)) {
@@ -1302,12 +1311,13 @@ static int first_packet_length(struct sock *sk)
                                IS_UDPLITE(sk));
                atomic_inc(&sk->sk_drops);
                __skb_unlink(skb, rcvq);
-               __skb_queue_tail(&list_kill, skb);
+               total += skb->truesize;
+               kfree_skb(skb);
        }
        res = skb ? skb->len : -1;
+       if (total)
+               udp_rmem_release(sk, total, 1);
        spin_unlock_bh(&rcvq->lock);
-
-       __skb_queue_purge(&list_kill);
        return res;
 }
 
@@ -1362,8 +1372,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 
 try_again:
        peeking = off = sk_peek_offset(sk, flags);
-       skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-                                 &peeked, &off, &err);
+       skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
        if (!skb)
                return err;
 
@@ -1380,7 +1389,8 @@ try_again:
         * coverage checksum (UDP-Lite), do it before the copy.
         */
 
-       if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
+       if (copied < ulen || peeking ||
+           (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
                checksum_valid = !udp_lib_checksum_complete(skb);
                if (!checksum_valid)
                        goto csum_copy_err;
@@ -1420,7 +1430,7 @@ try_again:
                *addr_len = sizeof(*sin);
        }
        if (inet->cmsg_flags)
-               ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr) + off);
+               ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
 
        err = copied;
        if (flags & MSG_TRUNC)
@@ -1442,7 +1452,7 @@ csum_copy_err:
        goto try_again;
 }
 
-int udp_disconnect(struct sock *sk, int flags)
+int __udp_disconnect(struct sock *sk, int flags)
 {
        struct inet_sock *inet = inet_sk(sk);
        /*
@@ -1464,6 +1474,15 @@ int udp_disconnect(struct sock *sk, int flags)
        sk_dst_reset(sk);
        return 0;
 }
+EXPORT_SYMBOL(__udp_disconnect);
+
+int udp_disconnect(struct sock *sk, int flags)
+{
+       lock_sock(sk);
+       __udp_disconnect(sk, flags);
+       release_sock(sk);
+       return 0;
+}
 EXPORT_SYMBOL(udp_disconnect);
 
 void udp_lib_unhash(struct sock *sk)
@@ -1543,7 +1562,7 @@ static void udp_v4_rehash(struct sock *sk)
        udp_lib_rehash(sk, new_hash);
 }
 
-static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
        int rc;
 
@@ -1551,6 +1570,8 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
                sock_rps_save_rxhash(sk, skb);
                sk_mark_napi_id(sk, skb);
                sk_incoming_cpu_update(sk);
+       } else {
+               sk_mark_napi_id_once(sk, skb);
        }
 
        rc = __udp_enqueue_schedule_skb(sk, skb);
@@ -1722,10 +1743,10 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 
        if (use_hash2) {
                hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
-                           udp_table.mask;
-               hash2 = udp4_portaddr_hash(net, daddr, hnum) & udp_table.mask;
+                           udptable->mask;
+               hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask;
 start_lookup:
-               hslot = &udp_table.hash2[hash2];
+               hslot = &udptable->hash2[hash2];
                offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
        }
 
@@ -2272,7 +2293,7 @@ int udp_abort(struct sock *sk, int err)
 
        sk->sk_err = err;
        sk->sk_error_report(sk);
-       udp_disconnect(sk, 0);
+       __udp_disconnect(sk, 0);
 
        release_sock(sk);
 
index 7e0fe4bdd96702256b7608ad3fe45df114573702..feb50a16398dfa856fd928fe823b4f6556d2caa1 100644 (file)
@@ -25,7 +25,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
                int flags, int *addr_len);
 int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
                 int flags);
-int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 void udp_destroy_sock(struct sock *sk);
 
 #ifdef CONFIG_PROC_FS
index f9333c9636076501fbc8df1806ee057dabddb4a5..b2be1d9757efb8ce8b82dc0a0fe3a475d193ea5b 100644 (file)
@@ -295,7 +295,7 @@ unflush:
 
        skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
        skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
-       pp = udp_sk(sk)->gro_receive(sk, head, skb);
+       pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
 
 out_unlock:
        rcu_read_unlock();
index af817158d830c0da080935ba29e012dffbb89112..59f10fe9782e57b9d40facc369f412195639b84b 100644 (file)
@@ -50,10 +50,11 @@ struct proto        udplite_prot = {
        .sendmsg           = udp_sendmsg,
        .recvmsg           = udp_recvmsg,
        .sendpage          = udp_sendpage,
-       .backlog_rcv       = udp_queue_rcv_skb,
        .hash              = udp_lib_hash,
        .unhash            = udp_lib_unhash,
        .get_port          = udp_v4_get_port,
+       .memory_allocated  = &udp_memory_allocated,
+       .sysctl_mem        = sysctl_udp_mem,
        .obj_size          = sizeof(struct udp_sock),
        .h.udp_table       = &udplite_table,
 #ifdef CONFIG_COMPAT
index 2343e4f2e0bfce5c97978a06a601fca14b41610a..ec1267e2bd1f8c9168687513010cec7a7271ba71 100644 (file)
@@ -289,4 +289,39 @@ config IPV6_PIMSM_V2
          Support for IPv6 PIM multicast routing protocol PIM-SMv2.
          If unsure, say N.
 
+config IPV6_SEG6_LWTUNNEL
+       bool "IPv6: Segment Routing Header encapsulation support"
+       depends on IPV6
+       select LWTUNNEL
+       ---help---
+         Support for encapsulation of packets within an outer IPv6
+         header and a Segment Routing Header using the lightweight
+         tunnels mechanism.
+
+         If unsure, say N.
+
+config IPV6_SEG6_INLINE
+       bool "IPv6: direct Segment Routing Header insertion "
+       depends on IPV6_SEG6_LWTUNNEL
+       ---help---
+         Support for direct insertion of the Segment Routing Header,
+         also known as inline mode. Be aware that direct insertion of
+         extension headers (as opposed to encapsulation) may break
+         multiple mechanisms such as PMTUD or IPSec AH. Use this feature
+         only if you know exactly what you are doing.
+
+         If unsure, say N.
+
+config IPV6_SEG6_HMAC
+       bool "IPv6: Segment Routing HMAC support"
+       depends on IPV6
+       select CRYPTO_HMAC
+       select CRYPTO_SHA1
+       select CRYPTO_SHA256
+       ---help---
+         Support for HMAC signature generation and verification
+         of SR-enabled packets.
+
+         If unsure, say N.
+
 endif # IPV6
index c174ccb340a15bca6e712932ae5e7a9c34739efb..a9e9fec387ce828be30af69e49bd31b71604652f 100644 (file)
@@ -9,7 +9,7 @@ ipv6-objs :=    af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
                route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
                raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
                exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
-               udp_offload.o
+               udp_offload.o seg6.o
 
 ipv6-offload :=        ip6_offload.o tcpv6_offload.o exthdrs_offload.o
 
@@ -23,6 +23,8 @@ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
 ipv6-$(CONFIG_PROC_FS) += proc.o
 ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
 ipv6-$(CONFIG_NETLABEL) += calipso.o
+ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o
+ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
 
 ipv6-objs += $(ipv6-y)
 
index d8983e15f85945343ab85b85b0e1c5cb9916b6ab..4c387dc338e379f4339f8357514fc80b2652db7f 100644 (file)
@@ -147,9 +147,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
 }
 #endif
 
-static void __ipv6_regen_rndid(struct inet6_dev *idev);
-static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
-static void ipv6_regen_rndid(unsigned long data);
+static void ipv6_regen_rndid(struct inet6_dev *idev);
+static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
 
 static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
 static int ipv6_count_addresses(struct inet6_dev *idev);
@@ -184,7 +183,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 
 static void addrconf_dad_start(struct inet6_ifaddr *ifp);
 static void addrconf_dad_work(struct work_struct *w);
-static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id);
 static void addrconf_dad_run(struct inet6_dev *idev);
 static void addrconf_rs_timer(unsigned long data);
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -239,6 +238,10 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
        .use_oif_addrs_only     = 0,
        .ignore_routes_with_linkdown = 0,
        .keep_addr_on_down      = 0,
+       .seg6_enabled           = 0,
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       .seg6_require_hmac      = 0,
+#endif
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -285,6 +288,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
        .use_oif_addrs_only     = 0,
        .ignore_routes_with_linkdown = 0,
        .keep_addr_on_down      = 0,
+       .seg6_enabled           = 0,
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       .seg6_require_hmac      = 0,
+#endif
 };
 
 /* Check if a valid qdisc is available */
@@ -409,9 +416,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
                goto err_release;
        }
 
-       /* One reference from device.  We must do this before
-        * we invoke __ipv6_regen_rndid().
-        */
+       /* One reference from device. */
        in6_dev_hold(ndev);
 
        if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
@@ -425,17 +430,15 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 #endif
 
        INIT_LIST_HEAD(&ndev->tempaddr_list);
-       setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
+       ndev->desync_factor = U32_MAX;
        if ((dev->flags&IFF_LOOPBACK) ||
            dev->type == ARPHRD_TUNNEL ||
            dev->type == ARPHRD_TUNNEL6 ||
            dev->type == ARPHRD_SIT ||
            dev->type == ARPHRD_NONE) {
                ndev->cnf.use_tempaddr = -1;
-       } else {
-               in6_dev_hold(ndev);
-               ipv6_regen_rndid((unsigned long) ndev);
-       }
+       } else
+               ipv6_regen_rndid(ndev);
 
        ndev->token = in6addr_any;
 
@@ -447,7 +450,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
        err = addrconf_sysctl_register(ndev);
        if (err) {
                ipv6_mc_destroy_dev(ndev);
-               del_timer(&ndev->regen_timer);
                snmp6_unregister_dev(ndev);
                goto err_release;
        }
@@ -1190,6 +1192,8 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i
        int ret = 0;
        u32 addr_flags;
        unsigned long now = jiffies;
+       long max_desync_factor;
+       s32 cnf_temp_preferred_lft;
 
        write_lock_bh(&idev->lock);
        if (ift) {
@@ -1222,23 +1226,42 @@ retry:
        }
        in6_ifa_hold(ifp);
        memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
-       __ipv6_try_regen_rndid(idev, tmpaddr);
+       ipv6_try_regen_rndid(idev, tmpaddr);
        memcpy(&addr.s6_addr[8], idev->rndid, 8);
        age = (now - ifp->tstamp) / HZ;
+
+       regen_advance = idev->cnf.regen_max_retry *
+                       idev->cnf.dad_transmits *
+                       NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ;
+
+       /* recalculate max_desync_factor each time and update
+        * idev->desync_factor if it's larger
+        */
+       cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
+       max_desync_factor = min_t(__u32,
+                                 idev->cnf.max_desync_factor,
+                                 cnf_temp_preferred_lft - regen_advance);
+
+       if (unlikely(idev->desync_factor > max_desync_factor)) {
+               if (max_desync_factor > 0) {
+                       get_random_bytes(&idev->desync_factor,
+                                        sizeof(idev->desync_factor));
+                       idev->desync_factor %= max_desync_factor;
+               } else {
+                       idev->desync_factor = 0;
+               }
+       }
+
        tmp_valid_lft = min_t(__u32,
                              ifp->valid_lft,
                              idev->cnf.temp_valid_lft + age);
-       tmp_prefered_lft = min_t(__u32,
-                                ifp->prefered_lft,
-                                idev->cnf.temp_prefered_lft + age -
-                                idev->cnf.max_desync_factor);
+       tmp_prefered_lft = cnf_temp_preferred_lft + age -
+                           idev->desync_factor;
+       tmp_prefered_lft = min_t(__u32, ifp->prefered_lft, tmp_prefered_lft);
        tmp_plen = ifp->prefix_len;
        tmp_tstamp = ifp->tstamp;
        spin_unlock_bh(&ifp->lock);
 
-       regen_advance = idev->cnf.regen_max_retry *
-                       idev->cnf.dad_transmits *
-                       NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ;
        write_unlock_bh(&idev->lock);
 
        /* A temporary address is created only if this calculated Preferred
@@ -2150,7 +2173,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
 }
 
 /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
-static void __ipv6_regen_rndid(struct inet6_dev *idev)
+static void ipv6_regen_rndid(struct inet6_dev *idev)
 {
 regen:
        get_random_bytes(idev->rndid, sizeof(idev->rndid));
@@ -2179,43 +2202,10 @@ regen:
        }
 }
 
-static void ipv6_regen_rndid(unsigned long data)
-{
-       struct inet6_dev *idev = (struct inet6_dev *) data;
-       unsigned long expires;
-
-       rcu_read_lock_bh();
-       write_lock_bh(&idev->lock);
-
-       if (idev->dead)
-               goto out;
-
-       __ipv6_regen_rndid(idev);
-
-       expires = jiffies +
-               idev->cnf.temp_prefered_lft * HZ -
-               idev->cnf.regen_max_retry * idev->cnf.dad_transmits *
-               NEIGH_VAR(idev->nd_parms, RETRANS_TIME) -
-               idev->cnf.max_desync_factor * HZ;
-       if (time_before(expires, jiffies)) {
-               pr_warn("%s: too short regeneration interval; timer disabled for %s\n",
-                       __func__, idev->dev->name);
-               goto out;
-       }
-
-       if (!mod_timer(&idev->regen_timer, expires))
-               in6_dev_hold(idev);
-
-out:
-       write_unlock_bh(&idev->lock);
-       rcu_read_unlock_bh();
-       in6_dev_put(idev);
-}
-
-static void  __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
+static void  ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
 {
        if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
-               __ipv6_regen_rndid(idev);
+               ipv6_regen_rndid(idev);
 }
 
 /*
@@ -2356,7 +2346,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
                        max_valid = 0;
 
                max_prefered = idev->cnf.temp_prefered_lft -
-                              idev->cnf.max_desync_factor - age;
+                              idev->desync_factor - age;
                if (max_prefered < 0)
                        max_prefered = 0;
 
@@ -2916,6 +2906,7 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
                spin_lock_bh(&ifp->lock);
                ifp->flags &= ~IFA_F_TENTATIVE;
                spin_unlock_bh(&ifp->lock);
+               rt_genid_bump_ipv6(dev_net(idev->dev));
                ipv6_ifa_notify(RTM_NEWADDR, ifp);
                in6_ifa_put(ifp);
        }
@@ -3018,7 +3009,7 @@ static void init_loopback(struct net_device *dev)
                                 * lo device down, release this obsolete dst and
                                 * reallocate a new router for ifa.
                                 */
-                               if (sp_ifa->rt->dst.obsolete > 0) {
+                               if (!atomic_read(&sp_ifa->rt->rt6i_ref)) {
                                        ip6_rt_put(sp_ifa->rt);
                                        sp_ifa->rt = NULL;
                                } else {
@@ -3594,9 +3585,6 @@ restart:
        if (!how)
                idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
 
-       if (how && del_timer(&idev->regen_timer))
-               in6_dev_put(idev);
-
        /* Step 3: clear tempaddr list */
        while (!list_empty(&idev->tempaddr_list)) {
                ifa = list_first_entry(&idev->tempaddr_list,
@@ -3761,7 +3749,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
 {
        struct inet6_dev *idev = ifp->idev;
        struct net_device *dev = idev->dev;
-       bool notify = false;
+       bool bump_id, notify = false;
 
        addrconf_join_solict(dev, &ifp->addr);
 
@@ -3776,11 +3764,12 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
            idev->cnf.accept_dad < 1 ||
            !(ifp->flags&IFA_F_TENTATIVE) ||
            ifp->flags & IFA_F_NODAD) {
+               bump_id = ifp->flags & IFA_F_TENTATIVE;
                ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
                spin_unlock(&ifp->lock);
                read_unlock_bh(&idev->lock);
 
-               addrconf_dad_completed(ifp);
+               addrconf_dad_completed(ifp, bump_id);
                return;
        }
 
@@ -3840,8 +3829,8 @@ static void addrconf_dad_work(struct work_struct *w)
                                                struct inet6_ifaddr,
                                                dad_work);
        struct inet6_dev *idev = ifp->idev;
+       bool bump_id, disable_ipv6 = false;
        struct in6_addr mcaddr;
-       bool disable_ipv6 = false;
 
        enum {
                DAD_PROCESS,
@@ -3911,11 +3900,12 @@ static void addrconf_dad_work(struct work_struct *w)
                 * DAD was successful
                 */
 
+               bump_id = ifp->flags & IFA_F_TENTATIVE;
                ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
                spin_unlock(&ifp->lock);
                write_unlock_bh(&idev->lock);
 
-               addrconf_dad_completed(ifp);
+               addrconf_dad_completed(ifp, bump_id);
 
                goto out;
        }
@@ -3952,7 +3942,7 @@ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp)
        return true;
 }
 
-static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
 {
        struct net_device *dev = ifp->idev->dev;
        struct in6_addr lladdr;
@@ -4004,6 +3994,9 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
                spin_unlock(&ifp->lock);
                write_unlock_bh(&ifp->idev->lock);
        }
+
+       if (bump_id)
+               rt_genid_bump_ipv6(dev_net(dev));
 }
 
 static void addrconf_dad_run(struct inet6_dev *idev)
@@ -4965,6 +4958,10 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
        array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
        array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
        array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
+       array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       array[DEVCONF_SEG6_REQUIRE_HMAC] = cnf->seg6_require_hmac;
+#endif
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -6056,6 +6053,22 @@ static const struct ctl_table addrconf_sysctl[] = {
                .proc_handler   = proc_dointvec,
 
        },
+       {
+               .procname       = "seg6_enabled",
+               .data           = &ipv6_devconf.seg6_enabled,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       {
+               .procname       = "seg6_require_hmac",
+               .data           = &ipv6_devconf.seg6_require_hmac,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
+#endif
        {
                /* sentinel */
        }
index 46ad699937fdcc6eaaf4b6490e6f3822ad444df8..237e654ba717ea642ef14dc7502834ada55a1854 100644 (file)
@@ -61,6 +61,7 @@
 #include <net/ip6_tunnel.h>
 #endif
 #include <net/calipso.h>
+#include <net/seg6.h>
 
 #include <asm/uaccess.h>
 #include <linux/mroute6.h>
@@ -257,6 +258,14 @@ lookup_protocol:
                        goto out;
                }
        }
+
+       if (!kern) {
+               err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
+               if (err) {
+                       sk_common_release(sk);
+                       goto out;
+               }
+       }
 out:
        return err;
 out_rcu_unlock:
@@ -678,6 +687,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
                fl6.flowi6_mark = sk->sk_mark;
                fl6.fl6_dport = inet->inet_dport;
                fl6.fl6_sport = inet->inet_sport;
+               fl6.flowi6_uid = sk->sk_uid;
                security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
                rcu_read_lock();
@@ -990,6 +1000,10 @@ static int __init inet6_init(void)
        if (err)
                goto calipso_fail;
 
+       err = seg6_init();
+       if (err)
+               goto seg6_fail;
+
 #ifdef CONFIG_SYSCTL
        err = ipv6_sysctl_register();
        if (err)
@@ -1000,8 +1014,10 @@ out:
 
 #ifdef CONFIG_SYSCTL
 sysctl_fail:
-       calipso_exit();
+       seg6_exit();
 #endif
+seg6_fail:
+       calipso_exit();
 calipso_fail:
        pingv6_exit();
 pingv6_fail:
index 0630a4d5daaabaf9a294c18c398ab569ed74661c..189eb10b742d02fa5b39ac7206703e31e30c3cf7 100644 (file)
@@ -662,9 +662,10 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                return 0;
 
        if (type == NDISC_REDIRECT)
-               ip6_redirect(skb, net, skb->dev->ifindex, 0);
+               ip6_redirect(skb, net, skb->dev->ifindex, 0,
+                            sock_net_uid(net, NULL));
        else
-               ip6_update_pmtu(skb, net, info, 0, 0);
+               ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
        xfrm_state_put(x);
 
        return 0;
index 37874e2f30edf98f31e2a5097761143d507d5b95..c5d76d2edd26bf7b6ca5609669f2e298af409f0a 100644 (file)
@@ -54,6 +54,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
        fl6->fl6_dport = inet->inet_dport;
        fl6->fl6_sport = inet->inet_sport;
        fl6->flowlabel = np->flow_label;
+       fl6->flowi6_uid = sk->sk_uid;
 
        if (!fl6->flowi6_oif)
                fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
@@ -715,6 +716,11 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
                        put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
                }
        }
+       if (np->rxopt.bits.recvfragsize && opt->frag_max_size) {
+               int val = opt->frag_max_size;
+
+               put_cmsg(msg, SOL_IPV6, IPV6_RECVFRAGSIZE, sizeof(val), &val);
+       }
 }
 
 void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,
index 060a60b2f8a6db074167e389b56893337c887fe9..218f0cba231cb4fcc7d4beb4d5fecd5ff1d20696 100644 (file)
@@ -474,9 +474,10 @@ static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                return 0;
 
        if (type == NDISC_REDIRECT)
-               ip6_redirect(skb, net, skb->dev->ifindex, 0);
+               ip6_redirect(skb, net, skb->dev->ifindex, 0,
+                            sock_net_uid(net, NULL));
        else
-               ip6_update_pmtu(skb, net, info, 0, 0);
+               ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
        xfrm_state_put(x);
 
        return 0;
index 139ceb68bd3705b5bc4cedb0c5a1e39fa59fc6b8..926818c331e524b4d24da321a076b634a7191f2c 100644 (file)
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 #include <net/xfrm.h>
 #endif
+#include <linux/seg6.h>
+#include <net/seg6.h>
+#ifdef CONFIG_IPV6_SEG6_HMAC
+#include <net/seg6_hmac.h>
+#endif
 
 #include <linux/uaccess.h>
 
@@ -286,6 +291,182 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
        return -1;
 }
 
+static void seg6_update_csum(struct sk_buff *skb)
+{
+       struct ipv6_sr_hdr *hdr;
+       struct in6_addr *addr;
+       __be32 from, to;
+
+       /* srh is at transport offset and seg_left is already decremented
+        * but daddr is not yet updated with next segment
+        */
+
+       hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+       addr = hdr->segments + hdr->segments_left;
+
+       hdr->segments_left++;
+       from = *(__be32 *)hdr;
+
+       hdr->segments_left--;
+       to = *(__be32 *)hdr;
+
+       /* update skb csum with diff resulting from seg_left decrement */
+
+       update_csum_diff4(skb, from, to);
+
+       /* compute csum diff between current and next segment and update */
+
+       update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr),
+                          (__be32 *)addr);
+}
+
+static int ipv6_srh_rcv(struct sk_buff *skb)
+{
+       struct inet6_skb_parm *opt = IP6CB(skb);
+       struct net *net = dev_net(skb->dev);
+       struct ipv6_sr_hdr *hdr;
+       struct inet6_dev *idev;
+       struct in6_addr *addr;
+       bool cleanup = false;
+       int accept_seg6;
+
+       hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+
+       idev = __in6_dev_get(skb->dev);
+
+       accept_seg6 = net->ipv6.devconf_all->seg6_enabled;
+       if (accept_seg6 > idev->cnf.seg6_enabled)
+               accept_seg6 = idev->cnf.seg6_enabled;
+
+       if (!accept_seg6) {
+               kfree_skb(skb);
+               return -1;
+       }
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       if (!seg6_hmac_validate_skb(skb)) {
+               kfree_skb(skb);
+               return -1;
+       }
+#endif
+
+looped_back:
+       if (hdr->segments_left > 0) {
+               if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 &&
+                   sr_has_cleanup(hdr))
+                       cleanup = true;
+       } else {
+               if (hdr->nexthdr == NEXTHDR_IPV6) {
+                       int offset = (hdr->hdrlen + 1) << 3;
+
+                       skb_postpull_rcsum(skb, skb_network_header(skb),
+                                          skb_network_header_len(skb));
+
+                       if (!pskb_pull(skb, offset)) {
+                               kfree_skb(skb);
+                               return -1;
+                       }
+                       skb_postpull_rcsum(skb, skb_transport_header(skb),
+                                          offset);
+
+                       skb_reset_network_header(skb);
+                       skb_reset_transport_header(skb);
+                       skb->encapsulation = 0;
+
+                       __skb_tunnel_rx(skb, skb->dev, net);
+
+                       netif_rx(skb);
+                       return -1;
+               }
+
+               opt->srcrt = skb_network_header_len(skb);
+               opt->lastopt = opt->srcrt;
+               skb->transport_header += (hdr->hdrlen + 1) << 3;
+               opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
+
+               return 1;
+       }
+
+       if (hdr->segments_left >= (hdr->hdrlen >> 1)) {
+               __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+                               IPSTATS_MIB_INHDRERRORS);
+               icmpv6_param_prob(skb, ICMPV6_HDR_FIELD,
+                                 ((&hdr->segments_left) -
+                                  skb_network_header(skb)));
+               kfree_skb(skb);
+               return -1;
+       }
+
+       if (skb_cloned(skb)) {
+               if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
+                       __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+                                       IPSTATS_MIB_OUTDISCARDS);
+                       kfree_skb(skb);
+                       return -1;
+               }
+       }
+
+       hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+
+       hdr->segments_left--;
+       addr = hdr->segments + hdr->segments_left;
+
+       skb_push(skb, sizeof(struct ipv6hdr));
+
+       if (skb->ip_summed == CHECKSUM_COMPLETE)
+               seg6_update_csum(skb);
+
+       ipv6_hdr(skb)->daddr = *addr;
+
+       if (cleanup) {
+               int srhlen = (hdr->hdrlen + 1) << 3;
+               int nh = hdr->nexthdr;
+
+               skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen);
+               memmove(skb_network_header(skb) + srhlen,
+                       skb_network_header(skb),
+                       (unsigned char *)hdr - skb_network_header(skb));
+               skb->network_header += srhlen;
+               ipv6_hdr(skb)->nexthdr = nh;
+               ipv6_hdr(skb)->payload_len = htons(skb->len -
+                                                  sizeof(struct ipv6hdr));
+               skb_push_rcsum(skb, sizeof(struct ipv6hdr));
+       }
+
+       skb_dst_drop(skb);
+
+       ip6_route_input(skb);
+
+       if (skb_dst(skb)->error) {
+               dst_input(skb);
+               return -1;
+       }
+
+       if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
+               if (ipv6_hdr(skb)->hop_limit <= 1) {
+                       __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+                                       IPSTATS_MIB_INHDRERRORS);
+                       icmpv6_send(skb, ICMPV6_TIME_EXCEED,
+                                   ICMPV6_EXC_HOPLIMIT, 0);
+                       kfree_skb(skb);
+                       return -1;
+               }
+               ipv6_hdr(skb)->hop_limit--;
+
+               /* be sure that srh is still present before reinjecting */
+               if (!cleanup) {
+                       skb_pull(skb, sizeof(struct ipv6hdr));
+                       goto looped_back;
+               }
+               skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+               IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+       }
+
+       dst_input(skb);
+
+       return -1;
+}
+
 /********************************
   Routing header.
  ********************************/
@@ -326,6 +507,10 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
                return -1;
        }
 
+       /* segment routing */
+       if (hdr->type == IPV6_SRCRT_TYPE_4)
+               return ipv6_srh_rcv(skb);
+
 looped_back:
        if (hdr->segments_left == 0) {
                switch (hdr->type) {
@@ -679,9 +864,9 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
  *     for headers.
  */
 
-static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
-                           struct ipv6_rt_hdr *opt,
-                           struct in6_addr **addr_p)
+static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto,
+                            struct ipv6_rt_hdr *opt,
+                            struct in6_addr **addr_p, struct in6_addr *saddr)
 {
        struct rt0_hdr *phdr, *ihdr;
        int hops;
@@ -704,6 +889,62 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
        *proto = NEXTHDR_ROUTING;
 }
 
+static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto,
+                            struct ipv6_rt_hdr *opt,
+                            struct in6_addr **addr_p, struct in6_addr *saddr)
+{
+       struct ipv6_sr_hdr *sr_phdr, *sr_ihdr;
+       int plen, hops;
+
+       sr_ihdr = (struct ipv6_sr_hdr *)opt;
+       plen = (sr_ihdr->hdrlen + 1) << 3;
+
+       sr_phdr = (struct ipv6_sr_hdr *)skb_push(skb, plen);
+       memcpy(sr_phdr, sr_ihdr, sizeof(struct ipv6_sr_hdr));
+
+       hops = sr_ihdr->first_segment + 1;
+       memcpy(sr_phdr->segments + 1, sr_ihdr->segments + 1,
+              (hops - 1) * sizeof(struct in6_addr));
+
+       sr_phdr->segments[0] = **addr_p;
+       *addr_p = &sr_ihdr->segments[hops - 1];
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       if (sr_has_hmac(sr_phdr)) {
+               struct net *net = NULL;
+
+               if (skb->dev)
+                       net = dev_net(skb->dev);
+               else if (skb->sk)
+                       net = sock_net(skb->sk);
+
+               WARN_ON(!net);
+
+               if (net)
+                       seg6_push_hmac(net, saddr, sr_phdr);
+       }
+#endif
+
+       sr_phdr->nexthdr = *proto;
+       *proto = NEXTHDR_ROUTING;
+}
+
+static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
+                           struct ipv6_rt_hdr *opt,
+                           struct in6_addr **addr_p, struct in6_addr *saddr)
+{
+       switch (opt->type) {
+       case IPV6_SRCRT_TYPE_0:
+               ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr);
+               break;
+       case IPV6_SRCRT_TYPE_4:
+               ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr);
+               break;
+       default:
+               break;
+       }
+}
+
 static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
 {
        struct ipv6_opt_hdr *h = (struct ipv6_opt_hdr *)skb_push(skb, ipv6_optlen(opt));
@@ -715,10 +956,10 @@ static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv
 
 void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
                          u8 *proto,
-                         struct in6_addr **daddr)
+                         struct in6_addr **daddr, struct in6_addr *saddr)
 {
        if (opt->srcrt) {
-               ipv6_push_rthdr(skb, proto, opt->srcrt, daddr);
+               ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr);
                /*
                 * IPV6_RTHDRDSTOPTS is ignored
                 * unless IPV6_RTHDR is set (RFC3542).
@@ -945,7 +1186,22 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
                return NULL;
 
        *orig = fl6->daddr;
-       fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
+
+       switch (opt->srcrt->type) {
+       case IPV6_SRCRT_TYPE_0:
+               fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
+               break;
+       case IPV6_SRCRT_TYPE_4:
+       {
+               struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt;
+
+               fl6->daddr = srh->segments[srh->first_segment];
+               break;
+       }
+       default:
+               return NULL;
+       }
+
        return orig;
 }
 EXPORT_SYMBOL_GPL(fl6_update_dst);
index bd59c343d35f297ff6c0462cac4cc76c6c9b76ed..eb948ffd734b5fc03a5fd683cff0202fa48d4f0f 100644 (file)
@@ -92,9 +92,10 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
        struct net *net = dev_net(skb->dev);
 
        if (type == ICMPV6_PKT_TOOBIG)
-               ip6_update_pmtu(skb, net, info, 0, 0);
+               ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
        else if (type == NDISC_REDIRECT)
-               ip6_redirect(skb, net, skb->dev->ifindex, 0);
+               ip6_redirect(skb, net, skb->dev->ifindex, 0,
+                            sock_net_uid(net, NULL));
 
        if (!(type & ICMPV6_INFOMSG_MASK))
                if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
@@ -448,7 +449,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
        if (__ipv6_addr_needs_scope_id(addr_type))
                iif = skb->dev->ifindex;
        else
-               iif = l3mdev_master_ifindex(skb->dev);
+               iif = l3mdev_master_ifindex(skb_dst(skb)->dev);
 
        /*
         *      Must not send error if the source does not uniquely
@@ -484,6 +485,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
        fl6.flowi6_oif = iif;
        fl6.fl6_icmp_type = type;
        fl6.fl6_icmp_code = code;
+       fl6.flowi6_uid = sock_net_uid(net, NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
        sk = icmpv6_xmit_lock(net);
@@ -658,6 +660,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
        fl6.flowi6_oif = skb->dev->ifindex;
        fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
        fl6.flowi6_mark = mark;
+       fl6.flowi6_uid = sock_net_uid(net, NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
        sk = icmpv6_xmit_lock(net);
index e604013dd8147a36ac247ef44f0c37c6519cbcc4..af8f52ee7180868294930629f4f312c012a7a35c 100644 (file)
@@ -118,15 +118,7 @@ static const struct rhashtable_params rht_params = {
        .obj_cmpfn = ila_cmpfn,
 };
 
-static struct genl_family ila_nl_family = {
-       .id             = GENL_ID_GENERATE,
-       .hdrsize        = 0,
-       .name           = ILA_GENL_NAME,
-       .version        = ILA_GENL_VERSION,
-       .maxattr        = ILA_ATTR_MAX,
-       .netnsok        = true,
-       .parallel_ops   = true,
-};
+static struct genl_family ila_nl_family;
 
 static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
        [ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
@@ -482,7 +474,15 @@ static int ila_nl_dump_start(struct netlink_callback *cb)
 {
        struct net *net = sock_net(cb->skb->sk);
        struct ila_net *ilan = net_generic(net, ila_net_id);
-       struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
+       struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
+
+       if (!iter) {
+               iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+               if (!iter)
+                       return -ENOMEM;
+
+               cb->args[0] = (long)iter;
+       }
 
        return rhashtable_walk_init(&ilan->rhash_table, &iter->rhiter,
                                    GFP_KERNEL);
@@ -490,16 +490,18 @@ static int ila_nl_dump_start(struct netlink_callback *cb)
 
 static int ila_nl_dump_done(struct netlink_callback *cb)
 {
-       struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
+       struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
 
        rhashtable_walk_exit(&iter->rhiter);
 
+       kfree(iter);
+
        return 0;
 }
 
 static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
-       struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args;
+       struct ila_dump_iter *iter = (struct ila_dump_iter *)cb->args[0];
        struct rhashtable_iter *rhiter = &iter->rhiter;
        struct ila_map *ila;
        int ret;
@@ -561,6 +563,18 @@ static const struct genl_ops ila_nl_ops[] = {
        },
 };
 
+static struct genl_family ila_nl_family __ro_after_init = {
+       .hdrsize        = 0,
+       .name           = ILA_GENL_NAME,
+       .version        = ILA_GENL_VERSION,
+       .maxattr        = ILA_ATTR_MAX,
+       .netnsok        = true,
+       .parallel_ops   = true,
+       .module         = THIS_MODULE,
+       .ops            = ila_nl_ops,
+       .n_ops          = ARRAY_SIZE(ila_nl_ops),
+};
+
 #define ILA_HASH_TABLE_SIZE 1024
 
 static __net_init int ila_init_net(struct net *net)
@@ -623,7 +637,7 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral)
        return 0;
 }
 
-int ila_xlat_init(void)
+int __init ila_xlat_init(void)
 {
        int ret;
 
@@ -631,8 +645,7 @@ int ila_xlat_init(void)
        if (ret)
                goto exit;
 
-       ret = genl_register_family_with_ops(&ila_nl_family,
-                                           ila_nl_ops);
+       ret = genl_register_family(&ila_nl_family);
        if (ret < 0)
                goto unregister;
 
index 532c3ef282c5f09cb028e9dbc5dae66e0676edfe..1c86c478f578b49373e61a4c397f23f3dc7f3fc6 100644 (file)
@@ -88,6 +88,7 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
        fl6->flowi6_mark = ireq->ir_mark;
        fl6->fl6_dport = ireq->ir_rmt_port;
        fl6->fl6_sport = htons(ireq->ir_num);
+       fl6->flowi6_uid = sk->sk_uid;
        security_req_classify_flow(req, flowi6_to_flowi(fl6));
 
        dst = ip6_dst_lookup_flow(sk, fl6, final_p);
@@ -136,6 +137,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
        fl6->flowi6_mark = sk->sk_mark;
        fl6->fl6_sport = inet->inet_sport;
        fl6->fl6_dport = inet->inet_dport;
+       fl6->flowi6_uid = sk->sk_uid;
        security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
 
        rcu_read_lock();
index 00cf28ad45650c801c90c37fb571acb7d1615183..02761c9fe43eb306fa1887e577130e5abd2aa2b8 100644 (file)
@@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established);
 static inline int compute_score(struct sock *sk, struct net *net,
                                const unsigned short hnum,
                                const struct in6_addr *daddr,
-                               const int dif)
+                               const int dif, bool exact_dif)
 {
        int score = -1;
 
@@ -109,7 +109,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
                                return -1;
                        score++;
                }
-               if (sk->sk_bound_dev_if) {
+               if (sk->sk_bound_dev_if || exact_dif) {
                        if (sk->sk_bound_dev_if != dif)
                                return -1;
                        score++;
@@ -131,11 +131,12 @@ struct sock *inet6_lookup_listener(struct net *net,
        unsigned int hash = inet_lhashfn(net, hnum);
        struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
        int score, hiscore = 0, matches = 0, reuseport = 0;
+       bool exact_dif = inet6_exact_dif_match(net, skb);
        struct sock *sk, *result = NULL;
        u32 phash = 0;
 
        sk_for_each(sk, &ilb->head) {
-               score = compute_score(sk, net, hnum, daddr, dif);
+               score = compute_score(sk, net, hnum, daddr, dif, exact_dif);
                if (score > hiscore) {
                        reuseport = sk->sk_reuseport;
                        if (reuseport) {
@@ -263,13 +264,15 @@ EXPORT_SYMBOL_GPL(inet6_hash_connect);
 
 int inet6_hash(struct sock *sk)
 {
+       int err = 0;
+
        if (sk->sk_state != TCP_CLOSE) {
                local_bh_disable();
-               __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
+               err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal);
                local_bh_enable();
        }
 
-       return 0;
+       return err;
 }
 EXPORT_SYMBOL_GPL(inet6_hash);
 
index d7d6d3ae0b3b62423c4c41317ad22fbe491eac75..75b6108234dd05a54af0ae51c7c11eaf1ca26d75 100644 (file)
@@ -64,7 +64,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 #define IP6_GRE_HASH_SIZE_SHIFT  5
 #define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
 
-static int ip6gre_net_id __read_mostly;
+static unsigned int ip6gre_net_id __read_mostly;
 struct ip6gre_net {
        struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
 
@@ -548,6 +548,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
        if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
                fl6.flowi6_mark = skb->mark;
 
+       fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
        err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
        if (err)
                return -1;
@@ -602,6 +604,8 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
        if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
                fl6.flowi6_mark = skb->mark;
 
+       fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
        if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
                return -1;
 
index e7bfd55899a34ab8a314ecdfb9a98d8a55d0af15..1fcf61f1cbc33f09919624c78ab918b6258d147f 100644 (file)
@@ -246,7 +246,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
 
        skb_gro_postpull_rcsum(skb, iph, nlen);
 
-       pp = ops->callbacks.gro_receive(head, skb);
+       pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
 
 out_unlock:
        rcu_read_unlock();
index 6001e781164eb6d49cf604e7d8067a42ebc7dc3d..70d0de4041972ceaeb4656fa4bdef884a5403b10 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 
+#include <linux/bpf-cgroup.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
 
@@ -131,6 +132,14 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
 
 static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+       int ret;
+
+       ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
+       if (ret) {
+               kfree_skb(skb);
+               return ret;
+       }
+
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
@@ -203,7 +212,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
-                       ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
+                       ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
+                                            &fl6->saddr);
        }
 
        skb_push(skb, sizeof(struct ipv6hdr));
@@ -624,7 +634,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        if (skb_has_frag_list(skb)) {
-               int first_len = skb_pagelen(skb);
+               unsigned int first_len = skb_pagelen(skb);
                struct sk_buff *frag2;
 
                if (first_len - hlen > mtu ||
@@ -1366,7 +1376,7 @@ emsgsize:
        if (((length > mtu) ||
             (skb && skb_is_gso(skb))) &&
            (sk->sk_protocol == IPPROTO_UDP) &&
-           (rt->dst.dev->features & NETIF_F_UFO) &&
+           (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
            (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
                err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
                                          hh_len, fragheaderlen, exthdrlen,
@@ -1672,7 +1682,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
-               ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
+               ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
 
        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
index 3a70567846aaab8be248493b92ec1e8da3655eca..1f49fb1101a1e86557dd8fe841c93df2d708e707 100644 (file)
@@ -83,7 +83,7 @@ static int ip6_tnl_dev_init(struct net_device *dev);
 static void ip6_tnl_dev_setup(struct net_device *dev);
 static struct rtnl_link_ops ip6_link_ops __read_mostly;
 
-static int ip6_tnl_net_id __read_mostly;
+static unsigned int ip6_tnl_net_id __read_mostly;
 struct ip6_tnl_net {
        /* the IPv6 tunnel fallback device */
        struct net_device *fb_tnl_dev;
@@ -157,6 +157,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
        hash = HASH(&any, local);
        for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
                if (ipv6_addr_equal(local, &t->parms.laddr) &&
+                   ipv6_addr_any(&t->parms.raddr) &&
                    (t->dev->flags & IFF_UP))
                        return t;
        }
@@ -164,6 +165,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
        hash = HASH(remote, &any);
        for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
                if (ipv6_addr_equal(remote, &t->parms.raddr) &&
+                   ipv6_addr_any(&t->parms.laddr) &&
                    (t->dev->flags & IFF_UP))
                        return t;
        }
@@ -1032,6 +1034,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
        int mtu;
        unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
        unsigned int max_headroom = psh_hlen;
+       bool use_cache = false;
        u8 hop_limit;
        int err = -1;
 
@@ -1064,7 +1067,15 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 
                memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
                neigh_release(neigh);
-       } else if (!fl6->flowi6_mark)
+       } else if (!(t->parms.flags &
+                    (IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
+               /* enable the cache only if the routing decision does
+                * not depend on the current inner header value
+                */
+               use_cache = true;
+       }
+
+       if (use_cache)
                dst = dst_cache_get(&t->dst_cache);
 
        if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
@@ -1148,14 +1159,14 @@ route_lookup:
                if (t->encap.type != TUNNEL_ENCAP_NONE)
                        goto tx_err_dst_release;
        } else {
-               if (!fl6->flowi6_mark && ndst)
+               if (use_cache && ndst)
                        dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
        }
        skb_dst_set(skb, dst);
 
        if (encap_limit >= 0) {
                init_tel_txopt(&opt, encap_limit);
-               ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
+               ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL, NULL);
        }
 
        /* Calculate max headroom for all the headers and adjust
@@ -1170,6 +1181,7 @@ route_lookup:
        if (err)
                return err;
 
+       skb->protocol = htons(ETH_P_IPV6);
        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        ipv6h = ipv6_hdr(skb);
@@ -1237,6 +1249,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
                        fl6.flowi6_mark = skb->mark;
        }
 
+       fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
        if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
                return -1;
 
@@ -1315,6 +1329,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
                        fl6.flowi6_mark = skb->mark;
        }
 
+       fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
        if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
                return -1;
 
index a7520528ecd27fae3d8ccee03c76345f0776234a..b283f293ee4ae7537da0bde51b5a4695a2e6f249 100644 (file)
@@ -88,9 +88,6 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
 
        uh->len = htons(skb->len);
 
-       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-       IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
-                           | IPSKB_REROUTED);
        skb_dst_set(skb, dst);
 
        udp6_set_csum(nocheck, skb, saddr, daddr, skb->len);
index 35c5b2d8c401a99f1344c50ddd3956c4e6f6a53e..c476bb8e9cdb3b783f2cfe75735fcd578f7f98ab 100644 (file)
@@ -64,7 +64,7 @@ static int vti6_dev_init(struct net_device *dev);
 static void vti6_dev_setup(struct net_device *dev);
 static struct rtnl_link_ops vti6_link_ops __read_mostly;
 
-static int vti6_net_id __read_mostly;
+static unsigned int vti6_net_id __read_mostly;
 struct vti6_net {
        /* the vti6 tunnel fallback device */
        struct net_device *fb_tnl_dev;
@@ -608,9 +608,10 @@ static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                return 0;
 
        if (type == NDISC_REDIRECT)
-               ip6_redirect(skb, net, skb->dev->ifindex, 0);
+               ip6_redirect(skb, net, skb->dev->ifindex, 0,
+                            sock_net_uid(net, NULL));
        else
-               ip6_update_pmtu(skb, net, info, 0, 0);
+               ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
        xfrm_state_put(x);
 
        return 0;
index 7f4265b1649b60eff007b61c0a59bdb244de03d3..52101b37ad6e48c65dae320f44c1ad6ce0ac098c 100644 (file)
@@ -636,7 +636,7 @@ static int pim6_rcv(struct sk_buff *skb)
                goto drop;
 
        pim = (struct pimreghdr *)skb_transport_header(skb);
-       if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
+       if (pim->type != ((PIM_VERSION << 4) | PIM_TYPE_REGISTER) ||
            (pim->flags & PIM_NULL_REGISTER) ||
            (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
                             sizeof(*pim), IPPROTO_PIM,
index 1b9316e1386a96c899c67888fba4618d3004e69a..54d165b9845a02c24f735477f48abdb0c944bae2 100644 (file)
@@ -74,9 +74,10 @@ static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                return 0;
 
        if (type == NDISC_REDIRECT)
-               ip6_redirect(skb, net, skb->dev->ifindex, 0);
+               ip6_redirect(skb, net, skb->dev->ifindex, 0,
+                            sock_net_uid(net, NULL));
        else
-               ip6_update_pmtu(skb, net, info, 0, 0);
+               ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL));
        xfrm_state_put(x);
 
        return 0;
index 5330262ab673c022fbf700d22782a74ccd1494fe..3ba5303735605077fedc3515163286234b51f141 100644 (file)
@@ -52,6 +52,7 @@
 #include <net/udplite.h>
 #include <net/xfrm.h>
 #include <net/compat.h>
+#include <net/seg6.h>
 
 #include <asm/uaccess.h>
 
@@ -120,6 +121,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
 static bool setsockopt_needs_rtnl(int optname)
 {
        switch (optname) {
+       case IPV6_ADDRFORM:
        case IPV6_ADD_MEMBERSHIP:
        case IPV6_DROP_MEMBERSHIP:
        case IPV6_JOIN_ANYCAST:
@@ -198,7 +200,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
                        }
 
                        fl6_free_socklist(sk);
-                       ipv6_sock_mc_close(sk);
+                       __ipv6_sock_mc_close(sk);
 
                        /*
                         * Sock is moving from IPv6 to IPv4 (sk_prot), so
@@ -429,6 +431,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 
                                break;
 #endif
+                       case IPV6_SRCRT_TYPE_4:
+                       {
+                               struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)
+                                                         opt->srcrt;
+
+                               if (!seg6_validate_srh(srh, optlen))
+                                       goto sticky_done;
+                               break;
+                       }
                        default:
                                goto sticky_done;
                        }
@@ -867,6 +878,10 @@ pref_skip_coa:
                np->autoflowlabel = valbool;
                retv = 0;
                break;
+       case IPV6_RECVFRAGSIZE:
+               np->rxopt.bits.recvfragsize = valbool;
+               retv = 0;
+               break;
        }
 
        release_sock(sk);
@@ -1309,6 +1324,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
                val = np->autoflowlabel;
                break;
 
+       case IPV6_RECVFRAGSIZE:
+               val = np->rxopt.bits.recvfragsize;
+               break;
+
        default:
                return -ENOPROTOOPT;
        }
index 75c1fc54f188939c4ed78a7323e8907c5e3b9be9..14a3903f1c82d83d44c39befdfe827833d09b13c 100644 (file)
@@ -276,16 +276,14 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
        return idev;
 }
 
-void ipv6_sock_mc_close(struct sock *sk)
+void __ipv6_sock_mc_close(struct sock *sk)
 {
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6_mc_socklist *mc_lst;
        struct net *net = sock_net(sk);
 
-       if (!rcu_access_pointer(np->ipv6_mc_list))
-               return;
+       ASSERT_RTNL();
 
-       rtnl_lock();
        while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) {
                struct net_device *dev;
 
@@ -303,8 +301,17 @@ void ipv6_sock_mc_close(struct sock *sk)
 
                atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc);
                kfree_rcu(mc_lst, rcu);
-
        }
+}
+
+void ipv6_sock_mc_close(struct sock *sk)
+{
+       struct ipv6_pinfo *np = inet6_sk(sk);
+
+       if (!rcu_access_pointer(np->ipv6_mc_list))
+               return;
+       rtnl_lock();
+       __ipv6_sock_mc_close(sk);
        rtnl_unlock();
 }
 
index d11c46833d615b394797e193008f1cc8e4592935..39970e212ad574ae36406b5354e0550698249387 100644 (file)
@@ -26,6 +26,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
        struct flowi6 fl6 = {
                .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
                .flowi6_mark = skb->mark,
+               .flowi6_uid = sock_net_uid(net, skb->sk),
                .daddr = iph->daddr,
                .saddr = iph->saddr,
        };
index e10a04c9cdc7be6bd8fc18f960e7bf51f43cbd81..6acb2eecd986cbf64deb0fcdb47c3931cf18a4ed 100644 (file)
@@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV6
 
          To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_SOCKET_IPV6
+       tristate "IPv6 socket lookup support"
+       help
+         This option enables the IPv6 socket lookup infrastructure. This
+         is used by the ip6tables socket match.
+
 if NF_TABLES
 
 config NF_TABLES_IPV6
@@ -54,6 +60,14 @@ config NFT_DUP_IPV6
        help
          This module enables IPv6 packet duplication support for nf_tables.
 
+config NFT_FIB_IPV6
+       tristate "nf_tables fib / ipv6 route lookup support"
+       select NFT_FIB
+       help
+         This module enables IPv6 FIB lookups, e.g. for reverse path filtering.
+         It also allows query of the FIB for the route type, e.g. local, unicast,
+         multicast or blackhole.
+
 endif # NF_TABLES_IPV6
 endif # NF_TABLES
 
index b4f7d0b4e2afc630f7a5be2ae949dff676dc5985..fe180c96040e28b0e78862ca911678846ea570a7 100644 (file)
@@ -24,6 +24,8 @@ obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
 nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
 obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
 
+obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o
+
 # logging
 obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
 
@@ -40,6 +42,7 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
 obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
 obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
 obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
+obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
 
 # matches
 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
index 55aacea24396dc17d429896c5ff77e5e8162a52e..6ff42b8301ccca2ca237679d6fab41e80fe0cf49 100644 (file)
@@ -291,11 +291,7 @@ ip6t_do_table(struct sk_buff *skb,
         * rule is also a fragment-specific rule, non-fragments won't
         * match it. */
        acpar.hotdrop = false;
-       acpar.net     = state->net;
-       acpar.in      = state->in;
-       acpar.out     = state->out;
-       acpar.family  = NFPROTO_IPV6;
-       acpar.hooknum = hook;
+       acpar.state   = state;
 
        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 
@@ -1007,7 +1003,7 @@ static int get_info(struct net *net, void __user *user,
 #endif
        t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
                                    "ip6table_%s", name);
-       if (!IS_ERR_OR_NULL(t)) {
+       if (t) {
                struct ip6t_getinfo info;
                const struct xt_table_info *private = t->private;
 #ifdef CONFIG_COMPAT
@@ -1037,7 +1033,7 @@ static int get_info(struct net *net, void __user *user,
                xt_table_unlock(t);
                module_put(t->me);
        } else
-               ret = t ? PTR_ERR(t) : -ENOENT;
+               ret = -ENOENT;
 #ifdef CONFIG_COMPAT
        if (compat)
                xt_compat_unlock(AF_INET6);
@@ -1063,7 +1059,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
        get.name[sizeof(get.name) - 1] = '\0';
 
        t = xt_find_table_lock(net, AF_INET6, get.name);
-       if (!IS_ERR_OR_NULL(t)) {
+       if (t) {
                struct xt_table_info *private = t->private;
                if (get.size == private->size)
                        ret = copy_entries_to_user(private->size,
@@ -1074,7 +1070,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
                module_put(t->me);
                xt_table_unlock(t);
        } else
-               ret = t ? PTR_ERR(t) : -ENOENT;
+               ret = -ENOENT;
 
        return ret;
 }
@@ -1099,8 +1095,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 
        t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
                                    "ip6table_%s", name);
-       if (IS_ERR_OR_NULL(t)) {
-               ret = t ? PTR_ERR(t) : -ENOENT;
+       if (!t) {
+               ret = -ENOENT;
                goto free_newinfo_counters_untrans;
        }
 
@@ -1214,8 +1210,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
        if (IS_ERR(paddc))
                return PTR_ERR(paddc);
        t = xt_find_table_lock(net, AF_INET6, tmp.name);
-       if (IS_ERR_OR_NULL(t)) {
-               ret = t ? PTR_ERR(t) : -ENOENT;
+       if (!t) {
+               ret = -ENOENT;
                goto free;
        }
 
@@ -1651,7 +1647,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
 
        xt_compat_lock(AF_INET6);
        t = xt_find_table_lock(net, AF_INET6, get.name);
-       if (!IS_ERR_OR_NULL(t)) {
+       if (t) {
                const struct xt_table_info *private = t->private;
                struct xt_table_info info;
                ret = compat_table_info(private, &info);
@@ -1665,7 +1661,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
                module_put(t->me);
                xt_table_unlock(t);
        } else
-               ret = t ? PTR_ERR(t) : -ENOENT;
+               ret = -ENOENT;
 
        xt_compat_unlock(AF_INET6);
        return ret;
index 7f9f45d829d2e7e2a742187f22ebde6c12df1a40..2b1a15846f9ac1f40d45aef52af6aab92d515408 100644 (file)
@@ -24,7 +24,7 @@
 static unsigned int
 masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
-       return nf_nat_masquerade_ipv6(skb, par->targinfo, par->out);
+       return nf_nat_masquerade_ipv6(skb, par->targinfo, xt_out(par));
 }
 
 static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par)
index db29bbf41b5977d3228ea58ce3032725c175f989..fa51a205918dbb06731f7300d34921b3c4096737 100644 (file)
@@ -39,35 +39,40 @@ static unsigned int
 reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct ip6t_reject_info *reject = par->targinfo;
-       struct net *net = par->net;
+       struct net *net = xt_net(par);
 
        switch (reject->with) {
        case IP6T_ICMP6_NO_ROUTE:
-               nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
+               nf_send_unreach6(net, skb, ICMPV6_NOROUTE, xt_hooknum(par));
                break;
        case IP6T_ICMP6_ADM_PROHIBITED:
-               nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);
+               nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED,
+                                xt_hooknum(par));
                break;
        case IP6T_ICMP6_NOT_NEIGHBOUR:
-               nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);
+               nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR,
+                                xt_hooknum(par));
                break;
        case IP6T_ICMP6_ADDR_UNREACH:
-               nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);
+               nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH,
+                                xt_hooknum(par));
                break;
        case IP6T_ICMP6_PORT_UNREACH:
-               nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);
+               nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH,
+                                xt_hooknum(par));
                break;
        case IP6T_ICMP6_ECHOREPLY:
                /* Do nothing */
                break;
        case IP6T_TCP_RESET:
-               nf_send_reset6(net, skb, par->hooknum);
+               nf_send_reset6(net, skb, xt_hooknum(par));
                break;
        case IP6T_ICMP6_POLICY_FAIL:
-               nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, par->hooknum);
+               nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, xt_hooknum(par));
                break;
        case IP6T_ICMP6_REJECT_ROUTE:
-               nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE, par->hooknum);
+               nf_send_unreach6(net, skb, ICMPV6_REJECT_ROUTE,
+                                xt_hooknum(par));
                break;
        }
 
index 06bed74cf5ee0667a86e284289ed23cafeb015cb..99a1216287c8a00fd198edb0693117727891edec 100644 (file)
@@ -277,12 +277,12 @@ static unsigned int
 synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_synproxy_info *info = par->targinfo;
-       struct net *net = par->net;
+       struct net *net = xt_net(par);
        struct synproxy_net *snet = synproxy_pernet(net);
        struct synproxy_options opts = {};
        struct tcphdr *th, _th;
 
-       if (nf_ip6_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+       if (nf_ip6_checksum(skb, xt_hooknum(par), par->thoff, IPPROTO_TCP))
                return NF_DROP;
 
        th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
index 1ee1b25df09679c32e0bd21526024cea9f7d26d1..d5263dc364a97a2a5530e69defd46df225b004b1 100644 (file)
@@ -93,7 +93,8 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
        if (unlikely(saddrtype == IPV6_ADDR_ANY))
                return true ^ invert; /* not routable: forward path will drop it */
 
-       return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert;
+       return rpfilter_lookup_reverse6(xt_net(par), skb, xt_in(par),
+                                       info->flags) ^ invert;
 }
 
 static int rpfilter_check(const struct xt_mtchk_param *par)
index 963ee38486755487f3f4562a01091674de0fb279..500be28ff5635c18f0fd3d33209f92fa953cb244 100644 (file)
@@ -336,47 +336,35 @@ static struct nf_sockopt_ops so_getorigdst6 = {
        .owner          = THIS_MODULE,
 };
 
+static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
+       &nf_conntrack_l4proto_tcp6,
+       &nf_conntrack_l4proto_udp6,
+       &nf_conntrack_l4proto_icmpv6,
+};
+
 static int ipv6_net_init(struct net *net)
 {
        int ret = 0;
 
-       ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp6);
-       if (ret < 0) {
-               pr_err("nf_conntrack_tcp6: pernet registration failed\n");
-               goto out;
-       }
-       ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp6);
-       if (ret < 0) {
-               pr_err("nf_conntrack_udp6: pernet registration failed\n");
-               goto cleanup_tcp6;
-       }
-       ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmpv6);
-       if (ret < 0) {
-               pr_err("nf_conntrack_icmp6: pernet registration failed\n");
-               goto cleanup_udp6;
-       }
+       ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6,
+                                           ARRAY_SIZE(builtin_l4proto6));
+       if (ret < 0)
+               return ret;
+
        ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6);
        if (ret < 0) {
                pr_err("nf_conntrack_ipv6: pernet registration failed.\n");
-               goto cleanup_icmpv6;
+               nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
+                                               ARRAY_SIZE(builtin_l4proto6));
        }
-       return 0;
- cleanup_icmpv6:
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
- cleanup_udp6:
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
- cleanup_tcp6:
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
- out:
        return ret;
 }
 
 static void ipv6_net_exit(struct net *net)
 {
        nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6);
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmpv6);
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp6);
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp6);
+       nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6,
+                                       ARRAY_SIZE(builtin_l4proto6));
 }
 
 static struct pernet_operations ipv6_net_ops = {
@@ -409,37 +397,20 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
                goto cleanup_pernet;
        }
 
-       ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp6);
-       if (ret < 0) {
-               pr_err("nf_conntrack_ipv6: can't register tcp6 proto.\n");
+       ret = nf_ct_l4proto_register(builtin_l4proto6,
+                                    ARRAY_SIZE(builtin_l4proto6));
+       if (ret < 0)
                goto cleanup_hooks;
-       }
-
-       ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp6);
-       if (ret < 0) {
-               pr_err("nf_conntrack_ipv6: can't register udp6 proto.\n");
-               goto cleanup_tcp6;
-       }
-
-       ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmpv6);
-       if (ret < 0) {
-               pr_err("nf_conntrack_ipv6: can't register icmpv6 proto.\n");
-               goto cleanup_udp6;
-       }
 
        ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6);
        if (ret < 0) {
                pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n");
-               goto cleanup_icmpv6;
+               goto cleanup_l4proto;
        }
        return ret;
-
- cleanup_icmpv6:
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
- cleanup_udp6:
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
- cleanup_tcp6:
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
+cleanup_l4proto:
+       nf_ct_l4proto_unregister(builtin_l4proto6,
+                                ARRAY_SIZE(builtin_l4proto6));
  cleanup_hooks:
        nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
  cleanup_pernet:
@@ -453,9 +424,8 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
 {
        synchronize_net();
        nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6);
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp6);
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
+       nf_ct_l4proto_unregister(builtin_l4proto6,
+                                ARRAY_SIZE(builtin_l4proto6));
        nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops));
        unregister_pernet_subsys(&ipv6_net_ops);
        nf_unregister_sockopt(&so_getorigdst6);
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
new file mode 100644 (file)
index 0000000..ebb2bf8
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2007-2008 BalaBit IT Ltd.
+ * Author: Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/icmp.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <net/inet6_hashtables.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netfilter/nf_socket.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+static int
+extract_icmp6_fields(const struct sk_buff *skb,
+                    unsigned int outside_hdrlen,
+                    int *protocol,
+                    const struct in6_addr **raddr,
+                    const struct in6_addr **laddr,
+                    __be16 *rport,
+                    __be16 *lport,
+                    struct ipv6hdr *ipv6_var)
+{
+       const struct ipv6hdr *inside_iph;
+       struct icmp6hdr *icmph, _icmph;
+       __be16 *ports, _ports[2];
+       u8 inside_nexthdr;
+       __be16 inside_fragoff;
+       int inside_hdrlen;
+
+       icmph = skb_header_pointer(skb, outside_hdrlen,
+                                  sizeof(_icmph), &_icmph);
+       if (icmph == NULL)
+               return 1;
+
+       if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
+               return 1;
+
+       inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph),
+                                       sizeof(*ipv6_var), ipv6_var);
+       if (inside_iph == NULL)
+               return 1;
+       inside_nexthdr = inside_iph->nexthdr;
+
+       inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) +
+                                             sizeof(*ipv6_var),
+                                        &inside_nexthdr, &inside_fragoff);
+       if (inside_hdrlen < 0)
+               return 1; /* hjm: Packet has no/incomplete transport layer headers. */
+
+       if (inside_nexthdr != IPPROTO_TCP &&
+           inside_nexthdr != IPPROTO_UDP)
+               return 1;
+
+       ports = skb_header_pointer(skb, inside_hdrlen,
+                                  sizeof(_ports), &_ports);
+       if (ports == NULL)
+               return 1;
+
+       /* the inside IP packet is the one quoted from our side, thus
+        * its saddr is the local address */
+       *protocol = inside_nexthdr;
+       *laddr = &inside_iph->saddr;
+       *lport = ports[0];
+       *raddr = &inside_iph->daddr;
+       *rport = ports[1];
+
+       return 0;
+}
+
+static struct sock *
+nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
+                     const u8 protocol,
+                     const struct in6_addr *saddr, const struct in6_addr *daddr,
+                     const __be16 sport, const __be16 dport,
+                     const struct net_device *in)
+{
+       switch (protocol) {
+       case IPPROTO_TCP:
+               return inet6_lookup(net, &tcp_hashinfo, skb, doff,
+                                   saddr, sport, daddr, dport,
+                                   in->ifindex);
+       case IPPROTO_UDP:
+               return udp6_lib_lookup(net, saddr, sport, daddr, dport,
+                                      in->ifindex);
+       }
+
+       return NULL;
+}
+
+struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
+                                 const struct net_device *indev)
+{
+       __be16 uninitialized_var(dport), uninitialized_var(sport);
+       const struct in6_addr *daddr = NULL, *saddr = NULL;
+       struct ipv6hdr *iph = ipv6_hdr(skb);
+       struct sk_buff *data_skb = NULL;
+       int doff = 0;
+       int thoff = 0, tproto;
+
+       tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
+       if (tproto < 0) {
+               pr_debug("unable to find transport header in IPv6 packet, dropping\n");
+               return NULL;
+       }
+
+       if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
+               struct udphdr _hdr, *hp;
+
+               hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
+               if (hp == NULL)
+                       return NULL;
+
+               saddr = &iph->saddr;
+               sport = hp->source;
+               daddr = &iph->daddr;
+               dport = hp->dest;
+               data_skb = (struct sk_buff *)skb;
+               doff = tproto == IPPROTO_TCP ?
+                       thoff + __tcp_hdrlen((struct tcphdr *)hp) :
+                       thoff + sizeof(*hp);
+
+       } else if (tproto == IPPROTO_ICMPV6) {
+               struct ipv6hdr ipv6_var;
+
+               if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
+                                        &sport, &dport, &ipv6_var))
+                       return NULL;
+       } else {
+               return NULL;
+       }
+
+       return nf_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
+                                    sport, dport, indev);
+}
+EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v6);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
+MODULE_DESCRIPTION("Netfilter IPv6 socket lookup infrastructure");
index 8bfd470cbe726678c5da89dff4f24b9fa089b356..d8b5b60b7d5310d0bff6e16f9964b5a91c38a64f 100644 (file)
@@ -26,9 +26,9 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr,
 {
        struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
        struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
-       int oif = regs->data[priv->sreg_dev];
+       int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
 
-       nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif);
+       nf_dup_ipv6(nft_net(pkt), pkt->skb, nft_hook(pkt), gw, oif);
 }
 
 static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
@@ -57,7 +57,9 @@ static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
        struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
 
-       if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
+       if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+               goto nla_put_failure;
+       if (priv->sreg_dev &&
            nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
                goto nla_put_failure;
 
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
new file mode 100644 (file)
index 0000000..d526bb5
--- /dev/null
@@ -0,0 +1,275 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+
+static bool fib6_is_local(const struct sk_buff *skb)
+{
+       const struct rt6_info *rt = (const void *)skb_dst(skb);
+
+       return rt && (rt->rt6i_flags & RTF_LOCAL);
+}
+
+static int get_ifindex(const struct net_device *dev)
+{
+       return dev ? dev->ifindex : 0;
+}
+
+static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
+                              const struct nft_pktinfo *pkt,
+                              const struct net_device *dev)
+{
+       const struct ipv6hdr *iph = ipv6_hdr(pkt->skb);
+       int lookup_flags = 0;
+
+       if (priv->flags & NFTA_FIB_F_DADDR) {
+               fl6->daddr = iph->daddr;
+               fl6->saddr = iph->saddr;
+       } else {
+               fl6->daddr = iph->saddr;
+               fl6->saddr = iph->daddr;
+       }
+
+       if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
+               lookup_flags |= RT6_LOOKUP_F_IFACE;
+               fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
+       }
+
+       if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
+               lookup_flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+       if (priv->flags & NFTA_FIB_F_MARK)
+               fl6->flowi6_mark = pkt->skb->mark;
+
+       fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK;
+
+       return lookup_flags;
+}
+
+static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
+                               const struct nft_pktinfo *pkt)
+{
+       const struct net_device *dev = NULL;
+       const struct nf_ipv6_ops *v6ops;
+       const struct nf_afinfo *afinfo;
+       int route_err, addrtype;
+       struct rt6_info *rt;
+       struct flowi6 fl6 = {
+               .flowi6_iif = LOOPBACK_IFINDEX,
+               .flowi6_proto = pkt->tprot,
+       };
+       u32 ret = 0;
+
+       afinfo = nf_get_afinfo(NFPROTO_IPV6);
+       if (!afinfo)
+               return RTN_UNREACHABLE;
+
+       if (priv->flags & NFTA_FIB_F_IIF)
+               dev = nft_in(pkt);
+       else if (priv->flags & NFTA_FIB_F_OIF)
+               dev = nft_out(pkt);
+
+       nft_fib6_flowi_init(&fl6, priv, pkt, dev);
+
+       v6ops = nf_get_ipv6_ops();
+       if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+               ret = RTN_LOCAL;
+
+       route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt,
+                                 flowi6_to_flowi(&fl6), false);
+       if (route_err)
+               goto err;
+
+       if (rt->rt6i_flags & RTF_REJECT) {
+               route_err = rt->dst.error;
+               dst_release(&rt->dst);
+               goto err;
+       }
+
+       if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr))
+               ret = RTN_ANYCAST;
+       else if (!dev && rt->rt6i_flags & RTF_LOCAL)
+               ret = RTN_LOCAL;
+
+       dst_release(&rt->dst);
+
+       if (ret)
+               return ret;
+
+       addrtype = ipv6_addr_type(&fl6.daddr);
+
+       if (addrtype & IPV6_ADDR_MULTICAST)
+               return RTN_MULTICAST;
+       if (addrtype & IPV6_ADDR_UNICAST)
+               return RTN_UNICAST;
+
+       return RTN_UNSPEC;
+ err:
+       switch (route_err) {
+       case -EINVAL:
+               return RTN_BLACKHOLE;
+       case -EACCES:
+               return RTN_PROHIBIT;
+       case -EAGAIN:
+               return RTN_THROW;
+       default:
+               break;
+       }
+
+       return RTN_UNREACHABLE;
+}
+
+void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
+                       const struct nft_pktinfo *pkt)
+{
+       const struct nft_fib *priv = nft_expr_priv(expr);
+       u32 *dest = &regs->data[priv->dreg];
+
+       *dest = __nft_fib6_eval_type(priv, pkt);
+}
+EXPORT_SYMBOL_GPL(nft_fib6_eval_type);
+
+void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
+                  const struct nft_pktinfo *pkt)
+{
+       const struct nft_fib *priv = nft_expr_priv(expr);
+       const struct net_device *oif = NULL;
+       u32 *dest = &regs->data[priv->dreg];
+       struct flowi6 fl6 = {
+               .flowi6_iif = LOOPBACK_IFINDEX,
+               .flowi6_proto = pkt->tprot,
+       };
+       struct rt6_info *rt;
+       int lookup_flags;
+
+       if (priv->flags & NFTA_FIB_F_IIF)
+               oif = nft_in(pkt);
+       else if (priv->flags & NFTA_FIB_F_OIF)
+               oif = nft_out(pkt);
+
+       lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif);
+
+       if (nft_hook(pkt) == NF_INET_PRE_ROUTING && fib6_is_local(pkt->skb)) {
+               nft_fib_store_result(dest, priv->result, pkt, LOOPBACK_IFINDEX);
+               return;
+       }
+
+       *dest = 0;
+ again:
+       rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+       if (rt->dst.error)
+               goto put_rt_err;
+
+       /* Should not see RTF_LOCAL here */
+       if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
+               goto put_rt_err;
+
+       if (oif && oif != rt->rt6i_idev->dev) {
+               /* multipath route? Try again with F_IFACE */
+               if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) {
+                       lookup_flags |= RT6_LOOKUP_F_IFACE;
+                       fl6.flowi6_oif = oif->ifindex;
+                       ip6_rt_put(rt);
+                       goto again;
+               }
+       }
+
+       switch (priv->result) {
+       case NFT_FIB_RESULT_OIF:
+               *dest = rt->rt6i_idev->dev->ifindex;
+               break;
+       case NFT_FIB_RESULT_OIFNAME:
+               strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               break;
+       }
+
+ put_rt_err:
+       ip6_rt_put(rt);
+}
+EXPORT_SYMBOL_GPL(nft_fib6_eval);
+
+static struct nft_expr_type nft_fib6_type;
+
+static const struct nft_expr_ops nft_fib6_type_ops = {
+       .type           = &nft_fib6_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+       .eval           = nft_fib6_eval_type,
+       .init           = nft_fib_init,
+       .dump           = nft_fib_dump,
+       .validate       = nft_fib_validate,
+};
+
+static const struct nft_expr_ops nft_fib6_ops = {
+       .type           = &nft_fib6_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+       .eval           = nft_fib6_eval,
+       .init           = nft_fib_init,
+       .dump           = nft_fib_dump,
+       .validate       = nft_fib_validate,
+};
+
+static const struct nft_expr_ops *
+nft_fib6_select_ops(const struct nft_ctx *ctx,
+                   const struct nlattr * const tb[])
+{
+       enum nft_fib_result result;
+
+       if (!tb[NFTA_FIB_RESULT])
+               return ERR_PTR(-EINVAL);
+
+       result = htonl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+
+       switch (result) {
+       case NFT_FIB_RESULT_OIF:
+               return &nft_fib6_ops;
+       case NFT_FIB_RESULT_OIFNAME:
+               return &nft_fib6_ops;
+       case NFT_FIB_RESULT_ADDRTYPE:
+               return &nft_fib6_type_ops;
+       default:
+               return ERR_PTR(-EOPNOTSUPP);
+       }
+}
+
+static struct nft_expr_type nft_fib6_type __read_mostly = {
+       .name           = "fib",
+       .select_ops     = &nft_fib6_select_ops,
+       .policy         = nft_fib_policy,
+       .maxattr        = NFTA_FIB_MAX,
+       .family         = NFPROTO_IPV6,
+       .owner          = THIS_MODULE,
+};
+
+static int __init nft_fib6_module_init(void)
+{
+       return nft_register_expr(&nft_fib6_type);
+}
+
+static void __exit nft_fib6_module_exit(void)
+{
+       nft_unregister_expr(&nft_fib6_type);
+}
+module_init(nft_fib6_module_init);
+module_exit(nft_fib6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_ALIAS_NFT_AF_EXPR(10, "fib");
index 9597ffb740773f37463706d26f29660a05f61abb..a2aff1277b405ac99a94d593034ab9a5238b0c4c 100644 (file)
@@ -32,7 +32,8 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
                range.max_proto.all =
                        *(__be16 *)&regs->data[priv->sreg_proto_max];
        }
-       regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out);
+       regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
+                                                   nft_out(pkt));
 }
 
 static struct nft_expr_type nft_masq_ipv6_type;
index aca44e89a88133575b3435f82d71ed5c05e98649..bfcd5af6bc153ad57409af7e2fd20df55c057700 100644 (file)
@@ -35,7 +35,8 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
 
        range.flags |= priv->flags;
 
-       regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->hook);
+       regs->verdict.code =
+               nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt));
 }
 
 static struct nft_expr_type nft_redir_ipv6_type;
index 92bda9908bb9a354e701fab450549b43e2f22bfa..057deeaff1cbc6d55b7e7ac6738dac5909af331f 100644 (file)
@@ -27,11 +27,11 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr,
 
        switch (priv->type) {
        case NFT_REJECT_ICMP_UNREACH:
-               nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code,
-                                pkt->hook);
+               nf_send_unreach6(nft_net(pkt), pkt->skb, priv->icmp_code,
+                                nft_hook(pkt));
                break;
        case NFT_REJECT_TCP_RST:
-               nf_send_reset6(pkt->net, pkt->skb, pkt->hook);
+               nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt));
                break;
        default:
                break;
index 0e983b694ee805dc662a49ae5f6c9438b5ed931d..e1f8b34d7a2ef8fb232826747d2ffe6652c1cb06 100644 (file)
@@ -113,6 +113,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        fl6.daddr = *daddr;
        fl6.flowi6_oif = oif;
        fl6.flowi6_mark = sk->sk_mark;
+       fl6.flowi6_uid = sk->sk_uid;
        fl6.fl6_icmp_type = user_icmph.icmp6_type;
        fl6.fl6_icmp_code = user_icmph.icmp6_code;
        security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
@@ -180,7 +181,7 @@ struct proto pingv6_prot = {
        .init =         ping_init_sock,
        .close =        ping_close,
        .connect =      ip6_datagram_connect_v6_only,
-       .disconnect =   udp_disconnect,
+       .disconnect =   __udp_disconnect,
        .setsockopt =   ipv6_setsockopt,
        .getsockopt =   ipv6_getsockopt,
        .sendmsg =      ping_v6_sendmsg,
index d7e8b955ade8f10fffcdff7e9c0a722004710b29..291ebc260e70ef7fbb4310d22ed0c915a372d0b5 100644 (file)
@@ -776,6 +776,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        memset(&fl6, 0, sizeof(fl6));
 
        fl6.flowi6_mark = sk->sk_mark;
+       fl6.flowi6_uid = sk->sk_uid;
 
        ipc6.hlimit = -1;
        ipc6.tclass = -1;
@@ -1243,7 +1244,7 @@ struct proto rawv6_prot = {
        .close             = rawv6_close,
        .destroy           = raw6_destroy,
        .connect           = ip6_datagram_connect_v6_only,
-       .disconnect        = udp_disconnect,
+       .disconnect        = __udp_disconnect,
        .ioctl             = rawv6_ioctl,
        .init              = rawv6_init_sk,
        .setsockopt        = rawv6_setsockopt,
index 2160d5d009cb6e97f36f3b4319322fb52a354866..e1da5b888cc4901711d573075f8ae4eada7f086e 100644 (file)
@@ -211,7 +211,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 {
        struct sk_buff *prev, *next;
        struct net_device *dev;
-       int offset, end;
+       int offset, end, fragsize;
        struct net *net = dev_net(skb_dst(skb)->dev);
        u8 ecn;
 
@@ -336,6 +336,10 @@ found:
        fq->ecn |= ecn;
        add_frag_mem_limit(fq->q.net, skb->truesize);
 
+       fragsize = -skb_network_offset(skb) + skb->len;
+       if (fragsize > fq->q.max_size)
+               fq->q.max_size = fragsize;
+
        /* The first fragment.
         * nhoffset is obtained from the first fragment, of course.
         */
@@ -456,7 +460,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
        skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
        memmove(head->head + sizeof(struct frag_hdr), head->head,
                (head->data - head->head) - sizeof(struct frag_hdr));
-       head->mac_header += sizeof(struct frag_hdr);
+       if (skb_mac_header_was_set(head))
+               head->mac_header += sizeof(struct frag_hdr);
        head->network_header += sizeof(struct frag_hdr);
 
        skb_reset_transport_header(head);
@@ -494,6 +499,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
        ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
        IP6CB(head)->nhoff = nhoff;
        IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
+       IP6CB(head)->frag_max_size = fq->q.max_size;
 
        /* Yes, and fold redundant checksum back. 8) */
        skb_postpush_rcsum(head, skb_network_header(head),
index bdbc38e8bf2906d48439928f08adf4b880469b60..b317bb135ed403fe46ba7255f3ab550dbecaac49 100644 (file)
@@ -102,11 +102,13 @@ static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
 #ifdef CONFIG_IPV6_ROUTE_INFO
 static struct rt6_info *rt6_add_route_info(struct net *net,
                                           const struct in6_addr *prefix, int prefixlen,
-                                          const struct in6_addr *gwaddr, int ifindex,
+                                          const struct in6_addr *gwaddr,
+                                          struct net_device *dev,
                                           unsigned int pref);
 static struct rt6_info *rt6_get_route_info(struct net *net,
                                           const struct in6_addr *prefix, int prefixlen,
-                                          const struct in6_addr *gwaddr, int ifindex);
+                                          const struct in6_addr *gwaddr,
+                                          struct net_device *dev);
 #endif
 
 struct uncached_list {
@@ -656,7 +658,8 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
        struct net_device *dev = rt->dst.dev;
 
        if (dev && !netif_carrier_ok(dev) &&
-           idev->cnf.ignore_routes_with_linkdown)
+           idev->cnf.ignore_routes_with_linkdown &&
+           !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
                goto out;
 
        if (rt6_check_expired(rt))
@@ -803,7 +806,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
                rt = rt6_get_dflt_router(gwaddr, dev);
        else
                rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
-                                       gwaddr, dev->ifindex);
+                                       gwaddr, dev);
 
        if (rt && !lifetime) {
                ip6_del_rt(rt);
@@ -811,8 +814,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
        }
 
        if (!rt && lifetime)
-               rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
-                                       pref);
+               rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
+                                       dev, pref);
        else if (rt)
                rt->rt6i_flags = RTF_ROUTEINFO |
                                 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
@@ -1050,6 +1053,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
        int strict = 0;
 
        strict |= flags & RT6_LOOKUP_F_IFACE;
+       strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
        if (net->ipv6.devconf_all->forwarding == 0)
                strict |= RT6_LOOKUP_F_REACHABLE;
 
@@ -1360,6 +1364,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
        if (rt6->rt6i_flags & RTF_LOCAL)
                return;
 
+       if (dst_metric_locked(dst, RTAX_MTU))
+               return;
+
        dst_confirm(dst);
        mtu = max_t(u32, mtu, IPV6_MIN_MTU);
        if (mtu >= dst_mtu(dst))
@@ -1401,7 +1408,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
 }
 
 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
-                    int oif, u32 mark)
+                    int oif, u32 mark, kuid_t uid)
 {
        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
        struct dst_entry *dst;
@@ -1413,6 +1420,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
        fl6.daddr = iph->daddr;
        fl6.saddr = iph->saddr;
        fl6.flowlabel = ip6_flowinfo(iph);
+       fl6.flowi6_uid = uid;
 
        dst = ip6_route_output(net, NULL, &fl6);
        if (!dst->error)
@@ -1426,7 +1434,7 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
        struct dst_entry *dst;
 
        ip6_update_pmtu(skb, sock_net(sk), mtu,
-                       sk->sk_bound_dev_if, sk->sk_mark);
+                       sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
 
        dst = __sk_dst_get(sk);
        if (!dst || !dst->obsolete ||
@@ -1518,7 +1526,8 @@ static struct dst_entry *ip6_route_redirect(struct net *net,
                                flags, __ip6_route_redirect);
 }
 
-void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
+void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
+                 kuid_t uid)
 {
        const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
        struct dst_entry *dst;
@@ -1531,6 +1540,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
        fl6.daddr = iph->daddr;
        fl6.saddr = iph->saddr;
        fl6.flowlabel = ip6_flowinfo(iph);
+       fl6.flowi6_uid = uid;
 
        dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
        rt6_do_redirect(dst, NULL, skb);
@@ -1552,6 +1562,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
        fl6.flowi6_mark = mark;
        fl6.daddr = msg->dest;
        fl6.saddr = iph->daddr;
+       fl6.flowi6_uid = sock_net_uid(net, NULL);
 
        dst = ip6_route_redirect(net, &fl6, &iph->saddr);
        rt6_do_redirect(dst, NULL, skb);
@@ -1560,7 +1571,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
 
 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
 {
-       ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
+       ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
+                    sk->sk_uid);
 }
 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
 
@@ -1789,7 +1801,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
        };
        struct fib6_table *table;
        struct rt6_info *rt;
-       int flags = RT6_LOOKUP_F_IFACE;
+       int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
 
        table = fib6_get_table(net, cfg->fc_table);
        if (!table)
@@ -2325,13 +2337,16 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
 #ifdef CONFIG_IPV6_ROUTE_INFO
 static struct rt6_info *rt6_get_route_info(struct net *net,
                                           const struct in6_addr *prefix, int prefixlen,
-                                          const struct in6_addr *gwaddr, int ifindex)
+                                          const struct in6_addr *gwaddr,
+                                          struct net_device *dev)
 {
+       u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
+       int ifindex = dev->ifindex;
        struct fib6_node *fn;
        struct rt6_info *rt = NULL;
        struct fib6_table *table;
 
-       table = fib6_get_table(net, RT6_TABLE_INFO);
+       table = fib6_get_table(net, tb_id);
        if (!table)
                return NULL;
 
@@ -2357,12 +2372,13 @@ out:
 
 static struct rt6_info *rt6_add_route_info(struct net *net,
                                           const struct in6_addr *prefix, int prefixlen,
-                                          const struct in6_addr *gwaddr, int ifindex,
+                                          const struct in6_addr *gwaddr,
+                                          struct net_device *dev,
                                           unsigned int pref)
 {
        struct fib6_config cfg = {
                .fc_metric      = IP6_RT_PRIO_USER,
-               .fc_ifindex     = ifindex,
+               .fc_ifindex     = dev->ifindex,
                .fc_dst_len     = prefixlen,
                .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
                                  RTF_UP | RTF_PREF(pref),
@@ -2371,7 +2387,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
                .fc_nlinfo.nl_net = net,
        };
 
-       cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
+       cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
        cfg.fc_dst = *prefix;
        cfg.fc_gateway = *gwaddr;
 
@@ -2381,16 +2397,17 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 
        ip6_route_add(&cfg);
 
-       return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
+       return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
 }
 #endif
 
 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
 {
+       u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
        struct rt6_info *rt;
        struct fib6_table *table;
 
-       table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
+       table = fib6_get_table(dev_net(dev), tb_id);
        if (!table)
                return NULL;
 
@@ -2424,20 +2441,20 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 
        cfg.fc_gateway = *gwaddr;
 
-       ip6_route_add(&cfg);
+       if (!ip6_route_add(&cfg)) {
+               struct fib6_table *table;
+
+               table = fib6_get_table(dev_net(dev), cfg.fc_table);
+               if (table)
+                       table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
+       }
 
        return rt6_get_dflt_router(gwaddr, dev);
 }
 
-void rt6_purge_dflt_routers(struct net *net)
+static void __rt6_purge_dflt_routers(struct fib6_table *table)
 {
        struct rt6_info *rt;
-       struct fib6_table *table;
-
-       /* NOTE: Keep consistent with rt6_get_dflt_router */
-       table = fib6_get_table(net, RT6_TABLE_DFLT);
-       if (!table)
-               return;
 
 restart:
        read_lock_bh(&table->tb6_lock);
@@ -2451,6 +2468,27 @@ restart:
                }
        }
        read_unlock_bh(&table->tb6_lock);
+
+       table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
+}
+
+void rt6_purge_dflt_routers(struct net *net)
+{
+       struct fib6_table *table;
+       struct hlist_head *head;
+       unsigned int h;
+
+       rcu_read_lock();
+
+       for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
+               head = &net->ipv6.fib_table_hash[h];
+               hlist_for_each_entry_rcu(table, head, tb6_hlist) {
+                       if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
+                               __rt6_purge_dflt_routers(table);
+               }
+       }
+
+       rcu_read_unlock();
 }
 
 static void rtmsg_to_fib6_config(struct net *net,
@@ -2728,6 +2766,7 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
           PMTU discouvery.
         */
        if (rt->dst.dev == arg->dev &&
+           dst_metric_raw(&rt->dst, RTAX_MTU) &&
            !dst_metric_locked(&rt->dst, RTAX_MTU)) {
                if (rt->rt6i_flags & RTF_CACHE) {
                        /* For RTF_CACHE with rt6i_pmtu == 0
@@ -2767,6 +2806,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
        [RTA_ENCAP_TYPE]        = { .type = NLA_U16 },
        [RTA_ENCAP]             = { .type = NLA_NESTED },
        [RTA_EXPIRES]           = { .type = NLA_U32 },
+       [RTA_UID]               = { .type = NLA_U32 },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -3341,6 +3381,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
        if (tb[RTA_MARK])
                fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
 
+       if (tb[RTA_UID])
+               fl6.flowi6_uid = make_kuid(current_user_ns(),
+                                          nla_get_u32(tb[RTA_UID]));
+       else
+               fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
+
        if (iif) {
                struct net_device *dev;
                int flags = 0;
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
new file mode 100644 (file)
index 0000000..b172d85
--- /dev/null
@@ -0,0 +1,495 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *       modify it under the terms of the GNU General Public License
+ *       as published by the Free Software Foundation; either version
+ *       2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/slab.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+
+#include <net/seg6.h>
+#include <net/genetlink.h>
+#include <linux/seg6.h>
+#include <linux/seg6_genl.h>
+#ifdef CONFIG_IPV6_SEG6_HMAC
+#include <net/seg6_hmac.h>
+#endif
+
+/* Sanity-check a Segment Routing Header that occupies @len bytes.
+ *
+ * The header is accepted only if it is of type IPV6_SRCRT_TYPE_4, its
+ * hdrlen field matches @len, segments_left equals first_segment, and the
+ * bytes following the segment list form a sequence of complete TLVs that
+ * ends exactly at @len.
+ *
+ * Returns true when the header is well formed, false otherwise.
+ */
+bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
+{
+       int trailing;
+       unsigned int tlv_offset;
+
+       if (srh->type != IPV6_SRCRT_TYPE_4)
+               return false;
+
+       /* hdrlen counts 8-byte units beyond the first 8 bytes */
+       if (((srh->hdrlen + 1) << 3) != len)
+               return false;
+
+       if (srh->segments_left != srh->first_segment)
+               return false;
+
+       /* TLVs start right after the (first_segment + 1) 16-byte segments */
+       tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
+
+       trailing = len - tlv_offset;
+       if (trailing < 0)
+               return false;
+
+       while (trailing) {
+               struct sr6_tlv *tlv;
+               unsigned int tlv_len;
+
+               /* the TLV header must fit before its length field is read */
+               if (trailing < (int)sizeof(*tlv))
+                       return false;
+
+               tlv = (struct sr6_tlv *)((unsigned char *)srh + tlv_offset);
+               tlv_len = sizeof(*tlv) + tlv->len;
+
+               trailing -= tlv_len;
+               if (trailing < 0)
+                       return false;
+
+               tlv_offset += tlv_len;
+       }
+
+       return true;
+}
+
+static struct genl_family seg6_genl_family;
+
+static const struct nla_policy seg6_genl_policy[SEG6_ATTR_MAX + 1] = { /* attribute policy referenced by every seg6_genl_ops entry */
+       [SEG6_ATTR_DST]                         = { .type = NLA_BINARY,
+               .len = sizeof(struct in6_addr) },
+       [SEG6_ATTR_DSTLEN]                      = { .type = NLA_S32, },
+       [SEG6_ATTR_HMACKEYID]           = { .type = NLA_U32, },
+       [SEG6_ATTR_SECRET]                      = { .type = NLA_BINARY, },
+       [SEG6_ATTR_SECRETLEN]           = { .type = NLA_U8, },
+       [SEG6_ATTR_ALGID]                       = { .type = NLA_U8, },
+       [SEG6_ATTR_HMACINFO]            = { .type = NLA_NESTED, },
+};
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+
+/* SEG6_CMD_SETHMAC handler: add, replace or delete an HMAC key.
+ *
+ * SEG6_ATTR_HMACKEYID (non-zero), SEG6_ATTR_SECRETLEN (at most
+ * SEG6_HMAC_SECRET_LEN) and SEG6_ATTR_ALGID are mandatory.  A secret
+ * length of zero deletes the key.  Otherwise SEG6_ATTR_SECRET must carry
+ * at least SECRETLEN bytes; an existing entry with the same key id is
+ * removed before the new one is added.  All table updates happen under
+ * sdata->lock.
+ *
+ * Returns 0 on success or a negative errno (-EINVAL, -ENOENT, -ENOMEM, or
+ * whatever seg6_hmac_info_add()/seg6_hmac_info_del() report).
+ */
+static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
+{
+       struct net *net = genl_info_net(info);
+       struct seg6_pernet_data *sdata;
+       struct seg6_hmac_info *hinfo;
+       u32 hmackeyid;
+       char *secret;
+       int err = 0;
+       u8 algid;
+       u8 slen;
+
+       sdata = seg6_pernet(net);
+
+       if (!info->attrs[SEG6_ATTR_HMACKEYID] ||
+           !info->attrs[SEG6_ATTR_SECRETLEN] ||
+           !info->attrs[SEG6_ATTR_ALGID])
+               return -EINVAL;
+
+       hmackeyid = nla_get_u32(info->attrs[SEG6_ATTR_HMACKEYID]);
+       slen = nla_get_u8(info->attrs[SEG6_ATTR_SECRETLEN]);
+       algid = nla_get_u8(info->attrs[SEG6_ATTR_ALGID]);
+
+       if (hmackeyid == 0)
+               return -EINVAL;
+
+       if (slen > SEG6_HMAC_SECRET_LEN)
+               return -EINVAL;
+
+       mutex_lock(&sdata->lock);
+       hinfo = seg6_hmac_info_lookup(net, hmackeyid);
+
+       if (!slen) {
+               /* deletion request: a missing key is reported as -ENOENT */
+               if (!hinfo)
+                       err = -ENOENT;
+               else
+                       err = seg6_hmac_info_del(net, hmackeyid);
+
+               goto out_unlock;
+       }
+
+       if (!info->attrs[SEG6_ATTR_SECRET]) {
+               err = -EINVAL;
+               goto out_unlock;
+       }
+
+       /* never copy more bytes than the attribute actually carries */
+       if (slen > nla_len(info->attrs[SEG6_ATTR_SECRET])) {
+               err = -EINVAL;
+               goto out_unlock;
+       }
+
+       if (hinfo) {
+               err = seg6_hmac_info_del(net, hmackeyid);
+               if (err)
+                       goto out_unlock;
+       }
+
+       secret = (char *)nla_data(info->attrs[SEG6_ATTR_SECRET]);
+
+       hinfo = kzalloc(sizeof(*hinfo), GFP_KERNEL);
+       if (!hinfo) {
+               err = -ENOMEM;
+               goto out_unlock;
+       }
+
+       memcpy(hinfo->secret, secret, slen);
+       hinfo->slen = slen;
+       hinfo->alg_id = algid;
+       hinfo->hmackeyid = hmackeyid;
+
+       err = seg6_hmac_info_add(net, hmackeyid, hinfo);
+       if (err)
+               kfree(hinfo);
+
+out_unlock:
+       mutex_unlock(&sdata->lock);
+       return err;
+}
+
+#else
+
+static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
+{
+       return -ENOTSUPP;       /* stub used when CONFIG_IPV6_SEG6_HMAC is not defined */
+}
+
+#endif
+
+/* SEG6_CMD_SET_TUNSRC handler: replace the per-netns tunnel source address.
+ *
+ * SEG6_ATTR_DST must carry a full struct in6_addr.  A private copy is
+ * published with rcu_assign_pointer() under sdata->lock and the previous
+ * address is freed after synchronize_net().
+ *
+ * Returns 0 on success, -EINVAL when the attribute is missing or too
+ * short, -ENOMEM when the copy cannot be allocated.
+ */
+static int seg6_genl_set_tunsrc(struct sk_buff *skb, struct genl_info *info)
+{
+       struct net *net = genl_info_net(info);
+       struct in6_addr *val, *t_old, *t_new;
+       struct seg6_pernet_data *sdata;
+
+       sdata = seg6_pernet(net);
+
+       if (!info->attrs[SEG6_ATTR_DST])
+               return -EINVAL;
+
+       /* an NLA_BINARY policy length is only an upper bound: reject short payloads */
+       if (nla_len(info->attrs[SEG6_ATTR_DST]) < (int)sizeof(*val))
+               return -EINVAL;
+
+       val = nla_data(info->attrs[SEG6_ATTR_DST]);
+       t_new = kmemdup(val, sizeof(*val), GFP_KERNEL);
+       if (!t_new)
+               return -ENOMEM; /* keep the old address rather than publishing NULL */
+
+       mutex_lock(&sdata->lock);
+
+       t_old = sdata->tun_src;
+       rcu_assign_pointer(sdata->tun_src, t_new);
+
+       mutex_unlock(&sdata->lock);
+
+       /* wait for readers of the old pointer before freeing it */
+       synchronize_net();
+       kfree(t_old);
+
+       return 0;
+}
+
+/* SEG6_CMD_GET_TUNSRC handler: reply with the current tunnel source address.
+ *
+ * Builds a SEG6_CMD_GET_TUNSRC message whose SEG6_ATTR_DST attribute is
+ * filled from seg6_pernet(net)->tun_src under rcu_read_lock(), then sends
+ * it back to the requester.
+ *
+ * Returns the result of genlmsg_reply(), or -ENOMEM when the message
+ * cannot be allocated or filled.
+ */
+static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info)
+{
+       struct net *net = genl_info_net(info);
+       struct in6_addr *tun_src;
+       struct sk_buff *msg;
+       void *hdr;
+
+       msg = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
+
+       hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+                         &seg6_genl_family, 0, SEG6_CMD_GET_TUNSRC);
+       if (!hdr)
+               goto free_msg;
+
+       rcu_read_lock();
+       tun_src = rcu_dereference(seg6_pernet(net)->tun_src);
+
+       if (nla_put(msg, SEG6_ATTR_DST, sizeof(struct in6_addr), tun_src))
+               goto nla_put_failure;
+
+       rcu_read_unlock();
+
+       genlmsg_end(msg, hdr);
+
+       /* propagate delivery failures instead of always reporting success */
+       return genlmsg_reply(msg, info);
+
+nla_put_failure:
+       rcu_read_unlock();
+       genlmsg_cancel(msg, hdr);
+free_msg:
+       nlmsg_free(msg);
+       return -ENOMEM;
+}
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+
+/* Append the attributes describing @hinfo (key id, secret length, secret,
+ * algorithm id, in that order) to @msg.  Stops at the first attribute
+ * that cannot be added.  Returns 0 on success, -1 otherwise.
+ */
+static int __seg6_hmac_fill_info(struct seg6_hmac_info *hinfo,
+                                struct sk_buff *msg)
+{
+       if (nla_put_u32(msg, SEG6_ATTR_HMACKEYID, hinfo->hmackeyid))
+               return -1;
+
+       if (nla_put_u8(msg, SEG6_ATTR_SECRETLEN, hinfo->slen))
+               return -1;
+
+       if (nla_put(msg, SEG6_ATTR_SECRET, hinfo->slen, hinfo->secret))
+               return -1;
+
+       if (nla_put_u8(msg, SEG6_ATTR_ALGID, hinfo->alg_id))
+               return -1;
+
+       return 0;
+}
+
+/* Emit one generic netlink message describing @hinfo into @skb.
+ *
+ * Returns 0 on success, -ENOMEM when the message header cannot be added,
+ * or -EMSGSIZE (after cancelling the partial message) when the
+ * attributes cannot be added.
+ */
+static int __seg6_genl_dumphmac_element(struct seg6_hmac_info *hinfo,
+                                       u32 portid, u32 seq, u32 flags,
+                                       struct sk_buff *skb, u8 cmd)
+{
+       void *hdr = genlmsg_put(skb, portid, seq, &seg6_genl_family, flags,
+                               cmd);
+
+       if (!hdr)
+               return -ENOMEM;
+
+       if (__seg6_hmac_fill_info(hinfo, skb) < 0) {
+               genlmsg_cancel(skb, hdr);
+               return -EMSGSIZE;
+       }
+
+       genlmsg_end(skb, hdr);
+       return 0;
+}
+
+/* Dump start callback: make sure cb->args[0] holds a rhashtable iterator
+ * (allocating one when it is still zero) and enter the walk over the
+ * per-netns HMAC table.  Returns 0, or -ENOMEM when the iterator cannot
+ * be allocated.
+ */
+static int seg6_genl_dumphmac_start(struct netlink_callback *cb)
+{
+       struct seg6_pernet_data *sdata = seg6_pernet(sock_net(cb->skb->sk));
+       struct rhashtable_iter *walk = (struct rhashtable_iter *)cb->args[0];
+
+       if (!walk) {
+               walk = kmalloc(sizeof(*walk), GFP_KERNEL);
+               if (!walk)
+                       return -ENOMEM;
+
+               /* remembered for the dumpit and done callbacks */
+               cb->args[0] = (long)walk;
+       }
+
+       rhashtable_walk_enter(&sdata->hmac_infos, walk);
+
+       return 0;
+}
+
+/* Dump done callback: leave the rhashtable walk and free the iterator
+ * stored in cb->args[0].  Always returns 0.
+ */
+static int seg6_genl_dumphmac_done(struct netlink_callback *cb)
+{
+       struct rhashtable_iter *walk;
+
+       walk = (struct rhashtable_iter *)cb->args[0];
+       rhashtable_walk_exit(walk);
+       kfree(walk);
+
+       return 0;
+}
+
+static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];   /* stored by seg6_genl_dumphmac_start() */
+       struct net *net = sock_net(skb->sk);
+       struct seg6_pernet_data *sdata;
+       struct seg6_hmac_info *hinfo;
+       int ret;
+
+       sdata = seg6_pernet(net);       /* NOTE(review): sdata is not used below */
+
+       ret = rhashtable_walk_start(iter);
+       if (ret && ret != -EAGAIN)      /* -EAGAIN from walk_start is tolerated */
+               goto done;
+
+       for (;;) {
+               hinfo = rhashtable_walk_next(iter);
+
+               if (IS_ERR(hinfo)) {
+                       if (PTR_ERR(hinfo) == -EAGAIN)  /* keep walking on -EAGAIN */
+                               continue;
+                       ret = PTR_ERR(hinfo);
+                       goto done;
+               } else if (!hinfo) {    /* no further entry: stop */
+                       break;
+               }
+
+               ret = __seg6_genl_dumphmac_element(hinfo,
+                                                  NETLINK_CB(cb->skb).portid,
+                                                  cb->nlh->nlmsg_seq,
+                                                  NLM_F_MULTI,
+                                                  skb, SEG6_CMD_DUMPHMAC);
+               if (ret)        /* element could not be emitted: return its error */
+                       goto done;
+       }
+
+       ret = skb->len; /* walk completed: return the skb length */
+
+done:
+       rhashtable_walk_stop(iter);     /* reached on every path, including early errors */
+       return ret;
+}
+
+#else
+
+static int seg6_genl_dumphmac_start(struct netlink_callback *cb)
+{
+       return 0;       /* stub: nothing to set up without CONFIG_IPV6_SEG6_HMAC */
+}
+
+static int seg6_genl_dumphmac_done(struct netlink_callback *cb)
+{
+       return 0;       /* stub: nothing to release without CONFIG_IPV6_SEG6_HMAC */
+}
+
+static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
+{
+       return -ENOTSUPP;       /* stub: HMAC dump unavailable without CONFIG_IPV6_SEG6_HMAC */
+}
+
+#endif
+
+static int __net_init seg6_net_init(struct net *net)
+{
+       struct seg6_pernet_data *sdata;
+
+       sdata = kzalloc(sizeof(*sdata), GFP_KERNEL);
+       if (!sdata)
+               return -ENOMEM;
+
+       mutex_init(&sdata->lock);
+
+       sdata->tun_src = kzalloc(sizeof(*sdata->tun_src), GFP_KERNEL);  /* zero-filled initial tunnel source */
+       if (!sdata->tun_src) {
+               kfree(sdata);
+               return -ENOMEM;
+       }
+
+       net->ipv6.seg6_data = sdata;    /* published before seg6_hmac_net_init() runs */
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       seg6_hmac_net_init(net);        /* NOTE(review): any return value is ignored here - confirm it cannot fail */
+#endif
+
+       return 0;
+}
+
+static void __net_exit seg6_net_exit(struct net *net)
+{
+       struct seg6_pernet_data *sdata = seg6_pernet(net);
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       seg6_hmac_net_exit(net);        /* runs before sdata is freed below */
+#endif
+
+       kfree(sdata->tun_src);  /* allocated in seg6_net_init() */
+       kfree(sdata);   /* allocated in seg6_net_init() */
+}
+
+static struct pernet_operations ip6_segments_ops = {   /* per-netns hooks registered by seg6_init() */
+       .init = seg6_net_init,
+       .exit = seg6_net_exit,
+};
+
+static const struct genl_ops seg6_genl_ops[] = {       /* every command uses seg6_genl_policy and GENL_ADMIN_PERM */
+       {
+               .cmd    = SEG6_CMD_SETHMAC,
+               .doit   = seg6_genl_sethmac,
+               .policy = seg6_genl_policy,
+               .flags  = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd    = SEG6_CMD_DUMPHMAC,    /* the only dump-style command: start/dumpit/done */
+               .start  = seg6_genl_dumphmac_start,
+               .dumpit = seg6_genl_dumphmac,
+               .done   = seg6_genl_dumphmac_done,
+               .policy = seg6_genl_policy,
+               .flags  = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd    = SEG6_CMD_SET_TUNSRC,
+               .doit   = seg6_genl_set_tunsrc,
+               .policy = seg6_genl_policy,
+               .flags  = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd    = SEG6_CMD_GET_TUNSRC,
+               .doit   = seg6_genl_get_tunsrc,
+               .policy = seg6_genl_policy,
+               .flags  = GENL_ADMIN_PERM,
+       },
+};
+
+static struct genl_family seg6_genl_family __ro_after_init = { /* registered by seg6_init(), unregistered by seg6_exit() */
+       .hdrsize        = 0,
+       .name           = SEG6_GENL_NAME,
+       .version        = SEG6_GENL_VERSION,
+       .maxattr        = SEG6_ATTR_MAX,
+       .netnsok        = true,
+       .parallel_ops   = true,
+       .ops            = seg6_genl_ops,
+       .n_ops          = ARRAY_SIZE(seg6_genl_ops),
+       .module         = THIS_MODULE,
+};
+
+/* Bring up Segment Routing support: register the generic netlink family,
+ * the per-netns operations and, when configured, the lightweight tunnel
+ * and HMAC parts.
+ *
+ * On failure every step that already succeeded is undone in reverse
+ * order, whichever combination of CONFIG_IPV6_SEG6_LWTUNNEL and
+ * CONFIG_IPV6_SEG6_HMAC is enabled.
+ *
+ * Returns 0 on success or the negative errno of the failing step.
+ */
+int __init seg6_init(void)
+{
+       int err;
+
+       err = genl_register_family(&seg6_genl_family);
+       if (err)
+               goto out;
+
+       err = register_pernet_subsys(&ip6_segments_ops);
+       if (err)
+               goto out_unregister_genl;
+
+#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+       err = seg6_iptunnel_init();
+       if (err)
+               goto out_unregister_pernet;
+#endif
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       err = seg6_hmac_init();
+       if (err)
+               goto out_unregister_iptun;
+#endif
+
+       pr_info("Segment Routing with IPv6\n");
+
+out:
+       return err;
+#ifdef CONFIG_IPV6_SEG6_HMAC
+out_unregister_iptun:
+#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+       seg6_iptunnel_exit();
+#endif
+#endif
+       /* only the label is conditional: the pernet ops must also be
+        * unregistered when HMAC init fails without LWTUNNEL support
+        */
+#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+out_unregister_pernet:
+#endif
+       unregister_pernet_subsys(&ip6_segments_ops);
+out_unregister_genl:
+       genl_unregister_family(&seg6_genl_family);
+       goto out;
+}
+
+void seg6_exit(void)
+{
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       seg6_hmac_exit();       /* teardown runs in the reverse order of seg6_init() */
+#endif
+#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
+       seg6_iptunnel_exit();
+#endif
+       unregister_pernet_subsys(&ip6_segments_ops);
+       genl_unregister_family(&seg6_genl_family);
+}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
new file mode 100644 (file)
index 0000000..ef1c8a4
--- /dev/null
@@ -0,0 +1,484 @@
+/*
+ *  SR-IPv6 implementation -- HMAC functions
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+#include <linux/mroute6.h>
+#include <linux/slab.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/xfrm.h>
+
+#include <linux/cryptohash.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <net/seg6.h>
+#include <net/genetlink.h>
+#include <net/seg6_hmac.h>
+#include <linux/random.h>
+
+static char * __percpu *hmac_ring;
+
+/* rhashtable compare callback: returns 0 when the entry's HMAC key id
+ * equals the looked-up key, non-zero otherwise.
+ */
+static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+       const struct seg6_hmac_info *entry = obj;
+       const __u32 *wanted = arg->key;
+
+       return entry->hmackeyid != *wanted;
+}
+
+/* Free an HMAC info entry after an RCU grace period, so that readers
+ * still holding a pointer obtained under rcu_read_lock() stay safe.
+ */
+static inline void seg6_hinfo_release(struct seg6_hmac_info *hinfo)
+{
+       kfree_rcu(hinfo, rcu);
+}
+
+/* Per-object destructor handed to rhashtable_free_and_destroy();
+ * @arg is unused.
+ */
+static void seg6_free_hi(void *ptr, void *arg)
+{
+       struct seg6_hmac_info *entry = ptr;
+
+       if (!entry)
+               return;
+
+       seg6_hinfo_release(entry);
+}
+
+/* Parameters of the per-netns HMAC info table: entries are struct
+ * seg6_hmac_info, keyed by their 32-bit hmackeyid.
+ */
+static const struct rhashtable_params rht_params = {
+       .head_offset            = offsetof(struct seg6_hmac_info, node),
+       .key_offset             = offsetof(struct seg6_hmac_info, hmackeyid),
+       .key_len                = sizeof(u32),
+       .automatic_shrinking    = true,
+       .obj_cmpfn              = seg6_hmac_cmpfn,
+};
+
+/* Supported HMAC algorithms. The per-CPU tfms/shashs members of each
+ * entry are filled in by seg6_hmac_init_algo().
+ */
+static struct seg6_hmac_algo hmac_algos[] = {
+       {
+               .alg_id = SEG6_HMAC_ALGO_SHA1,
+               .name = "hmac(sha1)",
+       },
+       {
+               .alg_id = SEG6_HMAC_ALGO_SHA256,
+               .name = "hmac(sha256)",
+       },
+};
+
+/* Return the HMAC TLV of @srh, or NULL when the header is too short,
+ * does not advertise an HMAC, or its trailing TLV has the wrong
+ * type/length.
+ */
+static struct sr6_tlv_hmac *seg6_get_tlv_hmac(struct ipv6_sr_hdr *srh)
+{
+       struct sr6_tlv_hmac *tlv;
+
+       /* hdrlen counts 8-byte units (see "(hdrlen + 1) << 3" below):
+        * every 16-byte segment needs 2 units and the 40-byte TLV
+        * needs 5, so anything shorter cannot carry the TLV
+        */
+       if (srh->hdrlen < (srh->first_segment + 1) * 2 + 5)
+               return NULL;
+
+       if (!sr_has_hmac(srh))
+               return NULL;
+
+       /* the TLV is taken from the last 40 bytes of the header */
+       tlv = (struct sr6_tlv_hmac *)
+             ((char *)srh + ((srh->hdrlen + 1) << 3) - 40);
+
+       /* NOTE(review): 38 is presumably the 40-byte TLV minus its
+        * 2-byte type/length header -- confirm against struct sr6_tlv_hmac
+        */
+       if (tlv->tlvhdr.type != SR6_TLV_HMAC || tlv->tlvhdr.len != 38)
+               return NULL;
+
+       return tlv;
+}
+
+/* Look up the hmac_algos[] entry whose alg_id is @alg_id; NULL if the
+ * algorithm is not supported.
+ */
+static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id)
+{
+       struct seg6_hmac_algo *cur = hmac_algos;
+       struct seg6_hmac_algo *end = hmac_algos + ARRAY_SIZE(hmac_algos);
+
+       for (; cur < end; cur++) {
+               if (cur->alg_id == alg_id)
+                       return cur;
+       }
+
+       return NULL;
+}
+
+/* Compute the HMAC of @text (@psize bytes) with the key and algorithm of
+ * @hinfo, using this CPU's transform and descriptor.
+ *
+ * The digest is written to @output, which holds @outlen bytes. Returns
+ * the digest size on success, -ENOENT for an unknown algorithm, -ENOMEM
+ * when @output is too small, or the negative error of the crypto layer.
+ */
+static int __do_hmac(struct seg6_hmac_info *hinfo, const char *text, u8 psize,
+                    u8 *output, int outlen)
+{
+       struct seg6_hmac_algo *algo = __hmac_get_algo(hinfo->alg_id);
+       struct crypto_shash *tfm;
+       struct shash_desc *desc;
+       int digest_len, err;
+
+       if (!algo)
+               return -ENOENT;
+
+       tfm = *this_cpu_ptr(algo->tfms);
+
+       digest_len = crypto_shash_digestsize(tfm);
+       if (digest_len > outlen) {
+               pr_debug("sr-ipv6: __do_hmac: digest size too big (%d / %d)\n",
+                        digest_len, outlen);
+               return -ENOMEM;
+       }
+
+       err = crypto_shash_setkey(tfm, hinfo->secret, hinfo->slen);
+       if (err < 0) {
+               pr_debug("sr-ipv6: crypto_shash_setkey failed: err %d\n", err);
+               return err;
+       }
+
+       desc = *this_cpu_ptr(algo->shashs);
+       desc->tfm = tfm;
+
+       err = crypto_shash_digest(desc, text, psize, output);
+       if (err < 0) {
+               pr_debug("sr-ipv6: crypto_shash_digest failed: err %d\n", err);
+               return err;
+       }
+
+       return digest_len;
+}
+
+/* Compute the HMAC of an SRH.
+ *
+ * @hinfo:  key material and algorithm to use
+ * @hdr:    segment routing header to authenticate
+ * @saddr:  IPv6 source address that is part of the signed text
+ * @output: receives SEG6_HMAC_FIELD_LEN bytes; the digest is truncated
+ *          or zero-padded to that length
+ *
+ * Returns 0 on success, -EMSGSIZE when the text does not fit the ring
+ * buffer, or the negative error returned by __do_hmac().
+ */
+int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr,
+                     struct in6_addr *saddr, u8 *output)
+{
+       __be32 hmackeyid = cpu_to_be32(hinfo->hmackeyid);
+       u8 tmp_out[SEG6_HMAC_MAX_DIGESTSIZE];
+       int plen, i, dgsize, wrsize;
+       char *ring, *off;
+
+       /* a 160-byte digest buffer is enough for the largest known hash
+        * output (RadioGatun, up to 1216 bits)
+        */
+
+       /* saddr(16) + first_seg(1) + cleanup(1) + keyid(4) + seglist(16n) */
+       plen = 16 + 1 + 1 + 4 + (hdr->first_segment + 1) * 16;
+
+       /* this limit allows for 14 segments
+        * NOTE(review): plen is later passed to __do_hmac() as a u8, so
+        * this check must also keep it below 256 -- confirm
+        * SEG6_HMAC_RING_SIZE
+        */
+       if (plen >= SEG6_HMAC_RING_SIZE)
+               return -EMSGSIZE;
+
+       /* Let's build the HMAC text on the ring buffer. The text is composed
+        * as follows, in order:
+        *
+        * 1. Source IPv6 address (128 bits)
+        * 2. first_segment value (8 bits)
+        * 3. cleanup flag (8 bits: highest bit is cleanup value, others are 0)
+        * 4. HMAC Key ID (32 bits)
+        * 5. All segments in the segments list (n * 128 bits)
+        */
+
+       /* the ring and the crypto state used by __do_hmac() are per-CPU:
+        * keep bottom halves off until the digest has been produced
+        */
+       local_bh_disable();
+       ring = *this_cpu_ptr(hmac_ring);
+       off = ring;
+
+       /* source address */
+       memcpy(off, saddr, 16);
+       off += 16;
+
+       /* first_segment value */
+       *off++ = hdr->first_segment;
+
+       /* cleanup flag */
+       *off++ = !!(sr_has_cleanup(hdr)) << 7;
+
+       /* HMAC Key ID */
+       memcpy(off, &hmackeyid, 4);
+       off += 4;
+
+       /* all segments in the list */
+       for (i = 0; i < hdr->first_segment + 1; i++) {
+               memcpy(off, hdr->segments + i, 16);
+               off += 16;
+       }
+
+       dgsize = __do_hmac(hinfo, ring, plen, tmp_out,
+                          SEG6_HMAC_MAX_DIGESTSIZE);
+       local_bh_enable();
+
+       if (dgsize < 0)
+               return dgsize;
+
+       /* copy at most SEG6_HMAC_FIELD_LEN bytes, zero-padding the rest */
+       wrsize = SEG6_HMAC_FIELD_LEN;
+       if (wrsize > dgsize)
+               wrsize = dgsize;
+
+       memset(output, 0, SEG6_HMAC_FIELD_LEN);
+       memcpy(output, tmp_out, wrsize);
+
+       return 0;
+}
+EXPORT_SYMBOL(seg6_hmac_compute);
+
+/* Check whether the HMAC status of an incoming SR-enabled packet matches
+ * the seg6_require_hmac policy of the ingress device:
+ *
+ *   < 0   never check, accept
+ *   == 0  check the HMAC only when the packet carries one
+ *   > 0   an HMAC is mandatory
+ *
+ * Returns true when the packet is acceptable, false when it must be
+ * dropped (missing or unknown key, HMAC computation failure, mismatch,
+ * or no IPv6 configuration on the device).
+ *
+ * called with rcu_read_lock()
+ */
+bool seg6_hmac_validate_skb(struct sk_buff *skb)
+{
+       u8 hmac_output[SEG6_HMAC_FIELD_LEN];
+       struct net *net = dev_net(skb->dev);
+       struct seg6_hmac_info *hinfo;
+       struct sr6_tlv_hmac *tlv;
+       struct ipv6_sr_hdr *srh;
+       struct inet6_dev *idev;
+       int require_hmac;
+
+       /* __in6_dev_get() returns NULL for a device without IPv6 state;
+        * there is no policy to apply then, so fail closed
+        */
+       idev = __in6_dev_get(skb->dev);
+       if (!idev)
+               return false;
+
+       /* read the policy once so all checks below see the same value */
+       require_hmac = idev->cnf.seg6_require_hmac;
+
+       srh = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+
+       tlv = seg6_get_tlv_hmac(srh);
+
+       /* mandatory check but no tlv */
+       if (require_hmac > 0 && !tlv)
+               return false;
+
+       /* no check */
+       if (require_hmac < 0)
+               return true;
+
+       /* check only if present */
+       if (require_hmac == 0 && !tlv)
+               return true;
+
+       /* now, require_hmac >= 0 && tlv */
+
+       hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid));
+       if (!hinfo)
+               return false;
+
+       if (seg6_hmac_compute(hinfo, srh, &ipv6_hdr(skb)->saddr, hmac_output))
+               return false;
+
+       /* the packet is untrusted: compare in constant time so the
+        * position of the first differing byte does not leak through
+        * timing
+        */
+       if (crypto_memneq(hmac_output, tlv->hmac, SEG6_HMAC_FIELD_LEN))
+               return false;
+
+       return true;
+}
+EXPORT_SYMBOL(seg6_hmac_validate_skb);
+
+/* Find the HMAC info registered under @key in @net, or NULL.
+ *
+ * called with rcu_read_lock()
+ */
+struct seg6_hmac_info *seg6_hmac_info_lookup(struct net *net, u32 key)
+{
+       struct seg6_pernet_data *sdata = seg6_pernet(net);
+
+       return rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params);
+}
+EXPORT_SYMBOL(seg6_hmac_info_lookup);
+
+/* Insert @hinfo into the HMAC info table of @net. The entry is keyed by
+ * hinfo->hmackeyid (see rht_params); @key itself is not used here.
+ *
+ * Returns 0 or the error of rhashtable_lookup_insert_fast().
+ */
+int seg6_hmac_info_add(struct net *net, u32 key, struct seg6_hmac_info *hinfo)
+{
+       struct seg6_pernet_data *sdata = seg6_pernet(net);
+
+       return rhashtable_lookup_insert_fast(&sdata->hmac_infos, &hinfo->node,
+                                            rht_params);
+}
+EXPORT_SYMBOL(seg6_hmac_info_add);
+
+/* Remove the HMAC info registered under @key from @net and release it.
+ *
+ * Returns 0 on success, -ENOENT when no such key exists, or the error
+ * of rhashtable_remove_fast().
+ */
+int seg6_hmac_info_del(struct net *net, u32 key)
+{
+       struct seg6_pernet_data *sdata = seg6_pernet(net);
+       struct seg6_hmac_info *hinfo;
+       int err;
+
+       hinfo = rhashtable_lookup_fast(&sdata->hmac_infos, &key, rht_params);
+       if (!hinfo)
+               return -ENOENT;
+
+       err = rhashtable_remove_fast(&sdata->hmac_infos, &hinfo->node,
+                                    rht_params);
+       if (!err)
+               seg6_hinfo_release(hinfo);
+
+       return err;
+}
+EXPORT_SYMBOL(seg6_hmac_info_del);
+
+/* Compute the HMAC of @srh with source address @saddr and store it in
+ * the HMAC TLV of the header.
+ *
+ * Returns 0 on success, -EINVAL when the header has no valid HMAC TLV,
+ * -ENOENT when the key id of the TLV is unknown in @net, or the error of
+ * seg6_hmac_compute().
+ */
+int seg6_push_hmac(struct net *net, struct in6_addr *saddr,
+                  struct ipv6_sr_hdr *srh)
+{
+       struct sr6_tlv_hmac *tlv = seg6_get_tlv_hmac(srh);
+       struct seg6_hmac_info *hinfo;
+       int err;
+
+       if (!tlv)
+               return -EINVAL;
+
+       rcu_read_lock();
+
+       hinfo = seg6_hmac_info_lookup(net, be32_to_cpu(tlv->hmackeyid));
+       if (hinfo) {
+               memset(tlv->hmac, 0, SEG6_HMAC_FIELD_LEN);
+               err = seg6_hmac_compute(hinfo, srh, saddr, tlv->hmac);
+       } else {
+               err = -ENOENT;
+       }
+
+       rcu_read_unlock();
+
+       return err;
+}
+EXPORT_SYMBOL(seg6_push_hmac);
+
+/* Allocate one SEG6_HMAC_RING_SIZE scratch buffer per possible CPU and
+ * publish them through the per-CPU pointer array hmac_ring.
+ *
+ * Returns 0 on success or -ENOMEM; on failure nothing stays allocated
+ * and hmac_ring is reset to NULL.
+ */
+static int seg6_hmac_init_ring(void)
+{
+       int cpu;
+
+       hmac_ring = alloc_percpu(char *);
+
+       if (!hmac_ring)
+               return -ENOMEM;
+
+       for_each_possible_cpu(cpu) {
+               char *ring = kzalloc(SEG6_HMAC_RING_SIZE, GFP_KERNEL);
+
+               if (!ring)
+                       goto err_free;
+
+               *per_cpu_ptr(hmac_ring, cpu) = ring;
+       }
+
+       return 0;
+
+err_free:
+       /* alloc_percpu() hands out zeroed memory, so slots that were not
+        * filled yet are NULL and kfree(NULL) is a no-op
+        */
+       for_each_possible_cpu(cpu)
+               kfree(*per_cpu_ptr(hmac_ring, cpu));
+       free_percpu(hmac_ring);
+       hmac_ring = NULL;
+
+       return -ENOMEM;
+}
+
+/* Release the per-CPU transforms and descriptors of one algorithm.
+ * Safe on a partially initialised entry: unset members are NULL.
+ */
+static void seg6_hmac_free_algo(struct seg6_hmac_algo *algo)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               if (algo->shashs)
+                       kfree(*per_cpu_ptr(algo->shashs, cpu));
+
+               if (algo->tfms) {
+                       struct crypto_shash *tfm;
+
+                       tfm = *per_cpu_ptr(algo->tfms, cpu);
+                       if (tfm)
+                               crypto_free_shash(tfm);
+               }
+       }
+
+       /* free_percpu(NULL) is a no-op */
+       free_percpu(algo->shashs);
+       free_percpu(algo->tfms);
+       algo->shashs = NULL;
+       algo->tfms = NULL;
+}
+
+/* For every entry of hmac_algos[], allocate one crypto transform and one
+ * hash descriptor per possible CPU.
+ *
+ * Returns 0 on success or a negative errno; on failure everything
+ * allocated by this function is released again.
+ */
+static int seg6_hmac_init_algo(void)
+{
+       struct seg6_hmac_algo *algo;
+       struct crypto_shash *tfm;
+       struct shash_desc *shash;
+       int i, alg_count, cpu;
+       int err;
+
+       alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+
+       for (i = 0; i < alg_count; i++) {
+               struct crypto_shash **p_tfm;
+               int shsize;
+
+               algo = &hmac_algos[i];
+               algo->tfms = alloc_percpu(struct crypto_shash *);
+               if (!algo->tfms) {
+                       err = -ENOMEM;
+                       goto err_free;
+               }
+
+               for_each_possible_cpu(cpu) {
+                       /* the third argument is an algorithm type mask,
+                        * not a gfp flag set
+                        */
+                       tfm = crypto_alloc_shash(algo->name, 0, 0);
+                       if (IS_ERR(tfm)) {
+                               err = PTR_ERR(tfm);
+                               goto err_free;
+                       }
+                       p_tfm = per_cpu_ptr(algo->tfms, cpu);
+                       *p_tfm = tfm;
+               }
+
+               /* every CPU holds the same algorithm, so any transform
+                * gives the descriptor size; raw_cpu_ptr() because this
+                * runs preemptible and the choice of CPU is irrelevant
+                */
+               p_tfm = raw_cpu_ptr(algo->tfms);
+               tfm = *p_tfm;
+
+               shsize = sizeof(*shash) + crypto_shash_descsize(tfm);
+
+               algo->shashs = alloc_percpu(struct shash_desc *);
+               if (!algo->shashs) {
+                       err = -ENOMEM;
+                       goto err_free;
+               }
+
+               for_each_possible_cpu(cpu) {
+                       shash = kzalloc(shsize, GFP_KERNEL);
+                       if (!shash) {
+                               err = -ENOMEM;
+                               goto err_free;
+                       }
+                       *per_cpu_ptr(algo->shashs, cpu) = shash;
+               }
+       }
+
+       return 0;
+
+err_free:
+       /* undo the failing entry and every entry set up before it */
+       for (; i >= 0; i--)
+               seg6_hmac_free_algo(&hmac_algos[i]);
+
+       return err;
+}
+
+/* Set up the per-CPU scratch rings and the crypto state used for SRH
+ * HMAC computation.
+ *
+ * Returns 0 on success or a negative errno. When the crypto setup fails
+ * the rings allocated just before are released again.
+ */
+int __init seg6_hmac_init(void)
+{
+       int ret, cpu;
+
+       ret = seg6_hmac_init_ring();
+       if (ret < 0)
+               return ret;
+
+       ret = seg6_hmac_init_algo();
+       if (ret) {
+               /* don't leak the rings set up by seg6_hmac_init_ring() */
+               for_each_possible_cpu(cpu)
+                       kfree(*per_cpu_ptr(hmac_ring, cpu));
+               free_percpu(hmac_ring);
+               hmac_ring = NULL;
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(seg6_hmac_init);
+
+/* Create the HMAC info hash table of a network namespace.
+ *
+ * Returns 0 on success or the negative errno of rhashtable_init(), so
+ * that a namespace is never left with an unusable table.
+ */
+int __net_init seg6_hmac_net_init(struct net *net)
+{
+       struct seg6_pernet_data *sdata = seg6_pernet(net);
+
+       return rhashtable_init(&sdata->hmac_infos, &rht_params);
+}
+EXPORT_SYMBOL(seg6_hmac_net_init);
+
+/* Release everything allocated by seg6_hmac_init(): the per-CPU scratch
+ * rings, then the per-CPU descriptors and transforms of every algorithm.
+ */
+void seg6_hmac_exit(void)
+{
+       struct seg6_hmac_algo *algo = NULL;
+       int i, alg_count, cpu;
+
+       /* here i iterates over CPUs, below it indexes hmac_algos[] */
+       for_each_possible_cpu(i) {
+               char *ring = *per_cpu_ptr(hmac_ring, i);
+
+               kfree(ring);
+       }
+       free_percpu(hmac_ring);
+
+       alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo);
+       for (i = 0; i < alg_count; i++) {
+               algo = &hmac_algos[i];
+               for_each_possible_cpu(cpu) {
+                       struct crypto_shash *tfm;
+                       struct shash_desc *shash;
+
+                       shash = *per_cpu_ptr(algo->shashs, cpu);
+                       kfree(shash);
+                       tfm = *per_cpu_ptr(algo->tfms, cpu);
+                       crypto_free_shash(tfm);
+               }
+               free_percpu(algo->tfms);
+               free_percpu(algo->shashs);
+       }
+}
+EXPORT_SYMBOL(seg6_hmac_exit);
+
+/* Destroy the HMAC info table of a network namespace, releasing every
+ * remaining entry through seg6_free_hi().
+ */
+void __net_exit seg6_hmac_net_exit(struct net *net)
+{
+       struct seg6_pernet_data *sdata = seg6_pernet(net);
+
+       rhashtable_free_and_destroy(&sdata->hmac_infos, seg6_free_hi, NULL);
+}
+EXPORT_SYMBOL(seg6_hmac_net_exit);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
new file mode 100644 (file)
index 0000000..bbfca22
--- /dev/null
@@ -0,0 +1,431 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *        modify it under the terms of the GNU General Public License
+ *        as published by the Free Software Foundation; either version
+ *        2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/lwtunnel.h>
+#include <net/netevent.h>
+#include <net/netns/generic.h>
+#include <net/ip6_fib.h>
+#include <net/route.h>
+#include <net/seg6.h>
+#include <linux/seg6.h>
+#include <linux/seg6_iptunnel.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#ifdef CONFIG_DST_CACHE
+#include <net/dst_cache.h>
+#endif
+#ifdef CONFIG_IPV6_SEG6_HMAC
+#include <net/seg6_hmac.h>
+#endif
+
+/* Private lwtunnel state of a SEG6 route: an optional dst cache followed
+ * by the variable-length encap description copied from netlink.
+ */
+struct seg6_lwt {
+#ifdef CONFIG_DST_CACHE
+       struct dst_cache cache;
+#endif
+       struct seg6_iptunnel_encap tuninfo[0];
+};
+
+/* Return the SEG6 private state stored in the data area of @lwt. */
+static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
+{
+       return (struct seg6_lwt *)lwt->data;
+}
+
+/* Return the encap description (mode + SRH) stored in @lwt. */
+static inline struct seg6_iptunnel_encap *
+seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
+{
+       return seg6_lwt_lwtunnel(lwt)->tuninfo;
+}
+
+/* Netlink policy of the nested encap attribute. No length is set for the
+ * binary payload here; seg6_build_state() checks the size itself.
+ */
+static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
+       [SEG6_IPTUNNEL_SRH]     = { .type = NLA_BINARY },
+};
+
+/* Append @tuninfo to @skb as a netlink attribute of type @attrtype.
+ *
+ * Returns 0 on success or -EMSGSIZE when the attribute does not fit.
+ */
+int nla_put_srh(struct sk_buff *skb, int attrtype,
+               struct seg6_iptunnel_encap *tuninfo)
+{
+       int len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);
+       struct nlattr *attr = nla_reserve(skb, attrtype, len);
+
+       if (!attr)
+               return -EMSGSIZE;
+
+       memcpy(nla_data(attr), tuninfo, len);
+
+       return 0;
+}
+
+/* Pick the source address of the outer header and store it in @saddr:
+ * the per-netns tunnel source when one is configured, otherwise an
+ * address selected for @dev towards @daddr.
+ */
+static void set_tun_src(struct net *net, struct net_device *dev,
+                       struct in6_addr *daddr, struct in6_addr *saddr)
+{
+       struct seg6_pernet_data *sdata = seg6_pernet(net);
+       struct in6_addr *configured;
+
+       rcu_read_lock();
+
+       configured = rcu_dereference(sdata->tun_src);
+
+       if (ipv6_addr_any(configured))
+               ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC,
+                                  saddr);
+       else
+               memcpy(saddr, configured, sizeof(struct in6_addr));
+
+       rcu_read_unlock();
+}
+
+/* Encapsulate an IPv6 packet within an outer IPv6 header with a given SRH.
+ *
+ * @skb:  packet to encapsulate; its network header must be the inner
+ *        IPv6 header
+ * @osrh: template SRH to copy right after the new outer header
+ *
+ * Returns 0 on success or a negative errno (headroom expansion or HMAC
+ * computation failure). The skb is not freed on error.
+ */
+static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+{
+       struct net *net = dev_net(skb_dst(skb)->dev);
+       struct ipv6hdr *hdr, *inner_hdr;
+       struct ipv6_sr_hdr *isrh;
+       int hdrlen, tot_len, err;
+
+       /* SRH length in bytes, then outer IPv6 header + SRH */
+       hdrlen = (osrh->hdrlen + 1) << 3;
+       tot_len = hdrlen + sizeof(*hdr);
+
+       err = pskb_expand_head(skb, tot_len, 0, GFP_ATOMIC);
+       if (unlikely(err))
+               return err;
+
+       /* fetched after the expansion, which may have moved the data */
+       inner_hdr = ipv6_hdr(skb);
+
+       skb_push(skb, tot_len);
+       skb_reset_network_header(skb);
+       skb_mac_header_rebuild(skb);
+       hdr = ipv6_hdr(skb);
+
+       /* inherit tc, flowlabel and hlim
+        * hlim will be decremented in ip6_forward() afterwards and
+        * decapsulation will overwrite inner hlim with outer hlim
+        */
+       ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
+                    ip6_flowlabel(inner_hdr));
+       hdr->hop_limit = inner_hdr->hop_limit;
+       hdr->nexthdr = NEXTHDR_ROUTING;
+
+       isrh = (void *)hdr + sizeof(*hdr);
+       memcpy(isrh, osrh, hdrlen);
+
+       isrh->nexthdr = NEXTHDR_IPV6;
+
+       /* the outer destination is the segment at index first_segment */
+       hdr->daddr = isrh->segments[isrh->first_segment];
+       set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       /* the HMAC covers the source address, so it is computed last */
+       if (sr_has_hmac(isrh)) {
+               err = seg6_push_hmac(net, &hdr->saddr, isrh);
+               if (unlikely(err))
+                       return err;
+       }
+#endif
+
+       skb_postpush_rcsum(skb, hdr, tot_len);
+
+       return 0;
+}
+
+/* insert an SRH within an IPv6 packet, just after the IPv6 header */
+#ifdef CONFIG_IPV6_SEG6_INLINE
+/* Insert a copy of @osrh between the IPv6 header of @skb and its payload.
+ *
+ * The original destination is saved in segments[0] and the packet is
+ * redirected to segments[first_segment]. Returns 0 on success or a
+ * negative errno; the skb is not freed on error.
+ */
+static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+{
+       struct ipv6hdr *hdr, *oldhdr;
+       struct ipv6_sr_hdr *isrh;
+       int hdrlen, err;
+
+       /* SRH length in bytes */
+       hdrlen = (osrh->hdrlen + 1) << 3;
+
+       err = pskb_expand_head(skb, hdrlen, 0, GFP_ATOMIC);
+       if (unlikely(err))
+               return err;
+
+       oldhdr = ipv6_hdr(skb);
+
+       /* drop the IPv6 header, then make room for header + SRH */
+       skb_pull(skb, sizeof(struct ipv6hdr));
+       skb_postpull_rcsum(skb, skb_network_header(skb),
+                          sizeof(struct ipv6hdr));
+
+       skb_push(skb, sizeof(struct ipv6hdr) + hdrlen);
+       skb_reset_network_header(skb);
+       skb_mac_header_rebuild(skb);
+
+       hdr = ipv6_hdr(skb);
+
+       /* old and new header locations may overlap, hence memmove() */
+       memmove(hdr, oldhdr, sizeof(*hdr));
+
+       isrh = (void *)hdr + sizeof(*hdr);
+       memcpy(isrh, osrh, hdrlen);
+
+       /* chain the SRH in front of the former next header */
+       isrh->nexthdr = hdr->nexthdr;
+       hdr->nexthdr = NEXTHDR_ROUTING;
+
+       isrh->segments[0] = hdr->daddr;
+       hdr->daddr = isrh->segments[isrh->first_segment];
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+       if (sr_has_hmac(isrh)) {
+               struct net *net = dev_net(skb_dst(skb)->dev);
+
+               err = seg6_push_hmac(net, &hdr->saddr, isrh);
+               if (unlikely(err))
+                       return err;
+       }
+#endif
+
+       skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
+
+       return 0;
+}
+#endif
+
+/* Apply the SRH configured on the route of @skb, either inline or by
+ * encapsulation, then fix up the payload length and header offsets.
+ *
+ * Returns 0 on success or the negative errno of the mode handler.
+ */
+static int seg6_do_srh(struct sk_buff *skb)
+{
+       struct dst_entry *dst = skb_dst(skb);
+       struct seg6_iptunnel_encap *tinfo;
+       int err = 0;
+
+       tinfo = seg6_encap_lwtunnel(dst->lwtstate);
+
+       if (likely(!skb->encapsulation)) {
+               skb_reset_inner_headers(skb);
+               skb->encapsulation = 1;
+       }
+
+       /* no default case: seg6_build_state() rejects any other mode */
+       switch (tinfo->mode) {
+#ifdef CONFIG_IPV6_SEG6_INLINE
+       case SEG6_IPTUN_MODE_INLINE:
+               err = seg6_do_srh_inline(skb, tinfo->srh);
+               skb_reset_inner_headers(skb);
+               break;
+#endif
+       case SEG6_IPTUN_MODE_ENCAP:
+               err = seg6_do_srh_encap(skb, tinfo->srh);
+               break;
+       }
+
+       if (err)
+               return err;
+
+       ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+       skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
+       skb_set_inner_protocol(skb, skb->protocol);
+
+       return 0;
+}
+
+/* lwtunnel input handler: add the SRH, then route the packet again
+ * according to its new destination. The skb is freed on error.
+ */
+int seg6_input(struct sk_buff *skb)
+{
+       int err;
+
+       err = seg6_do_srh(skb);
+       if (unlikely(err)) {
+               kfree_skb(skb);
+               return err;
+       }
+
+       /* the destination changed: drop the old route and look up anew */
+       skb_dst_drop(skb);
+       ip6_route_input(skb);
+
+       return dst_input(skb);
+}
+
+int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+       struct dst_entry *orig_dst = skb_dst(skb);
+       struct dst_entry *dst = NULL;
+       struct seg6_lwt *slwt;
+       int err = -EINVAL;
+
+       err = seg6_do_srh(skb);
+       if (unlikely(err))
+               goto drop;
+
+       slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+
+#ifdef CONFIG_DST_CACHE
+       dst = dst_cache_get(&slwt->cache);
+#endif
+
+       if (unlikely(!dst)) {
+               struct ipv6hdr *hdr = ipv6_hdr(skb);
+               struct flowi6 fl6;
+
+               fl6.daddr = hdr->daddr;
+               fl6.saddr = hdr->saddr;
+               fl6.flowlabel = ip6_flowinfo(hdr);
+               fl6.flowi6_mark = skb->mark;
+               fl6.flowi6_proto = hdr->nexthdr;
+
+               dst = ip6_route_output(net, NULL, &fl6);
+               if (dst->error) {
+                       err = dst->error;
+                       dst_release(dst);
+                       goto drop;
+               }
+
+#ifdef CONFIG_DST_CACHE
+               dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+#endif
+       }
+
+       skb_dst_drop(skb);
+       skb_dst_set(skb, dst);
+
+       return dst_output(net, sk, skb);
+drop:
+       kfree_skb(skb);
+       return err;
+}
+
+/* lwtunnel build_state handler: validate the SEG6 encap attribute @nla
+ * and build the matching tunnel state.
+ *
+ * On success *@ts points to the new state and 0 is returned. Returns
+ * -EINVAL for a missing, too short, inconsistent or unsupported-mode
+ * attribute, -ENOMEM when the state cannot be allocated, or the error
+ * of nla_parse_nested()/dst_cache_init().
+ */
+static int seg6_build_state(struct net_device *dev, struct nlattr *nla,
+                           unsigned int family, const void *cfg,
+                           struct lwtunnel_state **ts)
+{
+       struct nlattr *tb[SEG6_IPTUNNEL_MAX + 1];
+       struct seg6_iptunnel_encap *tuninfo;
+       struct lwtunnel_state *newts;
+       int tuninfo_len, min_size;
+       struct seg6_lwt *slwt;
+       int err;
+
+       err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
+                              seg6_iptunnel_policy);
+
+       if (err < 0)
+               return err;
+
+       if (!tb[SEG6_IPTUNNEL_SRH])
+               return -EINVAL;
+
+       tuninfo = nla_data(tb[SEG6_IPTUNNEL_SRH]);
+       tuninfo_len = nla_len(tb[SEG6_IPTUNNEL_SRH]);
+
+       /* tuninfo must contain at least the iptunnel encap structure,
+        * the SRH and one segment
+        */
+       min_size = sizeof(*tuninfo) + sizeof(struct ipv6_sr_hdr) +
+                  sizeof(struct in6_addr);
+       if (tuninfo_len < min_size)
+               return -EINVAL;
+
+       /* only accept the modes that seg6_do_srh() can handle */
+       switch (tuninfo->mode) {
+#ifdef CONFIG_IPV6_SEG6_INLINE
+       case SEG6_IPTUN_MODE_INLINE:
+               break;
+#endif
+       case SEG6_IPTUN_MODE_ENCAP:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* verify that SRH is consistent */
+       if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo)))
+               return -EINVAL;
+
+       /* private state: struct seg6_lwt followed by a copy of tuninfo */
+       newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
+       if (!newts)
+               return -ENOMEM;
+
+       slwt = seg6_lwt_lwtunnel(newts);
+
+#ifdef CONFIG_DST_CACHE
+       err = dst_cache_init(&slwt->cache, GFP_KERNEL);
+       if (err) {
+               kfree(newts);
+               return err;
+       }
+#endif
+
+       memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
+
+       newts->type = LWTUNNEL_ENCAP_SEG6;
+       newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
+                       LWTUNNEL_STATE_INPUT_REDIRECT;
+       newts->headroom = seg6_lwt_headroom(tuninfo);
+
+       *ts = newts;
+
+       return 0;
+}
+
+#ifdef CONFIG_DST_CACHE
+/* lwtunnel destroy_state handler: release the dst cache set up by
+ * seg6_build_state().
+ */
+static void seg6_destroy_state(struct lwtunnel_state *lwt)
+{
+       dst_cache_destroy(&seg6_lwt_lwtunnel(lwt)->cache);
+}
+#endif
+
+/* lwtunnel fill_encap handler: dump the encap description of @lwtstate
+ * into @skb. Returns 0 or -EMSGSIZE.
+ */
+static int seg6_fill_encap_info(struct sk_buff *skb,
+                               struct lwtunnel_state *lwtstate)
+{
+       struct seg6_iptunnel_encap *encap = seg6_encap_lwtunnel(lwtstate);
+
+       return nla_put_srh(skb, SEG6_IPTUNNEL_SRH, encap) ? -EMSGSIZE : 0;
+}
+
+/* lwtunnel get_encap_size handler: netlink space needed to dump the
+ * encap description of @lwtstate.
+ */
+static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+       struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate);
+
+       return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo));
+}
+
+/* lwtunnel cmp_encap handler: returns 0 when both states carry the same
+ * encap description, non-zero otherwise.
+ */
+static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+       struct seg6_iptunnel_encap *lhs = seg6_encap_lwtunnel(a);
+       struct seg6_iptunnel_encap *rhs = seg6_encap_lwtunnel(b);
+       int lhs_len = SEG6_IPTUN_ENCAP_SIZE(lhs);
+
+       if (lhs_len == SEG6_IPTUN_ENCAP_SIZE(rhs))
+               return memcmp(lhs, rhs, lhs_len);
+
+       return 1;
+}
+
+/* lwtunnel operations of the SEG6 encap type; registered by
+ * seg6_iptunnel_init().
+ */
+static const struct lwtunnel_encap_ops seg6_iptun_ops = {
+       .build_state = seg6_build_state,
+#ifdef CONFIG_DST_CACHE
+       .destroy_state = seg6_destroy_state,
+#endif
+       .output = seg6_output,
+       .input = seg6_input,
+       .fill_encap = seg6_fill_encap_info,
+       .get_encap_size = seg6_encap_nlsize,
+       .cmp_encap = seg6_encap_cmp,
+};
+
+/* Register the SEG6 lwtunnel encap type; returns the result of
+ * lwtunnel_encap_add_ops().
+ */
+int __init seg6_iptunnel_init(void)
+{
+       return lwtunnel_encap_add_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
+}
+
+/* Unregister the SEG6 lwtunnel encap type. */
+void seg6_iptunnel_exit(void)
+{
+       lwtunnel_encap_del_ops(&seg6_iptun_ops, LWTUNNEL_ENCAP_SEG6);
+}
index dc7a3449ffc1c1eceaf2c2e6e6acff4423d6d1d5..0355231162b85e58c2ccf01af598c1e94cf629c8 100644 (file)
@@ -76,7 +76,7 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst,
                      __be32 *v4dst);
 static struct rtnl_link_ops sit_link_ops __read_mostly;
 
-static int sit_net_id __read_mostly;
+static unsigned int sit_net_id __read_mostly;
 struct sit_net {
        struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE];
        struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE];
index 59c483937aec1c626687c658efe10c7d25f47c10..a4d49760bf434e0800fb92cf10cdd6e6ce22f5e5 100644 (file)
@@ -209,6 +209,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
        treq->snt_synack.v64    = 0;
        treq->rcv_isn = ntohl(th->seq) - 1;
        treq->snt_isn = cookie;
+       treq->ts_off = 0;
 
        /*
         * We need to lookup the dst_entry to get the correct window size.
@@ -227,6 +228,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
                fl6.flowi6_mark = ireq->ir_mark;
                fl6.fl6_dport = ireq->ir_rmt_port;
                fl6.fl6_sport = inet_sk(sk)->inet_sport;
+               fl6.flowi6_uid = sk->sk_uid;
                security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
                dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
index 5a27ab4eab3974280aad827241944b53daab569e..a2185a214abcdc16942f3154a7e5d9daeec93e7b 100644 (file)
@@ -101,12 +101,12 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
        }
 }
 
-static __u32 tcp_v6_init_sequence(const struct sk_buff *skb)
+static u32 tcp_v6_init_sequence(const struct sk_buff *skb, u32 *tsoff)
 {
        return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
                                            ipv6_hdr(skb)->saddr.s6_addr32,
                                            tcp_hdr(skb)->dest,
-                                           tcp_hdr(skb)->source);
+                                           tcp_hdr(skb)->source, tsoff);
 }
 
 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
@@ -233,6 +233,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        fl6.flowi6_mark = sk->sk_mark;
        fl6.fl6_dport = usin->sin6_port;
        fl6.fl6_sport = inet->inet_sport;
+       fl6.flowi6_uid = sk->sk_uid;
 
        opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
        final_p = fl6_update_dst(&fl6, opt, &final);
@@ -282,7 +283,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
                                                             sk->sk_v6_daddr.s6_addr32,
                                                             inet->inet_sport,
-                                                            inet->inet_dport);
+                                                            inet->inet_dport,
+                                                            &tp->tsoffset);
 
        err = tcp_connect(sk);
        if (err)
@@ -818,12 +820,17 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
        fl6.flowi6_proto = IPPROTO_TCP;
        if (rt6_need_strict(&fl6.daddr) && !oif)
                fl6.flowi6_oif = tcp_v6_iif(skb);
-       else
-               fl6.flowi6_oif = oif ? : skb->skb_iif;
+       else {
+               if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
+                       oif = skb->skb_iif;
+
+               fl6.flowi6_oif = oif;
+       }
 
        fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
        fl6.fl6_dport = t1->dest;
        fl6.fl6_sport = t1->source;
+       fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
        security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
 
        /* Pass a socket to ip6_dst_lookup either it is for RST
@@ -950,7 +957,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
-                       tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
+                       tcp_time_stamp + tcp_rsk(req)->ts_off,
+                       req->ts_recent, sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
                        0, 0);
 }
@@ -1225,7 +1233,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_do_rcv(sk, skb);
 
-       if (sk_filter(sk, skb))
+       if (tcp_filter(sk, skb))
                goto discard;
 
        /*
@@ -1453,8 +1461,10 @@ process:
        if (tcp_v6_inbound_md5_hash(sk, skb))
                goto discard_and_relse;
 
-       if (sk_filter(sk, skb))
+       if (tcp_filter(sk, skb))
                goto discard_and_relse;
+       th = (const struct tcphdr *)skb->data;
+       hdr = ipv6_hdr(skb);
 
        skb->dev = NULL;
 
index 71963b23d5a543357a93cbdd2bf9861cd6408632..649efc26a2527db2e7b6a814a56523dffcb9c0d9 100644 (file)
@@ -302,7 +302,8 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
  * Does increment socket refcount.
  */
 #if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
-    IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY)
+    IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \
+    IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
 struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
                             const struct in6_addr *daddr, __be16 dport, int dif)
 {
@@ -343,8 +344,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
 try_again:
        peeking = off = sk_peek_offset(sk, flags);
-       skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-                                 &peeked, &off, &err);
+       skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
        if (!skb)
                return err;
 
@@ -363,7 +363,8 @@ try_again:
         * coverage checksum (UDP-Lite), do it before the copy.
         */
 
-       if (copied < ulen || UDP_SKB_CB(skb)->partial_cov || peeking) {
+       if (copied < ulen || peeking ||
+           (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
                checksum_valid = !udp_lib_checksum_complete(skb);
                if (!checksum_valid)
                        goto csum_copy_err;
@@ -425,7 +426,8 @@ try_again:
 
        if (is_udp4) {
                if (inet->cmsg_flags)
-                       ip_cmsg_recv(msg, skb);
+                       ip_cmsg_recv_offset(msg, sk, skb,
+                                           sizeof(struct udphdr), off);
        } else {
                if (np->rxopt.all)
                        ip6_datagram_recv_specific_ctl(sk, msg, skb);
@@ -510,7 +512,7 @@ out:
        return;
 }
 
-static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
        int rc;
 
@@ -518,6 +520,8 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
                sock_rps_save_rxhash(sk, skb);
                sk_mark_napi_id(sk, skb);
                sk_incoming_cpu_update(sk);
+       } else {
+               sk_mark_napi_id_once(sk, skb);
        }
 
        rc = __udp_enqueue_schedule_skb(sk, skb);
@@ -687,10 +691,10 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 
        if (use_hash2) {
                hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
-                           udp_table.mask;
-               hash2 = udp6_portaddr_hash(net, daddr, hnum) & udp_table.mask;
+                           udptable->mask;
+               hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask;
 start_lookup:
-               hslot = &udp_table.hash2[hash2];
+               hslot = &udptable->hash2[hash2];
                offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
        }
 
@@ -1137,6 +1141,7 @@ do_udp_sendmsg:
                fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
 
        fl6.flowi6_mark = sk->sk_mark;
+       fl6.flowi6_uid = sk->sk_uid;
        sockc.tsflags = sk->sk_tsflags;
 
        if (msg->msg_controllen) {
index f6eb1ab34f4bc50e321e74f129feee4118f03807..e78bdc76dcc33ceda888fb323a530f774057edbb 100644 (file)
@@ -26,7 +26,7 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
 int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
                  int flags, int *addr_len);
-int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 void udpv6_destroy_sock(struct sock *sk);
 
 #ifdef CONFIG_PROC_FS
index 47d0d2b87106558fece3496479198005c55b99e7..2784cc363f2b533df16effcec35ffcdc2b528885 100644 (file)
@@ -45,10 +45,11 @@ struct proto udplitev6_prot = {
        .getsockopt        = udpv6_getsockopt,
        .sendmsg           = udpv6_sendmsg,
        .recvmsg           = udpv6_recvmsg,
-       .backlog_rcv       = udpv6_queue_rcv_skb,
        .hash              = udp_lib_hash,
        .unhash            = udp_lib_unhash,
        .get_port          = udp_v6_get_port,
+       .memory_allocated  = &udp_memory_allocated,
+       .sysctl_mem        = sysctl_udp_mem,
        .obj_size          = sizeof(struct udp6_sock),
        .h.udp_table       = &udplite_table,
 #ifdef CONFIG_COMPAT
index e1c0bbe7996cf8ca00374db26488bd85e02a36ad..d7b731a78d09f3ef7941911c3e35e29f3e8ff2d8 100644 (file)
@@ -44,7 +44,7 @@ struct xfrm6_tunnel_net {
        u32 spi;
 };
 
-static int xfrm6_tunnel_net_id __read_mostly;
+static unsigned int xfrm6_tunnel_net_id __read_mostly;
 static inline struct xfrm6_tunnel_net *xfrm6_tunnel_pernet(struct net *net)
 {
        return net_generic(net, xfrm6_tunnel_net_id);
index e15c40e86660cf204cc1bc734abde164fb3ef22e..7fc340e574cf3b1e5484cbcbabe3a1e33be582b4 100644 (file)
 
 
 
-static struct genl_family irda_nl_family = {
-       .id = GENL_ID_GENERATE,
-       .name = IRDA_NL_NAME,
-       .hdrsize = 0,
-       .version = IRDA_NL_VERSION,
-       .maxattr = IRDA_NL_CMD_MAX,
-};
+static struct genl_family irda_nl_family;
 
 static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info)
 {
@@ -147,9 +141,19 @@ static const struct genl_ops irda_nl_ops[] = {
 
 };
 
-int irda_nl_register(void)
+static struct genl_family irda_nl_family __ro_after_init = {
+       .name = IRDA_NL_NAME,
+       .hdrsize = 0,
+       .version = IRDA_NL_VERSION,
+       .maxattr = IRDA_NL_CMD_MAX,
+       .module = THIS_MODULE,
+       .ops = irda_nl_ops,
+       .n_ops = ARRAY_SIZE(irda_nl_ops),
+};
+
+int __init irda_nl_register(void)
 {
-       return genl_register_family_with_ops(&irda_nl_family, irda_nl_ops);
+       return genl_register_family(&irda_nl_family);
 }
 
 void irda_nl_unregister(void)
index f9c9ecb0cdd3b3eea618538fda2e884583f9bc09..c6252ed42c1de65dee149d7d869b62b96616e22a 100644 (file)
@@ -36,7 +36,7 @@
 #define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x))
 #define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x))
 
-static int pfkey_net_id __read_mostly;
+static unsigned int pfkey_net_id __read_mostly;
 struct netns_pfkey {
        /* List of all pfkey sockets. */
        struct hlist_head table;
index a2ed3bda4ddcd70071bc8ac87120e5591d9dd681..85948c69b23681ef9542e43faad8af235a309828 100644 (file)
@@ -715,7 +715,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
                        l2tp_info(session, L2TP_MSG_SEQ,
                                  "%s: requested to enable seq numbers by LNS\n",
                                  session->name);
-                       session->send_seq = -1;
+                       session->send_seq = 1;
                        l2tp_session_set_header_len(session, tunnel->version);
                }
        } else {
index 42de4ccd159f6f6853930afd44cea239e2011a54..982f6c44ea01f053a51afcbb4b271a2e77df2178 100644 (file)
@@ -251,8 +251,6 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        int ret;
        int chk_addr_ret;
 
-       if (!sock_flag(sk, SOCK_ZAPPED))
-               return -EINVAL;
        if (addr_len < sizeof(struct sockaddr_l2tpip))
                return -EINVAL;
        if (addr->l2tp_family != AF_INET)
@@ -267,6 +265,9 @@ static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        read_unlock_bh(&l2tp_ip_lock);
 
        lock_sock(sk);
+       if (!sock_flag(sk, SOCK_ZAPPED))
+               goto out;
+
        if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip))
                goto out;
 
@@ -338,7 +339,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags)
        if (sock_flag(sk, SOCK_ZAPPED))
                return 0;
 
-       return udp_disconnect(sk, flags);
+       return __udp_disconnect(sk, flags);
 }
 
 static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
index ea2ae6664cc8d643319016ea7a234dc034ec590e..667ec909846f445ad2f78876893d401d65503efa 100644 (file)
@@ -269,8 +269,6 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        int addr_type;
        int err;
 
-       if (!sock_flag(sk, SOCK_ZAPPED))
-               return -EINVAL;
        if (addr->l2tp_family != AF_INET6)
                return -EINVAL;
        if (addr_len < sizeof(*addr))
@@ -296,6 +294,9 @@ static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        lock_sock(sk);
 
        err = -EINVAL;
+       if (!sock_flag(sk, SOCK_ZAPPED))
+               goto out_unlock;
+
        if (sk->sk_state != TCP_CLOSE)
                goto out_unlock;
 
@@ -410,7 +411,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags)
        if (sock_flag(sk, SOCK_ZAPPED))
                return 0;
 
-       return udp_disconnect(sk, flags);
+       return __udp_disconnect(sk, flags);
 }
 
 static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr,
@@ -519,6 +520,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        memset(&fl6, 0, sizeof(fl6));
 
        fl6.flowi6_mark = sk->sk_mark;
+       fl6.flowi6_uid = sk->sk_uid;
 
        ipc6.hlimit = -1;
        ipc6.tclass = -1;
index bf3117771822af3f8cdfcbc956d5b8018050c732..3620fba317863dc59c93c1089faf63451e831aa5 100644 (file)
 #include "l2tp_core.h"
 
 
-static struct genl_family l2tp_nl_family = {
-       .id             = GENL_ID_GENERATE,
-       .name           = L2TP_GENL_NAME,
-       .version        = L2TP_GENL_VERSION,
-       .hdrsize        = 0,
-       .maxattr        = L2TP_ATTR_MAX,
-       .netnsok        = true,
-};
+static struct genl_family l2tp_nl_family;
 
 static const struct genl_multicast_group l2tp_multicast_group[] = {
        {
@@ -227,14 +220,14 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
                        cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]);
                if (info->attrs[L2TP_ATTR_UDP_DPORT])
                        cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
-               if (info->attrs[L2TP_ATTR_UDP_CSUM])
-                       cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]);
+               cfg.use_udp_checksums = nla_get_flag(
+                       info->attrs[L2TP_ATTR_UDP_CSUM]);
 
 #if IS_ENABLED(CONFIG_IPV6)
-               if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX])
-                       cfg.udp6_zero_tx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
-               if (info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX])
-                       cfg.udp6_zero_rx_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
+               cfg.udp6_zero_tx_checksums = nla_get_flag(
+                       info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_TX]);
+               cfg.udp6_zero_rx_checksums = nla_get_flag(
+                       info->attrs[L2TP_ATTR_UDP_ZERO_CSUM6_RX]);
 #endif
        }
 
@@ -386,9 +379,24 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
 
        switch (tunnel->encap) {
        case L2TP_ENCAPTYPE_UDP:
+               switch (sk->sk_family) {
+               case AF_INET:
+                       if (nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx))
+                               goto nla_put_failure;
+                       break;
+#if IS_ENABLED(CONFIG_IPV6)
+               case AF_INET6:
+                       if (udp_get_no_check6_tx(sk) &&
+                           nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_TX))
+                               goto nla_put_failure;
+                       if (udp_get_no_check6_rx(sk) &&
+                           nla_put_flag(skb, L2TP_ATTR_UDP_ZERO_CSUM6_RX))
+                               goto nla_put_failure;
+                       break;
+#endif
+               }
                if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
-                   nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)) ||
-                   nla_put_u8(skb, L2TP_ATTR_UDP_CSUM, !sk->sk_no_check_tx))
+                   nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
                        goto nla_put_failure;
                /* NOBREAK */
        case L2TP_ENCAPTYPE_IP:
@@ -977,6 +985,19 @@ static const struct genl_ops l2tp_nl_ops[] = {
        },
 };
 
+static struct genl_family l2tp_nl_family __ro_after_init = {
+       .name           = L2TP_GENL_NAME,
+       .version        = L2TP_GENL_VERSION,
+       .hdrsize        = 0,
+       .maxattr        = L2TP_ATTR_MAX,
+       .netnsok        = true,
+       .module         = THIS_MODULE,
+       .ops            = l2tp_nl_ops,
+       .n_ops          = ARRAY_SIZE(l2tp_nl_ops),
+       .mcgrps         = l2tp_multicast_group,
+       .n_mcgrps       = ARRAY_SIZE(l2tp_multicast_group),
+};
+
 int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops)
 {
        int ret;
@@ -1010,12 +1031,10 @@ void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type)
 }
 EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
 
-static int l2tp_nl_init(void)
+static int __init l2tp_nl_init(void)
 {
        pr_info("L2TP netlink interface\n");
-       return genl_register_family_with_ops_groups(&l2tp_nl_family,
-                                                   l2tp_nl_ops,
-                                                   l2tp_multicast_group);
+       return genl_register_family(&l2tp_nl_family);
 }
 
 static void l2tp_nl_cleanup(void)
index 41d47bfda15c91ed9492ae8b516b979458e49be7..2ddfec1e4acfdea1404f555d17cc3586d4afd935 100644 (file)
@@ -1272,7 +1272,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
                        err = -EINVAL;
                        break;
                }
-               session->recv_seq = val ? -1 : 0;
+               session->recv_seq = !!val;
                l2tp_info(session, PPPOL2TP_MSG_CONTROL,
                          "%s: set recv_seq=%d\n",
                          session->name, session->recv_seq);
@@ -1283,7 +1283,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
                        err = -EINVAL;
                        break;
                }
-               session->send_seq = val ? -1 : 0;
+               session->send_seq = !!val;
                {
                        struct sock *ssk      = ps->sock;
                        struct pppox_sock *po = pppox_sk(ssk);
@@ -1301,7 +1301,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
                        err = -EINVAL;
                        break;
                }
-               session->lns_mode = val ? -1 : 0;
+               session->lns_mode = !!val;
                l2tp_info(session, PPPOL2TP_MSG_CONTROL,
                          "%s: set lns_mode=%d\n",
                          session->name, session->lns_mode);
index db916cf51ffeabd3d5246cb09a9e7227d483f574..5e92963824202823bcb706c54444ad5a6e7d2358 100644 (file)
@@ -532,12 +532,12 @@ out:
 
 static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
 {
-       DEFINE_WAIT(wait);
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        int rc = 0;
 
+       add_wait_queue(sk_sleep(sk), &wait);
        while (1) {
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-               if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE))
+               if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait))
                        break;
                rc = -ERESTARTSYS;
                if (signal_pending(current))
@@ -547,39 +547,39 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
                        break;
                rc = 0;
        }
-       finish_wait(sk_sleep(sk), &wait);
+       remove_wait_queue(sk_sleep(sk), &wait);
        return rc;
 }
 
 static bool llc_ui_wait_for_conn(struct sock *sk, long timeout)
 {
-       DEFINE_WAIT(wait);
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
+       add_wait_queue(sk_sleep(sk), &wait);
        while (1) {
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-               if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT))
+               if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait))
                        break;
                if (signal_pending(current) || !timeout)
                        break;
        }
-       finish_wait(sk_sleep(sk), &wait);
+       remove_wait_queue(sk_sleep(sk), &wait);
        return timeout;
 }
 
 static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
 {
-       DEFINE_WAIT(wait);
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct llc_sock *llc = llc_sk(sk);
        int rc;
 
+       add_wait_queue(sk_sleep(sk), &wait);
        while (1) {
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                rc = 0;
                if (sk_wait_event(sk, &timeout,
                                  (sk->sk_shutdown & RCV_SHUTDOWN) ||
                                  (!llc_data_accept_state(llc->state) &&
                                   !llc->remote_busy_flag &&
-                                  !llc->p_flag)))
+                                  !llc->p_flag), &wait))
                        break;
                rc = -ERESTARTSYS;
                if (signal_pending(current))
@@ -588,7 +588,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
                if (!timeout)
                        break;
        }
-       finish_wait(sk_sleep(sk), &wait);
+       remove_wait_queue(sk_sleep(sk), &wait);
        return rc;
 }
 
index f9137a8341f4db16b9dca579b0ed8f5300a59767..0b202b343fd44bee43df293aaf663e3a6fd3e0d2 100644 (file)
@@ -19,6 +19,7 @@ mac80211-y := \
        aes_gcm.o \
        aes_cmac.o \
        aes_gmac.o \
+       fils_aead.o \
        cfg.o \
        ethtool.o \
        rx.o \
index 7663c28ba3539f230c9cc5b1bd044164f1ef561c..a4e0d59a40dd52b90f2f54e230600a1633f06831 100644 (file)
 #include "key.h"
 #include "aes_ccm.h"
 
-void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
-                              u8 *data, size_t data_len, u8 *mic,
-                              size_t mic_len)
+int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+                             u8 *data, size_t data_len, u8 *mic,
+                             size_t mic_len)
 {
        struct scatterlist sg[3];
+       struct aead_request *aead_req;
+       int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
+       u8 *__aad;
 
-       char aead_req_data[sizeof(struct aead_request) +
-                          crypto_aead_reqsize(tfm)]
-               __aligned(__alignof__(struct aead_request));
-       struct aead_request *aead_req = (void *) aead_req_data;
+       aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC);
+       if (!aead_req)
+               return -ENOMEM;
 
-       memset(aead_req, 0, sizeof(aead_req_data));
+       __aad = (u8 *)aead_req + reqsize;
+       memcpy(__aad, aad, CCM_AAD_LEN);
 
        sg_init_table(sg, 3);
-       sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
+       sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
        sg_set_buf(&sg[1], data, data_len);
        sg_set_buf(&sg[2], mic, mic_len);
 
@@ -41,6 +44,9 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
        aead_request_set_ad(aead_req, sg[0].length);
 
        crypto_aead_encrypt(aead_req);
+       kzfree(aead_req);
+
+       return 0;
 }
 
 int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
@@ -48,18 +54,23 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
                              size_t mic_len)
 {
        struct scatterlist sg[3];
-       char aead_req_data[sizeof(struct aead_request) +
-                          crypto_aead_reqsize(tfm)]
-               __aligned(__alignof__(struct aead_request));
-       struct aead_request *aead_req = (void *) aead_req_data;
+       struct aead_request *aead_req;
+       int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
+       u8 *__aad;
+       int err;
 
        if (data_len == 0)
                return -EINVAL;
 
-       memset(aead_req, 0, sizeof(aead_req_data));
+       aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC);
+       if (!aead_req)
+               return -ENOMEM;
+
+       __aad = (u8 *)aead_req + reqsize;
+       memcpy(__aad, aad, CCM_AAD_LEN);
 
        sg_init_table(sg, 3);
-       sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
+       sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
        sg_set_buf(&sg[1], data, data_len);
        sg_set_buf(&sg[2], mic, mic_len);
 
@@ -67,7 +78,10 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
        aead_request_set_crypt(aead_req, sg, sg, data_len + mic_len, b_0);
        aead_request_set_ad(aead_req, sg[0].length);
 
-       return crypto_aead_decrypt(aead_req);
+       err = crypto_aead_decrypt(aead_req);
+       kzfree(aead_req);
+
+       return err;
 }
 
 struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[],
index 6a73d1e4d186d34a00da8c2f8e509985c3805607..fcd3254c5cf08d9c61c6bb7ff6f6260922f8c583 100644 (file)
 
 #include <linux/crypto.h>
 
+#define CCM_AAD_LEN    32
+
 struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[],
                                                    size_t key_len,
                                                    size_t mic_len);
-void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
-                              u8 *data, size_t data_len, u8 *mic,
-                              size_t mic_len);
+int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+                             u8 *data, size_t data_len, u8 *mic,
+                             size_t mic_len);
 int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
                              u8 *data, size_t data_len, u8 *mic,
                              size_t mic_len);
index bdf0790d89cca6fe3f64c097c84299ac35e1f3d0..d0bd5fff5f0a6241cd057183ed701de938c60757 100644 (file)
@@ -23,7 +23,7 @@
 #define AAD_LEN 20
 
 
-static void gf_mulx(u8 *pad)
+void gf_mulx(u8 *pad)
 {
        int i, carry;
 
@@ -35,9 +35,9 @@ static void gf_mulx(u8 *pad)
                pad[AES_BLOCK_SIZE - 1] ^= 0x87;
 }
 
-static void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem,
-                           const u8 *addr[], const size_t *len, u8 *mac,
-                           size_t mac_len)
+void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem,
+                    const u8 *addr[], const size_t *len, u8 *mac,
+                    size_t mac_len)
 {
        u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE];
        const u8 *pos, *end;
index 3702041f44fdb16ce382c701eb53ef9493009f52..c827e1d5de8b11a2b9cb8d0d17a7385e83bdd8bc 100644 (file)
 
 #include <linux/crypto.h>
 
+void gf_mulx(u8 *pad);
+void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem,
+                    const u8 *addr[], const size_t *len, u8 *mac,
+                    size_t mac_len);
 struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[],
                                                   size_t key_len);
 void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad,
index 3afe361fd27ca5ef5ac1648106fbe010520fb377..8a4397cc1b08b2ffae5ca09b2a8f927aa090dac1 100644 (file)
 #include "key.h"
 #include "aes_gcm.h"
 
-void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
-                              u8 *data, size_t data_len, u8 *mic)
+int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
+                             u8 *data, size_t data_len, u8 *mic)
 {
        struct scatterlist sg[3];
+       struct aead_request *aead_req;
+       int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
+       u8 *__aad;
 
-       char aead_req_data[sizeof(struct aead_request) +
-                          crypto_aead_reqsize(tfm)]
-               __aligned(__alignof__(struct aead_request));
-       struct aead_request *aead_req = (void *)aead_req_data;
+       aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
+       if (!aead_req)
+               return -ENOMEM;
 
-       memset(aead_req, 0, sizeof(aead_req_data));
+       __aad = (u8 *)aead_req + reqsize;
+       memcpy(__aad, aad, GCM_AAD_LEN);
 
        sg_init_table(sg, 3);
-       sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
+       sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
        sg_set_buf(&sg[1], data, data_len);
        sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
 
@@ -37,24 +40,31 @@ void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
        aead_request_set_ad(aead_req, sg[0].length);
 
        crypto_aead_encrypt(aead_req);
+       kzfree(aead_req);
+       return 0;
 }
 
 int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
                              u8 *data, size_t data_len, u8 *mic)
 {
        struct scatterlist sg[3];
-       char aead_req_data[sizeof(struct aead_request) +
-                          crypto_aead_reqsize(tfm)]
-               __aligned(__alignof__(struct aead_request));
-       struct aead_request *aead_req = (void *)aead_req_data;
+       struct aead_request *aead_req;
+       int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
+       u8 *__aad;
+       int err;
 
        if (data_len == 0)
                return -EINVAL;
 
-       memset(aead_req, 0, sizeof(aead_req_data));
+       aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
+       if (!aead_req)
+               return -ENOMEM;
+
+       __aad = (u8 *)aead_req + reqsize;
+       memcpy(__aad, aad, GCM_AAD_LEN);
 
        sg_init_table(sg, 3);
-       sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad));
+       sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
        sg_set_buf(&sg[1], data, data_len);
        sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
 
@@ -63,7 +73,10 @@ int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
                               data_len + IEEE80211_GCMP_MIC_LEN, j_0);
        aead_request_set_ad(aead_req, sg[0].length);
 
-       return crypto_aead_decrypt(aead_req);
+       err = crypto_aead_decrypt(aead_req);
+       kzfree(aead_req);
+
+       return err;
 }
 
 struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
index 1347fda6b76a8890ab5a0e6d902cc6570d551126..55aed5352494fca761f6a30b8c5782ed7effaf10 100644 (file)
 
 #include <linux/crypto.h>
 
-void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
-                              u8 *data, size_t data_len, u8 *mic);
+#define GCM_AAD_LEN    32
+
+int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
+                             u8 *data, size_t data_len, u8 *mic);
 int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
                              u8 *data, size_t data_len, u8 *mic);
 struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
index 3ddd927aaf306acf98a82651a39392a50930e339..bd72a862ddb79f5c0bfd059a3cbf1f1e05f2032d 100644 (file)
 #include "key.h"
 #include "aes_gmac.h"
 
-#define GMAC_MIC_LEN 16
-#define GMAC_NONCE_LEN 12
-#define AAD_LEN 20
-
 int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
                       const u8 *data, size_t data_len, u8 *mic)
 {
        struct scatterlist sg[4];
-       char aead_req_data[sizeof(struct aead_request) +
-                          crypto_aead_reqsize(tfm)]
-               __aligned(__alignof__(struct aead_request));
-       struct aead_request *aead_req = (void *)aead_req_data;
-       u8 zero[GMAC_MIC_LEN], iv[AES_BLOCK_SIZE];
+       u8 *zero, *__aad, iv[AES_BLOCK_SIZE];
+       struct aead_request *aead_req;
+       int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
 
        if (data_len < GMAC_MIC_LEN)
                return -EINVAL;
 
-       memset(aead_req, 0, sizeof(aead_req_data));
+       aead_req = kzalloc(reqsize + GMAC_MIC_LEN + GMAC_AAD_LEN, GFP_ATOMIC);
+       if (!aead_req)
+               return -ENOMEM;
+
+       zero = (u8 *)aead_req + reqsize;
+       __aad = zero + GMAC_MIC_LEN;
+       memcpy(__aad, aad, GMAC_AAD_LEN);
 
-       memset(zero, 0, GMAC_MIC_LEN);
        sg_init_table(sg, 4);
-       sg_set_buf(&sg[0], aad, AAD_LEN);
+       sg_set_buf(&sg[0], __aad, GMAC_AAD_LEN);
        sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN);
        sg_set_buf(&sg[2], zero, GMAC_MIC_LEN);
        sg_set_buf(&sg[3], mic, GMAC_MIC_LEN);
@@ -49,9 +48,10 @@ int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
 
        aead_request_set_tfm(aead_req, tfm);
        aead_request_set_crypt(aead_req, sg, sg, 0, iv);
-       aead_request_set_ad(aead_req, AAD_LEN + data_len);
+       aead_request_set_ad(aead_req, GMAC_AAD_LEN + data_len);
 
        crypto_aead_encrypt(aead_req);
+       kzfree(aead_req);
 
        return 0;
 }
index d328204d73a8a658cc50ab136c0eae5ba82577ec..32e6442c95be4df1af1ee04186f31c20a86c3abc 100644 (file)
 
 #include <linux/crypto.h>
 
+#define GMAC_AAD_LEN   20
+#define GMAC_MIC_LEN   16
+#define GMAC_NONCE_LEN 12
+
 struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[],
                                                 size_t key_len);
 int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
index f6749dced021bc8176c3a8ae1b4fe263bd6730f6..3b5fd4188f2ac7c67c269ad425812221294c823e 100644 (file)
@@ -315,11 +315,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
        mutex_lock(&sta->ampdu_mlme.mtx);
 
        if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) {
-               tid_agg_rx = rcu_dereference_protected(
-                               sta->ampdu_mlme.tid_rx[tid],
-                               lockdep_is_held(&sta->ampdu_mlme.mtx));
-
-               if (tid_agg_rx->dialog_token == dialog_token) {
+               if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) {
                        ht_dbg_ratelimited(sta->sdata,
                                           "updated AddBA Req from %pM on tid %u\n",
                                           sta->sta.addr, tid);
@@ -396,7 +392,6 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
        }
 
        /* update data */
-       tid_agg_rx->dialog_token = dialog_token;
        tid_agg_rx->ssn = start_seq_num;
        tid_agg_rx->head_seq_num = start_seq_num;
        tid_agg_rx->buf_size = buf_size;
@@ -418,6 +413,7 @@ end:
        if (status == WLAN_STATUS_SUCCESS) {
                __set_bit(tid, sta->ampdu_mlme.agg_session_valid);
                __clear_bit(tid, sta->ampdu_mlme.unexpected_agg);
+               sta->ampdu_mlme.tid_rx_token[tid] = dialog_token;
        }
        mutex_unlock(&sta->ampdu_mlme.mtx);
 
index fd6541f3ade3e1342ac0fdce3ae3403de5752770..e91e503bf99257d7e8f3945c1500eebaa91f5e81 100644 (file)
@@ -357,10 +357,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
        mutex_lock(&local->sta_mtx);
 
        if (mac_addr) {
-               if (ieee80211_vif_is_mesh(&sdata->vif))
-                       sta = sta_info_get(sdata, mac_addr);
-               else
-                       sta = sta_info_get_bss(sdata, mac_addr);
+               sta = sta_info_get_bss(sdata, mac_addr);
                /*
                 * The ASSOC test makes sure the driver is ready to
                 * receive the key. When wpa_supplicant has roamed
@@ -867,6 +864,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
        }
        sdata->needed_rx_chains = sdata->local->rx_chains;
 
+       sdata->vif.bss_conf.beacon_int = params->beacon_interval;
+
        mutex_lock(&local->mtx);
        err = ieee80211_vif_use_channel(sdata, &params->chandef,
                                        IEEE80211_CHANCTX_SHARED);
@@ -897,7 +896,6 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
                                              vlan->vif.type);
        }
 
-       sdata->vif.bss_conf.beacon_int = params->beacon_interval;
        sdata->vif.bss_conf.dtim_period = params->dtim_period;
        sdata->vif.bss_conf.enable_beacon = true;
        sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p;
@@ -1523,9 +1521,6 @@ static int ieee80211_change_station(struct wiphy *wiphy,
                goto out_err;
 
        if (params->vlan && params->vlan != sta->sdata->dev) {
-               bool prev_4addr = false;
-               bool new_4addr = false;
-
                vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
 
                if (params->vlan->ieee80211_ptr->use_4addr) {
@@ -1535,26 +1530,21 @@ static int ieee80211_change_station(struct wiphy *wiphy,
                        }
 
                        rcu_assign_pointer(vlansdata->u.vlan.sta, sta);
-                       new_4addr = true;
                        __ieee80211_check_fast_rx_iface(vlansdata);
                }
 
                if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
-                   sta->sdata->u.vlan.sta) {
+                   sta->sdata->u.vlan.sta)
                        RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL);
-                       prev_4addr = true;
-               }
+
+               if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+                       ieee80211_vif_dec_num_mcast(sta->sdata);
 
                sta->sdata = vlansdata;
                ieee80211_check_fast_xmit(sta);
 
-               if (sta->sta_state == IEEE80211_STA_AUTHORIZED &&
-                   prev_4addr != new_4addr) {
-                       if (new_4addr)
-                               atomic_dec(&sta->sdata->bss->num_mcast_sta);
-                       else
-                               atomic_inc(&sta->sdata->bss->num_mcast_sta);
-               }
+               if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+                       ieee80211_vif_inc_num_mcast(sta->sdata);
 
                ieee80211_send_layer2_update(sta);
        }
@@ -2480,13 +2470,6 @@ int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata,
            smps_mode == IEEE80211_SMPS_AUTOMATIC)
                return 0;
 
-        /* If no associated stations, there's no need to do anything */
-       if (!atomic_read(&sdata->u.ap.num_mcast_sta)) {
-               sdata->smps_mode = smps_mode;
-               ieee80211_queue_work(&sdata->local->hw, &sdata->recalc_smps);
-               return 0;
-       }
-
        ht_dbg(sdata,
               "SMPS %d requested in AP mode, sending Action frame to %d stations\n",
               smps_mode, atomic_read(&sdata->u.ap.num_mcast_sta));
index f56e2f487d096aef3f7e1c083688548f92ed7110..e02ba42ca827501fdefc155d0ad71f2d4ffc2f36 100644 (file)
@@ -210,6 +210,7 @@ static const char *hw_flag_names[] = {
        FLAG(TX_AMSDU),
        FLAG(TX_FRAG_LIST),
        FLAG(REPORTS_LOW_ACK),
+       FLAG(SUPPORTS_TX_FRAG),
 #undef FLAG
 };
 
index bcec1240f41d90bec8b97a3bf52f8a8fcf918384..1a05f85cb1f0610b41ea212df2624a9078ea16cd 100644 (file)
@@ -477,6 +477,7 @@ IEEE80211_IF_FILE_RW(tdls_wider_bw);
 IEEE80211_IF_FILE(num_mcast_sta, u.ap.num_mcast_sta, ATOMIC);
 IEEE80211_IF_FILE(num_sta_ps, u.ap.ps.num_sta_ps, ATOMIC);
 IEEE80211_IF_FILE(dtim_count, u.ap.ps.dtim_count, DEC);
+IEEE80211_IF_FILE(num_mcast_sta_vlan, u.vlan.num_mcast_sta, ATOMIC);
 
 static ssize_t ieee80211_if_fmt_num_buffered_multicast(
        const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
@@ -684,6 +685,13 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata)
        DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
 }
 
+/* Create the debugfs entries that are specific to AP_VLAN interfaces. */
+static void add_vlan_files(struct ieee80211_sub_if_data *sdata)
+{
+       struct dentry *dir = sdata->vif.debugfs_dir;
+
+       /* The VLAN counter is backed by num_mcast_sta_vlan_ops but is
+        * published under the plain file name "num_mcast_sta".
+        */
+       debugfs_create_file("num_mcast_sta", 0400, dir, sdata,
+                           &num_mcast_sta_vlan_ops);
+}
+
 static void add_ibss_files(struct ieee80211_sub_if_data *sdata)
 {
        DEBUGFS_ADD_MODE(tsf, 0600);
@@ -787,6 +795,9 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
        case NL80211_IFTYPE_AP:
                add_ap_files(sdata);
                break;
+       case NL80211_IFTYPE_AP_VLAN:
+               add_vlan_files(sdata);
+               break;
        case NL80211_IFTYPE_WDS:
                add_wds_files(sdata);
                break;
index a2fcdb47a0e68443baecfc805bfc33e617dd8146..f6003b8c2c3335925d605d46d0072a8a96fed930 100644 (file)
@@ -199,13 +199,18 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
                       "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n");
 
        for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
+               bool tid_rx_valid;
+
                tid_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[i]);
                tid_tx = rcu_dereference(sta->ampdu_mlme.tid_tx[i]);
+               tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid);
 
                p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i);
-               p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x", !!tid_rx);
+               p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x",
+                              tid_rx_valid);
                p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x",
-                               tid_rx ? tid_rx->dialog_token : 0);
+                              tid_rx_valid ?
+                                       sta->ampdu_mlme.tid_rx_token[i] : 0);
                p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x",
                                tid_rx ? tid_rx->ssn : 0);
 
diff --git a/net/mac80211/fils_aead.c b/net/mac80211/fils_aead.c
new file mode 100644 (file)
index 0000000..ecfdd97
--- /dev/null
@@ -0,0 +1,342 @@
+/*
+ * FILS AEAD for (Re)Association Request/Response frames
+ * Copyright 2016, Qualcomm Atheros, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/skcipher.h>
+
+#include "ieee80211_i.h"
+#include "aes_cmac.h"
+#include "fils_aead.h"
+
+/* S2V construction used by AES-SIV (cf. RFC 5297): fold the num_elem input
+ * vectors described by addr[]/len[] into a single AES_BLOCK_SIZE value that
+ * is written to v, using AES-CMAC keyed through tfm. Always returns 0.
+ */
+static int aes_s2v(struct crypto_cipher *tfm,
+                  size_t num_elem, const u8 *addr[], size_t len[], u8 *v)
+{
+       const size_t last = num_elem - 1;
+       u8 acc[AES_BLOCK_SIZE], blk[AES_BLOCK_SIZE];
+       const u8 *vec[2];
+       size_t vec_len[2], vec_cnt;
+       size_t idx;
+
+       /* D = AES-CMAC(K, <zero>) */
+       memset(blk, 0, sizeof(blk));
+       vec[0] = blk;
+       vec_len[0] = sizeof(blk);
+       aes_cmac_vector(tfm, 1, vec, vec_len, acc, sizeof(acc));
+
+       /* For every vector except the last one:
+        * D = dbl(D) xor AES_CMAC(K, Si)
+        */
+       for (idx = 0; idx < last; idx++) {
+               gf_mulx(acc); /* dbl */
+               aes_cmac_vector(tfm, 1, &addr[idx], &len[idx], blk,
+                               sizeof(blk));
+               crypto_xor(acc, blk, sizeof(acc));
+       }
+
+       if (len[last] < AES_BLOCK_SIZE) {
+               /* len(Sn) < 128: T = dbl(D) xor pad(Sn) */
+               gf_mulx(acc); /* dbl */
+               memset(blk, 0, sizeof(blk));
+               memcpy(blk, addr[last], len[last]);
+               blk[len[last]] = 0x80;
+               crypto_xor(acc, blk, sizeof(acc));
+               vec[0] = acc;
+               vec_len[0] = sizeof(acc);
+               vec_cnt = 1;
+       } else {
+               /* len(Sn) >= 128: T = Sn xorend D */
+               const size_t head_len = len[last] - AES_BLOCK_SIZE;
+
+               /* Build the XORed final block in a scratch buffer so the
+                * const input Sn (addr[last]) is never modified.
+                */
+               memcpy(blk, addr[last] + head_len, sizeof(blk));
+               crypto_xor(blk, acc, sizeof(blk));
+               vec[0] = addr[last];
+               vec_len[0] = head_len;
+               vec[1] = blk;
+               vec_len[1] = sizeof(blk);
+               vec_cnt = 2;
+       }
+
+       /* V = AES-CMAC(K, T) */
+       aes_cmac_vector(tfm, vec_cnt, vec, vec_len, v, AES_BLOCK_SIZE);
+
+       return 0;
+}
+
+/* AES-SIV encryption.
+ *
+ * @key/@key_len: concatenation "S2V key || CTR key"; each half is key_len / 2
+ * @plain/@plain_len: data to protect (may alias @out, a copy is encrypted)
+ * @num_elem: number of AAD vectors already present in @addr/@len
+ * @addr/@len: AAD vectors; the plaintext is appended as one more vector
+ * @out: receives the AES_BLOCK_SIZE synthetic IV followed by plain_len
+ *      octets of ciphertext
+ *
+ * Returns 0 on success or a negative errno.
+ *
+ * Note: addr[] and len[] need to have one extra slot at the end.
+ */
+static int aes_siv_encrypt(const u8 *key, size_t key_len,
+                          const u8 *plain, size_t plain_len,
+                          size_t num_elem, const u8 *addr[],
+                          size_t len[], u8 *out)
+{
+       u8 v[AES_BLOCK_SIZE];
+       struct crypto_cipher *tfm;
+       struct crypto_skcipher *tfm2;
+       struct skcipher_request *req;
+       int res;
+       struct scatterlist src[1], dst[1];
+       u8 *tmp;
+
+       key_len /= 2; /* S2V key || CTR key */
+
+       /* The plaintext is the final S2V input vector */
+       addr[num_elem] = plain;
+       len[num_elem] = plain_len;
+       num_elem++;
+
+       /* S2V */
+
+       tfm = crypto_alloc_cipher("aes", 0, 0);
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+       /* K1 for S2V */
+       res = crypto_cipher_setkey(tfm, key, key_len);
+       if (!res)
+               res = aes_s2v(tfm, num_elem, addr, len, v);
+       crypto_free_cipher(tfm);
+       if (res)
+               return res;
+
+       /* Use a temporary buffer of the plaintext to handle need for
+        * overwriting this during AES-CTR.
+        */
+       tmp = kmemdup(plain, plain_len, GFP_KERNEL);
+       if (!tmp)
+               return -ENOMEM;
+
+       /* IV for CTR before encrypted data */
+       memcpy(out, v, AES_BLOCK_SIZE);
+
+       /* Synthetic IV to be used as the initial counter in CTR:
+        * Q = V bitand (1^64 || 0^1 || 1^31 || 0^1 || 1^31)
+        */
+       v[8] &= 0x7f;
+       v[12] &= 0x7f;
+
+       /* CTR */
+
+       /* Ask for a synchronous implementation (mask CRYPTO_ALG_ASYNC with
+        * type 0): the request is freed right after the encrypt call and no
+        * completion callback is installed, so an asynchronous tfm that
+        * returns -EINPROGRESS cannot be handled here.
+        */
+       tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
+       if (IS_ERR(tfm2)) {
+               kfree(tmp);
+               return PTR_ERR(tfm2);
+       }
+       /* K2 for CTR */
+       res = crypto_skcipher_setkey(tfm2, key + key_len, key_len);
+       if (res)
+               goto fail;
+
+       req = skcipher_request_alloc(tfm2, GFP_KERNEL);
+       if (!req) {
+               res = -ENOMEM;
+               goto fail;
+       }
+
+       sg_init_one(src, tmp, plain_len);
+       sg_init_one(dst, out + AES_BLOCK_SIZE, plain_len);
+       skcipher_request_set_crypt(req, src, dst, plain_len, v);
+       res = crypto_skcipher_encrypt(req);
+       skcipher_request_free(req);
+fail:
+       kfree(tmp);
+       crypto_free_skcipher(tfm2);
+       return res;
+}
+
+/* AES-SIV decryption and verification.
+ *
+ * @key/@key_len: concatenation "S2V key || CTR key"; each half is key_len / 2
+ * @iv_crypt/@iv_c_len: AES_BLOCK_SIZE synthetic IV followed by ciphertext;
+ *      the caller must guarantee iv_c_len >= AES_BLOCK_SIZE
+ * @num_elem: number of AAD vectors already present in @addr/@len
+ * @addr/@len: AAD vectors; the decrypted data is appended as one more vector
+ * @out: receives iv_c_len - AES_BLOCK_SIZE octets of plaintext
+ *
+ * Returns 0 if the recomputed synthetic IV matches the received one,
+ * -EINVAL on mismatch, or another negative errno on crypto API failure.
+ * Note that @out has already been written when a mismatch is reported.
+ *
+ * Note: addr[] and len[] need to have one extra slot at the end.
+ */
+static int aes_siv_decrypt(const u8 *key, size_t key_len,
+                          const u8 *iv_crypt, size_t iv_c_len,
+                          size_t num_elem, const u8 *addr[], size_t len[],
+                          u8 *out)
+{
+       struct crypto_cipher *tfm;
+       struct crypto_skcipher *tfm2;
+       struct skcipher_request *req;
+       struct scatterlist src[1], dst[1];
+       size_t crypt_len;
+       int res;
+       u8 frame_iv[AES_BLOCK_SIZE], iv[AES_BLOCK_SIZE];
+       u8 check[AES_BLOCK_SIZE];
+
+       crypt_len = iv_c_len - AES_BLOCK_SIZE;
+       key_len /= 2; /* S2V key || CTR key */
+       /* The decrypted data is the final S2V input vector */
+       addr[num_elem] = out;
+       len[num_elem] = crypt_len;
+       num_elem++;
+
+       /* Keep an unmodified copy of the received IV for the final check;
+        * out may alias iv_crypt and iv is masked below.
+        */
+       memcpy(iv, iv_crypt, AES_BLOCK_SIZE);
+       memcpy(frame_iv, iv_crypt, AES_BLOCK_SIZE);
+
+       /* Synthetic IV to be used as the initial counter in CTR:
+        * Q = V bitand (1^64 || 0^1 || 1^31 || 0^1 || 1^31)
+        */
+       iv[8] &= 0x7f;
+       iv[12] &= 0x7f;
+
+       /* CTR */
+
+       /* Ask for a synchronous implementation (mask CRYPTO_ALG_ASYNC with
+        * type 0): the request is freed right after the decrypt call and no
+        * completion callback is installed, so an asynchronous tfm that
+        * returns -EINPROGRESS cannot be handled here.
+        */
+       tfm2 = crypto_alloc_skcipher("ctr(aes)", 0, CRYPTO_ALG_ASYNC);
+       if (IS_ERR(tfm2))
+               return PTR_ERR(tfm2);
+       /* K2 for CTR */
+       res = crypto_skcipher_setkey(tfm2, key + key_len, key_len);
+       if (res) {
+               crypto_free_skcipher(tfm2);
+               return res;
+       }
+
+       req = skcipher_request_alloc(tfm2, GFP_KERNEL);
+       if (!req) {
+               crypto_free_skcipher(tfm2);
+               return -ENOMEM;
+       }
+
+       sg_init_one(src, iv_crypt + AES_BLOCK_SIZE, crypt_len);
+       sg_init_one(dst, out, crypt_len);
+       skcipher_request_set_crypt(req, src, dst, crypt_len, iv);
+       res = crypto_skcipher_decrypt(req);
+       skcipher_request_free(req);
+       crypto_free_skcipher(tfm2);
+       if (res)
+               return res;
+
+       /* S2V */
+
+       tfm = crypto_alloc_cipher("aes", 0, 0);
+       if (IS_ERR(tfm))
+               return PTR_ERR(tfm);
+       /* K1 for S2V */
+       res = crypto_cipher_setkey(tfm, key, key_len);
+       if (!res)
+               res = aes_s2v(tfm, num_elem, addr, len, check);
+       crypto_free_cipher(tfm);
+       if (res)
+               return res;
+       /* Constant-time comparison so the position of the first differing
+        * octet of the authentication value is not leaked through timing.
+        */
+       if (crypto_memneq(check, frame_iv, AES_BLOCK_SIZE))
+               return -EINVAL;
+       return 0;
+}
+
+/* Protect a FILS (Re)Association Request frame with AES-SIV.
+ *
+ * @skb: frame to protect; everything after the FILS Session element is
+ *      encrypted in place and the frame grows by AES_BLOCK_SIZE octets
+ *      (the caller must have reserved that tailroom)
+ * @assoc_data: supplies the KEK (fils_kek/fils_kek_len) and both nonces
+ *
+ * Returns 0 on success, -EINVAL if no FILS Session element with the
+ * expected length is found, or a negative errno from the crypto layer.
+ */
+int fils_encrypt_assoc_req(struct sk_buff *skb,
+                          struct ieee80211_mgd_assoc_data *assoc_data)
+{
+       struct ieee80211_mgmt *mgmt = (void *)skb->data;
+       u8 *capab, *ies, *encr;
+       /* five AAD vectors + the extra slot aes_siv_encrypt() fills in */
+       const u8 *addr[5 + 1], *session;
+       size_t len[5 + 1];
+       size_t crypt_len;
+
+       if (ieee80211_is_reassoc_req(mgmt->frame_control)) {
+               capab = (u8 *)&mgmt->u.reassoc_req.capab_info;
+               ies = mgmt->u.reassoc_req.variable;
+       } else {
+               capab = (u8 *)&mgmt->u.assoc_req.capab_info;
+               ies = mgmt->u.assoc_req.variable;
+       }
+
+       session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION,
+                                      ies, skb->data + skb->len - ies);
+       if (!session || session[1] != 1 + 8)
+               return -EINVAL;
+       /* encrypt after FILS Session element */
+       encr = (u8 *)session + 2 + 1 + 8;
+
+       /* AES-SIV AAD vectors */
+
+       /* The STA's MAC address */
+       addr[0] = mgmt->sa;
+       len[0] = ETH_ALEN;
+       /* The AP's BSSID */
+       addr[1] = mgmt->da;
+       len[1] = ETH_ALEN;
+       /* The STA's nonce */
+       addr[2] = assoc_data->fils_nonces;
+       len[2] = FILS_NONCE_LEN;
+       /* The AP's nonce */
+       addr[3] = &assoc_data->fils_nonces[FILS_NONCE_LEN];
+       len[3] = FILS_NONCE_LEN;
+       /* The (Re)Association Request frame from the Capability Information
+        * field to the FILS Session element (both inclusive).
+        */
+       addr[4] = capab;
+       len[4] = encr - capab;
+
+       crypt_len = skb->data + skb->len - encr;
+       skb_put(skb, AES_BLOCK_SIZE);
+       /* All five AAD vectors set up above must be passed: a smaller count
+        * would leave the remaining vectors unauthenticated and let the
+        * plaintext slot overwrite one of them.
+        */
+       return aes_siv_encrypt(assoc_data->fils_kek, assoc_data->fils_kek_len,
+                              encr, crypt_len, 5, addr, len, encr);
+}
+
+/* Verify and decrypt a FILS-protected (Re)Association Response frame.
+ *
+ * @sdata: interface the frame was received on (used for debug logging)
+ * @frame: start of the management frame; the part after the FILS Session
+ *      element is decrypted in place
+ * @frame_len: in: length of @frame; out (on success only): length reduced
+ *      by AES_BLOCK_SIZE, the size of the removed synthetic IV
+ * @assoc_data: supplies the KEK (fils_kek/fils_kek_len) and both nonces
+ *
+ * Returns 0 on success, -EINVAL for a malformed frame or failed
+ * verification, or another negative errno from the crypto layer.
+ */
+int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata,
+                           u8 *frame, size_t *frame_len,
+                           struct ieee80211_mgd_assoc_data *assoc_data)
+{
+       struct ieee80211_mgmt *mgmt = (void *)frame;
+       u8 *capab, *ies, *encr;
+       /* five AAD vectors + the extra slot aes_siv_decrypt() fills in */
+       const u8 *addr[5 + 1], *session;
+       size_t len[5 + 1];
+       int res;
+       size_t crypt_len;
+
+       if (*frame_len < 24 + 6)
+               return -EINVAL;
+
+       capab = (u8 *)&mgmt->u.assoc_resp.capab_info;
+       ies = mgmt->u.assoc_resp.variable;
+       session = cfg80211_find_ext_ie(WLAN_EID_EXT_FILS_SESSION,
+                                      ies, frame + *frame_len - ies);
+       if (!session || session[1] != 1 + 8) {
+               mlme_dbg(sdata,
+                        "No (valid) FILS Session element in (Re)Association Response frame from %pM\n",
+                        mgmt->sa);
+               return -EINVAL;
+       }
+       /* decrypt after FILS Session element */
+       encr = (u8 *)session + 2 + 1 + 8;
+
+       /* AES-SIV AAD vectors */
+
+       /* The AP's BSSID */
+       addr[0] = mgmt->sa;
+       len[0] = ETH_ALEN;
+       /* The STA's MAC address */
+       addr[1] = mgmt->da;
+       len[1] = ETH_ALEN;
+       /* The AP's nonce */
+       addr[2] = &assoc_data->fils_nonces[FILS_NONCE_LEN];
+       len[2] = FILS_NONCE_LEN;
+       /* The STA's nonce */
+       addr[3] = assoc_data->fils_nonces;
+       len[3] = FILS_NONCE_LEN;
+       /* The (Re)Association Response frame from the Capability Information
+        * field to the FILS Session element (both inclusive).
+        */
+       addr[4] = capab;
+       len[4] = encr - capab;
+
+       crypt_len = frame + *frame_len - encr;
+       /* There must be room for at least the synthetic IV */
+       if (crypt_len < AES_BLOCK_SIZE) {
+               mlme_dbg(sdata,
+                        "Not enough room for AES-SIV data after FILS Session element in (Re)Association Response frame from %pM\n",
+                        mgmt->sa);
+               return -EINVAL;
+       }
+       res = aes_siv_decrypt(assoc_data->fils_kek, assoc_data->fils_kek_len,
+                             encr, crypt_len, 5, addr, len, encr);
+       if (res != 0) {
+               mlme_dbg(sdata,
+                        "AES-SIV decryption of (Re)Association Response frame from %pM failed\n",
+                        mgmt->sa);
+               return res;
+       }
+       /* The synthetic IV has been consumed; only plaintext remains */
+       *frame_len -= AES_BLOCK_SIZE;
+       return 0;
+}
diff --git a/net/mac80211/fils_aead.h b/net/mac80211/fils_aead.h
new file mode 100644 (file)
index 0000000..fbc6523
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * FILS AEAD for (Re)Association Request/Response frames
+ * Copyright 2016, Qualcomm Atheros, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef FILS_AEAD_H
+#define FILS_AEAD_H
+
+/* Encrypt, in place, the part of the (Re)Association Request in @skb that
+ * follows the FILS Session element, using the KEK and nonces stored in
+ * @assoc_data. The frame grows by AES_BLOCK_SIZE octets.
+ * Returns 0 on success or a negative errno.
+ */
+int fils_encrypt_assoc_req(struct sk_buff *skb,
+                          struct ieee80211_mgd_assoc_data *assoc_data);
+/* Verify and decrypt, in place, the part of the (Re)Association Response in
+ * @frame that follows the FILS Session element. On success *@frame_len is
+ * reduced by AES_BLOCK_SIZE. Returns 0 on success or a negative errno.
+ */
+int fils_decrypt_assoc_resp(struct ieee80211_sub_if_data *sdata,
+                           u8 *frame, size_t *frame_len,
+                           struct ieee80211_mgd_assoc_data *assoc_data);
+
+#endif /* FILS_AEAD_H */
index 34c2add2c455978936c5379218620d41269a43a6..d37a577f63a1809b3ef56bfa471d46241edd0de5 100644 (file)
@@ -84,6 +84,8 @@ struct ieee80211_local;
 #define IEEE80211_DEFAULT_MAX_SP_LEN           \
        IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL
 
+extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS];
+
 #define IEEE80211_DEAUTH_FRAME_LEN     (24 /* hdr */ + 2 /* reason */)
 
 #define IEEE80211_MAX_NAN_INSTANCE_ID 255
@@ -307,6 +309,7 @@ struct ieee80211_if_vlan {
 
        /* used for all tx if the VLAN is configured to 4-addr mode */
        struct sta_info __rcu *sta;
+       atomic_t num_mcast_sta; /* number of stations receiving multicast */
 };
 
 struct mesh_stats {
@@ -398,6 +401,10 @@ struct ieee80211_mgd_assoc_data {
 
        struct ieee80211_vht_cap ap_vht_cap;
 
+       u8 fils_nonces[2 * FILS_NONCE_LEN];
+       u8 fils_kek[FILS_MAX_KEK_LEN];
+       size_t fils_kek_len;
+
        size_t ie_len;
        u8 ie[];
 };
@@ -442,7 +449,7 @@ struct ieee80211_if_managed {
        struct ieee80211_mgd_auth_data *auth_data;
        struct ieee80211_mgd_assoc_data *assoc_data;
 
-       u8 bssid[ETH_ALEN];
+       u8 bssid[ETH_ALEN] __aligned(2);
 
        u16 aid;
 
@@ -1527,6 +1534,23 @@ ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
        return false;
 }
 
+void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata);
+void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata);
+
+/* Report how many multicast stations are connected to this interface.
+ *
+ * The count is only tracked for AP interfaces and for AP_VLAN interfaces
+ * that have no 4-addr station assigned; in every other case -1 is returned.
+ */
+static inline int
+ieee80211_vif_get_num_mcast_if(struct ieee80211_sub_if_data *sdata)
+{
+       switch (sdata->vif.type) {
+       case NL80211_IFTYPE_AP:
+               return atomic_read(&sdata->u.ap.num_mcast_sta);
+       case NL80211_IFTYPE_AP_VLAN:
+               /* a VLAN bound to a 4-addr station is not tracked */
+               if (sdata->u.vlan.sta)
+                       break;
+               return atomic_read(&sdata->u.vlan.num_mcast_sta);
+       default:
+               break;
+       }
+
+       return -1;
+}
+
 u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
                                     struct ieee80211_rx_status *status,
                                     unsigned int mpdu_len,
index 73e6a8fd28455aa84d2d6774aec7d49bf49f023d..41497b670e2bde0e55a2a35f5fbbdb60148ddd54 100644 (file)
@@ -1998,3 +1998,19 @@ void ieee80211_iface_exit(void)
 {
        unregister_netdevice_notifier(&mac80211_netdev_notifier);
 }
+
+/* Increment the multicast-station counter of an AP or AP_VLAN interface;
+ * interfaces of any other type are left untouched.
+ */
+void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata)
+{
+       switch (sdata->vif.type) {
+       case NL80211_IFTYPE_AP:
+               atomic_inc(&sdata->u.ap.num_mcast_sta);
+               break;
+       case NL80211_IFTYPE_AP_VLAN:
+               atomic_inc(&sdata->u.vlan.num_mcast_sta);
+               break;
+       default:
+               break;
+       }
+}
+
+/* Decrement the multicast-station counter of an AP or AP_VLAN interface;
+ * interfaces of any other type are left untouched.
+ */
+void ieee80211_vif_dec_num_mcast(struct ieee80211_sub_if_data *sdata)
+{
+       switch (sdata->vif.type) {
+       case NL80211_IFTYPE_AP:
+               atomic_dec(&sdata->u.ap.num_mcast_sta);
+               break;
+       case NL80211_IFTYPE_AP_VLAN:
+               atomic_dec(&sdata->u.vlan.num_mcast_sta);
+               break;
+       default:
+               break;
+       }
+}
index 1075ac24c8c5f433690749970d9e27393351caa9..1822c77f2b1c3125ec1bc63fa6b7de085a26bbed 100644 (file)
@@ -549,6 +549,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
                           NL80211_FEATURE_MAC_ON_CREATE |
                           NL80211_FEATURE_USERSPACE_MPM |
                           NL80211_FEATURE_FULL_AP_CLIENT_STATE;
+       wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_FILS_STA);
 
        if (!ops->hw_scan)
                wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
@@ -821,6 +822,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
             !local->ops->tdls_recv_channel_switch))
                return -EOPNOTSUPP;
 
+       if (WARN_ON(ieee80211_hw_check(hw, SUPPORTS_TX_FRAG) &&
+                   !local->ops->set_frag_threshold))
+               return -EINVAL;
+
        if (WARN_ON(local->hw.wiphy->interface_modes &
                        BIT(NL80211_IFTYPE_NAN) &&
                    (!local->ops->start_nan || !local->ops->stop_nan)))
index 7486f2dab4ba70ade0b2faf48ffa7f3880ac2ad4..d157b250ff7710088dfeee928045f2612e8c5464 100644 (file)
@@ -30,6 +30,7 @@
 #include "driver-ops.h"
 #include "rate.h"
 #include "led.h"
+#include "fils_aead.h"
 
 #define IEEE80211_AUTH_TIMEOUT         (HZ / 5)
 #define IEEE80211_AUTH_TIMEOUT_LONG    (HZ / 2)
@@ -652,6 +653,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
                        2 + sizeof(struct ieee80211_ht_cap) + /* HT */
                        2 + sizeof(struct ieee80211_vht_cap) + /* VHT */
                        assoc_data->ie_len + /* extra IEs */
+                       (assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
                        9, /* WMM */
                        GFP_KERNEL);
        if (!skb)
@@ -875,6 +877,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
                memcpy(pos, assoc_data->ie + offset, noffset - offset);
        }
 
+       if (assoc_data->fils_kek_len &&
+           fils_encrypt_assoc_req(skb, assoc_data) < 0) {
+               dev_kfree_skb(skb);
+               return;
+       }
+
        drv_mgd_prepare_tx(local, sdata);
 
        IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
@@ -2618,6 +2626,9 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
        case WLAN_AUTH_LEAP:
        case WLAN_AUTH_FT:
        case WLAN_AUTH_SAE:
+       case WLAN_AUTH_FILS_SK:
+       case WLAN_AUTH_FILS_SK_PFS:
+       case WLAN_AUTH_FILS_PK:
                break;
        case WLAN_AUTH_SHARED_KEY:
                if (ifmgd->auth_data->expected_transaction != 4) {
@@ -3143,6 +3154,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
                   reassoc ? "Rea" : "A", mgmt->sa,
                   capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
 
+       if (assoc_data->fils_kek_len &&
+           fils_decrypt_assoc_resp(sdata, (u8 *)mgmt, &len, assoc_data) < 0)
+               return;
+
        pos = mgmt->u.assoc_resp.variable;
        ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);
 
@@ -3193,7 +3208,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
                uapsd_queues = 0;
                for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
                        if (sdata->tx_conf[ac].uapsd)
-                               uapsd_queues |= BIT(ac);
+                               uapsd_queues |= ieee80211_ac_to_qos_mask[ac];
        }
 
        cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len, uapsd_queues);
@@ -4479,24 +4494,36 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
        case NL80211_AUTHTYPE_SAE:
                auth_alg = WLAN_AUTH_SAE;
                break;
+       case NL80211_AUTHTYPE_FILS_SK:
+               auth_alg = WLAN_AUTH_FILS_SK;
+               break;
+       case NL80211_AUTHTYPE_FILS_SK_PFS:
+               auth_alg = WLAN_AUTH_FILS_SK_PFS;
+               break;
+       case NL80211_AUTHTYPE_FILS_PK:
+               auth_alg = WLAN_AUTH_FILS_PK;
+               break;
        default:
                return -EOPNOTSUPP;
        }
 
-       auth_data = kzalloc(sizeof(*auth_data) + req->sae_data_len +
+       auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len +
                            req->ie_len, GFP_KERNEL);
        if (!auth_data)
                return -ENOMEM;
 
        auth_data->bss = req->bss;
 
-       if (req->sae_data_len >= 4) {
-               __le16 *pos = (__le16 *) req->sae_data;
-               auth_data->sae_trans = le16_to_cpu(pos[0]);
-               auth_data->sae_status = le16_to_cpu(pos[1]);
-               memcpy(auth_data->data, req->sae_data + 4,
-                      req->sae_data_len - 4);
-               auth_data->data_len += req->sae_data_len - 4;
+       if (req->auth_data_len >= 4) {
+               if (req->auth_type == NL80211_AUTHTYPE_SAE) {
+                       __le16 *pos = (__le16 *) req->auth_data;
+
+                       auth_data->sae_trans = le16_to_cpu(pos[0]);
+                       auth_data->sae_status = le16_to_cpu(pos[1]);
+               }
+               memcpy(auth_data->data, req->auth_data + 4,
+                      req->auth_data_len - 4);
+               auth_data->data_len += req->auth_data_len - 4;
        }
 
        if (req->ie && req->ie_len) {
@@ -4692,6 +4719,21 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
                assoc_data->ie_len = req->ie_len;
        }
 
+       if (req->fils_kek) {
+               /* should already be checked in cfg80211 - so warn */
+               if (WARN_ON(req->fils_kek_len > FILS_MAX_KEK_LEN)) {
+                       err = -EINVAL;
+                       goto err_free;
+               }
+               memcpy(assoc_data->fils_kek, req->fils_kek,
+                      req->fils_kek_len);
+               assoc_data->fils_kek_len = req->fils_kek_len;
+       }
+
+       if (req->fils_nonces)
+               memcpy(assoc_data->fils_nonces, req->fils_nonces,
+                      2 * FILS_NONCE_LEN);
+
        assoc_data->bss = req->bss;
 
        if (ifmgd->req_smps == IEEE80211_SMPS_AUTOMATIC) {
index c3f610bba3fe9879cb1594e5b1caf00b8a506d6a..eede5c6db8d5a784a27da0c53fc6f34b75dcf0bb 100644 (file)
@@ -820,7 +820,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
                    mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT)
                        break;
                rcu_read_lock();
-               sta = sta_info_get(sdata, mgmt->da);
+               sta = sta_info_get_bss(sdata, mgmt->da);
                rcu_read_unlock();
                if (!sta)
                        return -ENOLINK;
index 6175db385ba7d085f4d2f614697f8ef7e9d914cd..eeab7250f4b978bd2af2b320ae8f430ab00cc81d 100644 (file)
@@ -1394,13 +1394,15 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *pubsta, u8 tid)
        u8 ac = ieee802_1d_to_ac[tid & 7];
 
        /*
-        * If this AC is not trigger-enabled do nothing.
+        * If this AC is not trigger-enabled do nothing unless the
+        * driver is calling us after it already checked.
         *
         * NB: This could/should check a separate bitmap of trigger-
         * enabled queues, but for now we only implement uAPSD w/o
         * TSPEC changes to the ACs, so they're always the same.
         */
-       if (!(sta->sta.uapsd_queues & BIT(ac)))
+       if (!(sta->sta.uapsd_queues & ieee80211_ac_to_qos_mask[ac]) &&
+           tid != IEEE80211_NUM_TIDS)
                return;
 
        /* if we are in a service period, do nothing */
@@ -2215,7 +2217,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
             sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
            !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
            (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
-               if (is_multicast_ether_addr(ehdr->h_dest)) {
+               if (is_multicast_ether_addr(ehdr->h_dest) &&
+                   ieee80211_vif_get_num_mcast_if(sdata) != 0) {
                        /*
                         * send multicast frames both to higher layers in
                         * local net stack and back to the wireless medium
@@ -2224,7 +2227,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
                        if (!xmit_skb)
                                net_info_ratelimited("%s: failed to clone multicast frame\n",
                                                    dev->name);
-               } else {
+               } else if (!is_multicast_ether_addr(ehdr->h_dest)) {
                        dsta = sta_info_get(sdata, skb->data);
                        if (dsta) {
                                /*
@@ -2298,6 +2301,8 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
        __le16 fc = hdr->frame_control;
        struct sk_buff_head frame_list;
        struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
+       struct ethhdr ethhdr;
+       const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
 
        if (unlikely(!ieee80211_is_data(fc)))
                return RX_CONTINUE;
@@ -2308,24 +2313,53 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
        if (!(status->rx_flags & IEEE80211_RX_AMSDU))
                return RX_CONTINUE;
 
-       if (ieee80211_has_a4(hdr->frame_control) &&
-           rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
-           !rx->sdata->u.vlan.sta)
-               return RX_DROP_UNUSABLE;
+       if (unlikely(ieee80211_has_a4(hdr->frame_control))) {
+               switch (rx->sdata->vif.type) {
+               case NL80211_IFTYPE_AP_VLAN:
+                       if (!rx->sdata->u.vlan.sta)
+                               return RX_DROP_UNUSABLE;
+                       break;
+               case NL80211_IFTYPE_STATION:
+                       if (!rx->sdata->u.mgd.use_4addr)
+                               return RX_DROP_UNUSABLE;
+                       break;
+               default:
+                       return RX_DROP_UNUSABLE;
+               }
+               check_da = NULL;
+               check_sa = NULL;
+       } else switch (rx->sdata->vif.type) {
+               case NL80211_IFTYPE_AP:
+               case NL80211_IFTYPE_AP_VLAN:
+                       check_da = NULL;
+                       break;
+               case NL80211_IFTYPE_STATION:
+                       if (!rx->sta ||
+                           !test_sta_flag(rx->sta, WLAN_STA_TDLS_PEER))
+                               check_sa = NULL;
+                       break;
+               case NL80211_IFTYPE_MESH_POINT:
+                       check_sa = NULL;
+                       break;
+               default:
+                       break;
+       }
 
-       if (is_multicast_ether_addr(hdr->addr1) &&
-           ((rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
-             rx->sdata->u.vlan.sta) ||
-            (rx->sdata->vif.type == NL80211_IFTYPE_STATION &&
-             rx->sdata->u.mgd.use_4addr)))
+       if (is_multicast_ether_addr(hdr->addr1))
                return RX_DROP_UNUSABLE;
 
        skb->dev = dev;
        __skb_queue_head_init(&frame_list);
 
+       if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
+                                         rx->sdata->vif.addr,
+                                         rx->sdata->vif.type))
+               return RX_DROP_UNUSABLE;
+
        ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
                                 rx->sdata->vif.type,
-                                rx->local->hw.extra_tx_headroom, true);
+                                rx->local->hw.extra_tx_headroom,
+                                check_da, check_sa);
 
        while (!skb_queue_empty(&frame_list)) {
                rx->skb = __skb_dequeue(&frame_list);
index 78e9ecbc96e616d0f90228abb5bcda59147ea73f..1711bae4abf2f16f49f80747ae2c9f75a2fea38b 100644 (file)
@@ -688,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
        }
 
        /* No need to do anything if the driver does all */
-       if (!local->ops->set_tim)
+       if (ieee80211_hw_check(&local->hw, AP_LINK_PS))
                return;
 
        if (sta->dead)
@@ -709,7 +709,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
        for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
                unsigned long tids;
 
-               if (ignore_for_tim & BIT(ac))
+               if (ignore_for_tim & ieee80211_ac_to_qos_mask[ac])
                        continue;
 
                indicate_tim |= !skb_queue_empty(&sta->tx_filtered[ac]) ||
@@ -1389,7 +1389,7 @@ ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs,
                return true;
 
        for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
-               if (ignored_acs & BIT(ac))
+               if (ignored_acs & ieee80211_ac_to_qos_mask[ac])
                        continue;
 
                if (!skb_queue_empty(&sta->tx_filtered[ac]) ||
@@ -1414,7 +1414,7 @@ ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs,
        for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
                unsigned long tids;
 
-               if (ignored_acs & BIT(ac))
+               if (ignored_acs & ieee80211_ac_to_qos_mask[ac])
                        continue;
 
                tids = ieee80211_tids_for_ac(ac);
@@ -1482,7 +1482,7 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
                        BIT(find_highest_prio_tid(driver_release_tids));
 
        if (skb_queue_empty(&frames) && !driver_release_tids) {
-               int tid;
+               int tid, ac;
 
                /*
                 * For PS-Poll, this can only happen due to a race condition
@@ -1500,7 +1500,10 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta,
                 */
 
                /* This will evaluate to 1, 3, 5 or 7. */
-               tid = 7 - ((ffs(~ignored_acs) - 1) << 1);
+               for (ac = IEEE80211_AC_VO; ac < IEEE80211_NUM_ACS; ac++)
+                       if (!(ignored_acs & ieee80211_ac_to_qos_mask[ac]))
+                               break; /* first AC that is not ignored */
+               tid = 7 - 2 * ac;
 
                ieee80211_send_null_response(sta, tid, reason, true, false);
        } else if (!driver_release_tids) {
@@ -1871,10 +1874,7 @@ int sta_info_move_state(struct sta_info *sta,
                        if (!sta->sta.support_p2p_ps)
                                ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
                } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
-                       if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
-                           (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
-                            !sta->sdata->u.vlan.sta))
-                               atomic_dec(&sta->sdata->bss->num_mcast_sta);
+                       ieee80211_vif_dec_num_mcast(sta->sdata);
                        clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
                        ieee80211_clear_fast_xmit(sta);
                        ieee80211_clear_fast_rx(sta);
@@ -1882,10 +1882,7 @@ int sta_info_move_state(struct sta_info *sta,
                break;
        case IEEE80211_STA_AUTHORIZED:
                if (sta->sta_state == IEEE80211_STA_ASSOC) {
-                       if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
-                           (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
-                            !sta->sdata->u.vlan.sta))
-                               atomic_inc(&sta->sdata->bss->num_mcast_sta);
+                       ieee80211_vif_inc_num_mcast(sta->sdata);
                        set_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
                        ieee80211_check_fast_xmit(sta);
                        ieee80211_check_fast_rx(sta);
index ed5fcb984a01a51ce08caa32101b50265509eb3f..dd06ef0b88614566ea4eb751a314856d38fb0e39 100644 (file)
@@ -184,7 +184,6 @@ struct tid_ampdu_tx {
  * @ssn: Starting Sequence Number expected to be aggregated.
  * @buf_size: buffer size for incoming A-MPDUs
  * @timeout: reset timer value (in TUs).
- * @dialog_token: dialog token for aggregation session
  * @rcu_head: RCU head used for freeing this struct
  * @reorder_lock: serializes access to reorder buffer, see below.
  * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
@@ -213,7 +212,6 @@ struct tid_ampdu_rx {
        u16 ssn;
        u16 buf_size;
        u16 timeout;
-       u8 dialog_token;
        bool auto_seq;
        bool removed;
 };
@@ -225,6 +223,7 @@ struct tid_ampdu_rx {
  *     to tid_tx[idx], which are protected by the sta spinlock)
  *     tid_start_tx is also protected by sta->lock.
  * @tid_rx: aggregation info for Rx per TID -- RCU protected
+ * @tid_rx_token: dialog tokens for valid aggregation sessions
  * @tid_rx_timer_expired: bitmap indicating on which TIDs the
  *     RX timer expired until the work for it runs
  * @tid_rx_stop_requested:  bitmap indicating which BA sessions per TID the
@@ -243,6 +242,7 @@ struct sta_ampdu_mlme {
        struct mutex mtx;
        /* rx */
        struct tid_ampdu_rx __rcu *tid_rx[IEEE80211_NUM_TIDS];
+       u8 tid_rx_token[IEEE80211_NUM_TIDS];
        unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
        unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
        unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
index 1c56abc496272bb58d71639975e4706c266f2066..2c21b7039136fe467845df5494432b58a6a226fc 100644 (file)
@@ -331,9 +331,8 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
                        I802_DEBUG_INC(tx->local->tx_handlers_drop_not_assoc);
                        return TX_DROP;
                }
-       } else if (unlikely(tx->sdata->vif.type == NL80211_IFTYPE_AP &&
-                           ieee80211_is_data(hdr->frame_control) &&
-                           !atomic_read(&tx->sdata->u.ap.num_mcast_sta))) {
+       } else if (unlikely(ieee80211_is_data(hdr->frame_control) &&
+                           ieee80211_vif_get_num_mcast_if(tx->sdata) == 0)) {
                /*
                 * No associated STAs - no need to send multicast
                 * frames.
@@ -935,7 +934,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
        if (info->flags & IEEE80211_TX_CTL_DONTFRAG)
                return TX_CONTINUE;
 
-       if (tx->local->ops->set_frag_threshold)
+       if (ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG))
                return TX_CONTINUE;
 
        /*
@@ -1501,7 +1500,6 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local,
                                struct sta_info *sta,
                                struct sk_buff *skb)
 {
-       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
        struct fq *fq = &local->fq;
        struct ieee80211_vif *vif;
        struct txq_info *txqi;
@@ -1526,8 +1524,6 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local,
        if (!txqi)
                return false;
 
-       info->control.vif = vif;
-
        spin_lock_bh(&fq->lock);
        ieee80211_txq_enqueue(local, txqi, skb);
        spin_unlock_bh(&fq->lock);
@@ -2801,7 +2797,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
 
        /* fast-xmit doesn't handle fragmentation at all */
        if (local->hw.wiphy->frag_threshold != (u32)-1 &&
-           !local->ops->set_frag_threshold)
+           !ieee80211_hw_check(&local->hw, SUPPORTS_TX_FRAG))
                goto out;
 
        rcu_read_lock();
@@ -3060,11 +3056,12 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
        struct ieee80211_hdr *hdr;
-       struct ethhdr amsdu_hdr;
+       struct ethhdr *amsdu_hdr;
        int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header);
        int subframe_len = skb->len - hdr_len;
        void *data;
-       u8 *qc;
+       u8 *qc, *h_80211_src, *h_80211_dst;
+       const u8 *bssid;
 
        if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
                return false;
@@ -3072,19 +3069,44 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
        if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
                return true;
 
-       if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(amsdu_hdr),
+       if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr),
                                         &subframe_len))
                return false;
 
-       amsdu_hdr.h_proto = cpu_to_be16(subframe_len);
-       memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN);
-       memcpy(amsdu_hdr.h_dest, skb->data + fast_tx->da_offs, ETH_ALEN);
+       data = skb_push(skb, sizeof(*amsdu_hdr));
+       memmove(data, data + sizeof(*amsdu_hdr), hdr_len);
+       hdr = data;
+       amsdu_hdr = data + hdr_len;
+       /* h_80211_src/dst is addr* field within hdr */
+       h_80211_src = data + fast_tx->sa_offs;
+       h_80211_dst = data + fast_tx->da_offs;
+
+       amsdu_hdr->h_proto = cpu_to_be16(subframe_len);
+       ether_addr_copy(amsdu_hdr->h_source, h_80211_src);
+       ether_addr_copy(amsdu_hdr->h_dest, h_80211_dst);
+
+       /* according to IEEE 802.11-2012 8.3.2 table 8-19, the outer SA/DA
+        * fields need to be changed to BSSID for A-MSDU frames depending
+        * on FromDS/ToDS values.
+        */
+       switch (sdata->vif.type) {
+       case NL80211_IFTYPE_STATION:
+               bssid = sdata->u.mgd.bssid;
+               break;
+       case NL80211_IFTYPE_AP:
+       case NL80211_IFTYPE_AP_VLAN:
+               bssid = sdata->vif.addr;
+               break;
+       default:
+               bssid = NULL;
+       }
 
-       data = skb_push(skb, sizeof(amsdu_hdr));
-       memmove(data, data + sizeof(amsdu_hdr), hdr_len);
-       memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr));
+       if (bssid && ieee80211_has_fromds(hdr->frame_control))
+               ether_addr_copy(h_80211_src, bssid);
+
+       if (bssid && ieee80211_has_tods(hdr->frame_control))
+               ether_addr_copy(h_80211_dst, bssid);
 
-       hdr = data;
        qc = ieee80211_get_qos_ctl(hdr);
        *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT;
 
@@ -3213,7 +3235,6 @@ static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
 
        if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
                tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
-               *ieee80211_get_qos_ctl(hdr) = tid;
                hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
        } else {
                info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
@@ -3338,6 +3359,11 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
                      (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
        info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT;
 
+       if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+               tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+               *ieee80211_get_qos_ctl(hdr) = tid;
+       }
+
        __skb_queue_head_init(&tx.skbs);
 
        tx.flags = IEEE80211_TX_UNICAST;
@@ -3426,6 +3452,11 @@ begin:
                goto begin;
        }
 
+       if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags))
+               info->flags |= IEEE80211_TX_CTL_AMPDU;
+       else
+               info->flags &= ~IEEE80211_TX_CTL_AMPDU;
+
        if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) {
                struct sta_info *sta = container_of(txq->sta, struct sta_info,
                                                    sta);
index 545c79a42a779b949fe4cd62afee7e36685d358c..ac59fbd280dff8f712fb9dbc841147e4642ec607 100644 (file)
@@ -3308,10 +3308,11 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_sub_if_data *sdata_iter;
        enum nl80211_iftype iftype = sdata->wdev.iftype;
-       int num[NUM_NL80211_IFTYPES];
        struct ieee80211_chanctx *ctx;
-       int num_different_channels = 0;
        int total = 1;
+       struct iface_combination_params params = {
+               .radar_detect = radar_detect,
+       };
 
        lockdep_assert_held(&local->chanctx_mtx);
 
@@ -3322,12 +3323,19 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
                    !chandef->chan))
                return -EINVAL;
 
-       if (chandef)
-               num_different_channels = 1;
-
        if (WARN_ON(iftype >= NUM_NL80211_IFTYPES))
                return -EINVAL;
 
+       if (sdata->vif.type == NL80211_IFTYPE_AP ||
+           sdata->vif.type == NL80211_IFTYPE_MESH_POINT) {
+               /*
+                * always passing this is harmless, since it'll be the
+                * same value that cfg80211 finds if it finds the same
+                * interface ... and that's always allowed
+                */
+               params.new_beacon_int = sdata->vif.bss_conf.beacon_int;
+       }
+
        /* Always allow software iftypes */
        if (local->hw.wiphy->software_iftypes & BIT(iftype)) {
                if (radar_detect)
@@ -3335,24 +3343,26 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
                return 0;
        }
 
-       memset(num, 0, sizeof(num));
+       if (chandef)
+               params.num_different_channels = 1;
 
        if (iftype != NL80211_IFTYPE_UNSPECIFIED)
-               num[iftype] = 1;
+               params.iftype_num[iftype] = 1;
 
        list_for_each_entry(ctx, &local->chanctx_list, list) {
                if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
                        continue;
-               radar_detect |= ieee80211_chanctx_radar_detect(local, ctx);
+               params.radar_detect |=
+                       ieee80211_chanctx_radar_detect(local, ctx);
                if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) {
-                       num_different_channels++;
+                       params.num_different_channels++;
                        continue;
                }
                if (chandef && chanmode == IEEE80211_CHANCTX_SHARED &&
                    cfg80211_chandef_compatible(chandef,
                                                &ctx->conf.def))
                        continue;
-               num_different_channels++;
+               params.num_different_channels++;
        }
 
        list_for_each_entry_rcu(sdata_iter, &local->interfaces, list) {
@@ -3365,16 +3375,14 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
                    local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
                        continue;
 
-               num[wdev_iter->iftype]++;
+               params.iftype_num[wdev_iter->iftype]++;
                total++;
        }
 
-       if (total == 1 && !radar_detect)
+       if (total == 1 && !params.radar_detect)
                return 0;
 
-       return cfg80211_check_combinations(local->hw.wiphy,
-                                          num_different_channels,
-                                          radar_detect, num);
+       return cfg80211_check_combinations(local->hw.wiphy, &params);
 }
 
 static void
@@ -3390,12 +3398,10 @@ ieee80211_iter_max_chans(const struct ieee80211_iface_combination *c,
 int ieee80211_max_num_channels(struct ieee80211_local *local)
 {
        struct ieee80211_sub_if_data *sdata;
-       int num[NUM_NL80211_IFTYPES] = {};
        struct ieee80211_chanctx *ctx;
-       int num_different_channels = 0;
-       u8 radar_detect = 0;
        u32 max_num_different_channels = 1;
        int err;
+       struct iface_combination_params params = {0};
 
        lockdep_assert_held(&local->chanctx_mtx);
 
@@ -3403,17 +3409,17 @@ int ieee80211_max_num_channels(struct ieee80211_local *local)
                if (ctx->replace_state == IEEE80211_CHANCTX_WILL_BE_REPLACED)
                        continue;
 
-               num_different_channels++;
+               params.num_different_channels++;
 
-               radar_detect |= ieee80211_chanctx_radar_detect(local, ctx);
+               params.radar_detect |=
+                       ieee80211_chanctx_radar_detect(local, ctx);
        }
 
        list_for_each_entry_rcu(sdata, &local->interfaces, list)
-               num[sdata->wdev.iftype]++;
+               params.iftype_num[sdata->wdev.iftype]++;
 
-       err = cfg80211_iter_combinations(local->hw.wiphy,
-                                        num_different_channels, radar_detect,
-                                        num, ieee80211_iter_max_chans,
+       err = cfg80211_iter_combinations(local->hw.wiphy, &params,
+                                        ieee80211_iter_max_chans,
                                         &max_num_different_channels);
        if (err < 0)
                return err;
@@ -3456,3 +3462,10 @@ void ieee80211_txq_get_depth(struct ieee80211_txq *txq,
                *byte_cnt = txqi->tin.backlog_bytes + frag_bytes;
 }
 EXPORT_SYMBOL(ieee80211_txq_get_depth);
+
+const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS] = {
+       IEEE80211_WMM_IE_STA_QOSINFO_AC_VO,
+       IEEE80211_WMM_IE_STA_QOSINFO_AC_VI,
+       IEEE80211_WMM_IE_STA_QOSINFO_AC_BE,
+       IEEE80211_WMM_IE_STA_QOSINFO_AC_BK
+};
index ee715764a828954e4c8d90e3bfbcbc697c100ec0..6832bf6ab69fe012ea4eeb3c02b79523083cdc58 100644 (file)
@@ -270,6 +270,22 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,
                vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2);
        }
 
+       /*
+        * This is a workaround for VHT-enabled STAs which break the spec
+        * and have the VHT-MCS Rx map filled in with value 3 for all eight
+        * spatial streams, an example is AR9462.
+        *
+        * As per spec, in section 22.1.1 Introduction to the VHT PHY
+        * A VHT STA shall support at least single spatial stream VHT-MCSs
+        * 0 to 7 (transmit and receive) in all supported channel widths.
+        */
+       if (vht_cap->vht_mcs.rx_mcs_map == cpu_to_le16(0xFFFF)) {
+               vht_cap->vht_supported = false;
+               sdata_info(sdata, "Ignoring VHT IE from %pM due to invalid rx_mcs_map\n",
+                          sta->addr);
+               return;
+       }
+
        /* finally set up the bandwidth */
        switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
        case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:
index 9eb0aee9105b398b442380315ec758a430a103b1..3e3d3014e9ab6d026a13232ce6cb930b9fc87093 100644 (file)
@@ -236,26 +236,35 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata,
 {
        struct ieee80211_hdr *hdr = (void *)skb->data;
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+       u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+       u8 flags;
        u8 *p;
-       u8 ack_policy, tid;
 
        if (!ieee80211_is_data_qos(hdr->frame_control))
                return;
 
        p = ieee80211_get_qos_ctl(hdr);
-       tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
 
-       /* preserve EOSP bit */
-       ack_policy = *p & IEEE80211_QOS_CTL_EOSP;
+       /* set up the first byte */
+
+       /*
+        * preserve everything but the TID and ACK policy
+        * (which we both write here)
+        */
+       flags = *p & ~(IEEE80211_QOS_CTL_TID_MASK |
+                      IEEE80211_QOS_CTL_ACK_POLICY_MASK);
 
        if (is_multicast_ether_addr(hdr->addr1) ||
            sdata->noack_map & BIT(tid)) {
-               ack_policy |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK;
+               flags |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK;
                info->flags |= IEEE80211_TX_CTL_NO_ACK;
        }
 
-       /* qos header is 2 bytes */
-       *p++ = ack_policy | tid;
+       *p = flags | tid;
+
+       /* set up the second byte */
+       p++;
+
        if (ieee80211_vif_is_mesh(&sdata->vif)) {
                /* preserve RSPI and Mesh PS Level bit */
                *p &= ((IEEE80211_QOS_CTL_RSPI |
index b48c1e13e28170edd0202a6bb21c6e2fdb488f47..8af6dd388d1195540fdaf66f7ac55a452620ad5f 100644 (file)
@@ -57,7 +57,7 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
 
        if (info->control.hw_key &&
            (info->flags & IEEE80211_TX_CTL_DONTFRAG ||
-            tx->local->ops->set_frag_threshold) &&
+            ieee80211_hw_check(&tx->local->hw, SUPPORTS_TX_FRAG)) &&
            !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) {
                /* hwaccel - with no need for SW-generated MMIC */
                return TX_CONTINUE;
@@ -405,7 +405,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
        u8 *pos;
        u8 pn[6];
        u64 pn64;
-       u8 aad[2 * AES_BLOCK_SIZE];
+       u8 aad[CCM_AAD_LEN];
        u8 b_0[AES_BLOCK_SIZE];
 
        if (info->control.hw_key &&
@@ -461,10 +461,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
 
        pos += IEEE80211_CCMP_HDR_LEN;
        ccmp_special_blocks(skb, pn, b_0, aad);
-       ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
-                                 skb_put(skb, mic_len), mic_len);
-
-       return 0;
+       return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
+                                        skb_put(skb, mic_len), mic_len);
 }
 
 
@@ -639,7 +637,7 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
        u8 *pos;
        u8 pn[6];
        u64 pn64;
-       u8 aad[2 * AES_BLOCK_SIZE];
+       u8 aad[GCM_AAD_LEN];
        u8 j_0[AES_BLOCK_SIZE];
 
        if (info->control.hw_key &&
@@ -696,10 +694,8 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 
        pos += IEEE80211_GCMP_HDR_LEN;
        gcmp_special_blocks(skb, pn, j_0, aad);
-       ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len,
-                                 skb_put(skb, IEEE80211_GCMP_MIC_LEN));
-
-       return 0;
+       return ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len,
+                                        skb_put(skb, IEEE80211_GCMP_MIC_LEN));
 }
 
 ieee80211_tx_result
@@ -1123,9 +1119,9 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx)
        struct ieee80211_key *key = tx->key;
        struct ieee80211_mmie_16 *mmie;
        struct ieee80211_hdr *hdr;
-       u8 aad[20];
+       u8 aad[GMAC_AAD_LEN];
        u64 pn64;
-       u8 nonce[12];
+       u8 nonce[GMAC_NONCE_LEN];
 
        if (WARN_ON(skb_queue_len(&tx->skbs) != 1))
                return TX_DROP;
@@ -1171,7 +1167,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx)
        struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
        struct ieee80211_key *key = rx->key;
        struct ieee80211_mmie_16 *mmie;
-       u8 aad[20], mic[16], ipn[6], nonce[12];
+       u8 aad[GMAC_AAD_LEN], mic[GMAC_MIC_LEN], ipn[6], nonce[GMAC_NONCE_LEN];
        struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 
        if (!ieee80211_is_mgmt(hdr->frame_control))
index 13290a70fa714f0c0ce7083c223d835da04d77d8..1308a56f259149e7aee910f5e84dfe3e14819ec9 100644 (file)
@@ -246,6 +246,7 @@ enum {
        ncsi_dev_state_config_gls,
        ncsi_dev_state_config_done,
        ncsi_dev_state_suspend_select   = 0x0401,
+       ncsi_dev_state_suspend_gls,
        ncsi_dev_state_suspend_dcnt,
        ncsi_dev_state_suspend_dc,
        ncsi_dev_state_suspend_deselect,
@@ -264,6 +265,7 @@ struct ncsi_dev_priv {
 #endif
        unsigned int        package_num;     /* Number of packages         */
        struct list_head    packages;        /* List of packages           */
+       struct ncsi_channel *hot_channel;    /* Channel was ever active    */
        struct ncsi_request requests[256];   /* Request table              */
        unsigned int        request_id;      /* Last used request ID       */
 #define NCSI_REQ_START_IDX     1
index b41a6617d4980de34604c7a2d6db5047a5029126..6898e7229285a6720115a37d58673f569d4a9bff 100644 (file)
@@ -141,23 +141,35 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
                return -ENODEV;
 
        /* If the channel is active one, we need reconfigure it */
+       spin_lock_irqsave(&nc->lock, flags);
        ncm = &nc->modes[NCSI_MODE_LINK];
        hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
        ncm->data[3] = ntohl(hncdsc->status);
        if (!list_empty(&nc->link) ||
-           nc->state != NCSI_CHANNEL_ACTIVE ||
-           (ncm->data[3] & 0x1))
+           nc->state != NCSI_CHANNEL_ACTIVE) {
+               spin_unlock_irqrestore(&nc->lock, flags);
                return 0;
+       }
 
-       if (ndp->flags & NCSI_DEV_HWA)
+       spin_unlock_irqrestore(&nc->lock, flags);
+       if (!(ndp->flags & NCSI_DEV_HWA) && !(ncm->data[3] & 0x1))
                ndp->flags |= NCSI_DEV_RESHUFFLE;
 
        /* If this channel is the active one and the link doesn't
         * work, we have to choose another channel to be active one.
         * The logic here is exactly similar to what we do when link
         * is down on the active channel.
+        *
+        * On the other hand, we need configure it when host driver
+        * state on the active channel becomes ready.
         */
        ncsi_stop_channel_monitor(nc);
+
+       spin_lock_irqsave(&nc->lock, flags);
+       nc->state = (ncm->data[3] & 0x1) ? NCSI_CHANNEL_INACTIVE :
+                                          NCSI_CHANNEL_ACTIVE;
+       spin_unlock_irqrestore(&nc->lock, flags);
+
        spin_lock_irqsave(&ndp->lock, flags);
        list_add_tail_rcu(&nc->link, &ndp->channel_queue);
        spin_unlock_irqrestore(&ndp->lock, flags);
index 5e509e547c2ddf1867639e01541eb37b1348d663..a3bd5fa8ad093a3fb7533f00186111bf9a19c2ed 100644 (file)
@@ -540,42 +540,86 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
                nd->state = ncsi_dev_state_suspend_select;
                /* Fall through */
        case ncsi_dev_state_suspend_select:
-       case ncsi_dev_state_suspend_dcnt:
-       case ncsi_dev_state_suspend_dc:
-       case ncsi_dev_state_suspend_deselect:
                ndp->pending_req_num = 1;
 
-               np = ndp->active_package;
-               nc = ndp->active_channel;
+               nca.type = NCSI_PKT_CMD_SP;
                nca.package = np->id;
-               if (nd->state == ncsi_dev_state_suspend_select) {
-                       nca.type = NCSI_PKT_CMD_SP;
-                       nca.channel = NCSI_RESERVED_CHANNEL;
-                       if (ndp->flags & NCSI_DEV_HWA)
-                               nca.bytes[0] = 0;
-                       else
-                               nca.bytes[0] = 1;
+               nca.channel = NCSI_RESERVED_CHANNEL;
+               if (ndp->flags & NCSI_DEV_HWA)
+                       nca.bytes[0] = 0;
+               else
+                       nca.bytes[0] = 1;
+
+               /* To retrieve the last link states of channels in current
+                * package when current active channel needs fail over to
+                * another one. It means we will possibly select another
+                * channel as next active one. The link states of channels
+                * are most important factor of the selection. So we need
+                * accurate link states. Unfortunately, the link states on
+                * inactive channels can't be updated with LSC AEN in time.
+                */
+               if (ndp->flags & NCSI_DEV_RESHUFFLE)
+                       nd->state = ncsi_dev_state_suspend_gls;
+               else
                        nd->state = ncsi_dev_state_suspend_dcnt;
-               } else if (nd->state == ncsi_dev_state_suspend_dcnt) {
-                       nca.type = NCSI_PKT_CMD_DCNT;
-                       nca.channel = nc->id;
-                       nd->state = ncsi_dev_state_suspend_dc;
-               } else if (nd->state == ncsi_dev_state_suspend_dc) {
-                       nca.type = NCSI_PKT_CMD_DC;
+               ret = ncsi_xmit_cmd(&nca);
+               if (ret)
+                       goto error;
+
+               break;
+       case ncsi_dev_state_suspend_gls:
+               ndp->pending_req_num = np->channel_num;
+
+               nca.type = NCSI_PKT_CMD_GLS;
+               nca.package = np->id;
+
+               nd->state = ncsi_dev_state_suspend_dcnt;
+               NCSI_FOR_EACH_CHANNEL(np, nc) {
                        nca.channel = nc->id;
-                       nca.bytes[0] = 1;
-                       nd->state = ncsi_dev_state_suspend_deselect;
-               } else if (nd->state == ncsi_dev_state_suspend_deselect) {
-                       nca.type = NCSI_PKT_CMD_DP;
-                       nca.channel = NCSI_RESERVED_CHANNEL;
-                       nd->state = ncsi_dev_state_suspend_done;
+                       ret = ncsi_xmit_cmd(&nca);
+                       if (ret)
+                               goto error;
                }
 
+               break;
+       case ncsi_dev_state_suspend_dcnt:
+               ndp->pending_req_num = 1;
+
+               nca.type = NCSI_PKT_CMD_DCNT;
+               nca.package = np->id;
+               nca.channel = nc->id;
+
+               nd->state = ncsi_dev_state_suspend_dc;
                ret = ncsi_xmit_cmd(&nca);
-               if (ret) {
-                       nd->state = ncsi_dev_state_functional;
-                       return;
-               }
+               if (ret)
+                       goto error;
+
+               break;
+       case ncsi_dev_state_suspend_dc:
+               ndp->pending_req_num = 1;
+
+               nca.type = NCSI_PKT_CMD_DC;
+               nca.package = np->id;
+               nca.channel = nc->id;
+               nca.bytes[0] = 1;
+
+               nd->state = ncsi_dev_state_suspend_deselect;
+               ret = ncsi_xmit_cmd(&nca);
+               if (ret)
+                       goto error;
+
+               break;
+       case ncsi_dev_state_suspend_deselect:
+               ndp->pending_req_num = 1;
+
+               nca.type = NCSI_PKT_CMD_DP;
+               nca.package = np->id;
+               nca.channel = NCSI_RESERVED_CHANNEL;
+
+               nd->state = ncsi_dev_state_suspend_done;
+               ret = ncsi_xmit_cmd(&nca);
+               if (ret)
+                       goto error;
 
                break;
        case ncsi_dev_state_suspend_done:
@@ -589,6 +633,10 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp)
                netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n",
                            nd->state);
        }
+
+       return;
+error:
+       nd->state = ncsi_dev_state_functional;
 }
 
 static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
@@ -597,6 +645,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
        struct net_device *dev = nd->dev;
        struct ncsi_package *np = ndp->active_package;
        struct ncsi_channel *nc = ndp->active_channel;
+       struct ncsi_channel *hot_nc = NULL;
        struct ncsi_cmd_arg nca;
        unsigned char index;
        unsigned long flags;
@@ -702,12 +751,20 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
                break;
        case ncsi_dev_state_config_done:
                spin_lock_irqsave(&nc->lock, flags);
-               if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1)
+               if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
+                       hot_nc = nc;
                        nc->state = NCSI_CHANNEL_ACTIVE;
-               else
+               } else {
+                       hot_nc = NULL;
                        nc->state = NCSI_CHANNEL_INACTIVE;
+               }
                spin_unlock_irqrestore(&nc->lock, flags);
 
+               /* Update the hot channel */
+               spin_lock_irqsave(&ndp->lock, flags);
+               ndp->hot_channel = hot_nc;
+               spin_unlock_irqrestore(&ndp->lock, flags);
+
                ncsi_start_channel_monitor(nc);
                ncsi_process_next_channel(ndp);
                break;
@@ -725,10 +782,14 @@ error:
 static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
 {
        struct ncsi_package *np;
-       struct ncsi_channel *nc, *found;
+       struct ncsi_channel *nc, *found, *hot_nc;
        struct ncsi_channel_mode *ncm;
        unsigned long flags;
 
+       spin_lock_irqsave(&ndp->lock, flags);
+       hot_nc = ndp->hot_channel;
+       spin_unlock_irqrestore(&ndp->lock, flags);
+
        /* The search is done once an inactive channel with up
         * link is found.
         */
@@ -746,6 +807,9 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
                        if (!found)
                                found = nc;
 
+                       if (nc == hot_nc)
+                               found = nc;
+
                        ncm = &nc->modes[NCSI_MODE_LINK];
                        if (ncm->data[2] & 0x1) {
                                spin_unlock_irqrestore(&nc->lock, flags);
index e8d56d9a4df2cc2d92ebeb397aadd9869982c0cd..44410d30d4614d60d328b4fc04aee02c4f8b3ebc 100644 (file)
@@ -57,6 +57,10 @@ config NF_CONNTRACK
 config NF_LOG_COMMON
        tristate
 
+config NF_LOG_NETDEV
+       tristate "Netdev packet logging"
+       select NF_LOG_COMMON
+
 if NF_CONNTRACK
 
 config NF_CONNTRACK_MARK
@@ -474,6 +478,12 @@ config NFT_META
          This option adds the "meta" expression that you can use to match and
          to set packet metainformation such as the packet mark.
 
+config NFT_RT
+       tristate "Netfilter nf_tables routing module"
+       help
+         This option adds the "rt" expression that you can use to match
+         packet routing information such as the packet nexthop.
+
 config NFT_NUMGEN
        tristate "Netfilter nf_tables number generator module"
        help
@@ -581,6 +591,19 @@ config NFT_HASH
          This option adds the "hash" expression that you can use to perform
          a hash operation on registers.
 
+config NFT_FIB
+       tristate
+
+config NFT_FIB_INET
+       depends on NF_TABLES_INET
+       depends on NFT_FIB_IPV4
+       depends on NFT_FIB_IPV6
+       tristate "Netfilter nf_tables fib inet support"
+       help
+         This option allows using the FIB expression from the inet table.
+         The lookup will be delegated to the IPv4 or IPv6 FIB depending
+         on the protocol of the packet.
+
 if NF_TABLES_NETDEV
 
 config NF_DUP_NETDEV
@@ -1409,9 +1432,10 @@ config NETFILTER_XT_MATCH_SOCKET
        tristate '"socket" match support'
        depends on NETFILTER_XTABLES
        depends on NETFILTER_ADVANCED
-       depends on !NF_CONNTRACK || NF_CONNTRACK
        depends on IPV6 || IPV6=n
        depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
+       depends on NF_SOCKET_IPV4
+       depends on NF_SOCKET_IPV6
        select NF_DEFRAG_IPV4
        select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
        help
index c23c3c84416f7e3cdffd43d158a8722747190acb..5bbf767672ec042fe1a4de7423007a00d4ae0be7 100644 (file)
@@ -48,6 +48,9 @@ nf_nat-y      := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
 # generic transport layer logging
 obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
 
+# packet logging for netdev family
+obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o
+
 obj-$(CONFIG_NF_NAT) += nf_nat.o
 obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
 
@@ -81,6 +84,7 @@ obj-$(CONFIG_NF_TABLES_NETDEV)        += nf_tables_netdev.o
 obj-$(CONFIG_NFT_COMPAT)       += nft_compat.o
 obj-$(CONFIG_NFT_EXTHDR)       += nft_exthdr.o
 obj-$(CONFIG_NFT_META)         += nft_meta.o
+obj-$(CONFIG_NFT_RT)           += nft_rt.o
 obj-$(CONFIG_NFT_NUMGEN)       += nft_numgen.o
 obj-$(CONFIG_NFT_CT)           += nft_ct.o
 obj-$(CONFIG_NFT_LIMIT)                += nft_limit.o
@@ -96,6 +100,8 @@ obj-$(CONFIG_NFT_LOG)                += nft_log.o
 obj-$(CONFIG_NFT_MASQ)         += nft_masq.o
 obj-$(CONFIG_NFT_REDIR)                += nft_redir.o
 obj-$(CONFIG_NFT_HASH)         += nft_hash.o
+obj-$(CONFIG_NFT_FIB)          += nft_fib.o
+obj-$(CONFIG_NFT_FIB_INET)     += nft_fib_inet.o
 
 # nf_tables netdev
 obj-$(CONFIG_NFT_DUP_NETDEV)   += nft_dup_netdev.o
index fcb5d1df11e99b61351e8e381626c96e6ee1820b..de30e08d58f2fe848549e718757a403b0b8cf2ba 100644 (file)
@@ -302,77 +302,40 @@ void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
 }
 EXPORT_SYMBOL(_nf_unregister_hooks);
 
-unsigned int nf_iterate(struct sk_buff *skb,
-                       struct nf_hook_state *state,
-                       struct nf_hook_entry **entryp)
-{
-       unsigned int verdict;
-
-       /*
-        * The caller must not block between calls to this
-        * function because of risk of continuing from deleted element.
-        */
-       while (*entryp) {
-               if (state->thresh > (*entryp)->ops.priority) {
-                       *entryp = rcu_dereference((*entryp)->next);
-                       continue;
-               }
-
-               /* Optimization: we don't need to hold module
-                  reference here, since function can't sleep. --RR */
-repeat:
-               verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
-               if (verdict != NF_ACCEPT) {
-#ifdef CONFIG_NETFILTER_DEBUG
-                       if (unlikely((verdict & NF_VERDICT_MASK)
-                                                       > NF_MAX_VERDICT)) {
-                               NFDEBUG("Evil return from %p(%u).\n",
-                                       (*entryp)->ops.hook, state->hook);
-                               *entryp = rcu_dereference((*entryp)->next);
-                               continue;
-                       }
-#endif
-                       if (verdict != NF_REPEAT)
-                               return verdict;
-                       goto repeat;
-               }
-               *entryp = rcu_dereference((*entryp)->next);
-       }
-       return NF_ACCEPT;
-}
-
-
 /* Returns 1 if okfn() needs to be executed by the caller,
  * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
-int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
+int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
+                struct nf_hook_entry *entry)
 {
-       struct nf_hook_entry *entry;
        unsigned int verdict;
-       int ret = 0;
-
-       entry = rcu_dereference(state->hook_entries);
-next_hook:
-       verdict = nf_iterate(skb, state, &entry);
-       if (verdict == NF_ACCEPT || verdict == NF_STOP) {
-               ret = 1;
-       } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
-               kfree_skb(skb);
-               ret = NF_DROP_GETERR(verdict);
-               if (ret == 0)
-                       ret = -EPERM;
-       } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
-               int err;
-
-               RCU_INIT_POINTER(state->hook_entries, entry);
-               err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
-               if (err < 0) {
-                       if (err == -ESRCH &&
-                          (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
-                               goto next_hook;
+       int ret;
+
+       do {
+               verdict = entry->ops.hook(entry->ops.priv, skb, state);
+               switch (verdict & NF_VERDICT_MASK) {
+               case NF_ACCEPT:
+                       entry = rcu_dereference(entry->next);
+                       break;
+               case NF_DROP:
                        kfree_skb(skb);
+                       ret = NF_DROP_GETERR(verdict);
+                       if (ret == 0)
+                               ret = -EPERM;
+                       return ret;
+               case NF_QUEUE:
+                       ret = nf_queue(skb, state, &entry, verdict);
+                       if (ret == 1 && entry)
+                               continue;
+                       return ret;
+               default:
+                       /* Implicit handling for NF_STOLEN, as well as any other
+                        * non conventional verdicts.
+                        */
+                       return 0;
                }
-       }
-       return ret;
+       } while (entry);
+
+       return 1;
 }
 EXPORT_SYMBOL(nf_hook_slow);
 
index 234a8ec82076803a1c2ca75de9c7a2406363954b..4083a8051f0f7a57fd524f478d94d99d32f37fdc 100644 (file)
@@ -99,6 +99,15 @@ config IP_SET_HASH_IPPORTNET
 
          To compile it as a module, choose M here.  If unsure, say N.
 
+config IP_SET_HASH_IPMAC
+       tristate "hash:ip,mac set support"
+       depends on IP_SET
+       help
+         This option adds the hash:ip,mac set type support, by which
+         one can store IPv4/IPv6 address and MAC (ethernet address) pairs in a set.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
 config IP_SET_HASH_MAC
        tristate "hash:mac set support"
        depends on IP_SET
index 3dbd5e95848947230f80692cb26acb9437b3106c..28ec148df02d0b094e1cd854b6af575366ae3653 100644 (file)
@@ -14,6 +14,7 @@ obj-$(CONFIG_IP_SET_BITMAP_PORT) += ip_set_bitmap_port.o
 
 # hash types
 obj-$(CONFIG_IP_SET_HASH_IP) += ip_set_hash_ip.o
+obj-$(CONFIG_IP_SET_HASH_IPMAC) += ip_set_hash_ipmac.o
 obj-$(CONFIG_IP_SET_HASH_IPMARK) += ip_set_hash_ipmark.o
 obj-$(CONFIG_IP_SET_HASH_IPPORT) += ip_set_hash_ipport.o
 obj-$(CONFIG_IP_SET_HASH_IPPORTIP) += ip_set_hash_ipportip.o
index 2e8e7e5fb4a64344ccb8f80b03607cc9b7f81053..6f09a99298cdf6aaf18c1ff947c76db2d4985865 100644 (file)
@@ -22,6 +22,7 @@
 #define mtype_kadt             IPSET_TOKEN(MTYPE, _kadt)
 #define mtype_uadt             IPSET_TOKEN(MTYPE, _uadt)
 #define mtype_destroy          IPSET_TOKEN(MTYPE, _destroy)
+#define mtype_memsize          IPSET_TOKEN(MTYPE, _memsize)
 #define mtype_flush            IPSET_TOKEN(MTYPE, _flush)
 #define mtype_head             IPSET_TOKEN(MTYPE, _head)
 #define mtype_same_set         IPSET_TOKEN(MTYPE, _same_set)
@@ -40,11 +41,8 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 {
        struct mtype *map = set->data;
 
-       init_timer(&map->gc);
-       map->gc.data = (unsigned long)set;
-       map->gc.function = gc;
-       map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
-       add_timer(&map->gc);
+       setup_timer(&map->gc, gc, (unsigned long)set);
+       mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
 }
 
 static void
@@ -82,6 +80,16 @@ mtype_flush(struct ip_set *set)
        if (set->extensions & IPSET_EXT_DESTROY)
                mtype_ext_cleanup(set);
        memset(map->members, 0, map->memsize);
+       set->elements = 0;
+       set->ext_size = 0;
+}
+
+/* Calculate the actual memory size of the set data */
+static size_t
+mtype_memsize(const struct mtype *map, size_t dsize)
+{
+       return sizeof(*map) + map->memsize +
+              map->elements * dsize;
 }
 
 static int
@@ -89,14 +97,15 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 {
        const struct mtype *map = set->data;
        struct nlattr *nested;
-       size_t memsize = sizeof(*map) + map->memsize;
+       size_t memsize = mtype_memsize(map, set->dsize) + set->ext_size;
 
        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
        if (!nested)
                goto nla_put_failure;
        if (mtype_do_head(skb, map) ||
            nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
-           nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
+           nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
+           nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
                goto nla_put_failure;
        if (unlikely(ip_set_put_flags(skb, set)))
                goto nla_put_failure;
@@ -140,6 +149,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
        if (ret == IPSET_ADD_FAILED) {
                if (SET_WITH_TIMEOUT(set) &&
                    ip_set_timeout_expired(ext_timeout(x, set))) {
+                       set->elements--;
                        ret = 0;
                } else if (!(flags & IPSET_FLAG_EXIST)) {
                        set_bit(e->id, map->members);
@@ -148,6 +158,8 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
                /* Element is re-added, cleanup extensions */
                ip_set_ext_destroy(set, x);
        }
+       if (ret > 0)
+               set->elements--;
 
        if (SET_WITH_TIMEOUT(set))
 #ifdef IP_SET_BITMAP_STORED_TIMEOUT
@@ -159,12 +171,13 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
        if (SET_WITH_COUNTER(set))
                ip_set_init_counter(ext_counter(x, set), ext);
        if (SET_WITH_COMMENT(set))
-               ip_set_init_comment(ext_comment(x, set), ext);
+               ip_set_init_comment(set, ext_comment(x, set), ext);
        if (SET_WITH_SKBINFO(set))
                ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
 
        /* Activate element */
        set_bit(e->id, map->members);
+       set->elements++;
 
        return 0;
 }
@@ -181,6 +194,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
                return -IPSET_ERR_EXIST;
 
        ip_set_ext_destroy(set, x);
+       set->elements--;
        if (SET_WITH_TIMEOUT(set) &&
            ip_set_timeout_expired(ext_timeout(x, set)))
                return -IPSET_ERR_EXIST;
@@ -276,6 +290,7 @@ mtype_gc(unsigned long ul_set)
                        if (ip_set_timeout_expired(ext_timeout(x, set))) {
                                clear_bit(id, map->members);
                                ip_set_ext_destroy(set, x);
+                               set->elements--;
                        }
                }
        spin_unlock_bh(&set->lock);
index a748b0c2c981482eb853159f96ac69b563aebae8..c296f9b606d495d59c50ee46a04e7e17b21c1530 100644 (file)
@@ -36,7 +36,7 @@ struct ip_set_net {
        bool            is_destroyed;   /* all sets are destroyed */
 };
 
-static int ip_set_net_id __read_mostly;
+static unsigned int ip_set_net_id __read_mostly;
 
 static inline struct ip_set_net *ip_set_pernet(struct net *net)
 {
@@ -324,7 +324,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 }
 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
 
-typedef void (*destroyer)(void *);
+typedef void (*destroyer)(struct ip_set *, void *);
 /* ipset data extension types, in size order */
 
 const struct ip_set_ext_type ip_set_extensions[] = {
@@ -426,20 +426,20 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
                if (!SET_WITH_SKBINFO(set))
                        return -IPSET_ERR_SKBINFO;
                fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK]));
-               ext->skbmark = fullmark >> 32;
-               ext->skbmarkmask = fullmark & 0xffffffff;
+               ext->skbinfo.skbmark = fullmark >> 32;
+               ext->skbinfo.skbmarkmask = fullmark & 0xffffffff;
        }
        if (tb[IPSET_ATTR_SKBPRIO]) {
                if (!SET_WITH_SKBINFO(set))
                        return -IPSET_ERR_SKBINFO;
-               ext->skbprio = be32_to_cpu(nla_get_be32(
-                                           tb[IPSET_ATTR_SKBPRIO]));
+               ext->skbinfo.skbprio =
+                       be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO]));
        }
        if (tb[IPSET_ATTR_SKBQUEUE]) {
                if (!SET_WITH_SKBINFO(set))
                        return -IPSET_ERR_SKBINFO;
-               ext->skbqueue = be16_to_cpu(nla_get_be16(
-                                           tb[IPSET_ATTR_SKBQUEUE]));
+               ext->skbinfo.skbqueue =
+                       be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE]));
        }
        return 0;
 }
@@ -541,7 +541,7 @@ int
 ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
            const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-       struct ip_set *set = ip_set_rcu_get(par->net, index);
+       struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
        int ret = 0;
 
        BUG_ON(!set);
@@ -579,7 +579,7 @@ int
 ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
           const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-       struct ip_set *set = ip_set_rcu_get(par->net, index);
+       struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
        int ret;
 
        BUG_ON(!set);
@@ -601,7 +601,7 @@ int
 ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
           const struct xt_action_param *par, struct ip_set_adt_opt *opt)
 {
-       struct ip_set *set = ip_set_rcu_get(par->net, index);
+       struct ip_set *set = ip_set_rcu_get(xt_net(par), index);
        int ret = 0;
 
        BUG_ON(!set);
index d32fd6b036bfa8f3301fd5ab15a515b413442512..1b05d4a7d5a13e3a3eb42a56fdac186bb40451b2 100644 (file)
@@ -85,6 +85,8 @@ struct htable {
 };
 
 #define hbucket(h, i)          ((h)->bucket[i])
+#define ext_size(n, dsize)     \
+       (sizeof(struct hbucket) + (n) * (dsize))
 
 #ifndef IPSET_NET_COUNT
 #define IPSET_NET_COUNT                1
@@ -150,24 +152,34 @@ htable_bits(u32 hashsize)
 #define INIT_CIDR(cidr, host_mask)     \
        DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask))
 
-#define SET_HOST_MASK(family)  (family == AF_INET ? 32 : 128)
-
 #ifdef IP_SET_HASH_WITH_NET0
-/* cidr from 0 to SET_HOST_MASK() value and c = cidr + 1 */
-#define NLEN(family)           (SET_HOST_MASK(family) + 1)
+/* cidr from 0 to HOST_MASK value and c = cidr + 1 */
+#define NLEN                   (HOST_MASK + 1)
 #define CIDR_POS(c)            ((c) - 1)
 #else
-/* cidr from 1 to SET_HOST_MASK() value and c = cidr + 1 */
-#define NLEN(family)           SET_HOST_MASK(family)
+/* cidr from 1 to HOST_MASK value and c = cidr + 1 */
+#define NLEN                   HOST_MASK
 #define CIDR_POS(c)            ((c) - 2)
 #endif
 
 #else
-#define NLEN(family)           0
+#define NLEN                   0
 #endif /* IP_SET_HASH_WITH_NETS */
 
 #endif /* _IP_SET_HASH_GEN_H */
 
+#ifndef MTYPE
+#error "MTYPE is not defined!"
+#endif
+
+#ifndef HTYPE
+#error "HTYPE is not defined!"
+#endif
+
+#ifndef HOST_MASK
+#error "HOST_MASK is not defined!"
+#endif
+
 /* Family dependent templates */
 
 #undef ahash_data
@@ -191,7 +203,6 @@ htable_bits(u32 hashsize)
 #undef mtype_same_set
 #undef mtype_kadt
 #undef mtype_uadt
-#undef mtype
 
 #undef mtype_add
 #undef mtype_del
@@ -207,6 +218,7 @@ htable_bits(u32 hashsize)
 #undef mtype_variant
 #undef mtype_data_match
 
+#undef htype
 #undef HKEY
 
 #define mtype_data_equal       IPSET_TOKEN(MTYPE, _data_equal)
@@ -233,7 +245,6 @@ htable_bits(u32 hashsize)
 #define mtype_same_set         IPSET_TOKEN(MTYPE, _same_set)
 #define mtype_kadt             IPSET_TOKEN(MTYPE, _kadt)
 #define mtype_uadt             IPSET_TOKEN(MTYPE, _uadt)
-#define mtype                  MTYPE
 
 #define mtype_add              IPSET_TOKEN(MTYPE, _add)
 #define mtype_del              IPSET_TOKEN(MTYPE, _del)
@@ -249,62 +260,54 @@ htable_bits(u32 hashsize)
 #define mtype_variant          IPSET_TOKEN(MTYPE, _variant)
 #define mtype_data_match       IPSET_TOKEN(MTYPE, _data_match)
 
-#ifndef MTYPE
-#error "MTYPE is not defined!"
-#endif
-
-#ifndef HOST_MASK
-#error "HOST_MASK is not defined!"
-#endif
-
 #ifndef HKEY_DATALEN
 #define HKEY_DATALEN           sizeof(struct mtype_elem)
 #endif
 
-#define HKEY(data, initval, htable_bits)                       \
-(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval)    \
-       & jhash_mask(htable_bits))
+#define htype                  MTYPE
 
-#ifndef htype
-#ifndef HTYPE
-#error "HTYPE is not defined!"
-#endif /* HTYPE */
-#define htype                  HTYPE
+#define HKEY(data, initval, htable_bits)                       \
+({                                                             \
+       const u32 *__k = (const u32 *)(data);                   \
+       u32 __l = HKEY_DATALEN / sizeof(u32);                   \
+                                                               \
+       BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0);          \
+                                                               \
+       jhash2(__k, __l, initval) & jhash_mask(htable_bits);    \
+})
 
 /* The generic hash structure */
 struct htype {
        struct htable __rcu *table; /* the hash table */
+       struct timer_list gc;   /* garbage collection when timeout enabled */
        u32 maxelem;            /* max elements in the hash */
-       u32 elements;           /* current element (vs timeout) */
        u32 initval;            /* random jhash init value */
 #ifdef IP_SET_HASH_WITH_MARKMASK
        u32 markmask;           /* markmask value for mark mask to store */
 #endif
-       struct timer_list gc;   /* garbage collection when timeout enabled */
-       struct mtype_elem next; /* temporary storage for uadd */
 #ifdef IP_SET_HASH_WITH_MULTI
        u8 ahash_max;           /* max elements in an array block */
 #endif
 #ifdef IP_SET_HASH_WITH_NETMASK
        u8 netmask;             /* netmask value for subnets to store */
 #endif
+       struct mtype_elem next; /* temporary storage for uadd */
 #ifdef IP_SET_HASH_WITH_NETS
-       struct net_prefixes nets[0]; /* book-keeping of prefixes */
+       struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */
 #endif
 };
-#endif /* htype */
 
 #ifdef IP_SET_HASH_WITH_NETS
 /* Network cidr size book keeping when the hash stores different
  * sized networks. cidr == real cidr + 1 to support /0.
  */
 static void
-mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
+mtype_add_cidr(struct htype *h, u8 cidr, u8 n)
 {
        int i, j;
 
        /* Add in increasing prefix order, so larger cidr first */
-       for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) {
+       for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) {
                if (j != -1) {
                        continue;
                } else if (h->nets[i].cidr[n] < cidr) {
@@ -323,11 +326,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 }
 
 static void
-mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
+mtype_del_cidr(struct htype *h, u8 cidr, u8 n)
 {
-       u8 i, j, net_end = nets_length - 1;
+       u8 i, j, net_end = NLEN - 1;
 
-       for (i = 0; i < nets_length; i++) {
+       for (i = 0; i < NLEN; i++) {
                if (h->nets[i].cidr[n] != cidr)
                        continue;
                h->nets[CIDR_POS(cidr)].nets[n]--;
@@ -343,24 +346,9 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 
 /* Calculate the actual memory size of the set data */
 static size_t
-mtype_ahash_memsize(const struct htype *h, const struct htable *t,
-                   u8 nets_length, size_t dsize)
+mtype_ahash_memsize(const struct htype *h, const struct htable *t)
 {
-       u32 i;
-       struct hbucket *n;
-       size_t memsize = sizeof(*h) + sizeof(*t);
-
-#ifdef IP_SET_HASH_WITH_NETS
-       memsize += sizeof(struct net_prefixes) * nets_length;
-#endif
-       for (i = 0; i < jhash_size(t->htable_bits); i++) {
-               n = rcu_dereference_bh(hbucket(t, i));
-               if (!n)
-                       continue;
-               memsize += sizeof(struct hbucket) + n->size * dsize;
-       }
-
-       return memsize;
+       return sizeof(*h) + sizeof(*t);
 }
 
 /* Get the ith element from the array block n */
@@ -398,9 +386,10 @@ mtype_flush(struct ip_set *set)
                kfree_rcu(n, rcu);
        }
 #ifdef IP_SET_HASH_WITH_NETS
-       memset(h->nets, 0, sizeof(struct net_prefixes) * NLEN(set->family));
+       memset(h->nets, 0, sizeof(h->nets));
 #endif
-       h->elements = 0;
+       set->elements = 0;
+       set->ext_size = 0;
 }
 
 /* Destroy the hashtable part of the set */
@@ -444,11 +433,8 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 {
        struct htype *h = set->data;
 
-       init_timer(&h->gc);
-       h->gc.data = (unsigned long)set;
-       h->gc.function = gc;
-       h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
-       add_timer(&h->gc);
+       setup_timer(&h->gc, gc, (unsigned long)set);
+       mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
        pr_debug("gc initialized, run in every %u\n",
                 IPSET_GC_PERIOD(set->timeout));
 }
@@ -473,12 +459,13 @@ mtype_same_set(const struct ip_set *a, const struct ip_set *b)
 
 /* Delete expired elements from the hashtable */
 static void
-mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
+mtype_expire(struct ip_set *set, struct htype *h)
 {
        struct htable *t;
        struct hbucket *n, *tmp;
        struct mtype_elem *data;
        u32 i, j, d;
+       size_t dsize = set->dsize;
 #ifdef IP_SET_HASH_WITH_NETS
        u8 k;
 #endif
@@ -494,21 +481,20 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
                                continue;
                        }
                        data = ahash_data(n, j, dsize);
-                       if (ip_set_timeout_expired(ext_timeout(data, set))) {
-                               pr_debug("expired %u/%u\n", i, j);
-                               clear_bit(j, n->used);
-                               smp_mb__after_atomic();
+                       if (!ip_set_timeout_expired(ext_timeout(data, set)))
+                               continue;
+                       pr_debug("expired %u/%u\n", i, j);
+                       clear_bit(j, n->used);
+                       smp_mb__after_atomic();
 #ifdef IP_SET_HASH_WITH_NETS
-                               for (k = 0; k < IPSET_NET_COUNT; k++)
-                                       mtype_del_cidr(h,
-                                               NCIDR_PUT(DCIDR_GET(data->cidr,
-                                                                   k)),
-                                               nets_length, k);
+                       for (k = 0; k < IPSET_NET_COUNT; k++)
+                               mtype_del_cidr(h,
+                                       NCIDR_PUT(DCIDR_GET(data->cidr, k)),
+                                       k);
 #endif
-                               ip_set_ext_destroy(set, data);
-                               h->elements--;
-                               d++;
-                       }
+                       ip_set_ext_destroy(set, data);
+                       set->elements--;
+                       d++;
                }
                if (d >= AHASH_INIT_SIZE) {
                        if (d >= n->size) {
@@ -532,6 +518,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
                                d++;
                        }
                        tmp->pos = d;
+                       set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
                        rcu_assign_pointer(hbucket(t, i), tmp);
                        kfree_rcu(n, rcu);
                }
@@ -546,7 +533,7 @@ mtype_gc(unsigned long ul_set)
 
        pr_debug("called\n");
        spin_lock_bh(&set->lock);
-       mtype_expire(set, h, NLEN(set->family), set->dsize);
+       mtype_expire(set, h);
        spin_unlock_bh(&set->lock);
 
        h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
@@ -563,7 +550,7 @@ mtype_resize(struct ip_set *set, bool retried)
        struct htype *h = set->data;
        struct htable *t, *orig;
        u8 htable_bits;
-       size_t dsize = set->dsize;
+       size_t extsize, dsize = set->dsize;
 #ifdef IP_SET_HASH_WITH_NETS
        u8 flags;
        struct mtype_elem *tmp;
@@ -606,6 +593,7 @@ retry:
        /* There can't be another parallel resizing, but dumping is possible */
        atomic_set(&orig->ref, 1);
        atomic_inc(&orig->uref);
+       extsize = 0;
        pr_debug("attempt to resize set %s from %u to %u, t %p\n",
                 set->name, orig->htable_bits, htable_bits, orig);
        for (i = 0; i < jhash_size(orig->htable_bits); i++) {
@@ -636,6 +624,7 @@ retry:
                                        goto cleanup;
                                }
                                m->size = AHASH_INIT_SIZE;
+                               extsize = ext_size(AHASH_INIT_SIZE, dsize);
                                RCU_INIT_POINTER(hbucket(t, key), m);
                        } else if (m->pos >= m->size) {
                                struct hbucket *ht;
@@ -655,6 +644,7 @@ retry:
                                memcpy(ht, m, sizeof(struct hbucket) +
                                              m->size * dsize);
                                ht->size = m->size + AHASH_INIT_SIZE;
+                               extsize += ext_size(AHASH_INIT_SIZE, dsize);
                                kfree(m);
                                m = ht;
                                RCU_INIT_POINTER(hbucket(t, key), ht);
@@ -668,6 +658,7 @@ retry:
                }
        }
        rcu_assign_pointer(h->table, t);
+       set->ext_size = extsize;
 
        spin_unlock_bh(&set->lock);
 
@@ -715,11 +706,11 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
        bool deleted = false, forceadd = false, reuse = false;
        u32 key, multi = 0;
 
-       if (h->elements >= h->maxelem) {
+       if (set->elements >= h->maxelem) {
                if (SET_WITH_TIMEOUT(set))
                        /* FIXME: when set is full, we slow down here */
-                       mtype_expire(set, h, NLEN(set->family), set->dsize);
-               if (h->elements >= h->maxelem && SET_WITH_FORCEADD(set))
+                       mtype_expire(set, h);
+               if (set->elements >= h->maxelem && SET_WITH_FORCEADD(set))
                        forceadd = true;
        }
 
@@ -727,20 +718,15 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
        key = HKEY(value, h->initval, t->htable_bits);
        n = __ipset_dereference_protected(hbucket(t, key), 1);
        if (!n) {
-               if (forceadd) {
-                       if (net_ratelimit())
-                               pr_warn("Set %s is full, maxelem %u reached\n",
-                                       set->name, h->maxelem);
-                       return -IPSET_ERR_HASH_FULL;
-               } else if (h->elements >= h->maxelem) {
+               if (forceadd || set->elements >= h->maxelem)
                        goto set_full;
-               }
                old = NULL;
                n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize,
                            GFP_ATOMIC);
                if (!n)
                        return -ENOMEM;
                n->size = AHASH_INIT_SIZE;
+               set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
                goto copy_elem;
        }
        for (i = 0; i < n->pos; i++) {
@@ -778,14 +764,14 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
                        for (i = 0; i < IPSET_NET_COUNT; i++)
                                mtype_del_cidr(h,
                                        NCIDR_PUT(DCIDR_GET(data->cidr, i)),
-                                       NLEN(set->family), i);
+                                       i);
 #endif
                        ip_set_ext_destroy(set, data);
-                       h->elements--;
+                       set->elements--;
                }
                goto copy_data;
        }
-       if (h->elements >= h->maxelem)
+       if (set->elements >= h->maxelem)
                goto set_full;
        /* Create a new slot */
        if (n->pos >= n->size) {
@@ -804,17 +790,17 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
                memcpy(n, old, sizeof(struct hbucket) +
                       old->size * set->dsize);
                n->size = old->size + AHASH_INIT_SIZE;
+               set->ext_size += ext_size(AHASH_INIT_SIZE, set->dsize);
        }
 
 copy_elem:
        j = n->pos++;
        data = ahash_data(n, j, set->dsize);
 copy_data:
-       h->elements++;
+       set->elements++;
 #ifdef IP_SET_HASH_WITH_NETS
        for (i = 0; i < IPSET_NET_COUNT; i++)
-               mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)),
-                              NLEN(set->family), i);
+               mtype_add_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i);
 #endif
        memcpy(data, d, sizeof(struct mtype_elem));
 overwrite_extensions:
@@ -824,7 +810,7 @@ overwrite_extensions:
        if (SET_WITH_COUNTER(set))
                ip_set_init_counter(ext_counter(data, set), ext);
        if (SET_WITH_COMMENT(set))
-               ip_set_init_comment(ext_comment(data, set), ext);
+               ip_set_init_comment(set, ext_comment(data, set), ext);
        if (SET_WITH_SKBINFO(set))
                ip_set_init_skbinfo(ext_skbinfo(data, set), ext);
        /* Must come last for the case when timed out entry is reused */
@@ -883,11 +869,11 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
                smp_mb__after_atomic();
                if (i + 1 == n->pos)
                        n->pos--;
-               h->elements--;
+               set->elements--;
 #ifdef IP_SET_HASH_WITH_NETS
                for (j = 0; j < IPSET_NET_COUNT; j++)
                        mtype_del_cidr(h, NCIDR_PUT(DCIDR_GET(d->cidr, j)),
-                                      NLEN(set->family), j);
+                                      j);
 #endif
                ip_set_ext_destroy(set, data);
 
@@ -896,6 +882,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
                                k++;
                }
                if (n->pos == 0 && k == 0) {
+                       set->ext_size -= ext_size(n->size, dsize);
                        rcu_assign_pointer(hbucket(t, key), NULL);
                        kfree_rcu(n, rcu);
                } else if (k >= AHASH_INIT_SIZE) {
@@ -914,6 +901,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
                                k++;
                        }
                        tmp->pos = k;
+                       set->ext_size -= ext_size(AHASH_INIT_SIZE, dsize);
                        rcu_assign_pointer(hbucket(t, key), tmp);
                        kfree_rcu(n, rcu);
                }
@@ -957,14 +945,13 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
        int i, j = 0;
 #endif
        u32 key, multi = 0;
-       u8 nets_length = NLEN(set->family);
 
        pr_debug("test by nets\n");
-       for (; j < nets_length && h->nets[j].cidr[0] && !multi; j++) {
+       for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) {
 #if IPSET_NET_COUNT == 2
                mtype_data_reset_elem(d, &orig);
                mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false);
-               for (k = 0; k < nets_length && h->nets[k].cidr[1] && !multi;
+               for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi;
                     k++) {
                        mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]),
                                           true);
@@ -1021,7 +1008,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
         * try all possible network sizes
         */
        for (i = 0; i < IPSET_NET_COUNT; i++)
-               if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family))
+               if (DCIDR_GET(d->cidr, i) != HOST_MASK)
                        break;
        if (i == IPSET_NET_COUNT) {
                ret = mtype_test_cidrs(set, d, ext, mext, flags);
@@ -1062,7 +1049,7 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
 
        rcu_read_lock_bh();
        t = rcu_dereference_bh_nfnl(h->table);
-       memsize = mtype_ahash_memsize(h, t, NLEN(set->family), set->dsize);
+       memsize = mtype_ahash_memsize(h, t) + set->ext_size;
        htable_bits = t->htable_bits;
        rcu_read_unlock_bh();
 
@@ -1083,7 +1070,8 @@ mtype_head(struct ip_set *set, struct sk_buff *skb)
                goto nla_put_failure;
 #endif
        if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
-           nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)))
+           nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
+           nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
                goto nla_put_failure;
        if (unlikely(ip_set_put_flags(skb, set)))
                goto nla_put_failure;
@@ -1238,41 +1226,35 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
        struct htype *h;
        struct htable *t;
 
+       pr_debug("Create set %s with family %s\n",
+                set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
+
 #ifndef IP_SET_PROTO_UNDEF
        if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6))
                return -IPSET_ERR_INVALID_FAMILY;
 #endif
 
-#ifdef IP_SET_HASH_WITH_MARKMASK
-       markmask = 0xffffffff;
-#endif
-#ifdef IP_SET_HASH_WITH_NETMASK
-       netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
-       pr_debug("Create set %s with family %s\n",
-                set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6");
-#endif
-
        if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) ||
                     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) ||
                     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
                     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))
                return -IPSET_ERR_PROTOCOL;
+
 #ifdef IP_SET_HASH_WITH_MARKMASK
        /* Separated condition in order to avoid directive in argument list */
        if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK)))
                return -IPSET_ERR_PROTOCOL;
-#endif
 
-       if (tb[IPSET_ATTR_HASHSIZE]) {
-               hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
-               if (hashsize < IPSET_MIMINAL_HASHSIZE)
-                       hashsize = IPSET_MIMINAL_HASHSIZE;
+       markmask = 0xffffffff;
+       if (tb[IPSET_ATTR_MARKMASK]) {
+               markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
+               if (markmask == 0)
+                       return -IPSET_ERR_INVALID_MARKMASK;
        }
-
-       if (tb[IPSET_ATTR_MAXELEM])
-               maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
+#endif
 
 #ifdef IP_SET_HASH_WITH_NETMASK
+       netmask = set->family == NFPROTO_IPV4 ? 32 : 128;
        if (tb[IPSET_ATTR_NETMASK]) {
                netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
 
@@ -1282,33 +1264,21 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
                        return -IPSET_ERR_INVALID_NETMASK;
        }
 #endif
-#ifdef IP_SET_HASH_WITH_MARKMASK
-       if (tb[IPSET_ATTR_MARKMASK]) {
-               markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK]));
 
-               if (markmask == 0)
-                       return -IPSET_ERR_INVALID_MARKMASK;
+       if (tb[IPSET_ATTR_HASHSIZE]) {
+               hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]);
+               if (hashsize < IPSET_MIMINAL_HASHSIZE)
+                       hashsize = IPSET_MIMINAL_HASHSIZE;
        }
-#endif
+
+       if (tb[IPSET_ATTR_MAXELEM])
+               maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]);
 
        hsize = sizeof(*h);
-#ifdef IP_SET_HASH_WITH_NETS
-       hsize += sizeof(struct net_prefixes) * NLEN(set->family);
-#endif
        h = kzalloc(hsize, GFP_KERNEL);
        if (!h)
                return -ENOMEM;
 
-       h->maxelem = maxelem;
-#ifdef IP_SET_HASH_WITH_NETMASK
-       h->netmask = netmask;
-#endif
-#ifdef IP_SET_HASH_WITH_MARKMASK
-       h->markmask = markmask;
-#endif
-       get_random_bytes(&h->initval, sizeof(h->initval));
-       set->timeout = IPSET_NO_TIMEOUT;
-
        hbits = htable_bits(hashsize);
        hsize = htable_size(hbits);
        if (hsize == 0) {
@@ -1320,8 +1290,17 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
                kfree(h);
                return -ENOMEM;
        }
+       h->maxelem = maxelem;
+#ifdef IP_SET_HASH_WITH_NETMASK
+       h->netmask = netmask;
+#endif
+#ifdef IP_SET_HASH_WITH_MARKMASK
+       h->markmask = markmask;
+#endif
+       get_random_bytes(&h->initval, sizeof(h->initval));
+
        t->htable_bits = hbits;
-       rcu_assign_pointer(h->table, t);
+       RCU_INIT_POINTER(h->table, t);
 
        set->data = h;
 #ifndef IP_SET_PROTO_UNDEF
@@ -1339,6 +1318,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
                        __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem)));
        }
 #endif
+       set->timeout = IPSET_NO_TIMEOUT;
        if (tb[IPSET_ATTR_TIMEOUT]) {
                set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 #ifndef IP_SET_PROTO_UNDEF
index 9d6bf19f7b78046fa093e2f58940fe795c779b15..20bfbd315f61822e53e90273356686eb9f1d3648 100644 (file)
@@ -82,7 +82,7 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,
              const struct xt_action_param *par,
              enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_ip *h = set->data;
+       const struct hash_ip4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ip4_elem e = { 0 };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -101,7 +101,7 @@ static int
 hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
              enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_ip *h = set->data;
+       const struct hash_ip4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ip4_elem e = { 0 };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -199,7 +199,7 @@ nla_put_failure:
 }
 
 static inline void
-hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e)
+hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e)
 {
 }
 
@@ -217,7 +217,7 @@ hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,
              const struct xt_action_param *par,
              enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_ip *h = set->data;
+       const struct hash_ip6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ip6_elem e = { { .all = { 0 } } };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -234,7 +234,7 @@ static int
 hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],
              enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_ip *h = set->data;
+       const struct hash_ip6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ip6_elem e = { { .all = { 0 } } };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
new file mode 100644 (file)
index 0000000..1ab5ed2
--- /dev/null
@@ -0,0 +1,315 @@
+/* Copyright (C) 2016 Tomasz Chilinski <tomasz.chilinski@chilan.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* Kernel module implementing an IP set type: the hash:ip,mac type */
+
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/random.h>
+#include <linux/if_ether.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/netlink.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/ipset/pfxlen.h>
+#include <linux/netfilter/ipset/ip_set.h>
+#include <linux/netfilter/ipset/ip_set_hash.h>
+
+#define IPSET_TYPE_REV_MIN     0
+#define IPSET_TYPE_REV_MAX     0
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Tomasz Chilinski <tomasz.chilinski@chilan.com>");
+IP_SET_MODULE_DESC("hash:ip,mac", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX);
+MODULE_ALIAS("ip_set_hash:ip,mac");
+
+/* Type specific function prefix */
+#define HTYPE          hash_ipmac
+
+/* Zero valued element is not supported */
+static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
+
+/* IPv4 variant */
+
+/* Member elements */
+struct hash_ipmac4_elem {
+       /* Zero valued IP addresses cannot be stored */
+       __be32 ip;
+       union {
+               unsigned char ether[ETH_ALEN];
+               __be32 foo[2];
+       };
+};
+
+/* Common functions */
+
+static inline bool
+hash_ipmac4_data_equal(const struct hash_ipmac4_elem *e1,
+                      const struct hash_ipmac4_elem *e2,
+                      u32 *multi)
+{
+       return e1->ip == e2->ip && ether_addr_equal(e1->ether, e2->ether);
+}
+
+static bool
+hash_ipmac4_data_list(struct sk_buff *skb, const struct hash_ipmac4_elem *e)
+{
+       if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip) ||
+           nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
+               goto nla_put_failure;
+       return false;
+
+nla_put_failure:
+       return true;
+}
+
+static inline void
+hash_ipmac4_data_next(struct hash_ipmac4_elem *next,
+                     const struct hash_ipmac4_elem *e)
+{
+       next->ip = e->ip;
+}
+
+#define MTYPE          hash_ipmac4
+#define PF             4
+#define HOST_MASK      32
+#define HKEY_DATALEN   sizeof(struct hash_ipmac4_elem)
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb,
+                const struct xt_action_param *par,
+                enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } };
+       struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+        /* MAC can be src only */
+       if (!(opt->flags & IPSET_DIM_TWO_SRC))
+               return 0;
+
+       if (skb_mac_header(skb) < skb->head ||
+           (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+               return -EINVAL;
+
+       memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+       if (ether_addr_equal(e.ether, invalid_ether))
+               return -EINVAL;
+
+       ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
+
+       return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmac4_uadt(struct ip_set *set, struct nlattr *tb[],
+                enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+       int ret;
+
+       if (unlikely(!tb[IPSET_ATTR_IP] ||
+                    !tb[IPSET_ATTR_ETHER] ||
+                    nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)   ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_LINENO])
+               *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+       ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) ||
+               ip_set_get_extensions(set, tb, &ext);
+       if (ret)
+               return ret;
+       memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+       if (ether_addr_equal(e.ether, invalid_ether))
+               return -IPSET_ERR_HASH_ELEM;
+
+       return adtfn(set, &e, &ext, &ext, flags);
+}
+
+/* IPv6 variant */
+
+/* Member elements */
+struct hash_ipmac6_elem {
+       /* Zero valued IP addresses cannot be stored */
+       union nf_inet_addr ip;
+       union {
+               unsigned char ether[ETH_ALEN];
+               __be32 foo[2];
+       };
+};
+
+/* Common functions */
+
+static inline bool
+hash_ipmac6_data_equal(const struct hash_ipmac6_elem *e1,
+                      const struct hash_ipmac6_elem *e2,
+                      u32 *multi)
+{
+       return ipv6_addr_equal(&e1->ip.in6, &e2->ip.in6) &&
+               ether_addr_equal(e1->ether, e2->ether);
+}
+
+static bool
+hash_ipmac6_data_list(struct sk_buff *skb, const struct hash_ipmac6_elem *e)
+{
+       if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) ||
+           nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
+               goto nla_put_failure;
+       return false;
+
+nla_put_failure:
+       return true;
+}
+
+static inline void
+hash_ipmac6_data_next(struct hash_ipmac6_elem *next,
+                     const struct hash_ipmac6_elem *e)
+{
+}
+
+#undef MTYPE
+#undef PF
+#undef HOST_MASK
+#undef HKEY_DATALEN
+
+#define MTYPE          hash_ipmac6
+#define PF             6
+#define HOST_MASK      128
+#define HKEY_DATALEN   sizeof(struct hash_ipmac6_elem)
+#define IP_SET_EMIT_CREATE
+#include "ip_set_hash_gen.h"
+
+static int
+hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb,
+                const struct xt_action_param *par,
+                enum ipset_adt adt, struct ip_set_adt_opt *opt)
+{
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ipmac6_elem e = {
+               { .all = { 0 } },
+               { .foo[0] = 0, .foo[1] = 0 }
+       };
+       struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
+
+        /* MAC can be src only */
+       if (!(opt->flags & IPSET_DIM_TWO_SRC))
+               return 0;
+
+       if (skb_mac_header(skb) < skb->head ||
+           (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+               return -EINVAL;
+
+       memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+       if (ether_addr_equal(e.ether, invalid_ether))
+               return -EINVAL;
+
+       ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
+
+       return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+}
+
+static int
+hash_ipmac6_uadt(struct ip_set *set, struct nlattr *tb[],
+                enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+{
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ipmac6_elem e = {
+               { .all = { 0 } },
+               { .foo[0] = 0, .foo[1] = 0 }
+       };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+       int ret;
+
+       if (unlikely(!tb[IPSET_ATTR_IP] ||
+                    !tb[IPSET_ATTR_ETHER] ||
+                    nla_len(tb[IPSET_ATTR_ETHER]) != ETH_ALEN ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)   ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) ||
+                    !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE)))
+               return -IPSET_ERR_PROTOCOL;
+
+       if (tb[IPSET_ATTR_LINENO])
+               *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]);
+
+       ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) ||
+               ip_set_get_extensions(set, tb, &ext);
+       if (ret)
+               return ret;
+
+       memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+       if (ether_addr_equal(e.ether, invalid_ether))
+               return -IPSET_ERR_HASH_ELEM;
+
+       return adtfn(set, &e, &ext, &ext, flags);
+}
+
+static struct ip_set_type hash_ipmac_type __read_mostly = {
+       .name           = "hash:ip,mac",
+       .protocol       = IPSET_PROTOCOL,
+       .features       = IPSET_TYPE_IP | IPSET_TYPE_MAC,
+       .dimension      = IPSET_DIM_TWO,
+       .family         = NFPROTO_UNSPEC,
+       .revision_min   = IPSET_TYPE_REV_MIN,
+       .revision_max   = IPSET_TYPE_REV_MAX,
+       .create         = hash_ipmac_create,
+       .create_policy  = {
+               [IPSET_ATTR_HASHSIZE]   = { .type = NLA_U32 },
+               [IPSET_ATTR_MAXELEM]    = { .type = NLA_U32 },
+               [IPSET_ATTR_PROBES]     = { .type = NLA_U8 },
+               [IPSET_ATTR_RESIZE]     = { .type = NLA_U8  },
+               [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+               [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
+       },
+       .adt_policy     = {
+               [IPSET_ATTR_IP]         = { .type = NLA_NESTED },
+               [IPSET_ATTR_ETHER]      = { .type = NLA_BINARY,
+                               .len  = ETH_ALEN },
+               [IPSET_ATTR_TIMEOUT]    = { .type = NLA_U32 },
+               [IPSET_ATTR_LINENO]     = { .type = NLA_U32 },
+               [IPSET_ATTR_BYTES]      = { .type = NLA_U64 },
+               [IPSET_ATTR_PACKETS]    = { .type = NLA_U64 },
+               [IPSET_ATTR_COMMENT]    = { .type = NLA_NUL_STRING },
+               [IPSET_ATTR_SKBMARK]    = { .type = NLA_U64 },
+               [IPSET_ATTR_SKBPRIO]    = { .type = NLA_U32 },
+               [IPSET_ATTR_SKBQUEUE]   = { .type = NLA_U16 },
+       },
+       .me             = THIS_MODULE,
+};
+
+static int __init
+hash_ipmac_init(void)
+{
+       return ip_set_type_register(&hash_ipmac_type);
+}
+
+static void __exit
+hash_ipmac_fini(void)
+{
+       ip_set_type_unregister(&hash_ipmac_type);
+}
+
+module_init(hash_ipmac_init);
+module_exit(hash_ipmac_fini);
index a0695a2ab585cb219dc946570553040cf93aedde..b64cf14e8352f488588af54fc5c650b27f31a09d 100644 (file)
@@ -85,7 +85,7 @@ hash_ipmark4_kadt(struct ip_set *set, const struct sk_buff *skb,
                  const struct xt_action_param *par,
                  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_ipmark *h = set->data;
+       const struct hash_ipmark4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ipmark4_elem e = { };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -101,7 +101,7 @@ static int
 hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
                  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_ipmark *h = set->data;
+       const struct hash_ipmark4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ipmark4_elem e = { };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -193,7 +193,7 @@ nla_put_failure:
 }
 
 static inline void
-hash_ipmark6_data_next(struct hash_ipmark4_elem *next,
+hash_ipmark6_data_next(struct hash_ipmark6_elem *next,
                       const struct hash_ipmark6_elem *d)
 {
 }
@@ -211,7 +211,7 @@ hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
                  const struct xt_action_param *par,
                  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_ipmark *h = set->data;
+       const struct hash_ipmark6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ipmark6_elem e = { };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -227,7 +227,7 @@ static int
 hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
                  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_ipmark *h = set->data;
+       const struct hash_ipmark6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ipmark6_elem e = { };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
index 9d84b3dff603c98a9aa5a917b4c1314e8fd99044..f438740e6c6a4e4ee94d971de8cba7556ea65bec 100644 (file)
@@ -108,7 +108,7 @@ static int
 hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
                  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_ipport *h = set->data;
+       const struct hash_ipport4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ipport4_elem e = { .ip = 0 };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -231,7 +231,7 @@ nla_put_failure:
 }
 
 static inline void
-hash_ipport6_data_next(struct hash_ipport4_elem *next,
+hash_ipport6_data_next(struct hash_ipport6_elem *next,
                       const struct hash_ipport6_elem *d)
 {
        next->port = d->port;
@@ -266,7 +266,7 @@ static int
 hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
                  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_ipport *h = set->data;
+       const struct hash_ipport6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ipport6_elem e = { .ip = { .all = { 0 } } };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
index 215b7b942038a63e33fe40c2df52a1e61d8c20b0..6215fb898c509ebcd35d555ee2fc7a2371733f0d 100644 (file)
@@ -111,7 +111,7 @@ static int
 hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
                    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_ipportip *h = set->data;
+       const struct hash_ipportip4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ipportip4_elem e = { .ip = 0 };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -241,7 +241,7 @@ nla_put_failure:
 }
 
 static inline void
-hash_ipportip6_data_next(struct hash_ipportip4_elem *next,
+hash_ipportip6_data_next(struct hash_ipportip6_elem *next,
                         const struct hash_ipportip6_elem *d)
 {
        next->port = d->port;
@@ -277,7 +277,7 @@ static int
 hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
                    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_ipportip *h = set->data;
+       const struct hash_ipportip6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ipportip6_elem e = {  .ip = { .all = { 0 } } };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
index 9ca719625ea339522c10154da684b63278e57845..5ab1b99a53c2b4338837a1fc17e067b9fee6a9d7 100644 (file)
@@ -138,7 +138,7 @@ hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
                     const struct xt_action_param *par,
                     enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_ipportnet *h = set->data;
+       const struct hash_ipportnet4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ipportnet4_elem e = {
                .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -163,7 +163,7 @@ static int
 hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
                     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_ipportnet *h = set->data;
+       const struct hash_ipportnet4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -370,7 +370,7 @@ nla_put_failure:
 }
 
 static inline void
-hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next,
+hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next,
                          const struct hash_ipportnet6_elem *d)
 {
        next->port = d->port;
@@ -389,7 +389,7 @@ hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
                     const struct xt_action_param *par,
                     enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_ipportnet *h = set->data;
+       const struct hash_ipportnet6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ipportnet6_elem e = {
                .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -414,7 +414,7 @@ static int
 hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
                     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_ipportnet *h = set->data;
+       const struct hash_ipportnet6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
index 3e4bffdc1cc01e7385e76be6d7ee9cdad8a0af18..5d9e895452e744a38c7324bcf37924c977b62727 100644 (file)
@@ -117,7 +117,7 @@ hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,
               const struct xt_action_param *par,
               enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_net *h = set->data;
+       const struct hash_net4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_net4_elem e = {
                .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -139,7 +139,7 @@ static int
 hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
               enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_net *h = set->data;
+       const struct hash_net4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_net4_elem e = { .cidr = HOST_MASK };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -268,7 +268,7 @@ nla_put_failure:
 }
 
 static inline void
-hash_net6_data_next(struct hash_net4_elem *next,
+hash_net6_data_next(struct hash_net6_elem *next,
                    const struct hash_net6_elem *d)
 {
 }
@@ -286,7 +286,7 @@ hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,
               const struct xt_action_param *par,
               enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_net *h = set->data;
+       const struct hash_net6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_net6_elem e = {
                .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
index f0f688db6213930ad568b645080eb171bb30f345..44cf11939c916473b024d0b3fddaa6dbb1777ed8 100644 (file)
@@ -156,7 +156,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
                    const struct xt_action_param *par,
                    enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       struct hash_netiface *h = set->data;
+       struct hash_netiface4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netiface4_elem e = {
                .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -170,7 +170,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
        ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
        e.ip &= ip_set_netmask(e.cidr);
 
-#define IFACE(dir)     (par->dir ? par->dir->name : "")
+#define IFACE(dir)     (par->state->dir ? par->state->dir->name : "")
 #define SRCDIR         (opt->flags & IPSET_DIM_TWO_SRC)
 
        if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
@@ -196,7 +196,7 @@ static int
 hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
                    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       struct hash_netiface *h = set->data;
+       struct hash_netiface4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -348,7 +348,7 @@ nla_put_failure:
 }
 
 static inline void
-hash_netiface6_data_next(struct hash_netiface4_elem *next,
+hash_netiface6_data_next(struct hash_netiface6_elem *next,
                         const struct hash_netiface6_elem *d)
 {
 }
@@ -367,7 +367,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
                    const struct xt_action_param *par,
                    enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       struct hash_netiface *h = set->data;
+       struct hash_netiface6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netiface6_elem e = {
                .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
index a93dfebffa811bcaee4be26c935e583b8ed3fc00..db614e13b193ddb1733bcb098d53d2f12520066f 100644 (file)
@@ -143,7 +143,7 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
                  const struct xt_action_param *par,
                  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_netnet *h = set->data;
+       const struct hash_netnet4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netnet4_elem e = { };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -165,7 +165,7 @@ static int
 hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
                  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_netnet *h = set->data;
+       const struct hash_netnet4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netnet4_elem e = { };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -352,7 +352,7 @@ nla_put_failure:
 }
 
 static inline void
-hash_netnet6_data_next(struct hash_netnet4_elem *next,
+hash_netnet6_data_next(struct hash_netnet6_elem *next,
                       const struct hash_netnet6_elem *d)
 {
 }
@@ -377,7 +377,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
                  const struct xt_action_param *par,
                  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_netnet *h = set->data;
+       const struct hash_netnet6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netnet6_elem e = { };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
index 731813e0f08ce6af692386eb0d1cf8cead14454c..54b64b6cd0cdb2196e1f507909784096af45e11f 100644 (file)
@@ -133,7 +133,7 @@ hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,
                   const struct xt_action_param *par,
                   enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_netport *h = set->data;
+       const struct hash_netport4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netport4_elem e = {
                .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -157,7 +157,7 @@ static int
 hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
                   enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_netport *h = set->data;
+       const struct hash_netport4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -329,7 +329,7 @@ nla_put_failure:
 }
 
 static inline void
-hash_netport6_data_next(struct hash_netport4_elem *next,
+hash_netport6_data_next(struct hash_netport6_elem *next,
                        const struct hash_netport6_elem *d)
 {
        next->port = d->port;
@@ -348,7 +348,7 @@ hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,
                   const struct xt_action_param *par,
                   enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_netport *h = set->data;
+       const struct hash_netport6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netport6_elem e = {
                .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK),
@@ -372,7 +372,7 @@ static int
 hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
                   enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_netport *h = set->data;
+       const struct hash_netport6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netport6_elem e = { .cidr = HOST_MASK  - 1 };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
index 9a14c237830f4b2ccbbd999662256d88cd95662e..aff846960ac4423da8ec5a99f4faccf294a812e0 100644 (file)
@@ -154,7 +154,7 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
                      const struct xt_action_param *par,
                      enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_netportnet *h = set->data;
+       const struct hash_netportnet4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netportnet4_elem e = { };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -180,7 +180,7 @@ static int
 hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
                      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_netportnet *h = set->data;
+       const struct hash_netportnet4 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netportnet4_elem e = { };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
@@ -406,7 +406,7 @@ nla_put_failure:
 }
 
 static inline void
-hash_netportnet6_data_next(struct hash_netportnet4_elem *next,
+hash_netportnet6_data_next(struct hash_netportnet6_elem *next,
                           const struct hash_netportnet6_elem *d)
 {
        next->port = d->port;
@@ -432,7 +432,7 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
                      const struct xt_action_param *par,
                      enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
-       const struct hash_netportnet *h = set->data;
+       const struct hash_netportnet6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netportnet6_elem e = { };
        struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
@@ -458,7 +458,7 @@ static int
 hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
                      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-       const struct hash_netportnet *h = set->data;
+       const struct hash_netportnet6 *h = set->data;
        ipset_adtfn adtfn = set->variant->adt[adt];
        struct hash_netportnet6_elem e = { };
        struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
index a2a89e4e0a14f7ce44b9abf64bf206627fc21e95..51077c53d76b1f00bc96e5f708fcd398b58afd22 100644 (file)
@@ -166,6 +166,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
 static inline void
 list_set_del(struct ip_set *set, struct set_elem *e)
 {
+       set->elements--;
        list_del_rcu(&e->list);
        call_rcu(&e->rcu, __list_set_del_rcu);
 }
@@ -227,7 +228,7 @@ list_set_init_extensions(struct ip_set *set, const struct ip_set_ext *ext,
        if (SET_WITH_COUNTER(set))
                ip_set_init_counter(ext_counter(e, set), ext);
        if (SET_WITH_COMMENT(set))
-               ip_set_init_comment(ext_comment(e, set), ext);
+               ip_set_init_comment(set, ext_comment(e, set), ext);
        if (SET_WITH_SKBINFO(set))
                ip_set_init_skbinfo(ext_skbinfo(e, set), ext);
        /* Update timeout last */
@@ -309,6 +310,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
                list_add_rcu(&e->list, &prev->list);
        else
                list_add_tail_rcu(&e->list, &map->members);
+       set->elements++;
 
        return 0;
 }
@@ -419,6 +421,8 @@ list_set_flush(struct ip_set *set)
 
        list_for_each_entry_safe(e, n, &map->members, list)
                list_set_del(set, e);
+       set->elements = 0;
+       set->ext_size = 0;
 }
 
 static void
@@ -441,12 +445,12 @@ list_set_destroy(struct ip_set *set)
        set->data = NULL;
 }
 
-static int
-list_set_head(struct ip_set *set, struct sk_buff *skb)
+/* Calculate the actual memory size of the set data */
+static size_t
+list_set_memsize(const struct list_set *map, size_t dsize)
 {
-       const struct list_set *map = set->data;
-       struct nlattr *nested;
        struct set_elem *e;
+       size_t memsize;
        u32 n = 0;
 
        rcu_read_lock();
@@ -454,13 +458,25 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)
                n++;
        rcu_read_unlock();
 
+       memsize = sizeof(*map) + n * dsize;
+
+       return memsize;
+}
+
+static int
+list_set_head(struct ip_set *set, struct sk_buff *skb)
+{
+       const struct list_set *map = set->data;
+       struct nlattr *nested;
+       size_t memsize = list_set_memsize(map, set->dsize) + set->ext_size;
+
        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
        if (!nested)
                goto nla_put_failure;
        if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) ||
            nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) ||
-           nla_put_net32(skb, IPSET_ATTR_MEMSIZE,
-                         htonl(sizeof(*map) + n * set->dsize)))
+           nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
+           nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(set->elements)))
                goto nla_put_failure;
        if (unlikely(ip_set_put_flags(skb, set)))
                goto nla_put_failure;
@@ -570,11 +586,8 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 {
        struct list_set *map = set->data;
 
-       init_timer(&map->gc);
-       map->gc.data = (unsigned long)set;
-       map->gc.function = gc;
-       map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
-       add_timer(&map->gc);
+       setup_timer(&map->gc, gc, (unsigned long)set);
+       mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
 }
 
 /* Create list:set type of sets */
index 2c1b498a7a271df0f6e3ffa86407b89ba103e9d9..db40050f8785eb9205a7bf493a71c9b956b93ab8 100644 (file)
@@ -70,7 +70,7 @@ EXPORT_SYMBOL(ip_vs_get_debug_level);
 #endif
 EXPORT_SYMBOL(ip_vs_new_conn_out);
 
-static int ip_vs_net_id __read_mostly;
+static unsigned int ip_vs_net_id __read_mostly;
 /* netns cnt used for uniqueness */
 static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
 
index c3c809b2e7122daabac5b45ffed67979e0fecb28..038c2ba0ae0fa5877d3ed63e8c7ade908a45261b 100644 (file)
@@ -2840,14 +2840,7 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
  */
 
 /* IPVS genetlink family */
-static struct genl_family ip_vs_genl_family = {
-       .id             = GENL_ID_GENERATE,
-       .hdrsize        = 0,
-       .name           = IPVS_GENL_NAME,
-       .version        = IPVS_GENL_VERSION,
-       .maxattr        = IPVS_CMD_MAX,
-       .netnsok        = true,         /* Make ipvsadm to work on netns */
-};
+static struct genl_family ip_vs_genl_family;
 
 /* Policy used for first-level command attributes */
 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
@@ -3872,10 +3865,20 @@ static const struct genl_ops ip_vs_genl_ops[] = {
        },
 };
 
+static struct genl_family ip_vs_genl_family __ro_after_init = {
+       .hdrsize        = 0,
+       .name           = IPVS_GENL_NAME,
+       .version        = IPVS_GENL_VERSION,
+       .maxattr        = IPVS_CMD_ATTR_MAX,
+       .netnsok        = true,         /* Make ipvsadm to work on netns */
+       .module         = THIS_MODULE,
+       .ops            = ip_vs_genl_ops,
+       .n_ops          = ARRAY_SIZE(ip_vs_genl_ops),
+};
+
 static int __init ip_vs_genl_register(void)
 {
-       return genl_register_family_with_ops(&ip_vs_genl_family,
-                                            ip_vs_genl_ops);
+       return genl_register_family(&ip_vs_genl_family);
 }
 
 static void ip_vs_genl_unregister(void)
index 1b07578bedf336c53e3b6072c8c3324f7f18081b..9350530c16c1b0e9524591945214fc8775e5a9e6 100644 (file)
@@ -283,6 +283,7 @@ struct ip_vs_sync_buff {
  */
 static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
 {
+       memset(ho, 0, sizeof(*ho));
        ho->init_seq       = get_unaligned_be32(&no->init_seq);
        ho->delta          = get_unaligned_be32(&no->delta);
        ho->previous_delta = get_unaligned_be32(&no->previous_delta);
@@ -917,8 +918,10 @@ static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *pa
                        kfree(param->pe_data);
        }
 
-       if (opt)
-               memcpy(&cp->in_seq, opt, sizeof(*opt));
+       if (opt) {
+               cp->in_seq = opt->in_seq;
+               cp->out_seq = opt->out_seq;
+       }
        atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
        cp->state = state;
        cp->old_state = cp->state;
index ba6a1d4212225f5ff735eed006e12b3a244a5076..6a0bbfa8e7020d8f2f004e6659141731dbf3342f 100644 (file)
@@ -76,6 +76,7 @@ struct conntrack_gc_work {
        struct delayed_work     dwork;
        u32                     last_bucket;
        bool                    exiting;
+       long                    next_gc_run;
 };
 
 static __read_mostly struct kmem_cache *nf_conntrack_cachep;
@@ -83,9 +84,11 @@ static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
 static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
 static __read_mostly bool nf_conntrack_locks_all;
 
+/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
 #define GC_MAX_BUCKETS_DIV     64u
-#define GC_MAX_BUCKETS         8192u
-#define GC_INTERVAL            (5 * HZ)
+/* upper bound of scan intervals */
+#define GC_INTERVAL_MAX                (2 * HZ)
+/* maximum conntracks to evict per gc run */
 #define GC_MAX_EVICTS          256u
 
 static struct conntrack_gc_work conntrack_gc_work;
@@ -936,13 +939,13 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
 static void gc_worker(struct work_struct *work)
 {
        unsigned int i, goal, buckets = 0, expired_count = 0;
-       unsigned long next_run = GC_INTERVAL;
-       unsigned int ratio, scanned = 0;
        struct conntrack_gc_work *gc_work;
+       unsigned int ratio, scanned = 0;
+       unsigned long next_run;
 
        gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
 
-       goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+       goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
        i = gc_work->last_bucket;
 
        do {
@@ -982,17 +985,47 @@ static void gc_worker(struct work_struct *work)
        if (gc_work->exiting)
                return;
 
+       /*
+        * Eviction will normally happen from the packet path, and not
+        * from this gc worker.
+        *
+        * This worker is only here to reap expired entries when system went
+        * idle after a busy period.
+        *
+        * The heuristics below are supposed to balance conflicting goals:
+        *
+        * 1. Minimize time until we notice a stale entry
+        * 2. Maximize scan intervals to not waste cycles
+        *
+        * Normally, expired_count will be 0, this increases the next_run time
+        * to prioritize 2) above.
+        *
+        * As soon as a timed-out entry is found, move towards 1) and increase
+        * the scan frequency.
+        * In case we have lots of evictions next scan is done immediately.
+        */
        ratio = scanned ? expired_count * 100 / scanned : 0;
-       if (ratio >= 90)
+       if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
+               gc_work->next_gc_run = 0;
                next_run = 0;
+       } else if (expired_count) {
+               gc_work->next_gc_run /= 2U;
+               next_run = msecs_to_jiffies(1);
+       } else {
+               if (gc_work->next_gc_run < GC_INTERVAL_MAX)
+                       gc_work->next_gc_run += msecs_to_jiffies(1);
+
+               next_run = gc_work->next_gc_run;
+       }
 
        gc_work->last_bucket = i;
-       schedule_delayed_work(&gc_work->dwork, next_run);
+       queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
 }
 
 static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
 {
        INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+       gc_work->next_gc_run = GC_INTERVAL_MAX;
        gc_work->exiting = false;
 }
 
@@ -1305,7 +1338,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                if (skb->nfct)
                        goto out;
        }
-
+repeat:
        ct = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum,
                               l3proto, l4proto, &set_reply, &ctinfo);
        if (!ct) {
@@ -1337,6 +1370,12 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
                NF_CT_STAT_INC_ATOMIC(net, invalid);
                if (ret == -NF_DROP)
                        NF_CT_STAT_INC_ATOMIC(net, drop);
+               /* Special case: TCP tracker reports an attempt to reopen a
+                * closed/aborted connection. We have to go back and create a
+                * fresh conntrack.
+                */
+               if (ret == -NF_REPEAT)
+                       goto repeat;
                ret = -ret;
                goto out;
        }
@@ -1344,15 +1383,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
        if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
                nf_conntrack_event_cache(IPCT_REPLY, ct);
 out:
-       if (tmpl) {
-               /* Special case: we have to repeat this hook, assign the
-                * template again to this packet. We assume that this packet
-                * has no conntrack assigned. This is used by nf_ct_tcp. */
-               if (ret == NF_REPEAT)
-                       skb->nfct = (struct nf_conntrack *)tmpl;
-               else
-                       nf_ct_put(tmpl);
-       }
+       if (tmpl)
+               nf_ct_put(tmpl);
 
        return ret;
 }
@@ -1885,7 +1917,7 @@ int nf_conntrack_init_start(void)
        nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
 
        conntrack_gc_work_init(&conntrack_gc_work);
-       schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+       queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
 
        return 0;
 
index 336e21559e011d4f0fe154934ecf7d5a765aefdd..7341adf7059d3232f458bc02528d9631107ebfce 100644 (file)
@@ -138,9 +138,14 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
 
        for (i = 0; i < nf_ct_helper_hsize; i++) {
                hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) {
-                       if (!strcmp(h->name, name) &&
-                           h->tuple.src.l3num == l3num &&
-                           h->tuple.dst.protonum == protonum)
+                       if (strcmp(h->name, name))
+                               continue;
+
+                       if (h->tuple.src.l3num != NFPROTO_UNSPEC &&
+                           h->tuple.src.l3num != l3num)
+                               continue;
+
+                       if (h->tuple.dst.protonum == protonum)
                                return h;
                }
        }
index 8d2c7d8c666a1de3d6e53ad145a3cc4c0750703c..9bd34647225a2a22304ab780ff80221642d27251 100644 (file)
@@ -281,15 +281,15 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net,
 
 /* FIXME: Allow NULL functions and sub in pointers to generic for
    them. --RR */
-int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto)
+int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto)
 {
        int ret = 0;
 
        if (l4proto->l3proto >= PF_MAX)
                return -EBUSY;
 
-       if ((l4proto->to_nlattr && !l4proto->nlattr_size)
-               || (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
+       if ((l4proto->to_nlattr && !l4proto->nlattr_size) ||
+           (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
                return -EINVAL;
 
        mutex_lock(&nf_ct_proto_mutex);
@@ -307,7 +307,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto)
                }
 
                for (i = 0; i < MAX_NF_CT_PROTO; i++)
-                       RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic);
+                       RCU_INIT_POINTER(proto_array[i],
+                                        &nf_conntrack_l4proto_generic);
 
                /* Before making proto_array visible to lockless readers,
                 * we must make sure its content is committed to memory.
@@ -335,10 +336,10 @@ out_unlock:
        mutex_unlock(&nf_ct_proto_mutex);
        return ret;
 }
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
 
-int nf_ct_l4proto_pernet_register(struct net *net,
-                                 struct nf_conntrack_l4proto *l4proto)
+int nf_ct_l4proto_pernet_register_one(struct net *net,
+                                     struct nf_conntrack_l4proto *l4proto)
 {
        int ret = 0;
        struct nf_proto_net *pn = NULL;
@@ -361,9 +362,9 @@ int nf_ct_l4proto_pernet_register(struct net *net,
 out:
        return ret;
 }
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
 
-void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto)
 {
        BUG_ON(l4proto->l3proto >= PF_MAX);
 
@@ -378,10 +379,10 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto)
 
        synchronize_rcu();
 }
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
 
-void nf_ct_l4proto_pernet_unregister(struct net *net,
-                                    struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_pernet_unregister_one(struct net *net,
+                                        struct nf_conntrack_l4proto *l4proto)
 {
        struct nf_proto_net *pn = NULL;
 
@@ -395,6 +396,66 @@ void nf_ct_l4proto_pernet_unregister(struct net *net,
        /* Remove all contrack entries for this protocol */
        nf_ct_iterate_cleanup(net, kill_l4proto, l4proto, 0, 0);
 }
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
+
+int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[],
+                          unsigned int num_proto)
+{
+       int ret = -EINVAL, ver;
+       unsigned int i;
+
+       for (i = 0; i < num_proto; i++) {
+               ret = nf_ct_l4proto_register_one(l4proto[i]);
+               if (ret < 0)
+                       break;
+       }
+       if (i != num_proto) {
+               ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4;
+               pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n",
+                      ver, l4proto[i]->name, ver);
+               nf_ct_l4proto_unregister(l4proto, i);
+       }
+       return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_register);
+
+int nf_ct_l4proto_pernet_register(struct net *net,
+                                 struct nf_conntrack_l4proto *l4proto[],
+                                 unsigned int num_proto)
+{
+       int ret = -EINVAL;
+       unsigned int i;
+
+       for (i = 0; i < num_proto; i++) {
+               ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]);
+               if (ret < 0)
+                       break;
+       }
+       if (i != num_proto) {
+               pr_err("nf_conntrack_%s%d: pernet registration failed\n",
+                      l4proto[i]->name,
+                      l4proto[i]->l3proto == PF_INET6 ? 6 : 4);
+               nf_ct_l4proto_pernet_unregister(net, l4proto, i);
+       }
+       return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
+
+void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[],
+                             unsigned int num_proto)
+{
+       while (num_proto-- != 0)
+               nf_ct_l4proto_unregister_one(l4proto[num_proto]);
+}
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister);
+
+void nf_ct_l4proto_pernet_unregister(struct net *net,
+                                    struct nf_conntrack_l4proto *l4proto[],
+                                    unsigned int num_proto)
+{
+       while (num_proto-- != 0)
+               nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
+}
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
 
 int nf_conntrack_proto_pernet_init(struct net *net)
index a45bee52dccc235f1d7a4f7caa11cfa2b8057531..073b047314dc5c7690523f63c7fff15e4a3c4be7 100644 (file)
@@ -385,7 +385,7 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
 };
 
 /* this module per-net specifics */
-static int dccp_net_id __read_mostly;
+static unsigned int dccp_net_id __read_mostly;
 struct dccp_net {
        struct nf_proto_net pn;
        int dccp_loose;
@@ -936,30 +936,21 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
        .init_net               = dccp_init_net,
 };
 
+static struct nf_conntrack_l4proto *dccp_proto[] = {
+       &dccp_proto4,
+       &dccp_proto6,
+};
+
 static __net_init int dccp_net_init(struct net *net)
 {
-       int ret = 0;
-       ret = nf_ct_l4proto_pernet_register(net, &dccp_proto4);
-       if (ret < 0) {
-               pr_err("nf_conntrack_dccp4: pernet registration failed.\n");
-               goto out;
-       }
-       ret = nf_ct_l4proto_pernet_register(net, &dccp_proto6);
-       if (ret < 0) {
-               pr_err("nf_conntrack_dccp6: pernet registration failed.\n");
-               goto cleanup_dccp4;
-       }
-       return 0;
-cleanup_dccp4:
-       nf_ct_l4proto_pernet_unregister(net, &dccp_proto4);
-out:
-       return ret;
+       return nf_ct_l4proto_pernet_register(net, dccp_proto,
+                                            ARRAY_SIZE(dccp_proto));
 }
 
 static __net_exit void dccp_net_exit(struct net *net)
 {
-       nf_ct_l4proto_pernet_unregister(net, &dccp_proto6);
-       nf_ct_l4proto_pernet_unregister(net, &dccp_proto4);
+       nf_ct_l4proto_pernet_unregister(net, dccp_proto,
+                                       ARRAY_SIZE(dccp_proto));
 }
 
 static struct pernet_operations dccp_net_ops = {
@@ -975,29 +966,16 @@ static int __init nf_conntrack_proto_dccp_init(void)
 
        ret = register_pernet_subsys(&dccp_net_ops);
        if (ret < 0)
-               goto out_pernet;
-
-       ret = nf_ct_l4proto_register(&dccp_proto4);
-       if (ret < 0)
-               goto out_dccp4;
-
-       ret = nf_ct_l4proto_register(&dccp_proto6);
+               return ret;
+       ret = nf_ct_l4proto_register(dccp_proto, ARRAY_SIZE(dccp_proto));
        if (ret < 0)
-               goto out_dccp6;
-
-       return 0;
-out_dccp6:
-       nf_ct_l4proto_unregister(&dccp_proto4);
-out_dccp4:
-       unregister_pernet_subsys(&dccp_net_ops);
-out_pernet:
+               unregister_pernet_subsys(&dccp_net_ops);
        return ret;
 }
 
 static void __exit nf_conntrack_proto_dccp_fini(void)
 {
-       nf_ct_l4proto_unregister(&dccp_proto6);
-       nf_ct_l4proto_unregister(&dccp_proto4);
+       nf_ct_l4proto_unregister(dccp_proto, ARRAY_SIZE(dccp_proto));
        unregister_pernet_subsys(&dccp_net_ops);
 }
 
index 9a715f88b2f194e90a64c3b15449810e832b9b2c..87bb40a3feb58e371e3fc899ab7c8b4dd2f38859 100644 (file)
@@ -53,7 +53,7 @@ static unsigned int gre_timeouts[GRE_CT_MAX] = {
        [GRE_CT_REPLIED]        = 180*HZ,
 };
 
-static int proto_gre_net_id __read_mostly;
+static unsigned int proto_gre_net_id __read_mostly;
 struct netns_proto_gre {
        struct nf_proto_net     nf;
        rwlock_t                keymap_lock;
@@ -396,7 +396,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
 static int proto_gre_net_init(struct net *net)
 {
        int ret = 0;
-       ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_gre4);
+
+       ret = nf_ct_l4proto_pernet_register_one(net,
+                                               &nf_conntrack_l4proto_gre4);
        if (ret < 0)
                pr_err("nf_conntrack_gre4: pernet registration failed.\n");
        return ret;
@@ -404,7 +406,7 @@ static int proto_gre_net_init(struct net *net)
 
 static void proto_gre_net_exit(struct net *net)
 {
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_gre4);
+       nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4);
        nf_ct_gre_keymap_flush(net);
 }
 
@@ -422,8 +424,7 @@ static int __init nf_ct_proto_gre_init(void)
        ret = register_pernet_subsys(&proto_gre_net_ops);
        if (ret < 0)
                goto out_pernet;
-
-       ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_gre4);
+       ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4);
        if (ret < 0)
                goto out_gre4;
 
@@ -436,7 +437,7 @@ out_pernet:
 
 static void __exit nf_ct_proto_gre_fini(void)
 {
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_gre4);
+       nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4);
        unregister_pernet_subsys(&proto_gre_net_ops);
 }
 
index 982ea62606c7a150a69d2af19ecd72ca9bfd71ef..d096c2d6b87bd9bde8dcb73912ed0f43501f0ccf 100644 (file)
@@ -144,7 +144,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
        }
 };
 
-static int sctp_net_id __read_mostly;
+static unsigned int sctp_net_id        __read_mostly;
 struct sctp_net {
        struct nf_proto_net pn;
        unsigned int timeouts[SCTP_CONNTRACK_MAX];
@@ -816,32 +816,21 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
        .init_net               = sctp_init_net,
 };
 
+static struct nf_conntrack_l4proto *sctp_proto[] = {
+       &nf_conntrack_l4proto_sctp4,
+       &nf_conntrack_l4proto_sctp6,
+};
+
 static int sctp_net_init(struct net *net)
 {
-       int ret = 0;
-
-       ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp4);
-       if (ret < 0) {
-               pr_err("nf_conntrack_sctp4: pernet registration failed.\n");
-               goto out;
-       }
-       ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_sctp6);
-       if (ret < 0) {
-               pr_err("nf_conntrack_sctp6: pernet registration failed.\n");
-               goto cleanup_sctp4;
-       }
-       return 0;
-
-cleanup_sctp4:
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4);
-out:
-       return ret;
+       return nf_ct_l4proto_pernet_register(net, sctp_proto,
+                                            ARRAY_SIZE(sctp_proto));
 }
 
 static void sctp_net_exit(struct net *net)
 {
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp6);
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_sctp4);
+       nf_ct_l4proto_pernet_unregister(net, sctp_proto,
+                                       ARRAY_SIZE(sctp_proto));
 }
 
 static struct pernet_operations sctp_net_ops = {
@@ -857,29 +846,16 @@ static int __init nf_conntrack_proto_sctp_init(void)
 
        ret = register_pernet_subsys(&sctp_net_ops);
        if (ret < 0)
-               goto out_pernet;
-
-       ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp4);
-       if (ret < 0)
-               goto out_sctp4;
-
-       ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_sctp6);
+               return ret;
+       ret = nf_ct_l4proto_register(sctp_proto, ARRAY_SIZE(sctp_proto));
        if (ret < 0)
-               goto out_sctp6;
-
-       return 0;
-out_sctp6:
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
-out_sctp4:
-       unregister_pernet_subsys(&sctp_net_ops);
-out_pernet:
+               unregister_pernet_subsys(&sctp_net_ops);
        return ret;
 }
 
 static void __exit nf_conntrack_proto_sctp_fini(void)
 {
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp6);
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_sctp4);
+       nf_ct_l4proto_unregister(sctp_proto, ARRAY_SIZE(sctp_proto));
        unregister_pernet_subsys(&sctp_net_ops);
 }
 
index 029206e8dec4958f7651279d81e27833b584cdd0..7808604c70a219053c09afa388015816cf6ae008 100644 (file)
@@ -35,7 +35,7 @@ static unsigned int udplite_timeouts[UDPLITE_CT_MAX] = {
        [UDPLITE_CT_REPLIED]    = 180*HZ,
 };
 
-static int udplite_net_id __read_mostly;
+static unsigned int udplite_net_id __read_mostly;
 struct udplite_net {
        struct nf_proto_net pn;
        unsigned int timeouts[UDPLITE_CT_MAX];
@@ -336,32 +336,21 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =
        .init_net               = udplite_init_net,
 };
 
+static struct nf_conntrack_l4proto *udplite_proto[] = {
+       &nf_conntrack_l4proto_udplite4,
+       &nf_conntrack_l4proto_udplite6,
+};
+
 static int udplite_net_init(struct net *net)
 {
-       int ret = 0;
-
-       ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite4);
-       if (ret < 0) {
-               pr_err("nf_conntrack_udplite4: pernet registration failed.\n");
-               goto out;
-       }
-       ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udplite6);
-       if (ret < 0) {
-               pr_err("nf_conntrack_udplite6: pernet registration failed.\n");
-               goto cleanup_udplite4;
-       }
-       return 0;
-
-cleanup_udplite4:
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4);
-out:
-       return ret;
+       return nf_ct_l4proto_pernet_register(net, udplite_proto,
+                                            ARRAY_SIZE(udplite_proto));
 }
 
 static void udplite_net_exit(struct net *net)
 {
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite6);
-       nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udplite4);
+       nf_ct_l4proto_pernet_unregister(net, udplite_proto,
+                                       ARRAY_SIZE(udplite_proto));
 }
 
 static struct pernet_operations udplite_net_ops = {
@@ -377,29 +366,16 @@ static int __init nf_conntrack_proto_udplite_init(void)
 
        ret = register_pernet_subsys(&udplite_net_ops);
        if (ret < 0)
-               goto out_pernet;
-
-       ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite4);
-       if (ret < 0)
-               goto out_udplite4;
-
-       ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udplite6);
+               return ret;
+       ret = nf_ct_l4proto_register(udplite_proto, ARRAY_SIZE(udplite_proto));
        if (ret < 0)
-               goto out_udplite6;
-
-       return 0;
-out_udplite6:
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
-out_udplite4:
-       unregister_pernet_subsys(&udplite_net_ops);
-out_pernet:
+               unregister_pernet_subsys(&udplite_net_ops);
        return ret;
 }
 
 static void __exit nf_conntrack_proto_udplite_exit(void)
 {
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite6);
-       nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udplite4);
+       nf_ct_l4proto_unregister(udplite_proto, ARRAY_SIZE(udplite_proto));
        unregister_pernet_subsys(&udplite_net_ops);
 }
 
index 621b81c7bddc5d486dff5cff77c4070a41edda36..c3fc14e021ecf55ba9085cd7ee7a86bfc5352750 100644 (file)
@@ -1436,9 +1436,12 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
                handler = &sip_handlers[i];
                if (handler->request == NULL)
                        continue;
-               if (*datalen < handler->len ||
+               if (*datalen < handler->len + 2 ||
                    strncasecmp(*dptr, handler->method, handler->len))
                        continue;
+               if ((*dptr)[handler->len] != ' ' ||
+                   !isalpha((*dptr)[handler->len+1]))
+                       continue;
 
                if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ,
                                      &matchoff, &matchlen) <= 0) {
index 7ec69723940f120fb783160ebaa6889cf24fcb3b..44ae986c383f1312c4f17f0d27e7d05470aced27 100644 (file)
@@ -19,7 +19,7 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
        struct net_device *dev;
        struct sk_buff *skb;
 
-       dev = dev_get_by_index_rcu(pkt->net, oif);
+       dev = dev_get_by_index_rcu(nft_net(pkt), oif);
        if (dev == NULL)
                return;
 
index e0adb5959342148d9501a48f6bb92b90d2566c00..c46d214d532355b7294a543d7c68843a82fb489d 100644 (file)
 #define NFDEBUG(format, args...)
 #endif
 
-
-/* core.c */
-unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state,
-                       struct nf_hook_entry **entryp);
-
 /* nf_queue.c */
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
-            unsigned int queuenum);
+            struct nf_hook_entry **entryp, unsigned int verdict);
 void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry);
 int __init netfilter_queue_init(void);
 
index 119fe1cb1ea917918d586e96c1e034d82e0d151a..ed9b80815fa0b5ca5eea8b807fe59a8432839711 100644 (file)
@@ -175,6 +175,33 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
 }
 EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
 
+/* bridge and netdev logging families share this code. */
+void nf_log_l2packet(struct net *net, u_int8_t pf,
+                    unsigned int hooknum,
+                    const struct sk_buff *skb,
+                    const struct net_device *in,
+                    const struct net_device *out,
+                    const struct nf_loginfo *loginfo,
+                    const char *prefix)
+{
+       switch (eth_hdr(skb)->h_proto) {
+       case htons(ETH_P_IP):
+               nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out,
+                             loginfo, "%s", prefix);
+               break;
+       case htons(ETH_P_IPV6):
+               nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out,
+                             loginfo, "%s", prefix);
+               break;
+       case htons(ETH_P_ARP):
+       case htons(ETH_P_RARP):
+               nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out,
+                             loginfo, "%s", prefix);
+               break;
+       }
+}
+EXPORT_SYMBOL_GPL(nf_log_l2packet);
+
 static int __init nf_log_common_init(void)
 {
        return 0;
diff --git a/net/netfilter/nf_log_netdev.c b/net/netfilter/nf_log_netdev.c
new file mode 100644 (file)
index 0000000..1f64594
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <net/route.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_log.h>
+
+static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
+                                unsigned int hooknum,
+                                const struct sk_buff *skb,
+                                const struct net_device *in,
+                                const struct net_device *out,
+                                const struct nf_loginfo *loginfo,
+                                const char *prefix)
+{
+       nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
+}
+
+static struct nf_logger nf_netdev_logger __read_mostly = {
+       .name           = "nf_log_netdev",
+       .type           = NF_LOG_TYPE_LOG,
+       .logfn          = nf_log_netdev_packet,
+       .me             = THIS_MODULE,
+};
+
+static int __net_init nf_log_netdev_net_init(struct net *net)
+{
+       return nf_log_set(net, NFPROTO_NETDEV, &nf_netdev_logger);
+}
+
+static void __net_exit nf_log_netdev_net_exit(struct net *net)
+{
+       nf_log_unset(net, &nf_netdev_logger);
+}
+
+static struct pernet_operations nf_log_netdev_net_ops = {
+       .init = nf_log_netdev_net_init,
+       .exit = nf_log_netdev_net_exit,
+};
+
+static int __init nf_log_netdev_init(void)
+{
+       int ret;
+
+       /* Request to load the real packet loggers. */
+       nf_logger_request_module(NFPROTO_IPV4, NF_LOG_TYPE_LOG);
+       nf_logger_request_module(NFPROTO_IPV6, NF_LOG_TYPE_LOG);
+       nf_logger_request_module(NFPROTO_ARP, NF_LOG_TYPE_LOG);
+
+       ret = register_pernet_subsys(&nf_log_netdev_net_ops);
+       if (ret < 0)
+               return ret;
+
+       nf_log_register(NFPROTO_NETDEV, &nf_netdev_logger);
+       return 0;
+}
+
+static void __exit nf_log_netdev_exit(void)
+{
+       unregister_pernet_subsys(&nf_log_netdev_net_ops);
+       nf_log_unregister(&nf_netdev_logger);
+}
+
+module_init(nf_log_netdev_init);
+module_exit(nf_log_netdev_exit);
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Netfilter netdev packet logging");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NF_LOGGER(5, 0); /* NFPROTO_NETDEV */
index 96964a0070e11da46aa97b3fe94fb4f778e40418..77cba9f6ccb6b5de00444bf6fec370b72aa74a07 100644 (file)
@@ -107,13 +107,8 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry)
        rcu_read_unlock();
 }
 
-/*
- * Any packet that leaves via this function must come back
- * through nf_reinject().
- */
-int nf_queue(struct sk_buff *skb,
-            struct nf_hook_state *state,
-            unsigned int queuenum)
+static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
+                     struct nf_hook_entry *hook_entry, unsigned int queuenum)
 {
        int status = -ENOENT;
        struct nf_queue_entry *entry = NULL;
@@ -141,6 +136,7 @@ int nf_queue(struct sk_buff *skb,
        *entry = (struct nf_queue_entry) {
                .skb    = skb,
                .state  = *state,
+               .hook   = hook_entry,
                .size   = sizeof(*entry) + afinfo->route_key_size,
        };
 
@@ -161,17 +157,54 @@ err:
        return status;
 }
 
+/* Packets leaving via this function must come back through nf_reinject(). */
+int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
+            struct nf_hook_entry **entryp, unsigned int verdict)
+{
+       struct nf_hook_entry *entry = *entryp;
+       int ret;
+
+       ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS);
+       if (ret < 0) {
+               if (ret == -ESRCH &&
+                   (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) {
+                       *entryp = rcu_dereference(entry->next);
+                       return 1;
+               }
+               kfree_skb(skb);
+       }
+
+       return 0;
+}
+
+static unsigned int nf_iterate(struct sk_buff *skb,
+                              struct nf_hook_state *state,
+                              struct nf_hook_entry **entryp)
+{
+       unsigned int verdict;
+
+       do {
+repeat:
+               verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
+               if (verdict != NF_ACCEPT) {
+                       if (verdict != NF_REPEAT)
+                               return verdict;
+                       goto repeat;
+               }
+               *entryp = rcu_dereference((*entryp)->next);
+       } while (*entryp);
+
+       return NF_ACCEPT;
+}
+
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
-       struct nf_hook_entry *hook_entry;
+       struct nf_hook_entry *hook_entry = entry->hook;
+       struct nf_hook_ops *elem = &hook_entry->ops;
        struct sk_buff *skb = entry->skb;
        const struct nf_afinfo *afinfo;
-       struct nf_hook_ops *elem;
        int err;
 
-       hook_entry = rcu_dereference(entry->state.hook_entries);
-       elem = &hook_entry->ops;
-
        nf_queue_entry_release_refs(entry);
 
        /* Continue traversal iff userspace said ok... */
@@ -184,29 +217,27 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
                        verdict = NF_DROP;
        }
 
-       entry->state.thresh = INT_MIN;
-
        if (verdict == NF_ACCEPT) {
-       next_hook:
-               verdict = nf_iterate(skb, &entry->state, &hook_entry);
+               hook_entry = rcu_dereference(hook_entry->next);
+               if (hook_entry)
+next_hook:
+                       verdict = nf_iterate(skb, &entry->state, &hook_entry);
        }
 
        switch (verdict & NF_VERDICT_MASK) {
        case NF_ACCEPT:
        case NF_STOP:
+okfn:
                local_bh_disable();
                entry->state.okfn(entry->state.net, entry->state.sk, skb);
                local_bh_enable();
                break;
        case NF_QUEUE:
-               RCU_INIT_POINTER(entry->state.hook_entries, hook_entry);
-               err = nf_queue(skb, &entry->state,
-                              verdict >> NF_VERDICT_QBITS);
-               if (err < 0) {
-                       if (err == -ESRCH &&
-                          (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
+               err = nf_queue(skb, &entry->state, &hook_entry, verdict);
+               if (err == 1) {
+                       if (hook_entry)
                                goto next_hook;
-                       kfree_skb(skb);
+                       goto okfn;
                }
                break;
        case NF_STOLEN:
index c8a4a48bced988a29cd19df06a00117ea026c6ad..7c6d1fbe38b9aafc668ca39ee6e096d64f79bc7f 100644 (file)
@@ -24,7 +24,7 @@
 #include <net/netfilter/nf_conntrack_synproxy.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 
-int synproxy_net_id;
+unsigned int synproxy_net_id;
 EXPORT_SYMBOL_GPL(synproxy_net_id);
 
 bool
index b70d3ea1430e7db49c4a4fc86f87dbb47cfdbe8c..026581b04ea8d16b805183332ce216aac9c36ebc 100644 (file)
@@ -2956,12 +2956,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
 
        err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
        if (err < 0)
-               goto err2;
+               goto err3;
 
        list_add_tail_rcu(&set->list, &table->sets);
        table->use++;
        return 0;
 
+err3:
+       ops->destroy(set);
 err2:
        kfree(set);
 err1:
@@ -3452,14 +3454,15 @@ void *nft_set_elem_init(const struct nft_set *set,
        return elem;
 }
 
-void nft_set_elem_destroy(const struct nft_set *set, void *elem)
+void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+                         bool destroy_expr)
 {
        struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
 
        nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
        if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
                nft_data_uninit(nft_set_ext_data(ext), set->dtype);
-       if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+       if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
                nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
 
        kfree(elem);
@@ -3565,6 +3568,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                dreg = nft_type_to_reg(set->dtype);
                list_for_each_entry(binding, &set->bindings, list) {
                        struct nft_ctx bind_ctx = {
+                               .net    = ctx->net,
                                .afi    = ctx->afi,
                                .table  = ctx->table,
                                .chain  = (struct nft_chain *)binding->chain,
@@ -3812,7 +3816,7 @@ void nft_set_gc_batch_release(struct rcu_head *rcu)
 
        gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
        for (i = 0; i < gcb->head.cnt; i++)
-               nft_set_elem_destroy(gcb->head.set, gcb->elems[i]);
+               nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
        kfree(gcb);
 }
 EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
@@ -4030,7 +4034,7 @@ static void nf_tables_commit_release(struct nft_trans *trans)
                break;
        case NFT_MSG_DELSETELEM:
                nft_set_elem_destroy(nft_trans_elem_set(trans),
-                                    nft_trans_elem(trans).priv);
+                                    nft_trans_elem(trans).priv, true);
                break;
        }
        kfree(trans);
@@ -4171,7 +4175,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
                break;
        case NFT_MSG_NEWSETELEM:
                nft_set_elem_destroy(nft_trans_elem_set(trans),
-                                    nft_trans_elem(trans).priv);
+                                    nft_trans_elem(trans).priv, true);
                break;
        }
        kfree(trans);
@@ -4421,9 +4425,9 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
  *     Otherwise a 0 is returned and the attribute value is stored in the
  *     destination variable.
  */
-unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
+int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
 {
-       int val;
+       u32 val;
 
        val = ntohl(nla_get_be32(attr));
        if (val > max)
index 0dd5c695482f64b248de0906fcbbd1dcb8df6364..65dbeadcb11888263527dfff874288ce437744d8 100644 (file)
@@ -53,10 +53,10 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info,
 
        nft_trace_notify(info);
 
-       nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
-                    pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
-                    chain->table->name, chain->name, comments[type],
-                    rulenum);
+       nf_log_trace(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb,
+                    nft_in(pkt), nft_out(pkt), &trace_loginfo,
+                    "TRACE: %s:%s:%s:%u ",
+                    chain->table->name, chain->name, comments[type], rulenum);
 }
 
 static inline void nft_trace_packet(struct nft_traceinfo *info,
@@ -124,7 +124,7 @@ unsigned int
 nft_do_chain(struct nft_pktinfo *pkt, void *priv)
 {
        const struct nft_chain *chain = priv, *basechain = chain;
-       const struct net *net = pkt->net;
+       const struct net *net = nft_net(pkt);
        const struct nft_rule *rule;
        const struct nft_expr *expr, *last;
        struct nft_regs regs;
@@ -178,6 +178,7 @@ next_rule:
        case NF_ACCEPT:
        case NF_DROP:
        case NF_QUEUE:
+       case NF_STOLEN:
                nft_trace_packet(&info, chain, rule,
                                 rulenum, NFT_TRACETYPE_RULE);
                return regs.verdict.code;
@@ -231,68 +232,40 @@ next_rule:
 }
 EXPORT_SYMBOL_GPL(nft_do_chain);
 
+static struct nft_expr_type *nft_basic_types[] = {
+       &nft_imm_type,
+       &nft_cmp_type,
+       &nft_lookup_type,
+       &nft_bitwise_type,
+       &nft_byteorder_type,
+       &nft_payload_type,
+       &nft_dynset_type,
+       &nft_range_type,
+};
+
 int __init nf_tables_core_module_init(void)
 {
-       int err;
-
-       err = nft_immediate_module_init();
-       if (err < 0)
-               goto err1;
-
-       err = nft_cmp_module_init();
-       if (err < 0)
-               goto err2;
-
-       err = nft_lookup_module_init();
-       if (err < 0)
-               goto err3;
-
-       err = nft_bitwise_module_init();
-       if (err < 0)
-               goto err4;
+       int err, i;
 
-       err = nft_byteorder_module_init();
-       if (err < 0)
-               goto err5;
-
-       err = nft_payload_module_init();
-       if (err < 0)
-               goto err6;
-
-       err = nft_dynset_module_init();
-       if (err < 0)
-               goto err7;
-
-       err = nft_range_module_init();
-       if (err < 0)
-               goto err8;
+       for (i = 0; i < ARRAY_SIZE(nft_basic_types); i++) {
+               err = nft_register_expr(nft_basic_types[i]);
+               if (err)
+                       goto err;
+       }
 
        return 0;
-err8:
-       nft_dynset_module_exit();
-err7:
-       nft_payload_module_exit();
-err6:
-       nft_byteorder_module_exit();
-err5:
-       nft_bitwise_module_exit();
-err4:
-       nft_lookup_module_exit();
-err3:
-       nft_cmp_module_exit();
-err2:
-       nft_immediate_module_exit();
-err1:
+
+err:
+       while (i-- > 0)
+               nft_unregister_expr(nft_basic_types[i]);
        return err;
 }
 
 void nf_tables_core_module_exit(void)
 {
-       nft_dynset_module_exit();
-       nft_payload_module_exit();
-       nft_byteorder_module_exit();
-       nft_bitwise_module_exit();
-       nft_lookup_module_exit();
-       nft_cmp_module_exit();
-       nft_immediate_module_exit();
+       int i;
+
+       i = ARRAY_SIZE(nft_basic_types);
+       while (i-- > 0)
+               nft_unregister_expr(nft_basic_types[i]);
 }
index ab695f8e2d294b0c69df035cd3d153d03ab34fce..12eb9041dca284a398a2c1ac64924ba773a18489 100644 (file)
@@ -171,7 +171,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
        unsigned int size;
        int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE;
 
-       if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE))
+       if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE))
                return;
 
        size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
@@ -207,7 +207,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
        nfmsg->version          = NFNETLINK_V0;
        nfmsg->res_id           = 0;
 
-       if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf)))
+       if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt))))
                goto nla_put_failure;
 
        if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
@@ -249,7 +249,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
                goto nla_put_failure;
 
        if (!info->packet_dumped) {
-               if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out))
+               if (nf_trace_fill_dev_info(skb, nft_in(pkt), nft_out(pkt)))
                        goto nla_put_failure;
 
                if (nf_trace_fill_pkt_info(skb, pkt))
@@ -258,7 +258,7 @@ void nft_trace_notify(struct nft_traceinfo *info)
        }
 
        nlmsg_end(skb, nlh);
-       nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
+       nfnetlink_send(skb, nft_net(pkt), 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
        return;
 
  nla_put_failure:
index eb086a192c5a0d8c0f64bbe0c52818b8afe4fa90..763cb4d54e8d45c8f59a68020d7261070a188974 100644 (file)
@@ -80,7 +80,7 @@ struct nfulnl_instance {
 
 #define INSTANCE_BUCKETS       16
 
-static int nfnl_log_net_id __read_mostly;
+static unsigned int nfnl_log_net_id __read_mostly;
 
 struct nfnl_log_net {
        spinlock_t instances_lock;
@@ -330,7 +330,7 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size,
         * message.  WARNING: has to be <= 128k due to slab restrictions */
 
        n = max(inst_size, pkt_size);
-       skb = alloc_skb(n, GFP_ATOMIC);
+       skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
        if (!skb) {
                if (n > pkt_size) {
                        /* try to allocate only as much as we need for current
index af832c5260485f3f99a3dae8fdf0019f896350ef..be7627b8040057c4b06b5961e7cae4c0c5b10d66 100644 (file)
@@ -69,7 +69,7 @@ struct nfqnl_instance {
  * Following fields are dirtied for each queued packet,
  * keep them in same cache line if possible.
  */
-       spinlock_t      lock;
+       spinlock_t      lock    ____cacheline_aligned_in_smp;
        unsigned int    queue_total;
        unsigned int    id_sequence;            /* 'sequence' of pkt ids */
        struct list_head queue_list;            /* packets in queue */
@@ -77,7 +77,7 @@ struct nfqnl_instance {
 
 typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
 
-static int nfnl_queue_net_id __read_mostly;
+static unsigned int nfnl_queue_net_id __read_mostly;
 
 #define INSTANCE_BUCKETS       16
 struct nfnl_queue_net {
@@ -919,7 +919,7 @@ static struct notifier_block nfqnl_dev_notifier = {
 
 static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr)
 {
-       return rcu_access_pointer(entry->state.hook_entries) ==
+       return rcu_access_pointer(entry->hook) ==
                (struct nf_hook_entry *)entry_ptr;
 }
 
index 31c15ed2e5fcb1cce63c620650d17bc0942a8ce2..877d9acd91ef5c616c43d00f265facbfe4c2d334 100644 (file)
@@ -121,7 +121,6 @@ nla_put_failure:
        return -1;
 }
 
-static struct nft_expr_type nft_bitwise_type;
 static const struct nft_expr_ops nft_bitwise_ops = {
        .type           = &nft_bitwise_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
@@ -130,20 +129,10 @@ static const struct nft_expr_ops nft_bitwise_ops = {
        .dump           = nft_bitwise_dump,
 };
 
-static struct nft_expr_type nft_bitwise_type __read_mostly = {
+struct nft_expr_type nft_bitwise_type __read_mostly = {
        .name           = "bitwise",
        .ops            = &nft_bitwise_ops,
        .policy         = nft_bitwise_policy,
        .maxattr        = NFTA_BITWISE_MAX,
        .owner          = THIS_MODULE,
 };
-
-int __init nft_bitwise_module_init(void)
-{
-       return nft_register_expr(&nft_bitwise_type);
-}
-
-void nft_bitwise_module_exit(void)
-{
-       nft_unregister_expr(&nft_bitwise_type);
-}
index ee63d981268d9ebb8fd42329bd0e242195e09003..13d4e421a6b33ccc44ff3c70ab6bb9c38d256822 100644 (file)
@@ -169,7 +169,6 @@ nla_put_failure:
        return -1;
 }
 
-static struct nft_expr_type nft_byteorder_type;
 static const struct nft_expr_ops nft_byteorder_ops = {
        .type           = &nft_byteorder_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
@@ -178,20 +177,10 @@ static const struct nft_expr_ops nft_byteorder_ops = {
        .dump           = nft_byteorder_dump,
 };
 
-static struct nft_expr_type nft_byteorder_type __read_mostly = {
+struct nft_expr_type nft_byteorder_type __read_mostly = {
        .name           = "byteorder",
        .ops            = &nft_byteorder_ops,
        .policy         = nft_byteorder_policy,
        .maxattr        = NFTA_BYTEORDER_MAX,
        .owner          = THIS_MODULE,
 };
-
-int __init nft_byteorder_module_init(void)
-{
-       return nft_register_expr(&nft_byteorder_type);
-}
-
-void nft_byteorder_module_exit(void)
-{
-       nft_unregister_expr(&nft_byteorder_type);
-}
index 2e53739812b1712af2f60762ed5c8a029facd5a0..2b96effeadc1bc708a1f16e89342c8d39c4c4da7 100644 (file)
@@ -84,9 +84,6 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
        if (err < 0)
                return err;
 
-       if (desc.len > U8_MAX)
-               return -ERANGE;
-
        priv->op  = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
        priv->len = desc.len;
        return 0;
@@ -110,7 +107,6 @@ nla_put_failure:
        return -1;
 }
 
-static struct nft_expr_type nft_cmp_type;
 static const struct nft_expr_ops nft_cmp_ops = {
        .type           = &nft_cmp_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
@@ -211,20 +207,10 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
                return &nft_cmp_ops;
 }
 
-static struct nft_expr_type nft_cmp_type __read_mostly = {
+struct nft_expr_type nft_cmp_type __read_mostly = {
        .name           = "cmp",
        .select_ops     = nft_cmp_select_ops,
        .policy         = nft_cmp_policy,
        .maxattr        = NFTA_CMP_MAX,
        .owner          = THIS_MODULE,
 };
-
-int __init nft_cmp_module_init(void)
-{
-       return nft_register_expr(&nft_cmp_type);
-}
-
-void nft_cmp_module_exit(void)
-{
-       nft_unregister_expr(&nft_cmp_type);
-}
index d7b0d171172ad792321d3d46eaf413791c4174a5..6837348c8993b4f131b189423d4610377f9c2109 100644 (file)
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -518,15 +519,61 @@ static struct nft_expr_type nft_ct_type __read_mostly = {
        .owner          = THIS_MODULE,
 };
 
+static void nft_notrack_eval(const struct nft_expr *expr,
+                            struct nft_regs *regs,
+                            const struct nft_pktinfo *pkt)
+{
+       struct sk_buff *skb = pkt->skb;
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct;
+
+       ct = nf_ct_get(pkt->skb, &ctinfo);
+       /* Previously seen (loopback or untracked)?  Ignore. */
+       if (ct)
+               return;
+
+       ct = nf_ct_untracked_get();
+       atomic_inc(&ct->ct_general.use);
+       skb->nfct = &ct->ct_general;
+       skb->nfctinfo = IP_CT_NEW;
+}
+
+static struct nft_expr_type nft_notrack_type;
+static const struct nft_expr_ops nft_notrack_ops = {
+       .type           = &nft_notrack_type,
+       .size           = NFT_EXPR_SIZE(0),
+       .eval           = nft_notrack_eval,
+};
+
+static struct nft_expr_type nft_notrack_type __read_mostly = {
+       .name           = "notrack",
+       .ops            = &nft_notrack_ops,
+       .owner          = THIS_MODULE,
+};
+
 static int __init nft_ct_module_init(void)
 {
+       int err;
+
        BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > NFT_REG_SIZE);
 
-       return nft_register_expr(&nft_ct_type);
+       err = nft_register_expr(&nft_ct_type);
+       if (err < 0)
+               return err;
+
+       err = nft_register_expr(&nft_notrack_type);
+       if (err < 0)
+               goto err1;
+
+       return 0;
+err1:
+       nft_unregister_expr(&nft_ct_type);
+       return err;
 }
 
 static void __exit nft_ct_module_exit(void)
 {
+       nft_unregister_expr(&nft_notrack_type);
        nft_unregister_expr(&nft_ct_type);
 }
 
@@ -536,3 +583,4 @@ module_exit(nft_ct_module_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_ALIAS_NFT_EXPR("ct");
+MODULE_ALIAS_NFT_EXPR("notrack");
index e3b83c31da2e56ee9932d4d3d22dc8acfd87a617..7de2f46734a428d0938fef91aa914865c62d680e 100644 (file)
@@ -44,18 +44,22 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
                                 &regs->data[priv->sreg_key],
                                 &regs->data[priv->sreg_data],
                                 timeout, GFP_ATOMIC);
-       if (elem == NULL) {
-               if (set->size)
-                       atomic_dec(&set->nelems);
-               return NULL;
-       }
+       if (elem == NULL)
+               goto err1;
 
        ext = nft_set_elem_ext(set, elem);
        if (priv->expr != NULL &&
            nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
-               return NULL;
+               goto err2;
 
        return elem;
+
+err2:
+       nft_set_elem_destroy(set, elem, false);
+err1:
+       if (set->size)
+               atomic_dec(&set->nelems);
+       return NULL;
 }
 
 static void nft_dynset_eval(const struct nft_expr *expr,
@@ -139,6 +143,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
                        return PTR_ERR(set);
        }
 
+       if (set->ops->update == NULL)
+               return -EOPNOTSUPP;
+
        if (set->flags & NFT_SET_CONSTANT)
                return -EBUSY;
 
@@ -158,7 +165,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
        if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
                if (!(set->flags & NFT_SET_TIMEOUT))
                        return -EINVAL;
-               timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT]));
+               timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64(
+                                               tb[NFTA_DYNSET_TIMEOUT])));
        }
 
        priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
@@ -246,7 +254,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
                goto nla_put_failure;
        if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name))
                goto nla_put_failure;
-       if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout),
+       if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT,
+                        cpu_to_be64(jiffies_to_msecs(priv->timeout)),
                         NFTA_DYNSET_PAD))
                goto nla_put_failure;
        if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr))
@@ -259,7 +268,6 @@ nla_put_failure:
        return -1;
 }
 
-static struct nft_expr_type nft_dynset_type;
 static const struct nft_expr_ops nft_dynset_ops = {
        .type           = &nft_dynset_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_dynset)),
@@ -269,20 +277,10 @@ static const struct nft_expr_ops nft_dynset_ops = {
        .dump           = nft_dynset_dump,
 };
 
-static struct nft_expr_type nft_dynset_type __read_mostly = {
+struct nft_expr_type nft_dynset_type __read_mostly = {
        .name           = "dynset",
        .ops            = &nft_dynset_ops,
        .policy         = nft_dynset_policy,
        .maxattr        = NFTA_DYNSET_MAX,
        .owner          = THIS_MODULE,
 };
-
-int __init nft_dynset_module_init(void)
-{
-       return nft_register_expr(&nft_dynset_type);
-}
-
-void nft_dynset_module_exit(void)
-{
-       nft_unregister_expr(&nft_dynset_type);
-}
index a84cf3d6605661aa8bb8966e226a79ea1b63786a..47beb3abcc9daf46e084c0f189eaf7091d11241e 100644 (file)
@@ -59,7 +59,8 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
                           const struct nlattr * const tb[])
 {
        struct nft_exthdr *priv = nft_expr_priv(expr);
-       u32 offset, len, err;
+       u32 offset, len;
+       int err;
 
        if (tb[NFTA_EXTHDR_DREG] == NULL ||
            tb[NFTA_EXTHDR_TYPE] == NULL ||
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
new file mode 100644 (file)
index 0000000..249c9b8
--- /dev/null
@@ -0,0 +1,159 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Generic part shared by ipv4 and ipv6 backends.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_fib.h>
+
+const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = {
+       [NFTA_FIB_DREG]         = { .type = NLA_U32 },
+       [NFTA_FIB_RESULT]       = { .type = NLA_U32 },
+       [NFTA_FIB_FLAGS]        = { .type = NLA_U32 },
+};
+EXPORT_SYMBOL(nft_fib_policy);
+
+#define NFTA_FIB_F_ALL (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR | \
+                       NFTA_FIB_F_MARK | NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)
+
+/*
+ * Check that a fib expression sits in a chain whose hooks suit its lookup.
+ *
+ * OIF/OIFNAME results are only accepted in PRE_ROUTING.  ADDRTYPE results
+ * are accepted in PRE_ROUTING, LOCAL_IN and FORWARD when NFTA_FIB_F_IIF is
+ * set, in LOCAL_OUT, POST_ROUTING and FORWARD when NFTA_FIB_F_OIF is set,
+ * and in all five inet hooks otherwise.  @data is not used.
+ *
+ * Returns -EINVAL for any other result type, else the return value of
+ * nft_chain_validate_hooks().
+ */
+int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                    const struct nft_data **data)
+{
+       const unsigned int hooks_in = (1 << NF_INET_PRE_ROUTING) |
+                                     (1 << NF_INET_LOCAL_IN);
+       const unsigned int hooks_out = (1 << NF_INET_LOCAL_OUT) |
+                                      (1 << NF_INET_POST_ROUTING);
+       const struct nft_fib *fib = nft_expr_priv(expr);
+       unsigned int allowed;
+
+       switch (fib->result) {
+       case NFT_FIB_RESULT_OIF: /* fallthrough */
+       case NFT_FIB_RESULT_OIFNAME:
+               allowed = 1 << NF_INET_PRE_ROUTING;
+               break;
+       case NFT_FIB_RESULT_ADDRTYPE:
+               /* FORWARD is permitted for every address-type lookup. */
+               allowed = 1 << NF_INET_FORWARD;
+               if (fib->flags & NFTA_FIB_F_IIF)
+                       allowed |= hooks_in;
+               else if (fib->flags & NFTA_FIB_F_OIF)
+                       allowed |= hooks_out;
+               else
+                       allowed |= hooks_in | hooks_out;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return nft_chain_validate_hooks(ctx->chain, allowed);
+}
+EXPORT_SYMBOL_GPL(nft_fib_validate);
+
+/*
+ * Parse and validate the netlink attributes of a fib expression.
+ *
+ * NFTA_FIB_DREG, NFTA_FIB_RESULT and NFTA_FIB_FLAGS are all mandatory.
+ * The flags must contain no bit outside NFTA_FIB_F_ALL, must select exactly
+ * one of saddr/daddr and at most one of iif/oif.  The oif flag is refused
+ * for the OIF and OIFNAME result types.
+ *
+ * Returns 0 on success, -EINVAL for a malformed request, or the error
+ * reported by the register-store / chain-hook validation.
+ */
+int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+                const struct nlattr * const tb[])
+{
+       struct nft_fib *priv = nft_expr_priv(expr);
+       unsigned int len;
+       int err;
+
+       if (!tb[NFTA_FIB_DREG] || !tb[NFTA_FIB_RESULT] || !tb[NFTA_FIB_FLAGS])
+               return -EINVAL;
+
+       priv->flags = ntohl(nla_get_be32(tb[NFTA_FIB_FLAGS]));
+
+       if (priv->flags == 0 || (priv->flags & ~NFTA_FIB_F_ALL))
+               return -EINVAL;
+
+       /* saddr and daddr are mutually exclusive ... */
+       if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) ==
+                          (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR))
+               return -EINVAL;
+       /* ... and so are iif and oif ... */
+       if ((priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) ==
+                          (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF))
+               return -EINVAL;
+       /* ... but one of saddr/daddr has to be given. */
+       if ((priv->flags & (NFTA_FIB_F_SADDR | NFTA_FIB_F_DADDR)) == 0)
+               return -EINVAL;
+
+       /* Network-to-host conversion, same as for the flags above. */
+       priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
+       priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]);
+
+       /* len is the number of register bytes the result will occupy. */
+       switch (priv->result) {
+       case NFT_FIB_RESULT_OIF:
+               if (priv->flags & NFTA_FIB_F_OIF)
+                       return -EINVAL;
+               len = sizeof(int);
+               break;
+       case NFT_FIB_RESULT_OIFNAME:
+               if (priv->flags & NFTA_FIB_F_OIF)
+                       return -EINVAL;
+               len = IFNAMSIZ;
+               break;
+       case NFT_FIB_RESULT_ADDRTYPE:
+               len = sizeof(u32);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       err = nft_validate_register_store(ctx, priv->dreg, NULL,
+                                         NFT_DATA_VALUE, len);
+       if (err < 0)
+               return err;
+
+       return nft_fib_validate(ctx, expr, NULL);
+}
+EXPORT_SYMBOL_GPL(nft_fib_init);
+
+/*
+ * Emit the expression's destination register, result type and flags as
+ * netlink attributes on @skb.  Returns 0 on success, or -1 as soon as one
+ * of the attributes cannot be added.
+ */
+int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_fib *fib = nft_expr_priv(expr);
+
+       if (nft_dump_register(skb, NFTA_FIB_DREG, fib->dreg) ||
+           nla_put_be32(skb, NFTA_FIB_RESULT, htonl(fib->result)) ||
+           nla_put_be32(skb, NFTA_FIB_FLAGS, htonl(fib->flags)))
+               return -1;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(nft_fib_dump);
+
+/*
+ * Write a fib lookup result into the register storage at @reg.
+ *
+ * NFT_FIB_RESULT_OIF stores @index itself.  NFT_FIB_RESULT_OIFNAME stores
+ * the name of the device found for @index via nft_net(pkt), or an empty
+ * string when no device is found.  Any other result type is not expected
+ * here: warn once and store 0.
+ */
+void nft_fib_store_result(void *reg, enum nft_fib_result r,
+                         const struct nft_pktinfo *pkt, int index)
+{
+       const char *ifname = "";
+       struct net_device *netdev;
+       u32 *word = reg;
+
+       switch (r) {
+       case NFT_FIB_RESULT_OIF:
+               *word = index;
+               break;
+       case NFT_FIB_RESULT_OIFNAME:
+               netdev = dev_get_by_index_rcu(nft_net(pkt), index);
+               if (netdev)
+                       ifname = netdev->name;
+               /* strncpy() NUL-pads the remainder of the IFNAMSIZ bytes. */
+               strncpy(reg, ifname, IFNAMSIZ);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               *word = 0;
+               break;
+       }
+}
+EXPORT_SYMBOL_GPL(nft_fib_store_result);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c
new file mode 100644 (file)
index 0000000..9120fc7
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#include <net/netfilter/nft_fib.h>
+
+/*
+ * Evaluate a fib expression by handing the packet to the IPv4 or IPv6
+ * backend selected by nft_pf(pkt).  OIF/OIFNAME results go to the
+ * nft_fib{4,6}_eval() helpers, ADDRTYPE results to the
+ * nft_fib{4,6}_eval_type() helpers.  For any other protocol family or
+ * result type the verdict is set to NF_DROP.
+ */
+static void nft_fib_inet_eval(const struct nft_expr *expr,
+                             struct nft_regs *regs,
+                             const struct nft_pktinfo *pkt)
+{
+       const struct nft_fib *fib = nft_expr_priv(expr);
+
+       switch (fib->result) {
+       case NFT_FIB_RESULT_OIF:
+       case NFT_FIB_RESULT_OIFNAME:
+               switch (nft_pf(pkt)) {
+               case NFPROTO_IPV4:
+                       nft_fib4_eval(expr, regs, pkt);
+                       return;
+               case NFPROTO_IPV6:
+                       nft_fib6_eval(expr, regs, pkt);
+                       return;
+               }
+               break;
+       case NFT_FIB_RESULT_ADDRTYPE:
+               switch (nft_pf(pkt)) {
+               case NFPROTO_IPV4:
+                       nft_fib4_eval_type(expr, regs, pkt);
+                       return;
+               case NFPROTO_IPV6:
+                       nft_fib6_eval_type(expr, regs, pkt);
+                       return;
+               }
+               break;
+       default:
+               break;
+       }
+
+       /* No backend handled the packet. */
+       regs->verdict.code = NF_DROP;
+}
+
+static struct nft_expr_type nft_fib_inet_type;
+static const struct nft_expr_ops nft_fib_inet_ops = {
+       .type           = &nft_fib_inet_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_fib)),
+       .eval           = nft_fib_inet_eval,
+       .init           = nft_fib_init,
+       .dump           = nft_fib_dump,
+       .validate       = nft_fib_validate,
+};
+
+static struct nft_expr_type nft_fib_inet_type __read_mostly = {
+       .family         = NFPROTO_INET,
+       .name           = "fib",
+       .ops            = &nft_fib_inet_ops,
+       .policy         = nft_fib_policy,
+       .maxattr        = NFTA_FIB_MAX,
+       .owner          = THIS_MODULE,
+};
+
+static int __init nft_fib_inet_module_init(void)
+{
+       return nft_register_expr(&nft_fib_inet_type);
+}
+
+static void __exit nft_fib_inet_module_exit(void)
+{
+       nft_unregister_expr(&nft_fib_inet_type);
+}
+
+module_init(nft_fib_inet_module_init);
+module_exit(nft_fib_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_ALIAS_NFT_AF_EXPR(1, "fib");
index 09473b415b95b281c3264cda23d446dd5a3d56ab..97ad8e30e4b4be497edea40468a047c20a165ea0 100644 (file)
@@ -44,6 +44,7 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
        [NFTA_HASH_LEN]         = { .type = NLA_U32 },
        [NFTA_HASH_MODULUS]     = { .type = NLA_U32 },
        [NFTA_HASH_SEED]        = { .type = NLA_U32 },
+       [NFTA_HASH_OFFSET]      = { .type = NLA_U32 },
 };
 
 static int nft_hash_init(const struct nft_ctx *ctx,
@@ -56,7 +57,6 @@ static int nft_hash_init(const struct nft_ctx *ctx,
        if (!tb[NFTA_HASH_SREG] ||
            !tb[NFTA_HASH_DREG] ||
            !tb[NFTA_HASH_LEN]  ||
-           !tb[NFTA_HASH_SEED] ||
            !tb[NFTA_HASH_MODULUS])
                return -EINVAL;
 
@@ -79,7 +79,10 @@ static int nft_hash_init(const struct nft_ctx *ctx,
        if (priv->offset + priv->modulus - 1 < priv->offset)
                return -EOVERFLOW;
 
-       priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
+       if (tb[NFTA_HASH_SEED])
+               priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
+       else
+               get_random_bytes(&priv->seed, sizeof(priv->seed));
 
        return nft_validate_register_load(priv->sreg, len) &&
               nft_validate_register_store(ctx, priv->dreg, NULL,
index d17018ff54e6e67accc7f60e22f3cbfea6adfa94..728baf88295aab3d4f0e1272d551672ae5a2fb13 100644 (file)
@@ -54,9 +54,6 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
        if (err < 0)
                return err;
 
-       if (desc.len > U8_MAX)
-               return -ERANGE;
-
        priv->dlen = desc.len;
 
        priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
@@ -105,7 +102,6 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
        return 0;
 }
 
-static struct nft_expr_type nft_imm_type;
 static const struct nft_expr_ops nft_imm_ops = {
        .type           = &nft_imm_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
@@ -116,20 +112,10 @@ static const struct nft_expr_ops nft_imm_ops = {
        .validate       = nft_immediate_validate,
 };
 
-static struct nft_expr_type nft_imm_type __read_mostly = {
+struct nft_expr_type nft_imm_type __read_mostly = {
        .name           = "immediate",
        .ops            = &nft_imm_ops,
        .policy         = nft_immediate_policy,
        .maxattr        = NFTA_IMMEDIATE_MAX,
        .owner          = THIS_MODULE,
 };
-
-int __init nft_immediate_module_init(void)
-{
-       return nft_register_expr(&nft_imm_type);
-}
-
-void nft_immediate_module_exit(void)
-{
-       nft_unregister_expr(&nft_imm_type);
-}
index 1b01404bb33fa7832b9cbe14ba9ad24142f363f9..6271e40a3dd6d00b0a19f31a5ef5509185120505 100644 (file)
@@ -32,8 +32,9 @@ static void nft_log_eval(const struct nft_expr *expr,
 {
        const struct nft_log *priv = nft_expr_priv(expr);
 
-       nf_log_packet(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
-                     pkt->out, &priv->loginfo, "%s", priv->prefix);
+       nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb,
+                     nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s",
+                     priv->prefix);
 }
 
 static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
index 8166b6994cc75dcd6a556172ae541c13ce337fc4..d4f97fa7e21d0036690e229768ab097fc5220cfc 100644 (file)
@@ -35,9 +35,8 @@ static void nft_lookup_eval(const struct nft_expr *expr,
        const struct nft_set_ext *ext;
        bool found;
 
-       found = set->ops->lookup(pkt->net, set, &regs->data[priv->sreg], &ext) ^
-               priv->invert;
-
+       found = set->ops->lookup(nft_net(pkt), set, &regs->data[priv->sreg],
+                                &ext) ^ priv->invert;
        if (!found) {
                regs->verdict.code = NFT_BREAK;
                return;
@@ -155,7 +154,6 @@ nla_put_failure:
        return -1;
 }
 
-static struct nft_expr_type nft_lookup_type;
 static const struct nft_expr_ops nft_lookup_ops = {
        .type           = &nft_lookup_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
@@ -165,20 +163,10 @@ static const struct nft_expr_ops nft_lookup_ops = {
        .dump           = nft_lookup_dump,
 };
 
-static struct nft_expr_type nft_lookup_type __read_mostly = {
+struct nft_expr_type nft_lookup_type __read_mostly = {
        .name           = "lookup",
        .ops            = &nft_lookup_ops,
        .policy         = nft_lookup_policy,
        .maxattr        = NFTA_LOOKUP_MAX,
        .owner          = THIS_MODULE,
 };
-
-int __init nft_lookup_module_init(void)
-{
-       return nft_register_expr(&nft_lookup_type);
-}
-
-void nft_lookup_module_exit(void)
-{
-       nft_unregister_expr(&nft_lookup_type);
-}
index 6c1e0246706e06b492dbdb2ed068acf1fa2879da..66c7f4b4c49bda596d9151d162bc68d95d957560 100644 (file)
@@ -36,7 +36,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 {
        const struct nft_meta *priv = nft_expr_priv(expr);
        const struct sk_buff *skb = pkt->skb;
-       const struct net_device *in = pkt->in, *out = pkt->out;
+       const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
        struct sock *sk;
        u32 *dest = &regs->data[priv->dreg];
 
@@ -49,7 +49,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
                *(__be16 *)dest = skb->protocol;
                break;
        case NFT_META_NFPROTO:
-               *dest = pkt->pf;
+               *dest = nft_pf(pkt);
                break;
        case NFT_META_L4PROTO:
                if (!pkt->tprot_set)
@@ -146,7 +146,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
                        break;
                }
 
-               switch (pkt->pf) {
+               switch (nft_pf(pkt)) {
                case NFPROTO_IPV4:
                        if (ipv4_is_multicast(ip_hdr(skb)->daddr))
                                *dest = PACKET_MULTICAST;
@@ -310,6 +310,11 @@ int nft_meta_set_validate(const struct nft_ctx *ctx,
        case NFPROTO_NETDEV:
                hooks = 1 << NF_NETDEV_INGRESS;
                break;
+       case NFPROTO_IPV4:
+       case NFPROTO_IPV6:
+       case NFPROTO_INET:
+               hooks = 1 << NF_INET_PRE_ROUTING;
+               break;
        default:
                return -EOPNOTSUPP;
        }
index 55bc5ab78d4a292b6326e57f2a12f0ee25232b20..a66b36097b8f4f3cc1d230d9943be852866fcfbd 100644 (file)
@@ -65,7 +65,7 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
                return -EOVERFLOW;
 
        priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
-       atomic_set(&priv->counter, 0);
+       atomic_set(&priv->counter, priv->modulus - 1);
 
        return nft_validate_register_store(ctx, priv->dreg, NULL,
                                           NFT_DATA_VALUE, sizeof(u32));
index b2f88617611aac450631e606174f5bfd58cd38fa..98fb5d7b8087f60f5b97109b2570169631286590 100644 (file)
@@ -148,7 +148,6 @@ nla_put_failure:
        return -1;
 }
 
-static struct nft_expr_type nft_payload_type;
 static const struct nft_expr_ops nft_payload_ops = {
        .type           = &nft_payload_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_payload)),
@@ -320,20 +319,10 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
                return &nft_payload_ops;
 }
 
-static struct nft_expr_type nft_payload_type __read_mostly = {
+struct nft_expr_type nft_payload_type __read_mostly = {
        .name           = "payload",
        .select_ops     = nft_payload_select_ops,
        .policy         = nft_payload_policy,
        .maxattr        = NFTA_PAYLOAD_MAX,
        .owner          = THIS_MODULE,
 };
-
-int __init nft_payload_module_init(void)
-{
-       return nft_register_expr(&nft_payload_type);
-}
-
-void nft_payload_module_exit(void)
-{
-       nft_unregister_expr(&nft_payload_type);
-}
index 393d359a1889fe30541271973fadc489936d5c44..3e19fa1230dc6b9274090257be97b91827699485 100644 (file)
@@ -43,7 +43,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
                        queue = priv->queuenum + cpu % priv->queues_total;
                } else {
                        queue = nfqueue_hash(pkt->skb, queue,
-                                            priv->queues_total, pkt->pf,
+                                            priv->queues_total, nft_pf(pkt),
                                             jhash_initval);
                }
        }
index c6d5358482d12ce81b8f4782e72ba2dcdbfb1caa..009062606697dc76ccf90d1a1a1c98b6ded00a1c 100644 (file)
@@ -28,22 +28,20 @@ static void nft_range_eval(const struct nft_expr *expr,
                         const struct nft_pktinfo *pkt)
 {
        const struct nft_range_expr *priv = nft_expr_priv(expr);
-       bool mismatch;
        int d1, d2;
 
        d1 = memcmp(&regs->data[priv->sreg], &priv->data_from, priv->len);
        d2 = memcmp(&regs->data[priv->sreg], &priv->data_to, priv->len);
        switch (priv->op) {
        case NFT_RANGE_EQ:
-               mismatch = (d1 < 0 || d2 > 0);
+               if (d1 < 0 || d2 > 0)
+                       regs->verdict.code = NFT_BREAK;
                break;
        case NFT_RANGE_NEQ:
-               mismatch = (d1 >= 0 && d2 <= 0);
+               if (d1 >= 0 && d2 <= 0)
+                       regs->verdict.code = NFT_BREAK;
                break;
        }
-
-       if (mismatch)
-               regs->verdict.code = NFT_BREAK;
 }
 
 static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = {
@@ -59,6 +57,7 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
        struct nft_range_expr *priv = nft_expr_priv(expr);
        struct nft_data_desc desc_from, desc_to;
        int err;
+       u32 op;
 
        err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from),
                            &desc_from, tb[NFTA_RANGE_FROM_DATA]);
@@ -80,7 +79,20 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
        if (err < 0)
                goto err2;
 
-       priv->op  = ntohl(nla_get_be32(tb[NFTA_RANGE_OP]));
+       err = nft_parse_u32_check(tb[NFTA_RANGE_OP], U8_MAX, &op);
+       if (err < 0)
+               goto err2;
+
+       switch (op) {
+       case NFT_RANGE_EQ:
+       case NFT_RANGE_NEQ:
+               break;
+       default:
+               err = -EINVAL;
+               goto err2;
+       }
+
+       priv->op  = op;
        priv->len = desc_from.len;
        return 0;
 err2:
@@ -110,7 +122,6 @@ nla_put_failure:
        return -1;
 }
 
-static struct nft_expr_type nft_range_type;
 static const struct nft_expr_ops nft_range_ops = {
        .type           = &nft_range_type,
        .size           = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)),
@@ -119,20 +130,10 @@ static const struct nft_expr_ops nft_range_ops = {
        .dump           = nft_range_dump,
 };
 
-static struct nft_expr_type nft_range_type __read_mostly = {
+struct nft_expr_type nft_range_type __read_mostly = {
        .name           = "range",
        .ops            = &nft_range_ops,
        .policy         = nft_range_policy,
        .maxattr        = NFTA_RANGE_MAX,
        .owner          = THIS_MODULE,
 };
-
-int __init nft_range_module_init(void)
-{
-       return nft_register_expr(&nft_range_type);
-}
-
-void nft_range_module_exit(void)
-{
-       nft_unregister_expr(&nft_range_type);
-}
index e79d9ca2ffee0002e734ee27880c512da3066900..9e90a02cb104dad81daf84208902e90390a8f504 100644 (file)
@@ -23,36 +23,36 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
 {
        struct nft_reject *priv = nft_expr_priv(expr);
 
-       switch (pkt->pf) {
+       switch (nft_pf(pkt)) {
        case NFPROTO_IPV4:
                switch (priv->type) {
                case NFT_REJECT_ICMP_UNREACH:
                        nf_send_unreach(pkt->skb, priv->icmp_code,
-                                       pkt->hook);
+                                       nft_hook(pkt));
                        break;
                case NFT_REJECT_TCP_RST:
-                       nf_send_reset(pkt->net, pkt->skb, pkt->hook);
+                       nf_send_reset(nft_net(pkt), pkt->skb, nft_hook(pkt));
                        break;
                case NFT_REJECT_ICMPX_UNREACH:
                        nf_send_unreach(pkt->skb,
                                        nft_reject_icmp_code(priv->icmp_code),
-                                       pkt->hook);
+                                       nft_hook(pkt));
                        break;
                }
                break;
        case NFPROTO_IPV6:
                switch (priv->type) {
                case NFT_REJECT_ICMP_UNREACH:
-                       nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code,
-                                        pkt->hook);
+                       nf_send_unreach6(nft_net(pkt), pkt->skb,
+                                        priv->icmp_code, nft_hook(pkt));
                        break;
                case NFT_REJECT_TCP_RST:
-                       nf_send_reset6(pkt->net, pkt->skb, pkt->hook);
+                       nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt));
                        break;
                case NFT_REJECT_ICMPX_UNREACH:
-                       nf_send_unreach6(pkt->net, pkt->skb,
+                       nf_send_unreach6(nft_net(pkt), pkt->skb,
                                         nft_reject_icmpv6_code(priv->icmp_code),
-                                        pkt->hook);
+                                        nft_hook(pkt));
                        break;
                }
                break;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
new file mode 100644 (file)
index 0000000..d3eb640
--- /dev/null
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2016 Anders K. Pedersen <akp@cohaesio.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/dst.h>
+#include <net/ip6_route.h>
+#include <net/route.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_rt {
+       enum nft_rt_keys        key:8;
+       enum nft_registers      dreg:8;
+};
+
+void nft_rt_get_eval(const struct nft_expr *expr,
+                    struct nft_regs *regs,
+                    const struct nft_pktinfo *pkt)
+{
+       const struct nft_rt *priv = nft_expr_priv(expr);
+       const struct sk_buff *skb = pkt->skb;
+       u32 *dest = &regs->data[priv->dreg];
+       const struct dst_entry *dst;
+
+       dst = skb_dst(skb);
+       if (!dst)
+               goto err;
+
+       switch (priv->key) {
+#ifdef CONFIG_IP_ROUTE_CLASSID
+       case NFT_RT_CLASSID:
+               *dest = dst->tclassid;
+               break;
+#endif
+       case NFT_RT_NEXTHOP4:
+               if (nft_pf(pkt) != NFPROTO_IPV4)
+                       goto err;
+
+               *dest = rt_nexthop((const struct rtable *)dst,
+                                  ip_hdr(skb)->daddr);
+               break;
+       case NFT_RT_NEXTHOP6:
+               if (nft_pf(pkt) != NFPROTO_IPV6)
+                       goto err;
+
+               memcpy(dest, rt6_nexthop((struct rt6_info *)dst,
+                                        &ipv6_hdr(skb)->daddr),
+                      sizeof(struct in6_addr));
+               break;
+       default:
+               WARN_ON(1);
+               goto err;
+       }
+       return;
+
+err:
+       regs->verdict.code = NFT_BREAK;
+}
+
+const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
+       [NFTA_RT_DREG]          = { .type = NLA_U32 },
+       [NFTA_RT_KEY]           = { .type = NLA_U32 },
+};
+
+int nft_rt_get_init(const struct nft_ctx *ctx,
+                   const struct nft_expr *expr,
+                   const struct nlattr * const tb[])
+{
+       struct nft_rt *priv = nft_expr_priv(expr);
+       unsigned int len;
+
+       if (tb[NFTA_RT_KEY] == NULL ||
+           tb[NFTA_RT_DREG] == NULL)
+               return -EINVAL;
+
+       priv->key = ntohl(nla_get_be32(tb[NFTA_RT_KEY]));
+       switch (priv->key) {
+#ifdef CONFIG_IP_ROUTE_CLASSID
+       case NFT_RT_CLASSID:
+#endif
+       case NFT_RT_NEXTHOP4:
+               len = sizeof(u32);
+               break;
+       case NFT_RT_NEXTHOP6:
+               len = sizeof(struct in6_addr);
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]);
+       return nft_validate_register_store(ctx, priv->dreg, NULL,
+                                          NFT_DATA_VALUE, len);
+}
+
+/*
+ * Emit the expression's key and destination register as netlink attributes
+ * on @skb.  Returns 0 on success, or -1 if either attribute cannot be added.
+ */
+int nft_rt_get_dump(struct sk_buff *skb,
+                   const struct nft_expr *expr)
+{
+       const struct nft_rt *rt = nft_expr_priv(expr);
+
+       if (nla_put_be32(skb, NFTA_RT_KEY, htonl(rt->key)) ||
+           nft_dump_register(skb, NFTA_RT_DREG, rt->dreg))
+               return -1;
+
+       return 0;
+}
+
+static struct nft_expr_type nft_rt_type;
+static const struct nft_expr_ops nft_rt_get_ops = {
+       .type           = &nft_rt_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_rt)),
+       .eval           = nft_rt_get_eval,
+       .init           = nft_rt_get_init,
+       .dump           = nft_rt_get_dump,
+};
+
+static struct nft_expr_type nft_rt_type __read_mostly = {
+       .name           = "rt",
+       .ops            = &nft_rt_get_ops,
+       .policy         = nft_rt_policy,
+       .maxattr        = NFTA_RT_MAX,
+       .owner          = THIS_MODULE,
+};
+
+static int __init nft_rt_module_init(void)
+{
+       return nft_register_expr(&nft_rt_type);
+}
+
+static void __exit nft_rt_module_exit(void)
+{
+       nft_unregister_expr(&nft_rt_type);
+}
+
+module_init(nft_rt_module_init);
+module_exit(nft_rt_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Anders K. Pedersen <akp@cohaesio.com>");
+MODULE_ALIAS_NFT_EXPR("rt");
index 3794cb2fc78876ce02eb35992cb0113d498367ba..a3dface3e6e6895e3d778c9378d4579b5723811d 100644 (file)
@@ -98,7 +98,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
                            const struct nft_set_ext **ext)
 {
        struct nft_hash *priv = nft_set_priv(set);
-       struct nft_hash_elem *he;
+       struct nft_hash_elem *he, *prev;
        struct nft_hash_cmp_arg arg = {
                .genmask = NFT_GENMASK_ANY,
                .set     = set,
@@ -112,15 +112,24 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
        he = new(set, expr, regs);
        if (he == NULL)
                goto err1;
-       if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
-                                        nft_hash_params))
+
+       prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
+                                               nft_hash_params);
+       if (IS_ERR(prev))
                goto err2;
+
+       /* Another cpu may race to insert the element with the same key */
+       if (prev) {
+               nft_set_elem_destroy(set, he, true);
+               he = prev;
+       }
+
 out:
        *ext = &he->ext;
        return true;
 
 err2:
-       nft_set_elem_destroy(set, he);
+       nft_set_elem_destroy(set, he, true);
 err1:
        return false;
 }
@@ -332,7 +341,7 @@ static int nft_hash_init(const struct nft_set *set,
 
 static void nft_hash_elem_destroy(void *ptr, void *arg)
 {
-       nft_set_elem_destroy((const struct nft_set *)arg, ptr);
+       nft_set_elem_destroy((const struct nft_set *)arg, ptr, true);
 }
 
 static void nft_hash_destroy(const struct nft_set *set)
index 38b5bda242f86fe4ca0e09d7f365892917d32c9f..36493a7cae8827fa41036f381d65d28d59b139b2 100644 (file)
@@ -266,7 +266,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
        while ((node = priv->root.rb_node) != NULL) {
                rb_erase(node, &priv->root);
                rbe = rb_entry(node, struct nft_rbtree_elem, node);
-               nft_set_elem_destroy(set, rbe);
+               nft_set_elem_destroy(set, rbe, true);
        }
 }
 
index e0aa7c1d0224154db4ef09c752b88ca5e52bc404..ad818e52859bee19eeab9e0d31832101782fa79e 100644 (file)
@@ -982,7 +982,7 @@ void xt_free_table_info(struct xt_table_info *info)
 }
 EXPORT_SYMBOL(xt_free_table_info);
 
-/* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
+/* Find table by name, grabs mutex & ref.  Returns NULL on error. */
 struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
                                    const char *name)
 {
@@ -1513,7 +1513,7 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
        if (!num_hooks)
                return ERR_PTR(-EINVAL);
 
-       ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
+       ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL);
        if (ops == NULL)
                return ERR_PTR(-ENOMEM);
 
index 4973cbddc446bd50a377765bec23128716ff630e..19247a17e5114f1e278e30508085463d7396959e 100644 (file)
@@ -132,9 +132,9 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
                goto errout;
 
        audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s",
-                        info->type, par->hooknum, skb->len,
-                        par->in ? par->in->name : "?",
-                        par->out ? par->out->name : "?");
+                        info->type, xt_hooknum(par), skb->len,
+                        xt_in(par) ? xt_inname(par) : "?",
+                        xt_out(par) ? xt_outname(par) : "?");
 
        if (skb->mark)
                audit_log_format(ab, " mark=%#x", skb->mark);
@@ -144,7 +144,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
                                 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
                                 ntohs(eth_hdr(skb)->h_proto));
 
-               if (par->family == NFPROTO_BRIDGE) {
+               if (xt_family(par) == NFPROTO_BRIDGE) {
                        switch (eth_hdr(skb)->h_proto) {
                        case htons(ETH_P_IP):
                                audit_ip4(ab, skb);
@@ -157,7 +157,7 @@ audit_tg(struct sk_buff *skb, const struct xt_action_param *par)
                }
        }
 
-       switch (par->family) {
+       switch (xt_family(par)) {
        case NFPROTO_IPV4:
                audit_ip4(ab, skb);
                break;
index 1763ab82bcd75c343aebc53a26a621e434788d47..c3b2017ebe41f4cc1df39127d2cf9c680a17b153 100644 (file)
@@ -32,15 +32,15 @@ static unsigned int
 log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_log_info *loginfo = par->targinfo;
+       struct net *net = xt_net(par);
        struct nf_loginfo li;
-       struct net *net = par->net;
 
        li.type = NF_LOG_TYPE_LOG;
        li.u.log.level = loginfo->level;
        li.u.log.logflags = loginfo->logflags;
 
-       nf_log_packet(net, par->family, par->hooknum, skb, par->in, par->out,
-                     &li, "%s", loginfo->prefix);
+       nf_log_packet(net, xt_family(par), xt_hooknum(par), skb, xt_in(par),
+                     xt_out(par), &li, "%s", loginfo->prefix);
        return XT_CONTINUE;
 }
 
index b253e07cb1c579cbad46eb745db0537ff0635b82..94d0b5411192ccfa86d0e8742488e4f7dd5fc52c 100644 (file)
@@ -33,8 +33,8 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
                netmask.ip6[i] = ~(range->min_addr.ip6[i] ^
                                   range->max_addr.ip6[i]);
 
-       if (par->hooknum == NF_INET_PRE_ROUTING ||
-           par->hooknum == NF_INET_LOCAL_OUT)
+       if (xt_hooknum(par) == NF_INET_PRE_ROUTING ||
+           xt_hooknum(par) == NF_INET_LOCAL_OUT)
                new_addr.in6 = ipv6_hdr(skb)->daddr;
        else
                new_addr.in6 = ipv6_hdr(skb)->saddr;
@@ -51,7 +51,7 @@ netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par)
        newrange.min_proto      = range->min_proto;
        newrange.max_proto      = range->max_proto;
 
-       return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
+       return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
 }
 
 static int netmap_tg6_checkentry(const struct xt_tgchk_param *par)
@@ -72,16 +72,16 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
        const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
        struct nf_nat_range newrange;
 
-       NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
-                    par->hooknum == NF_INET_POST_ROUTING ||
-                    par->hooknum == NF_INET_LOCAL_OUT ||
-                    par->hooknum == NF_INET_LOCAL_IN);
+       NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING ||
+                    xt_hooknum(par) == NF_INET_POST_ROUTING ||
+                    xt_hooknum(par) == NF_INET_LOCAL_OUT ||
+                    xt_hooknum(par) == NF_INET_LOCAL_IN);
        ct = nf_ct_get(skb, &ctinfo);
 
        netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
 
-       if (par->hooknum == NF_INET_PRE_ROUTING ||
-           par->hooknum == NF_INET_LOCAL_OUT)
+       if (xt_hooknum(par) == NF_INET_PRE_ROUTING ||
+           xt_hooknum(par) == NF_INET_LOCAL_OUT)
                new_ip = ip_hdr(skb)->daddr & ~netmask;
        else
                new_ip = ip_hdr(skb)->saddr & ~netmask;
@@ -96,7 +96,7 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par)
        newrange.max_proto   = mr->range[0].max;
 
        /* Hand modified range to generic setup. */
-       return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
+       return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
 }
 
 static int netmap_tg4_check(const struct xt_tgchk_param *par)
index 018eed7e1ff1e6f6c60dbe43a504e24c3860cf4d..c7f8958cea4a9e24ec26500f92a9af659dbfbb2a 100644 (file)
@@ -25,19 +25,20 @@ static unsigned int
 nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_nflog_info *info = par->targinfo;
+       struct net *net = xt_net(par);
        struct nf_loginfo li;
-       struct net *net = par->net;
 
        li.type              = NF_LOG_TYPE_ULOG;
        li.u.ulog.copy_len   = info->len;
        li.u.ulog.group      = info->group;
        li.u.ulog.qthreshold = info->threshold;
+       li.u.ulog.flags      = 0;
 
        if (info->flags & XT_NFLOG_F_COPY_LEN)
                li.u.ulog.flags |= NF_LOG_F_COPY_LEN;
 
-       nfulnl_log_packet(net, par->family, par->hooknum, skb, par->in,
-                         par->out, &li, info->prefix);
+       nfulnl_log_packet(net, xt_family(par), xt_hooknum(par), skb,
+                         xt_in(par), xt_out(par), &li, info->prefix);
        return XT_CONTINUE;
 }
 
index 8f1779ff7e30619825c43db9b06be07274bc2f8e..a360b99a958af224a2aa98fb6fe080a885b733ef 100644 (file)
@@ -43,7 +43,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
 
        if (info->queues_total > 1) {
                queue = nfqueue_hash(skb, queue, info->queues_total,
-                                    par->family, jhash_initval);
+                                    xt_family(par), jhash_initval);
        }
        return NF_QUEUE_NR(queue);
 }
@@ -98,7 +98,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
                        queue = info->queuenum + cpu % info->queues_total;
                } else {
                        queue = nfqueue_hash(skb, queue, info->queues_total,
-                                            par->family, jhash_initval);
+                                            xt_family(par), jhash_initval);
                }
        }
 
index 03f0b370e17876b5e1f4ed382260509bdf0f2235..651dce65a30b5fee461c15e10eadbc54c7e479ae 100644 (file)
@@ -31,7 +31,7 @@
 static unsigned int
 redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
-       return nf_nat_redirect_ipv6(skb, par->targinfo, par->hooknum);
+       return nf_nat_redirect_ipv6(skb, par->targinfo, xt_hooknum(par));
 }
 
 static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
@@ -62,7 +62,7 @@ static int redirect_tg4_check(const struct xt_tgchk_param *par)
 static unsigned int
 redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
-       return nf_nat_redirect_ipv4(skb, par->targinfo, par->hooknum);
+       return nf_nat_redirect_ipv4(skb, par->targinfo, xt_hooknum(par));
 }
 
 static struct xt_target redirect_tg_reg[] __read_mostly = {
index 872db2d0e2a9970642c50e933c0a2225f8dd4599..27241a767f17b4b27d24095a31e5e9a2d3e29ce4 100644 (file)
@@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
                return -1;
 
        if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
-               struct net *net = par->net;
+               struct net *net = xt_net(par);
                unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
                unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu);
 
@@ -172,7 +172,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
         * length IPv6 header of 60, ergo the default MSS value is 1220
         * Since no MSS was provided, we must use the default values
         */
-       if (par->family == NFPROTO_IPV4)
+       if (xt_family(par) == NFPROTO_IPV4)
                newmss = min(newmss, (u16)536);
        else
                newmss = min(newmss, (u16)1220);
index 0471db4032c5ea6eb839229124762cbf1901e53f..1c57ace75ae62be26e4a5b26ded9d84fb5b377a0 100644 (file)
@@ -33,7 +33,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
        const struct xt_tee_tginfo *info = par->targinfo;
        int oif = info->priv ? info->priv->oif : 0;
 
-       nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif);
+       nf_dup_ipv4(xt_net(par), skb, xt_hooknum(par), &info->gw.in, oif);
 
        return XT_CONTINUE;
 }
@@ -45,7 +45,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
        const struct xt_tee_tginfo *info = par->targinfo;
        int oif = info->priv ? info->priv->oif : 0;
 
-       nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif);
+       nf_dup_ipv6(xt_net(par), skb, xt_hooknum(par), &info->gw.in6, oif);
 
        return XT_CONTINUE;
 }
index 663c4c3c907284254a09e692b84f2c60567545de..dbd72cc40e42f74cfa2bc8d934399198dae45371 100644 (file)
@@ -364,7 +364,8 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_tproxy_target_info *tgi = par->targinfo;
 
-       return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value);
+       return tproxy_tg4(xt_net(par), skb, tgi->laddr, tgi->lport,
+                         tgi->mark_mask, tgi->mark_value);
 }
 
 static unsigned int
@@ -372,7 +373,8 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
 
-       return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
+       return tproxy_tg4(xt_net(par), skb, tgi->laddr.ip, tgi->lport,
+                         tgi->mark_mask, tgi->mark_value);
 }
 
 #ifdef XT_TPROXY_HAVE_IPV6
@@ -442,7 +444,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
                 * to a listener socket if there's one */
                struct sock *sk2;
 
-               sk2 = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
+               sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
                                            &iph->saddr,
                                            tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
                                            hp->source,
@@ -485,10 +487,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
         * addresses, this happens if the redirect already happened
         * and the current packet belongs to an already established
         * connection */
-       sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp, tproto,
+       sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
                                   &iph->saddr, &iph->daddr,
                                   hp->source, hp->dest,
-                                  par->in, NFT_LOOKUP_ESTABLISHED);
+                                  xt_in(par), NFT_LOOKUP_ESTABLISHED);
 
        laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
        lport = tgi->lport ? tgi->lport : hp->dest;
@@ -500,10 +502,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
        else if (!sk)
                /* no there's no established connection, check if
                 * there's a listener on the redirected addr/port */
-               sk = nf_tproxy_get_sock_v6(par->net, skb, thoff, hp,
+               sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp,
                                           tproto, &iph->saddr, laddr,
                                           hp->source, lport,
-                                          par->in, NFT_LOOKUP_LISTENER);
+                                          xt_in(par), NFT_LOOKUP_LISTENER);
 
        /* NOTE: assign_sock consumes our sk reference */
        if (sk && tproxy_sk_is_transparent(sk)) {
index 11d6091991a4ec569f6797023b75b241c3aa7575..e329dabde35f50a397c76d1962164f89e136bd08 100644 (file)
@@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev,
 static bool
 addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
-       struct net *net = par->net;
+       struct net *net = xt_net(par);
        const struct xt_addrtype_info *info = par->matchinfo;
        const struct iphdr *iph = ip_hdr(skb);
        bool ret = true;
@@ -143,19 +143,19 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 static bool
 addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
-       struct net *net = par->net;
+       struct net *net = xt_net(par);
        const struct xt_addrtype_info_v1 *info = par->matchinfo;
        const struct iphdr *iph;
        const struct net_device *dev = NULL;
        bool ret = true;
 
        if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN)
-               dev = par->in;
+               dev = xt_in(par);
        else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
-               dev = par->out;
+               dev = xt_out(par);
 
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-       if (par->family == NFPROTO_IPV6)
+       if (xt_family(par) == NFPROTO_IPV6)
                return addrtype_mt6(net, dev, skb, info);
 #endif
        iph = ip_hdr(skb);
index 96fa26b20b67dce9ed2d2abaaf6ce71dda363faf..9a9884a39c0e9cc806a3a6b0b16bbd387a26d220 100644 (file)
@@ -112,7 +112,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
         * know, matches should not alter packets, but we are doing this here
         * because we would need to add a PKTTYPE target for this sole purpose.
         */
-       if (!xt_cluster_is_multicast_addr(skb, par->family) &&
+       if (!xt_cluster_is_multicast_addr(skb, xt_family(par)) &&
            skb->pkt_type == PACKET_MULTICAST) {
                pskb->pkt_type = PACKET_HOST;
        }
index b6dc322593a34586aa7b0a0b73b23fcf9c47f5d2..bb3845339efd83803e243de0ff13d595227bd8ff 100644 (file)
@@ -317,7 +317,7 @@ static int count_them(struct net *net,
 static bool
 connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
-       struct net *net = par->net;
+       struct net *net = xt_net(par);
        const struct xt_connlimit_info *info = par->matchinfo;
        union nf_inet_addr addr;
        struct nf_conntrack_tuple tuple;
@@ -332,11 +332,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
                tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
                zone = nf_ct_zone(ct);
        } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
-                                     par->family, net, &tuple)) {
+                                     xt_family(par), net, &tuple)) {
                goto hotdrop;
        }
 
-       if (par->family == NFPROTO_IPV6) {
+       if (xt_family(par) == NFPROTO_IPV6) {
                const struct ipv6hdr *iph = ipv6_hdr(skb);
                memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
                       &iph->daddr : &iph->saddr, sizeof(addr.ip6));
@@ -347,7 +347,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
        }
 
        connections = count_them(net, info->data, tuple_ptr, &addr,
-                                &info->mask, par->family, zone);
+                                &info->mask, xt_family(par), zone);
        if (connections == 0)
                /* kmalloc failed, drop it entirely */
                goto hotdrop;
index 69f78e96fdb44a5c293a92e53a98559b1ddf2209..b83e158e116afc35f3a9ab7b739409b523cebb86 100644 (file)
@@ -44,7 +44,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
        u_int32_t newmark;
 
        ct = nf_ct_get(skb, &ctinfo);
-       if (ct == NULL)
+       if (ct == NULL || nf_ct_is_untracked(ct))
                return XT_CONTINUE;
 
        switch (info->mode) {
@@ -97,7 +97,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
        const struct nf_conn *ct;
 
        ct = nf_ct_get(skb, &ctinfo);
-       if (ct == NULL)
+       if (ct == NULL || nf_ct_is_untracked(ct))
                return false;
 
        return ((ct->mark & info->mask) == info->mark) ^ info->invert;
index a3b8f697cfc5a21fd92b048dc18396aa8de1a471..2dea15ebc55bbbc57baaed42cb8cbbe819733e0a 100644 (file)
@@ -200,22 +200,22 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
                return false;
 
        if (info->match_flags & XT_CONNTRACK_ORIGSRC)
-               if (conntrack_mt_origsrc(ct, info, par->family) ^
+               if (conntrack_mt_origsrc(ct, info, xt_family(par)) ^
                    !(info->invert_flags & XT_CONNTRACK_ORIGSRC))
                        return false;
 
        if (info->match_flags & XT_CONNTRACK_ORIGDST)
-               if (conntrack_mt_origdst(ct, info, par->family) ^
+               if (conntrack_mt_origdst(ct, info, xt_family(par)) ^
                    !(info->invert_flags & XT_CONNTRACK_ORIGDST))
                        return false;
 
        if (info->match_flags & XT_CONNTRACK_REPLSRC)
-               if (conntrack_mt_replsrc(ct, info, par->family) ^
+               if (conntrack_mt_replsrc(ct, info, xt_family(par)) ^
                    !(info->invert_flags & XT_CONNTRACK_REPLSRC))
                        return false;
 
        if (info->match_flags & XT_CONNTRACK_REPLDST)
-               if (conntrack_mt_repldst(ct, info, par->family) ^
+               if (conntrack_mt_repldst(ct, info, xt_family(par)) ^
                    !(info->invert_flags & XT_CONNTRACK_REPLDST))
                        return false;
 
index d9202cdd25c9b8982071312cc6c10c5479493d4d..96ebe1cdefec880dc9497a6a0f3f278705f41e63 100644 (file)
@@ -24,12 +24,12 @@ static bool devgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
        const struct xt_devgroup_info *info = par->matchinfo;
 
        if (info->flags & XT_DEVGROUP_MATCH_SRC &&
-           (((info->src_group ^ par->in->group) & info->src_mask ? 1 : 0) ^
+           (((info->src_group ^ xt_in(par)->group) & info->src_mask ? 1 : 0) ^
             ((info->flags & XT_DEVGROUP_INVERT_SRC) ? 1 : 0)))
                return false;
 
        if (info->flags & XT_DEVGROUP_MATCH_DST &&
-           (((info->dst_group ^ par->out->group) & info->dst_mask ? 1 : 0) ^
+           (((info->dst_group ^ xt_out(par)->group) & info->dst_mask ? 1 : 0) ^
             ((info->flags & XT_DEVGROUP_INVERT_DST) ? 1 : 0)))
                return false;
 
index 64670fc5d0e1a581f4e3b3885d8117c7a70b0edb..236ac8008909d3abdf1d236406fc0bb0bb48dffd 100644 (file)
@@ -58,7 +58,7 @@ static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
        const struct xt_tos_match_info *info = par->matchinfo;
 
-       if (par->family == NFPROTO_IPV4)
+       if (xt_family(par) == NFPROTO_IPV4)
                return ((ip_hdr(skb)->tos & info->tos_mask) ==
                       info->tos_value) ^ !!info->invert;
        else
index 2fab0c65aa94b66615d7ba86b67d24e2cb15e89f..10063408141d25bdd0f1a1241ffe6395bae753f6 100644 (file)
@@ -49,7 +49,7 @@ struct hashlimit_net {
        struct proc_dir_entry   *ip6t_hashlimit;
 };
 
-static int hashlimit_net_id;
+static unsigned int hashlimit_net_id;
 static inline struct hashlimit_net *hashlimit_pernet(struct net *net)
 {
        return net_generic(net, hashlimit_net_id);
@@ -431,7 +431,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo)
    CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie.
 */
 #define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24))
-#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24))
+#define MAX_CPJ (0xFFFFFFFFFFFFFFFFULL / (HZ*60*60*24))
 
 /* Repeated shift and or gives us all 1s, final shift and add 1 gives
  * us the power of 2 below the theoretical max, so GCC simply does a
@@ -473,7 +473,7 @@ static u64 user2credits(u64 user, int revision)
                return div64_u64(user * HZ * CREDITS_PER_JIFFY_v1,
                                 XT_HASHLIMIT_SCALE);
        } else {
-               if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+               if (user > 0xFFFFFFFFFFFFFFFFULL / (HZ*CREDITS_PER_JIFFY))
                        return div64_u64(user, XT_HASHLIMIT_SCALE_v2)
                                * HZ * CREDITS_PER_JIFFY;
 
index 89d53104c6b365b12c76ff684064bc5d032656c3..000e70377f85dd90fa61203ac4f18abfc62e57ec 100644 (file)
@@ -26,6 +26,8 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Fan Du <fan.du@windriver.com>");
 MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match");
+MODULE_ALIAS("ipt_ipcomp");
+MODULE_ALIAS("ip6t_ipcomp");
 
 /* Returns 1 if the spi is matched by the range, 0 otherwise */
 static inline bool
index 71a9d95e0a81bbdd17d94f37c15cf612c1781dd4..0fdc89064488050bf4e667b1c09a26169a326872 100644 (file)
@@ -48,9 +48,9 @@ static bool
 ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
        const struct xt_ipvs_mtinfo *data = par->matchinfo;
-       struct netns_ipvs *ipvs = net_ipvs(par->net);
+       struct netns_ipvs *ipvs = net_ipvs(xt_net(par));
        /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
-       const u_int8_t family = par->family;
+       const u_int8_t family = xt_family(par);
        struct ip_vs_iphdr iph;
        struct ip_vs_protocol *pp;
        struct ip_vs_conn *cp;
index ac1d3c3d09e72dbbfbf49f4dcdad6fc7600dcded..ec06fb1cb16fccd2bfe2bcb9698316a69780e50b 100644 (file)
@@ -42,29 +42,31 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
                        e = minfo->ports[++i];
                        pr_debug("src or dst matches with %d-%d?\n", s, e);
 
-                       if (minfo->flags == XT_MULTIPORT_SOURCE
-                           && src >= s && src <= e)
-                               return true ^ minfo->invert;
-                       if (minfo->flags == XT_MULTIPORT_DESTINATION
-                           && dst >= s && dst <= e)
-                               return true ^ minfo->invert;
-                       if (minfo->flags == XT_MULTIPORT_EITHER
-                           && ((dst >= s && dst <= e)
-                               || (src >= s && src <= e)))
-                               return true ^ minfo->invert;
+                       switch (minfo->flags) {
+                       case XT_MULTIPORT_SOURCE:
+                               return (src >= s && src <= e) ^ minfo->invert;
+                       case XT_MULTIPORT_DESTINATION:
+                               return (dst >= s && dst <= e) ^ minfo->invert;
+                       case XT_MULTIPORT_EITHER:
+                               return ((dst >= s && dst <= e) ||
+                                       (src >= s && src <= e)) ^ minfo->invert;
+                       default:
+                               break;
+                       }
                } else {
                        /* exact port matching */
                        pr_debug("src or dst matches with %d?\n", s);
 
-                       if (minfo->flags == XT_MULTIPORT_SOURCE
-                           && src == s)
-                               return true ^ minfo->invert;
-                       if (minfo->flags == XT_MULTIPORT_DESTINATION
-                           && dst == s)
-                               return true ^ minfo->invert;
-                       if (minfo->flags == XT_MULTIPORT_EITHER
-                           && (src == s || dst == s))
-                               return true ^ minfo->invert;
+                       switch (minfo->flags) {
+                       case XT_MULTIPORT_SOURCE:
+                               return (src == s) ^ minfo->invert;
+                       case XT_MULTIPORT_DESTINATION:
+                               return (dst == s) ^ minfo->invert;
+                       case XT_MULTIPORT_EITHER:
+                               return (src == s || dst == s) ^ minfo->invert;
+                       default:
+                               break;
+                       }
                }
        }
 
index cf327593852a2b75cf3d5eb9e9e8d07af27b17a8..cc0518fe598e459f27de36984e0313c73c43b61f 100644 (file)
@@ -26,7 +26,7 @@ static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
        nfnl_acct_update(skb, info->nfacct);
 
-       overquota = nfnl_acct_overquota(par->net, skb, info->nfacct);
+       overquota = nfnl_acct_overquota(xt_net(par), skb, info->nfacct);
 
        return overquota == NFACCT_UNDERQUOTA ? false : true;
 }
index 2455b69b58104366e4670967f7a169994af3ea97..c05fefcec238c0648fe2b2710dde7e60e1e37279 100644 (file)
@@ -201,7 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
        unsigned char opts[MAX_IPOPTLEN];
        const struct xt_osf_finger *kf;
        const struct xt_osf_user_finger *f;
-       struct net *net = p->net;
+       struct net *net = xt_net(p);
 
        if (!info)
                return false;
@@ -326,8 +326,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
                fcount++;
 
                if (info->flags & XT_OSF_LOG)
-                       nf_log_packet(net, p->family, p->hooknum, skb,
-                                     p->in, p->out, NULL,
+                       nf_log_packet(net, xt_family(p), xt_hooknum(p), skb,
+                                     xt_in(p), xt_out(p), NULL,
                                      "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",
                                      f->genre, f->version, f->subtype,
                                      &ip->saddr, ntohs(tcp->source),
@@ -341,8 +341,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
        rcu_read_unlock();
 
        if (!fcount && (info->flags & XT_OSF_LOG))
-               nf_log_packet(net, p->family, p->hooknum, skb, p->in,
-                             p->out, NULL,
+               nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p),
+                             xt_out(p), NULL,
                        "Remote OS is not known: %pI4:%u -> %pI4:%u\n",
                                &ip->saddr, ntohs(tcp->source),
                                &ip->daddr, ntohs(tcp->dest));
index a20e731b5b6c79a01e0762405932ed0685fbe002..16477df45b3bf12fc1e92c083a3314ad28ece042 100644 (file)
@@ -63,7 +63,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
        const struct xt_owner_match_info *info = par->matchinfo;
        const struct file *filp;
        struct sock *sk = skb_to_full_sk(skb);
-       struct net *net = par->net;
+       struct net *net = xt_net(par);
 
        if (sk == NULL || sk->sk_socket == NULL)
                return (info->match ^ info->invert) == 0;
index 5b645cb598fc261d52d8b44c6acc63ac8d388dea..57efb703ff18019ec0f341c56062f9a6b264cd79 100644 (file)
@@ -30,10 +30,10 @@ pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
        if (skb->pkt_type != PACKET_LOOPBACK)
                type = skb->pkt_type;
-       else if (par->family == NFPROTO_IPV4 &&
+       else if (xt_family(par) == NFPROTO_IPV4 &&
            ipv4_is_multicast(ip_hdr(skb)->daddr))
                type = PACKET_MULTICAST;
-       else if (par->family == NFPROTO_IPV6 &&
+       else if (xt_family(par) == NFPROTO_IPV6 &&
            ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
                type = PACKET_MULTICAST;
        else
index f23e97bb42d7c0f860cdf15258e530a11bb54bef..2b4ab189bba7bf6f47f9c5479af9c5b9acdd94c5 100644 (file)
@@ -116,9 +116,9 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par)
        int ret;
 
        if (info->flags & XT_POLICY_MATCH_IN)
-               ret = match_policy_in(skb, info, par->family);
+               ret = match_policy_in(skb, info, xt_family(par));
        else
-               ret = match_policy_out(skb, info, par->family);
+               ret = match_policy_out(skb, info, xt_family(par));
 
        if (ret < 0)
                ret = info->flags & XT_POLICY_MATCH_NONE ? true : false;
index e3b7a09b103e490a2c85364d8938b47565b9c66c..1d89a4eaf841a33e95247badfba064ce7dad0f63 100644 (file)
@@ -95,7 +95,7 @@ struct recent_net {
 #endif
 };
 
-static int recent_net_id __read_mostly;
+static unsigned int recent_net_id __read_mostly;
 
 static inline struct recent_net *recent_pernet(struct net *net)
 {
@@ -236,7 +236,7 @@ static void recent_table_flush(struct recent_table *t)
 static bool
 recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
-       struct net *net = par->net;
+       struct net *net = xt_net(par);
        struct recent_net *recent_net = recent_pernet(net);
        const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
        struct recent_table *t;
@@ -245,7 +245,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
        u_int8_t ttl;
        bool ret = info->invert;
 
-       if (par->family == NFPROTO_IPV4) {
+       if (xt_family(par) == NFPROTO_IPV4) {
                const struct iphdr *iph = ip_hdr(skb);
 
                if (info->side == XT_RECENT_DEST)
@@ -266,7 +266,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
        }
 
        /* use TTL as seen before forwarding */
-       if (par->out != NULL && skb->sk == NULL)
+       if (xt_out(par) != NULL && skb->sk == NULL)
                ttl++;
 
        spin_lock_bh(&recent_lock);
@@ -274,12 +274,12 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
        nf_inet_addr_mask(&addr, &addr_mask, &t->mask);
 
-       e = recent_entry_lookup(t, &addr_mask, par->family,
+       e = recent_entry_lookup(t, &addr_mask, xt_family(par),
                                (info->check_set & XT_RECENT_TTL) ? ttl : 0);
        if (e == NULL) {
                if (!(info->check_set & XT_RECENT_SET))
                        goto out;
-               e = recent_entry_init(t, &addr_mask, par->family, ttl);
+               e = recent_entry_init(t, &addr_mask, xt_family(par), ttl);
                if (e == NULL)
                        par->hotdrop = true;
                ret = !ret;
index 5669e5b453f43546300658c8ccbbbf719c5a2555..64285702afd54421e00f512860d9f1896df62969 100644 (file)
@@ -55,7 +55,7 @@ set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
        const struct xt_set_info_match_v0 *info = par->matchinfo;
 
-       ADT_OPT(opt, par->family, info->match_set.u.compat.dim,
+       ADT_OPT(opt, xt_family(par), info->match_set.u.compat.dim,
                info->match_set.u.compat.flags, 0, UINT_MAX);
 
        return match_set(info->match_set.index, skb, par, &opt,
@@ -118,7 +118,7 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
        const struct xt_set_info_match_v1 *info = par->matchinfo;
 
-       ADT_OPT(opt, par->family, info->match_set.dim,
+       ADT_OPT(opt, xt_family(par), info->match_set.dim,
                info->match_set.flags, 0, UINT_MAX);
 
        if (opt.flags & IPSET_RETURN_NOMATCH)
@@ -184,7 +184,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
        const struct xt_set_info_match_v3 *info = par->matchinfo;
        int ret;
 
-       ADT_OPT(opt, par->family, info->match_set.dim,
+       ADT_OPT(opt, xt_family(par), info->match_set.dim,
                info->match_set.flags, info->flags, UINT_MAX);
 
        if (info->packets.op != IPSET_COUNTER_NONE ||
@@ -231,7 +231,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
        const struct xt_set_info_match_v4 *info = par->matchinfo;
        int ret;
 
-       ADT_OPT(opt, par->family, info->match_set.dim,
+       ADT_OPT(opt, xt_family(par), info->match_set.dim,
                info->match_set.flags, info->flags, UINT_MAX);
 
        if (info->packets.op != IPSET_COUNTER_NONE ||
@@ -259,9 +259,9 @@ set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_set_info_target_v0 *info = par->targinfo;
 
-       ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim,
+       ADT_OPT(add_opt, xt_family(par), info->add_set.u.compat.dim,
                info->add_set.u.compat.flags, 0, UINT_MAX);
-       ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim,
+       ADT_OPT(del_opt, xt_family(par), info->del_set.u.compat.dim,
                info->del_set.u.compat.flags, 0, UINT_MAX);
 
        if (info->add_set.index != IPSET_INVALID_ID)
@@ -332,9 +332,9 @@ set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_set_info_target_v1 *info = par->targinfo;
 
-       ADT_OPT(add_opt, par->family, info->add_set.dim,
+       ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
                info->add_set.flags, 0, UINT_MAX);
-       ADT_OPT(del_opt, par->family, info->del_set.dim,
+       ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
                info->del_set.flags, 0, UINT_MAX);
 
        if (info->add_set.index != IPSET_INVALID_ID)
@@ -401,9 +401,9 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_set_info_target_v2 *info = par->targinfo;
 
-       ADT_OPT(add_opt, par->family, info->add_set.dim,
+       ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
                info->add_set.flags, info->flags, info->timeout);
-       ADT_OPT(del_opt, par->family, info->del_set.dim,
+       ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
                info->del_set.flags, 0, UINT_MAX);
 
        /* Normalize to fit into jiffies */
@@ -423,17 +423,19 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
 
 /* Revision 3 target */
 
+#define MOPT(opt, member)      ((opt).ext.skbinfo.member)
+
 static unsigned int
 set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
 {
        const struct xt_set_info_target_v3 *info = par->targinfo;
        int ret;
 
-       ADT_OPT(add_opt, par->family, info->add_set.dim,
+       ADT_OPT(add_opt, xt_family(par), info->add_set.dim,
                info->add_set.flags, info->flags, info->timeout);
-       ADT_OPT(del_opt, par->family, info->del_set.dim,
+       ADT_OPT(del_opt, xt_family(par), info->del_set.dim,
                info->del_set.flags, 0, UINT_MAX);
-       ADT_OPT(map_opt, par->family, info->map_set.dim,
+       ADT_OPT(map_opt, xt_family(par), info->map_set.dim,
                info->map_set.flags, 0, UINT_MAX);
 
        /* Normalize to fit into jiffies */
@@ -453,14 +455,14 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
                if (!ret)
                        return XT_CONTINUE;
                if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBMARK)
-                       skb->mark = (skb->mark & ~(map_opt.ext.skbmarkmask))
-                                   ^ (map_opt.ext.skbmark);
+                       skb->mark = (skb->mark & ~MOPT(map_opt,skbmarkmask))
+                                   ^ MOPT(map_opt, skbmark);
                if (map_opt.cmdflags & IPSET_FLAG_MAP_SKBPRIO)
-                       skb->priority = map_opt.ext.skbprio;
+                       skb->priority = MOPT(map_opt, skbprio);
                if ((map_opt.cmdflags & IPSET_FLAG_MAP_SKBQUEUE) &&
                    skb->dev &&
-                   skb->dev->real_num_tx_queues > map_opt.ext.skbqueue)
-                       skb_set_queue_mapping(skb, map_opt.ext.skbqueue);
+                   skb->dev->real_num_tx_queues > MOPT(map_opt, skbqueue))
+                       skb_set_queue_mapping(skb, MOPT(map_opt, skbqueue));
        }
        return XT_CONTINUE;
 }
index b10ade272b50941fbdb6d730944db6dc334c8c29..2198914707f58a0248fe0ff5df83294fb8b7d7a9 100644 (file)
 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
 
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-#define XT_SOCKET_HAVE_IPV6 1
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <net/inet6_hashtables.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #endif
 
+#include <net/netfilter/nf_socket.h>
 #include <linux/netfilter/xt_socket.h>
 
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-#define XT_SOCKET_HAVE_CONNTRACK 1
-#include <net/netfilter/nf_conntrack.h>
-#endif
-
-static int
-extract_icmp4_fields(const struct sk_buff *skb,
-                   u8 *protocol,
-                   __be32 *raddr,
-                   __be32 *laddr,
-                   __be16 *rport,
-                   __be16 *lport)
-{
-       unsigned int outside_hdrlen = ip_hdrlen(skb);
-       struct iphdr *inside_iph, _inside_iph;
-       struct icmphdr *icmph, _icmph;
-       __be16 *ports, _ports[2];
-
-       icmph = skb_header_pointer(skb, outside_hdrlen,
-                                  sizeof(_icmph), &_icmph);
-       if (icmph == NULL)
-               return 1;
-
-       switch (icmph->type) {
-       case ICMP_DEST_UNREACH:
-       case ICMP_SOURCE_QUENCH:
-       case ICMP_REDIRECT:
-       case ICMP_TIME_EXCEEDED:
-       case ICMP_PARAMETERPROB:
-               break;
-       default:
-               return 1;
-       }
-
-       inside_iph = skb_header_pointer(skb, outside_hdrlen +
-                                       sizeof(struct icmphdr),
-                                       sizeof(_inside_iph), &_inside_iph);
-       if (inside_iph == NULL)
-               return 1;
-
-       if (inside_iph->protocol != IPPROTO_TCP &&
-           inside_iph->protocol != IPPROTO_UDP)
-               return 1;
-
-       ports = skb_header_pointer(skb, outside_hdrlen +
-                                  sizeof(struct icmphdr) +
-                                  (inside_iph->ihl << 2),
-                                  sizeof(_ports), &_ports);
-       if (ports == NULL)
-               return 1;
-
-       /* the inside IP packet is the one quoted from our side, thus
-        * its saddr is the local address */
-       *protocol = inside_iph->protocol;
-       *laddr = inside_iph->saddr;
-       *lport = ports[0];
-       *raddr = inside_iph->daddr;
-       *rport = ports[1];
-
-       return 0;
-}
-
 /* "socket" match based redirection (no specific rule)
  * ===================================================
  *
@@ -111,104 +49,6 @@ extract_icmp4_fields(const struct sk_buff *skb,
  *     then local services could intercept traffic going through the
  *     box.
  */
-static struct sock *
-xt_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
-                     const u8 protocol,
-                     const __be32 saddr, const __be32 daddr,
-                     const __be16 sport, const __be16 dport,
-                     const struct net_device *in)
-{
-       switch (protocol) {
-       case IPPROTO_TCP:
-               return inet_lookup(net, &tcp_hashinfo, skb, doff,
-                                  saddr, sport, daddr, dport,
-                                  in->ifindex);
-       case IPPROTO_UDP:
-               return udp4_lib_lookup(net, saddr, sport, daddr, dport,
-                                      in->ifindex);
-       }
-       return NULL;
-}
-
-static bool xt_socket_sk_is_transparent(struct sock *sk)
-{
-       switch (sk->sk_state) {
-       case TCP_TIME_WAIT:
-               return inet_twsk(sk)->tw_transparent;
-
-       case TCP_NEW_SYN_RECV:
-               return inet_rsk(inet_reqsk(sk))->no_srccheck;
-
-       default:
-               return inet_sk(sk)->transparent;
-       }
-}
-
-static struct sock *xt_socket_lookup_slow_v4(struct net *net,
-                                            const struct sk_buff *skb,
-                                            const struct net_device *indev)
-{
-       const struct iphdr *iph = ip_hdr(skb);
-       struct sk_buff *data_skb = NULL;
-       int doff = 0;
-       __be32 uninitialized_var(daddr), uninitialized_var(saddr);
-       __be16 uninitialized_var(dport), uninitialized_var(sport);
-       u8 uninitialized_var(protocol);
-#ifdef XT_SOCKET_HAVE_CONNTRACK
-       struct nf_conn const *ct;
-       enum ip_conntrack_info ctinfo;
-#endif
-
-       if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
-               struct udphdr _hdr, *hp;
-
-               hp = skb_header_pointer(skb, ip_hdrlen(skb),
-                                       sizeof(_hdr), &_hdr);
-               if (hp == NULL)
-                       return NULL;
-
-               protocol = iph->protocol;
-               saddr = iph->saddr;
-               sport = hp->source;
-               daddr = iph->daddr;
-               dport = hp->dest;
-               data_skb = (struct sk_buff *)skb;
-               doff = iph->protocol == IPPROTO_TCP ?
-                       ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
-                       ip_hdrlen(skb) + sizeof(*hp);
-
-       } else if (iph->protocol == IPPROTO_ICMP) {
-               if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
-                                        &sport, &dport))
-                       return NULL;
-       } else {
-               return NULL;
-       }
-
-#ifdef XT_SOCKET_HAVE_CONNTRACK
-       /* Do the lookup with the original socket address in
-        * case this is a reply packet of an established
-        * SNAT-ted connection.
-        */
-       ct = nf_ct_get(skb, &ctinfo);
-       if (ct && !nf_ct_is_untracked(ct) &&
-           ((iph->protocol != IPPROTO_ICMP &&
-             ctinfo == IP_CT_ESTABLISHED_REPLY) ||
-            (iph->protocol == IPPROTO_ICMP &&
-             ctinfo == IP_CT_RELATED_REPLY)) &&
-           (ct->status & IPS_SRC_NAT_DONE)) {
-
-               daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
-               dport = (iph->protocol == IPPROTO_TCP) ?
-                       ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
-                       ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
-       }
-#endif
-
-       return xt_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
-                                    daddr, sport, dport, indev);
-}
-
 static bool
 socket_match(const struct sk_buff *skb, struct xt_action_param *par,
             const struct xt_socket_mtinfo1 *info)
@@ -217,7 +57,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
        struct sock *sk = skb->sk;
 
        if (!sk)
-               sk = xt_socket_lookup_slow_v4(par->net, skb, par->in);
+               sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par));
        if (sk) {
                bool wildcard;
                bool transparent = true;
@@ -233,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
                 * if XT_SOCKET_TRANSPARENT is used
                 */
                if (info->flags & XT_SOCKET_TRANSPARENT)
-                       transparent = xt_socket_sk_is_transparent(sk);
+                       transparent = nf_sk_is_transparent(sk);
 
                if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
                    transparent)
@@ -265,132 +105,7 @@ socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
        return socket_match(skb, par, par->matchinfo);
 }
 
-#ifdef XT_SOCKET_HAVE_IPV6
-
-static int
-extract_icmp6_fields(const struct sk_buff *skb,
-                    unsigned int outside_hdrlen,
-                    int *protocol,
-                    const struct in6_addr **raddr,
-                    const struct in6_addr **laddr,
-                    __be16 *rport,
-                    __be16 *lport,
-                    struct ipv6hdr *ipv6_var)
-{
-       const struct ipv6hdr *inside_iph;
-       struct icmp6hdr *icmph, _icmph;
-       __be16 *ports, _ports[2];
-       u8 inside_nexthdr;
-       __be16 inside_fragoff;
-       int inside_hdrlen;
-
-       icmph = skb_header_pointer(skb, outside_hdrlen,
-                                  sizeof(_icmph), &_icmph);
-       if (icmph == NULL)
-               return 1;
-
-       if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
-               return 1;
-
-       inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph),
-                                       sizeof(*ipv6_var), ipv6_var);
-       if (inside_iph == NULL)
-               return 1;
-       inside_nexthdr = inside_iph->nexthdr;
-
-       inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) +
-                                             sizeof(*ipv6_var),
-                                        &inside_nexthdr, &inside_fragoff);
-       if (inside_hdrlen < 0)
-               return 1; /* hjm: Packet has no/incomplete transport layer headers. */
-
-       if (inside_nexthdr != IPPROTO_TCP &&
-           inside_nexthdr != IPPROTO_UDP)
-               return 1;
-
-       ports = skb_header_pointer(skb, inside_hdrlen,
-                                  sizeof(_ports), &_ports);
-       if (ports == NULL)
-               return 1;
-
-       /* the inside IP packet is the one quoted from our side, thus
-        * its saddr is the local address */
-       *protocol = inside_nexthdr;
-       *laddr = &inside_iph->saddr;
-       *lport = ports[0];
-       *raddr = &inside_iph->daddr;
-       *rport = ports[1];
-
-       return 0;
-}
-
-static struct sock *
-xt_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
-                     const u8 protocol,
-                     const struct in6_addr *saddr, const struct in6_addr *daddr,
-                     const __be16 sport, const __be16 dport,
-                     const struct net_device *in)
-{
-       switch (protocol) {
-       case IPPROTO_TCP:
-               return inet6_lookup(net, &tcp_hashinfo, skb, doff,
-                                   saddr, sport, daddr, dport,
-                                   in->ifindex);
-       case IPPROTO_UDP:
-               return udp6_lib_lookup(net, saddr, sport, daddr, dport,
-                                      in->ifindex);
-       }
-
-       return NULL;
-}
-
-static struct sock *xt_socket_lookup_slow_v6(struct net *net,
-                                            const struct sk_buff *skb,
-                                            const struct net_device *indev)
-{
-       __be16 uninitialized_var(dport), uninitialized_var(sport);
-       const struct in6_addr *daddr = NULL, *saddr = NULL;
-       struct ipv6hdr *iph = ipv6_hdr(skb);
-       struct sk_buff *data_skb = NULL;
-       int doff = 0;
-       int thoff = 0, tproto;
-
-       tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
-       if (tproto < 0) {
-               pr_debug("unable to find transport header in IPv6 packet, dropping\n");
-               return NULL;
-       }
-
-       if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
-               struct udphdr _hdr, *hp;
-
-               hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
-               if (hp == NULL)
-                       return NULL;
-
-               saddr = &iph->saddr;
-               sport = hp->source;
-               daddr = &iph->daddr;
-               dport = hp->dest;
-               data_skb = (struct sk_buff *)skb;
-               doff = tproto == IPPROTO_TCP ?
-                       thoff + __tcp_hdrlen((struct tcphdr *)hp) :
-                       thoff + sizeof(*hp);
-
-       } else if (tproto == IPPROTO_ICMPV6) {
-               struct ipv6hdr ipv6_var;
-
-               if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
-                                        &sport, &dport, &ipv6_var))
-                       return NULL;
-       } else {
-               return NULL;
-       }
-
-       return xt_socket_get_sock_v6(net, data_skb, doff, tproto, saddr, daddr,
-                                    sport, dport, indev);
-}
-
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 static bool
 socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
 {
@@ -399,7 +114,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
        struct sock *sk = skb->sk;
 
        if (!sk)
-               sk = xt_socket_lookup_slow_v6(par->net, skb, par->in);
+               sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par));
        if (sk) {
                bool wildcard;
                bool transparent = true;
@@ -415,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
                 * if XT_SOCKET_TRANSPARENT is used
                 */
                if (info->flags & XT_SOCKET_TRANSPARENT)
-                       transparent = xt_socket_sk_is_transparent(sk);
+                       transparent = nf_sk_is_transparent(sk);
 
                if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
                    transparent)
@@ -488,7 +203,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
                                  (1 << NF_INET_LOCAL_IN),
                .me             = THIS_MODULE,
        },
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
        {
                .name           = "socket",
                .revision       = 1,
@@ -512,7 +227,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
                                  (1 << NF_INET_LOCAL_IN),
                .me             = THIS_MODULE,
        },
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
        {
                .name           = "socket",
                .revision       = 2,
@@ -536,7 +251,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
                                  (1 << NF_INET_LOCAL_IN),
                .me             = THIS_MODULE,
        },
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
        {
                .name           = "socket",
                .revision       = 3,
@@ -554,7 +269,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 static int __init socket_mt_init(void)
 {
        nf_defrag_ipv4_enable();
-#ifdef XT_SOCKET_HAVE_IPV6
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
        nf_defrag_ipv6_enable();
 #endif
 
index 2ec93c5e77bb07ce82ccb4d1ff8418660960d06c..d177dd0665043652c199605479959ea408145a4b 100644 (file)
@@ -60,13 +60,7 @@ struct netlbl_domhsh_walk_arg {
 };
 
 /* NetLabel Generic NETLINK CALIPSO family */
-static struct genl_family netlbl_calipso_gnl_family = {
-       .id = GENL_ID_GENERATE,
-       .hdrsize = 0,
-       .name = NETLBL_NLTYPE_CALIPSO_NAME,
-       .version = NETLBL_PROTO_VERSION,
-       .maxattr = NLBL_CALIPSO_A_MAX,
-};
+static struct genl_family netlbl_calipso_gnl_family;
 
 /* NetLabel Netlink attribute policy */
 static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = {
@@ -355,6 +349,16 @@ static const struct genl_ops netlbl_calipso_ops[] = {
        },
 };
 
+static struct genl_family netlbl_calipso_gnl_family __ro_after_init = {
+       .hdrsize = 0,
+       .name = NETLBL_NLTYPE_CALIPSO_NAME,
+       .version = NETLBL_PROTO_VERSION,
+       .maxattr = NLBL_CALIPSO_A_MAX,
+       .module = THIS_MODULE,
+       .ops = netlbl_calipso_ops,
+       .n_ops = ARRAY_SIZE(netlbl_calipso_ops),
+};
+
 /* NetLabel Generic NETLINK Protocol Functions
  */
 
@@ -368,8 +372,7 @@ static const struct genl_ops netlbl_calipso_ops[] = {
  */
 int __init netlbl_calipso_genl_init(void)
 {
-       return genl_register_family_with_ops(&netlbl_calipso_gnl_family,
-                                            netlbl_calipso_ops);
+       return genl_register_family(&netlbl_calipso_gnl_family);
 }
 
 static const struct netlbl_calipso_ops *calipso_ops;
index 7fd1104ba9007ec0d731d97d0a245eb3da7b4436..4149d3e6358976f093dbcb25dee79d10f6275231 100644 (file)
@@ -59,14 +59,7 @@ struct netlbl_domhsh_walk_arg {
 };
 
 /* NetLabel Generic NETLINK CIPSOv4 family */
-static struct genl_family netlbl_cipsov4_gnl_family = {
-       .id = GENL_ID_GENERATE,
-       .hdrsize = 0,
-       .name = NETLBL_NLTYPE_CIPSOV4_NAME,
-       .version = NETLBL_PROTO_VERSION,
-       .maxattr = NLBL_CIPSOV4_A_MAX,
-};
-
+static struct genl_family netlbl_cipsov4_gnl_family;
 /* NetLabel Netlink attribute policy */
 static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1] = {
        [NLBL_CIPSOV4_A_DOI] = { .type = NLA_U32 },
@@ -767,6 +760,16 @@ static const struct genl_ops netlbl_cipsov4_ops[] = {
        },
 };
 
+static struct genl_family netlbl_cipsov4_gnl_family __ro_after_init = {
+       .hdrsize = 0,
+       .name = NETLBL_NLTYPE_CIPSOV4_NAME,
+       .version = NETLBL_PROTO_VERSION,
+       .maxattr = NLBL_CIPSOV4_A_MAX,
+       .module = THIS_MODULE,
+       .ops = netlbl_cipsov4_ops,
+       .n_ops = ARRAY_SIZE(netlbl_cipsov4_ops),
+};
+
 /*
  * NetLabel Generic NETLINK Protocol Functions
  */
@@ -781,6 +784,5 @@ static const struct genl_ops netlbl_cipsov4_ops[] = {
  */
 int __init netlbl_cipsov4_genl_init(void)
 {
-       return genl_register_family_with_ops(&netlbl_cipsov4_gnl_family,
-                                            netlbl_cipsov4_ops);
+       return genl_register_family(&netlbl_cipsov4_gnl_family);
 }
index f85d0e07af2dd9c7f4f4fb2f6b05fcf99b21c8eb..21e0095b1d1422aff42e068490819fd3ac540c58 100644 (file)
@@ -60,13 +60,7 @@ struct netlbl_domhsh_walk_arg {
 };
 
 /* NetLabel Generic NETLINK CIPSOv4 family */
-static struct genl_family netlbl_mgmt_gnl_family = {
-       .id = GENL_ID_GENERATE,
-       .hdrsize = 0,
-       .name = NETLBL_NLTYPE_MGMT_NAME,
-       .version = NETLBL_PROTO_VERSION,
-       .maxattr = NLBL_MGMT_A_MAX,
-};
+static struct genl_family netlbl_mgmt_gnl_family;
 
 /* NetLabel Netlink attribute policy */
 static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
@@ -834,6 +828,16 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = {
        },
 };
 
+static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = {
+       .hdrsize = 0,
+       .name = NETLBL_NLTYPE_MGMT_NAME,
+       .version = NETLBL_PROTO_VERSION,
+       .maxattr = NLBL_MGMT_A_MAX,
+       .module = THIS_MODULE,
+       .ops = netlbl_mgmt_genl_ops,
+       .n_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops),
+};
+
 /*
  * NetLabel Generic NETLINK Protocol Functions
  */
@@ -848,6 +852,5 @@ static const struct genl_ops netlbl_mgmt_genl_ops[] = {
  */
 int __init netlbl_mgmt_genl_init(void)
 {
-       return genl_register_family_with_ops(&netlbl_mgmt_gnl_family,
-                                            netlbl_mgmt_genl_ops);
+       return genl_register_family(&netlbl_mgmt_gnl_family);
 }
index 4528cff9138b5499f7577bf2801dac8f85e0fcda..22dc1b9d63625e76c72bdb80b97b1d07540fac32 100644 (file)
@@ -123,13 +123,7 @@ static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def;
 static u8 netlabel_unlabel_acceptflg;
 
 /* NetLabel Generic NETLINK unlabeled family */
-static struct genl_family netlbl_unlabel_gnl_family = {
-       .id = GENL_ID_GENERATE,
-       .hdrsize = 0,
-       .name = NETLBL_NLTYPE_UNLABELED_NAME,
-       .version = NETLBL_PROTO_VERSION,
-       .maxattr = NLBL_UNLABEL_A_MAX,
-};
+static struct genl_family netlbl_unlabel_gnl_family;
 
 /* NetLabel Netlink attribute policy */
 static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = {
@@ -1378,6 +1372,16 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = {
        },
 };
 
+static struct genl_family netlbl_unlabel_gnl_family __ro_after_init = {
+       .hdrsize = 0,
+       .name = NETLBL_NLTYPE_UNLABELED_NAME,
+       .version = NETLBL_PROTO_VERSION,
+       .maxattr = NLBL_UNLABEL_A_MAX,
+       .module = THIS_MODULE,
+       .ops = netlbl_unlabel_genl_ops,
+       .n_ops = ARRAY_SIZE(netlbl_unlabel_genl_ops),
+};
+
 /*
  * NetLabel Generic NETLINK Protocol Functions
  */
@@ -1392,8 +1396,7 @@ static const struct genl_ops netlbl_unlabel_genl_ops[] = {
  */
 int __init netlbl_unlabel_genl_init(void)
 {
-       return genl_register_family_with_ops(&netlbl_unlabel_gnl_family,
-                                            netlbl_unlabel_genl_ops);
+       return genl_register_family(&netlbl_unlabel_gnl_family);
 }
 
 /*
index b2f0e986a6f49e79d58e9706b7c822a1f11073bb..a5546249fb1022b52144a40717b8a4268755b972 100644 (file)
@@ -178,11 +178,8 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
                }
                cb->args[1] = i;
        } else {
-               if (req->sdiag_protocol >= MAX_LINKS) {
-                       read_unlock(&nl_table_lock);
-                       rcu_read_unlock();
+               if (req->sdiag_protocol >= MAX_LINKS)
                        return -ENOENT;
-               }
 
                err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
        }
index 23cc12639ba769ac67714f5e5f5f9549a4417c38..fb6e10fdb2174320c96608aea63d3c484d3625a0 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/mutex.h>
 #include <linux/bitmap.h>
 #include <linux/rwsem.h>
+#include <linux/idr.h>
 #include <net/sock.h>
 #include <net/genetlink.h>
 
@@ -58,10 +59,8 @@ static void genl_unlock_all(void)
        up_write(&cb_lock);
 }
 
-#define GENL_FAM_TAB_SIZE      16
-#define GENL_FAM_TAB_MASK      (GENL_FAM_TAB_SIZE - 1)
+static DEFINE_IDR(genl_fam_idr);
 
-static struct list_head family_ht[GENL_FAM_TAB_SIZE];
 /*
  * Bitmap of multicast groups that are currently in use.
  *
@@ -86,45 +85,29 @@ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) |
 static unsigned long *mc_groups = &mc_group_start;
 static unsigned long mc_groups_longs = 1;
 
-static int genl_ctrl_event(int event, struct genl_family *family,
+static int genl_ctrl_event(int event, const struct genl_family *family,
                           const struct genl_multicast_group *grp,
                           int grp_id);
 
-static inline unsigned int genl_family_hash(unsigned int id)
+static const struct genl_family *genl_family_find_byid(unsigned int id)
 {
-       return id & GENL_FAM_TAB_MASK;
+       return idr_find(&genl_fam_idr, id);
 }
 
-static inline struct list_head *genl_family_chain(unsigned int id)
+static const struct genl_family *genl_family_find_byname(char *name)
 {
-       return &family_ht[genl_family_hash(id)];
-}
-
-static struct genl_family *genl_family_find_byid(unsigned int id)
-{
-       struct genl_family *f;
-
-       list_for_each_entry(f, genl_family_chain(id), family_list)
-               if (f->id == id)
-                       return f;
-
-       return NULL;
-}
-
-static struct genl_family *genl_family_find_byname(char *name)
-{
-       struct genl_family *f;
-       int i;
+       const struct genl_family *family;
+       unsigned int id;
 
-       for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
-               list_for_each_entry(f, genl_family_chain(i), family_list)
-                       if (strcmp(f->name, name) == 0)
-                               return f;
+       idr_for_each_entry(&genl_fam_idr, family, id)
+               if (strcmp(family->name, name) == 0)
+                       return family;
 
        return NULL;
 }
 
-static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family)
+static const struct genl_ops *genl_get_cmd(u8 cmd,
+                                          const struct genl_family *family)
 {
        int i;
 
@@ -135,26 +118,6 @@ static const struct genl_ops *genl_get_cmd(u8 cmd, struct genl_family *family)
        return NULL;
 }
 
-/* Of course we are going to have problems once we hit
- * 2^16 alive types, but that can only happen by year 2K
-*/
-static u16 genl_generate_id(void)
-{
-       static u16 id_gen_idx = GENL_MIN_ID;
-       int i;
-
-       for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) {
-               if (id_gen_idx != GENL_ID_VFS_DQUOT &&
-                   id_gen_idx != GENL_ID_PMCRAID &&
-                   !genl_family_find_byid(id_gen_idx))
-                       return id_gen_idx;
-               if (++id_gen_idx > GENL_MAX_ID)
-                       id_gen_idx = GENL_MIN_ID;
-       }
-
-       return 0;
-}
-
 static int genl_allocate_reserve_groups(int n_groups, int *first_id)
 {
        unsigned long *new_groups;
@@ -295,7 +258,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family)
        return err;
 }
 
-static void genl_unregister_mc_groups(struct genl_family *family)
+static void genl_unregister_mc_groups(const struct genl_family *family)
 {
        struct net *net;
        int i;
@@ -344,28 +307,21 @@ static int genl_validate_ops(const struct genl_family *family)
 }
 
 /**
- * __genl_register_family - register a generic netlink family
+ * genl_register_family - register a generic netlink family
  * @family: generic netlink family
  *
  * Registers the specified family after validating it first. Only one
  * family may be registered with the same family name or identifier.
- * The family id may equal GENL_ID_GENERATE causing an unique id to
- * be automatically generated and assigned.
  *
- * The family's ops array must already be assigned, you can use the
- * genl_register_family_with_ops() helper function.
+ * The family's ops, multicast groups and module pointer must already
+ * be assigned.
  *
  * Return 0 on success or a negative error code.
  */
-int __genl_register_family(struct genl_family *family)
+int genl_register_family(struct genl_family *family)
 {
-       int err = -EINVAL, i;
-
-       if (family->id && family->id < GENL_MIN_ID)
-               goto errout;
-
-       if (family->id > GENL_MAX_ID)
-               goto errout;
+       int err, i;
+       int start = GENL_START_ALLOC, end = GENL_MAX_ID;
 
        err = genl_validate_ops(family);
        if (err)
@@ -378,18 +334,20 @@ int __genl_register_family(struct genl_family *family)
                goto errout_locked;
        }
 
-       if (family->id == GENL_ID_GENERATE) {
-               u16 newid = genl_generate_id();
-
-               if (!newid) {
-                       err = -ENOMEM;
-                       goto errout_locked;
-               }
-
-               family->id = newid;
-       } else if (genl_family_find_byid(family->id)) {
-               err = -EEXIST;
-               goto errout_locked;
+       /*
+        * Sadly, a few cases need to be special-cased
+        * due to them having previously abused the API
+        * and having used their family ID also as their
+        * multicast group ID, so we use reserved IDs
+        * for both to be sure we can do that mapping.
+        */
+       if (family == &genl_ctrl) {
+               /* and this needs to be special for initial family lookups */
+               start = end = GENL_ID_CTRL;
+       } else if (strcmp(family->name, "pmcraid") == 0) {
+               start = end = GENL_ID_PMCRAID;
+       } else if (strcmp(family->name, "VFS_DQUOT") == 0) {
+               start = end = GENL_ID_VFS_DQUOT;
        }
 
        if (family->maxattr && !family->parallel_ops) {
@@ -402,11 +360,17 @@ int __genl_register_family(struct genl_family *family)
        } else
                family->attrbuf = NULL;
 
+       family->id = idr_alloc(&genl_fam_idr, family,
+                              start, end + 1, GFP_KERNEL);
+       if (family->id < 0) {
+               err = family->id;
+               goto errout_locked;
+       }
+
        err = genl_validate_assign_mc_groups(family);
        if (err)
-               goto errout_locked;
+               goto errout_remove;
 
-       list_add_tail(&family->family_list, genl_family_chain(family->id));
        genl_unlock_all();
 
        /* send all events */
@@ -417,12 +381,14 @@ int __genl_register_family(struct genl_family *family)
 
        return 0;
 
+errout_remove:
+       idr_remove(&genl_fam_idr, family->id);
+       kfree(family->attrbuf);
 errout_locked:
        genl_unlock_all();
-errout:
        return err;
 }
-EXPORT_SYMBOL(__genl_register_family);
+EXPORT_SYMBOL(genl_register_family);
 
 /**
  * genl_unregister_family - unregister generic netlink family
@@ -432,33 +398,29 @@ EXPORT_SYMBOL(__genl_register_family);
  *
  * Returns 0 on success or a negative error code.
  */
-int genl_unregister_family(struct genl_family *family)
+int genl_unregister_family(const struct genl_family *family)
 {
-       struct genl_family *rc;
-
        genl_lock_all();
 
-       list_for_each_entry(rc, genl_family_chain(family->id), family_list) {
-               if (family->id != rc->id || strcmp(rc->name, family->name))
-                       continue;
+       if (!genl_family_find_byid(family->id)) {
+               genl_unlock_all();
+               return -ENOENT;
+       }
 
-               genl_unregister_mc_groups(family);
+       genl_unregister_mc_groups(family);
 
-               list_del(&rc->family_list);
-               family->n_ops = 0;
-               up_write(&cb_lock);
-               wait_event(genl_sk_destructing_waitq,
-                          atomic_read(&genl_sk_destructing_cnt) == 0);
-               genl_unlock();
+       idr_remove(&genl_fam_idr, family->id);
 
-               kfree(family->attrbuf);
-               genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0);
-               return 0;
-       }
+       up_write(&cb_lock);
+       wait_event(genl_sk_destructing_waitq,
+                  atomic_read(&genl_sk_destructing_cnt) == 0);
+       genl_unlock();
 
-       genl_unlock_all();
+       kfree(family->attrbuf);
+
+       genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0);
 
-       return -ENOENT;
+       return 0;
 }
 EXPORT_SYMBOL(genl_unregister_family);
 
@@ -474,7 +436,7 @@ EXPORT_SYMBOL(genl_unregister_family);
  * Returns pointer to user specific header
  */
 void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
-                               struct genl_family *family, int flags, u8 cmd)
+                 const struct genl_family *family, int flags, u8 cmd)
 {
        struct nlmsghdr *nlh;
        struct genlmsghdr *hdr;
@@ -533,7 +495,7 @@ static int genl_lock_done(struct netlink_callback *cb)
        return rc;
 }
 
-static int genl_family_rcv_msg(struct genl_family *family,
+static int genl_family_rcv_msg(const struct genl_family *family,
                               struct sk_buff *skb,
                               struct nlmsghdr *nlh)
 {
@@ -645,7 +607,7 @@ out:
 
 static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-       struct genl_family *family;
+       const struct genl_family *family;
        int err;
 
        family = genl_family_find_byid(nlh->nlmsg_type);
@@ -674,15 +636,9 @@ static void genl_rcv(struct sk_buff *skb)
  * Controller
  **************************************************************************/
 
-static struct genl_family genl_ctrl = {
-       .id = GENL_ID_CTRL,
-       .name = "nlctrl",
-       .version = 0x2,
-       .maxattr = CTRL_ATTR_MAX,
-       .netnsok = true,
-};
+static struct genl_family genl_ctrl;
 
-static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq,
+static int ctrl_fill_info(const struct genl_family *family, u32 portid, u32 seq,
                          u32 flags, struct sk_buff *skb, u8 cmd)
 {
        void *hdr;
@@ -769,7 +725,7 @@ nla_put_failure:
        return -EMSGSIZE;
 }
 
-static int ctrl_fill_mcgrp_info(struct genl_family *family,
+static int ctrl_fill_mcgrp_info(const struct genl_family *family,
                                const struct genl_multicast_group *grp,
                                int grp_id, u32 portid, u32 seq, u32 flags,
                                struct sk_buff *skb, u8 cmd)
@@ -812,37 +768,30 @@ nla_put_failure:
 
 static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
 {
-
-       int i, n = 0;
+       int n = 0;
        struct genl_family *rt;
        struct net *net = sock_net(skb->sk);
-       int chains_to_skip = cb->args[0];
-       int fams_to_skip = cb->args[1];
-
-       for (i = chains_to_skip; i < GENL_FAM_TAB_SIZE; i++) {
-               n = 0;
-               list_for_each_entry(rt, genl_family_chain(i), family_list) {
-                       if (!rt->netnsok && !net_eq(net, &init_net))
-                               continue;
-                       if (++n < fams_to_skip)
-                               continue;
-                       if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
-                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
-                                          skb, CTRL_CMD_NEWFAMILY) < 0)
-                               goto errout;
-               }
+       int fams_to_skip = cb->args[0];
+       unsigned int id;
 
-               fams_to_skip = 0;
-       }
+       idr_for_each_entry(&genl_fam_idr, rt, id) {
+               if (!rt->netnsok && !net_eq(net, &init_net))
+                       continue;
+
+               if (n++ < fams_to_skip)
+                       continue;
 
-errout:
-       cb->args[0] = i;
-       cb->args[1] = n;
+               if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid,
+                                  cb->nlh->nlmsg_seq, NLM_F_MULTI,
+                                  skb, CTRL_CMD_NEWFAMILY) < 0)
+                       break;
+       }
 
+       cb->args[0] = n;
        return skb->len;
 }
 
-static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
+static struct sk_buff *ctrl_build_family_msg(const struct genl_family *family,
                                             u32 portid, int seq, u8 cmd)
 {
        struct sk_buff *skb;
@@ -862,7 +811,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family,
 }
 
 static struct sk_buff *
-ctrl_build_mcgrp_msg(struct genl_family *family,
+ctrl_build_mcgrp_msg(const struct genl_family *family,
                     const struct genl_multicast_group *grp,
                     int grp_id, u32 portid, int seq, u8 cmd)
 {
@@ -892,7 +841,7 @@ static const struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] = {
 static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
 {
        struct sk_buff *msg;
-       struct genl_family *res = NULL;
+       const struct genl_family *res = NULL;
        int err = -EINVAL;
 
        if (info->attrs[CTRL_ATTR_FAMILY_ID]) {
@@ -936,7 +885,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
        return genlmsg_reply(msg, info);
 }
 
-static int genl_ctrl_event(int event, struct genl_family *family,
+static int genl_ctrl_event(int event, const struct genl_family *family,
                           const struct genl_multicast_group *grp,
                           int grp_id)
 {
@@ -990,27 +939,39 @@ static const struct genl_multicast_group genl_ctrl_groups[] = {
        { .name = "notify", },
 };
 
+static struct genl_family genl_ctrl __ro_after_init = {
+       .module = THIS_MODULE,
+       .ops = genl_ctrl_ops,
+       .n_ops = ARRAY_SIZE(genl_ctrl_ops),
+       .mcgrps = genl_ctrl_groups,
+       .n_mcgrps = ARRAY_SIZE(genl_ctrl_groups),
+       .id = GENL_ID_CTRL,
+       .name = "nlctrl",
+       .version = 0x2,
+       .maxattr = CTRL_ATTR_MAX,
+       .netnsok = true,
+};
+
 static int genl_bind(struct net *net, int group)
 {
-       int i, err = -ENOENT;
+       struct genl_family *f;
+       int err = -ENOENT;
+       unsigned int id;
 
        down_read(&cb_lock);
-       for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
-               struct genl_family *f;
-
-               list_for_each_entry(f, genl_family_chain(i), family_list) {
-                       if (group >= f->mcgrp_offset &&
-                           group < f->mcgrp_offset + f->n_mcgrps) {
-                               int fam_grp = group - f->mcgrp_offset;
-
-                               if (!f->netnsok && net != &init_net)
-                                       err = -ENOENT;
-                               else if (f->mcast_bind)
-                                       err = f->mcast_bind(net, fam_grp);
-                               else
-                                       err = 0;
-                               break;
-                       }
+
+       idr_for_each_entry(&genl_fam_idr, f, id) {
+               if (group >= f->mcgrp_offset &&
+                   group < f->mcgrp_offset + f->n_mcgrps) {
+                       int fam_grp = group - f->mcgrp_offset;
+
+                       if (!f->netnsok && net != &init_net)
+                               err = -ENOENT;
+                       else if (f->mcast_bind)
+                               err = f->mcast_bind(net, fam_grp);
+                       else
+                               err = 0;
+                       break;
                }
        }
        up_read(&cb_lock);
@@ -1020,21 +981,19 @@ static int genl_bind(struct net *net, int group)
 
 static void genl_unbind(struct net *net, int group)
 {
-       int i;
+       struct genl_family *f;
+       unsigned int id;
 
        down_read(&cb_lock);
-       for (i = 0; i < GENL_FAM_TAB_SIZE; i++) {
-               struct genl_family *f;
 
-               list_for_each_entry(f, genl_family_chain(i), family_list) {
-                       if (group >= f->mcgrp_offset &&
-                           group < f->mcgrp_offset + f->n_mcgrps) {
-                               int fam_grp = group - f->mcgrp_offset;
+       idr_for_each_entry(&genl_fam_idr, f, id) {
+               if (group >= f->mcgrp_offset &&
+                   group < f->mcgrp_offset + f->n_mcgrps) {
+                       int fam_grp = group - f->mcgrp_offset;
 
-                               if (f->mcast_unbind)
-                                       f->mcast_unbind(net, fam_grp);
-                               break;
-                       }
+                       if (f->mcast_unbind)
+                               f->mcast_unbind(net, fam_grp);
+                       break;
                }
        }
        up_read(&cb_lock);
@@ -1074,13 +1033,9 @@ static struct pernet_operations genl_pernet_ops = {
 
 static int __init genl_init(void)
 {
-       int i, err;
-
-       for (i = 0; i < GENL_FAM_TAB_SIZE; i++)
-               INIT_LIST_HEAD(&family_ht[i]);
+       int err;
 
-       err = genl_register_family_with_ops_groups(&genl_ctrl, genl_ctrl_ops,
-                                                  genl_ctrl_groups);
+       err = genl_register_family(&genl_ctrl);
        if (err < 0)
                goto problem;
 
@@ -1096,6 +1051,25 @@ problem:
 
 subsys_initcall(genl_init);
 
+/**
+ * genl_family_attrbuf - return family's attrbuf
+ * @family: the family
+ *
+ * Return the family's attrbuf, while validating that it's
+ * actually valid to access it.
+ *
+ * You cannot use this function with a family that has parallel_ops
+ * and you can only use it within (pre/post) doit/dumpit callbacks.
+ */
+struct nlattr **genl_family_attrbuf(const struct genl_family *family)
+{
+       if (!WARN_ON(family->parallel_ops))
+               lockdep_assert_held(&genl_mutex);
+
+       return family->attrbuf;
+}
+EXPORT_SYMBOL(genl_family_attrbuf);
+
 static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
                         gfp_t flags)
 {
@@ -1125,8 +1099,9 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group,
        return err;
 }
 
-int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb,
-                           u32 portid, unsigned int group, gfp_t flags)
+int genlmsg_multicast_allns(const struct genl_family *family,
+                           struct sk_buff *skb, u32 portid,
+                           unsigned int group, gfp_t flags)
 {
        if (WARN_ON_ONCE(group >= family->n_mcgrps))
                return -EINVAL;
@@ -1135,7 +1110,7 @@ int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(genlmsg_multicast_allns);
 
-void genl_notify(struct genl_family *family, struct sk_buff *skb,
+void genl_notify(const struct genl_family *family, struct sk_buff *skb,
                 struct genl_info *info, u32 group, gfp_t flags)
 {
        struct net *net = genl_info_net(info);
index ea023b35f1c24b3069be02796b24bfad7805352e..03f3d5c7beb8d173fae25456b278ee9cf3e04c5e 100644 (file)
@@ -38,14 +38,7 @@ static const struct genl_multicast_group nfc_genl_mcgrps[] = {
        { .name = NFC_GENL_MCAST_EVENT_NAME, },
 };
 
-static struct genl_family nfc_genl_family = {
-       .id = GENL_ID_GENERATE,
-       .hdrsize = 0,
-       .name = NFC_GENL_NAME,
-       .version = NFC_GENL_VERSION,
-       .maxattr = NFC_ATTR_MAX,
-};
-
+static struct genl_family nfc_genl_family;
 static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
        [NFC_ATTR_DEVICE_INDEX] = { .type = NLA_U32 },
        [NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING,
@@ -120,21 +113,20 @@ nla_put_failure:
 
 static struct nfc_dev *__get_device_from_cb(struct netlink_callback *cb)
 {
+       struct nlattr **attrbuf = genl_family_attrbuf(&nfc_genl_family);
        struct nfc_dev *dev;
        int rc;
        u32 idx;
 
        rc = nlmsg_parse(cb->nlh, GENL_HDRLEN + nfc_genl_family.hdrsize,
-                        nfc_genl_family.attrbuf,
-                        nfc_genl_family.maxattr,
-                        nfc_genl_policy);
+                        attrbuf, nfc_genl_family.maxattr, nfc_genl_policy);
        if (rc < 0)
                return ERR_PTR(rc);
 
-       if (!nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX])
+       if (!attrbuf[NFC_ATTR_DEVICE_INDEX])
                return ERR_PTR(-EINVAL);
 
-       idx = nla_get_u32(nfc_genl_family.attrbuf[NFC_ATTR_DEVICE_INDEX]);
+       idx = nla_get_u32(attrbuf[NFC_ATTR_DEVICE_INDEX]);
 
        dev = nfc_get_device(idx);
        if (!dev)
@@ -1754,6 +1746,18 @@ static const struct genl_ops nfc_genl_ops[] = {
        },
 };
 
+static struct genl_family nfc_genl_family __ro_after_init = {
+       .hdrsize = 0,
+       .name = NFC_GENL_NAME,
+       .version = NFC_GENL_VERSION,
+       .maxattr = NFC_ATTR_MAX,
+       .module = THIS_MODULE,
+       .ops = nfc_genl_ops,
+       .n_ops = ARRAY_SIZE(nfc_genl_ops),
+       .mcgrps = nfc_genl_mcgrps,
+       .n_mcgrps = ARRAY_SIZE(nfc_genl_mcgrps),
+};
+
 
 struct urelease_work {
        struct  work_struct w;
@@ -1839,9 +1843,7 @@ int __init nfc_genl_init(void)
 {
        int rc;
 
-       rc = genl_register_family_with_ops_groups(&nfc_genl_family,
-                                                 nfc_genl_ops,
-                                                 nfc_genl_mcgrps);
+       rc = genl_register_family(&nfc_genl_family);
        if (rc)
                return rc;
 
index 1105c4e29c6275a002fac3bd9b26ab28bf1565b9..514f7bcf7c63ce0f3148e42726df74ff77c0c8af 100644 (file)
@@ -66,6 +66,7 @@ struct ovs_frag_data {
        u16 vlan_tci;
        __be16 vlan_proto;
        unsigned int l2_len;
+       u8 mac_proto;
        u8 l2_data[MAX_L2_LEN];
 };
 
@@ -137,12 +138,12 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
 
 static void invalidate_flow_key(struct sw_flow_key *key)
 {
-       key->eth.type = htons(0);
+       key->mac_proto |= SW_FLOW_KEY_INVALID;
 }
 
 static bool is_flow_key_valid(const struct sw_flow_key *key)
 {
-       return !!key->eth.type;
+       return !(key->mac_proto & SW_FLOW_KEY_INVALID);
 }
 
 static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
@@ -186,7 +187,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 
        skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
 
-       update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
+       if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET)
+               update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
        skb->protocol = mpls->mpls_ethertype;
 
        invalidate_flow_key(key);
@@ -196,7 +198,6 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
                    const __be16 ethertype)
 {
-       struct ethhdr *hdr;
        int err;
 
        err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
@@ -212,11 +213,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
        skb_reset_mac_header(skb);
        skb_set_network_header(skb, skb->mac_len);
 
-       /* mpls_hdr() is used to locate the ethertype field correctly in the
-        * presence of VLAN tags.
-        */
-       hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
-       update_ethertype(skb, hdr, ethertype);
+       if (ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET) {
+               struct ethhdr *hdr;
+
+               /* mpls_hdr() is used to locate the ethertype field correctly in the
+                * presence of VLAN tags.
+                */
+               hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
+               update_ethertype(skb, hdr, ethertype);
+       }
        if (eth_p_mpls(skb->protocol))
                skb->protocol = ethertype;
 
@@ -312,6 +317,47 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
        return 0;
 }
 
+/* pop_eth does not support VLAN packets as this action is never called
+ * for them.
+ */
+static int pop_eth(struct sk_buff *skb, struct sw_flow_key *key)
+{
+       skb_pull_rcsum(skb, ETH_HLEN);
+       skb_reset_mac_header(skb);
+       skb_reset_mac_len(skb);
+
+       /* safe right before invalidate_flow_key */
+       key->mac_proto = MAC_PROTO_NONE;
+       invalidate_flow_key(key);
+       return 0;
+}
+
+static int push_eth(struct sk_buff *skb, struct sw_flow_key *key,
+                   const struct ovs_action_push_eth *ethh)
+{
+       struct ethhdr *hdr;
+
+       /* Add the new Ethernet header */
+       if (skb_cow_head(skb, ETH_HLEN) < 0)
+               return -ENOMEM;
+
+       skb_push(skb, ETH_HLEN);
+       skb_reset_mac_header(skb);
+       skb_reset_mac_len(skb);
+
+       hdr = eth_hdr(skb);
+       ether_addr_copy(hdr->h_source, ethh->addresses.eth_src);
+       ether_addr_copy(hdr->h_dest, ethh->addresses.eth_dst);
+       hdr->h_proto = skb->protocol;
+
+       skb_postpush_rcsum(skb, hdr, ETH_HLEN);
+
+       /* safe right before invalidate_flow_key */
+       key->mac_proto = MAC_PROTO_ETHERNET;
+       invalidate_flow_key(key);
+       return 0;
+}
+
 static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
                                  __be32 addr, __be32 new_addr)
 {
@@ -673,7 +719,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk
                skb_reset_mac_len(skb);
        }
 
-       ovs_vport_send(vport, skb);
+       ovs_vport_send(vport, skb, data->mac_proto);
        return 0;
 }
 
@@ -692,7 +738,7 @@ static struct dst_ops ovs_dst_ops = {
  * ovs_vport_output(), which is called once per fragmented packet.
  */
 static void prepare_frag(struct vport *vport, struct sk_buff *skb,
-                        u16 orig_network_offset)
+                        u16 orig_network_offset, u8 mac_proto)
 {
        unsigned int hlen = skb_network_offset(skb);
        struct ovs_frag_data *data;
@@ -705,6 +751,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
        data->network_offset = orig_network_offset;
        data->vlan_tci = skb->vlan_tci;
        data->vlan_proto = skb->vlan_proto;
+       data->mac_proto = mac_proto;
        data->l2_len = hlen;
        memcpy(&data->l2_data, skb->data, hlen);
 
@@ -713,7 +760,8 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
 }
 
 static void ovs_fragment(struct net *net, struct vport *vport,
-                        struct sk_buff *skb, u16 mru, __be16 ethertype)
+                        struct sk_buff *skb, u16 mru,
+                        struct sw_flow_key *key)
 {
        u16 orig_network_offset = 0;
 
@@ -727,11 +775,12 @@ static void ovs_fragment(struct net *net, struct vport *vport,
                goto err;
        }
 
-       if (ethertype == htons(ETH_P_IP)) {
+       if (key->eth.type == htons(ETH_P_IP)) {
                struct dst_entry ovs_dst;
                unsigned long orig_dst;
 
-               prepare_frag(vport, skb, orig_network_offset);
+               prepare_frag(vport, skb, orig_network_offset,
+                            ovs_key_mac_proto(key));
                dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
                         DST_OBSOLETE_NONE, DST_NOCOUNT);
                ovs_dst.dev = vport->dev;
@@ -742,7 +791,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
 
                ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
                refdst_drop(orig_dst);
-       } else if (ethertype == htons(ETH_P_IPV6)) {
+       } else if (key->eth.type == htons(ETH_P_IPV6)) {
                const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
                unsigned long orig_dst;
                struct rt6_info ovs_rt;
@@ -751,7 +800,8 @@ static void ovs_fragment(struct net *net, struct vport *vport,
                        goto err;
                }
 
-               prepare_frag(vport, skb, orig_network_offset);
+               prepare_frag(vport, skb, orig_network_offset,
+                            ovs_key_mac_proto(key));
                memset(&ovs_rt, 0, sizeof(ovs_rt));
                dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
                         DST_OBSOLETE_NONE, DST_NOCOUNT);
@@ -765,7 +815,7 @@ static void ovs_fragment(struct net *net, struct vport *vport,
                refdst_drop(orig_dst);
        } else {
                WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
-                         ovs_vport_name(vport), ntohs(ethertype), mru,
+                         ovs_vport_name(vport), ntohs(key->eth.type), mru,
                          vport->dev->mtu);
                goto err;
        }
@@ -785,26 +835,19 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
                u32 cutlen = OVS_CB(skb)->cutlen;
 
                if (unlikely(cutlen > 0)) {
-                       if (skb->len - cutlen > ETH_HLEN)
+                       if (skb->len - cutlen > ovs_mac_header_len(key))
                                pskb_trim(skb, skb->len - cutlen);
                        else
-                               pskb_trim(skb, ETH_HLEN);
+                               pskb_trim(skb, ovs_mac_header_len(key));
                }
 
-               if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
-                       ovs_vport_send(vport, skb);
+               if (likely(!mru ||
+                          (skb->len <= mru + vport->dev->hard_header_len))) {
+                       ovs_vport_send(vport, skb, ovs_key_mac_proto(key));
                } else if (mru <= vport->dev->mtu) {
                        struct net *net = read_pnet(&dp->net);
-                       __be16 ethertype = key->eth.type;
-
-                       if (!is_flow_key_valid(key)) {
-                               if (eth_p_mpls(skb->protocol))
-                                       ethertype = skb->inner_protocol;
-                               else
-                                       ethertype = vlan_get_protocol(skb);
-                       }
 
-                       ovs_fragment(net, vport, skb, mru, ethertype);
+                       ovs_fragment(net, vport, skb, mru, key);
                } else {
                        kfree_skb(skb);
                }
@@ -1198,6 +1241,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        if (err)
                                return err == -EINPROGRESS ? 0 : err;
                        break;
+
+               case OVS_ACTION_ATTR_PUSH_ETH:
+                       err = push_eth(skb, key, nla_data(a));
+                       break;
+
+               case OVS_ACTION_ATTR_POP_ETH:
+                       err = pop_eth(skb, key);
+                       break;
                }
 
                if (unlikely(err)) {
index 31045ef44a82b925e53ce34da2aca74448dd9c8d..9b8a028b7dad6b0c6175cebe9bb8c0b30f8c4149 100644 (file)
@@ -725,12 +725,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
                        skb->nfctinfo = IP_CT_NEW;
                }
 
-               /* Repeat if requested, see nf_iterate(). */
-               do {
-                       err = nf_conntrack_in(net, info->family,
-                                             NF_INET_PRE_ROUTING, skb);
-               } while (err == NF_REPEAT);
-
+               err = nf_conntrack_in(net, info->family,
+                                     NF_INET_PRE_ROUTING, skb);
                if (err != NF_ACCEPT)
                        return -ENOENT;
 
index 194435aa1165442fb4943ea935ff52ca83dbf1ea..2d4c4d3911c02ce91de79ac4c9ad4923d4bc2329 100644 (file)
@@ -58,7 +58,7 @@
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
 
-int ovs_net_id __read_mostly;
+unsigned int ovs_net_id __read_mostly;
 
 static struct genl_family dp_packet_genl_family;
 static struct genl_family dp_flow_genl_family;
@@ -560,7 +560,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct datapath *dp;
-       struct ethhdr *eth;
        struct vport *input_vport;
        u16 mru = 0;
        int len;
@@ -581,17 +580,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 
        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
 
-       skb_reset_mac_header(packet);
-       eth = eth_hdr(packet);
-
-       /* Normally, setting the skb 'protocol' field would be handled by a
-        * call to eth_type_trans(), but it assumes there's a sending
-        * device, which we may not have. */
-       if (eth_proto_is_802_3(eth->h_proto))
-               packet->protocol = eth->h_proto;
-       else
-               packet->protocol = htons(ETH_P_802_2);
-
        /* Set packet's mru */
        if (a[OVS_PACKET_ATTR_MRU]) {
                mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
@@ -618,6 +606,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
        rcu_assign_pointer(flow->sf_acts, acts);
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;
+       packet->protocol = flow->key.eth.type;
 
        rcu_read_lock();
        dp = get_dp_rcu(net, ovs_header->dp_ifindex);
@@ -670,8 +659,7 @@ static const struct genl_ops dp_packet_genl_ops[] = {
        }
 };
 
-static struct genl_family dp_packet_genl_family = {
-       .id = GENL_ID_GENERATE,
+static struct genl_family dp_packet_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
@@ -680,6 +668,7 @@ static struct genl_family dp_packet_genl_family = {
        .parallel_ops = true,
        .ops = dp_packet_genl_ops,
        .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
+       .module = THIS_MODULE,
 };
 
 static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
@@ -1435,8 +1424,7 @@ static const struct genl_ops dp_flow_genl_ops[] = {
        },
 };
 
-static struct genl_family dp_flow_genl_family = {
-       .id = GENL_ID_GENERATE,
+static struct genl_family dp_flow_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_FLOW_FAMILY,
        .version = OVS_FLOW_VERSION,
@@ -1447,6 +1435,7 @@ static struct genl_family dp_flow_genl_family = {
        .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
        .mcgrps = &ovs_dp_flow_multicast_group,
        .n_mcgrps = 1,
+       .module = THIS_MODULE,
 };
 
 static size_t ovs_dp_cmd_msg_size(void)
@@ -1821,8 +1810,7 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
        },
 };
 
-static struct genl_family dp_datapath_genl_family = {
-       .id = GENL_ID_GENERATE,
+static struct genl_family dp_datapath_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_DATAPATH_FAMILY,
        .version = OVS_DATAPATH_VERSION,
@@ -1833,6 +1821,7 @@ static struct genl_family dp_datapath_genl_family = {
        .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
        .mcgrps = &ovs_dp_datapath_multicast_group,
        .n_mcgrps = 1,
+       .module = THIS_MODULE,
 };
 
 /* Called with ovs_mutex or RCU read lock. */
@@ -2243,8 +2232,7 @@ static const struct genl_ops dp_vport_genl_ops[] = {
        },
 };
 
-struct genl_family dp_vport_genl_family = {
-       .id = GENL_ID_GENERATE,
+struct genl_family dp_vport_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_VPORT_FAMILY,
        .version = OVS_VPORT_VERSION,
@@ -2255,6 +2243,7 @@ struct genl_family dp_vport_genl_family = {
        .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
        .mcgrps = &ovs_dp_vport_multicast_group,
        .n_mcgrps = 1,
+       .module = THIS_MODULE,
 };
 
 static struct genl_family * const dp_genl_families[] = {
@@ -2272,7 +2261,7 @@ static void dp_unregister_genl(int n_families)
                genl_unregister_family(dp_genl_families[i]);
 }
 
-static int dp_register_genl(void)
+static int __init dp_register_genl(void)
 {
        int err;
        int i;
index ab85c1cae255b8fd373a0d5e0e1d05841b5bd8d0..1c6e9377436df1e93081c825c142b712a811277b 100644 (file)
@@ -144,7 +144,7 @@ struct ovs_net {
        bool xt_label;
 };
 
-extern int ovs_net_id;
+extern unsigned int ovs_net_id;
 void ovs_lock(void);
 void ovs_unlock(void);
 
index 22087062bd1013e7c526dc95bce379bcaaebc4b9..08aa926cd5cfe5f6b3f47f3dc4df5c970a6ba2a7 100644 (file)
@@ -334,14 +334,17 @@ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
        return 1;
 }
 
-static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+static void clear_vlan(struct sw_flow_key *key)
 {
-       int res;
-
        key->eth.vlan.tci = 0;
        key->eth.vlan.tpid = 0;
        key->eth.cvlan.tci = 0;
        key->eth.cvlan.tpid = 0;
+}
+
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+{
+       int res;
 
        if (skb_vlan_tag_present(skb)) {
                key->eth.vlan.tci = htons(skb->vlan_tci);
@@ -483,17 +486,20 @@ invalid:
  *
  * Returns 0 if successful, otherwise a negative errno value.
  *
- * Initializes @skb header pointers as follows:
+ * Initializes @skb header fields as follows:
  *
- *    - skb->mac_header: the Ethernet header.
+ *    - skb->mac_header: the L2 header.
  *
- *    - skb->network_header: just past the Ethernet header, or just past the
- *      VLAN header, to the first byte of the Ethernet payload.
+ *    - skb->network_header: just past the L2 header, or just past the
+ *      VLAN header, to the first byte of the L2 payload.
  *
  *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
  *      on output, then just past the IP header, if one is present and
  *      of a correct length, otherwise the same as skb->network_header.
  *      For other key->eth.type values it is left untouched.
+ *
+ *    - skb->protocol: the type of the data starting at skb->network_header.
+ *      Equals to key->eth.type.
  */
 static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 {
@@ -505,28 +511,35 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
 
        skb_reset_mac_header(skb);
 
-       /* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
-        * header in the linear data area.
-        */
-       eth = eth_hdr(skb);
-       ether_addr_copy(key->eth.src, eth->h_source);
-       ether_addr_copy(key->eth.dst, eth->h_dest);
+       /* Link layer. */
+       clear_vlan(key);
+       if (key->mac_proto == MAC_PROTO_NONE) {
+               if (unlikely(eth_type_vlan(skb->protocol)))
+                       return -EINVAL;
 
-       __skb_pull(skb, 2 * ETH_ALEN);
-       /* We are going to push all headers that we pull, so no need to
-        * update skb->csum here.
-        */
+               skb_reset_network_header(skb);
+       } else {
+               eth = eth_hdr(skb);
+               ether_addr_copy(key->eth.src, eth->h_source);
+               ether_addr_copy(key->eth.dst, eth->h_dest);
 
-       if (unlikely(parse_vlan(skb, key)))
-               return -ENOMEM;
+               __skb_pull(skb, 2 * ETH_ALEN);
+               /* We are going to push all headers that we pull, so no need to
+               * update skb->csum here.
+               */
 
-       key->eth.type = parse_ethertype(skb);
-       if (unlikely(key->eth.type == htons(0)))
-               return -ENOMEM;
+               if (unlikely(parse_vlan(skb, key)))
+                       return -ENOMEM;
+
+               skb->protocol = parse_ethertype(skb);
+               if (unlikely(skb->protocol == htons(0)))
+                       return -ENOMEM;
 
-       skb_reset_network_header(skb);
+               skb_reset_network_header(skb);
+               __skb_push(skb, skb->data - skb_mac_header(skb));
+       }
        skb_reset_mac_len(skb);
-       __skb_push(skb, skb->data - skb_mac_header(skb));
+       key->eth.type = skb->protocol;
 
        /* Network layer. */
        if (key->eth.type == htons(ETH_P_IP)) {
@@ -721,9 +734,25 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
        return key_extract(skb, key);
 }
 
+static int key_extract_mac_proto(struct sk_buff *skb)
+{
+       switch (skb->dev->type) {
+       case ARPHRD_ETHER:
+               return MAC_PROTO_ETHERNET;
+       case ARPHRD_NONE:
+               if (skb->protocol == htons(ETH_P_TEB))
+                       return MAC_PROTO_ETHERNET;
+               return MAC_PROTO_NONE;
+       }
+       WARN_ON_ONCE(1);
+       return -EINVAL;
+}
+
 int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
                         struct sk_buff *skb, struct sw_flow_key *key)
 {
+       int res;
+
        /* Extract metadata from packet. */
        if (tun_info) {
                key->tun_proto = ip_tunnel_info_af(tun_info);
@@ -751,6 +780,10 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
        key->phy.skb_mark = skb->mark;
        ovs_ct_fill_key(skb, key);
        key->ovs_flow_hash = 0;
+       res = key_extract_mac_proto(skb);
+       if (res < 0)
+               return res;
+       key->mac_proto = res;
        key->recirc_id = 0;
 
        return key_extract(skb, key);
@@ -767,5 +800,29 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
        if (err)
                return err;
 
+       if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
+               /* key_extract assumes that skb->protocol is set-up for
+                * layer 3 packets which is the case for other callers,
+                * in particular packets received from the network stack.
+                * Here the correct value can be set from the metadata
+                * extracted above.
+                */
+               skb->protocol = key->eth.type;
+       } else {
+               struct ethhdr *eth;
+
+               skb_reset_mac_header(skb);
+               eth = eth_hdr(skb);
+
+               /* Normally, setting the skb 'protocol' field would be
+                * handled by a call to eth_type_trans(), but it assumes
+                * there's a sending device, which we may not have.
+                */
+               if (eth_proto_is_802_3(eth->h_proto))
+                       skb->protocol = eth->h_proto;
+               else
+                       skb->protocol = htons(ETH_P_802_2);
+       }
+
        return key_extract(skb, key);
 }
index ae783f5c6695867a0941037700b36c2d8ef16a58..f61cae7f9030df68c3001e5e63dc2c903b3d1d3f 100644 (file)
 
 struct sk_buff;
 
+enum sw_flow_mac_proto {
+       MAC_PROTO_NONE = 0,
+       MAC_PROTO_ETHERNET,
+};
+#define SW_FLOW_KEY_INVALID    0x80
+
 /* Store options at the end of the array if they are less than the
  * maximum size. This allows us to get the benefits of variable length
  * matching for small options.
@@ -68,6 +74,7 @@ struct sw_flow_key {
                u32     skb_mark;       /* SKB mark. */
                u16     in_port;        /* Input switch port (or DP_MAX_PORTS). */
        } __packed phy; /* Safe when right after 'tun_key'. */
+       u8 mac_proto;                   /* MAC layer protocol (e.g. Ethernet). */
        u8 tun_proto;                   /* Protocol of encapsulating tunnel. */
        u32 ovs_flow_hash;              /* Datapath computed hash value.  */
        u32 recirc_id;                  /* Recirculation ID.  */
@@ -206,6 +213,21 @@ struct arp_eth_header {
        unsigned char       ar_tip[4];          /* target IP address        */
 } __packed;
 
+static inline u8 ovs_key_mac_proto(const struct sw_flow_key *key)
+{
+       return key->mac_proto & ~SW_FLOW_KEY_INVALID;
+}
+
+static inline u16 __ovs_mac_header_len(u8 mac_proto)
+{
+       return mac_proto == MAC_PROTO_ETHERNET ? ETH_HLEN : 0;
+}
+
+static inline u16 ovs_mac_header_len(const struct sw_flow_key *key)
+{
+       return __ovs_mac_header_len(ovs_key_mac_proto(key));
+}
+
 static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid)
 {
        return sfid->ufid_len;
index ae25ded82b3baa779438d3460aa1aa8d5ef964f9..d19044f2b1f4d1216ec7a4f25eaabecbf99c6214 100644 (file)
@@ -123,7 +123,7 @@ static void update_range(struct sw_flow_match *match,
 static bool match_validate(const struct sw_flow_match *match,
                           u64 key_attrs, u64 mask_attrs, bool log)
 {
-       u64 key_expected = 1 << OVS_KEY_ATTR_ETHERNET;
+       u64 key_expected = 0;
        u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
 
        /* The following mask attributes allowed only if they
@@ -969,10 +969,33 @@ static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
        return 0;
 }
 
+static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
+                                      u64 *attrs, const struct nlattr **a,
+                                      bool is_mask, bool log)
+{
+       __be16 eth_type;
+
+       eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+       if (is_mask) {
+               /* Always exact match EtherType. */
+               eth_type = htons(0xffff);
+       } else if (!eth_proto_is_802_3(eth_type)) {
+               OVS_NLERR(log, "EtherType %x is less than min %x",
+                               ntohs(eth_type), ETH_P_802_3_MIN);
+               return -EINVAL;
+       }
+
+       SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
+       *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+       return 0;
+}
+
 static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
                                 u64 *attrs, const struct nlattr **a,
                                 bool is_mask, bool log)
 {
+       u8 mac_proto = MAC_PROTO_ETHERNET;
+
        if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
                u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
 
@@ -1059,6 +1082,21 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
                                   sizeof(*cl), is_mask);
                *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
        }
+
+       /* For layer 3 packets the Ethernet type is provided
+        * and treated as metadata but no MAC addresses are provided.
+        */
+       if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
+           (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
+               mac_proto = MAC_PROTO_NONE;
+
+       /* Always exact match mac_proto */
+       SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
+
+       if (mac_proto == MAC_PROTO_NONE)
+               return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
+                                                  log);
+
        return 0;
 }
 
@@ -1081,33 +1119,26 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
                SW_FLOW_KEY_MEMCPY(match, eth.dst,
                                eth_key->eth_dst, ETH_ALEN, is_mask);
                attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
-       }
 
-       if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
-               /* VLAN attribute is always parsed before getting here since it
-                * may occur multiple times.
-                */
-               OVS_NLERR(log, "VLAN attribute unexpected.");
-               return -EINVAL;
-       }
-
-       if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
-               __be16 eth_type;
-
-               eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-               if (is_mask) {
-                       /* Always exact match EtherType. */
-                       eth_type = htons(0xffff);
-               } else if (!eth_proto_is_802_3(eth_type)) {
-                       OVS_NLERR(log, "EtherType %x is less than min %x",
-                                 ntohs(eth_type), ETH_P_802_3_MIN);
+               if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
+                       /* VLAN attribute is always parsed before getting here since it
+                        * may occur multiple times.
+                        */
+                       OVS_NLERR(log, "VLAN attribute unexpected.");
                        return -EINVAL;
                }
 
-               SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
-               attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
-       } else if (!is_mask) {
-               SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
+               if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
+                       err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
+                                                         log);
+                       if (err)
+                               return err;
+               } else if (!is_mask) {
+                       SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
+               }
+       } else if (!match->key->eth.type) {
+               OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
+               return -EINVAL;
        }
 
        if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
@@ -1556,42 +1587,44 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
        if (ovs_ct_put_key(output, skb))
                goto nla_put_failure;
 
-       nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
-       if (!nla)
-               goto nla_put_failure;
-
-       eth_key = nla_data(nla);
-       ether_addr_copy(eth_key->eth_src, output->eth.src);
-       ether_addr_copy(eth_key->eth_dst, output->eth.dst);
-
-       if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
-               if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
+       if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
+               nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
+               if (!nla)
                        goto nla_put_failure;
-               encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
-               if (!swkey->eth.vlan.tci)
-                       goto unencap;
 
-               if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
-                       if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
+               eth_key = nla_data(nla);
+               ether_addr_copy(eth_key->eth_src, output->eth.src);
+               ether_addr_copy(eth_key->eth_dst, output->eth.dst);
+
+               if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
+                       if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
                                goto nla_put_failure;
-                       in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
-                       if (!swkey->eth.cvlan.tci)
+                       encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+                       if (!swkey->eth.vlan.tci)
                                goto unencap;
+
+                       if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
+                               if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
+                                       goto nla_put_failure;
+                               in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+                               if (!swkey->eth.cvlan.tci)
+                                       goto unencap;
+                       }
                }
-       }
 
-       if (swkey->eth.type == htons(ETH_P_802_2)) {
-               /*
-                * Ethertype 802.2 is represented in the netlink with omitted
-                * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
-                * 0xffff in the mask attribute.  Ethertype can also
-                * be wildcarded.
-                */
-               if (is_mask && output->eth.type)
-                       if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
-                                               output->eth.type))
-                               goto nla_put_failure;
-               goto unencap;
+               if (swkey->eth.type == htons(ETH_P_802_2)) {
+                       /*
+                       * Ethertype 802.2 is represented in the netlink with omitted
+                       * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
+                       * 0xffff in the mask attribute.  Ethertype can also
+                       * be wildcarded.
+                       */
+                       if (is_mask && output->eth.type)
+                               if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
+                                                       output->eth.type))
+                                       goto nla_put_failure;
+                       goto unencap;
+               }
        }
 
        if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
@@ -2126,8 +2159,8 @@ static bool validate_masked(u8 *data, int len)
 
 static int validate_set(const struct nlattr *a,
                        const struct sw_flow_key *flow_key,
-                       struct sw_flow_actions **sfa,
-                       bool *skip_copy, __be16 eth_type, bool masked, bool log)
+                       struct sw_flow_actions **sfa, bool *skip_copy,
+                       u8 mac_proto, __be16 eth_type, bool masked, bool log)
 {
        const struct nlattr *ovs_key = nla_data(a);
        int key_type = nla_type(ovs_key);
@@ -2157,9 +2190,12 @@ static int validate_set(const struct nlattr *a,
        case OVS_KEY_ATTR_SKB_MARK:
        case OVS_KEY_ATTR_CT_MARK:
        case OVS_KEY_ATTR_CT_LABELS:
-       case OVS_KEY_ATTR_ETHERNET:
                break;
 
+       case OVS_KEY_ATTR_ETHERNET:
+               if (mac_proto != MAC_PROTO_ETHERNET)
+                       return -EINVAL;
+
        case OVS_KEY_ATTR_TUNNEL:
                if (masked)
                        return -EINVAL; /* Masked tunnel set not supported. */
@@ -2324,6 +2360,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                                  int depth, struct sw_flow_actions **sfa,
                                  __be16 eth_type, __be16 vlan_tci, bool log)
 {
+       u8 mac_proto = ovs_key_mac_proto(key);
        const struct nlattr *a;
        int rem, err;
 
@@ -2346,6 +2383,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                        [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
                        [OVS_ACTION_ATTR_CT] = (u32)-1,
                        [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
+                       [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
+                       [OVS_ACTION_ATTR_POP_ETH] = 0,
                };
                const struct ovs_action_push_vlan *vlan;
                int type = nla_type(a);
@@ -2394,10 +2433,14 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                }
 
                case OVS_ACTION_ATTR_POP_VLAN:
+                       if (mac_proto != MAC_PROTO_ETHERNET)
+                               return -EINVAL;
                        vlan_tci = htons(0);
                        break;
 
                case OVS_ACTION_ATTR_PUSH_VLAN:
+                       if (mac_proto != MAC_PROTO_ETHERNET)
+                               return -EINVAL;
                        vlan = nla_data(a);
                        if (!eth_type_vlan(vlan->vlan_tpid))
                                return -EINVAL;
@@ -2447,14 +2490,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 
                case OVS_ACTION_ATTR_SET:
                        err = validate_set(a, key, sfa,
-                                          &skip_copy, eth_type, false, log);
+                                          &skip_copy, mac_proto, eth_type,
+                                          false, log);
                        if (err)
                                return err;
                        break;
 
                case OVS_ACTION_ATTR_SET_MASKED:
                        err = validate_set(a, key, sfa,
-                                          &skip_copy, eth_type, true, log);
+                                          &skip_copy, mac_proto, eth_type,
+                                          true, log);
                        if (err)
                                return err;
                        break;
@@ -2474,6 +2519,22 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
                        skip_copy = true;
                        break;
 
+               case OVS_ACTION_ATTR_PUSH_ETH:
+                       /* Disallow pushing an Ethernet header if one
+                        * is already present */
+                       if (mac_proto != MAC_PROTO_NONE)
+                               return -EINVAL;
+                       mac_proto = MAC_PROTO_NONE;
+                       break;
+
+               case OVS_ACTION_ATTR_POP_ETH:
+                       if (mac_proto != MAC_PROTO_ETHERNET)
+                               return -EINVAL;
+                       if (vlan_tci & htons(VLAN_TAG_PRESENT))
+                               return -EINVAL;
+                       mac_proto = MAC_PROTO_ETHERNET;
+                       break;
+
                default:
                        OVS_NLERR(log, "Unknown Action type %d", type);
                        return -EINVAL;
index e825753de1e0065944a1e555872efe2198e4f3b0..0389398fa4ab81a57a4b27f47ffc879f8904f446 100644 (file)
@@ -57,8 +57,10 @@ static void netdev_port_receive(struct sk_buff *skb)
        if (unlikely(!skb))
                return;
 
-       skb_push(skb, ETH_HLEN);
-       skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+       if (skb->dev->type == ARPHRD_ETHER) {
+               skb_push(skb, ETH_HLEN);
+               skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+       }
        ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
        return;
 error:
@@ -97,7 +99,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
        }
 
        if (vport->dev->flags & IFF_LOOPBACK ||
-           vport->dev->type != ARPHRD_ETHER ||
+           (vport->dev->type != ARPHRD_ETHER &&
+            vport->dev->type != ARPHRD_NONE) ||
            ovs_is_internal_dev(vport->dev)) {
                err = -EINVAL;
                goto error_put;
index 9bb85b35a1fbff2f6558417afbb9b2203dabb212..b6c8524032a0633c8f3262e9ea58e457b6a14598 100644 (file)
@@ -464,9 +464,10 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
        return 0;
 }
 
-static unsigned int packet_length(const struct sk_buff *skb)
+static unsigned int packet_length(const struct sk_buff *skb,
+                                 struct net_device *dev)
 {
-       unsigned int length = skb->len - ETH_HLEN;
+       unsigned int length = skb->len - dev->hard_header_len;
 
        if (!skb_vlan_tag_present(skb) &&
            eth_type_vlan(skb->protocol))
@@ -480,14 +481,34 @@ static unsigned int packet_length(const struct sk_buff *skb)
        return length;
 }
 
-void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
+void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto)
 {
        int mtu = vport->dev->mtu;
 
-       if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
+       switch (vport->dev->type) {
+       case ARPHRD_NONE:
+               if (mac_proto == MAC_PROTO_ETHERNET) {
+                       skb_reset_network_header(skb);
+                       skb_reset_mac_len(skb);
+                       skb->protocol = htons(ETH_P_TEB);
+               } else if (mac_proto != MAC_PROTO_NONE) {
+                       WARN_ON_ONCE(1);
+                       goto drop;
+               }
+               break;
+       case ARPHRD_ETHER:
+               if (mac_proto != MAC_PROTO_ETHERNET)
+                       goto drop;
+               break;
+       default:
+               goto drop;
+       }
+
+       if (unlikely(packet_length(skb, vport->dev) > mtu &&
+                    !skb_is_gso(skb))) {
                net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
                                     vport->dev->name,
-                                    packet_length(skb), mtu);
+                                    packet_length(skb, vport->dev), mtu);
                vport->dev->stats.tx_errors++;
                goto drop;
        }
index 46e5b69927c72f9cec5c5d32034120ad87439961..cda66c26ad0889a0d7ef99b5e14b2f83e4e2ff11 100644 (file)
@@ -197,6 +197,6 @@ int __ovs_vport_ops_register(struct vport_ops *ops);
        })
 
 void ovs_vport_ops_unregister(struct vport_ops *ops);
-void ovs_vport_send(struct vport *vport, struct sk_buff *skb);
+void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto);
 
 #endif /* vport.h */
index 11db0d619c007270e7ac003e916a4a4097a79dc9..fab9bbfdead5c80965daf3f6309e2b2356aea50e 100644 (file)
@@ -250,7 +250,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po);
 static int packet_direct_xmit(struct sk_buff *skb)
 {
        struct net_device *dev = skb->dev;
-       netdev_features_t features;
+       struct sk_buff *orig_skb = skb;
        struct netdev_queue *txq;
        int ret = NETDEV_TX_BUSY;
 
@@ -258,9 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb)
                     !netif_carrier_ok(dev)))
                goto drop;
 
-       features = netif_skb_features(skb);
-       if (skb_needs_linearize(skb, features) &&
-           __skb_linearize(skb))
+       skb = validate_xmit_skb_list(skb, dev);
+       if (skb != orig_skb)
                goto drop;
 
        txq = skb_get_tx_queue(dev, skb);
@@ -280,7 +279,7 @@ static int packet_direct_xmit(struct sk_buff *skb)
        return ret;
 drop:
        atomic_long_inc(&dev->tx_dropped);
-       kfree_skb(skb);
+       kfree_skb_list(skb);
        return NET_XMIT_DROP;
 }
 
@@ -1968,17 +1967,6 @@ static unsigned int run_filter(struct sk_buff *skb,
        return res;
 }
 
-static int __packet_rcv_vnet(const struct sk_buff *skb,
-                            struct virtio_net_hdr *vnet_hdr)
-{
-       *vnet_hdr = (const struct virtio_net_hdr) { 0 };
-
-       if (virtio_net_hdr_from_skb(skb, vnet_hdr, vio_le()))
-               BUG();
-
-       return 0;
-}
-
 static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
                           size_t *len)
 {
@@ -1988,7 +1976,7 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
                return -EINVAL;
        *len -= sizeof(vnet_hdr);
 
-       if (__packet_rcv_vnet(skb, &vnet_hdr))
+       if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le()))
                return -EINVAL;
 
        return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
@@ -2247,8 +2235,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
        spin_unlock(&sk->sk_receive_queue.lock);
 
        if (po->has_vnet_hdr) {
-               if (__packet_rcv_vnet(skb, h.raw + macoff -
-                                          sizeof(struct virtio_net_hdr))) {
+               if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
+                                           sizeof(struct virtio_net_hdr),
+                                           vio_le())) {
                        spin_lock(&sk->sk_receive_queue.lock);
                        goto drop_n_account;
                }
@@ -2391,8 +2380,6 @@ static void tpacket_set_protocol(const struct net_device *dev,
 
 static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
 {
-       unsigned short gso_type = 0;
-
        if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
            (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
             __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
@@ -2404,29 +2391,6 @@ static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
        if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
                return -EINVAL;
 
-       if (vnet_hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-               switch (vnet_hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
-               case VIRTIO_NET_HDR_GSO_TCPV4:
-                       gso_type = SKB_GSO_TCPV4;
-                       break;
-               case VIRTIO_NET_HDR_GSO_TCPV6:
-                       gso_type = SKB_GSO_TCPV6;
-                       break;
-               case VIRTIO_NET_HDR_GSO_UDP:
-                       gso_type = SKB_GSO_UDP;
-                       break;
-               default:
-                       return -EINVAL;
-               }
-
-               if (vnet_hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
-                       gso_type |= SKB_GSO_TCP_ECN;
-
-               if (vnet_hdr->gso_size == 0)
-                       return -EINVAL;
-       }
-
-       vnet_hdr->gso_type = gso_type;  /* changes type, temporary storage */
        return 0;
 }
 
@@ -2446,27 +2410,6 @@ static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
        return __packet_snd_vnet_parse(vnet_hdr, *len);
 }
 
-static int packet_snd_vnet_gso(struct sk_buff *skb,
-                              struct virtio_net_hdr *vnet_hdr)
-{
-       if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
-               u16 s = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start);
-               u16 o = __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset);
-
-               if (!skb_partial_csum_set(skb, s, o))
-                       return -EINVAL;
-       }
-
-       skb_shinfo(skb)->gso_size =
-               __virtio16_to_cpu(vio_le(), vnet_hdr->gso_size);
-       skb_shinfo(skb)->gso_type = vnet_hdr->gso_type;
-
-       /* Header must be checked, and gso_segs computed. */
-       skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
-       skb_shinfo(skb)->gso_segs = 0;
-       return 0;
-}
-
 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
                void *frame, struct net_device *dev, void *data, int tp_len,
                __be16 proto, unsigned char *addr, int hlen, int copylen,
@@ -2726,7 +2669,8 @@ tpacket_error:
                        }
                }
 
-               if (po->has_vnet_hdr && packet_snd_vnet_gso(skb, vnet_hdr)) {
+               if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr,
+                                                             vio_le())) {
                        tp_len = -EINVAL;
                        goto tpacket_error;
                }
@@ -2917,7 +2861,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
        packet_pick_tx_queue(dev, skb);
 
        if (po->has_vnet_hdr) {
-               err = packet_snd_vnet_gso(skb, &vnet_hdr);
+               err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
                if (err)
                        goto out_free;
                len += sizeof(vnet_hdr);
index 850a86cde0b3f6eab5b7aa09f4e6ffa66ccd6ed6..8bad5624a27a9ffdcbf193c4c2f078b4b648b044 100644 (file)
@@ -1167,7 +1167,7 @@ disabled:
        /* Wait until flow control allows TX */
        done = atomic_read(&pn->tx_credits);
        while (!done) {
-               DEFINE_WAIT(wait);
+               DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
                if (!timeo) {
                        err = -EAGAIN;
@@ -1178,10 +1178,9 @@ disabled:
                        goto out;
                }
 
-               prepare_to_wait(sk_sleep(sk), &wait,
-                               TASK_INTERRUPTIBLE);
-               done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits));
-               finish_wait(sk_sleep(sk), &wait);
+               add_wait_queue(sk_sleep(sk), &wait);
+               done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits), &wait);
+               remove_wait_queue(sk_sleep(sk), &wait);
 
                if (sk->sk_state != TCP_ESTABLISHED)
                        goto disabled;
index a58680016472b88010c30a5f1708c1f8a046280f..2cb4c5dfad6f76310275b90a0da4cddc2eb92fe1 100644 (file)
@@ -44,7 +44,7 @@ struct phonet_net {
        struct phonet_routes routes;
 };
 
-static int phonet_net_id __read_mostly;
+static unsigned int phonet_net_id __read_mostly;
 
 static struct phonet_net *phonet_pernet(struct net *net)
 {
index 0e72bec1529f52116a1aa8a2a4512d903ce03a4b..56c7d27eefee759be0c4dab0f939c84df9c49560 100644 (file)
@@ -13,5 +13,5 @@ obj-$(CONFIG_RDS_TCP) += rds_tcp.o
 rds_tcp-y :=           tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \
                        tcp_send.o tcp_stats.o
 
-ccflags-$(CONFIG_RDS_DEBUG)    :=      -DDEBUG
+ccflags-$(CONFIG_RDS_DEBUG)    :=      -DRDS_DEBUG
 
index 6beaeb1138f34a82f0d0a70f86dc5caba4d0ac5d..2ac1e6194be35fced1706ebf5351bedb7cf01411 100644 (file)
@@ -605,10 +605,14 @@ static void rds_exit(void)
 }
 module_exit(rds_exit);
 
+u32 rds_gen_num;
+
 static int rds_init(void)
 {
        int ret;
 
+       net_get_random_once(&rds_gen_num, sizeof(rds_gen_num));
+
        ret = rds_bind_lock_init();
        if (ret)
                goto out;
index 13f459dad4ef3c7d34bd1ea38a699e49c25f4534..fe9d31c0b22d40ef8b45fd0f17cccbcfd893de16 100644 (file)
@@ -269,6 +269,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
                        kmem_cache_free(rds_conn_slab, conn);
                        conn = found;
                } else {
+                       conn->c_my_gen_num = rds_gen_num;
+                       conn->c_peer_gen_num = 0;
                        hlist_add_head_rcu(&conn->c_hash_node, head);
                        rds_cong_add_conn(conn);
                        rds_conn_count++;
@@ -681,6 +683,7 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
            !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
                queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
 }
+EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
 
 void rds_conn_connect_if_down(struct rds_connection *conn)
 {
index 6cb91061556a369a96a65151bea3647c8fcc92d9..49bfb512d808d9f3159a031cf375767ef755d73a 100644 (file)
@@ -42,6 +42,7 @@ static unsigned int   rds_exthdr_size[__RDS_EXTHDR_MAX] = {
 [RDS_EXTHDR_RDMA]      = sizeof(struct rds_ext_header_rdma),
 [RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest),
 [RDS_EXTHDR_NPATHS]    = sizeof(u16),
+[RDS_EXTHDR_GEN_NUM]   = sizeof(u32),
 };
 
 
index 25532a46602f063065083509c55fb991243ba377..ebbf909b87ec3f62abec2573dcd55f4054138848 100644 (file)
@@ -33,7 +33,7 @@
 #define KERNEL_HAS_ATOMIC64
 #endif
 
-#ifdef DEBUG
+#ifdef RDS_DEBUG
 #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
 #else
 /* sigh, pr_debug() causes unused variable warnings */
@@ -151,6 +151,9 @@ struct rds_connection {
 
        struct rds_conn_path    c_path[RDS_MPATH_WORKERS];
        wait_queue_head_t       c_hs_waitq; /* handshake waitq */
+
+       u32                     c_my_gen_num;
+       u32                     c_peer_gen_num;
 };
 
 static inline
@@ -243,7 +246,8 @@ struct rds_ext_header_rdma_dest {
 /* Extension header announcing number of paths.
  * Implicit length = 2 bytes.
  */
-#define RDS_EXTHDR_NPATHS      4
+#define RDS_EXTHDR_NPATHS      5
+#define RDS_EXTHDR_GEN_NUM     6
 
 #define __RDS_EXTHDR_MAX       16 /* for now */
 
@@ -338,6 +342,7 @@ static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
 #define RDS_MSG_RETRANSMITTED  5
 #define RDS_MSG_MAPPED         6
 #define RDS_MSG_PAGEVEC                7
+#define RDS_MSG_FLUSH          8
 
 struct rds_message {
        atomic_t                m_refcount;
@@ -664,6 +669,7 @@ void rds_cong_exit(void);
 struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
 
 /* conn.c */
+extern u32 rds_gen_num;
 int rds_conn_init(void);
 void rds_conn_exit(void);
 struct rds_connection *rds_conn_create(struct net *net,
index cbfabdf3ff481c6b664bd06c53f22d26e65f1416..9d0666e5fe35db4215ef9c7aae9903d73f1fc5b2 100644 (file)
@@ -120,6 +120,36 @@ static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
        /* do nothing if no change in cong state */
 }
 
+static void rds_conn_peer_gen_update(struct rds_connection *conn,
+                                    u32 peer_gen_num)
+{
+       int i;
+       struct rds_message *rm, *tmp;
+       unsigned long flags;
+
+       WARN_ON(conn->c_trans->t_type != RDS_TRANS_TCP);
+       if (peer_gen_num != 0) {
+               if (conn->c_peer_gen_num != 0 &&
+                   peer_gen_num != conn->c_peer_gen_num) {
+                       for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+                               struct rds_conn_path *cp;
+
+                               cp = &conn->c_path[i];
+                               spin_lock_irqsave(&cp->cp_lock, flags);
+                               cp->cp_next_tx_seq = 1;
+                               cp->cp_next_rx_seq = 0;
+                               list_for_each_entry_safe(rm, tmp,
+                                                        &cp->cp_retrans,
+                                                        m_conn_item) {
+                                       set_bit(RDS_MSG_FLUSH, &rm->m_flags);
+                               }
+                               spin_unlock_irqrestore(&cp->cp_lock, flags);
+                       }
+               }
+               conn->c_peer_gen_num = peer_gen_num;
+       }
+}
+
 /*
  * Process all extension headers that come with this message.
  */
@@ -163,7 +193,9 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
        union {
                struct rds_ext_header_version version;
                u16 rds_npaths;
+               u32 rds_gen_num;
        } buffer;
+       u32 new_peer_gen_num = 0;
 
        while (1) {
                len = sizeof(buffer);
@@ -176,6 +208,9 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
                        conn->c_npaths = min_t(int, RDS_MPATH_WORKERS,
                                               buffer.rds_npaths);
                        break;
+               case RDS_EXTHDR_GEN_NUM:
+                       new_peer_gen_num = buffer.rds_gen_num;
+                       break;
                default:
                        pr_warn_ratelimited("ignoring unknown exthdr type "
                                             "0x%x\n", type);
@@ -183,6 +218,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr,
        }
        /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */
        conn->c_npaths = max_t(int, conn->c_npaths, 1);
+       rds_conn_peer_gen_update(conn, new_peer_gen_num);
 }
 
 /* rds_start_mprds() will synchronously start multiple paths when appropriate.
index 896626b9a0efde321d64b7f2eef3b3e0200b872f..77c8c6e613adf65057d3696806024fdf2cb15247 100644 (file)
@@ -259,8 +259,9 @@ restart:
                         * connection.
                         * Therefore, we never retransmit messages with RDMA ops.
                         */
-                       if (rm->rdma.op_active &&
-                           test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
+                       if (test_bit(RDS_MSG_FLUSH, &rm->m_flags) ||
+                           (rm->rdma.op_active &&
+                           test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))) {
                                spin_lock_irqsave(&cp->cp_lock, flags);
                                if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
                                        list_move(&rm->m_conn_item, &to_be_dropped);
@@ -1209,6 +1210,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
                rds_message_add_extension(&rm->m_inc.i_hdr,
                                          RDS_EXTHDR_NPATHS, &npaths,
                                          sizeof(npaths));
+               rds_message_add_extension(&rm->m_inc.i_hdr,
+                                         RDS_EXTHDR_GEN_NUM,
+                                         &cp->cp_conn->c_my_gen_num,
+                                         sizeof(u32));
        }
        spin_unlock_irqrestore(&cp->cp_lock, flags);
 
index fcddacc92e018bee041f250cde5e48ba8ea70c6a..1a0399dea7640beb3acd5fad7b17ac291e13a112 100644 (file)
@@ -220,7 +220,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)
        write_unlock_bh(&sock->sk->sk_callback_lock);
 }
 
-static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
+static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
                            struct rds_info_iterator *iter,
                            struct rds_info_lengths *lens)
 {
@@ -229,6 +229,7 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
        unsigned long flags;
        struct sockaddr_in sin;
        int sinlen;
+       struct socket *sock;
 
        spin_lock_irqsave(&rds_tcp_tc_list_lock, flags);
 
@@ -237,12 +238,17 @@ static void rds_tcp_tc_info(struct socket *sock, unsigned int len,
 
        list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) {
 
-               sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0);
-               tsinfo.local_addr = sin.sin_addr.s_addr;
-               tsinfo.local_port = sin.sin_port;
-               sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 1);
-               tsinfo.peer_addr = sin.sin_addr.s_addr;
-               tsinfo.peer_port = sin.sin_port;
+               sock = tc->t_sock;
+               if (sock) {
+                       sock->ops->getname(sock, (struct sockaddr *)&sin,
+                                          &sinlen, 0);
+                       tsinfo.local_addr = sin.sin_addr.s_addr;
+                       tsinfo.local_port = sin.sin_port;
+                       sock->ops->getname(sock, (struct sockaddr *)&sin,
+                                          &sinlen, 1);
+                       tsinfo.peer_addr = sin.sin_addr.s_addr;
+                       tsinfo.peer_port = sin.sin_port;
+               }
 
                tsinfo.hdr_rem = tc->t_tinc_hdr_rem;
                tsinfo.data_rem = tc->t_tinc_data_rem;
@@ -360,7 +366,7 @@ struct rds_transport rds_tcp_transport = {
        .t_mp_capable           = 1,
 };
 
-static int rds_tcp_netid;
+static unsigned int rds_tcp_netid;
 
 /* per-network namespace private data for this module */
 struct rds_tcp_net {
index 05f61c533ed333cd2417c38c1eafc65c04a2f834..d6839d96d539c250159e034208c7b7e875182bed 100644 (file)
@@ -60,7 +60,19 @@ void rds_tcp_state_change(struct sock *sk)
        case TCP_SYN_RECV:
                break;
        case TCP_ESTABLISHED:
-               rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
+               /* Force the peer to reconnect so that we have the
+                * TCP ports going from <smaller-ip>.<transient> to
+                * <larger-ip>.<RDS_TCP_PORT>. We avoid marking the
+                * RDS connection as RDS_CONN_UP until the reconnect,
+                * to avoid RDS datagram loss.
+                */
+               if (cp->cp_conn->c_laddr > cp->cp_conn->c_faddr &&
+                   rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
+                                            RDS_CONN_ERROR)) {
+                       rds_conn_path_drop(cp);
+               } else {
+                       rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
+               }
                break;
        case TCP_CLOSE_WAIT:
        case TCP_CLOSE:
index e0b23fb5b8d50328b40475529c39b0e107183cda..f74bab3ecdca69b0b59e18341a15ab8fe095b16c 100644 (file)
@@ -83,27 +83,22 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
 {
        int i;
        bool peer_is_smaller = (conn->c_faddr < conn->c_laddr);
-       int npaths = conn->c_npaths;
-
-       if (npaths <= 1) {
-               struct rds_conn_path *cp = &conn->c_path[0];
-               int ret;
-
-               ret = rds_conn_path_transition(cp, RDS_CONN_DOWN,
-                                              RDS_CONN_CONNECTING);
-               if (!ret)
-                       rds_conn_path_transition(cp, RDS_CONN_ERROR,
-                                                RDS_CONN_CONNECTING);
-               return cp->cp_transport_data;
-       }
+       int npaths = max_t(int, 1, conn->c_npaths);
 
-       /* for mprds, paths with cp_index > 0 MUST be initiated by the peer
+       /* for mprds, all paths MUST be initiated by the peer
         * with the smaller address.
         */
-       if (!peer_is_smaller)
+       if (!peer_is_smaller) {
+               /* Make sure we initiate at least one path if this
+                * has not already been done; rds_start_mprds() will
+                * take care of additional paths, if necessary.
+                */
+               if (npaths == 1)
+                       rds_conn_path_connect_if_down(&conn->c_path[0]);
                return NULL;
+       }
 
-       for (i = 1; i < npaths; i++) {
+       for (i = 0; i < npaths; i++) {
                struct rds_conn_path *cp = &conn->c_path[i];
 
                if (rds_conn_path_transition(cp, RDS_CONN_DOWN,
@@ -171,8 +166,8 @@ int rds_tcp_accept_one(struct socket *sock)
        mutex_lock(&rs_tcp->t_conn_path_lock);
        cp = rs_tcp->t_cpath;
        conn_state = rds_conn_path_state(cp);
-       if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP &&
-           conn_state != RDS_CONN_ERROR)
+       WARN_ON(conn_state == RDS_CONN_UP);
+       if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_ERROR)
                goto rst_nsk;
        if (rs_tcp->t_sock) {
                /* Need to resolve a duelling SYN between peers.
index 89d09b481f47f4b790ce5ebd2c456ae0e9549b6b..dcf4742083eab360ded7fcaddad8608b2526aa29 100644 (file)
@@ -100,6 +100,9 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
                set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags);
                tc->t_last_expected_una = rm->m_ack_seq + 1;
 
+               if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
+                       rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
+
                rdsdebug("rm %p tcp nxt %u ack_seq %llu\n",
                         rm, rds_tcp_snd_nxt(tc),
                         (unsigned long long)rm->m_ack_seq);
index 4353a29f3b5717d7ff67ae954e8cc1551b614160..1ed18d8c9c9fa31ac46028089184519624625a51 100644 (file)
@@ -276,7 +276,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
                goto error;
 
        trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
-                        here, ERR_PTR(ret));
+                        here, NULL);
 
        spin_lock_bh(&call->conn->params.peer->lock);
        hlist_add_head(&call->error_link,
index 44fb8d893c7d2c4227c09eb8badfca8e36bc92fe..1d87b5453ef7802a7f5ca6e7c2cbcff3be31159c 100644 (file)
@@ -1053,7 +1053,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
 
        ASSERT(!irqs_disabled());
 
-       skb = skb_recv_datagram(udp_sk, 0, 1, &ret);
+       skb = skb_recv_udp(udp_sk, 0, 1, &ret);
        if (!skb) {
                if (ret == -EAGAIN)
                        return;
@@ -1075,10 +1075,9 @@ void rxrpc_data_ready(struct sock *udp_sk)
 
        __UDP_INC_STATS(&init_net, UDP_MIB_INDATAGRAMS, 0);
 
-       /* The socket buffer we have is owned by UDP, with UDP's data all over
-        * it, but we really want our own data there.
+       /* The UDP protocol already released all skb resources;
+        * we are free to add our own data there.
         */
-       skb_orphan(skb);
        sp = rxrpc_skb(skb);
 
        /* dig out the RxRPC connection details */
index 941b724d523bf282e5f8e6901a882c0d12effa0b..862eea6b266c95f7aaaba96d2a07b8f875f37167 100644 (file)
@@ -193,8 +193,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
                fl6->fl6_dport = htons(7001);
                fl6->fl6_sport = htons(7000);
                dst = ip6_route_output(&init_net, NULL, fl6);
-               if (IS_ERR(dst)) {
-                       _leave(" [route err %ld]", PTR_ERR(dst));
+               if (dst->error) {
+                       _leave(" [route err %d]", dst->error);
                        return;
                }
                break;
index a512b18c0088506bc577d8b3a1113871206c6d47..f893d180da1caa3b6dd1cc8773920beb1885f9b0 100644 (file)
@@ -1028,8 +1028,7 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
 
        if (tb[1] == NULL)
                return NULL;
-       if (nla_parse(tb2, TCA_ACT_MAX, nla_data(tb[1]),
-                     nla_len(tb[1]), NULL) < 0)
+       if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL) < 0)
                return NULL;
        kind = tb2[TCA_ACT_KIND];
 
index 1d3960033f61d0f72d5039ba31ff1c8dc91d7cba..84c1d2da4f8b0576fde83e66ab0664d4e9d4e557 100644 (file)
@@ -28,12 +28,11 @@ struct tcf_bpf_cfg {
        struct bpf_prog *filter;
        struct sock_filter *bpf_ops;
        const char *bpf_name;
-       u32 bpf_fd;
        u16 bpf_num_ops;
        bool is_ebpf;
 };
 
-static int bpf_net_id;
+static unsigned int bpf_net_id;
 static struct tc_action_ops act_bpf_ops;
 
 static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
@@ -118,9 +117,6 @@ static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
 static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
                                  struct sk_buff *skb)
 {
-       if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd))
-               return -EMSGSIZE;
-
        if (prog->bpf_name &&
            nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
                return -EMSGSIZE;
@@ -226,16 +222,13 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
                return PTR_ERR(fp);
 
        if (tb[TCA_ACT_BPF_NAME]) {
-               name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
-                              nla_len(tb[TCA_ACT_BPF_NAME]),
-                              GFP_KERNEL);
+               name = nla_memdup(tb[TCA_ACT_BPF_NAME], GFP_KERNEL);
                if (!name) {
                        bpf_prog_put(fp);
                        return -ENOMEM;
                }
        }
 
-       cfg->bpf_fd = bpf_fd;
        cfg->bpf_name = name;
        cfg->filter = fp;
        cfg->is_ebpf = true;
@@ -334,8 +327,6 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
        if (cfg.bpf_num_ops)
                prog->bpf_num_ops = cfg.bpf_num_ops;
-       if (cfg.bpf_fd)
-               prog->bpf_fd = cfg.bpf_fd;
 
        prog->tcf_action = parm->action;
        rcu_assign_pointer(prog->filter, cfg.filter);
index eae07a2e774d81ecfbbe444ae0f33385b6adb7a0..ab80629099622c47933efb36662a323f98f66773 100644 (file)
@@ -30,7 +30,7 @@
 
 #define CONNMARK_TAB_MASK     3
 
-static int connmark_net_id;
+static unsigned int connmark_net_id;
 static struct tc_action_ops act_connmark_ops;
 
 static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
index e0defcef376d872c123b7d23f81a3b1eed207c02..a0edd80a44db4ad09862ad8335340180839e83d3 100644 (file)
@@ -42,7 +42,7 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
        [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
 };
 
-static int csum_net_id;
+static unsigned int csum_net_id;
 static struct tc_action_ops act_csum_ops;
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
index e0aa30f83c6ccdb38864fbc6a3bdbb529d9da816..e6c874a2b283f6e265b61a39f1fcd053389b9051 100644 (file)
@@ -25,7 +25,7 @@
 
 #define GACT_TAB_MASK  15
 
-static int gact_net_id;
+static unsigned int gact_net_id;
 static struct tc_action_ops act_gact_ops;
 
 #ifdef CONFIG_GACT_PROB
index 95c463cbb9a6921b63fde46e040024fe4afdbd8a..80b848d3f0964b4a9ff8c726df5731dc39631d19 100644 (file)
@@ -35,7 +35,7 @@
 
 #define IFE_TAB_MASK 15
 
-static int ife_net_id;
+static unsigned int ife_net_id;
 static int max_metacnt = IFE_META_MAX + 1;
 static struct tc_action_ops act_ife_ops;
 
index 378c1c9760583818e8ea93a3e24d13bd007b0c3d..992ef8d624f11819e032411e72ea60aa6aa93ba1 100644 (file)
 
 #define IPT_TAB_MASK     15
 
-static int ipt_net_id;
+static unsigned int ipt_net_id;
 static struct tc_action_ops act_ipt_ops;
 
-static int xt_net_id;
+static unsigned int xt_net_id;
 static struct tc_action_ops act_xt_ops;
 
 static int ipt_init_target(struct xt_entry_target *t, char *table,
@@ -213,6 +213,12 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
        int ret = 0, result = 0;
        struct tcf_ipt *ipt = to_ipt(a);
        struct xt_action_param par;
+       struct nf_hook_state state = {
+               .net    = dev_net(skb->dev),
+               .in     = skb->dev,
+               .hook   = ipt->tcfi_hook,
+               .pf     = NFPROTO_IPV4,
+       };
 
        if (skb_unclone(skb, GFP_ATOMIC))
                return TC_ACT_UNSPEC;
@@ -226,13 +232,9 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
         * worry later - danger - this API seems to have changed
         * from earlier kernels
         */
-       par.net      = dev_net(skb->dev);
-       par.in       = skb->dev;
-       par.out      = NULL;
-       par.hooknum  = ipt->tcfi_hook;
+       par.state    = &state;
        par.target   = ipt->tcfi_t->u.kernel.target;
        par.targinfo = ipt->tcfi_t->data;
-       par.family   = NFPROTO_IPV4;
        ret = par.target->target(skb, &par);
 
        switch (ret) {
index 2d93be6717e583042e9b0de50d82ea83568b81f3..bb09ba3ca5c29bc282d19d928ed3f12fb8f0d5c1 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/gfp.h>
+#include <linux/if_arp.h>
 #include <net/net_namespace.h>
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
@@ -70,23 +71,9 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
        [TCA_MIRRED_PARMS]      = { .len = sizeof(struct tc_mirred) },
 };
 
-static int mirred_net_id;
+static unsigned int mirred_net_id;
 static struct tc_action_ops act_mirred_ops;
 
-static bool dev_is_mac_header_xmit(const struct net_device *dev)
-{
-       switch (dev->type) {
-       case ARPHRD_TUNNEL:
-       case ARPHRD_TUNNEL6:
-       case ARPHRD_SIT:
-       case ARPHRD_IPGRE:
-       case ARPHRD_VOID:
-       case ARPHRD_NONE:
-               return false;
-       }
-       return true;
-}
-
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
                           struct nlattr *est, struct tc_action **a, int ovr,
                           int bind)
@@ -249,8 +236,11 @@ out:
 static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
                             u64 lastuse)
 {
-       tcf_lastuse_update(&a->tcfa_tm);
+       struct tcf_mirred *m = to_mirred(a);
+       struct tcf_t *tm = &m->tcf_tm;
+
        _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+       tm->lastuse = lastuse;
 }
 
 static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -325,6 +315,17 @@ static struct notifier_block mirred_device_notifier = {
        .notifier_call = mirred_device_event,
 };
 
+static int tcf_mirred_device(const struct tc_action *a, struct net *net,
+                            struct net_device **mirred_dev)
+{
+       /* Look up the target device; check the result, not the out-pointer. */
+       *mirred_dev = __dev_get_by_index(net, tcf_mirred_ifindex(a));
+       if (!*mirred_dev)
+               return -EINVAL; /* lookup in @net returned NULL */
+
+       return 0;
+}
+
 static struct tc_action_ops act_mirred_ops = {
        .kind           =       "mirred",
        .type           =       TCA_ACT_MIRRED,
@@ -337,6 +338,7 @@ static struct tc_action_ops act_mirred_ops = {
        .walk           =       tcf_mirred_walker,
        .lookup         =       tcf_mirred_search,
        .size           =       sizeof(struct tcf_mirred),
+       .get_dev        =       tcf_mirred_device,
 };
 
 static __net_init int mirred_init_net(struct net *net)
index 8e8b0cc30704e7bbf69975932d4678f2e8caf9a1..9b6aec665495992fd6d63773890b5e897ba51819 100644 (file)
@@ -31,7 +31,7 @@
 
 #define NAT_TAB_MASK   15
 
-static int nat_net_id;
+static unsigned int nat_net_id;
 static struct tc_action_ops act_nat_ops;
 
 static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
index b54d56d4959b6381d35799ba84f64ea634384cce..eda322045e75e8ff2c56cea89df412e93b85c9f5 100644 (file)
@@ -25,7 +25,7 @@
 
 #define PEDIT_TAB_MASK 15
 
-static int pedit_net_id;
+static unsigned int pedit_net_id;
 static struct tc_action_ops act_pedit_ops;
 
 static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
index d1bd248fe1461380a473baa5cb41e64771ab2bd0..c990b73a6c85151862c30971bd3787d20dbcecd1 100644 (file)
@@ -55,7 +55,7 @@ struct tc_police_compat {
 
 /* Each policer is serialized by its individual spinlock */
 
-static int police_net_id;
+static unsigned int police_net_id;
 static struct tc_action_ops act_police_ops;
 
 static int tcf_act_police_walker(struct net *net, struct sk_buff *skb,
index 289af6f9bb3b2b0712514d6e533d351cce288062..823a73ad0c602b8e079be04934f8a57d53480da1 100644 (file)
@@ -26,7 +26,7 @@
 
 #define SIMP_TAB_MASK     7
 
-static int simp_net_id;
+static unsigned int simp_net_id;
 static struct tc_action_ops act_simp_ops;
 
 #define SIMP_MAX_DATA  32
index a133dcb821323c45e03360b26977f292c5d50d09..06ccae3c12eecf85383105489320537d52c4d948 100644 (file)
@@ -29,7 +29,7 @@
 
 #define SKBEDIT_TAB_MASK     15
 
-static int skbedit_net_id;
+static unsigned int skbedit_net_id;
 static struct tc_action_ops act_skbedit_ops;
 
 static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
@@ -46,8 +46,10 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
        if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
            skb->dev->real_num_tx_queues > d->queue_mapping)
                skb_set_queue_mapping(skb, d->queue_mapping);
-       if (d->flags & SKBEDIT_F_MARK)
-               skb->mark = d->mark;
+       if (d->flags & SKBEDIT_F_MARK) {
+               skb->mark &= ~d->mask;
+               skb->mark |= d->mark & d->mask;
+       }
        if (d->flags & SKBEDIT_F_PTYPE)
                skb->pkt_type = d->ptype;
 
@@ -61,6 +63,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
        [TCA_SKBEDIT_QUEUE_MAPPING]     = { .len = sizeof(u16) },
        [TCA_SKBEDIT_MARK]              = { .len = sizeof(u32) },
        [TCA_SKBEDIT_PTYPE]             = { .len = sizeof(u16) },
+       [TCA_SKBEDIT_MASK]              = { .len = sizeof(u32) },
 };
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -71,7 +74,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
        struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
        struct tc_skbedit *parm;
        struct tcf_skbedit *d;
-       u32 flags = 0, *priority = NULL, *mark = NULL;
+       u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
        u16 *queue_mapping = NULL, *ptype = NULL;
        bool exists = false;
        int ret = 0, err;
@@ -108,6 +111,11 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                mark = nla_data(tb[TCA_SKBEDIT_MARK]);
        }
 
+       if (tb[TCA_SKBEDIT_MASK] != NULL) {
+               flags |= SKBEDIT_F_MASK;
+               mask = nla_data(tb[TCA_SKBEDIT_MASK]);
+       }
+
        parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
 
        exists = tcf_hash_check(tn, parm->index, a, bind);
@@ -145,6 +153,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
                d->mark = *mark;
        if (flags & SKBEDIT_F_PTYPE)
                d->ptype = *ptype;
+       /* default behaviour is to use all the bits */
+       d->mask = 0xffffffff;
+       if (flags & SKBEDIT_F_MASK)
+               d->mask = *mask;
 
        d->tcf_action = parm->action;
 
@@ -182,6 +194,9 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
        if ((d->flags & SKBEDIT_F_PTYPE) &&
            nla_put_u16(skb, TCA_SKBEDIT_PTYPE, d->ptype))
                goto nla_put_failure;
+       if ((d->flags & SKBEDIT_F_MASK) &&
+           nla_put_u32(skb, TCA_SKBEDIT_MASK, d->mask))
+               goto nla_put_failure;
 
        tcf_tm_dump(&t, &d->tcf_tm);
        if (nla_put_64bit(skb, TCA_SKBEDIT_TM, sizeof(t), &t, TCA_SKBEDIT_PAD))
index e7d96381c908ebbd328124c4cf59221ff2c18695..3b7074e2302487808dc1d16b01143d0b292ebe4e 100644 (file)
@@ -22,7 +22,7 @@
 
 #define SKBMOD_TAB_MASK     15
 
-static int skbmod_net_id;
+static unsigned int skbmod_net_id;
 static struct tc_action_ops act_skbmod_ops;
 
 #define MAX_EDIT_LEN ETH_HLEN
index af47bdf2f4831541fce2e2adc3d7b14eab9bcf19..7af712526f01eaa9ed77f33ff6030a2fe7253713 100644 (file)
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/dst.h>
-#include <net/dst_metadata.h>
 
 #include <linux/tc_act/tc_tunnel_key.h>
 #include <net/tc_act/tc_tunnel_key.h>
 
 #define TUNNEL_KEY_TAB_MASK     15
 
-static int tunnel_key_net_id;
+static unsigned int tunnel_key_net_id;
 static struct tc_action_ops act_tunnel_key_ops;
 
 static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
@@ -67,6 +66,7 @@ static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
        [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
        [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) },
        [TCA_TUNNEL_KEY_ENC_KEY_ID]   = { .type = NLA_U32 },
+       [TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16},
 };
 
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
@@ -81,6 +81,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
        struct tc_tunnel_key *parm;
        struct tcf_tunnel_key *t;
        bool exists = false;
+       __be16 dst_port = 0;
        __be64 key_id;
        int ret = 0;
        int err;
@@ -111,6 +112,9 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 
                key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]));
 
+               if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
+                       dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);
+
                if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
                    tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
                        __be32 saddr;
@@ -120,7 +124,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                        daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
 
                        metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
-                                                   TUNNEL_KEY, key_id, 0);
+                                                   dst_port, TUNNEL_KEY,
+                                                   key_id, 0);
                } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
                           tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
                        struct in6_addr saddr;
@@ -130,7 +135,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
                        daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
 
                        metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0,
-                                                     TUNNEL_KEY, key_id, 0);
+                                                     dst_port, TUNNEL_KEY,
+                                                     key_id, 0);
                }
 
                if (!metadata) {
@@ -258,7 +264,8 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
 
                if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
                    tunnel_key_dump_addresses(skb,
-                                             &params->tcft_enc_metadata->u.tun_info))
+                                             &params->tcft_enc_metadata->u.tun_info) ||
+                   nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT, key->tp_dst))
                        goto nla_put_failure;
        }
 
index b57fcbcefea1d2ae4750c8f6dc2156f5d554f8f1..19e0dba305ce8101d9db33ff0a1eaab849a730d0 100644 (file)
@@ -21,7 +21,7 @@
 
 #define VLAN_TAB_MASK     15
 
-static int vlan_net_id;
+static unsigned int vlan_net_id;
 static struct tc_action_ops act_vlan_ops;
 
 static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a,
index 2ee29a3375f6672812e45e12250ec90ac1ed892c..3fbba79a4ef0521604df48275fb7dae8a8b7099e 100644 (file)
@@ -345,7 +345,8 @@ replay:
                        if (err == 0) {
                                struct tcf_proto *next = rtnl_dereference(tp->next);
 
-                               tfilter_notify(net, skb, n, tp, fh,
+                               tfilter_notify(net, skb, n, tp,
+                                              t->tcm_handle,
                                               RTM_DELTFILTER, false);
                                if (tcf_destroy(tp, false))
                                        RCU_INIT_POINTER(*back, next);
@@ -429,7 +430,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
        if (!skb)
                return -ENOBUFS;
 
-       if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) {
+       if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
+                         n->nlmsg_flags, event) <= 0) {
                kfree_skb(skb);
                return -EINVAL;
        }
@@ -680,6 +682,30 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
 }
 EXPORT_SYMBOL(tcf_exts_dump_stats);
 
+int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
+                    struct net_device **hw_dev)
+{
+#ifdef CONFIG_NET_CLS_ACT
+       const struct tc_action *a;
+       LIST_HEAD(actions);
+
+       if (tc_no_actions(exts))
+               return -EINVAL;
+
+       tcf_exts_to_list(exts, &actions);
+       list_for_each_entry(a, &actions, list) {
+               if (a->ops->get_dev) {
+                       a->ops->get_dev(a, dev_net(dev), hw_dev);
+                       break;
+               }
+       }
+       if (*hw_dev)
+               return 0;
+#endif
+       return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(tcf_exts_get_dev);
+
 static int __init tc_filter_init(void)
 {
        rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL);
index bb1d5a487081f21f80a3042cd424cf7caedf6b37..28cb5fa5af927dbe7237ce77f76d52f52642be4f 100644 (file)
@@ -45,10 +45,7 @@ struct cls_bpf_prog {
        u32 gen_flags;
        struct tcf_exts exts;
        u32 handle;
-       union {
-               u32 bpf_fd;
-               u16 bpf_num_ops;
-       };
+       u16 bpf_num_ops;
        struct sock_filter *bpf_ops;
        const char *bpf_name;
        struct tcf_proto *tp;
@@ -369,9 +366,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
                return PTR_ERR(fp);
 
        if (tb[TCA_BPF_NAME]) {
-               name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
-                              nla_len(tb[TCA_BPF_NAME]),
-                              GFP_KERNEL);
+               name = nla_memdup(tb[TCA_BPF_NAME], GFP_KERNEL);
                if (!name) {
                        bpf_prog_put(fp);
                        return -ENOMEM;
@@ -379,7 +374,6 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
        }
 
        prog->bpf_ops = NULL;
-       prog->bpf_fd = bpf_fd;
        prog->bpf_name = name;
        prog->filter = fp;
 
@@ -563,9 +557,6 @@ static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
 static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
                                  struct sk_buff *skb)
 {
-       if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd))
-               return -EMSGSIZE;
-
        if (prog->bpf_name &&
            nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
                return -EMSGSIZE;
index f6f40fba599bad3d1db4bb14fcc4f7d663d128bc..1cacfa5c95f3246e8038f112c1fd4ed6b86a97bc 100644 (file)
@@ -43,6 +43,7 @@ struct fl_flow_key {
                struct flow_dissector_key_ipv4_addrs enc_ipv4;
                struct flow_dissector_key_ipv6_addrs enc_ipv6;
        };
+       struct flow_dissector_key_ports enc_tp;
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -77,6 +78,8 @@ struct cls_fl_filter {
        u32 handle;
        u32 flags;
        struct rcu_head rcu;
+       struct tc_to_netdev tc;
+       struct net_device *hw_dev;
 };
 
 static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
@@ -155,6 +158,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                }
 
                skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
+               skb_key.enc_tp.src = key->tp_src;
+               skb_key.enc_tp.dst = key->tp_dst;
        }
 
        skb_key.indev_ifindex = skb->skb_iif;
@@ -198,75 +203,87 @@ static void fl_destroy_filter(struct rcu_head *head)
        kfree(f);
 }
 
-static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie)
+static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
 {
-       struct net_device *dev = tp->q->dev_queue->dev;
        struct tc_cls_flower_offload offload = {0};
-       struct tc_to_netdev tc;
+       struct net_device *dev = f->hw_dev;
+       struct tc_to_netdev *tc = &f->tc;
 
-       if (!tc_should_offload(dev, tp, 0))
+       if (!tc_can_offload(dev, tp))
                return;
 
        offload.command = TC_CLSFLOWER_DESTROY;
-       offload.cookie = cookie;
+       offload.cookie = (unsigned long)f;
 
-       tc.type = TC_SETUP_CLSFLOWER;
-       tc.cls_flower = &offload;
+       tc->type = TC_SETUP_CLSFLOWER;
+       tc->cls_flower = &offload;
 
-       dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+       dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
 }
 
 static int fl_hw_replace_filter(struct tcf_proto *tp,
                                struct flow_dissector *dissector,
                                struct fl_flow_key *mask,
-                               struct fl_flow_key *key,
-                               struct tcf_exts *actions,
-                               unsigned long cookie, u32 flags)
+                               struct cls_fl_filter *f)
 {
        struct net_device *dev = tp->q->dev_queue->dev;
        struct tc_cls_flower_offload offload = {0};
-       struct tc_to_netdev tc;
+       struct tc_to_netdev *tc = &f->tc;
        int err;
 
-       if (!tc_should_offload(dev, tp, flags))
-               return tc_skip_sw(flags) ? -EINVAL : 0;
+       if (!tc_can_offload(dev, tp)) {
+               if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev))
+                       return tc_skip_sw(f->flags) ? -EINVAL : 0;
+               dev = f->hw_dev;
+               tc->egress_dev = true;
+       } else {
+               f->hw_dev = dev;
+       }
 
        offload.command = TC_CLSFLOWER_REPLACE;
-       offload.cookie = cookie;
+       offload.cookie = (unsigned long)f;
        offload.dissector = dissector;
        offload.mask = mask;
-       offload.key = key;
-       offload.exts = actions;
+       offload.key = &f->key;
+       offload.exts = &f->exts;
 
-       tc.type = TC_SETUP_CLSFLOWER;
-       tc.cls_flower = &offload;
+       tc->type = TC_SETUP_CLSFLOWER;
+       tc->cls_flower = &offload;
 
        err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
-                                           &tc);
+                                           tc);
 
-       if (tc_skip_sw(flags))
+       if (tc_skip_sw(f->flags))
                return err;
-
        return 0;
 }
 
 static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 {
-       struct net_device *dev = tp->q->dev_queue->dev;
        struct tc_cls_flower_offload offload = {0};
-       struct tc_to_netdev tc;
+       struct net_device *dev = f->hw_dev;
+       struct tc_to_netdev *tc = &f->tc;
 
-       if (!tc_should_offload(dev, tp, 0))
+       if (!tc_can_offload(dev, tp))
                return;
 
        offload.command = TC_CLSFLOWER_STATS;
        offload.cookie = (unsigned long)f;
        offload.exts = &f->exts;
 
-       tc.type = TC_SETUP_CLSFLOWER;
-       tc.cls_flower = &offload;
+       tc->type = TC_SETUP_CLSFLOWER;
+       tc->cls_flower = &offload;
 
-       dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+       dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, tc);
+}
+
+static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
+{
+       list_del_rcu(&f->list);
+       if (!tc_skip_hw(f->flags))
+               fl_hw_destroy_filter(tp, f);
+       tcf_unbind_filter(tp, &f->res);
+       call_rcu(&f->rcu, fl_destroy_filter);
 }
 
 static bool fl_destroy(struct tcf_proto *tp, bool force)
@@ -277,11 +294,8 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
        if (!force && !list_empty(&head->filters))
                return false;
 
-       list_for_each_entry_safe(f, next, &head->filters, list) {
-               fl_hw_destroy_filter(tp, (unsigned long)f);
-               list_del_rcu(&f->list);
-               call_rcu(&f->rcu, fl_destroy_filter);
-       }
+       list_for_each_entry_safe(f, next, &head->filters, list)
+               __fl_delete(tp, f);
        RCU_INIT_POINTER(tp->root, NULL);
        if (head->mask_assigned)
                rhashtable_destroy(&head->ht);
@@ -339,6 +353,14 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
        [TCA_FLOWER_KEY_TCP_DST_MASK]   = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_UDP_SRC_MASK]   = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_UDP_DST_MASK]   = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_SCTP_SRC_MASK]  = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_SCTP_DST_MASK]  = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_SCTP_SRC]       = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_SCTP_DST]       = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT]       = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK]  = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_ENC_UDP_DST_PORT]       = { .type = NLA_U16 },
+       [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK]  = { .type = NLA_U16 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -448,6 +470,13 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
                fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
                               &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
                               sizeof(key->tp.dst));
+       } else if (key->basic.ip_proto == IPPROTO_SCTP) {
+               fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
+                              &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
+                              sizeof(key->tp.src));
+               fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
+                              &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
+                              sizeof(key->tp.dst));
        }
 
        if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
@@ -484,6 +513,14 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
                       &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC,
                       sizeof(key->enc_key_id.keyid));
 
+       fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
+                      &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
+                      sizeof(key->enc_tp.src));
+
+       fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
+                      &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
+                      sizeof(key->enc_tp.dst));
+
        return 0;
 }
 
@@ -551,6 +588,18 @@ static void fl_init_dissector(struct cls_fl_head *head,
                             FLOW_DISSECTOR_KEY_PORTS, tp);
        FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
                             FLOW_DISSECTOR_KEY_VLAN, vlan);
+       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+                            FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
+       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+                            FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
+       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+                            FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6);
+       if (FL_KEY_IS_MASKED(&mask->key, enc_ipv4) ||
+           FL_KEY_IS_MASKED(&mask->key, enc_ipv6))
+               FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL,
+                          enc_control);
+       FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+                            FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
 
        skb_flow_dissector_init(&head->dissector, keys, cnt);
 }
@@ -700,20 +749,20 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
                        goto errout;
        }
 
-       err = fl_hw_replace_filter(tp,
-                                  &head->dissector,
-                                  &mask.key,
-                                  &fnew->key,
-                                  &fnew->exts,
-                                  (unsigned long)fnew,
-                                  fnew->flags);
-       if (err)
-               goto errout;
+       if (!tc_skip_hw(fnew->flags)) {
+               err = fl_hw_replace_filter(tp,
+                                          &head->dissector,
+                                          &mask.key,
+                                          fnew);
+               if (err)
+                       goto errout;
+       }
 
        if (fold) {
                rhashtable_remove_fast(&head->ht, &fold->ht_node,
                                       head->ht_params);
-               fl_hw_destroy_filter(tp, (unsigned long)fold);
+               if (!tc_skip_hw(fold->flags))
+                       fl_hw_destroy_filter(tp, fold);
        }
 
        *arg = (unsigned long) fnew;
@@ -741,10 +790,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg)
 
        rhashtable_remove_fast(&head->ht, &f->ht_node,
                               head->ht_params);
-       list_del_rcu(&f->list);
-       fl_hw_destroy_filter(tp, (unsigned long)f);
-       tcf_unbind_filter(tp, &f->res);
-       call_rcu(&f->rcu, fl_destroy_filter);
+       __fl_delete(tp, f);
        return 0;
 }
 
@@ -839,7 +885,8 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
                        goto nla_put_failure;
        }
 
-       fl_hw_update_stats(tp, f);
+       if (!tc_skip_hw(f->flags))
+               fl_hw_update_stats(tp, f);
 
        if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
                            mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
@@ -895,6 +942,14 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
                                  &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
                                  sizeof(key->tp.dst))))
                goto nla_put_failure;
+       else if (key->basic.ip_proto == IPPROTO_SCTP &&
+                (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
+                                 &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
+                                 sizeof(key->tp.src)) ||
+                 fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
+                                 &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
+                                 sizeof(key->tp.dst))))
+               goto nla_put_failure;
 
        if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
            (fl_dump_key_val(skb, &key->enc_ipv4.src,
@@ -920,7 +975,17 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 
        if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
                            &mask->enc_key_id, TCA_FLOWER_UNSPEC,
-                           sizeof(key->enc_key_id)))
+                           sizeof(key->enc_key_id)) ||
+           fl_dump_key_val(skb, &key->enc_tp.src,
+                           TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
+                           &mask->enc_tp.src,
+                           TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
+                           sizeof(key->enc_tp.src)) ||
+           fl_dump_key_val(skb, &key->enc_tp.dst,
+                           TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
+                           &mask->enc_tp.dst,
+                           TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
+                           sizeof(key->enc_tp.dst)))
                goto nla_put_failure;
 
        nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
index c66ca9400ab4f03d96bc287381a935546abfe8fa..c1b23e3060b893da9a2b9ada426014f5c8bed76a 100644 (file)
@@ -57,17 +57,20 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
        struct xt_action_param acpar;
        const struct xt_set_info *set = (const void *) em->data;
        struct net_device *dev, *indev = NULL;
+       struct nf_hook_state state = {
+               .net    = em->net,
+       };
        int ret, network_offset;
 
        switch (tc_skb_protocol(skb)) {
        case htons(ETH_P_IP):
-               acpar.family = NFPROTO_IPV4;
+               state.pf = NFPROTO_IPV4;
                if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
                        return 0;
                acpar.thoff = ip_hdrlen(skb);
                break;
        case htons(ETH_P_IPV6):
-               acpar.family = NFPROTO_IPV6;
+               state.pf = NFPROTO_IPV6;
                if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
                        return 0;
                /* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */
@@ -77,9 +80,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
                return 0;
        }
 
-       acpar.hooknum = 0;
-
-       opt.family = acpar.family;
+       opt.family = state.pf;
        opt.dim = set->dim;
        opt.flags = set->flags;
        opt.cmdflags = 0;
@@ -95,9 +96,9 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
        if (skb->skb_iif)
                indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
 
-       acpar.net     = em->net;
-       acpar.in      = indev ? indev : dev;
-       acpar.out     = dev;
+       state.in      = indev ? indev : dev;
+       state.out     = dev;
+       acpar.state   = &state;
 
        ret = ip_set_test(set->index, skb, &acpar, &opt);
 
index 206dc24add3af936beea07c58bb076a64d7a73f2..f337f1bdd1d4a4cac738f9fe1fbd42e5984174fe 100644 (file)
@@ -960,6 +960,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
 
        sch->handle = handle;
 
+       /* This exist to keep backward compatible with a userspace
+        * loophole, what allowed userspace to get IFF_NO_QUEUE
+        * facility on older kernels by setting tx_queue_len=0 (prior
+        * to qdisc init), and then forgot to reinit tx_queue_len
+        * before again attaching a qdisc.
+        */
+       if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
+               dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
+               netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
+       }
+
        if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
                if (qdisc_is_percpu_stats(sch)) {
                        sch->cpu_bstats =
index 18e752439f6f42a944a96f704105ea6e98707284..86309a3156a580d2be0a1cffbc93b62a4e1b10bf 100644 (file)
@@ -245,7 +245,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
                skb_orphan(skb);
        }
 
-       root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)];
+       root = &q->fq_root[hash_ptr(sk, q->fq_trees_log)];
 
        if (q->flows >= (2U << q->fq_trees_log) &&
            q->inactive_flows > q->flows/2)
@@ -599,7 +599,7 @@ static void fq_rehash(struct fq_sched_data *q,
                                kmem_cache_free(fq_flow_cachep, of);
                                continue;
                        }
-                       nroot = &new_array[hash_32((u32)(long)of->sk, new_log)];
+                       nroot = &new_array[hash_ptr(of->sk, new_log)];
 
                        np = &nroot->rb_node;
                        parent = NULL;
index f10d3397f917986d25240fc42f6a33ae8049e7b7..68428e1f71810fbe65b7f86c750c3ad61f0266ec 100644 (file)
@@ -700,11 +700,15 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
        /* Set the peer's active state. */
        peer->state = peer_state;
 
+       /* Add this peer into the transport hashtable */
+       if (sctp_hash_transport(peer)) {
+               sctp_transport_free(peer);
+               return NULL;
+       }
+
        /* Attach the remote transport to our asoc.  */
        list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
        asoc->peer.transport_count++;
-       /* Add this peer into the transport hashtable */
-       sctp_hash_transport(peer);
 
        /* If we do not yet have a primary path, set one.  */
        if (!asoc->peer.primary_path) {
index a2ea1d1cc06a9705dab0ab74d6f0490f0d0604e1..458e506ef84bae3c53c239d6cf89a9349faafb11 100644 (file)
@@ -181,9 +181,10 @@ int sctp_rcv(struct sk_buff *skb)
         * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB
         */
        if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) {
-               if (asoc) {
-                       sctp_association_put(asoc);
+               if (transport) {
+                       sctp_transport_put(transport);
                        asoc = NULL;
+                       transport = NULL;
                } else {
                        sctp_endpoint_put(ep);
                        ep = NULL;
@@ -269,8 +270,8 @@ int sctp_rcv(struct sk_buff *skb)
        bh_unlock_sock(sk);
 
        /* Release the asoc/ep ref we took in the lookup calls. */
-       if (asoc)
-               sctp_association_put(asoc);
+       if (transport)
+               sctp_transport_put(transport);
        else
                sctp_endpoint_put(ep);
 
@@ -283,8 +284,8 @@ discard_it:
 
 discard_release:
        /* Release the asoc/ep ref we took in the lookup calls. */
-       if (asoc)
-               sctp_association_put(asoc);
+       if (transport)
+               sctp_transport_put(transport);
        else
                sctp_endpoint_put(ep);
 
@@ -300,6 +301,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 {
        struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
        struct sctp_inq *inqueue = &chunk->rcvr->inqueue;
+       struct sctp_transport *t = chunk->transport;
        struct sctp_ep_common *rcvr = NULL;
        int backloged = 0;
 
@@ -351,7 +353,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 done:
        /* Release the refs we took in sctp_add_backlog */
        if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
-               sctp_association_put(sctp_assoc(rcvr));
+               sctp_transport_put(t);
        else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
                sctp_endpoint_put(sctp_ep(rcvr));
        else
@@ -363,6 +365,7 @@ done:
 static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
        struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
+       struct sctp_transport *t = chunk->transport;
        struct sctp_ep_common *rcvr = chunk->rcvr;
        int ret;
 
@@ -373,7 +376,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
                 * from us
                 */
                if (SCTP_EP_TYPE_ASSOCIATION == rcvr->type)
-                       sctp_association_hold(sctp_assoc(rcvr));
+                       sctp_transport_hold(t);
                else if (SCTP_EP_TYPE_SOCKET == rcvr->type)
                        sctp_endpoint_hold(sctp_ep(rcvr));
                else
@@ -537,15 +540,15 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
        return sk;
 
 out:
-       sctp_association_put(asoc);
+       sctp_transport_put(transport);
        return NULL;
 }
 
 /* Common cleanup code for icmp/icmpv6 error handler. */
-void sctp_err_finish(struct sock *sk, struct sctp_association *asoc)
+void sctp_err_finish(struct sock *sk, struct sctp_transport *t)
 {
        bh_unlock_sock(sk);
-       sctp_association_put(asoc);
+       sctp_transport_put(t);
 }
 
 /*
@@ -641,7 +644,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
        }
 
 out_unlock:
-       sctp_err_finish(sk, asoc);
+       sctp_err_finish(sk, transport);
 }
 
 /*
@@ -787,10 +790,9 @@ hit:
 
 /* rhashtable for transport */
 struct sctp_hash_cmp_arg {
-       const struct sctp_endpoint      *ep;
-       const union sctp_addr           *laddr;
-       const union sctp_addr           *paddr;
-       const struct net                *net;
+       const union sctp_addr   *paddr;
+       const struct net        *net;
+       u16                     lport;
 };
 
 static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -798,7 +800,6 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
 {
        struct sctp_transport *t = (struct sctp_transport *)ptr;
        const struct sctp_hash_cmp_arg *x = arg->key;
-       struct sctp_association *asoc;
        int err = 1;
 
        if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
@@ -806,19 +807,10 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
        if (!sctp_transport_hold(t))
                return err;
 
-       asoc = t->asoc;
-       if (!net_eq(sock_net(asoc->base.sk), x->net))
+       if (!net_eq(sock_net(t->asoc->base.sk), x->net))
+               goto out;
+       if (x->lport != htons(t->asoc->base.bind_addr.port))
                goto out;
-       if (x->ep) {
-               if (x->ep != asoc->ep)
-                       goto out;
-       } else {
-               if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
-                       goto out;
-               if (!sctp_bind_addr_match(&asoc->base.bind_addr,
-                                         x->laddr, sctp_sk(asoc->base.sk)))
-                       goto out;
-       }
 
        err = 0;
 out:
@@ -848,11 +840,9 @@ static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed)
        const struct sctp_hash_cmp_arg *x = data;
        const union sctp_addr *paddr = x->paddr;
        const struct net *net = x->net;
-       u16 lport;
+       u16 lport = x->lport;
        u32 addr;
 
-       lport = x->ep ? htons(x->ep->base.bind_addr.port) :
-                       x->laddr->v4.sin_port;
        if (paddr->sa.sa_family == AF_INET6)
                addr = jhash(&paddr->v6.sin6_addr, 16, seed);
        else
@@ -872,29 +862,32 @@ static const struct rhashtable_params sctp_hash_params = {
 
 int sctp_transport_hashtable_init(void)
 {
-       return rhashtable_init(&sctp_transport_hashtable, &sctp_hash_params);
+       return rhltable_init(&sctp_transport_hashtable, &sctp_hash_params);
 }
 
 void sctp_transport_hashtable_destroy(void)
 {
-       rhashtable_destroy(&sctp_transport_hashtable);
+       rhltable_destroy(&sctp_transport_hashtable);
 }
 
-void sctp_hash_transport(struct sctp_transport *t)
+int sctp_hash_transport(struct sctp_transport *t)
 {
        struct sctp_hash_cmp_arg arg;
+       int err;
 
        if (t->asoc->temp)
-               return;
+               return 0;
 
-       arg.ep = t->asoc->ep;
-       arg.paddr = &t->ipaddr;
        arg.net   = sock_net(t->asoc->base.sk);
+       arg.paddr = &t->ipaddr;
+       arg.lport = htons(t->asoc->base.bind_addr.port);
+
+       err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
+                                 &t->node, sctp_hash_params);
+       if (err)
+               pr_err_once("insert transport fail, errno %d\n", err);
 
-reinsert:
-       if (rhashtable_lookup_insert_key(&sctp_transport_hashtable, &arg,
-                                        &t->node, sctp_hash_params) == -EBUSY)
-               goto reinsert;
+       return err;
 }
 
 void sctp_unhash_transport(struct sctp_transport *t)
@@ -902,39 +895,62 @@ void sctp_unhash_transport(struct sctp_transport *t)
        if (t->asoc->temp)
                return;
 
-       rhashtable_remove_fast(&sctp_transport_hashtable, &t->node,
-                              sctp_hash_params);
+       rhltable_remove(&sctp_transport_hashtable, &t->node,
+                       sctp_hash_params);
 }
 
+/* return a transport with holding it */
 struct sctp_transport *sctp_addrs_lookup_transport(
                                struct net *net,
                                const union sctp_addr *laddr,
                                const union sctp_addr *paddr)
 {
+       struct rhlist_head *tmp, *list;
+       struct sctp_transport *t;
        struct sctp_hash_cmp_arg arg = {
-               .ep    = NULL,
-               .laddr = laddr,
                .paddr = paddr,
                .net   = net,
+               .lport = laddr->v4.sin_port,
        };
 
-       return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
-                                     sctp_hash_params);
+       list = rhltable_lookup(&sctp_transport_hashtable, &arg,
+                              sctp_hash_params);
+
+       rhl_for_each_entry_rcu(t, tmp, list, node) {
+               if (!sctp_transport_hold(t))
+                       continue;
+
+               if (sctp_bind_addr_match(&t->asoc->base.bind_addr,
+                                        laddr, sctp_sk(t->asoc->base.sk)))
+                       return t;
+               sctp_transport_put(t);
+       }
+
+       return NULL;
 }
 
+/* return a transport without holding it, as it's only used under sock lock */
 struct sctp_transport *sctp_epaddr_lookup_transport(
                                const struct sctp_endpoint *ep,
                                const union sctp_addr *paddr)
 {
        struct net *net = sock_net(ep->base.sk);
+       struct rhlist_head *tmp, *list;
+       struct sctp_transport *t;
        struct sctp_hash_cmp_arg arg = {
-               .ep    = ep,
                .paddr = paddr,
                .net   = net,
+               .lport = htons(ep->base.bind_addr.port),
        };
 
-       return rhashtable_lookup_fast(&sctp_transport_hashtable, &arg,
-                                     sctp_hash_params);
+       list = rhltable_lookup(&sctp_transport_hashtable, &arg,
+                              sctp_hash_params);
+
+       rhl_for_each_entry_rcu(t, tmp, list, node)
+               if (ep == t->asoc->ep)
+                       return t;
+
+       return NULL;
 }
 
 /* Look up an association. */
@@ -948,15 +964,12 @@ static struct sctp_association *__sctp_lookup_association(
        struct sctp_association *asoc = NULL;
 
        t = sctp_addrs_lookup_transport(net, local, peer);
-       if (!t || !sctp_transport_hold(t))
+       if (!t)
                goto out;
 
        asoc = t->asoc;
-       sctp_association_hold(asoc);
        *pt = t;
 
-       sctp_transport_put(t);
-
 out:
        return asoc;
 }
@@ -986,7 +999,7 @@ int sctp_has_association(struct net *net,
        struct sctp_transport *transport;
 
        if ((asoc = sctp_lookup_association(net, laddr, paddr, &transport))) {
-               sctp_association_put(asoc);
+               sctp_transport_put(transport);
                return 1;
        }
 
@@ -1021,7 +1034,6 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
        struct sctphdr *sh = sctp_hdr(skb);
        union sctp_params params;
        sctp_init_chunk_t *init;
-       struct sctp_transport *transport;
        struct sctp_af *af;
 
        /*
@@ -1052,7 +1064,7 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
 
                af->from_addr_param(paddr, params.addr, sh->source, 0);
 
-               asoc = __sctp_lookup_association(net, laddr, paddr, &transport);
+               asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
                if (asoc)
                        return asoc;
        }
index f473779e8b1c3f4d82f47fe5d2ccadc2d47af45f..176af3080a2b8f8ffc56b55f3ccb13a169e195fe 100644 (file)
@@ -198,7 +198,7 @@ static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
        }
 
 out_unlock:
-       sctp_err_finish(sk, asoc);
+       sctp_err_finish(sk, transport);
 out:
        if (likely(idev != NULL))
                in6_dev_put(idev);
index 4282b488985b235170ddb45352a2a3765b1f2aff..f5320a87341e160d46b1160edf4c38b569e7e79b 100644 (file)
@@ -399,181 +399,72 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
        atomic_inc(&sk->sk_wmem_alloc);
 }
 
-/* All packets are sent to the network through this function from
- * sctp_outq_tail().
- *
- * The return value is a normal kernel error return value.
- */
-int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
+static int sctp_packet_pack(struct sctp_packet *packet,
+                           struct sk_buff *head, int gso, gfp_t gfp)
 {
        struct sctp_transport *tp = packet->transport;
-       struct sctp_association *asoc = tp->asoc;
-       struct sctphdr *sh;
-       struct sk_buff *nskb = NULL, *head = NULL;
+       struct sctp_auth_chunk *auth = NULL;
        struct sctp_chunk *chunk, *tmp;
-       struct sock *sk;
-       int err = 0;
-       int padding;            /* How much padding do we need?  */
-       int pkt_size;
-       __u8 has_data = 0;
-       int gso = 0;
-       int pktcount = 0;
-       struct dst_entry *dst;
-       unsigned char *auth = NULL;     /* pointer to auth in skb data */
-
-       pr_debug("%s: packet:%p\n", __func__, packet);
+       int pkt_count = 0, pkt_size;
+       struct sock *sk = head->sk;
+       struct sk_buff *nskb;
+       int auth_len = 0;
 
-       /* Do NOT generate a chunkless packet. */
-       if (list_empty(&packet->chunk_list))
-               return err;
-
-       /* Set up convenience variables... */
-       chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
-       sk = chunk->skb->sk;
-
-       /* Allocate the head skb, or main one if not in GSO */
-       if (packet->size > tp->pathmtu && !packet->ipfragok) {
-               if (sk_can_gso(sk)) {
-                       gso = 1;
-                       pkt_size = packet->overhead;
-               } else {
-                       /* If this happens, we trash this packet and try
-                        * to build a new one, hopefully correct this
-                        * time. Application may notice this error.
-                        */
-                       pr_err_once("Trying to GSO but underlying device doesn't support it.");
-                       goto err;
-               }
-       } else {
-               pkt_size = packet->size;
-       }
-       head = alloc_skb(pkt_size + MAX_HEADER, gfp);
-       if (!head)
-               goto err;
        if (gso) {
-               NAPI_GRO_CB(head)->last = head;
                skb_shinfo(head)->gso_type = sk->sk_gso_type;
+               NAPI_GRO_CB(head)->last = head;
+       } else {
+               nskb = head;
+               pkt_size = packet->size;
+               goto merge;
        }
 
-       /* Make sure the outbound skb has enough header room reserved. */
-       skb_reserve(head, packet->overhead + MAX_HEADER);
-
-       /* Set the owning socket so that we know where to get the
-        * destination IP address.
-        */
-       sctp_packet_set_owner_w(head, sk);
-
-       if (!sctp_transport_dst_check(tp)) {
-               sctp_transport_route(tp, NULL, sctp_sk(sk));
-               if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) {
-                       sctp_assoc_sync_pmtu(sk, asoc);
-               }
-       }
-       dst = dst_clone(tp->dst);
-       if (!dst) {
-               if (asoc)
-                       IP_INC_STATS(sock_net(asoc->base.sk),
-                                    IPSTATS_MIB_OUTNOROUTES);
-               goto nodst;
-       }
-       skb_dst_set(head, dst);
-
-       /* Build the SCTP header.  */
-       sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
-       skb_reset_transport_header(head);
-       sh->source = htons(packet->source_port);
-       sh->dest   = htons(packet->destination_port);
-
-       /* From 6.8 Adler-32 Checksum Calculation:
-        * After the packet is constructed (containing the SCTP common
-        * header and one or more control or DATA chunks), the
-        * transmitter shall:
-        *
-        * 1) Fill in the proper Verification Tag in the SCTP common
-        *    header and initialize the checksum field to 0's.
-        */
-       sh->vtag     = htonl(packet->vtag);
-       sh->checksum = 0;
-
-       pr_debug("***sctp_transmit_packet***\n");
-
        do {
-               /* Set up convenience variables... */
-               chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
-               pktcount++;
-
-               /* Calculate packet size, so it fits in PMTU. Leave
-                * other chunks for the next packets.
-                */
-               if (gso) {
-                       pkt_size = packet->overhead;
-                       list_for_each_entry(chunk, &packet->chunk_list, list) {
-                               int padded = SCTP_PAD4(chunk->skb->len);
-
-                               if (pkt_size + padded > tp->pathmtu)
-                                       break;
-                               pkt_size += padded;
-                       }
+               /* calculate the pkt_size and alloc nskb */
+               pkt_size = packet->overhead;
+               list_for_each_entry_safe(chunk, tmp, &packet->chunk_list,
+                                        list) {
+                       int padded = SCTP_PAD4(chunk->skb->len);
 
-                       /* Allocate a new skb. */
-                       nskb = alloc_skb(pkt_size + MAX_HEADER, gfp);
-                       if (!nskb)
-                               goto nomem;
-
-                       /* Make sure the outbound skb has enough header
-                        * room reserved.
-                        */
-                       skb_reserve(nskb, packet->overhead + MAX_HEADER);
-               } else {
-                       nskb = head;
+                       if (chunk == packet->auth)
+                               auth_len = padded;
+                       else if (auth_len + padded + packet->overhead >
+                                tp->pathmtu)
+                               return 0;
+                       else if (pkt_size + padded > tp->pathmtu)
+                               break;
+                       pkt_size += padded;
                }
+               nskb = alloc_skb(pkt_size + MAX_HEADER, gfp);
+               if (!nskb)
+                       return 0;
+               skb_reserve(nskb, packet->overhead + MAX_HEADER);
 
-               /**
-                * 3.2  Chunk Field Descriptions
-                *
-                * The total length of a chunk (including Type, Length and
-                * Value fields) MUST be a multiple of 4 bytes.  If the length
-                * of the chunk is not a multiple of 4 bytes, the sender MUST
-                * pad the chunk with all zero bytes and this padding is not
-                * included in the chunk length field.  The sender should
-                * never pad with more than 3 bytes.
-                *
-                * [This whole comment explains SCTP_PAD4() below.]
-                */
-
+merge:
+               /* merge chunks into nskb and append nskb into head list */
                pkt_size -= packet->overhead;
                list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
+                       int padding;
+
                        list_del_init(&chunk->list);
                        if (sctp_chunk_is_data(chunk)) {
-                               /* 6.3.1 C4) When data is in flight and when allowed
-                                * by rule C5, a new RTT measurement MUST be made each
-                                * round trip.  Furthermore, new RTT measurements
-                                * SHOULD be made no more than once per round-trip
-                                * for a given destination transport address.
-                                */
-
                                if (!sctp_chunk_retransmitted(chunk) &&
                                    !tp->rto_pending) {
                                        chunk->rtt_in_progress = 1;
                                        tp->rto_pending = 1;
                                }
-
-                               has_data = 1;
                        }
 
                        padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len;
                        if (padding)
                                memset(skb_put(chunk->skb, padding), 0, padding);
 
-                       /* if this is the auth chunk that we are adding,
-                        * store pointer where it will be added and put
-                        * the auth into the packet.
-                        */
                        if (chunk == packet->auth)
-                               auth = skb_tail_pointer(nskb);
+                               auth = (struct sctp_auth_chunk *)
+                                                       skb_tail_pointer(nskb);
 
-                       memcpy(skb_put(nskb, chunk->skb->len),
-                              chunk->skb->data, chunk->skb->len);
+                       memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data,
+                              chunk->skb->len);
 
                        pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n",
                                 chunk,
@@ -583,11 +474,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
                                 ntohs(chunk->chunk_hdr->length), chunk->skb->len,
                                 chunk->rtt_in_progress);
 
-                       /* If this is a control chunk, this is our last
-                        * reference. Free data chunks after they've been
-                        * acknowledged or have failed.
-                        * Re-queue auth chunks if needed.
-                        */
                        pkt_size -= SCTP_PAD4(chunk->skb->len);
 
                        if (!sctp_chunk_is_data(chunk) && chunk != packet->auth)
@@ -597,160 +483,161 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
                                break;
                }
 
-               /* SCTP-AUTH, Section 6.2
-                *    The sender MUST calculate the MAC as described in RFC2104 [2]
-                *    using the hash function H as described by the MAC Identifier and
-                *    the shared association key K based on the endpoint pair shared key
-                *    described by the shared key identifier.  The 'data' used for the
-                *    computation of the AUTH-chunk is given by the AUTH chunk with its
-                *    HMAC field set to zero (as shown in Figure 6) followed by all
-                *    chunks that are placed after the AUTH chunk in the SCTP packet.
-                */
-               if (auth)
-                       sctp_auth_calculate_hmac(asoc, nskb,
-                                                (struct sctp_auth_chunk *)auth,
-                                                gfp);
-
-               if (packet->auth) {
-                       if (!list_empty(&packet->chunk_list)) {
-                               /* We will generate more packets, so re-queue
-                                * auth chunk.
-                                */
+               if (auth) {
+                       sctp_auth_calculate_hmac(tp->asoc, nskb, auth, gfp);
+                       /* free auth if no more chunks, or add it back */
+                       if (list_empty(&packet->chunk_list))
+                               sctp_chunk_free(packet->auth);
+                       else
                                list_add(&packet->auth->list,
                                         &packet->chunk_list);
-                       } else {
-                               sctp_chunk_free(packet->auth);
-                               packet->auth = NULL;
-                       }
                }
 
-               if (!gso)
-                       break;
-
-               if (skb_gro_receive(&head, nskb)) {
-                       kfree_skb(nskb);
-                       goto nomem;
+               if (gso) {
+                       if (skb_gro_receive(&head, nskb)) {
+                               kfree_skb(nskb);
+                               return 0;
+                       }
+                       if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
+                                        sk->sk_gso_max_segs))
+                               return 0;
                }
-               nskb = NULL;
-               if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
-                                sk->sk_gso_max_segs))
-                       goto nomem;
+
+               pkt_count++;
        } while (!list_empty(&packet->chunk_list));
 
-       /* 2) Calculate the Adler-32 checksum of the whole packet,
-        *    including the SCTP common header and all the
-        *    chunks.
-        *
-        * Note: Adler-32 is no longer applicable, as has been replaced
-        * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
-        *
-        * If it's a GSO packet, it's postponed to sctp_skb_segment.
-        */
-       if (!sctp_checksum_disable || gso) {
-               if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
-                            dst_xfrm(dst) || packet->ipfragok)) {
-                       sh->checksum = sctp_compute_cksum(head, 0);
-               } else {
-                       /* no need to seed pseudo checksum for SCTP */
-                       head->ip_summed = CHECKSUM_PARTIAL;
-                       head->csum_start = skb_transport_header(head) - head->head;
-                       head->csum_offset = offsetof(struct sctphdr, checksum);
+       if (gso) {
+               memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
+                                       sizeof(struct inet6_skb_parm)));
+               skb_shinfo(head)->gso_segs = pkt_count;
+               skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
+               rcu_read_lock();
+               if (skb_dst(head) != tp->dst) {
+                       dst_hold(tp->dst);
+                       sk_setup_caps(sk, tp->dst);
                }
+               rcu_read_unlock();
+               goto chksum;
        }
 
-       /* IP layer ECN support
-        * From RFC 2481
-        *  "The ECN-Capable Transport (ECT) bit would be set by the
-        *   data sender to indicate that the end-points of the
-        *   transport protocol are ECN-capable."
-        *
-        * Now setting the ECT bit all the time, as it should not cause
-        * any problems protocol-wise even if our peer ignores it.
-        *
-        * Note: The works for IPv6 layer checks this bit too later
-        * in transmission.  See IP6_ECN_flow_xmit().
-        */
-       tp->af_specific->ecn_capable(sk);
+       if (sctp_checksum_disable)
+               return 1;
 
-       /* Set up the IP options.  */
-       /* BUG: not implemented
-        * For v4 this all lives somewhere in sk->sk_opt...
-        */
+       if (!(skb_dst(head)->dev->features & NETIF_F_SCTP_CRC) ||
+           dst_xfrm(skb_dst(head)) || packet->ipfragok) {
+               struct sctphdr *sh =
+                       (struct sctphdr *)skb_transport_header(head);
 
-       /* Dump that on IP!  */
-       if (asoc) {
-               asoc->stats.opackets += pktcount;
-               if (asoc->peer.last_sent_to != tp)
-                       /* Considering the multiple CPU scenario, this is a
-                        * "correcter" place for last_sent_to.  --xguo
-                        */
-                       asoc->peer.last_sent_to = tp;
+               sh->checksum = sctp_compute_cksum(head, 0);
+       } else {
+chksum:
+               head->ip_summed = CHECKSUM_PARTIAL;
+               head->csum_start = skb_transport_header(head) - head->head;
+               head->csum_offset = offsetof(struct sctphdr, checksum);
        }
 
-       if (has_data) {
-               struct timer_list *timer;
-               unsigned long timeout;
+       return pkt_count;
+}
+
+/* All packets are sent to the network through this function from
+ * sctp_outq_tail().
+ *
+ * The return value is always 0 for now.
+ */
+int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
+{
+       struct sctp_transport *tp = packet->transport;
+       struct sctp_association *asoc = tp->asoc;
+       struct sctp_chunk *chunk, *tmp;
+       int pkt_count, gso = 0;
+       struct dst_entry *dst;
+       struct sk_buff *head;
+       struct sctphdr *sh;
+       struct sock *sk;
 
-               /* Restart the AUTOCLOSE timer when sending data. */
-               if (sctp_state(asoc, ESTABLISHED) &&
-                   asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) {
-                       timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
-                       timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
+       pr_debug("%s: packet:%p\n", __func__, packet);
+       if (list_empty(&packet->chunk_list))
+               return 0;
+       chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
+       sk = chunk->skb->sk;
 
-                       if (!mod_timer(timer, jiffies + timeout))
-                               sctp_association_hold(asoc);
+       /* check gso */
+       if (packet->size > tp->pathmtu && !packet->ipfragok) {
+               if (!sk_can_gso(sk)) {
+                       pr_err_once("Trying to GSO but underlying device doesn't support it.");
+                       goto out;
                }
+               gso = 1;
+       }
+
+       /* alloc head skb */
+       head = alloc_skb((gso ? packet->overhead : packet->size) +
+                        MAX_HEADER, gfp);
+       if (!head)
+               goto out;
+       skb_reserve(head, packet->overhead + MAX_HEADER);
+       sctp_packet_set_owner_w(head, sk);
+
+       /* set sctp header */
+       sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
+       skb_reset_transport_header(head);
+       sh->source = htons(packet->source_port);
+       sh->dest = htons(packet->destination_port);
+       sh->vtag = htonl(packet->vtag);
+       sh->checksum = 0;
+
+       /* update dst if in need */
+       if (!sctp_transport_dst_check(tp)) {
+               sctp_transport_route(tp, NULL, sctp_sk(sk));
+               if (asoc && asoc->param_flags & SPP_PMTUD_ENABLE)
+                       sctp_assoc_sync_pmtu(sk, asoc);
        }
+       dst = dst_clone(tp->dst);
+       if (!dst) {
+               IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
+               kfree_skb(head);
+               goto out;
+       }
+       skb_dst_set(head, dst);
 
+       /* pack up chunks */
+       pkt_count = sctp_packet_pack(packet, head, gso, gfp);
+       if (!pkt_count) {
+               kfree_skb(head);
+               goto out;
+       }
        pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len);
 
-       if (gso) {
-               /* Cleanup our debris for IP stacks */
-               memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
-                                       sizeof(struct inet6_skb_parm)));
+       /* start autoclose timer */
+       if (packet->has_data && sctp_state(asoc, ESTABLISHED) &&
+           asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) {
+               struct timer_list *timer =
+                       &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
+               unsigned long timeout =
+                       asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
 
-               skb_shinfo(head)->gso_segs = pktcount;
-               skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
+               if (!mod_timer(timer, jiffies + timeout))
+                       sctp_association_hold(asoc);
+       }
 
-               /* We have to refresh this in case we are xmiting to
-                * more than one transport at a time
-                */
-               rcu_read_lock();
-               if (__sk_dst_get(sk) != tp->dst) {
-                       dst_hold(tp->dst);
-                       sk_setup_caps(sk, tp->dst);
-               }
-               rcu_read_unlock();
+       /* sctp xmit */
+       tp->af_specific->ecn_capable(sk);
+       if (asoc) {
+               asoc->stats.opackets += pkt_count;
+               if (asoc->peer.last_sent_to != tp)
+                       asoc->peer.last_sent_to = tp;
        }
        head->ignore_df = packet->ipfragok;
        tp->af_specific->sctp_xmit(head, tp);
-       goto out;
-
-nomem:
-       if (packet->auth && list_empty(&packet->auth->list))
-               sctp_chunk_free(packet->auth);
-
-nodst:
-       /* FIXME: Returning the 'err' will effect all the associations
-        * associated with a socket, although only one of the paths of the
-        * association is unreachable.
-        * The real failure of a transport or association can be passed on
-        * to the user via notifications. So setting this error may not be
-        * required.
-        */
-        /* err = -EHOSTUNREACH; */
-       kfree_skb(head);
 
-err:
+out:
        list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
                list_del_init(&chunk->list);
                if (!sctp_chunk_is_data(chunk))
                        sctp_chunk_free(chunk);
        }
-
-out:
        sctp_packet_reset(packet);
-       return err;
+       return 0;
 }
 
 /********************************************************************
index 026e3bca4a94bd34b418d5e6947f7182c1512358..8ec20a64a3f8055a0c3576627c5ec5dad7e99ca8 100644 (file)
@@ -3422,6 +3422,12 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
                        return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
                                                  commands);
 
+               /* Report violation if chunk len overflows */
+               ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
+               if (ch_end > skb_tail_pointer(skb))
+                       return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+                                                 commands);
+
                /* Now that we know we at least have a chunk header,
                 * do things that are type appropriate.
                 */
@@ -3453,12 +3459,6 @@ sctp_disposition_t sctp_sf_ootb(struct net *net,
                        }
                }
 
-               /* Report violation if chunk len overflows */
-               ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
-               if (ch_end > skb_tail_pointer(skb))
-                       return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
-                                                 commands);
-
                ch = (sctp_chunkhdr_t *) ch_end;
        } while (ch_end < skb_tail_pointer(skb));
 
index fb02c70333078743e832a7a991f7a44770d19bcc..d5f4b4a8369bc64d8501be37d153e99dead88af5 100644 (file)
@@ -1214,9 +1214,12 @@ static int __sctp_connect(struct sock *sk,
 
        timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
 
-       err = sctp_wait_for_connect(asoc, &timeo);
-       if ((err == 0 || err == -EINPROGRESS) && assoc_id)
+       if (assoc_id)
                *assoc_id = asoc->assoc_id;
+       err = sctp_wait_for_connect(asoc, &timeo);
+       /* Note: the asoc may be freed after the return of
+        * sctp_wait_for_connect.
+        */
 
        /* Don't free association on exit. */
        asoc = NULL;
@@ -4282,19 +4285,18 @@ static void sctp_shutdown(struct sock *sk, int how)
 {
        struct net *net = sock_net(sk);
        struct sctp_endpoint *ep;
-       struct sctp_association *asoc;
 
        if (!sctp_style(sk, TCP))
                return;
 
-       if (how & SEND_SHUTDOWN) {
+       ep = sctp_sk(sk)->ep;
+       if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
+               struct sctp_association *asoc;
+
                sk->sk_state = SCTP_SS_CLOSING;
-               ep = sctp_sk(sk)->ep;
-               if (!list_empty(&ep->asocs)) {
-                       asoc = list_entry(ep->asocs.next,
-                                         struct sctp_association, asocs);
-                       sctp_primitive_SHUTDOWN(net, asoc, NULL);
-               }
+               asoc = list_entry(ep->asocs.next,
+                                 struct sctp_association, asocs);
+               sctp_primitive_SHUTDOWN(net, asoc, NULL);
        }
 }
 
@@ -4390,10 +4392,7 @@ int sctp_transport_walk_start(struct rhashtable_iter *iter)
 {
        int err;
 
-       err = rhashtable_walk_init(&sctp_transport_hashtable, iter,
-                                  GFP_KERNEL);
-       if (err)
-               return err;
+       rhltable_walk_enter(&sctp_transport_hashtable, iter);
 
        err = rhashtable_walk_start(iter);
        if (err && err != -EAGAIN) {
@@ -4477,15 +4476,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
 
        rcu_read_lock();
        transport = sctp_addrs_lookup_transport(net, laddr, paddr);
-       if (!transport || !sctp_transport_hold(transport))
+       if (!transport)
                goto out;
 
-       sctp_association_hold(transport->asoc);
-       sctp_transport_put(transport);
-
        rcu_read_unlock();
        err = cb(transport, p);
-       sctp_association_put(transport->asoc);
+       sctp_transport_put(transport);
 
 out:
        return err;
@@ -4687,7 +4683,7 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len,
 static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval,
                                  int __user *optlen)
 {
-       if (len <= 0)
+       if (len == 0)
                return -EINVAL;
        if (len > sizeof(struct sctp_event_subscribe))
                len = sizeof(struct sctp_event_subscribe);
@@ -6430,6 +6426,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
        if (get_user(len, optlen))
                return -EFAULT;
 
+       if (len < 0)
+               return -EINVAL;
+
        lock_sock(sk);
 
        switch (optname) {
index 5a9bf5ee2464da20fbb42ea6699b29318553e210..e6318943ad07045f88e36c77787d41c5e28c16df 100644 (file)
@@ -341,8 +341,23 @@ static const struct xattr_handler sockfs_xattr_handler = {
        .get = sockfs_xattr_get,
 };
 
+static int sockfs_security_xattr_set(const struct xattr_handler *handler,
+                                    struct dentry *dentry, struct inode *inode,
+                                    const char *suffix, const void *value,
+                                    size_t size, int flags)
+{
+       /* Handled by LSM. */
+       return -EAGAIN;
+}
+
+static const struct xattr_handler sockfs_security_xattr_handler = {
+       .prefix = XATTR_SECURITY_PREFIX,
+       .set = sockfs_security_xattr_set,
+};
+
 static const struct xattr_handler *sockfs_xattr_handlers[] = {
        &sockfs_xattr_handler,
+       &sockfs_security_xattr_handler,
        NULL
 };
 
@@ -518,8 +533,22 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
        return used;
 }
 
+int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+       int err = simple_setattr(dentry, iattr);
+
+       if (!err) {
+               struct socket *sock = SOCKET_I(d_inode(dentry));
+
+               sock->sk->sk_uid = iattr->ia_uid;
+       }
+
+       return err;
+}
+
 static const struct inode_operations sockfs_inode_ops = {
        .listxattr = sockfs_listxattr,
+       .setattr = sockfs_setattr,
 };
 
 /**
@@ -664,9 +693,14 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
            (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
            ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
                empty = 0;
-       if (!empty)
+       if (!empty) {
                put_cmsg(msg, SOL_SOCKET,
                         SCM_TIMESTAMPING, sizeof(tss), &tss);
+
+               if (skb->len && (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS))
+                       put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
+                                skb->len, skb->data);
+       }
 }
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 
@@ -877,6 +911,11 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
  *     what to do with it - that's up to the protocol still.
  */
 
+static struct ns_common *get_net_ns(struct ns_common *ns)
+{
+       return &get_net(container_of(ns, struct net, ns))->ns;
+}
+
 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
        struct socket *sock;
@@ -945,6 +984,13 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
                                err = dlci_ioctl_hook(cmd, argp);
                        mutex_unlock(&dlci_ioctl_mutex);
                        break;
+               case SIOCGSKNS:
+                       err = -EPERM;
+                       if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+                               break;
+
+                       err = open_related_ns(&net->ns, get_net_ns);
+                       break;
                default:
                        err = sock_do_ioctl(net, sock, cmd, arg);
                        break;
@@ -2038,6 +2084,8 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
                if (err)
                        break;
                ++datagrams;
+               if (msg_data_left(&msg_sys))
+                       break;
                cond_resched();
        }
 
@@ -3093,6 +3141,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
        case SIOCSIFVLAN:
        case SIOCADDDLCI:
        case SIOCDELDLCI:
+       case SIOCGSKNS:
                return sock_ioctl(file, cmd, arg);
 
        case SIOCGIFFLAGS:
index d8bd97a5a7c9d807fd478befe92bea4ea7a40407..3dfd769dc5b51a724f20273eb0c52e5d6e25e9f1 100644 (file)
@@ -1616,7 +1616,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
 {
        struct rpc_cred *cred = task->tk_rqstp->rq_cred;
        struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-       __be32          seq;
+       __be32          *seq = NULL;
        struct kvec     iov;
        struct xdr_buf  verf_buf;
        struct xdr_netobj mic;
@@ -1631,9 +1631,12 @@ gss_validate(struct rpc_task *task, __be32 *p)
                goto out_bad;
        if (flav != RPC_AUTH_GSS)
                goto out_bad;
-       seq = htonl(task->tk_rqstp->rq_seqno);
-       iov.iov_base = &seq;
-       iov.iov_len = sizeof(seq);
+       seq = kmalloc(4, GFP_NOFS);
+       if (!seq)
+               goto out_bad;
+       *seq = htonl(task->tk_rqstp->rq_seqno);
+       iov.iov_base = seq;
+       iov.iov_len = 4;
        xdr_buf_from_iov(&iov, &verf_buf);
        mic.data = (u8 *)p;
        mic.len = len;
@@ -1653,11 +1656,13 @@ gss_validate(struct rpc_task *task, __be32 *p)
        gss_put_ctx(ctx);
        dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n",
                        task->tk_pid, __func__);
+       kfree(seq);
        return p + XDR_QUADLEN(len);
 out_bad:
        gss_put_ctx(ctx);
        dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
                PTR_ERR(ret));
+       kfree(seq);
        return ret;
 }
 
index 244245bcbbd25554938ab099137799d47ef6791b..90115ceefd490f39456edacfca6e711c47929ec0 100644 (file)
@@ -166,8 +166,8 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
                       unsigned int usage, struct xdr_netobj *cksumout)
 {
        struct scatterlist              sg[1];
-       int err;
-       u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+       int err = -1;
+       u8 *checksumdata;
        u8 rc4salt[4];
        struct crypto_ahash *md5;
        struct crypto_ahash *hmac_md5;
@@ -187,23 +187,22 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
                return GSS_S_FAILURE;
        }
 
+       checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS);
+       if (!checksumdata)
+               return GSS_S_FAILURE;
+
        md5 = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(md5))
-               return GSS_S_FAILURE;
+               goto out_free_cksum;
 
        hmac_md5 = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0,
                                      CRYPTO_ALG_ASYNC);
-       if (IS_ERR(hmac_md5)) {
-               crypto_free_ahash(md5);
-               return GSS_S_FAILURE;
-       }
+       if (IS_ERR(hmac_md5))
+               goto out_free_md5;
 
        req = ahash_request_alloc(md5, GFP_KERNEL);
-       if (!req) {
-               crypto_free_ahash(hmac_md5);
-               crypto_free_ahash(md5);
-               return GSS_S_FAILURE;
-       }
+       if (!req)
+               goto out_free_hmac_md5;
 
        ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
 
@@ -232,11 +231,8 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
 
        ahash_request_free(req);
        req = ahash_request_alloc(hmac_md5, GFP_KERNEL);
-       if (!req) {
-               crypto_free_ahash(hmac_md5);
-               crypto_free_ahash(md5);
-               return GSS_S_FAILURE;
-       }
+       if (!req)
+               goto out_free_hmac_md5;
 
        ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
 
@@ -258,8 +254,12 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
        cksumout->len = kctx->gk5e->cksumlength;
 out:
        ahash_request_free(req);
-       crypto_free_ahash(md5);
+out_free_hmac_md5:
        crypto_free_ahash(hmac_md5);
+out_free_md5:
+       crypto_free_ahash(md5);
+out_free_cksum:
+       kfree(checksumdata);
        return err ? GSS_S_FAILURE : 0;
 }
 
@@ -276,8 +276,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
        struct crypto_ahash *tfm;
        struct ahash_request *req;
        struct scatterlist              sg[1];
-       int err;
-       u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+       int err = -1;
+       u8 *checksumdata;
        unsigned int checksumlen;
 
        if (kctx->gk5e->ctype == CKSUMTYPE_HMAC_MD5_ARCFOUR)
@@ -291,15 +291,17 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
                return GSS_S_FAILURE;
        }
 
+       checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS);
+       if (checksumdata == NULL)
+               return GSS_S_FAILURE;
+
        tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(tfm))
-               return GSS_S_FAILURE;
+               goto out_free_cksum;
 
        req = ahash_request_alloc(tfm, GFP_KERNEL);
-       if (!req) {
-               crypto_free_ahash(tfm);
-               return GSS_S_FAILURE;
-       }
+       if (!req)
+               goto out_free_ahash;
 
        ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
 
@@ -349,7 +351,10 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
        cksumout->len = kctx->gk5e->cksumlength;
 out:
        ahash_request_free(req);
+out_free_ahash:
        crypto_free_ahash(tfm);
+out_free_cksum:
+       kfree(checksumdata);
        return err ? GSS_S_FAILURE : 0;
 }
 
@@ -368,8 +373,8 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
        struct crypto_ahash *tfm;
        struct ahash_request *req;
        struct scatterlist sg[1];
-       int err;
-       u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+       int err = -1;
+       u8 *checksumdata;
        unsigned int checksumlen;
 
        if (kctx->gk5e->keyed_cksum == 0) {
@@ -383,16 +388,18 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
                return GSS_S_FAILURE;
        }
 
+       checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS);
+       if (!checksumdata)
+               return GSS_S_FAILURE;
+
        tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(tfm))
-               return GSS_S_FAILURE;
+               goto out_free_cksum;
        checksumlen = crypto_ahash_digestsize(tfm);
 
        req = ahash_request_alloc(tfm, GFP_KERNEL);
-       if (!req) {
-               crypto_free_ahash(tfm);
-               return GSS_S_FAILURE;
-       }
+       if (!req)
+               goto out_free_ahash;
 
        ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
 
@@ -433,7 +440,10 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
        }
 out:
        ahash_request_free(req);
+out_free_ahash:
        crypto_free_ahash(tfm);
+out_free_cksum:
+       kfree(checksumdata);
        return err ? GSS_S_FAILURE : 0;
 }
 
@@ -666,14 +676,17 @@ gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
        u32 ret;
        struct scatterlist sg[1];
        SKCIPHER_REQUEST_ON_STACK(req, cipher);
-       u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2];
+       u8 *data;
        struct page **save_pages;
        u32 len = buf->len - offset;
 
-       if (len > ARRAY_SIZE(data)) {
+       if (len > GSS_KRB5_MAX_BLOCKSIZE * 2) {
                WARN_ON(0);
                return -ENOMEM;
        }
+       data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_NOFS);
+       if (!data)
+               return -ENOMEM;
 
        /*
         * For encryption, we want to read from the cleartext
@@ -708,6 +721,7 @@ gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
        ret = write_bytes_to_xdr_buf(buf, offset, data, len);
 
 out:
+       kfree(data);
        return ret;
 }
 
index d67f7e1bc82ddc55f6f9927343387f20fa81fa32..45662d7f0943c671297955e0c44e7632460c2c68 100644 (file)
@@ -718,30 +718,37 @@ gss_write_null_verf(struct svc_rqst *rqstp)
 static int
 gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
 {
-       __be32                  xdr_seq;
+       __be32                  *xdr_seq;
        u32                     maj_stat;
        struct xdr_buf          verf_data;
        struct xdr_netobj       mic;
        __be32                  *p;
        struct kvec             iov;
+       int err = -1;
 
        svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS);
-       xdr_seq = htonl(seq);
+       xdr_seq = kmalloc(4, GFP_KERNEL);
+       if (!xdr_seq)
+               return -1;
+       *xdr_seq = htonl(seq);
 
-       iov.iov_base = &xdr_seq;
-       iov.iov_len = sizeof(xdr_seq);
+       iov.iov_base = xdr_seq;
+       iov.iov_len = 4;
        xdr_buf_from_iov(&iov, &verf_data);
        p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
        mic.data = (u8 *)(p + 1);
        maj_stat = gss_get_mic(ctx_id, &verf_data, &mic);
        if (maj_stat != GSS_S_COMPLETE)
-               return -1;
+               goto out;
        *p++ = htonl(mic.len);
        memset((u8 *)p + mic.len, 0, round_up_to_quad(mic.len) - mic.len);
        p += XDR_QUADLEN(mic.len);
        if (!xdr_ressize_check(rqstp, p))
-               return -1;
-       return 0;
+               goto out;
+       err = 0;
+out:
+       kfree(xdr_seq);
+       return err;
 }
 
 struct gss_domain {
index 34dd7b26ee5f16589a46f8c7faa158570ccec9ad..62a482790937b54a5d486bc932289b0fb14da248 100644 (file)
@@ -2753,14 +2753,18 @@ EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
 
 void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
 {
+       rcu_read_lock();
        xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
+       rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put);
 
 void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
 {
+       rcu_read_lock();
        rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
                                 xprt);
+       rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt);
 
@@ -2770,9 +2774,8 @@ bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt,
        struct rpc_xprt_switch *xps;
        bool ret;
 
-       xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
-
        rcu_read_lock();
+       xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
        ret = rpc_xprt_switch_has_addr(xps, sap);
        rcu_read_unlock();
        return ret;
index df58268765351ebd1b4376f7504915cd1b9fff6b..394ce523174c914c6e381a780af59de00579c0c1 100644 (file)
@@ -34,7 +34,7 @@ struct sunrpc_net {
        struct proc_dir_entry *use_gssp_proc;
 };
 
-extern int sunrpc_net_id;
+extern unsigned int sunrpc_net_id;
 
 int ip_map_cache_create(struct net *);
 void ip_map_cache_destroy(struct net *);
index ee5d3d253102bf5d81a39f953248a6a6ca7a38d6..d1c330a7953a0bc9bb4e08162baf77740504b80c 100644 (file)
@@ -24,7 +24,7 @@
 
 #include "netns.h"
 
-int sunrpc_net_id;
+unsigned int sunrpc_net_id;
 EXPORT_SYMBOL_GPL(sunrpc_net_id);
 
 static __net_init int sunrpc_init_net(struct net *net)
index c3f652395a80b8ded540bc60fe235ce504e239f7..3bc1d61694cbbbf7a094a1849b747b65760550b2 100644 (file)
@@ -1002,14 +1002,8 @@ static void svc_age_temp_xprts(unsigned long closure)
 void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr)
 {
        struct svc_xprt *xprt;
-       struct svc_sock *svsk;
-       struct socket *sock;
        struct list_head *le, *next;
        LIST_HEAD(to_be_closed);
-       struct linger no_linger = {
-               .l_onoff = 1,
-               .l_linger = 0,
-       };
 
        spin_lock_bh(&serv->sv_lock);
        list_for_each_safe(le, next, &serv->sv_tempsocks) {
@@ -1027,10 +1021,7 @@ void svc_age_temp_xprts_now(struct svc_serv *serv, struct sockaddr *server_addr)
                list_del_init(le);
                xprt = list_entry(le, struct svc_xprt, xpt_list);
                dprintk("svc_age_temp_xprts_now: closing %p\n", xprt);
-               svsk = container_of(xprt, struct svc_sock, sk_xprt);
-               sock = svsk->sk_sock;
-               kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
-                                 (char *)&no_linger, sizeof(no_linger));
+               xprt->xpt_ops->xpo_kill_temp_xprt(xprt);
                svc_close_xprt(xprt);
        }
 }
index e2a55dc787e641144fcf4613a3e6abfb55909b50..135ec2c11b3bff23732bd1850cdd65a5ca5d5ab6 100644 (file)
@@ -451,6 +451,21 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
        return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 }
 
+static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
+{
+       struct svc_sock *svsk;
+       struct socket *sock;
+       struct linger no_linger = {
+               .l_onoff = 1,
+               .l_linger = 0,
+       };
+
+       svsk = container_of(xprt, struct svc_sock, sk_xprt);
+       sock = svsk->sk_sock;
+       kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
+                         (char *)&no_linger, sizeof(no_linger));
+}
+
 /*
  * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
  */
@@ -547,7 +562,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
        err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
                             0, 0, MSG_PEEK | MSG_DONTWAIT);
        if (err >= 0)
-               skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err);
+               skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
 
        if (skb == NULL) {
                if (err != -EAGAIN) {
@@ -660,6 +675,10 @@ static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt)
        return NULL;
 }
 
+static void svc_udp_kill_temp_xprt(struct svc_xprt *xprt)
+{
+}
+
 static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
                                       struct net *net,
                                       struct sockaddr *sa, int salen,
@@ -679,6 +698,7 @@ static struct svc_xprt_ops svc_udp_ops = {
        .xpo_has_wspace = svc_udp_has_wspace,
        .xpo_accept = svc_udp_accept,
        .xpo_secure_port = svc_sock_secure_port,
+       .xpo_kill_temp_xprt = svc_udp_kill_temp_xprt,
 };
 
 static struct svc_xprt_class svc_udp_class = {
@@ -1254,6 +1274,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
        .xpo_has_wspace = svc_tcp_has_wspace,
        .xpo_accept = svc_tcp_accept,
        .xpo_secure_port = svc_sock_secure_port,
+       .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
 };
 
 static struct svc_xprt_class svc_tcp_class = {
index 210949562786665ea1a2990e2e9e6f886e580af6..26b26beef2d4a6dd7ef9d31f09de7fe51504d841 100644 (file)
  * being done.
  *
  * When the underlying transport disconnects, MRs are left in one of
- * three states:
+ * four states:
  *
  * INVALID:    The MR was not in use before the QP entered ERROR state.
- *             (Or, the LOCAL_INV WR has not completed or flushed yet).
- *
- * STALE:      The MR was being registered or unregistered when the QP
- *             entered ERROR state, and the pending WR was flushed.
  *
  * VALID:      The MR was registered before the QP entered ERROR state.
  *
- * When frwr_op_map encounters STALE and VALID MRs, they are recovered
- * with ib_dereg_mr and then are re-initialized. Beause MR recovery
+ * FLUSHED_FR: The MR was being registered when the QP entered ERROR
+ *             state, and the pending WR was flushed.
+ *
+ * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
+ *             state, and the pending WR was flushed.
+ *
+ * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
+ * with ib_dereg_mr and then are re-initialized. Because MR recovery
  * allocates fresh resources, it is deferred to a workqueue, and the
  * recovered MRs are placed back on the rb_mws list when recovery is
  * complete. frwr_op_map allocates another MR for the current RPC while
@@ -177,12 +179,15 @@ __frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
 static void
 frwr_op_recover_mr(struct rpcrdma_mw *mw)
 {
+       enum rpcrdma_frmr_state state = mw->frmr.fr_state;
        struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        int rc;
 
        rc = __frwr_reset_mr(ia, mw);
-       ib_dma_unmap_sg(ia->ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir);
+       if (state != FRMR_FLUSHED_LI)
+               ib_dma_unmap_sg(ia->ri_device,
+                               mw->mw_sg, mw->mw_nents, mw->mw_dir);
        if (rc)
                goto out_release;
 
@@ -262,10 +267,8 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
 }
 
 static void
-__frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_frmr *frmr,
-                           const char *wr)
+__frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
 {
-       frmr->fr_state = FRMR_IS_STALE;
        if (wc->status != IB_WC_WR_FLUSH_ERR)
                pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
                       wr, ib_wc_status_msg(wc->status),
@@ -288,7 +291,8 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
        if (wc->status != IB_WC_SUCCESS) {
                cqe = wc->wr_cqe;
                frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
-               __frwr_sendcompletion_flush(wc, frmr, "fastreg");
+               frmr->fr_state = FRMR_FLUSHED_FR;
+               __frwr_sendcompletion_flush(wc, "fastreg");
        }
 }
 
@@ -308,7 +312,8 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
        if (wc->status != IB_WC_SUCCESS) {
                cqe = wc->wr_cqe;
                frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
-               __frwr_sendcompletion_flush(wc, frmr, "localinv");
+               frmr->fr_state = FRMR_FLUSHED_LI;
+               __frwr_sendcompletion_flush(wc, "localinv");
        }
 }
 
@@ -328,8 +333,10 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
        /* WARNING: Only wr_cqe and status are reliable at this point */
        cqe = wc->wr_cqe;
        frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
-       if (wc->status != IB_WC_SUCCESS)
-               __frwr_sendcompletion_flush(wc, frmr, "localinv");
+       if (wc->status != IB_WC_SUCCESS) {
+               frmr->fr_state = FRMR_FLUSHED_LI;
+               __frwr_sendcompletion_flush(wc, "localinv");
+       }
        complete(&frmr->fr_linv_done);
 }
 
index 2d8545c3409596190d027e8ad73651903f25a6aa..20027f8de129e61faba9037d552eb826ecc34ae4 100644 (file)
@@ -177,18 +177,26 @@ xprt_rdma_bc_allocate(struct rpc_task *task)
                return -EINVAL;
        }
 
+       /* svc_rdma_sendto releases this page */
        page = alloc_page(RPCRDMA_DEF_GFP);
        if (!page)
                return -ENOMEM;
-
        rqst->rq_buffer = page_address(page);
+
+       rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, RPCRDMA_DEF_GFP);
+       if (!rqst->rq_rbuffer) {
+               put_page(page);
+               return -ENOMEM;
+       }
        return 0;
 }
 
 static void
 xprt_rdma_bc_free(struct rpc_task *task)
 {
-       /* No-op: ctxt and page have already been freed. */
+       struct rpc_rqst *rqst = task->tk_rqstp;
+
+       kfree(rqst->rq_rbuffer);
 }
 
 static int
index 6864fb967038d3bc8c410502f11a56aa442c661e..1334de2715c28112bf4f4b77600e2e78218dc853 100644 (file)
@@ -67,6 +67,7 @@ static void svc_rdma_detach(struct svc_xprt *xprt);
 static void svc_rdma_free(struct svc_xprt *xprt);
 static int svc_rdma_has_wspace(struct svc_xprt *xprt);
 static int svc_rdma_secure_port(struct svc_rqst *);
+static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
 
 static struct svc_xprt_ops svc_rdma_ops = {
        .xpo_create = svc_rdma_create,
@@ -79,6 +80,7 @@ static struct svc_xprt_ops svc_rdma_ops = {
        .xpo_has_wspace = svc_rdma_has_wspace,
        .xpo_accept = svc_rdma_accept,
        .xpo_secure_port = svc_rdma_secure_port,
+       .xpo_kill_temp_xprt = svc_rdma_kill_temp_xprt,
 };
 
 struct svc_xprt_class svc_rdma_class = {
@@ -1317,6 +1319,10 @@ static int svc_rdma_secure_port(struct svc_rqst *rqstp)
        return 1;
 }
 
+static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
+{
+}
+
 int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
 {
        struct ib_send_wr *bad_wr, *n_wr;
index 0d35b761c883d01a044ba5f9c635bb3a7edce741..6e1bba358203694e79cbc9074554b12053fa45c7 100644 (file)
@@ -216,7 +216,8 @@ struct rpcrdma_rep {
 enum rpcrdma_frmr_state {
        FRMR_IS_INVALID,        /* ready to be used */
        FRMR_IS_VALID,          /* in use */
-       FRMR_IS_STALE,          /* failed completion */
+       FRMR_FLUSHED_FR,        /* flushed FASTREG WR */
+       FRMR_FLUSHED_LI,        /* flushed LOCALINV WR */
 };
 
 struct rpcrdma_frmr {
index 1758665d609caec0763ff6898e11dea4ed9cf963..af392d9b9ceca5beb848175ec811388e706a09a7 100644 (file)
@@ -1080,7 +1080,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
        if (sk == NULL)
                goto out;
        for (;;) {
-               skb = skb_recv_datagram(sk, 0, 1, &err);
+               skb = skb_recv_udp(sk, 0, 1, &err);
                if (skb != NULL) {
                        xs_udp_data_read_skb(&transport->xprt, sk, skb);
                        consume_skb(skb);
@@ -2563,6 +2563,7 @@ static int bc_malloc(struct rpc_task *task)
        buf->len = PAGE_SIZE;
 
        rqst->rq_buffer = buf->data;
+       rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize;
        return 0;
 }
 
index 02beb35f577fca17e8989c7b63699a62bef8797d..017801f9dbaae28ff3bd411af3fce3a84da6c90d 100644 (file)
@@ -624,13 +624,10 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
                             struct switchdev_notifier_info *info)
 {
-       int err;
-
        ASSERT_RTNL();
 
        info->dev = dev;
-       err = raw_notifier_call_chain(&switchdev_notif_chain, val, info);
-       return err;
+       return raw_notifier_call_chain(&switchdev_notif_chain, val, info);
 }
 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
 
@@ -771,6 +768,9 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
        u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD;
        int err;
 
+       if (!netif_is_bridge_port(dev))
+               return -EOPNOTSUPP;
+
        err = switchdev_port_attr_get(dev, &attr);
        if (err && err != -EOPNOTSUPP)
                return err;
@@ -926,6 +926,9 @@ int switchdev_port_bridge_setlink(struct net_device *dev,
        struct nlattr *afspec;
        int err = 0;
 
+       if (!netif_is_bridge_port(dev))
+               return -EOPNOTSUPP;
+
        protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
                                   IFLA_PROTINFO);
        if (protinfo) {
@@ -959,6 +962,9 @@ int switchdev_port_bridge_dellink(struct net_device *dev,
 {
        struct nlattr *afspec;
 
+       if (!netif_is_bridge_port(dev))
+               return -EOPNOTSUPP;
+
        afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg),
                                 IFLA_AF_SPEC);
        if (afspec)
index 753f774cb46f39a7280515ca64c8c41897d9a480..aa1babbea385348f1ecd4b7467079fc442653094 100644 (file)
@@ -247,11 +247,17 @@ int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb)
  *
  * RCU is locked, no other locks set
  */
-void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked)
+void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
+                       struct tipc_msg *hdr)
 {
        struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+       u16 acked = msg_bcast_ack(hdr);
        struct sk_buff_head xmitq;
 
+       /* Ignore bc acks sent by peer before bcast synch point was received */
+       if (msg_bc_ack_invalid(hdr))
+               return;
+
        __skb_queue_head_init(&xmitq);
 
        tipc_bcast_lock(net);
@@ -279,11 +285,11 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
        __skb_queue_head_init(&xmitq);
 
        tipc_bcast_lock(net);
-       if (msg_type(hdr) == STATE_MSG) {
+       if (msg_type(hdr) != STATE_MSG) {
+               tipc_link_bc_init_rcv(l, hdr);
+       } else if (!msg_bc_ack_invalid(hdr)) {
                tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq);
                rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq);
-       } else {
-               tipc_link_bc_init_rcv(l, hdr);
        }
        tipc_bcast_unlock(net);
 
index 5ffe34472ccd091d19e92137c379191b0d596844..855d53c64ab347ec5b37dc06a39e10748f7bf388 100644 (file)
@@ -55,7 +55,8 @@ void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id);
 int  tipc_bcast_get_mtu(struct net *net);
 int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list);
 int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
-void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked);
+void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
+                       struct tipc_msg *hdr);
 int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
                        struct tipc_msg *hdr);
 int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
index 236b043a4156df151b630e979b0e8fd71addcea4..0b982d048fb9b36c8bec876c32361f264e5dac9e 100644 (file)
@@ -47,7 +47,7 @@
 #include <linux/module.h>
 
 /* configurable TIPC parameters */
-int tipc_net_id __read_mostly;
+unsigned int tipc_net_id __read_mostly;
 int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */
 
 static int __net_init tipc_init_net(struct net *net)
index a1845fb27d803973fc4a0a7a7b2a88f8b88c343c..5cc5398be7225aaaf81f0f9784ce139d9df68b74 100644 (file)
@@ -74,7 +74,7 @@ struct tipc_monitor;
 #define MAX_BEARERS             3
 #define TIPC_DEF_MON_THRESHOLD  32
 
-extern int tipc_net_id __read_mostly;
+extern unsigned int tipc_net_id __read_mostly;
 extern int sysctl_tipc_rmem[3] __read_mostly;
 extern int sysctl_tipc_named_timeout __read_mostly;
 
index b36e16cdc945230f0460f46ef5c4fb60e8b8c745..ecc12411155ea0ec7eeb94d5259ab901029b4d0c 100644 (file)
@@ -1312,6 +1312,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
        msg_set_next_sent(hdr, l->snd_nxt);
        msg_set_ack(hdr, l->rcv_nxt - 1);
        msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1);
+       msg_set_bc_ack_invalid(hdr, !node_up);
        msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1);
        msg_set_link_tolerance(hdr, tolerance);
        msg_set_linkprio(hdr, priority);
@@ -1491,8 +1492,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
                if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
                        l->tolerance = peers_tol;
 
-               if (peers_prio && in_range(peers_prio, TIPC_MIN_LINK_PRI,
-                                          TIPC_MAX_LINK_PRI)) {
+               /* Update own prio if peer indicates a different value */
+               if ((peers_prio != l->priority) &&
+                   in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) {
                        l->priority = peers_prio;
                        rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
                }
@@ -1574,6 +1576,7 @@ static void tipc_link_build_bc_init_msg(struct tipc_link *l,
        __skb_queue_head_init(&list);
        if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list))
                return;
+       msg_set_bc_ack_invalid(buf_msg(skb_peek(&list)), true);
        tipc_link_xmit(l, &list, xmitq);
 }
 
index ed97a5876ebef128937906d4115d3c1db6d16998..9e109bb1a2071836323d41b77e903644b28c7b73 100644 (file)
@@ -455,14 +455,14 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
        int i, applied_bef;
 
        state->probing = false;
-       if (!dlen)
-               return;
 
        /* Sanity check received domain record */
-       if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) {
-               pr_warn_ratelimited("Received illegal domain record\n");
+       if (dlen < dom_rec_len(arrv_dom, 0))
+               return;
+       if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
+               return;
+       if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen)
                return;
-       }
 
        /* Synch generation numbers with peer if link just came up */
        if (!state->synched) {
index c3832cdf2278a3a49dd6624d350ba34096764897..8d408612ffa490521e310b3c643f2407a6c822f3 100644 (file)
@@ -95,7 +95,7 @@ struct plist;
 #define TIPC_MEDIA_INFO_OFFSET 5
 
 struct tipc_skb_cb {
-       void *handle;
+       u32 bytes_read;
        struct sk_buff *tail;
        bool validated;
        bool wakeup_pending;
@@ -714,6 +714,23 @@ static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s)
        msg_set_bits(m, 5, 13, 0x1, s);
 }
 
+static inline bool msg_bc_ack_invalid(struct tipc_msg *m)
+{
+       switch (msg_user(m)) {
+       case BCAST_PROTOCOL:
+       case NAME_DISTRIBUTOR:
+       case LINK_PROTOCOL:
+               return msg_bits(m, 5, 14, 0x1);
+       default:
+               return false;
+       }
+}
+
+static inline void msg_set_bc_ack_invalid(struct tipc_msg *m, bool invalid)
+{
+       msg_set_bits(m, 5, 14, 0x1, invalid);
+}
+
 static inline char *msg_media_addr(struct tipc_msg *m)
 {
        return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET];
index a04fe9be1c60e2a7c1cb2f90c80731e08dcc910d..c1cfd92de17aee30a310305707a70ecb87fd2548 100644 (file)
@@ -156,6 +156,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list,
                                pr_warn("Bulk publication failure\n");
                                return;
                        }
+                       msg_set_bc_ack_invalid(buf_msg(skb), true);
                        item = (struct distr_item *)msg_data(buf_msg(skb));
                }
 
index 3200059d14b2b60b1e5728bc2f5bd00c59d35f4b..26ca8dd64ded64407db8ef885f8dbb6edd30e4b8 100644 (file)
@@ -135,15 +135,6 @@ const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
 /* Users of the legacy API (tipc-config) can't handle that we add operations,
  * so we have a separate genl handling for the new API.
  */
-struct genl_family tipc_genl_family = {
-       .id             = GENL_ID_GENERATE,
-       .name           = TIPC_GENL_V2_NAME,
-       .version        = TIPC_GENL_V2_VERSION,
-       .hdrsize        = 0,
-       .maxattr        = TIPC_NLA_MAX,
-       .netnsok        = true,
-};
-
 static const struct genl_ops tipc_genl_v2_ops[] = {
        {
                .cmd    = TIPC_NL_BEARER_DISABLE,
@@ -258,23 +249,33 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
 #endif
 };
 
+struct genl_family tipc_genl_family __ro_after_init = {
+       .name           = TIPC_GENL_V2_NAME,
+       .version        = TIPC_GENL_V2_VERSION,
+       .hdrsize        = 0,
+       .maxattr        = TIPC_NLA_MAX,
+       .netnsok        = true,
+       .module         = THIS_MODULE,
+       .ops            = tipc_genl_v2_ops,
+       .n_ops          = ARRAY_SIZE(tipc_genl_v2_ops),
+};
+
 int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
 {
        u32 maxattr = tipc_genl_family.maxattr;
 
-       *attr = tipc_genl_family.attrbuf;
+       *attr = genl_family_attrbuf(&tipc_genl_family);
        if (!*attr)
                return -EOPNOTSUPP;
 
        return nlmsg_parse(nlh, GENL_HDRLEN, *attr, maxattr, tipc_nl_policy);
 }
 
-int tipc_netlink_start(void)
+int __init tipc_netlink_start(void)
 {
        int res;
 
-       res = genl_register_family_with_ops(&tipc_genl_family,
-                                           tipc_genl_v2_ops);
+       res = genl_register_family(&tipc_genl_family);
        if (res) {
                pr_err("Failed to register netlink interface\n");
                return res;
index 1fd4647647650b75f17f41d19d288be7abe436a3..e1ae8a8a2b8eacf93224cc332fff4a537d8d1ab0 100644 (file)
@@ -1215,15 +1215,6 @@ send:
        return err;
 }
 
-static struct genl_family tipc_genl_compat_family = {
-       .id             = GENL_ID_GENERATE,
-       .name           = TIPC_GENL_NAME,
-       .version        = TIPC_GENL_VERSION,
-       .hdrsize        = TIPC_GENL_HDRLEN,
-       .maxattr        = 0,
-       .netnsok        = true,
-};
-
 static struct genl_ops tipc_genl_compat_ops[] = {
        {
                .cmd            = TIPC_GENL_CMD,
@@ -1231,12 +1222,22 @@ static struct genl_ops tipc_genl_compat_ops[] = {
        },
 };
 
-int tipc_netlink_compat_start(void)
+static struct genl_family tipc_genl_compat_family __ro_after_init = {
+       .name           = TIPC_GENL_NAME,
+       .version        = TIPC_GENL_VERSION,
+       .hdrsize        = TIPC_GENL_HDRLEN,
+       .maxattr        = 0,
+       .netnsok        = true,
+       .module         = THIS_MODULE,
+       .ops            = tipc_genl_compat_ops,
+       .n_ops          = ARRAY_SIZE(tipc_genl_compat_ops),
+};
+
+int __init tipc_netlink_compat_start(void)
 {
        int res;
 
-       res = genl_register_family_with_ops(&tipc_genl_compat_family,
-                                           tipc_genl_compat_ops);
+       res = genl_register_family(&tipc_genl_compat_family);
        if (res) {
                pr_err("Failed to register legacy compat interface\n");
                return res;
index 7ef14e2d2356590d72284e3ef056d63dee3d1b12..9d2f4c2b08abc56ecb627ff067ad359c54e735fd 100644 (file)
@@ -1535,7 +1535,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
        if (unlikely(usr == LINK_PROTOCOL))
                tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq);
        else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack))
-               tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack);
+               tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr);
 
        /* Receive packet directly if conditions permit */
        tipc_node_read_lock(n);
index f9f5f3c3dab530c0b798d314873800500ccc30b5..333c5dae0072aa3f7a517ec3f345a4a1f60ca5ad 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * net/tipc/socket.c: TIPC socket API
  *
- * Copyright (c) 2001-2007, 2012-2015, Ericsson AB
+ * Copyright (c) 2001-2007, 2012-2016, Ericsson AB
  * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
  * All rights reserved.
  *
 #include "bcast.h"
 #include "netlink.h"
 
-#define SS_LISTENING           -1      /* socket is listening */
-#define SS_READY               -2      /* socket is connectionless */
-
 #define CONN_TIMEOUT_DEFAULT   8000    /* default connect timeout = 8s */
 #define CONN_PROBING_INTERVAL  msecs_to_jiffies(3600000)  /* [ms] => 1 h */
 #define TIPC_FWD_MSG           1
-#define TIPC_CONN_OK           0
-#define TIPC_CONN_PROBING      1
 #define TIPC_MAX_PORT          0xffffffff
 #define TIPC_MIN_PORT          1
 
+enum {
+       TIPC_LISTEN = TCP_LISTEN,
+       TIPC_ESTABLISHED = TCP_ESTABLISHED,
+       TIPC_OPEN = TCP_CLOSE,
+       TIPC_DISCONNECTING = TCP_CLOSE_WAIT,
+       TIPC_CONNECTING = TCP_SYN_SENT,
+};
+
 /**
  * struct tipc_sock - TIPC socket structure
  * @sk: socket - interacts with 'port' and with user via the socket API
- * @connected: non-zero if port is currently connected to a peer port
  * @conn_type: TIPC type used when connection was established
  * @conn_instance: TIPC instance used when connection was established
  * @published: non-zero if port has one or more associated names
  * @max_pkt: maximum packet size "hint" used when building messages sent by port
  * @portid: unique port identity in TIPC socket hash table
  * @phdr: preformatted message header used when sending messages
- * @port_list: adjacent ports in TIPC's global list of ports
  * @publications: list of publications for port
  * @pub_count: total # of publications port has made during its lifetime
  * @probing_state:
- * @probing_intv:
  * @conn_timeout: the time we can wait for an unresponded setup request
  * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
  * @link_cong: non-zero if owner must sleep because of link congestion
  * @sent_unacked: # messages sent by socket, and not yet acked by peer
  * @rcv_unacked: # messages read by user, but not yet acked back to peer
- * @remote: 'connected' peer for dgram/rdm
+ * @peer: 'connected' peer for dgram/rdm
  * @node: hash table node
  * @rcu: rcu struct for tipc_sock
  */
 struct tipc_sock {
        struct sock sk;
-       int connected;
        u32 conn_type;
        u32 conn_instance;
        int published;
@@ -91,17 +90,16 @@ struct tipc_sock {
        struct list_head sock_list;
        struct list_head publications;
        u32 pub_count;
-       u32 probing_state;
-       unsigned long probing_intv;
        uint conn_timeout;
        atomic_t dupl_rcvcnt;
+       bool probe_unacked;
        bool link_cong;
        u16 snt_unacked;
        u16 snd_win;
        u16 peer_caps;
        u16 rcv_unacked;
        u16 rcv_win;
-       struct sockaddr_tipc remote;
+       struct sockaddr_tipc peer;
        struct rhash_head node;
        struct rcu_head rcu;
 };
@@ -129,54 +127,8 @@ static const struct proto_ops packet_ops;
 static const struct proto_ops stream_ops;
 static const struct proto_ops msg_ops;
 static struct proto tipc_proto;
-
 static const struct rhashtable_params tsk_rht_params;
 
-/*
- * Revised TIPC socket locking policy:
- *
- * Most socket operations take the standard socket lock when they start
- * and hold it until they finish (or until they need to sleep).  Acquiring
- * this lock grants the owner exclusive access to the fields of the socket
- * data structures, with the exception of the backlog queue.  A few socket
- * operations can be done without taking the socket lock because they only
- * read socket information that never changes during the life of the socket.
- *
- * Socket operations may acquire the lock for the associated TIPC port if they
- * need to perform an operation on the port.  If any routine needs to acquire
- * both the socket lock and the port lock it must take the socket lock first
- * to avoid the risk of deadlock.
- *
- * The dispatcher handling incoming messages cannot grab the socket lock in
- * the standard fashion, since invoked it runs at the BH level and cannot block.
- * Instead, it checks to see if the socket lock is currently owned by someone,
- * and either handles the message itself or adds it to the socket's backlog
- * queue; in the latter case the queued message is processed once the process
- * owning the socket lock releases it.
- *
- * NOTE: Releasing the socket lock while an operation is sleeping overcomes
- * the problem of a blocked socket operation preventing any other operations
- * from occurring.  However, applications must be careful if they have
- * multiple threads trying to send (or receive) on the same socket, as these
- * operations might interfere with each other.  For example, doing a connect
- * and a receive at the same time might allow the receive to consume the
- * ACK message meant for the connect.  While additional work could be done
- * to try and overcome this, it doesn't seem to be worthwhile at the present.
- *
- * NOTE: Releasing the socket lock while an operation is sleeping also ensures
- * that another operation that must be performed in a non-blocking manner is
- * not delayed for very long because the lock has already been taken.
- *
- * NOTE: This code assumes that certain fields of a port/socket pair are
- * constant over its lifetime; such fields can be examined without taking
- * the socket lock and/or port lock, and do not need to be re-read even
- * after resuming processing after waiting.  These fields include:
- *   - socket type
- *   - pointer to socket sk structure (aka tipc_sock structure)
- *   - pointer to port structure
- *   - port reference
- */
-
 static u32 tsk_own_node(struct tipc_sock *tsk)
 {
        return msg_prevnode(&tsk->phdr);
@@ -232,7 +184,7 @@ static struct tipc_sock *tipc_sk(const struct sock *sk)
 
 static bool tsk_conn_cong(struct tipc_sock *tsk)
 {
-       return tsk->snt_unacked >= tsk->snd_win;
+       return tsk->snt_unacked > tsk->snd_win;
 }
 
 /* tsk_blocks(): translate a buffer size in bytes to number of
@@ -294,6 +246,21 @@ static void tsk_rej_rx_queue(struct sock *sk)
                tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
 }
 
+static bool tipc_sk_connected(struct sock *sk)
+{
+       return sk->sk_state == TIPC_ESTABLISHED;
+}
+
+/* tipc_sk_type_connectionless - check if the socket is datagram socket
+ * @sk: socket
+ *
+ * Returns true if connection less, false otherwise
+ */
+static bool tipc_sk_type_connectionless(struct sock *sk)
+{
+       return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM;
+}
+
 /* tsk_peer_msg - verify if message was sent by connected port's peer
  *
  * Handles cases where the node's network address has changed from
@@ -301,12 +268,13 @@ static void tsk_rej_rx_queue(struct sock *sk)
  */
 static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
 {
-       struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id);
+       struct sock *sk = &tsk->sk;
+       struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id);
        u32 peer_port = tsk_peer_port(tsk);
        u32 orig_node;
        u32 peer_node;
 
-       if (unlikely(!tsk->connected))
+       if (unlikely(!tipc_sk_connected(sk)))
                return false;
 
        if (unlikely(msg_origport(msg) != peer_port))
@@ -327,6 +295,45 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
        return false;
 }
 
+/* tipc_set_sk_state - set the sk_state of the socket
+ * @sk: socket
+ *
+ * Caller must hold socket lock
+ *
+ * Returns 0 on success, errno otherwise
+ */
+static int tipc_set_sk_state(struct sock *sk, int state)
+{
+       int oldsk_state = sk->sk_state;
+       int res = -EINVAL;
+
+       switch (state) {
+       case TIPC_OPEN:
+               res = 0;
+               break;
+       case TIPC_LISTEN:
+       case TIPC_CONNECTING:
+               if (oldsk_state == TIPC_OPEN)
+                       res = 0;
+               break;
+       case TIPC_ESTABLISHED:
+               if (oldsk_state == TIPC_CONNECTING ||
+                   oldsk_state == TIPC_OPEN)
+                       res = 0;
+               break;
+       case TIPC_DISCONNECTING:
+               if (oldsk_state == TIPC_CONNECTING ||
+                   oldsk_state == TIPC_ESTABLISHED)
+                       res = 0;
+               break;
+       }
+
+       if (!res)
+               sk->sk_state = state;
+
+       return res;
+}
+
 /**
  * tipc_sk_create - create a TIPC socket
  * @net: network namespace (must be default network)
@@ -344,7 +351,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 {
        struct tipc_net *tn;
        const struct proto_ops *ops;
-       socket_state state;
        struct sock *sk;
        struct tipc_sock *tsk;
        struct tipc_msg *msg;
@@ -356,16 +362,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
        switch (sock->type) {
        case SOCK_STREAM:
                ops = &stream_ops;
-               state = SS_UNCONNECTED;
                break;
        case SOCK_SEQPACKET:
                ops = &packet_ops;
-               state = SS_UNCONNECTED;
                break;
        case SOCK_DGRAM:
        case SOCK_RDM:
                ops = &msg_ops;
-               state = SS_READY;
                break;
        default:
                return -EPROTOTYPE;
@@ -386,14 +389,15 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 
        /* Finish initializing socket data structures */
        sock->ops = ops;
-       sock->state = state;
        sock_init_data(sock, sk);
+       tipc_set_sk_state(sk, TIPC_OPEN);
        if (tipc_sk_insert(tsk)) {
                pr_warn("Socket create failed; port number exhausted\n");
                return -EINVAL;
        }
        msg_set_origport(msg, tsk->portid);
        setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
+       sk->sk_shutdown = 0;
        sk->sk_backlog_rcv = tipc_backlog_rcv;
        sk->sk_rcvbuf = sysctl_tipc_rmem[1];
        sk->sk_data_ready = tipc_data_ready;
@@ -406,11 +410,12 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
        tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN);
        tsk->rcv_win = tsk->snd_win;
 
-       if (sock->state == SS_READY) {
+       if (tipc_sk_type_connectionless(sk)) {
                tsk_set_unreturnable(tsk, true);
                if (sock->type == SOCK_DGRAM)
                        tsk_set_unreliable(tsk, true);
        }
+
        return 0;
 }
 
@@ -421,6 +426,44 @@ static void tipc_sk_callback(struct rcu_head *head)
        sock_put(&tsk->sk);
 }
 
+/* Caller should hold socket lock for the socket. */
+static void __tipc_shutdown(struct socket *sock, int error)
+{
+       struct sock *sk = sock->sk;
+       struct tipc_sock *tsk = tipc_sk(sk);
+       struct net *net = sock_net(sk);
+       u32 dnode = tsk_peer_node(tsk);
+       struct sk_buff *skb;
+
+       /* Reject all unreceived messages, except on an active connection
+        * (which disconnects locally & sends a 'FIN+' to peer).
+        */
+       while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+               if (TIPC_SKB_CB(skb)->bytes_read) {
+                       kfree_skb(skb);
+               } else {
+                       if (!tipc_sk_type_connectionless(sk) &&
+                           sk->sk_state != TIPC_DISCONNECTING) {
+                               tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+                               tipc_node_remove_conn(net, dnode, tsk->portid);
+                       }
+                       tipc_sk_respond(sk, skb, error);
+               }
+       }
+       if (sk->sk_state != TIPC_DISCONNECTING) {
+               skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
+                                     TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
+                                     tsk_own_node(tsk), tsk_peer_port(tsk),
+                                     tsk->portid, error);
+               if (skb)
+                       tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
+               if (!tipc_sk_type_connectionless(sk)) {
+                       tipc_node_remove_conn(net, dnode, tsk->portid);
+                       tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+               }
+       }
+}
+
 /**
  * tipc_release - destroy a TIPC socket
  * @sock: socket to destroy
@@ -440,10 +483,7 @@ static void tipc_sk_callback(struct rcu_head *head)
 static int tipc_release(struct socket *sock)
 {
        struct sock *sk = sock->sk;
-       struct net *net;
        struct tipc_sock *tsk;
-       struct sk_buff *skb;
-       u32 dnode;
 
        /*
         * Exit if socket isn't fully initialized (occurs when a failed accept()
@@ -452,47 +492,16 @@ static int tipc_release(struct socket *sock)
        if (sk == NULL)
                return 0;
 
-       net = sock_net(sk);
        tsk = tipc_sk(sk);
        lock_sock(sk);
 
-       /*
-        * Reject all unreceived messages, except on an active connection
-        * (which disconnects locally & sends a 'FIN+' to peer)
-        */
-       dnode = tsk_peer_node(tsk);
-       while (sock->state != SS_DISCONNECTING) {
-               skb = __skb_dequeue(&sk->sk_receive_queue);
-               if (skb == NULL)
-                       break;
-               if (TIPC_SKB_CB(skb)->handle != NULL)
-                       kfree_skb(skb);
-               else {
-                       if ((sock->state == SS_CONNECTING) ||
-                           (sock->state == SS_CONNECTED)) {
-                               sock->state = SS_DISCONNECTING;
-                               tsk->connected = 0;
-                               tipc_node_remove_conn(net, dnode, tsk->portid);
-                       }
-                       tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
-               }
-       }
-
+       __tipc_shutdown(sock, TIPC_ERR_NO_PORT);
+       sk->sk_shutdown = SHUTDOWN_MASK;
        tipc_sk_withdraw(tsk, 0, NULL);
        sk_stop_timer(sk, &sk->sk_timer);
        tipc_sk_remove(tsk);
-       if (tsk->connected) {
-               skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
-                                     TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
-                                     tsk_own_node(tsk), tsk_peer_port(tsk),
-                                     tsk->portid, TIPC_ERR_NO_PORT);
-               if (skb)
-                       tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
-               tipc_node_remove_conn(net, dnode, tsk->portid);
-       }
 
        /* Reject any messages that accumulated in backlog queue */
-       sock->state = SS_DISCONNECTING;
        release_sock(sk);
 
        call_rcu(&tsk->rcu, tipc_sk_callback);
@@ -578,13 +587,14 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
                        int *uaddr_len, int peer)
 {
        struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr;
-       struct tipc_sock *tsk = tipc_sk(sock->sk);
+       struct sock *sk = sock->sk;
+       struct tipc_sock *tsk = tipc_sk(sk);
        struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id);
 
        memset(addr, 0, sizeof(*addr));
        if (peer) {
-               if ((sock->state != SS_CONNECTED) &&
-                       ((peer != 2) || (sock->state != SS_DISCONNECTING)))
+               if ((!tipc_sk_connected(sk)) &&
+                   ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING)))
                        return -ENOTCONN;
                addr->addr.id.ref = tsk_peer_port(tsk);
                addr->addr.id.node = tsk_peer_node(tsk);
@@ -616,28 +626,6 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
  * exits.  TCP and other protocols seem to rely on higher level poll routines
  * to handle any preventable race conditions, so TIPC will do the same ...
  *
- * TIPC sets the returned events as follows:
- *
- * socket state                flags set
- * ------------                ---------
- * unconnected         no read flags
- *                     POLLOUT if port is not congested
- *
- * connecting          POLLIN/POLLRDNORM if ACK/NACK in rx queue
- *                     no write flags
- *
- * connected           POLLIN/POLLRDNORM if data in rx queue
- *                     POLLOUT if port is not congested
- *
- * disconnecting       POLLIN/POLLRDNORM/POLLHUP
- *                     no write flags
- *
- * listening           POLLIN if SYN in rx queue
- *                     no write flags
- *
- * ready               POLLIN/POLLRDNORM if data in rx queue
- * [connectionless]    POLLOUT (since port cannot be congested)
- *
  * IMPORTANT: The fact that a read or write operation is indicated does NOT
  * imply that the operation will succeed, merely that it should be performed
  * and will not block.
@@ -651,22 +639,29 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
 
        sock_poll_wait(file, sk_sleep(sk), wait);
 
-       switch ((int)sock->state) {
-       case SS_UNCONNECTED:
-               if (!tsk->link_cong)
-                       mask |= POLLOUT;
-               break;
-       case SS_READY:
-       case SS_CONNECTED:
+       if (sk->sk_shutdown & RCV_SHUTDOWN)
+               mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+       if (sk->sk_shutdown == SHUTDOWN_MASK)
+               mask |= POLLHUP;
+
+       switch (sk->sk_state) {
+       case TIPC_ESTABLISHED:
                if (!tsk->link_cong && !tsk_conn_cong(tsk))
                        mask |= POLLOUT;
                /* fall thru' */
-       case SS_CONNECTING:
-       case SS_LISTENING:
+       case TIPC_LISTEN:
+       case TIPC_CONNECTING:
                if (!skb_queue_empty(&sk->sk_receive_queue))
                        mask |= (POLLIN | POLLRDNORM);
                break;
-       case SS_DISCONNECTING:
+       case TIPC_OPEN:
+               if (!tsk->link_cong)
+                       mask |= POLLOUT;
+               if (tipc_sk_type_connectionless(sk) &&
+                   (!skb_queue_empty(&sk->sk_receive_queue)))
+                       mask |= (POLLIN | POLLRDNORM);
+               break;
+       case TIPC_DISCONNECTING:
                mask = (POLLIN | POLLRDNORM | POLLHUP);
                break;
        }
@@ -697,6 +692,9 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
        uint mtu;
        int rc;
 
+       if (!timeo && tsk->link_cong)
+               return -ELINKCONG;
+
        msg_set_type(mhdr, TIPC_MCAST_MSG);
        msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE);
        msg_set_destport(mhdr, 0);
@@ -809,7 +807,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
        if (!tsk_peer_msg(tsk, hdr))
                goto exit;
 
-       tsk->probing_state = TIPC_CONN_OK;
+       tsk->probe_unacked = false;
 
        if (mtyp == CONN_PROBE) {
                msg_set_type(hdr, CONN_PROBE_REPLY);
@@ -832,25 +830,25 @@ exit:
 
 static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
 {
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct sock *sk = sock->sk;
        struct tipc_sock *tsk = tipc_sk(sk);
-       DEFINE_WAIT(wait);
        int done;
 
        do {
                int err = sock_error(sk);
                if (err)
                        return err;
-               if (sock->state == SS_DISCONNECTING)
+               if (sk->sk_shutdown & SEND_SHUTDOWN)
                        return -EPIPE;
                if (!*timeo_p)
                        return -EAGAIN;
                if (signal_pending(current))
                        return sock_intr_errno(*timeo_p);
 
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-               done = sk_wait_event(sk, timeo_p, !tsk->link_cong);
-               finish_wait(sk_sleep(sk), &wait);
+               add_wait_queue(sk_sleep(sk), &wait);
+               done = sk_wait_event(sk, timeo_p, !tsk->link_cong, &wait);
+               remove_wait_queue(sk_sleep(sk), &wait);
        } while (!done);
        return 0;
 }
@@ -890,6 +888,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
        struct tipc_msg *mhdr = &tsk->phdr;
        u32 dnode, dport;
        struct sk_buff_head pktchain;
+       bool is_connectionless = tipc_sk_type_connectionless(sk);
        struct sk_buff *skb;
        struct tipc_name_seq *seq;
        struct iov_iter save;
@@ -900,18 +899,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
        if (dsz > TIPC_MAX_USER_MSG_SIZE)
                return -EMSGSIZE;
        if (unlikely(!dest)) {
-               if (tsk->connected && sock->state == SS_READY)
-                       dest = &tsk->remote;
+               if (is_connectionless && tsk->peer.family == AF_TIPC)
+                       dest = &tsk->peer;
                else
                        return -EDESTADDRREQ;
        } else if (unlikely(m->msg_namelen < sizeof(*dest)) ||
                   dest->family != AF_TIPC) {
                return -EINVAL;
        }
-       if (unlikely(sock->state != SS_READY)) {
-               if (sock->state == SS_LISTENING)
+       if (!is_connectionless) {
+               if (sk->sk_state == TIPC_LISTEN)
                        return -EPIPE;
-               if (sock->state != SS_UNCONNECTED)
+               if (sk->sk_state != TIPC_OPEN)
                        return -EISCONN;
                if (tsk->published)
                        return -EOPNOTSUPP;
@@ -963,8 +962,8 @@ new_mtu:
                TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
                rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
                if (likely(!rc)) {
-                       if (sock->state != SS_READY)
-                               sock->state = SS_CONNECTING;
+                       if (!is_connectionless)
+                               tipc_set_sk_state(sk, TIPC_CONNECTING);
                        return dsz;
                }
                if (rc == -ELINKCONG) {
@@ -986,30 +985,30 @@ new_mtu:
 
 static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
 {
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct sock *sk = sock->sk;
        struct tipc_sock *tsk = tipc_sk(sk);
-       DEFINE_WAIT(wait);
        int done;
 
        do {
                int err = sock_error(sk);
                if (err)
                        return err;
-               if (sock->state == SS_DISCONNECTING)
+               if (sk->sk_state == TIPC_DISCONNECTING)
                        return -EPIPE;
-               else if (sock->state != SS_CONNECTED)
+               else if (!tipc_sk_connected(sk))
                        return -ENOTCONN;
                if (!*timeo_p)
                        return -EAGAIN;
                if (signal_pending(current))
                        return sock_intr_errno(*timeo_p);
 
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+               add_wait_queue(sk_sleep(sk), &wait);
                done = sk_wait_event(sk, timeo_p,
                                     (!tsk->link_cong &&
                                      !tsk_conn_cong(tsk)) ||
-                                    !tsk->connected);
-               finish_wait(sk_sleep(sk), &wait);
+                                     !tipc_sk_connected(sk), &wait);
+               remove_wait_queue(sk_sleep(sk), &wait);
        } while (!done);
        return 0;
 }
@@ -1064,14 +1063,17 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
        if (dsz > (uint)INT_MAX)
                return -EMSGSIZE;
 
-       if (unlikely(sock->state != SS_CONNECTED)) {
-               if (sock->state == SS_DISCONNECTING)
+       if (unlikely(!tipc_sk_connected(sk))) {
+               if (sk->sk_state == TIPC_DISCONNECTING)
                        return -EPIPE;
                else
                        return -ENOTCONN;
        }
 
        timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
+       if (!timeo && tsk->link_cong)
+               return -ELINKCONG;
+
        dnode = tsk_peer_node(tsk);
        skb_queue_head_init(&pktchain);
 
@@ -1145,10 +1147,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
        msg_set_lookup_scope(msg, 0);
        msg_set_hdr_sz(msg, SHORT_H_SIZE);
 
-       tsk->probing_intv = CONN_PROBING_INTERVAL;
-       tsk->probing_state = TIPC_CONN_OK;
-       tsk->connected = 1;
-       sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv);
+       sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
+       tipc_set_sk_state(sk, TIPC_ESTABLISHED);
        tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
        tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
        tsk->peer_caps = tipc_node_get_capabilities(net, peer_node);
@@ -1256,13 +1256,14 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
 
 static void tipc_sk_send_ack(struct tipc_sock *tsk)
 {
-       struct net *net = sock_net(&tsk->sk);
+       struct sock *sk = &tsk->sk;
+       struct net *net = sock_net(sk);
        struct sk_buff *skb = NULL;
        struct tipc_msg *msg;
        u32 peer_port = tsk_peer_port(tsk);
        u32 dnode = tsk_peer_node(tsk);
 
-       if (!tsk->connected)
+       if (!tipc_sk_connected(sk))
                return;
        skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0,
                              dnode, tsk_own_node(tsk), peer_port,
@@ -1291,7 +1292,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
        for (;;) {
                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
                if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
-                       if (sock->state == SS_DISCONNECTING) {
+                       if (sk->sk_shutdown & RCV_SHUTDOWN) {
                                err = -ENOTCONN;
                                break;
                        }
@@ -1332,6 +1333,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
        struct tipc_sock *tsk = tipc_sk(sk);
        struct sk_buff *buf;
        struct tipc_msg *msg;
+       bool is_connectionless = tipc_sk_type_connectionless(sk);
        long timeo;
        unsigned int sz;
        u32 err;
@@ -1343,7 +1345,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buf_len,
 
        lock_sock(sk);
 
-       if (unlikely(sock->state == SS_UNCONNECTED)) {
+       if (!is_connectionless && unlikely(sk->sk_state == TIPC_OPEN)) {
                res = -ENOTCONN;
                goto exit;
        }
@@ -1388,8 +1390,8 @@ restart:
                        goto exit;
                res = sz;
        } else {
-               if ((sock->state == SS_READY) ||
-                   ((err == TIPC_CONN_SHUTDOWN) || m->msg_control))
+               if (is_connectionless || err == TIPC_CONN_SHUTDOWN ||
+                   m->msg_control)
                        res = 0;
                else
                        res = -ECONNRESET;
@@ -1398,7 +1400,7 @@ restart:
        if (unlikely(flags & MSG_PEEK))
                goto exit;
 
-       if (likely(sock->state != SS_READY)) {
+       if (likely(!is_connectionless)) {
                tsk->rcv_unacked += tsk_inc(tsk, hlen + sz);
                if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4)))
                        tipc_sk_send_ack(tsk);
@@ -1429,7 +1431,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
        struct tipc_msg *msg;
        long timeo;
        unsigned int sz;
-       int sz_to_copy, target, needed;
+       int target;
        int sz_copied = 0;
        u32 err;
        int res = 0, hlen;
@@ -1440,7 +1442,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m,
 
        lock_sock(sk);
 
-       if (unlikely(sock->state == SS_UNCONNECTED)) {
+       if (unlikely(sk->sk_state == TIPC_OPEN)) {
                res = -ENOTCONN;
                goto exit;
        }
@@ -1477,11 +1479,13 @@ restart:
 
        /* Capture message data (if valid) & compute return value (always) */
        if (!err) {
-               u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle);
+               u32 offset = TIPC_SKB_CB(buf)->bytes_read;
+               u32 needed;
+               int sz_to_copy;
 
                sz -= offset;
                needed = (buf_len - sz_copied);
-               sz_to_copy = (sz <= needed) ? sz : needed;
+               sz_to_copy = min(sz, needed);
 
                res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy);
                if (res)
@@ -1491,8 +1495,8 @@ restart:
 
                if (sz_to_copy < sz) {
                        if (!(flags & MSG_PEEK))
-                               TIPC_SKB_CB(buf)->handle =
-                               (void *)(unsigned long)(offset + sz_to_copy);
+                               TIPC_SKB_CB(buf)->bytes_read =
+                                       offset + sz_to_copy;
                        goto exit;
                }
        } else {
@@ -1574,49 +1578,31 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
 {
        struct sock *sk = &tsk->sk;
        struct net *net = sock_net(sk);
-       struct socket *sock = sk->sk_socket;
        struct tipc_msg *hdr = buf_msg(skb);
 
        if (unlikely(msg_mcast(hdr)))
                return false;
 
-       switch ((int)sock->state) {
-       case SS_CONNECTED:
-
-               /* Accept only connection-based messages sent by peer */
-               if (unlikely(!tsk_peer_msg(tsk, hdr)))
-                       return false;
-
-               if (unlikely(msg_errcode(hdr))) {
-                       sock->state = SS_DISCONNECTING;
-                       tsk->connected = 0;
-                       /* Let timer expire on it's own */
-                       tipc_node_remove_conn(net, tsk_peer_node(tsk),
-                                             tsk->portid);
-               }
-               return true;
-
-       case SS_CONNECTING:
-
+       switch (sk->sk_state) {
+       case TIPC_CONNECTING:
                /* Accept only ACK or NACK message */
                if (unlikely(!msg_connected(hdr)))
                        return false;
 
                if (unlikely(msg_errcode(hdr))) {
-                       sock->state = SS_DISCONNECTING;
+                       tipc_set_sk_state(sk, TIPC_DISCONNECTING);
                        sk->sk_err = ECONNREFUSED;
                        return true;
                }
 
                if (unlikely(!msg_isdata(hdr))) {
-                       sock->state = SS_DISCONNECTING;
+                       tipc_set_sk_state(sk, TIPC_DISCONNECTING);
                        sk->sk_err = EINVAL;
                        return true;
                }
 
                tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
                msg_set_importance(&tsk->phdr, msg_importance(hdr));
-               sock->state = SS_CONNECTED;
 
                /* If 'ACK+' message, add to socket receive queue */
                if (msg_data_sz(hdr))
@@ -1630,18 +1616,31 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
                msg_set_dest_droppable(hdr, 1);
                return false;
 
-       case SS_LISTENING:
-       case SS_UNCONNECTED:
-
+       case TIPC_OPEN:
+       case TIPC_DISCONNECTING:
+               break;
+       case TIPC_LISTEN:
                /* Accept only SYN message */
                if (!msg_connected(hdr) && !(msg_errcode(hdr)))
                        return true;
                break;
-       case SS_DISCONNECTING:
-               break;
+       case TIPC_ESTABLISHED:
+               /* Accept only connection-based messages sent by peer */
+               if (unlikely(!tsk_peer_msg(tsk, hdr)))
+                       return false;
+
+               if (unlikely(msg_errcode(hdr))) {
+                       tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+                       /* Let timer expire on it's own */
+                       tipc_node_remove_conn(net, tsk_peer_node(tsk),
+                                             tsk->portid);
+                       sk->sk_state_change(sk);
+               }
+               return true;
        default:
-               pr_err("Unknown socket state %u\n", sock->state);
+               pr_err("Unknown sk_state %u\n", sk->sk_state);
        }
+
        return false;
 }
 
@@ -1692,7 +1691,6 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
 static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
                       struct sk_buff_head *xmitq)
 {
-       struct socket *sock = sk->sk_socket;
        struct tipc_sock *tsk = tipc_sk(sk);
        struct tipc_msg *hdr = buf_msg(skb);
        unsigned int limit = rcvbuf_limit(sk, skb);
@@ -1718,7 +1716,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
        }
 
        /* Reject if wrong message type for current socket state */
-       if (unlikely(sock->state == SS_READY)) {
+       if (tipc_sk_type_connectionless(sk)) {
                if (msg_connected(hdr)) {
                        err = TIPC_ERR_NO_PORT;
                        goto reject;
@@ -1735,7 +1733,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
        }
 
        /* Enqueue message */
-       TIPC_SKB_CB(skb)->handle = NULL;
+       TIPC_SKB_CB(skb)->bytes_read = 0;
        __skb_queue_tail(&sk->sk_receive_queue, skb);
        skb_set_owner_r(skb, sk);
 
@@ -1885,8 +1883,8 @@ xmit:
 
 static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
 {
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
        struct sock *sk = sock->sk;
-       DEFINE_WAIT(wait);
        int done;
 
        do {
@@ -1898,9 +1896,10 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
                if (signal_pending(current))
                        return sock_intr_errno(*timeo_p);
 
-               prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
-               done = sk_wait_event(sk, timeo_p, sock->state != SS_CONNECTING);
-               finish_wait(sk_sleep(sk), &wait);
+               add_wait_queue(sk_sleep(sk), &wait);
+               done = sk_wait_event(sk, timeo_p,
+                                    sk->sk_state != TIPC_CONNECTING, &wait);
+               remove_wait_queue(sk_sleep(sk), &wait);
        } while (!done);
        return 0;
 }
@@ -1922,21 +1921,19 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
        struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
        struct msghdr m = {NULL,};
        long timeout = (flags & O_NONBLOCK) ? 0 : tsk->conn_timeout;
-       socket_state previous;
+       int previous;
        int res = 0;
 
        lock_sock(sk);
 
        /* DGRAM/RDM connect(), just save the destaddr */
-       if (sock->state == SS_READY) {
+       if (tipc_sk_type_connectionless(sk)) {
                if (dst->family == AF_UNSPEC) {
-                       memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc));
-                       tsk->connected = 0;
+                       memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
                } else if (destlen != sizeof(struct sockaddr_tipc)) {
                        res = -EINVAL;
                } else {
-                       memcpy(&tsk->remote, dest, destlen);
-                       tsk->connected = 1;
+                       memcpy(&tsk->peer, dest, destlen);
                }
                goto exit;
        }
@@ -1952,9 +1949,10 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
                goto exit;
        }
 
-       previous = sock->state;
-       switch (sock->state) {
-       case SS_UNCONNECTED:
+       previous = sk->sk_state;
+
+       switch (sk->sk_state) {
+       case TIPC_OPEN:
                /* Send a 'SYN-' to destination */
                m.msg_name = dest;
                m.msg_namelen = destlen;
@@ -1969,27 +1967,29 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
                if ((res < 0) && (res != -EWOULDBLOCK))
                        goto exit;
 
-               /* Just entered SS_CONNECTING state; the only
+               /* Just entered TIPC_CONNECTING state; the only
                 * difference is that return value in non-blocking
                 * case is EINPROGRESS, rather than EALREADY.
                 */
                res = -EINPROGRESS;
-       case SS_CONNECTING:
-               if (previous == SS_CONNECTING)
-                       res = -EALREADY;
-               if (!timeout)
+               /* fall thru' */
+       case TIPC_CONNECTING:
+               if (!timeout) {
+                       if (previous == TIPC_CONNECTING)
+                               res = -EALREADY;
                        goto exit;
+               }
                timeout = msecs_to_jiffies(timeout);
                /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
                res = tipc_wait_for_connect(sock, &timeout);
                break;
-       case SS_CONNECTED:
+       case TIPC_ESTABLISHED:
                res = -EISCONN;
                break;
        default:
                res = -EINVAL;
-               break;
        }
+
 exit:
        release_sock(sk);
        return res;
@@ -2008,15 +2008,9 @@ static int tipc_listen(struct socket *sock, int len)
        int res;
 
        lock_sock(sk);
-
-       if (sock->state != SS_UNCONNECTED)
-               res = -EINVAL;
-       else {
-               sock->state = SS_LISTENING;
-               res = 0;
-       }
-
+       res = tipc_set_sk_state(sk, TIPC_LISTEN);
        release_sock(sk);
+
        return res;
 }
 
@@ -2042,9 +2036,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
                err = 0;
                if (!skb_queue_empty(&sk->sk_receive_queue))
                        break;
-               err = -EINVAL;
-               if (sock->state != SS_LISTENING)
-                       break;
                err = -EAGAIN;
                if (!timeo)
                        break;
@@ -2075,7 +2066,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
 
        lock_sock(sk);
 
-       if (sock->state != SS_LISTENING) {
+       if (sk->sk_state != TIPC_LISTEN) {
                res = -EINVAL;
                goto exit;
        }
@@ -2086,7 +2077,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
 
        buf = skb_peek(&sk->sk_receive_queue);
 
-       res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
+       res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0);
        if (res)
                goto exit;
        security_sk_clone(sock->sk, new_sock->sk);
@@ -2106,7 +2097,6 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
 
        /* Connect new socket to it's peer */
        tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
-       new_sock->state = SS_CONNECTED;
 
        tsk_set_importance(new_tsock, msg_importance(msg));
        if (msg_named(msg)) {
@@ -2146,13 +2136,6 @@ exit:
 static int tipc_shutdown(struct socket *sock, int how)
 {
        struct sock *sk = sock->sk;
-       struct net *net = sock_net(sk);
-       struct tipc_sock *tsk = tipc_sk(sk);
-       struct sk_buff *skb;
-       u32 dnode = tsk_peer_node(tsk);
-       u32 dport = tsk_peer_port(tsk);
-       u32 onode = tipc_own_addr(net);
-       u32 oport = tsk->portid;
        int res;
 
        if (how != SHUT_RDWR)
@@ -2160,45 +2143,17 @@ static int tipc_shutdown(struct socket *sock, int how)
 
        lock_sock(sk);
 
-       switch (sock->state) {
-       case SS_CONNECTING:
-       case SS_CONNECTED:
-
-restart:
-               dnode = tsk_peer_node(tsk);
-
-               /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
-               skb = __skb_dequeue(&sk->sk_receive_queue);
-               if (skb) {
-                       if (TIPC_SKB_CB(skb)->handle != NULL) {
-                               kfree_skb(skb);
-                               goto restart;
-                       }
-                       tipc_sk_respond(sk, skb, TIPC_CONN_SHUTDOWN);
-               } else {
-                       skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
-                                             TIPC_CONN_MSG, SHORT_H_SIZE,
-                                             0, dnode, onode, dport, oport,
-                                             TIPC_CONN_SHUTDOWN);
-                       if (skb)
-                               tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
-               }
-               tsk->connected = 0;
-               sock->state = SS_DISCONNECTING;
-               tipc_node_remove_conn(net, dnode, tsk->portid);
-               /* fall through */
-
-       case SS_DISCONNECTING:
+       __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN);
+       sk->sk_shutdown = SEND_SHUTDOWN;
 
+       if (sk->sk_state == TIPC_DISCONNECTING) {
                /* Discard any unreceived messages */
                __skb_queue_purge(&sk->sk_receive_queue);
 
                /* Wake up anyone sleeping in poll */
                sk->sk_state_change(sk);
                res = 0;
-               break;
-
-       default:
+       } else {
                res = -ENOTCONN;
        }
 
@@ -2215,17 +2170,16 @@ static void tipc_sk_timeout(unsigned long data)
        u32 own_node = tsk_own_node(tsk);
 
        bh_lock_sock(sk);
-       if (!tsk->connected) {
+       if (!tipc_sk_connected(sk)) {
                bh_unlock_sock(sk);
                goto exit;
        }
        peer_port = tsk_peer_port(tsk);
        peer_node = tsk_peer_node(tsk);
 
-       if (tsk->probing_state == TIPC_CONN_PROBING) {
+       if (tsk->probe_unacked) {
                if (!sock_owned_by_user(sk)) {
-                       sk->sk_socket->state = SS_DISCONNECTING;
-                       tsk->connected = 0;
+                       tipc_set_sk_state(sk, TIPC_DISCONNECTING);
                        tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
                                              tsk_peer_port(tsk));
                        sk->sk_state_change(sk);
@@ -2234,13 +2188,15 @@ static void tipc_sk_timeout(unsigned long data)
                        sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
                }
 
-       } else {
-               skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
-                                     INT_H_SIZE, 0, peer_node, own_node,
-                                     peer_port, tsk->portid, TIPC_OK);
-               tsk->probing_state = TIPC_CONN_PROBING;
-               sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv);
+               bh_unlock_sock(sk);
+               goto exit;
        }
+
+       skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
+                             INT_H_SIZE, 0, peer_node, own_node,
+                             peer_port, tsk->portid, TIPC_OK);
+       tsk->probe_unacked = true;
+       sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
        bh_unlock_sock(sk);
        if (skb)
                tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
@@ -2251,11 +2207,12 @@ exit:
 static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
                           struct tipc_name_seq const *seq)
 {
-       struct net *net = sock_net(&tsk->sk);
+       struct sock *sk = &tsk->sk;
+       struct net *net = sock_net(sk);
        struct publication *publ;
        u32 key;
 
-       if (tsk->connected)
+       if (tipc_sk_connected(sk))
                return -EINVAL;
        key = tsk->portid + tsk->pub_count + 1;
        if (key == tsk->portid)
@@ -2713,6 +2670,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
        struct nlattr *attrs;
        struct net *net = sock_net(skb->sk);
        struct tipc_net *tn = net_generic(net, tipc_net_id);
+       struct sock *sk = &tsk->sk;
 
        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &tipc_genl_family, NLM_F_MULTI, TIPC_NL_SOCK_GET);
@@ -2727,7 +2685,7 @@ static int __tipc_nl_add_sk(struct sk_buff *skb, struct netlink_callback *cb,
        if (nla_put_u32(skb, TIPC_NLA_SOCK_ADDR, tn->own_addr))
                goto attr_msg_cancel;
 
-       if (tsk->connected) {
+       if (tipc_sk_connected(sk)) {
                err = __tipc_nl_add_sk_con(skb, tsk);
                if (err)
                        goto attr_msg_cancel;
index 145082e2ba36068192ccef517804a14aa0d08752..1752d6b10ac4e5821172a3c06d4332b59c0c0084 100644 (file)
@@ -2113,8 +2113,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
                mutex_lock(&u->iolock);
 
                skip = sk_peek_offset(sk, flags);
-               skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
-                                             &last);
+               skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
+                                             &err, &last);
                if (skb)
                        break;
 
@@ -2199,7 +2199,8 @@ out:
  *     Sleep until more data has arrived. But check for races..
  */
 static long unix_stream_data_wait(struct sock *sk, long timeo,
-                                 struct sk_buff *last, unsigned int last_len)
+                                 struct sk_buff *last, unsigned int last_len,
+                                 bool freezable)
 {
        struct sk_buff *tail;
        DEFINE_WAIT(wait);
@@ -2220,7 +2221,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
 
                sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
                unix_state_unlock(sk);
-               timeo = freezable_schedule_timeout(timeo);
+               if (freezable)
+                       timeo = freezable_schedule_timeout(timeo);
+               else
+                       timeo = schedule_timeout(timeo);
                unix_state_lock(sk);
 
                if (sock_flag(sk, SOCK_DEAD))
@@ -2250,7 +2254,8 @@ struct unix_stream_read_state {
        unsigned int splice_flags;
 };
 
-static int unix_stream_read_generic(struct unix_stream_read_state *state)
+static int unix_stream_read_generic(struct unix_stream_read_state *state,
+                                   bool freezable)
 {
        struct scm_cookie scm;
        struct socket *sock = state->socket;
@@ -2330,7 +2335,7 @@ again:
                        mutex_unlock(&u->iolock);
 
                        timeo = unix_stream_data_wait(sk, timeo, last,
-                                                     last_len);
+                                                     last_len, freezable);
 
                        if (signal_pending(current)) {
                                err = sock_intr_errno(timeo);
@@ -2472,7 +2477,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
                .flags = flags
        };
 
-       return unix_stream_read_generic(&state);
+       return unix_stream_read_generic(&state, true);
 }
 
 static int unix_stream_splice_actor(struct sk_buff *skb,
@@ -2503,7 +2508,7 @@ static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
            flags & SPLICE_F_NONBLOCK)
                state.flags = MSG_DONTWAIT;
 
-       return unix_stream_read_generic(&state);
+       return unix_stream_read_generic(&state, false);
 }
 
 static int unix_shutdown(struct socket *sock, int mode)
@@ -2812,7 +2817,8 @@ static int unix_seq_show(struct seq_file *seq, void *v)
                                i++;
                        }
                        for ( ; i < len; i++)
-                               seq_putc(seq, u->addr->name->sun_path[i]);
+                               seq_putc(seq, u->addr->name->sun_path[i] ?:
+                                        '@');
                }
                unix_state_unlock(s);
                seq_putc(seq, '\n');
index 936d7eee62d03efbac1e278272ca8fd917a40622..2e47f9f06b962b2b80f03f67614c7dbbb13ee34a 100644 (file)
@@ -44,6 +44,10 @@ struct virtio_vsock {
        spinlock_t send_pkt_list_lock;
        struct list_head send_pkt_list;
 
+       struct work_struct loopback_work;
+       spinlock_t loopback_list_lock; /* protects loopback_list */
+       struct list_head loopback_list;
+
        atomic_t queued_replies;
 
        /* The following fields are protected by rx_lock.  vqs[VSOCK_VQ_RX]
@@ -74,6 +78,42 @@ static u32 virtio_transport_get_local_cid(void)
        return vsock->guest_cid;
 }
 
+static void virtio_transport_loopback_work(struct work_struct *work)
+{
+       struct virtio_vsock *vsock =
+               container_of(work, struct virtio_vsock, loopback_work);
+       LIST_HEAD(pkts);
+
+       spin_lock_bh(&vsock->loopback_list_lock);
+       list_splice_init(&vsock->loopback_list, &pkts);
+       spin_unlock_bh(&vsock->loopback_list_lock);
+
+       mutex_lock(&vsock->rx_lock);
+       while (!list_empty(&pkts)) {
+               struct virtio_vsock_pkt *pkt;
+
+               pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list);
+               list_del_init(&pkt->list);
+
+               virtio_transport_recv_pkt(pkt);
+       }
+       mutex_unlock(&vsock->rx_lock);
+}
+
+static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock,
+                                             struct virtio_vsock_pkt *pkt)
+{
+       int len = pkt->len;
+
+       spin_lock_bh(&vsock->loopback_list_lock);
+       list_add_tail(&pkt->list, &vsock->loopback_list);
+       spin_unlock_bh(&vsock->loopback_list_lock);
+
+       queue_work(virtio_vsock_workqueue, &vsock->loopback_work);
+
+       return len;
+}
+
 static void
 virtio_transport_send_pkt_work(struct work_struct *work)
 {
@@ -159,6 +199,9 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
                return -ENODEV;
        }
 
+       if (le32_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid)
+               return virtio_transport_send_pkt_loopback(vsock, pkt);
+
        if (pkt->reply)
                atomic_inc(&vsock->queued_replies);
 
@@ -510,10 +553,13 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
        mutex_init(&vsock->event_lock);
        spin_lock_init(&vsock->send_pkt_list_lock);
        INIT_LIST_HEAD(&vsock->send_pkt_list);
+       spin_lock_init(&vsock->loopback_list_lock);
+       INIT_LIST_HEAD(&vsock->loopback_list);
        INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
        INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
        INIT_WORK(&vsock->event_work, virtio_transport_event_work);
        INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
+       INIT_WORK(&vsock->loopback_work, virtio_transport_loopback_work);
 
        mutex_lock(&vsock->rx_lock);
        virtio_vsock_rx_fill(vsock);
@@ -539,6 +585,7 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
        struct virtio_vsock *vsock = vdev->priv;
        struct virtio_vsock_pkt *pkt;
 
+       flush_work(&vsock->loopback_work);
        flush_work(&vsock->rx_work);
        flush_work(&vsock->tx_work);
        flush_work(&vsock->event_work);
@@ -565,6 +612,15 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
        }
        spin_unlock_bh(&vsock->send_pkt_list_lock);
 
+       spin_lock_bh(&vsock->loopback_list_lock);
+       while (!list_empty(&vsock->loopback_list)) {
+               pkt = list_first_entry(&vsock->loopback_list,
+                                      struct virtio_vsock_pkt, list);
+               list_del(&pkt->list);
+               virtio_transport_free_pkt(pkt);
+       }
+       spin_unlock_bh(&vsock->loopback_list_lock);
+
        mutex_lock(&the_virtio_vsock_mutex);
        the_virtio_vsock = NULL;
        vsock_core_exit();
index a53b3a16b4f1f79554bcbc066a49d6a0e848f093..687e9fdb3d672b9e2a36c0255e5dae51ce2d03a0 100644 (file)
@@ -619,17 +619,17 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
 static void virtio_transport_wait_close(struct sock *sk, long timeout)
 {
        if (timeout) {
-               DEFINE_WAIT(wait);
+               DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+               add_wait_queue(sk_sleep(sk), &wait);
 
                do {
-                       prepare_to_wait(sk_sleep(sk), &wait,
-                                       TASK_INTERRUPTIBLE);
                        if (sk_wait_event(sk, &timeout,
-                                         sock_flag(sk, SOCK_DONE)))
+                                         sock_flag(sk, SOCK_DONE), &wait))
                                break;
                } while (!signal_pending(current) && timeout);
 
-               finish_wait(sk_sleep(sk), &wait);
+               remove_wait_queue(sk_sleep(sk), &wait);
        }
 }
 
index 3f816e2971ee26d97bfb4cd12976d6f67980fce6..5db731512014b3bddaf6f6553a646b7f8204a562 100644 (file)
@@ -572,16 +572,20 @@ struct d_level D_LEVEL[] = {
 size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
 
 
-struct genl_family wimax_gnl_family = {
-       .id = GENL_ID_GENERATE,
+static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
+       { .name = "msg", },
+};
+
+struct genl_family wimax_gnl_family __ro_after_init = {
        .name = "WiMAX",
        .version = WIMAX_GNL_VERSION,
        .hdrsize = 0,
        .maxattr = WIMAX_GNL_ATTR_MAX,
-};
-
-static const struct genl_multicast_group wimax_gnl_mcgrps[] = {
-       { .name = "msg", },
+       .module = THIS_MODULE,
+       .ops = wimax_gnl_ops,
+       .n_ops = ARRAY_SIZE(wimax_gnl_ops),
+       .mcgrps = wimax_gnl_mcgrps,
+       .n_mcgrps = ARRAY_SIZE(wimax_gnl_mcgrps),
 };
 
 
@@ -596,11 +600,7 @@ int __init wimax_subsys_init(void)
        d_parse_params(D_LEVEL, D_LEVEL_SIZE, wimax_debug_params,
                       "wimax.debug");
 
-       snprintf(wimax_gnl_family.name, sizeof(wimax_gnl_family.name),
-                "WiMAX");
-       result = genl_register_family_with_ops_groups(&wimax_gnl_family,
-                                                     wimax_gnl_ops,
-                                                     wimax_gnl_mcgrps);
+       result = genl_register_family(&wimax_gnl_family);
        if (unlikely(result < 0)) {
                pr_err("cannot register generic netlink family: %d\n", result);
                goto error_register_family;
index 8201e6d7449e969182152fcc2641a7e0247b7770..158c59ecf90a35a6af8ca353c7583e0bd0cda807 100644 (file)
@@ -210,11 +210,11 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
        if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE))
                return;
 
-       if (!wdev->p2p_started)
+       if (!wdev_running(wdev))
                return;
 
        rdev_stop_p2p_device(rdev, wdev);
-       wdev->p2p_started = false;
+       wdev->is_running = false;
 
        rdev->opencount--;
 
@@ -233,11 +233,11 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
        if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN))
                return;
 
-       if (!wdev->nan_started)
+       if (!wdev_running(wdev))
                return;
 
        rdev_stop_nan(rdev, wdev);
-       wdev->nan_started = false;
+       wdev->is_running = false;
 
        rdev->opencount--;
 }
@@ -562,6 +562,21 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
                                    c->limits[j].max > 1))
                                return -EINVAL;
 
+                       /*
+                        * This isn't well-defined right now. If you have an
+                        * IBSS interface, then its beacon interval may change
+                        * by joining other networks, and nothing prevents it
+                        * from doing that.
+                        * So technically we probably shouldn't even allow AP
+                        * and IBSS in the same interface, but it seems that
+                        * some drivers support that, possibly only with fixed
+                        * beacon intervals for IBSS.
+                        */
+                       if (WARN_ON(types & BIT(NL80211_IFTYPE_ADHOC) &&
+                                   c->beacon_int_min_gcd)) {
+                               return -EINVAL;
+                       }
+
                        cnt += c->limits[j].max;
                        /*
                         * Don't advertise an unsupported type
@@ -571,6 +586,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy)
                                return -EINVAL;
                }
 
+#ifndef CONFIG_WIRELESS_WDS
+               if (WARN_ON(all_iftypes & BIT(NL80211_IFTYPE_WDS)))
+                       return -EINVAL;
+#endif
+
                /* You can't even choose that many! */
                if (WARN_ON(cnt < c->max_interfaces))
                        return -EINVAL;
@@ -609,6 +629,11 @@ int wiphy_register(struct wiphy *wiphy)
                     !rdev->ops->add_nan_func || !rdev->ops->del_nan_func)))
                return -EINVAL;
 
+#ifndef CONFIG_WIRELESS_WDS
+       if (WARN_ON(wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS)))
+               return -EINVAL;
+#endif
+
        /*
         * if a wiphy has unsupported modes for regulatory channel enforcement,
         * opt-out of enforcement checking
index 08d2e948c9ad306a0ed40531efc4103877b9f4ae..ec5f33311769f6931a78e2f412b6c9eca2630191 100644 (file)
@@ -71,6 +71,7 @@ struct cfg80211_registered_device {
        struct list_head bss_list;
        struct rb_root bss_tree;
        u32 bss_generation;
+       u32 bss_entries;
        struct cfg80211_scan_request *scan_req; /* protected by RTNL */
        struct sk_buff *scan_msg;
        struct cfg80211_sched_scan_request __rcu *sched_scan_req;
@@ -345,7 +346,7 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
                       const u8 *ssid, int ssid_len,
                       const u8 *ie, int ie_len,
                       const u8 *key, int key_len, int key_idx,
-                      const u8 *sae_data, int sae_data_len);
+                      const u8 *auth_data, int auth_data_len);
 int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
                        struct net_device *dev,
                        struct ieee80211_channel *chan,
@@ -475,7 +476,7 @@ int ieee80211_get_ratemask(struct ieee80211_supported_band *sband,
                           u32 *mask);
 
 int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
-                                u32 beacon_int);
+                                enum nl80211_iftype iftype, u32 beacon_int);
 
 void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,
                               enum nl80211_iftype iftype, int num);
index fa2066b56f36c469e5686b7bea36a33d58bc13d3..2d8518a37eabc4795e97f7f600f14cdf0f63d7ff 100644 (file)
@@ -183,6 +183,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev,
                memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len);
                wdev->mesh_id_len = setup->mesh_id_len;
                wdev->chandef = setup->chandef;
+               wdev->beacon_interval = setup->beacon_interval;
        }
 
        return err;
@@ -258,6 +259,7 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,
        err = rdev_leave_mesh(rdev, dev);
        if (!err) {
                wdev->mesh_id_len = 0;
+               wdev->beacon_interval = 0;
                memset(&wdev->chandef, 0, sizeof(wdev->chandef));
                rdev_set_qos_map(rdev, dev, NULL);
        }
index cbb48e26a8715b8964333bf9b69c52010289bafb..bd1f7a159d6a3e5f29e0a36516144277124cabf1 100644 (file)
@@ -204,14 +204,14 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
                       const u8 *ssid, int ssid_len,
                       const u8 *ie, int ie_len,
                       const u8 *key, int key_len, int key_idx,
-                      const u8 *sae_data, int sae_data_len)
+                      const u8 *auth_data, int auth_data_len)
 {
        struct wireless_dev *wdev = dev->ieee80211_ptr;
        struct cfg80211_auth_request req = {
                .ie = ie,
                .ie_len = ie_len,
-               .sae_data = sae_data,
-               .sae_data_len = sae_data_len,
+               .auth_data = auth_data,
+               .auth_data_len = auth_data_len,
                .auth_type = auth_type,
                .key = key,
                .key_len = key_len,
index c510810f0b7c14909fc3b83ec31ce6c4ac45dfc2..24ab199ef2fcdcf641f07aaee25bec9e17e43f8e 100644 (file)
@@ -32,22 +32,8 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
                                   struct cfg80211_crypto_settings *settings,
                                   int cipher_limit);
 
-static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
-                           struct genl_info *info);
-static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
-                             struct genl_info *info);
-
 /* the netlink family */
-static struct genl_family nl80211_fam = {
-       .id = GENL_ID_GENERATE,         /* don't bother with a hardcoded ID */
-       .name = NL80211_GENL_NAME,      /* have users key off the name instead */
-       .hdrsize = 0,                   /* no private header */
-       .version = 1,                   /* no particular meaning now */
-       .maxattr = NL80211_ATTR_MAX,
-       .netnsok = true,
-       .pre_doit = nl80211_pre_doit,
-       .post_doit = nl80211_post_doit,
-};
+static struct genl_family nl80211_fam;
 
 /* multicast groups */
 enum nl80211_multicast_groups {
@@ -357,7 +343,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
        [NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 },
        [NL80211_ATTR_WDEV] = { .type = NLA_U64 },
        [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 },
-       [NL80211_ATTR_SAE_DATA] = { .type = NLA_BINARY, },
+       [NL80211_ATTR_AUTH_DATA] = { .type = NLA_BINARY, },
        [NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN },
        [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 },
        [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 },
@@ -414,6 +400,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
        [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 },
        [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 },
        [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED },
+       [NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY,
+                                   .len = FILS_MAX_KEK_LEN },
+       [NL80211_ATTR_FILS_NONCES] = { .len = 2 * FILS_NONCE_LEN },
+       [NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED] = { .type = NLA_FLAG, },
 };
 
 /* policy for the key attributes */
@@ -435,6 +425,7 @@ nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = {
        [NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG },
 };
 
+#ifdef CONFIG_PM
 /* policy for WoWLAN attributes */
 static const struct nla_policy
 nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = {
@@ -468,6 +459,7 @@ nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
        [NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = { .len = 1 },
        [NL80211_WOWLAN_TCP_WAKE_MASK] = { .len = 1 },
 };
+#endif /* CONFIG_PM */
 
 /* policy for coalesce rule attributes */
 static const struct nla_policy
@@ -551,13 +543,14 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
 
        if (!cb->args[0]) {
                err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
-                                 nl80211_fam.attrbuf, nl80211_fam.maxattr,
-                                 nl80211_policy);
+                                 genl_family_attrbuf(&nl80211_fam),
+                                 nl80211_fam.maxattr, nl80211_policy);
                if (err)
                        goto out_unlock;
 
-               *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk),
-                                                  nl80211_fam.attrbuf);
+               *wdev = __cfg80211_wdev_from_attrs(
+                                       sock_net(skb->sk),
+                                       genl_family_attrbuf(&nl80211_fam));
                if (IS_ERR(*wdev)) {
                        err = PTR_ERR(*wdev);
                        goto out_unlock;
@@ -1075,6 +1068,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy,
                     nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS,
                                c->radar_detect_regions)))
                        goto nla_put_failure;
+               if (c->beacon_int_min_gcd &&
+                   nla_put_u32(msg, NL80211_IFACE_COMB_BI_MIN_GCD,
+                               c->beacon_int_min_gcd))
+                       goto nla_put_failure;
 
                nla_nest_end(msg, nl_combi);
        }
@@ -1322,6 +1319,95 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg,
        return 0;
 }
 
+#define CMD(op, n)                                                     \
+        do {                                                           \
+               if (rdev->ops->op) {                                    \
+                       i++;                                            \
+                       if (nla_put_u32(msg, i, NL80211_CMD_ ## n))     \
+                               goto nla_put_failure;                   \
+               }                                                       \
+       } while (0)
+
+static int nl80211_add_commands_unsplit(struct cfg80211_registered_device *rdev,
+                                       struct sk_buff *msg)
+{
+       int i = 0;
+
+       /*
+        * do *NOT* add anything into this function, new things need to be
+        * advertised only to new versions of userspace that can deal with
+        * the split (and they can't possibly care about new features...
+        */
+       CMD(add_virtual_intf, NEW_INTERFACE);
+       CMD(change_virtual_intf, SET_INTERFACE);
+       CMD(add_key, NEW_KEY);
+       CMD(start_ap, START_AP);
+       CMD(add_station, NEW_STATION);
+       CMD(add_mpath, NEW_MPATH);
+       CMD(update_mesh_config, SET_MESH_CONFIG);
+       CMD(change_bss, SET_BSS);
+       CMD(auth, AUTHENTICATE);
+       CMD(assoc, ASSOCIATE);
+       CMD(deauth, DEAUTHENTICATE);
+       CMD(disassoc, DISASSOCIATE);
+       CMD(join_ibss, JOIN_IBSS);
+       CMD(join_mesh, JOIN_MESH);
+       CMD(set_pmksa, SET_PMKSA);
+       CMD(del_pmksa, DEL_PMKSA);
+       CMD(flush_pmksa, FLUSH_PMKSA);
+       if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
+               CMD(remain_on_channel, REMAIN_ON_CHANNEL);
+       CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
+       CMD(mgmt_tx, FRAME);
+       CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
+       if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
+               i++;
+               if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
+                       goto nla_put_failure;
+       }
+       if (rdev->ops->set_monitor_channel || rdev->ops->start_ap ||
+           rdev->ops->join_mesh) {
+               i++;
+               if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
+                       goto nla_put_failure;
+       }
+       CMD(set_wds_peer, SET_WDS_PEER);
+       if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
+               CMD(tdls_mgmt, TDLS_MGMT);
+               CMD(tdls_oper, TDLS_OPER);
+       }
+       if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
+               CMD(sched_scan_start, START_SCHED_SCAN);
+       CMD(probe_client, PROBE_CLIENT);
+       CMD(set_noack_map, SET_NOACK_MAP);
+       if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
+               i++;
+               if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
+                       goto nla_put_failure;
+       }
+       CMD(start_p2p_device, START_P2P_DEVICE);
+       CMD(set_mcast_rate, SET_MCAST_RATE);
+#ifdef CONFIG_NL80211_TESTMODE
+       CMD(testmode_cmd, TESTMODE);
+#endif
+
+       if (rdev->ops->connect || rdev->ops->auth) {
+               i++;
+               if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
+                       goto nla_put_failure;
+       }
+
+       if (rdev->ops->disconnect || rdev->ops->deauth) {
+               i++;
+               if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
+                       goto nla_put_failure;
+       }
+
+       return i;
+ nla_put_failure:
+       return -ENOBUFS;
+}
+
 struct nl80211_dump_wiphy_state {
        s64 filter_wiphy;
        long start;
@@ -1549,68 +1635,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
                if (!nl_cmds)
                        goto nla_put_failure;
 
-               i = 0;
-#define CMD(op, n)                                                     \
-                do {                                                   \
-                       if (rdev->ops->op) {                            \
-                               i++;                                    \
-                               if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
-                                       goto nla_put_failure;           \
-                       }                                               \
-               } while (0)
-
-               CMD(add_virtual_intf, NEW_INTERFACE);
-               CMD(change_virtual_intf, SET_INTERFACE);
-               CMD(add_key, NEW_KEY);
-               CMD(start_ap, START_AP);
-               CMD(add_station, NEW_STATION);
-               CMD(add_mpath, NEW_MPATH);
-               CMD(update_mesh_config, SET_MESH_CONFIG);
-               CMD(change_bss, SET_BSS);
-               CMD(auth, AUTHENTICATE);
-               CMD(assoc, ASSOCIATE);
-               CMD(deauth, DEAUTHENTICATE);
-               CMD(disassoc, DISASSOCIATE);
-               CMD(join_ibss, JOIN_IBSS);
-               CMD(join_mesh, JOIN_MESH);
-               CMD(set_pmksa, SET_PMKSA);
-               CMD(del_pmksa, DEL_PMKSA);
-               CMD(flush_pmksa, FLUSH_PMKSA);
-               if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
-                       CMD(remain_on_channel, REMAIN_ON_CHANNEL);
-               CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
-               CMD(mgmt_tx, FRAME);
-               CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
-               if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
-                       i++;
-                       if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
-                               goto nla_put_failure;
-               }
-               if (rdev->ops->set_monitor_channel || rdev->ops->start_ap ||
-                   rdev->ops->join_mesh) {
-                       i++;
-                       if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
-                               goto nla_put_failure;
-               }
-               CMD(set_wds_peer, SET_WDS_PEER);
-               if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
-                       CMD(tdls_mgmt, TDLS_MGMT);
-                       CMD(tdls_oper, TDLS_OPER);
-               }
-               if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
-                       CMD(sched_scan_start, START_SCHED_SCAN);
-               CMD(probe_client, PROBE_CLIENT);
-               CMD(set_noack_map, SET_NOACK_MAP);
-               if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
-                       i++;
-                       if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
-                               goto nla_put_failure;
-               }
-               CMD(start_p2p_device, START_P2P_DEVICE);
-               CMD(set_mcast_rate, SET_MCAST_RATE);
-#ifdef CONFIG_NL80211_TESTMODE
-               CMD(testmode_cmd, TESTMODE);
-#endif
+               i = nl80211_add_commands_unsplit(rdev, msg);
+               if (i < 0)
+                       goto nla_put_failure;
                if (state->split) {
                        CMD(crit_proto_start, CRIT_PROTOCOL_START);
                        CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
@@ -1620,22 +1647,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
                        if (rdev->wiphy.features &
                                        NL80211_FEATURE_SUPPORTS_WMM_ADMISSION)
                                CMD(add_tx_ts, ADD_TX_TS);
+                       CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST);
+                       CMD(update_connect_params, UPDATE_CONNECT_PARAMS);
                }
-               /* add into the if now */
 #undef CMD
 
-               if (rdev->ops->connect || rdev->ops->auth) {
-                       i++;
-                       if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
-                               goto nla_put_failure;
-               }
-
-               if (rdev->ops->disconnect || rdev->ops->deauth) {
-                       i++;
-                       if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
-                               goto nla_put_failure;
-               }
-
                nla_nest_end(msg, nl_cmds);
                state->split_start++;
                if (state->split)
@@ -1881,7 +1897,7 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb,
                                    struct netlink_callback *cb,
                                    struct nl80211_dump_wiphy_state *state)
 {
-       struct nlattr **tb = nl80211_fam.attrbuf;
+       struct nlattr **tb = genl_family_attrbuf(&nl80211_fam);
        int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
                              tb, nl80211_fam.maxattr, nl80211_policy);
        /* ignore parse errors for backward compatibility */
@@ -2296,10 +2312,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
                nla_for_each_nested(nl_txq_params,
                                    info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS],
                                    rem_txq_params) {
-                       result = nla_parse(tb, NL80211_TXQ_ATTR_MAX,
-                                          nla_data(nl_txq_params),
-                                          nla_len(nl_txq_params),
-                                          txq_params_policy);
+                       result = nla_parse_nested(tb, NL80211_TXQ_ATTR_MAX,
+                                                 nl_txq_params,
+                                                 txq_params_policy);
                        if (result)
                                return result;
                        result = parse_txq_params(tb, &txq_params);
@@ -3549,8 +3564,8 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
                sband = rdev->wiphy.bands[band];
                if (sband == NULL)
                        return -EINVAL;
-               err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates),
-                               nla_len(tx_rates), nl80211_txattr_policy);
+               err = nla_parse_nested(tb, NL80211_TXRATE_MAX, tx_rates,
+                                      nl80211_txattr_policy);
                if (err)
                        return err;
                if (tb[NL80211_TXRATE_LEGACY]) {
@@ -3756,12 +3771,23 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
                if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
                    auth_type == NL80211_AUTHTYPE_SAE)
                        return false;
+               if (!wiphy_ext_feature_isset(&rdev->wiphy,
+                                            NL80211_EXT_FEATURE_FILS_STA) &&
+                   (auth_type == NL80211_AUTHTYPE_FILS_SK ||
+                    auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
+                    auth_type == NL80211_AUTHTYPE_FILS_PK))
+                       return false;
                return true;
        case NL80211_CMD_CONNECT:
        case NL80211_CMD_START_AP:
                /* SAE not supported yet */
                if (auth_type == NL80211_AUTHTYPE_SAE)
                        return false;
+               /* FILS not supported yet */
+               if (auth_type == NL80211_AUTHTYPE_FILS_SK ||
+                   auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
+                   auth_type == NL80211_AUTHTYPE_FILS_PK)
+                       return false;
                return true;
        default:
                return false;
@@ -3803,7 +3829,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
        params.dtim_period =
                nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]);
 
-       err = cfg80211_validate_beacon_int(rdev, params.beacon_interval);
+       err = cfg80211_validate_beacon_int(rdev, dev->ieee80211_ptr->iftype,
+                                          params.beacon_interval);
        if (err)
                return err;
 
@@ -6305,9 +6332,8 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 
        nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
                            rem_reg_rules) {
-               r = nla_parse(tb, NL80211_REG_RULE_ATTR_MAX,
-                             nla_data(nl_reg_rule), nla_len(nl_reg_rule),
-                             reg_rule_policy);
+               r = nla_parse_nested(tb, NL80211_REG_RULE_ATTR_MAX,
+                                    nl_reg_rule, reg_rule_policy);
                if (r)
                        goto bad_reg;
                r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]);
@@ -6374,8 +6400,8 @@ static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy,
        if (!nla_ok(nest, nla_len(nest)))
                return -EINVAL;
 
-       err = nla_parse(attr, NL80211_BSS_SELECT_ATTR_MAX, nla_data(nest),
-                       nla_len(nest), nl80211_bss_select_policy);
+       err = nla_parse_nested(attr, NL80211_BSS_SELECT_ATTR_MAX, nest,
+                              nl80211_bss_select_policy);
        if (err)
                return err;
 
@@ -6765,9 +6791,8 @@ nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans,
                if (WARN_ON(i >= n_plans))
                        return -EINVAL;
 
-               err = nla_parse(plan, NL80211_SCHED_SCAN_PLAN_MAX,
-                               nla_data(attr), nla_len(attr),
-                               nl80211_plan_policy);
+               err = nla_parse_nested(plan, NL80211_SCHED_SCAN_PLAN_MAX,
+                                      attr, nl80211_plan_policy);
                if (err)
                        return err;
 
@@ -6856,9 +6881,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
                                    tmp) {
                        struct nlattr *rssi;
 
-                       err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
-                                       nla_data(attr), nla_len(attr),
-                                       nl80211_match_policy);
+                       err = nla_parse_nested(tb,
+                                              NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
+                                              attr, nl80211_match_policy);
                        if (err)
                                return ERR_PTR(err);
                        /* add other standalone attributes here */
@@ -7029,9 +7054,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
                                    tmp) {
                        struct nlattr *ssid, *rssi;
 
-                       err = nla_parse(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
-                                       nla_data(attr), nla_len(attr),
-                                       nl80211_match_policy);
+                       err = nla_parse_nested(tb,
+                                              NL80211_SCHED_SCAN_MATCH_ATTR_MAX,
+                                              attr, nl80211_match_policy);
                        if (err)
                                goto out_free;
                        ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID];
@@ -7643,6 +7668,7 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
 
 static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
 {
+       struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
        struct survey_info survey;
        struct cfg80211_registered_device *rdev;
        struct wireless_dev *wdev;
@@ -7655,7 +7681,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
                return res;
 
        /* prepare_wdev_dump parsed the attributes */
-       radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
+       radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
 
        if (!wdev->netdev) {
                res = -EINVAL;
@@ -7708,8 +7734,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
        struct cfg80211_registered_device *rdev = info->user_ptr[0];
        struct net_device *dev = info->user_ptr[1];
        struct ieee80211_channel *chan;
-       const u8 *bssid, *ssid, *ie = NULL, *sae_data = NULL;
-       int err, ssid_len, ie_len = 0, sae_data_len = 0;
+       const u8 *bssid, *ssid, *ie = NULL, *auth_data = NULL;
+       int err, ssid_len, ie_len = 0, auth_data_len = 0;
        enum nl80211_auth_type auth_type;
        struct key_parse key;
        bool local_state_change;
@@ -7789,17 +7815,23 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
        if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_AUTHENTICATE))
                return -EINVAL;
 
-       if (auth_type == NL80211_AUTHTYPE_SAE &&
-           !info->attrs[NL80211_ATTR_SAE_DATA])
+       if ((auth_type == NL80211_AUTHTYPE_SAE ||
+            auth_type == NL80211_AUTHTYPE_FILS_SK ||
+            auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
+            auth_type == NL80211_AUTHTYPE_FILS_PK) &&
+           !info->attrs[NL80211_ATTR_AUTH_DATA])
                return -EINVAL;
 
-       if (info->attrs[NL80211_ATTR_SAE_DATA]) {
-               if (auth_type != NL80211_AUTHTYPE_SAE)
+       if (info->attrs[NL80211_ATTR_AUTH_DATA]) {
+               if (auth_type != NL80211_AUTHTYPE_SAE &&
+                   auth_type != NL80211_AUTHTYPE_FILS_SK &&
+                   auth_type != NL80211_AUTHTYPE_FILS_SK_PFS &&
+                   auth_type != NL80211_AUTHTYPE_FILS_PK)
                        return -EINVAL;
-               sae_data = nla_data(info->attrs[NL80211_ATTR_SAE_DATA]);
-               sae_data_len = nla_len(info->attrs[NL80211_ATTR_SAE_DATA]);
+               auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]);
+               auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]);
                /* need to include at least Auth Transaction and Status Code */
-               if (sae_data_len < 4)
+               if (auth_data_len < 4)
                        return -EINVAL;
        }
 
@@ -7816,7 +7848,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
        err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
                                 ssid, ssid_len, ie, ie_len,
                                 key.p.key, key.p.key_len, key.idx,
-                                sae_data, sae_data_len);
+                                auth_data, auth_data_len);
        wdev_unlock(dev->ieee80211_ptr);
        return err;
 }
@@ -7995,6 +8027,15 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
                req.flags |= ASSOC_REQ_USE_RRM;
        }
 
+       if (info->attrs[NL80211_ATTR_FILS_KEK]) {
+               req.fils_kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]);
+               req.fils_kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]);
+               if (!info->attrs[NL80211_ATTR_FILS_NONCES])
+                       return -EINVAL;
+               req.fils_nonces =
+                       nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]);
+       }
+
        err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
        if (!err) {
                wdev_lock(dev->ieee80211_ptr);
@@ -8152,7 +8193,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
                ibss.beacon_interval =
                        nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
 
-       err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval);
+       err = cfg80211_validate_beacon_int(rdev, NL80211_IFTYPE_ADHOC,
+                                          ibss.beacon_interval);
        if (err)
                return err;
 
@@ -8478,14 +8520,14 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
                 */
                phy_idx = cb->args[0] - 1;
        } else {
+               struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
+
                err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
-                                 nl80211_fam.attrbuf, nl80211_fam.maxattr,
-                                 nl80211_policy);
+                                 attrbuf, nl80211_fam.maxattr, nl80211_policy);
                if (err)
                        goto out_err;
 
-               rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk),
-                                                 nl80211_fam.attrbuf);
+               rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
                if (IS_ERR(rdev)) {
                        err = PTR_ERR(rdev);
                        goto out_err;
@@ -8493,9 +8535,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb,
                phy_idx = rdev->wiphy_idx;
                rdev = NULL;
 
-               if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA])
-                       cb->args[1] =
-                               (long)nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA];
+               if (attrbuf[NL80211_ATTR_TESTDATA])
+                       cb->args[1] = (long)attrbuf[NL80211_ATTR_TESTDATA];
        }
 
        if (cb->args[1]) {
@@ -8726,6 +8767,37 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
        return err;
 }
 
+static int nl80211_update_connect_params(struct sk_buff *skb,
+                                        struct genl_info *info)
+{
+       struct cfg80211_connect_params connect = {};
+       struct cfg80211_registered_device *rdev = info->user_ptr[0];
+       struct net_device *dev = info->user_ptr[1];
+       struct wireless_dev *wdev = dev->ieee80211_ptr;
+       u32 changed = 0;
+       int ret;
+
+       if (!rdev->ops->update_connect_params)
+               return -EOPNOTSUPP;
+
+       if (info->attrs[NL80211_ATTR_IE]) {
+               if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
+                       return -EINVAL;
+               connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
+               connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+               changed |= UPDATE_ASSOC_IES;
+       }
+
+       wdev_lock(dev->ieee80211_ptr);
+       if (!wdev->current_bss)
+               ret = -ENOLINK;
+       else
+               ret = rdev_update_connect_params(rdev, dev, &connect, changed);
+       wdev_unlock(dev->ieee80211_ptr);
+
+       return ret;
+}
+
 static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
        struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -9417,7 +9489,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
                setup.beacon_interval =
                        nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
 
-               err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval);
+               err = cfg80211_validate_beacon_int(rdev,
+                                                  NL80211_IFTYPE_MESH_POINT,
+                                                  setup.beacon_interval);
                if (err)
                        return err;
        }
@@ -9728,9 +9802,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,
        if (!rdev->wiphy.wowlan->tcp)
                return -EINVAL;
 
-       err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP,
-                       nla_data(attr), nla_len(attr),
-                       nl80211_wowlan_tcp_policy);
+       err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TCP, attr,
+                              nl80211_wowlan_tcp_policy);
        if (err)
                return err;
 
@@ -9875,9 +9948,7 @@ static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev,
                goto out;
        }
 
-       err = nla_parse(tb, NL80211_ATTR_MAX,
-                       nla_data(attr), nla_len(attr),
-                       nl80211_policy);
+       err = nla_parse_nested(tb, NL80211_ATTR_MAX, attr, nl80211_policy);
        if (err)
                goto out;
 
@@ -9911,10 +9982,9 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
                goto set_wakeup;
        }
 
-       err = nla_parse(tb, MAX_NL80211_WOWLAN_TRIG,
-                       nla_data(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]),
-                       nla_len(info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]),
-                       nl80211_wowlan_policy);
+       err = nla_parse_nested(tb, MAX_NL80211_WOWLAN_TRIG,
+                              info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS],
+                              nl80211_wowlan_policy);
        if (err)
                return err;
 
@@ -9996,8 +10066,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
                                    rem) {
                        u8 *mask_pat;
 
-                       nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
-                                 nla_len(pat), NULL);
+                       nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
+                                        NULL);
                        err = -EINVAL;
                        if (!pat_tb[NL80211_PKTPAT_MASK] ||
                            !pat_tb[NL80211_PKTPAT_PATTERN])
@@ -10207,8 +10277,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
        int rem, pat_len, mask_len, pkt_offset, n_patterns = 0;
        struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
 
-       err = nla_parse(tb, NL80211_ATTR_COALESCE_RULE_MAX, nla_data(rule),
-                       nla_len(rule), nl80211_coalesce_policy);
+       err = nla_parse_nested(tb, NL80211_ATTR_COALESCE_RULE_MAX, rule,
+                              nl80211_coalesce_policy);
        if (err)
                return err;
 
@@ -10246,8 +10316,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
                            rem) {
                u8 *mask_pat;
 
-               nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
-                         nla_len(pat), NULL);
+               nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL);
                if (!pat_tb[NL80211_PKTPAT_MASK] ||
                    !pat_tb[NL80211_PKTPAT_PATTERN])
                        return -EINVAL;
@@ -10366,10 +10435,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
        if (!info->attrs[NL80211_ATTR_REKEY_DATA])
                return -EINVAL;
 
-       err = nla_parse(tb, MAX_NL80211_REKEY_DATA,
-                       nla_data(info->attrs[NL80211_ATTR_REKEY_DATA]),
-                       nla_len(info->attrs[NL80211_ATTR_REKEY_DATA]),
-                       nl80211_rekey_policy);
+       err = nla_parse_nested(tb, MAX_NL80211_REKEY_DATA,
+                              info->attrs[NL80211_ATTR_REKEY_DATA],
+                              nl80211_rekey_policy);
        if (err)
                return err;
 
@@ -10518,7 +10586,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
        if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)
                return -EOPNOTSUPP;
 
-       if (wdev->p2p_started)
+       if (wdev_running(wdev))
                return 0;
 
        if (rfkill_blocked(rdev->rfkill))
@@ -10528,7 +10596,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
        if (err)
                return err;
 
-       wdev->p2p_started = true;
+       wdev->is_running = true;
        rdev->opencount++;
 
        return 0;
@@ -10560,7 +10628,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
        if (wdev->iftype != NL80211_IFTYPE_NAN)
                return -EOPNOTSUPP;
 
-       if (wdev->nan_started)
+       if (wdev_running(wdev))
                return -EEXIST;
 
        if (rfkill_blocked(rdev->rfkill))
@@ -10583,7 +10651,7 @@ static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
        if (err)
                return err;
 
-       wdev->nan_started = true;
+       wdev->is_running = true;
        rdev->opencount++;
 
        return 0;
@@ -10638,8 +10706,7 @@ static int handle_nan_filter(struct nlattr *attr_filter,
 
        i = 0;
        nla_for_each_nested(attr, attr_filter, rem) {
-               filter[i].filter = kmemdup(nla_data(attr), nla_len(attr),
-                                          GFP_KERNEL);
+               filter[i].filter = nla_memdup(attr, GFP_KERNEL);
                filter[i].len = nla_len(attr);
                i++;
        }
@@ -10668,7 +10735,7 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
        if (wdev->iftype != NL80211_IFTYPE_NAN)
                return -EOPNOTSUPP;
 
-       if (!wdev->nan_started)
+       if (!wdev_running(wdev))
                return -ENOTCONN;
 
        if (!info->attrs[NL80211_ATTR_NAN_FUNC])
@@ -10678,10 +10745,9 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
            wdev->owner_nlportid != info->snd_portid)
                return -ENOTCONN;
 
-       err = nla_parse(tb, NL80211_NAN_FUNC_ATTR_MAX,
-                       nla_data(info->attrs[NL80211_ATTR_NAN_FUNC]),
-                       nla_len(info->attrs[NL80211_ATTR_NAN_FUNC]),
-                       nl80211_nan_func_policy);
+       err = nla_parse_nested(tb, NL80211_NAN_FUNC_ATTR_MAX,
+                              info->attrs[NL80211_ATTR_NAN_FUNC],
+                              nl80211_nan_func_policy);
        if (err)
                return err;
 
@@ -10776,9 +10842,9 @@ static int nl80211_nan_add_func(struct sk_buff *skb,
        if (tb[NL80211_NAN_FUNC_SRF]) {
                struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR];
 
-               err = nla_parse(srf_tb, NL80211_NAN_SRF_ATTR_MAX,
-                               nla_data(tb[NL80211_NAN_FUNC_SRF]),
-                               nla_len(tb[NL80211_NAN_FUNC_SRF]), NULL);
+               err = nla_parse_nested(srf_tb, NL80211_NAN_SRF_ATTR_MAX,
+                                      tb[NL80211_NAN_FUNC_SRF],
+                                      nl80211_nan_srf_policy);
                if (err)
                        goto out;
 
@@ -10904,7 +10970,7 @@ static int nl80211_nan_del_func(struct sk_buff *skb,
        if (wdev->iftype != NL80211_IFTYPE_NAN)
                return -EOPNOTSUPP;
 
-       if (!wdev->nan_started)
+       if (!wdev_running(wdev))
                return -ENOTCONN;
 
        if (!info->attrs[NL80211_ATTR_COOKIE])
@@ -10932,7 +10998,7 @@ static int nl80211_nan_change_config(struct sk_buff *skb,
        if (wdev->iftype != NL80211_IFTYPE_NAN)
                return -EOPNOTSUPP;
 
-       if (!wdev->nan_started)
+       if (!wdev_running(wdev))
                return -ENOTCONN;
 
        if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) {
@@ -11244,10 +11310,7 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
                                return -EINVAL;
 
                        if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
-                               if (wdev->netdev &&
-                                   !netif_running(wdev->netdev))
-                                       return -ENETDOWN;
-                               if (!wdev->netdev && !wdev->p2p_started)
+                               if (!wdev_running(wdev))
                                        return -ENETDOWN;
                        }
 
@@ -11277,6 +11340,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
                                       struct cfg80211_registered_device **rdev,
                                       struct wireless_dev **wdev)
 {
+       struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam);
        u32 vid, subcmd;
        unsigned int i;
        int vcmd_idx = -1;
@@ -11312,31 +11376,28 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
        }
 
        err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
-                         nl80211_fam.attrbuf, nl80211_fam.maxattr,
-                         nl80211_policy);
+                         attrbuf, nl80211_fam.maxattr, nl80211_policy);
        if (err)
                goto out_unlock;
 
-       if (!nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID] ||
-           !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
+       if (!attrbuf[NL80211_ATTR_VENDOR_ID] ||
+           !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
                err = -EINVAL;
                goto out_unlock;
        }
 
-       *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk),
-                                          nl80211_fam.attrbuf);
+       *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf);
        if (IS_ERR(*wdev))
                *wdev = NULL;
 
-       *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk),
-                                          nl80211_fam.attrbuf);
+       *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
        if (IS_ERR(*rdev)) {
                err = PTR_ERR(*rdev);
                goto out_unlock;
        }
 
-       vid = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID]);
-       subcmd = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
+       vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]);
+       subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
 
        for (i = 0; i < (*rdev)->wiphy.n_vendor_commands; i++) {
                const struct wiphy_vendor_command *vcmd;
@@ -11360,9 +11421,9 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
                goto out_unlock;
        }
 
-       if (nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]) {
-               data = nla_data(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]);
-               data_len = nla_len(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]);
+       if (attrbuf[NL80211_ATTR_VENDOR_DATA]) {
+               data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]);
+               data_len = nla_len(attrbuf[NL80211_ATTR_VENDOR_DATA]);
        }
 
        /* 0 is the first index - add 1 to parse only once */
@@ -11410,10 +11471,7 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
                        return -EINVAL;
 
                if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
-                       if (wdev->netdev &&
-                           !netif_running(wdev->netdev))
-                               return -ENETDOWN;
-                       if (!wdev->netdev && !wdev->p2p_started)
+                       if (!wdev_running(wdev))
                                return -ENETDOWN;
                }
        }
@@ -11726,6 +11784,31 @@ static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb,
        return 0;
 }
 
+static int nl80211_set_multicast_to_unicast(struct sk_buff *skb,
+                                           struct genl_info *info)
+{
+       struct cfg80211_registered_device *rdev = info->user_ptr[0];
+       struct net_device *dev = info->user_ptr[1];
+       struct wireless_dev *wdev = dev->ieee80211_ptr;
+       const struct nlattr *nla;
+       bool enabled;
+
+       if (netif_running(dev))
+               return -EBUSY;
+
+       if (!rdev->ops->set_multicast_to_unicast)
+               return -EOPNOTSUPP;
+
+       if (wdev->iftype != NL80211_IFTYPE_AP &&
+           wdev->iftype != NL80211_IFTYPE_P2P_GO)
+               return -EOPNOTSUPP;
+
+       nla = info->attrs[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED];
+       enabled = nla_get_flag(nla);
+
+       return rdev_set_multicast_to_unicast(rdev, dev, enabled);
+}
+
 #define NL80211_FLAG_NEED_WIPHY                0x01
 #define NL80211_FLAG_NEED_NETDEV       0x02
 #define NL80211_FLAG_NEED_RTNL         0x04
@@ -11784,29 +11867,15 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
                        info->user_ptr[1] = wdev;
                }
 
-               if (dev) {
-                       if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
-                           !netif_running(dev)) {
-                               if (rtnl)
-                                       rtnl_unlock();
-                               return -ENETDOWN;
-                       }
+               if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
+                   !wdev_running(wdev)) {
+                       if (rtnl)
+                               rtnl_unlock();
+                       return -ENETDOWN;
+               }
 
+               if (dev)
                        dev_hold(dev);
-               } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) {
-                       if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE &&
-                           !wdev->p2p_started) {
-                               if (rtnl)
-                                       rtnl_unlock();
-                               return -ENETDOWN;
-                       }
-                       if (wdev->iftype == NL80211_IFTYPE_NAN &&
-                           !wdev->nan_started) {
-                               if (rtnl)
-                                       rtnl_unlock();
-                               return -ENETDOWN;
-                       }
-               }
 
                info->user_ptr[0] = rdev;
        }
@@ -12178,6 +12247,14 @@ static const struct genl_ops nl80211_ops[] = {
                .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
                                  NL80211_FLAG_NEED_RTNL,
        },
+       {
+               .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS,
+               .doit = nl80211_update_connect_params,
+               .policy = nl80211_policy,
+               .flags = GENL_ADMIN_PERM,
+               .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+                                 NL80211_FLAG_NEED_RTNL,
+       },
        {
                .cmd = NL80211_CMD_DISCONNECT,
                .doit = nl80211_disconnect,
@@ -12599,6 +12676,29 @@ static const struct genl_ops nl80211_ops[] = {
                .internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
                                  NL80211_FLAG_NEED_RTNL,
        },
+       {
+               .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST,
+               .doit = nl80211_set_multicast_to_unicast,
+               .policy = nl80211_policy,
+               .flags = GENL_UNS_ADMIN_PERM,
+               .internal_flags = NL80211_FLAG_NEED_NETDEV |
+                                 NL80211_FLAG_NEED_RTNL,
+       },
+};
+
+static struct genl_family nl80211_fam __ro_after_init = {
+       .name = NL80211_GENL_NAME,      /* have users key off the name instead */
+       .hdrsize = 0,                   /* no private header */
+       .version = 1,                   /* no particular meaning now */
+       .maxattr = NL80211_ATTR_MAX,
+       .netnsok = true,
+       .pre_doit = nl80211_pre_doit,
+       .post_doit = nl80211_post_doit,
+       .module = THIS_MODULE,
+       .ops = nl80211_ops,
+       .n_ops = ARRAY_SIZE(nl80211_ops),
+       .mcgrps = nl80211_mcgrps,
+       .n_mcgrps = ARRAY_SIZE(nl80211_mcgrps),
 };
 
 /* notification functions */
@@ -14563,12 +14663,11 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev)
 
 /* initialisation/exit functions */
 
-int nl80211_init(void)
+int __init nl80211_init(void)
 {
        int err;
 
-       err = genl_register_family_with_ops_groups(&nl80211_fam, nl80211_ops,
-                                                  nl80211_mcgrps);
+       err = genl_register_family(&nl80211_fam);
        if (err)
                return err;
 
index 11cf83c8ad4f5008abb3f1cf8f59058027f2bb2d..2f425075ada8eae81c8ecb04d975d0c9f0d36578 100644 (file)
@@ -490,6 +490,18 @@ static inline int rdev_connect(struct cfg80211_registered_device *rdev,
        return ret;
 }
 
+static inline int
+rdev_update_connect_params(struct cfg80211_registered_device *rdev,
+                          struct net_device *dev,
+                          struct cfg80211_connect_params *sme, u32 changed)
+{
+       int ret;
+       trace_rdev_update_connect_params(&rdev->wiphy, dev, sme, changed);
+       ret = rdev->ops->update_connect_params(&rdev->wiphy, dev, sme, changed);
+       trace_rdev_return_int(&rdev->wiphy, ret);
+       return ret;
+}
+
 static inline int rdev_disconnect(struct cfg80211_registered_device *rdev,
                                  struct net_device *dev, u16 reason_code)
 {
@@ -562,6 +574,18 @@ static inline int rdev_set_wds_peer(struct cfg80211_registered_device *rdev,
        return ret;
 }
 
+static inline int
+rdev_set_multicast_to_unicast(struct cfg80211_registered_device *rdev,
+                             struct net_device *dev,
+                             const bool enabled)
+{
+       int ret;
+       trace_rdev_set_multicast_to_unicast(&rdev->wiphy, dev, enabled);
+       ret = rdev->ops->set_multicast_to_unicast(&rdev->wiphy, dev, enabled);
+       trace_rdev_return_int(&rdev->wiphy, ret);
+       return ret;
+}
+
 static inline void rdev_rfkill_poll(struct cfg80211_registered_device *rdev)
 {
        trace_rdev_rfkill_poll(&rdev->wiphy);
index b5bd58d0f73129104e32a07155b03f915c343d54..35ad69fd08383a2a8392170d2a580fc4549276d7 100644 (file)
  * also linked into the probe response struct.
  */
 
+/*
+ * Limit the number of BSS entries stored in mac80211. Each one is
+ * a bit over 4k at most, so this limits to roughly 4-5M of memory.
+ * If somebody wants to really attack this though, they'd likely
+ * use small beacons, and only one type of frame, limiting each of
+ * the entries to a much smaller size (in order to generate more
+ * entries in total, so overhead is bigger.)
+ */
+static int bss_entries_limit = 1000;
+module_param(bss_entries_limit, int, 0644);
+MODULE_PARM_DESC(bss_entries_limit,
+                 "limit to number of scan BSS entries (per wiphy, default 1000)");
+
 #define IEEE80211_SCAN_RESULT_EXPIRE   (30 * HZ)
 
 static void bss_free(struct cfg80211_internal_bss *bss)
@@ -137,6 +150,10 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev,
 
        list_del_init(&bss->list);
        rb_erase(&bss->rbn, &rdev->bss_tree);
+       rdev->bss_entries--;
+       WARN_ONCE((rdev->bss_entries == 0) ^ list_empty(&rdev->bss_list),
+                 "rdev bss entries[%d]/list[empty:%d] corruption\n",
+                 rdev->bss_entries, list_empty(&rdev->bss_list));
        bss_ref_put(rdev, bss);
        return true;
 }
@@ -163,6 +180,40 @@ static void __cfg80211_bss_expire(struct cfg80211_registered_device *rdev,
                rdev->bss_generation++;
 }
 
+static bool cfg80211_bss_expire_oldest(struct cfg80211_registered_device *rdev)
+{
+       struct cfg80211_internal_bss *bss, *oldest = NULL;
+       bool ret;
+
+       lockdep_assert_held(&rdev->bss_lock);
+
+       list_for_each_entry(bss, &rdev->bss_list, list) {
+               if (atomic_read(&bss->hold))
+                       continue;
+
+               if (!list_empty(&bss->hidden_list) &&
+                   !bss->pub.hidden_beacon_bss)
+                       continue;
+
+               if (oldest && time_before(oldest->ts, bss->ts))
+                       continue;
+               oldest = bss;
+       }
+
+       if (WARN_ON(!oldest))
+               return false;
+
+       /*
+        * The callers make sure to increase rdev->bss_generation if anything
+        * gets removed (and a new entry added), so there's no need to also do
+        * it here.
+        */
+
+       ret = __cfg80211_unlink_bss(rdev, oldest);
+       WARN_ON(!ret);
+       return ret;
+}
+
 void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
                           bool send_message)
 {
@@ -689,6 +740,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
        const u8 *ie;
        int i, ssidlen;
        u8 fold = 0;
+       u32 n_entries = 0;
 
        ies = rcu_access_pointer(new->pub.beacon_ies);
        if (WARN_ON(!ies))
@@ -712,6 +764,12 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
        /* This is the bad part ... */
 
        list_for_each_entry(bss, &rdev->bss_list, list) {
+               /*
+                * we're iterating all the entries anyway, so take the
+                * opportunity to validate the list length accounting
+                */
+               n_entries++;
+
                if (!ether_addr_equal(bss->pub.bssid, new->pub.bssid))
                        continue;
                if (bss->pub.channel != new->pub.channel)
@@ -740,6 +798,10 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
                                   new->pub.beacon_ies);
        }
 
+       WARN_ONCE(n_entries != rdev->bss_entries,
+                 "rdev bss entries[%d]/list[len:%d] corruption\n",
+                 rdev->bss_entries, n_entries);
+
        return true;
 }
 
@@ -894,7 +956,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
                        }
                }
 
+               if (rdev->bss_entries >= bss_entries_limit &&
+                   !cfg80211_bss_expire_oldest(rdev)) {
+                       kfree(new);
+                       goto drop;
+               }
+
                list_add_tail(&new->list, &rdev->bss_list);
+               rdev->bss_entries++;
                rb_insert_bss(rdev, new);
                found = new;
        }
index a77db333927ec1bc8ab6efa4ee26b9fc7e711f5e..2b5bb380414b6c05eb15d3467a16e9be7f108cbd 100644 (file)
@@ -1088,7 +1088,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
                err = cfg80211_sme_disconnect(wdev, reason);
        else if (!rdev->ops->disconnect)
                cfg80211_mlme_down(rdev, dev);
-       else if (wdev->current_bss)
+       else if (wdev->ssid_len)
                err = rdev_disconnect(rdev, dev, reason);
 
        return err;
index 0082f4b01795a1c80cbf453e7767893a1caf4dd5..14b3f007826d91da6c5a71aee105b735eb9a2071 100644 (file)
@@ -104,13 +104,16 @@ static int wiphy_suspend(struct device *dev)
 
        rtnl_lock();
        if (rdev->wiphy.registered) {
-               if (!rdev->wiphy.wowlan_config)
+               if (!rdev->wiphy.wowlan_config) {
                        cfg80211_leave_all(rdev);
+                       cfg80211_process_rdev_events(rdev);
+               }
                if (rdev->ops->suspend)
                        ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config);
                if (ret == 1) {
                        /* Driver refuse to configure wowlan */
                        cfg80211_leave_all(rdev);
+                       cfg80211_process_rdev_events(rdev);
                        ret = rdev_suspend(rdev, NULL);
                }
        }
index a3d0a91b1e0957766bd77d8cc9a712ce335598ec..ea1b47e04fa474b601bd34a77d1e6cc05cf0e176 100644 (file)
@@ -1281,6 +1281,24 @@ TRACE_EVENT(rdev_connect,
                  __entry->wpa_versions, __entry->flags, MAC_PR_ARG(prev_bssid))
 );
 
+TRACE_EVENT(rdev_update_connect_params,
+       TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+                struct cfg80211_connect_params *sme, u32 changed),
+       TP_ARGS(wiphy, netdev, sme, changed),
+       TP_STRUCT__entry(
+               WIPHY_ENTRY
+               NETDEV_ENTRY
+               __field(u32, changed)
+       ),
+       TP_fast_assign(
+               WIPHY_ASSIGN;
+               NETDEV_ASSIGN;
+               __entry->changed = changed;
+       ),
+       TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", parameters changed: %u",
+                 WIPHY_PR_ARG, NETDEV_PR_ARG,  __entry->changed)
+);
+
 TRACE_EVENT(rdev_set_cqm_rssi_config,
        TP_PROTO(struct wiphy *wiphy,
                 struct net_device *netdev, s32 rssi_thold,
@@ -3030,6 +3048,25 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_abort_scan,
        TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
        TP_ARGS(wiphy, wdev)
 );
+
+TRACE_EVENT(rdev_set_multicast_to_unicast,
+       TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+                const bool enabled),
+       TP_ARGS(wiphy, netdev, enabled),
+       TP_STRUCT__entry(
+               WIPHY_ENTRY
+               NETDEV_ENTRY
+               __field(bool, enabled)
+       ),
+       TP_fast_assign(
+               WIPHY_ASSIGN;
+               NETDEV_ASSIGN;
+               __entry->enabled = enabled;
+       ),
+       TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", unicast: %s",
+                 WIPHY_PR_ARG, NETDEV_PR_ARG,
+                 BOOL_TO_STR(__entry->enabled))
+);
 #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
index 8edce22d1b9316bf79e99411ffe3ce66e3ae0a6f..e9d040d29846f87517c0b6b40f0ff9dbeca63a05 100644 (file)
@@ -13,6 +13,7 @@
 #include <net/dsfield.h>
 #include <linux/if_vlan.h>
 #include <linux/mpls.h>
+#include <linux/gcd.h>
 #include "core.h"
 #include "rdev-ops.h"
 
@@ -420,8 +421,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
 }
 EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
 
-static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr,
-                                   const u8 *addr, enum nl80211_iftype iftype)
+int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
+                                 const u8 *addr, enum nl80211_iftype iftype)
 {
        struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
        struct {
@@ -525,13 +526,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr,
 
        return 0;
 }
-
-int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
-                          enum nl80211_iftype iftype)
-{
-       return __ieee80211_data_to_8023(skb, NULL, addr, iftype);
-}
-EXPORT_SYMBOL(ieee80211_data_to_8023);
+EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr);
 
 int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
                             enum nl80211_iftype iftype,
@@ -746,24 +741,18 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
 void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
                              const u8 *addr, enum nl80211_iftype iftype,
                              const unsigned int extra_headroom,
-                             bool has_80211_header)
+                             const u8 *check_da, const u8 *check_sa)
 {
        unsigned int hlen = ALIGN(extra_headroom, 4);
        struct sk_buff *frame = NULL;
        u16 ethertype;
        u8 *payload;
-       int offset = 0, remaining, err;
+       int offset = 0, remaining;
        struct ethhdr eth;
        bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb);
        bool reuse_skb = false;
        bool last = false;
 
-       if (has_80211_header) {
-               err = __ieee80211_data_to_8023(skb, &eth, addr, iftype);
-               if (err)
-                       goto out;
-       }
-
        while (!last) {
                unsigned int subframe_len;
                int len;
@@ -780,8 +769,17 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
                        goto purge;
 
                offset += sizeof(struct ethhdr);
-               /* reuse skb for the last subframe */
                last = remaining <= subframe_len + padding;
+
+               /* FIXME: should we really accept multicast DA? */
+               if ((check_da && !is_multicast_ether_addr(eth.h_dest) &&
+                    !ether_addr_equal(check_da, eth.h_dest)) ||
+                   (check_sa && !ether_addr_equal(check_sa, eth.h_source))) {
+                       offset += len + padding;
+                       continue;
+               }
+
+               /* reuse skb for the last subframe */
                if (!skb_is_nonlinear(skb) && !reuse_frag && last) {
                        skb_pull(skb, offset);
                        frame = skb;
@@ -819,7 +817,6 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
 
  purge:
        __skb_queue_purge(list);
- out:
        dev_kfree_skb(skb);
 }
 EXPORT_SYMBOL(ieee80211_amsdu_to_8023s);
@@ -1162,7 +1159,8 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
                   58500000,
                   65000000,
                   78000000,
-                  0,
+               /* not in the spec, but some devices use this: */
+                  86500000,
                },
                {  13500000,
                   27000000,
@@ -1381,6 +1379,25 @@ static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id)
        return false;
 }
 
+static size_t skip_ie(const u8 *ies, size_t ielen, size_t pos)
+{
+       /* we assume a validly formed IEs buffer */
+       u8 len = ies[pos + 1];
+
+       pos += 2 + len;
+
+       /* the IE itself must have 255 bytes for fragments to follow */
+       if (len < 255)
+               return pos;
+
+       while (pos < ielen && ies[pos] == WLAN_EID_FRAGMENT) {
+               len = ies[pos + 1];
+               pos += 2 + len;
+       }
+
+       return pos;
+}
+
 size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
                              const u8 *ids, int n_ids,
                              const u8 *after_ric, int n_after_ric,
@@ -1390,14 +1407,14 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
 
        while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) {
                if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) {
-                       pos += 2 + ies[pos + 1];
+                       pos = skip_ie(ies, ielen, pos);
 
                        while (pos < ielen &&
                               !ieee80211_id_in_list(after_ric, n_after_ric,
                                                     ies[pos]))
-                               pos += 2 + ies[pos + 1];
+                               pos = skip_ie(ies, ielen, pos);
                } else {
-                       pos += 2 + ies[pos + 1];
+                       pos = skip_ie(ies, ielen, pos);
                }
        }
 
@@ -1558,31 +1575,57 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
 }
 EXPORT_SYMBOL(ieee80211_chandef_to_operating_class);
 
-int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
-                                u32 beacon_int)
+static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int,
+                                      u32 *beacon_int_gcd,
+                                      bool *beacon_int_different)
 {
        struct wireless_dev *wdev;
-       int res = 0;
 
-       if (beacon_int < 10 || beacon_int > 10000)
-               return -EINVAL;
+       *beacon_int_gcd = 0;
+       *beacon_int_different = false;
 
-       list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
+       list_for_each_entry(wdev, &wiphy->wdev_list, list) {
                if (!wdev->beacon_interval)
                        continue;
-               if (wdev->beacon_interval != beacon_int) {
-                       res = -EINVAL;
-                       break;
+
+               if (!*beacon_int_gcd) {
+                       *beacon_int_gcd = wdev->beacon_interval;
+                       continue;
                }
+
+               if (wdev->beacon_interval == *beacon_int_gcd)
+                       continue;
+
+               *beacon_int_different = true;
+               *beacon_int_gcd = gcd(*beacon_int_gcd, wdev->beacon_interval);
        }
 
-       return res;
+       if (new_beacon_int && *beacon_int_gcd != new_beacon_int) {
+               if (*beacon_int_gcd)
+                       *beacon_int_different = true;
+               *beacon_int_gcd = gcd(*beacon_int_gcd, new_beacon_int);
+       }
+}
+
+int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
+                                enum nl80211_iftype iftype, u32 beacon_int)
+{
+       /*
+        * This is just a basic pre-condition check; if interface combinations
+        * are possible the driver must already be checking those with a call
+        * to cfg80211_check_combinations(), in which case we'll validate more
+        * through the cfg80211_calculate_bi_data() call and code in
+        * cfg80211_iter_combinations().
+        */
+
+       if (beacon_int < 10 || beacon_int > 10000)
+               return -EINVAL;
+
+       return 0;
 }
 
 int cfg80211_iter_combinations(struct wiphy *wiphy,
-                              const int num_different_channels,
-                              const u8 radar_detect,
-                              const int iftype_num[NUM_NL80211_IFTYPES],
+                              struct iface_combination_params *params,
                               void (*iter)(const struct ieee80211_iface_combination *c,
                                            void *data),
                               void *data)
@@ -1592,8 +1635,23 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
        int i, j, iftype;
        int num_interfaces = 0;
        u32 used_iftypes = 0;
+       u32 beacon_int_gcd;
+       bool beacon_int_different;
 
-       if (radar_detect) {
+       /*
+        * This is a bit strange, since the iteration used to rely only on
+        * the data given by the driver, but here it now relies on context,
+        * in form of the currently operating interfaces.
+        * This is OK for all current users, and saves us from having to
+        * push the GCD calculations into all the drivers.
+        * In the future, this should probably rely more on data that's in
+        * cfg80211 already - the only thing not would appear to be any new
+        * interfaces (while being brought up) and channel/radar data.
+        */
+       cfg80211_calculate_bi_data(wiphy, params->new_beacon_int,
+                                  &beacon_int_gcd, &beacon_int_different);
+
+       if (params->radar_detect) {
                rcu_read_lock();
                regdom = rcu_dereference(cfg80211_regdomain);
                if (regdom)
@@ -1602,8 +1660,8 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
        }
 
        for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
-               num_interfaces += iftype_num[iftype];
-               if (iftype_num[iftype] > 0 &&
+               num_interfaces += params->iftype_num[iftype];
+               if (params->iftype_num[iftype] > 0 &&
                    !(wiphy->software_iftypes & BIT(iftype)))
                        used_iftypes |= BIT(iftype);
        }
@@ -1617,7 +1675,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
 
                if (num_interfaces > c->max_interfaces)
                        continue;
-               if (num_different_channels > c->num_different_channels)
+               if (params->num_different_channels > c->num_different_channels)
                        continue;
 
                limits = kmemdup(c->limits, sizeof(limits[0]) * c->n_limits,
@@ -1632,16 +1690,17 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
                                all_iftypes |= limits[j].types;
                                if (!(limits[j].types & BIT(iftype)))
                                        continue;
-                               if (limits[j].max < iftype_num[iftype])
+                               if (limits[j].max < params->iftype_num[iftype])
                                        goto cont;
-                               limits[j].max -= iftype_num[iftype];
+                               limits[j].max -= params->iftype_num[iftype];
                        }
                }
 
-               if (radar_detect != (c->radar_detect_widths & radar_detect))
+               if (params->radar_detect !=
+                       (c->radar_detect_widths & params->radar_detect))
                        goto cont;
 
-               if (radar_detect && c->radar_detect_regions &&
+               if (params->radar_detect && c->radar_detect_regions &&
                    !(c->radar_detect_regions & BIT(region)))
                        goto cont;
 
@@ -1653,6 +1712,14 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
                if ((all_iftypes & used_iftypes) != used_iftypes)
                        goto cont;
 
+               if (beacon_int_gcd) {
+                       if (c->beacon_int_min_gcd &&
+                           beacon_int_gcd < c->beacon_int_min_gcd)
+                               goto cont;
+                       if (!c->beacon_int_min_gcd && beacon_int_different)
+                               goto cont;
+               }
+
                /* This combination covered all interface types and
                 * supported the requested numbers, so we're good.
                 */
@@ -1675,14 +1742,11 @@ cfg80211_iter_sum_ifcombs(const struct ieee80211_iface_combination *c,
 }
 
 int cfg80211_check_combinations(struct wiphy *wiphy,
-                               const int num_different_channels,
-                               const u8 radar_detect,
-                               const int iftype_num[NUM_NL80211_IFTYPES])
+                               struct iface_combination_params *params)
 {
        int err, num = 0;
 
-       err = cfg80211_iter_combinations(wiphy, num_different_channels,
-                                        radar_detect, iftype_num,
+       err = cfg80211_iter_combinations(wiphy, params,
                                         cfg80211_iter_sum_ifcombs, &num);
        if (err)
                return err;
index 419bf5d463bde118274e9f4dc1fffba08f75f258..45cb7c699b65ba4e1c928e94c1a65d8ee7a8ef96 100644 (file)
@@ -388,14 +388,6 @@ static void xfrm_state_gc_task(struct work_struct *work)
                xfrm_state_gc_destroy(x);
 }
 
-static inline unsigned long make_jiffies(long secs)
-{
-       if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
-               return MAX_SCHEDULE_TIMEOUT-1;
-       else
-               return secs*HZ;
-}
-
 static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
 {
        struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
index 85c405fcccb0cb08ec59efa262961e08bb3389f1..a6d2a43bbf2e290368410a6338abefecfa114f54 100644 (file)
@@ -99,4 +99,10 @@ config SAMPLE_SECCOMP
          Build samples of seccomp filters using various methods of
          BPF filter construction.
 
+config SAMPLE_BLACKFIN_GPTIMERS
+       tristate "Build blackfin gptimers sample code -- loadable modules only"
+       depends on BLACKFIN && BFIN_GPTIMERS && m
+       help
+         Build samples of blackfin gptimers sample module.
+
 endif # SAMPLES
index 1a20169d85acf9a6e54cdaa0cd83650ac53b7d0c..e17d66d77f099c48f88bdc8518ca34c45b39cf29 100644 (file)
@@ -2,4 +2,4 @@
 
 obj-$(CONFIG_SAMPLES)  += kobject/ kprobes/ trace_events/ livepatch/ \
                           hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
-                          configfs/ connector/ v4l/ trace_printk/
+                          configfs/ connector/ v4l/ trace_printk/ blackfin/
diff --git a/samples/auxdisplay/.gitignore b/samples/auxdisplay/.gitignore
new file mode 100644 (file)
index 0000000..7af2228
--- /dev/null
@@ -0,0 +1 @@
+cfag12864b-example
diff --git a/samples/auxdisplay/Makefile b/samples/auxdisplay/Makefile
new file mode 100644 (file)
index 0000000..05e471f
--- /dev/null
@@ -0,0 +1,9 @@
+CC := $(CROSS_COMPILE)gcc
+CFLAGS := -I../../usr/include
+
+PROGS := cfag12864b-example
+
+all: $(PROGS)
+
+clean:
+       rm -fr $(PROGS)
diff --git a/samples/auxdisplay/cfag12864b-example.c b/samples/auxdisplay/cfag12864b-example.c
new file mode 100644 (file)
index 0000000..e7823ff
--- /dev/null
@@ -0,0 +1,281 @@
+/*
+ *    Filename: cfag12864b-example.c
+ *     Version: 0.1.0
+ * Description: cfag12864b LCD userspace example program
+ *     License: GPLv2
+ *
+ *      Author: Copyright (C) Miguel Ojeda Sandonis
+ *        Date: 2006-10-31
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+/*
+ * ------------------------
+ * start of cfag12864b code
+ * ------------------------
+ */
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#define CFAG12864B_WIDTH               (128)
+#define CFAG12864B_HEIGHT              (64)
+#define CFAG12864B_SIZE                        (128 * 64 / 8)
+#define CFAG12864B_BPB                 (8)
+#define CFAG12864B_ADDRESS(x, y)       ((y) * CFAG12864B_WIDTH / \
+                                       CFAG12864B_BPB + (x) / CFAG12864B_BPB)
+#define CFAG12864B_BIT(n)              (((unsigned char) 1) << (n))
+
+#undef CFAG12864B_DOCHECK
+#ifdef CFAG12864B_DOCHECK
+       #define CFAG12864B_CHECK(x, y)          ((x) < CFAG12864B_WIDTH && \
+                                               (y) < CFAG12864B_HEIGHT)
+#else
+       #define CFAG12864B_CHECK(x, y)          (1)
+#endif
+
+int cfag12864b_fd;
+unsigned char * cfag12864b_mem;
+unsigned char cfag12864b_buffer[CFAG12864B_SIZE];
+
+/*
+ * Open the framebuffer device at 'path' and map its memory read/write.
+ *
+ * Success:        return = 0
+ * open() failed:  return = -1
+ * mmap() failed:  return = -2 (the descriptor is closed again)
+ */
+static int cfag12864b_init(char *path)
+{
+       void *map;
+
+       cfag12864b_fd = open(path, O_RDWR);
+       if (cfag12864b_fd == -1)
+               return -1;
+
+       map = mmap(0, CFAG12864B_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
+                  cfag12864b_fd, 0);
+       cfag12864b_mem = map;
+       if (map != MAP_FAILED)
+               return 0;
+
+       close(cfag12864b_fd);
+       return -2;
+}
+
+/*
+ * exit a cfag12864b framebuffer device
+ *
+ * Unmaps the CFAG12864B_SIZE bytes at cfag12864b_mem and closes
+ * cfag12864b_fd. The results of munmap() and close() are not checked.
+ */
+static void cfag12864b_exit(void)
+{
+       munmap(cfag12864b_mem, CFAG12864B_SIZE);
+       close(cfag12864b_fd);
+}
+
+/*
+ * Turn pixel (x, y) on in the local buffer (nothing reaches the LCD
+ * until the buffer is blitted).
+ */
+static void cfag12864b_set(unsigned char x, unsigned char y)
+{
+       unsigned char mask = CFAG12864B_BIT(x % CFAG12864B_BPB);
+
+       if (!CFAG12864B_CHECK(x, y))
+               return;
+
+       cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] |= mask;
+}
+
+/*
+ * Turn pixel (x, y) off in the local buffer.
+ */
+static void cfag12864b_unset(unsigned char x, unsigned char y)
+{
+       unsigned char mask = CFAG12864B_BIT(x % CFAG12864B_BPB);
+
+       if (!CFAG12864B_CHECK(x, y))
+               return;
+
+       cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] &= ~mask;
+}
+
+/*
+ * Query pixel (x, y) in the local buffer.
+ *
+ * Pixel off (or rejected by CFAG12864B_CHECK): return = 0
+ * Pixel on:                                    return = 1
+ */
+static unsigned char cfag12864b_isset(unsigned char x, unsigned char y)
+{
+       if (!CFAG12864B_CHECK(x, y))
+               return 0;
+
+       return (cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] &
+               CFAG12864B_BIT(x % CFAG12864B_BPB)) ? 1 : 0;
+}
+
+/*
+ * Invert pixel (x, y): an off pixel is set, an on pixel is unset.
+ */
+static void cfag12864b_not(unsigned char x, unsigned char y)
+{
+       if (!cfag12864b_isset(x, y)) {
+               cfag12864b_set(x, y);
+               return;
+       }
+
+       cfag12864b_unset(x, y);
+}
+
+/*
+ * Set every pixel: each of the CFAG12864B_SIZE buffer bytes becomes 0xFF.
+ */
+static void cfag12864b_fill(void)
+{
+       memset(cfag12864b_buffer, 0xFF, CFAG12864B_SIZE);
+}
+
+/*
+ * Unset every pixel: each of the CFAG12864B_SIZE buffer bytes becomes 0.
+ */
+static void cfag12864b_clear(void)
+{
+       memset(cfag12864b_buffer, 0, CFAG12864B_SIZE);
+}
+
+/*
+ * Pack a [128*64] one-byte-per-pixel matrix into the local buffer.
+ *
+ * The matrix is row-major: pixel (x, y) lives at matrix[y * 128 + x].
+ * Pixel off: matrix[i] = 0
+ * Pixel on:  matrix[i] > 0
+ */
+static void cfag12864b_format(unsigned char * matrix)
+{
+       unsigned int row, col, bit;
+
+       for (row = 0; row < CFAG12864B_HEIGHT; row++) {
+               for (col = 0; col < CFAG12864B_WIDTH / CFAG12864B_BPB; col++) {
+                       /* one output byte covers CFAG12864B_BPB source pixels */
+                       unsigned char *dst = &cfag12864b_buffer[
+                               row * CFAG12864B_WIDTH / CFAG12864B_BPB + col];
+                       unsigned char *src = &matrix[
+                               row * CFAG12864B_WIDTH + col * CFAG12864B_BPB];
+
+                       *dst = 0;
+                       for (bit = 0; bit < CFAG12864B_BPB; bit++) {
+                               if (src[bit])
+                                       *dst |= CFAG12864B_BIT(bit);
+                       }
+               }
+       }
+}
+
+/*
+ * blit buffer to lcd
+ *
+ * Copies all CFAG12864B_SIZE bytes of cfag12864b_buffer into the mapping
+ * at cfag12864b_mem, so cfag12864b_init() must have succeeded first.
+ */
+static void cfag12864b_blit(void)
+{
+       memcpy(cfag12864b_mem, cfag12864b_buffer, CFAG12864B_SIZE);
+}
+
+/*
+ * ----------------------
+ * end of cfag12864b code
+ * ----------------------
+ */
+
+#include <stdio.h>
+
+#define EXAMPLES       6
+
+/*
+ * Run demo number n against the local buffer and print a one-line
+ * description of it. Values above EXAMPLES are ignored; the caller is
+ * expected to blit the buffer afterwards.
+ */
+static void example(unsigned char n)
+{
+       unsigned short x, y;
+       unsigned char matrix[CFAG12864B_WIDTH * CFAG12864B_HEIGHT];
+
+       if (n > EXAMPLES)
+               return;
+
+       printf("Example %i/%i - ", n, EXAMPLES);
+
+       switch (n) {
+       case 1:
+               printf("Draw points setting bits");
+               cfag12864b_clear();
+               /* even columns of even rows only */
+               for (x = 0; x < CFAG12864B_WIDTH; x += 2) {
+                       for (y = 0; y < CFAG12864B_HEIGHT; y += 2)
+                               cfag12864b_set(x, y);
+               }
+               break;
+
+       case 2:
+               printf("Clear the LCD");
+               cfag12864b_clear();
+               break;
+
+       case 3:
+               printf("Draw rows formatting a [128*64] matrix");
+               memset(matrix, 0, sizeof(matrix));
+               /* light up every even row completely */
+               for (y = 0; y < CFAG12864B_HEIGHT; y += 2) {
+                       for (x = 0; x < CFAG12864B_WIDTH; x++)
+                               matrix[y * CFAG12864B_WIDTH + x] = 1;
+               }
+               cfag12864b_format(matrix);
+               break;
+
+       case 4:
+               printf("Fill the lcd");
+               cfag12864b_fill();
+               break;
+
+       case 5:
+               printf("Draw columns unsetting bits");
+               /* switch off every even column */
+               for (x = 0; x < CFAG12864B_WIDTH; x += 2) {
+                       for (y = 0; y < CFAG12864B_HEIGHT; y++)
+                               cfag12864b_unset(x, y);
+               }
+               break;
+
+       case 6:
+               printf("Do negative not-ing all bits");
+               for (x = 0; x < CFAG12864B_WIDTH; x++) {
+                       for (y = 0; y < CFAG12864B_HEIGHT; y++)
+                               cfag12864b_not(x, y);
+               }
+               break;
+       }
+
+       puts(" - [Press Enter]");
+}
+
+/*
+ * Usage: <program> fbdev
+ *
+ * Runs the examples one after another on the given framebuffer device,
+ * blitting after each and waiting for Enter before the next.
+ *
+ * Wrong argument count: return = -1
+ * Device init failed:   return = -2
+ * Otherwise:            return = 0
+ */
+int main(int argc, char *argv[])
+{
+       unsigned char n;
+       int c;
+
+       if (argc != 2) {
+               printf(
+                       "Sintax:  %s fbdev\n"
+                       "Usually: /dev/fb0, /dev/fb1...\n", argv[0]);
+               return -1;
+       }
+
+       if (cfag12864b_init(argv[1])) {
+               printf("Can't init %s fbdev\n", argv[1]);
+               return -2;
+       }
+
+       for (n = 1; n <= EXAMPLES; n++) {
+               example(n);
+               cfag12864b_blit();
+
+               /*
+                * Wait for Enter. getchar() keeps returning EOF once stdin
+                * is exhausted, so EOF must end the wait (and the demo)
+                * rather than spin forever.
+                */
+               do {
+                       c = getchar();
+               } while (c != '\n' && c != EOF);
+               if (c == EOF)
+                       break;
+       }
+
+       cfag12864b_exit();
+
+       return 0;
+}
diff --git a/samples/blackfin/Makefile b/samples/blackfin/Makefile
new file mode 100644 (file)
index 0000000..89b86cf
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_BLACKFIN_GPTIMERS) += gptimers-example.o
diff --git a/samples/blackfin/gptimers-example.c b/samples/blackfin/gptimers-example.c
new file mode 100644 (file)
index 0000000..283eba9
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Simple gptimers example
+ *     http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:drivers:gptimers
+ *
+ * Copyright 2007-2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+
+#include <asm/gptimers.h>
+#include <asm/portmux.h>
+
+/* ... random driver includes ... */
+
+#define DRIVER_NAME "gptimer_example"
+
+#ifdef IRQ_TIMER5
+#define SAMPLE_IRQ_TIMER IRQ_TIMER5
+#else
+#define SAMPLE_IRQ_TIMER IRQ_TIMER2
+#endif
+
+struct gptimer_data {
+       uint32_t period, width;
+};
+static struct gptimer_data data;
+
+/* ... random driver state ... */
+
+/*
+ * Interrupt handler: when timer 5 has an interrupt pending, store the
+ * captured pulse width and period in the gptimer_data passed as dev_id
+ * and acknowledge the interrupt.
+ *
+ * Returns IRQ_HANDLED in that case, IRQ_NONE when timer 5 was not the
+ * source of the interrupt.
+ */
+static irqreturn_t gptimer_example_irq(int irq, void *dev_id)
+{
+       struct gptimer_data *capture = dev_id;
+
+       if (get_gptimer_intr(TIMER5_id)) {
+               /* latch the measured waveform, then ack the interrupt */
+               capture->width = get_gptimer_pwidth(TIMER5_id);
+               capture->period = get_gptimer_period(TIMER5_id);
+               clear_gptimer_intr(TIMER5_id);
+
+               return IRQ_HANDLED;
+       }
+
+       /* not our timer */
+       return IRQ_NONE;
+}
+
+/* ... random driver code ... */
+
+/*
+ * Module init: claim the timer 5 pins, install the interrupt handler,
+ * then configure and start the timer.
+ *
+ * Returns 0 on success or the error code of the request that failed;
+ * anything acquired before the failure is released again.
+ */
+static int __init gptimer_example_init(void)
+{
+       int err;
+
+       /* claim the peripheral pins */
+       err = peripheral_request(P_TMR5, DRIVER_NAME);
+       if (err) {
+               printk(KERN_NOTICE DRIVER_NAME ": peripheral request failed\n");
+               goto out;
+       }
+
+       /* install the handler; &data is handed back to it as dev_id */
+       err = request_irq(SAMPLE_IRQ_TIMER, gptimer_example_irq,
+                       IRQF_SHARED, DRIVER_NAME, &data);
+       if (err) {
+               printk(KERN_NOTICE DRIVER_NAME ": IRQ request failed\n");
+               goto out_free_pins;
+       }
+
+       /* configure the timer and start it */
+       set_gptimer_config(TIMER5_id,
+                       WDTH_CAP | PULSE_HI | PERIOD_CNT | IRQ_ENA);
+       enable_gptimers(TIMER5bit);
+
+       return 0;
+
+out_free_pins:
+       peripheral_free(P_TMR5);
+out:
+       return err;
+}
+module_init(gptimer_example_init);
+
+/*
+ * Module exit: undo gptimer_example_init() in reverse order -- stop the
+ * timer, release the IRQ (same dev_id cookie, &data, that was registered),
+ * then give the pins back.
+ */
+static void __exit gptimer_example_exit(void)
+{
+       disable_gptimers(TIMER5bit);
+       free_irq(SAMPLE_IRQ_TIMER, &data);
+       peripheral_free(P_TMR5);
+}
+module_exit(gptimer_example_exit);
+
+MODULE_LICENSE("BSD");
index 5c53fdb67ca75a920089e8b3b3a3e983f246463e..bfc2cb88a1f7a10eb5311a672f13d74b2e56a73c 100644 (file)
@@ -2,6 +2,7 @@
 obj- := dummy.o
 
 # List of programs to build
+hostprogs-y := test_lru_dist
 hostprogs-y += sock_example
 hostprogs-y += fds_example
 hostprogs-y += sockex1
@@ -21,12 +22,18 @@ hostprogs-y += spintest
 hostprogs-y += map_perf_test
 hostprogs-y += test_overhead
 hostprogs-y += test_cgrp2_array_pin
+hostprogs-y += test_cgrp2_attach
+hostprogs-y += test_cgrp2_sock
+hostprogs-y += test_cgrp2_sock2
 hostprogs-y += xdp1
 hostprogs-y += xdp2
 hostprogs-y += test_current_task_under_cgroup
 hostprogs-y += trace_event
 hostprogs-y += sampleip
+hostprogs-y += tc_l2_redirect
+hostprogs-y += lwt_len_hist
 
+test_lru_dist-objs := test_lru_dist.o libbpf.o
 sock_example-objs := sock_example.o libbpf.o
 fds_example-objs := bpf_load.o libbpf.o fds_example.o
 sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
@@ -46,6 +53,9 @@ spintest-objs := bpf_load.o libbpf.o spintest_user.o
 map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
 test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
 test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
+test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o
+test_cgrp2_sock-objs := libbpf.o test_cgrp2_sock.o
+test_cgrp2_sock2-objs := bpf_load.o libbpf.o test_cgrp2_sock2.o
 xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
@@ -53,6 +63,8 @@ test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
                                       test_current_task_under_cgroup_user.o
 trace_event-objs := bpf_load.o libbpf.o trace_event_user.o
 sampleip-objs := bpf_load.o libbpf.o sampleip_user.o
+tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o
+lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -65,10 +77,12 @@ always += tracex3_kern.o
 always += tracex4_kern.o
 always += tracex5_kern.o
 always += tracex6_kern.o
+always += sock_flags_kern.o
 always += test_probe_write_user_kern.o
 always += trace_output_kern.o
 always += tcbpf1_kern.o
 always += tcbpf2_kern.o
+always += tc_l2_redirect_kern.o
 always += lathist_kern.o
 always += offwaketime_kern.o
 always += spintest_kern.o
@@ -82,8 +96,10 @@ always += xdp2_kern.o
 always += test_current_task_under_cgroup_kern.o
 always += trace_event_kern.o
 always += sampleip_kern.o
+always += lwt_len_hist_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
+HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
 
 HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
 HOSTLOADLIBES_fds_example += -lelf
@@ -96,6 +112,7 @@ HOSTLOADLIBES_tracex3 += -lelf
 HOSTLOADLIBES_tracex4 += -lelf -lrt
 HOSTLOADLIBES_tracex5 += -lelf
 HOSTLOADLIBES_tracex6 += -lelf
+HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
 HOSTLOADLIBES_test_probe_write_user += -lelf
 HOSTLOADLIBES_trace_output += -lelf -lrt
 HOSTLOADLIBES_lathist += -lelf
@@ -108,6 +125,8 @@ HOSTLOADLIBES_xdp2 += -lelf
 HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
 HOSTLOADLIBES_trace_event += -lelf
 HOSTLOADLIBES_sampleip += -lelf
+HOSTLOADLIBES_tc_l2_redirect += -l elf
+HOSTLOADLIBES_lwt_len_hist += -l elf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
index 90f44bd2045e16f5dd78ed842381d25a06fffa82..a246c61226291191c72db8000bcdd915b92cd84a 100644 (file)
@@ -80,6 +80,8 @@ struct bpf_map_def {
        unsigned int map_flags;
 };
 
+static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
+       (void *) BPF_FUNC_skb_load_bytes;
 static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
        (void *) BPF_FUNC_skb_store_bytes;
 static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
@@ -88,6 +90,8 @@ static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flag
        (void *) BPF_FUNC_l4_csum_replace;
 static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
        (void *) BPF_FUNC_skb_under_cgroup;
+static int (*bpf_skb_change_head)(void *, int len, int flags) =
+       (void *) BPF_FUNC_skb_change_head;
 
 #if defined(__x86_64__)
 
index 97913e109b144f9f02491c1557ff179171d05318..49b45ccbe1530a7839ceb008c5e6008655ba734b 100644 (file)
@@ -52,6 +52,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
        bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
        bool is_xdp = strncmp(event, "xdp", 3) == 0;
        bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
+       bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
+       bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
        enum bpf_prog_type prog_type;
        char buf[256];
        int fd, efd, err, id;
@@ -72,6 +74,10 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
                prog_type = BPF_PROG_TYPE_XDP;
        } else if (is_perf_event) {
                prog_type = BPF_PROG_TYPE_PERF_EVENT;
+       } else if (is_cgroup_skb) {
+               prog_type = BPF_PROG_TYPE_CGROUP_SKB;
+       } else if (is_cgroup_sk) {
+               prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
        } else {
                printf("Unknown event '%s'\n", event);
                return -1;
@@ -85,7 +91,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 
        prog_fd[prog_cnt++] = fd;
 
-       if (is_xdp || is_perf_event)
+       if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
                return 0;
 
        if (is_socket) {
@@ -317,6 +323,10 @@ int load_bpf_file(char *path)
                                    &shdr_prog, &data_prog))
                                continue;
 
+                       if (shdr_prog.sh_type != SHT_PROGBITS ||
+                           !(shdr_prog.sh_flags & SHF_EXECINSTR))
+                               continue;
+
                        insns = (struct bpf_insn *) data_prog->d_buf;
 
                        processed_sec[shdr.sh_info] = true;
@@ -330,7 +340,8 @@ int load_bpf_file(char *path)
                            memcmp(shname_prog, "tracepoint/", 11) == 0 ||
                            memcmp(shname_prog, "xdp", 3) == 0 ||
                            memcmp(shname_prog, "perf_event", 10) == 0 ||
-                           memcmp(shname_prog, "socket", 6) == 0)
+                           memcmp(shname_prog, "socket", 6) == 0 ||
+                           memcmp(shname_prog, "cgroup/", 7) == 0)
                                load_and_attach(shname_prog, insns, data_prog->d_size);
                }
        }
@@ -349,7 +360,8 @@ int load_bpf_file(char *path)
                    memcmp(shname, "tracepoint/", 11) == 0 ||
                    memcmp(shname, "xdp", 3) == 0 ||
                    memcmp(shname, "perf_event", 10) == 0 ||
-                   memcmp(shname, "socket", 6) == 0)
+                   memcmp(shname, "socket", 6) == 0 ||
+                   memcmp(shname, "cgroup/", 7) == 0)
                        load_and_attach(shname, data->d_buf, data->d_size);
        }
 
index dfa57fe65c8e0f2eb67ae251705b631b1403fc55..4adeeef53ad6ea9f4c269cf8dd0aa52ff9ccf110 100644 (file)
@@ -7,6 +7,7 @@
 extern int map_fd[MAX_MAPS];
 extern int prog_fd[MAX_PROGS];
 extern int event_fd[MAX_PROGS];
+extern int prog_cnt;
 
 /* parses elf file compiled by llvm .c->.o
  * . parses 'maps' section and creates maps via BPF syscall
index 9969e35550c3dd3c9702e7e99dab35b3df9fb7f6..9ce707bf02a7a7aee8e37383af34429d97c569cf 100644 (file)
@@ -104,6 +104,27 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
        return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
 }
 
+/*
+ * Issue the BPF_PROG_ATTACH command of the bpf() syscall.
+ *
+ * prog_fd   - passed as attr.attach_bpf_fd
+ * target_fd - passed as attr.target_fd
+ * type      - passed as attr.attach_type
+ *
+ * Returns the result of syscall() unchanged.
+ */
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+       union bpf_attr attr = {
+               .target_fd = target_fd,
+               .attach_bpf_fd = prog_fd,
+               .attach_type = type,
+       };
+
+       return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
+}
+
+/*
+ * Issue the BPF_PROG_DETACH command of the bpf() syscall.
+ *
+ * target_fd - passed as attr.target_fd
+ * type      - passed as attr.attach_type
+ *
+ * Returns the result of syscall() unchanged.
+ */
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+{
+       union bpf_attr attr = {
+               .target_fd = target_fd,
+               .attach_type = type,
+       };
+
+       return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
 int bpf_obj_pin(int fd, const char *pathname)
 {
        union bpf_attr attr = {
index ac6edb61b64a2798f4c4beb261ed8996e6957877..94a901d86fc2ea4867a3993f87380e9894d3b1f5 100644 (file)
@@ -15,10 +15,13 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
                  const struct bpf_insn *insns, int insn_len,
                  const char *license, int kern_version);
 
+int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type);
+int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
+
 int bpf_obj_pin(int fd, const char *pathname);
 int bpf_obj_get(const char *pathname);
 
-#define LOG_BUF_SIZE 65536
+#define LOG_BUF_SIZE (256 * 1024)
 extern char bpf_log_buf[LOG_BUF_SIZE];
 
 /* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
diff --git a/samples/bpf/lwt_len_hist.sh b/samples/bpf/lwt_len_hist.sh
new file mode 100644 (file)
index 0000000..7d56774
--- /dev/null
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+NS1=lwt_ns1
+VETH0=tst_lwt1a
+VETH1=tst_lwt1b
+
+TRACE_ROOT=/sys/kernel/debug/tracing
+
+# Remove everything this script sets up: the host route, both veth
+# devices, the netserver running in $NS1 and the namespace itself.
+# stderr of the ip route/link/netns-delete commands is discarded, so the
+# function can also be called before anything exists (killall may still
+# print an error in that case).
+function cleanup {
+       ip route del 192.168.253.2/32 dev $VETH0 2> /dev/null
+       ip link del $VETH0 2> /dev/null
+       ip link del $VETH1 2> /dev/null
+       ip netns exec $NS1 killall netserver
+       ip netns delete $NS1 2> /dev/null
+}
+
+cleanup
+
+ip netns add $NS1
+ip link add $VETH0 type veth peer name $VETH1
+ip link set dev $VETH0 up
+ip addr add 192.168.253.1/24 dev $VETH0
+ip link set $VETH1 netns $NS1
+ip netns exec $NS1 ip link set dev $VETH1 up
+ip netns exec $NS1 ip addr add 192.168.253.2/24 dev $VETH1
+ip netns exec $NS1 netserver
+
+echo 1 > ${TRACE_ROOT}/tracing_on
+cp /dev/null ${TRACE_ROOT}/trace
+ip route add 192.168.253.2/32 encap bpf out obj lwt_len_hist_kern.o section len_hist dev $VETH0
+netperf -H 192.168.253.2 -t TCP_STREAM
+cat ${TRACE_ROOT}/trace | grep -v '^#'
+./lwt_len_hist
+cleanup
+echo 0 > ${TRACE_ROOT}/tracing_on
+
+exit 0
diff --git a/samples/bpf/lwt_len_hist_kern.c b/samples/bpf/lwt_len_hist_kern.c
new file mode 100644 (file)
index 0000000..df75383
--- /dev/null
@@ -0,0 +1,82 @@
+/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/in.h>
+#include "bpf_helpers.h"
+
+# define printk(fmt, ...)                                              \
+               ({                                                      \
+                       char ____fmt[] = fmt;                           \
+                       bpf_trace_printk(____fmt, sizeof(____fmt),      \
+                                    ##__VA_ARGS__);                    \
+               })
+
+struct bpf_elf_map {
+       __u32 type;
+       __u32 size_key;
+       __u32 size_value;
+       __u32 max_elem;
+       __u32 flags;
+       __u32 id;
+       __u32 pinning;
+};
+
+struct bpf_elf_map SEC("maps") lwt_len_hist_map = {
+       .type = BPF_MAP_TYPE_PERCPU_HASH,
+       .size_key = sizeof(__u64),
+       .size_value = sizeof(__u64),
+       .pinning = 2,
+       .max_elem = 1024,
+};
+
+/*
+ * Integer base-2 logarithm, rounded down: returns the index of the
+ * highest set bit of v. Both v == 0 and v == 1 yield 0.
+ */
+static unsigned int log2(unsigned int v)
+{
+       unsigned int r;
+       unsigned int shift;
+
+       /*
+        * Branch-free binary search for the top bit: at each step test
+        * whether anything sits above the lower 16/8/4/2 bits, and if so
+        * shift it down and record the shift amount in the result.
+        */
+       r = (v > 0xFFFF) << 4; v >>= r;
+       shift = (v > 0xFF) << 3; v >>= shift; r |= shift;
+       shift = (v > 0xF) << 2; v >>= shift; r |= shift;
+       shift = (v > 0x3) << 1; v >>= shift; r |= shift;
+       /* v is now 0..3; its bit 1 supplies the lowest bit of the result */
+       r |= (v >> 1);
+       return r;
+}
+
+/*
+ * log2() for a value that may use more than 32 bits: when the upper
+ * 32 bits are non-zero the result is their log2() plus 32, otherwise
+ * it is the log2() of the lower 32 bits.
+ */
+static unsigned int log2l(unsigned long v)
+{
+       unsigned int upper = v >> 32;
+
+       return upper ? log2(upper) + 32 : log2(v);
+}
+
+/*
+ * Count the packet in the histogram bucket log2l(skb->len): an existing
+ * bucket is incremented atomically, a missing one is created with a
+ * count of 1. Always returns BPF_OK.
+ */
+SEC("len_hist")
+int do_len_hist(struct __sk_buff *skb)
+{
+       __u64 bucket = log2l(skb->len);
+       __u64 first_hit = 1;
+       __u64 *count;
+
+       count = bpf_map_lookup_elem(&lwt_len_hist_map, &bucket);
+       if (!count) {
+               bpf_map_update_elem(&lwt_len_hist_map, &bucket, &first_hit,
+                                   BPF_ANY);
+               return BPF_OK;
+       }
+
+       __sync_fetch_and_add(count, 1);
+
+       return BPF_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/lwt_len_hist_user.c b/samples/bpf/lwt_len_hist_user.c
new file mode 100644 (file)
index 0000000..05d783f
--- /dev/null
@@ -0,0 +1,76 @@
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <arpa/inet.h>
+
+#include "libbpf.h"
+#include "bpf_util.h"
+
+#define MAX_INDEX 64
+#define MAX_STARS 38
+
+/*
+ * Render a histogram bar into str.
+ *
+ * str   - output buffer; must hold at least 'width' bytes
+ * val   - value of this bucket
+ * max   - largest bucket value, i.e. the value that gets a full bar
+ * width - size of str including the terminating NUL
+ *
+ * The bar is a run of '*' scaled by val / max and capped to fit the
+ * buffer; when val exceeds max its last character becomes '+'. A
+ * non-positive max (no samples at all) yields an empty string instead
+ * of dividing by zero.
+ */
+static void stars(char *str, long val, long max, int width)
+{
+       int i = 0;
+
+       if (max > 0) {
+               for (; i < (width * val / max) - 1 && i < width - 1; i++)
+                       str[i] = '*';
+               /* never write in front of the buffer when the bar is empty */
+               if (val > max && i > 0)
+                       str[i - 1] = '+';
+       }
+       str[i] = '\0';
+}
+
+/*
+ * Print the packet length histogram collected in the pinned map
+ * /sys/fs/bpf/tc/globals/lwt_len_hist_map.
+ *
+ * Every key below MAX_INDEX is a bucket index; its per-CPU values are
+ * summed and printed as one row with a bar scaled to the largest bucket.
+ *
+ * Returns -1 if the pinned map cannot be opened, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+       unsigned int nr_cpus = bpf_num_possible_cpus();
+       const char *map_filename = "/sys/fs/bpf/tc/globals/lwt_len_hist_map";
+       uint64_t values[nr_cpus], sum, max_value = 0, data[MAX_INDEX] = {};
+       uint64_t key = 0, next_key, max_key = 0;
+       char starstr[MAX_STARS];
+       int i, map_fd;
+
+       map_fd = bpf_obj_get(map_filename);
+       if (map_fd < 0) {
+               fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
+                       map_filename, strerror(errno), errno);
+               return -1;
+       }
+
+       while (bpf_get_next_key(map_fd, &key, &next_key) == 0) {
+               /*
+                * Advance the cursor first: skipping an entry without
+                * doing so would make the next call return the very same
+                * key again and the loop would never end.
+                */
+               key = next_key;
+
+               if (next_key >= MAX_INDEX) {
+                       fprintf(stderr, "Key %lu out of bounds\n", next_key);
+                       continue;
+               }
+
+               /* 'values' holds nothing usable if the lookup failed */
+               if (bpf_lookup_elem(map_fd, &next_key, values) != 0)
+                       continue;
+
+               sum = 0;
+               for (i = 0; i < nr_cpus; i++)
+                       sum += values[i];
+
+               data[next_key] = sum;
+               if (sum && next_key > max_key)
+                       max_key = next_key;
+
+               if (sum > max_value)
+                       max_value = sum;
+       }
+
+       /* row i shows bucket i - 1, i.e. lengths 2^(i-1) .. 2^i - 1 */
+       for (i = 1; i <= max_key + 1; i++) {
+               stars(starstr, data[i - 1], max_value, MAX_STARS);
+               printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
+                      (1l << i) >> 1, (1l << i) - 1, data[i - 1],
+                      MAX_STARS, starstr);
+       }
+
+       close(map_fd);
+
+       return 0;
+}
index 311538e5a7016e7bfdd250779d60491051ef8b4b..7ee1574c8ccff49ad7a2a301c4462f66e46d88da 100644 (file)
@@ -19,6 +19,21 @@ struct bpf_map_def SEC("maps") hash_map = {
        .max_entries = MAX_ENTRIES,
 };
 
+struct bpf_map_def SEC("maps") lru_hash_map = {
+       .type = BPF_MAP_TYPE_LRU_HASH,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(long),
+       .max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") percpu_lru_hash_map = {
+       .type = BPF_MAP_TYPE_LRU_HASH,
+       .key_size = sizeof(u32),
+       .value_size = sizeof(long),
+       .max_entries = 10000,
+       .map_flags = BPF_F_NO_COMMON_LRU,
+};
+
 struct bpf_map_def SEC("maps") percpu_hash_map = {
        .type = BPF_MAP_TYPE_PERCPU_HASH,
        .key_size = sizeof(u32),
@@ -53,6 +68,7 @@ int stress_hmap(struct pt_regs *ctx)
        value = bpf_map_lookup_elem(&hash_map, &key);
        if (value)
                bpf_map_delete_elem(&hash_map, &key);
+
        return 0;
 }
 
@@ -96,5 +112,28 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx)
                bpf_map_delete_elem(&percpu_hash_map_alloc, &key);
        return 0;
 }
+
+/*
+ * Placed in section "kprobe/sys_getpid": on every invocation, store the
+ * value 1 under a pseudo-random u32 key in lru_hash_map (BPF_ANY, so an
+ * existing key is overwritten). The result of the update is ignored.
+ */
+SEC("kprobe/sys_getpid")
+int stress_lru_hmap_alloc(struct pt_regs *ctx)
+{
+       u32 key = bpf_get_prandom_u32();
+       long val = 1;
+
+       bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
+
+       return 0;
+}
+
+/*
+ * Placed in section "kprobe/sys_getppid": on every invocation, store the
+ * value 1 under a pseudo-random u32 key in percpu_lru_hash_map (BPF_ANY,
+ * so an existing key is overwritten). The result of the update is
+ * ignored.
+ */
+SEC("kprobe/sys_getppid")
+int stress_percpu_lru_hmap_alloc(struct pt_regs *ctx)
+{
+       u32 key = bpf_get_prandom_u32();
+       long val = 1;
+
+       bpf_map_update_elem(&percpu_lru_hash_map, &key, &val, BPF_ANY);
+
+       return 0;
+}
+
 char _license[] SEC("license") = "GPL";
 u32 _version SEC("version") = LINUX_VERSION_CODE;
index 3147377e8fd3c98fe455c6d96f5c774cb5392f11..9505b4d112f426790645ecd00c50263620ae68e6 100644 (file)
@@ -35,6 +35,8 @@ static __u64 time_get_ns(void)
 #define PERCPU_HASH_PREALLOC   (1 << 1)
 #define HASH_KMALLOC           (1 << 2)
 #define PERCPU_HASH_KMALLOC    (1 << 3)
+#define LRU_HASH_PREALLOC      (1 << 4)
+#define PERCPU_LRU_HASH_PREALLOC       (1 << 5)
 
 static int test_flags = ~0;
 
@@ -50,6 +52,30 @@ static void test_hash_prealloc(int cpu)
               cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
 }
 
+/*
+ * Issue MAX_CNT getpid() syscalls back to back and print the achieved
+ * rate in events per second, prefixed with the given cpu number.
+ */
+static void test_lru_hash_prealloc(int cpu)
+{
+       __u64 begin = time_get_ns();
+       int iter = 0;
+
+       while (iter < MAX_CNT) {
+               syscall(__NR_getpid);
+               iter++;
+       }
+
+       printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n",
+              cpu, MAX_CNT * 1000000000ll / (time_get_ns() - begin));
+}
+
+/*
+ * Issue MAX_CNT getppid() syscalls back to back and print the achieved
+ * rate in events per second, prefixed with the given cpu number.
+ *
+ * The line is labelled "percpu_lru_hash_map_perf" so that it can be told
+ * apart from the output of test_lru_hash_prealloc().
+ */
+static void test_percpu_lru_hash_prealloc(int cpu)
+{
+       __u64 start_time;
+       int i;
+
+       start_time = time_get_ns();
+       for (i = 0; i < MAX_CNT; i++)
+               syscall(__NR_getppid);
+       printf("%d:percpu_lru_hash_map_perf pre-alloc %lld events per sec\n",
+              cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+}
+
 static void test_percpu_hash_prealloc(int cpu)
 {
        __u64 start_time;
@@ -105,6 +131,12 @@ static void loop(int cpu)
 
        if (test_flags & PERCPU_HASH_KMALLOC)
                test_percpu_hash_kmalloc(cpu);
+
+       if (test_flags & LRU_HASH_PREALLOC)
+               test_lru_hash_prealloc(cpu);
+
+       if (test_flags & PERCPU_LRU_HASH_PREALLOC)
+               test_percpu_lru_hash_prealloc(cpu);
 }
 
 static void run_perf_test(int tasks)
index d17550198d0628e063e43a64253ec335a012c28f..6db6b21fdc6dd71fd230cfbc40161d087c92024f 100644 (file)
@@ -4,6 +4,7 @@
  * modify it under the terms of version 2 of the GNU General Public
  * License as published by the Free Software Foundation.
  */
+#define KBUILD_MODNAME "foo"
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/in.h>
index cf2511c33905751bb6ed866bf6bba9b1fb8330f3..10af53d33cc2925a928fe496fdc1047629b998e3 100644 (file)
@@ -4,6 +4,7 @@
  * modify it under the terms of version 2 of the GNU General Public
  * License as published by the Free Software Foundation.
  */
+#define KBUILD_MODNAME "foo"
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/in.h>
index edab34dce79b3794b010ca4138ef969d13c68535..95c16324760c0be1af8be927e1adffae0b582525 100644 (file)
@@ -4,6 +4,7 @@
  * modify it under the terms of version 2 of the GNU General Public
  * License as published by the Free Software Foundation.
  */
+#define KBUILD_MODNAME "foo"
 #include <linux/if_ether.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c
new file mode 100644 (file)
index 0000000..533dd11
--- /dev/null
@@ -0,0 +1,44 @@
+#include <uapi/linux/bpf.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/in6.h>
+#include "bpf_helpers.h"
+
+SEC("cgroup/sock1")
+int bpf_prog1(struct bpf_sock *sk)
+{
+       char fmt[] = "socket: family %d type %d protocol %d\n";
+
+       bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
+
+       /* Block (return 0) PF_INET6 / SOCK_RAW / IPPROTO_ICMPV6 sockets,
+        * i.e. make ping6 fail; any other socket is allowed (return 1).
+        */
+       if (sk->family == PF_INET6 &&
+           sk->type == SOCK_RAW   &&
+           sk->protocol == IPPROTO_ICMPV6)
+               return 0;
+
+       return 1;
+}
+
+SEC("cgroup/sock2")
+int bpf_prog2(struct bpf_sock *sk)
+{
+       char fmt[] = "socket: family %d type %d protocol %d\n";
+
+       bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
+
+       /* Block (return 0) PF_INET / SOCK_RAW / IPPROTO_ICMP sockets,
+        * i.e. make ping fail; any other socket is allowed (return 1).
+        */
+       if (sk->family == PF_INET &&
+           sk->type == SOCK_RAW  &&
+           sk->protocol == IPPROTO_ICMP)
+               return 0;
+
+       return 1;
+}
+
+char _license[] SEC("license") = "GPL";
index 44e5846c988f865015c1fe1fc0596407a491901d..f58acfc9255612d985664ff884c80c440b43880d 100644 (file)
@@ -198,7 +198,7 @@ struct bpf_map_def SEC("maps") hash_map = {
 SEC("socket2")
 int bpf_prog2(struct __sk_buff *skb)
 {
-       struct bpf_flow_keys flow;
+       struct bpf_flow_keys flow = {};
        struct pair *value;
        u32 key;
 
diff --git a/samples/bpf/tc_l2_redirect.sh b/samples/bpf/tc_l2_redirect.sh
new file mode 100755 (executable)
index 0000000..80a0559
--- /dev/null
@@ -0,0 +1,173 @@
+#!/bin/bash
+
+[[ -z $TC ]] && TC='tc'
+[[ -z $IP ]] && IP='ip'
+
+REDIRECT_USER='./tc_l2_redirect'
+REDIRECT_BPF='./tc_l2_redirect_kern.o'
+
+RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter)
+IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding)
+
+function config_common {
+       local tun_type=$1
+
+       $IP netns add ns1
+       $IP netns add ns2
+       $IP link add ve1 type veth peer name vens1
+       $IP link add ve2 type veth peer name vens2
+       $IP link set dev ve1 up
+       $IP link set dev ve2 up
+       $IP link set dev ve1 mtu 1500
+       $IP link set dev ve2 mtu 1500
+       $IP link set dev vens1 netns ns1
+       $IP link set dev vens2 netns ns2
+
+       $IP -n ns1 link set dev lo up
+       $IP -n ns1 link set dev vens1 up
+       $IP -n ns1 addr add 10.1.1.101/24 dev vens1
+       $IP -n ns1 addr add 2401:db01::65/64 dev vens1 nodad
+       $IP -n ns1 route add default via 10.1.1.1 dev vens1
+       $IP -n ns1 route add default via 2401:db01::1 dev vens1
+
+       $IP -n ns2 link set dev lo up
+       $IP -n ns2 link set dev vens2 up
+       $IP -n ns2 addr add 10.2.1.102/24 dev vens2
+       $IP -n ns2 addr add 2401:db02::66/64 dev vens2 nodad
+       $IP -n ns2 addr add 10.10.1.102 dev lo
+       $IP -n ns2 addr add 2401:face::66/64 dev lo nodad
+       $IP -n ns2 link add ipt2 type ipip local 10.2.1.102 remote 10.2.1.1
+       $IP -n ns2 link add ip6t2 type ip6tnl mode any local 2401:db02::66 remote 2401:db02::1
+       $IP -n ns2 link set dev ipt2 up
+       $IP -n ns2 link set dev ip6t2 up
+       $IP netns exec ns2 $TC qdisc add dev vens2 clsact
+       $IP netns exec ns2 $TC filter add dev vens2 ingress bpf da obj $REDIRECT_BPF sec drop_non_tun_vip
+       if [[ $tun_type == "ipip" ]]; then
+               $IP -n ns2 route add 10.1.1.0/24 dev ipt2
+               $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+               $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ipt2.rp_filter=0
+       else
+               $IP -n ns2 route add 10.1.1.0/24 dev ip6t2
+               $IP -n ns2 route add 2401:db01::/64 dev ip6t2
+               $IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+               $IP netns exec ns2 sysctl -q -w net.ipv4.conf.ip6t2.rp_filter=0
+       fi
+
+       $IP addr add 10.1.1.1/24 dev ve1
+       $IP addr add 2401:db01::1/64 dev ve1 nodad
+       $IP addr add 10.2.1.1/24 dev ve2
+       $IP addr add 2401:db02::1/64 dev ve2 nodad
+
+       $TC qdisc add dev ve2 clsact
+       $TC filter add dev ve2 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_forward
+
+       sysctl -q -w net.ipv4.conf.all.rp_filter=0
+       sysctl -q -w net.ipv6.conf.all.forwarding=1
+}
+
+function cleanup {     # remove test namespaces/devices and restore the sysctls saved at startup
+       set +e          # the deletions below may fail when nothing exists yet
+       [[ -z $DEBUG ]] || set +x
+       $IP netns delete ns1 >& /dev/null
+       $IP netns delete ns2 >& /dev/null
+       $IP link del ve1 >& /dev/null
+       $IP link del ve2 >& /dev/null
+       $IP link del ipt >& /dev/null
+       $IP link del ip6t >& /dev/null
+       sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER
+       sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING
+       rm -f /sys/fs/bpf/tc/globals/tun_iface  # pinned map updated via $REDIRECT_USER -U
+       [[ -z $DEBUG ]] || set -x
+       set -e          # from here on any failing command aborts the script
+}
+
+function l2_to_ipip {  # $1: "egress" attaches the redirect on ve2 egress; otherwise on ve1 ingress
+       echo -n "l2_to_ipip $1: "
+
+       local dir=$1
+
+       config_common ipip
+
+       $IP link add ipt type ipip external
+       $IP link set dev ipt up
+       sysctl -q -w net.ipv4.conf.ipt.rp_filter=0
+       sysctl -q -w net.ipv4.conf.ipt.forwarding=1
+
+       if [[ $dir == "egress" ]]; then
+               $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2
+               $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect
+               sysctl -q -w net.ipv4.conf.ve1.forwarding=1
+       else
+               $TC qdisc add dev ve1 clsact
+               $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect
+       fi
+
+       $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ipt/ifindex)       # store ipt's ifindex in the pinned map
+
+       $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null    # cleanup left 'set -e' on: a failed ping aborts before "OK"
+
+       if [[ $dir == "egress" ]]; then
+               # test direct egress to ve2 (i.e. not forwarding from
+               # ve1 to ve2).
+               ping -c1 10.10.1.102 >& /dev/null
+       fi
+
+       cleanup
+
+       echo "OK"
+}
+
+function l2_to_ip6tnl {        # $1: "egress" attaches the redirect on ve2 egress; otherwise on ve1 ingress
+       echo -n "l2_to_ip6tnl $1: "
+
+       local dir=$1
+
+       config_common ip6tnl
+
+       $IP link add ip6t type ip6tnl mode any external
+       $IP link set dev ip6t up
+       sysctl -q -w net.ipv4.conf.ip6t.rp_filter=0
+       sysctl -q -w net.ipv4.conf.ip6t.forwarding=1
+
+       if [[ $dir == "egress" ]]; then
+               $IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2
+               $IP route add 2401:face::/64 via 2401:db02::66 dev ve2
+               $TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect
+               sysctl -q -w net.ipv4.conf.ve1.forwarding=1
+       else
+               $TC qdisc add dev ve1 clsact
+               $TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect
+       fi
+
+       $REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ip6t/ifindex)      # store ip6t's ifindex in the pinned map
+
+       $IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null    # IPv4 VIP through the tunnel
+       $IP netns exec ns1 ping -6 -c1 2401:face::66 >& /dev/null       # IPv6 VIP through the tunnel
+
+       if [[ $dir == "egress" ]]; then
+               # test direct egress to ve2 (i.e. not forwarding from
+               # ve1 to ve2).
+               ping -c1 10.10.1.102 >& /dev/null
+               ping -6 -c1 2401:face::66 >& /dev/null
+       fi
+
+       cleanup
+
+       echo "OK"
+}
+
+cleanup
+test_names="l2_to_ipip l2_to_ip6tnl"
+test_dirs="ingress egress"
+if [[ $# -ge 2 ]]; then
+       test_names=$1
+       test_dirs=$2
+elif [[ $# -ge 1 ]]; then
+       test_names=$1
+fi
+
+for t in $test_names; do
+       for d in $test_dirs; do
+               $t $d
+       done
+done
diff --git a/samples/bpf/tc_l2_redirect_kern.c b/samples/bpf/tc_l2_redirect_kern.c
new file mode 100644 (file)
index 0000000..92a4472
--- /dev/null
@@ -0,0 +1,236 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/filter.h>
+#include <uapi/linux/pkt_cls.h>
+#include <net/ipv6.h>
+#include "bpf_helpers.h"
+
+#define _htonl __builtin_bswap32
+
+#define PIN_GLOBAL_NS          2
+struct bpf_elf_map {   /* map description emitted into the "maps" section; see tun_iface */
+       __u32 type;             /* BPF_MAP_TYPE_* */
+       __u32 size_key;         /* key size in bytes */
+       __u32 size_value;       /* value size in bytes */
+       __u32 max_elem;         /* number of entries */
+       __u32 flags;
+       __u32 id;
+       __u32 pinning;          /* PIN_* mode, e.g. PIN_GLOBAL_NS */
+};
+
+/* copy of 'struct ethhdr' without __packed */
+struct eth_hdr {
+       unsigned char   h_dest[ETH_ALEN];
+       unsigned char   h_source[ETH_ALEN];
+       unsigned short  h_proto;        /* compared against htons(ETH_P_*) by the programs below */
+};
+
+struct bpf_elf_map SEC("maps") tun_iface = {   /* one-slot array: key 0 -> ifindex used for bpf_redirect() */
+       .type = BPF_MAP_TYPE_ARRAY,
+       .size_key = sizeof(int),
+       .size_value = sizeof(int),
+       .pinning = PIN_GLOBAL_NS,
+       .max_elem = 1,
+};
+
+static __always_inline bool is_vip_addr(__be16 eth_proto, __be32 daddr)
+{
+       if (eth_proto == htons(ETH_P_IP))       /* IPv4: daddr within 10.10.1.0/24 */
+               return (_htonl(0xffffff00) & daddr) == _htonl(0x0a0a0100);
+       else if (eth_proto == htons(ETH_P_IPV6))        /* IPv6: callers pass s6_addr32[0]; match 2401:face */
+               return (daddr == _htonl(0x2401face));
+
+       return false;   /* any other EtherType is never a VIP */
+}
+
+SEC("l2_to_iptun_ingress_forward")
+int _l2_to_iptun_ingress_forward(struct __sk_buff *skb)
+{
+       void *data = (void *)(long)skb->data;
+       struct eth_hdr *eth = data;
+       void *data_end = (void *)(long)skb->data_end;
+       int key = 0, *ifindex;
+
+       /* Redirect IPv4 packets carrying IPIP, and IPv6 packets carrying IPIP
+        * or IPv6, to the ingress path of the ifindex stored in tun_iface[0].
+        */
+       if (data + sizeof(*eth) > data_end)
+               return TC_ACT_OK;
+
+       ifindex = bpf_map_lookup_elem(&tun_iface, &key);
+       if (!ifindex)
+               return TC_ACT_OK;
+
+       if (eth->h_proto == htons(ETH_P_IP)) {
+               char fmt4[] = "ingress forward to ifindex:%d daddr4:%x\n";
+               struct iphdr *iph = data + sizeof(*eth);
+
+               if (data + sizeof(*eth) + sizeof(*iph) > data_end)
+                       return TC_ACT_OK;
+
+               if (iph->protocol != IPPROTO_IPIP)
+                       return TC_ACT_OK;
+
+               bpf_trace_printk(fmt4, sizeof(fmt4), *ifindex,
+                                _htonl(iph->daddr));
+               return bpf_redirect(*ifindex, BPF_F_INGRESS);
+       } else if (eth->h_proto == htons(ETH_P_IPV6)) {
+               char fmt6[] = "ingress forward to ifindex:%d daddr6:%x::%x\n";
+               struct ipv6hdr *ip6h = data + sizeof(*eth);
+
+               if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
+                       return TC_ACT_OK;
+
+               if (ip6h->nexthdr != IPPROTO_IPIP &&
+                   ip6h->nexthdr != IPPROTO_IPV6)
+                       return TC_ACT_OK;
+
+               bpf_trace_printk(fmt6, sizeof(fmt6), *ifindex,
+                                _htonl(ip6h->daddr.s6_addr32[0]),
+                                _htonl(ip6h->daddr.s6_addr32[3]));
+               return bpf_redirect(*ifindex, BPF_F_INGRESS);
+       }
+
+       return TC_ACT_OK;
+}
+
+SEC("l2_to_iptun_ingress_redirect")
+int _l2_to_iptun_ingress_redirect(struct __sk_buff *skb)
+{
+       struct bpf_tunnel_key tkey = {};
+       void *data = (void *)(long)skb->data;
+       struct eth_hdr *eth = data;
+       void *data_end = (void *)(long)skb->data_end;
+       int key = 0, *ifindex;  /* key 0 is the only slot of tun_iface */
+
+       if (data + sizeof(*eth) > data_end)
+               return TC_ACT_OK;
+
+       ifindex = bpf_map_lookup_elem(&tun_iface, &key);
+       if (!ifindex)
+               return TC_ACT_OK;
+
+       if (eth->h_proto == htons(ETH_P_IP)) {
+               char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n";
+               struct iphdr *iph = data + sizeof(*eth);
+               __be32 daddr;
+
+               if (data + sizeof(*eth) + sizeof(*iph) > data_end)
+                       return TC_ACT_OK;
+
+               /* read the header only once it is known to be in bounds */
+               daddr = iph->daddr;
+               if (!is_vip_addr(eth->h_proto, daddr))
+                       return TC_ACT_OK;
+
+               bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(daddr), *ifindex);
+       } else {
+               return TC_ACT_OK;       /* only IPv4 is tunnelled by this program */
+       }
+
+       tkey.tunnel_id = 10000;
+       tkey.tunnel_ttl = 64;
+       tkey.remote_ipv4 = 0x0a020166; /* 10.2.1.102 */
+       bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0);
+       return bpf_redirect(*ifindex, 0);
+}
+
+SEC("l2_to_ip6tun_ingress_redirect")
+int _l2_to_ip6tun_ingress_redirect(struct __sk_buff *skb)
+{
+       struct bpf_tunnel_key tkey = {};
+       void *data = (void *)(long)skb->data;
+       struct eth_hdr *eth = data;
+       void *data_end = (void *)(long)skb->data_end;
+       int key = 0, *ifindex;  /* key 0 is the only slot of tun_iface */
+
+       if (data + sizeof(*eth) > data_end)     /* too short for an Ethernet header */
+               return TC_ACT_OK;
+
+       ifindex = bpf_map_lookup_elem(&tun_iface, &key);
+       if (!ifindex)
+               return TC_ACT_OK;
+
+       if (eth->h_proto == htons(ETH_P_IP)) {
+               char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n";
+               struct iphdr *iph = data + sizeof(*eth);
+
+               if (data + sizeof(*eth) + sizeof(*iph) > data_end)
+                       return TC_ACT_OK;
+
+               if (!is_vip_addr(eth->h_proto, iph->daddr))     /* non-VIP traffic passes untouched */
+                       return TC_ACT_OK;
+
+               bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(iph->daddr),
+                                *ifindex);
+       } else if (eth->h_proto == htons(ETH_P_IPV6)) {
+               char fmt6[] = "e/ingress redirect daddr6:%x to ifindex:%d\n";
+               struct ipv6hdr *ip6h = data + sizeof(*eth);
+
+               if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
+                       return TC_ACT_OK;
+
+               if (!is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0]))       /* only the first 32 bits are compared */
+                       return TC_ACT_OK;
+
+               bpf_trace_printk(fmt6, sizeof(fmt6),
+                                _htonl(ip6h->daddr.s6_addr32[0]), *ifindex);
+       } else {
+               return TC_ACT_OK;
+       }
+
+       tkey.tunnel_id = 10000;
+       tkey.tunnel_ttl = 64;
+       /* 2401:db02:0:0:0:0:0:66 */
+       tkey.remote_ipv6[0] = _htonl(0x2401db02);
+       tkey.remote_ipv6[1] = 0;
+       tkey.remote_ipv6[2] = 0;
+       tkey.remote_ipv6[3] = _htonl(0x00000066);
+       bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), BPF_F_TUNINFO_IPV6);
+       return bpf_redirect(*ifindex, 0);       /* flags 0, unlike the BPF_F_INGRESS forward program */
+}
+
+SEC("drop_non_tun_vip")
+int _drop_non_tun_vip(struct __sk_buff *skb)
+{
+       /* Drop packets whose first IP header is addressed to a VIP (is_vip_addr). */
+       void *data = (void *)(long)skb->data;
+       struct eth_hdr *eth = data;
+       void *data_end = (void *)(long)skb->data_end;
+
+       if (data + sizeof(*eth) > data_end)
+               return TC_ACT_OK;
+
+       if (eth->h_proto == htons(ETH_P_IP)) {
+               struct iphdr *iph = data + sizeof(*eth);
+
+               if (data + sizeof(*eth) + sizeof(*iph) > data_end)
+                       return TC_ACT_OK;
+
+               if (is_vip_addr(eth->h_proto, iph->daddr))
+                       return TC_ACT_SHOT;
+       } else if (eth->h_proto == htons(ETH_P_IPV6)) {
+               struct ipv6hdr *ip6h = data + sizeof(*eth);
+
+               if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
+                       return TC_ACT_OK;
+
+               if (is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0]))
+                       return TC_ACT_SHOT;
+       }
+
+       return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tc_l2_redirect_user.c b/samples/bpf/tc_l2_redirect_user.c
new file mode 100644 (file)
index 0000000..4013c53
--- /dev/null
@@ -0,0 +1,73 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/unistd.h>
+#include <linux/bpf.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+
+#include "libbpf.h"
+
+static void usage(void)
+{      /* print the command-line help to stdout */
+       printf("Usage: tc_l2_redirect [...]\n");
+       printf("       -U <file>   Update an already pinned BPF array\n");
+       printf("       -i <ifindex> Interface index\n");
+       printf("       -h          Display this help\n");
+}
+
+int main(int argc, char **argv)
+{
+       const char *pinned_file = NULL;
+       int ifindex = -1;
+       int array_key = 0;
+       int array_fd = -1;
+       int ret = -1;   /* stays -1 unless the map update succeeds */
+       int opt;
+
+       while ((opt = getopt(argc, argv, "U:i:h")) != -1) {
+               switch (opt) {
+               case 'U':
+                       pinned_file = optarg;
+                       break;
+               case 'i':
+                       ifindex = atoi(optarg);
+                       break;
+               case 'h':       /* documented in usage(); shares the default path */
+               default:
+                       usage();
+                       goto out;
+               }
+       }
+
+       if (ifindex < 0 || !pinned_file) {
+               usage();
+               goto out;
+       }
+
+       array_fd = bpf_obj_get(pinned_file);
+       if (array_fd < 0) {
+               fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n",
+                       pinned_file, strerror(errno), errno);
+               goto out;
+       }
+
+       /* store the interface index at key 0 of the pinned array map */
+       ret = bpf_update_elem(array_fd, &array_key, &ifindex, 0);
+       if (ret) {
+               perror("bpf_update_elem");
+               goto out;
+       }
+
+out:
+       if (array_fd != -1)
+               close(array_fd);
+       return ret;
+}
index fa051b3d53ee0a8f18da0b0701e04d2962c3e4b6..274c884c87fe01f28adae47feecb9de7f4f0948e 100644 (file)
@@ -1,3 +1,4 @@
+#define KBUILD_MODNAME "foo"
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/if_ether.h>
 #include <uapi/linux/if_packet.h>
index 3303bb85593bc62a21afcf4f2864869a40543b4d..9c823a609e75f8d66bbe1fa31a1ecebac7f65311 100644 (file)
@@ -5,6 +5,7 @@
  * modify it under the terms of version 2 of the GNU General Public
  * License as published by the Free Software Foundation.
  */
+#define KBUILD_MODNAME "foo"
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/if_ether.h>
 #include <uapi/linux/if_packet.h>
diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c
new file mode 100644 (file)
index 0000000..a19484c
--- /dev/null
@@ -0,0 +1,167 @@
+/* eBPF example program:
+ *
+ * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
+ *
+ * - Loads eBPF program
+ *
+ *   The eBPF program accesses the map passed in to store two pieces of
+ *   information. The number of invocations of the program, which maps
+ *   to the number of packets received, is stored to key 0. Key 1 is
+ *   incremented on each iteration by the number of bytes stored in
+ *   the skb.
+ *
+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
+ *
+ * - Every second, reads map[0] and map[1] to see how many bytes and
+ *   packets were seen on any socket of tasks in the given cgroup.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+
+enum { /* indices into the array map shared with the eBPF program */
+       MAP_KEY_PACKETS,        /* slot counting program invocations (packets) */
+       MAP_KEY_BYTES,          /* slot accumulating skb->len (bytes) */
+};
+
+static int prog_load(int map_fd, int verdict)
+{
+       struct bpf_insn prog[] = {
+               BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), /* keep the r1 context pointer in r6, which BPF_CALL does not clobber */
+
+               /* Count packets */
+               BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
+               BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+               BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+               BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+               BPF_LD_MAP_FD(BPF_REG_1, map_fd), /* load map fd to r1 */
+               BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+               BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* lookup returned 0: skip the two update insns */
+               BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+               BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+               /* Count bytes */
+               BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
+               BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+               BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+               BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+               BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+               BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+               BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* lookup returned 0: skip the two update insns */
+               BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
+               BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+               BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+               BPF_EXIT_INSN(),
+       };
+
+       return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SKB,
+                            prog, sizeof(prog), "GPL", 0);
+}
+
+static int usage(const char *argv0)
+{
+       printf("Usage: %s [-d] [-D] <cg-path> <egress|ingress>\n"
+              "        -d      Drop Traffic\n"
+              "        -D      Detach filter, and exit\n", argv0);
+       return EXIT_FAILURE;    /* lets callers write "return usage(...)" */
+}
+
+static int attach_filter(int cg_fd, int type, int verdict)
+{
+       int prog_fd, map_fd, ret, key, load_errno;
+       long long pkt_cnt, byte_cnt;
+
+       map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY,
+                               sizeof(key), sizeof(byte_cnt),
+                               256, 0);
+       if (map_fd < 0) {
+               printf("Failed to create map: '%s'\n", strerror(errno));
+               return EXIT_FAILURE;
+       }
+
+       prog_fd = prog_load(map_fd, verdict);
+       load_errno = errno;     /* printing the log below may overwrite errno */
+       printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+       if (prog_fd < 0) {
+               printf("Failed to load prog: '%s'\n", strerror(load_errno));
+               return EXIT_FAILURE;
+       }
+
+       ret = bpf_prog_attach(prog_fd, cg_fd, type);
+       if (ret < 0) {
+               printf("Failed to attach prog to cgroup: '%s'\n",
+                      strerror(errno));
+               return EXIT_FAILURE;
+       }
+       while (1) {     /* report both counters once a second, forever */
+               key = MAP_KEY_PACKETS;  /* NB: the lookups sit inside assert(); NDEBUG would drop them */
+               assert(bpf_lookup_elem(map_fd, &key, &pkt_cnt) == 0);
+
+               key = MAP_KEY_BYTES;
+               assert(bpf_lookup_elem(map_fd, &key, &byte_cnt) == 0);
+
+               printf("cgroup received %lld packets, %lld bytes\n",
+                      pkt_cnt, byte_cnt);
+               sleep(1);
+       }
+
+       return EXIT_SUCCESS;    /* not reached: the loop above never exits */
+}
+
+int main(int argc, char **argv)
+{
+       int detach_only = 0, verdict = 1;
+       enum bpf_attach_type type;
+       const char *cg_path, *dir;
+       int opt, cg_fd, ret;
+
+       while ((opt = getopt(argc, argv, "Dd")) != -1) {
+               if (opt == 'd')
+                       verdict = 0;
+               else if (opt == 'D')
+                       detach_only = 1;
+               else
+                       return usage(argv[0]);
+       }
+
+       /* two positional arguments are required: <cg-path> <egress|ingress> */
+       if (argc - optind < 2)
+               return usage(argv[0]);
+       cg_path = argv[optind];
+       dir = argv[optind + 1];
+
+       if (!strcmp(dir, "ingress"))
+               type = BPF_CGROUP_INET_INGRESS;
+       else if (!strcmp(dir, "egress"))
+               type = BPF_CGROUP_INET_EGRESS;
+       else
+               return usage(argv[0]);
+
+       cg_fd = open(cg_path, O_DIRECTORY | O_RDONLY);
+       if (cg_fd < 0) {
+               printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+               return EXIT_FAILURE;
+       }
+
+       if (!detach_only)
+               return attach_filter(cg_fd, type, verdict);
+
+       /* detach only: the errno text is printed whether or not the call failed */
+       ret = bpf_prog_detach(cg_fd, type);
+       printf("bpf_prog_detach() returned '%s' (%d)\n",
+              strerror(errno), errno);
+       return ret;
+}
diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c
new file mode 100644 (file)
index 0000000..d467b3c
--- /dev/null
@@ -0,0 +1,83 @@
+/* eBPF example program:
+ *
+ * - Loads eBPF program
+ *
+ *   The eBPF program sets the sk_bound_dev_if index in new AF_INET{6}
+ *   sockets opened by processes in the cgroup.
+ *
+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+
+static int prog_load(int idx)
+{
+       struct bpf_insn prog[] = {
+               BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), /* r6 = r1; r6 is not read by any later instruction */
+               BPF_MOV64_IMM(BPF_REG_3, idx), /* r3 = device index to bind to */
+               BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)), /* r2 is not read by any later instruction */
+               BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)), /* sk->bound_dev_if = r3 */
+               BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
+               BPF_EXIT_INSN(),
+       };
+
+       return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog),
+                            "GPL", 0);
+}
+
+static int usage(const char *argv0)
+{      /* the second argument is a device name: main() resolves it with if_nametoindex() */
+       printf("Usage: %s cg-path device-name\n", argv0);
+       return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+       int cg_fd, prog_fd, ret, load_errno;
+       unsigned int idx;
+
+       if (argc < 3)   /* both cg-path (argv[1]) and the device (argv[2]) are required */
+               return usage(argv[0]);
+
+       idx = if_nametoindex(argv[2]);
+       if (!idx) {
+               printf("Invalid device name\n");
+               return EXIT_FAILURE;
+       }
+
+       cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
+       if (cg_fd < 0) {
+               printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+               return EXIT_FAILURE;
+       }
+
+       prog_fd = prog_load(idx);
+       load_errno = errno;     /* printing the log below may overwrite errno */
+       printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+       if (prog_fd < 0) {
+               printf("Failed to load prog: '%s'\n", strerror(load_errno));
+               return EXIT_FAILURE;
+       }
+
+       ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE);
+       if (ret < 0) {
+               printf("Failed to attach prog to cgroup: '%s'\n",
+                      strerror(errno));
+               return EXIT_FAILURE;
+       }
+
+       return EXIT_SUCCESS;
+}
diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh
new file mode 100755 (executable)
index 0000000..925fd46
--- /dev/null
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+function config_device {       # netns at_ns0 with veth0; host side veth0b is enslaved to VRF "foo"
+       ip netns add at_ns0
+       ip link add veth0 type veth peer name veth0b
+       ip link set veth0b up
+       ip link set veth0 netns at_ns0
+       ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+       ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
+       ip netns exec at_ns0 ip link set dev veth0 up
+       ip link add foo type vrf table 1234
+       ip link set foo up
+       ip addr add 172.16.1.101/24 dev veth0b
+       ip addr add 2401:db00::2/64 dev veth0b nodad
+       ip link set veth0b master foo
+}
+
+function attach_bpf {  # mount cgroup2, attach the program to cgroup "foo", then join it
+       rm -rf /tmp/cgroupv2
+       mkdir -p /tmp/cgroupv2
+       mount -t cgroup2 none /tmp/cgroupv2
+       mkdir -p /tmp/cgroupv2/foo
+       test_cgrp2_sock /tmp/cgroupv2/foo foo   # args: cgroup path, device name (the VRF "foo")
+       echo $$ >> /tmp/cgroupv2/foo/cgroup.procs       # move this shell into the cgroup
+}
+
+function cleanup {     # undo config_device and attach_bpf
+       set +ex         # individual steps may fail when nothing is set up yet
+       ip netns delete at_ns0
+       ip link del veth0
+       ip link del foo
+       umount /tmp/cgroupv2
+       rm -rf /tmp/cgroupv2
+       set -ex         # leaves errexit and tracing enabled for the rest of the script
+}
+
+function do_test {     # ping the at_ns0 addresses from this shell, which attach_bpf put in the cgroup
+       ping -c1 -w1 172.16.1.100
+       ping6 -c1 -w1 2401:db00::1
+}
+
+cleanup 2>/dev/null    # start clean; this call also turns on 'set -ex'
+config_device
+attach_bpf
+do_test                # with 'set -e' active, a failed ping aborts before PASS is printed
+cleanup
+echo "*** PASS ***"
diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c
new file mode 100644 (file)
index 0000000..455ef0d
--- /dev/null
@@ -0,0 +1,66 @@
+/* eBPF example program:
+ *
+ * - Loads eBPF program
+ *
+ *   This loader reads eBPF programs from an object file and attaches
+ *   the selected one to a cgroup using BPF_PROG_ATTACH
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <net/if.h>
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+
+static int usage(const char *argv0)
+{
+       fprintf(stdout, "Usage: %s cg-path filter-path [filter-id]\n", argv0);
+       return EXIT_FAILURE;    /* callers return this as the exit status */
+}
+
+int main(int argc, char **argv)
+{
+       int cg_fd, ret, filter_id = 0;  /* the first program in the file is the default */
+
+       if (argc < 3)
+               return usage(argv[0]);
+
+       cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
+       if (cg_fd < 0) {
+               printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+               return EXIT_FAILURE;
+       }
+
+       if (load_bpf_file(argv[2]))
+               return EXIT_FAILURE;
+
+       printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+
+       if (argc > 3)
+               filter_id = atoi(argv[3]);
+       /* prog_fd[] holds prog_cnt programs: reject indices outside 0 .. prog_cnt-1 */
+       if (filter_id < 0 || filter_id >= prog_cnt) {
+               printf("Invalid program id; program not found in file\n");
+               return EXIT_FAILURE;
+       }
+
+       ret = bpf_prog_attach(prog_fd[filter_id], cg_fd,
+                             BPF_CGROUP_INET_SOCK_CREATE);
+       if (ret < 0) {
+               printf("Failed to attach prog to cgroup: '%s'\n",
+                      strerror(errno));
+               return EXIT_FAILURE;
+       }
+
+       return EXIT_SUCCESS;
+}
diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh
new file mode 100755 (executable)
index 0000000..891f12a
--- /dev/null
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+function config_device {       # netns at_ns0 with veth0 (.100 / ::1); host keeps veth0b (.101 / ::2)
+       ip netns add at_ns0
+       ip link add veth0 type veth peer name veth0b
+       ip link set veth0b up
+       ip link set veth0 netns at_ns0
+       ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+       ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
+       ip netns exec at_ns0 ip link set dev veth0 up
+       ip addr add 172.16.1.101/24 dev veth0b
+       ip addr add 2401:db00::2/64 dev veth0b nodad
+}
+
+function config_cgroup {       # mount cgroup2 at /tmp/cgroupv2 and move this shell into cgroup "foo"
+       rm -rf /tmp/cgroupv2
+       mkdir -p /tmp/cgroupv2
+       mount -t cgroup2 none /tmp/cgroupv2
+       mkdir -p /tmp/cgroupv2/foo
+       echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
+}
+
+
+function attach_bpf {  # $1: filter-id of the program in sock_flags_kern.o to attach
+       test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1
+       [ $? -ne 0 ] && exit 1  # note: exits without running cleanup
+}
+
+function cleanup {     # undo config_device and config_cgroup
+       ip link del veth0b
+       ip netns delete at_ns0
+       umount /tmp/cgroupv2
+       rm -rf /tmp/cgroupv2
+}
+
+cleanup 2>/dev/null
+
+set -e
+config_device
+config_cgroup
+set +e
+
+#
+# Test 1 - fail ping6
+#
+attach_bpf 0
+ping -c1 -w1 172.16.1.100
+if [ $? -ne 0 ]; then
+       echo "ping failed when it should succeed"
+       cleanup
+       exit 1
+fi
+
+ping6 -c1 -w1 2401:db00::1
+if [ $? -eq 0 ]; then
+       echo "ping6 succeeded when it should not"
+       cleanup
+       exit 1
+fi
+
+#
+# Test 2 - fail ping
+#
+attach_bpf 1
+ping6 -c1 -w1 2401:db00::1
+if [ $? -ne 0 ]; then
+       echo "ping6 failed when it should succeed"
+       cleanup
+       exit 1
+fi
+
+ping -c1 -w1 172.16.1.100
+if [ $? -eq 0 ]; then
+       echo "ping succeeded when it should not"
+       cleanup
+       exit 1
+fi
+
+cleanup
+echo
+echo "*** PASS ***"
index 10ff73404e3a80fe8bab464188335317ee71515a..1547b36a7b7b9bd5251dd1be6ae2a451cdca0a29 100644 (file)
@@ -4,6 +4,7 @@
  * modify it under the terms of version 2 of the GNU General Public
  * License as published by the Free Software Foundation.
  */
+#define KBUILD_MODNAME "foo"
 #include <uapi/linux/if_ether.h>
 #include <uapi/linux/in6.h>
 #include <uapi/linux/ipv6.h>
diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c
new file mode 100644 (file)
index 0000000..316230a
--- /dev/null
@@ -0,0 +1,541 @@
+/*
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include <linux/types.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <sched.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "libbpf.h"
+#include "bpf_util.h"
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
+#define container_of(ptr, type, member) ({                     \
+       const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
+       (type *)( (char *)__mptr - offsetof(type,member) );})
+
+static int nr_cpus;
+static unsigned long long *dist_keys;
+static unsigned int dist_key_counts;
+
+struct list_head {
+       struct list_head *next, *prev;
+};
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+       list->next = list;
+       list->prev = list;
+}
+
+static inline int list_empty(const struct list_head *head)
+{
+       return head->next == head;
+}
+
+static inline void __list_add(struct list_head *new,
+                             struct list_head *prev,
+                             struct list_head *next)
+{
+       next->prev = new;
+       new->next = next;
+       new->prev = prev;
+       prev->next = new;
+}
+
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+       __list_add(new, head, head->next);
+}
+
+static inline void __list_del(struct list_head *prev, struct list_head *next)
+{
+       next->prev = prev;
+       prev->next = next;
+}
+
+static inline void __list_del_entry(struct list_head *entry)
+{
+       __list_del(entry->prev, entry->next);
+}
+
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+       __list_del_entry(list);
+       list_add(list, head);
+}
+
+#define list_entry(ptr, type, member) \
+       container_of(ptr, type, member)
+
+#define list_last_entry(ptr, type, member) \
+       list_entry((ptr)->prev, type, member)
+
+struct pfect_lru_node {
+       struct list_head list;
+       unsigned long long key;
+};
+
+struct pfect_lru {
+       struct list_head list;
+       struct pfect_lru_node *free_nodes;
+       unsigned int cur_size;
+       unsigned int lru_size;
+       unsigned int nr_unique;
+       unsigned int nr_misses;
+       unsigned int total;
+       int map_fd;
+};
+
+static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
+                          unsigned int nr_possible_elems)
+{
+       lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
+                                    sizeof(unsigned long long),
+                                    sizeof(struct pfect_lru_node *),
+                                    nr_possible_elems, 0);
+       assert(lru->map_fd != -1);
+
+       lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
+       assert(lru->free_nodes);
+
+       INIT_LIST_HEAD(&lru->list);
+       lru->cur_size = 0;
+       lru->lru_size = lru_size;
+       lru->nr_unique = lru->nr_misses = lru->total = 0;
+}
+
+static void pfect_lru_destroy(struct pfect_lru *lru)
+{
+       close(lru->map_fd);
+       free(lru->free_nodes);
+}
+
+/* Feed one key access into the reference ("perfect") LRU.
+ *
+ * The hash map behind lru->map_fd maps a key to the address of its list
+ * node, or to NULL once that key's node has been reused for another key.
+ * lru->list is kept with the most recently used node at the front.
+ *
+ * lru->total counts every call, lru->nr_misses every call that did not
+ * find a live node, and lru->nr_unique every key seen for the first time.
+ *
+ * Returns 1 if the key was accessed before (whether or not it is still
+ * cached) and 0 the first time the key is seen.
+ */
+static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
+                                     unsigned long long key)
+{
+       struct pfect_lru_node *node = NULL;
+       int seen = 0;
+
+       lru->total++;
+       if (!bpf_lookup_elem(lru->map_fd, &key, &node)) {
+               if (node) {
+                       /* Hit: just refresh the node's position */
+                       list_move(&node->list, &lru->list);
+                       return 1;
+               }
+               /* Known key whose node was evicted earlier */
+               seen = 1;
+       }
+
+       if (lru->cur_size < lru->lru_size) {
+               /* Still room: take the next never-used node */
+               node =  &lru->free_nodes[lru->cur_size++];
+               INIT_LIST_HEAD(&node->list);
+       } else {
+               struct pfect_lru_node *null_node = NULL;
+
+               /* Full: reuse the node at the tail of the list and mark
+                * its previous key as evicted by storing a NULL pointer.
+                */
+               node = list_last_entry(&lru->list,
+                                      struct pfect_lru_node,
+                                      list);
+               bpf_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
+       }
+
+       node->key = key;
+       list_move(&node->list, &lru->list);
+
+       lru->nr_misses++;
+       if (seen) {
+               assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
+       } else {
+               lru->nr_unique++;
+               assert(!bpf_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
+       }
+
+       return seen;
+}
+
+/* Load the key distribution file: one decimal key per line.
+ *
+ * @dist_file: path of the file to read
+ * @keys:      set to a malloc()ed array holding the parsed keys; the
+ *             caller owns it and must free() it
+ *
+ * Returns the number of keys parsed (empty lines are skipped by strtok()).
+ * Failures to open, stat, read or allocate are caught by assert().
+ */
+static unsigned int read_keys(const char *dist_file,
+                             unsigned long long **keys)
+{
+       struct stat fst;
+       unsigned long long *retkeys;
+       unsigned int counts = 0;
+       ssize_t nread;
+       int dist_fd;
+       char *b, *l;
+       off_t i;
+       int ret;
+
+       dist_fd = open(dist_file, O_RDONLY);
+       assert(dist_fd != -1);
+
+       /* Keep the calls outside assert() so they survive -DNDEBUG */
+       ret = fstat(dist_fd, &fst);
+       assert(ret == 0);
+
+       /* One extra byte: strtok() needs a NUL-terminated buffer */
+       b = malloc(fst.st_size + 1);
+       assert(b);
+
+       nread = read(dist_fd, b, fst.st_size);
+       assert(nread == fst.st_size);
+       b[fst.st_size] = '\0';
+       close(dist_fd);
+
+       for (i = 0; i < fst.st_size; i++) {
+               if (b[i] == '\n')
+                       counts++;
+       }
+       counts++; /* in case the last line has no \n */
+
+       retkeys = malloc(counts * sizeof(unsigned long long));
+       assert(retkeys);
+
+       counts = 0;
+       for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
+               retkeys[counts++] = strtoull(l, NULL, 10);
+       free(b);
+
+       *keys = retkeys;
+
+       return counts;
+}
+
+static int create_map(int map_type, int map_flags, unsigned int size)
+{
+       int map_fd;
+
+       map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
+                               sizeof(unsigned long long), size, map_flags);
+
+       if (map_fd == -1)
+               perror("bpf_create_map");
+
+       return map_fd;
+}
+
+static int sched_next_online(int pid, int next_to_try)
+{
+       cpu_set_t cpuset;
+
+       if (next_to_try == nr_cpus)
+               return -1;
+
+       while (next_to_try < nr_cpus) {
+               CPU_ZERO(&cpuset);
+               CPU_SET(next_to_try++, &cpuset);
+               if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
+                       break;
+       }
+
+       return next_to_try;
+}
+
+/* Fork @tasks children, run fn(i, data) in child number i, then wait for
+ * all of them.
+ *
+ * Each child asks sched_next_online() to bind it to a CPU before calling
+ * @fn and exits with status 0 afterwards.  The parent exits with status 1
+ * if a fork() fails and asserts that every child terminated with status 0.
+ */
+static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
+                        void *data)
+{
+       int next_sched_cpu = 0;
+       pid_t pid[tasks];
+       int i;
+
+       for (i = 0; i < tasks; i++) {
+               pid[i] = fork();
+               if (pid[i] == 0) {
+                       next_sched_cpu = sched_next_online(0, next_sched_cpu);
+                       fn(i, data);
+                       exit(0);
+               } else if (pid[i] == -1) {
+                       printf("couldn't spawn #%d process\n", i);
+                       exit(1);
+               }
+               /* This is mostly redundant: it just allows the parent
+                * process to update next_sched_cpu for the next child
+                * process.
+                */
+               next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
+       }
+       for (i = 0; i < tasks; i++) {
+               int status;
+
+               assert(waitpid(pid[i], &status, 0) == pid[i]);
+               assert(status == 0);
+       }
+}
+
+static void do_test_lru_dist(int task, void *data)
+{
+       unsigned int nr_misses = 0;
+       struct pfect_lru pfect_lru;
+       unsigned long long key, value = 1234;
+       unsigned int i;
+
+       unsigned int lru_map_fd = ((unsigned int *)data)[0];
+       unsigned int lru_size = ((unsigned int *)data)[1];
+       unsigned long long key_offset = task * dist_key_counts;
+
+       pfect_lru_init(&pfect_lru, lru_size, dist_key_counts);
+
+       for (i = 0; i < dist_key_counts; i++) {
+               key = dist_keys[i] + key_offset;
+
+               pfect_lru_lookup_or_insert(&pfect_lru, key);
+
+               if (!bpf_lookup_elem(lru_map_fd, &key, &value))
+                       continue;
+
+               if (bpf_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
+                       printf("bpf_update_elem(lru_map_fd, %llu): errno:%d\n",
+                              key, errno);
+                       assert(0);
+               }
+
+               nr_misses++;
+       }
+
+       printf("    task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
+              task, pfect_lru.nr_unique, dist_key_counts, nr_misses,
+              dist_key_counts);
+       printf("    task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
+              task, pfect_lru.nr_unique, pfect_lru.total,
+              pfect_lru.nr_misses, pfect_lru.total);
+
+       pfect_lru_destroy(&pfect_lru);
+       close(lru_map_fd);
+}
+
+static void test_parallel_lru_dist(int map_type, int map_flags,
+                                  int nr_tasks, unsigned int lru_size)
+{
+       int child_data[2];
+       int lru_map_fd;
+
+       printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
+              map_flags);
+
+       if (map_flags & BPF_F_NO_COMMON_LRU)
+               lru_map_fd = create_map(map_type, map_flags,
+                                       nr_cpus * lru_size);
+       else
+               lru_map_fd = create_map(map_type, map_flags,
+                                       nr_tasks * lru_size);
+       assert(lru_map_fd != -1);
+
+       child_data[0] = lru_map_fd;
+       child_data[1] = lru_size;
+
+       run_parallel(nr_tasks, do_test_lru_dist, child_data);
+
+       close(lru_map_fd);
+}
+
+/* Report which elements are lost when 1000 keys are inserted into an LRU
+ * map created with 900 entries (900 * nr_cpus with BPF_F_NO_COMMON_LRU).
+ *
+ * After each insertion, keys 101..min(key, 900) are looked up again so
+ * that they have been referenced recently.  At the end every key is looked
+ * up once more and the missing ones are counted in three groups:
+ * keys 1-100 (old, never re-referenced), 101-900 (re-referenced) and
+ * 901-1000 (new, never re-referenced).
+ */
+static void test_lru_loss0(int map_type, int map_flags)
+{
+       unsigned long long key, value[nr_cpus];
+       unsigned int old_unused_losses = 0;
+       unsigned int new_unused_losses = 0;
+       unsigned int used_losses = 0;
+       int map_fd;
+
+       printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+              map_flags);
+
+       assert(sched_next_online(0, 0) != -1);
+
+       if (map_flags & BPF_F_NO_COMMON_LRU)
+               map_fd = create_map(map_type, map_flags, 900 * nr_cpus);
+       else
+               map_fd = create_map(map_type, map_flags, 900);
+
+       assert(map_fd != -1);
+
+       value[0] = 1234;
+
+       for (key = 1; key <= 1000; key++) {
+               /* Must be as wide as the map key (create_map() uses
+                * sizeof(unsigned long long)): the lookup reads a whole
+                * key from this address.
+                */
+               unsigned long long start_key, end_key;
+
+               assert(bpf_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
+
+               start_key = 101;
+               end_key = min(key, 900);
+
+               while (start_key <= end_key) {
+                       bpf_lookup_elem(map_fd, &start_key, value);
+                       start_key++;
+               }
+       }
+
+       for (key = 1; key <= 1000; key++) {
+               if (bpf_lookup_elem(map_fd, &key, value)) {
+                       if (key <= 100)
+                               old_unused_losses++;
+                       else if (key <= 900)
+                               used_losses++;
+                       else
+                               new_unused_losses++;
+               }
+       }
+
+       close(map_fd);
+
+       printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) "
+              "newer-elem-losses:%d(/100)\n",
+              old_unused_losses, used_losses, new_unused_losses);
+}
+
+static void test_lru_loss1(int map_type, int map_flags)
+{
+       unsigned long long key, value[nr_cpus];
+       int map_fd;
+       unsigned int nr_losses = 0;
+
+       printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+              map_flags);
+
+       assert(sched_next_online(0, 0) != -1);
+
+       if (map_flags & BPF_F_NO_COMMON_LRU)
+               map_fd = create_map(map_type, map_flags, 1000 * nr_cpus);
+       else
+               map_fd = create_map(map_type, map_flags, 1000);
+
+       assert(map_fd != -1);
+
+       value[0] = 1234;
+
+       for (key = 1; key <= 1000; key++)
+               assert(!bpf_update_elem(map_fd, &key, value, BPF_NOEXIST));
+
+       for (key = 1; key <= 1000; key++) {
+               if (bpf_lookup_elem(map_fd, &key, value))
+                       nr_losses++;
+       }
+
+       close(map_fd);
+
+       printf("nr_losses:%d(/1000)\n", nr_losses);
+}
+
+static void do_test_parallel_lru_loss(int task, void *data)
+{
+       const unsigned int nr_stable_elems = 1000;
+       const unsigned int nr_repeats = 100000;
+
+       int map_fd = *(int *)data;
+       unsigned long long stable_base;
+       unsigned long long key, value[nr_cpus];
+       unsigned long long next_ins_key;
+       unsigned int nr_losses = 0;
+       unsigned int i;
+
+       stable_base = task * nr_repeats * 2 + 1;
+       next_ins_key = stable_base;
+       value[0] = 1234;
+       for (i = 0; i < nr_stable_elems; i++) {
+               assert(bpf_update_elem(map_fd, &next_ins_key, value,
+                                      BPF_NOEXIST) == 0);
+               next_ins_key++;
+       }
+
+       for (i = 0; i < nr_repeats; i++) {
+               int rn;
+
+               rn = rand();
+
+               if (rn % 10) {
+                       key = rn % nr_stable_elems + stable_base;
+                       bpf_lookup_elem(map_fd, &key, value);
+               } else {
+                       bpf_update_elem(map_fd, &next_ins_key, value,
+                                       BPF_NOEXIST);
+                       next_ins_key++;
+               }
+       }
+
+       key = stable_base;
+       for (i = 0; i < nr_stable_elems; i++) {
+               if (bpf_lookup_elem(map_fd, &key, value))
+                       nr_losses++;
+               key++;
+       }
+
+       printf("    task:%d nr_losses:%u\n", task, nr_losses);
+}
+
+static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
+{
+       int map_fd;
+
+       printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
+              map_flags);
+
+       /* Give 20% more than the active working set */
+       if (map_flags & BPF_F_NO_COMMON_LRU)
+               map_fd = create_map(map_type, map_flags,
+                                   nr_cpus * (1000 + 200));
+       else
+               map_fd = create_map(map_type, map_flags,
+                                   nr_tasks * (1000 + 200));
+
+       assert(map_fd != -1);
+
+       run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd);
+
+       close(map_fd);
+}
+
+/* Usage: <dist-file> <lru-size> <nr-tasks>
+ *
+ * Reads the key distribution from <dist-file>, then runs every LRU test
+ * once for each entry of map_flags[] (the common LRU list and
+ * BPF_F_NO_COMMON_LRU).  <nr-tasks> is capped at the number of possible
+ * CPUs.  Returns 0 on success and -1 on bad arguments or an empty key file.
+ */
+int main(int argc, char **argv)
+{
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+       int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
+       const char *dist_file;
+       int nr_tasks = 1;
+       int lru_size;
+       int f;
+
+       if (argc < 4) {
+               printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n",
+                      argv[0]);
+               return -1;
+       }
+
+       dist_file = argv[1];
+       lru_size = atoi(argv[2]);
+       nr_tasks = atoi(argv[3]);
+
+       /* Both values end up as unsigned sizes (map size, malloc() size,
+        * length of the pid array in run_parallel()), so reject anything
+        * that is not a positive number.
+        */
+       if (lru_size <= 0 || nr_tasks <= 0) {
+               printf("<lru-size> and <nr-tasks> must be positive integers\n");
+               return -1;
+       }
+
+       setbuf(stdout, NULL);
+
+       assert(!setrlimit(RLIMIT_MEMLOCK, &r));
+
+       srand(time(NULL));
+
+       nr_cpus = bpf_num_possible_cpus();
+       assert(nr_cpus != -1);
+       printf("nr_cpus:%d\n\n", nr_cpus);
+
+       nr_tasks = min(nr_tasks, nr_cpus);
+
+       dist_key_counts = read_keys(dist_file, &dist_keys);
+       if (!dist_key_counts) {
+               printf("%s has no key\n", dist_file);
+               return -1;
+       }
+
+       for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
+               test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
+               test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
+               test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
+                                      nr_tasks);
+               test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
+                                      nr_tasks, lru_size);
+               printf("\n");
+       }
+
+       free(dist_keys);
+
+       return 0;
+}
diff --git a/samples/bpf/test_lwt_bpf.c b/samples/bpf/test_lwt_bpf.c
new file mode 100644 (file)
index 0000000..bacc801
--- /dev/null
@@ -0,0 +1,253 @@
+/* Copyright (c) 2016 Thomas Graf <tgraf@tgraf.ch>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmpv6.h>
+#include <linux/if_ether.h>
+#include "bpf_helpers.h"
+#include <string.h>
+
+# define printk(fmt, ...)                                              \
+               ({                                                      \
+                       char ____fmt[] = fmt;                           \
+                       bpf_trace_printk(____fmt, sizeof(____fmt),      \
+                                    ##__VA_ARGS__);                    \
+               })
+
+#define CB_MAGIC 1234
+
+/* Test: Pass all packets through */
+SEC("nop")
+int do_nop(struct __sk_buff *skb)
+{
+       return BPF_OK;
+}
+
+/* Test: Verify context information can be accessed */
+SEC("test_ctx")
+int do_test_ctx(struct __sk_buff *skb)
+{
+       skb->cb[0] = CB_MAGIC;
+       printk("len %d hash %d protocol %d\n", skb->len, skb->hash,
+              skb->protocol);
+       printk("cb %d ingress_ifindex %d ifindex %d\n", skb->cb[0],
+              skb->ingress_ifindex, skb->ifindex);
+
+       return BPF_OK;
+}
+
+/* Test: Ensure skb->cb[] buffer is cleared */
+SEC("test_cb")
+int do_test_cb(struct __sk_buff *skb)
+{
+       printk("cb0: %x cb1: %x cb2: %x\n", skb->cb[0], skb->cb[1],
+              skb->cb[2]);
+       printk("cb3: %x cb4: %x\n", skb->cb[3], skb->cb[4]);
+
+       return BPF_OK;
+}
+
+/* Test: Verify skb data can be read */
+SEC("test_data")
+int do_test_data(struct __sk_buff *skb)
+{
+       void *data = (void *)(long)skb->data;
+       void *data_end = (void *)(long)skb->data_end;
+       struct iphdr *iph = data;
+
+       if (data + sizeof(*iph) > data_end) {
+               printk("packet truncated\n");
+               return BPF_DROP;
+       }
+
+       printk("src: %x dst: %x\n", iph->saddr, iph->daddr);
+
+       return BPF_OK;
+}
+
+#define IP_CSUM_OFF offsetof(struct iphdr, check)
+#define IP_DST_OFF offsetof(struct iphdr, daddr)
+#define IP_SRC_OFF offsetof(struct iphdr, saddr)
+#define IP_PROTO_OFF offsetof(struct iphdr, protocol)
+#define TCP_CSUM_OFF offsetof(struct tcphdr, check)
+#define UDP_CSUM_OFF offsetof(struct udphdr, check)
+#define IS_PSEUDO 0x10
+
+/* Replace one IPv4 address in the packet and update the checksums.
+ *
+ * @skb:      packet to modify
+ * @old_ip:   address currently stored in the header; used together with
+ *            @new_ip for the checksum updates
+ * @new_ip:   address to store
+ * @rw_daddr: non-zero rewrites the destination address, zero the source
+ *
+ * Returns BPF_OK on success and BPF_DROP as soon as any helper fails.
+ *
+ * NOTE(review): 'off' is handed to bpf_l4_csum_replace() as is, but
+ * TCP_CSUM_OFF/UDP_CSUM_OFF are offsets inside the L4 header only; confirm
+ * whether the length of the IP header has to be added.
+ */
+static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip,
+                         uint32_t new_ip, int rw_daddr)
+{
+       int ret, off = 0, flags = IS_PSEUDO;
+       uint8_t proto;
+
+       ret = bpf_skb_load_bytes(skb, IP_PROTO_OFF, &proto, 1);
+       if (ret < 0) {
+               printk("bpf_skb_load_bytes failed: %d\n", ret);
+               return BPF_DROP;
+       }
+
+       /* Pick the L4 checksum field; off stays 0 for other protocols */
+       switch (proto) {
+       case IPPROTO_TCP:
+               off = TCP_CSUM_OFF;
+               break;
+
+       case IPPROTO_UDP:
+               off = UDP_CSUM_OFF;
+               flags |= BPF_F_MARK_MANGLED_0;
+               break;
+
+       case IPPROTO_ICMPV6:
+               off = offsetof(struct icmp6hdr, icmp6_cksum);
+               break;
+       }
+
+       if (off) {
+               ret = bpf_l4_csum_replace(skb, off, old_ip, new_ip,
+                                         flags | sizeof(new_ip));
+               if (ret < 0) {
+                       printk("bpf_l4_csum_replace failed: %d\n", ret);
+                       return BPF_DROP;
+               }
+       }
+
+       ret = bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
+       if (ret < 0) {
+               printk("bpf_l3_csum_replace failed: %d\n", ret);
+               return BPF_DROP;
+       }
+
+       if (rw_daddr)
+               ret = bpf_skb_store_bytes(skb, IP_DST_OFF, &new_ip, sizeof(new_ip), 0);
+       else
+               ret = bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0);
+
+       if (ret < 0) {
+               printk("bpf_skb_store_bytes() failed: %d\n", ret);
+               return BPF_DROP;
+       }
+
+       return BPF_OK;
+}
+
+/* Test: Verify skb data can be modified */
+SEC("test_rewrite")
+int do_test_rewrite(struct __sk_buff *skb)
+{
+       uint32_t old_ip, new_ip = 0x3fea8c0;
+       int ret;
+
+       ret = bpf_skb_load_bytes(skb, IP_DST_OFF, &old_ip, 4);
+       if (ret < 0) {
+               printk("bpf_skb_load_bytes failed: %d\n", ret);
+               return BPF_DROP;
+       }
+
+       if (old_ip == 0x2fea8c0) {
+               printk("out: rewriting from %x to %x\n", old_ip, new_ip);
+               return rewrite(skb, old_ip, new_ip, 1);
+       }
+
+       return BPF_OK;
+}
+
+/* Prepend an Ethernet header to the packet and redirect it.
+ *
+ * The header is built from the compile-time constants SRC_MAC and DST_MAC
+ * with protocol ETH_P_IP, and the packet is redirected to DST_IFINDEX.
+ *
+ * Returns the result of bpf_redirect(), or BPF_DROP if making room for the
+ * header or writing it fails.
+ */
+static inline int __do_push_ll_and_redirect(struct __sk_buff *skb)
+{
+       uint64_t smac = SRC_MAC, dmac = DST_MAC;
+       int ret, ifindex = DST_IFINDEX;
+       struct ethhdr ehdr;
+
+       ret = bpf_skb_change_head(skb, 14, 0);
+       if (ret < 0) {
+               printk("skb_change_head() failed: %d\n", ret);
+               /* No room was added: writing the header below would
+                * overwrite the start of the existing packet data.
+                */
+               return BPF_DROP;
+       }
+
+       ehdr.h_proto = __constant_htons(ETH_P_IP);
+       memcpy(&ehdr.h_source, &smac, 6);
+       memcpy(&ehdr.h_dest, &dmac, 6);
+
+       ret = bpf_skb_store_bytes(skb, 0, &ehdr, sizeof(ehdr), 0);
+       if (ret < 0) {
+               printk("skb_store_bytes() failed: %d\n", ret);
+               return BPF_DROP;
+       }
+
+       return bpf_redirect(ifindex, 0);
+}
+
+SEC("push_ll_and_redirect_silent")
+int do_push_ll_and_redirect_silent(struct __sk_buff *skb)
+{
+       return __do_push_ll_and_redirect(skb);
+}
+
+SEC("push_ll_and_redirect")
+int do_push_ll_and_redirect(struct __sk_buff *skb)
+{
+       int ret, ifindex = DST_IFINDEX;
+
+       ret = __do_push_ll_and_redirect(skb);
+       if (ret >= 0)
+               printk("redirected to %d\n", ifindex);
+
+       return ret;
+}
+
+static inline void __fill_garbage(struct __sk_buff *skb)
+{
+       uint64_t f = 0xFFFFFFFFFFFFFFFF;
+
+       bpf_skb_store_bytes(skb, 0, &f, sizeof(f), 0);
+       bpf_skb_store_bytes(skb, 8, &f, sizeof(f), 0);
+       bpf_skb_store_bytes(skb, 16, &f, sizeof(f), 0);
+       bpf_skb_store_bytes(skb, 24, &f, sizeof(f), 0);
+       bpf_skb_store_bytes(skb, 32, &f, sizeof(f), 0);
+       bpf_skb_store_bytes(skb, 40, &f, sizeof(f), 0);
+       bpf_skb_store_bytes(skb, 48, &f, sizeof(f), 0);
+       bpf_skb_store_bytes(skb, 56, &f, sizeof(f), 0);
+       bpf_skb_store_bytes(skb, 64, &f, sizeof(f), 0);
+       bpf_skb_store_bytes(skb, 72, &f, sizeof(f), 0);
+       bpf_skb_store_bytes(skb, 80, &f, sizeof(f), 0);
+       bpf_skb_store_bytes(skb, 88, &f, sizeof(f), 0);
+}
+
+SEC("fill_garbage")
+int do_fill_garbage(struct __sk_buff *skb)
+{
+       __fill_garbage(skb);
+       printk("Set initial 96 bytes of header to FF\n");
+       return BPF_OK;
+}
+
+SEC("fill_garbage_and_redirect")
+int do_fill_garbage_and_redirect(struct __sk_buff *skb)
+{
+       int ifindex = DST_IFINDEX;
+       __fill_garbage(skb);
+       printk("redirected to %d\n", ifindex);
+       return bpf_redirect(ifindex, 0);
+}
+
+/* Drop all packets */
+SEC("drop_all")
+int do_drop_all(struct __sk_buff *skb)
+{
+       printk("dropping with: %d\n", BPF_DROP);
+       return BPF_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_lwt_bpf.sh b/samples/bpf/test_lwt_bpf.sh
new file mode 100644 (file)
index 0000000..a695ae2
--- /dev/null
@@ -0,0 +1,399 @@
+#!/bin/bash
+
+# Uncomment to see generated bytecode
+#VERBOSE=verbose
+
+NS1=lwt_ns1
+NS2=lwt_ns2
+VETH0=tst_lwt1a
+VETH1=tst_lwt1b
+VETH2=tst_lwt2a
+VETH3=tst_lwt2b
+IPVETH0="192.168.254.1"
+IPVETH1="192.168.254.2"
+IPVETH1b="192.168.254.3"
+
+IPVETH2="192.168.111.1"
+IPVETH3="192.168.111.2"
+
+IP_LOCAL="192.168.99.1"
+
+TRACE_ROOT=/sys/kernel/debug/tracing
+
+function lookup_mac()
+{
+       set +x
+       if [ ! -z "$2" ]; then
+               MAC=$(ip netns exec $2 ip link show $1 | grep ether | awk '{print $2}')
+       else
+               MAC=$(ip link show $1 | grep ether | awk '{print $2}')
+       fi
+       MAC="${MAC//:/}"
+       echo "0x${MAC:10:2}${MAC:8:2}${MAC:6:2}${MAC:4:2}${MAC:2:2}${MAC:0:2}"
+       set -x
+}
+
+function cleanup {
+       set +ex
+       rm test_lwt_bpf.o 2> /dev/null
+       ip link del $VETH0 2> /dev/null
+       ip link del $VETH1 2> /dev/null
+       ip link del $VETH2 2> /dev/null
+       ip link del $VETH3 2> /dev/null
+       ip netns exec $NS1 killall netserver
+       ip netns delete $NS1 2> /dev/null
+       ip netns delete $NS2 2> /dev/null
+       set -ex
+}
+
+function setup_one_veth {
+       ip netns add $1
+       ip link add $2 type veth peer name $3
+       ip link set dev $2 up
+       ip addr add $4/24 dev $2
+       ip link set $3 netns $1
+       ip netns exec $1 ip link set dev $3 up
+       ip netns exec $1 ip addr add $5/24 dev $3
+
+       if [ "$6" ]; then
+               ip netns exec $1 ip addr add $6/32 dev $3
+       fi
+}
+
+function get_trace {
+       set +x
+       cat ${TRACE_ROOT}/trace | grep -v '^#'
+       set -x
+}
+
+function cleanup_routes {
+       ip route del ${IPVETH1}/32 dev $VETH0 2> /dev/null || true
+       ip route del table local local ${IP_LOCAL}/32 dev lo 2> /dev/null || true
+}
+
+function install_test {
+       cleanup_routes
+       cp /dev/null ${TRACE_ROOT}/trace
+
+       OPTS="encap bpf headroom 14 $1 obj test_lwt_bpf.o section $2 $VERBOSE"
+
+       if [ "$1" == "in" ];  then
+               ip route add table local local ${IP_LOCAL}/32 $OPTS dev lo
+       else
+               ip route add ${IPVETH1}/32 $OPTS dev $VETH0
+       fi
+}
+
+function remove_prog {
+       if [ "$1" == "in" ];  then
+               ip route del table local local ${IP_LOCAL}/32 dev lo
+       else
+               ip route del ${IPVETH1}/32 dev $VETH0
+       fi
+}
+
+function filter_trace {
+       # Add newline to allow starting EXPECT= variables on newline
+       NL=$'\n'
+       echo "${NL}$*" | sed -e 's/^.*: : //g'
+}
+
+function expect_fail {
+       set +x
+       echo "FAIL:"
+       echo "Expected: $1"
+       echo "Got: $2"
+       set -x
+       exit 1
+}
+
+function match_trace {
+       set +x
+       RET=0
+       TRACE=$1
+       EXPECT=$2
+       GOT="$(filter_trace "$TRACE")"
+
+       [ "$GOT" != "$EXPECT" ] && {
+               expect_fail "$EXPECT" "$GOT"
+               RET=1
+       }
+       set -x
+       return $RET
+}
+
+function test_start {
+       set +x
+       echo "----------------------------------------------------------------"
+       echo "Starting test: $*"
+       echo "----------------------------------------------------------------"
+       set -x
+}
+
+function failure {
+       get_trace
+       echo "FAIL: $*"
+       exit 1
+}
+
+function test_ctx_xmit {
+       test_start "test_ctx on lwt xmit"
+       install_test xmit test_ctx
+       ping -c 3 $IPVETH1 || {
+               failure "test_ctx xmit: packets are dropped"
+       }
+       match_trace "$(get_trace)" "
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 0 ifindex $DST_IFINDEX
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 0 ifindex $DST_IFINDEX
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 0 ifindex $DST_IFINDEX" || exit 1
+       remove_prog xmit
+}
+
+function test_ctx_out {
+       test_start "test_ctx on lwt out"
+       install_test out test_ctx
+       ping -c 3 $IPVETH1 || {
+               failure "test_ctx out: packets are dropped"
+       }
+       match_trace "$(get_trace)" "
+len 84 hash 0 protocol 0
+cb 1234 ingress_ifindex 0 ifindex 0
+len 84 hash 0 protocol 0
+cb 1234 ingress_ifindex 0 ifindex 0
+len 84 hash 0 protocol 0
+cb 1234 ingress_ifindex 0 ifindex 0" || exit 1
+       remove_prog out
+}
+
+# Run the test_ctx program on the lwt "in" hook of the local route for
+# $IP_LOCAL and compare the trace output with the expected context values.
+function test_ctx_in {
+       test_start "test_ctx on lwt in"
+       install_test in test_ctx
+       ping -c 3 $IP_LOCAL || {
+               failure "test_ctx in: packets are dropped"
+       }
+       # We will see both request & reply packets, as the packets
+       # will be from $IP_LOCAL => $IP_LOCAL
+       match_trace "$(get_trace)" "
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 1 ifindex 1
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 1 ifindex 1
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 1 ifindex 1
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 1 ifindex 1
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 1 ifindex 1
+len 84 hash 0 protocol 8
+cb 1234 ingress_ifindex 1 ifindex 1" || exit 1
+       remove_prog in
+}
+
+function test_data {
+       test_start "test_data on lwt $1"
+       install_test $1 test_data
+       ping -c 3 $IPVETH1 || {
+               failure "test_data ${1}: packets are dropped"
+       }
+       match_trace "$(get_trace)" "
+src: 1fea8c0 dst: 2fea8c0
+src: 1fea8c0 dst: 2fea8c0
+src: 1fea8c0 dst: 2fea8c0" || exit 1
+       remove_prog $1
+}
+
+function test_data_in {
+       test_start "test_data on lwt in"
+       install_test in test_data
+       ping -c 3 $IP_LOCAL || {
+               failure "test_data in: packets are dropped"
+       }
+       # We will see both request & reply packets, as the packets will
+       # be from $IP_LOCAL => $IP_LOCAL
+       match_trace "$(get_trace)" "
+src: 163a8c0 dst: 163a8c0
+src: 163a8c0 dst: 163a8c0
+src: 163a8c0 dst: 163a8c0
+src: 163a8c0 dst: 163a8c0
+src: 163a8c0 dst: 163a8c0
+src: 163a8c0 dst: 163a8c0" || exit 1
+       remove_prog in
+}
+
+function test_cb {
+       test_start "test_cb on lwt $1"
+       install_test $1 test_cb
+       ping -c 3 $IPVETH1 || {
+               failure "test_cb ${1}: packets are dropped"
+       }
+       match_trace "$(get_trace)" "
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0" || exit 1
+       remove_prog $1
+}
+
+function test_cb_in {
+       test_start "test_cb on lwt in"
+       install_test in test_cb
+       ping -c 3 $IP_LOCAL || {
+               failure "test_cb in: packets are dropped"
+       }
+       # We will see both request & reply packets, as the packets will
+       # be from $IP_LOCAL => $IP_LOCAL
+       match_trace "$(get_trace)" "
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0
+cb0: 0 cb1: 0 cb2: 0
+cb3: 0 cb4: 0" || exit 1
+       remove_prog in
+}
+
+function test_drop_all {
+       test_start "test_drop_all on lwt $1"
+       install_test $1 drop_all
+       ping -c 3 $IPVETH1 && {
+               failure "test_drop_all ${1}: Unexpected success of ping"
+       }
+       match_trace "$(get_trace)" "
+dropping with: 2
+dropping with: 2
+dropping with: 2" || exit 1
+       remove_prog $1
+}
+
+function test_drop_all_in {
+       test_start "test_drop_all on lwt in"
+       install_test in drop_all
+       ping -c 3 $IP_LOCAL && {
+               failure "test_drop_all in: Unexpected success of ping"
+       }
+       match_trace "$(get_trace)" "
+dropping with: 2
+dropping with: 2
+dropping with: 2" || exit 1
+       remove_prog in
+}
+
+function test_push_ll_and_redirect {
+       test_start "test_push_ll_and_redirect on lwt xmit"
+       install_test xmit push_ll_and_redirect
+       ping -c 3 $IPVETH1 || {
+               failure "Redirected packets appear to be dropped"
+       }
+       match_trace "$(get_trace)" "
+redirected to $DST_IFINDEX
+redirected to $DST_IFINDEX
+redirected to $DST_IFINDEX" || exit 1
+       remove_prog xmit
+}
+
+function test_no_l2_and_redirect {
+       test_start "test_no_l2_and_redirect on lwt xmit"
+       install_test xmit fill_garbage_and_redirect
+       ping -c 3 $IPVETH1 && {
+               failure "Unexpected success despite lack of L2 header"
+       }
+       match_trace "$(get_trace)" "
+redirected to $DST_IFINDEX
+redirected to $DST_IFINDEX
+redirected to $DST_IFINDEX" || exit 1
+       remove_prog xmit
+}
+
+# Run the test_rewrite program on the lwt xmit hook: pings to $IPVETH1 must
+# still succeed and each packet must log the rewrite.
+function test_rewrite {
+       test_start "test_rewrite on lwt xmit"
+       install_test xmit test_rewrite
+       ping -c 3 $IPVETH1 || {
+               failure "Rewritten packets appear to be dropped"
+       }
+       match_trace "$(get_trace)" "
+out: rewriting from 2fea8c0 to 3fea8c0
+out: rewriting from 2fea8c0 to 3fea8c0
+out: rewriting from 2fea8c0 to 3fea8c0" || exit 1
+       # Remove from the same hook the program was installed on
+       remove_prog xmit
+}
+
+# Run the fill_garbage program on the lwt xmit hook: the ping is expected
+# to fail and each packet must log that it was overwritten.
+function test_fill_garbage {
+       test_start "test_fill_garbage on lwt xmit"
+       install_test xmit fill_garbage
+       ping -c 3 $IPVETH1 && {
+               failure "test_fill_garbage: Unexpected success of ping"
+       }
+       match_trace "$(get_trace)" "
+Set initial 96 bytes of header to FF
+Set initial 96 bytes of header to FF
+Set initial 96 bytes of header to FF" || exit 1
+       remove_prog xmit
+}
+
+function test_netperf_nop {
+       test_start "test_netperf_nop on lwt xmit"
+       install_test xmit nop
+       netperf -H $IPVETH1 -t TCP_STREAM || {
+               failure "packets appear to be dropped"
+       }
+       match_trace "$(get_trace)" ""|| exit 1
+       remove_prog xmit
+}
+
+function test_netperf_redirect {
+       test_start "test_netperf_redirect on lwt xmit"
+       install_test xmit push_ll_and_redirect_silent
+       netperf -H $IPVETH1 -t TCP_STREAM || {
+               failure "Rewritten packets appear to be dropped"
+       }
+       match_trace "$(get_trace)" ""|| exit 1
+       remove_prog xmit
+}
+
+cleanup
+setup_one_veth $NS1 $VETH0 $VETH1 $IPVETH0 $IPVETH1 $IPVETH1b
+setup_one_veth $NS2 $VETH2 $VETH3 $IPVETH2 $IPVETH3
+ip netns exec $NS1 netserver
+echo 1 > ${TRACE_ROOT}/tracing_on
+
+DST_MAC=$(lookup_mac $VETH1 $NS1)
+SRC_MAC=$(lookup_mac $VETH0)
+DST_IFINDEX=$(cat /sys/class/net/$VETH0/ifindex)
+
+CLANG_OPTS="-O2 -target bpf -I ../include/"
+CLANG_OPTS+=" -DSRC_MAC=$SRC_MAC -DDST_MAC=$DST_MAC -DDST_IFINDEX=$DST_IFINDEX"
+clang $CLANG_OPTS -c test_lwt_bpf.c -o test_lwt_bpf.o
+
+test_ctx_xmit
+test_ctx_out
+test_ctx_in
+test_data "xmit"
+test_data "out"
+test_data_in
+test_cb "xmit"
+test_cb "out"
+test_cb_in
+test_drop_all "xmit"
+test_drop_all "out"
+test_drop_all_in
+test_rewrite
+test_push_ll_and_redirect
+test_no_l2_and_redirect
+test_fill_garbage
+test_netperf_nop
+test_netperf_redirect
+
+cleanup
+echo 0 > ${TRACE_ROOT}/tracing_on
+exit 0
index ab5b19e68acf0c3d53916ce6324f57777cf3acbd..3e225e331f664847736242fc5fd1c7abce9c696b 100644 (file)
@@ -4,8 +4,10 @@
 #include <signal.h>
 #include <linux/bpf.h>
 #include <string.h>
+
 #include "libbpf.h"
 #include "bpf_load.h"
+#include "bpf_util.h"
 
 #define MAX_INDEX      64
 #define MAX_STARS      38
@@ -36,8 +38,8 @@ struct hist_key {
 
 static void print_hist_for_pid(int fd, void *task)
 {
+       unsigned int nr_cpus = bpf_num_possible_cpus();
        struct hist_key key = {}, next_key;
-       unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
        long values[nr_cpus];
        char starstr[MAX_STARS];
        long value;
index 48716f7f0d8b9eae647a76ba93b07fe76a79f7bd..d0851cb4fa8d2c967662d9bcdc33b3b859eff9cc 100644 (file)
 #include <stdbool.h>
 #include <string.h>
 #include <linux/bpf.h>
+
 #include "libbpf.h"
 #include "bpf_load.h"
+#include "bpf_util.h"
 
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
 
@@ -20,7 +22,7 @@
 
 static void clear_stats(int fd)
 {
-       unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+       unsigned int nr_cpus = bpf_num_possible_cpus();
        __u64 values[nr_cpus];
        __u32 key;
 
@@ -77,7 +79,7 @@ static void print_banner(void)
 
 static void print_hist(int fd)
 {
-       unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+       unsigned int nr_cpus = bpf_num_possible_cpus();
        __u64 total_events = 0;
        long values[nr_cpus];
        __u64 max_cnt = 0;
index a5e109e398a1f8fb08b6cc1f42190852ada841e2..2b2150d6d6f78496a7bf7cc0026b6b9258f5d39c 100644 (file)
@@ -15,7 +15,9 @@
 #include <string.h>
 #include <sys/socket.h>
 #include <unistd.h>
+
 #include "bpf_load.h"
+#include "bpf_util.h"
 #include "libbpf.h"
 
 static int set_link_xdp_fd(int ifindex, int fd)
@@ -120,7 +122,7 @@ static void int_exit(int sig)
  */
 static void poll_stats(int interval)
 {
-       unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+       unsigned int nr_cpus = bpf_num_possible_cpus();
        const unsigned int nr_keys = 256;
        __u64 values[nr_cpus], prev[nr_keys][nr_cpus];
        __u32 key;
diff --git a/samples/mei/.gitignore b/samples/mei/.gitignore
new file mode 100644 (file)
index 0000000..f356b81
--- /dev/null
@@ -0,0 +1 @@
+mei-amt-version
diff --git a/samples/mei/Makefile b/samples/mei/Makefile
new file mode 100644 (file)
index 0000000..7aac216
--- /dev/null
@@ -0,0 +1,9 @@
+# Build the MEI sample program with the (optionally cross) gcc, picking up
+# headers from ../../usr/include.
+CC := $(CROSS_COMPILE)gcc
+CFLAGS := -I../../usr/include
+
+PROGS := mei-amt-version
+
+# No explicit rule for $(PROGS) is given here; presumably make's built-in
+# rule for building a program from its .c file is relied upon.
+all: $(PROGS)
+
+# Remove the built program.
+clean:
+       rm -fr $(PROGS)
diff --git a/samples/mei/TODO b/samples/mei/TODO
new file mode 100644 (file)
index 0000000..6b3625d
--- /dev/null
@@ -0,0 +1,2 @@
+TODO:
+       - Cleanup and split the timer function
diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c
new file mode 100644 (file)
index 0000000..57d0d87
--- /dev/null
@@ -0,0 +1,479 @@
+/******************************************************************************
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Intel MEI Interface Header
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
+ * USA
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ *     Intel Corporation.
+ *     linux-mei@linux.intel.com
+ *     http://www.intel.com
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2003 - 2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  * Neither the name Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bits/wordsize.h>
+#include <linux/mei.h>
+
+/*****************************************************************************
+ * Intel Management Engine Interface
+ *****************************************************************************/
+
+#define mei_msg(_me, fmt, ARGS...) do {         \
+       if (_me->verbose)                       \
+               fprintf(stderr, fmt, ##ARGS);   \
+} while (0)
+
+#define mei_err(_me, fmt, ARGS...) do {         \
+       fprintf(stderr, "Error: " fmt, ##ARGS); \
+} while (0)
+
+/* State of one connection to an MEI firmware client. */
+struct mei {
+       uuid_le guid;           /* client UUID, copied in by mei_init() */
+       bool initialized;       /* set once the device is open; cleared by mei_deinit() */
+       bool verbose;           /* gates the mei_msg() diagnostics */
+       unsigned int buf_size;  /* max_msg_length reported on connect */
+       unsigned char prot_ver; /* protocol_version reported on connect */
+       int fd;                 /* device descriptor, -1 when closed */
+};
+
+/* Close the device descriptor if one is open and reset the client state. */
+static void mei_deinit(struct mei *cl)
+{
+       const bool is_open = (cl->fd != -1);
+
+       if (is_open)
+               close(cl->fd);
+
+       cl->initialized = false;
+       cl->prot_ver = 0;
+       cl->buf_size = 0;
+       cl->fd = -1;
+}
+
+/*
+ * Open /dev/mei and connect to the firmware client identified by @guid.
+ *
+ * @me:                   client state to fill in
+ * @guid:                 UUID of the firmware client to connect to
+ * @req_protocol_version: required protocol version, or 0 to accept any
+ * @verbose:              enable mei_msg() diagnostics for this client
+ *
+ * On success buf_size and prot_ver hold the values reported by the driver
+ * and true is returned. On any failure the client is reset through
+ * mei_deinit() and false is returned.
+ */
+static bool mei_init(struct mei *me, const uuid_le *guid,
+               unsigned char req_protocol_version, bool verbose)
+{
+       int result;
+       struct mei_client *cl;
+       struct mei_connect_client_data data;
+
+       me->verbose = verbose;
+
+       me->fd = open("/dev/mei", O_RDWR);
+       if (me->fd == -1) {
+               mei_err(me, "Cannot establish a handle to the Intel MEI driver\n");
+               goto err;
+       }
+       memcpy(&me->guid, guid, sizeof(*guid));
+       memset(&data, 0, sizeof(data));
+       me->initialized = true;
+
+       memcpy(&data.in_client_uuid, &me->guid, sizeof(me->guid));
+       result = ioctl(me->fd, IOCTL_MEI_CONNECT_CLIENT, &data);
+       if (result) {
+               mei_err(me, "IOCTL_MEI_CONNECT_CLIENT receive message. err=%d\n", result);
+               goto err;
+       }
+       cl = &data.out_client_properties;
+       /* the length is an unsigned quantity: print it as such */
+       mei_msg(me, "max_message_length %u\n",
+               (unsigned int)cl->max_msg_length);
+       mei_msg(me, "protocol_version %d\n", cl->protocol_version);
+
+       /* a requested version of 0 means "any version is acceptable" */
+       if ((req_protocol_version > 0) &&
+            (cl->protocol_version != req_protocol_version)) {
+               mei_err(me, "Intel MEI protocol version not supported\n");
+               goto err;
+       }
+
+       me->buf_size = cl->max_msg_length;
+       me->prot_ver = cl->protocol_version;
+
+       return true;
+err:
+       mei_deinit(me);
+       return false;
+}
+
+/*
+ * Read up to @len bytes from the MEI device into @buffer.
+ *
+ * Returns what read() returned. A negative result is reported and the
+ * client is torn down with mei_deinit(). @timeout is currently unused.
+ */
+static ssize_t mei_recv_msg(struct mei *me, unsigned char *buffer,
+                       ssize_t len, unsigned long timeout)
+{
+       ssize_t nread;
+
+       mei_msg(me, "call read length = %zd\n", len);
+
+       nread = read(me->fd, buffer, len);
+       if (nread >= 0) {
+               mei_msg(me, "read succeeded with result %zd\n", nread);
+               return nread;
+       }
+
+       mei_err(me, "read failed with status %zd %s\n",
+                       nread, strerror(errno));
+       mei_deinit(me);
+       return nread;
+}
+
+/*
+ * Write @len bytes from @buffer to the MEI device, then wait up to
+ * @timeout milliseconds for the descriptor to become readable.
+ *
+ * Returns the number of bytes written, 0 when select() timed out, or a
+ * negative value on failure (-errno from write(), or the -1 returned by
+ * select()). On a negative result the client is torn down with
+ * mei_deinit().
+ */
+static ssize_t mei_send_msg(struct mei *me, const unsigned char *buffer,
+                       ssize_t len, unsigned long timeout)
+{
+       struct timeval tv;
+       ssize_t written;
+       ssize_t rc;
+       fd_set set;
+
+       /* split the millisecond timeout into seconds + microseconds */
+       tv.tv_sec = timeout / 1000;
+       tv.tv_usec = (timeout % 1000) * 1000;
+
+       mei_msg(me, "call write length = %zd\n", len);
+
+       written = write(me->fd, buffer, len);
+       if (written < 0) {
+               rc = -errno;
+               mei_err(me, "write failed with status %zd %s\n",
+                       written, strerror(errno));
+               goto out;
+       }
+
+       FD_ZERO(&set);
+       FD_SET(me->fd, &set);
+       rc = select(me->fd + 1 , &set, NULL, NULL, &tv);
+       if (rc > 0 && FD_ISSET(me->fd, &set)) {
+               mei_msg(me, "write success\n");
+       } else if (rc == 0) {
+               mei_err(me, "write failed on timeout with status\n");
+               goto out;
+       } else { /* rc < 0 */
+               mei_err(me, "write failed on select with status %zd\n", rc);
+               goto out;
+       }
+
+       rc = written;
+out:
+       if (rc < 0)
+               mei_deinit(me);
+
+       return rc;
+}
+
+/***************************************************************************
+ * Intel Advanced Management Technology ME Client
+ ***************************************************************************/
+
+#define AMT_MAJOR_VERSION 1
+#define AMT_MINOR_VERSION 1
+
+#define AMT_STATUS_SUCCESS                0x0
+#define AMT_STATUS_INTERNAL_ERROR         0x1
+#define AMT_STATUS_NOT_READY              0x2
+#define AMT_STATUS_INVALID_AMT_MODE       0x3
+#define AMT_STATUS_INVALID_MESSAGE_LENGTH 0x4
+
+#define AMT_STATUS_HOST_IF_EMPTY_RESPONSE  0x4000
+#define AMT_STATUS_SDK_RESOURCES      0x1004
+
+
+#define AMT_BIOS_VERSION_LEN   65
+#define AMT_VERSIONS_NUMBER    50
+#define AMT_UNICODE_STRING_LEN 20
+
+/*
+ * Length-prefixed string with a fixed AMT_UNICODE_STRING_LEN bytes of
+ * storage. Packed, so it can be overlaid on the raw response bytes.
+ */
+struct amt_unicode_string {
+       uint16_t length;        /* validated by amt_verify_code_versions() */
+       char string[AMT_UNICODE_STRING_LEN];
+} __attribute__((packed));
+
+/* One (description, version) pair; main() prints these as "desc:\tversion". */
+struct amt_version_type {
+       struct amt_unicode_string description;
+       struct amt_unicode_string version;
+} __attribute__((packed));
+
+/* Major/minor version pair carried at the start of every message header. */
+struct amt_version {
+       uint8_t major;
+       uint8_t minor;
+} __attribute__((packed));
+
+/*
+ * Payload of a code-versions response: a BIOS version field followed by
+ * 'count' entries in 'versions' (room for AMT_VERSIONS_NUMBER entries).
+ */
+struct amt_code_versions {
+       uint8_t bios[AMT_BIOS_VERSION_LEN];
+       uint32_t count;         /* number of valid entries in versions[] */
+       struct amt_version_type versions[AMT_VERSIONS_NUMBER];
+} __attribute__((packed));
+
+/***************************************************************************
+ * Intel Advanced Management Technology Host Interface
+ ***************************************************************************/
+
+/* Header that starts every host-interface request and response. */
+struct amt_host_if_msg_header {
+       struct amt_version version;
+       uint16_t _reserved;     /* responses are rejected unless this is 0 */
+       uint32_t command;       /* request or response command code */
+       uint32_t length;        /* bytes following this header */
+} __attribute__((packed));
+
+/*
+ * Response layout: the common header, a status word, then the
+ * command-specific payload. The payload is a C99 flexible array member, so
+ * sizeof() of this struct covers the header and status only.
+ */
+struct amt_host_if_resp_header {
+       struct amt_host_if_msg_header header;
+       uint32_t status;        /* AMT_STATUS_* reported by the firmware */
+       unsigned char data[];   /* header.length - sizeof(status) bytes */
+} __attribute__((packed));
+
+/* UUID of the firmware client that amt_host_if_init() connects to. */
+const uuid_le MEI_IAMTHIF = UUID_LE(0x12f80028, 0xb4b7, 0x4b2d,  \
+                               0xac, 0xa8, 0x46, 0xe0, 0xff, 0x65, 0x81, 0x4c);
+
+#define AMT_HOST_IF_CODE_VERSIONS_REQUEST  0x0400001A
+#define AMT_HOST_IF_CODE_VERSIONS_RESPONSE 0x0480001A
+
+/*
+ * Request sent by amt_get_code_versions(): a bare header with the
+ * code-versions request command and no payload (length 0).
+ */
+const struct amt_host_if_msg_header CODE_VERSION_REQ = {
+       .version = {AMT_MAJOR_VERSION, AMT_MINOR_VERSION},
+       ._reserved = 0,
+       .command = AMT_HOST_IF_CODE_VERSIONS_REQUEST,
+       .length = 0
+};
+
+
+/* Host-interface session: an MEI client plus the send timeout to use. */
+struct amt_host_if {
+       struct mei mei_cl;              /* underlying MEI connection */
+       unsigned long send_timeout;     /* passed to mei_send_msg() (milliseconds) */
+       bool initialized;               /* result of mei_init() */
+};
+
+
+/*
+ * Set up @acmd and connect its MEI client to MEI_IAMTHIF.
+ * A @send_timeout of 0 selects the default of 20000.
+ * Returns true when the MEI connection was established.
+ */
+static bool amt_host_if_init(struct amt_host_if *acmd,
+                     unsigned long send_timeout, bool verbose)
+{
+       bool connected;
+
+       if (send_timeout == 0)
+               send_timeout = 20000;
+       acmd->send_timeout = send_timeout;
+
+       connected = mei_init(&acmd->mei_cl, &MEI_IAMTHIF, 0, verbose);
+       acmd->initialized = connected;
+       return connected;
+}
+
+/* Mark the session as unusable and release its MEI client. */
+static void amt_host_if_deinit(struct amt_host_if *acmd)
+{
+       acmd->initialized = false;
+       mei_deinit(&acmd->mei_cl);
+}
+
+/*
+ * Sanity-check the code-versions payload carried in @resp.
+ *
+ * Checks, in order:
+ *  - header.length is large enough for the status word plus the fixed
+ *    bios/count fields (otherwise the subtraction below would wrap);
+ *  - count fits in versions[] and matches the number of entries implied
+ *    by header.length;
+ *  - every description length is at most AMT_UNICODE_STRING_LEN;
+ *  - every version string is NUL-terminated inside its array, at exactly
+ *    the offset given by its length field.
+ *
+ * Returns AMT_STATUS_SUCCESS or AMT_STATUS_INTERNAL_ERROR.
+ */
+static uint32_t amt_verify_code_versions(const struct amt_host_if_resp_header *resp)
+{
+       const struct amt_code_versions *code_ver;
+       const size_t fixed_len = sizeof(uint32_t) +     /* status word */
+                                sizeof(code_ver->bios) +
+                                sizeof(code_ver->count);
+       size_t entries_len;
+       uint32_t len;
+       uint32_t i;
+
+       if (resp->header.length < fixed_len)
+               return AMT_STATUS_INTERNAL_ERROR;
+
+       code_ver = (const struct amt_code_versions *)resp->data;
+       entries_len = resp->header.length - fixed_len;
+
+       if (code_ver->count > AMT_VERSIONS_NUMBER ||
+           code_ver->count != entries_len / sizeof(struct amt_version_type))
+               return AMT_STATUS_INTERNAL_ERROR;
+
+       for (i = 0; i < code_ver->count; i++) {
+               const struct amt_version_type *entry = &code_ver->versions[i];
+
+               len = entry->description.length;
+               if (len > AMT_UNICODE_STRING_LEN)
+                       return AMT_STATUS_INTERNAL_ERROR;
+
+               len = entry->version.length;
+               /* bound len first: string[len] must stay inside the array */
+               if (len >= AMT_UNICODE_STRING_LEN ||
+                   entry->version.string[len] != '\0' ||
+                   len != strlen(entry->version.string))
+                       return AMT_STATUS_INTERNAL_ERROR;
+       }
+
+       return AMT_STATUS_SUCCESS;
+}
+
+/*
+ * Validate the generic header of a response of @response_size bytes.
+ *
+ * The response must be at least as large as a response header, its size
+ * must equal header.length plus the message header, the command must be
+ * @command, the reserved field must be 0, the major version must match
+ * and the minor version must not be older than ours.
+ *
+ * Returns AMT_STATUS_SUCCESS or AMT_STATUS_INTERNAL_ERROR.
+ */
+static uint32_t amt_verify_response_header(uint32_t command,
+                               const struct amt_host_if_msg_header *resp_hdr,
+                               uint32_t response_size)
+{
+       const size_t msg_hdr_sz = sizeof(struct amt_host_if_msg_header);
+
+       if (response_size < sizeof(struct amt_host_if_resp_header))
+               return AMT_STATUS_INTERNAL_ERROR;
+
+       if (response_size != resp_hdr->length + msg_hdr_sz)
+               return AMT_STATUS_INTERNAL_ERROR;
+
+       if (resp_hdr->command != command)
+               return AMT_STATUS_INTERNAL_ERROR;
+
+       if (resp_hdr->_reserved != 0)
+               return AMT_STATUS_INTERNAL_ERROR;
+
+       if (resp_hdr->version.major != AMT_MAJOR_VERSION)
+               return AMT_STATUS_INTERNAL_ERROR;
+
+       if (resp_hdr->version.minor < AMT_MINOR_VERSION)
+               return AMT_STATUS_INTERNAL_ERROR;
+
+       return AMT_STATUS_SUCCESS;
+}
+
+/*
+ * Send @command (@command_sz bytes) and read back the response.
+ *
+ * @read_buf:    receives a zeroed buffer of mei_cl.buf_size bytes holding
+ *               the response. Once allocation has succeeded the buffer is
+ *               left in *read_buf on every return path, including errors;
+ *               the caller must free() it.
+ * @rcmd:        command code the response header must carry
+ * @expected_sz: exact response size required, or 0 to accept any size
+ *
+ * Returns AMT_STATUS_SUCCESS, the non-zero status reported in the
+ * response, AMT_STATUS_HOST_IF_EMPTY_RESPONSE when nothing could be read
+ * (read error or zero bytes), AMT_STATUS_SDK_RESOURCES on allocation
+ * failure, or AMT_STATUS_INTERNAL_ERROR otherwise.
+ */
+static uint32_t amt_host_if_call(struct amt_host_if *acmd,
+                       const unsigned char *command, ssize_t command_sz,
+                       uint8_t **read_buf, uint32_t rcmd,
+                       unsigned int expected_sz)
+{
+       uint32_t in_buf_sz;
+       uint32_t out_buf_sz;
+       ssize_t written;
+       ssize_t received;
+       uint32_t status;
+       struct amt_host_if_resp_header *msg_hdr;
+
+       in_buf_sz = acmd->mei_cl.buf_size;
+       *read_buf = calloc(in_buf_sz, sizeof(uint8_t));
+       if (*read_buf == NULL)
+               return AMT_STATUS_SDK_RESOURCES;
+       msg_hdr = (struct amt_host_if_resp_header *)*read_buf;
+
+       written = mei_send_msg(&acmd->mei_cl,
+                               command, command_sz, acmd->send_timeout);
+       if (written != command_sz)
+               return AMT_STATUS_INTERNAL_ERROR;
+
+       /*
+        * Keep the result signed: a negative read error must not be turned
+        * into a huge unsigned length.
+        */
+       received = mei_recv_msg(&acmd->mei_cl, *read_buf, in_buf_sz, 2000);
+       if (received <= 0)
+               return AMT_STATUS_HOST_IF_EMPTY_RESPONSE;
+       out_buf_sz = (uint32_t)received;
+
+       /* the status word is only present in a complete response header */
+       if (out_buf_sz < sizeof(*msg_hdr))
+               return AMT_STATUS_INTERNAL_ERROR;
+
+       status = msg_hdr->status;
+       if (status != AMT_STATUS_SUCCESS)
+               return status;
+
+       status = amt_verify_response_header(rcmd,
+                               &msg_hdr->header, out_buf_sz);
+       if (status != AMT_STATUS_SUCCESS)
+               return status;
+
+       if (expected_sz && expected_sz != out_buf_sz)
+               return AMT_STATUS_INTERNAL_ERROR;
+
+       return AMT_STATUS_SUCCESS;
+}
+
+
+/*
+ * Query the firmware code versions and store them in @versions.
+ *
+ * @versions is only written when AMT_STATUS_SUCCESS is returned. Bytes of
+ * @versions beyond the payload that was actually received are zeroed, and
+ * no more than the received payload is read from the response buffer.
+ */
+static uint32_t amt_get_code_versions(struct amt_host_if *cmd,
+                              struct amt_code_versions *versions)
+{
+       struct amt_host_if_resp_header *response = NULL;
+       size_t payload_len;
+       uint32_t status;
+
+       status = amt_host_if_call(cmd,
+                       (const unsigned char *)&CODE_VERSION_REQ,
+                       sizeof(CODE_VERSION_REQ),
+                       (uint8_t **)&response,
+                       AMT_HOST_IF_CODE_VERSIONS_RESPONSE, 0);
+
+       if (status != AMT_STATUS_SUCCESS)
+               goto out;
+
+       status = amt_verify_code_versions(response);
+       if (status != AMT_STATUS_SUCCESS)
+               goto out;
+
+       /* header.length counts the status word plus the payload */
+       if (response->header.length < sizeof(response->status)) {
+               status = AMT_STATUS_INTERNAL_ERROR;
+               goto out;
+       }
+       payload_len = response->header.length - sizeof(response->status);
+       if (payload_len > sizeof(*versions))
+               payload_len = sizeof(*versions);
+
+       memset(versions, 0, sizeof(*versions));
+       memcpy(versions, response->data, payload_len);
+out:
+       free(response);         /* free(NULL) is a no-op */
+
+       return status;
+}
+
+/************************** end of amt_host_if_command ***********************/
+/*
+ * Report whether Intel AMT is enabled and, if so, print its code versions.
+ *
+ * Usage: mei-amt-version [-v]      (-v enables verbose MEI diagnostics)
+ *
+ * Exit status is 0 when AMT is reported as enabled or disabled, and 1 when
+ * the host interface could not be initialized or the query failed.
+ */
+int main(int argc, char **argv)
+{
+       struct amt_code_versions ver;
+       struct amt_host_if acmd;
+       unsigned int i;
+       uint32_t status;
+       int ret;
+       bool verbose;
+
+       verbose = (argc > 1 && strcmp(argv[1], "-v") == 0);
+
+       if (!amt_host_if_init(&acmd, 5000, verbose)) {
+               ret = 1;
+               goto out;
+       }
+
+       status = amt_get_code_versions(&acmd, &ver);
+
+       amt_host_if_deinit(&acmd);
+
+       switch (status) {
+       case AMT_STATUS_HOST_IF_EMPTY_RESPONSE:
+               printf("Intel AMT: DISABLED\n");
+               ret = 0;
+               break;
+       case AMT_STATUS_SUCCESS:
+               printf("Intel AMT: ENABLED\n");
+               /*
+                * Never walk past versions[], and bound each %s by the
+                * array size: the strings are not guaranteed to be
+                * NUL-terminated within their storage.
+                */
+               for (i = 0; i < ver.count && i < AMT_VERSIONS_NUMBER; i++) {
+                       printf("%.*s:\t%.*s\n",
+                               AMT_UNICODE_STRING_LEN,
+                               ver.versions[i].description.string,
+                               AMT_UNICODE_STRING_LEN,
+                               ver.versions[i].version.string);
+               }
+               ret = 0;
+               break;
+       default:
+               printf("An error has occurred\n");
+               ret = 1;
+               break;
+       }
+
+out:
+       return ret;
+}
diff --git a/samples/mic/mpssd/.gitignore b/samples/mic/mpssd/.gitignore
new file mode 100644 (file)
index 0000000..8b7c72f
--- /dev/null
@@ -0,0 +1 @@
+mpssd
diff --git a/samples/mic/mpssd/Makefile b/samples/mic/mpssd/Makefile
new file mode 100644 (file)
index 0000000..3e3ef91
--- /dev/null
@@ -0,0 +1,27 @@
+# Everything below is skipped when CROSS_COMPILE is set, and also when the
+# (possibly auto-detected) ARCH is not x86.
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+# Map i?86 and x86_64 to "x86" unless ARCH was already provided.
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+
+PROGS := mpssd
+CC = $(CROSS_COMPILE)gcc
+CFLAGS := -I../../../usr/include -I../../../tools/include
+
+# "make DEBUG=<value>" forwards the value to the compiler as -DDEBUG=<value>.
+ifdef DEBUG
+CFLAGS += -DDEBUG=$(DEBUG)
+endif
+
+all: $(PROGS)
+mpssd: mpssd.c sysfs.c
+       $(CC) $(CFLAGS) mpssd.c sysfs.c -o mpssd -lpthread
+
+# Installs the daemon and the micctrl helper script into /usr/sbin.
+install:
+       install mpssd /usr/sbin/mpssd
+       install micctrl /usr/sbin/micctrl
+
+clean:
+       rm -fr $(PROGS)
+
+endif
+endif
diff --git a/samples/mic/mpssd/micctrl b/samples/mic/mpssd/micctrl
new file mode 100755 (executable)
index 0000000..8f2629b
--- /dev/null
@@ -0,0 +1,173 @@
+#!/bin/bash
+# Intel MIC Platform Software Stack (MPSS)
+#
+# Copyright(c) 2013 Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License, version 2, as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# The full GNU General Public License is included in this distribution in
+# the file called "COPYING".
+#
+# Intel MIC User Space Tools.
+#
+# micctrl - Controls MIC boot/start/stop.
+#
+# chkconfig: 2345 95 05
+# description: start MPSS stack processing.
+#
+### BEGIN INIT INFO
+# Provides: micctrl
+### END INIT INFO
+
+# Source function library.
+. /etc/init.d/functions
+
+sysfs="/sys/class/mic"
+
+# Print the "state" and "shutdown_status" attributes of card $1.
+# Sets the global variable f to $sysfs/$1.
+_status()
+{
+       f=$sysfs/$1
+       echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`"
+}
+
+# Show the status of card $1 when its name starts with "mic"; otherwise
+# show every entry under $sysfs, stopping at the first helper failure.
+status()
+{
+       if [ "$(echo $1 | head -c3)" == "mic" ]; then
+               _status $1
+               return $?
+       fi
+       for f in $sysfs/*
+       do
+               _status $(basename $f)
+               RETVAL=$?
+               if [ $RETVAL -ne 0 ]; then
+                       return $RETVAL
+               fi
+       done
+       return 0
+}
+
+# Request a reset of card $1 by writing "reset" to its state attribute.
+# Sets the global variable f to $sysfs/$1.
+_reset()
+{
+       f=$sysfs/$1
+       echo reset > $f/state
+}
+
+# Reset card $1 when its name starts with "mic"; otherwise reset every
+# entry under $sysfs, stopping at the first helper failure.
+reset()
+{
+       if [ "$(echo $1 | head -c3)" == "mic" ]; then
+               _reset $1
+               return $?
+       fi
+       for f in $sysfs/*
+       do
+               _reset $(basename $f)
+               RETVAL=$?
+               if [ $RETVAL -ne 0 ]; then
+                       return $RETVAL
+               fi
+       done
+       return 0
+}
+
+# Boot card $1: select the "linux" boot mode, point the card at the
+# mic/uos.img firmware and the per-card mic/$1.image ramdisk, then write
+# "boot" to its state attribute. Sets the global variable f to $sysfs/$1.
+_boot()
+{
+       f=$sysfs/$1
+       echo "linux" > $f/bootmode
+       echo "mic/uos.img" > $f/firmware
+       echo "mic/$1.image" > $f/ramdisk
+       echo "boot" > $f/state
+}
+
+# Boot card $1 when its name starts with "mic"; otherwise boot every entry
+# under $sysfs, stopping at the first helper failure.
+boot()
+{
+       if [ "$(echo $1 | head -c3)" == "mic" ]; then
+               _boot $1
+               return $?
+       fi
+       for f in $sysfs/*
+       do
+               _boot $(basename $f)
+               RETVAL=$?
+               if [ $RETVAL -ne 0 ]; then
+                       return $RETVAL
+               fi
+       done
+       return 0
+}
+
+# Request a shutdown of card $1 by writing "shutdown" to its state
+# attribute. Sets the global variable f to $sysfs/$1.
+_shutdown()
+{
+       f=$sysfs/$1
+       echo shutdown > $f/state
+}
+
+# Shut down card $1 when its name starts with "mic"; otherwise shut down
+# every entry under $sysfs, stopping at the first helper failure.
+shutdown()
+{
+       if [ "$(echo $1 | head -c3)" == "mic" ]; then
+               _shutdown $1
+               return $?
+       fi
+       for f in $sysfs/*
+       do
+               _shutdown $(basename $f)
+               RETVAL=$?
+               if [ $RETVAL -ne 0 ]; then
+                       return $RETVAL
+               fi
+       done
+       return 0
+}
+
+# Poll the state attribute of card $1 once per second until it reads
+# either "offline" or "online". Sets the global variable f to $sysfs/$1.
+# NOTE(review): the progress message only mentions "offline" although the
+# loop also ends on "online".
+_wait()
+{
+       f=$sysfs/$1
+       while [ "`cat $f/state`" != "offline" -a "`cat $f/state`" != "online" ]
+       do
+               sleep 1
+               echo -e "Waiting for $1 to go offline"
+       done
+}
+
+wait()
+{
+       if [ "`echo $1 | head -c3`" == "mic" ]; then
+               _wait $1
+               return $?
+       fi
+       # Wait for the cards to go offline
+       for f in $sysfs/*
+       do
+               _wait `basename $f`
+               RETVAL=$?
+               [ $RETVAL -ne 0 ] && return $RETVAL
+       done
+       return 0
+}
+
+# Nothing can be controlled when the $sysfs directory is missing: report it
+# and exit with status 3.
+if [ ! -d "$sysfs" ]; then
+       echo -e $"Module unloaded "
+       exit 3
+fi
+
+# Dispatch on the option in $1; $2 optionally names a single card.
+# Any other option prints the usage text and exits with status 2.
+case $1 in
+       -s)
+               status $2
+               ;;
+       -r)
+               reset $2
+               ;;
+       -b)
+               boot $2
+               ;;
+       -S)
+               shutdown $2
+               ;;
+       -w)
+               wait $2
+               ;;
+       *)
+               echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}"
+               exit 2
+esac
+
+# Propagate the status of the selected action.
+exit $?
diff --git a/samples/mic/mpssd/mpss b/samples/mic/mpssd/mpss
new file mode 100755 (executable)
index 0000000..5fcf9fa
--- /dev/null
@@ -0,0 +1,200 @@
+#!/bin/bash
+# Intel MIC Platform Software Stack (MPSS)
+#
+# Copyright(c) 2013 Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License, version 2, as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# The full GNU General Public License is included in this distribution in
+# the file called "COPYING".
+#
+# Intel MIC User Space Tools.
+#
+# mpss Start mpssd.
+#
+# chkconfig: 2345 95 05
+# description: start MPSS stack processing.
+#
+### BEGIN INIT INFO
+# Provides: mpss
+# Required-Start:
+# Required-Stop:
+# Short-Description: MPSS stack control
+# Description: MPSS stack control
+### END INIT INFO
+
+# Source function library.
+. /etc/init.d/functions
+
+exec=/usr/sbin/mpssd
+sysfs="/sys/class/mic"
+mic_modules="mic_host mic_x100_dma scif vop"
+
+# Bring the stack up: load the modules, start the daemon, boot the cards
+# and wait until each card answers a ping.
+# Exits with status 5 when $exec is not executable; returns 0 right away
+# when mpssd is already running; otherwise returns the status of the first
+# failing step or, at the end, the ping status of the last card processed.
+start()
+{
+       [ -x $exec ] || exit 5
+
+       # Look for "mpssd" in the fourth column of the "ps -e" output
+       if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then
+               echo -e $"MPSSD already running! "
+               success
+               echo
+               return 0
+       fi
+
+       echo -e $"Starting MPSS Stack"
+       echo -e $"Loading MIC drivers:" $mic_modules
+
+       modprobe -a $mic_modules
+       RETVAL=$?
+       if [ $RETVAL -ne 0 ]; then
+               failure
+               echo
+               return $RETVAL
+       fi
+
+       # Start the daemon
+       echo -n $"Starting MPSSD "
+       $exec
+       RETVAL=$?
+       if [ $RETVAL -ne 0 ]; then
+               failure
+               echo
+               return $RETVAL
+       fi
+       success
+       echo
+
+       sleep 5
+
+       # Boot the cards
+       micctrl -b
+
+       # Wait till ping works
+       for f in $sysfs/*
+       do
+               # count runs from 100 down to 0, one ping attempt per second
+               count=100
+               # Take what follows "address," in the cmdline attribute, up
+               # to the next "," or ";"
+               ipaddr=`cat $f/cmdline`
+               ipaddr=${ipaddr#*address,}
+               ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1`
+               while [ $count -ge 0 ]
+               do
+                       echo -e "Pinging "`basename $f`" "
+                       ping -c 1 $ipaddr &> /dev/null
+                       RETVAL=$?
+                       if [ $RETVAL -eq 0 ]; then
+                               success
+                               break
+                       fi
+                       sleep 1
+                       count=`expr $count - 1`
+               done
+               # RETVAL still holds the result of the last ping for this card
+               [ $RETVAL -ne 0 ] && failure || success
+               echo
+       done
+       return $RETVAL
+}
+
+# Shut the cards down, wait until each reports state "ready", show their
+# status and kill the daemon.
+# Returns 0 when $sysfs is missing, otherwise the status of killall.
+stop()
+{
+       echo -e $"Shutting down MPSS Stack: "
+
+       # Bail out if module is unloaded
+       if [ ! -d "$sysfs" ]; then
+               echo -n $"Module unloaded "
+               success
+               echo
+               return 0
+       fi
+
+       # Shut down the cards.
+       micctrl -S
+
+       # Poll once per second until every card reports state "ready"
+       for f in $sysfs/*
+       do
+               while [ "`cat $f/state`" != "ready" ]
+               do
+                       sleep 1
+                       echo -e "Waiting for "`basename $f`" to become ready"
+               done
+       done
+
+       # Display the status of the cards
+       micctrl -s
+
+       # Kill MPSSD now
+       echo -n $"Killing MPSSD"
+       killall -9 mpssd 2>/dev/null
+       RETVAL=$?
+       [ $RETVAL -ne 0 ] && failure || success
+       echo
+       return $RETVAL
+}
+
+# Stop the stack, pause for five seconds, then start it again.
+# The result of stop is not checked; the function's status is that of start.
+restart()
+{
+       stop
+       sleep 5
+       start
+}
+
+# Show the card status via micctrl and say whether an mpssd process shows
+# up in the process list. Always returns 0.
+status()
+{
+       micctrl -s
+       if [ "$(ps -e | awk '{print $4}' | grep mpssd | head -n 1)" != "mpssd" ]; then
+               echo "mpssd is stopped"
+       else
+               echo "mpssd is running"
+       fi
+       return 0
+}
+
+# Stop the stack and remove the MIC modules.
+# Returns straight away when $sysfs is missing; otherwise returns the status
+# of "modprobe -r".
+unload()
+{
+       if [ ! -d "$sysfs" ]; then
+               echo -n $"No MIC_HOST Module: "
+               success
+               echo
+               return
+       fi
+
+       stop
+
+       sleep 5
+       echo -n $"Removing MIC drivers:" $mic_modules
+       modprobe -r $mic_modules
+       RETVAL=$?
+       # NOTE(review): with "a && failure || success", success also runs if
+       # failure itself returns non-zero - confirm against the functions
+       # library sourced at the top of the script.
+       [ $RETVAL -ne 0 ] && failure || success
+       echo
+       return $RETVAL
+}
+
+# Dispatch on the action in $1; anything else prints the usage text and
+# exits with status 2.
+case $1 in
+       start)
+               start
+               ;;
+       stop)
+               stop
+               ;;
+       restart)
+               restart
+               ;;
+       status)
+               status
+               ;;
+       unload)
+               unload
+               ;;
+       *)
+               echo $"Usage: $0 {start|stop|restart|status|unload}"
+               exit 2
+esac
+
+# Propagate the status of the selected action.
+exit $?
diff --git a/samples/mic/mpssd/mpssd.c b/samples/mic/mpssd/mpssd.c
new file mode 100644 (file)
index 0000000..49db1de
--- /dev/null
@@ -0,0 +1,1826 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC User Space Tools.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <assert.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <poll.h>
+#include <features.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_blk.h>
+#include <linux/version.h>
+#include "mpssd.h"
+#include <linux/mic_ioctl.h>
+#include <linux/mic_common.h>
+#include <tools/endian.h>
+
+static void *init_mic(void *arg);
+
+static FILE *logfp;
+static struct mic_info mic_list;
+
+/* Number of elements in a true array (not meaningful for pointers). */
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+/* Smaller of x and y after converting both to 'type'; each evaluated once. */
+#define min_t(type, x, y) ({                           \
+               type __min1 = (x);                      \
+               type __min2 = (y);                      \
+               __min1 < __min2 ? __min1 : __min2; })
+
+/*
+ * align addr on a size boundary - adjust address up/down if needed.
+ * The mask arithmetic requires size to be a power of two.
+ */
+#define _ALIGN_DOWN(addr, size)  ((addr)&(~((size)-1)))
+/* Arguments are parenthesized so that expression arguments expand safely. */
+#define _ALIGN_UP(addr, size)    _ALIGN_DOWN((addr) + (size) - 1, size)
+
+/* align addr on a size boundary - adjust address up if needed */
+#define _ALIGN(addr, size)     _ALIGN_UP(addr, size)
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)        _ALIGN(addr, PAGE_SIZE)
+
+/* Force a single volatile access to x. */
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#define GSO_ENABLED            1
+#define MAX_GSO_SIZE           (64 * 1024)
+#define ETH_H_LEN              14
+/* Largest GSO payload plus Ethernet header, rounded up to 64 bytes. */
+#define MAX_NET_PKT_SIZE       (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
+#define MIC_DEVICE_PAGE_END    0x1000
+
+#ifndef VIRTIO_NET_HDR_F_DATA_VALID
+#define VIRTIO_NET_HDR_F_DATA_VALID    2       /* Csum is valid */
+#endif
+
+/*
+ * Static descriptor block for the virtio console device: a device
+ * descriptor of type VIRTIO_ID_CONSOLE, two virtqueue configurations of
+ * MIC_VRING_ENTRIES entries each, the feature words and the console
+ * configuration.  host_features has no initializer here and is therefore
+ * zero.
+ */
+static struct {
+       struct mic_device_desc dd;
+       struct mic_vqconfig vqconfig[2];
+       __u32 host_features, guest_acknowledgements;
+       struct virtio_console_config cons_config;
+} virtcons_dev_page = {
+       .dd = {
+               .type = VIRTIO_ID_CONSOLE,
+               .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
+               .feature_len = sizeof(virtcons_dev_page.host_features),
+               .config_len = sizeof(virtcons_dev_page.cons_config),
+       },
+       .vqconfig[0] = {
+               .num = htole16(MIC_VRING_ENTRIES),
+       },
+       .vqconfig[1] = {
+               .num = htole16(MIC_VRING_ENTRIES),
+       },
+};
+
+/*
+ * Static descriptor block for the virtio net device: a device descriptor
+ * of type VIRTIO_ID_NET, two virtqueue configurations of MIC_VRING_ENTRIES
+ * entries each, the feature words and the net configuration.  When
+ * GSO_ENABLED is non-zero the host advertises checksum, GSO, guest TSO4,
+ * guest TSO6 and guest ECN; otherwise no host features are advertised.
+ */
+static struct {
+       struct mic_device_desc dd;
+       struct mic_vqconfig vqconfig[2];
+       __u32 host_features, guest_acknowledgements;
+       struct virtio_net_config net_config;
+} virtnet_dev_page = {
+       .dd = {
+               .type = VIRTIO_ID_NET,
+               .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
+               .feature_len = sizeof(virtnet_dev_page.host_features),
+               .config_len = sizeof(virtnet_dev_page.net_config),
+       },
+       .vqconfig[0] = {
+               .num = htole16(MIC_VRING_ENTRIES),
+       },
+       .vqconfig[1] = {
+               .num = htole16(MIC_VRING_ENTRIES),
+       },
+#if GSO_ENABLED
+       /* Feature bits are stored little-endian. */
+       .host_features = htole32(
+               1 << VIRTIO_NET_F_CSUM |
+               1 << VIRTIO_NET_F_GSO |
+               1 << VIRTIO_NET_F_GUEST_TSO4 |
+               1 << VIRTIO_NET_F_GUEST_TSO6 |
+               1 << VIRTIO_NET_F_GUEST_ECN),
+#else
+               .host_features = 0,
+#endif
+};
+
+static const char *mic_config_dir = "/etc/mpss";
+static const char *virtblk_backend = "VIRTBLK_BACKEND";
+/*
+ * Static descriptor block for the virtio block device: a device descriptor
+ * of type VIRTIO_ID_BLOCK, a single virtqueue configuration of
+ * MIC_VRING_ENTRIES entries, the feature words and the block configuration.
+ * Only VIRTIO_BLK_F_SEG_MAX is advertised, with seg_max set to
+ * MIC_VRING_ENTRIES - 2.  capacity starts at 0; presumably it is filled in
+ * once the backing file is known (not visible in this part of the file).
+ */
+static struct {
+       struct mic_device_desc dd;
+       struct mic_vqconfig vqconfig[1];
+       __u32 host_features, guest_acknowledgements;
+       struct virtio_blk_config blk_config;
+} virtblk_dev_page = {
+       .dd = {
+               .type = VIRTIO_ID_BLOCK,
+               .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
+               .feature_len = sizeof(virtblk_dev_page.host_features),
+               .config_len = sizeof(virtblk_dev_page.blk_config),
+       },
+       .vqconfig[0] = {
+               .num = htole16(MIC_VRING_ENTRIES),
+       },
+       .host_features =
+               htole32(1<<VIRTIO_BLK_F_SEG_MAX),
+       .blk_config = {
+               .seg_max = htole32(MIC_VRING_ENTRIES - 2),
+               .capacity = htole64(0),
+        }
+};
+
+static char *myname;
+
+/*
+ * Run the "ip" utility with the NULL-terminated argument vector ifargv and
+ * wait for it to finish.
+ *
+ * Returns 0 once the child has been reaped, or a negative value when fork()
+ * or waitpid() fails.  The exit status of "ip" itself is not inspected.
+ */
+static int
+tap_run_ip(struct mic_info *mic, char *const ifargv[])
+{
+       pid_t pid;
+       int ret;
+
+       pid = fork();
+       if (pid < 0) {
+               mpsslog("%s fork failed errno %s\n",
+                       mic->name, strerror(errno));
+               return -1;
+       }
+       if (pid == 0) {
+               execvp("ip", ifargv);
+               /*
+                * execvp() only returns on failure.  Terminate the child
+                * here: returning would let a second copy of the daemon run
+                * on in the caller's code path.
+                */
+               mpsslog("%s execvp failed errno %s\n",
+                       mic->name, strerror(errno));
+               _exit(127);
+       }
+
+       ret = waitpid(pid, NULL, 0);
+       if (ret < 0) {
+               mpsslog("%s waitpid failed errno %s\n",
+                       mic->name, strerror(errno));
+               return ret;
+       }
+       return 0;
+}
+
+/*
+ * Bring the TAP interface dev up and assign it the address
+ * 172.31.<mic->id + 1>.254/24 by invoking "ip" twice.
+ *
+ * Returns 0 on success and a negative value if either helper process could
+ * not be started or waited for.
+ */
+static int
+tap_configure(struct mic_info *mic, char *dev)
+{
+       char *ifargv[7];
+       /*
+        * Sized for the longest address string an int id can produce.  An
+        * IFNAMSIZ (16 byte) buffer truncates "172.31.10.254/24" to a "/2"
+        * prefix as soon as mic->id reaches 9.
+        */
+       char ipaddr[32];
+       int ret;
+
+       ifargv[0] = "ip";
+       ifargv[1] = "link";
+       ifargv[2] = "set";
+       ifargv[3] = dev;
+       ifargv[4] = "up";
+       ifargv[5] = NULL;
+       mpsslog("Configuring %s\n", dev);
+       ret = tap_run_ip(mic, ifargv);
+       if (ret < 0)
+               return ret;
+
+       snprintf(ipaddr, sizeof(ipaddr), "172.31.%d.254/24", mic->id + 1);
+
+       ifargv[0] = "ip";
+       ifargv[1] = "addr";
+       ifargv[2] = "add";
+       ifargv[3] = ipaddr;
+       ifargv[4] = "dev";
+       ifargv[5] = dev;
+       ifargv[6] = NULL;
+       mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
+       ret = tap_run_ip(mic, ifargv);
+       if (ret < 0)
+               return ret;
+
+       mpsslog("MIC name %s %s %d DONE!\n",
+               mic->name, __func__, __LINE__);
+       return 0;
+}
+
+/*
+ * Open /dev/net/tun and create a TAP interface (no packet info, with virtio
+ * net headers).  If dev is a non-empty string it is used as the requested
+ * interface name.  When GSO_ENABLED is set, checksum and TSO offloads are
+ * enabled on the interface as well.
+ *
+ * On success the interface name held in ifr after TUNSETIFF is copied back
+ * into dev and the open descriptor is returned.  On failure a negative
+ * value is returned and no descriptor is left open.
+ */
+static int tun_alloc(struct mic_info *mic, char *dev)
+{
+       struct ifreq ifr;
+       int fd, err;
+#if GSO_ENABLED
+       unsigned offload;
+#endif
+       fd = open("/dev/net/tun", O_RDWR);
+       if (fd < 0) {
+               mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
+               return fd;
+       }
+
+       memset(&ifr, 0, sizeof(ifr));
+
+       ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+       /*
+        * Copy at most IFNAMSIZ - 1 bytes: ifr was zeroed above, so the name
+        * stays NUL-terminated even when dev fills its whole buffer.
+        */
+       if (*dev)
+               strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);
+
+       err = ioctl(fd, TUNSETIFF, (void *)&ifr);
+       if (err < 0) {
+               mpsslog("%s %s %d TUNSETIFF failed %s\n",
+                       mic->name, __func__, __LINE__, strerror(errno));
+               close(fd);
+               return err;
+       }
+#if GSO_ENABLED
+       offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN;
+
+       err = ioctl(fd, TUNSETOFFLOAD, offload);
+       if (err < 0) {
+               mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
+                       mic->name, __func__, __LINE__, strerror(errno));
+               close(fd);
+               return err;
+       }
+#endif
+       strcpy(dev, ifr.ifr_name);
+       mpsslog("Created TAP %s\n", dev);
+       return fd;
+}
+
+#define NET_FD_VIRTIO_NET 0
+#define NET_FD_TUN 1
+#define MAX_NET_FD 2
+
+/*
+ * Record dp as the device page pointer for the virtio device of the given
+ * type.  An unknown type is logged and trips an assertion.
+ */
+static void set_dp(struct mic_info *mic, int type, void *dp)
+{
+       switch (type) {
+       case VIRTIO_ID_CONSOLE:
+               mic->mic_console.console_dp = dp;
+               break;
+       case VIRTIO_ID_NET:
+               mic->mic_net.net_dp = dp;
+               break;
+       case VIRTIO_ID_BLOCK:
+               mic->mic_virtblk.block_dp = dp;
+               break;
+       default:
+               mpsslog("%s %s %d not found\n", mic->name, __func__, type);
+               assert(0);
+       }
+}
+
+/*
+ * Return the device page pointer stored for the virtio device of the given
+ * type.  An unknown type is logged, trips an assertion and yields NULL when
+ * assertions are compiled out.
+ */
+static void *get_dp(struct mic_info *mic, int type)
+{
+       void *page = NULL;
+
+       switch (type) {
+       case VIRTIO_ID_CONSOLE:
+               page = mic->mic_console.console_dp;
+               break;
+       case VIRTIO_ID_NET:
+               page = mic->mic_net.net_dp;
+               break;
+       case VIRTIO_ID_BLOCK:
+               page = mic->mic_virtblk.block_dp;
+               break;
+       default:
+               mpsslog("%s %s %d not found\n", mic->name, __func__, type);
+               assert(0);
+       }
+       return page;
+}
+
+/*
+ * Walk the descriptors that follow struct mic_bootparam in the device page
+ * of the given type and return the first one whose type matches.
+ *
+ * A descriptor of type 0 ends the list; descriptors of type -1 are skipped.
+ * Returns NULL (after logging) when no match is found within PAGE_SIZE.
+ */
+static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
+{
+       struct mic_device_desc *d;
+       void *dp = get_dp(mic, type);
+       int off = sizeof(struct mic_bootparam);
+
+       while (off < PAGE_SIZE) {
+               d = dp + off;
+
+               /* End of list */
+               if (d->type == 0)
+                       break;
+
+               if (d->type != -1) {
+                       mpsslog("%s %s d-> type %d d %p\n",
+                               mic->name, __func__, d->type, d);
+
+                       if (d->type == (__u8)type)
+                               return d;
+               }
+
+               /* Step over this descriptor to the next one. */
+               off += mic_total_desc_size(d);
+       }
+       mpsslog("%s %s %d not found\n", mic->name, __func__, type);
+       return NULL;
+}
+
+/*
+ * Index of the descriptor chained after desc, or -1U when desc does not
+ * carry VRING_DESC_F_NEXT.  See comments in vhost.c for explanation of
+ * next_desc().
+ */
+static unsigned next_desc(struct vring_desc *desc)
+{
+       if (le16toh(desc->flags) & VRING_DESC_F_NEXT)
+               return le16toh(desc->next);
+
+       return -1U;
+}
+
+/* Total of iov_len over the copy->iovcnt entries of copy->iov. */
+static ssize_t
+sum_iovec_len(struct mic_copy_desc *copy)
+{
+       ssize_t total = 0;
+       unsigned int idx = 0;
+
+       while (idx < copy->iovcnt) {
+               total += copy->iov[idx].iov_len;
+               idx++;
+       }
+       return total;
+}
+
+/*
+ * Check that copy->out_len equals the summed iovec lengths; on a mismatch
+ * log both values and trip an assertion.
+ */
+static inline void verify_out_len(struct mic_info *mic,
+       struct mic_copy_desc *copy)
+{
+       ssize_t total = sum_iovec_len(copy);
+
+       if (copy->out_len == total)
+               return;
+
+       mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
+               mic->name, __func__, __LINE__,
+               copy->out_len, total);
+       assert(copy->out_len == sum_iovec_len(copy));
+}
+
+/*
+ * Log the base address and length of every entry in copy->iov.
+ * s and line are the caller's function name and line number, included in
+ * each log line.
+ */
+static void
+disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
+          const char *s, int line)
+{
+       unsigned int i;
+
+       /* i is unsigned, so it is printed with %u rather than %d. */
+       for (i = 0; i < copy->iovcnt; i++)
+               mpsslog("%s %s %d copy->iov[%u] addr %p len 0x%zx\n",
+                       mic->name, s, line, i,
+                       copy->iov[i].iov_base, copy->iov[i].iov_len);
+}
+
+/* Read vr->info->avail_idx through a single volatile access. */
+static inline __u16 read_avail_idx(struct mic_vring *vr)
+{
+       __u16 idx = ACCESS_ONCE(vr->info->avail_idx);
+
+       return idx;
+}
+
+/*
+ * Fill in copy for a transfer of len bytes: ring index 0 for tx and 1 for
+ * rx, with the used ring to be updated.  For VIRTIO_ID_NET the payload
+ * length goes into iov[1], less the virtio net header; for every other
+ * type len goes into iov[0] unchanged.
+ */
+static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
+                               struct mic_copy_desc *copy, ssize_t len)
+{
+       copy->update_used = true;
+       copy->vr_idx = tx ? 0 : 1;
+
+       if (type != VIRTIO_ID_NET) {
+               copy->iov[0].iov_len = len;
+               return;
+       }
+       copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
+}
+
+/*
+ * Central API which triggers the copies: issues MIC_VIRTIO_COPY_DESC on fd
+ * for copy.  Returns the ioctl result, logging it when it is non-zero.
+ */
+static int
+mic_virtio_copy(struct mic_info *mic, int fd,
+               struct mic_vring *vr, struct mic_copy_desc *copy)
+{
+       int rc = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
+
+       if (!rc)
+               return 0;
+
+       mpsslog("%s %s %d errno %s ret %d\n",
+               mic->name, __func__, __LINE__,
+               strerror(errno), rc);
+       return rc;
+}
+
+/*
+ * Bytes taken by a vring of num entries: the descriptor table and the
+ * available ring, rounded up to align, followed by the used ring.
+ */
+static inline unsigned _vring_size(unsigned int num, unsigned long align)
+{
+       const size_t avail_end = sizeof(struct vring_desc) * num
+                                + sizeof(__u16) * (3 + num);
+       const size_t used_len = sizeof(__u16) * 3
+                               + sizeof(struct vring_used_elem) * num;
+
+       return ((avail_end + align - 1) & ~(align - 1)) + used_len;
+}
+
+/*
+ * Map the device page and the vrings of one virtio device read-only and
+ * set up vr0 and, if given, vr1 on top of the mapping.
+ *
+ * This initialization routine requires at least one
+ * vring i.e. vr0. vr1 is optional.
+ *
+ * The mapping is recorded through set_dp().  Returns the mapped address,
+ * or MAP_FAILED when mmap() fails.
+ */
+static void *
+init_vr(struct mic_info *mic, int fd, int type,
+       struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
+{
+       const unsigned ring_bytes =
+               _vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
+       int vr_size;
+       char *va;
+
+       /* Each ring is followed by its info block, padded to a page. */
+       vr_size = PAGE_ALIGN(ring_bytes + sizeof(struct _mic_vring_info));
+       va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
+               PROT_READ, MAP_SHARED, fd, 0);
+       if (MAP_FAILED == va) {
+               mpsslog("%s %s %d mmap failed errno %s\n",
+                       mic->name, __func__, __LINE__,
+                       strerror(errno));
+               return va;
+       }
+       set_dp(mic, type, va);
+
+       /* The first ring starts right after the device page. */
+       vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
+       vr0->info = vr0->va + ring_bytes;
+       vring_init(&vr0->vr,
+                  MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
+       mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
+               __func__, mic->name, vr0->va, vr0->info, vr_size,
+               ring_bytes);
+       mpsslog("magic 0x%x expected 0x%x\n",
+               le32toh(vr0->info->magic), MIC_MAGIC + type);
+       assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
+
+       if (!vr1)
+               return va;
+
+       /* The optional second ring sits one vr_size further on. */
+       vr1->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END + vr_size];
+       vr1->info = vr1->va + ring_bytes;
+       vring_init(&vr1->vr,
+                  MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
+       mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
+               __func__, mic->name, vr1->va, vr1->info, vr_size,
+               ring_bytes);
+       mpsslog("magic 0x%x expected 0x%x\n",
+               le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
+       assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
+
+       return va;
+}
+
+/*
+ * Block until the descriptor of the given device type has
+ * VIRTIO_CONFIG_S_DRIVER_OK set in its status, polling fd for POLLIN and
+ * logging every status change seen along the way.
+ *
+ * Returns 0 once the bit is set, or -ENODEV when the device type has no
+ * descriptor.  A failing poll() is logged and retried.
+ */
+static int
+wait_for_card_driver(struct mic_info *mic, int fd, int type)
+{
+       struct mic_device_desc *desc = get_device_desc(mic, type);
+       struct pollfd pollfd;
+       __u8 prev_status;
+       int err;
+
+       if (!desc)
+               return -ENODEV;
+       prev_status = desc->status;
+       pollfd.fd = fd;
+       mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
+               mic->name, __func__, type, desc->status);
+
+       for (;;) {
+               pollfd.events = POLLIN;
+               pollfd.revents = 0;
+               err = poll(&pollfd, 1, -1);
+               if (err < 0) {
+                       mpsslog("%s %s poll failed %s\n",
+                               mic->name, __func__, strerror(errno));
+                       continue;
+               }
+
+               /* Nothing reported on the descriptor: poll again. */
+               if (!pollfd.revents)
+                       continue;
+
+               if (desc->status != prev_status) {
+                       mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
+                               mic->name, __func__, type,
+                               desc->status);
+                       prev_status = desc->status;
+               }
+               if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
+                       mpsslog("%s %s poll.revents %d\n",
+                               mic->name, __func__, pollfd.revents);
+                       mpsslog("%s %s desc-> type %d status 0x%x\n",
+                               mic->name, __func__, type,
+                               desc->status);
+                       return 0;
+               }
+       }
+}
+
+/*
+ * Spin till we have some descriptors: yield the CPU until the ring's
+ * available index differs from the value read from vr->info on entry.
+ */
+static void
+spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
+{
+       const __u16 seen = read_avail_idx(vr);
+
+       for (;;) {
+               if (seen != le16toh(ACCESS_ONCE(vr->vr.avail->idx)))
+                       break;
+#ifdef DEBUG
+               mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
+                       mic->name, __func__,
+                       le16toh(vr->vr.avail->idx), vr->info->avail_idx);
+#endif
+               sched_yield();
+       }
+}
+
+/*
+ * Thread body for the virtio net device of one MIC card (arg is the card's
+ * struct mic_info).
+ *
+ * Creates and configures the TAP interface "mic<id>", maps the vrings and
+ * then loops forever, moving packets read from the TAP device to the card
+ * over the tx ring and packets from the rx ring to the TAP device.
+ *
+ * The thread exits when setup fails, when the device has no descriptor, or
+ * when wait_for_card_driver() reports an error.
+ */
+static void *
+virtio_net(void *arg)
+{
+       static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
+       static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
+       struct iovec vnet_iov[2][2] = {
+               { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
+                 { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
+               { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
+                 { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
+       };
+       struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
+       struct mic_info *mic = (struct mic_info *)arg;
+       char if_name[IFNAMSIZ];
+       struct pollfd net_poll[MAX_NET_FD];
+       struct mic_vring tx_vr, rx_vr;
+       struct mic_copy_desc copy;
+       struct mic_device_desc *desc;
+       int err;
+
+       snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
+       mic->mic_net.tap_fd = tun_alloc(mic, if_name);
+       if (mic->mic_net.tap_fd < 0)
+               goto done;
+
+       if (tap_configure(mic, if_name))
+               goto done;
+       mpsslog("MIC name %s id %d\n", mic->name, mic->id);
+
+       net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
+       net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
+       net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
+       net_poll[NET_FD_TUN].events = POLLIN;
+
+       if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
+                                 VIRTIO_ID_NET, &tx_vr, &rx_vr,
+               virtnet_dev_page.dd.num_vq)) {
+               mpsslog("%s init_vr failed %s\n",
+                       mic->name, strerror(errno));
+               goto done;
+       }
+
+       copy.iovcnt = 2;
+       desc = get_device_desc(mic, VIRTIO_ID_NET);
+       /*
+        * get_device_desc() returns NULL when no net descriptor is found;
+        * the loop below dereferences desc, so bail out here instead.
+        */
+       if (!desc) {
+               mpsslog("%s %s %d Exiting...\n",
+                       mic->name, __func__, __LINE__);
+               goto done;
+       }
+
+       while (1) {
+               ssize_t len;
+
+               net_poll[NET_FD_VIRTIO_NET].revents = 0;
+               net_poll[NET_FD_TUN].revents = 0;
+
+               /* Start polling for data from tap and virtio net */
+               err = poll(net_poll, 2, -1);
+               if (err < 0) {
+                       mpsslog("%s poll failed %s\n",
+                               __func__, strerror(errno));
+                       continue;
+               }
+               if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+                       err = wait_for_card_driver(mic,
+                                                  mic->mic_net.virtio_net_fd,
+                                                  VIRTIO_ID_NET);
+                       if (err) {
+                               mpsslog("%s %s %d Exiting...\n",
+                                       mic->name, __func__, __LINE__);
+                               break;
+                       }
+               }
+               /*
+                * Check if there is data to be read from TUN and write to
+                * virtio net fd if there is.
+                */
+               if (net_poll[NET_FD_TUN].revents & POLLIN) {
+                       copy.iov = iov0;
+                       len = readv(net_poll[NET_FD_TUN].fd,
+                               copy.iov, copy.iovcnt);
+                       if (len > 0) {
+                               struct virtio_net_hdr *hdr
+                                       = (struct virtio_net_hdr *)vnet_hdr[0];
+
+                               /* Disable checksums on the card since we are on
+                                  a reliable PCIe link */
+                               hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
+#ifdef DEBUG
+                               mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
+                                       __func__, __LINE__, hdr->flags);
+                               mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
+                                       copy.out_len, hdr->gso_type);
+#endif
+#ifdef DEBUG
+                               disp_iovec(mic, &copy, __func__, __LINE__);
+                               mpsslog("%s %s %d read from tap 0x%zx\n",
+                                       mic->name, __func__, __LINE__,
+                                       len);
+#endif
+                               spin_for_descriptors(mic, &tx_vr);
+                               txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
+                                            len);
+
+                               err = mic_virtio_copy(mic,
+                                       mic->mic_net.virtio_net_fd, &tx_vr,
+                                       &copy);
+                               if (err < 0) {
+                                       mpsslog("%s %s %d mic_virtio_copy %s\n",
+                                               mic->name, __func__, __LINE__,
+                                               strerror(errno));
+                               }
+                               if (!err)
+                                       verify_out_len(mic, &copy);
+#ifdef DEBUG
+                               disp_iovec(mic, &copy, __func__, __LINE__);
+                               mpsslog("%s %s %d wrote to net 0x%zx\n",
+                                       mic->name, __func__, __LINE__,
+                                       sum_iovec_len(&copy));
+#endif
+                               /* Reinitialize IOV for next run */
+                               iov0[1].iov_len = MAX_NET_PKT_SIZE;
+                       } else if (len < 0) {
+                               disp_iovec(mic, &copy, __func__, __LINE__);
+                               mpsslog("%s %s %d read failed %s ", mic->name,
+                                       __func__, __LINE__, strerror(errno));
+                               mpsslog("cnt %d sum %zd\n",
+                                       copy.iovcnt, sum_iovec_len(&copy));
+                       }
+               }
+
+               /*
+                * Check if there is data to be read from virtio net and
+                * write to TUN if there is.
+                */
+               if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
+                       while (rx_vr.info->avail_idx !=
+                               le16toh(rx_vr.vr.avail->idx)) {
+                               copy.iov = iov1;
+                               txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
+                                            MAX_NET_PKT_SIZE
+                                       + sizeof(struct virtio_net_hdr));
+
+                               err = mic_virtio_copy(mic,
+                                       mic->mic_net.virtio_net_fd, &rx_vr,
+                                       &copy);
+                               if (!err) {
+#ifdef DEBUG
+                                       struct virtio_net_hdr *hdr
+                                               = (struct virtio_net_hdr *)
+                                                       vnet_hdr[1];
+
+                                       mpsslog("%s %s %d hdr->flags 0x%x, ",
+                                               mic->name, __func__, __LINE__,
+                                               hdr->flags);
+                                       mpsslog("out_len %d gso_type 0x%x\n",
+                                               copy.out_len,
+                                               hdr->gso_type);
+#endif
+                                       /* Set the correct output iov_len */
+                                       iov1[1].iov_len = copy.out_len -
+                                               sizeof(struct virtio_net_hdr);
+                                       verify_out_len(mic, &copy);
+#ifdef DEBUG
+                                       disp_iovec(mic, &copy, __func__,
+                                                  __LINE__);
+                                       mpsslog("%s %s %d ",
+                                               mic->name, __func__, __LINE__);
+                                       mpsslog("read from net 0x%zx\n",
+                                               sum_iovec_len(&copy));
+#endif
+                                       len = writev(net_poll[NET_FD_TUN].fd,
+                                               copy.iov, copy.iovcnt);
+                                       if (len != sum_iovec_len(&copy)) {
+                                               mpsslog("Tun write failed %s ",
+                                                       strerror(errno));
+                                               mpsslog("len 0x%zx ", len);
+                                               mpsslog("read_len 0x%zx\n",
+                                                       sum_iovec_len(&copy));
+                                       } else {
+#ifdef DEBUG
+                                               disp_iovec(mic, &copy, __func__,
+                                                          __LINE__);
+                                               mpsslog("%s %s %d ",
+                                                       mic->name, __func__,
+                                                       __LINE__);
+                                               mpsslog("wrote to tap 0x%zx\n",
+                                                       len);
+#endif
+                                       }
+                               } else {
+                                       mpsslog("%s %s %d mic_virtio_copy %s\n",
+                                               mic->name, __func__, __LINE__,
+                                               strerror(errno));
+                                       break;
+                               }
+                       }
+               }
+               if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
+                       mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
+       }
+done:
+       pthread_exit(NULL);
+}
+
+/* virtio_console */
+#define VIRTIO_CONSOLE_FD 0
+#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
+#define MAX_CONSOLE_FD (MONITOR_FD + 1)  /* must be the last one + 1 */
+#define MAX_BUFFER_SIZE PAGE_SIZE
+
+static void *
+virtio_console(void *arg)
+{
+       static __u8 vcons_buf[2][PAGE_SIZE];
+       struct iovec vcons_iov[2] = {
+               { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
+               { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
+       };
+       struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
+       struct mic_info *mic = (struct mic_info *)arg;
+       int err;
+       struct pollfd console_poll[MAX_CONSOLE_FD];
+       int pty_fd;
+       char *pts_name;
+       ssize_t len;
+       struct mic_vring tx_vr, rx_vr;
+       struct mic_copy_desc copy;
+       struct mic_device_desc *desc;
+
+       pty_fd = posix_openpt(O_RDWR);
+       if (pty_fd < 0) {
+               mpsslog("can't open a pseudoterminal master device: %s\n",
+                       strerror(errno));
+               goto _return;
+       }
+       pts_name = ptsname(pty_fd);
+       if (pts_name == NULL) {
+               mpsslog("can't get pts name\n");
+               goto _close_pty;
+       }
+       printf("%s console message goes to %s\n", mic->name, pts_name);
+       mpsslog("%s console message goes to %s\n", mic->name, pts_name);
+       err = grantpt(pty_fd);
+       if (err < 0) {
+               mpsslog("can't grant access: %s %s\n",
+                       pts_name, strerror(errno));
+               goto _close_pty;
+       }
+       err = unlockpt(pty_fd);
+       if (err < 0) {
+               mpsslog("can't unlock a pseudoterminal: %s %s\n",
+                       pts_name, strerror(errno));
+               goto _close_pty;
+       }
+       console_poll[MONITOR_FD].fd = pty_fd;
+       console_poll[MONITOR_FD].events = POLLIN;
+
+       console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
+       console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
+
+       if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
+                                 VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
+               virtcons_dev_page.dd.num_vq)) {
+               mpsslog("%s init_vr failed %s\n",
+                       mic->name, strerror(errno));
+               goto _close_pty;
+       }
+
+       copy.iovcnt = 1;
+       desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
+
+       for (;;) {
+               console_poll[MONITOR_FD].revents = 0;
+               console_poll[VIRTIO_CONSOLE_FD].revents = 0;
+               err = poll(console_poll, MAX_CONSOLE_FD, -1);
+               if (err < 0) {
+                       mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
+                               strerror(errno));
+                       continue;
+               }
+               if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+                       err = wait_for_card_driver(mic,
+                                       mic->mic_console.virtio_console_fd,
+                                       VIRTIO_ID_CONSOLE);
+                       if (err) {
+                               mpsslog("%s %s %d Exiting...\n",
+                                       mic->name, __func__, __LINE__);
+                               break;
+                       }
+               }
+
+               if (console_poll[MONITOR_FD].revents & POLLIN) {
+                       copy.iov = iov0;
+                       len = readv(pty_fd, copy.iov, copy.iovcnt);
+                       if (len > 0) {
+#ifdef DEBUG
+                               disp_iovec(mic, copy, __func__, __LINE__);
+                               mpsslog("%s %s %d read from tap 0x%lx\n",
+                                       mic->name, __func__, __LINE__,
+                                       len);
+#endif
+                               spin_for_descriptors(mic, &tx_vr);
+                               txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
+                                            &copy, len);
+
+                               err = mic_virtio_copy(mic,
+                                       mic->mic_console.virtio_console_fd,
+                                       &tx_vr, &copy);
+                               if (err < 0) {
+                                       mpsslog("%s %s %d mic_virtio_copy %s\n",
+                                               mic->name, __func__, __LINE__,
+                                               strerror(errno));
+                               }
+                               if (!err)
+                                       verify_out_len(mic, &copy);
+#ifdef DEBUG
+                               disp_iovec(mic, copy, __func__, __LINE__);
+                               mpsslog("%s %s %d wrote to net 0x%lx\n",
+                                       mic->name, __func__, __LINE__,
+                                       sum_iovec_len(copy));
+#endif
+                               /* Reinitialize IOV for next run */
+                               iov0->iov_len = PAGE_SIZE;
+                       } else if (len < 0) {
+                               disp_iovec(mic, &copy, __func__, __LINE__);
+                               mpsslog("%s %s %d read failed %s ",
+                                       mic->name, __func__, __LINE__,
+                                       strerror(errno));
+                               mpsslog("cnt %d sum %zd\n",
+                                       copy.iovcnt, sum_iovec_len(&copy));
+                       }
+               }
+
+               if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
+                       while (rx_vr.info->avail_idx !=
+                               le16toh(rx_vr.vr.avail->idx)) {
+                               copy.iov = iov1;
+                               txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
+                                            &copy, PAGE_SIZE);
+
+                               err = mic_virtio_copy(mic,
+                                       mic->mic_console.virtio_console_fd,
+                                       &rx_vr, &copy);
+                               if (!err) {
+                                       /* Set the correct output iov_len */
+                                       iov1->iov_len = copy.out_len;
+                                       verify_out_len(mic, &copy);
+#ifdef DEBUG
+                                       disp_iovec(mic, copy, __func__,
+                                                  __LINE__);
+                                       mpsslog("%s %s %d ",
+                                               mic->name, __func__, __LINE__);
+                                       mpsslog("read from net 0x%lx\n",
+                                               sum_iovec_len(copy));
+#endif
+                                       len = writev(pty_fd,
+                                               copy.iov, copy.iovcnt);
+                                       if (len != sum_iovec_len(&copy)) {
+                                               mpsslog("Tun write failed %s ",
+                                                       strerror(errno));
+                                               mpsslog("len 0x%zx ", len);
+                                               mpsslog("read_len 0x%zx\n",
+                                                       sum_iovec_len(&copy));
+                                       } else {
+#ifdef DEBUG
+                                               disp_iovec(mic, copy, __func__,
+                                                          __LINE__);
+                                               mpsslog("%s %s %d ",
+                                                       mic->name, __func__,
+                                                       __LINE__);
+                                               mpsslog("wrote to tap 0x%lx\n",
+                                                       len);
+#endif
+                                       }
+                               } else {
+                                       mpsslog("%s %s %d mic_virtio_copy %s\n",
+                                               mic->name, __func__, __LINE__,
+                                               strerror(errno));
+                                       break;
+                               }
+                       }
+               }
+               if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
+                       mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
+       }
+_close_pty:
+       close(pty_fd);
+_return:
+       pthread_exit(NULL);
+}
+
+/*
+ * Open /dev/vop_virtio<id> for this card and register the device described
+ * by @dd through the MIC_VIRTIO_ADD_DEVICE ioctl.
+ *
+ * On success the open descriptor is stored in the slot of @mic selected by
+ * dd->type. Failures are logged only; the function returns nothing, so the
+ * caller cannot observe them.
+ */
+static void
+add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
+{
+       char path[PATH_MAX];
+       int fd, err;
+
+       snprintf(path, PATH_MAX, "/dev/vop_virtio%d", mic->id);
+       fd = open(path, O_RDWR);
+       if (fd < 0) {
+               mpsslog("Could not open %s %s\n", path, strerror(errno));
+               return;
+       }
+
+       err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
+       if (err < 0) {
+               mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
+               close(fd);
+               return;
+       }
+       switch (dd->type) {
+       case VIRTIO_ID_NET:
+               mic->mic_net.virtio_net_fd = fd;
+               mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
+               break;
+       case VIRTIO_ID_CONSOLE:
+               mic->mic_console.virtio_console_fd = fd;
+               mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
+               break;
+       case VIRTIO_ID_BLOCK:
+               mic->mic_virtblk.virtio_block_fd = fd;
+               mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
+               break;
+       default:
+               /* No slot would own the descriptor: close it, don't leak it. */
+               mpsslog("%s: unsupported virtio device type %d\n",
+                       mic->name, dd->type);
+               close(fd);
+               break;
+       }
+}
+
+/*
+ * Find the first line of <mic_config_dir>/mpssd<id>.conf that begins with
+ * the virtblk_backend key and store a heap copy of the text after its '='
+ * in mic->mic_virtblk.backend_file.
+ *
+ * Returns true when a value was stored; false when the file cannot be
+ * opened, no line matches, the line has no '=', or the allocation fails.
+ */
+static bool
+set_backend_file(struct mic_info *mic)
+{
+       char buff[PATH_MAX];
+       char *line, *value, *eol;
+       FILE *config;
+
+       snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
+       config = fopen(buff, "r");
+       if (!config)
+               return false;
+
+       /* From here on buff serves as the line buffer. */
+       for (;;) {
+               line = fgets(buff, PATH_MAX, config);
+               if (!line)
+                       break;
+               /* Lines starting with '#' keep their trailing newline. */
+               if (*line != '#') {
+                       eol = strchr(line, '\n');
+                       if (eol)
+                               *eol = '\0';
+               }
+               if (!strncmp(line, virtblk_backend, strlen(virtblk_backend)))
+                       break;
+       }
+       fclose(config);
+       if (!line)
+               return false;
+
+       value = strchr(line, '=');
+       if (!value)
+               return false;
+
+       mic->mic_virtblk.backend_file = malloc(strlen(value) + 1);
+       if (!mic->mic_virtblk.backend_file) {
+               mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
+               return false;
+       }
+       /* Copy everything after the '='. */
+       strcpy(mic->mic_virtblk.backend_file, value + 1);
+       return true;
+}
+
+#define SECTOR_SIZE 512
+/*
+ * Record the size of the open backend file and publish it in the virtio
+ * block config space as a little-endian count of SECTOR_SIZE sectors,
+ * rounded up.
+ *
+ * Returns false when lseek() on the backend descriptor fails.
+ */
+static bool
+set_backend_size(struct mic_info *mic)
+{
+       struct mic_virtblk_info *blk = &mic->mic_virtblk;
+
+       blk->backend_size = lseek(blk->backend, 0, SEEK_END);
+       if (blk->backend_size < 0) {
+               mpsslog("%s: can't seek: %s\n",
+                       mic->name, blk->backend_file);
+               return false;
+       }
+
+       virtblk_dev_page.blk_config.capacity =
+               blk->backend_size / SECTOR_SIZE;
+       /* A trailing partial sector still counts as a whole one. */
+       if (blk->backend_size % SECTOR_SIZE)
+               virtblk_dev_page.blk_config.capacity++;
+
+       virtblk_dev_page.blk_config.capacity =
+               htole64(virtblk_dev_page.blk_config.capacity);
+
+       return true;
+}
+
+/*
+ * Resolve the backend file name from the config file, open it read/write,
+ * record its size and map it shared into memory.
+ *
+ * Returns true with backend, backend_size and backend_addr set up in
+ * mic->mic_virtblk. On failure everything acquired so far is released and
+ * false is returned.
+ */
+static bool
+open_backend(struct mic_info *mic)
+{
+       struct mic_virtblk_info *blk = &mic->mic_virtblk;
+
+       if (!set_backend_file(mic))
+               return false;
+
+       blk->backend = open(blk->backend_file, O_RDWR);
+       if (blk->backend < 0) {
+               mpsslog("%s: can't open: %s\n", mic->name,
+                       blk->backend_file);
+               goto _error_free;
+       }
+
+       if (!set_backend_size(mic))
+               goto _error_close;
+
+       blk->backend_addr = mmap(NULL, blk->backend_size,
+                                PROT_READ|PROT_WRITE, MAP_SHARED,
+                                blk->backend, 0L);
+       if (blk->backend_addr != MAP_FAILED)
+               return true;
+
+       mpsslog("%s: can't map: %s %s\n",
+               mic->name, blk->backend_file,
+               strerror(errno));
+
+ _error_close:
+       close(blk->backend);
+ _error_free:
+       free(blk->backend_file);
+       return false;
+}
+
+/*
+ * Release what open_backend() acquired: the mapping, the descriptor and
+ * the file name string, in that order.
+ */
+static void
+close_backend(struct mic_info *mic)
+{
+       struct mic_virtblk_info *blk = &mic->mic_virtblk;
+
+       munmap(blk->backend_addr, blk->backend_size);
+       close(blk->backend);
+       free(blk->backend_file);
+}
+
+/*
+ * Register the virtio block device for this card and initialise its vring
+ * through init_vr().
+ *
+ * Returns false when blk_config is not 8-byte aligned or when init_vr()
+ * reports MAP_FAILED; true otherwise.
+ */
+static bool
+start_virtblk(struct mic_info *mic, struct mic_vring *vring)
+{
+       void *ring;
+
+       if ((unsigned long)&virtblk_dev_page.blk_config % 8) {
+               mpsslog("%s: blk_config is not 8 byte aligned.\n",
+                       mic->name);
+               return false;
+       }
+
+       add_virtio_device(mic, &virtblk_dev_page.dd);
+       ring = init_vr(mic, mic->mic_virtblk.virtio_block_fd,
+                      VIRTIO_ID_BLOCK, vring, NULL,
+                      virtblk_dev_page.dd.num_vq);
+       if (ring != MAP_FAILED)
+               return true;
+
+       mpsslog("%s init_vr failed %s\n",
+               mic->name, strerror(errno));
+       return false;
+}
+
+/*
+ * Unmap the block device page together with its vrings and close the
+ * virtio block descriptor. A munmap() failure is logged, not propagated.
+ */
+static void
+stop_virtblk(struct mic_info *mic)
+{
+       int ring_bytes;
+
+       /* Page-aligned size of one vring plus its info structure. */
+       ring_bytes = PAGE_ALIGN(_vring_size(MIC_VRING_ENTRIES,
+                                           MIC_VIRTIO_RING_ALIGN) +
+                               sizeof(struct _mic_vring_info));
+
+       if (munmap(mic->mic_virtblk.block_dp,
+                  MIC_DEVICE_PAGE_END +
+                  ring_bytes * virtblk_dev_page.dd.num_vq) < 0)
+               mpsslog("%s munmap errno %d\n", mic->name, errno);
+
+       close(mic->mic_virtblk.virtio_block_fd);
+}
+
+/*
+ * Validate the descriptor that should carry the request header: its length
+ * must equal sizeof(struct virtio_blk_outhdr), it must chain to a next
+ * descriptor, and it must not have the WRITE flag set.
+ *
+ * Returns 0 when all checks pass, otherwise -EIO converted to __u8.
+ */
+static __u8
+header_error_check(struct vring_desc *desc)
+{
+       __u8 result = 0;
+
+       if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
+               mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
+                       __func__, __LINE__);
+               result = -EIO;
+       } else if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
+               mpsslog("%s() %d: alone\n",
+                       __func__, __LINE__);
+               result = -EIO;
+       } else if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
+               mpsslog("%s() %d: not read\n",
+                       __func__, __LINE__);
+               result = -EIO;
+       }
+       return result;
+}
+
+/*
+ * Copy one request header into @hdr via MIC_VIRTIO_COPY_DESC on @fd,
+ * leaving the used index untouched. @desc_idx is accepted but not used.
+ *
+ * Returns the ioctl() result.
+ */
+static int
+read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
+{
+       struct mic_copy_desc request;
+       struct iovec vec;
+
+       vec.iov_base = hdr;
+       vec.iov_len = sizeof(*hdr);
+
+       request.vr_idx = 0;  /* only one vring on virtio_block */
+       request.update_used = false;  /* do not update used index */
+       request.iovcnt = 1;
+       request.iov = &vec;
+
+       return ioctl(fd, MIC_VIRTIO_COPY_DESC, &request);
+}
+
+/*
+ * Issue MIC_VIRTIO_COPY_DESC on @fd for the @iovcnt entries of @iovec,
+ * leaving the used index untouched.
+ *
+ * Returns the ioctl() result.
+ */
+static int
+transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
+{
+       struct mic_copy_desc request;
+
+       request.vr_idx = 0;  /* only one vring on virtio_block */
+       request.update_used = false;  /* do not update used index */
+       request.iovcnt = iovcnt;
+       request.iov = iovec;
+
+       return ioctl(fd, MIC_VIRTIO_COPY_DESC, &request);
+}
+
+/*
+ * Check that @desc is exactly one byte long, the size of the status field.
+ *
+ * Returns 0 when it is, otherwise -EIO converted to __u8.
+ */
+static __u8
+status_error_check(struct vring_desc *desc)
+{
+       if (le32toh(desc->len) == sizeof(__u8))
+               return 0;
+
+       mpsslog("%s() %d: length is not sizeof(status)\n",
+               __func__, __LINE__);
+       return -EIO;
+}
+
+/*
+ * Copy the one-byte @status via MIC_VIRTIO_COPY_DESC on @fd and ask the
+ * driver to update the used index.
+ *
+ * Returns the ioctl() result.
+ */
+static int
+write_status(int fd, __u8 *status)
+{
+       struct mic_copy_desc request;
+       struct iovec vec;
+
+       vec.iov_len = sizeof(*status);
+       vec.iov_base = status;
+
+       request.vr_idx = 0;  /* only one vring on virtio_block */
+       request.update_used = true; /* Update used index */
+       request.iovcnt = 1;
+       request.iov = &vec;
+
+       return ioctl(fd, MIC_VIRTIO_COPY_DESC, &request);
+}
+
+#ifndef VIRTIO_BLK_T_GET_ID
+#define VIRTIO_BLK_T_GET_ID    8
+#endif
+
+/*
+ * Thread body for the virtio block backend of one card (@arg is its
+ * struct mic_info).
+ *
+ * Outer loop: open the configured backend file; when that fails, sleep in
+ * one-second steps until mic_virtblk.signaled is set, then try again.
+ * With a backend open, the block device is started and requests are served
+ * until signaled is set; then the device is stopped, the backend closed and
+ * the outer loop starts over. The loop has no exit, so the trailing
+ * pthread_exit() is not reached.
+ */
+static void *
+virtio_block(void *arg)
+{
+       struct mic_info *mic = (struct mic_info *)arg;
+       int ret;
+       struct pollfd block_poll;
+       struct mic_vring vring;
+       __u16 avail_idx;
+       __u32 desc_idx;
+       struct vring_desc *desc;
+       struct iovec *iovec, *piov;
+       __u8 status;
+       __u32 buffer_desc_idx;
+       struct virtio_blk_outhdr hdr;
+       void *fos;
+
+       for (;;) {  /* forever */
+               if (!open_backend(mic)) { /* No virtblk */
+                       for (mic->mic_virtblk.signaled = 0;
+                               !mic->mic_virtblk.signaled;)
+                               sleep(1);
+                       continue;
+               }
+
+               /* backend file is specified. */
+               if (!start_virtblk(mic, &vring))
+                       goto _close_backend;
+               /* One iovec slot per segment advertised in seg_max. */
+               iovec = malloc(sizeof(*iovec) *
+                       le32toh(virtblk_dev_page.blk_config.seg_max));
+               if (!iovec) {
+                       mpsslog("%s: can't alloc iovec: %s\n",
+                               mic->name, strerror(ENOMEM));
+                       goto _stop_virtblk;
+               }
+
+               block_poll.fd = mic->mic_virtblk.virtio_block_fd;
+               block_poll.events = POLLIN;
+               for (mic->mic_virtblk.signaled = 0;
+                    !mic->mic_virtblk.signaled;) {
+                       block_poll.revents = 0;
+                                       /* timeout in 1 sec to see signaled */
+                       ret = poll(&block_poll, 1, 1000);
+                       if (ret < 0) {
+                               mpsslog("%s %d: poll failed: %s\n",
+                                       __func__, __LINE__,
+                                       strerror(errno));
+                               continue;
+                       }
+
+                       if (!(block_poll.revents & POLLIN)) {
+#ifdef DEBUG
+                               mpsslog("%s %d: block_poll.revents=0x%x\n",
+                                       __func__, __LINE__, block_poll.revents);
+#endif
+                               continue;
+                       }
+
+                       /* POLLIN */
+                       /* Drain every request made available since last pass. */
+                       while (vring.info->avail_idx !=
+                               le16toh(vring.vr.avail->idx)) {
+                               /* read header element */
+                               avail_idx =
+                                       vring.info->avail_idx &
+                                       (vring.vr.num - 1);
+                               desc_idx = le16toh(
+                                       vring.vr.avail->ring[avail_idx]);
+                               desc = &vring.vr.desc[desc_idx];
+#ifdef DEBUG
+                               mpsslog("%s() %d: avail_idx=%d ",
+                                       __func__, __LINE__,
+                                       vring.info->avail_idx);
+                               mpsslog("vring.vr.num=%d desc=%p\n",
+                                       vring.vr.num, desc);
+#endif
+                               /*
+                                * NOTE(review): this result is overwritten by
+                                * "status = 0" below before anything reads it.
+                                */
+                               status = header_error_check(desc);
+                               ret = read_header(
+                                       mic->mic_virtblk.virtio_block_fd,
+                                       &hdr, desc_idx);
+                               if (ret < 0) {
+                                       mpsslog("%s() %d %s: ret=%d %s\n",
+                                               __func__, __LINE__,
+                                               mic->name, ret,
+                                               strerror(errno));
+                                       break;
+                               }
+                               /* buffer element */
+                               piov = iovec;
+                               status = 0;
+                               /*
+                                * Data is transferred to/from the mapped
+                                * backend at the requested sector.
+                                * NOTE(review): hdr.sector is not checked
+                                * against backend_size here.
+                                */
+                               fos = mic->mic_virtblk.backend_addr +
+                                       (hdr.sector * SECTOR_SIZE);
+                               buffer_desc_idx = next_desc(desc);
+                               desc_idx = buffer_desc_idx;
+                               /*
+                                * Collect every descriptor that has a
+                                * successor; the loop stops at the last one
+                                * in the chain, which is then used for the
+                                * status check below.
+                                * NOTE(review): piov is not bounded by the
+                                * seg_max slots allocated above, and
+                                * desc->flags / desc->len are read without
+                                * le16toh()/le32toh() unlike in
+                                * header_error_check() - confirm.
+                                */
+                               for (desc = &vring.vr.desc[buffer_desc_idx];
+                                    desc->flags & VRING_DESC_F_NEXT;
+                                    desc_idx = next_desc(desc),
+                                            desc = &vring.vr.desc[desc_idx]) {
+                                       piov->iov_len = desc->len;
+                                       piov->iov_base = fos;
+                                       piov++;
+                                       fos += desc->len;
+                               }
+                               /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
+                               if (hdr.type & ~(VIRTIO_BLK_T_OUT |
+                                       VIRTIO_BLK_T_GET_ID)) {
+                                       /*
+                                         VIRTIO_BLK_T_IN - does not do
+                                         anything. Probably for documenting.
+                                         VIRTIO_BLK_T_SCSI_CMD - for
+                                         virtio_scsi.
+                                         VIRTIO_BLK_T_FLUSH - turned off in
+                                         config space.
+                                         VIRTIO_BLK_T_BARRIER - defined but not
+                                         used in anywhere.
+                                       */
+                                       mpsslog("%s() %d: type %x ",
+                                               __func__, __LINE__,
+                                               hdr.type);
+                                       mpsslog("is not supported\n");
+                                       status = -ENOTSUP;
+
+                               } else {
+                                       ret = transfer_blocks(
+                                       mic->mic_virtblk.virtio_block_fd,
+                                               iovec,
+                                               piov - iovec);
+                                       /*
+                                        * NOTE(review): status is always 0 on
+                                        * this path, so a failed transfer is
+                                        * never recorded - confirm intent.
+                                        */
+                                       if (ret < 0 &&
+                                           status != 0)
+                                               status = ret;
+                               }
+                               /* write status and update used pointer */
+                               /*
+                                * NOTE(review): a non-zero status is replaced
+                                * by the result of status_error_check(), and
+                                * the write_status() result is not checked.
+                                */
+                               if (status != 0)
+                                       status = status_error_check(desc);
+                               ret = write_status(
+                                       mic->mic_virtblk.virtio_block_fd,
+                                       &status);
+#ifdef DEBUG
+                               mpsslog("%s() %d: write status=%d on desc=%p\n",
+                                       __func__, __LINE__,
+                                       status, desc);
+#endif
+                       }
+               }
+               free(iovec);
+_stop_virtblk:
+               stop_virtblk(mic);
+_close_backend:
+               close_backend(mic);
+       }  /* forever */
+
+       pthread_exit(NULL);
+}
+
+/*
+ * Write "reset" to the card's sysfs state attribute, then poll that
+ * attribute once per second until it reads "ready" or RESET_TIMEOUT
+ * attempts have been used. Each state read is logged.
+ */
+static void
+reset(struct mic_info *mic)
+{
+#define RESET_TIMEOUT 120
+       int remaining;
+       int ready;
+       char *state;
+
+       setsysfs(mic->name, "state", "reset");
+       for (remaining = RESET_TIMEOUT; remaining; remaining--) {
+               state = readsysfs(mic->name, "state");
+               if (state) {
+                       mpsslog("%s: %s %d state %s\n",
+                               mic->name, __func__, __LINE__, state);
+                       ready = !strcmp(state, "ready");
+                       free(state);
+                       if (ready)
+                               break;
+               }
+               /* Unreadable or not ready yet: wait and try again. */
+               sleep(1);
+       }
+}
+
+/*
+ * Map the text read from the shutdown_status attribute to its MIC_*
+ * constant.
+ *
+ * An unknown string is logged and treated as fatal: assert() fires in
+ * debug builds and abort() covers builds with NDEBUG, so control never
+ * falls off the end of this non-void function.
+ */
+static int
+get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
+{
+       if (!strcmp(shutdown_status, "nop"))
+               return MIC_NOP;
+       if (!strcmp(shutdown_status, "crashed"))
+               return MIC_CRASHED;
+       if (!strcmp(shutdown_status, "halted"))
+               return MIC_HALTED;
+       if (!strcmp(shutdown_status, "poweroff"))
+               return MIC_POWER_OFF;
+       if (!strcmp(shutdown_status, "restart"))
+               return MIC_RESTART;
+       mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
+       /* Invalid state */
+       assert(0);
+       abort();
+}
+
+/*
+ * Read the card's sysfs state attribute, retrying until it can be read,
+ * and translate it to the matching MIC_* state constant.
+ *
+ * One second of sleep follows every read attempt, including the
+ * successful one. An unknown state string is logged and treated as fatal:
+ * assert() fires in debug builds and abort() covers builds with NDEBUG, so
+ * an uninitialised value is never returned.
+ */
+static int get_mic_state(struct mic_info *mic)
+{
+       char *state = NULL;
+       enum mic_states mic_state;
+
+       while (!state) {
+               state = readsysfs(mic->name, "state");
+               sleep(1);
+       }
+       mpsslog("%s: %s %d state %s\n",
+               mic->name, __func__, __LINE__, state);
+
+       if (!strcmp(state, "ready")) {
+               mic_state = MIC_READY;
+       } else if (!strcmp(state, "booting")) {
+               mic_state = MIC_BOOTING;
+       } else if (!strcmp(state, "online")) {
+               mic_state = MIC_ONLINE;
+       } else if (!strcmp(state, "shutting_down")) {
+               mic_state = MIC_SHUTTING_DOWN;
+       } else if (!strcmp(state, "reset_failed")) {
+               mic_state = MIC_RESET_FAILED;
+       } else if (!strcmp(state, "resetting")) {
+               mic_state = MIC_RESETTING;
+       } else {
+               mpsslog("%s: BUG invalid state %s\n", mic->name, state);
+               assert(0);
+               abort();
+       }
+
+       free(state);
+       return mic_state;
+}
+
+/*
+ * Poll the card's shutdown_status attribute once per second, for at most
+ * SHUTDOWN_TIMEOUT attempts, until it reports restart, halted, poweroff or
+ * crashed; then reset the card. A "restart" status also sets mic->restart.
+ *
+ * An unreadable attribute consumes one attempt, so the wait always ends.
+ * The last status seen is kept in a local buffer so the timeout message
+ * never reads memory that has already been freed.
+ */
+static void mic_handle_shutdown(struct mic_info *mic)
+{
+#define SHUTDOWN_TIMEOUT 60
+       int i = SHUTDOWN_TIMEOUT;
+       char *shutdown_status;
+       char last_status[64] = "";
+
+       while (i) {
+               shutdown_status = readsysfs(mic->name, "shutdown_status");
+               if (!shutdown_status)
+                       goto retry;
+               mpsslog("%s: %s %d shutdown_status %s\n",
+                       mic->name, __func__, __LINE__, shutdown_status);
+               snprintf(last_status, sizeof(last_status), "%s",
+                        shutdown_status);
+               switch (get_mic_shutdown_status(mic, shutdown_status)) {
+               case MIC_RESTART:
+                       mic->restart = 1;
+                       /* fall through */
+               case MIC_HALTED:
+               case MIC_POWER_OFF:
+               case MIC_CRASHED:
+                       free(shutdown_status);
+                       goto reset;
+               default:
+                       break;
+               }
+               free(shutdown_status);
+retry:
+               sleep(1);
+               i--;
+       }
+reset:
+       if (!i)
+               mpsslog("%s: %s %d timing out waiting for shutdown_status %s\n",
+                       mic->name, __func__, __LINE__, last_status);
+       reset(mic);
+}
+
+/*
+ * Open MICSYSFSDIR/<name>/state read-only.
+ *
+ * Returns the descriptor, or the negative open() result after logging the
+ * failure.
+ */
+static int open_state_fd(struct mic_info *mic)
+{
+       char pathname[PATH_MAX];
+       int fd;
+
+       snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
+                MICSYSFSDIR, mic->name, "state");
+
+       fd = open(pathname, O_RDONLY);
+       if (fd >= 0)
+               return fd;
+
+       mpsslog("%s: opening file %s failed %s\n",
+               mic->name, pathname, strerror(errno));
+       return fd;
+}
+
+/*
+ * Block in poll() on @fd for POLLERR | POLLPRI with no timeout, then
+ * rewind the descriptor and read it; the data read is discarded.
+ *
+ * Returns 0 on success, or the negative result of the poll(), lseek() or
+ * read() call that failed, after logging it.
+ */
+static int block_till_state_change(int fd, struct mic_info *mic)
+{
+       struct pollfd watch;
+       char scratch[PAGE_SIZE];
+       int ret;
+
+       watch.fd = fd;
+       watch.events = POLLERR | POLLPRI;
+
+       ret = poll(&watch, 1, -1);
+       if (ret < 0) {
+               mpsslog("%s: %s %d poll failed %s\n",
+                       mic->name, __func__, __LINE__, strerror(errno));
+               return ret;
+       }
+
+       ret = lseek(fd, 0, SEEK_SET);
+       if (ret < 0) {
+               mpsslog("%s: %s %d Failed to seek to 0: %s\n",
+                       mic->name, __func__, __LINE__, strerror(errno));
+               return ret;
+       }
+
+       ret = read(fd, scratch, sizeof(scratch));
+       if (ret >= 0)
+               return 0;
+
+       mpsslog("%s: %s %d Failed to read sysfs entry: %s\n",
+               mic->name, __func__, __LINE__, strerror(errno));
+       return ret;
+}
+
+/*
+ * Thread body that follows the state of one card (@arg is its
+ * struct mic_info).
+ *
+ * Each state change is dispatched: "shutting_down" runs the shutdown
+ * handling; "ready" and "reset_failed" send SIGTERM to the process in
+ * mic->pid, reap it, optionally request a boot, and leave the loop.
+ * On every exit path init_mic() is called again before the thread ends.
+ */
+static void *
+mic_config(void *arg)
+{
+       struct mic_info *mic = (struct mic_info *)arg;
+       int fd, ret, stat = 0;
+
+       fd = open_state_fd(mic);
+       if (fd < 0) {
+               mpsslog("%s: %s %d open state fd failed %s\n",
+                       mic->name, __func__, __LINE__, strerror(errno));
+               goto exit;
+       }
+
+       do {
+               ret = block_till_state_change(fd, mic);
+               if (ret < 0) {
+                       mpsslog("%s: %s %d block_till_state_change error %s\n",
+                               mic->name, __func__, __LINE__, strerror(errno));
+                       goto close_exit;
+               }
+
+               switch (get_mic_state(mic)) {
+               case MIC_SHUTTING_DOWN:
+                       mic_handle_shutdown(mic);
+                       break;
+               case MIC_READY:
+               case MIC_RESET_FAILED:
+                       ret = kill(mic->pid, SIGTERM);
+                       mpsslog("%s: %s %d kill pid %d ret %d\n",
+                               mic->name, __func__, __LINE__,
+                               mic->pid, ret);
+                       if (!ret) {
+                               /*
+                                * Plain blocking wait. The third argument is
+                                * an options mask; status macros such as
+                                * WIFSIGNALED() do not belong there.
+                                */
+                               ret = waitpid(mic->pid, &stat, 0);
+                               mpsslog("%s: %s %d waitpid ret %d pid %d\n",
+                                       mic->name, __func__, __LINE__,
+                                       ret, mic->pid);
+                       }
+                       if (mic->boot_on_resume) {
+                               setsysfs(mic->name, "state", "boot");
+                               mic->boot_on_resume = 0;
+                       }
+                       goto close_exit;
+               default:
+                       break;
+               }
+       } while (1);
+
+close_exit:
+       close(fd);
+exit:
+       init_mic(mic);
+       pthread_exit(NULL);
+}
+
+/*
+ * Build the command line for this card (fixed options plus a static
+ * address of 172.31.<id + 1>.1), write it to the card's "cmdline" sysfs
+ * attribute, and log both the command line and the address.
+ */
+static void
+set_cmdline(struct mic_info *mic)
+{
+       char buffer[PATH_MAX];
+       int subnet = mic->id + 1;
+
+       /* Adjacent literals form one format string with three segments. */
+       snprintf(buffer, PATH_MAX,
+               "clocksource=tsc highres=off nohz=off "
+               "cpufreq_on;corec6_off;pc3_off;pc6_off "
+               "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
+               subnet);
+       setsysfs(mic->name, "cmdline", buffer);
+       mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
+
+       /* The buffer is reused for the address-only log line. */
+       snprintf(buffer, PATH_MAX, "172.31.%d.1", subnet);
+       mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
+}
+
+/*
+ * Look up "__log_buf" and "log_buf_len" in /lib/firmware/mic/System.map
+ * and write the 16 characters found 19 bytes before each name to the
+ * card's log_buf_addr and log_buf_len sysfs attributes.
+ *
+ * A name found fewer than 19 bytes into the file is treated as not found,
+ * so the copy never reads before the start of the mapping.
+ *
+ * NOTE(review): strstr() needs a NUL byte after the file contents; the
+ * mapping only provides one when the file size is not a multiple of the
+ * page size.
+ */
+static void
+set_log_buf_info(struct mic_info *mic)
+{
+#define SYMBOL_VALUE_OFFSET 19
+#define SYMBOL_VALUE_LEN 16
+       int fd;
+       off_t len;
+       char system_map[] = "/lib/firmware/mic/System.map";
+       char *map, *temp, log_buf[SYMBOL_VALUE_LEN + 1] = {'\0'};
+
+       fd = open(system_map, O_RDONLY);
+       if (fd < 0) {
+               mpsslog("%s: Opening System.map failed: %d\n",
+                       mic->name, errno);
+               return;
+       }
+       len = lseek(fd, 0, SEEK_END);
+       if (len < 0) {
+               mpsslog("%s: Reading System.map size failed: %d\n",
+                       mic->name, errno);
+               goto close_fd;
+       }
+       map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
+       if (map == MAP_FAILED) {
+               mpsslog("%s: mmap of System.map failed: %d\n",
+                       mic->name, errno);
+               goto close_fd;
+       }
+
+       temp = strstr(map, "__log_buf");
+       if (!temp || temp - map < SYMBOL_VALUE_OFFSET) {
+               mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
+               goto unmap;
+       }
+       /* log_buf stays NUL-terminated: only 16 of its 17 bytes are set. */
+       strncpy(log_buf, temp - SYMBOL_VALUE_OFFSET, SYMBOL_VALUE_LEN);
+       setsysfs(mic->name, "log_buf_addr", log_buf);
+       mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
+
+       temp = strstr(map, "log_buf_len");
+       if (!temp || temp - map < SYMBOL_VALUE_OFFSET) {
+               mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
+               goto unmap;
+       }
+       strncpy(log_buf, temp - SYMBOL_VALUE_OFFSET, SYMBOL_VALUE_LEN);
+       setsysfs(mic->name, "log_buf_len", log_buf);
+       mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
+
+unmap:
+       munmap(map, len);
+close_fd:
+       close(fd);
+#undef SYMBOL_VALUE_LEN
+#undef SYMBOL_VALUE_OFFSET
+}
+
+/*
+ * Signal handler (installed for SIGUSR1 by init_mic()): set the virtblk
+ * "signaled" flag on every card in mic_list. The handler arguments are
+ * not used.
+ */
+static void
+change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
+{
+       struct mic_info *card = mic_list.next;
+
+       while (card != NULL) {
+               card->mic_virtblk.signaled = 1/* true */;
+               card = card->next;
+       }
+}
+
+/*
+ * Publish the boot-time sysfs parameters of one card: the log buffer
+ * attributes first, then the cmdline attribute.
+ */
+static void
+set_mic_boot_params(struct mic_info *mic)
+{
+       set_log_buf_info(mic);
+       set_cmdline(mic);
+}
+
+/*
+ * Thread body that waits for one card (@arg is its struct mic_info) to
+ * enter the "booting" state and then forks the per-card worker process.
+ *
+ * The child registers the console and net virtio devices, starts the
+ * console, net and block threads, installs the SIGUSR1 handler and then
+ * sleeps forever. The parent starts the mic_config thread for the card.
+ * The state descriptor is only needed while waiting, so it is closed
+ * before fork(); this keeps it from leaking on each re-init and from
+ * being inherited by the child.
+ *
+ * Always returns NULL.
+ */
+static void *
+init_mic(void *arg)
+{
+       struct mic_info *mic = (struct mic_info *)arg;
+       struct sigaction ignore = {
+               .sa_flags = 0,
+               .sa_handler = SIG_IGN
+       };
+       struct sigaction act = {
+               .sa_flags = SA_SIGINFO,
+               .sa_sigaction = change_virtblk_backend,
+       };
+       char buffer[PATH_MAX];
+       int err, fd;
+
+       /*
+        * Currently, one virtio block device is supported for each MIC card
+        * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
+        * The signal informs the virtio block backend about a change in the
+        * configuration file which specifies the virtio backend file name on
+        * the host. Virtio block backend then re-reads the configuration file
+        * and switches to the new block device. This signalling mechanism may
+        * not be required once multiple virtio block devices are supported by
+        * the MIC daemon.
+        */
+       sigaction(SIGUSR1, &ignore, NULL);
+retry:
+       fd = open_state_fd(mic);
+       if (fd < 0) {
+               mpsslog("%s: %s %d open state fd failed %s\n",
+                       mic->name, __func__, __LINE__, strerror(errno));
+               sleep(2);
+               goto retry;
+       }
+
+       if (mic->restart) {
+               snprintf(buffer, PATH_MAX, "boot");
+               setsysfs(mic->name, "state", buffer);
+               mpsslog("%s restarting mic %d\n",
+                       mic->name, mic->restart);
+               mic->restart = 0;
+       }
+
+       while (1) {
+               while (block_till_state_change(fd, mic)) {
+                       mpsslog("%s: %s %d block_till_state_change error %s\n",
+                               mic->name, __func__, __LINE__, strerror(errno));
+                       sleep(2);
+                       continue;
+               }
+
+               if (get_mic_state(mic) == MIC_BOOTING)
+                       break;
+       }
+
+       /* Nothing below uses the state descriptor any more. */
+       close(fd);
+
+       mic->pid = fork();
+       switch (mic->pid) {
+       case 0:
+               add_virtio_device(mic, &virtcons_dev_page.dd);
+               add_virtio_device(mic, &virtnet_dev_page.dd);
+               err = pthread_create(&mic->mic_console.console_thread, NULL,
+                       virtio_console, mic);
+               if (err)
+                       mpsslog("%s virtcons pthread_create failed %s\n",
+                               mic->name, strerror(err));
+               err = pthread_create(&mic->mic_net.net_thread, NULL,
+                       virtio_net, mic);
+               if (err)
+                       mpsslog("%s virtnet pthread_create failed %s\n",
+                               mic->name, strerror(err));
+               err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
+                       virtio_block, mic);
+               if (err)
+                       mpsslog("%s virtblk pthread_create failed %s\n",
+                               mic->name, strerror(err));
+               sigemptyset(&act.sa_mask);
+               err = sigaction(SIGUSR1, &act, NULL);
+               if (err)
+                       mpsslog("%s sigaction SIGUSR1 failed %s\n",
+                               mic->name, strerror(errno));
+               /* The child never leaves this loop. */
+               while (1)
+                       sleep(60);
+       case -1:
+               mpsslog("fork failed MIC name %s id %d errno %d\n",
+                       mic->name, mic->id, errno);
+               break;
+       default:
+               err = pthread_create(&mic->config_thread, NULL,
+                                    mic_config, mic);
+               if (err)
+                       mpsslog("%s mic_config pthread_create failed %s\n",
+                               mic->name, strerror(err));
+       }
+
+       return NULL;
+}
+
+/*
+ * For every card in mic_list: publish its boot parameters and start its
+ * init_mic thread (a creation failure is only logged). Then sleep forever;
+ * this function does not return.
+ */
+static void
+start_daemon(void)
+{
+       struct mic_info *card = mic_list.next;
+       int rc;
+
+       while (card) {
+               set_mic_boot_params(card);
+               rc = pthread_create(&card->init_thread, NULL, init_mic, card);
+               if (rc)
+                       mpsslog("%s init_mic pthread_create failed %s\n",
+                               card->name, strerror(rc));
+               card = card->next;
+       }
+
+       for (;;)
+               sleep(60);
+}
+
+/*
+ * Scan MICSYSFSDIR for entries whose name starts with "mic" and append
+ * one zero-initialised struct mic_info per entry to mic_list, with the id
+ * parsed from the text after "mic" and a heap copy of the entry name.
+ *
+ * An entry is skipped entirely when either allocation fails, so every
+ * node on the list has a valid name.
+ *
+ * Returns the number of cards added; 0 when the directory cannot be
+ * opened.
+ */
+static int
+init_mic_list(void)
+{
+       struct mic_info *mic = &mic_list;
+       struct mic_info *node;
+       struct dirent *file;
+       DIR *dp;
+       int cnt = 0;
+
+       dp = opendir(MICSYSFSDIR);
+       if (!dp)
+               return 0;
+
+       while ((file = readdir(dp)) != NULL) {
+               if (strncmp(file->d_name, "mic", 3))
+                       continue;
+
+               node = calloc(1, sizeof(struct mic_info));
+               if (!node)
+                       continue;
+               /* The 16 spare bytes are kept from the original sizing. */
+               node->name = malloc(strlen(file->d_name) + 16);
+               if (!node->name) {
+                       free(node);
+                       continue;
+               }
+               strcpy(node->name, file->d_name);
+               node->id = atoi(&file->d_name[3]);
+
+               /* Link only fully initialised nodes. */
+               mic->next = node;
+               mic = node;
+               mpsslog("MIC name %s id %d\n", mic->name,
+                       mic->id);
+               cnt++;
+       }
+
+       closedir(dp);
+       return cnt;
+}
+
+/*
+ * printf-style logger: format the message, prefix it with the current
+ * local time and append it to logfp, flushing after each message.
+ * Does nothing when logfp is NULL.
+ *
+ * The message is formatted with vsnprintf(), so anything longer than the
+ * local buffer is truncated instead of overrunning the stack.
+ */
+void
+mpsslog(char *format, ...)
+{
+       va_list args;
+       char buffer[4096];
+       char ts[52], *ts1;
+       time_t t;
+
+       if (logfp == NULL)
+               return;
+
+       va_start(args, format);
+       vsnprintf(buffer, sizeof(buffer), format, args);
+       va_end(args);
+
+       time(&t);
+       ts1 = ctime_r(&t, ts);
+       /* ctime_r() ends its text with '\n'; drop it when there is text. */
+       if (ts1 && *ts1)
+               ts1[strlen(ts1) - 1] = '\0';
+       fprintf(logfp, "%s: %s", ts1 ? ts1 : "", buffer);
+
+       fflush(logfp);
+}
+
+/*
+ * Daemon entry point: open the log file, fork so that the parent exits
+ * and the child carries on, build the card list and start the daemon.
+ *
+ * Exit codes: 1 log file cannot be opened, 2 fork failed, 3 no card
+ * found, 0 otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+       pid_t child;
+       int ncards;
+
+       myname = argv[0];
+
+       logfp = fopen(LOGFILE_NAME, "a+");
+       if (logfp == NULL) {
+               fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
+               exit(1);
+       }
+
+       child = fork();
+       if (child == -1)
+               exit(2);
+       if (child != 0)
+               exit(0);        /* parent: the child continues below */
+
+       mpsslog("MIC Daemon start\n");
+
+       ncards = init_mic_list();
+       if (!ncards) {
+               mpsslog("MIC module not loaded\n");
+               exit(3);
+       }
+       mpsslog("MIC found %d devices\n", ncards);
+
+       start_daemon();
+
+       exit(0);
+}
diff --git a/samples/mic/mpssd/mpssd.h b/samples/mic/mpssd/mpssd.h
new file mode 100644 (file)
index 0000000..8bd6494
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC User Space Tools.
+ */
+#ifndef _MPSSD_H_
+#define _MPSSD_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <time.h>
+#include <errno.h>
+#include <sys/dir.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <pthread.h>
+#include <signal.h>
+#include <limits.h>
+#include <syslog.h>
+#include <getopt.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_tun.h>
+#include <linux/virtio_ids.h>
+
+#define MICSYSFSDIR "/sys/class/mic"
+#define LOGFILE_NAME "/var/log/mpssd"
+#define PAGE_SIZE 4096
+
+struct mic_console_info {
+       pthread_t       console_thread;
+       int             virtio_console_fd;
+       void            *console_dp;
+};
+
+struct mic_net_info {
+       pthread_t       net_thread;
+       int             virtio_net_fd;
+       int             tap_fd;
+       void            *net_dp;
+};
+
+struct mic_virtblk_info {
+       pthread_t       block_thread;
+       int             virtio_block_fd;
+       void            *block_dp;
+       volatile sig_atomic_t   signaled;
+       char            *backend_file;
+       int             backend;
+       void            *backend_addr;
+       long            backend_size;
+};
+
+struct mic_info {
+       int             id;
+       char            *name;
+       pthread_t       config_thread;
+       pthread_t       init_thread;
+       pid_t           pid;
+       struct mic_console_info mic_console;
+       struct mic_net_info     mic_net;
+       struct mic_virtblk_info mic_virtblk;
+       int             restart;
+       int             boot_on_resume;
+       struct mic_info *next;
+};
+
+__attribute__((format(printf, 1, 2)))
+void mpsslog(char *format, ...);
+char *readsysfs(char *dir, char *entry);
+int setsysfs(char *dir, char *entry, char *value);
+#endif
diff --git a/samples/mic/mpssd/sysfs.c b/samples/mic/mpssd/sysfs.c
new file mode 100644 (file)
index 0000000..8dd3269
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC User Space Tools.
+ */
+
+#include "mpssd.h"
+
+#define PAGE_SIZE 4096
+
+char *
+readsysfs(char *dir, char *entry)
+{
+       char filename[PATH_MAX];
+       char value[PAGE_SIZE];
+       char *string = NULL;
+       int fd;
+       int len;
+
+       if (dir == NULL)
+               snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
+       else
+               snprintf(filename, PATH_MAX,
+                        "%s/%s/%s", MICSYSFSDIR, dir, entry);
+
+       fd = open(filename, O_RDONLY);
+       if (fd < 0) {
+               mpsslog("Failed to open sysfs entry '%s': %s\n",
+                       filename, strerror(errno));
+               return NULL;
+       }
+
+       len = read(fd, value, sizeof(value));
+       if (len < 0) {
+               mpsslog("Failed to read sysfs entry '%s': %s\n",
+                       filename, strerror(errno));
+               goto readsys_ret;
+       }
+       if (len == 0)
+               goto readsys_ret;
+
+       value[len - 1] = '\0';
+
+       string = malloc(strlen(value) + 1);
+       if (string)
+               strcpy(string, value);
+
+readsys_ret:
+       close(fd);
+       return string;
+}
+
+int
+setsysfs(char *dir, char *entry, char *value)
+{
+       char filename[PATH_MAX];
+       char *oldvalue;
+       int fd, ret = 0;
+
+       if (dir == NULL)
+               snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
+       else
+               snprintf(filename, PATH_MAX, "%s/%s/%s",
+                        MICSYSFSDIR, dir, entry);
+
+       oldvalue = readsysfs(dir, entry);
+
+       fd = open(filename, O_RDWR);
+       if (fd < 0) {
+               ret = errno;
+               mpsslog("Failed to open sysfs entry '%s': %s\n",
+                       filename, strerror(errno));
+               goto done;
+       }
+
+       if (!oldvalue || strcmp(value, oldvalue)) {
+               if (write(fd, value, strlen(value)) < 0) {
+                       ret = errno;
+                       mpsslog("Failed to write new sysfs entry '%s': %s\n",
+                               filename, strerror(errno));
+               }
+       }
+       close(fd);
+done:
+       if (oldvalue)
+               free(oldvalue);
+       return ret;
+}
diff --git a/samples/timers/.gitignore b/samples/timers/.gitignore
new file mode 100644 (file)
index 0000000..c5c45d7
--- /dev/null
@@ -0,0 +1 @@
+hpet_example
diff --git a/samples/timers/Makefile b/samples/timers/Makefile
new file mode 100644 (file)
index 0000000..a5c3c4a
--- /dev/null
@@ -0,0 +1,15 @@
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+CC := $(CROSS_COMPILE)gcc
+PROGS := hpet_example
+
+all: $(PROGS)
+
+clean:
+       rm -fr $(PROGS)
+
+endif
+endif
diff --git a/samples/timers/hpet_example.c b/samples/timers/hpet_example.c
new file mode 100644 (file)
index 0000000..3ab4993
--- /dev/null
@@ -0,0 +1,294 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <memory.h>
+#include <malloc.h>
+#include <time.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <linux/hpet.h>
+
+
+extern void hpet_open_close(int, const char **);
+extern void hpet_info(int, const char **);
+extern void hpet_poll(int, const char **);
+extern void hpet_fasync(int, const char **);
+extern void hpet_read(int, const char **);
+
+#include <sys/poll.h>
+#include <sys/ioctl.h>
+
+struct hpet_command {
+       char            *command;
+       void            (*func)(int argc, const char ** argv);
+} hpet_command[] = {
+       {
+               "open-close",
+               hpet_open_close
+       },
+       {
+               "info",
+               hpet_info
+       },
+       {
+               "poll",
+               hpet_poll
+       },
+       {
+               "fasync",
+               hpet_fasync
+       },
+};
+
+int
+main(int argc, const char ** argv)
+{
+       unsigned int    i;
+
+       argc--;
+       argv++;
+
+       if (!argc) {
+               fprintf(stderr, "-hpet: requires command\n");
+               return -1;
+       }
+
+
+       for (i = 0; i < (sizeof (hpet_command) / sizeof (hpet_command[0])); i++)
+               if (!strcmp(argv[0], hpet_command[i].command)) {
+                       argc--;
+                       argv++;
+                       fprintf(stderr, "-hpet: executing %s\n",
+                               hpet_command[i].command);
+                       hpet_command[i].func(argc, argv);
+                       return 0;
+               }
+
+       fprintf(stderr, "do_hpet: command %s not implemented\n", argv[0]);
+
+       return -1;
+}
+
+void
+hpet_open_close(int argc, const char **argv)
+{
+       int     fd;
+
+       if (argc != 1) {
+               fprintf(stderr, "hpet_open_close: device-name\n");
+               return;
+       }
+
+       fd = open(argv[0], O_RDONLY);
+       if (fd < 0)
+               fprintf(stderr, "hpet_open_close: open failed\n");
+       else
+               close(fd);
+
+       return;
+}
+
+void
+hpet_info(int argc, const char **argv)
+{
+       struct hpet_info        info;
+       int                     fd;
+
+       if (argc != 1) {
+               fprintf(stderr, "hpet_info: device-name\n");
+               return;
+       }
+
+       fd = open(argv[0], O_RDONLY);
+       if (fd < 0) {
+               fprintf(stderr, "hpet_info: open of %s failed\n", argv[0]);
+               return;
+       }
+
+       if (ioctl(fd, HPET_INFO, &info) < 0) {
+               fprintf(stderr, "hpet_info: failed to get info\n");
+               goto out;
+       }
+
+       fprintf(stderr, "hpet_info: hi_irqfreq 0x%lx hi_flags 0x%lx ",
+               info.hi_ireqfreq, info.hi_flags);
+       fprintf(stderr, "hi_hpet %d hi_timer %d\n",
+               info.hi_hpet, info.hi_timer);
+
+out:
+       close(fd);
+       return;
+}
+
+void
+hpet_poll(int argc, const char **argv)
+{
+       unsigned long           freq;
+       int                     iterations, i, fd;
+       struct pollfd           pfd;
+       struct hpet_info        info;
+       struct timeval          stv, etv;
+       struct timezone         tz;
+       long                    usec;
+
+       if (argc != 3) {
+               fprintf(stderr, "hpet_poll: device-name freq iterations\n");
+               return;
+       }
+
+       freq = atoi(argv[1]);
+       iterations = atoi(argv[2]);
+
+       fd = open(argv[0], O_RDONLY);
+
+       if (fd < 0) {
+               fprintf(stderr, "hpet_poll: open of %s failed\n", argv[0]);
+               return;
+       }
+
+       if (ioctl(fd, HPET_IRQFREQ, freq) < 0) {
+               fprintf(stderr, "hpet_poll: HPET_IRQFREQ failed\n");
+               goto out;
+       }
+
+       if (ioctl(fd, HPET_INFO, &info) < 0) {
+               fprintf(stderr, "hpet_poll: failed to get info\n");
+               goto out;
+       }
+
+       fprintf(stderr, "hpet_poll: info.hi_flags 0x%lx\n", info.hi_flags);
+
+       if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) {
+               fprintf(stderr, "hpet_poll: HPET_EPI failed\n");
+               goto out;
+       }
+
+       if (ioctl(fd, HPET_IE_ON, 0) < 0) {
+               fprintf(stderr, "hpet_poll, HPET_IE_ON failed\n");
+               goto out;
+       }
+
+       pfd.fd = fd;
+       pfd.events = POLLIN;
+
+       for (i = 0; i < iterations; i++) {
+               pfd.revents = 0;
+               gettimeofday(&stv, &tz);
+               if (poll(&pfd, 1, -1) < 0)
+                       fprintf(stderr, "hpet_poll: poll failed\n");
+               else {
+                       long    data;
+
+                       gettimeofday(&etv, &tz);
+                       usec = stv.tv_sec * 1000000 + stv.tv_usec;
+                       usec = (etv.tv_sec * 1000000 + etv.tv_usec) - usec;
+
+                       fprintf(stderr,
+                               "hpet_poll: expired time = 0x%lx\n", usec);
+
+                       fprintf(stderr, "hpet_poll: revents = 0x%x\n",
+                               pfd.revents);
+
+                       if (read(fd, &data, sizeof(data)) != sizeof(data)) {
+                               fprintf(stderr, "hpet_poll: read failed\n");
+                       }
+                       else
+                               fprintf(stderr, "hpet_poll: data 0x%lx\n",
+                                       data);
+               }
+       }
+
+out:
+       close(fd);
+       return;
+}
+
+static int hpet_sigio_count;
+
+static void
+hpet_sigio(int val)
+{
+       fprintf(stderr, "hpet_sigio: called\n");
+       hpet_sigio_count++;
+}
+
+void
+hpet_fasync(int argc, const char **argv)
+{
+       unsigned long           freq;
+       int                     iterations, i, fd, value;
+       sig_t                   oldsig;
+       struct hpet_info        info;
+
+       hpet_sigio_count = 0;
+       fd = -1;
+
+       if ((oldsig = signal(SIGIO, hpet_sigio)) == SIG_ERR) {
+               fprintf(stderr, "hpet_fasync: failed to set signal handler\n");
+               return;
+       }
+
+       if (argc != 3) {
+               fprintf(stderr, "hpet_fasync: device-name freq iterations\n");
+               goto out;
+       }
+
+       fd = open(argv[0], O_RDONLY);
+
+       if (fd < 0) {
+               fprintf(stderr, "hpet_fasync: failed to open %s\n", argv[0]);
+               goto out;       /* still restore the old SIGIO handler */
+       }
+
+       /* fcntl() reports failure by returning -1 */
+       if ((fcntl(fd, F_SETOWN, getpid()) == -1) ||
+               ((value = fcntl(fd, F_GETFL)) == -1) ||
+               (fcntl(fd, F_SETFL, value | O_ASYNC) == -1)) {
+               fprintf(stderr, "hpet_fasync: fcntl failed\n");
+               goto out;
+       }
+
+       freq = atoi(argv[1]);
+       iterations = atoi(argv[2]);
+
+       if (ioctl(fd, HPET_IRQFREQ, freq) < 0) {
+               fprintf(stderr, "hpet_fasync: HPET_IRQFREQ failed\n");
+               goto out;
+       }
+
+       if (ioctl(fd, HPET_INFO, &info) < 0) {
+               fprintf(stderr, "hpet_fasync: failed to get info\n");
+               goto out;
+       }
+
+       fprintf(stderr, "hpet_fasync: info.hi_flags 0x%lx\n", info.hi_flags);
+
+       if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) {
+               fprintf(stderr, "hpet_fasync: HPET_EPI failed\n");
+               goto out;
+       }
+
+       if (ioctl(fd, HPET_IE_ON, 0) < 0) {
+               fprintf(stderr, "hpet_fasync, HPET_IE_ON failed\n");
+               goto out;
+       }
+
+       for (i = 0; i < iterations; i++) {
+               (void) pause();
+               fprintf(stderr, "hpet_fasync: count = %d\n", hpet_sigio_count);
+       }
+
+out:
+       signal(SIGIO, oldsig);
+
+       if (fd >= 0)
+               close(fd);
+
+       return;
+}
diff --git a/samples/watchdog/.gitignore b/samples/watchdog/.gitignore
new file mode 100644 (file)
index 0000000..ff0ebb5
--- /dev/null
@@ -0,0 +1 @@
+watchdog-simple
diff --git a/samples/watchdog/Makefile b/samples/watchdog/Makefile
new file mode 100644 (file)
index 0000000..9b53d89
--- /dev/null
@@ -0,0 +1,8 @@
+CC := $(CROSS_COMPILE)gcc
+PROGS := watchdog-simple
+
+all: $(PROGS)
+
+clean:
+       rm -fr $(PROGS)
+
diff --git a/samples/watchdog/watchdog-simple.c b/samples/watchdog/watchdog-simple.c
new file mode 100644 (file)
index 0000000..ba45803
--- /dev/null
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+int main(void)
+{
+       int fd = open("/dev/watchdog", O_WRONLY);
+       int ret = 0;
+       if (fd == -1) {
+               perror("watchdog");
+               exit(EXIT_FAILURE);
+       }
+       while (1) {
+               ret = write(fd, "\0", 1);
+               if (ret != 1) {
+                       ret = -1;
+                       break;
+               }
+               sleep(10);
+       }
+       close(fd);
+       return ret;
+}
index 11602e5efb3bcdf1d300315a4501b8ce3faaa78c..7675d11ee65e6d41e353debcd2fc51abff9a5dfa 100644 (file)
@@ -81,6 +81,7 @@ endif
 
 ifneq ($(strip $(lib-y) $(lib-m) $(lib-)),)
 lib-target := $(obj)/lib.a
+obj-y += $(obj)/lib-ksyms.o
 endif
 
 ifneq ($(strip $(obj-y) $(obj-m) $(obj-) $(subdir-m) $(lib-target)),)
@@ -158,7 +159,8 @@ cmd_cpp_i_c       = $(CPP) $(c_flags) -o $@ $<
 $(obj)/%.i: $(src)/%.c FORCE
        $(call if_changed_dep,cpp_i_c)
 
-cmd_gensymtypes =                                                           \
+# These mirror gensymtypes_S and co below, keep them in synch.
+cmd_gensymtypes_c =                                                         \
     $(CPP) -D__GENKSYMS__ $(c_flags) $< |                                   \
     $(GENKSYMS) $(if $(1), -T $(2))                                         \
      $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX))             \
@@ -168,7 +170,7 @@ cmd_gensymtypes =                                                           \
 quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@
 cmd_cc_symtypes_c =                                                         \
     set -e;                                                                 \
-    $(call cmd_gensymtypes,true,$@) >/dev/null;                             \
+    $(call cmd_gensymtypes_c,true,$@) >/dev/null;                           \
     test -s $@ || rm -f $@
 
 $(obj)/%.symtypes : $(src)/%.c FORCE
@@ -197,9 +199,10 @@ else
 #   the actual value of the checksum generated by genksyms
 
 cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $<
-cmd_modversions =                                                              \
+
+cmd_modversions_c =                                                            \
        if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then             \
-               $(call cmd_gensymtypes,$(KBUILD_SYMTYPES),$(@:.o=.symtypes))    \
+               $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes))  \
                    > $(@D)/.tmp_$(@F:.o=.ver);                                 \
                                                                                \
                $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F)                      \
@@ -267,13 +270,14 @@ endif # CONFIG_STACK_VALIDATION
 define rule_cc_o_c
        $(call echo-cmd,checksrc) $(cmd_checksrc)                         \
        $(call cmd_and_fixdep,cc_o_c)                                     \
-       $(cmd_modversions)                                                \
+       $(cmd_modversions_c)                                              \
        $(cmd_objtool)                                                    \
        $(call echo-cmd,record_mcount) $(cmd_record_mcount)
 endef
 
 define rule_as_o_S
        $(call cmd_and_fixdep,as_o_S)                                     \
+       $(cmd_modversions_S)                                              \
        $(cmd_objtool)
 endef
 
@@ -313,6 +317,39 @@ modkern_aflags := $(KBUILD_AFLAGS_KERNEL) $(AFLAGS_KERNEL)
 $(real-objs-m)      : modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE)
 $(real-objs-m:.o=.s): modkern_aflags := $(KBUILD_AFLAGS_MODULE) $(AFLAGS_MODULE)
 
+# .S file exports must have their C prototypes defined in asm/asm-prototypes.h
+# or a file that it includes, in order to get versioned symbols. We build a
+# dummy C file that includes asm-prototypes and the EXPORT_SYMBOL lines from
+# the .S file (with trailing ';'), and run genksyms on that, to extract vers.
+#
+# This is convoluted. The .S file must first be preprocessed to run guards and
+# expand names, then the resulting exports must be constructed into plain
+# EXPORT_SYMBOL(symbol); to build our dummy C file, and that gets preprocessed
+# to make the genksyms input.
+#
+# These mirror gensymtypes_c and co above, keep them in synch.
+cmd_gensymtypes_S =                                                         \
+    (echo "\#include <linux/kernel.h>" ;                                    \
+     echo "\#include <asm/asm-prototypes.h>" ;                              \
+    $(CPP) $(a_flags) $< |                                                  \
+     grep "\<___EXPORT_SYMBOL\>" |                                          \
+     sed 's/.*___EXPORT_SYMBOL[[:space:]]*\([a-zA-Z0-9_]*\)[[:space:]]*,.*/EXPORT_SYMBOL(\1);/' ) | \
+    $(CPP) -D__GENKSYMS__ $(c_flags) -xc - |                                \
+    $(GENKSYMS) $(if $(1), -T $(2))                                         \
+     $(patsubst y,-s _,$(CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX))             \
+     $(if $(KBUILD_PRESERVE),-p)                                            \
+     -r $(firstword $(wildcard $(2:.symtypes=.symref) /dev/null))
+
+quiet_cmd_cc_symtypes_S = SYM $(quiet_modtag) $@
+cmd_cc_symtypes_S =                                                         \
+    set -e;                                                                 \
+    $(call cmd_gensymtypes_S,true,$@) >/dev/null;                           \
+    test -s $@ || rm -f $@
+
+$(obj)/%.symtypes : $(src)/%.S FORCE
+       $(call cmd,cc_symtypes_S)
+
+
 quiet_cmd_cpp_s_S = CPP $(quiet_modtag) $@
 cmd_cpp_s_S       = $(CPP) $(a_flags) -o $@ $<
 
@@ -320,7 +357,37 @@ $(obj)/%.s: $(src)/%.S FORCE
        $(call if_changed_dep,cpp_s_S)
 
 quiet_cmd_as_o_S = AS $(quiet_modtag)  $@
-cmd_as_o_S       = $(CC) $(a_flags) -c -o $@ $<
+
+ifndef CONFIG_MODVERSIONS
+cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $<
+
+else
+
+ASM_PROTOTYPES := $(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/asm-prototypes.h)
+
+ifeq ($(ASM_PROTOTYPES),)
+cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $<
+
+else
+
+# Versioning matches the C process described above, with the difference that
+# we parse the asm-prototypes.h C header to get the function prototypes.
+
+cmd_as_o_S = $(CC) $(a_flags) -c -o $(@D)/.tmp_$(@F) $<
+
+cmd_modversions_S =                                                            \
+       if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then             \
+               $(call cmd_gensymtypes_S,$(KBUILD_SYMTYPES),$(@:.o=.symtypes))  \
+                   > $(@D)/.tmp_$(@F:.o=.ver);                                 \
+                                                                               \
+               $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F)                      \
+                       -T $(@D)/.tmp_$(@F:.o=.ver);                            \
+               rm -f $(@D)/.tmp_$(@F) $(@D)/.tmp_$(@F:.o=.ver);                \
+       else                                                                    \
+               mv -f $(@D)/.tmp_$(@F) $@;                                      \
+       fi;
+endif
+endif
 
 $(obj)/%.o: $(src)/%.S $(objtool_obj) FORCE
        $(call if_changed_rule,as_o_S)
@@ -358,12 +425,22 @@ $(sort $(subdir-obj-y)): $(subdir-ym) ;
 # Rule to compile a set of .o files into one .o file
 #
 ifdef builtin-target
-quiet_cmd_link_o_target = LD      $@
+
+ifdef CONFIG_THIN_ARCHIVES
+  cmd_make_builtin = rm -f $@; $(AR) rcST$(KBUILD_ARFLAGS)
+  cmd_make_empty_builtin = rm -f $@; $(AR) rcST$(KBUILD_ARFLAGS)
+  quiet_cmd_link_o_target = AR      $@
+else
+  cmd_make_builtin = $(LD) $(ld_flags) -r -o
+  cmd_make_empty_builtin = rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS)
+  quiet_cmd_link_o_target = LD      $@
+endif
+
 # If the list of objects to link is empty, just create an empty built-in.o
 cmd_link_o_target = $(if $(strip $(obj-y)),\
-                     $(LD) $(ld_flags) -r -o $@ $(filter $(obj-y), $^) \
+                     $(cmd_make_builtin) $@ $(filter $(obj-y), $^) \
                      $(cmd_secanalysis),\
-                     rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS) $@)
+                     $(cmd_make_empty_builtin) $@)
 
 $(builtin-target): $(obj-y) FORCE
        $(call if_changed,link_o_target)
@@ -389,12 +466,39 @@ $(modorder-target): $(subdir-ym) FORCE
 #
 ifdef lib-target
 quiet_cmd_link_l_target = AR      $@
-cmd_link_l_target = rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS) $@ $(lib-y)
+
+ifdef CONFIG_THIN_ARCHIVES
+  cmd_link_l_target = rm -f $@; $(AR) rcsT$(KBUILD_ARFLAGS) $@ $(lib-y)
+else
+  cmd_link_l_target = rm -f $@; $(AR) rcs$(KBUILD_ARFLAGS) $@ $(lib-y)
+endif
 
 $(lib-target): $(lib-y) FORCE
        $(call if_changed,link_l_target)
 
 targets += $(lib-target)
+
+dummy-object = $(obj)/.lib_exports.o
+ksyms-lds = $(dot-target).lds
+ifdef CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX
+ref_prefix = EXTERN(_
+else
+ref_prefix = EXTERN(
+endif
+
+quiet_cmd_export_list = EXPORTS $@
+cmd_export_list = $(OBJDUMP) -h $< | \
+       sed -ne '/___ksymtab/{s/.*+/$(ref_prefix)/;s/ .*/)/;p}' >$(ksyms-lds);\
+       rm -f $(dummy-object);\
+       $(AR) rcs$(KBUILD_ARFLAGS) $(dummy-object);\
+       $(LD) $(ld_flags) -r -o $@ -T $(ksyms-lds) $(dummy-object);\
+       rm $(dummy-object) $(ksyms-lds)
+
+$(obj)/lib-ksyms.o: $(lib-target) FORCE
+       $(call if_changed,export_list)
+
+targets += $(obj)/lib-ksyms.o
+
 endif
 
 #
index 53449a6ff6aa7de3c5c868f0645f6000b6f551e6..7c321a603b079d355bd127aebb04adc0294b4ba5 100644 (file)
@@ -36,6 +36,7 @@ warning-2 += -Wshadow
 warning-2 += $(call cc-option, -Wlogical-op)
 warning-2 += $(call cc-option, -Wmissing-field-initializers)
 warning-2 += $(call cc-option, -Wsign-compare)
+warning-2 += $(call cc-option, -Wmaybe-uninitialized)
 
 warning-3 := -Wbad-function-cast
 warning-3 += -Wcast-qual
index 61f0e6db909bbf0a7016d60f4b58a4c4bd3c0dba..060d2cb373dbe3fe788b7ad01b7868eca3b73166 100644 (file)
@@ -6,6 +6,12 @@ ifdef CONFIG_GCC_PLUGINS
 
   gcc-plugin-$(CONFIG_GCC_PLUGIN_CYC_COMPLEXITY)       += cyc_complexity_plugin.so
 
+  gcc-plugin-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY)       += latent_entropy_plugin.so
+  gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_LATENT_ENTROPY)        += -DLATENT_ENTROPY_PLUGIN
+  ifdef CONFIG_PAX_LATENT_ENTROPY
+    DISABLE_LATENT_ENTROPY_PLUGIN                      += -fplugin-arg-latent_entropy_plugin-disable
+  endif
+
   ifdef CONFIG_GCC_PLUGIN_SANCOV
     ifeq ($(CFLAGS_KCOV),)
       # It is needed because of the gcc-plugin.sh and gcc version checks.
@@ -21,7 +27,8 @@ ifdef CONFIG_GCC_PLUGINS
 
   GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
 
-  export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN GCC_PLUGIN_SUBDIR SANCOV_PLUGIN
+  export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN GCC_PLUGIN_SUBDIR
+  export SANCOV_PLUGIN DISABLE_LATENT_ENTROPY_PLUGIN
 
   ifneq ($(PLUGINCC),)
     # SANCOV_PLUGIN can be only in CFLAGS_KCOV because avoid duplication.
index 1366a94b6c39565f094f2907e61a21464e0897fe..16923ba4b5b1005158508b4e0c9d2b3d947c119c 100644 (file)
@@ -115,14 +115,18 @@ $(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE
 
 targets += $(modules:.ko=.mod.o)
 
-# Step 6), final link of the modules
+ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
+
+# Step 6), final link of the modules with optional arch pass after final link
 quiet_cmd_ld_ko_o = LD [M]  $@
-      cmd_ld_ko_o = $(LD) -r $(LDFLAGS)                                 \
-                             $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
-                             -o $@ $(filter-out FORCE,$^)
+      cmd_ld_ko_o =                                                     \
+       $(LD) -r $(LDFLAGS)                                             \
+                 $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE)             \
+                 -o $@ $(filter-out FORCE,$^) ;                         \
+       $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
 
 $(modules): %.ko :%.o %.mod.o FORCE
-       $(call if_changed,ld_ko_o)
+       +$(call if_changed,ld_ko_o)
 
 targets += $(modules)
 
index dd779c40c8e6af713c0ad7039bc929b0c6c2944a..3b1b13818d594f9ffa9dcb15206314ab349325a7 100644 (file)
@@ -17,4 +17,8 @@ endif
 ifdef CONFIG_UBSAN_NULL
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=null)
 endif
+
+      # -fsanitize=* options make GCC less smart than usual and
+      # increase the number of 'maybe-uninitialized' false positives
+      CFLAGS_UBSAN += $(call cc-option, -Wno-maybe-uninitialized)
 endif
index 746ec1ece6143fab9eb4a667f521c19fbabdc67c..fff818b92acb7e3e469ce06cc722f3ab0f275171 100644 (file)
@@ -82,8 +82,7 @@
  * to date before even starting the recursive build, so it's too late
  * at this point anyway.
  *
- * The algorithm to grep for "CONFIG_..." is bit unusual, but should
- * be fast ;-) We don't even try to really parse the header files, but
+ * We don't even try to really parse the header files, but
  * merely grep, i.e. if CONFIG_FOO is mentioned in a comment, it will
  * be picked up as well. It's not a problem with respect to
  * correctness, since that can only give too many dependencies, thus
 #include <ctype.h>
 #include <arpa/inet.h>
 
-#define INT_CONF ntohl(0x434f4e46)
-#define INT_ONFI ntohl(0x4f4e4649)
-#define INT_NFIG ntohl(0x4e464947)
-#define INT_FIG_ ntohl(0x4649475f)
-
 int insert_extra_deps;
 char *target;
 char *depfile;
@@ -241,37 +235,22 @@ static void use_config(const char *m, int slen)
        print_config(m, slen);
 }
 
-static void parse_config_file(const char *map, size_t len)
+static void parse_config_file(const char *p)
 {
-       const int *end = (const int *) (map + len);
-       /* start at +1, so that p can never be < map */
-       const int *m   = (const int *) map + 1;
-       const char *p, *q;
-
-       for (; m < end; m++) {
-               if (*m == INT_CONF) { p = (char *) m  ; goto conf; }
-               if (*m == INT_ONFI) { p = (char *) m-1; goto conf; }
-               if (*m == INT_NFIG) { p = (char *) m-2; goto conf; }
-               if (*m == INT_FIG_) { p = (char *) m-3; goto conf; }
-               continue;
-       conf:
-               if (p > map + len - 7)
-                       continue;
-               if (memcmp(p, "CONFIG_", 7))
-                       continue;
+       const char *q, *r;
+
+       while ((p = strstr(p, "CONFIG_"))) {
                p += 7;
-               for (q = p; q < map + len; q++) {
-                       if (!(isalnum(*q) || *q == '_'))
-                               goto found;
-               }
-               continue;
-
-       found:
-               if (!memcmp(q - 7, "_MODULE", 7))
-                       q -= 7;
-               if (q - p < 0)
-                       continue;
-               use_config(p, q - p);
+               q = p;
+               while (*q && (isalnum(*q) || *q == '_'))
+                       q++;
+               if (memcmp(q - 7, "_MODULE", 7) == 0)
+                       r = q - 7;
+               else
+                       r = q;
+               if (r > p)
+                       use_config(p, r - p);
+               p = q;
        }
 }
 
@@ -291,7 +270,7 @@ static void do_config_file(const char *filename)
 {
        struct stat st;
        int fd;
-       void *map;
+       char *map;
 
        fd = open(filename, O_RDONLY);
        if (fd < 0) {
@@ -308,18 +287,23 @@ static void do_config_file(const char *filename)
                close(fd);
                return;
        }
-       map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
-       if ((long) map == -1) {
-               perror("fixdep: mmap");
+       map = malloc(st.st_size + 1);
+       if (!map) {
+               perror("fixdep: malloc");
                close(fd);
                return;
        }
+       if (read(fd, map, st.st_size) != st.st_size) {
+               perror("fixdep: read");
+               close(fd);
+               return;
+       }
+       map[st.st_size] = '\0';
+       close(fd);
 
-       parse_config_file(map, st.st_size);
-
-       munmap(map, st.st_size);
+       parse_config_file(map);
 
-       close(fd);
+       free(map);
 }
 
 /*
@@ -446,22 +430,8 @@ static void print_deps(void)
        close(fd);
 }
 
-static void traps(void)
-{
-       static char test[] __attribute__((aligned(sizeof(int)))) = "CONF";
-       int *p = (int *)test;
-
-       if (*p != INT_CONF) {
-               fprintf(stderr, "fixdep: sizeof(int) != 4 or wrong endianness? %#x\n",
-                       *p);
-               exit(2);
-       }
-}
-
 int main(int argc, char *argv[])
 {
-       traps();
-
        if (argc == 5 && !strcmp(argv[1], "-e")) {
                insert_extra_deps = 1;
                argv++;
index 19f5adfd877dcf9b7e9c5b63c796ca72ee06dccf..d9ff038c1b28400799462dad5e29335b092b9a1a 100755 (executable)
@@ -8,6 +8,9 @@
 # of the GNU General Public License, incorporated herein by reference.
 
 import sys, os, re
+from signal import signal, SIGPIPE, SIG_DFL
+
+signal(SIGPIPE, SIG_DFL)
 
 if len(sys.argv) != 3:
     sys.stderr.write("usage: %s file1 file2\n" % sys.argv[0])
index c92c1528a54dd54589878df99c2d9bb8dd4c09aa..ec487b8e7051e4c0cee49a800a3d63223a7acf9c 100755 (executable)
@@ -1,7 +1,7 @@
 #!/bin/bash
 # Linux kernel coccicheck
 #
-# Read Documentation/coccinelle.txt
+# Read Documentation/dev-tools/coccinelle.rst
 #
 # This script requires at least spatch
 # version 1.0.0-rc11.
index c606231b0e4695683327bdd92132879dd1113bbe..2a5aea8e8487cdde856b18a948deb91241ea6bc6 100644 (file)
@@ -15,11 +15,11 @@ virtual org
 virtual report
 
 @depends on patch@
-expression from,to,size,flag;
+expression from,to,size;
 identifier l1,l2;
 @@
 
--  to = \(kmalloc\|kzalloc\)(size,flag);
+-  to = \(kmalloc\|kzalloc\)(size,GFP_KERNEL);
 +  to = memdup_user(from,size);
    if (
 -      to==NULL
@@ -37,12 +37,12 @@ identifier l1,l2;
 -  }
 
 @r depends on !patch@
-expression from,to,size,flag;
+expression from,to,size;
 position p;
 statement S1,S2;
 @@
 
-*  to = \(kmalloc@p\|kzalloc@p\)(size,flag);
+*  to = \(kmalloc@p\|kzalloc@p\)(size,GFP_KERNEL);
    if (to==NULL || ...) S1
    if (copy_from_user(to, from, size) != 0)
    S2
index 89b98a2f7a6f46d74863701ed49f8f60455731ed..d67ccf5f822797928151bf3d659d3e28661e4c3a 100644 (file)
@@ -17,9 +17,10 @@ virtual report
 
 @runtime_bad_err_handle exists@
 expression ret;
+position p;
 @@
 (
-ret = \(pm_runtime_idle\|
+ret@p = \(pm_runtime_idle\|
        pm_runtime_suspend\|
        pm_runtime_autosuspend\|
        pm_runtime_resume\|
@@ -47,12 +48,13 @@ IS_ERR_VALUE(ret)
 //  For context mode
 //----------------------------------------------------------
 
-@depends on runtime_bad_err_handle && context@
+@depends on context@
 identifier pm_runtime_api;
 expression ret;
+position runtime_bad_err_handle.p;
 @@
 (
-ret = pm_runtime_api(...);
+ret@p = pm_runtime_api(...);
 ...
 * IS_ERR_VALUE(ret)
 ...
@@ -62,12 +64,13 @@ ret = pm_runtime_api(...);
 //  For patch mode
 //----------------------------------------------------------
 
-@depends on runtime_bad_err_handle && patch@
+@depends on patch@
 identifier pm_runtime_api;
 expression ret;
+position runtime_bad_err_handle.p;
 @@
 (
-ret = pm_runtime_api(...);
+ret@p = pm_runtime_api(...);
 ...
 - IS_ERR_VALUE(ret)
 + ret < 0
@@ -78,13 +81,14 @@ ret = pm_runtime_api(...);
 //  For org and report mode
 //----------------------------------------------------------
 
-@r depends on runtime_bad_err_handle && (org || report) exists@
+@r depends on (org || report) exists@
 position p1, p2;
 identifier pm_runtime_api;
 expression ret;
+position runtime_bad_err_handle.p;
 @@
 (
-ret = pm_runtime_api@p1(...);
+ret@p = pm_runtime_api@p1(...);
 ...
 IS_ERR_VALUE@p2(ret)
 ...
diff --git a/scripts/coccinelle/misc/cond_no_effect.cocci b/scripts/coccinelle/misc/cond_no_effect.cocci
new file mode 100644 (file)
index 0000000..8467dbd
--- /dev/null
@@ -0,0 +1,64 @@
+/// Find conditions where the if and else branches are functionally
+// identical.
+//
+// There can be false positives in cases where the positional
+// information is used (as with lockdep) or where the identity
+// is a placeholder for not yet handled cases.
+// Unfortunately there also seems to be a tendency to use
+// the last else-if/else pair as a "default behavior" - which some
+// might consider a legitimate coding pattern. From discussion
+// on kernelnewbies, though, it seems that this is not really an
+// accepted pattern and, if used at all, it would need to be commented.
+//
+// In the Linux kernel it does not seem to actually report
+// false positives except for those that were documented as
+// being intentional.
+// The two known cases are:
+//   arch/sh/kernel/traps_64.c:read_opcode()
+//        } else if ((pc & 1) == 0) {
+//              /* SHcompact */
+//              /* TODO : provide handling for this.  We don't really support
+//                 user-mode SHcompact yet, and for a kernel fault, this would
+//                 have to come from a module built for SHcompact.  */
+//              return -EFAULT;
+//      } else {
+//              /* misaligned */
+//              return -EFAULT;
+//      }
+//   fs/kernfs/file.c:kernfs_fop_open()
+//       * Both paths of the branch look the same.  They're supposed to
+//       * look that way and give @of->mutex different static lockdep keys.
+//       */
+//      if (has_mmap)
+//              mutex_init(&of->mutex);
+//      else
+//              mutex_init(&of->mutex);
+//
+// All other cases look like bugs or at least lack of documentation
+//
+// Confidence: Moderate
+// Copyright: (C) 2016 Nicholas Mc Guire, OSADL.  GPLv2.
+// Comments:
+// Options: --no-includes --include-headers
+
+virtual org
+virtual report
+
+// Match every if statement whose "then" and "else" branches are the same
+// statement S1, and remember the position of the "if" in p.
+@cond@
+statement S1;
+position p;
+@@
+
+* if@p (...) S1 else S1
+
+// org mode: print one warning entry per position matched by rule cond.
+@script:python depends on org@
+p << cond.p;
+@@
+
+cocci.print_main("WARNING: possible condition with no effect (if == else)",p)
+
+// report mode: print one warning line per position matched by rule cond.
+@script:python depends on report@
+p << cond.p;
+@@
+
+coccilib.report.print_report(p[0],"WARNING: possible condition with no effect (if == else)")
index 34df974c6ba3a586435dc0ce313a3986d0031d6c..8af7db06122d21b91667755767d0967824b05694 100644 (file)
@@ -20,7 +20,7 @@
 
 #include "gcc-common.h"
 
-int plugin_is_GPL_compatible;
+__visible int plugin_is_GPL_compatible;
 
 static struct plugin_info cyc_complexity_plugin_info = {
        .version        = "20160225",
@@ -49,7 +49,7 @@ static unsigned int cyc_complexity_execute(void)
 
 #include "gcc-generate-gimple-pass.h"
 
-int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version)
+__visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version)
 {
        const char * const plugin_name = plugin_info->base_name;
        struct register_pass_info cyc_complexity_pass_info;
index 172850bcd0d9f0eefe6b575f6417ed2397889ccf..950fd2e64bb73b9f261188bba272ea7e7ec10249 100644 (file)
@@ -130,6 +130,7 @@ extern void dump_gimple_stmt(pretty_printer *, gimple, int, int);
 #endif
 
 #define __unused __attribute__((__unused__))
+#define __visible __attribute__((visibility("default")))
 
 #define DECL_NAME_POINTER(node) IDENTIFIER_POINTER(DECL_NAME(node))
 #define DECL_NAME_LENGTH(node) IDENTIFIER_LENGTH(DECL_NAME(node))
diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c
new file mode 100644 (file)
index 0000000..8160f1c
--- /dev/null
@@ -0,0 +1,639 @@
+/*
+ * Copyright 2012-2016 by the PaX Team <pageexec@freemail.hu>
+ * Copyright 2016 by Emese Revfy <re.emese@gmail.com>
+ * Licensed under the GPL v2
+ *
+ * Note: the choice of the license means that the compilation process is
+ *       NOT 'eligible' as defined by gcc's library exception to the GPL v3,
+ *       but for the kernel it doesn't matter since it doesn't link against
+ *       any of the gcc libraries
+ *
+ * This gcc plugin helps generate a little bit of entropy from program state,
+ * used throughout the uptime of the kernel. Here is an instrumentation example:
+ *
+ * before:
+ * void __latent_entropy test(int argc, char *argv[])
+ * {
+ *     if (argc <= 1)
+ *             printf("%s: no command arguments :(\n", *argv);
+ *     else
+ *             printf("%s: %d command arguments!\n", *argv, argc - 1);
+ * }
+ *
+ * after:
+ * void __latent_entropy test(int argc, char *argv[])
+ * {
+ *     // latent_entropy_execute() 1.
+ *     unsigned long local_entropy;
+ *     // init_local_entropy() 1.
+ *     void *local_entropy_frameaddr;
+ *     // init_local_entropy() 3.
+ *     unsigned long tmp_latent_entropy;
+ *
+ *     // init_local_entropy() 2.
+ *     local_entropy_frameaddr = __builtin_frame_address(0);
+ *     local_entropy = (unsigned long) local_entropy_frameaddr;
+ *
+ *     // init_local_entropy() 4.
+ *     tmp_latent_entropy = latent_entropy;
+ *     // init_local_entropy() 5.
+ *     local_entropy ^= tmp_latent_entropy;
+ *
+ *     // latent_entropy_execute() 3.
+ *     if (argc <= 1) {
+ *             // perturb_local_entropy()
+ *             local_entropy += 4623067384293424948;
+ *             printf("%s: no command arguments :(\n", *argv);
+ *     } else {
+ *             // perturb_local_entropy()
+ *             local_entropy ^= 3896280633962944730;
+ *             printf("%s: %d command arguments!\n", *argv, argc - 1);
+ *     }
+ *
+ *     // latent_entropy_execute() 4.
+ *     tmp_latent_entropy = rol(tmp_latent_entropy, local_entropy);
+ *     latent_entropy = tmp_latent_entropy;
+ * }
+ *
+ * TODO:
+ * - add ipa pass to identify not explicitly marked candidate functions
+ * - mix in more program state (function arguments/return values,
+ *   loop variables, etc)
+ * - more instrumentation control via attribute parameters
+ *
+ * BUGS:
+ * - none known
+ *
+ * Options:
+ * -fplugin-arg-latent_entropy_plugin-disable
+ *
+ * Attribute: __attribute__((latent_entropy))
+ *  The latent_entropy gcc attribute can only be applied to functions and variables.
+ *  If it is on a function then the plugin will instrument it. If the attribute
+ *  is on a variable then the plugin will initialize it with a random value.
+ *  The variable must be an integer, an integer array type or a structure
+ *  with integer fields.
+ */
+
+#include "gcc-common.h"
+
+__visible int plugin_is_GPL_compatible;
+
+static GTY(()) tree latent_entropy_decl;
+
+static struct plugin_info latent_entropy_plugin_info = {
+       .version        = "201606141920vanilla",
+       .help           = "disable\tturn off latent entropy instrumentation\n",
+};
+
+static unsigned HOST_WIDE_INT seed;
+/*
+ * Produce the next pseudo-random constant from the file-level seed, which is
+ * obtained from GCC's get_random_seed(). One bit is shifted out of the seed
+ * for every bit of the result and fed back into the seed's high bits. This is
+ * a simple generator without any cryptographic security because the entropy
+ * doesn't come from here.
+ */
+static unsigned HOST_WIDE_INT get_random_const(void)
+{
+       unsigned HOST_WIDE_INT value = 0;
+       unsigned int remaining = 8 * sizeof(value);
+
+       while (remaining-- > 0) {
+               unsigned HOST_WIDE_INT bit = seed & 1;
+
+               seed >>= 1;
+               /* feedback step: only taken when the bit shifted out was set */
+               if (bit)
+                       seed ^= 0xD800000000000000ULL;
+               value = (value << 1) | bit;
+       }
+
+       return value;
+}
+
+/*
+ * Build an integer constant of the given type from the next random value,
+ * masked down to the size recorded in TYPE_SIZE(type).
+ */
+static tree tree_get_random_const(tree type)
+{
+       unsigned long long top_bit, mask, bits;
+
+       /*
+        * Build the all-ones mask in two steps so that the shift count is
+        * always one less than the type size.
+        */
+       top_bit = 1ULL << (TREE_INT_CST_LOW(TYPE_SIZE(type)) - 1);
+       mask = 2 * (top_bit - 1) + 1;
+       bits = mask & get_random_const();
+
+       return TYPE_UNSIGNED(type) ? build_int_cstu(type, bits)
+                                  : build_int_cst(type, bits);
+}
+
+/*
+ * Handler for the latent_entropy attribute.
+ *
+ * On a function declaration the attribute is accepted as is. On a variable
+ * the handler requires TREE_STATIC storage and no initializer, and the type
+ * must be an integer, an integer array whose size is a constant, or a
+ * structure made only of integer fields; the variable's DECL_INITIAL is then
+ * set to random constants. Every other case reports an error and sets
+ * *no_add_attrs.
+ *
+ * Always returns NULL_TREE.
+ */
+static tree handle_latent_entropy_attribute(tree *node, tree name,
+                                               tree args __unused,
+                                               int flags __unused,
+                                               bool *no_add_attrs)
+{
+       tree type;
+       /* constructor element vector; its spelling depends on the gcc version */
+#if BUILDING_GCC_VERSION <= 4007
+       VEC(constructor_elt, gc) *vals;
+#else
+       vec<constructor_elt, va_gc> *vals;
+#endif
+
+       switch (TREE_CODE(*node)) {
+       default:
+               *no_add_attrs = true;
+               error("%qE attribute only applies to functions and variables",
+                       name);
+               break;
+
+       case VAR_DECL:
+               /* the plugin provides the initializer, so none may exist yet */
+               if (DECL_INITIAL(*node)) {
+                       *no_add_attrs = true;
+                       error("variable %qD with %qE attribute must not be initialized",
+                               *node, name);
+                       break;
+               }
+
+               if (!TREE_STATIC(*node)) {
+                       *no_add_attrs = true;
+                       error("variable %qD with %qE attribute must not be local",
+                               *node, name);
+                       break;
+               }
+
+               type = TREE_TYPE(*node);
+               switch (TREE_CODE(type)) {
+               default:
+                       *no_add_attrs = true;
+                       error("variable %qD with %qE attribute must be an integer or a fixed length integer array type or a fixed sized structure with integer fields",
+                               *node, name);
+                       break;
+
+               case RECORD_TYPE: {
+                       tree fld, lst = TYPE_FIELDS(type);
+                       unsigned int nelt = 0;
+
+                       /* first pass: count the fields and reject non-integer ones */
+                       for (fld = lst; fld; nelt++, fld = TREE_CHAIN(fld)) {
+                               tree fieldtype;
+
+                               fieldtype = TREE_TYPE(fld);
+                               if (TREE_CODE(fieldtype) == INTEGER_TYPE)
+                                       continue;
+
+                               *no_add_attrs = true;
+                               error("structure variable %qD with %qE attribute has a non-integer field %qE",
+                                       *node, name, fld);
+                               break;
+                       }
+
+                       /* fld is non-NULL only if the loop stopped on an error */
+                       if (fld)
+                               break;
+
+#if BUILDING_GCC_VERSION <= 4007
+                       vals = VEC_alloc(constructor_elt, gc, nelt);
+#else
+                       vec_alloc(vals, nelt);
+#endif
+
+                       /* second pass: one random constant per field */
+                       for (fld = lst; fld; fld = TREE_CHAIN(fld)) {
+                               tree random_const, fld_t = TREE_TYPE(fld);
+
+                               random_const = tree_get_random_const(fld_t);
+                               CONSTRUCTOR_APPEND_ELT(vals, fld, random_const);
+                       }
+
+                       /* Initialize the fields with random constants */
+                       DECL_INITIAL(*node) = build_constructor(type, vals);
+                       break;
+               }
+
+               /* Initialize the variable with a random constant */
+               case INTEGER_TYPE:
+                       DECL_INITIAL(*node) = tree_get_random_const(type);
+                       break;
+
+               case ARRAY_TYPE: {
+                       tree elt_type, array_size, elt_size;
+                       unsigned int i, nelt;
+                       HOST_WIDE_INT array_size_int, elt_size_int;
+
+                       elt_type = TREE_TYPE(type);
+                       elt_size = TYPE_SIZE_UNIT(TREE_TYPE(type));
+                       array_size = TYPE_SIZE_UNIT(type);
+
+                       /* the total size must be a known integer constant */
+                       if (TREE_CODE(elt_type) != INTEGER_TYPE || !array_size
+                               || TREE_CODE(array_size) != INTEGER_CST) {
+                               *no_add_attrs = true;
+                               error("array variable %qD with %qE attribute must be a fixed length integer array type",
+                                       *node, name);
+                               break;
+                       }
+
+                       /* element count = total size / element size */
+                       array_size_int = TREE_INT_CST_LOW(array_size);
+                       elt_size_int = TREE_INT_CST_LOW(elt_size);
+                       nelt = array_size_int / elt_size_int;
+
+#if BUILDING_GCC_VERSION <= 4007
+                       vals = VEC_alloc(constructor_elt, gc, nelt);
+#else
+                       vec_alloc(vals, nelt);
+#endif
+
+                       /* one (index, random constant) pair per element */
+                       for (i = 0; i < nelt; i++) {
+                               tree cst = size_int(i);
+                               tree rand_cst = tree_get_random_const(elt_type);
+
+                               CONSTRUCTOR_APPEND_ELT(vals, cst, rand_cst);
+                       }
+
+                       /*
+                        * Initialize the elements of the array with random
+                        * constants
+                        */
+                       DECL_INITIAL(*node) = build_constructor(type, vals);
+                       break;
+               }
+               }
+               break;
+
+       /* functions need no work here; the gate looks the attribute up later */
+       case FUNCTION_DECL:
+               break;
+       }
+
+       return NULL_TREE;
+}
+
+/*
+ * Description of the latent_entropy attribute: it takes no arguments
+ * (min_length == max_length == 0), must be attached to a declaration and is
+ * processed by handle_latent_entropy_attribute().
+ */
+static struct attribute_spec latent_entropy_attr = {
+       .name                           = "latent_entropy",
+       .min_length                     = 0,
+       .max_length                     = 0,
+       .decl_required                  = true,
+       .type_required                  = false,
+       .function_type_required         = false,
+       .handler                        = handle_latent_entropy_attribute,
+#if BUILDING_GCC_VERSION >= 4007
+       .affects_type_identity          = false
+#endif
+};
+
+/*
+ * Callback registered for PLUGIN_ATTRIBUTES in plugin_init(): makes the
+ * latent_entropy attribute known to gcc. Both arguments are unused.
+ */
+static void register_attributes(void *event_data __unused, void *data __unused)
+{
+       register_attribute(&latent_entropy_attr);
+}
+
+/*
+ * Pass gate: returns true only for functions that carry the latent_entropy
+ * attribute and are not treated as noreturn by the two checks below.
+ */
+static bool latent_entropy_gate(void)
+{
+       tree fndecl = current_function_decl;
+
+       /* don't bother with noreturn functions for now */
+       if (TREE_THIS_VOLATILE(fndecl))
+               return false;
+
+       /* gcc-4.5 doesn't discover some trivial noreturn functions */
+       if (EDGE_COUNT(EXIT_BLOCK_PTR_FOR_FN(cfun)->preds) == 0)
+               return false;
+
+       return lookup_attribute("latent_entropy",
+                               DECL_ATTRIBUTES(fndecl)) != NULL_TREE;
+}
+
+/*
+ * Create a temporary variable of the given type and name, then pass it to
+ * add_referenced_var() and mark_sym_for_renaming() before returning it.
+ */
+static tree create_var(tree type, const char *name)
+{
+       tree tmp_var = create_tmp_var(type, name);
+
+       add_referenced_var(tmp_var);
+       mark_sym_for_renaming(tmp_var);
+
+       return tmp_var;
+}
+
+/*
+ * Set up the next operation and its constant operand to use in the latent
+ * entropy PRNG. When RHS is specified, the request is for perturbing the
+ * local latent entropy variable, otherwise it is for perturbing the global
+ * latent entropy variable where the two operands are already given by the
+ * local and global latent entropy variables themselves.
+ *
+ * The operation is one of add/xor/rol when instrumenting the local entropy
+ * variable and one of add/xor when perturbing the global entropy variable.
+ * Rotation is not used for the latter case because it would transmit less
+ * entropy to the global variable than the other two operations.
+ *
+ * The previously returned operation is kept in a function-local static, so
+ * successive calls cycle through the operations. A random constant is drawn
+ * on every call, even when RHS is NULL and the value is not handed back.
+ */
+static enum tree_code get_op(tree *rhs)
+{
+       static enum tree_code op;
+       unsigned HOST_WIDE_INT random_const;
+
+       random_const = get_random_const();
+
+       switch (op) {
+       case BIT_XOR_EXPR:
+               op = PLUS_EXPR;
+               break;
+
+       case PLUS_EXPR:
+               if (rhs) {
+                       op = LROTATE_EXPR;
+                       /*
+                        * This code limits the value of random_const to
+                        * the size of a wide int for the rotation
+                        */
+                       random_const &= HOST_BITS_PER_WIDE_INT - 1;
+                       break;
+               }
+               /* fallthrough: without RHS rotation is skipped, go on to xor */
+
+       case LROTATE_EXPR:
+       default:
+               op = BIT_XOR_EXPR;
+               break;
+       }
+       if (rhs)
+               *rhs = build_int_cstu(long_unsigned_type_node, random_const);
+       return op;
+}
+
+/*
+ * Build the assignment "lhs = op1 <code> op2"; a thin wrapper around
+ * gimple_build_assign_with_ops(). The statement is returned, not inserted.
+ */
+static gimple create_assign(enum tree_code code, tree lhs, tree op1,
+                               tree op2)
+{
+       return gimple_build_assign_with_ops(code, lhs, op1, op2);
+}
+
+/*
+ * Insert "local_entropy = local_entropy <op> <constant>" at the start of bb,
+ * after any labels, with the operation and constant chosen by get_op().
+ */
+static void perturb_local_entropy(basic_block bb, tree local_entropy)
+{
+       tree operand;
+       enum tree_code code = get_op(&operand);
+       gimple stmt = create_assign(code, local_entropy, local_entropy,
+                                       operand);
+       gimple_stmt_iterator insert_at = gsi_after_labels(bb);
+
+       gsi_insert_before(&insert_at, stmt, GSI_NEW_STMT);
+       update_stmt(stmt);
+}
+
+/*
+ * Emit the read-modify-write sequence that folds local_entropy into the
+ * global latent_entropy variable:
+ *
+ *     temp = latent_entropy;
+ *     temp = temp <op> local_entropy;
+ *     latent_entropy = temp;
+ *
+ * The first statement is inserted before the statement at *gsi and the other
+ * two after it, each with GSI_NEW_STMT. get_op(NULL) selects the operation,
+ * so it is add or xor, never a rotation.
+ */
+static void __perturb_latent_entropy(gimple_stmt_iterator *gsi,
+                                       tree local_entropy)
+{
+       gimple assign;
+       tree temp;
+       enum tree_code op;
+
+       /* 1. create temporary copy of latent_entropy */
+       temp = create_var(long_unsigned_type_node, "temp_latent_entropy");
+
+       /* 2. read... */
+       add_referenced_var(latent_entropy_decl);
+       mark_sym_for_renaming(latent_entropy_decl);
+       assign = gimple_build_assign(temp, latent_entropy_decl);
+       gsi_insert_before(gsi, assign, GSI_NEW_STMT);
+       update_stmt(assign);
+
+       /* 3. ...modify... */
+       op = get_op(NULL);
+       assign = create_assign(op, temp, temp, local_entropy);
+       gsi_insert_after(gsi, assign, GSI_NEW_STMT);
+       update_stmt(assign);
+
+       /* 4. ...write latent_entropy */
+       assign = gimple_build_assign(latent_entropy_decl, temp);
+       gsi_insert_after(gsi, assign, GSI_NEW_STMT);
+       update_stmt(assign);
+}
+
+/*
+ * Look for the first call statement in bb that is marked as a tail call and
+ * emit the global entropy update at it. Returns true if such a call was
+ * found and instrumented, false otherwise.
+ */
+static bool handle_tail_calls(basic_block bb, tree local_entropy)
+{
+       gimple_stmt_iterator it = gsi_start_bb(bb);
+
+       while (!gsi_end_p(it)) {
+               gimple cur = gsi_stmt(it);
+
+               if (is_gimple_call(cur) &&
+                   gimple_call_tail_p(as_a_gcall(cur))) {
+                       __perturb_latent_entropy(&it, local_entropy);
+                       return true;
+               }
+
+               gsi_next(&it);
+       }
+
+       return false;
+}
+
+/*
+ * Mix local_entropy into the global entropy variable at the end of the
+ * current function. The exit block must have a single predecessor; tail
+ * calls in that block's own predecessors and in the block itself are
+ * instrumented, and if the block holds no tail call the update is emitted at
+ * its last statement.
+ */
+static void perturb_latent_entropy(tree local_entropy)
+{
+       edge_iterator ei;
+       edge pred_e, exit_e;
+       basic_block last_bb;
+       gimple_stmt_iterator gsi;
+
+       gcc_assert(single_pred_p(EXIT_BLOCK_PTR_FOR_FN(cfun)));
+       exit_e = single_pred_edge(EXIT_BLOCK_PTR_FOR_FN(cfun));
+
+       FOR_EACH_EDGE(pred_e, ei, exit_e->src->preds) {
+               basic_block pred_bb = pred_e->src;
+
+               /* the entry and exit blocks are never searched */
+               if (pred_bb == ENTRY_BLOCK_PTR_FOR_FN(cfun) ||
+                   pred_bb == EXIT_BLOCK_PTR_FOR_FN(cfun))
+                       continue;
+
+               handle_tail_calls(pred_bb, local_entropy);
+       }
+
+       last_bb = single_pred(EXIT_BLOCK_PTR_FOR_FN(cfun));
+       if (handle_tail_calls(last_bb, local_entropy))
+               return;
+
+       /* no tail call in the last block: update at its last statement */
+       gsi = gsi_last_bb(last_bb);
+       __perturb_latent_entropy(&gsi, local_entropy);
+}
+
+/*
+ * Emit the statements that initialize local_entropy at the start of bb
+ * (after any labels):
+ *
+ *     local_entropy_frameaddr = __builtin_frame_address(0);
+ *     local_entropy = (unsigned long) local_entropy_frameaddr;
+ *     temp_latent_entropy = latent_entropy;
+ *     local_entropy ^= temp_latent_entropy;
+ *     local_entropy = local_entropy <add or xor> <random constant>;
+ */
+static void init_local_entropy(basic_block bb, tree local_entropy)
+{
+       gimple assign, call;
+       tree frame_addr, rand_const, tmp, fndecl, udi_frame_addr;
+       enum tree_code op;
+       unsigned HOST_WIDE_INT rand_cst;
+       gimple_stmt_iterator gsi = gsi_after_labels(bb);
+
+       /* 1. create local_entropy_frameaddr */
+       frame_addr = create_var(ptr_type_node, "local_entropy_frameaddr");
+
+       /* 2. local_entropy_frameaddr = __builtin_frame_address() */
+       fndecl = builtin_decl_implicit(BUILT_IN_FRAME_ADDRESS);
+       call = gimple_build_call(fndecl, 1, integer_zero_node);
+       gimple_call_set_lhs(call, frame_addr);
+       gsi_insert_before(&gsi, call, GSI_NEW_STMT);
+       update_stmt(call);
+
+       /* local_entropy = (unsigned long) local_entropy_frameaddr */
+       udi_frame_addr = fold_convert(long_unsigned_type_node, frame_addr);
+       assign = gimple_build_assign(local_entropy, udi_frame_addr);
+       gsi_insert_after(&gsi, assign, GSI_NEW_STMT);
+       update_stmt(assign);
+
+       /* 3. create temporary copy of latent_entropy */
+       tmp = create_var(long_unsigned_type_node, "temp_latent_entropy");
+
+       /* 4. read the global entropy variable into the temporary */
+       add_referenced_var(latent_entropy_decl);
+       mark_sym_for_renaming(latent_entropy_decl);
+       assign = gimple_build_assign(tmp, latent_entropy_decl);
+       gsi_insert_after(&gsi, assign, GSI_NEW_STMT);
+       update_stmt(assign);
+
+       /* 5. mix the global entropy value into local entropy */
+       assign = create_assign(BIT_XOR_EXPR, local_entropy, local_entropy, tmp);
+       gsi_insert_after(&gsi, assign, GSI_NEW_STMT);
+       update_stmt(assign);
+
+       /*
+        * 6. perturb local entropy with a random constant; get_op(NULL)
+        *    never selects a rotation, so the operation is add or xor
+        */
+       rand_cst = get_random_const();
+       rand_const = build_int_cstu(long_unsigned_type_node, rand_cst);
+       op = get_op(NULL);
+       assign = create_assign(op, local_entropy, local_entropy, rand_const);
+       gsi_insert_after(&gsi, assign, GSI_NEW_STMT);
+       update_stmt(assign);
+}
+
+/*
+ * Make sure latent_entropy_decl is set. If it is still NULL_TREE, search the
+ * known variables for one named "latent_entropy" and remember it. Returns
+ * true if the declaration is available afterwards.
+ */
+static bool create_latent_entropy_decl(void)
+{
+       varpool_node_ptr node;
+
+       if (latent_entropy_decl != NULL_TREE)
+               return true;
+
+       FOR_EACH_VARIABLE(node) {
+               tree var = NODE_DECL(node);
+
+               /* the length test is evaluated before the string comparison */
+               if (DECL_NAME_LENGTH(var) >= sizeof("latent_entropy") - 1 &&
+                   !strcmp(IDENTIFIER_POINTER(DECL_NAME(var)),
+                           "latent_entropy")) {
+                       latent_entropy_decl = var;
+                       break;
+               }
+       }
+
+       return latent_entropy_decl != NULL_TREE;
+}
+
+/*
+ * Pass body: instrument the current function. Does nothing when the
+ * latent_entropy declaration cannot be found. Always returns 0.
+ */
+static unsigned int latent_entropy_execute(void)
+{
+       basic_block entry_bb, bb;
+       tree local_entropy;
+
+       if (!create_latent_entropy_decl())
+               return 0;
+
+       /*
+        * prepare for step 2 below: split the entry edge when the first
+        * block has more than one predecessor
+        */
+       entry_bb = ENTRY_BLOCK_PTR_FOR_FN(cfun);
+       gcc_assert(single_succ_p(entry_bb));
+       bb = single_succ(entry_bb);
+       if (!single_pred_p(bb)) {
+               split_edge(single_succ_edge(entry_bb));
+               gcc_assert(single_succ_p(entry_bb));
+               bb = single_succ(entry_bb);
+       }
+
+       /* 1. create the local entropy variable */
+       local_entropy = create_var(long_unsigned_type_node, "local_entropy");
+
+       /* 2. initialize the local entropy variable */
+       init_local_entropy(bb, local_entropy);
+
+       /*
+        * 3. instrument each BB after the first one with an operation on
+        *    the local entropy variable
+        */
+       for (bb = bb->next_bb; bb != EXIT_BLOCK_PTR_FOR_FN(cfun);
+            bb = bb->next_bb)
+               perturb_local_entropy(bb, local_entropy);
+
+       /* 4. mix local entropy into the global entropy variable */
+       perturb_latent_entropy(local_entropy);
+       return 0;
+}
+
+/*
+ * PLUGIN_START_UNIT callback: fetch the seed for the random constants and,
+ * unless in_lto_p is set, declare the global latent_entropy variable.
+ */
+static void latent_entropy_start_unit(void *gcc_data __unused,
+                                       void *user_data __unused)
+{
+       tree id, type;
+
+       seed = get_random_seed(false);
+
+       if (in_lto_p)
+               return;
+
+       /* extern volatile unsigned long latent_entropy */
+       type = build_qualified_type(long_unsigned_type_node,
+                       TYPE_QUALS(long_unsigned_type_node) | TYPE_QUAL_VOLATILE);
+       id = get_identifier("latent_entropy");
+       latent_entropy_decl = build_decl(UNKNOWN_LOCATION, VAR_DECL, id, type);
+
+       /* linkage and storage */
+       DECL_EXTERNAL(latent_entropy_decl) = 1;
+       TREE_PUBLIC(latent_entropy_decl) = 1;
+       TREE_STATIC(latent_entropy_decl) = 1;
+       TREE_THIS_VOLATILE(latent_entropy_decl) = 1;
+
+       /* remaining flags */
+       DECL_ARTIFICIAL(latent_entropy_decl) = 1;
+       TREE_USED(latent_entropy_decl) = 1;
+       DECL_PRESERVE_P(latent_entropy_decl) = 1;
+
+       lang_hooks.decls.pushdecl(latent_entropy_decl);
+}
+
+#define PASS_NAME latent_entropy
+#define PROPERTIES_REQUIRED PROP_gimple_leh | PROP_cfg
+#define TODO_FLAGS_FINISH TODO_verify_ssa | TODO_verify_stmts | TODO_dump_func \
+       | TODO_update_ssa
+#include "gcc-generate-gimple-pass.h"
+
+/*
+ * Plugin entry point.
+ *
+ * Checks the gcc/plugin version match, parses the plugin arguments (only
+ * "disable" is recognized; anything else is reported as an error) and
+ * registers the callbacks. The plugin info and the attribute are always
+ * registered; the start-unit hook, the GC root for latent_entropy_decl and
+ * the instrumentation pass (inserted before the first "optimized" pass) are
+ * registered only when the plugin is not disabled.
+ *
+ * Returns 0 on success and 1 on a version mismatch.
+ */
+__visible int plugin_init(struct plugin_name_args *plugin_info,
+                         struct plugin_gcc_version *version)
+{
+       bool enabled = true;
+       const char * const plugin_name = plugin_info->base_name;
+       const int argc = plugin_info->argc;
+       const struct plugin_argument * const argv = plugin_info->argv;
+       int i;
+
+       struct register_pass_info latent_entropy_pass_info;
+
+       latent_entropy_pass_info.pass           = make_latent_entropy_pass();
+       latent_entropy_pass_info.reference_pass_name            = "optimized";
+       latent_entropy_pass_info.ref_pass_instance_number       = 1;
+       latent_entropy_pass_info.pos_op         = PASS_POS_INSERT_BEFORE;
+       /* GC root table with a single entry for latent_entropy_decl */
+       static const struct ggc_root_tab gt_ggc_r_gt_latent_entropy[] = {
+               {
+                       .base = &latent_entropy_decl,
+                       .nelt = 1,
+                       .stride = sizeof(latent_entropy_decl),
+                       .cb = &gt_ggc_mx_tree_node,
+                       .pchw = &gt_pch_nx_tree_node
+               },
+               LAST_GGC_ROOT_TAB
+       };
+
+       if (!plugin_default_version_check(version, &gcc_version)) {
+               error(G_("incompatible gcc/plugin versions"));
+               return 1;
+       }
+
+       for (i = 0; i < argc; ++i) {
+               if (!(strcmp(argv[i].key, "disable"))) {
+                       enabled = false;
+                       continue;
+               }
+               error(G_("unknown option '-fplugin-arg-%s-%s'"), plugin_name, argv[i].key);
+       }
+
+       register_callback(plugin_name, PLUGIN_INFO, NULL,
+                               &latent_entropy_plugin_info);
+       if (enabled) {
+               register_callback(plugin_name, PLUGIN_START_UNIT,
+                                       &latent_entropy_start_unit, NULL);
+               register_callback(plugin_name, PLUGIN_REGISTER_GGC_ROOTS,
+                                 NULL, (void *)&gt_ggc_r_gt_latent_entropy);
+               register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
+                                       &latent_entropy_pass_info);
+       }
+       /* registered even when disabled, so the attribute is still accepted */
+       register_callback(plugin_name, PLUGIN_ATTRIBUTES, register_attributes,
+                               NULL);
+
+       return 0;
+}
index aedd6113cb731bcbbec89620e1e4fb4512cef4b2..7ea0b3f50739e319a8ec57de40654d241e5a78f7 100644 (file)
@@ -21,7 +21,7 @@
 
 #include "gcc-common.h"
 
-int plugin_is_GPL_compatible;
+__visible int plugin_is_GPL_compatible;
 
 tree sancov_fndecl;
 
@@ -86,7 +86,7 @@ static void sancov_start_unit(void __unused *gcc_data, void __unused *user_data)
 #endif
 }
 
-int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version)
+__visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version)
 {
        int i;
        struct register_pass_info sancov_plugin_pass_info;
index 973e8c1415677eeba513543655e939de234521e3..17867e723a51a667fdec65194b9033346ff1b786 100755 (executable)
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
+echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
 if [ "$?" -eq "0" ] ; then
        echo y
 else
index 17fa901418ae6a491ba61f3814a1143e2a62bfde..0055b07b03b68cafd0bc3c400ee23b686632efbf 100755 (executable)
@@ -97,7 +97,10 @@ print_mtime() {
 }
 
 list_parse() {
-       [ ! -L "$1" ] && echo "$1 \\" || :
+       if [ -L "$1" ]; then
+               return
+       fi
+       echo "$1" | sed 's/:/\\:/g; s/$/ \\/'
 }
 
 # for each file print a line in following format
index e583565f2011dae4aedc8ae2e0c83fa286687058..5235aa507ba533cbd631f2ba81f702d97b50925c 100644 (file)
@@ -289,6 +289,23 @@ repeat:
        }
       break;
 
+    case ST_TYPEOF_1:
+      if (token == IDENT)
+       {
+         if (is_reserved_word(yytext, yyleng)
+             || find_symbol(yytext, SYM_TYPEDEF, 1))
+           {
+             yyless(0);
+             unput('(');
+             lexstate = ST_NORMAL;
+             token = TYPEOF_KEYW;
+             break;
+           }
+         _APP("(", 1);
+       }
+       lexstate = ST_TYPEOF;
+       /* FALLTHRU */
+
     case ST_TYPEOF:
       switch (token)
        {
@@ -313,24 +330,6 @@ repeat:
        }
       break;
 
-    case ST_TYPEOF_1:
-      if (token == IDENT)
-       {
-         if (is_reserved_word(yytext, yyleng)
-             || find_symbol(yytext, SYM_TYPEDEF, 1))
-           {
-             yyless(0);
-             unput('(');
-             lexstate = ST_NORMAL;
-             token = TYPEOF_KEYW;
-             break;
-           }
-         _APP("(", 1);
-       }
-       APP;
-       lexstate = ST_TYPEOF;
-       goto repeat;
-
     case ST_BRACKET:
       APP;
       switch (token)
index f82740a69b850ce7f496700a42ef8c75f53e2f80..985c5541aae411add700c6e1158b9ebc9dca046f 100644 (file)
@@ -2098,6 +2098,23 @@ repeat:
        }
       break;
 
+    case ST_TYPEOF_1:
+      if (token == IDENT)
+       {
+         if (is_reserved_word(yytext, yyleng)
+             || find_symbol(yytext, SYM_TYPEDEF, 1))
+           {
+             yyless(0);
+             unput('(');
+             lexstate = ST_NORMAL;
+             token = TYPEOF_KEYW;
+             break;
+           }
+         _APP("(", 1);
+       }
+       lexstate = ST_TYPEOF;
+       /* FALLTHRU */
+
     case ST_TYPEOF:
       switch (token)
        {
@@ -2122,24 +2139,6 @@ repeat:
        }
       break;
 
-    case ST_TYPEOF_1:
-      if (token == IDENT)
-       {
-         if (is_reserved_word(yytext, yyleng)
-             || find_symbol(yytext, SYM_TYPEDEF, 1))
-           {
-             yyless(0);
-             unput('(');
-             lexstate = ST_NORMAL;
-             token = TYPEOF_KEYW;
-             break;
-           }
-         _APP("(", 1);
-       }
-       APP;
-       lexstate = ST_TYPEOF;
-       goto repeat;
-
     case ST_BRACKET:
       APP;
       switch (token)
index 4f727eb5ec43f294e0a508244bab4249c9001575..f742c65108b9d41f0c4d0bbed22924fc35383b37 100755 (executable)
@@ -37,12 +37,40 @@ info()
        fi
 }
 
+# Thin archive build here makes a final archive with
+# symbol table and indexes from vmlinux objects, which can be
+# used as input to linker.
+#
+# Traditional incremental style of link does not require this step
+#
+# built-in.o output file
+#
+archive_builtin()
+{
+       if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
+               info AR built-in.o
+               rm -f built-in.o;
+               ${AR} rcsT${KBUILD_ARFLAGS} built-in.o                  \
+                                       ${KBUILD_VMLINUX_INIT}          \
+                                       ${KBUILD_VMLINUX_MAIN}
+       fi
+}
+
 # Link of vmlinux.o used for section mismatch analysis
 # ${1} output file
 modpost_link()
 {
-       ${LD} ${LDFLAGS} -r -o ${1} ${KBUILD_VMLINUX_INIT}                   \
-               --start-group ${KBUILD_VMLINUX_MAIN} --end-group
+       local objects
+
+       if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
+               objects="--whole-archive built-in.o"
+       else
+               objects="${KBUILD_VMLINUX_INIT}                         \
+                       --start-group                                   \
+                       ${KBUILD_VMLINUX_MAIN}                          \
+                       --end-group"
+       fi
+       ${LD} ${LDFLAGS} -r -o ${1} ${objects}
 }
 
 # Link of vmlinux
@@ -51,18 +79,36 @@ modpost_link()
 vmlinux_link()
 {
        local lds="${objtree}/${KBUILD_LDS}"
+       local objects
 
        if [ "${SRCARCH}" != "um" ]; then
-               ${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2}                  \
-                       -T ${lds} ${KBUILD_VMLINUX_INIT}                     \
-                       --start-group ${KBUILD_VMLINUX_MAIN} --end-group ${1}
+               if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
+                       objects="--whole-archive built-in.o ${1}"
+               else
+                       objects="${KBUILD_VMLINUX_INIT}                 \
+                               --start-group                           \
+                               ${KBUILD_VMLINUX_MAIN}                  \
+                               --end-group                             \
+                               ${1}"
+               fi
+
+               ${LD} ${LDFLAGS} ${LDFLAGS_vmlinux} -o ${2}             \
+                       -T ${lds} ${objects}
        else
-               ${CC} ${CFLAGS_vmlinux} -o ${2}                              \
-                       -Wl,-T,${lds} ${KBUILD_VMLINUX_INIT}                 \
-                       -Wl,--start-group                                    \
-                                ${KBUILD_VMLINUX_MAIN}                      \
-                       -Wl,--end-group                                      \
-                       -lutil -lrt -lpthread ${1}
+               if [ -n "${CONFIG_THIN_ARCHIVES}" ]; then
+                       objects="-Wl,--whole-archive built-in.o ${1}"
+               else
+                       objects="${KBUILD_VMLINUX_INIT}                 \
+                               -Wl,--start-group                       \
+                               ${KBUILD_VMLINUX_MAIN}                  \
+                               -Wl,--end-group                         \
+                               ${1}"
+               fi
+
+               ${CC} ${CFLAGS_vmlinux} -o ${2}                         \
+                       -Wl,-T,${lds}                                   \
+                       ${objects}                                      \
+                       -lutil -lrt -lpthread
                rm -f linux
        fi
 }
@@ -119,6 +165,7 @@ cleanup()
        rm -f .tmp_kallsyms*
        rm -f .tmp_version
        rm -f .tmp_vmlinux*
+       rm -f built-in.o
        rm -f System.map
        rm -f vmlinux
        rm -f vmlinux.o
@@ -162,6 +209,8 @@ case "${KCONFIG_CONFIG}" in
        . "./${KCONFIG_CONFIG}"
 esac
 
+archive_builtin
+
 #link vmlinux.o
 info LD vmlinux.o
 modpost_link vmlinux.o
index fc3036b34e5128cc73910eb6e3b2bbd2d94d9e50..a4d90aa1045afc46499e00da9baf9bbcea34752a 100644 (file)
@@ -621,8 +621,8 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
        /* released below */
        cred = get_current_cred();
        cxt = cred_cxt(cred);
-       profile = aa_cred_profile(cred);
-       previous_profile = cxt->previous;
+       profile = aa_get_newest_profile(aa_cred_profile(cred));
+       previous_profile = aa_get_newest_profile(cxt->previous);
 
        if (unconfined(profile)) {
                info = "unconfined";
@@ -718,6 +718,8 @@ audit:
 out:
        aa_put_profile(hat);
        kfree(name);
+       aa_put_profile(profile);
+       aa_put_profile(previous_profile);
        put_cred(cred);
 
        return error;
index f826e87390233c1cfddb87e969642ab44219d84f..d942c7c2bc0aa0ee471de663d3f15587f12a1732 100644 (file)
@@ -41,7 +41,7 @@ config BIG_KEYS
        bool "Large payload keys"
        depends on KEYS
        depends on TMPFS
-       select CRYPTO
+       depends on (CRYPTO_ANSI_CPRNG = y || CRYPTO_DRBG = y)
        select CRYPTO_AES
        select CRYPTO_ECB
        select CRYPTO_RNG
index c0b3030b563486af0f1756d6d73ae32fe02a77c8..835c1ab30d01eb9a8e94b411fce09b856772efb9 100644 (file)
@@ -9,6 +9,7 @@
  * 2 of the Licence, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt) "big_key: "fmt
 #include <linux/init.h>
 #include <linux/seq_file.h>
 #include <linux/file.h>
@@ -341,44 +342,48 @@ error:
  */
 static int __init big_key_init(void)
 {
-       return register_key_type(&key_type_big_key);
-}
-
-/*
- * Initialize big_key crypto and RNG algorithms
- */
-static int __init big_key_crypto_init(void)
-{
-       int ret = -EINVAL;
+       struct crypto_skcipher *cipher;
+       struct crypto_rng *rng;
+       int ret;
 
-       /* init RNG */
-       big_key_rng = crypto_alloc_rng(big_key_rng_name, 0, 0);
-       if (IS_ERR(big_key_rng)) {
-               big_key_rng = NULL;
-               return -EFAULT;
+       rng = crypto_alloc_rng(big_key_rng_name, 0, 0);
+       if (IS_ERR(rng)) {
+               pr_err("Can't alloc rng: %ld\n", PTR_ERR(rng));
+               return PTR_ERR(rng);
        }
 
+       big_key_rng = rng;
+
        /* seed RNG */
-       ret = crypto_rng_reset(big_key_rng, NULL, crypto_rng_seedsize(big_key_rng));
-       if (ret)
-               goto error;
+       ret = crypto_rng_reset(rng, NULL, crypto_rng_seedsize(rng));
+       if (ret) {
+               pr_err("Can't reset rng: %d\n", ret);
+               goto error_rng;
+       }
 
        /* init block cipher */
-       big_key_skcipher = crypto_alloc_skcipher(big_key_alg_name,
-                                                0, CRYPTO_ALG_ASYNC);
-       if (IS_ERR(big_key_skcipher)) {
-               big_key_skcipher = NULL;
-               ret = -EFAULT;
-               goto error;
+       cipher = crypto_alloc_skcipher(big_key_alg_name, 0, CRYPTO_ALG_ASYNC);
+       if (IS_ERR(cipher)) {
+               ret = PTR_ERR(cipher);
+               pr_err("Can't alloc crypto: %d\n", ret);
+               goto error_rng;
+       }
+
+       big_key_skcipher = cipher;
+
+       ret = register_key_type(&key_type_big_key);
+       if (ret < 0) {
+               pr_err("Can't register type: %d\n", ret);
+               goto error_cipher;
        }
 
        return 0;
 
-error:
+error_cipher:
+       crypto_free_skcipher(big_key_skcipher);
+error_rng:
        crypto_free_rng(big_key_rng);
-       big_key_rng = NULL;
        return ret;
 }
 
-device_initcall(big_key_init);
-late_initcall(big_key_crypto_init);
+late_initcall(big_key_init);
index f0611a6368cd2572188f9a066291b9c8d717f95d..b9f531c9e4fa753d326752b63dc2cf599579ffeb 100644 (file)
@@ -181,7 +181,7 @@ static int proc_keys_show(struct seq_file *m, void *v)
        struct timespec now;
        unsigned long timo;
        key_ref_t key_ref, skey_ref;
-       char xbuf[12];
+       char xbuf[16];
        int rc;
 
        struct keyring_search_context ctx = {
index 085057936287bdaa559e7c4be9e593e17ec1dfa9..09fd6108e42134871953f2cb46f9410808c1f702 100644 (file)
@@ -3557,7 +3557,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma,
                } else if (!vma->vm_file &&
                           ((vma->vm_start <= vma->vm_mm->start_stack &&
                             vma->vm_end >= vma->vm_mm->start_stack) ||
-                           vma_is_stack_for_task(vma, current))) {
+                           vma_is_stack_for_current(vma))) {
                        rc = current_has_perm(current, PROCESS__EXECSTACK);
                } else if (vma->vm_file && vma->anon_vma) {
                        /*
index ade7c6cad172a13833a3b41799a142ebf4cb4f46..682b73af77661a4c6260b9f450e015d86c453ede 100644 (file)
@@ -881,7 +881,7 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos,
         * the execve().
         */
        if (get_user_pages_remote(current, bprm->mm, pos, 1,
-                               0, 1, &page, NULL) <= 0)
+                               FOLL_FORCE, &page, NULL) <= 0)
                return false;
 #else
        page = bprm->page[pos / PAGE_SIZE];
index 895362a696c95b3737bf82123cba9b00146bb6a6..8ab72e0f593292ac91aea0b06b70d89fc084bd3c 100644 (file)
@@ -325,10 +325,15 @@ static ssize_t snd_info_text_entry_write(struct file *file,
        size_t next;
        int err = 0;
 
+       if (!entry->c.text.write)
+               return -EIO;
        pos = *offset;
        if (!valid_pos(pos, count))
                return -EIO;
        next = pos + count;
+       /* don't handle too large text inputs */
+       if (next > 16 * 1024)
+               return -EIO;
        mutex_lock(&entry->access);
        buf = data->wbuffer;
        if (!buf) {
@@ -366,7 +371,9 @@ static int snd_info_seq_show(struct seq_file *seq, void *p)
        struct snd_info_private_data *data = seq->private;
        struct snd_info_entry *entry = data->entry;
 
-       if (entry->c.text.read) {
+       if (!entry->c.text.read) {
+               return -EIO;
+       } else {
                data->rbuffer->buffer = (char *)seq; /* XXX hack! */
                entry->c.text.read(entry, data->rbuffer);
        }
index fce5697e42614a36056079b2a1aca289aec2d850..8c35072166769b9ea775a91c0a36b5c6ceacf866 100644 (file)
@@ -58,7 +58,7 @@ static int snd_seq_call_port_info_ioctl(struct snd_seq_client *client, unsigned
                goto error;
        data->kernel = NULL;
 
-       err = snd_seq_kernel_client_ctl(client->number, cmd, &data);
+       err = snd_seq_kernel_client_ctl(client->number, cmd, data);
        if (err < 0)
                goto error;
 
index dcc102813aefa4d1d6e30e24f8644c8d92b12a9f..37d9cfbc29f9c829facd29ffdc2224ada7a63efd 100644 (file)
@@ -448,8 +448,8 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr)
 
                ktime_get_ts64(&tm);
                tm = timespec64_sub(tm, tmr->last_update);
-               cur_time.tv_nsec = tm.tv_nsec;
-               cur_time.tv_sec = tm.tv_sec;
+               cur_time.tv_nsec += tm.tv_nsec;
+               cur_time.tv_sec += tm.tv_sec;
                snd_seq_sanity_real_time(&cur_time);
        }
        spin_unlock_irqrestore(&tmr->lock, flags);
index d17937b92331e4c1160d1cebb1ed77398a684a01..7e3aa50b21f9d2d2f5ca49f3f9a779ab1276ee4a 100644 (file)
@@ -111,7 +111,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                return -EINVAL;
 
        hm = kmalloc(sizeof(*hm), GFP_KERNEL);
-       hr = kmalloc(sizeof(*hr), GFP_KERNEL);
+       hr = kzalloc(sizeof(*hr), GFP_KERNEL);
        if (!hm || !hr) {
                err = -ENOMEM;
                goto out;
index 9c22f95838efbd31d23448cc26321918c03bd5e1..19d41da79f93cc95500859c9e1690a2d89935d23 100644 (file)
@@ -49,7 +49,7 @@ static void alc_fixup_dell_wmi(struct hda_codec *codec,
                removefunc = true;
                if (dell_led_set_func(DELL_LED_MICMUTE, false) >= 0) {
                        dell_led_value = 0;
-                       if (spec->gen.num_adc_nids > 1)
+                       if (spec->gen.num_adc_nids > 1 && !spec->gen.dyn_adc_switch)
                                codec_dbg(codec, "Skipping micmute LED control due to several ADCs");
                        else {
                                dell_old_cap_hook = spec->gen.cap_sync_hook;
index c3469f756ec258cccba7f1a339a4335d318bcc23..c64d986009a9ecf5233464d269a440a0edb6cf23 100644 (file)
@@ -341,8 +341,7 @@ enum {
 
 /* quirks for Nvidia */
 #define AZX_DCAPS_PRESET_NVIDIA \
-       (AZX_DCAPS_NO_MSI | /*AZX_DCAPS_ALIGN_BUFSIZE |*/ \
-        AZX_DCAPS_NO_64BIT | AZX_DCAPS_CORBRP_SELF_CLEAR |\
+       (AZX_DCAPS_NO_MSI | AZX_DCAPS_CORBRP_SELF_CLEAR |\
         AZX_DCAPS_SNOOP_TYPE(NVIDIA))
 
 #define AZX_DCAPS_PRESET_CTHDA \
@@ -1716,6 +1715,10 @@ static int azx_first_init(struct azx *chip)
                }
        }
 
+       /* NVidia hardware normally only supports up to 40 bits of DMA */
+       if (chip->pci->vendor == PCI_VENDOR_ID_NVIDIA)
+               dma_bits = 40;
+
        /* disable 64bit DMA address on some devices */
        if (chip->driver_caps & AZX_DCAPS_NO_64BIT) {
                dev_dbg(card->dev, "Disabling 64bit DMA\n");
index b58e8c76346ac9e87bd208b88ae8882546fa78c1..ea81c08ddc7acf77dc7c4144a5b093b0d72ef867 100644 (file)
@@ -5811,8 +5811,6 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
 #define ALC295_STANDARD_PINS \
        {0x12, 0xb7a60130}, \
        {0x14, 0x90170110}, \
-       {0x17, 0x21014020}, \
-       {0x18, 0x21a19030}, \
        {0x21, 0x04211020}
 
 #define ALC298_STANDARD_PINS \
@@ -5858,10 +5856,18 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x14, 0x90170110},
                {0x1b, 0x02011020},
                {0x21, 0x0221101f}),
+       SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               {0x14, 0x90170110},
+               {0x1b, 0x01011020},
+               {0x21, 0x0221101f}),
        SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                {0x14, 0x90170130},
                {0x1b, 0x01014020},
                {0x21, 0x0221103f}),
+       SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               {0x14, 0x90170130},
+               {0x1b, 0x01011020},
+               {0x21, 0x0221103f}),
        SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
                {0x14, 0x90170130},
                {0x1b, 0x02011020},
@@ -6039,7 +6045,13 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                ALC292_STANDARD_PINS,
                {0x13, 0x90a60140}),
        SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
-               ALC295_STANDARD_PINS),
+               ALC295_STANDARD_PINS,
+               {0x17, 0x21014020},
+               {0x18, 0x21a19030}),
+       SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC295_STANDARD_PINS,
+               {0x17, 0x21014040},
+               {0x18, 0x21a19050}),
        SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
                ALC298_STANDARD_PINS,
                {0x17, 0x90170110}),
@@ -6613,6 +6625,7 @@ enum {
        ALC891_FIXUP_HEADSET_MODE,
        ALC891_FIXUP_DELL_MIC_NO_PRESENCE,
        ALC662_FIXUP_ACER_VERITON,
+       ALC892_FIXUP_ASROCK_MOBO,
 };
 
 static const struct hda_fixup alc662_fixups[] = {
@@ -6889,6 +6902,14 @@ static const struct hda_fixup alc662_fixups[] = {
                        { }
                }
        },
+       [ALC892_FIXUP_ASROCK_MOBO] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x15, 0x40f000f0 }, /* disabled */
+                       { 0x16, 0x40f000f0 }, /* disabled */
+                       { }
+               }
+       },
 };
 
 static const struct snd_pci_quirk alc662_fixup_tbl[] = {
@@ -6926,6 +6947,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
        SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD),
        SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD),
        SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD),
+       SND_PCI_QUIRK(0x1849, 0x5892, "ASRock B150M", ALC892_FIXUP_ASROCK_MOBO),
        SND_PCI_QUIRK(0x19da, 0xa130, "Zotac Z68", ALC662_FIXUP_ZOTAC_Z68),
        SND_PCI_QUIRK(0x1b0a, 0x01b8, "ACER Veriton", ALC662_FIXUP_ACER_VERITON),
        SND_PCI_QUIRK(0x1b35, 0x2206, "CZC P10T", ALC662_FIXUP_CZC_P10T),
index f0955fd7a2e73157a661ad629a8099d02abfd1bf..4d9d320a79711f5e95deac5f32c2ac0f741ff21d 100644 (file)
@@ -13,7 +13,8 @@ static void (*old_vmaster_hook)(void *, int);
 static bool is_thinkpad(struct hda_codec *codec)
 {
        return (codec->core.subsystem_id >> 16 == 0x17aa) &&
-              (acpi_dev_found("LEN0068") || acpi_dev_found("IBM0068"));
+              (acpi_dev_found("LEN0068") || acpi_dev_found("LEN0268") ||
+               acpi_dev_found("IBM0068"));
 }
 
 static void update_tpacpi_mute_led(void *private_data, int enabled)
@@ -62,7 +63,7 @@ static void hda_fixup_thinkpad_acpi(struct hda_codec *codec,
                        removefunc = false;
                }
                if (led_set_func(TPACPI_LED_MICMUTE, false) >= 0) {
-                       if (spec->num_adc_nids > 1)
+                       if (spec->num_adc_nids > 1 && !spec->dyn_adc_switch)
                                codec_dbg(codec,
                                          "Skipping micmute LED control due to several ADCs");
                        else {
index 18baea2f7d654528cf52617bfb0c58fc2e3b8efe..84f86745c30e93cd746935113f3c0aa08fd4021e 100644 (file)
@@ -148,11 +148,11 @@ SND_SOC_DAPM_OUTPUT("AOUTR"),
 };
 
 static const struct snd_soc_dapm_route cs4270_dapm_routes[] = {
-       { "Capture", NULL, "AINA" },
-       { "Capture", NULL, "AINB" },
+       { "Capture", NULL, "AINL" },
+       { "Capture", NULL, "AINR" },
 
-       { "AOUTA", NULL, "Playback" },
-       { "AOUTB", NULL, "Playback" },
+       { "AOUTL", NULL, "Playback" },
+       { "AOUTR", NULL, "Playback" },
 };
 
 /**
index 1152aa5e7c394208d6e42f04a2c4d44c8b0ea906..cf37936bfe3aaaf6b29cb1f78c9c54f80a2b40b7 100644 (file)
@@ -880,7 +880,8 @@ static const struct snd_soc_dapm_widget da7219_dapm_widgets[] = {
                            SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
 
        /* DAI */
-       SND_SOC_DAPM_AIF_OUT("DAIOUT", "Capture", 0, SND_SOC_NOPM, 0, 0),
+       SND_SOC_DAPM_AIF_OUT("DAIOUT", "Capture", 0, DA7219_DAI_TDM_CTRL,
+                            DA7219_DAI_OE_SHIFT, DA7219_NO_INVERT),
        SND_SOC_DAPM_AIF_IN("DAIIN", "Playback", 0, SND_SOC_NOPM, 0, 0),
 
        /* Output Muxes */
index b904492d774473a99d5e1aa451f18ff276f83732..90b5948e0ff363a91538a28513e35e32e35a0ee1 100644 (file)
@@ -364,7 +364,12 @@ static int hdmi_of_xlate_dai_name(struct snd_soc_component *component,
                                  struct of_phandle_args *args,
                                  const char **dai_name)
 {
-       int id = args->args[0];
+       int id;
+
+       if (args->args_count)
+               id = args->args[0];
+       else
+               id = 0;
 
        if (id < ARRAY_SIZE(hdmi_dai_name)) {
                *dai_name = hdmi_dai_name[id];
index 55558643166fda708ccb781daffcc9ed40806bc2..2db8179047ae89b4d2b297e5588bd8e77b3ab235 100644 (file)
@@ -249,6 +249,11 @@ static int rt298_jack_detect(struct rt298_priv *rt298, bool *hp, bool *mic)
                        snd_soc_dapm_force_enable_pin(dapm, "LDO1");
                        snd_soc_dapm_sync(dapm);
 
+                       regmap_update_bits(rt298->regmap,
+                               RT298_POWER_CTRL1, 0x1001, 0);
+                       regmap_update_bits(rt298->regmap,
+                               RT298_POWER_CTRL2, 0x4, 0x4);
+
                        regmap_write(rt298->regmap, RT298_SET_MIC1, 0x24);
                        msleep(50);
 
index 01a18d88f1eb19f4311319ce0a47771da34df131..00ff2788879e2a8982b3d0e79fa18ddb86fc188a 100644 (file)
@@ -1547,11 +1547,11 @@ static int rt5663_jack_detect(struct snd_soc_codec *codec, int jack_insert)
                        msleep(sleep_time[i]);
                        val = snd_soc_read(codec, RT5663_EM_JACK_TYPE_2) &
                                0x0003;
+                       dev_dbg(codec->dev, "%s: MX-00e7 val=%x sleep %d\n",
+                               __func__, val, sleep_time[i]);
                        i++;
                        if (val == 0x1 || val == 0x2 || val == 0x3)
                                break;
-                       dev_dbg(codec->dev, "%s: MX-00e7 val=%x sleep %d\n",
-                               __func__, val, sleep_time[i]);
                }
                dev_dbg(codec->dev, "%s val = %d\n", __func__, val);
                switch (val) {
index 7b31ee9b82bc87beb493427049fc82d75e1127e8..d6e00c77edcd7360b8df3d9942e5c4213ba530b6 100644 (file)
@@ -424,7 +424,7 @@ static const struct snd_soc_dai_ops stih407_dac_ops = {
 static const struct regmap_config stih407_sas_regmap = {
        .reg_bits = 32,
        .val_bits = 32,
-
+       .fast_io = true,
        .max_register = STIH407_AUDIO_DAC_CTRL,
        .reg_defaults = stih407_sas_reg_defaults,
        .num_reg_defaults = ARRAY_SIZE(stih407_sas_reg_defaults),
index df5e5cb33baaba035b1d9db577cbf772855ac3e9..810369f687d7166755a6b639fbfb97ed3b9672e7 100644 (file)
@@ -341,20 +341,9 @@ static int tas571x_set_bias_level(struct snd_soc_codec *codec,
                                        return ret;
                                }
                        }
-
-                       gpiod_set_value(priv->pdn_gpio, 0);
-                       usleep_range(5000, 6000);
-
-                       regcache_cache_only(priv->regmap, false);
-                       ret = regcache_sync(priv->regmap);
-                       if (ret)
-                               return ret;
                }
                break;
        case SND_SOC_BIAS_OFF:
-               regcache_cache_only(priv->regmap, true);
-               gpiod_set_value(priv->pdn_gpio, 1);
-
                if (!IS_ERR(priv->mclk))
                        clk_disable_unprepare(priv->mclk);
                break;
@@ -401,16 +390,6 @@ static const struct snd_kcontrol_new tas5711_controls[] = {
                   TAS571X_SOFT_MUTE_REG,
                   TAS571X_SOFT_MUTE_CH1_SHIFT, TAS571X_SOFT_MUTE_CH2_SHIFT,
                   1, 1),
-
-       SOC_DOUBLE_R_RANGE("CH1 Mixer Volume",
-                          TAS5717_CH1_LEFT_CH_MIX_REG,
-                          TAS5717_CH1_RIGHT_CH_MIX_REG,
-                          16, 0, 0x80, 0),
-
-       SOC_DOUBLE_R_RANGE("CH2 Mixer Volume",
-                          TAS5717_CH2_LEFT_CH_MIX_REG,
-                          TAS5717_CH2_RIGHT_CH_MIX_REG,
-                          16, 0, 0x80, 0),
 };
 
 static const struct regmap_range tas571x_readonly_regs_range[] = {
@@ -488,6 +467,16 @@ static const struct snd_kcontrol_new tas5717_controls[] = {
                   TAS571X_SOFT_MUTE_CH1_SHIFT, TAS571X_SOFT_MUTE_CH2_SHIFT,
                   1, 1),
 
+       SOC_DOUBLE_R_RANGE("CH1 Mixer Volume",
+                          TAS5717_CH1_LEFT_CH_MIX_REG,
+                          TAS5717_CH1_RIGHT_CH_MIX_REG,
+                          16, 0, 0x80, 0),
+
+       SOC_DOUBLE_R_RANGE("CH2 Mixer Volume",
+                          TAS5717_CH2_LEFT_CH_MIX_REG,
+                          TAS5717_CH2_RIGHT_CH_MIX_REG,
+                          16, 0, 0x80, 0),
+
        /*
         * The biquads are named according to the register names.
         * Please note that TI's TAS57xx Graphical Development Environment
@@ -747,13 +736,14 @@ static int tas571x_i2c_probe(struct i2c_client *client,
                /* pulse the active low reset line for ~100us */
                usleep_range(100, 200);
                gpiod_set_value(priv->reset_gpio, 0);
-               usleep_range(12000, 20000);
+               usleep_range(13500, 20000);
        }
 
        ret = regmap_write(priv->regmap, TAS571X_OSC_TRIM_REG, 0);
        if (ret)
                return ret;
 
+       usleep_range(50000, 60000);
 
        memcpy(&priv->codec_driver, &tas571x_codec, sizeof(priv->codec_driver));
        priv->codec_driver.component_driver.controls = priv->chip->controls;
@@ -770,9 +760,6 @@ static int tas571x_i2c_probe(struct i2c_client *client,
                        return ret;
        }
 
-       regcache_cache_only(priv->regmap, true);
-       gpiod_set_value(priv->pdn_gpio, 1);
-
        return snd_soc_register_codec(&client->dev, &priv->codec_driver,
                                      &tas571x_dai, 1);
 }
index 26eb5a0a55754c43f94afe8a3403982b408d3e2b..fd5d1e0910382e94233702094658b6b868c71b83 100644 (file)
@@ -47,6 +47,7 @@ config SND_SOC_INTEL_SST_MATCH
 
 config SND_SOC_INTEL_HASWELL
        tristate
+       select SND_SOC_INTEL_SST_FIRMWARE
 
 config SND_SOC_INTEL_BAYTRAIL
        tristate
@@ -56,7 +57,6 @@ config SND_SOC_INTEL_HASWELL_MACH
        depends on X86_INTEL_LPSS && I2C && I2C_DESIGNWARE_PLATFORM
        depends on DW_DMAC_CORE
        select SND_SOC_INTEL_SST
-       select SND_SOC_INTEL_SST_FIRMWARE
        select SND_SOC_INTEL_HASWELL
        select SND_SOC_RT5640
        help
@@ -138,7 +138,6 @@ config SND_SOC_INTEL_BROADWELL_MACH
                   I2C_DESIGNWARE_PLATFORM
        depends on DW_DMAC_CORE
        select SND_SOC_INTEL_SST
-       select SND_SOC_INTEL_SST_FIRMWARE
        select SND_SOC_INTEL_HASWELL
        select SND_SOC_RT286
        help
index ba5c0d71720ab8548fddbb42cd79dd90c869abab..0a88537ca58a19c9c47df036c6cd95e7d2701837 100644 (file)
@@ -416,6 +416,7 @@ static const struct dmi_system_id cht_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "Surface 3"),
                },
        },
+       { }
 };
 
 
index 6532b8f0ab2fc475f09116d24dcf316170ce5618..865a21e557cce330508ceee677c16f82359087ee 100644 (file)
@@ -130,8 +130,8 @@ static int broxton_da7219_codec_init(struct snd_soc_pcm_runtime *rtd)
         */
        ret = snd_soc_card_jack_new(rtd->card, "Headset Jack",
                        SND_JACK_HEADSET | SND_JACK_BTN_0 | SND_JACK_BTN_1 |
-                       SND_JACK_BTN_2 | SND_JACK_BTN_3, &broxton_headset,
-                       NULL, 0);
+                       SND_JACK_BTN_2 | SND_JACK_BTN_3 | SND_JACK_LINEOUT,
+                       &broxton_headset, NULL, 0);
        if (ret) {
                dev_err(rtd->dev, "Headset Jack creation failed: %d\n", ret);
                return ret;
index 2989c164dafe3a719ed219d2f47ddf73a20ea64d..06fa5e85dd0e0177f6e886065e23060a87a958fa 100644 (file)
@@ -674,7 +674,7 @@ static int skl_probe(struct pci_dev *pci,
 
        if (skl->nhlt == NULL) {
                err = -ENODEV;
-               goto out_free;
+               goto out_display_power_off;
        }
 
        skl_nhlt_update_topology_bin(skl);
@@ -746,6 +746,9 @@ out_mach_free:
        skl_machine_device_unregister(skl);
 out_nhlt_free:
        skl_nhlt_free(skl->nhlt);
+out_display_power_off:
+       if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI))
+               snd_hdac_display_power(bus, false);
 out_free:
        skl->init_failed = 1;
        skl_free(ebus);
@@ -785,8 +788,7 @@ static void skl_remove(struct pci_dev *pci)
 
        release_firmware(skl->tplg);
 
-       if (pci_dev_run_wake(pci))
-               pm_runtime_get_noresume(&pci->dev);
+       pm_runtime_get_noresume(&pci->dev);
 
        /* codec removal, invoke bus_device_remove */
        snd_hdac_ext_bus_device_remove(ebus);
index f2bf8661dd21f782b1d472a724d69902a56b7b57..823b5a236d8dce0943d160a6cd436df2865a76e6 100644 (file)
@@ -208,7 +208,7 @@ config SND_PXA2XX_SOC_IMOTE2
 
 config SND_MMP_SOC_BROWNSTONE
        tristate "SoC Audio support for Marvell Brownstone"
-       depends on SND_MMP_SOC && MACH_BROWNSTONE
+       depends on SND_MMP_SOC && MACH_BROWNSTONE && I2C
        select SND_MMP_SOC_SSPA
        select MFD_WM8994
        select SND_SOC_WM8994
index 3cde9fb977fa72779a2ff26530fbaafae29a6386..eff3f9a8b685fc2ff0a149c4f3f7cf3101a984af 100644 (file)
@@ -586,3 +586,6 @@ int asoc_qcom_lpass_cpu_platform_remove(struct platform_device *pdev)
        return 0;
 }
 EXPORT_SYMBOL_GPL(asoc_qcom_lpass_cpu_platform_remove);
+
+MODULE_DESCRIPTION("QTi LPASS CPU Driver");
+MODULE_LICENSE("GPL v2");
index e2ff538a8aa5b63c4117f35365c39057b1f5360a..b392e51de94d173a20b130ad663b6af7c1e25f19 100644 (file)
@@ -61,7 +61,41 @@ static int lpass_platform_pcmops_open(struct snd_pcm_substream *substream)
 {
        struct snd_pcm_runtime *runtime = substream->runtime;
        struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
-       int ret;
+       struct snd_soc_dai *cpu_dai = soc_runtime->cpu_dai;
+       struct lpass_data *drvdata =
+               snd_soc_platform_get_drvdata(soc_runtime->platform);
+       struct lpass_variant *v = drvdata->variant;
+       int ret, dma_ch, dir = substream->stream;
+       struct lpass_pcm_data *data;
+
+       data = devm_kzalloc(soc_runtime->dev, sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       data->i2s_port = cpu_dai->driver->id;
+       runtime->private_data = data;
+
+       dma_ch = 0;
+       if (v->alloc_dma_channel)
+               dma_ch = v->alloc_dma_channel(drvdata, dir);
+       if (dma_ch < 0)
+               return dma_ch;
+
+       drvdata->substream[dma_ch] = substream;
+
+       ret = regmap_write(drvdata->lpaif_map,
+                       LPAIF_DMACTL_REG(v, dma_ch, dir), 0);
+       if (ret) {
+               dev_err(soc_runtime->dev,
+                       "%s() error writing to rdmactl reg: %d\n",
+                       __func__, ret);
+                       return ret;
+       }
+
+       if (dir == SNDRV_PCM_STREAM_PLAYBACK)
+               data->rdma_ch = dma_ch;
+       else
+               data->wrdma_ch = dma_ch;
 
        snd_soc_set_runtime_hwparams(substream, &lpass_platform_pcm_hardware);
 
@@ -80,13 +114,40 @@ static int lpass_platform_pcmops_open(struct snd_pcm_substream *substream)
        return 0;
 }
 
+static int lpass_platform_pcmops_close(struct snd_pcm_substream *substream)
+{
+       struct snd_pcm_runtime *runtime = substream->runtime;
+       struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
+       struct lpass_data *drvdata =
+               snd_soc_platform_get_drvdata(soc_runtime->platform);
+       struct lpass_variant *v = drvdata->variant;
+       struct lpass_pcm_data *data;
+       int dma_ch, dir = substream->stream;
+
+       data = runtime->private_data;
+       v = drvdata->variant;
+
+       if (dir == SNDRV_PCM_STREAM_PLAYBACK)
+               dma_ch = data->rdma_ch;
+       else
+               dma_ch = data->wrdma_ch;
+
+       drvdata->substream[dma_ch] = NULL;
+
+       if (v->free_dma_channel)
+               v->free_dma_channel(drvdata, dma_ch);
+
+       return 0;
+}
+
 static int lpass_platform_pcmops_hw_params(struct snd_pcm_substream *substream,
                struct snd_pcm_hw_params *params)
 {
        struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
        struct lpass_data *drvdata =
                snd_soc_platform_get_drvdata(soc_runtime->platform);
-       struct lpass_pcm_data *pcm_data = drvdata->private_data;
+       struct snd_pcm_runtime *rt = substream->runtime;
+       struct lpass_pcm_data *pcm_data = rt->private_data;
        struct lpass_variant *v = drvdata->variant;
        snd_pcm_format_t format = params_format(params);
        unsigned int channels = params_channels(params);
@@ -179,7 +240,8 @@ static int lpass_platform_pcmops_hw_free(struct snd_pcm_substream *substream)
        struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
        struct lpass_data *drvdata =
                snd_soc_platform_get_drvdata(soc_runtime->platform);
-       struct lpass_pcm_data *pcm_data = drvdata->private_data;
+       struct snd_pcm_runtime *rt = substream->runtime;
+       struct lpass_pcm_data *pcm_data = rt->private_data;
        struct lpass_variant *v = drvdata->variant;
        unsigned int reg;
        int ret;
@@ -203,7 +265,8 @@ static int lpass_platform_pcmops_prepare(struct snd_pcm_substream *substream)
        struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
        struct lpass_data *drvdata =
                snd_soc_platform_get_drvdata(soc_runtime->platform);
-       struct lpass_pcm_data *pcm_data = drvdata->private_data;
+       struct snd_pcm_runtime *rt = substream->runtime;
+       struct lpass_pcm_data *pcm_data = rt->private_data;
        struct lpass_variant *v = drvdata->variant;
        int ret, ch, dir = substream->stream;
 
@@ -257,7 +320,8 @@ static int lpass_platform_pcmops_trigger(struct snd_pcm_substream *substream,
        struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
        struct lpass_data *drvdata =
                snd_soc_platform_get_drvdata(soc_runtime->platform);
-       struct lpass_pcm_data *pcm_data = drvdata->private_data;
+       struct snd_pcm_runtime *rt = substream->runtime;
+       struct lpass_pcm_data *pcm_data = rt->private_data;
        struct lpass_variant *v = drvdata->variant;
        int ret, ch, dir = substream->stream;
 
@@ -333,7 +397,8 @@ static snd_pcm_uframes_t lpass_platform_pcmops_pointer(
        struct snd_soc_pcm_runtime *soc_runtime = substream->private_data;
        struct lpass_data *drvdata =
                        snd_soc_platform_get_drvdata(soc_runtime->platform);
-       struct lpass_pcm_data *pcm_data = drvdata->private_data;
+       struct snd_pcm_runtime *rt = substream->runtime;
+       struct lpass_pcm_data *pcm_data = rt->private_data;
        struct lpass_variant *v = drvdata->variant;
        unsigned int base_addr, curr_addr;
        int ret, ch, dir = substream->stream;
@@ -374,6 +439,7 @@ static int lpass_platform_pcmops_mmap(struct snd_pcm_substream *substream,
 
 static const struct snd_pcm_ops lpass_platform_pcm_ops = {
        .open           = lpass_platform_pcmops_open,
+       .close          = lpass_platform_pcmops_close,
        .ioctl          = snd_pcm_lib_ioctl,
        .hw_params      = lpass_platform_pcmops_hw_params,
        .hw_free        = lpass_platform_pcmops_hw_free,
@@ -470,117 +536,45 @@ static int lpass_platform_pcm_new(struct snd_soc_pcm_runtime *soc_runtime)
 {
        struct snd_pcm *pcm = soc_runtime->pcm;
        struct snd_pcm_substream *psubstream, *csubstream;
-       struct snd_soc_dai *cpu_dai = soc_runtime->cpu_dai;
-       struct lpass_data *drvdata =
-               snd_soc_platform_get_drvdata(soc_runtime->platform);
-       struct lpass_variant *v = drvdata->variant;
        int ret = -EINVAL;
-       struct lpass_pcm_data *data;
        size_t size = lpass_platform_pcm_hardware.buffer_bytes_max;
 
-       data = devm_kzalloc(soc_runtime->dev, sizeof(*data), GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
-
-       data->i2s_port = cpu_dai->driver->id;
-       drvdata->private_data = data;
-
        psubstream = pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream;
        if (psubstream) {
-               if (v->alloc_dma_channel)
-                       data->rdma_ch = v->alloc_dma_channel(drvdata,
-                                               SNDRV_PCM_STREAM_PLAYBACK);
-
-               if (data->rdma_ch < 0)
-                       return data->rdma_ch;
-
-               drvdata->substream[data->rdma_ch] = psubstream;
-
                ret = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV,
                                        soc_runtime->platform->dev,
                                        size, &psubstream->dma_buffer);
-               if (ret)
-                       goto playback_alloc_err;
-
-               ret = regmap_write(drvdata->lpaif_map,
-                       LPAIF_RDMACTL_REG(v, data->rdma_ch), 0);
                if (ret) {
-                       dev_err(soc_runtime->dev,
-                               "%s() error writing to rdmactl reg: %d\n",
-                               __func__, ret);
-                       goto capture_alloc_err;
+                       dev_err(soc_runtime->dev, "Cannot allocate buffer(s)\n");
+                       return ret;
                }
        }
 
        csubstream = pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream;
        if (csubstream) {
-               if (v->alloc_dma_channel)
-                       data->wrdma_ch = v->alloc_dma_channel(drvdata,
-                                               SNDRV_PCM_STREAM_CAPTURE);
-
-               if (data->wrdma_ch < 0) {
-                       ret = data->wrdma_ch;
-                       goto capture_alloc_err;
-               }
-
-               drvdata->substream[data->wrdma_ch] = csubstream;
-
                ret = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV,
                                        soc_runtime->platform->dev,
                                        size, &csubstream->dma_buffer);
-               if (ret)
-                       goto capture_alloc_err;
-
-               ret = regmap_write(drvdata->lpaif_map,
-                       LPAIF_WRDMACTL_REG(v, data->wrdma_ch), 0);
                if (ret) {
-                       dev_err(soc_runtime->dev,
-                               "%s() error writing to wrdmactl reg: %d\n",
-                               __func__, ret);
-                       goto capture_reg_err;
+                       dev_err(soc_runtime->dev, "Cannot allocate buffer(s)\n");
+                       if (psubstream)
+                               snd_dma_free_pages(&psubstream->dma_buffer);
+                       return ret;
                }
+
        }
 
        return 0;
-
-capture_reg_err:
-       if (csubstream)
-               snd_dma_free_pages(&csubstream->dma_buffer);
-
-capture_alloc_err:
-       if (psubstream)
-               snd_dma_free_pages(&psubstream->dma_buffer);
-
- playback_alloc_err:
-       dev_err(soc_runtime->dev, "Cannot allocate buffer(s)\n");
-
-       return ret;
 }
 
 static void lpass_platform_pcm_free(struct snd_pcm *pcm)
 {
-       struct snd_soc_pcm_runtime *rt;
-       struct lpass_data *drvdata;
-       struct lpass_pcm_data *data;
-       struct lpass_variant *v;
        struct snd_pcm_substream *substream;
-       int ch, i;
+       int i;
 
        for (i = 0; i < ARRAY_SIZE(pcm->streams); i++) {
                substream = pcm->streams[i].substream;
                if (substream) {
-                       rt = substream->private_data;
-                       drvdata = snd_soc_platform_get_drvdata(rt->platform);
-                       data = drvdata->private_data;
-
-                       ch = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-                               ? data->rdma_ch
-                               : data->wrdma_ch;
-                       v = drvdata->variant;
-                       drvdata->substream[ch] = NULL;
-                       if (v->free_dma_channel)
-                               v->free_dma_channel(drvdata, ch);
-
                        snd_dma_free_pages(&substream->dma_buffer);
                        substream->dma_buffer.area = NULL;
                        substream->dma_buffer.addr = 0;
index 35b3cea8207d8e61773a938d7fead9b3adb8d087..924971b6ded54f52ef56de0c84910258531bf1b9 100644 (file)
@@ -59,7 +59,6 @@ struct lpass_data {
        struct clk *pcnoc_mport_clk;
        struct clk *pcnoc_sway_clk;
 
-       void *private_data;
 };
 
 /* Vairant data per each SOC */
index 97d6700b100935466d01f5afad0a3c1fed54a931..cbc0023c2bc8276da9cda23ffa4d570d7c03e09f 100644 (file)
@@ -383,11 +383,6 @@ static int s3c_ac97_probe(struct platform_device *pdev)
                goto err4;
        }
 
-       ret = devm_snd_soc_register_component(&pdev->dev, &s3c_ac97_component,
-                                        s3c_ac97_dai, ARRAY_SIZE(s3c_ac97_dai));
-       if (ret)
-               goto err5;
-
        ret = samsung_asoc_dma_platform_register(&pdev->dev,
                                                 ac97_pdata->dma_filter,
                                                 NULL, NULL);
@@ -396,6 +391,11 @@ static int s3c_ac97_probe(struct platform_device *pdev)
                goto err5;
        }
 
+       ret = devm_snd_soc_register_component(&pdev->dev, &s3c_ac97_component,
+                                        s3c_ac97_dai, ARRAY_SIZE(s3c_ac97_dai));
+       if (ret)
+               goto err5;
+
        return 0;
 err5:
        free_irq(irq_res->start, NULL);
index 7e32cf4581f8a853a2e53bf49cbb84e1bd11f0b8..7825bff45ae3a523450e6732b94056a288b032d3 100644 (file)
@@ -1237,14 +1237,14 @@ static int samsung_i2s_probe(struct platform_device *pdev)
                        dev_err(&pdev->dev, "Unable to get drvdata\n");
                        return -EFAULT;
                }
-               ret = devm_snd_soc_register_component(&sec_dai->pdev->dev,
-                                               &samsung_i2s_component,
-                                               &sec_dai->i2s_dai_drv, 1);
+               ret = samsung_asoc_dma_platform_register(&pdev->dev,
+                                       sec_dai->filter, "tx-sec", NULL);
                if (ret != 0)
                        return ret;
 
-               return samsung_asoc_dma_platform_register(&pdev->dev,
-                                       sec_dai->filter, "tx-sec", NULL);
+               return devm_snd_soc_register_component(&sec_dai->pdev->dev,
+                                               &samsung_i2s_component,
+                                               &sec_dai->i2s_dai_drv, 1);
        }
 
        pri_dai = i2s_alloc_dai(pdev, false);
@@ -1314,6 +1314,11 @@ static int samsung_i2s_probe(struct platform_device *pdev)
        if (quirks & QUIRK_PRI_6CHAN)
                pri_dai->i2s_dai_drv.playback.channels_max = 6;
 
+       ret = samsung_asoc_dma_platform_register(&pdev->dev, pri_dai->filter,
+                                                NULL, NULL);
+       if (ret < 0)
+               goto err_disable_clk;
+
        if (quirks & QUIRK_SEC_DAI) {
                sec_dai = i2s_alloc_dai(pdev, true);
                if (!sec_dai) {
@@ -1353,10 +1358,6 @@ static int samsung_i2s_probe(struct platform_device *pdev)
        if (ret < 0)
                goto err_free_dai;
 
-       ret = samsung_asoc_dma_platform_register(&pdev->dev, pri_dai->filter,
-                                                NULL, NULL);
-       if (ret < 0)
-               goto err_free_dai;
 
        pm_runtime_enable(&pdev->dev);
 
index 43e367a9acc368d148c0a5afbe777c2874f2ea30..c484985812ed681fa5a4326f3c54f76e4a80da4b 100644 (file)
@@ -565,24 +565,25 @@ static int s3c_pcm_dev_probe(struct platform_device *pdev)
        pcm->dma_capture = &s3c_pcm_stereo_in[pdev->id];
        pcm->dma_playback = &s3c_pcm_stereo_out[pdev->id];
 
+       ret = samsung_asoc_dma_platform_register(&pdev->dev, filter,
+                                                NULL, NULL);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to get register DMA: %d\n", ret);
+               goto err5;
+       }
+
        pm_runtime_enable(&pdev->dev);
 
        ret = devm_snd_soc_register_component(&pdev->dev, &s3c_pcm_component,
                                         &s3c_pcm_dai[pdev->id], 1);
        if (ret != 0) {
                dev_err(&pdev->dev, "failed to get register DAI: %d\n", ret);
-               goto err5;
-       }
-
-       ret = samsung_asoc_dma_platform_register(&pdev->dev, filter,
-                                                NULL, NULL);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to get register DMA: %d\n", ret);
-               goto err5;
+               goto err6;
        }
 
        return 0;
-
+err6:
+       pm_runtime_disable(&pdev->dev);
 err5:
        clk_disable_unprepare(pcm->pclk);
 err4:
index 3e89fbc0c51d046049f6b6413d554c7e62186055..0a4718207e6ec41ae9f1535c5757066c73710f6a 100644 (file)
@@ -168,19 +168,19 @@ static int s3c2412_iis_dev_probe(struct platform_device *pdev)
        s3c2412_i2s_pcm_stereo_in.addr = res->start + S3C2412_IISRXD;
        s3c2412_i2s_pcm_stereo_in.filter_data = pdata->dma_capture;
 
-       ret = s3c_i2sv2_register_component(&pdev->dev, -1,
-                                          &s3c2412_i2s_component,
-                                          &s3c2412_i2s_dai);
+       ret = samsung_asoc_dma_platform_register(&pdev->dev,
+                                                pdata->dma_filter,
+                                                NULL, NULL);
        if (ret) {
-               pr_err("failed to register the dai\n");
+               pr_err("failed to register the DMA: %d\n", ret);
                return ret;
        }
 
-       ret = samsung_asoc_dma_platform_register(&pdev->dev,
-                                                pdata->dma_filter,
-                                                NULL, NULL);
+       ret = s3c_i2sv2_register_component(&pdev->dev, -1,
+                                          &s3c2412_i2s_component,
+                                          &s3c2412_i2s_dai);
        if (ret)
-               pr_err("failed to register the DMA: %d\n", ret);
+               pr_err("failed to register the dai\n");
 
        return ret;
 }
index c78a936a30995639fdb6cf6c97008d7504b1ea5e..9052f6a7073ec8b0ca5066461ba74dcc01a5f0b8 100644 (file)
@@ -474,18 +474,18 @@ static int s3c24xx_iis_dev_probe(struct platform_device *pdev)
        s3c24xx_i2s_pcm_stereo_in.addr = res->start + S3C2410_IISFIFO;
        s3c24xx_i2s_pcm_stereo_in.filter_data = pdata->dma_capture;
 
-       ret = devm_snd_soc_register_component(&pdev->dev,
-                       &s3c24xx_i2s_component, &s3c24xx_i2s_dai, 1);
+       ret = samsung_asoc_dma_platform_register(&pdev->dev,
+                                                pdata->dma_filter,
+                                                NULL, NULL);
        if (ret) {
-               pr_err("failed to register the dai\n");
+               pr_err("failed to register the dma: %d\n", ret);
                return ret;
        }
 
-       ret = samsung_asoc_dma_platform_register(&pdev->dev,
-                                                pdata->dma_filter,
-                                                NULL, NULL);
+       ret = devm_snd_soc_register_component(&pdev->dev,
+                       &s3c24xx_i2s_component, &s3c24xx_i2s_dai, 1);
        if (ret)
-               pr_err("failed to register the dma: %d\n", ret);
+               pr_err("failed to register the dai\n");
 
        return ret;
 }
index 26c1fbed4d3543da990ea3277a24f8fb87887818..779504f54bc074fcaec0ce179483b3a4a1bc17ee 100644 (file)
@@ -416,15 +416,6 @@ static int spdif_probe(struct platform_device *pdev)
                goto err3;
        }
 
-       dev_set_drvdata(&pdev->dev, spdif);
-
-       ret = devm_snd_soc_register_component(&pdev->dev,
-                       &samsung_spdif_component, &samsung_spdif_dai, 1);
-       if (ret != 0) {
-               dev_err(&pdev->dev, "fail to register dai\n");
-               goto err4;
-       }
-
        spdif_stereo_out.addr_width = 2;
        spdif_stereo_out.addr = mem_res->start + DATA_OUTBUF;
        filter = NULL;
@@ -432,7 +423,6 @@ static int spdif_probe(struct platform_device *pdev)
                spdif_stereo_out.filter_data = spdif_pdata->dma_playback;
                filter = spdif_pdata->dma_filter;
        }
-
        spdif->dma_playback = &spdif_stereo_out;
 
        ret = samsung_asoc_dma_platform_register(&pdev->dev, filter,
@@ -442,6 +432,15 @@ static int spdif_probe(struct platform_device *pdev)
                goto err4;
        }
 
+       dev_set_drvdata(&pdev->dev, spdif);
+
+       ret = devm_snd_soc_register_component(&pdev->dev,
+                       &samsung_spdif_component, &samsung_spdif_dai, 1);
+       if (ret != 0) {
+               dev_err(&pdev->dev, "fail to register dai\n");
+               goto err4;
+       }
+
        return 0;
 err4:
        iounmap(spdif->regs);
index 1bc8ebc2528eb1bdf0c1df6832e943d8eebaa29f..ad54d4cf58ada992f6f279cc0e6a00386e8e273e 100644 (file)
@@ -614,7 +614,11 @@ static int uni_player_ctl_iec958_put(struct snd_kcontrol *kcontrol,
        iec958->status[3] = ucontrol->value.iec958.status[3];
        mutex_unlock(&player->ctrl_lock);
 
-       uni_player_set_channel_status(player, NULL);
+       if (player->substream && player->substream->runtime)
+               uni_player_set_channel_status(player,
+                                             player->substream->runtime);
+       else
+               uni_player_set_channel_status(player, NULL);
 
        return 0;
 }
index e047ec06d5382cd61e5ff4e56269b508e714ef59..56ed9472e89fe98c98f82b3b90ed61fdd2e774e4 100644 (file)
@@ -765,11 +765,11 @@ static struct snd_soc_card *sun4i_codec_create_card(struct device *dev)
 
        card = devm_kzalloc(dev, sizeof(*card), GFP_KERNEL);
        if (!card)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        card->dai_link = sun4i_codec_create_link(dev, &card->num_links);
        if (!card->dai_link)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        card->dev               = dev;
        card->name              = "sun4i-codec";
@@ -829,12 +829,6 @@ static int sun4i_codec_probe(struct platform_device *pdev)
                return PTR_ERR(scodec->clk_module);
        }
 
-       /* Enable the bus clock */
-       if (clk_prepare_enable(scodec->clk_apb)) {
-               dev_err(&pdev->dev, "Failed to enable the APB clock\n");
-               return -EINVAL;
-       }
-
        scodec->gpio_pa = devm_gpiod_get_optional(&pdev->dev, "allwinner,pa",
                                                  GPIOD_OUT_LOW);
        if (IS_ERR(scodec->gpio_pa)) {
@@ -844,6 +838,12 @@ static int sun4i_codec_probe(struct platform_device *pdev)
                return ret;
        }
 
+       /* Enable the bus clock */
+       if (clk_prepare_enable(scodec->clk_apb)) {
+               dev_err(&pdev->dev, "Failed to enable the APB clock\n");
+               return -EINVAL;
+       }
+
        /* DMA configuration for TX FIFO */
        scodec->playback_dma_data.addr = res->start + SUN4I_CODEC_DAC_TXDATA;
        scodec->playback_dma_data.maxburst = 4;
@@ -876,7 +876,8 @@ static int sun4i_codec_probe(struct platform_device *pdev)
        }
 
        card = sun4i_codec_create_card(&pdev->dev);
-       if (!card) {
+       if (IS_ERR(card)) {
+               ret = PTR_ERR(card);
                dev_err(&pdev->dev, "Failed to create our card\n");
                goto err_unregister_codec;
        }
index 9e5276d6dda05c999fcba24ff35ab7345513c6da..2ddc034673a8e99d232ebe62b87ff4115628d453 100644 (file)
@@ -315,7 +315,8 @@ static int snd_usb_audio_free(struct snd_usb_audio *chip)
                snd_usb_endpoint_free(ep);
 
        mutex_destroy(&chip->mutex);
-       dev_set_drvdata(&chip->dev->dev, NULL);
+       if (!atomic_read(&chip->shutdown))
+               dev_set_drvdata(&chip->dev->dev, NULL);
        kfree(chip);
        return 0;
 }
index 14e587e706554b4e07fc4eb42dfd55a14808a572..90009c0b3a92e42f2598e05b451a0cbc0d79c8bf 100644 (file)
@@ -604,8 +604,8 @@ line6_hwdep_write(struct snd_hwdep *hwdep, const char __user *data, long count,
        }
 
        data_copy = memdup_user(data, count);
-       if (IS_ERR(ERR_PTR))
-               return -ENOMEM;
+       if (IS_ERR(data_copy))
+               return PTR_ERR(data_copy);
 
        rv = line6_send_raw_message(line6, data_copy, count);
 
index 9352a44ae6e4a5dcb4230c7192ec75f9a788e5d1..49cd4a65e390c8b56a0d5986e3d561fe407c6d80 100644 (file)
@@ -317,7 +317,8 @@ static int podhd_init(struct usb_line6 *line6,
        if (pod->line6.properties->capabilities & LINE6_CAP_PCM) {
                /* initialize PCM subsystem: */
                err = line6_init_pcm(line6,
-                       (id->driver_info == LINE6_PODX3) ? &podx3_pcm_properties :
+                       (id->driver_info == LINE6_PODX3 ||
+                       id->driver_info == LINE6_PODX3LIVE) ? &podx3_pcm_properties :
                        &podhd_pcm_properties);
                if (err < 0)
                        return err;
index c60a776e815d72f14b9b6345f2e8a0266f8ec1b6..8a59d4782a0f4d3c33b3e6840cbe265ba8ee4406 100644 (file)
@@ -2907,6 +2907,23 @@ AU0828_DEVICE(0x2040, 0x7260, "Hauppauge", "HVR-950Q"),
 AU0828_DEVICE(0x2040, 0x7213, "Hauppauge", "HVR-950Q"),
 AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"),
 
+/* Syntek STK1160 */
+{
+       .match_flags = USB_DEVICE_ID_MATCH_DEVICE |
+                      USB_DEVICE_ID_MATCH_INT_CLASS |
+                      USB_DEVICE_ID_MATCH_INT_SUBCLASS,
+       .idVendor = 0x05e1,
+       .idProduct = 0x0408,
+       .bInterfaceClass = USB_CLASS_AUDIO,
+       .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL,
+       .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+               .vendor_name = "Syntek",
+               .product_name = "STK1160",
+               .ifnum = QUIRK_ANY_INTERFACE,
+               .type = QUIRK_AUDIO_ALIGN_TRANSFER
+       }
+},
+
 /* Digidesign Mbox */
 {
        /* Thanks to Clemens Ladisch <clemens@ladisch.de> */
diff --git a/tools/accounting/.gitignore b/tools/accounting/.gitignore
new file mode 100644 (file)
index 0000000..8648520
--- /dev/null
@@ -0,0 +1 @@
+getdelays
diff --git a/tools/accounting/Makefile b/tools/accounting/Makefile
new file mode 100644 (file)
index 0000000..647c94a
--- /dev/null
@@ -0,0 +1,9 @@
+CC := $(CROSS_COMPILE)gcc
+CFLAGS := -I../../usr/include
+
+PROGS := getdelays
+
+all: $(PROGS)
+
+clean:
+       rm -fr $(PROGS)
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
new file mode 100644 (file)
index 0000000..b5ca536
--- /dev/null
@@ -0,0 +1,550 @@
+/* getdelays.c
+ *
+ * Utility to get per-pid and per-tgid delay accounting statistics
+ * Also illustrates usage of the taskstats interface
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2005
+ * Copyright (C) Balbir Singh, IBM Corp. 2006
+ * Copyright (c) Jay Lan, SGI. 2006
+ *
+ * Compile with
+ *     gcc -I/usr/src/linux/include getdelays.c -o getdelays
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <poll.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <signal.h>
+
+#include <linux/genetlink.h>
+#include <linux/taskstats.h>
+#include <linux/cgroupstats.h>
+
+/*
+ * Generic macros for dealing with netlink sockets. Might be duplicated
+ * elsewhere. It is recommended that commercial grade applications use
+ * libnl or libnetlink and use the interfaces provided by the library
+ */
+#define GENLMSG_DATA(glh)      ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
+#define GENLMSG_PAYLOAD(glh)   (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
+#define NLA_DATA(na)           ((void *)((char*)(na) + NLA_HDRLEN))
+#define NLA_PAYLOAD(len)       (len - NLA_HDRLEN)
+
+#define err(code, fmt, arg...)                 \
+       do {                                    \
+               fprintf(stderr, fmt, ##arg);    \
+               exit(code);                     \
+       } while (0)
+
+int done;
+int rcvbufsz;
+char name[100];
+int dbg;
+int print_delays;
+int print_io_accounting;
+int print_task_context_switch_counts;
+
+#define PRINTF(fmt, arg...) {                  \
+           if (dbg) {                          \
+               printf(fmt, ##arg);             \
+           }                                   \
+       }
+
+/* Maximum size of response requested or message sent */
+#define MAX_MSG_SIZE   1024
+/* Maximum number of cpus expected to be specified in a cpumask */
+#define MAX_CPUS       32
+
+struct msgtemplate {
+       struct nlmsghdr n;
+       struct genlmsghdr g;
+       char buf[MAX_MSG_SIZE];
+};
+
+char cpumask[100+6*MAX_CPUS];
+
+static void usage(void)
+{
+       fprintf(stderr, "getdelays [-dilv] [-w logfile] [-r bufsize] "
+                       "[-m cpumask] [-t tgid] [-p pid]\n");
+       fprintf(stderr, "  -d: print delayacct stats\n");
+       fprintf(stderr, "  -i: print IO accounting (works only with -p)\n");
+       fprintf(stderr, "  -l: listen forever\n");
+       fprintf(stderr, "  -v: debug on\n");
+       fprintf(stderr, "  -C: container path\n");
+}
+
+/*
+ * Create a raw netlink socket and bind
+ */
+static int create_nl_socket(int protocol)
+{
+       int fd;
+       struct sockaddr_nl local;
+
+       fd = socket(AF_NETLINK, SOCK_RAW, protocol);
+       if (fd < 0)
+               return -1;
+
+       if (rcvbufsz)
+               if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
+                               &rcvbufsz, sizeof(rcvbufsz)) < 0) {
+                       fprintf(stderr, "Unable to set socket rcv buf size to %d\n",
+                               rcvbufsz);
+                       goto error;
+               }
+
+       memset(&local, 0, sizeof(local));
+       local.nl_family = AF_NETLINK;
+
+       if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
+               goto error;
+
+       return fd;
+error:
+       close(fd);
+       return -1;
+}
+
+
+static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
+            __u8 genl_cmd, __u16 nla_type,
+            void *nla_data, int nla_len)
+{
+       struct nlattr *na;
+       struct sockaddr_nl nladdr;
+       int r, buflen;
+       char *buf;
+
+       struct msgtemplate msg;
+
+       msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+       msg.n.nlmsg_type = nlmsg_type;
+       msg.n.nlmsg_flags = NLM_F_REQUEST;
+       msg.n.nlmsg_seq = 0;
+       msg.n.nlmsg_pid = nlmsg_pid;
+       msg.g.cmd = genl_cmd;
+       msg.g.version = 0x1;
+       na = (struct nlattr *) GENLMSG_DATA(&msg);
+       na->nla_type = nla_type;
+       na->nla_len = nla_len + 1 + NLA_HDRLEN;
+       memcpy(NLA_DATA(na), nla_data, nla_len);
+       msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
+
+       buf = (char *) &msg;
+       buflen = msg.n.nlmsg_len ;
+       memset(&nladdr, 0, sizeof(nladdr));
+       nladdr.nl_family = AF_NETLINK;
+       while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
+                          sizeof(nladdr))) < buflen) {
+               if (r > 0) {
+                       buf += r;
+                       buflen -= r;
+               } else if (errno != EAGAIN)
+                       return -1;
+       }
+       return 0;
+}
+
+
+/*
+ * Probe the controller in genetlink to find the family id
+ * for the TASKSTATS family
+ */
+static int get_family_id(int sd)
+{
+       struct {
+               struct nlmsghdr n;
+               struct genlmsghdr g;
+               char buf[256];
+       } ans;
+
+       int id = 0, rc;
+       struct nlattr *na;
+       int rep_len;
+
+       strcpy(name, TASKSTATS_GENL_NAME);
+       rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
+                       CTRL_ATTR_FAMILY_NAME, (void *)name,
+                       strlen(TASKSTATS_GENL_NAME)+1);
+       if (rc < 0)
+               return 0;       /* sendto() failure? */
+
+       rep_len = recv(sd, &ans, sizeof(ans), 0);
+       if (ans.n.nlmsg_type == NLMSG_ERROR ||
+           (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
+               return 0;
+
+       na = (struct nlattr *) GENLMSG_DATA(&ans);
+       na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
+       if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
+               id = *(__u16 *) NLA_DATA(na);
+       }
+       return id;
+}
+
+#define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
+
+static void print_delayacct(struct taskstats *t)
+{
+       printf("\n\nCPU   %15s%15s%15s%15s%15s\n"
+              "      %15llu%15llu%15llu%15llu%15.3fms\n"
+              "IO    %15s%15s%15s\n"
+              "      %15llu%15llu%15llums\n"
+              "SWAP  %15s%15s%15s\n"
+              "      %15llu%15llu%15llums\n"
+              "RECLAIM  %12s%15s%15s\n"
+              "      %15llu%15llu%15llums\n",
+              "count", "real total", "virtual total",
+              "delay total", "delay average",
+              (unsigned long long)t->cpu_count,
+              (unsigned long long)t->cpu_run_real_total,
+              (unsigned long long)t->cpu_run_virtual_total,
+              (unsigned long long)t->cpu_delay_total,
+              average_ms((double)t->cpu_delay_total, t->cpu_count),
+              "count", "delay total", "delay average",
+              (unsigned long long)t->blkio_count,
+              (unsigned long long)t->blkio_delay_total,
+              average_ms(t->blkio_delay_total, t->blkio_count),
+              "count", "delay total", "delay average",
+              (unsigned long long)t->swapin_count,
+              (unsigned long long)t->swapin_delay_total,
+              average_ms(t->swapin_delay_total, t->swapin_count),
+              "count", "delay total", "delay average",
+              (unsigned long long)t->freepages_count,
+              (unsigned long long)t->freepages_delay_total,
+              average_ms(t->freepages_delay_total, t->freepages_count));
+}
+
+static void task_context_switch_counts(struct taskstats *t)
+{
+       printf("\n\nTask   %15s%15s\n"
+              "       %15llu%15llu\n",
+              "voluntary", "nonvoluntary",
+              (unsigned long long)t->nvcsw, (unsigned long long)t->nivcsw);
+}
+
+static void print_cgroupstats(struct cgroupstats *c)
+{
+       printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, "
+               "uninterruptible %llu\n", (unsigned long long)c->nr_sleeping,
+               (unsigned long long)c->nr_io_wait,
+               (unsigned long long)c->nr_running,
+               (unsigned long long)c->nr_stopped,
+               (unsigned long long)c->nr_uninterruptible);
+}
+
+
+/*
+ * Print the I/O accounting fields of a taskstats record: the command
+ * name followed by the read, write and cancelled-write byte counts.
+ */
+static void print_ioacct(struct taskstats *t)
+{
+       unsigned long long bytes_read = t->read_bytes;
+       unsigned long long bytes_written = t->write_bytes;
+       unsigned long long bytes_cancelled = t->cancelled_write_bytes;
+
+       printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
+               t->ac_comm, bytes_read, bytes_written, bytes_cancelled);
+}
+/*
+ * Program entry point.
+ *
+ * Parses the command-line options, opens a NETLINK_GENERIC socket, looks up
+ * the family id and sends whichever requests were selected (register
+ * cpumask, pid/tgid stats, cgroupstats).  It then receives replies and
+ * prints them according to the -d/-i/-q flags, also writing the stats
+ * attribute to the -w log file when one was opened.  Without -l a single
+ * reply message is processed; with -l the receive loop keeps running
+ * until a fatal reply error.
+ *
+ * Returns 0 on every path that reaches the end of the function; failures
+ * reported through err() or exit() terminate the process instead.
+ */
+int main(int argc, char *argv[])
+{
+       int c, rc, rep_len, aggr_len, len2;
+       int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC;
+       __u16 id;
+       __u32 mypid;
+
+       struct nlattr *na;
+       int nl_sd = -1;
+       int len = 0;
+       pid_t tid = 0;
+       pid_t rtid = 0;
+
+       int fd = 0; /* -w log file; stays 0 when no log file was opened */
+       int count = 0;
+       int write_file = 0;
+       int maskset = 0;
+       char *logfile = NULL;
+       int loop = 0;
+       int containerset = 0;
+       char *containerpath = NULL;
+       int cfd = 0; /* -C container fd; stays 0 when -C was not given */
+       int forking = 0; /* set once -c has forked a child */
+       sigset_t sigset;
+
+       struct msgtemplate msg;
+       /* Option parsing ends early once -c has forked the child. */
+       while (!forking) {
+               c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:c:");
+               if (c < 0)
+                       break;
+
+               switch (c) {
+               case 'd':
+                       printf("print delayacct stats ON\n");
+                       print_delays = 1;
+                       break;
+               case 'i':
+                       printf("printing IO accounting\n");
+                       print_io_accounting = 1;
+                       break;
+               case 'q':
+                       printf("printing task/process context switch rates\n");
+                       print_task_context_switch_counts = 1;
+                       break;
+               case 'C':
+                       containerset = 1;
+                       containerpath = optarg;
+                       break;
+               case 'w':
+                       logfile = strdup(optarg);
+                       printf("write to file %s\n", logfile);
+                       write_file = 1;
+                       break;
+               case 'r':
+                       rcvbufsz = atoi(optarg);
+                       printf("receive buf size %d\n", rcvbufsz);
+                       if (rcvbufsz < 0)
+                               err(1, "Invalid rcv buf size\n");
+                       break;
+               case 'm':
+                       strncpy(cpumask, optarg, sizeof(cpumask));
+                       cpumask[sizeof(cpumask) - 1] = '\0';
+                       maskset = 1;
+                       printf("cpumask %s maskset %d\n", cpumask, maskset);
+                       break;
+               case 't':
+                       tid = atoi(optarg);
+                       if (!tid)
+                               err(1, "Invalid tgid\n");
+                       cmd_type = TASKSTATS_CMD_ATTR_TGID;
+                       break;
+               case 'p':
+                       tid = atoi(optarg);
+                       if (!tid)
+                               err(1, "Invalid pid\n");
+                       cmd_type = TASKSTATS_CMD_ATTR_PID;
+                       break;
+               case 'c':
+
+                       /* Block SIGCHLD for sigwait() later */
+                       if (sigemptyset(&sigset) == -1)
+                               err(1, "Failed to empty sigset");
+                       if (sigaddset(&sigset, SIGCHLD))
+                               err(1, "Failed to set sigchld in sigset");
+                       sigprocmask(SIG_BLOCK, &sigset, NULL);
+
+                       /* fork/exec a child */
+                       tid = fork();
+                       if (tid < 0)
+                               err(1, "Fork failed\n");
+                       if (tid == 0)
+                               if (execvp(argv[optind - 1],
+                                   &argv[optind - 1]) < 0)
+                                       exit(-1);
+
+                       /* Set the command type and avoid further processing */
+                       cmd_type = TASKSTATS_CMD_ATTR_PID;
+                       forking = 1;
+                       break;
+               case 'v':
+                       printf("debug on\n");
+                       dbg = 1;
+                       break;
+               case 'l':
+                       printf("listen forever\n");
+                       loop = 1;
+                       break;
+               default:
+                       usage();
+                       exit(-1);
+               }
+       }
+       /* -w: create or truncate the log file that receives the stats data. */
+       if (write_file) {
+               fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
+                         S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+               if (fd == -1) {
+                       perror("Cannot open output file\n");
+                       exit(1);
+               }
+       }
+
+       nl_sd = create_nl_socket(NETLINK_GENERIC);
+       if (nl_sd < 0)
+               err(1, "error creating Netlink socket\n");
+
+
+       mypid = getpid();
+       id = get_family_id(nl_sd);
+       if (!id) {
+               fprintf(stderr, "Error getting family id, errno %d\n", errno);
+               goto err;
+       }
+       PRINTF("family id %d\n", id);
+       /* -m: send TASKSTATS_CMD_ATTR_REGISTER_CPUMASK with the cpumask string. */
+       if (maskset) {
+               rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
+                             TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
+                             &cpumask, strlen(cpumask) + 1);
+               PRINTF("Sent register cpumask, retval %d\n", rc);
+               if (rc < 0) {
+                       fprintf(stderr, "error sending register cpumask\n");
+                       goto err;
+               }
+       }
+       /* tid is set by -t, -p and -c alike, so any of them conflicts with -C. */
+       if (tid && containerset) {
+               fprintf(stderr, "Select either -t or -C, not both\n");
+               goto err;
+       }
+
+       /*
+        * If we forked a child, wait for it to exit. Cannot use waitpid()
+        * as all the delicious data would be reaped as part of the wait
+        */
+       if (tid && forking) {
+               int sig_received;
+               sigwait(&sigset, &sig_received);
+       }
+       /* Request stats for tid; cmd_type was selected by -t, -p or -c. */
+       if (tid) {
+               rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
+                             cmd_type, &tid, sizeof(__u32));
+               PRINTF("Sent pid/tgid, retval %d\n", rc);
+               if (rc < 0) {
+                       fprintf(stderr, "error sending tid/tgid cmd\n");
+                       goto done;
+               }
+       }
+       /* -C: send CGROUPSTATS_CMD_GET with an fd opened on the container path. */
+       if (containerset) {
+               cfd = open(containerpath, O_RDONLY);
+               if (cfd < 0) {
+                       perror("error opening container file");
+                       goto err;
+               }
+               rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET,
+                             CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32));
+               if (rc < 0) {
+                       perror("error sending cgroupstats command");
+                       goto err;
+               }
+       }
+       if (!maskset && !tid && !containerset) { /* nothing was requested */
+               usage();
+               goto err;
+       }
+       /* Receive replies; the loop repeats only when -l (loop) was given. */
+       do {
+               rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
+               PRINTF("received %d bytes\n", rep_len);
+               /* A recv() failure is reported, then the loop condition is re-tested. */
+               if (rep_len < 0) {
+                       fprintf(stderr, "nonfatal reply error: errno %d\n",
+                               errno);
+                       continue;
+               }
+               if (msg.n.nlmsg_type == NLMSG_ERROR ||
+                   !NLMSG_OK((&msg.n), rep_len)) {
+                       struct nlmsgerr *err = NLMSG_DATA(&msg);
+                       fprintf(stderr, "fatal reply error,  errno %d\n",
+                               err->error);
+                       goto done;
+               }
+
+               PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n",
+                      sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
+
+               /* From here on rep_len is the genetlink payload length, not the recv() size. */
+               rep_len = GENLMSG_PAYLOAD(&msg.n);
+               /* Walk the top-level attributes; len is the offset of the next one. */
+               na = (struct nlattr *) GENLMSG_DATA(&msg);
+               len = 0;
+               while (len < rep_len) {
+                       len += NLA_ALIGN(na->nla_len);
+                       switch (na->nla_type) {
+                       case TASKSTATS_TYPE_AGGR_TGID:
+                               /* Fall through */
+                       case TASKSTATS_TYPE_AGGR_PID:
+                               aggr_len = NLA_PAYLOAD(na->nla_len);
+                               len2 = 0;
+                               /* For nested attributes, na follows */
+                               na = (struct nlattr *) NLA_DATA(na);
+                               done = 0; /* NOTE(review): a variable declared outside
+                                          * this function, unrelated to the done: label */
+                               while (len2 < aggr_len) {
+                                       switch (na->nla_type) {
+                                       case TASKSTATS_TYPE_PID:
+                                               rtid = *(int *) NLA_DATA(na);
+                                               if (print_delays)
+                                                       printf("PID\t%d\n", rtid);
+                                               break;
+                                       case TASKSTATS_TYPE_TGID:
+                                               rtid = *(int *) NLA_DATA(na);
+                                               if (print_delays)
+                                                       printf("TGID\t%d\n", rtid);
+                                               break;
+                                       case TASKSTATS_TYPE_STATS:
+                                               count++;
+                                               if (print_delays)
+                                                       print_delayacct((struct taskstats *) NLA_DATA(na));
+                                               if (print_io_accounting)
+                                                       print_ioacct((struct taskstats *) NLA_DATA(na));
+                                               if (print_task_context_switch_counts)
+                                                       task_context_switch_counts((struct taskstats *) NLA_DATA(na));
+                                               if (fd) { /* nonzero only when -w opened a log file.
+                                                          * NOTE(review): the write length is na->nla_len,
+                                                          * not NLA_PAYLOAD(na->nla_len) as used for
+                                                          * aggr_len above - confirm this is intended. */
+                                                       if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
+                                                               err(1,"write error\n");
+                                                       }
+                                               }
+                                               if (!loop)
+                                                       goto done;
+                                               break;
+                                       case TASKSTATS_TYPE_NULL:
+                                               break;
+                                       default:
+                                               fprintf(stderr, "Unknown nested"
+                                                       " nla_type %d\n",
+                                                       na->nla_type);
+                                               break;
+                                       }
+                                       len2 += NLA_ALIGN(na->nla_len);
+                                       na = (struct nlattr *)((char *)na +
+                                                              NLA_ALIGN(na->nla_len));
+                               }
+                               break;
+
+                       case CGROUPSTATS_TYPE_CGROUP_STATS:
+                               print_cgroupstats(NLA_DATA(na));
+                               break;
+                       default:
+                               fprintf(stderr, "Unknown nla_type %d\n",
+                                       na->nla_type); /* falls through to the break below */
+                       case TASKSTATS_TYPE_NULL:
+                               break;
+                       }
+                       na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
+               }
+       } while (loop);
+done: /* reached after replies were handled or a pid/tgid send failed */
+       if (maskset) {
+               rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
+                             TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
+                             &cpumask, strlen(cpumask) + 1);
+               printf("Sent deregister mask, retval %d\n", rc);
+               if (rc < 0)
+                       err(rc, "error sending deregister cpumask\n");
+       }
+err: /* earlier failures jump here and skip the cpumask deregistration */
+       close(nl_sd);
+       if (fd)
+               close(fd);
+       if (cfd)
+               close(cfd);
+       return 0;
+}
index 1188bc849ee3b3253fd8229fca21bf2d6c87856e..a39629206864e5bb74aaddea15ca1ab762877042 100644 (file)
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 
 #define X86_FEATURE_INTEL_PT   ( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW  ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/tools/laptop/dslm/.gitignore b/tools/laptop/dslm/.gitignore
new file mode 100644 (file)
index 0000000..9fc984e
--- /dev/null
@@ -0,0 +1 @@
+dslm
diff --git a/tools/laptop/dslm/Makefile b/tools/laptop/dslm/Makefile
new file mode 100644 (file)
index 0000000..ff613b3
--- /dev/null
@@ -0,0 +1,9 @@
+# Builds $(PROGS) with make's built-in rules; CROSS_COMPILE may prefix gcc.
+CC := $(CROSS_COMPILE)gcc
+CFLAGS := -I../../usr/include
+
+PROGS := dslm
+
+# all and clean name actions, not files, so a stray file called "all" or
+# "clean" must not stop them from running.
+.PHONY: all clean
+
+all: $(PROGS)
+
+clean:
+       rm -fr $(PROGS)
diff --git a/tools/laptop/dslm/dslm.c b/tools/laptop/dslm/dslm.c
new file mode 100644 (file)
index 0000000..d5dd2d4
--- /dev/null
@@ -0,0 +1,166 @@
+/*
+ * dslm.c
+ * Simple Disk Sleep Monitor
+ *  by Bartek Kania
+ * Licensed under the GPL
+ */
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <linux/hdreg.h>
+
+#ifdef DEBUG
+#define D(x) x
+#else
+#define D(x)
+#endif
+
+/*
+ * Set to 1 by the SIGINT handler to ask the measurement loop to stop.
+ * volatile sig_atomic_t is the type an object written from a signal
+ * handler and read by the main program is required to have.
+ */
+volatile sig_atomic_t endit = 0;
+
+/*
+ * Query the drive's power mode through HDIO_DRIVE_CMD (logic borrowed
+ * from hdparm).  WIN_CHECKPOWERMODE1 is issued first; if that ioctl fails
+ * the query is repeated with WIN_CHECKPOWERMODE2.
+ *
+ * Returns 1 when the drive is active, 0 when it is in standby/sleep and
+ * -1 when the state is unknown.
+ */
+static int check_powermode(int fd)
+{
+    unsigned char cmd[4] = {WIN_CHECKPOWERMODE1,0,0,0};
+    int failed;
+    int result;
+
+    failed = ioctl(fd, HDIO_DRIVE_CMD, &cmd) != 0;
+    if (failed) {
+       /* try again with 0x98 */
+       cmd[0] = WIN_CHECKPOWERMODE2;
+       failed = ioctl(fd, HDIO_DRIVE_CMD, &cmd) != 0;
+    }
+
+    if (!failed)
+       result = (cmd[2] == 255) ? 1 : 0;
+    else if (errno == EIO && cmd[0] == 0 && cmd[1] == 0)
+       result = 0; /* "sleeping"; */
+    else
+       result = -1; /* "unknown"; */
+
+    D(printf(" drive state is:  %d\n", result));
+
+    return result;
+}
+
+/*
+ * Translate a power state code into a printable name: -1 is "unknown",
+ * 0 is "sleeping", 1 is "active"; any other value yields
+ * "internal error".
+ */
+static char *state_name(int i)
+{
+    switch (i) {
+    case -1:
+       return "unknown";
+    case 0:
+       return "sleeping";
+    case 1:
+       return "active";
+    default:
+       return "internal error";
+    }
+}
+
+/*
+ * Format a timestamp with ctime() and strip the trailing newline.
+ *
+ * Returns a pointer to ctime()'s static buffer, or to a fixed placeholder
+ * string when ctime() cannot convert the value (it returns NULL then, and
+ * the result must not be handed to strlen()).  The returned string is
+ * overwritten by the next call.
+ */
+static char *myctime(time_t time)
+{
+    static char unknown[] = "(unknown time)";
+    char *ts = ctime(&time);
+    size_t len;
+
+    if (!ts)
+       return unknown;
+
+    /* Only drop the last character when it really is the newline. */
+    len = strlen(ts);
+    if (len > 0 && ts[len - 1] == '\n')
+       ts[len - 1] = 0;
+
+    return ts;
+}
+
+/* Index into measure()'s per-state totals: 0 unknown, 1 sleeping, 2 active. */
+static int state_slot(int state)
+{
+    return (state == 0 || state == 1) ? state + 1 : 0;
+}
+
+/* Share of total taken by part, in percent; 0 when total is not positive. */
+static float percent_of(time_t part, time_t total)
+{
+    return total > 0 ? (float)part / (float)total * 100 : 0;
+}
+
+/*
+ * Poll the drive's power state once per second until endit is set,
+ * printing a line for every state change, then print a summary of the
+ * time spent in each state.
+ *
+ * fd: descriptor of the disk, handed to check_powermode().
+ *
+ * The interval that is still running when the loop ends is charged to
+ * the last observed state, so the three per-state totals always add up
+ * to the reported total running time.
+ */
+static void measure(int fd)
+{
+    time_t state_time[3] = {0, 0, 0};
+    time_t start_time;
+    time_t last_time;
+    time_t curr_time;
+    time_t total_time;
+    int last_state;
+    int curr_state;
+    int changes = 0;
+
+    printf("Starting measurements\n");
+
+    last_state = check_powermode(fd);
+    start_time = last_time = time(0);
+    printf("  System is in state %s\n\n", state_name(last_state));
+
+    while (!endit) {
+       sleep(1);
+       curr_state = check_powermode(fd);
+       if (curr_state == last_state)
+           continue;
+
+       changes++;
+       curr_time = time(0);
+       state_time[state_slot(last_state)] += curr_time - last_time;
+
+       last_state = curr_state;
+       last_time = curr_time;
+
+       printf("%s: State-change to %s\n", myctime(curr_time),
+              state_name(curr_state));
+    }
+
+    /* Charge the time since the last change to the final state. */
+    curr_time = time(0);
+    state_time[state_slot(last_state)] += curr_time - last_time;
+    total_time = curr_time - start_time;
+
+    printf("\nTotal running time:  %lus\n", (unsigned long)total_time);
+    printf(" State changed %d times\n", changes);
+
+    printf(" Time in sleep state:   %lus (%.2f%%)\n",
+          (unsigned long)state_time[1],
+          percent_of(state_time[1], total_time));
+    printf(" Time in active state:  %lus (%.2f%%)\n",
+          (unsigned long)state_time[2],
+          percent_of(state_time[2], total_time));
+    printf(" Time in unknown state: %lus (%.2f%%)\n",
+          (unsigned long)state_time[0],
+          percent_of(state_time[0], total_time));
+}
+
+/*
+ * Signal handler: raise the endit flag so that the measurement loop
+ * finishes.  The signal number itself is not used.
+ */
+static void ender(int s)
+{
+    (void)s;
+    endit = 1;
+}
+
+/*
+ * Print the command-line synopsis and terminate the program.
+ *
+ * This is only reached for an invalid invocation, so the text goes to
+ * stderr and the exit status signals failure.  Does not return.
+ */
+static void usage(void)
+{
+    fputs("usage: dslm [-w <time>] <disk>\n", stderr);
+    exit(EXIT_FAILURE);
+}
+
+/*
+ * dslm [-w <time>] <disk>
+ *
+ * Opens the disk, optionally waits for the system to settle (60 seconds
+ * unless -w gives another value; a value of 0 or less skips the wait),
+ * installs the SIGINT handler and runs the measurement until it is
+ * interrupted.  Returns 0 after the measurement; exits with a failure
+ * status when the disk cannot be opened.
+ */
+int main(int argc, char **argv)
+{
+    int fd;
+    char *disk = 0;
+    int settle_time = 60;
+
+    /* Parse the simple command-line */
+    if (argc == 2)
+       disk = argv[1];
+    else if (argc == 4 && strcmp(argv[1], "-w") == 0) {
+       settle_time = atoi(argv[2]);
+       disk = argv[3];
+    } else
+       usage();
+
+    /* open() reports failure with -1; 0 is a valid descriptor. */
+    fd = open(disk, O_RDONLY|O_NONBLOCK);
+    if (fd < 0) {
+       printf("Can't open %s, because: %s\n", disk, strerror(errno));
+       exit(-1);
+    }
+
+    /* A negative value would turn into a huge unsigned sleep() argument. */
+    if (settle_time > 0) {
+       printf("Waiting %d seconds for the system to settle down to "
+              "'normal'\n", settle_time);
+       sleep(settle_time);
+    } else
+       puts("Not waiting for system to settle down");
+
+    signal(SIGINT, ender);
+
+    measure(fd);
+
+    close(fd);
+
+    return 0;
+}
index c0c0b265e88e54b868858a812839c4c24979651e..b63a31be1218830eecc6116cf301f375c0a046c1 100644 (file)
@@ -98,6 +98,15 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
                        *type = INSN_FP_SETUP;
                break;
 
+       case 0x8d:
+               if (insn.rex_prefix.bytes &&
+                   insn.rex_prefix.bytes[0] == 0x48 &&
+                   insn.modrm.nbytes && insn.modrm.bytes[0] == 0x2c &&
+                   insn.sib.nbytes && insn.sib.bytes[0] == 0x24)
+                       /* lea (%rsp), %rbp */
+                       *type = INSN_FP_SETUP;
+               break;
+
        case 0x90:
                *type = INSN_NOP;
                break;
index 143b6cdd7f068f88fa8487aba804621c37523211..e8a1f699058a29ba695bfbf24781562c665e4525 100644 (file)
@@ -97,6 +97,19 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file,
        return next;
 }
 
+/*
+ * Return true if any symbol in any section of the file has a name that
+ * starts with "__gcov_.", false otherwise.
+ */
+static bool gcov_enabled(struct objtool_file *file)
+{
+       static const char gcov_prefix[] = "__gcov_.";
+       struct section *cur_sec;
+       struct symbol *cur_sym;
+
+       list_for_each_entry(cur_sec, &file->elf->sections, list) {
+               list_for_each_entry(cur_sym, &cur_sec->symbol_list, list) {
+                       if (strncmp(cur_sym->name, gcov_prefix,
+                                   sizeof(gcov_prefix) - 1) == 0)
+                               return true;
+               }
+       }
+
+       return false;
+}
+
 #define for_each_insn(file, insn)                                      \
        list_for_each_entry(insn, &file->insn_list, list)
 
@@ -713,6 +726,7 @@ static struct rela *find_switch_table(struct objtool_file *file,
                                      struct instruction *insn)
 {
        struct rela *text_rela, *rodata_rela;
+       struct instruction *orig_insn = insn;
 
        text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
        if (text_rela && text_rela->sym == file->rodata->sym) {
@@ -733,10 +747,16 @@ static struct rela *find_switch_table(struct objtool_file *file,
 
        /* case 3 */
        func_for_each_insn_continue_reverse(file, func, insn) {
-               if (insn->type == INSN_JUMP_UNCONDITIONAL ||
-                   insn->type == INSN_JUMP_DYNAMIC)
+               if (insn->type == INSN_JUMP_DYNAMIC)
                        break;
 
+               /* allow small jumps within the range */
+               if (insn->type == INSN_JUMP_UNCONDITIONAL &&
+                   insn->jump_dest &&
+                   (insn->jump_dest->offset <= insn->offset ||
+                    insn->jump_dest->offset > orig_insn->offset))
+                   break;
+
                text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
                                                    insn->len);
                if (text_rela && text_rela->sym == file->rodata->sym)
@@ -1034,34 +1054,6 @@ static int validate_branch(struct objtool_file *file,
        return 0;
 }
 
-static bool is_gcov_insn(struct instruction *insn)
-{
-       struct rela *rela;
-       struct section *sec;
-       struct symbol *sym;
-       unsigned long offset;
-
-       rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
-       if (!rela)
-               return false;
-
-       if (rela->sym->type != STT_SECTION)
-               return false;
-
-       sec = rela->sym->sec;
-       offset = rela->addend + insn->offset + insn->len - rela->offset;
-
-       list_for_each_entry(sym, &sec->symbol_list, list) {
-               if (sym->type != STT_OBJECT)
-                       continue;
-
-               if (offset >= sym->offset && offset < sym->offset + sym->len)
-                       return (!memcmp(sym->name, "__gcov0.", 8));
-       }
-
-       return false;
-}
-
 static bool is_kasan_insn(struct instruction *insn)
 {
        return (insn->type == INSN_CALL &&
@@ -1083,9 +1075,6 @@ static bool ignore_unreachable_insn(struct symbol *func,
        if (insn->type == INSN_NOP)
                return true;
 
-       if (is_gcov_insn(insn))
-               return true;
-
        /*
         * Check if this (or a subsequent) instruction is related to
         * CONFIG_UBSAN or CONFIG_KASAN.
@@ -1146,6 +1135,19 @@ static int validate_functions(struct objtool_file *file)
                                    ignore_unreachable_insn(func, insn))
                                        continue;
 
+                               /*
+                                * gcov produces a lot of unreachable
+                                * instructions.  If we get an unreachable
+                                * warning and the file has gcov enabled, just
+                                * ignore it, and all other such warnings for
+                                * the file.
+                                */
+                               if (!file->ignore_unreachables &&
+                                   gcov_enabled(file)) {
+                                       file->ignore_unreachables = true;
+                                       continue;
+                               }
+
                                WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset);
                                warnings++;
                        }
diff --git a/tools/pcmcia/.gitignore b/tools/pcmcia/.gitignore
new file mode 100644 (file)
index 0000000..53d0813
--- /dev/null
@@ -0,0 +1 @@
+crc32hash
diff --git a/tools/pcmcia/Makefile b/tools/pcmcia/Makefile
new file mode 100644 (file)
index 0000000..81a7498
--- /dev/null
@@ -0,0 +1,9 @@
+# Builds $(PROGS) with make's built-in rules; CROSS_COMPILE may prefix gcc.
+CC := $(CROSS_COMPILE)gcc
+CFLAGS := -I../../usr/include
+
+PROGS := crc32hash
+
+# all and clean name actions, not files, so a stray file called "all" or
+# "clean" must not stop them from running.
+.PHONY: all clean
+
+all: $(PROGS)
+
+clean:
+       rm -fr $(PROGS)
diff --git a/tools/pcmcia/crc32hash.c b/tools/pcmcia/crc32hash.c
new file mode 100644 (file)
index 0000000..44f8bee
--- /dev/null
@@ -0,0 +1,32 @@
+/* crc32hash.c - derived from linux/lib/crc32.c, GNU GPL v2 */
+/* Usage example:
+$ ./crc32hash "Dual Speed"
+*/
+
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+/*
+ * Bit-by-bit CRC over len bytes at p, using the right-shifting form with
+ * the constant 0xedb88320.  The register starts at 0 and is returned
+ * without a final inversion, so an empty buffer yields 0.
+ */
+static unsigned int crc32(unsigned char const *p, unsigned int len)
+{
+       unsigned char const *end = p + len;
+       unsigned int crc = 0;
+       int bit;
+
+       for (; p != end; p++) {
+               crc ^= *p;
+               for (bit = 0; bit < 8; bit++) {
+                       if (crc & 1)
+                               crc = (crc >> 1) ^ 0xedb88320;
+                       else
+                               crc >>= 1;
+               }
+       }
+       return crc;
+}
+
+/*
+ * Print the crc32() value of the single command-line argument as a
+ * hexadecimal number.  Returns -1 unless exactly one argument is given.
+ */
+int main(int argc, char **argv) {
+       char const *text;
+
+       if (argc != 2) {
+               printf("no string passed as argument\n");
+               return -1;
+       }
+
+       text = argv[1];
+       printf("0x%x\n", crc32((unsigned char const *)text, strlen(text)));
+       return 0;
+}
index 5ce61a1bda9ca863cbcd4c327f2abd8f621bf1f6..df14e6b67b63b781846d71b80cbc6e8ee1edc0e3 100644 (file)
@@ -36,7 +36,7 @@ SOLIBEXT=so
 # The following works at least on fedora 23, you may need the next
 # line for other distros.
 ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
-JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3)
+JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
 else
   ifneq (,$(wildcard /usr/sbin/alternatives))
     JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g')
index fb8e42c7507add43bc9c1848cf3ef300382ee313..a53fef0c673bbbfcf8669ffb66f5de079dbc7e18 100644 (file)
@@ -601,7 +601,8 @@ int hist_browser__run(struct hist_browser *browser, const char *help)
                        u64 nr_entries;
                        hbt->timer(hbt->arg);
 
-                       if (hist_browser__has_filter(browser))
+                       if (hist_browser__has_filter(browser) ||
+                           symbol_conf.report_hierarchy)
                                hist_browser__update_nr_entries(browser);
 
                        nr_entries = hist_browser__nr_entries(browser);
@@ -1336,8 +1337,8 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
                }
 
                if (first) {
-                       ui_browser__printf(&browser->b, "%c", folded_sign);
-                       width--;
+                       ui_browser__printf(&browser->b, "%c ", folded_sign);
+                       width -= 2;
                        first = false;
                } else {
                        ui_browser__printf(&browser->b, "  ");
@@ -1360,8 +1361,10 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
                width -= hpp.buf - s;
        }
 
-       ui_browser__write_nstring(&browser->b, "", hierarchy_indent);
-       width -= hierarchy_indent;
+       if (!first) {
+               ui_browser__write_nstring(&browser->b, "", hierarchy_indent);
+               width -= hierarchy_indent;
+       }
 
        if (column >= browser->b.horiz_scroll) {
                char s[2048];
@@ -1380,7 +1383,13 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
                }
 
                perf_hpp_list__for_each_format(entry->hpp_list, fmt) {
-                       ui_browser__write_nstring(&browser->b, "", 2);
+                       if (first) {
+                               ui_browser__printf(&browser->b, "%c ", folded_sign);
+                               first = false;
+                       } else {
+                               ui_browser__write_nstring(&browser->b, "", 2);
+                       }
+
                        width -= 2;
 
                        /*
@@ -1554,10 +1563,11 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows
        int indent = hists->nr_hpp_node - 2;
        bool first_node, first_col;
 
-       ret = scnprintf(buf, size, " ");
+       ret = scnprintf(buf, size, "  ");
        if (advance_hpp_check(&dummy_hpp, ret))
                return ret;
 
+       first_node = true;
        /* the first hpp_list_node is for overhead columns */
        fmt_node = list_first_entry(&hists->hpp_formats,
                                    struct perf_hpp_list_node, list);
@@ -1572,12 +1582,16 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows
                ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "  ");
                if (advance_hpp_check(&dummy_hpp, ret))
                        break;
+
+               first_node = false;
        }
 
-       ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s",
-                       indent * HIERARCHY_INDENT, "");
-       if (advance_hpp_check(&dummy_hpp, ret))
-               return ret;
+       if (!first_node) {
+               ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s",
+                               indent * HIERARCHY_INDENT, "");
+               if (advance_hpp_check(&dummy_hpp, ret))
+                       return ret;
+       }
 
        first_node = true;
        list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
@@ -2075,8 +2089,21 @@ void hist_browser__init(struct hist_browser *browser,
        browser->b.use_navkeypressed    = true;
        browser->show_headers           = symbol_conf.show_hist_headers;
 
-       hists__for_each_format(hists, fmt)
+       if (symbol_conf.report_hierarchy) {
+               struct perf_hpp_list_node *fmt_node;
+
+               /* count overhead columns (in the first node) */
+               fmt_node = list_first_entry(&hists->hpp_formats,
+                                           struct perf_hpp_list_node, list);
+               perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
+                       ++browser->b.columns;
+
+               /* add a single column for the whole hierarchy of sort keys */
                ++browser->b.columns;
+       } else {
+               hists__for_each_format(hists, fmt)
+                       ++browser->b.columns;
+       }
 
        hists__reset_column_width(hists);
 }
index 85dd0db0a127995a725eade74a12e5da8569b80c..2f3eded54b0cc65a6d2ac59456a2acf2d7921059 100644 (file)
@@ -1895,7 +1895,6 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse
        if (ph->needs_swap)
                nr = bswap_32(nr);
 
-       ph->env.nr_numa_nodes = nr;
        nodes = zalloc(sizeof(*nodes) * nr);
        if (!nodes)
                return -ENOMEM;
@@ -1932,6 +1931,7 @@ static int process_numa_topology(struct perf_file_section *section __maybe_unuse
 
                free(str);
        }
+       ph->env.nr_numa_nodes = nr;
        ph->env.numa_nodes = nodes;
        return 0;
 
index b02992efb51383c06ce507adcba261100e101dd8..a69f027368ef49caf0c259e035cf886fb7062d32 100644 (file)
@@ -1600,18 +1600,18 @@ static void hists__hierarchy_output_resort(struct hists *hists,
                if (prog)
                        ui_progress__update(prog, 1);
 
+               hists->nr_entries++;
+               if (!he->filtered) {
+                       hists->nr_non_filtered_entries++;
+                       hists__calc_col_len(hists, he);
+               }
+
                if (!he->leaf) {
                        hists__hierarchy_output_resort(hists, prog,
                                                       &he->hroot_in,
                                                       &he->hroot_out,
                                                       min_callchain_hits,
                                                       use_callchain);
-                       hists->nr_entries++;
-                       if (!he->filtered) {
-                               hists->nr_non_filtered_entries++;
-                               hists__calc_col_len(hists, he);
-                       }
-
                        continue;
                }
 
index 9f43fda2570f959833c85b89aa29b3612c6d6abb..660fca05bc93bd8f724a9f3cef3f0f3d86abb983 100644 (file)
@@ -136,8 +136,8 @@ do {                                                        \
 group          [^,{}/]*[{][^}]*[}][^,{}/]*
 event_pmu      [^,{}/]+[/][^/]*[/][^,{}/]*
 event          [^,{}/]+
-bpf_object     .*\.(o|bpf)
-bpf_source     .*\.c
+bpf_object     [^,{}]+\.(o|bpf)
+bpf_source     [^,{}]+\.c
 
 num_dec                [0-9]+
 num_hex                0x[a-fA-F0-9]+
index a538ff44b108952dbe25363abea8f34810211fb6..a1883bbb014478d9239d24fda259c61bde853b23 100644 (file)
@@ -8,18 +8,19 @@
 # as published by the Free Software Foundation; version 2
 # of the License.
 
-include ../../../../scripts/Makefile.include
-
-OUTPUT=./
-ifeq ("$(origin O)", "command line")
-       OUTPUT := $(O)/
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
 endif
 
-ifneq ($(OUTPUT),)
-# check that the output directory actually exists
-OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
-$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
+include $(srctree)/../../scripts/Makefile.include
+
+OUTPUT=$(srctree)/
+ifeq ("$(origin O)", "command line")
+       OUTPUT := $(O)/power/acpi/
 endif
+#$(info Determined 'OUTPUT' to be $(OUTPUT))
 
 # --- CONFIGURATION BEGIN ---
 
@@ -70,8 +71,8 @@ WARNINGS := -Wall
 WARNINGS += $(call cc-supports,-Wstrict-prototypes)
 WARNINGS += $(call cc-supports,-Wdeclaration-after-statement)
 
-KERNEL_INCLUDE := ../../../include
-ACPICA_INCLUDE := ../../../drivers/acpi/acpica
+KERNEL_INCLUDE := $(OUTPUT)include
+ACPICA_INCLUDE := $(srctree)/../../../drivers/acpi/acpica
 CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE)
 CFLAGS += $(WARNINGS)
 
index ec87a9e562c0ea0cd08c0cee096850e28f8615e9..373738338f5186199e8e97164eb1ae0a3ce33966 100644 (file)
@@ -8,28 +8,42 @@
 # as published by the Free Software Foundation; version 2
 # of the License.
 
-$(OUTPUT)$(TOOL): $(TOOL_OBJS) FORCE
-       $(ECHO) "  LD      " $@
-       $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(TOOL_OBJS) -L$(OUTPUT) -o $@
+objdir := $(OUTPUT)tools/$(TOOL)/
+toolobjs := $(addprefix $(objdir),$(TOOL_OBJS))
+$(OUTPUT)$(TOOL): $(toolobjs) FORCE
+       $(ECHO) "  LD      " $(subst $(OUTPUT),,$@)
+       $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(toolobjs) -L$(OUTPUT) -o $@
+       $(ECHO) "  STRIP   " $(subst $(OUTPUT),,$@)
        $(QUIET) $(STRIPCMD) $@
 
-$(OUTPUT)%.o: %.c
-       $(ECHO) "  CC      " $@
+$(KERNEL_INCLUDE):
+       $(ECHO) "  MKDIR   " $(subst $(OUTPUT),,$@)
+       $(QUIET) mkdir -p $(KERNEL_INCLUDE)
+       $(ECHO) "  CP      " $(subst $(OUTPUT),,$@)
+       $(QUIET) cp -rf $(srctree)/../../../include/acpi $(KERNEL_INCLUDE)/
+
+$(objdir)%.o: %.c $(KERNEL_INCLUDE)
+       $(ECHO) "  CC      " $(subst $(OUTPUT),,$@)
        $(QUIET) $(CC) -c $(CFLAGS) -o $@ $<
 
 all: $(OUTPUT)$(TOOL)
 clean:
-       -find $(OUTPUT) \( -not -type d \) \
-       -and \( -name '*~' -o -name '*.[oas]' \) \
-       -type f -print \
-        | xargs rm -f
-       -rm -f $(OUTPUT)$(TOOL)
+       $(ECHO) "  RMOBJ   " $(subst $(OUTPUT),,$(objdir))
+       $(QUIET) find $(objdir) \( -not -type d \)\
+                -and \( -name '*~' -o -name '*.[oas]' \)\
+                -type f -print | xargs rm -f
+       $(ECHO) "  RM      " $(TOOL)
+       $(QUIET) rm -f $(OUTPUT)$(TOOL)
+       $(ECHO) "  RMINC   " $(subst $(OUTPUT),,$(KERNEL_INCLUDE))
+       $(QUIET) rm -rf $(KERNEL_INCLUDE)
 
 install-tools:
-       $(INSTALL) -d $(DESTDIR)${sbindir}
-       $(INSTALL_PROGRAM) $(OUTPUT)$(TOOL) $(DESTDIR)${sbindir}
+       $(ECHO) "  INST    " $(TOOL)
+       $(QUIET) $(INSTALL) -d $(DESTDIR)$(sbindir)
+       $(QUIET) $(INSTALL_PROGRAM) $(OUTPUT)$(TOOL) $(DESTDIR)$(sbindir)
 uninstall-tools:
-       - rm -f $(DESTDIR)${sbindir}/$(TOOL)
+       $(ECHO) "  UNINST  " $(TOOL)
+       $(QUIET) rm -f $(DESTDIR)$(sbindir)/$(TOOL)
 
 install: all install-tools $(EXTRA_INSTALL)
 uninstall: uninstall-tools $(EXTRA_UNINSTALL)
index 352df4b41ae9ecdff1f938be6e3f28b2f53fa5c3..f2d06e773eb4fea8f2d5ea4aae91e06afbcabcf8 100644 (file)
@@ -17,9 +17,7 @@ vpath %.c \
        ../../os_specific/service_layers\
        .
 CFLAGS += -DACPI_APPLICATION -DACPI_SINGLE_THREAD -DACPI_DEBUGGER\
-       -I.\
-       -I../../../../../drivers/acpi/acpica\
-       -I../../../../../include
+       -I.
 LDFLAGS += -lpthread
 TOOL_OBJS = \
        acpidbg.o
index a88ac45b7756aedc77099ca5adbc1de268d5c33c..4308362d7068eb346931aa7eda7a8d4c769af35e 100644 (file)
 #include <acpi/acpi.h>
 
 /* Headers not included by include/acpi/platform/aclinux.h */
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <error.h>
 #include <stdbool.h>
 #include <fcntl.h>
 #include <assert.h>
-#include <linux/circ_buf.h>
+#include <sys/select.h>
+#include "../../../../../include/linux/circ_buf.h"
 
 #define ACPI_AML_FILE          "/sys/kernel/debug/acpi/acpidbg"
 #define ACPI_AML_SEC_TICK      1
index 04b5db7c7c0bd0608c8601a46766e551bb021e66..f7c7af1f9258b08ec2eb4f1c01a63184ea217b95 100644 (file)
@@ -19,9 +19,7 @@ vpath %.c \
        ./\
        ../../common\
        ../../os_specific/service_layers
-CFLAGS += -DACPI_DUMP_APP -I.\
-       -I../../../../../drivers/acpi/acpica\
-       -I../../../../../include
+CFLAGS += -DACPI_DUMP_APP -I.
 TOOL_OBJS = \
        apdump.o\
        apfiles.o\
@@ -49,7 +47,9 @@ TOOL_OBJS = \
 
 include ../../Makefile.rules
 
-install-man: ../../man/acpidump.8
-       $(INSTALL_DATA) -D $< $(DESTDIR)${mandir}/man8/acpidump.8
+install-man: $(srctree)/man/acpidump.8
+       $(ECHO) "  INST    " acpidump.8
+       $(QUIET) $(INSTALL_DATA) -D $< $(DESTDIR)$(mandir)/man8/acpidump.8
 uninstall-man:
-       - rm -f $(DESTDIR)${mandir}/man8/acpidump.8
+       $(ECHO) "  UNINST  " acpidump.8
+       $(QUIET) rm -f $(DESTDIR)$(mandir)/man8/acpidump.8
index b4bf76971dc975c5232b96f27f56310f65d2bcdd..1eef0aed64239509795229d522cada4e80edf5a2 100644 (file)
@@ -296,7 +296,7 @@ int cmd_freq_set(int argc, char **argv)
                        struct cpufreq_affected_cpus *cpus;
 
                        if (!bitmask_isbitset(cpus_chosen, cpu) ||
-                           cpupower_is_cpu_online(cpu))
+                           cpupower_is_cpu_online(cpu) != 1)
                                continue;
 
                        cpus = cpufreq_get_related_cpus(cpu);
@@ -316,10 +316,7 @@ int cmd_freq_set(int argc, char **argv)
             cpu <= bitmask_last(cpus_chosen); cpu++) {
 
                if (!bitmask_isbitset(cpus_chosen, cpu) ||
-                   cpupower_is_cpu_online(cpu))
-                       continue;
-
-               if (cpupower_is_cpu_online(cpu) != 1)
+                   cpupower_is_cpu_online(cpu) != 1)
                        continue;
 
                printf(_("Setting cpu: %d\n"), cpu);
index 4761e2d65ab80a9e0c9f325edcda72c2d397f812..7a5f24543a5f06fc92e2c0c139f6af7a05f9ea37 100644 (file)
@@ -1,8 +1,8 @@
-CFLAGS += -Wall -O2
+CFLAGS += -Wall -O2 -I../../../../usr/include
 
-test_objs = test_verifier test_maps
+test_objs = test_verifier test_maps test_lru_map
 
-TEST_PROGS := test_verifier test_maps test_kmod.sh
+TEST_PROGS := test_verifier test_maps test_lru_map test_kmod.sh
 TEST_FILES := $(test_objs)
 
 all: $(test_objs)
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
new file mode 100644 (file)
index 0000000..84a5d18
--- /dev/null
@@ -0,0 +1,38 @@
+#ifndef __BPF_UTIL__
+#define __BPF_UTIL__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+/* Return the number of possible CPUs listed in
+ * /sys/devices/system/cpu/possible.
+ *
+ * The first line that parses is taken either as a range "<start>-<end>"
+ * or as a lone CPU number "<start>" (a list with a single CPU).  The
+ * list must begin at CPU 0; the count is then end + 1.
+ *
+ * The function never returns 0: if the file cannot be opened, or no
+ * usable count is found, it prints a message and exits with status 1.
+ */
+static inline unsigned int bpf_num_possible_cpus(void)
+{
+       static const char *fcpu = "/sys/devices/system/cpu/possible";
+       unsigned int start, end, possible_cpus = 0;
+       char buff[128];
+       FILE *fp;
+       int n;
+
+       fp = fopen(fcpu, "r");
+       if (!fp) {
+               printf("Failed to open %s: '%s'!\n", fcpu, strerror(errno));
+               exit(1);
+       }
+
+       while (fgets(buff, sizeof(buff), fp)) {
+               n = sscanf(buff, "%u-%u", &start, &end);
+               if (n < 1)
+                       continue;
+               /* Only "<start>" was present: the range holds one CPU */
+               if (n == 1)
+                       end = start;
+               possible_cpus = start == 0 ? end + 1 : 0;
+               break;
+       }
+
+       fclose(fp);
+       if (!possible_cpus) {
+               printf("Failed to retrieve # possible CPUs!\n");
+               exit(1);
+       }
+
+       return possible_cpus;
+}
+
+#endif /* __BPF_UTIL__ */
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
new file mode 100644 (file)
index 0000000..b13fed5
--- /dev/null
@@ -0,0 +1,587 @@
+/*
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <sys/wait.h>
+#include <sys/resource.h>
+
+#include "bpf_sys.h"
+#include "bpf_util.h"
+
+#define LOCAL_FREE_TARGET      (128)
+#define PERCPU_FREE_TARGET     (16)
+
+static int nr_cpus;
+
+static int create_map(int map_type, int map_flags, unsigned int size)
+{
+       int map_fd;
+
+       map_fd = bpf_map_create(map_type, sizeof(unsigned long long),
+                               sizeof(unsigned long long), size, map_flags);
+
+       if (map_fd == -1)
+               perror("bpf_map_create");
+
+       return map_fd;
+}
+
+static int map_subset(int map0, int map1)
+{
+       unsigned long long next_key = 0;
+       unsigned long long value0[nr_cpus], value1[nr_cpus];
+       int ret;
+
+       while (!bpf_map_next_key(map1, &next_key, &next_key)) {
+               assert(!bpf_map_lookup(map1, &next_key, value1));
+               ret = bpf_map_lookup(map0, &next_key, value0);
+               if (ret) {
+                       printf("key:%llu not found from map. %s(%d)\n",
+                              next_key, strerror(errno), errno);
+                       return 0;
+               }
+               if (value0[0] != value1[0]) {
+                       printf("key:%llu value0:%llu != value1:%llu\n",
+                              next_key, value0[0], value1[0]);
+                       return 0;
+               }
+       }
+       return 1;
+}
+
+static int map_equal(int lru_map, int expected)
+{
+       return map_subset(lru_map, expected) && map_subset(expected, lru_map);
+}
+
+static int sched_next_online(int pid, int next_to_try)
+{
+       cpu_set_t cpuset;
+
+       if (next_to_try == nr_cpus)
+               return -1;
+
+       while (next_to_try < nr_cpus) {
+               CPU_ZERO(&cpuset);
+               CPU_SET(next_to_try++, &cpuset);
+               if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
+                       break;
+       }
+
+       return next_to_try;
+}
+
+/* Size of the LRU map is 2
+ * Add key=1 (+1 key)
+ * Add key=2 (+1 key)
+ * Lookup Key=1
+ * Add Key=3
+ *   => Key=2 will be removed by LRU
+ * Iterate map.  Only found key=1 and key=3
+ */
+static void test_lru_sanity0(int map_type, int map_flags)
+{
+       unsigned long long key, value[nr_cpus];
+       int lru_map_fd, expected_map_fd;
+
+       printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+              map_flags);
+
+       assert(sched_next_online(0, 0) != -1);
+
+       if (map_flags & BPF_F_NO_COMMON_LRU)
+               lru_map_fd = create_map(map_type, map_flags, 2 * nr_cpus);
+       else
+               lru_map_fd = create_map(map_type, map_flags, 2);
+       assert(lru_map_fd != -1);
+
+       expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, 2);
+       assert(expected_map_fd != -1);
+
+       value[0] = 1234;
+
+       /* insert key=1 element */
+
+       key = 1;
+       assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+       assert(!bpf_map_update(expected_map_fd, &key, value, BPF_NOEXIST));
+
+       /* BPF_NOEXIST means: add new element if it doesn't exist */
+       assert(bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST) == -1 &&
+              /* key=1 already exists */
+              errno == EEXIST);
+
+       assert(bpf_map_update(lru_map_fd, &key, value, -1) == -1 &&
+              errno == EINVAL);
+
+       /* insert key=2 element */
+
+       /* check that key=2 is not found */
+       key = 2;
+       assert(bpf_map_lookup(lru_map_fd, &key, value) == -1 &&
+              errno == ENOENT);
+
+       /* BPF_EXIST means: update existing element */
+       assert(bpf_map_update(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
+              /* key=2 is not there */
+              errno == ENOENT);
+
+       assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+
+       /* insert key=3 element */
+
+       /* check that key=3 is not found */
+       key = 3;
+       assert(bpf_map_lookup(lru_map_fd, &key, value) == -1 &&
+              errno == ENOENT);
+
+       /* check that key=1 can be found and mark the ref bit to
+        * stop LRU from removing key=1
+        */
+       key = 1;
+       assert(!bpf_map_lookup(lru_map_fd, &key, value));
+       assert(value[0] == 1234);
+
+       key = 3;
+       assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+       assert(!bpf_map_update(expected_map_fd, &key, value, BPF_NOEXIST));
+
+       /* key=2 has been removed from the LRU */
+       key = 2;
+       assert(bpf_map_lookup(lru_map_fd, &key, value) == -1);
+
+       assert(map_equal(lru_map_fd, expected_map_fd));
+
+       close(expected_map_fd);
+       close(lru_map_fd);
+
+       printf("Pass\n");
+}
+
+/* Size of the LRU map is 1.5*tgt_free
+ * Insert 1 to tgt_free (+tgt_free keys)
+ * Lookup 1 to tgt_free/2
+ * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys)
+ * => 1+tgt_free/2 to tgt_free will be removed by LRU
+ */
+static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
+{
+       unsigned long long key, end_key, value[nr_cpus];
+       int lru_map_fd, expected_map_fd;
+       unsigned int batch_size;
+       unsigned int map_size;
+
+       if (map_flags & BPF_F_NO_COMMON_LRU)
+               /* The percpu lru list (i.e. each cpu has its own LRU
+                * list) does not have a local free list.  Hence,
+                * it will only free old nodes till there is no free
+                * from the LRU list.  Hence, this test does not apply
+                * to BPF_F_NO_COMMON_LRU
+                */
+               return;
+
+       printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+              map_flags);
+
+       assert(sched_next_online(0, 0) != -1);
+
+       batch_size = tgt_free / 2;
+       assert(batch_size * 2 == tgt_free);
+
+       map_size = tgt_free + batch_size;
+       lru_map_fd = create_map(map_type, map_flags, map_size);
+       assert(lru_map_fd != -1);
+
+       expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+       assert(expected_map_fd != -1);
+
+       value[0] = 1234;
+
+       /* Insert 1 to tgt_free (+tgt_free keys) */
+       end_key = 1 + tgt_free;
+       for (key = 1; key < end_key; key++)
+               assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+
+       /* Lookup 1 to tgt_free/2 */
+       end_key = 1 + batch_size;
+       for (key = 1; key < end_key; key++) {
+               assert(!bpf_map_lookup(lru_map_fd, &key, value));
+               assert(!bpf_map_update(expected_map_fd, &key, value,
+                                      BPF_NOEXIST));
+       }
+
+       /* Insert 1+tgt_free to 2*tgt_free
+        * => 1+tgt_free/2 to tgt_free will be
+        * removed by LRU
+        */
+       key = 1 + tgt_free;
+       end_key = key + tgt_free;
+       for (; key < end_key; key++) {
+               assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+               assert(!bpf_map_update(expected_map_fd, &key, value,
+                                      BPF_NOEXIST));
+       }
+
+       assert(map_equal(lru_map_fd, expected_map_fd));
+
+       close(expected_map_fd);
+       close(lru_map_fd);
+
+       printf("Pass\n");
+}
+
+/* Size of the LRU map 1.5 * tgt_free
+ * Insert 1 to tgt_free (+tgt_free keys)
+ * Update 1 to tgt_free/2
+ *   => The original 1 to tgt_free/2 will be removed due to
+ *      the LRU shrink process
+ * Re-insert 1 to tgt_free/2 again and do a lookup immediately
+ * Insert 1+tgt_free to tgt_free*3/2
+ * Insert 1+tgt_free*3/2 to tgt_free*5/2
+ *   => Key 1+tgt_free to tgt_free*3/2
+ *      will be removed from LRU because it has never
+ *      been lookup and ref bit is not set
+ */
+static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
+{
+       unsigned long long key, value[nr_cpus];
+       unsigned long long end_key;
+       int lru_map_fd, expected_map_fd;
+       unsigned int batch_size;
+       unsigned int map_size;
+
+       if (map_flags & BPF_F_NO_COMMON_LRU)
+               /* The percpu lru list (i.e. each cpu has its own LRU
+                * list) does not have a local free list.  Hence,
+                * it will only free old nodes till there is no free
+                * from the LRU list.  Hence, this test does not apply
+                * to BPF_F_NO_COMMON_LRU
+                */
+               return;
+
+       printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+              map_flags);
+
+       assert(sched_next_online(0, 0) != -1);
+
+       batch_size = tgt_free / 2;
+       assert(batch_size * 2 == tgt_free);
+
+       map_size = tgt_free + batch_size;
+       if (map_flags & BPF_F_NO_COMMON_LRU)
+               lru_map_fd = create_map(map_type, map_flags,
+                                       map_size * nr_cpus);
+       else
+               lru_map_fd = create_map(map_type, map_flags, map_size);
+       assert(lru_map_fd != -1);
+
+       expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+       assert(expected_map_fd != -1);
+
+       value[0] = 1234;
+
+       /* Insert 1 to tgt_free (+tgt_free keys) */
+       end_key = 1 + tgt_free;
+       for (key = 1; key < end_key; key++)
+               assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+
+       /* Any bpf_map_update will require to acquire a new node
+        * from LRU first.
+        *
+        * The local list is running out of free nodes.
+        * It gets from the global LRU list which tries to
+        * shrink the inactive list to get tgt_free
+        * number of free nodes.
+        *
+        * Hence, the oldest key 1 to tgt_free/2
+        * are removed from the LRU list.
+        */
+       key = 1;
+       if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+               assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+               assert(!bpf_map_delete(lru_map_fd, &key));
+       } else {
+               assert(bpf_map_update(lru_map_fd, &key, value, BPF_EXIST));
+       }
+
+       /* Re-insert 1 to tgt_free/2 again and do a lookup
+        * immediately.
+        */
+       end_key = 1 + batch_size;
+       value[0] = 4321;
+       for (key = 1; key < end_key; key++) {
+               assert(bpf_map_lookup(lru_map_fd, &key, value));
+               assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+               assert(!bpf_map_lookup(lru_map_fd, &key, value));
+               assert(value[0] == 4321);
+               assert(!bpf_map_update(expected_map_fd, &key, value,
+                                      BPF_NOEXIST));
+       }
+
+       value[0] = 1234;
+
+       /* Insert 1+tgt_free to tgt_free*3/2 */
+       end_key = 1 + tgt_free + batch_size;
+       for (key = 1 + tgt_free; key < end_key; key++)
+               /* These newly added but not referenced keys will be
+                * gone during the next LRU shrink.
+                */
+               assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+
+       /* Insert 1+tgt_free*3/2 to  tgt_free*5/2 */
+       end_key = key + tgt_free;
+       for (; key < end_key; key++) {
+               assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+               assert(!bpf_map_update(expected_map_fd, &key, value,
+                                      BPF_NOEXIST));
+       }
+
+       assert(map_equal(lru_map_fd, expected_map_fd));
+
+       close(expected_map_fd);
+       close(lru_map_fd);
+
+       printf("Pass\n");
+}
+
+/* Size of the LRU map is 2*tgt_free
+ * It is to test the active/inactive list rotation
+ * Insert 1 to 2*tgt_free (+2*tgt_free keys)
+ * Lookup key 1 to tgt_free*3/2
+ * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys)
+ *  => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU
+ */
+static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
+{
+       unsigned long long key, end_key, value[nr_cpus];
+       int lru_map_fd, expected_map_fd;
+       unsigned int batch_size;
+       unsigned int map_size;
+
+       printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+              map_flags);
+
+       assert(sched_next_online(0, 0) != -1);
+
+       batch_size = tgt_free / 2;
+       assert(batch_size * 2 == tgt_free);
+
+       map_size = tgt_free * 2;
+       if (map_flags & BPF_F_NO_COMMON_LRU)
+               lru_map_fd = create_map(map_type, map_flags,
+                                       map_size * nr_cpus);
+       else
+               lru_map_fd = create_map(map_type, map_flags, map_size);
+       assert(lru_map_fd != -1);
+
+       expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+       assert(expected_map_fd != -1);
+
+       value[0] = 1234;
+
+       /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */
+       end_key = 1 + (2 * tgt_free);
+       for (key = 1; key < end_key; key++)
+               assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+
+       /* Lookup key 1 to tgt_free*3/2 */
+       end_key = tgt_free + batch_size;
+       for (key = 1; key < end_key; key++) {
+               assert(!bpf_map_lookup(lru_map_fd, &key, value));
+               assert(!bpf_map_update(expected_map_fd, &key, value,
+                                      BPF_NOEXIST));
+       }
+
+       /* Add 1+2*tgt_free to tgt_free*5/2
+        * (+tgt_free/2 keys)
+        */
+       key = 2 * tgt_free + 1;
+       end_key = key + batch_size;
+       for (; key < end_key; key++) {
+               assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+               assert(!bpf_map_update(expected_map_fd, &key, value,
+                                      BPF_NOEXIST));
+       }
+
+       assert(map_equal(lru_map_fd, expected_map_fd));
+
+       close(expected_map_fd);
+       close(lru_map_fd);
+
+       printf("Pass\n");
+}
+
+/* Test deletion */
+static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
+{
+       int lru_map_fd, expected_map_fd;
+       unsigned long long key, value[nr_cpus];
+       unsigned long long end_key;
+
+       printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+              map_flags);
+
+       assert(sched_next_online(0, 0) != -1);
+
+       if (map_flags & BPF_F_NO_COMMON_LRU)
+               lru_map_fd = create_map(map_type, map_flags,
+                                       3 * tgt_free * nr_cpus);
+       else
+               lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free);
+       assert(lru_map_fd != -1);
+
+       expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0,
+                                    3 * tgt_free);
+       assert(expected_map_fd != -1);
+
+       value[0] = 1234;
+
+       for (key = 1; key <= 2 * tgt_free; key++)
+               assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+
+       key = 1;
+       assert(bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+
+       for (key = 1; key <= tgt_free; key++) {
+               assert(!bpf_map_lookup(lru_map_fd, &key, value));
+               assert(!bpf_map_update(expected_map_fd, &key, value,
+                                      BPF_NOEXIST));
+       }
+
+       for (; key <= 2 * tgt_free; key++) {
+               assert(!bpf_map_delete(lru_map_fd, &key));
+               assert(bpf_map_delete(lru_map_fd, &key));
+       }
+
+       end_key = key + 2 * tgt_free;
+       for (; key < end_key; key++) {
+               assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+               assert(!bpf_map_update(expected_map_fd, &key, value,
+                                      BPF_NOEXIST));
+       }
+
+       assert(map_equal(lru_map_fd, expected_map_fd));
+
+       close(expected_map_fd);
+       close(lru_map_fd);
+
+       printf("Pass\n");
+}
+
+static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
+{
+       unsigned long long key, value[nr_cpus];
+
+       /* Ensure the last key inserted by previous CPU can be found */
+       assert(!bpf_map_lookup(map_fd, &last_key, value));
+
+       value[0] = 1234;
+
+       key = last_key + 1;
+       assert(!bpf_map_update(map_fd, &key, value, BPF_NOEXIST));
+       assert(!bpf_map_lookup(map_fd, &key, value));
+
+       /* Cannot find the last key because it was removed by LRU */
+       assert(bpf_map_lookup(map_fd, &last_key, value));
+}
+
+/* Test map with only one element */
+static void test_lru_sanity5(int map_type, int map_flags)
+{
+       unsigned long long key, value[nr_cpus];
+       int next_sched_cpu = 0;
+       int map_fd;
+       int i;
+
+       if (map_flags & BPF_F_NO_COMMON_LRU)
+               return;
+
+       printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+              map_flags);
+
+       map_fd = create_map(map_type, map_flags, 1);
+       assert(map_fd != -1);
+
+       value[0] = 1234;
+       key = 0;
+       assert(!bpf_map_update(map_fd, &key, value, BPF_NOEXIST));
+
+       for (i = 0; i < nr_cpus; i++) {
+               pid_t pid;
+
+               pid = fork();
+               if (pid == 0) {
+                       next_sched_cpu = sched_next_online(0, next_sched_cpu);
+                       if (next_sched_cpu != -1)
+                               do_test_lru_sanity5(key, map_fd);
+                       exit(0);
+               } else if (pid == -1) {
+                       printf("couldn't spawn #%d process\n", i);
+                       exit(1);
+               } else {
+                       int status;
+
+                       /* It is mostly redundant and just allows the parent
+                        * process to update next_sched_cpu for the next child
+                        * process
+                        */
+                       next_sched_cpu = sched_next_online(pid, next_sched_cpu);
+
+                       assert(waitpid(pid, &status, 0) == pid);
+                       assert(status == 0);
+                       key++;
+               }
+       }
+
+       close(map_fd);
+
+       printf("Pass\n");
+}
+
+int main(int argc, char **argv)
+{
+       struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+       int map_types[] = {BPF_MAP_TYPE_LRU_HASH,
+                            BPF_MAP_TYPE_LRU_PERCPU_HASH};
+       int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
+       int t, f;
+
+       setbuf(stdout, NULL);
+
+       assert(!setrlimit(RLIMIT_MEMLOCK, &r));
+
+       nr_cpus = bpf_num_possible_cpus();
+       assert(nr_cpus != -1);
+       printf("nr_cpus:%d\n\n", nr_cpus);
+
+       for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
+               unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ?
+                       PERCPU_FREE_TARGET : LOCAL_FREE_TARGET;
+
+               for (t = 0; t < sizeof(map_types) / sizeof(*map_types); t++) {
+                       test_lru_sanity0(map_types[t], map_flags[f]);
+                       test_lru_sanity1(map_types[t], map_flags[f], tgt_free);
+                       test_lru_sanity2(map_types[t], map_flags[f], tgt_free);
+                       test_lru_sanity3(map_types[t], map_flags[f], tgt_free);
+                       test_lru_sanity4(map_types[t], map_flags[f], tgt_free);
+                       test_lru_sanity5(map_types[t], map_flags[f]);
+
+                       printf("\n");
+               }
+       }
+
+       return 0;
+}
index ee384f02cb6e3a0af072f55f1e0148179c14034a..eedfef8d29469b562e8ff8f609bef5016ca44bd1 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/bpf.h>
 
 #include "bpf_sys.h"
+#include "bpf_util.h"
 
 static int map_flags;
 
@@ -110,7 +111,7 @@ static void test_hashmap(int task, void *data)
 
 static void test_hashmap_percpu(int task, void *data)
 {
-       unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+       unsigned int nr_cpus = bpf_num_possible_cpus();
        long long value[nr_cpus];
        long long key, next_key;
        int expected_key_mask = 0;
@@ -258,7 +259,7 @@ static void test_arraymap(int task, void *data)
 
 static void test_arraymap_percpu(int task, void *data)
 {
-       unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+       unsigned int nr_cpus = bpf_num_possible_cpus();
        int key, next_key, fd, i;
        long values[nr_cpus];
 
@@ -313,7 +314,7 @@ static void test_arraymap_percpu(int task, void *data)
 
 static void test_arraymap_percpu_many_keys(void)
 {
-       unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+       unsigned int nr_cpus = bpf_num_possible_cpus();
        unsigned int nr_keys = 20000;
        long values[nr_cpus];
        int key, fd, i;
index 0ef8eaf6cea7c0bd161c5778e30d3b9b899f91f1..5da2e9d7689ca150390a1c5fb1360d27d4c8cb80 100644 (file)
@@ -285,7 +285,7 @@ static struct bpf_test tests[] = {
                        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 1234567),
                        BPF_EXIT_INSN(),
                },
-               .errstr = "invalid func 1234567",
+               .errstr = "invalid func unknown#1234567",
                .result = REJECT,
        },
        {
@@ -2660,6 +2660,29 @@ static struct bpf_test tests[] = {
                .result = ACCEPT,
                .prog_type = BPF_PROG_TYPE_SCHED_CLS
        },
+       {
+               "invalid map access from else condition",
+               .insns = {
+                       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+                       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+                       BPF_LD_MAP_FD(BPF_REG_1, 0),
+                       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+                       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+                       BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+                       BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES-1, 1),
+                       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+                       BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+                       BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+                       BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
+                       BPF_EXIT_INSN(),
+               },
+               .fixup_map2 = { 3 },
+               .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
+               .result = REJECT,
+               .errstr_unpriv = "R0 pointer arithmetic prohibited",
+               .result_unpriv = REJECT,
+       },
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
new file mode 100644 (file)
index 0000000..31d6e42
--- /dev/null
@@ -0,0 +1 @@
+dnotify_test
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
new file mode 100644 (file)
index 0000000..0ab1130
--- /dev/null
@@ -0,0 +1,7 @@
+TEST_PROGS := dnotify_test
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+       rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/filesystems/dnotify_test.c b/tools/testing/selftests/filesystems/dnotify_test.c
new file mode 100644 (file)
index 0000000..8b37b4a
--- /dev/null
@@ -0,0 +1,34 @@
+#define _GNU_SOURCE    /* needed to get the defines */
+#include <fcntl.h>     /* in glibc 2.2 this has the needed
+                                  values defined */
+#include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static volatile int event_fd;
+
+static void handler(int sig, siginfo_t *si, void *data) /* siginfo-style signal handler; sig and data are not used */
+{
+       event_fd = si->si_fd;   /* remember the fd reported in the siginfo so main() can print it */
+}
+
+/*
+ * Register for directory notifications on "." and print the reported file
+ * descriptor each time the notification signal arrives.  Runs until killed;
+ * returns 1 only if the setup fails.
+ */
+int main(void)
+{
+       struct sigaction act;
+       int fd;
+
+       /* deliver SIGRTMIN + 1 through the siginfo-aware handler */
+       act.sa_sigaction = handler;
+       sigemptyset(&act.sa_mask);
+       act.sa_flags = SA_SIGINFO;
+       if (sigaction(SIGRTMIN + 1, &act, NULL) == -1) {
+               perror("sigaction");
+               return 1;
+       }
+
+       fd = open(".", O_RDONLY);
+       if (fd == -1) {
+               perror("open");
+               return 1;
+       }
+       /* without these two calls no event would ever arrive, so fail loudly */
+       if (fcntl(fd, F_SETSIG, SIGRTMIN + 1) == -1) {
+               perror("fcntl(F_SETSIG)");
+               return 1;
+       }
+       if (fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT) == -1) {
+               perror("fcntl(F_NOTIFY)");
+               return 1;
+       }
+       /* we will now be notified if any of the files
+          in "." is modified or new files are created */
+       while (1) {
+               pause();
+               printf("Got event on fd=%d\n", event_fd);
+       }
+}
index e87dbe2a0b0d2b4f09b73e37192ffff53ad3a5ae..7ff002eed62473572931bf7b3b28748dad9ebdd3 100755 (executable)
@@ -24,7 +24,7 @@
 
 # Test for a color capable console
 if [ -z "$USE_COLOR" ]; then
-    tput setf 7
+    tput setf 7 || tput setaf 7
     if [ $? -eq 0 ]; then
         USE_COLOR=1
         tput sgr0
index 4126312ad64e6080887f6c9d756c5ea1c9f6a145..88bcb1767362dcb38b9e0c62131d89362ba2ba86 100755 (executable)
@@ -23,7 +23,7 @@
 
 # Test for a color capable shell and pass the result to the subdir scripts
 USE_COLOR=0
-tput setf 7
+tput setf 7 || tput setaf 7
 if [ $? -eq 0 ]; then
     USE_COLOR=1
     tput sgr0
diff --git a/tools/testing/selftests/ia64/.gitignore b/tools/testing/selftests/ia64/.gitignore
new file mode 100644 (file)
index 0000000..ab806ed
--- /dev/null
@@ -0,0 +1 @@
+aliasing-test
diff --git a/tools/testing/selftests/ia64/Makefile b/tools/testing/selftests/ia64/Makefile
new file mode 100644 (file)
index 0000000..2b3de2d
--- /dev/null
@@ -0,0 +1,8 @@
+TEST_PROGS := aliasing-test
+
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+       rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/ia64/aliasing-test.c b/tools/testing/selftests/ia64/aliasing-test.c
new file mode 100644 (file)
index 0000000..62a190d
--- /dev/null
@@ -0,0 +1,263 @@
+/*
+ * Exercise /dev/mem mmap cases that have been troublesome in the past
+ *
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ *     Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/pci.h>
+
+int sum;
+
+/*
+ * Map length bytes at offset of path read/write and, when touch is set,
+ * read every int in the mapping (accumulating into the global sum).
+ *
+ * Returns 0 on success, 1 if the mmap() itself failed, and -1 if the file
+ * could not be opened or the mapping could not be removed.  The file
+ * descriptor is closed on every path.
+ */
+static int map_mem(char *path, off_t offset, size_t length, int touch)
+{
+       int fd, rc, ret = 0;
+       void *addr;
+       int *c;
+
+       fd = open(path, O_RDWR);
+       if (fd == -1) {
+               perror(path);
+               return -1;
+       }
+
+       /* files under /proc/bus/pci get the PCIIOC_MMAP_IS_MEM ioctl before mmap */
+       if (fnmatch("/proc/bus/pci/*", path, 0) == 0) {
+               rc = ioctl(fd, PCIIOC_MMAP_IS_MEM);
+               if (rc == -1)
+                       perror("PCIIOC_MMAP_IS_MEM ioctl");
+       }
+
+       addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset);
+       if (addr == MAP_FAILED) {
+               ret = 1;
+               goto out;
+       }
+
+       if (touch) {
+               c = (int *) addr;
+               while (c < (int *) ((char *) addr + length))
+                       sum += *c++;
+       }
+
+       rc = munmap(addr, length);
+       if (rc == -1) {
+               /* report before close() gets a chance to change errno */
+               perror("munmap");
+               ret = -1;
+       }
+
+out:
+       close(fd);
+       return ret;
+}
+
+/*
+ * Recursively walk the directory tree under path.  Every entry whose name
+ * matches the fnmatch() pattern file is handed to map_mem() with the given
+ * offset, length and touch; every other entry that is a directory is
+ * descended into.
+ *
+ * Returns a negative value as soon as a map_mem() call or a nested scan
+ * fails (or memory runs out); otherwise the OR of all results seen.  All
+ * memory obtained from scandir() and malloc() is released on every path.
+ */
+static int scan_tree(char *path, char *file, off_t offset, size_t length, int touch)
+{
+       struct dirent **namelist;
+       char *name, *path2;
+       int i, n, r, rc = 0, result = 0;
+       struct stat buf;
+
+       n = scandir(path, &namelist, 0, alphasort);
+       if (n < 0) {
+               perror("scandir");
+               return -1;
+       }
+
+       for (i = 0; i < n; i++) {
+               name = namelist[i]->d_name;
+
+               if (fnmatch(".", name, 0) == 0)
+                       goto skip;
+               if (fnmatch("..", name, 0) == 0)
+                       goto skip;
+
+               path2 = malloc(strlen(path) + strlen(name) + 3);
+               if (!path2) {
+                       perror("malloc");
+                       result = -1;
+                       goto out;
+               }
+               strcpy(path2, path);
+               strcat(path2, "/");
+               strcat(path2, name);
+
+               if (fnmatch(file, name, 0) == 0) {
+                       rc = map_mem(path2, offset, length, touch);
+                       if (rc == 0)
+                               fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable");
+                       else if (rc > 0)
+                               fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length);
+                       else {
+                               fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length);
+                               free(path2);
+                               result = rc;
+                               goto out;
+                       }
+               } else {
+                       r = lstat(path2, &buf);
+                       if (r == 0 && S_ISDIR(buf.st_mode)) {
+                               rc = scan_tree(path2, file, offset, length, touch);
+                               if (rc < 0) {
+                                       free(path2);
+                                       result = rc;
+                                       goto out;
+                               }
+                       }
+               }
+
+               result |= rc;
+               free(path2);
+
+skip:
+               free(namelist[i]);
+       }
+out:
+       /* after an early exit entries i..n-1 are still allocated */
+       for (; i < n; i++)
+               free(namelist[i]);
+       free(namelist);
+       return result;
+}
+
+char buf[1024];
+
+/*
+ * Open path, write the two bytes of "1" (the digit and its terminating NUL)
+ * to it, then read it to the end through the global buf.
+ * NOTE(review): the write presumably enables ROM access -- confirm.
+ *
+ * Returns the number of bytes read, or -1 if the open or the write failed.
+ */
+static int read_rom(char *path)
+{
+       int fd, rc;
+       size_t size = 0;
+
+       fd = open(path, O_RDWR);
+       if (fd == -1) {
+               perror(path);
+               return -1;
+       }
+
+       rc = write(fd, "1", 2);
+       if (rc <= 0) {
+               /* report first: close() may overwrite errno */
+               perror("write");
+               close(fd);
+               return -1;
+       }
+
+       do {
+               rc = read(fd, buf, sizeof(buf));
+               if (rc > 0)
+                       size += rc;
+       } while (rc > 0);
+
+       close(fd);
+       return size;
+}
+
+/*
+ * Recursively walk the directory tree under path.  Every entry whose name
+ * matches the fnmatch() pattern file is read with read_rom(); every other
+ * entry that is a directory is descended into.
+ *
+ * Returns the OR of the results seen, or the result that stopped the scan
+ * early.  All memory obtained from scandir() and malloc() is released on
+ * every path.
+ */
+static int scan_rom(char *path, char *file)
+{
+       struct dirent **namelist;
+       char *name, *path2;
+       int i, n, r, rc = 0, result = 0;
+       struct stat buf;
+
+       n = scandir(path, &namelist, 0, alphasort);
+       if (n < 0) {
+               perror("scandir");
+               return -1;
+       }
+
+       for (i = 0; i < n; i++) {
+               name = namelist[i]->d_name;
+
+               if (fnmatch(".", name, 0) == 0)
+                       goto skip;
+               if (fnmatch("..", name, 0) == 0)
+                       goto skip;
+
+               path2 = malloc(strlen(path) + strlen(name) + 3);
+               if (!path2) {
+                       perror("malloc");
+                       result = -1;
+                       goto out;
+               }
+               strcpy(path2, path);
+               strcat(path2, "/");
+               strcat(path2, name);
+
+               if (fnmatch(file, name, 0) == 0) {
+                       rc = read_rom(path2);
+
+                       /*
+                        * It's OK if the ROM is unreadable.  Maybe there
+                        * is no ROM, or some other error occurred.  The
+                        * important thing is that no MCA happened.
+                        */
+                       if (rc > 0)
+                               fprintf(stderr, "PASS: %s read %d bytes\n", path2, rc);
+                       else {
+                               fprintf(stderr, "PASS: %s not readable\n", path2);
+                               /*
+                                * NOTE(review): this stops the scan at the
+                                * first unreadable ROM although it is
+                                * reported as PASS -- confirm it is intended.
+                                */
+                               free(path2);
+                               result = rc;
+                               goto out;
+                       }
+               } else {
+                       r = lstat(path2, &buf);
+                       if (r == 0 && S_ISDIR(buf.st_mode)) {
+                               rc = scan_rom(path2, file);
+                               if (rc < 0) {
+                                       free(path2);
+                                       result = rc;
+                                       goto out;
+                               }
+                       }
+               }
+
+               result |= rc;
+               free(path2);
+
+skip:
+               free(namelist[i]);
+       }
+out:
+       /* after an early exit entries i..n-1 are still allocated */
+       for (; i < n; i++)
+               free(namelist[i]);
+       free(namelist);
+       return result;
+}
+
+/*
+ * Run the /dev/mem, sysfs legacy_mem, sysfs rom and /proc/bus/pci mapping
+ * checks in a fixed order, printing one PASS/FAIL line per check.
+ *
+ * Returns 1 if any check printed FAIL (a negative scan_tree() result counts
+ * as one), 0 otherwise.
+ */
+int main(void)
+{
+       int rc, failed = 0;
+
+       if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0) {
+               fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n");
+       } else {
+               fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n");
+               failed = 1;
+       }
+
+       /*
+        * It's not safe to blindly read the VGA frame buffer.  If you know
+        * how to poke the card the right way, it should respond, but it's
+        * not safe in general.  Many machines, e.g., Intel chipsets, cover
+        * up a non-responding card by just returning -1, but others will
+        * report the failure as a machine check.
+        */
+       if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0) {
+               fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n");
+       } else {
+               fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n");
+               failed = 1;
+       }
+
+       if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0) {
+               fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n");
+       } else {
+               fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n");
+               failed = 1;
+       }
+
+       /*
+        * Often you can map all the individual pieces above (0-0xA0000,
+        * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole
+        * thing at once.  This is because the individual pieces use different
+        * attributes, and there's no single attribute supported over the
+        * whole region.
+        */
+       rc = map_mem("/dev/mem", 0, 1024*1024, 0);
+       if (rc == 0) {
+               fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n");
+       } else if (rc > 0) {
+               fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n");
+       } else {
+               fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n");
+               failed = 1;
+       }
+
+       if (scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1) < 0)
+               failed = 1;
+       if (scan_tree("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0) < 0)
+               failed = 1;
+       if (scan_tree("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1) < 0)
+               failed = 1;
+       if (scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0) < 0)
+               failed = 1;
+
+       /* an unreadable ROM is reported as PASS, so the result is not a failure */
+       scan_rom("/sys/devices", "rom");
+
+       if (scan_tree("/proc/bus/pci", "??.?", 0, 0xA0000, 1) < 0)
+               failed = 1;
+       if (scan_tree("/proc/bus/pci", "??.?", 0xA0000, 0x20000, 0) < 0)
+               failed = 1;
+       if (scan_tree("/proc/bus/pci", "??.?", 0xC0000, 0x40000, 1) < 0)
+               failed = 1;
+       if (scan_tree("/proc/bus/pci", "??.?", 0, 1024*1024, 0) < 0)
+               failed = 1;
+
+       return failed;
+}
diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore
new file mode 100644 (file)
index 0000000..9e69e98
--- /dev/null
@@ -0,0 +1,3 @@
+timestamping
+txtimestamp
+hwtstamp_config
diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile
new file mode 100644 (file)
index 0000000..ccbb9ed
--- /dev/null
@@ -0,0 +1,8 @@
+TEST_PROGS := hwtstamp_config timestamping txtimestamp
+
+all: $(TEST_PROGS)
+
+include ../../lib.mk
+
+clean:
+       rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c b/tools/testing/selftests/networking/timestamping/hwtstamp_config.c
new file mode 100644 (file)
index 0000000..e8b685a
--- /dev/null
@@ -0,0 +1,134 @@
+/* Test program for SIOC{G,S}HWTSTAMP
+ * Copyright 2013 Solarflare Communications
+ * Author: Ben Hutchings
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#include <linux/if.h>
+#include <linux/net_tstamp.h>
+#include <linux/sockios.h>
+
+/* Return the index of the first non-NULL entry among names[0..size) that
+ * equals name when case is ignored, or -1 if no entry matches.
+ */
+static int
+lookup_value(const char **names, int size, const char *name)
+{
+       int idx = 0;
+
+       while (idx < size) {
+               const char *candidate = names[idx];
+
+               if (candidate != NULL && !strcasecmp(candidate, name))
+                       return idx;
+               idx++;
+       }
+
+       return -1;
+}
+
+/* Return names[value], or NULL when value lies outside 0..size-1. */
+static const char *
+lookup_name(const char **names, int size, int value)
+{
+       if (value < 0 || value >= size)
+               return NULL;
+
+       return names[value];
+}
+
+/* Write every non-NULL entry of names[0..size) to f, one per line, each
+ * indented by four spaces.
+ */
+static void list_names(FILE *f, const char **names, int size)
+{
+       int idx;
+
+       for (idx = 0; idx < size; idx++) {
+               const char *label = names[idx];
+
+               if (label == NULL)
+                       continue;
+               fprintf(f, "    %s\n", label);
+       }
+}
+
+static const char *tx_types[] = {      /* names indexed by HWTSTAMP_TX_* value; slots not listed stay NULL */
+#define TX_TYPE(name) [HWTSTAMP_TX_ ## name] = #name
+       TX_TYPE(OFF),
+       TX_TYPE(ON),
+       TX_TYPE(ONESTEP_SYNC)
+#undef TX_TYPE
+};
+#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0])))     /* element count of tx_types, cast to int */
+
+static const char *rx_filters[] = {    /* names indexed by HWTSTAMP_FILTER_* value; slots not listed stay NULL */
+#define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name
+       RX_FILTER(NONE),
+       RX_FILTER(ALL),
+       RX_FILTER(SOME),
+       RX_FILTER(PTP_V1_L4_EVENT),
+       RX_FILTER(PTP_V1_L4_SYNC),
+       RX_FILTER(PTP_V1_L4_DELAY_REQ),
+       RX_FILTER(PTP_V2_L4_EVENT),
+       RX_FILTER(PTP_V2_L4_SYNC),
+       RX_FILTER(PTP_V2_L4_DELAY_REQ),
+       RX_FILTER(PTP_V2_L2_EVENT),
+       RX_FILTER(PTP_V2_L2_SYNC),
+       RX_FILTER(PTP_V2_L2_DELAY_REQ),
+       RX_FILTER(PTP_V2_EVENT),
+       RX_FILTER(PTP_V2_SYNC),
+       RX_FILTER(PTP_V2_DELAY_REQ),
+#undef RX_FILTER
+};
+#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0])))       /* element count of rx_filters, cast to int */
+
+/* Print the command-line synopsis and both lists of accepted names to stderr. */
+static void usage(void)
+{
+       FILE *out = stderr;
+
+       fputs("Usage: hwtstamp_config if_name [tx_type rx_filter]\n", out);
+       fputs("tx_type is any of (case-insensitive):\n", out);
+       list_names(out, tx_types, N_TX_TYPES);
+       fputs("rx_filter is any of (case-insensitive):\n", out);
+       list_names(out, rx_filters, N_RX_FILTERS);
+}
+
+/*
+ * With one argument, query the hardware time stamping configuration of the
+ * named interface (SIOCGHWTSTAMP); with three, set tx_type and rx_filter
+ * first (SIOCSHWTSTAMP).  Either way the resulting configuration is printed.
+ *
+ * Returns 0 on success, 1 if the socket or ioctl call fails, and 2 on a
+ * usage error.
+ */
+int main(int argc, char **argv)
+{
+       struct hwtstamp_config config;
+       struct ifreq ifr;
+       const char *label;
+       int setting;
+       int sock;
+
+       if (argc != 2 && argc != 4) {
+               usage();
+               return 2;
+       }
+       /* the name must fit ifr_name including its terminating NUL */
+       if (strlen(argv[1]) >= IFNAMSIZ) {
+               usage();
+               return 2;
+       }
+
+       setting = (argc == 4);
+       if (setting) {
+               config.flags = 0;
+               config.tx_type = lookup_value(tx_types, N_TX_TYPES, argv[2]);
+               config.rx_filter = lookup_value(rx_filters, N_RX_FILTERS, argv[3]);
+               if (!(config.tx_type >= 0 && config.rx_filter >= 0)) {
+                       usage();
+                       return 2;
+               }
+       }
+
+       sock = socket(AF_INET, SOCK_DGRAM, 0);
+       if (sock < 0) {
+               perror("socket");
+               return 1;
+       }
+
+       ifr.ifr_data = (caddr_t)&config;
+       strcpy(ifr.ifr_name, argv[1]);
+
+       if (ioctl(sock, setting ? SIOCSHWTSTAMP : SIOCGHWTSTAMP, &ifr) != 0) {
+               perror("ioctl");
+               return 1;
+       }
+
+       printf("flags = %#x\n", config.flags);
+
+       /* print the symbolic name when the table has one, else the raw number */
+       label = lookup_name(tx_types, N_TX_TYPES, config.tx_type);
+       if (label == NULL)
+               printf("tx_type = %d\n", config.tx_type);
+       else
+               printf("tx_type = %s\n", label);
+
+       label = lookup_name(rx_filters, N_RX_FILTERS, config.rx_filter);
+       if (label == NULL)
+               printf("rx_filter = %d\n", config.rx_filter);
+       else
+               printf("rx_filter = %s\n", label);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/networking/timestamping/timestamping.c b/tools/testing/selftests/networking/timestamping/timestamping.c
new file mode 100644 (file)
index 0000000..5cdfd74
--- /dev/null
@@ -0,0 +1,528 @@
+/*
+ * This program demonstrates how the various time stamping features in
+ * the Linux kernel work. It emulates the behavior of a PTP
+ * implementation in stand-alone master mode by sending PTPv1 Sync
+ * multicasts once every second. It looks for similar packets, but
+ * beyond that doesn't actually implement PTP.
+ *
+ * Outgoing packets are time stamped with SO_TIMESTAMPING with or
+ * without hardware support.
+ *
+ * Incoming packets are time stamped with SO_TIMESTAMPING with or
+ * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and
+ * SO_TIMESTAMP[NS].
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Author: Patrick Ohly <patrick.ohly@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <asm/types.h>
+#include <linux/net_tstamp.h>
+#include <linux/errqueue.h>
+
+#ifndef SO_TIMESTAMPING
+# define SO_TIMESTAMPING         37
+# define SCM_TIMESTAMPING        SO_TIMESTAMPING
+#endif
+
+#ifndef SO_TIMESTAMPNS
+# define SO_TIMESTAMPNS 35
+#endif
+
+#ifndef SIOCGSTAMPNS
+# define SIOCGSTAMPNS 0x8907
+#endif
+
+#ifndef SIOCSHWTSTAMP
+# define SIOCSHWTSTAMP 0x89b0
+#endif
+
+/*
+ * Print the option summary to stdout, preceded by an "invalid option" line
+ * when error is non-NULL, then exit with status 1.
+ */
+static void usage(const char *error)
+{
+       static const char help[] =
+               "timestamping interface option*\n\n"
+               "Options:\n"
+               "  IP_MULTICAST_LOOP - looping outgoing multicasts\n"
+               "  SO_TIMESTAMP - normal software time stamping, ms resolution\n"
+               "  SO_TIMESTAMPNS - more accurate software time stamping\n"
+               "  SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n"
+               "  SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n"
+               "  SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n"
+               "  SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n"
+               "  SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
+               "  SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
+               "  SIOCGSTAMP - check last socket time stamp\n"
+               "  SIOCGSTAMPNS - more accurate socket time stamp\n";
+
+       if (error != NULL)
+               printf("invalid option: %s\n", error);
+       fputs(help, stdout);
+       exit(1);
+}
+
+/* Print error followed by the text for the current errno, then exit(1). */
+static void bail(const char *error)
+{
+       const char *reason = strerror(errno);
+
+       printf("%s: %s\n", error, reason);
+       exit(1);
+}
+
+static const unsigned char sync[] = {  /* packet payload sent by sendpacket(); printpacket() looks for it at the end of bounced data */
+       0x00, 0x01, 0x00, 0x01,
+       0x5f, 0x44, 0x46, 0x4c,
+       0x54, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x01, 0x01,
+
+       /* fake uuid */
+       0x00, 0x01,
+       0x02, 0x03, 0x04, 0x05,
+
+       0x00, 0x01, 0x00, 0x37,
+       0x00, 0x00, 0x00, 0x08,
+       0x00, 0x00, 0x00, 0x00,
+       0x49, 0x05, 0xcd, 0x01,
+       0x29, 0xb1, 0x8d, 0xb0,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x01,
+
+       /* fake uuid */
+       0x00, 0x01,
+       0x02, 0x03, 0x04, 0x05,
+
+       0x00, 0x00, 0x00, 0x37,
+       0x00, 0x00, 0x00, 0x04,
+       0x44, 0x46, 0x4c, 0x54,
+       0x00, 0x00, 0xf0, 0x60,
+       0x00, 0x01, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x01,
+       0x00, 0x00, 0xf0, 0x60,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x04,
+       0x44, 0x46, 0x4c, 0x54,
+       0x00, 0x01,
+
+       /* fake uuid */
+       0x00, 0x01,
+       0x02, 0x03, 0x04, 0x05,
+
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00
+};
+
+/*
+ * Send the sync payload to addr on sock and print either the send error or
+ * the wall-clock time taken right after the send plus the byte count.
+ */
+static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
+{
+       struct timeval now;
+       int res;
+       int send_errno;
+
+       res = sendto(sock, sync, sizeof(sync), 0,
+               addr, addr_len);
+       /* capture errno before another library call can overwrite it */
+       send_errno = errno;
+       gettimeofday(&now, 0);
+       if (res < 0)
+               printf("%s: %s\n", "send", strerror(send_errno));
+       else
+               printf("%ld.%06ld: sent %d bytes\n",
+                      (long)now.tv_sec, (long)now.tv_usec,
+                      res);
+}
+
+static void printpacket(struct msghdr *msg, int res,   /* print a summary line, every control message, then the optional SIOCGSTAMP/SIOCGSTAMPNS results */
+                       char *data,
+                       int sock, int recvmsg_flags,
+                       int siocgstamp, int siocgstampns)
+{
+       struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
+       struct cmsghdr *cmsg;
+       struct timeval tv;
+       struct timespec ts;
+       struct timeval now;
+
+       gettimeofday(&now, 0);
+
+       printf("%ld.%06ld: received %s data, %d bytes from %s, %zu bytes control messages\n",
+              (long)now.tv_sec, (long)now.tv_usec,
+              (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
+              res,
+              inet_ntoa(from_addr->sin_addr),
+              msg->msg_controllen);
+       for (cmsg = CMSG_FIRSTHDR(msg);
+            cmsg;
+            cmsg = CMSG_NXTHDR(msg, cmsg)) {
+               printf("   cmsg len %zu: ", cmsg->cmsg_len);
+               switch (cmsg->cmsg_level) {
+               case SOL_SOCKET:
+                       printf("SOL_SOCKET ");
+                       switch (cmsg->cmsg_type) {
+                       case SO_TIMESTAMP: {
+                               struct timeval *stamp =
+                                       (struct timeval *)CMSG_DATA(cmsg);
+                               printf("SO_TIMESTAMP %ld.%06ld",
+                                      (long)stamp->tv_sec,
+                                      (long)stamp->tv_usec);
+                               break;
+                       }
+                       case SO_TIMESTAMPNS: {
+                               struct timespec *stamp =
+                                       (struct timespec *)CMSG_DATA(cmsg);
+                               printf("SO_TIMESTAMPNS %ld.%09ld",
+                                      (long)stamp->tv_sec,
+                                      (long)stamp->tv_nsec);
+                               break;
+                       }
+                       case SO_TIMESTAMPING: { /* NOTE(review): reads the first and third timespec of the payload without checking cmsg_len */
+                               struct timespec *stamp =
+                                       (struct timespec *)CMSG_DATA(cmsg);
+                               printf("SO_TIMESTAMPING ");
+                               printf("SW %ld.%09ld ",
+                                      (long)stamp->tv_sec,
+                                      (long)stamp->tv_nsec);
+                               stamp++;        /* step past the software time stamp just printed */
+                               /* skip deprecated HW transformed */
+                               stamp++;
+                               printf("HW raw %ld.%09ld",
+                                      (long)stamp->tv_sec,
+                                      (long)stamp->tv_nsec);
+                               break;
+                       }
+                       default:
+                               printf("type %d", cmsg->cmsg_type);
+                               break;
+                       }
+                       break;
+               case IPPROTO_IP:
+                       printf("IPPROTO_IP ");
+                       switch (cmsg->cmsg_type) {
+                       case IP_RECVERR: {
+                               struct sock_extended_err *err =
+                                       (struct sock_extended_err *)CMSG_DATA(cmsg);
+                               printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s",
+                                       strerror(err->ee_errno),
+                                       err->ee_origin,
+#ifdef SO_EE_ORIGIN_TIMESTAMPING
+                                       err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ?
+                                       "bounced packet" : "unexpected origin"
+#else
+                                       "probably SO_EE_ORIGIN_TIMESTAMPING"
+#endif
+                                       );
+                               if (res < sizeof(sync))
+                                       printf(" => truncated data?!");
+                               else if (!memcmp(sync, data + res - sizeof(sync),       /* compare with the last sizeof(sync) bytes received */
+                                                       sizeof(sync)))
+                                       printf(" => GOT OUR DATA BACK (HURRAY!)");
+                               break;
+                       }
+                       case IP_PKTINFO: {
+                               struct in_pktinfo *pktinfo =
+                                       (struct in_pktinfo *)CMSG_DATA(cmsg);
+                               printf("IP_PKTINFO interface index %u",
+                                       pktinfo->ipi_ifindex);
+                               break;
+                       }
+                       default:
+                               printf("type %d", cmsg->cmsg_type);
+                               break;
+                       }
+                       break;
+               default:
+                       printf("level %d type %d",
+                               cmsg->cmsg_level,
+                               cmsg->cmsg_type);
+                       break;
+               }
+               printf("\n");
+       }
+
+       if (siocgstamp) {       /* caller asked for the per-socket time stamp via ioctl */
+               if (ioctl(sock, SIOCGSTAMP, &tv))
+                       printf("   %s: %s\n", "SIOCGSTAMP", strerror(errno));
+               else
+                       printf("SIOCGSTAMP %ld.%06ld\n",
+                              (long)tv.tv_sec,
+                              (long)tv.tv_usec);
+       }
+       if (siocgstampns) {     /* same query through SIOCGSTAMPNS, printed with nine fractional digits */
+               if (ioctl(sock, SIOCGSTAMPNS, &ts))
+                       printf("   %s: %s\n", "SIOCGSTAMPNS", strerror(errno));
+               else
+                       printf("SIOCGSTAMPNS %ld.%09ld\n",
+                              (long)ts.tv_sec,
+                              (long)ts.tv_nsec);
+       }
+}
+
+/*
+ * Do one non-blocking recvmsg() on sock with the given extra flags.  On
+ * success the message is handed to printpacket(); on failure a one-line
+ * error naming the queue ("error" or "regular") is printed.
+ */
+static void recvpacket(int sock, int recvmsg_flags,
+                      int siocgstamp, int siocgstampns)
+{
+       struct {
+               struct cmsghdr cm;
+               char control[512];
+       } ancillary;
+       struct sockaddr_in sender;
+       char payload[256];
+       struct iovec iov;
+       struct msghdr hdr;
+       int nbytes;
+
+       /* single receive buffer for the packet data */
+       iov.iov_base = payload;
+       iov.iov_len = sizeof(payload);
+
+       memset(&hdr, 0, sizeof(hdr));
+       hdr.msg_name = (caddr_t)&sender;
+       hdr.msg_namelen = sizeof(sender);
+       hdr.msg_iov = &iov;
+       hdr.msg_iovlen = 1;
+       hdr.msg_control = &ancillary;
+       hdr.msg_controllen = sizeof(ancillary);
+
+       nbytes = recvmsg(sock, &hdr, recvmsg_flags|MSG_DONTWAIT);
+       if (nbytes >= 0) {
+               printpacket(&hdr, nbytes, payload,
+                           sock, recvmsg_flags,
+                           siocgstamp, siocgstampns);
+               return;
+       }
+
+       printf("%s %s: %s\n",
+              "recvmsg",
+              (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
+              strerror(errno));
+}
+
+int main(int argc, char **argv)
+{
+       int so_timestamping_flags = 0;
+       int so_timestamp = 0;
+       int so_timestampns = 0;
+       int siocgstamp = 0;
+       int siocgstampns = 0;
+       int ip_multicast_loop = 0;
+       char *interface;
+       int i;
+       int enabled = 1;
+       int sock;
+       struct ifreq device;
+       struct ifreq hwtstamp;
+       struct hwtstamp_config hwconfig, hwconfig_requested;
+       struct sockaddr_in addr;
+       struct ip_mreq imr;
+       struct in_addr iaddr;
+       int val;
+       socklen_t len;
+       struct timeval next;
+
+       if (argc < 2)
+               usage(0);
+       interface = argv[1];
+
+       for (i = 2; i < argc; i++) {
+               if (!strcasecmp(argv[i], "SO_TIMESTAMP"))
+                       so_timestamp = 1;
+               else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS"))
+                       so_timestampns = 1;
+               else if (!strcasecmp(argv[i], "SIOCGSTAMP"))
+                       siocgstamp = 1;
+               else if (!strcasecmp(argv[i], "SIOCGSTAMPNS"))
+                       siocgstampns = 1;
+               else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
+                       ip_multicast_loop = 1;
+               else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
+                       so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
+               else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
+                       so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
+               else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE"))
+                       so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE;
+               else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE"))
+                       so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
+               else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE"))
+                       so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE;
+               else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE"))
+                       so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
+               else
+                       usage(argv[i]);
+       }
+
+       sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+       if (sock < 0)
+               bail("socket");
+
+       memset(&device, 0, sizeof(device));
+       strncpy(device.ifr_name, interface, sizeof(device.ifr_name));
+       if (ioctl(sock, SIOCGIFADDR, &device) < 0)
+               bail("getting interface IP address");
+
+       memset(&hwtstamp, 0, sizeof(hwtstamp));
+       strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name));
+       hwtstamp.ifr_data = (void *)&hwconfig;
+       memset(&hwconfig, 0, sizeof(hwconfig));
+       hwconfig.tx_type =
+               (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
+               HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+       hwconfig.rx_filter =
+               (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+               HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
+       hwconfig_requested = hwconfig;
+       if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
+               if ((errno == EINVAL || errno == ENOTSUP) &&
+                   hwconfig_requested.tx_type == HWTSTAMP_TX_OFF &&
+                   hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE)
+                       printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n");
+               else
+                       bail("SIOCSHWTSTAMP");
+       }
+       printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n",
+              hwconfig_requested.tx_type, hwconfig.tx_type,
+              hwconfig_requested.rx_filter, hwconfig.rx_filter);
+
+       /* bind to PTP port */
+       addr.sin_family = AF_INET;
+       addr.sin_addr.s_addr = htonl(INADDR_ANY);
+       addr.sin_port = htons(319 /* PTP event port */);
+       if (bind(sock,
+                (struct sockaddr *)&addr,
+                sizeof(struct sockaddr_in)) < 0)
+               bail("bind");
+
+       /* set multicast group for outgoing packets */
+       inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */
+       addr.sin_addr = iaddr;
+       imr.imr_multiaddr.s_addr = iaddr.s_addr;
+       imr.imr_interface.s_addr =
+               ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr;
+       if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF,
+                      &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0)
+               bail("set multicast");
+
+       /* join multicast group, loop our own packet */
+       if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP,
+                      &imr, sizeof(struct ip_mreq)) < 0)
+               bail("join multicast group");
+
+       if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP,
+                      &ip_multicast_loop, sizeof(enabled)) < 0) {
+               bail("loop multicast");
+       }
+
+       /* set socket options for time stamping */
+       if (so_timestamp &&
+               setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP,
+                          &enabled, sizeof(enabled)) < 0)
+               bail("setsockopt SO_TIMESTAMP");
+
+       if (so_timestampns &&
+               setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS,
+                          &enabled, sizeof(enabled)) < 0)
+               bail("setsockopt SO_TIMESTAMPNS");
+
+       if (so_timestamping_flags &&
+               setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
+                          &so_timestamping_flags,
+                          sizeof(so_timestamping_flags)) < 0)
+               bail("setsockopt SO_TIMESTAMPING");
+
+       /* request IP_PKTINFO for debugging purposes */
+       if (setsockopt(sock, SOL_IP, IP_PKTINFO,
+                      &enabled, sizeof(enabled)) < 0)
+               printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno));
+
+       /* verify socket options */
+       len = sizeof(val);
+       if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0)
+               printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno));
+       else
+               printf("SO_TIMESTAMP %d\n", val);
+
+       if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0)
+               printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS",
+                      strerror(errno));
+       else
+               printf("SO_TIMESTAMPNS %d\n", val);
+
+       if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) {
+               printf("%s: %s\n", "getsockopt SO_TIMESTAMPING",
+                      strerror(errno));
+       } else {
+               printf("SO_TIMESTAMPING %d\n", val);
+               if (val != so_timestamping_flags)
+                       printf("   not the expected value %d\n",
+                              so_timestamping_flags);
+       }
+
+       /* send packets forever every five seconds */
+       gettimeofday(&next, 0);
+       next.tv_sec = (next.tv_sec + 1) / 5 * 5;
+       next.tv_usec = 0;
+       while (1) {
+               struct timeval now;
+               struct timeval delta;
+               long delta_us;
+               int res;
+               fd_set readfs, errorfs;
+
+               gettimeofday(&now, 0);
+               delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 +
+                       (long)(next.tv_usec - now.tv_usec);
+               if (delta_us > 0) {
+                       /* continue waiting for timeout or data */
+                       delta.tv_sec = delta_us / 1000000;
+                       delta.tv_usec = delta_us % 1000000;
+
+                       FD_ZERO(&readfs);
+                       FD_ZERO(&errorfs);
+                       FD_SET(sock, &readfs);
+                       FD_SET(sock, &errorfs);
+                       printf("%ld.%06ld: select %ldus\n",
+                              (long)now.tv_sec, (long)now.tv_usec,
+                              delta_us);
+                       res = select(sock + 1, &readfs, 0, &errorfs, &delta);
+                       gettimeofday(&now, 0);
+                       printf("%ld.%06ld: select returned: %d, %s\n",
+                              (long)now.tv_sec, (long)now.tv_usec,
+                              res,
+                              res < 0 ? strerror(errno) : "success");
+                       if (res > 0) {
+                               if (FD_ISSET(sock, &readfs))
+                                       printf("ready for reading\n");
+                               if (FD_ISSET(sock, &errorfs))
+                                       printf("has error\n");
+                               recvpacket(sock, 0,
+                                          siocgstamp,
+                                          siocgstampns);
+                               recvpacket(sock, MSG_ERRQUEUE,
+                                          siocgstamp,
+                                          siocgstampns);
+                       }
+               } else {
+                       /* write one packet */
+                       sendpacket(sock,
+                                  (struct sockaddr *)&addr,
+                                  sizeof(addr));
+                       next.tv_sec += 5;
+                       continue;
+               }
+       }
+
+       return 0;
+}
diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c
new file mode 100644 (file)
index 0000000..5df0704
--- /dev/null
@@ -0,0 +1,549 @@
+/*
+ * Copyright 2014 Google Inc.
+ * Author: willemb@google.com (Willem de Bruijn)
+ *
+ * Test software tx timestamping, including
+ *
+ * - SCHED, SND and ACK timestamps
+ * - RAW, UDP and TCP
+ * - IPv4 and IPv6
+ * - various packet sizes (to test GSO and TSO)
+ *
+ * Consult the command line arguments for help on running
+ * the various testcases.
+ *
+ * This test requires a dummy TCP server.
+ * A simple `nc6 [-u] -l -p $DESTPORT` will do
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/types.h>
+#include <error.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/errqueue.h>
+#include <linux/if_ether.h>
+#include <linux/net_tstamp.h>
+#include <netdb.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+#include <netpacket/packet.h>
+#include <poll.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+/* command line parameters */
+static int cfg_proto = SOCK_STREAM;
+static int cfg_ipproto = IPPROTO_TCP;
+static int cfg_num_pkts = 4;
+static int do_ipv4 = 1;
+static int do_ipv6 = 1;
+static int cfg_payload_len = 10;
+static bool cfg_show_payload;
+static bool cfg_do_pktinfo;
+static bool cfg_loop_nodata;
+static uint16_t dest_port = 9000;
+
+static struct sockaddr_in daddr;
+static struct sockaddr_in6 daddr6;
+static struct timespec ts_prev;
+
+/*
+ * Print one timestamp line to stderr and, when an earlier timestamp is
+ * held in ts_prev, the signed distance to it in microseconds.
+ *
+ * @name:        label printed in front of the timestamp
+ * @cur:         timestamp to print; an all-zero value prints nothing
+ * @key:         value printed as "seq"
+ * @payload_len: value printed as "len"
+ *
+ * ts_prev is overwritten with *cur once the line has been printed.
+ */
+static void __print_timestamp(const char *name, struct timespec *cur,
+                             uint32_t key, int payload_len)
+{
+       if (!(cur->tv_sec | cur->tv_nsec))
+               return;
+
+       /* tv_sec is signed and its width depends on the ABI: go via long long */
+       fprintf(stderr, "  %s: %lld s %ld us (seq=%" PRIu32 ", len=%d)",
+                       name, (long long) cur->tv_sec,
+                       (long) (cur->tv_nsec / 1000), key, payload_len);
+
+       if ((ts_prev.tv_sec | ts_prev.tv_nsec)) {
+               int64_t cur_us, prev_us;
+
+               /* widen first: long * 1000 * 1000 overflows a 32 bit long */
+               cur_us = (int64_t) cur->tv_sec * 1000 * 1000;
+               cur_us += cur->tv_nsec / 1000;
+
+               prev_us = (int64_t) ts_prev.tv_sec * 1000 * 1000;
+               prev_us += ts_prev.tv_nsec / 1000;
+
+               fprintf(stderr, "  (%+" PRId64 " us)", cur_us - prev_us);
+       }
+
+       ts_prev = *cur;
+       fprintf(stderr, "\n");
+}
+
+/* Print the current wall-clock time as the "USR" timestamp line. */
+static void print_timestamp_usr(void)
+{
+       struct timeval now;     /* gettimeofday() avoids a dependency on -lrt */
+       struct timespec stamp;
+
+       gettimeofday(&now, NULL);
+
+       /* struct timespec wants nanoseconds, gettimeofday() gave microseconds */
+       stamp.tv_nsec = now.tv_usec * 1000;
+       stamp.tv_sec = now.tv_sec;
+
+       __print_timestamp("  USR", &stamp, 0, 0);
+}
+
+/*
+ * Print the first timestamp of @tss under the label that belongs to
+ * @tstype (ENQ, SND or ACK). An unrecognised @tstype ends the program.
+ */
+static void print_timestamp(struct scm_timestamping *tss, int tstype,
+                           int tskey, int payload_len)
+{
+       const char *label;
+
+       if (tstype == SCM_TSTAMP_SCHED)
+               label = "  ENQ";
+       else if (tstype == SCM_TSTAMP_SND)
+               label = "  SND";
+       else if (tstype == SCM_TSTAMP_ACK)
+               label = "  ACK";
+       else
+               error(1, 0, "unknown timestamp type: %u",
+               tstype);
+
+       __print_timestamp(label, &tss->ts[0], tskey, payload_len);
+}
+
+/*
+ * Hex-dump at most the first 70 bytes of @data to stderr.
+ * A zero @len prints nothing at all.
+ *
+ * TODO: convert to check_and_print payload once API is stable
+ */
+static void print_payload(char *data, int len)
+{
+       int shown, pos;
+
+       if (len == 0)
+               return;
+
+       shown = len > 70 ? 70 : len;
+
+       fputs("payload: ", stderr);
+       for (pos = 0; pos < shown; pos++)
+               fprintf(stderr, "%02hhx ", data[pos]);
+       fputc('\n', stderr);
+}
+
+/*
+ * Print the interface index and the textual source/destination addresses
+ * of a pktinfo record. A NULL @saddr or @daddr is shown as "unknown".
+ */
+static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr)
+{
+       char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN];
+       const char *src = "unknown";
+       const char *dst = "unknown";
+
+       if (saddr)
+               src = inet_ntop(family, saddr, sa, sizeof(sa));
+       if (daddr)
+               dst = inet_ntop(family, daddr, da, sizeof(da));
+
+       fprintf(stderr, "         pktinfo: ifindex=%u src=%s dst=%s\n",
+               ifindex, src, dst);
+}
+
+/*
+ * Wait up to 100 ms for an event on @fd and end the program if none
+ * arrives. No event bits are requested, so only the conditions poll()
+ * always reports (such as POLLERR) can wake it up.
+ */
+static void __poll(int fd)
+{
+       struct pollfd pollfd;
+       int ret;
+
+       memset(&pollfd, 0, sizeof(pollfd));
+       pollfd.fd = fd;
+       ret = poll(&pollfd, 1, 100);
+       if (ret < 0)
+               error(1, errno, "poll");
+       /* a timeout leaves errno untouched: do not print a stale value */
+       if (ret != 1)
+               error(1, 0, "poll");
+}
+
+/*
+ * Walk the control messages of @msg and print every timestamp found.
+ *
+ * A timestamp is printed once both an SCM_TIMESTAMPING record and an
+ * accepted IP_RECVERR/IPV6_RECVERR record have been seen; the pair is
+ * then cleared so that a following pair in the same message is counted
+ * separately. PKTINFO records are printed as they appear.
+ *
+ * @msg:         message header filled in by recvmsg()
+ * @payload_len: length passed through to print_timestamp()
+ */
+static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
+{
+       struct sock_extended_err *serr = NULL;
+       struct scm_timestamping *tss = NULL;
+       struct cmsghdr *cm;
+       int batch = 0;
+
+       for (cm = CMSG_FIRSTHDR(msg);
+            cm && cm->cmsg_len;
+            cm = CMSG_NXTHDR(msg, cm)) {
+               if (cm->cmsg_level == SOL_SOCKET &&
+                   cm->cmsg_type == SCM_TIMESTAMPING) {
+                       tss = (void *) CMSG_DATA(cm);
+               } else if ((cm->cmsg_level == SOL_IP &&
+                           cm->cmsg_type == IP_RECVERR) ||
+                          (cm->cmsg_level == SOL_IPV6 &&
+                           cm->cmsg_type == IPV6_RECVERR)) {
+                       serr = (void *) CMSG_DATA(cm);
+                       /* only ENOMSG records of timestamping origin are kept */
+                       if (serr->ee_errno != ENOMSG ||
+                           serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
+                               fprintf(stderr, "unknown ip error %d %d\n",
+                                               serr->ee_errno,
+                                               serr->ee_origin);
+                               serr = NULL;
+                       }
+               } else if (cm->cmsg_level == SOL_IP &&
+                          cm->cmsg_type == IP_PKTINFO) {
+                       struct in_pktinfo *info = (void *) CMSG_DATA(cm);
+                       print_pktinfo(AF_INET, info->ipi_ifindex,
+                                     &info->ipi_spec_dst, &info->ipi_addr);
+               } else if (cm->cmsg_level == SOL_IPV6 &&
+                          cm->cmsg_type == IPV6_PKTINFO) {
+                       struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm);
+                       /* no source address is passed for IPv6 */
+                       print_pktinfo(AF_INET6, info6->ipi6_ifindex,
+                                     NULL, &info6->ipi6_addr);
+               } else
+                       fprintf(stderr, "unknown cmsg %d,%d\n",
+                                       cm->cmsg_level, cm->cmsg_type);
+
+               /* ee_info is used as the timestamp type, ee_data as its key */
+               if (serr && tss) {
+                       print_timestamp(tss, serr->ee_info, serr->ee_data,
+                                       payload_len);
+                       serr = NULL;
+                       tss = NULL;
+                       batch++;
+               }
+       }
+
+       if (batch > 1)
+               fprintf(stderr, "batched %d timestamps\n", batch);
+}
+
+/*
+ * Read one message from the error queue of @fd, print the timestamps it
+ * carries and, with cfg_show_payload set, the payload bytes received.
+ *
+ * Returns 1 when recvmsg() reported EAGAIN and 0 when a message was
+ * processed. Any other recvmsg() failure ends the program.
+ */
+static int recv_errmsg(int fd)
+{
+       static char ctrl[1024 /* overprovision*/];
+       struct msghdr msg;
+       struct iovec entry;
+       char *data;
+       int ret = 0;
+
+       data = malloc(cfg_payload_len);
+       if (!data)
+               error(1, 0, "malloc");
+
+       memset(&msg, 0, sizeof(msg));
+       memset(&entry, 0, sizeof(entry));
+       memset(ctrl, 0, sizeof(ctrl));
+
+       entry.iov_base = data;
+       entry.iov_len = cfg_payload_len;
+       msg.msg_iov = &entry;
+       msg.msg_iovlen = 1;
+       msg.msg_name = NULL;
+       msg.msg_namelen = 0;
+       msg.msg_control = ctrl;
+       msg.msg_controllen = sizeof(ctrl);
+
+       ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+       if (ret == -1 && errno != EAGAIN)
+               error(1, errno, "recvmsg");
+
+       if (ret >= 0) {
+               __recv_errmsg_cmsg(&msg, ret);
+               /*
+                * Only the first ret bytes were written by recvmsg(); the
+                * rest of the buffer is uninitialised heap memory.
+                */
+               if (cfg_show_payload)
+                       print_payload(data, ret);
+       }
+
+       free(data);
+       return ret == -1;
+}
+
+/*
+ * Run one timestamping scenario on a fresh socket.
+ *
+ * Opens a socket of the configured type for @family, enables the tx
+ * timestamps selected by @opt, then sends cfg_num_pkts packets and
+ * drains the error queue after each of them.
+ *
+ * @family: PF_INET or PF_INET6
+ * @opt:    SOF_TIMESTAMPING_TX_* flags to request
+ *
+ * Datagram and raw tests are skipped for IPv6. Every failure ends the
+ * program through error().
+ */
+static void do_test(int family, unsigned int opt)
+{
+       char *buf;
+       int fd, i, val = 1, total_len;
+
+       if (family == AF_INET6 && cfg_proto != SOCK_STREAM) {
+               /* due to lack of checksum generation code */
+               fprintf(stderr, "test: skipping datagram over IPv6\n");
+               return;
+       }
+
+       /* raw sockets carry hand-built headers in front of the payload */
+       total_len = cfg_payload_len;
+       if (cfg_proto == SOCK_RAW) {
+               total_len += sizeof(struct udphdr);
+               if (cfg_ipproto == IPPROTO_RAW)
+                       total_len += sizeof(struct iphdr);
+       }
+
+       buf = malloc(total_len);
+       if (!buf)
+               error(1, 0, "malloc");
+
+       fd = socket(family, cfg_proto, cfg_ipproto);
+       if (fd < 0)
+               error(1, errno, "socket");
+
+       if (cfg_proto == SOCK_STREAM) {
+               if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
+                              (char*) &val, sizeof(val)))
+                       error(1, errno, "setsockopt no nagle");
+
+               if (family == PF_INET) {
+                       if (connect(fd, (void *) &daddr, sizeof(daddr)))
+                               error(1, errno, "connect ipv4");
+               } else {
+                       if (connect(fd, (void *) &daddr6, sizeof(daddr6)))
+                               error(1, errno, "connect ipv6");
+               }
+       }
+
+       if (cfg_do_pktinfo) {
+               if (family == AF_INET6) {
+                       if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO,
+                                      &val, sizeof(val)))
+                               error(1, errno, "setsockopt pktinfo ipv6");
+               } else {
+                       if (setsockopt(fd, SOL_IP, IP_PKTINFO,
+                                      &val, sizeof(val)))
+                               error(1, errno, "setsockopt pktinfo ipv4");
+               }
+       }
+
+       opt |= SOF_TIMESTAMPING_SOFTWARE |
+              SOF_TIMESTAMPING_OPT_CMSG |
+              SOF_TIMESTAMPING_OPT_ID;
+       if (cfg_loop_nodata)
+               opt |= SOF_TIMESTAMPING_OPT_TSONLY;
+
+       if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+                      (char *) &opt, sizeof(opt)))
+               error(1, errno, "setsockopt timestamping");
+
+       for (i = 0; i < cfg_num_pkts; i++) {
+               /* deltas are only meaningful within one packet */
+               memset(&ts_prev, 0, sizeof(ts_prev));
+               memset(buf, 'a' + i, total_len);
+
+               if (cfg_proto == SOCK_RAW) {
+                       struct udphdr *udph;
+                       int off = 0;
+
+                       if (cfg_ipproto == IPPROTO_RAW) {
+                               struct iphdr *iph = (void *) buf;
+
+                               memset(iph, 0, sizeof(*iph));
+                               iph->ihl      = 5;
+                               iph->version  = 4;
+                               iph->ttl      = 2;
+                               iph->daddr    = daddr.sin_addr.s_addr;
+                               iph->protocol = IPPROTO_UDP;
+                               /* kernel writes saddr, csum, len */
+
+                               off = sizeof(*iph);
+                       }
+
+                       /* header fields are host-to-network conversions */
+                       udph = (void *) (buf + off);
+                       udph->source = htons(9000);     /* random spoof */
+                       udph->dest   = htons(dest_port);
+                       udph->len    = htons(sizeof(*udph) + cfg_payload_len);
+                       udph->check  = 0;       /* not allowed for IPv6 */
+               }
+
+               print_timestamp_usr();
+               if (cfg_proto != SOCK_STREAM) {
+                       if (family == PF_INET)
+                               val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr));
+                       else
+                               val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6));
+               } else {
+                       val = send(fd, buf, cfg_payload_len, 0);
+               }
+               if (val != total_len)
+                       error(1, errno, "send");
+
+               /* wait for all errors to be queued, else ACKs arrive OOO */
+               usleep(50 * 1000);
+
+               __poll(fd);
+
+               /* recv_errmsg() returns nonzero once the queue is empty */
+               while (!recv_errmsg(fd)) {}
+       }
+
+       if (close(fd))
+               error(1, errno, "close");
+
+       free(buf);
+       usleep(400 * 1000);
+}
+
+/* Print the command line help for @filepath to stderr and exit with 1. */
+static void __attribute__((noreturn)) usage(const char *filepath)
+{
+       static const char option_help[] =
+               "\nwhere options are:\n"
+               "  -4:   only IPv4\n"
+               "  -6:   only IPv6\n"
+               "  -h:   show this message\n"
+               "  -I:   request PKTINFO\n"
+               "  -l N: send N bytes at a time\n"
+               "  -n:   set no-payload option\n"
+               "  -r:   use raw\n"
+               "  -R:   use raw (IP_HDRINCL)\n"
+               "  -p N: connect to port N\n"
+               "  -u:   use udp\n"
+               "  -x:   show payload (up to 70 bytes)\n";
+
+       fprintf(stderr, "\nUsage: %s [options] hostname\n", filepath);
+       fputs(option_help, stderr);
+       exit(1);
+}
+
+/*
+ * Parse the command line into the cfg_*, do_ipv4/do_ipv6 and dest_port
+ * globals and validate the combination.
+ *
+ * Exactly one non-option argument (the hostname) must remain after the
+ * options. Invalid input ends the program through error() or usage().
+ */
+static void parse_opt(int argc, char **argv)
+{
+       int proto_count = 0;
+       /*
+        * getopt() returns int. Stored in a plain char, its -1 end marker
+        * never compares equal to -1 where char is unsigned, so the loop
+        * would not terminate.
+        */
+       int c;
+
+       while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) {
+               switch (c) {
+               case '4':
+                       do_ipv6 = 0;
+                       break;
+               case '6':
+                       do_ipv4 = 0;
+                       break;
+               case 'I':
+                       cfg_do_pktinfo = true;
+                       break;
+               case 'n':
+                       cfg_loop_nodata = true;
+                       break;
+               case 'r':
+                       proto_count++;
+                       cfg_proto = SOCK_RAW;
+                       cfg_ipproto = IPPROTO_UDP;
+                       break;
+               case 'R':
+                       proto_count++;
+                       cfg_proto = SOCK_RAW;
+                       cfg_ipproto = IPPROTO_RAW;
+                       break;
+               case 'u':
+                       proto_count++;
+                       cfg_proto = SOCK_DGRAM;
+                       cfg_ipproto = IPPROTO_UDP;
+                       break;
+               case 'l':
+                       cfg_payload_len = strtoul(optarg, NULL, 10);
+                       break;
+               case 'p':
+                       dest_port = strtoul(optarg, NULL, 10);
+                       break;
+               case 'x':
+                       cfg_show_payload = true;
+                       break;
+               case 'h':
+               default:
+                       usage(argv[0]);
+               }
+       }
+
+       if (!cfg_payload_len)
+               error(1, 0, "payload may not be zero");
+       if (cfg_proto != SOCK_STREAM && cfg_payload_len > 1472)
+               error(1, 0, "udp packet might exceed expected MTU");
+       /* -4 clears do_ipv6 and -6 clears do_ipv4: both given leaves nothing */
+       if (!do_ipv4 && !do_ipv6)
+               error(1, 0, "pass -4 or -6, not both");
+       if (proto_count > 1)
+               error(1, 0, "pass -r, -R or -u, not multiple");
+
+       if (optind != argc - 1)
+               error(1, 0, "missing required hostname argument");
+}
+
+/*
+ * Resolve @hostname and store the first IPv4 and the first IPv6 address
+ * found in daddr and daddr6, each with dest_port filled in.
+ *
+ * do_ipv4 and do_ipv6 are cleared for a family that has no address.
+ * A resolver failure ends the program.
+ */
+static void resolve_hostname(const char *hostname)
+{
+       struct addrinfo *addrs, *cur;
+       int have_ipv4 = 0, have_ipv6 = 0;
+       int ret;
+
+       /* getaddrinfo() reports through its return value, not errno */
+       ret = getaddrinfo(hostname, NULL, NULL, &addrs);
+       if (ret)
+               error(1, 0, "getaddrinfo: %s", gai_strerror(ret));
+
+       /* keep looking until one address of each family has been found */
+       for (cur = addrs; cur && !(have_ipv4 && have_ipv6);
+            cur = cur->ai_next) {
+               if (!have_ipv4 && cur->ai_family == AF_INET) {
+                       memcpy(&daddr, cur->ai_addr, sizeof(daddr));
+                       daddr.sin_port = htons(dest_port);
+                       have_ipv4 = 1;
+               }
+               else if (!have_ipv6 && cur->ai_family == AF_INET6) {
+                       memcpy(&daddr6, cur->ai_addr, sizeof(daddr6));
+                       daddr6.sin6_port = htons(dest_port);
+                       have_ipv6 = 1;
+               }
+       }
+       if (addrs)
+               freeaddrinfo(addrs);
+
+       do_ipv4 &= have_ipv4;
+       do_ipv6 &= have_ipv6;
+}
+
+/*
+ * Run every timestamp flag combination for @family. The combinations
+ * that include ACK timestamps are run for SOCK_STREAM only.
+ */
+static void do_main(int family)
+{
+       static const struct {
+               const char *banner;
+               unsigned int flags;
+               bool stream_only;
+       } cases[] = {
+               { "test SND\n", SOF_TIMESTAMPING_TX_SOFTWARE, false },
+               { "test ENQ\n", SOF_TIMESTAMPING_TX_SCHED, false },
+               { "test ENQ + SND\n",
+                 SOF_TIMESTAMPING_TX_SCHED |
+                 SOF_TIMESTAMPING_TX_SOFTWARE, false },
+               { "\ntest ACK\n", SOF_TIMESTAMPING_TX_ACK, true },
+               { "\ntest SND + ACK\n",
+                 SOF_TIMESTAMPING_TX_SOFTWARE |
+                 SOF_TIMESTAMPING_TX_ACK, true },
+               { "\ntest ENQ + SND + ACK\n",
+                 SOF_TIMESTAMPING_TX_SCHED |
+                 SOF_TIMESTAMPING_TX_SOFTWARE |
+                 SOF_TIMESTAMPING_TX_ACK, true },
+       };
+       unsigned int idx;
+
+       fprintf(stderr, "family:       %s\n",
+                       family == PF_INET ? "INET" : "INET6");
+
+       for (idx = 0; idx < sizeof(cases) / sizeof(cases[0]); idx++) {
+               if (cases[idx].stream_only && cfg_proto != SOCK_STREAM)
+                       continue;
+
+               fputs(cases[idx].banner, stderr);
+               do_test(family, cases[idx].flags);
+       }
+}
+
+/* printable names, indexed by the socket type stored in cfg_proto */
+const char *sock_names[] = { NULL, "TCP", "UDP", "RAW" };
+
+/*
+ * Entry point: parse the options, resolve the hostname given as the
+ * last argument, print the configuration and run the tests for every
+ * address family that is still enabled.
+ */
+int main(int argc, char **argv)
+{
+       const char *hostname;
+
+       if (argc == 1)
+               usage(argv[0]);
+
+       parse_opt(argc, argv);
+
+       hostname = argv[argc - 1];
+       resolve_hostname(hostname);
+
+       fprintf(stderr,
+               "protocol:     %s\n"
+               "payload:      %u\n"
+               "server port:  %u\n"
+               "\n",
+               sock_names[cfg_proto], cfg_payload_len, dest_port);
+
+       if (do_ipv4)
+               do_main(PF_INET);
+       if (do_ipv6)
+               do_main(PF_INET6);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/asm/export.h
new file mode 100644 (file)
index 0000000..2d14a9b
--- /dev/null
@@ -0,0 +1 @@
+#define EXPORT_SYMBOL(x)
index 4fe13a439fd7f22be735a3f084a28a94ab90e104..50ded63e25b710deadc13467cf7849c9f329ff6f 100644 (file)
@@ -4,3 +4,4 @@ fpu_preempt
 vmx_preempt
 fpu_signal
 vmx_signal
+vsx_preempt
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
new file mode 100644 (file)
index 0000000..1b89224
--- /dev/null
@@ -0,0 +1,2 @@
+signal
+signal_tm
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/export.h b/tools/testing/selftests/powerpc/stringloops/asm/export.h
new file mode 100644 (file)
index 0000000..2d14a9b
--- /dev/null
@@ -0,0 +1 @@
+#define EXPORT_SYMBOL(x)
index 82c0a9ce6e748129ffc3794be52b913626d66814..427621792229e68c7fcdaff89f28782bda14c1da 100644 (file)
@@ -7,3 +7,7 @@ tm-fork
 tm-tar
 tm-tmspr
 tm-exec
+tm-signal-context-chk-fpu
+tm-signal-context-chk-gpr
+tm-signal-context-chk-vmx
+tm-signal-context-chk-vsx
diff --git a/tools/testing/selftests/prctl/.gitignore b/tools/testing/selftests/prctl/.gitignore
new file mode 100644 (file)
index 0000000..0b5c274
--- /dev/null
@@ -0,0 +1,3 @@
+disable-tsc-ctxt-sw-stress-test
+disable-tsc-on-off-stress-test
+disable-tsc-test
diff --git a/tools/testing/selftests/prctl/Makefile b/tools/testing/selftests/prctl/Makefile
new file mode 100644 (file)
index 0000000..35aa1c8
--- /dev/null
@@ -0,0 +1,15 @@
+# The TSC prctl tests are built only when CROSS_COMPILE is unset and the
+# detected architecture is x86; otherwise this Makefile defines nothing.
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+# fold i?86 and x86_64 into the single name "x86"
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+TEST_PROGS := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test \
+               disable-tsc-test
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+       rm -fr $(TEST_PROGS)
+endif
+endif
diff --git a/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c
new file mode 100644 (file)
index 0000000..f7499d1
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Tests if the control register is updated correctly
+ * at context switches
+ *
+ * Warning: this test will cause a very high load for a few seconds
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <wait.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE         1   /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV                2   /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+/* Execute the rdtsc instruction and return its 64 bit result. */
+static uint64_t rdtsc(void)
+{
+       uint32_t low, high;
+
+       /* We cannot use "=A", since this would use %rax on x86_64 */
+       __asm__ __volatile__ ("rdtsc" : "=a" (low), "=d" (high));
+
+       return ((uint64_t)high << 32) | low;
+}
+
+/*
+ * SIGSEGV handler installed by segvtask(), where the signal is the
+ * expected outcome of rdtsc(): there is nothing to do.
+ */
+static void sigsegv_expect(int sig)
+{
+       /* intentionally empty */
+}
+
+/*
+ * Child body: switch the TSC to PR_TSC_SIGSEGV and execute rdtsc().
+ * Getting past rdtsc() means the setting was not honoured.
+ */
+static void segvtask(void)
+{
+       int rc = prctl(PR_SET_TSC, PR_TSC_SIGSEGV);
+
+       if (rc < 0) {
+               perror("prctl");
+               exit(0);
+       }
+
+       signal(SIGSEGV, sigsegv_expect);
+       alarm(10);
+
+       rdtsc();
+
+       /* only reached when rdtsc() did not fault */
+       fputs("FATAL ERROR, rdtsc() succeeded while disabled\n", stderr);
+       exit(0);
+}
+
+
+static void sigsegv_fail(int sig)
+{
+       fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
+       exit(0);
+}
+
+/*
+ * Child body: enable the TSC and read it in an endless loop. The loop
+ * has no exit of its own; an alarm is armed for 10 seconds beforehand.
+ */
+static void rdtsctask(void)
+{
+       int rc = prctl(PR_SET_TSC, PR_TSC_ENABLE);
+
+       if (rc < 0) {
+               perror("prctl");
+               exit(0);
+       }
+
+       signal(SIGSEGV, sigsegv_fail);
+       alarm(10);
+
+       while (1)
+               rdtsc();
+}
+
+
+/*
+ * Fork 100 children, alternating between rdtsctask() (even index) and
+ * segvtask() (odd index), then wait once per child.
+ */
+int main(void)
+{
+       const int n_tasks = 100;
+       int task, reaped;
+
+       fprintf(stderr, "[No further output means we're allright]\n");
+
+       for (task = 0; task < n_tasks; task++) {
+               if (fork() != 0)
+                       continue;
+
+               /* child: neither task function returns */
+               if (task & 1)
+                       segvtask();
+               else
+                       rdtsctask();
+       }
+
+       for (reaped = 0; reaped < n_tasks; reaped++)
+               wait(NULL);
+
+       exit(0);
+}
+
diff --git a/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c
new file mode 100644 (file)
index 0000000..a06f027
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Tests if the control register is updated correctly
+ * when set with prctl()
+ *
+ * Warning: this test will cause a very high load for a few seconds
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <wait.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE         1   /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV                2   /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+/* snippet from wikipedia :-) */
+
+/* Execute the rdtsc instruction and return its 64 bit result. */
+static uint64_t rdtsc(void)
+{
+       uint32_t low, high;
+
+       /* We cannot use "=A", since this would use %rax on x86_64 */
+       __asm__ __volatile__ ("rdtsc" : "=a" (low), "=d" (high));
+
+       return ((uint64_t)high << 32) | low;
+}
+
+/*
+ * Set by task() after disabling the TSC, cleared by the SIGSEGV handler
+ * after re-enabling it. The flag is written in a signal handler and read
+ * in the interrupted loop, so it must be volatile sig_atomic_t: the
+ * rdtsc asm has no memory clobber and the compiler could otherwise keep
+ * a cached value across it.
+ */
+volatile sig_atomic_t should_segv = 0;
+
+/*
+ * SIGSEGV handler: a fault while the TSC should be enabled is an error.
+ * Otherwise re-enable the TSC, clear the flag and read the TSC once.
+ */
+static void sigsegv_cb(int sig)
+{
+       if (!should_segv)
+       {
+               fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
+               exit(0);
+       }
+       if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
+       {
+               perror("prctl");
+               exit(0);
+       }
+       should_segv = 0;
+
+       rdtsc();
+}
+
+/*
+ * Child body: endlessly read the TSC and then disable it again. Reaching
+ * the check after rdtsc() with should_segv still set means the read
+ * succeeded although the TSC had been disabled.
+ */
+static void task(void)
+{
+       signal(SIGSEGV, sigsegv_cb);
+       alarm(10);
+
+       while (1) {
+               rdtsc();
+
+               if (should_segv) {
+                       fputs("FATAL ERROR, rdtsc() succeeded while disabled\n",
+                             stderr);
+                       exit(0);
+               }
+
+               if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0) {
+                       perror("prctl");
+                       exit(0);
+               }
+
+               should_segv = 1;
+       }
+}
+
+
+/* Fork 100 children that each run task(), then wait once per child. */
+int main(void)
+{
+       const int n_tasks = 100;
+       int spawned, reaped;
+
+       fprintf(stderr, "[No further output means we're allright]\n");
+
+       for (spawned = 0; spawned < n_tasks; spawned++) {
+               if (fork() != 0)
+                       continue;
+
+               /* child: task() loops forever */
+               task();
+       }
+
+       for (reaped = 0; reaped < n_tasks; reaped++)
+               wait(NULL);
+
+       exit(0);
+}
+
diff --git a/tools/testing/selftests/prctl/disable-tsc-test.c b/tools/testing/selftests/prctl/disable-tsc-test.c
new file mode 100644 (file)
index 0000000..8d494f7
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Basic test to test behaviour of PR_GET_TSC and PR_SET_TSC
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE         1   /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV                2   /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+/* printable names for the value filled in by prctl(PR_GET_TSC, ...) */
+const char *tsc_names[] =
+{
+       [0] = "[not set]",
+       [PR_TSC_ENABLE] = "PR_TSC_ENABLE",
+       [PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV",
+};
+
+/* Execute the rdtsc instruction and return its 64 bit result. */
+static uint64_t rdtsc(void)
+{
+       uint32_t low, high;
+
+       /* We cannot use "=A", since this would use %rax on x86_64 */
+       __asm__ __volatile__ ("rdtsc" : "=a" (low), "=d" (high));
+
+       return ((uint64_t)high << 32) | low;
+}
+
+/*
+ * SIGSEGV handler: report the current TSC setting, switch the TSC back
+ * to PR_TSC_ENABLE and print the "rdtsc() == " prefix again.
+ */
+static void sigsegv_cb(int sig)
+{
+       int tsc_val = 0;
+
+       fputs("[ SIG_SEGV ]\n", stdout);
+       fputs("prctl(PR_GET_TSC, &tsc_val); ", stdout);
+       fflush(stdout);
+
+       if (prctl(PR_GET_TSC, &tsc_val) == -1)
+               perror("prctl");
+
+       printf("tsc_val == %s\n", tsc_names[tsc_val]);
+       fputs("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n", stdout);
+       fflush(stdout);
+
+       if (prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
+               perror("prctl");
+
+       fputs("rdtsc() == ", stdout);
+}
+
+/*
+ * Walk through the TSC prctl settings and print the result of rdtsc()
+ * after each step: initial state, after PR_TSC_ENABLE and after
+ * PR_TSC_SIGSEGV. prctl() failures are reported with perror() and do
+ * not stop the run.
+ */
+int main(void)
+{
+       int tsc_val = 0;
+
+       signal(SIGSEGV, sigsegv_cb);
+
+       printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+       printf("prctl(PR_GET_TSC, &tsc_val); ");
+       fflush(stdout);
+
+       if ( prctl(PR_GET_TSC, &tsc_val) == -1)
+               perror("prctl");
+
+       printf("tsc_val == %s\n", tsc_names[tsc_val]);
+       printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+       printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
+       fflush(stdout);
+
+       if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
+               perror("prctl");
+
+       printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+       printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n");
+       fflush(stdout);
+
+       if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1)
+               perror("prctl");
+
+       /*
+        * The prefix is flushed before rdtsc() runs so that anything the
+        * SIGSEGV handler prints appears after it.
+        */
+       printf("rdtsc() == ");
+       fflush(stdout);
+       printf("%llu\n", (unsigned long long)rdtsc());
+       fflush(stdout);
+
+       exit(EXIT_SUCCESS);
+}
+
diff --git a/tools/testing/selftests/ptp/.gitignore b/tools/testing/selftests/ptp/.gitignore
new file mode 100644 (file)
index 0000000..f562e49
--- /dev/null
@@ -0,0 +1 @@
+testptp
diff --git a/tools/testing/selftests/ptp/Makefile b/tools/testing/selftests/ptp/Makefile
new file mode 100644 (file)
index 0000000..83dd42b
--- /dev/null
@@ -0,0 +1,8 @@
+# testptp is the only program built here; it is linked with -lrt.
+TEST_PROGS := testptp
+LDLIBS += -lrt
+all: $(TEST_PROGS)
+
+# Pull in the rules provided by ../lib.mk.
+include ../lib.mk
+
+# Remove the built program.
+clean:
+       rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
new file mode 100644 (file)
index 0000000..5d2eae1
--- /dev/null
@@ -0,0 +1,523 @@
+/*
+ * PTP 1588 clock support - User space test program
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__        /* For PPC64, to get LL64 types */
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <math.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/ptp_clock.h>
+
+#define DEVICE "/dev/ptp0"
+
+#ifndef ADJ_SETOFFSET
+#define ADJ_SETOFFSET 0x0100
+#endif
+
+#ifndef CLOCK_INVALID
+#define CLOCK_INVALID -1
+#endif
+
+/* clock_adjtime is not available in GLIBC < 2.14 */
+#if !__GLIBC_PREREQ(2, 14)
+#include <sys/syscall.h>
+/*
+ * Fallback for old C libraries: issue the clock_adjtime system call
+ * directly and hand back whatever syscall() returns.
+ */
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+       return syscall(__NR_clock_adjtime, id, tx);
+}
+#endif
+
+/*
+ * Encode an open file descriptor as a clockid_t: the complemented
+ * descriptor shifted left by three bits, with CLOCKFD in the low bits.
+ */
+static clockid_t get_clockid(int fd)
+{
+#define CLOCKFD 3
+#define FD_TO_CLOCKID(fd)      ((~(clockid_t) (fd) << 3) | CLOCKFD)
+       const clockid_t id = FD_TO_CLOCKID(fd);
+
+       return id;
+}
+
+/* Signal handler that main() installs for SIGALRM: prints the signal number. */
+static void handle_alarm(int s)
+{
+       printf("received signal %d\n", s);
+}
+
+/*
+ * Unblock 'signum' and install 'handler' for it.
+ *
+ * Returns 0 on success, or -1 if the signal could not be unblocked or the
+ * handler could not be installed (errno is left as set by the failing call).
+ */
+static int install_handler(int signum, void (*handler)(int))
+{
+       struct sigaction action;
+       sigset_t mask;
+
+       /* Unblock the signal. */
+       if (sigemptyset(&mask) || sigaddset(&mask, signum) ||
+           sigprocmask(SIG_UNBLOCK, &mask, NULL))
+               return -1;
+
+       /* Install the signal handler; start from an all-zero sigaction. */
+       memset(&action, 0, sizeof(action));
+       action.sa_handler = handler;
+       action.sa_flags = 0;
+       if (sigemptyset(&action.sa_mask) || sigaction(signum, &action, NULL))
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Convert a frequency offset in parts per billion into the format of the
+ * 'freq' field of 'struct timex': parts per million with a 16 bit binary
+ * fractional field.
+ */
+static long ppb_to_scaled_ppm(int ppb)
+{
+       /*
+        * Double precision math is used on purpose.  The integer forms
+        *
+        *    scaled_ppm = (ppb / 1000) << 16
+        *    scaled_ppm = (ppb << 16) / 1000
+        *
+        * would truncate (first form) or possibly overflow (second form).
+        */
+       const double scaled = ppb * 65.536;
+
+       return (long) scaled;
+}
+
+/* Fold the seconds and nanoseconds of a ptp_clock_time into nanoseconds. */
+static int64_t pctns(struct ptp_clock_time *t)
+{
+       int64_t total_ns = t->sec * 1000000000LL;
+
+       total_ns += t->nsec;
+       return total_ns;
+}
+
+/* Print the option summary for 'progname' to stderr. */
+static void usage(char *progname)
+{
+       fprintf(stderr,
+               "usage: %s [options]\n"
+               " -a val     request a one-shot alarm after 'val' seconds\n"
+               " -A val     request a periodic alarm every 'val' seconds\n"
+               " -c         query the ptp clock's capabilities\n"
+               " -d name    device to open\n"
+               " -e val     read 'val' external time stamp events\n"
+               " -f val     adjust the ptp clock frequency by 'val' ppb\n"
+               " -g         get the ptp clock time\n"
+               " -h         prints this message\n"
+               " -i val     index for event/trigger\n"
+               " -k val     measure the time offset between system and phc clock\n"
+               "            for 'val' times (Maximum 25)\n"
+               " -l         list the current pin configuration\n"
+               " -L pin,val configure pin index 'pin' with function 'val'\n"
+               "            the channel index is taken from the '-i' option\n"
+               "            'val' specifies the auxiliary function:\n"
+               "            0 - none\n"
+               "            1 - external time stamp\n"
+               "            2 - periodic output\n"
+               " -p val     enable output with a period of 'val' nanoseconds\n"
+               " -P val     enable or disable (val=1|0) the system clock PPS\n"
+               " -s         set the ptp clock time from the system time\n"
+               " -S         set the system time from the ptp clock time\n"
+               " -t val     shift the ptp clock time by 'val' seconds\n"
+               " -T val     set the ptp clock time to 'val' seconds\n",
+               progname);
+}
+
+/*
+ * Entry point: parse the command line, open the PTP character device and
+ * carry out every requested operation in the fixed order coded below.
+ *
+ * Returns 0 when the end of the function is reached or after printing the
+ * usage text for '-h'.  Returns -1 on the paths that bail out early: bad
+ * options, failure to open the device, timer setup failure, a failed
+ * clock_gettime() for '-p', or an invalid sample count / failed allocation
+ * for '-k'.  Failures of the other ioctl/clock calls are only reported
+ * with perror() and do not change the return value.
+ */
+int main(int argc, char *argv[])
+{
+       struct ptp_clock_caps caps;
+       struct ptp_extts_event event;
+       struct ptp_extts_request extts_request;
+       struct ptp_perout_request perout_request;
+       struct ptp_pin_desc desc;
+       struct timespec ts;
+       struct timex tx;
+
+       static timer_t timerid;
+       struct itimerspec timeout;
+       struct sigevent sigevent;
+
+       struct ptp_clock_time *pct;
+       struct ptp_sys_offset *sysoff;
+
+
+       char *progname;
+       unsigned int i;
+       int c, cnt, fd;
+
+       char *device = DEVICE;
+       clockid_t clkid;
+       /* 0x7fffffff is the "no -f given" marker tested further down. */
+       int adjfreq = 0x7fffffff;
+       int adjtime = 0;
+       int capabilities = 0;
+       int extts = 0;
+       int gettime = 0;
+       int index = 0;
+       int list_pins = 0;
+       int oneshot = 0;
+       int pct_offset = 0;
+       int n_samples = 0;
+       int periodic = 0;
+       /* -1 means "not requested" for perout, pin_index and pps. */
+       int perout = -1;
+       int pin_index = -1, pin_func;
+       int pps = -1;
+       int seconds = 0;
+       /* settime: 0 = nothing, 1 = -s, 2 = -S, 3 = -T. */
+       int settime = 0;
+
+       int64_t t1, t2, tp;
+       int64_t interval, offset;
+
+       /* Use the basename of argv[0] in the usage text. */
+       progname = strrchr(argv[0], '/');
+       progname = progname ? 1+progname : argv[0];
+       /*
+        * NOTE(review): the option string accepts 'v', but there is no
+        * 'v' case below, so it ends up in the default branch.
+        */
+       while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) {
+               switch (c) {
+               case 'a':
+                       oneshot = atoi(optarg);
+                       break;
+               case 'A':
+                       periodic = atoi(optarg);
+                       break;
+               case 'c':
+                       capabilities = 1;
+                       break;
+               case 'd':
+                       device = optarg;
+                       break;
+               case 'e':
+                       extts = atoi(optarg);
+                       break;
+               case 'f':
+                       adjfreq = atoi(optarg);
+                       break;
+               case 'g':
+                       gettime = 1;
+                       break;
+               case 'i':
+                       index = atoi(optarg);
+                       break;
+               case 'k':
+                       pct_offset = 1;
+                       n_samples = atoi(optarg);
+                       break;
+               case 'l':
+                       list_pins = 1;
+                       break;
+               case 'L':
+                       /* Both "pin,val" numbers are required. */
+                       cnt = sscanf(optarg, "%d,%d", &pin_index, &pin_func);
+                       if (cnt != 2) {
+                               usage(progname);
+                               return -1;
+                       }
+                       break;
+               case 'p':
+                       perout = atoi(optarg);
+                       break;
+               case 'P':
+                       pps = atoi(optarg);
+                       break;
+               case 's':
+                       settime = 1;
+                       break;
+               case 'S':
+                       settime = 2;
+                       break;
+               case 't':
+                       adjtime = atoi(optarg);
+                       break;
+               case 'T':
+                       settime = 3;
+                       seconds = atoi(optarg);
+                       break;
+               case 'h':
+                       usage(progname);
+                       return 0;
+               case '?':
+               default:
+                       usage(progname);
+                       return -1;
+               }
+       }
+
+       fd = open(device, O_RDWR);
+       if (fd < 0) {
+               fprintf(stderr, "opening %s: %s\n", device, strerror(errno));
+               return -1;
+       }
+
+       /* The clock id used by the clock_* and timer_* calls comes from fd. */
+       clkid = get_clockid(fd);
+       if (CLOCK_INVALID == clkid) {
+               fprintf(stderr, "failed to read clock id\n");
+               return -1;
+       }
+
+       /* -c: print the capability fields reported by the driver. */
+       if (capabilities) {
+               if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
+                       perror("PTP_CLOCK_GETCAPS");
+               } else {
+                       printf("capabilities:\n"
+                              "  %d maximum frequency adjustment (ppb)\n"
+                              "  %d programmable alarms\n"
+                              "  %d external time stamp channels\n"
+                              "  %d programmable periodic signals\n"
+                              "  %d pulse per second\n"
+                              "  %d programmable pins\n"
+                              "  %d cross timestamping\n",
+                              caps.max_adj,
+                              caps.n_alarm,
+                              caps.n_ext_ts,
+                              caps.n_per_out,
+                              caps.pps,
+                              caps.n_pins,
+                              caps.cross_timestamping);
+               }
+       }
+
+       /* -f: frequency adjustment, skipped while adjfreq holds the marker. */
+       if (0x7fffffff != adjfreq) {
+               memset(&tx, 0, sizeof(tx));
+               tx.modes = ADJ_FREQUENCY;
+               tx.freq = ppb_to_scaled_ppm(adjfreq);
+               if (clock_adjtime(clkid, &tx)) {
+                       perror("clock_adjtime");
+               } else {
+                       puts("frequency adjustment okay");
+               }
+       }
+
+       /* -t: shift the clock by whole seconds (a shift of 0 is a no-op). */
+       if (adjtime) {
+               memset(&tx, 0, sizeof(tx));
+               tx.modes = ADJ_SETOFFSET;
+               tx.time.tv_sec = adjtime;
+               tx.time.tv_usec = 0;
+               if (clock_adjtime(clkid, &tx) < 0) {
+                       perror("clock_adjtime");
+               } else {
+                       puts("time shift okay");
+               }
+       }
+
+       /* -g: print the clock time; ctime() supplies the trailing newline. */
+       if (gettime) {
+               if (clock_gettime(clkid, &ts)) {
+                       perror("clock_gettime");
+               } else {
+                       printf("clock time: %ld.%09ld or %s",
+                              ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec));
+               }
+       }
+
+       /* -s: copy CLOCK_REALTIME into the PTP clock. */
+       if (settime == 1) {
+               clock_gettime(CLOCK_REALTIME, &ts);
+               if (clock_settime(clkid, &ts)) {
+                       perror("clock_settime");
+               } else {
+                       puts("set time okay");
+               }
+       }
+
+       /* -S: copy the PTP clock into CLOCK_REALTIME. */
+       if (settime == 2) {
+               clock_gettime(clkid, &ts);
+               if (clock_settime(CLOCK_REALTIME, &ts)) {
+                       perror("clock_settime");
+               } else {
+                       puts("set time okay");
+               }
+       }
+
+       /* -T: set the PTP clock to 'seconds' with a zero nanosecond part. */
+       if (settime == 3) {
+               ts.tv_sec = seconds;
+               ts.tv_nsec = 0;
+               if (clock_settime(clkid, &ts)) {
+                       perror("clock_settime");
+               } else {
+                       puts("set time okay");
+               }
+       }
+
+       /* -e: enable time stamping on channel 'index' and read 'extts' events. */
+       if (extts) {
+               memset(&extts_request, 0, sizeof(extts_request));
+               extts_request.index = index;
+               extts_request.flags = PTP_ENABLE_FEATURE;
+               if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+                       perror("PTP_EXTTS_REQUEST");
+                       /* Nothing to read if the request was refused. */
+                       extts = 0;
+               } else {
+                       puts("external time stamp request okay");
+               }
+               for (; extts; extts--) {
+                       cnt = read(fd, &event, sizeof(event));
+                       if (cnt != sizeof(event)) {
+                               perror("read");
+                               break;
+                       }
+                       printf("event index %u at %lld.%09u\n", event.index,
+                              event.t.sec, event.t.nsec);
+                       fflush(stdout);
+               }
+               /* Disable the feature again. */
+               extts_request.flags = 0;
+               if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+                       perror("PTP_EXTTS_REQUEST");
+               }
+       }
+
+       /* -l: print one line per pin; n_pins stays 0 if GETCAPS fails. */
+       if (list_pins) {
+               int n_pins = 0;
+               if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
+                       perror("PTP_CLOCK_GETCAPS");
+               } else {
+                       n_pins = caps.n_pins;
+               }
+               for (i = 0; i < n_pins; i++) {
+                       desc.index = i;
+                       if (ioctl(fd, PTP_PIN_GETFUNC, &desc)) {
+                               perror("PTP_PIN_GETFUNC");
+                               break;
+                       }
+                       printf("name %s index %u func %u chan %u\n",
+                              desc.name, desc.index, desc.func, desc.chan);
+               }
+       }
+
+       /* -a: arm a one-shot timer on the PTP clock and wait for one signal. */
+       if (oneshot) {
+               install_handler(SIGALRM, handle_alarm);
+               /* Create a timer. */
+               /*
+                * NOTE(review): only sigev_notify and sigev_signo are
+                * assigned; the rest of 'sigevent' is left uninitialised.
+                */
+               sigevent.sigev_notify = SIGEV_SIGNAL;
+               sigevent.sigev_signo = SIGALRM;
+               if (timer_create(clkid, &sigevent, &timerid)) {
+                       perror("timer_create");
+                       return -1;
+               }
+               /* Start the timer. */
+               memset(&timeout, 0, sizeof(timeout));
+               timeout.it_value.tv_sec = oneshot;
+               if (timer_settime(timerid, 0, &timeout, NULL)) {
+                       perror("timer_settime");
+                       return -1;
+               }
+               pause();
+               timer_delete(timerid);
+       }
+
+       /* -A: arm a repeating timer on the PTP clock. */
+       if (periodic) {
+               install_handler(SIGALRM, handle_alarm);
+               /* Create a timer. */
+               /*
+                * NOTE(review): as above, 'sigevent' is only partly
+                * initialised here.
+                */
+               sigevent.sigev_notify = SIGEV_SIGNAL;
+               sigevent.sigev_signo = SIGALRM;
+               if (timer_create(clkid, &sigevent, &timerid)) {
+                       perror("timer_create");
+                       return -1;
+               }
+               /* Start the timer. */
+               memset(&timeout, 0, sizeof(timeout));
+               timeout.it_interval.tv_sec = periodic;
+               timeout.it_value.tv_sec = periodic;
+               if (timer_settime(timerid, 0, &timeout, NULL)) {
+                       perror("timer_settime");
+                       return -1;
+               }
+               /* This loop has no exit, so the code after it never runs. */
+               while (1) {
+                       pause();
+               }
+               timer_delete(timerid);
+       }
+
+       /*
+        * -p: request a periodic output on channel 'index' that starts two
+        * seconds after the current clock time, with a period of 'perout' ns.
+        */
+       if (perout >= 0) {
+               if (clock_gettime(clkid, &ts)) {
+                       perror("clock_gettime");
+                       return -1;
+               }
+               memset(&perout_request, 0, sizeof(perout_request));
+               perout_request.index = index;
+               perout_request.start.sec = ts.tv_sec + 2;
+               perout_request.start.nsec = 0;
+               perout_request.period.sec = 0;
+               perout_request.period.nsec = perout;
+               if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
+                       perror("PTP_PEROUT_REQUEST");
+               } else {
+                       puts("periodic output request okay");
+               }
+       }
+
+       /* -L: assign function 'pin_func' on channel 'index' to the pin. */
+       if (pin_index >= 0) {
+               memset(&desc, 0, sizeof(desc));
+               desc.index = pin_index;
+               desc.func = pin_func;
+               desc.chan = index;
+               if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) {
+                       perror("PTP_PIN_SETFUNC");
+               } else {
+                       puts("set pin function okay");
+               }
+       }
+
+       /* -P: any non-zero value enables, zero disables. */
+       if (pps != -1) {
+               int enable = pps ? 1 : 0;
+               if (ioctl(fd, PTP_ENABLE_PPS, enable)) {
+                       perror("PTP_ENABLE_PPS");
+               } else {
+                       puts("pps for system time request okay");
+               }
+       }
+
+       /* -k: take n_samples system/phc/system readings and print offsets. */
+       if (pct_offset) {
+               if (n_samples <= 0 || n_samples > 25) {
+                       puts("n_samples should be between 1 and 25");
+                       usage(progname);
+                       return -1;
+               }
+
+               sysoff = calloc(1, sizeof(*sysoff));
+               if (!sysoff) {
+                       perror("calloc");
+                       return -1;
+               }
+               sysoff->n_samples = n_samples;
+
+               /* The samples are printed below even when the ioctl fails. */
+               if (ioctl(fd, PTP_SYS_OFFSET, sysoff))
+                       perror("PTP_SYS_OFFSET");
+               else
+                       puts("system and phc clock time offset request okay");
+
+               /*
+                * Sample i is read from ts[2i] (system), ts[2i+1] (phc) and
+                * ts[2i+2] (system), so neighbouring samples share a reading.
+                */
+               pct = &sysoff->ts[0];
+               for (i = 0; i < sysoff->n_samples; i++) {
+                       t1 = pctns(pct+2*i);
+                       tp = pctns(pct+2*i+1);
+                       t2 = pctns(pct+2*i+2);
+                       interval = t2 - t1;
+                       /* Midpoint of the two system readings minus the phc reading. */
+                       offset = (t2 + t1) / 2 - tp;
+
+                       printf("system time: %lld.%u\n",
+                               (pct+2*i)->sec, (pct+2*i)->nsec);
+                       printf("phc    time: %lld.%u\n",
+                               (pct+2*i+1)->sec, (pct+2*i+1)->nsec);
+                       printf("system time: %lld.%u\n",
+                               (pct+2*i+2)->sec, (pct+2*i+2)->nsec);
+                       printf("system/phc clock time offset is %" PRId64 " ns\n"
+                              "system     clock time delay  is %" PRId64 " ns\n",
+                               offset, interval);
+               }
+
+               free(sysoff);
+       }
+
+       close(fd);
+       return 0;
+}
diff --git a/tools/testing/selftests/ptp/testptp.mk b/tools/testing/selftests/ptp/testptp.mk
new file mode 100644 (file)
index 0000000..4ef2d97
--- /dev/null
@@ -0,0 +1,33 @@
+# PTP 1588 clock support - User space test program
+#
+# Copyright (C) 2010 OMICRON electronics GmbH
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+# Toolchain and flags: the compiler honours $(CROSS_COMPILE) and headers are
+# searched under $(KBUILD_OUTPUT)/usr/include.
+CC        = $(CROSS_COMPILE)gcc
+INC       = -I$(KBUILD_OUTPUT)/usr/include
+CFLAGS    = -Wall $(INC)
+LDLIBS    = -lrt
+PROGS     = testptp
+
+all: $(PROGS)
+
+# No recipe is given here; make's built-in rules are relied upon.
+testptp: testptp.o
+
+# 'clean' removes the object file only; 'distclean' also removes the program.
+clean:
+       rm -f testptp.o
+
+distclean: clean
+       rm -f $(PROGS)
index 5a246a02dff3c6986a1bd06496473ec5be1fd252..15cf56d32155328d7ea064988b2539f4d6e2157a 100644 (file)
@@ -122,7 +122,7 @@ static int check_itimer(int which)
        else if (which == ITIMER_REAL)
                idle_loop();
 
-       gettimeofday(&end, NULL);
+       err = gettimeofday(&end, NULL);
        if (err < 0) {
                perror("Can't call gettimeofday()\n");
                return -1;
@@ -175,7 +175,7 @@ static int check_timer_create(int which)
 
        user_loop();
 
-       gettimeofday(&end, NULL);
+       err = gettimeofday(&end, NULL);
        if (err < 0) {
                perror("Can't call gettimeofday()\n");
                return -1;
diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
new file mode 100644 (file)
index 0000000..133bf9e
--- /dev/null
@@ -0,0 +1,2 @@
+vdso_test
+vdso_standalone_test_x86
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
new file mode 100644 (file)
index 0000000..706b68b
--- /dev/null
@@ -0,0 +1,20 @@
+# Everything in this file is skipped when CROSS_COMPILE is defined.
+ifndef CROSS_COMPILE
+CFLAGS := -std=gnu99
+# Extra flags used only for the standalone test, which is built -nostdlib.
+CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+# libgcc_s is added when CONFIG_X86_32 is 'y'.
+ifeq ($(CONFIG_X86_32),y)
+LDLIBS += -lgcc_s
+endif
+
+TEST_PROGS := vdso_test vdso_standalone_test_x86
+
+all: $(TEST_PROGS)
+# vdso_test has no recipe of its own; the standalone test is compiled
+# explicitly so that its extra flags are applied.
+vdso_test: parse_vdso.c vdso_test.c
+vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
+       $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
+               vdso_standalone_test_x86.c parse_vdso.c \
+               -o vdso_standalone_test_x86
+
+include ../lib.mk
+clean:
+       rm -fr $(TEST_PROGS)
+endif
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
new file mode 100644 (file)
index 0000000..1dbb4b8
--- /dev/null
@@ -0,0 +1,269 @@
+/*
+ * parse_vdso.c: Linux reference vDSO parser
+ * Written by Andrew Lutomirski, 2011-2014.
+ *
+ * This code is meant to be linked in to various programs that run on Linux.
+ * As such, it is available with as few restrictions as possible.  This file
+ * is licensed under the Creative Commons Zero License, version 1.0,
+ * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode
+ *
+ * The vDSO is a regular ELF DSO that the kernel maps into user space when
+ * it starts a program.  It works equally well in statically and dynamically
+ * linked binaries.
+ *
+ * This code is tested on x86.  In principle it should work on any
+ * architecture that has a vDSO.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <elf.h>
+
+/*
+ * To use this vDSO parser, first call one of the vdso_init_* functions.
+ * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
+ * to vdso_init_from_sysinfo_ehdr.  Otherwise pass auxv to vdso_init_from_auxv.
+ * Then call vdso_sym for each symbol you want.  For example, to look up
+ * gettimeofday on x86_64, use:
+ *
+ *     <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
+ * or
+ *     <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+ *
+ * vdso_sym will return 0 if the symbol doesn't exist or if the init function
+ * failed or was not called.  vdso_sym is a little slow, so its return value
+ * should be cached.
+ *
+ * vdso_sym is threadsafe; the init functions are not.
+ *
+ * These are the prototypes:
+ */
+extern void vdso_init_from_auxv(void *auxv);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void *vdso_sym(const char *version, const char *name);
+
+
+/* And here's the code. */
+#ifndef ELF_BITS
+# if ULONG_MAX > 0xffffffffUL
+#  define ELF_BITS 64
+# else
+#  define ELF_BITS 32
+# endif
+#endif
+
+#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
+#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
+#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+
+/*
+ * Parser state: written by vdso_init_from_sysinfo_ehdr() and read by
+ * vdso_sym() and vdso_match_version().  A single file-scope instance,
+ * also called vdso_info, is defined together with the type.
+ */
+static struct vdso_info
+{
+       /* True only after a vDSO image has been parsed successfully. */
+       bool valid;
+
+       /* Load information */
+       uintptr_t load_addr;
+       uintptr_t load_offset;  /* load_addr - recorded vaddr */
+
+       /* Symbol table */
+       ELF(Sym) *symtab;
+       const char *symstrings;
+       /* Bucket and chain arrays of the DT_HASH table, with their sizes. */
+       ELF(Word) *bucket, *chain;
+       ELF(Word) nbucket, nchain;
+
+       /* Version table */
+       ELF(Versym) *versym;
+       ELF(Verdef) *verdef;
+} vdso_info;
+
+/*
+ * SysV ELF symbol hash of the NUL-terminated string 'name', following the
+ * sample code in the ELF specification.
+ *
+ * The arithmetic is done in 32 bits.  With a 64-bit unsigned long the
+ * sample code can carry into bit 32 in "(h << 4) + c"; that bit is never
+ * folded back, so the result no longer matches the 32-bit hashes stored
+ * in the ELF image.
+ */
+static unsigned long elf_hash(const unsigned char *name)
+{
+       uint32_t h = 0;
+
+       while (*name)
+       {
+               uint32_t g;
+
+               h = (h << 4) + *name++;
+               /* Fold the top nibble back into the low bits, then clear it. */
+               g = h & 0xf0000000;
+               if (g)
+                       h ^= g >> 24;
+               h &= ~g;
+       }
+       return h;
+}
+
+/*
+ * Parse the ELF image mapped at 'base' and fill in the file-scope
+ * vdso_info.
+ *
+ * vdso_info.valid is cleared on entry and is only set at the very end,
+ * after the load offset, the dynamic string and symbol tables and the
+ * DT_HASH table have all been found; every early return leaves it false.
+ */
+void vdso_init_from_sysinfo_ehdr(uintptr_t base)
+{
+       size_t i;
+       bool found_vaddr = false;
+
+       vdso_info.valid = false;
+
+       vdso_info.load_addr = base;
+
+       ELF(Ehdr) *hdr = (ELF(Ehdr)*)base;
+       if (hdr->e_ident[EI_CLASS] !=
+           (ELF_BITS == 32 ? ELFCLASS32 : ELFCLASS64)) {
+               return;  /* Wrong ELF class -- check ELF_BITS */
+       }
+
+       ELF(Phdr) *pt = (ELF(Phdr)*)(vdso_info.load_addr + hdr->e_phoff);
+       ELF(Dyn) *dyn = 0;
+
+       /*
+        * We need two things from the segment table: the load offset
+        * and the dynamic table.
+        */
+       for (i = 0; i < hdr->e_phnum; i++)
+       {
+               /* Only the first PT_LOAD segment determines the load offset. */
+               if (pt[i].p_type == PT_LOAD && !found_vaddr) {
+                       found_vaddr = true;
+                       vdso_info.load_offset = base
+                               + (uintptr_t)pt[i].p_offset
+                               - (uintptr_t)pt[i].p_vaddr;
+               } else if (pt[i].p_type == PT_DYNAMIC) {
+                       dyn = (ELF(Dyn)*)(base + pt[i].p_offset);
+               }
+       }
+
+       if (!found_vaddr || !dyn)
+               return;  /* Failed */
+
+       /*
+        * Fish out the useful bits of the dynamic table.
+        */
+       ELF(Word) *hash = 0;
+       vdso_info.symstrings = 0;
+       vdso_info.symtab = 0;
+       vdso_info.versym = 0;
+       vdso_info.verdef = 0;
+       /* Each d_ptr is turned into a pointer by adding load_offset. */
+       for (i = 0; dyn[i].d_tag != DT_NULL; i++) {
+               switch (dyn[i].d_tag) {
+               case DT_STRTAB:
+                       vdso_info.symstrings = (const char *)
+                               ((uintptr_t)dyn[i].d_un.d_ptr
+                                + vdso_info.load_offset);
+                       break;
+               case DT_SYMTAB:
+                       vdso_info.symtab = (ELF(Sym) *)
+                               ((uintptr_t)dyn[i].d_un.d_ptr
+                                + vdso_info.load_offset);
+                       break;
+               case DT_HASH:
+                       hash = (ELF(Word) *)
+                               ((uintptr_t)dyn[i].d_un.d_ptr
+                                + vdso_info.load_offset);
+                       break;
+               case DT_VERSYM:
+                       vdso_info.versym = (ELF(Versym) *)
+                               ((uintptr_t)dyn[i].d_un.d_ptr
+                                + vdso_info.load_offset);
+                       break;
+               case DT_VERDEF:
+                       vdso_info.verdef = (ELF(Verdef) *)
+                               ((uintptr_t)dyn[i].d_un.d_ptr
+                                + vdso_info.load_offset);
+                       break;
+               }
+       }
+       if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
+               return;  /* Failed */
+
+       /* Without version definitions the versym table is not used. */
+       if (!vdso_info.verdef)
+               vdso_info.versym = 0;
+
+       /* Parse the hash table header. */
+       vdso_info.nbucket = hash[0];
+       vdso_info.nchain = hash[1];
+       vdso_info.bucket = &hash[2];
+       vdso_info.chain = &hash[vdso_info.nbucket + 2];
+
+       /* That's all we need. */
+       vdso_info.valid = true;
+}
+
+/*
+ * Return true if the version definition with index 'ver' is named 'name'
+ * and carries the ELF hash 'hash'.
+ *
+ * The version definition table has variable-size entries, so no index is
+ * built: the whole table is walked linearly from the start on every call.
+ */
+static bool vdso_match_version(ELF(Versym) ver,
+                              const char *name, ELF(Word) hash)
+{
+       ELF(Verdef) *def;
+       ELF(Verdaux) *aux;
+
+       /* Apparently bit 15 means "hidden"; it is ignored for matching. */
+       ver &= 0x7fff;
+
+       /* Step 1: find the non-base definition whose index equals ver. */
+       for (def = vdso_info.verdef; ;
+            def = (ELF(Verdef) *)((char *)def + def->vd_next)) {
+               bool is_base = (def->vd_flags & VER_FLG_BASE) != 0;
+
+               if (!is_base && (def->vd_ndx & 0x7fff) == ver)
+                       break;
+
+               if (def->vd_next == 0)
+                       return false;  /* No definition. */
+       }
+
+       /* Step 2: the hash must agree, and so must the name it points at. */
+       aux = (ELF(Verdaux) *)((char *)def + def->vd_aux);
+       if (def->vd_hash != hash)
+               return false;
+
+       return strcmp(name, vdso_info.symstrings + aux->vda_name) == 0;
+}
+
+/*
+ * Look up the function 'name' with symbol version 'version' in the vDSO.
+ *
+ * Returns the symbol's address, or 0 if the vDSO has not been parsed
+ * successfully, if its hash table has no buckets, or if no defined global
+ * or weak function with that name and version exists.
+ */
+void *vdso_sym(const char *version, const char *name)
+{
+       unsigned long ver_hash;
+
+       /* With zero buckets the modulo below would divide by zero. */
+       if (!vdso_info.valid || vdso_info.nbucket == 0)
+               return 0;
+
+       /* elf_hash() takes unsigned char strings, hence the casts. */
+       ver_hash = elf_hash((const unsigned char *)version);
+       ELF(Word) chain = vdso_info.bucket[
+               elf_hash((const unsigned char *)name) % vdso_info.nbucket];
+
+       for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
+               ELF(Sym) *sym = &vdso_info.symtab[chain];
+
+               /*
+                * Check for a defined global or weak function w/ right name.
+                * The ELF64_* accessors are applied whatever ELF_BITS is.
+                */
+               if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+                       continue;
+               if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+                   ELF64_ST_BIND(sym->st_info) != STB_WEAK)
+                       continue;
+               if (sym->st_shndx == SHN_UNDEF)
+                       continue;
+               if (strcmp(name, vdso_info.symstrings + sym->st_name))
+                       continue;
+
+               /* Check symbol version (only when a versym table is in use). */
+               if (vdso_info.versym
+                   && !vdso_match_version(vdso_info.versym[chain],
+                                          version, ver_hash))
+                       continue;
+
+               return (void *)(vdso_info.load_offset + sym->st_value);
+       }
+
+       return 0;
+}
+
+/*
+ * Scan the auxiliary vector 'auxv' for an AT_SYSINFO_EHDR entry and
+ * initialise the parser from its value.  If the vector ends without such
+ * an entry, the parser state is marked invalid.
+ */
+void vdso_init_from_auxv(void *auxv)
+{
+       ELF(auxv_t) *entry;
+
+       for (entry = auxv; entry->a_type != AT_NULL; entry++) {
+               if (entry->a_type != AT_SYSINFO_EHDR)
+                       continue;
+
+               vdso_init_from_sysinfo_ehdr(entry->a_un.a_val);
+               return;
+       }
+
+       vdso_info.valid = false;
+}
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
new file mode 100644 (file)
index 0000000..93b0ebf
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+ * vdso_standalone_test_x86.c: Sample code to test parse_vdso.c on x86
+ * Copyright (c) 2011-2014 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * You can amuse yourself by compiling with:
+ * gcc -std=gnu99 -nostdlib
+ *     -Os -fno-asynchronous-unwind-tables -flto -lgcc_s
+ *      vdso_standalone_test_x86.c parse_vdso.c
+ * to generate a small binary.  On x86_64, you can omit -lgcc_s
+ * if you want the binary to be completely standalone.
+ */
+
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+/* We need a libc function... */
+int strcmp(const char *a, const char *b)
+{
+       /* This implementation is buggy: it never returns -1. */
+       while (*a || *b) {
+               if (*a != *b)
+                       return 1;
+               if (*a == 0 || *b == 0)
+                       return 1;
+               a++;
+               b++;
+       }
+
+       return 0;
+}
+
+/* ...and two syscalls.  This is x86-specific. */
+static inline long x86_syscall3(long nr, long a0, long a1, long a2)
+{
+       long ret;
+#ifdef __x86_64__
+       asm volatile ("syscall" : "=a" (ret) : "a" (nr),
+                     "D" (a0), "S" (a1), "d" (a2) :
+                     "cc", "memory", "rcx",
+                     "r8", "r9", "r10", "r11" );
+#else
+       asm volatile ("int $0x80" : "=a" (ret) : "a" (nr),
+                     "b" (a0), "c" (a1), "d" (a2) :
+                     "cc", "memory" );
+#endif
+       return ret;
+}
+
+static inline long linux_write(int fd, const void *data, size_t len)
+{
+       return x86_syscall3(__NR_write, fd, (long)data, (long)len);
+}
+
+static inline void linux_exit(int code)
+{
+       x86_syscall3(__NR_exit, code, 0, 0);
+}
+
+void to_base10(char *lastdig, time_t n)
+{
+       while (n) {
+               *lastdig = (n % 10) + '0';
+               n /= 10;
+               lastdig--;
+       }
+}
+
+__attribute__((externally_visible)) void c_main(void **stack)
+{
+       /* Parse the stack */
+       long argc = (long)*stack;
+       stack += argc + 2;
+
+       /* Now we're pointing at the environment.  Skip it. */
+       while(*stack)
+               stack++;
+       stack++;
+
+       /* Now we're pointing at auxv.  Initialize the vDSO parser. */
+       vdso_init_from_auxv((void *)stack);
+
+       /* Find gettimeofday. */
+       typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+       gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+
+       if (!gtod)
+               linux_exit(1);
+
+       struct timeval tv;
+       long ret = gtod(&tv, 0);
+
+       if (ret == 0) {
+               char buf[] = "The time is                     .000000\n";
+               to_base10(buf + 31, tv.tv_sec);
+               to_base10(buf + 38, tv.tv_usec);
+               linux_write(1, buf, sizeof(buf) - 1);
+       } else {
+               linux_exit(ret);
+       }
+
+       linux_exit(0);
+}
+
+/*
+ * This is the real entry point.  It passes the initial stack into
+ * the C entry point.
+ */
+asm (
+       ".text\n"
+       ".global _start\n"
+       ".type _start,@function\n"
+       "_start:\n\t"
+#ifdef __x86_64__
+       "mov %rsp,%rdi\n\t"
+       "jmp c_main"
+#else
+       "push %esp\n\t"
+       "call c_main\n\t"
+       "int $3"
+#endif
+       );
diff --git a/tools/testing/selftests/vDSO/vdso_test.c b/tools/testing/selftests/vDSO/vdso_test.c
new file mode 100644 (file)
index 0000000..8daeb7d
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * vdso_test.c: Sample code to test parse_vdso.c
+ * Copyright (c) 2014 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * Compile with:
+ * gcc -std=gnu99 vdso_test.c parse_vdso.c
+ *
+ * Tested on x86, 32-bit and 64-bit.  It may work on other architectures, too.
+ */
+
+#include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+int main(int argc, char **argv)
+{
+       unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+       if (!sysinfo_ehdr) {
+               printf("AT_SYSINFO_EHDR is not present!\n");
+               return 0;       /* no vDSO entry in auxv: nothing to test */
+       }
+
+       vdso_init_from_sysinfo_ehdr(sysinfo_ehdr);      /* reuse the value checked above */
+
+       /* Find gettimeofday. */
+       typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+       gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+
+       if (!gtod) {
+               printf("Could not find __vdso_gettimeofday\n");
+               return 1;
+       }
+
+       struct timeval tv;
+       long ret = gtod(&tv, 0);
+
+       if (ret == 0) {
+               printf("The time is %lld.%06lld\n",
+                      (long long)tv.tv_sec, (long long)tv.tv_usec);
+       } else {
+               printf("__vdso_gettimeofday failed\n");
+       }
+
+       return ret ? 1 : 0;     /* report a failed vDSO call through the exit status */
+}
diff --git a/tools/testing/selftests/watchdog/.gitignore b/tools/testing/selftests/watchdog/.gitignore
new file mode 100644 (file)
index 0000000..5aac515
--- /dev/null
@@ -0,0 +1 @@
+watchdog-test
diff --git a/tools/testing/selftests/watchdog/Makefile b/tools/testing/selftests/watchdog/Makefile
new file mode 100644 (file)
index 0000000..f863c66
--- /dev/null
@@ -0,0 +1,8 @@
+TEST_PROGS := watchdog-test
+
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+       rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c
new file mode 100644 (file)
index 0000000..6983d05
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Watchdog Driver Test Program
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+
+int fd;
+const char v = 'V';
+
+/*
+ * This function simply sends an IOCTL to the driver, which in turn ticks
+ * the PC Watchdog card to reset its internal timer so it doesn't trigger
+ * a computer reset.
+ */
+static void keep_alive(void)
+{
+    int dummy;
+
+    printf(".");
+    ioctl(fd, WDIOC_KEEPALIVE, &dummy);
+}
+
+/*
+ * The main program.  Run the program with "-d" to disable the card,
+ * or "-e" to enable the card.
+ */
+
+static void term(int sig)
+{
+    int ret = write(fd, &v, 1);
+
+    close(fd);
+    if (ret < 0)
+       printf("\nStopping watchdog ticks failed (%d)...\n", errno);
+    else
+       printf("\nStopping watchdog ticks...\n");
+    exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+    int flags;
+    unsigned int ping_rate = 1;
+    int ret;
+
+    setbuf(stdout, NULL);
+
+    fd = open("/dev/watchdog", O_WRONLY);
+
+    if (fd == -1) {
+       printf("Watchdog device not enabled.\n");
+       exit(-1);
+    }
+
+    if (argc > 1) {
+       if (!strncasecmp(argv[1], "-d", 2)) {
+           flags = WDIOS_DISABLECARD;
+           ioctl(fd, WDIOC_SETOPTIONS, &flags);
+           printf("Watchdog card disabled.\n");
+           goto end;
+       } else if (!strncasecmp(argv[1], "-e", 2)) {
+           flags = WDIOS_ENABLECARD;
+           ioctl(fd, WDIOC_SETOPTIONS, &flags);
+           printf("Watchdog card enabled.\n");
+           goto end;
+       } else if (!strncasecmp(argv[1], "-t", 2) && argv[2]) {
+           flags = atoi(argv[2]);      /* timeout in seconds, parsed as a signed int */
+           ioctl(fd, WDIOC_SETTIMEOUT, &flags);
+           printf("Watchdog timeout set to %d seconds.\n", flags);
+           goto end;
+       } else if (!strncasecmp(argv[1], "-p", 2) && argv[2]) {
+           ping_rate = strtoul(argv[2], NULL, 0);
+           printf("Watchdog ping rate set to %u seconds.\n", ping_rate);
+       } else {
+           printf("-d to disable, -e to enable, -t <n> to set " \
+               "the timeout,\n-p <n> to set the ping rate, and \n");
+           printf("run by itself to tick the card.\n");
+           goto end;
+       }
+    }
+
+    printf("Watchdog Ticking Away!\n");
+
+    signal(SIGINT, term);
+
+    while(1) {
+       keep_alive();
+       sleep(ping_rate);
+    }
+end:
+    ret = write(fd, &v, 1);
+    if (ret < 0)
+       printf("Stopping watchdog ticks failed (%d)...\n", errno);
+    close(fd);
+    return 0;
+}
index eb17917c8a3a57b2551abee6d4fa873e384955b5..7972cc5124080b5566a6aa151db6f4169997533b 100644 (file)
@@ -13,7 +13,7 @@ Statistics for individual zram devices are exported through sysfs nodes at
 
 Kconfig required:
 CONFIG_ZRAM=y
-CONFIG_ZRAM_LZ4_COMPRESS=y
+CONFIG_CRYPTO_LZ4=y
 CONFIG_ZPOOL=y
 CONFIG_ZSMALLOC=y
 
index 877a8a4721b679ea41c36464bc3bddcec44b9d53..c012edbdb13b65c5e8578242e996d1bc11dbcb33 100644 (file)
@@ -3,8 +3,8 @@ all:
 all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring noring
 
 CFLAGS += -Wall
-CFLAGS += -pthread -O2 -ggdb
-LDFLAGS += -pthread -O2 -ggdb
+CFLAGS += -pthread -O2 -ggdb -flto -fwhole-program
+LDFLAGS += -pthread -O2 -ggdb -flto -fwhole-program
 
 main.o: main.c main.h
 ring.o: ring.c main.h
index 147abb452a6ccc098bf50338e0c353f4b8896f8a..f31353fac5415d8b9f5614e6f46f71a8f062f09b 100644 (file)
@@ -96,7 +96,13 @@ void set_affinity(const char *arg)
        assert(!ret);
 }
 
-static void run_guest(void)
+void poll_used(void)
+{
+       while (used_empty())
+               busy_wait();
+}
+
+static void __attribute__((__flatten__)) run_guest(void)
 {
        int completed_before;
        int completed = 0;
@@ -141,7 +147,7 @@ static void run_guest(void)
                assert(completed <= bufs);
                assert(started <= bufs);
                if (do_sleep) {
-                       if (enable_call())
+                       if (used_empty() && enable_call())
                                wait_for_call();
                } else {
                        poll_used();
@@ -149,7 +155,13 @@ static void run_guest(void)
        }
 }
 
-static void run_host(void)
+void poll_avail(void)
+{
+       while (avail_empty())
+               busy_wait();
+}
+
+static void __attribute__((__flatten__)) run_host(void)
 {
        int completed_before;
        int completed = 0;
@@ -160,7 +172,7 @@ static void run_host(void)
 
        for (;;) {
                if (do_sleep) {
-                       if (enable_kick())
+                       if (avail_empty() && enable_kick())
                                wait_for_kick();
                } else {
                        poll_avail();
index 16917acb0adef30beab588329e3e8e547aff04ef..34e63cc4c572bfcafe6fecb4784fc4ba5079bf8a 100644 (file)
@@ -56,15 +56,15 @@ void alloc_ring(void);
 int add_inbuf(unsigned, void *, void *);
 void *get_buf(unsigned *, void **);
 void disable_call();
+bool used_empty();
 bool enable_call();
 void kick_available();
-void poll_used();
 /* host side */
 void disable_kick();
+bool avail_empty();
 bool enable_kick();
 bool use_buf(unsigned *, void **);
 void call_used();
-void poll_avail();
 
 /* implemented by main */
 extern bool do_sleep;
index eda2f4824130e36f3970794f1a4b396809e6de44..b8d1c1daac7cc089734c6a6cf3453ae7277abe62 100644 (file)
@@ -24,8 +24,9 @@ void *get_buf(unsigned *lenp, void **bufp)
        return "Buffer";
 }
 
-void poll_used(void)
+bool used_empty()
 {
+       return false;
 }
 
 void disable_call()
@@ -54,8 +55,9 @@ bool enable_kick()
        assert(0);
 }
 
-void poll_avail(void)
+bool avail_empty()
 {
+       return false;
 }
 
 bool use_buf(unsigned *lenp, void **bufp)
index bd2ad1d3b7a9ef88e28e1ad982dd37638841fa04..635b07b4fdd3949c7883a2775575c0ff4d8ce228 100644 (file)
@@ -133,18 +133,9 @@ void *get_buf(unsigned *lenp, void **bufp)
        return datap;
 }
 
-void poll_used(void)
+bool used_empty()
 {
-       void *b;
-
-       do {
-               if (tailcnt == headcnt || __ptr_ring_full(&array)) {
-                       b = NULL;
-                       barrier();
-               } else {
-                       b = "Buffer\n";
-               }
-       } while (!b);
+       return (tailcnt == headcnt || __ptr_ring_full(&array));
 }
 
 void disable_call()
@@ -173,14 +164,9 @@ bool enable_kick()
        assert(0);
 }
 
-void poll_avail(void)
+bool avail_empty()
 {
-       void *b;
-
-       do {
-               barrier();
-               b = __ptr_ring_peek(&array);
-       } while (!b);
+       return !__ptr_ring_peek(&array);
 }
 
 bool use_buf(unsigned *lenp, void **bufp)
index c25c8d248b6b7dfc5a6e5bbce1bb9f6ba7561244..747c5dd47be8b075c7ca1558393e45c5b7a47e63 100644 (file)
@@ -163,12 +163,11 @@ void *get_buf(unsigned *lenp, void **bufp)
        return datap;
 }
 
-void poll_used(void)
+bool used_empty()
 {
        unsigned head = (ring_size - 1) & guest.last_used_idx;
 
-       while (ring[head].flags & DESC_HW)
-               busy_wait();
+       return (ring[head].flags & DESC_HW);
 }
 
 void disable_call()
@@ -180,13 +179,11 @@ void disable_call()
 
 bool enable_call()
 {
-       unsigned head = (ring_size - 1) & guest.last_used_idx;
-
        event->call_index = guest.last_used_idx;
        /* Flush call index write */
        /* Barrier D (for pairing) */
        smp_mb();
-       return ring[head].flags & DESC_HW;
+       return used_empty();
 }
 
 void kick_available(void)
@@ -213,20 +210,17 @@ void disable_kick()
 
 bool enable_kick()
 {
-       unsigned head = (ring_size - 1) & host.used_idx;
-
        event->kick_index = host.used_idx;
        /* Barrier C (for pairing) */
        smp_mb();
-       return !(ring[head].flags & DESC_HW);
+       return avail_empty();
 }
 
-void poll_avail(void)
+bool avail_empty()
 {
        unsigned head = (ring_size - 1) & host.used_idx;
 
-       while (!(ring[head].flags & DESC_HW))
-               busy_wait();
+       return !(ring[head].flags & DESC_HW);
 }
 
 bool use_buf(unsigned *lenp, void **bufp)
index 761866212aacf1149d03ef1151ac7727c944db72..bbc3043b2fb169aa4764922a786e9cbfc34761ed 100644 (file)
@@ -194,24 +194,16 @@ void *get_buf(unsigned *lenp, void **bufp)
        return datap;
 }
 
-void poll_used(void)
+bool used_empty()
 {
+       unsigned short last_used_idx = guest.last_used_idx;
 #ifdef RING_POLL
-       unsigned head = (ring_size - 1) & guest.last_used_idx;
+       unsigned short head = last_used_idx & (ring_size - 1);
+       unsigned index = ring.used->ring[head].id;
 
-       for (;;) {
-               unsigned index = ring.used->ring[head].id;
-
-               if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
-                       busy_wait();
-               else
-                       break;
-       }
+       return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
 #else
-       unsigned head = guest.last_used_idx;
-
-       while (ring.used->idx == head)
-               busy_wait();
+       return ring.used->idx == last_used_idx;
 #endif
 }
 
@@ -224,22 +216,11 @@ void disable_call()
 
 bool enable_call()
 {
-       unsigned short last_used_idx;
-
-       vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+       vring_used_event(&ring) = guest.last_used_idx;
        /* Flush call index write */
        /* Barrier D (for pairing) */
        smp_mb();
-#ifdef RING_POLL
-       {
-               unsigned short head = last_used_idx & (ring_size - 1);
-               unsigned index = ring.used->ring[head].id;
-
-               return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
-       }
-#else
-       return ring.used->idx == last_used_idx;
-#endif
+       return used_empty();
 }
 
 void kick_available(void)
@@ -266,36 +247,21 @@ void disable_kick()
 
 bool enable_kick()
 {
-       unsigned head = host.used_idx;
-
-       vring_avail_event(&ring) = head;
+       vring_avail_event(&ring) = host.used_idx;
        /* Barrier C (for pairing) */
        smp_mb();
-#ifdef RING_POLL
-       {
-               unsigned index = ring.avail->ring[head & (ring_size - 1)];
-
-               return (index ^ head ^ 0x8000) & ~(ring_size - 1);
-       }
-#else
-       return head == ring.avail->idx;
-#endif
+       return avail_empty();
 }
 
-void poll_avail(void)
+bool avail_empty()
 {
        unsigned head = host.used_idx;
 #ifdef RING_POLL
-       for (;;) {
-               unsigned index = ring.avail->ring[head & (ring_size - 1)];
-               if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
-                       busy_wait();
-               else
-                       break;
-       }
+       unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+       return ((index ^ head ^ 0x8000) & ~(ring_size - 1));
 #else
-       while (ring.avail->idx == head)
-               busy_wait();
+       return head == ring.avail->idx;
 #endif
 }
 
index 6e9c40eea208a2e2e2f7e36e45dc7fc3329f5169..69ccce308458a4c3de79ab1b6ffceee0f2cfb47d 100644 (file)
@@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
                        continue;
                type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
                       & ARMV8_PMU_EVTYPE_EVENT;
-               if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
+               if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
                    && (enable & BIT(i))) {
                        reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
                        reg = lower_32_bits(reg);
@@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
        eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
 
        /* Software increment event does't need to be backed by a perf event */
-       if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
+       if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
+           select_idx != ARMV8_PMU_CYCLE_IDX)
                return;
 
        memset(&attr, 0, sizeof(struct perf_event_attr));
@@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
        attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
        attr.exclude_hv = 1; /* Don't count EL2 events */
        attr.exclude_host = 1; /* Don't count host events */
-       attr.config = eventsel;
+       attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
+               ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
 
        counter = kvm_pmu_get_counter_value(vcpu, select_idx);
        /* The initial sample period (overflow count) of an event. */
index e18b30ddcdce94ae9577119f7bb7e1362adb25f5..ebe1b9fa3c4d39bdc04076d117a5d88970995c91 100644 (file)
@@ -453,17 +453,33 @@ struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
        return container_of(dev, struct vgic_io_device, dev);
 }
 
-static bool check_region(const struct vgic_register_region *region,
+static bool check_region(const struct kvm *kvm,
+                        const struct vgic_register_region *region,
                         gpa_t addr, int len)
 {
-       if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
-               return true;
-       if ((region->access_flags & VGIC_ACCESS_32bit) &&
-           len == sizeof(u32) && !(addr & 3))
-               return true;
-       if ((region->access_flags & VGIC_ACCESS_64bit) &&
-           len == sizeof(u64) && !(addr & 7))
-               return true;
+       int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+
+       switch (len) {
+       case sizeof(u8):
+               flags = VGIC_ACCESS_8bit;
+               break;
+       case sizeof(u32):
+               flags = VGIC_ACCESS_32bit;
+               break;
+       case sizeof(u64):
+               flags = VGIC_ACCESS_64bit;
+               break;
+       default:
+               return false;
+       }
+
+       if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) {
+               if (!region->bits_per_irq)
+                       return true;
+
+               /* Do we access a non-allocated IRQ? */
+               return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs;
+       }
 
        return false;
 }
@@ -477,7 +493,7 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 
        region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
                                       addr - iodev->base_addr);
-       if (!region || !check_region(region, addr, len)) {
+       if (!region || !check_region(vcpu->kvm, region, addr, len)) {
                memset(val, 0, len);
                return 0;
        }
@@ -510,10 +526,7 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 
        region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
                                       addr - iodev->base_addr);
-       if (!region)
-               return 0;
-
-       if (!check_region(region, addr, len))
+       if (!region || !check_region(vcpu->kvm, region, addr, len))
                return 0;
 
        switch (iodev->iodev_type) {
index 4c34d39d44a0eabe1a8b70d16380d0cf8ae1787a..84961b4e4422fcf50800e156063a1595969851cd 100644 (file)
@@ -50,15 +50,15 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
 #define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1)
 
 /*
- * (addr & mask) gives us the byte offset for the INT ID, so we want to
- * divide this with 'bytes per irq' to get the INT ID, which is given
- * by '(bits) / 8'.  But we do this with fixed-point-arithmetic and
- * take advantage of the fact that division by a fraction equals
- * multiplication with the inverted fraction, and scale up both the
- * numerator and denominator with 8 to support at most 64 bits per IRQ:
+ * (addr & mask) gives us the _byte_ offset for the INT ID.
+ * We multiply this by 8 to get the _bit_ offset, then divide this by
+ * the number of bits to learn the actual INT ID.
+ * But instead of a division (which requires a "long long div" implementation),
+ * we shift by the binary logarithm of <bits>.
+ * This assumes that <bits> is a power of two.
  */
 #define VGIC_ADDR_TO_INTID(addr, bits)  (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
-                                       64 / (bits) / 8)
+                                       8 >> ilog2(bits))
 
 /*
  * Some VGIC registers store per-IRQ information, with a different number
index 2893d5ba523ad26f139fbac8d2dd1c2aa43649ad..6440b56ec90e2198a234fa9b09d87b0c518efea7 100644 (file)
@@ -273,6 +273,18 @@ retry:
                 * no more work for us to do.
                 */
                spin_unlock(&irq->irq_lock);
+
+               /*
+                * We have to kick the VCPU here, because we could be
+                * queueing an edge-triggered interrupt for which we
+                * get no EOI maintenance interrupt. In that case,
+                * while the IRQ is already on the VCPU's AP list, the
+                * VCPU could have EOI'ed the original interrupt and
+                * won't see this one until it exits for some other
+                * reason.
+                */
+               if (vcpu)
+                       kvm_vcpu_kick(vcpu);
                return false;
        }
 
index db9668869f6ff6866a72f278def5770d71190994..efeceb0a222dd8a793cd8d7e5a7770b7799851c3 100644 (file)
@@ -84,12 +84,14 @@ static void async_pf_execute(struct work_struct *work)
         * mm and might be done in another context, so we must
         * use FOLL_REMOTE.
         */
-       __get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL, FOLL_REMOTE);
+       __get_user_pages_unlocked(NULL, mm, addr, 1, NULL,
+                       FOLL_WRITE | FOLL_REMOTE);
 
        kvm_async_page_present_sync(vcpu, apf);
 
        spin_lock(&vcpu->async_pf.lock);
        list_add_tail(&apf->link, &vcpu->async_pf.done);
+       apf->vcpu = NULL;
        spin_unlock(&vcpu->async_pf.lock);
 
        /*
@@ -112,6 +114,8 @@ static void async_pf_execute(struct work_struct *work)
 
 void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 {
+       spin_lock(&vcpu->async_pf.lock);
+
        /* cancel outstanding work queue item */
        while (!list_empty(&vcpu->async_pf.queue)) {
                struct kvm_async_pf *work =
@@ -119,6 +123,14 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                                         typeof(*work), queue);
                list_del(&work->queue);
 
+               /*
+                * We know it's present in vcpu->async_pf.done, do
+                * nothing here.
+                */
+               if (!work->vcpu)
+                       continue;
+
+               spin_unlock(&vcpu->async_pf.lock);
 #ifdef CONFIG_KVM_ASYNC_PF_SYNC
                flush_work(&work->work);
 #else
@@ -128,9 +140,9 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                        kmem_cache_free(async_pf_cache, work);
                }
 #endif
+               spin_lock(&vcpu->async_pf.lock);
        }
 
-       spin_lock(&vcpu->async_pf.lock);
        while (!list_empty(&vcpu->async_pf.done)) {
                struct kvm_async_pf *work =
                        list_first_entry(&vcpu->async_pf.done,
index f397e9b20370a2fb547b04fe555802846b2e9aef..a29786dd95221017b141a060b031c5c899dac2e5 100644 (file)
@@ -42,6 +42,7 @@
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
 
+static struct workqueue_struct *irqfd_cleanup_wq;
 
 static void
 irqfd_inject(struct work_struct *work)
@@ -167,7 +168,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
 
        list_del_init(&irqfd->list);
 
-       schedule_work(&irqfd->shutdown);
+       queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
 }
 
 int __attribute__((weak)) kvm_arch_set_irq_inatomic(
@@ -554,7 +555,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
         * so that we guarantee there will not be any more interrupts on this
         * gsi once this deassign function returns.
         */
-       flush_work(&irqfd->shutdown);
+       flush_workqueue(irqfd_cleanup_wq);
 
        return 0;
 }
@@ -591,7 +592,7 @@ kvm_irqfd_release(struct kvm *kvm)
         * Block until we know all outstanding shutdown jobs have completed
         * since we do not take a kvm* reference.
         */
-       flush_work(&irqfd->shutdown);
+       flush_workqueue(irqfd_cleanup_wq);
 
 }
 
@@ -621,8 +622,23 @@ void kvm_irq_routing_update(struct kvm *kvm)
        spin_unlock_irq(&kvm->irqfds.lock);
 }
 
+/*
+ * create a host-wide workqueue for issuing deferred shutdown requests
+ * aggregated from all vm* instances. We need our own isolated
+ * queue to ease flushing work items when a VM exits.
+ */
+int kvm_irqfd_init(void)
+{
+       irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
+       if (!irqfd_cleanup_wq)
+               return -ENOMEM;
+
+       return 0;
+}
+
 void kvm_irqfd_exit(void)
 {
+       destroy_workqueue(irqfd_cleanup_wq);
 }
 #endif
 
index 81dfc73d3df39851e3b44cd546f5f66046ec5e0f..5c360347a1e9fc2091f5abf0bbb6a3432e13add9 100644 (file)
@@ -1346,21 +1346,19 @@ unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *w
 static int get_user_page_nowait(unsigned long start, int write,
                struct page **page)
 {
-       int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET;
+       int flags = FOLL_NOWAIT | FOLL_HWPOISON;
 
        if (write)
                flags |= FOLL_WRITE;
 
-       return __get_user_pages(current, current->mm, start, 1, flags, page,
-                       NULL, NULL);
+       return get_user_pages(start, 1, flags, page, NULL);
 }
 
 static inline int check_user_page_hwpoison(unsigned long addr)
 {
-       int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
+       int rc, flags = FOLL_HWPOISON | FOLL_WRITE;
 
-       rc = __get_user_pages(current, current->mm, addr, 1,
-                             flags, NULL, NULL, NULL);
+       rc = get_user_pages(addr, 1, flags, NULL, NULL);
        return rc == -EHWPOISON;
 }
 
@@ -1416,10 +1414,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
                down_read(&current->mm->mmap_sem);
                npages = get_user_page_nowait(addr, write_fault, page);
                up_read(&current->mm->mmap_sem);
-       } else
+       } else {
+               unsigned int flags = FOLL_TOUCH | FOLL_HWPOISON;
+
+               if (write_fault)
+                       flags |= FOLL_WRITE;
+
                npages = __get_user_pages_unlocked(current, current->mm, addr, 1,
-                                                  write_fault, 0, page,
-                                                  FOLL_TOUCH|FOLL_HWPOISON);
+                                                  page, flags);
+       }
        if (npages != 1)
                return npages;
 
@@ -3841,7 +3844,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
         * kvm_arch_init makes sure there's at most one caller
         * for architectures that support multiple implementations,
         * like intel and amd on x86.
+        * kvm_arch_init must be called before kvm_irqfd_init to avoid creating
+        * conflicts in case kvm is already setup for another implementation.
         */
+       r = kvm_irqfd_init();
+       if (r)
+               goto out_irqfd;
 
        if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
                r = -ENOMEM;
@@ -3923,6 +3931,7 @@ out_free_0a:
        free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
        kvm_irqfd_exit();
+out_irqfd:
        kvm_arch_exit();
 out_fail:
        return r;